1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Data-Link Driver 30 */ 31 32 #include <sys/stropts.h> 33 #include <sys/strsun.h> 34 #include <sys/strsubr.h> 35 #include <sys/atomic.h> 36 #include <sys/mkdev.h> 37 #include <sys/vlan.h> 38 #include <sys/dld.h> 39 #include <sys/dld_impl.h> 40 #include <sys/dls_impl.h> 41 #include <inet/common.h> 42 43 static int str_constructor(void *, void *, int); 44 static void str_destructor(void *, void *); 45 static mblk_t *str_unitdata_ind(dld_str_t *, mblk_t *); 46 static void str_notify_promisc_on_phys(dld_str_t *); 47 static void str_notify_promisc_off_phys(dld_str_t *); 48 static void str_notify_phys_addr(dld_str_t *, const uint8_t *); 49 static void str_notify_link_up(dld_str_t *); 50 static void str_notify_link_down(dld_str_t *); 51 static void str_notify_capab_reneg(dld_str_t *); 52 static void str_notify_speed(dld_str_t *, uint32_t); 53 static void str_notify(void *, mac_notify_type_t); 54 55 static void ioc_raw(dld_str_t *, mblk_t *); 56 static void 
ioc_fast(dld_str_t *, mblk_t *); 57 static void ioc(dld_str_t *, mblk_t *); 58 static void dld_ioc(dld_str_t *, mblk_t *); 59 static minor_t dld_minor_hold(boolean_t); 60 static void dld_minor_rele(minor_t); 61 62 static uint32_t str_count; 63 static kmem_cache_t *str_cachep; 64 static vmem_t *minor_arenap; 65 static uint32_t minor_count; 66 static mod_hash_t *str_hashp; 67 68 #define MINOR_TO_PTR(minor) ((void *)(uintptr_t)(minor)) 69 #define PTR_TO_MINOR(ptr) ((minor_t)(uintptr_t)(ptr)) 70 71 #define STR_HASHSZ 64 72 #define STR_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key)) 73 74 /* 75 * Some notes on entry points, flow-control, queueing and locking: 76 * 77 * This driver exports the traditional STREAMS put entry point as well as 78 * the non-STREAMS fast-path transmit routine which is provided to IP via 79 * the DL_CAPAB_POLL negotiation. The put procedure handles all control 80 * and data operations, while the fast-path routine deals only with M_DATA 81 * fast-path packets. Regardless of the entry point, all outbound packets 82 * will end up in str_mdata_fastpath_put(), where they will be delivered to 83 * the MAC driver. 84 * 85 * The transmit logic operates in two modes: a "not busy" mode where the 86 * packets will be delivered to the MAC for a send attempt, or "busy" mode 87 * where they will be enqueued in the internal queue because of flow-control. 88 * Flow-control happens when the MAC driver indicates the packets couldn't 89 * be transmitted due to lack of resources (e.g. running out of descriptors). 90 * In such case, the driver will place a dummy message on its write-side 91 * STREAMS queue so that the queue is marked as "full". Any subsequent 92 * packets arriving at the driver will be enqueued in the internal queue, 93 * which is drained in the context of the service thread that gets scheduled 94 * whenever the driver is in the "busy" mode. 
When all packets have been 95 * successfully delivered by MAC and the internal queue is empty, it will 96 * transition to the "not busy" mode by removing the dummy message from the 97 * write-side STREAMS queue; in effect this will trigger backenabling. 98 * The sizes of q_hiwat and q_lowat are set to 1 and 0, respectively, due 99 * to the above reasons. 100 * 101 * The driver implements an internal transmit queue independent of STREAMS. 102 * This allows for flexibility and provides a fast enqueue/dequeue mechanism 103 * compared to the putq() and get() STREAMS interfaces. The only putq() and 104 * getq() operations done by the driver are those related to placing and 105 * removing the dummy message to/from the write-side STREAMS queue for flow- 106 * control purposes. 107 * 108 * Locking is done independent of STREAMS due to the driver being fully MT. 109 * Threads entering the driver (either from put or service entry points) 110 * will most likely be readers, with the exception of a few writer cases 111 * such those handling DLPI attach/detach/bind/unbind/etc. or any of the 112 * DLD-related ioctl requests. The DLPI detach case is special, because 113 * it involves freeing resources and therefore must be single-threaded. 114 * Unfortunately the readers/writers lock can't be used to protect against 115 * it, because the lock is dropped prior to the driver calling places where 116 * putnext() may be invoked, and such places may depend on those resources 117 * to exist. Because of this, the driver always completes the DLPI detach 118 * process when there are no other threads running in the driver. This is 119 * done by keeping track of the number of threads, such that the the last 120 * thread leaving the driver will finish the pending DLPI detach operation. 
 */

/*
 * dld_max_q_count is the queue depth threshold used to limit the number of
 * outstanding packets or bytes allowed in the queue; once this limit is
 * reached the driver will free any incoming ones until the queue depth
 * drops below the threshold.
 *
 * This buffering is provided to accommodate clients which do not employ
 * their own buffering scheme, and to handle occasional packet bursts.
 * Clients which handle their own buffering will receive positive feedback
 * from this driver as soon as it transitions into the "busy" state, i.e.
 * when the queue is initially filled up; they will get backenabled once
 * the queue is empty.
 *
 * The value chosen here is rather arbitrary; in future some intelligent
 * heuristics may be involved which could take into account the hardware's
 * transmit ring size, etc.
 */
uint_t dld_max_q_count = (16 * 1024 *1024);

/*
 * dld_finddevinfo() returns the dev_info_t * corresponding to a particular
 * dev_t.  It searches str_hashp (a table of dld_str_t's) for streams that
 * match dev_t.  If a stream is found and it is attached, its dev_info_t *
 * is returned.
 */
typedef struct i_dld_str_state_s {
	major_t		ds_major;	/* major number being searched for */
	minor_t		ds_minor;	/* minor number being searched for */
	dev_info_t	*ds_dip;	/* result: devinfo of matching stream */
} i_dld_str_state_t;

/*
 * mod_hash walk callback used by dld_finddevinfo(): match a stream by
 * major and minor number and, if it is attached (ds_mh != NULL), record
 * its dev_info_t in the walker state.
 */
/* ARGSUSED */
static uint_t
i_dld_str_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
{
	i_dld_str_state_t	*statep = arg;
	dld_str_t		*dsp = (dld_str_t *)val;

	if (statep->ds_major != dsp->ds_major)
		return (MH_WALK_CONTINUE);

	ASSERT(statep->ds_minor != 0);

	/*
	 * Access to ds_ppa and ds_mh need to be protected by ds_lock.
	 */
	rw_enter(&dsp->ds_lock, RW_READER);
	if (statep->ds_minor <= DLD_MAX_MINOR) {
		/*
		 * Style 1: minor can be derived from the ppa. we
		 * continue to walk until we find a matching stream
		 * in attached state.
		 */
		if (statep->ds_minor == DLS_PPA2MINOR(dsp->ds_ppa) &&
		    dsp->ds_mh != NULL) {
			statep->ds_dip = mac_devinfo_get(dsp->ds_mh);
			rw_exit(&dsp->ds_lock);
			return (MH_WALK_TERMINATE);
		}
	} else {
		/*
		 * Clone: a clone minor is unique. we can terminate the
		 * walk if we find a matching stream -- even if we fail
		 * to obtain the devinfo.
		 */
		if (statep->ds_minor == dsp->ds_minor) {
			if (dsp->ds_mh != NULL)
				statep->ds_dip = mac_devinfo_get(dsp->ds_mh);
			rw_exit(&dsp->ds_lock);
			return (MH_WALK_TERMINATE);
		}
	}
	rw_exit(&dsp->ds_lock);
	return (MH_WALK_CONTINUE);
}

static dev_info_t *
dld_finddevinfo(dev_t dev)
{
	i_dld_str_state_t	state;

	state.ds_minor = getminor(dev);
	state.ds_major = getmajor(dev);
	state.ds_dip = NULL;

	/* Minor 0 is never a valid stream minor; nothing can match. */
	if (state.ds_minor == 0)
		return (NULL);

	mod_hash_walk(str_hashp, i_dld_str_walker, &state);
	return (state.ds_dip);
}


/*
 * devo_getinfo: getinfo(9e)
 */
/*ARGSUSED*/
int
dld_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp)
{
	dev_info_t	*devinfo;
	minor_t		minor = getminor((dev_t)arg);
	int		rc = DDI_FAILURE;

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if ((devinfo = dld_finddevinfo((dev_t)arg)) != NULL) {
			*(dev_info_t **)resp = devinfo;
			rc = DDI_SUCCESS;
		}
		break;
	case DDI_INFO_DEVT2INSTANCE:
		/*
		 * Style 1 minors map directly to an instance number;
		 * clone minors require a hash walk to find the stream.
		 */
		if (minor > 0 && minor <= DLD_MAX_MINOR) {
			*resp = (void *)(uintptr_t)DLS_MINOR2INST(minor);
			rc = DDI_SUCCESS;
		} else if (minor > DLD_MAX_MINOR &&
		    (devinfo = dld_finddevinfo((dev_t)arg)) != NULL) {
			*resp = (void *)(uintptr_t)ddi_get_instance(devinfo);
			rc = DDI_SUCCESS;
		}
		break;
	}
	return (rc);
}

/*
 * qi_qopen: open(9e)
 */
/*ARGSUSED*/
int
dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp)
{
	dld_str_t	*dsp;
	major_t		major;
	minor_t		minor;
	int		err;

	if (sflag == MODOPEN)
		return (ENOTSUP);

	/*
	 * This is a cloning driver and therefore each queue should only
	 * ever get opened once.
	 */
	if (rq->q_ptr != NULL)
		return (EBUSY);

	major = getmajor(*devp);
	minor = getminor(*devp);
	if (minor > DLD_MAX_MINOR)
		return (ENODEV);

	/*
	 * Create a new dld_str_t for the stream. This will grab a new minor
	 * number that will be handed back in the cloned dev_t.  Creation may
	 * fail if we can't allocate the dummy mblk used for flow-control.
	 */
	dsp = dld_str_create(rq, DLD_DLPI, major,
	    ((minor == 0) ? DL_STYLE2 : DL_STYLE1));
	if (dsp == NULL)
		return (ENOSR);

	ASSERT(dsp->ds_dlstate == DL_UNATTACHED);
	if (minor != 0) {
		/*
		 * Style 1 open: the PPA is implied by the minor number,
		 * so attach immediately.
		 */

		if ((err = dld_str_attach(dsp, (t_uscalar_t)minor - 1)) != 0)
			goto failed;
		ASSERT(dsp->ds_dlstate == DL_UNBOUND);
	} else {
		(void) qassociate(rq, -1);
	}

	/*
	 * Enable the queue srv(9e) routine.
	 */
	qprocson(rq);

	/*
	 * Construct a cloned dev_t to hand back.
	 */
	*devp = makedevice(getmajor(*devp), dsp->ds_minor);
	return (0);

failed:
	dld_str_destroy(dsp);
	return (err);
}

/*
 * qi_qclose: close(9e)
 */
int
dld_close(queue_t *rq)
{
	dld_str_t	*dsp = rq->q_ptr;

	/*
	 * Wait until pending requests are processed.
	 */
	mutex_enter(&dsp->ds_thr_lock);
	while (dsp->ds_pending_cnt > 0)
		cv_wait(&dsp->ds_pending_cv, &dsp->ds_thr_lock);
	mutex_exit(&dsp->ds_thr_lock);

	/*
	 * Disable the queue srv(9e) routine.
	 */
	qprocsoff(rq);

	/*
	 * At this point we can not be entered by any threads via STREAMS
	 * or the direct call interface, which is available only to IP.
	 * After the interface is unplumbed, IP wouldn't have any reference
	 * to this instance, and therefore we are now effectively single
	 * threaded and don't require any lock protection.  Flush all
	 * pending packets which are sitting in the transmit queue.
	 */
	ASSERT(dsp->ds_thr == 0);
	dld_tx_flush(dsp);

	/*
	 * This stream was open to a provider node.  Check to see
	 * if it has been cleanly shut down.
	 */
	if (dsp->ds_dlstate != DL_UNATTACHED) {
		/*
		 * The stream is either open to a style 1 provider or
		 * this is not clean shutdown.  Detach from the PPA.
		 * (This is still ok even in the style 1 case).
		 */
		dld_str_detach(dsp);
	}

	dld_str_destroy(dsp);
	return (0);
}

/*
 * qi_qputp: put(9e)
 */
void
dld_wput(queue_t *wq, mblk_t *mp)
{
	dld_str_t *dsp = (dld_str_t *)wq->q_ptr;

	DLD_ENTER(dsp);

	switch (DB_TYPE(mp)) {
	case M_DATA:
		/*
		 * Raw M_DATA is only legal when bound (DL_IDLE) and in
		 * fast-path or raw mode; otherwise the message is dropped.
		 */
		rw_enter(&dsp->ds_lock, RW_READER);
		if (dsp->ds_dlstate != DL_IDLE ||
		    dsp->ds_mode == DLD_UNITDATA) {
			freemsg(mp);
		} else if (dsp->ds_mode == DLD_FASTPATH) {
			str_mdata_fastpath_put(dsp, mp);
		} else if (dsp->ds_mode == DLD_RAW) {
			str_mdata_raw_put(dsp, mp);
		}
		rw_exit(&dsp->ds_lock);
		break;
	case M_PROTO:
	case M_PCPROTO:
		dld_proto(dsp, mp);
		break;
	case M_IOCTL:
		dld_ioc(dsp, mp);
		break;
	case M_FLUSH:
		/*
		 * Flushing the write side means discarding everything in
		 * our internal transmit queue.
		 */
		if (*mp->b_rptr & FLUSHW) {
			dld_tx_flush(dsp);
			*mp->b_rptr &= ~FLUSHW;
		}

		if (*mp->b_rptr & FLUSHR) {
			qreply(wq, mp);
		} else {
			freemsg(mp);
		}
		break;
	default:
		freemsg(mp);
		break;
	}

	DLD_EXIT(dsp);
}

/*
 * qi_srvp: srv(9e)
 */
void
dld_wsrv(queue_t *wq)
{
	mblk_t		*mp;
	dld_str_t	*dsp = wq->q_ptr;

	DLD_ENTER(dsp);
	rw_enter(&dsp->ds_lock, RW_READER);
	/*
	 * Grab all packets (chained via b_next) off our transmit queue
	 * and try to send them all to the MAC layer.  Since the queue
	 * is independent of streams, we are able to dequeue all messages
	 * at once without looping through getq() and manually chaining
	 * them.  Note that the queue size parameters (byte and message
	 * counts) are cleared as well, but we postpone the backenabling
	 * until after the MAC transmit since some packets may end up
	 * back at our transmit queue.
	 */
	mutex_enter(&dsp->ds_tx_list_lock);
	if ((mp = dsp->ds_tx_list_head) == NULL) {
		ASSERT(!dsp->ds_tx_qbusy);
		ASSERT(dsp->ds_tx_flow_mp != NULL);
		ASSERT(dsp->ds_tx_list_head == NULL);
		ASSERT(dsp->ds_tx_list_tail == NULL);
		ASSERT(dsp->ds_tx_cnt == 0);
		ASSERT(dsp->ds_tx_msgcnt == 0);
		mutex_exit(&dsp->ds_tx_list_lock);
		rw_exit(&dsp->ds_lock);
		DLD_EXIT(dsp);
		return;
	}
	dsp->ds_tx_list_head = dsp->ds_tx_list_tail = NULL;
	dsp->ds_tx_cnt = dsp->ds_tx_msgcnt = 0;
	mutex_exit(&dsp->ds_tx_list_lock);

	/*
	 * Discard packets unless we are attached and bound; note that
	 * the driver mode (fastpath/raw/unitdata) is irrelevant here,
	 * because regardless of the mode all transmit will end up in
	 * str_mdata_fastpath_put() where the packets may be queued.
	 */
	ASSERT(DB_TYPE(mp) == M_DATA);
	if (dsp->ds_dlstate != DL_IDLE) {
		freemsgchain(mp);
		goto done;
	}

	/*
	 * Attempt to transmit one or more packets.  If the MAC can't
	 * send them all, re-queue the packet(s) at the beginning of
	 * the transmit queue to avoid any re-ordering.
	 */
	if ((mp = dls_tx(dsp->ds_dc, mp)) != NULL)
		dld_tx_enqueue(dsp, mp, B_TRUE);

done:
	/*
	 * Grab the list lock again and check if the transmit queue is
	 * really empty; if so, lift up flow-control and backenable any
	 * writer queues.  If the queue is not empty, schedule service
	 * thread to drain it.
	 */
	mutex_enter(&dsp->ds_tx_list_lock);
	if (dsp->ds_tx_list_head == NULL) {
		/* Removing the dummy mblk un-marks the queue as full. */
		dsp->ds_tx_flow_mp = getq(wq);
		ASSERT(dsp->ds_tx_flow_mp != NULL);
		dsp->ds_tx_qbusy = B_FALSE;
	}
	mutex_exit(&dsp->ds_tx_list_lock);

	rw_exit(&dsp->ds_lock);
	DLD_EXIT(dsp);
}

/*
 * Populate the given dev_ops with this driver's STREAMS entry points:
 * allocate and wire up the module_info, read/write qinit structures and
 * streamtab.  Undone by dld_fini_ops().  Note q_hiwat = 1 and q_lowat = 0,
 * as required by the flow-control scheme described at the top of the file.
 */
void
dld_init_ops(struct dev_ops *ops, const char *name)
{
	struct streamtab *stream;
	struct qinit *rq, *wq;
	struct module_info *modinfo;

	modinfo = kmem_zalloc(sizeof (struct module_info), KM_SLEEP);
	modinfo->mi_idname = kmem_zalloc(FMNAMESZ, KM_SLEEP);
	(void) snprintf(modinfo->mi_idname, FMNAMESZ, "%s", name);
	modinfo->mi_minpsz = 0;
	modinfo->mi_maxpsz = 64*1024;
	modinfo->mi_hiwat = 1;
	modinfo->mi_lowat = 0;

	rq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP);
	rq->qi_qopen = dld_open;
	rq->qi_qclose = dld_close;
	rq->qi_minfo = modinfo;

	wq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP);
	wq->qi_putp = (pfi_t)dld_wput;
	wq->qi_srvp = (pfi_t)dld_wsrv;
	wq->qi_minfo = modinfo;

	stream = kmem_zalloc(sizeof (struct streamtab), KM_SLEEP);
	stream->st_rdinit = rq;
	stream->st_wrinit = wq;
	ops->devo_cb_ops->cb_str = stream;

	ops->devo_getinfo = &dld_getinfo;
}

/*
 * Free the streamtab, qinit and module_info structures allocated by
 * dld_init_ops().
 */
void
dld_fini_ops(struct dev_ops *ops)
{
	struct streamtab *stream;
	struct qinit *rq, *wq;
	struct module_info *modinfo;

	stream = ops->devo_cb_ops->cb_str;
	rq = stream->st_rdinit;
	wq = stream->st_wrinit;
	modinfo = rq->qi_minfo;
	ASSERT(wq->qi_minfo == modinfo);

	kmem_free(stream, sizeof (struct streamtab));
	kmem_free(wq, sizeof (struct qinit));
	kmem_free(rq, sizeof (struct qinit));
	kmem_free(modinfo->mi_idname, FMNAMESZ);
	kmem_free(modinfo, sizeof (struct module_info));
}

/*
 * Initialize this module's data structures.
 */
void
dld_str_init(void)
{
	/*
	 * Create dld_str_t object cache.
	 */
	str_cachep = kmem_cache_create("dld_str_cache", sizeof (dld_str_t),
	    0, str_constructor, str_destructor, NULL, NULL, NULL, 0);
	ASSERT(str_cachep != NULL);

	/*
	 * Allocate a vmem arena to manage minor numbers. The range of the
	 * arena will be from DLD_MAX_MINOR + 1 to MAXMIN (maximum legal
	 * minor number).
	 */
	minor_arenap = vmem_create("dld_minor_arena",
	    MINOR_TO_PTR(DLD_MAX_MINOR + 1), MAXMIN, 1, NULL, NULL, NULL, 0,
	    VM_SLEEP | VMC_IDENTIFIER);
	ASSERT(minor_arenap != NULL);

	/*
	 * Create a hash table for maintaining dld_str_t's.
	 * The ds_minor field (the clone minor number) of a dld_str_t
	 * is used as a key for this hash table because this number is
	 * globally unique (allocated from "dld_minor_arena").
	 */
	str_hashp = mod_hash_create_idhash("dld_str_hash", STR_HASHSZ,
	    mod_hash_null_valdtor);
}

/*
 * Tear down this module's data structures.  Returns EBUSY if any stream
 * or minor number is still in use.
 */
int
dld_str_fini(void)
{
	/*
	 * Make sure that there are no objects in use.
	 */
	if (str_count != 0)
		return (EBUSY);

	/*
	 * Check to see if there are any minor numbers still in use.
	 */
	if (minor_count != 0)
		return (EBUSY);

	/*
	 * Destroy object cache.
	 */
	kmem_cache_destroy(str_cachep);
	vmem_destroy(minor_arenap);
	mod_hash_destroy_idhash(str_hashp);
	return (0);
}

/*
 * Create a new dld_str_t object.  Returns NULL only if the flow-control
 * mblk cannot be allocated.
 */
dld_str_t *
dld_str_create(queue_t *rq, uint_t type, major_t major, t_uscalar_t style)
{
	dld_str_t	*dsp;
	int		err;

	/*
	 * Allocate an object from the cache.
	 */
	atomic_add_32(&str_count, 1);
	dsp = kmem_cache_alloc(str_cachep, KM_SLEEP);

	/*
	 * Allocate the dummy mblk for flow-control.
	 */
	dsp->ds_tx_flow_mp = allocb(1, BPRI_HI);
	if (dsp->ds_tx_flow_mp == NULL) {
		kmem_cache_free(str_cachep, dsp);
		atomic_add_32(&str_count, -1);
		return (NULL);
	}
	dsp->ds_type = type;
	dsp->ds_major = major;
	dsp->ds_style = style;

	/*
	 * Initialize the queue pointers.
	 */
	ASSERT(RD(rq) == rq);
	dsp->ds_rq = rq;
	dsp->ds_wq = WR(rq);
	rq->q_ptr = WR(rq)->q_ptr = (void *)dsp;

	/*
	 * We want explicit control over our write-side STREAMS queue
	 * where the dummy mblk gets added/removed for flow-control.
	 */
	noenable(WR(rq));

	/* ds_minor is globally unique, so the insert cannot collide. */
	err = mod_hash_insert(str_hashp, STR_HASH_KEY(dsp->ds_minor),
	    (mod_hash_val_t)dsp);
	ASSERT(err == 0);
	return (dsp);
}

/*
 * Destroy a dld_str_t object.
 */
void
dld_str_destroy(dld_str_t *dsp)
{
	queue_t		*rq;
	queue_t		*wq;
	mod_hash_val_t	val;
	/*
	 * Clear the queue pointers.
	 */
	rq = dsp->ds_rq;
	wq = dsp->ds_wq;
	ASSERT(wq == WR(rq));

	rq->q_ptr = wq->q_ptr = NULL;
	dsp->ds_rq = dsp->ds_wq = NULL;

	ASSERT(!RW_LOCK_HELD(&dsp->ds_lock));
	ASSERT(MUTEX_NOT_HELD(&dsp->ds_tx_list_lock));
	ASSERT(dsp->ds_tx_list_head == NULL);
	ASSERT(dsp->ds_tx_list_tail == NULL);
	ASSERT(dsp->ds_tx_cnt == 0);
	ASSERT(dsp->ds_tx_msgcnt == 0);
	ASSERT(!dsp->ds_tx_qbusy);

	ASSERT(MUTEX_NOT_HELD(&dsp->ds_thr_lock));
	ASSERT(dsp->ds_thr == 0);
	ASSERT(dsp->ds_pending_req == NULL);

	/*
	 * Reinitialize all the flags.
	 */
	dsp->ds_notifications = 0;
	dsp->ds_passivestate = DLD_UNINITIALIZED;
	dsp->ds_mode = DLD_UNITDATA;

	/*
	 * Free the dummy mblk if it exists.
	 */
	if (dsp->ds_tx_flow_mp != NULL) {
		freeb(dsp->ds_tx_flow_mp);
		dsp->ds_tx_flow_mp = NULL;
	}

	(void) mod_hash_remove(str_hashp, STR_HASH_KEY(dsp->ds_minor), &val);
	ASSERT(dsp == (dld_str_t *)val);

	/*
	 * Free the object back to the cache.
	 */
	kmem_cache_free(str_cachep, dsp);
	atomic_add_32(&str_count, -1);
}

/*
 * kmem_cache constructor function: see kmem_cache_create(9f).
 */
/*ARGSUSED*/
static int
str_constructor(void *buf, void *cdrarg, int kmflags)
{
	dld_str_t	*dsp = buf;

	bzero(buf, sizeof (dld_str_t));

	/*
	 * Allocate a new minor number.
	 */
	if ((dsp->ds_minor = dld_minor_hold(kmflags == KM_SLEEP)) == 0)
		return (-1);

	/*
	 * Initialize the DLPI state machine.
	 */
	dsp->ds_dlstate = DL_UNATTACHED;
	dsp->ds_ppa = (t_uscalar_t)-1;

	mutex_init(&dsp->ds_thr_lock, NULL, MUTEX_DRIVER, NULL);
	rw_init(&dsp->ds_lock, NULL, RW_DRIVER, NULL);
	mutex_init(&dsp->ds_tx_list_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&dsp->ds_pending_cv, NULL, CV_DRIVER, NULL);

	return (0);
}

/*
 * kmem_cache destructor function.
 */
/*ARGSUSED*/
static void
str_destructor(void *buf, void *cdrarg)
{
	dld_str_t	*dsp = buf;

	/*
	 * Make sure the DLPI state machine was reset.
	 */
	ASSERT(dsp->ds_dlstate == DL_UNATTACHED);

	/*
	 * Make sure the data-link interface was closed.
	 */
	ASSERT(dsp->ds_mh == NULL);
	ASSERT(dsp->ds_dc == NULL);

	/*
	 * Make sure enabled notifications are cleared.
	 */
	ASSERT(dsp->ds_notifications == 0);

	/*
	 * Make sure polling is disabled.
	 */
	ASSERT(!dsp->ds_polling);

	/*
	 * Release the minor number.
	 */
	dld_minor_rele(dsp->ds_minor);

	ASSERT(!RW_LOCK_HELD(&dsp->ds_lock));
	rw_destroy(&dsp->ds_lock);

	ASSERT(MUTEX_NOT_HELD(&dsp->ds_tx_list_lock));
	mutex_destroy(&dsp->ds_tx_list_lock);
	ASSERT(dsp->ds_tx_flow_mp == NULL);

	ASSERT(MUTEX_NOT_HELD(&dsp->ds_thr_lock));
	mutex_destroy(&dsp->ds_thr_lock);
	ASSERT(dsp->ds_pending_req == NULL);
	ASSERT(dsp->ds_pending_op == NULL);
	ASSERT(dsp->ds_pending_cnt == 0);
	cv_destroy(&dsp->ds_pending_cv);
}

/*
 * M_DATA put (IP fast-path mode)
 */
void
str_mdata_fastpath_put(dld_str_t *dsp, mblk_t *mp)
{
	/*
	 * This function can be called from within dld or from an upper
	 * layer protocol (currently only tcp). If we are in the busy
	 * mode enqueue the packet(s) and return. Otherwise hand them
	 * over to the MAC driver for transmission; any remaining one(s)
	 * which didn't get sent will be queued.
	 *
	 * Note here that we don't grab the list lock prior to checking
	 * the busy flag. This is okay, because a missed transition
	 * will not cause any packet reordering for any particular TCP
	 * connection (which is single-threaded).  The enqueue routine
	 * will atomically set the busy flag and schedule the service
	 * thread to run; the flag is only cleared by the service thread
	 * when there is no more packet to be transmitted.
	 */
	if (dsp->ds_tx_qbusy || (mp = dls_tx(dsp->ds_dc, mp)) != NULL)
		dld_tx_enqueue(dsp, mp, B_FALSE);
}

/*
 * M_DATA put (raw mode)
 */
void
str_mdata_raw_put(dld_str_t *dsp, mblk_t *mp)
{
	mblk_t *bp, *newmp;
	size_t size;
	mac_header_info_t mhi;

	/*
	 * Certain MAC type plugins provide an illusion for raw DLPI
	 * consumers. They pretend that the MAC layer is something that
	 * it's not for the benefit of observability tools. For example, a
	 * wifi plugin might pretend that it's Ethernet for such consumers.
	 * Here, we call into the MAC layer so that this illusion can be
	 * maintained. The plugin will optionally transform the MAC header
	 * here into something that can be passed down. The header goes
	 * from raw mode to "cooked" mode.
	 */
	if ((newmp = mac_header_cook(dsp->ds_mh, mp)) == NULL)
		goto discard;
	mp = newmp;

	size = MBLKL(mp);

	/*
	 * Check the packet is not too big and that any remaining
	 * fragment list is composed entirely of M_DATA messages. (We
	 * know the first fragment was M_DATA otherwise we could not
	 * have got here).
	 */
	for (bp = mp->b_cont; bp != NULL; bp = bp->b_cont) {
		if (DB_TYPE(bp) != M_DATA)
			goto discard;
		size += MBLKL(bp);
	}

	if (dls_header_info(dsp->ds_dc, mp, &mhi) != 0)
		goto discard;

	if (size > dsp->ds_mip->mi_sdu_max + mhi.mhi_hdrsize)
		goto discard;

	if (dsp->ds_mip->mi_media == DL_ETHER && mhi.mhi_origsap == VLAN_TPID) {
		struct ether_vlan_header *evhp;

		if (size < sizeof (struct ether_vlan_header))
			goto discard;
		/*
		 * Replace vtag with our own
		 */
		evhp = (struct ether_vlan_header *)mp->b_rptr;
		evhp->ether_tci = htons(VLAN_TCI(dsp->ds_pri,
		    ETHER_CFI, dsp->ds_vid));
	}

	str_mdata_fastpath_put(dsp, mp);
	return;

discard:
	freemsg(mp);
}

/*
 * Process DL_ATTACH_REQ (style 2) or open(2) (style 1).
 */
int
dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa)
{
	int			err;
	const char		*drvname;
	char			name[MAXNAMELEN];
	dls_channel_t		dc;
	uint_t			addr_length;

	ASSERT(dsp->ds_dc == NULL);

	if ((drvname = ddi_major_to_name(dsp->ds_major)) == NULL)
		return (EINVAL);

	(void) snprintf(name, MAXNAMELEN, "%s%u", drvname, ppa);

	if (strcmp(drvname, "aggr") != 0 &&
	    qassociate(dsp->ds_wq, DLS_PPA2INST(ppa)) != 0)
		return (EINVAL);

	/*
	 * Open a channel.
	 */
	if ((err = dls_open(name, &dc)) != 0) {
		(void) qassociate(dsp->ds_wq, -1);
		return (err);
	}

	/*
	 * Cache the MAC interface handle, a pointer to the immutable MAC
	 * information and the current and 'factory' MAC address.
	 */
	dsp->ds_mh = dls_mac(dc);
	dsp->ds_mip = mac_info(dsp->ds_mh);

	mac_unicst_get(dsp->ds_mh, dsp->ds_curr_addr);

	addr_length = dsp->ds_mip->mi_addr_length;
	bcopy(dsp->ds_mip->mi_unicst_addr, dsp->ds_fact_addr, addr_length);

	/*
	 * Cache the interface VLAN identifier. (This will be VLAN_ID_NONE for
	 * a non-VLAN interface).
	 */
	dsp->ds_vid = dls_vid(dc);

	/*
	 * Set the default packet priority.
	 */
	dsp->ds_pri = 0;

	/*
	 * Add a notify function so that we get updates from the MAC.
	 */
	dsp->ds_mnh = mac_notify_add(dsp->ds_mh, str_notify, (void *)dsp);

	dsp->ds_ppa = ppa;
	dsp->ds_dc = dc;
	dsp->ds_dlstate = DL_UNBOUND;

	return (0);
}

/*
 * Process DL_DETACH_REQ (style 2) or close(2) (style 1). Can also be called
 * from close(2) for style 2.
 */
void
dld_str_detach(dld_str_t *dsp)
{
	ASSERT(dsp->ds_thr == 0);

	/*
	 * Remove the notify function.
	 */
	mac_notify_remove(dsp->ds_mh, dsp->ds_mnh);

	/*
	 * Clear the polling and promisc flags.
	 */
	dsp->ds_polling = B_FALSE;
	dsp->ds_soft_ring = B_FALSE;
	dsp->ds_promisc = 0;

	/*
	 * Close the channel.
	 */
	dls_close(dsp->ds_dc);
	dsp->ds_ppa = (t_uscalar_t)-1;
	dsp->ds_dc = NULL;
	dsp->ds_mh = NULL;

	(void) qassociate(dsp->ds_wq, -1);

	/*
	 * Re-initialize the DLPI state machine.
	 */
	dsp->ds_dlstate = DL_UNATTACHED;

}

/*
 * Raw mode receive function.
983 */ 984 /*ARGSUSED*/ 985 void 986 dld_str_rx_raw(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 987 size_t header_length) 988 { 989 dld_str_t *dsp = (dld_str_t *)arg; 990 mblk_t *next, *newmp; 991 992 ASSERT(mp != NULL); 993 do { 994 /* 995 * Get the pointer to the next packet in the chain and then 996 * clear b_next before the packet gets passed on. 997 */ 998 next = mp->b_next; 999 mp->b_next = NULL; 1000 1001 /* 1002 * Wind back b_rptr to point at the MAC header. 1003 */ 1004 ASSERT(mp->b_rptr >= DB_BASE(mp) + header_length); 1005 mp->b_rptr -= header_length; 1006 1007 /* 1008 * Certain MAC type plugins provide an illusion for raw 1009 * DLPI consumers. They pretend that the MAC layer is 1010 * something that it's not for the benefit of observability 1011 * tools. For example, a wifi plugin might pretend that 1012 * it's Ethernet for such consumers. Here, we call into 1013 * the MAC layer so that this illusion can be maintained. 1014 * The plugin will optionally transform the MAC header here 1015 * into something that can be passed up to raw consumers. 1016 * The header goes from "cooked" mode to raw mode. 1017 */ 1018 if ((newmp = mac_header_uncook(dsp->ds_mh, mp)) == NULL) { 1019 freemsg(mp); 1020 mp = next; 1021 continue; 1022 } 1023 mp = newmp; 1024 1025 if (dsp->ds_mip->mi_media == DL_ETHER) { 1026 struct ether_header *ehp = 1027 (struct ether_header *)mp->b_rptr; 1028 1029 if (ntohs(ehp->ether_type) == VLAN_TPID) { 1030 /* 1031 * Strip off the vtag 1032 */ 1033 ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 1034 2 * ETHERADDRL); 1035 mp->b_rptr += VLAN_TAGSZ; 1036 } 1037 } 1038 /* 1039 * Pass the packet on. 1040 */ 1041 if (canputnext(dsp->ds_rq)) 1042 putnext(dsp->ds_rq, mp); 1043 else 1044 freemsg(mp); 1045 1046 /* 1047 * Move on to the next packet in the chain. 1048 */ 1049 mp = next; 1050 } while (mp != NULL); 1051 } 1052 1053 /* 1054 * Fast-path receive function. 
 */
/*ARGSUSED*/
void
dld_str_rx_fastpath(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    size_t header_length)
{
	dld_str_t	*dsp = (dld_str_t *)arg;
	mblk_t		*next;

	ASSERT(mp != NULL);
	do {
		/*
		 * Get the pointer to the next packet in the chain and then
		 * clear b_next before the packet gets passed on.
		 */
		next = mp->b_next;
		mp->b_next = NULL;

		/*
		 * Pass the packet on.  In fast-path mode b_rptr already
		 * points at the payload, so no header manipulation is done.
		 */
		if (canputnext(dsp->ds_rq))
			putnext(dsp->ds_rq, mp);
		else
			freemsg(mp);
		/*
		 * Move on to the next packet in the chain.
		 */
		mp = next;
	} while (mp != NULL);
}

/*
 * Default receive function (send DL_UNITDATA_IND messages).
 */
/*ARGSUSED*/
void
dld_str_rx_unitdata(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    size_t header_length)
{
	dld_str_t	*dsp = (dld_str_t *)arg;
	mblk_t		*ud_mp;
	mblk_t		*next;

	ASSERT(mp != NULL);
	do {
		/*
		 * Get the pointer to the next packet in the chain and then
		 * clear b_next before the packet gets passed on.
		 */
		next = mp->b_next;
		mp->b_next = NULL;

		/*
		 * Wind back b_rptr to point at the MAC header.
		 */
		ASSERT(mp->b_rptr >= DB_BASE(mp) + header_length);
		mp->b_rptr -= header_length;

		/*
		 * Create the DL_UNITDATA_IND M_PROTO.  If allocation fails,
		 * the current packet and the entire remainder of the chain
		 * are dropped.
		 */
		if ((ud_mp = str_unitdata_ind(dsp, mp)) == NULL) {
			freemsgchain(mp);
			return;
		}

		/*
		 * Advance b_rptr to point at the payload again.
		 */
		mp->b_rptr += header_length;

		/*
		 * Prepend the DL_UNITDATA_IND.
		 */
		ud_mp->b_cont = mp;

		/*
		 * Send the message.
		 */
		if (canputnext(dsp->ds_rq))
			putnext(dsp->ds_rq, ud_mp);
		else
			freemsg(ud_mp);

		/*
		 * Move on to the next packet in the chain.
		 */
		mp = next;
	} while (mp != NULL);
}

/*
 * Generate DL_NOTIFY_IND messages to notify the DLPI consumer of the
 * current state of the interface.
 */
void
dld_str_notify_ind(dld_str_t *dsp)
{
	mac_notify_type_t	type;

	/* str_notify() itself filters on the enabled notifications. */
	for (type = 0; type < MAC_NNOTE; type++)
		str_notify(dsp, type);
}

/*
 * Wrapper that reserves room for both DLSAP addresses (hardware address
 * plus 16-bit SAP) directly after the dl_unitdata_ind_t.
 */
typedef struct dl_unitdata_ind_wrapper {
	dl_unitdata_ind_t	dl_unitdata;
	uint8_t			dl_dest_addr[MAXMACADDRLEN + sizeof (uint16_t)];
	uint8_t			dl_src_addr[MAXMACADDRLEN + sizeof (uint16_t)];
} dl_unitdata_ind_wrapper_t;

/*
 * Create a DL_UNITDATA_IND M_PROTO message.  Returns NULL if the packet
 * header cannot be parsed or the message cannot be allocated.
 */
static mblk_t *
str_unitdata_ind(dld_str_t *dsp, mblk_t *mp)
{
	mblk_t				*nmp;
	dl_unitdata_ind_wrapper_t	*dlwp;
	dl_unitdata_ind_t		*dlp;
	mac_header_info_t		mhi;
	uint_t				addr_length;
	uint8_t				*daddr;
	uint8_t				*saddr;

	/*
	 * Get the packet header information.
	 */
	if (dls_header_info(dsp->ds_dc, mp, &mhi) != 0)
		return (NULL);

	/*
	 * Allocate a message large enough to contain the wrapper structure
	 * defined above.
	 */
	if ((nmp = mexchange(dsp->ds_wq, NULL,
	    sizeof (dl_unitdata_ind_wrapper_t), M_PROTO,
	    DL_UNITDATA_IND)) == NULL)
		return (NULL);

	dlwp = (dl_unitdata_ind_wrapper_t *)nmp->b_rptr;

	dlp = &(dlwp->dl_unitdata);
	ASSERT(dlp == (dl_unitdata_ind_t *)nmp->b_rptr);
	ASSERT(dlp->dl_primitive == DL_UNITDATA_IND);

	/*
	 * Copy in the destination address.
	 */
	addr_length = dsp->ds_mip->mi_addr_length;
	daddr = dlwp->dl_dest_addr;
	dlp->dl_dest_addr_offset = (uintptr_t)daddr - (uintptr_t)dlp;
	bcopy(mhi.mhi_daddr, daddr, addr_length);

	/*
	 * Set the destination DLSAP to our bound DLSAP value.
	 */
	*(uint16_t *)(daddr + addr_length) = dsp->ds_sap;
	dlp->dl_dest_addr_length = addr_length + sizeof (uint16_t);

	/*
	 * If the destination address was multicast or broadcast then the
	 * dl_group_address field should be non-zero.
	 */
	dlp->dl_group_address = (mhi.mhi_dsttype == MAC_ADDRTYPE_MULTICAST) ||
	    (mhi.mhi_dsttype == MAC_ADDRTYPE_BROADCAST);

	/*
	 * Copy in the source address if one exists.  Some MAC types (DL_IB
	 * for example) may not have access to source information.
	 */
	if (mhi.mhi_saddr == NULL) {
		dlp->dl_src_addr_offset = dlp->dl_src_addr_length = 0;
	} else {
		saddr = dlwp->dl_src_addr;
		dlp->dl_src_addr_offset = (uintptr_t)saddr - (uintptr_t)dlp;
		bcopy(mhi.mhi_saddr, saddr, addr_length);

		/*
		 * Set the source DLSAP to the packet ethertype.
		 */
		*(uint16_t *)(saddr + addr_length) = mhi.mhi_origsap;
		dlp->dl_src_addr_length = addr_length + sizeof (uint16_t);
	}

	return (nmp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_PROMISC_ON_PHYS
 */
static void
str_notify_promisc_on_phys(dld_str_t *dsp)
{
	mblk_t		*mp;
	dl_notify_ind_t	*dlip;

	/* Only notify if the consumer asked for this notification. */
	if (!(dsp->ds_notifications & DL_NOTE_PROMISC_ON_PHYS))
		return;

	if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_PROMISC_ON_PHYS;

	qreply(dsp->ds_wq, mp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_PROMISC_OFF_PHYS
 */
static void
str_notify_promisc_off_phys(dld_str_t *dsp)
{
	mblk_t		*mp;
	dl_notify_ind_t	*dlip;

	if (!(dsp->ds_notifications & DL_NOTE_PROMISC_OFF_PHYS))
		return;

	if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
M_PROTO, 0)) == NULL) 1281 return; 1282 1283 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1284 dlip = (dl_notify_ind_t *)mp->b_rptr; 1285 dlip->dl_primitive = DL_NOTIFY_IND; 1286 dlip->dl_notification = DL_NOTE_PROMISC_OFF_PHYS; 1287 1288 qreply(dsp->ds_wq, mp); 1289 } 1290 1291 /* 1292 * DL_NOTIFY_IND: DL_NOTE_PHYS_ADDR 1293 */ 1294 static void 1295 str_notify_phys_addr(dld_str_t *dsp, const uint8_t *addr) 1296 { 1297 mblk_t *mp; 1298 dl_notify_ind_t *dlip; 1299 uint_t addr_length; 1300 uint16_t ethertype; 1301 1302 if (!(dsp->ds_notifications & DL_NOTE_PHYS_ADDR)) 1303 return; 1304 1305 addr_length = dsp->ds_mip->mi_addr_length; 1306 if ((mp = mexchange(dsp->ds_wq, NULL, 1307 sizeof (dl_notify_ind_t) + addr_length + sizeof (uint16_t), 1308 M_PROTO, 0)) == NULL) 1309 return; 1310 1311 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1312 dlip = (dl_notify_ind_t *)mp->b_rptr; 1313 dlip->dl_primitive = DL_NOTIFY_IND; 1314 dlip->dl_notification = DL_NOTE_PHYS_ADDR; 1315 dlip->dl_data = DL_CURR_PHYS_ADDR; 1316 dlip->dl_addr_offset = sizeof (dl_notify_ind_t); 1317 dlip->dl_addr_length = addr_length + sizeof (uint16_t); 1318 1319 bcopy(addr, &dlip[1], addr_length); 1320 1321 ethertype = (dsp->ds_sap < ETHERTYPE_802_MIN) ? 
0 : dsp->ds_sap; 1322 *(uint16_t *)((uchar_t *)(dlip + 1) + addr_length) = 1323 ethertype; 1324 1325 qreply(dsp->ds_wq, mp); 1326 } 1327 1328 /* 1329 * DL_NOTIFY_IND: DL_NOTE_LINK_UP 1330 */ 1331 static void 1332 str_notify_link_up(dld_str_t *dsp) 1333 { 1334 mblk_t *mp; 1335 dl_notify_ind_t *dlip; 1336 1337 if (!(dsp->ds_notifications & DL_NOTE_LINK_UP)) 1338 return; 1339 1340 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1341 M_PROTO, 0)) == NULL) 1342 return; 1343 1344 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1345 dlip = (dl_notify_ind_t *)mp->b_rptr; 1346 dlip->dl_primitive = DL_NOTIFY_IND; 1347 dlip->dl_notification = DL_NOTE_LINK_UP; 1348 1349 qreply(dsp->ds_wq, mp); 1350 } 1351 1352 /* 1353 * DL_NOTIFY_IND: DL_NOTE_LINK_DOWN 1354 */ 1355 static void 1356 str_notify_link_down(dld_str_t *dsp) 1357 { 1358 mblk_t *mp; 1359 dl_notify_ind_t *dlip; 1360 1361 if (!(dsp->ds_notifications & DL_NOTE_LINK_DOWN)) 1362 return; 1363 1364 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1365 M_PROTO, 0)) == NULL) 1366 return; 1367 1368 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1369 dlip = (dl_notify_ind_t *)mp->b_rptr; 1370 dlip->dl_primitive = DL_NOTIFY_IND; 1371 dlip->dl_notification = DL_NOTE_LINK_DOWN; 1372 1373 qreply(dsp->ds_wq, mp); 1374 } 1375 1376 /* 1377 * DL_NOTIFY_IND: DL_NOTE_SPEED 1378 */ 1379 static void 1380 str_notify_speed(dld_str_t *dsp, uint32_t speed) 1381 { 1382 mblk_t *mp; 1383 dl_notify_ind_t *dlip; 1384 1385 if (!(dsp->ds_notifications & DL_NOTE_SPEED)) 1386 return; 1387 1388 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1389 M_PROTO, 0)) == NULL) 1390 return; 1391 1392 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1393 dlip = (dl_notify_ind_t *)mp->b_rptr; 1394 dlip->dl_primitive = DL_NOTIFY_IND; 1395 dlip->dl_notification = DL_NOTE_SPEED; 1396 dlip->dl_data = speed; 1397 1398 qreply(dsp->ds_wq, mp); 1399 } 1400 1401 /* 1402 * DL_NOTIFY_IND: DL_NOTE_CAPAB_RENEG 1403 */ 1404 static void 1405 
str_notify_capab_reneg(dld_str_t *dsp) 1406 { 1407 mblk_t *mp; 1408 dl_notify_ind_t *dlip; 1409 1410 if (!(dsp->ds_notifications & DL_NOTE_CAPAB_RENEG)) 1411 return; 1412 1413 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1414 M_PROTO, 0)) == NULL) 1415 return; 1416 1417 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1418 dlip = (dl_notify_ind_t *)mp->b_rptr; 1419 dlip->dl_primitive = DL_NOTIFY_IND; 1420 dlip->dl_notification = DL_NOTE_CAPAB_RENEG; 1421 1422 qreply(dsp->ds_wq, mp); 1423 } 1424 1425 /* 1426 * DL_NOTIFY_IND: DL_NOTE_FASTPATH_FLUSH 1427 */ 1428 static void 1429 str_notify_fastpath_flush(dld_str_t *dsp) 1430 { 1431 mblk_t *mp; 1432 dl_notify_ind_t *dlip; 1433 1434 if (!(dsp->ds_notifications & DL_NOTE_FASTPATH_FLUSH)) 1435 return; 1436 1437 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1438 M_PROTO, 0)) == NULL) 1439 return; 1440 1441 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1442 dlip = (dl_notify_ind_t *)mp->b_rptr; 1443 dlip->dl_primitive = DL_NOTIFY_IND; 1444 dlip->dl_notification = DL_NOTE_FASTPATH_FLUSH; 1445 1446 qreply(dsp->ds_wq, mp); 1447 } 1448 1449 /* 1450 * MAC notification callback. 1451 */ 1452 static void 1453 str_notify(void *arg, mac_notify_type_t type) 1454 { 1455 dld_str_t *dsp = (dld_str_t *)arg; 1456 queue_t *q = dsp->ds_wq; 1457 1458 switch (type) { 1459 case MAC_NOTE_TX: 1460 qenable(q); 1461 break; 1462 1463 case MAC_NOTE_DEVPROMISC: 1464 /* 1465 * Send the appropriate DL_NOTIFY_IND. 1466 */ 1467 if (mac_promisc_get(dsp->ds_mh, MAC_DEVPROMISC)) 1468 str_notify_promisc_on_phys(dsp); 1469 else 1470 str_notify_promisc_off_phys(dsp); 1471 break; 1472 1473 case MAC_NOTE_PROMISC: 1474 break; 1475 1476 case MAC_NOTE_UNICST: 1477 /* 1478 * This notification is sent whenever the MAC unicast address 1479 * changes. We need to re-cache the address. 1480 */ 1481 mac_unicst_get(dsp->ds_mh, dsp->ds_curr_addr); 1482 1483 /* 1484 * Send the appropriate DL_NOTIFY_IND. 
1485 */ 1486 str_notify_phys_addr(dsp, dsp->ds_curr_addr); 1487 break; 1488 1489 case MAC_NOTE_LINK: 1490 /* 1491 * This notification is sent every time the MAC driver 1492 * updates the link state. 1493 */ 1494 switch (mac_link_get(dsp->ds_mh)) { 1495 case LINK_STATE_UP: { 1496 uint64_t speed; 1497 /* 1498 * The link is up so send the appropriate 1499 * DL_NOTIFY_IND. 1500 */ 1501 str_notify_link_up(dsp); 1502 1503 speed = mac_stat_get(dsp->ds_mh, MAC_STAT_IFSPEED); 1504 str_notify_speed(dsp, (uint32_t)(speed / 1000ull)); 1505 break; 1506 } 1507 case LINK_STATE_DOWN: 1508 /* 1509 * The link is down so send the appropriate 1510 * DL_NOTIFY_IND. 1511 */ 1512 str_notify_link_down(dsp); 1513 break; 1514 1515 default: 1516 break; 1517 } 1518 break; 1519 1520 case MAC_NOTE_RESOURCE: 1521 /* 1522 * This notification is sent whenever the MAC resources 1523 * change. We need to renegotiate the capabilities. 1524 * Send the appropriate DL_NOTIFY_IND. 1525 */ 1526 str_notify_capab_reneg(dsp); 1527 break; 1528 1529 case MAC_NOTE_FASTPATH_FLUSH: 1530 str_notify_fastpath_flush(dsp); 1531 break; 1532 1533 default: 1534 ASSERT(B_FALSE); 1535 break; 1536 } 1537 } 1538 1539 /* 1540 * Enqueue one or more messages to the transmit queue. 1541 * Caller specifies the insertion position (head/tail). 1542 */ 1543 void 1544 dld_tx_enqueue(dld_str_t *dsp, mblk_t *mp, boolean_t head_insert) 1545 { 1546 mblk_t *tail; 1547 queue_t *q = dsp->ds_wq; 1548 uint_t cnt, msgcnt; 1549 uint_t tot_cnt, tot_msgcnt; 1550 1551 ASSERT(DB_TYPE(mp) == M_DATA); 1552 /* Calculate total size and count of the packet(s) */ 1553 for (tail = mp, cnt = msgdsize(mp), msgcnt = 1; 1554 tail->b_next != NULL; tail = tail->b_next) { 1555 ASSERT(DB_TYPE(tail->b_next) == M_DATA); 1556 cnt += msgdsize(tail->b_next); 1557 msgcnt++; 1558 } 1559 1560 mutex_enter(&dsp->ds_tx_list_lock); 1561 /* 1562 * If the queue depth would exceed the allowed threshold, drop 1563 * new packet(s) and drain those already in the queue. 
1564 */ 1565 tot_cnt = dsp->ds_tx_cnt + cnt; 1566 tot_msgcnt = dsp->ds_tx_msgcnt + msgcnt; 1567 1568 if (!head_insert && 1569 (tot_cnt >= dld_max_q_count || tot_msgcnt >= dld_max_q_count)) { 1570 ASSERT(dsp->ds_tx_qbusy); 1571 mutex_exit(&dsp->ds_tx_list_lock); 1572 freemsgchain(mp); 1573 goto done; 1574 } 1575 1576 /* Update the queue size parameters */ 1577 dsp->ds_tx_cnt = tot_cnt; 1578 dsp->ds_tx_msgcnt = tot_msgcnt; 1579 1580 /* 1581 * If the transmit queue is currently empty and we are 1582 * about to deposit the packet(s) there, switch mode to 1583 * "busy" and raise flow-control condition. 1584 */ 1585 if (!dsp->ds_tx_qbusy) { 1586 dsp->ds_tx_qbusy = B_TRUE; 1587 ASSERT(dsp->ds_tx_flow_mp != NULL); 1588 (void) putq(q, dsp->ds_tx_flow_mp); 1589 dsp->ds_tx_flow_mp = NULL; 1590 } 1591 1592 if (!head_insert) { 1593 /* Tail insertion */ 1594 if (dsp->ds_tx_list_head == NULL) 1595 dsp->ds_tx_list_head = mp; 1596 else 1597 dsp->ds_tx_list_tail->b_next = mp; 1598 dsp->ds_tx_list_tail = tail; 1599 } else { 1600 /* Head insertion */ 1601 tail->b_next = dsp->ds_tx_list_head; 1602 if (dsp->ds_tx_list_head == NULL) 1603 dsp->ds_tx_list_tail = tail; 1604 dsp->ds_tx_list_head = mp; 1605 } 1606 mutex_exit(&dsp->ds_tx_list_lock); 1607 done: 1608 /* Schedule service thread to drain the transmit queue */ 1609 qenable(q); 1610 } 1611 1612 void 1613 dld_tx_flush(dld_str_t *dsp) 1614 { 1615 mutex_enter(&dsp->ds_tx_list_lock); 1616 if (dsp->ds_tx_list_head != NULL) { 1617 freemsgchain(dsp->ds_tx_list_head); 1618 dsp->ds_tx_list_head = dsp->ds_tx_list_tail = NULL; 1619 dsp->ds_tx_cnt = dsp->ds_tx_msgcnt = 0; 1620 if (dsp->ds_tx_qbusy) { 1621 dsp->ds_tx_flow_mp = getq(dsp->ds_wq); 1622 ASSERT(dsp->ds_tx_flow_mp != NULL); 1623 dsp->ds_tx_qbusy = B_FALSE; 1624 } 1625 } 1626 mutex_exit(&dsp->ds_tx_list_lock); 1627 } 1628 1629 /* 1630 * Process an M_IOCTL message. 
1631 */ 1632 static void 1633 dld_ioc(dld_str_t *dsp, mblk_t *mp) 1634 { 1635 uint_t cmd; 1636 1637 cmd = ((struct iocblk *)mp->b_rptr)->ioc_cmd; 1638 ASSERT(dsp->ds_type == DLD_DLPI); 1639 1640 switch (cmd) { 1641 case DLIOCRAW: 1642 ioc_raw(dsp, mp); 1643 break; 1644 case DLIOCHDRINFO: 1645 ioc_fast(dsp, mp); 1646 break; 1647 default: 1648 ioc(dsp, mp); 1649 } 1650 } 1651 1652 /* 1653 * DLIOCRAW 1654 */ 1655 static void 1656 ioc_raw(dld_str_t *dsp, mblk_t *mp) 1657 { 1658 queue_t *q = dsp->ds_wq; 1659 1660 rw_enter(&dsp->ds_lock, RW_WRITER); 1661 if (dsp->ds_polling || dsp->ds_soft_ring) { 1662 rw_exit(&dsp->ds_lock); 1663 miocnak(q, mp, 0, EPROTO); 1664 return; 1665 } 1666 1667 if (dsp->ds_mode != DLD_RAW && dsp->ds_dlstate == DL_IDLE) { 1668 /* 1669 * Set the receive callback. 1670 */ 1671 dls_rx_set(dsp->ds_dc, dld_str_rx_raw, (void *)dsp); 1672 } 1673 1674 /* 1675 * Note that raw mode is enabled. 1676 */ 1677 dsp->ds_mode = DLD_RAW; 1678 1679 rw_exit(&dsp->ds_lock); 1680 miocack(q, mp, 0, 0); 1681 } 1682 1683 /* 1684 * DLIOCHDRINFO 1685 */ 1686 static void 1687 ioc_fast(dld_str_t *dsp, mblk_t *mp) 1688 { 1689 dl_unitdata_req_t *dlp; 1690 off_t off; 1691 size_t len; 1692 const uint8_t *addr; 1693 uint16_t sap; 1694 mblk_t *nmp; 1695 mblk_t *hmp; 1696 uint_t addr_length; 1697 queue_t *q = dsp->ds_wq; 1698 int err; 1699 dls_channel_t dc; 1700 1701 if (dld_opt & DLD_OPT_NO_FASTPATH) { 1702 err = ENOTSUP; 1703 goto failed; 1704 } 1705 1706 nmp = mp->b_cont; 1707 if (nmp == NULL || MBLKL(nmp) < sizeof (dl_unitdata_req_t) || 1708 (dlp = (dl_unitdata_req_t *)nmp->b_rptr, 1709 dlp->dl_primitive != DL_UNITDATA_REQ)) { 1710 err = EINVAL; 1711 goto failed; 1712 } 1713 1714 off = dlp->dl_dest_addr_offset; 1715 len = dlp->dl_dest_addr_length; 1716 1717 if (!MBLKIN(nmp, off, len)) { 1718 err = EINVAL; 1719 goto failed; 1720 } 1721 1722 rw_enter(&dsp->ds_lock, RW_READER); 1723 if (dsp->ds_dlstate != DL_IDLE) { 1724 rw_exit(&dsp->ds_lock); 1725 err = ENOTSUP; 1726 goto 
failed; 1727 } 1728 1729 addr_length = dsp->ds_mip->mi_addr_length; 1730 if (len != addr_length + sizeof (uint16_t)) { 1731 rw_exit(&dsp->ds_lock); 1732 err = EINVAL; 1733 goto failed; 1734 } 1735 1736 addr = nmp->b_rptr + off; 1737 sap = *(uint16_t *)(nmp->b_rptr + off + addr_length); 1738 dc = dsp->ds_dc; 1739 1740 if ((hmp = dls_header(dc, addr, sap, dsp->ds_pri, NULL)) == NULL) { 1741 rw_exit(&dsp->ds_lock); 1742 err = ENOMEM; 1743 goto failed; 1744 } 1745 1746 /* 1747 * This is a performance optimization. We originally entered 1748 * as reader and only become writer upon transitioning into 1749 * the DLD_FASTPATH mode for the first time. Otherwise we 1750 * stay as reader and return the fast-path header to IP. 1751 */ 1752 if (dsp->ds_mode != DLD_FASTPATH) { 1753 if (!rw_tryupgrade(&dsp->ds_lock)) { 1754 rw_exit(&dsp->ds_lock); 1755 rw_enter(&dsp->ds_lock, RW_WRITER); 1756 1757 /* 1758 * State may have changed before we re-acquired 1759 * the writer lock in case the upgrade failed. 1760 */ 1761 if (dsp->ds_dlstate != DL_IDLE) { 1762 rw_exit(&dsp->ds_lock); 1763 err = ENOTSUP; 1764 goto failed; 1765 } 1766 } 1767 1768 /* 1769 * Set the receive callback (unless polling is enabled). 1770 */ 1771 if (!dsp->ds_polling && !dsp->ds_soft_ring) 1772 dls_rx_set(dc, dld_str_rx_fastpath, (void *)dsp); 1773 1774 /* 1775 * Note that fast-path mode is enabled. 1776 */ 1777 dsp->ds_mode = DLD_FASTPATH; 1778 } 1779 rw_exit(&dsp->ds_lock); 1780 1781 freemsg(nmp->b_cont); 1782 nmp->b_cont = hmp; 1783 1784 miocack(q, mp, MBLKL(nmp) + MBLKL(hmp), 0); 1785 return; 1786 failed: 1787 miocnak(q, mp, 0, err); 1788 } 1789 1790 /* 1791 * Catch-all handler. 
1792 */ 1793 static void 1794 ioc(dld_str_t *dsp, mblk_t *mp) 1795 { 1796 queue_t *q = dsp->ds_wq; 1797 mac_handle_t mh; 1798 1799 rw_enter(&dsp->ds_lock, RW_READER); 1800 if (dsp->ds_dlstate == DL_UNATTACHED) { 1801 rw_exit(&dsp->ds_lock); 1802 miocnak(q, mp, 0, EINVAL); 1803 return; 1804 } 1805 mh = dsp->ds_mh; 1806 ASSERT(mh != NULL); 1807 rw_exit(&dsp->ds_lock); 1808 mac_ioctl(mh, q, mp); 1809 } 1810 1811 /* 1812 * Allocate a new minor number. 1813 */ 1814 static minor_t 1815 dld_minor_hold(boolean_t sleep) 1816 { 1817 minor_t minor; 1818 1819 /* 1820 * Grab a value from the arena. 1821 */ 1822 atomic_add_32(&minor_count, 1); 1823 if ((minor = PTR_TO_MINOR(vmem_alloc(minor_arenap, 1, 1824 (sleep) ? VM_SLEEP : VM_NOSLEEP))) == 0) { 1825 atomic_add_32(&minor_count, -1); 1826 return (0); 1827 } 1828 1829 return (minor); 1830 } 1831 1832 /* 1833 * Release a previously allocated minor number. 1834 */ 1835 static void 1836 dld_minor_rele(minor_t minor) 1837 { 1838 /* 1839 * Return the value to the arena. 1840 */ 1841 vmem_free(minor_arenap, MINOR_TO_PTR(minor), 1); 1842 1843 atomic_add_32(&minor_count, -1); 1844 } 1845