xref: /titanic_44/usr/src/uts/common/io/mac/mac.c (revision 551bc2a66868b5cb5be6b70ab9f55515e77a39a9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * MAC Services Module
31  */
32 
33 #include <sys/types.h>
34 #include <sys/conf.h>
35 #include <sys/stat.h>
36 #include <sys/stream.h>
37 #include <sys/strsun.h>
38 #include <sys/strsubr.h>
39 #include <sys/dlpi.h>
40 #include <sys/modhash.h>
41 #include <sys/mac.h>
42 #include <sys/mac_impl.h>
43 #include <sys/dls.h>
44 #include <sys/dld.h>
45 #include <sys/modctl.h>
46 #include <sys/fs/dv_node.h>
47 #include <sys/thread.h>
48 #include <sys/proc.h>
49 #include <sys/callb.h>
50 #include <sys/cpuvar.h>
51 #include <sys/atomic.h>
52 #include <sys/sdt.h>
53 
/* Bucket count handed to mod_hash_create_extended() for i_mac_impl_hash. */
#define	IMPL_HASHSZ	67	/* prime */

/* kmem cache of mac_impl_t objects; created in mac_init(). */
static kmem_cache_t	*i_mac_impl_cachep;
/* String-keyed hash of mac_impl_t; mac_open() looks entries up by MAC name. */
static mod_hash_t	*i_mac_impl_hash;
/*
 * Held as writer by mac_open()/mac_close() around the hash lookup and
 * mi_ref updates, and as reader by i_mac_notify() while it tests
 * mi_disabled.
 */
krwlock_t		i_mac_impl_lock;
/* mac_fini() refuses to tear the module down (EBUSY) while this is non-zero. */
uint_t			i_mac_impl_count;
/* kmem cache of mac_vnic_tx_t objects; created in mac_init(). */
static kmem_cache_t	*mac_vnic_tx_cache;

/* Sub-directory name passed to modload() when loading a MAC-type plugin. */
#define	MACTYPE_KMODDIR	"mac"
/* Bucket count handed to mod_hash_create_extended() for i_mactype_hash. */
#define	MACTYPE_HASHSZ	67
/* String-keyed hash of mactype_t, searched by plugin name. */
static mod_hash_t	*i_mactype_hash;
/*
 * i_mactype_lock synchronizes threads that obtain references to mactype_t
 * structures through i_mactype_getplugin().
 */
static kmutex_t		i_mactype_lock;

/* Forward declarations for functions referenced before their definitions. */
static void i_mac_notify_thread(void *);
static mblk_t *mac_vnic_tx(void *, mblk_t *);
static mblk_t *mac_vnic_txloop(void *, mblk_t *);
74 
75 /*
76  * Private functions.
77  */
78 
79 /*ARGSUSED*/
80 static int
81 i_mac_constructor(void *buf, void *arg, int kmflag)
82 {
83 	mac_impl_t	*mip = buf;
84 
85 	bzero(buf, sizeof (mac_impl_t));
86 
87 	mip->mi_linkstate = LINK_STATE_UNKNOWN;
88 
89 	rw_init(&mip->mi_state_lock, NULL, RW_DRIVER, NULL);
90 	rw_init(&mip->mi_data_lock, NULL, RW_DRIVER, NULL);
91 	rw_init(&mip->mi_notify_lock, NULL, RW_DRIVER, NULL);
92 	rw_init(&mip->mi_rx_lock, NULL, RW_DRIVER, NULL);
93 	rw_init(&mip->mi_tx_lock, NULL, RW_DRIVER, NULL);
94 	rw_init(&mip->mi_resource_lock, NULL, RW_DRIVER, NULL);
95 	mutex_init(&mip->mi_activelink_lock, NULL, MUTEX_DEFAULT, NULL);
96 	mutex_init(&mip->mi_notify_bits_lock, NULL, MUTEX_DRIVER, NULL);
97 	cv_init(&mip->mi_notify_cv, NULL, CV_DRIVER, NULL);
98 	mutex_init(&mip->mi_lock, NULL, MUTEX_DRIVER, NULL);
99 	cv_init(&mip->mi_rx_cv, NULL, CV_DRIVER, NULL);
100 	return (0);
101 }
102 
103 /*ARGSUSED*/
104 static void
105 i_mac_destructor(void *buf, void *arg)
106 {
107 	mac_impl_t	*mip = buf;
108 
109 	ASSERT(mip->mi_ref == 0);
110 	ASSERT(mip->mi_active == 0);
111 	ASSERT(mip->mi_linkstate == LINK_STATE_UNKNOWN);
112 	ASSERT(mip->mi_devpromisc == 0);
113 	ASSERT(mip->mi_promisc == 0);
114 	ASSERT(mip->mi_mmap == NULL);
115 	ASSERT(mip->mi_mnfp == NULL);
116 	ASSERT(mip->mi_resource_add == NULL);
117 	ASSERT(mip->mi_ksp == NULL);
118 	ASSERT(mip->mi_kstat_count == 0);
119 	ASSERT(mip->mi_notify_bits == 0);
120 	ASSERT(mip->mi_notify_thread == NULL);
121 
122 	rw_destroy(&mip->mi_state_lock);
123 	rw_destroy(&mip->mi_data_lock);
124 	rw_destroy(&mip->mi_notify_lock);
125 	rw_destroy(&mip->mi_rx_lock);
126 	rw_destroy(&mip->mi_tx_lock);
127 	rw_destroy(&mip->mi_resource_lock);
128 	mutex_destroy(&mip->mi_activelink_lock);
129 	mutex_destroy(&mip->mi_notify_bits_lock);
130 	cv_destroy(&mip->mi_notify_cv);
131 	mutex_destroy(&mip->mi_lock);
132 	cv_destroy(&mip->mi_rx_cv);
133 }
134 
135 /*
136  * mac_vnic_tx_t kmem cache support functions.
137  */
138 
139 /* ARGSUSED */
140 static int
141 i_mac_vnic_tx_ctor(void *buf, void *arg, int mkflag)
142 {
143 	mac_vnic_tx_t *vnic_tx = buf;
144 
145 	bzero(buf, sizeof (mac_vnic_tx_t));
146 	mutex_init(&vnic_tx->mv_lock, NULL, MUTEX_DRIVER, NULL);
147 	cv_init(&vnic_tx->mv_cv, NULL, CV_DRIVER, NULL);
148 	return (0);
149 }
150 
151 /* ARGSUSED */
152 static void
153 i_mac_vnic_tx_dtor(void *buf, void *arg)
154 {
155 	mac_vnic_tx_t *vnic_tx = buf;
156 
157 	ASSERT(vnic_tx->mv_refs == 0);
158 	mutex_destroy(&vnic_tx->mv_lock);
159 	cv_destroy(&vnic_tx->mv_cv);
160 }
161 
162 static void
163 i_mac_notify(mac_impl_t *mip, mac_notify_type_t type)
164 {
165 	rw_enter(&i_mac_impl_lock, RW_READER);
166 	if (mip->mi_disabled)
167 		goto exit;
168 
169 	/*
170 	 * Guard against incorrect notifications.  (Running a newer
171 	 * mac client against an older implementation?)
172 	 */
173 	if (type >= MAC_NNOTE)
174 		goto exit;
175 
176 	mutex_enter(&mip->mi_notify_bits_lock);
177 	mip->mi_notify_bits |= (1 << type);
178 	cv_broadcast(&mip->mi_notify_cv);
179 	mutex_exit(&mip->mi_notify_bits_lock);
180 
181 exit:
182 	rw_exit(&i_mac_impl_lock);
183 }
184 
185 static void
186 i_mac_log_link_state(mac_impl_t *mip)
187 {
188 	/*
189 	 * If no change, then it is not interesting.
190 	 */
191 	if (mip->mi_lastlinkstate == mip->mi_linkstate)
192 		return;
193 
194 	switch (mip->mi_linkstate) {
195 	case LINK_STATE_UP:
196 		if (mip->mi_type->mt_ops.mtops_ops & MTOPS_LINK_DETAILS) {
197 			char det[200];
198 
199 			mip->mi_type->mt_ops.mtops_link_details(det,
200 			    sizeof (det), (mac_handle_t)mip, mip->mi_pdata);
201 
202 			cmn_err(CE_NOTE, "!%s link up, %s", mip->mi_name, det);
203 		} else {
204 			cmn_err(CE_NOTE, "!%s link up", mip->mi_name);
205 		}
206 		break;
207 
208 	case LINK_STATE_DOWN:
209 		/*
210 		 * Only transitions from UP to DOWN are interesting
211 		 */
212 		if (mip->mi_lastlinkstate != LINK_STATE_UNKNOWN)
213 			cmn_err(CE_NOTE, "!%s link down", mip->mi_name);
214 		break;
215 
216 	case LINK_STATE_UNKNOWN:
217 		/*
218 		 * This case is normally not interesting.
219 		 */
220 		break;
221 	}
222 	mip->mi_lastlinkstate = mip->mi_linkstate;
223 }
224 
225 static void
226 i_mac_notify_thread(void *arg)
227 {
228 	mac_impl_t	*mip = arg;
229 	callb_cpr_t	cprinfo;
230 
231 	CALLB_CPR_INIT(&cprinfo, &mip->mi_notify_bits_lock, callb_generic_cpr,
232 	    "i_mac_notify_thread");
233 
234 	mutex_enter(&mip->mi_notify_bits_lock);
235 	for (;;) {
236 		uint32_t	bits;
237 		uint32_t	type;
238 
239 		bits = mip->mi_notify_bits;
240 		if (bits == 0) {
241 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
242 			cv_wait(&mip->mi_notify_cv, &mip->mi_notify_bits_lock);
243 			CALLB_CPR_SAFE_END(&cprinfo, &mip->mi_notify_bits_lock);
244 			continue;
245 		}
246 		mip->mi_notify_bits = 0;
247 
248 		if ((bits & (1 << MAC_NNOTE)) != 0) {
249 			/* request to quit */
250 			ASSERT(mip->mi_disabled);
251 			break;
252 		}
253 
254 		mutex_exit(&mip->mi_notify_bits_lock);
255 
256 		/*
257 		 * Log link changes.
258 		 */
259 		if ((bits & (1 << MAC_NOTE_LINK)) != 0)
260 			i_mac_log_link_state(mip);
261 
262 		/*
263 		 * Do notification callbacks for each notification type.
264 		 */
265 		for (type = 0; type < MAC_NNOTE; type++) {
266 			mac_notify_fn_t	*mnfp;
267 
268 			if ((bits & (1 << type)) == 0) {
269 				continue;
270 			}
271 
272 			/*
273 			 * Walk the list of notifications.
274 			 */
275 			rw_enter(&mip->mi_notify_lock, RW_READER);
276 			for (mnfp = mip->mi_mnfp; mnfp != NULL;
277 			    mnfp = mnfp->mnf_nextp) {
278 
279 				mnfp->mnf_fn(mnfp->mnf_arg, type);
280 			}
281 			rw_exit(&mip->mi_notify_lock);
282 		}
283 
284 		mutex_enter(&mip->mi_notify_bits_lock);
285 	}
286 
287 	mip->mi_notify_thread = NULL;
288 	cv_broadcast(&mip->mi_notify_cv);
289 
290 	CALLB_CPR_EXIT(&cprinfo);
291 
292 	thread_exit();
293 }
294 
295 static mactype_t *
296 i_mactype_getplugin(const char *pname)
297 {
298 	mactype_t	*mtype = NULL;
299 	boolean_t	tried_modload = B_FALSE;
300 
301 	mutex_enter(&i_mactype_lock);
302 
303 find_registered_mactype:
304 	if (mod_hash_find(i_mactype_hash, (mod_hash_key_t)pname,
305 	    (mod_hash_val_t *)&mtype) != 0) {
306 		if (!tried_modload) {
307 			/*
308 			 * If the plugin has not yet been loaded, then
309 			 * attempt to load it now.  If modload() succeeds,
310 			 * the plugin should have registered using
311 			 * mactype_register(), in which case we can go back
312 			 * and attempt to find it again.
313 			 */
314 			if (modload(MACTYPE_KMODDIR, (char *)pname) != -1) {
315 				tried_modload = B_TRUE;
316 				goto find_registered_mactype;
317 			}
318 		}
319 	} else {
320 		/*
321 		 * Note that there's no danger that the plugin we've loaded
322 		 * could be unloaded between the modload() step and the
323 		 * reference count bump here, as we're holding
324 		 * i_mactype_lock, which mactype_unregister() also holds.
325 		 */
326 		atomic_inc_32(&mtype->mt_ref);
327 	}
328 
329 	mutex_exit(&i_mactype_lock);
330 	return (mtype);
331 }
332 
333 /*
334  * Module initialization functions.
335  */
336 
337 void
338 mac_init(void)
339 {
340 	i_mac_impl_cachep = kmem_cache_create("mac_impl_cache",
341 	    sizeof (mac_impl_t), 0, i_mac_constructor, i_mac_destructor,
342 	    NULL, NULL, NULL, 0);
343 	ASSERT(i_mac_impl_cachep != NULL);
344 
345 	mac_vnic_tx_cache = kmem_cache_create("mac_vnic_tx_cache",
346 	    sizeof (mac_vnic_tx_t), 0, i_mac_vnic_tx_ctor, i_mac_vnic_tx_dtor,
347 	    NULL, NULL, NULL, 0);
348 	ASSERT(mac_vnic_tx_cache != NULL);
349 
350 	i_mac_impl_hash = mod_hash_create_extended("mac_impl_hash",
351 	    IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
352 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
353 	rw_init(&i_mac_impl_lock, NULL, RW_DEFAULT, NULL);
354 	i_mac_impl_count = 0;
355 
356 	i_mactype_hash = mod_hash_create_extended("mactype_hash",
357 	    MACTYPE_HASHSZ,
358 	    mod_hash_null_keydtor, mod_hash_null_valdtor,
359 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
360 }
361 
362 int
363 mac_fini(void)
364 {
365 	if (i_mac_impl_count > 0)
366 		return (EBUSY);
367 
368 	mod_hash_destroy_hash(i_mac_impl_hash);
369 	rw_destroy(&i_mac_impl_lock);
370 
371 	kmem_cache_destroy(i_mac_impl_cachep);
372 	kmem_cache_destroy(mac_vnic_tx_cache);
373 
374 	mod_hash_destroy_hash(i_mactype_hash);
375 	return (0);
376 }
377 
378 /*
379  * Client functions.
380  */
381 
382 int
383 mac_open(const char *macname, mac_handle_t *mhp)
384 {
385 	char		driver[MAXNAMELEN];
386 	uint_t		ddi_instance;
387 	major_t		major;
388 	dev_info_t	*dip;
389 	mac_impl_t	*mip;
390 	int		err;
391 
392 	/*
393 	 * Check the device name length to make sure it won't overflow our
394 	 * buffer.
395 	 */
396 	if (strlen(macname) >= MAXNAMELEN)
397 		return (EINVAL);
398 
399 	/*
400 	 * Split the device name into driver and instance components.
401 	 */
402 	if (ddi_parse(macname, driver, &ddi_instance) != DDI_SUCCESS)
403 		return (EINVAL);
404 
405 	if ((strcmp(driver, "aggr") == 0) || (strcmp(driver, "vnic") == 0))
406 		ddi_instance = 0;
407 
408 	/*
409 	 * Get the major number of the driver.
410 	 */
411 	if ((major = ddi_name_to_major(driver)) == (major_t)-1)
412 		return (EINVAL);
413 
414 	/*
415 	 * Hold the given instance to prevent it from being detached.
416 	 * This will also attach the instance if it is not currently attached.
417 	 * Currently we ensure that mac_register() (called by the driver's
418 	 * attach entry point) and all code paths under it cannot possibly
419 	 * call mac_open() because this would lead to a recursive attach
420 	 * panic.
421 	 */
422 	if ((dip = ddi_hold_devi_by_instance(major, ddi_instance, 0)) == NULL)
423 		return (EINVAL);
424 
425 	/*
426 	 * Look up its entry in the global hash table.
427 	 */
428 again:
429 	rw_enter(&i_mac_impl_lock, RW_WRITER);
430 	err = mod_hash_find(i_mac_impl_hash, (mod_hash_key_t)macname,
431 	    (mod_hash_val_t *)&mip);
432 	if (err != 0) {
433 		err = ENOENT;
434 		goto failed;
435 	}
436 
437 	if (mip->mi_disabled) {
438 		rw_exit(&i_mac_impl_lock);
439 		goto again;
440 	}
441 
442 	mip->mi_ref++;
443 	rw_exit(&i_mac_impl_lock);
444 
445 	*mhp = (mac_handle_t)mip;
446 	return (0);
447 
448 failed:
449 	rw_exit(&i_mac_impl_lock);
450 	ddi_release_devi(dip);
451 	return (err);
452 }
453 
454 void
455 mac_close(mac_handle_t mh)
456 {
457 	mac_impl_t	*mip = (mac_impl_t *)mh;
458 	dev_info_t	*dip = mip->mi_dip;
459 
460 	rw_enter(&i_mac_impl_lock, RW_WRITER);
461 
462 	ASSERT(mip->mi_ref != 0);
463 	if (--mip->mi_ref == 0) {
464 		ASSERT(!mip->mi_activelink);
465 	}
466 	ddi_release_devi(dip);
467 	rw_exit(&i_mac_impl_lock);
468 }
469 
470 const mac_info_t *
471 mac_info(mac_handle_t mh)
472 {
473 	return (&((mac_impl_t *)mh)->mi_info);
474 }
475 
476 dev_info_t *
477 mac_devinfo_get(mac_handle_t mh)
478 {
479 	return (((mac_impl_t *)mh)->mi_dip);
480 }
481 
482 uint64_t
483 mac_stat_get(mac_handle_t mh, uint_t stat)
484 {
485 	mac_impl_t	*mip = (mac_impl_t *)mh;
486 	uint64_t	val;
487 	int		ret;
488 
489 	/*
490 	 * The range of stat determines where it is maintained.  Stat
491 	 * values from 0 up to (but not including) MAC_STAT_MIN are
492 	 * mainteined by the mac module itself.  Everything else is
493 	 * maintained by the driver.
494 	 */
495 	if (stat < MAC_STAT_MIN) {
496 		/* These stats are maintained by the mac module itself. */
497 		switch (stat) {
498 		case MAC_STAT_LINK_STATE:
499 			return (mip->mi_linkstate);
500 		case MAC_STAT_LINK_UP:
501 			return (mip->mi_linkstate == LINK_STATE_UP);
502 		case MAC_STAT_PROMISC:
503 			return (mip->mi_devpromisc != 0);
504 		default:
505 			ASSERT(B_FALSE);
506 		}
507 	}
508 
509 	/*
510 	 * Call the driver to get the given statistic.
511 	 */
512 	ret = mip->mi_getstat(mip->mi_driver, stat, &val);
513 	if (ret != 0) {
514 		/*
515 		 * The driver doesn't support this statistic.  Get the
516 		 * statistic's default value.
517 		 */
518 		val = mac_stat_default(mip, stat);
519 	}
520 	return (val);
521 }
522 
523 int
524 mac_start(mac_handle_t mh)
525 {
526 	mac_impl_t	*mip = (mac_impl_t *)mh;
527 	int		err;
528 
529 	ASSERT(mip->mi_start != NULL);
530 
531 	rw_enter(&(mip->mi_state_lock), RW_WRITER);
532 
533 	/*
534 	 * Check whether the device is already started.
535 	 */
536 	if (mip->mi_active++ != 0) {
537 		/*
538 		 * It's already started so there's nothing more to do.
539 		 */
540 		err = 0;
541 		goto done;
542 	}
543 
544 	/*
545 	 * Start the device.
546 	 */
547 	if ((err = mip->mi_start(mip->mi_driver)) != 0)
548 		--mip->mi_active;
549 
550 done:
551 	rw_exit(&(mip->mi_state_lock));
552 	return (err);
553 }
554 
555 void
556 mac_stop(mac_handle_t mh)
557 {
558 	mac_impl_t	*mip = (mac_impl_t *)mh;
559 
560 	ASSERT(mip->mi_stop != NULL);
561 
562 	rw_enter(&(mip->mi_state_lock), RW_WRITER);
563 
564 	/*
565 	 * Check whether the device is still needed.
566 	 */
567 	ASSERT(mip->mi_active != 0);
568 	if (--mip->mi_active != 0) {
569 		/*
570 		 * It's still needed so there's nothing more to do.
571 		 */
572 		goto done;
573 	}
574 
575 	/*
576 	 * Stop the device.
577 	 */
578 	mip->mi_stop(mip->mi_driver);
579 
580 done:
581 	rw_exit(&(mip->mi_state_lock));
582 }
583 
584 int
585 mac_multicst_add(mac_handle_t mh, const uint8_t *addr)
586 {
587 	mac_impl_t		*mip = (mac_impl_t *)mh;
588 	mac_multicst_addr_t	**pp;
589 	mac_multicst_addr_t	*p;
590 	int			err;
591 
592 	ASSERT(mip->mi_multicst != NULL);
593 
594 	/*
595 	 * Verify the address.
596 	 */
597 	if ((err = mip->mi_type->mt_ops.mtops_multicst_verify(addr,
598 	    mip->mi_pdata)) != 0) {
599 		return (err);
600 	}
601 
602 	/*
603 	 * Check whether the given address is already enabled.
604 	 */
605 	rw_enter(&(mip->mi_data_lock), RW_WRITER);
606 	for (pp = &(mip->mi_mmap); (p = *pp) != NULL; pp = &(p->mma_nextp)) {
607 		if (bcmp(p->mma_addr, addr, mip->mi_type->mt_addr_length) ==
608 		    0) {
609 			/*
610 			 * The address is already enabled so just bump the
611 			 * reference count.
612 			 */
613 			p->mma_ref++;
614 			err = 0;
615 			goto done;
616 		}
617 	}
618 
619 	/*
620 	 * Allocate a new list entry.
621 	 */
622 	if ((p = kmem_zalloc(sizeof (mac_multicst_addr_t),
623 	    KM_NOSLEEP)) == NULL) {
624 		err = ENOMEM;
625 		goto done;
626 	}
627 
628 	/*
629 	 * Enable a new multicast address.
630 	 */
631 	if ((err = mip->mi_multicst(mip->mi_driver, B_TRUE, addr)) != 0) {
632 		kmem_free(p, sizeof (mac_multicst_addr_t));
633 		goto done;
634 	}
635 
636 	/*
637 	 * Add the address to the list of enabled addresses.
638 	 */
639 	bcopy(addr, p->mma_addr, mip->mi_type->mt_addr_length);
640 	p->mma_ref++;
641 	*pp = p;
642 
643 done:
644 	rw_exit(&(mip->mi_data_lock));
645 	return (err);
646 }
647 
648 int
649 mac_multicst_remove(mac_handle_t mh, const uint8_t *addr)
650 {
651 	mac_impl_t		*mip = (mac_impl_t *)mh;
652 	mac_multicst_addr_t	**pp;
653 	mac_multicst_addr_t	*p;
654 	int			err;
655 
656 	ASSERT(mip->mi_multicst != NULL);
657 
658 	/*
659 	 * Find the entry in the list for the given address.
660 	 */
661 	rw_enter(&(mip->mi_data_lock), RW_WRITER);
662 	for (pp = &(mip->mi_mmap); (p = *pp) != NULL; pp = &(p->mma_nextp)) {
663 		if (bcmp(p->mma_addr, addr, mip->mi_type->mt_addr_length) ==
664 		    0) {
665 			if (--p->mma_ref == 0)
666 				break;
667 
668 			/*
669 			 * There is still a reference to this address so
670 			 * there's nothing more to do.
671 			 */
672 			err = 0;
673 			goto done;
674 		}
675 	}
676 
677 	/*
678 	 * We did not find an entry for the given address so it is not
679 	 * currently enabled.
680 	 */
681 	if (p == NULL) {
682 		err = ENOENT;
683 		goto done;
684 	}
685 	ASSERT(p->mma_ref == 0);
686 
687 	/*
688 	 * Disable the multicast address.
689 	 */
690 	if ((err = mip->mi_multicst(mip->mi_driver, B_FALSE, addr)) != 0) {
691 		p->mma_ref++;
692 		goto done;
693 	}
694 
695 	/*
696 	 * Remove it from the list.
697 	 */
698 	*pp = p->mma_nextp;
699 	kmem_free(p, sizeof (mac_multicst_addr_t));
700 
701 done:
702 	rw_exit(&(mip->mi_data_lock));
703 	return (err);
704 }
705 
706 /*
707  * mac_unicst_verify: Verifies the passed address. It fails
708  * if the passed address is a group address or has incorrect length.
709  */
710 boolean_t
711 mac_unicst_verify(mac_handle_t mh, const uint8_t *addr, uint_t len)
712 {
713 	mac_impl_t	*mip = (mac_impl_t *)mh;
714 
715 	/*
716 	 * Verify the address.
717 	 */
718 	if ((len != mip->mi_type->mt_addr_length) ||
719 	    (mip->mi_type->mt_ops.mtops_unicst_verify(addr,
720 	    mip->mi_pdata)) != 0) {
721 		return (B_FALSE);
722 	} else {
723 		return (B_TRUE);
724 	}
725 }
726 
727 int
728 mac_unicst_set(mac_handle_t mh, const uint8_t *addr)
729 {
730 	mac_impl_t	*mip = (mac_impl_t *)mh;
731 	int		err;
732 	boolean_t	notify = B_FALSE;
733 
734 	ASSERT(mip->mi_unicst != NULL);
735 
736 	/*
737 	 * Verify the address.
738 	 */
739 	if ((err = mip->mi_type->mt_ops.mtops_unicst_verify(addr,
740 	    mip->mi_pdata)) != 0) {
741 		return (err);
742 	}
743 
744 	/*
745 	 * Program the new unicast address.
746 	 */
747 	rw_enter(&(mip->mi_data_lock), RW_WRITER);
748 
749 	/*
750 	 * If address doesn't change, do nothing.
751 	 * This check is necessary otherwise it may call into mac_unicst_set
752 	 * recursively.
753 	 */
754 	if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) == 0) {
755 		err = 0;
756 		goto done;
757 	}
758 
759 	if ((err = mip->mi_unicst(mip->mi_driver, addr)) != 0)
760 		goto done;
761 
762 	/*
763 	 * Save the address and flag that we need to send a notification.
764 	 */
765 	bcopy(addr, mip->mi_addr, mip->mi_type->mt_addr_length);
766 	notify = B_TRUE;
767 
768 done:
769 	rw_exit(&(mip->mi_data_lock));
770 
771 	if (notify)
772 		i_mac_notify(mip, MAC_NOTE_UNICST);
773 
774 	return (err);
775 }
776 
777 void
778 mac_unicst_get(mac_handle_t mh, uint8_t *addr)
779 {
780 	mac_impl_t	*mip = (mac_impl_t *)mh;
781 
782 	/*
783 	 * Copy out the current unicast source address.
784 	 */
785 	rw_enter(&(mip->mi_data_lock), RW_READER);
786 	bcopy(mip->mi_addr, addr, mip->mi_type->mt_addr_length);
787 	rw_exit(&(mip->mi_data_lock));
788 }
789 
790 void
791 mac_dest_get(mac_handle_t mh, uint8_t *addr)
792 {
793 	mac_impl_t	*mip = (mac_impl_t *)mh;
794 
795 	/*
796 	 * Copy out the current destination address.
797 	 */
798 	rw_enter(&(mip->mi_data_lock), RW_READER);
799 	bcopy(mip->mi_dstaddr, addr, mip->mi_type->mt_addr_length);
800 	rw_exit(&(mip->mi_data_lock));
801 }
802 
803 int
804 mac_promisc_set(mac_handle_t mh, boolean_t on, mac_promisc_type_t ptype)
805 {
806 	mac_impl_t	*mip = (mac_impl_t *)mh;
807 	int		err = 0;
808 
809 	ASSERT(mip->mi_setpromisc != NULL);
810 	ASSERT(ptype == MAC_DEVPROMISC || ptype == MAC_PROMISC);
811 
812 	/*
813 	 * Determine whether we should enable or disable promiscuous mode.
814 	 * For details on the distinction between "device promiscuous mode"
815 	 * and "MAC promiscuous mode", see PSARC/2005/289.
816 	 */
817 	rw_enter(&(mip->mi_data_lock), RW_WRITER);
818 	if (on) {
819 		/*
820 		 * Enable promiscuous mode on the device if not yet enabled.
821 		 */
822 		if (mip->mi_devpromisc++ == 0) {
823 			err = mip->mi_setpromisc(mip->mi_driver, B_TRUE);
824 			if (err != 0) {
825 				mip->mi_devpromisc--;
826 				goto done;
827 			}
828 			i_mac_notify(mip, MAC_NOTE_DEVPROMISC);
829 		}
830 
831 		/*
832 		 * Enable promiscuous mode on the MAC if not yet enabled.
833 		 */
834 		if (ptype == MAC_PROMISC && mip->mi_promisc++ == 0)
835 			i_mac_notify(mip, MAC_NOTE_PROMISC);
836 	} else {
837 		if (mip->mi_devpromisc == 0) {
838 			err = EPROTO;
839 			goto done;
840 		}
841 
842 		/*
843 		 * Disable promiscuous mode on the device if this is the last
844 		 * enabling.
845 		 */
846 		if (--mip->mi_devpromisc == 0) {
847 			err = mip->mi_setpromisc(mip->mi_driver, B_FALSE);
848 			if (err != 0) {
849 				mip->mi_devpromisc++;
850 				goto done;
851 			}
852 			i_mac_notify(mip, MAC_NOTE_DEVPROMISC);
853 		}
854 
855 		/*
856 		 * Disable promiscuous mode on the MAC if this is the last
857 		 * enabling.
858 		 */
859 		if (ptype == MAC_PROMISC && --mip->mi_promisc == 0)
860 			i_mac_notify(mip, MAC_NOTE_PROMISC);
861 	}
862 
863 done:
864 	rw_exit(&(mip->mi_data_lock));
865 	return (err);
866 }
867 
868 boolean_t
869 mac_promisc_get(mac_handle_t mh, mac_promisc_type_t ptype)
870 {
871 	mac_impl_t		*mip = (mac_impl_t *)mh;
872 
873 	ASSERT(ptype == MAC_DEVPROMISC || ptype == MAC_PROMISC);
874 
875 	/*
876 	 * Return the current promiscuity.
877 	 */
878 	if (ptype == MAC_DEVPROMISC)
879 		return (mip->mi_devpromisc != 0);
880 	else
881 		return (mip->mi_promisc != 0);
882 }
883 
884 void
885 mac_resources(mac_handle_t mh)
886 {
887 	mac_impl_t	*mip = (mac_impl_t *)mh;
888 
889 	/*
890 	 * If the driver supports resource registration, call the driver to
891 	 * ask it to register its resources.
892 	 */
893 	if (mip->mi_callbacks->mc_callbacks & MC_RESOURCES)
894 		mip->mi_resources(mip->mi_driver);
895 }
896 
897 void
898 mac_ioctl(mac_handle_t mh, queue_t *wq, mblk_t *bp)
899 {
900 	mac_impl_t	*mip = (mac_impl_t *)mh;
901 
902 	/*
903 	 * Call the driver to handle the ioctl.  The driver may not support
904 	 * any ioctls, in which case we reply with a NAK on its behalf.
905 	 */
906 	if (mip->mi_callbacks->mc_callbacks & MC_IOCTL)
907 		mip->mi_ioctl(mip->mi_driver, wq, bp);
908 	else
909 		miocnak(wq, bp, 0, EINVAL);
910 }
911 
912 const mac_txinfo_t *
913 mac_do_tx_get(mac_handle_t mh, boolean_t is_vnic)
914 {
915 	mac_impl_t	*mip = (mac_impl_t *)mh;
916 	mac_txinfo_t	*mtp;
917 
918 	/*
919 	 * Grab the lock to prevent us from racing with MAC_PROMISC being
920 	 * changed.  This is sufficient since MAC clients are careful to always
921 	 * call mac_txloop_add() prior to enabling MAC_PROMISC, and to disable
922 	 * MAC_PROMISC prior to calling mac_txloop_remove().
923 	 */
924 	rw_enter(&mip->mi_tx_lock, RW_READER);
925 
926 	if (mac_promisc_get(mh, MAC_PROMISC)) {
927 		ASSERT(mip->mi_mtfp != NULL);
928 		if (mip->mi_vnic_present && !is_vnic) {
929 			mtp = &mip->mi_vnic_txloopinfo;
930 		} else {
931 			mtp = &mip->mi_txloopinfo;
932 		}
933 	} else {
934 		if (mip->mi_vnic_present && !is_vnic) {
935 			mtp = &mip->mi_vnic_txinfo;
936 		} else {
937 			/*
938 			 * Note that we cannot ASSERT() that mip->mi_mtfp is
939 			 * NULL, because to satisfy the above ASSERT(), we
940 			 * have to disable MAC_PROMISC prior to calling
941 			 * mac_txloop_remove().
942 			 */
943 			mtp = &mip->mi_txinfo;
944 		}
945 	}
946 
947 	rw_exit(&mip->mi_tx_lock);
948 	return (mtp);
949 }
950 
951 /*
952  * Invoked by VNIC to obtain the transmit entry point.
953  */
954 const mac_txinfo_t *
955 mac_vnic_tx_get(mac_handle_t mh)
956 {
957 	return (mac_do_tx_get(mh, B_TRUE));
958 }
959 
960 /*
961  * Invoked by any non-VNIC client to obtain the transmit entry point.
962  * If a VNIC is present, the VNIC transmit function provided by the VNIC
963  * will be returned to the MAC client.
964  */
965 const mac_txinfo_t *
966 mac_tx_get(mac_handle_t mh)
967 {
968 	return (mac_do_tx_get(mh, B_FALSE));
969 }
970 
971 link_state_t
972 mac_link_get(mac_handle_t mh)
973 {
974 	return (((mac_impl_t *)mh)->mi_linkstate);
975 }
976 
977 mac_notify_handle_t
978 mac_notify_add(mac_handle_t mh, mac_notify_t notify, void *arg)
979 {
980 	mac_impl_t		*mip = (mac_impl_t *)mh;
981 	mac_notify_fn_t		*mnfp;
982 
983 	mnfp = kmem_zalloc(sizeof (mac_notify_fn_t), KM_SLEEP);
984 	mnfp->mnf_fn = notify;
985 	mnfp->mnf_arg = arg;
986 
987 	/*
988 	 * Add it to the head of the 'notify' callback list.
989 	 */
990 	rw_enter(&mip->mi_notify_lock, RW_WRITER);
991 	mnfp->mnf_nextp = mip->mi_mnfp;
992 	mip->mi_mnfp = mnfp;
993 	rw_exit(&mip->mi_notify_lock);
994 
995 	return ((mac_notify_handle_t)mnfp);
996 }
997 
998 void
999 mac_notify_remove(mac_handle_t mh, mac_notify_handle_t mnh)
1000 {
1001 	mac_impl_t		*mip = (mac_impl_t *)mh;
1002 	mac_notify_fn_t		*mnfp = (mac_notify_fn_t *)mnh;
1003 	mac_notify_fn_t		**pp;
1004 	mac_notify_fn_t		*p;
1005 
1006 	/*
1007 	 * Search the 'notify' callback list for the function closure.
1008 	 */
1009 	rw_enter(&mip->mi_notify_lock, RW_WRITER);
1010 	for (pp = &(mip->mi_mnfp); (p = *pp) != NULL;
1011 	    pp = &(p->mnf_nextp)) {
1012 		if (p == mnfp)
1013 			break;
1014 	}
1015 	ASSERT(p != NULL);
1016 
1017 	/*
1018 	 * Remove it from the list.
1019 	 */
1020 	*pp = p->mnf_nextp;
1021 	rw_exit(&mip->mi_notify_lock);
1022 
1023 	/*
1024 	 * Free it.
1025 	 */
1026 	kmem_free(mnfp, sizeof (mac_notify_fn_t));
1027 }
1028 
1029 void
1030 mac_notify(mac_handle_t mh)
1031 {
1032 	mac_impl_t		*mip = (mac_impl_t *)mh;
1033 	mac_notify_type_t	type;
1034 
1035 	for (type = 0; type < MAC_NNOTE; type++)
1036 		i_mac_notify(mip, type);
1037 }
1038 
1039 /*
1040  * Register a receive function for this mac.
1041  * More information on this function's interaction with mac_rx()
1042  * can be found atop mac_rx().
1043  */
1044 mac_rx_handle_t
1045 mac_do_rx_add(mac_handle_t mh, mac_rx_t rx, void *arg, boolean_t is_active)
1046 {
1047 	mac_impl_t	*mip = (mac_impl_t *)mh;
1048 	mac_rx_fn_t	*mrfp;
1049 
1050 	mrfp = kmem_zalloc(sizeof (mac_rx_fn_t), KM_SLEEP);
1051 	mrfp->mrf_fn = rx;
1052 	mrfp->mrf_arg = arg;
1053 	mrfp->mrf_active = is_active;
1054 
1055 	/*
1056 	 * Add it to the head of the 'rx' callback list.
1057 	 */
1058 	rw_enter(&(mip->mi_rx_lock), RW_WRITER);
1059 
1060 	/*
1061 	 * mac_rx() will only call callbacks that are marked inuse.
1062 	 */
1063 	mrfp->mrf_inuse = B_TRUE;
1064 	mrfp->mrf_nextp = mip->mi_mrfp;
1065 
1066 	/*
1067 	 * mac_rx() could be traversing the remainder of the list
1068 	 * and miss the new callback we're adding here. This is not a problem
1069 	 * because we do not guarantee the callback to take effect immediately
1070 	 * after mac_rx_add() returns.
1071 	 */
1072 	mip->mi_mrfp = mrfp;
1073 	rw_exit(&(mip->mi_rx_lock));
1074 
1075 	return ((mac_rx_handle_t)mrfp);
1076 }
1077 
1078 mac_rx_handle_t
1079 mac_rx_add(mac_handle_t mh, mac_rx_t rx, void *arg)
1080 {
1081 	return (mac_do_rx_add(mh, rx, arg, B_FALSE));
1082 }
1083 
1084 mac_rx_handle_t
1085 mac_active_rx_add(mac_handle_t mh, mac_rx_t rx, void *arg)
1086 {
1087 	return (mac_do_rx_add(mh, rx, arg, B_TRUE));
1088 }
1089 
1090 /*
1091  * Unregister a receive function for this mac.
1092  * This function does not block if wait is B_FALSE. This is useful
1093  * for clients who call mac_rx_remove() from a non-blockable context.
1094  * More information on this function's interaction with mac_rx()
1095  * can be found atop mac_rx().
1096  */
1097 void
1098 mac_rx_remove(mac_handle_t mh, mac_rx_handle_t mrh, boolean_t wait)
1099 {
1100 	mac_impl_t		*mip = (mac_impl_t *)mh;
1101 	mac_rx_fn_t		*mrfp = (mac_rx_fn_t *)mrh;
1102 	mac_rx_fn_t		**pp;
1103 	mac_rx_fn_t		*p;
1104 
1105 	/*
1106 	 * Search the 'rx' callback list for the function closure.
1107 	 */
1108 	rw_enter(&mip->mi_rx_lock, RW_WRITER);
1109 	for (pp = &(mip->mi_mrfp); (p = *pp) != NULL; pp = &(p->mrf_nextp)) {
1110 		if (p == mrfp)
1111 			break;
1112 	}
1113 	ASSERT(p != NULL);
1114 
1115 	/*
1116 	 * If mac_rx() is running, mark callback for deletion
1117 	 * and return (if wait is false), or wait until mac_rx()
1118 	 * exits (if wait is true).
1119 	 */
1120 	if (mip->mi_rx_ref > 0) {
1121 		DTRACE_PROBE1(defer_delete, mac_impl_t *, mip);
1122 		p->mrf_inuse = B_FALSE;
1123 		mutex_enter(&mip->mi_lock);
1124 		mip->mi_rx_removed++;
1125 		mutex_exit(&mip->mi_lock);
1126 
1127 		rw_exit(&mip->mi_rx_lock);
1128 		if (wait)
1129 			mac_rx_remove_wait(mh);
1130 		return;
1131 	}
1132 
1133 	/* Remove it from the list. */
1134 	*pp = p->mrf_nextp;
1135 	kmem_free(mrfp, sizeof (mac_rx_fn_t));
1136 	rw_exit(&mip->mi_rx_lock);
1137 }
1138 
1139 /*
1140  * Wait for all pending callback removals to be completed by mac_rx().
1141  * Note that if we call mac_rx_remove() immediately before this, there is no
1142  * guarantee we would wait *only* on the callback that we specified.
1143  * mac_rx_remove() could have been called by other threads and we would have
1144  * to wait for other marked callbacks to be removed as well.
1145  */
1146 void
1147 mac_rx_remove_wait(mac_handle_t mh)
1148 {
1149 	mac_impl_t	*mip = (mac_impl_t *)mh;
1150 
1151 	mutex_enter(&mip->mi_lock);
1152 	while (mip->mi_rx_removed > 0) {
1153 		DTRACE_PROBE1(need_wait, mac_impl_t *, mip);
1154 		cv_wait(&mip->mi_rx_cv, &mip->mi_lock);
1155 	}
1156 	mutex_exit(&mip->mi_lock);
1157 }
1158 
1159 mac_txloop_handle_t
1160 mac_txloop_add(mac_handle_t mh, mac_txloop_t tx, void *arg)
1161 {
1162 	mac_impl_t	*mip = (mac_impl_t *)mh;
1163 	mac_txloop_fn_t	*mtfp;
1164 
1165 	mtfp = kmem_zalloc(sizeof (mac_txloop_fn_t), KM_SLEEP);
1166 	mtfp->mtf_fn = tx;
1167 	mtfp->mtf_arg = arg;
1168 
1169 	/*
1170 	 * Add it to the head of the 'tx' callback list.
1171 	 */
1172 	rw_enter(&(mip->mi_tx_lock), RW_WRITER);
1173 	mtfp->mtf_nextp = mip->mi_mtfp;
1174 	mip->mi_mtfp = mtfp;
1175 	rw_exit(&(mip->mi_tx_lock));
1176 
1177 	return ((mac_txloop_handle_t)mtfp);
1178 }
1179 
1180 /*
1181  * Unregister a transmit function for this mac.  This removes the function
1182  * from the list of transmit functions for this mac.
1183  */
1184 void
1185 mac_txloop_remove(mac_handle_t mh, mac_txloop_handle_t mth)
1186 {
1187 	mac_impl_t		*mip = (mac_impl_t *)mh;
1188 	mac_txloop_fn_t		*mtfp = (mac_txloop_fn_t *)mth;
1189 	mac_txloop_fn_t		**pp;
1190 	mac_txloop_fn_t		*p;
1191 
1192 	/*
1193 	 * Search the 'tx' callback list for the function.
1194 	 */
1195 	rw_enter(&(mip->mi_tx_lock), RW_WRITER);
1196 	for (pp = &(mip->mi_mtfp); (p = *pp) != NULL; pp = &(p->mtf_nextp)) {
1197 		if (p == mtfp)
1198 			break;
1199 	}
1200 	ASSERT(p != NULL);
1201 
1202 	/* Remove it from the list. */
1203 	*pp = p->mtf_nextp;
1204 	kmem_free(mtfp, sizeof (mac_txloop_fn_t));
1205 	rw_exit(&(mip->mi_tx_lock));
1206 }
1207 
1208 void
1209 mac_resource_set(mac_handle_t mh, mac_resource_add_t add, void *arg)
1210 {
1211 	mac_impl_t		*mip = (mac_impl_t *)mh;
1212 
1213 	/*
1214 	 * Update the 'resource_add' callbacks.
1215 	 */
1216 	rw_enter(&(mip->mi_resource_lock), RW_WRITER);
1217 	mip->mi_resource_add = add;
1218 	mip->mi_resource_add_arg = arg;
1219 	rw_exit(&(mip->mi_resource_lock));
1220 }
1221 
1222 /*
1223  * Driver support functions.
1224  */
1225 
1226 mac_register_t *
1227 mac_alloc(uint_t mac_version)
1228 {
1229 	mac_register_t *mregp;
1230 
1231 	/*
1232 	 * Make sure there isn't a version mismatch between the driver and
1233 	 * the framework.  In the future, if multiple versions are
1234 	 * supported, this check could become more sophisticated.
1235 	 */
1236 	if (mac_version != MAC_VERSION)
1237 		return (NULL);
1238 
1239 	mregp = kmem_zalloc(sizeof (mac_register_t), KM_SLEEP);
1240 	mregp->m_version = mac_version;
1241 	return (mregp);
1242 }
1243 
1244 void
1245 mac_free(mac_register_t *mregp)
1246 {
1247 	kmem_free(mregp, sizeof (mac_register_t));
1248 }
1249 
1250 /*
1251  * mac_register() is how drivers register new MACs with the GLDv3
1252  * framework.  The mregp argument is allocated by drivers using the
1253  * mac_alloc() function, and can be freed using mac_free() immediately upon
1254  * return from mac_register().  Upon success (0 return value), the mhp
1255  * opaque pointer becomes the driver's handle to its MAC interface, and is
1256  * the argument to all other mac module entry points.
1257  */
1258 int
1259 mac_register(mac_register_t *mregp, mac_handle_t *mhp)
1260 {
1261 	mac_impl_t	*mip;
1262 	mactype_t	*mtype;
1263 	int		err = EINVAL;
1264 	struct devnames *dnp;
1265 	minor_t		minor;
1266 	boolean_t	style1_created = B_FALSE, style2_created = B_FALSE;
1267 
1268 	/* Find the required MAC-Type plugin. */
1269 	if ((mtype = i_mactype_getplugin(mregp->m_type_ident)) == NULL)
1270 		return (EINVAL);
1271 
1272 	/* Create a mac_impl_t to represent this MAC. */
1273 	mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP);
1274 
1275 	/*
1276 	 * The mac is not ready for open yet.
1277 	 */
1278 	mip->mi_disabled = B_TRUE;
1279 
1280 	mip->mi_drvname = ddi_driver_name(mregp->m_dip);
1281 	/*
1282 	 * Some drivers such as aggr need to register multiple MACs.  Such
1283 	 * drivers must supply a non-zero "instance" argument so that each
1284 	 * MAC can be assigned a unique MAC name and can have unique
1285 	 * kstats.
1286 	 */
1287 	mip->mi_instance = ((mregp->m_instance == 0) ?
1288 	    ddi_get_instance(mregp->m_dip) : mregp->m_instance);
1289 
1290 	/* Construct the MAC name as <drvname><instance> */
1291 	(void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d",
1292 	    mip->mi_drvname, mip->mi_instance);
1293 
1294 	mip->mi_driver = mregp->m_driver;
1295 
1296 	mip->mi_type = mtype;
1297 	mip->mi_info.mi_media = mtype->mt_type;
1298 	mip->mi_info.mi_nativemedia = mtype->mt_nativetype;
1299 	mip->mi_info.mi_sdu_min = mregp->m_min_sdu;
1300 	if (mregp->m_max_sdu <= mregp->m_min_sdu)
1301 		goto fail;
1302 	mip->mi_info.mi_sdu_max = mregp->m_max_sdu;
1303 	mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length;
1304 	/*
1305 	 * If the media supports a broadcast address, cache a pointer to it
1306 	 * in the mac_info_t so that upper layers can use it.
1307 	 */
1308 	mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr;
1309 
1310 	/*
1311 	 * Copy the unicast source address into the mac_info_t, but only if
1312 	 * the MAC-Type defines a non-zero address length.  We need to
1313 	 * handle MAC-Types that have an address length of 0
1314 	 * (point-to-point protocol MACs for example).
1315 	 */
1316 	if (mip->mi_type->mt_addr_length > 0) {
1317 		if (mregp->m_src_addr == NULL)
1318 			goto fail;
1319 		mip->mi_info.mi_unicst_addr =
1320 		    kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP);
1321 		bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr,
1322 		    mip->mi_type->mt_addr_length);
1323 
1324 		/*
1325 		 * Copy the fixed 'factory' MAC address from the immutable
1326 		 * info.  This is taken to be the MAC address currently in
1327 		 * use.
1328 		 */
1329 		bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr,
1330 		    mip->mi_type->mt_addr_length);
1331 		/* Copy the destination address if one is provided. */
1332 		if (mregp->m_dst_addr != NULL) {
1333 			bcopy(mregp->m_dst_addr, mip->mi_dstaddr,
1334 			    mip->mi_type->mt_addr_length);
1335 		}
1336 	} else if (mregp->m_src_addr != NULL) {
1337 		goto fail;
1338 	}
1339 
1340 	/*
1341 	 * The format of the m_pdata is specific to the plugin.  It is
1342 	 * passed in as an argument to all of the plugin callbacks.  The
1343 	 * driver can update this information by calling
1344 	 * mac_pdata_update().
1345 	 */
1346 	if (mregp->m_pdata != NULL) {
1347 		/*
1348 		 * Verify that the plugin supports MAC plugin data and that
1349 		 * the supplied data is valid.
1350 		 */
1351 		if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
1352 			goto fail;
1353 		if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata,
1354 		    mregp->m_pdata_size)) {
1355 			goto fail;
1356 		}
1357 		mip->mi_pdata = kmem_alloc(mregp->m_pdata_size, KM_SLEEP);
1358 		bcopy(mregp->m_pdata, mip->mi_pdata, mregp->m_pdata_size);
1359 		mip->mi_pdata_size = mregp->m_pdata_size;
1360 	}
1361 
1362 	/*
1363 	 * Stash the driver callbacks into the mac_impl_t, but first sanity
1364 	 * check to make sure all mandatory callbacks are set.
1365 	 */
1366 	if (mregp->m_callbacks->mc_getstat == NULL ||
1367 	    mregp->m_callbacks->mc_start == NULL ||
1368 	    mregp->m_callbacks->mc_stop == NULL ||
1369 	    mregp->m_callbacks->mc_setpromisc == NULL ||
1370 	    mregp->m_callbacks->mc_multicst == NULL ||
1371 	    mregp->m_callbacks->mc_unicst == NULL ||
1372 	    mregp->m_callbacks->mc_tx == NULL) {
1373 		goto fail;
1374 	}
1375 	mip->mi_callbacks = mregp->m_callbacks;
1376 
1377 	mip->mi_dip = mregp->m_dip;
1378 
1379 	/*
1380 	 * Set up the possible transmit routines.
1381 	 */
1382 	mip->mi_txinfo.mt_fn = mip->mi_tx;
1383 	mip->mi_txinfo.mt_arg = mip->mi_driver;
1384 
1385 	mip->mi_vnic_txinfo.mt_fn = mac_vnic_tx;
1386 	mip->mi_vnic_txinfo.mt_arg = mip;
1387 
1388 	mip->mi_txloopinfo.mt_fn = mac_txloop;
1389 	mip->mi_txloopinfo.mt_arg = mip;
1390 
1391 	mip->mi_vnic_txloopinfo.mt_fn = mac_vnic_txloop;
1392 	mip->mi_vnic_txloopinfo.mt_arg = mip;
1393 
1394 	/*
1395 	 * Allocate a notification thread.
1396 	 */
1397 	mip->mi_notify_thread = thread_create(NULL, 0, i_mac_notify_thread,
1398 	    mip, 0, &p0, TS_RUN, minclsyspri);
1399 	if (mip->mi_notify_thread == NULL)
1400 		goto fail;
1401 
1402 	/*
1403 	 * Initialize the kstats for this device.
1404 	 */
1405 	mac_stat_create(mip);
1406 
1407 	err = EEXIST;
1408 	/* Create a style-2 DLPI device */
1409 	if (ddi_create_minor_node(mip->mi_dip, (char *)mip->mi_drvname,
1410 	    S_IFCHR, 0, DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS)
1411 		goto fail;
1412 	style2_created = B_TRUE;
1413 
1414 	/* Create a style-1 DLPI device */
1415 	minor = (minor_t)mip->mi_instance + 1;
1416 	if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR, minor,
1417 	    DDI_NT_NET, 0) != DDI_SUCCESS)
1418 		goto fail;
1419 	style1_created = B_TRUE;
1420 
1421 	/*
1422 	 * Create a link for this MAC.  The link name will be the same as
1423 	 * the MAC name.
1424 	 */
1425 	err = dls_create(mip->mi_name, mip->mi_name);
1426 	if (err != 0)
1427 		goto fail;
1428 
1429 	/* set the gldv3 flag in dn_flags */
1430 	dnp = &devnamesp[ddi_driver_major(mip->mi_dip)];
1431 	LOCK_DEV_OPS(&dnp->dn_lock);
1432 	dnp->dn_flags |= DN_GLDV3_DRIVER;
1433 	UNLOCK_DEV_OPS(&dnp->dn_lock);
1434 
1435 	rw_enter(&i_mac_impl_lock, RW_WRITER);
1436 	if (mod_hash_insert(i_mac_impl_hash,
1437 	    (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) {
1438 		rw_exit(&i_mac_impl_lock);
1439 		VERIFY(dls_destroy(mip->mi_name) == 0);
1440 		err = EEXIST;
1441 		goto fail;
1442 	}
1443 
1444 	/*
1445 	 * Mark the MAC to be ready for open.
1446 	 */
1447 	mip->mi_disabled = B_FALSE;
1448 
1449 	cmn_err(CE_NOTE, "!%s registered", mip->mi_name);
1450 
1451 	rw_exit(&i_mac_impl_lock);
1452 
1453 	atomic_inc_32(&i_mac_impl_count);
1454 	*mhp = (mac_handle_t)mip;
1455 	return (0);
1456 
1457 fail:
1458 	/* clean up notification thread */
1459 	if (mip->mi_notify_thread != NULL) {
1460 		mutex_enter(&mip->mi_notify_bits_lock);
1461 		mip->mi_notify_bits = (1 << MAC_NNOTE);
1462 		cv_broadcast(&mip->mi_notify_cv);
1463 		while (mip->mi_notify_bits != 0)
1464 			cv_wait(&mip->mi_notify_cv, &mip->mi_notify_bits_lock);
1465 		mutex_exit(&mip->mi_notify_bits_lock);
1466 	}
1467 
1468 	if (mip->mi_info.mi_unicst_addr != NULL) {
1469 		kmem_free(mip->mi_info.mi_unicst_addr,
1470 		    mip->mi_type->mt_addr_length);
1471 		mip->mi_info.mi_unicst_addr = NULL;
1472 	}
1473 	if (style1_created)
1474 		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
1475 	if (style2_created)
1476 		ddi_remove_minor_node(mip->mi_dip, (char *)mip->mi_drvname);
1477 
1478 	mac_stat_destroy(mip);
1479 
1480 	if (mip->mi_type != NULL) {
1481 		atomic_dec_32(&mip->mi_type->mt_ref);
1482 		mip->mi_type = NULL;
1483 	}
1484 
1485 	if (mip->mi_pdata != NULL) {
1486 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
1487 		mip->mi_pdata = NULL;
1488 		mip->mi_pdata_size = 0;
1489 	}
1490 
1491 	kmem_cache_free(i_mac_impl_cachep, mip);
1492 	return (err);
1493 }
1494 
1495 int
1496 mac_disable(mac_handle_t mh)
1497 {
1498 	int			err;
1499 	mac_impl_t		*mip = (mac_impl_t *)mh;
1500 
1501 	/*
1502 	 * See if there are any other references to this mac_t (e.g., VLAN's).
1503 	 * If not, set mi_disabled to prevent any new VLAN's from being
1504 	 * created while we're destroying this mac.
1505 	 */
1506 	rw_enter(&i_mac_impl_lock, RW_WRITER);
1507 	if (mip->mi_ref > 0) {
1508 		rw_exit(&i_mac_impl_lock);
1509 		return (EBUSY);
1510 	}
1511 	mip->mi_disabled = B_TRUE;
1512 	rw_exit(&i_mac_impl_lock);
1513 
1514 	if ((err = dls_destroy(mip->mi_name)) != 0) {
1515 		rw_enter(&i_mac_impl_lock, RW_WRITER);
1516 		mip->mi_disabled = B_FALSE;
1517 		rw_exit(&i_mac_impl_lock);
1518 		return (err);
1519 	}
1520 
1521 	return (0);
1522 }
1523 
1524 int
1525 mac_unregister(mac_handle_t mh)
1526 {
1527 	int			err;
1528 	mac_impl_t		*mip = (mac_impl_t *)mh;
1529 	mod_hash_val_t		val;
1530 	mac_multicst_addr_t	*p, *nextp;
1531 
1532 	/*
1533 	 * See if there are any other references to this mac_t (e.g., VLAN's).
1534 	 * If not, set mi_disabled to prevent any new VLAN's from being
1535 	 * created while we're destroying this mac. Once mac_disable() returns
1536 	 * 0, the rest of mac_unregister() stuff should continue without
1537 	 * returning an error.
1538 	 */
1539 	if (!mip->mi_disabled) {
1540 		if ((err = mac_disable(mh)) != 0)
1541 			return (err);
1542 	}
1543 
1544 	/*
1545 	 * Clean up notification thread (wait for it to exit).
1546 	 */
1547 	mutex_enter(&mip->mi_notify_bits_lock);
1548 	mip->mi_notify_bits = (1 << MAC_NNOTE);
1549 	cv_broadcast(&mip->mi_notify_cv);
1550 	while (mip->mi_notify_bits != 0)
1551 		cv_wait(&mip->mi_notify_cv, &mip->mi_notify_bits_lock);
1552 	mutex_exit(&mip->mi_notify_bits_lock);
1553 
1554 	/*
1555 	 * Remove both style 1 and style 2 minor nodes
1556 	 */
1557 	ddi_remove_minor_node(mip->mi_dip, (char *)mip->mi_drvname);
1558 	ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
1559 
1560 	ASSERT(!mip->mi_activelink);
1561 
1562 	mac_stat_destroy(mip);
1563 
1564 	(void) mod_hash_remove(i_mac_impl_hash, (mod_hash_key_t)mip->mi_name,
1565 	    &val);
1566 	ASSERT(mip == (mac_impl_t *)val);
1567 
1568 	ASSERT(i_mac_impl_count > 0);
1569 	atomic_dec_32(&i_mac_impl_count);
1570 
1571 	if (mip->mi_pdata != NULL)
1572 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
1573 	mip->mi_pdata = NULL;
1574 	mip->mi_pdata_size = 0;
1575 
1576 	/*
1577 	 * Free the list of multicast addresses.
1578 	 */
1579 	for (p = mip->mi_mmap; p != NULL; p = nextp) {
1580 		nextp = p->mma_nextp;
1581 		kmem_free(p, sizeof (mac_multicst_addr_t));
1582 	}
1583 	mip->mi_mmap = NULL;
1584 
1585 	mip->mi_linkstate = LINK_STATE_UNKNOWN;
1586 	kmem_free(mip->mi_info.mi_unicst_addr, mip->mi_type->mt_addr_length);
1587 	mip->mi_info.mi_unicst_addr = NULL;
1588 
1589 	atomic_dec_32(&mip->mi_type->mt_ref);
1590 	mip->mi_type = NULL;
1591 
1592 	cmn_err(CE_NOTE, "!%s unregistered", mip->mi_name);
1593 
1594 	kmem_cache_free(i_mac_impl_cachep, mip);
1595 
1596 	return (0);
1597 }
1598 
1599 /*
1600  * To avoid potential deadlocks, mac_rx() releases mi_rx_lock
1601  * before invoking its list of upcalls. This introduces races with
1602  * mac_rx_remove() and mac_rx_add(), who can potentially modify the
1603  * upcall list while mi_rx_lock is not being held. The race with
1604  * mac_rx_remove() is handled by incrementing mi_rx_ref upon entering
1605  * mac_rx(); a non-zero mi_rx_ref would tell mac_rx_remove()
1606  * to not modify the list but instead mark an upcall for deletion.
1607  * before mac_rx() exits, mi_rx_ref is decremented and if it
1608  * is 0, the marked upcalls will be removed from the list and freed.
1609  * The race with mac_rx_add() is harmless because mac_rx_add() only
1610  * prepends to the list and since mac_rx() saves the list head
1611  * before releasing mi_rx_lock, any prepended upcall won't be seen
1612  * until the next packet chain arrives.
1613  *
1614  * To minimize lock contention between multiple parallel invocations
1615  * of mac_rx(), mi_rx_lock is acquired as a READER lock. The
1616  * use of atomic operations ensures the sanity of mi_rx_ref. mi_rx_lock
1617  * will be upgraded to WRITER mode when there are marked upcalls to be
1618  * cleaned.
1619  */
1620 static void
1621 mac_do_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain,
1622     boolean_t active_only)
1623 {
1624 	mac_impl_t	*mip = (mac_impl_t *)mh;
1625 	mblk_t		*bp = mp_chain;
1626 	mac_rx_fn_t	*mrfp;
1627 
1628 	/*
1629 	 * Call all registered receive functions.
1630 	 */
1631 	rw_enter(&mip->mi_rx_lock, RW_READER);
1632 	if ((mrfp = mip->mi_mrfp) == NULL) {
1633 		/* There are no registered receive functions. */
1634 		freemsgchain(bp);
1635 		rw_exit(&mip->mi_rx_lock);
1636 		return;
1637 	}
1638 	atomic_inc_32(&mip->mi_rx_ref);
1639 	rw_exit(&mip->mi_rx_lock);
1640 
1641 	/*
1642 	 * Call registered receive functions.
1643 	 */
1644 	do {
1645 		mblk_t *recv_bp;
1646 
1647 		if (active_only && !mrfp->mrf_active) {
1648 			mrfp = mrfp->mrf_nextp;
1649 			if (mrfp == NULL) {
1650 				/*
1651 				 * We hit the last receiver, but it's not
1652 				 * active.
1653 				 */
1654 				freemsgchain(bp);
1655 			}
1656 			continue;
1657 		}
1658 
1659 		recv_bp = (mrfp->mrf_nextp != NULL) ? copymsgchain(bp) : bp;
1660 		if (recv_bp != NULL) {
1661 			if (mrfp->mrf_inuse) {
1662 				/*
1663 				 * Send bp itself and keep the copy.
1664 				 * If there's only one active receiver,
1665 				 * it should get the original message,
1666 				 * tagged with the hardware checksum flags.
1667 				 */
1668 				mrfp->mrf_fn(mrfp->mrf_arg, mrh, bp);
1669 				bp = recv_bp;
1670 			} else {
1671 				freemsgchain(recv_bp);
1672 			}
1673 		}
1674 
1675 		mrfp = mrfp->mrf_nextp;
1676 	} while (mrfp != NULL);
1677 
1678 	rw_enter(&mip->mi_rx_lock, RW_READER);
1679 	if (atomic_dec_32_nv(&mip->mi_rx_ref) == 0 && mip->mi_rx_removed > 0) {
1680 		mac_rx_fn_t	**pp, *p;
1681 		uint32_t	cnt = 0;
1682 
1683 		DTRACE_PROBE1(delete_callbacks, mac_impl_t *, mip);
1684 
1685 		/*
1686 		 * Need to become exclusive before doing cleanup
1687 		 */
1688 		if (rw_tryupgrade(&mip->mi_rx_lock) == 0) {
1689 			rw_exit(&mip->mi_rx_lock);
1690 			rw_enter(&mip->mi_rx_lock, RW_WRITER);
1691 		}
1692 
1693 		/*
1694 		 * We return if another thread has already entered and cleaned
1695 		 * up the list.
1696 		 */
1697 		if (mip->mi_rx_ref > 0 || mip->mi_rx_removed == 0) {
1698 			rw_exit(&mip->mi_rx_lock);
1699 			return;
1700 		}
1701 
1702 		/*
1703 		 * Free removed callbacks.
1704 		 */
1705 		pp = &mip->mi_mrfp;
1706 		while (*pp != NULL) {
1707 			if (!(*pp)->mrf_inuse) {
1708 				p = *pp;
1709 				*pp = (*pp)->mrf_nextp;
1710 				kmem_free(p, sizeof (*p));
1711 				cnt++;
1712 				continue;
1713 			}
1714 			pp = &(*pp)->mrf_nextp;
1715 		}
1716 
1717 		/*
1718 		 * Wake up mac_rx_remove_wait()
1719 		 */
1720 		mutex_enter(&mip->mi_lock);
1721 		ASSERT(mip->mi_rx_removed == cnt);
1722 		mip->mi_rx_removed = 0;
1723 		cv_broadcast(&mip->mi_rx_cv);
1724 		mutex_exit(&mip->mi_lock);
1725 	}
1726 	rw_exit(&mip->mi_rx_lock);
1727 }
1728 
1729 void
1730 mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
1731 {
1732 	mac_do_rx(mh, mrh, mp_chain, B_FALSE);
1733 }
1734 
1735 /*
1736  * Send a packet chain up to the receive callbacks which declared
1737  * themselves as being active.
1738  */
1739 void
1740 mac_active_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp_chain)
1741 {
1742 	mac_do_rx(arg, mrh, mp_chain, B_TRUE);
1743 }
1744 
1745 /*
1746  * Function passed to the active client sharing a VNIC. This function
1747  * is returned by mac_tx_get() when a VNIC is present. It invokes
1748  * the VNIC transmit entry point which was specified by the VNIC when
1749  * it called mac_vnic_set(). The VNIC transmit entry point will
1750  * pass the packets to the local VNICs and/or to the underlying VNICs
1751  * if needed.
1752  */
1753 static mblk_t *
1754 mac_vnic_tx(void *arg, mblk_t *mp)
1755 {
1756 	mac_impl_t	*mip = arg;
1757 	mac_txinfo_t	*mtfp;
1758 	mac_vnic_tx_t	*mvt;
1759 
1760 	/*
1761 	 * There is a race between the notification of the VNIC
1762 	 * addition and removal, and the processing of the VNIC notification
1763 	 * by the MAC client. During this window, it is possible for
1764 	 * an active MAC client to contine invoking mac_vnic_tx() while
1765 	 * the VNIC has already been removed. So we cannot assume
1766 	 * that mi_vnic_present will always be true when mac_vnic_tx()
1767 	 * is invoked.
1768 	 */
1769 	rw_enter(&mip->mi_tx_lock, RW_READER);
1770 	if (!mip->mi_vnic_present) {
1771 		rw_exit(&mip->mi_tx_lock);
1772 		freemsgchain(mp);
1773 		return (NULL);
1774 	}
1775 
1776 	ASSERT(mip->mi_vnic_tx != NULL);
1777 	mvt = mip->mi_vnic_tx;
1778 	MAC_VNIC_TXINFO_REFHOLD(mvt);
1779 	rw_exit(&mip->mi_tx_lock);
1780 
1781 	mtfp = &mvt->mv_txinfo;
1782 	mtfp->mt_fn(mtfp->mt_arg, mp);
1783 
1784 	MAC_VNIC_TXINFO_REFRELE(mvt);
1785 	return (NULL);
1786 }
1787 
1788 /*
1789  * Transmit function -- ONLY used when there are registered loopback listeners.
1790  */
1791 mblk_t *
1792 mac_do_txloop(void *arg, mblk_t *bp, boolean_t call_vnic)
1793 {
1794 	mac_impl_t	*mip = arg;
1795 	mac_txloop_fn_t	*mtfp;
1796 	mblk_t		*loop_bp, *resid_bp, *next_bp;
1797 
1798 	if (call_vnic) {
1799 		/*
1800 		 * In promiscous mode, a copy of the sent packet will
1801 		 * be sent to the client's promiscous receive entry
1802 		 * points via mac_vnic_tx()->
1803 		 * mac_active_rx_promisc()->mac_rx_default().
1804 		 */
1805 		return (mac_vnic_tx(arg, bp));
1806 	}
1807 
1808 	while (bp != NULL) {
1809 		next_bp = bp->b_next;
1810 		bp->b_next = NULL;
1811 
1812 		if ((loop_bp = copymsg(bp)) == NULL)
1813 			goto noresources;
1814 
1815 		if ((resid_bp = mip->mi_tx(mip->mi_driver, bp)) != NULL) {
1816 			ASSERT(resid_bp == bp);
1817 			freemsg(loop_bp);
1818 			goto noresources;
1819 		}
1820 
1821 		rw_enter(&mip->mi_tx_lock, RW_READER);
1822 		mtfp = mip->mi_mtfp;
1823 		while (mtfp != NULL && loop_bp != NULL) {
1824 			bp = loop_bp;
1825 
1826 			/* XXX counter bump if copymsg() fails? */
1827 			if (mtfp->mtf_nextp != NULL)
1828 				loop_bp = copymsg(bp);
1829 			else
1830 				loop_bp = NULL;
1831 
1832 			mtfp->mtf_fn(mtfp->mtf_arg, bp);
1833 			mtfp = mtfp->mtf_nextp;
1834 		}
1835 		rw_exit(&mip->mi_tx_lock);
1836 
1837 		/*
1838 		 * It's possible we've raced with the disabling of promiscuous
1839 		 * mode, in which case we can discard our copy.
1840 		 */
1841 		if (loop_bp != NULL)
1842 			freemsg(loop_bp);
1843 
1844 		bp = next_bp;
1845 	}
1846 
1847 	return (NULL);
1848 
1849 noresources:
1850 	bp->b_next = next_bp;
1851 	return (bp);
1852 }
1853 
1854 mblk_t *
1855 mac_txloop(void *arg, mblk_t *bp)
1856 {
1857 	return (mac_do_txloop(arg, bp, B_FALSE));
1858 }
1859 
1860 static mblk_t *
1861 mac_vnic_txloop(void *arg, mblk_t *bp)
1862 {
1863 	return (mac_do_txloop(arg, bp, B_TRUE));
1864 }
1865 
1866 void
1867 mac_link_update(mac_handle_t mh, link_state_t link)
1868 {
1869 	mac_impl_t	*mip = (mac_impl_t *)mh;
1870 
1871 	/*
1872 	 * Save the link state.
1873 	 */
1874 	mip->mi_linkstate = link;
1875 
1876 	/*
1877 	 * Send a MAC_NOTE_LINK notification.
1878 	 */
1879 	i_mac_notify(mip, MAC_NOTE_LINK);
1880 }
1881 
1882 void
1883 mac_unicst_update(mac_handle_t mh, const uint8_t *addr)
1884 {
1885 	mac_impl_t	*mip = (mac_impl_t *)mh;
1886 
1887 	if (mip->mi_type->mt_addr_length == 0)
1888 		return;
1889 
1890 	/*
1891 	 * Save the address.
1892 	 */
1893 	bcopy(addr, mip->mi_addr, mip->mi_type->mt_addr_length);
1894 
1895 	/*
1896 	 * Send a MAC_NOTE_UNICST notification.
1897 	 */
1898 	i_mac_notify(mip, MAC_NOTE_UNICST);
1899 }
1900 
1901 void
1902 mac_tx_update(mac_handle_t mh)
1903 {
1904 	/*
1905 	 * Send a MAC_NOTE_TX notification.
1906 	 */
1907 	i_mac_notify((mac_impl_t *)mh, MAC_NOTE_TX);
1908 }
1909 
1910 void
1911 mac_resource_update(mac_handle_t mh)
1912 {
1913 	/*
1914 	 * Send a MAC_NOTE_RESOURCE notification.
1915 	 */
1916 	i_mac_notify((mac_impl_t *)mh, MAC_NOTE_RESOURCE);
1917 }
1918 
1919 mac_resource_handle_t
1920 mac_resource_add(mac_handle_t mh, mac_resource_t *mrp)
1921 {
1922 	mac_impl_t		*mip = (mac_impl_t *)mh;
1923 	mac_resource_handle_t	mrh;
1924 	mac_resource_add_t	add;
1925 	void			*arg;
1926 
1927 	rw_enter(&mip->mi_resource_lock, RW_READER);
1928 	add = mip->mi_resource_add;
1929 	arg = mip->mi_resource_add_arg;
1930 
1931 	if (add != NULL)
1932 		mrh = add(arg, mrp);
1933 	else
1934 		mrh = NULL;
1935 	rw_exit(&mip->mi_resource_lock);
1936 
1937 	return (mrh);
1938 }
1939 
1940 int
1941 mac_pdata_update(mac_handle_t mh, void *mac_pdata, size_t dsize)
1942 {
1943 	mac_impl_t	*mip = (mac_impl_t *)mh;
1944 
1945 	/*
1946 	 * Verify that the plugin supports MAC plugin data and that the
1947 	 * supplied data is valid.
1948 	 */
1949 	if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
1950 		return (EINVAL);
1951 	if (!mip->mi_type->mt_ops.mtops_pdata_verify(mac_pdata, dsize))
1952 		return (EINVAL);
1953 
1954 	if (mip->mi_pdata != NULL)
1955 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
1956 
1957 	mip->mi_pdata = kmem_alloc(dsize, KM_SLEEP);
1958 	bcopy(mac_pdata, mip->mi_pdata, dsize);
1959 	mip->mi_pdata_size = dsize;
1960 
1961 	/*
1962 	 * Since the MAC plugin data is used to construct MAC headers that
1963 	 * were cached in fast-path headers, we need to flush fast-path
1964 	 * information for links associated with this mac.
1965 	 */
1966 	i_mac_notify(mip, MAC_NOTE_FASTPATH_FLUSH);
1967 	return (0);
1968 }
1969 
1970 void
1971 mac_multicst_refresh(mac_handle_t mh, mac_multicst_t refresh, void *arg,
1972     boolean_t add)
1973 {
1974 	mac_impl_t		*mip = (mac_impl_t *)mh;
1975 	mac_multicst_addr_t	*p;
1976 
1977 	/*
1978 	 * If no specific refresh function was given then default to the
1979 	 * driver's m_multicst entry point.
1980 	 */
1981 	if (refresh == NULL) {
1982 		refresh = mip->mi_multicst;
1983 		arg = mip->mi_driver;
1984 	}
1985 	ASSERT(refresh != NULL);
1986 
1987 	/*
1988 	 * Walk the multicast address list and call the refresh function for
1989 	 * each address.
1990 	 */
1991 	rw_enter(&(mip->mi_data_lock), RW_READER);
1992 	for (p = mip->mi_mmap; p != NULL; p = p->mma_nextp)
1993 		refresh(arg, add, p->mma_addr);
1994 	rw_exit(&(mip->mi_data_lock));
1995 }
1996 
1997 void
1998 mac_unicst_refresh(mac_handle_t mh, mac_unicst_t refresh, void *arg)
1999 {
2000 	mac_impl_t	*mip = (mac_impl_t *)mh;
2001 	/*
2002 	 * If no specific refresh function was given then default to the
2003 	 * driver's mi_unicst entry point.
2004 	 */
2005 	if (refresh == NULL) {
2006 		refresh = mip->mi_unicst;
2007 		arg = mip->mi_driver;
2008 	}
2009 	ASSERT(refresh != NULL);
2010 
2011 	/*
2012 	 * Call the refresh function with the current unicast address.
2013 	 */
2014 	refresh(arg, mip->mi_addr);
2015 }
2016 
2017 void
2018 mac_promisc_refresh(mac_handle_t mh, mac_setpromisc_t refresh, void *arg)
2019 {
2020 	mac_impl_t	*mip = (mac_impl_t *)mh;
2021 
2022 	/*
2023 	 * If no specific refresh function was given then default to the
2024 	 * driver's m_promisc entry point.
2025 	 */
2026 	if (refresh == NULL) {
2027 		refresh = mip->mi_setpromisc;
2028 		arg = mip->mi_driver;
2029 	}
2030 	ASSERT(refresh != NULL);
2031 
2032 	/*
2033 	 * Call the refresh function with the current promiscuity.
2034 	 */
2035 	refresh(arg, (mip->mi_devpromisc != 0));
2036 }
2037 
2038 boolean_t
2039 mac_do_active_set(mac_handle_t mh, boolean_t shareable)
2040 {
2041 	mac_impl_t *mip = (mac_impl_t *)mh;
2042 
2043 	mutex_enter(&mip->mi_activelink_lock);
2044 	if (mip->mi_activelink) {
2045 		mutex_exit(&mip->mi_activelink_lock);
2046 		return (B_FALSE);
2047 	}
2048 	mip->mi_activelink = B_TRUE;
2049 	mip->mi_shareable = shareable;
2050 	mutex_exit(&mip->mi_activelink_lock);
2051 	return (B_TRUE);
2052 }
2053 
2054 /*
2055  * Called by MAC clients. By default, active MAC clients cannot
2056  * share the NIC with VNICs.
2057  */
2058 boolean_t
2059 mac_active_set(mac_handle_t mh)
2060 {
2061 	return (mac_do_active_set(mh, B_FALSE));
2062 }
2063 
2064 /*
2065  * Called by MAC clients which can share the NIC with VNICS, e.g. DLS.
2066  */
2067 boolean_t
2068 mac_active_shareable_set(mac_handle_t mh)
2069 {
2070 	return (mac_do_active_set(mh, B_TRUE));
2071 }
2072 
2073 void
2074 mac_active_clear(mac_handle_t mh)
2075 {
2076 	mac_impl_t *mip = (mac_impl_t *)mh;
2077 
2078 	mutex_enter(&mip->mi_activelink_lock);
2079 	ASSERT(mip->mi_activelink);
2080 	mip->mi_activelink = B_FALSE;
2081 	mutex_exit(&mip->mi_activelink_lock);
2082 }
2083 
2084 boolean_t
2085 mac_vnic_set(mac_handle_t mh, mac_txinfo_t *tx_info, mac_getcapab_t getcapab_fn,
2086     void *getcapab_arg)
2087 {
2088 	mac_impl_t	*mip = (mac_impl_t *)mh;
2089 	mac_vnic_tx_t	*vnic_tx;
2090 
2091 	mutex_enter(&mip->mi_activelink_lock);
2092 	rw_enter(&mip->mi_tx_lock, RW_WRITER);
2093 	ASSERT(!mip->mi_vnic_present);
2094 
2095 	if (mip->mi_activelink && !mip->mi_shareable) {
2096 		/*
2097 		 * The NIC is already used by an active client which cannot
2098 		 * share it with VNICs.
2099 		 */
2100 		rw_exit(&mip->mi_tx_lock);
2101 		mutex_exit(&mip->mi_activelink_lock);
2102 		return (B_FALSE);
2103 	}
2104 
2105 	vnic_tx = kmem_cache_alloc(mac_vnic_tx_cache, KM_SLEEP);
2106 	vnic_tx->mv_refs = 0;
2107 	vnic_tx->mv_txinfo = *tx_info;
2108 	vnic_tx->mv_clearing = B_FALSE;
2109 
2110 	mip->mi_vnic_present = B_TRUE;
2111 	mip->mi_vnic_tx = vnic_tx;
2112 	mip->mi_vnic_getcapab_fn = getcapab_fn;
2113 	mip->mi_vnic_getcapab_arg = getcapab_arg;
2114 	rw_exit(&mip->mi_tx_lock);
2115 	mutex_exit(&mip->mi_activelink_lock);
2116 
2117 	i_mac_notify(mip, MAC_NOTE_VNIC);
2118 	return (B_TRUE);
2119 }
2120 
2121 void
2122 mac_vnic_clear(mac_handle_t mh)
2123 {
2124 	mac_impl_t *mip = (mac_impl_t *)mh;
2125 	mac_vnic_tx_t	*vnic_tx;
2126 
2127 	rw_enter(&mip->mi_tx_lock, RW_WRITER);
2128 	ASSERT(mip->mi_vnic_present);
2129 	mip->mi_vnic_present = B_FALSE;
2130 	/*
2131 	 * Setting mi_vnic_tx to NULL here under the lock guarantees
2132 	 * that no new references to the current VNIC transmit structure
2133 	 * will be taken by mac_vnic_tx(). This is a necessary condition
2134 	 * for safely waiting for the reference count to drop to
2135 	 * zero below.
2136 	 */
2137 	vnic_tx = mip->mi_vnic_tx;
2138 	mip->mi_vnic_tx = NULL;
2139 	mip->mi_vnic_getcapab_fn = NULL;
2140 	mip->mi_vnic_getcapab_arg = NULL;
2141 	rw_exit(&mip->mi_tx_lock);
2142 
2143 	i_mac_notify(mip, MAC_NOTE_VNIC);
2144 
2145 	/*
2146 	 * Wait for all TX calls referencing the VNIC transmit
2147 	 * entry point that was removed to complete.
2148 	 */
2149 	mutex_enter(&vnic_tx->mv_lock);
2150 	vnic_tx->mv_clearing = B_TRUE;
2151 	while (vnic_tx->mv_refs > 0)
2152 		cv_wait(&vnic_tx->mv_cv, &vnic_tx->mv_lock);
2153 	mutex_exit(&vnic_tx->mv_lock);
2154 	kmem_cache_free(mac_vnic_tx_cache, vnic_tx);
2155 }
2156 
2157 /*
2158  * mac_info_get() is used for retrieving the mac_info when a DL_INFO_REQ is
2159  * issued before a DL_ATTACH_REQ. we walk the i_mac_impl_hash table and find
2160  * the first mac_impl_t with a matching driver name; then we copy its mac_info_t
2161  * to the caller. we do all this with i_mac_impl_lock held so the mac_impl_t
2162  * cannot disappear while we are accessing it.
2163  */
2164 typedef struct i_mac_info_state_s {
2165 	const char	*mi_name;
2166 	mac_info_t	*mi_infop;
2167 } i_mac_info_state_t;
2168 
2169 /*ARGSUSED*/
2170 static uint_t
2171 i_mac_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
2172 {
2173 	i_mac_info_state_t	*statep = arg;
2174 	mac_impl_t		*mip = (mac_impl_t *)val;
2175 
2176 	if (mip->mi_disabled)
2177 		return (MH_WALK_CONTINUE);
2178 
2179 	if (strcmp(statep->mi_name,
2180 	    ddi_driver_name(mip->mi_dip)) != 0)
2181 		return (MH_WALK_CONTINUE);
2182 
2183 	statep->mi_infop = &mip->mi_info;
2184 	return (MH_WALK_TERMINATE);
2185 }
2186 
2187 boolean_t
2188 mac_info_get(const char *name, mac_info_t *minfop)
2189 {
2190 	i_mac_info_state_t	state;
2191 
2192 	rw_enter(&i_mac_impl_lock, RW_READER);
2193 	state.mi_name = name;
2194 	state.mi_infop = NULL;
2195 	mod_hash_walk(i_mac_impl_hash, i_mac_info_walker, &state);
2196 	if (state.mi_infop == NULL) {
2197 		rw_exit(&i_mac_impl_lock);
2198 		return (B_FALSE);
2199 	}
2200 	*minfop = *state.mi_infop;
2201 	rw_exit(&i_mac_impl_lock);
2202 	return (B_TRUE);
2203 }
2204 
2205 boolean_t
2206 mac_do_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data,
2207     boolean_t is_vnic)
2208 {
2209 	mac_impl_t *mip = (mac_impl_t *)mh;
2210 
2211 	if (!is_vnic) {
2212 		rw_enter(&mip->mi_tx_lock, RW_READER);
2213 		if (mip->mi_vnic_present) {
2214 			boolean_t rv;
2215 
2216 			rv = mip->mi_vnic_getcapab_fn(mip->mi_vnic_getcapab_arg,
2217 			    cap, cap_data);
2218 			rw_exit(&mip->mi_tx_lock);
2219 			return (rv);
2220 		}
2221 		rw_exit(&mip->mi_tx_lock);
2222 	}
2223 
2224 	if (mip->mi_callbacks->mc_callbacks & MC_GETCAPAB)
2225 		return (mip->mi_getcapab(mip->mi_driver, cap, cap_data));
2226 	else
2227 		return (B_FALSE);
2228 }
2229 
2230 boolean_t
2231 mac_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data)
2232 {
2233 	return (mac_do_capab_get(mh, cap, cap_data, B_FALSE));
2234 }
2235 
2236 boolean_t
2237 mac_vnic_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data)
2238 {
2239 	return (mac_do_capab_get(mh, cap, cap_data, B_TRUE));
2240 }
2241 
2242 boolean_t
2243 mac_sap_verify(mac_handle_t mh, uint32_t sap, uint32_t *bind_sap)
2244 {
2245 	mac_impl_t	*mip = (mac_impl_t *)mh;
2246 	return (mip->mi_type->mt_ops.mtops_sap_verify(sap, bind_sap,
2247 	    mip->mi_pdata));
2248 }
2249 
2250 mblk_t *
2251 mac_header(mac_handle_t mh, const uint8_t *daddr, uint32_t sap, mblk_t *payload,
2252     size_t extra_len)
2253 {
2254 	mac_impl_t	*mip = (mac_impl_t *)mh;
2255 	return (mip->mi_type->mt_ops.mtops_header(mip->mi_addr, daddr, sap,
2256 	    mip->mi_pdata, payload, extra_len));
2257 }
2258 
2259 int
2260 mac_header_info(mac_handle_t mh, mblk_t *mp, mac_header_info_t *mhip)
2261 {
2262 	mac_impl_t	*mip = (mac_impl_t *)mh;
2263 	return (mip->mi_type->mt_ops.mtops_header_info(mp, mip->mi_pdata,
2264 	    mhip));
2265 }
2266 
2267 mblk_t *
2268 mac_header_cook(mac_handle_t mh, mblk_t *mp)
2269 {
2270 	mac_impl_t	*mip = (mac_impl_t *)mh;
2271 	if (mip->mi_type->mt_ops.mtops_ops & MTOPS_HEADER_COOK) {
2272 		if (DB_REF(mp) > 1) {
2273 			mblk_t *newmp = copymsg(mp);
2274 			if (newmp == NULL)
2275 				return (NULL);
2276 			freemsg(mp);
2277 			mp = newmp;
2278 		}
2279 		return (mip->mi_type->mt_ops.mtops_header_cook(mp,
2280 		    mip->mi_pdata));
2281 	}
2282 	return (mp);
2283 }
2284 
2285 mblk_t *
2286 mac_header_uncook(mac_handle_t mh, mblk_t *mp)
2287 {
2288 	mac_impl_t	*mip = (mac_impl_t *)mh;
2289 	if (mip->mi_type->mt_ops.mtops_ops & MTOPS_HEADER_UNCOOK) {
2290 		if (DB_REF(mp) > 1) {
2291 			mblk_t *newmp = copymsg(mp);
2292 			if (newmp == NULL)
2293 				return (NULL);
2294 			freemsg(mp);
2295 			mp = newmp;
2296 		}
2297 		return (mip->mi_type->mt_ops.mtops_header_uncook(mp,
2298 		    mip->mi_pdata));
2299 	}
2300 	return (mp);
2301 }
2302 
/*
 * Hand the given dev_ops structure and name to dld_init_ops().
 */
void
mac_init_ops(struct dev_ops *ops, const char *name)
{
	dld_init_ops(ops, name);
}
2308 
/*
 * Hand the given dev_ops structure to dld_fini_ops().
 */
void
mac_fini_ops(struct dev_ops *ops)
{
	dld_fini_ops(ops);
}
2314 
2315 /*
2316  * MAC Type Plugin functions.
2317  */
2318 
2319 mactype_register_t *
2320 mactype_alloc(uint_t mactype_version)
2321 {
2322 	mactype_register_t *mtrp;
2323 
2324 	/*
2325 	 * Make sure there isn't a version mismatch between the plugin and
2326 	 * the framework.  In the future, if multiple versions are
2327 	 * supported, this check could become more sophisticated.
2328 	 */
2329 	if (mactype_version != MACTYPE_VERSION)
2330 		return (NULL);
2331 
2332 	mtrp = kmem_zalloc(sizeof (mactype_register_t), KM_SLEEP);
2333 	mtrp->mtr_version = mactype_version;
2334 	return (mtrp);
2335 }
2336 
2337 void
2338 mactype_free(mactype_register_t *mtrp)
2339 {
2340 	kmem_free(mtrp, sizeof (mactype_register_t));
2341 }
2342 
2343 int
2344 mactype_register(mactype_register_t *mtrp)
2345 {
2346 	mactype_t	*mtp;
2347 	mactype_ops_t	*ops = mtrp->mtr_ops;
2348 
2349 	/* Do some sanity checking before we register this MAC type. */
2350 	if (mtrp->mtr_ident == NULL || ops == NULL || mtrp->mtr_addrlen == 0)
2351 		return (EINVAL);
2352 
2353 	/*
2354 	 * Verify that all mandatory callbacks are set in the ops
2355 	 * vector.
2356 	 */
2357 	if (ops->mtops_unicst_verify == NULL ||
2358 	    ops->mtops_multicst_verify == NULL ||
2359 	    ops->mtops_sap_verify == NULL ||
2360 	    ops->mtops_header == NULL ||
2361 	    ops->mtops_header_info == NULL) {
2362 		return (EINVAL);
2363 	}
2364 
2365 	mtp = kmem_zalloc(sizeof (*mtp), KM_SLEEP);
2366 	mtp->mt_ident = mtrp->mtr_ident;
2367 	mtp->mt_ops = *ops;
2368 	mtp->mt_type = mtrp->mtr_mactype;
2369 	mtp->mt_nativetype = mtrp->mtr_nativetype;
2370 	mtp->mt_addr_length = mtrp->mtr_addrlen;
2371 	if (mtrp->mtr_brdcst_addr != NULL) {
2372 		mtp->mt_brdcst_addr = kmem_alloc(mtrp->mtr_addrlen, KM_SLEEP);
2373 		bcopy(mtrp->mtr_brdcst_addr, mtp->mt_brdcst_addr,
2374 		    mtrp->mtr_addrlen);
2375 	}
2376 
2377 	mtp->mt_stats = mtrp->mtr_stats;
2378 	mtp->mt_statcount = mtrp->mtr_statcount;
2379 
2380 	if (mod_hash_insert(i_mactype_hash,
2381 	    (mod_hash_key_t)mtp->mt_ident, (mod_hash_val_t)mtp) != 0) {
2382 		kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length);
2383 		kmem_free(mtp, sizeof (*mtp));
2384 		return (EEXIST);
2385 	}
2386 	return (0);
2387 }
2388 
2389 int
2390 mactype_unregister(const char *ident)
2391 {
2392 	mactype_t	*mtp;
2393 	mod_hash_val_t	val;
2394 	int 		err;
2395 
2396 	/*
2397 	 * Let's not allow MAC drivers to use this plugin while we're
2398 	 * trying to unregister it.  Holding i_mactype_lock also prevents a
2399 	 * plugin from unregistering while a MAC driver is attempting to
2400 	 * hold a reference to it in i_mactype_getplugin().
2401 	 */
2402 	mutex_enter(&i_mactype_lock);
2403 
2404 	if ((err = mod_hash_find(i_mactype_hash, (mod_hash_key_t)ident,
2405 	    (mod_hash_val_t *)&mtp)) != 0) {
2406 		/* A plugin is trying to unregister, but it never registered. */
2407 		err = ENXIO;
2408 		goto done;
2409 	}
2410 
2411 	if (mtp->mt_ref != 0) {
2412 		err = EBUSY;
2413 		goto done;
2414 	}
2415 
2416 	err = mod_hash_remove(i_mactype_hash, (mod_hash_key_t)ident, &val);
2417 	ASSERT(err == 0);
2418 	if (err != 0) {
2419 		/* This should never happen, thus the ASSERT() above. */
2420 		err = EINVAL;
2421 		goto done;
2422 	}
2423 	ASSERT(mtp == (mactype_t *)val);
2424 
2425 	kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length);
2426 	kmem_free(mtp, sizeof (mactype_t));
2427 done:
2428 	mutex_exit(&i_mactype_lock);
2429 	return (err);
2430 }
2431 
2432 int
2433 mac_vlan_create(mac_handle_t mh, const char *name, minor_t minor)
2434 {
2435 	mac_impl_t		*mip = (mac_impl_t *)mh;
2436 
2437 	/* Create a style-1 DLPI device */
2438 	if (ddi_create_minor_node(mip->mi_dip, (char *)name, S_IFCHR, minor,
2439 	    DDI_NT_NET, 0) != DDI_SUCCESS) {
2440 		return (-1);
2441 	}
2442 	return (0);
2443 }
2444 
2445 void
2446 mac_vlan_remove(mac_handle_t mh, const char *name)
2447 {
2448 	mac_impl_t		*mip = (mac_impl_t *)mh;
2449 	dev_info_t		*dipp;
2450 
2451 	ddi_remove_minor_node(mip->mi_dip, (char *)name);
2452 	dipp = ddi_get_parent(mip->mi_dip);
2453 	(void) devfs_clean(dipp, NULL, 0);
2454 }
2455