xref: /titanic_41/usr/src/uts/common/io/mac/mac.c (revision 5eb92cf2b27ec0d138dc9e8eedd5b6ecf4cfec0c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * MAC Services Module
31  */
32 
33 #include <sys/types.h>
34 #include <sys/conf.h>
35 #include <sys/stat.h>
36 #include <sys/stream.h>
37 #include <sys/strsun.h>
38 #include <sys/strsubr.h>
39 #include <sys/dlpi.h>
40 #include <sys/modhash.h>
41 #include <sys/mac.h>
42 #include <sys/mac_impl.h>
43 #include <sys/dls.h>
44 #include <sys/dld.h>
45 #include <sys/modctl.h>
46 #include <sys/fs/dv_node.h>
47 #include <sys/thread.h>
48 #include <sys/proc.h>
49 #include <sys/callb.h>
50 #include <sys/cpuvar.h>
51 #include <sys/atomic.h>
52 #include <sys/sdt.h>
53 
/* Size argument passed to mod_hash_create_extended() for i_mac_impl_hash. */
#define	IMPL_HASHSZ	67	/* prime */

/* kmem cache of mac_impl_t objects; created in mac_init(). */
static kmem_cache_t	*i_mac_impl_cachep;
/* Hash of mac_impl_t objects, looked up by MAC name in mac_open(). */
static mod_hash_t	*i_mac_impl_hash;
/*
 * Taken as writer around the i_mac_impl_hash lookup and the mi_ref updates
 * in mac_open()/mac_close(), and as reader by i_mac_notify().
 */
krwlock_t		i_mac_impl_lock;
/* mac_fini() returns EBUSY while this count is greater than zero. */
uint_t			i_mac_impl_count;
/* kmem cache of mac_vnic_tx_t objects; created in mac_init(). */
static kmem_cache_t	*mac_vnic_tx_cache;

/* First argument to modload() when loading a MAC-type plugin. */
#define	MACTYPE_KMODDIR	"mac"
/* Size argument passed to mod_hash_create_extended() for i_mactype_hash. */
#define	MACTYPE_HASHSZ	67
/* Hash of mactype_t plugins, looked up by name in i_mactype_getplugin(). */
static mod_hash_t	*i_mactype_hash;
/*
 * i_mactype_lock synchronizes threads that obtain references to mactype_t
 * structures through i_mactype_getplugin().
 */
static kmutex_t		i_mactype_lock;

/* Forward declarations for file-local functions. */
static void i_mac_notify_thread(void *);
static mblk_t *mac_vnic_tx(void *, mblk_t *);
static mblk_t *mac_vnic_txloop(void *, mblk_t *);
74 
75 /*
76  * Private functions.
77  */
78 
79 /*ARGSUSED*/
80 static int
81 i_mac_constructor(void *buf, void *arg, int kmflag)
82 {
83 	mac_impl_t	*mip = buf;
84 
85 	bzero(buf, sizeof (mac_impl_t));
86 
87 	mip->mi_linkstate = LINK_STATE_UNKNOWN;
88 
89 	rw_init(&mip->mi_state_lock, NULL, RW_DRIVER, NULL);
90 	rw_init(&mip->mi_data_lock, NULL, RW_DRIVER, NULL);
91 	rw_init(&mip->mi_notify_lock, NULL, RW_DRIVER, NULL);
92 	rw_init(&mip->mi_rx_lock, NULL, RW_DRIVER, NULL);
93 	rw_init(&mip->mi_tx_lock, NULL, RW_DRIVER, NULL);
94 	rw_init(&mip->mi_resource_lock, NULL, RW_DRIVER, NULL);
95 	mutex_init(&mip->mi_activelink_lock, NULL, MUTEX_DEFAULT, NULL);
96 	mutex_init(&mip->mi_notify_bits_lock, NULL, MUTEX_DRIVER, NULL);
97 	cv_init(&mip->mi_notify_cv, NULL, CV_DRIVER, NULL);
98 	mutex_init(&mip->mi_lock, NULL, MUTEX_DRIVER, NULL);
99 	cv_init(&mip->mi_rx_cv, NULL, CV_DRIVER, NULL);
100 	return (0);
101 }
102 
103 /*ARGSUSED*/
104 static void
105 i_mac_destructor(void *buf, void *arg)
106 {
107 	mac_impl_t	*mip = buf;
108 
109 	ASSERT(mip->mi_ref == 0);
110 	ASSERT(mip->mi_active == 0);
111 	ASSERT(mip->mi_linkstate == LINK_STATE_UNKNOWN);
112 	ASSERT(mip->mi_devpromisc == 0);
113 	ASSERT(mip->mi_promisc == 0);
114 	ASSERT(mip->mi_mmap == NULL);
115 	ASSERT(mip->mi_mnfp == NULL);
116 	ASSERT(mip->mi_resource_add == NULL);
117 	ASSERT(mip->mi_ksp == NULL);
118 	ASSERT(mip->mi_kstat_count == 0);
119 	ASSERT(mip->mi_notify_bits == 0);
120 	ASSERT(mip->mi_notify_thread == NULL);
121 
122 	rw_destroy(&mip->mi_state_lock);
123 	rw_destroy(&mip->mi_data_lock);
124 	rw_destroy(&mip->mi_notify_lock);
125 	rw_destroy(&mip->mi_rx_lock);
126 	rw_destroy(&mip->mi_tx_lock);
127 	rw_destroy(&mip->mi_resource_lock);
128 	mutex_destroy(&mip->mi_activelink_lock);
129 	mutex_destroy(&mip->mi_notify_bits_lock);
130 	cv_destroy(&mip->mi_notify_cv);
131 	mutex_destroy(&mip->mi_lock);
132 	cv_destroy(&mip->mi_rx_cv);
133 }
134 
135 /*
136  * mac_vnic_tx_t kmem cache support functions.
137  */
138 
139 /* ARGSUSED */
140 static int
141 i_mac_vnic_tx_ctor(void *buf, void *arg, int mkflag)
142 {
143 	mac_vnic_tx_t *vnic_tx = buf;
144 
145 	bzero(buf, sizeof (mac_vnic_tx_t));
146 	mutex_init(&vnic_tx->mv_lock, NULL, MUTEX_DRIVER, NULL);
147 	cv_init(&vnic_tx->mv_cv, NULL, CV_DRIVER, NULL);
148 	return (0);
149 }
150 
151 /* ARGSUSED */
152 static void
153 i_mac_vnic_tx_dtor(void *buf, void *arg)
154 {
155 	mac_vnic_tx_t *vnic_tx = buf;
156 
157 	ASSERT(vnic_tx->mv_refs == 0);
158 	mutex_destroy(&vnic_tx->mv_lock);
159 	cv_destroy(&vnic_tx->mv_cv);
160 }
161 
162 static void
163 i_mac_notify(mac_impl_t *mip, mac_notify_type_t type)
164 {
165 	rw_enter(&i_mac_impl_lock, RW_READER);
166 	if (mip->mi_disabled)
167 		goto exit;
168 
169 	/*
170 	 * Guard against incorrect notifications.  (Running a newer
171 	 * mac client against an older implementation?)
172 	 */
173 	if (type >= MAC_NNOTE)
174 		goto exit;
175 
176 	mutex_enter(&mip->mi_notify_bits_lock);
177 	mip->mi_notify_bits |= (1 << type);
178 	cv_broadcast(&mip->mi_notify_cv);
179 	mutex_exit(&mip->mi_notify_bits_lock);
180 
181 exit:
182 	rw_exit(&i_mac_impl_lock);
183 }
184 
185 static void
186 i_mac_log_link_state(mac_impl_t *mip)
187 {
188 	/*
189 	 * If no change, then it is not interesting.
190 	 */
191 	if (mip->mi_lastlinkstate == mip->mi_linkstate)
192 		return;
193 
194 	switch (mip->mi_linkstate) {
195 	case LINK_STATE_UP:
196 		if (mip->mi_type->mt_ops.mtops_ops & MTOPS_LINK_DETAILS) {
197 			char det[200];
198 
199 			mip->mi_type->mt_ops.mtops_link_details(det,
200 			    sizeof (det), (mac_handle_t)mip, mip->mi_pdata);
201 
202 			cmn_err(CE_NOTE, "!%s link up, %s", mip->mi_name, det);
203 		} else {
204 			cmn_err(CE_NOTE, "!%s link up", mip->mi_name);
205 		}
206 		break;
207 
208 	case LINK_STATE_DOWN:
209 		/*
210 		 * Only transitions from UP to DOWN are interesting
211 		 */
212 		if (mip->mi_lastlinkstate != LINK_STATE_UNKNOWN)
213 			cmn_err(CE_NOTE, "!%s link down", mip->mi_name);
214 		break;
215 
216 	case LINK_STATE_UNKNOWN:
217 		/*
218 		 * This case is normally not interesting.
219 		 */
220 		break;
221 	}
222 	mip->mi_lastlinkstate = mip->mi_linkstate;
223 }
224 
225 static void
226 i_mac_notify_thread(void *arg)
227 {
228 	mac_impl_t	*mip = arg;
229 	callb_cpr_t	cprinfo;
230 
231 	CALLB_CPR_INIT(&cprinfo, &mip->mi_notify_bits_lock, callb_generic_cpr,
232 	    "i_mac_notify_thread");
233 
234 	mutex_enter(&mip->mi_notify_bits_lock);
235 	for (;;) {
236 		uint32_t	bits;
237 		uint32_t	type;
238 
239 		bits = mip->mi_notify_bits;
240 		if (bits == 0) {
241 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
242 			cv_wait(&mip->mi_notify_cv, &mip->mi_notify_bits_lock);
243 			CALLB_CPR_SAFE_END(&cprinfo, &mip->mi_notify_bits_lock);
244 			continue;
245 		}
246 		mip->mi_notify_bits = 0;
247 
248 		if ((bits & (1 << MAC_NNOTE)) != 0) {
249 			/* request to quit */
250 			ASSERT(mip->mi_disabled);
251 			break;
252 		}
253 
254 		mutex_exit(&mip->mi_notify_bits_lock);
255 
256 		/*
257 		 * Log link changes.
258 		 */
259 		if ((bits & (1 << MAC_NOTE_LINK)) != 0)
260 			i_mac_log_link_state(mip);
261 
262 		/*
263 		 * Do notification callbacks for each notification type.
264 		 */
265 		for (type = 0; type < MAC_NNOTE; type++) {
266 			mac_notify_fn_t	*mnfp;
267 
268 			if ((bits & (1 << type)) == 0) {
269 				continue;
270 			}
271 
272 			/*
273 			 * Walk the list of notifications.
274 			 */
275 			rw_enter(&mip->mi_notify_lock, RW_READER);
276 			for (mnfp = mip->mi_mnfp; mnfp != NULL;
277 			    mnfp = mnfp->mnf_nextp) {
278 
279 				mnfp->mnf_fn(mnfp->mnf_arg, type);
280 			}
281 			rw_exit(&mip->mi_notify_lock);
282 		}
283 
284 		mutex_enter(&mip->mi_notify_bits_lock);
285 	}
286 
287 	mip->mi_notify_thread = NULL;
288 	cv_broadcast(&mip->mi_notify_cv);
289 
290 	CALLB_CPR_EXIT(&cprinfo);
291 
292 	thread_exit();
293 }
294 
295 static mactype_t *
296 i_mactype_getplugin(const char *pname)
297 {
298 	mactype_t	*mtype = NULL;
299 	boolean_t	tried_modload = B_FALSE;
300 
301 	mutex_enter(&i_mactype_lock);
302 
303 find_registered_mactype:
304 	if (mod_hash_find(i_mactype_hash, (mod_hash_key_t)pname,
305 	    (mod_hash_val_t *)&mtype) != 0) {
306 		if (!tried_modload) {
307 			/*
308 			 * If the plugin has not yet been loaded, then
309 			 * attempt to load it now.  If modload() succeeds,
310 			 * the plugin should have registered using
311 			 * mactype_register(), in which case we can go back
312 			 * and attempt to find it again.
313 			 */
314 			if (modload(MACTYPE_KMODDIR, (char *)pname) != -1) {
315 				tried_modload = B_TRUE;
316 				goto find_registered_mactype;
317 			}
318 		}
319 	} else {
320 		/*
321 		 * Note that there's no danger that the plugin we've loaded
322 		 * could be unloaded between the modload() step and the
323 		 * reference count bump here, as we're holding
324 		 * i_mactype_lock, which mactype_unregister() also holds.
325 		 */
326 		atomic_inc_32(&mtype->mt_ref);
327 	}
328 
329 	mutex_exit(&i_mactype_lock);
330 	return (mtype);
331 }
332 
333 /*
334  * Module initialization functions.
335  */
336 
337 void
338 mac_init(void)
339 {
340 	i_mac_impl_cachep = kmem_cache_create("mac_impl_cache",
341 	    sizeof (mac_impl_t), 0, i_mac_constructor, i_mac_destructor,
342 	    NULL, NULL, NULL, 0);
343 	ASSERT(i_mac_impl_cachep != NULL);
344 
345 	mac_vnic_tx_cache = kmem_cache_create("mac_vnic_tx_cache",
346 	    sizeof (mac_vnic_tx_t), 0, i_mac_vnic_tx_ctor, i_mac_vnic_tx_dtor,
347 	    NULL, NULL, NULL, 0);
348 	ASSERT(mac_vnic_tx_cache != NULL);
349 
350 	i_mac_impl_hash = mod_hash_create_extended("mac_impl_hash",
351 	    IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
352 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
353 	rw_init(&i_mac_impl_lock, NULL, RW_DEFAULT, NULL);
354 	i_mac_impl_count = 0;
355 
356 	i_mactype_hash = mod_hash_create_extended("mactype_hash",
357 	    MACTYPE_HASHSZ,
358 	    mod_hash_null_keydtor, mod_hash_null_valdtor,
359 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
360 }
361 
362 int
363 mac_fini(void)
364 {
365 	if (i_mac_impl_count > 0)
366 		return (EBUSY);
367 
368 	mod_hash_destroy_hash(i_mac_impl_hash);
369 	rw_destroy(&i_mac_impl_lock);
370 
371 	kmem_cache_destroy(i_mac_impl_cachep);
372 	kmem_cache_destroy(mac_vnic_tx_cache);
373 
374 	mod_hash_destroy_hash(i_mactype_hash);
375 	return (0);
376 }
377 
378 /*
379  * Client functions.
380  */
381 
382 int
383 mac_open(const char *macname, uint_t ddi_instance, mac_handle_t *mhp)
384 {
385 	char		driver[MAXNAMELEN];
386 	uint_t		instance;
387 	major_t		major;
388 	dev_info_t	*dip;
389 	mac_impl_t	*mip;
390 	int		err;
391 
392 	/*
393 	 * Check the device name length to make sure it won't overflow our
394 	 * buffer.
395 	 */
396 	if (strlen(macname) >= MAXNAMELEN)
397 		return (EINVAL);
398 
399 	/*
400 	 * Split the device name into driver and instance components.
401 	 */
402 	if (ddi_parse(macname, driver, &instance) != DDI_SUCCESS)
403 		return (EINVAL);
404 
405 	if ((strcmp(driver, "aggr") == 0) || (strcmp(driver, "vnic") == 0))
406 		ddi_instance = 0;
407 
408 	/*
409 	 * Get the major number of the driver.
410 	 */
411 	if ((major = ddi_name_to_major(driver)) == (major_t)-1)
412 		return (EINVAL);
413 
414 	/*
415 	 * Hold the given instance to prevent it from being detached.
416 	 * This will also attach the instance if it is not currently attached.
417 	 * Currently we ensure that mac_register() (called by the driver's
418 	 * attach entry point) and all code paths under it cannot possibly
419 	 * call mac_open() because this would lead to a recursive attach
420 	 * panic.
421 	 */
422 	if ((dip = ddi_hold_devi_by_instance(major, ddi_instance, 0)) == NULL)
423 		return (EINVAL);
424 
425 	/*
426 	 * Look up its entry in the global hash table.
427 	 */
428 again:
429 	rw_enter(&i_mac_impl_lock, RW_WRITER);
430 	err = mod_hash_find(i_mac_impl_hash, (mod_hash_key_t)macname,
431 	    (mod_hash_val_t *)&mip);
432 	if (err != 0) {
433 		err = ENOENT;
434 		goto failed;
435 	}
436 
437 	if (mip->mi_disabled) {
438 		rw_exit(&i_mac_impl_lock);
439 		goto again;
440 	}
441 
442 	mip->mi_ref++;
443 	rw_exit(&i_mac_impl_lock);
444 
445 	*mhp = (mac_handle_t)mip;
446 	return (0);
447 
448 failed:
449 	rw_exit(&i_mac_impl_lock);
450 	ddi_release_devi(dip);
451 	return (err);
452 }
453 
454 void
455 mac_close(mac_handle_t mh)
456 {
457 	mac_impl_t	*mip = (mac_impl_t *)mh;
458 	dev_info_t	*dip = mip->mi_dip;
459 
460 	rw_enter(&i_mac_impl_lock, RW_WRITER);
461 
462 	ASSERT(mip->mi_ref != 0);
463 	if (--mip->mi_ref == 0) {
464 		ASSERT(!mip->mi_activelink);
465 	}
466 	ddi_release_devi(dip);
467 	rw_exit(&i_mac_impl_lock);
468 }
469 
470 const mac_info_t *
471 mac_info(mac_handle_t mh)
472 {
473 	return (&((mac_impl_t *)mh)->mi_info);
474 }
475 
476 dev_info_t *
477 mac_devinfo_get(mac_handle_t mh)
478 {
479 	return (((mac_impl_t *)mh)->mi_dip);
480 }
481 
482 uint64_t
483 mac_stat_get(mac_handle_t mh, uint_t stat)
484 {
485 	mac_impl_t	*mip = (mac_impl_t *)mh;
486 	uint64_t	val;
487 	int		ret;
488 
489 	/*
490 	 * The range of stat determines where it is maintained.  Stat
491 	 * values from 0 up to (but not including) MAC_STAT_MIN are
492 	 * mainteined by the mac module itself.  Everything else is
493 	 * maintained by the driver.
494 	 */
495 	if (stat < MAC_STAT_MIN) {
496 		/* These stats are maintained by the mac module itself. */
497 		switch (stat) {
498 		case MAC_STAT_LINK_STATE:
499 			return (mip->mi_linkstate);
500 		case MAC_STAT_LINK_UP:
501 			return (mip->mi_linkstate == LINK_STATE_UP);
502 		case MAC_STAT_PROMISC:
503 			return (mip->mi_devpromisc != 0);
504 		default:
505 			ASSERT(B_FALSE);
506 		}
507 	}
508 
509 	/*
510 	 * Call the driver to get the given statistic.
511 	 */
512 	ret = mip->mi_getstat(mip->mi_driver, stat, &val);
513 	if (ret != 0) {
514 		/*
515 		 * The driver doesn't support this statistic.  Get the
516 		 * statistic's default value.
517 		 */
518 		val = mac_stat_default(mip, stat);
519 	}
520 	return (val);
521 }
522 
523 int
524 mac_start(mac_handle_t mh)
525 {
526 	mac_impl_t	*mip = (mac_impl_t *)mh;
527 	int		err;
528 
529 	ASSERT(mip->mi_start != NULL);
530 
531 	rw_enter(&(mip->mi_state_lock), RW_WRITER);
532 
533 	/*
534 	 * Check whether the device is already started.
535 	 */
536 	if (mip->mi_active++ != 0) {
537 		/*
538 		 * It's already started so there's nothing more to do.
539 		 */
540 		err = 0;
541 		goto done;
542 	}
543 
544 	/*
545 	 * Start the device.
546 	 */
547 	if ((err = mip->mi_start(mip->mi_driver)) != 0)
548 		--mip->mi_active;
549 
550 done:
551 	rw_exit(&(mip->mi_state_lock));
552 	return (err);
553 }
554 
555 void
556 mac_stop(mac_handle_t mh)
557 {
558 	mac_impl_t	*mip = (mac_impl_t *)mh;
559 
560 	ASSERT(mip->mi_stop != NULL);
561 
562 	rw_enter(&(mip->mi_state_lock), RW_WRITER);
563 
564 	/*
565 	 * Check whether the device is still needed.
566 	 */
567 	ASSERT(mip->mi_active != 0);
568 	if (--mip->mi_active != 0) {
569 		/*
570 		 * It's still needed so there's nothing more to do.
571 		 */
572 		goto done;
573 	}
574 
575 	/*
576 	 * Stop the device.
577 	 */
578 	mip->mi_stop(mip->mi_driver);
579 
580 done:
581 	rw_exit(&(mip->mi_state_lock));
582 }
583 
584 int
585 mac_multicst_add(mac_handle_t mh, const uint8_t *addr)
586 {
587 	mac_impl_t		*mip = (mac_impl_t *)mh;
588 	mac_multicst_addr_t	**pp;
589 	mac_multicst_addr_t	*p;
590 	int			err;
591 
592 	ASSERT(mip->mi_multicst != NULL);
593 
594 	/*
595 	 * Verify the address.
596 	 */
597 	if ((err = mip->mi_type->mt_ops.mtops_multicst_verify(addr,
598 	    mip->mi_pdata)) != 0) {
599 		return (err);
600 	}
601 
602 	/*
603 	 * Check whether the given address is already enabled.
604 	 */
605 	rw_enter(&(mip->mi_data_lock), RW_WRITER);
606 	for (pp = &(mip->mi_mmap); (p = *pp) != NULL; pp = &(p->mma_nextp)) {
607 		if (bcmp(p->mma_addr, addr, mip->mi_type->mt_addr_length) ==
608 		    0) {
609 			/*
610 			 * The address is already enabled so just bump the
611 			 * reference count.
612 			 */
613 			p->mma_ref++;
614 			err = 0;
615 			goto done;
616 		}
617 	}
618 
619 	/*
620 	 * Allocate a new list entry.
621 	 */
622 	if ((p = kmem_zalloc(sizeof (mac_multicst_addr_t),
623 	    KM_NOSLEEP)) == NULL) {
624 		err = ENOMEM;
625 		goto done;
626 	}
627 
628 	/*
629 	 * Enable a new multicast address.
630 	 */
631 	if ((err = mip->mi_multicst(mip->mi_driver, B_TRUE, addr)) != 0) {
632 		kmem_free(p, sizeof (mac_multicst_addr_t));
633 		goto done;
634 	}
635 
636 	/*
637 	 * Add the address to the list of enabled addresses.
638 	 */
639 	bcopy(addr, p->mma_addr, mip->mi_type->mt_addr_length);
640 	p->mma_ref++;
641 	*pp = p;
642 
643 done:
644 	rw_exit(&(mip->mi_data_lock));
645 	return (err);
646 }
647 
648 int
649 mac_multicst_remove(mac_handle_t mh, const uint8_t *addr)
650 {
651 	mac_impl_t		*mip = (mac_impl_t *)mh;
652 	mac_multicst_addr_t	**pp;
653 	mac_multicst_addr_t	*p;
654 	int			err;
655 
656 	ASSERT(mip->mi_multicst != NULL);
657 
658 	/*
659 	 * Find the entry in the list for the given address.
660 	 */
661 	rw_enter(&(mip->mi_data_lock), RW_WRITER);
662 	for (pp = &(mip->mi_mmap); (p = *pp) != NULL; pp = &(p->mma_nextp)) {
663 		if (bcmp(p->mma_addr, addr, mip->mi_type->mt_addr_length) ==
664 		    0) {
665 			if (--p->mma_ref == 0)
666 				break;
667 
668 			/*
669 			 * There is still a reference to this address so
670 			 * there's nothing more to do.
671 			 */
672 			err = 0;
673 			goto done;
674 		}
675 	}
676 
677 	/*
678 	 * We did not find an entry for the given address so it is not
679 	 * currently enabled.
680 	 */
681 	if (p == NULL) {
682 		err = ENOENT;
683 		goto done;
684 	}
685 	ASSERT(p->mma_ref == 0);
686 
687 	/*
688 	 * Disable the multicast address.
689 	 */
690 	if ((err = mip->mi_multicst(mip->mi_driver, B_FALSE, addr)) != 0) {
691 		p->mma_ref++;
692 		goto done;
693 	}
694 
695 	/*
696 	 * Remove it from the list.
697 	 */
698 	*pp = p->mma_nextp;
699 	kmem_free(p, sizeof (mac_multicst_addr_t));
700 
701 done:
702 	rw_exit(&(mip->mi_data_lock));
703 	return (err);
704 }
705 
706 /*
707  * mac_unicst_verify: Verifies the passed address. It fails
708  * if the passed address is a group address or has incorrect length.
709  */
710 boolean_t
711 mac_unicst_verify(mac_handle_t mh, const uint8_t *addr, uint_t len)
712 {
713 	mac_impl_t	*mip = (mac_impl_t *)mh;
714 
715 	/*
716 	 * Verify the address.
717 	 */
718 	if ((len != mip->mi_type->mt_addr_length) ||
719 	    (mip->mi_type->mt_ops.mtops_unicst_verify(addr,
720 	    mip->mi_pdata)) != 0) {
721 		return (B_FALSE);
722 	} else {
723 		return (B_TRUE);
724 	}
725 }
726 
727 int
728 mac_unicst_set(mac_handle_t mh, const uint8_t *addr)
729 {
730 	mac_impl_t	*mip = (mac_impl_t *)mh;
731 	int		err;
732 	boolean_t	notify = B_FALSE;
733 
734 	ASSERT(mip->mi_unicst != NULL);
735 
736 	/*
737 	 * Verify the address.
738 	 */
739 	if ((err = mip->mi_type->mt_ops.mtops_unicst_verify(addr,
740 	    mip->mi_pdata)) != 0) {
741 		return (err);
742 	}
743 
744 	/*
745 	 * Program the new unicast address.
746 	 */
747 	rw_enter(&(mip->mi_data_lock), RW_WRITER);
748 
749 	/*
750 	 * If address doesn't change, do nothing.
751 	 * This check is necessary otherwise it may call into mac_unicst_set
752 	 * recursively.
753 	 */
754 	if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) == 0) {
755 		err = 0;
756 		goto done;
757 	}
758 
759 	if ((err = mip->mi_unicst(mip->mi_driver, addr)) != 0)
760 		goto done;
761 
762 	/*
763 	 * Save the address and flag that we need to send a notification.
764 	 */
765 	bcopy(addr, mip->mi_addr, mip->mi_type->mt_addr_length);
766 	notify = B_TRUE;
767 
768 done:
769 	rw_exit(&(mip->mi_data_lock));
770 
771 	if (notify)
772 		i_mac_notify(mip, MAC_NOTE_UNICST);
773 
774 	return (err);
775 }
776 
777 void
778 mac_unicst_get(mac_handle_t mh, uint8_t *addr)
779 {
780 	mac_impl_t	*mip = (mac_impl_t *)mh;
781 
782 	/*
783 	 * Copy out the current unicast source address.
784 	 */
785 	rw_enter(&(mip->mi_data_lock), RW_READER);
786 	bcopy(mip->mi_addr, addr, mip->mi_type->mt_addr_length);
787 	rw_exit(&(mip->mi_data_lock));
788 }
789 
790 void
791 mac_dest_get(mac_handle_t mh, uint8_t *addr)
792 {
793 	mac_impl_t	*mip = (mac_impl_t *)mh;
794 
795 	/*
796 	 * Copy out the current destination address.
797 	 */
798 	rw_enter(&(mip->mi_data_lock), RW_READER);
799 	bcopy(mip->mi_dstaddr, addr, mip->mi_type->mt_addr_length);
800 	rw_exit(&(mip->mi_data_lock));
801 }
802 
803 int
804 mac_promisc_set(mac_handle_t mh, boolean_t on, mac_promisc_type_t ptype)
805 {
806 	mac_impl_t	*mip = (mac_impl_t *)mh;
807 	int		err = 0;
808 
809 	ASSERT(mip->mi_setpromisc != NULL);
810 	ASSERT(ptype == MAC_DEVPROMISC || ptype == MAC_PROMISC);
811 
812 	/*
813 	 * Determine whether we should enable or disable promiscuous mode.
814 	 * For details on the distinction between "device promiscuous mode"
815 	 * and "MAC promiscuous mode", see PSARC/2005/289.
816 	 */
817 	rw_enter(&(mip->mi_data_lock), RW_WRITER);
818 	if (on) {
819 		/*
820 		 * Enable promiscuous mode on the device if not yet enabled.
821 		 */
822 		if (mip->mi_devpromisc++ == 0) {
823 			err = mip->mi_setpromisc(mip->mi_driver, B_TRUE);
824 			if (err != 0) {
825 				mip->mi_devpromisc--;
826 				goto done;
827 			}
828 			i_mac_notify(mip, MAC_NOTE_DEVPROMISC);
829 		}
830 
831 		/*
832 		 * Enable promiscuous mode on the MAC if not yet enabled.
833 		 */
834 		if (ptype == MAC_PROMISC && mip->mi_promisc++ == 0)
835 			i_mac_notify(mip, MAC_NOTE_PROMISC);
836 	} else {
837 		if (mip->mi_devpromisc == 0) {
838 			err = EPROTO;
839 			goto done;
840 		}
841 
842 		/*
843 		 * Disable promiscuous mode on the device if this is the last
844 		 * enabling.
845 		 */
846 		if (--mip->mi_devpromisc == 0) {
847 			err = mip->mi_setpromisc(mip->mi_driver, B_FALSE);
848 			if (err != 0) {
849 				mip->mi_devpromisc++;
850 				goto done;
851 			}
852 			i_mac_notify(mip, MAC_NOTE_DEVPROMISC);
853 		}
854 
855 		/*
856 		 * Disable promiscuous mode on the MAC if this is the last
857 		 * enabling.
858 		 */
859 		if (ptype == MAC_PROMISC && --mip->mi_promisc == 0)
860 			i_mac_notify(mip, MAC_NOTE_PROMISC);
861 	}
862 
863 done:
864 	rw_exit(&(mip->mi_data_lock));
865 	return (err);
866 }
867 
868 boolean_t
869 mac_promisc_get(mac_handle_t mh, mac_promisc_type_t ptype)
870 {
871 	mac_impl_t		*mip = (mac_impl_t *)mh;
872 
873 	ASSERT(ptype == MAC_DEVPROMISC || ptype == MAC_PROMISC);
874 
875 	/*
876 	 * Return the current promiscuity.
877 	 */
878 	if (ptype == MAC_DEVPROMISC)
879 		return (mip->mi_devpromisc != 0);
880 	else
881 		return (mip->mi_promisc != 0);
882 }
883 
884 void
885 mac_resources(mac_handle_t mh)
886 {
887 	mac_impl_t	*mip = (mac_impl_t *)mh;
888 
889 	/*
890 	 * If the driver supports resource registration, call the driver to
891 	 * ask it to register its resources.
892 	 */
893 	if (mip->mi_callbacks->mc_callbacks & MC_RESOURCES)
894 		mip->mi_resources(mip->mi_driver);
895 }
896 
897 void
898 mac_ioctl(mac_handle_t mh, queue_t *wq, mblk_t *bp)
899 {
900 	mac_impl_t	*mip = (mac_impl_t *)mh;
901 
902 	/*
903 	 * Call the driver to handle the ioctl.  The driver may not support
904 	 * any ioctls, in which case we reply with a NAK on its behalf.
905 	 */
906 	if (mip->mi_callbacks->mc_callbacks & MC_IOCTL)
907 		mip->mi_ioctl(mip->mi_driver, wq, bp);
908 	else
909 		miocnak(wq, bp, 0, EINVAL);
910 }
911 
912 const mac_txinfo_t *
913 mac_do_tx_get(mac_handle_t mh, boolean_t is_vnic)
914 {
915 	mac_impl_t	*mip = (mac_impl_t *)mh;
916 	mac_txinfo_t	*mtp;
917 
918 	/*
919 	 * Grab the lock to prevent us from racing with MAC_PROMISC being
920 	 * changed.  This is sufficient since MAC clients are careful to always
921 	 * call mac_txloop_add() prior to enabling MAC_PROMISC, and to disable
922 	 * MAC_PROMISC prior to calling mac_txloop_remove().
923 	 */
924 	rw_enter(&mip->mi_tx_lock, RW_READER);
925 
926 	if (mac_promisc_get(mh, MAC_PROMISC)) {
927 		ASSERT(mip->mi_mtfp != NULL);
928 		if (mip->mi_vnic_present && !is_vnic) {
929 			mtp = &mip->mi_vnic_txloopinfo;
930 		} else {
931 			mtp = &mip->mi_txloopinfo;
932 		}
933 	} else {
934 		if (mip->mi_vnic_present && !is_vnic) {
935 			mtp = &mip->mi_vnic_txinfo;
936 		} else {
937 			/*
938 			 * Note that we cannot ASSERT() that mip->mi_mtfp is
939 			 * NULL, because to satisfy the above ASSERT(), we
940 			 * have to disable MAC_PROMISC prior to calling
941 			 * mac_txloop_remove().
942 			 */
943 			mtp = &mip->mi_txinfo;
944 		}
945 	}
946 
947 	rw_exit(&mip->mi_tx_lock);
948 	return (mtp);
949 }
950 
951 /*
952  * Invoked by VNIC to obtain the transmit entry point.
953  */
954 const mac_txinfo_t *
955 mac_vnic_tx_get(mac_handle_t mh)
956 {
957 	return (mac_do_tx_get(mh, B_TRUE));
958 }
959 
960 /*
961  * Invoked by any non-VNIC client to obtain the transmit entry point.
962  * If a VNIC is present, the VNIC transmit function provided by the VNIC
963  * will be returned to the MAC client.
964  */
965 const mac_txinfo_t *
966 mac_tx_get(mac_handle_t mh)
967 {
968 	return (mac_do_tx_get(mh, B_FALSE));
969 }
970 
971 link_state_t
972 mac_link_get(mac_handle_t mh)
973 {
974 	return (((mac_impl_t *)mh)->mi_linkstate);
975 }
976 
977 mac_notify_handle_t
978 mac_notify_add(mac_handle_t mh, mac_notify_t notify, void *arg)
979 {
980 	mac_impl_t		*mip = (mac_impl_t *)mh;
981 	mac_notify_fn_t		*mnfp;
982 
983 	mnfp = kmem_zalloc(sizeof (mac_notify_fn_t), KM_SLEEP);
984 	mnfp->mnf_fn = notify;
985 	mnfp->mnf_arg = arg;
986 
987 	/*
988 	 * Add it to the head of the 'notify' callback list.
989 	 */
990 	rw_enter(&mip->mi_notify_lock, RW_WRITER);
991 	mnfp->mnf_nextp = mip->mi_mnfp;
992 	mip->mi_mnfp = mnfp;
993 	rw_exit(&mip->mi_notify_lock);
994 
995 	return ((mac_notify_handle_t)mnfp);
996 }
997 
998 void
999 mac_notify_remove(mac_handle_t mh, mac_notify_handle_t mnh)
1000 {
1001 	mac_impl_t		*mip = (mac_impl_t *)mh;
1002 	mac_notify_fn_t		*mnfp = (mac_notify_fn_t *)mnh;
1003 	mac_notify_fn_t		**pp;
1004 	mac_notify_fn_t		*p;
1005 
1006 	/*
1007 	 * Search the 'notify' callback list for the function closure.
1008 	 */
1009 	rw_enter(&mip->mi_notify_lock, RW_WRITER);
1010 	for (pp = &(mip->mi_mnfp); (p = *pp) != NULL;
1011 	    pp = &(p->mnf_nextp)) {
1012 		if (p == mnfp)
1013 			break;
1014 	}
1015 	ASSERT(p != NULL);
1016 
1017 	/*
1018 	 * Remove it from the list.
1019 	 */
1020 	*pp = p->mnf_nextp;
1021 	rw_exit(&mip->mi_notify_lock);
1022 
1023 	/*
1024 	 * Free it.
1025 	 */
1026 	kmem_free(mnfp, sizeof (mac_notify_fn_t));
1027 }
1028 
1029 void
1030 mac_notify(mac_handle_t mh)
1031 {
1032 	mac_impl_t		*mip = (mac_impl_t *)mh;
1033 	mac_notify_type_t	type;
1034 
1035 	for (type = 0; type < MAC_NNOTE; type++)
1036 		i_mac_notify(mip, type);
1037 }
1038 
1039 /*
1040  * Register a receive function for this mac.
1041  * More information on this function's interaction with mac_rx()
1042  * can be found atop mac_rx().
1043  */
1044 mac_rx_handle_t
1045 mac_do_rx_add(mac_handle_t mh, mac_rx_t rx, void *arg, boolean_t is_active)
1046 {
1047 	mac_impl_t	*mip = (mac_impl_t *)mh;
1048 	mac_rx_fn_t	*mrfp;
1049 
1050 	mrfp = kmem_zalloc(sizeof (mac_rx_fn_t), KM_SLEEP);
1051 	mrfp->mrf_fn = rx;
1052 	mrfp->mrf_arg = arg;
1053 	mrfp->mrf_active = is_active;
1054 
1055 	/*
1056 	 * Add it to the head of the 'rx' callback list.
1057 	 */
1058 	rw_enter(&(mip->mi_rx_lock), RW_WRITER);
1059 
1060 	/*
1061 	 * mac_rx() will only call callbacks that are marked inuse.
1062 	 */
1063 	mrfp->mrf_inuse = B_TRUE;
1064 	mrfp->mrf_nextp = mip->mi_mrfp;
1065 
1066 	/*
1067 	 * mac_rx() could be traversing the remainder of the list
1068 	 * and miss the new callback we're adding here. This is not a problem
1069 	 * because we do not guarantee the callback to take effect immediately
1070 	 * after mac_rx_add() returns.
1071 	 */
1072 	mip->mi_mrfp = mrfp;
1073 	rw_exit(&(mip->mi_rx_lock));
1074 
1075 	return ((mac_rx_handle_t)mrfp);
1076 }
1077 
1078 mac_rx_handle_t
1079 mac_rx_add(mac_handle_t mh, mac_rx_t rx, void *arg)
1080 {
1081 	return (mac_do_rx_add(mh, rx, arg, B_FALSE));
1082 }
1083 
1084 mac_rx_handle_t
1085 mac_active_rx_add(mac_handle_t mh, mac_rx_t rx, void *arg)
1086 {
1087 	return (mac_do_rx_add(mh, rx, arg, B_TRUE));
1088 }
1089 
1090 /*
1091  * Unregister a receive function for this mac.
1092  * This function does not block if wait is B_FALSE. This is useful
1093  * for clients who call mac_rx_remove() from a non-blockable context.
1094  * More information on this function's interaction with mac_rx()
1095  * can be found atop mac_rx().
1096  */
1097 void
1098 mac_rx_remove(mac_handle_t mh, mac_rx_handle_t mrh, boolean_t wait)
1099 {
1100 	mac_impl_t		*mip = (mac_impl_t *)mh;
1101 	mac_rx_fn_t		*mrfp = (mac_rx_fn_t *)mrh;
1102 	mac_rx_fn_t		**pp;
1103 	mac_rx_fn_t		*p;
1104 
1105 	/*
1106 	 * Search the 'rx' callback list for the function closure.
1107 	 */
1108 	rw_enter(&mip->mi_rx_lock, RW_WRITER);
1109 	for (pp = &(mip->mi_mrfp); (p = *pp) != NULL; pp = &(p->mrf_nextp)) {
1110 		if (p == mrfp)
1111 			break;
1112 	}
1113 	ASSERT(p != NULL);
1114 
1115 	/*
1116 	 * If mac_rx() is running, mark callback for deletion
1117 	 * and return (if wait is false), or wait until mac_rx()
1118 	 * exits (if wait is true).
1119 	 */
1120 	if (mip->mi_rx_ref > 0) {
1121 		DTRACE_PROBE1(defer_delete, mac_impl_t *, mip);
1122 		p->mrf_inuse = B_FALSE;
1123 		mutex_enter(&mip->mi_lock);
1124 		mip->mi_rx_removed++;
1125 		mutex_exit(&mip->mi_lock);
1126 
1127 		rw_exit(&mip->mi_rx_lock);
1128 		if (wait)
1129 			mac_rx_remove_wait(mh);
1130 		return;
1131 	}
1132 
1133 	/* Remove it from the list. */
1134 	*pp = p->mrf_nextp;
1135 	kmem_free(mrfp, sizeof (mac_rx_fn_t));
1136 	rw_exit(&mip->mi_rx_lock);
1137 }
1138 
1139 /*
1140  * Wait for all pending callback removals to be completed by mac_rx().
1141  * Note that if we call mac_rx_remove() immediately before this, there is no
1142  * guarantee we would wait *only* on the callback that we specified.
1143  * mac_rx_remove() could have been called by other threads and we would have
1144  * to wait for other marked callbacks to be removed as well.
1145  */
1146 void
1147 mac_rx_remove_wait(mac_handle_t mh)
1148 {
1149 	mac_impl_t	*mip = (mac_impl_t *)mh;
1150 
1151 	mutex_enter(&mip->mi_lock);
1152 	while (mip->mi_rx_removed > 0) {
1153 		DTRACE_PROBE1(need_wait, mac_impl_t *, mip);
1154 		cv_wait(&mip->mi_rx_cv, &mip->mi_lock);
1155 	}
1156 	mutex_exit(&mip->mi_lock);
1157 }
1158 
1159 mac_txloop_handle_t
1160 mac_txloop_add(mac_handle_t mh, mac_txloop_t tx, void *arg)
1161 {
1162 	mac_impl_t	*mip = (mac_impl_t *)mh;
1163 	mac_txloop_fn_t	*mtfp;
1164 
1165 	mtfp = kmem_zalloc(sizeof (mac_txloop_fn_t), KM_SLEEP);
1166 	mtfp->mtf_fn = tx;
1167 	mtfp->mtf_arg = arg;
1168 
1169 	/*
1170 	 * Add it to the head of the 'tx' callback list.
1171 	 */
1172 	rw_enter(&(mip->mi_tx_lock), RW_WRITER);
1173 	mtfp->mtf_nextp = mip->mi_mtfp;
1174 	mip->mi_mtfp = mtfp;
1175 	rw_exit(&(mip->mi_tx_lock));
1176 
1177 	return ((mac_txloop_handle_t)mtfp);
1178 }
1179 
1180 /*
1181  * Unregister a transmit function for this mac.  This removes the function
1182  * from the list of transmit functions for this mac.
1183  */
1184 void
1185 mac_txloop_remove(mac_handle_t mh, mac_txloop_handle_t mth)
1186 {
1187 	mac_impl_t		*mip = (mac_impl_t *)mh;
1188 	mac_txloop_fn_t		*mtfp = (mac_txloop_fn_t *)mth;
1189 	mac_txloop_fn_t		**pp;
1190 	mac_txloop_fn_t		*p;
1191 
1192 	/*
1193 	 * Search the 'tx' callback list for the function.
1194 	 */
1195 	rw_enter(&(mip->mi_tx_lock), RW_WRITER);
1196 	for (pp = &(mip->mi_mtfp); (p = *pp) != NULL; pp = &(p->mtf_nextp)) {
1197 		if (p == mtfp)
1198 			break;
1199 	}
1200 	ASSERT(p != NULL);
1201 
1202 	/* Remove it from the list. */
1203 	*pp = p->mtf_nextp;
1204 	kmem_free(mtfp, sizeof (mac_txloop_fn_t));
1205 	rw_exit(&(mip->mi_tx_lock));
1206 }
1207 
1208 void
1209 mac_resource_set(mac_handle_t mh, mac_resource_add_t add, void *arg)
1210 {
1211 	mac_impl_t		*mip = (mac_impl_t *)mh;
1212 
1213 	/*
1214 	 * Update the 'resource_add' callbacks.
1215 	 */
1216 	rw_enter(&(mip->mi_resource_lock), RW_WRITER);
1217 	mip->mi_resource_add = add;
1218 	mip->mi_resource_add_arg = arg;
1219 	rw_exit(&(mip->mi_resource_lock));
1220 }
1221 
1222 /*
1223  * Driver support functions.
1224  */
1225 
1226 mac_register_t *
1227 mac_alloc(uint_t mac_version)
1228 {
1229 	mac_register_t *mregp;
1230 
1231 	/*
1232 	 * Make sure there isn't a version mismatch between the driver and
1233 	 * the framework.  In the future, if multiple versions are
1234 	 * supported, this check could become more sophisticated.
1235 	 */
1236 	if (mac_version != MAC_VERSION)
1237 		return (NULL);
1238 
1239 	mregp = kmem_zalloc(sizeof (mac_register_t), KM_SLEEP);
1240 	mregp->m_version = mac_version;
1241 	return (mregp);
1242 }
1243 
1244 void
1245 mac_free(mac_register_t *mregp)
1246 {
1247 	kmem_free(mregp, sizeof (mac_register_t));
1248 }
1249 
1250 /*
1251  * mac_register() is how drivers register new MACs with the GLDv3
1252  * framework.  The mregp argument is allocated by drivers using the
1253  * mac_alloc() function, and can be freed using mac_free() immediately upon
1254  * return from mac_register().  Upon success (0 return value), the mhp
1255  * opaque pointer becomes the driver's handle to its MAC interface, and is
1256  * the argument to all other mac module entry points.
1257  */
1258 int
1259 mac_register(mac_register_t *mregp, mac_handle_t *mhp)
1260 {
1261 	mac_impl_t	*mip;
1262 	mactype_t	*mtype;
1263 	int		err = EINVAL;
1264 	struct devnames *dnp;
1265 	minor_t		minor;
1266 	boolean_t	style1_created = B_FALSE, style2_created = B_FALSE;
1267 
1268 	/* Find the required MAC-Type plugin. */
1269 	if ((mtype = i_mactype_getplugin(mregp->m_type_ident)) == NULL)
1270 		return (EINVAL);
1271 
1272 	/* Create a mac_impl_t to represent this MAC. */
1273 	mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP);
1274 
1275 	/*
1276 	 * The mac is not ready for open yet.
1277 	 */
1278 	mip->mi_disabled = B_TRUE;
1279 
1280 	mip->mi_drvname = ddi_driver_name(mregp->m_dip);
1281 	/*
1282 	 * Some drivers such as aggr need to register multiple MACs.  Such
1283 	 * drivers must supply a non-zero "instance" argument so that each
1284 	 * MAC can be assigned a unique MAC name and can have unique
1285 	 * kstats.
1286 	 */
1287 	mip->mi_instance = ((mregp->m_instance == 0) ?
1288 	    ddi_get_instance(mregp->m_dip) : mregp->m_instance);
1289 
1290 	/* Construct the MAC name as <drvname><instance> */
1291 	(void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d",
1292 	    mip->mi_drvname, mip->mi_instance);
1293 
1294 	mip->mi_driver = mregp->m_driver;
1295 
1296 	mip->mi_type = mtype;
1297 	mip->mi_info.mi_media = mtype->mt_type;
1298 	mip->mi_info.mi_nativemedia = mtype->mt_nativetype;
1299 	mip->mi_info.mi_sdu_min = mregp->m_min_sdu;
1300 	if (mregp->m_max_sdu <= mregp->m_min_sdu)
1301 		goto fail;
1302 	mip->mi_info.mi_sdu_max = mregp->m_max_sdu;
1303 	mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length;
1304 	/*
1305 	 * If the media supports a broadcast address, cache a pointer to it
1306 	 * in the mac_info_t so that upper layers can use it.
1307 	 */
1308 	mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr;
1309 
1310 	/*
1311 	 * Copy the unicast source address into the mac_info_t, but only if
1312 	 * the MAC-Type defines a non-zero address length.  We need to
1313 	 * handle MAC-Types that have an address length of 0
1314 	 * (point-to-point protocol MACs for example).
1315 	 */
1316 	if (mip->mi_type->mt_addr_length > 0) {
1317 		if (mregp->m_src_addr == NULL)
1318 			goto fail;
1319 		mip->mi_info.mi_unicst_addr =
1320 		    kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP);
1321 		bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr,
1322 		    mip->mi_type->mt_addr_length);
1323 
1324 		/*
1325 		 * Copy the fixed 'factory' MAC address from the immutable
1326 		 * info.  This is taken to be the MAC address currently in
1327 		 * use.
1328 		 */
1329 		bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr,
1330 		    mip->mi_type->mt_addr_length);
1331 		/* Copy the destination address if one is provided. */
1332 		if (mregp->m_dst_addr != NULL) {
1333 			bcopy(mregp->m_dst_addr, mip->mi_dstaddr,
1334 			    mip->mi_type->mt_addr_length);
1335 		}
1336 	} else if (mregp->m_src_addr != NULL) {
1337 		goto fail;
1338 	}
1339 
1340 	/*
1341 	 * The format of the m_pdata is specific to the plugin.  It is
1342 	 * passed in as an argument to all of the plugin callbacks.  The
1343 	 * driver can update this information by calling
1344 	 * mac_pdata_update().
1345 	 */
1346 	if (mregp->m_pdata != NULL) {
1347 		/*
1348 		 * Verify that the plugin supports MAC plugin data and that
1349 		 * the supplied data is valid.
1350 		 */
1351 		if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
1352 			goto fail;
1353 		if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata,
1354 		    mregp->m_pdata_size)) {
1355 			goto fail;
1356 		}
1357 		mip->mi_pdata = kmem_alloc(mregp->m_pdata_size, KM_SLEEP);
1358 		bcopy(mregp->m_pdata, mip->mi_pdata, mregp->m_pdata_size);
1359 		mip->mi_pdata_size = mregp->m_pdata_size;
1360 	}
1361 
1362 	/*
1363 	 * Stash the driver callbacks into the mac_impl_t, but first sanity
1364 	 * check to make sure all mandatory callbacks are set.
1365 	 */
1366 	if (mregp->m_callbacks->mc_getstat == NULL ||
1367 	    mregp->m_callbacks->mc_start == NULL ||
1368 	    mregp->m_callbacks->mc_stop == NULL ||
1369 	    mregp->m_callbacks->mc_setpromisc == NULL ||
1370 	    mregp->m_callbacks->mc_multicst == NULL ||
1371 	    mregp->m_callbacks->mc_unicst == NULL ||
1372 	    mregp->m_callbacks->mc_tx == NULL) {
1373 		goto fail;
1374 	}
1375 	mip->mi_callbacks = mregp->m_callbacks;
1376 
1377 	mip->mi_dip = mregp->m_dip;
1378 
1379 	/*
1380 	 * Set up the possible transmit routines.
1381 	 */
1382 	mip->mi_txinfo.mt_fn = mip->mi_tx;
1383 	mip->mi_txinfo.mt_arg = mip->mi_driver;
1384 
1385 	mip->mi_vnic_txinfo.mt_fn = mac_vnic_tx;
1386 	mip->mi_vnic_txinfo.mt_arg = mip;
1387 
1388 	mip->mi_txloopinfo.mt_fn = mac_txloop;
1389 	mip->mi_txloopinfo.mt_arg = mip;
1390 
1391 	mip->mi_vnic_txloopinfo.mt_fn = mac_vnic_txloop;
1392 	mip->mi_vnic_txloopinfo.mt_arg = mip;
1393 
1394 	/*
1395 	 * Allocate a notification thread.
1396 	 */
1397 	mip->mi_notify_thread = thread_create(NULL, 0, i_mac_notify_thread,
1398 	    mip, 0, &p0, TS_RUN, minclsyspri);
1399 	if (mip->mi_notify_thread == NULL)
1400 		goto fail;
1401 
1402 	/*
1403 	 * Initialize the kstats for this device.
1404 	 */
1405 	mac_stat_create(mip);
1406 
1407 	err = EEXIST;
1408 	/* Create a style-2 DLPI device */
1409 	if (ddi_create_minor_node(mip->mi_dip, (char *)mip->mi_drvname,
1410 	    S_IFCHR, 0, DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS)
1411 		goto fail;
1412 	style2_created = B_TRUE;
1413 
1414 	/* Create a style-1 DLPI device */
1415 	minor = (minor_t)mip->mi_instance + 1;
1416 	if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR, minor,
1417 	    DDI_NT_NET, 0) != DDI_SUCCESS)
1418 		goto fail;
1419 	style1_created = B_TRUE;
1420 
1421 	/*
1422 	 * Create a link for this MAC.  The link name will be the same as
1423 	 * the MAC name.
1424 	 */
1425 	err = dls_create(mip->mi_name, mip->mi_name,
1426 	    ddi_get_instance(mip->mi_dip));
1427 	if (err != 0)
1428 		goto fail;
1429 
1430 	/* set the gldv3 flag in dn_flags */
1431 	dnp = &devnamesp[ddi_driver_major(mip->mi_dip)];
1432 	LOCK_DEV_OPS(&dnp->dn_lock);
1433 	dnp->dn_flags |= DN_GLDV3_DRIVER;
1434 	UNLOCK_DEV_OPS(&dnp->dn_lock);
1435 
1436 	rw_enter(&i_mac_impl_lock, RW_WRITER);
1437 	if (mod_hash_insert(i_mac_impl_hash,
1438 	    (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) {
1439 		rw_exit(&i_mac_impl_lock);
1440 		VERIFY(dls_destroy(mip->mi_name) == 0);
1441 		err = EEXIST;
1442 		goto fail;
1443 	}
1444 
1445 	/*
1446 	 * Mark the MAC to be ready for open.
1447 	 */
1448 	mip->mi_disabled = B_FALSE;
1449 
1450 	cmn_err(CE_NOTE, "!%s registered", mip->mi_name);
1451 
1452 	rw_exit(&i_mac_impl_lock);
1453 
1454 	atomic_inc_32(&i_mac_impl_count);
1455 	*mhp = (mac_handle_t)mip;
1456 	return (0);
1457 
1458 fail:
1459 	/* clean up notification thread */
1460 	if (mip->mi_notify_thread != NULL) {
1461 		mutex_enter(&mip->mi_notify_bits_lock);
1462 		mip->mi_notify_bits = (1 << MAC_NNOTE);
1463 		cv_broadcast(&mip->mi_notify_cv);
1464 		while (mip->mi_notify_bits != 0)
1465 			cv_wait(&mip->mi_notify_cv, &mip->mi_notify_bits_lock);
1466 		mutex_exit(&mip->mi_notify_bits_lock);
1467 	}
1468 
1469 	if (mip->mi_info.mi_unicst_addr != NULL) {
1470 		kmem_free(mip->mi_info.mi_unicst_addr,
1471 		    mip->mi_type->mt_addr_length);
1472 		mip->mi_info.mi_unicst_addr = NULL;
1473 	}
1474 	if (style1_created)
1475 		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
1476 	if (style2_created)
1477 		ddi_remove_minor_node(mip->mi_dip, (char *)mip->mi_drvname);
1478 
1479 	mac_stat_destroy(mip);
1480 
1481 	if (mip->mi_type != NULL) {
1482 		atomic_dec_32(&mip->mi_type->mt_ref);
1483 		mip->mi_type = NULL;
1484 	}
1485 
1486 	if (mip->mi_pdata != NULL) {
1487 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
1488 		mip->mi_pdata = NULL;
1489 		mip->mi_pdata_size = 0;
1490 	}
1491 
1492 	kmem_cache_free(i_mac_impl_cachep, mip);
1493 	return (err);
1494 }
1495 
1496 int
1497 mac_disable(mac_handle_t mh)
1498 {
1499 	int			err;
1500 	mac_impl_t		*mip = (mac_impl_t *)mh;
1501 
1502 	/*
1503 	 * See if there are any other references to this mac_t (e.g., VLAN's).
1504 	 * If not, set mi_disabled to prevent any new VLAN's from being
1505 	 * created while we're destroying this mac.
1506 	 */
1507 	rw_enter(&i_mac_impl_lock, RW_WRITER);
1508 	if (mip->mi_ref > 0) {
1509 		rw_exit(&i_mac_impl_lock);
1510 		return (EBUSY);
1511 	}
1512 	mip->mi_disabled = B_TRUE;
1513 	rw_exit(&i_mac_impl_lock);
1514 
1515 	if ((err = dls_destroy(mip->mi_name)) != 0) {
1516 		rw_enter(&i_mac_impl_lock, RW_WRITER);
1517 		mip->mi_disabled = B_FALSE;
1518 		rw_exit(&i_mac_impl_lock);
1519 		return (err);
1520 	}
1521 
1522 	return (0);
1523 }
1524 
1525 int
1526 mac_unregister(mac_handle_t mh)
1527 {
1528 	int			err;
1529 	mac_impl_t		*mip = (mac_impl_t *)mh;
1530 	mod_hash_val_t		val;
1531 	mac_multicst_addr_t	*p, *nextp;
1532 
1533 	/*
1534 	 * See if there are any other references to this mac_t (e.g., VLAN's).
1535 	 * If not, set mi_disabled to prevent any new VLAN's from being
1536 	 * created while we're destroying this mac. Once mac_disable() returns
1537 	 * 0, the rest of mac_unregister() stuff should continue without
1538 	 * returning an error.
1539 	 */
1540 	if (!mip->mi_disabled) {
1541 		if ((err = mac_disable(mh)) != 0)
1542 			return (err);
1543 	}
1544 
1545 	/*
1546 	 * Clean up notification thread (wait for it to exit).
1547 	 */
1548 	mutex_enter(&mip->mi_notify_bits_lock);
1549 	mip->mi_notify_bits = (1 << MAC_NNOTE);
1550 	cv_broadcast(&mip->mi_notify_cv);
1551 	while (mip->mi_notify_bits != 0)
1552 		cv_wait(&mip->mi_notify_cv, &mip->mi_notify_bits_lock);
1553 	mutex_exit(&mip->mi_notify_bits_lock);
1554 
1555 	/*
1556 	 * Remove both style 1 and style 2 minor nodes
1557 	 */
1558 	ddi_remove_minor_node(mip->mi_dip, (char *)mip->mi_drvname);
1559 	ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
1560 
1561 	ASSERT(!mip->mi_activelink);
1562 
1563 	mac_stat_destroy(mip);
1564 
1565 	(void) mod_hash_remove(i_mac_impl_hash, (mod_hash_key_t)mip->mi_name,
1566 	    &val);
1567 	ASSERT(mip == (mac_impl_t *)val);
1568 
1569 	ASSERT(i_mac_impl_count > 0);
1570 	atomic_dec_32(&i_mac_impl_count);
1571 
1572 	if (mip->mi_pdata != NULL)
1573 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
1574 	mip->mi_pdata = NULL;
1575 	mip->mi_pdata_size = 0;
1576 
1577 	/*
1578 	 * Free the list of multicast addresses.
1579 	 */
1580 	for (p = mip->mi_mmap; p != NULL; p = nextp) {
1581 		nextp = p->mma_nextp;
1582 		kmem_free(p, sizeof (mac_multicst_addr_t));
1583 	}
1584 	mip->mi_mmap = NULL;
1585 
1586 	mip->mi_linkstate = LINK_STATE_UNKNOWN;
1587 	kmem_free(mip->mi_info.mi_unicst_addr, mip->mi_type->mt_addr_length);
1588 	mip->mi_info.mi_unicst_addr = NULL;
1589 
1590 	atomic_dec_32(&mip->mi_type->mt_ref);
1591 	mip->mi_type = NULL;
1592 
1593 	cmn_err(CE_NOTE, "!%s unregistered", mip->mi_name);
1594 
1595 	kmem_cache_free(i_mac_impl_cachep, mip);
1596 
1597 	return (0);
1598 }
1599 
1600 /*
1601  * To avoid potential deadlocks, mac_rx() releases mi_rx_lock
1602  * before invoking its list of upcalls. This introduces races with
1603  * mac_rx_remove() and mac_rx_add(), who can potentially modify the
1604  * upcall list while mi_rx_lock is not being held. The race with
1605  * mac_rx_remove() is handled by incrementing mi_rx_ref upon entering
1606  * mac_rx(); a non-zero mi_rx_ref would tell mac_rx_remove()
1607  * to not modify the list but instead mark an upcall for deletion.
1608  * before mac_rx() exits, mi_rx_ref is decremented and if it
1609  * is 0, the marked upcalls will be removed from the list and freed.
1610  * The race with mac_rx_add() is harmless because mac_rx_add() only
1611  * prepends to the list and since mac_rx() saves the list head
1612  * before releasing mi_rx_lock, any prepended upcall won't be seen
1613  * until the next packet chain arrives.
1614  *
1615  * To minimize lock contention between multiple parallel invocations
1616  * of mac_rx(), mi_rx_lock is acquired as a READER lock. The
1617  * use of atomic operations ensures the sanity of mi_rx_ref. mi_rx_lock
1618  * will be upgraded to WRITER mode when there are marked upcalls to be
1619  * cleaned.
1620  */
1621 static void
1622 mac_do_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain,
1623     boolean_t active_only)
1624 {
1625 	mac_impl_t	*mip = (mac_impl_t *)mh;
1626 	mblk_t		*bp = mp_chain;
1627 	mac_rx_fn_t	*mrfp;
1628 
1629 	/*
1630 	 * Call all registered receive functions.
1631 	 */
1632 	rw_enter(&mip->mi_rx_lock, RW_READER);
1633 	if ((mrfp = mip->mi_mrfp) == NULL) {
1634 		/* There are no registered receive functions. */
1635 		freemsgchain(bp);
1636 		rw_exit(&mip->mi_rx_lock);
1637 		return;
1638 	}
1639 	atomic_inc_32(&mip->mi_rx_ref);
1640 	rw_exit(&mip->mi_rx_lock);
1641 
1642 	/*
1643 	 * Call registered receive functions.
1644 	 */
1645 	do {
1646 		mblk_t *recv_bp;
1647 
1648 		if (active_only && !mrfp->mrf_active) {
1649 			mrfp = mrfp->mrf_nextp;
1650 			if (mrfp == NULL) {
1651 				/*
1652 				 * We hit the last receiver, but it's not
1653 				 * active.
1654 				 */
1655 				freemsgchain(bp);
1656 			}
1657 			continue;
1658 		}
1659 
1660 		recv_bp = (mrfp->mrf_nextp != NULL) ? copymsgchain(bp) : bp;
1661 		if (recv_bp != NULL) {
1662 			if (mrfp->mrf_inuse) {
1663 				/*
1664 				 * Send bp itself and keep the copy.
1665 				 * If there's only one active receiver,
1666 				 * it should get the original message,
1667 				 * tagged with the hardware checksum flags.
1668 				 */
1669 				mrfp->mrf_fn(mrfp->mrf_arg, mrh, bp);
1670 				bp = recv_bp;
1671 			} else {
1672 				freemsgchain(recv_bp);
1673 			}
1674 		}
1675 
1676 		mrfp = mrfp->mrf_nextp;
1677 	} while (mrfp != NULL);
1678 
1679 	rw_enter(&mip->mi_rx_lock, RW_READER);
1680 	if (atomic_dec_32_nv(&mip->mi_rx_ref) == 0 && mip->mi_rx_removed > 0) {
1681 		mac_rx_fn_t	**pp, *p;
1682 		uint32_t	cnt = 0;
1683 
1684 		DTRACE_PROBE1(delete_callbacks, mac_impl_t *, mip);
1685 
1686 		/*
1687 		 * Need to become exclusive before doing cleanup
1688 		 */
1689 		if (rw_tryupgrade(&mip->mi_rx_lock) == 0) {
1690 			rw_exit(&mip->mi_rx_lock);
1691 			rw_enter(&mip->mi_rx_lock, RW_WRITER);
1692 		}
1693 
1694 		/*
1695 		 * We return if another thread has already entered and cleaned
1696 		 * up the list.
1697 		 */
1698 		if (mip->mi_rx_ref > 0 || mip->mi_rx_removed == 0) {
1699 			rw_exit(&mip->mi_rx_lock);
1700 			return;
1701 		}
1702 
1703 		/*
1704 		 * Free removed callbacks.
1705 		 */
1706 		pp = &mip->mi_mrfp;
1707 		while (*pp != NULL) {
1708 			if (!(*pp)->mrf_inuse) {
1709 				p = *pp;
1710 				*pp = (*pp)->mrf_nextp;
1711 				kmem_free(p, sizeof (*p));
1712 				cnt++;
1713 				continue;
1714 			}
1715 			pp = &(*pp)->mrf_nextp;
1716 		}
1717 
1718 		/*
1719 		 * Wake up mac_rx_remove_wait()
1720 		 */
1721 		mutex_enter(&mip->mi_lock);
1722 		ASSERT(mip->mi_rx_removed == cnt);
1723 		mip->mi_rx_removed = 0;
1724 		cv_broadcast(&mip->mi_rx_cv);
1725 		mutex_exit(&mip->mi_lock);
1726 	}
1727 	rw_exit(&mip->mi_rx_lock);
1728 }
1729 
1730 void
1731 mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
1732 {
1733 	mac_do_rx(mh, mrh, mp_chain, B_FALSE);
1734 }
1735 
1736 /*
1737  * Send a packet chain up to the receive callbacks which declared
1738  * themselves as being active.
1739  */
1740 void
1741 mac_active_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp_chain)
1742 {
1743 	mac_do_rx(arg, mrh, mp_chain, B_TRUE);
1744 }
1745 
1746 /*
1747  * Function passed to the active client sharing a VNIC. This function
1748  * is returned by mac_tx_get() when a VNIC is present. It invokes
1749  * the VNIC transmit entry point which was specified by the VNIC when
1750  * it called mac_vnic_set(). The VNIC transmit entry point will
1751  * pass the packets to the local VNICs and/or to the underlying VNICs
1752  * if needed.
1753  */
1754 static mblk_t *
1755 mac_vnic_tx(void *arg, mblk_t *mp)
1756 {
1757 	mac_impl_t	*mip = arg;
1758 	mac_txinfo_t	*mtfp;
1759 	mac_vnic_tx_t	*mvt;
1760 
1761 	/*
1762 	 * There is a race between the notification of the VNIC
1763 	 * addition and removal, and the processing of the VNIC notification
1764 	 * by the MAC client. During this window, it is possible for
1765 	 * an active MAC client to contine invoking mac_vnic_tx() while
1766 	 * the VNIC has already been removed. So we cannot assume
1767 	 * that mi_vnic_present will always be true when mac_vnic_tx()
1768 	 * is invoked.
1769 	 */
1770 	rw_enter(&mip->mi_tx_lock, RW_READER);
1771 	if (!mip->mi_vnic_present) {
1772 		rw_exit(&mip->mi_tx_lock);
1773 		freemsgchain(mp);
1774 		return (NULL);
1775 	}
1776 
1777 	ASSERT(mip->mi_vnic_tx != NULL);
1778 	mvt = mip->mi_vnic_tx;
1779 	MAC_VNIC_TXINFO_REFHOLD(mvt);
1780 	rw_exit(&mip->mi_tx_lock);
1781 
1782 	mtfp = &mvt->mv_txinfo;
1783 	mtfp->mt_fn(mtfp->mt_arg, mp);
1784 
1785 	MAC_VNIC_TXINFO_REFRELE(mvt);
1786 	return (NULL);
1787 }
1788 
1789 /*
1790  * Transmit function -- ONLY used when there are registered loopback listeners.
1791  */
1792 mblk_t *
1793 mac_do_txloop(void *arg, mblk_t *bp, boolean_t call_vnic)
1794 {
1795 	mac_impl_t	*mip = arg;
1796 	mac_txloop_fn_t	*mtfp;
1797 	mblk_t		*loop_bp, *resid_bp, *next_bp;
1798 
1799 	if (call_vnic) {
1800 		/*
1801 		 * In promiscous mode, a copy of the sent packet will
1802 		 * be sent to the client's promiscous receive entry
1803 		 * points via mac_vnic_tx()->
1804 		 * mac_active_rx_promisc()->mac_rx_default().
1805 		 */
1806 		return (mac_vnic_tx(arg, bp));
1807 	}
1808 
1809 	while (bp != NULL) {
1810 		next_bp = bp->b_next;
1811 		bp->b_next = NULL;
1812 
1813 		if ((loop_bp = copymsg(bp)) == NULL)
1814 			goto noresources;
1815 
1816 		if ((resid_bp = mip->mi_tx(mip->mi_driver, bp)) != NULL) {
1817 			ASSERT(resid_bp == bp);
1818 			freemsg(loop_bp);
1819 			goto noresources;
1820 		}
1821 
1822 		rw_enter(&mip->mi_tx_lock, RW_READER);
1823 		mtfp = mip->mi_mtfp;
1824 		while (mtfp != NULL && loop_bp != NULL) {
1825 			bp = loop_bp;
1826 
1827 			/* XXX counter bump if copymsg() fails? */
1828 			if (mtfp->mtf_nextp != NULL)
1829 				loop_bp = copymsg(bp);
1830 			else
1831 				loop_bp = NULL;
1832 
1833 			mtfp->mtf_fn(mtfp->mtf_arg, bp);
1834 			mtfp = mtfp->mtf_nextp;
1835 		}
1836 		rw_exit(&mip->mi_tx_lock);
1837 
1838 		/*
1839 		 * It's possible we've raced with the disabling of promiscuous
1840 		 * mode, in which case we can discard our copy.
1841 		 */
1842 		if (loop_bp != NULL)
1843 			freemsg(loop_bp);
1844 
1845 		bp = next_bp;
1846 	}
1847 
1848 	return (NULL);
1849 
1850 noresources:
1851 	bp->b_next = next_bp;
1852 	return (bp);
1853 }
1854 
1855 mblk_t *
1856 mac_txloop(void *arg, mblk_t *bp)
1857 {
1858 	return (mac_do_txloop(arg, bp, B_FALSE));
1859 }
1860 
1861 static mblk_t *
1862 mac_vnic_txloop(void *arg, mblk_t *bp)
1863 {
1864 	return (mac_do_txloop(arg, bp, B_TRUE));
1865 }
1866 
1867 void
1868 mac_link_update(mac_handle_t mh, link_state_t link)
1869 {
1870 	mac_impl_t	*mip = (mac_impl_t *)mh;
1871 
1872 	/*
1873 	 * Save the link state.
1874 	 */
1875 	mip->mi_linkstate = link;
1876 
1877 	/*
1878 	 * Send a MAC_NOTE_LINK notification.
1879 	 */
1880 	i_mac_notify(mip, MAC_NOTE_LINK);
1881 }
1882 
1883 void
1884 mac_unicst_update(mac_handle_t mh, const uint8_t *addr)
1885 {
1886 	mac_impl_t	*mip = (mac_impl_t *)mh;
1887 
1888 	if (mip->mi_type->mt_addr_length == 0)
1889 		return;
1890 
1891 	/*
1892 	 * Save the address.
1893 	 */
1894 	bcopy(addr, mip->mi_addr, mip->mi_type->mt_addr_length);
1895 
1896 	/*
1897 	 * Send a MAC_NOTE_UNICST notification.
1898 	 */
1899 	i_mac_notify(mip, MAC_NOTE_UNICST);
1900 }
1901 
1902 void
1903 mac_tx_update(mac_handle_t mh)
1904 {
1905 	/*
1906 	 * Send a MAC_NOTE_TX notification.
1907 	 */
1908 	i_mac_notify((mac_impl_t *)mh, MAC_NOTE_TX);
1909 }
1910 
1911 void
1912 mac_resource_update(mac_handle_t mh)
1913 {
1914 	/*
1915 	 * Send a MAC_NOTE_RESOURCE notification.
1916 	 */
1917 	i_mac_notify((mac_impl_t *)mh, MAC_NOTE_RESOURCE);
1918 }
1919 
1920 mac_resource_handle_t
1921 mac_resource_add(mac_handle_t mh, mac_resource_t *mrp)
1922 {
1923 	mac_impl_t		*mip = (mac_impl_t *)mh;
1924 	mac_resource_handle_t	mrh;
1925 	mac_resource_add_t	add;
1926 	void			*arg;
1927 
1928 	rw_enter(&mip->mi_resource_lock, RW_READER);
1929 	add = mip->mi_resource_add;
1930 	arg = mip->mi_resource_add_arg;
1931 
1932 	if (add != NULL)
1933 		mrh = add(arg, mrp);
1934 	else
1935 		mrh = NULL;
1936 	rw_exit(&mip->mi_resource_lock);
1937 
1938 	return (mrh);
1939 }
1940 
1941 int
1942 mac_pdata_update(mac_handle_t mh, void *mac_pdata, size_t dsize)
1943 {
1944 	mac_impl_t	*mip = (mac_impl_t *)mh;
1945 
1946 	/*
1947 	 * Verify that the plugin supports MAC plugin data and that the
1948 	 * supplied data is valid.
1949 	 */
1950 	if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
1951 		return (EINVAL);
1952 	if (!mip->mi_type->mt_ops.mtops_pdata_verify(mac_pdata, dsize))
1953 		return (EINVAL);
1954 
1955 	if (mip->mi_pdata != NULL)
1956 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
1957 
1958 	mip->mi_pdata = kmem_alloc(dsize, KM_SLEEP);
1959 	bcopy(mac_pdata, mip->mi_pdata, dsize);
1960 	mip->mi_pdata_size = dsize;
1961 
1962 	/*
1963 	 * Since the MAC plugin data is used to construct MAC headers that
1964 	 * were cached in fast-path headers, we need to flush fast-path
1965 	 * information for links associated with this mac.
1966 	 */
1967 	i_mac_notify(mip, MAC_NOTE_FASTPATH_FLUSH);
1968 	return (0);
1969 }
1970 
1971 void
1972 mac_multicst_refresh(mac_handle_t mh, mac_multicst_t refresh, void *arg,
1973     boolean_t add)
1974 {
1975 	mac_impl_t		*mip = (mac_impl_t *)mh;
1976 	mac_multicst_addr_t	*p;
1977 
1978 	/*
1979 	 * If no specific refresh function was given then default to the
1980 	 * driver's m_multicst entry point.
1981 	 */
1982 	if (refresh == NULL) {
1983 		refresh = mip->mi_multicst;
1984 		arg = mip->mi_driver;
1985 	}
1986 	ASSERT(refresh != NULL);
1987 
1988 	/*
1989 	 * Walk the multicast address list and call the refresh function for
1990 	 * each address.
1991 	 */
1992 	rw_enter(&(mip->mi_data_lock), RW_READER);
1993 	for (p = mip->mi_mmap; p != NULL; p = p->mma_nextp)
1994 		refresh(arg, add, p->mma_addr);
1995 	rw_exit(&(mip->mi_data_lock));
1996 }
1997 
1998 void
1999 mac_unicst_refresh(mac_handle_t mh, mac_unicst_t refresh, void *arg)
2000 {
2001 	mac_impl_t	*mip = (mac_impl_t *)mh;
2002 	/*
2003 	 * If no specific refresh function was given then default to the
2004 	 * driver's mi_unicst entry point.
2005 	 */
2006 	if (refresh == NULL) {
2007 		refresh = mip->mi_unicst;
2008 		arg = mip->mi_driver;
2009 	}
2010 	ASSERT(refresh != NULL);
2011 
2012 	/*
2013 	 * Call the refresh function with the current unicast address.
2014 	 */
2015 	refresh(arg, mip->mi_addr);
2016 }
2017 
2018 void
2019 mac_promisc_refresh(mac_handle_t mh, mac_setpromisc_t refresh, void *arg)
2020 {
2021 	mac_impl_t	*mip = (mac_impl_t *)mh;
2022 
2023 	/*
2024 	 * If no specific refresh function was given then default to the
2025 	 * driver's m_promisc entry point.
2026 	 */
2027 	if (refresh == NULL) {
2028 		refresh = mip->mi_setpromisc;
2029 		arg = mip->mi_driver;
2030 	}
2031 	ASSERT(refresh != NULL);
2032 
2033 	/*
2034 	 * Call the refresh function with the current promiscuity.
2035 	 */
2036 	refresh(arg, (mip->mi_devpromisc != 0));
2037 }
2038 
2039 boolean_t
2040 mac_do_active_set(mac_handle_t mh, boolean_t shareable)
2041 {
2042 	mac_impl_t *mip = (mac_impl_t *)mh;
2043 
2044 	mutex_enter(&mip->mi_activelink_lock);
2045 	if (mip->mi_activelink) {
2046 		mutex_exit(&mip->mi_activelink_lock);
2047 		return (B_FALSE);
2048 	}
2049 	mip->mi_activelink = B_TRUE;
2050 	mip->mi_shareable = shareable;
2051 	mutex_exit(&mip->mi_activelink_lock);
2052 	return (B_TRUE);
2053 }
2054 
2055 /*
2056  * Called by MAC clients. By default, active MAC clients cannot
2057  * share the NIC with VNICs.
2058  */
2059 boolean_t
2060 mac_active_set(mac_handle_t mh)
2061 {
2062 	return (mac_do_active_set(mh, B_FALSE));
2063 }
2064 
2065 /*
2066  * Called by MAC clients which can share the NIC with VNICS, e.g. DLS.
2067  */
2068 boolean_t
2069 mac_active_shareable_set(mac_handle_t mh)
2070 {
2071 	return (mac_do_active_set(mh, B_TRUE));
2072 }
2073 
2074 void
2075 mac_active_clear(mac_handle_t mh)
2076 {
2077 	mac_impl_t *mip = (mac_impl_t *)mh;
2078 
2079 	mutex_enter(&mip->mi_activelink_lock);
2080 	ASSERT(mip->mi_activelink);
2081 	mip->mi_activelink = B_FALSE;
2082 	mutex_exit(&mip->mi_activelink_lock);
2083 }
2084 
2085 boolean_t
2086 mac_vnic_set(mac_handle_t mh, mac_txinfo_t *tx_info, mac_getcapab_t getcapab_fn,
2087     void *getcapab_arg)
2088 {
2089 	mac_impl_t	*mip = (mac_impl_t *)mh;
2090 	mac_vnic_tx_t	*vnic_tx;
2091 
2092 	mutex_enter(&mip->mi_activelink_lock);
2093 	rw_enter(&mip->mi_tx_lock, RW_WRITER);
2094 	ASSERT(!mip->mi_vnic_present);
2095 
2096 	if (mip->mi_activelink && !mip->mi_shareable) {
2097 		/*
2098 		 * The NIC is already used by an active client which cannot
2099 		 * share it with VNICs.
2100 		 */
2101 		rw_exit(&mip->mi_tx_lock);
2102 		mutex_exit(&mip->mi_activelink_lock);
2103 		return (B_FALSE);
2104 	}
2105 
2106 	vnic_tx = kmem_cache_alloc(mac_vnic_tx_cache, KM_SLEEP);
2107 	vnic_tx->mv_refs = 0;
2108 	vnic_tx->mv_txinfo = *tx_info;
2109 	vnic_tx->mv_clearing = B_FALSE;
2110 
2111 	mip->mi_vnic_present = B_TRUE;
2112 	mip->mi_vnic_tx = vnic_tx;
2113 	mip->mi_vnic_getcapab_fn = getcapab_fn;
2114 	mip->mi_vnic_getcapab_arg = getcapab_arg;
2115 	rw_exit(&mip->mi_tx_lock);
2116 	mutex_exit(&mip->mi_activelink_lock);
2117 
2118 	i_mac_notify(mip, MAC_NOTE_VNIC);
2119 	return (B_TRUE);
2120 }
2121 
2122 void
2123 mac_vnic_clear(mac_handle_t mh)
2124 {
2125 	mac_impl_t *mip = (mac_impl_t *)mh;
2126 	mac_vnic_tx_t	*vnic_tx;
2127 
2128 	rw_enter(&mip->mi_tx_lock, RW_WRITER);
2129 	ASSERT(mip->mi_vnic_present);
2130 	mip->mi_vnic_present = B_FALSE;
2131 	/*
2132 	 * Setting mi_vnic_tx to NULL here under the lock guarantees
2133 	 * that no new references to the current VNIC transmit structure
2134 	 * will be taken by mac_vnic_tx(). This is a necessary condition
2135 	 * for safely waiting for the reference count to drop to
2136 	 * zero below.
2137 	 */
2138 	vnic_tx = mip->mi_vnic_tx;
2139 	mip->mi_vnic_tx = NULL;
2140 	mip->mi_vnic_getcapab_fn = NULL;
2141 	mip->mi_vnic_getcapab_arg = NULL;
2142 	rw_exit(&mip->mi_tx_lock);
2143 
2144 	i_mac_notify(mip, MAC_NOTE_VNIC);
2145 
2146 	/*
2147 	 * Wait for all TX calls referencing the VNIC transmit
2148 	 * entry point that was removed to complete.
2149 	 */
2150 	mutex_enter(&vnic_tx->mv_lock);
2151 	vnic_tx->mv_clearing = B_TRUE;
2152 	while (vnic_tx->mv_refs > 0)
2153 		cv_wait(&vnic_tx->mv_cv, &vnic_tx->mv_lock);
2154 	mutex_exit(&vnic_tx->mv_lock);
2155 	kmem_cache_free(mac_vnic_tx_cache, vnic_tx);
2156 }
2157 
2158 /*
2159  * mac_info_get() is used for retrieving the mac_info when a DL_INFO_REQ is
2160  * issued before a DL_ATTACH_REQ. we walk the i_mac_impl_hash table and find
2161  * the first mac_impl_t with a matching driver name; then we copy its mac_info_t
2162  * to the caller. we do all this with i_mac_impl_lock held so the mac_impl_t
2163  * cannot disappear while we are accessing it.
2164  */
2165 typedef struct i_mac_info_state_s {
2166 	const char	*mi_name;
2167 	mac_info_t	*mi_infop;
2168 } i_mac_info_state_t;
2169 
2170 /*ARGSUSED*/
2171 static uint_t
2172 i_mac_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
2173 {
2174 	i_mac_info_state_t	*statep = arg;
2175 	mac_impl_t		*mip = (mac_impl_t *)val;
2176 
2177 	if (mip->mi_disabled)
2178 		return (MH_WALK_CONTINUE);
2179 
2180 	if (strcmp(statep->mi_name,
2181 	    ddi_driver_name(mip->mi_dip)) != 0)
2182 		return (MH_WALK_CONTINUE);
2183 
2184 	statep->mi_infop = &mip->mi_info;
2185 	return (MH_WALK_TERMINATE);
2186 }
2187 
2188 boolean_t
2189 mac_info_get(const char *name, mac_info_t *minfop)
2190 {
2191 	i_mac_info_state_t	state;
2192 
2193 	rw_enter(&i_mac_impl_lock, RW_READER);
2194 	state.mi_name = name;
2195 	state.mi_infop = NULL;
2196 	mod_hash_walk(i_mac_impl_hash, i_mac_info_walker, &state);
2197 	if (state.mi_infop == NULL) {
2198 		rw_exit(&i_mac_impl_lock);
2199 		return (B_FALSE);
2200 	}
2201 	*minfop = *state.mi_infop;
2202 	rw_exit(&i_mac_impl_lock);
2203 	return (B_TRUE);
2204 }
2205 
2206 boolean_t
2207 mac_do_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data,
2208     boolean_t is_vnic)
2209 {
2210 	mac_impl_t *mip = (mac_impl_t *)mh;
2211 
2212 	if (!is_vnic) {
2213 		rw_enter(&mip->mi_tx_lock, RW_READER);
2214 		if (mip->mi_vnic_present) {
2215 			boolean_t rv;
2216 
2217 			rv = mip->mi_vnic_getcapab_fn(mip->mi_vnic_getcapab_arg,
2218 			    cap, cap_data);
2219 			rw_exit(&mip->mi_tx_lock);
2220 			return (rv);
2221 		}
2222 		rw_exit(&mip->mi_tx_lock);
2223 	}
2224 
2225 	if (mip->mi_callbacks->mc_callbacks & MC_GETCAPAB)
2226 		return (mip->mi_getcapab(mip->mi_driver, cap, cap_data));
2227 	else
2228 		return (B_FALSE);
2229 }
2230 
2231 boolean_t
2232 mac_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data)
2233 {
2234 	return (mac_do_capab_get(mh, cap, cap_data, B_FALSE));
2235 }
2236 
2237 boolean_t
2238 mac_vnic_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data)
2239 {
2240 	return (mac_do_capab_get(mh, cap, cap_data, B_TRUE));
2241 }
2242 
2243 boolean_t
2244 mac_sap_verify(mac_handle_t mh, uint32_t sap, uint32_t *bind_sap)
2245 {
2246 	mac_impl_t	*mip = (mac_impl_t *)mh;
2247 	return (mip->mi_type->mt_ops.mtops_sap_verify(sap, bind_sap,
2248 	    mip->mi_pdata));
2249 }
2250 
2251 mblk_t *
2252 mac_header(mac_handle_t mh, const uint8_t *daddr, uint32_t sap, mblk_t *payload,
2253     size_t extra_len)
2254 {
2255 	mac_impl_t	*mip = (mac_impl_t *)mh;
2256 	return (mip->mi_type->mt_ops.mtops_header(mip->mi_addr, daddr, sap,
2257 	    mip->mi_pdata, payload, extra_len));
2258 }
2259 
2260 int
2261 mac_header_info(mac_handle_t mh, mblk_t *mp, mac_header_info_t *mhip)
2262 {
2263 	mac_impl_t	*mip = (mac_impl_t *)mh;
2264 	return (mip->mi_type->mt_ops.mtops_header_info(mp, mip->mi_pdata,
2265 	    mhip));
2266 }
2267 
2268 mblk_t *
2269 mac_header_cook(mac_handle_t mh, mblk_t *mp)
2270 {
2271 	mac_impl_t	*mip = (mac_impl_t *)mh;
2272 	if (mip->mi_type->mt_ops.mtops_ops & MTOPS_HEADER_COOK) {
2273 		if (DB_REF(mp) > 1) {
2274 			mblk_t *newmp = copymsg(mp);
2275 			if (newmp == NULL)
2276 				return (NULL);
2277 			freemsg(mp);
2278 			mp = newmp;
2279 		}
2280 		return (mip->mi_type->mt_ops.mtops_header_cook(mp,
2281 		    mip->mi_pdata));
2282 	}
2283 	return (mp);
2284 }
2285 
2286 mblk_t *
2287 mac_header_uncook(mac_handle_t mh, mblk_t *mp)
2288 {
2289 	mac_impl_t	*mip = (mac_impl_t *)mh;
2290 	if (mip->mi_type->mt_ops.mtops_ops & MTOPS_HEADER_UNCOOK) {
2291 		if (DB_REF(mp) > 1) {
2292 			mblk_t *newmp = copymsg(mp);
2293 			if (newmp == NULL)
2294 				return (NULL);
2295 			freemsg(mp);
2296 			mp = newmp;
2297 		}
2298 		return (mip->mi_type->mt_ops.mtops_header_uncook(mp,
2299 		    mip->mi_pdata));
2300 	}
2301 	return (mp);
2302 }
2303 
/*
 * Initialize a driver's dev_ops.  This simply forwards `ops' and `name' to
 * dld_init_ops().
 */
void
mac_init_ops(struct dev_ops *ops, const char *name)
{
	dld_init_ops(ops, name);
}
2309 
/*
 * Counterpart of mac_init_ops().  This simply forwards `ops' to
 * dld_fini_ops().
 */
void
mac_fini_ops(struct dev_ops *ops)
{
	dld_fini_ops(ops);
}
2315 
2316 /*
2317  * MAC Type Plugin functions.
2318  */
2319 
2320 mactype_register_t *
2321 mactype_alloc(uint_t mactype_version)
2322 {
2323 	mactype_register_t *mtrp;
2324 
2325 	/*
2326 	 * Make sure there isn't a version mismatch between the plugin and
2327 	 * the framework.  In the future, if multiple versions are
2328 	 * supported, this check could become more sophisticated.
2329 	 */
2330 	if (mactype_version != MACTYPE_VERSION)
2331 		return (NULL);
2332 
2333 	mtrp = kmem_zalloc(sizeof (mactype_register_t), KM_SLEEP);
2334 	mtrp->mtr_version = mactype_version;
2335 	return (mtrp);
2336 }
2337 
2338 void
2339 mactype_free(mactype_register_t *mtrp)
2340 {
2341 	kmem_free(mtrp, sizeof (mactype_register_t));
2342 }
2343 
2344 int
2345 mactype_register(mactype_register_t *mtrp)
2346 {
2347 	mactype_t	*mtp;
2348 	mactype_ops_t	*ops = mtrp->mtr_ops;
2349 
2350 	/* Do some sanity checking before we register this MAC type. */
2351 	if (mtrp->mtr_ident == NULL || ops == NULL || mtrp->mtr_addrlen == 0)
2352 		return (EINVAL);
2353 
2354 	/*
2355 	 * Verify that all mandatory callbacks are set in the ops
2356 	 * vector.
2357 	 */
2358 	if (ops->mtops_unicst_verify == NULL ||
2359 	    ops->mtops_multicst_verify == NULL ||
2360 	    ops->mtops_sap_verify == NULL ||
2361 	    ops->mtops_header == NULL ||
2362 	    ops->mtops_header_info == NULL) {
2363 		return (EINVAL);
2364 	}
2365 
2366 	mtp = kmem_zalloc(sizeof (*mtp), KM_SLEEP);
2367 	mtp->mt_ident = mtrp->mtr_ident;
2368 	mtp->mt_ops = *ops;
2369 	mtp->mt_type = mtrp->mtr_mactype;
2370 	mtp->mt_nativetype = mtrp->mtr_nativetype;
2371 	mtp->mt_addr_length = mtrp->mtr_addrlen;
2372 	if (mtrp->mtr_brdcst_addr != NULL) {
2373 		mtp->mt_brdcst_addr = kmem_alloc(mtrp->mtr_addrlen, KM_SLEEP);
2374 		bcopy(mtrp->mtr_brdcst_addr, mtp->mt_brdcst_addr,
2375 		    mtrp->mtr_addrlen);
2376 	}
2377 
2378 	mtp->mt_stats = mtrp->mtr_stats;
2379 	mtp->mt_statcount = mtrp->mtr_statcount;
2380 
2381 	if (mod_hash_insert(i_mactype_hash,
2382 	    (mod_hash_key_t)mtp->mt_ident, (mod_hash_val_t)mtp) != 0) {
2383 		kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length);
2384 		kmem_free(mtp, sizeof (*mtp));
2385 		return (EEXIST);
2386 	}
2387 	return (0);
2388 }
2389 
2390 int
2391 mactype_unregister(const char *ident)
2392 {
2393 	mactype_t	*mtp;
2394 	mod_hash_val_t	val;
2395 	int 		err;
2396 
2397 	/*
2398 	 * Let's not allow MAC drivers to use this plugin while we're
2399 	 * trying to unregister it.  Holding i_mactype_lock also prevents a
2400 	 * plugin from unregistering while a MAC driver is attempting to
2401 	 * hold a reference to it in i_mactype_getplugin().
2402 	 */
2403 	mutex_enter(&i_mactype_lock);
2404 
2405 	if ((err = mod_hash_find(i_mactype_hash, (mod_hash_key_t)ident,
2406 	    (mod_hash_val_t *)&mtp)) != 0) {
2407 		/* A plugin is trying to unregister, but it never registered. */
2408 		err = ENXIO;
2409 		goto done;
2410 	}
2411 
2412 	if (mtp->mt_ref != 0) {
2413 		err = EBUSY;
2414 		goto done;
2415 	}
2416 
2417 	err = mod_hash_remove(i_mactype_hash, (mod_hash_key_t)ident, &val);
2418 	ASSERT(err == 0);
2419 	if (err != 0) {
2420 		/* This should never happen, thus the ASSERT() above. */
2421 		err = EINVAL;
2422 		goto done;
2423 	}
2424 	ASSERT(mtp == (mactype_t *)val);
2425 
2426 	kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length);
2427 	kmem_free(mtp, sizeof (mactype_t));
2428 done:
2429 	mutex_exit(&i_mactype_lock);
2430 	return (err);
2431 }
2432 
2433 int
2434 mac_vlan_create(mac_handle_t mh, const char *name, minor_t minor)
2435 {
2436 	mac_impl_t		*mip = (mac_impl_t *)mh;
2437 
2438 	/* Create a style-1 DLPI device */
2439 	if (ddi_create_minor_node(mip->mi_dip, (char *)name, S_IFCHR, minor,
2440 	    DDI_NT_NET, 0) != DDI_SUCCESS) {
2441 		return (-1);
2442 	}
2443 	return (0);
2444 }
2445 
2446 void
2447 mac_vlan_remove(mac_handle_t mh, const char *name)
2448 {
2449 	mac_impl_t		*mip = (mac_impl_t *)mh;
2450 	dev_info_t		*dipp;
2451 
2452 	ddi_remove_minor_node(mip->mi_dip, (char *)name);
2453 	dipp = ddi_get_parent(mip->mi_dip);
2454 	(void) devfs_clean(dipp, NULL, 0);
2455 }
2456