xref: /illumos-gate/usr/src/uts/common/io/mac/mac_provider.c (revision a1c36c8ba5112b6713dabac615bf8d56a45f0764)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/conf.h>
28 #include <sys/id_space.h>
29 #include <sys/esunddi.h>
30 #include <sys/stat.h>
31 #include <sys/mkdev.h>
32 #include <sys/stream.h>
33 #include <sys/strsubr.h>
34 #include <sys/dlpi.h>
35 #include <sys/modhash.h>
36 #include <sys/mac.h>
37 #include <sys/mac_provider.h>
38 #include <sys/mac_impl.h>
39 #include <sys/mac_client_impl.h>
40 #include <sys/mac_client_priv.h>
41 #include <sys/mac_soft_ring.h>
42 #include <sys/mac_stat.h>
43 #include <sys/dld.h>
44 #include <sys/modctl.h>
45 #include <sys/fs/dv_node.h>
46 #include <sys/thread.h>
47 #include <sys/proc.h>
48 #include <sys/callb.h>
49 #include <sys/cpuvar.h>
50 #include <sys/atomic.h>
51 #include <sys/sdt.h>
52 #include <sys/mac_flow.h>
53 #include <sys/ddi_intr_impl.h>
54 #include <sys/disp.h>
55 #include <sys/sdt.h>
56 #include <sys/pattr.h>
57 #include <sys/strsun.h>
58 
59 /*
60  * MAC Provider Interface.
61  *
62  * Interface for GLDv3 compatible NIC drivers.
63  */
64 
/*
 * Entry point of the per-MAC notification thread.  mac_register() passes
 * it to thread_create() with the mac_impl_t as its argument.
 */
static void i_mac_notify_thread(void *);

/* Signature of a framework-internal default notification callback. */
typedef void (*mac_notify_default_cb_fn_t)(mac_impl_t *);

/*
 * Table of default notification callbacks, MAC_NNOTE slots in all.  The
 * first slot (labelled MAC_NOTE_LINK) holds mac_fanout_recompute(); every
 * other listed slot is NULL, and any slot without an initializer is
 * implicitly NULL as well.  The table is presumably consulted by the
 * notification thread; its consumer is outside this excerpt.
 *
 * NOTE(review): the per-slot labels below name nine notification types and
 * do not include MAC_NOTE_DEST, which mac_dst_update() posts.  Confirm the
 * labels against the notification enum order in <sys/mac.h>.
 */
static const mac_notify_default_cb_fn_t mac_notify_cb_list[MAC_NNOTE] = {
	mac_fanout_recompute,	/* MAC_NOTE_LINK */
	NULL,		/* MAC_NOTE_UNICST */
	NULL,		/* MAC_NOTE_TX */
	NULL,		/* MAC_NOTE_DEVPROMISC */
	NULL,		/* MAC_NOTE_FASTPATH_FLUSH */
	NULL,		/* MAC_NOTE_SDU_SIZE */
	NULL,		/* MAC_NOTE_MARGIN */
	NULL,		/* MAC_NOTE_CAPAB_CHG */
	NULL		/* MAC_NOTE_LOWLINK */
};
80 
81 /*
82  * Driver support functions.
83  */
84 
85 /* REGISTRATION */
86 
87 mac_register_t *
88 mac_alloc(uint_t mac_version)
89 {
90 	mac_register_t *mregp;
91 
92 	/*
93 	 * Make sure there isn't a version mismatch between the driver and
94 	 * the framework.  In the future, if multiple versions are
95 	 * supported, this check could become more sophisticated.
96 	 */
97 	if (mac_version != MAC_VERSION)
98 		return (NULL);
99 
100 	mregp = kmem_zalloc(sizeof (mac_register_t), KM_SLEEP);
101 	mregp->m_version = mac_version;
102 	return (mregp);
103 }
104 
105 void
106 mac_free(mac_register_t *mregp)
107 {
108 	kmem_free(mregp, sizeof (mac_register_t));
109 }
110 
111 /*
112  * mac_register() is how drivers register new MACs with the GLDv3
113  * framework.  The mregp argument is allocated by drivers using the
114  * mac_alloc() function, and can be freed using mac_free() immediately upon
115  * return from mac_register().  Upon success (0 return value), the mhp
116  * opaque pointer becomes the driver's handle to its MAC interface, and is
117  * the argument to all other mac module entry points.
118  */
119 /* ARGSUSED */
120 int
121 mac_register(mac_register_t *mregp, mac_handle_t *mhp)
122 {
123 	mac_impl_t		*mip;
124 	mactype_t		*mtype;
125 	int			err = EINVAL;
126 	struct devnames		*dnp = NULL;
127 	uint_t			instance;
128 	boolean_t		style1_created = B_FALSE;
129 	boolean_t		style2_created = B_FALSE;
130 	char			*driver;
131 	minor_t			minor = 0;
132 
133 	/* A successful call to mac_init_ops() sets the DN_GLDV3_DRIVER flag. */
134 	if (!GLDV3_DRV(ddi_driver_major(mregp->m_dip)))
135 		return (EINVAL);
136 
137 	/* Find the required MAC-Type plugin. */
138 	if ((mtype = mactype_getplugin(mregp->m_type_ident)) == NULL)
139 		return (EINVAL);
140 
141 	/* Create a mac_impl_t to represent this MAC. */
142 	mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP);
143 
144 	/*
145 	 * The mac is not ready for open yet.
146 	 */
147 	mip->mi_state_flags |= MIS_DISABLED;
148 
149 	/*
150 	 * When a mac is registered, the m_instance field can be set to:
151 	 *
152 	 *  0:	Get the mac's instance number from m_dip.
153 	 *	This is usually used for physical device dips.
154 	 *
155 	 *  [1 .. MAC_MAX_MINOR-1]: Use the value as the mac's instance number.
156 	 *	For example, when an aggregation is created with the key option,
157 	 *	"key" will be used as the instance number.
158 	 *
159 	 *  -1: Assign an instance number from [MAC_MAX_MINOR .. MAXMIN-1].
160 	 *	This is often used when a MAC of a virtual link is registered
161 	 *	(e.g., aggregation when "key" is not specified, or vnic).
162 	 *
163 	 * Note that the instance number is used to derive the mi_minor field
164 	 * of mac_impl_t, which will then be used to derive the name of kstats
165 	 * and the devfs nodes.  The first 2 cases are needed to preserve
166 	 * backward compatibility.
167 	 */
168 	switch (mregp->m_instance) {
169 	case 0:
170 		instance = ddi_get_instance(mregp->m_dip);
171 		break;
172 	case ((uint_t)-1):
173 		minor = mac_minor_hold(B_TRUE);
174 		if (minor == 0) {
175 			err = ENOSPC;
176 			goto fail;
177 		}
178 		instance = minor - 1;
179 		break;
180 	default:
181 		instance = mregp->m_instance;
182 		if (instance >= MAC_MAX_MINOR) {
183 			err = EINVAL;
184 			goto fail;
185 		}
186 		break;
187 	}
188 
189 	mip->mi_minor = (minor_t)(instance + 1);
190 	mip->mi_dip = mregp->m_dip;
191 	mip->mi_clients_list = NULL;
192 	mip->mi_nclients = 0;
193 
194 	/* Set the default IEEE Port VLAN Identifier */
195 	mip->mi_pvid = 1;
196 
197 	/* Default bridge link learning protection values */
198 	mip->mi_llimit = 1000;
199 	mip->mi_ldecay = 200;
200 
201 	driver = (char *)ddi_driver_name(mip->mi_dip);
202 
203 	/* Construct the MAC name as <drvname><instance> */
204 	(void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d",
205 	    driver, instance);
206 
207 	mip->mi_driver = mregp->m_driver;
208 
209 	mip->mi_type = mtype;
210 	mip->mi_margin = mregp->m_margin;
211 	mip->mi_info.mi_media = mtype->mt_type;
212 	mip->mi_info.mi_nativemedia = mtype->mt_nativetype;
213 	if (mregp->m_max_sdu <= mregp->m_min_sdu)
214 		goto fail;
215 	if (mregp->m_multicast_sdu == 0)
216 		mregp->m_multicast_sdu = mregp->m_max_sdu;
217 	if (mregp->m_multicast_sdu < mregp->m_min_sdu ||
218 	    mregp->m_multicast_sdu > mregp->m_max_sdu)
219 		goto fail;
220 	mip->mi_sdu_min = mregp->m_min_sdu;
221 	mip->mi_sdu_max = mregp->m_max_sdu;
222 	mip->mi_sdu_multicast = mregp->m_multicast_sdu;
223 	mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length;
224 	/*
225 	 * If the media supports a broadcast address, cache a pointer to it
226 	 * in the mac_info_t so that upper layers can use it.
227 	 */
228 	mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr;
229 
230 	mip->mi_v12n_level = mregp->m_v12n;
231 
232 	/*
233 	 * Copy the unicast source address into the mac_info_t, but only if
234 	 * the MAC-Type defines a non-zero address length.  We need to
235 	 * handle MAC-Types that have an address length of 0
236 	 * (point-to-point protocol MACs for example).
237 	 */
238 	if (mip->mi_type->mt_addr_length > 0) {
239 		if (mregp->m_src_addr == NULL)
240 			goto fail;
241 		mip->mi_info.mi_unicst_addr =
242 		    kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP);
243 		bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr,
244 		    mip->mi_type->mt_addr_length);
245 
246 		/*
247 		 * Copy the fixed 'factory' MAC address from the immutable
248 		 * info.  This is taken to be the MAC address currently in
249 		 * use.
250 		 */
251 		bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr,
252 		    mip->mi_type->mt_addr_length);
253 
254 		/*
255 		 * At this point, we should set up the classification
256 		 * rules etc but we delay it till mac_open() so that
257 		 * the resource discovery has taken place and we
258 		 * know someone wants to use the device. Otherwise
259 		 * memory gets allocated for Rx ring structures even
260 		 * during probe.
261 		 */
262 
263 		/* Copy the destination address if one is provided. */
264 		if (mregp->m_dst_addr != NULL) {
265 			bcopy(mregp->m_dst_addr, mip->mi_dstaddr,
266 			    mip->mi_type->mt_addr_length);
267 			mip->mi_dstaddr_set = B_TRUE;
268 		}
269 	} else if (mregp->m_src_addr != NULL) {
270 		goto fail;
271 	}
272 
273 	/*
274 	 * The format of the m_pdata is specific to the plugin.  It is
275 	 * passed in as an argument to all of the plugin callbacks.  The
276 	 * driver can update this information by calling
277 	 * mac_pdata_update().
278 	 */
279 	if (mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY) {
280 		/*
281 		 * Verify if the supplied plugin data is valid.  Note that
282 		 * even if the caller passed in a NULL pointer as plugin data,
283 		 * we still need to verify if that's valid as the plugin may
284 		 * require plugin data to function.
285 		 */
286 		if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata,
287 		    mregp->m_pdata_size)) {
288 			goto fail;
289 		}
290 		if (mregp->m_pdata != NULL) {
291 			mip->mi_pdata =
292 			    kmem_alloc(mregp->m_pdata_size, KM_SLEEP);
293 			bcopy(mregp->m_pdata, mip->mi_pdata,
294 			    mregp->m_pdata_size);
295 			mip->mi_pdata_size = mregp->m_pdata_size;
296 		}
297 	} else if (mregp->m_pdata != NULL) {
298 		/*
299 		 * The caller supplied non-NULL plugin data, but the plugin
300 		 * does not recognize plugin data.
301 		 */
302 		err = EINVAL;
303 		goto fail;
304 	}
305 
306 	/*
307 	 * Register the private properties.
308 	 */
309 	mac_register_priv_prop(mip, mregp->m_priv_props);
310 
311 	/*
312 	 * Stash the driver callbacks into the mac_impl_t, but first sanity
313 	 * check to make sure all mandatory callbacks are set.
314 	 */
315 	if (mregp->m_callbacks->mc_getstat == NULL ||
316 	    mregp->m_callbacks->mc_start == NULL ||
317 	    mregp->m_callbacks->mc_stop == NULL ||
318 	    mregp->m_callbacks->mc_setpromisc == NULL ||
319 	    mregp->m_callbacks->mc_multicst == NULL) {
320 		goto fail;
321 	}
322 	mip->mi_callbacks = mregp->m_callbacks;
323 
324 	if (mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LEGACY,
325 	    &mip->mi_capab_legacy)) {
326 		mip->mi_state_flags |= MIS_LEGACY;
327 		mip->mi_phy_dev = mip->mi_capab_legacy.ml_dev;
328 	} else {
329 		mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip),
330 		    mip->mi_minor);
331 	}
332 
333 	/*
334 	 * Allocate a notification thread. thread_create blocks for memory
335 	 * if needed, it never fails.
336 	 */
337 	mip->mi_notify_thread = thread_create(NULL, 0, i_mac_notify_thread,
338 	    mip, 0, &p0, TS_RUN, minclsyspri);
339 
340 	/*
341 	 * Initialize the capabilities
342 	 */
343 
344 	bzero(&mip->mi_rx_rings_cap, sizeof (mac_capab_rings_t));
345 	bzero(&mip->mi_tx_rings_cap, sizeof (mac_capab_rings_t));
346 
347 	if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, NULL))
348 		mip->mi_state_flags |= MIS_IS_VNIC;
349 
350 	if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_AGGR, NULL))
351 		mip->mi_state_flags |= MIS_IS_AGGR;
352 
353 	mac_addr_factory_init(mip);
354 
355 	/*
356 	 * Enforce the virtrualization level registered.
357 	 */
358 	if (mip->mi_v12n_level & MAC_VIRT_LEVEL1) {
359 		if (mac_init_rings(mip, MAC_RING_TYPE_RX) != 0 ||
360 		    mac_init_rings(mip, MAC_RING_TYPE_TX) != 0)
361 			goto fail;
362 
363 		/*
364 		 * The driver needs to register at least rx rings for this
365 		 * virtualization level.
366 		 */
367 		if (mip->mi_rx_groups == NULL)
368 			goto fail;
369 	}
370 
371 	/*
372 	 * The driver must set mc_unicst entry point to NULL when it advertises
373 	 * CAP_RINGS for rx groups.
374 	 */
375 	if (mip->mi_rx_groups != NULL) {
376 		if (mregp->m_callbacks->mc_unicst != NULL)
377 			goto fail;
378 	} else {
379 		if (mregp->m_callbacks->mc_unicst == NULL)
380 			goto fail;
381 	}
382 
383 	/*
384 	 * Initialize MAC addresses. Must be called after mac_init_rings().
385 	 */
386 	mac_init_macaddr(mip);
387 
388 	mip->mi_share_capab.ms_snum = 0;
389 	if (mip->mi_v12n_level & MAC_VIRT_HIO) {
390 		(void) mac_capab_get((mac_handle_t)mip, MAC_CAPAB_SHARES,
391 		    &mip->mi_share_capab);
392 	}
393 
394 	/*
395 	 * Initialize the kstats for this device.
396 	 */
397 	mac_driver_stat_create(mip);
398 
399 	/* Zero out any properties. */
400 	bzero(&mip->mi_resource_props, sizeof (mac_resource_props_t));
401 
402 	if (mip->mi_minor <= MAC_MAX_MINOR) {
403 		/* Create a style-2 DLPI device */
404 		if (ddi_create_minor_node(mip->mi_dip, driver, S_IFCHR, 0,
405 		    DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS)
406 			goto fail;
407 		style2_created = B_TRUE;
408 
409 		/* Create a style-1 DLPI device */
410 		if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR,
411 		    mip->mi_minor, DDI_NT_NET, 0) != DDI_SUCCESS)
412 			goto fail;
413 		style1_created = B_TRUE;
414 	}
415 
416 	mac_flow_l2tab_create(mip, &mip->mi_flow_tab);
417 
418 	rw_enter(&i_mac_impl_lock, RW_WRITER);
419 	if (mod_hash_insert(i_mac_impl_hash,
420 	    (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) {
421 		rw_exit(&i_mac_impl_lock);
422 		err = EEXIST;
423 		goto fail;
424 	}
425 
426 	DTRACE_PROBE2(mac__register, struct devnames *, dnp,
427 	    (mac_impl_t *), mip);
428 
429 	/*
430 	 * Mark the MAC to be ready for open.
431 	 */
432 	mip->mi_state_flags &= ~MIS_DISABLED;
433 	rw_exit(&i_mac_impl_lock);
434 
435 	atomic_inc_32(&i_mac_impl_count);
436 
437 	cmn_err(CE_NOTE, "!%s registered", mip->mi_name);
438 	*mhp = (mac_handle_t)mip;
439 	return (0);
440 
441 fail:
442 	if (style1_created)
443 		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
444 
445 	if (style2_created)
446 		ddi_remove_minor_node(mip->mi_dip, driver);
447 
448 	mac_addr_factory_fini(mip);
449 
450 	/* Clean up registered MAC addresses */
451 	mac_fini_macaddr(mip);
452 
453 	/* Clean up registered rings */
454 	mac_free_rings(mip, MAC_RING_TYPE_RX);
455 	mac_free_rings(mip, MAC_RING_TYPE_TX);
456 
457 	/* Clean up notification thread */
458 	if (mip->mi_notify_thread != NULL)
459 		i_mac_notify_exit(mip);
460 
461 	if (mip->mi_info.mi_unicst_addr != NULL) {
462 		kmem_free(mip->mi_info.mi_unicst_addr,
463 		    mip->mi_type->mt_addr_length);
464 		mip->mi_info.mi_unicst_addr = NULL;
465 	}
466 
467 	mac_driver_stat_delete(mip);
468 
469 	if (mip->mi_type != NULL) {
470 		atomic_dec_32(&mip->mi_type->mt_ref);
471 		mip->mi_type = NULL;
472 	}
473 
474 	if (mip->mi_pdata != NULL) {
475 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
476 		mip->mi_pdata = NULL;
477 		mip->mi_pdata_size = 0;
478 	}
479 
480 	if (minor != 0) {
481 		ASSERT(minor > MAC_MAX_MINOR);
482 		mac_minor_rele(minor);
483 	}
484 
485 	mip->mi_state_flags = 0;
486 	mac_unregister_priv_prop(mip);
487 
488 	/*
489 	 * Clear the state before destroying the mac_impl_t
490 	 */
491 	mip->mi_state_flags = 0;
492 
493 	kmem_cache_free(i_mac_impl_cachep, mip);
494 	return (err);
495 }
496 
497 /*
498  * Unregister from the GLDv3 framework
499  */
500 int
501 mac_unregister(mac_handle_t mh)
502 {
503 	int			err;
504 	mac_impl_t		*mip = (mac_impl_t *)mh;
505 	mod_hash_val_t		val;
506 	mac_margin_req_t	*mmr, *nextmmr;
507 
508 	/* Fail the unregister if there are any open references to this mac. */
509 	if ((err = mac_disable_nowait(mh)) != 0)
510 		return (err);
511 
512 	/*
513 	 * Clean up notification thread and wait for it to exit.
514 	 */
515 	i_mac_notify_exit(mip);
516 
517 	i_mac_perim_enter(mip);
518 
519 	/*
520 	 * There is still resource properties configured over this mac.
521 	 */
522 	if (mip->mi_resource_props.mrp_mask != 0)
523 		mac_fastpath_enable((mac_handle_t)mip);
524 
525 	if (mip->mi_minor < MAC_MAX_MINOR + 1) {
526 		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
527 		ddi_remove_minor_node(mip->mi_dip,
528 		    (char *)ddi_driver_name(mip->mi_dip));
529 	}
530 
531 	ASSERT(mip->mi_nactiveclients == 0 && !(mip->mi_state_flags &
532 	    MIS_EXCLUSIVE));
533 
534 	mac_driver_stat_delete(mip);
535 
536 	(void) mod_hash_remove(i_mac_impl_hash,
537 	    (mod_hash_key_t)mip->mi_name, &val);
538 	ASSERT(mip == (mac_impl_t *)val);
539 
540 	ASSERT(i_mac_impl_count > 0);
541 	atomic_dec_32(&i_mac_impl_count);
542 
543 	if (mip->mi_pdata != NULL)
544 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
545 	mip->mi_pdata = NULL;
546 	mip->mi_pdata_size = 0;
547 
548 	/*
549 	 * Free the list of margin request.
550 	 */
551 	for (mmr = mip->mi_mmrp; mmr != NULL; mmr = nextmmr) {
552 		nextmmr = mmr->mmr_nextp;
553 		kmem_free(mmr, sizeof (mac_margin_req_t));
554 	}
555 	mip->mi_mmrp = NULL;
556 
557 	mip->mi_linkstate = mip->mi_lowlinkstate = LINK_STATE_UNKNOWN;
558 	kmem_free(mip->mi_info.mi_unicst_addr, mip->mi_type->mt_addr_length);
559 	mip->mi_info.mi_unicst_addr = NULL;
560 
561 	atomic_dec_32(&mip->mi_type->mt_ref);
562 	mip->mi_type = NULL;
563 
564 	/*
565 	 * Free the primary MAC address.
566 	 */
567 	mac_fini_macaddr(mip);
568 
569 	/*
570 	 * free all rings
571 	 */
572 	mac_free_rings(mip, MAC_RING_TYPE_RX);
573 	mac_free_rings(mip, MAC_RING_TYPE_TX);
574 
575 	mac_addr_factory_fini(mip);
576 
577 	bzero(mip->mi_addr, MAXMACADDRLEN);
578 	bzero(mip->mi_dstaddr, MAXMACADDRLEN);
579 
580 	/* and the flows */
581 	mac_flow_tab_destroy(mip->mi_flow_tab);
582 	mip->mi_flow_tab = NULL;
583 
584 	if (mip->mi_minor > MAC_MAX_MINOR)
585 		mac_minor_rele(mip->mi_minor);
586 
587 	cmn_err(CE_NOTE, "!%s unregistered", mip->mi_name);
588 
589 	/*
590 	 * Reset the perim related fields to default values before
591 	 * kmem_cache_free
592 	 */
593 	i_mac_perim_exit(mip);
594 	mip->mi_state_flags = 0;
595 
596 	mac_unregister_priv_prop(mip);
597 
598 	ASSERT(mip->mi_bridge_link == NULL);
599 	kmem_cache_free(i_mac_impl_cachep, mip);
600 
601 	return (0);
602 }
603 
604 /* DATA RECEPTION */
605 
606 /*
607  * This function is invoked for packets received by the MAC driver in
608  * interrupt context. The ring generation number provided by the driver
609  * is matched with the ring generation number held in MAC. If they do not
610  * match, received packets are considered stale packets coming from an older
611  * assignment of the ring. Drop them.
612  */
613 void
614 mac_rx_ring(mac_handle_t mh, mac_ring_handle_t mrh, mblk_t *mp_chain,
615     uint64_t mr_gen_num)
616 {
617 	mac_ring_t		*mr = (mac_ring_t *)mrh;
618 
619 	if ((mr != NULL) && (mr->mr_gen_num != mr_gen_num)) {
620 		DTRACE_PROBE2(mac__rx__rings__stale__packet, uint64_t,
621 		    mr->mr_gen_num, uint64_t, mr_gen_num);
622 		freemsgchain(mp_chain);
623 		return;
624 	}
625 	mac_rx(mh, (mac_resource_handle_t)mrh, mp_chain);
626 }
627 
628 /*
629  * This function is invoked for each packet received by the underlying driver.
630  */
631 void
632 mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
633 {
634 	mac_impl_t *mip = (mac_impl_t *)mh;
635 
636 	/*
637 	 * Check if the link is part of a bridge.  If not, then we don't need
638 	 * to take the lock to remain consistent.  Make this common case
639 	 * lock-free and tail-call optimized.
640 	 */
641 	if (mip->mi_bridge_link == NULL) {
642 		mac_rx_common(mh, mrh, mp_chain);
643 	} else {
644 		/*
645 		 * Once we take a reference on the bridge link, the bridge
646 		 * module itself can't unload, so the callback pointers are
647 		 * stable.
648 		 */
649 		mutex_enter(&mip->mi_bridge_lock);
650 		if ((mh = mip->mi_bridge_link) != NULL)
651 			mac_bridge_ref_cb(mh, B_TRUE);
652 		mutex_exit(&mip->mi_bridge_lock);
653 		if (mh == NULL) {
654 			mac_rx_common((mac_handle_t)mip, mrh, mp_chain);
655 		} else {
656 			mac_bridge_rx_cb(mh, mrh, mp_chain);
657 			mac_bridge_ref_cb(mh, B_FALSE);
658 		}
659 	}
660 }
661 
662 /*
663  * Special case function: this allows snooping of packets transmitted and
664  * received by TRILL. By design, they go directly into the TRILL module.
665  */
666 void
667 mac_trill_snoop(mac_handle_t mh, mblk_t *mp)
668 {
669 	mac_impl_t *mip = (mac_impl_t *)mh;
670 
671 	if (mip->mi_promisc_list != NULL)
672 		mac_promisc_dispatch(mip, mp, NULL);
673 }
674 
675 /*
676  * This is the upward reentry point for packets arriving from the bridging
677  * module and from mac_rx for links not part of a bridge.
678  */
679 void
680 mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
681 {
682 	mac_impl_t		*mip = (mac_impl_t *)mh;
683 	mac_ring_t		*mr = (mac_ring_t *)mrh;
684 	mac_soft_ring_set_t 	*mac_srs;
685 	mblk_t			*bp = mp_chain;
686 	boolean_t		hw_classified = B_FALSE;
687 
688 	/*
689 	 * If there are any promiscuous mode callbacks defined for
690 	 * this MAC, pass them a copy if appropriate.
691 	 */
692 	if (mip->mi_promisc_list != NULL)
693 		mac_promisc_dispatch(mip, mp_chain, NULL);
694 
695 	if (mr != NULL) {
696 		/*
697 		 * If the SRS teardown has started, just return. The 'mr'
698 		 * continues to be valid until the driver unregisters the mac.
699 		 * Hardware classified packets will not make their way up
700 		 * beyond this point once the teardown has started. The driver
701 		 * is never passed a pointer to a flow entry or SRS or any
702 		 * structure that can be freed much before mac_unregister.
703 		 */
704 		mutex_enter(&mr->mr_lock);
705 		if ((mr->mr_state != MR_INUSE) || (mr->mr_flag &
706 		    (MR_INCIPIENT | MR_CONDEMNED | MR_QUIESCE))) {
707 			mutex_exit(&mr->mr_lock);
708 			freemsgchain(mp_chain);
709 			return;
710 		}
711 		if (mr->mr_classify_type == MAC_HW_CLASSIFIER) {
712 			hw_classified = B_TRUE;
713 			MR_REFHOLD_LOCKED(mr);
714 		}
715 		mutex_exit(&mr->mr_lock);
716 
717 		/*
718 		 * We check if an SRS is controlling this ring.
719 		 * If so, we can directly call the srs_lower_proc
720 		 * routine otherwise we need to go through mac_rx_classify
721 		 * to reach the right place.
722 		 */
723 		if (hw_classified) {
724 			mac_srs = mr->mr_srs;
725 			/*
726 			 * This is supposed to be the fast path.
727 			 * All packets received though here were steered by
728 			 * the hardware classifier, and share the same
729 			 * MAC header info.
730 			 */
731 			mac_srs->srs_rx.sr_lower_proc(mh,
732 			    (mac_resource_handle_t)mac_srs, mp_chain, B_FALSE);
733 			MR_REFRELE(mr);
734 			return;
735 		}
736 		/* We'll fall through to software classification */
737 	} else {
738 		flow_entry_t *flent;
739 		int err;
740 
741 		rw_enter(&mip->mi_rw_lock, RW_READER);
742 		if (mip->mi_single_active_client != NULL) {
743 			flent = mip->mi_single_active_client->mci_flent_list;
744 			FLOW_TRY_REFHOLD(flent, err);
745 			rw_exit(&mip->mi_rw_lock);
746 			if (err == 0) {
747 				(flent->fe_cb_fn)(flent->fe_cb_arg1,
748 				    flent->fe_cb_arg2, mp_chain, B_FALSE);
749 				FLOW_REFRELE(flent);
750 				return;
751 			}
752 		} else {
753 			rw_exit(&mip->mi_rw_lock);
754 		}
755 	}
756 
757 	if (!FLOW_TAB_EMPTY(mip->mi_flow_tab)) {
758 		if ((bp = mac_rx_flow(mh, mrh, bp)) == NULL)
759 			return;
760 	}
761 
762 	freemsgchain(bp);
763 }
764 
765 /* DATA TRANSMISSION */
766 
/*
 * A driver's notification to resume transmission, in case of a provider
 * without TX rings.
 *
 * mh is the driver's MAC handle.  This simply forwards to
 * mac_tx_ring_update() with a NULL ring handle.
 */
void
mac_tx_update(mac_handle_t mh)
{
	mac_tx_ring_update(mh, NULL);
}
776 
/*
 * A driver's notification to resume transmission on the specified TX ring.
 *
 * mh is the driver's MAC handle and rh the ring handle; mac_tx_update()
 * passes a NULL rh.  Both are handed unchanged to i_mac_tx_srs_notify().
 */
void
mac_tx_ring_update(mac_handle_t mh, mac_ring_handle_t rh)
{
	i_mac_tx_srs_notify((mac_impl_t *)mh, rh);
}
785 
786 /* LINK STATE */
/*
 * Notify the MAC layer about a link state change
 *
 * mh is the driver's MAC handle and link the new state.  The state is
 * recorded in mi_lowlinkstate before the notification is posted.
 */
void
mac_link_update(mac_handle_t mh, link_state_t link)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	/*
	 * Save the link state.
	 */
	mip->mi_lowlinkstate = link;

	/*
	 * Send a MAC_NOTE_LOWLINK notification.  This tells the notification
	 * thread to deliver both lower and upper notifications.
	 */
	i_mac_notify(mip, MAC_NOTE_LOWLINK);
}
806 
/*
 * Notify the MAC layer about a link state change due to bridging.
 *
 * Unlike mac_link_update(), this records the state in mi_linkstate (not
 * mi_lowlinkstate) and posts MAC_NOTE_LINK rather than MAC_NOTE_LOWLINK.
 */
void
mac_link_redo(mac_handle_t mh, link_state_t link)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	/*
	 * Save the link state.
	 */
	mip->mi_linkstate = link;

	/*
	 * Send a MAC_NOTE_LINK notification.  Only upper notifications are
	 * made.
	 */
	i_mac_notify(mip, MAC_NOTE_LINK);
}
826 
827 /* MINOR NODE HANDLING */
828 
/*
 * Given a dev_t, return the instance number (PPA) associated with it.
 * Drivers can use this in their getinfo(9e) implementation to lookup
 * the instance number (i.e. PPA) of the device, to use as an index to
 * their own array of soft state structures.
 *
 * Returns -1 on error.  The result is whatever dld_devt_to_instance()
 * returns for devt; no additional processing is done here.
 */
int
mac_devt_to_instance(dev_t devt)
{
	return (dld_devt_to_instance(devt));
}
842 
/*
 * This function returns the first minor number that is available for
 * driver private use.  All minor numbers smaller than this are
 * reserved for GLDv3 use.
 *
 * The value returned is the constant MAC_PRIVATE_MINOR.
 */
minor_t
mac_private_minor(void)
{
	return (MAC_PRIVATE_MINOR);
}
853 
854 /* OTHER CONTROL INFORMATION */
855 
856 /*
857  * A driver notified us that its primary MAC address has changed.
858  */
859 void
860 mac_unicst_update(mac_handle_t mh, const uint8_t *addr)
861 {
862 	mac_impl_t	*mip = (mac_impl_t *)mh;
863 
864 	if (mip->mi_type->mt_addr_length == 0)
865 		return;
866 
867 	i_mac_perim_enter(mip);
868 
869 	/*
870 	 * If address changes, freshen the MAC address value and update
871 	 * all MAC clients that share this MAC address.
872 	 */
873 	if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) != 0) {
874 		mac_freshen_macaddr(mac_find_macaddr(mip, mip->mi_addr),
875 		    (uint8_t *)addr);
876 	}
877 
878 	i_mac_perim_exit(mip);
879 
880 	/*
881 	 * Send a MAC_NOTE_UNICST notification.
882 	 */
883 	i_mac_notify(mip, MAC_NOTE_UNICST);
884 }
885 
886 void
887 mac_dst_update(mac_handle_t mh, const uint8_t *addr)
888 {
889 	mac_impl_t	*mip = (mac_impl_t *)mh;
890 
891 	if (mip->mi_type->mt_addr_length == 0)
892 		return;
893 
894 	i_mac_perim_enter(mip);
895 	bcopy(addr, mip->mi_dstaddr, mip->mi_type->mt_addr_length);
896 	i_mac_perim_exit(mip);
897 	i_mac_notify(mip, MAC_NOTE_DEST);
898 }
899 
900 /*
901  * MAC plugin information changed.
902  */
903 int
904 mac_pdata_update(mac_handle_t mh, void *mac_pdata, size_t dsize)
905 {
906 	mac_impl_t	*mip = (mac_impl_t *)mh;
907 
908 	/*
909 	 * Verify that the plugin supports MAC plugin data and that the
910 	 * supplied data is valid.
911 	 */
912 	if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
913 		return (EINVAL);
914 	if (!mip->mi_type->mt_ops.mtops_pdata_verify(mac_pdata, dsize))
915 		return (EINVAL);
916 
917 	if (mip->mi_pdata != NULL)
918 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
919 
920 	mip->mi_pdata = kmem_alloc(dsize, KM_SLEEP);
921 	bcopy(mac_pdata, mip->mi_pdata, dsize);
922 	mip->mi_pdata_size = dsize;
923 
924 	/*
925 	 * Since the MAC plugin data is used to construct MAC headers that
926 	 * were cached in fast-path headers, we need to flush fast-path
927 	 * information for links associated with this mac.
928 	 */
929 	i_mac_notify(mip, MAC_NOTE_FASTPATH_FLUSH);
930 	return (0);
931 }
932 
/*
 * Invoked by driver as well as the framework to notify its capability change.
 *
 * mh is the MAC handle.  The only action taken here is to post a
 * MAC_NOTE_CAPAB_CHG notification for that MAC.
 */
void
mac_capab_update(mac_handle_t mh)
{
	/* Send MAC_NOTE_CAPAB_CHG notification */
	i_mac_notify((mac_impl_t *)mh, MAC_NOTE_CAPAB_CHG);
}
942 
943 /*
944  * Used by normal drivers to update the max sdu size.
945  * We need to handle the case of a smaller mi_sdu_multicast
946  * since this is called by mac_set_mtu() even for drivers that
947  * have differing unicast and multicast mtu and we don't want to
948  * increase the multicast mtu by accident in that case.
949  */
950 int
951 mac_maxsdu_update(mac_handle_t mh, uint_t sdu_max)
952 {
953 	mac_impl_t	*mip = (mac_impl_t *)mh;
954 
955 	if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
956 		return (EINVAL);
957 	mip->mi_sdu_max = sdu_max;
958 	if (mip->mi_sdu_multicast > mip->mi_sdu_max)
959 		mip->mi_sdu_multicast = mip->mi_sdu_max;
960 
961 	/* Send a MAC_NOTE_SDU_SIZE notification. */
962 	i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
963 	return (0);
964 }
965 
966 /*
967  * Version of the above function that is used by drivers that have a different
968  * max sdu size for multicast/broadcast vs. unicast.
969  */
970 int
971 mac_maxsdu_update2(mac_handle_t mh, uint_t sdu_max, uint_t sdu_multicast)
972 {
973 	mac_impl_t	*mip = (mac_impl_t *)mh;
974 
975 	if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
976 		return (EINVAL);
977 	if (sdu_multicast == 0)
978 		sdu_multicast = sdu_max;
979 	if (sdu_multicast > sdu_max || sdu_multicast < mip->mi_sdu_min)
980 		return (EINVAL);
981 	mip->mi_sdu_max = sdu_max;
982 	mip->mi_sdu_multicast = sdu_multicast;
983 
984 	/* Send a MAC_NOTE_SDU_SIZE notification. */
985 	i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
986 	return (0);
987 }
988 
989 static void
990 mac_ring_intr_retarget(mac_group_t *group, mac_ring_t *ring)
991 {
992 	mac_client_impl_t *mcip;
993 	flow_entry_t *flent;
994 	mac_soft_ring_set_t *mac_rx_srs;
995 	mac_cpus_t *srs_cpu;
996 	int i;
997 
998 	if (((mcip = MAC_GROUP_ONLY_CLIENT(group)) != NULL) &&
999 	    (!ring->mr_info.mri_intr.mi_ddi_shared)) {
1000 		/* interrupt can be re-targeted */
1001 		ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED);
1002 		flent = mcip->mci_flent;
1003 		if (ring->mr_type == MAC_RING_TYPE_RX) {
1004 			for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
1005 				mac_rx_srs = flent->fe_rx_srs[i];
1006 				if (mac_rx_srs->srs_ring != ring)
1007 					continue;
1008 				srs_cpu = &mac_rx_srs->srs_cpu;
1009 				mutex_enter(&cpu_lock);
1010 				mac_rx_srs_retarget_intr(mac_rx_srs,
1011 				    srs_cpu->mc_rx_intr_cpu);
1012 				mutex_exit(&cpu_lock);
1013 				break;
1014 			}
1015 		} else {
1016 			if (flent->fe_tx_srs != NULL) {
1017 				mutex_enter(&cpu_lock);
1018 				mac_tx_srs_retarget_intr(
1019 				    flent->fe_tx_srs);
1020 				mutex_exit(&cpu_lock);
1021 			}
1022 		}
1023 	}
1024 }
1025 
/*
 * Clients like aggr create pseudo rings (mac_ring_t) and expose them to
 * their clients. There is a 1-1 mapping between a pseudo ring and the
 * hardware ring. ddi interrupt handles are exported from the hardware
 * ring to the pseudo ring. Thus when the interrupt handle changes,
 * clients of aggr that are using the handle need to use the new handle
 * and re-target their interrupts.
 *
 * mip is the MAC owning the hardware ring 'ring', and ddh the new
 * interrupt handle (NULL when interrupts are being reset).
 *
 * Locking: this function calls i_mac_perim_exit(mip) before it calls
 * i_mac_perim_enter(mip), so the caller is expected to hold mip's
 * perimeter on entry.  On every return path, including the early return
 * when the aggr cannot be found by name, mip's perimeter has been exited.
 *
 * NOTE(review): ring->mr_prh is dereferenced on entry without a NULL
 * check; callers presumably only call this when a pseudo ring exists.
 */
static void
mac_pseudo_ring_intr_retarget(mac_impl_t *mip, mac_ring_t *ring,
    ddi_intr_handle_t ddh)
{
	mac_ring_t *pring;
	mac_group_t *pgroup;
	mac_impl_t *pmip;
	char macname[MAXNAMELEN];
	mac_perim_handle_t p_mph;
	uint64_t saved_gen_num;

again:
	/*
	 * Snapshot the pseudo ring, its group and owning MAC, and the
	 * current generation number while still inside mip's perimeter.
	 */
	pring = (mac_ring_t *)ring->mr_prh;
	pgroup = (mac_group_t *)pring->mr_gh;
	pmip = (mac_impl_t *)pgroup->mrg_mh;
	saved_gen_num = ring->mr_gen_num;
	(void) strlcpy(macname, pmip->mi_name, MAXNAMELEN);
	/*
	 * We need to enter aggr's perimeter. The locking hierarchy
	 * dictates that aggr's perimeter should be entered first
	 * and then the port's perimeter. So drop the port's
	 * perimeter, enter aggr's and then re-enter port's
	 * perimeter.
	 */
	i_mac_perim_exit(mip);
	/*
	 * While we know pmip is the aggr's mip, there is a
	 * possibility that aggr could have unregistered by
	 * the time we exit port's perimeter (mip) and
	 * enter aggr's perimeter (pmip). To avoid that
	 * scenario, enter aggr's perimeter using its name.
	 */
	if (mac_perim_enter_by_macname(macname, &p_mph) != 0)
		return;
	i_mac_perim_enter(mip);
	/*
	 * Check if the ring got assigned to another aggregation before
	 * we could enter aggr's and the port's perimeter. When a ring
	 * gets deleted from an aggregation, it calls mac_stop_ring()
	 * which increments the generation number. So checking
	 * generation number will be enough.
	 */
	if (ring->mr_gen_num != saved_gen_num && ring->mr_prh != NULL) {
		/*
		 * Drop both perimeters, then retake only the port's so the
		 * retry starts from the same locking state as the first pass.
		 */
		i_mac_perim_exit(mip);
		mac_perim_exit(p_mph);
		i_mac_perim_enter(mip);
		goto again;
	}

	/* Check if pseudo ring is still present */
	if (ring->mr_prh != NULL) {
		/* Propagate the new handle and shared flag to the pseudo ring. */
		pring->mr_info.mri_intr.mi_ddi_handle = ddh;
		pring->mr_info.mri_intr.mi_ddi_shared =
		    ring->mr_info.mri_intr.mi_ddi_shared;
		if (ddh != NULL)
			mac_ring_intr_retarget(pgroup, pring);
	}
	i_mac_perim_exit(mip);
	mac_perim_exit(p_mph);
}
1094 /*
1095  * API called by driver to provide new interrupt handle for TX/RX rings.
1096  * This usually happens when IRM (Interrupt Resource Manangement)
1097  * framework either gives the driver more MSI-x interrupts or takes
1098  * away MSI-x interrupts from the driver.
1099  */
1100 void
1101 mac_ring_intr_set(mac_ring_handle_t mrh, ddi_intr_handle_t ddh)
1102 {
1103 	mac_ring_t	*ring = (mac_ring_t *)mrh;
1104 	mac_group_t	*group = (mac_group_t *)ring->mr_gh;
1105 	mac_impl_t	*mip = (mac_impl_t *)group->mrg_mh;
1106 
1107 	i_mac_perim_enter(mip);
1108 	ring->mr_info.mri_intr.mi_ddi_handle = ddh;
1109 	if (ddh == NULL) {
1110 		/* Interrupts being reset */
1111 		ring->mr_info.mri_intr.mi_ddi_shared = B_FALSE;
1112 		if (ring->mr_prh != NULL) {
1113 			mac_pseudo_ring_intr_retarget(mip, ring, ddh);
1114 			return;
1115 		}
1116 	} else {
1117 		/* New interrupt handle */
1118 		mac_compare_ddi_handle(mip->mi_rx_groups,
1119 		    mip->mi_rx_group_count, ring);
1120 		if (!ring->mr_info.mri_intr.mi_ddi_shared) {
1121 			mac_compare_ddi_handle(mip->mi_tx_groups,
1122 			    mip->mi_tx_group_count, ring);
1123 		}
1124 		if (ring->mr_prh != NULL) {
1125 			mac_pseudo_ring_intr_retarget(mip, ring, ddh);
1126 			return;
1127 		} else {
1128 			mac_ring_intr_retarget(group, ring);
1129 		}
1130 	}
1131 	i_mac_perim_exit(mip);
1132 }
1133 
1134 /* PRIVATE FUNCTIONS, FOR INTERNAL USE ONLY */
1135 
1136 /*
1137  * Updates the mac_impl structure with the current state of the link
1138  */
1139 static void
1140 i_mac_log_link_state(mac_impl_t *mip)
1141 {
1142 	/*
1143 	 * If no change, then it is not interesting.
1144 	 */
1145 	if (mip->mi_lastlowlinkstate == mip->mi_lowlinkstate)
1146 		return;
1147 
1148 	switch (mip->mi_lowlinkstate) {
1149 	case LINK_STATE_UP:
1150 		if (mip->mi_type->mt_ops.mtops_ops & MTOPS_LINK_DETAILS) {
1151 			char det[200];
1152 
1153 			mip->mi_type->mt_ops.mtops_link_details(det,
1154 			    sizeof (det), (mac_handle_t)mip, mip->mi_pdata);
1155 
1156 			cmn_err(CE_NOTE, "!%s link up, %s", mip->mi_name, det);
1157 		} else {
1158 			cmn_err(CE_NOTE, "!%s link up", mip->mi_name);
1159 		}
1160 		break;
1161 
1162 	case LINK_STATE_DOWN:
1163 		/*
1164 		 * Only transitions from UP to DOWN are interesting
1165 		 */
1166 		if (mip->mi_lastlowlinkstate != LINK_STATE_UNKNOWN)
1167 			cmn_err(CE_NOTE, "!%s link down", mip->mi_name);
1168 		break;
1169 
1170 	case LINK_STATE_UNKNOWN:
1171 		/*
1172 		 * This case is normally not interesting.
1173 		 */
1174 		break;
1175 	}
1176 	mip->mi_lastlowlinkstate = mip->mi_lowlinkstate;
1177 }
1178 
/*
 * Main routine for the callbacks notifications thread.
 *
 * arg is the mac_impl_t this thread serves. The thread sleeps on the
 * notify callback info's condition variable until mi_notify_bits is
 * non-zero, then claims the pending bits and, with the lock dropped,
 * invokes the callbacks for every notification type whose bit is set.
 * The bit at position MAC_NNOTE is the request to quit; on seeing it the
 * thread sets MIS_NOTIFY_DONE, broadcasts on the condition variable and
 * exits.
 */
static void
i_mac_notify_thread(void *arg)
{
	mac_impl_t	*mip = arg;
	callb_cpr_t	cprinfo;
	mac_cb_t	*mcb;
	mac_cb_info_t	*mcbi;
	mac_notify_cb_t	*mncb;

	mcbi = &mip->mi_notify_cb_info;
	CALLB_CPR_INIT(&cprinfo, mcbi->mcbi_lockp, callb_generic_cpr,
	    "i_mac_notify_thread");

	mutex_enter(mcbi->mcbi_lockp);

	/* The lock is held at the top of every iteration. */
	for (;;) {
		uint32_t	bits;
		uint32_t	type;

		bits = mip->mi_notify_bits;
		if (bits == 0) {
			/* Nothing pending: sleep until signalled. */
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
			CALLB_CPR_SAFE_END(&cprinfo, mcbi->mcbi_lockp);
			continue;
		}
		/* Claim all pending bits; new ones accumulate afresh. */
		mip->mi_notify_bits = 0;
		if ((bits & (1 << MAC_NNOTE)) != 0) {
			/* request to quit */
			ASSERT(mip->mi_state_flags & MIS_DISABLED);
			break;
		}

		/* Callbacks below run without the lock held. */
		mutex_exit(mcbi->mcbi_lockp);

		/*
		 * Log link changes on the actual link, but then do reports on
		 * synthetic state (if part of a bridge).
		 */
		if ((bits & (1 << MAC_NOTE_LOWLINK)) != 0) {
			link_state_t newstate;
			mac_handle_t mh;

			i_mac_log_link_state(mip);
			newstate = mip->mi_lowlinkstate;
			/*
			 * mi_bridge_link is tested once without the lock
			 * and again after taking mi_bridge_lock before it
			 * is passed to the bridge callback.
			 */
			if (mip->mi_bridge_link != NULL) {
				mutex_enter(&mip->mi_bridge_lock);
				if ((mh = mip->mi_bridge_link) != NULL) {
					newstate = mac_bridge_ls_cb(mh,
					    newstate);
				}
				mutex_exit(&mip->mi_bridge_lock);
			}
			/*
			 * A change in the reported link state is turned
			 * into a MAC_NOTE_LINK notification that is
			 * handled in this same pass.
			 */
			if (newstate != mip->mi_linkstate) {
				mip->mi_linkstate = newstate;
				bits |= 1 << MAC_NOTE_LINK;
			}
		}

		/*
		 * Do notification callbacks for each notification type.
		 */
		for (type = 0; type < MAC_NNOTE; type++) {
			if ((bits & (1 << type)) == 0) {
				continue;
			}

			/* Handler from mac_notify_cb_list[], if one is set. */
			if (mac_notify_cb_list[type] != NULL)
				(*mac_notify_cb_list[type])(mip);

			/*
			 * Walk the list of notifications.
			 * NOTE(review): the WALKER_INC/DCR pair presumably
			 * protects the list against removal during the
			 * walk; confirm against the macro definitions.
			 */
			MAC_CALLBACK_WALKER_INC(&mip->mi_notify_cb_info);
			for (mcb = mip->mi_notify_cb_list; mcb != NULL;
			    mcb = mcb->mcb_nextp) {
				mncb = (mac_notify_cb_t *)mcb->mcb_objp;
				mncb->mncb_fn(mncb->mncb_arg, type);
			}
			MAC_CALLBACK_WALKER_DCR(&mip->mi_notify_cb_info,
			    &mip->mi_notify_cb_list);
		}

		/* Re-take the lock before looking at the bits again. */
		mutex_enter(mcbi->mcbi_lockp);
	}

	/* Tell i_mac_notify_exit() that this thread is finished. */
	mip->mi_state_flags |= MIS_NOTIFY_DONE;
	cv_broadcast(&mcbi->mcbi_cv);

	/* CALLB_CPR_EXIT drops the lock */
	CALLB_CPR_EXIT(&cprinfo);
	thread_exit();
}
1275 
1276 /*
1277  * Signal the i_mac_notify_thread asking it to quit.
1278  * Then wait till it is done.
1279  */
1280 void
1281 i_mac_notify_exit(mac_impl_t *mip)
1282 {
1283 	mac_cb_info_t	*mcbi;
1284 
1285 	mcbi = &mip->mi_notify_cb_info;
1286 
1287 	mutex_enter(mcbi->mcbi_lockp);
1288 	mip->mi_notify_bits = (1 << MAC_NNOTE);
1289 	cv_broadcast(&mcbi->mcbi_cv);
1290 
1291 
1292 	while ((mip->mi_notify_thread != NULL) &&
1293 	    !(mip->mi_state_flags & MIS_NOTIFY_DONE)) {
1294 		cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
1295 	}
1296 
1297 	/* Necessary clean up before doing kmem_cache_free */
1298 	mip->mi_state_flags &= ~MIS_NOTIFY_DONE;
1299 	mip->mi_notify_bits = 0;
1300 	mip->mi_notify_thread = NULL;
1301 	mutex_exit(mcbi->mcbi_lockp);
1302 }
1303 
1304 /*
1305  * Entry point invoked by drivers to dynamically add a ring to an
1306  * existing group.
1307  */
1308 int
1309 mac_group_add_ring(mac_group_handle_t gh, int index)
1310 {
1311 	mac_group_t *group = (mac_group_t *)gh;
1312 	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1313 	int ret;
1314 
1315 	i_mac_perim_enter(mip);
1316 	ret = i_mac_group_add_ring(group, NULL, index);
1317 	i_mac_perim_exit(mip);
1318 	return (ret);
1319 }
1320 
1321 /*
1322  * Entry point invoked by drivers to dynamically remove a ring
1323  * from an existing group. The specified ring handle must no longer
1324  * be used by the driver after a call to this function.
1325  */
1326 void
1327 mac_group_rem_ring(mac_group_handle_t gh, mac_ring_handle_t rh)
1328 {
1329 	mac_group_t *group = (mac_group_t *)gh;
1330 	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1331 
1332 	i_mac_perim_enter(mip);
1333 	i_mac_group_rem_ring(group, (mac_ring_t *)rh, B_TRUE);
1334 	i_mac_perim_exit(mip);
1335 }
1336 
1337 /*
1338  * mac_prop_info_*() callbacks called from the driver's prefix_propinfo()
1339  * entry points.
1340  */
1341 
1342 void
1343 mac_prop_info_set_default_uint8(mac_prop_info_handle_t ph, uint8_t val)
1344 {
1345 	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1346 
1347 	/* nothing to do if the caller doesn't want the default value */
1348 	if (pr->pr_default == NULL)
1349 		return;
1350 
1351 	ASSERT(pr->pr_default_size >= sizeof (uint8_t));
1352 
1353 	*(uint8_t *)(pr->pr_default) = val;
1354 	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1355 }
1356 
1357 void
1358 mac_prop_info_set_default_uint64(mac_prop_info_handle_t ph, uint64_t val)
1359 {
1360 	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1361 
1362 	/* nothing to do if the caller doesn't want the default value */
1363 	if (pr->pr_default == NULL)
1364 		return;
1365 
1366 	ASSERT(pr->pr_default_size >= sizeof (uint64_t));
1367 
1368 	bcopy(&val, pr->pr_default, sizeof (val));
1369 
1370 	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1371 }
1372 
1373 void
1374 mac_prop_info_set_default_uint32(mac_prop_info_handle_t ph, uint32_t val)
1375 {
1376 	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1377 
1378 	/* nothing to do if the caller doesn't want the default value */
1379 	if (pr->pr_default == NULL)
1380 		return;
1381 
1382 	ASSERT(pr->pr_default_size >= sizeof (uint32_t));
1383 
1384 	bcopy(&val, pr->pr_default, sizeof (val));
1385 
1386 	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1387 }
1388 
1389 void
1390 mac_prop_info_set_default_str(mac_prop_info_handle_t ph, const char *str)
1391 {
1392 	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1393 
1394 	/* nothing to do if the caller doesn't want the default value */
1395 	if (pr->pr_default == NULL)
1396 		return;
1397 
1398 	if (strlen(str) >= pr->pr_default_size)
1399 		pr->pr_errno = ENOBUFS;
1400 	else
1401 		(void) strlcpy(pr->pr_default, str, pr->pr_default_size);
1402 	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1403 }
1404 
1405 void
1406 mac_prop_info_set_default_link_flowctrl(mac_prop_info_handle_t ph,
1407     link_flowctrl_t val)
1408 {
1409 	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1410 
1411 	/* nothing to do if the caller doesn't want the default value */
1412 	if (pr->pr_default == NULL)
1413 		return;
1414 
1415 	ASSERT(pr->pr_default_size >= sizeof (link_flowctrl_t));
1416 
1417 	bcopy(&val, pr->pr_default, sizeof (val));
1418 
1419 	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1420 }
1421 
1422 void
1423 mac_prop_info_set_range_uint32(mac_prop_info_handle_t ph, uint32_t min,
1424     uint32_t max)
1425 {
1426 	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1427 	mac_propval_range_t *range = pr->pr_range;
1428 	mac_propval_uint32_range_t *range32;
1429 
1430 	/* nothing to do if the caller doesn't want the range info */
1431 	if (range == NULL)
1432 		return;
1433 
1434 	if (pr->pr_range_cur_count++ == 0) {
1435 		/* first range */
1436 		pr->pr_flags |= MAC_PROP_INFO_RANGE;
1437 		range->mpr_type = MAC_PROPVAL_UINT32;
1438 	} else {
1439 		/* all ranges of a property should be of the same type */
1440 		ASSERT(range->mpr_type == MAC_PROPVAL_UINT32);
1441 		if (pr->pr_range_cur_count > range->mpr_count) {
1442 			pr->pr_errno = ENOSPC;
1443 			return;
1444 		}
1445 	}
1446 
1447 	range32 = range->mpr_range_uint32;
1448 	range32[pr->pr_range_cur_count - 1].mpur_min = min;
1449 	range32[pr->pr_range_cur_count - 1].mpur_max = max;
1450 }
1451 
1452 void
1453 mac_prop_info_set_perm(mac_prop_info_handle_t ph, uint8_t perm)
1454 {
1455 	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1456 
1457 	pr->pr_perm = perm;
1458 	pr->pr_flags |= MAC_PROP_INFO_PERM;
1459 }
1460 
1461 void mac_hcksum_get(mblk_t *mp, uint32_t *start, uint32_t *stuff,
1462     uint32_t *end, uint32_t *value, uint32_t *flags_ptr)
1463 {
1464 	uint32_t flags;
1465 
1466 	ASSERT(DB_TYPE(mp) == M_DATA);
1467 
1468 	flags = DB_CKSUMFLAGS(mp) & HCK_FLAGS;
1469 	if ((flags & (HCK_PARTIALCKSUM | HCK_FULLCKSUM)) != 0) {
1470 		if (value != NULL)
1471 			*value = (uint32_t)DB_CKSUM16(mp);
1472 		if ((flags & HCK_PARTIALCKSUM) != 0) {
1473 			if (start != NULL)
1474 				*start = (uint32_t)DB_CKSUMSTART(mp);
1475 			if (stuff != NULL)
1476 				*stuff = (uint32_t)DB_CKSUMSTUFF(mp);
1477 			if (end != NULL)
1478 				*end = (uint32_t)DB_CKSUMEND(mp);
1479 		}
1480 	}
1481 
1482 	if (flags_ptr != NULL)
1483 		*flags_ptr = flags;
1484 }
1485 
1486 void mac_hcksum_set(mblk_t *mp, uint32_t start, uint32_t stuff,
1487     uint32_t end, uint32_t value, uint32_t flags)
1488 {
1489 	ASSERT(DB_TYPE(mp) == M_DATA);
1490 
1491 	DB_CKSUMSTART(mp) = (intptr_t)start;
1492 	DB_CKSUMSTUFF(mp) = (intptr_t)stuff;
1493 	DB_CKSUMEND(mp) = (intptr_t)end;
1494 	DB_CKSUMFLAGS(mp) = (uint16_t)flags;
1495 	DB_CKSUM16(mp) = (uint16_t)value;
1496 }
1497 
1498 void
1499 mac_lso_get(mblk_t *mp, uint32_t *mss, uint32_t *flags)
1500 {
1501 	ASSERT(DB_TYPE(mp) == M_DATA);
1502 
1503 	if (flags != NULL) {
1504 		*flags = DB_CKSUMFLAGS(mp) & HW_LSO;
1505 		if ((*flags != 0) && (mss != NULL))
1506 			*mss = (uint32_t)DB_LSOMSS(mp);
1507 	}
1508 }
1509