xref: /illumos-gate/usr/src/uts/common/io/mac/mac_provider.c (revision 1bff1300cebf1ea8e11ce928b10e208097e67f24)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2018 Joyent, Inc.
25  * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved.
26  */
27 
28 #include <sys/types.h>
29 #include <sys/conf.h>
30 #include <sys/id_space.h>
31 #include <sys/esunddi.h>
32 #include <sys/stat.h>
33 #include <sys/mkdev.h>
34 #include <sys/stream.h>
35 #include <sys/strsubr.h>
36 #include <sys/dlpi.h>
37 #include <sys/modhash.h>
38 #include <sys/mac.h>
39 #include <sys/mac_provider.h>
40 #include <sys/mac_impl.h>
41 #include <sys/mac_client_impl.h>
42 #include <sys/mac_client_priv.h>
43 #include <sys/mac_soft_ring.h>
44 #include <sys/mac_stat.h>
45 #include <sys/dld.h>
46 #include <sys/modctl.h>
47 #include <sys/fs/dv_node.h>
48 #include <sys/thread.h>
49 #include <sys/proc.h>
50 #include <sys/callb.h>
51 #include <sys/cpuvar.h>
52 #include <sys/atomic.h>
53 #include <sys/sdt.h>
54 #include <sys/mac_flow.h>
55 #include <sys/ddi_intr_impl.h>
56 #include <sys/disp.h>
57 #include <sys/sdt.h>
58 #include <sys/pattr.h>
59 #include <sys/strsun.h>
60 #include <sys/vlan.h>
61 
62 /*
63  * MAC Provider Interface.
64  *
65  * Interface for GLDv3 compatible NIC drivers.
66  */
67 
68 static void i_mac_notify_thread(void *);
69 
70 typedef void (*mac_notify_default_cb_fn_t)(mac_impl_t *);
71 
72 static const mac_notify_default_cb_fn_t mac_notify_cb_list[MAC_NNOTE] = {
73 	mac_fanout_recompute,	/* MAC_NOTE_LINK */
74 	NULL,		/* MAC_NOTE_UNICST */
75 	NULL,		/* MAC_NOTE_TX */
76 	NULL,		/* MAC_NOTE_DEVPROMISC */
77 	NULL,		/* MAC_NOTE_FASTPATH_FLUSH */
78 	NULL,		/* MAC_NOTE_SDU_SIZE */
79 	NULL,		/* MAC_NOTE_MARGIN */
80 	NULL,		/* MAC_NOTE_CAPAB_CHG */
81 	NULL		/* MAC_NOTE_LOWLINK */
82 };
83 
84 /*
85  * Driver support functions.
86  */
87 
88 /* REGISTRATION */
89 
90 mac_register_t *
91 mac_alloc(uint_t mac_version)
92 {
93 	mac_register_t *mregp;
94 
95 	/*
96 	 * Make sure there isn't a version mismatch between the driver and
97 	 * the framework.  In the future, if multiple versions are
98 	 * supported, this check could become more sophisticated.
99 	 */
100 	if (mac_version != MAC_VERSION)
101 		return (NULL);
102 
103 	mregp = kmem_zalloc(sizeof (mac_register_t), KM_SLEEP);
104 	mregp->m_version = mac_version;
105 	return (mregp);
106 }
107 
108 void
109 mac_free(mac_register_t *mregp)
110 {
111 	kmem_free(mregp, sizeof (mac_register_t));
112 }
113 
114 /*
115  * mac_register() is how drivers register new MACs with the GLDv3
116  * framework.  The mregp argument is allocated by drivers using the
117  * mac_alloc() function, and can be freed using mac_free() immediately upon
118  * return from mac_register().  Upon success (0 return value), the mhp
119  * opaque pointer becomes the driver's handle to its MAC interface, and is
120  * the argument to all other mac module entry points.
121  */
122 /* ARGSUSED */
123 int
124 mac_register(mac_register_t *mregp, mac_handle_t *mhp)
125 {
126 	mac_impl_t		*mip;
127 	mactype_t		*mtype;
128 	int			err = EINVAL;
129 	struct devnames		*dnp = NULL;
130 	uint_t			instance;
131 	boolean_t		style1_created = B_FALSE;
132 	boolean_t		style2_created = B_FALSE;
133 	char			*driver;
134 	minor_t			minor = 0;
135 
136 	/* A successful call to mac_init_ops() sets the DN_GLDV3_DRIVER flag. */
137 	if (!GLDV3_DRV(ddi_driver_major(mregp->m_dip)))
138 		return (EINVAL);
139 
140 	/* Find the required MAC-Type plugin. */
141 	if ((mtype = mactype_getplugin(mregp->m_type_ident)) == NULL)
142 		return (EINVAL);
143 
144 	/* Create a mac_impl_t to represent this MAC. */
145 	mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP);
146 
147 	/*
148 	 * The mac is not ready for open yet.
149 	 */
150 	mip->mi_state_flags |= MIS_DISABLED;
151 
152 	/*
153 	 * When a mac is registered, the m_instance field can be set to:
154 	 *
155 	 *  0:	Get the mac's instance number from m_dip.
156 	 *	This is usually used for physical device dips.
157 	 *
158 	 *  [1 .. MAC_MAX_MINOR-1]: Use the value as the mac's instance number.
159 	 *	For example, when an aggregation is created with the key option,
160 	 *	"key" will be used as the instance number.
161 	 *
162 	 *  -1: Assign an instance number from [MAC_MAX_MINOR .. MAXMIN-1].
163 	 *	This is often used when a MAC of a virtual link is registered
164 	 *	(e.g., aggregation when "key" is not specified, or vnic).
165 	 *
166 	 * Note that the instance number is used to derive the mi_minor field
167 	 * of mac_impl_t, which will then be used to derive the name of kstats
168 	 * and the devfs nodes.  The first 2 cases are needed to preserve
169 	 * backward compatibility.
170 	 */
171 	switch (mregp->m_instance) {
172 	case 0:
173 		instance = ddi_get_instance(mregp->m_dip);
174 		break;
175 	case ((uint_t)-1):
176 		minor = mac_minor_hold(B_TRUE);
177 		if (minor == 0) {
178 			err = ENOSPC;
179 			goto fail;
180 		}
181 		instance = minor - 1;
182 		break;
183 	default:
184 		instance = mregp->m_instance;
185 		if (instance >= MAC_MAX_MINOR) {
186 			err = EINVAL;
187 			goto fail;
188 		}
189 		break;
190 	}
191 
192 	mip->mi_minor = (minor_t)(instance + 1);
193 	mip->mi_dip = mregp->m_dip;
194 	mip->mi_clients_list = NULL;
195 	mip->mi_nclients = 0;
196 
197 	/* Set the default IEEE Port VLAN Identifier */
198 	mip->mi_pvid = 1;
199 
200 	/* Default bridge link learning protection values */
201 	mip->mi_llimit = 1000;
202 	mip->mi_ldecay = 200;
203 
204 	driver = (char *)ddi_driver_name(mip->mi_dip);
205 
206 	/* Construct the MAC name as <drvname><instance> */
207 	(void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d",
208 	    driver, instance);
209 
210 	mip->mi_driver = mregp->m_driver;
211 
212 	mip->mi_type = mtype;
213 	mip->mi_margin = mregp->m_margin;
214 	mip->mi_info.mi_media = mtype->mt_type;
215 	mip->mi_info.mi_nativemedia = mtype->mt_nativetype;
216 	if (mregp->m_max_sdu <= mregp->m_min_sdu)
217 		goto fail;
218 	if (mregp->m_multicast_sdu == 0)
219 		mregp->m_multicast_sdu = mregp->m_max_sdu;
220 	if (mregp->m_multicast_sdu < mregp->m_min_sdu ||
221 	    mregp->m_multicast_sdu > mregp->m_max_sdu)
222 		goto fail;
223 	mip->mi_sdu_min = mregp->m_min_sdu;
224 	mip->mi_sdu_max = mregp->m_max_sdu;
225 	mip->mi_sdu_multicast = mregp->m_multicast_sdu;
226 	mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length;
227 	/*
228 	 * If the media supports a broadcast address, cache a pointer to it
229 	 * in the mac_info_t so that upper layers can use it.
230 	 */
231 	mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr;
232 
233 	mip->mi_v12n_level = mregp->m_v12n;
234 
235 	/*
236 	 * Copy the unicast source address into the mac_info_t, but only if
237 	 * the MAC-Type defines a non-zero address length.  We need to
238 	 * handle MAC-Types that have an address length of 0
239 	 * (point-to-point protocol MACs for example).
240 	 */
241 	if (mip->mi_type->mt_addr_length > 0) {
242 		if (mregp->m_src_addr == NULL)
243 			goto fail;
244 		mip->mi_info.mi_unicst_addr =
245 		    kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP);
246 		bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr,
247 		    mip->mi_type->mt_addr_length);
248 
249 		/*
250 		 * Copy the fixed 'factory' MAC address from the immutable
251 		 * info.  This is taken to be the MAC address currently in
252 		 * use.
253 		 */
254 		bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr,
255 		    mip->mi_type->mt_addr_length);
256 
257 		/*
258 		 * At this point, we should set up the classification
259 		 * rules etc but we delay it till mac_open() so that
260 		 * the resource discovery has taken place and we
261 		 * know someone wants to use the device. Otherwise
262 		 * memory gets allocated for Rx ring structures even
263 		 * during probe.
264 		 */
265 
266 		/* Copy the destination address if one is provided. */
267 		if (mregp->m_dst_addr != NULL) {
268 			bcopy(mregp->m_dst_addr, mip->mi_dstaddr,
269 			    mip->mi_type->mt_addr_length);
270 			mip->mi_dstaddr_set = B_TRUE;
271 		}
272 	} else if (mregp->m_src_addr != NULL) {
273 		goto fail;
274 	}
275 
276 	/*
277 	 * The format of the m_pdata is specific to the plugin.  It is
278 	 * passed in as an argument to all of the plugin callbacks.  The
279 	 * driver can update this information by calling
280 	 * mac_pdata_update().
281 	 */
282 	if (mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY) {
283 		/*
284 		 * Verify if the supplied plugin data is valid.  Note that
285 		 * even if the caller passed in a NULL pointer as plugin data,
286 		 * we still need to verify if that's valid as the plugin may
287 		 * require plugin data to function.
288 		 */
289 		if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata,
290 		    mregp->m_pdata_size)) {
291 			goto fail;
292 		}
293 		if (mregp->m_pdata != NULL) {
294 			mip->mi_pdata =
295 			    kmem_alloc(mregp->m_pdata_size, KM_SLEEP);
296 			bcopy(mregp->m_pdata, mip->mi_pdata,
297 			    mregp->m_pdata_size);
298 			mip->mi_pdata_size = mregp->m_pdata_size;
299 		}
300 	} else if (mregp->m_pdata != NULL) {
301 		/*
302 		 * The caller supplied non-NULL plugin data, but the plugin
303 		 * does not recognize plugin data.
304 		 */
305 		err = EINVAL;
306 		goto fail;
307 	}
308 
309 	/*
310 	 * Register the private properties.
311 	 */
312 	mac_register_priv_prop(mip, mregp->m_priv_props);
313 
314 	/*
315 	 * Stash the driver callbacks into the mac_impl_t, but first sanity
316 	 * check to make sure all mandatory callbacks are set.
317 	 */
318 	if (mregp->m_callbacks->mc_getstat == NULL ||
319 	    mregp->m_callbacks->mc_start == NULL ||
320 	    mregp->m_callbacks->mc_stop == NULL ||
321 	    mregp->m_callbacks->mc_setpromisc == NULL ||
322 	    mregp->m_callbacks->mc_multicst == NULL) {
323 		goto fail;
324 	}
325 	mip->mi_callbacks = mregp->m_callbacks;
326 
327 	if (mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LEGACY,
328 	    &mip->mi_capab_legacy)) {
329 		mip->mi_state_flags |= MIS_LEGACY;
330 		mip->mi_phy_dev = mip->mi_capab_legacy.ml_dev;
331 	} else {
332 		mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip),
333 		    mip->mi_minor);
334 	}
335 
336 	/*
337 	 * Allocate a notification thread. thread_create blocks for memory
338 	 * if needed, it never fails.
339 	 */
340 	mip->mi_notify_thread = thread_create(NULL, 0, i_mac_notify_thread,
341 	    mip, 0, &p0, TS_RUN, minclsyspri);
342 
343 	/*
344 	 * Initialize the capabilities
345 	 */
346 
347 	bzero(&mip->mi_rx_rings_cap, sizeof (mac_capab_rings_t));
348 	bzero(&mip->mi_tx_rings_cap, sizeof (mac_capab_rings_t));
349 
350 	if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, NULL))
351 		mip->mi_state_flags |= MIS_IS_VNIC;
352 
353 	if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_AGGR, NULL))
354 		mip->mi_state_flags |= MIS_IS_AGGR;
355 
356 	mac_addr_factory_init(mip);
357 
358 	mac_transceiver_init(mip);
359 
360 	mac_led_init(mip);
361 
362 	/*
363 	 * Enforce the virtrualization level registered.
364 	 */
365 	if (mip->mi_v12n_level & MAC_VIRT_LEVEL1) {
366 		if (mac_init_rings(mip, MAC_RING_TYPE_RX) != 0 ||
367 		    mac_init_rings(mip, MAC_RING_TYPE_TX) != 0)
368 			goto fail;
369 
370 		/*
371 		 * The driver needs to register at least rx rings for this
372 		 * virtualization level.
373 		 */
374 		if (mip->mi_rx_groups == NULL)
375 			goto fail;
376 	}
377 
378 	/*
379 	 * The driver must set mc_unicst entry point to NULL when it advertises
380 	 * CAP_RINGS for rx groups.
381 	 */
382 	if (mip->mi_rx_groups != NULL) {
383 		if (mregp->m_callbacks->mc_unicst != NULL)
384 			goto fail;
385 	} else {
386 		if (mregp->m_callbacks->mc_unicst == NULL)
387 			goto fail;
388 	}
389 
390 	/*
391 	 * Initialize MAC addresses. Must be called after mac_init_rings().
392 	 */
393 	mac_init_macaddr(mip);
394 
395 	mip->mi_share_capab.ms_snum = 0;
396 	if (mip->mi_v12n_level & MAC_VIRT_HIO) {
397 		(void) mac_capab_get((mac_handle_t)mip, MAC_CAPAB_SHARES,
398 		    &mip->mi_share_capab);
399 	}
400 
401 	/*
402 	 * Initialize the kstats for this device.
403 	 */
404 	mac_driver_stat_create(mip);
405 
406 	/* Zero out any properties. */
407 	bzero(&mip->mi_resource_props, sizeof (mac_resource_props_t));
408 
409 	if (mip->mi_minor <= MAC_MAX_MINOR) {
410 		/* Create a style-2 DLPI device */
411 		if (ddi_create_minor_node(mip->mi_dip, driver, S_IFCHR, 0,
412 		    DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS)
413 			goto fail;
414 		style2_created = B_TRUE;
415 
416 		/* Create a style-1 DLPI device */
417 		if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR,
418 		    mip->mi_minor, DDI_NT_NET, 0) != DDI_SUCCESS)
419 			goto fail;
420 		style1_created = B_TRUE;
421 	}
422 
423 	mac_flow_l2tab_create(mip, &mip->mi_flow_tab);
424 
425 	rw_enter(&i_mac_impl_lock, RW_WRITER);
426 	if (mod_hash_insert(i_mac_impl_hash,
427 	    (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) {
428 		rw_exit(&i_mac_impl_lock);
429 		err = EEXIST;
430 		goto fail;
431 	}
432 
433 	DTRACE_PROBE2(mac__register, struct devnames *, dnp,
434 	    (mac_impl_t *), mip);
435 
436 	/*
437 	 * Mark the MAC to be ready for open.
438 	 */
439 	mip->mi_state_flags &= ~MIS_DISABLED;
440 	rw_exit(&i_mac_impl_lock);
441 
442 	atomic_inc_32(&i_mac_impl_count);
443 
444 	cmn_err(CE_NOTE, "!%s registered", mip->mi_name);
445 	*mhp = (mac_handle_t)mip;
446 	return (0);
447 
448 fail:
449 	if (style1_created)
450 		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
451 
452 	if (style2_created)
453 		ddi_remove_minor_node(mip->mi_dip, driver);
454 
455 	mac_addr_factory_fini(mip);
456 
457 	/* Clean up registered MAC addresses */
458 	mac_fini_macaddr(mip);
459 
460 	/* Clean up registered rings */
461 	mac_free_rings(mip, MAC_RING_TYPE_RX);
462 	mac_free_rings(mip, MAC_RING_TYPE_TX);
463 
464 	/* Clean up notification thread */
465 	if (mip->mi_notify_thread != NULL)
466 		i_mac_notify_exit(mip);
467 
468 	if (mip->mi_info.mi_unicst_addr != NULL) {
469 		kmem_free(mip->mi_info.mi_unicst_addr,
470 		    mip->mi_type->mt_addr_length);
471 		mip->mi_info.mi_unicst_addr = NULL;
472 	}
473 
474 	mac_driver_stat_delete(mip);
475 
476 	if (mip->mi_type != NULL) {
477 		atomic_dec_32(&mip->mi_type->mt_ref);
478 		mip->mi_type = NULL;
479 	}
480 
481 	if (mip->mi_pdata != NULL) {
482 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
483 		mip->mi_pdata = NULL;
484 		mip->mi_pdata_size = 0;
485 	}
486 
487 	if (minor != 0) {
488 		ASSERT(minor > MAC_MAX_MINOR);
489 		mac_minor_rele(minor);
490 	}
491 
492 	mip->mi_state_flags = 0;
493 	mac_unregister_priv_prop(mip);
494 
495 	/*
496 	 * Clear the state before destroying the mac_impl_t
497 	 */
498 	mip->mi_state_flags = 0;
499 
500 	kmem_cache_free(i_mac_impl_cachep, mip);
501 	return (err);
502 }
503 
504 /*
505  * Unregister from the GLDv3 framework
506  */
507 int
508 mac_unregister(mac_handle_t mh)
509 {
510 	int			err;
511 	mac_impl_t		*mip = (mac_impl_t *)mh;
512 	mod_hash_val_t		val;
513 	mac_margin_req_t	*mmr, *nextmmr;
514 
515 	/* Fail the unregister if there are any open references to this mac. */
516 	if ((err = mac_disable_nowait(mh)) != 0)
517 		return (err);
518 
519 	/*
520 	 * Clean up notification thread and wait for it to exit.
521 	 */
522 	i_mac_notify_exit(mip);
523 
524 	/*
525 	 * Prior to acquiring the MAC perimeter, remove the MAC instance from
526 	 * the internal hash table. Such removal means table-walkers that
527 	 * acquire the perimeter will not do so on behalf of what we are
528 	 * unregistering, which prevents a deadlock.
529 	 */
530 	rw_enter(&i_mac_impl_lock, RW_WRITER);
531 	(void) mod_hash_remove(i_mac_impl_hash,
532 	    (mod_hash_key_t)mip->mi_name, &val);
533 	rw_exit(&i_mac_impl_lock);
534 	ASSERT(mip == (mac_impl_t *)val);
535 
536 	i_mac_perim_enter(mip);
537 
538 	/*
539 	 * There is still resource properties configured over this mac.
540 	 */
541 	if (mip->mi_resource_props.mrp_mask != 0)
542 		mac_fastpath_enable((mac_handle_t)mip);
543 
544 	if (mip->mi_minor < MAC_MAX_MINOR + 1) {
545 		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
546 		ddi_remove_minor_node(mip->mi_dip,
547 		    (char *)ddi_driver_name(mip->mi_dip));
548 	}
549 
550 	ASSERT(mip->mi_nactiveclients == 0 && !(mip->mi_state_flags &
551 	    MIS_EXCLUSIVE));
552 
553 	mac_driver_stat_delete(mip);
554 
555 	ASSERT(i_mac_impl_count > 0);
556 	atomic_dec_32(&i_mac_impl_count);
557 
558 	if (mip->mi_pdata != NULL)
559 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
560 	mip->mi_pdata = NULL;
561 	mip->mi_pdata_size = 0;
562 
563 	/*
564 	 * Free the list of margin request.
565 	 */
566 	for (mmr = mip->mi_mmrp; mmr != NULL; mmr = nextmmr) {
567 		nextmmr = mmr->mmr_nextp;
568 		kmem_free(mmr, sizeof (mac_margin_req_t));
569 	}
570 	mip->mi_mmrp = NULL;
571 
572 	mip->mi_linkstate = mip->mi_lowlinkstate = LINK_STATE_UNKNOWN;
573 	kmem_free(mip->mi_info.mi_unicst_addr, mip->mi_type->mt_addr_length);
574 	mip->mi_info.mi_unicst_addr = NULL;
575 
576 	atomic_dec_32(&mip->mi_type->mt_ref);
577 	mip->mi_type = NULL;
578 
579 	/*
580 	 * Free the primary MAC address.
581 	 */
582 	mac_fini_macaddr(mip);
583 
584 	/*
585 	 * free all rings
586 	 */
587 	mac_free_rings(mip, MAC_RING_TYPE_RX);
588 	mac_free_rings(mip, MAC_RING_TYPE_TX);
589 
590 	mac_addr_factory_fini(mip);
591 
592 	bzero(mip->mi_addr, MAXMACADDRLEN);
593 	bzero(mip->mi_dstaddr, MAXMACADDRLEN);
594 	mip->mi_dstaddr_set = B_FALSE;
595 
596 	/* and the flows */
597 	mac_flow_tab_destroy(mip->mi_flow_tab);
598 	mip->mi_flow_tab = NULL;
599 
600 	if (mip->mi_minor > MAC_MAX_MINOR)
601 		mac_minor_rele(mip->mi_minor);
602 
603 	cmn_err(CE_NOTE, "!%s unregistered", mip->mi_name);
604 
605 	/*
606 	 * Reset the perim related fields to default values before
607 	 * kmem_cache_free
608 	 */
609 	i_mac_perim_exit(mip);
610 	mip->mi_state_flags = 0;
611 
612 	mac_unregister_priv_prop(mip);
613 
614 	ASSERT(mip->mi_bridge_link == NULL);
615 	kmem_cache_free(i_mac_impl_cachep, mip);
616 
617 	return (0);
618 }
619 
620 /* DATA RECEPTION */
621 
622 /*
623  * This function is invoked for packets received by the MAC driver in
624  * interrupt context. The ring generation number provided by the driver
625  * is matched with the ring generation number held in MAC. If they do not
626  * match, received packets are considered stale packets coming from an older
627  * assignment of the ring. Drop them.
628  */
629 void
630 mac_rx_ring(mac_handle_t mh, mac_ring_handle_t mrh, mblk_t *mp_chain,
631     uint64_t mr_gen_num)
632 {
633 	mac_ring_t		*mr = (mac_ring_t *)mrh;
634 
635 	if ((mr != NULL) && (mr->mr_gen_num != mr_gen_num)) {
636 		DTRACE_PROBE2(mac__rx__rings__stale__packet, uint64_t,
637 		    mr->mr_gen_num, uint64_t, mr_gen_num);
638 		freemsgchain(mp_chain);
639 		return;
640 	}
641 	mac_rx(mh, (mac_resource_handle_t)mrh, mp_chain);
642 }
643 
644 /*
645  * This function is invoked for each packet received by the underlying driver.
646  */
647 void
648 mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
649 {
650 	mac_impl_t *mip = (mac_impl_t *)mh;
651 
652 	/*
653 	 * Check if the link is part of a bridge.  If not, then we don't need
654 	 * to take the lock to remain consistent.  Make this common case
655 	 * lock-free and tail-call optimized.
656 	 */
657 	if (mip->mi_bridge_link == NULL) {
658 		mac_rx_common(mh, mrh, mp_chain);
659 	} else {
660 		/*
661 		 * Once we take a reference on the bridge link, the bridge
662 		 * module itself can't unload, so the callback pointers are
663 		 * stable.
664 		 */
665 		mutex_enter(&mip->mi_bridge_lock);
666 		if ((mh = mip->mi_bridge_link) != NULL)
667 			mac_bridge_ref_cb(mh, B_TRUE);
668 		mutex_exit(&mip->mi_bridge_lock);
669 		if (mh == NULL) {
670 			mac_rx_common((mac_handle_t)mip, mrh, mp_chain);
671 		} else {
672 			mac_bridge_rx_cb(mh, mrh, mp_chain);
673 			mac_bridge_ref_cb(mh, B_FALSE);
674 		}
675 	}
676 }
677 
678 /*
679  * Special case function: this allows snooping of packets transmitted and
680  * received by TRILL. By design, they go directly into the TRILL module.
681  */
682 void
683 mac_trill_snoop(mac_handle_t mh, mblk_t *mp)
684 {
685 	mac_impl_t *mip = (mac_impl_t *)mh;
686 
687 	if (mip->mi_promisc_list != NULL)
688 		mac_promisc_dispatch(mip, mp, NULL);
689 }
690 
691 /*
692  * This is the upward reentry point for packets arriving from the bridging
693  * module and from mac_rx for links not part of a bridge.
694  */
695 void
696 mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
697 {
698 	mac_impl_t		*mip = (mac_impl_t *)mh;
699 	mac_ring_t		*mr = (mac_ring_t *)mrh;
700 	mac_soft_ring_set_t	*mac_srs;
701 	mblk_t			*bp = mp_chain;
702 
703 	/*
704 	 * If there are any promiscuous mode callbacks defined for
705 	 * this MAC, pass them a copy if appropriate.
706 	 */
707 	if (mip->mi_promisc_list != NULL)
708 		mac_promisc_dispatch(mip, mp_chain, NULL);
709 
710 	if (mr != NULL) {
711 		/*
712 		 * If the SRS teardown has started, just return. The 'mr'
713 		 * continues to be valid until the driver unregisters the MAC.
714 		 * Hardware classified packets will not make their way up
715 		 * beyond this point once the teardown has started. The driver
716 		 * is never passed a pointer to a flow entry or SRS or any
717 		 * structure that can be freed much before mac_unregister.
718 		 */
719 		mutex_enter(&mr->mr_lock);
720 		if ((mr->mr_state != MR_INUSE) || (mr->mr_flag &
721 		    (MR_INCIPIENT | MR_CONDEMNED | MR_QUIESCE))) {
722 			mutex_exit(&mr->mr_lock);
723 			freemsgchain(mp_chain);
724 			return;
725 		}
726 
727 		/*
728 		 * The ring is in passthru mode; pass the chain up to
729 		 * the pseudo ring.
730 		 */
731 		if (mr->mr_classify_type == MAC_PASSTHRU_CLASSIFIER) {
732 			MR_REFHOLD_LOCKED(mr);
733 			mutex_exit(&mr->mr_lock);
734 			mr->mr_pt_fn(mr->mr_pt_arg1, mr->mr_pt_arg2, mp_chain,
735 			    B_FALSE);
736 			MR_REFRELE(mr);
737 			return;
738 		}
739 
740 		/*
741 		 * The passthru callback should only be set when in
742 		 * MAC_PASSTHRU_CLASSIFIER mode.
743 		 */
744 		ASSERT3P(mr->mr_pt_fn, ==, NULL);
745 
746 		/*
747 		 * We check if an SRS is controlling this ring.
748 		 * If so, we can directly call the srs_lower_proc
749 		 * routine otherwise we need to go through mac_rx_classify
750 		 * to reach the right place.
751 		 */
752 		if (mr->mr_classify_type == MAC_HW_CLASSIFIER) {
753 			MR_REFHOLD_LOCKED(mr);
754 			mutex_exit(&mr->mr_lock);
755 			ASSERT3P(mr->mr_srs, !=, NULL);
756 			mac_srs = mr->mr_srs;
757 
758 			/*
759 			 * This is the fast path. All packets received
760 			 * on this ring are hardware classified and
761 			 * share the same MAC header info.
762 			 */
763 			mac_srs->srs_rx.sr_lower_proc(mh,
764 			    (mac_resource_handle_t)mac_srs, mp_chain, B_FALSE);
765 			MR_REFRELE(mr);
766 			return;
767 		}
768 
769 		mutex_exit(&mr->mr_lock);
770 		/* We'll fall through to software classification */
771 	} else {
772 		flow_entry_t *flent;
773 		int err;
774 
775 		rw_enter(&mip->mi_rw_lock, RW_READER);
776 		if (mip->mi_single_active_client != NULL) {
777 			flent = mip->mi_single_active_client->mci_flent_list;
778 			FLOW_TRY_REFHOLD(flent, err);
779 			rw_exit(&mip->mi_rw_lock);
780 			if (err == 0) {
781 				(flent->fe_cb_fn)(flent->fe_cb_arg1,
782 				    flent->fe_cb_arg2, mp_chain, B_FALSE);
783 				FLOW_REFRELE(flent);
784 				return;
785 			}
786 		} else {
787 			rw_exit(&mip->mi_rw_lock);
788 		}
789 	}
790 
791 	if (!FLOW_TAB_EMPTY(mip->mi_flow_tab)) {
792 		if ((bp = mac_rx_flow(mh, mrh, bp)) == NULL)
793 			return;
794 	}
795 
796 	freemsgchain(bp);
797 }
798 
799 /* DATA TRANSMISSION */
800 
801 /*
802  * A driver's notification to resume transmission, in case of a provider
803  * without TX rings.
804  */
805 void
806 mac_tx_update(mac_handle_t mh)
807 {
808 	mac_tx_ring_update(mh, NULL);
809 }
810 
811 /*
812  * A driver's notification to resume transmission on the specified TX ring.
813  */
814 void
815 mac_tx_ring_update(mac_handle_t mh, mac_ring_handle_t rh)
816 {
817 	i_mac_tx_srs_notify((mac_impl_t *)mh, rh);
818 }
819 
820 /* LINK STATE */
821 /*
822  * Notify the MAC layer about a link state change
823  */
824 void
825 mac_link_update(mac_handle_t mh, link_state_t link)
826 {
827 	mac_impl_t	*mip = (mac_impl_t *)mh;
828 
829 	/*
830 	 * Save the link state.
831 	 */
832 	mip->mi_lowlinkstate = link;
833 
834 	/*
835 	 * Send a MAC_NOTE_LOWLINK notification.  This tells the notification
836 	 * thread to deliver both lower and upper notifications.
837 	 */
838 	i_mac_notify(mip, MAC_NOTE_LOWLINK);
839 }
840 
841 /*
842  * Notify the MAC layer about a link state change due to bridging.
843  */
844 void
845 mac_link_redo(mac_handle_t mh, link_state_t link)
846 {
847 	mac_impl_t	*mip = (mac_impl_t *)mh;
848 
849 	/*
850 	 * Save the link state.
851 	 */
852 	mip->mi_linkstate = link;
853 
854 	/*
855 	 * Send a MAC_NOTE_LINK notification.  Only upper notifications are
856 	 * made.
857 	 */
858 	i_mac_notify(mip, MAC_NOTE_LINK);
859 }
860 
861 /* MINOR NODE HANDLING */
862 
863 /*
864  * Given a dev_t, return the instance number (PPA) associated with it.
865  * Drivers can use this in their getinfo(9e) implementation to lookup
866  * the instance number (i.e. PPA) of the device, to use as an index to
867  * their own array of soft state structures.
868  *
869  * Returns -1 on error.
870  */
871 int
872 mac_devt_to_instance(dev_t devt)
873 {
874 	return (dld_devt_to_instance(devt));
875 }
876 
877 /*
878  * This function returns the first minor number that is available for
879  * driver private use.  All minor numbers smaller than this are
880  * reserved for GLDv3 use.
881  */
882 minor_t
883 mac_private_minor(void)
884 {
885 	return (MAC_PRIVATE_MINOR);
886 }
887 
888 /* OTHER CONTROL INFORMATION */
889 
890 /*
891  * A driver notified us that its primary MAC address has changed.
892  */
893 void
894 mac_unicst_update(mac_handle_t mh, const uint8_t *addr)
895 {
896 	mac_impl_t	*mip = (mac_impl_t *)mh;
897 
898 	if (mip->mi_type->mt_addr_length == 0)
899 		return;
900 
901 	i_mac_perim_enter(mip);
902 
903 	/*
904 	 * If address changes, freshen the MAC address value and update
905 	 * all MAC clients that share this MAC address.
906 	 */
907 	if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) != 0) {
908 		mac_freshen_macaddr(mac_find_macaddr(mip, mip->mi_addr),
909 		    (uint8_t *)addr);
910 	}
911 
912 	i_mac_perim_exit(mip);
913 
914 	/*
915 	 * Send a MAC_NOTE_UNICST notification.
916 	 */
917 	i_mac_notify(mip, MAC_NOTE_UNICST);
918 }
919 
920 void
921 mac_dst_update(mac_handle_t mh, const uint8_t *addr)
922 {
923 	mac_impl_t	*mip = (mac_impl_t *)mh;
924 
925 	if (mip->mi_type->mt_addr_length == 0)
926 		return;
927 
928 	i_mac_perim_enter(mip);
929 	bcopy(addr, mip->mi_dstaddr, mip->mi_type->mt_addr_length);
930 	i_mac_perim_exit(mip);
931 	i_mac_notify(mip, MAC_NOTE_DEST);
932 }
933 
934 /*
935  * MAC plugin information changed.
936  */
937 int
938 mac_pdata_update(mac_handle_t mh, void *mac_pdata, size_t dsize)
939 {
940 	mac_impl_t	*mip = (mac_impl_t *)mh;
941 
942 	/*
943 	 * Verify that the plugin supports MAC plugin data and that the
944 	 * supplied data is valid.
945 	 */
946 	if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
947 		return (EINVAL);
948 	if (!mip->mi_type->mt_ops.mtops_pdata_verify(mac_pdata, dsize))
949 		return (EINVAL);
950 
951 	if (mip->mi_pdata != NULL)
952 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
953 
954 	mip->mi_pdata = kmem_alloc(dsize, KM_SLEEP);
955 	bcopy(mac_pdata, mip->mi_pdata, dsize);
956 	mip->mi_pdata_size = dsize;
957 
958 	/*
959 	 * Since the MAC plugin data is used to construct MAC headers that
960 	 * were cached in fast-path headers, we need to flush fast-path
961 	 * information for links associated with this mac.
962 	 */
963 	i_mac_notify(mip, MAC_NOTE_FASTPATH_FLUSH);
964 	return (0);
965 }
966 
967 /*
968  * Invoked by driver as well as the framework to notify its capability change.
969  */
970 void
971 mac_capab_update(mac_handle_t mh)
972 {
973 	/* Send MAC_NOTE_CAPAB_CHG notification */
974 	i_mac_notify((mac_impl_t *)mh, MAC_NOTE_CAPAB_CHG);
975 }
976 
977 /*
978  * Used by normal drivers to update the max sdu size.
979  * We need to handle the case of a smaller mi_sdu_multicast
980  * since this is called by mac_set_mtu() even for drivers that
981  * have differing unicast and multicast mtu and we don't want to
982  * increase the multicast mtu by accident in that case.
983  */
984 int
985 mac_maxsdu_update(mac_handle_t mh, uint_t sdu_max)
986 {
987 	mac_impl_t	*mip = (mac_impl_t *)mh;
988 
989 	if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
990 		return (EINVAL);
991 	mip->mi_sdu_max = sdu_max;
992 	if (mip->mi_sdu_multicast > mip->mi_sdu_max)
993 		mip->mi_sdu_multicast = mip->mi_sdu_max;
994 
995 	/* Send a MAC_NOTE_SDU_SIZE notification. */
996 	i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
997 	return (0);
998 }
999 
1000 /*
1001  * Version of the above function that is used by drivers that have a different
1002  * max sdu size for multicast/broadcast vs. unicast.
1003  */
1004 int
1005 mac_maxsdu_update2(mac_handle_t mh, uint_t sdu_max, uint_t sdu_multicast)
1006 {
1007 	mac_impl_t	*mip = (mac_impl_t *)mh;
1008 
1009 	if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
1010 		return (EINVAL);
1011 	if (sdu_multicast == 0)
1012 		sdu_multicast = sdu_max;
1013 	if (sdu_multicast > sdu_max || sdu_multicast < mip->mi_sdu_min)
1014 		return (EINVAL);
1015 	mip->mi_sdu_max = sdu_max;
1016 	mip->mi_sdu_multicast = sdu_multicast;
1017 
1018 	/* Send a MAC_NOTE_SDU_SIZE notification. */
1019 	i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
1020 	return (0);
1021 }
1022 
1023 static void
1024 mac_ring_intr_retarget(mac_group_t *group, mac_ring_t *ring)
1025 {
1026 	mac_client_impl_t *mcip;
1027 	flow_entry_t *flent;
1028 	mac_soft_ring_set_t *mac_rx_srs;
1029 	mac_cpus_t *srs_cpu;
1030 	int i;
1031 
1032 	if (((mcip = MAC_GROUP_ONLY_CLIENT(group)) != NULL) &&
1033 	    (!ring->mr_info.mri_intr.mi_ddi_shared)) {
1034 		/* interrupt can be re-targeted */
1035 		ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED);
1036 		flent = mcip->mci_flent;
1037 		if (ring->mr_type == MAC_RING_TYPE_RX) {
1038 			for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
1039 				mac_rx_srs = flent->fe_rx_srs[i];
1040 				if (mac_rx_srs->srs_ring != ring)
1041 					continue;
1042 				srs_cpu = &mac_rx_srs->srs_cpu;
1043 				mutex_enter(&cpu_lock);
1044 				mac_rx_srs_retarget_intr(mac_rx_srs,
1045 				    srs_cpu->mc_rx_intr_cpu);
1046 				mutex_exit(&cpu_lock);
1047 				break;
1048 			}
1049 		} else {
1050 			if (flent->fe_tx_srs != NULL) {
1051 				mutex_enter(&cpu_lock);
1052 				mac_tx_srs_retarget_intr(
1053 				    flent->fe_tx_srs);
1054 				mutex_exit(&cpu_lock);
1055 			}
1056 		}
1057 	}
1058 }
1059 
/*
 * Clients like aggr create pseudo rings (mac_ring_t) and expose them to
 * their clients. There is a 1-1 mapping between each pseudo ring and its
 * hardware ring. DDI interrupt handles are exported from the hardware
 * ring to the pseudo ring. Thus, when the interrupt handle changes,
 * clients of aggr that are using the handle need to use the new handle
 * and re-target their interrupts.
 */
1068 static void
1069 mac_pseudo_ring_intr_retarget(mac_impl_t *mip, mac_ring_t *ring,
1070     ddi_intr_handle_t ddh)
1071 {
1072 	mac_ring_t *pring;
1073 	mac_group_t *pgroup;
1074 	mac_impl_t *pmip;
1075 	char macname[MAXNAMELEN];
1076 	mac_perim_handle_t p_mph;
1077 	uint64_t saved_gen_num;
1078 
1079 again:
1080 	pring = (mac_ring_t *)ring->mr_prh;
1081 	pgroup = (mac_group_t *)pring->mr_gh;
1082 	pmip = (mac_impl_t *)pgroup->mrg_mh;
1083 	saved_gen_num = ring->mr_gen_num;
1084 	(void) strlcpy(macname, pmip->mi_name, MAXNAMELEN);
1085 	/*
1086 	 * We need to enter aggr's perimeter. The locking hierarchy
1087 	 * dictates that aggr's perimeter should be entered first
1088 	 * and then the port's perimeter. So drop the port's
1089 	 * perimeter, enter aggr's and then re-enter port's
1090 	 * perimeter.
1091 	 */
1092 	i_mac_perim_exit(mip);
1093 	/*
1094 	 * While we know pmip is the aggr's mip, there is a
1095 	 * possibility that aggr could have unregistered by
1096 	 * the time we exit port's perimeter (mip) and
1097 	 * enter aggr's perimeter (pmip). To avoid that
1098 	 * scenario, enter aggr's perimeter using its name.
1099 	 */
1100 	if (mac_perim_enter_by_macname(macname, &p_mph) != 0)
1101 		return;
1102 	i_mac_perim_enter(mip);
1103 	/*
1104 	 * Check if the ring got assigned to another aggregation before
1105 	 * be could enter aggr's and the port's perimeter. When a ring
1106 	 * gets deleted from an aggregation, it calls mac_stop_ring()
1107 	 * which increments the generation number. So checking
1108 	 * generation number will be enough.
1109 	 */
1110 	if (ring->mr_gen_num != saved_gen_num && ring->mr_prh != NULL) {
1111 		i_mac_perim_exit(mip);
1112 		mac_perim_exit(p_mph);
1113 		i_mac_perim_enter(mip);
1114 		goto again;
1115 	}
1116 
1117 	/* Check if pseudo ring is still present */
1118 	if (ring->mr_prh != NULL) {
1119 		pring->mr_info.mri_intr.mi_ddi_handle = ddh;
1120 		pring->mr_info.mri_intr.mi_ddi_shared =
1121 		    ring->mr_info.mri_intr.mi_ddi_shared;
1122 		if (ddh != NULL)
1123 			mac_ring_intr_retarget(pgroup, pring);
1124 	}
1125 	i_mac_perim_exit(mip);
1126 	mac_perim_exit(p_mph);
1127 }
1128 /*
1129  * API called by driver to provide new interrupt handle for TX/RX rings.
1130  * This usually happens when IRM (Interrupt Resource Manangement)
1131  * framework either gives the driver more MSI-x interrupts or takes
1132  * away MSI-x interrupts from the driver.
1133  */
1134 void
1135 mac_ring_intr_set(mac_ring_handle_t mrh, ddi_intr_handle_t ddh)
1136 {
1137 	mac_ring_t	*ring = (mac_ring_t *)mrh;
1138 	mac_group_t	*group = (mac_group_t *)ring->mr_gh;
1139 	mac_impl_t	*mip = (mac_impl_t *)group->mrg_mh;
1140 
1141 	i_mac_perim_enter(mip);
1142 	ring->mr_info.mri_intr.mi_ddi_handle = ddh;
1143 	if (ddh == NULL) {
1144 		/* Interrupts being reset */
1145 		ring->mr_info.mri_intr.mi_ddi_shared = B_FALSE;
1146 		if (ring->mr_prh != NULL) {
1147 			mac_pseudo_ring_intr_retarget(mip, ring, ddh);
1148 			return;
1149 		}
1150 	} else {
1151 		/* New interrupt handle */
1152 		mac_compare_ddi_handle(mip->mi_rx_groups,
1153 		    mip->mi_rx_group_count, ring);
1154 		if (!ring->mr_info.mri_intr.mi_ddi_shared) {
1155 			mac_compare_ddi_handle(mip->mi_tx_groups,
1156 			    mip->mi_tx_group_count, ring);
1157 		}
1158 		if (ring->mr_prh != NULL) {
1159 			mac_pseudo_ring_intr_retarget(mip, ring, ddh);
1160 			return;
1161 		} else {
1162 			mac_ring_intr_retarget(group, ring);
1163 		}
1164 	}
1165 	i_mac_perim_exit(mip);
1166 }
1167 
1168 /* PRIVATE FUNCTIONS, FOR INTERNAL USE ONLY */
1169 
1170 /*
1171  * Updates the mac_impl structure with the current state of the link
1172  */
1173 static void
1174 i_mac_log_link_state(mac_impl_t *mip)
1175 {
1176 	/*
1177 	 * If no change, then it is not interesting.
1178 	 */
1179 	if (mip->mi_lastlowlinkstate == mip->mi_lowlinkstate)
1180 		return;
1181 
1182 	switch (mip->mi_lowlinkstate) {
1183 	case LINK_STATE_UP:
1184 		if (mip->mi_type->mt_ops.mtops_ops & MTOPS_LINK_DETAILS) {
1185 			char det[200];
1186 
1187 			mip->mi_type->mt_ops.mtops_link_details(det,
1188 			    sizeof (det), (mac_handle_t)mip, mip->mi_pdata);
1189 
1190 			cmn_err(CE_NOTE, "!%s link up, %s", mip->mi_name, det);
1191 		} else {
1192 			cmn_err(CE_NOTE, "!%s link up", mip->mi_name);
1193 		}
1194 		break;
1195 
1196 	case LINK_STATE_DOWN:
1197 		/*
1198 		 * Only transitions from UP to DOWN are interesting
1199 		 */
1200 		if (mip->mi_lastlowlinkstate != LINK_STATE_UNKNOWN)
1201 			cmn_err(CE_NOTE, "!%s link down", mip->mi_name);
1202 		break;
1203 
1204 	case LINK_STATE_UNKNOWN:
1205 		/*
1206 		 * This case is normally not interesting.
1207 		 */
1208 		break;
1209 	}
1210 	mip->mi_lastlowlinkstate = mip->mi_lowlinkstate;
1211 }
1212 
1213 /*
1214  * Main routine for the callbacks notifications thread
1215  */
1216 static void
1217 i_mac_notify_thread(void *arg)
1218 {
1219 	mac_impl_t	*mip = arg;
1220 	callb_cpr_t	cprinfo;
1221 	mac_cb_t	*mcb;
1222 	mac_cb_info_t	*mcbi;
1223 	mac_notify_cb_t	*mncb;
1224 
1225 	mcbi = &mip->mi_notify_cb_info;
1226 	CALLB_CPR_INIT(&cprinfo, mcbi->mcbi_lockp, callb_generic_cpr,
1227 	    "i_mac_notify_thread");
1228 
1229 	mutex_enter(mcbi->mcbi_lockp);
1230 
1231 	for (;;) {
1232 		uint32_t	bits;
1233 		uint32_t	type;
1234 
1235 		bits = mip->mi_notify_bits;
1236 		if (bits == 0) {
1237 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
1238 			cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
1239 			CALLB_CPR_SAFE_END(&cprinfo, mcbi->mcbi_lockp);
1240 			continue;
1241 		}
1242 		mip->mi_notify_bits = 0;
1243 		if ((bits & (1 << MAC_NNOTE)) != 0) {
1244 			/* request to quit */
1245 			ASSERT(mip->mi_state_flags & MIS_DISABLED);
1246 			break;
1247 		}
1248 
1249 		mutex_exit(mcbi->mcbi_lockp);
1250 
1251 		/*
1252 		 * Log link changes on the actual link, but then do reports on
1253 		 * synthetic state (if part of a bridge).
1254 		 */
1255 		if ((bits & (1 << MAC_NOTE_LOWLINK)) != 0) {
1256 			link_state_t newstate;
1257 			mac_handle_t mh;
1258 
1259 			i_mac_log_link_state(mip);
1260 			newstate = mip->mi_lowlinkstate;
1261 			if (mip->mi_bridge_link != NULL) {
1262 				mutex_enter(&mip->mi_bridge_lock);
1263 				if ((mh = mip->mi_bridge_link) != NULL) {
1264 					newstate = mac_bridge_ls_cb(mh,
1265 					    newstate);
1266 				}
1267 				mutex_exit(&mip->mi_bridge_lock);
1268 			}
1269 			if (newstate != mip->mi_linkstate) {
1270 				mip->mi_linkstate = newstate;
1271 				bits |= 1 << MAC_NOTE_LINK;
1272 			}
1273 		}
1274 
1275 		/*
1276 		 * Do notification callbacks for each notification type.
1277 		 */
1278 		for (type = 0; type < MAC_NNOTE; type++) {
1279 			if ((bits & (1 << type)) == 0) {
1280 				continue;
1281 			}
1282 
1283 			if (mac_notify_cb_list[type] != NULL)
1284 				(*mac_notify_cb_list[type])(mip);
1285 
1286 			/*
1287 			 * Walk the list of notifications.
1288 			 */
1289 			MAC_CALLBACK_WALKER_INC(&mip->mi_notify_cb_info);
1290 			for (mcb = mip->mi_notify_cb_list; mcb != NULL;
1291 			    mcb = mcb->mcb_nextp) {
1292 				mncb = (mac_notify_cb_t *)mcb->mcb_objp;
1293 				mncb->mncb_fn(mncb->mncb_arg, type);
1294 			}
1295 			MAC_CALLBACK_WALKER_DCR(&mip->mi_notify_cb_info,
1296 			    &mip->mi_notify_cb_list);
1297 		}
1298 
1299 		mutex_enter(mcbi->mcbi_lockp);
1300 	}
1301 
1302 	mip->mi_state_flags |= MIS_NOTIFY_DONE;
1303 	cv_broadcast(&mcbi->mcbi_cv);
1304 
1305 	/* CALLB_CPR_EXIT drops the lock */
1306 	CALLB_CPR_EXIT(&cprinfo);
1307 	thread_exit();
1308 }
1309 
1310 /*
1311  * Signal the i_mac_notify_thread asking it to quit.
1312  * Then wait till it is done.
1313  */
1314 void
1315 i_mac_notify_exit(mac_impl_t *mip)
1316 {
1317 	mac_cb_info_t	*mcbi;
1318 
1319 	mcbi = &mip->mi_notify_cb_info;
1320 
1321 	mutex_enter(mcbi->mcbi_lockp);
1322 	mip->mi_notify_bits = (1 << MAC_NNOTE);
1323 	cv_broadcast(&mcbi->mcbi_cv);
1324 
1325 
1326 	while ((mip->mi_notify_thread != NULL) &&
1327 	    !(mip->mi_state_flags & MIS_NOTIFY_DONE)) {
1328 		cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
1329 	}
1330 
1331 	/* Necessary clean up before doing kmem_cache_free */
1332 	mip->mi_state_flags &= ~MIS_NOTIFY_DONE;
1333 	mip->mi_notify_bits = 0;
1334 	mip->mi_notify_thread = NULL;
1335 	mutex_exit(mcbi->mcbi_lockp);
1336 }
1337 
1338 /*
1339  * Entry point invoked by drivers to dynamically add a ring to an
1340  * existing group.
1341  */
1342 int
1343 mac_group_add_ring(mac_group_handle_t gh, int index)
1344 {
1345 	mac_group_t *group = (mac_group_t *)gh;
1346 	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1347 	int ret;
1348 
1349 	i_mac_perim_enter(mip);
1350 	ret = i_mac_group_add_ring(group, NULL, index);
1351 	i_mac_perim_exit(mip);
1352 	return (ret);
1353 }
1354 
1355 /*
1356  * Entry point invoked by drivers to dynamically remove a ring
1357  * from an existing group. The specified ring handle must no longer
1358  * be used by the driver after a call to this function.
1359  */
1360 void
1361 mac_group_rem_ring(mac_group_handle_t gh, mac_ring_handle_t rh)
1362 {
1363 	mac_group_t *group = (mac_group_t *)gh;
1364 	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1365 
1366 	i_mac_perim_enter(mip);
1367 	i_mac_group_rem_ring(group, (mac_ring_t *)rh, B_TRUE);
1368 	i_mac_perim_exit(mip);
1369 }
1370 
1371 /*
1372  * mac_prop_info_*() callbacks called from the driver's prefix_propinfo()
1373  * entry points.
1374  */
1375 
1376 void
1377 mac_prop_info_set_default_uint8(mac_prop_info_handle_t ph, uint8_t val)
1378 {
1379 	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1380 
1381 	/* nothing to do if the caller doesn't want the default value */
1382 	if (pr->pr_default == NULL)
1383 		return;
1384 
1385 	ASSERT(pr->pr_default_size >= sizeof (uint8_t));
1386 
1387 	*(uint8_t *)(pr->pr_default) = val;
1388 	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1389 }
1390 
1391 void
1392 mac_prop_info_set_default_uint64(mac_prop_info_handle_t ph, uint64_t val)
1393 {
1394 	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1395 
1396 	/* nothing to do if the caller doesn't want the default value */
1397 	if (pr->pr_default == NULL)
1398 		return;
1399 
1400 	ASSERT(pr->pr_default_size >= sizeof (uint64_t));
1401 
1402 	bcopy(&val, pr->pr_default, sizeof (val));
1403 
1404 	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1405 }
1406 
1407 void
1408 mac_prop_info_set_default_uint32(mac_prop_info_handle_t ph, uint32_t val)
1409 {
1410 	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1411 
1412 	/* nothing to do if the caller doesn't want the default value */
1413 	if (pr->pr_default == NULL)
1414 		return;
1415 
1416 	ASSERT(pr->pr_default_size >= sizeof (uint32_t));
1417 
1418 	bcopy(&val, pr->pr_default, sizeof (val));
1419 
1420 	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1421 }
1422 
1423 void
1424 mac_prop_info_set_default_str(mac_prop_info_handle_t ph, const char *str)
1425 {
1426 	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1427 
1428 	/* nothing to do if the caller doesn't want the default value */
1429 	if (pr->pr_default == NULL)
1430 		return;
1431 
1432 	if (strlen(str) >= pr->pr_default_size)
1433 		pr->pr_errno = ENOBUFS;
1434 	else
1435 		(void) strlcpy(pr->pr_default, str, pr->pr_default_size);
1436 	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1437 }
1438 
1439 void
1440 mac_prop_info_set_default_link_flowctrl(mac_prop_info_handle_t ph,
1441     link_flowctrl_t val)
1442 {
1443 	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1444 
1445 	/* nothing to do if the caller doesn't want the default value */
1446 	if (pr->pr_default == NULL)
1447 		return;
1448 
1449 	ASSERT(pr->pr_default_size >= sizeof (link_flowctrl_t));
1450 
1451 	bcopy(&val, pr->pr_default, sizeof (val));
1452 
1453 	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1454 }
1455 
1456 void
1457 mac_prop_info_set_range_uint32(mac_prop_info_handle_t ph, uint32_t min,
1458     uint32_t max)
1459 {
1460 	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1461 	mac_propval_range_t *range = pr->pr_range;
1462 	mac_propval_uint32_range_t *range32;
1463 
1464 	/* nothing to do if the caller doesn't want the range info */
1465 	if (range == NULL)
1466 		return;
1467 
1468 	if (pr->pr_range_cur_count++ == 0) {
1469 		/* first range */
1470 		pr->pr_flags |= MAC_PROP_INFO_RANGE;
1471 		range->mpr_type = MAC_PROPVAL_UINT32;
1472 	} else {
1473 		/* all ranges of a property should be of the same type */
1474 		ASSERT(range->mpr_type == MAC_PROPVAL_UINT32);
1475 		if (pr->pr_range_cur_count > range->mpr_count) {
1476 			pr->pr_errno = ENOSPC;
1477 			return;
1478 		}
1479 	}
1480 
1481 	range32 = range->mpr_range_uint32;
1482 	range32[pr->pr_range_cur_count - 1].mpur_min = min;
1483 	range32[pr->pr_range_cur_count - 1].mpur_max = max;
1484 }
1485 
1486 void
1487 mac_prop_info_set_perm(mac_prop_info_handle_t ph, uint8_t perm)
1488 {
1489 	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1490 
1491 	pr->pr_perm = perm;
1492 	pr->pr_flags |= MAC_PROP_INFO_PERM;
1493 }
1494 
1495 void mac_hcksum_get(mblk_t *mp, uint32_t *start, uint32_t *stuff,
1496     uint32_t *end, uint32_t *value, uint32_t *flags_ptr)
1497 {
1498 	uint32_t flags;
1499 
1500 	ASSERT(DB_TYPE(mp) == M_DATA);
1501 
1502 	flags = DB_CKSUMFLAGS(mp) & HCK_FLAGS;
1503 	if ((flags & (HCK_PARTIALCKSUM | HCK_FULLCKSUM)) != 0) {
1504 		if (value != NULL)
1505 			*value = (uint32_t)DB_CKSUM16(mp);
1506 		if ((flags & HCK_PARTIALCKSUM) != 0) {
1507 			if (start != NULL)
1508 				*start = (uint32_t)DB_CKSUMSTART(mp);
1509 			if (stuff != NULL)
1510 				*stuff = (uint32_t)DB_CKSUMSTUFF(mp);
1511 			if (end != NULL)
1512 				*end = (uint32_t)DB_CKSUMEND(mp);
1513 		}
1514 	}
1515 
1516 	if (flags_ptr != NULL)
1517 		*flags_ptr = flags;
1518 }
1519 
1520 void mac_hcksum_set(mblk_t *mp, uint32_t start, uint32_t stuff,
1521     uint32_t end, uint32_t value, uint32_t flags)
1522 {
1523 	ASSERT(DB_TYPE(mp) == M_DATA);
1524 
1525 	DB_CKSUMSTART(mp) = (intptr_t)start;
1526 	DB_CKSUMSTUFF(mp) = (intptr_t)stuff;
1527 	DB_CKSUMEND(mp) = (intptr_t)end;
1528 	DB_CKSUMFLAGS(mp) = (uint16_t)flags;
1529 	DB_CKSUM16(mp) = (uint16_t)value;
1530 }
1531 
1532 void
1533 mac_lso_get(mblk_t *mp, uint32_t *mss, uint32_t *flags)
1534 {
1535 	ASSERT(DB_TYPE(mp) == M_DATA);
1536 
1537 	if (flags != NULL) {
1538 		*flags = DB_CKSUMFLAGS(mp) & HW_LSO;
1539 		if ((*flags != 0) && (mss != NULL))
1540 			*mss = (uint32_t)DB_LSOMSS(mp);
1541 	}
1542 }
1543 
1544 void
1545 mac_transceiver_info_set_present(mac_transceiver_info_t *infop,
1546     boolean_t present)
1547 {
1548 	infop->mti_present = present;
1549 }
1550 
1551 void
1552 mac_transceiver_info_set_usable(mac_transceiver_info_t *infop,
1553     boolean_t usable)
1554 {
1555 	infop->mti_usable = usable;
1556 }
1557