xref: /titanic_52/usr/src/uts/common/io/mac/mac_provider.c (revision 4c1177a46d4d850e30806d4e27d635527bba8e90)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/conf.h>
29 #include <sys/id_space.h>
30 #include <sys/esunddi.h>
31 #include <sys/stat.h>
32 #include <sys/mkdev.h>
33 #include <sys/stream.h>
34 #include <sys/strsubr.h>
35 #include <sys/dlpi.h>
36 #include <sys/modhash.h>
37 #include <sys/mac.h>
38 #include <sys/mac_provider.h>
39 #include <sys/mac_impl.h>
40 #include <sys/mac_client_impl.h>
41 #include <sys/mac_client_priv.h>
42 #include <sys/mac_soft_ring.h>
43 #include <sys/mac_stat.h>
44 #include <sys/dld.h>
45 #include <sys/modctl.h>
46 #include <sys/fs/dv_node.h>
47 #include <sys/thread.h>
48 #include <sys/proc.h>
49 #include <sys/callb.h>
50 #include <sys/cpuvar.h>
51 #include <sys/atomic.h>
52 #include <sys/sdt.h>
53 #include <sys/mac_flow.h>
54 #include <sys/ddi_intr_impl.h>
55 #include <sys/disp.h>
56 #include <sys/sdt.h>
57 #include <sys/pattr.h>
58 #include <sys/strsun.h>
59 
60 /*
61  * MAC Provider Interface.
62  *
63  * Interface for GLDv3 compatible NIC drivers.
64  */
65 
/*
 * Body of the per-MAC notification thread; mac_register() passes it to
 * thread_create().  Defined later in this file.
 */
static void i_mac_notify_thread(void *);

/* Type of a framework-default notification handler: takes only the MAC. */
typedef void (*mac_notify_default_cb_fn_t)(mac_impl_t *);

/*
 * Table of default handlers with MAC_NNOTE slots.  Only the first slot has
 * a handler (mac_fanout_recompute); every other listed slot is NULL, and
 * any slot beyond the last explicit initializer is implicitly NULL as well.
 *
 * NOTE(review): the per-slot labels assume the notification enumerators
 * are declared in exactly this order.  The enum is defined elsewhere --
 * confirm the labels still match it.
 */
static const mac_notify_default_cb_fn_t mac_notify_cb_list[MAC_NNOTE] = {
	mac_fanout_recompute,	/* MAC_NOTE_LINK */
	NULL,		/* MAC_NOTE_UNICST */
	NULL,		/* MAC_NOTE_TX */
	NULL,		/* MAC_NOTE_DEVPROMISC */
	NULL,		/* MAC_NOTE_FASTPATH_FLUSH */
	NULL,		/* MAC_NOTE_SDU_SIZE */
	NULL,		/* MAC_NOTE_MARGIN */
	NULL,		/* MAC_NOTE_CAPAB_CHG */
	NULL		/* MAC_NOTE_LOWLINK */
};
81 
82 /*
83  * Driver support functions.
84  */
85 
86 /* REGISTRATION */
87 
88 mac_register_t *
89 mac_alloc(uint_t mac_version)
90 {
91 	mac_register_t *mregp;
92 
93 	/*
94 	 * Make sure there isn't a version mismatch between the driver and
95 	 * the framework.  In the future, if multiple versions are
96 	 * supported, this check could become more sophisticated.
97 	 */
98 	if (mac_version != MAC_VERSION)
99 		return (NULL);
100 
101 	mregp = kmem_zalloc(sizeof (mac_register_t), KM_SLEEP);
102 	mregp->m_version = mac_version;
103 	return (mregp);
104 }
105 
106 void
107 mac_free(mac_register_t *mregp)
108 {
109 	kmem_free(mregp, sizeof (mac_register_t));
110 }
111 
112 /*
113  * mac_register() is how drivers register new MACs with the GLDv3
114  * framework.  The mregp argument is allocated by drivers using the
115  * mac_alloc() function, and can be freed using mac_free() immediately upon
116  * return from mac_register().  Upon success (0 return value), the mhp
117  * opaque pointer becomes the driver's handle to its MAC interface, and is
118  * the argument to all other mac module entry points.
119  */
120 /* ARGSUSED */
121 int
122 mac_register(mac_register_t *mregp, mac_handle_t *mhp)
123 {
124 	mac_impl_t		*mip;
125 	mactype_t		*mtype;
126 	int			err = EINVAL;
127 	struct devnames		*dnp = NULL;
128 	uint_t			instance;
129 	boolean_t		style1_created = B_FALSE;
130 	boolean_t		style2_created = B_FALSE;
131 	char			*driver;
132 	minor_t			minor = 0;
133 
134 	/* A successful call to mac_init_ops() sets the DN_GLDV3_DRIVER flag. */
135 	if (!GLDV3_DRV(ddi_driver_major(mregp->m_dip)))
136 		return (EINVAL);
137 
138 	/* Find the required MAC-Type plugin. */
139 	if ((mtype = mactype_getplugin(mregp->m_type_ident)) == NULL)
140 		return (EINVAL);
141 
142 	/* Create a mac_impl_t to represent this MAC. */
143 	mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP);
144 
145 	/*
146 	 * The mac is not ready for open yet.
147 	 */
148 	mip->mi_state_flags |= MIS_DISABLED;
149 
150 	/*
151 	 * When a mac is registered, the m_instance field can be set to:
152 	 *
153 	 *  0:	Get the mac's instance number from m_dip.
154 	 *	This is usually used for physical device dips.
155 	 *
156 	 *  [1 .. MAC_MAX_MINOR-1]: Use the value as the mac's instance number.
157 	 *	For example, when an aggregation is created with the key option,
158 	 *	"key" will be used as the instance number.
159 	 *
160 	 *  -1: Assign an instance number from [MAC_MAX_MINOR .. MAXMIN-1].
161 	 *	This is often used when a MAC of a virtual link is registered
162 	 *	(e.g., aggregation when "key" is not specified, or vnic).
163 	 *
164 	 * Note that the instance number is used to derive the mi_minor field
165 	 * of mac_impl_t, which will then be used to derive the name of kstats
166 	 * and the devfs nodes.  The first 2 cases are needed to preserve
167 	 * backward compatibility.
168 	 */
169 	switch (mregp->m_instance) {
170 	case 0:
171 		instance = ddi_get_instance(mregp->m_dip);
172 		break;
173 	case ((uint_t)-1):
174 		minor = mac_minor_hold(B_TRUE);
175 		if (minor == 0) {
176 			err = ENOSPC;
177 			goto fail;
178 		}
179 		instance = minor - 1;
180 		break;
181 	default:
182 		instance = mregp->m_instance;
183 		if (instance >= MAC_MAX_MINOR) {
184 			err = EINVAL;
185 			goto fail;
186 		}
187 		break;
188 	}
189 
190 	mip->mi_minor = (minor_t)(instance + 1);
191 	mip->mi_dip = mregp->m_dip;
192 	mip->mi_clients_list = NULL;
193 	mip->mi_nclients = 0;
194 
195 	/* Set the default IEEE Port VLAN Identifier */
196 	mip->mi_pvid = 1;
197 
198 	/* Default bridge link learning protection values */
199 	mip->mi_llimit = 1000;
200 	mip->mi_ldecay = 200;
201 
202 	driver = (char *)ddi_driver_name(mip->mi_dip);
203 
204 	/* Construct the MAC name as <drvname><instance> */
205 	(void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d",
206 	    driver, instance);
207 
208 	mip->mi_driver = mregp->m_driver;
209 
210 	mip->mi_type = mtype;
211 	mip->mi_margin = mregp->m_margin;
212 	mip->mi_info.mi_media = mtype->mt_type;
213 	mip->mi_info.mi_nativemedia = mtype->mt_nativetype;
214 	if (mregp->m_max_sdu <= mregp->m_min_sdu)
215 		goto fail;
216 	if (mregp->m_multicast_sdu == 0)
217 		mregp->m_multicast_sdu = mregp->m_max_sdu;
218 	if (mregp->m_multicast_sdu < mregp->m_min_sdu ||
219 	    mregp->m_multicast_sdu > mregp->m_max_sdu)
220 		goto fail;
221 	mip->mi_sdu_min = mregp->m_min_sdu;
222 	mip->mi_sdu_max = mregp->m_max_sdu;
223 	mip->mi_sdu_multicast = mregp->m_multicast_sdu;
224 	mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length;
225 	/*
226 	 * If the media supports a broadcast address, cache a pointer to it
227 	 * in the mac_info_t so that upper layers can use it.
228 	 */
229 	mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr;
230 
231 	mip->mi_v12n_level = mregp->m_v12n;
232 
233 	/*
234 	 * Copy the unicast source address into the mac_info_t, but only if
235 	 * the MAC-Type defines a non-zero address length.  We need to
236 	 * handle MAC-Types that have an address length of 0
237 	 * (point-to-point protocol MACs for example).
238 	 */
239 	if (mip->mi_type->mt_addr_length > 0) {
240 		if (mregp->m_src_addr == NULL)
241 			goto fail;
242 		mip->mi_info.mi_unicst_addr =
243 		    kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP);
244 		bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr,
245 		    mip->mi_type->mt_addr_length);
246 
247 		/*
248 		 * Copy the fixed 'factory' MAC address from the immutable
249 		 * info.  This is taken to be the MAC address currently in
250 		 * use.
251 		 */
252 		bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr,
253 		    mip->mi_type->mt_addr_length);
254 
255 		/*
256 		 * At this point, we should set up the classification
257 		 * rules etc but we delay it till mac_open() so that
258 		 * the resource discovery has taken place and we
259 		 * know someone wants to use the device. Otherwise
260 		 * memory gets allocated for Rx ring structures even
261 		 * during probe.
262 		 */
263 
264 		/* Copy the destination address if one is provided. */
265 		if (mregp->m_dst_addr != NULL) {
266 			bcopy(mregp->m_dst_addr, mip->mi_dstaddr,
267 			    mip->mi_type->mt_addr_length);
268 			mip->mi_dstaddr_set = B_TRUE;
269 		}
270 	} else if (mregp->m_src_addr != NULL) {
271 		goto fail;
272 	}
273 
274 	/*
275 	 * The format of the m_pdata is specific to the plugin.  It is
276 	 * passed in as an argument to all of the plugin callbacks.  The
277 	 * driver can update this information by calling
278 	 * mac_pdata_update().
279 	 */
280 	if (mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY) {
281 		/*
282 		 * Verify if the supplied plugin data is valid.  Note that
283 		 * even if the caller passed in a NULL pointer as plugin data,
284 		 * we still need to verify if that's valid as the plugin may
285 		 * require plugin data to function.
286 		 */
287 		if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata,
288 		    mregp->m_pdata_size)) {
289 			goto fail;
290 		}
291 		if (mregp->m_pdata != NULL) {
292 			mip->mi_pdata =
293 			    kmem_alloc(mregp->m_pdata_size, KM_SLEEP);
294 			bcopy(mregp->m_pdata, mip->mi_pdata,
295 			    mregp->m_pdata_size);
296 			mip->mi_pdata_size = mregp->m_pdata_size;
297 		}
298 	} else if (mregp->m_pdata != NULL) {
299 		/*
300 		 * The caller supplied non-NULL plugin data, but the plugin
301 		 * does not recognize plugin data.
302 		 */
303 		err = EINVAL;
304 		goto fail;
305 	}
306 
307 	/*
308 	 * Register the private properties.
309 	 */
310 	mac_register_priv_prop(mip, mregp->m_priv_props);
311 
312 	/*
313 	 * Stash the driver callbacks into the mac_impl_t, but first sanity
314 	 * check to make sure all mandatory callbacks are set.
315 	 */
316 	if (mregp->m_callbacks->mc_getstat == NULL ||
317 	    mregp->m_callbacks->mc_start == NULL ||
318 	    mregp->m_callbacks->mc_stop == NULL ||
319 	    mregp->m_callbacks->mc_setpromisc == NULL ||
320 	    mregp->m_callbacks->mc_multicst == NULL) {
321 		goto fail;
322 	}
323 	mip->mi_callbacks = mregp->m_callbacks;
324 
325 	if (mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LEGACY,
326 	    &mip->mi_capab_legacy)) {
327 		mip->mi_state_flags |= MIS_LEGACY;
328 		mip->mi_phy_dev = mip->mi_capab_legacy.ml_dev;
329 	} else {
330 		mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip),
331 		    mip->mi_minor);
332 	}
333 
334 	/*
335 	 * Allocate a notification thread. thread_create blocks for memory
336 	 * if needed, it never fails.
337 	 */
338 	mip->mi_notify_thread = thread_create(NULL, 0, i_mac_notify_thread,
339 	    mip, 0, &p0, TS_RUN, minclsyspri);
340 
341 	/*
342 	 * Initialize the capabilities
343 	 */
344 
345 	bzero(&mip->mi_rx_rings_cap, sizeof (mac_capab_rings_t));
346 	bzero(&mip->mi_tx_rings_cap, sizeof (mac_capab_rings_t));
347 
348 	if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, NULL))
349 		mip->mi_state_flags |= MIS_IS_VNIC;
350 
351 	if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_AGGR, NULL))
352 		mip->mi_state_flags |= MIS_IS_AGGR;
353 
354 	mac_addr_factory_init(mip);
355 
356 	mac_transceiver_init(mip);
357 
358 	mac_led_init(mip);
359 
360 	/*
361 	 * Enforce the virtrualization level registered.
362 	 */
363 	if (mip->mi_v12n_level & MAC_VIRT_LEVEL1) {
364 		if (mac_init_rings(mip, MAC_RING_TYPE_RX) != 0 ||
365 		    mac_init_rings(mip, MAC_RING_TYPE_TX) != 0)
366 			goto fail;
367 
368 		/*
369 		 * The driver needs to register at least rx rings for this
370 		 * virtualization level.
371 		 */
372 		if (mip->mi_rx_groups == NULL)
373 			goto fail;
374 	}
375 
376 	/*
377 	 * The driver must set mc_unicst entry point to NULL when it advertises
378 	 * CAP_RINGS for rx groups.
379 	 */
380 	if (mip->mi_rx_groups != NULL) {
381 		if (mregp->m_callbacks->mc_unicst != NULL)
382 			goto fail;
383 	} else {
384 		if (mregp->m_callbacks->mc_unicst == NULL)
385 			goto fail;
386 	}
387 
388 	/*
389 	 * Initialize MAC addresses. Must be called after mac_init_rings().
390 	 */
391 	mac_init_macaddr(mip);
392 
393 	mip->mi_share_capab.ms_snum = 0;
394 	if (mip->mi_v12n_level & MAC_VIRT_HIO) {
395 		(void) mac_capab_get((mac_handle_t)mip, MAC_CAPAB_SHARES,
396 		    &mip->mi_share_capab);
397 	}
398 
399 	/*
400 	 * Initialize the kstats for this device.
401 	 */
402 	mac_driver_stat_create(mip);
403 
404 	/* Zero out any properties. */
405 	bzero(&mip->mi_resource_props, sizeof (mac_resource_props_t));
406 
407 	if (mip->mi_minor <= MAC_MAX_MINOR) {
408 		/* Create a style-2 DLPI device */
409 		if (ddi_create_minor_node(mip->mi_dip, driver, S_IFCHR, 0,
410 		    DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS)
411 			goto fail;
412 		style2_created = B_TRUE;
413 
414 		/* Create a style-1 DLPI device */
415 		if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR,
416 		    mip->mi_minor, DDI_NT_NET, 0) != DDI_SUCCESS)
417 			goto fail;
418 		style1_created = B_TRUE;
419 	}
420 
421 	mac_flow_l2tab_create(mip, &mip->mi_flow_tab);
422 
423 	rw_enter(&i_mac_impl_lock, RW_WRITER);
424 	if (mod_hash_insert(i_mac_impl_hash,
425 	    (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) {
426 		rw_exit(&i_mac_impl_lock);
427 		err = EEXIST;
428 		goto fail;
429 	}
430 
431 	DTRACE_PROBE2(mac__register, struct devnames *, dnp,
432 	    (mac_impl_t *), mip);
433 
434 	/*
435 	 * Mark the MAC to be ready for open.
436 	 */
437 	mip->mi_state_flags &= ~MIS_DISABLED;
438 	rw_exit(&i_mac_impl_lock);
439 
440 	atomic_inc_32(&i_mac_impl_count);
441 
442 	cmn_err(CE_NOTE, "!%s registered", mip->mi_name);
443 	*mhp = (mac_handle_t)mip;
444 	return (0);
445 
446 fail:
447 	if (style1_created)
448 		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
449 
450 	if (style2_created)
451 		ddi_remove_minor_node(mip->mi_dip, driver);
452 
453 	mac_addr_factory_fini(mip);
454 
455 	/* Clean up registered MAC addresses */
456 	mac_fini_macaddr(mip);
457 
458 	/* Clean up registered rings */
459 	mac_free_rings(mip, MAC_RING_TYPE_RX);
460 	mac_free_rings(mip, MAC_RING_TYPE_TX);
461 
462 	/* Clean up notification thread */
463 	if (mip->mi_notify_thread != NULL)
464 		i_mac_notify_exit(mip);
465 
466 	if (mip->mi_info.mi_unicst_addr != NULL) {
467 		kmem_free(mip->mi_info.mi_unicst_addr,
468 		    mip->mi_type->mt_addr_length);
469 		mip->mi_info.mi_unicst_addr = NULL;
470 	}
471 
472 	mac_driver_stat_delete(mip);
473 
474 	if (mip->mi_type != NULL) {
475 		atomic_dec_32(&mip->mi_type->mt_ref);
476 		mip->mi_type = NULL;
477 	}
478 
479 	if (mip->mi_pdata != NULL) {
480 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
481 		mip->mi_pdata = NULL;
482 		mip->mi_pdata_size = 0;
483 	}
484 
485 	if (minor != 0) {
486 		ASSERT(minor > MAC_MAX_MINOR);
487 		mac_minor_rele(minor);
488 	}
489 
490 	mip->mi_state_flags = 0;
491 	mac_unregister_priv_prop(mip);
492 
493 	/*
494 	 * Clear the state before destroying the mac_impl_t
495 	 */
496 	mip->mi_state_flags = 0;
497 
498 	kmem_cache_free(i_mac_impl_cachep, mip);
499 	return (err);
500 }
501 
502 /*
503  * Unregister from the GLDv3 framework
504  */
505 int
506 mac_unregister(mac_handle_t mh)
507 {
508 	int			err;
509 	mac_impl_t		*mip = (mac_impl_t *)mh;
510 	mod_hash_val_t		val;
511 	mac_margin_req_t	*mmr, *nextmmr;
512 
513 	/* Fail the unregister if there are any open references to this mac. */
514 	if ((err = mac_disable_nowait(mh)) != 0)
515 		return (err);
516 
517 	/*
518 	 * Clean up notification thread and wait for it to exit.
519 	 */
520 	i_mac_notify_exit(mip);
521 
522 	/*
523 	 * Prior to acquiring the MAC perimeter, remove the MAC instance from
524 	 * the internal hash table. Such removal means table-walkers that
525 	 * acquire the perimeter will not do so on behalf of what we are
526 	 * unregistering, which prevents a deadlock.
527 	 */
528 	rw_enter(&i_mac_impl_lock, RW_WRITER);
529 	(void) mod_hash_remove(i_mac_impl_hash,
530 	    (mod_hash_key_t)mip->mi_name, &val);
531 	rw_exit(&i_mac_impl_lock);
532 	ASSERT(mip == (mac_impl_t *)val);
533 
534 	i_mac_perim_enter(mip);
535 
536 	/*
537 	 * There is still resource properties configured over this mac.
538 	 */
539 	if (mip->mi_resource_props.mrp_mask != 0)
540 		mac_fastpath_enable((mac_handle_t)mip);
541 
542 	if (mip->mi_minor < MAC_MAX_MINOR + 1) {
543 		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
544 		ddi_remove_minor_node(mip->mi_dip,
545 		    (char *)ddi_driver_name(mip->mi_dip));
546 	}
547 
548 	ASSERT(mip->mi_nactiveclients == 0 && !(mip->mi_state_flags &
549 	    MIS_EXCLUSIVE));
550 
551 	mac_driver_stat_delete(mip);
552 
553 	ASSERT(i_mac_impl_count > 0);
554 	atomic_dec_32(&i_mac_impl_count);
555 
556 	if (mip->mi_pdata != NULL)
557 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
558 	mip->mi_pdata = NULL;
559 	mip->mi_pdata_size = 0;
560 
561 	/*
562 	 * Free the list of margin request.
563 	 */
564 	for (mmr = mip->mi_mmrp; mmr != NULL; mmr = nextmmr) {
565 		nextmmr = mmr->mmr_nextp;
566 		kmem_free(mmr, sizeof (mac_margin_req_t));
567 	}
568 	mip->mi_mmrp = NULL;
569 
570 	mip->mi_linkstate = mip->mi_lowlinkstate = LINK_STATE_UNKNOWN;
571 	kmem_free(mip->mi_info.mi_unicst_addr, mip->mi_type->mt_addr_length);
572 	mip->mi_info.mi_unicst_addr = NULL;
573 
574 	atomic_dec_32(&mip->mi_type->mt_ref);
575 	mip->mi_type = NULL;
576 
577 	/*
578 	 * Free the primary MAC address.
579 	 */
580 	mac_fini_macaddr(mip);
581 
582 	/*
583 	 * free all rings
584 	 */
585 	mac_free_rings(mip, MAC_RING_TYPE_RX);
586 	mac_free_rings(mip, MAC_RING_TYPE_TX);
587 
588 	mac_addr_factory_fini(mip);
589 
590 	bzero(mip->mi_addr, MAXMACADDRLEN);
591 	bzero(mip->mi_dstaddr, MAXMACADDRLEN);
592 	mip->mi_dstaddr_set = B_FALSE;
593 
594 	/* and the flows */
595 	mac_flow_tab_destroy(mip->mi_flow_tab);
596 	mip->mi_flow_tab = NULL;
597 
598 	if (mip->mi_minor > MAC_MAX_MINOR)
599 		mac_minor_rele(mip->mi_minor);
600 
601 	cmn_err(CE_NOTE, "!%s unregistered", mip->mi_name);
602 
603 	/*
604 	 * Reset the perim related fields to default values before
605 	 * kmem_cache_free
606 	 */
607 	i_mac_perim_exit(mip);
608 	mip->mi_state_flags = 0;
609 
610 	mac_unregister_priv_prop(mip);
611 
612 	ASSERT(mip->mi_bridge_link == NULL);
613 	kmem_cache_free(i_mac_impl_cachep, mip);
614 
615 	return (0);
616 }
617 
618 /* DATA RECEPTION */
619 
620 /*
621  * This function is invoked for packets received by the MAC driver in
622  * interrupt context. The ring generation number provided by the driver
623  * is matched with the ring generation number held in MAC. If they do not
624  * match, received packets are considered stale packets coming from an older
625  * assignment of the ring. Drop them.
626  */
627 void
628 mac_rx_ring(mac_handle_t mh, mac_ring_handle_t mrh, mblk_t *mp_chain,
629     uint64_t mr_gen_num)
630 {
631 	mac_ring_t		*mr = (mac_ring_t *)mrh;
632 
633 	if ((mr != NULL) && (mr->mr_gen_num != mr_gen_num)) {
634 		DTRACE_PROBE2(mac__rx__rings__stale__packet, uint64_t,
635 		    mr->mr_gen_num, uint64_t, mr_gen_num);
636 		freemsgchain(mp_chain);
637 		return;
638 	}
639 	mac_rx(mh, (mac_resource_handle_t)mrh, mp_chain);
640 }
641 
642 /*
643  * This function is invoked for each packet received by the underlying driver.
644  */
645 void
646 mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
647 {
648 	mac_impl_t *mip = (mac_impl_t *)mh;
649 
650 	/*
651 	 * Check if the link is part of a bridge.  If not, then we don't need
652 	 * to take the lock to remain consistent.  Make this common case
653 	 * lock-free and tail-call optimized.
654 	 */
655 	if (mip->mi_bridge_link == NULL) {
656 		mac_rx_common(mh, mrh, mp_chain);
657 	} else {
658 		/*
659 		 * Once we take a reference on the bridge link, the bridge
660 		 * module itself can't unload, so the callback pointers are
661 		 * stable.
662 		 */
663 		mutex_enter(&mip->mi_bridge_lock);
664 		if ((mh = mip->mi_bridge_link) != NULL)
665 			mac_bridge_ref_cb(mh, B_TRUE);
666 		mutex_exit(&mip->mi_bridge_lock);
667 		if (mh == NULL) {
668 			mac_rx_common((mac_handle_t)mip, mrh, mp_chain);
669 		} else {
670 			mac_bridge_rx_cb(mh, mrh, mp_chain);
671 			mac_bridge_ref_cb(mh, B_FALSE);
672 		}
673 	}
674 }
675 
676 /*
677  * Special case function: this allows snooping of packets transmitted and
678  * received by TRILL. By design, they go directly into the TRILL module.
679  */
680 void
681 mac_trill_snoop(mac_handle_t mh, mblk_t *mp)
682 {
683 	mac_impl_t *mip = (mac_impl_t *)mh;
684 
685 	if (mip->mi_promisc_list != NULL)
686 		mac_promisc_dispatch(mip, mp, NULL);
687 }
688 
689 /*
690  * This is the upward reentry point for packets arriving from the bridging
691  * module and from mac_rx for links not part of a bridge.
692  */
693 void
694 mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
695 {
696 	mac_impl_t		*mip = (mac_impl_t *)mh;
697 	mac_ring_t		*mr = (mac_ring_t *)mrh;
698 	mac_soft_ring_set_t 	*mac_srs;
699 	mblk_t			*bp = mp_chain;
700 	boolean_t		hw_classified = B_FALSE;
701 
702 	/*
703 	 * If there are any promiscuous mode callbacks defined for
704 	 * this MAC, pass them a copy if appropriate.
705 	 */
706 	if (mip->mi_promisc_list != NULL)
707 		mac_promisc_dispatch(mip, mp_chain, NULL);
708 
709 	if (mr != NULL) {
710 		/*
711 		 * If the SRS teardown has started, just return. The 'mr'
712 		 * continues to be valid until the driver unregisters the mac.
713 		 * Hardware classified packets will not make their way up
714 		 * beyond this point once the teardown has started. The driver
715 		 * is never passed a pointer to a flow entry or SRS or any
716 		 * structure that can be freed much before mac_unregister.
717 		 */
718 		mutex_enter(&mr->mr_lock);
719 		if ((mr->mr_state != MR_INUSE) || (mr->mr_flag &
720 		    (MR_INCIPIENT | MR_CONDEMNED | MR_QUIESCE))) {
721 			mutex_exit(&mr->mr_lock);
722 			freemsgchain(mp_chain);
723 			return;
724 		}
725 		if (mr->mr_classify_type == MAC_HW_CLASSIFIER) {
726 			hw_classified = B_TRUE;
727 			MR_REFHOLD_LOCKED(mr);
728 		}
729 		mutex_exit(&mr->mr_lock);
730 
731 		/*
732 		 * We check if an SRS is controlling this ring.
733 		 * If so, we can directly call the srs_lower_proc
734 		 * routine otherwise we need to go through mac_rx_classify
735 		 * to reach the right place.
736 		 */
737 		if (hw_classified) {
738 			mac_srs = mr->mr_srs;
739 			/*
740 			 * This is supposed to be the fast path.
741 			 * All packets received though here were steered by
742 			 * the hardware classifier, and share the same
743 			 * MAC header info.
744 			 */
745 			mac_srs->srs_rx.sr_lower_proc(mh,
746 			    (mac_resource_handle_t)mac_srs, mp_chain, B_FALSE);
747 			MR_REFRELE(mr);
748 			return;
749 		}
750 		/* We'll fall through to software classification */
751 	} else {
752 		flow_entry_t *flent;
753 		int err;
754 
755 		rw_enter(&mip->mi_rw_lock, RW_READER);
756 		if (mip->mi_single_active_client != NULL) {
757 			flent = mip->mi_single_active_client->mci_flent_list;
758 			FLOW_TRY_REFHOLD(flent, err);
759 			rw_exit(&mip->mi_rw_lock);
760 			if (err == 0) {
761 				(flent->fe_cb_fn)(flent->fe_cb_arg1,
762 				    flent->fe_cb_arg2, mp_chain, B_FALSE);
763 				FLOW_REFRELE(flent);
764 				return;
765 			}
766 		} else {
767 			rw_exit(&mip->mi_rw_lock);
768 		}
769 	}
770 
771 	if (!FLOW_TAB_EMPTY(mip->mi_flow_tab)) {
772 		if ((bp = mac_rx_flow(mh, mrh, bp)) == NULL)
773 			return;
774 	}
775 
776 	freemsgchain(bp);
777 }
778 
779 /* DATA TRANSMISSION */
780 
781 /*
782  * A driver's notification to resume transmission, in case of a provider
783  * without TX rings.
784  */
785 void
786 mac_tx_update(mac_handle_t mh)
787 {
788 	mac_tx_ring_update(mh, NULL);
789 }
790 
791 /*
792  * A driver's notification to resume transmission on the specified TX ring.
793  */
794 void
795 mac_tx_ring_update(mac_handle_t mh, mac_ring_handle_t rh)
796 {
797 	i_mac_tx_srs_notify((mac_impl_t *)mh, rh);
798 }
799 
800 /* LINK STATE */
801 /*
802  * Notify the MAC layer about a link state change
803  */
804 void
805 mac_link_update(mac_handle_t mh, link_state_t link)
806 {
807 	mac_impl_t	*mip = (mac_impl_t *)mh;
808 
809 	/*
810 	 * Save the link state.
811 	 */
812 	mip->mi_lowlinkstate = link;
813 
814 	/*
815 	 * Send a MAC_NOTE_LOWLINK notification.  This tells the notification
816 	 * thread to deliver both lower and upper notifications.
817 	 */
818 	i_mac_notify(mip, MAC_NOTE_LOWLINK);
819 }
820 
821 /*
822  * Notify the MAC layer about a link state change due to bridging.
823  */
824 void
825 mac_link_redo(mac_handle_t mh, link_state_t link)
826 {
827 	mac_impl_t	*mip = (mac_impl_t *)mh;
828 
829 	/*
830 	 * Save the link state.
831 	 */
832 	mip->mi_linkstate = link;
833 
834 	/*
835 	 * Send a MAC_NOTE_LINK notification.  Only upper notifications are
836 	 * made.
837 	 */
838 	i_mac_notify(mip, MAC_NOTE_LINK);
839 }
840 
841 /* MINOR NODE HANDLING */
842 
843 /*
844  * Given a dev_t, return the instance number (PPA) associated with it.
845  * Drivers can use this in their getinfo(9e) implementation to lookup
846  * the instance number (i.e. PPA) of the device, to use as an index to
847  * their own array of soft state structures.
848  *
849  * Returns -1 on error.
850  */
851 int
852 mac_devt_to_instance(dev_t devt)
853 {
854 	return (dld_devt_to_instance(devt));
855 }
856 
857 /*
858  * This function returns the first minor number that is available for
859  * driver private use.  All minor numbers smaller than this are
860  * reserved for GLDv3 use.
861  */
862 minor_t
863 mac_private_minor(void)
864 {
865 	return (MAC_PRIVATE_MINOR);
866 }
867 
868 /* OTHER CONTROL INFORMATION */
869 
870 /*
871  * A driver notified us that its primary MAC address has changed.
872  */
873 void
874 mac_unicst_update(mac_handle_t mh, const uint8_t *addr)
875 {
876 	mac_impl_t	*mip = (mac_impl_t *)mh;
877 
878 	if (mip->mi_type->mt_addr_length == 0)
879 		return;
880 
881 	i_mac_perim_enter(mip);
882 
883 	/*
884 	 * If address changes, freshen the MAC address value and update
885 	 * all MAC clients that share this MAC address.
886 	 */
887 	if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) != 0) {
888 		mac_freshen_macaddr(mac_find_macaddr(mip, mip->mi_addr),
889 		    (uint8_t *)addr);
890 	}
891 
892 	i_mac_perim_exit(mip);
893 
894 	/*
895 	 * Send a MAC_NOTE_UNICST notification.
896 	 */
897 	i_mac_notify(mip, MAC_NOTE_UNICST);
898 }
899 
900 void
901 mac_dst_update(mac_handle_t mh, const uint8_t *addr)
902 {
903 	mac_impl_t	*mip = (mac_impl_t *)mh;
904 
905 	if (mip->mi_type->mt_addr_length == 0)
906 		return;
907 
908 	i_mac_perim_enter(mip);
909 	bcopy(addr, mip->mi_dstaddr, mip->mi_type->mt_addr_length);
910 	i_mac_perim_exit(mip);
911 	i_mac_notify(mip, MAC_NOTE_DEST);
912 }
913 
914 /*
915  * MAC plugin information changed.
916  */
917 int
918 mac_pdata_update(mac_handle_t mh, void *mac_pdata, size_t dsize)
919 {
920 	mac_impl_t	*mip = (mac_impl_t *)mh;
921 
922 	/*
923 	 * Verify that the plugin supports MAC plugin data and that the
924 	 * supplied data is valid.
925 	 */
926 	if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
927 		return (EINVAL);
928 	if (!mip->mi_type->mt_ops.mtops_pdata_verify(mac_pdata, dsize))
929 		return (EINVAL);
930 
931 	if (mip->mi_pdata != NULL)
932 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
933 
934 	mip->mi_pdata = kmem_alloc(dsize, KM_SLEEP);
935 	bcopy(mac_pdata, mip->mi_pdata, dsize);
936 	mip->mi_pdata_size = dsize;
937 
938 	/*
939 	 * Since the MAC plugin data is used to construct MAC headers that
940 	 * were cached in fast-path headers, we need to flush fast-path
941 	 * information for links associated with this mac.
942 	 */
943 	i_mac_notify(mip, MAC_NOTE_FASTPATH_FLUSH);
944 	return (0);
945 }
946 
947 /*
948  * Invoked by driver as well as the framework to notify its capability change.
949  */
950 void
951 mac_capab_update(mac_handle_t mh)
952 {
953 	/* Send MAC_NOTE_CAPAB_CHG notification */
954 	i_mac_notify((mac_impl_t *)mh, MAC_NOTE_CAPAB_CHG);
955 }
956 
957 /*
958  * Used by normal drivers to update the max sdu size.
959  * We need to handle the case of a smaller mi_sdu_multicast
960  * since this is called by mac_set_mtu() even for drivers that
961  * have differing unicast and multicast mtu and we don't want to
962  * increase the multicast mtu by accident in that case.
963  */
964 int
965 mac_maxsdu_update(mac_handle_t mh, uint_t sdu_max)
966 {
967 	mac_impl_t	*mip = (mac_impl_t *)mh;
968 
969 	if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
970 		return (EINVAL);
971 	mip->mi_sdu_max = sdu_max;
972 	if (mip->mi_sdu_multicast > mip->mi_sdu_max)
973 		mip->mi_sdu_multicast = mip->mi_sdu_max;
974 
975 	/* Send a MAC_NOTE_SDU_SIZE notification. */
976 	i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
977 	return (0);
978 }
979 
980 /*
981  * Version of the above function that is used by drivers that have a different
982  * max sdu size for multicast/broadcast vs. unicast.
983  */
984 int
985 mac_maxsdu_update2(mac_handle_t mh, uint_t sdu_max, uint_t sdu_multicast)
986 {
987 	mac_impl_t	*mip = (mac_impl_t *)mh;
988 
989 	if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
990 		return (EINVAL);
991 	if (sdu_multicast == 0)
992 		sdu_multicast = sdu_max;
993 	if (sdu_multicast > sdu_max || sdu_multicast < mip->mi_sdu_min)
994 		return (EINVAL);
995 	mip->mi_sdu_max = sdu_max;
996 	mip->mi_sdu_multicast = sdu_multicast;
997 
998 	/* Send a MAC_NOTE_SDU_SIZE notification. */
999 	i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
1000 	return (0);
1001 }
1002 
1003 static void
1004 mac_ring_intr_retarget(mac_group_t *group, mac_ring_t *ring)
1005 {
1006 	mac_client_impl_t *mcip;
1007 	flow_entry_t *flent;
1008 	mac_soft_ring_set_t *mac_rx_srs;
1009 	mac_cpus_t *srs_cpu;
1010 	int i;
1011 
1012 	if (((mcip = MAC_GROUP_ONLY_CLIENT(group)) != NULL) &&
1013 	    (!ring->mr_info.mri_intr.mi_ddi_shared)) {
1014 		/* interrupt can be re-targeted */
1015 		ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED);
1016 		flent = mcip->mci_flent;
1017 		if (ring->mr_type == MAC_RING_TYPE_RX) {
1018 			for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
1019 				mac_rx_srs = flent->fe_rx_srs[i];
1020 				if (mac_rx_srs->srs_ring != ring)
1021 					continue;
1022 				srs_cpu = &mac_rx_srs->srs_cpu;
1023 				mutex_enter(&cpu_lock);
1024 				mac_rx_srs_retarget_intr(mac_rx_srs,
1025 				    srs_cpu->mc_rx_intr_cpu);
1026 				mutex_exit(&cpu_lock);
1027 				break;
1028 			}
1029 		} else {
1030 			if (flent->fe_tx_srs != NULL) {
1031 				mutex_enter(&cpu_lock);
1032 				mac_tx_srs_retarget_intr(
1033 				    flent->fe_tx_srs);
1034 				mutex_exit(&cpu_lock);
1035 			}
1036 		}
1037 	}
1038 }
1039 
1040 /*
1041  * Clients like aggr create pseudo rings (mac_ring_t) and expose them to
1042  * their clients. There is a 1-1 mapping pseudo ring and the hardware
1043  * ring. ddi interrupt handles are exported from the hardware ring to
1044  * the pseudo ring. Thus when the interrupt handle changes, clients of
1045  * aggr that are using the handle need to use the new handle and
1046  * re-target their interrupts.
1047  */
1048 static void
1049 mac_pseudo_ring_intr_retarget(mac_impl_t *mip, mac_ring_t *ring,
1050     ddi_intr_handle_t ddh)
1051 {
1052 	mac_ring_t *pring;
1053 	mac_group_t *pgroup;
1054 	mac_impl_t *pmip;
1055 	char macname[MAXNAMELEN];
1056 	mac_perim_handle_t p_mph;
1057 	uint64_t saved_gen_num;
1058 
1059 again:
1060 	pring = (mac_ring_t *)ring->mr_prh;
1061 	pgroup = (mac_group_t *)pring->mr_gh;
1062 	pmip = (mac_impl_t *)pgroup->mrg_mh;
1063 	saved_gen_num = ring->mr_gen_num;
1064 	(void) strlcpy(macname, pmip->mi_name, MAXNAMELEN);
1065 	/*
1066 	 * We need to enter aggr's perimeter. The locking hierarchy
1067 	 * dictates that aggr's perimeter should be entered first
1068 	 * and then the port's perimeter. So drop the port's
1069 	 * perimeter, enter aggr's and then re-enter port's
1070 	 * perimeter.
1071 	 */
1072 	i_mac_perim_exit(mip);
1073 	/*
1074 	 * While we know pmip is the aggr's mip, there is a
1075 	 * possibility that aggr could have unregistered by
1076 	 * the time we exit port's perimeter (mip) and
1077 	 * enter aggr's perimeter (pmip). To avoid that
1078 	 * scenario, enter aggr's perimeter using its name.
1079 	 */
1080 	if (mac_perim_enter_by_macname(macname, &p_mph) != 0)
1081 		return;
1082 	i_mac_perim_enter(mip);
1083 	/*
1084 	 * Check if the ring got assigned to another aggregation before
1085 	 * be could enter aggr's and the port's perimeter. When a ring
1086 	 * gets deleted from an aggregation, it calls mac_stop_ring()
1087 	 * which increments the generation number. So checking
1088 	 * generation number will be enough.
1089 	 */
1090 	if (ring->mr_gen_num != saved_gen_num && ring->mr_prh != NULL) {
1091 		i_mac_perim_exit(mip);
1092 		mac_perim_exit(p_mph);
1093 		i_mac_perim_enter(mip);
1094 		goto again;
1095 	}
1096 
1097 	/* Check if pseudo ring is still present */
1098 	if (ring->mr_prh != NULL) {
1099 		pring->mr_info.mri_intr.mi_ddi_handle = ddh;
1100 		pring->mr_info.mri_intr.mi_ddi_shared =
1101 		    ring->mr_info.mri_intr.mi_ddi_shared;
1102 		if (ddh != NULL)
1103 			mac_ring_intr_retarget(pgroup, pring);
1104 	}
1105 	i_mac_perim_exit(mip);
1106 	mac_perim_exit(p_mph);
1107 }
1108 /*
1109  * API called by driver to provide new interrupt handle for TX/RX rings.
1110  * This usually happens when IRM (Interrupt Resource Manangement)
1111  * framework either gives the driver more MSI-x interrupts or takes
1112  * away MSI-x interrupts from the driver.
1113  */
1114 void
1115 mac_ring_intr_set(mac_ring_handle_t mrh, ddi_intr_handle_t ddh)
1116 {
1117 	mac_ring_t	*ring = (mac_ring_t *)mrh;
1118 	mac_group_t	*group = (mac_group_t *)ring->mr_gh;
1119 	mac_impl_t	*mip = (mac_impl_t *)group->mrg_mh;
1120 
1121 	i_mac_perim_enter(mip);
1122 	ring->mr_info.mri_intr.mi_ddi_handle = ddh;
1123 	if (ddh == NULL) {
1124 		/* Interrupts being reset */
1125 		ring->mr_info.mri_intr.mi_ddi_shared = B_FALSE;
1126 		if (ring->mr_prh != NULL) {
1127 			mac_pseudo_ring_intr_retarget(mip, ring, ddh);
1128 			return;
1129 		}
1130 	} else {
1131 		/* New interrupt handle */
1132 		mac_compare_ddi_handle(mip->mi_rx_groups,
1133 		    mip->mi_rx_group_count, ring);
1134 		if (!ring->mr_info.mri_intr.mi_ddi_shared) {
1135 			mac_compare_ddi_handle(mip->mi_tx_groups,
1136 			    mip->mi_tx_group_count, ring);
1137 		}
1138 		if (ring->mr_prh != NULL) {
1139 			mac_pseudo_ring_intr_retarget(mip, ring, ddh);
1140 			return;
1141 		} else {
1142 			mac_ring_intr_retarget(group, ring);
1143 		}
1144 	}
1145 	i_mac_perim_exit(mip);
1146 }
1147 
1148 /* PRIVATE FUNCTIONS, FOR INTERNAL USE ONLY */
1149 
1150 /*
1151  * Updates the mac_impl structure with the current state of the link
1152  */
1153 static void
1154 i_mac_log_link_state(mac_impl_t *mip)
1155 {
1156 	/*
1157 	 * If no change, then it is not interesting.
1158 	 */
1159 	if (mip->mi_lastlowlinkstate == mip->mi_lowlinkstate)
1160 		return;
1161 
1162 	switch (mip->mi_lowlinkstate) {
1163 	case LINK_STATE_UP:
1164 		if (mip->mi_type->mt_ops.mtops_ops & MTOPS_LINK_DETAILS) {
1165 			char det[200];
1166 
1167 			mip->mi_type->mt_ops.mtops_link_details(det,
1168 			    sizeof (det), (mac_handle_t)mip, mip->mi_pdata);
1169 
1170 			cmn_err(CE_NOTE, "!%s link up, %s", mip->mi_name, det);
1171 		} else {
1172 			cmn_err(CE_NOTE, "!%s link up", mip->mi_name);
1173 		}
1174 		break;
1175 
1176 	case LINK_STATE_DOWN:
1177 		/*
1178 		 * Only transitions from UP to DOWN are interesting
1179 		 */
1180 		if (mip->mi_lastlowlinkstate != LINK_STATE_UNKNOWN)
1181 			cmn_err(CE_NOTE, "!%s link down", mip->mi_name);
1182 		break;
1183 
1184 	case LINK_STATE_UNKNOWN:
1185 		/*
1186 		 * This case is normally not interesting.
1187 		 */
1188 		break;
1189 	}
1190 	mip->mi_lastlowlinkstate = mip->mi_lowlinkstate;
1191 }
1192 
1193 /*
1194  * Main routine for the callbacks notifications thread
1195  */
1196 static void
1197 i_mac_notify_thread(void *arg)
1198 {
1199 	mac_impl_t	*mip = arg;
1200 	callb_cpr_t	cprinfo;
1201 	mac_cb_t	*mcb;
1202 	mac_cb_info_t	*mcbi;
1203 	mac_notify_cb_t	*mncb;
1204 
1205 	mcbi = &mip->mi_notify_cb_info;
1206 	CALLB_CPR_INIT(&cprinfo, mcbi->mcbi_lockp, callb_generic_cpr,
1207 	    "i_mac_notify_thread");
1208 
1209 	mutex_enter(mcbi->mcbi_lockp);
1210 
1211 	for (;;) {
1212 		uint32_t	bits;
1213 		uint32_t	type;
1214 
1215 		bits = mip->mi_notify_bits;
1216 		if (bits == 0) {
1217 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
1218 			cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
1219 			CALLB_CPR_SAFE_END(&cprinfo, mcbi->mcbi_lockp);
1220 			continue;
1221 		}
1222 		mip->mi_notify_bits = 0;
1223 		if ((bits & (1 << MAC_NNOTE)) != 0) {
1224 			/* request to quit */
1225 			ASSERT(mip->mi_state_flags & MIS_DISABLED);
1226 			break;
1227 		}
1228 
1229 		mutex_exit(mcbi->mcbi_lockp);
1230 
1231 		/*
1232 		 * Log link changes on the actual link, but then do reports on
1233 		 * synthetic state (if part of a bridge).
1234 		 */
1235 		if ((bits & (1 << MAC_NOTE_LOWLINK)) != 0) {
1236 			link_state_t newstate;
1237 			mac_handle_t mh;
1238 
1239 			i_mac_log_link_state(mip);
1240 			newstate = mip->mi_lowlinkstate;
1241 			if (mip->mi_bridge_link != NULL) {
1242 				mutex_enter(&mip->mi_bridge_lock);
1243 				if ((mh = mip->mi_bridge_link) != NULL) {
1244 					newstate = mac_bridge_ls_cb(mh,
1245 					    newstate);
1246 				}
1247 				mutex_exit(&mip->mi_bridge_lock);
1248 			}
1249 			if (newstate != mip->mi_linkstate) {
1250 				mip->mi_linkstate = newstate;
1251 				bits |= 1 << MAC_NOTE_LINK;
1252 			}
1253 		}
1254 
1255 		/*
1256 		 * Do notification callbacks for each notification type.
1257 		 */
1258 		for (type = 0; type < MAC_NNOTE; type++) {
1259 			if ((bits & (1 << type)) == 0) {
1260 				continue;
1261 			}
1262 
1263 			if (mac_notify_cb_list[type] != NULL)
1264 				(*mac_notify_cb_list[type])(mip);
1265 
1266 			/*
1267 			 * Walk the list of notifications.
1268 			 */
1269 			MAC_CALLBACK_WALKER_INC(&mip->mi_notify_cb_info);
1270 			for (mcb = mip->mi_notify_cb_list; mcb != NULL;
1271 			    mcb = mcb->mcb_nextp) {
1272 				mncb = (mac_notify_cb_t *)mcb->mcb_objp;
1273 				mncb->mncb_fn(mncb->mncb_arg, type);
1274 			}
1275 			MAC_CALLBACK_WALKER_DCR(&mip->mi_notify_cb_info,
1276 			    &mip->mi_notify_cb_list);
1277 		}
1278 
1279 		mutex_enter(mcbi->mcbi_lockp);
1280 	}
1281 
1282 	mip->mi_state_flags |= MIS_NOTIFY_DONE;
1283 	cv_broadcast(&mcbi->mcbi_cv);
1284 
1285 	/* CALLB_CPR_EXIT drops the lock */
1286 	CALLB_CPR_EXIT(&cprinfo);
1287 	thread_exit();
1288 }
1289 
1290 /*
1291  * Signal the i_mac_notify_thread asking it to quit.
1292  * Then wait till it is done.
1293  */
1294 void
1295 i_mac_notify_exit(mac_impl_t *mip)
1296 {
1297 	mac_cb_info_t	*mcbi;
1298 
1299 	mcbi = &mip->mi_notify_cb_info;
1300 
1301 	mutex_enter(mcbi->mcbi_lockp);
1302 	mip->mi_notify_bits = (1 << MAC_NNOTE);
1303 	cv_broadcast(&mcbi->mcbi_cv);
1304 
1305 
1306 	while ((mip->mi_notify_thread != NULL) &&
1307 	    !(mip->mi_state_flags & MIS_NOTIFY_DONE)) {
1308 		cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
1309 	}
1310 
1311 	/* Necessary clean up before doing kmem_cache_free */
1312 	mip->mi_state_flags &= ~MIS_NOTIFY_DONE;
1313 	mip->mi_notify_bits = 0;
1314 	mip->mi_notify_thread = NULL;
1315 	mutex_exit(mcbi->mcbi_lockp);
1316 }
1317 
1318 /*
1319  * Entry point invoked by drivers to dynamically add a ring to an
1320  * existing group.
1321  */
1322 int
1323 mac_group_add_ring(mac_group_handle_t gh, int index)
1324 {
1325 	mac_group_t *group = (mac_group_t *)gh;
1326 	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1327 	int ret;
1328 
1329 	i_mac_perim_enter(mip);
1330 	ret = i_mac_group_add_ring(group, NULL, index);
1331 	i_mac_perim_exit(mip);
1332 	return (ret);
1333 }
1334 
1335 /*
1336  * Entry point invoked by drivers to dynamically remove a ring
1337  * from an existing group. The specified ring handle must no longer
1338  * be used by the driver after a call to this function.
1339  */
1340 void
1341 mac_group_rem_ring(mac_group_handle_t gh, mac_ring_handle_t rh)
1342 {
1343 	mac_group_t *group = (mac_group_t *)gh;
1344 	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1345 
1346 	i_mac_perim_enter(mip);
1347 	i_mac_group_rem_ring(group, (mac_ring_t *)rh, B_TRUE);
1348 	i_mac_perim_exit(mip);
1349 }
1350 
1351 /*
1352  * mac_prop_info_*() callbacks called from the driver's prefix_propinfo()
1353  * entry points.
1354  */
1355 
1356 void
1357 mac_prop_info_set_default_uint8(mac_prop_info_handle_t ph, uint8_t val)
1358 {
1359 	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1360 
1361 	/* nothing to do if the caller doesn't want the default value */
1362 	if (pr->pr_default == NULL)
1363 		return;
1364 
1365 	ASSERT(pr->pr_default_size >= sizeof (uint8_t));
1366 
1367 	*(uint8_t *)(pr->pr_default) = val;
1368 	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1369 }
1370 
1371 void
1372 mac_prop_info_set_default_uint64(mac_prop_info_handle_t ph, uint64_t val)
1373 {
1374 	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1375 
1376 	/* nothing to do if the caller doesn't want the default value */
1377 	if (pr->pr_default == NULL)
1378 		return;
1379 
1380 	ASSERT(pr->pr_default_size >= sizeof (uint64_t));
1381 
1382 	bcopy(&val, pr->pr_default, sizeof (val));
1383 
1384 	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1385 }
1386 
1387 void
1388 mac_prop_info_set_default_uint32(mac_prop_info_handle_t ph, uint32_t val)
1389 {
1390 	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1391 
1392 	/* nothing to do if the caller doesn't want the default value */
1393 	if (pr->pr_default == NULL)
1394 		return;
1395 
1396 	ASSERT(pr->pr_default_size >= sizeof (uint32_t));
1397 
1398 	bcopy(&val, pr->pr_default, sizeof (val));
1399 
1400 	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1401 }
1402 
1403 void
1404 mac_prop_info_set_default_str(mac_prop_info_handle_t ph, const char *str)
1405 {
1406 	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1407 
1408 	/* nothing to do if the caller doesn't want the default value */
1409 	if (pr->pr_default == NULL)
1410 		return;
1411 
1412 	if (strlen(str) >= pr->pr_default_size)
1413 		pr->pr_errno = ENOBUFS;
1414 	else
1415 		(void) strlcpy(pr->pr_default, str, pr->pr_default_size);
1416 	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1417 }
1418 
1419 void
1420 mac_prop_info_set_default_link_flowctrl(mac_prop_info_handle_t ph,
1421     link_flowctrl_t val)
1422 {
1423 	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1424 
1425 	/* nothing to do if the caller doesn't want the default value */
1426 	if (pr->pr_default == NULL)
1427 		return;
1428 
1429 	ASSERT(pr->pr_default_size >= sizeof (link_flowctrl_t));
1430 
1431 	bcopy(&val, pr->pr_default, sizeof (val));
1432 
1433 	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1434 }
1435 
1436 void
1437 mac_prop_info_set_range_uint32(mac_prop_info_handle_t ph, uint32_t min,
1438     uint32_t max)
1439 {
1440 	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1441 	mac_propval_range_t *range = pr->pr_range;
1442 	mac_propval_uint32_range_t *range32;
1443 
1444 	/* nothing to do if the caller doesn't want the range info */
1445 	if (range == NULL)
1446 		return;
1447 
1448 	if (pr->pr_range_cur_count++ == 0) {
1449 		/* first range */
1450 		pr->pr_flags |= MAC_PROP_INFO_RANGE;
1451 		range->mpr_type = MAC_PROPVAL_UINT32;
1452 	} else {
1453 		/* all ranges of a property should be of the same type */
1454 		ASSERT(range->mpr_type == MAC_PROPVAL_UINT32);
1455 		if (pr->pr_range_cur_count > range->mpr_count) {
1456 			pr->pr_errno = ENOSPC;
1457 			return;
1458 		}
1459 	}
1460 
1461 	range32 = range->mpr_range_uint32;
1462 	range32[pr->pr_range_cur_count - 1].mpur_min = min;
1463 	range32[pr->pr_range_cur_count - 1].mpur_max = max;
1464 }
1465 
1466 void
1467 mac_prop_info_set_perm(mac_prop_info_handle_t ph, uint8_t perm)
1468 {
1469 	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1470 
1471 	pr->pr_perm = perm;
1472 	pr->pr_flags |= MAC_PROP_INFO_PERM;
1473 }
1474 
1475 void mac_hcksum_get(mblk_t *mp, uint32_t *start, uint32_t *stuff,
1476     uint32_t *end, uint32_t *value, uint32_t *flags_ptr)
1477 {
1478 	uint32_t flags;
1479 
1480 	ASSERT(DB_TYPE(mp) == M_DATA);
1481 
1482 	flags = DB_CKSUMFLAGS(mp) & HCK_FLAGS;
1483 	if ((flags & (HCK_PARTIALCKSUM | HCK_FULLCKSUM)) != 0) {
1484 		if (value != NULL)
1485 			*value = (uint32_t)DB_CKSUM16(mp);
1486 		if ((flags & HCK_PARTIALCKSUM) != 0) {
1487 			if (start != NULL)
1488 				*start = (uint32_t)DB_CKSUMSTART(mp);
1489 			if (stuff != NULL)
1490 				*stuff = (uint32_t)DB_CKSUMSTUFF(mp);
1491 			if (end != NULL)
1492 				*end = (uint32_t)DB_CKSUMEND(mp);
1493 		}
1494 	}
1495 
1496 	if (flags_ptr != NULL)
1497 		*flags_ptr = flags;
1498 }
1499 
1500 void mac_hcksum_set(mblk_t *mp, uint32_t start, uint32_t stuff,
1501     uint32_t end, uint32_t value, uint32_t flags)
1502 {
1503 	ASSERT(DB_TYPE(mp) == M_DATA);
1504 
1505 	DB_CKSUMSTART(mp) = (intptr_t)start;
1506 	DB_CKSUMSTUFF(mp) = (intptr_t)stuff;
1507 	DB_CKSUMEND(mp) = (intptr_t)end;
1508 	DB_CKSUMFLAGS(mp) = (uint16_t)flags;
1509 	DB_CKSUM16(mp) = (uint16_t)value;
1510 }
1511 
1512 void
1513 mac_lso_get(mblk_t *mp, uint32_t *mss, uint32_t *flags)
1514 {
1515 	ASSERT(DB_TYPE(mp) == M_DATA);
1516 
1517 	if (flags != NULL) {
1518 		*flags = DB_CKSUMFLAGS(mp) & HW_LSO;
1519 		if ((*flags != 0) && (mss != NULL))
1520 			*mss = (uint32_t)DB_LSOMSS(mp);
1521 	}
1522 }
1523 
1524 void
1525 mac_transceiver_info_set_present(mac_transceiver_info_t *infop,
1526     boolean_t present)
1527 {
1528 	infop->mti_present = present;
1529 }
1530 
1531 void
1532 mac_transceiver_info_set_usable(mac_transceiver_info_t *infop,
1533     boolean_t usable)
1534 {
1535 	infop->mti_usable = usable;
1536 }
1537