xref: /titanic_50/usr/src/uts/common/io/mac/mac_provider.c (revision 60471b7bbfab236de7d8776aed871d919c5f81c3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/conf.h>
29 #include <sys/id_space.h>
30 #include <sys/esunddi.h>
31 #include <sys/stat.h>
32 #include <sys/mkdev.h>
33 #include <sys/stream.h>
34 #include <sys/strsubr.h>
35 #include <sys/dlpi.h>
36 #include <sys/modhash.h>
37 #include <sys/mac.h>
38 #include <sys/mac_provider.h>
39 #include <sys/mac_impl.h>
40 #include <sys/mac_client_impl.h>
41 #include <sys/mac_client_priv.h>
42 #include <sys/mac_soft_ring.h>
43 #include <sys/modctl.h>
44 #include <sys/fs/dv_node.h>
45 #include <sys/thread.h>
46 #include <sys/proc.h>
47 #include <sys/callb.h>
48 #include <sys/cpuvar.h>
49 #include <sys/atomic.h>
50 #include <sys/sdt.h>
51 #include <sys/mac_flow.h>
52 #include <sys/ddi_intr_impl.h>
53 #include <sys/disp.h>
54 #include <sys/sdt.h>
55 
56 /*
57  * MAC Provider Interface.
58  *
59  * Interface for GLDv3 compatible NIC drivers.
60  */
61 
/* Body of the per-MAC notification thread; defined later in this file. */
static void i_mac_notify_thread(void *);

/* Signature of a framework-internal default handler for one notification. */
typedef void (*mac_notify_default_cb_fn_t)(mac_impl_t *);

/*
 * Default handlers, indexed by notification type.  i_mac_notify_thread()
 * calls the non-NULL entry for a pending type before it walks the list of
 * registered notify callbacks.  Slots without an explicit initializer are
 * implicitly NULL.
 *
 * NOTE(review): mac_dst_update() posts MAC_NOTE_DEST, which has no labeled
 * slot below -- confirm the per-slot labels against the notification enum.
 */
static const mac_notify_default_cb_fn_t mac_notify_cb_list[MAC_NNOTE] = {
	mac_fanout_recompute,	/* MAC_NOTE_LINK */
	NULL,		/* MAC_NOTE_UNICST */
	NULL,		/* MAC_NOTE_TX */
	NULL,		/* MAC_NOTE_DEVPROMISC */
	NULL,		/* MAC_NOTE_FASTPATH_FLUSH */
	NULL,		/* MAC_NOTE_SDU_SIZE */
	NULL,		/* MAC_NOTE_MARGIN */
	NULL,		/* MAC_NOTE_CAPAB_CHG */
	NULL		/* MAC_NOTE_LOWLINK */
};
77 
78 /*
79  * Driver support functions.
80  */
81 
82 /* REGISTRATION */
83 
84 mac_register_t *
85 mac_alloc(uint_t mac_version)
86 {
87 	mac_register_t *mregp;
88 
89 	/*
90 	 * Make sure there isn't a version mismatch between the driver and
91 	 * the framework.  In the future, if multiple versions are
92 	 * supported, this check could become more sophisticated.
93 	 */
94 	if (mac_version != MAC_VERSION)
95 		return (NULL);
96 
97 	mregp = kmem_zalloc(sizeof (mac_register_t), KM_SLEEP);
98 	mregp->m_version = mac_version;
99 	return (mregp);
100 }
101 
102 void
103 mac_free(mac_register_t *mregp)
104 {
105 	kmem_free(mregp, sizeof (mac_register_t));
106 }
107 
108 /*
109  * mac_register() is how drivers register new MACs with the GLDv3
110  * framework.  The mregp argument is allocated by drivers using the
111  * mac_alloc() function, and can be freed using mac_free() immediately upon
112  * return from mac_register().  Upon success (0 return value), the mhp
113  * opaque pointer becomes the driver's handle to its MAC interface, and is
114  * the argument to all other mac module entry points.
115  */
116 /* ARGSUSED */
117 int
118 mac_register(mac_register_t *mregp, mac_handle_t *mhp)
119 {
120 	mac_impl_t		*mip;
121 	mactype_t		*mtype;
122 	int			err = EINVAL;
123 	struct devnames		*dnp = NULL;
124 	uint_t			instance;
125 	boolean_t		style1_created = B_FALSE;
126 	boolean_t		style2_created = B_FALSE;
127 	char			*driver;
128 	minor_t			minor = 0;
129 
130 	/* Find the required MAC-Type plugin. */
131 	if ((mtype = mactype_getplugin(mregp->m_type_ident)) == NULL)
132 		return (EINVAL);
133 
134 	/* Create a mac_impl_t to represent this MAC. */
135 	mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP);
136 
137 	/*
138 	 * The mac is not ready for open yet.
139 	 */
140 	mip->mi_state_flags |= MIS_DISABLED;
141 
142 	/*
143 	 * When a mac is registered, the m_instance field can be set to:
144 	 *
145 	 *  0:	Get the mac's instance number from m_dip.
146 	 *	This is usually used for physical device dips.
147 	 *
148 	 *  [1 .. MAC_MAX_MINOR-1]: Use the value as the mac's instance number.
149 	 *	For example, when an aggregation is created with the key option,
150 	 *	"key" will be used as the instance number.
151 	 *
152 	 *  -1: Assign an instance number from [MAC_MAX_MINOR .. MAXMIN-1].
153 	 *	This is often used when a MAC of a virtual link is registered
154 	 *	(e.g., aggregation when "key" is not specified, or vnic).
155 	 *
156 	 * Note that the instance number is used to derive the mi_minor field
157 	 * of mac_impl_t, which will then be used to derive the name of kstats
158 	 * and the devfs nodes.  The first 2 cases are needed to preserve
159 	 * backward compatibility.
160 	 */
161 	switch (mregp->m_instance) {
162 	case 0:
163 		instance = ddi_get_instance(mregp->m_dip);
164 		break;
165 	case ((uint_t)-1):
166 		minor = mac_minor_hold(B_TRUE);
167 		if (minor == 0) {
168 			err = ENOSPC;
169 			goto fail;
170 		}
171 		instance = minor - 1;
172 		break;
173 	default:
174 		instance = mregp->m_instance;
175 		if (instance >= MAC_MAX_MINOR) {
176 			err = EINVAL;
177 			goto fail;
178 		}
179 		break;
180 	}
181 
182 	mip->mi_minor = (minor_t)(instance + 1);
183 	mip->mi_dip = mregp->m_dip;
184 	mip->mi_clients_list = NULL;
185 	mip->mi_nclients = 0;
186 
187 	/* Set the default IEEE Port VLAN Identifier */
188 	mip->mi_pvid = 1;
189 
190 	/* Default bridge link learning protection values */
191 	mip->mi_llimit = 1000;
192 	mip->mi_ldecay = 200;
193 
194 	driver = (char *)ddi_driver_name(mip->mi_dip);
195 
196 	/* Construct the MAC name as <drvname><instance> */
197 	(void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d",
198 	    driver, instance);
199 
200 	mip->mi_driver = mregp->m_driver;
201 
202 	mip->mi_type = mtype;
203 	mip->mi_margin = mregp->m_margin;
204 	mip->mi_info.mi_media = mtype->mt_type;
205 	mip->mi_info.mi_nativemedia = mtype->mt_nativetype;
206 	if (mregp->m_max_sdu <= mregp->m_min_sdu)
207 		goto fail;
208 	mip->mi_sdu_min = mregp->m_min_sdu;
209 	mip->mi_sdu_max = mregp->m_max_sdu;
210 	mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length;
211 	/*
212 	 * If the media supports a broadcast address, cache a pointer to it
213 	 * in the mac_info_t so that upper layers can use it.
214 	 */
215 	mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr;
216 
217 	mip->mi_v12n_level = mregp->m_v12n;
218 
219 	/*
220 	 * Copy the unicast source address into the mac_info_t, but only if
221 	 * the MAC-Type defines a non-zero address length.  We need to
222 	 * handle MAC-Types that have an address length of 0
223 	 * (point-to-point protocol MACs for example).
224 	 */
225 	if (mip->mi_type->mt_addr_length > 0) {
226 		if (mregp->m_src_addr == NULL)
227 			goto fail;
228 		mip->mi_info.mi_unicst_addr =
229 		    kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP);
230 		bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr,
231 		    mip->mi_type->mt_addr_length);
232 
233 		/*
234 		 * Copy the fixed 'factory' MAC address from the immutable
235 		 * info.  This is taken to be the MAC address currently in
236 		 * use.
237 		 */
238 		bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr,
239 		    mip->mi_type->mt_addr_length);
240 
241 		/*
242 		 * At this point, we should set up the classification
243 		 * rules etc but we delay it till mac_open() so that
244 		 * the resource discovery has taken place and we
245 		 * know someone wants to use the device. Otherwise
246 		 * memory gets allocated for Rx ring structures even
247 		 * during probe.
248 		 */
249 
250 		/* Copy the destination address if one is provided. */
251 		if (mregp->m_dst_addr != NULL) {
252 			bcopy(mregp->m_dst_addr, mip->mi_dstaddr,
253 			    mip->mi_type->mt_addr_length);
254 			mip->mi_dstaddr_set = B_TRUE;
255 		}
256 	} else if (mregp->m_src_addr != NULL) {
257 		goto fail;
258 	}
259 
260 	/*
261 	 * The format of the m_pdata is specific to the plugin.  It is
262 	 * passed in as an argument to all of the plugin callbacks.  The
263 	 * driver can update this information by calling
264 	 * mac_pdata_update().
265 	 */
266 	if (mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY) {
267 		/*
268 		 * Verify if the supplied plugin data is valid.  Note that
269 		 * even if the caller passed in a NULL pointer as plugin data,
270 		 * we still need to verify if that's valid as the plugin may
271 		 * require plugin data to function.
272 		 */
273 		if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata,
274 		    mregp->m_pdata_size)) {
275 			goto fail;
276 		}
277 		if (mregp->m_pdata != NULL) {
278 			mip->mi_pdata =
279 			    kmem_alloc(mregp->m_pdata_size, KM_SLEEP);
280 			bcopy(mregp->m_pdata, mip->mi_pdata,
281 			    mregp->m_pdata_size);
282 			mip->mi_pdata_size = mregp->m_pdata_size;
283 		}
284 	} else if (mregp->m_pdata != NULL) {
285 		/*
286 		 * The caller supplied non-NULL plugin data, but the plugin
287 		 * does not recognize plugin data.
288 		 */
289 		err = EINVAL;
290 		goto fail;
291 	}
292 
293 	/*
294 	 * Register the private properties.
295 	 */
296 	mac_register_priv_prop(mip, mregp->m_priv_props,
297 	    mregp->m_priv_prop_count);
298 
299 	/*
300 	 * Stash the driver callbacks into the mac_impl_t, but first sanity
301 	 * check to make sure all mandatory callbacks are set.
302 	 */
303 	if (mregp->m_callbacks->mc_getstat == NULL ||
304 	    mregp->m_callbacks->mc_start == NULL ||
305 	    mregp->m_callbacks->mc_stop == NULL ||
306 	    mregp->m_callbacks->mc_setpromisc == NULL ||
307 	    mregp->m_callbacks->mc_multicst == NULL) {
308 		goto fail;
309 	}
310 	mip->mi_callbacks = mregp->m_callbacks;
311 
312 	if (mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LEGACY,
313 	    &mip->mi_capab_legacy)) {
314 		mip->mi_state_flags |= MIS_LEGACY;
315 		mip->mi_phy_dev = mip->mi_capab_legacy.ml_dev;
316 	} else {
317 		mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip),
318 		    ddi_get_instance(mip->mi_dip) + 1);
319 	}
320 
321 	/*
322 	 * Allocate a notification thread. thread_create blocks for memory
323 	 * if needed, it never fails.
324 	 */
325 	mip->mi_notify_thread = thread_create(NULL, 0, i_mac_notify_thread,
326 	    mip, 0, &p0, TS_RUN, minclsyspri);
327 
328 	/*
329 	 * Initialize the capabilities
330 	 */
331 
332 	if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, NULL))
333 		mip->mi_state_flags |= MIS_IS_VNIC;
334 
335 	if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_AGGR, NULL))
336 		mip->mi_state_flags |= MIS_IS_AGGR;
337 
338 	mac_addr_factory_init(mip);
339 
340 	/*
341 	 * Enforce the virtrualization level registered.
342 	 */
343 	if (mip->mi_v12n_level & MAC_VIRT_LEVEL1) {
344 		if (mac_init_rings(mip, MAC_RING_TYPE_RX) != 0 ||
345 		    mac_init_rings(mip, MAC_RING_TYPE_TX) != 0)
346 			goto fail;
347 
348 		/*
349 		 * The driver needs to register at least rx rings for this
350 		 * virtualization level.
351 		 */
352 		if (mip->mi_rx_groups == NULL)
353 			goto fail;
354 	}
355 
356 	/*
357 	 * The driver must set mc_unicst entry point to NULL when it advertises
358 	 * CAP_RINGS for rx groups.
359 	 */
360 	if (mip->mi_rx_groups != NULL) {
361 		if (mregp->m_callbacks->mc_unicst != NULL)
362 			goto fail;
363 	} else {
364 		if (mregp->m_callbacks->mc_unicst == NULL)
365 			goto fail;
366 	}
367 
368 	/*
369 	 * The driver must set mc_tx entry point to NULL when it advertises
370 	 * CAP_RINGS for tx rings.
371 	 */
372 	if (mip->mi_tx_groups != NULL) {
373 		if (mregp->m_callbacks->mc_tx != NULL)
374 			goto fail;
375 	} else {
376 		if (mregp->m_callbacks->mc_tx == NULL)
377 			goto fail;
378 	}
379 
380 	/*
381 	 * Initialize MAC addresses. Must be called after mac_init_rings().
382 	 */
383 	mac_init_macaddr(mip);
384 
385 	mip->mi_share_capab.ms_snum = 0;
386 	if (mip->mi_v12n_level & MAC_VIRT_HIO) {
387 		(void) mac_capab_get((mac_handle_t)mip, MAC_CAPAB_SHARES,
388 		    &mip->mi_share_capab);
389 	}
390 
391 	/*
392 	 * Initialize the kstats for this device.
393 	 */
394 	mac_stat_create(mip);
395 
396 	/* Zero out any properties. */
397 	bzero(&mip->mi_resource_props, sizeof (mac_resource_props_t));
398 
399 	/* set the gldv3 flag in dn_flags */
400 	dnp = &devnamesp[ddi_driver_major(mip->mi_dip)];
401 	LOCK_DEV_OPS(&dnp->dn_lock);
402 	dnp->dn_flags |= (DN_GLDV3_DRIVER | DN_NETWORK_DRIVER);
403 	UNLOCK_DEV_OPS(&dnp->dn_lock);
404 
405 	if (mip->mi_minor < MAC_MAX_MINOR + 1) {
406 		/* Create a style-2 DLPI device */
407 		if (ddi_create_minor_node(mip->mi_dip, driver, S_IFCHR, 0,
408 		    DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS)
409 			goto fail;
410 		style2_created = B_TRUE;
411 
412 		/* Create a style-1 DLPI device */
413 		if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR,
414 		    mip->mi_minor, DDI_NT_NET, 0) != DDI_SUCCESS)
415 			goto fail;
416 		style1_created = B_TRUE;
417 	}
418 
419 	mac_flow_l2tab_create(mip, &mip->mi_flow_tab);
420 
421 	rw_enter(&i_mac_impl_lock, RW_WRITER);
422 	if (mod_hash_insert(i_mac_impl_hash,
423 	    (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) {
424 		rw_exit(&i_mac_impl_lock);
425 		err = EEXIST;
426 		goto fail;
427 	}
428 
429 	DTRACE_PROBE2(mac__register, struct devnames *, dnp,
430 	    (mac_impl_t *), mip);
431 
432 	/*
433 	 * Mark the MAC to be ready for open.
434 	 */
435 	mip->mi_state_flags &= ~MIS_DISABLED;
436 	rw_exit(&i_mac_impl_lock);
437 
438 	atomic_inc_32(&i_mac_impl_count);
439 
440 	cmn_err(CE_NOTE, "!%s registered", mip->mi_name);
441 	*mhp = (mac_handle_t)mip;
442 	return (0);
443 
444 fail:
445 	if (style1_created)
446 		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
447 
448 	if (style2_created)
449 		ddi_remove_minor_node(mip->mi_dip, driver);
450 
451 	mac_addr_factory_fini(mip);
452 
453 	/* Clean up registered MAC addresses */
454 	mac_fini_macaddr(mip);
455 
456 	/* Clean up registered rings */
457 	mac_free_rings(mip, MAC_RING_TYPE_RX);
458 	mac_free_rings(mip, MAC_RING_TYPE_TX);
459 
460 	/* Clean up notification thread */
461 	if (mip->mi_notify_thread != NULL)
462 		i_mac_notify_exit(mip);
463 
464 	if (mip->mi_info.mi_unicst_addr != NULL) {
465 		kmem_free(mip->mi_info.mi_unicst_addr,
466 		    mip->mi_type->mt_addr_length);
467 		mip->mi_info.mi_unicst_addr = NULL;
468 	}
469 
470 	mac_stat_destroy(mip);
471 
472 	if (mip->mi_type != NULL) {
473 		atomic_dec_32(&mip->mi_type->mt_ref);
474 		mip->mi_type = NULL;
475 	}
476 
477 	if (mip->mi_pdata != NULL) {
478 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
479 		mip->mi_pdata = NULL;
480 		mip->mi_pdata_size = 0;
481 	}
482 
483 	if (minor != 0) {
484 		ASSERT(minor > MAC_MAX_MINOR);
485 		mac_minor_rele(minor);
486 	}
487 
488 	mac_unregister_priv_prop(mip);
489 
490 	kmem_cache_free(i_mac_impl_cachep, mip);
491 	return (err);
492 }
493 
494 /*
495  * Unregister from the GLDv3 framework
496  */
497 int
498 mac_unregister(mac_handle_t mh)
499 {
500 	int			err;
501 	mac_impl_t		*mip = (mac_impl_t *)mh;
502 	mod_hash_val_t		val;
503 	mac_margin_req_t	*mmr, *nextmmr;
504 
505 	/* Fail the unregister if there are any open references to this mac. */
506 	if ((err = mac_disable_nowait(mh)) != 0)
507 		return (err);
508 
509 	/*
510 	 * Clean up notification thread and wait for it to exit.
511 	 */
512 	i_mac_notify_exit(mip);
513 
514 	i_mac_perim_enter(mip);
515 
516 	/*
517 	 * There is still resource properties configured over this mac.
518 	 */
519 	if (mip->mi_resource_props.mrp_mask != 0)
520 		mac_fastpath_enable((mac_handle_t)mip);
521 
522 	if (mip->mi_minor < MAC_MAX_MINOR + 1) {
523 		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
524 		ddi_remove_minor_node(mip->mi_dip,
525 		    (char *)ddi_driver_name(mip->mi_dip));
526 	}
527 
528 	ASSERT(mip->mi_nactiveclients == 0 && !(mip->mi_state_flags &
529 	    MIS_EXCLUSIVE));
530 
531 	mac_stat_destroy(mip);
532 
533 	(void) mod_hash_remove(i_mac_impl_hash,
534 	    (mod_hash_key_t)mip->mi_name, &val);
535 	ASSERT(mip == (mac_impl_t *)val);
536 
537 	ASSERT(i_mac_impl_count > 0);
538 	atomic_dec_32(&i_mac_impl_count);
539 
540 	if (mip->mi_pdata != NULL)
541 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
542 	mip->mi_pdata = NULL;
543 	mip->mi_pdata_size = 0;
544 
545 	/*
546 	 * Free the list of margin request.
547 	 */
548 	for (mmr = mip->mi_mmrp; mmr != NULL; mmr = nextmmr) {
549 		nextmmr = mmr->mmr_nextp;
550 		kmem_free(mmr, sizeof (mac_margin_req_t));
551 	}
552 	mip->mi_mmrp = NULL;
553 
554 	mip->mi_linkstate = mip->mi_lowlinkstate = LINK_STATE_UNKNOWN;
555 	kmem_free(mip->mi_info.mi_unicst_addr, mip->mi_type->mt_addr_length);
556 	mip->mi_info.mi_unicst_addr = NULL;
557 
558 	atomic_dec_32(&mip->mi_type->mt_ref);
559 	mip->mi_type = NULL;
560 
561 	/*
562 	 * Free the primary MAC address.
563 	 */
564 	mac_fini_macaddr(mip);
565 
566 	/*
567 	 * free all rings
568 	 */
569 	mac_free_rings(mip, MAC_RING_TYPE_RX);
570 	mac_free_rings(mip, MAC_RING_TYPE_TX);
571 
572 	mac_addr_factory_fini(mip);
573 
574 	bzero(mip->mi_addr, MAXMACADDRLEN);
575 	bzero(mip->mi_dstaddr, MAXMACADDRLEN);
576 
577 	/* and the flows */
578 	mac_flow_tab_destroy(mip->mi_flow_tab);
579 	mip->mi_flow_tab = NULL;
580 
581 	if (mip->mi_minor > MAC_MAX_MINOR)
582 		mac_minor_rele(mip->mi_minor);
583 
584 	cmn_err(CE_NOTE, "!%s unregistered", mip->mi_name);
585 
586 	/*
587 	 * Reset the perim related fields to default values before
588 	 * kmem_cache_free
589 	 */
590 	i_mac_perim_exit(mip);
591 	mip->mi_state_flags = 0;
592 
593 	mac_unregister_priv_prop(mip);
594 
595 	ASSERT(mip->mi_bridge_link == NULL);
596 	kmem_cache_free(i_mac_impl_cachep, mip);
597 
598 	return (0);
599 }
600 
601 /* DATA RECEPTION */
602 
603 /*
604  * This function is invoked for packets received by the MAC driver in
605  * interrupt context. The ring generation number provided by the driver
606  * is matched with the ring generation number held in MAC. If they do not
607  * match, received packets are considered stale packets coming from an older
608  * assignment of the ring. Drop them.
609  */
610 void
611 mac_rx_ring(mac_handle_t mh, mac_ring_handle_t mrh, mblk_t *mp_chain,
612     uint64_t mr_gen_num)
613 {
614 	mac_ring_t		*mr = (mac_ring_t *)mrh;
615 
616 	if ((mr != NULL) && (mr->mr_gen_num != mr_gen_num)) {
617 		DTRACE_PROBE2(mac__rx__rings__stale__packet, uint64_t,
618 		    mr->mr_gen_num, uint64_t, mr_gen_num);
619 		freemsgchain(mp_chain);
620 		return;
621 	}
622 	mac_rx(mh, (mac_resource_handle_t)mrh, mp_chain);
623 }
624 
625 /*
626  * This function is invoked for each packet received by the underlying driver.
627  */
628 void
629 mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
630 {
631 	mac_impl_t *mip = (mac_impl_t *)mh;
632 
633 	/*
634 	 * Check if the link is part of a bridge.  If not, then we don't need
635 	 * to take the lock to remain consistent.  Make this common case
636 	 * lock-free and tail-call optimized.
637 	 */
638 	if (mip->mi_bridge_link == NULL) {
639 		mac_rx_common(mh, mrh, mp_chain);
640 	} else {
641 		/*
642 		 * Once we take a reference on the bridge link, the bridge
643 		 * module itself can't unload, so the callback pointers are
644 		 * stable.
645 		 */
646 		mutex_enter(&mip->mi_bridge_lock);
647 		if ((mh = mip->mi_bridge_link) != NULL)
648 			mac_bridge_ref_cb(mh, B_TRUE);
649 		mutex_exit(&mip->mi_bridge_lock);
650 		if (mh == NULL) {
651 			mac_rx_common((mac_handle_t)mip, mrh, mp_chain);
652 		} else {
653 			mac_bridge_rx_cb(mh, mrh, mp_chain);
654 			mac_bridge_ref_cb(mh, B_FALSE);
655 		}
656 	}
657 }
658 
659 /*
660  * Special case function: this allows snooping of packets transmitted and
661  * received by TRILL. By design, they go directly into the TRILL module.
662  */
663 void
664 mac_trill_snoop(mac_handle_t mh, mblk_t *mp)
665 {
666 	mac_impl_t *mip = (mac_impl_t *)mh;
667 
668 	if (mip->mi_promisc_list != NULL)
669 		mac_promisc_dispatch(mip, mp, NULL);
670 }
671 
672 /*
673  * This is the upward reentry point for packets arriving from the bridging
674  * module and from mac_rx for links not part of a bridge.
675  */
676 void
677 mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
678 {
679 	mac_impl_t		*mip = (mac_impl_t *)mh;
680 	mac_ring_t		*mr = (mac_ring_t *)mrh;
681 	mac_soft_ring_set_t 	*mac_srs;
682 	mblk_t			*bp = mp_chain;
683 	boolean_t		hw_classified = B_FALSE;
684 
685 	/*
686 	 * If there are any promiscuous mode callbacks defined for
687 	 * this MAC, pass them a copy if appropriate.
688 	 */
689 	if (mip->mi_promisc_list != NULL)
690 		mac_promisc_dispatch(mip, mp_chain, NULL);
691 
692 	if (mr != NULL) {
693 		/*
694 		 * If the SRS teardown has started, just return. The 'mr'
695 		 * continues to be valid until the driver unregisters the mac.
696 		 * Hardware classified packets will not make their way up
697 		 * beyond this point once the teardown has started. The driver
698 		 * is never passed a pointer to a flow entry or SRS or any
699 		 * structure that can be freed much before mac_unregister.
700 		 */
701 		mutex_enter(&mr->mr_lock);
702 		if ((mr->mr_state != MR_INUSE) || (mr->mr_flag &
703 		    (MR_INCIPIENT | MR_CONDEMNED | MR_QUIESCE))) {
704 			mutex_exit(&mr->mr_lock);
705 			freemsgchain(mp_chain);
706 			return;
707 		}
708 		if (mr->mr_classify_type == MAC_HW_CLASSIFIER) {
709 			hw_classified = B_TRUE;
710 			MR_REFHOLD_LOCKED(mr);
711 		}
712 		mutex_exit(&mr->mr_lock);
713 
714 		/*
715 		 * We check if an SRS is controlling this ring.
716 		 * If so, we can directly call the srs_lower_proc
717 		 * routine otherwise we need to go through mac_rx_classify
718 		 * to reach the right place.
719 		 */
720 		if (hw_classified) {
721 			mac_srs = mr->mr_srs;
722 			/*
723 			 * This is supposed to be the fast path.
724 			 * All packets received though here were steered by
725 			 * the hardware classifier, and share the same
726 			 * MAC header info.
727 			 */
728 			mac_srs->srs_rx.sr_lower_proc(mh,
729 			    (mac_resource_handle_t)mac_srs, mp_chain, B_FALSE);
730 			MR_REFRELE(mr);
731 			return;
732 		}
733 		/* We'll fall through to software classification */
734 	} else {
735 		flow_entry_t *flent;
736 		int err;
737 
738 		rw_enter(&mip->mi_rw_lock, RW_READER);
739 		if (mip->mi_single_active_client != NULL) {
740 			flent = mip->mi_single_active_client->mci_flent_list;
741 			FLOW_TRY_REFHOLD(flent, err);
742 			rw_exit(&mip->mi_rw_lock);
743 			if (err == 0) {
744 				(flent->fe_cb_fn)(flent->fe_cb_arg1,
745 				    flent->fe_cb_arg2, mp_chain, B_FALSE);
746 				FLOW_REFRELE(flent);
747 				return;
748 			}
749 		} else {
750 			rw_exit(&mip->mi_rw_lock);
751 		}
752 	}
753 
754 	if (!FLOW_TAB_EMPTY(mip->mi_flow_tab)) {
755 		if ((bp = mac_rx_flow(mh, mrh, bp)) == NULL)
756 			return;
757 	}
758 
759 	freemsgchain(bp);
760 }
761 
762 /* DATA TRANSMISSION */
763 
764 /*
765  * A driver's notification to resume transmission, in case of a provider
766  * without TX rings.
767  */
768 void
769 mac_tx_update(mac_handle_t mh)
770 {
771 	/*
772 	 * Walk the list of MAC clients (mac_client_handle)
773 	 * and update
774 	 */
775 	i_mac_tx_srs_notify((mac_impl_t *)mh, NULL);
776 }
777 
778 /*
779  * A driver's notification to resume transmission on the specified TX ring.
780  */
781 void
782 mac_tx_ring_update(mac_handle_t mh, mac_ring_handle_t rh)
783 {
784 	i_mac_tx_srs_notify((mac_impl_t *)mh, rh);
785 }
786 
787 /* LINK STATE */
788 /*
789  * Notify the MAC layer about a link state change
790  */
791 void
792 mac_link_update(mac_handle_t mh, link_state_t link)
793 {
794 	mac_impl_t	*mip = (mac_impl_t *)mh;
795 
796 	/*
797 	 * Save the link state.
798 	 */
799 	mip->mi_lowlinkstate = link;
800 
801 	/*
802 	 * Send a MAC_NOTE_LOWLINK notification.  This tells the notification
803 	 * thread to deliver both lower and upper notifications.
804 	 */
805 	i_mac_notify(mip, MAC_NOTE_LOWLINK);
806 }
807 
808 /*
809  * Notify the MAC layer about a link state change due to bridging.
810  */
811 void
812 mac_link_redo(mac_handle_t mh, link_state_t link)
813 {
814 	mac_impl_t	*mip = (mac_impl_t *)mh;
815 
816 	/*
817 	 * Save the link state.
818 	 */
819 	mip->mi_linkstate = link;
820 
821 	/*
822 	 * Send a MAC_NOTE_LINK notification.  Only upper notifications are
823 	 * made.
824 	 */
825 	i_mac_notify(mip, MAC_NOTE_LINK);
826 }
827 
828 /* OTHER CONTROL INFORMATION */
829 
830 /*
831  * A driver notified us that its primary MAC address has changed.
832  */
833 void
834 mac_unicst_update(mac_handle_t mh, const uint8_t *addr)
835 {
836 	mac_impl_t	*mip = (mac_impl_t *)mh;
837 
838 	if (mip->mi_type->mt_addr_length == 0)
839 		return;
840 
841 	i_mac_perim_enter(mip);
842 	/*
843 	 * If address doesn't change, do nothing.
844 	 */
845 	if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) == 0) {
846 		i_mac_perim_exit(mip);
847 		return;
848 	}
849 
850 	/*
851 	 * Freshen the MAC address value and update all MAC clients that
852 	 * share this MAC address.
853 	 */
854 	mac_freshen_macaddr(mac_find_macaddr(mip, mip->mi_addr),
855 	    (uint8_t *)addr);
856 
857 	i_mac_perim_exit(mip);
858 
859 	/*
860 	 * Send a MAC_NOTE_UNICST notification.
861 	 */
862 	i_mac_notify(mip, MAC_NOTE_UNICST);
863 }
864 
865 void
866 mac_dst_update(mac_handle_t mh, const uint8_t *addr)
867 {
868 	mac_impl_t	*mip = (mac_impl_t *)mh;
869 
870 	if (mip->mi_type->mt_addr_length == 0)
871 		return;
872 
873 	i_mac_perim_enter(mip);
874 	bcopy(addr, mip->mi_dstaddr, mip->mi_type->mt_addr_length);
875 	i_mac_perim_exit(mip);
876 	i_mac_notify(mip, MAC_NOTE_DEST);
877 }
878 
879 /*
880  * MAC plugin information changed.
881  */
882 int
883 mac_pdata_update(mac_handle_t mh, void *mac_pdata, size_t dsize)
884 {
885 	mac_impl_t	*mip = (mac_impl_t *)mh;
886 
887 	/*
888 	 * Verify that the plugin supports MAC plugin data and that the
889 	 * supplied data is valid.
890 	 */
891 	if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
892 		return (EINVAL);
893 	if (!mip->mi_type->mt_ops.mtops_pdata_verify(mac_pdata, dsize))
894 		return (EINVAL);
895 
896 	if (mip->mi_pdata != NULL)
897 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
898 
899 	mip->mi_pdata = kmem_alloc(dsize, KM_SLEEP);
900 	bcopy(mac_pdata, mip->mi_pdata, dsize);
901 	mip->mi_pdata_size = dsize;
902 
903 	/*
904 	 * Since the MAC plugin data is used to construct MAC headers that
905 	 * were cached in fast-path headers, we need to flush fast-path
906 	 * information for links associated with this mac.
907 	 */
908 	i_mac_notify(mip, MAC_NOTE_FASTPATH_FLUSH);
909 	return (0);
910 }
911 
912 /*
913  * Invoked by driver as well as the framework to notify its capability change.
914  */
915 void
916 mac_capab_update(mac_handle_t mh)
917 {
918 	/* Send MAC_NOTE_CAPAB_CHG notification */
919 	i_mac_notify((mac_impl_t *)mh, MAC_NOTE_CAPAB_CHG);
920 }
921 
922 int
923 mac_maxsdu_update(mac_handle_t mh, uint_t sdu_max)
924 {
925 	mac_impl_t	*mip = (mac_impl_t *)mh;
926 
927 	if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
928 		return (EINVAL);
929 	mip->mi_sdu_max = sdu_max;
930 
931 	/* Send a MAC_NOTE_SDU_SIZE notification. */
932 	i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
933 	return (0);
934 }
935 
936 /* PRIVATE FUNCTIONS, FOR INTERNAL USE ONLY */
937 
938 /*
939  * Updates the mac_impl structure with the current state of the link
940  */
941 static void
942 i_mac_log_link_state(mac_impl_t *mip)
943 {
944 	/*
945 	 * If no change, then it is not interesting.
946 	 */
947 	if (mip->mi_lastlowlinkstate == mip->mi_lowlinkstate)
948 		return;
949 
950 	switch (mip->mi_lowlinkstate) {
951 	case LINK_STATE_UP:
952 		if (mip->mi_type->mt_ops.mtops_ops & MTOPS_LINK_DETAILS) {
953 			char det[200];
954 
955 			mip->mi_type->mt_ops.mtops_link_details(det,
956 			    sizeof (det), (mac_handle_t)mip, mip->mi_pdata);
957 
958 			cmn_err(CE_NOTE, "!%s link up, %s", mip->mi_name, det);
959 		} else {
960 			cmn_err(CE_NOTE, "!%s link up", mip->mi_name);
961 		}
962 		break;
963 
964 	case LINK_STATE_DOWN:
965 		/*
966 		 * Only transitions from UP to DOWN are interesting
967 		 */
968 		if (mip->mi_lastlowlinkstate != LINK_STATE_UNKNOWN)
969 			cmn_err(CE_NOTE, "!%s link down", mip->mi_name);
970 		break;
971 
972 	case LINK_STATE_UNKNOWN:
973 		/*
974 		 * This case is normally not interesting.
975 		 */
976 		break;
977 	}
978 	mip->mi_lastlowlinkstate = mip->mi_lowlinkstate;
979 }
980 
981 /*
982  * Main routine for the callbacks notifications thread
983  */
984 static void
985 i_mac_notify_thread(void *arg)
986 {
987 	mac_impl_t	*mip = arg;
988 	callb_cpr_t	cprinfo;
989 	mac_cb_t	*mcb;
990 	mac_cb_info_t	*mcbi;
991 	mac_notify_cb_t	*mncb;
992 
993 	mcbi = &mip->mi_notify_cb_info;
994 	CALLB_CPR_INIT(&cprinfo, mcbi->mcbi_lockp, callb_generic_cpr,
995 	    "i_mac_notify_thread");
996 
997 	mutex_enter(mcbi->mcbi_lockp);
998 
999 	for (;;) {
1000 		uint32_t	bits;
1001 		uint32_t	type;
1002 
1003 		bits = mip->mi_notify_bits;
1004 		if (bits == 0) {
1005 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
1006 			cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
1007 			CALLB_CPR_SAFE_END(&cprinfo, mcbi->mcbi_lockp);
1008 			continue;
1009 		}
1010 		mip->mi_notify_bits = 0;
1011 		if ((bits & (1 << MAC_NNOTE)) != 0) {
1012 			/* request to quit */
1013 			ASSERT(mip->mi_state_flags & MIS_DISABLED);
1014 			break;
1015 		}
1016 
1017 		mutex_exit(mcbi->mcbi_lockp);
1018 
1019 		/*
1020 		 * Log link changes on the actual link, but then do reports on
1021 		 * synthetic state (if part of a bridge).
1022 		 */
1023 		if ((bits & (1 << MAC_NOTE_LOWLINK)) != 0) {
1024 			link_state_t newstate;
1025 			mac_handle_t mh;
1026 
1027 			i_mac_log_link_state(mip);
1028 			newstate = mip->mi_lowlinkstate;
1029 			if (mip->mi_bridge_link != NULL) {
1030 				mutex_enter(&mip->mi_bridge_lock);
1031 				if ((mh = mip->mi_bridge_link) != NULL) {
1032 					newstate = mac_bridge_ls_cb(mh,
1033 					    newstate);
1034 				}
1035 				mutex_exit(&mip->mi_bridge_lock);
1036 			}
1037 			if (newstate != mip->mi_linkstate) {
1038 				mip->mi_linkstate = newstate;
1039 				bits |= 1 << MAC_NOTE_LINK;
1040 			}
1041 		}
1042 
1043 		/*
1044 		 * Do notification callbacks for each notification type.
1045 		 */
1046 		for (type = 0; type < MAC_NNOTE; type++) {
1047 			if ((bits & (1 << type)) == 0) {
1048 				continue;
1049 			}
1050 
1051 			if (mac_notify_cb_list[type] != NULL)
1052 				(*mac_notify_cb_list[type])(mip);
1053 
1054 			/*
1055 			 * Walk the list of notifications.
1056 			 */
1057 			MAC_CALLBACK_WALKER_INC(&mip->mi_notify_cb_info);
1058 			for (mcb = mip->mi_notify_cb_list; mcb != NULL;
1059 			    mcb = mcb->mcb_nextp) {
1060 				mncb = (mac_notify_cb_t *)mcb->mcb_objp;
1061 				mncb->mncb_fn(mncb->mncb_arg, type);
1062 			}
1063 			MAC_CALLBACK_WALKER_DCR(&mip->mi_notify_cb_info,
1064 			    &mip->mi_notify_cb_list);
1065 		}
1066 
1067 		mutex_enter(mcbi->mcbi_lockp);
1068 	}
1069 
1070 	mip->mi_state_flags |= MIS_NOTIFY_DONE;
1071 	cv_broadcast(&mcbi->mcbi_cv);
1072 
1073 	/* CALLB_CPR_EXIT drops the lock */
1074 	CALLB_CPR_EXIT(&cprinfo);
1075 	thread_exit();
1076 }
1077 
1078 /*
1079  * Signal the i_mac_notify_thread asking it to quit.
1080  * Then wait till it is done.
1081  */
1082 void
1083 i_mac_notify_exit(mac_impl_t *mip)
1084 {
1085 	mac_cb_info_t	*mcbi;
1086 
1087 	mcbi = &mip->mi_notify_cb_info;
1088 
1089 	mutex_enter(mcbi->mcbi_lockp);
1090 	mip->mi_notify_bits = (1 << MAC_NNOTE);
1091 	cv_broadcast(&mcbi->mcbi_cv);
1092 
1093 
1094 	while ((mip->mi_notify_thread != NULL) &&
1095 	    !(mip->mi_state_flags & MIS_NOTIFY_DONE)) {
1096 		cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
1097 	}
1098 
1099 	/* Necessary clean up before doing kmem_cache_free */
1100 	mip->mi_state_flags &= ~MIS_NOTIFY_DONE;
1101 	mip->mi_notify_bits = 0;
1102 	mip->mi_notify_thread = NULL;
1103 	mutex_exit(mcbi->mcbi_lockp);
1104 }
1105 
1106 /*
1107  * Entry point invoked by drivers to dynamically add a ring to an
1108  * existing group.
1109  */
1110 int
1111 mac_group_add_ring(mac_group_handle_t gh, int index)
1112 {
1113 	mac_group_t *group = (mac_group_t *)gh;
1114 	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1115 	int ret;
1116 
1117 	i_mac_perim_enter(mip);
1118 
1119 	/*
1120 	 * Only RX rings can be added or removed by drivers currently.
1121 	 */
1122 	ASSERT(group->mrg_type == MAC_RING_TYPE_RX);
1123 
1124 	ret = i_mac_group_add_ring(group, NULL, index);
1125 
1126 	i_mac_perim_exit(mip);
1127 
1128 	return (ret);
1129 }
1130 
1131 /*
1132  * Entry point invoked by drivers to dynamically remove a ring
1133  * from an existing group. The specified ring handle must no longer
1134  * be used by the driver after a call to this function.
1135  */
1136 void
1137 mac_group_rem_ring(mac_group_handle_t gh, mac_ring_handle_t rh)
1138 {
1139 	mac_group_t *group = (mac_group_t *)gh;
1140 	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1141 
1142 	i_mac_perim_enter(mip);
1143 
1144 	/*
1145 	 * Only RX rings can be added or removed by drivers currently.
1146 	 */
1147 	ASSERT(group->mrg_type == MAC_RING_TYPE_RX);
1148 
1149 	i_mac_group_rem_ring(group, (mac_ring_t *)rh, B_TRUE);
1150 
1151 	i_mac_perim_exit(mip);
1152 }
1153