xref: /titanic_50/usr/src/uts/common/io/mac/mac_provider.c (revision d6c01e8c77db27063de6c97d78a1818ad9030343)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/conf.h>
29 #include <sys/id_space.h>
30 #include <sys/esunddi.h>
31 #include <sys/stat.h>
32 #include <sys/mkdev.h>
33 #include <sys/stream.h>
34 #include <sys/strsubr.h>
35 #include <sys/dlpi.h>
36 #include <sys/modhash.h>
37 #include <sys/mac.h>
38 #include <sys/mac_provider.h>
39 #include <sys/mac_impl.h>
40 #include <sys/mac_client_impl.h>
41 #include <sys/mac_client_priv.h>
42 #include <sys/mac_soft_ring.h>
43 #include <sys/dld.h>
44 #include <sys/modctl.h>
45 #include <sys/fs/dv_node.h>
46 #include <sys/thread.h>
47 #include <sys/proc.h>
48 #include <sys/callb.h>
49 #include <sys/cpuvar.h>
50 #include <sys/atomic.h>
51 #include <sys/sdt.h>
52 #include <sys/mac_flow.h>
53 #include <sys/ddi_intr_impl.h>
54 #include <sys/disp.h>
55 #include <sys/sdt.h>
56 
57 /*
58  * MAC Provider Interface.
59  *
60  * Interface for GLDv3 compatible NIC drivers.
61  */
62 
/* Body of the notification thread that mac_register() creates per MAC. */
static void i_mac_notify_thread(void *);

/* Signature of a framework-default handler for one notification type. */
typedef void (*mac_notify_default_cb_fn_t)(mac_impl_t *);

/*
 * Framework-default handlers, indexed by notification type.  The
 * notification thread invokes the non-NULL entry for a type before walking
 * the callbacks registered on the MAC.  The array is sized MAC_NNOTE, so any
 * trailing slot without an explicit initializer is NULL.
 *
 * NOTE(review): the per-slot labels below have not been checked against the
 * order of the notification type enum; MAC_NOTE_DEST (sent by
 * mac_dst_update()) has no labelled slot here -- confirm against the header.
 */
static const mac_notify_default_cb_fn_t mac_notify_cb_list[MAC_NNOTE] = {
	mac_fanout_recompute,	/* MAC_NOTE_LINK */
	NULL,		/* MAC_NOTE_UNICST */
	NULL,		/* MAC_NOTE_TX */
	NULL,		/* MAC_NOTE_DEVPROMISC */
	NULL,		/* MAC_NOTE_FASTPATH_FLUSH */
	NULL,		/* MAC_NOTE_SDU_SIZE */
	NULL,		/* MAC_NOTE_MARGIN */
	NULL,		/* MAC_NOTE_CAPAB_CHG */
	NULL		/* MAC_NOTE_LOWLINK */
};
78 
79 /*
80  * Driver support functions.
81  */
82 
83 /* REGISTRATION */
84 
85 mac_register_t *
86 mac_alloc(uint_t mac_version)
87 {
88 	mac_register_t *mregp;
89 
90 	/*
91 	 * Make sure there isn't a version mismatch between the driver and
92 	 * the framework.  In the future, if multiple versions are
93 	 * supported, this check could become more sophisticated.
94 	 */
95 	if (mac_version != MAC_VERSION)
96 		return (NULL);
97 
98 	mregp = kmem_zalloc(sizeof (mac_register_t), KM_SLEEP);
99 	mregp->m_version = mac_version;
100 	return (mregp);
101 }
102 
103 void
104 mac_free(mac_register_t *mregp)
105 {
106 	kmem_free(mregp, sizeof (mac_register_t));
107 }
108 
109 /*
110  * mac_register() is how drivers register new MACs with the GLDv3
111  * framework.  The mregp argument is allocated by drivers using the
112  * mac_alloc() function, and can be freed using mac_free() immediately upon
113  * return from mac_register().  Upon success (0 return value), the mhp
114  * opaque pointer becomes the driver's handle to its MAC interface, and is
115  * the argument to all other mac module entry points.
116  */
117 /* ARGSUSED */
118 int
119 mac_register(mac_register_t *mregp, mac_handle_t *mhp)
120 {
121 	mac_impl_t		*mip;
122 	mactype_t		*mtype;
123 	int			err = EINVAL;
124 	struct devnames		*dnp = NULL;
125 	uint_t			instance;
126 	boolean_t		style1_created = B_FALSE;
127 	boolean_t		style2_created = B_FALSE;
128 	char			*driver;
129 	minor_t			minor = 0;
130 
131 	/* Find the required MAC-Type plugin. */
132 	if ((mtype = mactype_getplugin(mregp->m_type_ident)) == NULL)
133 		return (EINVAL);
134 
135 	/* Create a mac_impl_t to represent this MAC. */
136 	mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP);
137 
138 	/*
139 	 * The mac is not ready for open yet.
140 	 */
141 	mip->mi_state_flags |= MIS_DISABLED;
142 
143 	/*
144 	 * When a mac is registered, the m_instance field can be set to:
145 	 *
146 	 *  0:	Get the mac's instance number from m_dip.
147 	 *	This is usually used for physical device dips.
148 	 *
149 	 *  [1 .. MAC_MAX_MINOR-1]: Use the value as the mac's instance number.
150 	 *	For example, when an aggregation is created with the key option,
151 	 *	"key" will be used as the instance number.
152 	 *
153 	 *  -1: Assign an instance number from [MAC_MAX_MINOR .. MAXMIN-1].
154 	 *	This is often used when a MAC of a virtual link is registered
155 	 *	(e.g., aggregation when "key" is not specified, or vnic).
156 	 *
157 	 * Note that the instance number is used to derive the mi_minor field
158 	 * of mac_impl_t, which will then be used to derive the name of kstats
159 	 * and the devfs nodes.  The first 2 cases are needed to preserve
160 	 * backward compatibility.
161 	 */
162 	switch (mregp->m_instance) {
163 	case 0:
164 		instance = ddi_get_instance(mregp->m_dip);
165 		break;
166 	case ((uint_t)-1):
167 		minor = mac_minor_hold(B_TRUE);
168 		if (minor == 0) {
169 			err = ENOSPC;
170 			goto fail;
171 		}
172 		instance = minor - 1;
173 		break;
174 	default:
175 		instance = mregp->m_instance;
176 		if (instance >= MAC_MAX_MINOR) {
177 			err = EINVAL;
178 			goto fail;
179 		}
180 		break;
181 	}
182 
183 	mip->mi_minor = (minor_t)(instance + 1);
184 	mip->mi_dip = mregp->m_dip;
185 	mip->mi_clients_list = NULL;
186 	mip->mi_nclients = 0;
187 
188 	/* Set the default IEEE Port VLAN Identifier */
189 	mip->mi_pvid = 1;
190 
191 	/* Default bridge link learning protection values */
192 	mip->mi_llimit = 1000;
193 	mip->mi_ldecay = 200;
194 
195 	driver = (char *)ddi_driver_name(mip->mi_dip);
196 
197 	/* Construct the MAC name as <drvname><instance> */
198 	(void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d",
199 	    driver, instance);
200 
201 	mip->mi_driver = mregp->m_driver;
202 
203 	mip->mi_type = mtype;
204 	mip->mi_margin = mregp->m_margin;
205 	mip->mi_info.mi_media = mtype->mt_type;
206 	mip->mi_info.mi_nativemedia = mtype->mt_nativetype;
207 	if (mregp->m_max_sdu <= mregp->m_min_sdu)
208 		goto fail;
209 	mip->mi_sdu_min = mregp->m_min_sdu;
210 	mip->mi_sdu_max = mregp->m_max_sdu;
211 	mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length;
212 	/*
213 	 * If the media supports a broadcast address, cache a pointer to it
214 	 * in the mac_info_t so that upper layers can use it.
215 	 */
216 	mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr;
217 
218 	mip->mi_v12n_level = mregp->m_v12n;
219 
220 	/*
221 	 * Copy the unicast source address into the mac_info_t, but only if
222 	 * the MAC-Type defines a non-zero address length.  We need to
223 	 * handle MAC-Types that have an address length of 0
224 	 * (point-to-point protocol MACs for example).
225 	 */
226 	if (mip->mi_type->mt_addr_length > 0) {
227 		if (mregp->m_src_addr == NULL)
228 			goto fail;
229 		mip->mi_info.mi_unicst_addr =
230 		    kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP);
231 		bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr,
232 		    mip->mi_type->mt_addr_length);
233 
234 		/*
235 		 * Copy the fixed 'factory' MAC address from the immutable
236 		 * info.  This is taken to be the MAC address currently in
237 		 * use.
238 		 */
239 		bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr,
240 		    mip->mi_type->mt_addr_length);
241 
242 		/*
243 		 * At this point, we should set up the classification
244 		 * rules etc but we delay it till mac_open() so that
245 		 * the resource discovery has taken place and we
246 		 * know someone wants to use the device. Otherwise
247 		 * memory gets allocated for Rx ring structures even
248 		 * during probe.
249 		 */
250 
251 		/* Copy the destination address if one is provided. */
252 		if (mregp->m_dst_addr != NULL) {
253 			bcopy(mregp->m_dst_addr, mip->mi_dstaddr,
254 			    mip->mi_type->mt_addr_length);
255 			mip->mi_dstaddr_set = B_TRUE;
256 		}
257 	} else if (mregp->m_src_addr != NULL) {
258 		goto fail;
259 	}
260 
261 	/*
262 	 * The format of the m_pdata is specific to the plugin.  It is
263 	 * passed in as an argument to all of the plugin callbacks.  The
264 	 * driver can update this information by calling
265 	 * mac_pdata_update().
266 	 */
267 	if (mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY) {
268 		/*
269 		 * Verify if the supplied plugin data is valid.  Note that
270 		 * even if the caller passed in a NULL pointer as plugin data,
271 		 * we still need to verify if that's valid as the plugin may
272 		 * require plugin data to function.
273 		 */
274 		if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata,
275 		    mregp->m_pdata_size)) {
276 			goto fail;
277 		}
278 		if (mregp->m_pdata != NULL) {
279 			mip->mi_pdata =
280 			    kmem_alloc(mregp->m_pdata_size, KM_SLEEP);
281 			bcopy(mregp->m_pdata, mip->mi_pdata,
282 			    mregp->m_pdata_size);
283 			mip->mi_pdata_size = mregp->m_pdata_size;
284 		}
285 	} else if (mregp->m_pdata != NULL) {
286 		/*
287 		 * The caller supplied non-NULL plugin data, but the plugin
288 		 * does not recognize plugin data.
289 		 */
290 		err = EINVAL;
291 		goto fail;
292 	}
293 
294 	/*
295 	 * Register the private properties.
296 	 */
297 	mac_register_priv_prop(mip, mregp->m_priv_props,
298 	    mregp->m_priv_prop_count);
299 
300 	/*
301 	 * Stash the driver callbacks into the mac_impl_t, but first sanity
302 	 * check to make sure all mandatory callbacks are set.
303 	 */
304 	if (mregp->m_callbacks->mc_getstat == NULL ||
305 	    mregp->m_callbacks->mc_start == NULL ||
306 	    mregp->m_callbacks->mc_stop == NULL ||
307 	    mregp->m_callbacks->mc_setpromisc == NULL ||
308 	    mregp->m_callbacks->mc_multicst == NULL) {
309 		goto fail;
310 	}
311 	mip->mi_callbacks = mregp->m_callbacks;
312 
313 	if (mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LEGACY,
314 	    &mip->mi_capab_legacy)) {
315 		mip->mi_state_flags |= MIS_LEGACY;
316 		mip->mi_phy_dev = mip->mi_capab_legacy.ml_dev;
317 	} else {
318 		mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip),
319 		    mip->mi_minor);
320 	}
321 
322 	/*
323 	 * Allocate a notification thread. thread_create blocks for memory
324 	 * if needed, it never fails.
325 	 */
326 	mip->mi_notify_thread = thread_create(NULL, 0, i_mac_notify_thread,
327 	    mip, 0, &p0, TS_RUN, minclsyspri);
328 
329 	/*
330 	 * Initialize the capabilities
331 	 */
332 
333 	if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, NULL))
334 		mip->mi_state_flags |= MIS_IS_VNIC;
335 
336 	if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_AGGR, NULL))
337 		mip->mi_state_flags |= MIS_IS_AGGR;
338 
339 	mac_addr_factory_init(mip);
340 
341 	/*
342 	 * Enforce the virtrualization level registered.
343 	 */
344 	if (mip->mi_v12n_level & MAC_VIRT_LEVEL1) {
345 		if (mac_init_rings(mip, MAC_RING_TYPE_RX) != 0 ||
346 		    mac_init_rings(mip, MAC_RING_TYPE_TX) != 0)
347 			goto fail;
348 
349 		/*
350 		 * The driver needs to register at least rx rings for this
351 		 * virtualization level.
352 		 */
353 		if (mip->mi_rx_groups == NULL)
354 			goto fail;
355 	}
356 
357 	/*
358 	 * The driver must set mc_unicst entry point to NULL when it advertises
359 	 * CAP_RINGS for rx groups.
360 	 */
361 	if (mip->mi_rx_groups != NULL) {
362 		if (mregp->m_callbacks->mc_unicst != NULL)
363 			goto fail;
364 	} else {
365 		if (mregp->m_callbacks->mc_unicst == NULL)
366 			goto fail;
367 	}
368 
369 	/*
370 	 * The driver must set mc_tx entry point to NULL when it advertises
371 	 * CAP_RINGS for tx rings.
372 	 */
373 	if (mip->mi_tx_groups != NULL) {
374 		if (mregp->m_callbacks->mc_tx != NULL)
375 			goto fail;
376 	} else {
377 		if (mregp->m_callbacks->mc_tx == NULL)
378 			goto fail;
379 	}
380 
381 	/*
382 	 * Initialize MAC addresses. Must be called after mac_init_rings().
383 	 */
384 	mac_init_macaddr(mip);
385 
386 	mip->mi_share_capab.ms_snum = 0;
387 	if (mip->mi_v12n_level & MAC_VIRT_HIO) {
388 		(void) mac_capab_get((mac_handle_t)mip, MAC_CAPAB_SHARES,
389 		    &mip->mi_share_capab);
390 	}
391 
392 	/*
393 	 * Initialize the kstats for this device.
394 	 */
395 	mac_stat_create(mip);
396 
397 	/* Zero out any properties. */
398 	bzero(&mip->mi_resource_props, sizeof (mac_resource_props_t));
399 
400 	/* set the gldv3 flag in dn_flags */
401 	dnp = &devnamesp[ddi_driver_major(mip->mi_dip)];
402 	LOCK_DEV_OPS(&dnp->dn_lock);
403 	dnp->dn_flags |= (DN_GLDV3_DRIVER | DN_NETWORK_DRIVER);
404 	UNLOCK_DEV_OPS(&dnp->dn_lock);
405 
406 	if (mip->mi_minor < MAC_MAX_MINOR + 1) {
407 		/* Create a style-2 DLPI device */
408 		if (ddi_create_minor_node(mip->mi_dip, driver, S_IFCHR, 0,
409 		    DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS)
410 			goto fail;
411 		style2_created = B_TRUE;
412 
413 		/* Create a style-1 DLPI device */
414 		if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR,
415 		    mip->mi_minor, DDI_NT_NET, 0) != DDI_SUCCESS)
416 			goto fail;
417 		style1_created = B_TRUE;
418 	}
419 
420 	mac_flow_l2tab_create(mip, &mip->mi_flow_tab);
421 
422 	rw_enter(&i_mac_impl_lock, RW_WRITER);
423 	if (mod_hash_insert(i_mac_impl_hash,
424 	    (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) {
425 		rw_exit(&i_mac_impl_lock);
426 		err = EEXIST;
427 		goto fail;
428 	}
429 
430 	DTRACE_PROBE2(mac__register, struct devnames *, dnp,
431 	    (mac_impl_t *), mip);
432 
433 	/*
434 	 * Mark the MAC to be ready for open.
435 	 */
436 	mip->mi_state_flags &= ~MIS_DISABLED;
437 	rw_exit(&i_mac_impl_lock);
438 
439 	atomic_inc_32(&i_mac_impl_count);
440 
441 	cmn_err(CE_NOTE, "!%s registered", mip->mi_name);
442 	*mhp = (mac_handle_t)mip;
443 	return (0);
444 
445 fail:
446 	if (style1_created)
447 		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
448 
449 	if (style2_created)
450 		ddi_remove_minor_node(mip->mi_dip, driver);
451 
452 	mac_addr_factory_fini(mip);
453 
454 	/* Clean up registered MAC addresses */
455 	mac_fini_macaddr(mip);
456 
457 	/* Clean up registered rings */
458 	mac_free_rings(mip, MAC_RING_TYPE_RX);
459 	mac_free_rings(mip, MAC_RING_TYPE_TX);
460 
461 	/* Clean up notification thread */
462 	if (mip->mi_notify_thread != NULL)
463 		i_mac_notify_exit(mip);
464 
465 	if (mip->mi_info.mi_unicst_addr != NULL) {
466 		kmem_free(mip->mi_info.mi_unicst_addr,
467 		    mip->mi_type->mt_addr_length);
468 		mip->mi_info.mi_unicst_addr = NULL;
469 	}
470 
471 	mac_stat_destroy(mip);
472 
473 	if (mip->mi_type != NULL) {
474 		atomic_dec_32(&mip->mi_type->mt_ref);
475 		mip->mi_type = NULL;
476 	}
477 
478 	if (mip->mi_pdata != NULL) {
479 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
480 		mip->mi_pdata = NULL;
481 		mip->mi_pdata_size = 0;
482 	}
483 
484 	if (minor != 0) {
485 		ASSERT(minor > MAC_MAX_MINOR);
486 		mac_minor_rele(minor);
487 	}
488 
489 	mac_unregister_priv_prop(mip);
490 
491 	kmem_cache_free(i_mac_impl_cachep, mip);
492 	return (err);
493 }
494 
495 /*
496  * Unregister from the GLDv3 framework
497  */
498 int
499 mac_unregister(mac_handle_t mh)
500 {
501 	int			err;
502 	mac_impl_t		*mip = (mac_impl_t *)mh;
503 	mod_hash_val_t		val;
504 	mac_margin_req_t	*mmr, *nextmmr;
505 
506 	/* Fail the unregister if there are any open references to this mac. */
507 	if ((err = mac_disable_nowait(mh)) != 0)
508 		return (err);
509 
510 	/*
511 	 * Clean up notification thread and wait for it to exit.
512 	 */
513 	i_mac_notify_exit(mip);
514 
515 	i_mac_perim_enter(mip);
516 
517 	/*
518 	 * There is still resource properties configured over this mac.
519 	 */
520 	if (mip->mi_resource_props.mrp_mask != 0)
521 		mac_fastpath_enable((mac_handle_t)mip);
522 
523 	if (mip->mi_minor < MAC_MAX_MINOR + 1) {
524 		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
525 		ddi_remove_minor_node(mip->mi_dip,
526 		    (char *)ddi_driver_name(mip->mi_dip));
527 	}
528 
529 	ASSERT(mip->mi_nactiveclients == 0 && !(mip->mi_state_flags &
530 	    MIS_EXCLUSIVE));
531 
532 	mac_stat_destroy(mip);
533 
534 	(void) mod_hash_remove(i_mac_impl_hash,
535 	    (mod_hash_key_t)mip->mi_name, &val);
536 	ASSERT(mip == (mac_impl_t *)val);
537 
538 	ASSERT(i_mac_impl_count > 0);
539 	atomic_dec_32(&i_mac_impl_count);
540 
541 	if (mip->mi_pdata != NULL)
542 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
543 	mip->mi_pdata = NULL;
544 	mip->mi_pdata_size = 0;
545 
546 	/*
547 	 * Free the list of margin request.
548 	 */
549 	for (mmr = mip->mi_mmrp; mmr != NULL; mmr = nextmmr) {
550 		nextmmr = mmr->mmr_nextp;
551 		kmem_free(mmr, sizeof (mac_margin_req_t));
552 	}
553 	mip->mi_mmrp = NULL;
554 
555 	mip->mi_linkstate = mip->mi_lowlinkstate = LINK_STATE_UNKNOWN;
556 	kmem_free(mip->mi_info.mi_unicst_addr, mip->mi_type->mt_addr_length);
557 	mip->mi_info.mi_unicst_addr = NULL;
558 
559 	atomic_dec_32(&mip->mi_type->mt_ref);
560 	mip->mi_type = NULL;
561 
562 	/*
563 	 * Free the primary MAC address.
564 	 */
565 	mac_fini_macaddr(mip);
566 
567 	/*
568 	 * free all rings
569 	 */
570 	mac_free_rings(mip, MAC_RING_TYPE_RX);
571 	mac_free_rings(mip, MAC_RING_TYPE_TX);
572 
573 	mac_addr_factory_fini(mip);
574 
575 	bzero(mip->mi_addr, MAXMACADDRLEN);
576 	bzero(mip->mi_dstaddr, MAXMACADDRLEN);
577 
578 	/* and the flows */
579 	mac_flow_tab_destroy(mip->mi_flow_tab);
580 	mip->mi_flow_tab = NULL;
581 
582 	if (mip->mi_minor > MAC_MAX_MINOR)
583 		mac_minor_rele(mip->mi_minor);
584 
585 	cmn_err(CE_NOTE, "!%s unregistered", mip->mi_name);
586 
587 	/*
588 	 * Reset the perim related fields to default values before
589 	 * kmem_cache_free
590 	 */
591 	i_mac_perim_exit(mip);
592 	mip->mi_state_flags = 0;
593 
594 	mac_unregister_priv_prop(mip);
595 
596 	ASSERT(mip->mi_bridge_link == NULL);
597 	kmem_cache_free(i_mac_impl_cachep, mip);
598 
599 	return (0);
600 }
601 
602 /* DATA RECEPTION */
603 
604 /*
605  * This function is invoked for packets received by the MAC driver in
606  * interrupt context. The ring generation number provided by the driver
607  * is matched with the ring generation number held in MAC. If they do not
608  * match, received packets are considered stale packets coming from an older
609  * assignment of the ring. Drop them.
610  */
611 void
612 mac_rx_ring(mac_handle_t mh, mac_ring_handle_t mrh, mblk_t *mp_chain,
613     uint64_t mr_gen_num)
614 {
615 	mac_ring_t		*mr = (mac_ring_t *)mrh;
616 
617 	if ((mr != NULL) && (mr->mr_gen_num != mr_gen_num)) {
618 		DTRACE_PROBE2(mac__rx__rings__stale__packet, uint64_t,
619 		    mr->mr_gen_num, uint64_t, mr_gen_num);
620 		freemsgchain(mp_chain);
621 		return;
622 	}
623 	mac_rx(mh, (mac_resource_handle_t)mrh, mp_chain);
624 }
625 
626 /*
627  * This function is invoked for each packet received by the underlying driver.
628  */
629 void
630 mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
631 {
632 	mac_impl_t *mip = (mac_impl_t *)mh;
633 
634 	/*
635 	 * Check if the link is part of a bridge.  If not, then we don't need
636 	 * to take the lock to remain consistent.  Make this common case
637 	 * lock-free and tail-call optimized.
638 	 */
639 	if (mip->mi_bridge_link == NULL) {
640 		mac_rx_common(mh, mrh, mp_chain);
641 	} else {
642 		/*
643 		 * Once we take a reference on the bridge link, the bridge
644 		 * module itself can't unload, so the callback pointers are
645 		 * stable.
646 		 */
647 		mutex_enter(&mip->mi_bridge_lock);
648 		if ((mh = mip->mi_bridge_link) != NULL)
649 			mac_bridge_ref_cb(mh, B_TRUE);
650 		mutex_exit(&mip->mi_bridge_lock);
651 		if (mh == NULL) {
652 			mac_rx_common((mac_handle_t)mip, mrh, mp_chain);
653 		} else {
654 			mac_bridge_rx_cb(mh, mrh, mp_chain);
655 			mac_bridge_ref_cb(mh, B_FALSE);
656 		}
657 	}
658 }
659 
660 /*
661  * Special case function: this allows snooping of packets transmitted and
662  * received by TRILL. By design, they go directly into the TRILL module.
663  */
664 void
665 mac_trill_snoop(mac_handle_t mh, mblk_t *mp)
666 {
667 	mac_impl_t *mip = (mac_impl_t *)mh;
668 
669 	if (mip->mi_promisc_list != NULL)
670 		mac_promisc_dispatch(mip, mp, NULL);
671 }
672 
673 /*
674  * This is the upward reentry point for packets arriving from the bridging
675  * module and from mac_rx for links not part of a bridge.
676  */
677 void
678 mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
679 {
680 	mac_impl_t		*mip = (mac_impl_t *)mh;
681 	mac_ring_t		*mr = (mac_ring_t *)mrh;
682 	mac_soft_ring_set_t 	*mac_srs;
683 	mblk_t			*bp = mp_chain;
684 	boolean_t		hw_classified = B_FALSE;
685 
686 	/*
687 	 * If there are any promiscuous mode callbacks defined for
688 	 * this MAC, pass them a copy if appropriate.
689 	 */
690 	if (mip->mi_promisc_list != NULL)
691 		mac_promisc_dispatch(mip, mp_chain, NULL);
692 
693 	if (mr != NULL) {
694 		/*
695 		 * If the SRS teardown has started, just return. The 'mr'
696 		 * continues to be valid until the driver unregisters the mac.
697 		 * Hardware classified packets will not make their way up
698 		 * beyond this point once the teardown has started. The driver
699 		 * is never passed a pointer to a flow entry or SRS or any
700 		 * structure that can be freed much before mac_unregister.
701 		 */
702 		mutex_enter(&mr->mr_lock);
703 		if ((mr->mr_state != MR_INUSE) || (mr->mr_flag &
704 		    (MR_INCIPIENT | MR_CONDEMNED | MR_QUIESCE))) {
705 			mutex_exit(&mr->mr_lock);
706 			freemsgchain(mp_chain);
707 			return;
708 		}
709 		if (mr->mr_classify_type == MAC_HW_CLASSIFIER) {
710 			hw_classified = B_TRUE;
711 			MR_REFHOLD_LOCKED(mr);
712 		}
713 		mutex_exit(&mr->mr_lock);
714 
715 		/*
716 		 * We check if an SRS is controlling this ring.
717 		 * If so, we can directly call the srs_lower_proc
718 		 * routine otherwise we need to go through mac_rx_classify
719 		 * to reach the right place.
720 		 */
721 		if (hw_classified) {
722 			mac_srs = mr->mr_srs;
723 			/*
724 			 * This is supposed to be the fast path.
725 			 * All packets received though here were steered by
726 			 * the hardware classifier, and share the same
727 			 * MAC header info.
728 			 */
729 			mac_srs->srs_rx.sr_lower_proc(mh,
730 			    (mac_resource_handle_t)mac_srs, mp_chain, B_FALSE);
731 			MR_REFRELE(mr);
732 			return;
733 		}
734 		/* We'll fall through to software classification */
735 	} else {
736 		flow_entry_t *flent;
737 		int err;
738 
739 		rw_enter(&mip->mi_rw_lock, RW_READER);
740 		if (mip->mi_single_active_client != NULL) {
741 			flent = mip->mi_single_active_client->mci_flent_list;
742 			FLOW_TRY_REFHOLD(flent, err);
743 			rw_exit(&mip->mi_rw_lock);
744 			if (err == 0) {
745 				(flent->fe_cb_fn)(flent->fe_cb_arg1,
746 				    flent->fe_cb_arg2, mp_chain, B_FALSE);
747 				FLOW_REFRELE(flent);
748 				return;
749 			}
750 		} else {
751 			rw_exit(&mip->mi_rw_lock);
752 		}
753 	}
754 
755 	if (!FLOW_TAB_EMPTY(mip->mi_flow_tab)) {
756 		if ((bp = mac_rx_flow(mh, mrh, bp)) == NULL)
757 			return;
758 	}
759 
760 	freemsgchain(bp);
761 }
762 
763 /* DATA TRANSMISSION */
764 
765 /*
766  * A driver's notification to resume transmission, in case of a provider
767  * without TX rings.
768  */
769 void
770 mac_tx_update(mac_handle_t mh)
771 {
772 	/*
773 	 * Walk the list of MAC clients (mac_client_handle)
774 	 * and update
775 	 */
776 	i_mac_tx_srs_notify((mac_impl_t *)mh, NULL);
777 }
778 
779 /*
780  * A driver's notification to resume transmission on the specified TX ring.
781  */
782 void
783 mac_tx_ring_update(mac_handle_t mh, mac_ring_handle_t rh)
784 {
785 	i_mac_tx_srs_notify((mac_impl_t *)mh, rh);
786 }
787 
788 /* LINK STATE */
789 /*
790  * Notify the MAC layer about a link state change
791  */
792 void
793 mac_link_update(mac_handle_t mh, link_state_t link)
794 {
795 	mac_impl_t	*mip = (mac_impl_t *)mh;
796 
797 	/*
798 	 * Save the link state.
799 	 */
800 	mip->mi_lowlinkstate = link;
801 
802 	/*
803 	 * Send a MAC_NOTE_LOWLINK notification.  This tells the notification
804 	 * thread to deliver both lower and upper notifications.
805 	 */
806 	i_mac_notify(mip, MAC_NOTE_LOWLINK);
807 }
808 
809 /*
810  * Notify the MAC layer about a link state change due to bridging.
811  */
812 void
813 mac_link_redo(mac_handle_t mh, link_state_t link)
814 {
815 	mac_impl_t	*mip = (mac_impl_t *)mh;
816 
817 	/*
818 	 * Save the link state.
819 	 */
820 	mip->mi_linkstate = link;
821 
822 	/*
823 	 * Send a MAC_NOTE_LINK notification.  Only upper notifications are
824 	 * made.
825 	 */
826 	i_mac_notify(mip, MAC_NOTE_LINK);
827 }
828 
829 /* MINOR NODE HANDLING */
830 
831 /*
832  * Given a dev_t, return the instance number (PPA) associated with it.
833  * Drivers can use this in their getinfo(9e) implementation to lookup
834  * the instance number (i.e. PPA) of the device, to use as an index to
835  * their own array of soft state structures.
836  *
837  * Returns -1 on error.
838  */
839 int
840 mac_devt_to_instance(dev_t devt)
841 {
842 	return (dld_devt_to_instance(devt));
843 }
844 
845 /*
846  * This function returns the first minor number that is available for
847  * driver private use.  All minor numbers smaller than this are
848  * reserved for GLDv3 use.
849  */
850 minor_t
851 mac_private_minor(void)
852 {
853 	return (MAC_PRIVATE_MINOR);
854 }
855 
856 /* OTHER CONTROL INFORMATION */
857 
858 /*
859  * A driver notified us that its primary MAC address has changed.
860  */
861 void
862 mac_unicst_update(mac_handle_t mh, const uint8_t *addr)
863 {
864 	mac_impl_t	*mip = (mac_impl_t *)mh;
865 
866 	if (mip->mi_type->mt_addr_length == 0)
867 		return;
868 
869 	i_mac_perim_enter(mip);
870 	/*
871 	 * If address doesn't change, do nothing.
872 	 */
873 	if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) == 0) {
874 		i_mac_perim_exit(mip);
875 		return;
876 	}
877 
878 	/*
879 	 * Freshen the MAC address value and update all MAC clients that
880 	 * share this MAC address.
881 	 */
882 	mac_freshen_macaddr(mac_find_macaddr(mip, mip->mi_addr),
883 	    (uint8_t *)addr);
884 
885 	i_mac_perim_exit(mip);
886 
887 	/*
888 	 * Send a MAC_NOTE_UNICST notification.
889 	 */
890 	i_mac_notify(mip, MAC_NOTE_UNICST);
891 }
892 
893 void
894 mac_dst_update(mac_handle_t mh, const uint8_t *addr)
895 {
896 	mac_impl_t	*mip = (mac_impl_t *)mh;
897 
898 	if (mip->mi_type->mt_addr_length == 0)
899 		return;
900 
901 	i_mac_perim_enter(mip);
902 	bcopy(addr, mip->mi_dstaddr, mip->mi_type->mt_addr_length);
903 	i_mac_perim_exit(mip);
904 	i_mac_notify(mip, MAC_NOTE_DEST);
905 }
906 
907 /*
908  * MAC plugin information changed.
909  */
910 int
911 mac_pdata_update(mac_handle_t mh, void *mac_pdata, size_t dsize)
912 {
913 	mac_impl_t	*mip = (mac_impl_t *)mh;
914 
915 	/*
916 	 * Verify that the plugin supports MAC plugin data and that the
917 	 * supplied data is valid.
918 	 */
919 	if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
920 		return (EINVAL);
921 	if (!mip->mi_type->mt_ops.mtops_pdata_verify(mac_pdata, dsize))
922 		return (EINVAL);
923 
924 	if (mip->mi_pdata != NULL)
925 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
926 
927 	mip->mi_pdata = kmem_alloc(dsize, KM_SLEEP);
928 	bcopy(mac_pdata, mip->mi_pdata, dsize);
929 	mip->mi_pdata_size = dsize;
930 
931 	/*
932 	 * Since the MAC plugin data is used to construct MAC headers that
933 	 * were cached in fast-path headers, we need to flush fast-path
934 	 * information for links associated with this mac.
935 	 */
936 	i_mac_notify(mip, MAC_NOTE_FASTPATH_FLUSH);
937 	return (0);
938 }
939 
940 /*
941  * Invoked by driver as well as the framework to notify its capability change.
942  */
943 void
944 mac_capab_update(mac_handle_t mh)
945 {
946 	/* Send MAC_NOTE_CAPAB_CHG notification */
947 	i_mac_notify((mac_impl_t *)mh, MAC_NOTE_CAPAB_CHG);
948 }
949 
950 int
951 mac_maxsdu_update(mac_handle_t mh, uint_t sdu_max)
952 {
953 	mac_impl_t	*mip = (mac_impl_t *)mh;
954 
955 	if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
956 		return (EINVAL);
957 	mip->mi_sdu_max = sdu_max;
958 
959 	/* Send a MAC_NOTE_SDU_SIZE notification. */
960 	i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
961 	return (0);
962 }
963 
964 /* PRIVATE FUNCTIONS, FOR INTERNAL USE ONLY */
965 
966 /*
967  * Updates the mac_impl structure with the current state of the link
968  */
969 static void
970 i_mac_log_link_state(mac_impl_t *mip)
971 {
972 	/*
973 	 * If no change, then it is not interesting.
974 	 */
975 	if (mip->mi_lastlowlinkstate == mip->mi_lowlinkstate)
976 		return;
977 
978 	switch (mip->mi_lowlinkstate) {
979 	case LINK_STATE_UP:
980 		if (mip->mi_type->mt_ops.mtops_ops & MTOPS_LINK_DETAILS) {
981 			char det[200];
982 
983 			mip->mi_type->mt_ops.mtops_link_details(det,
984 			    sizeof (det), (mac_handle_t)mip, mip->mi_pdata);
985 
986 			cmn_err(CE_NOTE, "!%s link up, %s", mip->mi_name, det);
987 		} else {
988 			cmn_err(CE_NOTE, "!%s link up", mip->mi_name);
989 		}
990 		break;
991 
992 	case LINK_STATE_DOWN:
993 		/*
994 		 * Only transitions from UP to DOWN are interesting
995 		 */
996 		if (mip->mi_lastlowlinkstate != LINK_STATE_UNKNOWN)
997 			cmn_err(CE_NOTE, "!%s link down", mip->mi_name);
998 		break;
999 
1000 	case LINK_STATE_UNKNOWN:
1001 		/*
1002 		 * This case is normally not interesting.
1003 		 */
1004 		break;
1005 	}
1006 	mip->mi_lastlowlinkstate = mip->mi_lowlinkstate;
1007 }
1008 
/*
 * Main routine for the callbacks notifications thread.
 *
 * arg is the mac_impl_t this thread serves.  The thread sleeps on the
 * notify condition variable until bits appear in mi_notify_bits; bit N
 * requests notification type N, and bit MAC_NNOTE is the request to quit
 * (posted by i_mac_notify_exit()).  For every requested type it runs the
 * framework default handler from mac_notify_cb_list, if any, and then each
 * callback on the MAC's notify callback list.  On quit it sets
 * MIS_NOTIFY_DONE, wakes any waiter and exits.
 */
static void
i_mac_notify_thread(void *arg)
{
	mac_impl_t	*mip = arg;
	callb_cpr_t	cprinfo;
	mac_cb_t	*mcb;
	mac_cb_info_t	*mcbi;
	mac_notify_cb_t	*mncb;

	mcbi = &mip->mi_notify_cb_info;
	CALLB_CPR_INIT(&cprinfo, mcbi->mcbi_lockp, callb_generic_cpr,
	    "i_mac_notify_thread");

	mutex_enter(mcbi->mcbi_lockp);

	for (;;) {
		uint32_t	bits;
		uint32_t	type;

		/* Snapshot the pending requests while holding the lock. */
		bits = mip->mi_notify_bits;
		if (bits == 0) {
			/* Nothing pending: wait, marked CPR-safe meanwhile. */
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
			CALLB_CPR_SAFE_END(&cprinfo, mcbi->mcbi_lockp);
			continue;
		}
		/* Claim the snapshot; later requests accumulate afresh. */
		mip->mi_notify_bits = 0;
		if ((bits & (1 << MAC_NNOTE)) != 0) {
			/* request to quit */
			ASSERT(mip->mi_state_flags & MIS_DISABLED);
			break;
		}

		/* The callbacks below run without the notify lock held. */
		mutex_exit(mcbi->mcbi_lockp);

		/*
		 * Log link changes on the actual link, but then do reports on
		 * synthetic state (if part of a bridge).
		 */
		if ((bits & (1 << MAC_NOTE_LOWLINK)) != 0) {
			link_state_t newstate;
			mac_handle_t mh;

			i_mac_log_link_state(mip);
			newstate = mip->mi_lowlinkstate;
			if (mip->mi_bridge_link != NULL) {
				/* Re-check the bridge link under its lock. */
				mutex_enter(&mip->mi_bridge_lock);
				if ((mh = mip->mi_bridge_link) != NULL) {
					newstate = mac_bridge_ls_cb(mh,
					    newstate);
				}
				mutex_exit(&mip->mi_bridge_lock);
			}
			/*
			 * If the upper link state changed as a result, also
			 * deliver MAC_NOTE_LINK in this pass.
			 */
			if (newstate != mip->mi_linkstate) {
				mip->mi_linkstate = newstate;
				bits |= 1 << MAC_NOTE_LINK;
			}
		}

		/*
		 * Do notification callbacks for each notification type.
		 */
		for (type = 0; type < MAC_NNOTE; type++) {
			if ((bits & (1 << type)) == 0) {
				continue;
			}

			/* Framework default handler for this type, if any. */
			if (mac_notify_cb_list[type] != NULL)
				(*mac_notify_cb_list[type])(mip);

			/*
			 * Walk the list of notifications.
			 */
			MAC_CALLBACK_WALKER_INC(&mip->mi_notify_cb_info);
			for (mcb = mip->mi_notify_cb_list; mcb != NULL;
			    mcb = mcb->mcb_nextp) {
				mncb = (mac_notify_cb_t *)mcb->mcb_objp;
				mncb->mncb_fn(mncb->mncb_arg, type);
			}
			MAC_CALLBACK_WALKER_DCR(&mip->mi_notify_cb_info,
			    &mip->mi_notify_cb_list);
		}

		/* Retake the lock before looking at mi_notify_bits again. */
		mutex_enter(mcbi->mcbi_lockp);
	}

	/* Still holding the lock: tell i_mac_notify_exit() we are done. */
	mip->mi_state_flags |= MIS_NOTIFY_DONE;
	cv_broadcast(&mcbi->mcbi_cv);

	/* CALLB_CPR_EXIT drops the lock */
	CALLB_CPR_EXIT(&cprinfo);
	thread_exit();
}
1105 
1106 /*
1107  * Signal the i_mac_notify_thread asking it to quit.
1108  * Then wait till it is done.
1109  */
1110 void
1111 i_mac_notify_exit(mac_impl_t *mip)
1112 {
1113 	mac_cb_info_t	*mcbi;
1114 
1115 	mcbi = &mip->mi_notify_cb_info;
1116 
1117 	mutex_enter(mcbi->mcbi_lockp);
1118 	mip->mi_notify_bits = (1 << MAC_NNOTE);
1119 	cv_broadcast(&mcbi->mcbi_cv);
1120 
1121 
1122 	while ((mip->mi_notify_thread != NULL) &&
1123 	    !(mip->mi_state_flags & MIS_NOTIFY_DONE)) {
1124 		cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
1125 	}
1126 
1127 	/* Necessary clean up before doing kmem_cache_free */
1128 	mip->mi_state_flags &= ~MIS_NOTIFY_DONE;
1129 	mip->mi_notify_bits = 0;
1130 	mip->mi_notify_thread = NULL;
1131 	mutex_exit(mcbi->mcbi_lockp);
1132 }
1133 
1134 /*
1135  * Entry point invoked by drivers to dynamically add a ring to an
1136  * existing group.
1137  */
1138 int
1139 mac_group_add_ring(mac_group_handle_t gh, int index)
1140 {
1141 	mac_group_t *group = (mac_group_t *)gh;
1142 	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1143 	int ret;
1144 
1145 	i_mac_perim_enter(mip);
1146 
1147 	/*
1148 	 * Only RX rings can be added or removed by drivers currently.
1149 	 */
1150 	ASSERT(group->mrg_type == MAC_RING_TYPE_RX);
1151 
1152 	ret = i_mac_group_add_ring(group, NULL, index);
1153 
1154 	i_mac_perim_exit(mip);
1155 
1156 	return (ret);
1157 }
1158 
1159 /*
1160  * Entry point invoked by drivers to dynamically remove a ring
1161  * from an existing group. The specified ring handle must no longer
1162  * be used by the driver after a call to this function.
1163  */
1164 void
1165 mac_group_rem_ring(mac_group_handle_t gh, mac_ring_handle_t rh)
1166 {
1167 	mac_group_t *group = (mac_group_t *)gh;
1168 	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1169 
1170 	i_mac_perim_enter(mip);
1171 
1172 	/*
1173 	 * Only RX rings can be added or removed by drivers currently.
1174 	 */
1175 	ASSERT(group->mrg_type == MAC_RING_TYPE_RX);
1176 
1177 	i_mac_group_rem_ring(group, (mac_ring_t *)rh, B_TRUE);
1178 
1179 	i_mac_perim_exit(mip);
1180 }
1181