xref: /titanic_51/usr/src/uts/common/io/mac/mac_provider.c (revision 375b28ffc40c6f03a644dc9310ae2000e73ffd5e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/conf.h>
29 #include <sys/id_space.h>
30 #include <sys/esunddi.h>
31 #include <sys/stat.h>
32 #include <sys/mkdev.h>
33 #include <sys/stream.h>
34 #include <sys/strsubr.h>
35 #include <sys/dlpi.h>
36 #include <sys/modhash.h>
37 #include <sys/mac.h>
38 #include <sys/mac_provider.h>
39 #include <sys/mac_impl.h>
40 #include <sys/mac_client_impl.h>
41 #include <sys/mac_client_priv.h>
42 #include <sys/mac_soft_ring.h>
43 #include <sys/modctl.h>
44 #include <sys/fs/dv_node.h>
45 #include <sys/thread.h>
46 #include <sys/proc.h>
47 #include <sys/callb.h>
48 #include <sys/cpuvar.h>
49 #include <sys/atomic.h>
50 #include <sys/sdt.h>
51 #include <sys/mac_flow.h>
52 #include <sys/ddi_intr_impl.h>
53 #include <sys/disp.h>
54 #include <sys/sdt.h>
55 
56 /*
57  * MAC Provider Interface.
58  *
59  * Interface for GLDv3 compatible NIC drivers.
60  */
61 
62 static void i_mac_notify_thread(void *);
63 
64 typedef void (*mac_notify_default_cb_fn_t)(mac_impl_t *);
65 
66 typedef struct mac_notify_default_cb_s {
67 	mac_notify_type_t		mac_notify_type;
68 	mac_notify_default_cb_fn_t	mac_notify_cb_fn;
69 }mac_notify_default_cb_t;
70 
71 mac_notify_default_cb_t mac_notify_cb_list[] = {
72 	{ MAC_NOTE_LINK,		mac_fanout_recompute},
73 	{ MAC_NOTE_UNICST,		NULL},
74 	{ MAC_NOTE_TX,			NULL},
75 	{ MAC_NOTE_DEVPROMISC,		NULL},
76 	{ MAC_NOTE_FASTPATH_FLUSH,	NULL},
77 	{ MAC_NOTE_SDU_SIZE,		NULL},
78 	{ MAC_NOTE_MARGIN,		NULL},
79 	{ MAC_NOTE_CAPAB_CHG,		NULL},
80 	{ MAC_NNOTE,			NULL},
81 };
82 
83 /*
84  * Driver support functions.
85  */
86 
87 /* REGISTRATION */
88 
89 mac_register_t *
90 mac_alloc(uint_t mac_version)
91 {
92 	mac_register_t *mregp;
93 
94 	/*
95 	 * Make sure there isn't a version mismatch between the driver and
96 	 * the framework.  In the future, if multiple versions are
97 	 * supported, this check could become more sophisticated.
98 	 */
99 	if (mac_version != MAC_VERSION)
100 		return (NULL);
101 
102 	mregp = kmem_zalloc(sizeof (mac_register_t), KM_SLEEP);
103 	mregp->m_version = mac_version;
104 	return (mregp);
105 }
106 
107 void
108 mac_free(mac_register_t *mregp)
109 {
110 	kmem_free(mregp, sizeof (mac_register_t));
111 }
112 
113 /*
114  * mac_register() is how drivers register new MACs with the GLDv3
115  * framework.  The mregp argument is allocated by drivers using the
116  * mac_alloc() function, and can be freed using mac_free() immediately upon
117  * return from mac_register().  Upon success (0 return value), the mhp
118  * opaque pointer becomes the driver's handle to its MAC interface, and is
119  * the argument to all other mac module entry points.
120  */
121 /* ARGSUSED */
122 int
123 mac_register(mac_register_t *mregp, mac_handle_t *mhp)
124 {
125 	mac_impl_t		*mip;
126 	mactype_t		*mtype;
127 	int			err = EINVAL;
128 	struct devnames		*dnp = NULL;
129 	uint_t			instance;
130 	boolean_t		style1_created = B_FALSE;
131 	boolean_t		style2_created = B_FALSE;
132 	char			*driver;
133 	minor_t			minor = 0;
134 
135 	/* Find the required MAC-Type plugin. */
136 	if ((mtype = mactype_getplugin(mregp->m_type_ident)) == NULL)
137 		return (EINVAL);
138 
139 	/* Create a mac_impl_t to represent this MAC. */
140 	mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP);
141 
142 	/*
143 	 * The mac is not ready for open yet.
144 	 */
145 	mip->mi_state_flags |= MIS_DISABLED;
146 
147 	/*
148 	 * When a mac is registered, the m_instance field can be set to:
149 	 *
150 	 *  0:	Get the mac's instance number from m_dip.
151 	 *	This is usually used for physical device dips.
152 	 *
153 	 *  [1 .. MAC_MAX_MINOR-1]: Use the value as the mac's instance number.
154 	 *	For example, when an aggregation is created with the key option,
155 	 *	"key" will be used as the instance number.
156 	 *
157 	 *  -1: Assign an instance number from [MAC_MAX_MINOR .. MAXMIN-1].
158 	 *	This is often used when a MAC of a virtual link is registered
159 	 *	(e.g., aggregation when "key" is not specified, or vnic).
160 	 *
161 	 * Note that the instance number is used to derive the mi_minor field
162 	 * of mac_impl_t, which will then be used to derive the name of kstats
163 	 * and the devfs nodes.  The first 2 cases are needed to preserve
164 	 * backward compatibility.
165 	 */
166 	switch (mregp->m_instance) {
167 	case 0:
168 		instance = ddi_get_instance(mregp->m_dip);
169 		break;
170 	case ((uint_t)-1):
171 		minor = mac_minor_hold(B_TRUE);
172 		if (minor == 0) {
173 			err = ENOSPC;
174 			goto fail;
175 		}
176 		instance = minor - 1;
177 		break;
178 	default:
179 		instance = mregp->m_instance;
180 		if (instance >= MAC_MAX_MINOR) {
181 			err = EINVAL;
182 			goto fail;
183 		}
184 		break;
185 	}
186 
187 	mip->mi_minor = (minor_t)(instance + 1);
188 	mip->mi_dip = mregp->m_dip;
189 	mip->mi_clients_list = NULL;
190 	mip->mi_nclients = 0;
191 
192 	driver = (char *)ddi_driver_name(mip->mi_dip);
193 
194 	/* Construct the MAC name as <drvname><instance> */
195 	(void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d",
196 	    driver, instance);
197 
198 	mip->mi_driver = mregp->m_driver;
199 
200 	mip->mi_type = mtype;
201 	mip->mi_margin = mregp->m_margin;
202 	mip->mi_info.mi_media = mtype->mt_type;
203 	mip->mi_info.mi_nativemedia = mtype->mt_nativetype;
204 	if (mregp->m_max_sdu <= mregp->m_min_sdu)
205 		goto fail;
206 	mip->mi_sdu_min = mregp->m_min_sdu;
207 	mip->mi_sdu_max = mregp->m_max_sdu;
208 	mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length;
209 	/*
210 	 * If the media supports a broadcast address, cache a pointer to it
211 	 * in the mac_info_t so that upper layers can use it.
212 	 */
213 	mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr;
214 
215 	mip->mi_v12n_level = mregp->m_v12n;
216 
217 	/*
218 	 * Copy the unicast source address into the mac_info_t, but only if
219 	 * the MAC-Type defines a non-zero address length.  We need to
220 	 * handle MAC-Types that have an address length of 0
221 	 * (point-to-point protocol MACs for example).
222 	 */
223 	if (mip->mi_type->mt_addr_length > 0) {
224 		if (mregp->m_src_addr == NULL)
225 			goto fail;
226 		mip->mi_info.mi_unicst_addr =
227 		    kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP);
228 		bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr,
229 		    mip->mi_type->mt_addr_length);
230 
231 		/*
232 		 * Copy the fixed 'factory' MAC address from the immutable
233 		 * info.  This is taken to be the MAC address currently in
234 		 * use.
235 		 */
236 		bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr,
237 		    mip->mi_type->mt_addr_length);
238 
239 		/*
240 		 * At this point, we should set up the classification
241 		 * rules etc but we delay it till mac_open() so that
242 		 * the resource discovery has taken place and we
243 		 * know someone wants to use the device. Otherwise
244 		 * memory gets allocated for Rx ring structures even
245 		 * during probe.
246 		 */
247 
248 		/* Copy the destination address if one is provided. */
249 		if (mregp->m_dst_addr != NULL) {
250 			bcopy(mregp->m_dst_addr, mip->mi_dstaddr,
251 			    mip->mi_type->mt_addr_length);
252 		}
253 	} else if (mregp->m_src_addr != NULL) {
254 		goto fail;
255 	}
256 
257 	/*
258 	 * The format of the m_pdata is specific to the plugin.  It is
259 	 * passed in as an argument to all of the plugin callbacks.  The
260 	 * driver can update this information by calling
261 	 * mac_pdata_update().
262 	 */
263 	if (mregp->m_pdata != NULL) {
264 		/*
265 		 * Verify that the plugin supports MAC plugin data and that
266 		 * the supplied data is valid.
267 		 */
268 		if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
269 			goto fail;
270 		if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata,
271 		    mregp->m_pdata_size)) {
272 			goto fail;
273 		}
274 		mip->mi_pdata = kmem_alloc(mregp->m_pdata_size, KM_SLEEP);
275 		bcopy(mregp->m_pdata, mip->mi_pdata, mregp->m_pdata_size);
276 		mip->mi_pdata_size = mregp->m_pdata_size;
277 	}
278 
279 	/*
280 	 * Register the private properties.
281 	 */
282 	mac_register_priv_prop(mip, mregp->m_priv_props,
283 	    mregp->m_priv_prop_count);
284 
285 	/*
286 	 * Stash the driver callbacks into the mac_impl_t, but first sanity
287 	 * check to make sure all mandatory callbacks are set.
288 	 */
289 	if (mregp->m_callbacks->mc_getstat == NULL ||
290 	    mregp->m_callbacks->mc_start == NULL ||
291 	    mregp->m_callbacks->mc_stop == NULL ||
292 	    mregp->m_callbacks->mc_setpromisc == NULL ||
293 	    mregp->m_callbacks->mc_multicst == NULL) {
294 		goto fail;
295 	}
296 	mip->mi_callbacks = mregp->m_callbacks;
297 
298 	if (mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LEGACY,
299 	    &mip->mi_capab_legacy)) {
300 		mip->mi_state_flags |= MIS_LEGACY;
301 		mip->mi_phy_dev = mip->mi_capab_legacy.ml_dev;
302 	} else {
303 		mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip),
304 		    ddi_get_instance(mip->mi_dip) + 1);
305 	}
306 
307 	/*
308 	 * Allocate a notification thread. thread_create blocks for memory
309 	 * if needed, it never fails.
310 	 */
311 	mip->mi_notify_thread = thread_create(NULL, 0, i_mac_notify_thread,
312 	    mip, 0, &p0, TS_RUN, minclsyspri);
313 
314 	/*
315 	 * Initialize the capabilities
316 	 */
317 
318 	if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, NULL))
319 		mip->mi_state_flags |= MIS_IS_VNIC;
320 
321 	if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_AGGR, NULL))
322 		mip->mi_state_flags |= MIS_IS_AGGR;
323 
324 	mac_addr_factory_init(mip);
325 
326 	/*
327 	 * Enforce the virtrualization level registered.
328 	 */
329 	if (mip->mi_v12n_level & MAC_VIRT_LEVEL1) {
330 		if (mac_init_rings(mip, MAC_RING_TYPE_RX) != 0 ||
331 		    mac_init_rings(mip, MAC_RING_TYPE_TX) != 0)
332 			goto fail;
333 
334 		/*
335 		 * The driver needs to register at least rx rings for this
336 		 * virtualization level.
337 		 */
338 		if (mip->mi_rx_groups == NULL)
339 			goto fail;
340 	}
341 
342 	/*
343 	 * The driver must set mc_unicst entry point to NULL when it advertises
344 	 * CAP_RINGS for rx groups.
345 	 */
346 	if (mip->mi_rx_groups != NULL) {
347 		if (mregp->m_callbacks->mc_unicst != NULL)
348 			goto fail;
349 	} else {
350 		if (mregp->m_callbacks->mc_unicst == NULL)
351 			goto fail;
352 	}
353 
354 	/*
355 	 * The driver must set mc_tx entry point to NULL when it advertises
356 	 * CAP_RINGS for tx rings.
357 	 */
358 	if (mip->mi_tx_groups != NULL) {
359 		if (mregp->m_callbacks->mc_tx != NULL)
360 			goto fail;
361 	} else {
362 		if (mregp->m_callbacks->mc_tx == NULL)
363 			goto fail;
364 	}
365 
366 	/*
367 	 * Initialize MAC addresses. Must be called after mac_init_rings().
368 	 */
369 	mac_init_macaddr(mip);
370 
371 	mip->mi_share_capab.ms_snum = 0;
372 	if (mip->mi_v12n_level & MAC_VIRT_HIO) {
373 		(void) mac_capab_get((mac_handle_t)mip, MAC_CAPAB_SHARES,
374 		    &mip->mi_share_capab);
375 	}
376 
377 	/*
378 	 * Initialize the kstats for this device.
379 	 */
380 	mac_stat_create(mip);
381 
382 	/* Zero out any properties. */
383 	bzero(&mip->mi_resource_props, sizeof (mac_resource_props_t));
384 
385 	/* set the gldv3 flag in dn_flags */
386 	dnp = &devnamesp[ddi_driver_major(mip->mi_dip)];
387 	LOCK_DEV_OPS(&dnp->dn_lock);
388 	dnp->dn_flags |= (DN_GLDV3_DRIVER | DN_NETWORK_DRIVER);
389 	UNLOCK_DEV_OPS(&dnp->dn_lock);
390 
391 	if (mip->mi_minor < MAC_MAX_MINOR + 1) {
392 		/* Create a style-2 DLPI device */
393 		if (ddi_create_minor_node(mip->mi_dip, driver, S_IFCHR, 0,
394 		    DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS)
395 			goto fail;
396 		style2_created = B_TRUE;
397 
398 		/* Create a style-1 DLPI device */
399 		if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR,
400 		    mip->mi_minor, DDI_NT_NET, 0) != DDI_SUCCESS)
401 			goto fail;
402 		style1_created = B_TRUE;
403 	}
404 
405 	mac_flow_l2tab_create(mip, &mip->mi_flow_tab);
406 
407 	rw_enter(&i_mac_impl_lock, RW_WRITER);
408 	if (mod_hash_insert(i_mac_impl_hash,
409 	    (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) {
410 		rw_exit(&i_mac_impl_lock);
411 		err = EEXIST;
412 		goto fail;
413 	}
414 
415 	DTRACE_PROBE2(mac__register, struct devnames *, dnp,
416 	    (mac_impl_t *), mip);
417 
418 	/*
419 	 * Mark the MAC to be ready for open.
420 	 */
421 	mip->mi_state_flags &= ~MIS_DISABLED;
422 	rw_exit(&i_mac_impl_lock);
423 
424 	atomic_inc_32(&i_mac_impl_count);
425 
426 	cmn_err(CE_NOTE, "!%s registered", mip->mi_name);
427 	*mhp = (mac_handle_t)mip;
428 	return (0);
429 
430 fail:
431 	if (style1_created)
432 		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
433 
434 	if (style2_created)
435 		ddi_remove_minor_node(mip->mi_dip, driver);
436 
437 	mac_addr_factory_fini(mip);
438 
439 	/* Clean up registered MAC addresses */
440 	mac_fini_macaddr(mip);
441 
442 	/* Clean up registered rings */
443 	mac_free_rings(mip, MAC_RING_TYPE_RX);
444 	mac_free_rings(mip, MAC_RING_TYPE_TX);
445 
446 	/* Clean up notification thread */
447 	if (mip->mi_notify_thread != NULL)
448 		i_mac_notify_exit(mip);
449 
450 	if (mip->mi_info.mi_unicst_addr != NULL) {
451 		kmem_free(mip->mi_info.mi_unicst_addr,
452 		    mip->mi_type->mt_addr_length);
453 		mip->mi_info.mi_unicst_addr = NULL;
454 	}
455 
456 	mac_stat_destroy(mip);
457 
458 	if (mip->mi_type != NULL) {
459 		atomic_dec_32(&mip->mi_type->mt_ref);
460 		mip->mi_type = NULL;
461 	}
462 
463 	if (mip->mi_pdata != NULL) {
464 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
465 		mip->mi_pdata = NULL;
466 		mip->mi_pdata_size = 0;
467 	}
468 
469 	if (minor != 0) {
470 		ASSERT(minor > MAC_MAX_MINOR);
471 		mac_minor_rele(minor);
472 	}
473 
474 	mac_unregister_priv_prop(mip);
475 
476 	kmem_cache_free(i_mac_impl_cachep, mip);
477 	return (err);
478 }
479 
480 /*
481  * Unregister from the GLDv3 framework
482  */
483 int
484 mac_unregister(mac_handle_t mh)
485 {
486 	int			err;
487 	mac_impl_t		*mip = (mac_impl_t *)mh;
488 	mod_hash_val_t		val;
489 	mac_margin_req_t	*mmr, *nextmmr;
490 
491 	/* Fail the unregister if there are any open references to this mac. */
492 	if ((err = mac_disable_nowait(mh)) != 0)
493 		return (err);
494 
495 	/*
496 	 * Clean up notification thread and wait for it to exit.
497 	 */
498 	i_mac_notify_exit(mip);
499 
500 	i_mac_perim_enter(mip);
501 
502 	/*
503 	 * There is still resource properties configured over this mac.
504 	 */
505 	if (mip->mi_resource_props.mrp_mask != 0)
506 		mac_fastpath_enable((mac_handle_t)mip);
507 
508 	if (mip->mi_minor < MAC_MAX_MINOR + 1) {
509 		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
510 		ddi_remove_minor_node(mip->mi_dip,
511 		    (char *)ddi_driver_name(mip->mi_dip));
512 	}
513 
514 	ASSERT(mip->mi_nactiveclients == 0 && !(mip->mi_state_flags &
515 	    MIS_EXCLUSIVE));
516 
517 	mac_stat_destroy(mip);
518 
519 	(void) mod_hash_remove(i_mac_impl_hash,
520 	    (mod_hash_key_t)mip->mi_name, &val);
521 	ASSERT(mip == (mac_impl_t *)val);
522 
523 	ASSERT(i_mac_impl_count > 0);
524 	atomic_dec_32(&i_mac_impl_count);
525 
526 	if (mip->mi_pdata != NULL)
527 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
528 	mip->mi_pdata = NULL;
529 	mip->mi_pdata_size = 0;
530 
531 	/*
532 	 * Free the list of margin request.
533 	 */
534 	for (mmr = mip->mi_mmrp; mmr != NULL; mmr = nextmmr) {
535 		nextmmr = mmr->mmr_nextp;
536 		kmem_free(mmr, sizeof (mac_margin_req_t));
537 	}
538 	mip->mi_mmrp = NULL;
539 
540 	mip->mi_linkstate = LINK_STATE_UNKNOWN;
541 	kmem_free(mip->mi_info.mi_unicst_addr, mip->mi_type->mt_addr_length);
542 	mip->mi_info.mi_unicst_addr = NULL;
543 
544 	atomic_dec_32(&mip->mi_type->mt_ref);
545 	mip->mi_type = NULL;
546 
547 	/*
548 	 * Free the primary MAC address.
549 	 */
550 	mac_fini_macaddr(mip);
551 
552 	/*
553 	 * free all rings
554 	 */
555 	mac_free_rings(mip, MAC_RING_TYPE_RX);
556 	mac_free_rings(mip, MAC_RING_TYPE_TX);
557 
558 	mac_addr_factory_fini(mip);
559 
560 	bzero(mip->mi_addr, MAXMACADDRLEN);
561 	bzero(mip->mi_dstaddr, MAXMACADDRLEN);
562 
563 	/* and the flows */
564 	mac_flow_tab_destroy(mip->mi_flow_tab);
565 	mip->mi_flow_tab = NULL;
566 
567 	if (mip->mi_minor > MAC_MAX_MINOR)
568 		mac_minor_rele(mip->mi_minor);
569 
570 	cmn_err(CE_NOTE, "!%s unregistered", mip->mi_name);
571 
572 	/*
573 	 * Reset the perim related fields to default values before
574 	 * kmem_cache_free
575 	 */
576 	i_mac_perim_exit(mip);
577 	mip->mi_state_flags = 0;
578 
579 	mac_unregister_priv_prop(mip);
580 	kmem_cache_free(i_mac_impl_cachep, mip);
581 
582 	return (0);
583 }
584 
585 /* DATA RECEPTION */
586 
587 /*
588  * This function is invoked for packets received by the MAC driver in
589  * interrupt context. The ring generation number provided by the driver
590  * is matched with the ring generation number held in MAC. If they do not
591  * match, received packets are considered stale packets coming from an older
592  * assignment of the ring. Drop them.
593  */
594 void
595 mac_rx_ring(mac_handle_t mh, mac_ring_handle_t mrh, mblk_t *mp_chain,
596     uint64_t mr_gen_num)
597 {
598 	mac_ring_t		*mr = (mac_ring_t *)mrh;
599 
600 	if ((mr != NULL) && (mr->mr_gen_num != mr_gen_num)) {
601 		DTRACE_PROBE2(mac__rx__rings__stale__packet, uint64_t,
602 		    mr->mr_gen_num, uint64_t, mr_gen_num);
603 		freemsgchain(mp_chain);
604 		return;
605 	}
606 	mac_rx(mh, (mac_resource_handle_t)mrh, mp_chain);
607 }
608 
609 /*
610  * This function is invoked for each packet received by the underlying
611  * driver.
612  */
613 void
614 mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
615 {
616 	mac_impl_t		*mip = (mac_impl_t *)mh;
617 	mac_ring_t		*mr = (mac_ring_t *)mrh;
618 	mac_soft_ring_set_t 	*mac_srs;
619 	mblk_t			*bp = mp_chain;
620 	boolean_t		hw_classified = B_FALSE;
621 
622 	/*
623 	 * If there are any promiscuous mode callbacks defined for
624 	 * this MAC, pass them a copy if appropriate.
625 	 */
626 	if (mip->mi_promisc_list != NULL)
627 		mac_promisc_dispatch(mip, mp_chain, NULL);
628 
629 	if (mr != NULL) {
630 		/*
631 		 * If the SRS teardown has started, just return. The 'mr'
632 		 * continues to be valid until the driver unregisters the mac.
633 		 * Hardware classified packets will not make their way up
634 		 * beyond this point once the teardown has started. The driver
635 		 * is never passed a pointer to a flow entry or SRS or any
636 		 * structure that can be freed much before mac_unregister.
637 		 */
638 		mutex_enter(&mr->mr_lock);
639 		if ((mr->mr_state != MR_INUSE) || (mr->mr_flag &
640 		    (MR_INCIPIENT | MR_CONDEMNED | MR_QUIESCE))) {
641 			mutex_exit(&mr->mr_lock);
642 			freemsgchain(mp_chain);
643 			return;
644 		}
645 		if (mr->mr_classify_type == MAC_HW_CLASSIFIER) {
646 			hw_classified = B_TRUE;
647 			MR_REFHOLD_LOCKED(mr);
648 		}
649 		mutex_exit(&mr->mr_lock);
650 
651 		/*
652 		 * We check if an SRS is controlling this ring.
653 		 * If so, we can directly call the srs_lower_proc
654 		 * routine otherwise we need to go through mac_rx_classify
655 		 * to reach the right place.
656 		 */
657 		if (hw_classified) {
658 			mac_srs = mr->mr_srs;
659 			/*
660 			 * This is supposed to be the fast path.
661 			 * All packets received though here were steered by
662 			 * the hardware classifier, and share the same
663 			 * MAC header info.
664 			 */
665 			mac_srs->srs_rx.sr_lower_proc(mh,
666 			    (mac_resource_handle_t)mac_srs, mp_chain, B_FALSE);
667 			MR_REFRELE(mr);
668 			return;
669 		}
670 		/* We'll fall through to software classification */
671 	} else {
672 		flow_entry_t *flent;
673 		int err;
674 
675 		rw_enter(&mip->mi_rw_lock, RW_READER);
676 		if (mip->mi_single_active_client != NULL) {
677 			flent = mip->mi_single_active_client->mci_flent_list;
678 			FLOW_TRY_REFHOLD(flent, err);
679 			rw_exit(&mip->mi_rw_lock);
680 			if (err == 0) {
681 				(flent->fe_cb_fn)(flent->fe_cb_arg1,
682 				    flent->fe_cb_arg2, mp_chain, B_FALSE);
683 				FLOW_REFRELE(flent);
684 				return;
685 			}
686 		} else {
687 			rw_exit(&mip->mi_rw_lock);
688 		}
689 	}
690 
691 	if (!FLOW_TAB_EMPTY(mip->mi_flow_tab)) {
692 		if ((bp = mac_rx_flow(mh, mrh, bp)) == NULL)
693 			return;
694 	}
695 
696 	freemsgchain(bp);
697 }
698 
699 /* DATA TRANSMISSION */
700 
701 /*
702  * A driver's notification to resume transmission, in case of a provider
703  * without TX rings.
704  */
705 void
706 mac_tx_update(mac_handle_t mh)
707 {
708 	/*
709 	 * Walk the list of MAC clients (mac_client_handle)
710 	 * and update
711 	 */
712 	i_mac_tx_srs_notify((mac_impl_t *)mh, NULL);
713 }
714 
715 /*
716  * A driver's notification to resume transmission on the specified TX ring.
717  */
718 void
719 mac_tx_ring_update(mac_handle_t mh, mac_ring_handle_t rh)
720 {
721 	i_mac_tx_srs_notify((mac_impl_t *)mh, rh);
722 }
723 
724 /* LINK STATE */
725 /*
726  * Notify the MAC layer about a link state change
727  */
728 void
729 mac_link_update(mac_handle_t mh, link_state_t link)
730 {
731 	mac_impl_t	*mip = (mac_impl_t *)mh;
732 
733 	/*
734 	 * Save the link state.
735 	 */
736 	mip->mi_linkstate = link;
737 
738 	/*
739 	 * Send a MAC_NOTE_LINK notification.
740 	 */
741 	i_mac_notify(mip, MAC_NOTE_LINK);
742 }
743 
744 /* OTHER CONTROL INFORMATION */
745 
746 /*
747  * A driver notified us that its primary MAC address has changed.
748  */
749 void
750 mac_unicst_update(mac_handle_t mh, const uint8_t *addr)
751 {
752 	mac_impl_t	*mip = (mac_impl_t *)mh;
753 
754 	if (mip->mi_type->mt_addr_length == 0)
755 		return;
756 
757 	i_mac_perim_enter(mip);
758 	/*
759 	 * If address doesn't change, do nothing.
760 	 */
761 	if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) == 0) {
762 		i_mac_perim_exit(mip);
763 		return;
764 	}
765 
766 	/*
767 	 * Freshen the MAC address value and update all MAC clients that
768 	 * share this MAC address.
769 	 */
770 	mac_freshen_macaddr(mac_find_macaddr(mip, mip->mi_addr),
771 	    (uint8_t *)addr);
772 
773 	i_mac_perim_exit(mip);
774 
775 	/*
776 	 * Send a MAC_NOTE_UNICST notification.
777 	 */
778 	i_mac_notify(mip, MAC_NOTE_UNICST);
779 }
780 
781 /*
782  * MAC plugin information changed.
783  */
784 int
785 mac_pdata_update(mac_handle_t mh, void *mac_pdata, size_t dsize)
786 {
787 	mac_impl_t	*mip = (mac_impl_t *)mh;
788 
789 	/*
790 	 * Verify that the plugin supports MAC plugin data and that the
791 	 * supplied data is valid.
792 	 */
793 	if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
794 		return (EINVAL);
795 	if (!mip->mi_type->mt_ops.mtops_pdata_verify(mac_pdata, dsize))
796 		return (EINVAL);
797 
798 	if (mip->mi_pdata != NULL)
799 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
800 
801 	mip->mi_pdata = kmem_alloc(dsize, KM_SLEEP);
802 	bcopy(mac_pdata, mip->mi_pdata, dsize);
803 	mip->mi_pdata_size = dsize;
804 
805 	/*
806 	 * Since the MAC plugin data is used to construct MAC headers that
807 	 * were cached in fast-path headers, we need to flush fast-path
808 	 * information for links associated with this mac.
809 	 */
810 	i_mac_notify(mip, MAC_NOTE_FASTPATH_FLUSH);
811 	return (0);
812 }
813 
814 /*
815  * Invoked by driver as well as the framework to notify its capability change.
816  */
817 void
818 mac_capab_update(mac_handle_t mh)
819 {
820 	/* Send MAC_NOTE_CAPAB_CHG notification */
821 	i_mac_notify((mac_impl_t *)mh, MAC_NOTE_CAPAB_CHG);
822 }
823 
824 int
825 mac_maxsdu_update(mac_handle_t mh, uint_t sdu_max)
826 {
827 	mac_impl_t	*mip = (mac_impl_t *)mh;
828 
829 	if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
830 		return (EINVAL);
831 	mip->mi_sdu_max = sdu_max;
832 
833 	/* Send a MAC_NOTE_SDU_SIZE notification. */
834 	i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
835 	return (0);
836 }
837 
838 /* PRIVATE FUNCTIONS, FOR INTERNAL USE ONLY */
839 
840 /*
841  * Updates the mac_impl structure with the current state of the link
842  */
843 static void
844 i_mac_log_link_state(mac_impl_t *mip)
845 {
846 	/*
847 	 * If no change, then it is not interesting.
848 	 */
849 	if (mip->mi_lastlinkstate == mip->mi_linkstate)
850 		return;
851 
852 	switch (mip->mi_linkstate) {
853 	case LINK_STATE_UP:
854 		if (mip->mi_type->mt_ops.mtops_ops & MTOPS_LINK_DETAILS) {
855 			char det[200];
856 
857 			mip->mi_type->mt_ops.mtops_link_details(det,
858 			    sizeof (det), (mac_handle_t)mip, mip->mi_pdata);
859 
860 			cmn_err(CE_NOTE, "!%s link up, %s", mip->mi_name, det);
861 		} else {
862 			cmn_err(CE_NOTE, "!%s link up", mip->mi_name);
863 		}
864 		break;
865 
866 	case LINK_STATE_DOWN:
867 		/*
868 		 * Only transitions from UP to DOWN are interesting
869 		 */
870 		if (mip->mi_lastlinkstate != LINK_STATE_UNKNOWN)
871 			cmn_err(CE_NOTE, "!%s link down", mip->mi_name);
872 		break;
873 
874 	case LINK_STATE_UNKNOWN:
875 		/*
876 		 * This case is normally not interesting.
877 		 */
878 		break;
879 	}
880 	mip->mi_lastlinkstate = mip->mi_linkstate;
881 }
882 
883 /*
884  * Main routine for the callbacks notifications thread
885  */
886 static void
887 i_mac_notify_thread(void *arg)
888 {
889 	mac_impl_t	*mip = arg;
890 	callb_cpr_t	cprinfo;
891 	mac_cb_t	*mcb;
892 	mac_cb_info_t	*mcbi;
893 	mac_notify_cb_t	*mncb;
894 
895 	mcbi = &mip->mi_notify_cb_info;
896 	CALLB_CPR_INIT(&cprinfo, mcbi->mcbi_lockp, callb_generic_cpr,
897 	    "i_mac_notify_thread");
898 
899 	mutex_enter(mcbi->mcbi_lockp);
900 
901 	for (;;) {
902 		uint32_t	bits;
903 		uint32_t	type;
904 
905 		bits = mip->mi_notify_bits;
906 		if (bits == 0) {
907 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
908 			cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
909 			CALLB_CPR_SAFE_END(&cprinfo, mcbi->mcbi_lockp);
910 			continue;
911 		}
912 		mip->mi_notify_bits = 0;
913 		if ((bits & (1 << MAC_NNOTE)) != 0) {
914 			/* request to quit */
915 			ASSERT(mip->mi_state_flags & MIS_DISABLED);
916 			break;
917 		}
918 
919 		mutex_exit(mcbi->mcbi_lockp);
920 
921 		/*
922 		 * Log link changes.
923 		 */
924 		if ((bits & (1 << MAC_NOTE_LINK)) != 0)
925 			i_mac_log_link_state(mip);
926 
927 		/*
928 		 * Do notification callbacks for each notification type.
929 		 */
930 		for (type = 0; type < MAC_NNOTE; type++) {
931 			if ((bits & (1 << type)) == 0) {
932 				continue;
933 			}
934 
935 			if (mac_notify_cb_list[type].mac_notify_cb_fn)
936 				mac_notify_cb_list[type].mac_notify_cb_fn(mip);
937 
938 			/*
939 			 * Walk the list of notifications.
940 			 */
941 			MAC_CALLBACK_WALKER_INC(&mip->mi_notify_cb_info);
942 			for (mcb = mip->mi_notify_cb_list; mcb != NULL;
943 			    mcb = mcb->mcb_nextp) {
944 				mncb = (mac_notify_cb_t *)mcb->mcb_objp;
945 				mncb->mncb_fn(mncb->mncb_arg, type);
946 			}
947 			MAC_CALLBACK_WALKER_DCR(&mip->mi_notify_cb_info,
948 			    &mip->mi_notify_cb_list);
949 		}
950 
951 		mutex_enter(mcbi->mcbi_lockp);
952 	}
953 
954 	mip->mi_state_flags |= MIS_NOTIFY_DONE;
955 	cv_broadcast(&mcbi->mcbi_cv);
956 
957 	/* CALLB_CPR_EXIT drops the lock */
958 	CALLB_CPR_EXIT(&cprinfo);
959 	thread_exit();
960 }
961 
962 /*
963  * Signal the i_mac_notify_thread asking it to quit.
964  * Then wait till it is done.
965  */
966 void
967 i_mac_notify_exit(mac_impl_t *mip)
968 {
969 	mac_cb_info_t	*mcbi;
970 
971 	mcbi = &mip->mi_notify_cb_info;
972 
973 	mutex_enter(mcbi->mcbi_lockp);
974 	mip->mi_notify_bits = (1 << MAC_NNOTE);
975 	cv_broadcast(&mcbi->mcbi_cv);
976 
977 
978 	while ((mip->mi_notify_thread != NULL) &&
979 	    !(mip->mi_state_flags & MIS_NOTIFY_DONE)) {
980 		cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
981 	}
982 
983 	/* Necessary clean up before doing kmem_cache_free */
984 	mip->mi_state_flags &= ~MIS_NOTIFY_DONE;
985 	mip->mi_notify_bits = 0;
986 	mip->mi_notify_thread = NULL;
987 	mutex_exit(mcbi->mcbi_lockp);
988 }
989 
990 /*
991  * Entry point invoked by drivers to dynamically add a ring to an
992  * existing group.
993  */
994 int
995 mac_group_add_ring(mac_group_handle_t gh, int index)
996 {
997 	mac_group_t *group = (mac_group_t *)gh;
998 	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
999 	int ret;
1000 
1001 	i_mac_perim_enter(mip);
1002 
1003 	/*
1004 	 * Only RX rings can be added or removed by drivers currently.
1005 	 */
1006 	ASSERT(group->mrg_type == MAC_RING_TYPE_RX);
1007 
1008 	ret = i_mac_group_add_ring(group, NULL, index);
1009 
1010 	i_mac_perim_exit(mip);
1011 
1012 	return (ret);
1013 }
1014 
1015 /*
1016  * Entry point invoked by drivers to dynamically remove a ring
1017  * from an existing group. The specified ring handle must no longer
1018  * be used by the driver after a call to this function.
1019  */
1020 void
1021 mac_group_rem_ring(mac_group_handle_t gh, mac_ring_handle_t rh)
1022 {
1023 	mac_group_t *group = (mac_group_t *)gh;
1024 	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1025 
1026 	i_mac_perim_enter(mip);
1027 
1028 	/*
1029 	 * Only RX rings can be added or removed by drivers currently.
1030 	 */
1031 	ASSERT(group->mrg_type == MAC_RING_TYPE_RX);
1032 
1033 	i_mac_group_rem_ring(group, (mac_ring_t *)rh, B_TRUE);
1034 
1035 	i_mac_perim_exit(mip);
1036 }
1037