xref: /titanic_51/usr/src/uts/common/io/mac/mac_provider.c (revision 2d39cb4c2c63a5cd31332527611ae6366103b733)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/conf.h>
29 #include <sys/id_space.h>
30 #include <sys/esunddi.h>
31 #include <sys/stat.h>
32 #include <sys/mkdev.h>
33 #include <sys/stream.h>
34 #include <sys/strsubr.h>
35 #include <sys/dlpi.h>
36 #include <sys/modhash.h>
37 #include <sys/mac.h>
38 #include <sys/mac_provider.h>
39 #include <sys/mac_impl.h>
40 #include <sys/mac_client_impl.h>
41 #include <sys/mac_client_priv.h>
42 #include <sys/mac_soft_ring.h>
43 #include <sys/modctl.h>
44 #include <sys/fs/dv_node.h>
45 #include <sys/thread.h>
46 #include <sys/proc.h>
47 #include <sys/callb.h>
48 #include <sys/cpuvar.h>
49 #include <sys/atomic.h>
50 #include <sys/sdt.h>
51 #include <sys/mac_flow.h>
52 #include <sys/ddi_intr_impl.h>
53 #include <sys/disp.h>
54 #include <sys/sdt.h>
55 
56 /*
57  * MAC Provider Interface.
58  *
59  * Interface for GLDv3 compatible NIC drivers.
60  */
61 
62 static void i_mac_notify_thread(void *);
63 
64 typedef void (*mac_notify_default_cb_fn_t)(mac_impl_t *);
65 
66 typedef struct mac_notify_default_cb_s {
67 	mac_notify_type_t		mac_notify_type;
68 	mac_notify_default_cb_fn_t	mac_notify_cb_fn;
69 }mac_notify_default_cb_t;
70 
71 mac_notify_default_cb_t mac_notify_cb_list[] = {
72 	{ MAC_NOTE_LINK,		mac_fanout_recompute},
73 	{ MAC_NOTE_PROMISC,		NULL},
74 	{ MAC_NOTE_UNICST,		NULL},
75 	{ MAC_NOTE_TX,			NULL},
76 	{ MAC_NOTE_RESOURCE,		NULL},
77 	{ MAC_NOTE_DEVPROMISC,		NULL},
78 	{ MAC_NOTE_FASTPATH_FLUSH,	NULL},
79 	{ MAC_NOTE_SDU_SIZE,		NULL},
80 	{ MAC_NOTE_MARGIN,		NULL},
81 	{ MAC_NOTE_CAPAB_CHG,		NULL},
82 	{ MAC_NNOTE,			NULL},
83 };
84 
85 /*
86  * Driver support functions.
87  */
88 
89 /* REGISTRATION */
90 
91 mac_register_t *
92 mac_alloc(uint_t mac_version)
93 {
94 	mac_register_t *mregp;
95 
96 	/*
97 	 * Make sure there isn't a version mismatch between the driver and
98 	 * the framework.  In the future, if multiple versions are
99 	 * supported, this check could become more sophisticated.
100 	 */
101 	if (mac_version != MAC_VERSION)
102 		return (NULL);
103 
104 	mregp = kmem_zalloc(sizeof (mac_register_t), KM_SLEEP);
105 	mregp->m_version = mac_version;
106 	return (mregp);
107 }
108 
109 void
110 mac_free(mac_register_t *mregp)
111 {
112 	kmem_free(mregp, sizeof (mac_register_t));
113 }
114 
115 /*
116  * mac_register() is how drivers register new MACs with the GLDv3
117  * framework.  The mregp argument is allocated by drivers using the
118  * mac_alloc() function, and can be freed using mac_free() immediately upon
119  * return from mac_register().  Upon success (0 return value), the mhp
120  * opaque pointer becomes the driver's handle to its MAC interface, and is
121  * the argument to all other mac module entry points.
122  */
123 /* ARGSUSED */
124 int
125 mac_register(mac_register_t *mregp, mac_handle_t *mhp)
126 {
127 	mac_impl_t		*mip;
128 	mactype_t		*mtype;
129 	int			err = EINVAL;
130 	struct devnames		*dnp = NULL;
131 	uint_t			instance;
132 	boolean_t		style1_created = B_FALSE;
133 	boolean_t		style2_created = B_FALSE;
134 	char			*driver;
135 	minor_t			minor = 0;
136 
137 	/* Find the required MAC-Type plugin. */
138 	if ((mtype = mactype_getplugin(mregp->m_type_ident)) == NULL)
139 		return (EINVAL);
140 
141 	/* Create a mac_impl_t to represent this MAC. */
142 	mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP);
143 
144 	/*
145 	 * The mac is not ready for open yet.
146 	 */
147 	mip->mi_state_flags |= MIS_DISABLED;
148 
149 	/*
150 	 * When a mac is registered, the m_instance field can be set to:
151 	 *
152 	 *  0:	Get the mac's instance number from m_dip.
153 	 *	This is usually used for physical device dips.
154 	 *
155 	 *  [1 .. MAC_MAX_MINOR-1]: Use the value as the mac's instance number.
156 	 *	For example, when an aggregation is created with the key option,
157 	 *	"key" will be used as the instance number.
158 	 *
159 	 *  -1: Assign an instance number from [MAC_MAX_MINOR .. MAXMIN-1].
160 	 *	This is often used when a MAC of a virtual link is registered
161 	 *	(e.g., aggregation when "key" is not specified, or vnic).
162 	 *
163 	 * Note that the instance number is used to derive the mi_minor field
164 	 * of mac_impl_t, which will then be used to derive the name of kstats
165 	 * and the devfs nodes.  The first 2 cases are needed to preserve
166 	 * backward compatibility.
167 	 */
168 	switch (mregp->m_instance) {
169 	case 0:
170 		instance = ddi_get_instance(mregp->m_dip);
171 		break;
172 	case ((uint_t)-1):
173 		minor = mac_minor_hold(B_TRUE);
174 		if (minor == 0) {
175 			err = ENOSPC;
176 			goto fail;
177 		}
178 		instance = minor - 1;
179 		break;
180 	default:
181 		instance = mregp->m_instance;
182 		if (instance >= MAC_MAX_MINOR) {
183 			err = EINVAL;
184 			goto fail;
185 		}
186 		break;
187 	}
188 
189 	mip->mi_minor = (minor_t)(instance + 1);
190 	mip->mi_dip = mregp->m_dip;
191 	mip->mi_clients_list = NULL;
192 	mip->mi_nclients = 0;
193 
194 	driver = (char *)ddi_driver_name(mip->mi_dip);
195 
196 	/* Construct the MAC name as <drvname><instance> */
197 	(void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d",
198 	    driver, instance);
199 
200 	mip->mi_driver = mregp->m_driver;
201 
202 	mip->mi_type = mtype;
203 	mip->mi_margin = mregp->m_margin;
204 	mip->mi_info.mi_media = mtype->mt_type;
205 	mip->mi_info.mi_nativemedia = mtype->mt_nativetype;
206 	if (mregp->m_max_sdu <= mregp->m_min_sdu)
207 		goto fail;
208 	mip->mi_sdu_min = mregp->m_min_sdu;
209 	mip->mi_sdu_max = mregp->m_max_sdu;
210 	mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length;
211 	/*
212 	 * If the media supports a broadcast address, cache a pointer to it
213 	 * in the mac_info_t so that upper layers can use it.
214 	 */
215 	mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr;
216 
217 	mip->mi_v12n_level = mregp->m_v12n;
218 
219 	/*
220 	 * Copy the unicast source address into the mac_info_t, but only if
221 	 * the MAC-Type defines a non-zero address length.  We need to
222 	 * handle MAC-Types that have an address length of 0
223 	 * (point-to-point protocol MACs for example).
224 	 */
225 	if (mip->mi_type->mt_addr_length > 0) {
226 		if (mregp->m_src_addr == NULL)
227 			goto fail;
228 		mip->mi_info.mi_unicst_addr =
229 		    kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP);
230 		bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr,
231 		    mip->mi_type->mt_addr_length);
232 
233 		/*
234 		 * Copy the fixed 'factory' MAC address from the immutable
235 		 * info.  This is taken to be the MAC address currently in
236 		 * use.
237 		 */
238 		bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr,
239 		    mip->mi_type->mt_addr_length);
240 
241 		/*
242 		 * At this point, we should set up the classification
243 		 * rules etc but we delay it till mac_open() so that
244 		 * the resource discovery has taken place and we
245 		 * know someone wants to use the device. Otherwise
246 		 * memory gets allocated for Rx ring structures even
247 		 * during probe.
248 		 */
249 
250 		/* Copy the destination address if one is provided. */
251 		if (mregp->m_dst_addr != NULL) {
252 			bcopy(mregp->m_dst_addr, mip->mi_dstaddr,
253 			    mip->mi_type->mt_addr_length);
254 		}
255 	} else if (mregp->m_src_addr != NULL) {
256 		goto fail;
257 	}
258 
259 	/*
260 	 * The format of the m_pdata is specific to the plugin.  It is
261 	 * passed in as an argument to all of the plugin callbacks.  The
262 	 * driver can update this information by calling
263 	 * mac_pdata_update().
264 	 */
265 	if (mregp->m_pdata != NULL) {
266 		/*
267 		 * Verify that the plugin supports MAC plugin data and that
268 		 * the supplied data is valid.
269 		 */
270 		if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
271 			goto fail;
272 		if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata,
273 		    mregp->m_pdata_size)) {
274 			goto fail;
275 		}
276 		mip->mi_pdata = kmem_alloc(mregp->m_pdata_size, KM_SLEEP);
277 		bcopy(mregp->m_pdata, mip->mi_pdata, mregp->m_pdata_size);
278 		mip->mi_pdata_size = mregp->m_pdata_size;
279 	}
280 
281 	/*
282 	 * Register the private properties.
283 	 */
284 	mac_register_priv_prop(mip, mregp->m_priv_props,
285 	    mregp->m_priv_prop_count);
286 
287 	/*
288 	 * Stash the driver callbacks into the mac_impl_t, but first sanity
289 	 * check to make sure all mandatory callbacks are set.
290 	 */
291 	if (mregp->m_callbacks->mc_getstat == NULL ||
292 	    mregp->m_callbacks->mc_start == NULL ||
293 	    mregp->m_callbacks->mc_stop == NULL ||
294 	    mregp->m_callbacks->mc_setpromisc == NULL ||
295 	    mregp->m_callbacks->mc_multicst == NULL) {
296 		goto fail;
297 	}
298 	mip->mi_callbacks = mregp->m_callbacks;
299 
300 	if (mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LEGACY,
301 	    &mip->mi_capab_legacy)) {
302 		mip->mi_state_flags |= MIS_LEGACY;
303 		mip->mi_phy_dev = mip->mi_capab_legacy.ml_dev;
304 	} else {
305 		mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip),
306 		    ddi_get_instance(mip->mi_dip) + 1);
307 	}
308 
309 	/*
310 	 * Allocate a notification thread. thread_create blocks for memory
311 	 * if needed, it never fails.
312 	 */
313 	mip->mi_notify_thread = thread_create(NULL, 0, i_mac_notify_thread,
314 	    mip, 0, &p0, TS_RUN, minclsyspri);
315 
316 	/*
317 	 * Initialize the capabilities
318 	 */
319 
320 	if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, NULL))
321 		mip->mi_state_flags |= MIS_IS_VNIC;
322 
323 	if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_AGGR, NULL))
324 		mip->mi_state_flags |= MIS_IS_AGGR;
325 
326 	mac_addr_factory_init(mip);
327 
328 	/*
329 	 * Enforce the virtrualization level registered.
330 	 */
331 	if (mip->mi_v12n_level & MAC_VIRT_LEVEL1) {
332 		if (mac_init_rings(mip, MAC_RING_TYPE_RX) != 0 ||
333 		    mac_init_rings(mip, MAC_RING_TYPE_TX) != 0)
334 			goto fail;
335 
336 		/*
337 		 * The driver needs to register at least rx rings for this
338 		 * virtualization level.
339 		 */
340 		if (mip->mi_rx_groups == NULL)
341 			goto fail;
342 	}
343 
344 	/*
345 	 * The driver must set mc_unicst entry point to NULL when it advertises
346 	 * CAP_RINGS for rx groups.
347 	 */
348 	if (mip->mi_rx_groups != NULL) {
349 		if (mregp->m_callbacks->mc_unicst != NULL)
350 			goto fail;
351 	} else {
352 		if (mregp->m_callbacks->mc_unicst == NULL)
353 			goto fail;
354 	}
355 
356 	/*
357 	 * The driver must set mc_tx entry point to NULL when it advertises
358 	 * CAP_RINGS for tx rings.
359 	 */
360 	if (mip->mi_tx_groups != NULL) {
361 		if (mregp->m_callbacks->mc_tx != NULL)
362 			goto fail;
363 	} else {
364 		if (mregp->m_callbacks->mc_tx == NULL)
365 			goto fail;
366 	}
367 
368 	/*
369 	 * Initialize MAC addresses. Must be called after mac_init_rings().
370 	 */
371 	mac_init_macaddr(mip);
372 
373 	mip->mi_share_capab.ms_snum = 0;
374 	if (mip->mi_v12n_level & MAC_VIRT_HIO) {
375 		(void) mac_capab_get((mac_handle_t)mip, MAC_CAPAB_SHARES,
376 		    &mip->mi_share_capab);
377 	}
378 
379 	/*
380 	 * Initialize the kstats for this device.
381 	 */
382 	mac_stat_create(mip);
383 
384 	/* Zero out any properties. */
385 	bzero(&mip->mi_resource_props, sizeof (mac_resource_props_t));
386 
387 	/* set the gldv3 flag in dn_flags */
388 	dnp = &devnamesp[ddi_driver_major(mip->mi_dip)];
389 	LOCK_DEV_OPS(&dnp->dn_lock);
390 	dnp->dn_flags |= (DN_GLDV3_DRIVER | DN_NETWORK_DRIVER);
391 	UNLOCK_DEV_OPS(&dnp->dn_lock);
392 
393 	if (mip->mi_minor < MAC_MAX_MINOR + 1) {
394 		/* Create a style-2 DLPI device */
395 		if (ddi_create_minor_node(mip->mi_dip, driver, S_IFCHR, 0,
396 		    DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS)
397 			goto fail;
398 		style2_created = B_TRUE;
399 
400 		/* Create a style-1 DLPI device */
401 		if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR,
402 		    mip->mi_minor, DDI_NT_NET, 0) != DDI_SUCCESS)
403 			goto fail;
404 		style1_created = B_TRUE;
405 	}
406 
407 	mac_flow_l2tab_create(mip, &mip->mi_flow_tab);
408 
409 	rw_enter(&i_mac_impl_lock, RW_WRITER);
410 	if (mod_hash_insert(i_mac_impl_hash,
411 	    (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) {
412 		rw_exit(&i_mac_impl_lock);
413 		err = EEXIST;
414 		goto fail;
415 	}
416 
417 	DTRACE_PROBE2(mac__register, struct devnames *, dnp,
418 	    (mac_impl_t *), mip);
419 
420 	/*
421 	 * Mark the MAC to be ready for open.
422 	 */
423 	mip->mi_state_flags &= ~MIS_DISABLED;
424 	rw_exit(&i_mac_impl_lock);
425 
426 	atomic_inc_32(&i_mac_impl_count);
427 
428 	cmn_err(CE_NOTE, "!%s registered", mip->mi_name);
429 	*mhp = (mac_handle_t)mip;
430 	return (0);
431 
432 fail:
433 	if (style1_created)
434 		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
435 
436 	if (style2_created)
437 		ddi_remove_minor_node(mip->mi_dip, driver);
438 
439 	mac_addr_factory_fini(mip);
440 
441 	/* Clean up registered MAC addresses */
442 	mac_fini_macaddr(mip);
443 
444 	/* Clean up registered rings */
445 	mac_free_rings(mip, MAC_RING_TYPE_RX);
446 	mac_free_rings(mip, MAC_RING_TYPE_TX);
447 
448 	/* Clean up notification thread */
449 	if (mip->mi_notify_thread != NULL)
450 		i_mac_notify_exit(mip);
451 
452 	if (mip->mi_info.mi_unicst_addr != NULL) {
453 		kmem_free(mip->mi_info.mi_unicst_addr,
454 		    mip->mi_type->mt_addr_length);
455 		mip->mi_info.mi_unicst_addr = NULL;
456 	}
457 
458 	mac_stat_destroy(mip);
459 
460 	if (mip->mi_type != NULL) {
461 		atomic_dec_32(&mip->mi_type->mt_ref);
462 		mip->mi_type = NULL;
463 	}
464 
465 	if (mip->mi_pdata != NULL) {
466 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
467 		mip->mi_pdata = NULL;
468 		mip->mi_pdata_size = 0;
469 	}
470 
471 	if (minor != 0) {
472 		ASSERT(minor > MAC_MAX_MINOR);
473 		mac_minor_rele(minor);
474 	}
475 
476 	mac_unregister_priv_prop(mip);
477 
478 	kmem_cache_free(i_mac_impl_cachep, mip);
479 	return (err);
480 }
481 
482 /*
483  * Unregister from the GLDv3 framework
484  */
485 int
486 mac_unregister(mac_handle_t mh)
487 {
488 	int			err;
489 	mac_impl_t		*mip = (mac_impl_t *)mh;
490 	mod_hash_val_t		val;
491 	mac_margin_req_t	*mmr, *nextmmr;
492 
493 	/* Fail the unregister if there are any open references to this mac. */
494 	if ((err = mac_disable_nowait(mh)) != 0)
495 		return (err);
496 
497 	/*
498 	 * Clean up notification thread and wait for it to exit.
499 	 */
500 	i_mac_notify_exit(mip);
501 
502 	i_mac_perim_enter(mip);
503 
504 	/*
505 	 * There is still resource properties configured over this mac.
506 	 */
507 	if (mip->mi_resource_props.mrp_mask != 0)
508 		mac_fastpath_enable((mac_handle_t)mip);
509 
510 	if (mip->mi_minor < MAC_MAX_MINOR + 1) {
511 		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
512 		ddi_remove_minor_node(mip->mi_dip,
513 		    (char *)ddi_driver_name(mip->mi_dip));
514 	}
515 
516 	ASSERT(mip->mi_nactiveclients == 0 && !(mip->mi_state_flags &
517 	    MIS_EXCLUSIVE));
518 
519 	mac_stat_destroy(mip);
520 
521 	(void) mod_hash_remove(i_mac_impl_hash,
522 	    (mod_hash_key_t)mip->mi_name, &val);
523 	ASSERT(mip == (mac_impl_t *)val);
524 
525 	ASSERT(i_mac_impl_count > 0);
526 	atomic_dec_32(&i_mac_impl_count);
527 
528 	if (mip->mi_pdata != NULL)
529 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
530 	mip->mi_pdata = NULL;
531 	mip->mi_pdata_size = 0;
532 
533 	/*
534 	 * Free the list of margin request.
535 	 */
536 	for (mmr = mip->mi_mmrp; mmr != NULL; mmr = nextmmr) {
537 		nextmmr = mmr->mmr_nextp;
538 		kmem_free(mmr, sizeof (mac_margin_req_t));
539 	}
540 	mip->mi_mmrp = NULL;
541 
542 	mip->mi_linkstate = LINK_STATE_UNKNOWN;
543 	kmem_free(mip->mi_info.mi_unicst_addr, mip->mi_type->mt_addr_length);
544 	mip->mi_info.mi_unicst_addr = NULL;
545 
546 	atomic_dec_32(&mip->mi_type->mt_ref);
547 	mip->mi_type = NULL;
548 
549 	/*
550 	 * Free the primary MAC address.
551 	 */
552 	mac_fini_macaddr(mip);
553 
554 	/*
555 	 * free all rings
556 	 */
557 	mac_free_rings(mip, MAC_RING_TYPE_RX);
558 	mac_free_rings(mip, MAC_RING_TYPE_TX);
559 
560 	mac_addr_factory_fini(mip);
561 
562 	bzero(mip->mi_addr, MAXMACADDRLEN);
563 	bzero(mip->mi_dstaddr, MAXMACADDRLEN);
564 
565 	/* and the flows */
566 	mac_flow_tab_destroy(mip->mi_flow_tab);
567 	mip->mi_flow_tab = NULL;
568 
569 	if (mip->mi_minor > MAC_MAX_MINOR)
570 		mac_minor_rele(mip->mi_minor);
571 
572 	cmn_err(CE_NOTE, "!%s unregistered", mip->mi_name);
573 
574 	/*
575 	 * Reset the perim related fields to default values before
576 	 * kmem_cache_free
577 	 */
578 	i_mac_perim_exit(mip);
579 	mip->mi_state_flags = 0;
580 
581 	mac_unregister_priv_prop(mip);
582 	kmem_cache_free(i_mac_impl_cachep, mip);
583 
584 	return (0);
585 }
586 
587 /* DATA RECEPTION */
588 
589 /*
590  * This function is invoked for packets received by the MAC driver in
591  * interrupt context. The ring generation number provided by the driver
592  * is matched with the ring generation number held in MAC. If they do not
593  * match, received packets are considered stale packets coming from an older
594  * assignment of the ring. Drop them.
595  */
596 void
597 mac_rx_ring(mac_handle_t mh, mac_ring_handle_t mrh, mblk_t *mp_chain,
598     uint64_t mr_gen_num)
599 {
600 	mac_ring_t		*mr = (mac_ring_t *)mrh;
601 
602 	if ((mr != NULL) && (mr->mr_gen_num != mr_gen_num)) {
603 		DTRACE_PROBE2(mac__rx__rings__stale__packet, uint64_t,
604 		    mr->mr_gen_num, uint64_t, mr_gen_num);
605 		freemsgchain(mp_chain);
606 		return;
607 	}
608 	mac_rx(mh, (mac_resource_handle_t)mrh, mp_chain);
609 }
610 
611 /*
612  * This function is invoked for each packet received by the underlying
613  * driver.
614  */
615 void
616 mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
617 {
618 	mac_impl_t		*mip = (mac_impl_t *)mh;
619 	mac_ring_t		*mr = (mac_ring_t *)mrh;
620 	mac_soft_ring_set_t 	*mac_srs;
621 	mblk_t			*bp = mp_chain;
622 	boolean_t		hw_classified = B_FALSE;
623 
624 	/*
625 	 * If there are any promiscuous mode callbacks defined for
626 	 * this MAC, pass them a copy if appropriate.
627 	 */
628 	if (mip->mi_promisc_list != NULL)
629 		mac_promisc_dispatch(mip, mp_chain, NULL);
630 
631 	if (mr != NULL) {
632 		/*
633 		 * If the SRS teardown has started, just return. The 'mr'
634 		 * continues to be valid until the driver unregisters the mac.
635 		 * Hardware classified packets will not make their way up
636 		 * beyond this point once the teardown has started. The driver
637 		 * is never passed a pointer to a flow entry or SRS or any
638 		 * structure that can be freed much before mac_unregister.
639 		 */
640 		mutex_enter(&mr->mr_lock);
641 		if ((mr->mr_state != MR_INUSE) || (mr->mr_flag &
642 		    (MR_INCIPIENT | MR_CONDEMNED | MR_QUIESCE))) {
643 			mutex_exit(&mr->mr_lock);
644 			freemsgchain(mp_chain);
645 			return;
646 		}
647 		if (mr->mr_classify_type == MAC_HW_CLASSIFIER) {
648 			hw_classified = B_TRUE;
649 			MR_REFHOLD_LOCKED(mr);
650 		}
651 		mutex_exit(&mr->mr_lock);
652 
653 		/*
654 		 * We check if an SRS is controlling this ring.
655 		 * If so, we can directly call the srs_lower_proc
656 		 * routine otherwise we need to go through mac_rx_classify
657 		 * to reach the right place.
658 		 */
659 		if (hw_classified) {
660 			mac_srs = mr->mr_srs;
661 			/*
662 			 * This is supposed to be the fast path.
663 			 * All packets received though here were steered by
664 			 * the hardware classifier, and share the same
665 			 * MAC header info.
666 			 */
667 			mac_srs->srs_rx.sr_lower_proc(mh,
668 			    (mac_resource_handle_t)mac_srs, mp_chain, B_FALSE);
669 			MR_REFRELE(mr);
670 			return;
671 		}
672 		/* We'll fall through to software classification */
673 	} else {
674 		flow_entry_t *flent;
675 		int err;
676 
677 		rw_enter(&mip->mi_rw_lock, RW_READER);
678 		if (mip->mi_single_active_client != NULL) {
679 			flent = mip->mi_single_active_client->mci_flent_list;
680 			FLOW_TRY_REFHOLD(flent, err);
681 			rw_exit(&mip->mi_rw_lock);
682 			if (err == 0) {
683 				(flent->fe_cb_fn)(flent->fe_cb_arg1,
684 				    flent->fe_cb_arg2, mp_chain, B_FALSE);
685 				FLOW_REFRELE(flent);
686 				return;
687 			}
688 		} else {
689 			rw_exit(&mip->mi_rw_lock);
690 		}
691 	}
692 
693 	if (!FLOW_TAB_EMPTY(mip->mi_flow_tab)) {
694 		if ((bp = mac_rx_flow(mh, mrh, bp)) == NULL)
695 			return;
696 	}
697 
698 	freemsgchain(bp);
699 }
700 
701 /* DATA TRANSMISSION */
702 
703 /*
704  * A driver's notification to resume transmission, in case of a provider
705  * without TX rings.
706  */
707 void
708 mac_tx_update(mac_handle_t mh)
709 {
710 	/*
711 	 * Walk the list of MAC clients (mac_client_handle)
712 	 * and update
713 	 */
714 	i_mac_tx_srs_notify((mac_impl_t *)mh, NULL);
715 }
716 
717 /*
718  * A driver's notification to resume transmission on the specified TX ring.
719  */
720 void
721 mac_tx_ring_update(mac_handle_t mh, mac_ring_handle_t rh)
722 {
723 	i_mac_tx_srs_notify((mac_impl_t *)mh, rh);
724 }
725 
726 /* LINK STATE */
727 /*
728  * Notify the MAC layer about a link state change
729  */
730 void
731 mac_link_update(mac_handle_t mh, link_state_t link)
732 {
733 	mac_impl_t	*mip = (mac_impl_t *)mh;
734 
735 	/*
736 	 * Save the link state.
737 	 */
738 	mip->mi_linkstate = link;
739 
740 	/*
741 	 * Send a MAC_NOTE_LINK notification.
742 	 */
743 	i_mac_notify(mip, MAC_NOTE_LINK);
744 }
745 
746 /* OTHER CONTROL INFORMATION */
747 
748 /*
749  * A driver notified us that its primary MAC address has changed.
750  */
751 void
752 mac_unicst_update(mac_handle_t mh, const uint8_t *addr)
753 {
754 	mac_impl_t	*mip = (mac_impl_t *)mh;
755 
756 	if (mip->mi_type->mt_addr_length == 0)
757 		return;
758 
759 	i_mac_perim_enter(mip);
760 	/*
761 	 * If address doesn't change, do nothing.
762 	 */
763 	if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) == 0) {
764 		i_mac_perim_exit(mip);
765 		return;
766 	}
767 
768 	/*
769 	 * Freshen the MAC address value and update all MAC clients that
770 	 * share this MAC address.
771 	 */
772 	mac_freshen_macaddr(mac_find_macaddr(mip, mip->mi_addr),
773 	    (uint8_t *)addr);
774 
775 	i_mac_perim_exit(mip);
776 
777 	/*
778 	 * Send a MAC_NOTE_UNICST notification.
779 	 */
780 	i_mac_notify(mip, MAC_NOTE_UNICST);
781 }
782 
783 /*
784  * The provider's hw resources (e.g. rings grouping) has changed.
785  * Notify the MAC framework to trigger a re-negotiation of the capabilities.
786  */
787 void
788 mac_resource_update(mac_handle_t mh)
789 {
790 	/*
791 	 * Send a MAC_NOTE_RESOURCE notification.
792 	 */
793 	i_mac_notify((mac_impl_t *)mh, MAC_NOTE_RESOURCE);
794 }
795 
796 /*
797  * MAC plugin information changed.
798  */
799 int
800 mac_pdata_update(mac_handle_t mh, void *mac_pdata, size_t dsize)
801 {
802 	mac_impl_t	*mip = (mac_impl_t *)mh;
803 
804 	/*
805 	 * Verify that the plugin supports MAC plugin data and that the
806 	 * supplied data is valid.
807 	 */
808 	if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
809 		return (EINVAL);
810 	if (!mip->mi_type->mt_ops.mtops_pdata_verify(mac_pdata, dsize))
811 		return (EINVAL);
812 
813 	if (mip->mi_pdata != NULL)
814 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
815 
816 	mip->mi_pdata = kmem_alloc(dsize, KM_SLEEP);
817 	bcopy(mac_pdata, mip->mi_pdata, dsize);
818 	mip->mi_pdata_size = dsize;
819 
820 	/*
821 	 * Since the MAC plugin data is used to construct MAC headers that
822 	 * were cached in fast-path headers, we need to flush fast-path
823 	 * information for links associated with this mac.
824 	 */
825 	i_mac_notify(mip, MAC_NOTE_FASTPATH_FLUSH);
826 	return (0);
827 }
828 
829 /*
830  * Invoked by driver as well as the framework to notify its capability change.
831  */
832 void
833 mac_capab_update(mac_handle_t mh)
834 {
835 	/* Send MAC_NOTE_CAPAB_CHG notification */
836 	i_mac_notify((mac_impl_t *)mh, MAC_NOTE_CAPAB_CHG);
837 }
838 
839 int
840 mac_maxsdu_update(mac_handle_t mh, uint_t sdu_max)
841 {
842 	mac_impl_t	*mip = (mac_impl_t *)mh;
843 
844 	if (sdu_max <= mip->mi_sdu_min)
845 		return (EINVAL);
846 	mip->mi_sdu_max = sdu_max;
847 
848 	/* Send a MAC_NOTE_SDU_SIZE notification. */
849 	i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
850 	return (0);
851 }
852 
853 /* PRIVATE FUNCTIONS, FOR INTERNAL USE ONLY */
854 
855 /*
856  * Updates the mac_impl structure with the current state of the link
857  */
858 static void
859 i_mac_log_link_state(mac_impl_t *mip)
860 {
861 	/*
862 	 * If no change, then it is not interesting.
863 	 */
864 	if (mip->mi_lastlinkstate == mip->mi_linkstate)
865 		return;
866 
867 	switch (mip->mi_linkstate) {
868 	case LINK_STATE_UP:
869 		if (mip->mi_type->mt_ops.mtops_ops & MTOPS_LINK_DETAILS) {
870 			char det[200];
871 
872 			mip->mi_type->mt_ops.mtops_link_details(det,
873 			    sizeof (det), (mac_handle_t)mip, mip->mi_pdata);
874 
875 			cmn_err(CE_NOTE, "!%s link up, %s", mip->mi_name, det);
876 		} else {
877 			cmn_err(CE_NOTE, "!%s link up", mip->mi_name);
878 		}
879 		break;
880 
881 	case LINK_STATE_DOWN:
882 		/*
883 		 * Only transitions from UP to DOWN are interesting
884 		 */
885 		if (mip->mi_lastlinkstate != LINK_STATE_UNKNOWN)
886 			cmn_err(CE_NOTE, "!%s link down", mip->mi_name);
887 		break;
888 
889 	case LINK_STATE_UNKNOWN:
890 		/*
891 		 * This case is normally not interesting.
892 		 */
893 		break;
894 	}
895 	mip->mi_lastlinkstate = mip->mi_linkstate;
896 }
897 
898 /*
899  * Main routine for the callbacks notifications thread
900  */
901 static void
902 i_mac_notify_thread(void *arg)
903 {
904 	mac_impl_t	*mip = arg;
905 	callb_cpr_t	cprinfo;
906 	mac_cb_t	*mcb;
907 	mac_cb_info_t	*mcbi;
908 	mac_notify_cb_t	*mncb;
909 
910 	mcbi = &mip->mi_notify_cb_info;
911 	CALLB_CPR_INIT(&cprinfo, mcbi->mcbi_lockp, callb_generic_cpr,
912 	    "i_mac_notify_thread");
913 
914 	mutex_enter(mcbi->mcbi_lockp);
915 
916 	for (;;) {
917 		uint32_t	bits;
918 		uint32_t	type;
919 
920 		bits = mip->mi_notify_bits;
921 		if (bits == 0) {
922 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
923 			cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
924 			CALLB_CPR_SAFE_END(&cprinfo, mcbi->mcbi_lockp);
925 			continue;
926 		}
927 		mip->mi_notify_bits = 0;
928 		if ((bits & (1 << MAC_NNOTE)) != 0) {
929 			/* request to quit */
930 			ASSERT(mip->mi_state_flags & MIS_DISABLED);
931 			break;
932 		}
933 
934 		mutex_exit(mcbi->mcbi_lockp);
935 
936 		/*
937 		 * Log link changes.
938 		 */
939 		if ((bits & (1 << MAC_NOTE_LINK)) != 0)
940 			i_mac_log_link_state(mip);
941 
942 		/*
943 		 * Do notification callbacks for each notification type.
944 		 */
945 		for (type = 0; type < MAC_NNOTE; type++) {
946 			if ((bits & (1 << type)) == 0) {
947 				continue;
948 			}
949 
950 			if (mac_notify_cb_list[type].mac_notify_cb_fn)
951 				mac_notify_cb_list[type].mac_notify_cb_fn(mip);
952 
953 			/*
954 			 * Walk the list of notifications.
955 			 */
956 			MAC_CALLBACK_WALKER_INC(&mip->mi_notify_cb_info);
957 			for (mcb = mip->mi_notify_cb_list; mcb != NULL;
958 			    mcb = mcb->mcb_nextp) {
959 				mncb = (mac_notify_cb_t *)mcb->mcb_objp;
960 				mncb->mncb_fn(mncb->mncb_arg, type);
961 			}
962 			MAC_CALLBACK_WALKER_DCR(&mip->mi_notify_cb_info,
963 			    &mip->mi_notify_cb_list);
964 		}
965 
966 		mutex_enter(mcbi->mcbi_lockp);
967 	}
968 
969 	mip->mi_state_flags |= MIS_NOTIFY_DONE;
970 	cv_broadcast(&mcbi->mcbi_cv);
971 
972 	/* CALLB_CPR_EXIT drops the lock */
973 	CALLB_CPR_EXIT(&cprinfo);
974 	thread_exit();
975 }
976 
977 /*
978  * Signal the i_mac_notify_thread asking it to quit.
979  * Then wait till it is done.
980  */
981 void
982 i_mac_notify_exit(mac_impl_t *mip)
983 {
984 	mac_cb_info_t	*mcbi;
985 
986 	mcbi = &mip->mi_notify_cb_info;
987 
988 	mutex_enter(mcbi->mcbi_lockp);
989 	mip->mi_notify_bits = (1 << MAC_NNOTE);
990 	cv_broadcast(&mcbi->mcbi_cv);
991 
992 
993 	while ((mip->mi_notify_thread != NULL) &&
994 	    !(mip->mi_state_flags & MIS_NOTIFY_DONE)) {
995 		cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
996 	}
997 
998 	/* Necessary clean up before doing kmem_cache_free */
999 	mip->mi_state_flags &= ~MIS_NOTIFY_DONE;
1000 	mip->mi_notify_bits = 0;
1001 	mip->mi_notify_thread = NULL;
1002 	mutex_exit(mcbi->mcbi_lockp);
1003 }
1004 
1005 /*
1006  * Entry point invoked by drivers to dynamically add a ring to an
1007  * existing group.
1008  */
1009 int
1010 mac_group_add_ring(mac_group_handle_t gh, int index)
1011 {
1012 	mac_group_t *group = (mac_group_t *)gh;
1013 	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1014 	int ret;
1015 
1016 	i_mac_perim_enter(mip);
1017 
1018 	/*
1019 	 * Only RX rings can be added or removed by drivers currently.
1020 	 */
1021 	ASSERT(group->mrg_type == MAC_RING_TYPE_RX);
1022 
1023 	ret = i_mac_group_add_ring(group, NULL, index);
1024 
1025 	i_mac_perim_exit(mip);
1026 
1027 	return (ret);
1028 }
1029 
1030 /*
1031  * Entry point invoked by drivers to dynamically remove a ring
1032  * from an existing group. The specified ring handle must no longer
1033  * be used by the driver after a call to this function.
1034  */
1035 void
1036 mac_group_rem_ring(mac_group_handle_t gh, mac_ring_handle_t rh)
1037 {
1038 	mac_group_t *group = (mac_group_t *)gh;
1039 	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1040 
1041 	i_mac_perim_enter(mip);
1042 
1043 	/*
1044 	 * Only RX rings can be added or removed by drivers currently.
1045 	 */
1046 	ASSERT(group->mrg_type == MAC_RING_TYPE_RX);
1047 
1048 	i_mac_group_rem_ring(group, (mac_ring_t *)rh, B_TRUE);
1049 
1050 	i_mac_perim_exit(mip);
1051 }
1052