1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2019 Joyent, Inc.
25 * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved.
26 * Copyright 2020 RackTop Systems, Inc.
27 * Copyright 2024 Oxide Computer Company
28 */
29
30 #include <sys/types.h>
31 #include <sys/conf.h>
32 #include <sys/id_space.h>
33 #include <sys/esunddi.h>
34 #include <sys/stat.h>
35 #include <sys/mkdev.h>
36 #include <sys/stream.h>
37 #include <sys/strsubr.h>
38 #include <sys/dlpi.h>
39 #include <sys/modhash.h>
40 #include <sys/mac.h>
41 #include <sys/mac_provider.h>
42 #include <sys/mac_impl.h>
43 #include <sys/mac_client_impl.h>
44 #include <sys/mac_client_priv.h>
45 #include <sys/mac_soft_ring.h>
46 #include <sys/mac_stat.h>
47 #include <sys/dld.h>
48 #include <sys/modctl.h>
49 #include <sys/fs/dv_node.h>
50 #include <sys/thread.h>
51 #include <sys/proc.h>
52 #include <sys/callb.h>
53 #include <sys/cpuvar.h>
54 #include <sys/atomic.h>
55 #include <sys/sdt.h>
56 #include <sys/mac_flow.h>
57 #include <sys/ddi_intr_impl.h>
58 #include <sys/disp.h>
59 #include <sys/sdt.h>
60 #include <sys/pattr.h>
61 #include <sys/strsun.h>
62 #include <sys/vlan.h>
63 #include <inet/ip.h>
64 #include <inet/tcp.h>
65 #include <netinet/udp.h>
66 #include <netinet/sctp.h>
67
68 /*
69 * MAC Provider Interface.
70 *
71 * Interface for GLDv3 compatible NIC drivers.
72 */
73
/* Body of the per-MAC asynchronous notification thread (defined below). */
static void i_mac_notify_thread(void *);

/* Signature of a framework-internal default handler for one notification. */
typedef void		(*mac_notify_default_cb_fn_t)(mac_impl_t *);

/*
 * Default framework-internal actions taken when a given MAC notification
 * is delivered, indexed by notification type.  A NULL entry means the
 * framework itself takes no default action for that notification (clients
 * may still have registered their own callbacks).
 */
static const mac_notify_default_cb_fn_t mac_notify_cb_list[MAC_NNOTE] = {
	mac_fanout_recompute,	/* MAC_NOTE_LINK */
	NULL,			/* MAC_NOTE_UNICST */
	NULL,			/* MAC_NOTE_TX */
	NULL,			/* MAC_NOTE_DEVPROMISC */
	NULL,			/* MAC_NOTE_FASTPATH_FLUSH */
	NULL,			/* MAC_NOTE_SDU_SIZE */
	NULL,			/* MAC_NOTE_MARGIN */
	NULL,			/* MAC_NOTE_CAPAB_CHG */
	NULL			/* MAC_NOTE_LOWLINK */
};
89
90 /*
91 * Driver support functions.
92 */
93
94 /* REGISTRATION */
95
96 mac_register_t *
mac_alloc(uint_t mac_version)97 mac_alloc(uint_t mac_version)
98 {
99 mac_register_t *mregp;
100
101 /*
102 * Make sure there isn't a version mismatch between the driver and
103 * the framework. In the future, if multiple versions are
104 * supported, this check could become more sophisticated.
105 */
106 if (mac_version != MAC_VERSION)
107 return (NULL);
108
109 mregp = kmem_zalloc(sizeof (mac_register_t), KM_SLEEP);
110 mregp->m_version = mac_version;
111 return (mregp);
112 }
113
/*
 * Release a mac_register_t previously obtained from mac_alloc().  Safe to
 * call immediately after mac_register() returns, since the framework
 * copies everything it needs out of the registration structure.
 */
void
mac_free(mac_register_t *mregp)
{
	kmem_free(mregp, sizeof (mac_register_t));
}
119
120 /*
121 * Convert a MAC's offload features into the equivalent DB_CKSUMFLAGS
122 * value.
123 */
124 static uint16_t
mac_features_to_flags(mac_handle_t mh)125 mac_features_to_flags(mac_handle_t mh)
126 {
127 uint16_t flags = 0;
128 uint32_t cap_sum = 0;
129 mac_capab_lso_t cap_lso;
130
131 if (mac_capab_get(mh, MAC_CAPAB_HCKSUM, &cap_sum)) {
132 if (cap_sum & HCKSUM_IPHDRCKSUM)
133 flags |= HCK_IPV4_HDRCKSUM;
134
135 if (cap_sum & HCKSUM_INET_PARTIAL)
136 flags |= HCK_PARTIALCKSUM;
137 else if (cap_sum & (HCKSUM_INET_FULL_V4 | HCKSUM_INET_FULL_V6))
138 flags |= HCK_FULLCKSUM;
139 }
140
141 /*
142 * We don't need the information stored in 'cap_lso', but we
143 * need to pass a non-NULL pointer to appease the driver.
144 */
145 if (mac_capab_get(mh, MAC_CAPAB_LSO, &cap_lso))
146 flags |= HW_LSO;
147
148 return (flags);
149 }
150
151 /*
152 * mac_register() is how drivers register new MACs with the GLDv3
153 * framework. The mregp argument is allocated by drivers using the
154 * mac_alloc() function, and can be freed using mac_free() immediately upon
155 * return from mac_register(). Upon success (0 return value), the mhp
156 * opaque pointer becomes the driver's handle to its MAC interface, and is
157 * the argument to all other mac module entry points.
158 */
159 /* ARGSUSED */
160 int
mac_register(mac_register_t * mregp,mac_handle_t * mhp)161 mac_register(mac_register_t *mregp, mac_handle_t *mhp)
162 {
163 mac_impl_t *mip;
164 mactype_t *mtype;
165 int err = EINVAL;
166 struct devnames *dnp = NULL;
167 uint_t instance;
168 boolean_t style1_created = B_FALSE;
169 boolean_t style2_created = B_FALSE;
170 char *driver;
171 minor_t minor = 0;
172
173 /* A successful call to mac_init_ops() sets the DN_GLDV3_DRIVER flag. */
174 if (!GLDV3_DRV(ddi_driver_major(mregp->m_dip)))
175 return (EINVAL);
176
177 /* Find the required MAC-Type plugin. */
178 if ((mtype = mactype_getplugin(mregp->m_type_ident)) == NULL)
179 return (EINVAL);
180
181 /* Create a mac_impl_t to represent this MAC. */
182 mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP);
183
184 /*
185 * The mac is not ready for open yet.
186 */
187 mip->mi_state_flags |= MIS_DISABLED;
188
189 /*
190 * When a mac is registered, the m_instance field can be set to:
191 *
192 * 0: Get the mac's instance number from m_dip.
193 * This is usually used for physical device dips.
194 *
195 * [1 .. MAC_MAX_MINOR-1]: Use the value as the mac's instance number.
196 * For example, when an aggregation is created with the key option,
197 * "key" will be used as the instance number.
198 *
199 * -1: Assign an instance number from [MAC_MAX_MINOR .. MAXMIN-1].
200 * This is often used when a MAC of a virtual link is registered
201 * (e.g., aggregation when "key" is not specified, or vnic).
202 *
203 * Note that the instance number is used to derive the mi_minor field
204 * of mac_impl_t, which will then be used to derive the name of kstats
205 * and the devfs nodes. The first 2 cases are needed to preserve
206 * backward compatibility.
207 */
208 switch (mregp->m_instance) {
209 case 0:
210 instance = ddi_get_instance(mregp->m_dip);
211 break;
212 case ((uint_t)-1):
213 minor = mac_minor_hold(B_TRUE);
214 if (minor == 0) {
215 err = ENOSPC;
216 goto fail;
217 }
218 instance = minor - 1;
219 break;
220 default:
221 instance = mregp->m_instance;
222 if (instance >= MAC_MAX_MINOR) {
223 err = EINVAL;
224 goto fail;
225 }
226 break;
227 }
228
229 mip->mi_minor = (minor_t)(instance + 1);
230 mip->mi_dip = mregp->m_dip;
231 mip->mi_clients_list = NULL;
232 mip->mi_nclients = 0;
233
234 /* Set the default IEEE Port VLAN Identifier */
235 mip->mi_pvid = 1;
236
237 /* Default bridge link learning protection values */
238 mip->mi_llimit = 1000;
239 mip->mi_ldecay = 200;
240
241 driver = (char *)ddi_driver_name(mip->mi_dip);
242
243 /* Construct the MAC name as <drvname><instance> */
244 (void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d",
245 driver, instance);
246
247 mip->mi_driver = mregp->m_driver;
248
249 mip->mi_type = mtype;
250 mip->mi_margin = mregp->m_margin;
251 mip->mi_info.mi_media = mtype->mt_type;
252 mip->mi_info.mi_nativemedia = mtype->mt_nativetype;
253 if (mregp->m_max_sdu <= mregp->m_min_sdu)
254 goto fail;
255 if (mregp->m_multicast_sdu == 0)
256 mregp->m_multicast_sdu = mregp->m_max_sdu;
257 if (mregp->m_multicast_sdu < mregp->m_min_sdu ||
258 mregp->m_multicast_sdu > mregp->m_max_sdu)
259 goto fail;
260 mip->mi_sdu_min = mregp->m_min_sdu;
261 mip->mi_sdu_max = mregp->m_max_sdu;
262 mip->mi_sdu_multicast = mregp->m_multicast_sdu;
263 mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length;
264 /*
265 * If the media supports a broadcast address, cache a pointer to it
266 * in the mac_info_t so that upper layers can use it.
267 */
268 mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr;
269
270 mip->mi_v12n_level = mregp->m_v12n;
271
272 /*
273 * Copy the unicast source address into the mac_info_t, but only if
274 * the MAC-Type defines a non-zero address length. We need to
275 * handle MAC-Types that have an address length of 0
276 * (point-to-point protocol MACs for example).
277 */
278 if (mip->mi_type->mt_addr_length > 0) {
279 if (mregp->m_src_addr == NULL)
280 goto fail;
281 mip->mi_info.mi_unicst_addr =
282 kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP);
283 bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr,
284 mip->mi_type->mt_addr_length);
285
286 /*
287 * Copy the fixed 'factory' MAC address from the immutable
288 * info. This is taken to be the MAC address currently in
289 * use.
290 */
291 bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr,
292 mip->mi_type->mt_addr_length);
293
294 /*
295 * At this point, we should set up the classification
296 * rules etc but we delay it till mac_open() so that
297 * the resource discovery has taken place and we
298 * know someone wants to use the device. Otherwise
299 * memory gets allocated for Rx ring structures even
300 * during probe.
301 */
302
303 /* Copy the destination address if one is provided. */
304 if (mregp->m_dst_addr != NULL) {
305 bcopy(mregp->m_dst_addr, mip->mi_dstaddr,
306 mip->mi_type->mt_addr_length);
307 mip->mi_dstaddr_set = B_TRUE;
308 }
309 } else if (mregp->m_src_addr != NULL) {
310 goto fail;
311 }
312
313 /*
314 * The format of the m_pdata is specific to the plugin. It is
315 * passed in as an argument to all of the plugin callbacks. The
316 * driver can update this information by calling
317 * mac_pdata_update().
318 */
319 if (mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY) {
320 /*
321 * Verify if the supplied plugin data is valid. Note that
322 * even if the caller passed in a NULL pointer as plugin data,
323 * we still need to verify if that's valid as the plugin may
324 * require plugin data to function.
325 */
326 if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata,
327 mregp->m_pdata_size)) {
328 goto fail;
329 }
330 if (mregp->m_pdata != NULL) {
331 mip->mi_pdata =
332 kmem_alloc(mregp->m_pdata_size, KM_SLEEP);
333 bcopy(mregp->m_pdata, mip->mi_pdata,
334 mregp->m_pdata_size);
335 mip->mi_pdata_size = mregp->m_pdata_size;
336 }
337 } else if (mregp->m_pdata != NULL) {
338 /*
339 * The caller supplied non-NULL plugin data, but the plugin
340 * does not recognize plugin data.
341 */
342 err = EINVAL;
343 goto fail;
344 }
345
346 /*
347 * Register the private properties.
348 */
349 mac_register_priv_prop(mip, mregp->m_priv_props);
350
351 /*
352 * Stash the driver callbacks into the mac_impl_t, but first sanity
353 * check to make sure all mandatory callbacks are set.
354 */
355 if (mregp->m_callbacks->mc_getstat == NULL ||
356 mregp->m_callbacks->mc_start == NULL ||
357 mregp->m_callbacks->mc_stop == NULL ||
358 mregp->m_callbacks->mc_setpromisc == NULL ||
359 mregp->m_callbacks->mc_multicst == NULL) {
360 goto fail;
361 }
362 mip->mi_callbacks = mregp->m_callbacks;
363
364 if (mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LEGACY,
365 &mip->mi_capab_legacy)) {
366 mip->mi_state_flags |= MIS_LEGACY;
367 mip->mi_phy_dev = mip->mi_capab_legacy.ml_dev;
368 } else {
369 mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip),
370 mip->mi_minor);
371 }
372
373 /*
374 * Allocate a notification thread. thread_create blocks for memory
375 * if needed, it never fails.
376 */
377 mip->mi_notify_thread = thread_create(NULL, 0, i_mac_notify_thread,
378 mip, 0, &p0, TS_RUN, minclsyspri);
379
380 /*
381 * Cache the DB_CKSUMFLAGS that this MAC supports.
382 */
383 mip->mi_tx_cksum_flags = mac_features_to_flags((mac_handle_t)mip);
384
385 /*
386 * Initialize the capabilities
387 */
388 bzero(&mip->mi_rx_rings_cap, sizeof (mac_capab_rings_t));
389 bzero(&mip->mi_tx_rings_cap, sizeof (mac_capab_rings_t));
390
391 if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, NULL))
392 mip->mi_state_flags |= MIS_IS_VNIC;
393
394 if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_AGGR, NULL))
395 mip->mi_state_flags |= MIS_IS_AGGR;
396
397 if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_OVERLAY, NULL))
398 mip->mi_state_flags |= MIS_IS_OVERLAY;
399
400 mac_addr_factory_init(mip);
401
402 mac_transceiver_init(mip);
403
404 mac_led_init(mip);
405
406 /*
407 * Enforce the virtrualization level registered.
408 */
409 if (mip->mi_v12n_level & MAC_VIRT_LEVEL1) {
410 if (mac_init_rings(mip, MAC_RING_TYPE_RX) != 0 ||
411 mac_init_rings(mip, MAC_RING_TYPE_TX) != 0)
412 goto fail;
413
414 /*
415 * The driver needs to register at least rx rings for this
416 * virtualization level.
417 */
418 if (mip->mi_rx_groups == NULL)
419 goto fail;
420 }
421
422 /*
423 * The driver must set mc_unicst entry point to NULL when it advertises
424 * CAP_RINGS for rx groups.
425 */
426 if (mip->mi_rx_groups != NULL) {
427 if (mregp->m_callbacks->mc_unicst != NULL)
428 goto fail;
429 } else {
430 if (mregp->m_callbacks->mc_unicst == NULL)
431 goto fail;
432 }
433
434 /*
435 * Initialize MAC addresses. Must be called after mac_init_rings().
436 */
437 mac_init_macaddr(mip);
438
439 mip->mi_share_capab.ms_snum = 0;
440 if (mip->mi_v12n_level & MAC_VIRT_HIO) {
441 (void) mac_capab_get((mac_handle_t)mip, MAC_CAPAB_SHARES,
442 &mip->mi_share_capab);
443 }
444
445 /*
446 * Initialize the kstats for this device.
447 */
448 mac_driver_stat_create(mip);
449
450 /* Zero out any properties. */
451 bzero(&mip->mi_resource_props, sizeof (mac_resource_props_t));
452
453 if (mip->mi_minor <= MAC_MAX_MINOR) {
454 /* Create a style-2 DLPI device */
455 if (ddi_create_minor_node(mip->mi_dip, driver, S_IFCHR, 0,
456 DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS)
457 goto fail;
458 style2_created = B_TRUE;
459
460 /* Create a style-1 DLPI device */
461 if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR,
462 mip->mi_minor, DDI_NT_NET, 0) != DDI_SUCCESS)
463 goto fail;
464 style1_created = B_TRUE;
465 }
466
467 mac_flow_l2tab_create(mip, &mip->mi_flow_tab);
468
469 rw_enter(&i_mac_impl_lock, RW_WRITER);
470 if (mod_hash_insert(i_mac_impl_hash,
471 (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) {
472 rw_exit(&i_mac_impl_lock);
473 err = EEXIST;
474 goto fail;
475 }
476
477 DTRACE_PROBE2(mac__register, struct devnames *, dnp,
478 (mac_impl_t *), mip);
479
480 /*
481 * Mark the MAC to be ready for open.
482 */
483 mip->mi_state_flags &= ~MIS_DISABLED;
484 rw_exit(&i_mac_impl_lock);
485
486 atomic_inc_32(&i_mac_impl_count);
487
488 cmn_err(CE_NOTE, "!%s registered", mip->mi_name);
489 *mhp = (mac_handle_t)mip;
490 return (0);
491
492 fail:
493 if (style1_created)
494 ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
495
496 if (style2_created)
497 ddi_remove_minor_node(mip->mi_dip, driver);
498
499 mac_addr_factory_fini(mip);
500
501 /* Clean up registered MAC addresses */
502 mac_fini_macaddr(mip);
503
504 /* Clean up registered rings */
505 mac_free_rings(mip, MAC_RING_TYPE_RX);
506 mac_free_rings(mip, MAC_RING_TYPE_TX);
507
508 /* Clean up notification thread */
509 if (mip->mi_notify_thread != NULL)
510 i_mac_notify_exit(mip);
511
512 if (mip->mi_info.mi_unicst_addr != NULL) {
513 kmem_free(mip->mi_info.mi_unicst_addr,
514 mip->mi_type->mt_addr_length);
515 mip->mi_info.mi_unicst_addr = NULL;
516 }
517
518 mac_driver_stat_delete(mip);
519
520 if (mip->mi_type != NULL) {
521 atomic_dec_32(&mip->mi_type->mt_ref);
522 mip->mi_type = NULL;
523 }
524
525 if (mip->mi_pdata != NULL) {
526 kmem_free(mip->mi_pdata, mip->mi_pdata_size);
527 mip->mi_pdata = NULL;
528 mip->mi_pdata_size = 0;
529 }
530
531 if (minor != 0) {
532 ASSERT(minor > MAC_MAX_MINOR);
533 mac_minor_rele(minor);
534 }
535
536 mip->mi_state_flags = 0;
537 mac_unregister_priv_prop(mip);
538
539 /*
540 * Clear the state before destroying the mac_impl_t
541 */
542 mip->mi_state_flags = 0;
543
544 kmem_cache_free(i_mac_impl_cachep, mip);
545 return (err);
546 }
547
548 /*
549 * Unregister from the GLDv3 framework
550 */
551 int
mac_unregister(mac_handle_t mh)552 mac_unregister(mac_handle_t mh)
553 {
554 int err;
555 mac_impl_t *mip = (mac_impl_t *)mh;
556 mod_hash_val_t val;
557 mac_margin_req_t *mmr, *nextmmr;
558
559 /* Fail the unregister if there are any open references to this mac. */
560 if ((err = mac_disable_nowait(mh)) != 0)
561 return (err);
562
563 /*
564 * Clean up notification thread and wait for it to exit.
565 */
566 i_mac_notify_exit(mip);
567
568 /*
569 * Prior to acquiring the MAC perimeter, remove the MAC instance from
570 * the internal hash table. Such removal means table-walkers that
571 * acquire the perimeter will not do so on behalf of what we are
572 * unregistering, which prevents a deadlock.
573 */
574 rw_enter(&i_mac_impl_lock, RW_WRITER);
575 (void) mod_hash_remove(i_mac_impl_hash,
576 (mod_hash_key_t)mip->mi_name, &val);
577 rw_exit(&i_mac_impl_lock);
578 ASSERT(mip == (mac_impl_t *)val);
579
580 i_mac_perim_enter(mip);
581
582 /*
583 * There is still resource properties configured over this mac.
584 */
585 if (mip->mi_resource_props.mrp_mask != 0)
586 mac_fastpath_enable((mac_handle_t)mip);
587
588 if (mip->mi_minor < MAC_MAX_MINOR + 1) {
589 ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
590 ddi_remove_minor_node(mip->mi_dip,
591 (char *)ddi_driver_name(mip->mi_dip));
592 }
593
594 ASSERT(mip->mi_nactiveclients == 0 && !(mip->mi_state_flags &
595 MIS_EXCLUSIVE));
596
597 mac_driver_stat_delete(mip);
598
599 ASSERT(i_mac_impl_count > 0);
600 atomic_dec_32(&i_mac_impl_count);
601
602 if (mip->mi_pdata != NULL)
603 kmem_free(mip->mi_pdata, mip->mi_pdata_size);
604 mip->mi_pdata = NULL;
605 mip->mi_pdata_size = 0;
606
607 /*
608 * Free the list of margin request.
609 */
610 for (mmr = mip->mi_mmrp; mmr != NULL; mmr = nextmmr) {
611 nextmmr = mmr->mmr_nextp;
612 kmem_free(mmr, sizeof (mac_margin_req_t));
613 }
614 mip->mi_mmrp = NULL;
615
616 mip->mi_linkstate = mip->mi_lowlinkstate = LINK_STATE_UNKNOWN;
617 kmem_free(mip->mi_info.mi_unicst_addr, mip->mi_type->mt_addr_length);
618 mip->mi_info.mi_unicst_addr = NULL;
619
620 atomic_dec_32(&mip->mi_type->mt_ref);
621 mip->mi_type = NULL;
622
623 /*
624 * Free the primary MAC address.
625 */
626 mac_fini_macaddr(mip);
627
628 /*
629 * free all rings
630 */
631 mac_free_rings(mip, MAC_RING_TYPE_RX);
632 mac_free_rings(mip, MAC_RING_TYPE_TX);
633
634 mac_addr_factory_fini(mip);
635
636 bzero(mip->mi_addr, MAXMACADDRLEN);
637 bzero(mip->mi_dstaddr, MAXMACADDRLEN);
638 mip->mi_dstaddr_set = B_FALSE;
639
640 /* and the flows */
641 mac_flow_tab_destroy(mip->mi_flow_tab);
642 mip->mi_flow_tab = NULL;
643
644 if (mip->mi_minor > MAC_MAX_MINOR)
645 mac_minor_rele(mip->mi_minor);
646
647 cmn_err(CE_NOTE, "!%s unregistered", mip->mi_name);
648
649 /*
650 * Reset the perim related fields to default values before
651 * kmem_cache_free
652 */
653 i_mac_perim_exit(mip);
654 mip->mi_state_flags = 0;
655
656 mac_unregister_priv_prop(mip);
657
658 ASSERT(mip->mi_bridge_link == NULL);
659 kmem_cache_free(i_mac_impl_cachep, mip);
660
661 return (0);
662 }
663
664 /* DATA RECEPTION */
665
666 /*
667 * This function is invoked for packets received by the MAC driver in
668 * interrupt context. The ring generation number provided by the driver
669 * is matched with the ring generation number held in MAC. If they do not
670 * match, received packets are considered stale packets coming from an older
671 * assignment of the ring. Drop them.
672 */
673 void
mac_rx_ring(mac_handle_t mh,mac_ring_handle_t mrh,mblk_t * mp_chain,uint64_t mr_gen_num)674 mac_rx_ring(mac_handle_t mh, mac_ring_handle_t mrh, mblk_t *mp_chain,
675 uint64_t mr_gen_num)
676 {
677 mac_ring_t *mr = (mac_ring_t *)mrh;
678
679 if ((mr != NULL) && (mr->mr_gen_num != mr_gen_num)) {
680 DTRACE_PROBE2(mac__rx__rings__stale__packet, uint64_t,
681 mr->mr_gen_num, uint64_t, mr_gen_num);
682 freemsgchain(mp_chain);
683 return;
684 }
685 mac_rx(mh, (mac_resource_handle_t)mrh, mp_chain);
686 }
687
688 /*
689 * This function is invoked for each packet received by the underlying driver.
690 */
691 void
mac_rx(mac_handle_t mh,mac_resource_handle_t mrh,mblk_t * mp_chain)692 mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
693 {
694 mac_impl_t *mip = (mac_impl_t *)mh;
695
696 /*
697 * Check if the link is part of a bridge. If not, then we don't need
698 * to take the lock to remain consistent. Make this common case
699 * lock-free and tail-call optimized.
700 */
701 if (mip->mi_bridge_link == NULL) {
702 mac_rx_common(mh, mrh, mp_chain);
703 } else {
704 /*
705 * Once we take a reference on the bridge link, the bridge
706 * module itself can't unload, so the callback pointers are
707 * stable.
708 */
709 mutex_enter(&mip->mi_bridge_lock);
710 if ((mh = mip->mi_bridge_link) != NULL)
711 mac_bridge_ref_cb(mh, B_TRUE);
712 mutex_exit(&mip->mi_bridge_lock);
713 if (mh == NULL) {
714 mac_rx_common((mac_handle_t)mip, mrh, mp_chain);
715 } else {
716 mac_bridge_rx_cb(mh, mrh, mp_chain);
717 mac_bridge_ref_cb(mh, B_FALSE);
718 }
719 }
720 }
721
722 /*
723 * Special case function: this allows snooping of packets transmitted and
724 * received by TRILL. By design, they go directly into the TRILL module.
725 */
726 void
mac_trill_snoop(mac_handle_t mh,mblk_t * mp)727 mac_trill_snoop(mac_handle_t mh, mblk_t *mp)
728 {
729 mac_impl_t *mip = (mac_impl_t *)mh;
730
731 if (mip->mi_promisc_list != NULL)
732 mac_promisc_dispatch(mip, mp, NULL, B_FALSE);
733 }
734
735 /*
736 * This is the upward reentry point for packets arriving from the bridging
737 * module and from mac_rx for links not part of a bridge.
738 */
739 void
mac_rx_common(mac_handle_t mh,mac_resource_handle_t mrh,mblk_t * mp_chain)740 mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
741 {
742 mac_impl_t *mip = (mac_impl_t *)mh;
743 mac_ring_t *mr = (mac_ring_t *)mrh;
744 mac_soft_ring_set_t *mac_srs;
745 mblk_t *bp = mp_chain;
746
747 /*
748 * If there are any promiscuous mode callbacks defined for
749 * this MAC, pass them a copy if appropriate.
750 */
751 if (mip->mi_promisc_list != NULL)
752 mac_promisc_dispatch(mip, mp_chain, NULL, B_FALSE);
753
754 if (mr != NULL) {
755 /*
756 * If the SRS teardown has started, just return. The 'mr'
757 * continues to be valid until the driver unregisters the MAC.
758 * Hardware classified packets will not make their way up
759 * beyond this point once the teardown has started. The driver
760 * is never passed a pointer to a flow entry or SRS or any
761 * structure that can be freed much before mac_unregister.
762 */
763 mutex_enter(&mr->mr_lock);
764 if ((mr->mr_state != MR_INUSE) || (mr->mr_flag &
765 (MR_INCIPIENT | MR_CONDEMNED | MR_QUIESCE))) {
766 mutex_exit(&mr->mr_lock);
767 freemsgchain(mp_chain);
768 return;
769 }
770
771 /*
772 * The ring is in passthru mode; pass the chain up to
773 * the pseudo ring.
774 */
775 if (mr->mr_classify_type == MAC_PASSTHRU_CLASSIFIER) {
776 MR_REFHOLD_LOCKED(mr);
777 mutex_exit(&mr->mr_lock);
778 mr->mr_pt_fn(mr->mr_pt_arg1, mr->mr_pt_arg2, mp_chain,
779 B_FALSE);
780 MR_REFRELE(mr);
781 return;
782 }
783
784 /*
785 * The passthru callback should only be set when in
786 * MAC_PASSTHRU_CLASSIFIER mode.
787 */
788 ASSERT3P(mr->mr_pt_fn, ==, NULL);
789
790 /*
791 * We check if an SRS is controlling this ring.
792 * If so, we can directly call the srs_lower_proc
793 * routine otherwise we need to go through mac_rx_classify
794 * to reach the right place.
795 */
796 if (mr->mr_classify_type == MAC_HW_CLASSIFIER) {
797 MR_REFHOLD_LOCKED(mr);
798 mutex_exit(&mr->mr_lock);
799 ASSERT3P(mr->mr_srs, !=, NULL);
800 mac_srs = mr->mr_srs;
801
802 /*
803 * This is the fast path. All packets received
804 * on this ring are hardware classified and
805 * share the same MAC header info.
806 */
807 mac_srs->srs_rx.sr_lower_proc(mh,
808 (mac_resource_handle_t)mac_srs, mp_chain, B_FALSE);
809 MR_REFRELE(mr);
810 return;
811 }
812
813 mutex_exit(&mr->mr_lock);
814 /* We'll fall through to software classification */
815 } else {
816 flow_entry_t *flent;
817 int err;
818
819 rw_enter(&mip->mi_rw_lock, RW_READER);
820 if (mip->mi_single_active_client != NULL) {
821 flent = mip->mi_single_active_client->mci_flent_list;
822 FLOW_TRY_REFHOLD(flent, err);
823 rw_exit(&mip->mi_rw_lock);
824 if (err == 0) {
825 (flent->fe_cb_fn)(flent->fe_cb_arg1,
826 flent->fe_cb_arg2, mp_chain, B_FALSE);
827 FLOW_REFRELE(flent);
828 return;
829 }
830 } else {
831 rw_exit(&mip->mi_rw_lock);
832 }
833 }
834
835 if (!FLOW_TAB_EMPTY(mip->mi_flow_tab)) {
836 if ((bp = mac_rx_flow(mh, mrh, bp)) == NULL)
837 return;
838 }
839
840 freemsgchain(bp);
841 }
842
843 /* DATA TRANSMISSION */
844
845 /*
846 * A driver's notification to resume transmission, in case of a provider
847 * without TX rings.
848 */
849 void
mac_tx_update(mac_handle_t mh)850 mac_tx_update(mac_handle_t mh)
851 {
852 mac_tx_ring_update(mh, NULL);
853 }
854
855 /*
856 * A driver's notification to resume transmission on the specified TX ring.
857 */
858 void
mac_tx_ring_update(mac_handle_t mh,mac_ring_handle_t rh)859 mac_tx_ring_update(mac_handle_t mh, mac_ring_handle_t rh)
860 {
861 i_mac_tx_srs_notify((mac_impl_t *)mh, rh);
862 }
863
864 /* LINK STATE */
865 /*
866 * Notify the MAC layer about a link state change
867 */
868 void
mac_link_update(mac_handle_t mh,link_state_t link)869 mac_link_update(mac_handle_t mh, link_state_t link)
870 {
871 mac_impl_t *mip = (mac_impl_t *)mh;
872
873 /*
874 * Save the link state.
875 */
876 mip->mi_lowlinkstate = link;
877
878 /*
879 * Send a MAC_NOTE_LOWLINK notification. This tells the notification
880 * thread to deliver both lower and upper notifications.
881 */
882 i_mac_notify(mip, MAC_NOTE_LOWLINK);
883 }
884
885 /*
886 * Notify the MAC layer about a link state change due to bridging.
887 */
888 void
mac_link_redo(mac_handle_t mh,link_state_t link)889 mac_link_redo(mac_handle_t mh, link_state_t link)
890 {
891 mac_impl_t *mip = (mac_impl_t *)mh;
892
893 /*
894 * Save the link state.
895 */
896 mip->mi_linkstate = link;
897
898 /*
899 * Send a MAC_NOTE_LINK notification. Only upper notifications are
900 * made.
901 */
902 i_mac_notify(mip, MAC_NOTE_LINK);
903 }
904
905 /* MINOR NODE HANDLING */
906
907 /*
908 * Given a dev_t, return the instance number (PPA) associated with it.
909 * Drivers can use this in their getinfo(9e) implementation to lookup
910 * the instance number (i.e. PPA) of the device, to use as an index to
911 * their own array of soft state structures.
912 *
913 * Returns -1 on error.
914 */
915 int
mac_devt_to_instance(dev_t devt)916 mac_devt_to_instance(dev_t devt)
917 {
918 return (dld_devt_to_instance(devt));
919 }
920
921 /*
922 * Drivers that make use of the private minor number space are expected to
923 * provide their own getinfo(9e) entry point. This function simply forwards
924 * to the default MAC framework getinfo(9e) implementation as a convenience
925 * if they don't need any special mapping (mac instance != ddi_get_instance())
926 */
927 int
mac_getinfo(dev_info_t * dip,ddi_info_cmd_t cmd,void * arg,void ** resp)928 mac_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp)
929 {
930 return (dld_getinfo(dip, cmd, arg, resp));
931 }
932
933 /*
934 * This function returns the first minor number that is available for
935 * driver private use. All minor numbers smaller than this are
936 * reserved for GLDv3 use.
937 */
938 minor_t
mac_private_minor(void)939 mac_private_minor(void)
940 {
941 return (MAC_PRIVATE_MINOR);
942 }
943
944 /* OTHER CONTROL INFORMATION */
945
946 /*
947 * A driver notified us that its primary MAC address has changed.
948 */
949 void
mac_unicst_update(mac_handle_t mh,const uint8_t * addr)950 mac_unicst_update(mac_handle_t mh, const uint8_t *addr)
951 {
952 mac_impl_t *mip = (mac_impl_t *)mh;
953
954 if (mip->mi_type->mt_addr_length == 0)
955 return;
956
957 i_mac_perim_enter(mip);
958
959 /*
960 * If address changes, freshen the MAC address value and update
961 * all MAC clients that share this MAC address.
962 */
963 if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) != 0) {
964 mac_freshen_macaddr(mac_find_macaddr(mip, mip->mi_addr),
965 (uint8_t *)addr);
966 }
967
968 i_mac_perim_exit(mip);
969
970 /*
971 * Send a MAC_NOTE_UNICST notification.
972 */
973 i_mac_notify(mip, MAC_NOTE_UNICST);
974 }
975
976 void
mac_dst_update(mac_handle_t mh,const uint8_t * addr)977 mac_dst_update(mac_handle_t mh, const uint8_t *addr)
978 {
979 mac_impl_t *mip = (mac_impl_t *)mh;
980
981 if (mip->mi_type->mt_addr_length == 0)
982 return;
983
984 i_mac_perim_enter(mip);
985 bcopy(addr, mip->mi_dstaddr, mip->mi_type->mt_addr_length);
986 i_mac_perim_exit(mip);
987 i_mac_notify(mip, MAC_NOTE_DEST);
988 }
989
990 /*
991 * MAC plugin information changed.
992 */
993 int
mac_pdata_update(mac_handle_t mh,void * mac_pdata,size_t dsize)994 mac_pdata_update(mac_handle_t mh, void *mac_pdata, size_t dsize)
995 {
996 mac_impl_t *mip = (mac_impl_t *)mh;
997
998 /*
999 * Verify that the plugin supports MAC plugin data and that the
1000 * supplied data is valid.
1001 */
1002 if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
1003 return (EINVAL);
1004 if (!mip->mi_type->mt_ops.mtops_pdata_verify(mac_pdata, dsize))
1005 return (EINVAL);
1006
1007 if (mip->mi_pdata != NULL)
1008 kmem_free(mip->mi_pdata, mip->mi_pdata_size);
1009
1010 mip->mi_pdata = kmem_alloc(dsize, KM_SLEEP);
1011 bcopy(mac_pdata, mip->mi_pdata, dsize);
1012 mip->mi_pdata_size = dsize;
1013
1014 /*
1015 * Since the MAC plugin data is used to construct MAC headers that
1016 * were cached in fast-path headers, we need to flush fast-path
1017 * information for links associated with this mac.
1018 */
1019 i_mac_notify(mip, MAC_NOTE_FASTPATH_FLUSH);
1020 return (0);
1021 }
1022
1023 /*
1024 * The mac provider or mac frameowrk calls this function when it wants
1025 * to notify upstream consumers that the capabilities have changed and
1026 * that they should modify their own internal state accordingly.
1027 *
1028 * We currently have no regard for the fact that a provider could
1029 * decide to drop capabilities which would invalidate pending traffic.
1030 * For example, if one was to disable the Tx checksum offload while
1031 * TCP/IP traffic was being sent by mac clients relying on that
1032 * feature, then those packets would hit the write with missing or
1033 * partial checksums. A proper solution involves not only providing
1034 * notfication, but also performing client quiescing. That is, a capab
1035 * change should be treated as an atomic transaction that forms a
1036 * barrier between traffic relying on the current capabs and traffic
1037 * relying on the new capabs. In practice, simnet is currently the
1038 * only provider that could hit this, and it's an easily avoidable
1039 * situation (and at worst it should only lead to some dropped
1040 * packets). But if we ever want better on-the-fly capab change to
1041 * actual hardware providers, then we should give this update
1042 * mechanism a proper implementation.
1043 */
1044 void
mac_capab_update(mac_handle_t mh)1045 mac_capab_update(mac_handle_t mh)
1046 {
1047 /*
1048 * Send a MAC_NOTE_CAPAB_CHG notification to alert upstream
1049 * clients to renegotiate capabilities.
1050 */
1051 i_mac_notify((mac_impl_t *)mh, MAC_NOTE_CAPAB_CHG);
1052 }
1053
1054 /*
1055 * Used by normal drivers to update the max sdu size.
1056 * We need to handle the case of a smaller mi_sdu_multicast
1057 * since this is called by mac_set_mtu() even for drivers that
1058 * have differing unicast and multicast mtu and we don't want to
1059 * increase the multicast mtu by accident in that case.
1060 */
1061 int
mac_maxsdu_update(mac_handle_t mh,uint_t sdu_max)1062 mac_maxsdu_update(mac_handle_t mh, uint_t sdu_max)
1063 {
1064 mac_impl_t *mip = (mac_impl_t *)mh;
1065
1066 if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
1067 return (EINVAL);
1068 mip->mi_sdu_max = sdu_max;
1069 if (mip->mi_sdu_multicast > mip->mi_sdu_max)
1070 mip->mi_sdu_multicast = mip->mi_sdu_max;
1071
1072 /* Send a MAC_NOTE_SDU_SIZE notification. */
1073 i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
1074 return (0);
1075 }
1076
1077 /*
1078 * Version of the above function that is used by drivers that have a different
1079 * max sdu size for multicast/broadcast vs. unicast.
1080 */
1081 int
mac_maxsdu_update2(mac_handle_t mh,uint_t sdu_max,uint_t sdu_multicast)1082 mac_maxsdu_update2(mac_handle_t mh, uint_t sdu_max, uint_t sdu_multicast)
1083 {
1084 mac_impl_t *mip = (mac_impl_t *)mh;
1085
1086 if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
1087 return (EINVAL);
1088 if (sdu_multicast == 0)
1089 sdu_multicast = sdu_max;
1090 if (sdu_multicast > sdu_max || sdu_multicast < mip->mi_sdu_min)
1091 return (EINVAL);
1092 mip->mi_sdu_max = sdu_max;
1093 mip->mi_sdu_multicast = sdu_multicast;
1094
1095 /* Send a MAC_NOTE_SDU_SIZE notification. */
1096 i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
1097 return (0);
1098 }
1099
1100 static void
mac_ring_intr_retarget(mac_group_t * group,mac_ring_t * ring)1101 mac_ring_intr_retarget(mac_group_t *group, mac_ring_t *ring)
1102 {
1103 mac_client_impl_t *mcip;
1104 flow_entry_t *flent;
1105 mac_soft_ring_set_t *mac_rx_srs;
1106 mac_cpus_t *srs_cpu;
1107 int i;
1108
1109 if (((mcip = MAC_GROUP_ONLY_CLIENT(group)) != NULL) &&
1110 (!ring->mr_info.mri_intr.mi_ddi_shared)) {
1111 /* interrupt can be re-targeted */
1112 ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED);
1113 flent = mcip->mci_flent;
1114 if (ring->mr_type == MAC_RING_TYPE_RX) {
1115 for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
1116 mac_rx_srs = flent->fe_rx_srs[i];
1117 if (mac_rx_srs->srs_ring != ring)
1118 continue;
1119 srs_cpu = &mac_rx_srs->srs_cpu;
1120 mutex_enter(&cpu_lock);
1121 mac_rx_srs_retarget_intr(mac_rx_srs,
1122 srs_cpu->mc_rx_intr_cpu);
1123 mutex_exit(&cpu_lock);
1124 break;
1125 }
1126 } else {
1127 if (flent->fe_tx_srs != NULL) {
1128 mutex_enter(&cpu_lock);
1129 mac_tx_srs_retarget_intr(
1130 flent->fe_tx_srs);
1131 mutex_exit(&cpu_lock);
1132 }
1133 }
1134 }
1135 }
1136
/*
 * Clients like aggr create pseudo rings (mac_ring_t) and expose them to
 * their clients. There is a 1-1 mapping between the pseudo ring and the
 * hardware ring. ddi interrupt handles are exported from the hardware
 * ring to the pseudo ring. Thus when the interrupt handle changes,
 * clients of aggr that are using the handle need to use the new handle
 * and re-target their interrupts.
 *
 * NOTE(review): this is called with mip's perimeter held and exits it
 * (and the aggr perimeter) before returning -- callers must not exit
 * mip's perimeter again after calling this.
 */
static void
mac_pseudo_ring_intr_retarget(mac_impl_t *mip, mac_ring_t *ring,
    ddi_intr_handle_t ddh)
{
	mac_ring_t *pring;
	mac_group_t *pgroup;
	mac_impl_t *pmip;
	char macname[MAXNAMELEN];
	mac_perim_handle_t p_mph;
	uint64_t saved_gen_num;

again:
	pring = (mac_ring_t *)ring->mr_prh;
	pgroup = (mac_group_t *)pring->mr_gh;
	pmip = (mac_impl_t *)pgroup->mrg_mh;
	saved_gen_num = ring->mr_gen_num;
	(void) strlcpy(macname, pmip->mi_name, MAXNAMELEN);
	/*
	 * We need to enter aggr's perimeter. The locking hierarchy
	 * dictates that aggr's perimeter should be entered first
	 * and then the port's perimeter. So drop the port's
	 * perimeter, enter aggr's and then re-enter port's
	 * perimeter.
	 */
	i_mac_perim_exit(mip);
	/*
	 * While we know pmip is the aggr's mip, there is a
	 * possibility that aggr could have unregistered by
	 * the time we exit port's perimeter (mip) and
	 * enter aggr's perimeter (pmip). To avoid that
	 * scenario, enter aggr's perimeter using its name.
	 */
	if (mac_perim_enter_by_macname(macname, &p_mph) != 0)
		return;
	i_mac_perim_enter(mip);
	/*
	 * Check if the ring got assigned to another aggregation before
	 * we could enter aggr's and the port's perimeter. When a ring
	 * gets deleted from an aggregation, it calls mac_stop_ring()
	 * which increments the generation number. So checking
	 * generation number will be enough.
	 */
	if (ring->mr_gen_num != saved_gen_num && ring->mr_prh != NULL) {
		i_mac_perim_exit(mip);
		mac_perim_exit(p_mph);
		i_mac_perim_enter(mip);
		goto again;
	}

	/* Check if pseudo ring is still present */
	if (ring->mr_prh != NULL) {
		/* Propagate the new handle and sharing state downstream. */
		pring->mr_info.mri_intr.mi_ddi_handle = ddh;
		pring->mr_info.mri_intr.mi_ddi_shared =
		    ring->mr_info.mri_intr.mi_ddi_shared;
		if (ddh != NULL)
			mac_ring_intr_retarget(pgroup, pring);
	}
	i_mac_perim_exit(mip);
	mac_perim_exit(p_mph);
}
/*
 * API called by driver to provide new interrupt handle for TX/RX rings.
 * This usually happens when IRM (Interrupt Resource Management)
 * framework either gives the driver more MSI-x interrupts or takes
 * away MSI-x interrupts from the driver.
 */
void
mac_ring_intr_set(mac_ring_handle_t mrh, ddi_intr_handle_t ddh)
{
	mac_ring_t *ring = (mac_ring_t *)mrh;
	mac_group_t *group = (mac_group_t *)ring->mr_gh;
	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;

	i_mac_perim_enter(mip);
	ring->mr_info.mri_intr.mi_ddi_handle = ddh;
	if (ddh == NULL) {
		/* Interrupts being reset */
		ring->mr_info.mri_intr.mi_ddi_shared = B_FALSE;
		if (ring->mr_prh != NULL) {
			/*
			 * Propagate the reset to the pseudo ring.  Note that
			 * mac_pseudo_ring_intr_retarget() exits mip's
			 * perimeter itself, hence the early return here.
			 */
			mac_pseudo_ring_intr_retarget(mip, ring, ddh);
			return;
		}
	} else {
		/* New interrupt handle */
		mac_compare_ddi_handle(mip->mi_rx_groups,
		    mip->mi_rx_group_count, ring);
		if (!ring->mr_info.mri_intr.mi_ddi_shared) {
			mac_compare_ddi_handle(mip->mi_tx_groups,
			    mip->mi_tx_group_count, ring);
		}
		if (ring->mr_prh != NULL) {
			/* As above: the callee drops the perimeter. */
			mac_pseudo_ring_intr_retarget(mip, ring, ddh);
			return;
		} else {
			mac_ring_intr_retarget(group, ring);
		}
	}
	i_mac_perim_exit(mip);
}
1244
1245 /* PRIVATE FUNCTIONS, FOR INTERNAL USE ONLY */
1246
1247 /*
1248 * Updates the mac_impl structure with the current state of the link
1249 */
1250 static void
i_mac_log_link_state(mac_impl_t * mip)1251 i_mac_log_link_state(mac_impl_t *mip)
1252 {
1253 /*
1254 * If no change, then it is not interesting.
1255 */
1256 if (mip->mi_lastlowlinkstate == mip->mi_lowlinkstate)
1257 return;
1258
1259 switch (mip->mi_lowlinkstate) {
1260 case LINK_STATE_UP:
1261 if (mip->mi_type->mt_ops.mtops_ops & MTOPS_LINK_DETAILS) {
1262 char det[200];
1263
1264 mip->mi_type->mt_ops.mtops_link_details(det,
1265 sizeof (det), (mac_handle_t)mip, mip->mi_pdata);
1266
1267 cmn_err(CE_NOTE, "!%s link up, %s", mip->mi_name, det);
1268 } else {
1269 cmn_err(CE_NOTE, "!%s link up", mip->mi_name);
1270 }
1271 break;
1272
1273 case LINK_STATE_DOWN:
1274 /*
1275 * Only transitions from UP to DOWN are interesting
1276 */
1277 if (mip->mi_lastlowlinkstate != LINK_STATE_UNKNOWN)
1278 cmn_err(CE_NOTE, "!%s link down", mip->mi_name);
1279 break;
1280
1281 case LINK_STATE_UNKNOWN:
1282 /*
1283 * This case is normally not interesting.
1284 */
1285 break;
1286 }
1287 mip->mi_lastlowlinkstate = mip->mi_lowlinkstate;
1288 }
1289
/*
 * Main routine for the callbacks notifications thread.
 *
 * This per-mac thread sleeps until i_mac_notify() posts notification
 * bits in mi_notify_bits, then delivers each pending notification type
 * to the global per-type hook (mac_notify_cb_list) and to every
 * registered client callback on mi_notify_cb_list.  It exits when
 * i_mac_notify_exit() posts the special MAC_NNOTE bit.
 */
static void
i_mac_notify_thread(void *arg)
{
	mac_impl_t *mip = arg;
	callb_cpr_t cprinfo;
	mac_cb_t *mcb;
	mac_cb_info_t *mcbi;
	mac_notify_cb_t *mncb;

	mcbi = &mip->mi_notify_cb_info;
	/* Register with CPR so suspend/resume can account for this thread. */
	CALLB_CPR_INIT(&cprinfo, mcbi->mcbi_lockp, callb_generic_cpr,
	    "i_mac_notify_thread");

	mutex_enter(mcbi->mcbi_lockp);

	for (;;) {
		uint32_t bits;
		uint32_t type;

		bits = mip->mi_notify_bits;
		/* Nothing pending: wait (CPR-safely) for more work. */
		if (bits == 0) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
			CALLB_CPR_SAFE_END(&cprinfo, mcbi->mcbi_lockp);
			continue;
		}
		mip->mi_notify_bits = 0;
		/*
		 * Bit MAC_NNOTE (one past the highest valid notification
		 * type) is the quit signal from i_mac_notify_exit().
		 */
		if ((bits & (1 << MAC_NNOTE)) != 0) {
			/* request to quit */
			ASSERT(mip->mi_state_flags & MIS_DISABLED);
			break;
		}

		/* Drop the lock while running callbacks. */
		mutex_exit(mcbi->mcbi_lockp);

		/*
		 * Log link changes on the actual link, but then do reports on
		 * synthetic state (if part of a bridge).
		 */
		if ((bits & (1 << MAC_NOTE_LOWLINK)) != 0) {
			link_state_t newstate;
			mac_handle_t mh;

			i_mac_log_link_state(mip);
			newstate = mip->mi_lowlinkstate;
			if (mip->mi_bridge_link != NULL) {
				/* Re-check the link under mi_bridge_lock. */
				mutex_enter(&mip->mi_bridge_lock);
				if ((mh = mip->mi_bridge_link) != NULL) {
					newstate = mac_bridge_ls_cb(mh,
					    newstate);
				}
				mutex_exit(&mip->mi_bridge_lock);
			}
			/* Promote to a MAC_NOTE_LINK if the state changed. */
			if (newstate != mip->mi_linkstate) {
				mip->mi_linkstate = newstate;
				bits |= 1 << MAC_NOTE_LINK;
			}
		}

		/*
		 * Depending on which capabs have changed, the Tx
		 * checksum flags may also need to be updated.
		 */
		if ((bits & (1 << MAC_NOTE_CAPAB_CHG)) != 0) {
			mac_perim_handle_t mph;
			mac_handle_t mh = (mac_handle_t)mip;

			mac_perim_enter_by_mh(mh, &mph);
			mip->mi_tx_cksum_flags = mac_features_to_flags(mh);
			mac_perim_exit(mph);
		}

		/*
		 * Do notification callbacks for each notification type.
		 */
		for (type = 0; type < MAC_NNOTE; type++) {
			if ((bits & (1 << type)) == 0) {
				continue;
			}

			/* Global per-type hook, if one is installed. */
			if (mac_notify_cb_list[type] != NULL)
				(*mac_notify_cb_list[type])(mip);

			/*
			 * Walk the list of notifications.  The walker
			 * count protects the list against concurrent
			 * removal while we traverse it unlocked.
			 */
			MAC_CALLBACK_WALKER_INC(&mip->mi_notify_cb_info);
			for (mcb = mip->mi_notify_cb_list; mcb != NULL;
			    mcb = mcb->mcb_nextp) {
				mncb = (mac_notify_cb_t *)mcb->mcb_objp;
				mncb->mncb_fn(mncb->mncb_arg, type);
			}
			MAC_CALLBACK_WALKER_DCR(&mip->mi_notify_cb_info,
			    &mip->mi_notify_cb_list);
		}

		mutex_enter(mcbi->mcbi_lockp);
	}

	/* Tell i_mac_notify_exit() we are done, then tear down CPR. */
	mip->mi_state_flags |= MIS_NOTIFY_DONE;
	cv_broadcast(&mcbi->mcbi_cv);

	/* CALLB_CPR_EXIT drops the lock */
	CALLB_CPR_EXIT(&cprinfo);
	thread_exit();
}
1399
/*
 * Signal the i_mac_notify_thread asking it to quit.
 * Then wait till it is done.
 */
void
i_mac_notify_exit(mac_impl_t *mip)
{
	mac_cb_info_t *mcbi;

	mcbi = &mip->mi_notify_cb_info;

	mutex_enter(mcbi->mcbi_lockp);
	/* The MAC_NNOTE bit is the thread's request-to-quit signal. */
	mip->mi_notify_bits = (1 << MAC_NNOTE);
	cv_broadcast(&mcbi->mcbi_cv);


	/* Wait for the thread to acknowledge by setting MIS_NOTIFY_DONE. */
	while ((mip->mi_notify_thread != NULL) &&
	    !(mip->mi_state_flags & MIS_NOTIFY_DONE)) {
		cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
	}

	/* Necessary clean up before doing kmem_cache_free */
	mip->mi_state_flags &= ~MIS_NOTIFY_DONE;
	mip->mi_notify_bits = 0;
	mip->mi_notify_thread = NULL;
	mutex_exit(mcbi->mcbi_lockp);
}
1427
1428 /*
1429 * Entry point invoked by drivers to dynamically add a ring to an
1430 * existing group.
1431 */
1432 int
mac_group_add_ring(mac_group_handle_t gh,int index)1433 mac_group_add_ring(mac_group_handle_t gh, int index)
1434 {
1435 mac_group_t *group = (mac_group_t *)gh;
1436 mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1437 int ret;
1438
1439 i_mac_perim_enter(mip);
1440 ret = i_mac_group_add_ring(group, NULL, index);
1441 i_mac_perim_exit(mip);
1442 return (ret);
1443 }
1444
1445 /*
1446 * Entry point invoked by drivers to dynamically remove a ring
1447 * from an existing group. The specified ring handle must no longer
1448 * be used by the driver after a call to this function.
1449 */
1450 void
mac_group_rem_ring(mac_group_handle_t gh,mac_ring_handle_t rh)1451 mac_group_rem_ring(mac_group_handle_t gh, mac_ring_handle_t rh)
1452 {
1453 mac_group_t *group = (mac_group_t *)gh;
1454 mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1455
1456 i_mac_perim_enter(mip);
1457 i_mac_group_rem_ring(group, (mac_ring_t *)rh, B_TRUE);
1458 i_mac_perim_exit(mip);
1459 }
1460
1461 /*
1462 * mac_prop_info_*() callbacks called from the driver's prefix_propinfo()
1463 * entry points.
1464 */
1465
1466 void
mac_prop_info_set_default_uint8(mac_prop_info_handle_t ph,uint8_t val)1467 mac_prop_info_set_default_uint8(mac_prop_info_handle_t ph, uint8_t val)
1468 {
1469 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1470
1471 /* nothing to do if the caller doesn't want the default value */
1472 if (pr->pr_default == NULL)
1473 return;
1474
1475 ASSERT(pr->pr_default_size >= sizeof (uint8_t));
1476
1477 *(uint8_t *)(pr->pr_default) = val;
1478 pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1479 }
1480
1481 void
mac_prop_info_set_default_uint64(mac_prop_info_handle_t ph,uint64_t val)1482 mac_prop_info_set_default_uint64(mac_prop_info_handle_t ph, uint64_t val)
1483 {
1484 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1485
1486 /* nothing to do if the caller doesn't want the default value */
1487 if (pr->pr_default == NULL)
1488 return;
1489
1490 ASSERT(pr->pr_default_size >= sizeof (uint64_t));
1491
1492 bcopy(&val, pr->pr_default, sizeof (val));
1493
1494 pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1495 }
1496
1497 void
mac_prop_info_set_default_uint32(mac_prop_info_handle_t ph,uint32_t val)1498 mac_prop_info_set_default_uint32(mac_prop_info_handle_t ph, uint32_t val)
1499 {
1500 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1501
1502 /* nothing to do if the caller doesn't want the default value */
1503 if (pr->pr_default == NULL)
1504 return;
1505
1506 ASSERT(pr->pr_default_size >= sizeof (uint32_t));
1507
1508 bcopy(&val, pr->pr_default, sizeof (val));
1509
1510 pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1511 }
1512
1513 void
mac_prop_info_set_default_str(mac_prop_info_handle_t ph,const char * str)1514 mac_prop_info_set_default_str(mac_prop_info_handle_t ph, const char *str)
1515 {
1516 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1517
1518 /* nothing to do if the caller doesn't want the default value */
1519 if (pr->pr_default == NULL)
1520 return;
1521
1522 if (strlen(str) >= pr->pr_default_size)
1523 pr->pr_errno = ENOBUFS;
1524 else
1525 (void) strlcpy(pr->pr_default, str, pr->pr_default_size);
1526 pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1527 }
1528
1529 void
mac_prop_info_set_default_link_flowctrl(mac_prop_info_handle_t ph,link_flowctrl_t val)1530 mac_prop_info_set_default_link_flowctrl(mac_prop_info_handle_t ph,
1531 link_flowctrl_t val)
1532 {
1533 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1534
1535 /* nothing to do if the caller doesn't want the default value */
1536 if (pr->pr_default == NULL)
1537 return;
1538
1539 ASSERT(pr->pr_default_size >= sizeof (link_flowctrl_t));
1540
1541 bcopy(&val, pr->pr_default, sizeof (val));
1542
1543 pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1544 }
1545
1546 void
mac_prop_info_set_default_fec(mac_prop_info_handle_t ph,link_fec_t val)1547 mac_prop_info_set_default_fec(mac_prop_info_handle_t ph, link_fec_t val)
1548 {
1549 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1550
1551 /* nothing to do if the caller doesn't want the default value */
1552 if (pr->pr_default == NULL)
1553 return;
1554
1555 ASSERT(pr->pr_default_size >= sizeof (link_fec_t));
1556
1557 bcopy(&val, pr->pr_default, sizeof (val));
1558
1559 pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1560 }
1561
1562 void
mac_prop_info_set_range_uint32(mac_prop_info_handle_t ph,uint32_t min,uint32_t max)1563 mac_prop_info_set_range_uint32(mac_prop_info_handle_t ph, uint32_t min,
1564 uint32_t max)
1565 {
1566 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1567 mac_propval_range_t *range = pr->pr_range;
1568 mac_propval_uint32_range_t *range32;
1569
1570 /* nothing to do if the caller doesn't want the range info */
1571 if (range == NULL)
1572 return;
1573
1574 if (pr->pr_range_cur_count++ == 0) {
1575 /* first range */
1576 pr->pr_flags |= MAC_PROP_INFO_RANGE;
1577 range->mpr_type = MAC_PROPVAL_UINT32;
1578 } else {
1579 /* all ranges of a property should be of the same type */
1580 ASSERT(range->mpr_type == MAC_PROPVAL_UINT32);
1581 if (pr->pr_range_cur_count > range->mpr_count) {
1582 pr->pr_errno = ENOSPC;
1583 return;
1584 }
1585 }
1586
1587 range32 = range->mpr_range_uint32;
1588 range32[pr->pr_range_cur_count - 1].mpur_min = min;
1589 range32[pr->pr_range_cur_count - 1].mpur_max = max;
1590 }
1591
1592 void
mac_prop_info_set_perm(mac_prop_info_handle_t ph,uint8_t perm)1593 mac_prop_info_set_perm(mac_prop_info_handle_t ph, uint8_t perm)
1594 {
1595 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1596
1597 pr->pr_perm = perm;
1598 pr->pr_flags |= MAC_PROP_INFO_PERM;
1599 }
1600
1601 void
mac_hcksum_get(const mblk_t * mp,uint32_t * start,uint32_t * stuff,uint32_t * end,uint32_t * value,uint32_t * flags_ptr)1602 mac_hcksum_get(const mblk_t *mp, uint32_t *start, uint32_t *stuff,
1603 uint32_t *end, uint32_t *value, uint32_t *flags_ptr)
1604 {
1605 uint32_t flags;
1606
1607 ASSERT(DB_TYPE(mp) == M_DATA);
1608
1609 flags = DB_CKSUMFLAGS(mp) & HCK_FLAGS;
1610 if ((flags & (HCK_PARTIALCKSUM | HCK_FULLCKSUM)) != 0) {
1611 if (value != NULL)
1612 *value = (uint32_t)DB_CKSUM16(mp);
1613 if ((flags & HCK_PARTIALCKSUM) != 0) {
1614 if (start != NULL)
1615 *start = (uint32_t)DB_CKSUMSTART(mp);
1616 if (stuff != NULL)
1617 *stuff = (uint32_t)DB_CKSUMSTUFF(mp);
1618 if (end != NULL)
1619 *end = (uint32_t)DB_CKSUMEND(mp);
1620 }
1621 }
1622
1623 if (flags_ptr != NULL)
1624 *flags_ptr = flags;
1625 }
1626
1627 void
mac_hcksum_set(mblk_t * mp,uint32_t start,uint32_t stuff,uint32_t end,uint32_t value,uint32_t flags)1628 mac_hcksum_set(mblk_t *mp, uint32_t start, uint32_t stuff, uint32_t end,
1629 uint32_t value, uint32_t flags)
1630 {
1631 ASSERT(DB_TYPE(mp) == M_DATA);
1632
1633 DB_CKSUMSTART(mp) = (intptr_t)start;
1634 DB_CKSUMSTUFF(mp) = (intptr_t)stuff;
1635 DB_CKSUMEND(mp) = (intptr_t)end;
1636 DB_CKSUMFLAGS(mp) = (uint16_t)flags;
1637 DB_CKSUM16(mp) = (uint16_t)value;
1638 }
1639
1640 void
mac_hcksum_clone(const mblk_t * src,mblk_t * dst)1641 mac_hcksum_clone(const mblk_t *src, mblk_t *dst)
1642 {
1643 ASSERT3U(DB_TYPE(src), ==, M_DATA);
1644 ASSERT3U(DB_TYPE(dst), ==, M_DATA);
1645
1646 /*
1647 * Do these assignments unconditionally, rather than only when
1648 * flags is non-zero. This protects a situation where zeroed
1649 * hcksum data does not make the jump onto an mblk_t with
1650 * stale data in those fields. It's important to copy all
1651 * possible flags (HCK_* as well as HW_*) and not just the
1652 * checksum specific flags. Dropping flags during a clone
1653 * could result in dropped packets. If the caller has good
1654 * reason to drop those flags then it should do it manually,
1655 * after the clone.
1656 */
1657 DB_CKSUMFLAGS(dst) = DB_CKSUMFLAGS(src);
1658 DB_CKSUMSTART(dst) = DB_CKSUMSTART(src);
1659 DB_CKSUMSTUFF(dst) = DB_CKSUMSTUFF(src);
1660 DB_CKSUMEND(dst) = DB_CKSUMEND(src);
1661 DB_CKSUM16(dst) = DB_CKSUM16(src);
1662 DB_LSOMSS(dst) = DB_LSOMSS(src);
1663 }
1664
1665 void
mac_lso_get(mblk_t * mp,uint32_t * mss,uint32_t * flags)1666 mac_lso_get(mblk_t *mp, uint32_t *mss, uint32_t *flags)
1667 {
1668 ASSERT(DB_TYPE(mp) == M_DATA);
1669
1670 if (flags != NULL) {
1671 *flags = DB_CKSUMFLAGS(mp) & HW_LSO;
1672 if ((*flags != 0) && (mss != NULL))
1673 *mss = (uint32_t)DB_LSOMSS(mp);
1674 }
1675 }
1676
/*
 * Record whether a transceiver module is physically present.
 */
void
mac_transceiver_info_set_present(mac_transceiver_info_t *infop,
    boolean_t present)
{
	infop->mti_present = present;
}
1683
/*
 * Record whether the transceiver is usable by the device.
 */
void
mac_transceiver_info_set_usable(mac_transceiver_info_t *infop,
    boolean_t usable)
{
	infop->mti_usable = usable;
}
1690
1691 /*
1692 * We should really keep track of our offset and not walk everything every
1693 * time. I can't imagine that this will be kind to us at high packet rates;
1694 * however, for the moment, let's leave that.
1695 *
1696 * This walks a message block chain without pulling up to fill in the context
1697 * information. Note that the data we care about could be hidden across more
1698 * than one mblk_t.
1699 */
1700 static int
mac_meoi_get_uint8(mblk_t * mp,off_t off,uint8_t * out)1701 mac_meoi_get_uint8(mblk_t *mp, off_t off, uint8_t *out)
1702 {
1703 size_t mpsize;
1704 uint8_t *bp;
1705
1706 mpsize = msgsize(mp);
1707 /* Check for overflow */
1708 if (off + sizeof (uint16_t) > mpsize)
1709 return (-1);
1710
1711 mpsize = MBLKL(mp);
1712 while (off >= mpsize) {
1713 mp = mp->b_cont;
1714 off -= mpsize;
1715 mpsize = MBLKL(mp);
1716 }
1717
1718 bp = mp->b_rptr + off;
1719 *out = *bp;
1720 return (0);
1721
1722 }
1723
/*
 * Read a big-endian uint16_t at byte offset 'off' into the message chain
 * without pulling it up.  Returns 0 on success, or -1 if the two bytes are
 * not within the message.  The two bytes may straddle an mblk boundary.
 */
static int
mac_meoi_get_uint16(mblk_t *mp, off_t off, uint16_t *out)
{
	size_t mpsize;
	uint8_t *bp;

	mpsize = msgsize(mp);
	/* Check for overflow */
	if (off + sizeof (uint16_t) > mpsize)
		return (-1);

	/* Walk the chain to the mblk holding the first byte of interest. */
	mpsize = MBLKL(mp);
	while (off >= mpsize) {
		mp = mp->b_cont;
		off -= mpsize;
		mpsize = MBLKL(mp);
	}

	/*
	 * Data is in network order. Note the second byte of data might be in
	 * the next mp.
	 */
	bp = mp->b_rptr + off;
	*out = *bp << 8;
	if (off + 1 == mpsize) {
		/*
		 * NOTE(review): this assumes the next mblk is non-empty;
		 * a zero-length continuation block here would misread the
		 * second byte -- confirm callers cannot produce one.
		 */
		mp = mp->b_cont;
		bp = mp->b_rptr;
	} else {
		bp++;
	}

	*out |= *bp;
	return (0);

}
1759
1760 static boolean_t
mac_meoi_ip6eh_proto(uint8_t id)1761 mac_meoi_ip6eh_proto(uint8_t id)
1762 {
1763 switch (id) {
1764 case IPPROTO_HOPOPTS:
1765 case IPPROTO_ROUTING:
1766 case IPPROTO_FRAGMENT:
1767 case IPPROTO_AH:
1768 case IPPROTO_DSTOPTS:
1769 case IPPROTO_MH:
1770 case IPPROTO_HIP:
1771 case IPPROTO_SHIM6:
1772 /* Currently known extension headers */
1773 return (B_TRUE);
1774 case IPPROTO_ESP:
1775 /*
1776 * While the IANA protocol numbers listing notes ESP as an IPv6
1777 * extension header, we cannot effectively parse it like one.
1778 *
1779 * For now, mac_ether_offload_info() will report it as the L4
1780 * protocol for a parsed packet containing this EH.
1781 */
1782 default:
1783 return (B_FALSE);
1784 }
1785 }
1786
/*
 * Parse the L2/L3/L4 headers of the packet in 'mp' and fill in 'meoi'
 * with the header lengths and protocols found, setting the matching
 * MEOI_*_SET flag for each layer successfully parsed.  Returns 0 when
 * parsing stopped cleanly (possibly before L3/L4 for protocols we do
 * not handle) and -1 when a header is truncated or malformed.
 */
int
mac_ether_offload_info(mblk_t *mp, mac_ether_offload_info_t *meoi)
{
	size_t off;
	uint16_t ether, iplen;
	uint8_t ipproto, ip4verlen, l4len, maclen;

	bzero(meoi, sizeof (mac_ether_offload_info_t));

	const size_t pktlen = msgsize(mp);
	meoi->meoi_len = pktlen;
	off = offsetof(struct ether_header, ether_type);
	if (mac_meoi_get_uint16(mp, off, &ether) != 0)
		return (-1);

	/* For VLAN-tagged frames the real ethertype follows the tag. */
	if (ether == ETHERTYPE_VLAN) {
		off = offsetof(struct ether_vlan_header, ether_type);
		if (mac_meoi_get_uint16(mp, off, &ether) != 0)
			return (-1);
		meoi->meoi_flags |= MEOI_VLAN_TAGGED;
		maclen = sizeof (struct ether_vlan_header);
	} else {
		maclen = sizeof (struct ether_header);
	}
	if (maclen > pktlen)
		return (-1);
	meoi->meoi_flags |= MEOI_L2INFO_SET;
	meoi->meoi_l2hlen = maclen;
	meoi->meoi_l3proto = ether;

	switch (ether) {
	case ETHERTYPE_IP:
		/*
		 * For IPv4 we need to get the length of the header, as it can
		 * be variable.
		 */
		off = offsetof(ipha_t, ipha_version_and_hdr_length) + maclen;
		if (mac_meoi_get_uint8(mp, off, &ip4verlen) != 0)
			return (-1);
		/* Header length is in 32-bit words; 5 is the legal minimum. */
		ip4verlen &= 0x0f;
		if (ip4verlen < 5 || ip4verlen > 0x0f)
			return (-1);
		iplen = ip4verlen * 4;
		off = offsetof(ipha_t, ipha_protocol) + maclen;
		if (mac_meoi_get_uint8(mp, off, &ipproto) == -1)
			return (-1);
		break;
	case ETHERTYPE_IPV6:
		iplen = sizeof (ip6_t);
		off = offsetof(ip6_t, ip6_nxt) + maclen;
		if (mac_meoi_get_uint8(mp, off, &ipproto) == -1)
			return (-1);
		/* Chase any extension headers present in packet */
		while (mac_meoi_ip6eh_proto(ipproto)) {
			uint8_t len_val, next_proto;
			uint16_t eh_len;

			/* The next-header field is the EH's first byte. */
			off = maclen + iplen;
			if (mac_meoi_get_uint8(mp, off, &next_proto) == -1)
				return (-1);
			if (ipproto == IPPROTO_FRAGMENT) {
				/*
				 * The Fragment extension header bears a
				 * predefined fixed length, rather than
				 * communicating it through the EH itself.
				 */
				eh_len = 8;
			} else if (ipproto == IPPROTO_AH) {
				/*
				 * The length of the IP Authentication EH is
				 * stored as (n + 2) * 32-bits, where 'n' is the
				 * recorded EH length field
				 */
				off += 1;
				if (mac_meoi_get_uint8(mp, off, &len_val) == -1)
					return (-1);
				eh_len = ((uint16_t)len_val + 2) * 4;
			} else {
				/*
				 * All other EHs should follow the sizing
				 * formula of (n + 1) * 64-bits, where 'n' is
				 * the recorded EH length field.
				 */
				off += 1;
				if (mac_meoi_get_uint8(mp, off, &len_val) == -1)
					return (-1);
				eh_len = ((uint16_t)len_val + 1) * 8;
			}
			/*
			 * Protect against overflow in the case of a very
			 * contrived packet.
			 */
			if ((iplen + eh_len) < iplen) {
				return (-1);
			}

			iplen += eh_len;
			ipproto = next_proto;
		}
		break;
	default:
		/* Not an L3 protocol we understand; report L2 info only. */
		return (0);
	}
	if (((size_t)maclen + (size_t)iplen) > pktlen)
		return (-1);
	meoi->meoi_l3hlen = iplen;
	meoi->meoi_l4proto = ipproto;
	meoi->meoi_flags |= MEOI_L3INFO_SET;

	switch (ipproto) {
	case IPPROTO_TCP:
		/* TCP data offset is in 32-bit words; 5 is the minimum. */
		off = offsetof(tcph_t, th_offset_and_rsrvd) + maclen + iplen;
		if (mac_meoi_get_uint8(mp, off, &l4len) == -1)
			return (-1);
		l4len = (l4len & 0xf0) >> 4;
		if (l4len < 5 || l4len > 0xf)
			return (-1);
		l4len *= 4;
		break;
	case IPPROTO_UDP:
		l4len = sizeof (struct udphdr);
		break;
	case IPPROTO_SCTP:
		l4len = sizeof (sctp_hdr_t);
		break;
	default:
		/* Unrecognized L4; report what we have so far. */
		return (0);
	}

	if (((size_t)maclen + (size_t)iplen + (size_t)l4len) > pktlen)
		return (-1);
	meoi->meoi_l4hlen = l4len;
	meoi->meoi_flags |= MEOI_L4INFO_SET;
	return (0);
}
1922