1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 * Copyright 2019 Joyent, Inc.
25 */
26
27 /*
28 * Data-Link Services Module
29 */
30
31 #include <sys/sysmacros.h>
32 #include <sys/strsubr.h>
33 #include <sys/strsun.h>
34 #include <sys/vlan.h>
35 #include <sys/dld_impl.h>
36 #include <sys/sdt.h>
37 #include <sys/atomic.h>
38 #include <sys/sysevent.h>
39 #include <sys/sysevent/eventdefs.h>
40 #include <sys/sysevent/datalink.h>
41
/* kmem cache from which every dls_link_t is allocated (see dls_link_init). */
static kmem_cache_t	*i_dls_link_cachep;
/* Global table of dls_link_t structures, keyed by the dl_name string. */
mod_hash_t		*i_dls_link_hash;
/* Number of dls_link_t structures inserted into i_dls_link_hash and live. */
static uint_t		i_dls_link_count;

/* Sizes handed to the mod_hash creation routines in this file. */
#define	LINK_HASHSZ	67	/* prime */
#define	IMPL_HASHSZ	67	/* prime */
48
/*
 * Construct a hash key from the DLSAP value.
 *
 * The SAP is shifted left by VLAN_ID_SIZE bits and the result is converted
 * to a mod_hash_key_t by way of uintptr_t.  The shift is evaluated in the
 * (promoted) type of _sap, i.e. before the widening cast is applied.
 */
#define	MAKE_KEY(_sap)						\
	((mod_hash_key_t)(uintptr_t)((_sap) << VLAN_ID_SIZE))
54
/*
 * Trim message p down to pktsize bytes of data.
 *
 * Nothing is done when pktsize is zero, or when the message already holds
 * pktsize bytes or fewer.  Otherwise the surplus is handed to adjmsg() as a
 * negative length, which removes it from the tail of the message.
 *
 * Both arguments are parenthesized so that arbitrary expressions may be
 * passed, and the body is wrapped in do { } while (0) so the macro behaves
 * as a single statement (it must be followed by a semicolon).  Note that
 * both arguments are evaluated more than once.
 */
#define	DLS_STRIP_PADDING(pktsize, p) do {				\
	if ((pktsize) != 0) {						\
		ssize_t _delta = (pktsize) - msgdsize((p));		\
									\
		if (_delta < 0)						\
			(void) adjmsg((p), _delta);			\
	}								\
} while (0)
63
64 /*
65 * Private functions.
66 */
67
68 /*ARGSUSED*/
69 static int
i_dls_link_constructor(void * buf,void * arg,int kmflag)70 i_dls_link_constructor(void *buf, void *arg, int kmflag)
71 {
72 dls_link_t *dlp = buf;
73 char name[MAXNAMELEN];
74
75 bzero(buf, sizeof (dls_link_t));
76
77 (void) snprintf(name, MAXNAMELEN, "dls_link_t_%p_hash", buf);
78 dlp->dl_str_hash = mod_hash_create_idhash(name, IMPL_HASHSZ,
79 mod_hash_null_valdtor);
80
81 return (0);
82 }
83
84 /*ARGSUSED*/
85 static void
i_dls_link_destructor(void * buf,void * arg)86 i_dls_link_destructor(void *buf, void *arg)
87 {
88 dls_link_t *dlp = buf;
89
90 ASSERT(dlp->dl_ref == 0);
91 ASSERT(dlp->dl_mh == NULL);
92 ASSERT(dlp->dl_mah == NULL);
93 ASSERT(dlp->dl_unknowns == 0);
94
95 mod_hash_destroy_idhash(dlp->dl_str_hash);
96 dlp->dl_str_hash = NULL;
97
98 }
99
/*
 * - Parse the mac header information of the given packet.
 * - Strip the padding and skip over the header. Note that because some
 *   DLS consumers only check the db_ref count of the first mblk, we
 *   pullup the message into a single mblk. Because the original message
 *   is freed as the result of message pulling up, mac_vlan_header_info()
 *   is called again to update the mhi_saddr and mhi_daddr pointers in the
 *   mhip. Further, the mac_vlan_header_info() function ensures that the
 *   size of the pulled message is greater than the MAC header size,
 *   therefore we can directly advance b_rptr to point at the payload.
 *
 * Arguments:
 *   mh   - MAC handle passed through to mac_vlan_header_info().
 *   mp   - the packet; must be an lvalue, since it is reassigned when the
 *          message is pulled up.
 *   mhip - mac_header_info_t pointer that receives the parsed header.
 *   err  - lvalue that receives 0 on success, the error returned by
 *          mac_vlan_header_info(), or EINVAL when msgpullup() fails.
 *
 * On any failure mp is left as it was (b_rptr is not advanced and the
 * message is not freed), so the caller still owns it.  The b_next link of
 * the packet is sampled up front and re-attached to the pulled-up message,
 * so the packet keeps its successor in the chain; whoever points at the
 * packet from the previous element must be updated by the caller.
 *
 * We choose to use a macro for performance reasons.
 */
#define	DLS_PREPARE_PKT(mh, mp, mhip, err) {				\
	mblk_t *nextp = (mp)->b_next;					\
	if (((err) = mac_vlan_header_info((mh), (mp), (mhip))) == 0) {	\
		DLS_STRIP_PADDING((mhip)->mhi_pktsize, (mp));		\
		if (MBLKL((mp)) < (mhip)->mhi_hdrsize) {		\
			mblk_t *newmp;					\
			if ((newmp = msgpullup((mp), -1)) == NULL) {	\
				(err) = EINVAL;				\
			} else {					\
				(mp)->b_next = NULL;			\
				freemsg((mp));				\
				(mp) = newmp;				\
				VERIFY(mac_vlan_header_info((mh),	\
				    (mp), (mhip)) == 0);		\
				(mp)->b_next = nextp;			\
				(mp)->b_rptr += (mhip)->mhi_hdrsize;	\
			}						\
		} else {						\
			(mp)->b_rptr += (mhip)->mhi_hdrsize;		\
		}							\
	}								\
}
135
136 /*
137 * Truncate the chain starting at mp such that all packets in the chain
138 * have identical source and destination addresses, saps, and tag types
139 * (see below). It returns a pointer to the mblk following the chain,
140 * NULL if there is no further packet following the processed chain.
141 * The countp argument is set to the number of valid packets in the chain.
142 * Note that the whole MAC header (including the VLAN tag if any) in each
143 * packet will be stripped.
144 */
145 static mblk_t *
i_dls_link_subchain(dls_link_t * dlp,mblk_t * mp,const mac_header_info_t * mhip,uint_t * countp)146 i_dls_link_subchain(dls_link_t *dlp, mblk_t *mp, const mac_header_info_t *mhip,
147 uint_t *countp)
148 {
149 mblk_t *prevp;
150 uint_t npacket = 1;
151 size_t addr_size = dlp->dl_mip->mi_addr_length;
152 uint16_t vid = VLAN_ID(mhip->mhi_tci);
153 uint16_t pri = VLAN_PRI(mhip->mhi_tci);
154
155 /*
156 * Compare with subsequent headers until we find one that has
157 * differing header information. After checking each packet
158 * strip padding and skip over the header.
159 */
160 for (prevp = mp; (mp = mp->b_next) != NULL; prevp = mp) {
161 mac_header_info_t cmhi;
162 uint16_t cvid, cpri;
163 int err;
164
165 DLS_PREPARE_PKT(dlp->dl_mh, mp, &cmhi, err);
166 if (err != 0)
167 break;
168
169 prevp->b_next = mp;
170
171 /*
172 * The source, destination, sap, vlan tag must all match in
173 * a given subchain.
174 */
175 if (mhip->mhi_saddr == NULL || cmhi.mhi_saddr == NULL ||
176 memcmp(mhip->mhi_daddr, cmhi.mhi_daddr, addr_size) != 0 ||
177 memcmp(mhip->mhi_saddr, cmhi.mhi_saddr, addr_size) != 0 ||
178 mhip->mhi_bindsap != cmhi.mhi_bindsap) {
179 /*
180 * Note that we don't need to restore the padding.
181 */
182 mp->b_rptr -= cmhi.mhi_hdrsize;
183 break;
184 }
185
186 cvid = VLAN_ID(cmhi.mhi_tci);
187 cpri = VLAN_PRI(cmhi.mhi_tci);
188
189 /*
190 * There are several types of packets. Packets don't match
191 * if they are classified to different type or if they are
192 * VLAN packets but belong to different VLANs:
193 *
194 * packet type tagged vid pri
195 * ---------------------------------------------------------
196 * untagged No zero zero
197 * VLAN packets Yes non-zero -
198 * priority tagged Yes zero non-zero
199 * 0 tagged Yes zero zero
200 */
201 if ((mhip->mhi_istagged != cmhi.mhi_istagged) ||
202 (vid != cvid) || ((vid == VLAN_ID_NONE) &&
203 (((pri == 0) && (cpri != 0)) ||
204 ((pri != 0) && (cpri == 0))))) {
205 mp->b_rptr -= cmhi.mhi_hdrsize;
206 break;
207 }
208
209 npacket++;
210 }
211
212 /*
213 * Break the chain at this point and return a pointer to the next
214 * sub-chain.
215 */
216 prevp->b_next = NULL;
217 *countp = npacket;
218 return (mp);
219 }
220
221 /* ARGSUSED */
222 static int
i_dls_head_hold(mod_hash_key_t key,mod_hash_val_t val)223 i_dls_head_hold(mod_hash_key_t key, mod_hash_val_t val)
224 {
225 dls_head_t *dhp = (dls_head_t *)val;
226
227 /*
228 * The lock order is mod_hash's internal lock -> dh_lock as in the
229 * call to i_dls_link_rx -> mod_hash_find_cb_rval -> i_dls_head_hold
230 */
231 mutex_enter(&dhp->dh_lock);
232 if (dhp->dh_removing) {
233 mutex_exit(&dhp->dh_lock);
234 return (-1);
235 }
236 dhp->dh_ref++;
237 mutex_exit(&dhp->dh_lock);
238 return (0);
239 }
240
241 void
i_dls_head_rele(dls_head_t * dhp)242 i_dls_head_rele(dls_head_t *dhp)
243 {
244 mutex_enter(&dhp->dh_lock);
245 dhp->dh_ref--;
246 if (dhp->dh_ref == 0 && dhp->dh_removing != 0)
247 cv_broadcast(&dhp->dh_cv);
248 mutex_exit(&dhp->dh_lock);
249 }
250
251 static dls_head_t *
i_dls_head_alloc(mod_hash_key_t key)252 i_dls_head_alloc(mod_hash_key_t key)
253 {
254 dls_head_t *dhp;
255
256 dhp = kmem_zalloc(sizeof (dls_head_t), KM_SLEEP);
257 dhp->dh_key = key;
258 return (dhp);
259 }
260
261 static void
i_dls_head_free(dls_head_t * dhp)262 i_dls_head_free(dls_head_t *dhp)
263 {
264 ASSERT(dhp->dh_ref == 0);
265 kmem_free(dhp, sizeof (dls_head_t));
266 }
267
268 /*
269 * Try to send mp up to the streams of the given sap. Return the
270 * number of streams which accepted this message, or 0 if no streams
271 * accepted the message.
272 *
273 * Note that this function copies the message chain and the original
274 * mp remains valid after this function returns.
275 */
276 static uint_t
i_dls_link_rx_func(dls_link_t * dlp,mac_resource_handle_t mrh,mac_header_info_t * mhip,mblk_t * mp,uint32_t sap,boolean_t (* acceptfunc)())277 i_dls_link_rx_func(dls_link_t *dlp, mac_resource_handle_t mrh,
278 mac_header_info_t *mhip, mblk_t *mp, uint32_t sap,
279 boolean_t (*acceptfunc)())
280 {
281 mod_hash_t *hash = dlp->dl_str_hash;
282 mod_hash_key_t key;
283 dls_head_t *dhp;
284 dld_str_t *dsp;
285 mblk_t *nmp;
286 dls_rx_t ds_rx;
287 void *ds_rx_arg;
288 uint_t naccepted = 0;
289 int rval;
290
291 /*
292 * Construct a hash key from the DLSAP.
293 */
294 key = MAKE_KEY(sap);
295
296 /*
297 * Search the hash table for a dld_str_t eligible to receive a
298 * packet chain for this DLSAP. The mod hash's internal lock
299 * serializes find/insert/remove from the mod hash list.
300 * Incrementing the dh_ref (while holding the mod hash lock)
301 * ensures dls_link_remove will wait for the upcall to finish.
302 */
303 if (mod_hash_find_cb_rval(hash, key, (mod_hash_val_t *)&dhp,
304 i_dls_head_hold, &rval) != 0 || (rval != 0)) {
305 return (0);
306 }
307
308 /*
309 * Find all dld_str_t that will accept the sub-chain.
310 */
311 for (dsp = dhp->dh_list; dsp != NULL; dsp = dsp->ds_next) {
312 if (!acceptfunc(dsp, mhip, &ds_rx, &ds_rx_arg))
313 continue;
314
315 /*
316 * We have at least one acceptor.
317 */
318 naccepted++;
319
320 /*
321 * There will normally be at least one more dld_str_t
322 * (since we've yet to check for non-promiscuous
323 * dld_str_t) so dup the sub-chain.
324 */
325 if ((nmp = copymsgchain(mp)) != NULL)
326 ds_rx(ds_rx_arg, mrh, nmp, mhip);
327 }
328
329 /*
330 * Release the hold on the dld_str_t chain now that we have
331 * finished walking it.
332 */
333 i_dls_head_rele(dhp);
334 return (naccepted);
335 }
336
/*
 * Receive routine for a dls_link_t: takes a chain of packets (linked via
 * b_next) and hands them to the streams bound to the link.
 *
 *   arg      - the dls_link_t the packets arrived on.
 *   mrh      - resource handle, passed through untouched to the streams'
 *              receive functions.
 *   mp       - head of the packet chain; every packet is consumed (either
 *              delivered or freed).
 *   loopback - unused here.
 *
 * The chain is processed one sub-chain at a time, where a sub-chain is a
 * run of packets with matching header information as determined by
 * i_dls_link_subchain().  For each sub-chain:
 *   - if the head packet is tagged, copies are first offered to the streams
 *     bound to ETHERTYPE_VLAN;
 *   - the sub-chain is then offered to the streams bound to the packet's
 *     own bind SAP.  Every acceptor but the last receives a copy; the last
 *     receives the original.
 * Packets that nobody accepted, and packets whose header could not be
 * parsed, are freed and added to the link's dl_unknowns counter.
 */
/* ARGSUSED */
void
i_dls_link_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    boolean_t loopback)
{
	dls_link_t			*dlp = arg;
	mod_hash_t			*hash = dlp->dl_str_hash;
	mblk_t				*nextp;
	mac_header_info_t		mhi;
	dls_head_t			*dhp;
	dld_str_t			*dsp;
	dld_str_t			*ndsp;
	mblk_t				*nmp;
	mod_hash_key_t			key;
	uint_t				npacket;
	boolean_t			accepted;
	dls_rx_t			ds_rx, nds_rx;
	void				*ds_rx_arg, *nds_rx_arg;
	uint16_t			vid;
	int				err, rval;

	/*
	 * Walk the packet chain.
	 */
	for (; mp != NULL; mp = nextp) {
		/*
		 * Wipe the accepted state.
		 */
		accepted = B_FALSE;

		DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err);
		if (err != 0) {
			/*
			 * The header could not be parsed: count and drop
			 * just this packet, then carry on with the next.
			 * This path bypasses the accounting at 'loop'.
			 */
			atomic_inc_32(&(dlp->dl_unknowns));
			nextp = mp->b_next;
			mp->b_next = NULL;
			freemsg(mp);
			continue;
		}

		/*
		 * Grab the longest sub-chain we can process as a single
		 * unit.
		 */
		nextp = i_dls_link_subchain(dlp, mp, &mhi, &npacket);
		ASSERT(npacket != 0);

		vid = VLAN_ID(mhi.mhi_tci);

		/*
		 * This condition is true only when a sun4v vsw client
		 * is on the scene; as it is the only type of client
		 * that multiplexes VLANs on a single client instance.
		 * All other types of clients have one VLAN per client
		 * instance. In that case, MAC strips the VLAN tag
		 * before delivering it to DLS (see mac_rx_deliver()).
		 */
		if (mhi.mhi_istagged) {

			/*
			 * If it is tagged traffic, send it upstream to
			 * all dld_str_t which are attached to the physical
			 * link and bound to SAP 0x8100.
			 */
			if (i_dls_link_rx_func(dlp, mrh, &mhi, mp,
			    ETHERTYPE_VLAN, dls_accept) > 0) {
				accepted = B_TRUE;
			}

			/*
			 * Don't pass the packets up if they are tagged
			 * packets and:
			 * - their VID and priority are both zero and the
			 *   original packet isn't using the PVID (invalid
			 *   packets).
			 * - their sap is ETHERTYPE_VLAN and their VID is
			 *   zero as they have already been sent upstreams.
			 */
			if ((vid == VLAN_ID_NONE && !mhi.mhi_ispvid &&
			    VLAN_PRI(mhi.mhi_tci) == 0) ||
			    (mhi.mhi_bindsap == ETHERTYPE_VLAN &&
			    vid == VLAN_ID_NONE)) {
				freemsgchain(mp);
				goto loop;
			}
		}

		/*
		 * Construct a hash key from the DLSAP.
		 */
		key = MAKE_KEY(mhi.mhi_bindsap);

		/*
		 * Search the hash table for dld_str_t eligible to receive
		 * a packet chain for this DLSAP.  A successful lookup also
		 * takes a hold on the list head (i_dls_head_hold), which
		 * must be dropped with i_dls_head_rele() on every path
		 * below.
		 */
		if (mod_hash_find_cb_rval(hash, key, (mod_hash_val_t *)&dhp,
		    i_dls_head_hold, &rval) != 0 || (rval != 0)) {
			freemsgchain(mp);
			goto loop;
		}

		/*
		 * Find the first dld_str_t that will accept the sub-chain.
		 */
		for (dsp = dhp->dh_list; dsp != NULL; dsp = dsp->ds_next)
			if (dls_accept(dsp, &mhi, &ds_rx, &ds_rx_arg))
				break;

		/*
		 * If we did not find any dld_str_t willing to accept the
		 * sub-chain then throw it away.
		 */
		if (dsp == NULL) {
			i_dls_head_rele(dhp);
			freemsgchain(mp);
			goto loop;
		}

		/*
		 * We have at least one acceptor.
		 */
		accepted = B_TRUE;
		for (;;) {
			/*
			 * Find the next dld_str_t that will accept the
			 * sub-chain.
			 */
			for (ndsp = dsp->ds_next; ndsp != NULL;
			    ndsp = ndsp->ds_next)
				if (dls_accept(ndsp, &mhi, &nds_rx,
				    &nds_rx_arg))
					break;

			/*
			 * If there are no more dld_str_t that are willing
			 * to accept the sub-chain then we don't need to dup
			 * it before handing it to the current one.
			 */
			if (ndsp == NULL) {
				ds_rx(ds_rx_arg, mrh, mp, &mhi);

				/*
				 * Since there are no more dld_str_t, we're
				 * done.
				 */
				break;
			}

			/*
			 * There are more dld_str_t so dup the sub-chain.
			 * If the copy fails the current dld_str_t simply
			 * misses out on this sub-chain.
			 */
			if ((nmp = copymsgchain(mp)) != NULL)
				ds_rx(ds_rx_arg, mrh, nmp, &mhi);

			dsp = ndsp;
			ds_rx = nds_rx;
			ds_rx_arg = nds_rx_arg;
		}

		/*
		 * Release the hold on the dld_str_t chain now that we have
		 * finished walking it.
		 */
		i_dls_head_rele(dhp);

loop:
		/*
		 * If there were no acceptors then add the packet count to the
		 * 'unknown' count.
		 */
		if (!accepted)
			atomic_add_32(&(dlp->dl_unknowns), npacket);
	}
}
511
512 /* ARGSUSED */
513 void
dls_rx_vlan_promisc(void * arg,mac_resource_handle_t mrh,mblk_t * mp,boolean_t loopback)514 dls_rx_vlan_promisc(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
515 boolean_t loopback)
516 {
517 dld_str_t *dsp = arg;
518 dls_link_t *dlp = dsp->ds_dlp;
519 mac_header_info_t mhi;
520 dls_rx_t ds_rx;
521 void *ds_rx_arg;
522 int err;
523
524 DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err);
525 if (err != 0)
526 goto drop;
527
528 /*
529 * If there is promiscuous handle for vlan, we filter out the untagged
530 * pkts and pkts that are not for the primary unicast address.
531 */
532 if (dsp->ds_vlan_mph != NULL) {
533 uint8_t prim_addr[MAXMACADDRLEN];
534 size_t addr_length = dsp->ds_mip->mi_addr_length;
535
536 if (!(mhi.mhi_istagged))
537 goto drop;
538 ASSERT(dsp->ds_mh != NULL);
539 mac_unicast_primary_get(dsp->ds_mh, (uint8_t *)prim_addr);
540 if (memcmp(mhi.mhi_daddr, prim_addr, addr_length) != 0)
541 goto drop;
542
543 if (!dls_accept(dsp, &mhi, &ds_rx, &ds_rx_arg))
544 goto drop;
545
546 ds_rx(ds_rx_arg, NULL, mp, &mhi);
547 return;
548 }
549
550 drop:
551 atomic_inc_32(&dlp->dl_unknowns);
552 freemsg(mp);
553 }
554
555 /* ARGSUSED */
556 void
dls_rx_promisc(void * arg,mac_resource_handle_t mrh,mblk_t * mp,boolean_t loopback)557 dls_rx_promisc(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
558 boolean_t loopback)
559 {
560 dld_str_t *dsp = arg;
561 dls_link_t *dlp = dsp->ds_dlp;
562 mac_header_info_t mhi;
563 dls_rx_t ds_rx;
564 void *ds_rx_arg;
565 int err;
566 dls_head_t *dhp;
567 mod_hash_key_t key;
568
569 /*
570 * We expect to deal with only a single packet.
571 */
572 ASSERT3P(mp->b_next, ==, NULL);
573
574 DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err);
575
576 if (err != 0)
577 goto drop;
578
579 /*
580 * In order to filter out sap pkt that no dls channel listens, search
581 * the hash table trying to find a dld_str_t eligible to receive the pkt
582 */
583 if ((dsp->ds_promisc & DLS_PROMISC_SAP) == 0) {
584 key = MAKE_KEY(mhi.mhi_bindsap);
585 if (mod_hash_find(dsp->ds_dlp->dl_str_hash, key,
586 (mod_hash_val_t *)&dhp) != 0)
587 goto drop;
588 }
589
590 if (!dls_accept_promisc(dsp, &mhi, &ds_rx, &ds_rx_arg, loopback))
591 goto drop;
592
593 ds_rx(ds_rx_arg, NULL, mp, &mhi);
594 return;
595
596 drop:
597 atomic_inc_32(&dlp->dl_unknowns);
598 freemsg(mp);
599 }
600
601 /*
602 * We'd like to notify via sysevents that a link state change has occurred.
603 * There are a couple of challenges associated with this. The first is that if
604 * the link is flapping a lot, we may not see an accurate state when we launch
605 * the notification, we're told it changed, not what it changed to.
606 *
607 * The next problem is that all of the information that a user has associated
608 * with this device is the exact opposite of what we have on the dls_link_t. We
609 * have the name of the mac device, which has no bearing on what users see.
610 * Likewise, we don't have the datalink id either. So we're going to have to get
611 * this from dls.
612 *
613 * This is all further complicated by the fact that this could be going on in
614 * another thread at the same time as someone is tearing down the dls_link_t
615 * that we're associated with. We need to be careful not to grab the mac
616 * perimeter, otherwise we stand a good chance of deadlock.
617 */
618 static void
dls_link_notify(void * arg,mac_notify_type_t type)619 dls_link_notify(void *arg, mac_notify_type_t type)
620 {
621 dls_link_t *dlp = arg;
622 dls_dl_handle_t dhp;
623 nvlist_t *nvp;
624 sysevent_t *event;
625 sysevent_id_t eid;
626
627 if (type != MAC_NOTE_LINK && type != MAC_NOTE_LOWLINK)
628 return;
629
630 /*
631 * If we can't find a devnet handle for this link, then there is no user
632 * knowable device for this at the moment and there's nothing we can
633 * really share with them that will make sense.
634 */
635 if (dls_devnet_hold_tmp_by_link(dlp, &dhp) != 0)
636 return;
637
638 /*
639 * Because we're attaching this nvlist_t to the sysevent, it'll get
640 * cleaned up when we call sysevent_free.
641 */
642 VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
643 VERIFY(nvlist_add_int32(nvp, DATALINK_EV_LINK_ID,
644 dls_devnet_linkid(dhp)) == 0);
645 VERIFY(nvlist_add_string(nvp, DATALINK_EV_LINK_NAME,
646 dls_devnet_link(dhp)) == 0);
647 VERIFY(nvlist_add_int32(nvp, DATALINK_EV_ZONE_ID,
648 dls_devnet_getzid(dhp)) == 0);
649
650 dls_devnet_rele_tmp(dhp);
651
652 event = sysevent_alloc(EC_DATALINK, ESC_DATALINK_LINK_STATE,
653 ILLUMOS_KERN_PUB"dls", SE_SLEEP);
654 VERIFY(event != NULL);
655 (void) sysevent_attach_attributes(event, (sysevent_attr_list_t *)nvp);
656
657 (void) log_sysevent(event, SE_SLEEP, &eid);
658 sysevent_free(event);
659
660 }
661
662 static void
i_dls_link_destroy(dls_link_t * dlp)663 i_dls_link_destroy(dls_link_t *dlp)
664 {
665 ASSERT(dlp->dl_nactive == 0);
666 ASSERT(dlp->dl_impl_count == 0);
667 ASSERT(dlp->dl_zone_ref == 0);
668
669 /*
670 * Free the structure back to the cache.
671 */
672 if (dlp->dl_mnh != NULL)
673 mac_notify_remove(dlp->dl_mnh, B_TRUE);
674
675 if (dlp->dl_mch != NULL)
676 mac_client_close(dlp->dl_mch, 0);
677
678 if (dlp->dl_mh != NULL) {
679 ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
680 mac_close(dlp->dl_mh);
681 }
682
683 dlp->dl_mh = NULL;
684 dlp->dl_mch = NULL;
685 dlp->dl_mip = NULL;
686 dlp->dl_mnh = NULL;
687 dlp->dl_unknowns = 0;
688 dlp->dl_nonip_cnt = 0;
689 kmem_cache_free(i_dls_link_cachep, dlp);
690 }
691
692 static int
i_dls_link_create(const char * name,dls_link_t ** dlpp)693 i_dls_link_create(const char *name, dls_link_t **dlpp)
694 {
695 dls_link_t *dlp;
696 int err;
697
698 /*
699 * Allocate a new dls_link_t structure.
700 */
701 dlp = kmem_cache_alloc(i_dls_link_cachep, KM_SLEEP);
702
703 /*
704 * Name the dls_link_t after the MAC interface it represents.
705 */
706 (void) strlcpy(dlp->dl_name, name, sizeof (dlp->dl_name));
707
708 /*
709 * First reference; hold open the MAC interface.
710 */
711 ASSERT(dlp->dl_mh == NULL);
712 err = mac_open(dlp->dl_name, &dlp->dl_mh);
713 if (err != 0)
714 goto bail;
715
716 ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
717 dlp->dl_mip = mac_info(dlp->dl_mh);
718
719 /* DLS is the "primary" MAC client */
720 ASSERT(dlp->dl_mch == NULL);
721
722 err = mac_client_open(dlp->dl_mh, &dlp->dl_mch, NULL,
723 MAC_OPEN_FLAGS_USE_DATALINK_NAME);
724 if (err != 0)
725 goto bail;
726
727 dlp->dl_mnh = mac_notify_add(dlp->dl_mh, dls_link_notify, dlp);
728
729 DTRACE_PROBE2(dls__primary__client, char *, dlp->dl_name, void *,
730 dlp->dl_mch);
731
732 *dlpp = dlp;
733 return (0);
734
735 bail:
736 i_dls_link_destroy(dlp);
737 return (err);
738 }
739
740 /*
741 * Module initialization functions.
742 */
743
744 void
dls_link_init(void)745 dls_link_init(void)
746 {
747 /*
748 * Create a kmem_cache of dls_link_t structures.
749 */
750 i_dls_link_cachep = kmem_cache_create("dls_link_cache",
751 sizeof (dls_link_t), 0, i_dls_link_constructor,
752 i_dls_link_destructor, NULL, NULL, NULL, 0);
753 ASSERT(i_dls_link_cachep != NULL);
754
755 /*
756 * Create a dls_link_t hash table and associated lock.
757 */
758 i_dls_link_hash = mod_hash_create_extended("dls_link_hash",
759 IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
760 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
761 i_dls_link_count = 0;
762 }
763
764 int
dls_link_fini(void)765 dls_link_fini(void)
766 {
767 if (i_dls_link_count > 0)
768 return (EBUSY);
769
770 /*
771 * Destroy the kmem_cache.
772 */
773 kmem_cache_destroy(i_dls_link_cachep);
774
775 /*
776 * Destroy the hash table and associated lock.
777 */
778 mod_hash_destroy_hash(i_dls_link_hash);
779 return (0);
780 }
781
782 /*
783 * Exported functions.
784 */
785
786 static int
dls_link_hold_common(const char * name,dls_link_t ** dlpp,boolean_t create)787 dls_link_hold_common(const char *name, dls_link_t **dlpp, boolean_t create)
788 {
789 dls_link_t *dlp;
790 int err;
791
792 /*
793 * Look up a dls_link_t corresponding to the given macname in the
794 * global hash table. The i_dls_link_hash itself is protected by the
795 * mod_hash package's internal lock which synchronizes
796 * find/insert/remove into the global mod_hash list. Assumes that
797 * inserts and removes are single threaded on a per mac end point
798 * by the mac perimeter.
799 */
800 if ((err = mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name,
801 (mod_hash_val_t *)&dlp)) == 0)
802 goto done;
803
804 ASSERT(err == MH_ERR_NOTFOUND);
805 if (!create)
806 return (ENOENT);
807
808 /*
809 * We didn't find anything so we need to create one.
810 */
811 if ((err = i_dls_link_create(name, &dlp)) != 0)
812 return (err);
813
814 /*
815 * Insert the dls_link_t.
816 */
817 err = mod_hash_insert(i_dls_link_hash, (mod_hash_key_t)dlp->dl_name,
818 (mod_hash_val_t)dlp);
819 ASSERT(err == 0);
820
821 atomic_inc_32(&i_dls_link_count);
822 ASSERT(i_dls_link_count != 0);
823
824 done:
825 ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
826 /*
827 * Bump the reference count and hand back the reference.
828 */
829 dlp->dl_ref++;
830 *dlpp = dlp;
831 return (0);
832 }
833
834 int
dls_link_hold_create(const char * name,dls_link_t ** dlpp)835 dls_link_hold_create(const char *name, dls_link_t **dlpp)
836 {
837 return (dls_link_hold_common(name, dlpp, B_TRUE));
838 }
839
840 int
dls_link_hold(const char * name,dls_link_t ** dlpp)841 dls_link_hold(const char *name, dls_link_t **dlpp)
842 {
843 return (dls_link_hold_common(name, dlpp, B_FALSE));
844 }
845
846 dev_info_t *
dls_link_devinfo(dev_t dev)847 dls_link_devinfo(dev_t dev)
848 {
849 dls_link_t *dlp;
850 dev_info_t *dip;
851 char macname[MAXNAMELEN];
852 char *drv;
853 mac_perim_handle_t mph;
854
855 if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
856 return (NULL);
857 (void) snprintf(macname, MAXNAMELEN, "%s%d", drv,
858 DLS_MINOR2INST(getminor(dev)));
859
860 /*
861 * The code below assumes that the name constructed above is the
862 * macname. This is not the case for legacy devices. Currently this
863 * is ok because this function is only called in the getinfo(9e) path,
864 * which for a legacy device would directly end up in the driver's
865 * getinfo, rather than here
866 */
867 if (mac_perim_enter_by_macname(macname, &mph) != 0)
868 return (NULL);
869
870 if (dls_link_hold(macname, &dlp) != 0) {
871 mac_perim_exit(mph);
872 return (NULL);
873 }
874
875 dip = mac_devinfo_get(dlp->dl_mh);
876 dls_link_rele(dlp);
877 mac_perim_exit(mph);
878
879 return (dip);
880 }
881
882 dev_t
dls_link_dev(dls_link_t * dlp)883 dls_link_dev(dls_link_t *dlp)
884 {
885 return (makedevice(ddi_driver_major(mac_devinfo_get(dlp->dl_mh)),
886 mac_minor(dlp->dl_mh)));
887 }
888
889 void
dls_link_rele(dls_link_t * dlp)890 dls_link_rele(dls_link_t *dlp)
891 {
892 mod_hash_val_t val;
893
894 ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
895 /*
896 * Check if there are any more references.
897 */
898 if (--dlp->dl_ref == 0) {
899 (void) mod_hash_remove(i_dls_link_hash,
900 (mod_hash_key_t)dlp->dl_name, &val);
901 ASSERT(dlp == (dls_link_t *)val);
902
903 /*
904 * Destroy the dls_link_t.
905 */
906 i_dls_link_destroy(dlp);
907 ASSERT(i_dls_link_count > 0);
908 atomic_dec_32(&i_dls_link_count);
909 }
910 }
911
912 int
dls_link_rele_by_name(const char * name)913 dls_link_rele_by_name(const char *name)
914 {
915 dls_link_t *dlp;
916
917 if (mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name,
918 (mod_hash_val_t *)&dlp) != 0)
919 return (ENOENT);
920
921 ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
922
923 /*
924 * Must fail detach if mac client is busy.
925 */
926 ASSERT(dlp->dl_ref > 0 && dlp->dl_mch != NULL);
927 if (mac_link_has_flows(dlp->dl_mch))
928 return (ENOTEMPTY);
929
930 dls_link_rele(dlp);
931 return (0);
932 }
933
934 int
dls_link_setzid(const char * name,zoneid_t zid)935 dls_link_setzid(const char *name, zoneid_t zid)
936 {
937 dls_link_t *dlp;
938 int err = 0;
939 zoneid_t old_zid;
940
941 if ((err = dls_link_hold_create(name, &dlp)) != 0)
942 return (err);
943
944 ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
945
946 if ((old_zid = dlp->dl_zid) == zid)
947 goto done;
948
949 /*
950 * Check whether this dlp is used by its own zone. If yes, we cannot
951 * change its zoneid.
952 */
953 if (dlp->dl_zone_ref != 0) {
954 err = EBUSY;
955 goto done;
956 }
957
958 dlp->dl_zid = zid;
959
960 if (zid == GLOBAL_ZONEID) {
961 /*
962 * The link is moving from a non-global zone to the global
963 * zone, so we need to release the reference that was held
964 * when the link was originally assigned to the non-global
965 * zone.
966 */
967 dls_link_rele(dlp);
968 }
969
970 done:
971 /*
972 * We only keep the reference to this link open if the link has
973 * successfully moved from the global zone to a non-global zone.
974 */
975 if (err != 0 || old_zid != GLOBAL_ZONEID)
976 dls_link_rele(dlp);
977 return (err);
978 }
979
980 int
dls_link_getzid(const char * name,zoneid_t * zidp)981 dls_link_getzid(const char *name, zoneid_t *zidp)
982 {
983 dls_link_t *dlp;
984 int err = 0;
985
986 if ((err = dls_link_hold(name, &dlp)) != 0)
987 return (err);
988
989 ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
990
991 *zidp = dlp->dl_zid;
992
993 dls_link_rele(dlp);
994 return (0);
995 }
996
997 void
dls_link_add(dls_link_t * dlp,uint32_t sap,dld_str_t * dsp)998 dls_link_add(dls_link_t *dlp, uint32_t sap, dld_str_t *dsp)
999 {
1000 mod_hash_t *hash = dlp->dl_str_hash;
1001 mod_hash_key_t key;
1002 dls_head_t *dhp;
1003 dld_str_t *p;
1004 int err;
1005
1006 ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
1007
1008 /*
1009 * Generate a hash key based on the sap.
1010 */
1011 key = MAKE_KEY(sap);
1012
1013 /*
1014 * Search the table for a list head with this key.
1015 */
1016 if ((err = mod_hash_find(hash, key, (mod_hash_val_t *)&dhp)) != 0) {
1017 ASSERT(err == MH_ERR_NOTFOUND);
1018
1019 dhp = i_dls_head_alloc(key);
1020 err = mod_hash_insert(hash, key, (mod_hash_val_t)dhp);
1021 ASSERT(err == 0);
1022 }
1023
1024 /*
1025 * Add the dld_str_t to the head of the list. List walkers in
1026 * i_dls_link_rx_* bump up dh_ref to ensure the list does not change
1027 * while they walk the list. The membar below ensures that list walkers
1028 * see exactly the old list or the new list.
1029 */
1030 ASSERT(dsp->ds_next == NULL);
1031 p = dhp->dh_list;
1032 dsp->ds_next = p;
1033
1034 membar_producer();
1035
1036 dhp->dh_list = dsp;
1037
1038 /*
1039 * Save a pointer to the list head.
1040 */
1041 dsp->ds_head = dhp;
1042 dlp->dl_impl_count++;
1043 }
1044
1045 void
dls_link_remove(dls_link_t * dlp,dld_str_t * dsp)1046 dls_link_remove(dls_link_t *dlp, dld_str_t *dsp)
1047 {
1048 mod_hash_t *hash = dlp->dl_str_hash;
1049 dld_str_t **pp;
1050 dld_str_t *p;
1051 dls_head_t *dhp;
1052
1053 ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
1054
1055 /*
1056 * We set dh_removing here to tell the receive callbacks not to pass
1057 * up packets anymore. Then wait till the current callbacks are done.
1058 * This happens either in the close path or in processing the
1059 * DL_UNBIND_REQ via a taskq thread, and it is ok to cv_wait in either.
1060 * The dh_ref ensures there aren't and there won't be any upcalls
1061 * walking or using the dh_list. The mod hash internal lock ensures
1062 * that the insert/remove of the dls_head_t itself synchronizes with
1063 * any i_dls_link_rx trying to locate it. The perimeter ensures that
1064 * there isn't another simultaneous dls_link_add/remove.
1065 */
1066 dhp = dsp->ds_head;
1067
1068 mutex_enter(&dhp->dh_lock);
1069 dhp->dh_removing = B_TRUE;
1070 while (dhp->dh_ref != 0)
1071 cv_wait(&dhp->dh_cv, &dhp->dh_lock);
1072 mutex_exit(&dhp->dh_lock);
1073
1074 /*
1075 * Walk the list and remove the dld_str_t.
1076 */
1077 for (pp = &dhp->dh_list; (p = *pp) != NULL; pp = &(p->ds_next)) {
1078 if (p == dsp)
1079 break;
1080 }
1081 ASSERT(p != NULL);
1082 *pp = p->ds_next;
1083 p->ds_next = NULL;
1084 p->ds_head = NULL;
1085
1086 ASSERT(dlp->dl_impl_count != 0);
1087 dlp->dl_impl_count--;
1088
1089 if (dhp->dh_list == NULL) {
1090 mod_hash_val_t val = NULL;
1091
1092 /*
1093 * The list is empty so remove the hash table entry.
1094 */
1095 (void) mod_hash_remove(hash, dhp->dh_key, &val);
1096 ASSERT(dhp == (dls_head_t *)val);
1097 i_dls_head_free(dhp);
1098 } else {
1099 mutex_enter(&dhp->dh_lock);
1100 dhp->dh_removing = B_FALSE;
1101 mutex_exit(&dhp->dh_lock);
1102 }
1103 }
1104