xref: /titanic_51/usr/src/uts/common/inet/ipnet/ipnet.c (revision a799b1e741b6f59012a469e6b57c40cb8061127b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * The ipnet device defined here provides access to packets at the IP layer. To
29  * provide access to packets at this layer it registers a callback function in
30  * the ip module and when there are open instances of the device ip will pass
31  * packets into the device. Packets from ip are passed on the input, output and
32  * loopback paths. Internally the module returns to ip as soon as possible by
33  * deferring processing using a taskq.
34  *
35  * Management of the devices in /dev/ipnet/ is handled by the devname
36  * filesystem and use of the neti interfaces.  This module registers for NIC
37  * events using the neti framework so that when IP interfaces are brought up,
38  * taken down etc. the ipnet module is notified and its view of the interfaces
39  * configured on the system adjusted.  On attach, the module gets an initial
40  * view of the system again using the neti framework but as it has already
41  * registered for IP interface events, it is still up-to-date with any changes.
42  */
43 
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/cred.h>
#include <sys/stat.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/dlpi.h>
#include <sys/strsun.h>
#include <sys/id_space.h>
#include <sys/kmem.h>
#include <sys/mkdev.h>
#include <sys/neti.h>
#include <net/if.h>
#include <sys/errno.h>
#include <sys/list.h>
#include <sys/ksynch.h>
#include <sys/hook_event.h>
#include <sys/stropts.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <inet/ip.h>
#include <inet/ip_multi.h>
#include <inet/ip6.h>
#include <inet/ipnet.h>
68 
/*
 * STREAMS module information for the ipnet driver.  A single instance is
 * referenced by both the read-side (ipnet_rinit) and write-side
 * (ipnet_winit) qinit structures.
 */
static struct module_info ipnet_minfo = {
	1,		/* mi_idnum */
	"ipnet",	/* mi_idname */
	0,		/* mi_minpsz */
	INFPSZ,		/* mi_maxpsz */
	2048,		/* mi_hiwat */
	0		/* mi_lowat */
};
77 
/*
 * List to hold static view of ipnetif_t's on the system. This is needed to
 * avoid holding the lock protecting the avl tree of ipnetif's over the
 * callback into the dev filesystem.
 *
 * Each element records one interface: its name, the dev_t associated with
 * it, and the linkage that threads the element onto the list.
 */
typedef struct ipnetif_cbdata {
	char		ic_ifname[LIFNAMSIZ];
	dev_t		ic_dev;
	list_node_t	ic_next;
} ipnetif_cbdata_t;
88 
/*
 * Convenience enumerated type for ipnet_accept().  It describes the
 * properties of a given ipnet_addrp_t relative to a single ipnet_t
 * client stream; ipnet_get_addrtype() computes it.  The values represent
 * whether the address is ...
 */
typedef enum {
	IPNETADDR_MYADDR,	/* an address on my ipnetif_t. */
	IPNETADDR_MBCAST,	/* a multicast or broadcast address. */
	IPNETADDR_UNKNOWN	/* none of the above. */
} ipnet_addrtype_t;
99 
/*
 * Argument used for the ipnet_nicevent_taskq callback.  It carries the
 * NIC event together with the protocol handle, stack id, interface and
 * logical-interface indexes, and interface name the event refers to.
 */
typedef struct ipnet_nicevent_s {
	nic_event_t		ipne_event;
	net_handle_t		ipne_protocol;
	netstackid_t		ipne_stackid;
	uint64_t		ipne_ifindex;
	uint64_t		ipne_lifindex;
	char			ipne_ifname[LIFNAMSIZ];
} ipnet_nicevent_t;
109 
/*
 * Driver-wide state.
 *
 * ipnet_dip is set by ipnet_attach() and cleared by ipnet_detach().
 * ipnet_major, ipnet_minor_space and both taskqs are set up in _init();
 * the taskqs and the minor space are torn down again in _fini().
 * Minor numbers for opened streams are allocated from ipnet_minor_space,
 * which starts at IPNET_MINOR_MIN; IPNET_MINOR_LO is the fixed minor of
 * the "lo0" node created at attach time.
 */
static dev_info_t	*ipnet_dip;
static major_t		ipnet_major;
static ddi_taskq_t	*ipnet_taskq;		/* taskq for packets */
static ddi_taskq_t	*ipnet_nicevent_taskq;	/* taskq for NIC events */
static id_space_t	*ipnet_minor_space;
static const int	IPNET_MINOR_LO = 1; 	/* minor number for /dev/lo0 */
static const int 	IPNET_MINOR_MIN = 2; 	/* start of dynamic minors */
static dl_info_ack_t	ipnet_infoack = IPNET_INFO_ACK_INIT;
static ipnet_acceptfn_t	ipnet_accept, ipnet_loaccept;
119 
/*
 * Forward declarations for the file-local functions: STREAMS entry
 * points, DDI entry points, DLPI primitive handlers, and the helpers
 * that maintain the per-stack interface and address state.
 */
static void	ipnet_input(mblk_t *);
static int	ipnet_wput(queue_t *, mblk_t *);
static int	ipnet_rsrv(queue_t *);
static int	ipnet_open(queue_t *, dev_t *, int, int, cred_t *);
static int	ipnet_close(queue_t *);
static void	ipnet_ioctl(queue_t *, mblk_t *);
static void	ipnet_iocdata(queue_t *, mblk_t *);
static void 	ipnet_wputnondata(queue_t *, mblk_t *);
static int	ipnet_attach(dev_info_t *, ddi_attach_cmd_t);
static int	ipnet_detach(dev_info_t *, ddi_detach_cmd_t);
static int	ipnet_devinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static void	ipnet_inforeq(queue_t *q, mblk_t *mp);
static void	ipnet_bindreq(queue_t *q, mblk_t *mp);
static void	ipnet_unbindreq(queue_t *q, mblk_t *mp);
static void	ipnet_dlpromisconreq(queue_t *q, mblk_t *mp);
static void	ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp);
static int	ipnet_join_allmulti(ipnetif_t *, ipnet_stack_t *);
static void	ipnet_leave_allmulti(ipnetif_t *, ipnet_stack_t *);
static int	ipnet_nicevent_cb(hook_event_token_t, hook_data_t, void *);
static void	ipnet_nicevent_task(void *);
static ipnetif_t *ipnet_create_if(const char *, uint64_t, ipnet_stack_t *);
static void	ipnet_remove_if(ipnetif_t *, ipnet_stack_t *);
static ipnetif_addr_t *ipnet_match_lif(ipnetif_t *, lif_if_t, boolean_t);
static ipnetif_t *ipnet_if_getby_index(uint64_t, ipnet_stack_t *);
static ipnetif_t *ipnet_if_getby_dev(dev_t, ipnet_stack_t *);
static boolean_t ipnet_if_in_zone(ipnetif_t *, zoneid_t, ipnet_stack_t *);
static void	ipnet_if_zonecheck(ipnetif_t *, ipnet_stack_t *);
static int	ipnet_populate_if(net_handle_t, ipnet_stack_t *, boolean_t);
static int 	ipnet_if_compare_name(const void *, const void *);
static int 	ipnet_if_compare_index(const void *, const void *);
static void	ipnet_add_ifaddr(uint64_t, ipnetif_t *, net_handle_t);
static void	ipnet_delete_ifaddr(ipnetif_addr_t *, ipnetif_t *, boolean_t);
static void	ipnetif_refhold(ipnetif_t *);
static void	ipnetif_refrele(ipnetif_t *);
static void	ipnet_walkers_inc(ipnet_stack_t *);
static void	ipnet_walkers_dec(ipnet_stack_t *);
static void	ipnet_register_netihook(ipnet_stack_t *);
static void	*ipnet_stack_init(netstackid_t, netstack_t *);
static void	ipnet_stack_fini(netstackid_t, void *);
159 
/*
 * Read-side queue initialization.  There is no put procedure; the read
 * side supplies the service, open and close routines.
 */
static struct qinit ipnet_rinit = {
	NULL,		/* qi_putp */
	ipnet_rsrv,	/* qi_srvp */
	ipnet_open,	/* qi_qopen */
	ipnet_close,	/* qi_qclose */
	NULL,		/* qi_qadmin */
	&ipnet_minfo,	/* qi_minfo */
};
168 
/*
 * Write-side queue initialization.  Only a put procedure (ipnet_wput) is
 * supplied; there is no write-side service routine.
 */
static struct qinit ipnet_winit = {
	ipnet_wput,	/* qi_putp */
	NULL,		/* qi_srvp */
	NULL,		/* qi_qopen */
	NULL,		/* qi_qclose */
	NULL,		/* qi_qadmin */
	&ipnet_minfo,	/* qi_minfo */
};
177 
/* STREAMS table tying together the read-side and write-side qinits. */
static struct streamtab ipnet_info = {
	&ipnet_rinit, &ipnet_winit
};
181 
/*
 * Device operations for the driver: ipnet_attach(), ipnet_detach() and
 * ipnet_devinfo() are the autoconfiguration entry points, ipnet_info is
 * the STREAMS table, and quiesce is declared as not supported.
 */
DDI_DEFINE_STREAM_OPS(ipnet_ops, nulldev, nulldev, ipnet_attach,
    ipnet_detach, nodev, ipnet_devinfo, D_MP | D_MTPERMOD, &ipnet_info,
    ddi_quiesce_not_supported);
185 
/* Driver linkage: module type, description string and device operations. */
static struct modldrv modldrv = {
	&mod_driverops,
	"STREAMS ipnet driver",
	&ipnet_ops
};
191 
/* Module linkage handed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};
195 
196 /*
197  * Walk the list of physical interfaces on the machine, for each
198  * interface create a new ipnetif_t and add any addresses to it. We
199  * need to do the walk twice, once for IPv4 and once for IPv6.
200  *
201  * The interfaces are destroyed as part of ipnet_stack_fini() for each
202  * stack.  Note that we cannot do this initialization in
203  * ipnet_stack_init(), since ipnet_stack_init() cannot fail.
204  */
205 static int
206 ipnet_if_init(void)
207 {
208 	netstack_handle_t	nh;
209 	netstack_t		*ns;
210 	ipnet_stack_t		*ips;
211 	int			ret = 0;
212 
213 	netstack_next_init(&nh);
214 	while ((ns = netstack_next(&nh)) != NULL) {
215 		ips = ns->netstack_ipnet;
216 		if ((ret = ipnet_populate_if(ips->ips_ndv4, ips, B_FALSE)) == 0)
217 			ret = ipnet_populate_if(ips->ips_ndv6, ips, B_TRUE);
218 		netstack_rele(ns);
219 		if (ret != 0)
220 			break;
221 	}
222 	netstack_next_fini(&nh);
223 	return (ret);
224 }
225 
226 /*
227  * Standard module entry points.
228  */
229 int
230 _init(void)
231 {
232 	int ret;
233 	boolean_t netstack_registered = B_FALSE;
234 
235 	if ((ipnet_major = ddi_name_to_major("ipnet")) == (major_t)-1)
236 		return (ENODEV);
237 	ipnet_minor_space = id_space_create("ipnet_minor_space",
238 	    IPNET_MINOR_MIN, MAXMIN32);
239 
240 	/*
241 	 * We call ddi_taskq_create() with nthread == 1 to ensure in-order
242 	 * delivery of packets to clients.  Note that we need to create the
243 	 * taskqs before calling netstack_register() since ipnet_stack_init()
244 	 * registers callbacks that use 'em.
245 	 */
246 	ipnet_taskq = ddi_taskq_create(NULL, "ipnet", 1, TASKQ_DEFAULTPRI, 0);
247 	ipnet_nicevent_taskq = ddi_taskq_create(NULL, "ipnet_nic_event_queue",
248 	    1, TASKQ_DEFAULTPRI, 0);
249 	if (ipnet_taskq == NULL || ipnet_nicevent_taskq == NULL) {
250 		ret = ENOMEM;
251 		goto done;
252 	}
253 
254 	netstack_register(NS_IPNET, ipnet_stack_init, NULL, ipnet_stack_fini);
255 	netstack_registered = B_TRUE;
256 
257 	if ((ret = ipnet_if_init()) == 0)
258 		ret = mod_install(&modlinkage);
259 done:
260 	if (ret != 0) {
261 		if (ipnet_taskq != NULL)
262 			ddi_taskq_destroy(ipnet_taskq);
263 		if (ipnet_nicevent_taskq != NULL)
264 			ddi_taskq_destroy(ipnet_nicevent_taskq);
265 		if (netstack_registered)
266 			netstack_unregister(NS_IPNET);
267 		id_space_destroy(ipnet_minor_space);
268 	}
269 	return (ret);
270 }
271 
272 int
273 _fini(void)
274 {
275 	int err;
276 
277 	if ((err = mod_remove(&modlinkage)) != 0)
278 		return (err);
279 
280 	netstack_unregister(NS_IPNET);
281 	ddi_taskq_destroy(ipnet_nicevent_taskq);
282 	ddi_taskq_destroy(ipnet_taskq);
283 	id_space_destroy(ipnet_minor_space);
284 	return (0);
285 }
286 
287 int
288 _info(struct modinfo *modinfop)
289 {
290 	return (mod_info(&modlinkage, modinfop));
291 }
292 
293 static void
294 ipnet_register_netihook(ipnet_stack_t *ips)
295 {
296 	int		ret;
297 	zoneid_t	zoneid;
298 	netid_t		netid;
299 
300 	HOOK_INIT(ips->ips_nicevents, ipnet_nicevent_cb, "ipnet_nicevents",
301 	    ips);
302 
303 	/*
304 	 * It is possible for an exclusive stack to be in the process of
305 	 * shutting down here, and the netid and protocol lookups could fail
306 	 * in that case.
307 	 */
308 	zoneid = netstackid_to_zoneid(ips->ips_netstack->netstack_stackid);
309 	if ((netid = net_zoneidtonetid(zoneid)) == -1)
310 		return;
311 
312 	if ((ips->ips_ndv4 = net_protocol_lookup(netid, NHF_INET)) != NULL) {
313 		if ((ret = net_hook_register(ips->ips_ndv4, NH_NIC_EVENTS,
314 		    ips->ips_nicevents)) != 0) {
315 			VERIFY(net_protocol_release(ips->ips_ndv4) == 0);
316 			ips->ips_ndv4 = NULL;
317 			cmn_err(CE_WARN, "unable to register IPv4 netinfo hooks"
318 			    " in zone %d: %d", zoneid, ret);
319 		}
320 	}
321 	if ((ips->ips_ndv6 = net_protocol_lookup(netid, NHF_INET6)) != NULL) {
322 		if ((ret = net_hook_register(ips->ips_ndv6, NH_NIC_EVENTS,
323 		    ips->ips_nicevents)) != 0) {
324 			VERIFY(net_protocol_release(ips->ips_ndv6) == 0);
325 			ips->ips_ndv6 = NULL;
326 			cmn_err(CE_WARN, "unable to register IPv6 netinfo hooks"
327 			    " in zone %d: %d", zoneid, ret);
328 		}
329 	}
330 }
331 
332 /*
333  * This function is called on attach to build an initial view of the
334  * interfaces on the system. It will be called once for IPv4 and once
335  * for IPv6, although there is only one ipnet interface for both IPv4
336  * and IPv6 there are separate address lists.
337  */
338 static int
339 ipnet_populate_if(net_handle_t nd, ipnet_stack_t *ips, boolean_t isv6)
340 {
341 	phy_if_t		phyif;
342 	lif_if_t		lif;
343 	ipnetif_t		*ipnetif;
344 	char			name[LIFNAMSIZ];
345 	boolean_t		new_if = B_FALSE;
346 	uint64_t		ifflags;
347 	int			ret = 0;
348 
349 	/*
350 	 * If ipnet_register_netihook() was unable to initialize this
351 	 * stack's net_handle_t, then we cannot populate any interface
352 	 * information.  This usually happens when we attempted to
353 	 * grab a net_handle_t as a stack was shutting down.  We don't
354 	 * want to fail the entire _init() operation because of a
355 	 * stack shutdown (other stacks will continue to work just
356 	 * fine), so we silently return success here.
357 	 */
358 	if (nd == NULL)
359 		return (0);
360 
361 	/*
362 	 * Make sure we're not processing NIC events during the
363 	 * population of our interfaces and address lists.
364 	 */
365 	mutex_enter(&ips->ips_event_lock);
366 
367 	for (phyif = net_phygetnext(nd, 0); phyif != 0;
368 	    phyif = net_phygetnext(nd, phyif)) {
369 		if (net_getifname(nd, phyif, name, LIFNAMSIZ) != 0)
370 			continue;
371 		if ((ipnetif = ipnet_if_getby_index(phyif, ips)) == NULL) {
372 			ipnetif = ipnet_create_if(name, phyif, ips);
373 			if (ipnetif == NULL) {
374 				ret = ENOMEM;
375 				goto done;
376 			}
377 			new_if = B_TRUE;
378 		}
379 		ipnetif->if_flags |=
380 		    isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED;
381 
382 		for (lif = net_lifgetnext(nd, phyif, 0); lif != 0;
383 		    lif = net_lifgetnext(nd, phyif, lif)) {
384 			/*
385 			 * Skip addresses that aren't up.  We'll add
386 			 * them when we receive an NE_LIF_UP event.
387 			 */
388 			if (net_getlifflags(nd, phyif, lif, &ifflags) != 0 ||
389 			    !(ifflags & IFF_UP))
390 				continue;
391 			/* Don't add it if we already have it. */
392 			if (ipnet_match_lif(ipnetif, lif, isv6) != NULL)
393 				continue;
394 			ipnet_add_ifaddr(lif, ipnetif, nd);
395 		}
396 		if (!new_if)
397 			ipnetif_refrele(ipnetif);
398 	}
399 
400 done:
401 	mutex_exit(&ips->ips_event_lock);
402 	return (ret);
403 }
404 
405 static int
406 ipnet_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
407 {
408 	if (cmd != DDI_ATTACH)
409 		return (DDI_FAILURE);
410 
411 	if (ddi_create_minor_node(dip, "lo0", S_IFCHR, IPNET_MINOR_LO,
412 	    DDI_PSEUDO, 0) == DDI_FAILURE)
413 		return (DDI_FAILURE);
414 
415 	ipnet_dip = dip;
416 	return (DDI_SUCCESS);
417 }
418 
419 static int
420 ipnet_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
421 {
422 	if (cmd != DDI_DETACH)
423 		return (DDI_FAILURE);
424 
425 	ASSERT(dip == ipnet_dip);
426 	ddi_remove_minor_node(ipnet_dip, NULL);
427 	ipnet_dip = NULL;
428 	return (DDI_SUCCESS);
429 }
430 
431 /* ARGSUSED */
432 static int
433 ipnet_devinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
434 {
435 	int error = DDI_FAILURE;
436 
437 	switch (infocmd) {
438 	case DDI_INFO_DEVT2INSTANCE:
439 		*result = (void *)0;
440 		error = DDI_SUCCESS;
441 		break;
442 	case DDI_INFO_DEVT2DEVINFO:
443 		if (ipnet_dip != NULL) {
444 			*result = ipnet_dip;
445 			error = DDI_SUCCESS;
446 		}
447 		break;
448 	}
449 	return (error);
450 }
451 
452 /* ARGSUSED */
453 static int
454 ipnet_open(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp)
455 {
456 	ipnet_t		*ipnet;
457 	netstack_t	*ns = NULL;
458 	ipnet_stack_t	*ips;
459 	int		err = 0;
460 	zoneid_t	zoneid = crgetzoneid(crp);
461 
462 	/*
463 	 * If the system is labeled, only the global zone is allowed to open
464 	 * IP observability nodes.
465 	 */
466 	if (is_system_labeled() && zoneid != GLOBAL_ZONEID)
467 		return (EACCES);
468 
469 	/* We don't support open as a module */
470 	if (sflag & MODOPEN)
471 		return (ENOTSUP);
472 
473 	/* This driver is self-cloning, we don't support re-open. */
474 	if (rq->q_ptr != NULL)
475 		return (EBUSY);
476 
477 	if ((ipnet = kmem_zalloc(sizeof (*ipnet), KM_NOSLEEP)) == NULL)
478 		return (ENOMEM);
479 
480 	VERIFY((ns = netstack_find_by_cred(crp)) != NULL);
481 	ips = ns->netstack_ipnet;
482 
483 	rq->q_ptr = WR(rq)->q_ptr = ipnet;
484 	ipnet->ipnet_rq = rq;
485 	ipnet->ipnet_minor = (minor_t)id_alloc(ipnet_minor_space);
486 	ipnet->ipnet_zoneid = zoneid;
487 	ipnet->ipnet_dlstate = DL_UNBOUND;
488 	ipnet->ipnet_sap = 0;
489 	ipnet->ipnet_ns = ns;
490 
491 	/*
492 	 * We need to hold ips_event_lock here as any NE_LIF_DOWN events need
493 	 * to be processed after ipnet_if is set and the ipnet_t has been
494 	 * inserted in the ips_str_list.
495 	 */
496 	mutex_enter(&ips->ips_event_lock);
497 	if (getminor(*dev) == IPNET_MINOR_LO) {
498 		ipnet->ipnet_flags |= IPNET_LOMODE;
499 		ipnet->ipnet_acceptfn = ipnet_loaccept;
500 	} else {
501 		ipnet->ipnet_acceptfn = ipnet_accept;
502 		ipnet->ipnet_if = ipnet_if_getby_dev(*dev, ips);
503 		if (ipnet->ipnet_if == NULL ||
504 		    !ipnet_if_in_zone(ipnet->ipnet_if, zoneid, ips)) {
505 			err = ENODEV;
506 			goto done;
507 		}
508 	}
509 
510 	mutex_enter(&ips->ips_walkers_lock);
511 	while (ips->ips_walkers_cnt != 0)
512 		cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock);
513 	list_insert_head(&ips->ips_str_list, ipnet);
514 	*dev = makedevice(getmajor(*dev), ipnet->ipnet_minor);
515 	qprocson(rq);
516 
517 	/*
518 	 * Only register our callback if we're the first open client; we call
519 	 * unregister in close() for the last open client.
520 	 */
521 	if (list_head(&ips->ips_str_list) == list_tail(&ips->ips_str_list))
522 		ipobs_register_hook(ns, ipnet_input);
523 	mutex_exit(&ips->ips_walkers_lock);
524 
525 done:
526 	mutex_exit(&ips->ips_event_lock);
527 	if (err != 0) {
528 		netstack_rele(ns);
529 		id_free(ipnet_minor_space, ipnet->ipnet_minor);
530 		if (ipnet->ipnet_if != NULL)
531 			ipnetif_refrele(ipnet->ipnet_if);
532 		kmem_free(ipnet, sizeof (*ipnet));
533 	}
534 	return (err);
535 }
536 
537 static int
538 ipnet_close(queue_t *rq)
539 {
540 	ipnet_t		*ipnet = rq->q_ptr;
541 	ipnet_stack_t	*ips = ipnet->ipnet_ns->netstack_ipnet;
542 
543 	if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS)
544 		ipnet_leave_allmulti(ipnet->ipnet_if, ips);
545 	if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI)
546 		ipnet_leave_allmulti(ipnet->ipnet_if, ips);
547 
548 	mutex_enter(&ips->ips_walkers_lock);
549 	while (ips->ips_walkers_cnt != 0)
550 		cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock);
551 
552 	qprocsoff(rq);
553 
554 	list_remove(&ips->ips_str_list, ipnet);
555 	if (ipnet->ipnet_if != NULL)
556 		ipnetif_refrele(ipnet->ipnet_if);
557 	id_free(ipnet_minor_space, ipnet->ipnet_minor);
558 	kmem_free(ipnet, sizeof (*ipnet));
559 
560 	if (list_is_empty(&ips->ips_str_list))
561 		ipobs_unregister_hook(ips->ips_netstack, ipnet_input);
562 
563 	mutex_exit(&ips->ips_walkers_lock);
564 	netstack_rele(ips->ips_netstack);
565 	return (0);
566 }
567 
568 static int
569 ipnet_wput(queue_t *q, mblk_t *mp)
570 {
571 	switch (mp->b_datap->db_type) {
572 	case M_FLUSH:
573 		if (*mp->b_rptr & FLUSHW) {
574 			flushq(q, FLUSHDATA);
575 			*mp->b_rptr &= ~FLUSHW;
576 		}
577 		if (*mp->b_rptr & FLUSHR)
578 			qreply(q, mp);
579 		else
580 			freemsg(mp);
581 		break;
582 	case M_PROTO:
583 	case M_PCPROTO:
584 		ipnet_wputnondata(q, mp);
585 		break;
586 	case M_IOCTL:
587 		ipnet_ioctl(q, mp);
588 		break;
589 	case M_IOCDATA:
590 		ipnet_iocdata(q, mp);
591 		break;
592 	default:
593 		freemsg(mp);
594 		break;
595 	}
596 	return (0);
597 }
598 
599 static int
600 ipnet_rsrv(queue_t *q)
601 {
602 	mblk_t *mp;
603 
604 	while ((mp = getq(q)) != NULL) {
605 		ASSERT(DB_TYPE(mp) == M_DATA);
606 		if (canputnext(q)) {
607 			putnext(q, mp);
608 		} else {
609 			(void) putbq(q, mp);
610 			break;
611 		}
612 	}
613 	return (0);
614 }
615 
616 static void
617 ipnet_ioctl(queue_t *q, mblk_t *mp)
618 {
619 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
620 
621 	switch (iocp->ioc_cmd) {
622 	case DLIOCRAW:
623 		miocack(q, mp, 0, 0);
624 		break;
625 	case DLIOCIPNETINFO:
626 		if (iocp->ioc_count == TRANSPARENT) {
627 			mcopyin(mp, NULL, sizeof (uint_t), NULL);
628 			qreply(q, mp);
629 			break;
630 		}
631 		/* Fallthrough, we don't support I_STR with DLIOCIPNETINFO. */
632 	default:
633 		miocnak(q, mp, 0, EINVAL);
634 		break;
635 	}
636 }
637 
638 static void
639 ipnet_iocdata(queue_t *q, mblk_t *mp)
640 {
641 	struct iocblk	*iocp = (struct iocblk *)mp->b_rptr;
642 	ipnet_t		*ipnet = q->q_ptr;
643 
644 	switch (iocp->ioc_cmd) {
645 	case DLIOCIPNETINFO:
646 		if (*(int *)mp->b_cont->b_rptr == 1)
647 			ipnet->ipnet_flags |= IPNET_INFO;
648 		else if (*(int *)mp->b_cont->b_rptr == 0)
649 			ipnet->ipnet_flags &= ~IPNET_INFO;
650 		else
651 			goto iocnak;
652 		miocack(q, mp, 0, DL_IPNETINFO_VERSION);
653 		break;
654 	default:
655 	iocnak:
656 		miocnak(q, mp, 0, EINVAL);
657 		break;
658 	}
659 }
660 
661 static void
662 ipnet_wputnondata(queue_t *q, mblk_t *mp)
663 {
664 	union DL_primitives	*dlp = (union DL_primitives *)mp->b_rptr;
665 	t_uscalar_t		prim = dlp->dl_primitive;
666 
667 	switch (prim) {
668 	case DL_INFO_REQ:
669 		ipnet_inforeq(q, mp);
670 		break;
671 	case DL_UNBIND_REQ:
672 		ipnet_unbindreq(q, mp);
673 		break;
674 	case DL_BIND_REQ:
675 		ipnet_bindreq(q, mp);
676 		break;
677 	case DL_PROMISCON_REQ:
678 		ipnet_dlpromisconreq(q, mp);
679 		break;
680 	case DL_PROMISCOFF_REQ:
681 		ipnet_dlpromiscoffreq(q, mp);
682 		break;
683 	case DL_UNITDATA_REQ:
684 	case DL_DETACH_REQ:
685 	case DL_PHYS_ADDR_REQ:
686 	case DL_SET_PHYS_ADDR_REQ:
687 	case DL_ENABMULTI_REQ:
688 	case DL_DISABMULTI_REQ:
689 	case DL_ATTACH_REQ:
690 		dlerrorack(q, mp, prim, DL_UNSUPPORTED, 0);
691 		break;
692 	default:
693 		dlerrorack(q, mp, prim, DL_BADPRIM, 0);
694 		break;
695 	}
696 }
697 
698 static void
699 ipnet_inforeq(queue_t *q, mblk_t *mp)
700 {
701 	dl_info_ack_t	*dlip;
702 	size_t		size = sizeof (dl_info_ack_t) + sizeof (ushort_t);
703 
704 	if (MBLKL(mp) < DL_INFO_REQ_SIZE) {
705 		dlerrorack(q, mp, DL_INFO_REQ, DL_BADPRIM, 0);
706 		return;
707 	}
708 
709 	if ((mp = mexchange(q, mp, size, M_PCPROTO, DL_INFO_ACK)) == NULL)
710 		return;
711 
712 	dlip = (dl_info_ack_t *)mp->b_rptr;
713 	*dlip = ipnet_infoack;
714 	qreply(q, mp);
715 }
716 
717 static void
718 ipnet_bindreq(queue_t *q, mblk_t *mp)
719 {
720 	union   DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
721 	int32_t sap;
722 	ipnet_t	*ipnet = q->q_ptr;
723 
724 	if (MBLKL(mp) < DL_BIND_REQ_SIZE) {
725 		dlerrorack(q, mp, DL_BIND_REQ, DL_BADPRIM, 0);
726 		return;
727 	}
728 
729 	sap = dlp->bind_req.dl_sap;
730 	if (sap != IPV4_VERSION && sap != IPV6_VERSION && sap != 0) {
731 		dlerrorack(q, mp, DL_BIND_REQ, DL_BADSAP, 0);
732 	} else {
733 		ipnet->ipnet_sap = sap;
734 		ipnet->ipnet_dlstate = DL_IDLE;
735 		dlbindack(q, mp, sap, 0, 0, 0, 0);
736 	}
737 }
738 
739 static void
740 ipnet_unbindreq(queue_t *q, mblk_t *mp)
741 {
742 	ipnet_t	*ipnet = q->q_ptr;
743 
744 	if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) {
745 		dlerrorack(q, mp, DL_UNBIND_REQ, DL_BADPRIM, 0);
746 		return;
747 	}
748 
749 	if (ipnet->ipnet_dlstate != DL_IDLE) {
750 		dlerrorack(q, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0);
751 	} else {
752 		ipnet->ipnet_dlstate = DL_UNBOUND;
753 		ipnet->ipnet_sap = 0;
754 		dlokack(q, mp, DL_UNBIND_REQ);
755 	}
756 }
757 
758 static void
759 ipnet_dlpromisconreq(queue_t *q, mblk_t *mp)
760 {
761 	ipnet_t		*ipnet = q->q_ptr;
762 	t_uscalar_t	level;
763 	int		err;
764 
765 	if (MBLKL(mp) < DL_PROMISCON_REQ_SIZE) {
766 		dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0);
767 		return;
768 	}
769 
770 	if (ipnet->ipnet_flags & IPNET_LOMODE) {
771 		dlokack(q, mp, DL_PROMISCON_REQ);
772 		return;
773 	}
774 
775 	level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level;
776 	if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) {
777 		if ((err = ipnet_join_allmulti(ipnet->ipnet_if,
778 		    ipnet->ipnet_ns->netstack_ipnet)) != 0) {
779 			dlerrorack(q, mp, DL_PROMISCON_REQ, DL_SYSERR, err);
780 			return;
781 		}
782 	}
783 
784 	switch (level) {
785 	case DL_PROMISC_PHYS:
786 		ipnet->ipnet_flags |= IPNET_PROMISC_PHYS;
787 		break;
788 	case DL_PROMISC_SAP:
789 		ipnet->ipnet_flags |= IPNET_PROMISC_SAP;
790 		break;
791 	case DL_PROMISC_MULTI:
792 		ipnet->ipnet_flags |= IPNET_PROMISC_MULTI;
793 		break;
794 	default:
795 		dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0);
796 		return;
797 	}
798 
799 	dlokack(q, mp, DL_PROMISCON_REQ);
800 }
801 
802 static void
803 ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp)
804 {
805 	ipnet_t		*ipnet = q->q_ptr;
806 	t_uscalar_t	level;
807 	uint16_t	orig_ipnet_flags = ipnet->ipnet_flags;
808 
809 	if (MBLKL(mp) < DL_PROMISCOFF_REQ_SIZE) {
810 		dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0);
811 		return;
812 	}
813 
814 	if (ipnet->ipnet_flags & IPNET_LOMODE) {
815 		dlokack(q, mp, DL_PROMISCOFF_REQ);
816 		return;
817 	}
818 
819 	level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level;
820 	switch (level) {
821 	case DL_PROMISC_PHYS:
822 		if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS)
823 			ipnet->ipnet_flags &= ~IPNET_PROMISC_PHYS;
824 		break;
825 	case DL_PROMISC_SAP:
826 		if (ipnet->ipnet_flags & IPNET_PROMISC_SAP)
827 			ipnet->ipnet_flags &= ~IPNET_PROMISC_SAP;
828 		break;
829 	case DL_PROMISC_MULTI:
830 		if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI)
831 			ipnet->ipnet_flags &= ~IPNET_PROMISC_MULTI;
832 		break;
833 	default:
834 		dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0);
835 		return;
836 	}
837 
838 	if (orig_ipnet_flags == ipnet->ipnet_flags) {
839 		dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_NOTENAB, 0);
840 		return;
841 	}
842 
843 	if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) {
844 		ipnet_leave_allmulti(ipnet->ipnet_if,
845 		    ipnet->ipnet_ns->netstack_ipnet);
846 	}
847 
848 	dlokack(q, mp, DL_PROMISCOFF_REQ);
849 }
850 
851 static int
852 ipnet_join_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips)
853 {
854 	int		err = 0;
855 	ip_stack_t	*ipst = ips->ips_netstack->netstack_ip;
856 	uint64_t	index = ipnetif->if_index;
857 
858 	mutex_enter(&ips->ips_event_lock);
859 	if (ipnetif->if_multicnt == 0) {
860 		ASSERT((ipnetif->if_flags &
861 		    (IPNETIF_IPV4ALLMULTI | IPNETIF_IPV6ALLMULTI)) == 0);
862 		if (ipnetif->if_flags & IPNETIF_IPV4PLUMBED) {
863 			err = ip_join_allmulti(index, B_FALSE, ipst);
864 			if (err != 0)
865 				goto done;
866 			ipnetif->if_flags |= IPNETIF_IPV4ALLMULTI;
867 		}
868 		if (ipnetif->if_flags & IPNETIF_IPV6PLUMBED) {
869 			err = ip_join_allmulti(index, B_TRUE, ipst);
870 			if (err != 0 &&
871 			    (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI)) {
872 				(void) ip_leave_allmulti(index, B_FALSE, ipst);
873 				ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI;
874 				goto done;
875 			}
876 			ipnetif->if_flags |= IPNETIF_IPV6ALLMULTI;
877 		}
878 	}
879 	ipnetif->if_multicnt++;
880 
881 done:
882 	mutex_exit(&ips->ips_event_lock);
883 	return (err);
884 }
885 
886 static void
887 ipnet_leave_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips)
888 {
889 	int		err;
890 	ip_stack_t	*ipst = ips->ips_netstack->netstack_ip;
891 	uint64_t	index = ipnetif->if_index;
892 
893 	mutex_enter(&ips->ips_event_lock);
894 	ASSERT(ipnetif->if_multicnt != 0);
895 	if (--ipnetif->if_multicnt == 0) {
896 		if (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI) {
897 			err = ip_leave_allmulti(index, B_FALSE, ipst);
898 			ASSERT(err == 0 || err == ENODEV);
899 			ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI;
900 		}
901 		if (ipnetif->if_flags & IPNETIF_IPV6ALLMULTI) {
902 			err = ip_leave_allmulti(index, B_TRUE, ipst);
903 			ASSERT(err == 0 || err == ENODEV);
904 			ipnetif->if_flags &= ~IPNETIF_IPV6ALLMULTI;
905 		}
906 	}
907 	mutex_exit(&ips->ips_event_lock);
908 }
909 
910 static mblk_t *
911 ipnet_addheader(ipobs_hook_data_t *ihd, mblk_t *mp)
912 {
913 	mblk_t		*dlhdr;
914 	dl_ipnetinfo_t	*dl;
915 
916 	if ((dlhdr = allocb(sizeof (dl_ipnetinfo_t), BPRI_HI)) == NULL) {
917 		freemsg(mp);
918 		return (NULL);
919 	}
920 	dl = (dl_ipnetinfo_t *)dlhdr->b_rptr;
921 	dl->dli_version = DL_IPNETINFO_VERSION;
922 	dl->dli_len = htons(sizeof (*dl));
923 	dl->dli_ipver = ihd->ihd_ipver;
924 	dl->dli_srczone = BE_64((uint64_t)ihd->ihd_zsrc);
925 	dl->dli_dstzone = BE_64((uint64_t)ihd->ihd_zdst);
926 	dlhdr->b_wptr += sizeof (*dl);
927 	dlhdr->b_cont = mp;
928 
929 	return (dlhdr);
930 }
931 
932 static ipnet_addrtype_t
933 ipnet_get_addrtype(ipnet_t *ipnet, ipnet_addrp_t *addr)
934 {
935 	list_t			*list;
936 	ipnetif_t		*ipnetif = ipnet->ipnet_if;
937 	ipnetif_addr_t		*ifaddr;
938 	ipnet_addrtype_t	addrtype = IPNETADDR_UNKNOWN;
939 
940 	/* First check if the address is multicast or limited broadcast. */
941 	switch (addr->iap_family) {
942 	case AF_INET:
943 		if (CLASSD(*(addr->iap_addr4)) ||
944 		    *(addr->iap_addr4) == INADDR_BROADCAST)
945 			return (IPNETADDR_MBCAST);
946 		break;
947 	case AF_INET6:
948 		if (IN6_IS_ADDR_MULTICAST(addr->iap_addr6))
949 			return (IPNETADDR_MBCAST);
950 		break;
951 	}
952 
953 	/*
954 	 * Walk the address list to see if the address belongs to our
955 	 * interface or is one of our subnet broadcast addresses.
956 	 */
957 	mutex_enter(&ipnetif->if_addr_lock);
958 	list = (addr->iap_family == AF_INET) ?
959 	    &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list;
960 	for (ifaddr = list_head(list);
961 	    ifaddr != NULL && addrtype == IPNETADDR_UNKNOWN;
962 	    ifaddr = list_next(list, ifaddr)) {
963 		/*
964 		 * If we're not in the global zone, then only look at
965 		 * addresses in our zone.
966 		 */
967 		if (ipnet->ipnet_zoneid != GLOBAL_ZONEID &&
968 		    ipnet->ipnet_zoneid != ifaddr->ifa_zone)
969 			continue;
970 		switch (addr->iap_family) {
971 		case AF_INET:
972 			if (ifaddr->ifa_ip4addr != INADDR_ANY &&
973 			    *(addr->iap_addr4) == ifaddr->ifa_ip4addr)
974 				addrtype = IPNETADDR_MYADDR;
975 			else if (ifaddr->ifa_brdaddr != INADDR_ANY &&
976 			    *(addr->iap_addr4) == ifaddr->ifa_brdaddr)
977 				addrtype = IPNETADDR_MBCAST;
978 			break;
979 		case AF_INET6:
980 			if (IN6_ARE_ADDR_EQUAL(addr->iap_addr6,
981 			    &ifaddr->ifa_ip6addr))
982 				addrtype = IPNETADDR_MYADDR;
983 			break;
984 		}
985 	}
986 	mutex_exit(&ipnetif->if_addr_lock);
987 
988 	return (addrtype);
989 }
990 
991 /*
992  * Verify if the packet contained in ihd should be passed up to the
993  * ipnet client stream.
994  */
995 static boolean_t
996 ipnet_accept(ipnet_t *ipnet, ipobs_hook_data_t *ihd, ipnet_addrp_t *src,
997     ipnet_addrp_t *dst)
998 {
999 	boolean_t		obsif;
1000 	uint64_t		ifindex = ipnet->ipnet_if->if_index;
1001 	ipnet_addrtype_t	srctype, dsttype;
1002 
1003 	srctype = ipnet_get_addrtype(ipnet, src);
1004 	dsttype = ipnet_get_addrtype(ipnet, dst);
1005 
1006 	/*
1007 	 * If the packet's ifindex matches ours, or the packet's group ifindex
1008 	 * matches ours, it's on the interface we're observing.  (Thus,
1009 	 * observing on the group ifindex matches all ifindexes in the group.)
1010 	 */
1011 	obsif = (ihd->ihd_ifindex == ifindex || ihd->ihd_grifindex == ifindex);
1012 
1013 	/*
1014 	 * Do not allow an ipnet stream to see packets that are not from or to
1015 	 * its zone.  The exception is when zones are using the shared stack
1016 	 * model.  In this case, streams in the global zone have visibility
1017 	 * into other shared-stack zones, and broadcast and multicast traffic
1018 	 * is visible by all zones in the stack.
1019 	 */
1020 	if (ipnet->ipnet_zoneid != GLOBAL_ZONEID &&
1021 	    dsttype != IPNETADDR_MBCAST) {
1022 		if (ipnet->ipnet_zoneid != ihd->ihd_zsrc &&
1023 		    ipnet->ipnet_zoneid != ihd->ihd_zdst)
1024 			return (B_FALSE);
1025 	}
1026 
1027 	/*
1028 	 * If DL_PROMISC_SAP isn't enabled, then the bound SAP must match the
1029 	 * packet's IP version.
1030 	 */
1031 	if (!(ipnet->ipnet_flags & IPNET_PROMISC_SAP) &&
1032 	    ipnet->ipnet_sap != ihd->ihd_ipver)
1033 		return (B_FALSE);
1034 
1035 	/* If the destination address is ours, then accept the packet. */
1036 	if (dsttype == IPNETADDR_MYADDR)
1037 		return (B_TRUE);
1038 
1039 	/*
1040 	 * If DL_PROMISC_PHYS is enabled, then we can see all packets that are
1041 	 * sent or received on the interface we're observing, or packets that
1042 	 * have our source address (this allows us to see packets we send).
1043 	 */
1044 	if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) {
1045 		if (srctype == IPNETADDR_MYADDR || obsif)
1046 			return (B_TRUE);
1047 	}
1048 
1049 	/*
1050 	 * We accept multicast and broadcast packets transmitted or received
1051 	 * on the interface we're observing.
1052 	 */
1053 	if (dsttype == IPNETADDR_MBCAST && obsif)
1054 		return (B_TRUE);
1055 
1056 	return (B_FALSE);
1057 }
1058 
1059 /*
1060  * Verify if the packet contained in ihd should be passed up to the ipnet
1061  * client stream that's in IPNET_LOMODE.
1062  */
1063 /* ARGSUSED */
1064 static boolean_t
1065 ipnet_loaccept(ipnet_t *ipnet, ipobs_hook_data_t *ihd, ipnet_addrp_t *src,
1066     ipnet_addrp_t *dst)
1067 {
1068 	if (ihd->ihd_htype != IPOBS_HOOK_LOCAL)
1069 		return (B_FALSE);
1070 
1071 	/*
1072 	 * An ipnet stream must not see packets that are not from/to its zone.
1073 	 */
1074 	if (ipnet->ipnet_zoneid != GLOBAL_ZONEID) {
1075 		if (ipnet->ipnet_zoneid != ihd->ihd_zsrc &&
1076 		    ipnet->ipnet_zoneid != ihd->ihd_zdst)
1077 			return (B_FALSE);
1078 	}
1079 
1080 	return (ipnet->ipnet_sap == 0 || ipnet->ipnet_sap == ihd->ihd_ipver);
1081 }
1082 
1083 static void
1084 ipnet_dispatch(void *arg)
1085 {
1086 	mblk_t			*mp = arg;
1087 	ipobs_hook_data_t	*ihd = (ipobs_hook_data_t *)mp->b_rptr;
1088 	ipnet_t			*ipnet;
1089 	mblk_t			*netmp;
1090 	list_t			*list;
1091 	ipnet_stack_t		*ips = ihd->ihd_stack->netstack_ipnet;
1092 	ipnet_addrp_t		src, dst;
1093 
1094 	if (ihd->ihd_ipver == IPV4_VERSION) {
1095 		src.iap_family = dst.iap_family = AF_INET;
1096 		src.iap_addr4 = &((ipha_t *)(ihd->ihd_mp->b_rptr))->ipha_src;
1097 		dst.iap_addr4 = &((ipha_t *)(ihd->ihd_mp->b_rptr))->ipha_dst;
1098 	} else {
1099 		src.iap_family = dst.iap_family = AF_INET6;
1100 		src.iap_addr6 = &((ip6_t *)(ihd->ihd_mp->b_rptr))->ip6_src;
1101 		dst.iap_addr6 = &((ip6_t *)(ihd->ihd_mp->b_rptr))->ip6_dst;
1102 	}
1103 
1104 	ipnet_walkers_inc(ips);
1105 
1106 	list = &ips->ips_str_list;
1107 	for (ipnet = list_head(list); ipnet != NULL;
1108 	    ipnet = list_next(list, ipnet)) {
1109 		if (!(*ipnet->ipnet_acceptfn)(ipnet, ihd, &src, &dst))
1110 			continue;
1111 
1112 		if (list_next(list, ipnet) == NULL) {
1113 			netmp = ihd->ihd_mp;
1114 			ihd->ihd_mp = NULL;
1115 		} else {
1116 			if ((netmp = dupmsg(ihd->ihd_mp)) == NULL &&
1117 			    (netmp = copymsg(ihd->ihd_mp)) == NULL) {
1118 				atomic_inc_64(&ips->ips_drops);
1119 				continue;
1120 			}
1121 		}
1122 
1123 		if (ipnet->ipnet_flags & IPNET_INFO) {
1124 			if ((netmp = ipnet_addheader(ihd, netmp)) == NULL) {
1125 				atomic_inc_64(&ips->ips_drops);
1126 				continue;
1127 			}
1128 		}
1129 
1130 		if (ipnet->ipnet_rq->q_first == NULL &&
1131 		    canputnext(ipnet->ipnet_rq)) {
1132 			putnext(ipnet->ipnet_rq, netmp);
1133 		} else if (canput(ipnet->ipnet_rq)) {
1134 			(void) putq(ipnet->ipnet_rq, netmp);
1135 		} else {
1136 			freemsg(netmp);
1137 			atomic_inc_64(&ips->ips_drops);
1138 		}
1139 	}
1140 
1141 	ipnet_walkers_dec(ips);
1142 
1143 	freemsg(ihd->ihd_mp);
1144 	freemsg(mp);
1145 }
1146 
1147 static void
1148 ipnet_input(mblk_t *mp)
1149 {
1150 	ipobs_hook_data_t  *ihd = (ipobs_hook_data_t *)mp->b_rptr;
1151 
1152 	if (ddi_taskq_dispatch(ipnet_taskq, ipnet_dispatch, mp, DDI_NOSLEEP) !=
1153 	    DDI_SUCCESS) {
1154 		atomic_inc_64(&ihd->ihd_stack->netstack_ipnet->ips_drops);
1155 		freemsg(ihd->ihd_mp);
1156 		freemsg(mp);
1157 	}
1158 }
1159 
1160 /*
1161  * Create a new ipnetif_t and new minor node for it.  If creation is
1162  * successful the new ipnetif_t is inserted into an avl_tree
1163  * containing ipnetif's for this stack instance.
1164  */
1165 static ipnetif_t *
1166 ipnet_create_if(const char *name, uint64_t index, ipnet_stack_t *ips)
1167 {
1168 	ipnetif_t	*ipnetif;
1169 	avl_index_t	where = 0;
1170 	minor_t		ifminor;
1171 
1172 	/*
1173 	 * Because ipnet_create_if() can be called from a NIC event
1174 	 * callback, it should not block.
1175 	 */
1176 	ifminor = (minor_t)id_alloc_nosleep(ipnet_minor_space);
1177 	if (ifminor == (minor_t)-1)
1178 		return (NULL);
1179 	if ((ipnetif = kmem_zalloc(sizeof (*ipnetif), KM_NOSLEEP)) == NULL) {
1180 		id_free(ipnet_minor_space, ifminor);
1181 		return (NULL);
1182 	}
1183 
1184 	(void) strlcpy(ipnetif->if_name, name, LIFNAMSIZ);
1185 	ipnetif->if_index = index;
1186 
1187 	mutex_init(&ipnetif->if_addr_lock, NULL, MUTEX_DEFAULT, 0);
1188 	list_create(&ipnetif->if_ip4addr_list, sizeof (ipnetif_addr_t),
1189 	    offsetof(ipnetif_addr_t, ifa_link));
1190 	list_create(&ipnetif->if_ip6addr_list, sizeof (ipnetif_addr_t),
1191 	    offsetof(ipnetif_addr_t, ifa_link));
1192 	ipnetif->if_dev = makedevice(ipnet_major, ifminor);
1193 	mutex_init(&ipnetif->if_reflock, NULL, MUTEX_DEFAULT, 0);
1194 	ipnetif->if_refcnt = 1;
1195 
1196 	mutex_enter(&ips->ips_avl_lock);
1197 	VERIFY(avl_find(&ips->ips_avl_by_index, &index, &where) == NULL);
1198 	avl_insert(&ips->ips_avl_by_index, ipnetif, where);
1199 	VERIFY(avl_find(&ips->ips_avl_by_name, (void *)name, &where) == NULL);
1200 	avl_insert(&ips->ips_avl_by_name, ipnetif, where);
1201 	mutex_exit(&ips->ips_avl_lock);
1202 
1203 	return (ipnetif);
1204 }
1205 
1206 static void
1207 ipnet_remove_if(ipnetif_t *ipnetif, ipnet_stack_t *ips)
1208 {
1209 	ipnet_t	*ipnet;
1210 
1211 	ipnet_walkers_inc(ips);
1212 	/* Send a SIGHUP to all open streams associated with this ipnetif. */
1213 	for (ipnet = list_head(&ips->ips_str_list); ipnet != NULL;
1214 	    ipnet = list_next(&ips->ips_str_list, ipnet)) {
1215 		if (ipnet->ipnet_if == ipnetif)
1216 			(void) putnextctl(ipnet->ipnet_rq, M_HANGUP);
1217 	}
1218 	ipnet_walkers_dec(ips);
1219 	mutex_enter(&ips->ips_avl_lock);
1220 	avl_remove(&ips->ips_avl_by_index, ipnetif);
1221 	avl_remove(&ips->ips_avl_by_name, ipnetif);
1222 	mutex_exit(&ips->ips_avl_lock);
1223 	/* Release the reference we implicitly held in ipnet_create_if(). */
1224 	ipnetif_refrele(ipnetif);
1225 }
1226 
1227 static void
1228 ipnet_purge_addrlist(list_t *addrlist)
1229 {
1230 	ipnetif_addr_t *ifa;
1231 
1232 	while ((ifa = list_head(addrlist)) != NULL) {
1233 		list_remove(addrlist, ifa);
1234 		kmem_free(ifa, sizeof (*ifa));
1235 	}
1236 }
1237 
1238 static void
1239 ipnet_free_if(ipnetif_t *ipnetif)
1240 {
1241 	ASSERT(ipnetif->if_refcnt == 0);
1242 
1243 	/* Remove IPv4/v6 address lists from the ipnetif */
1244 	ipnet_purge_addrlist(&ipnetif->if_ip4addr_list);
1245 	list_destroy(&ipnetif->if_ip4addr_list);
1246 	ipnet_purge_addrlist(&ipnetif->if_ip6addr_list);
1247 	list_destroy(&ipnetif->if_ip6addr_list);
1248 	mutex_destroy(&ipnetif->if_addr_lock);
1249 	mutex_destroy(&ipnetif->if_reflock);
1250 	id_free(ipnet_minor_space, getminor(ipnetif->if_dev));
1251 	kmem_free(ipnetif, sizeof (*ipnetif));
1252 }
1253 
1254 /*
1255  * Create an ipnetif_addr_t with the given logical interface id (lif)
1256  * and add it to the supplied ipnetif.  The lif is the netinfo
1257  * representation of logical interface id, and we use this id to match
1258  * incoming netinfo events against our lists of addresses.
1259  */
1260 static void
1261 ipnet_add_ifaddr(uint64_t lif, ipnetif_t *ipnetif, net_handle_t nd)
1262 {
1263 	ipnetif_addr_t		*ifaddr;
1264 	zoneid_t		zoneid;
1265 	struct sockaddr_in	bcast;
1266 	struct sockaddr_storage	addr;
1267 	net_ifaddr_t		type = NA_ADDRESS;
1268 	uint64_t		phyif = ipnetif->if_index;
1269 
1270 	if (net_getlifaddr(nd, phyif, lif, 1, &type, &addr) != 0 ||
1271 	    net_getlifzone(nd, phyif, lif, &zoneid) != 0)
1272 		return;
1273 	if ((ifaddr = kmem_alloc(sizeof (*ifaddr), KM_NOSLEEP)) == NULL)
1274 		return;
1275 
1276 	ifaddr->ifa_zone = zoneid;
1277 	ifaddr->ifa_id = lif;
1278 
1279 	switch (addr.ss_family) {
1280 	case AF_INET:
1281 		ifaddr->ifa_ip4addr =
1282 		    ((struct sockaddr_in *)&addr)->sin_addr.s_addr;
1283 		/*
1284 		 * Try and get the broadcast address.  Note that it's okay for
1285 		 * an interface to not have a broadcast address, so we don't
1286 		 * fail the entire operation if net_getlifaddr() fails here.
1287 		 */
1288 		type = NA_BROADCAST;
1289 		if (net_getlifaddr(nd, phyif, lif, 1, &type, &bcast) == 0)
1290 			ifaddr->ifa_brdaddr = bcast.sin_addr.s_addr;
1291 		break;
1292 	case AF_INET6:
1293 		ifaddr->ifa_ip6addr = ((struct sockaddr_in6 *)&addr)->sin6_addr;
1294 		break;
1295 	}
1296 
1297 	mutex_enter(&ipnetif->if_addr_lock);
1298 	list_insert_tail(addr.ss_family == AF_INET ?
1299 	    &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list, ifaddr);
1300 	mutex_exit(&ipnetif->if_addr_lock);
1301 }
1302 
1303 static void
1304 ipnet_delete_ifaddr(ipnetif_addr_t *ifaddr, ipnetif_t *ipnetif, boolean_t isv6)
1305 {
1306 	mutex_enter(&ipnetif->if_addr_lock);
1307 	list_remove(isv6 ?
1308 	    &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list, ifaddr);
1309 	mutex_exit(&ipnetif->if_addr_lock);
1310 	kmem_free(ifaddr, sizeof (*ifaddr));
1311 }
1312 
1313 static void
1314 ipnet_plumb_ev(uint64_t ifindex, const char *ifname, ipnet_stack_t *ips,
1315     boolean_t isv6)
1316 {
1317 	ipnetif_t	*ipnetif;
1318 	boolean_t	refrele_needed = B_TRUE;
1319 
1320 	if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) {
1321 		ipnetif = ipnet_create_if(ifname, ifindex, ips);
1322 		refrele_needed = B_FALSE;
1323 	}
1324 	if (ipnetif != NULL) {
1325 		ipnetif->if_flags |=
1326 		    isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED;
1327 	}
1328 
1329 	if (ipnetif->if_multicnt != 0) {
1330 		if (ip_join_allmulti(ifindex, isv6,
1331 		    ips->ips_netstack->netstack_ip) == 0) {
1332 			ipnetif->if_flags |=
1333 			    isv6 ? IPNETIF_IPV6ALLMULTI : IPNETIF_IPV4ALLMULTI;
1334 		}
1335 	}
1336 
1337 	if (refrele_needed)
1338 		ipnetif_refrele(ipnetif);
1339 }
1340 
1341 static void
1342 ipnet_unplumb_ev(uint64_t ifindex, ipnet_stack_t *ips, boolean_t isv6)
1343 {
1344 	ipnetif_t	*ipnetif;
1345 
1346 	if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL)
1347 		return;
1348 
1349 	mutex_enter(&ipnetif->if_addr_lock);
1350 	ipnet_purge_addrlist(isv6 ?
1351 	    &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list);
1352 	mutex_exit(&ipnetif->if_addr_lock);
1353 
1354 	/*
1355 	 * Note that we have one ipnetif for both IPv4 and IPv6, but we receive
1356 	 * separate NE_UNPLUMB events for IPv4 and IPv6.  We remove the ipnetif
1357 	 * if both IPv4 and IPv6 interfaces have been unplumbed.
1358 	 */
1359 	ipnetif->if_flags &= isv6 ? ~IPNETIF_IPV6PLUMBED : ~IPNETIF_IPV4PLUMBED;
1360 	if (!(ipnetif->if_flags & (IPNETIF_IPV4PLUMBED | IPNETIF_IPV6PLUMBED)))
1361 		ipnet_remove_if(ipnetif, ips);
1362 	ipnetif_refrele(ipnetif);
1363 }
1364 
1365 static void
1366 ipnet_lifup_ev(uint64_t ifindex, uint64_t lifindex, net_handle_t nd,
1367     ipnet_stack_t *ips, boolean_t isv6)
1368 {
1369 	ipnetif_t	*ipnetif;
1370 	ipnetif_addr_t	*ifaddr;
1371 
1372 	if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL)
1373 		return;
1374 	if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL) {
1375 		/*
1376 		 * We must have missed a NE_LIF_DOWN event.  Delete this
1377 		 * ifaddr and re-create it.
1378 		 */
1379 		ipnet_delete_ifaddr(ifaddr, ipnetif, isv6);
1380 	}
1381 
1382 	ipnet_add_ifaddr(lifindex, ipnetif, nd);
1383 	ipnetif_refrele(ipnetif);
1384 }
1385 
1386 static void
1387 ipnet_lifdown_ev(uint64_t ifindex, uint64_t lifindex, ipnet_stack_t *ips,
1388     boolean_t isv6)
1389 {
1390 	ipnetif_t	*ipnetif;
1391 	ipnetif_addr_t	*ifaddr;
1392 
1393 	if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL)
1394 		return;
1395 	if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL)
1396 		ipnet_delete_ifaddr(ifaddr, ipnetif, isv6);
1397 	ipnetif_refrele(ipnetif);
1398 	/*
1399 	 * Make sure that open streams on this ipnetif are still allowed to
1400 	 * have it open.
1401 	 */
1402 	ipnet_if_zonecheck(ipnetif, ips);
1403 }
1404 
1405 /*
1406  * This callback from the NIC event framework dispatches a taskq as the event
1407  * handlers may block.
1408  */
1409 /* ARGSUSED */
1410 static int
1411 ipnet_nicevent_cb(hook_event_token_t token, hook_data_t info, void *arg)
1412 {
1413 	ipnet_stack_t		*ips = arg;
1414 	hook_nic_event_t	*hn = (hook_nic_event_t *)info;
1415 	ipnet_nicevent_t	*ipne;
1416 
1417 	if ((ipne = kmem_alloc(sizeof (ipnet_nicevent_t), KM_NOSLEEP)) == NULL)
1418 		return (0);
1419 	ipne->ipne_event = hn->hne_event;
1420 	ipne->ipne_protocol = hn->hne_protocol;
1421 	ipne->ipne_stackid = ips->ips_netstack->netstack_stackid;
1422 	ipne->ipne_ifindex = hn->hne_nic;
1423 	ipne->ipne_lifindex = hn->hne_lif;
1424 	if (hn->hne_datalen != 0) {
1425 		(void) strlcpy(ipne->ipne_ifname, hn->hne_data,
1426 		    sizeof (ipne->ipne_ifname));
1427 	}
1428 	(void) ddi_taskq_dispatch(ipnet_nicevent_taskq, ipnet_nicevent_task,
1429 	    ipne, DDI_NOSLEEP);
1430 	return (0);
1431 }
1432 
1433 static void
1434 ipnet_nicevent_task(void *arg)
1435 {
1436 	ipnet_nicevent_t	*ipne = arg;
1437 	netstack_t		*ns;
1438 	ipnet_stack_t		*ips;
1439 	boolean_t		isv6;
1440 
1441 	if ((ns = netstack_find_by_stackid(ipne->ipne_stackid)) == NULL)
1442 		goto done;
1443 	ips = ns->netstack_ipnet;
1444 	isv6 = (ipne->ipne_protocol == ips->ips_ndv6);
1445 
1446 	mutex_enter(&ips->ips_event_lock);
1447 	switch (ipne->ipne_event) {
1448 	case NE_PLUMB:
1449 		ipnet_plumb_ev(ipne->ipne_ifindex, ipne->ipne_ifname, ips,
1450 		    isv6);
1451 		break;
1452 	case NE_UNPLUMB:
1453 		ipnet_unplumb_ev(ipne->ipne_ifindex, ips, isv6);
1454 		break;
1455 	case NE_LIF_UP:
1456 		ipnet_lifup_ev(ipne->ipne_ifindex, ipne->ipne_lifindex,
1457 		    ipne->ipne_protocol, ips, isv6);
1458 		break;
1459 	case NE_LIF_DOWN:
1460 		ipnet_lifdown_ev(ipne->ipne_ifindex, ipne->ipne_lifindex, ips,
1461 		    isv6);
1462 		break;
1463 	default:
1464 		break;
1465 	}
1466 	mutex_exit(&ips->ips_event_lock);
1467 done:
1468 	if (ns != NULL)
1469 		netstack_rele(ns);
1470 	kmem_free(ipne, sizeof (ipnet_nicevent_t));
1471 }
1472 
1473 dev_t
1474 ipnet_if_getdev(char *name, zoneid_t zoneid)
1475 {
1476 	netstack_t	*ns;
1477 	ipnet_stack_t	*ips;
1478 	ipnetif_t	*ipnetif;
1479 	dev_t		dev = (dev_t)-1;
1480 
1481 	if (is_system_labeled() && zoneid != GLOBAL_ZONEID)
1482 		return (dev);
1483 	if ((ns = netstack_find_by_zoneid(zoneid)) == NULL)
1484 		return (dev);
1485 
1486 	ips = ns->netstack_ipnet;
1487 	mutex_enter(&ips->ips_avl_lock);
1488 	if ((ipnetif = avl_find(&ips->ips_avl_by_name, name, NULL)) != NULL) {
1489 		if (ipnet_if_in_zone(ipnetif, zoneid, ips))
1490 			dev = ipnetif->if_dev;
1491 	}
1492 	mutex_exit(&ips->ips_avl_lock);
1493 	netstack_rele(ns);
1494 
1495 	return (dev);
1496 }
1497 
1498 static ipnetif_t *
1499 ipnet_if_getby_index(uint64_t id, ipnet_stack_t *ips)
1500 {
1501 	ipnetif_t	*ipnetif;
1502 
1503 	mutex_enter(&ips->ips_avl_lock);
1504 	if ((ipnetif = avl_find(&ips->ips_avl_by_index, &id, NULL)) != NULL)
1505 		ipnetif_refhold(ipnetif);
1506 	mutex_exit(&ips->ips_avl_lock);
1507 	return (ipnetif);
1508 }
1509 
1510 static ipnetif_t *
1511 ipnet_if_getby_dev(dev_t dev, ipnet_stack_t *ips)
1512 {
1513 	ipnetif_t	*ipnetif;
1514 	avl_tree_t	*tree;
1515 
1516 	mutex_enter(&ips->ips_avl_lock);
1517 	tree = &ips->ips_avl_by_index;
1518 	for (ipnetif = avl_first(tree); ipnetif != NULL;
1519 	    ipnetif = avl_walk(tree, ipnetif, AVL_AFTER)) {
1520 		if (ipnetif->if_dev == dev) {
1521 			ipnetif_refhold(ipnetif);
1522 			break;
1523 		}
1524 	}
1525 	mutex_exit(&ips->ips_avl_lock);
1526 	return (ipnetif);
1527 }
1528 
1529 static ipnetif_addr_t *
1530 ipnet_match_lif(ipnetif_t *ipnetif, lif_if_t lid, boolean_t isv6)
1531 {
1532 	ipnetif_addr_t	*ifaddr;
1533 	list_t		*list;
1534 
1535 	mutex_enter(&ipnetif->if_addr_lock);
1536 	list = isv6 ? &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list;
1537 	for (ifaddr = list_head(list); ifaddr != NULL;
1538 	    ifaddr = list_next(list, ifaddr)) {
1539 		if (lid == ifaddr->ifa_id)
1540 			break;
1541 	}
1542 	mutex_exit(&ipnetif->if_addr_lock);
1543 	return (ifaddr);
1544 }
1545 
1546 /* ARGSUSED */
1547 static void *
1548 ipnet_stack_init(netstackid_t stackid, netstack_t *ns)
1549 {
1550 	ipnet_stack_t	*ips;
1551 
1552 	ips = kmem_zalloc(sizeof (*ips), KM_SLEEP);
1553 	ips->ips_netstack = ns;
1554 	mutex_init(&ips->ips_avl_lock, NULL, MUTEX_DEFAULT, 0);
1555 	avl_create(&ips->ips_avl_by_index, ipnet_if_compare_index,
1556 	    sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_index));
1557 	avl_create(&ips->ips_avl_by_name, ipnet_if_compare_name,
1558 	    sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_name));
1559 	mutex_init(&ips->ips_walkers_lock, NULL, MUTEX_DEFAULT, NULL);
1560 	cv_init(&ips->ips_walkers_cv, NULL, CV_DRIVER, NULL);
1561 	list_create(&ips->ips_str_list, sizeof (ipnet_t),
1562 	    offsetof(ipnet_t, ipnet_next));
1563 	ipnet_register_netihook(ips);
1564 	return (ips);
1565 }
1566 
1567 /* ARGSUSED */
1568 static void
1569 ipnet_stack_fini(netstackid_t stackid, void *arg)
1570 {
1571 	ipnet_stack_t	*ips = arg;
1572 	ipnetif_t	*ipnetif, *nipnetif;
1573 
1574 	if (ips->ips_ndv4 != NULL) {
1575 		VERIFY(net_hook_unregister(ips->ips_ndv4, NH_NIC_EVENTS,
1576 		    ips->ips_nicevents) == 0);
1577 		VERIFY(net_protocol_release(ips->ips_ndv4) == 0);
1578 	}
1579 	if (ips->ips_ndv6 != NULL) {
1580 		VERIFY(net_hook_unregister(ips->ips_ndv6, NH_NIC_EVENTS,
1581 		    ips->ips_nicevents) == 0);
1582 		VERIFY(net_protocol_release(ips->ips_ndv6) == 0);
1583 	}
1584 	hook_free(ips->ips_nicevents);
1585 
1586 	for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL;
1587 	    ipnetif = nipnetif) {
1588 		nipnetif = AVL_NEXT(&ips->ips_avl_by_index, ipnetif);
1589 		ipnet_remove_if(ipnetif, ips);
1590 	}
1591 	avl_destroy(&ips->ips_avl_by_index);
1592 	avl_destroy(&ips->ips_avl_by_name);
1593 	mutex_destroy(&ips->ips_avl_lock);
1594 	mutex_destroy(&ips->ips_walkers_lock);
1595 	cv_destroy(&ips->ips_walkers_cv);
1596 	list_destroy(&ips->ips_str_list);
1597 	kmem_free(ips, sizeof (*ips));
1598 }
1599 
1600 /* Do any of the addresses in addrlist belong the supplied zoneid? */
1601 static boolean_t
1602 ipnet_addrs_in_zone(list_t *addrlist, zoneid_t zoneid)
1603 {
1604 	ipnetif_addr_t *ifa;
1605 
1606 	for (ifa = list_head(addrlist); ifa != NULL;
1607 	    ifa = list_next(addrlist, ifa)) {
1608 		if (ifa->ifa_zone == zoneid)
1609 			return (B_TRUE);
1610 	}
1611 	return (B_FALSE);
1612 }
1613 
1614 /* Should the supplied ipnetif be visible from the supplied zoneid? */
1615 static boolean_t
1616 ipnet_if_in_zone(ipnetif_t *ipnetif, zoneid_t zoneid, ipnet_stack_t *ips)
1617 {
1618 	int ret;
1619 
1620 	/*
1621 	 * The global zone has visibility into all interfaces in the global
1622 	 * stack, and exclusive stack zones have visibility into all
1623 	 * interfaces in their stack.
1624 	 */
1625 	if (zoneid == GLOBAL_ZONEID ||
1626 	    ips->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
1627 		return (B_TRUE);
1628 
1629 	/*
1630 	 * Shared-stack zones only have visibility for interfaces that have
1631 	 * addresses in their zone.
1632 	 */
1633 	mutex_enter(&ipnetif->if_addr_lock);
1634 	ret = ipnet_addrs_in_zone(&ipnetif->if_ip4addr_list, zoneid) ||
1635 	    ipnet_addrs_in_zone(&ipnetif->if_ip6addr_list, zoneid);
1636 	mutex_exit(&ipnetif->if_addr_lock);
1637 	return (ret);
1638 }
1639 
1640 /*
1641  * Verify that any ipnet_t that has a reference to the supplied ipnetif should
1642  * still be allowed to have it open.  A given ipnet_t may no longer be allowed
1643  * to have an ipnetif open if there are no longer any addresses that belong to
1644  * the ipnetif in the ipnet_t's non-global shared-stack zoneid.  If that's the
1645  * case, send the ipnet_t an M_HANGUP.
1646  */
1647 static void
1648 ipnet_if_zonecheck(ipnetif_t *ipnetif, ipnet_stack_t *ips)
1649 {
1650 	list_t	*strlist = &ips->ips_str_list;
1651 	ipnet_t	*ipnet;
1652 
1653 	ipnet_walkers_inc(ips);
1654 	for (ipnet = list_head(strlist); ipnet != NULL;
1655 	    ipnet = list_next(strlist, ipnet)) {
1656 		if (ipnet->ipnet_if != ipnetif)
1657 			continue;
1658 		if (!ipnet_if_in_zone(ipnetif, ipnet->ipnet_zoneid, ips))
1659 			(void) putnextctl(ipnet->ipnet_rq, M_HANGUP);
1660 	}
1661 	ipnet_walkers_dec(ips);
1662 }
1663 
1664 void
1665 ipnet_walk_if(ipnet_walkfunc_t *cb, void *arg, zoneid_t zoneid)
1666 {
1667 	ipnetif_t 		*ipnetif;
1668 	list_t			cbdata;
1669 	ipnetif_cbdata_t	*cbnode;
1670 	netstack_t		*ns;
1671 	ipnet_stack_t		*ips;
1672 
1673 	/*
1674 	 * On labeled systems, non-global zones shouldn't see anything
1675 	 * in /dev/ipnet.
1676 	 */
1677 	if (is_system_labeled() && zoneid != GLOBAL_ZONEID)
1678 		return;
1679 
1680 	if ((ns = netstack_find_by_zoneid(zoneid)) == NULL)
1681 		return;
1682 
1683 	ips = ns->netstack_ipnet;
1684 	list_create(&cbdata, sizeof (ipnetif_cbdata_t),
1685 	    offsetof(ipnetif_cbdata_t, ic_next));
1686 
1687 	mutex_enter(&ips->ips_avl_lock);
1688 	for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL;
1689 	    ipnetif = avl_walk(&ips->ips_avl_by_index, ipnetif, AVL_AFTER)) {
1690 		if (!ipnet_if_in_zone(ipnetif, zoneid, ips))
1691 			continue;
1692 		cbnode = kmem_zalloc(sizeof (ipnetif_cbdata_t), KM_SLEEP);
1693 		(void) strlcpy(cbnode->ic_ifname, ipnetif->if_name, LIFNAMSIZ);
1694 		cbnode->ic_dev = ipnetif->if_dev;
1695 		list_insert_head(&cbdata, cbnode);
1696 	}
1697 	mutex_exit(&ips->ips_avl_lock);
1698 
1699 	while ((cbnode = list_head(&cbdata)) != NULL) {
1700 		cb(cbnode->ic_ifname, arg, cbnode->ic_dev);
1701 		list_remove(&cbdata, cbnode);
1702 		kmem_free(cbnode, sizeof (ipnetif_cbdata_t));
1703 	}
1704 	list_destroy(&cbdata);
1705 	netstack_rele(ns);
1706 }
1707 
1708 static int
1709 ipnet_if_compare_index(const void *index_ptr, const void *ipnetifp)
1710 {
1711 	int64_t index1 = *((int64_t *)index_ptr);
1712 	int64_t index2 = (int64_t)((ipnetif_t *)ipnetifp)->if_index;
1713 
1714 	return (SIGNOF(index2 - index1));
1715 }
1716 
1717 static int
1718 ipnet_if_compare_name(const void *name_ptr, const void *ipnetifp)
1719 {
1720 	int res;
1721 
1722 	res = strcmp(((ipnetif_t *)ipnetifp)->if_name, name_ptr);
1723 	return (SIGNOF(res));
1724 }
1725 
1726 static void
1727 ipnetif_refhold(ipnetif_t *ipnetif)
1728 {
1729 	mutex_enter(&ipnetif->if_reflock);
1730 	ipnetif->if_refcnt++;
1731 	mutex_exit(&ipnetif->if_reflock);
1732 }
1733 
1734 static void
1735 ipnetif_refrele(ipnetif_t *ipnetif)
1736 {
1737 	mutex_enter(&ipnetif->if_reflock);
1738 	ASSERT(ipnetif->if_refcnt != 0);
1739 	if (--ipnetif->if_refcnt == 0)
1740 		ipnet_free_if(ipnetif);
1741 	else
1742 		mutex_exit(&ipnetif->if_reflock);
1743 }
1744 
1745 static void
1746 ipnet_walkers_inc(ipnet_stack_t *ips)
1747 {
1748 	mutex_enter(&ips->ips_walkers_lock);
1749 	ips->ips_walkers_cnt++;
1750 	mutex_exit(&ips->ips_walkers_lock);
1751 }
1752 
1753 static void
1754 ipnet_walkers_dec(ipnet_stack_t *ips)
1755 {
1756 	mutex_enter(&ips->ips_walkers_lock);
1757 	ASSERT(ips->ips_walkers_cnt != 0);
1758 	if (--ips->ips_walkers_cnt == 0)
1759 		cv_broadcast(&ips->ips_walkers_cv);
1760 	mutex_exit(&ips->ips_walkers_lock);
1761 }
1762