1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 */
27
28 /*
29 * Copyright (c) 2016, Joyent, Inc. All rights reserved.
30 */
31
/*
 * The ipnet device defined here provides access to packets at the IP layer. To
 * provide access to packets at this layer it registers a callback function in
 * the ip module and when there are open instances of the device ip will pass
 * packets into the device. Packets from ip are passed on the input, output and
 * loopback paths. Internally the module returns to ip as soon as possible by
 * deferring processing using a taskq.
 *
 * Management of the devices in /dev/ipnet/ is handled by the devname
 * filesystem and use of the neti interfaces. This module registers for NIC
 * events using the neti framework so that when IP interfaces are brought up,
 * taken down, etc., the ipnet module is notified and its view of the
 * interfaces configured on the system is adjusted. On attach, the module gets
 * an initial view of the system, again using the neti framework, but as it has
 * already registered for IP interface events, it is still up-to-date with any
 * changes.
 */
48
49 #include <sys/types.h>
50 #include <sys/conf.h>
51 #include <sys/cred.h>
52 #include <sys/stat.h>
53 #include <sys/ddi.h>
54 #include <sys/sunddi.h>
55 #include <sys/modctl.h>
56 #include <sys/dlpi.h>
57 #include <sys/strsun.h>
58 #include <sys/id_space.h>
59 #include <sys/kmem.h>
60 #include <sys/mkdev.h>
61 #include <sys/neti.h>
62 #include <net/if.h>
63 #include <sys/errno.h>
64 #include <sys/list.h>
65 #include <sys/ksynch.h>
66 #include <sys/hook_event.h>
67 #include <sys/sdt.h>
68 #include <sys/stropts.h>
69 #include <sys/sysmacros.h>
70 #include <inet/ip.h>
71 #include <inet/ip_if.h>
72 #include <inet/ip_multi.h>
73 #include <inet/ip6.h>
74 #include <inet/ipnet.h>
75 #include <net/bpf.h>
76 #include <net/bpfdesc.h>
77 #include <net/dlt.h>
78
79 static struct module_info ipnet_minfo = {
80 1, /* mi_idnum */
81 "ipnet", /* mi_idname */
82 0, /* mi_minpsz */
83 INFPSZ, /* mi_maxpsz */
84 2048, /* mi_hiwat */
85 0 /* mi_lowat */
86 };
87
88 /*
89 * List to hold static view of ipnetif_t's on the system. This is needed to
90 * avoid holding the lock protecting the avl tree of ipnetif's over the
91 * callback into the dev filesystem.
92 */
93 typedef struct ipnetif_cbdata {
94 char ic_ifname[LIFNAMSIZ];
95 dev_t ic_dev;
96 list_node_t ic_next;
97 } ipnetif_cbdata_t;
98
/*
 * Classification of an ipnet_addrp_t relative to a single ipnet_t client
 * stream; a convenience type for ipnet_accept().
 */
typedef enum {
	IPNETADDR_MYADDR = 0,	/* an address on my ipnetif_t. */
	IPNETADDR_MBCAST = 1,	/* a multicast or broadcast address. */
	IPNETADDR_UNKNOWN = 2	/* none of the above. */
} ipnet_addrtype_t;
109
110 /* Argument used for the ipnet_nicevent_taskq callback. */
111 typedef struct ipnet_nicevent_s {
112 nic_event_t ipne_event;
113 net_handle_t ipne_protocol;
114 netstackid_t ipne_stackid;
115 uint64_t ipne_ifindex;
116 uint64_t ipne_lifindex;
117 char ipne_ifname[LIFNAMSIZ];
118 } ipnet_nicevent_t;
119
120 static dev_info_t *ipnet_dip;
121 static major_t ipnet_major;
122 static ddi_taskq_t *ipnet_taskq; /* taskq for packets */
123 static ddi_taskq_t *ipnet_nicevent_taskq; /* taskq for NIC events */
124 static id_space_t *ipnet_minor_space;
125 static const int IPNET_MINOR_LO = 1; /* minor number for /dev/lo0 */
126 static const int IPNET_MINOR_MIN = 2; /* start of dynamic minors */
127 static dl_info_ack_t ipnet_infoack = IPNET_INFO_ACK_INIT;
128 static ipnet_acceptfn_t ipnet_accept, ipnet_loaccept;
129 static bpf_itap_fn_t ipnet_itap;
130
131 static void ipnet_input(mblk_t *);
132 static int ipnet_wput(queue_t *, mblk_t *);
133 static int ipnet_rsrv(queue_t *);
134 static int ipnet_open(queue_t *, dev_t *, int, int, cred_t *);
135 static int ipnet_close(queue_t *, int, cred_t *);
136 static void ipnet_ioctl(queue_t *, mblk_t *);
137 static void ipnet_iocdata(queue_t *, mblk_t *);
138 static void ipnet_wputnondata(queue_t *, mblk_t *);
139 static int ipnet_attach(dev_info_t *, ddi_attach_cmd_t);
140 static int ipnet_detach(dev_info_t *, ddi_detach_cmd_t);
141 static int ipnet_devinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
142 static void ipnet_inforeq(queue_t *q, mblk_t *mp);
143 static void ipnet_bindreq(queue_t *q, mblk_t *mp);
144 static void ipnet_unbindreq(queue_t *q, mblk_t *mp);
145 static void ipnet_dlpromisconreq(queue_t *q, mblk_t *mp);
146 static void ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp);
147 static int ipnet_join_allmulti(ipnetif_t *, ipnet_stack_t *);
148 static void ipnet_leave_allmulti(ipnetif_t *, ipnet_stack_t *);
149 static int ipnet_nicevent_cb(hook_event_token_t, hook_data_t, void *);
150 static void ipnet_nicevent_task(void *);
151 static ipnetif_t *ipnetif_create(const char *, uint64_t, ipnet_stack_t *,
152 uint64_t);
153 static void ipnetif_remove(ipnetif_t *, ipnet_stack_t *);
154 static ipnetif_addr_t *ipnet_match_lif(ipnetif_t *, lif_if_t, boolean_t);
155 static ipnetif_t *ipnetif_getby_index(uint64_t, ipnet_stack_t *);
156 static ipnetif_t *ipnetif_getby_dev(dev_t, ipnet_stack_t *);
157 static boolean_t ipnetif_in_zone(ipnetif_t *, zoneid_t, ipnet_stack_t *);
158 static void ipnetif_zonecheck(ipnetif_t *, ipnet_stack_t *);
159 static int ipnet_populate_if(net_handle_t, ipnet_stack_t *, boolean_t);
160 static int ipnetif_compare_name(const void *, const void *);
161 static int ipnetif_compare_name_zone(const void *, const void *);
162 static int ipnetif_compare_index(const void *, const void *);
163 static void ipnet_add_ifaddr(uint64_t, ipnetif_t *, net_handle_t);
164 static void ipnet_delete_ifaddr(ipnetif_addr_t *, ipnetif_t *, boolean_t);
165 static void ipnetif_refhold(ipnetif_t *);
166 static void ipnetif_refrele(ipnetif_t *);
167 static void ipnet_walkers_inc(ipnet_stack_t *);
168 static void ipnet_walkers_dec(ipnet_stack_t *);
169 static void ipnet_register_netihook(ipnet_stack_t *);
170 static void *ipnet_stack_init(netstackid_t, netstack_t *);
171 static void ipnet_stack_fini(netstackid_t, void *);
172 static void ipnet_dispatch(void *);
173 static int ipobs_bounce_func(hook_event_token_t, hook_data_t, void *);
174 static int ipnet_bpf_bounce(hook_event_token_t, hook_data_t, void *);
175 static ipnetif_t *ipnetif_clone_create(ipnetif_t *, zoneid_t);
176 static void ipnetif_clone_release(ipnetif_t *);
177
178 static struct qinit ipnet_rinit = {
179 NULL, /* qi_putp */
180 ipnet_rsrv, /* qi_srvp */
181 ipnet_open, /* qi_qopen */
182 ipnet_close, /* qi_qclose */
183 NULL, /* qi_qadmin */
184 &ipnet_minfo, /* qi_minfo */
185 };
186
187 static struct qinit ipnet_winit = {
188 ipnet_wput, /* qi_putp */
189 NULL, /* qi_srvp */
190 NULL, /* qi_qopen */
191 NULL, /* qi_qclose */
192 NULL, /* qi_qadmin */
193 &ipnet_minfo, /* qi_minfo */
194 };
195
196 static struct streamtab ipnet_info = {
197 &ipnet_rinit, &ipnet_winit
198 };
199
200 DDI_DEFINE_STREAM_OPS(ipnet_ops, nulldev, nulldev, ipnet_attach,
201 ipnet_detach, nodev, ipnet_devinfo, D_MP | D_MTPERMOD, &ipnet_info,
202 ddi_quiesce_not_supported);
203
204 static struct modldrv modldrv = {
205 &mod_driverops,
206 "STREAMS ipnet driver",
207 &ipnet_ops
208 };
209
210 static struct modlinkage modlinkage = {
211 MODREV_1, &modldrv, NULL
212 };
213
214 /*
215 * This structure contains the template data (names and type) that is
216 * copied, in bulk, into the new kstats structure created by net_kstat_create.
217 * No actual statistical information is stored in this instance of the
218 * ipnet_kstats_t structure.
219 */
220 static ipnet_kstats_t stats_template = {
221 { "duplicationFail", KSTAT_DATA_UINT64 },
222 { "dispatchOk", KSTAT_DATA_UINT64 },
223 { "dispatchFail", KSTAT_DATA_UINT64 },
224 { "dispatchHeaderDrop", KSTAT_DATA_UINT64 },
225 { "dispatchDupDrop", KSTAT_DATA_UINT64 },
226 { "dispatchDeliver", KSTAT_DATA_UINT64 },
227 { "acceptOk", KSTAT_DATA_UINT64 },
228 { "acceptFail", KSTAT_DATA_UINT64 }
229 };
230
231 /*
232 * Walk the list of physical interfaces on the machine, for each
233 * interface create a new ipnetif_t and add any addresses to it. We
234 * need to do the walk twice, once for IPv4 and once for IPv6.
235 *
236 * The interfaces are destroyed as part of ipnet_stack_fini() for each
237 * stack. Note that we cannot do this initialization in
238 * ipnet_stack_init(), since ipnet_stack_init() cannot fail.
239 */
240 static int
ipnetif_init(void)241 ipnetif_init(void)
242 {
243 netstack_handle_t nh;
244 netstack_t *ns;
245 ipnet_stack_t *ips;
246 int ret = 0;
247
248 netstack_next_init(&nh);
249 while ((ns = netstack_next(&nh)) != NULL) {
250 ips = ns->netstack_ipnet;
251 if ((ret = ipnet_populate_if(ips->ips_ndv4, ips, B_FALSE)) == 0)
252 ret = ipnet_populate_if(ips->ips_ndv6, ips, B_TRUE);
253 netstack_rele(ns);
254 if (ret != 0)
255 break;
256 }
257 netstack_next_fini(&nh);
258 return (ret);
259 }
260
261 /*
262 * Standard module entry points.
263 */
264 int
_init(void)265 _init(void)
266 {
267 int ret;
268 boolean_t netstack_registered = B_FALSE;
269
270 if ((ipnet_major = ddi_name_to_major("ipnet")) == (major_t)-1)
271 return (ENODEV);
272 ipnet_minor_space = id_space_create("ipnet_minor_space",
273 IPNET_MINOR_MIN, MAXMIN32);
274
275 /*
276 * We call ddi_taskq_create() with nthread == 1 to ensure in-order
277 * delivery of packets to clients. Note that we need to create the
278 * taskqs before calling netstack_register() since ipnet_stack_init()
279 * registers callbacks that use 'em.
280 */
281 ipnet_taskq = ddi_taskq_create(NULL, "ipnet", 1, TASKQ_DEFAULTPRI, 0);
282 ipnet_nicevent_taskq = ddi_taskq_create(NULL, "ipnet_nic_event_queue",
283 1, TASKQ_DEFAULTPRI, 0);
284 if (ipnet_taskq == NULL || ipnet_nicevent_taskq == NULL) {
285 ret = ENOMEM;
286 goto done;
287 }
288
289 netstack_register(NS_IPNET, ipnet_stack_init, NULL, ipnet_stack_fini);
290 netstack_registered = B_TRUE;
291
292 if ((ret = ipnetif_init()) == 0)
293 ret = mod_install(&modlinkage);
294 done:
295 if (ret != 0) {
296 if (ipnet_taskq != NULL)
297 ddi_taskq_destroy(ipnet_taskq);
298 if (ipnet_nicevent_taskq != NULL)
299 ddi_taskq_destroy(ipnet_nicevent_taskq);
300 if (netstack_registered)
301 netstack_unregister(NS_IPNET);
302 id_space_destroy(ipnet_minor_space);
303 }
304 return (ret);
305 }
306
307 int
_fini(void)308 _fini(void)
309 {
310 int err;
311
312 if ((err = mod_remove(&modlinkage)) != 0)
313 return (err);
314
315 netstack_unregister(NS_IPNET);
316 ddi_taskq_destroy(ipnet_nicevent_taskq);
317 ddi_taskq_destroy(ipnet_taskq);
318 id_space_destroy(ipnet_minor_space);
319 return (0);
320 }
321
322 int
_info(struct modinfo * modinfop)323 _info(struct modinfo *modinfop)
324 {
325 return (mod_info(&modlinkage, modinfop));
326 }
327
328 static void
ipnet_register_netihook(ipnet_stack_t * ips)329 ipnet_register_netihook(ipnet_stack_t *ips)
330 {
331 int ret;
332 zoneid_t zoneid;
333 netid_t netid;
334
335 HOOK_INIT(ips->ips_nicevents, ipnet_nicevent_cb, "ipnet_nicevents",
336 ips);
337
338 /*
339 * It is possible for an exclusive stack to be in the process of
340 * shutting down here, and the netid and protocol lookups could fail
341 * in that case.
342 */
343 zoneid = netstackid_to_zoneid(ips->ips_netstack->netstack_stackid);
344 if ((netid = net_zoneidtonetid(zoneid)) == -1)
345 return;
346
347 if ((ips->ips_ndv4 = net_protocol_lookup(netid, NHF_INET)) != NULL) {
348 if ((ret = net_hook_register(ips->ips_ndv4, NH_NIC_EVENTS,
349 ips->ips_nicevents)) != 0) {
350 VERIFY(net_protocol_release(ips->ips_ndv4) == 0);
351 ips->ips_ndv4 = NULL;
352 cmn_err(CE_WARN, "unable to register IPv4 netinfo hooks"
353 " in zone %d: %d", zoneid, ret);
354 }
355 }
356 if ((ips->ips_ndv6 = net_protocol_lookup(netid, NHF_INET6)) != NULL) {
357 if ((ret = net_hook_register(ips->ips_ndv6, NH_NIC_EVENTS,
358 ips->ips_nicevents)) != 0) {
359 VERIFY(net_protocol_release(ips->ips_ndv6) == 0);
360 ips->ips_ndv6 = NULL;
361 cmn_err(CE_WARN, "unable to register IPv6 netinfo hooks"
362 " in zone %d: %d", zoneid, ret);
363 }
364 }
365
366 /*
367 * Create a local set of kstats for each zone.
368 */
369 ips->ips_kstatp = net_kstat_create(netid, "ipnet", 0, "ipnet_stats",
370 "misc", KSTAT_TYPE_NAMED,
371 sizeof (ipnet_kstats_t) / sizeof (kstat_named_t), 0);
372 if (ips->ips_kstatp != NULL) {
373 bcopy(&stats_template, &ips->ips_stats,
374 sizeof (ips->ips_stats));
375 ips->ips_kstatp->ks_data = &ips->ips_stats;
376 ips->ips_kstatp->ks_private =
377 (void *)(uintptr_t)ips->ips_netstack->netstack_stackid;
378 kstat_install(ips->ips_kstatp);
379 } else {
380 cmn_err(CE_WARN, "net_kstat_create(%s,%s,%s) failed",
381 "ipnet", "ipnet_stats", "misc");
382 }
383 }
384
385 /*
386 * This function is called on attach to build an initial view of the
387 * interfaces on the system. It will be called once for IPv4 and once
388 * for IPv6, although there is only one ipnet interface for both IPv4
389 * and IPv6 there are separate address lists.
390 */
391 static int
ipnet_populate_if(net_handle_t nd,ipnet_stack_t * ips,boolean_t isv6)392 ipnet_populate_if(net_handle_t nd, ipnet_stack_t *ips, boolean_t isv6)
393 {
394 phy_if_t phyif;
395 lif_if_t lif;
396 ipnetif_t *ipnetif;
397 char name[LIFNAMSIZ];
398 boolean_t new_if = B_FALSE;
399 uint64_t ifflags;
400 int ret = 0;
401
402 /*
403 * If ipnet_register_netihook() was unable to initialize this
404 * stack's net_handle_t, then we cannot populate any interface
405 * information. This usually happens when we attempted to
406 * grab a net_handle_t as a stack was shutting down. We don't
407 * want to fail the entire _init() operation because of a
408 * stack shutdown (other stacks will continue to work just
409 * fine), so we silently return success here.
410 */
411 if (nd == NULL)
412 return (0);
413
414 /*
415 * Make sure we're not processing NIC events during the
416 * population of our interfaces and address lists.
417 */
418 mutex_enter(&ips->ips_event_lock);
419
420 for (phyif = net_phygetnext(nd, 0); phyif != 0;
421 phyif = net_phygetnext(nd, phyif)) {
422 if (net_getifname(nd, phyif, name, LIFNAMSIZ) != 0)
423 continue;
424 ifflags = 0;
425 (void) net_getlifflags(nd, phyif, 0, &ifflags);
426 if ((ipnetif = ipnetif_getby_index(phyif, ips)) == NULL) {
427 ipnetif = ipnetif_create(name, phyif, ips, ifflags);
428 if (ipnetif == NULL) {
429 ret = ENOMEM;
430 goto done;
431 }
432 new_if = B_TRUE;
433 }
434 ipnetif->if_flags |=
435 isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED;
436
437 for (lif = net_lifgetnext(nd, phyif, 0); lif != 0;
438 lif = net_lifgetnext(nd, phyif, lif)) {
439 /*
440 * Skip addresses that aren't up. We'll add
441 * them when we receive an NE_LIF_UP event.
442 */
443 if (net_getlifflags(nd, phyif, lif, &ifflags) != 0 ||
444 !(ifflags & IFF_UP))
445 continue;
446 /* Don't add it if we already have it. */
447 if (ipnet_match_lif(ipnetif, lif, isv6) != NULL)
448 continue;
449 ipnet_add_ifaddr(lif, ipnetif, nd);
450 }
451 if (!new_if)
452 ipnetif_refrele(ipnetif);
453 }
454
455 done:
456 mutex_exit(&ips->ips_event_lock);
457 return (ret);
458 }
459
460 static int
ipnet_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)461 ipnet_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
462 {
463 if (cmd != DDI_ATTACH)
464 return (DDI_FAILURE);
465
466 if (ddi_create_minor_node(dip, "lo0", S_IFCHR, IPNET_MINOR_LO,
467 DDI_PSEUDO, 0) == DDI_FAILURE)
468 return (DDI_FAILURE);
469
470 ipnet_dip = dip;
471 return (DDI_SUCCESS);
472 }
473
474 static int
ipnet_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)475 ipnet_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
476 {
477 if (cmd != DDI_DETACH)
478 return (DDI_FAILURE);
479
480 ASSERT(dip == ipnet_dip);
481 ddi_remove_minor_node(ipnet_dip, NULL);
482 ipnet_dip = NULL;
483 return (DDI_SUCCESS);
484 }
485
486 /* ARGSUSED */
487 static int
ipnet_devinfo(dev_info_t * dip,ddi_info_cmd_t infocmd,void * arg,void ** result)488 ipnet_devinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
489 {
490 int error = DDI_FAILURE;
491
492 switch (infocmd) {
493 case DDI_INFO_DEVT2INSTANCE:
494 *result = (void *)0;
495 error = DDI_SUCCESS;
496 break;
497 case DDI_INFO_DEVT2DEVINFO:
498 if (ipnet_dip != NULL) {
499 *result = ipnet_dip;
500 error = DDI_SUCCESS;
501 }
502 break;
503 }
504 return (error);
505 }
506
507 /* ARGSUSED */
508 static int
ipnet_open(queue_t * rq,dev_t * dev,int oflag,int sflag,cred_t * crp)509 ipnet_open(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp)
510 {
511 ipnet_t *ipnet;
512 netstack_t *ns = NULL;
513 ipnet_stack_t *ips;
514 int err = 0;
515 zoneid_t zoneid = crgetzoneid(crp);
516
517 /*
518 * If the system is labeled, only the global zone is allowed to open
519 * IP observability nodes.
520 */
521 if (is_system_labeled() && zoneid != GLOBAL_ZONEID)
522 return (EACCES);
523
524 /* We don't support open as a module */
525 if (sflag & MODOPEN)
526 return (ENOTSUP);
527
528 /* This driver is self-cloning, we don't support re-open. */
529 if (rq->q_ptr != NULL)
530 return (EBUSY);
531
532 if ((ipnet = kmem_zalloc(sizeof (*ipnet), KM_NOSLEEP)) == NULL)
533 return (ENOMEM);
534
535 VERIFY((ns = netstack_find_by_cred(crp)) != NULL);
536 ips = ns->netstack_ipnet;
537
538 rq->q_ptr = WR(rq)->q_ptr = ipnet;
539 ipnet->ipnet_rq = rq;
540 ipnet->ipnet_minor = (minor_t)id_alloc(ipnet_minor_space);
541 ipnet->ipnet_zoneid = zoneid;
542 ipnet->ipnet_dlstate = DL_UNBOUND;
543 ipnet->ipnet_ns = ns;
544
545 /*
546 * We need to hold ips_event_lock here as any NE_LIF_DOWN events need
547 * to be processed after ipnet_if is set and the ipnet_t has been
548 * inserted in the ips_str_list.
549 */
550 mutex_enter(&ips->ips_event_lock);
551 if (getminor(*dev) == IPNET_MINOR_LO) {
552 ipnet->ipnet_flags |= IPNET_LOMODE;
553 ipnet->ipnet_acceptfn = ipnet_loaccept;
554 } else {
555 ipnet->ipnet_acceptfn = ipnet_accept;
556 ipnet->ipnet_if = ipnetif_getby_dev(*dev, ips);
557 if (ipnet->ipnet_if == NULL ||
558 !ipnetif_in_zone(ipnet->ipnet_if, zoneid, ips)) {
559 err = ENODEV;
560 goto done;
561 }
562 }
563
564 mutex_enter(&ips->ips_walkers_lock);
565 while (ips->ips_walkers_cnt != 0)
566 cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock);
567 list_insert_head(&ips->ips_str_list, ipnet);
568 *dev = makedevice(getmajor(*dev), ipnet->ipnet_minor);
569 qprocson(rq);
570
571 /*
572 * Only register our callback if we're the first open client; we call
573 * unregister in close() for the last open client.
574 */
575 if (list_head(&ips->ips_str_list) == list_tail(&ips->ips_str_list))
576 ips->ips_hook = ipobs_register_hook(ns, ipnet_input);
577 mutex_exit(&ips->ips_walkers_lock);
578
579 done:
580 mutex_exit(&ips->ips_event_lock);
581 if (err != 0) {
582 netstack_rele(ns);
583 id_free(ipnet_minor_space, ipnet->ipnet_minor);
584 if (ipnet->ipnet_if != NULL)
585 ipnetif_refrele(ipnet->ipnet_if);
586 kmem_free(ipnet, sizeof (*ipnet));
587 }
588 return (err);
589 }
590
591 /* ARGSUSED */
592 static int
ipnet_close(queue_t * rq,int flags __unused,cred_t * credp __unused)593 ipnet_close(queue_t *rq, int flags __unused, cred_t *credp __unused)
594 {
595 ipnet_t *ipnet = rq->q_ptr;
596 ipnet_stack_t *ips = ipnet->ipnet_ns->netstack_ipnet;
597
598 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS)
599 ipnet_leave_allmulti(ipnet->ipnet_if, ips);
600 if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI)
601 ipnet_leave_allmulti(ipnet->ipnet_if, ips);
602
603 mutex_enter(&ips->ips_walkers_lock);
604 while (ips->ips_walkers_cnt != 0)
605 cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock);
606
607 qprocsoff(rq);
608
609 list_remove(&ips->ips_str_list, ipnet);
610 if (ipnet->ipnet_if != NULL)
611 ipnetif_refrele(ipnet->ipnet_if);
612 id_free(ipnet_minor_space, ipnet->ipnet_minor);
613
614 if (list_is_empty(&ips->ips_str_list)) {
615 ipobs_unregister_hook(ips->ips_netstack, ips->ips_hook);
616 ips->ips_hook = NULL;
617 }
618
619 kmem_free(ipnet, sizeof (*ipnet));
620
621 mutex_exit(&ips->ips_walkers_lock);
622 netstack_rele(ips->ips_netstack);
623 return (0);
624 }
625
626 static int
ipnet_wput(queue_t * q,mblk_t * mp)627 ipnet_wput(queue_t *q, mblk_t *mp)
628 {
629 switch (mp->b_datap->db_type) {
630 case M_FLUSH:
631 if (*mp->b_rptr & FLUSHW) {
632 flushq(q, FLUSHDATA);
633 *mp->b_rptr &= ~FLUSHW;
634 }
635 if (*mp->b_rptr & FLUSHR)
636 qreply(q, mp);
637 else
638 freemsg(mp);
639 break;
640 case M_PROTO:
641 case M_PCPROTO:
642 ipnet_wputnondata(q, mp);
643 break;
644 case M_IOCTL:
645 ipnet_ioctl(q, mp);
646 break;
647 case M_IOCDATA:
648 ipnet_iocdata(q, mp);
649 break;
650 default:
651 freemsg(mp);
652 break;
653 }
654 return (0);
655 }
656
657 static int
ipnet_rsrv(queue_t * q)658 ipnet_rsrv(queue_t *q)
659 {
660 mblk_t *mp;
661
662 while ((mp = getq(q)) != NULL) {
663 ASSERT(DB_TYPE(mp) == M_DATA);
664 if (canputnext(q)) {
665 putnext(q, mp);
666 } else {
667 (void) putbq(q, mp);
668 break;
669 }
670 }
671 return (0);
672 }
673
674 static void
ipnet_ioctl(queue_t * q,mblk_t * mp)675 ipnet_ioctl(queue_t *q, mblk_t *mp)
676 {
677 struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
678
679 switch (iocp->ioc_cmd) {
680 case DLIOCRAW:
681 miocack(q, mp, 0, 0);
682 break;
683 case DLIOCIPNETINFO:
684 if (iocp->ioc_count == TRANSPARENT) {
685 mcopyin(mp, NULL, sizeof (uint_t), NULL);
686 qreply(q, mp);
687 break;
688 }
689 /* We don't support I_STR with DLIOCIPNETINFO. */
690 /* FALLTHROUGH */
691 default:
692 miocnak(q, mp, 0, EINVAL);
693 break;
694 }
695 }
696
697 static void
ipnet_iocdata(queue_t * q,mblk_t * mp)698 ipnet_iocdata(queue_t *q, mblk_t *mp)
699 {
700 struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
701 ipnet_t *ipnet = q->q_ptr;
702
703 switch (iocp->ioc_cmd) {
704 case DLIOCIPNETINFO:
705 if (*(int *)mp->b_cont->b_rptr == 1)
706 ipnet->ipnet_flags |= IPNET_INFO;
707 else if (*(int *)mp->b_cont->b_rptr == 0)
708 ipnet->ipnet_flags &= ~IPNET_INFO;
709 else
710 goto iocnak;
711 miocack(q, mp, 0, DL_IPNETINFO_VERSION);
712 break;
713 default:
714 iocnak:
715 miocnak(q, mp, 0, EINVAL);
716 break;
717 }
718 }
719
720 static void
ipnet_wputnondata(queue_t * q,mblk_t * mp)721 ipnet_wputnondata(queue_t *q, mblk_t *mp)
722 {
723 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
724 t_uscalar_t prim = dlp->dl_primitive;
725
726 switch (prim) {
727 case DL_INFO_REQ:
728 ipnet_inforeq(q, mp);
729 break;
730 case DL_UNBIND_REQ:
731 ipnet_unbindreq(q, mp);
732 break;
733 case DL_BIND_REQ:
734 ipnet_bindreq(q, mp);
735 break;
736 case DL_PROMISCON_REQ:
737 ipnet_dlpromisconreq(q, mp);
738 break;
739 case DL_PROMISCOFF_REQ:
740 ipnet_dlpromiscoffreq(q, mp);
741 break;
742 case DL_UNITDATA_REQ:
743 case DL_DETACH_REQ:
744 case DL_PHYS_ADDR_REQ:
745 case DL_SET_PHYS_ADDR_REQ:
746 case DL_ENABMULTI_REQ:
747 case DL_DISABMULTI_REQ:
748 case DL_ATTACH_REQ:
749 dlerrorack(q, mp, prim, DL_UNSUPPORTED, 0);
750 break;
751 default:
752 dlerrorack(q, mp, prim, DL_BADPRIM, 0);
753 break;
754 }
755 }
756
757 static void
ipnet_inforeq(queue_t * q,mblk_t * mp)758 ipnet_inforeq(queue_t *q, mblk_t *mp)
759 {
760 dl_info_ack_t *dlip;
761 size_t size = sizeof (dl_info_ack_t) + sizeof (ushort_t);
762
763 if (MBLKL(mp) < DL_INFO_REQ_SIZE) {
764 dlerrorack(q, mp, DL_INFO_REQ, DL_BADPRIM, 0);
765 return;
766 }
767
768 if ((mp = mexchange(q, mp, size, M_PCPROTO, DL_INFO_ACK)) == NULL)
769 return;
770
771 dlip = (dl_info_ack_t *)mp->b_rptr;
772 *dlip = ipnet_infoack;
773 qreply(q, mp);
774 }
775
776 static void
ipnet_bindreq(queue_t * q,mblk_t * mp)777 ipnet_bindreq(queue_t *q, mblk_t *mp)
778 {
779 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
780 ipnet_t *ipnet = q->q_ptr;
781
782 if (MBLKL(mp) < DL_BIND_REQ_SIZE) {
783 dlerrorack(q, mp, DL_BIND_REQ, DL_BADPRIM, 0);
784 return;
785 }
786
787 switch (dlp->bind_req.dl_sap) {
788 case 0 :
789 ipnet->ipnet_family = AF_UNSPEC;
790 break;
791 case IPV4_VERSION :
792 ipnet->ipnet_family = AF_INET;
793 break;
794 case IPV6_VERSION :
795 ipnet->ipnet_family = AF_INET6;
796 break;
797 default :
798 dlerrorack(q, mp, DL_BIND_REQ, DL_BADSAP, 0);
799 return;
800 /*NOTREACHED*/
801 }
802
803 ipnet->ipnet_dlstate = DL_IDLE;
804 dlbindack(q, mp, dlp->bind_req.dl_sap, 0, 0, 0, 0);
805 }
806
807 static void
ipnet_unbindreq(queue_t * q,mblk_t * mp)808 ipnet_unbindreq(queue_t *q, mblk_t *mp)
809 {
810 ipnet_t *ipnet = q->q_ptr;
811
812 if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) {
813 dlerrorack(q, mp, DL_UNBIND_REQ, DL_BADPRIM, 0);
814 return;
815 }
816
817 if (ipnet->ipnet_dlstate != DL_IDLE) {
818 dlerrorack(q, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0);
819 } else {
820 ipnet->ipnet_dlstate = DL_UNBOUND;
821 ipnet->ipnet_family = AF_UNSPEC;
822 dlokack(q, mp, DL_UNBIND_REQ);
823 }
824 }
825
826 static void
ipnet_dlpromisconreq(queue_t * q,mblk_t * mp)827 ipnet_dlpromisconreq(queue_t *q, mblk_t *mp)
828 {
829 ipnet_t *ipnet = q->q_ptr;
830 t_uscalar_t level;
831 int err;
832
833 if (MBLKL(mp) < DL_PROMISCON_REQ_SIZE) {
834 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0);
835 return;
836 }
837
838 if (ipnet->ipnet_flags & IPNET_LOMODE) {
839 dlokack(q, mp, DL_PROMISCON_REQ);
840 return;
841 }
842
843 level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level;
844 if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) {
845 if ((err = ipnet_join_allmulti(ipnet->ipnet_if,
846 ipnet->ipnet_ns->netstack_ipnet)) != 0) {
847 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_SYSERR, err);
848 return;
849 }
850 }
851
852 switch (level) {
853 case DL_PROMISC_PHYS:
854 ipnet->ipnet_flags |= IPNET_PROMISC_PHYS;
855 break;
856 case DL_PROMISC_SAP:
857 ipnet->ipnet_flags |= IPNET_PROMISC_SAP;
858 break;
859 case DL_PROMISC_MULTI:
860 ipnet->ipnet_flags |= IPNET_PROMISC_MULTI;
861 break;
862 default:
863 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0);
864 return;
865 }
866
867 dlokack(q, mp, DL_PROMISCON_REQ);
868 }
869
870 static void
ipnet_dlpromiscoffreq(queue_t * q,mblk_t * mp)871 ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp)
872 {
873 ipnet_t *ipnet = q->q_ptr;
874 t_uscalar_t level;
875 uint16_t orig_ipnet_flags = ipnet->ipnet_flags;
876
877 if (MBLKL(mp) < DL_PROMISCOFF_REQ_SIZE) {
878 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0);
879 return;
880 }
881
882 if (ipnet->ipnet_flags & IPNET_LOMODE) {
883 dlokack(q, mp, DL_PROMISCOFF_REQ);
884 return;
885 }
886
887 level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level;
888 switch (level) {
889 case DL_PROMISC_PHYS:
890 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS)
891 ipnet->ipnet_flags &= ~IPNET_PROMISC_PHYS;
892 break;
893 case DL_PROMISC_SAP:
894 if (ipnet->ipnet_flags & IPNET_PROMISC_SAP)
895 ipnet->ipnet_flags &= ~IPNET_PROMISC_SAP;
896 break;
897 case DL_PROMISC_MULTI:
898 if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI)
899 ipnet->ipnet_flags &= ~IPNET_PROMISC_MULTI;
900 break;
901 default:
902 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0);
903 return;
904 }
905
906 if (orig_ipnet_flags == ipnet->ipnet_flags) {
907 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_NOTENAB, 0);
908 return;
909 }
910
911 if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) {
912 ipnet_leave_allmulti(ipnet->ipnet_if,
913 ipnet->ipnet_ns->netstack_ipnet);
914 }
915
916 dlokack(q, mp, DL_PROMISCOFF_REQ);
917 }
918
919 static int
ipnet_join_allmulti(ipnetif_t * ipnetif,ipnet_stack_t * ips)920 ipnet_join_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips)
921 {
922 int err = 0;
923 ip_stack_t *ipst = ips->ips_netstack->netstack_ip;
924 uint64_t index = ipnetif->if_index;
925
926 mutex_enter(&ips->ips_event_lock);
927 if (ipnetif->if_multicnt == 0) {
928 ASSERT((ipnetif->if_flags &
929 (IPNETIF_IPV4ALLMULTI | IPNETIF_IPV6ALLMULTI)) == 0);
930 if (ipnetif->if_flags & IPNETIF_IPV4PLUMBED) {
931 err = ip_join_allmulti(index, B_FALSE, ipst);
932 if (err != 0)
933 goto done;
934 ipnetif->if_flags |= IPNETIF_IPV4ALLMULTI;
935 }
936 if (ipnetif->if_flags & IPNETIF_IPV6PLUMBED) {
937 err = ip_join_allmulti(index, B_TRUE, ipst);
938 if (err != 0 &&
939 (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI)) {
940 (void) ip_leave_allmulti(index, B_FALSE, ipst);
941 ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI;
942 goto done;
943 }
944 ipnetif->if_flags |= IPNETIF_IPV6ALLMULTI;
945 }
946 }
947 ipnetif->if_multicnt++;
948
949 done:
950 mutex_exit(&ips->ips_event_lock);
951 return (err);
952 }
953
954 static void
ipnet_leave_allmulti(ipnetif_t * ipnetif,ipnet_stack_t * ips)955 ipnet_leave_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips)
956 {
957 int err;
958 ip_stack_t *ipst = ips->ips_netstack->netstack_ip;
959 uint64_t index = ipnetif->if_index;
960
961 mutex_enter(&ips->ips_event_lock);
962 ASSERT(ipnetif->if_multicnt != 0);
963 if (--ipnetif->if_multicnt == 0) {
964 if (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI) {
965 err = ip_leave_allmulti(index, B_FALSE, ipst);
966 ASSERT(err == 0 || err == ENODEV);
967 ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI;
968 }
969 if (ipnetif->if_flags & IPNETIF_IPV6ALLMULTI) {
970 err = ip_leave_allmulti(index, B_TRUE, ipst);
971 ASSERT(err == 0 || err == ENODEV);
972 ipnetif->if_flags &= ~IPNETIF_IPV6ALLMULTI;
973 }
974 }
975 mutex_exit(&ips->ips_event_lock);
976 }
977
978 /*
979 * Allocate a new mblk_t and put a dl_ipnetinfo_t in it.
980 * The structure it copies the header information from,
981 * hook_pkt_observe_t, is constructed using network byte
982 * order in ipobs_hook(), so there is no conversion here.
983 */
984 static mblk_t *
ipnet_addheader(hook_pkt_observe_t * hdr,mblk_t * mp)985 ipnet_addheader(hook_pkt_observe_t *hdr, mblk_t *mp)
986 {
987 mblk_t *dlhdr;
988 dl_ipnetinfo_t *dl;
989
990 if ((dlhdr = allocb(sizeof (dl_ipnetinfo_t), BPRI_HI)) == NULL) {
991 freemsg(mp);
992 return (NULL);
993 }
994 dl = (dl_ipnetinfo_t *)dlhdr->b_rptr;
995 dl->dli_version = DL_IPNETINFO_VERSION;
996 dl->dli_family = hdr->hpo_family;
997 dl->dli_htype = hdr->hpo_htype;
998 dl->dli_pktlen = hdr->hpo_pktlen;
999 dl->dli_ifindex = hdr->hpo_ifindex;
1000 dl->dli_grifindex = hdr->hpo_grifindex;
1001 dl->dli_zsrc = hdr->hpo_zsrc;
1002 dl->dli_zdst = hdr->hpo_zdst;
1003 dlhdr->b_wptr += sizeof (*dl);
1004 dlhdr->b_cont = mp;
1005
1006 return (dlhdr);
1007 }
1008
1009 static ipnet_addrtype_t
ipnet_get_addrtype(ipnet_t * ipnet,ipnet_addrp_t * addr)1010 ipnet_get_addrtype(ipnet_t *ipnet, ipnet_addrp_t *addr)
1011 {
1012 list_t *list;
1013 ipnetif_t *ipnetif = ipnet->ipnet_if;
1014 ipnetif_addr_t *ifaddr;
1015 ipnet_addrtype_t addrtype = IPNETADDR_UNKNOWN;
1016
1017 /* First check if the address is multicast or limited broadcast. */
1018 switch (addr->iap_family) {
1019 case AF_INET:
1020 if (CLASSD(*(addr->iap_addr4)) ||
1021 *(addr->iap_addr4) == INADDR_BROADCAST)
1022 return (IPNETADDR_MBCAST);
1023 break;
1024 case AF_INET6:
1025 if (IN6_IS_ADDR_MULTICAST(addr->iap_addr6))
1026 return (IPNETADDR_MBCAST);
1027 break;
1028 }
1029
1030 /*
1031 * Walk the address list to see if the address belongs to our
1032 * interface or is one of our subnet broadcast addresses.
1033 */
1034 mutex_enter(&ipnetif->if_addr_lock);
1035 list = (addr->iap_family == AF_INET) ?
1036 &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list;
1037 for (ifaddr = list_head(list);
1038 ifaddr != NULL && addrtype == IPNETADDR_UNKNOWN;
1039 ifaddr = list_next(list, ifaddr)) {
1040 /*
1041 * If we're not in the global zone, then only look at
1042 * addresses in our zone.
1043 */
1044 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID &&
1045 ipnet->ipnet_zoneid != ifaddr->ifa_zone)
1046 continue;
1047 switch (addr->iap_family) {
1048 case AF_INET:
1049 if (ifaddr->ifa_ip4addr != INADDR_ANY &&
1050 *(addr->iap_addr4) == ifaddr->ifa_ip4addr)
1051 addrtype = IPNETADDR_MYADDR;
1052 else if (ifaddr->ifa_brdaddr != INADDR_ANY &&
1053 *(addr->iap_addr4) == ifaddr->ifa_brdaddr)
1054 addrtype = IPNETADDR_MBCAST;
1055 break;
1056 case AF_INET6:
1057 if (IN6_ARE_ADDR_EQUAL(addr->iap_addr6,
1058 &ifaddr->ifa_ip6addr))
1059 addrtype = IPNETADDR_MYADDR;
1060 break;
1061 }
1062 }
1063 mutex_exit(&ipnetif->if_addr_lock);
1064
1065 return (addrtype);
1066 }
1067
1068 /*
1069 * Verify if the packet contained in hdr should be passed up to the
1070 * ipnet client stream.
1071 */
1072 static boolean_t
ipnet_accept(ipnet_t * ipnet,hook_pkt_observe_t * hdr,ipnet_addrp_t * src,ipnet_addrp_t * dst)1073 ipnet_accept(ipnet_t *ipnet, hook_pkt_observe_t *hdr, ipnet_addrp_t *src,
1074 ipnet_addrp_t *dst)
1075 {
1076 boolean_t obsif;
1077 uint64_t ifindex = ipnet->ipnet_if->if_index;
1078 ipnet_addrtype_t srctype;
1079 ipnet_addrtype_t dsttype;
1080
1081 srctype = ipnet_get_addrtype(ipnet, src);
1082 dsttype = ipnet_get_addrtype(ipnet, dst);
1083
1084 /*
1085 * If the packet's ifindex matches ours, or the packet's group ifindex
1086 * matches ours, it's on the interface we're observing. (Thus,
1087 * observing on the group ifindex matches all ifindexes in the group.)
1088 */
1089 obsif = (ntohl(hdr->hpo_ifindex) == ifindex ||
1090 ntohl(hdr->hpo_grifindex) == ifindex);
1091
1092 DTRACE_PROBE5(ipnet_accept__addr,
1093 ipnet_addrtype_t, srctype, ipnet_addrp_t *, src,
1094 ipnet_addrtype_t, dsttype, ipnet_addrp_t *, dst,
1095 boolean_t, obsif);
1096
1097 /*
1098 * Do not allow an ipnet stream to see packets that are not from or to
1099 * its zone. The exception is when zones are using the shared stack
1100 * model. In this case, streams in the global zone have visibility
1101 * into other shared-stack zones, and broadcast and multicast traffic
1102 * is visible by all zones in the stack.
1103 */
1104 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID &&
1105 dsttype != IPNETADDR_MBCAST) {
1106 if (ipnet->ipnet_zoneid != ntohl(hdr->hpo_zsrc) &&
1107 ipnet->ipnet_zoneid != ntohl(hdr->hpo_zdst))
1108 return (B_FALSE);
1109 }
1110
1111 /*
1112 * If DL_PROMISC_SAP isn't enabled, then the bound SAP must match the
1113 * packet's IP version.
1114 */
1115 if (!(ipnet->ipnet_flags & IPNET_PROMISC_SAP) &&
1116 ipnet->ipnet_family != hdr->hpo_family)
1117 return (B_FALSE);
1118
1119 /* If the destination address is ours, then accept the packet. */
1120 if (dsttype == IPNETADDR_MYADDR)
1121 return (B_TRUE);
1122
1123 /*
1124 * If DL_PROMISC_PHYS is enabled, then we can see all packets that are
1125 * sent or received on the interface we're observing, or packets that
1126 * have our source address (this allows us to see packets we send).
1127 */
1128 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) {
1129 if (srctype == IPNETADDR_MYADDR || obsif)
1130 return (B_TRUE);
1131 }
1132
1133 /*
1134 * We accept multicast and broadcast packets transmitted or received
1135 * on the interface we're observing.
1136 */
1137 if (dsttype == IPNETADDR_MBCAST && obsif)
1138 return (B_TRUE);
1139
1140 return (B_FALSE);
1141 }
1142
1143 /*
1144 * Verify if the packet contained in hdr should be passed up to the ipnet
1145 * client stream that's in IPNET_LOMODE.
1146 */
1147 /* ARGSUSED */
1148 static boolean_t
ipnet_loaccept(ipnet_t * ipnet,hook_pkt_observe_t * hdr,ipnet_addrp_t * src,ipnet_addrp_t * dst)1149 ipnet_loaccept(ipnet_t *ipnet, hook_pkt_observe_t *hdr, ipnet_addrp_t *src,
1150 ipnet_addrp_t *dst)
1151 {
1152 if (hdr->hpo_htype != htons(IPOBS_HOOK_LOCAL)) {
1153 /*
1154 * ipnet_if is only NULL for IPNET_MINOR_LO devices.
1155 */
1156 if (ipnet->ipnet_if == NULL)
1157 return (B_FALSE);
1158 }
1159
1160 /*
1161 * An ipnet stream must not see packets that are not from/to its zone.
1162 */
1163 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID) {
1164 if (ipnet->ipnet_zoneid != ntohl(hdr->hpo_zsrc) &&
1165 ipnet->ipnet_zoneid != ntohl(hdr->hpo_zdst))
1166 return (B_FALSE);
1167 }
1168
1169 return (ipnet->ipnet_family == AF_UNSPEC ||
1170 ipnet->ipnet_family == hdr->hpo_family);
1171 }
1172
1173 static void
ipnet_dispatch(void * arg)1174 ipnet_dispatch(void *arg)
1175 {
1176 mblk_t *mp = arg;
1177 hook_pkt_observe_t *hdr = (hook_pkt_observe_t *)mp->b_rptr;
1178 ipnet_t *ipnet;
1179 mblk_t *netmp;
1180 list_t *list;
1181 ipnet_stack_t *ips;
1182 ipnet_addrp_t src;
1183 ipnet_addrp_t dst;
1184
1185 ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet;
1186
1187 netmp = hdr->hpo_pkt->b_cont;
1188 src.iap_family = hdr->hpo_family;
1189 dst.iap_family = hdr->hpo_family;
1190
1191 if (hdr->hpo_family == AF_INET) {
1192 src.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_src;
1193 dst.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_dst;
1194 } else {
1195 src.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_src;
1196 dst.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_dst;
1197 }
1198
1199 ipnet_walkers_inc(ips);
1200
1201 list = &ips->ips_str_list;
1202 for (ipnet = list_head(list); ipnet != NULL;
1203 ipnet = list_next(list, ipnet)) {
1204 if (!(*ipnet->ipnet_acceptfn)(ipnet, hdr, &src, &dst)) {
1205 IPSK_BUMP(ips, ik_acceptFail);
1206 continue;
1207 }
1208 IPSK_BUMP(ips, ik_acceptOk);
1209
1210 if (list_next(list, ipnet) == NULL) {
1211 netmp = hdr->hpo_pkt->b_cont;
1212 hdr->hpo_pkt->b_cont = NULL;
1213 } else {
1214 if ((netmp = dupmsg(hdr->hpo_pkt->b_cont)) == NULL &&
1215 (netmp = copymsg(hdr->hpo_pkt->b_cont)) == NULL) {
1216 IPSK_BUMP(ips, ik_duplicationFail);
1217 continue;
1218 }
1219 }
1220
1221 if (ipnet->ipnet_flags & IPNET_INFO) {
1222 if ((netmp = ipnet_addheader(hdr, netmp)) == NULL) {
1223 IPSK_BUMP(ips, ik_dispatchHeaderDrop);
1224 continue;
1225 }
1226 }
1227
1228 if (ipnet->ipnet_rq->q_first == NULL &&
1229 canputnext(ipnet->ipnet_rq)) {
1230 putnext(ipnet->ipnet_rq, netmp);
1231 IPSK_BUMP(ips, ik_dispatchDeliver);
1232 } else if (canput(ipnet->ipnet_rq)) {
1233 (void) putq(ipnet->ipnet_rq, netmp);
1234 IPSK_BUMP(ips, ik_dispatchDeliver);
1235 } else {
1236 freemsg(netmp);
1237 IPSK_BUMP(ips, ik_dispatchPutDrop);
1238 }
1239 }
1240
1241 ipnet_walkers_dec(ips);
1242
1243 freemsg(mp);
1244 }
1245
1246 static void
ipnet_input(mblk_t * mp)1247 ipnet_input(mblk_t *mp)
1248 {
1249 hook_pkt_observe_t *hdr = (hook_pkt_observe_t *)mp->b_rptr;
1250 ipnet_stack_t *ips;
1251
1252 ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet;
1253
1254 if (ddi_taskq_dispatch(ipnet_taskq, ipnet_dispatch, mp, DDI_NOSLEEP) !=
1255 DDI_SUCCESS) {
1256 IPSK_BUMP(ips, ik_dispatchFail);
1257 freemsg(mp);
1258 } else {
1259 IPSK_BUMP(ips, ik_dispatchOk);
1260 }
1261 }
1262
1263 static ipnetif_t *
ipnet_alloc_if(ipnet_stack_t * ips)1264 ipnet_alloc_if(ipnet_stack_t *ips)
1265 {
1266 ipnetif_t *ipnetif;
1267
1268 if ((ipnetif = kmem_zalloc(sizeof (*ipnetif), KM_NOSLEEP)) == NULL)
1269 return (NULL);
1270
1271 mutex_init(&ipnetif->if_addr_lock, NULL, MUTEX_DEFAULT, 0);
1272 list_create(&ipnetif->if_ip4addr_list, sizeof (ipnetif_addr_t),
1273 offsetof(ipnetif_addr_t, ifa_link));
1274 list_create(&ipnetif->if_ip6addr_list, sizeof (ipnetif_addr_t),
1275 offsetof(ipnetif_addr_t, ifa_link));
1276 mutex_init(&ipnetif->if_reflock, NULL, MUTEX_DEFAULT, 0);
1277
1278 ipnetif->if_stackp = ips;
1279
1280 return (ipnetif);
1281 }
1282
1283 /*
1284 * Create a new ipnetif_t and new minor node for it. If creation is
1285 * successful the new ipnetif_t is inserted into an avl_tree
1286 * containing ipnetif's for this stack instance.
1287 */
1288 static ipnetif_t *
ipnetif_create(const char * name,uint64_t index,ipnet_stack_t * ips,uint64_t ifflags)1289 ipnetif_create(const char *name, uint64_t index, ipnet_stack_t *ips,
1290 uint64_t ifflags)
1291 {
1292 ipnetif_t *ipnetif;
1293 avl_index_t where = 0;
1294 minor_t ifminor;
1295
1296 /*
1297 * Because ipnetif_create() can be called from a NIC event
1298 * callback, it should not block.
1299 */
1300 ifminor = (minor_t)id_alloc_nosleep(ipnet_minor_space);
1301 if (ifminor == (minor_t)-1)
1302 return (NULL);
1303 if ((ipnetif = ipnet_alloc_if(ips)) == NULL) {
1304 id_free(ipnet_minor_space, ifminor);
1305 return (NULL);
1306 }
1307
1308 (void) strlcpy(ipnetif->if_name, name, LIFNAMSIZ);
1309 ipnetif->if_index = (uint_t)index;
1310 ipnetif->if_zoneid = netstack_get_zoneid(ips->ips_netstack);
1311 ipnetif->if_dev = makedevice(ipnet_major, ifminor);
1312
1313 ipnetif->if_refcnt = 1;
1314 if ((ifflags & IFF_LOOPBACK) != 0)
1315 ipnetif->if_flags = IPNETIF_LOOPBACK;
1316
1317 mutex_enter(&ips->ips_avl_lock);
1318 VERIFY(avl_find(&ips->ips_avl_by_index, &index, &where) == NULL);
1319 avl_insert(&ips->ips_avl_by_index, ipnetif, where);
1320 VERIFY(avl_find(&ips->ips_avl_by_name, (void *)name, &where) == NULL);
1321 avl_insert(&ips->ips_avl_by_name, ipnetif, where);
1322 mutex_exit(&ips->ips_avl_lock);
1323
1324 return (ipnetif);
1325 }
1326
1327 static void
ipnetif_remove(ipnetif_t * ipnetif,ipnet_stack_t * ips)1328 ipnetif_remove(ipnetif_t *ipnetif, ipnet_stack_t *ips)
1329 {
1330 ipnet_t *ipnet;
1331
1332 ipnet_walkers_inc(ips);
1333 /* Send a SIGHUP to all open streams associated with this ipnetif. */
1334 for (ipnet = list_head(&ips->ips_str_list); ipnet != NULL;
1335 ipnet = list_next(&ips->ips_str_list, ipnet)) {
1336 if (ipnet->ipnet_if == ipnetif)
1337 (void) putnextctl(ipnet->ipnet_rq, M_HANGUP);
1338 }
1339 ipnet_walkers_dec(ips);
1340 mutex_enter(&ips->ips_avl_lock);
1341 avl_remove(&ips->ips_avl_by_index, ipnetif);
1342 avl_remove(&ips->ips_avl_by_name, ipnetif);
1343 mutex_exit(&ips->ips_avl_lock);
1344 /*
1345 * Release the reference we implicitly held in ipnetif_create().
1346 */
1347 ipnetif_refrele(ipnetif);
1348 }
1349
1350 static void
ipnet_purge_addrlist(list_t * addrlist)1351 ipnet_purge_addrlist(list_t *addrlist)
1352 {
1353 ipnetif_addr_t *ifa;
1354
1355 while ((ifa = list_head(addrlist)) != NULL) {
1356 list_remove(addrlist, ifa);
1357 if (ifa->ifa_shared != NULL)
1358 ipnetif_clone_release(ifa->ifa_shared);
1359 kmem_free(ifa, sizeof (*ifa));
1360 }
1361 }
1362
1363 static void
ipnetif_free(ipnetif_t * ipnetif)1364 ipnetif_free(ipnetif_t *ipnetif)
1365 {
1366 ASSERT(ipnetif->if_refcnt == 0);
1367 ASSERT(ipnetif->if_sharecnt == 0);
1368
1369 /* Remove IPv4/v6 address lists from the ipnetif */
1370 ipnet_purge_addrlist(&ipnetif->if_ip4addr_list);
1371 list_destroy(&ipnetif->if_ip4addr_list);
1372 ipnet_purge_addrlist(&ipnetif->if_ip6addr_list);
1373 list_destroy(&ipnetif->if_ip6addr_list);
1374 mutex_destroy(&ipnetif->if_addr_lock);
1375 mutex_destroy(&ipnetif->if_reflock);
1376 if (ipnetif->if_dev != 0)
1377 id_free(ipnet_minor_space, getminor(ipnetif->if_dev));
1378 kmem_free(ipnetif, sizeof (*ipnetif));
1379 }
1380
1381 /*
1382 * Create an ipnetif_addr_t with the given logical interface id (lif)
1383 * and add it to the supplied ipnetif. The lif is the netinfo
1384 * representation of logical interface id, and we use this id to match
1385 * incoming netinfo events against our lists of addresses.
1386 */
1387 static void
ipnet_add_ifaddr(uint64_t lif,ipnetif_t * ipnetif,net_handle_t nd)1388 ipnet_add_ifaddr(uint64_t lif, ipnetif_t *ipnetif, net_handle_t nd)
1389 {
1390 ipnetif_addr_t *ifaddr;
1391 zoneid_t zoneid;
1392 struct sockaddr_in bcast;
1393 struct sockaddr_storage addr;
1394 net_ifaddr_t type = NA_ADDRESS;
1395 uint64_t phyif = ipnetif->if_index;
1396
1397 if (net_getlifaddr(nd, phyif, lif, 1, &type, &addr) != 0 ||
1398 net_getlifzone(nd, phyif, lif, &zoneid) != 0)
1399 return;
1400
1401 if ((ifaddr = kmem_alloc(sizeof (*ifaddr), KM_NOSLEEP)) == NULL)
1402 return;
1403 ifaddr->ifa_zone = zoneid;
1404 ifaddr->ifa_id = lif;
1405 ifaddr->ifa_shared = NULL;
1406
1407 switch (addr.ss_family) {
1408 case AF_INET:
1409 ifaddr->ifa_ip4addr =
1410 ((struct sockaddr_in *)&addr)->sin_addr.s_addr;
1411 /*
1412 * Try and get the broadcast address. Note that it's okay for
1413 * an interface to not have a broadcast address, so we don't
1414 * fail the entire operation if net_getlifaddr() fails here.
1415 */
1416 type = NA_BROADCAST;
1417 if (net_getlifaddr(nd, phyif, lif, 1, &type, &bcast) == 0)
1418 ifaddr->ifa_brdaddr = bcast.sin_addr.s_addr;
1419 break;
1420 case AF_INET6:
1421 ifaddr->ifa_ip6addr = ((struct sockaddr_in6 *)&addr)->sin6_addr;
1422 break;
1423 }
1424
1425 /*
1426 * The zoneid stored in ipnetif_t needs to correspond to the actual
1427 * zone the address is being used in. This facilitates finding the
1428 * correct netstack_t pointer, amongst other things, later.
1429 */
1430 if (zoneid == ALL_ZONES)
1431 zoneid = GLOBAL_ZONEID;
1432
1433 mutex_enter(&ipnetif->if_addr_lock);
1434 if (zoneid != ipnetif->if_zoneid) {
1435 ipnetif_t *ifp2;
1436
1437 ifp2 = ipnetif_clone_create(ipnetif, zoneid);
1438 ifaddr->ifa_shared = ifp2;
1439 }
1440 list_insert_tail(addr.ss_family == AF_INET ?
1441 &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list, ifaddr);
1442 mutex_exit(&ipnetif->if_addr_lock);
1443 }
1444
1445 static void
ipnet_delete_ifaddr(ipnetif_addr_t * ifaddr,ipnetif_t * ipnetif,boolean_t isv6)1446 ipnet_delete_ifaddr(ipnetif_addr_t *ifaddr, ipnetif_t *ipnetif, boolean_t isv6)
1447 {
1448 mutex_enter(&ipnetif->if_addr_lock);
1449 if (ifaddr->ifa_shared != NULL)
1450 ipnetif_clone_release(ifaddr->ifa_shared);
1451
1452 list_remove(isv6 ?
1453 &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list, ifaddr);
1454 mutex_exit(&ipnetif->if_addr_lock);
1455 kmem_free(ifaddr, sizeof (*ifaddr));
1456 }
1457
1458 static void
ipnet_plumb_ev(ipnet_nicevent_t * ipne,ipnet_stack_t * ips,boolean_t isv6)1459 ipnet_plumb_ev(ipnet_nicevent_t *ipne, ipnet_stack_t *ips, boolean_t isv6)
1460 {
1461 ipnetif_t *ipnetif;
1462 boolean_t refrele_needed = B_TRUE;
1463 uint64_t ifflags;
1464 uint64_t ifindex;
1465 char *ifname;
1466
1467 ifflags = 0;
1468 ifname = ipne->ipne_ifname;
1469 ifindex = ipne->ipne_ifindex;
1470
1471 (void) net_getlifflags(ipne->ipne_protocol, ifindex, 0, &ifflags);
1472
1473 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL) {
1474 ipnetif = ipnetif_create(ifname, ifindex, ips, ifflags);
1475 refrele_needed = B_FALSE;
1476 }
1477 if (ipnetif != NULL) {
1478 ipnetif->if_flags |=
1479 isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED;
1480 }
1481
1482 if (ipnetif->if_multicnt != 0) {
1483 if (ip_join_allmulti(ifindex, isv6,
1484 ips->ips_netstack->netstack_ip) == 0) {
1485 ipnetif->if_flags |=
1486 isv6 ? IPNETIF_IPV6ALLMULTI : IPNETIF_IPV4ALLMULTI;
1487 }
1488 }
1489
1490 if (refrele_needed)
1491 ipnetif_refrele(ipnetif);
1492 }
1493
1494 static void
ipnet_unplumb_ev(uint64_t ifindex,ipnet_stack_t * ips,boolean_t isv6)1495 ipnet_unplumb_ev(uint64_t ifindex, ipnet_stack_t *ips, boolean_t isv6)
1496 {
1497 ipnetif_t *ipnetif;
1498
1499 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL)
1500 return;
1501
1502 mutex_enter(&ipnetif->if_addr_lock);
1503 ipnet_purge_addrlist(isv6 ?
1504 &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list);
1505 mutex_exit(&ipnetif->if_addr_lock);
1506
1507 /*
1508 * Note that we have one ipnetif for both IPv4 and IPv6, but we receive
1509 * separate NE_UNPLUMB events for IPv4 and IPv6. We remove the ipnetif
1510 * if both IPv4 and IPv6 interfaces have been unplumbed.
1511 */
1512 ipnetif->if_flags &= isv6 ? ~IPNETIF_IPV6PLUMBED : ~IPNETIF_IPV4PLUMBED;
1513 if (!(ipnetif->if_flags & (IPNETIF_IPV4PLUMBED | IPNETIF_IPV6PLUMBED)))
1514 ipnetif_remove(ipnetif, ips);
1515 ipnetif_refrele(ipnetif);
1516 }
1517
1518 static void
ipnet_lifup_ev(uint64_t ifindex,uint64_t lifindex,net_handle_t nd,ipnet_stack_t * ips,boolean_t isv6)1519 ipnet_lifup_ev(uint64_t ifindex, uint64_t lifindex, net_handle_t nd,
1520 ipnet_stack_t *ips, boolean_t isv6)
1521 {
1522 ipnetif_t *ipnetif;
1523 ipnetif_addr_t *ifaddr;
1524
1525 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL)
1526 return;
1527 if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL) {
1528 /*
1529 * We must have missed a NE_LIF_DOWN event. Delete this
1530 * ifaddr and re-create it.
1531 */
1532 ipnet_delete_ifaddr(ifaddr, ipnetif, isv6);
1533 }
1534
1535 ipnet_add_ifaddr(lifindex, ipnetif, nd);
1536 ipnetif_refrele(ipnetif);
1537 }
1538
1539 static void
ipnet_lifdown_ev(uint64_t ifindex,uint64_t lifindex,ipnet_stack_t * ips,boolean_t isv6)1540 ipnet_lifdown_ev(uint64_t ifindex, uint64_t lifindex, ipnet_stack_t *ips,
1541 boolean_t isv6)
1542 {
1543 ipnetif_t *ipnetif;
1544 ipnetif_addr_t *ifaddr;
1545
1546 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL)
1547 return;
1548 if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL)
1549 ipnet_delete_ifaddr(ifaddr, ipnetif, isv6);
1550 ipnetif_refrele(ipnetif);
1551 /*
1552 * Make sure that open streams on this ipnetif are still allowed to
1553 * have it open.
1554 */
1555 ipnetif_zonecheck(ipnetif, ips);
1556 }
1557
1558 /*
1559 * This callback from the NIC event framework dispatches a taskq as the event
1560 * handlers may block.
1561 */
1562 /* ARGSUSED */
1563 static int
ipnet_nicevent_cb(hook_event_token_t token,hook_data_t info,void * arg)1564 ipnet_nicevent_cb(hook_event_token_t token, hook_data_t info, void *arg)
1565 {
1566 ipnet_stack_t *ips = arg;
1567 hook_nic_event_t *hn = (hook_nic_event_t *)info;
1568 ipnet_nicevent_t *ipne;
1569
1570 if ((ipne = kmem_alloc(sizeof (ipnet_nicevent_t), KM_NOSLEEP)) == NULL)
1571 return (0);
1572 ipne->ipne_event = hn->hne_event;
1573 ipne->ipne_protocol = hn->hne_protocol;
1574 ipne->ipne_stackid = ips->ips_netstack->netstack_stackid;
1575 ipne->ipne_ifindex = hn->hne_nic;
1576 ipne->ipne_lifindex = hn->hne_lif;
1577 if (hn->hne_datalen != 0) {
1578 (void) strlcpy(ipne->ipne_ifname, hn->hne_data,
1579 sizeof (ipne->ipne_ifname));
1580 }
1581 (void) ddi_taskq_dispatch(ipnet_nicevent_taskq, ipnet_nicevent_task,
1582 ipne, DDI_NOSLEEP);
1583 return (0);
1584 }
1585
1586 static void
ipnet_nicevent_task(void * arg)1587 ipnet_nicevent_task(void *arg)
1588 {
1589 ipnet_nicevent_t *ipne = arg;
1590 netstack_t *ns;
1591 ipnet_stack_t *ips;
1592 boolean_t isv6;
1593
1594 if ((ns = netstack_find_by_stackid(ipne->ipne_stackid)) == NULL)
1595 goto done;
1596 ips = ns->netstack_ipnet;
1597 isv6 = (ipne->ipne_protocol == ips->ips_ndv6);
1598
1599 mutex_enter(&ips->ips_event_lock);
1600 switch (ipne->ipne_event) {
1601 case NE_PLUMB:
1602 ipnet_plumb_ev(ipne, ips, isv6);
1603 break;
1604 case NE_UNPLUMB:
1605 ipnet_unplumb_ev(ipne->ipne_ifindex, ips, isv6);
1606 break;
1607 case NE_LIF_UP:
1608 ipnet_lifup_ev(ipne->ipne_ifindex, ipne->ipne_lifindex,
1609 ipne->ipne_protocol, ips, isv6);
1610 break;
1611 case NE_LIF_DOWN:
1612 ipnet_lifdown_ev(ipne->ipne_ifindex, ipne->ipne_lifindex, ips,
1613 isv6);
1614 break;
1615 default:
1616 break;
1617 }
1618 mutex_exit(&ips->ips_event_lock);
1619 done:
1620 if (ns != NULL)
1621 netstack_rele(ns);
1622 kmem_free(ipne, sizeof (ipnet_nicevent_t));
1623 }
1624
1625 dev_t
ipnet_if_getdev(char * name,zoneid_t zoneid)1626 ipnet_if_getdev(char *name, zoneid_t zoneid)
1627 {
1628 netstack_t *ns;
1629 ipnet_stack_t *ips;
1630 ipnetif_t *ipnetif;
1631 dev_t dev = (dev_t)-1;
1632
1633 if (is_system_labeled() && zoneid != GLOBAL_ZONEID)
1634 return (dev);
1635 if ((ns = netstack_find_by_zoneid(zoneid)) == NULL)
1636 return (dev);
1637
1638 ips = ns->netstack_ipnet;
1639 mutex_enter(&ips->ips_avl_lock);
1640 if ((ipnetif = avl_find(&ips->ips_avl_by_name, name, NULL)) != NULL) {
1641 if (ipnetif_in_zone(ipnetif, zoneid, ips))
1642 dev = ipnetif->if_dev;
1643 }
1644 mutex_exit(&ips->ips_avl_lock);
1645 netstack_rele(ns);
1646
1647 return (dev);
1648 }
1649
1650 static ipnetif_t *
ipnetif_getby_index(uint64_t id,ipnet_stack_t * ips)1651 ipnetif_getby_index(uint64_t id, ipnet_stack_t *ips)
1652 {
1653 ipnetif_t *ipnetif;
1654
1655 mutex_enter(&ips->ips_avl_lock);
1656 if ((ipnetif = avl_find(&ips->ips_avl_by_index, &id, NULL)) != NULL)
1657 ipnetif_refhold(ipnetif);
1658 mutex_exit(&ips->ips_avl_lock);
1659 return (ipnetif);
1660 }
1661
1662 static ipnetif_t *
ipnetif_getby_dev(dev_t dev,ipnet_stack_t * ips)1663 ipnetif_getby_dev(dev_t dev, ipnet_stack_t *ips)
1664 {
1665 ipnetif_t *ipnetif;
1666 avl_tree_t *tree;
1667
1668 mutex_enter(&ips->ips_avl_lock);
1669 tree = &ips->ips_avl_by_index;
1670 for (ipnetif = avl_first(tree); ipnetif != NULL;
1671 ipnetif = avl_walk(tree, ipnetif, AVL_AFTER)) {
1672 if (ipnetif->if_dev == dev) {
1673 ipnetif_refhold(ipnetif);
1674 break;
1675 }
1676 }
1677 mutex_exit(&ips->ips_avl_lock);
1678 return (ipnetif);
1679 }
1680
1681 static ipnetif_addr_t *
ipnet_match_lif(ipnetif_t * ipnetif,lif_if_t lid,boolean_t isv6)1682 ipnet_match_lif(ipnetif_t *ipnetif, lif_if_t lid, boolean_t isv6)
1683 {
1684 ipnetif_addr_t *ifaddr;
1685 list_t *list;
1686
1687 mutex_enter(&ipnetif->if_addr_lock);
1688 list = isv6 ? &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list;
1689 for (ifaddr = list_head(list); ifaddr != NULL;
1690 ifaddr = list_next(list, ifaddr)) {
1691 if (lid == ifaddr->ifa_id)
1692 break;
1693 }
1694 mutex_exit(&ipnetif->if_addr_lock);
1695 return (ifaddr);
1696 }
1697
1698 /* ARGSUSED */
1699 static void *
ipnet_stack_init(netstackid_t stackid,netstack_t * ns)1700 ipnet_stack_init(netstackid_t stackid, netstack_t *ns)
1701 {
1702 ipnet_stack_t *ips;
1703
1704 ips = kmem_zalloc(sizeof (*ips), KM_SLEEP);
1705 ips->ips_netstack = ns;
1706 mutex_init(&ips->ips_avl_lock, NULL, MUTEX_DEFAULT, 0);
1707 avl_create(&ips->ips_avl_by_index, ipnetif_compare_index,
1708 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_index));
1709 avl_create(&ips->ips_avl_by_name, ipnetif_compare_name,
1710 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_name));
1711 avl_create(&ips->ips_avl_by_shared, ipnetif_compare_name_zone,
1712 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_shared));
1713 mutex_init(&ips->ips_walkers_lock, NULL, MUTEX_DEFAULT, NULL);
1714 cv_init(&ips->ips_walkers_cv, NULL, CV_DRIVER, NULL);
1715 list_create(&ips->ips_str_list, sizeof (ipnet_t),
1716 offsetof(ipnet_t, ipnet_next));
1717 ipnet_register_netihook(ips);
1718 return (ips);
1719 }
1720
1721 /* ARGSUSED */
1722 static void
ipnet_stack_fini(netstackid_t stackid,void * arg)1723 ipnet_stack_fini(netstackid_t stackid, void *arg)
1724 {
1725 ipnet_stack_t *ips = arg;
1726 ipnetif_t *ipnetif, *nipnetif;
1727
1728 if (ips->ips_kstatp != NULL) {
1729 zoneid_t zoneid;
1730
1731 zoneid = netstackid_to_zoneid(stackid);
1732 net_kstat_delete(net_zoneidtonetid(zoneid), ips->ips_kstatp);
1733 }
1734 if (ips->ips_ndv4 != NULL) {
1735 VERIFY(net_hook_unregister(ips->ips_ndv4, NH_NIC_EVENTS,
1736 ips->ips_nicevents) == 0);
1737 VERIFY(net_protocol_release(ips->ips_ndv4) == 0);
1738 }
1739 if (ips->ips_ndv6 != NULL) {
1740 VERIFY(net_hook_unregister(ips->ips_ndv6, NH_NIC_EVENTS,
1741 ips->ips_nicevents) == 0);
1742 VERIFY(net_protocol_release(ips->ips_ndv6) == 0);
1743 }
1744 hook_free(ips->ips_nicevents);
1745
1746 for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL;
1747 ipnetif = nipnetif) {
1748 nipnetif = AVL_NEXT(&ips->ips_avl_by_index, ipnetif);
1749 ipnetif_remove(ipnetif, ips);
1750 }
1751 avl_destroy(&ips->ips_avl_by_shared);
1752 avl_destroy(&ips->ips_avl_by_index);
1753 avl_destroy(&ips->ips_avl_by_name);
1754 mutex_destroy(&ips->ips_avl_lock);
1755 mutex_destroy(&ips->ips_walkers_lock);
1756 cv_destroy(&ips->ips_walkers_cv);
1757 list_destroy(&ips->ips_str_list);
1758 kmem_free(ips, sizeof (*ips));
1759 }
1760
1761 /* Do any of the addresses in addrlist belong the supplied zoneid? */
1762 static boolean_t
ipnet_addrs_in_zone(list_t * addrlist,zoneid_t zoneid)1763 ipnet_addrs_in_zone(list_t *addrlist, zoneid_t zoneid)
1764 {
1765 ipnetif_addr_t *ifa;
1766
1767 for (ifa = list_head(addrlist); ifa != NULL;
1768 ifa = list_next(addrlist, ifa)) {
1769 if (ifa->ifa_zone == zoneid)
1770 return (B_TRUE);
1771 }
1772 return (B_FALSE);
1773 }
1774
1775 /* Should the supplied ipnetif be visible from the supplied zoneid? */
1776 static boolean_t
ipnetif_in_zone(ipnetif_t * ipnetif,zoneid_t zoneid,ipnet_stack_t * ips)1777 ipnetif_in_zone(ipnetif_t *ipnetif, zoneid_t zoneid, ipnet_stack_t *ips)
1778 {
1779 int ret;
1780
1781 /*
1782 * The global zone has visibility into all interfaces in the global
1783 * stack, and exclusive stack zones have visibility into all
1784 * interfaces in their stack.
1785 */
1786 if (zoneid == GLOBAL_ZONEID ||
1787 ips->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
1788 return (B_TRUE);
1789
1790 /*
1791 * Shared-stack zones only have visibility for interfaces that have
1792 * addresses in their zone.
1793 */
1794 mutex_enter(&ipnetif->if_addr_lock);
1795 ret = ipnet_addrs_in_zone(&ipnetif->if_ip4addr_list, zoneid) ||
1796 ipnet_addrs_in_zone(&ipnetif->if_ip6addr_list, zoneid);
1797 mutex_exit(&ipnetif->if_addr_lock);
1798 return (ret);
1799 }
1800
1801 /*
1802 * Verify that any ipnet_t that has a reference to the supplied ipnetif should
1803 * still be allowed to have it open. A given ipnet_t may no longer be allowed
1804 * to have an ipnetif open if there are no longer any addresses that belong to
1805 * the ipnetif in the ipnet_t's non-global shared-stack zoneid. If that's the
1806 * case, send the ipnet_t an M_HANGUP.
1807 */
1808 static void
ipnetif_zonecheck(ipnetif_t * ipnetif,ipnet_stack_t * ips)1809 ipnetif_zonecheck(ipnetif_t *ipnetif, ipnet_stack_t *ips)
1810 {
1811 list_t *strlist = &ips->ips_str_list;
1812 ipnet_t *ipnet;
1813
1814 ipnet_walkers_inc(ips);
1815 for (ipnet = list_head(strlist); ipnet != NULL;
1816 ipnet = list_next(strlist, ipnet)) {
1817 if (ipnet->ipnet_if != ipnetif)
1818 continue;
1819 if (!ipnetif_in_zone(ipnetif, ipnet->ipnet_zoneid, ips))
1820 (void) putnextctl(ipnet->ipnet_rq, M_HANGUP);
1821 }
1822 ipnet_walkers_dec(ips);
1823 }
1824
1825 void
ipnet_walk_if(ipnet_walkfunc_t * cb,void * arg,zoneid_t zoneid)1826 ipnet_walk_if(ipnet_walkfunc_t *cb, void *arg, zoneid_t zoneid)
1827 {
1828 ipnetif_t *ipnetif;
1829 list_t cbdata;
1830 ipnetif_cbdata_t *cbnode;
1831 netstack_t *ns;
1832 ipnet_stack_t *ips;
1833
1834 /*
1835 * On labeled systems, non-global zones shouldn't see anything
1836 * in /dev/ipnet.
1837 */
1838 if (is_system_labeled() && zoneid != GLOBAL_ZONEID)
1839 return;
1840
1841 if ((ns = netstack_find_by_zoneid(zoneid)) == NULL)
1842 return;
1843
1844 ips = ns->netstack_ipnet;
1845 list_create(&cbdata, sizeof (ipnetif_cbdata_t),
1846 offsetof(ipnetif_cbdata_t, ic_next));
1847
1848 mutex_enter(&ips->ips_avl_lock);
1849 for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL;
1850 ipnetif = avl_walk(&ips->ips_avl_by_index, ipnetif, AVL_AFTER)) {
1851 if (!ipnetif_in_zone(ipnetif, zoneid, ips))
1852 continue;
1853 cbnode = kmem_zalloc(sizeof (ipnetif_cbdata_t), KM_SLEEP);
1854 (void) strlcpy(cbnode->ic_ifname, ipnetif->if_name, LIFNAMSIZ);
1855 cbnode->ic_dev = ipnetif->if_dev;
1856 list_insert_head(&cbdata, cbnode);
1857 }
1858 mutex_exit(&ips->ips_avl_lock);
1859
1860 while ((cbnode = list_head(&cbdata)) != NULL) {
1861 cb(cbnode->ic_ifname, arg, cbnode->ic_dev);
1862 list_remove(&cbdata, cbnode);
1863 kmem_free(cbnode, sizeof (ipnetif_cbdata_t));
1864 }
1865 list_destroy(&cbdata);
1866 netstack_rele(ns);
1867 }
1868
1869 static int
ipnetif_compare_index(const void * index_ptr,const void * ipnetifp)1870 ipnetif_compare_index(const void *index_ptr, const void *ipnetifp)
1871 {
1872 int64_t index1 = *((int64_t *)index_ptr);
1873 int64_t index2 = (int64_t)((ipnetif_t *)ipnetifp)->if_index;
1874
1875 return (SIGNOF(index2 - index1));
1876 }
1877
1878 static int
ipnetif_compare_name(const void * name_ptr,const void * ipnetifp)1879 ipnetif_compare_name(const void *name_ptr, const void *ipnetifp)
1880 {
1881 int res;
1882
1883 res = strcmp(((ipnetif_t *)ipnetifp)->if_name, name_ptr);
1884 return (SIGNOF(res));
1885 }
1886
1887 static int
ipnetif_compare_name_zone(const void * key_ptr,const void * ipnetifp)1888 ipnetif_compare_name_zone(const void *key_ptr, const void *ipnetifp)
1889 {
1890 const uintptr_t *ptr = key_ptr;
1891 const ipnetif_t *ifp;
1892 int res;
1893
1894 ifp = ipnetifp;
1895 res = ifp->if_zoneid - ptr[0];
1896 if (res != 0)
1897 return (SIGNOF(res));
1898 res = strcmp(ifp->if_name, (char *)ptr[1]);
1899 return (SIGNOF(res));
1900 }
1901
1902 static void
ipnetif_refhold(ipnetif_t * ipnetif)1903 ipnetif_refhold(ipnetif_t *ipnetif)
1904 {
1905 mutex_enter(&ipnetif->if_reflock);
1906 ipnetif->if_refcnt++;
1907 mutex_exit(&ipnetif->if_reflock);
1908 }
1909
1910 static void
ipnetif_refrele(ipnetif_t * ipnetif)1911 ipnetif_refrele(ipnetif_t *ipnetif)
1912 {
1913 mutex_enter(&ipnetif->if_reflock);
1914 ASSERT(ipnetif->if_refcnt > 0);
1915 if (--ipnetif->if_refcnt == 0)
1916 ipnetif_free(ipnetif);
1917 else
1918 mutex_exit(&ipnetif->if_reflock);
1919 }
1920
1921 static void
ipnet_walkers_inc(ipnet_stack_t * ips)1922 ipnet_walkers_inc(ipnet_stack_t *ips)
1923 {
1924 mutex_enter(&ips->ips_walkers_lock);
1925 ips->ips_walkers_cnt++;
1926 mutex_exit(&ips->ips_walkers_lock);
1927 }
1928
1929 static void
ipnet_walkers_dec(ipnet_stack_t * ips)1930 ipnet_walkers_dec(ipnet_stack_t *ips)
1931 {
1932 mutex_enter(&ips->ips_walkers_lock);
1933 ASSERT(ips->ips_walkers_cnt != 0);
1934 if (--ips->ips_walkers_cnt == 0)
1935 cv_broadcast(&ips->ips_walkers_cv);
1936 mutex_exit(&ips->ips_walkers_lock);
1937 }
1938
1939 /*ARGSUSED*/
1940 static int
ipobs_bounce_func(hook_event_token_t token,hook_data_t info,void * arg)1941 ipobs_bounce_func(hook_event_token_t token, hook_data_t info, void *arg)
1942 {
1943 hook_pkt_observe_t *hdr;
1944 pfv_t func = (pfv_t)arg;
1945 mblk_t *mp;
1946
1947 hdr = (hook_pkt_observe_t *)info;
1948 /*
1949 * Code in ip_input() expects that it is the only one accessing the
1950 * packet.
1951 */
1952 mp = copymsg(hdr->hpo_pkt);
1953 if (mp == NULL) {
1954 netstack_t *ns = hdr->hpo_ctx;
1955 ipnet_stack_t *ips = ns->netstack_ipnet;
1956
1957 IPSK_BUMP(ips, ik_dispatchDupDrop);
1958 return (0);
1959 }
1960
1961 hdr = (hook_pkt_observe_t *)mp->b_rptr;
1962 hdr->hpo_pkt = mp;
1963
1964 func(mp);
1965
1966 return (0);
1967 }
1968
1969 hook_t *
ipobs_register_hook(netstack_t * ns,pfv_t func)1970 ipobs_register_hook(netstack_t *ns, pfv_t func)
1971 {
1972 ip_stack_t *ipst = ns->netstack_ip;
1973 char name[32];
1974 hook_t *hook;
1975
1976 HOOK_INIT(hook, ipobs_bounce_func, "", (void *)func);
1977 VERIFY(hook != NULL);
1978
1979 /*
1980 * To register multiple hooks with the same callback function,
1981 * a unique name is needed.
1982 */
1983 (void) snprintf(name, sizeof (name), "ipobserve_%p", (void *)hook);
1984 hook->h_name = strdup(name);
1985
1986 (void) net_hook_register(ipst->ips_ip4_observe_pr, NH_OBSERVE, hook);
1987 (void) net_hook_register(ipst->ips_ip6_observe_pr, NH_OBSERVE, hook);
1988
1989 return (hook);
1990 }
1991
1992 void
ipobs_unregister_hook(netstack_t * ns,hook_t * hook)1993 ipobs_unregister_hook(netstack_t *ns, hook_t *hook)
1994 {
1995 ip_stack_t *ipst = ns->netstack_ip;
1996
1997 (void) net_hook_unregister(ipst->ips_ip4_observe_pr, NH_OBSERVE, hook);
1998
1999 (void) net_hook_unregister(ipst->ips_ip6_observe_pr, NH_OBSERVE, hook);
2000
2001 strfree(hook->h_name);
2002
2003 hook_free(hook);
2004 }
2005
2006 /* ******************************************************************** */
2007 /* BPF Functions below */
2008 /* ******************************************************************** */
2009
2010 /*
2011 * Convenience function to make mapping a zoneid to an ipnet_stack_t easy.
2012 */
2013 ipnet_stack_t *
ipnet_find_by_zoneid(zoneid_t zoneid)2014 ipnet_find_by_zoneid(zoneid_t zoneid)
2015 {
2016 netstack_t *ns;
2017
2018 VERIFY((ns = netstack_find_by_zoneid(zoneid)) != NULL);
2019 return (ns->netstack_ipnet);
2020 }
2021
2022 /*
2023 * Functions, such as the above ipnet_find_by_zoneid(), will return a
2024 * pointer to ipnet_stack_t by calling a netstack lookup function.
2025 * The netstack_find_*() functions return a pointer after doing a "hold"
2026 * on the data structure and thereby require a "release" when the caller
2027 * is finished with it. We need to mirror that API here and thus a caller
2028 * of ipnet_find_by_zoneid() is required to call ipnet_rele().
2029 */
2030 void
ipnet_rele(ipnet_stack_t * ips)2031 ipnet_rele(ipnet_stack_t *ips)
2032 {
2033 netstack_rele(ips->ips_netstack);
2034 }
2035
2036 /*
2037 */
2038 void
ipnet_set_itap(bpf_itap_fn_t tapfunc)2039 ipnet_set_itap(bpf_itap_fn_t tapfunc)
2040 {
2041 ipnet_itap = tapfunc;
2042 }
2043
2044 /*
2045 * The list of interfaces available via ipnet is private for each zone,
2046 * so the AVL tree of each zone must be searched for a given name, even
2047 * if all names are unique.
2048 */
2049 int
ipnet_open_byname(const char * name,ipnetif_t ** ptr,zoneid_t zoneid)2050 ipnet_open_byname(const char *name, ipnetif_t **ptr, zoneid_t zoneid)
2051 {
2052 ipnet_stack_t *ips;
2053 ipnetif_t *ipnetif;
2054
2055 ASSERT(ptr != NULL);
2056 VERIFY((ips = ipnet_find_by_zoneid(zoneid)) != NULL);
2057
2058 mutex_enter(&ips->ips_avl_lock);
2059
2060 /*
2061 * Shared instance zone?
2062 */
2063 if (netstackid_to_zoneid(zoneid_to_netstackid(zoneid)) != zoneid) {
2064 uintptr_t key[2] = { zoneid, (uintptr_t)name };
2065
2066 ipnetif = avl_find(&ips->ips_avl_by_shared, (void *)key, NULL);
2067 } else {
2068 ipnetif = avl_find(&ips->ips_avl_by_name, (void *)name, NULL);
2069 }
2070 if (ipnetif != NULL)
2071 ipnetif_refhold(ipnetif);
2072 mutex_exit(&ips->ips_avl_lock);
2073
2074 *ptr = ipnetif;
2075 ipnet_rele(ips);
2076
2077 if (ipnetif == NULL)
2078 return (ESRCH);
2079 return (0);
2080 }
2081
2082 void
ipnet_close_byhandle(ipnetif_t * ifp)2083 ipnet_close_byhandle(ipnetif_t *ifp)
2084 {
2085 ASSERT(ifp != NULL);
2086 ipnetif_refrele(ifp);
2087 }
2088
2089 const char *
ipnet_name(ipnetif_t * ifp)2090 ipnet_name(ipnetif_t *ifp)
2091 {
2092 ASSERT(ifp != NULL);
2093 return (ifp->if_name);
2094 }
2095
2096 /*
2097 * To find the linkid for a given name, it is necessary to know which zone
2098 * the interface name belongs to and to search the avl tree for that zone
2099 * as there is no master list of all interfaces and which zone they belong
2100 * to. It is assumed that the caller of this function is somehow already
2101 * working with the ipnet interfaces and hence the ips_event_lock is held.
2102 * When BPF calls into this function, it is doing so because of an event
2103 * in ipnet, and thus ipnet holds the ips_event_lock. Thus the datalink id
2104 * value returned has meaning without the need for grabbing a hold on the
2105 * owning structure.
2106 */
2107 int
ipnet_get_linkid_byname(const char * name,uint_t * idp,zoneid_t zoneid)2108 ipnet_get_linkid_byname(const char *name, uint_t *idp, zoneid_t zoneid)
2109 {
2110 ipnet_stack_t *ips;
2111 ipnetif_t *ifp;
2112
2113 VERIFY((ips = ipnet_find_by_zoneid(zoneid)) != NULL);
2114 ASSERT(mutex_owned(&ips->ips_event_lock));
2115
2116 mutex_enter(&ips->ips_avl_lock);
2117 ifp = avl_find(&ips->ips_avl_by_name, (void *)name, NULL);
2118 if (ifp != NULL)
2119 *idp = (uint_t)ifp->if_index;
2120
2121 /*
2122 * Shared instance zone?
2123 */
2124 if (netstackid_to_zoneid(zoneid_to_netstackid(zoneid)) != zoneid) {
2125 uintptr_t key[2] = { zoneid, (uintptr_t)name };
2126
2127 ifp = avl_find(&ips->ips_avl_by_shared, (void *)key, NULL);
2128 if (ifp != NULL)
2129 *idp = (uint_t)ifp->if_index;
2130 }
2131
2132 mutex_exit(&ips->ips_avl_lock);
2133 ipnet_rele(ips);
2134
2135 if (ifp == NULL)
2136 return (ESRCH);
2137 return (0);
2138 }
2139
2140 /*
2141 * Strictly speaking, there is no such thing as a "client" in ipnet, like
2142 * there is in mac. BPF only needs to have this because it is required as
2143 * part of interfacing correctly with mac. The reuse of the original
2144 * ipnetif_t as a client poses no danger, so long as it is done with its
2145 * own ref-count'd hold that is given up on close.
2146 */
2147 int
ipnet_client_open(ipnetif_t * ptr,ipnetif_t ** result)2148 ipnet_client_open(ipnetif_t *ptr, ipnetif_t **result)
2149 {
2150 ASSERT(ptr != NULL);
2151 ASSERT(result != NULL);
2152 ipnetif_refhold(ptr);
2153 *result = ptr;
2154
2155 return (0);
2156 }
2157
2158 void
ipnet_client_close(ipnetif_t * ptr)2159 ipnet_client_close(ipnetif_t *ptr)
2160 {
2161 ASSERT(ptr != NULL);
2162 ipnetif_refrele(ptr);
2163 }
2164
2165 /*
2166 * This is called from BPF when it needs to start receiving packets
2167 * from ipnet.
2168 *
2169 * The use of the ipnet_t structure here is somewhat lightweight when
2170 * compared to how it is used elsewhere but it already has all of the
2171 * right fields in it, so reuse here doesn't seem out of order. Its
2172 * primary purpose here is to provide the means to store pointers for
2173 * use when ipnet_promisc_remove() needs to be called.
2174 *
2175 * This should never be called for the IPNET_MINOR_LO device as it is
2176 * never created via ipnetif_create.
2177 */
2178 /*ARGSUSED*/
2179 int
ipnet_promisc_add(void * handle,uint_t how,void * data,uintptr_t * mhandle,int flags)2180 ipnet_promisc_add(void *handle, uint_t how, void *data, uintptr_t *mhandle,
2181 int flags)
2182 {
2183 ip_stack_t *ipst;
2184 netstack_t *ns;
2185 ipnetif_t *ifp;
2186 ipnet_t *ipnet;
2187 char name[32];
2188 int error;
2189
2190 ifp = (ipnetif_t *)handle;
2191
2192 if (how != DL_PROMISC_PHYS && how != DL_PROMISC_MULTI)
2193 return (EINVAL);
2194
2195 ns = netstack_find_by_zoneid(ifp->if_zoneid);
2196
2197 if ((error = ipnet_join_allmulti(ifp, ns->netstack_ipnet)) != 0) {
2198 netstack_rele(ns);
2199 return (error);
2200 }
2201
2202 ipnet = kmem_zalloc(sizeof (*ipnet), KM_SLEEP);
2203 ipnet->ipnet_if = ifp;
2204 ipnet->ipnet_ns = ns;
2205 ipnet->ipnet_flags = flags;
2206
2207 if ((ifp->if_flags & IPNETIF_LOOPBACK) != 0) {
2208 ipnet->ipnet_acceptfn = ipnet_loaccept;
2209 } else {
2210 ipnet->ipnet_acceptfn = ipnet_accept;
2211 }
2212
2213 /*
2214 * To register multiple hooks with the same callback function,
2215 * a unique name is needed.
2216 */
2217 HOOK_INIT(ipnet->ipnet_hook, ipnet_bpf_bounce, "", ipnet);
2218 (void) snprintf(name, sizeof (name), "ipnet_promisc_%p",
2219 (void *)ipnet->ipnet_hook);
2220 ipnet->ipnet_hook->h_name = strdup(name);
2221 ipnet->ipnet_data = data;
2222 ipnet->ipnet_zoneid = ifp->if_zoneid;
2223
2224 ipst = ns->netstack_ip;
2225
2226 error = net_hook_register(ipst->ips_ip4_observe_pr, NH_OBSERVE,
2227 ipnet->ipnet_hook);
2228 if (error != 0)
2229 goto regfail;
2230
2231 error = net_hook_register(ipst->ips_ip6_observe_pr, NH_OBSERVE,
2232 ipnet->ipnet_hook);
2233 if (error != 0) {
2234 (void) net_hook_unregister(ipst->ips_ip4_observe_pr,
2235 NH_OBSERVE, ipnet->ipnet_hook);
2236 goto regfail;
2237 }
2238
2239 *mhandle = (uintptr_t)ipnet;
2240 netstack_rele(ns);
2241
2242 return (0);
2243
2244 regfail:
2245 cmn_err(CE_WARN, "net_hook_register failed: %d", error);
2246 strfree(ipnet->ipnet_hook->h_name);
2247 hook_free(ipnet->ipnet_hook);
2248 netstack_rele(ns);
2249 return (error);
2250 }
2251
2252 void
ipnet_promisc_remove(void * data)2253 ipnet_promisc_remove(void *data)
2254 {
2255 ip_stack_t *ipst;
2256 ipnet_t *ipnet;
2257 hook_t *hook;
2258
2259 ipnet = data;
2260 ipst = ipnet->ipnet_ns->netstack_ip;
2261 hook = ipnet->ipnet_hook;
2262
2263 VERIFY(net_hook_unregister(ipst->ips_ip4_observe_pr, NH_OBSERVE,
2264 hook) == 0);
2265
2266 VERIFY(net_hook_unregister(ipst->ips_ip6_observe_pr, NH_OBSERVE,
2267 hook) == 0);
2268
2269 strfree(hook->h_name);
2270
2271 hook_free(hook);
2272
2273 kmem_free(ipnet, sizeof (*ipnet));
2274 }
2275
2276 /*
2277 * arg here comes from the ipnet_t allocated in ipnet_promisc_add.
2278 * An important field from that structure is "ipnet_data" that
2279 * contains the "data" pointer passed into ipnet_promisc_add: it needs
2280 * to be passed back to bpf when we call into ipnet_itap.
2281 *
 * ipnet_itap is set by ipnet_set_itap(), which in turn is called
 * from BPF.
2284 */
2285 /*ARGSUSED*/
2286 static int
ipnet_bpf_bounce(hook_event_token_t token,hook_data_t info,void * arg)2287 ipnet_bpf_bounce(hook_event_token_t token, hook_data_t info, void *arg)
2288 {
2289 hook_pkt_observe_t *hdr;
2290 ipnet_addrp_t src;
2291 ipnet_addrp_t dst;
2292 ipnet_stack_t *ips;
2293 ipnet_t *ipnet;
2294 mblk_t *netmp;
2295 mblk_t *mp;
2296
2297 hdr = (hook_pkt_observe_t *)info;
2298 mp = hdr->hpo_pkt;
2299 ipnet = (ipnet_t *)arg;
2300 ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet;
2301
2302 netmp = hdr->hpo_pkt->b_cont;
2303 src.iap_family = hdr->hpo_family;
2304 dst.iap_family = hdr->hpo_family;
2305
2306 if (hdr->hpo_family == AF_INET) {
2307 src.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_src;
2308 dst.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_dst;
2309 } else {
2310 src.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_src;
2311 dst.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_dst;
2312 }
2313
2314 if (!(*ipnet->ipnet_acceptfn)(ipnet, hdr, &src, &dst)) {
2315 IPSK_BUMP(ips, ik_acceptFail);
2316 return (0);
2317 }
2318 IPSK_BUMP(ips, ik_acceptOk);
2319
2320 ipnet_itap(ipnet->ipnet_data, mp,
2321 hdr->hpo_htype == htons(IPOBS_HOOK_OUTBOUND),
2322 ntohl(hdr->hpo_pktlen) + MBLKL(mp));
2323
2324 return (0);
2325 }
2326
2327 /*
2328 * clone'd ipnetif_t's are created when a shared IP instance zone comes
2329 * to life and configures an IP address. The model that BPF uses is that
2330 * each interface must have a unique pointer and each interface must be
2331 * representative of what it can capture. They are limited to one DLT
2332 * per interface and one zone per interface. Thus every interface that
2333 * can be seen in a zone must be announced via an attach to bpf. For
2334 * shared instance zones, this means the ipnet driver needs to detect
2335 * when an address is added to an interface in a zone for the first
2336 * time (and also when the last address is removed.)
2337 */
2338 static ipnetif_t *
ipnetif_clone_create(ipnetif_t * ifp,zoneid_t zoneid)2339 ipnetif_clone_create(ipnetif_t *ifp, zoneid_t zoneid)
2340 {
2341 uintptr_t key[2] = { zoneid, (uintptr_t)ifp->if_name };
2342 ipnet_stack_t *ips = ifp->if_stackp;
2343 avl_index_t where = 0;
2344 ipnetif_t *newif;
2345
2346 mutex_enter(&ips->ips_avl_lock);
2347 newif = avl_find(&ips->ips_avl_by_shared, (void *)key, &where);
2348 if (newif != NULL) {
2349 ipnetif_refhold(newif);
2350 newif->if_sharecnt++;
2351 mutex_exit(&ips->ips_avl_lock);
2352 return (newif);
2353 }
2354
2355 newif = ipnet_alloc_if(ips);
2356 if (newif == NULL) {
2357 mutex_exit(&ips->ips_avl_lock);
2358 return (NULL);
2359 }
2360
2361 newif->if_refcnt = 1;
2362 newif->if_sharecnt = 1;
2363 newif->if_zoneid = zoneid;
2364 (void) strlcpy(newif->if_name, ifp->if_name, LIFNAMSIZ);
2365 newif->if_flags = ifp->if_flags & IPNETIF_LOOPBACK;
2366 newif->if_index = ifp->if_index;
2367
2368 avl_insert(&ips->ips_avl_by_shared, newif, where);
2369 mutex_exit(&ips->ips_avl_lock);
2370
2371 return (newif);
2372 }
2373
2374 static void
ipnetif_clone_release(ipnetif_t * ipnetif)2375 ipnetif_clone_release(ipnetif_t *ipnetif)
2376 {
2377 boolean_t dofree = B_FALSE;
2378 boolean_t doremove = B_FALSE;
2379 ipnet_stack_t *ips = ipnetif->if_stackp;
2380
2381 mutex_enter(&ipnetif->if_reflock);
2382 ASSERT(ipnetif->if_refcnt > 0);
2383 if (--ipnetif->if_refcnt == 0)
2384 dofree = B_TRUE;
2385 ASSERT(ipnetif->if_sharecnt > 0);
2386 if (--ipnetif->if_sharecnt == 0)
2387 doremove = B_TRUE;
2388 mutex_exit(&ipnetif->if_reflock);
2389 if (doremove) {
2390 mutex_enter(&ips->ips_avl_lock);
2391 avl_remove(&ips->ips_avl_by_shared, ipnetif);
2392 mutex_exit(&ips->ips_avl_lock);
2393 }
2394 if (dofree) {
2395 ASSERT(ipnetif->if_sharecnt == 0);
2396 ipnetif_free(ipnetif);
2397 }
2398 }
2399