1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
/*
 * The ipnet device defined here provides access to packets at the IP layer. To
 * provide access to packets at this layer it registers a callback function in
 * the ip module and when there are open instances of the device ip will pass
 * packets into the device. Packets from ip are passed on the input, output and
 * loopback paths. Internally the module returns to ip as soon as possible by
 * deferring processing using a taskq.
 *
 * Management of the devices in /dev/ipnet/ is handled by the devname
 * filesystem and use of the neti interfaces. This module registers for NIC
 * events using the neti framework so that when IP interfaces are brought up,
 * taken down etc. the ipnet module is notified and its view of the interfaces
 * configured on the system adjusted. On attach, the module gets an initial
 * view of the system again using the neti framework but as it has already
 * registered for IP interface events, it is still up-to-date with any changes.
 */
43
44 #include <sys/types.h>
45 #include <sys/conf.h>
46 #include <sys/cred.h>
47 #include <sys/stat.h>
48 #include <sys/ddi.h>
49 #include <sys/sunddi.h>
50 #include <sys/modctl.h>
51 #include <sys/dlpi.h>
52 #include <sys/strsun.h>
53 #include <sys/id_space.h>
54 #include <sys/kmem.h>
55 #include <sys/mkdev.h>
56 #include <sys/neti.h>
57 #include <net/if.h>
58 #include <sys/errno.h>
59 #include <sys/list.h>
60 #include <sys/ksynch.h>
61 #include <sys/hook_event.h>
62 #include <sys/sdt.h>
63 #include <sys/stropts.h>
64 #include <sys/sysmacros.h>
65 #include <inet/ip.h>
66 #include <inet/ip_if.h>
67 #include <inet/ip_multi.h>
68 #include <inet/ip6.h>
69 #include <inet/ipnet.h>
70 #include <net/bpf.h>
71 #include <net/bpfdesc.h>
72 #include <net/dlt.h>
73
74 static struct module_info ipnet_minfo = {
75 1, /* mi_idnum */
76 "ipnet", /* mi_idname */
77 0, /* mi_minpsz */
78 INFPSZ, /* mi_maxpsz */
79 2048, /* mi_hiwat */
80 0 /* mi_lowat */
81 };
82
83 /*
84 * List to hold static view of ipnetif_t's on the system. This is needed to
85 * avoid holding the lock protecting the avl tree of ipnetif's over the
86 * callback into the dev filesystem.
87 */
88 typedef struct ipnetif_cbdata {
89 char ic_ifname[LIFNAMSIZ];
90 dev_t ic_dev;
91 list_node_t ic_next;
92 } ipnetif_cbdata_t;
93
/*
 * Convenience enumerated type for ipnet_accept(). It classifies a given
 * ipnet_addrp_t relative to a single ipnet_t client stream. Each value
 * states whether the address is ...
 */
typedef enum {
	IPNETADDR_MYADDR,	/* an address on my ipnetif_t. */
	IPNETADDR_MBCAST,	/* a multicast or broadcast address. */
	IPNETADDR_UNKNOWN	/* none of the above. */
} ipnet_addrtype_t;
104
105 /* Argument used for the ipnet_nicevent_taskq callback. */
106 typedef struct ipnet_nicevent_s {
107 nic_event_t ipne_event;
108 net_handle_t ipne_protocol;
109 netstackid_t ipne_stackid;
110 uint64_t ipne_ifindex;
111 uint64_t ipne_lifindex;
112 char ipne_ifname[LIFNAMSIZ];
113 } ipnet_nicevent_t;
114
115 static dev_info_t *ipnet_dip;
116 static major_t ipnet_major;
117 static ddi_taskq_t *ipnet_taskq; /* taskq for packets */
118 static ddi_taskq_t *ipnet_nicevent_taskq; /* taskq for NIC events */
119 static id_space_t *ipnet_minor_space;
120 static const int IPNET_MINOR_LO = 1; /* minor number for /dev/lo0 */
121 static const int IPNET_MINOR_MIN = 2; /* start of dynamic minors */
122 static dl_info_ack_t ipnet_infoack = IPNET_INFO_ACK_INIT;
123 static ipnet_acceptfn_t ipnet_accept, ipnet_loaccept;
124 static bpf_itap_fn_t ipnet_itap;
125
126 static void ipnet_input(mblk_t *);
127 static int ipnet_wput(queue_t *, mblk_t *);
128 static int ipnet_rsrv(queue_t *);
129 static int ipnet_open(queue_t *, dev_t *, int, int, cred_t *);
130 static int ipnet_close(queue_t *);
131 static void ipnet_ioctl(queue_t *, mblk_t *);
132 static void ipnet_iocdata(queue_t *, mblk_t *);
133 static void ipnet_wputnondata(queue_t *, mblk_t *);
134 static int ipnet_attach(dev_info_t *, ddi_attach_cmd_t);
135 static int ipnet_detach(dev_info_t *, ddi_detach_cmd_t);
136 static int ipnet_devinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
137 static void ipnet_inforeq(queue_t *q, mblk_t *mp);
138 static void ipnet_bindreq(queue_t *q, mblk_t *mp);
139 static void ipnet_unbindreq(queue_t *q, mblk_t *mp);
140 static void ipnet_dlpromisconreq(queue_t *q, mblk_t *mp);
141 static void ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp);
142 static int ipnet_join_allmulti(ipnetif_t *, ipnet_stack_t *);
143 static void ipnet_leave_allmulti(ipnetif_t *, ipnet_stack_t *);
144 static int ipnet_nicevent_cb(hook_event_token_t, hook_data_t, void *);
145 static void ipnet_nicevent_task(void *);
146 static ipnetif_t *ipnetif_create(const char *, uint64_t, ipnet_stack_t *,
147 uint64_t);
148 static void ipnetif_remove(ipnetif_t *, ipnet_stack_t *);
149 static ipnetif_addr_t *ipnet_match_lif(ipnetif_t *, lif_if_t, boolean_t);
150 static ipnetif_t *ipnetif_getby_index(uint64_t, ipnet_stack_t *);
151 static ipnetif_t *ipnetif_getby_dev(dev_t, ipnet_stack_t *);
152 static boolean_t ipnetif_in_zone(ipnetif_t *, zoneid_t, ipnet_stack_t *);
153 static void ipnetif_zonecheck(ipnetif_t *, ipnet_stack_t *);
154 static int ipnet_populate_if(net_handle_t, ipnet_stack_t *, boolean_t);
155 static int ipnetif_compare_name(const void *, const void *);
156 static int ipnetif_compare_name_zone(const void *, const void *);
157 static int ipnetif_compare_index(const void *, const void *);
158 static void ipnet_add_ifaddr(uint64_t, ipnetif_t *, net_handle_t);
159 static void ipnet_delete_ifaddr(ipnetif_addr_t *, ipnetif_t *, boolean_t);
160 static void ipnetif_refhold(ipnetif_t *);
161 static void ipnetif_refrele(ipnetif_t *);
162 static void ipnet_walkers_inc(ipnet_stack_t *);
163 static void ipnet_walkers_dec(ipnet_stack_t *);
164 static void ipnet_register_netihook(ipnet_stack_t *);
165 static void *ipnet_stack_init(netstackid_t, netstack_t *);
166 static void ipnet_stack_fini(netstackid_t, void *);
167 static void ipnet_dispatch(void *);
168 static int ipobs_bounce_func(hook_event_token_t, hook_data_t, void *);
169 static int ipnet_bpf_bounce(hook_event_token_t, hook_data_t, void *);
170 static ipnetif_t *ipnetif_clone_create(ipnetif_t *, zoneid_t);
171 static void ipnetif_clone_release(ipnetif_t *);
172
173 static struct qinit ipnet_rinit = {
174 NULL, /* qi_putp */
175 ipnet_rsrv, /* qi_srvp */
176 ipnet_open, /* qi_qopen */
177 ipnet_close, /* qi_qclose */
178 NULL, /* qi_qadmin */
179 &ipnet_minfo, /* qi_minfo */
180 };
181
182 static struct qinit ipnet_winit = {
183 ipnet_wput, /* qi_putp */
184 NULL, /* qi_srvp */
185 NULL, /* qi_qopen */
186 NULL, /* qi_qclose */
187 NULL, /* qi_qadmin */
188 &ipnet_minfo, /* qi_minfo */
189 };
190
191 static struct streamtab ipnet_info = {
192 &ipnet_rinit, &ipnet_winit
193 };
194
195 DDI_DEFINE_STREAM_OPS(ipnet_ops, nulldev, nulldev, ipnet_attach,
196 ipnet_detach, nodev, ipnet_devinfo, D_MP | D_MTPERMOD, &ipnet_info,
197 ddi_quiesce_not_supported);
198
199 static struct modldrv modldrv = {
200 &mod_driverops,
201 "STREAMS ipnet driver",
202 &ipnet_ops
203 };
204
205 static struct modlinkage modlinkage = {
206 MODREV_1, &modldrv, NULL
207 };
208
209 /*
210 * This structure contains the template data (names and type) that is
211 * copied, in bulk, into the new kstats structure created by net_kstat_create.
212 * No actual statistical information is stored in this instance of the
213 * ipnet_kstats_t structure.
214 */
215 static ipnet_kstats_t stats_template = {
216 { "duplicationFail", KSTAT_DATA_UINT64 },
217 { "dispatchOk", KSTAT_DATA_UINT64 },
218 { "dispatchFail", KSTAT_DATA_UINT64 },
219 { "dispatchHeaderDrop", KSTAT_DATA_UINT64 },
220 { "dispatchDupDrop", KSTAT_DATA_UINT64 },
221 { "dispatchPutDrop", KSTAT_DATA_UINT64 },
222 { "dispatchDeliver", KSTAT_DATA_UINT64 },
223 { "acceptOk", KSTAT_DATA_UINT64 },
224 { "acceptFail", KSTAT_DATA_UINT64 }
225 };
226
227 /*
228 * Walk the list of physical interfaces on the machine, for each
229 * interface create a new ipnetif_t and add any addresses to it. We
230 * need to do the walk twice, once for IPv4 and once for IPv6.
231 *
232 * The interfaces are destroyed as part of ipnet_stack_fini() for each
233 * stack. Note that we cannot do this initialization in
234 * ipnet_stack_init(), since ipnet_stack_init() cannot fail.
235 */
236 static int
ipnetif_init(void)237 ipnetif_init(void)
238 {
239 netstack_handle_t nh;
240 netstack_t *ns;
241 ipnet_stack_t *ips;
242 int ret = 0;
243
244 netstack_next_init(&nh);
245 while ((ns = netstack_next(&nh)) != NULL) {
246 ips = ns->netstack_ipnet;
247 if ((ret = ipnet_populate_if(ips->ips_ndv4, ips, B_FALSE)) == 0)
248 ret = ipnet_populate_if(ips->ips_ndv6, ips, B_TRUE);
249 netstack_rele(ns);
250 if (ret != 0)
251 break;
252 }
253 netstack_next_fini(&nh);
254 return (ret);
255 }
256
257 /*
258 * Standard module entry points.
259 */
260 int
_init(void)261 _init(void)
262 {
263 int ret;
264 boolean_t netstack_registered = B_FALSE;
265
266 if ((ipnet_major = ddi_name_to_major("ipnet")) == (major_t)-1)
267 return (ENODEV);
268 ipnet_minor_space = id_space_create("ipnet_minor_space",
269 IPNET_MINOR_MIN, MAXMIN32);
270
271 /*
272 * We call ddi_taskq_create() with nthread == 1 to ensure in-order
273 * delivery of packets to clients. Note that we need to create the
274 * taskqs before calling netstack_register() since ipnet_stack_init()
275 * registers callbacks that use 'em.
276 */
277 ipnet_taskq = ddi_taskq_create(NULL, "ipnet", 1, TASKQ_DEFAULTPRI, 0);
278 ipnet_nicevent_taskq = ddi_taskq_create(NULL, "ipnet_nic_event_queue",
279 1, TASKQ_DEFAULTPRI, 0);
280 if (ipnet_taskq == NULL || ipnet_nicevent_taskq == NULL) {
281 ret = ENOMEM;
282 goto done;
283 }
284
285 netstack_register(NS_IPNET, ipnet_stack_init, NULL, ipnet_stack_fini);
286 netstack_registered = B_TRUE;
287
288 if ((ret = ipnetif_init()) == 0)
289 ret = mod_install(&modlinkage);
290 done:
291 if (ret != 0) {
292 if (ipnet_taskq != NULL)
293 ddi_taskq_destroy(ipnet_taskq);
294 if (ipnet_nicevent_taskq != NULL)
295 ddi_taskq_destroy(ipnet_nicevent_taskq);
296 if (netstack_registered)
297 netstack_unregister(NS_IPNET);
298 id_space_destroy(ipnet_minor_space);
299 }
300 return (ret);
301 }
302
303 int
_fini(void)304 _fini(void)
305 {
306 int err;
307
308 if ((err = mod_remove(&modlinkage)) != 0)
309 return (err);
310
311 netstack_unregister(NS_IPNET);
312 ddi_taskq_destroy(ipnet_nicevent_taskq);
313 ddi_taskq_destroy(ipnet_taskq);
314 id_space_destroy(ipnet_minor_space);
315 return (0);
316 }
317
318 int
_info(struct modinfo * modinfop)319 _info(struct modinfo *modinfop)
320 {
321 return (mod_info(&modlinkage, modinfop));
322 }
323
324 static void
ipnet_register_netihook(ipnet_stack_t * ips)325 ipnet_register_netihook(ipnet_stack_t *ips)
326 {
327 int ret;
328 zoneid_t zoneid;
329 netid_t netid;
330
331 HOOK_INIT(ips->ips_nicevents, ipnet_nicevent_cb, "ipnet_nicevents",
332 ips);
333
334 /*
335 * It is possible for an exclusive stack to be in the process of
336 * shutting down here, and the netid and protocol lookups could fail
337 * in that case.
338 */
339 zoneid = netstackid_to_zoneid(ips->ips_netstack->netstack_stackid);
340 if ((netid = net_zoneidtonetid(zoneid)) == -1)
341 return;
342
343 if ((ips->ips_ndv4 = net_protocol_lookup(netid, NHF_INET)) != NULL) {
344 if ((ret = net_hook_register(ips->ips_ndv4, NH_NIC_EVENTS,
345 ips->ips_nicevents)) != 0) {
346 VERIFY(net_protocol_release(ips->ips_ndv4) == 0);
347 ips->ips_ndv4 = NULL;
348 cmn_err(CE_WARN, "unable to register IPv4 netinfo hooks"
349 " in zone %d: %d", zoneid, ret);
350 }
351 }
352 if ((ips->ips_ndv6 = net_protocol_lookup(netid, NHF_INET6)) != NULL) {
353 if ((ret = net_hook_register(ips->ips_ndv6, NH_NIC_EVENTS,
354 ips->ips_nicevents)) != 0) {
355 VERIFY(net_protocol_release(ips->ips_ndv6) == 0);
356 ips->ips_ndv6 = NULL;
357 cmn_err(CE_WARN, "unable to register IPv6 netinfo hooks"
358 " in zone %d: %d", zoneid, ret);
359 }
360 }
361
362 /*
363 * Create a local set of kstats for each zone.
364 */
365 ips->ips_kstatp = net_kstat_create(netid, "ipnet", 0, "ipnet_stats",
366 "misc", KSTAT_TYPE_NAMED,
367 sizeof (ipnet_kstats_t) / sizeof (kstat_named_t), 0);
368 if (ips->ips_kstatp != NULL) {
369 bcopy(&stats_template, &ips->ips_stats,
370 sizeof (ips->ips_stats));
371 ips->ips_kstatp->ks_data = &ips->ips_stats;
372 ips->ips_kstatp->ks_private =
373 (void *)(uintptr_t)ips->ips_netstack->netstack_stackid;
374 kstat_install(ips->ips_kstatp);
375 } else {
376 cmn_err(CE_WARN, "net_kstat_create(%s,%s,%s) failed",
377 "ipnet", "ipnet_stats", "misc");
378 }
379 }
380
381 /*
382 * This function is called on attach to build an initial view of the
383 * interfaces on the system. It will be called once for IPv4 and once
384 * for IPv6, although there is only one ipnet interface for both IPv4
385 * and IPv6 there are separate address lists.
386 */
387 static int
ipnet_populate_if(net_handle_t nd,ipnet_stack_t * ips,boolean_t isv6)388 ipnet_populate_if(net_handle_t nd, ipnet_stack_t *ips, boolean_t isv6)
389 {
390 phy_if_t phyif;
391 lif_if_t lif;
392 ipnetif_t *ipnetif;
393 char name[LIFNAMSIZ];
394 boolean_t new_if = B_FALSE;
395 uint64_t ifflags;
396 int ret = 0;
397
398 /*
399 * If ipnet_register_netihook() was unable to initialize this
400 * stack's net_handle_t, then we cannot populate any interface
401 * information. This usually happens when we attempted to
402 * grab a net_handle_t as a stack was shutting down. We don't
403 * want to fail the entire _init() operation because of a
404 * stack shutdown (other stacks will continue to work just
405 * fine), so we silently return success here.
406 */
407 if (nd == NULL)
408 return (0);
409
410 /*
411 * Make sure we're not processing NIC events during the
412 * population of our interfaces and address lists.
413 */
414 mutex_enter(&ips->ips_event_lock);
415
416 for (phyif = net_phygetnext(nd, 0); phyif != 0;
417 phyif = net_phygetnext(nd, phyif)) {
418 if (net_getifname(nd, phyif, name, LIFNAMSIZ) != 0)
419 continue;
420 ifflags = 0;
421 (void) net_getlifflags(nd, phyif, 0, &ifflags);
422 if ((ipnetif = ipnetif_getby_index(phyif, ips)) == NULL) {
423 ipnetif = ipnetif_create(name, phyif, ips, ifflags);
424 if (ipnetif == NULL) {
425 ret = ENOMEM;
426 goto done;
427 }
428 new_if = B_TRUE;
429 }
430 ipnetif->if_flags |=
431 isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED;
432
433 for (lif = net_lifgetnext(nd, phyif, 0); lif != 0;
434 lif = net_lifgetnext(nd, phyif, lif)) {
435 /*
436 * Skip addresses that aren't up. We'll add
437 * them when we receive an NE_LIF_UP event.
438 */
439 if (net_getlifflags(nd, phyif, lif, &ifflags) != 0 ||
440 !(ifflags & IFF_UP))
441 continue;
442 /* Don't add it if we already have it. */
443 if (ipnet_match_lif(ipnetif, lif, isv6) != NULL)
444 continue;
445 ipnet_add_ifaddr(lif, ipnetif, nd);
446 }
447 if (!new_if)
448 ipnetif_refrele(ipnetif);
449 }
450
451 done:
452 mutex_exit(&ips->ips_event_lock);
453 return (ret);
454 }
455
456 static int
ipnet_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)457 ipnet_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
458 {
459 if (cmd != DDI_ATTACH)
460 return (DDI_FAILURE);
461
462 if (ddi_create_minor_node(dip, "lo0", S_IFCHR, IPNET_MINOR_LO,
463 DDI_PSEUDO, 0) == DDI_FAILURE)
464 return (DDI_FAILURE);
465
466 ipnet_dip = dip;
467 return (DDI_SUCCESS);
468 }
469
470 static int
ipnet_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)471 ipnet_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
472 {
473 if (cmd != DDI_DETACH)
474 return (DDI_FAILURE);
475
476 ASSERT(dip == ipnet_dip);
477 ddi_remove_minor_node(ipnet_dip, NULL);
478 ipnet_dip = NULL;
479 return (DDI_SUCCESS);
480 }
481
482 /* ARGSUSED */
483 static int
ipnet_devinfo(dev_info_t * dip,ddi_info_cmd_t infocmd,void * arg,void ** result)484 ipnet_devinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
485 {
486 int error = DDI_FAILURE;
487
488 switch (infocmd) {
489 case DDI_INFO_DEVT2INSTANCE:
490 *result = (void *)0;
491 error = DDI_SUCCESS;
492 break;
493 case DDI_INFO_DEVT2DEVINFO:
494 if (ipnet_dip != NULL) {
495 *result = ipnet_dip;
496 error = DDI_SUCCESS;
497 }
498 break;
499 }
500 return (error);
501 }
502
503 /* ARGSUSED */
504 static int
ipnet_open(queue_t * rq,dev_t * dev,int oflag,int sflag,cred_t * crp)505 ipnet_open(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp)
506 {
507 ipnet_t *ipnet;
508 netstack_t *ns = NULL;
509 ipnet_stack_t *ips;
510 int err = 0;
511 zoneid_t zoneid = crgetzoneid(crp);
512
513 /*
514 * If the system is labeled, only the global zone is allowed to open
515 * IP observability nodes.
516 */
517 if (is_system_labeled() && zoneid != GLOBAL_ZONEID)
518 return (EACCES);
519
520 /* We don't support open as a module */
521 if (sflag & MODOPEN)
522 return (ENOTSUP);
523
524 /* This driver is self-cloning, we don't support re-open. */
525 if (rq->q_ptr != NULL)
526 return (EBUSY);
527
528 if ((ipnet = kmem_zalloc(sizeof (*ipnet), KM_NOSLEEP)) == NULL)
529 return (ENOMEM);
530
531 VERIFY((ns = netstack_find_by_cred(crp)) != NULL);
532 ips = ns->netstack_ipnet;
533
534 rq->q_ptr = WR(rq)->q_ptr = ipnet;
535 ipnet->ipnet_rq = rq;
536 ipnet->ipnet_minor = (minor_t)id_alloc(ipnet_minor_space);
537 ipnet->ipnet_zoneid = zoneid;
538 ipnet->ipnet_dlstate = DL_UNBOUND;
539 ipnet->ipnet_ns = ns;
540
541 /*
542 * We need to hold ips_event_lock here as any NE_LIF_DOWN events need
543 * to be processed after ipnet_if is set and the ipnet_t has been
544 * inserted in the ips_str_list.
545 */
546 mutex_enter(&ips->ips_event_lock);
547 if (getminor(*dev) == IPNET_MINOR_LO) {
548 ipnet->ipnet_flags |= IPNET_LOMODE;
549 ipnet->ipnet_acceptfn = ipnet_loaccept;
550 } else {
551 ipnet->ipnet_acceptfn = ipnet_accept;
552 ipnet->ipnet_if = ipnetif_getby_dev(*dev, ips);
553 if (ipnet->ipnet_if == NULL ||
554 !ipnetif_in_zone(ipnet->ipnet_if, zoneid, ips)) {
555 err = ENODEV;
556 goto done;
557 }
558 }
559
560 mutex_enter(&ips->ips_walkers_lock);
561 while (ips->ips_walkers_cnt != 0)
562 cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock);
563 list_insert_head(&ips->ips_str_list, ipnet);
564 *dev = makedevice(getmajor(*dev), ipnet->ipnet_minor);
565 qprocson(rq);
566
567 /*
568 * Only register our callback if we're the first open client; we call
569 * unregister in close() for the last open client.
570 */
571 if (list_head(&ips->ips_str_list) == list_tail(&ips->ips_str_list))
572 ips->ips_hook = ipobs_register_hook(ns, ipnet_input);
573 mutex_exit(&ips->ips_walkers_lock);
574
575 done:
576 mutex_exit(&ips->ips_event_lock);
577 if (err != 0) {
578 netstack_rele(ns);
579 id_free(ipnet_minor_space, ipnet->ipnet_minor);
580 if (ipnet->ipnet_if != NULL)
581 ipnetif_refrele(ipnet->ipnet_if);
582 kmem_free(ipnet, sizeof (*ipnet));
583 }
584 return (err);
585 }
586
587 static int
ipnet_close(queue_t * rq)588 ipnet_close(queue_t *rq)
589 {
590 ipnet_t *ipnet = rq->q_ptr;
591 ipnet_stack_t *ips = ipnet->ipnet_ns->netstack_ipnet;
592
593 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS)
594 ipnet_leave_allmulti(ipnet->ipnet_if, ips);
595 if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI)
596 ipnet_leave_allmulti(ipnet->ipnet_if, ips);
597
598 mutex_enter(&ips->ips_walkers_lock);
599 while (ips->ips_walkers_cnt != 0)
600 cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock);
601
602 qprocsoff(rq);
603
604 list_remove(&ips->ips_str_list, ipnet);
605 if (ipnet->ipnet_if != NULL)
606 ipnetif_refrele(ipnet->ipnet_if);
607 id_free(ipnet_minor_space, ipnet->ipnet_minor);
608
609 if (list_is_empty(&ips->ips_str_list)) {
610 ipobs_unregister_hook(ips->ips_netstack, ips->ips_hook);
611 ips->ips_hook = NULL;
612 }
613
614 kmem_free(ipnet, sizeof (*ipnet));
615
616 mutex_exit(&ips->ips_walkers_lock);
617 netstack_rele(ips->ips_netstack);
618 return (0);
619 }
620
621 static int
ipnet_wput(queue_t * q,mblk_t * mp)622 ipnet_wput(queue_t *q, mblk_t *mp)
623 {
624 switch (mp->b_datap->db_type) {
625 case M_FLUSH:
626 if (*mp->b_rptr & FLUSHW) {
627 flushq(q, FLUSHDATA);
628 *mp->b_rptr &= ~FLUSHW;
629 }
630 if (*mp->b_rptr & FLUSHR)
631 qreply(q, mp);
632 else
633 freemsg(mp);
634 break;
635 case M_PROTO:
636 case M_PCPROTO:
637 ipnet_wputnondata(q, mp);
638 break;
639 case M_IOCTL:
640 ipnet_ioctl(q, mp);
641 break;
642 case M_IOCDATA:
643 ipnet_iocdata(q, mp);
644 break;
645 default:
646 freemsg(mp);
647 break;
648 }
649 return (0);
650 }
651
652 static int
ipnet_rsrv(queue_t * q)653 ipnet_rsrv(queue_t *q)
654 {
655 mblk_t *mp;
656
657 while ((mp = getq(q)) != NULL) {
658 ASSERT(DB_TYPE(mp) == M_DATA);
659 if (canputnext(q)) {
660 putnext(q, mp);
661 } else {
662 (void) putbq(q, mp);
663 break;
664 }
665 }
666 return (0);
667 }
668
669 static void
ipnet_ioctl(queue_t * q,mblk_t * mp)670 ipnet_ioctl(queue_t *q, mblk_t *mp)
671 {
672 struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
673
674 switch (iocp->ioc_cmd) {
675 case DLIOCRAW:
676 miocack(q, mp, 0, 0);
677 break;
678 case DLIOCIPNETINFO:
679 if (iocp->ioc_count == TRANSPARENT) {
680 mcopyin(mp, NULL, sizeof (uint_t), NULL);
681 qreply(q, mp);
682 break;
683 }
684 /* Fallthrough, we don't support I_STR with DLIOCIPNETINFO. */
685 default:
686 miocnak(q, mp, 0, EINVAL);
687 break;
688 }
689 }
690
691 static void
ipnet_iocdata(queue_t * q,mblk_t * mp)692 ipnet_iocdata(queue_t *q, mblk_t *mp)
693 {
694 struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
695 ipnet_t *ipnet = q->q_ptr;
696
697 switch (iocp->ioc_cmd) {
698 case DLIOCIPNETINFO:
699 if (*(int *)mp->b_cont->b_rptr == 1)
700 ipnet->ipnet_flags |= IPNET_INFO;
701 else if (*(int *)mp->b_cont->b_rptr == 0)
702 ipnet->ipnet_flags &= ~IPNET_INFO;
703 else
704 goto iocnak;
705 miocack(q, mp, 0, DL_IPNETINFO_VERSION);
706 break;
707 default:
708 iocnak:
709 miocnak(q, mp, 0, EINVAL);
710 break;
711 }
712 }
713
714 static void
ipnet_wputnondata(queue_t * q,mblk_t * mp)715 ipnet_wputnondata(queue_t *q, mblk_t *mp)
716 {
717 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
718 t_uscalar_t prim = dlp->dl_primitive;
719
720 switch (prim) {
721 case DL_INFO_REQ:
722 ipnet_inforeq(q, mp);
723 break;
724 case DL_UNBIND_REQ:
725 ipnet_unbindreq(q, mp);
726 break;
727 case DL_BIND_REQ:
728 ipnet_bindreq(q, mp);
729 break;
730 case DL_PROMISCON_REQ:
731 ipnet_dlpromisconreq(q, mp);
732 break;
733 case DL_PROMISCOFF_REQ:
734 ipnet_dlpromiscoffreq(q, mp);
735 break;
736 case DL_UNITDATA_REQ:
737 case DL_DETACH_REQ:
738 case DL_PHYS_ADDR_REQ:
739 case DL_SET_PHYS_ADDR_REQ:
740 case DL_ENABMULTI_REQ:
741 case DL_DISABMULTI_REQ:
742 case DL_ATTACH_REQ:
743 dlerrorack(q, mp, prim, DL_UNSUPPORTED, 0);
744 break;
745 default:
746 dlerrorack(q, mp, prim, DL_BADPRIM, 0);
747 break;
748 }
749 }
750
751 static void
ipnet_inforeq(queue_t * q,mblk_t * mp)752 ipnet_inforeq(queue_t *q, mblk_t *mp)
753 {
754 dl_info_ack_t *dlip;
755 size_t size = sizeof (dl_info_ack_t) + sizeof (ushort_t);
756
757 if (MBLKL(mp) < DL_INFO_REQ_SIZE) {
758 dlerrorack(q, mp, DL_INFO_REQ, DL_BADPRIM, 0);
759 return;
760 }
761
762 if ((mp = mexchange(q, mp, size, M_PCPROTO, DL_INFO_ACK)) == NULL)
763 return;
764
765 dlip = (dl_info_ack_t *)mp->b_rptr;
766 *dlip = ipnet_infoack;
767 qreply(q, mp);
768 }
769
770 static void
ipnet_bindreq(queue_t * q,mblk_t * mp)771 ipnet_bindreq(queue_t *q, mblk_t *mp)
772 {
773 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
774 ipnet_t *ipnet = q->q_ptr;
775
776 if (MBLKL(mp) < DL_BIND_REQ_SIZE) {
777 dlerrorack(q, mp, DL_BIND_REQ, DL_BADPRIM, 0);
778 return;
779 }
780
781 switch (dlp->bind_req.dl_sap) {
782 case 0 :
783 ipnet->ipnet_family = AF_UNSPEC;
784 break;
785 case IPV4_VERSION :
786 ipnet->ipnet_family = AF_INET;
787 break;
788 case IPV6_VERSION :
789 ipnet->ipnet_family = AF_INET6;
790 break;
791 default :
792 dlerrorack(q, mp, DL_BIND_REQ, DL_BADSAP, 0);
793 return;
794 /*NOTREACHED*/
795 }
796
797 ipnet->ipnet_dlstate = DL_IDLE;
798 dlbindack(q, mp, dlp->bind_req.dl_sap, 0, 0, 0, 0);
799 }
800
801 static void
ipnet_unbindreq(queue_t * q,mblk_t * mp)802 ipnet_unbindreq(queue_t *q, mblk_t *mp)
803 {
804 ipnet_t *ipnet = q->q_ptr;
805
806 if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) {
807 dlerrorack(q, mp, DL_UNBIND_REQ, DL_BADPRIM, 0);
808 return;
809 }
810
811 if (ipnet->ipnet_dlstate != DL_IDLE) {
812 dlerrorack(q, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0);
813 } else {
814 ipnet->ipnet_dlstate = DL_UNBOUND;
815 ipnet->ipnet_family = AF_UNSPEC;
816 dlokack(q, mp, DL_UNBIND_REQ);
817 }
818 }
819
820 static void
ipnet_dlpromisconreq(queue_t * q,mblk_t * mp)821 ipnet_dlpromisconreq(queue_t *q, mblk_t *mp)
822 {
823 ipnet_t *ipnet = q->q_ptr;
824 t_uscalar_t level;
825 int err;
826
827 if (MBLKL(mp) < DL_PROMISCON_REQ_SIZE) {
828 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0);
829 return;
830 }
831
832 if (ipnet->ipnet_flags & IPNET_LOMODE) {
833 dlokack(q, mp, DL_PROMISCON_REQ);
834 return;
835 }
836
837 level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level;
838 if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) {
839 if ((err = ipnet_join_allmulti(ipnet->ipnet_if,
840 ipnet->ipnet_ns->netstack_ipnet)) != 0) {
841 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_SYSERR, err);
842 return;
843 }
844 }
845
846 switch (level) {
847 case DL_PROMISC_PHYS:
848 ipnet->ipnet_flags |= IPNET_PROMISC_PHYS;
849 break;
850 case DL_PROMISC_SAP:
851 ipnet->ipnet_flags |= IPNET_PROMISC_SAP;
852 break;
853 case DL_PROMISC_MULTI:
854 ipnet->ipnet_flags |= IPNET_PROMISC_MULTI;
855 break;
856 default:
857 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0);
858 return;
859 }
860
861 dlokack(q, mp, DL_PROMISCON_REQ);
862 }
863
864 static void
ipnet_dlpromiscoffreq(queue_t * q,mblk_t * mp)865 ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp)
866 {
867 ipnet_t *ipnet = q->q_ptr;
868 t_uscalar_t level;
869 uint16_t orig_ipnet_flags = ipnet->ipnet_flags;
870
871 if (MBLKL(mp) < DL_PROMISCOFF_REQ_SIZE) {
872 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0);
873 return;
874 }
875
876 if (ipnet->ipnet_flags & IPNET_LOMODE) {
877 dlokack(q, mp, DL_PROMISCOFF_REQ);
878 return;
879 }
880
881 level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level;
882 switch (level) {
883 case DL_PROMISC_PHYS:
884 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS)
885 ipnet->ipnet_flags &= ~IPNET_PROMISC_PHYS;
886 break;
887 case DL_PROMISC_SAP:
888 if (ipnet->ipnet_flags & IPNET_PROMISC_SAP)
889 ipnet->ipnet_flags &= ~IPNET_PROMISC_SAP;
890 break;
891 case DL_PROMISC_MULTI:
892 if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI)
893 ipnet->ipnet_flags &= ~IPNET_PROMISC_MULTI;
894 break;
895 default:
896 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0);
897 return;
898 }
899
900 if (orig_ipnet_flags == ipnet->ipnet_flags) {
901 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_NOTENAB, 0);
902 return;
903 }
904
905 if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) {
906 ipnet_leave_allmulti(ipnet->ipnet_if,
907 ipnet->ipnet_ns->netstack_ipnet);
908 }
909
910 dlokack(q, mp, DL_PROMISCOFF_REQ);
911 }
912
913 static int
ipnet_join_allmulti(ipnetif_t * ipnetif,ipnet_stack_t * ips)914 ipnet_join_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips)
915 {
916 int err = 0;
917 ip_stack_t *ipst = ips->ips_netstack->netstack_ip;
918 uint64_t index = ipnetif->if_index;
919
920 mutex_enter(&ips->ips_event_lock);
921 if (ipnetif->if_multicnt == 0) {
922 ASSERT((ipnetif->if_flags &
923 (IPNETIF_IPV4ALLMULTI | IPNETIF_IPV6ALLMULTI)) == 0);
924 if (ipnetif->if_flags & IPNETIF_IPV4PLUMBED) {
925 err = ip_join_allmulti(index, B_FALSE, ipst);
926 if (err != 0)
927 goto done;
928 ipnetif->if_flags |= IPNETIF_IPV4ALLMULTI;
929 }
930 if (ipnetif->if_flags & IPNETIF_IPV6PLUMBED) {
931 err = ip_join_allmulti(index, B_TRUE, ipst);
932 if (err != 0 &&
933 (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI)) {
934 (void) ip_leave_allmulti(index, B_FALSE, ipst);
935 ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI;
936 goto done;
937 }
938 ipnetif->if_flags |= IPNETIF_IPV6ALLMULTI;
939 }
940 }
941 ipnetif->if_multicnt++;
942
943 done:
944 mutex_exit(&ips->ips_event_lock);
945 return (err);
946 }
947
948 static void
ipnet_leave_allmulti(ipnetif_t * ipnetif,ipnet_stack_t * ips)949 ipnet_leave_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips)
950 {
951 int err;
952 ip_stack_t *ipst = ips->ips_netstack->netstack_ip;
953 uint64_t index = ipnetif->if_index;
954
955 mutex_enter(&ips->ips_event_lock);
956 ASSERT(ipnetif->if_multicnt != 0);
957 if (--ipnetif->if_multicnt == 0) {
958 if (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI) {
959 err = ip_leave_allmulti(index, B_FALSE, ipst);
960 ASSERT(err == 0 || err == ENODEV);
961 ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI;
962 }
963 if (ipnetif->if_flags & IPNETIF_IPV6ALLMULTI) {
964 err = ip_leave_allmulti(index, B_TRUE, ipst);
965 ASSERT(err == 0 || err == ENODEV);
966 ipnetif->if_flags &= ~IPNETIF_IPV6ALLMULTI;
967 }
968 }
969 mutex_exit(&ips->ips_event_lock);
970 }
971
972 /*
973 * Allocate a new mblk_t and put a dl_ipnetinfo_t in it.
974 * The structure it copies the header information from,
975 * hook_pkt_observe_t, is constructed using network byte
976 * order in ipobs_hook(), so there is no conversion here.
977 */
978 static mblk_t *
ipnet_addheader(hook_pkt_observe_t * hdr,mblk_t * mp)979 ipnet_addheader(hook_pkt_observe_t *hdr, mblk_t *mp)
980 {
981 mblk_t *dlhdr;
982 dl_ipnetinfo_t *dl;
983
984 if ((dlhdr = allocb(sizeof (dl_ipnetinfo_t), BPRI_HI)) == NULL) {
985 freemsg(mp);
986 return (NULL);
987 }
988 dl = (dl_ipnetinfo_t *)dlhdr->b_rptr;
989 dl->dli_version = DL_IPNETINFO_VERSION;
990 dl->dli_family = hdr->hpo_family;
991 dl->dli_htype = hdr->hpo_htype;
992 dl->dli_pktlen = hdr->hpo_pktlen;
993 dl->dli_ifindex = hdr->hpo_ifindex;
994 dl->dli_grifindex = hdr->hpo_grifindex;
995 dl->dli_zsrc = hdr->hpo_zsrc;
996 dl->dli_zdst = hdr->hpo_zdst;
997 dlhdr->b_wptr += sizeof (*dl);
998 dlhdr->b_cont = mp;
999
1000 return (dlhdr);
1001 }
1002
1003 static ipnet_addrtype_t
ipnet_get_addrtype(ipnet_t * ipnet,ipnet_addrp_t * addr)1004 ipnet_get_addrtype(ipnet_t *ipnet, ipnet_addrp_t *addr)
1005 {
1006 list_t *list;
1007 ipnetif_t *ipnetif = ipnet->ipnet_if;
1008 ipnetif_addr_t *ifaddr;
1009 ipnet_addrtype_t addrtype = IPNETADDR_UNKNOWN;
1010
1011 /* First check if the address is multicast or limited broadcast. */
1012 switch (addr->iap_family) {
1013 case AF_INET:
1014 if (CLASSD(*(addr->iap_addr4)) ||
1015 *(addr->iap_addr4) == INADDR_BROADCAST)
1016 return (IPNETADDR_MBCAST);
1017 break;
1018 case AF_INET6:
1019 if (IN6_IS_ADDR_MULTICAST(addr->iap_addr6))
1020 return (IPNETADDR_MBCAST);
1021 break;
1022 }
1023
1024 /*
1025 * Walk the address list to see if the address belongs to our
1026 * interface or is one of our subnet broadcast addresses.
1027 */
1028 mutex_enter(&ipnetif->if_addr_lock);
1029 list = (addr->iap_family == AF_INET) ?
1030 &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list;
1031 for (ifaddr = list_head(list);
1032 ifaddr != NULL && addrtype == IPNETADDR_UNKNOWN;
1033 ifaddr = list_next(list, ifaddr)) {
1034 /*
1035 * If we're not in the global zone, then only look at
1036 * addresses in our zone.
1037 */
1038 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID &&
1039 ipnet->ipnet_zoneid != ifaddr->ifa_zone)
1040 continue;
1041 switch (addr->iap_family) {
1042 case AF_INET:
1043 if (ifaddr->ifa_ip4addr != INADDR_ANY &&
1044 *(addr->iap_addr4) == ifaddr->ifa_ip4addr)
1045 addrtype = IPNETADDR_MYADDR;
1046 else if (ifaddr->ifa_brdaddr != INADDR_ANY &&
1047 *(addr->iap_addr4) == ifaddr->ifa_brdaddr)
1048 addrtype = IPNETADDR_MBCAST;
1049 break;
1050 case AF_INET6:
1051 if (IN6_ARE_ADDR_EQUAL(addr->iap_addr6,
1052 &ifaddr->ifa_ip6addr))
1053 addrtype = IPNETADDR_MYADDR;
1054 break;
1055 }
1056 }
1057 mutex_exit(&ipnetif->if_addr_lock);
1058
1059 return (addrtype);
1060 }
1061
1062 /*
1063 * Verify if the packet contained in hdr should be passed up to the
1064 * ipnet client stream.
1065 */
1066 static boolean_t
ipnet_accept(ipnet_t * ipnet,hook_pkt_observe_t * hdr,ipnet_addrp_t * src,ipnet_addrp_t * dst)1067 ipnet_accept(ipnet_t *ipnet, hook_pkt_observe_t *hdr, ipnet_addrp_t *src,
1068 ipnet_addrp_t *dst)
1069 {
1070 boolean_t obsif;
1071 uint64_t ifindex = ipnet->ipnet_if->if_index;
1072 ipnet_addrtype_t srctype;
1073 ipnet_addrtype_t dsttype;
1074
1075 srctype = ipnet_get_addrtype(ipnet, src);
1076 dsttype = ipnet_get_addrtype(ipnet, dst);
1077
1078 /*
1079 * If the packet's ifindex matches ours, or the packet's group ifindex
1080 * matches ours, it's on the interface we're observing. (Thus,
1081 * observing on the group ifindex matches all ifindexes in the group.)
1082 */
1083 obsif = (ntohl(hdr->hpo_ifindex) == ifindex ||
1084 ntohl(hdr->hpo_grifindex) == ifindex);
1085
1086 DTRACE_PROBE5(ipnet_accept__addr,
1087 ipnet_addrtype_t, srctype, ipnet_addrp_t *, src,
1088 ipnet_addrtype_t, dsttype, ipnet_addrp_t *, dst,
1089 boolean_t, obsif);
1090
1091 /*
1092 * Do not allow an ipnet stream to see packets that are not from or to
1093 * its zone. The exception is when zones are using the shared stack
1094 * model. In this case, streams in the global zone have visibility
1095 * into other shared-stack zones, and broadcast and multicast traffic
1096 * is visible by all zones in the stack.
1097 */
1098 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID &&
1099 dsttype != IPNETADDR_MBCAST) {
1100 if (ipnet->ipnet_zoneid != ntohl(hdr->hpo_zsrc) &&
1101 ipnet->ipnet_zoneid != ntohl(hdr->hpo_zdst))
1102 return (B_FALSE);
1103 }
1104
1105 /*
1106 * If DL_PROMISC_SAP isn't enabled, then the bound SAP must match the
1107 * packet's IP version.
1108 */
1109 if (!(ipnet->ipnet_flags & IPNET_PROMISC_SAP) &&
1110 ipnet->ipnet_family != hdr->hpo_family)
1111 return (B_FALSE);
1112
1113 /* If the destination address is ours, then accept the packet. */
1114 if (dsttype == IPNETADDR_MYADDR)
1115 return (B_TRUE);
1116
1117 /*
1118 * If DL_PROMISC_PHYS is enabled, then we can see all packets that are
1119 * sent or received on the interface we're observing, or packets that
1120 * have our source address (this allows us to see packets we send).
1121 */
1122 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) {
1123 if (srctype == IPNETADDR_MYADDR || obsif)
1124 return (B_TRUE);
1125 }
1126
1127 /*
1128 * We accept multicast and broadcast packets transmitted or received
1129 * on the interface we're observing.
1130 */
1131 if (dsttype == IPNETADDR_MBCAST && obsif)
1132 return (B_TRUE);
1133
1134 return (B_FALSE);
1135 }
1136
1137 /*
1138 * Verify if the packet contained in hdr should be passed up to the ipnet
1139 * client stream that's in IPNET_LOMODE.
1140 */
1141 /* ARGSUSED */
1142 static boolean_t
ipnet_loaccept(ipnet_t * ipnet,hook_pkt_observe_t * hdr,ipnet_addrp_t * src,ipnet_addrp_t * dst)1143 ipnet_loaccept(ipnet_t *ipnet, hook_pkt_observe_t *hdr, ipnet_addrp_t *src,
1144 ipnet_addrp_t *dst)
1145 {
1146 if (hdr->hpo_htype != htons(IPOBS_HOOK_LOCAL)) {
1147 /*
1148 * ipnet_if is only NULL for IPNET_MINOR_LO devices.
1149 */
1150 if (ipnet->ipnet_if == NULL)
1151 return (B_FALSE);
1152 }
1153
1154 /*
1155 * An ipnet stream must not see packets that are not from/to its zone.
1156 */
1157 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID) {
1158 if (ipnet->ipnet_zoneid != ntohl(hdr->hpo_zsrc) &&
1159 ipnet->ipnet_zoneid != ntohl(hdr->hpo_zdst))
1160 return (B_FALSE);
1161 }
1162
1163 return (ipnet->ipnet_family == AF_UNSPEC ||
1164 ipnet->ipnet_family == hdr->hpo_family);
1165 }
1166
1167 static void
ipnet_dispatch(void * arg)1168 ipnet_dispatch(void *arg)
1169 {
1170 mblk_t *mp = arg;
1171 hook_pkt_observe_t *hdr = (hook_pkt_observe_t *)mp->b_rptr;
1172 ipnet_t *ipnet;
1173 mblk_t *netmp;
1174 list_t *list;
1175 ipnet_stack_t *ips;
1176 ipnet_addrp_t src;
1177 ipnet_addrp_t dst;
1178
1179 ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet;
1180
1181 netmp = hdr->hpo_pkt->b_cont;
1182 src.iap_family = hdr->hpo_family;
1183 dst.iap_family = hdr->hpo_family;
1184
1185 if (hdr->hpo_family == AF_INET) {
1186 src.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_src;
1187 dst.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_dst;
1188 } else {
1189 src.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_src;
1190 dst.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_dst;
1191 }
1192
1193 ipnet_walkers_inc(ips);
1194
1195 list = &ips->ips_str_list;
1196 for (ipnet = list_head(list); ipnet != NULL;
1197 ipnet = list_next(list, ipnet)) {
1198 if (!(*ipnet->ipnet_acceptfn)(ipnet, hdr, &src, &dst)) {
1199 IPSK_BUMP(ips, ik_acceptFail);
1200 continue;
1201 }
1202 IPSK_BUMP(ips, ik_acceptOk);
1203
1204 if (list_next(list, ipnet) == NULL) {
1205 netmp = hdr->hpo_pkt->b_cont;
1206 hdr->hpo_pkt->b_cont = NULL;
1207 } else {
1208 if ((netmp = dupmsg(hdr->hpo_pkt->b_cont)) == NULL &&
1209 (netmp = copymsg(hdr->hpo_pkt->b_cont)) == NULL) {
1210 IPSK_BUMP(ips, ik_duplicationFail);
1211 continue;
1212 }
1213 }
1214
1215 if (ipnet->ipnet_flags & IPNET_INFO) {
1216 if ((netmp = ipnet_addheader(hdr, netmp)) == NULL) {
1217 IPSK_BUMP(ips, ik_dispatchHeaderDrop);
1218 continue;
1219 }
1220 }
1221
1222 if (ipnet->ipnet_rq->q_first == NULL &&
1223 canputnext(ipnet->ipnet_rq)) {
1224 putnext(ipnet->ipnet_rq, netmp);
1225 IPSK_BUMP(ips, ik_dispatchDeliver);
1226 } else if (canput(ipnet->ipnet_rq)) {
1227 (void) putq(ipnet->ipnet_rq, netmp);
1228 IPSK_BUMP(ips, ik_dispatchDeliver);
1229 } else {
1230 freemsg(netmp);
1231 IPSK_BUMP(ips, ik_dispatchPutDrop);
1232 }
1233 }
1234
1235 ipnet_walkers_dec(ips);
1236
1237 freemsg(mp);
1238 }
1239
1240 static void
ipnet_input(mblk_t * mp)1241 ipnet_input(mblk_t *mp)
1242 {
1243 hook_pkt_observe_t *hdr = (hook_pkt_observe_t *)mp->b_rptr;
1244 ipnet_stack_t *ips;
1245
1246 ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet;
1247
1248 if (ddi_taskq_dispatch(ipnet_taskq, ipnet_dispatch, mp, DDI_NOSLEEP) !=
1249 DDI_SUCCESS) {
1250 IPSK_BUMP(ips, ik_dispatchFail);
1251 freemsg(mp);
1252 } else {
1253 IPSK_BUMP(ips, ik_dispatchOk);
1254 }
1255 }
1256
1257 static ipnetif_t *
ipnet_alloc_if(ipnet_stack_t * ips)1258 ipnet_alloc_if(ipnet_stack_t *ips)
1259 {
1260 ipnetif_t *ipnetif;
1261
1262 if ((ipnetif = kmem_zalloc(sizeof (*ipnetif), KM_NOSLEEP)) == NULL)
1263 return (NULL);
1264
1265 mutex_init(&ipnetif->if_addr_lock, NULL, MUTEX_DEFAULT, 0);
1266 list_create(&ipnetif->if_ip4addr_list, sizeof (ipnetif_addr_t),
1267 offsetof(ipnetif_addr_t, ifa_link));
1268 list_create(&ipnetif->if_ip6addr_list, sizeof (ipnetif_addr_t),
1269 offsetof(ipnetif_addr_t, ifa_link));
1270 mutex_init(&ipnetif->if_reflock, NULL, MUTEX_DEFAULT, 0);
1271
1272 ipnetif->if_stackp = ips;
1273
1274 return (ipnetif);
1275 }
1276
1277 /*
1278 * Create a new ipnetif_t and new minor node for it. If creation is
1279 * successful the new ipnetif_t is inserted into an avl_tree
1280 * containing ipnetif's for this stack instance.
1281 */
1282 static ipnetif_t *
ipnetif_create(const char * name,uint64_t index,ipnet_stack_t * ips,uint64_t ifflags)1283 ipnetif_create(const char *name, uint64_t index, ipnet_stack_t *ips,
1284 uint64_t ifflags)
1285 {
1286 ipnetif_t *ipnetif;
1287 avl_index_t where = 0;
1288 minor_t ifminor;
1289
1290 /*
1291 * Because ipnetif_create() can be called from a NIC event
1292 * callback, it should not block.
1293 */
1294 ifminor = (minor_t)id_alloc_nosleep(ipnet_minor_space);
1295 if (ifminor == (minor_t)-1)
1296 return (NULL);
1297 if ((ipnetif = ipnet_alloc_if(ips)) == NULL) {
1298 id_free(ipnet_minor_space, ifminor);
1299 return (NULL);
1300 }
1301
1302 (void) strlcpy(ipnetif->if_name, name, LIFNAMSIZ);
1303 ipnetif->if_index = (uint_t)index;
1304 ipnetif->if_zoneid = netstack_get_zoneid(ips->ips_netstack);
1305 ipnetif->if_dev = makedevice(ipnet_major, ifminor);
1306
1307 ipnetif->if_refcnt = 1;
1308 if ((ifflags & IFF_LOOPBACK) != 0)
1309 ipnetif->if_flags = IPNETIF_LOOPBACK;
1310
1311 mutex_enter(&ips->ips_avl_lock);
1312 VERIFY(avl_find(&ips->ips_avl_by_index, &index, &where) == NULL);
1313 avl_insert(&ips->ips_avl_by_index, ipnetif, where);
1314 VERIFY(avl_find(&ips->ips_avl_by_name, (void *)name, &where) == NULL);
1315 avl_insert(&ips->ips_avl_by_name, ipnetif, where);
1316 mutex_exit(&ips->ips_avl_lock);
1317
1318 return (ipnetif);
1319 }
1320
1321 static void
ipnetif_remove(ipnetif_t * ipnetif,ipnet_stack_t * ips)1322 ipnetif_remove(ipnetif_t *ipnetif, ipnet_stack_t *ips)
1323 {
1324 ipnet_t *ipnet;
1325
1326 ipnet_walkers_inc(ips);
1327 /* Send a SIGHUP to all open streams associated with this ipnetif. */
1328 for (ipnet = list_head(&ips->ips_str_list); ipnet != NULL;
1329 ipnet = list_next(&ips->ips_str_list, ipnet)) {
1330 if (ipnet->ipnet_if == ipnetif)
1331 (void) putnextctl(ipnet->ipnet_rq, M_HANGUP);
1332 }
1333 ipnet_walkers_dec(ips);
1334 mutex_enter(&ips->ips_avl_lock);
1335 avl_remove(&ips->ips_avl_by_index, ipnetif);
1336 avl_remove(&ips->ips_avl_by_name, ipnetif);
1337 mutex_exit(&ips->ips_avl_lock);
1338 /*
1339 * Release the reference we implicitly held in ipnetif_create().
1340 */
1341 ipnetif_refrele(ipnetif);
1342 }
1343
1344 static void
ipnet_purge_addrlist(list_t * addrlist)1345 ipnet_purge_addrlist(list_t *addrlist)
1346 {
1347 ipnetif_addr_t *ifa;
1348
1349 while ((ifa = list_head(addrlist)) != NULL) {
1350 list_remove(addrlist, ifa);
1351 if (ifa->ifa_shared != NULL)
1352 ipnetif_clone_release(ifa->ifa_shared);
1353 kmem_free(ifa, sizeof (*ifa));
1354 }
1355 }
1356
1357 static void
ipnetif_free(ipnetif_t * ipnetif)1358 ipnetif_free(ipnetif_t *ipnetif)
1359 {
1360 ASSERT(ipnetif->if_refcnt == 0);
1361 ASSERT(ipnetif->if_sharecnt == 0);
1362
1363 /* Remove IPv4/v6 address lists from the ipnetif */
1364 ipnet_purge_addrlist(&ipnetif->if_ip4addr_list);
1365 list_destroy(&ipnetif->if_ip4addr_list);
1366 ipnet_purge_addrlist(&ipnetif->if_ip6addr_list);
1367 list_destroy(&ipnetif->if_ip6addr_list);
1368 mutex_destroy(&ipnetif->if_addr_lock);
1369 mutex_destroy(&ipnetif->if_reflock);
1370 if (ipnetif->if_dev != 0)
1371 id_free(ipnet_minor_space, getminor(ipnetif->if_dev));
1372 kmem_free(ipnetif, sizeof (*ipnetif));
1373 }
1374
1375 /*
1376 * Create an ipnetif_addr_t with the given logical interface id (lif)
1377 * and add it to the supplied ipnetif. The lif is the netinfo
1378 * representation of logical interface id, and we use this id to match
1379 * incoming netinfo events against our lists of addresses.
1380 */
1381 static void
ipnet_add_ifaddr(uint64_t lif,ipnetif_t * ipnetif,net_handle_t nd)1382 ipnet_add_ifaddr(uint64_t lif, ipnetif_t *ipnetif, net_handle_t nd)
1383 {
1384 ipnetif_addr_t *ifaddr;
1385 zoneid_t zoneid;
1386 struct sockaddr_in bcast;
1387 struct sockaddr_storage addr;
1388 net_ifaddr_t type = NA_ADDRESS;
1389 uint64_t phyif = ipnetif->if_index;
1390
1391 if (net_getlifaddr(nd, phyif, lif, 1, &type, &addr) != 0 ||
1392 net_getlifzone(nd, phyif, lif, &zoneid) != 0)
1393 return;
1394
1395 if ((ifaddr = kmem_alloc(sizeof (*ifaddr), KM_NOSLEEP)) == NULL)
1396 return;
1397 ifaddr->ifa_zone = zoneid;
1398 ifaddr->ifa_id = lif;
1399 ifaddr->ifa_shared = NULL;
1400
1401 switch (addr.ss_family) {
1402 case AF_INET:
1403 ifaddr->ifa_ip4addr =
1404 ((struct sockaddr_in *)&addr)->sin_addr.s_addr;
1405 /*
1406 * Try and get the broadcast address. Note that it's okay for
1407 * an interface to not have a broadcast address, so we don't
1408 * fail the entire operation if net_getlifaddr() fails here.
1409 */
1410 type = NA_BROADCAST;
1411 if (net_getlifaddr(nd, phyif, lif, 1, &type, &bcast) == 0)
1412 ifaddr->ifa_brdaddr = bcast.sin_addr.s_addr;
1413 break;
1414 case AF_INET6:
1415 ifaddr->ifa_ip6addr = ((struct sockaddr_in6 *)&addr)->sin6_addr;
1416 break;
1417 }
1418
1419 /*
1420 * The zoneid stored in ipnetif_t needs to correspond to the actual
1421 * zone the address is being used in. This facilitates finding the
1422 * correct netstack_t pointer, amongst other things, later.
1423 */
1424 if (zoneid == ALL_ZONES)
1425 zoneid = GLOBAL_ZONEID;
1426
1427 mutex_enter(&ipnetif->if_addr_lock);
1428 if (zoneid != ipnetif->if_zoneid) {
1429 ipnetif_t *ifp2;
1430
1431 ifp2 = ipnetif_clone_create(ipnetif, zoneid);
1432 ifaddr->ifa_shared = ifp2;
1433 }
1434 list_insert_tail(addr.ss_family == AF_INET ?
1435 &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list, ifaddr);
1436 mutex_exit(&ipnetif->if_addr_lock);
1437 }
1438
1439 static void
ipnet_delete_ifaddr(ipnetif_addr_t * ifaddr,ipnetif_t * ipnetif,boolean_t isv6)1440 ipnet_delete_ifaddr(ipnetif_addr_t *ifaddr, ipnetif_t *ipnetif, boolean_t isv6)
1441 {
1442 mutex_enter(&ipnetif->if_addr_lock);
1443 if (ifaddr->ifa_shared != NULL)
1444 ipnetif_clone_release(ifaddr->ifa_shared);
1445
1446 list_remove(isv6 ?
1447 &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list, ifaddr);
1448 mutex_exit(&ipnetif->if_addr_lock);
1449 kmem_free(ifaddr, sizeof (*ifaddr));
1450 }
1451
1452 static void
ipnet_plumb_ev(ipnet_nicevent_t * ipne,ipnet_stack_t * ips,boolean_t isv6)1453 ipnet_plumb_ev(ipnet_nicevent_t *ipne, ipnet_stack_t *ips, boolean_t isv6)
1454 {
1455 ipnetif_t *ipnetif;
1456 boolean_t refrele_needed = B_TRUE;
1457 uint64_t ifflags;
1458 uint64_t ifindex;
1459 char *ifname;
1460
1461 ifflags = 0;
1462 ifname = ipne->ipne_ifname;
1463 ifindex = ipne->ipne_ifindex;
1464
1465 (void) net_getlifflags(ipne->ipne_protocol, ifindex, 0, &ifflags);
1466
1467 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL) {
1468 ipnetif = ipnetif_create(ifname, ifindex, ips, ifflags);
1469 refrele_needed = B_FALSE;
1470 }
1471 if (ipnetif != NULL) {
1472 ipnetif->if_flags |=
1473 isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED;
1474 }
1475
1476 if (ipnetif->if_multicnt != 0) {
1477 if (ip_join_allmulti(ifindex, isv6,
1478 ips->ips_netstack->netstack_ip) == 0) {
1479 ipnetif->if_flags |=
1480 isv6 ? IPNETIF_IPV6ALLMULTI : IPNETIF_IPV4ALLMULTI;
1481 }
1482 }
1483
1484 if (refrele_needed)
1485 ipnetif_refrele(ipnetif);
1486 }
1487
1488 static void
ipnet_unplumb_ev(uint64_t ifindex,ipnet_stack_t * ips,boolean_t isv6)1489 ipnet_unplumb_ev(uint64_t ifindex, ipnet_stack_t *ips, boolean_t isv6)
1490 {
1491 ipnetif_t *ipnetif;
1492
1493 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL)
1494 return;
1495
1496 mutex_enter(&ipnetif->if_addr_lock);
1497 ipnet_purge_addrlist(isv6 ?
1498 &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list);
1499 mutex_exit(&ipnetif->if_addr_lock);
1500
1501 /*
1502 * Note that we have one ipnetif for both IPv4 and IPv6, but we receive
1503 * separate NE_UNPLUMB events for IPv4 and IPv6. We remove the ipnetif
1504 * if both IPv4 and IPv6 interfaces have been unplumbed.
1505 */
1506 ipnetif->if_flags &= isv6 ? ~IPNETIF_IPV6PLUMBED : ~IPNETIF_IPV4PLUMBED;
1507 if (!(ipnetif->if_flags & (IPNETIF_IPV4PLUMBED | IPNETIF_IPV6PLUMBED)))
1508 ipnetif_remove(ipnetif, ips);
1509 ipnetif_refrele(ipnetif);
1510 }
1511
1512 static void
ipnet_lifup_ev(uint64_t ifindex,uint64_t lifindex,net_handle_t nd,ipnet_stack_t * ips,boolean_t isv6)1513 ipnet_lifup_ev(uint64_t ifindex, uint64_t lifindex, net_handle_t nd,
1514 ipnet_stack_t *ips, boolean_t isv6)
1515 {
1516 ipnetif_t *ipnetif;
1517 ipnetif_addr_t *ifaddr;
1518
1519 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL)
1520 return;
1521 if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL) {
1522 /*
1523 * We must have missed a NE_LIF_DOWN event. Delete this
1524 * ifaddr and re-create it.
1525 */
1526 ipnet_delete_ifaddr(ifaddr, ipnetif, isv6);
1527 }
1528
1529 ipnet_add_ifaddr(lifindex, ipnetif, nd);
1530 ipnetif_refrele(ipnetif);
1531 }
1532
1533 static void
ipnet_lifdown_ev(uint64_t ifindex,uint64_t lifindex,ipnet_stack_t * ips,boolean_t isv6)1534 ipnet_lifdown_ev(uint64_t ifindex, uint64_t lifindex, ipnet_stack_t *ips,
1535 boolean_t isv6)
1536 {
1537 ipnetif_t *ipnetif;
1538 ipnetif_addr_t *ifaddr;
1539
1540 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL)
1541 return;
1542 if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL)
1543 ipnet_delete_ifaddr(ifaddr, ipnetif, isv6);
1544 ipnetif_refrele(ipnetif);
1545 /*
1546 * Make sure that open streams on this ipnetif are still allowed to
1547 * have it open.
1548 */
1549 ipnetif_zonecheck(ipnetif, ips);
1550 }
1551
1552 /*
1553 * This callback from the NIC event framework dispatches a taskq as the event
1554 * handlers may block.
1555 */
1556 /* ARGSUSED */
1557 static int
ipnet_nicevent_cb(hook_event_token_t token,hook_data_t info,void * arg)1558 ipnet_nicevent_cb(hook_event_token_t token, hook_data_t info, void *arg)
1559 {
1560 ipnet_stack_t *ips = arg;
1561 hook_nic_event_t *hn = (hook_nic_event_t *)info;
1562 ipnet_nicevent_t *ipne;
1563
1564 if ((ipne = kmem_alloc(sizeof (ipnet_nicevent_t), KM_NOSLEEP)) == NULL)
1565 return (0);
1566 ipne->ipne_event = hn->hne_event;
1567 ipne->ipne_protocol = hn->hne_protocol;
1568 ipne->ipne_stackid = ips->ips_netstack->netstack_stackid;
1569 ipne->ipne_ifindex = hn->hne_nic;
1570 ipne->ipne_lifindex = hn->hne_lif;
1571 if (hn->hne_datalen != 0) {
1572 (void) strlcpy(ipne->ipne_ifname, hn->hne_data,
1573 sizeof (ipne->ipne_ifname));
1574 }
1575 (void) ddi_taskq_dispatch(ipnet_nicevent_taskq, ipnet_nicevent_task,
1576 ipne, DDI_NOSLEEP);
1577 return (0);
1578 }
1579
1580 static void
ipnet_nicevent_task(void * arg)1581 ipnet_nicevent_task(void *arg)
1582 {
1583 ipnet_nicevent_t *ipne = arg;
1584 netstack_t *ns;
1585 ipnet_stack_t *ips;
1586 boolean_t isv6;
1587
1588 if ((ns = netstack_find_by_stackid(ipne->ipne_stackid)) == NULL)
1589 goto done;
1590 ips = ns->netstack_ipnet;
1591 isv6 = (ipne->ipne_protocol == ips->ips_ndv6);
1592
1593 mutex_enter(&ips->ips_event_lock);
1594 switch (ipne->ipne_event) {
1595 case NE_PLUMB:
1596 ipnet_plumb_ev(ipne, ips, isv6);
1597 break;
1598 case NE_UNPLUMB:
1599 ipnet_unplumb_ev(ipne->ipne_ifindex, ips, isv6);
1600 break;
1601 case NE_LIF_UP:
1602 ipnet_lifup_ev(ipne->ipne_ifindex, ipne->ipne_lifindex,
1603 ipne->ipne_protocol, ips, isv6);
1604 break;
1605 case NE_LIF_DOWN:
1606 ipnet_lifdown_ev(ipne->ipne_ifindex, ipne->ipne_lifindex, ips,
1607 isv6);
1608 break;
1609 default:
1610 break;
1611 }
1612 mutex_exit(&ips->ips_event_lock);
1613 done:
1614 if (ns != NULL)
1615 netstack_rele(ns);
1616 kmem_free(ipne, sizeof (ipnet_nicevent_t));
1617 }
1618
1619 dev_t
ipnet_if_getdev(char * name,zoneid_t zoneid)1620 ipnet_if_getdev(char *name, zoneid_t zoneid)
1621 {
1622 netstack_t *ns;
1623 ipnet_stack_t *ips;
1624 ipnetif_t *ipnetif;
1625 dev_t dev = (dev_t)-1;
1626
1627 if (is_system_labeled() && zoneid != GLOBAL_ZONEID)
1628 return (dev);
1629 if ((ns = netstack_find_by_zoneid(zoneid)) == NULL)
1630 return (dev);
1631
1632 ips = ns->netstack_ipnet;
1633 mutex_enter(&ips->ips_avl_lock);
1634 if ((ipnetif = avl_find(&ips->ips_avl_by_name, name, NULL)) != NULL) {
1635 if (ipnetif_in_zone(ipnetif, zoneid, ips))
1636 dev = ipnetif->if_dev;
1637 }
1638 mutex_exit(&ips->ips_avl_lock);
1639 netstack_rele(ns);
1640
1641 return (dev);
1642 }
1643
1644 static ipnetif_t *
ipnetif_getby_index(uint64_t id,ipnet_stack_t * ips)1645 ipnetif_getby_index(uint64_t id, ipnet_stack_t *ips)
1646 {
1647 ipnetif_t *ipnetif;
1648
1649 mutex_enter(&ips->ips_avl_lock);
1650 if ((ipnetif = avl_find(&ips->ips_avl_by_index, &id, NULL)) != NULL)
1651 ipnetif_refhold(ipnetif);
1652 mutex_exit(&ips->ips_avl_lock);
1653 return (ipnetif);
1654 }
1655
1656 static ipnetif_t *
ipnetif_getby_dev(dev_t dev,ipnet_stack_t * ips)1657 ipnetif_getby_dev(dev_t dev, ipnet_stack_t *ips)
1658 {
1659 ipnetif_t *ipnetif;
1660 avl_tree_t *tree;
1661
1662 mutex_enter(&ips->ips_avl_lock);
1663 tree = &ips->ips_avl_by_index;
1664 for (ipnetif = avl_first(tree); ipnetif != NULL;
1665 ipnetif = avl_walk(tree, ipnetif, AVL_AFTER)) {
1666 if (ipnetif->if_dev == dev) {
1667 ipnetif_refhold(ipnetif);
1668 break;
1669 }
1670 }
1671 mutex_exit(&ips->ips_avl_lock);
1672 return (ipnetif);
1673 }
1674
1675 static ipnetif_addr_t *
ipnet_match_lif(ipnetif_t * ipnetif,lif_if_t lid,boolean_t isv6)1676 ipnet_match_lif(ipnetif_t *ipnetif, lif_if_t lid, boolean_t isv6)
1677 {
1678 ipnetif_addr_t *ifaddr;
1679 list_t *list;
1680
1681 mutex_enter(&ipnetif->if_addr_lock);
1682 list = isv6 ? &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list;
1683 for (ifaddr = list_head(list); ifaddr != NULL;
1684 ifaddr = list_next(list, ifaddr)) {
1685 if (lid == ifaddr->ifa_id)
1686 break;
1687 }
1688 mutex_exit(&ipnetif->if_addr_lock);
1689 return (ifaddr);
1690 }
1691
1692 /* ARGSUSED */
1693 static void *
ipnet_stack_init(netstackid_t stackid,netstack_t * ns)1694 ipnet_stack_init(netstackid_t stackid, netstack_t *ns)
1695 {
1696 ipnet_stack_t *ips;
1697
1698 ips = kmem_zalloc(sizeof (*ips), KM_SLEEP);
1699 ips->ips_netstack = ns;
1700 mutex_init(&ips->ips_avl_lock, NULL, MUTEX_DEFAULT, 0);
1701 avl_create(&ips->ips_avl_by_index, ipnetif_compare_index,
1702 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_index));
1703 avl_create(&ips->ips_avl_by_name, ipnetif_compare_name,
1704 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_name));
1705 avl_create(&ips->ips_avl_by_shared, ipnetif_compare_name_zone,
1706 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_shared));
1707 mutex_init(&ips->ips_walkers_lock, NULL, MUTEX_DEFAULT, NULL);
1708 cv_init(&ips->ips_walkers_cv, NULL, CV_DRIVER, NULL);
1709 list_create(&ips->ips_str_list, sizeof (ipnet_t),
1710 offsetof(ipnet_t, ipnet_next));
1711 ipnet_register_netihook(ips);
1712 return (ips);
1713 }
1714
1715 /* ARGSUSED */
1716 static void
ipnet_stack_fini(netstackid_t stackid,void * arg)1717 ipnet_stack_fini(netstackid_t stackid, void *arg)
1718 {
1719 ipnet_stack_t *ips = arg;
1720 ipnetif_t *ipnetif, *nipnetif;
1721
1722 if (ips->ips_kstatp != NULL) {
1723 zoneid_t zoneid;
1724
1725 zoneid = netstackid_to_zoneid(stackid);
1726 net_kstat_delete(net_zoneidtonetid(zoneid), ips->ips_kstatp);
1727 }
1728 if (ips->ips_ndv4 != NULL) {
1729 VERIFY(net_hook_unregister(ips->ips_ndv4, NH_NIC_EVENTS,
1730 ips->ips_nicevents) == 0);
1731 VERIFY(net_protocol_release(ips->ips_ndv4) == 0);
1732 }
1733 if (ips->ips_ndv6 != NULL) {
1734 VERIFY(net_hook_unregister(ips->ips_ndv6, NH_NIC_EVENTS,
1735 ips->ips_nicevents) == 0);
1736 VERIFY(net_protocol_release(ips->ips_ndv6) == 0);
1737 }
1738 hook_free(ips->ips_nicevents);
1739
1740 for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL;
1741 ipnetif = nipnetif) {
1742 nipnetif = AVL_NEXT(&ips->ips_avl_by_index, ipnetif);
1743 ipnetif_remove(ipnetif, ips);
1744 }
1745 avl_destroy(&ips->ips_avl_by_shared);
1746 avl_destroy(&ips->ips_avl_by_index);
1747 avl_destroy(&ips->ips_avl_by_name);
1748 mutex_destroy(&ips->ips_avl_lock);
1749 mutex_destroy(&ips->ips_walkers_lock);
1750 cv_destroy(&ips->ips_walkers_cv);
1751 list_destroy(&ips->ips_str_list);
1752 kmem_free(ips, sizeof (*ips));
1753 }
1754
1755 /* Do any of the addresses in addrlist belong the supplied zoneid? */
1756 static boolean_t
ipnet_addrs_in_zone(list_t * addrlist,zoneid_t zoneid)1757 ipnet_addrs_in_zone(list_t *addrlist, zoneid_t zoneid)
1758 {
1759 ipnetif_addr_t *ifa;
1760
1761 for (ifa = list_head(addrlist); ifa != NULL;
1762 ifa = list_next(addrlist, ifa)) {
1763 if (ifa->ifa_zone == zoneid)
1764 return (B_TRUE);
1765 }
1766 return (B_FALSE);
1767 }
1768
1769 /* Should the supplied ipnetif be visible from the supplied zoneid? */
1770 static boolean_t
ipnetif_in_zone(ipnetif_t * ipnetif,zoneid_t zoneid,ipnet_stack_t * ips)1771 ipnetif_in_zone(ipnetif_t *ipnetif, zoneid_t zoneid, ipnet_stack_t *ips)
1772 {
1773 int ret;
1774
1775 /*
1776 * The global zone has visibility into all interfaces in the global
1777 * stack, and exclusive stack zones have visibility into all
1778 * interfaces in their stack.
1779 */
1780 if (zoneid == GLOBAL_ZONEID ||
1781 ips->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
1782 return (B_TRUE);
1783
1784 /*
1785 * Shared-stack zones only have visibility for interfaces that have
1786 * addresses in their zone.
1787 */
1788 mutex_enter(&ipnetif->if_addr_lock);
1789 ret = ipnet_addrs_in_zone(&ipnetif->if_ip4addr_list, zoneid) ||
1790 ipnet_addrs_in_zone(&ipnetif->if_ip6addr_list, zoneid);
1791 mutex_exit(&ipnetif->if_addr_lock);
1792 return (ret);
1793 }
1794
1795 /*
1796 * Verify that any ipnet_t that has a reference to the supplied ipnetif should
1797 * still be allowed to have it open. A given ipnet_t may no longer be allowed
1798 * to have an ipnetif open if there are no longer any addresses that belong to
1799 * the ipnetif in the ipnet_t's non-global shared-stack zoneid. If that's the
1800 * case, send the ipnet_t an M_HANGUP.
1801 */
1802 static void
ipnetif_zonecheck(ipnetif_t * ipnetif,ipnet_stack_t * ips)1803 ipnetif_zonecheck(ipnetif_t *ipnetif, ipnet_stack_t *ips)
1804 {
1805 list_t *strlist = &ips->ips_str_list;
1806 ipnet_t *ipnet;
1807
1808 ipnet_walkers_inc(ips);
1809 for (ipnet = list_head(strlist); ipnet != NULL;
1810 ipnet = list_next(strlist, ipnet)) {
1811 if (ipnet->ipnet_if != ipnetif)
1812 continue;
1813 if (!ipnetif_in_zone(ipnetif, ipnet->ipnet_zoneid, ips))
1814 (void) putnextctl(ipnet->ipnet_rq, M_HANGUP);
1815 }
1816 ipnet_walkers_dec(ips);
1817 }
1818
1819 void
ipnet_walk_if(ipnet_walkfunc_t * cb,void * arg,zoneid_t zoneid)1820 ipnet_walk_if(ipnet_walkfunc_t *cb, void *arg, zoneid_t zoneid)
1821 {
1822 ipnetif_t *ipnetif;
1823 list_t cbdata;
1824 ipnetif_cbdata_t *cbnode;
1825 netstack_t *ns;
1826 ipnet_stack_t *ips;
1827
1828 /*
1829 * On labeled systems, non-global zones shouldn't see anything
1830 * in /dev/ipnet.
1831 */
1832 if (is_system_labeled() && zoneid != GLOBAL_ZONEID)
1833 return;
1834
1835 if ((ns = netstack_find_by_zoneid(zoneid)) == NULL)
1836 return;
1837
1838 ips = ns->netstack_ipnet;
1839 list_create(&cbdata, sizeof (ipnetif_cbdata_t),
1840 offsetof(ipnetif_cbdata_t, ic_next));
1841
1842 mutex_enter(&ips->ips_avl_lock);
1843 for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL;
1844 ipnetif = avl_walk(&ips->ips_avl_by_index, ipnetif, AVL_AFTER)) {
1845 if (!ipnetif_in_zone(ipnetif, zoneid, ips))
1846 continue;
1847 cbnode = kmem_zalloc(sizeof (ipnetif_cbdata_t), KM_SLEEP);
1848 (void) strlcpy(cbnode->ic_ifname, ipnetif->if_name, LIFNAMSIZ);
1849 cbnode->ic_dev = ipnetif->if_dev;
1850 list_insert_head(&cbdata, cbnode);
1851 }
1852 mutex_exit(&ips->ips_avl_lock);
1853
1854 while ((cbnode = list_head(&cbdata)) != NULL) {
1855 cb(cbnode->ic_ifname, arg, cbnode->ic_dev);
1856 list_remove(&cbdata, cbnode);
1857 kmem_free(cbnode, sizeof (ipnetif_cbdata_t));
1858 }
1859 list_destroy(&cbdata);
1860 netstack_rele(ns);
1861 }
1862
1863 static int
ipnetif_compare_index(const void * index_ptr,const void * ipnetifp)1864 ipnetif_compare_index(const void *index_ptr, const void *ipnetifp)
1865 {
1866 int64_t index1 = *((int64_t *)index_ptr);
1867 int64_t index2 = (int64_t)((ipnetif_t *)ipnetifp)->if_index;
1868
1869 return (SIGNOF(index2 - index1));
1870 }
1871
1872 static int
ipnetif_compare_name(const void * name_ptr,const void * ipnetifp)1873 ipnetif_compare_name(const void *name_ptr, const void *ipnetifp)
1874 {
1875 int res;
1876
1877 res = strcmp(((ipnetif_t *)ipnetifp)->if_name, name_ptr);
1878 return (SIGNOF(res));
1879 }
1880
1881 static int
ipnetif_compare_name_zone(const void * key_ptr,const void * ipnetifp)1882 ipnetif_compare_name_zone(const void *key_ptr, const void *ipnetifp)
1883 {
1884 const uintptr_t *ptr = key_ptr;
1885 const ipnetif_t *ifp;
1886 int res;
1887
1888 ifp = ipnetifp;
1889 res = ifp->if_zoneid - ptr[0];
1890 if (res != 0)
1891 return (SIGNOF(res));
1892 res = strcmp(ifp->if_name, (char *)ptr[1]);
1893 return (SIGNOF(res));
1894 }
1895
1896 static void
ipnetif_refhold(ipnetif_t * ipnetif)1897 ipnetif_refhold(ipnetif_t *ipnetif)
1898 {
1899 mutex_enter(&ipnetif->if_reflock);
1900 ipnetif->if_refcnt++;
1901 mutex_exit(&ipnetif->if_reflock);
1902 }
1903
1904 static void
ipnetif_refrele(ipnetif_t * ipnetif)1905 ipnetif_refrele(ipnetif_t *ipnetif)
1906 {
1907 mutex_enter(&ipnetif->if_reflock);
1908 ASSERT(ipnetif->if_refcnt > 0);
1909 if (--ipnetif->if_refcnt == 0)
1910 ipnetif_free(ipnetif);
1911 else
1912 mutex_exit(&ipnetif->if_reflock);
1913 }
1914
1915 static void
ipnet_walkers_inc(ipnet_stack_t * ips)1916 ipnet_walkers_inc(ipnet_stack_t *ips)
1917 {
1918 mutex_enter(&ips->ips_walkers_lock);
1919 ips->ips_walkers_cnt++;
1920 mutex_exit(&ips->ips_walkers_lock);
1921 }
1922
1923 static void
ipnet_walkers_dec(ipnet_stack_t * ips)1924 ipnet_walkers_dec(ipnet_stack_t *ips)
1925 {
1926 mutex_enter(&ips->ips_walkers_lock);
1927 ASSERT(ips->ips_walkers_cnt != 0);
1928 if (--ips->ips_walkers_cnt == 0)
1929 cv_broadcast(&ips->ips_walkers_cv);
1930 mutex_exit(&ips->ips_walkers_lock);
1931 }
1932
1933 /*ARGSUSED*/
1934 static int
ipobs_bounce_func(hook_event_token_t token,hook_data_t info,void * arg)1935 ipobs_bounce_func(hook_event_token_t token, hook_data_t info, void *arg)
1936 {
1937 hook_pkt_observe_t *hdr;
1938 pfv_t func = (pfv_t)arg;
1939 mblk_t *mp;
1940
1941 hdr = (hook_pkt_observe_t *)info;
1942 /*
1943 * Code in ip_input() expects that it is the only one accessing the
1944 * packet.
1945 */
1946 mp = copymsg(hdr->hpo_pkt);
1947 if (mp == NULL) {
1948 netstack_t *ns = hdr->hpo_ctx;
1949 ipnet_stack_t *ips = ns->netstack_ipnet;
1950
1951 IPSK_BUMP(ips, ik_dispatchDupDrop);
1952 return (0);
1953 }
1954
1955 hdr = (hook_pkt_observe_t *)mp->b_rptr;
1956 hdr->hpo_pkt = mp;
1957
1958 func(mp);
1959
1960 return (0);
1961 }
1962
1963 hook_t *
ipobs_register_hook(netstack_t * ns,pfv_t func)1964 ipobs_register_hook(netstack_t *ns, pfv_t func)
1965 {
1966 ip_stack_t *ipst = ns->netstack_ip;
1967 char name[32];
1968 hook_t *hook;
1969
1970 HOOK_INIT(hook, ipobs_bounce_func, "", (void *)func);
1971 VERIFY(hook != NULL);
1972
1973 /*
1974 * To register multiple hooks with he same callback function,
1975 * a unique name is needed.
1976 */
1977 (void) snprintf(name, sizeof (name), "ipobserve_%p", (void *)hook);
1978 hook->h_name = strdup(name);
1979
1980 (void) net_hook_register(ipst->ips_ip4_observe_pr, NH_OBSERVE, hook);
1981 (void) net_hook_register(ipst->ips_ip6_observe_pr, NH_OBSERVE, hook);
1982
1983 return (hook);
1984 }
1985
1986 void
ipobs_unregister_hook(netstack_t * ns,hook_t * hook)1987 ipobs_unregister_hook(netstack_t *ns, hook_t *hook)
1988 {
1989 ip_stack_t *ipst = ns->netstack_ip;
1990
1991 (void) net_hook_unregister(ipst->ips_ip4_observe_pr, NH_OBSERVE, hook);
1992
1993 (void) net_hook_unregister(ipst->ips_ip6_observe_pr, NH_OBSERVE, hook);
1994
1995 strfree(hook->h_name);
1996
1997 hook_free(hook);
1998 }
1999
2000 /* ******************************************************************** */
2001 /* BPF Functions below */
2002 /* ******************************************************************** */
2003
2004 /*
2005 * Convenience function to make mapping a zoneid to an ipnet_stack_t easy.
2006 */
2007 ipnet_stack_t *
ipnet_find_by_zoneid(zoneid_t zoneid)2008 ipnet_find_by_zoneid(zoneid_t zoneid)
2009 {
2010 netstack_t *ns;
2011
2012 VERIFY((ns = netstack_find_by_zoneid(zoneid)) != NULL);
2013 return (ns->netstack_ipnet);
2014 }
2015
2016 /*
2017 * Functions, such as the above ipnet_find_by_zoneid(), will return a
2018 * pointer to ipnet_stack_t by calling a netstack lookup function.
2019 * The netstack_find_*() functions return a pointer after doing a "hold"
2020 * on the data structure and thereby require a "release" when the caller
2021 * is finished with it. We need to mirror that API here and thus a caller
2022 * of ipnet_find_by_zoneid() is required to call ipnet_rele().
2023 */
2024 void
ipnet_rele(ipnet_stack_t * ips)2025 ipnet_rele(ipnet_stack_t *ips)
2026 {
2027 netstack_rele(ips->ips_netstack);
2028 }
2029
2030 /*
2031 */
2032 void
ipnet_set_itap(bpf_itap_fn_t tapfunc)2033 ipnet_set_itap(bpf_itap_fn_t tapfunc)
2034 {
2035 ipnet_itap = tapfunc;
2036 }
2037
2038 /*
2039 * The list of interfaces available via ipnet is private for each zone,
2040 * so the AVL tree of each zone must be searched for a given name, even
2041 * if all names are unique.
2042 */
2043 int
ipnet_open_byname(const char * name,ipnetif_t ** ptr,zoneid_t zoneid)2044 ipnet_open_byname(const char *name, ipnetif_t **ptr, zoneid_t zoneid)
2045 {
2046 ipnet_stack_t *ips;
2047 ipnetif_t *ipnetif;
2048
2049 ASSERT(ptr != NULL);
2050 VERIFY((ips = ipnet_find_by_zoneid(zoneid)) != NULL);
2051
2052 mutex_enter(&ips->ips_avl_lock);
2053
2054 /*
2055 * Shared instance zone?
2056 */
2057 if (netstackid_to_zoneid(zoneid_to_netstackid(zoneid)) != zoneid) {
2058 uintptr_t key[2] = { zoneid, (uintptr_t)name };
2059
2060 ipnetif = avl_find(&ips->ips_avl_by_shared, (void *)key, NULL);
2061 } else {
2062 ipnetif = avl_find(&ips->ips_avl_by_name, (void *)name, NULL);
2063 }
2064 if (ipnetif != NULL)
2065 ipnetif_refhold(ipnetif);
2066 mutex_exit(&ips->ips_avl_lock);
2067
2068 *ptr = ipnetif;
2069 ipnet_rele(ips);
2070
2071 if (ipnetif == NULL)
2072 return (ESRCH);
2073 return (0);
2074 }
2075
2076 void
ipnet_close_byhandle(ipnetif_t * ifp)2077 ipnet_close_byhandle(ipnetif_t *ifp)
2078 {
2079 ASSERT(ifp != NULL);
2080 ipnetif_refrele(ifp);
2081 }
2082
2083 const char *
ipnet_name(ipnetif_t * ifp)2084 ipnet_name(ipnetif_t *ifp)
2085 {
2086 ASSERT(ifp != NULL);
2087 return (ifp->if_name);
2088 }
2089
2090 /*
2091 * To find the linkid for a given name, it is necessary to know which zone
2092 * the interface name belongs to and to search the avl tree for that zone
2093 * as there is no master list of all interfaces and which zone they belong
2094 * to. It is assumed that the caller of this function is somehow already
2095 * working with the ipnet interfaces and hence the ips_event_lock is held.
2096 * When BPF calls into this function, it is doing so because of an event
2097 * in ipnet, and thus ipnet holds the ips_event_lock. Thus the datalink id
2098 * value returned has meaning without the need for grabbing a hold on the
2099 * owning structure.
2100 */
2101 int
ipnet_get_linkid_byname(const char * name,uint_t * idp,zoneid_t zoneid)2102 ipnet_get_linkid_byname(const char *name, uint_t *idp, zoneid_t zoneid)
2103 {
2104 ipnet_stack_t *ips;
2105 ipnetif_t *ifp;
2106
2107 VERIFY((ips = ipnet_find_by_zoneid(zoneid)) != NULL);
2108 ASSERT(mutex_owned(&ips->ips_event_lock));
2109
2110 mutex_enter(&ips->ips_avl_lock);
2111 ifp = avl_find(&ips->ips_avl_by_name, (void *)name, NULL);
2112 if (ifp != NULL)
2113 *idp = (uint_t)ifp->if_index;
2114
2115 /*
2116 * Shared instance zone?
2117 */
2118 if (netstackid_to_zoneid(zoneid_to_netstackid(zoneid)) != zoneid) {
2119 uintptr_t key[2] = { zoneid, (uintptr_t)name };
2120
2121 ifp = avl_find(&ips->ips_avl_by_shared, (void *)key, NULL);
2122 if (ifp != NULL)
2123 *idp = (uint_t)ifp->if_index;
2124 }
2125
2126 mutex_exit(&ips->ips_avl_lock);
2127 ipnet_rele(ips);
2128
2129 if (ifp == NULL)
2130 return (ESRCH);
2131 return (0);
2132 }
2133
2134 /*
2135 * Strictly speaking, there is no such thing as a "client" in ipnet, like
2136 * there is in mac. BPF only needs to have this because it is required as
2137 * part of interfacing correctly with mac. The reuse of the original
2138 * ipnetif_t as a client poses no danger, so long as it is done with its
2139 * own ref-count'd hold that is given up on close.
2140 */
2141 int
ipnet_client_open(ipnetif_t * ptr,ipnetif_t ** result)2142 ipnet_client_open(ipnetif_t *ptr, ipnetif_t **result)
2143 {
2144 ASSERT(ptr != NULL);
2145 ASSERT(result != NULL);
2146 ipnetif_refhold(ptr);
2147 *result = ptr;
2148
2149 return (0);
2150 }
2151
2152 void
ipnet_client_close(ipnetif_t * ptr)2153 ipnet_client_close(ipnetif_t *ptr)
2154 {
2155 ASSERT(ptr != NULL);
2156 ipnetif_refrele(ptr);
2157 }
2158
2159 /*
2160 * This is called from BPF when it needs to start receiving packets
2161 * from ipnet.
2162 *
2163 * The use of the ipnet_t structure here is somewhat lightweight when
2164 * compared to how it is used elsewhere but it already has all of the
2165 * right fields in it, so reuse here doesn't seem out of order. Its
2166 * primary purpose here is to provide the means to store pointers for
2167 * use when ipnet_promisc_remove() needs to be called.
2168 *
2169 * This should never be called for the IPNET_MINOR_LO device as it is
2170 * never created via ipnetif_create.
2171 */
2172 /*ARGSUSED*/
2173 int
ipnet_promisc_add(void * handle,uint_t how,void * data,uintptr_t * mhandle,int flags)2174 ipnet_promisc_add(void *handle, uint_t how, void *data, uintptr_t *mhandle,
2175 int flags)
2176 {
2177 ip_stack_t *ipst;
2178 netstack_t *ns;
2179 ipnetif_t *ifp;
2180 ipnet_t *ipnet;
2181 char name[32];
2182 int error;
2183
2184 ifp = (ipnetif_t *)handle;
2185 ns = netstack_find_by_zoneid(ifp->if_zoneid);
2186
2187 if ((how == DL_PROMISC_PHYS) || (how == DL_PROMISC_MULTI)) {
2188 error = ipnet_join_allmulti(ifp, ns->netstack_ipnet);
2189 if (error != 0)
2190 return (error);
2191 } else {
2192 return (EINVAL);
2193 }
2194
2195 ipnet = kmem_zalloc(sizeof (*ipnet), KM_SLEEP);
2196 ipnet->ipnet_if = ifp;
2197 ipnet->ipnet_ns = ns;
2198 ipnet->ipnet_flags = flags;
2199
2200 if ((ifp->if_flags & IPNETIF_LOOPBACK) != 0) {
2201 ipnet->ipnet_acceptfn = ipnet_loaccept;
2202 } else {
2203 ipnet->ipnet_acceptfn = ipnet_accept;
2204 }
2205
2206 /*
2207 * To register multiple hooks with the same callback function,
2208 * a unique name is needed.
2209 */
2210 HOOK_INIT(ipnet->ipnet_hook, ipnet_bpf_bounce, "", ipnet);
2211 (void) snprintf(name, sizeof (name), "ipnet_promisc_%p",
2212 (void *)ipnet->ipnet_hook);
2213 ipnet->ipnet_hook->h_name = strdup(name);
2214 ipnet->ipnet_data = data;
2215 ipnet->ipnet_zoneid = ifp->if_zoneid;
2216
2217 ipst = ns->netstack_ip;
2218
2219 error = net_hook_register(ipst->ips_ip4_observe_pr, NH_OBSERVE,
2220 ipnet->ipnet_hook);
2221 if (error != 0)
2222 goto regfail;
2223
2224 error = net_hook_register(ipst->ips_ip6_observe_pr, NH_OBSERVE,
2225 ipnet->ipnet_hook);
2226 if (error != 0) {
2227 (void) net_hook_unregister(ipst->ips_ip4_observe_pr,
2228 NH_OBSERVE, ipnet->ipnet_hook);
2229 goto regfail;
2230 }
2231
2232 *mhandle = (uintptr_t)ipnet;
2233
2234 return (0);
2235
2236 regfail:
2237 cmn_err(CE_WARN, "net_hook_register failed: %d", error);
2238 strfree(ipnet->ipnet_hook->h_name);
2239 hook_free(ipnet->ipnet_hook);
2240 ipnet_leave_allmulti(ifp, ns->netstack_ipnet);
2241 netstack_rele(ns);
2242 return (error);
2243 }
2244
2245 void
ipnet_promisc_remove(void * data)2246 ipnet_promisc_remove(void *data)
2247 {
2248 ip_stack_t *ipst;
2249 ipnet_t *ipnet;
2250 hook_t *hook;
2251
2252 ipnet = data;
2253 ipst = ipnet->ipnet_ns->netstack_ip;
2254 hook = ipnet->ipnet_hook;
2255
2256 VERIFY(net_hook_unregister(ipst->ips_ip4_observe_pr, NH_OBSERVE,
2257 hook) == 0);
2258
2259 VERIFY(net_hook_unregister(ipst->ips_ip6_observe_pr, NH_OBSERVE,
2260 hook) == 0);
2261
2262 strfree(hook->h_name);
2263
2264 hook_free(hook);
2265
2266 ipnet_leave_allmulti(ipnet->ipnet_if, ipnet->ipnet_ns->netstack_ipnet);
2267
2268 netstack_rele(ipnet->ipnet_ns);
2269
2270 kmem_free(ipnet, sizeof (*ipnet));
2271 }
2272
2273 /*
2274 * arg here comes from the ipnet_t allocated in ipnet_promisc_add.
2275 * An important field from that structure is "ipnet_data" that
2276 * contains the "data" pointer passed into ipnet_promisc_add: it needs
2277 * to be passed back to bpf when we call into ipnet_itap.
2278 *
2279 * ipnet_itap is set by ipnet_set_bpfattach, which in turn is called
2280 * from BPF.
2281 */
2282 /*ARGSUSED*/
2283 static int
ipnet_bpf_bounce(hook_event_token_t token,hook_data_t info,void * arg)2284 ipnet_bpf_bounce(hook_event_token_t token, hook_data_t info, void *arg)
2285 {
2286 hook_pkt_observe_t *hdr;
2287 ipnet_addrp_t src;
2288 ipnet_addrp_t dst;
2289 ipnet_stack_t *ips;
2290 ipnet_t *ipnet;
2291 mblk_t *netmp;
2292 mblk_t *mp;
2293
2294 hdr = (hook_pkt_observe_t *)info;
2295 mp = hdr->hpo_pkt;
2296 ipnet = (ipnet_t *)arg;
2297 ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet;
2298
2299 netmp = hdr->hpo_pkt->b_cont;
2300 src.iap_family = hdr->hpo_family;
2301 dst.iap_family = hdr->hpo_family;
2302
2303 if (hdr->hpo_family == AF_INET) {
2304 src.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_src;
2305 dst.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_dst;
2306 } else {
2307 src.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_src;
2308 dst.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_dst;
2309 }
2310
2311 if (!(*ipnet->ipnet_acceptfn)(ipnet, hdr, &src, &dst)) {
2312 IPSK_BUMP(ips, ik_acceptFail);
2313 return (0);
2314 }
2315 IPSK_BUMP(ips, ik_acceptOk);
2316
2317 ipnet_itap(ipnet->ipnet_data, mp,
2318 hdr->hpo_htype == htons(IPOBS_HOOK_OUTBOUND),
2319 ntohl(hdr->hpo_pktlen) + MBLKL(mp));
2320
2321 return (0);
2322 }
2323
2324 /*
2325 * clone'd ipnetif_t's are created when a shared IP instance zone comes
2326 * to life and configures an IP address. The model that BPF uses is that
2327 * each interface must have a unique pointer and each interface must be
2328 * representative of what it can capture. They are limited to one DLT
2329 * per interface and one zone per interface. Thus every interface that
2330 * can be seen in a zone must be announced via an attach to bpf. For
2331 * shared instance zones, this means the ipnet driver needs to detect
2332 * when an address is added to an interface in a zone for the first
2333 * time (and also when the last address is removed.)
2334 */
2335 static ipnetif_t *
ipnetif_clone_create(ipnetif_t * ifp,zoneid_t zoneid)2336 ipnetif_clone_create(ipnetif_t *ifp, zoneid_t zoneid)
2337 {
2338 uintptr_t key[2] = { zoneid, (uintptr_t)ifp->if_name };
2339 ipnet_stack_t *ips = ifp->if_stackp;
2340 avl_index_t where = 0;
2341 ipnetif_t *newif;
2342
2343 mutex_enter(&ips->ips_avl_lock);
2344 newif = avl_find(&ips->ips_avl_by_shared, (void *)key, &where);
2345 if (newif != NULL) {
2346 ipnetif_refhold(newif);
2347 newif->if_sharecnt++;
2348 mutex_exit(&ips->ips_avl_lock);
2349 return (newif);
2350 }
2351
2352 newif = ipnet_alloc_if(ips);
2353 if (newif == NULL) {
2354 mutex_exit(&ips->ips_avl_lock);
2355 return (NULL);
2356 }
2357
2358 newif->if_refcnt = 1;
2359 newif->if_sharecnt = 1;
2360 newif->if_zoneid = zoneid;
2361 (void) strlcpy(newif->if_name, ifp->if_name, LIFNAMSIZ);
2362 newif->if_flags = ifp->if_flags & IPNETIF_LOOPBACK;
2363 newif->if_index = ifp->if_index;
2364
2365 avl_insert(&ips->ips_avl_by_shared, newif, where);
2366 mutex_exit(&ips->ips_avl_lock);
2367
2368 return (newif);
2369 }
2370
2371 static void
ipnetif_clone_release(ipnetif_t * ipnetif)2372 ipnetif_clone_release(ipnetif_t *ipnetif)
2373 {
2374 boolean_t dofree = B_FALSE;
2375 boolean_t doremove = B_FALSE;
2376 ipnet_stack_t *ips = ipnetif->if_stackp;
2377
2378 mutex_enter(&ipnetif->if_reflock);
2379 ASSERT(ipnetif->if_refcnt > 0);
2380 if (--ipnetif->if_refcnt == 0)
2381 dofree = B_TRUE;
2382 ASSERT(ipnetif->if_sharecnt > 0);
2383 if (--ipnetif->if_sharecnt == 0)
2384 doremove = B_TRUE;
2385 mutex_exit(&ipnetif->if_reflock);
2386 if (doremove) {
2387 mutex_enter(&ips->ips_avl_lock);
2388 avl_remove(&ips->ips_avl_by_shared, ipnetif);
2389 mutex_exit(&ips->ips_avl_lock);
2390 }
2391 if (dofree) {
2392 ASSERT(ipnetif->if_sharecnt == 0);
2393 ipnetif_free(ipnetif);
2394 }
2395 }
2396