xref: /illumos-gate/usr/src/uts/common/inet/ip/ip_netinfo.c (revision 8380b3cc879a715dff53a0564cd5b1c4bf9ade62)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/param.h>
27 #include <sys/types.h>
28 #include <sys/systm.h>
29 #include <sys/stream.h>
30 #include <sys/strsubr.h>
31 #include <sys/pattr.h>
32 #include <sys/dlpi.h>
33 #include <sys/atomic.h>
34 #include <sys/sunddi.h>
35 #include <sys/socket.h>
36 #include <sys/neti.h>
37 #include <sys/sdt.h>
38 #include <sys/cmn_err.h>
39 
40 #include <netinet/in.h>
41 #include <inet/common.h>
42 #include <inet/mib2.h>
43 #include <inet/ip.h>
44 #include <inet/ip6.h>
45 #include <inet/ip_if.h>
46 #include <inet/ip_ire.h>
47 #include <inet/ip_impl.h>
48 #include <inet/ip_ndp.h>
49 #include <inet/ipclassifier.h>
50 #include <inet/ipp_common.h>
51 #include <inet/ip_ftable.h>
52 
/*
 * IPv4 netinfo entry point declarations.
 * These are the functions placed in the ipv4info table below.
 */
static int 		ip_getifname(net_handle_t, phy_if_t, char *,
			    const size_t);
static int 		ip_getmtu(net_handle_t, phy_if_t, lif_if_t);
static int 		ip_getpmtuenabled(net_handle_t);
static int 		ip_getlifaddr(net_handle_t, phy_if_t, lif_if_t,
			    size_t, net_ifaddr_t [], void *);
static phy_if_t		ip_phygetnext(net_handle_t, phy_if_t);
static phy_if_t 	ip_phylookup(net_handle_t, const char *);
static lif_if_t 	ip_lifgetnext(net_handle_t, phy_if_t, lif_if_t);
static int 		ip_inject(net_handle_t, inject_t, net_inject_t *);
static phy_if_t 	ip_routeto(net_handle_t, struct sockaddr *,
			    struct sockaddr *);
static int 		ip_ispartialchecksum(net_handle_t, mblk_t *);
static int 		ip_isvalidchecksum(net_handle_t, mblk_t *);

/*
 * IPv6 netinfo entry point declarations.
 * ip_getpmtuenabled() and ip_ispartialchecksum() have no IPv6 variant;
 * the ipv6info table below reuses the IPv4 declarations for those slots.
 */
static int 		ipv6_getifname(net_handle_t, phy_if_t, char *,
			    const size_t);
static int 		ipv6_getmtu(net_handle_t, phy_if_t, lif_if_t);
static int 		ipv6_getlifaddr(net_handle_t, phy_if_t, lif_if_t,
			    size_t, net_ifaddr_t [], void *);
static phy_if_t 	ipv6_phygetnext(net_handle_t, phy_if_t);
static phy_if_t 	ipv6_phylookup(net_handle_t, const char *);
static lif_if_t 	ipv6_lifgetnext(net_handle_t, phy_if_t, lif_if_t);
static int 		ipv6_inject(net_handle_t, inject_t, net_inject_t *);
static phy_if_t 	ipv6_routeto(net_handle_t, struct sockaddr *,
			    struct sockaddr *);
static int 		ipv6_isvalidchecksum(net_handle_t, mblk_t *);

/*
 * Netinfo private functions.
 * The *_impl routines are shared by the IPv4 and IPv6 entry points; where
 * present, the boolean_t argument selects IPv6 (B_TRUE) or IPv4 (B_FALSE).
 */
static	int		ip_getifname_impl(phy_if_t, char *,
			    const size_t, boolean_t, ip_stack_t *);
static	int		ip_getmtu_impl(phy_if_t, lif_if_t, boolean_t,
			    ip_stack_t *);
static	phy_if_t	ip_phylookup_impl(const char *, boolean_t,
			    ip_stack_t *);
static	lif_if_t	ip_lifgetnext_impl(phy_if_t, lif_if_t, boolean_t,
			    ip_stack_t *);
static	int		ip_inject_impl(inject_t, net_inject_t *, boolean_t,
			    ip_stack_t *);
static	int		ip_getifaddr_type(sa_family_t, ipif_t *, lif_if_t,
			    void *);
static	phy_if_t	ip_routeto_impl(struct sockaddr *, struct sockaddr *,
			    ip_stack_t *);
static	int		ip_getlifaddr_impl(sa_family_t, phy_if_t, lif_if_t,
			    size_t, net_ifaddr_t [], struct sockaddr *,
			    ip_stack_t *);
/* Taskq callbacks dispatched by ip_inject_impl() for queued injection. */
static	void		ip_ni_queue_in_func(void *);
static	void		ip_ni_queue_out_func(void *);
static	void		ip_ni_queue_func_impl(injection_t *,  boolean_t);
105 
106 
/*
 * IPv4 netinfo dispatch table, handed to net_protocol_register() by
 * ip_net_init().  The initializer is positional, so the order of the
 * entries must match the member order of net_protocol_t (declared outside
 * this file).
 */
static net_protocol_t ipv4info = {
	NETINFO_VERSION,
	NHF_INET,
	ip_getifname,
	ip_getmtu,
	ip_getpmtuenabled,
	ip_getlifaddr,
	ip_phygetnext,
	ip_phylookup,
	ip_lifgetnext,
	ip_inject,
	ip_routeto,
	ip_ispartialchecksum,
	ip_isvalidchecksum
};
122 
123 
/*
 * IPv6 netinfo dispatch table, handed to net_protocol_register() by
 * ip_net_init().  The initializer is positional, so the order of the
 * entries must match the member order of net_protocol_t (declared outside
 * this file).  ip_getpmtuenabled and ip_ispartialchecksum are shared with
 * the IPv4 table; every other entry is the IPv6-specific variant.
 */
static net_protocol_t ipv6info = {
	NETINFO_VERSION,
	NHF_INET6,
	ipv6_getifname,
	ipv6_getmtu,
	ip_getpmtuenabled,
	ipv6_getlifaddr,
	ipv6_phygetnext,
	ipv6_phylookup,
	ipv6_lifgetnext,
	ipv6_inject,
	ipv6_routeto,
	ip_ispartialchecksum,
	ipv6_isvalidchecksum
};
139 
/*
 * The taskq eventq_queue_in is used to process the upside inject messages.
 * The taskq eventq_queue_out is used to process the downside inject messages.
 * The taskq eventq_queue_nic is used to process the nic event messages.
 *
 * All three are created by ip_net_g_init() and torn down by
 * ip_net_g_destroy().  A pointer stays NULL if creation failed, so users
 * must be prepared for a NULL queue.  eventq_queue_nic is deliberately not
 * static: it has external linkage, presumably so that NIC event producers
 * in other files can dispatch to it.
 */
static ddi_taskq_t 	*eventq_queue_in = NULL;
static ddi_taskq_t 	*eventq_queue_out = NULL;
ddi_taskq_t 	*eventq_queue_nic = NULL;
148 
149 /*
150  * Initialize queues for inject.
151  */
152 void
153 ip_net_g_init()
154 {
155 	if (eventq_queue_out == NULL) {
156 		eventq_queue_out = ddi_taskq_create(NULL,
157 		    "IP_INJECT_QUEUE_OUT", 1, TASKQ_DEFAULTPRI, 0);
158 
159 		if (eventq_queue_out == NULL)
160 			cmn_err(CE_NOTE, "ipv4_net_init: "
161 			    "ddi_taskq_create failed for IP_INJECT_QUEUE_OUT");
162 	}
163 
164 	if (eventq_queue_in == NULL) {
165 		eventq_queue_in = ddi_taskq_create(NULL,
166 		    "IP_INJECT_QUEUE_IN", 1, TASKQ_DEFAULTPRI, 0);
167 
168 		if (eventq_queue_in == NULL)
169 			cmn_err(CE_NOTE, "ipv4_net_init: "
170 			    "ddi_taskq_create failed for IP_INJECT_QUEUE_IN");
171 	}
172 
173 	if (eventq_queue_nic == NULL) {
174 		eventq_queue_nic = ddi_taskq_create(NULL,
175 		    "IP_NIC_EVENT_QUEUE", 1, TASKQ_DEFAULTPRI, 0);
176 
177 		if (eventq_queue_nic == NULL)
178 			cmn_err(CE_NOTE, "ipv4_net_init: "
179 			    "ddi_taskq_create failed for IP_NIC_EVENT_QUEUE");
180 	}
181 }
182 
183 /*
184  * Destroy inject queues
185  */
186 void
187 ip_net_g_destroy()
188 {
189 	if (eventq_queue_nic != NULL) {
190 		ddi_taskq_destroy(eventq_queue_nic);
191 		eventq_queue_nic = NULL;
192 	}
193 
194 	if (eventq_queue_in != NULL) {
195 		ddi_taskq_destroy(eventq_queue_in);
196 		eventq_queue_in = NULL;
197 	}
198 
199 	if (eventq_queue_out != NULL) {
200 		ddi_taskq_destroy(eventq_queue_out);
201 		eventq_queue_out = NULL;
202 	}
203 }
204 
205 /*
206  * Register IPv4 and IPv6 netinfo functions and initialize queues for inject.
207  */
208 void
209 ip_net_init(ip_stack_t *ipst, netstack_t *ns)
210 {
211 	netid_t id;
212 
213 	id = net_getnetidbynetstackid(ns->netstack_stackid);
214 	ASSERT(id != -1);
215 
216 	ipst->ips_ipv4_net_data = net_protocol_register(id, &ipv4info);
217 	ASSERT(ipst->ips_ipv4_net_data != NULL);
218 
219 	ipst->ips_ipv6_net_data = net_protocol_register(id, &ipv6info);
220 	ASSERT(ipst->ips_ipv6_net_data != NULL);
221 }
222 
223 
224 /*
225  * Unregister IPv4 and IPv6 functions and inject queues
226  */
227 void
228 ip_net_destroy(ip_stack_t *ipst)
229 {
230 	if (ipst->ips_ipv4_net_data != NULL) {
231 		if (net_protocol_unregister(ipst->ips_ipv4_net_data) == 0)
232 			ipst->ips_ipv4_net_data = NULL;
233 	}
234 
235 	if (ipst->ips_ipv6_net_data != NULL) {
236 		if (net_protocol_unregister(ipst->ips_ipv6_net_data) == 0)
237 			ipst->ips_ipv6_net_data = NULL;
238 	}
239 }
240 
/*
 * Initialize the IPv4 hook family and register its events.
 *
 * The family is registered first, followed by the physical in/out,
 * forwarding, loopback in/out and NIC events.  Every failure is only
 * logged with CE_NOTE; the remaining registrations are still attempted
 * and the failed token is left NULL in the ip_stack_t.
 */
void
ipv4_hook_init(ip_stack_t *ipst)
{
	/* Register the family; on failure we log and carry on regardless. */
	HOOK_FAMILY_INIT(&ipst->ips_ipv4root, Hn_IPV4);
	if (net_family_register(ipst->ips_ipv4_net_data, &ipst->ips_ipv4root)
	    != 0) {
		cmn_err(CE_NOTE, "ipv4_hook_init: "
		    "net_family_register failed for ipv4");
	}

	/* Inbound packets on a physical interface. */
	HOOK_EVENT_INIT(&ipst->ips_ip4_physical_in_event, NH_PHYSICAL_IN);
	ipst->ips_ipv4firewall_physical_in = net_event_register(
	    ipst->ips_ipv4_net_data, &ipst->ips_ip4_physical_in_event);
	if (ipst->ips_ipv4firewall_physical_in == NULL) {
		cmn_err(CE_NOTE, "ipv4_hook_init: "
		    "net_event_register failed for ipv4/physical_in");
	}

	/* Outbound packets on a physical interface. */
	HOOK_EVENT_INIT(&ipst->ips_ip4_physical_out_event, NH_PHYSICAL_OUT);
	ipst->ips_ipv4firewall_physical_out = net_event_register(
	    ipst->ips_ipv4_net_data, &ipst->ips_ip4_physical_out_event);
	if (ipst->ips_ipv4firewall_physical_out == NULL) {
		cmn_err(CE_NOTE, "ipv4_hook_init: "
		    "net_event_register failed for ipv4/physical_out");
	}

	/* Forwarded packets. */
	HOOK_EVENT_INIT(&ipst->ips_ip4_forwarding_event, NH_FORWARDING);
	ipst->ips_ipv4firewall_forwarding = net_event_register(
	    ipst->ips_ipv4_net_data, &ipst->ips_ip4_forwarding_event);
	if (ipst->ips_ipv4firewall_forwarding == NULL) {
		cmn_err(CE_NOTE, "ipv4_hook_init: "
		    "net_event_register failed for ipv4/forwarding");
	}

	/* Loopback, inbound side. */
	HOOK_EVENT_INIT(&ipst->ips_ip4_loopback_in_event, NH_LOOPBACK_IN);
	ipst->ips_ipv4firewall_loopback_in = net_event_register(
	    ipst->ips_ipv4_net_data, &ipst->ips_ip4_loopback_in_event);
	if (ipst->ips_ipv4firewall_loopback_in == NULL) {
		cmn_err(CE_NOTE, "ipv4_hook_init: "
		    "net_event_register failed for ipv4/loopback_in");
	}

	/* Loopback, outbound side. */
	HOOK_EVENT_INIT(&ipst->ips_ip4_loopback_out_event, NH_LOOPBACK_OUT);
	ipst->ips_ipv4firewall_loopback_out = net_event_register(
	    ipst->ips_ipv4_net_data, &ipst->ips_ip4_loopback_out_event);
	if (ipst->ips_ipv4firewall_loopback_out == NULL) {
		cmn_err(CE_NOTE, "ipv4_hook_init: "
		    "net_event_register failed for ipv4/loopback_out");
	}

	/*
	 * NIC events.  This is the only event whose he_flags is set here.
	 * NOTE(review): HOOK_RDONLY presumably marks the event data as
	 * read-only for hook consumers -- confirm against the hook framework.
	 */
	HOOK_EVENT_INIT(&ipst->ips_ip4_nic_events, NH_NIC_EVENTS);
	ipst->ips_ip4_nic_events.he_flags = HOOK_RDONLY;
	ipst->ips_ipv4nicevents = net_event_register(
	    ipst->ips_ipv4_net_data, &ipst->ips_ip4_nic_events);
	if (ipst->ips_ipv4nicevents == NULL) {
		cmn_err(CE_NOTE, "ipv4_hook_init: "
		    "net_event_register failed for ipv4/nic_events");
	}
}
303 
304 void
305 ipv4_hook_destroy(ip_stack_t *ipst)
306 {
307 	if (ipst->ips_ipv4firewall_forwarding != NULL) {
308 		if (net_event_unregister(ipst->ips_ipv4_net_data,
309 		    &ipst->ips_ip4_forwarding_event) == 0)
310 			ipst->ips_ipv4firewall_forwarding = NULL;
311 	}
312 
313 	if (ipst->ips_ipv4firewall_physical_in != NULL) {
314 		if (net_event_unregister(ipst->ips_ipv4_net_data,
315 		    &ipst->ips_ip4_physical_in_event) == 0)
316 			ipst->ips_ipv4firewall_physical_in = NULL;
317 	}
318 
319 	if (ipst->ips_ipv4firewall_physical_out != NULL) {
320 		if (net_event_unregister(ipst->ips_ipv4_net_data,
321 		    &ipst->ips_ip4_physical_out_event) == 0)
322 			ipst->ips_ipv4firewall_physical_out = NULL;
323 	}
324 
325 	if (ipst->ips_ipv4firewall_loopback_in != NULL) {
326 		if (net_event_unregister(ipst->ips_ipv4_net_data,
327 		    &ipst->ips_ip4_loopback_in_event) == 0)
328 			ipst->ips_ipv4firewall_loopback_in = NULL;
329 	}
330 
331 	if (ipst->ips_ipv4firewall_loopback_out != NULL) {
332 		if (net_event_unregister(ipst->ips_ipv4_net_data,
333 		    &ipst->ips_ip4_loopback_out_event) == 0)
334 			ipst->ips_ipv4firewall_loopback_out = NULL;
335 	}
336 
337 	if (ipst->ips_ipv4nicevents != NULL) {
338 		if (net_event_unregister(ipst->ips_ipv4_net_data,
339 		    &ipst->ips_ip4_nic_events) == 0)
340 			ipst->ips_ipv4nicevents = NULL;
341 	}
342 
343 	(void) net_family_unregister(ipst->ips_ipv4_net_data,
344 	    &ipst->ips_ipv4root);
345 }
346 
/*
 * Initialize the IPv6 hook family and register its events.
 *
 * The family is registered first, followed by the physical in/out,
 * forwarding, loopback in/out and NIC events.  Every failure is only
 * logged with CE_NOTE; the remaining registrations are still attempted
 * and the failed token is left NULL in the ip_stack_t.
 */
void
ipv6_hook_init(ip_stack_t *ipst)
{

	/* Register the family; on failure we log and carry on regardless. */
	HOOK_FAMILY_INIT(&ipst->ips_ipv6root, Hn_IPV6);
	if (net_family_register(ipst->ips_ipv6_net_data, &ipst->ips_ipv6root)
	    != 0) {
		cmn_err(CE_NOTE, "ipv6_hook_init: "
		    "net_family_register failed for ipv6");
	}

	/* Inbound packets on a physical interface. */
	HOOK_EVENT_INIT(&ipst->ips_ip6_physical_in_event, NH_PHYSICAL_IN);
	ipst->ips_ipv6firewall_physical_in = net_event_register(
	    ipst->ips_ipv6_net_data, &ipst->ips_ip6_physical_in_event);
	if (ipst->ips_ipv6firewall_physical_in == NULL) {
		cmn_err(CE_NOTE, "ipv6_hook_init: "
		    "net_event_register failed for ipv6/physical_in");
	}

	/* Outbound packets on a physical interface. */
	HOOK_EVENT_INIT(&ipst->ips_ip6_physical_out_event, NH_PHYSICAL_OUT);
	ipst->ips_ipv6firewall_physical_out = net_event_register(
	    ipst->ips_ipv6_net_data, &ipst->ips_ip6_physical_out_event);
	if (ipst->ips_ipv6firewall_physical_out == NULL) {
		cmn_err(CE_NOTE, "ipv6_hook_init: "
		    "net_event_register failed for ipv6/physical_out");
	}

	/* Forwarded packets. */
	HOOK_EVENT_INIT(&ipst->ips_ip6_forwarding_event, NH_FORWARDING);
	ipst->ips_ipv6firewall_forwarding = net_event_register(
	    ipst->ips_ipv6_net_data, &ipst->ips_ip6_forwarding_event);
	if (ipst->ips_ipv6firewall_forwarding == NULL) {
		cmn_err(CE_NOTE, "ipv6_hook_init: "
		    "net_event_register failed for ipv6/forwarding");
	}

	/* Loopback, inbound side. */
	HOOK_EVENT_INIT(&ipst->ips_ip6_loopback_in_event, NH_LOOPBACK_IN);
	ipst->ips_ipv6firewall_loopback_in = net_event_register(
	    ipst->ips_ipv6_net_data, &ipst->ips_ip6_loopback_in_event);
	if (ipst->ips_ipv6firewall_loopback_in == NULL) {
		cmn_err(CE_NOTE, "ipv6_hook_init: "
		    "net_event_register failed for ipv6/loopback_in");
	}

	/* Loopback, outbound side. */
	HOOK_EVENT_INIT(&ipst->ips_ip6_loopback_out_event, NH_LOOPBACK_OUT);
	ipst->ips_ipv6firewall_loopback_out = net_event_register(
	    ipst->ips_ipv6_net_data, &ipst->ips_ip6_loopback_out_event);
	if (ipst->ips_ipv6firewall_loopback_out == NULL) {
		cmn_err(CE_NOTE, "ipv6_hook_init: "
		    "net_event_register failed for ipv6/loopback_out");
	}

	/*
	 * NIC events.  This is the only event whose he_flags is set here.
	 * NOTE(review): HOOK_RDONLY presumably marks the event data as
	 * read-only for hook consumers -- confirm against the hook framework.
	 */
	HOOK_EVENT_INIT(&ipst->ips_ip6_nic_events, NH_NIC_EVENTS);
	ipst->ips_ip6_nic_events.he_flags = HOOK_RDONLY;
	ipst->ips_ipv6nicevents = net_event_register(
	    ipst->ips_ipv6_net_data, &ipst->ips_ip6_nic_events);
	if (ipst->ips_ipv6nicevents == NULL) {
		cmn_err(CE_NOTE, "ipv6_hook_init: "
		    "net_event_register failed for ipv6/nic_events");
	}
}
410 
411 void
412 ipv6_hook_destroy(ip_stack_t *ipst)
413 {
414 	if (ipst->ips_ipv6firewall_forwarding != NULL) {
415 		if (net_event_unregister(ipst->ips_ipv6_net_data,
416 		    &ipst->ips_ip6_forwarding_event) == 0)
417 			ipst->ips_ipv6firewall_forwarding = NULL;
418 	}
419 
420 	if (ipst->ips_ipv6firewall_physical_in != NULL) {
421 		if (net_event_unregister(ipst->ips_ipv6_net_data,
422 		    &ipst->ips_ip6_physical_in_event) == 0)
423 			ipst->ips_ipv6firewall_physical_in = NULL;
424 	}
425 
426 	if (ipst->ips_ipv6firewall_physical_out != NULL) {
427 		if (net_event_unregister(ipst->ips_ipv6_net_data,
428 		    &ipst->ips_ip6_physical_out_event) == 0)
429 			ipst->ips_ipv6firewall_physical_out = NULL;
430 	}
431 
432 	if (ipst->ips_ipv6firewall_loopback_in != NULL) {
433 		if (net_event_unregister(ipst->ips_ipv6_net_data,
434 		    &ipst->ips_ip6_loopback_in_event) == 0)
435 			ipst->ips_ipv6firewall_loopback_in = NULL;
436 	}
437 
438 	if (ipst->ips_ipv6firewall_loopback_out != NULL) {
439 		if (net_event_unregister(ipst->ips_ipv6_net_data,
440 		    &ipst->ips_ip6_loopback_out_event) == 0)
441 			ipst->ips_ipv6firewall_loopback_out = NULL;
442 	}
443 
444 	if (ipst->ips_ipv6nicevents != NULL) {
445 		if (net_event_unregister(ipst->ips_ipv6_net_data,
446 		    &ipst->ips_ip6_nic_events) == 0)
447 			ipst->ips_ipv6nicevents = NULL;
448 	}
449 
450 	(void) net_family_unregister(ipst->ips_ipv6_net_data,
451 	    &ipst->ips_ipv6root);
452 }
453 
454 /*
455  * Determine the name of an IPv4 interface
456  */
457 static int
458 ip_getifname(net_handle_t neti, phy_if_t phy_ifdata, char *buffer,
459     const size_t buflen)
460 {
461 	return (ip_getifname_impl(phy_ifdata, buffer, buflen, B_FALSE,
462 	    neti->netd_stack->nts_netstack->netstack_ip));
463 }
464 
465 /*
466  * Determine the name of an IPv6 interface
467  */
468 static int
469 ipv6_getifname(net_handle_t neti, phy_if_t phy_ifdata, char *buffer,
470     const size_t buflen)
471 {
472 	return (ip_getifname_impl(phy_ifdata, buffer, buflen, B_TRUE,
473 	    neti->netd_stack->nts_netstack->netstack_ip));
474 }
475 
476 /*
477  * Shared implementation to determine the name of a given network interface
478  */
479 /* ARGSUSED */
480 static int
481 ip_getifname_impl(phy_if_t phy_ifdata,
482     char *buffer, const size_t buflen, boolean_t isv6, ip_stack_t *ipst)
483 {
484 	ill_t *ill;
485 	char *name;
486 
487 	ASSERT(buffer != NULL);
488 
489 	ill = ill_lookup_on_ifindex((uint_t)phy_ifdata, isv6, NULL, NULL,
490 	    NULL, NULL, ipst);
491 	if (ill != NULL) {
492 		name = ill->ill_name;
493 	} else {
494 		/* Fallback to group names only if hook_emulation is set */
495 		if (ipst->ips_ipmp_hook_emulation) {
496 			ill = ill_group_lookup_on_ifindex((uint_t)phy_ifdata,
497 			    isv6, ipst);
498 		}
499 		if (ill == NULL)
500 			return (1);
501 		name = ill->ill_phyint->phyint_groupname;
502 	}
503 	if (name != NULL) {
504 		(void) strlcpy(buffer, name, buflen);
505 		ill_refrele(ill);
506 		return (0);
507 	} else {
508 		ill_refrele(ill);
509 		return (1);
510 	}
511 
512 }
513 
514 /*
515  * Determine the MTU of an IPv4 network interface
516  */
517 static int
518 ip_getmtu(net_handle_t neti, phy_if_t phy_ifdata, lif_if_t ifdata)
519 {
520 	netstack_t *ns;
521 
522 	ns = neti->netd_stack->nts_netstack;
523 	ASSERT(ns != NULL);
524 	return (ip_getmtu_impl(phy_ifdata, ifdata, B_FALSE, ns->netstack_ip));
525 }
526 
527 /*
528  * Determine the MTU of an IPv6 network interface
529  */
530 static int
531 ipv6_getmtu(net_handle_t neti, phy_if_t phy_ifdata, lif_if_t ifdata)
532 {
533 	netstack_t *ns;
534 
535 	ns = neti->netd_stack->nts_netstack;
536 	ASSERT(ns != NULL);
537 	return (ip_getmtu_impl(phy_ifdata, ifdata, B_TRUE, ns->netstack_ip));
538 }
539 
540 /*
541  * Shared implementation to determine the MTU of a network interface
542  *
543  * Note: this does not handle a non-zero ifdata when ipmp_hook_emulation is set.
544  * But IP Filter only uses a zero ifdata.
545  */
546 /* ARGSUSED */
547 static int
548 ip_getmtu_impl(phy_if_t phy_ifdata, lif_if_t ifdata, boolean_t isv6,
549     ip_stack_t *ipst)
550 {
551 	lif_if_t ipifid;
552 	ipif_t *ipif;
553 	int mtu;
554 
555 	ipifid = UNMAP_IPIF_ID(ifdata);
556 
557 	ipif = ipif_getby_indexes((uint_t)phy_ifdata, (uint_t)ipifid,
558 	    isv6, ipst);
559 	if (ipif == NULL)
560 		return (0);
561 
562 	mtu = ipif->ipif_mtu;
563 	ipif_refrele(ipif);
564 
565 	if (mtu == 0) {
566 		ill_t *ill;
567 
568 		if ((ill = ill_lookup_on_ifindex((uint_t)phy_ifdata, isv6,
569 		    NULL, NULL, NULL, NULL, ipst)) == NULL) {
570 			/*
571 			 * Fallback to group names only if hook_emulation
572 			 * is set
573 			 */
574 			if (ipst->ips_ipmp_hook_emulation) {
575 				ill = ill_group_lookup_on_ifindex(
576 				    (uint_t)phy_ifdata, isv6, ipst);
577 			}
578 			if (ill == NULL)
579 				return (0);
580 		}
581 		mtu = ill->ill_max_frag;
582 		ill_refrele(ill);
583 	}
584 
585 	return (mtu);
586 }
587 
588 /*
589  * Determine if path MTU discovery is enabled for IP
590  */
591 static int
592 ip_getpmtuenabled(net_handle_t neti)
593 {
594 	netstack_t *ns;
595 
596 	ns = neti->netd_stack->nts_netstack;
597 	ASSERT(ns != NULL);
598 	return (ns->netstack_ip->ips_ip_path_mtu_discovery);
599 }
600 
601 /*
602  * Get next interface from the current list of IPv4 physical network interfaces
603  *
604  * Note: this does not handle the case when ipmp_hook_emulation is set.
605  * But IP Filter does not use this function.
606  */
607 static phy_if_t
608 ip_phygetnext(net_handle_t neti, phy_if_t phy_ifdata)
609 {
610 	netstack_t *ns;
611 
612 	ns = neti->netd_stack->nts_netstack;
613 	ASSERT(ns != NULL);
614 	return (ill_get_next_ifindex(phy_ifdata, B_FALSE, ns->netstack_ip));
615 }
616 
617 /*
618  * Get next interface from the current list of IPv6 physical network interfaces
619  */
620 static phy_if_t
621 ipv6_phygetnext(net_handle_t neti, phy_if_t phy_ifdata)
622 {
623 	netstack_t *ns;
624 
625 	ns = neti->netd_stack->nts_netstack;
626 	ASSERT(ns != NULL);
627 	return (ill_get_next_ifindex(phy_ifdata, B_TRUE, ns->netstack_ip));
628 }
629 
630 /*
631  * Determine if a network interface name exists for IPv4
632  */
633 static phy_if_t
634 ip_phylookup(net_handle_t neti, const char *name)
635 {
636 	netstack_t *ns;
637 
638 	ns = neti->netd_stack->nts_netstack;
639 	ASSERT(ns != NULL);
640 	return (ip_phylookup_impl(name, B_FALSE, ns->netstack_ip));
641 }
642 
643 /*
644  * Determine if a network interface name exists for IPv6
645  */
646 static phy_if_t
647 ipv6_phylookup(net_handle_t neti, const char *name)
648 {
649 	netstack_t *ns;
650 
651 	ns = neti->netd_stack->nts_netstack;
652 	ASSERT(ns != NULL);
653 	return (ip_phylookup_impl(name, B_TRUE, ns->netstack_ip));
654 }
655 
656 /*
657  * Implement looking up an ill_t based on the name supplied and matching
658  * it up with either IPv4 or IPv6.  ill_get_ifindex_by_name() is not used
659  * because it does not match on the address family in addition to the name.
660  */
661 static phy_if_t
662 ip_phylookup_impl(const char *name, boolean_t isv6, ip_stack_t *ipst)
663 {
664 	phy_if_t phy;
665 	ill_t *ill;
666 
667 	ill = ill_lookup_on_name((char *)name, B_FALSE, isv6, NULL, NULL,
668 	    NULL, NULL, NULL, ipst);
669 
670 	/* Fallback to group names only if hook_emulation is set */
671 	if (ill == NULL && ipst->ips_ipmp_hook_emulation) {
672 		ill = ill_group_lookup_on_name((char *)name, isv6, ipst);
673 	}
674 	if (ill == NULL)
675 		return (0);
676 
677 	phy = ill->ill_phyint->phyint_hook_ifindex;
678 
679 	ill_refrele(ill);
680 
681 	return (phy);
682 }
683 
684 /*
685  * Get next interface from the current list of IPv4 logical network interfaces
686  */
687 static lif_if_t
688 ip_lifgetnext(net_handle_t neti, phy_if_t phy_ifdata, lif_if_t ifdata)
689 {
690 	netstack_t *ns;
691 
692 	ns = neti->netd_stack->nts_netstack;
693 	ASSERT(ns != NULL);
694 	return (ip_lifgetnext_impl(phy_ifdata, ifdata, B_FALSE,
695 	    ns->netstack_ip));
696 }
697 
698 /*
699  * Get next interface from the current list of IPv6 logical network interfaces
700  */
701 static lif_if_t
702 ipv6_lifgetnext(net_handle_t neti, phy_if_t phy_ifdata, lif_if_t ifdata)
703 {
704 	netstack_t *ns;
705 
706 	ns = neti->netd_stack->nts_netstack;
707 	ASSERT(ns != NULL);
708 	return (ip_lifgetnext_impl(phy_ifdata, ifdata, B_TRUE,
709 	    ns->netstack_ip));
710 }
711 
712 /*
713  * Shared implementation to get next interface from the current list of
714  * logical network interfaces
715  *
716  * Note: this does not handle the case when ipmp_hook_emulation is set.
717  * But IP Filter does not use this function.
718  */
719 static lif_if_t
720 ip_lifgetnext_impl(phy_if_t phy_ifdata, lif_if_t ifdata, boolean_t isv6,
721     ip_stack_t *ipst)
722 {
723 	lif_if_t newidx, oldidx;
724 	boolean_t nextok;
725 	ipif_t *ipif;
726 	ill_t *ill;
727 
728 	ill = ill_lookup_on_ifindex(phy_ifdata, isv6, NULL, NULL,
729 	    NULL, NULL, ipst);
730 	if (ill == NULL)
731 		return (0);
732 
733 	if (ifdata != 0) {
734 		oldidx = UNMAP_IPIF_ID(ifdata);
735 		nextok = B_FALSE;
736 	} else {
737 		oldidx = 0;
738 		nextok = B_TRUE;
739 	}
740 
741 	mutex_enter(&ill->ill_lock);
742 	if (ill->ill_state_flags & ILL_CONDEMNED) {
743 		mutex_exit(&ill->ill_lock);
744 		ill_refrele(ill);
745 		return (0);
746 	}
747 
748 	/*
749 	 * It's safe to iterate the ill_ipif list when holding an ill_lock.
750 	 * And it's also safe to access ipif_id without ipif refhold.
751 	 * See ipif_get_id().
752 	 */
753 	for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
754 		if (!IPIF_CAN_LOOKUP(ipif))
755 			continue;
756 		if (nextok) {
757 			ipif_refhold_locked(ipif);
758 			break;
759 		} else if (oldidx == ipif->ipif_id) {
760 			nextok = B_TRUE;
761 		}
762 	}
763 
764 	mutex_exit(&ill->ill_lock);
765 	ill_refrele(ill);
766 
767 	if (ipif == NULL)
768 		return (0);
769 
770 	newidx = ipif->ipif_id;
771 	ipif_refrele(ipif);
772 
773 	return (MAP_IPIF_ID(newidx));
774 }
775 
776 /*
777  * Inject an IPv4 packet to or from an interface
778  */
779 static int
780 ip_inject(net_handle_t neti, inject_t style, net_inject_t *packet)
781 {
782 	netstack_t *ns;
783 
784 	ns = neti->netd_stack->nts_netstack;
785 	ASSERT(ns != NULL);
786 	return (ip_inject_impl(style, packet, B_FALSE, ns->netstack_ip));
787 }
788 
789 
790 /*
791  * Inject an IPv6 packet to or from an interface
792  */
793 static int
794 ipv6_inject(net_handle_t neti, inject_t style, net_inject_t *packet)
795 {
796 	netstack_t *ns;
797 
798 	ns = neti->netd_stack->nts_netstack;
799 	return (ip_inject_impl(style, packet, B_TRUE, ns->netstack_ip));
800 }
801 
802 /*
803  * Shared implementation to inject a packet to or from an interface
804  * Return value:
805  *   0: successful
806  *  -1: memory allocation failed
807  *   1: other errors
808  */
809 static int
810 ip_inject_impl(inject_t style, net_inject_t *packet, boolean_t isv6,
811     ip_stack_t *ipst)
812 {
813 	struct sockaddr_in6 *sin6;
814 	ddi_taskq_t *tq = NULL;
815 	void (* func)(void *);
816 	injection_t *inject;
817 	ip6_t *ip6h;
818 	ire_t *ire;
819 	mblk_t *mp;
820 
821 	ASSERT(packet != NULL);
822 	ASSERT(packet->ni_packet != NULL);
823 	ASSERT(packet->ni_packet->b_datap->db_type == M_DATA);
824 
825 	switch (style) {
826 	case NI_QUEUE_IN:
827 		inject = kmem_alloc(sizeof (*inject), KM_NOSLEEP);
828 		if (inject == NULL)
829 			return (-1);
830 		inject->inj_data = *packet;
831 		inject->inj_isv6 = isv6;
832 		/*
833 		 * deliver up into the kernel, immitating its reception by a
834 		 * network interface, add to list and schedule timeout
835 		 */
836 		func = ip_ni_queue_in_func;
837 		tq = eventq_queue_in;
838 		break;
839 
840 	case NI_QUEUE_OUT:
841 		inject = kmem_alloc(sizeof (*inject), KM_NOSLEEP);
842 		if (inject == NULL)
843 			return (-1);
844 		inject->inj_data = *packet;
845 		inject->inj_isv6 = isv6;
846 		/*
847 		 * deliver out of the kernel, as if it were being sent via a
848 		 * raw socket so that IPFilter will see it again, add to list
849 		 * and schedule timeout
850 		 */
851 		func = ip_ni_queue_out_func;
852 		tq = eventq_queue_out;
853 		break;
854 
855 	case NI_DIRECT_OUT:
856 		/*
857 		 * Note:
858 		 * For IPv4, the code path below will be greatly simplified
859 		 * with the delivery of surya - it will become a single
860 		 * function call to X.  A follow on project is aimed to
861 		 * provide similar functionality for IPv6.
862 		 */
863 		mp = packet->ni_packet;
864 
865 		if (!isv6) {
866 			struct sockaddr *sock;
867 
868 			sock = (struct sockaddr *)&packet->ni_addr;
869 			/*
870 			 * ipfil_sendpkt was provided by surya to ease the
871 			 * problems associated with sending out a packet.
872 			 * Currently this function only supports IPv4.
873 			 */
874 			switch (ipfil_sendpkt(sock, mp, packet->ni_physical,
875 			    netstackid_to_zoneid(
876 			    ipst->ips_netstack->netstack_stackid))) {
877 			case 0 :
878 			case EINPROGRESS:
879 				return (0);
880 			case ECOMM :
881 			case ENONET :
882 				return (1);
883 			default :
884 				return (1);
885 			}
886 			/* NOTREACHED */
887 
888 		}
889 
890 		ip6h = (ip6_t *)mp->b_rptr;
891 		sin6 = (struct sockaddr_in6 *)&packet->ni_addr;
892 		ASSERT(sin6->sin6_family == AF_INET6);
893 
894 		ire = ire_route_lookup_v6(&sin6->sin6_addr, 0, 0, 0,
895 		    NULL, NULL, ALL_ZONES, NULL,
896 		    MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE,
897 		    ipst);
898 
899 		if (ire == NULL) {
900 			ip2dbg(("ip_inject: ire_cache_lookup failed\n"));
901 			freemsg(mp);
902 			return (1);
903 		}
904 
905 		if (ire->ire_stq == NULL) {
906 			/* Send to loopback destination. */
907 			if (ire->ire_rfq == NULL) {
908 				ip2dbg(("ip_inject: bad nexthop\n"));
909 				ire_refrele(ire);
910 				freemsg(mp);
911 				return (1);
912 			}
913 			DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL,
914 			    void_ip_t *, ip6h, __dtrace_ipsr_ill_t *,
915 			    ire->ire_ipif->ipif_ill, ipha_t *, NULL, ip6_t *,
916 			    ip6h, int, 1);
917 			ip_wput_local_v6(ire->ire_rfq,
918 			    ire->ire_ipif->ipif_ill, ip6h, mp, ire, 0);
919 			ire_refrele(ire);
920 			return (0);
921 		}
922 
923 		mp->b_queue = ire->ire_stq;
924 
925 		if (ire->ire_nce == NULL ||
926 		    ire->ire_nce->nce_fp_mp == NULL &&
927 		    ire->ire_nce->nce_res_mp == NULL) {
928 			ip_newroute_v6(ire->ire_stq, mp,
929 			    &sin6->sin6_addr, NULL, NULL, ALL_ZONES, ipst);
930 
931 			ire_refrele(ire);
932 			return (0);
933 		} else {
934 			/* prepend L2 header for IPv6 packets. */
935 			mblk_t *llmp;
936 
937 			/*
938 			 * Lock IREs, see 6420438
939 			 */
940 			mutex_enter(&ire->ire_lock);
941 			llmp = ire->ire_nce->nce_fp_mp ?
942 			    ire->ire_nce->nce_fp_mp :
943 			    ire->ire_nce->nce_res_mp;
944 
945 			if ((mp = dupb(llmp)) == NULL &&
946 			    (mp = copyb(llmp)) == NULL) {
947 				ip2dbg(("ip_inject: llhdr failed\n"));
948 				mutex_exit(&ire->ire_lock);
949 				ire_refrele(ire);
950 				freemsg(mp);
951 				return (1);
952 			}
953 			mutex_exit(&ire->ire_lock);
954 			linkb(mp, packet->ni_packet);
955 		}
956 
957 		mp->b_queue = ire->ire_stq;
958 
959 		break;
960 	default:
961 		freemsg(packet->ni_packet);
962 		return (1);
963 	}
964 
965 	if (tq) {
966 		inject->inj_ptr = ipst;
967 		if (ddi_taskq_dispatch(tq, func, (void *)inject,
968 		    DDI_SLEEP) == DDI_FAILURE) {
969 			ip2dbg(("ip_inject:  ddi_taskq_dispatch failed\n"));
970 			freemsg(packet->ni_packet);
971 			return (1);
972 		}
973 	} else {
974 		putnext(ire->ire_stq, mp);
975 		ire_refrele(ire);
976 	}
977 
978 	return (0);
979 }
980 
981 /*
982  * Find the interface used for traffic to a given IPv4 address
983  */
984 static phy_if_t
985 ip_routeto(net_handle_t neti, struct sockaddr *address, struct sockaddr *next)
986 {
987 	netstack_t *ns;
988 
989 	ASSERT(address != NULL);
990 
991 	if (address->sa_family != AF_INET)
992 		return (0);
993 
994 	ns = neti->netd_stack->nts_netstack;
995 	ASSERT(ns != NULL);
996 
997 	return (ip_routeto_impl(address, next, ns->netstack_ip));
998 }
999 
1000 /*
1001  * Find the interface used for traffic to a given IPv6 address
1002  */
1003 static phy_if_t
1004 ipv6_routeto(net_handle_t neti, struct sockaddr *address, struct sockaddr *next)
1005 {
1006 	netstack_t *ns;
1007 
1008 	ASSERT(address != NULL);
1009 
1010 	if (address->sa_family != AF_INET6)
1011 		return (0);
1012 
1013 	ns = neti->netd_stack->nts_netstack;
1014 	ASSERT(ns != NULL);
1015 
1016 	return (ip_routeto_impl(address, next, ns->netstack_ip));
1017 }
1018 
1019 
1020 /*
1021  * Find the interface used for traffic to an address.
1022  * For lint reasons, next/next6/sin/sin6 are all declared and assigned
1023  * a value at the top.  The alternative would end up with two bunches
1024  * of assignments, with each bunch setting half to NULL.
1025  */
1026 static phy_if_t
1027 ip_routeto_impl(struct sockaddr *address, struct sockaddr *nexthop,
1028     ip_stack_t *ipst)
1029 {
1030 	struct sockaddr_in6 *next6 = (struct sockaddr_in6 *)nexthop;
1031 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)address;
1032 	struct sockaddr_in *next = (struct sockaddr_in *)nexthop;
1033 	struct sockaddr_in *sin = (struct sockaddr_in *)address;
1034 	ire_t *sire = NULL;
1035 	ire_t *ire;
1036 	ill_t *ill;
1037 	phy_if_t phy_if;
1038 
1039 	if (address->sa_family == AF_INET6) {
1040 		ire = ire_route_lookup_v6(&sin6->sin6_addr, NULL,
1041 		    0, 0, NULL, &sire, ALL_ZONES, NULL,
1042 		    MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE,
1043 		    ipst);
1044 	} else {
1045 		ire = ire_route_lookup(sin->sin_addr.s_addr, 0,
1046 		    0, 0, NULL, &sire, ALL_ZONES, NULL,
1047 		    MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE,
1048 		    ipst);
1049 	}
1050 
1051 	if (ire == NULL)
1052 		return (0);
1053 
1054 	/*
1055 	 * For some destinations, we have routes that are dead ends, so
1056 	 * return to indicate that no physical interface can be used to
1057 	 * reach the destination.
1058 	 */
1059 	if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) != 0) {
1060 		if (sire != NULL)
1061 			ire_refrele(sire);
1062 		ire_refrele(ire);
1063 		return (0);
1064 	}
1065 
1066 	ill = ire_to_ill(ire);
1067 	if (ill == NULL) {
1068 		if (sire != NULL)
1069 			ire_refrele(sire);
1070 		ire_refrele(ire);
1071 		return (0);
1072 	}
1073 
1074 	if (nexthop != NULL) {
1075 		if (address->sa_family == AF_INET6) {
1076 			next->sin_addr.s_addr = sire ? sire->ire_gateway_addr :
1077 			    sin->sin_addr.s_addr;
1078 		} else {
1079 			next6->sin6_addr = sire ? sire->ire_gateway_addr_v6 :
1080 			    sin6->sin6_addr;
1081 		}
1082 	}
1083 
1084 	ASSERT(ill != NULL);
1085 	phy_if = (phy_if_t)ill->ill_phyint->phyint_hook_ifindex;
1086 	if (sire != NULL)
1087 		ire_refrele(sire);
1088 	ire_refrele(ire);
1089 
1090 	return (phy_if);
1091 }
1092 
1093 /*
1094  * Determine if checksumming is being used for the given packet.
1095  *
1096  * Return value:
1097  *   NET_HCK_NONE: full checksum recalculation is required
1098  *   NET_HCK_L3_FULL: full layer 3 checksum
1099  *   NET_HCK_L4_FULL: full layer 4 checksum
1100  *   NET_HCK_L4_PART: partial layer 4 checksum
1101  */
1102 /*ARGSUSED*/
1103 static int
1104 ip_ispartialchecksum(net_handle_t neti, mblk_t *mp)
1105 {
1106 	int ret = 0;
1107 
1108 	ASSERT(mp != NULL);
1109 
1110 	if ((DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM) != 0) {
1111 		ret |= (int)NET_HCK_L4_FULL;
1112 		if ((DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM) != 0)
1113 			ret |= (int)NET_HCK_L3_FULL;
1114 	}
1115 	if ((DB_CKSUMFLAGS(mp) & HCK_PARTIALCKSUM) != 0) {
1116 		ret |= (int)NET_HCK_L4_PART;
1117 		if ((DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM) != 0)
1118 			ret |= (int)NET_HCK_L3_FULL;
1119 	}
1120 
1121 	return (ret);
1122 }
1123 
1124 /*
1125  * Return true or false, indicating whether the network and transport
1126  * headers are correct.  Use the capabilities flags and flags set in the
1127  * dblk_t to determine whether or not the checksum is valid.
1128  *
1129  * Return:
1130  *   0: the checksum was incorrect
1131  *   1: the original checksum was correct
1132  */
1133 /*ARGSUSED*/
1134 static int
1135 ip_isvalidchecksum(net_handle_t neti, mblk_t *mp)
1136 {
1137 	unsigned char *wptr;
1138 	ipha_t *ipha = (ipha_t *)mp->b_rptr;
1139 	int hlen;
1140 	int ret;
1141 
1142 	ASSERT(mp != NULL);
1143 
1144 	if (dohwcksum &&
1145 	    DB_CKSUM16(mp) != 0xFFFF &&
1146 	    (DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM) &&
1147 	    (DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM_OK) &&
1148 	    (DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM))
1149 		return (1);
1150 
1151 	hlen = (ipha->ipha_version_and_hdr_length & 0x0F) << 2;
1152 
1153 	/*
1154 	 * Check that the mblk being passed in has enough data in it
1155 	 * before blindly checking ip_cksum.
1156 	 */
1157 	if (msgdsize(mp) < hlen)
1158 		return (0);
1159 
1160 	if (mp->b_wptr < mp->b_rptr + hlen) {
1161 		if (pullupmsg(mp, hlen) == 0)
1162 			return (0);
1163 		wptr = mp->b_wptr;
1164 	} else {
1165 		wptr = mp->b_wptr;
1166 		mp->b_wptr = mp->b_rptr + hlen;
1167 	}
1168 
1169 	if (ipha->ipha_hdr_checksum == ip_cksum(mp, 0, ipha->ipha_hdr_checksum))
1170 		ret = 1;
1171 	else
1172 		ret = 0;
1173 	mp->b_wptr = wptr;
1174 
1175 	return (ret);
1176 }
1177 
1178 /*
1179  * Unsupported with IPv6
1180  */
1181 /*ARGSUSED*/
1182 static int
1183 ipv6_isvalidchecksum(net_handle_t neti, mblk_t *mp)
1184 {
1185 	return (-1);
1186 }
1187 
1188 /*
1189  * Determine the network addresses for an IPv4 interface
1190  */
1191 static int
1192 ip_getlifaddr(net_handle_t neti, phy_if_t phy_ifdata, lif_if_t ifdata,
1193     size_t nelem, net_ifaddr_t type[], void *storage)
1194 {
1195 	netstack_t *ns;
1196 
1197 	ns = neti->netd_stack->nts_netstack;
1198 	ASSERT(ns != NULL);
1199 	return (ip_getlifaddr_impl(AF_INET, phy_ifdata, ifdata,
1200 	    nelem, type, storage, ns->netstack_ip));
1201 }
1202 
1203 /*
1204  * Determine the network addresses for an IPv6 interface
1205  */
1206 static int
1207 ipv6_getlifaddr(net_handle_t neti, phy_if_t phy_ifdata, lif_if_t ifdata,
1208     size_t nelem, net_ifaddr_t type[], void *storage)
1209 {
1210 	netstack_t *ns;
1211 
1212 	ns = neti->netd_stack->nts_netstack;
1213 	ASSERT(ns != NULL);
1214 	return (ip_getlifaddr_impl(AF_INET6, phy_ifdata, ifdata,
1215 	    nelem, type, storage, ns->netstack_ip));
1216 }
1217 
1218 /*
1219  * Shared implementation to determine the network addresses for an interface
1220  *
1221  * Note: this does not handle a non-zero ifdata when ipmp_hook_emulation is set.
1222  * But IP Filter only uses a zero ifdata.
1223  */
1224 /* ARGSUSED */
1225 static int
1226 ip_getlifaddr_impl(sa_family_t family, phy_if_t phy_ifdata,
1227     lif_if_t ifdata, size_t nelem, net_ifaddr_t type[],
1228     struct sockaddr *storage, ip_stack_t *ipst)
1229 {
1230 	struct sockaddr_in6 *sin6;
1231 	struct sockaddr_in *sin;
1232 	lif_if_t ipifid;
1233 	ipif_t *ipif;
1234 	int i;
1235 
1236 	ASSERT(type != NULL);
1237 	ASSERT(storage != NULL);
1238 
1239 	ipifid = UNMAP_IPIF_ID(ifdata);
1240 
1241 	if (family == AF_INET) {
1242 		if ((ipif = ipif_getby_indexes((uint_t)phy_ifdata,
1243 		    (uint_t)ipifid, B_FALSE, ipst)) == NULL)
1244 			return (1);
1245 
1246 		sin = (struct sockaddr_in *)storage;
1247 		for (i = 0; i < nelem; i++, sin++) {
1248 			if (ip_getifaddr_type(AF_INET, ipif, type[i],
1249 			    &sin->sin_addr) < 0) {
1250 				ip2dbg(("ip_getlifaddr_impl failed type %d\n",
1251 				    type[i]));
1252 				ipif_refrele(ipif);
1253 				return (1);
1254 			}
1255 		}
1256 	} else {
1257 		if ((ipif = ipif_getby_indexes((uint_t)phy_ifdata,
1258 		    (uint_t)ipifid, B_TRUE, ipst)) == NULL)
1259 			return (1);
1260 
1261 		sin6 = (struct sockaddr_in6 *)storage;
1262 		for (i = 0; i < nelem; i++, sin6++) {
1263 			if (ip_getifaddr_type(AF_INET6, ipif, type[i],
1264 			    &sin6->sin6_addr) < 0) {
1265 				ip2dbg(("ip_getlifaddr_impl failed type %d\n",
1266 				    type[i]));
1267 				ipif_refrele(ipif);
1268 				return (1);
1269 			}
1270 		}
1271 	}
1272 	ipif_refrele(ipif);
1273 	return (0);
1274 }
1275 
1276 /*
1277  * ip_getlifaddr private function
1278  */
1279 static int
1280 ip_getifaddr_type(sa_family_t family, ipif_t *ill_ipif,
1281     lif_if_t type, void *storage)
1282 {
1283 	void *src_addr;
1284 	int mem_size;
1285 
1286 	ASSERT(ill_ipif != NULL);
1287 	ASSERT(storage != NULL);
1288 
1289 	if (family == AF_INET) {
1290 		mem_size = sizeof (struct in_addr);
1291 
1292 		switch (type) {
1293 		case NA_ADDRESS:
1294 			src_addr = &(ill_ipif->ipif_lcl_addr);
1295 			break;
1296 		case NA_PEER:
1297 			src_addr = &(ill_ipif->ipif_pp_dst_addr);
1298 			break;
1299 		case NA_BROADCAST:
1300 			src_addr = &(ill_ipif->ipif_brd_addr);
1301 			break;
1302 		case NA_NETMASK:
1303 			src_addr = &(ill_ipif->ipif_net_mask);
1304 			break;
1305 		default:
1306 			return (-1);
1307 			/*NOTREACHED*/
1308 		}
1309 	} else {
1310 		mem_size = sizeof (struct in6_addr);
1311 
1312 		switch (type) {
1313 		case NA_ADDRESS:
1314 			src_addr = &(ill_ipif->ipif_v6lcl_addr);
1315 			break;
1316 		case NA_PEER:
1317 			src_addr = &(ill_ipif->ipif_v6pp_dst_addr);
1318 			break;
1319 		case NA_BROADCAST:
1320 			src_addr = &(ill_ipif->ipif_v6brd_addr);
1321 			break;
1322 		case NA_NETMASK:
1323 			src_addr = &(ill_ipif->ipif_v6net_mask);
1324 			break;
1325 		default:
1326 			return (-1);
1327 			/*NOTREACHED*/
1328 		}
1329 	}
1330 
1331 	(void) memcpy(storage, src_addr, mem_size);
1332 	return (1);
1333 }
1334 
1335 /*
1336  * Deliver packet up into the kernel, immitating its reception by a
1337  * network interface.
1338  */
1339 static void
1340 ip_ni_queue_in_func(void *inject)
1341 {
1342 	ip_ni_queue_func_impl(inject, B_FALSE);
1343 }
1344 
1345 /*
1346  * Deliver out of the kernel, as if it were being sent via a
1347  * raw socket so that IPFilter will see it again.
1348  */
1349 static void
1350 ip_ni_queue_out_func(void *inject)
1351 {
1352 	ip_ni_queue_func_impl(inject, B_TRUE);
1353 }
1354 
1355 /*
1356  * Shared implementation for inject via ip_output and ip_input
1357  */
1358 static void
1359 ip_ni_queue_func_impl(injection_t *inject,  boolean_t out)
1360 {
1361 	net_inject_t *packet;
1362 	conn_t *conn;
1363 	ill_t *ill;
1364 	ip_stack_t *ipst = (ip_stack_t *)inject->inj_ptr;
1365 
1366 	ASSERT(inject != NULL);
1367 	packet = &inject->inj_data;
1368 	ASSERT(packet->ni_packet != NULL);
1369 
1370 	ill = ill_lookup_on_ifindex((uint_t)packet->ni_physical,
1371 	    B_FALSE, NULL, NULL, NULL, NULL, ipst);
1372 
1373 	/* Fallback to group names only if hook_emulation is set */
1374 	if (ill == NULL && ipst->ips_ipmp_hook_emulation) {
1375 		ill = ill_group_lookup_on_ifindex((uint_t)packet->ni_physical,
1376 		    B_FALSE, ipst);
1377 	}
1378 	if (ill == NULL) {
1379 		kmem_free(inject, sizeof (*inject));
1380 		return;
1381 	}
1382 
1383 	if (out == 0) {
1384 		if (inject->inj_isv6) {
1385 			ip_rput_v6(ill->ill_rq, packet->ni_packet);
1386 		} else {
1387 			ip_input(ill, NULL, packet->ni_packet, 0);
1388 		}
1389 		kmem_free(inject, sizeof (*inject));
1390 		ill_refrele(ill);
1391 		return;
1392 	}
1393 
1394 	/*
1395 	 * Even though ipcl_conn_create requests that it be passed
1396 	 * a different value for "TCP", in this case there may not
1397 	 * be a TCP connection backing the packet and more than
1398 	 * likely, non-TCP packets will go here too.
1399 	 */
1400 	conn = ipcl_conn_create(IPCL_IPCCONN, KM_NOSLEEP, ipst->ips_netstack);
1401 	if (conn != NULL) {
1402 		if (inject->inj_isv6) {
1403 			conn->conn_flags |= IPCL_ISV6;
1404 			conn->conn_af_isv6 = B_TRUE;
1405 			conn->conn_src_preferences = IPV6_PREFER_SRC_DEFAULT;
1406 			conn->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1407 			ip_output_v6(conn, packet->ni_packet, ill->ill_wq,
1408 			    IP_WPUT);
1409 		} else {
1410 			conn->conn_af_isv6 = B_FALSE;
1411 			conn->conn_pkt_isv6 = B_FALSE;
1412 			conn->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1413 			ip_output(conn, packet->ni_packet, ill->ill_wq,
1414 			    IP_WPUT);
1415 		}
1416 
1417 		CONN_DEC_REF(conn);
1418 	}
1419 
1420 	kmem_free(inject, sizeof (*inject));
1421 	ill_refrele(ill);
1422 }
1423 
1424 /*
1425  * taskq function for nic events.
1426  */
1427 void
1428 ip_ne_queue_func(void *arg)
1429 {
1430 	hook_event_token_t hr;
1431 	hook_nic_event_int_t *info = (hook_nic_event_int_t *)arg;
1432 	ip_stack_t *ipst;
1433 	netstack_t *ns;
1434 
1435 	ns = netstack_find_by_stackid(info->hnei_stackid);
1436 	if (ns == NULL)
1437 		goto done;
1438 
1439 	ipst = ns->netstack_ip;
1440 	if (ipst == NULL)
1441 		goto done;
1442 
1443 	hr = (info->hnei_event.hne_protocol == ipst->ips_ipv6_net_data) ?
1444 	    ipst->ips_ipv6nicevents : ipst->ips_ipv4nicevents;
1445 	(void) hook_run(info->hnei_event.hne_protocol->netd_hooks, hr,
1446 	    (hook_data_t)&info->hnei_event);
1447 
1448 done:
1449 	if (ns != NULL)
1450 		netstack_rele(ns);
1451 	kmem_free(info->hnei_event.hne_data, info->hnei_event.hne_datalen);
1452 	kmem_free(arg, sizeof (hook_nic_event_int_t));
1453 }
1454 
1455 /*
1456  * Temporary function to support IPMP emulation for IP Filter.
1457  * Lookup an ill based on the ifindex assigned to the group.
1458  * Skips unusable ones i.e. where any of these flags are set:
1459  * (PHYI_FAILED|PHYI_OFFLINE|PHYI_INACTIVE)
1460  */
1461 ill_t *
1462 ill_group_lookup_on_ifindex(uint_t index, boolean_t isv6, ip_stack_t *ipst)
1463 {
1464 	ill_t	*ill;
1465 	phyint_t *phyi;
1466 
1467 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1468 	phyi = phyint_lookup_group_ifindex(index, ipst);
1469 	if (phyi != NULL) {
1470 		ill = isv6 ? phyi->phyint_illv6: phyi->phyint_illv4;
1471 		if (ill != NULL) {
1472 			mutex_enter(&ill->ill_lock);
1473 			if (ILL_CAN_LOOKUP(ill)) {
1474 				ill_refhold_locked(ill);
1475 				mutex_exit(&ill->ill_lock);
1476 				rw_exit(&ipst->ips_ill_g_lock);
1477 				return (ill);
1478 			}
1479 			mutex_exit(&ill->ill_lock);
1480 		}
1481 	}
1482 	rw_exit(&ipst->ips_ill_g_lock);
1483 	return (NULL);
1484 }
1485 
1486 /*
1487  * Temporary function to support IPMP emulation for IP Filter.
1488  * Lookup an ill based on the group name.
1489  * Skips unusable ones i.e. where any of these flags are set:
1490  * (PHYI_FAILED|PHYI_OFFLINE|PHYI_INACTIVE)
1491  */
1492 ill_t *
1493 ill_group_lookup_on_name(char *name, boolean_t isv6, ip_stack_t *ipst)
1494 {
1495 	ill_t	*ill;
1496 	phyint_t *phyi;
1497 
1498 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1499 	phyi = phyint_lookup_group(name, B_TRUE, ipst);
1500 	if (phyi != NULL) {
1501 		ill = isv6 ? phyi->phyint_illv6: phyi->phyint_illv4;
1502 		if (ill != NULL) {
1503 			mutex_enter(&ill->ill_lock);
1504 			if (ILL_CAN_LOOKUP(ill)) {
1505 				ill_refhold_locked(ill);
1506 				mutex_exit(&ill->ill_lock);
1507 				rw_exit(&ipst->ips_ill_g_lock);
1508 				return (ill);
1509 			}
1510 			mutex_exit(&ill->ill_lock);
1511 		}
1512 	}
1513 	rw_exit(&ipst->ips_ill_g_lock);
1514 	return (NULL);
1515 }
1516