xref: /titanic_50/usr/src/uts/common/inet/ip/ip_netinfo.c (revision 3fbe3e2827948b5ff8ffec94d18c232af100ea3c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/param.h>
27 #include <sys/types.h>
28 #include <sys/systm.h>
29 #include <sys/stream.h>
30 #include <sys/strsubr.h>
31 #include <sys/pattr.h>
32 #include <sys/dlpi.h>
33 #include <sys/atomic.h>
34 #include <sys/sunddi.h>
35 #include <sys/socket.h>
36 #include <sys/neti.h>
37 #include <sys/sdt.h>
38 #include <sys/cmn_err.h>
39 
40 #include <netinet/in.h>
41 #include <inet/common.h>
42 #include <inet/mib2.h>
43 #include <inet/ip.h>
44 #include <inet/ip6.h>
45 #include <inet/ip_if.h>
46 #include <inet/ip_ire.h>
47 #include <inet/ip_impl.h>
48 #include <inet/ip_ndp.h>
49 #include <inet/ipclassifier.h>
50 #include <inet/ipp_common.h>
51 #include <inet/ip_ftable.h>
52 
53 /*
54  * IPv4 netinfo entry point declarations.
55  */
56 static int 		ip_getifname(net_handle_t, phy_if_t, char *,
57 			    const size_t);
58 static int 		ip_getmtu(net_handle_t, phy_if_t, lif_if_t);
59 static int 		ip_getpmtuenabled(net_handle_t);
60 static int 		ip_getlifaddr(net_handle_t, phy_if_t, lif_if_t,
61 			    size_t, net_ifaddr_t [], void *);
62 static phy_if_t		ip_phygetnext(net_handle_t, phy_if_t);
63 static phy_if_t 	ip_phylookup(net_handle_t, const char *);
64 static lif_if_t 	ip_lifgetnext(net_handle_t, phy_if_t, lif_if_t);
65 static int 		ip_inject(net_handle_t, inject_t, net_inject_t *);
66 static phy_if_t 	ip_routeto(net_handle_t, struct sockaddr *,
67 			    struct sockaddr *);
68 static int 		ip_ispartialchecksum(net_handle_t, mblk_t *);
69 static int 		ip_isvalidchecksum(net_handle_t, mblk_t *);
70 
71 static int 		ipv6_getifname(net_handle_t, phy_if_t, char *,
72 			    const size_t);
73 static int 		ipv6_getmtu(net_handle_t, phy_if_t, lif_if_t);
74 static int 		ipv6_getlifaddr(net_handle_t, phy_if_t, lif_if_t,
75 			    size_t, net_ifaddr_t [], void *);
76 static phy_if_t 	ipv6_phygetnext(net_handle_t, phy_if_t);
77 static phy_if_t 	ipv6_phylookup(net_handle_t, const char *);
78 static lif_if_t 	ipv6_lifgetnext(net_handle_t, phy_if_t, lif_if_t);
79 static int 		ipv6_inject(net_handle_t, inject_t, net_inject_t *);
80 static phy_if_t 	ipv6_routeto(net_handle_t, struct sockaddr *,
81 			    struct sockaddr *);
82 static int 		ipv6_isvalidchecksum(net_handle_t, mblk_t *);
83 
84 /* Netinfo private functions */
85 static	int		ip_getifname_impl(phy_if_t, char *,
86 			    const size_t, boolean_t, ip_stack_t *);
87 static	int		ip_getmtu_impl(phy_if_t, lif_if_t, boolean_t,
88 			    ip_stack_t *);
89 static	phy_if_t	ip_phylookup_impl(const char *, boolean_t,
90 			    ip_stack_t *);
91 static	lif_if_t	ip_lifgetnext_impl(phy_if_t, lif_if_t, boolean_t,
92 			    ip_stack_t *);
93 static	int		ip_inject_impl(inject_t, net_inject_t *, boolean_t,
94 			    ip_stack_t *);
95 static	int		ip_getifaddr_type(sa_family_t, ipif_t *, lif_if_t,
96 			    void *);
97 static	phy_if_t	ip_routeto_impl(struct sockaddr *, struct sockaddr *,
98 			    ip_stack_t *);
99 static	int		ip_getlifaddr_impl(sa_family_t, phy_if_t, lif_if_t,
100 			    size_t, net_ifaddr_t [], struct sockaddr *,
101 			    ip_stack_t *);
102 static	void		ip_ni_queue_in_func(void *);
103 static	void		ip_ni_queue_out_func(void *);
104 static	void		ip_ni_queue_func_impl(injection_t *,  boolean_t);
105 
106 
/*
 * Netinfo protocol description for IPv4; ip_net_init() hands it to
 * net_protocol_register().
 *
 * The initializer is positional, so the order of the entries has to follow
 * the member order of net_protocol_t, which is declared outside this file
 * (presumably <sys/neti.h> - confirm before reordering or inserting).
 */
static net_protocol_t ipv4info = {
	NETINFO_VERSION,
	NHF_INET,
	ip_getifname,		/* name of an interface */
	ip_getmtu,		/* MTU of an interface */
	ip_getpmtuenabled,	/* is path MTU discovery enabled */
	ip_getlifaddr,
	ip_phygetnext,		/* iterate over physical interfaces */
	ip_phylookup,		/* interface name to index */
	ip_lifgetnext,		/* iterate over logical interfaces */
	ip_inject,		/* inject a packet */
	ip_routeto,		/* interface used to reach an address */
	ip_ispartialchecksum,
	ip_isvalidchecksum
};
122 
123 
/*
 * Netinfo protocol description for IPv6; ip_net_init() hands it to
 * net_protocol_register().
 *
 * The initializer is positional, so the order of the entries has to follow
 * the member order of net_protocol_t, which is declared outside this file
 * (presumably <sys/neti.h> - confirm before reordering or inserting).
 * ip_getpmtuenabled() and ip_ispartialchecksum() are the same functions
 * that the IPv4 table uses.
 */
static net_protocol_t ipv6info = {
	NETINFO_VERSION,
	NHF_INET6,
	ipv6_getifname,		/* name of an interface */
	ipv6_getmtu,		/* MTU of an interface */
	ip_getpmtuenabled,	/* is path MTU discovery enabled */
	ipv6_getlifaddr,
	ipv6_phygetnext,	/* iterate over physical interfaces */
	ipv6_phylookup,		/* interface name to index */
	ipv6_lifgetnext,	/* iterate over logical interfaces */
	ipv6_inject,		/* inject a packet */
	ipv6_routeto,		/* interface used to reach an address */
	ip_ispartialchecksum,
	ipv6_isvalidchecksum
};
139 
/*
 * Global task queues, created by ip_net_g_init() and torn down by
 * ip_net_g_destroy().  A pointer is NULL while its queue does not exist,
 * which includes the case where creation failed.
 */

/* Processes the upside (inbound) inject messages. */
static ddi_taskq_t 	*eventq_queue_in = NULL;

/* Processes the downside (outbound) inject messages. */
static ddi_taskq_t 	*eventq_queue_out = NULL;

/*
 * Processes the nic event messages.  Not static; nothing in the visible part
 * of this file dispatches to it, so it is presumably used from other files.
 */
ddi_taskq_t 	*eventq_queue_nic = NULL;
148 
149 /*
150  * Initialize queues for inject.
151  */
152 void
153 ip_net_g_init()
154 {
155 	if (eventq_queue_out == NULL) {
156 		eventq_queue_out = ddi_taskq_create(NULL,
157 		    "IP_INJECT_QUEUE_OUT", 1, TASKQ_DEFAULTPRI, 0);
158 
159 		if (eventq_queue_out == NULL)
160 			cmn_err(CE_NOTE, "ipv4_net_init: "
161 			    "ddi_taskq_create failed for IP_INJECT_QUEUE_OUT");
162 	}
163 
164 	if (eventq_queue_in == NULL) {
165 		eventq_queue_in = ddi_taskq_create(NULL,
166 		    "IP_INJECT_QUEUE_IN", 1, TASKQ_DEFAULTPRI, 0);
167 
168 		if (eventq_queue_in == NULL)
169 			cmn_err(CE_NOTE, "ipv4_net_init: "
170 			    "ddi_taskq_create failed for IP_INJECT_QUEUE_IN");
171 	}
172 
173 	if (eventq_queue_nic == NULL) {
174 		eventq_queue_nic = ddi_taskq_create(NULL,
175 		    "IP_NIC_EVENT_QUEUE", 1, TASKQ_DEFAULTPRI, 0);
176 
177 		if (eventq_queue_nic == NULL)
178 			cmn_err(CE_NOTE, "ipv4_net_init: "
179 			    "ddi_taskq_create failed for IP_NIC_EVENT_QUEUE");
180 	}
181 }
182 
183 /*
184  * Destroy inject queues
185  */
186 void
187 ip_net_g_destroy()
188 {
189 	if (eventq_queue_nic != NULL) {
190 		ddi_taskq_destroy(eventq_queue_nic);
191 		eventq_queue_nic = NULL;
192 	}
193 
194 	if (eventq_queue_in != NULL) {
195 		ddi_taskq_destroy(eventq_queue_in);
196 		eventq_queue_in = NULL;
197 	}
198 
199 	if (eventq_queue_out != NULL) {
200 		ddi_taskq_destroy(eventq_queue_out);
201 		eventq_queue_out = NULL;
202 	}
203 }
204 
205 /*
206  * Register IPv4 and IPv6 netinfo functions and initialize queues for inject.
207  */
208 void
209 ip_net_init(ip_stack_t *ipst, netstack_t *ns)
210 {
211 	netid_t id;
212 
213 	id = net_getnetidbynetstackid(ns->netstack_stackid);
214 	ASSERT(id != -1);
215 
216 	ipst->ips_ipv4_net_data = net_protocol_register(id, &ipv4info);
217 	ASSERT(ipst->ips_ipv4_net_data != NULL);
218 
219 	ipst->ips_ipv6_net_data = net_protocol_register(id, &ipv6info);
220 	ASSERT(ipst->ips_ipv6_net_data != NULL);
221 }
222 
223 
224 /*
225  * Unregister IPv4 and IPv6 functions.
226  */
227 void
228 ip_net_destroy(ip_stack_t *ipst)
229 {
230 	if (ipst->ips_ipv4_net_data != NULL) {
231 		if (net_protocol_unregister(ipst->ips_ipv4_net_data) == 0)
232 			ipst->ips_ipv4_net_data = NULL;
233 	}
234 
235 	if (ipst->ips_ipv6_net_data != NULL) {
236 		if (net_protocol_unregister(ipst->ips_ipv6_net_data) == 0)
237 			ipst->ips_ipv6_net_data = NULL;
238 	}
239 }
240 
241 /*
242  * Initialize IPv4 hooks family the event
243  */
244 void
245 ipv4_hook_init(ip_stack_t *ipst)
246 {
247 	HOOK_FAMILY_INIT(&ipst->ips_ipv4root, Hn_IPV4);
248 	if (net_family_register(ipst->ips_ipv4_net_data, &ipst->ips_ipv4root)
249 	    != 0) {
250 		cmn_err(CE_NOTE, "ipv4_hook_init: "
251 		    "net_family_register failed for ipv4");
252 	}
253 
254 	HOOK_EVENT_INIT(&ipst->ips_ip4_physical_in_event, NH_PHYSICAL_IN);
255 	ipst->ips_ipv4firewall_physical_in = net_event_register(
256 	    ipst->ips_ipv4_net_data, &ipst->ips_ip4_physical_in_event);
257 	if (ipst->ips_ipv4firewall_physical_in == NULL) {
258 		cmn_err(CE_NOTE, "ipv4_hook_init: "
259 		    "net_event_register failed for ipv4/physical_in");
260 	}
261 
262 	HOOK_EVENT_INIT(&ipst->ips_ip4_physical_out_event, NH_PHYSICAL_OUT);
263 	ipst->ips_ipv4firewall_physical_out = net_event_register(
264 	    ipst->ips_ipv4_net_data, &ipst->ips_ip4_physical_out_event);
265 	if (ipst->ips_ipv4firewall_physical_out == NULL) {
266 		cmn_err(CE_NOTE, "ipv4_hook_init: "
267 		    "net_event_register failed for ipv4/physical_out");
268 	}
269 
270 	HOOK_EVENT_INIT(&ipst->ips_ip4_forwarding_event, NH_FORWARDING);
271 	ipst->ips_ipv4firewall_forwarding = net_event_register(
272 	    ipst->ips_ipv4_net_data, &ipst->ips_ip4_forwarding_event);
273 	if (ipst->ips_ipv4firewall_forwarding == NULL) {
274 		cmn_err(CE_NOTE, "ipv4_hook_init: "
275 		    "net_event_register failed for ipv4/forwarding");
276 	}
277 
278 	HOOK_EVENT_INIT(&ipst->ips_ip4_loopback_in_event, NH_LOOPBACK_IN);
279 	ipst->ips_ipv4firewall_loopback_in = net_event_register(
280 	    ipst->ips_ipv4_net_data, &ipst->ips_ip4_loopback_in_event);
281 	if (ipst->ips_ipv4firewall_loopback_in == NULL) {
282 		cmn_err(CE_NOTE, "ipv4_hook_init: "
283 		    "net_event_register failed for ipv4/loopback_in");
284 	}
285 
286 	HOOK_EVENT_INIT(&ipst->ips_ip4_loopback_out_event, NH_LOOPBACK_OUT);
287 	ipst->ips_ipv4firewall_loopback_out = net_event_register(
288 	    ipst->ips_ipv4_net_data, &ipst->ips_ip4_loopback_out_event);
289 	if (ipst->ips_ipv4firewall_loopback_out == NULL) {
290 		cmn_err(CE_NOTE, "ipv4_hook_init: "
291 		    "net_event_register failed for ipv4/loopback_out");
292 	}
293 
294 	HOOK_EVENT_INIT(&ipst->ips_ip4_nic_events, NH_NIC_EVENTS);
295 	ipst->ips_ip4_nic_events.he_flags = HOOK_RDONLY;
296 	ipst->ips_ipv4nicevents = net_event_register(
297 	    ipst->ips_ipv4_net_data, &ipst->ips_ip4_nic_events);
298 	if (ipst->ips_ipv4nicevents == NULL) {
299 		cmn_err(CE_NOTE, "ipv4_hook_init: "
300 		    "net_event_register failed for ipv4/nic_events");
301 	}
302 }
303 
304 void
305 ipv4_hook_shutdown(ip_stack_t *ipst)
306 {
307 	if (ipst->ips_ipv4firewall_forwarding != NULL) {
308 		(void) net_event_shutdown(ipst->ips_ipv4_net_data,
309 		    &ipst->ips_ip4_forwarding_event);
310 	}
311 
312 	if (ipst->ips_ipv4firewall_physical_in != NULL) {
313 		(void) net_event_shutdown(ipst->ips_ipv4_net_data,
314 		    &ipst->ips_ip4_physical_in_event);
315 	}
316 
317 	if (ipst->ips_ipv4firewall_physical_out != NULL) {
318 		(void) net_event_shutdown(ipst->ips_ipv4_net_data,
319 		    &ipst->ips_ip4_physical_out_event);
320 	}
321 
322 	if (ipst->ips_ipv4firewall_loopback_in != NULL) {
323 		(void) net_event_shutdown(ipst->ips_ipv4_net_data,
324 		    &ipst->ips_ip4_loopback_in_event);
325 	}
326 
327 	if (ipst->ips_ipv4firewall_loopback_out != NULL) {
328 		(void) net_event_shutdown(ipst->ips_ipv4_net_data,
329 		    &ipst->ips_ip4_loopback_out_event);
330 	}
331 
332 	if (ipst->ips_ipv4nicevents != NULL) {
333 		(void) net_event_shutdown(ipst->ips_ipv4_net_data,
334 		    &ipst->ips_ip4_nic_events);
335 	}
336 
337 	(void) net_family_shutdown(ipst->ips_ipv4_net_data,
338 	    &ipst->ips_ipv4root);
339 }
340 
341 void
342 ipv4_hook_destroy(ip_stack_t *ipst)
343 {
344 	if (ipst->ips_ipv4firewall_forwarding != NULL) {
345 		if (net_event_unregister(ipst->ips_ipv4_net_data,
346 		    &ipst->ips_ip4_forwarding_event) == 0)
347 			ipst->ips_ipv4firewall_forwarding = NULL;
348 	}
349 
350 	if (ipst->ips_ipv4firewall_physical_in != NULL) {
351 		if (net_event_unregister(ipst->ips_ipv4_net_data,
352 		    &ipst->ips_ip4_physical_in_event) == 0)
353 			ipst->ips_ipv4firewall_physical_in = NULL;
354 	}
355 
356 	if (ipst->ips_ipv4firewall_physical_out != NULL) {
357 		if (net_event_unregister(ipst->ips_ipv4_net_data,
358 		    &ipst->ips_ip4_physical_out_event) == 0)
359 			ipst->ips_ipv4firewall_physical_out = NULL;
360 	}
361 
362 	if (ipst->ips_ipv4firewall_loopback_in != NULL) {
363 		if (net_event_unregister(ipst->ips_ipv4_net_data,
364 		    &ipst->ips_ip4_loopback_in_event) == 0)
365 			ipst->ips_ipv4firewall_loopback_in = NULL;
366 	}
367 
368 	if (ipst->ips_ipv4firewall_loopback_out != NULL) {
369 		if (net_event_unregister(ipst->ips_ipv4_net_data,
370 		    &ipst->ips_ip4_loopback_out_event) == 0)
371 			ipst->ips_ipv4firewall_loopback_out = NULL;
372 	}
373 
374 	if (ipst->ips_ipv4nicevents != NULL) {
375 		if (net_event_unregister(ipst->ips_ipv4_net_data,
376 		    &ipst->ips_ip4_nic_events) == 0)
377 			ipst->ips_ipv4nicevents = NULL;
378 	}
379 
380 	(void) net_family_unregister(ipst->ips_ipv4_net_data,
381 	    &ipst->ips_ipv4root);
382 }
383 
384 /*
385  * Initialize IPv6 hooks family and event
386  */
387 void
388 ipv6_hook_init(ip_stack_t *ipst)
389 {
390 
391 	HOOK_FAMILY_INIT(&ipst->ips_ipv6root, Hn_IPV6);
392 	if (net_family_register(ipst->ips_ipv6_net_data, &ipst->ips_ipv6root)
393 	    != 0) {
394 		cmn_err(CE_NOTE, "ipv6_hook_init: "
395 		    "net_family_register failed for ipv6");
396 	}
397 
398 	HOOK_EVENT_INIT(&ipst->ips_ip6_physical_in_event, NH_PHYSICAL_IN);
399 	ipst->ips_ipv6firewall_physical_in = net_event_register(
400 	    ipst->ips_ipv6_net_data, &ipst->ips_ip6_physical_in_event);
401 	if (ipst->ips_ipv6firewall_physical_in == NULL) {
402 		cmn_err(CE_NOTE, "ipv6_hook_init: "
403 		    "net_event_register failed for ipv6/physical_in");
404 	}
405 
406 	HOOK_EVENT_INIT(&ipst->ips_ip6_physical_out_event, NH_PHYSICAL_OUT);
407 	ipst->ips_ipv6firewall_physical_out = net_event_register(
408 	    ipst->ips_ipv6_net_data, &ipst->ips_ip6_physical_out_event);
409 	if (ipst->ips_ipv6firewall_physical_out == NULL) {
410 		cmn_err(CE_NOTE, "ipv6_hook_init: "
411 		    "net_event_register failed for ipv6/physical_out");
412 	}
413 
414 	HOOK_EVENT_INIT(&ipst->ips_ip6_forwarding_event, NH_FORWARDING);
415 	ipst->ips_ipv6firewall_forwarding = net_event_register(
416 	    ipst->ips_ipv6_net_data, &ipst->ips_ip6_forwarding_event);
417 	if (ipst->ips_ipv6firewall_forwarding == NULL) {
418 		cmn_err(CE_NOTE, "ipv6_hook_init: "
419 		    "net_event_register failed for ipv6/forwarding");
420 	}
421 
422 	HOOK_EVENT_INIT(&ipst->ips_ip6_loopback_in_event, NH_LOOPBACK_IN);
423 	ipst->ips_ipv6firewall_loopback_in = net_event_register(
424 	    ipst->ips_ipv6_net_data, &ipst->ips_ip6_loopback_in_event);
425 	if (ipst->ips_ipv6firewall_loopback_in == NULL) {
426 		cmn_err(CE_NOTE, "ipv6_hook_init: "
427 		    "net_event_register failed for ipv6/loopback_in");
428 	}
429 
430 	HOOK_EVENT_INIT(&ipst->ips_ip6_loopback_out_event, NH_LOOPBACK_OUT);
431 	ipst->ips_ipv6firewall_loopback_out = net_event_register(
432 	    ipst->ips_ipv6_net_data, &ipst->ips_ip6_loopback_out_event);
433 	if (ipst->ips_ipv6firewall_loopback_out == NULL) {
434 		cmn_err(CE_NOTE, "ipv6_hook_init: "
435 		    "net_event_register failed for ipv6/loopback_out");
436 	}
437 
438 	HOOK_EVENT_INIT(&ipst->ips_ip6_nic_events, NH_NIC_EVENTS);
439 	ipst->ips_ip6_nic_events.he_flags = HOOK_RDONLY;
440 	ipst->ips_ipv6nicevents = net_event_register(
441 	    ipst->ips_ipv6_net_data, &ipst->ips_ip6_nic_events);
442 	if (ipst->ips_ipv6nicevents == NULL) {
443 		cmn_err(CE_NOTE, "ipv6_hook_init: "
444 		    "net_event_register failed for ipv6/nic_events");
445 	}
446 }
447 
448 void
449 ipv6_hook_shutdown(ip_stack_t *ipst)
450 {
451 	if (ipst->ips_ipv6firewall_forwarding != NULL) {
452 		(void) net_event_shutdown(ipst->ips_ipv6_net_data,
453 		    &ipst->ips_ip6_forwarding_event);
454 	}
455 
456 	if (ipst->ips_ipv6firewall_physical_in != NULL) {
457 		(void) net_event_shutdown(ipst->ips_ipv6_net_data,
458 		    &ipst->ips_ip6_physical_in_event);
459 	}
460 
461 	if (ipst->ips_ipv6firewall_physical_out != NULL) {
462 		(void) net_event_shutdown(ipst->ips_ipv6_net_data,
463 		    &ipst->ips_ip6_physical_out_event);
464 	}
465 
466 	if (ipst->ips_ipv6firewall_loopback_in != NULL) {
467 		(void) net_event_shutdown(ipst->ips_ipv6_net_data,
468 		    &ipst->ips_ip6_loopback_in_event);
469 	}
470 
471 	if (ipst->ips_ipv6firewall_loopback_out != NULL) {
472 		(void) net_event_shutdown(ipst->ips_ipv6_net_data,
473 		    &ipst->ips_ip6_loopback_out_event);
474 	}
475 
476 	if (ipst->ips_ipv6nicevents != NULL) {
477 		(void) net_event_shutdown(ipst->ips_ipv6_net_data,
478 		    &ipst->ips_ip6_nic_events);
479 	}
480 
481 	(void) net_family_shutdown(ipst->ips_ipv6_net_data,
482 	    &ipst->ips_ipv6root);
483 }
484 
485 void
486 ipv6_hook_destroy(ip_stack_t *ipst)
487 {
488 	if (ipst->ips_ipv6firewall_forwarding != NULL) {
489 		if (net_event_unregister(ipst->ips_ipv6_net_data,
490 		    &ipst->ips_ip6_forwarding_event) == 0)
491 			ipst->ips_ipv6firewall_forwarding = NULL;
492 	}
493 
494 	if (ipst->ips_ipv6firewall_physical_in != NULL) {
495 		if (net_event_unregister(ipst->ips_ipv6_net_data,
496 		    &ipst->ips_ip6_physical_in_event) == 0)
497 			ipst->ips_ipv6firewall_physical_in = NULL;
498 	}
499 
500 	if (ipst->ips_ipv6firewall_physical_out != NULL) {
501 		if (net_event_unregister(ipst->ips_ipv6_net_data,
502 		    &ipst->ips_ip6_physical_out_event) == 0)
503 			ipst->ips_ipv6firewall_physical_out = NULL;
504 	}
505 
506 	if (ipst->ips_ipv6firewall_loopback_in != NULL) {
507 		if (net_event_unregister(ipst->ips_ipv6_net_data,
508 		    &ipst->ips_ip6_loopback_in_event) == 0)
509 			ipst->ips_ipv6firewall_loopback_in = NULL;
510 	}
511 
512 	if (ipst->ips_ipv6firewall_loopback_out != NULL) {
513 		if (net_event_unregister(ipst->ips_ipv6_net_data,
514 		    &ipst->ips_ip6_loopback_out_event) == 0)
515 			ipst->ips_ipv6firewall_loopback_out = NULL;
516 	}
517 
518 	if (ipst->ips_ipv6nicevents != NULL) {
519 		if (net_event_unregister(ipst->ips_ipv6_net_data,
520 		    &ipst->ips_ip6_nic_events) == 0)
521 			ipst->ips_ipv6nicevents = NULL;
522 	}
523 
524 	(void) net_family_unregister(ipst->ips_ipv6_net_data,
525 	    &ipst->ips_ipv6root);
526 }
527 
528 /*
529  * Determine the name of an IPv4 interface
530  */
531 static int
532 ip_getifname(net_handle_t neti, phy_if_t phy_ifdata, char *buffer,
533     const size_t buflen)
534 {
535 	return (ip_getifname_impl(phy_ifdata, buffer, buflen, B_FALSE,
536 	    neti->netd_stack->nts_netstack->netstack_ip));
537 }
538 
539 /*
540  * Determine the name of an IPv6 interface
541  */
542 static int
543 ipv6_getifname(net_handle_t neti, phy_if_t phy_ifdata, char *buffer,
544     const size_t buflen)
545 {
546 	return (ip_getifname_impl(phy_ifdata, buffer, buflen, B_TRUE,
547 	    neti->netd_stack->nts_netstack->netstack_ip));
548 }
549 
550 /*
551  * Shared implementation to determine the name of a given network interface
552  */
553 /* ARGSUSED */
554 static int
555 ip_getifname_impl(phy_if_t phy_ifdata,
556     char *buffer, const size_t buflen, boolean_t isv6, ip_stack_t *ipst)
557 {
558 	ill_t *ill;
559 	char *name;
560 
561 	ASSERT(buffer != NULL);
562 
563 	ill = ill_lookup_on_ifindex((uint_t)phy_ifdata, isv6, NULL, NULL,
564 	    NULL, NULL, ipst);
565 	if (ill != NULL) {
566 		name = ill->ill_name;
567 	} else {
568 		/* Fallback to group names only if hook_emulation is set */
569 		if (ipst->ips_ipmp_hook_emulation) {
570 			ill = ill_group_lookup_on_ifindex((uint_t)phy_ifdata,
571 			    isv6, ipst);
572 		}
573 		if (ill == NULL)
574 			return (1);
575 		name = ill->ill_phyint->phyint_groupname;
576 	}
577 	if (name != NULL) {
578 		(void) strlcpy(buffer, name, buflen);
579 		ill_refrele(ill);
580 		return (0);
581 	} else {
582 		ill_refrele(ill);
583 		return (1);
584 	}
585 
586 }
587 
588 /*
589  * Determine the MTU of an IPv4 network interface
590  */
591 static int
592 ip_getmtu(net_handle_t neti, phy_if_t phy_ifdata, lif_if_t ifdata)
593 {
594 	netstack_t *ns;
595 
596 	ns = neti->netd_stack->nts_netstack;
597 	ASSERT(ns != NULL);
598 	return (ip_getmtu_impl(phy_ifdata, ifdata, B_FALSE, ns->netstack_ip));
599 }
600 
601 /*
602  * Determine the MTU of an IPv6 network interface
603  */
604 static int
605 ipv6_getmtu(net_handle_t neti, phy_if_t phy_ifdata, lif_if_t ifdata)
606 {
607 	netstack_t *ns;
608 
609 	ns = neti->netd_stack->nts_netstack;
610 	ASSERT(ns != NULL);
611 	return (ip_getmtu_impl(phy_ifdata, ifdata, B_TRUE, ns->netstack_ip));
612 }
613 
614 /*
615  * Shared implementation to determine the MTU of a network interface
616  *
617  * Note: this does not handle a non-zero ifdata when ipmp_hook_emulation is set.
618  * But IP Filter only uses a zero ifdata.
619  */
620 /* ARGSUSED */
621 static int
622 ip_getmtu_impl(phy_if_t phy_ifdata, lif_if_t ifdata, boolean_t isv6,
623     ip_stack_t *ipst)
624 {
625 	lif_if_t ipifid;
626 	ipif_t *ipif;
627 	int mtu;
628 
629 	ipifid = UNMAP_IPIF_ID(ifdata);
630 
631 	ipif = ipif_getby_indexes((uint_t)phy_ifdata, (uint_t)ipifid,
632 	    isv6, ipst);
633 	if (ipif == NULL)
634 		return (0);
635 
636 	mtu = ipif->ipif_mtu;
637 	ipif_refrele(ipif);
638 
639 	if (mtu == 0) {
640 		ill_t *ill;
641 
642 		if ((ill = ill_lookup_on_ifindex((uint_t)phy_ifdata, isv6,
643 		    NULL, NULL, NULL, NULL, ipst)) == NULL) {
644 			/*
645 			 * Fallback to group names only if hook_emulation
646 			 * is set
647 			 */
648 			if (ipst->ips_ipmp_hook_emulation) {
649 				ill = ill_group_lookup_on_ifindex(
650 				    (uint_t)phy_ifdata, isv6, ipst);
651 			}
652 			if (ill == NULL)
653 				return (0);
654 		}
655 		mtu = ill->ill_max_frag;
656 		ill_refrele(ill);
657 	}
658 
659 	return (mtu);
660 }
661 
662 /*
663  * Determine if path MTU discovery is enabled for IP
664  */
665 static int
666 ip_getpmtuenabled(net_handle_t neti)
667 {
668 	netstack_t *ns;
669 
670 	ns = neti->netd_stack->nts_netstack;
671 	ASSERT(ns != NULL);
672 	return (ns->netstack_ip->ips_ip_path_mtu_discovery);
673 }
674 
675 /*
676  * Get next interface from the current list of IPv4 physical network interfaces
677  *
678  * Note: this does not handle the case when ipmp_hook_emulation is set.
679  * But IP Filter does not use this function.
680  */
681 static phy_if_t
682 ip_phygetnext(net_handle_t neti, phy_if_t phy_ifdata)
683 {
684 	netstack_t *ns;
685 
686 	ns = neti->netd_stack->nts_netstack;
687 	ASSERT(ns != NULL);
688 	return (ill_get_next_ifindex(phy_ifdata, B_FALSE, ns->netstack_ip));
689 }
690 
691 /*
692  * Get next interface from the current list of IPv6 physical network interfaces
693  */
694 static phy_if_t
695 ipv6_phygetnext(net_handle_t neti, phy_if_t phy_ifdata)
696 {
697 	netstack_t *ns;
698 
699 	ns = neti->netd_stack->nts_netstack;
700 	ASSERT(ns != NULL);
701 	return (ill_get_next_ifindex(phy_ifdata, B_TRUE, ns->netstack_ip));
702 }
703 
704 /*
705  * Determine if a network interface name exists for IPv4
706  */
707 static phy_if_t
708 ip_phylookup(net_handle_t neti, const char *name)
709 {
710 	netstack_t *ns;
711 
712 	ns = neti->netd_stack->nts_netstack;
713 	ASSERT(ns != NULL);
714 	return (ip_phylookup_impl(name, B_FALSE, ns->netstack_ip));
715 }
716 
717 /*
718  * Determine if a network interface name exists for IPv6
719  */
720 static phy_if_t
721 ipv6_phylookup(net_handle_t neti, const char *name)
722 {
723 	netstack_t *ns;
724 
725 	ns = neti->netd_stack->nts_netstack;
726 	ASSERT(ns != NULL);
727 	return (ip_phylookup_impl(name, B_TRUE, ns->netstack_ip));
728 }
729 
730 /*
731  * Implement looking up an ill_t based on the name supplied and matching
732  * it up with either IPv4 or IPv6.  ill_get_ifindex_by_name() is not used
733  * because it does not match on the address family in addition to the name.
734  */
735 static phy_if_t
736 ip_phylookup_impl(const char *name, boolean_t isv6, ip_stack_t *ipst)
737 {
738 	phy_if_t phy;
739 	ill_t *ill;
740 
741 	ill = ill_lookup_on_name((char *)name, B_FALSE, isv6, NULL, NULL,
742 	    NULL, NULL, NULL, ipst);
743 
744 	/* Fallback to group names only if hook_emulation is set */
745 	if (ill == NULL && ipst->ips_ipmp_hook_emulation) {
746 		ill = ill_group_lookup_on_name((char *)name, isv6, ipst);
747 	}
748 	if (ill == NULL)
749 		return (0);
750 
751 	phy = ill->ill_phyint->phyint_hook_ifindex;
752 
753 	ill_refrele(ill);
754 
755 	return (phy);
756 }
757 
758 /*
759  * Get next interface from the current list of IPv4 logical network interfaces
760  */
761 static lif_if_t
762 ip_lifgetnext(net_handle_t neti, phy_if_t phy_ifdata, lif_if_t ifdata)
763 {
764 	netstack_t *ns;
765 
766 	ns = neti->netd_stack->nts_netstack;
767 	ASSERT(ns != NULL);
768 	return (ip_lifgetnext_impl(phy_ifdata, ifdata, B_FALSE,
769 	    ns->netstack_ip));
770 }
771 
772 /*
773  * Get next interface from the current list of IPv6 logical network interfaces
774  */
775 static lif_if_t
776 ipv6_lifgetnext(net_handle_t neti, phy_if_t phy_ifdata, lif_if_t ifdata)
777 {
778 	netstack_t *ns;
779 
780 	ns = neti->netd_stack->nts_netstack;
781 	ASSERT(ns != NULL);
782 	return (ip_lifgetnext_impl(phy_ifdata, ifdata, B_TRUE,
783 	    ns->netstack_ip));
784 }
785 
786 /*
787  * Shared implementation to get next interface from the current list of
788  * logical network interfaces
789  *
790  * Note: this does not handle the case when ipmp_hook_emulation is set.
791  * But IP Filter does not use this function.
792  */
793 static lif_if_t
794 ip_lifgetnext_impl(phy_if_t phy_ifdata, lif_if_t ifdata, boolean_t isv6,
795     ip_stack_t *ipst)
796 {
797 	lif_if_t newidx, oldidx;
798 	boolean_t nextok;
799 	ipif_t *ipif;
800 	ill_t *ill;
801 
802 	ill = ill_lookup_on_ifindex(phy_ifdata, isv6, NULL, NULL,
803 	    NULL, NULL, ipst);
804 	if (ill == NULL)
805 		return (0);
806 
807 	if (ifdata != 0) {
808 		oldidx = UNMAP_IPIF_ID(ifdata);
809 		nextok = B_FALSE;
810 	} else {
811 		oldidx = 0;
812 		nextok = B_TRUE;
813 	}
814 
815 	mutex_enter(&ill->ill_lock);
816 	if (ill->ill_state_flags & ILL_CONDEMNED) {
817 		mutex_exit(&ill->ill_lock);
818 		ill_refrele(ill);
819 		return (0);
820 	}
821 
822 	/*
823 	 * It's safe to iterate the ill_ipif list when holding an ill_lock.
824 	 * And it's also safe to access ipif_id without ipif refhold.
825 	 * See ipif_get_id().
826 	 */
827 	for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
828 		if (!IPIF_CAN_LOOKUP(ipif))
829 			continue;
830 		if (nextok) {
831 			ipif_refhold_locked(ipif);
832 			break;
833 		} else if (oldidx == ipif->ipif_id) {
834 			nextok = B_TRUE;
835 		}
836 	}
837 
838 	mutex_exit(&ill->ill_lock);
839 	ill_refrele(ill);
840 
841 	if (ipif == NULL)
842 		return (0);
843 
844 	newidx = ipif->ipif_id;
845 	ipif_refrele(ipif);
846 
847 	return (MAP_IPIF_ID(newidx));
848 }
849 
850 /*
851  * Inject an IPv4 packet to or from an interface
852  */
853 static int
854 ip_inject(net_handle_t neti, inject_t style, net_inject_t *packet)
855 {
856 	netstack_t *ns;
857 
858 	ns = neti->netd_stack->nts_netstack;
859 	ASSERT(ns != NULL);
860 	return (ip_inject_impl(style, packet, B_FALSE, ns->netstack_ip));
861 }
862 
863 
864 /*
865  * Inject an IPv6 packet to or from an interface
866  */
867 static int
868 ipv6_inject(net_handle_t neti, inject_t style, net_inject_t *packet)
869 {
870 	netstack_t *ns;
871 
872 	ns = neti->netd_stack->nts_netstack;
873 	return (ip_inject_impl(style, packet, B_TRUE, ns->netstack_ip));
874 }
875 
876 /*
877  * Shared implementation to inject a packet to or from an interface
878  * Return value:
879  *   0: successful
880  *  -1: memory allocation failed
881  *   1: other errors
882  */
883 static int
884 ip_inject_impl(inject_t style, net_inject_t *packet, boolean_t isv6,
885     ip_stack_t *ipst)
886 {
887 	struct sockaddr_in6 *sin6;
888 	ddi_taskq_t *tq = NULL;
889 	void (* func)(void *);
890 	injection_t *inject;
891 	ip6_t *ip6h;
892 	ire_t *ire;
893 	mblk_t *mp;
894 
895 	ASSERT(packet != NULL);
896 	ASSERT(packet->ni_packet != NULL);
897 	ASSERT(packet->ni_packet->b_datap->db_type == M_DATA);
898 
899 	switch (style) {
900 	case NI_QUEUE_IN:
901 		inject = kmem_alloc(sizeof (*inject), KM_NOSLEEP);
902 		if (inject == NULL)
903 			return (-1);
904 		inject->inj_data = *packet;
905 		inject->inj_isv6 = isv6;
906 		/*
907 		 * deliver up into the kernel, immitating its reception by a
908 		 * network interface, add to list and schedule timeout
909 		 */
910 		func = ip_ni_queue_in_func;
911 		tq = eventq_queue_in;
912 		break;
913 
914 	case NI_QUEUE_OUT:
915 		inject = kmem_alloc(sizeof (*inject), KM_NOSLEEP);
916 		if (inject == NULL)
917 			return (-1);
918 		inject->inj_data = *packet;
919 		inject->inj_isv6 = isv6;
920 		/*
921 		 * deliver out of the kernel, as if it were being sent via a
922 		 * raw socket so that IPFilter will see it again, add to list
923 		 * and schedule timeout
924 		 */
925 		func = ip_ni_queue_out_func;
926 		tq = eventq_queue_out;
927 		break;
928 
929 	case NI_DIRECT_OUT:
930 		/*
931 		 * Note:
932 		 * For IPv4, the code path below will be greatly simplified
933 		 * with the delivery of surya - it will become a single
934 		 * function call to X.  A follow on project is aimed to
935 		 * provide similar functionality for IPv6.
936 		 */
937 		mp = packet->ni_packet;
938 
939 		if (!isv6) {
940 			struct sockaddr *sock;
941 
942 			sock = (struct sockaddr *)&packet->ni_addr;
943 			/*
944 			 * ipfil_sendpkt was provided by surya to ease the
945 			 * problems associated with sending out a packet.
946 			 * Currently this function only supports IPv4.
947 			 */
948 			switch (ipfil_sendpkt(sock, mp, packet->ni_physical,
949 			    netstackid_to_zoneid(
950 			    ipst->ips_netstack->netstack_stackid))) {
951 			case 0 :
952 			case EINPROGRESS:
953 				return (0);
954 			case ECOMM :
955 			case ENONET :
956 				return (1);
957 			default :
958 				return (1);
959 			}
960 			/* NOTREACHED */
961 
962 		}
963 
964 		ip6h = (ip6_t *)mp->b_rptr;
965 		sin6 = (struct sockaddr_in6 *)&packet->ni_addr;
966 		ASSERT(sin6->sin6_family == AF_INET6);
967 
968 		ire = ire_route_lookup_v6(&sin6->sin6_addr, 0, 0, 0,
969 		    NULL, NULL, ALL_ZONES, NULL,
970 		    MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE,
971 		    ipst);
972 
973 		if (ire == NULL) {
974 			ip2dbg(("ip_inject: ire_cache_lookup failed\n"));
975 			freemsg(mp);
976 			return (1);
977 		}
978 
979 		if (ire->ire_stq == NULL) {
980 			/* Send to loopback destination. */
981 			if (ire->ire_rfq == NULL) {
982 				ip2dbg(("ip_inject: bad nexthop\n"));
983 				ire_refrele(ire);
984 				freemsg(mp);
985 				return (1);
986 			}
987 			DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL,
988 			    void_ip_t *, ip6h, __dtrace_ipsr_ill_t *,
989 			    ire->ire_ipif->ipif_ill, ipha_t *, NULL, ip6_t *,
990 			    ip6h, int, 1);
991 			ip_wput_local_v6(ire->ire_rfq,
992 			    ire->ire_ipif->ipif_ill, ip6h, mp, ire, 0);
993 			ire_refrele(ire);
994 			return (0);
995 		}
996 
997 		mp->b_queue = ire->ire_stq;
998 
999 		if (ire->ire_nce == NULL ||
1000 		    ire->ire_nce->nce_fp_mp == NULL &&
1001 		    ire->ire_nce->nce_res_mp == NULL) {
1002 			ip_newroute_v6(ire->ire_stq, mp,
1003 			    &sin6->sin6_addr, NULL, NULL, ALL_ZONES, ipst);
1004 
1005 			ire_refrele(ire);
1006 			return (0);
1007 		} else {
1008 			/* prepend L2 header for IPv6 packets. */
1009 			mblk_t *llmp;
1010 
1011 			/*
1012 			 * Lock IREs, see 6420438
1013 			 */
1014 			mutex_enter(&ire->ire_lock);
1015 			llmp = ire->ire_nce->nce_fp_mp ?
1016 			    ire->ire_nce->nce_fp_mp :
1017 			    ire->ire_nce->nce_res_mp;
1018 
1019 			if ((mp = dupb(llmp)) == NULL &&
1020 			    (mp = copyb(llmp)) == NULL) {
1021 				ip2dbg(("ip_inject: llhdr failed\n"));
1022 				mutex_exit(&ire->ire_lock);
1023 				ire_refrele(ire);
1024 				freemsg(mp);
1025 				return (1);
1026 			}
1027 			mutex_exit(&ire->ire_lock);
1028 			linkb(mp, packet->ni_packet);
1029 		}
1030 
1031 		mp->b_queue = ire->ire_stq;
1032 
1033 		break;
1034 	default:
1035 		freemsg(packet->ni_packet);
1036 		return (1);
1037 	}
1038 
1039 	if (tq) {
1040 		inject->inj_ptr = ipst;
1041 		if (ddi_taskq_dispatch(tq, func, (void *)inject,
1042 		    DDI_SLEEP) == DDI_FAILURE) {
1043 			ip2dbg(("ip_inject:  ddi_taskq_dispatch failed\n"));
1044 			freemsg(packet->ni_packet);
1045 			return (1);
1046 		}
1047 	} else {
1048 		putnext(ire->ire_stq, mp);
1049 		ire_refrele(ire);
1050 	}
1051 
1052 	return (0);
1053 }
1054 
1055 /*
1056  * Find the interface used for traffic to a given IPv4 address
1057  */
1058 static phy_if_t
1059 ip_routeto(net_handle_t neti, struct sockaddr *address, struct sockaddr *next)
1060 {
1061 	netstack_t *ns;
1062 
1063 	ASSERT(address != NULL);
1064 
1065 	if (address->sa_family != AF_INET)
1066 		return (0);
1067 
1068 	ns = neti->netd_stack->nts_netstack;
1069 	ASSERT(ns != NULL);
1070 
1071 	return (ip_routeto_impl(address, next, ns->netstack_ip));
1072 }
1073 
1074 /*
1075  * Find the interface used for traffic to a given IPv6 address
1076  */
1077 static phy_if_t
1078 ipv6_routeto(net_handle_t neti, struct sockaddr *address, struct sockaddr *next)
1079 {
1080 	netstack_t *ns;
1081 
1082 	ASSERT(address != NULL);
1083 
1084 	if (address->sa_family != AF_INET6)
1085 		return (0);
1086 
1087 	ns = neti->netd_stack->nts_netstack;
1088 	ASSERT(ns != NULL);
1089 
1090 	return (ip_routeto_impl(address, next, ns->netstack_ip));
1091 }
1092 
1093 
1094 /*
1095  * Find the interface used for traffic to an address.
1096  * For lint reasons, next/next6/sin/sin6 are all declared and assigned
1097  * a value at the top.  The alternative would end up with two bunches
1098  * of assignments, with each bunch setting half to NULL.
1099  */
1100 static phy_if_t
1101 ip_routeto_impl(struct sockaddr *address, struct sockaddr *nexthop,
1102     ip_stack_t *ipst)
1103 {
1104 	struct sockaddr_in6 *next6 = (struct sockaddr_in6 *)nexthop;
1105 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)address;
1106 	struct sockaddr_in *next = (struct sockaddr_in *)nexthop;
1107 	struct sockaddr_in *sin = (struct sockaddr_in *)address;
1108 	ire_t *sire = NULL;
1109 	ire_t *ire;
1110 	ill_t *ill;
1111 	phy_if_t phy_if;
1112 
1113 	if (address->sa_family == AF_INET6) {
1114 		ire = ire_route_lookup_v6(&sin6->sin6_addr, NULL,
1115 		    0, 0, NULL, &sire, ALL_ZONES, NULL,
1116 		    MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE,
1117 		    ipst);
1118 	} else {
1119 		ire = ire_route_lookup(sin->sin_addr.s_addr, 0,
1120 		    0, 0, NULL, &sire, ALL_ZONES, NULL,
1121 		    MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE,
1122 		    ipst);
1123 	}
1124 
1125 	if (ire == NULL)
1126 		return (0);
1127 
1128 	/*
1129 	 * For some destinations, we have routes that are dead ends, so
1130 	 * return to indicate that no physical interface can be used to
1131 	 * reach the destination.
1132 	 */
1133 	if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) != 0) {
1134 		if (sire != NULL)
1135 			ire_refrele(sire);
1136 		ire_refrele(ire);
1137 		return (0);
1138 	}
1139 
1140 	ill = ire_to_ill(ire);
1141 	if (ill == NULL) {
1142 		if (sire != NULL)
1143 			ire_refrele(sire);
1144 		ire_refrele(ire);
1145 		return (0);
1146 	}
1147 
1148 	if (nexthop != NULL) {
1149 		if (address->sa_family == AF_INET6) {
1150 			next->sin_addr.s_addr = sire ? sire->ire_gateway_addr :
1151 			    sin->sin_addr.s_addr;
1152 		} else {
1153 			next6->sin6_addr = sire ? sire->ire_gateway_addr_v6 :
1154 			    sin6->sin6_addr;
1155 		}
1156 	}
1157 
1158 	ASSERT(ill != NULL);
1159 	phy_if = (phy_if_t)ill->ill_phyint->phyint_hook_ifindex;
1160 	if (sire != NULL)
1161 		ire_refrele(sire);
1162 	ire_refrele(ire);
1163 
1164 	return (phy_if);
1165 }
1166 
1167 /*
1168  * Determine if checksumming is being used for the given packet.
1169  *
1170  * Return value:
1171  *   NET_HCK_NONE: full checksum recalculation is required
1172  *   NET_HCK_L3_FULL: full layer 3 checksum
1173  *   NET_HCK_L4_FULL: full layer 4 checksum
1174  *   NET_HCK_L4_PART: partial layer 4 checksum
1175  */
1176 /*ARGSUSED*/
1177 static int
1178 ip_ispartialchecksum(net_handle_t neti, mblk_t *mp)
1179 {
1180 	int ret = 0;
1181 
1182 	ASSERT(mp != NULL);
1183 
1184 	if ((DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM) != 0) {
1185 		ret |= (int)NET_HCK_L4_FULL;
1186 		if ((DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM) != 0)
1187 			ret |= (int)NET_HCK_L3_FULL;
1188 	}
1189 	if ((DB_CKSUMFLAGS(mp) & HCK_PARTIALCKSUM) != 0) {
1190 		ret |= (int)NET_HCK_L4_PART;
1191 		if ((DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM) != 0)
1192 			ret |= (int)NET_HCK_L3_FULL;
1193 	}
1194 
1195 	return (ret);
1196 }
1197 
1198 /*
1199  * Return true or false, indicating whether the network and transport
1200  * headers are correct.  Use the capabilities flags and flags set in the
1201  * dblk_t to determine whether or not the checksum is valid.
1202  *
1203  * Return:
1204  *   0: the checksum was incorrect
1205  *   1: the original checksum was correct
1206  */
1207 /*ARGSUSED*/
1208 static int
1209 ip_isvalidchecksum(net_handle_t neti, mblk_t *mp)
1210 {
1211 	unsigned char *wptr;
1212 	ipha_t *ipha = (ipha_t *)mp->b_rptr;
1213 	int hlen;
1214 	int ret;
1215 
1216 	ASSERT(mp != NULL);
1217 
1218 	if (dohwcksum &&
1219 	    DB_CKSUM16(mp) != 0xFFFF &&
1220 	    (DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM) &&
1221 	    (DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM_OK) &&
1222 	    (DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM))
1223 		return (1);
1224 
1225 	hlen = (ipha->ipha_version_and_hdr_length & 0x0F) << 2;
1226 
1227 	/*
1228 	 * Check that the mblk being passed in has enough data in it
1229 	 * before blindly checking ip_cksum.
1230 	 */
1231 	if (msgdsize(mp) < hlen)
1232 		return (0);
1233 
1234 	if (mp->b_wptr < mp->b_rptr + hlen) {
1235 		if (pullupmsg(mp, hlen) == 0)
1236 			return (0);
1237 		wptr = mp->b_wptr;
1238 	} else {
1239 		wptr = mp->b_wptr;
1240 		mp->b_wptr = mp->b_rptr + hlen;
1241 	}
1242 
1243 	if (ipha->ipha_hdr_checksum == ip_cksum(mp, 0, ipha->ipha_hdr_checksum))
1244 		ret = 1;
1245 	else
1246 		ret = 0;
1247 	mp->b_wptr = wptr;
1248 
1249 	return (ret);
1250 }
1251 
1252 /*
1253  * Unsupported with IPv6
1254  */
1255 /*ARGSUSED*/
1256 static int
1257 ipv6_isvalidchecksum(net_handle_t neti, mblk_t *mp)
1258 {
1259 	return (-1);
1260 }
1261 
1262 /*
1263  * Determine the network addresses for an IPv4 interface
1264  */
1265 static int
1266 ip_getlifaddr(net_handle_t neti, phy_if_t phy_ifdata, lif_if_t ifdata,
1267     size_t nelem, net_ifaddr_t type[], void *storage)
1268 {
1269 	netstack_t *ns;
1270 
1271 	ns = neti->netd_stack->nts_netstack;
1272 	ASSERT(ns != NULL);
1273 	return (ip_getlifaddr_impl(AF_INET, phy_ifdata, ifdata,
1274 	    nelem, type, storage, ns->netstack_ip));
1275 }
1276 
1277 /*
1278  * Determine the network addresses for an IPv6 interface
1279  */
1280 static int
1281 ipv6_getlifaddr(net_handle_t neti, phy_if_t phy_ifdata, lif_if_t ifdata,
1282     size_t nelem, net_ifaddr_t type[], void *storage)
1283 {
1284 	netstack_t *ns;
1285 
1286 	ns = neti->netd_stack->nts_netstack;
1287 	ASSERT(ns != NULL);
1288 	return (ip_getlifaddr_impl(AF_INET6, phy_ifdata, ifdata,
1289 	    nelem, type, storage, ns->netstack_ip));
1290 }
1291 
1292 /*
1293  * Shared implementation to determine the network addresses for an interface
1294  *
1295  * Note: this does not handle a non-zero ifdata when ipmp_hook_emulation is set.
1296  * But IP Filter only uses a zero ifdata.
1297  */
1298 /* ARGSUSED */
1299 static int
1300 ip_getlifaddr_impl(sa_family_t family, phy_if_t phy_ifdata,
1301     lif_if_t ifdata, size_t nelem, net_ifaddr_t type[],
1302     struct sockaddr *storage, ip_stack_t *ipst)
1303 {
1304 	struct sockaddr_in6 *sin6;
1305 	struct sockaddr_in *sin;
1306 	lif_if_t ipifid;
1307 	ipif_t *ipif;
1308 	int i;
1309 
1310 	ASSERT(type != NULL);
1311 	ASSERT(storage != NULL);
1312 
1313 	ipifid = UNMAP_IPIF_ID(ifdata);
1314 
1315 	if (family == AF_INET) {
1316 		if ((ipif = ipif_getby_indexes((uint_t)phy_ifdata,
1317 		    (uint_t)ipifid, B_FALSE, ipst)) == NULL)
1318 			return (1);
1319 
1320 		sin = (struct sockaddr_in *)storage;
1321 		for (i = 0; i < nelem; i++, sin++) {
1322 			if (ip_getifaddr_type(AF_INET, ipif, type[i],
1323 			    &sin->sin_addr) < 0) {
1324 				ip2dbg(("ip_getlifaddr_impl failed type %d\n",
1325 				    type[i]));
1326 				ipif_refrele(ipif);
1327 				return (1);
1328 			}
1329 			sin->sin_family = AF_INET;
1330 		}
1331 	} else {
1332 		if ((ipif = ipif_getby_indexes((uint_t)phy_ifdata,
1333 		    (uint_t)ipifid, B_TRUE, ipst)) == NULL)
1334 			return (1);
1335 
1336 		sin6 = (struct sockaddr_in6 *)storage;
1337 		for (i = 0; i < nelem; i++, sin6++) {
1338 			if (ip_getifaddr_type(AF_INET6, ipif, type[i],
1339 			    &sin6->sin6_addr) < 0) {
1340 				ip2dbg(("ip_getlifaddr_impl failed type %d\n",
1341 				    type[i]));
1342 				ipif_refrele(ipif);
1343 				return (1);
1344 			}
1345 			sin6->sin6_family = AF_INET6;
1346 		}
1347 	}
1348 	ipif_refrele(ipif);
1349 	return (0);
1350 }
1351 
1352 /*
1353  * ip_getlifaddr private function
1354  */
1355 static int
1356 ip_getifaddr_type(sa_family_t family, ipif_t *ill_ipif,
1357     lif_if_t type, void *storage)
1358 {
1359 	void *src_addr;
1360 	int mem_size;
1361 
1362 	ASSERT(ill_ipif != NULL);
1363 	ASSERT(storage != NULL);
1364 
1365 	if (family == AF_INET) {
1366 		mem_size = sizeof (struct in_addr);
1367 
1368 		switch (type) {
1369 		case NA_ADDRESS:
1370 			src_addr = &(ill_ipif->ipif_lcl_addr);
1371 			break;
1372 		case NA_PEER:
1373 			src_addr = &(ill_ipif->ipif_pp_dst_addr);
1374 			break;
1375 		case NA_BROADCAST:
1376 			src_addr = &(ill_ipif->ipif_brd_addr);
1377 			break;
1378 		case NA_NETMASK:
1379 			src_addr = &(ill_ipif->ipif_net_mask);
1380 			break;
1381 		default:
1382 			return (-1);
1383 			/*NOTREACHED*/
1384 		}
1385 	} else {
1386 		mem_size = sizeof (struct in6_addr);
1387 
1388 		switch (type) {
1389 		case NA_ADDRESS:
1390 			src_addr = &(ill_ipif->ipif_v6lcl_addr);
1391 			break;
1392 		case NA_PEER:
1393 			src_addr = &(ill_ipif->ipif_v6pp_dst_addr);
1394 			break;
1395 		case NA_BROADCAST:
1396 			src_addr = &(ill_ipif->ipif_v6brd_addr);
1397 			break;
1398 		case NA_NETMASK:
1399 			src_addr = &(ill_ipif->ipif_v6net_mask);
1400 			break;
1401 		default:
1402 			return (-1);
1403 			/*NOTREACHED*/
1404 		}
1405 	}
1406 
1407 	(void) memcpy(storage, src_addr, mem_size);
1408 	return (1);
1409 }
1410 
1411 /*
1412  * Deliver packet up into the kernel, immitating its reception by a
1413  * network interface.
1414  */
1415 static void
1416 ip_ni_queue_in_func(void *inject)
1417 {
1418 	ip_ni_queue_func_impl(inject, B_FALSE);
1419 }
1420 
1421 /*
1422  * Deliver out of the kernel, as if it were being sent via a
1423  * raw socket so that IPFilter will see it again.
1424  */
1425 static void
1426 ip_ni_queue_out_func(void *inject)
1427 {
1428 	ip_ni_queue_func_impl(inject, B_TRUE);
1429 }
1430 
1431 /*
1432  * Shared implementation for inject via ip_output and ip_input
1433  */
1434 static void
1435 ip_ni_queue_func_impl(injection_t *inject,  boolean_t out)
1436 {
1437 	net_inject_t *packet;
1438 	conn_t *conn;
1439 	ill_t *ill;
1440 	ip_stack_t *ipst = (ip_stack_t *)inject->inj_ptr;
1441 
1442 	ASSERT(inject != NULL);
1443 	packet = &inject->inj_data;
1444 	ASSERT(packet->ni_packet != NULL);
1445 
1446 	ill = ill_lookup_on_ifindex((uint_t)packet->ni_physical,
1447 	    B_FALSE, NULL, NULL, NULL, NULL, ipst);
1448 
1449 	/* Fallback to group names only if hook_emulation is set */
1450 	if (ill == NULL && ipst->ips_ipmp_hook_emulation) {
1451 		ill = ill_group_lookup_on_ifindex((uint_t)packet->ni_physical,
1452 		    B_FALSE, ipst);
1453 	}
1454 	if (ill == NULL) {
1455 		kmem_free(inject, sizeof (*inject));
1456 		return;
1457 	}
1458 
1459 	if (out == 0) {
1460 		if (inject->inj_isv6) {
1461 			ip_rput_v6(ill->ill_rq, packet->ni_packet);
1462 		} else {
1463 			ip_input(ill, NULL, packet->ni_packet, 0);
1464 		}
1465 		kmem_free(inject, sizeof (*inject));
1466 		ill_refrele(ill);
1467 		return;
1468 	}
1469 
1470 	/*
1471 	 * Even though ipcl_conn_create requests that it be passed
1472 	 * a different value for "TCP", in this case there may not
1473 	 * be a TCP connection backing the packet and more than
1474 	 * likely, non-TCP packets will go here too.
1475 	 */
1476 	conn = ipcl_conn_create(IPCL_IPCCONN, KM_NOSLEEP, ipst->ips_netstack);
1477 	if (conn != NULL) {
1478 		if (inject->inj_isv6) {
1479 			conn->conn_flags |= IPCL_ISV6;
1480 			conn->conn_af_isv6 = B_TRUE;
1481 			conn->conn_src_preferences = IPV6_PREFER_SRC_DEFAULT;
1482 			conn->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1483 			ip_output_v6(conn, packet->ni_packet, ill->ill_wq,
1484 			    IP_WPUT);
1485 		} else {
1486 			conn->conn_af_isv6 = B_FALSE;
1487 			conn->conn_pkt_isv6 = B_FALSE;
1488 			conn->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1489 			ip_output(conn, packet->ni_packet, ill->ill_wq,
1490 			    IP_WPUT);
1491 		}
1492 
1493 		CONN_DEC_REF(conn);
1494 	}
1495 
1496 	kmem_free(inject, sizeof (*inject));
1497 	ill_refrele(ill);
1498 }
1499 
1500 /*
1501  * taskq function for nic events.
1502  */
1503 void
1504 ip_ne_queue_func(void *arg)
1505 {
1506 	hook_event_token_t hr;
1507 	hook_nic_event_int_t *info = (hook_nic_event_int_t *)arg;
1508 	ip_stack_t *ipst;
1509 	netstack_t *ns;
1510 
1511 	ns = netstack_find_by_stackid(info->hnei_stackid);
1512 	if (ns == NULL)
1513 		goto done;
1514 
1515 	ipst = ns->netstack_ip;
1516 	if (ipst == NULL)
1517 		goto done;
1518 
1519 	hr = (info->hnei_event.hne_protocol == ipst->ips_ipv6_net_data) ?
1520 	    ipst->ips_ipv6nicevents : ipst->ips_ipv4nicevents;
1521 	(void) hook_run(info->hnei_event.hne_protocol->netd_hooks, hr,
1522 	    (hook_data_t)&info->hnei_event);
1523 
1524 done:
1525 	if (ns != NULL)
1526 		netstack_rele(ns);
1527 	kmem_free(info->hnei_event.hne_data, info->hnei_event.hne_datalen);
1528 	kmem_free(arg, sizeof (hook_nic_event_int_t));
1529 }
1530 
1531 /*
1532  * Temporary function to support IPMP emulation for IP Filter.
1533  * Lookup an ill based on the ifindex assigned to the group.
1534  * Skips unusable ones i.e. where any of these flags are set:
1535  * (PHYI_FAILED|PHYI_OFFLINE|PHYI_INACTIVE)
1536  */
1537 ill_t *
1538 ill_group_lookup_on_ifindex(uint_t index, boolean_t isv6, ip_stack_t *ipst)
1539 {
1540 	ill_t	*ill;
1541 	phyint_t *phyi;
1542 
1543 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1544 	phyi = phyint_lookup_group_ifindex(index, ipst);
1545 	if (phyi != NULL) {
1546 		ill = isv6 ? phyi->phyint_illv6: phyi->phyint_illv4;
1547 		if (ill != NULL) {
1548 			mutex_enter(&ill->ill_lock);
1549 			if (ILL_CAN_LOOKUP(ill)) {
1550 				ill_refhold_locked(ill);
1551 				mutex_exit(&ill->ill_lock);
1552 				rw_exit(&ipst->ips_ill_g_lock);
1553 				return (ill);
1554 			}
1555 			mutex_exit(&ill->ill_lock);
1556 		}
1557 	}
1558 	rw_exit(&ipst->ips_ill_g_lock);
1559 	return (NULL);
1560 }
1561 
1562 /*
1563  * Temporary function to support IPMP emulation for IP Filter.
1564  * Lookup an ill based on the group name.
1565  * Skips unusable ones i.e. where any of these flags are set:
1566  * (PHYI_FAILED|PHYI_OFFLINE|PHYI_INACTIVE)
1567  */
1568 ill_t *
1569 ill_group_lookup_on_name(char *name, boolean_t isv6, ip_stack_t *ipst)
1570 {
1571 	ill_t	*ill;
1572 	phyint_t *phyi;
1573 
1574 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1575 	phyi = phyint_lookup_group(name, B_TRUE, ipst);
1576 	if (phyi != NULL) {
1577 		ill = isv6 ? phyi->phyint_illv6: phyi->phyint_illv4;
1578 		if (ill != NULL) {
1579 			mutex_enter(&ill->ill_lock);
1580 			if (ILL_CAN_LOOKUP(ill)) {
1581 				ill_refhold_locked(ill);
1582 				mutex_exit(&ill->ill_lock);
1583 				rw_exit(&ipst->ips_ill_g_lock);
1584 				return (ill);
1585 			}
1586 			mutex_exit(&ill->ill_lock);
1587 		}
1588 	}
1589 	rw_exit(&ipst->ips_ill_g_lock);
1590 	return (NULL);
1591 }
1592