xref: /illumos-gate/usr/src/uts/common/inet/ip/ip_netinfo.c (revision 9e26e16f703d2dfcc0689de957c21efcb72473e6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/stream.h>
32 #include <sys/strsubr.h>
33 #include <sys/pattr.h>
34 #include <sys/dlpi.h>
35 #include <sys/atomic.h>
36 #include <sys/sunddi.h>
37 #include <sys/socket.h>
38 #include <sys/neti.h>
39 
40 #include <netinet/in.h>
41 #include <inet/common.h>
42 #include <inet/mib2.h>
43 #include <inet/ip.h>
44 #include <inet/ip6.h>
45 #include <inet/ip_if.h>
46 #include <inet/ip_ire.h>
47 #include <inet/ip_impl.h>
48 #include <inet/ip_ndp.h>
49 #include <inet/ipclassifier.h>
50 #include <inet/ipp_common.h>
51 #include <inet/ip_ftable.h>
52 
53 /*
54  * IPv4 netinfo entry point declarations.
55  */
56 static int 		ip_getifname(phy_if_t, char *, const size_t);
57 static int 		ip_getmtu(phy_if_t, lif_if_t);
58 static int 		ip_getpmtuenabled(void);
59 static int 		ip_getlifaddr(phy_if_t, lif_if_t, size_t,
60 			    net_ifaddr_t [], void *);
61 static phy_if_t		ip_phygetnext(phy_if_t);
62 static phy_if_t 	ip_phylookup(const char *);
63 static lif_if_t 	ip_lifgetnext(phy_if_t, lif_if_t);
64 static int 		ip_inject(inject_t, net_inject_t *);
65 static phy_if_t 	ip_routeto(struct sockaddr *);
66 static int 		ip_ispartialchecksum(mblk_t *);
67 static int 		ip_isvalidchecksum(mblk_t *);
68 
69 static int 		ipv6_getifname(phy_if_t, char *, const size_t);
70 static int 		ipv6_getmtu(phy_if_t, lif_if_t);
71 static int 		ipv6_getlifaddr(phy_if_t, lif_if_t, size_t,
72 			    net_ifaddr_t [], void *);
73 static phy_if_t 	ipv6_phygetnext(phy_if_t);
74 static phy_if_t 	ipv6_phylookup(const char *);
75 static lif_if_t 	ipv6_lifgetnext(phy_if_t, lif_if_t);
76 static int 		ipv6_inject(inject_t, net_inject_t *);
77 static phy_if_t 	ipv6_routeto(struct sockaddr *);
78 static int 		ipv6_isvalidchecksum(mblk_t *);
79 
80 /* Netinfo private functions */
81 static	int		ip_getifname_impl(phy_if_t, char *,
82 			    const size_t, boolean_t);
83 static	int		ip_getmtu_impl(phy_if_t, lif_if_t, boolean_t);
84 static	phy_if_t	ip_phylookup_impl(const char *, boolean_t);
85 static	lif_if_t	ip_lifgetnext_impl(phy_if_t, lif_if_t, boolean_t);
86 static	int		ip_inject_impl(inject_t, net_inject_t *, boolean_t);
87 static	int		ip_getifaddr_type(sa_family_t, ipif_t *, lif_if_t,
88 			    void *);
89 static	phy_if_t	ip_routeto_impl(struct sockaddr *);
90 static	int		ip_getlifaddr_impl(sa_family_t, phy_if_t, lif_if_t,
91 			    size_t, net_ifaddr_t [], struct sockaddr *);
92 static	void		ip_ni_queue_in_func(void *);
93 static	void		ip_ni_queue_out_func(void *);
94 static	void		ip_ni_queue_func_impl(injection_t *,  boolean_t);
95 
96 
/*
 * Netinfo description of IPv4: a version, the NHF_INET family identifier
 * and the IPv4 entry points.  It is handed to net_register() by
 * ip_net_init().  The initializer is positional, so the order of the
 * entries must follow the member order of net_info_t.
 */
static net_info_t ipv4info = {
	NETINFO_VERSION,
	NHF_INET,
	ip_getifname,
	ip_getmtu,
	ip_getpmtuenabled,
	ip_getlifaddr,
	ip_phygetnext,
	ip_phylookup,
	ip_lifgetnext,
	ip_inject,
	ip_routeto,
	ip_ispartialchecksum,
	ip_isvalidchecksum
};
112 
113 
/*
 * Netinfo description of IPv6: a version, the NHF_INET6 family identifier
 * and the IPv6 entry points.  It is handed to net_register() by
 * ip_net_init().  ip_getpmtuenabled() and ip_ispartialchecksum() are the
 * same functions the IPv4 table uses.  The initializer is positional, so
 * the order of the entries must follow the member order of net_info_t.
 */
static net_info_t ipv6info = {
	NETINFO_VERSION,
	NHF_INET6,
	ipv6_getifname,
	ipv6_getmtu,
	ip_getpmtuenabled,
	ipv6_getlifaddr,
	ipv6_phygetnext,
	ipv6_phylookup,
	ipv6_lifgetnext,
	ipv6_inject,
	ipv6_routeto,
	ip_ispartialchecksum,
	ipv6_isvalidchecksum
};
129 
/*
 * The taskq eventq_queue_in is used to process the upside inject messages.
 * The taskq eventq_queue_out is used to process the downside inject messages.
 * The taskq eventq_queue_nic is used to process the nic event messages.
 *
 * All three are created by ip_net_init() and destroyed by ip_net_destroy().
 * A pointer stays NULL when its taskq could not be created.
 * eventq_queue_nic is the only one without file scope; presumably it is
 * dispatched to from outside this file (no user is visible here).
 */
static ddi_taskq_t 	*eventq_queue_in = NULL;
static ddi_taskq_t 	*eventq_queue_out = NULL;
ddi_taskq_t 	*eventq_queue_nic = NULL;
138 
/*
 * Hook family descriptors for IPv4 and IPv6.  They are initialised and
 * registered by ipv4_hook_init()/ipv6_hook_init() and unregistered by
 * ipv4_hook_destroy()/ipv6_hook_destroy().
 */
static hook_family_t	ipv4root;
static hook_family_t	ipv6root;

/*
 * Hooks for firewalling
 *
 * One event descriptor per hook point and address family.  Each is set up
 * with HOOK_EVENT_INIT() and passed to net_register_event() by the
 * matching *_hook_init() routine.
 */
hook_event_t		ip4_physical_in_event;
hook_event_t		ip4_physical_out_event;
hook_event_t		ip4_forwarding_event;
hook_event_t		ip4_loopback_in_event;
hook_event_t		ip4_loopback_out_event;
hook_event_t		ip4_nic_events;
hook_event_t		ip6_physical_in_event;
hook_event_t		ip6_physical_out_event;
hook_event_t		ip6_forwarding_event;
hook_event_t		ip6_loopback_in_event;
hook_event_t		ip6_loopback_out_event;
hook_event_t		ip6_nic_events;

/*
 * Tokens returned by net_register_event() for the events above.  A token
 * is NULL when registration failed and is reset to NULL once the event has
 * been unregistered successfully.
 */
hook_event_token_t	ipv4firewall_physical_in;
hook_event_token_t	ipv4firewall_physical_out;
hook_event_token_t	ipv4firewall_forwarding;
hook_event_token_t	ipv4firewall_loopback_in;
hook_event_token_t	ipv4firewall_loopback_out;
hook_event_token_t	ipv4nicevents;
hook_event_token_t	ipv6firewall_physical_in;
hook_event_token_t	ipv6firewall_physical_out;
hook_event_token_t	ipv6firewall_forwarding;
hook_event_token_t	ipv6firewall_loopback_in;
hook_event_token_t	ipv6firewall_loopback_out;
hook_event_token_t	ipv6nicevents;
170 
/*
 * Handles returned by net_register() for the IPv4 and IPv6 netinfo tables.
 * Set by ip_net_init(); reset to NULL by ip_net_destroy() when
 * net_unregister() succeeds.
 */
net_data_t		ipv4 = NULL;
net_data_t		ipv6 = NULL;
173 
174 
175 /*
176  * Register IPv4 and IPv6 netinfo functions and initialize queues for inject.
177  */
178 void
179 ip_net_init()
180 {
181 
182 	ipv4 = net_register(&ipv4info);
183 	ASSERT(ipv4 != NULL);
184 
185 	ipv6 = net_register(&ipv6info);
186 	ASSERT(ipv6 != NULL);
187 
188 	if (eventq_queue_out == NULL) {
189 		eventq_queue_out = ddi_taskq_create(NULL,
190 		    "IP_INJECT_QUEUE_OUT", 1, TASKQ_DEFAULTPRI, 0);
191 
192 		if (eventq_queue_out == NULL)
193 			cmn_err(CE_NOTE, "ipv4_net_init: "
194 			    "ddi_taskq_create failed for IP_INJECT_QUEUE_OUT");
195 	}
196 
197 	if (eventq_queue_in == NULL) {
198 		eventq_queue_in = ddi_taskq_create(NULL,
199 		    "IP_INJECT_QUEUE_IN", 1, TASKQ_DEFAULTPRI, 0);
200 
201 		if (eventq_queue_in == NULL)
202 			cmn_err(CE_NOTE, "ipv4_net_init: "
203 			    "ddi_taskq_create failed for IP_INJECT_QUEUE_IN");
204 	}
205 
206 	if (eventq_queue_nic == NULL) {
207 		eventq_queue_nic = ddi_taskq_create(NULL,
208 		    "IP_NIC_EVENT_QUEUE", 1, TASKQ_DEFAULTPRI, 0);
209 
210 		if (eventq_queue_nic == NULL)
211 			cmn_err(CE_NOTE, "ipv4_net_init: "
212 			    "ddi_taskq_create failed for IP_NIC_EVENT_QUEUE");
213 	}
214 }
215 
216 
217 /*
218  * Unregister IPv4 and IPv6 functions and inject queues
219  */
220 void
221 ip_net_destroy()
222 {
223 
224 	if (eventq_queue_nic != NULL) {
225 		ddi_taskq_destroy(eventq_queue_nic);
226 		eventq_queue_nic = NULL;
227 	}
228 
229 	if (eventq_queue_in != NULL) {
230 		ddi_taskq_destroy(eventq_queue_in);
231 		eventq_queue_in = NULL;
232 	}
233 
234 	if (eventq_queue_out != NULL) {
235 		ddi_taskq_destroy(eventq_queue_out);
236 		eventq_queue_out = NULL;
237 	}
238 
239 	if (ipv4 != NULL) {
240 		if (net_unregister(ipv4) == 0)
241 			ipv4 = NULL;
242 	}
243 
244 	if (ipv6 != NULL) {
245 		if (net_unregister(ipv6) == 0)
246 			ipv6 = NULL;
247 	}
248 }
249 
250 
251 /*
252  * Initialize IPv4 hooks family the event
253  */
254 void
255 ipv4_hook_init()
256 {
257 
258 	HOOK_FAMILY_INIT(&ipv4root, Hn_IPV4);
259 	if (net_register_family(ipv4, &ipv4root) != 0) {
260 		cmn_err(CE_NOTE, "ipv4_hook_init: "
261 		    "net_register_family failed for ipv4");
262 	}
263 
264 	HOOK_EVENT_INIT(&ip4_physical_in_event, NH_PHYSICAL_IN);
265 	ipv4firewall_physical_in = net_register_event(ipv4,
266 	    &ip4_physical_in_event);
267 	if (ipv4firewall_physical_in == NULL) {
268 		cmn_err(CE_NOTE, "ipv4_hook_init: "
269 		    "net_register_event failed for ipv4/physical_in");
270 	}
271 
272 	HOOK_EVENT_INIT(&ip4_physical_out_event, NH_PHYSICAL_OUT);
273 	ipv4firewall_physical_out = net_register_event(ipv4,
274 	    &ip4_physical_out_event);
275 	if (ipv4firewall_physical_out == NULL) {
276 		cmn_err(CE_NOTE, "ipv4_hook_init: "
277 		    "net_register_event failed for ipv4/physical_out");
278 	}
279 
280 	HOOK_EVENT_INIT(&ip4_forwarding_event, NH_FORWARDING);
281 	ipv4firewall_forwarding = net_register_event(ipv4,
282 	    &ip4_forwarding_event);
283 	if (ipv4firewall_forwarding == NULL) {
284 		cmn_err(CE_NOTE, "ipv4_hook_init: "
285 		    "net_register_event failed for ipv4/forwarding");
286 	}
287 
288 	HOOK_EVENT_INIT(&ip4_loopback_in_event, NH_LOOPBACK_IN);
289 	ipv4firewall_loopback_in = net_register_event(ipv4,
290 	    &ip4_loopback_in_event);
291 	if (ipv4firewall_loopback_in == NULL) {
292 		cmn_err(CE_NOTE, "ipv4_hook_init: "
293 		    "net_register_event failed for ipv4/loopback_in");
294 	}
295 
296 	HOOK_EVENT_INIT(&ip4_loopback_out_event, NH_LOOPBACK_OUT);
297 	ipv4firewall_loopback_out = net_register_event(ipv4,
298 	    &ip4_loopback_out_event);
299 	if (ipv4firewall_loopback_out == NULL) {
300 		cmn_err(CE_NOTE, "ipv4_hook_init: "
301 		    "net_register_event failed for ipv4/loopback_out");
302 	}
303 
304 	HOOK_EVENT_INIT(&ip4_nic_events, NH_NIC_EVENTS);
305 	ip4_nic_events.he_flags = HOOK_RDONLY;
306 	ipv4nicevents = net_register_event(ipv4, &ip4_nic_events);
307 	if (ipv4nicevents == NULL) {
308 		cmn_err(CE_NOTE, "ipv4_hook_init: "
309 		    "net_register_event failed for ipv4/nic_events");
310 	}
311 }
312 
313 
314 void
315 ipv4_hook_destroy()
316 {
317 	if (ipv4firewall_forwarding != NULL) {
318 		if (net_unregister_event(ipv4, &ip4_forwarding_event) == 0)
319 			ipv4firewall_forwarding = NULL;
320 	}
321 
322 	if (ipv4firewall_physical_in != NULL) {
323 		if (net_unregister_event(ipv4, &ip4_physical_in_event) == 0)
324 			ipv4firewall_physical_in = NULL;
325 	}
326 
327 	if (ipv4firewall_physical_out != NULL) {
328 		if (net_unregister_event(ipv4, &ip4_physical_out_event) == 0)
329 			ipv4firewall_physical_out = NULL;
330 	}
331 
332 	if (ipv4firewall_loopback_in != NULL) {
333 		if (net_unregister_event(ipv4, &ip4_loopback_in_event) == 0)
334 			ipv4firewall_loopback_in = NULL;
335 	}
336 
337 	if (ipv4firewall_loopback_out != NULL) {
338 		if (net_unregister_event(ipv4, &ip4_loopback_out_event) == 0)
339 			ipv4firewall_loopback_out = NULL;
340 	}
341 
342 	if (ipv4nicevents != NULL) {
343 		if (net_unregister_event(ipv4, &ip4_nic_events) == 0)
344 			ipv4nicevents = NULL;
345 	}
346 
347 	(void) net_unregister_family(ipv4, &ipv4root);
348 }
349 
350 
351 /*
352  * Initialize IPv6 hooks family and event
353  */
354 void
355 ipv6_hook_init()
356 {
357 
358 	HOOK_FAMILY_INIT(&ipv6root, Hn_IPV6);
359 	if (net_register_family(ipv6, &ipv6root) != 0) {
360 		cmn_err(CE_NOTE, "ipv6_hook_init: "
361 		    "net_register_family failed for ipv6");
362 	}
363 
364 	HOOK_EVENT_INIT(&ip6_physical_in_event, NH_PHYSICAL_IN);
365 	ipv6firewall_physical_in = net_register_event(ipv6,
366 	    &ip6_physical_in_event);
367 	if (ipv6firewall_physical_in == NULL) {
368 		cmn_err(CE_NOTE, "ipv6_hook_init: "
369 		    "net_register_event failed for ipv6/physical_in");
370 	}
371 
372 	HOOK_EVENT_INIT(&ip6_physical_out_event, NH_PHYSICAL_OUT);
373 	ipv6firewall_physical_out = net_register_event(ipv6,
374 	    &ip6_physical_out_event);
375 	if (ipv6firewall_physical_out == NULL) {
376 		cmn_err(CE_NOTE, "ipv6_hook_init: "
377 		    "net_register_event failed for ipv6/physical_out");
378 	}
379 
380 	HOOK_EVENT_INIT(&ip6_forwarding_event, NH_FORWARDING);
381 	ipv6firewall_forwarding = net_register_event(ipv6,
382 	    &ip6_forwarding_event);
383 	if (ipv6firewall_forwarding == NULL) {
384 		cmn_err(CE_NOTE, "ipv6_hook_init: "
385 		    "net_register_event failed for ipv6/forwarding");
386 	}
387 
388 	HOOK_EVENT_INIT(&ip6_loopback_in_event, NH_LOOPBACK_IN);
389 	ipv6firewall_loopback_in = net_register_event(ipv6,
390 	    &ip6_loopback_in_event);
391 	if (ipv6firewall_loopback_in == NULL) {
392 		cmn_err(CE_NOTE, "ipv6_hook_init: "
393 		    "net_register_event failed for ipv6/loopback_in");
394 	}
395 
396 	HOOK_EVENT_INIT(&ip6_loopback_out_event, NH_LOOPBACK_OUT);
397 	ipv6firewall_loopback_out = net_register_event(ipv6,
398 	    &ip6_loopback_out_event);
399 	if (ipv6firewall_loopback_out == NULL) {
400 		cmn_err(CE_NOTE, "ipv6_hook_init: "
401 		    "net_register_event failed for ipv6/loopback_out");
402 	}
403 
404 	HOOK_EVENT_INIT(&ip6_nic_events, NH_NIC_EVENTS);
405 	ip6_nic_events.he_flags = HOOK_RDONLY;
406 	ipv6nicevents = net_register_event(ipv6, &ip6_nic_events);
407 	if (ipv6nicevents == NULL) {
408 		cmn_err(CE_NOTE, "ipv6_hook_init: "
409 		    "net_register_event failed for ipv6/nic_events");
410 	}
411 }
412 
413 
414 void
415 ipv6_hook_destroy()
416 {
417 	if (ipv6firewall_forwarding != NULL) {
418 		if (net_unregister_event(ipv6, &ip6_forwarding_event) == 0)
419 			ipv6firewall_forwarding = NULL;
420 	}
421 
422 	if (ipv6firewall_physical_in != NULL) {
423 		if (net_unregister_event(ipv6, &ip6_physical_in_event) == 0)
424 			ipv6firewall_physical_in = NULL;
425 	}
426 
427 	if (ipv6firewall_physical_out != NULL) {
428 		if (net_unregister_event(ipv6, &ip6_physical_out_event) == 0)
429 			ipv6firewall_physical_out = NULL;
430 	}
431 
432 	if (ipv6firewall_loopback_in != NULL) {
433 		if (net_unregister_event(ipv6, &ip6_loopback_in_event) == 0)
434 			ipv6firewall_loopback_in = NULL;
435 	}
436 
437 	if (ipv6firewall_loopback_out != NULL) {
438 		if (net_unregister_event(ipv6, &ip6_loopback_out_event) == 0)
439 			ipv6firewall_loopback_out = NULL;
440 	}
441 
442 	if (ipv6nicevents != NULL) {
443 		if (net_unregister_event(ipv6, &ip6_nic_events) == 0)
444 			ipv6nicevents = NULL;
445 	}
446 
447 	(void) net_unregister_family(ipv6, &ipv6root);
448 }
449 
450 
451 /*
452  * Determine the name of an IPv4 interface
453  */
454 static int
455 ip_getifname(phy_if_t phy_ifdata, char *buffer, const size_t buflen)
456 {
457 
458 	return (ip_getifname_impl(phy_ifdata, buffer, buflen, B_FALSE));
459 }
460 
461 
462 /*
463  * Determine the name of an IPv6 interface
464  */
465 static int
466 ipv6_getifname(phy_if_t phy_ifdata, char *buffer, const size_t buflen)
467 {
468 
469 	return (ip_getifname_impl(phy_ifdata, buffer, buflen, B_TRUE));
470 }
471 
472 
473 /*
474  * Shared implementation to determine the name of a given network interface
475  */
476 /* ARGSUSED */
477 static int
478 ip_getifname_impl(phy_if_t phy_ifdata,
479     char *buffer, const size_t buflen, boolean_t isv6)
480 {
481 	ill_t *ill;
482 
483 	ASSERT(buffer != NULL);
484 
485 	ill = ill_lookup_on_ifindex((uint_t)phy_ifdata, isv6, NULL, NULL,
486 	    NULL, NULL);
487 	if (ill == NULL)
488 		return (1);
489 
490 	if (ill->ill_name != NULL) {
491 		(void) strlcpy(buffer, ill->ill_name, buflen);
492 		ill_refrele(ill);
493 		return (0);
494 	} else {
495 		ill_refrele(ill);
496 		return (1);
497 	}
498 
499 }
500 
501 
502 /*
503  * Determine the MTU of an IPv4 network interface
504  */
505 static int
506 ip_getmtu(phy_if_t phy_ifdata, lif_if_t ifdata)
507 {
508 
509 	return (ip_getmtu_impl(phy_ifdata, ifdata, B_FALSE));
510 }
511 
512 
513 /*
514  * Determine the MTU of an IPv6 network interface
515  */
516 static int
517 ipv6_getmtu(phy_if_t phy_ifdata, lif_if_t ifdata)
518 {
519 
520 	return (ip_getmtu_impl(phy_ifdata, ifdata, B_TRUE));
521 }
522 
523 
524 /*
525  * Shared implementation to determine the MTU of a network interface
526  */
527 /* ARGSUSED */
528 static int
529 ip_getmtu_impl(phy_if_t phy_ifdata, lif_if_t ifdata, boolean_t isv6)
530 {
531 	lif_if_t ipifid;
532 	ipif_t *ipif;
533 	int mtu;
534 
535 	ipifid = UNMAP_IPIF_ID(ifdata);
536 
537 	ipif = ipif_getby_indexes((uint_t)phy_ifdata, (uint_t)ipifid, isv6);
538 	if (ipif == NULL)
539 		return (0);
540 
541 	mtu = ipif->ipif_mtu;
542 	ipif_refrele(ipif);
543 
544 	if (mtu == 0) {
545 		ill_t *ill;
546 
547 		if ((ill = ill_lookup_on_ifindex((uint_t)phy_ifdata, isv6,
548 		    NULL, NULL, NULL, NULL)) == NULL) {
549 			return (0);
550 		}
551 		mtu = ill->ill_max_frag;
552 		ill_refrele(ill);
553 	}
554 
555 	return (mtu);
556 }
557 
558 
559 /*
560  * Determine if path MTU discovery is enabled for IP
561  */
562 static int
563 ip_getpmtuenabled(void)
564 {
565 
566 	return (ip_path_mtu_discovery);
567 }
568 
569 
570 /*
571  * Get next interface from the current list of IPv4 physical network interfaces
572  */
573 static phy_if_t
574 ip_phygetnext(phy_if_t phy_ifdata)
575 {
576 
577 	return (ill_get_next_ifindex(phy_ifdata, B_FALSE));
578 }
579 
580 
581 /*
582  * Get next interface from the current list of IPv6 physical network interfaces
583  */
584 static phy_if_t
585 ipv6_phygetnext(phy_if_t phy_ifdata)
586 {
587 
588 	return (ill_get_next_ifindex(phy_ifdata, B_TRUE));
589 }
590 
591 
592 /*
593  * Determine if a network interface name exists for IPv4
594  */
595 static phy_if_t
596 ip_phylookup(const char *name)
597 {
598 
599 	return (ip_phylookup_impl(name, B_FALSE));
600 
601 }
602 
603 
604 /*
605  * Determine if a network interface name exists for IPv6
606  */
607 static phy_if_t
608 ipv6_phylookup(const char *name)
609 {
610 
611 	return (ip_phylookup_impl(name, B_TRUE));
612 }
613 
614 
615 /*
616  * Implement looking up an ill_t based on the name supplied and matching
617  * it up with either IPv4 or IPv6.  ill_get_ifindex_by_name() is not used
618  * because it does not match on the address family in addition to the name.
619  */
620 static phy_if_t
621 ip_phylookup_impl(const char *name, boolean_t isv6)
622 {
623 	phy_if_t phy;
624 	ill_t *ill;
625 
626 	ill = ill_lookup_on_name((char *)name, B_FALSE, isv6, NULL, NULL,
627 	    NULL, NULL, NULL);
628 
629 	if (ill == NULL)
630 		return (0);
631 
632 	phy = ill->ill_phyint->phyint_ifindex;
633 
634 	ill_refrele(ill);
635 
636 	return (phy);
637 }
638 
639 
640 /*
641  * Get next interface from the current list of IPv4 logical network interfaces
642  */
643 static lif_if_t
644 ip_lifgetnext(phy_if_t phy_ifdata, lif_if_t ifdata)
645 {
646 
647 	return (ip_lifgetnext_impl(phy_ifdata, ifdata, B_FALSE));
648 }
649 
650 
651 /*
652  * Get next interface from the current list of IPv6 logical network interfaces
653  */
654 static lif_if_t
655 ipv6_lifgetnext(phy_if_t phy_ifdata, lif_if_t ifdata)
656 {
657 
658 	return (ip_lifgetnext_impl(phy_ifdata, ifdata, B_TRUE));
659 }
660 
661 
662 /*
663  * Shared implementation to get next interface from the current list of
664  * logical network interfaces
665  */
666 static lif_if_t
667 ip_lifgetnext_impl(phy_if_t phy_ifdata, lif_if_t ifdata, boolean_t isv6)
668 {
669 	lif_if_t newidx, oldidx;
670 	boolean_t nextok;
671 	ipif_t *ipif;
672 	ill_t *ill;
673 
674 	ill = ill_lookup_on_ifindex(phy_ifdata, isv6, NULL, NULL, NULL, NULL);
675 	if (ill == NULL)
676 		return (0);
677 
678 	if (ifdata != 0) {
679 		oldidx = UNMAP_IPIF_ID(ifdata);
680 		nextok = B_FALSE;
681 	} else {
682 		oldidx = 0;
683 		nextok = B_TRUE;
684 	}
685 
686 	mutex_enter(&ill->ill_lock);
687 	if (ill->ill_state_flags & ILL_CONDEMNED) {
688 		mutex_exit(&ill->ill_lock);
689 		ill_refrele(ill);
690 		return (0);
691 	}
692 
693 	/*
694 	 * It's safe to iterate the ill_ipif list when holding an ill_lock.
695 	 * And it's also safe to access ipif_id without ipif refhold.
696 	 * See ipif_get_id().
697 	 */
698 	for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
699 		if (!IPIF_CAN_LOOKUP(ipif))
700 			continue;
701 		if (nextok) {
702 			ipif_refhold_locked(ipif);
703 			break;
704 		} else if (oldidx == ipif->ipif_id) {
705 			nextok = B_TRUE;
706 		}
707 	}
708 
709 	mutex_exit(&ill->ill_lock);
710 	ill_refrele(ill);
711 
712 	if (ipif == NULL)
713 		return (0);
714 
715 	newidx = ipif->ipif_id;
716 	ipif_refrele(ipif);
717 
718 	return (MAP_IPIF_ID(newidx));
719 }
720 
721 
722 /*
723  * Inject an IPv4 packet to or from an interface
724  */
725 static int
726 ip_inject(inject_t style, net_inject_t *packet)
727 {
728 
729 	return (ip_inject_impl(style, packet, B_FALSE));
730 }
731 
732 
733 /*
734  * Inject an IPv6 packet to or from an interface
735  */
736 static int
737 ipv6_inject(inject_t style, net_inject_t *packet)
738 {
739 
740 	return (ip_inject_impl(style, packet, B_TRUE));
741 }
742 
743 
744 /*
745  * Shared implementation to inject a packet to or from an interface
746  * Return value:
747  *   0: successful
748  *  -1: memory allocation failed
749  *   1: other errors
750  */
751 static int
752 ip_inject_impl(inject_t style, net_inject_t *packet, boolean_t isv6)
753 {
754 	struct sockaddr_in6 *sin6;
755 	ddi_taskq_t *tq = NULL;
756 	void (* func)(void*);
757 	injection_t *inject;
758 	ip6_t *ip6h;
759 	ire_t *ire;
760 	mblk_t *mp;
761 
762 	ASSERT(packet != NULL);
763 	ASSERT(packet->ni_packet != NULL);
764 	ASSERT(packet->ni_packet->b_datap->db_type == M_DATA);
765 
766 	switch (style) {
767 	case NI_QUEUE_IN:
768 		inject = kmem_alloc(sizeof (*inject), KM_NOSLEEP);
769 		if (inject == NULL)
770 			return (-1);
771 		inject->inj_data = *packet;
772 		inject->inj_isv6 = isv6;
773 		/*
774 		 * deliver up into the kernel, immitating its reception by a
775 		 * network interface, add to list and schedule timeout
776 		 */
777 		func = ip_ni_queue_in_func;
778 		tq = eventq_queue_in;
779 		break;
780 
781 	case NI_QUEUE_OUT:
782 		inject = kmem_alloc(sizeof (*inject), KM_NOSLEEP);
783 		if (inject == NULL)
784 			return (-1);
785 		inject->inj_data = *packet;
786 		inject->inj_isv6 = isv6;
787 		/*
788 		 * deliver out of the kernel, as if it were being sent via a
789 		 * raw socket so that IPFilter will see it again, add to list
790 		 * and schedule timeout
791 		 */
792 		func = ip_ni_queue_out_func;
793 		tq = eventq_queue_out;
794 		break;
795 
796 	case NI_DIRECT_OUT:
797 		/*
798 		 * Note:
799 		 * For IPv4, the code path below will be greatly simplified
800 		 * with the delivery of surya - it will become a single
801 		 * function call to X.  A follow on project is aimed to
802 		 * provide similar functionality for IPv6.
803 		 */
804 		mp = packet->ni_packet;
805 
806 		if (!isv6) {
807 			struct sockaddr *sock;
808 
809 			sock = (struct sockaddr *)&packet->ni_addr;
810 			/*
811 			 * ipfil_sendpkt was provided by surya to ease the
812 			 * problems associated with sending out a packet.
813 			 * Currently this function only supports IPv4.
814 			 */
815 			switch (ipfil_sendpkt(sock, mp, packet->ni_physical,
816 			    ALL_ZONES)) {
817 			case 0 :
818 			case EINPROGRESS:
819 				return (0);
820 			case ECOMM :
821 			case ENONET :
822 				return (1);
823 			default :
824 				return (1);
825 			}
826 			/* NOTREACHED */
827 
828 		}
829 
830 		ip6h = (ip6_t *)mp->b_rptr;
831 		sin6 = (struct sockaddr_in6 *)&packet->ni_addr;
832 		ASSERT(sin6->sin6_family == AF_INET6);
833 
834 		ire = ire_route_lookup_v6(&sin6->sin6_addr, 0, 0, 0,
835 		    NULL, NULL, ALL_ZONES, NULL,
836 		    MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE);
837 
838 		if (ire == NULL) {
839 			ip2dbg(("ip_inject: ire_cache_lookup failed\n"));
840 			freemsg(mp);
841 			return (1);
842 		}
843 
844 		if (ire->ire_stq == NULL) {
845 			/* Send to loopback destination. */
846 			if (ire->ire_rfq == NULL) {
847 				ip2dbg(("ip_inject: bad nexthop\n"));
848 				ire_refrele(ire);
849 				freemsg(mp);
850 				return (1);
851 			}
852 			ip_wput_local_v6(ire->ire_rfq,
853 			    ire->ire_ipif->ipif_ill, ip6h, mp, ire, 0);
854 			ire_refrele(ire);
855 			return (0);
856 		}
857 
858 		mp->b_queue = ire->ire_stq;
859 
860 		if (ire->ire_nce == NULL ||
861 		    ire->ire_nce->nce_fp_mp == NULL &&
862 		    ire->ire_nce->nce_res_mp == NULL) {
863 			ip_newroute_v6(ire->ire_stq, mp,
864 			    &sin6->sin6_addr, NULL, NULL, ALL_ZONES);
865 
866 			ire_refrele(ire);
867 			return (0);
868 		} else {
869 			/* prepend L2 header for IPv6 packets. */
870 			mblk_t *llmp;
871 
872 			/*
873 			 * Lock IREs, see 6420438
874 			 */
875 			mutex_enter(&ire->ire_lock);
876 			llmp = ire->ire_nce->nce_fp_mp ?
877 			    ire->ire_nce->nce_fp_mp :
878 			    ire->ire_nce->nce_res_mp;
879 
880 			if ((mp = dupb(llmp)) == NULL &&
881 			    (mp = copyb(llmp)) == NULL) {
882 				ip2dbg(("ip_inject: llhdr failed\n"));
883 				mutex_exit(&ire->ire_lock);
884 				ire_refrele(ire);
885 				freemsg(mp);
886 				return (1);
887 			}
888 			mutex_exit(&ire->ire_lock);
889 			linkb(mp, packet->ni_packet);
890 		}
891 
892 		mp->b_queue = ire->ire_stq;
893 
894 		break;
895 	default:
896 		freemsg(packet->ni_packet);
897 		return (1);
898 	}
899 
900 	if (tq) {
901 		if (ddi_taskq_dispatch(tq, func, (void *)inject,
902 		    DDI_SLEEP) == DDI_FAILURE) {
903 			ip2dbg(("ip_inject:  ddi_taskq_dispatch failed\n"));
904 			freemsg(packet->ni_packet);
905 			return (1);
906 		}
907 	} else {
908 		putnext(ire->ire_stq, mp);
909 		ire_refrele(ire);
910 	}
911 
912 	return (0);
913 }
914 
915 
916 /*
917  * Find the interface used for traffic to a given IPv4 address
918  */
919 static phy_if_t
920 ip_routeto(struct sockaddr *address)
921 {
922 
923 	ASSERT(address != NULL);
924 
925 	if (address->sa_family != AF_INET)
926 		return (0);
927 	return (ip_routeto_impl(address));
928 }
929 
930 
931 /*
932  * Find the interface used for traffic to a given IPv6 address
933  */
934 static phy_if_t
935 ipv6_routeto(struct sockaddr *address)
936 {
937 
938 	ASSERT(address != NULL);
939 
940 	if (address->sa_family != AF_INET6)
941 		return (0);
942 	return (ip_routeto_impl(address));
943 }
944 
945 
946 /*
947  * Find the interface used for traffic to an address
948  */
949 static phy_if_t
950 ip_routeto_impl(struct sockaddr *address)
951 {
952 	ire_t *ire;
953 	ill_t *ill;
954 	phy_if_t phy_if;
955 
956 	if (address->sa_family == AF_INET6) {
957 		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)address;
958 		ire = ire_route_lookup_v6(&sin6->sin6_addr, NULL,
959 		    0, 0, NULL, NULL, ALL_ZONES, NULL,
960 		    MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE);
961 	} else {
962 		struct sockaddr_in *sin = (struct sockaddr_in *)address;
963 		ire = ire_route_lookup(sin->sin_addr.s_addr, 0,
964 		    0, 0, NULL, NULL, ALL_ZONES, NULL,
965 		    MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE);
966 	}
967 
968 	if (ire == NULL)
969 		return (0);
970 
971 	ill = ire_to_ill(ire);
972 	if (ill == NULL)
973 		return (0);
974 
975 	ASSERT(ill != NULL);
976 	phy_if = (phy_if_t)ill->ill_phyint->phyint_ifindex;
977 	ire_refrele(ire);
978 
979 	return (phy_if);
980 }
981 
982 
983 /*
984  * Determine if checksumming is being used for the given packet.
985  *
986  * Return value:
987  *   NET_HCK_NONE: full checksum recalculation is required
988  *   NET_HCK_L3_FULL: full layer 3 checksum
989  *   NET_HCK_L4_FULL: full layer 4 checksum
990  *   NET_HCK_L4_PART: partial layer 4 checksum
991  */
992 static int
993 ip_ispartialchecksum(mblk_t *mp)
994 {
995 	int ret = 0;
996 
997 	ASSERT(mp != NULL);
998 
999 	if ((DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM) != 0) {
1000 		ret |= (int)NET_HCK_L4_FULL;
1001 		if ((DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM) != 0)
1002 			ret |= (int)NET_HCK_L3_FULL;
1003 	}
1004 	if ((DB_CKSUMFLAGS(mp) & HCK_PARTIALCKSUM) != 0) {
1005 		ret |= (int)NET_HCK_L4_PART;
1006 		if ((DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM) != 0)
1007 			ret |= (int)NET_HCK_L3_FULL;
1008 	}
1009 
1010 	return (ret);
1011 }
1012 
1013 
1014 /*
1015  * Return true or false, indicating whether the network and transport
1016  * headers are correct.  Use the capabilities flags and flags set in the
1017  * dblk_t to determine whether or not the checksum is valid.
1018  *
1019  * Return:
1020  *   0: the checksum was incorrect
1021  *   1: the original checksum was correct
1022  */
1023 static int
1024 ip_isvalidchecksum(mblk_t *mp)
1025 {
1026 	unsigned char *wptr;
1027 	ipha_t *ipha = (ipha_t *)mp->b_rptr;
1028 	int hlen;
1029 	int ret;
1030 
1031 	ASSERT(mp != NULL);
1032 
1033 	if (dohwcksum &&
1034 	    DB_CKSUM16(mp) != 0xFFFF &&
1035 	    (DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM) &&
1036 	    (DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM_OK) &&
1037 	    (DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM))
1038 		return (1);
1039 
1040 	hlen = (ipha->ipha_version_and_hdr_length & 0x0F) << 2;
1041 
1042 	/*
1043 	 * Check that the mblk being passed in has enough data in it
1044 	 * before blindly checking ip_cksum.
1045 	 */
1046 	if (msgdsize(mp) < hlen)
1047 		return (0);
1048 
1049 	if (mp->b_wptr < mp->b_rptr + hlen) {
1050 		if (pullupmsg(mp, hlen) == 0)
1051 			return (0);
1052 		wptr = mp->b_wptr;
1053 	} else {
1054 		wptr = mp->b_wptr;
1055 		mp->b_wptr = mp->b_rptr + hlen;
1056 	}
1057 
1058 	if (ipha->ipha_hdr_checksum == ip_cksum(mp, 0, ipha->ipha_hdr_checksum))
1059 		ret = 1;
1060 	else
1061 		ret = 0;
1062 	mp->b_wptr = wptr;
1063 
1064 	return (ret);
1065 }
1066 
1067 
1068 /*
1069  * Unsupported with IPv6
1070  */
1071 /*ARGSUSED*/
1072 static int
1073 ipv6_isvalidchecksum(mblk_t *mp)
1074 {
1075 
1076 	return (-1);
1077 }
1078 
1079 /*
1080  * Determine the network addresses for an IPv4 interface
1081  */
1082 static int
1083 ip_getlifaddr(phy_if_t phy_ifdata, lif_if_t ifdata, size_t nelem,
1084 	net_ifaddr_t type[], void *storage)
1085 {
1086 
1087 	return (ip_getlifaddr_impl(AF_INET, phy_ifdata, ifdata,
1088 	    nelem, type, storage));
1089 }
1090 
1091 
1092 /*
1093  * Determine the network addresses for an IPv6 interface
1094  */
1095 static int
1096 ipv6_getlifaddr(phy_if_t phy_ifdata, lif_if_t ifdata, size_t nelem,
1097 		net_ifaddr_t type[], void *storage)
1098 {
1099 
1100 	return (ip_getlifaddr_impl(AF_INET6, phy_ifdata, ifdata,
1101 	    nelem, type, storage));
1102 }
1103 
1104 
1105 /*
1106  * Shared implementation to determine the network addresses for an interface
1107  */
1108 /* ARGSUSED */
1109 static int
1110 ip_getlifaddr_impl(sa_family_t family, phy_if_t phy_ifdata,
1111     lif_if_t ifdata, size_t nelem, net_ifaddr_t type[],
1112     struct sockaddr *storage)
1113 {
1114 	struct sockaddr_in6 *sin6;
1115 	struct sockaddr_in *sin;
1116 	lif_if_t ipifid;
1117 	ipif_t *ipif;
1118 	int i;
1119 
1120 	ASSERT(type != NULL);
1121 	ASSERT(storage != NULL);
1122 
1123 	ipifid = UNMAP_IPIF_ID(ifdata);
1124 
1125 	if (family == AF_INET) {
1126 		if ((ipif = ipif_getby_indexes((uint_t)phy_ifdata,
1127 		    (uint_t)ipifid, B_FALSE)) == NULL)
1128 			return (1);
1129 
1130 		sin = (struct sockaddr_in *)storage;
1131 		for (i = 0; i < nelem; i++, sin++) {
1132 			if (ip_getifaddr_type(AF_INET, ipif, type[i],
1133 			    &sin->sin_addr) < 0) {
1134 				ip2dbg(("ip_getlifaddr_impl failed type %d\n",
1135 				    type[i]));
1136 				ipif_refrele(ipif);
1137 				return (1);
1138 			}
1139 		}
1140 	} else {
1141 		if ((ipif = ipif_getby_indexes((uint_t)phy_ifdata,
1142 		    (uint_t)ipifid, B_TRUE)) == NULL)
1143 			return (1);
1144 
1145 		sin6 = (struct sockaddr_in6 *)storage;
1146 		for (i = 0; i < nelem; i++, sin6++) {
1147 			if (ip_getifaddr_type(AF_INET6, ipif, type[i],
1148 			    &sin6->sin6_addr) < 0) {
1149 				ip2dbg(("ip_getlifaddr_impl failed type %d\n",
1150 				    type[i]));
1151 				ipif_refrele(ipif);
1152 				return (1);
1153 			}
1154 		}
1155 	}
1156 	ipif_refrele(ipif);
1157 	return (0);
1158 }
1159 
1160 /*
1161  * ip_getlifaddr private function
1162  */
1163 static int
1164 ip_getifaddr_type(sa_family_t family, ipif_t *ill_ipif,
1165     lif_if_t type, void *storage)
1166 {
1167 	void *src_addr;
1168 	int mem_size;
1169 
1170 	ASSERT(ill_ipif != NULL);
1171 	ASSERT(storage != NULL);
1172 
1173 	if (family == AF_INET) {
1174 		mem_size = sizeof (struct in_addr);
1175 
1176 		switch (type) {
1177 		case NA_ADDRESS:
1178 			src_addr = &(ill_ipif->ipif_lcl_addr);
1179 			break;
1180 		case NA_PEER:
1181 			src_addr = &(ill_ipif->ipif_pp_dst_addr);
1182 			break;
1183 		case NA_BROADCAST:
1184 			src_addr = &(ill_ipif->ipif_brd_addr);
1185 			break;
1186 		case NA_NETMASK:
1187 			src_addr = &(ill_ipif->ipif_net_mask);
1188 			break;
1189 		default:
1190 			return (-1);
1191 			/*NOTREACHED*/
1192 		}
1193 	} else {
1194 		mem_size = sizeof (struct in6_addr);
1195 
1196 		switch (type) {
1197 		case NA_ADDRESS:
1198 			src_addr = &(ill_ipif->ipif_v6lcl_addr);
1199 			break;
1200 		case NA_PEER:
1201 			src_addr = &(ill_ipif->ipif_v6pp_dst_addr);
1202 			break;
1203 		case NA_BROADCAST:
1204 			src_addr = &(ill_ipif->ipif_v6brd_addr);
1205 			break;
1206 		case NA_NETMASK:
1207 			src_addr = &(ill_ipif->ipif_v6net_mask);
1208 			break;
1209 		default:
1210 			return (-1);
1211 			/*NOTREACHED*/
1212 		}
1213 	}
1214 
1215 	(void) memcpy(storage, src_addr, mem_size);
1216 	return (1);
1217 }
1218 
1219 
1220 /*
1221  * Deliver packet up into the kernel, immitating its reception by a
1222  * network interface.
1223  */
1224 static void
1225 ip_ni_queue_in_func(void *inject)
1226 {
1227 
1228 	ip_ni_queue_func_impl(inject, B_FALSE);
1229 }
1230 
1231 
1232 /*
1233  * Deliver out of the kernel, as if it were being sent via a
1234  * raw socket so that IPFilter will see it again.
1235  */
1236 static void
1237 ip_ni_queue_out_func(void *inject)
1238 {
1239 
1240 	ip_ni_queue_func_impl(inject, B_TRUE);
1241 }
1242 
1243 
1244 /*
1245  * Shared implementation for inject via ip_output and ip_input
1246  */
1247 static void
1248 ip_ni_queue_func_impl(injection_t *inject,  boolean_t out)
1249 {
1250 	net_inject_t *packet;
1251 	conn_t *conn;
1252 	ill_t *ill;
1253 
1254 	ASSERT(inject != NULL);
1255 	packet = &inject->inj_data;
1256 	ASSERT(packet->ni_packet != NULL);
1257 
1258 	if ((ill = ill_lookup_on_ifindex((uint_t)packet->ni_physical,
1259 	    B_FALSE, NULL, NULL, NULL, NULL)) == NULL) {
1260 		kmem_free(inject, sizeof (*inject));
1261 		return;
1262 	}
1263 
1264 	if (out == 0) {
1265 		if (inject->inj_isv6) {
1266 			ip_rput_v6(ill->ill_rq, packet->ni_packet);
1267 		} else {
1268 			ip_input(ill, NULL, packet->ni_packet, 0);
1269 		}
1270 		kmem_free(inject, sizeof (*inject));
1271 		ill_refrele(ill);
1272 		return;
1273 	}
1274 
1275 	/*
1276 	 * Even though ipcl_conn_create requests that it be passed
1277 	 * a different value for "TCP", in this case there may not
1278 	 * be a TCP connection backing the packet and more than
1279 	 * likely, non-TCP packets will go here too.
1280 	 */
1281 	conn = ipcl_conn_create(IPCL_IPCCONN, KM_NOSLEEP);
1282 	if (conn != NULL) {
1283 		if (inject->inj_isv6) {
1284 			conn->conn_flags |= IPCL_ISV6;
1285 			conn->conn_af_isv6 = B_TRUE;
1286 			conn->conn_src_preferences = IPV6_PREFER_SRC_DEFAULT;
1287 			conn->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1288 			ip_output_v6(conn, packet->ni_packet, ill->ill_wq,
1289 				IP_WPUT);
1290 		} else {
1291 			conn->conn_af_isv6 = B_FALSE;
1292 			conn->conn_pkt_isv6 = B_FALSE;
1293 			conn->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1294 			ip_output(conn, packet->ni_packet, ill->ill_wq,
1295 				IP_WPUT);
1296 		}
1297 
1298 		CONN_DEC_REF(conn);
1299 	}
1300 
1301 	kmem_free(inject, sizeof (*inject));
1302 	ill_refrele(ill);
1303 }
1304 
1305 /*
1306  * taskq function for nic events.
1307  */
1308 void
1309 ip_ne_queue_func(void *arg)
1310 {
1311 
1312 	hook_event_int_t *hr;
1313 	hook_nic_event_t *info = (hook_nic_event_t *)arg;
1314 
1315 	hr = (info->hne_family == ipv6) ? ipv6nicevents : ipv4nicevents;
1316 	(void) hook_run(hr, (hook_data_t)info);
1317 
1318 	if (info->hne_data != NULL)
1319 		kmem_free(info->hne_data, info->hne_datalen);
1320 	kmem_free(arg, sizeof (hook_nic_event_t));
1321 }
1322