xref: /illumos-gate/usr/src/uts/common/inet/ipf/ip_fil_solaris.c (revision 2e0fe3efe5f9d579d4e44b3532d8e342c68b40ca)
1 /*
2  * Copyright (C) 1993-2001, 2003 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
7  */
8 
9 #if !defined(lint)
10 static const char sccsid[] = "@(#)ip_fil_solaris.c	1.7 07/22/06 (C) 1993-2000 Darren Reed";
11 static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $";
12 #endif
13 
14 #include <sys/types.h>
15 #include <sys/errno.h>
16 #include <sys/param.h>
17 #include <sys/cpuvar.h>
18 #include <sys/open.h>
19 #include <sys/ioctl.h>
20 #include <sys/filio.h>
21 #include <sys/systm.h>
22 #include <sys/strsubr.h>
23 #include <sys/cred.h>
24 #include <sys/ddi.h>
25 #include <sys/sunddi.h>
26 #include <sys/ksynch.h>
27 #include <sys/kmem.h>
28 #include <sys/mkdev.h>
29 #include <sys/protosw.h>
30 #include <sys/socket.h>
31 #include <sys/dditypes.h>
32 #include <sys/cmn_err.h>
33 #include <sys/zone.h>
34 #include <net/if.h>
35 #include <net/af.h>
36 #include <net/route.h>
37 #include <netinet/in.h>
38 #include <netinet/in_systm.h>
39 #include <netinet/ip.h>
40 #include <netinet/ip_var.h>
41 #include <netinet/tcp.h>
42 #include <netinet/udp.h>
43 #include <netinet/tcpip.h>
44 #include <netinet/ip_icmp.h>
45 #include "netinet/ip_compat.h"
46 #ifdef	USE_INET6
47 # include <netinet/icmp6.h>
48 #endif
49 #include "netinet/ip_fil.h"
50 #include "netinet/ip_nat.h"
51 #include "netinet/ip_frag.h"
52 #include "netinet/ip_state.h"
53 #include "netinet/ip_auth.h"
54 #include "netinet/ip_proxy.h"
55 #include "netinet/ipf_stack.h"
56 #ifdef	IPFILTER_LOOKUP
57 # include "netinet/ip_lookup.h"
58 #endif
59 #include <inet/ip_ire.h>
60 
61 #include <sys/md5.h>
62 #include <sys/neti.h>
63 
64 static	int	frzerostats __P((caddr_t, ipf_stack_t *));
65 static	int	fr_setipfloopback __P((int, ipf_stack_t *));
66 static	int	fr_enableipf __P((ipf_stack_t *, int));
67 static	int	fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp));
68 static	int	ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *));
69 static	int	ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *));
70 static	int	ipf_hook __P((hook_data_t, int, int, void *));
71 static	int	ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *));
72 static	int	ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *));
73 static	int	ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t,
74     void *));
75 static	int	ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *));
76 static	int	ipf_hook4 __P((hook_data_t, int, int, void *));
77 static	int	ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *));
78 static	int	ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *));
79 static	int	ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t,
80     void *));
81 static	int	ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t,
82     void *));
83 static	int     ipf_hook6 __P((hook_data_t, int, int, void *));
84 extern	int	ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
85 extern	int	ipf_frruleiter __P((void *, int, void *, ipf_stack_t *));
86 
87 #if SOLARIS2 < 10
88 #if SOLARIS2 >= 7
89 u_int		*ip_ttl_ptr = NULL;
90 u_int		*ip_mtudisc = NULL;
91 # if SOLARIS2 >= 8
92 int		*ip_forwarding = NULL;
93 u_int		*ip6_forwarding = NULL;
94 # else
95 u_int		*ip_forwarding = NULL;
96 # endif
97 #else
98 u_long		*ip_ttl_ptr = NULL;
99 u_long		*ip_mtudisc = NULL;
100 u_long		*ip_forwarding = NULL;
101 #endif
102 #endif
103 
104 
105 /* ------------------------------------------------------------------------ */
106 /* Function:    ipldetach                                                   */
107 /* Returns:     int - 0 == success, else error.                             */
108 /* Parameters:  Nil                                                         */
109 /*                                                                          */
110 /* This function is responsible for undoing anything that might have been   */
111 /* done in a call to iplattach().  It must be able to clean up from a call  */
112 /* to iplattach() that did not succeed.  Why might that happen?  Someone    */
113 /* configures a table to be so large that we cannot allocate enough memory  */
114 /* for it.                                                                  */
115 /* ------------------------------------------------------------------------ */
116 int ipldetach(ifs)
117 ipf_stack_t *ifs;
118 {
119 
120 	ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0);
121 
122 #if SOLARIS2 < 10
123 
124 	if (ifs->ifs_fr_control_forwarding & 2) {
125 		if (ip_forwarding != NULL)
126 			*ip_forwarding = 0;
127 #if SOLARIS2 >= 8
128 		if (ip6_forwarding != NULL)
129 			*ip6_forwarding = 0;
130 #endif
131 	}
132 #endif
133 
134 	/*
135 	 * This lock needs to be dropped around the net_hook_unregister calls
136 	 * because we can deadlock here with:
137 	 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
138 	 * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running)
139 	 */
140 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
141 
142 #define	UNDO_HOOK(_f, _b, _e, _h)					\
143 	do {								\
144 		if (ifs->_f != NULL) {					\
145 			if (ifs->_b) {					\
146 				ifs->_b = (net_hook_unregister(ifs->_f,	\
147 					   _e, ifs->_h) != 0);		\
148 				if (!ifs->_b) {				\
149 					hook_free(ifs->_h);		\
150 					ifs->_h = NULL;			\
151 				}					\
152 			} else if (ifs->_h != NULL) {			\
153 				hook_free(ifs->_h);			\
154 				ifs->_h = NULL;				\
155 			}						\
156 		}							\
157 		_NOTE(CONSTCOND)					\
158 	} while (0)
159 
160 	/*
161 	 * Remove IPv6 Hooks
162 	 */
163 	if (ifs->ifs_ipf_ipv6 != NULL) {
164 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in,
165 			  NH_PHYSICAL_IN, ifs_ipfhook6_in);
166 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out,
167 			  NH_PHYSICAL_OUT, ifs_ipfhook6_out);
168 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events,
169 			  NH_NIC_EVENTS, ifs_ipfhook6_nicevents);
170 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in,
171 			  NH_LOOPBACK_IN, ifs_ipfhook6_loop_in);
172 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out,
173 			  NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out);
174 
175 		if (net_protocol_release(ifs->ifs_ipf_ipv6) != 0)
176 			goto detach_failed;
177 		ifs->ifs_ipf_ipv6 = NULL;
178         }
179 
180 	/*
181 	 * Remove IPv4 Hooks
182 	 */
183 	if (ifs->ifs_ipf_ipv4 != NULL) {
184 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in,
185 			  NH_PHYSICAL_IN, ifs_ipfhook4_in);
186 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out,
187 			  NH_PHYSICAL_OUT, ifs_ipfhook4_out);
188 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events,
189 			  NH_NIC_EVENTS, ifs_ipfhook4_nicevents);
190 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in,
191 			  NH_LOOPBACK_IN, ifs_ipfhook4_loop_in);
192 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out,
193 			  NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out);
194 
195 		if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0)
196 			goto detach_failed;
197 		ifs->ifs_ipf_ipv4 = NULL;
198 	}
199 
200 #undef UNDO_HOOK
201 
202 #ifdef	IPFDEBUG
203 	cmn_err(CE_CONT, "ipldetach()\n");
204 #endif
205 
206 	WRITE_ENTER(&ifs->ifs_ipf_global);
207 	fr_deinitialise(ifs);
208 
209 	(void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs);
210 	(void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs);
211 
212 	if (ifs->ifs_ipf_locks_done == 1) {
213 		MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock);
214 		MUTEX_DESTROY(&ifs->ifs_ipf_rw);
215 		RW_DESTROY(&ifs->ifs_ipf_tokens);
216 		RW_DESTROY(&ifs->ifs_ipf_ipidfrag);
217 		ifs->ifs_ipf_locks_done = 0;
218 	}
219 
220 	if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out ||
221 	    ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in ||
222 	    ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events ||
223 	    ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out ||
224 	    ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out)
225 		return -1;
226 
227 	return 0;
228 
229 detach_failed:
230 	WRITE_ENTER(&ifs->ifs_ipf_global);
231 	return -1;
232 }
233 
234 int iplattach(ifs)
235 ipf_stack_t *ifs;
236 {
237 #if SOLARIS2 < 10
238 	int i;
239 #endif
240 	netid_t id = ifs->ifs_netid;
241 
242 #ifdef	IPFDEBUG
243 	cmn_err(CE_CONT, "iplattach()\n");
244 #endif
245 
246 	ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0);
247 	ifs->ifs_fr_flags = IPF_LOGGING;
248 #ifdef _KERNEL
249 	ifs->ifs_fr_update_ipid = 0;
250 #else
251 	ifs->ifs_fr_update_ipid = 1;
252 #endif
253 	ifs->ifs_fr_minttl = 4;
254 	ifs->ifs_fr_icmpminfragmtu = 68;
255 #if defined(IPFILTER_DEFAULT_BLOCK)
256 	ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH;
257 #else
258 	ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH;
259 #endif
260 
261 	bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache));
262 	MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex");
263 	MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex");
264 	RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock");
265 	RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock");
266 	ifs->ifs_ipf_locks_done = 1;
267 
268 	if (fr_initialise(ifs) < 0)
269 		return -1;
270 
271 	HOOK_INIT(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4,
272 		  "ipfilter_hook4_nicevents", ifs);
273 	HOOK_INIT(ifs->ifs_ipfhook4_in, ipf_hook4_in,
274 		  "ipfilter_hook4_in", ifs);
275 	HOOK_INIT(ifs->ifs_ipfhook4_out, ipf_hook4_out,
276 		  "ipfilter_hook4_out", ifs);
277 	HOOK_INIT(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in,
278 		  "ipfilter_hook4_loop_in", ifs);
279 	HOOK_INIT(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out,
280 		  "ipfilter_hook4_loop_out", ifs);
281 
282 	/*
283 	 * If we hold this lock over all of the net_hook_register calls, we
284 	 * can cause a deadlock to occur with the following lock ordering:
285 	 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
286 	 * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path)
287 	 */
288 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
289 
290 	/*
291 	 * Add IPv4 hooks
292 	 */
293 	ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET);
294 	if (ifs->ifs_ipf_ipv4 == NULL)
295 		goto hookup_failed;
296 
297 	ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4,
298 	    NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0);
299 	if (!ifs->ifs_hook4_nic_events)
300 		goto hookup_failed;
301 
302 	ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4,
303 	    NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0);
304 	if (!ifs->ifs_hook4_physical_in)
305 		goto hookup_failed;
306 
307 	ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4,
308 	    NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0);
309 	if (!ifs->ifs_hook4_physical_out)
310 		goto hookup_failed;
311 
312 	if (ifs->ifs_ipf_loopback) {
313 		ifs->ifs_hook4_loopback_in = (net_hook_register(
314 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
315 		    ifs->ifs_ipfhook4_loop_in) == 0);
316 		if (!ifs->ifs_hook4_loopback_in)
317 			goto hookup_failed;
318 
319 		ifs->ifs_hook4_loopback_out = (net_hook_register(
320 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
321 		    ifs->ifs_ipfhook4_loop_out) == 0);
322 		if (!ifs->ifs_hook4_loopback_out)
323 			goto hookup_failed;
324 	}
325 	/*
326 	 * Add IPv6 hooks
327 	 */
328 	ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6);
329 	if (ifs->ifs_ipf_ipv6 == NULL)
330 		goto hookup_failed;
331 
332 	HOOK_INIT(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6,
333 		  "ipfilter_hook6_nicevents", ifs);
334 	HOOK_INIT(ifs->ifs_ipfhook6_in, ipf_hook6_in,
335 		  "ipfilter_hook6_in", ifs);
336 	HOOK_INIT(ifs->ifs_ipfhook6_out, ipf_hook6_out,
337 		  "ipfilter_hook6_out", ifs);
338 	HOOK_INIT(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in,
339 		  "ipfilter_hook6_loop_in", ifs);
340 	HOOK_INIT(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out,
341 		  "ipfilter_hook6_loop_out", ifs);
342 
343 	ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6,
344 	    NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0);
345 	if (!ifs->ifs_hook6_nic_events)
346 		goto hookup_failed;
347 
348 	ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6,
349 	    NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0);
350 	if (!ifs->ifs_hook6_physical_in)
351 		goto hookup_failed;
352 
353 	ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6,
354 	    NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0);
355 	if (!ifs->ifs_hook6_physical_out)
356 		goto hookup_failed;
357 
358 	if (ifs->ifs_ipf_loopback) {
359 		ifs->ifs_hook6_loopback_in = (net_hook_register(
360 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
361 		    ifs->ifs_ipfhook6_loop_in) == 0);
362 		if (!ifs->ifs_hook6_loopback_in)
363 			goto hookup_failed;
364 
365 		ifs->ifs_hook6_loopback_out = (net_hook_register(
366 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
367 		    ifs->ifs_ipfhook6_loop_out) == 0);
368 		if (!ifs->ifs_hook6_loopback_out)
369 			goto hookup_failed;
370 	}
371 
372 	/*
373 	 * Reacquire ipf_global, now it is safe.
374 	 */
375 	WRITE_ENTER(&ifs->ifs_ipf_global);
376 
377 /* Do not use private interface ip_params_arr[] in Solaris 10 */
378 #if SOLARIS2 < 10
379 
380 #if SOLARIS2 >= 8
381 	ip_forwarding = &ip_g_forward;
382 #endif
383 	/*
384 	 * XXX - There is no terminator for this array, so it is not possible
385 	 * to tell if what we are looking for is missing and go off the end
386 	 * of the array.
387 	 */
388 
389 #if SOLARIS2 <= 8
390 	for (i = 0; ; i++) {
391 		if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) {
392 			ip_ttl_ptr = &ip_param_arr[i].ip_param_value;
393 		} else if (!strcmp(ip_param_arr[i].ip_param_name,
394 			    "ip_path_mtu_discovery")) {
395 			ip_mtudisc = &ip_param_arr[i].ip_param_value;
396 		}
397 #if SOLARIS2 < 8
398 		else if (!strcmp(ip_param_arr[i].ip_param_name,
399 			    "ip_forwarding")) {
400 			ip_forwarding = &ip_param_arr[i].ip_param_value;
401 		}
402 #else
403 		else if (!strcmp(ip_param_arr[i].ip_param_name,
404 			    "ip6_forwarding")) {
405 			ip6_forwarding = &ip_param_arr[i].ip_param_value;
406 		}
407 #endif
408 
409 		if (ip_mtudisc != NULL && ip_ttl_ptr != NULL &&
410 #if SOLARIS2 >= 8
411 		    ip6_forwarding != NULL &&
412 #endif
413 		    ip_forwarding != NULL)
414 			break;
415 	}
416 #endif
417 
418 	if (ifs->ifs_fr_control_forwarding & 1) {
419 		if (ip_forwarding != NULL)
420 			*ip_forwarding = 1;
421 #if SOLARIS2 >= 8
422 		if (ip6_forwarding != NULL)
423 			*ip6_forwarding = 1;
424 #endif
425 	}
426 
427 #endif
428 
429 	return 0;
430 hookup_failed:
431 	WRITE_ENTER(&ifs->ifs_ipf_global);
432 	return -1;
433 }
434 
435 static	int	fr_setipfloopback(set, ifs)
436 int set;
437 ipf_stack_t *ifs;
438 {
439 	if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL)
440 		return EFAULT;
441 
442 	if (set && !ifs->ifs_ipf_loopback) {
443 		ifs->ifs_ipf_loopback = 1;
444 
445 		ifs->ifs_hook4_loopback_in = (net_hook_register(
446 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
447 		    ifs->ifs_ipfhook4_loop_in) == 0);
448 		if (!ifs->ifs_hook4_loopback_in)
449 			return EINVAL;
450 
451 		ifs->ifs_hook4_loopback_out = (net_hook_register(
452 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
453 		    ifs->ifs_ipfhook4_loop_out) == 0);
454 		if (!ifs->ifs_hook4_loopback_out)
455 			return EINVAL;
456 
457 		ifs->ifs_hook6_loopback_in = (net_hook_register(
458 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
459 		    ifs->ifs_ipfhook6_loop_in) == 0);
460 		if (!ifs->ifs_hook6_loopback_in)
461 			return EINVAL;
462 
463 		ifs->ifs_hook6_loopback_out = (net_hook_register(
464 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
465 		    ifs->ifs_ipfhook6_loop_out) == 0);
466 		if (!ifs->ifs_hook6_loopback_out)
467 			return EINVAL;
468 
469 	} else if (!set && ifs->ifs_ipf_loopback) {
470 		ifs->ifs_ipf_loopback = 0;
471 
472 		ifs->ifs_hook4_loopback_in =
473 		    (net_hook_unregister(ifs->ifs_ipf_ipv4,
474 		    NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
475 		if (ifs->ifs_hook4_loopback_in)
476 			return EBUSY;
477 
478 		ifs->ifs_hook4_loopback_out =
479 		    (net_hook_unregister(ifs->ifs_ipf_ipv4,
480 		    NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0);
481 		if (ifs->ifs_hook4_loopback_out)
482 			return EBUSY;
483 
484 		ifs->ifs_hook6_loopback_in =
485 		    (net_hook_unregister(ifs->ifs_ipf_ipv6,
486 		    NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
487 		if (ifs->ifs_hook6_loopback_in)
488 			return EBUSY;
489 
490 		ifs->ifs_hook6_loopback_out =
491 		    (net_hook_unregister(ifs->ifs_ipf_ipv6,
492 		    NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0);
493 		if (ifs->ifs_hook6_loopback_out)
494 			return EBUSY;
495 	}
496 	return 0;
497 }
498 
499 
500 /*
501  * Filter ioctl interface.
502  */
503 /*ARGSUSED*/
504 int iplioctl(dev, cmd, data, mode, cp, rp)
505 dev_t dev;
506 int cmd;
507 #if SOLARIS2 >= 7
508 intptr_t data;
509 #else
510 int *data;
511 #endif
512 int mode;
513 cred_t *cp;
514 int *rp;
515 {
516 	int error = 0, tmp;
517 	friostat_t fio;
518 	minor_t unit;
519 	u_int enable;
520 	ipf_stack_t *ifs;
521 
522 #ifdef	IPFDEBUG
523 	cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
524 		dev, cmd, data, mode, cp, rp);
525 #endif
526 	unit = getminor(dev);
527 	if (IPL_LOGMAX < unit)
528 		return ENXIO;
529 
530         /*
531 	 * As we're calling ipf_find_stack in user space, from a given zone
532 	 * to find the stack pointer for this zone, there is no need to have
533 	 * a hold/refence count here.
534 	 */
535 	ifs = ipf_find_stack(crgetzoneid(cp));
536 	ASSERT(ifs != NULL);
537 
538 	if (ifs->ifs_fr_running <= 0) {
539 		if (unit != IPL_LOGIPF) {
540 			return EIO;
541 		}
542 		if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
543 		    cmd != SIOCIPFSET && cmd != SIOCFRENB &&
544 		    cmd != SIOCGETFS && cmd != SIOCGETFF) {
545 			return EIO;
546 		}
547 	}
548 
549 	READ_ENTER(&ifs->ifs_ipf_global);
550 	if (ifs->ifs_fr_enable_active != 0) {
551 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
552 		return EBUSY;
553 	}
554 
555 	error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, crgetuid(cp),
556 			       curproc, ifs);
557 	if (error != -1) {
558 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
559 		return error;
560 	}
561 	error = 0;
562 
563 	switch (cmd)
564 	{
565 	case SIOCFRENB :
566 		if (!(mode & FWRITE))
567 			error = EPERM;
568 		else {
569 			error = COPYIN((caddr_t)data, (caddr_t)&enable,
570 				       sizeof(enable));
571 			if (error != 0) {
572 				error = EFAULT;
573 				break;
574 			}
575 
576 			RWLOCK_EXIT(&ifs->ifs_ipf_global);
577 			WRITE_ENTER(&ifs->ifs_ipf_global);
578 
579 			/*
580 			 * We must recheck fr_enable_active here, since we've
581 			 * dropped ifs_ipf_global from R in order to get it
582 			 * exclusively.
583 			 */
584 			if (ifs->ifs_fr_enable_active == 0) {
585 				ifs->ifs_fr_enable_active = 1;
586 				error = fr_enableipf(ifs, enable);
587 				ifs->ifs_fr_enable_active = 0;
588 			}
589 		}
590 		break;
591 	case SIOCIPFSET :
592 		if (!(mode & FWRITE)) {
593 			error = EPERM;
594 			break;
595 		}
596 		/* FALLTHRU */
597 	case SIOCIPFGETNEXT :
598 	case SIOCIPFGET :
599 		error = fr_ipftune(cmd, (void *)data, ifs);
600 		break;
601 	case SIOCSETFF :
602 		if (!(mode & FWRITE))
603 			error = EPERM;
604 		else {
605 			error = COPYIN((caddr_t)data,
606 				       (caddr_t)&ifs->ifs_fr_flags,
607 				       sizeof(ifs->ifs_fr_flags));
608 			if (error != 0)
609 				error = EFAULT;
610 		}
611 		break;
612 	case SIOCIPFLP :
613 		error = COPYIN((caddr_t)data, (caddr_t)&tmp,
614 			       sizeof(tmp));
615 		if (error != 0)
616 			error = EFAULT;
617 		else
618 			error = fr_setipfloopback(tmp, ifs);
619 		break;
620 	case SIOCGETFF :
621 		error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data,
622 				sizeof(ifs->ifs_fr_flags));
623 		if (error != 0)
624 			error = EFAULT;
625 		break;
626 	case SIOCFUNCL :
627 		error = fr_resolvefunc((void *)data);
628 		break;
629 	case SIOCINAFR :
630 	case SIOCRMAFR :
631 	case SIOCADAFR :
632 	case SIOCZRLST :
633 		if (!(mode & FWRITE))
634 			error = EPERM;
635 		else
636 			error = frrequest(unit, cmd, (caddr_t)data,
637 					  ifs->ifs_fr_active, 1, ifs);
638 		break;
639 	case SIOCINIFR :
640 	case SIOCRMIFR :
641 	case SIOCADIFR :
642 		if (!(mode & FWRITE))
643 			error = EPERM;
644 		else
645 			error = frrequest(unit, cmd, (caddr_t)data,
646 					  1 - ifs->ifs_fr_active, 1, ifs);
647 		break;
648 	case SIOCSWAPA :
649 		if (!(mode & FWRITE))
650 			error = EPERM;
651 		else {
652 			WRITE_ENTER(&ifs->ifs_ipf_mutex);
653 			bzero((char *)ifs->ifs_frcache,
654 			    sizeof (ifs->ifs_frcache));
655 			error = COPYOUT((caddr_t)&ifs->ifs_fr_active,
656 					(caddr_t)data,
657 					sizeof(ifs->ifs_fr_active));
658 			if (error != 0)
659 				error = EFAULT;
660 			else
661 				ifs->ifs_fr_active = 1 - ifs->ifs_fr_active;
662 			RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
663 		}
664 		break;
665 	case SIOCGETFS :
666 		fr_getstat(&fio, ifs);
667 		error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT);
668 		break;
669 	case SIOCFRZST :
670 		if (!(mode & FWRITE))
671 			error = EPERM;
672 		else
673 			error = fr_zerostats((caddr_t)data, ifs);
674 		break;
675 	case	SIOCIPFFL :
676 		if (!(mode & FWRITE))
677 			error = EPERM;
678 		else {
679 			error = COPYIN((caddr_t)data, (caddr_t)&tmp,
680 				       sizeof(tmp));
681 			if (!error) {
682 				tmp = frflush(unit, 4, tmp, ifs);
683 				error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
684 						sizeof(tmp));
685 				if (error != 0)
686 					error = EFAULT;
687 			} else
688 				error = EFAULT;
689 		}
690 		break;
691 #ifdef USE_INET6
692 	case	SIOCIPFL6 :
693 		if (!(mode & FWRITE))
694 			error = EPERM;
695 		else {
696 			error = COPYIN((caddr_t)data, (caddr_t)&tmp,
697 				       sizeof(tmp));
698 			if (!error) {
699 				tmp = frflush(unit, 6, tmp, ifs);
700 				error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
701 						sizeof(tmp));
702 				if (error != 0)
703 					error = EFAULT;
704 			} else
705 				error = EFAULT;
706 		}
707 		break;
708 #endif
709 	case SIOCSTLCK :
710 		error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
711 		if (error == 0) {
712 			ifs->ifs_fr_state_lock = tmp;
713 			ifs->ifs_fr_nat_lock = tmp;
714 			ifs->ifs_fr_frag_lock = tmp;
715 			ifs->ifs_fr_auth_lock = tmp;
716 		} else
717 			error = EFAULT;
718 	break;
719 #ifdef	IPFILTER_LOG
720 	case	SIOCIPFFB :
721 		if (!(mode & FWRITE))
722 			error = EPERM;
723 		else {
724 			tmp = ipflog_clear(unit, ifs);
725 			error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
726 				       sizeof(tmp));
727 			if (error)
728 				error = EFAULT;
729 		}
730 		break;
731 #endif /* IPFILTER_LOG */
732 	case SIOCFRSYN :
733 		if (!(mode & FWRITE))
734 			error = EPERM;
735 		else {
736 			RWLOCK_EXIT(&ifs->ifs_ipf_global);
737 			WRITE_ENTER(&ifs->ifs_ipf_global);
738 
739 			frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
740 			fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
741 			fr_nataddrsync(0, NULL, NULL, ifs);
742 			fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
743 			error = 0;
744 		}
745 		break;
746 	case SIOCGFRST :
747 		error = fr_outobj((void *)data, fr_fragstats(ifs),
748 				  IPFOBJ_FRAGSTAT);
749 		break;
750 	case FIONREAD :
751 #ifdef	IPFILTER_LOG
752 		tmp = (int)ifs->ifs_iplused[IPL_LOGIPF];
753 
754 		error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp));
755 		if (error != 0)
756 			error = EFAULT;
757 #endif
758 		break;
759 	case SIOCIPFITER :
760 		error = ipf_frruleiter((caddr_t)data, crgetuid(cp),
761 				       curproc, ifs);
762 		break;
763 
764 	case SIOCGENITER :
765 		error = ipf_genericiter((caddr_t)data, crgetuid(cp),
766 					curproc, ifs);
767 		break;
768 
769 	case SIOCIPFDELTOK :
770 		error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
771 		if (error != 0) {
772 			error = EFAULT;
773 		} else {
774 			error = ipf_deltoken(tmp, crgetuid(cp), curproc, ifs);
775 		}
776 		break;
777 
778 	default :
779 #ifdef	IPFDEBUG
780 		cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p",
781 			cmd, (void *)data);
782 #endif
783 		error = EINVAL;
784 		break;
785 	}
786 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
787 	return error;
788 }
789 
790 
791 static int fr_enableipf(ifs, enable)
792 ipf_stack_t *ifs;
793 int enable;
794 {
795 	int error;
796 
797 	if (!enable) {
798 		error = ipldetach(ifs);
799 		if (error == 0)
800 			ifs->ifs_fr_running = -1;
801 		return error;
802 	}
803 
804 	if (ifs->ifs_fr_running > 0)
805 		return 0;
806 
807 	error = iplattach(ifs);
808 	if (error == 0) {
809 		if (ifs->ifs_fr_timer_id == NULL) {
810 			int hz = drv_usectohz(500000);
811 
812 			ifs->ifs_fr_timer_id = timeout(fr_slowtimer,
813 						       (void *)ifs,
814 						       hz);
815 		}
816 		ifs->ifs_fr_running = 1;
817 	} else {
818 		(void) ipldetach(ifs);
819 	}
820 	return error;
821 }
822 
823 
824 phy_if_t get_unit(name, v, ifs)
825 char *name;
826 int v;
827 ipf_stack_t *ifs;
828 {
829 	net_handle_t nif;
830 
831   	if (v == 4)
832  		nif = ifs->ifs_ipf_ipv4;
833   	else if (v == 6)
834  		nif = ifs->ifs_ipf_ipv6;
835   	else
836  		return 0;
837 
838  	return (net_phylookup(nif, name));
839 }
840 
841 /*
842  * routines below for saving IP headers to buffer
843  */
844 /*ARGSUSED*/
845 int iplopen(devp, flags, otype, cred)
846 dev_t *devp;
847 int flags, otype;
848 cred_t *cred;
849 {
850 	minor_t min = getminor(*devp);
851 
852 #ifdef	IPFDEBUG
853 	cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
854 #endif
855 	if (!(otype & OTYP_CHR))
856 		return ENXIO;
857 
858 	min = (IPL_LOGMAX < min) ? ENXIO : 0;
859 	return min;
860 }
861 
862 
863 /*ARGSUSED*/
864 int iplclose(dev, flags, otype, cred)
865 dev_t dev;
866 int flags, otype;
867 cred_t *cred;
868 {
869 	minor_t	min = getminor(dev);
870 
871 #ifdef	IPFDEBUG
872 	cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
873 #endif
874 
875 	min = (IPL_LOGMAX < min) ? ENXIO : 0;
876 	return min;
877 }
878 
#ifdef	IPFILTER_LOG
/*
 * iplread/ipllog
 * Both of these must operate with at least splnet(), lest they be called
 * during packet processing and cause an inconsistency to appear in the
 * filter lists.
 *
 * iplread returns EIO while the filter is not running.  Otherwise it
 * returns the result of ipflog_read() for the minor number of dev, or of
 * ipfsync_read() for IPL_LOGSYNC when IPFILTER_SYNC is compiled in.
 */
/*ARGSUSED*/
int iplread(dev, uio, cp)
dev_t dev;
register struct uio *uio;
cred_t *cp;
{
	ipf_stack_t *ifs;

	/*
	 * ipf_find_stack is called from user context, for the caller's own
	 * zone, to find that zone's stack pointer, so no hold/reference
	 * count is needed here.
	 */
	ifs = ipf_find_stack(crgetzoneid(cp));
	ASSERT(ifs != NULL);

# ifdef	IPFDEBUG
	cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
# endif

	if (ifs->ifs_fr_running < 1)
		return EIO;

# ifdef	IPFILTER_SYNC
	if (getminor(dev) == IPL_LOGSYNC)
		return ipfsync_read(uio);
# endif

	return ipflog_read(getminor(dev), uio, ifs);
}
#endif /* IPFILTER_LOG */
921 
922 
923 /*
924  * iplread/ipllog
925  * both of these must operate with at least splnet() lest they be
926  * called during packet processing and cause an inconsistancy to appear in
927  * the filter lists.
928  */
929 int iplwrite(dev, uio, cp)
930 dev_t dev;
931 register struct uio *uio;
932 cred_t *cp;
933 {
934 	ipf_stack_t *ifs;
935 
936         /*
937 	 * As we're calling ipf_find_stack in user space, from a given zone
938 	 * to find the stack pointer for this zone, there is no need to have
939 	 * a hold/refence count here.
940 	 */
941 	ifs = ipf_find_stack(crgetzoneid(cp));
942 	ASSERT(ifs != NULL);
943 
944 #ifdef	IPFDEBUG
945 	cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp);
946 #endif
947 
948 	if (ifs->ifs_fr_running < 1) {
949 		return EIO;
950 	}
951 
952 #ifdef	IPFILTER_SYNC
953 	if (getminor(dev) == IPL_LOGSYNC)
954 		return ipfsync_write(uio);
955 #endif /* IPFILTER_SYNC */
956 	dev = dev;	/* LINT */
957 	uio = uio;	/* LINT */
958 	cp = cp;	/* LINT */
959 	return ENXIO;
960 }
961 
962 
963 /*
964  * fr_send_reset - this could conceivably be a call to tcp_respond(), but that
965  * requires a large amount of setting up and isn't any more efficient.
966  */
967 int fr_send_reset(fin)
968 fr_info_t *fin;
969 {
970 	tcphdr_t *tcp, *tcp2;
971 	int tlen, hlen;
972 	mblk_t *m;
973 #ifdef	USE_INET6
974 	ip6_t *ip6;
975 #endif
976 	ip_t *ip;
977 
978 	tcp = fin->fin_dp;
979 	if (tcp->th_flags & TH_RST)
980 		return -1;
981 
982 #ifndef	IPFILTER_CKSUM
983 	if (fr_checkl4sum(fin) == -1)
984 		return -1;
985 #endif
986 
987 	tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0;
988 #ifdef	USE_INET6
989 	if (fin->fin_v == 6)
990 		hlen = sizeof(ip6_t);
991 	else
992 #endif
993 		hlen = sizeof(ip_t);
994 	hlen += sizeof(*tcp2);
995 	if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL)
996 		return -1;
997 
998 	m->b_rptr += 64;
999 	MTYPE(m) = M_DATA;
1000 	m->b_wptr = m->b_rptr + hlen;
1001 	ip = (ip_t *)m->b_rptr;
1002 	bzero((char *)ip, hlen);
1003 	tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
1004 	tcp2->th_dport = tcp->th_sport;
1005 	tcp2->th_sport = tcp->th_dport;
1006 	if (tcp->th_flags & TH_ACK) {
1007 		tcp2->th_seq = tcp->th_ack;
1008 		tcp2->th_flags = TH_RST;
1009 	} else {
1010 		tcp2->th_ack = ntohl(tcp->th_seq);
1011 		tcp2->th_ack += tlen;
1012 		tcp2->th_ack = htonl(tcp2->th_ack);
1013 		tcp2->th_flags = TH_RST|TH_ACK;
1014 	}
1015 	tcp2->th_off = sizeof(struct tcphdr) >> 2;
1016 
1017 	ip->ip_v = fin->fin_v;
1018 #ifdef	USE_INET6
1019 	if (fin->fin_v == 6) {
1020 		ip6 = (ip6_t *)m->b_rptr;
1021 		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1022 		ip6->ip6_src = fin->fin_dst6.in6;
1023 		ip6->ip6_dst = fin->fin_src6.in6;
1024 		ip6->ip6_plen = htons(sizeof(*tcp));
1025 		ip6->ip6_nxt = IPPROTO_TCP;
1026 		tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2);
1027 	} else
1028 #endif
1029 	{
1030 		ip->ip_src.s_addr = fin->fin_daddr;
1031 		ip->ip_dst.s_addr = fin->fin_saddr;
1032 		ip->ip_id = fr_nextipid(fin);
1033 		ip->ip_hl = sizeof(*ip) >> 2;
1034 		ip->ip_p = IPPROTO_TCP;
1035 		ip->ip_len = sizeof(*ip) + sizeof(*tcp);
1036 		ip->ip_tos = fin->fin_ip->ip_tos;
1037 		tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2);
1038 	}
1039 	return fr_send_ip(fin, m, &m);
1040 }
1041 
1042 /*
1043  * Function:	fr_send_ip
1044  * Returns:	 0: success
1045  *		-1: failed
1046  * Parameters:
1047  *	fin: packet information
1048  *	m: the message block where ip head starts
1049  *
1050  * Send a new packet through the IP stack.
1051  *
1052  * For IPv4 packets, ip_len must be in host byte order, and ip_v,
1053  * ip_ttl, ip_off, and ip_sum are ignored (filled in by this
1054  * function).
1055  *
1056  * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled
1057  * in by this function.
1058  *
1059  * All other portions of the packet must be in on-the-wire format.
1060  */
1061 /*ARGSUSED*/
1062 static int fr_send_ip(fin, m, mpp)
1063 fr_info_t *fin;
1064 mblk_t *m, **mpp;
1065 {
1066 	qpktinfo_t qpi, *qpip;
1067 	fr_info_t fnew;
1068 	ip_t *ip;
1069 	int i, hlen;
1070 	ipf_stack_t *ifs = fin->fin_ifs;
1071 
1072 	ip = (ip_t *)m->b_rptr;
1073 	bzero((char *)&fnew, sizeof(fnew));
1074 
1075 #ifdef	USE_INET6
1076 	if (fin->fin_v == 6) {
1077 		ip6_t *ip6;
1078 
1079 		ip6 = (ip6_t *)ip;
1080 		ip6->ip6_vfc = 0x60;
1081 		ip6->ip6_hlim = 127;
1082 		fnew.fin_v = 6;
1083 		hlen = sizeof(*ip6);
1084 		fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen;
1085 	} else
1086 #endif
1087 	{
1088 		fnew.fin_v = 4;
1089 #if SOLARIS2 >= 10
1090 		ip->ip_ttl = 255;
1091 		if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1)
1092 			ip->ip_off = htons(IP_DF);
1093 #else
1094 		if (ip_ttl_ptr != NULL)
1095 			ip->ip_ttl = (u_char)(*ip_ttl_ptr);
1096 		else
1097 			ip->ip_ttl = 63;
1098 		if (ip_mtudisc != NULL)
1099 			ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0);
1100 		else
1101 			ip->ip_off = htons(IP_DF);
1102 #endif
1103 		/*
1104 		 * The dance with byte order and ip_len/ip_off is because in
1105 		 * fr_fastroute, it expects them to be in host byte order but
1106 		 * ipf_cksum expects them to be in network byte order.
1107 		 */
1108 		ip->ip_len = htons(ip->ip_len);
1109 		ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip));
1110 		ip->ip_len = ntohs(ip->ip_len);
1111 		ip->ip_off = ntohs(ip->ip_off);
1112 		hlen = sizeof(*ip);
1113 		fnew.fin_plen = ip->ip_len;
1114 	}
1115 
1116 	qpip = fin->fin_qpi;
1117 	qpi.qpi_off = 0;
1118 	qpi.qpi_ill = qpip->qpi_ill;
1119 	qpi.qpi_m = m;
1120 	qpi.qpi_data = ip;
1121 	fnew.fin_qpi = &qpi;
1122 	fnew.fin_ifp = fin->fin_ifp;
1123 	fnew.fin_flx = FI_NOCKSUM;
1124 	fnew.fin_m = m;
1125 	fnew.fin_qfm = m;
1126 	fnew.fin_ip = ip;
1127 	fnew.fin_mp = mpp;
1128 	fnew.fin_hlen = hlen;
1129 	fnew.fin_dp = (char *)ip + hlen;
1130 	fnew.fin_ifs = fin->fin_ifs;
1131 	(void) fr_makefrip(hlen, ip, &fnew);
1132 
1133 	i = fr_fastroute(m, mpp, &fnew, NULL);
1134 	return i;
1135 }
1136 
1137 
1138 int fr_send_icmp_err(type, fin, dst)
1139 int type;
1140 fr_info_t *fin;
1141 int dst;
1142 {
1143 	struct in_addr dst4;
1144 	struct icmp *icmp;
1145 	qpktinfo_t *qpi;
1146 	int hlen, code;
1147 	phy_if_t phy;
1148 	u_short sz;
1149 #ifdef	USE_INET6
1150 	mblk_t *mb;
1151 #endif
1152 	mblk_t *m;
1153 #ifdef	USE_INET6
1154 	ip6_t *ip6;
1155 #endif
1156 	ip_t *ip;
1157 	ipf_stack_t *ifs = fin->fin_ifs;
1158 
1159 	if ((type < 0) || (type > ICMP_MAXTYPE))
1160 		return -1;
1161 
1162 	code = fin->fin_icode;
1163 #ifdef USE_INET6
1164 	if ((code < 0) || (code >= ICMP_MAX_UNREACH))
1165 		return -1;
1166 #endif
1167 
1168 #ifndef	IPFILTER_CKSUM
1169 	if (fr_checkl4sum(fin) == -1)
1170 		return -1;
1171 #endif
1172 
1173 	qpi = fin->fin_qpi;
1174 
1175 #ifdef	USE_INET6
1176 	mb = fin->fin_qfm;
1177 
1178 	if (fin->fin_v == 6) {
1179 		sz = sizeof(ip6_t);
1180 		sz += MIN(mb->b_wptr - mb->b_rptr, 512);
1181 		hlen = sizeof(ip6_t);
1182 		type = icmptoicmp6types[type];
1183 		if (type == ICMP6_DST_UNREACH)
1184 			code = icmptoicmp6unreach[code];
1185 	} else
1186 #endif
1187 	{
1188 		if ((fin->fin_p == IPPROTO_ICMP) &&
1189 		    !(fin->fin_flx & FI_SHORT))
1190 			switch (ntohs(fin->fin_data[0]) >> 8)
1191 			{
1192 			case ICMP_ECHO :
1193 			case ICMP_TSTAMP :
1194 			case ICMP_IREQ :
1195 			case ICMP_MASKREQ :
1196 				break;
1197 			default :
1198 				return 0;
1199 			}
1200 
1201 		sz = sizeof(ip_t) * 2;
1202 		sz += 8;		/* 64 bits of data */
1203 		hlen = sizeof(ip_t);
1204 	}
1205 
1206 	sz += offsetof(struct icmp, icmp_ip);
1207 	if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL)
1208 		return -1;
1209 	MTYPE(m) = M_DATA;
1210 	m->b_rptr += 64;
1211 	m->b_wptr = m->b_rptr + sz;
1212 	bzero((char *)m->b_rptr, (size_t)sz);
1213 	ip = (ip_t *)m->b_rptr;
1214 	ip->ip_v = fin->fin_v;
1215 	icmp = (struct icmp *)(m->b_rptr + hlen);
1216 	icmp->icmp_type = type & 0xff;
1217 	icmp->icmp_code = code & 0xff;
1218 	phy = (phy_if_t)qpi->qpi_ill;
1219 	if (type == ICMP_UNREACH && (phy != 0) &&
1220 	    fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
1221 		icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 );
1222 
1223 #ifdef	USE_INET6
1224 	if (fin->fin_v == 6) {
1225 		struct in6_addr dst6;
1226 		int csz;
1227 
1228 		if (dst == 0) {
1229 			ipf_stack_t *ifs = fin->fin_ifs;
1230 
1231 			if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy,
1232 				       (void *)&dst6, NULL, ifs) == -1) {
1233 				FREE_MB_T(m);
1234 				return -1;
1235 			}
1236 		} else
1237 			dst6 = fin->fin_dst6.in6;
1238 
1239 		csz = sz;
1240 		sz -= sizeof(ip6_t);
1241 		ip6 = (ip6_t *)m->b_rptr;
1242 		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1243 		ip6->ip6_plen = htons((u_short)sz);
1244 		ip6->ip6_nxt = IPPROTO_ICMPV6;
1245 		ip6->ip6_src = dst6;
1246 		ip6->ip6_dst = fin->fin_src6.in6;
1247 		sz -= offsetof(struct icmp, icmp_ip);
1248 		bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz);
1249 		icmp->icmp_cksum = csz - sizeof(ip6_t);
1250 	} else
1251 #endif
1252 	{
1253 		ip->ip_hl = sizeof(*ip) >> 2;
1254 		ip->ip_p = IPPROTO_ICMP;
1255 		ip->ip_id = fin->fin_ip->ip_id;
1256 		ip->ip_tos = fin->fin_ip->ip_tos;
1257 		ip->ip_len = (u_short)sz;
1258 		if (dst == 0) {
1259 			ipf_stack_t *ifs = fin->fin_ifs;
1260 
1261 			if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy,
1262 				       (void *)&dst4, NULL, ifs) == -1) {
1263 				FREE_MB_T(m);
1264 				return -1;
1265 			}
1266 		} else {
1267 			dst4 = fin->fin_dst;
1268 		}
1269 		ip->ip_src = dst4;
1270 		ip->ip_dst = fin->fin_src;
1271 		bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip,
1272 		      sizeof(*fin->fin_ip));
1273 		bcopy((char *)fin->fin_ip + fin->fin_hlen,
1274 		      (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8);
1275 		icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len);
1276 		icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off);
1277 		icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
1278 					     sz - sizeof(ip_t));
1279 	}
1280 
1281 	/*
1282 	 * Need to exit out of these so we don't recursively call rw_enter
1283 	 * from fr_qout.
1284 	 */
1285 	return fr_send_ip(fin, m, &m);
1286 }
1287 
1288 #include <sys/time.h>
1289 #include <sys/varargs.h>
1290 
1291 #ifndef _KERNEL
1292 #include <stdio.h>
1293 #endif
1294 
1295 #define	NULLADDR_RATE_LIMIT 10	/* 10 seconds */
1296 
1297 
1298 /*
1299  * Print out warning message at rate-limited speed.
1300  */
1301 static void rate_limit_message(ipf_stack_t *ifs,
1302 			       int rate, const char *message, ...)
1303 {
1304 	static time_t last_time = 0;
1305 	time_t now;
1306 	va_list args;
1307 	char msg_buf[256];
1308 	int  need_printed = 0;
1309 
1310 	now = ddi_get_time();
1311 
1312 	/* make sure, no multiple entries */
1313 	ASSERT(MUTEX_NOT_HELD(&(ifs->ifs_ipf_rw.ipf_lk)));
1314 	MUTEX_ENTER(&ifs->ifs_ipf_rw);
1315 	if (now - last_time >= rate) {
1316 		need_printed = 1;
1317 		last_time = now;
1318 	}
1319 	MUTEX_EXIT(&ifs->ifs_ipf_rw);
1320 
1321 	if (need_printed) {
1322 		va_start(args, message);
1323 		(void)vsnprintf(msg_buf, 255, message, args);
1324 		va_end(args);
1325 #ifdef _KERNEL
1326 		cmn_err(CE_WARN, msg_buf);
1327 #else
1328 		fprintf(std_err, msg_buf);
1329 #endif
1330 	}
1331 }
1332 
1333 /*
1334  * Return the first IP Address associated with an interface
1335  * For IPv6, we walk through the list of logical interfaces and return
1336  * the address of the first one that isn't a link-local interface.
1337  * We can't assume that it is :1 because another link-local address
1338  * may have been assigned there.
1339  */
1340 /*ARGSUSED*/
1341 int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs)
1342 int v, atype;
1343 void *ifptr;
1344 struct in_addr  *inp, *inpmask;
1345 ipf_stack_t *ifs;
1346 {
1347 	struct sockaddr_in6 v6addr[2];
1348 	struct sockaddr_in v4addr[2];
1349 	net_ifaddr_t type[2];
1350 	net_handle_t net_data;
1351 	phy_if_t phyif;
1352 	void *array;
1353 
1354 	switch (v)
1355 	{
1356 	case 4:
1357 		net_data = ifs->ifs_ipf_ipv4;
1358 		array = v4addr;
1359 		break;
1360 	case 6:
1361 		net_data = ifs->ifs_ipf_ipv6;
1362 		array = v6addr;
1363 		break;
1364 	default:
1365 		net_data = NULL;
1366 		break;
1367 	}
1368 
1369 	if (net_data == NULL)
1370 		return -1;
1371 
1372 	phyif = (phy_if_t)ifptr;
1373 
1374 	switch (atype)
1375 	{
1376 	case FRI_PEERADDR :
1377 		type[0] = NA_PEER;
1378 		break;
1379 
1380 	case FRI_BROADCAST :
1381 		type[0] = NA_BROADCAST;
1382 		break;
1383 
1384 	default :
1385 		type[0] = NA_ADDRESS;
1386 		break;
1387 	}
1388 
1389 	type[1] = NA_NETMASK;
1390 
1391 	if (v == 6) {
1392 		lif_if_t idx = 0;
1393 
1394 		do {
1395 			idx = net_lifgetnext(net_data, phyif, idx);
1396 			if (net_getlifaddr(net_data, phyif, idx, 2, type,
1397 					   array) < 0)
1398 				return -1;
1399 			if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) &&
1400 			    !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr))
1401 				break;
1402 		} while (idx != 0);
1403 
1404 		if (idx == 0)
1405 			return -1;
1406 
1407 		return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1],
1408 					inp, inpmask);
1409 	}
1410 
1411 	if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0)
1412 		return -1;
1413 
1414 	return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask);
1415 }
1416 
1417 
1418 u_32_t fr_newisn(fin)
1419 fr_info_t *fin;
1420 {
1421 	static int iss_seq_off = 0;
1422 	u_char hash[16];
1423 	u_32_t newiss;
1424 	MD5_CTX ctx;
1425 	ipf_stack_t *ifs = fin->fin_ifs;
1426 
1427 	/*
1428 	 * Compute the base value of the ISS.  It is a hash
1429 	 * of (saddr, sport, daddr, dport, secret).
1430 	 */
1431 	MD5Init(&ctx);
1432 
1433 	MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src,
1434 		  sizeof(fin->fin_fi.fi_src));
1435 	MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst,
1436 		  sizeof(fin->fin_fi.fi_dst));
1437 	MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat));
1438 
1439 	MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret));
1440 
1441 	MD5Final(hash, &ctx);
1442 
1443 	bcopy(hash, &newiss, sizeof(newiss));
1444 
1445 	/*
1446 	 * Now increment our "timer", and add it in to
1447 	 * the computed value.
1448 	 *
1449 	 * XXX Use `addin'?
1450 	 * XXX TCP_ISSINCR too large to use?
1451 	 */
1452 	iss_seq_off += 0x00010000;
1453 	newiss += iss_seq_off;
1454 	return newiss;
1455 }
1456 
1457 
1458 /* ------------------------------------------------------------------------ */
1459 /* Function:    fr_nextipid                                                 */
1460 /* Returns:     int - 0 == success, -1 == error (packet should be droppped) */
1461 /* Parameters:  fin(I) - pointer to packet information                      */
1462 /*                                                                          */
1463 /* Returns the next IPv4 ID to use for this packet.                         */
1464 /* ------------------------------------------------------------------------ */
1465 u_short fr_nextipid(fin)
1466 fr_info_t *fin;
1467 {
1468 	static u_short ipid = 0;
1469 	u_short id;
1470 	ipf_stack_t *ifs = fin->fin_ifs;
1471 
1472 	MUTEX_ENTER(&ifs->ifs_ipf_rw);
1473 	if (fin->fin_pktnum != 0) {
1474 		id = fin->fin_pktnum & 0xffff;
1475 	} else {
1476 		id = ipid++;
1477 	}
1478 	MUTEX_EXIT(&ifs->ifs_ipf_rw);
1479 
1480 	return id;
1481 }
1482 
1483 
1484 #ifndef IPFILTER_CKSUM
1485 /* ARGSUSED */
1486 #endif
1487 INLINE void fr_checkv4sum(fin)
1488 fr_info_t *fin;
1489 {
1490 #ifdef IPFILTER_CKSUM
1491 	if (fr_checkl4sum(fin) == -1)
1492 		fin->fin_flx |= FI_BAD;
1493 #endif
1494 }
1495 
1496 
#ifdef USE_INET6
/*
 * Function:	fr_checkv6sum
 * Returns:	void
 * Parameters:	fin - packet information
 *
 * IPv6 twin of fr_checkv4sum: when built with IPFILTER_CKSUM, flag the
 * packet as FI_BAD if fr_checkl4sum() returns -1 for it.
 */
# ifndef IPFILTER_CKSUM
/* ARGSUSED */
# endif
INLINE void fr_checkv6sum(fin)
fr_info_t *fin;
{
# ifdef IPFILTER_CKSUM
	if (fr_checkl4sum(fin) != -1)
		return;

	fin->fin_flx |= FI_BAD;
# endif
}
#endif /* USE_INET6 */
1510 
1511 
1512 #if (SOLARIS2 < 7)
1513 void fr_slowtimer()
1514 #else
1515 /*ARGSUSED*/
1516 void fr_slowtimer __P((void *arg))
1517 #endif
1518 {
1519 	ipf_stack_t *ifs = arg;
1520 
1521 	READ_ENTER(&ifs->ifs_ipf_global);
1522 	if (ifs->ifs_fr_running != 1) {
1523 		ifs->ifs_fr_timer_id = NULL;
1524 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
1525 		return;
1526 	}
1527 	ipf_expiretokens(ifs);
1528 	fr_fragexpire(ifs);
1529 	fr_timeoutstate(ifs);
1530 	fr_natexpire(ifs);
1531 	fr_authexpire(ifs);
1532 	ifs->ifs_fr_ticks++;
1533 	if (ifs->ifs_fr_running == 1)
1534 		ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg,
1535 		    drv_usectohz(500000));
1536 	else
1537 		ifs->ifs_fr_timer_id = NULL;
1538 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
1539 }
1540 
1541 
/* ------------------------------------------------------------------------ */
/* Function:    fr_pullup                                                   */
/* Returns:     NULL == pullup failed, else pointer to protocol header      */
/* Parameters:  m(I)   - pointer to buffer where data packet starts         */
/*              fin(I) - pointer to packet information                      */
/*              len(I) - number of bytes to pullup                          */
/*                                                                          */
/* Attempt to move at least len bytes (from the start of the buffer) into a */
/* single buffer for ease of access.  Operating system native functions are */
/* used to manage buffers - if necessary.  If the entire packet ends up in  */
/* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has  */
/* not been called.  Both fin_ip and fin_dp are updated before exiting _IF_ */
/* and ONLY if the pullup succeeds.                                         */
/*                                                                          */
/* If the pullup fails, the whole chain at *fin->fin_mp is freed and        */
/* *fin->fin_mp, fin_m, fin_ip, fin_dp and qpi_data are all set to NULL.    */
/*                                                                          */
/* We assume that 'min' is a pointer to a buffer that is part of the chain  */
/* of buffers that starts at *fin->fin_mp.                                  */
/* ------------------------------------------------------------------------ */
void *fr_pullup(min, fin, len)
mb_t *min;
fr_info_t *fin;
int len;
{
	qpktinfo_t *qpi = fin->fin_qpi;
	int out = fin->fin_out, dpoff, ipoff;
	mb_t *m = min, *m1, *m2;
	char *ip;
	uint32_t start, stuff, end, value, flags;
	ipf_stack_t *ifs = fin->fin_ifs;

	if (m == NULL)
		return NULL;

	/* Already marked as coalesced: nothing to do. */
	ip = (char *)fin->fin_ip;
	if ((fin->fin_flx & FI_COALESCE) != 0)
		return ip;

	/*
	 * Remember where fin_dp sits relative to the IP header so that it
	 * can be re-derived from the (possibly moved) header afterwards.
	 */
	ipoff = fin->fin_ipoff;
	if (fin->fin_dp != NULL)
		dpoff = (char *)fin->fin_dp - (char *)ip;
	else
		dpoff = 0;

	/* Only pull up when the first buffer is too short. */
	if (M_LEN(m) < len + ipoff) {

		/*
		 * If the IP header offset is not a multiple of 4, move
		 * b_rptr back by the 'inc' bytes needed to round the offset
		 * up, provided that stays inside the data block; b_rptr is
		 * advanced by the same amount again after the pullup.  When
		 * there is no room, inc stays 0 and nothing is adjusted.
		 */
		int inc = 0;

		if (ipoff > 0) {
			if ((ipoff & 3) != 0) {
				inc = 4 - (ipoff & 3);
				if (m->b_rptr - inc >= m->b_datap->db_base)
					m->b_rptr -= inc;
				else
					inc = 0;
			}
		}

		/*
		 * XXX This is here as a work around for a bug with DEBUG
		 * XXX Solaris kernels.  The problem is b_prev is used by IP
		 * XXX code as a way to stash the phyint_index for a packet,
		 * XXX this doesn't get reset by IP but freeb does an ASSERT()
		 * XXX for both of these to be NULL.  See 6442390.
		 */
		/* Only the head's b_prev is saved (and restored below). */
		m1 = m;
		m2 = m->b_prev;

		do {
			m1->b_next = NULL;
			m1->b_prev = NULL;
			m1 = m1->b_cont;
		} while (m1);

		/*
		 * Need to preserve checksum information by copying it
		 * to the message block that heads the pulled-up message.
		 */
		hcksum_retrieve(m, NULL, NULL, &start, &stuff, &end,
		    &value, &flags);

		if (pullupmsg(m, len + ipoff + inc) == 0) {
			/*
			 * Failure: count it, free the whole chain and clear
			 * every pointer that referred into it.
			 */
			ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]);
			FREE_MB_T(*fin->fin_mp);
			*fin->fin_mp = NULL;
			fin->fin_m = NULL;
			fin->fin_ip = NULL;
			fin->fin_dp = NULL;
			qpi->qpi_data = NULL;
			return NULL;
		}

		(void) hcksum_assoc(m, NULL, NULL, start, stuff, end,
		    value, flags, 0);

		/* Undo the temporary b_prev/b_rptr changes made above. */
		m->b_prev = m2;
		m->b_rptr += inc;
		fin->fin_m = m;
		ip = MTOD(m, char *) + ipoff;
		qpi->qpi_data = ip;
	}

	/* Counted whether or not pullupmsg() actually had to be called. */
	ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]);
	fin->fin_ip = (ip_t *)ip;
	if (fin->fin_dp != NULL)
		fin->fin_dp = (char *)fin->fin_ip + dpoff;

	/* The caller asked for the whole packet. */
	if (len == fin->fin_plen)
		fin->fin_flx |= FI_COALESCE;
	return ip;
}
1656 
1657 
1658 /*
1659  * Function:	fr_verifysrc
1660  * Returns:	int (really boolean)
1661  * Parameters:	fin - packet information
1662  *
1663  * Check whether the packet has a valid source address for the interface on
1664  * which the packet arrived, implementing the "fr_chksrc" feature.
1665  * Returns true iff the packet's source address is valid.
1666  */
1667 int fr_verifysrc(fin)
1668 fr_info_t *fin;
1669 {
1670 	net_handle_t net_data_p;
1671 	phy_if_t phy_ifdata_routeto;
1672 	struct sockaddr	sin;
1673 	ipf_stack_t *ifs = fin->fin_ifs;
1674 
1675 	if (fin->fin_v == 4) {
1676 		net_data_p = ifs->ifs_ipf_ipv4;
1677 	} else if (fin->fin_v == 6) {
1678 		net_data_p = ifs->ifs_ipf_ipv6;
1679 	} else {
1680 		return (0);
1681 	}
1682 
1683 	/* Get the index corresponding to the if name */
1684 	sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
1685 	bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr));
1686 	phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL);
1687 
1688 	return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0);
1689 }
1690 
1691 
/*
 * Function:	fr_fastroute
 * Returns:	 0: success;
 *		-1: failed
 * Parameters:
 *	mb: the message block where ip head starts
 *	mpp: the pointer to the pointer of the original
 *		packet message
 *	fin: packet information
 *	fdp: destination interface information
 *	if it is NULL, no interface information provided.
 *
 * This function is for fastroute/to/dup-to rules.  It fills in a
 * net_inject_t with the packet, the destination address and the
 * outgoing interface, and hands it to net_inject() with NI_DIRECT_OUT.
 * The destination address depends on the following conditions:
 * 1: for fastroute rule, fdp is passed in as NULL, so the
 *	destination address is the IP Packet's destination address
 *	and the interface is chosen with net_routeto()
 * 2: for to/dup-to rule, the address and interface stored in fdp
 *	(fd_ip/fd_ip6 and fd_ifp) are used
 *
 * On the bad_fastroute path the packet (mb) is freed here.
 * NOTE(review): the two early "return -1" exits and the net_inject()
 * failure exit do not free mb in this function - confirm who owns the
 * packet in those cases.
 */
int fr_fastroute(mb, mpp, fin, fdp)
mblk_t *mb, **mpp;
fr_info_t *fin;
frdest_t *fdp;
{
        net_handle_t net_data_p;
	net_inject_t *inj;
	mblk_t *mp = NULL;
	frentry_t *fr = fin->fin_fr;
	qpktinfo_t *qpi;
	ip_t *ip;

	struct sockaddr_in *sin;
	struct sockaddr_in6 *sin6;
	struct sockaddr *sinp;
	ipf_stack_t *ifs = fin->fin_ifs;
#ifndef	sparc
	u_short __iplen, __ipoff;
#endif

	/* Select the netinfo handle matching the IP version. */
	if (fin->fin_v == 4) {
		net_data_p = ifs->ifs_ipf_ipv4;
	} else if (fin->fin_v == 6) {
		net_data_p = ifs->ifs_ipf_ipv6;
	} else {
		return (-1);
	}

	inj = net_inject_alloc(NETINFO_VERSION);
	if (inj == NULL)
		return -1;

	ip = fin->fin_ip;
	qpi = fin->fin_qpi;

	/*
	 * If this is a duplicate mblk then we want ip to point at that
	 * data, not the original, if and only if it is already pointing at
	 * the current mblk data.
	 *
	 * Otherwise, if it's not a duplicate, and we're not already pointing
	 * at the current mblk data, then we want to ensure that the data
	 * points at ip.
	 */

	if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) {
		ip = (ip_t *)mb->b_rptr;
	} else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) {
		qpi->qpi_m->b_rptr = (uchar_t *)ip;
		qpi->qpi_off = 0;
	}

	/*
	 * If there is another M_PROTO, we don't want it
	 */
	if (*mpp != mb) {
		mp = unlinkb(*mpp);
		freeb(*mpp);
		*mpp = mp;
	}

	/* sin and sin6 are two typed views of the same ni_addr storage. */
	sinp = (struct sockaddr *)&inj->ni_addr;
	sin = (struct sockaddr_in *)sinp;
	sin6 = (struct sockaddr_in6 *)sinp;
	bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr));
	inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
	inj->ni_packet = mb;

	/*
	 * In case we're here due to "to <if>" being used with
	 * "keep state", check that we're going in the correct
	 * direction.
	 */
	if (fdp != NULL) {
		if ((fr != NULL) && (fdp->fd_ifp != NULL) &&
			(fin->fin_rev != 0) && (fdp == &fr->fr_tif))
			goto bad_fastroute;
		inj->ni_physical = (phy_if_t)fdp->fd_ifp;
		if (fin->fin_v == 4) {
			sin->sin_addr = fdp->fd_ip;
		} else {
			sin6->sin6_addr = fdp->fd_ip6.in6;
		}
	} else {
		/* No fdp: use the packet's own destination and route. */
		if (fin->fin_v == 4) {
			sin->sin_addr = ip->ip_dst;
		} else {
			sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst;
		}
		inj->ni_physical = net_routeto(net_data_p, sinp, NULL);
	}

	/*
	 * Clear the hardware checksum flags from packets that we are doing
	 * input processing on as leaving them set will cause the outgoing
	 * NIC (if it supports hardware checksum) to calculate them anew,
	 * using the old (correct) checksums as the pseudo value to start
	 * from.
	 */
	if (fin->fin_out == 0) {
		DB_CKSUMFLAGS(mb) = 0;
	}

	*mpp = mb;

	/*
	 * An inbound packet is about to leave again: temporarily present
	 * it as outbound on the chosen interface so that accounting, state
	 * and outbound NAT are applied, then restore fin_out and fin_ifp.
	 * (They are not restored on the bad_fastroute exit.)
	 */
	if (fin->fin_out == 0) {
		void *saveifp;
		u_32_t pass;

		saveifp = fin->fin_ifp;
		fin->fin_ifp = (void *)inj->ni_physical;
		fin->fin_flx &= ~FI_STATE;
		fin->fin_out = 1;
		(void) fr_acctpkt(fin, &pass);
		fin->fin_fr = NULL;
		if (!fr || !(fr->fr_flags & FR_RETMASK))
			(void) fr_checkstate(fin, &pass);
		if (fr_checknatout(fin, NULL) == -1)
			goto bad_fastroute;
		fin->fin_out = 0;
		fin->fin_ifp = saveifp;
	}
#ifndef	sparc
	/* Put ip_len and ip_off into network byte order before injection. */
	if (fin->fin_v == 4) {
		__iplen = (u_short)ip->ip_len,
		__ipoff = (u_short)ip->ip_off;

		ip->ip_len = htons(__iplen);
		ip->ip_off = htons(__ipoff);
	}
#endif

	if (net_data_p) {
		if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) {
			net_inject_free(inj);
			return (-1);
		}
	}

	/* fr_frouteok[0] counts successes, fr_frouteok[1] failures. */
	ifs->ifs_fr_frouteok[0]++;
	net_inject_free(inj);
	return 0;
bad_fastroute:
	net_inject_free(inj);
	freemsg(mb);
	ifs->ifs_fr_frouteok[1]++;
	return -1;
}
1862 
1863 
1864 /* ------------------------------------------------------------------------ */
1865 /* Function:    ipf_hook4_out                                               */
1866 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1867 /* Parameters:  event(I)     - pointer to event                             */
1868 /*              info(I)      - pointer to hook information for firewalling  */
1869 /*                                                                          */
1870 /* Calling ipf_hook.                                                        */
1871 /* ------------------------------------------------------------------------ */
1872 /*ARGSUSED*/
1873 int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg)
1874 {
1875 	return ipf_hook(info, 1, 0, arg);
1876 }
1877 /*ARGSUSED*/
1878 int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg)
1879 {
1880 	return ipf_hook6(info, 1, 0, arg);
1881 }
1882 
1883 /* ------------------------------------------------------------------------ */
1884 /* Function:    ipf_hook4_in                                                */
1885 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1886 /* Parameters:  event(I)     - pointer to event                             */
1887 /*              info(I)      - pointer to hook information for firewalling  */
1888 /*                                                                          */
1889 /* Calling ipf_hook.                                                        */
1890 /* ------------------------------------------------------------------------ */
1891 /*ARGSUSED*/
1892 int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg)
1893 {
1894 	return ipf_hook(info, 0, 0, arg);
1895 }
1896 /*ARGSUSED*/
1897 int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg)
1898 {
1899 	return ipf_hook6(info, 0, 0, arg);
1900 }
1901 
1902 
1903 /* ------------------------------------------------------------------------ */
1904 /* Function:    ipf_hook4_loop_out                                          */
1905 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1906 /* Parameters:  event(I)     - pointer to event                             */
1907 /*              info(I)      - pointer to hook information for firewalling  */
1908 /*                                                                          */
1909 /* Calling ipf_hook.                                                        */
1910 /* ------------------------------------------------------------------------ */
1911 /*ARGSUSED*/
1912 int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
1913 {
1914 	return ipf_hook(info, 1, FI_NOCKSUM, arg);
1915 }
1916 /*ARGSUSED*/
1917 int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
1918 {
1919 	return ipf_hook6(info, 1, FI_NOCKSUM, arg);
1920 }
1921 
1922 /* ------------------------------------------------------------------------ */
1923 /* Function:    ipf_hook4_loop_in                                           */
1924 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1925 /* Parameters:  event(I)     - pointer to event                             */
1926 /*              info(I)      - pointer to hook information for firewalling  */
1927 /*                                                                          */
1928 /* Calling ipf_hook.                                                        */
1929 /* ------------------------------------------------------------------------ */
1930 /*ARGSUSED*/
1931 int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
1932 {
1933 	return ipf_hook(info, 0, FI_NOCKSUM, arg);
1934 }
1935 /*ARGSUSED*/
1936 int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
1937 {
1938 	return ipf_hook6(info, 0, FI_NOCKSUM, arg);
1939 }
1940 
1941 /* ------------------------------------------------------------------------ */
1942 /* Function:    ipf_hook                                                    */
1943 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1944 /* Parameters:  info(I)      - pointer to hook information for firewalling  */
1945 /*              out(I)       - whether packet is going in or out            */
1946 /*              loopback(I)  - whether packet is a loopback packet or not   */
1947 /*                                                                          */
1948 /* Stepping stone function between the IP mainline and IPFilter.  Extracts  */
1949 /* parameters out of the info structure and forms them up to be useful for  */
1950 /* calling ipfilter.                                                        */
1951 /* ------------------------------------------------------------------------ */
1952 int ipf_hook(hook_data_t info, int out, int loopback, void *arg)
1953 {
1954 	hook_pkt_event_t *fw;
1955 	ipf_stack_t *ifs;
1956 	qpktinfo_t qpi;
1957 	int rval, hlen;
1958 	u_short swap;
1959 	phy_if_t phy;
1960 	ip_t *ip;
1961 
1962 	ifs = arg;
1963 	fw = (hook_pkt_event_t *)info;
1964 
1965 	ASSERT(fw != NULL);
1966 	phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
1967 
1968 	ip = fw->hpe_hdr;
1969 	swap = ntohs(ip->ip_len);
1970 	ip->ip_len = swap;
1971 	swap = ntohs(ip->ip_off);
1972 	ip->ip_off = swap;
1973 	hlen = IPH_HDR_LENGTH(ip);
1974 
1975 	qpi.qpi_m = fw->hpe_mb;
1976 	qpi.qpi_data = fw->hpe_hdr;
1977 	qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
1978 	qpi.qpi_ill = (void *)phy;
1979 	qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
1980 	if (qpi.qpi_flags)
1981 		qpi.qpi_flags |= FI_MBCAST;
1982 	qpi.qpi_flags |= loopback;
1983 
1984 	rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
1985 	    &qpi, fw->hpe_mp, ifs);
1986 
1987 	/* For fastroute cases, fr_check returns 0 with mp set to NULL */
1988 	if (rval == 0 && *(fw->hpe_mp) == NULL)
1989 		rval = 1;
1990 
1991 	/* Notify IP the packet mblk_t and IP header pointers. */
1992 	fw->hpe_mb = qpi.qpi_m;
1993 	fw->hpe_hdr = qpi.qpi_data;
1994 	if (rval == 0) {
1995 		ip = qpi.qpi_data;
1996 		swap = ntohs(ip->ip_len);
1997 		ip->ip_len = swap;
1998 		swap = ntohs(ip->ip_off);
1999 		ip->ip_off = swap;
2000 	}
2001 	return rval;
2002 
2003 }
2004 int ipf_hook6(hook_data_t info, int out, int loopback, void *arg)
2005 {
2006 	hook_pkt_event_t *fw;
2007 	int rval, hlen;
2008 	qpktinfo_t qpi;
2009 	phy_if_t phy;
2010 
2011 	fw = (hook_pkt_event_t *)info;
2012 
2013 	ASSERT(fw != NULL);
2014 	phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
2015 
2016 	hlen = sizeof (ip6_t);
2017 
2018 	qpi.qpi_m = fw->hpe_mb;
2019 	qpi.qpi_data = fw->hpe_hdr;
2020 	qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
2021 	qpi.qpi_ill = (void *)phy;
2022 	qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
2023 	if (qpi.qpi_flags)
2024 		qpi.qpi_flags |= FI_MBCAST;
2025 	qpi.qpi_flags |= loopback;
2026 
2027 	rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
2028 	    &qpi, fw->hpe_mp, arg);
2029 
2030 	/* For fastroute cases, fr_check returns 0 with mp set to NULL */
2031 	if (rval == 0 && *(fw->hpe_mp) == NULL)
2032 		rval = 1;
2033 
2034 	/* Notify IP the packet mblk_t and IP header pointers. */
2035 	fw->hpe_mb = qpi.qpi_m;
2036 	fw->hpe_hdr = qpi.qpi_data;
2037 	return rval;
2038 
2039 }
2040 
2041 
2042 /* ------------------------------------------------------------------------ */
2043 /* Function:    ipf_nic_event_v4                                            */
2044 /* Returns:     int - 0 == no problems encountered                          */
2045 /* Parameters:  event(I)     - pointer to event                             */
2046 /*              info(I)      - pointer to information about a NIC event     */
2047 /*                                                                          */
2048 /* Function to receive asynchronous NIC events from IP                      */
2049 /* ------------------------------------------------------------------------ */
2050 /*ARGSUSED*/
2051 int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg)
2052 {
2053 	struct sockaddr_in *sin;
2054 	hook_nic_event_t *hn;
2055 	ipf_stack_t *ifs = arg;
2056 	void *new_ifp = NULL;
2057 
2058 	if (ifs->ifs_fr_running <= 0)
2059 		return (0);
2060 
2061 	hn = (hook_nic_event_t *)info;
2062 
2063 	switch (hn->hne_event)
2064 	{
2065 	case NE_PLUMB :
2066 		frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data,
2067 		       ifs);
2068 		fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2069 			      hn->hne_data, ifs);
2070 		fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2071 			     hn->hne_data, ifs);
2072 		break;
2073 
2074 	case NE_UNPLUMB :
2075 		frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2076 		fr_natifpsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL,
2077 			      ifs);
2078 		fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2079 		break;
2080 
2081 	case NE_ADDRESS_CHANGE :
2082 		/*
2083 		 * We only respond to events for logical interface 0 because
2084 		 * IPFilter only uses the first address given to a network
2085 		 * interface.  We check for hne_lif==1 because the netinfo
2086 		 * code maps adds 1 to the lif number so that it can return
2087 		 * 0 to indicate "no more lifs" when walking them.
2088 		 */
2089 		if (hn->hne_lif == 1) {
2090 			frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL,
2091 			    ifs);
2092 			sin = hn->hne_data;
2093 			fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr,
2094 			    ifs);
2095 		}
2096 		break;
2097 
2098 #if SOLARIS2 >= 10
2099 	case NE_IFINDEX_CHANGE :
2100 		WRITE_ENTER(&ifs->ifs_ipf_mutex);
2101 
2102 		if (hn->hne_data != NULL) {
2103 			/*
2104 			 * The netinfo passes interface index as int (hne_data should be
2105 			 * handled as a pointer to int), which is always 32bit. We need to
2106 			 * convert it to void pointer here, since interfaces are
2107 			 * represented as pointers to void in IPF. The pointers are 64 bits
2108 			 * long on 64bit platforms. Doing something like
2109 			 *	(void *)((int) x)
2110 			 * will throw warning:
2111 			 *   "cast to pointer from integer of different size"
2112 			 * during 64bit compilation.
2113 			 *
2114 			 * The line below uses (size_t) to typecast int to
2115 			 * size_t, which might be 64bit/32bit (depending
2116 			 * on architecture). Once we have proper 64bit/32bit
2117 			 * type (size_t), we can safely convert it to void pointer.
2118 			 */
2119 			new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2120 			fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2121 			fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2122 			fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2123 		}
2124 		RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2125 		break;
2126 #endif
2127 
2128 	default :
2129 		break;
2130 	}
2131 
2132 	return 0;
2133 }
2134 
2135 
2136 /* ------------------------------------------------------------------------ */
2137 /* Function:    ipf_nic_event_v6                                            */
2138 /* Returns:     int - 0 == no problems encountered                          */
/* Parameters:  event(I)     - pointer to event                             */
/*              info(I)      - pointer to information about a NIC event     */
/*              arg(I)       - pointer to the ipf_stack_t instance to use   */
2141 /*                                                                          */
2142 /* Function to receive asynchronous NIC events from IP                      */
2143 /* ------------------------------------------------------------------------ */
2144 /*ARGSUSED*/
2145 int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg)
2146 {
2147 	struct sockaddr_in6 *sin6;
2148 	hook_nic_event_t *hn;
2149 	ipf_stack_t *ifs = arg;
2150 	void *new_ifp = NULL;
2151 
2152 	if (ifs->ifs_fr_running <= 0)
2153 		return (0);
2154 
2155 	hn = (hook_nic_event_t *)info;
2156 
2157 	switch (hn->hne_event)
2158 	{
2159 	case NE_PLUMB :
2160 		frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2161 		       hn->hne_data, ifs);
2162 		fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2163 			      hn->hne_data, ifs);
2164 		fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2165 			     hn->hne_data, ifs);
2166 		break;
2167 
2168 	case NE_UNPLUMB :
2169 		frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2170 		fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL,
2171 			      ifs);
2172 		fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2173 		break;
2174 
2175 	case NE_ADDRESS_CHANGE :
2176 		if (hn->hne_lif == 1) {
2177 			sin6 = hn->hne_data;
2178 			fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr,
2179 				       ifs);
2180 		}
2181 		break;
2182 
2183 #if SOLARIS2 >= 10
2184 	case NE_IFINDEX_CHANGE :
2185 		WRITE_ENTER(&ifs->ifs_ipf_mutex);
2186 		if (hn->hne_data != NULL) {
2187 			/*
2188 			 * The netinfo passes interface index as int (hne_data should be
2189 			 * handled as a pointer to int), which is always 32bit. We need to
2190 			 * convert it to void pointer here, since interfaces are
2191 			 * represented as pointers to void in IPF. The pointers are 64 bits
2192 			 * long on 64bit platforms. Doing something like
2193 			 *	(void *)((int) x)
2194 			 * will throw warning:
2195 			 *   "cast to pointer from integer of different size"
2196 			 * during 64bit compilation.
2197 			 *
2198 			 * The line below uses (size_t) to typecast int to
2199 			 * size_t, which might be 64bit/32bit (depending
2200 			 * on architecture). Once we have proper 64bit/32bit
2201 			 * type (size_t), we can safely convert it to void pointer.
2202 			 */
2203 			new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2204 			fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2205 			fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2206 			fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2207 		}
2208 		RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2209 		break;
2210 #endif
2211 
2212 	default :
2213 		break;
2214 	}
2215 
2216 	return 0;
2217 }
2218 
2219 /*
2220  * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6()
2221  * are needed in Solaris kernel only. We don't need them in
2222  * ipftest to pretend the ICMP/RST packet was sent as a response.
2223  */
2224 #if defined(_KERNEL) && (SOLARIS2 >= 10)
2225 /* ------------------------------------------------------------------------ */
2226 /* Function:    fr_make_rst                                                 */
2227 /* Returns:     int - 0 on success, -1 on failure			    */
2228 /* Parameters:  fin(I) - pointer to packet information                      */
2229 /*                                                                          */
2230 /* We must alter the original mblks passed to IPF from IP stack via	    */
/* FW_HOOKS. FW_HOOKS interface is powerful, but it has some limitations.   */
/* IPF can basically do only these things with mblk representing a packet:  */
2233 /*	leave it as it is (pass the packet)				    */
2234 /*                                                                          */
2235 /*	discard it (block the packet)					    */
2236 /*                                                                          */
2237 /*	alter it (i.e. NAT)						    */
2238 /*                                                                          */
2239 /* As you can see IPF can not simply discard the mblk and supply a new one  */
2240 /* instead to IP stack via FW_HOOKS.					    */
2241 /*                                                                          */
2242 /* The return-rst action for packets coming via NIC is handled as follows:  */
2243 /*	mblk with packet is discarded					    */
2244 /*                                                                          */
2245 /*	new mblk with RST response is constructed and injected to network   */
2246 /*                                                                          */
2247 /* IPF can't inject packets to loopback interface, this is just another	    */
2248 /* limitation we have to deal with here. The only option to send RST	    */
2249 /* response to offending TCP packet coming via loopback is to alter it.	    */
2250 /*									    */
2251 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on	    */
2252 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to	    */
2253 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers.			    */
2254 /* ------------------------------------------------------------------------ */
2255 int fr_make_rst(fin)
2256 fr_info_t *fin;
2257 {
2258 	uint16_t tmp_port;
2259 	int rv = -1;
2260 	uint32_t old_ack;
2261 	tcphdr_t *tcp = NULL;
2262 	struct in_addr tmp_src;
2263 #ifdef USE_INET6
2264 	struct in6_addr	tmp_src6;
2265 #endif
2266 
2267 	ASSERT(fin->fin_p == IPPROTO_TCP);
2268 
2269 	/*
2270 	 * We do not need to adjust chksum, since it is not being checked by
2271 	 * Solaris IP stack for loopback clients.
2272 	 */
2273 	if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) &&
2274 	    ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2275 
2276 		if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2277 			/* Swap IPv4 addresses. */
2278 			tmp_src = fin->fin_ip->ip_src;
2279 			fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2280 			fin->fin_ip->ip_dst = tmp_src;
2281 
2282 			rv = 0;
2283 		}
2284 		else
2285 			tcp = NULL;
2286 	}
2287 #ifdef USE_INET6
2288 	else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) &&
2289 	    ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2290 		/*
2291 		 * We are relying on fact the next header is TCP, which is true
2292 		 * for regular TCP packets coming in over loopback.
2293 		 */
2294 		if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2295 			/* Swap IPv6 addresses. */
2296 			tmp_src6 = fin->fin_ip6->ip6_src;
2297 			fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2298 			fin->fin_ip6->ip6_dst = tmp_src6;
2299 
2300 			rv = 0;
2301 		}
2302 		else
2303 			tcp = NULL;
2304 	}
2305 #endif
2306 
2307 	if (tcp != NULL) {
2308 		/*
2309 		 * Adjust TCP header:
2310 		 *	swap ports,
2311 		 *	set flags,
2312 		 *	set correct ACK number
2313 		 */
2314 		tmp_port = tcp->th_sport;
2315 		tcp->th_sport = tcp->th_dport;
2316 		tcp->th_dport = tmp_port;
2317 		old_ack = tcp->th_ack;
2318 		tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1);
2319 		tcp->th_seq = old_ack;
2320 		tcp->th_flags = TH_RST | TH_ACK;
2321 	}
2322 
2323 	return (rv);
2324 }
2325 
2326 /* ------------------------------------------------------------------------ */
2327 /* Function:    fr_make_icmp_v4                                             */
2328 /* Returns:     int - 0 on success, -1 on failure			    */
2329 /* Parameters:  fin(I) - pointer to packet information                      */
2330 /*                                                                          */
2331 /* Please read comment at fr_make_icmp() wrapper function to get an idea    */
2332 /* what is going to happen here and why. Once you read the comment there,   */
2333 /* continue here with next paragraph.					    */
2334 /*									    */
2335 /* To turn IPv4 packet into ICMPv4 response packet, these things must	    */
2336 /* happen here:								    */
2337 /*	(1) Original mblk is copied (duplicated).			    */
2338 /*                                                                          */
2339 /*	(2) ICMP header is created.					    */
2340 /*                                                                          */
2341 /*	(3) Link ICMP header with copy of original mblk, we have ICMPv4	    */
2342 /*	    data ready then.						    */
2343 /*                                                                          */
2344 /*      (4) Swap IP addresses in original mblk and adjust IP header data.   */
2345 /*                                                                          */
2346 /*	(5) The mblk containing original packet is trimmed to contain IP    */
2347 /*	    header only and ICMP chksum is computed.			    */
2348 /*                                                                          */
2349 /*	(6) The ICMP header we have from (3) is linked to original mblk,    */
2350 /*	    which now contains new IP header. If original packet was spread */
2351 /*	    over several mblks, only the first mblk is kept.		    */
2352 /* ------------------------------------------------------------------------ */
2353 static int fr_make_icmp_v4(fin)
2354 fr_info_t *fin;
2355 {
2356 	struct in_addr tmp_src;
2357 	tcphdr_t *tcp;
2358 	struct icmp *icmp;
2359 	mblk_t *mblk_icmp;
2360 	mblk_t *mblk_ip;
2361 	size_t icmp_pld_len;	/* octets to append to ICMP header */
2362 	size_t orig_iphdr_len;	/* length of IP header only */
2363 	uint32_t sum;
2364 	uint16_t *buf;
2365 	int len;
2366 
2367 
2368 	if (fin->fin_v != 4)
2369 		return (-1);
2370 
2371 	/*
2372 	 * If we are dealing with TCP, then packet must be SYN/FIN to be routed
2373 	 * by IP stack. If it is not SYN/FIN, then we must drop it silently.
2374 	 */
2375 	tcp = (tcphdr_t *) fin->fin_dp;
2376 
2377 	if ((fin->fin_p == IPPROTO_TCP) &&
2378 	    ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2379 		return (-1);
2380 
2381 	/*
2382 	 * Step (1)
2383 	 *
2384 	 * Make copy of original mblk.
2385 	 *
2386 	 * We want to copy as much data as necessary, not less, not more.  The
2387 	 * ICMPv4 payload length for unreachable messages is:
2388 	 *	original IP header + 8 bytes of L4 (if there are any).
2389 	 *
2390 	 * We determine if there are at least 8 bytes of L4 data following IP
2391 	 * header first.
2392 	 */
2393 	icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ?
2394 		ICMPERR_ICMPHLEN : fin->fin_dlen;
2395 	/*
2396 	 * Since we don't want to copy more data than necessary, we must trim
2397 	 * the original mblk here.  The right way (STREAMish) would be to use
2398 	 * adjmsg() to trim it.  However we would have to calculate the length
2399 	 * argument for adjmsg() from pointers we already have here.
2400 	 *
2401 	 * Since we have pointers and offsets, it's faster and easier for
2402 	 * us to just adjust pointers by hand instead of using adjmsg().
2403 	 */
2404 	fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp;
2405 	fin->fin_m->b_wptr += icmp_pld_len;
2406 	icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip;
2407 
2408 	/*
2409 	 * Also we don't want to copy any L2 stuff, which might precede IP
2410 	 * header, so we have have to set b_rptr to point to the start of IP
2411 	 * header.
2412 	 */
2413 	fin->fin_m->b_rptr += fin->fin_ipoff;
2414 	if ((mblk_ip = copyb(fin->fin_m)) == NULL)
2415 		return (-1);
2416 	fin->fin_m->b_rptr -= fin->fin_ipoff;
2417 
2418 	/*
2419 	 * Step (2)
2420 	 *
2421 	 * Create an ICMP header, which will be appened to original mblk later.
2422 	 * ICMP header is just another mblk.
2423 	 */
2424 	mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI);
2425 	if (mblk_icmp == NULL) {
2426 		FREE_MB_T(mblk_ip);
2427 		return (-1);
2428 	}
2429 
2430 	MTYPE(mblk_icmp) = M_DATA;
2431 	icmp = (struct icmp *) mblk_icmp->b_wptr;
2432 	icmp->icmp_type = ICMP_UNREACH;
2433 	icmp->icmp_code = fin->fin_icode & 0xFF;
2434 	icmp->icmp_void = 0;
2435 	icmp->icmp_cksum = 0;
2436 	mblk_icmp->b_wptr += ICMPERR_ICMPHLEN;
2437 
2438 	/*
2439 	 * Step (3)
2440 	 *
2441 	 * Complete ICMP packet - link ICMP header with L4 data from original
2442 	 * IP packet.
2443 	 */
2444 	linkb(mblk_icmp, mblk_ip);
2445 
2446 	/*
2447 	 * Step (4)
2448 	 *
2449 	 * Swap IP addresses and change IP header fields accordingly in
2450 	 * original IP packet.
2451 	 *
2452 	 * There is a rule option return-icmp as a dest for physical
2453 	 * interfaces. This option becomes useless for loopback, since IPF box
2454 	 * uses same address as a loopback destination. We ignore the option
2455 	 * here, the ICMP packet will always look like as it would have been
2456 	 * sent from the original destination host.
2457 	 */
2458 	tmp_src = fin->fin_ip->ip_src;
2459 	fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2460 	fin->fin_ip->ip_dst = tmp_src;
2461 	fin->fin_ip->ip_p = IPPROTO_ICMP;
2462 	fin->fin_ip->ip_sum = 0;
2463 
2464 	/*
2465 	 * Step (5)
2466 	 *
2467 	 * We trim the orignal mblk to hold IP header only.
2468 	 */
2469 	fin->fin_m->b_wptr = fin->fin_dp;
2470 	orig_iphdr_len = fin->fin_m->b_wptr -
2471 			    (fin->fin_m->b_rptr + fin->fin_ipoff);
2472 	fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN +
2473 			    orig_iphdr_len);
2474 
2475 	/*
2476 	 * ICMP chksum calculation. The data we are calculating chksum for are
2477 	 * spread over two mblks, therefore we have to use two for loops.
2478 	 *
2479 	 * First for loop computes chksum part for ICMP header.
2480 	 */
2481 	buf = (uint16_t *) icmp;
2482 	len = ICMPERR_ICMPHLEN;
2483 	for (sum = 0; len > 1; len -= 2)
2484 		sum += *buf++;
2485 
2486 	/*
2487 	 * Here we add chksum part for ICMP payload.
2488 	 */
2489 	len = icmp_pld_len;
2490 	buf = (uint16_t *) mblk_ip->b_rptr;
2491 	for (; len > 1; len -= 2)
2492 		sum += *buf++;
2493 
2494 	/*
2495 	 * Chksum is done.
2496 	 */
2497 	sum = (sum >> 16) + (sum & 0xffff);
2498 	sum += (sum >> 16);
2499 	icmp->icmp_cksum = ~sum;
2500 
2501 	/*
2502 	 * Step (6)
2503 	 *
2504 	 * Release all packet mblks, except the first one.
2505 	 */
2506 	if (fin->fin_m->b_cont != NULL) {
2507 		FREE_MB_T(fin->fin_m->b_cont);
2508 	}
2509 
2510 	/*
2511 	 * Append ICMP payload to first mblk, which already contains new IP
2512 	 * header.
2513 	 */
2514 	linkb(fin->fin_m, mblk_icmp);
2515 
2516 	return (0);
2517 }
2518 
2519 #ifdef USE_INET6
2520 /* ------------------------------------------------------------------------ */
2521 /* Function:    fr_make_icmp_v6                                             */
2522 /* Returns:     int - 0 on success, -1 on failure			    */
2523 /* Parameters:  fin(I) - pointer to packet information                      */
2524 /*									    */
2525 /* Please read comment at fr_make_icmp() wrapper function to get an idea    */
2526 /* what and why is going to happen here. Once you read the comment there,   */
2527 /* continue here with next paragraph.					    */
2528 /*									    */
2529 /* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response.    */
2530 /* The algorithm is fairly simple:					    */
2531 /*	1) We need to get copy of complete mblk.			    */
2532 /*									    */
2533 /*	2) New ICMPv6 header is created.				    */
2534 /*									    */
2535 /*	3) The copy of original mblk with packet is linked to ICMPv6	    */
2536 /*	   header.							    */
2537 /*									    */
2538 /*	4) The checksum must be adjusted.				    */
2539 /*									    */
2540 /*	5) IP addresses in original mblk are swapped and IP header data	    */
2541 /*	   are adjusted (protocol number).				    */
2542 /*									    */
2543 /*	6) Original mblk is trimmed to hold IPv6 header only, then it is    */
2544 /*	   linked with the ICMPv6 data we got from (3).			    */
2545 /* ------------------------------------------------------------------------ */
2546 static int fr_make_icmp_v6(fin)
2547 fr_info_t *fin;
2548 {
2549 	struct icmp6_hdr *icmp6;
2550 	tcphdr_t *tcp;
2551 	struct in6_addr	tmp_src6;
2552 	size_t icmp_pld_len;
2553 	mblk_t *mblk_ip, *mblk_icmp;
2554 
2555 	if (fin->fin_v != 6)
2556 		return (-1);
2557 
2558 	/*
2559 	 * If we are dealing with TCP, then packet must SYN/FIN to be routed by
2560 	 * IP stack. If it is not SYN/FIN, then we must drop it silently.
2561 	 */
2562 	tcp = (tcphdr_t *) fin->fin_dp;
2563 
2564 	if ((fin->fin_p == IPPROTO_TCP) &&
2565 	    ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2566 		return (-1);
2567 
2568 	/*
2569 	 * Step (1)
2570 	 *
2571 	 * We need to copy complete packet in case of IPv6, no trimming is
2572 	 * needed (except the L2 headers).
2573 	 */
2574 	icmp_pld_len = M_LEN(fin->fin_m);
2575 	fin->fin_m->b_rptr += fin->fin_ipoff;
2576 	if ((mblk_ip = copyb(fin->fin_m)) == NULL)
2577 		return (-1);
2578 	fin->fin_m->b_rptr -= fin->fin_ipoff;
2579 
2580 	/*
2581 	 * Step (2)
2582 	 *
2583 	 * Allocate and create ICMP header.
2584 	 */
2585 	mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr),
2586 			BPRI_HI);
2587 
2588 	if (mblk_icmp == NULL)
2589 		return (-1);
2590 
2591 	MTYPE(mblk_icmp) = M_DATA;
2592 	icmp6 =  (struct icmp6_hdr *) mblk_icmp->b_wptr;
2593 	icmp6->icmp6_type = ICMP6_DST_UNREACH;
2594 	icmp6->icmp6_code = fin->fin_icode & 0xFF;
2595 	icmp6->icmp6_data32[0] = 0;
2596 	mblk_icmp->b_wptr += sizeof (struct icmp6_hdr);
2597 
2598 	/*
2599 	 * Step (3)
2600 	 *
2601 	 * Link the copy of IP packet to ICMP header.
2602 	 */
2603 	linkb(mblk_icmp, mblk_ip);
2604 
2605 	/*
2606 	 * Step (4)
2607 	 *
2608 	 * Calculate chksum - this is much more easier task than in case of
2609 	 * IPv4  - ICMPv6 chksum only covers IP addresses, and payload length.
2610 	 * We are making compensation just for change of packet length.
2611 	 */
2612 	icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr);
2613 
2614 	/*
2615 	 * Step (5)
2616 	 *
2617 	 * Swap IP addresses.
2618 	 */
2619 	tmp_src6 = fin->fin_ip6->ip6_src;
2620 	fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2621 	fin->fin_ip6->ip6_dst = tmp_src6;
2622 
2623 	/*
2624 	 * and adjust IP header data.
2625 	 */
2626 	fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6;
2627 	fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr));
2628 
2629 	/*
2630 	 * Step (6)
2631 	 *
2632 	 * We must release all linked mblks from original packet and keep only
2633 	 * the first mblk with IP header to link ICMP data.
2634 	 */
2635 	fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t);
2636 
2637 	if (fin->fin_m->b_cont != NULL) {
2638 		FREE_MB_T(fin->fin_m->b_cont);
2639 	}
2640 
2641 	/*
2642 	 * Append ICMP payload to IP header.
2643 	 */
2644 	linkb(fin->fin_m, mblk_icmp);
2645 
2646 	return (0);
2647 }
2648 #endif	/* USE_INET6 */
2649 
2650 /* ------------------------------------------------------------------------ */
2651 /* Function:    fr_make_icmp                                                */
2652 /* Returns:     int - 0 on success, -1 on failure			    */
2653 /* Parameters:  fin(I) - pointer to packet information                      */
2654 /*                                                                          */
2655 /* We must alter the original mblks passed to IPF from IP stack via	    */
2656 /* FW_HOOKS. The reasons why we must alter packet are discussed within	    */
2657 /* comment at fr_make_rst() function.					    */
2658 /*									    */
2659 /* The fr_make_icmp() function acts as a wrapper, which passes the code	    */
2660 /* execution to	fr_make_icmp_v4() or fr_make_icmp_v6() depending on	    */
2661 /* protocol version. However there are some details, which are common to    */
2662 /* both IP versions. The details are going to be explained here.	    */
2663 /*                                                                          */
2664 /* The packet looks as follows:						    */
2665 /*    xxx | IP hdr | IP payload    ...	| 				    */
2666 /*    ^   ^        ^            	^				    */
2667 /*    |   |        |            	|				    */
2668 /*    |   |        |		fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */
2669 /*    |   |        |							    */
2670 /*    |   |        `- fin_m->fin_dp (in case of IPv4 points to L4 header)   */
2671 /*    |   |								    */
2672 /*    |   `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case  */
2673 /*    |      of loopback)						    */
2674 /*    |   								    */
2675 /*    `- fin_m->b_rptr -  points to L2 header in case of physical NIC	    */
2676 /*                                                                          */
2677 /* All relevant IP headers are pulled up into the first mblk. It happened   */
2678 /* well in advance before the matching rule was found (the rule, which took */
2679 /* us here, to fr_make_icmp() function).				    */
2680 /*                                                                          */
2681 /* Both functions will turn packet passed in fin->fin_m mblk into a new	    */
2682 /* packet. New packet will be represented as chain of mblks.		    */
2683 /* orig mblk |- b_cont ---.						    */
2684 /*    ^                    `-> ICMP hdr |- b_cont--.			    */
2685 /*    |	                          ^	            `-> duped orig mblk	    */
2686 /*    |                           |				^	    */
2687 /*    `- The original mblk        |				|	    */
2688 /*       will be trimmed to       |				|	    */
2689 /*       to contain IP header     |				|	    */
2690 /*       only                     |				|	    */
2691 /*                                |				|	    */
2692 /*                                `- This is newly		|           */
2693 /*                                   allocated mblk to		|	    */
2694 /*                                   hold ICMPv6 data.		|	    */
2695 /*								|	    */
2696 /*								|	    */
2697 /*								|	    */
2698 /*	    This is the copy of original mblk, it will contain -'	    */
/*	    original IP packet in case of ICMPv6. In case of		    */
2700 /*	    ICMPv4 it will contain up to 8 bytes of IP payload		    */
2701 /*	    (TCP/UDP/L4) data from original packet.			    */
2702 /* ------------------------------------------------------------------------ */
2703 int fr_make_icmp(fin)
2704 fr_info_t *fin;
2705 {
2706 	int rv;
2707 
2708 	if (fin->fin_v == 4)
2709 		rv = fr_make_icmp_v4(fin);
2710 #ifdef USE_INET6
2711 	else if (fin->fin_v == 6)
2712 		rv = fr_make_icmp_v6(fin);
2713 #endif
2714 	else
2715 		rv = -1;
2716 
2717 	return (rv);
2718 }
2719 
2720 /* ------------------------------------------------------------------------ */
2721 /* Function:    fr_buf_sum						    */
2722 /* Returns:     unsigned int - sum of buffer buf			    */
2723 /* Parameters:  buf - pointer to buf we want to sum up			    */
2724 /*              len - length of buffer buf				    */
2725 /*                                                                          */
2726 /* Sums buffer buf. The result is used for chksum calculation. The buf	    */
2727 /* argument must be aligned.						    */
2728 /* ------------------------------------------------------------------------ */
static uint32_t fr_buf_sum(buf, len)
const void *buf;
unsigned int len;
{
	const uint16_t	*words = (const uint16_t *)buf;
	unsigned int	nwords = len / 2;
	uint32_t	acc = 0;
	unsigned int	i;

	/* Add up all complete 16 bit words, exactly as they sit in memory. */
	for (i = 0; i < nwords; i++)
		acc += words[i];

	/*
	 * An odd length leaves one octet behind. It is added as the first
	 * octet of a 16 bit word, whose second octet is zero.
	 */
	if ((len & 1) != 0)
		acc += htons((*(const unsigned char *)(words + nwords)) << 8);

	return (acc);
}
2746 
2747 /* ------------------------------------------------------------------------ */
2748 /* Function:    fr_calc_chksum						    */
2749 /* Returns:     void							    */
2750 /* Parameters:  fin - pointer to fr_info_t instance with packet data	    */
2751 /*              pkt - pointer to duplicated packet			    */
2752 /*                                                                          */
2753 /* Calculates all chksums (L3, L4) for packet pkt. Works for both IP	    */
2754 /* versions.								    */
2755 /* ------------------------------------------------------------------------ */
2756 void fr_calc_chksum(fin, pkt)
2757 fr_info_t *fin;
2758 mb_t *pkt;
2759 {
2760 	struct pseudo_hdr {
2761 		union {
2762 			struct in_addr	in4;
2763 #ifdef USE_INET6
2764 			struct in6_addr	in6;
2765 #endif
2766 		} src_addr;
2767 		union {
2768 			struct in_addr	in4;
2769 #ifdef USE_INET6
2770 			struct in6_addr	in6;
2771 #endif
2772 		} dst_addr;
2773 		char		zero;
2774 		char		proto;
2775 		uint16_t	len;
2776 	}	phdr;
2777 	uint32_t	sum, ip_sum;
2778 	void	*buf;
2779 	uint16_t	*l4_csum_p;
2780 	tcphdr_t	*tcp;
2781 	udphdr_t	*udp;
2782 	icmphdr_t	*icmp;
2783 #ifdef USE_INET6
2784 	struct icmp6_hdr	*icmp6;
2785 #endif
2786 	ip_t		*ip;
2787 	unsigned int	len;
2788 	int		pld_len;
2789 
2790 	/*
2791 	 * We need to pullup the packet to the single continuous buffer to avoid
2792 	 * potential misaligment of b_rptr member in mblk chain.
2793 	 */
2794 	if (pullupmsg(pkt, -1) == 0) {
2795 		cmn_err(CE_WARN, "Failed to pullup loopback pkt -> chksum"
2796 		    " will not be computed by IPF");
2797 		return;
2798 	}
2799 
2800 	/*
2801 	 * It is guaranteed IP header starts right at b_rptr, because we are
2802 	 * working with a copy of the original packet.
2803 	 *
2804 	 * Compute pseudo header chksum for TCP and UDP.
2805 	 */
2806 	if ((fin->fin_p == IPPROTO_UDP) ||
2807 	    (fin->fin_p == IPPROTO_TCP)) {
2808 		bzero(&phdr, sizeof (phdr));
2809 #ifdef USE_INET6
2810 		if (fin->fin_v == 6) {
2811 			phdr.src_addr.in6 = fin->fin_srcip6;
2812 			phdr.dst_addr.in6 = fin->fin_dstip6;
2813 		} else {
2814 			phdr.src_addr.in4 = fin->fin_src;
2815 			phdr.dst_addr.in4 = fin->fin_dst;
2816 		}
2817 #else
2818 		phdr.src_addr.in4 = fin->fin_src;
2819 		phdr.dst_addr.in4 = fin->fin_dst;
2820 #endif
2821 		phdr.zero = (char) 0;
2822 		phdr.proto = fin->fin_p;
2823 		phdr.len = htons((uint16_t)fin->fin_dlen);
2824 		sum = fr_buf_sum(&phdr, (unsigned int)sizeof (phdr));
2825 	} else {
2826 		sum = 0;
2827 	}
2828 
2829 	/*
2830 	 * Set pointer to the L4 chksum field in the packet, set buf pointer to
2831 	 * the L4 header start.
2832 	 */
2833 	switch (fin->fin_p) {
2834 		case IPPROTO_UDP:
2835 			udp = (udphdr_t *)(pkt->b_rptr + fin->fin_hlen);
2836 			l4_csum_p = &udp->uh_sum;
2837 			buf = udp;
2838 			break;
2839 		case IPPROTO_TCP:
2840 			tcp = (tcphdr_t *)(pkt->b_rptr + fin->fin_hlen);
2841 			l4_csum_p = &tcp->th_sum;
2842 			buf = tcp;
2843 			break;
2844 		case IPPROTO_ICMP:
2845 			icmp = (icmphdr_t *)(pkt->b_rptr + fin->fin_hlen);
2846 			l4_csum_p = &icmp->icmp_cksum;
2847 			buf = icmp;
2848 			break;
2849 #ifdef USE_INET6
2850 		case IPPROTO_ICMPV6:
2851 			icmp6 = (struct icmp6_hdr *)(pkt->b_rptr + fin->fin_hlen);
2852 			l4_csum_p = &icmp6->icmp6_cksum;
2853 			buf = icmp6;
2854 			break;
2855 #endif
2856 		default:
2857 			l4_csum_p = NULL;
2858 	}
2859 
2860 	/*
2861 	 * Compute L4 chksum if needed.
2862 	 */
2863 	if (l4_csum_p != NULL) {
2864 		*l4_csum_p = (uint16_t)0;
2865 		pld_len = fin->fin_dlen;
2866 		len = pkt->b_wptr - (unsigned char *)buf;
2867 		ASSERT(len == pld_len);
2868 		/*
2869 		 * Add payload sum to pseudoheader sum.
2870 		 */
2871 		sum += fr_buf_sum(buf, len);
2872 		while (sum >> 16)
2873 			sum = (sum & 0xFFFF) + (sum >> 16);
2874 
2875 		*l4_csum_p = ~((uint16_t)sum);
2876 		DTRACE_PROBE1(l4_sum, uint16_t, *l4_csum_p);
2877 	}
2878 
2879 	/*
2880 	 * The IP header chksum is needed just for IPv4.
2881 	 */
2882 	if (fin->fin_v == 4) {
2883 		/*
2884 		 * Compute IPv4 header chksum.
2885 		 */
2886 		ip = (ip_t *)pkt->b_rptr;
2887 		ip->ip_sum = (uint16_t)0;
2888 		ip_sum = fr_buf_sum(ip, (unsigned int)fin->fin_hlen);
2889 		while (ip_sum >> 16)
2890 			ip_sum = (ip_sum & 0xFFFF) + (ip_sum >> 16);
2891 
2892 		ip->ip_sum = ~((uint16_t)ip_sum);
2893 		DTRACE_PROBE1(l3_sum, uint16_t, ip->ip_sum);
2894 	}
2895 
2896 	return;
2897 }
2898 
2899 #endif	/* _KERNEL && SOLARIS2 >= 10 */
2900