xref: /illumos-gate/usr/src/uts/common/inet/ipf/ip_fil_solaris.c (revision aedf2b3bb56b025fcaf87b49ec6c8aeea07f16d7)
1 /*
2  * Copyright (C) 1993-2001, 2003 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
7  * Use is subject to license terms.
8  */
9 
#if !defined(lint)
/* SCCS and RCS identification strings for this file. */
static const char sccsid[] =
    "@(#)ip_fil_solaris.c	1.7 07/22/06 (C) 1993-2000 Darren Reed";
static const char rcsid[] =
    "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $";
#endif
14 
15 #include <sys/types.h>
16 #include <sys/errno.h>
17 #include <sys/param.h>
18 #include <sys/cpuvar.h>
19 #include <sys/open.h>
20 #include <sys/ioctl.h>
21 #include <sys/filio.h>
22 #include <sys/systm.h>
23 #include <sys/strsubr.h>
24 #include <sys/cred.h>
25 #include <sys/cred_impl.h>
26 #include <sys/ddi.h>
27 #include <sys/sunddi.h>
28 #include <sys/ksynch.h>
29 #include <sys/kmem.h>
30 #include <sys/mkdev.h>
31 #include <sys/protosw.h>
32 #include <sys/socket.h>
33 #include <sys/dditypes.h>
34 #include <sys/cmn_err.h>
35 #include <sys/zone.h>
36 #include <net/if.h>
37 #include <net/af.h>
38 #include <net/route.h>
39 #include <netinet/in.h>
40 #include <netinet/in_systm.h>
41 #include <netinet/ip.h>
42 #include <netinet/ip_var.h>
43 #include <netinet/tcp.h>
44 #include <netinet/udp.h>
45 #include <netinet/tcpip.h>
46 #include <netinet/ip_icmp.h>
47 #include "netinet/ip_compat.h"
48 #ifdef	USE_INET6
49 # include <netinet/icmp6.h>
50 #endif
51 #include "netinet/ip_fil.h"
52 #include "netinet/ip_nat.h"
53 #include "netinet/ip_frag.h"
54 #include "netinet/ip_state.h"
55 #include "netinet/ip_auth.h"
56 #include "netinet/ip_proxy.h"
57 #include "netinet/ipf_stack.h"
58 #ifdef	IPFILTER_LOOKUP
59 # include "netinet/ip_lookup.h"
60 #endif
61 #include <inet/ip_ire.h>
62 
63 #include <sys/md5.h>
64 #include <sys/neti.h>
65 
66 static	int	frzerostats __P((caddr_t, ipf_stack_t *));
67 static	int	fr_setipfloopback __P((int, ipf_stack_t *));
68 static	int	fr_enableipf __P((ipf_stack_t *, int));
69 static	int	fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp));
70 static	int	ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *));
71 static	int	ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *));
72 static	int	ipf_hook __P((hook_data_t, int, int, void *));
73 static	int	ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *));
74 static	int	ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *));
75 static	int	ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t,
76     void *));
77 static	int	ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *));
78 static	int	ipf_hook4 __P((hook_data_t, int, int, void *));
79 static	int	ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *));
80 static	int	ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *));
81 static	int	ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t,
82     void *));
83 static	int	ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t,
84     void *));
85 static	int     ipf_hook6 __P((hook_data_t, int, int, void *));
86 extern	int	ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
87 extern	int	ipf_frruleiter __P((void *, int, void *, ipf_stack_t *));
88 
89 #if SOLARIS2 < 10
90 #if SOLARIS2 >= 7
91 u_int		*ip_ttl_ptr = NULL;
92 u_int		*ip_mtudisc = NULL;
93 # if SOLARIS2 >= 8
94 int		*ip_forwarding = NULL;
95 u_int		*ip6_forwarding = NULL;
96 # else
97 u_int		*ip_forwarding = NULL;
98 # endif
99 #else
100 u_long		*ip_ttl_ptr = NULL;
101 u_long		*ip_mtudisc = NULL;
102 u_long		*ip_forwarding = NULL;
103 #endif
104 #endif
105 
106 
107 /* ------------------------------------------------------------------------ */
108 /* Function:    ipldetach                                                   */
109 /* Returns:     int - 0 == success, else error.                             */
110 /* Parameters:  Nil                                                         */
111 /*                                                                          */
112 /* This function is responsible for undoing anything that might have been   */
113 /* done in a call to iplattach().  It must be able to clean up from a call  */
114 /* to iplattach() that did not succeed.  Why might that happen?  Someone    */
115 /* configures a table to be so large that we cannot allocate enough memory  */
116 /* for it.                                                                  */
117 /* ------------------------------------------------------------------------ */
118 int ipldetach(ifs)
119 ipf_stack_t *ifs;
120 {
121 
122 	ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0);
123 
124 #if SOLARIS2 < 10
125 
126 	if (ifs->ifs_fr_control_forwarding & 2) {
127 		if (ip_forwarding != NULL)
128 			*ip_forwarding = 0;
129 #if SOLARIS2 >= 8
130 		if (ip6_forwarding != NULL)
131 			*ip6_forwarding = 0;
132 #endif
133 	}
134 #endif
135 
136 	/*
137 	 * This lock needs to be dropped around the net_hook_unregister calls
138 	 * because we can deadlock here with:
139 	 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
140 	 * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running)
141 	 */
142 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
143 
144 #define	UNDO_HOOK(_f, _b, _e, _h)					\
145 	do {								\
146 		if (ifs->_f != NULL) {					\
147 			if (ifs->_b) {					\
148 				ifs->_b = (net_hook_unregister(ifs->_f,	\
149 					   _e, ifs->_h) != 0);		\
150 				if (!ifs->_b) {				\
151 					hook_free(ifs->_h);		\
152 					ifs->_h = NULL;			\
153 				}					\
154 			} else if (ifs->_h != NULL) {			\
155 				hook_free(ifs->_h);			\
156 				ifs->_h = NULL;				\
157 			}						\
158 		}							\
159 		_NOTE(CONSTCOND)					\
160 	} while (0)
161 
162 	/*
163 	 * Remove IPv6 Hooks
164 	 */
165 	if (ifs->ifs_ipf_ipv6 != NULL) {
166 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in,
167 			  NH_PHYSICAL_IN, ifs_ipfhook6_in);
168 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out,
169 			  NH_PHYSICAL_OUT, ifs_ipfhook6_out);
170 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events,
171 			  NH_NIC_EVENTS, ifs_ipfhook6_nicevents);
172 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in,
173 			  NH_LOOPBACK_IN, ifs_ipfhook6_loop_in);
174 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out,
175 			  NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out);
176 
177 		if (net_protocol_release(ifs->ifs_ipf_ipv6) != 0)
178 			goto detach_failed;
179 		ifs->ifs_ipf_ipv6 = NULL;
180         }
181 
182 	/*
183 	 * Remove IPv4 Hooks
184 	 */
185 	if (ifs->ifs_ipf_ipv4 != NULL) {
186 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in,
187 			  NH_PHYSICAL_IN, ifs_ipfhook4_in);
188 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out,
189 			  NH_PHYSICAL_OUT, ifs_ipfhook4_out);
190 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events,
191 			  NH_NIC_EVENTS, ifs_ipfhook4_nicevents);
192 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in,
193 			  NH_LOOPBACK_IN, ifs_ipfhook4_loop_in);
194 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out,
195 			  NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out);
196 
197 		if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0)
198 			goto detach_failed;
199 		ifs->ifs_ipf_ipv4 = NULL;
200 	}
201 
202 #undef UNDO_HOOK
203 
204 #ifdef	IPFDEBUG
205 	cmn_err(CE_CONT, "ipldetach()\n");
206 #endif
207 
208 	WRITE_ENTER(&ifs->ifs_ipf_global);
209 	fr_deinitialise(ifs);
210 
211 	(void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs);
212 	(void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs);
213 
214 	if (ifs->ifs_ipf_locks_done == 1) {
215 		MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock);
216 		MUTEX_DESTROY(&ifs->ifs_ipf_rw);
217 		RW_DESTROY(&ifs->ifs_ipf_tokens);
218 		RW_DESTROY(&ifs->ifs_ipf_ipidfrag);
219 		ifs->ifs_ipf_locks_done = 0;
220 	}
221 
222 	if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out ||
223 	    ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in ||
224 	    ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events ||
225 	    ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out ||
226 	    ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out)
227 		return -1;
228 
229 	return 0;
230 
231 detach_failed:
232 	WRITE_ENTER(&ifs->ifs_ipf_global);
233 	return -1;
234 }
235 
236 int iplattach(ifs)
237 ipf_stack_t *ifs;
238 {
239 #if SOLARIS2 < 10
240 	int i;
241 #endif
242 	netid_t id = ifs->ifs_netid;
243 
244 #ifdef	IPFDEBUG
245 	cmn_err(CE_CONT, "iplattach()\n");
246 #endif
247 
248 	ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0);
249 	ifs->ifs_fr_flags = IPF_LOGGING;
250 #ifdef _KERNEL
251 	ifs->ifs_fr_update_ipid = 0;
252 #else
253 	ifs->ifs_fr_update_ipid = 1;
254 #endif
255 	ifs->ifs_fr_minttl = 4;
256 	ifs->ifs_fr_icmpminfragmtu = 68;
257 #if defined(IPFILTER_DEFAULT_BLOCK)
258 	ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH;
259 #else
260 	ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH;
261 #endif
262 
263 	bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache));
264 	MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex");
265 	MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex");
266 	RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock");
267 	RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock");
268 	ifs->ifs_ipf_locks_done = 1;
269 
270 	if (fr_initialise(ifs) < 0)
271 		return -1;
272 
273 	HOOK_INIT(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4,
274 		  "ipfilter_hook4_nicevents", ifs);
275 	HOOK_INIT(ifs->ifs_ipfhook4_in, ipf_hook4_in,
276 		  "ipfilter_hook4_in", ifs);
277 	HOOK_INIT(ifs->ifs_ipfhook4_out, ipf_hook4_out,
278 		  "ipfilter_hook4_out", ifs);
279 	HOOK_INIT(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in,
280 		  "ipfilter_hook4_loop_in", ifs);
281 	HOOK_INIT(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out,
282 		  "ipfilter_hook4_loop_out", ifs);
283 
284 	/*
285 	 * If we hold this lock over all of the net_hook_register calls, we
286 	 * can cause a deadlock to occur with the following lock ordering:
287 	 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
288 	 * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path)
289 	 */
290 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
291 
292 	/*
293 	 * Add IPv4 hooks
294 	 */
295 	ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET);
296 	if (ifs->ifs_ipf_ipv4 == NULL)
297 		goto hookup_failed;
298 
299 	ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4,
300 	    NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0);
301 	if (!ifs->ifs_hook4_nic_events)
302 		goto hookup_failed;
303 
304 	ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4,
305 	    NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0);
306 	if (!ifs->ifs_hook4_physical_in)
307 		goto hookup_failed;
308 
309 	ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4,
310 	    NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0);
311 	if (!ifs->ifs_hook4_physical_out)
312 		goto hookup_failed;
313 
314 	if (ifs->ifs_ipf_loopback) {
315 		ifs->ifs_hook4_loopback_in = (net_hook_register(
316 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
317 		    ifs->ifs_ipfhook4_loop_in) == 0);
318 		if (!ifs->ifs_hook4_loopback_in)
319 			goto hookup_failed;
320 
321 		ifs->ifs_hook4_loopback_out = (net_hook_register(
322 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
323 		    ifs->ifs_ipfhook4_loop_out) == 0);
324 		if (!ifs->ifs_hook4_loopback_out)
325 			goto hookup_failed;
326 	}
327 	/*
328 	 * Add IPv6 hooks
329 	 */
330 	ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6);
331 	if (ifs->ifs_ipf_ipv6 == NULL)
332 		goto hookup_failed;
333 
334 	HOOK_INIT(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6,
335 		  "ipfilter_hook6_nicevents", ifs);
336 	HOOK_INIT(ifs->ifs_ipfhook6_in, ipf_hook6_in,
337 		  "ipfilter_hook6_in", ifs);
338 	HOOK_INIT(ifs->ifs_ipfhook6_out, ipf_hook6_out,
339 		  "ipfilter_hook6_out", ifs);
340 	HOOK_INIT(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in,
341 		  "ipfilter_hook6_loop_in", ifs);
342 	HOOK_INIT(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out,
343 		  "ipfilter_hook6_loop_out", ifs);
344 
345 	ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6,
346 	    NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0);
347 	if (!ifs->ifs_hook6_nic_events)
348 		goto hookup_failed;
349 
350 	ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6,
351 	    NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0);
352 	if (!ifs->ifs_hook6_physical_in)
353 		goto hookup_failed;
354 
355 	ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6,
356 	    NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0);
357 	if (!ifs->ifs_hook6_physical_out)
358 		goto hookup_failed;
359 
360 	if (ifs->ifs_ipf_loopback) {
361 		ifs->ifs_hook6_loopback_in = (net_hook_register(
362 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
363 		    ifs->ifs_ipfhook6_loop_in) == 0);
364 		if (!ifs->ifs_hook6_loopback_in)
365 			goto hookup_failed;
366 
367 		ifs->ifs_hook6_loopback_out = (net_hook_register(
368 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
369 		    ifs->ifs_ipfhook6_loop_out) == 0);
370 		if (!ifs->ifs_hook6_loopback_out)
371 			goto hookup_failed;
372 	}
373 
374 	/*
375 	 * Reacquire ipf_global, now it is safe.
376 	 */
377 	WRITE_ENTER(&ifs->ifs_ipf_global);
378 
379 /* Do not use private interface ip_params_arr[] in Solaris 10 */
380 #if SOLARIS2 < 10
381 
382 #if SOLARIS2 >= 8
383 	ip_forwarding = &ip_g_forward;
384 #endif
385 	/*
386 	 * XXX - There is no terminator for this array, so it is not possible
387 	 * to tell if what we are looking for is missing and go off the end
388 	 * of the array.
389 	 */
390 
391 #if SOLARIS2 <= 8
392 	for (i = 0; ; i++) {
393 		if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) {
394 			ip_ttl_ptr = &ip_param_arr[i].ip_param_value;
395 		} else if (!strcmp(ip_param_arr[i].ip_param_name,
396 			    "ip_path_mtu_discovery")) {
397 			ip_mtudisc = &ip_param_arr[i].ip_param_value;
398 		}
399 #if SOLARIS2 < 8
400 		else if (!strcmp(ip_param_arr[i].ip_param_name,
401 			    "ip_forwarding")) {
402 			ip_forwarding = &ip_param_arr[i].ip_param_value;
403 		}
404 #else
405 		else if (!strcmp(ip_param_arr[i].ip_param_name,
406 			    "ip6_forwarding")) {
407 			ip6_forwarding = &ip_param_arr[i].ip_param_value;
408 		}
409 #endif
410 
411 		if (ip_mtudisc != NULL && ip_ttl_ptr != NULL &&
412 #if SOLARIS2 >= 8
413 		    ip6_forwarding != NULL &&
414 #endif
415 		    ip_forwarding != NULL)
416 			break;
417 	}
418 #endif
419 
420 	if (ifs->ifs_fr_control_forwarding & 1) {
421 		if (ip_forwarding != NULL)
422 			*ip_forwarding = 1;
423 #if SOLARIS2 >= 8
424 		if (ip6_forwarding != NULL)
425 			*ip6_forwarding = 1;
426 #endif
427 	}
428 
429 #endif
430 
431 	return 0;
432 hookup_failed:
433 	WRITE_ENTER(&ifs->ifs_ipf_global);
434 	return -1;
435 }
436 
437 static	int	fr_setipfloopback(set, ifs)
438 int set;
439 ipf_stack_t *ifs;
440 {
441 	if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL)
442 		return EFAULT;
443 
444 	if (set && !ifs->ifs_ipf_loopback) {
445 		ifs->ifs_ipf_loopback = 1;
446 
447 		ifs->ifs_hook4_loopback_in = (net_hook_register(
448 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
449 		    ifs->ifs_ipfhook4_loop_in) == 0);
450 		if (!ifs->ifs_hook4_loopback_in)
451 			return EINVAL;
452 
453 		ifs->ifs_hook4_loopback_out = (net_hook_register(
454 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
455 		    ifs->ifs_ipfhook4_loop_out) == 0);
456 		if (!ifs->ifs_hook4_loopback_out)
457 			return EINVAL;
458 
459 		ifs->ifs_hook6_loopback_in = (net_hook_register(
460 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
461 		    ifs->ifs_ipfhook6_loop_in) == 0);
462 		if (!ifs->ifs_hook6_loopback_in)
463 			return EINVAL;
464 
465 		ifs->ifs_hook6_loopback_out = (net_hook_register(
466 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
467 		    ifs->ifs_ipfhook6_loop_out) == 0);
468 		if (!ifs->ifs_hook6_loopback_out)
469 			return EINVAL;
470 
471 	} else if (!set && ifs->ifs_ipf_loopback) {
472 		ifs->ifs_ipf_loopback = 0;
473 
474 		ifs->ifs_hook4_loopback_in =
475 		    (net_hook_unregister(ifs->ifs_ipf_ipv4,
476 		    NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
477 		if (ifs->ifs_hook4_loopback_in)
478 			return EBUSY;
479 
480 		ifs->ifs_hook4_loopback_out =
481 		    (net_hook_unregister(ifs->ifs_ipf_ipv4,
482 		    NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0);
483 		if (ifs->ifs_hook4_loopback_out)
484 			return EBUSY;
485 
486 		ifs->ifs_hook6_loopback_in =
487 		    (net_hook_unregister(ifs->ifs_ipf_ipv6,
488 		    NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
489 		if (ifs->ifs_hook6_loopback_in)
490 			return EBUSY;
491 
492 		ifs->ifs_hook6_loopback_out =
493 		    (net_hook_unregister(ifs->ifs_ipf_ipv6,
494 		    NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0);
495 		if (ifs->ifs_hook6_loopback_out)
496 			return EBUSY;
497 	}
498 	return 0;
499 }
500 
501 
502 /*
503  * Filter ioctl interface.
504  */
505 /*ARGSUSED*/
506 int iplioctl(dev, cmd, data, mode, cp, rp)
507 dev_t dev;
508 int cmd;
509 #if SOLARIS2 >= 7
510 intptr_t data;
511 #else
512 int *data;
513 #endif
514 int mode;
515 cred_t *cp;
516 int *rp;
517 {
518 	int error = 0, tmp;
519 	friostat_t fio;
520 	minor_t unit;
521 	u_int enable;
522 	ipf_stack_t *ifs;
523 
524 #ifdef	IPFDEBUG
525 	cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
526 		dev, cmd, data, mode, cp, rp);
527 #endif
528 	unit = getminor(dev);
529 	if (IPL_LOGMAX < unit)
530 		return ENXIO;
531 
532         /*
533 	 * As we're calling ipf_find_stack in user space, from a given zone
534 	 * to find the stack pointer for this zone, there is no need to have
535 	 * a hold/refence count here.
536 	 */
537 	ifs = ipf_find_stack(crgetzoneid(cp));
538 	ASSERT(ifs != NULL);
539 
540 	if (ifs->ifs_fr_running <= 0) {
541 		if (unit != IPL_LOGIPF) {
542 			return EIO;
543 		}
544 		if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
545 		    cmd != SIOCIPFSET && cmd != SIOCFRENB &&
546 		    cmd != SIOCGETFS && cmd != SIOCGETFF) {
547 			return EIO;
548 		}
549 	}
550 
551 	READ_ENTER(&ifs->ifs_ipf_global);
552 	if (ifs->ifs_fr_enable_active != 0) {
553 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
554 		return EBUSY;
555 	}
556 
557 	error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, cp->cr_uid,
558 			       curproc, ifs);
559 	if (error != -1) {
560 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
561 		return error;
562 	}
563 	error = 0;
564 
565 	switch (cmd)
566 	{
567 	case SIOCFRENB :
568 		if (!(mode & FWRITE))
569 			error = EPERM;
570 		else {
571 			error = COPYIN((caddr_t)data, (caddr_t)&enable,
572 				       sizeof(enable));
573 			if (error != 0) {
574 				error = EFAULT;
575 				break;
576 			}
577 
578 			RWLOCK_EXIT(&ifs->ifs_ipf_global);
579 			WRITE_ENTER(&ifs->ifs_ipf_global);
580 			ifs->ifs_fr_enable_active = 1;
581 			error = fr_enableipf(ifs, enable);
582 			ifs->ifs_fr_enable_active = 0;
583 		}
584 		break;
585 	case SIOCIPFSET :
586 		if (!(mode & FWRITE)) {
587 			error = EPERM;
588 			break;
589 		}
590 		/* FALLTHRU */
591 	case SIOCIPFGETNEXT :
592 	case SIOCIPFGET :
593 		error = fr_ipftune(cmd, (void *)data, ifs);
594 		break;
595 	case SIOCSETFF :
596 		if (!(mode & FWRITE))
597 			error = EPERM;
598 		else {
599 			error = COPYIN((caddr_t)data,
600 				       (caddr_t)&ifs->ifs_fr_flags,
601 				       sizeof(ifs->ifs_fr_flags));
602 			if (error != 0)
603 				error = EFAULT;
604 		}
605 		break;
606 	case SIOCIPFLP :
607 		error = COPYIN((caddr_t)data, (caddr_t)&tmp,
608 			       sizeof(tmp));
609 		if (error != 0)
610 			error = EFAULT;
611 		else
612 			error = fr_setipfloopback(tmp, ifs);
613 		break;
614 	case SIOCGETFF :
615 		error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data,
616 				sizeof(ifs->ifs_fr_flags));
617 		if (error != 0)
618 			error = EFAULT;
619 		break;
620 	case SIOCFUNCL :
621 		error = fr_resolvefunc((void *)data);
622 		break;
623 	case SIOCINAFR :
624 	case SIOCRMAFR :
625 	case SIOCADAFR :
626 	case SIOCZRLST :
627 		if (!(mode & FWRITE))
628 			error = EPERM;
629 		else
630 			error = frrequest(unit, cmd, (caddr_t)data,
631 					  ifs->ifs_fr_active, 1, ifs);
632 		break;
633 	case SIOCINIFR :
634 	case SIOCRMIFR :
635 	case SIOCADIFR :
636 		if (!(mode & FWRITE))
637 			error = EPERM;
638 		else
639 			error = frrequest(unit, cmd, (caddr_t)data,
640 					  1 - ifs->ifs_fr_active, 1, ifs);
641 		break;
642 	case SIOCSWAPA :
643 		if (!(mode & FWRITE))
644 			error = EPERM;
645 		else {
646 			WRITE_ENTER(&ifs->ifs_ipf_mutex);
647 			bzero((char *)ifs->ifs_frcache,
648 			    sizeof (ifs->ifs_frcache));
649 			error = COPYOUT((caddr_t)&ifs->ifs_fr_active,
650 					(caddr_t)data,
651 					sizeof(ifs->ifs_fr_active));
652 			if (error != 0)
653 				error = EFAULT;
654 			else
655 				ifs->ifs_fr_active = 1 - ifs->ifs_fr_active;
656 			RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
657 		}
658 		break;
659 	case SIOCGETFS :
660 		fr_getstat(&fio, ifs);
661 		error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT);
662 		break;
663 	case SIOCFRZST :
664 		if (!(mode & FWRITE))
665 			error = EPERM;
666 		else
667 			error = fr_zerostats((caddr_t)data, ifs);
668 		break;
669 	case	SIOCIPFFL :
670 		if (!(mode & FWRITE))
671 			error = EPERM;
672 		else {
673 			error = COPYIN((caddr_t)data, (caddr_t)&tmp,
674 				       sizeof(tmp));
675 			if (!error) {
676 				tmp = frflush(unit, 4, tmp, ifs);
677 				error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
678 						sizeof(tmp));
679 				if (error != 0)
680 					error = EFAULT;
681 			} else
682 				error = EFAULT;
683 		}
684 		break;
685 #ifdef USE_INET6
686 	case	SIOCIPFL6 :
687 		if (!(mode & FWRITE))
688 			error = EPERM;
689 		else {
690 			error = COPYIN((caddr_t)data, (caddr_t)&tmp,
691 				       sizeof(tmp));
692 			if (!error) {
693 				tmp = frflush(unit, 6, tmp, ifs);
694 				error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
695 						sizeof(tmp));
696 				if (error != 0)
697 					error = EFAULT;
698 			} else
699 				error = EFAULT;
700 		}
701 		break;
702 #endif
703 	case SIOCSTLCK :
704 		error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
705 		if (error == 0) {
706 			ifs->ifs_fr_state_lock = tmp;
707 			ifs->ifs_fr_nat_lock = tmp;
708 			ifs->ifs_fr_frag_lock = tmp;
709 			ifs->ifs_fr_auth_lock = tmp;
710 		} else
711 			error = EFAULT;
712 	break;
713 #ifdef	IPFILTER_LOG
714 	case	SIOCIPFFB :
715 		if (!(mode & FWRITE))
716 			error = EPERM;
717 		else {
718 			tmp = ipflog_clear(unit, ifs);
719 			error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
720 				       sizeof(tmp));
721 			if (error)
722 				error = EFAULT;
723 		}
724 		break;
725 #endif /* IPFILTER_LOG */
726 	case SIOCFRSYN :
727 		if (!(mode & FWRITE))
728 			error = EPERM;
729 		else {
730 			RWLOCK_EXIT(&ifs->ifs_ipf_global);
731 			WRITE_ENTER(&ifs->ifs_ipf_global);
732 
733 			frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
734 			fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
735 			fr_nataddrsync(0, NULL, NULL, ifs);
736 			fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
737 			error = 0;
738 		}
739 		break;
740 	case SIOCGFRST :
741 		error = fr_outobj((void *)data, fr_fragstats(ifs),
742 				  IPFOBJ_FRAGSTAT);
743 		break;
744 	case FIONREAD :
745 #ifdef	IPFILTER_LOG
746 		tmp = (int)ifs->ifs_iplused[IPL_LOGIPF];
747 
748 		error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp));
749 		if (error != 0)
750 			error = EFAULT;
751 #endif
752 		break;
753 	case SIOCIPFITER :
754 		error = ipf_frruleiter((caddr_t)data, cp->cr_uid,
755 				       curproc, ifs);
756 		break;
757 
758 	case SIOCGENITER :
759 		error = ipf_genericiter((caddr_t)data, cp->cr_uid,
760 					curproc, ifs);
761 		break;
762 
763 	case SIOCIPFDELTOK :
764 		error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
765 		if (error != 0) {
766 			error = EFAULT;
767 		} else {
768 			error = ipf_deltoken(tmp, cp->cr_uid, curproc, ifs);
769 		}
770 		break;
771 
772 	default :
773 #ifdef	IPFDEBUG
774 		cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p",
775 			cmd, (void *)data);
776 #endif
777 		error = EINVAL;
778 		break;
779 	}
780 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
781 	return error;
782 }
783 
784 
785 static int fr_enableipf(ifs, enable)
786 ipf_stack_t *ifs;
787 int enable;
788 {
789 	int error;
790 
791 	if (!enable) {
792 		error = ipldetach(ifs);
793 		if (error == 0)
794 			ifs->ifs_fr_running = -1;
795 		return error;
796 	}
797 
798 	if (ifs->ifs_fr_running > 0)
799 		return 0;
800 
801 	error = iplattach(ifs);
802 	if (error == 0) {
803 		if (ifs->ifs_fr_timer_id == NULL) {
804 			int hz = drv_usectohz(500000);
805 
806 			ifs->ifs_fr_timer_id = timeout(fr_slowtimer,
807 						       (void *)ifs,
808 						       hz);
809 		}
810 		ifs->ifs_fr_running = 1;
811 	} else {
812 		(void) ipldetach(ifs);
813 	}
814 	return error;
815 }
816 
817 
818 phy_if_t get_unit(name, v, ifs)
819 char *name;
820 int v;
821 ipf_stack_t *ifs;
822 {
823 	net_handle_t nif;
824 
825   	if (v == 4)
826  		nif = ifs->ifs_ipf_ipv4;
827   	else if (v == 6)
828  		nif = ifs->ifs_ipf_ipv6;
829   	else
830  		return 0;
831 
832  	return (net_phylookup(nif, name));
833 }
834 
835 /*
836  * routines below for saving IP headers to buffer
837  */
838 /*ARGSUSED*/
839 int iplopen(devp, flags, otype, cred)
840 dev_t *devp;
841 int flags, otype;
842 cred_t *cred;
843 {
844 	minor_t min = getminor(*devp);
845 
846 #ifdef	IPFDEBUG
847 	cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
848 #endif
849 	if (!(otype & OTYP_CHR))
850 		return ENXIO;
851 
852 	min = (IPL_LOGMAX < min) ? ENXIO : 0;
853 	return min;
854 }
855 
856 
857 /*ARGSUSED*/
858 int iplclose(dev, flags, otype, cred)
859 dev_t dev;
860 int flags, otype;
861 cred_t *cred;
862 {
863 	minor_t	min = getminor(dev);
864 
865 #ifdef	IPFDEBUG
866 	cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
867 #endif
868 
869 	min = (IPL_LOGMAX < min) ? ENXIO : 0;
870 	return min;
871 }
872 
#ifdef	IPFILTER_LOG
/*
 * iplread/ipllog
 * both of these must operate with at least splnet() lest they be
 * called during packet processing and cause an inconsistency to appear in
 * the filter lists.
 *
 * iplread returns EIO while ipf is not running.  Otherwise the read is
 * passed to ipfsync_read() for the IPL_LOGSYNC minor (IPFILTER_SYNC builds
 * only) or to ipflog_read() for everything else.
 */
/*ARGSUSED*/
int iplread(dev, uio, cp)
dev_t dev;
register struct uio *uio;
cred_t *cp;
{
	ipf_stack_t *ifs;
	minor_t unit;

	/*
	 * ipf_find_stack() is being called on behalf of a user process in
	 * a given zone to find that zone's stack pointer, so there is no
	 * need for a hold/reference count here.
	 */
	ifs = ipf_find_stack(crgetzoneid(cp));
	ASSERT(ifs != NULL);

# ifdef	IPFDEBUG
	cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
# endif

	if (ifs->ifs_fr_running < 1)
		return EIO;

	unit = getminor(dev);

# ifdef	IPFILTER_SYNC
	if (unit == IPL_LOGSYNC)
		return ipfsync_read(uio);
# endif

	return ipflog_read(unit, uio, ifs);
}
#endif /* IPFILTER_LOG */
915 
916 
917 /*
918  * iplread/ipllog
919  * both of these must operate with at least splnet() lest they be
920  * called during packet processing and cause an inconsistancy to appear in
921  * the filter lists.
922  */
923 int iplwrite(dev, uio, cp)
924 dev_t dev;
925 register struct uio *uio;
926 cred_t *cp;
927 {
928 	ipf_stack_t *ifs;
929 
930         /*
931 	 * As we're calling ipf_find_stack in user space, from a given zone
932 	 * to find the stack pointer for this zone, there is no need to have
933 	 * a hold/refence count here.
934 	 */
935 	ifs = ipf_find_stack(crgetzoneid(cp));
936 	ASSERT(ifs != NULL);
937 
938 #ifdef	IPFDEBUG
939 	cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp);
940 #endif
941 
942 	if (ifs->ifs_fr_running < 1) {
943 		return EIO;
944 	}
945 
946 #ifdef	IPFILTER_SYNC
947 	if (getminor(dev) == IPL_LOGSYNC)
948 		return ipfsync_write(uio);
949 #endif /* IPFILTER_SYNC */
950 	dev = dev;	/* LINT */
951 	uio = uio;	/* LINT */
952 	cp = cp;	/* LINT */
953 	return ENXIO;
954 }
955 
956 
957 /*
958  * fr_send_reset - this could conceivably be a call to tcp_respond(), but that
959  * requires a large amount of setting up and isn't any more efficient.
960  */
961 int fr_send_reset(fin)
962 fr_info_t *fin;
963 {
964 	tcphdr_t *tcp, *tcp2;
965 	int tlen, hlen;
966 	mblk_t *m;
967 #ifdef	USE_INET6
968 	ip6_t *ip6;
969 #endif
970 	ip_t *ip;
971 
972 	tcp = fin->fin_dp;
973 	if (tcp->th_flags & TH_RST)
974 		return -1;
975 
976 #ifndef	IPFILTER_CKSUM
977 	if (fr_checkl4sum(fin) == -1)
978 		return -1;
979 #endif
980 
981 	tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0;
982 #ifdef	USE_INET6
983 	if (fin->fin_v == 6)
984 		hlen = sizeof(ip6_t);
985 	else
986 #endif
987 		hlen = sizeof(ip_t);
988 	hlen += sizeof(*tcp2);
989 	if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL)
990 		return -1;
991 
992 	m->b_rptr += 64;
993 	MTYPE(m) = M_DATA;
994 	m->b_wptr = m->b_rptr + hlen;
995 	ip = (ip_t *)m->b_rptr;
996 	bzero((char *)ip, hlen);
997 	tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
998 	tcp2->th_dport = tcp->th_sport;
999 	tcp2->th_sport = tcp->th_dport;
1000 	if (tcp->th_flags & TH_ACK) {
1001 		tcp2->th_seq = tcp->th_ack;
1002 		tcp2->th_flags = TH_RST;
1003 	} else {
1004 		tcp2->th_ack = ntohl(tcp->th_seq);
1005 		tcp2->th_ack += tlen;
1006 		tcp2->th_ack = htonl(tcp2->th_ack);
1007 		tcp2->th_flags = TH_RST|TH_ACK;
1008 	}
1009 	tcp2->th_off = sizeof(struct tcphdr) >> 2;
1010 
1011 	ip->ip_v = fin->fin_v;
1012 #ifdef	USE_INET6
1013 	if (fin->fin_v == 6) {
1014 		ip6 = (ip6_t *)m->b_rptr;
1015 		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1016 		ip6->ip6_src = fin->fin_dst6.in6;
1017 		ip6->ip6_dst = fin->fin_src6.in6;
1018 		ip6->ip6_plen = htons(sizeof(*tcp));
1019 		ip6->ip6_nxt = IPPROTO_TCP;
1020 		tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2);
1021 	} else
1022 #endif
1023 	{
1024 		ip->ip_src.s_addr = fin->fin_daddr;
1025 		ip->ip_dst.s_addr = fin->fin_saddr;
1026 		ip->ip_id = fr_nextipid(fin);
1027 		ip->ip_hl = sizeof(*ip) >> 2;
1028 		ip->ip_p = IPPROTO_TCP;
1029 		ip->ip_len = sizeof(*ip) + sizeof(*tcp);
1030 		ip->ip_tos = fin->fin_ip->ip_tos;
1031 		tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2);
1032 	}
1033 	return fr_send_ip(fin, m, &m);
1034 }
1035 
1036 /*
1037  * Function:	fr_send_ip
1038  * Returns:	 0: success
1039  *		-1: failed
1040  * Parameters:
1041  *	fin: packet information
1042  *	m: the message block where ip head starts
1043  *
1044  * Send a new packet through the IP stack.
1045  *
1046  * For IPv4 packets, ip_len must be in host byte order, and ip_v,
1047  * ip_ttl, ip_off, and ip_sum are ignored (filled in by this
1048  * function).
1049  *
1050  * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled
1051  * in by this function.
1052  *
1053  * All other portions of the packet must be in on-the-wire format.
1054  */
1055 /*ARGSUSED*/
1056 static int fr_send_ip(fin, m, mpp)
1057 fr_info_t *fin;
1058 mblk_t *m, **mpp;
1059 {
1060 	qpktinfo_t qpi, *qpip;
1061 	fr_info_t fnew;
1062 	ip_t *ip;
1063 	int i, hlen;
1064 	ipf_stack_t *ifs = fin->fin_ifs;
1065 
1066 	ip = (ip_t *)m->b_rptr;
1067 	bzero((char *)&fnew, sizeof(fnew));
1068 
1069 #ifdef	USE_INET6
1070 	if (fin->fin_v == 6) {
1071 		ip6_t *ip6;
1072 
1073 		ip6 = (ip6_t *)ip;
1074 		ip6->ip6_vfc = 0x60;
1075 		ip6->ip6_hlim = 127;
1076 		fnew.fin_v = 6;
1077 		hlen = sizeof(*ip6);
1078 		fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen;
1079 	} else
1080 #endif
1081 	{
1082 		fnew.fin_v = 4;
1083 #if SOLARIS2 >= 10
1084 		ip->ip_ttl = 255;
1085 		if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1)
1086 			ip->ip_off = htons(IP_DF);
1087 #else
1088 		if (ip_ttl_ptr != NULL)
1089 			ip->ip_ttl = (u_char)(*ip_ttl_ptr);
1090 		else
1091 			ip->ip_ttl = 63;
1092 		if (ip_mtudisc != NULL)
1093 			ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0);
1094 		else
1095 			ip->ip_off = htons(IP_DF);
1096 #endif
1097 		/*
1098 		 * The dance with byte order and ip_len/ip_off is because in
1099 		 * fr_fastroute, it expects them to be in host byte order but
1100 		 * ipf_cksum expects them to be in network byte order.
1101 		 */
1102 		ip->ip_len = htons(ip->ip_len);
1103 		ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip));
1104 		ip->ip_len = ntohs(ip->ip_len);
1105 		ip->ip_off = ntohs(ip->ip_off);
1106 		hlen = sizeof(*ip);
1107 		fnew.fin_plen = ip->ip_len;
1108 	}
1109 
1110 	qpip = fin->fin_qpi;
1111 	qpi.qpi_off = 0;
1112 	qpi.qpi_ill = qpip->qpi_ill;
1113 	qpi.qpi_m = m;
1114 	qpi.qpi_data = ip;
1115 	fnew.fin_qpi = &qpi;
1116 	fnew.fin_ifp = fin->fin_ifp;
1117 	fnew.fin_flx = FI_NOCKSUM;
1118 	fnew.fin_m = m;
1119 	fnew.fin_qfm = m;
1120 	fnew.fin_ip = ip;
1121 	fnew.fin_mp = mpp;
1122 	fnew.fin_hlen = hlen;
1123 	fnew.fin_dp = (char *)ip + hlen;
1124 	fnew.fin_ifs = fin->fin_ifs;
1125 	(void) fr_makefrip(hlen, ip, &fnew);
1126 
1127 	i = fr_fastroute(m, mpp, &fnew, NULL);
1128 	return i;
1129 }
1130 
1131 
1132 int fr_send_icmp_err(type, fin, dst)
1133 int type;
1134 fr_info_t *fin;
1135 int dst;
1136 {
1137 	struct in_addr dst4;
1138 	struct icmp *icmp;
1139 	qpktinfo_t *qpi;
1140 	int hlen, code;
1141 	phy_if_t phy;
1142 	u_short sz;
1143 #ifdef	USE_INET6
1144 	mblk_t *mb;
1145 #endif
1146 	mblk_t *m;
1147 #ifdef	USE_INET6
1148 	ip6_t *ip6;
1149 #endif
1150 	ip_t *ip;
1151 	ipf_stack_t *ifs = fin->fin_ifs;
1152 
1153 	if ((type < 0) || (type > ICMP_MAXTYPE))
1154 		return -1;
1155 
1156 	code = fin->fin_icode;
1157 #ifdef USE_INET6
1158 	if ((code < 0) || (code >= ICMP_MAX_UNREACH))
1159 		return -1;
1160 #endif
1161 
1162 #ifndef	IPFILTER_CKSUM
1163 	if (fr_checkl4sum(fin) == -1)
1164 		return -1;
1165 #endif
1166 
1167 	qpi = fin->fin_qpi;
1168 
1169 #ifdef	USE_INET6
1170 	mb = fin->fin_qfm;
1171 
1172 	if (fin->fin_v == 6) {
1173 		sz = sizeof(ip6_t);
1174 		sz += MIN(mb->b_wptr - mb->b_rptr, 512);
1175 		hlen = sizeof(ip6_t);
1176 		type = icmptoicmp6types[type];
1177 		if (type == ICMP6_DST_UNREACH)
1178 			code = icmptoicmp6unreach[code];
1179 	} else
1180 #endif
1181 	{
1182 		if ((fin->fin_p == IPPROTO_ICMP) &&
1183 		    !(fin->fin_flx & FI_SHORT))
1184 			switch (ntohs(fin->fin_data[0]) >> 8)
1185 			{
1186 			case ICMP_ECHO :
1187 			case ICMP_TSTAMP :
1188 			case ICMP_IREQ :
1189 			case ICMP_MASKREQ :
1190 				break;
1191 			default :
1192 				return 0;
1193 			}
1194 
1195 		sz = sizeof(ip_t) * 2;
1196 		sz += 8;		/* 64 bits of data */
1197 		hlen = sizeof(ip_t);
1198 	}
1199 
1200 	sz += offsetof(struct icmp, icmp_ip);
1201 	if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL)
1202 		return -1;
1203 	MTYPE(m) = M_DATA;
1204 	m->b_rptr += 64;
1205 	m->b_wptr = m->b_rptr + sz;
1206 	bzero((char *)m->b_rptr, (size_t)sz);
1207 	ip = (ip_t *)m->b_rptr;
1208 	ip->ip_v = fin->fin_v;
1209 	icmp = (struct icmp *)(m->b_rptr + hlen);
1210 	icmp->icmp_type = type & 0xff;
1211 	icmp->icmp_code = code & 0xff;
1212 	phy = (phy_if_t)qpi->qpi_ill;
1213 	if (type == ICMP_UNREACH && (phy != 0) &&
1214 	    fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
1215 		icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 );
1216 
1217 #ifdef	USE_INET6
1218 	if (fin->fin_v == 6) {
1219 		struct in6_addr dst6;
1220 		int csz;
1221 
1222 		if (dst == 0) {
1223 			ipf_stack_t *ifs = fin->fin_ifs;
1224 
1225 			if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy,
1226 				       (void *)&dst6, NULL, ifs) == -1) {
1227 				FREE_MB_T(m);
1228 				return -1;
1229 			}
1230 		} else
1231 			dst6 = fin->fin_dst6.in6;
1232 
1233 		csz = sz;
1234 		sz -= sizeof(ip6_t);
1235 		ip6 = (ip6_t *)m->b_rptr;
1236 		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1237 		ip6->ip6_plen = htons((u_short)sz);
1238 		ip6->ip6_nxt = IPPROTO_ICMPV6;
1239 		ip6->ip6_src = dst6;
1240 		ip6->ip6_dst = fin->fin_src6.in6;
1241 		sz -= offsetof(struct icmp, icmp_ip);
1242 		bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz);
1243 		icmp->icmp_cksum = csz - sizeof(ip6_t);
1244 	} else
1245 #endif
1246 	{
1247 		ip->ip_hl = sizeof(*ip) >> 2;
1248 		ip->ip_p = IPPROTO_ICMP;
1249 		ip->ip_id = fin->fin_ip->ip_id;
1250 		ip->ip_tos = fin->fin_ip->ip_tos;
1251 		ip->ip_len = (u_short)sz;
1252 		if (dst == 0) {
1253 			ipf_stack_t *ifs = fin->fin_ifs;
1254 
1255 			if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy,
1256 				       (void *)&dst4, NULL, ifs) == -1) {
1257 				FREE_MB_T(m);
1258 				return -1;
1259 			}
1260 		} else {
1261 			dst4 = fin->fin_dst;
1262 		}
1263 		ip->ip_src = dst4;
1264 		ip->ip_dst = fin->fin_src;
1265 		bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip,
1266 		      sizeof(*fin->fin_ip));
1267 		bcopy((char *)fin->fin_ip + fin->fin_hlen,
1268 		      (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8);
1269 		icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len);
1270 		icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off);
1271 		icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
1272 					     sz - sizeof(ip_t));
1273 	}
1274 
1275 	/*
1276 	 * Need to exit out of these so we don't recursively call rw_enter
1277 	 * from fr_qout.
1278 	 */
1279 	return fr_send_ip(fin, m, &m);
1280 }
1281 
1282 #include <sys/time.h>
1283 #include <sys/varargs.h>
1284 
1285 #ifndef _KERNEL
1286 #include <stdio.h>
1287 #endif
1288 
1289 #define	NULLADDR_RATE_LIMIT 10	/* 10 seconds */
1290 
1291 
1292 /*
1293  * Print out warning message at rate-limited speed.
1294  */
1295 static void rate_limit_message(ipf_stack_t *ifs,
1296 			       int rate, const char *message, ...)
1297 {
1298 	static time_t last_time = 0;
1299 	time_t now;
1300 	va_list args;
1301 	char msg_buf[256];
1302 	int  need_printed = 0;
1303 
1304 	now = ddi_get_time();
1305 
1306 	/* make sure, no multiple entries */
1307 	ASSERT(MUTEX_NOT_HELD(&(ifs->ifs_ipf_rw.ipf_lk)));
1308 	MUTEX_ENTER(&ifs->ifs_ipf_rw);
1309 	if (now - last_time >= rate) {
1310 		need_printed = 1;
1311 		last_time = now;
1312 	}
1313 	MUTEX_EXIT(&ifs->ifs_ipf_rw);
1314 
1315 	if (need_printed) {
1316 		va_start(args, message);
1317 		(void)vsnprintf(msg_buf, 255, message, args);
1318 		va_end(args);
1319 #ifdef _KERNEL
1320 		cmn_err(CE_WARN, msg_buf);
1321 #else
1322 		fprintf(std_err, msg_buf);
1323 #endif
1324 	}
1325 }
1326 
1327 /*
1328  * Return the first IP Address associated with an interface
1329  * For IPv6, we walk through the list of logical interfaces and return
1330  * the address of the first one that isn't a link-local interface.
1331  * We can't assume that it is :1 because another link-local address
1332  * may have been assigned there.
1333  */
1334 /*ARGSUSED*/
1335 int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs)
1336 int v, atype;
1337 void *ifptr;
1338 struct in_addr  *inp, *inpmask;
1339 ipf_stack_t *ifs;
1340 {
1341 	struct sockaddr_in6 v6addr[2];
1342 	struct sockaddr_in v4addr[2];
1343 	net_ifaddr_t type[2];
1344 	net_handle_t net_data;
1345 	phy_if_t phyif;
1346 	void *array;
1347 
1348 	switch (v)
1349 	{
1350 	case 4:
1351 		net_data = ifs->ifs_ipf_ipv4;
1352 		array = v4addr;
1353 		break;
1354 	case 6:
1355 		net_data = ifs->ifs_ipf_ipv6;
1356 		array = v6addr;
1357 		break;
1358 	default:
1359 		net_data = NULL;
1360 		break;
1361 	}
1362 
1363 	if (net_data == NULL)
1364 		return -1;
1365 
1366 	phyif = (phy_if_t)ifptr;
1367 
1368 	switch (atype)
1369 	{
1370 	case FRI_PEERADDR :
1371 		type[0] = NA_PEER;
1372 		break;
1373 
1374 	case FRI_BROADCAST :
1375 		type[0] = NA_BROADCAST;
1376 		break;
1377 
1378 	default :
1379 		type[0] = NA_ADDRESS;
1380 		break;
1381 	}
1382 
1383 	type[1] = NA_NETMASK;
1384 
1385 	if (v == 6) {
1386 		lif_if_t idx = 0;
1387 
1388 		do {
1389 			idx = net_lifgetnext(net_data, phyif, idx);
1390 			if (net_getlifaddr(net_data, phyif, idx, 2, type,
1391 					   array) < 0)
1392 				return -1;
1393 			if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) &&
1394 			    !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr))
1395 				break;
1396 		} while (idx != 0);
1397 
1398 		if (idx == 0)
1399 			return -1;
1400 
1401 		return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1],
1402 					inp, inpmask);
1403 	}
1404 
1405 	if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0)
1406 		return -1;
1407 
1408 	return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask);
1409 }
1410 
1411 
1412 u_32_t fr_newisn(fin)
1413 fr_info_t *fin;
1414 {
1415 	static int iss_seq_off = 0;
1416 	u_char hash[16];
1417 	u_32_t newiss;
1418 	MD5_CTX ctx;
1419 	ipf_stack_t *ifs = fin->fin_ifs;
1420 
1421 	/*
1422 	 * Compute the base value of the ISS.  It is a hash
1423 	 * of (saddr, sport, daddr, dport, secret).
1424 	 */
1425 	MD5Init(&ctx);
1426 
1427 	MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src,
1428 		  sizeof(fin->fin_fi.fi_src));
1429 	MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst,
1430 		  sizeof(fin->fin_fi.fi_dst));
1431 	MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat));
1432 
1433 	MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret));
1434 
1435 	MD5Final(hash, &ctx);
1436 
1437 	bcopy(hash, &newiss, sizeof(newiss));
1438 
1439 	/*
1440 	 * Now increment our "timer", and add it in to
1441 	 * the computed value.
1442 	 *
1443 	 * XXX Use `addin'?
1444 	 * XXX TCP_ISSINCR too large to use?
1445 	 */
1446 	iss_seq_off += 0x00010000;
1447 	newiss += iss_seq_off;
1448 	return newiss;
1449 }
1450 
1451 
1452 /* ------------------------------------------------------------------------ */
1453 /* Function:    fr_nextipid                                                 */
1454 /* Returns:     int - 0 == success, -1 == error (packet should be droppped) */
1455 /* Parameters:  fin(I) - pointer to packet information                      */
1456 /*                                                                          */
1457 /* Returns the next IPv4 ID to use for this packet.                         */
1458 /* ------------------------------------------------------------------------ */
1459 u_short fr_nextipid(fin)
1460 fr_info_t *fin;
1461 {
1462 	static u_short ipid = 0;
1463 	u_short id;
1464 	ipf_stack_t *ifs = fin->fin_ifs;
1465 
1466 	MUTEX_ENTER(&ifs->ifs_ipf_rw);
1467 	if (fin->fin_pktnum != 0) {
1468 		id = fin->fin_pktnum & 0xffff;
1469 	} else {
1470 		id = ipid++;
1471 	}
1472 	MUTEX_EXIT(&ifs->ifs_ipf_rw);
1473 
1474 	return id;
1475 }
1476 
1477 
1478 #ifndef IPFILTER_CKSUM
1479 /* ARGSUSED */
1480 #endif
1481 INLINE void fr_checkv4sum(fin)
1482 fr_info_t *fin;
1483 {
1484 #ifdef IPFILTER_CKSUM
1485 	if (fr_checkl4sum(fin) == -1)
1486 		fin->fin_flx |= FI_BAD;
1487 #endif
1488 }
1489 
1490 
#ifdef USE_INET6
/*
 * IPv6 counterpart of fr_checkv4sum(): when IPFILTER_CKSUM is defined,
 * run fr_checkl4sum() on the packet and flag it FI_BAD if that reports
 * -1.  Without IPFILTER_CKSUM this is an empty function.
 */
# ifndef IPFILTER_CKSUM
/* ARGSUSED */
# endif
INLINE void fr_checkv6sum(fin)
fr_info_t *fin;
{
# ifdef IPFILTER_CKSUM
	int rv = fr_checkl4sum(fin);

	if (rv == -1)
		fin->fin_flx |= FI_BAD;
# endif
}
#endif /* USE_INET6 */
1504 
1505 
1506 #if (SOLARIS2 < 7)
1507 void fr_slowtimer()
1508 #else
1509 /*ARGSUSED*/
1510 void fr_slowtimer __P((void *arg))
1511 #endif
1512 {
1513 	ipf_stack_t *ifs = arg;
1514 
1515 	READ_ENTER(&ifs->ifs_ipf_global);
1516 	if (ifs->ifs_fr_running != 1) {
1517 		ifs->ifs_fr_timer_id = NULL;
1518 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
1519 		return;
1520 	}
1521 	ipf_expiretokens(ifs);
1522 	fr_fragexpire(ifs);
1523 	fr_timeoutstate(ifs);
1524 	fr_natexpire(ifs);
1525 	fr_authexpire(ifs);
1526 	ifs->ifs_fr_ticks++;
1527 	if (ifs->ifs_fr_running == 1)
1528 		ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg,
1529 		    drv_usectohz(500000));
1530 	else
1531 		ifs->ifs_fr_timer_id = NULL;
1532 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
1533 }
1534 
1535 
1536 /* ------------------------------------------------------------------------ */
1537 /* Function:    fr_pullup                                                   */
1538 /* Returns:     NULL == pullup failed, else pointer to protocol header      */
1539 /* Parameters:  m(I)   - pointer to buffer where data packet starts         */
1540 /*              fin(I) - pointer to packet information                      */
1541 /*              len(I) - number of bytes to pullup                          */
1542 /*                                                                          */
1543 /* Attempt to move at least len bytes (from the start of the buffer) into a */
1544 /* single buffer for ease of access.  Operating system native functions are */
1545 /* used to manage buffers - if necessary.  If the entire packet ends up in  */
1546 /* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has  */
1547 /* not been called.  Both fin_ip and fin_dp are updated before exiting _IF_ */
1548 /* and ONLY if the pullup succeeds.                                         */
1549 /*                                                                          */
1550 /* We assume that 'min' is a pointer to a buffer that is part of the chain  */
1551 /* of buffers that starts at *fin->fin_mp.                                  */
1552 /* ------------------------------------------------------------------------ */
1553 void *fr_pullup(min, fin, len)
1554 mb_t *min;
1555 fr_info_t *fin;
1556 int len;
1557 {
1558 	qpktinfo_t *qpi = fin->fin_qpi;
1559 	int out = fin->fin_out, dpoff, ipoff;
1560 	mb_t *m = min, *m1, *m2;
1561 	char *ip;
1562 	uint32_t start, stuff, end, value, flags;
1563 	ipf_stack_t *ifs = fin->fin_ifs;
1564 
1565 	if (m == NULL)
1566 		return NULL;
1567 
1568 	ip = (char *)fin->fin_ip;
1569 	if ((fin->fin_flx & FI_COALESCE) != 0)
1570 		return ip;
1571 
1572 	ipoff = fin->fin_ipoff;
1573 	if (fin->fin_dp != NULL)
1574 		dpoff = (char *)fin->fin_dp - (char *)ip;
1575 	else
1576 		dpoff = 0;
1577 
1578 	if (M_LEN(m) < len + ipoff) {
1579 
1580 		/*
1581 		 * pfil_precheck ensures the IP header is on a 32bit
1582 		 * aligned address so simply fail if that isn't currently
1583 		 * the case (should never happen).
1584 		 */
1585 		int inc = 0;
1586 
1587 		if (ipoff > 0) {
1588 			if ((ipoff & 3) != 0) {
1589 				inc = 4 - (ipoff & 3);
1590 				if (m->b_rptr - inc >= m->b_datap->db_base)
1591 					m->b_rptr -= inc;
1592 				else
1593 					inc = 0;
1594 			}
1595 		}
1596 
1597 		/*
1598 		 * XXX This is here as a work around for a bug with DEBUG
1599 		 * XXX Solaris kernels.  The problem is b_prev is used by IP
1600 		 * XXX code as a way to stash the phyint_index for a packet,
1601 		 * XXX this doesn't get reset by IP but freeb does an ASSERT()
1602 		 * XXX for both of these to be NULL.  See 6442390.
1603 		 */
1604 		m1 = m;
1605 		m2 = m->b_prev;
1606 
1607 		do {
1608 			m1->b_next = NULL;
1609 			m1->b_prev = NULL;
1610 			m1 = m1->b_cont;
1611 		} while (m1);
1612 
1613 		/*
1614 		 * Need to preserve checksum information by copying them
1615 		 * to newmp which heads the pulluped message.
1616 		 */
1617 		hcksum_retrieve(m, NULL, NULL, &start, &stuff, &end,
1618 		    &value, &flags);
1619 
1620 		if (pullupmsg(m, len + ipoff + inc) == 0) {
1621 			ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]);
1622 			FREE_MB_T(*fin->fin_mp);
1623 			*fin->fin_mp = NULL;
1624 			fin->fin_m = NULL;
1625 			fin->fin_ip = NULL;
1626 			fin->fin_dp = NULL;
1627 			qpi->qpi_data = NULL;
1628 			return NULL;
1629 		}
1630 
1631 		(void) hcksum_assoc(m, NULL, NULL, start, stuff, end,
1632 		    value, flags, 0);
1633 
1634 		m->b_prev = m2;
1635 		m->b_rptr += inc;
1636 		fin->fin_m = m;
1637 		ip = MTOD(m, char *) + ipoff;
1638 		qpi->qpi_data = ip;
1639 	}
1640 
1641 	ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]);
1642 	fin->fin_ip = (ip_t *)ip;
1643 	if (fin->fin_dp != NULL)
1644 		fin->fin_dp = (char *)fin->fin_ip + dpoff;
1645 
1646 	if (len == fin->fin_plen)
1647 		fin->fin_flx |= FI_COALESCE;
1648 	return ip;
1649 }
1650 
1651 
1652 /*
1653  * Function:	fr_verifysrc
1654  * Returns:	int (really boolean)
1655  * Parameters:	fin - packet information
1656  *
1657  * Check whether the packet has a valid source address for the interface on
1658  * which the packet arrived, implementing the "fr_chksrc" feature.
1659  * Returns true iff the packet's source address is valid.
1660  */
1661 int fr_verifysrc(fin)
1662 fr_info_t *fin;
1663 {
1664 	net_handle_t net_data_p;
1665 	phy_if_t phy_ifdata_routeto;
1666 	struct sockaddr	sin;
1667 	ipf_stack_t *ifs = fin->fin_ifs;
1668 
1669 	if (fin->fin_v == 4) {
1670 		net_data_p = ifs->ifs_ipf_ipv4;
1671 	} else if (fin->fin_v == 6) {
1672 		net_data_p = ifs->ifs_ipf_ipv6;
1673 	} else {
1674 		return (0);
1675 	}
1676 
1677 	/* Get the index corresponding to the if name */
1678 	sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
1679 	bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr));
1680 	phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL);
1681 
1682 	return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0);
1683 }
1684 
1685 
1686 /*
1687  * Function:	fr_fastroute
1688  * Returns:	 0: success;
1689  *		-1: failed
1690  * Parameters:
1691  *	mb: the message block where ip head starts
1692  *	mpp: the pointer to the pointer of the orignal
1693  *		packet message
1694  *	fin: packet information
1695  *	fdp: destination interface information
1696  *	if it is NULL, no interface information provided.
1697  *
1698  * This function is for fastroute/to/dup-to rules. It calls
1699  * pfil_make_lay2_packet to search route, make lay-2 header
1700  * ,and identify output queue for the IP packet.
1701  * The destination address depends on the following conditions:
1702  * 1: for fastroute rule, fdp is passed in as NULL, so the
1703  *	destination address is the IP Packet's destination address
1704  * 2: for to/dup-to rule, if an ip address is specified after
1705  *	the interface name, this address is the as destination
1706  *	address. Otherwise IP Packet's destination address is used
1707  */
1708 int fr_fastroute(mb, mpp, fin, fdp)
1709 mblk_t *mb, **mpp;
1710 fr_info_t *fin;
1711 frdest_t *fdp;
1712 {
1713         net_handle_t net_data_p;
1714 	net_inject_t *inj;
1715 	mblk_t *mp = NULL;
1716 	frentry_t *fr = fin->fin_fr;
1717 	qpktinfo_t *qpi;
1718 	ip_t *ip;
1719 
1720 	struct sockaddr_in *sin;
1721 	struct sockaddr_in6 *sin6;
1722 	struct sockaddr *sinp;
1723 	ipf_stack_t *ifs = fin->fin_ifs;
1724 #ifndef	sparc
1725 	u_short __iplen, __ipoff;
1726 #endif
1727 
1728 	if (fin->fin_v == 4) {
1729 		net_data_p = ifs->ifs_ipf_ipv4;
1730 	} else if (fin->fin_v == 6) {
1731 		net_data_p = ifs->ifs_ipf_ipv6;
1732 	} else {
1733 		return (-1);
1734 	}
1735 
1736 	inj = net_inject_alloc(NETINFO_VERSION);
1737 	if (inj == NULL)
1738 		return -1;
1739 
1740 	ip = fin->fin_ip;
1741 	qpi = fin->fin_qpi;
1742 
1743 	/*
1744 	 * If this is a duplicate mblk then we want ip to point at that
1745 	 * data, not the original, if and only if it is already pointing at
1746 	 * the current mblk data.
1747 	 *
1748 	 * Otherwise, if it's not a duplicate, and we're not already pointing
1749 	 * at the current mblk data, then we want to ensure that the data
1750 	 * points at ip.
1751 	 */
1752 
1753 	if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) {
1754 		ip = (ip_t *)mb->b_rptr;
1755 	} else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) {
1756 		qpi->qpi_m->b_rptr = (uchar_t *)ip;
1757 		qpi->qpi_off = 0;
1758 	}
1759 
1760 	/*
1761 	 * If there is another M_PROTO, we don't want it
1762 	 */
1763 	if (*mpp != mb) {
1764 		mp = unlinkb(*mpp);
1765 		freeb(*mpp);
1766 		*mpp = mp;
1767 	}
1768 
1769 	sinp = (struct sockaddr *)&inj->ni_addr;
1770 	sin = (struct sockaddr_in *)sinp;
1771 	sin6 = (struct sockaddr_in6 *)sinp;
1772 	bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr));
1773 	inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
1774 	inj->ni_packet = mb;
1775 
1776 	/*
1777 	 * In case we're here due to "to <if>" being used with
1778 	 * "keep state", check that we're going in the correct
1779 	 * direction.
1780 	 */
1781 	if (fdp != NULL) {
1782 		if ((fr != NULL) && (fdp->fd_ifp != NULL) &&
1783 			(fin->fin_rev != 0) && (fdp == &fr->fr_tif))
1784 			goto bad_fastroute;
1785 		inj->ni_physical = (phy_if_t)fdp->fd_ifp;
1786 		if (fin->fin_v == 4) {
1787 			sin->sin_addr = fdp->fd_ip;
1788 		} else {
1789 			sin6->sin6_addr = fdp->fd_ip6.in6;
1790 		}
1791 	} else {
1792 		if (fin->fin_v == 4) {
1793 			sin->sin_addr = ip->ip_dst;
1794 		} else {
1795 			sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst;
1796 		}
1797 		inj->ni_physical = net_routeto(net_data_p, sinp, NULL);
1798 	}
1799 
1800 	/*
1801 	 * Clear the hardware checksum flags from packets that we are doing
1802 	 * input processing on as leaving them set will cause the outgoing
1803 	 * NIC (if it supports hardware checksum) to calculate them anew,
1804 	 * using the old (correct) checksums as the pseudo value to start
1805 	 * from.
1806 	 */
1807 	if (fin->fin_out == 0) {
1808 		DB_CKSUMFLAGS(mb) = 0;
1809 	}
1810 
1811 	*mpp = mb;
1812 
1813 	if (fin->fin_out == 0) {
1814 		void *saveifp;
1815 		u_32_t pass;
1816 
1817 		saveifp = fin->fin_ifp;
1818 		fin->fin_ifp = (void *)inj->ni_physical;
1819 		fin->fin_flx &= ~FI_STATE;
1820 		fin->fin_out = 1;
1821 		(void) fr_acctpkt(fin, &pass);
1822 		fin->fin_fr = NULL;
1823 		if (!fr || !(fr->fr_flags & FR_RETMASK))
1824 			(void) fr_checkstate(fin, &pass);
1825 		if (fr_checknatout(fin, NULL) == -1)
1826 			goto bad_fastroute;
1827 		fin->fin_out = 0;
1828 		fin->fin_ifp = saveifp;
1829 	}
1830 #ifndef	sparc
1831 	if (fin->fin_v == 4) {
1832 		__iplen = (u_short)ip->ip_len,
1833 		__ipoff = (u_short)ip->ip_off;
1834 
1835 		ip->ip_len = htons(__iplen);
1836 		ip->ip_off = htons(__ipoff);
1837 	}
1838 #endif
1839 
1840 	if (net_data_p) {
1841 		if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) {
1842 			net_inject_free(inj);
1843 			return (-1);
1844 		}
1845 	}
1846 
1847 	ifs->ifs_fr_frouteok[0]++;
1848 	net_inject_free(inj);
1849 	return 0;
1850 bad_fastroute:
1851 	net_inject_free(inj);
1852 	freemsg(mb);
1853 	ifs->ifs_fr_frouteok[1]++;
1854 	return -1;
1855 }
1856 
1857 
1858 /* ------------------------------------------------------------------------ */
1859 /* Function:    ipf_hook4_out                                               */
1860 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1861 /* Parameters:  event(I)     - pointer to event                             */
1862 /*              info(I)      - pointer to hook information for firewalling  */
1863 /*                                                                          */
1864 /* Calling ipf_hook.                                                        */
1865 /* ------------------------------------------------------------------------ */
1866 /*ARGSUSED*/
1867 int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg)
1868 {
1869 	return ipf_hook(info, 1, 0, arg);
1870 }
1871 /*ARGSUSED*/
1872 int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg)
1873 {
1874 	return ipf_hook6(info, 1, 0, arg);
1875 }
1876 
1877 /* ------------------------------------------------------------------------ */
1878 /* Function:    ipf_hook4_in                                                */
1879 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1880 /* Parameters:  event(I)     - pointer to event                             */
1881 /*              info(I)      - pointer to hook information for firewalling  */
1882 /*                                                                          */
1883 /* Calling ipf_hook.                                                        */
1884 /* ------------------------------------------------------------------------ */
1885 /*ARGSUSED*/
1886 int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg)
1887 {
1888 	return ipf_hook(info, 0, 0, arg);
1889 }
1890 /*ARGSUSED*/
1891 int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg)
1892 {
1893 	return ipf_hook6(info, 0, 0, arg);
1894 }
1895 
1896 
1897 /* ------------------------------------------------------------------------ */
1898 /* Function:    ipf_hook4_loop_out                                          */
1899 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1900 /* Parameters:  event(I)     - pointer to event                             */
1901 /*              info(I)      - pointer to hook information for firewalling  */
1902 /*                                                                          */
1903 /* Calling ipf_hook.                                                        */
1904 /* ------------------------------------------------------------------------ */
1905 /*ARGSUSED*/
1906 int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
1907 {
1908 	return ipf_hook(info, 1, FI_NOCKSUM, arg);
1909 }
1910 /*ARGSUSED*/
1911 int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
1912 {
1913 	return ipf_hook6(info, 1, FI_NOCKSUM, arg);
1914 }
1915 
1916 /* ------------------------------------------------------------------------ */
1917 /* Function:    ipf_hook4_loop_in                                           */
1918 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1919 /* Parameters:  event(I)     - pointer to event                             */
1920 /*              info(I)      - pointer to hook information for firewalling  */
1921 /*                                                                          */
1922 /* Calling ipf_hook.                                                        */
1923 /* ------------------------------------------------------------------------ */
1924 /*ARGSUSED*/
1925 int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
1926 {
1927 	return ipf_hook(info, 0, FI_NOCKSUM, arg);
1928 }
1929 /*ARGSUSED*/
1930 int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
1931 {
1932 	return ipf_hook6(info, 0, FI_NOCKSUM, arg);
1933 }
1934 
1935 /* ------------------------------------------------------------------------ */
1936 /* Function:    ipf_hook                                                    */
1937 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1938 /* Parameters:  info(I)      - pointer to hook information for firewalling  */
1939 /*              out(I)       - whether packet is going in or out            */
1940 /*              loopback(I)  - whether packet is a loopback packet or not   */
1941 /*                                                                          */
1942 /* Stepping stone function between the IP mainline and IPFilter.  Extracts  */
1943 /* parameters out of the info structure and forms them up to be useful for  */
1944 /* calling ipfilter.                                                        */
1945 /* ------------------------------------------------------------------------ */
1946 int ipf_hook(hook_data_t info, int out, int loopback, void *arg)
1947 {
1948 	hook_pkt_event_t *fw;
1949 	ipf_stack_t *ifs;
1950 	qpktinfo_t qpi;
1951 	int rval, hlen;
1952 	u_short swap;
1953 	phy_if_t phy;
1954 	ip_t *ip;
1955 
1956 	ifs = arg;
1957 	fw = (hook_pkt_event_t *)info;
1958 
1959 	ASSERT(fw != NULL);
1960 	phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
1961 
1962 	ip = fw->hpe_hdr;
1963 	swap = ntohs(ip->ip_len);
1964 	ip->ip_len = swap;
1965 	swap = ntohs(ip->ip_off);
1966 	ip->ip_off = swap;
1967 	hlen = IPH_HDR_LENGTH(ip);
1968 
1969 	qpi.qpi_m = fw->hpe_mb;
1970 	qpi.qpi_data = fw->hpe_hdr;
1971 	qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
1972 	qpi.qpi_ill = (void *)phy;
1973 	qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
1974 	if (qpi.qpi_flags)
1975 		qpi.qpi_flags |= FI_MBCAST;
1976 	qpi.qpi_flags |= loopback;
1977 
1978 	rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
1979 	    &qpi, fw->hpe_mp, ifs);
1980 
1981 	/* For fastroute cases, fr_check returns 0 with mp set to NULL */
1982 	if (rval == 0 && *(fw->hpe_mp) == NULL)
1983 		rval = 1;
1984 
1985 	/* Notify IP the packet mblk_t and IP header pointers. */
1986 	fw->hpe_mb = qpi.qpi_m;
1987 	fw->hpe_hdr = qpi.qpi_data;
1988 	if (rval == 0) {
1989 		ip = qpi.qpi_data;
1990 		swap = ntohs(ip->ip_len);
1991 		ip->ip_len = swap;
1992 		swap = ntohs(ip->ip_off);
1993 		ip->ip_off = swap;
1994 	}
1995 	return rval;
1996 
1997 }
1998 int ipf_hook6(hook_data_t info, int out, int loopback, void *arg)
1999 {
2000 	hook_pkt_event_t *fw;
2001 	int rval, hlen;
2002 	qpktinfo_t qpi;
2003 	phy_if_t phy;
2004 
2005 	fw = (hook_pkt_event_t *)info;
2006 
2007 	ASSERT(fw != NULL);
2008 	phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
2009 
2010 	hlen = sizeof (ip6_t);
2011 
2012 	qpi.qpi_m = fw->hpe_mb;
2013 	qpi.qpi_data = fw->hpe_hdr;
2014 	qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
2015 	qpi.qpi_ill = (void *)phy;
2016 	qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
2017 	if (qpi.qpi_flags)
2018 		qpi.qpi_flags |= FI_MBCAST;
2019 	qpi.qpi_flags |= loopback;
2020 
2021 	rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
2022 	    &qpi, fw->hpe_mp, arg);
2023 
2024 	/* For fastroute cases, fr_check returns 0 with mp set to NULL */
2025 	if (rval == 0 && *(fw->hpe_mp) == NULL)
2026 		rval = 1;
2027 
2028 	/* Notify IP the packet mblk_t and IP header pointers. */
2029 	fw->hpe_mb = qpi.qpi_m;
2030 	fw->hpe_hdr = qpi.qpi_data;
2031 	return rval;
2032 
2033 }
2034 
2035 
2036 /* ------------------------------------------------------------------------ */
2037 /* Function:    ipf_nic_event_v4                                            */
2038 /* Returns:     int - 0 == no problems encountered                          */
2039 /* Parameters:  event(I)     - pointer to event                             */
2040 /*              info(I)      - pointer to information about a NIC event     */
2041 /*                                                                          */
2042 /* Function to receive asynchronous NIC events from IP                      */
2043 /* ------------------------------------------------------------------------ */
2044 /*ARGSUSED*/
2045 int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg)
2046 {
2047 	struct sockaddr_in *sin;
2048 	hook_nic_event_t *hn;
2049 	ipf_stack_t *ifs = arg;
2050 
2051 	hn = (hook_nic_event_t *)info;
2052 
2053 	switch (hn->hne_event)
2054 	{
2055 	case NE_PLUMB :
2056 		frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data,
2057 		       ifs);
2058 		fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2059 			      hn->hne_data, ifs);
2060 		fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2061 			     hn->hne_data, ifs);
2062 		break;
2063 
2064 	case NE_UNPLUMB :
2065 		frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2066 		fr_natifpsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL,
2067 			      ifs);
2068 		fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2069 		break;
2070 
2071 	case NE_ADDRESS_CHANGE :
2072 		/*
2073 		 * We only respond to events for logical interface 0 because
2074 		 * IPFilter only uses the first address given to a network
2075 		 * interface.  We check for hne_lif==1 because the netinfo
2076 		 * code maps adds 1 to the lif number so that it can return
2077 		 * 0 to indicate "no more lifs" when walking them.
2078 		 */
2079 		if (hn->hne_lif == 1) {
2080 			frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL,
2081 			    ifs);
2082 			sin = hn->hne_data;
2083 			fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr,
2084 			    ifs);
2085 		}
2086 		break;
2087 
2088 	default :
2089 		break;
2090 	}
2091 
2092 	return 0;
2093 }
2094 
2095 
2096 /* ------------------------------------------------------------------------ */
2097 /* Function:    ipf_nic_event_v6                                            */
2098 /* Returns:     int - 0 == no problems encountered                          */
2099 /* Parameters:  event(I)     - pointer to event                             */
2100 /*              info(I)      - pointer to information about a NIC event     */
2101 /*                                                                          */
2102 /* Function to receive asynchronous NIC events from IP                      */
2103 /* ------------------------------------------------------------------------ */
2104 /*ARGSUSED*/
2105 int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg)
2106 {
2107 	struct sockaddr_in6 *sin6;
2108 	hook_nic_event_t *hn;
2109 	ipf_stack_t *ifs = arg;
2110 
2111 	hn = (hook_nic_event_t *)info;
2112 
2113 	switch (hn->hne_event)
2114 	{
2115 	case NE_PLUMB :
2116 		frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2117 		       hn->hne_data, ifs);
2118 		fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2119 			      hn->hne_data, ifs);
2120 		fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2121 			     hn->hne_data, ifs);
2122 		break;
2123 
2124 	case NE_UNPLUMB :
2125 		frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2126 		fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL,
2127 			      ifs);
2128 		fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2129 		break;
2130 
2131 	case NE_ADDRESS_CHANGE :
2132 		if (hn->hne_lif == 1) {
2133 			sin6 = hn->hne_data;
2134 			fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr,
2135 				       ifs);
2136 		}
2137 		break;
2138 	default :
2139 		break;
2140 	}
2141 
2142 	return 0;
2143 }
2144 
2145 /*
2146  * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6()
2147  * are needed in Solaris kernel only. We don't need them in
2148  * ipftest to pretend the ICMP/RST packet was sent as a response.
2149  */
2150 #if defined(_KERNEL) && (SOLARIS2 >= 10)
2151 /* ------------------------------------------------------------------------ */
2152 /* Function:    fr_make_rst                                                 */
2153 /* Returns:     int - 0 on success, -1 on failure			    */
2154 /* Parameters:  fin(I) - pointer to packet information                      */
2155 /*                                                                          */
2156 /* We must alter the original mblks passed to IPF from IP stack via	    */
2157 /* FW_HOOKS. FW_HOOKS interface is powerfull, but it has some limitations.  */
2158 /* IPF can basicaly do only these things with mblk representing the packet: */
2159 /*	leave it as it is (pass the packet)				    */
2160 /*                                                                          */
2161 /*	discard it (block the packet)					    */
2162 /*                                                                          */
2163 /*	alter it (i.e. NAT)						    */
2164 /*                                                                          */
2165 /* As you can see IPF can not simply discard the mblk and supply a new one  */
2166 /* instead to IP stack via FW_HOOKS.					    */
2167 /*                                                                          */
2168 /* The return-rst action for packets coming via NIC is handled as follows:  */
2169 /*	mblk with packet is discarded					    */
2170 /*                                                                          */
2171 /*	new mblk with RST response is constructed and injected to network   */
2172 /*                                                                          */
2173 /* IPF can't inject packets to loopback interface, this is just another	    */
2174 /* limitation we have to deal with here. The only option to send RST	    */
2175 /* response to offending TCP packet coming via loopback is to alter it.	    */
2176 /*									    */
2177 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on	    */
2178 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to	    */
2179 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers.			    */
2180 /* ------------------------------------------------------------------------ */
2181 int fr_make_rst(fin)
2182 fr_info_t *fin;
2183 {
2184 	uint16_t tmp_port;
2185 	int rv = -1;
2186 	uint32_t old_ack;
2187 	tcphdr_t *tcp = NULL;
2188 	struct in_addr tmp_src;
2189 #ifdef USE_INET6
2190 	struct in6_addr	tmp_src6;
2191 #endif
2192 
2193 	ASSERT(fin->fin_p == IPPROTO_TCP);
2194 
2195 	/*
2196 	 * We do not need to adjust chksum, since it is not being checked by
2197 	 * Solaris IP stack for loopback clients.
2198 	 */
2199 	if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) &&
2200 	    ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2201 
2202 		if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2203 			/* Swap IPv4 addresses. */
2204 			tmp_src = fin->fin_ip->ip_src;
2205 			fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2206 			fin->fin_ip->ip_dst = tmp_src;
2207 
2208 			rv = 0;
2209 		}
2210 		else
2211 			tcp = NULL;
2212 	}
2213 #ifdef USE_INET6
2214 	else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) &&
2215 	    ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2216 		/*
2217 		 * We are relying on fact the next header is TCP, which is true
2218 		 * for regular TCP packets coming in over loopback.
2219 		 */
2220 		if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2221 			/* Swap IPv6 addresses. */
2222 			tmp_src6 = fin->fin_ip6->ip6_src;
2223 			fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2224 			fin->fin_ip6->ip6_dst = tmp_src6;
2225 
2226 			rv = 0;
2227 		}
2228 		else
2229 			tcp = NULL;
2230 	}
2231 #endif
2232 
2233 	if (tcp != NULL) {
2234 		/*
2235 		 * Adjust TCP header:
2236 		 *	swap ports,
2237 		 *	set flags,
2238 		 *	set correct ACK number
2239 		 */
2240 		tmp_port = tcp->th_sport;
2241 		tcp->th_sport = tcp->th_dport;
2242 		tcp->th_dport = tmp_port;
2243 		old_ack = tcp->th_ack;
2244 		tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1);
2245 		tcp->th_seq = old_ack;
2246 		tcp->th_flags = TH_RST | TH_ACK;
2247 	}
2248 
2249 	return (rv);
2250 }
2251 
2252 /* ------------------------------------------------------------------------ */
2253 /* Function:    fr_make_icmp_v4                                             */
2254 /* Returns:     int - 0 on success, -1 on failure			    */
2255 /* Parameters:  fin(I) - pointer to packet information                      */
2256 /*                                                                          */
2257 /* Please read comment at fr_make_icmp() wrapper function to get an idea    */
2258 /* what is going to happen here and why. Once you read the comment there,   */
2259 /* continue here with next paragraph.					    */
2260 /*									    */
2261 /* To turn IPv4 packet into ICMPv4 response packet, these things must	    */
2262 /* happen here:								    */
2263 /*	(1) Original mblk is copied (duplicated).			    */
2264 /*                                                                          */
2265 /*	(2) ICMP header is created.					    */
2266 /*                                                                          */
2267 /*	(3) Link ICMP header with copy of original mblk, we have ICMPv4	    */
2268 /*	    data ready then.						    */
2269 /*                                                                          */
2270 /*      (4) Swap IP addresses in original mblk and adjust IP header data.   */
2271 /*                                                                          */
2272 /*	(5) The mblk containing original packet is trimmed to contain IP    */
2273 /*	    header only and ICMP chksum is computed.			    */
2274 /*                                                                          */
2275 /*	(6) The ICMP header we have from (3) is linked to original mblk,    */
2276 /*	    which now contains new IP header. If original packet was spread */
2277 /*	    over several mblks, only the first mblk is kept.		    */
2278 /* ------------------------------------------------------------------------ */
2279 static int fr_make_icmp_v4(fin)
2280 fr_info_t *fin;
2281 {
2282 	struct in_addr tmp_src;
2283 	tcphdr_t *tcp;
2284 	struct icmp *icmp;
2285 	mblk_t *mblk_icmp;
2286 	mblk_t *mblk_ip;
2287 	size_t icmp_pld_len;	/* octets to append to ICMP header */
2288 	size_t orig_iphdr_len;	/* length of IP header only */
2289 	uint32_t sum;
2290 	uint16_t *buf;
2291 	int len;
2292 
2293 
2294 	if (fin->fin_v != 4)
2295 		return (-1);
2296 
2297 	/*
2298 	 * If we are dealing with TCP, then packet must be SYN/FIN to be routed
2299 	 * by IP stack. If it is not SYN/FIN, then we must drop it silently.
2300 	 */
2301 	tcp = (tcphdr_t *) fin->fin_dp;
2302 
2303 	if ((fin->fin_p == IPPROTO_TCP) &&
2304 	    ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2305 		return (-1);
2306 
2307 	/*
2308 	 * Step (1)
2309 	 *
2310 	 * Make copy of original mblk.
2311 	 *
2312 	 * We want to copy as much data as necessary, not less, not more.  The
2313 	 * ICMPv4 payload length for unreachable messages is:
2314 	 *	original IP header + 8 bytes of L4 (if there are any).
2315 	 *
2316 	 * We determine if there are at least 8 bytes of L4 data following IP
2317 	 * header first.
2318 	 */
2319 	icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ?
2320 		ICMPERR_ICMPHLEN : fin->fin_dlen;
2321 	/*
2322 	 * Since we don't want to copy more data than necessary, we must trim
2323 	 * the original mblk here.  The right way (STREAMish) would be to use
2324 	 * adjmsg() to trim it.  However we would have to calculate the length
2325 	 * argument for adjmsg() from pointers we already have here.
2326 	 *
2327 	 * Since we have pointers and offsets, it's faster and easier for
2328 	 * us to just adjust pointers by hand instead of using adjmsg().
2329 	 */
2330 	fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp;
2331 	fin->fin_m->b_wptr += icmp_pld_len;
2332 	icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip;
2333 
2334 	/*
2335 	 * Also we don't want to copy any L2 stuff, which might precede IP
2336 	 * header, so we have have to set b_rptr to point to the start of IP
2337 	 * header.
2338 	 */
2339 	fin->fin_m->b_rptr += fin->fin_ipoff;
2340 	if ((mblk_ip = copyb(fin->fin_m)) == NULL)
2341 		return (-1);
2342 	fin->fin_m->b_rptr -= fin->fin_ipoff;
2343 
2344 	/*
2345 	 * Step (2)
2346 	 *
2347 	 * Create an ICMP header, which will be appened to original mblk later.
2348 	 * ICMP header is just another mblk.
2349 	 */
2350 	mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI);
2351 	if (mblk_icmp == NULL) {
2352 		FREE_MB_T(mblk_ip);
2353 		return (-1);
2354 	}
2355 
2356 	MTYPE(mblk_icmp) = M_DATA;
2357 	icmp = (struct icmp *) mblk_icmp->b_wptr;
2358 	icmp->icmp_type = ICMP_UNREACH;
2359 	icmp->icmp_code = fin->fin_icode & 0xFF;
2360 	icmp->icmp_void = 0;
2361 	icmp->icmp_cksum = 0;
2362 	mblk_icmp->b_wptr += ICMPERR_ICMPHLEN;
2363 
2364 	/*
2365 	 * Step (3)
2366 	 *
2367 	 * Complete ICMP packet - link ICMP header with L4 data from original
2368 	 * IP packet.
2369 	 */
2370 	linkb(mblk_icmp, mblk_ip);
2371 
2372 	/*
2373 	 * Step (4)
2374 	 *
2375 	 * Swap IP addresses and change IP header fields accordingly in
2376 	 * original IP packet.
2377 	 *
2378 	 * There is a rule option return-icmp as a dest for physical
2379 	 * interfaces. This option becomes useless for loopback, since IPF box
2380 	 * uses same address as a loopback destination. We ignore the option
2381 	 * here, the ICMP packet will always look like as it would have been
2382 	 * sent from the original destination host.
2383 	 */
2384 	tmp_src = fin->fin_ip->ip_src;
2385 	fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2386 	fin->fin_ip->ip_dst = tmp_src;
2387 	fin->fin_ip->ip_p = IPPROTO_ICMP;
2388 	fin->fin_ip->ip_sum = 0;
2389 
2390 	/*
2391 	 * Step (5)
2392 	 *
2393 	 * We trim the orignal mblk to hold IP header only.
2394 	 */
2395 	fin->fin_m->b_wptr = fin->fin_dp;
2396 	orig_iphdr_len = fin->fin_m->b_wptr -
2397 			    (fin->fin_m->b_rptr + fin->fin_ipoff);
2398 	fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN +
2399 			    orig_iphdr_len);
2400 
2401 	/*
2402 	 * ICMP chksum calculation. The data we are calculating chksum for are
2403 	 * spread over two mblks, therefore we have to use two for loops.
2404 	 *
2405 	 * First for loop computes chksum part for ICMP header.
2406 	 */
2407 	buf = (uint16_t *) icmp;
2408 	len = ICMPERR_ICMPHLEN;
2409 	for (sum = 0; len > 1; len -= 2)
2410 		sum += *buf++;
2411 
2412 	/*
2413 	 * Here we add chksum part for ICMP payload.
2414 	 */
2415 	len = icmp_pld_len;
2416 	buf = (uint16_t *) mblk_ip->b_rptr;
2417 	for (; len > 1; len -= 2)
2418 		sum += *buf++;
2419 
2420 	/*
2421 	 * Chksum is done.
2422 	 */
2423 	sum = (sum >> 16) + (sum & 0xffff);
2424 	sum += (sum >> 16);
2425 	icmp->icmp_cksum = ~sum;
2426 
2427 	/*
2428 	 * Step (6)
2429 	 *
2430 	 * Release all packet mblks, except the first one.
2431 	 */
2432 	if (fin->fin_m->b_cont != NULL) {
2433 		FREE_MB_T(fin->fin_m->b_cont);
2434 	}
2435 
2436 	/*
2437 	 * Append ICMP payload to first mblk, which already contains new IP
2438 	 * header.
2439 	 */
2440 	linkb(fin->fin_m, mblk_icmp);
2441 
2442 	return (0);
2443 }
2444 
#ifdef USE_INET6
/* ------------------------------------------------------------------------ */
/* Function:    fr_make_icmp_v6                                             */
/* Returns:     int - 0 on success, -1 on failure                           */
/* Parameters:  fin(I) - pointer to packet information                      */
/*                                                                          */
/* Please read comment at fr_make_icmp() wrapper function to get an idea    */
/* what is going to happen here and why. Once you read the comment there,   */
/* continue here with next paragraph.                                       */
/*                                                                          */
/* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response.    */
/* The algorithm is fairly simple:                                          */
/*      1) We need to get copy of complete mblk.                            */
/*                                                                          */
/*      2) New ICMPv6 header is created.                                    */
/*                                                                          */
/*      3) The copy of original mblk with packet is linked to ICMPv6        */
/*         header.                                                          */
/*                                                                          */
/*      4) The checksum must be adjusted.                                   */
/*                                                                          */
/*      5) IP addresses in original mblk are swapped and IP header data     */
/*         are adjusted (protocol number, payload length).                  */
/*                                                                          */
/*      6) Original mblk is trimmed to hold IPv6 header only, then it is    */
/*         linked with the ICMPv6 data we got from (3).                     */
/* ------------------------------------------------------------------------ */
static int fr_make_icmp_v6(fin)
fr_info_t *fin;
{
	struct icmp6_hdr *icmp6;
	tcphdr_t *tcp;
	struct in6_addr	tmp_src6;
	size_t icmp_pld_len;
	mblk_t *mblk_ip, *mblk_icmp;

	if (fin->fin_v != 6)
		return (-1);

	/*
	 * If we are dealing with TCP, then packet must be SYN/FIN to be
	 * routed by IP stack. If it is not SYN/FIN, then we must drop it
	 * silently.
	 */
	tcp = (tcphdr_t *) fin->fin_dp;

	if ((fin->fin_p == IPPROTO_TCP) &&
	    ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
		return (-1);

	/*
	 * Step (1)
	 *
	 * We need to copy complete packet in case of IPv6, no trimming is
	 * needed (except the L2 headers).
	 *
	 * The payload length is taken while b_rptr points at the IP header,
	 * so that it matches the amount of data held by the copy and does
	 * not include the fin_ipoff bytes preceding the IP header.  b_rptr
	 * is put back before the result of copyb() is checked, so that it is
	 * restored on the failure path as well.
	 */
	fin->fin_m->b_rptr += fin->fin_ipoff;
	icmp_pld_len = M_LEN(fin->fin_m);
	mblk_ip = copyb(fin->fin_m);
	fin->fin_m->b_rptr -= fin->fin_ipoff;
	if (mblk_ip == NULL)
		return (-1);

	/*
	 * Step (2)
	 *
	 * Allocate and create ICMP header.  The packet copy made in step (1)
	 * has to be released if the allocation fails.
	 */
	mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr),
			BPRI_HI);

	if (mblk_icmp == NULL) {
		FREE_MB_T(mblk_ip);
		return (-1);
	}

	MTYPE(mblk_icmp) = M_DATA;
	icmp6 =  (struct icmp6_hdr *) mblk_icmp->b_wptr;
	icmp6->icmp6_type = ICMP6_DST_UNREACH;
	icmp6->icmp6_code = fin->fin_icode & 0xFF;
	icmp6->icmp6_data32[0] = 0;
	mblk_icmp->b_wptr += sizeof (struct icmp6_hdr);

	/*
	 * Step (3)
	 *
	 * Link the copy of IP packet to ICMP header.
	 */
	linkb(mblk_icmp, mblk_ip);

	/*
	 * Step (4)
	 *
	 * Calculate chksum - this is much easier task than in case of
	 * IPv4  - ICMPv6 chksum only covers IP addresses, and payload length.
	 * We are making compensation just for change of packet length.
	 */
	icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr);

	/*
	 * Step (5)
	 *
	 * Swap IP addresses.
	 */
	tmp_src6 = fin->fin_ip6->ip6_src;
	fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
	fin->fin_ip6->ip6_dst = tmp_src6;

	/*
	 * and adjust IP header data.
	 */
	fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6;
	fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr));

	/*
	 * Step (6)
	 *
	 * We must release all linked mblks from original packet and keep only
	 * the first mblk with IP header to link ICMP data.  b_cont must be
	 * reset afterwards: linkb() below appends to the tail of the b_cont
	 * chain, so a stale pointer would make it walk the released mblks.
	 */
	fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t);

	if (fin->fin_m->b_cont != NULL) {
		FREE_MB_T(fin->fin_m->b_cont);
		fin->fin_m->b_cont = NULL;
	}

	/*
	 * Append ICMP payload to IP header.
	 */
	linkb(fin->fin_m, mblk_icmp);

	return (0);
}
#endif	/* USE_INET6 */
2575 
2576 /* ------------------------------------------------------------------------ */
2577 /* Function:    fr_make_icmp                                                */
2578 /* Returns:     int - 0 on success, -1 on failure			    */
2579 /* Parameters:  fin(I) - pointer to packet information                      */
2580 /*                                                                          */
2581 /* We must alter the original mblks passed to IPF from IP stack via	    */
2582 /* FW_HOOKS. The reasons why we must alter packet are discussed within	    */
2583 /* comment at fr_make_rst() function.					    */
2584 /*									    */
2585 /* The fr_make_icmp() function acts as a wrapper, which passes the code	    */
2586 /* execution to	fr_make_icmp_v4() or fr_make_icmp_v6() depending on	    */
2587 /* protocol version. However there are some details, which are common to    */
2588 /* both IP versions. The details are going to be explained here.	    */
2589 /*                                                                          */
2590 /* The packet looks as follows:						    */
2591 /*    xxx | IP hdr | IP payload    ...	| 				    */
2592 /*    ^   ^        ^            	^				    */
2593 /*    |   |        |            	|				    */
2594 /*    |   |        |		fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */
2595 /*    |   |        |							    */
2596 /*    |   |        `- fin_m->fin_dp (in case of IPv4 points to L4 header)   */
2597 /*    |   |								    */
2598 /*    |   `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case  */
2599 /*    |      of loopback)						    */
2600 /*    |   								    */
2601 /*    `- fin_m->b_rptr -  points to L2 header in case of physical NIC	    */
2602 /*                                                                          */
2603 /* All relevant IP headers are pulled up into the first mblk. It happened   */
2604 /* well in advance before the matching rule was found (the rule, which took */
2605 /* us here, to fr_make_icmp() function).				    */
2606 /*                                                                          */
2607 /* Both functions will turn packet passed in fin->fin_m mblk into a new	    */
2608 /* packet. New packet will be represented as chain of mblks.		    */
2609 /* orig mblk |- b_cont ---.						    */
2610 /*    ^                    `-> ICMP hdr |- b_cont--.			    */
2611 /*    |	                          ^	            `-> duped orig mblk	    */
2612 /*    |                           |				^	    */
2613 /*    `- The original mblk        |				|	    */
2614 /*       will be trimmed to       |				|	    */
2615 /*       to contain IP header     |				|	    */
2616 /*       only                     |				|	    */
2617 /*                                |				|	    */
2618 /*                                `- This is newly		|           */
2619 /*                                   allocated mblk to		|	    */
2620 /*                                   hold ICMPv6 data.		|	    */
2621 /*								|	    */
2622 /*								|	    */
2623 /*								|	    */
2624 /*	    This is the copy of original mblk, it will contain -'	    */
2625 /*	    orignal IP  packet in case of ICMPv6. In case of		    */
2626 /*	    ICMPv4 it will contain up to 8 bytes of IP payload		    */
2627 /*	    (TCP/UDP/L4) data from original packet.			    */
2628 /* ------------------------------------------------------------------------ */
2629 int fr_make_icmp(fin)
2630 fr_info_t *fin;
2631 {
2632 	int rv;
2633 
2634 	if (fin->fin_v == 4)
2635 		rv = fr_make_icmp_v4(fin);
2636 #ifdef USE_INET6
2637 	else if (fin->fin_v == 6)
2638 		rv = fr_make_icmp_v6(fin);
2639 #endif
2640 	else
2641 		rv = -1;
2642 
2643 	return (rv);
2644 }
2645 #endif	/* _KERNEL && SOLARIS2 >= 10 */
2646