xref: /illumos-gate/usr/src/uts/common/inet/ipf/ip_fil_solaris.c (revision 1de082f7b7fd4b6629e14b0f9b8f94f6c0bda3c2)
1 /*
2  * Copyright (C) 1993-2001, 2003 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
7  * Use is subject to license terms.
8  */
9 
10 #if !defined(lint)
11 static const char sccsid[] = "@(#)ip_fil_solaris.c	1.7 07/22/06 (C) 1993-2000 Darren Reed";
12 static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $";
13 #endif
14 
15 #include <sys/types.h>
16 #include <sys/errno.h>
17 #include <sys/param.h>
18 #include <sys/cpuvar.h>
19 #include <sys/open.h>
20 #include <sys/ioctl.h>
21 #include <sys/filio.h>
22 #include <sys/systm.h>
23 #include <sys/strsubr.h>
24 #include <sys/cred.h>
25 #include <sys/cred_impl.h>
26 #include <sys/ddi.h>
27 #include <sys/sunddi.h>
28 #include <sys/ksynch.h>
29 #include <sys/kmem.h>
30 #include <sys/mkdev.h>
31 #include <sys/protosw.h>
32 #include <sys/socket.h>
33 #include <sys/dditypes.h>
34 #include <sys/cmn_err.h>
35 #include <sys/zone.h>
36 #include <net/if.h>
37 #include <net/af.h>
38 #include <net/route.h>
39 #include <netinet/in.h>
40 #include <netinet/in_systm.h>
41 #include <netinet/ip.h>
42 #include <netinet/ip_var.h>
43 #include <netinet/tcp.h>
44 #include <netinet/udp.h>
45 #include <netinet/tcpip.h>
46 #include <netinet/ip_icmp.h>
47 #include "netinet/ip_compat.h"
48 #ifdef	USE_INET6
49 # include <netinet/icmp6.h>
50 #endif
51 #include "netinet/ip_fil.h"
52 #include "netinet/ip_nat.h"
53 #include "netinet/ip_frag.h"
54 #include "netinet/ip_state.h"
55 #include "netinet/ip_auth.h"
56 #include "netinet/ip_proxy.h"
57 #include "netinet/ipf_stack.h"
58 #ifdef	IPFILTER_LOOKUP
59 # include "netinet/ip_lookup.h"
60 #endif
61 #include <inet/ip_ire.h>
62 
63 #include <sys/md5.h>
64 #include <sys/neti.h>
65 
66 static	int	frzerostats __P((caddr_t, ipf_stack_t *));
67 static	int	fr_setipfloopback __P((int, ipf_stack_t *));
68 static	int	fr_enableipf __P((ipf_stack_t *, int));
69 static	int	fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp));
70 static	int	ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *));
71 static	int	ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *));
72 static	int	ipf_hook __P((hook_data_t, int, int, void *));
73 static	int	ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *));
74 static	int	ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *));
75 static	int	ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t,
76     void *));
77 static	int	ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *));
78 static	int	ipf_hook4 __P((hook_data_t, int, int, void *));
79 static	int	ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *));
80 static	int	ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *));
81 static	int	ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t,
82     void *));
83 static	int	ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t,
84     void *));
85 static	int     ipf_hook6 __P((hook_data_t, int, int, void *));
86 extern	int	ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
87 extern	int	ipf_frruleiter __P((void *, int, void *, ipf_stack_t *));
88 
89 #if SOLARIS2 < 10
90 #if SOLARIS2 >= 7
91 u_int		*ip_ttl_ptr = NULL;
92 u_int		*ip_mtudisc = NULL;
93 # if SOLARIS2 >= 8
94 int		*ip_forwarding = NULL;
95 u_int		*ip6_forwarding = NULL;
96 # else
97 u_int		*ip_forwarding = NULL;
98 # endif
99 #else
100 u_long		*ip_ttl_ptr = NULL;
101 u_long		*ip_mtudisc = NULL;
102 u_long		*ip_forwarding = NULL;
103 #endif
104 #endif
105 
106 
107 /* ------------------------------------------------------------------------ */
108 /* Function:    ipldetach                                                   */
109 /* Returns:     int - 0 == success, else error.                             */
110 /* Parameters:  Nil                                                         */
111 /*                                                                          */
112 /* This function is responsible for undoing anything that might have been   */
113 /* done in a call to iplattach().  It must be able to clean up from a call  */
114 /* to iplattach() that did not succeed.  Why might that happen?  Someone    */
115 /* configures a table to be so large that we cannot allocate enough memory  */
116 /* for it.                                                                  */
117 /* ------------------------------------------------------------------------ */
118 int ipldetach(ifs)
119 ipf_stack_t *ifs;
120 {
121 
122 	ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0);
123 
124 #if SOLARIS2 < 10
125 
126 	if (ifs->ifs_fr_control_forwarding & 2) {
127 		if (ip_forwarding != NULL)
128 			*ip_forwarding = 0;
129 #if SOLARIS2 >= 8
130 		if (ip6_forwarding != NULL)
131 			*ip6_forwarding = 0;
132 #endif
133 	}
134 #endif
135 
136 	/*
137 	 * This lock needs to be dropped around the net_hook_unregister calls
138 	 * because we can deadlock here with:
139 	 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
140 	 * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running)
141 	 */
142 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
143 
144 #define	UNDO_HOOK(_f, _b, _e, _h)					\
145 	do {								\
146 		if (ifs->_f != NULL) {					\
147 			if (ifs->_b) {					\
148 				ifs->_b = (net_hook_unregister(ifs->_f,	\
149 					   _e, ifs->_h) != 0);		\
150 				if (!ifs->_b) {				\
151 					hook_free(ifs->_h);		\
152 					ifs->_h = NULL;			\
153 				}					\
154 			} else if (ifs->_h != NULL) {			\
155 				hook_free(ifs->_h);			\
156 				ifs->_h = NULL;				\
157 			}						\
158 		}							\
159 		_NOTE(CONSTCOND)					\
160 	} while (0)
161 
162 	/*
163 	 * Remove IPv6 Hooks
164 	 */
165 	if (ifs->ifs_ipf_ipv6 != NULL) {
166 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in,
167 			  NH_PHYSICAL_IN, ifs_ipfhook6_in);
168 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out,
169 			  NH_PHYSICAL_OUT, ifs_ipfhook6_out);
170 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events,
171 			  NH_NIC_EVENTS, ifs_ipfhook6_nicevents);
172 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in,
173 			  NH_LOOPBACK_IN, ifs_ipfhook6_loop_in);
174 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out,
175 			  NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out);
176 
177 		if (net_protocol_release(ifs->ifs_ipf_ipv6) != 0)
178 			goto detach_failed;
179 		ifs->ifs_ipf_ipv6 = NULL;
180         }
181 
182 	/*
183 	 * Remove IPv4 Hooks
184 	 */
185 	if (ifs->ifs_ipf_ipv4 != NULL) {
186 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in,
187 			  NH_PHYSICAL_IN, ifs_ipfhook4_in);
188 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out,
189 			  NH_PHYSICAL_OUT, ifs_ipfhook4_out);
190 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events,
191 			  NH_NIC_EVENTS, ifs_ipfhook4_nicevents);
192 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in,
193 			  NH_LOOPBACK_IN, ifs_ipfhook4_loop_in);
194 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out,
195 			  NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out);
196 
197 		if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0)
198 			goto detach_failed;
199 		ifs->ifs_ipf_ipv4 = NULL;
200 	}
201 
202 #undef UNDO_HOOK
203 
204 #ifdef	IPFDEBUG
205 	cmn_err(CE_CONT, "ipldetach()\n");
206 #endif
207 
208 	WRITE_ENTER(&ifs->ifs_ipf_global);
209 	fr_deinitialise(ifs);
210 
211 	(void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs);
212 	(void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs);
213 
214 	if (ifs->ifs_ipf_locks_done == 1) {
215 		MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock);
216 		MUTEX_DESTROY(&ifs->ifs_ipf_rw);
217 		RW_DESTROY(&ifs->ifs_ipf_tokens);
218 		RW_DESTROY(&ifs->ifs_ipf_ipidfrag);
219 		ifs->ifs_ipf_locks_done = 0;
220 	}
221 
222 	if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out ||
223 	    ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in ||
224 	    ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events ||
225 	    ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out ||
226 	    ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out)
227 		return -1;
228 
229 	return 0;
230 
231 detach_failed:
232 	WRITE_ENTER(&ifs->ifs_ipf_global);
233 	return -1;
234 }
235 
236 int iplattach(ifs)
237 ipf_stack_t *ifs;
238 {
239 #if SOLARIS2 < 10
240 	int i;
241 #endif
242 	netid_t id = ifs->ifs_netid;
243 
244 #ifdef	IPFDEBUG
245 	cmn_err(CE_CONT, "iplattach()\n");
246 #endif
247 
248 	ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0);
249 	ifs->ifs_fr_flags = IPF_LOGGING;
250 #ifdef _KERNEL
251 	ifs->ifs_fr_update_ipid = 0;
252 #else
253 	ifs->ifs_fr_update_ipid = 1;
254 #endif
255 	ifs->ifs_fr_minttl = 4;
256 	ifs->ifs_fr_icmpminfragmtu = 68;
257 #if defined(IPFILTER_DEFAULT_BLOCK)
258 	ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH;
259 #else
260 	ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH;
261 #endif
262 
263 	MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex");
264 	MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex");
265 	RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock");
266 	RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock");
267 	ifs->ifs_ipf_locks_done = 1;
268 
269 	if (fr_initialise(ifs) < 0)
270 		return -1;
271 
272 	HOOK_INIT(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4,
273 		  "ipfilter_hook4_nicevents", ifs);
274 	HOOK_INIT(ifs->ifs_ipfhook4_in, ipf_hook4_in,
275 		  "ipfilter_hook4_in", ifs);
276 	HOOK_INIT(ifs->ifs_ipfhook4_out, ipf_hook4_out,
277 		  "ipfilter_hook4_out", ifs);
278 	HOOK_INIT(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in,
279 		  "ipfilter_hook4_loop_in", ifs);
280 	HOOK_INIT(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out,
281 		  "ipfilter_hook4_loop_out", ifs);
282 
283 	/*
284 	 * If we hold this lock over all of the net_hook_register calls, we
285 	 * can cause a deadlock to occur with the following lock ordering:
286 	 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
287 	 * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path)
288 	 */
289 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
290 
291 	/*
292 	 * Add IPv4 hooks
293 	 */
294 	ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET);
295 	if (ifs->ifs_ipf_ipv4 == NULL)
296 		goto hookup_failed;
297 
298 	ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4,
299 	    NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0);
300 	if (!ifs->ifs_hook4_nic_events)
301 		goto hookup_failed;
302 
303 	ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4,
304 	    NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0);
305 	if (!ifs->ifs_hook4_physical_in)
306 		goto hookup_failed;
307 
308 	ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4,
309 	    NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0);
310 	if (!ifs->ifs_hook4_physical_out)
311 		goto hookup_failed;
312 
313 	if (ifs->ifs_ipf_loopback) {
314 		ifs->ifs_hook4_loopback_in = (net_hook_register(
315 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
316 		    ifs->ifs_ipfhook4_loop_in) == 0);
317 		if (!ifs->ifs_hook4_loopback_in)
318 			goto hookup_failed;
319 
320 		ifs->ifs_hook4_loopback_out = (net_hook_register(
321 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
322 		    ifs->ifs_ipfhook4_loop_out) == 0);
323 		if (!ifs->ifs_hook4_loopback_out)
324 			goto hookup_failed;
325 	}
326 	/*
327 	 * Add IPv6 hooks
328 	 */
329 	ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6);
330 	if (ifs->ifs_ipf_ipv6 == NULL)
331 		goto hookup_failed;
332 
333 	HOOK_INIT(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6,
334 		  "ipfilter_hook6_nicevents", ifs);
335 	HOOK_INIT(ifs->ifs_ipfhook6_in, ipf_hook6_in,
336 		  "ipfilter_hook6_in", ifs);
337 	HOOK_INIT(ifs->ifs_ipfhook6_out, ipf_hook6_out,
338 		  "ipfilter_hook6_out", ifs);
339 	HOOK_INIT(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in,
340 		  "ipfilter_hook6_loop_in", ifs);
341 	HOOK_INIT(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out,
342 		  "ipfilter_hook6_loop_out", ifs);
343 
344 	ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6,
345 	    NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0);
346 	if (!ifs->ifs_hook6_nic_events)
347 		goto hookup_failed;
348 
349 	ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6,
350 	    NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0);
351 	if (!ifs->ifs_hook6_physical_in)
352 		goto hookup_failed;
353 
354 	ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6,
355 	    NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0);
356 	if (!ifs->ifs_hook6_physical_out)
357 		goto hookup_failed;
358 
359 	if (ifs->ifs_ipf_loopback) {
360 		ifs->ifs_hook6_loopback_in = (net_hook_register(
361 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
362 		    ifs->ifs_ipfhook6_loop_in) == 0);
363 		if (!ifs->ifs_hook6_loopback_in)
364 			goto hookup_failed;
365 
366 		ifs->ifs_hook6_loopback_out = (net_hook_register(
367 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
368 		    ifs->ifs_ipfhook6_loop_out) == 0);
369 		if (!ifs->ifs_hook6_loopback_out)
370 			goto hookup_failed;
371 	}
372 
373 	/*
374 	 * Reacquire ipf_global, now it is safe.
375 	 */
376 	WRITE_ENTER(&ifs->ifs_ipf_global);
377 
378 /* Do not use private interface ip_params_arr[] in Solaris 10 */
379 #if SOLARIS2 < 10
380 
381 #if SOLARIS2 >= 8
382 	ip_forwarding = &ip_g_forward;
383 #endif
384 	/*
385 	 * XXX - There is no terminator for this array, so it is not possible
386 	 * to tell if what we are looking for is missing and go off the end
387 	 * of the array.
388 	 */
389 
390 #if SOLARIS2 <= 8
391 	for (i = 0; ; i++) {
392 		if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) {
393 			ip_ttl_ptr = &ip_param_arr[i].ip_param_value;
394 		} else if (!strcmp(ip_param_arr[i].ip_param_name,
395 			    "ip_path_mtu_discovery")) {
396 			ip_mtudisc = &ip_param_arr[i].ip_param_value;
397 		}
398 #if SOLARIS2 < 8
399 		else if (!strcmp(ip_param_arr[i].ip_param_name,
400 			    "ip_forwarding")) {
401 			ip_forwarding = &ip_param_arr[i].ip_param_value;
402 		}
403 #else
404 		else if (!strcmp(ip_param_arr[i].ip_param_name,
405 			    "ip6_forwarding")) {
406 			ip6_forwarding = &ip_param_arr[i].ip_param_value;
407 		}
408 #endif
409 
410 		if (ip_mtudisc != NULL && ip_ttl_ptr != NULL &&
411 #if SOLARIS2 >= 8
412 		    ip6_forwarding != NULL &&
413 #endif
414 		    ip_forwarding != NULL)
415 			break;
416 	}
417 #endif
418 
419 	if (ifs->ifs_fr_control_forwarding & 1) {
420 		if (ip_forwarding != NULL)
421 			*ip_forwarding = 1;
422 #if SOLARIS2 >= 8
423 		if (ip6_forwarding != NULL)
424 			*ip6_forwarding = 1;
425 #endif
426 	}
427 
428 #endif
429 
430 	return 0;
431 hookup_failed:
432 	WRITE_ENTER(&ifs->ifs_ipf_global);
433 	return -1;
434 }
435 
436 static	int	fr_setipfloopback(set, ifs)
437 int set;
438 ipf_stack_t *ifs;
439 {
440 	if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL)
441 		return EFAULT;
442 
443 	if (set && !ifs->ifs_ipf_loopback) {
444 		ifs->ifs_ipf_loopback = 1;
445 
446 		ifs->ifs_hook4_loopback_in = (net_hook_register(
447 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
448 		    ifs->ifs_ipfhook4_loop_in) == 0);
449 		if (!ifs->ifs_hook4_loopback_in)
450 			return EINVAL;
451 
452 		ifs->ifs_hook4_loopback_out = (net_hook_register(
453 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
454 		    ifs->ifs_ipfhook4_loop_out) == 0);
455 		if (!ifs->ifs_hook4_loopback_out)
456 			return EINVAL;
457 
458 		ifs->ifs_hook6_loopback_in = (net_hook_register(
459 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
460 		    ifs->ifs_ipfhook6_loop_in) == 0);
461 		if (!ifs->ifs_hook6_loopback_in)
462 			return EINVAL;
463 
464 		ifs->ifs_hook6_loopback_out = (net_hook_register(
465 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
466 		    ifs->ifs_ipfhook6_loop_out) == 0);
467 		if (!ifs->ifs_hook6_loopback_out)
468 			return EINVAL;
469 
470 	} else if (!set && ifs->ifs_ipf_loopback) {
471 		ifs->ifs_ipf_loopback = 0;
472 
473 		ifs->ifs_hook4_loopback_in =
474 		    (net_hook_unregister(ifs->ifs_ipf_ipv4,
475 		    NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
476 		if (ifs->ifs_hook4_loopback_in)
477 			return EBUSY;
478 
479 		ifs->ifs_hook4_loopback_out =
480 		    (net_hook_unregister(ifs->ifs_ipf_ipv4,
481 		    NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0);
482 		if (ifs->ifs_hook4_loopback_out)
483 			return EBUSY;
484 
485 		ifs->ifs_hook6_loopback_in =
486 		    (net_hook_unregister(ifs->ifs_ipf_ipv6,
487 		    NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
488 		if (ifs->ifs_hook6_loopback_in)
489 			return EBUSY;
490 
491 		ifs->ifs_hook6_loopback_out =
492 		    (net_hook_unregister(ifs->ifs_ipf_ipv6,
493 		    NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0);
494 		if (ifs->ifs_hook6_loopback_out)
495 			return EBUSY;
496 	}
497 	return 0;
498 }
499 
500 
501 /*
502  * Filter ioctl interface.
503  */
504 /*ARGSUSED*/
505 int iplioctl(dev, cmd, data, mode, cp, rp)
506 dev_t dev;
507 int cmd;
508 #if SOLARIS2 >= 7
509 intptr_t data;
510 #else
511 int *data;
512 #endif
513 int mode;
514 cred_t *cp;
515 int *rp;
516 {
517 	int error = 0, tmp;
518 	friostat_t fio;
519 	minor_t unit;
520 	u_int enable;
521 	ipf_stack_t *ifs;
522 
523 #ifdef	IPFDEBUG
524 	cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
525 		dev, cmd, data, mode, cp, rp);
526 #endif
527 	unit = getminor(dev);
528 	if (IPL_LOGMAX < unit)
529 		return ENXIO;
530 
531         /*
532 	 * As we're calling ipf_find_stack in user space, from a given zone
533 	 * to find the stack pointer for this zone, there is no need to have
534 	 * a hold/refence count here.
535 	 */
536 	ifs = ipf_find_stack(crgetzoneid(cp));
537 	ASSERT(ifs != NULL);
538 
539 	if (ifs->ifs_fr_running <= 0) {
540 		if (unit != IPL_LOGIPF) {
541 			return EIO;
542 		}
543 		if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
544 		    cmd != SIOCIPFSET && cmd != SIOCFRENB &&
545 		    cmd != SIOCGETFS && cmd != SIOCGETFF) {
546 			return EIO;
547 		}
548 	}
549 
550 	READ_ENTER(&ifs->ifs_ipf_global);
551 	if (ifs->ifs_fr_enable_active != 0) {
552 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
553 		return EBUSY;
554 	}
555 
556 	error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, cp->cr_uid,
557 			       curproc, ifs);
558 	if (error != -1) {
559 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
560 		return error;
561 	}
562 	error = 0;
563 
564 	switch (cmd)
565 	{
566 	case SIOCFRENB :
567 		if (!(mode & FWRITE))
568 			error = EPERM;
569 		else {
570 			error = COPYIN((caddr_t)data, (caddr_t)&enable,
571 				       sizeof(enable));
572 			if (error != 0) {
573 				error = EFAULT;
574 				break;
575 			}
576 
577 			RWLOCK_EXIT(&ifs->ifs_ipf_global);
578 			WRITE_ENTER(&ifs->ifs_ipf_global);
579 			ifs->ifs_fr_enable_active = 1;
580 			error = fr_enableipf(ifs, enable);
581 			ifs->ifs_fr_enable_active = 0;
582 		}
583 		break;
584 	case SIOCIPFSET :
585 		if (!(mode & FWRITE)) {
586 			error = EPERM;
587 			break;
588 		}
589 		/* FALLTHRU */
590 	case SIOCIPFGETNEXT :
591 	case SIOCIPFGET :
592 		error = fr_ipftune(cmd, (void *)data, ifs);
593 		break;
594 	case SIOCSETFF :
595 		if (!(mode & FWRITE))
596 			error = EPERM;
597 		else {
598 			error = COPYIN((caddr_t)data,
599 				       (caddr_t)&ifs->ifs_fr_flags,
600 				       sizeof(ifs->ifs_fr_flags));
601 			if (error != 0)
602 				error = EFAULT;
603 		}
604 		break;
605 	case SIOCIPFLP :
606 		error = COPYIN((caddr_t)data, (caddr_t)&tmp,
607 			       sizeof(tmp));
608 		if (error != 0)
609 			error = EFAULT;
610 		else
611 			error = fr_setipfloopback(tmp, ifs);
612 		break;
613 	case SIOCGETFF :
614 		error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data,
615 				sizeof(ifs->ifs_fr_flags));
616 		if (error != 0)
617 			error = EFAULT;
618 		break;
619 	case SIOCFUNCL :
620 		error = fr_resolvefunc((void *)data);
621 		break;
622 	case SIOCINAFR :
623 	case SIOCRMAFR :
624 	case SIOCADAFR :
625 	case SIOCZRLST :
626 		if (!(mode & FWRITE))
627 			error = EPERM;
628 		else
629 			error = frrequest(unit, cmd, (caddr_t)data,
630 					  ifs->ifs_fr_active, 1, ifs);
631 		break;
632 	case SIOCINIFR :
633 	case SIOCRMIFR :
634 	case SIOCADIFR :
635 		if (!(mode & FWRITE))
636 			error = EPERM;
637 		else
638 			error = frrequest(unit, cmd, (caddr_t)data,
639 					  1 - ifs->ifs_fr_active, 1, ifs);
640 		break;
641 	case SIOCSWAPA :
642 		if (!(mode & FWRITE))
643 			error = EPERM;
644 		else {
645 			WRITE_ENTER(&ifs->ifs_ipf_mutex);
646 			error = COPYOUT((caddr_t)&ifs->ifs_fr_active,
647 					(caddr_t)data,
648 					sizeof(ifs->ifs_fr_active));
649 			if (error != 0)
650 				error = EFAULT;
651 			else
652 				ifs->ifs_fr_active = 1 - ifs->ifs_fr_active;
653 			RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
654 		}
655 		break;
656 	case SIOCGETFS :
657 		fr_getstat(&fio, ifs);
658 		error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT);
659 		break;
660 	case SIOCFRZST :
661 		if (!(mode & FWRITE))
662 			error = EPERM;
663 		else
664 			error = fr_zerostats((caddr_t)data, ifs);
665 		break;
666 	case	SIOCIPFFL :
667 		if (!(mode & FWRITE))
668 			error = EPERM;
669 		else {
670 			error = COPYIN((caddr_t)data, (caddr_t)&tmp,
671 				       sizeof(tmp));
672 			if (!error) {
673 				tmp = frflush(unit, 4, tmp, ifs);
674 				error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
675 						sizeof(tmp));
676 				if (error != 0)
677 					error = EFAULT;
678 			} else
679 				error = EFAULT;
680 		}
681 		break;
682 #ifdef USE_INET6
683 	case	SIOCIPFL6 :
684 		if (!(mode & FWRITE))
685 			error = EPERM;
686 		else {
687 			error = COPYIN((caddr_t)data, (caddr_t)&tmp,
688 				       sizeof(tmp));
689 			if (!error) {
690 				tmp = frflush(unit, 6, tmp, ifs);
691 				error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
692 						sizeof(tmp));
693 				if (error != 0)
694 					error = EFAULT;
695 			} else
696 				error = EFAULT;
697 		}
698 		break;
699 #endif
700 	case SIOCSTLCK :
701 		error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
702 		if (error == 0) {
703 			ifs->ifs_fr_state_lock = tmp;
704 			ifs->ifs_fr_nat_lock = tmp;
705 			ifs->ifs_fr_frag_lock = tmp;
706 			ifs->ifs_fr_auth_lock = tmp;
707 		} else
708 			error = EFAULT;
709 	break;
710 #ifdef	IPFILTER_LOG
711 	case	SIOCIPFFB :
712 		if (!(mode & FWRITE))
713 			error = EPERM;
714 		else {
715 			tmp = ipflog_clear(unit, ifs);
716 			error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
717 				       sizeof(tmp));
718 			if (error)
719 				error = EFAULT;
720 		}
721 		break;
722 #endif /* IPFILTER_LOG */
723 	case SIOCFRSYN :
724 		if (!(mode & FWRITE))
725 			error = EPERM;
726 		else {
727 			RWLOCK_EXIT(&ifs->ifs_ipf_global);
728 			WRITE_ENTER(&ifs->ifs_ipf_global);
729 
730 			frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
731 			fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
732 			fr_nataddrsync(0, NULL, NULL, ifs);
733 			fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
734 			error = 0;
735 		}
736 		break;
737 	case SIOCGFRST :
738 		error = fr_outobj((void *)data, fr_fragstats(ifs),
739 				  IPFOBJ_FRAGSTAT);
740 		break;
741 	case FIONREAD :
742 #ifdef	IPFILTER_LOG
743 		tmp = (int)ifs->ifs_iplused[IPL_LOGIPF];
744 
745 		error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp));
746 		if (error != 0)
747 			error = EFAULT;
748 #endif
749 		break;
750 	case SIOCIPFITER :
751 		error = ipf_frruleiter((caddr_t)data, cp->cr_uid,
752 				       curproc, ifs);
753 		break;
754 
755 	case SIOCGENITER :
756 		error = ipf_genericiter((caddr_t)data, cp->cr_uid,
757 					curproc, ifs);
758 		break;
759 
760 	case SIOCIPFDELTOK :
761 		error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
762 		if (error != 0) {
763 			error = EFAULT;
764 		} else {
765 			error = ipf_deltoken(tmp, cp->cr_uid, curproc, ifs);
766 		}
767 		break;
768 
769 	default :
770 #ifdef	IPFDEBUG
771 		cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p",
772 			cmd, (void *)data);
773 #endif
774 		error = EINVAL;
775 		break;
776 	}
777 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
778 	return error;
779 }
780 
781 
782 static int fr_enableipf(ifs, enable)
783 ipf_stack_t *ifs;
784 int enable;
785 {
786 	int error;
787 
788 	if (!enable) {
789 		error = ipldetach(ifs);
790 		if (error == 0)
791 			ifs->ifs_fr_running = -1;
792 		return error;
793 	}
794 
795 	if (ifs->ifs_fr_running > 0)
796 		return 0;
797 
798 	error = iplattach(ifs);
799 	if (error == 0) {
800 		if (ifs->ifs_fr_timer_id == NULL) {
801 			int hz = drv_usectohz(500000);
802 
803 			ifs->ifs_fr_timer_id = timeout(fr_slowtimer,
804 						       (void *)ifs,
805 						       hz);
806 		}
807 		ifs->ifs_fr_running = 1;
808 	} else {
809 		(void) ipldetach(ifs);
810 	}
811 	return error;
812 }
813 
814 
815 phy_if_t get_unit(name, v, ifs)
816 char *name;
817 int v;
818 ipf_stack_t *ifs;
819 {
820 	net_handle_t nif;
821 
822   	if (v == 4)
823  		nif = ifs->ifs_ipf_ipv4;
824   	else if (v == 6)
825  		nif = ifs->ifs_ipf_ipv6;
826   	else
827  		return 0;
828 
829  	return (net_phylookup(nif, name));
830 }
831 
832 /*
833  * routines below for saving IP headers to buffer
834  */
835 /*ARGSUSED*/
836 int iplopen(devp, flags, otype, cred)
837 dev_t *devp;
838 int flags, otype;
839 cred_t *cred;
840 {
841 	minor_t min = getminor(*devp);
842 
843 #ifdef	IPFDEBUG
844 	cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
845 #endif
846 	if (!(otype & OTYP_CHR))
847 		return ENXIO;
848 
849 	min = (IPL_LOGMAX < min) ? ENXIO : 0;
850 	return min;
851 }
852 
853 
854 /*ARGSUSED*/
855 int iplclose(dev, flags, otype, cred)
856 dev_t dev;
857 int flags, otype;
858 cred_t *cred;
859 {
860 	minor_t	min = getminor(dev);
861 
862 #ifdef	IPFDEBUG
863 	cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
864 #endif
865 
866 	min = (IPL_LOGMAX < min) ? ENXIO : 0;
867 	return min;
868 }
869 
#ifdef	IPFILTER_LOG
/*
 * iplread/ipllog
 * both of these must operate with at least splnet() lest they be
 * called during packet processing and cause an inconsistency to appear in
 * the filter lists.
 *
 * Function:	iplread
 * Returns:	EIO if the filter is not running, otherwise the result of
 *		ipflog_read() (or of ipfsync_read() for the IPL_LOGSYNC
 *		minor when IPFILTER_SYNC is defined)
 * Parameters:
 *	dev: device being read; its minor number selects the log
 *	uio: describes the caller's buffer
 *	cp: caller's credentials, used to find the stack for its zone
 */
/*ARGSUSED*/
int iplread(dev, uio, cp)
dev_t dev;
register struct uio *uio;
cred_t *cp;
{
	ipf_stack_t *ifs;

	/*
	 * ipf_find_stack() is being called on behalf of a user process, from
	 * a given zone, to find the stack pointer for that zone, so there is
	 * no need to have a hold/reference count here.
	 */
	ifs = ipf_find_stack(crgetzoneid(cp));
	ASSERT(ifs != NULL);

# ifdef	IPFDEBUG
	cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
# endif

	if (ifs->ifs_fr_running <= 0)
		return EIO;

# ifdef	IPFILTER_SYNC
	if (getminor(dev) == IPL_LOGSYNC)
		return ipfsync_read(uio);
# endif

	return ipflog_read(getminor(dev), uio, ifs);
}
#endif /* IPFILTER_LOG */
912 
913 
914 /*
915  * iplread/ipllog
916  * both of these must operate with at least splnet() lest they be
917  * called during packet processing and cause an inconsistancy to appear in
918  * the filter lists.
919  */
920 int iplwrite(dev, uio, cp)
921 dev_t dev;
922 register struct uio *uio;
923 cred_t *cp;
924 {
925 	ipf_stack_t *ifs;
926 
927         /*
928 	 * As we're calling ipf_find_stack in user space, from a given zone
929 	 * to find the stack pointer for this zone, there is no need to have
930 	 * a hold/refence count here.
931 	 */
932 	ifs = ipf_find_stack(crgetzoneid(cp));
933 	ASSERT(ifs != NULL);
934 
935 #ifdef	IPFDEBUG
936 	cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp);
937 #endif
938 
939 	if (ifs->ifs_fr_running < 1) {
940 		return EIO;
941 	}
942 
943 #ifdef	IPFILTER_SYNC
944 	if (getminor(dev) == IPL_LOGSYNC)
945 		return ipfsync_write(uio);
946 #endif /* IPFILTER_SYNC */
947 	dev = dev;	/* LINT */
948 	uio = uio;	/* LINT */
949 	cp = cp;	/* LINT */
950 	return ENXIO;
951 }
952 
953 
954 /*
955  * fr_send_reset - this could conceivably be a call to tcp_respond(), but that
956  * requires a large amount of setting up and isn't any more efficient.
957  */
958 int fr_send_reset(fin)
959 fr_info_t *fin;
960 {
961 	tcphdr_t *tcp, *tcp2;
962 	int tlen, hlen;
963 	mblk_t *m;
964 #ifdef	USE_INET6
965 	ip6_t *ip6;
966 #endif
967 	ip_t *ip;
968 
969 	tcp = fin->fin_dp;
970 	if (tcp->th_flags & TH_RST)
971 		return -1;
972 
973 #ifndef	IPFILTER_CKSUM
974 	if (fr_checkl4sum(fin) == -1)
975 		return -1;
976 #endif
977 
978 	tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0;
979 #ifdef	USE_INET6
980 	if (fin->fin_v == 6)
981 		hlen = sizeof(ip6_t);
982 	else
983 #endif
984 		hlen = sizeof(ip_t);
985 	hlen += sizeof(*tcp2);
986 	if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL)
987 		return -1;
988 
989 	m->b_rptr += 64;
990 	MTYPE(m) = M_DATA;
991 	m->b_wptr = m->b_rptr + hlen;
992 	ip = (ip_t *)m->b_rptr;
993 	bzero((char *)ip, hlen);
994 	tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
995 	tcp2->th_dport = tcp->th_sport;
996 	tcp2->th_sport = tcp->th_dport;
997 	if (tcp->th_flags & TH_ACK) {
998 		tcp2->th_seq = tcp->th_ack;
999 		tcp2->th_flags = TH_RST;
1000 	} else {
1001 		tcp2->th_ack = ntohl(tcp->th_seq);
1002 		tcp2->th_ack += tlen;
1003 		tcp2->th_ack = htonl(tcp2->th_ack);
1004 		tcp2->th_flags = TH_RST|TH_ACK;
1005 	}
1006 	tcp2->th_off = sizeof(struct tcphdr) >> 2;
1007 
1008 	ip->ip_v = fin->fin_v;
1009 #ifdef	USE_INET6
1010 	if (fin->fin_v == 6) {
1011 		ip6 = (ip6_t *)m->b_rptr;
1012 		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1013 		ip6->ip6_src = fin->fin_dst6.in6;
1014 		ip6->ip6_dst = fin->fin_src6.in6;
1015 		ip6->ip6_plen = htons(sizeof(*tcp));
1016 		ip6->ip6_nxt = IPPROTO_TCP;
1017 		tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2);
1018 	} else
1019 #endif
1020 	{
1021 		ip->ip_src.s_addr = fin->fin_daddr;
1022 		ip->ip_dst.s_addr = fin->fin_saddr;
1023 		ip->ip_id = fr_nextipid(fin);
1024 		ip->ip_hl = sizeof(*ip) >> 2;
1025 		ip->ip_p = IPPROTO_TCP;
1026 		ip->ip_len = sizeof(*ip) + sizeof(*tcp);
1027 		ip->ip_tos = fin->fin_ip->ip_tos;
1028 		tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2);
1029 	}
1030 	return fr_send_ip(fin, m, &m);
1031 }
1032 
1033 /*
1034  * Function:	fr_send_ip
1035  * Returns:	 0: success
1036  *		-1: failed
1037  * Parameters:
1038  *	fin: packet information
1039  *	m: the message block where ip head starts
1040  *
1041  * Send a new packet through the IP stack.
1042  *
1043  * For IPv4 packets, ip_len must be in host byte order, and ip_v,
1044  * ip_ttl, ip_off, and ip_sum are ignored (filled in by this
1045  * function).
1046  *
1047  * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled
1048  * in by this function.
1049  *
1050  * All other portions of the packet must be in on-the-wire format.
1051  */
1052 /*ARGSUSED*/
1053 static int fr_send_ip(fin, m, mpp)
1054 fr_info_t *fin;
1055 mblk_t *m, **mpp;
1056 {
1057 	qpktinfo_t qpi, *qpip;
1058 	fr_info_t fnew;
1059 	ip_t *ip;
1060 	int i, hlen;
1061 	ipf_stack_t *ifs = fin->fin_ifs;
1062 
1063 	ip = (ip_t *)m->b_rptr;
1064 	bzero((char *)&fnew, sizeof(fnew));
1065 
1066 #ifdef	USE_INET6
1067 	if (fin->fin_v == 6) {
1068 		ip6_t *ip6;
1069 
1070 		ip6 = (ip6_t *)ip;
1071 		ip6->ip6_vfc = 0x60;
1072 		ip6->ip6_hlim = 127;
1073 		fnew.fin_v = 6;
1074 		hlen = sizeof(*ip6);
1075 		fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen;
1076 	} else
1077 #endif
1078 	{
1079 		fnew.fin_v = 4;
1080 #if SOLARIS2 >= 10
1081 		ip->ip_ttl = 255;
1082 		if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1)
1083 			ip->ip_off = htons(IP_DF);
1084 #else
1085 		if (ip_ttl_ptr != NULL)
1086 			ip->ip_ttl = (u_char)(*ip_ttl_ptr);
1087 		else
1088 			ip->ip_ttl = 63;
1089 		if (ip_mtudisc != NULL)
1090 			ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0);
1091 		else
1092 			ip->ip_off = htons(IP_DF);
1093 #endif
1094 		/*
1095 		 * The dance with byte order and ip_len/ip_off is because in
1096 		 * fr_fastroute, it expects them to be in host byte order but
1097 		 * ipf_cksum expects them to be in network byte order.
1098 		 */
1099 		ip->ip_len = htons(ip->ip_len);
1100 		ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip));
1101 		ip->ip_len = ntohs(ip->ip_len);
1102 		ip->ip_off = ntohs(ip->ip_off);
1103 		hlen = sizeof(*ip);
1104 		fnew.fin_plen = ip->ip_len;
1105 	}
1106 
1107 	qpip = fin->fin_qpi;
1108 	qpi.qpi_off = 0;
1109 	qpi.qpi_ill = qpip->qpi_ill;
1110 	qpi.qpi_m = m;
1111 	qpi.qpi_data = ip;
1112 	fnew.fin_qpi = &qpi;
1113 	fnew.fin_ifp = fin->fin_ifp;
1114 	fnew.fin_flx = FI_NOCKSUM;
1115 	fnew.fin_m = m;
1116 	fnew.fin_qfm = m;
1117 	fnew.fin_ip = ip;
1118 	fnew.fin_mp = mpp;
1119 	fnew.fin_hlen = hlen;
1120 	fnew.fin_dp = (char *)ip + hlen;
1121 	fnew.fin_ifs = fin->fin_ifs;
1122 	(void) fr_makefrip(hlen, ip, &fnew);
1123 
1124 	i = fr_fastroute(m, mpp, &fnew, NULL);
1125 	return i;
1126 }
1127 
1128 
1129 int fr_send_icmp_err(type, fin, dst)
1130 int type;
1131 fr_info_t *fin;
1132 int dst;
1133 {
1134 	struct in_addr dst4;
1135 	struct icmp *icmp;
1136 	qpktinfo_t *qpi;
1137 	int hlen, code;
1138 	phy_if_t phy;
1139 	u_short sz;
1140 #ifdef	USE_INET6
1141 	mblk_t *mb;
1142 #endif
1143 	mblk_t *m;
1144 #ifdef	USE_INET6
1145 	ip6_t *ip6;
1146 #endif
1147 	ip_t *ip;
1148 	ipf_stack_t *ifs = fin->fin_ifs;
1149 
1150 	if ((type < 0) || (type > ICMP_MAXTYPE))
1151 		return -1;
1152 
1153 	code = fin->fin_icode;
1154 #ifdef USE_INET6
1155 	if ((code < 0) || (code >= ICMP_MAX_UNREACH))
1156 		return -1;
1157 #endif
1158 
1159 #ifndef	IPFILTER_CKSUM
1160 	if (fr_checkl4sum(fin) == -1)
1161 		return -1;
1162 #endif
1163 
1164 	qpi = fin->fin_qpi;
1165 
1166 #ifdef	USE_INET6
1167 	mb = fin->fin_qfm;
1168 
1169 	if (fin->fin_v == 6) {
1170 		sz = sizeof(ip6_t);
1171 		sz += MIN(mb->b_wptr - mb->b_rptr, 512);
1172 		hlen = sizeof(ip6_t);
1173 		type = icmptoicmp6types[type];
1174 		if (type == ICMP6_DST_UNREACH)
1175 			code = icmptoicmp6unreach[code];
1176 	} else
1177 #endif
1178 	{
1179 		if ((fin->fin_p == IPPROTO_ICMP) &&
1180 		    !(fin->fin_flx & FI_SHORT))
1181 			switch (ntohs(fin->fin_data[0]) >> 8)
1182 			{
1183 			case ICMP_ECHO :
1184 			case ICMP_TSTAMP :
1185 			case ICMP_IREQ :
1186 			case ICMP_MASKREQ :
1187 				break;
1188 			default :
1189 				return 0;
1190 			}
1191 
1192 		sz = sizeof(ip_t) * 2;
1193 		sz += 8;		/* 64 bits of data */
1194 		hlen = sizeof(ip_t);
1195 	}
1196 
1197 	sz += offsetof(struct icmp, icmp_ip);
1198 	if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL)
1199 		return -1;
1200 	MTYPE(m) = M_DATA;
1201 	m->b_rptr += 64;
1202 	m->b_wptr = m->b_rptr + sz;
1203 	bzero((char *)m->b_rptr, (size_t)sz);
1204 	ip = (ip_t *)m->b_rptr;
1205 	ip->ip_v = fin->fin_v;
1206 	icmp = (struct icmp *)(m->b_rptr + hlen);
1207 	icmp->icmp_type = type & 0xff;
1208 	icmp->icmp_code = code & 0xff;
1209 	phy = (phy_if_t)qpi->qpi_ill;
1210 	if (type == ICMP_UNREACH && (phy != 0) &&
1211 	    fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
1212 		icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 );
1213 
1214 #ifdef	USE_INET6
1215 	if (fin->fin_v == 6) {
1216 		struct in6_addr dst6;
1217 		int csz;
1218 
1219 		if (dst == 0) {
1220 			ipf_stack_t *ifs = fin->fin_ifs;
1221 
1222 			if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy,
1223 				       (void *)&dst6, NULL, ifs) == -1) {
1224 				FREE_MB_T(m);
1225 				return -1;
1226 			}
1227 		} else
1228 			dst6 = fin->fin_dst6.in6;
1229 
1230 		csz = sz;
1231 		sz -= sizeof(ip6_t);
1232 		ip6 = (ip6_t *)m->b_rptr;
1233 		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1234 		ip6->ip6_plen = htons((u_short)sz);
1235 		ip6->ip6_nxt = IPPROTO_ICMPV6;
1236 		ip6->ip6_src = dst6;
1237 		ip6->ip6_dst = fin->fin_src6.in6;
1238 		sz -= offsetof(struct icmp, icmp_ip);
1239 		bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz);
1240 		icmp->icmp_cksum = csz - sizeof(ip6_t);
1241 	} else
1242 #endif
1243 	{
1244 		ip->ip_hl = sizeof(*ip) >> 2;
1245 		ip->ip_p = IPPROTO_ICMP;
1246 		ip->ip_id = fin->fin_ip->ip_id;
1247 		ip->ip_tos = fin->fin_ip->ip_tos;
1248 		ip->ip_len = (u_short)sz;
1249 		if (dst == 0) {
1250 			ipf_stack_t *ifs = fin->fin_ifs;
1251 
1252 			if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy,
1253 				       (void *)&dst4, NULL, ifs) == -1) {
1254 				FREE_MB_T(m);
1255 				return -1;
1256 			}
1257 		} else {
1258 			dst4 = fin->fin_dst;
1259 		}
1260 		ip->ip_src = dst4;
1261 		ip->ip_dst = fin->fin_src;
1262 		bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip,
1263 		      sizeof(*fin->fin_ip));
1264 		bcopy((char *)fin->fin_ip + fin->fin_hlen,
1265 		      (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8);
1266 		icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len);
1267 		icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off);
1268 		icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
1269 					     sz - sizeof(ip_t));
1270 	}
1271 
1272 	/*
1273 	 * Need to exit out of these so we don't recursively call rw_enter
1274 	 * from fr_qout.
1275 	 */
1276 	return fr_send_ip(fin, m, &m);
1277 }
1278 
1279 #include <sys/time.h>
1280 #include <sys/varargs.h>
1281 
1282 #ifndef _KERNEL
1283 #include <stdio.h>
1284 #endif
1285 
1286 #define	NULLADDR_RATE_LIMIT 10	/* 10 seconds */
1287 
1288 
1289 /*
1290  * Print out warning message at rate-limited speed.
1291  */
1292 static void rate_limit_message(ipf_stack_t *ifs,
1293 			       int rate, const char *message, ...)
1294 {
1295 	static time_t last_time = 0;
1296 	time_t now;
1297 	va_list args;
1298 	char msg_buf[256];
1299 	int  need_printed = 0;
1300 
1301 	now = ddi_get_time();
1302 
1303 	/* make sure, no multiple entries */
1304 	ASSERT(MUTEX_NOT_HELD(&(ifs->ifs_ipf_rw.ipf_lk)));
1305 	MUTEX_ENTER(&ifs->ifs_ipf_rw);
1306 	if (now - last_time >= rate) {
1307 		need_printed = 1;
1308 		last_time = now;
1309 	}
1310 	MUTEX_EXIT(&ifs->ifs_ipf_rw);
1311 
1312 	if (need_printed) {
1313 		va_start(args, message);
1314 		(void)vsnprintf(msg_buf, 255, message, args);
1315 		va_end(args);
1316 #ifdef _KERNEL
1317 		cmn_err(CE_WARN, msg_buf);
1318 #else
1319 		fprintf(std_err, msg_buf);
1320 #endif
1321 	}
1322 }
1323 
1324 /*
1325  * Return the first IP Address associated with an interface
1326  * For IPv6, we walk through the list of logical interfaces and return
1327  * the address of the first one that isn't a link-local interface.
1328  * We can't assume that it is :1 because another link-local address
1329  * may have been assigned there.
1330  */
1331 /*ARGSUSED*/
1332 int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs)
1333 int v, atype;
1334 void *ifptr;
1335 struct in_addr  *inp, *inpmask;
1336 ipf_stack_t *ifs;
1337 {
1338 	struct sockaddr_in6 v6addr[2];
1339 	struct sockaddr_in v4addr[2];
1340 	net_ifaddr_t type[2];
1341 	net_handle_t net_data;
1342 	phy_if_t phyif;
1343 	void *array;
1344 
1345 	switch (v)
1346 	{
1347 	case 4:
1348 		net_data = ifs->ifs_ipf_ipv4;
1349 		array = v4addr;
1350 		break;
1351 	case 6:
1352 		net_data = ifs->ifs_ipf_ipv6;
1353 		array = v6addr;
1354 		break;
1355 	default:
1356 		net_data = NULL;
1357 		break;
1358 	}
1359 
1360 	if (net_data == NULL)
1361 		return -1;
1362 
1363 	phyif = (phy_if_t)ifptr;
1364 
1365 	switch (atype)
1366 	{
1367 	case FRI_PEERADDR :
1368 		type[0] = NA_PEER;
1369 		break;
1370 
1371 	case FRI_BROADCAST :
1372 		type[0] = NA_BROADCAST;
1373 		break;
1374 
1375 	default :
1376 		type[0] = NA_ADDRESS;
1377 		break;
1378 	}
1379 
1380 	type[1] = NA_NETMASK;
1381 
1382 	if (v == 6) {
1383 		lif_if_t idx = 0;
1384 
1385 		do {
1386 			idx = net_lifgetnext(net_data, phyif, idx);
1387 			if (net_getlifaddr(net_data, phyif, idx, 2, type,
1388 					   array) < 0)
1389 				return -1;
1390 			if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) &&
1391 			    !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr))
1392 				break;
1393 		} while (idx != 0);
1394 
1395 		if (idx == 0)
1396 			return -1;
1397 
1398 		return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1],
1399 					inp, inpmask);
1400 	}
1401 
1402 	if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0)
1403 		return -1;
1404 
1405 	return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask);
1406 }
1407 
1408 
1409 u_32_t fr_newisn(fin)
1410 fr_info_t *fin;
1411 {
1412 	static int iss_seq_off = 0;
1413 	u_char hash[16];
1414 	u_32_t newiss;
1415 	MD5_CTX ctx;
1416 	ipf_stack_t *ifs = fin->fin_ifs;
1417 
1418 	/*
1419 	 * Compute the base value of the ISS.  It is a hash
1420 	 * of (saddr, sport, daddr, dport, secret).
1421 	 */
1422 	MD5Init(&ctx);
1423 
1424 	MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src,
1425 		  sizeof(fin->fin_fi.fi_src));
1426 	MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst,
1427 		  sizeof(fin->fin_fi.fi_dst));
1428 	MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat));
1429 
1430 	MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret));
1431 
1432 	MD5Final(hash, &ctx);
1433 
1434 	bcopy(hash, &newiss, sizeof(newiss));
1435 
1436 	/*
1437 	 * Now increment our "timer", and add it in to
1438 	 * the computed value.
1439 	 *
1440 	 * XXX Use `addin'?
1441 	 * XXX TCP_ISSINCR too large to use?
1442 	 */
1443 	iss_seq_off += 0x00010000;
1444 	newiss += iss_seq_off;
1445 	return newiss;
1446 }
1447 
1448 
1449 /* ------------------------------------------------------------------------ */
1450 /* Function:    fr_nextipid                                                 */
1451 /* Returns:     int - 0 == success, -1 == error (packet should be droppped) */
1452 /* Parameters:  fin(I) - pointer to packet information                      */
1453 /*                                                                          */
1454 /* Returns the next IPv4 ID to use for this packet.                         */
1455 /* ------------------------------------------------------------------------ */
1456 u_short fr_nextipid(fin)
1457 fr_info_t *fin;
1458 {
1459 	static u_short ipid = 0;
1460 	u_short id;
1461 	ipf_stack_t *ifs = fin->fin_ifs;
1462 
1463 	MUTEX_ENTER(&ifs->ifs_ipf_rw);
1464 	if (fin->fin_pktnum != 0) {
1465 		id = fin->fin_pktnum & 0xffff;
1466 	} else {
1467 		id = ipid++;
1468 	}
1469 	MUTEX_EXIT(&ifs->ifs_ipf_rw);
1470 
1471 	return id;
1472 }
1473 
1474 
1475 #ifndef IPFILTER_CKSUM
1476 /* ARGSUSED */
1477 #endif
1478 INLINE void fr_checkv4sum(fin)
1479 fr_info_t *fin;
1480 {
1481 #ifdef IPFILTER_CKSUM
1482 	if (fr_checkl4sum(fin) == -1)
1483 		fin->fin_flx |= FI_BAD;
1484 #endif
1485 }
1486 
1487 
#ifdef USE_INET6
/*
 * Flag an IPv6 packet as bad (FI_BAD) when fr_checkl4sum() rejects it.
 * Does nothing unless IPFILTER_CKSUM is defined.
 */
# ifndef IPFILTER_CKSUM
/* ARGSUSED */
# endif
INLINE void fr_checkv6sum(fin)
fr_info_t *fin;
{
# ifdef IPFILTER_CKSUM
	int sumbad = (fr_checkl4sum(fin) == -1);

	if (sumbad) {
		fin->fin_flx |= FI_BAD;
	}
# endif
}
#endif /* USE_INET6 */
1501 
1502 
1503 #if (SOLARIS2 < 7)
1504 void fr_slowtimer()
1505 #else
1506 /*ARGSUSED*/
1507 void fr_slowtimer __P((void *arg))
1508 #endif
1509 {
1510 	ipf_stack_t *ifs = arg;
1511 
1512 	READ_ENTER(&ifs->ifs_ipf_global);
1513 	if (ifs->ifs_fr_running != 1) {
1514 		ifs->ifs_fr_timer_id = NULL;
1515 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
1516 		return;
1517 	}
1518 	ipf_expiretokens(ifs);
1519 	fr_fragexpire(ifs);
1520 	fr_timeoutstate(ifs);
1521 	fr_natexpire(ifs);
1522 	fr_authexpire(ifs);
1523 	ifs->ifs_fr_ticks++;
1524 	if (ifs->ifs_fr_running == 1)
1525 		ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg,
1526 		    drv_usectohz(500000));
1527 	else
1528 		ifs->ifs_fr_timer_id = NULL;
1529 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
1530 }
1531 
1532 
1533 /* ------------------------------------------------------------------------ */
1534 /* Function:    fr_pullup                                                   */
1535 /* Returns:     NULL == pullup failed, else pointer to protocol header      */
1536 /* Parameters:  m(I)   - pointer to buffer where data packet starts         */
1537 /*              fin(I) - pointer to packet information                      */
1538 /*              len(I) - number of bytes to pullup                          */
1539 /*                                                                          */
1540 /* Attempt to move at least len bytes (from the start of the buffer) into a */
1541 /* single buffer for ease of access.  Operating system native functions are */
1542 /* used to manage buffers - if necessary.  If the entire packet ends up in  */
1543 /* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has  */
1544 /* not been called.  Both fin_ip and fin_dp are updated before exiting _IF_ */
1545 /* and ONLY if the pullup succeeds.                                         */
1546 /*                                                                          */
1547 /* We assume that 'min' is a pointer to a buffer that is part of the chain  */
1548 /* of buffers that starts at *fin->fin_mp.                                  */
1549 /* ------------------------------------------------------------------------ */
1550 void *fr_pullup(min, fin, len)
1551 mb_t *min;
1552 fr_info_t *fin;
1553 int len;
1554 {
1555 	qpktinfo_t *qpi = fin->fin_qpi;
1556 	int out = fin->fin_out, dpoff, ipoff;
1557 	mb_t *m = min, *m1, *m2;
1558 	char *ip;
1559 	uint32_t start, stuff, end, value, flags;
1560 	ipf_stack_t *ifs = fin->fin_ifs;
1561 
1562 	if (m == NULL)
1563 		return NULL;
1564 
1565 	ip = (char *)fin->fin_ip;
1566 	if ((fin->fin_flx & FI_COALESCE) != 0)
1567 		return ip;
1568 
1569 	ipoff = fin->fin_ipoff;
1570 	if (fin->fin_dp != NULL)
1571 		dpoff = (char *)fin->fin_dp - (char *)ip;
1572 	else
1573 		dpoff = 0;
1574 
1575 	if (M_LEN(m) < len + ipoff) {
1576 
1577 		/*
1578 		 * pfil_precheck ensures the IP header is on a 32bit
1579 		 * aligned address so simply fail if that isn't currently
1580 		 * the case (should never happen).
1581 		 */
1582 		int inc = 0;
1583 
1584 		if (ipoff > 0) {
1585 			if ((ipoff & 3) != 0) {
1586 				inc = 4 - (ipoff & 3);
1587 				if (m->b_rptr - inc >= m->b_datap->db_base)
1588 					m->b_rptr -= inc;
1589 				else
1590 					inc = 0;
1591 			}
1592 		}
1593 
1594 		/*
1595 		 * XXX This is here as a work around for a bug with DEBUG
1596 		 * XXX Solaris kernels.  The problem is b_prev is used by IP
1597 		 * XXX code as a way to stash the phyint_index for a packet,
1598 		 * XXX this doesn't get reset by IP but freeb does an ASSERT()
1599 		 * XXX for both of these to be NULL.  See 6442390.
1600 		 */
1601 		m1 = m;
1602 		m2 = m->b_prev;
1603 
1604 		do {
1605 			m1->b_next = NULL;
1606 			m1->b_prev = NULL;
1607 			m1 = m1->b_cont;
1608 		} while (m1);
1609 
1610 		/*
1611 		 * Need to preserve checksum information by copying them
1612 		 * to newmp which heads the pulluped message.
1613 		 */
1614 		hcksum_retrieve(m, NULL, NULL, &start, &stuff, &end,
1615 		    &value, &flags);
1616 
1617 		if (pullupmsg(m, len + ipoff + inc) == 0) {
1618 			ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]);
1619 			FREE_MB_T(*fin->fin_mp);
1620 			*fin->fin_mp = NULL;
1621 			fin->fin_m = NULL;
1622 			fin->fin_ip = NULL;
1623 			fin->fin_dp = NULL;
1624 			qpi->qpi_data = NULL;
1625 			return NULL;
1626 		}
1627 
1628 		(void) hcksum_assoc(m, NULL, NULL, start, stuff, end,
1629 		    value, flags, 0);
1630 
1631 		m->b_prev = m2;
1632 		m->b_rptr += inc;
1633 		fin->fin_m = m;
1634 		ip = MTOD(m, char *) + ipoff;
1635 		qpi->qpi_data = ip;
1636 	}
1637 
1638 	ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]);
1639 	fin->fin_ip = (ip_t *)ip;
1640 	if (fin->fin_dp != NULL)
1641 		fin->fin_dp = (char *)fin->fin_ip + dpoff;
1642 
1643 	if (len == fin->fin_plen)
1644 		fin->fin_flx |= FI_COALESCE;
1645 	return ip;
1646 }
1647 
1648 
1649 /*
1650  * Function:	fr_verifysrc
1651  * Returns:	int (really boolean)
1652  * Parameters:	fin - packet information
1653  *
1654  * Check whether the packet has a valid source address for the interface on
1655  * which the packet arrived, implementing the "fr_chksrc" feature.
1656  * Returns true iff the packet's source address is valid.
1657  */
1658 int fr_verifysrc(fin)
1659 fr_info_t *fin;
1660 {
1661 	net_handle_t net_data_p;
1662 	phy_if_t phy_ifdata_routeto;
1663 	struct sockaddr	sin;
1664 	ipf_stack_t *ifs = fin->fin_ifs;
1665 
1666 	if (fin->fin_v == 4) {
1667 		net_data_p = ifs->ifs_ipf_ipv4;
1668 	} else if (fin->fin_v == 6) {
1669 		net_data_p = ifs->ifs_ipf_ipv6;
1670 	} else {
1671 		return (0);
1672 	}
1673 
1674 	/* Get the index corresponding to the if name */
1675 	sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
1676 	bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr));
1677 	phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL);
1678 
1679 	return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0);
1680 }
1681 
1682 
1683 /*
1684  * Function:	fr_fastroute
1685  * Returns:	 0: success;
1686  *		-1: failed
1687  * Parameters:
1688  *	mb: the message block where ip head starts
1689  *	mpp: the pointer to the pointer of the orignal
1690  *		packet message
1691  *	fin: packet information
1692  *	fdp: destination interface information
1693  *	if it is NULL, no interface information provided.
1694  *
1695  * This function is for fastroute/to/dup-to rules. It calls
1696  * pfil_make_lay2_packet to search route, make lay-2 header
1697  * ,and identify output queue for the IP packet.
1698  * The destination address depends on the following conditions:
1699  * 1: for fastroute rule, fdp is passed in as NULL, so the
1700  *	destination address is the IP Packet's destination address
1701  * 2: for to/dup-to rule, if an ip address is specified after
1702  *	the interface name, this address is the as destination
1703  *	address. Otherwise IP Packet's destination address is used
1704  */
1705 int fr_fastroute(mb, mpp, fin, fdp)
1706 mblk_t *mb, **mpp;
1707 fr_info_t *fin;
1708 frdest_t *fdp;
1709 {
1710         net_handle_t net_data_p;
1711 	net_inject_t *inj;
1712 	mblk_t *mp = NULL;
1713 	frentry_t *fr = fin->fin_fr;
1714 	qpktinfo_t *qpi;
1715 	ip_t *ip;
1716 
1717 	struct sockaddr_in *sin;
1718 	struct sockaddr_in6 *sin6;
1719 	struct sockaddr *sinp;
1720 	ipf_stack_t *ifs = fin->fin_ifs;
1721 #ifndef	sparc
1722 	u_short __iplen, __ipoff;
1723 #endif
1724 
1725 	if (fin->fin_v == 4) {
1726 		net_data_p = ifs->ifs_ipf_ipv4;
1727 	} else if (fin->fin_v == 6) {
1728 		net_data_p = ifs->ifs_ipf_ipv6;
1729 	} else {
1730 		return (-1);
1731 	}
1732 
1733 	inj = net_inject_alloc(NETINFO_VERSION);
1734 	if (inj == NULL)
1735 		return -1;
1736 
1737 	ip = fin->fin_ip;
1738 	qpi = fin->fin_qpi;
1739 
1740 	/*
1741 	 * If this is a duplicate mblk then we want ip to point at that
1742 	 * data, not the original, if and only if it is already pointing at
1743 	 * the current mblk data.
1744 	 *
1745 	 * Otherwise, if it's not a duplicate, and we're not already pointing
1746 	 * at the current mblk data, then we want to ensure that the data
1747 	 * points at ip.
1748 	 */
1749 
1750 	if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) {
1751 		ip = (ip_t *)mb->b_rptr;
1752 	} else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) {
1753 		qpi->qpi_m->b_rptr = (uchar_t *)ip;
1754 		qpi->qpi_off = 0;
1755 	}
1756 
1757 	/*
1758 	 * If there is another M_PROTO, we don't want it
1759 	 */
1760 	if (*mpp != mb) {
1761 		mp = unlinkb(*mpp);
1762 		freeb(*mpp);
1763 		*mpp = mp;
1764 	}
1765 
1766 	sinp = (struct sockaddr *)&inj->ni_addr;
1767 	sin = (struct sockaddr_in *)sinp;
1768 	sin6 = (struct sockaddr_in6 *)sinp;
1769 	bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr));
1770 	inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
1771 	inj->ni_packet = mb;
1772 
1773 	/*
1774 	 * In case we're here due to "to <if>" being used with
1775 	 * "keep state", check that we're going in the correct
1776 	 * direction.
1777 	 */
1778 	if (fdp != NULL) {
1779 		if ((fr != NULL) && (fdp->fd_ifp != NULL) &&
1780 			(fin->fin_rev != 0) && (fdp == &fr->fr_tif))
1781 			goto bad_fastroute;
1782 		inj->ni_physical = (phy_if_t)fdp->fd_ifp;
1783 		if (fin->fin_v == 4) {
1784 			sin->sin_addr = fdp->fd_ip;
1785 		} else {
1786 			sin6->sin6_addr = fdp->fd_ip6.in6;
1787 		}
1788 	} else {
1789 		if (fin->fin_v == 4) {
1790 			sin->sin_addr = ip->ip_dst;
1791 		} else {
1792 			sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst;
1793 		}
1794 		inj->ni_physical = net_routeto(net_data_p, sinp, NULL);
1795 	}
1796 
1797 	/*
1798 	 * Clear the hardware checksum flags from packets that we are doing
1799 	 * input processing on as leaving them set will cause the outgoing
1800 	 * NIC (if it supports hardware checksum) to calculate them anew,
1801 	 * using the old (correct) checksums as the pseudo value to start
1802 	 * from.
1803 	 */
1804 	if (fin->fin_out == 0) {
1805 		DB_CKSUMFLAGS(mb) = 0;
1806 	}
1807 
1808 	*mpp = mb;
1809 
1810 	if (fin->fin_out == 0) {
1811 		void *saveifp;
1812 		u_32_t pass;
1813 
1814 		saveifp = fin->fin_ifp;
1815 		fin->fin_ifp = (void *)inj->ni_physical;
1816 		fin->fin_flx &= ~FI_STATE;
1817 		fin->fin_out = 1;
1818 		(void) fr_acctpkt(fin, &pass);
1819 		fin->fin_fr = NULL;
1820 		if (!fr || !(fr->fr_flags & FR_RETMASK))
1821 			(void) fr_checkstate(fin, &pass);
1822 		if (fr_checknatout(fin, NULL) == -1)
1823 			goto bad_fastroute;
1824 		fin->fin_out = 0;
1825 		fin->fin_ifp = saveifp;
1826 	}
1827 #ifndef	sparc
1828 	if (fin->fin_v == 4) {
1829 		__iplen = (u_short)ip->ip_len,
1830 		__ipoff = (u_short)ip->ip_off;
1831 
1832 		ip->ip_len = htons(__iplen);
1833 		ip->ip_off = htons(__ipoff);
1834 	}
1835 #endif
1836 
1837 	if (net_data_p) {
1838 		if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) {
1839 			net_inject_free(inj);
1840 			return (-1);
1841 		}
1842 	}
1843 
1844 	ifs->ifs_fr_frouteok[0]++;
1845 	net_inject_free(inj);
1846 	return 0;
1847 bad_fastroute:
1848 	net_inject_free(inj);
1849 	freemsg(mb);
1850 	ifs->ifs_fr_frouteok[1]++;
1851 	return -1;
1852 }
1853 
1854 
1855 /* ------------------------------------------------------------------------ */
1856 /* Function:    ipf_hook4_out                                               */
1857 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1858 /* Parameters:  event(I)     - pointer to event                             */
1859 /*              info(I)      - pointer to hook information for firewalling  */
1860 /*                                                                          */
1861 /* Calling ipf_hook.                                                        */
1862 /* ------------------------------------------------------------------------ */
1863 /*ARGSUSED*/
1864 int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg)
1865 {
1866 	return ipf_hook(info, 1, 0, arg);
1867 }
1868 /*ARGSUSED*/
1869 int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg)
1870 {
1871 	return ipf_hook6(info, 1, 0, arg);
1872 }
1873 
1874 /* ------------------------------------------------------------------------ */
1875 /* Function:    ipf_hook4_in                                                */
1876 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1877 /* Parameters:  event(I)     - pointer to event                             */
1878 /*              info(I)      - pointer to hook information for firewalling  */
1879 /*                                                                          */
1880 /* Calling ipf_hook.                                                        */
1881 /* ------------------------------------------------------------------------ */
1882 /*ARGSUSED*/
1883 int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg)
1884 {
1885 	return ipf_hook(info, 0, 0, arg);
1886 }
1887 /*ARGSUSED*/
1888 int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg)
1889 {
1890 	return ipf_hook6(info, 0, 0, arg);
1891 }
1892 
1893 
1894 /* ------------------------------------------------------------------------ */
1895 /* Function:    ipf_hook4_loop_out                                          */
1896 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1897 /* Parameters:  event(I)     - pointer to event                             */
1898 /*              info(I)      - pointer to hook information for firewalling  */
1899 /*                                                                          */
1900 /* Calling ipf_hook.                                                        */
1901 /* ------------------------------------------------------------------------ */
1902 /*ARGSUSED*/
1903 int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
1904 {
1905 	return ipf_hook(info, 1, FI_NOCKSUM, arg);
1906 }
1907 /*ARGSUSED*/
1908 int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
1909 {
1910 	return ipf_hook6(info, 1, FI_NOCKSUM, arg);
1911 }
1912 
1913 /* ------------------------------------------------------------------------ */
1914 /* Function:    ipf_hook4_loop_in                                           */
1915 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1916 /* Parameters:  event(I)     - pointer to event                             */
1917 /*              info(I)      - pointer to hook information for firewalling  */
1918 /*                                                                          */
1919 /* Calling ipf_hook.                                                        */
1920 /* ------------------------------------------------------------------------ */
1921 /*ARGSUSED*/
1922 int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
1923 {
1924 	return ipf_hook(info, 0, FI_NOCKSUM, arg);
1925 }
1926 /*ARGSUSED*/
1927 int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
1928 {
1929 	return ipf_hook6(info, 0, FI_NOCKSUM, arg);
1930 }
1931 
1932 /* ------------------------------------------------------------------------ */
1933 /* Function:    ipf_hook                                                    */
1934 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1935 /* Parameters:  info(I)      - pointer to hook information for firewalling  */
1936 /*              out(I)       - whether packet is going in or out            */
1937 /*              loopback(I)  - whether packet is a loopback packet or not   */
1938 /*                                                                          */
1939 /* Stepping stone function between the IP mainline and IPFilter.  Extracts  */
1940 /* parameters out of the info structure and forms them up to be useful for  */
1941 /* calling ipfilter.                                                        */
1942 /* ------------------------------------------------------------------------ */
1943 int ipf_hook(hook_data_t info, int out, int loopback, void *arg)
1944 {
1945 	hook_pkt_event_t *fw;
1946 	ipf_stack_t *ifs;
1947 	qpktinfo_t qpi;
1948 	int rval, hlen;
1949 	u_short swap;
1950 	phy_if_t phy;
1951 	ip_t *ip;
1952 
1953 	ifs = arg;
1954 	fw = (hook_pkt_event_t *)info;
1955 
1956 	ASSERT(fw != NULL);
1957 	phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
1958 
1959 	ip = fw->hpe_hdr;
1960 	swap = ntohs(ip->ip_len);
1961 	ip->ip_len = swap;
1962 	swap = ntohs(ip->ip_off);
1963 	ip->ip_off = swap;
1964 	hlen = IPH_HDR_LENGTH(ip);
1965 
1966 	qpi.qpi_m = fw->hpe_mb;
1967 	qpi.qpi_data = fw->hpe_hdr;
1968 	qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
1969 	qpi.qpi_ill = (void *)phy;
1970 	qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
1971 	if (qpi.qpi_flags)
1972 		qpi.qpi_flags |= FI_MBCAST;
1973 	qpi.qpi_flags |= loopback;
1974 
1975 	rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
1976 	    &qpi, fw->hpe_mp, ifs);
1977 
1978 	/* For fastroute cases, fr_check returns 0 with mp set to NULL */
1979 	if (rval == 0 && *(fw->hpe_mp) == NULL)
1980 		rval = 1;
1981 
1982 	/* Notify IP the packet mblk_t and IP header pointers. */
1983 	fw->hpe_mb = qpi.qpi_m;
1984 	fw->hpe_hdr = qpi.qpi_data;
1985 	if (rval == 0) {
1986 		ip = qpi.qpi_data;
1987 		swap = ntohs(ip->ip_len);
1988 		ip->ip_len = swap;
1989 		swap = ntohs(ip->ip_off);
1990 		ip->ip_off = swap;
1991 	}
1992 	return rval;
1993 
1994 }
1995 int ipf_hook6(hook_data_t info, int out, int loopback, void *arg)
1996 {
1997 	hook_pkt_event_t *fw;
1998 	int rval, hlen;
1999 	qpktinfo_t qpi;
2000 	phy_if_t phy;
2001 
2002 	fw = (hook_pkt_event_t *)info;
2003 
2004 	ASSERT(fw != NULL);
2005 	phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
2006 
2007 	hlen = sizeof (ip6_t);
2008 
2009 	qpi.qpi_m = fw->hpe_mb;
2010 	qpi.qpi_data = fw->hpe_hdr;
2011 	qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
2012 	qpi.qpi_ill = (void *)phy;
2013 	qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
2014 	if (qpi.qpi_flags)
2015 		qpi.qpi_flags |= FI_MBCAST;
2016 	qpi.qpi_flags |= loopback;
2017 
2018 	rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
2019 	    &qpi, fw->hpe_mp, arg);
2020 
2021 	/* For fastroute cases, fr_check returns 0 with mp set to NULL */
2022 	if (rval == 0 && *(fw->hpe_mp) == NULL)
2023 		rval = 1;
2024 
2025 	/* Notify IP the packet mblk_t and IP header pointers. */
2026 	fw->hpe_mb = qpi.qpi_m;
2027 	fw->hpe_hdr = qpi.qpi_data;
2028 	return rval;
2029 
2030 }
2031 
2032 
2033 /* ------------------------------------------------------------------------ */
2034 /* Function:    ipf_nic_event_v4                                            */
2035 /* Returns:     int - 0 == no problems encountered                          */
2036 /* Parameters:  event(I)     - pointer to event                             */
2037 /*              info(I)      - pointer to information about a NIC event     */
2038 /*                                                                          */
2039 /* Function to receive asynchronous NIC events from IP                      */
2040 /* ------------------------------------------------------------------------ */
2041 /*ARGSUSED*/
2042 int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg)
2043 {
2044 	struct sockaddr_in *sin;
2045 	hook_nic_event_t *hn;
2046 	ipf_stack_t *ifs = arg;
2047 
2048 	hn = (hook_nic_event_t *)info;
2049 
2050 	switch (hn->hne_event)
2051 	{
2052 	case NE_PLUMB :
2053 		frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data,
2054 		       ifs);
2055 		fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2056 			      hn->hne_data, ifs);
2057 		fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2058 			     hn->hne_data, ifs);
2059 		break;
2060 
2061 	case NE_UNPLUMB :
2062 		frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2063 		fr_natifpsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL,
2064 			      ifs);
2065 		fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2066 		break;
2067 
2068 	case NE_ADDRESS_CHANGE :
2069 		/*
2070 		 * We only respond to events for logical interface 0 because
2071 		 * IPFilter only uses the first address given to a network
2072 		 * interface.  We check for hne_lif==1 because the netinfo
2073 		 * code maps adds 1 to the lif number so that it can return
2074 		 * 0 to indicate "no more lifs" when walking them.
2075 		 */
2076 		if (hn->hne_lif == 1) {
2077 			frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL,
2078 			    ifs);
2079 			sin = hn->hne_data;
2080 			fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr,
2081 			    ifs);
2082 		}
2083 		break;
2084 
2085 	default :
2086 		break;
2087 	}
2088 
2089 	return 0;
2090 }
2091 
2092 
2093 /* ------------------------------------------------------------------------ */
2094 /* Function:    ipf_nic_event_v6                                            */
2095 /* Returns:     int - 0 == no problems encountered                          */
2096 /* Parameters:  event(I)     - pointer to event                             */
2097 /*              info(I)      - pointer to information about a NIC event     */
2098 /*                                                                          */
2099 /* Function to receive asynchronous NIC events from IP                      */
2100 /* ------------------------------------------------------------------------ */
2101 /*ARGSUSED*/
2102 int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg)
2103 {
2104 	struct sockaddr_in6 *sin6;
2105 	hook_nic_event_t *hn;
2106 	ipf_stack_t *ifs = arg;
2107 
2108 	hn = (hook_nic_event_t *)info;
2109 
2110 	switch (hn->hne_event)
2111 	{
2112 	case NE_PLUMB :
2113 		frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2114 		       hn->hne_data, ifs);
2115 		fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2116 			      hn->hne_data, ifs);
2117 		fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2118 			     hn->hne_data, ifs);
2119 		break;
2120 
2121 	case NE_UNPLUMB :
2122 		frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2123 		fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL,
2124 			      ifs);
2125 		fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2126 		break;
2127 
2128 	case NE_ADDRESS_CHANGE :
2129 		if (hn->hne_lif == 1) {
2130 			sin6 = hn->hne_data;
2131 			fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr,
2132 				       ifs);
2133 		}
2134 		break;
2135 	default :
2136 		break;
2137 	}
2138 
2139 	return 0;
2140 }
2141 
2142 /*
2143  * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6()
2144  * are needed in Solaris kernel only. We don't need them in
2145  * ipftest to pretend the ICMP/RST packet was sent as a response.
2146  */
2147 #if defined(_KERNEL) && (SOLARIS2 >= 10)
2148 /* ------------------------------------------------------------------------ */
2149 /* Function:    fr_make_rst                                                 */
2150 /* Returns:     int - 0 on success, -1 on failure			    */
2151 /* Parameters:  fin(I) - pointer to packet information                      */
2152 /*                                                                          */
2153 /* We must alter the original mblks passed to IPF from IP stack via	    */
2154 /* FW_HOOKS. FW_HOOKS interface is powerfull, but it has some limitations.  */
2155 /* IPF can basicaly do only these things with mblk representing the packet: */
2156 /*	leave it as it is (pass the packet)				    */
2157 /*                                                                          */
2158 /*	discard it (block the packet)					    */
2159 /*                                                                          */
2160 /*	alter it (i.e. NAT)						    */
2161 /*                                                                          */
2162 /* As you can see IPF can not simply discard the mblk and supply a new one  */
2163 /* instead to IP stack via FW_HOOKS.					    */
2164 /*                                                                          */
2165 /* The return-rst action for packets coming via NIC is handled as follows:  */
2166 /*	mblk with packet is discarded					    */
2167 /*                                                                          */
2168 /*	new mblk with RST response is constructed and injected to network   */
2169 /*                                                                          */
2170 /* IPF can't inject packets to loopback interface, this is just another	    */
2171 /* limitation we have to deal with here. The only option to send RST	    */
2172 /* response to offending TCP packet coming via loopback is to alter it.	    */
2173 /*									    */
2174 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on	    */
2175 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to	    */
2176 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers.			    */
2177 /* ------------------------------------------------------------------------ */
2178 int fr_make_rst(fin)
2179 fr_info_t *fin;
2180 {
2181 	uint16_t tmp_port;
2182 	int rv = -1;
2183 	uint32_t old_ack;
2184 	tcphdr_t *tcp = NULL;
2185 	struct in_addr tmp_src;
2186 #ifdef USE_INET6
2187 	struct in6_addr	tmp_src6;
2188 #endif
2189 
2190 	ASSERT(fin->fin_p == IPPROTO_TCP);
2191 
2192 	/*
2193 	 * We do not need to adjust chksum, since it is not being checked by
2194 	 * Solaris IP stack for loopback clients.
2195 	 */
2196 	if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) &&
2197 	    ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2198 
2199 		if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2200 			/* Swap IPv4 addresses. */
2201 			tmp_src = fin->fin_ip->ip_src;
2202 			fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2203 			fin->fin_ip->ip_dst = tmp_src;
2204 
2205 			rv = 0;
2206 		}
2207 		else
2208 			tcp = NULL;
2209 	}
2210 #ifdef USE_INET6
2211 	else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) &&
2212 	    ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2213 		/*
2214 		 * We are relying on fact the next header is TCP, which is true
2215 		 * for regular TCP packets coming in over loopback.
2216 		 */
2217 		if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2218 			/* Swap IPv6 addresses. */
2219 			tmp_src6 = fin->fin_ip6->ip6_src;
2220 			fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2221 			fin->fin_ip6->ip6_dst = tmp_src6;
2222 
2223 			rv = 0;
2224 		}
2225 		else
2226 			tcp = NULL;
2227 	}
2228 #endif
2229 
2230 	if (tcp != NULL) {
2231 		/*
2232 		 * Adjust TCP header:
2233 		 *	swap ports,
2234 		 *	set flags,
2235 		 *	set correct ACK number
2236 		 */
2237 		tmp_port = tcp->th_sport;
2238 		tcp->th_sport = tcp->th_dport;
2239 		tcp->th_dport = tmp_port;
2240 		old_ack = tcp->th_ack;
2241 		tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1);
2242 		tcp->th_seq = old_ack;
2243 		tcp->th_flags = TH_RST | TH_ACK;
2244 	}
2245 
2246 	return (rv);
2247 }
2248 
2249 /* ------------------------------------------------------------------------ */
2250 /* Function:    fr_make_icmp_v4                                             */
2251 /* Returns:     int - 0 on success, -1 on failure			    */
2252 /* Parameters:  fin(I) - pointer to packet information                      */
2253 /*                                                                          */
2254 /* Please read comment at fr_make_icmp() wrapper function to get an idea    */
2255 /* what is going to happen here and why. Once you read the comment there,   */
2256 /* continue here with next paragraph.					    */
2257 /*									    */
2258 /* To turn IPv4 packet into ICMPv4 response packet, these things must	    */
2259 /* happen here:								    */
2260 /*	(1) Original mblk is copied (duplicated).			    */
2261 /*                                                                          */
2262 /*	(2) ICMP header is created.					    */
2263 /*                                                                          */
2264 /*	(3) Link ICMP header with copy of original mblk, we have ICMPv4	    */
2265 /*	    data ready then.						    */
2266 /*                                                                          */
2267 /*      (4) Swap IP addresses in original mblk and adjust IP header data.   */
2268 /*                                                                          */
2269 /*	(5) The mblk containing original packet is trimmed to contain IP    */
2270 /*	    header only and ICMP chksum is computed.			    */
2271 /*                                                                          */
2272 /*	(6) The ICMP header we have from (3) is linked to original mblk,    */
2273 /*	    which now contains new IP header. If original packet was spread */
2274 /*	    over several mblks, only the first mblk is kept.		    */
2275 /* ------------------------------------------------------------------------ */
2276 static int fr_make_icmp_v4(fin)
2277 fr_info_t *fin;
2278 {
2279 	struct in_addr tmp_src;
2280 	tcphdr_t *tcp;
2281 	struct icmp *icmp;
2282 	mblk_t *mblk_icmp;
2283 	mblk_t *mblk_ip;
2284 	size_t icmp_pld_len;	/* octets to append to ICMP header */
2285 	size_t orig_iphdr_len;	/* length of IP header only */
2286 	uint32_t sum;
2287 	uint16_t *buf;
2288 	int len;
2289 
2290 
2291 	if (fin->fin_v != 4)
2292 		return (-1);
2293 
2294 	/*
2295 	 * If we are dealing with TCP, then packet must be SYN/FIN to be routed
2296 	 * by IP stack. If it is not SYN/FIN, then we must drop it silently.
2297 	 */
2298 	tcp = (tcphdr_t *) fin->fin_dp;
2299 
2300 	if ((fin->fin_p == IPPROTO_TCP) &&
2301 	    ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2302 		return (-1);
2303 
2304 	/*
2305 	 * Step (1)
2306 	 *
2307 	 * Make copy of original mblk.
2308 	 *
2309 	 * We want to copy as much data as necessary, not less, not more.  The
2310 	 * ICMPv4 payload length for unreachable messages is:
2311 	 *	original IP header + 8 bytes of L4 (if there are any).
2312 	 *
2313 	 * We determine if there are at least 8 bytes of L4 data following IP
2314 	 * header first.
2315 	 */
2316 	icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ?
2317 		ICMPERR_ICMPHLEN : fin->fin_dlen;
2318 	/*
2319 	 * Since we don't want to copy more data than necessary, we must trim
2320 	 * the original mblk here.  The right way (STREAMish) would be to use
2321 	 * adjmsg() to trim it.  However we would have to calculate the length
2322 	 * argument for adjmsg() from pointers we already have here.
2323 	 *
2324 	 * Since we have pointers and offsets, it's faster and easier for
2325 	 * us to just adjust pointers by hand instead of using adjmsg().
2326 	 */
2327 	fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp;
2328 	fin->fin_m->b_wptr += icmp_pld_len;
2329 	icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip;
2330 
2331 	/*
2332 	 * Also we don't want to copy any L2 stuff, which might precede IP
2333 	 * header, so we have have to set b_rptr to point to the start of IP
2334 	 * header.
2335 	 */
2336 	fin->fin_m->b_rptr += fin->fin_ipoff;
2337 	if ((mblk_ip = copyb(fin->fin_m)) == NULL)
2338 		return (-1);
2339 	fin->fin_m->b_rptr -= fin->fin_ipoff;
2340 
2341 	/*
2342 	 * Step (2)
2343 	 *
2344 	 * Create an ICMP header, which will be appened to original mblk later.
2345 	 * ICMP header is just another mblk.
2346 	 */
2347 	mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI);
2348 	if (mblk_icmp == NULL) {
2349 		FREE_MB_T(mblk_ip);
2350 		return (-1);
2351 	}
2352 
2353 	MTYPE(mblk_icmp) = M_DATA;
2354 	icmp = (struct icmp *) mblk_icmp->b_wptr;
2355 	icmp->icmp_type = ICMP_UNREACH;
2356 	icmp->icmp_code = fin->fin_icode & 0xFF;
2357 	icmp->icmp_void = 0;
2358 	icmp->icmp_cksum = 0;
2359 	mblk_icmp->b_wptr += ICMPERR_ICMPHLEN;
2360 
2361 	/*
2362 	 * Step (3)
2363 	 *
2364 	 * Complete ICMP packet - link ICMP header with L4 data from original
2365 	 * IP packet.
2366 	 */
2367 	linkb(mblk_icmp, mblk_ip);
2368 
2369 	/*
2370 	 * Step (4)
2371 	 *
2372 	 * Swap IP addresses and change IP header fields accordingly in
2373 	 * original IP packet.
2374 	 *
2375 	 * There is a rule option return-icmp as a dest for physical
2376 	 * interfaces. This option becomes useless for loopback, since IPF box
2377 	 * uses same address as a loopback destination. We ignore the option
2378 	 * here, the ICMP packet will always look like as it would have been
2379 	 * sent from the original destination host.
2380 	 */
2381 	tmp_src = fin->fin_ip->ip_src;
2382 	fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2383 	fin->fin_ip->ip_dst = tmp_src;
2384 	fin->fin_ip->ip_p = IPPROTO_ICMP;
2385 	fin->fin_ip->ip_sum = 0;
2386 
2387 	/*
2388 	 * Step (5)
2389 	 *
2390 	 * We trim the orignal mblk to hold IP header only.
2391 	 */
2392 	fin->fin_m->b_wptr = fin->fin_dp;
2393 	orig_iphdr_len = fin->fin_m->b_wptr -
2394 			    (fin->fin_m->b_rptr + fin->fin_ipoff);
2395 	fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN +
2396 			    orig_iphdr_len);
2397 
2398 	/*
2399 	 * ICMP chksum calculation. The data we are calculating chksum for are
2400 	 * spread over two mblks, therefore we have to use two for loops.
2401 	 *
2402 	 * First for loop computes chksum part for ICMP header.
2403 	 */
2404 	buf = (uint16_t *) icmp;
2405 	len = ICMPERR_ICMPHLEN;
2406 	for (sum = 0; len > 1; len -= 2)
2407 		sum += *buf++;
2408 
2409 	/*
2410 	 * Here we add chksum part for ICMP payload.
2411 	 */
2412 	len = icmp_pld_len;
2413 	buf = (uint16_t *) mblk_ip->b_rptr;
2414 	for (; len > 1; len -= 2)
2415 		sum += *buf++;
2416 
2417 	/*
2418 	 * Chksum is done.
2419 	 */
2420 	sum = (sum >> 16) + (sum & 0xffff);
2421 	sum += (sum >> 16);
2422 	icmp->icmp_cksum = ~sum;
2423 
2424 	/*
2425 	 * Step (6)
2426 	 *
2427 	 * Release all packet mblks, except the first one.
2428 	 */
2429 	if (fin->fin_m->b_cont != NULL) {
2430 		FREE_MB_T(fin->fin_m->b_cont);
2431 	}
2432 
2433 	/*
2434 	 * Append ICMP payload to first mblk, which already contains new IP
2435 	 * header.
2436 	 */
2437 	linkb(fin->fin_m, mblk_icmp);
2438 
2439 	return (0);
2440 }
2441 
2442 #ifdef USE_INET6
2443 /* ------------------------------------------------------------------------ */
2444 /* Function:    fr_make_icmp_v6                                             */
2445 /* Returns:     int - 0 on success, -1 on failure			    */
2446 /* Parameters:  fin(I) - pointer to packet information                      */
2447 /*									    */
2448 /* Please read comment at fr_make_icmp() wrapper function to get an idea    */
2449 /* what and why is going to happen here. Once you read the comment there,   */
2450 /* continue here with next paragraph.					    */
2451 /*									    */
2452 /* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response.    */
2453 /* The algorithm is fairly simple:					    */
2454 /*	1) We need to get copy of complete mblk.			    */
2455 /*									    */
2456 /*	2) New ICMPv6 header is created.				    */
2457 /*									    */
2458 /*	3) The copy of original mblk with packet is linked to ICMPv6	    */
2459 /*	   header.							    */
2460 /*									    */
2461 /*	4) The checksum must be adjusted.				    */
2462 /*									    */
2463 /*	5) IP addresses in original mblk are swapped and IP header data	    */
2464 /*	   are adjusted (protocol number).				    */
2465 /*									    */
2466 /*	6) Original mblk is trimmed to hold IPv6 header only, then it is    */
2467 /*	   linked with the ICMPv6 data we got from (3).			    */
2468 /* ------------------------------------------------------------------------ */
2469 static int fr_make_icmp_v6(fin)
2470 fr_info_t *fin;
2471 {
2472 	struct icmp6_hdr *icmp6;
2473 	tcphdr_t *tcp;
2474 	struct in6_addr	tmp_src6;
2475 	size_t icmp_pld_len;
2476 	mblk_t *mblk_ip, *mblk_icmp;
2477 
2478 	if (fin->fin_v != 6)
2479 		return (-1);
2480 
2481 	/*
2482 	 * If we are dealing with TCP, then packet must SYN/FIN to be routed by
2483 	 * IP stack. If it is not SYN/FIN, then we must drop it silently.
2484 	 */
2485 	tcp = (tcphdr_t *) fin->fin_dp;
2486 
2487 	if ((fin->fin_p == IPPROTO_TCP) &&
2488 	    ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2489 		return (-1);
2490 
2491 	/*
2492 	 * Step (1)
2493 	 *
2494 	 * We need to copy complete packet in case of IPv6, no trimming is
2495 	 * needed (except the L2 headers).
2496 	 */
2497 	icmp_pld_len = M_LEN(fin->fin_m);
2498 	fin->fin_m->b_rptr += fin->fin_ipoff;
2499 	if ((mblk_ip = copyb(fin->fin_m)) == NULL)
2500 		return (-1);
2501 	fin->fin_m->b_rptr -= fin->fin_ipoff;
2502 
2503 	/*
2504 	 * Step (2)
2505 	 *
2506 	 * Allocate and create ICMP header.
2507 	 */
2508 	mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr),
2509 			BPRI_HI);
2510 
2511 	if (mblk_icmp == NULL)
2512 		return (-1);
2513 
2514 	MTYPE(mblk_icmp) = M_DATA;
2515 	icmp6 =  (struct icmp6_hdr *) mblk_icmp->b_wptr;
2516 	icmp6->icmp6_type = ICMP6_DST_UNREACH;
2517 	icmp6->icmp6_code = fin->fin_icode & 0xFF;
2518 	icmp6->icmp6_data32[0] = 0;
2519 	mblk_icmp->b_wptr += sizeof (struct icmp6_hdr);
2520 
2521 	/*
2522 	 * Step (3)
2523 	 *
2524 	 * Link the copy of IP packet to ICMP header.
2525 	 */
2526 	linkb(mblk_icmp, mblk_ip);
2527 
2528 	/*
2529 	 * Step (4)
2530 	 *
2531 	 * Calculate chksum - this is much more easier task than in case of
2532 	 * IPv4  - ICMPv6 chksum only covers IP addresses, and payload length.
2533 	 * We are making compensation just for change of packet length.
2534 	 */
2535 	icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr);
2536 
2537 	/*
2538 	 * Step (5)
2539 	 *
2540 	 * Swap IP addresses.
2541 	 */
2542 	tmp_src6 = fin->fin_ip6->ip6_src;
2543 	fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2544 	fin->fin_ip6->ip6_dst = tmp_src6;
2545 
2546 	/*
2547 	 * and adjust IP header data.
2548 	 */
2549 	fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6;
2550 	fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr));
2551 
2552 	/*
2553 	 * Step (6)
2554 	 *
2555 	 * We must release all linked mblks from original packet and keep only
2556 	 * the first mblk with IP header to link ICMP data.
2557 	 */
2558 	fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t);
2559 
2560 	if (fin->fin_m->b_cont != NULL) {
2561 		FREE_MB_T(fin->fin_m->b_cont);
2562 	}
2563 
2564 	/*
2565 	 * Append ICMP payload to IP header.
2566 	 */
2567 	linkb(fin->fin_m, mblk_icmp);
2568 
2569 	return (0);
2570 }
2571 #endif	/* USE_INET6 */
2572 
2573 /* ------------------------------------------------------------------------ */
2574 /* Function:    fr_make_icmp                                                */
2575 /* Returns:     int - 0 on success, -1 on failure			    */
2576 /* Parameters:  fin(I) - pointer to packet information                      */
2577 /*                                                                          */
2578 /* We must alter the original mblks passed to IPF from IP stack via	    */
2579 /* FW_HOOKS. The reasons why we must alter packet are discussed within	    */
2580 /* comment at fr_make_rst() function.					    */
2581 /*									    */
2582 /* The fr_make_icmp() function acts as a wrapper, which passes the code	    */
2583 /* execution to	fr_make_icmp_v4() or fr_make_icmp_v6() depending on	    */
2584 /* protocol version. However there are some details, which are common to    */
2585 /* both IP versions. The details are going to be explained here.	    */
2586 /*                                                                          */
2587 /* The packet looks as follows:						    */
2588 /*    xxx | IP hdr | IP payload    ...	| 				    */
2589 /*    ^   ^        ^            	^				    */
2590 /*    |   |        |            	|				    */
2591 /*    |   |        |		fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */
2592 /*    |   |        |							    */
2593 /*    |   |        `- fin_m->fin_dp (in case of IPv4 points to L4 header)   */
2594 /*    |   |								    */
2595 /*    |   `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case  */
2596 /*    |      of loopback)						    */
2597 /*    |   								    */
2598 /*    `- fin_m->b_rptr -  points to L2 header in case of physical NIC	    */
2599 /*                                                                          */
2600 /* All relevant IP headers are pulled up into the first mblk. It happened   */
2601 /* well in advance before the matching rule was found (the rule, which took */
2602 /* us here, to fr_make_icmp() function).				    */
2603 /*                                                                          */
2604 /* Both functions will turn packet passed in fin->fin_m mblk into a new	    */
2605 /* packet. New packet will be represented as chain of mblks.		    */
2606 /* orig mblk |- b_cont ---.						    */
2607 /*    ^                    `-> ICMP hdr |- b_cont--.			    */
2608 /*    |	                          ^	            `-> duped orig mblk	    */
2609 /*    |                           |				^	    */
2610 /*    `- The original mblk        |				|	    */
2611 /*       will be trimmed to       |				|	    */
2612 /*       to contain IP header     |				|	    */
2613 /*       only                     |				|	    */
2614 /*                                |				|	    */
2615 /*                                `- This is newly		|           */
2616 /*                                   allocated mblk to		|	    */
2617 /*                                   hold ICMPv6 data.		|	    */
2618 /*								|	    */
2619 /*								|	    */
2620 /*								|	    */
2621 /*	    This is the copy of original mblk, it will contain -'	    */
2622 /*	    orignal IP  packet in case of ICMPv6. In case of		    */
2623 /*	    ICMPv4 it will contain up to 8 bytes of IP payload		    */
2624 /*	    (TCP/UDP/L4) data from original packet.			    */
2625 /* ------------------------------------------------------------------------ */
2626 int fr_make_icmp(fin)
2627 fr_info_t *fin;
2628 {
2629 	int rv;
2630 
2631 	if (fin->fin_v == 4)
2632 		rv = fr_make_icmp_v4(fin);
2633 #ifdef USE_INET6
2634 	else if (fin->fin_v == 6)
2635 		rv = fr_make_icmp_v6(fin);
2636 #endif
2637 	else
2638 		rv = -1;
2639 
2640 	return (rv);
2641 }
2642 #endif	/* _KERNEL && SOLARIS2 >= 10 */
2643