xref: /illumos-gate/usr/src/uts/common/inet/ipf/ip_fil_solaris.c (revision f73e1ebf60792a8bdb2d559097c3131b68c09318)
1 /*
2  * Copyright (C) 1993-2001, 2003 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
7  *
8  * Copyright 2018 Joyent, Inc.
9  */
10 
11 #if !defined(lint)
12 static const char sccsid[] = "@(#)ip_fil_solaris.c	1.7 07/22/06 (C) 1993-2000 Darren Reed";
13 static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $";
14 #endif
15 
16 #include <sys/types.h>
17 #include <sys/errno.h>
18 #include <sys/param.h>
19 #include <sys/cpuvar.h>
20 #include <sys/open.h>
21 #include <sys/ioctl.h>
22 #include <sys/filio.h>
23 #include <sys/systm.h>
24 #include <sys/strsubr.h>
25 #include <sys/strsun.h>
26 #include <sys/cred.h>
27 #include <sys/ddi.h>
28 #include <sys/sunddi.h>
29 #include <sys/ksynch.h>
30 #include <sys/kmem.h>
31 #include <sys/mac_provider.h>
32 #include <sys/mkdev.h>
33 #include <sys/protosw.h>
34 #include <sys/socket.h>
35 #include <sys/dditypes.h>
36 #include <sys/cmn_err.h>
37 #include <sys/zone.h>
38 #include <net/if.h>
39 #include <net/af.h>
40 #include <net/route.h>
41 #include <netinet/in.h>
42 #include <netinet/in_systm.h>
43 #include <netinet/ip.h>
44 #include <netinet/ip_var.h>
45 #include <netinet/tcp.h>
46 #include <netinet/udp.h>
47 #include <netinet/tcpip.h>
48 #include <netinet/ip_icmp.h>
49 #include "netinet/ip_compat.h"
50 #ifdef	USE_INET6
51 # include <netinet/icmp6.h>
52 #endif
53 #include "netinet/ip_fil.h"
54 #include "netinet/ip_nat.h"
55 #include "netinet/ip_frag.h"
56 #include "netinet/ip_state.h"
57 #include "netinet/ip_auth.h"
58 #include "netinet/ip_proxy.h"
59 #include "netinet/ipf_stack.h"
60 #ifdef	IPFILTER_LOOKUP
61 # include "netinet/ip_lookup.h"
62 #endif
63 #include <inet/ip_ire.h>
64 
65 #include <sys/md5.h>
66 #include <sys/neti.h>
67 
68 static	int	frzerostats __P((caddr_t, ipf_stack_t *));
69 static	int	fr_setipfloopback __P((int, ipf_stack_t *));
70 static	int	fr_enableipf __P((ipf_stack_t *, int));
71 static	int	fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp));
72 static	int	ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *));
73 static	int	ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *));
74 static	int	ipf_hook __P((hook_data_t, int, int, void *));
75 static	int	ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *));
76 static	int	ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *));
77 static	int	ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t,
78     void *));
79 static	int	ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *));
80 static	int	ipf_hook4 __P((hook_data_t, int, int, void *));
81 static	int	ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *));
82 static	int	ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *));
83 static	int	ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t,
84     void *));
85 static	int	ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t,
86     void *));
87 static	int     ipf_hook6 __P((hook_data_t, int, int, void *));
88 
89 static	int	ipf_hookviona_in __P((hook_event_token_t, hook_data_t, void *));
90 static	int	ipf_hookviona_out __P((hook_event_token_t, hook_data_t,
91     void *));
92 
93 extern	int	ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
94 extern	int	ipf_frruleiter __P((void *, int, void *, ipf_stack_t *));
95 
96 static int	ipf_hook_protocol_notify __P((hook_notify_cmd_t, void *,
97     const char *, const char *, const char *));
98 static int	ipf_hook_instance_notify __P((hook_notify_cmd_t, void *,
99     const char *, const char *, const char *));
100 
101 #if SOLARIS2 < 10
102 #if SOLARIS2 >= 7
103 u_int		*ip_ttl_ptr = NULL;
104 u_int		*ip_mtudisc = NULL;
105 # if SOLARIS2 >= 8
106 int		*ip_forwarding = NULL;
107 u_int		*ip6_forwarding = NULL;
108 # else
109 u_int		*ip_forwarding = NULL;
110 # endif
111 #else
112 u_long		*ip_ttl_ptr = NULL;
113 u_long		*ip_mtudisc = NULL;
114 u_long		*ip_forwarding = NULL;
115 #endif
116 #endif
117 
118 vmem_t	*ipf_minor;	/* minor number arena */
119 void 	*ipf_state;	/* DDI state */
120 
121 /*
122  * GZ-controlled and per-zone stacks:
123  *
124  * For each non-global zone, we create two ipf stacks: the per-zone stack and
125  * the GZ-controlled stack.  The per-zone stack can be controlled and observed
126  * from inside the zone or from the global zone.  The GZ-controlled stack can
127  * only be controlled and observed from the global zone (though the rules
128  * still only affect that non-global zone).
129  *
130  * The two hooks are always arranged so that the GZ-controlled stack is always
131  * "outermost" with respect to the zone.  The traffic flow then looks like
132  * this:
133  *
134  * Inbound:
135  *
136  *     nic ---> [ GZ-controlled rules ] ---> [ per-zone rules ] ---> zone
137  *
138  * Outbound:
139  *
140  *     nic <--- [ GZ-controlled rules ] <--- [ per-zone rules ] <--- zone
141  */
142 
/*
 * Names under which the ipf hooks are registered.  Each hook has two
 * names: the plain one for the per-zone stack and the "_gz" one for the
 * GZ-controlled stack.
 */

/* IPv4 hook names */
char *hook4_nicevents =		"ipfilter_hook4_nicevents";
char *hook4_nicevents_gz =	"ipfilter_hook4_nicevents_gz";
char *hook4_in =		"ipfilter_hook4_in";
char *hook4_in_gz =		"ipfilter_hook4_in_gz";
char *hook4_out =		"ipfilter_hook4_out";
char *hook4_out_gz =		"ipfilter_hook4_out_gz";
char *hook4_loop_in =		"ipfilter_hook4_loop_in";
char *hook4_loop_in_gz =	"ipfilter_hook4_loop_in_gz";
char *hook4_loop_out =		"ipfilter_hook4_loop_out";
char *hook4_loop_out_gz =	"ipfilter_hook4_loop_out_gz";

/* IPv6 hook names */
char *hook6_nicevents =		"ipfilter_hook6_nicevents";
char *hook6_nicevents_gz =	"ipfilter_hook6_nicevents_gz";
char *hook6_in =		"ipfilter_hook6_in";
char *hook6_in_gz =		"ipfilter_hook6_in_gz";
char *hook6_out =		"ipfilter_hook6_out";
char *hook6_out_gz =		"ipfilter_hook6_out_gz";
char *hook6_loop_in =		"ipfilter_hook6_loop_in";
char *hook6_loop_in_gz =	"ipfilter_hook6_loop_in_gz";
char *hook6_loop_out =		"ipfilter_hook6_loop_out";
char *hook6_loop_out_gz =	"ipfilter_hook6_loop_out_gz";

/* viona hook names */
char *hook_viona_in =		"ipfilter_hookviona_in";
char *hook_viona_in_gz =	"ipfilter_hookviona_in_gz";
char *hook_viona_out =		"ipfilter_hookviona_out";
char *hook_viona_out_gz =	"ipfilter_hookviona_out_gz";
172 
173 /* ------------------------------------------------------------------------ */
174 /* Function:    ipldetach                                                   */
175 /* Returns:     int - 0 == success, else error.                             */
176 /* Parameters:  Nil                                                         */
177 /*                                                                          */
178 /* This function is responsible for undoing anything that might have been   */
179 /* done in a call to iplattach().  It must be able to clean up from a call  */
180 /* to iplattach() that did not succeed.  Why might that happen?  Someone    */
181 /* configures a table to be so large that we cannot allocate enough memory  */
182 /* for it.                                                                  */
183 /* ------------------------------------------------------------------------ */
184 int ipldetach(ifs)
185 ipf_stack_t *ifs;
186 {
187 
188 	ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk));
189 
190 #if SOLARIS2 < 10
191 
192 	if (ifs->ifs_fr_control_forwarding & 2) {
193 		if (ip_forwarding != NULL)
194 			*ip_forwarding = 0;
195 #if SOLARIS2 >= 8
196 		if (ip6_forwarding != NULL)
197 			*ip6_forwarding = 0;
198 #endif
199 	}
200 #endif
201 
202 	/*
203 	 * This lock needs to be dropped around the net_hook_unregister calls
204 	 * because we can deadlock here with:
205 	 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
206 	 * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running)
207 	 */
208 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
209 
210 #define	UNDO_HOOK(_f, _b, _e, _h)					\
211 	do {								\
212 		if (ifs->_f != NULL) {					\
213 			if (ifs->_b) {					\
214 				int tmp = net_hook_unregister(ifs->_f,	\
215 					   _e, ifs->_h);		\
216 				ifs->_b = (tmp != 0 && tmp != ENXIO);	\
217 				if (!ifs->_b && ifs->_h != NULL) {	\
218 					hook_free(ifs->_h);		\
219 					ifs->_h = NULL;			\
220 				}					\
221 			} else if (ifs->_h != NULL) {			\
222 				hook_free(ifs->_h);			\
223 				ifs->_h = NULL;				\
224 			}						\
225 		}							\
226 		_NOTE(CONSTCOND)					\
227 	} while (0)
228 
229 	/*
230 	 * Remove IPv6 Hooks
231 	 */
232 	if (ifs->ifs_ipf_ipv6 != NULL) {
233 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in,
234 			  NH_PHYSICAL_IN, ifs_ipfhook6_in);
235 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out,
236 			  NH_PHYSICAL_OUT, ifs_ipfhook6_out);
237 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events,
238 			  NH_NIC_EVENTS, ifs_ipfhook6_nicevents);
239 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in,
240 			  NH_LOOPBACK_IN, ifs_ipfhook6_loop_in);
241 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out,
242 			  NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out);
243 
244 		if (net_protocol_release(ifs->ifs_ipf_ipv6) != 0)
245 			goto detach_failed;
246 		ifs->ifs_ipf_ipv6 = NULL;
247         }
248 
249 	/*
250 	 * Remove IPv4 Hooks
251 	 */
252 	if (ifs->ifs_ipf_ipv4 != NULL) {
253 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in,
254 			  NH_PHYSICAL_IN, ifs_ipfhook4_in);
255 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out,
256 			  NH_PHYSICAL_OUT, ifs_ipfhook4_out);
257 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events,
258 			  NH_NIC_EVENTS, ifs_ipfhook4_nicevents);
259 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in,
260 			  NH_LOOPBACK_IN, ifs_ipfhook4_loop_in);
261 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out,
262 			  NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out);
263 
264 		if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0)
265 			goto detach_failed;
266 		ifs->ifs_ipf_ipv4 = NULL;
267 	}
268 
269 	/*
270 	 * Remove notification of viona hooks
271 	 */
272 	net_instance_notify_unregister(ifs->ifs_netid,
273 	    ipf_hook_instance_notify);
274 
275 #undef UNDO_HOOK
276 
277 	/*
278 	 * Normally, viona will unregister itself before ipldetach() is called,
279 	 * so these will be no-ops, but out of caution, we try to make sure
280 	 * we've removed any of our references.
281 	 */
282 	(void) ipf_hook_protocol_notify(HN_UNREGISTER, ifs, Hn_VIONA, NULL,
283 	    NH_PHYSICAL_IN);
284 	(void) ipf_hook_protocol_notify(HN_UNREGISTER, ifs, Hn_VIONA, NULL,
285 	    NH_PHYSICAL_OUT);
286 
287 	{
288 		char netidstr[12]; /* Large enough for INT_MAX + NUL */
289 		(void) snprintf(netidstr, sizeof (netidstr), "%d",
290 		    ifs->ifs_netid);
291 
292 		/*
293 		 * The notify callbacks expect the netid value passed as a
294 		 * string in the third argument.  To prevent confusion if
295 		 * traced, we pass the same value the nethook framework would
296 		 * pass, even though the callback does not currently use the
297 		 * value.
298 		 */
299 		(void) ipf_hook_instance_notify(HN_UNREGISTER, ifs, netidstr,
300 		    NULL, Hn_VIONA);
301 	}
302 
303 #ifdef	IPFDEBUG
304 	cmn_err(CE_CONT, "ipldetach()\n");
305 #endif
306 
307 	WRITE_ENTER(&ifs->ifs_ipf_global);
308 	fr_deinitialise(ifs);
309 
310 	(void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs);
311 	(void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs);
312 
313 	if (ifs->ifs_ipf_locks_done == 1) {
314 		MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock);
315 		MUTEX_DESTROY(&ifs->ifs_ipf_rw);
316 		RW_DESTROY(&ifs->ifs_ipf_tokens);
317 		RW_DESTROY(&ifs->ifs_ipf_ipidfrag);
318 		ifs->ifs_ipf_locks_done = 0;
319 	}
320 
321 	if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out ||
322 	    ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in ||
323 	    ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events ||
324 	    ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out ||
325 	    ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out)
326 		return -1;
327 
328 	return 0;
329 
330 detach_failed:
331 	WRITE_ENTER(&ifs->ifs_ipf_global);
332 	return -1;
333 }
334 
335 int iplattach(ifs)
336 ipf_stack_t *ifs;
337 {
338 #if SOLARIS2 < 10
339 	int i;
340 #endif
341 	netid_t id = ifs->ifs_netid;
342 
343 #ifdef	IPFDEBUG
344 	cmn_err(CE_CONT, "iplattach()\n");
345 #endif
346 
347 	ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk));
348 	ifs->ifs_fr_flags = IPF_LOGGING;
349 #ifdef _KERNEL
350 	ifs->ifs_fr_update_ipid = 0;
351 #else
352 	ifs->ifs_fr_update_ipid = 1;
353 #endif
354 	ifs->ifs_fr_minttl = 4;
355 	ifs->ifs_fr_icmpminfragmtu = 68;
356 #if defined(IPFILTER_DEFAULT_BLOCK)
357 	ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH;
358 #else
359 	ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH;
360 #endif
361 
362 	bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache));
363 	MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex");
364 	MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex");
365 	RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock");
366 	RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock");
367 	ifs->ifs_ipf_locks_done = 1;
368 
369 	if (fr_initialise(ifs) < 0)
370 		return -1;
371 
372 	/*
373 	 * For incoming packets, we want the GZ-controlled hooks to run before
374 	 * the per-zone hooks, regardless of what order they're are installed.
375 	 * See the "GZ-controlled and per-zone stacks" comment block at the top
376 	 * of this file.
377 	 */
378 #define HOOK_INIT_GZ_BEFORE(x, fn, n, gzn, a)				\
379 	HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs);	\
380 	(x)->h_hint = ifs->ifs_gz_controlled ? HH_BEFORE : HH_AFTER;	\
381 	(x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn);
382 
383 	HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4,
384 		  hook4_nicevents, hook4_nicevents_gz, ifs);
385 	HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_in, ipf_hook4_in,
386 		  hook4_in, hook4_in_gz, ifs);
387 	HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in,
388 		  hook4_loop_in, hook4_loop_in_gz, ifs);
389 
390 	/*
391 	 * For outgoing packets, we want the GZ-controlled hooks to run after
392 	 * the per-zone hooks, regardless of what order they're are installed.
393 	 * See the "GZ-controlled and per-zone stacks" comment block at the top
394 	 * of this file.
395 	 */
396 #define HOOK_INIT_GZ_AFTER(x, fn, n, gzn, a)				\
397 	HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs);	\
398 	(x)->h_hint = ifs->ifs_gz_controlled ? HH_AFTER : HH_BEFORE;	\
399 	(x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn);
400 
401 	HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_out, ipf_hook4_out,
402 		  hook4_out, hook4_out_gz, ifs);
403 	HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out,
404 		  hook4_loop_out, hook4_loop_out_gz, ifs);
405 
406 	/*
407 	 * If we hold this lock over all of the net_hook_register calls, we
408 	 * can cause a deadlock to occur with the following lock ordering:
409 	 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
410 	 * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path)
411 	 */
412 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
413 
414 	/*
415 	 * Add IPv4 hooks
416 	 */
417 	ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET);
418 	if (ifs->ifs_ipf_ipv4 == NULL)
419 		goto hookup_failed;
420 
421 	ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4,
422 	    NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0);
423 	if (!ifs->ifs_hook4_nic_events)
424 		goto hookup_failed;
425 
426 	ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4,
427 	    NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0);
428 	if (!ifs->ifs_hook4_physical_in)
429 		goto hookup_failed;
430 
431 	ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4,
432 	    NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0);
433 	if (!ifs->ifs_hook4_physical_out)
434 		goto hookup_failed;
435 
436 	if (ifs->ifs_ipf_loopback) {
437 		ifs->ifs_hook4_loopback_in = (net_hook_register(
438 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
439 		    ifs->ifs_ipfhook4_loop_in) == 0);
440 		if (!ifs->ifs_hook4_loopback_in)
441 			goto hookup_failed;
442 
443 		ifs->ifs_hook4_loopback_out = (net_hook_register(
444 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
445 		    ifs->ifs_ipfhook4_loop_out) == 0);
446 		if (!ifs->ifs_hook4_loopback_out)
447 			goto hookup_failed;
448 	}
449 
450 	/*
451 	 * Add IPv6 hooks
452 	 */
453 	ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6);
454 	if (ifs->ifs_ipf_ipv6 == NULL)
455 		goto hookup_failed;
456 
457 	HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6,
458 		  hook6_nicevents, hook6_nicevents_gz, ifs);
459 	HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_in, ipf_hook6_in,
460 		  hook6_in, hook6_in_gz, ifs);
461 	HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in,
462 		  hook6_loop_in, hook6_loop_in_gz, ifs);
463 	HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_out, ipf_hook6_out,
464 		  hook6_out, hook6_out_gz, ifs);
465 	HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out,
466 		  hook6_loop_out, hook6_loop_out_gz, ifs);
467 
468 	ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6,
469 	    NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0);
470 	if (!ifs->ifs_hook6_nic_events)
471 		goto hookup_failed;
472 
473 	ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6,
474 	    NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0);
475 	if (!ifs->ifs_hook6_physical_in)
476 		goto hookup_failed;
477 
478 	ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6,
479 	    NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0);
480 	if (!ifs->ifs_hook6_physical_out)
481 		goto hookup_failed;
482 
483 	if (ifs->ifs_ipf_loopback) {
484 		ifs->ifs_hook6_loopback_in = (net_hook_register(
485 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
486 		    ifs->ifs_ipfhook6_loop_in) == 0);
487 		if (!ifs->ifs_hook6_loopback_in)
488 			goto hookup_failed;
489 
490 		ifs->ifs_hook6_loopback_out = (net_hook_register(
491 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
492 		    ifs->ifs_ipfhook6_loop_out) == 0);
493 		if (!ifs->ifs_hook6_loopback_out)
494 			goto hookup_failed;
495 	}
496 
497 	/*
498 	 * VIONA INET hooks.  While the nethook framework allows us to register
499 	 * hooks for events that haven't been registered yet, we instead
500 	 * register and unregister our hooks in response to notifications
501 	 * about the viona hooks from the nethook framework.  This prevents
502 	 * problems when the viona module gets unloaded while the ipf module
503 	 * does not.  If we do not unregister our hooks after the viona module
504 	 * is unloaded, the viona module cannot later re-register them if it
505 	 * gets reloaded.  As the ip, vnd, and ipf modules are rarely unloaded
506 	 * even on DEBUG kernels, they do not experience this issue.
507 	 */
508 	if (net_instance_notify_register(id, ipf_hook_instance_notify,
509 	    ifs) != 0)
510 		goto hookup_failed;
511 
512 	/*
513 	 * Reacquire ipf_global, now it is safe.
514 	 */
515 	WRITE_ENTER(&ifs->ifs_ipf_global);
516 
517 /* Do not use private interface ip_params_arr[] in Solaris 10 */
518 #if SOLARIS2 < 10
519 
520 #if SOLARIS2 >= 8
521 	ip_forwarding = &ip_g_forward;
522 #endif
523 	/*
524 	 * XXX - There is no terminator for this array, so it is not possible
525 	 * to tell if what we are looking for is missing and go off the end
526 	 * of the array.
527 	 */
528 
529 #if SOLARIS2 <= 8
530 	for (i = 0; ; i++) {
531 		if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) {
532 			ip_ttl_ptr = &ip_param_arr[i].ip_param_value;
533 		} else if (!strcmp(ip_param_arr[i].ip_param_name,
534 			    "ip_path_mtu_discovery")) {
535 			ip_mtudisc = &ip_param_arr[i].ip_param_value;
536 		}
537 #if SOLARIS2 < 8
538 		else if (!strcmp(ip_param_arr[i].ip_param_name,
539 			    "ip_forwarding")) {
540 			ip_forwarding = &ip_param_arr[i].ip_param_value;
541 		}
542 #else
543 		else if (!strcmp(ip_param_arr[i].ip_param_name,
544 			    "ip6_forwarding")) {
545 			ip6_forwarding = &ip_param_arr[i].ip_param_value;
546 		}
547 #endif
548 
549 		if (ip_mtudisc != NULL && ip_ttl_ptr != NULL &&
550 #if SOLARIS2 >= 8
551 		    ip6_forwarding != NULL &&
552 #endif
553 		    ip_forwarding != NULL)
554 			break;
555 	}
556 #endif
557 
558 	if (ifs->ifs_fr_control_forwarding & 1) {
559 		if (ip_forwarding != NULL)
560 			*ip_forwarding = 1;
561 #if SOLARIS2 >= 8
562 		if (ip6_forwarding != NULL)
563 			*ip6_forwarding = 1;
564 #endif
565 	}
566 
567 #endif
568 
569 	return 0;
570 hookup_failed:
571 	WRITE_ENTER(&ifs->ifs_ipf_global);
572 	return -1;
573 }
574 
575 /* ------------------------------------------------------------------------ */
576 /*
577  * Called whenever a nethook protocol is registered or unregistered.  Currently
578  * only used to add or remove the hooks for viona.
579  *
580  * While the function signature requires returning int, nothing
581  * in usr/src/uts/common/io/hook.c that invokes the callbacks
582  * captures the return value (nor is there currently any documentation
583  * on what return values should be).  For now at least, we'll return 0
584  * on success (or 'not applicable') or an error value.  Even if the
585  * nethook framework doesn't use the return address, it can be observed via
586  * dtrace if needed.
587  */
588 static int
589 ipf_hook_protocol_notify(hook_notify_cmd_t command, void *arg,
590     const char *name, const char *dummy __unused, const char *he_name)
591 {
592 	ipf_stack_t *ifs = arg;
593 	hook_t **hookpp;
594 	char *hook_name, *hint_name;
595 	hook_func_t hookfn;
596 	boolean_t *hookedp;
597 	hook_hint_t hint;
598 	boolean_t out;
599 	int ret = 0;
600 
601 	const boolean_t gz = ifs->ifs_gz_controlled;
602 
603 	/* We currently only care about viona hooks notifications */
604 	if (strcmp(name, Hn_VIONA) != 0)
605 		return (0);
606 
607 	if (strcmp(he_name, NH_PHYSICAL_IN) == 0) {
608 		out = B_FALSE;
609 	} else if (strcmp(he_name, NH_PHYSICAL_OUT) == 0) {
610 		out = B_TRUE;
611 	} else {
612 		/*
613 		 * If we've added more hook events to viona, we must add
614 		 * the corresponding handling here (even if it's just to
615 		 * ignore it) to prevent the firewall from not working as
616 		 * intended.
617 		 */
618 		cmn_err(CE_PANIC, "%s: unhandled hook event %s", __func__,
619 		    he_name);
620 
621 		return (0);
622 	}
623 
624 	if (out) {
625 		hookpp = &ifs->ifs_ipfhookviona_out;
626 		hookfn = ipf_hookviona_out;
627 		hookedp = &ifs->ifs_hookviona_physical_out;
628 		name = gz ? hook_viona_out_gz : hook_viona_out;
629 		hint = gz ? HH_AFTER : HH_BEFORE;
630 		hint_name = gz ? hook_viona_out : hook_viona_out_gz;
631 	} else {
632 		hookpp = &ifs->ifs_ipfhookviona_in;
633 		hookfn = ipf_hookviona_in;
634 		hookedp = &ifs->ifs_hookviona_physical_in;
635 		name = gz ? hook_viona_in_gz : hook_viona_in;
636 		hint = gz ? HH_BEFORE : HH_AFTER;
637 		hint_name = gz ? hook_viona_in : hook_viona_in_gz;
638 	}
639 
640 	switch (command) {
641 	default:
642 	case HN_NONE:
643 		break;
644 	case HN_REGISTER:
645 		HOOK_INIT(*hookpp, hookfn, (char *)name, ifs);
646 		(*hookpp)->h_hint = hint;
647 		(*hookpp)->h_hintvalue = (uintptr_t)hint_name;
648 		ret = net_hook_register(ifs->ifs_ipf_viona,
649 		    (char *)he_name, *hookpp);
650 		if (ret != 0) {
651 			cmn_err(CE_NOTE, "%s: could not register hook "
652 			    "(hook family=%s hook=%s) err=%d", __func__,
653 			    name, he_name, ret);
654 			*hookedp = B_FALSE;
655 			return (ret);
656 		}
657 		*hookedp = B_TRUE;
658 		break;
659 	case HN_UNREGISTER:
660 		if (ifs->ifs_ipf_viona == NULL)
661 			break;
662 
663 		ret = *hookedp ? net_hook_unregister(ifs->ifs_ipf_viona,
664 		    (char *)he_name, *hookpp) : 0;
665 		if ((ret == 0 || ret == ENXIO)) {
666 			if (*hookpp != NULL) {
667 				hook_free(*hookpp);
668 				*hookpp = NULL;
669 			}
670 			*hookedp = B_FALSE;
671 		}
672 		break;
673 	}
674 
675 	return (ret);
676 }
677 
678 /*
679  * Called whenever a new nethook instance is created.  Currently only used
680  * with the Hn_VIONA nethooks.  Similar to ipf_hook_protocol_notify, the out
681  * function signature must return an int, though the result is never used.
682  * We elect to return 0 on success (or not applicable) or a non-zero value
683  * on error.
684  */
685 static int
686 ipf_hook_instance_notify(hook_notify_cmd_t command, void *arg,
687     const char *netid, const char *dummy __unused, const char *instance)
688 {
689 	ipf_stack_t *ifs = arg;
690 	int ret = 0;
691 
692 	/* We currently only care about viona hooks */
693 	if (strcmp(instance, Hn_VIONA) != 0)
694 		return (0);
695 
696 	switch (command) {
697 	case HN_NONE:
698 	default:
699 		return (0);
700 	case HN_REGISTER:
701 		ifs->ifs_ipf_viona = net_protocol_lookup(ifs->ifs_netid,
702 		    NHF_VIONA);
703 
704 		if (ifs->ifs_ipf_viona == NULL)
705 			return (EPROTONOSUPPORT);
706 
707 		ret = net_protocol_notify_register(ifs->ifs_ipf_viona,
708 		    ipf_hook_protocol_notify, ifs);
709 		VERIFY(ret == 0 || ret == ESHUTDOWN);
710 		break;
711 	case HN_UNREGISTER:
712 		if (ifs->ifs_ipf_viona == NULL)
713 			break;
714 		VERIFY0(net_protocol_notify_unregister(ifs->ifs_ipf_viona,
715 		    ipf_hook_protocol_notify));
716 		VERIFY0(net_protocol_release(ifs->ifs_ipf_viona));
717 		ifs->ifs_ipf_viona = NULL;
718 		break;
719 	}
720 
721 	return (ret);
722 }
723 
724 static	int	fr_setipfloopback(set, ifs)
725 int set;
726 ipf_stack_t *ifs;
727 {
728 	if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL)
729 		return EFAULT;
730 
731 	if (set && !ifs->ifs_ipf_loopback) {
732 		ifs->ifs_ipf_loopback = 1;
733 
734 		ifs->ifs_hook4_loopback_in = (net_hook_register(
735 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
736 		    ifs->ifs_ipfhook4_loop_in) == 0);
737 		if (!ifs->ifs_hook4_loopback_in)
738 			return EINVAL;
739 
740 		ifs->ifs_hook4_loopback_out = (net_hook_register(
741 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
742 		    ifs->ifs_ipfhook4_loop_out) == 0);
743 		if (!ifs->ifs_hook4_loopback_out)
744 			return EINVAL;
745 
746 		ifs->ifs_hook6_loopback_in = (net_hook_register(
747 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
748 		    ifs->ifs_ipfhook6_loop_in) == 0);
749 		if (!ifs->ifs_hook6_loopback_in)
750 			return EINVAL;
751 
752 		ifs->ifs_hook6_loopback_out = (net_hook_register(
753 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
754 		    ifs->ifs_ipfhook6_loop_out) == 0);
755 		if (!ifs->ifs_hook6_loopback_out)
756 			return EINVAL;
757 
758 	} else if (!set && ifs->ifs_ipf_loopback) {
759 		ifs->ifs_ipf_loopback = 0;
760 
761 		ifs->ifs_hook4_loopback_in =
762 		    (net_hook_unregister(ifs->ifs_ipf_ipv4,
763 		    NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
764 		if (ifs->ifs_hook4_loopback_in)
765 			return EBUSY;
766 
767 		ifs->ifs_hook4_loopback_out =
768 		    (net_hook_unregister(ifs->ifs_ipf_ipv4,
769 		    NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0);
770 		if (ifs->ifs_hook4_loopback_out)
771 			return EBUSY;
772 
773 		ifs->ifs_hook6_loopback_in =
774 		    (net_hook_unregister(ifs->ifs_ipf_ipv6,
775 		    NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
776 		if (ifs->ifs_hook6_loopback_in)
777 			return EBUSY;
778 
779 		ifs->ifs_hook6_loopback_out =
780 		    (net_hook_unregister(ifs->ifs_ipf_ipv6,
781 		    NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0);
782 		if (ifs->ifs_hook6_loopback_out)
783 			return EBUSY;
784 	}
785 	return 0;
786 }
787 
788 
789 /*
790  * Filter ioctl interface.
791  */
792 /*ARGSUSED*/
793 int iplioctl(dev, cmd, data, mode, cp, rp)
794 dev_t dev;
795 int cmd;
796 #if SOLARIS2 >= 7
797 intptr_t data;
798 #else
799 int *data;
800 #endif
801 int mode;
802 cred_t *cp;
803 int *rp;
804 {
805 	int error = 0, tmp;
806 	friostat_t fio;
807 	minor_t unit;
808 	u_int enable;
809 	ipf_stack_t *ifs;
810 	zoneid_t zid;
811 	ipf_devstate_t *isp;
812 
813 #ifdef	IPFDEBUG
814 	cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
815 		dev, cmd, data, mode, cp, rp);
816 #endif
817 	unit = getminor(dev);
818 
819 	isp = ddi_get_soft_state(ipf_state, unit);
820 	if (isp == NULL)
821 		return ENXIO;
822 	unit = isp->ipfs_minor;
823 
824 	zid = crgetzoneid(cp);
825 	if (cmd == SIOCIPFZONESET) {
826 		if (zid == GLOBAL_ZONEID)
827 			return fr_setzoneid(isp, (caddr_t) data);
828 		return EACCES;
829 	}
830 
831         /*
832 	 * ipf_find_stack returns with a read lock on ifs_ipf_global
833 	 */
834 	ifs = ipf_find_stack(zid, isp);
835 	if (ifs == NULL)
836 		return ENXIO;
837 
838 	if (ifs->ifs_fr_running <= 0) {
839 		if (unit != IPL_LOGIPF) {
840 			RWLOCK_EXIT(&ifs->ifs_ipf_global);
841 			return EIO;
842 		}
843 		if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
844 		    cmd != SIOCIPFSET && cmd != SIOCFRENB &&
845 		    cmd != SIOCGETFS && cmd != SIOCGETFF) {
846 			RWLOCK_EXIT(&ifs->ifs_ipf_global);
847 			return EIO;
848 		}
849 	}
850 
851 	if (ifs->ifs_fr_enable_active != 0) {
852 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
853 		return EBUSY;
854 	}
855 
856 	error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, crgetuid(cp),
857 			       curproc, ifs);
858 	if (error != -1) {
859 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
860 		return error;
861 	}
862 	error = 0;
863 
864 	switch (cmd)
865 	{
866 	case SIOCFRENB :
867 		if (!(mode & FWRITE))
868 			error = EPERM;
869 		else {
870 			error = COPYIN((caddr_t)data, (caddr_t)&enable,
871 				       sizeof(enable));
872 			if (error != 0) {
873 				error = EFAULT;
874 				break;
875 			}
876 
877 			RWLOCK_EXIT(&ifs->ifs_ipf_global);
878 			WRITE_ENTER(&ifs->ifs_ipf_global);
879 
880 			/*
881 			 * We must recheck fr_enable_active here, since we've
882 			 * dropped ifs_ipf_global from R in order to get it
883 			 * exclusively.
884 			 */
885 			if (ifs->ifs_fr_enable_active == 0) {
886 				ifs->ifs_fr_enable_active = 1;
887 				error = fr_enableipf(ifs, enable);
888 				ifs->ifs_fr_enable_active = 0;
889 			}
890 		}
891 		break;
892 	case SIOCIPFSET :
893 		if (!(mode & FWRITE)) {
894 			error = EPERM;
895 			break;
896 		}
897 		/* FALLTHRU */
898 	case SIOCIPFGETNEXT :
899 	case SIOCIPFGET :
900 		error = fr_ipftune(cmd, (void *)data, ifs);
901 		break;
902 	case SIOCSETFF :
903 		if (!(mode & FWRITE))
904 			error = EPERM;
905 		else {
906 			error = COPYIN((caddr_t)data,
907 				       (caddr_t)&ifs->ifs_fr_flags,
908 				       sizeof(ifs->ifs_fr_flags));
909 			if (error != 0)
910 				error = EFAULT;
911 		}
912 		break;
913 	case SIOCIPFLP :
914 		error = COPYIN((caddr_t)data, (caddr_t)&tmp,
915 			       sizeof(tmp));
916 		if (error != 0)
917 			error = EFAULT;
918 		else
919 			error = fr_setipfloopback(tmp, ifs);
920 		break;
921 	case SIOCGETFF :
922 		error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data,
923 				sizeof(ifs->ifs_fr_flags));
924 		if (error != 0)
925 			error = EFAULT;
926 		break;
927 	case SIOCFUNCL :
928 		error = fr_resolvefunc((void *)data);
929 		break;
930 	case SIOCINAFR :
931 	case SIOCRMAFR :
932 	case SIOCADAFR :
933 	case SIOCZRLST :
934 		if (!(mode & FWRITE))
935 			error = EPERM;
936 		else
937 			error = frrequest(unit, cmd, (caddr_t)data,
938 					  ifs->ifs_fr_active, 1, ifs);
939 		break;
940 	case SIOCINIFR :
941 	case SIOCRMIFR :
942 	case SIOCADIFR :
943 		if (!(mode & FWRITE))
944 			error = EPERM;
945 		else
946 			error = frrequest(unit, cmd, (caddr_t)data,
947 					  1 - ifs->ifs_fr_active, 1, ifs);
948 		break;
949 	case SIOCSWAPA :
950 		if (!(mode & FWRITE))
951 			error = EPERM;
952 		else {
953 			WRITE_ENTER(&ifs->ifs_ipf_mutex);
954 			bzero((char *)ifs->ifs_frcache,
955 			    sizeof (ifs->ifs_frcache));
956 			error = COPYOUT((caddr_t)&ifs->ifs_fr_active,
957 					(caddr_t)data,
958 					sizeof(ifs->ifs_fr_active));
959 			if (error != 0)
960 				error = EFAULT;
961 			else
962 				ifs->ifs_fr_active = 1 - ifs->ifs_fr_active;
963 			RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
964 		}
965 		break;
966 	case SIOCGETFS :
967 		fr_getstat(&fio, ifs);
968 		error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT);
969 		break;
970 	case SIOCFRZST :
971 		if (!(mode & FWRITE))
972 			error = EPERM;
973 		else
974 			error = fr_zerostats((caddr_t)data, ifs);
975 		break;
976 	case	SIOCIPFFL :
977 		if (!(mode & FWRITE))
978 			error = EPERM;
979 		else {
980 			error = COPYIN((caddr_t)data, (caddr_t)&tmp,
981 				       sizeof(tmp));
982 			if (!error) {
983 				tmp = frflush(unit, 4, tmp, ifs);
984 				error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
985 						sizeof(tmp));
986 				if (error != 0)
987 					error = EFAULT;
988 			} else
989 				error = EFAULT;
990 		}
991 		break;
992 #ifdef USE_INET6
993 	case	SIOCIPFL6 :
994 		if (!(mode & FWRITE))
995 			error = EPERM;
996 		else {
997 			error = COPYIN((caddr_t)data, (caddr_t)&tmp,
998 				       sizeof(tmp));
999 			if (!error) {
1000 				tmp = frflush(unit, 6, tmp, ifs);
1001 				error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
1002 						sizeof(tmp));
1003 				if (error != 0)
1004 					error = EFAULT;
1005 			} else
1006 				error = EFAULT;
1007 		}
1008 		break;
1009 #endif
1010 	case SIOCSTLCK :
1011 		error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
1012 		if (error == 0) {
1013 			ifs->ifs_fr_state_lock = tmp;
1014 			ifs->ifs_fr_nat_lock = tmp;
1015 			ifs->ifs_fr_frag_lock = tmp;
1016 			ifs->ifs_fr_auth_lock = tmp;
1017 		} else
1018 			error = EFAULT;
1019 	break;
1020 #ifdef	IPFILTER_LOG
1021 	case	SIOCIPFFB :
1022 		if (!(mode & FWRITE))
1023 			error = EPERM;
1024 		else {
1025 			tmp = ipflog_clear(unit, ifs);
1026 			error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
1027 				       sizeof(tmp));
1028 			if (error)
1029 				error = EFAULT;
1030 		}
1031 		break;
1032 #endif /* IPFILTER_LOG */
1033 	case SIOCFRSYN :
1034 		if (!(mode & FWRITE))
1035 			error = EPERM;
1036 		else {
1037 			RWLOCK_EXIT(&ifs->ifs_ipf_global);
1038 			WRITE_ENTER(&ifs->ifs_ipf_global);
1039 
1040 			frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
1041 			fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
1042 			fr_nataddrsync(0, NULL, NULL, ifs);
1043 			fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
1044 			error = 0;
1045 		}
1046 		break;
1047 	case SIOCGFRST :
1048 		error = fr_outobj((void *)data, fr_fragstats(ifs),
1049 				  IPFOBJ_FRAGSTAT);
1050 		break;
1051 	case FIONREAD :
1052 #ifdef	IPFILTER_LOG
1053 		tmp = (int)ifs->ifs_iplused[IPL_LOGIPF];
1054 
1055 		error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp));
1056 		if (error != 0)
1057 			error = EFAULT;
1058 #endif
1059 		break;
1060 	case SIOCIPFITER :
1061 		error = ipf_frruleiter((caddr_t)data, crgetuid(cp),
1062 				       curproc, ifs);
1063 		break;
1064 
1065 	case SIOCGENITER :
1066 		error = ipf_genericiter((caddr_t)data, crgetuid(cp),
1067 					curproc, ifs);
1068 		break;
1069 
1070 	case SIOCIPFDELTOK :
1071 		error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
1072 		if (error != 0) {
1073 			error = EFAULT;
1074 		} else {
1075 			error = ipf_deltoken(tmp, crgetuid(cp), curproc, ifs);
1076 		}
1077 		break;
1078 
1079 	default :
1080 #ifdef	IPFDEBUG
1081 		cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p",
1082 			cmd, (void *)data);
1083 #endif
1084 		error = EINVAL;
1085 		break;
1086 	}
1087 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
1088 	return error;
1089 }
1090 
1091 
1092 static int fr_enableipf(ifs, enable)
1093 ipf_stack_t *ifs;
1094 int enable;
1095 {
1096 	int error;
1097 
1098 	if (!enable) {
1099 		error = ipldetach(ifs);
1100 		if (error == 0)
1101 			ifs->ifs_fr_running = -1;
1102 		return error;
1103 	}
1104 
1105 	if (ifs->ifs_fr_running > 0)
1106 		return 0;
1107 
1108 	error = iplattach(ifs);
1109 	if (error == 0) {
1110 		if (ifs->ifs_fr_timer_id == NULL) {
1111 			int hz = drv_usectohz(500000);
1112 
1113 			ifs->ifs_fr_timer_id = timeout(fr_slowtimer,
1114 						       (void *)ifs,
1115 						       hz);
1116 		}
1117 		ifs->ifs_fr_running = 1;
1118 	} else {
1119 		(void) ipldetach(ifs);
1120 	}
1121 	return error;
1122 }
1123 
1124 
1125 phy_if_t get_unit(name, v, ifs)
1126 char *name;
1127 int v;
1128 ipf_stack_t *ifs;
1129 {
1130 	net_handle_t nif;
1131 
1132   	if (v == 4)
1133  		nif = ifs->ifs_ipf_ipv4;
1134   	else if (v == 6)
1135  		nif = ifs->ifs_ipf_ipv6;
1136   	else
1137  		return 0;
1138 
1139  	return (net_phylookup(nif, name));
1140 }
1141 
1142 /*
1143  * routines below for saving IP headers to buffer
1144  */
1145 /*ARGSUSED*/
1146 int iplopen(devp, flags, otype, cred)
1147 dev_t *devp;
1148 int flags, otype;
1149 cred_t *cred;
1150 {
1151 	ipf_devstate_t *isp;
1152 	minor_t min = getminor(*devp);
1153 	minor_t minor;
1154 
1155 #ifdef	IPFDEBUG
1156 	cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
1157 #endif
1158 	if (!(otype & OTYP_CHR))
1159 		return ENXIO;
1160 
1161 	if (IPL_LOGMAX < min)
1162 		return ENXIO;
1163 
1164 	minor = (minor_t)(uintptr_t)vmem_alloc(ipf_minor, 1,
1165 	    VM_BESTFIT | VM_SLEEP);
1166 
1167 	if (ddi_soft_state_zalloc(ipf_state, minor) != 0) {
1168 		vmem_free(ipf_minor, (void *)(uintptr_t)minor, 1);
1169 		return ENXIO;
1170 	}
1171 
1172 	*devp = makedevice(getmajor(*devp), minor);
1173 	isp = ddi_get_soft_state(ipf_state, minor);
1174 	VERIFY(isp != NULL);
1175 
1176 	isp->ipfs_minor = min;
1177 	isp->ipfs_zoneid = IPFS_ZONE_UNSET;
1178 
1179 	return 0;
1180 }
1181 
1182 
1183 /*ARGSUSED*/
1184 int iplclose(dev, flags, otype, cred)
1185 dev_t dev;
1186 int flags, otype;
1187 cred_t *cred;
1188 {
1189 	minor_t	min = getminor(dev);
1190 
1191 #ifdef	IPFDEBUG
1192 	cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
1193 #endif
1194 
1195 	if (IPL_LOGMAX < min)
1196 		return ENXIO;
1197 
1198 	ddi_soft_state_free(ipf_state, min);
1199 	vmem_free(ipf_minor, (void *)(uintptr_t)min, 1);
1200 
1201 	return 0;
1202 }
1203 
1204 #ifdef	IPFILTER_LOG
1205 /*
1206  * iplread/ipllog
1207  * both of these must operate with at least splnet() lest they be
1208  * called during packet processing and cause an inconsistancy to appear in
1209  * the filter lists.
1210  */
1211 /*ARGSUSED*/
1212 int iplread(dev, uio, cp)
1213 dev_t dev;
1214 register struct uio *uio;
1215 cred_t *cp;
1216 {
1217 	ipf_stack_t *ifs;
1218 	int ret;
1219 	minor_t unit;
1220 	ipf_devstate_t *isp;
1221 
1222 	unit = getminor(dev);
1223 	isp = ddi_get_soft_state(ipf_state, unit);
1224 	if (isp == NULL)
1225 		return ENXIO;
1226 	unit = isp->ipfs_minor;
1227 
1228 
1229         /*
1230 	 * ipf_find_stack returns with a read lock on ifs_ipf_global
1231 	 */
1232 	ifs = ipf_find_stack(crgetzoneid(cp), isp);
1233 	if (ifs == NULL)
1234 		return ENXIO;
1235 
1236 # ifdef	IPFDEBUG
1237 	cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
1238 # endif
1239 
1240 	if (ifs->ifs_fr_running < 1) {
1241 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
1242 		return EIO;
1243 	}
1244 
1245 # ifdef	IPFILTER_SYNC
1246 	if (unit == IPL_LOGSYNC) {
1247 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
1248 		return ipfsync_read(uio);
1249 	}
1250 # endif
1251 
1252 	ret = ipflog_read(unit, uio, ifs);
1253 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
1254 	return ret;
1255 }
1256 #endif /* IPFILTER_LOG */
1257 
1258 
1259 /*
1260  * iplread/ipllog
1261  * both of these must operate with at least splnet() lest they be
1262  * called during packet processing and cause an inconsistancy to appear in
1263  * the filter lists.
1264  */
1265 int iplwrite(dev, uio, cp)
1266 dev_t dev;
1267 register struct uio *uio;
1268 cred_t *cp;
1269 {
1270 	ipf_stack_t *ifs;
1271 	minor_t unit;
1272 	ipf_devstate_t *isp;
1273 
1274 	unit = getminor(dev);
1275 	isp = ddi_get_soft_state(ipf_state, unit);
1276 	if (isp == NULL)
1277 		return ENXIO;
1278 	unit = isp->ipfs_minor;
1279 
1280         /*
1281 	 * ipf_find_stack returns with a read lock on ifs_ipf_global
1282 	 */
1283 	ifs = ipf_find_stack(crgetzoneid(cp), isp);
1284 	if (ifs == NULL)
1285 		return ENXIO;
1286 
1287 #ifdef	IPFDEBUG
1288 	cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp);
1289 #endif
1290 
1291 	if (ifs->ifs_fr_running < 1) {
1292 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
1293 		return EIO;
1294 	}
1295 
1296 #ifdef	IPFILTER_SYNC
1297 	if (getminor(dev) == IPL_LOGSYNC) {
1298 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
1299 		return ipfsync_write(uio);
1300 	}
1301 #endif /* IPFILTER_SYNC */
1302 	dev = dev;	/* LINT */
1303 	uio = uio;	/* LINT */
1304 	cp = cp;	/* LINT */
1305 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
1306 	return ENXIO;
1307 }
1308 
1309 
/*
 * fr_send_reset - this could conceivably be a call to tcp_respond(), but that
 * requires a large amount of setting up and isn't any more efficient.
 *
 * Build a TCP RST in reply to the packet described by fin and hand it to
 * fr_send_ip().
 *
 * Returns -1 when the offending packet is itself a RST, when the layer 4
 * checksum check fails (only compiled in when IPFILTER_CKSUM is NOT
 * defined), or when no message block can be allocated.  Otherwise the
 * return value of fr_send_ip() is passed back.
 */
int fr_send_reset(fin)
fr_info_t *fin;
{
	tcphdr_t *tcp, *tcp2;
	int tlen, hlen;
	mblk_t *m;
#ifdef	USE_INET6
	ip6_t *ip6;
#endif
	ip_t *ip;

	/* Never answer a RST with another RST. */
	tcp = fin->fin_dp;
	if (tcp->th_flags & TH_RST)
		return -1;

#ifndef	IPFILTER_CKSUM
	if (fr_checkl4sum(fin) == -1)
		return -1;
#endif

	/* SYN or FIN adds one to the acknowledgement number sent back. */
	tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0;
#ifdef	USE_INET6
	if (fin->fin_v == 6)
		hlen = sizeof(ip6_t);
	else
#endif
		hlen = sizeof(ip_t);
	hlen += sizeof(*tcp2);
	/*
	 * Allocate 64 bytes more than the headers need and skip over them,
	 * leaving headroom in front of the IP header (presumably for a link
	 * layer header - TODO confirm).
	 */
	if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL)
		return -1;

	m->b_rptr += 64;
	MTYPE(m) = M_DATA;
	m->b_wptr = m->b_rptr + hlen;
	ip = (ip_t *)m->b_rptr;
	bzero((char *)ip, hlen);
	/* The TCP header occupies the last sizeof(*tcp2) bytes. */
	tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
	/* Reply goes back the way the packet came: swap the ports. */
	tcp2->th_dport = tcp->th_sport;
	tcp2->th_sport = tcp->th_dport;
	if (tcp->th_flags & TH_ACK) {
		/* Peer sent an ACK: use its ack number as our sequence. */
		tcp2->th_seq = tcp->th_ack;
		tcp2->th_flags = TH_RST;
	} else {
		/* No ACK seen: acknowledge the peer's sequence (+ tlen). */
		tcp2->th_ack = ntohl(tcp->th_seq);
		tcp2->th_ack += tlen;
		tcp2->th_ack = htonl(tcp2->th_ack);
		tcp2->th_flags = TH_RST|TH_ACK;
	}
	tcp2->th_off = sizeof(struct tcphdr) >> 2;

	ip->ip_v = fin->fin_v;
#ifdef	USE_INET6
	if (fin->fin_v == 6) {
		ip6 = (ip6_t *)m->b_rptr;
		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
		/* Source and destination are swapped relative to fin. */
		ip6->ip6_src = fin->fin_dst6.in6;
		ip6->ip6_dst = fin->fin_src6.in6;
		ip6->ip6_plen = htons(sizeof(*tcp));
		ip6->ip6_nxt = IPPROTO_TCP;
		tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2);
	} else
#endif
	{
		/* Source and destination are swapped relative to fin. */
		ip->ip_src.s_addr = fin->fin_daddr;
		ip->ip_dst.s_addr = fin->fin_saddr;
		ip->ip_id = fr_nextipid(fin);
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_p = IPPROTO_TCP;
		/* Host byte order here; fr_send_ip() handles conversion. */
		ip->ip_len = sizeof(*ip) + sizeof(*tcp);
		ip->ip_tos = fin->fin_ip->ip_tos;
		tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2);
	}
	return fr_send_ip(fin, m, &m);
}
1388 
1389 /*
1390  * Function:	fr_send_ip
1391  * Returns:	 0: success
1392  *		-1: failed
1393  * Parameters:
1394  *	fin: packet information
1395  *	m: the message block where ip head starts
1396  *
1397  * Send a new packet through the IP stack.
1398  *
1399  * For IPv4 packets, ip_len must be in host byte order, and ip_v,
1400  * ip_ttl, ip_off, and ip_sum are ignored (filled in by this
1401  * function).
1402  *
1403  * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled
1404  * in by this function.
1405  *
1406  * All other portions of the packet must be in on-the-wire format.
1407  */
1408 /*ARGSUSED*/
1409 static int fr_send_ip(fin, m, mpp)
1410 fr_info_t *fin;
1411 mblk_t *m, **mpp;
1412 {
1413 	qpktinfo_t qpi, *qpip;
1414 	fr_info_t fnew;
1415 	ip_t *ip;
1416 	int i, hlen;
1417 	ipf_stack_t *ifs = fin->fin_ifs;
1418 
1419 	ip = (ip_t *)m->b_rptr;
1420 	bzero((char *)&fnew, sizeof(fnew));
1421 
1422 #ifdef	USE_INET6
1423 	if (fin->fin_v == 6) {
1424 		ip6_t *ip6;
1425 
1426 		ip6 = (ip6_t *)ip;
1427 		ip6->ip6_vfc = 0x60;
1428 		ip6->ip6_hlim = 127;
1429 		fnew.fin_v = 6;
1430 		hlen = sizeof(*ip6);
1431 		fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen;
1432 	} else
1433 #endif
1434 	{
1435 		fnew.fin_v = 4;
1436 #if SOLARIS2 >= 10
1437 		ip->ip_ttl = 255;
1438 		if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1)
1439 			ip->ip_off = htons(IP_DF);
1440 #else
1441 		if (ip_ttl_ptr != NULL)
1442 			ip->ip_ttl = (u_char)(*ip_ttl_ptr);
1443 		else
1444 			ip->ip_ttl = 63;
1445 		if (ip_mtudisc != NULL)
1446 			ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0);
1447 		else
1448 			ip->ip_off = htons(IP_DF);
1449 #endif
1450 		/*
1451 		 * The dance with byte order and ip_len/ip_off is because in
1452 		 * fr_fastroute, it expects them to be in host byte order but
1453 		 * ipf_cksum expects them to be in network byte order.
1454 		 */
1455 		ip->ip_len = htons(ip->ip_len);
1456 		ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip));
1457 		ip->ip_len = ntohs(ip->ip_len);
1458 		ip->ip_off = ntohs(ip->ip_off);
1459 		hlen = sizeof(*ip);
1460 		fnew.fin_plen = ip->ip_len;
1461 	}
1462 
1463 	qpip = fin->fin_qpi;
1464 	qpi.qpi_off = 0;
1465 	qpi.qpi_ill = qpip->qpi_ill;
1466 	qpi.qpi_m = m;
1467 	qpi.qpi_data = ip;
1468 	fnew.fin_qpi = &qpi;
1469 	fnew.fin_ifp = fin->fin_ifp;
1470 	fnew.fin_flx = FI_NOCKSUM;
1471 	fnew.fin_m = m;
1472 	fnew.fin_qfm = m;
1473 	fnew.fin_ip = ip;
1474 	fnew.fin_mp = mpp;
1475 	fnew.fin_hlen = hlen;
1476 	fnew.fin_dp = (char *)ip + hlen;
1477 	fnew.fin_ifs = fin->fin_ifs;
1478 	(void) fr_makefrip(hlen, ip, &fnew);
1479 
1480 	i = fr_fastroute(m, mpp, &fnew, NULL);
1481 	return i;
1482 }
1483 
1484 
/*
 * Build an ICMP (or, for IPv6 packets, ICMPv6) error in response to the
 * packet described by fin and send it with fr_send_ip().
 *
 *	type:	ICMP type to send; must be within 0..ICMP_MAXTYPE
 *	fin:	packet information for the offending packet; fin_icode
 *		supplies the ICMP code
 *	dst:	zero to use an address of the receiving interface (looked up
 *		with fr_ifpaddr()) as the source of the error, non-zero to
 *		use the offending packet's destination address
 *
 * Returns -1 on bad arguments, checksum failure, allocation failure or
 * when no interface address can be found; 0 when the offending packet is
 * an IPv4 ICMP packet other than the request types listed below (no error
 * is sent); otherwise the return value of fr_send_ip().
 */
int fr_send_icmp_err(type, fin, dst)
int type;
fr_info_t *fin;
int dst;
{
	struct in_addr dst4;
	struct icmp *icmp;
	qpktinfo_t *qpi;
	int hlen, code;
	phy_if_t phy;
	u_short sz;
#ifdef	USE_INET6
	mblk_t *mb;
#endif
	mblk_t *m;
#ifdef	USE_INET6
	ip6_t *ip6;
#endif
	ip_t *ip;
	ipf_stack_t *ifs = fin->fin_ifs;

	if ((type < 0) || (type > ICMP_MAXTYPE))
		return -1;

	code = fin->fin_icode;
#ifdef USE_INET6
	/* code is used below as an index into icmptoicmp6unreach[]. */
	if ((code < 0) || (code >= ICMP_MAX_UNREACH))
		return -1;
#endif

#ifndef	IPFILTER_CKSUM
	if (fr_checkl4sum(fin) == -1)
		return -1;
#endif

	qpi = fin->fin_qpi;

#ifdef	USE_INET6
	mb = fin->fin_qfm;

	if (fin->fin_v == 6) {
		/* IPv6 header plus up to 512 bytes of the offending mblk. */
		sz = sizeof(ip6_t);
		sz += MIN(mb->b_wptr - mb->b_rptr, 512);
		hlen = sizeof(ip6_t);
		/* Translate the ICMPv4 type/code to their ICMPv6 values. */
		type = icmptoicmp6types[type];
		if (type == ICMP6_DST_UNREACH)
			code = icmptoicmp6unreach[code];
	} else
#endif
	{
		/*
		 * For an ICMP packet, only continue when the value switched
		 * on (presumably the ICMP type - TODO confirm the layout of
		 * fin_data[0]) is one of the request types below; anything
		 * else gets no error in reply.
		 */
		if ((fin->fin_p == IPPROTO_ICMP) &&
		    !(fin->fin_flx & FI_SHORT))
			switch (ntohs(fin->fin_data[0]) >> 8)
			{
			case ICMP_ECHO :
			case ICMP_TSTAMP :
			case ICMP_IREQ :
			case ICMP_MASKREQ :
				break;
			default :
				return 0;
			}

		/* Outer IP header + copy of offending IP header + 8 bytes. */
		sz = sizeof(ip_t) * 2;
		sz += 8;		/* 64 bits of data */
		hlen = sizeof(ip_t);
	}

	/* Add the fixed part of the ICMP header that precedes icmp_ip. */
	sz += offsetof(struct icmp, icmp_ip);
	/* 64 extra bytes are allocated and skipped as headroom. */
	if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL)
		return -1;
	MTYPE(m) = M_DATA;
	m->b_rptr += 64;
	m->b_wptr = m->b_rptr + sz;
	bzero((char *)m->b_rptr, (size_t)sz);
	ip = (ip_t *)m->b_rptr;
	ip->ip_v = fin->fin_v;
	icmp = (struct icmp *)(m->b_rptr + hlen);
	icmp->icmp_type = type & 0xff;
	icmp->icmp_code = code & 0xff;
	phy = (phy_if_t)qpi->qpi_ill;
	/*
	 * NOTE(review): for IPv6 packets 'type' has already been translated
	 * to an ICMPv6 value above, yet it is compared with the ICMPv4
	 * constant here and the MTU comes from the IPv4 handle - confirm this
	 * is intended for the IPv6 path.
	 */
	if (type == ICMP_UNREACH && (phy != 0) &&
	    fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
		icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 );

#ifdef	USE_INET6
	if (fin->fin_v == 6) {
		struct in6_addr dst6;
		int csz;

		if (dst == 0) {
			/* Shadows the function-level ifs; same value. */
			ipf_stack_t *ifs = fin->fin_ifs;

			if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy,
				       (void *)&dst6, NULL, ifs) == -1) {
				FREE_MB_T(m);
				return -1;
			}
		} else
			dst6 = fin->fin_dst6.in6;

		/* csz keeps the full size; sz is whittled down below. */
		csz = sz;
		sz -= sizeof(ip6_t);
		ip6 = (ip6_t *)m->b_rptr;
		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
		ip6->ip6_plen = htons((u_short)sz);
		ip6->ip6_nxt = IPPROTO_ICMPV6;
		ip6->ip6_src = dst6;
		ip6->ip6_dst = fin->fin_src6.in6;
		/* What is left in sz is the amount of offending data. */
		sz -= offsetof(struct icmp, icmp_ip);
		bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz);
		/*
		 * The checksum field is loaded with the ICMPv6 length rather
		 * than a computed checksum (presumably completed further down
		 * the output path - TODO confirm).
		 */
		icmp->icmp_cksum = csz - sizeof(ip6_t);
	} else
#endif
	{
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_p = IPPROTO_ICMP;
		ip->ip_id = fin->fin_ip->ip_id;
		ip->ip_tos = fin->fin_ip->ip_tos;
		/* Host byte order; fr_send_ip() handles conversion. */
		ip->ip_len = (u_short)sz;
		if (dst == 0) {
			/* Shadows the function-level ifs; same value. */
			ipf_stack_t *ifs = fin->fin_ifs;

			if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy,
				       (void *)&dst4, NULL, ifs) == -1) {
				FREE_MB_T(m);
				return -1;
			}
		} else {
			dst4 = fin->fin_dst;
		}
		ip->ip_src = dst4;
		ip->ip_dst = fin->fin_src;
		/*
		 * Payload: the offending packet's basic IP header followed by
		 * the first 8 bytes found after its (possibly longer) header.
		 */
		bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip,
		      sizeof(*fin->fin_ip));
		bcopy((char *)fin->fin_ip + fin->fin_hlen,
		      (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8);
		/* Byte-swap ip_len/ip_off in the embedded header copy. */
		icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len);
		icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off);
		icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
					     sz - sizeof(ip_t));
	}

	/*
	 * Need to exit out of these so we don't recursively call rw_enter
	 * from fr_qout.
	 * NOTE(review): no lock is released in this function; this comment
	 * looks like a leftover from an earlier version.
	 */
	return fr_send_ip(fin, m, &m);
}
1634 
1635 #include <sys/time.h>
1636 #include <sys/varargs.h>
1637 
1638 #ifndef _KERNEL
1639 #include <stdio.h>
1640 #endif
1641 
1642 /*
1643  * Return the first IP Address associated with an interface
1644  * For IPv6, we walk through the list of logical interfaces and return
1645  * the address of the first one that isn't a link-local interface.
1646  * We can't assume that it is :1 because another link-local address
1647  * may have been assigned there.
1648  */
1649 /*ARGSUSED*/
1650 int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs)
1651 int v, atype;
1652 void *ifptr;
1653 struct in_addr  *inp, *inpmask;
1654 ipf_stack_t *ifs;
1655 {
1656 	struct sockaddr_in6 v6addr[2];
1657 	struct sockaddr_in v4addr[2];
1658 	net_ifaddr_t type[2];
1659 	net_handle_t net_data;
1660 	phy_if_t phyif;
1661 	void *array;
1662 
1663 	switch (v)
1664 	{
1665 	case 4:
1666 		net_data = ifs->ifs_ipf_ipv4;
1667 		array = v4addr;
1668 		break;
1669 	case 6:
1670 		net_data = ifs->ifs_ipf_ipv6;
1671 		array = v6addr;
1672 		break;
1673 	default:
1674 		net_data = NULL;
1675 		break;
1676 	}
1677 
1678 	if (net_data == NULL)
1679 		return -1;
1680 
1681 	phyif = (phy_if_t)ifptr;
1682 
1683 	switch (atype)
1684 	{
1685 	case FRI_PEERADDR :
1686 		type[0] = NA_PEER;
1687 		break;
1688 
1689 	case FRI_BROADCAST :
1690 		type[0] = NA_BROADCAST;
1691 		break;
1692 
1693 	default :
1694 		type[0] = NA_ADDRESS;
1695 		break;
1696 	}
1697 
1698 	type[1] = NA_NETMASK;
1699 
1700 	if (v == 6) {
1701 		lif_if_t idx = 0;
1702 
1703 		do {
1704 			idx = net_lifgetnext(net_data, phyif, idx);
1705 			if (net_getlifaddr(net_data, phyif, idx, 2, type,
1706 					   array) < 0)
1707 				return -1;
1708 			if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) &&
1709 			    !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr))
1710 				break;
1711 		} while (idx != 0);
1712 
1713 		if (idx == 0)
1714 			return -1;
1715 
1716 		return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1],
1717 					inp, inpmask);
1718 	}
1719 
1720 	if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0)
1721 		return -1;
1722 
1723 	return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask);
1724 }
1725 
1726 
1727 u_32_t fr_newisn(fin)
1728 fr_info_t *fin;
1729 {
1730 	static int iss_seq_off = 0;
1731 	u_char hash[16];
1732 	u_32_t newiss;
1733 	MD5_CTX ctx;
1734 	ipf_stack_t *ifs = fin->fin_ifs;
1735 
1736 	/*
1737 	 * Compute the base value of the ISS.  It is a hash
1738 	 * of (saddr, sport, daddr, dport, secret).
1739 	 */
1740 	MD5Init(&ctx);
1741 
1742 	MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src,
1743 		  sizeof(fin->fin_fi.fi_src));
1744 	MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst,
1745 		  sizeof(fin->fin_fi.fi_dst));
1746 	MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat));
1747 
1748 	MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret));
1749 
1750 	MD5Final(hash, &ctx);
1751 
1752 	bcopy(hash, &newiss, sizeof(newiss));
1753 
1754 	/*
1755 	 * Now increment our "timer", and add it in to
1756 	 * the computed value.
1757 	 *
1758 	 * XXX Use `addin'?
1759 	 * XXX TCP_ISSINCR too large to use?
1760 	 */
1761 	iss_seq_off += 0x00010000;
1762 	newiss += iss_seq_off;
1763 	return newiss;
1764 }
1765 
1766 
1767 /* ------------------------------------------------------------------------ */
1768 /* Function:    fr_nextipid                                                 */
1769 /* Returns:     int - 0 == success, -1 == error (packet should be droppped) */
1770 /* Parameters:  fin(I) - pointer to packet information                      */
1771 /*                                                                          */
1772 /* Returns the next IPv4 ID to use for this packet.                         */
1773 /* ------------------------------------------------------------------------ */
1774 u_short fr_nextipid(fin)
1775 fr_info_t *fin;
1776 {
1777 	static u_short ipid = 0;
1778 	u_short id;
1779 	ipf_stack_t *ifs = fin->fin_ifs;
1780 
1781 	MUTEX_ENTER(&ifs->ifs_ipf_rw);
1782 	if (fin->fin_pktnum != 0) {
1783 		id = fin->fin_pktnum & 0xffff;
1784 	} else {
1785 		id = ipid++;
1786 	}
1787 	MUTEX_EXIT(&ifs->ifs_ipf_rw);
1788 
1789 	return id;
1790 }
1791 
1792 
1793 #ifndef IPFILTER_CKSUM
1794 /* ARGSUSED */
1795 #endif
1796 INLINE void fr_checkv4sum(fin)
1797 fr_info_t *fin;
1798 {
1799 #ifdef IPFILTER_CKSUM
1800 	if (fr_checkl4sum(fin) == -1)
1801 		fin->fin_flx |= FI_BAD;
1802 #endif
1803 }
1804 
1805 
1806 #ifdef USE_INET6
1807 # ifndef IPFILTER_CKSUM
1808 /* ARGSUSED */
1809 # endif
1810 INLINE void fr_checkv6sum(fin)
1811 fr_info_t *fin;
1812 {
1813 # ifdef IPFILTER_CKSUM
1814 	if (fr_checkl4sum(fin) == -1)
1815 		fin->fin_flx |= FI_BAD;
1816 # endif
1817 }
1818 #endif /* USE_INET6 */
1819 
1820 
1821 #if (SOLARIS2 < 7)
1822 void fr_slowtimer()
1823 #else
1824 /*ARGSUSED*/
1825 void fr_slowtimer __P((void *arg))
1826 #endif
1827 {
1828 	ipf_stack_t *ifs = arg;
1829 
1830 	READ_ENTER(&ifs->ifs_ipf_global);
1831 	if (ifs->ifs_fr_running != 1) {
1832 		ifs->ifs_fr_timer_id = NULL;
1833 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
1834 		return;
1835 	}
1836 	ipf_expiretokens(ifs);
1837 	fr_fragexpire(ifs);
1838 	fr_timeoutstate(ifs);
1839 	fr_natexpire(ifs);
1840 	fr_authexpire(ifs);
1841 	ifs->ifs_fr_ticks++;
1842 	if (ifs->ifs_fr_running == 1)
1843 		ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg,
1844 		    drv_usectohz(500000));
1845 	else
1846 		ifs->ifs_fr_timer_id = NULL;
1847 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
1848 }
1849 
1850 
/* ------------------------------------------------------------------------ */
/* Function:    fr_pullup                                                   */
/* Returns:     NULL == pullup failed, else pointer to protocol header      */
/* Parameters:  m(I)   - pointer to buffer where data packet starts         */
/*              fin(I) - pointer to packet information                      */
/*              len(I) - number of bytes to pullup                          */
/*                                                                          */
/* Attempt to move at least len bytes (from the start of the buffer) into a */
/* single buffer for ease of access.  Operating system native functions are */
/* used to manage buffers - if necessary.  If the entire packet ends up in  */
/* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has  */
/* not been called.  Both fin_ip and fin_dp are updated before exiting _IF_ */
/* and ONLY if the pullup succeeds.                                         */
/*                                                                          */
/* On failure the whole chain at *fin->fin_mp is freed and fin_mp, fin_m,   */
/* fin_ip, fin_dp and qpi_data are all cleared.                             */
/*                                                                          */
/* We assume that 'min' is a pointer to a buffer that is part of the chain  */
/* of buffers that starts at *fin->fin_mp.                                  */
/* ------------------------------------------------------------------------ */
void *fr_pullup(min, fin, len)
mb_t *min;
fr_info_t *fin;
int len;
{
	qpktinfo_t *qpi = fin->fin_qpi;
	int out = fin->fin_out, dpoff, ipoff;
	mb_t *m = min, *m1, *m2;
	char *ip;
	uint32_t start, stuff, end, value, flags;
	ipf_stack_t *ifs = fin->fin_ifs;

	if (m == NULL)
		return NULL;

	/* Already coalesced: the current header pointer is good as is. */
	ip = (char *)fin->fin_ip;
	if ((fin->fin_flx & FI_COALESCE) != 0)
		return ip;

	/*
	 * Remember where fin_dp sits relative to the IP header so that it
	 * can be re-derived if the header moves.
	 */
	ipoff = fin->fin_ipoff;
	if (fin->fin_dp != NULL)
		dpoff = (char *)fin->fin_dp - (char *)ip;
	else
		dpoff = 0;

	/* Only do any work if this buffer holds too little data. */
	if (M_LEN(m) < len + ipoff) {

		/*
		 * pfil_precheck ensures the IP header is on a 32bit
		 * aligned address so simply fail if that isn't currently
		 * the case (should never happen).
		 * NOTE(review): the code below does not fail; when ipoff is
		 * not a multiple of 4 it moves b_rptr back by 'inc' bytes
		 * (if the data block has room) before the pullup and forward
		 * again afterwards.
		 */
		int inc = 0;

		if (ipoff > 0) {
			if ((ipoff & 3) != 0) {
				inc = 4 - (ipoff & 3);
				if (m->b_rptr - inc >= m->b_datap->db_base)
					m->b_rptr -= inc;
				else
					inc = 0;
			}
		}

		/*
		 * XXX This is here as a work around for a bug with DEBUG
		 * XXX Solaris kernels.  The problem is b_prev is used by IP
		 * XXX code as a way to stash the phyint_index for a packet,
		 * XXX this doesn't get reset by IP but freeb does an ASSERT()
		 * XXX for both of these to be NULL.  See 6442390.
		 */
		m1 = m;
		m2 = m->b_prev;	/* saved here, restored after the pullup */

		do {
			m1->b_next = NULL;
			m1->b_prev = NULL;
			m1 = m1->b_cont;
		} while (m1);

		/*
		 * Need to preserve the checksum information: read it out
		 * here and write it back to the message once it has been
		 * pulled up.
		 */
		mac_hcksum_get(m, &start, &stuff, &end, &value, &flags);

		if (pullupmsg(m, len + ipoff + inc) == 0) {
			/* fr_pull[1] counts failed pullups. */
			ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]);
			FREE_MB_T(*fin->fin_mp);
			*fin->fin_mp = NULL;
			fin->fin_m = NULL;
			fin->fin_ip = NULL;
			fin->fin_dp = NULL;
			qpi->qpi_data = NULL;
			return NULL;
		}

		mac_hcksum_set(m, start, stuff, end, value, flags);

		m->b_prev = m2;
		m->b_rptr += inc;	/* undo the alignment adjustment */
		fin->fin_m = m;
		ip = MTOD(m, char *) + ipoff;
		qpi->qpi_data = ip;
	}

	/* fr_pull[0] counts calls that get this far without failing. */
	ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]);
	fin->fin_ip = (ip_t *)ip;
	if (fin->fin_dp != NULL)
		fin->fin_dp = (char *)fin->fin_ip + dpoff;

	/* The whole packet was asked for, so treat it as coalesced. */
	if (len == fin->fin_plen)
		fin->fin_flx |= FI_COALESCE;
	return ip;
}
1963 
1964 
1965 /*
1966  * Function:	fr_verifysrc
1967  * Returns:	int (really boolean)
1968  * Parameters:	fin - packet information
1969  *
1970  * Check whether the packet has a valid source address for the interface on
1971  * which the packet arrived, implementing the "fr_chksrc" feature.
1972  * Returns true iff the packet's source address is valid.
1973  */
1974 int fr_verifysrc(fin)
1975 fr_info_t *fin;
1976 {
1977 	net_handle_t net_data_p;
1978 	phy_if_t phy_ifdata_routeto;
1979 	struct sockaddr	sin;
1980 	ipf_stack_t *ifs = fin->fin_ifs;
1981 
1982 	if (fin->fin_v == 4) {
1983 		net_data_p = ifs->ifs_ipf_ipv4;
1984 	} else if (fin->fin_v == 6) {
1985 		net_data_p = ifs->ifs_ipf_ipv6;
1986 	} else {
1987 		return (0);
1988 	}
1989 
1990 	/* Get the index corresponding to the if name */
1991 	sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
1992 	bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr));
1993 	phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL);
1994 
1995 	return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0);
1996 }
1997 
1998 /*
1999  * Return true only if forwarding is enabled on the interface.
2000  */
2001 static int
2002 fr_forwarding_enabled(phy_if_t phyif, net_handle_t ndp)
2003 {
2004 	lif_if_t lif;
2005 
2006 	for (lif = net_lifgetnext(ndp, phyif, 0); lif > 0;
2007 	    lif = net_lifgetnext(ndp, phyif, lif)) {
2008 		int res;
2009 		uint64_t flags;
2010 
2011 		res = net_getlifflags(ndp, phyif, lif, &flags);
2012 		if (res != 0)
2013 			return (0);
2014 		if (flags & IFF_ROUTER)
2015 			return (1);
2016 	}
2017 
2018 	return (0);
2019 }
2020 
2021 /*
2022  * Function:	fr_fastroute
2023  * Returns:	 0: success;
2024  *		-1: failed
2025  * Parameters:
2026  *	mb: the message block where ip head starts
2027  *	mpp: the pointer to the pointer of the orignal
2028  *		packet message
2029  *	fin: packet information
2030  *	fdp: destination interface information
2031  *	if it is NULL, no interface information provided.
2032  *
2033  * This function is for fastroute/to/dup-to rules. It calls
2034  * pfil_make_lay2_packet to search route, make lay-2 header
2035  * ,and identify output queue for the IP packet.
2036  * The destination address depends on the following conditions:
2037  * 1: for fastroute rule, fdp is passed in as NULL, so the
2038  *	destination address is the IP Packet's destination address
2039  * 2: for to/dup-to rule, if an ip address is specified after
2040  *	the interface name, this address is the as destination
2041  *	address. Otherwise IP Packet's destination address is used
2042  */
2043 int fr_fastroute(mb, mpp, fin, fdp)
2044 mblk_t *mb, **mpp;
2045 fr_info_t *fin;
2046 frdest_t *fdp;
2047 {
2048         net_handle_t net_data_p;
2049 	net_inject_t *inj;
2050 	mblk_t *mp = NULL;
2051 	frentry_t *fr = fin->fin_fr;
2052 	qpktinfo_t *qpi;
2053 	ip_t *ip;
2054 
2055 	struct sockaddr_in *sin;
2056 	struct sockaddr_in6 *sin6;
2057 	struct sockaddr *sinp;
2058 	ipf_stack_t *ifs = fin->fin_ifs;
2059 #ifndef	sparc
2060 	u_short __iplen, __ipoff;
2061 #endif
2062 
2063 	if (fin->fin_v == 4) {
2064 		net_data_p = ifs->ifs_ipf_ipv4;
2065 	} else if (fin->fin_v == 6) {
2066 		net_data_p = ifs->ifs_ipf_ipv6;
2067 	} else {
2068 		return (-1);
2069 	}
2070 
2071 	/* Check the src here, fin_ifp is the src interface. */
2072 	if (!fr_forwarding_enabled((phy_if_t)fin->fin_ifp, net_data_p))
2073 		return (-1);
2074 
2075 	inj = net_inject_alloc(NETINFO_VERSION);
2076 	if (inj == NULL)
2077 		return -1;
2078 
2079 	ip = fin->fin_ip;
2080 	qpi = fin->fin_qpi;
2081 
2082 	/*
2083 	 * If this is a duplicate mblk then we want ip to point at that
2084 	 * data, not the original, if and only if it is already pointing at
2085 	 * the current mblk data.
2086 	 *
2087 	 * Otherwise, if it's not a duplicate, and we're not already pointing
2088 	 * at the current mblk data, then we want to ensure that the data
2089 	 * points at ip.
2090 	 */
2091 
2092 	if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) {
2093 		ip = (ip_t *)mb->b_rptr;
2094 	} else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) {
2095 		qpi->qpi_m->b_rptr = (uchar_t *)ip;
2096 		qpi->qpi_off = 0;
2097 	}
2098 
2099 	/*
2100 	 * If there is another M_PROTO, we don't want it
2101 	 */
2102 	if (*mpp != mb) {
2103 		mp = unlinkb(*mpp);
2104 		freeb(*mpp);
2105 		*mpp = mp;
2106 	}
2107 
2108 	sinp = (struct sockaddr *)&inj->ni_addr;
2109 	sin = (struct sockaddr_in *)sinp;
2110 	sin6 = (struct sockaddr_in6 *)sinp;
2111 	bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr));
2112 	inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
2113 	inj->ni_packet = mb;
2114 
2115 	/*
2116 	 * In case we're here due to "to <if>" being used with
2117 	 * "keep state", check that we're going in the correct
2118 	 * direction.
2119 	 */
2120 	if (fdp != NULL) {
2121 		if ((fr != NULL) && (fdp->fd_ifp != NULL) &&
2122 			(fin->fin_rev != 0) && (fdp == &fr->fr_tif))
2123 			goto bad_fastroute;
2124 		inj->ni_physical = (phy_if_t)fdp->fd_ifp;
2125 		if (fin->fin_v == 4) {
2126 			sin->sin_addr = fdp->fd_ip;
2127 		} else {
2128 			sin6->sin6_addr = fdp->fd_ip6.in6;
2129 		}
2130 	} else {
2131 		if (fin->fin_v == 4) {
2132 			sin->sin_addr = ip->ip_dst;
2133 		} else {
2134 			sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst;
2135 		}
2136 		inj->ni_physical = net_routeto(net_data_p, sinp, NULL);
2137 	}
2138 
2139 	/* we're checking the destinatation here */
2140 	if (!fr_forwarding_enabled(inj->ni_physical, net_data_p))
2141 		goto bad_fastroute;
2142 
2143 	/*
2144 	 * Clear the hardware checksum flags from packets that we are doing
2145 	 * input processing on as leaving them set will cause the outgoing
2146 	 * NIC (if it supports hardware checksum) to calculate them anew,
2147 	 * using the old (correct) checksums as the pseudo value to start
2148 	 * from.
2149 	 */
2150 	if (fin->fin_out == 0) {
2151 		DB_CKSUMFLAGS(mb) = 0;
2152 	}
2153 
2154 	*mpp = mb;
2155 
2156 	if (fin->fin_out == 0) {
2157 		void *saveifp;
2158 		u_32_t pass;
2159 
2160 		saveifp = fin->fin_ifp;
2161 		fin->fin_ifp = (void *)inj->ni_physical;
2162 		fin->fin_flx &= ~FI_STATE;
2163 		fin->fin_out = 1;
2164 		(void) fr_acctpkt(fin, &pass);
2165 		fin->fin_fr = NULL;
2166 		if (!fr || !(fr->fr_flags & FR_RETMASK))
2167 			(void) fr_checkstate(fin, &pass);
2168 		if (fr_checknatout(fin, NULL) == -1)
2169 			goto bad_fastroute;
2170 		fin->fin_out = 0;
2171 		fin->fin_ifp = saveifp;
2172 	}
2173 #ifndef	sparc
2174 	if (fin->fin_v == 4) {
2175 		__iplen = (u_short)ip->ip_len,
2176 		__ipoff = (u_short)ip->ip_off;
2177 
2178 		ip->ip_len = htons(__iplen);
2179 		ip->ip_off = htons(__ipoff);
2180 	}
2181 #endif
2182 
2183 	if (net_data_p) {
2184 		if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) {
2185 			net_inject_free(inj);
2186 			return (-1);
2187 		}
2188 	}
2189 
2190 	ifs->ifs_fr_frouteok[0]++;
2191 	net_inject_free(inj);
2192 	return 0;
2193 bad_fastroute:
2194 	net_inject_free(inj);
2195 	freemsg(mb);
2196 	ifs->ifs_fr_frouteok[1]++;
2197 	return -1;
2198 }
2199 
2200 
2201 /* ------------------------------------------------------------------------ */
2202 /* Function:    ipf_hook4_out                                               */
2203 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2204 /* Parameters:  event(I)     - pointer to event                             */
2205 /*              info(I)      - pointer to hook information for firewalling  */
2206 /*                                                                          */
2207 /* Calling ipf_hook.                                                        */
2208 /* ------------------------------------------------------------------------ */
2209 /*ARGSUSED*/
2210 int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg)
2211 {
2212 	return ipf_hook(info, 1, 0, arg);
2213 }
2214 /*ARGSUSED*/
2215 int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg)
2216 {
2217 	return ipf_hook6(info, 1, 0, arg);
2218 }
2219 
2220 /* ------------------------------------------------------------------------ */
2221 /* Function:    ipf_hook4_in                                                */
2222 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2223 /* Parameters:  event(I)     - pointer to event                             */
2224 /*              info(I)      - pointer to hook information for firewalling  */
2225 /*                                                                          */
2226 /* Calling ipf_hook.                                                        */
2227 /* ------------------------------------------------------------------------ */
2228 /*ARGSUSED*/
2229 int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg)
2230 {
2231 	return ipf_hook(info, 0, 0, arg);
2232 }
2233 /*ARGSUSED*/
2234 int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg)
2235 {
2236 	return ipf_hook6(info, 0, 0, arg);
2237 }
2238 
2239 
2240 /* ------------------------------------------------------------------------ */
2241 /* Function:    ipf_hook4_loop_out                                          */
2242 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2243 /* Parameters:  event(I)     - pointer to event                             */
2244 /*              info(I)      - pointer to hook information for firewalling  */
2245 /*                                                                          */
2246 /* Calling ipf_hook.                                                        */
2247 /* ------------------------------------------------------------------------ */
2248 /*ARGSUSED*/
2249 int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
2250 {
2251 	return ipf_hook(info, 1, FI_NOCKSUM, arg);
2252 }
2253 /*ARGSUSED*/
2254 int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
2255 {
2256 	return ipf_hook6(info, 1, FI_NOCKSUM, arg);
2257 }
2258 
2259 /* Static constants used by ipf_hook_ether */
2260 static uint8_t ipf_eth_bcast_addr[ETHERADDRL] = {
2261 	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
2262 };
2263 static uint8_t ipf_eth_ipv4_mcast[3] = { 0x01, 0x00, 0x5E };
2264 static uint8_t ipf_eth_ipv6_mcast[2] = { 0x33, 0x33 };
2265 
2266 /* ------------------------------------------------------------------------ */
2267 /* Function:	ipf_hook_ether                                              */
2268 /* Returns:	int - 0 == packet ok, else problem, free packet if not done */
2269 /* Parameters:	token(I)     - pointer to event                             */
2270 /*              info(I)      - pointer to hook information for firewalling  */
2271 /*                                                                          */
2272 /* The ipf_hook_ether hook is currently private to illumos.  It represents  */
2273 /* a layer 2 datapath generally used by virtual machines.  Currently the    */
2274 /* hook is only used by the viona driver to pass along L2 frames for        */
2275 /* inspection.  It requires that the L2 ethernet header is contained within */
2276 /* a single dblk_t (however layers above the L2 header have no restrctions  */
2277 /* in ipf).  ipf does not currently support filtering on L2 fields (e.g.    */
2278 /* filtering on a MAC address or ethertype), however virtual machines do    */
2279 /* not have native IP stack instances where ipf traditionally hooks in.     */
2280 /* Instead this entry point is used to determine if the packet is unicast,  */
2281 /* broadcast, or multicast. The IPv4 or IPv6 packet is then passed to the   */
2282 /* traditional ip hooks for filtering.  Non IPv4 or non IPv6 packets are    */
2283 /* not subject to examination.                                              */
2284 /* ------------------------------------------------------------------------ */
2285 int ipf_hook_ether(hook_event_token_t token, hook_data_t info, void *arg,
2286     boolean_t out)
2287 {
2288 	struct ether_header *ethp;
2289 	hook_pkt_event_t *hpe = (hook_pkt_event_t *)info;
2290 	mblk_t *mp;
2291 	size_t offset, len;
2292 	uint16_t etype;
2293 	boolean_t v6;
2294 
2295 	/*
2296 	 * viona will only pass us mblks with the L2 header contained in a
2297 	 * single data block.
2298 	 */
2299 	mp = *hpe->hpe_mp;
2300 	len = MBLKL(mp);
2301 
2302 	VERIFY3S(len, >=, sizeof (struct ether_header));
2303 
2304 	ethp = (struct ether_header *)mp->b_rptr;
2305 	if ((etype = ntohs(ethp->ether_type)) == ETHERTYPE_VLAN) {
2306 		struct ether_vlan_header *evh =
2307 		    (struct ether_vlan_header *)ethp;
2308 
2309 		VERIFY3S(len, >=, sizeof (struct ether_vlan_header));
2310 
2311 		etype = ntohs(evh->ether_type);
2312 		offset = sizeof (*evh);
2313 	} else {
2314 		offset = sizeof (*ethp);
2315 	}
2316 
2317 	/*
2318 	 * ipf only support filtering IPv4 and IPv6.  Ignore other types.
2319 	 */
2320 	if (etype == ETHERTYPE_IP)
2321 		v6 = B_FALSE;
2322 	else if (etype == ETHERTYPE_IPV6)
2323 		v6 = B_TRUE;
2324 	else
2325 		return (0);
2326 
2327 	if (bcmp(ipf_eth_bcast_addr, ethp, ETHERADDRL) == 0)
2328 		hpe->hpe_flags |= HPE_BROADCAST;
2329 	else if (bcmp(ipf_eth_ipv4_mcast, ethp,
2330 	    sizeof (ipf_eth_ipv4_mcast)) == 0)
2331 		hpe->hpe_flags |= HPE_MULTICAST;
2332 	else if (bcmp(ipf_eth_ipv6_mcast, ethp,
2333 	    sizeof (ipf_eth_ipv6_mcast)) == 0)
2334 		hpe->hpe_flags |= HPE_MULTICAST;
2335 
2336 	/* Find the start of the IPv4 or IPv6 header */
2337 	for (; offset >= len; len = MBLKL(mp)) {
2338 		offset -= len;
2339 		mp = mp->b_cont;
2340 		if (mp == NULL) {
2341 			freemsg(*hpe->hpe_mp);
2342 			*hpe->hpe_mp = NULL;
2343 			return (-1);
2344 		}
2345 	}
2346 	hpe->hpe_mb = mp;
2347 	hpe->hpe_hdr = mp->b_rptr + offset;
2348 
2349 	return (v6 ? ipf_hook6(info, out, 0, arg) :
2350 	    ipf_hook(info, out, 0, arg));
2351 }
2352 
2353 /* ------------------------------------------------------------------------ */
2354 /* Function:    ipf_hookviona_{in,out}                                      */
2355 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2356 /* Parameters:  event(I)     - pointer to event                             */
2357 /*              info(I)      - pointer to hook information for firewalling  */
2358 /*                                                                          */
2359 /* The viona hooks are private hooks to illumos. They represents a layer 2  */
2360 /* datapath generally used to implement virtual machines.                   */
2361 /* along L2 packets.                                                        */
2362 /*                                                                          */
2363 /* They end up calling the appropriate traditional ip hooks.                */
2364 /* ------------------------------------------------------------------------ */
2365 int
2366 ipf_hookviona_in(hook_event_token_t token, hook_data_t info, void *arg)
2367 {
2368 	return (ipf_hook_ether(token, info, arg, B_FALSE));
2369 }
2370 
2371 int
2372 ipf_hookviona_out(hook_event_token_t token, hook_data_t info, void *arg)
2373 {
2374 	return (ipf_hook_ether(token, info, arg, B_TRUE));
2375 }
2376 
2377 /* ------------------------------------------------------------------------ */
2378 /* Function:    ipf_hook4_loop_in                                           */
2379 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2380 /* Parameters:  event(I)     - pointer to event                             */
2381 /*              info(I)      - pointer to hook information for firewalling  */
2382 /*                                                                          */
2383 /* Calling ipf_hook.                                                        */
2384 /* ------------------------------------------------------------------------ */
2385 /*ARGSUSED*/
2386 int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
2387 {
2388 	return ipf_hook(info, 0, FI_NOCKSUM, arg);
2389 }
2390 /*ARGSUSED*/
2391 int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
2392 {
2393 	return ipf_hook6(info, 0, FI_NOCKSUM, arg);
2394 }
2395 
2396 /* ------------------------------------------------------------------------ */
2397 /* Function:    ipf_hook                                                    */
2398 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2399 /* Parameters:  info(I)      - pointer to hook information for firewalling  */
2400 /*              out(I)       - whether packet is going in or out            */
2401 /*              loopback(I)  - whether packet is a loopback packet or not   */
2402 /*                                                                          */
2403 /* Stepping stone function between the IP mainline and IPFilter.  Extracts  */
2404 /* parameters out of the info structure and forms them up to be useful for  */
2405 /* calling ipfilter.                                                        */
2406 /* ------------------------------------------------------------------------ */
2407 int ipf_hook(hook_data_t info, int out, int loopback, void *arg)
2408 {
2409 	hook_pkt_event_t *fw;
2410 	ipf_stack_t *ifs;
2411 	qpktinfo_t qpi;
2412 	int rval, hlen;
2413 	u_short swap;
2414 	phy_if_t phy;
2415 	ip_t *ip;
2416 
2417 	ifs = arg;
2418 	fw = (hook_pkt_event_t *)info;
2419 
2420 	ASSERT(fw != NULL);
2421 	phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
2422 
2423 	ip = fw->hpe_hdr;
2424 	swap = ntohs(ip->ip_len);
2425 	ip->ip_len = swap;
2426 	swap = ntohs(ip->ip_off);
2427 	ip->ip_off = swap;
2428 	hlen = IPH_HDR_LENGTH(ip);
2429 
2430 	qpi.qpi_m = fw->hpe_mb;
2431 	qpi.qpi_data = fw->hpe_hdr;
2432 	qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
2433 	qpi.qpi_ill = (void *)phy;
2434 	qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
2435 	if (qpi.qpi_flags)
2436 		qpi.qpi_flags |= FI_MBCAST;
2437 	qpi.qpi_flags |= loopback;
2438 
2439 	rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
2440 	    &qpi, fw->hpe_mp, ifs);
2441 
2442 	/* For fastroute cases, fr_check returns 0 with mp set to NULL */
2443 	if (rval == 0 && *(fw->hpe_mp) == NULL)
2444 		rval = 1;
2445 
2446 	/* Notify IP the packet mblk_t and IP header pointers. */
2447 	fw->hpe_mb = qpi.qpi_m;
2448 	fw->hpe_hdr = qpi.qpi_data;
2449 	if (rval == 0) {
2450 		ip = qpi.qpi_data;
2451 		swap = ntohs(ip->ip_len);
2452 		ip->ip_len = swap;
2453 		swap = ntohs(ip->ip_off);
2454 		ip->ip_off = swap;
2455 	}
2456 	return rval;
2457 
2458 }
2459 int ipf_hook6(hook_data_t info, int out, int loopback, void *arg)
2460 {
2461 	hook_pkt_event_t *fw;
2462 	int rval, hlen;
2463 	qpktinfo_t qpi;
2464 	phy_if_t phy;
2465 
2466 	fw = (hook_pkt_event_t *)info;
2467 
2468 	ASSERT(fw != NULL);
2469 	phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
2470 
2471 	hlen = sizeof (ip6_t);
2472 
2473 	qpi.qpi_m = fw->hpe_mb;
2474 	qpi.qpi_data = fw->hpe_hdr;
2475 	qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
2476 	qpi.qpi_ill = (void *)phy;
2477 	qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
2478 	if (qpi.qpi_flags)
2479 		qpi.qpi_flags |= FI_MBCAST;
2480 	qpi.qpi_flags |= loopback;
2481 
2482 	rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
2483 	    &qpi, fw->hpe_mp, arg);
2484 
2485 	/* For fastroute cases, fr_check returns 0 with mp set to NULL */
2486 	if (rval == 0 && *(fw->hpe_mp) == NULL)
2487 		rval = 1;
2488 
2489 	/* Notify IP the packet mblk_t and IP header pointers. */
2490 	fw->hpe_mb = qpi.qpi_m;
2491 	fw->hpe_hdr = qpi.qpi_data;
2492 	return rval;
2493 }
2494 
2495 
2496 /* ------------------------------------------------------------------------ */
2497 /* Function:    ipf_nic_event_v4                                            */
2498 /* Returns:     int - 0 == no problems encountered                          */
2499 /* Parameters:  event(I)     - pointer to event                             */
2500 /*              info(I)      - pointer to information about a NIC event     */
2501 /*                                                                          */
2502 /* Function to receive asynchronous NIC events from IP                      */
2503 /* ------------------------------------------------------------------------ */
2504 /*ARGSUSED*/
2505 int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg)
2506 {
2507 	struct sockaddr_in *sin;
2508 	hook_nic_event_t *hn;
2509 	ipf_stack_t *ifs = arg;
2510 	void *new_ifp = NULL;
2511 
2512 	if (ifs->ifs_fr_running <= 0)
2513 		return (0);
2514 
2515 	hn = (hook_nic_event_t *)info;
2516 
2517 	switch (hn->hne_event)
2518 	{
2519 	case NE_PLUMB :
2520 		frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data,
2521 		       ifs);
2522 		fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2523 			      hn->hne_data, ifs);
2524 		fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2525 			     hn->hne_data, ifs);
2526 		break;
2527 
2528 	case NE_UNPLUMB :
2529 		frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2530 		fr_natifpsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL,
2531 			      ifs);
2532 		fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2533 		break;
2534 
2535 	case NE_ADDRESS_CHANGE :
2536 		/*
2537 		 * We only respond to events for logical interface 0 because
2538 		 * IPFilter only uses the first address given to a network
2539 		 * interface.  We check for hne_lif==1 because the netinfo
2540 		 * code maps adds 1 to the lif number so that it can return
2541 		 * 0 to indicate "no more lifs" when walking them.
2542 		 */
2543 		if (hn->hne_lif == 1) {
2544 			frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL,
2545 			    ifs);
2546 			sin = hn->hne_data;
2547 			fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr,
2548 			    ifs);
2549 		}
2550 		break;
2551 
2552 #if SOLARIS2 >= 10
2553 	case NE_IFINDEX_CHANGE :
2554 		WRITE_ENTER(&ifs->ifs_ipf_mutex);
2555 
2556 		if (hn->hne_data != NULL) {
2557 			/*
2558 			 * The netinfo passes interface index as int (hne_data should be
2559 			 * handled as a pointer to int), which is always 32bit. We need to
2560 			 * convert it to void pointer here, since interfaces are
2561 			 * represented as pointers to void in IPF. The pointers are 64 bits
2562 			 * long on 64bit platforms. Doing something like
2563 			 *	(void *)((int) x)
2564 			 * will throw warning:
2565 			 *   "cast to pointer from integer of different size"
2566 			 * during 64bit compilation.
2567 			 *
2568 			 * The line below uses (size_t) to typecast int to
2569 			 * size_t, which might be 64bit/32bit (depending
2570 			 * on architecture). Once we have proper 64bit/32bit
2571 			 * type (size_t), we can safely convert it to void pointer.
2572 			 */
2573 			new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2574 			fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2575 			fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2576 			fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2577 		}
2578 		RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2579 		break;
2580 #endif
2581 
2582 	default :
2583 		break;
2584 	}
2585 
2586 	return 0;
2587 }
2588 
2589 
2590 /* ------------------------------------------------------------------------ */
2591 /* Function:    ipf_nic_event_v6                                            */
2592 /* Returns:     int - 0 == no problems encountered                          */
2593 /* Parameters:  event(I)     - pointer to event                             */
2594 /*              info(I)      - pointer to information about a NIC event     */
2595 /*                                                                          */
2596 /* Function to receive asynchronous NIC events from IP                      */
2597 /* ------------------------------------------------------------------------ */
2598 /*ARGSUSED*/
2599 int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg)
2600 {
2601 	struct sockaddr_in6 *sin6;
2602 	hook_nic_event_t *hn;
2603 	ipf_stack_t *ifs = arg;
2604 	void *new_ifp = NULL;
2605 
2606 	if (ifs->ifs_fr_running <= 0)
2607 		return (0);
2608 
2609 	hn = (hook_nic_event_t *)info;
2610 
2611 	switch (hn->hne_event)
2612 	{
2613 	case NE_PLUMB :
2614 		frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2615 		       hn->hne_data, ifs);
2616 		fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2617 			      hn->hne_data, ifs);
2618 		fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2619 			     hn->hne_data, ifs);
2620 		break;
2621 
2622 	case NE_UNPLUMB :
2623 		frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2624 		fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL,
2625 			      ifs);
2626 		fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2627 		break;
2628 
2629 	case NE_ADDRESS_CHANGE :
2630 		if (hn->hne_lif == 1) {
2631 			sin6 = hn->hne_data;
2632 			fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr,
2633 				       ifs);
2634 		}
2635 		break;
2636 
2637 #if SOLARIS2 >= 10
2638 	case NE_IFINDEX_CHANGE :
2639 		WRITE_ENTER(&ifs->ifs_ipf_mutex);
2640 		if (hn->hne_data != NULL) {
2641 			/*
2642 			 * The netinfo passes interface index as int (hne_data should be
2643 			 * handled as a pointer to int), which is always 32bit. We need to
2644 			 * convert it to void pointer here, since interfaces are
2645 			 * represented as pointers to void in IPF. The pointers are 64 bits
2646 			 * long on 64bit platforms. Doing something like
2647 			 *	(void *)((int) x)
2648 			 * will throw warning:
2649 			 *   "cast to pointer from integer of different size"
2650 			 * during 64bit compilation.
2651 			 *
2652 			 * The line below uses (size_t) to typecast int to
2653 			 * size_t, which might be 64bit/32bit (depending
2654 			 * on architecture). Once we have proper 64bit/32bit
2655 			 * type (size_t), we can safely convert it to void pointer.
2656 			 */
2657 			new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2658 			fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2659 			fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2660 			fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2661 		}
2662 		RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2663 		break;
2664 #endif
2665 
2666 	default :
2667 		break;
2668 	}
2669 
2670 	return 0;
2671 }
2672 
/*
 * The functions fr_make_rst(), fr_make_icmp_v4() and fr_make_icmp_v6()
 * are needed in the Solaris kernel only.  ipftest does not need them in
 * order to pretend that the ICMP/RST packet was sent as a response.
 */
2678 #if defined(_KERNEL) && (SOLARIS2 >= 10)
2679 /* ------------------------------------------------------------------------ */
2680 /* Function:    fr_make_rst                                                 */
2681 /* Returns:     int - 0 on success, -1 on failure			    */
2682 /* Parameters:  fin(I) - pointer to packet information                      */
2683 /*                                                                          */
2684 /* We must alter the original mblks passed to IPF from IP stack via	    */
/* FW_HOOKS. FW_HOOKS interface is powerful, but it has some limitations.   */
/* IPF can basically do only these things with the packet's mblk:           */
2687 /*	leave it as it is (pass the packet)				    */
2688 /*                                                                          */
2689 /*	discard it (block the packet)					    */
2690 /*                                                                          */
2691 /*	alter it (i.e. NAT)						    */
2692 /*                                                                          */
2693 /* As you can see IPF can not simply discard the mblk and supply a new one  */
2694 /* instead to IP stack via FW_HOOKS.					    */
2695 /*                                                                          */
2696 /* The return-rst action for packets coming via NIC is handled as follows:  */
2697 /*	mblk with packet is discarded					    */
2698 /*                                                                          */
2699 /*	new mblk with RST response is constructed and injected to network   */
2700 /*                                                                          */
2701 /* IPF can't inject packets to loopback interface, this is just another	    */
2702 /* limitation we have to deal with here. The only option to send RST	    */
2703 /* response to offending TCP packet coming via loopback is to alter it.	    */
2704 /*									    */
2705 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on	    */
2706 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to	    */
2707 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers.			    */
2708 /* ------------------------------------------------------------------------ */
2709 int fr_make_rst(fin)
2710 fr_info_t *fin;
2711 {
2712 	uint16_t tmp_port;
2713 	int rv = -1;
2714 	uint32_t old_ack;
2715 	tcphdr_t *tcp = NULL;
2716 	struct in_addr tmp_src;
2717 #ifdef USE_INET6
2718 	struct in6_addr	tmp_src6;
2719 #endif
2720 
2721 	ASSERT(fin->fin_p == IPPROTO_TCP);
2722 
2723 	/*
2724 	 * We do not need to adjust chksum, since it is not being checked by
2725 	 * Solaris IP stack for loopback clients.
2726 	 */
2727 	if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) &&
2728 	    ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2729 
2730 		if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2731 			/* Swap IPv4 addresses. */
2732 			tmp_src = fin->fin_ip->ip_src;
2733 			fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2734 			fin->fin_ip->ip_dst = tmp_src;
2735 
2736 			rv = 0;
2737 		}
2738 		else
2739 			tcp = NULL;
2740 	}
2741 #ifdef USE_INET6
2742 	else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) &&
2743 	    ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2744 		/*
2745 		 * We are relying on fact the next header is TCP, which is true
2746 		 * for regular TCP packets coming in over loopback.
2747 		 */
2748 		if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2749 			/* Swap IPv6 addresses. */
2750 			tmp_src6 = fin->fin_ip6->ip6_src;
2751 			fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2752 			fin->fin_ip6->ip6_dst = tmp_src6;
2753 
2754 			rv = 0;
2755 		}
2756 		else
2757 			tcp = NULL;
2758 	}
2759 #endif
2760 
2761 	if (tcp != NULL) {
2762 		/*
2763 		 * Adjust TCP header:
2764 		 *	swap ports,
2765 		 *	set flags,
2766 		 *	set correct ACK number
2767 		 */
2768 		tmp_port = tcp->th_sport;
2769 		tcp->th_sport = tcp->th_dport;
2770 		tcp->th_dport = tmp_port;
2771 		old_ack = tcp->th_ack;
2772 		tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1);
2773 		tcp->th_seq = old_ack;
2774 		tcp->th_flags = TH_RST | TH_ACK;
2775 	}
2776 
2777 	return (rv);
2778 }
2779 
2780 /* ------------------------------------------------------------------------ */
2781 /* Function:    fr_make_icmp_v4                                             */
2782 /* Returns:     int - 0 on success, -1 on failure			    */
2783 /* Parameters:  fin(I) - pointer to packet information                      */
2784 /*                                                                          */
2785 /* Please read comment at fr_make_icmp() wrapper function to get an idea    */
2786 /* what is going to happen here and why. Once you read the comment there,   */
2787 /* continue here with next paragraph.					    */
2788 /*									    */
2789 /* To turn IPv4 packet into ICMPv4 response packet, these things must	    */
2790 /* happen here:								    */
2791 /*	(1) Original mblk is copied (duplicated).			    */
2792 /*                                                                          */
2793 /*	(2) ICMP header is created.					    */
2794 /*                                                                          */
2795 /*	(3) Link ICMP header with copy of original mblk, we have ICMPv4	    */
2796 /*	    data ready then.						    */
2797 /*                                                                          */
2798 /*      (4) Swap IP addresses in original mblk and adjust IP header data.   */
2799 /*                                                                          */
2800 /*	(5) The mblk containing original packet is trimmed to contain IP    */
2801 /*	    header only and ICMP chksum is computed.			    */
2802 /*                                                                          */
2803 /*	(6) The ICMP header we have from (3) is linked to original mblk,    */
2804 /*	    which now contains new IP header. If original packet was spread */
2805 /*	    over several mblks, only the first mblk is kept.		    */
2806 /* ------------------------------------------------------------------------ */
2807 static int fr_make_icmp_v4(fin)
2808 fr_info_t *fin;
2809 {
2810 	struct in_addr tmp_src;
2811 	tcphdr_t *tcp;
2812 	struct icmp *icmp;
2813 	mblk_t *mblk_icmp;
2814 	mblk_t *mblk_ip;
2815 	size_t icmp_pld_len;	/* octets to append to ICMP header */
2816 	size_t orig_iphdr_len;	/* length of IP header only */
2817 	uint32_t sum;
2818 	uint16_t *buf;
2819 	int len;
2820 
2821 
2822 	if (fin->fin_v != 4)
2823 		return (-1);
2824 
2825 	/*
2826 	 * If we are dealing with TCP, then packet must be SYN/FIN to be routed
2827 	 * by IP stack. If it is not SYN/FIN, then we must drop it silently.
2828 	 */
2829 	tcp = (tcphdr_t *) fin->fin_dp;
2830 
2831 	if ((fin->fin_p == IPPROTO_TCP) &&
2832 	    ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2833 		return (-1);
2834 
2835 	/*
2836 	 * Step (1)
2837 	 *
2838 	 * Make copy of original mblk.
2839 	 *
2840 	 * We want to copy as much data as necessary, not less, not more.  The
2841 	 * ICMPv4 payload length for unreachable messages is:
2842 	 *	original IP header + 8 bytes of L4 (if there are any).
2843 	 *
2844 	 * We determine if there are at least 8 bytes of L4 data following IP
2845 	 * header first.
2846 	 */
2847 	icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ?
2848 		ICMPERR_ICMPHLEN : fin->fin_dlen;
2849 	/*
2850 	 * Since we don't want to copy more data than necessary, we must trim
2851 	 * the original mblk here.  The right way (STREAMish) would be to use
2852 	 * adjmsg() to trim it.  However we would have to calculate the length
2853 	 * argument for adjmsg() from pointers we already have here.
2854 	 *
2855 	 * Since we have pointers and offsets, it's faster and easier for
2856 	 * us to just adjust pointers by hand instead of using adjmsg().
2857 	 */
2858 	fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp;
2859 	fin->fin_m->b_wptr += icmp_pld_len;
2860 	icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip;
2861 
2862 	/*
2863 	 * Also we don't want to copy any L2 stuff, which might precede IP
2864 	 * header, so we have have to set b_rptr to point to the start of IP
2865 	 * header.
2866 	 */
2867 	fin->fin_m->b_rptr += fin->fin_ipoff;
2868 	if ((mblk_ip = copyb(fin->fin_m)) == NULL)
2869 		return (-1);
2870 	fin->fin_m->b_rptr -= fin->fin_ipoff;
2871 
2872 	/*
2873 	 * Step (2)
2874 	 *
2875 	 * Create an ICMP header, which will be appened to original mblk later.
2876 	 * ICMP header is just another mblk.
2877 	 */
2878 	mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI);
2879 	if (mblk_icmp == NULL) {
2880 		FREE_MB_T(mblk_ip);
2881 		return (-1);
2882 	}
2883 
2884 	MTYPE(mblk_icmp) = M_DATA;
2885 	icmp = (struct icmp *) mblk_icmp->b_wptr;
2886 	icmp->icmp_type = ICMP_UNREACH;
2887 	icmp->icmp_code = fin->fin_icode & 0xFF;
2888 	icmp->icmp_void = 0;
2889 	icmp->icmp_cksum = 0;
2890 	mblk_icmp->b_wptr += ICMPERR_ICMPHLEN;
2891 
2892 	/*
2893 	 * Step (3)
2894 	 *
2895 	 * Complete ICMP packet - link ICMP header with L4 data from original
2896 	 * IP packet.
2897 	 */
2898 	linkb(mblk_icmp, mblk_ip);
2899 
2900 	/*
2901 	 * Step (4)
2902 	 *
2903 	 * Swap IP addresses and change IP header fields accordingly in
2904 	 * original IP packet.
2905 	 *
2906 	 * There is a rule option return-icmp as a dest for physical
2907 	 * interfaces. This option becomes useless for loopback, since IPF box
2908 	 * uses same address as a loopback destination. We ignore the option
2909 	 * here, the ICMP packet will always look like as it would have been
2910 	 * sent from the original destination host.
2911 	 */
2912 	tmp_src = fin->fin_ip->ip_src;
2913 	fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2914 	fin->fin_ip->ip_dst = tmp_src;
2915 	fin->fin_ip->ip_p = IPPROTO_ICMP;
2916 	fin->fin_ip->ip_sum = 0;
2917 
2918 	/*
2919 	 * Step (5)
2920 	 *
2921 	 * We trim the orignal mblk to hold IP header only.
2922 	 */
2923 	fin->fin_m->b_wptr = fin->fin_dp;
2924 	orig_iphdr_len = fin->fin_m->b_wptr -
2925 			    (fin->fin_m->b_rptr + fin->fin_ipoff);
2926 	fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN +
2927 			    orig_iphdr_len);
2928 
2929 	/*
2930 	 * ICMP chksum calculation. The data we are calculating chksum for are
2931 	 * spread over two mblks, therefore we have to use two for loops.
2932 	 *
2933 	 * First for loop computes chksum part for ICMP header.
2934 	 */
2935 	buf = (uint16_t *) icmp;
2936 	len = ICMPERR_ICMPHLEN;
2937 	for (sum = 0; len > 1; len -= 2)
2938 		sum += *buf++;
2939 
2940 	/*
2941 	 * Here we add chksum part for ICMP payload.
2942 	 */
2943 	len = icmp_pld_len;
2944 	buf = (uint16_t *) mblk_ip->b_rptr;
2945 	for (; len > 1; len -= 2)
2946 		sum += *buf++;
2947 
2948 	/*
2949 	 * Chksum is done.
2950 	 */
2951 	sum = (sum >> 16) + (sum & 0xffff);
2952 	sum += (sum >> 16);
2953 	icmp->icmp_cksum = ~sum;
2954 
2955 	/*
2956 	 * Step (6)
2957 	 *
2958 	 * Release all packet mblks, except the first one.
2959 	 */
2960 	if (fin->fin_m->b_cont != NULL) {
2961 		FREE_MB_T(fin->fin_m->b_cont);
2962 	}
2963 
2964 	/*
2965 	 * Append ICMP payload to first mblk, which already contains new IP
2966 	 * header.
2967 	 */
2968 	linkb(fin->fin_m, mblk_icmp);
2969 
2970 	return (0);
2971 }
2972 
2973 #ifdef USE_INET6
2974 /* ------------------------------------------------------------------------ */
2975 /* Function:    fr_make_icmp_v6                                             */
2976 /* Returns:     int - 0 on success, -1 on failure			    */
2977 /* Parameters:  fin(I) - pointer to packet information                      */
2978 /*									    */
2979 /* Please read comment at fr_make_icmp() wrapper function to get an idea    */
2980 /* what and why is going to happen here. Once you read the comment there,   */
2981 /* continue here with next paragraph.					    */
2982 /*									    */
2983 /* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response.    */
2984 /* The algorithm is fairly simple:					    */
2985 /*	1) We need to get copy of complete mblk.			    */
2986 /*									    */
2987 /*	2) New ICMPv6 header is created.				    */
2988 /*									    */
2989 /*	3) The copy of original mblk with packet is linked to ICMPv6	    */
2990 /*	   header.							    */
2991 /*									    */
2992 /*	4) The checksum must be adjusted.				    */
2993 /*									    */
2994 /*	5) IP addresses in original mblk are swapped and IP header data	    */
2995 /*	   are adjusted (protocol number).				    */
2996 /*									    */
2997 /*	6) Original mblk is trimmed to hold IPv6 header only, then it is    */
2998 /*	   linked with the ICMPv6 data we got from (3).			    */
2999 /* ------------------------------------------------------------------------ */
3000 static int fr_make_icmp_v6(fin)
3001 fr_info_t *fin;
3002 {
3003 	struct icmp6_hdr *icmp6;
3004 	tcphdr_t *tcp;
3005 	struct in6_addr	tmp_src6;
3006 	size_t icmp_pld_len;
3007 	mblk_t *mblk_ip, *mblk_icmp;
3008 
3009 	if (fin->fin_v != 6)
3010 		return (-1);
3011 
3012 	/*
3013 	 * If we are dealing with TCP, then packet must SYN/FIN to be routed by
3014 	 * IP stack. If it is not SYN/FIN, then we must drop it silently.
3015 	 */
3016 	tcp = (tcphdr_t *) fin->fin_dp;
3017 
3018 	if ((fin->fin_p == IPPROTO_TCP) &&
3019 	    ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
3020 		return (-1);
3021 
3022 	/*
3023 	 * Step (1)
3024 	 *
3025 	 * We need to copy complete packet in case of IPv6, no trimming is
3026 	 * needed (except the L2 headers).
3027 	 */
3028 	icmp_pld_len = M_LEN(fin->fin_m);
3029 	fin->fin_m->b_rptr += fin->fin_ipoff;
3030 	if ((mblk_ip = copyb(fin->fin_m)) == NULL)
3031 		return (-1);
3032 	fin->fin_m->b_rptr -= fin->fin_ipoff;
3033 
3034 	/*
3035 	 * Step (2)
3036 	 *
3037 	 * Allocate and create ICMP header.
3038 	 */
3039 	mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr),
3040 			BPRI_HI);
3041 
3042 	if (mblk_icmp == NULL)
3043 		return (-1);
3044 
3045 	MTYPE(mblk_icmp) = M_DATA;
3046 	icmp6 =  (struct icmp6_hdr *) mblk_icmp->b_wptr;
3047 	icmp6->icmp6_type = ICMP6_DST_UNREACH;
3048 	icmp6->icmp6_code = fin->fin_icode & 0xFF;
3049 	icmp6->icmp6_data32[0] = 0;
3050 	mblk_icmp->b_wptr += sizeof (struct icmp6_hdr);
3051 
3052 	/*
3053 	 * Step (3)
3054 	 *
3055 	 * Link the copy of IP packet to ICMP header.
3056 	 */
3057 	linkb(mblk_icmp, mblk_ip);
3058 
3059 	/*
3060 	 * Step (4)
3061 	 *
3062 	 * Calculate chksum - this is much more easier task than in case of
3063 	 * IPv4  - ICMPv6 chksum only covers IP addresses, and payload length.
3064 	 * We are making compensation just for change of packet length.
3065 	 */
3066 	icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr);
3067 
3068 	/*
3069 	 * Step (5)
3070 	 *
3071 	 * Swap IP addresses.
3072 	 */
3073 	tmp_src6 = fin->fin_ip6->ip6_src;
3074 	fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
3075 	fin->fin_ip6->ip6_dst = tmp_src6;
3076 
3077 	/*
3078 	 * and adjust IP header data.
3079 	 */
3080 	fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6;
3081 	fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr));
3082 
3083 	/*
3084 	 * Step (6)
3085 	 *
3086 	 * We must release all linked mblks from original packet and keep only
3087 	 * the first mblk with IP header to link ICMP data.
3088 	 */
3089 	fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t);
3090 
3091 	if (fin->fin_m->b_cont != NULL) {
3092 		FREE_MB_T(fin->fin_m->b_cont);
3093 	}
3094 
3095 	/*
3096 	 * Append ICMP payload to IP header.
3097 	 */
3098 	linkb(fin->fin_m, mblk_icmp);
3099 
3100 	return (0);
3101 }
3102 #endif	/* USE_INET6 */
3103 
3104 /* ------------------------------------------------------------------------ */
3105 /* Function:    fr_make_icmp                                                */
3106 /* Returns:     int - 0 on success, -1 on failure			    */
3107 /* Parameters:  fin(I) - pointer to packet information                      */
3108 /*                                                                          */
3109 /* We must alter the original mblks passed to IPF from IP stack via	    */
3110 /* FW_HOOKS. The reasons why we must alter packet are discussed within	    */
3111 /* comment at fr_make_rst() function.					    */
3112 /*									    */
3113 /* The fr_make_icmp() function acts as a wrapper, which passes the code	    */
3114 /* execution to	fr_make_icmp_v4() or fr_make_icmp_v6() depending on	    */
3115 /* protocol version. However there are some details, which are common to    */
3116 /* both IP versions. The details are going to be explained here.	    */
3117 /*                                                                          */
3118 /* The packet looks as follows:						    */
3119 /*    xxx | IP hdr | IP payload    ...	| 				    */
3120 /*    ^   ^        ^            	^				    */
3121 /*    |   |        |            	|				    */
3122 /*    |   |        |		fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */
3123 /*    |   |        |							    */
3124 /*    |   |        `- fin_m->fin_dp (in case of IPv4 points to L4 header)   */
3125 /*    |   |								    */
3126 /*    |   `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case  */
3127 /*    |      of loopback)						    */
3128 /*    |   								    */
3129 /*    `- fin_m->b_rptr -  points to L2 header in case of physical NIC	    */
3130 /*                                                                          */
3131 /* All relevant IP headers are pulled up into the first mblk. It happened   */
3132 /* well in advance before the matching rule was found (the rule, which took */
3133 /* us here, to fr_make_icmp() function).				    */
3134 /*                                                                          */
3135 /* Both functions will turn packet passed in fin->fin_m mblk into a new	    */
3136 /* packet. New packet will be represented as chain of mblks.		    */
3137 /* orig mblk |- b_cont ---.						    */
3138 /*    ^                    `-> ICMP hdr |- b_cont--.			    */
3139 /*    |	                          ^	            `-> duped orig mblk	    */
3140 /*    |                           |				^	    */
3141 /*    `- The original mblk        |				|	    */
3142 /*       will be trimmed to       |				|	    */
3143 /*       to contain IP header     |				|	    */
3144 /*       only                     |				|	    */
3145 /*                                |				|	    */
3146 /*                                `- This is newly		|           */
3147 /*                                   allocated mblk to		|	    */
3148 /*                                   hold ICMPv6 data.		|	    */
3149 /*								|	    */
3150 /*								|	    */
3151 /*								|	    */
3152 /*	    This is the copy of original mblk, it will contain -'	    */
3153 /*	    orignal IP  packet in case of ICMPv6. In case of		    */
3154 /*	    ICMPv4 it will contain up to 8 bytes of IP payload		    */
3155 /*	    (TCP/UDP/L4) data from original packet.			    */
3156 /* ------------------------------------------------------------------------ */
3157 int fr_make_icmp(fin)
3158 fr_info_t *fin;
3159 {
3160 	int rv;
3161 
3162 	if (fin->fin_v == 4)
3163 		rv = fr_make_icmp_v4(fin);
3164 #ifdef USE_INET6
3165 	else if (fin->fin_v == 6)
3166 		rv = fr_make_icmp_v6(fin);
3167 #endif
3168 	else
3169 		rv = -1;
3170 
3171 	return (rv);
3172 }
3173 
/* ------------------------------------------------------------------------ */
/* Function:    fr_buf_sum                                                  */
/* Returns:     uint32_t - plain (unfolded) sum of the 16-bit words in buf  */
/* Parameters:  buf - pointer to the data to be summed, must be aligned for */
/*                    16-bit access                                         */
/*              len - number of bytes in buf                                */
/*                                                                          */
/* Adds up buf as a sequence of 16-bit words read in memory order. When len */
/* is odd, the last byte is counted as if it were followed by a zero byte.  */
/* The result is an intermediate value for chksum calculation; folding the  */
/* carries and complementing is left to the caller.                         */
/* ------------------------------------------------------------------------ */
static uint32_t fr_buf_sum(buf, len)
const void *buf;
unsigned int len;
{
	const uint16_t	*word = (const uint16_t *)buf;
	unsigned int	remaining;
	uint32_t	total = 0;

	/* Whole 16-bit words first. */
	for (remaining = len; remaining > 1; remaining -= 2)
		total += *word++;

	if (remaining == 1) {
		/*
		 * One trailing byte: htons() of the byte shifted into the
		 * upper half yields the same value a 16-bit load of
		 * { byte, 0 } would give on this host.
		 */
		const unsigned char *last = (const unsigned char *)word;

		total += htons((*last) << 8);
	}

	return (total);
}
3200 
3201 /* ------------------------------------------------------------------------ */
3202 /* Function:    fr_calc_chksum						    */
3203 /* Returns:     void							    */
3204 /* Parameters:  fin - pointer to fr_info_t instance with packet data	    */
3205 /*              pkt - pointer to duplicated packet			    */
3206 /*                                                                          */
3207 /* Calculates all chksums (L3, L4) for packet pkt. Works for both IP	    */
3208 /* versions.								    */
3209 /* ------------------------------------------------------------------------ */
3210 void fr_calc_chksum(fin, pkt)
3211 fr_info_t *fin;
3212 mb_t *pkt;
3213 {
3214 	struct pseudo_hdr {
3215 		union {
3216 			struct in_addr	in4;
3217 #ifdef USE_INET6
3218 			struct in6_addr	in6;
3219 #endif
3220 		} src_addr;
3221 		union {
3222 			struct in_addr	in4;
3223 #ifdef USE_INET6
3224 			struct in6_addr	in6;
3225 #endif
3226 		} dst_addr;
3227 		char		zero;
3228 		char		proto;
3229 		uint16_t	len;
3230 	}	phdr;
3231 	uint32_t	sum, ip_sum;
3232 	void	*buf;
3233 	uint16_t	*l4_csum_p;
3234 	tcphdr_t	*tcp;
3235 	udphdr_t	*udp;
3236 	icmphdr_t	*icmp;
3237 #ifdef USE_INET6
3238 	struct icmp6_hdr	*icmp6;
3239 #endif
3240 	ip_t		*ip;
3241 	unsigned int	len;
3242 	int		pld_len;
3243 
3244 	/*
3245 	 * We need to pullup the packet to the single continuous buffer to avoid
3246 	 * potential misaligment of b_rptr member in mblk chain.
3247 	 */
3248 	if (pullupmsg(pkt, -1) == 0) {
3249 		cmn_err(CE_WARN, "Failed to pullup loopback pkt -> chksum"
3250 		    " will not be computed by IPF");
3251 		return;
3252 	}
3253 
3254 	/*
3255 	 * It is guaranteed IP header starts right at b_rptr, because we are
3256 	 * working with a copy of the original packet.
3257 	 *
3258 	 * Compute pseudo header chksum for TCP and UDP.
3259 	 */
3260 	if ((fin->fin_p == IPPROTO_UDP) ||
3261 	    (fin->fin_p == IPPROTO_TCP)) {
3262 		bzero(&phdr, sizeof (phdr));
3263 #ifdef USE_INET6
3264 		if (fin->fin_v == 6) {
3265 			phdr.src_addr.in6 = fin->fin_srcip6;
3266 			phdr.dst_addr.in6 = fin->fin_dstip6;
3267 		} else {
3268 			phdr.src_addr.in4 = fin->fin_src;
3269 			phdr.dst_addr.in4 = fin->fin_dst;
3270 		}
3271 #else
3272 		phdr.src_addr.in4 = fin->fin_src;
3273 		phdr.dst_addr.in4 = fin->fin_dst;
3274 #endif
3275 		phdr.zero = (char) 0;
3276 		phdr.proto = fin->fin_p;
3277 		phdr.len = htons((uint16_t)fin->fin_dlen);
3278 		sum = fr_buf_sum(&phdr, (unsigned int)sizeof (phdr));
3279 	} else {
3280 		sum = 0;
3281 	}
3282 
3283 	/*
3284 	 * Set pointer to the L4 chksum field in the packet, set buf pointer to
3285 	 * the L4 header start.
3286 	 */
3287 	switch (fin->fin_p) {
3288 		case IPPROTO_UDP:
3289 			udp = (udphdr_t *)(pkt->b_rptr + fin->fin_hlen);
3290 			l4_csum_p = &udp->uh_sum;
3291 			buf = udp;
3292 			break;
3293 		case IPPROTO_TCP:
3294 			tcp = (tcphdr_t *)(pkt->b_rptr + fin->fin_hlen);
3295 			l4_csum_p = &tcp->th_sum;
3296 			buf = tcp;
3297 			break;
3298 		case IPPROTO_ICMP:
3299 			icmp = (icmphdr_t *)(pkt->b_rptr + fin->fin_hlen);
3300 			l4_csum_p = &icmp->icmp_cksum;
3301 			buf = icmp;
3302 			break;
3303 #ifdef USE_INET6
3304 		case IPPROTO_ICMPV6:
3305 			icmp6 = (struct icmp6_hdr *)(pkt->b_rptr + fin->fin_hlen);
3306 			l4_csum_p = &icmp6->icmp6_cksum;
3307 			buf = icmp6;
3308 			break;
3309 #endif
3310 		default:
3311 			l4_csum_p = NULL;
3312 	}
3313 
3314 	/*
3315 	 * Compute L4 chksum if needed.
3316 	 */
3317 	if (l4_csum_p != NULL) {
3318 		*l4_csum_p = (uint16_t)0;
3319 		pld_len = fin->fin_dlen;
3320 		len = pkt->b_wptr - (unsigned char *)buf;
3321 		ASSERT(len == pld_len);
3322 		/*
3323 		 * Add payload sum to pseudoheader sum.
3324 		 */
3325 		sum += fr_buf_sum(buf, len);
3326 		while (sum >> 16)
3327 			sum = (sum & 0xFFFF) + (sum >> 16);
3328 
3329 		*l4_csum_p = ~((uint16_t)sum);
3330 		DTRACE_PROBE1(l4_sum, uint16_t, *l4_csum_p);
3331 	}
3332 
3333 	/*
3334 	 * The IP header chksum is needed just for IPv4.
3335 	 */
3336 	if (fin->fin_v == 4) {
3337 		/*
3338 		 * Compute IPv4 header chksum.
3339 		 */
3340 		ip = (ip_t *)pkt->b_rptr;
3341 		ip->ip_sum = (uint16_t)0;
3342 		ip_sum = fr_buf_sum(ip, (unsigned int)fin->fin_hlen);
3343 		while (ip_sum >> 16)
3344 			ip_sum = (ip_sum & 0xFFFF) + (ip_sum >> 16);
3345 
3346 		ip->ip_sum = ~((uint16_t)ip_sum);
3347 		DTRACE_PROBE1(l3_sum, uint16_t, ip->ip_sum);
3348 	}
3349 
3350 	return;
3351 }
3352 
3353 #endif	/* _KERNEL && SOLARIS2 >= 10 */
3354