xref: /titanic_44/usr/src/uts/common/inet/ipf/ip_fil_solaris.c (revision 546c3aa8e92003d6d783c7172f237b41667539e2)
1 /*
2  * Copyright (C) 1993-2001, 2003 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
7  *
8  * Copyright (c) 2015, Joyent, Inc.  All rights reserved.
9  */
10 
11 #if !defined(lint)
12 static const char sccsid[] = "@(#)ip_fil_solaris.c	1.7 07/22/06 (C) 1993-2000 Darren Reed";
13 static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $";
14 #endif
15 
16 #include <sys/types.h>
17 #include <sys/errno.h>
18 #include <sys/param.h>
19 #include <sys/cpuvar.h>
20 #include <sys/open.h>
21 #include <sys/ioctl.h>
22 #include <sys/filio.h>
23 #include <sys/systm.h>
24 #include <sys/strsubr.h>
25 #include <sys/cred.h>
26 #include <sys/ddi.h>
27 #include <sys/sunddi.h>
28 #include <sys/ksynch.h>
29 #include <sys/kmem.h>
30 #include <sys/mkdev.h>
31 #include <sys/protosw.h>
32 #include <sys/socket.h>
33 #include <sys/dditypes.h>
34 #include <sys/cmn_err.h>
35 #include <sys/zone.h>
36 #include <net/if.h>
37 #include <net/af.h>
38 #include <net/route.h>
39 #include <netinet/in.h>
40 #include <netinet/in_systm.h>
41 #include <netinet/ip.h>
42 #include <netinet/ip_var.h>
43 #include <netinet/tcp.h>
44 #include <netinet/udp.h>
45 #include <netinet/tcpip.h>
46 #include <netinet/ip_icmp.h>
47 #include "netinet/ip_compat.h"
48 #ifdef	USE_INET6
49 # include <netinet/icmp6.h>
50 #endif
51 #include "netinet/ip_fil.h"
52 #include "netinet/ip_nat.h"
53 #include "netinet/ip_frag.h"
54 #include "netinet/ip_state.h"
55 #include "netinet/ip_auth.h"
56 #include "netinet/ip_proxy.h"
57 #include "netinet/ipf_stack.h"
58 #ifdef	IPFILTER_LOOKUP
59 # include "netinet/ip_lookup.h"
60 #endif
61 #include <inet/ip_ire.h>
62 
63 #include <sys/md5.h>
64 #include <sys/neti.h>
65 
66 static	int	frzerostats __P((caddr_t, ipf_stack_t *));
67 static	int	fr_setipfloopback __P((int, ipf_stack_t *));
68 static	int	fr_enableipf __P((ipf_stack_t *, int));
69 static	int	fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp));
70 static	int	ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *));
71 static	int	ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *));
72 static	int	ipf_hook __P((hook_data_t, int, int, void *));
73 static	int	ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *));
74 static	int	ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *));
75 static	int	ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t,
76     void *));
77 static	int	ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *));
78 static	int	ipf_hook4 __P((hook_data_t, int, int, void *));
79 static	int	ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *));
80 static	int	ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *));
81 static	int	ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t,
82     void *));
83 static	int	ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t,
84     void *));
85 static	int     ipf_hook6 __P((hook_data_t, int, int, void *));
86 extern	int	ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
87 extern	int	ipf_frruleiter __P((void *, int, void *, ipf_stack_t *));
88 
89 #if SOLARIS2 < 10
90 #if SOLARIS2 >= 7
91 u_int		*ip_ttl_ptr = NULL;
92 u_int		*ip_mtudisc = NULL;
93 # if SOLARIS2 >= 8
94 int		*ip_forwarding = NULL;
95 u_int		*ip6_forwarding = NULL;
96 # else
97 u_int		*ip_forwarding = NULL;
98 # endif
99 #else
100 u_long		*ip_ttl_ptr = NULL;
101 u_long		*ip_mtudisc = NULL;
102 u_long		*ip_forwarding = NULL;
103 #endif
104 #endif
105 
106 vmem_t	*ipf_minor;	/* minor number arena */
107 void 	*ipf_state;	/* DDI state */
108 
109 /*
110  * GZ-controlled and per-zone stacks:
111  *
112  * For each non-global zone, we create two ipf stacks: the per-zone stack and
113  * the GZ-controlled stack.  The per-zone stack can be controlled and observed
114  * from inside the zone or from the global zone.  The GZ-controlled stack can
115  * only be controlled and observed from the global zone (though the rules
116  * still only affect that non-global zone).
117  *
118  * The two hooks are always arranged so that the GZ-controlled stack is always
119  * "outermost" with respect to the zone.  The traffic flow then looks like
120  * this:
121  *
122  * Inbound:
123  *
124  *     nic ---> [ GZ-controlled rules ] ---> [ per-zone rules ] ---> zone
125  *
126  * Outbound:
127  *
128  *     nic <--- [ GZ-controlled rules ] <--- [ per-zone rules ] <--- zone
129  */
130 
/*
 * Hook names.  Every event has two names: the plain one and a "_gz" one.
 * iplattach() uses the "_gz" name for a GZ-controlled stack and the plain
 * name otherwise, and passes the other name of the pair as the ordering
 * hint value.
 */

/* IPv4 */
char *hook4_nicevents		= "ipfilter_hook4_nicevents";
char *hook4_nicevents_gz	= "ipfilter_hook4_nicevents_gz";
char *hook4_in			= "ipfilter_hook4_in";
char *hook4_in_gz		= "ipfilter_hook4_in_gz";
char *hook4_out			= "ipfilter_hook4_out";
char *hook4_out_gz		= "ipfilter_hook4_out_gz";
char *hook4_loop_in		= "ipfilter_hook4_loop_in";
char *hook4_loop_in_gz		= "ipfilter_hook4_loop_in_gz";
char *hook4_loop_out		= "ipfilter_hook4_loop_out";
char *hook4_loop_out_gz		= "ipfilter_hook4_loop_out_gz";

/* IPv6 */
char *hook6_nicevents		= "ipfilter_hook6_nicevents";
char *hook6_nicevents_gz	= "ipfilter_hook6_nicevents_gz";
char *hook6_in			= "ipfilter_hook6_in";
char *hook6_in_gz		= "ipfilter_hook6_in_gz";
char *hook6_out			= "ipfilter_hook6_out";
char *hook6_out_gz		= "ipfilter_hook6_out_gz";
char *hook6_loop_in		= "ipfilter_hook6_loop_in";
char *hook6_loop_in_gz		= "ipfilter_hook6_loop_in_gz";
char *hook6_loop_out		= "ipfilter_hook6_loop_out";
char *hook6_loop_out_gz		= "ipfilter_hook6_loop_out_gz";
154 
155 /* ------------------------------------------------------------------------ */
156 /* Function:    ipldetach                                                   */
157 /* Returns:     int - 0 == success, else error.                             */
158 /* Parameters:  Nil                                                         */
159 /*                                                                          */
160 /* This function is responsible for undoing anything that might have been   */
161 /* done in a call to iplattach().  It must be able to clean up from a call  */
162 /* to iplattach() that did not succeed.  Why might that happen?  Someone    */
163 /* configures a table to be so large that we cannot allocate enough memory  */
164 /* for it.                                                                  */
165 /* ------------------------------------------------------------------------ */
ipldetach(ifs)166 int ipldetach(ifs)
167 ipf_stack_t *ifs;
168 {
169 
170 	ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk));
171 
172 #if SOLARIS2 < 10
173 
174 	if (ifs->ifs_fr_control_forwarding & 2) {
175 		if (ip_forwarding != NULL)
176 			*ip_forwarding = 0;
177 #if SOLARIS2 >= 8
178 		if (ip6_forwarding != NULL)
179 			*ip6_forwarding = 0;
180 #endif
181 	}
182 #endif
183 
184 	/*
185 	 * This lock needs to be dropped around the net_hook_unregister calls
186 	 * because we can deadlock here with:
187 	 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
188 	 * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running)
189 	 */
190 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
191 
192 #define	UNDO_HOOK(_f, _b, _e, _h)					\
193 	do {								\
194 		if (ifs->_f != NULL) {					\
195 			if (ifs->_b) {					\
196 				int tmp = net_hook_unregister(ifs->_f,	\
197 					   _e, ifs->_h);		\
198 				ifs->_b = (tmp != 0 && tmp != ENXIO);	\
199 				if (!ifs->_b && ifs->_h != NULL) {	\
200 					hook_free(ifs->_h);		\
201 					ifs->_h = NULL;			\
202 				}					\
203 			} else if (ifs->_h != NULL) {			\
204 				hook_free(ifs->_h);			\
205 				ifs->_h = NULL;				\
206 			}						\
207 		}							\
208 		_NOTE(CONSTCOND)					\
209 	} while (0)
210 
211 	/*
212 	 * Remove IPv6 Hooks
213 	 */
214 	if (ifs->ifs_ipf_ipv6 != NULL) {
215 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in,
216 			  NH_PHYSICAL_IN, ifs_ipfhook6_in);
217 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out,
218 			  NH_PHYSICAL_OUT, ifs_ipfhook6_out);
219 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events,
220 			  NH_NIC_EVENTS, ifs_ipfhook6_nicevents);
221 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in,
222 			  NH_LOOPBACK_IN, ifs_ipfhook6_loop_in);
223 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out,
224 			  NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out);
225 
226 		if (net_protocol_release(ifs->ifs_ipf_ipv6) != 0)
227 			goto detach_failed;
228 		ifs->ifs_ipf_ipv6 = NULL;
229         }
230 
231 	/*
232 	 * Remove IPv4 Hooks
233 	 */
234 	if (ifs->ifs_ipf_ipv4 != NULL) {
235 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in,
236 			  NH_PHYSICAL_IN, ifs_ipfhook4_in);
237 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out,
238 			  NH_PHYSICAL_OUT, ifs_ipfhook4_out);
239 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events,
240 			  NH_NIC_EVENTS, ifs_ipfhook4_nicevents);
241 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in,
242 			  NH_LOOPBACK_IN, ifs_ipfhook4_loop_in);
243 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out,
244 			  NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out);
245 
246 		if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0)
247 			goto detach_failed;
248 		ifs->ifs_ipf_ipv4 = NULL;
249 	}
250 
251 #undef UNDO_HOOK
252 
253 #ifdef	IPFDEBUG
254 	cmn_err(CE_CONT, "ipldetach()\n");
255 #endif
256 
257 	WRITE_ENTER(&ifs->ifs_ipf_global);
258 	fr_deinitialise(ifs);
259 
260 	(void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs);
261 	(void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs);
262 
263 	if (ifs->ifs_ipf_locks_done == 1) {
264 		MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock);
265 		MUTEX_DESTROY(&ifs->ifs_ipf_rw);
266 		RW_DESTROY(&ifs->ifs_ipf_tokens);
267 		RW_DESTROY(&ifs->ifs_ipf_ipidfrag);
268 		ifs->ifs_ipf_locks_done = 0;
269 	}
270 
271 	if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out ||
272 	    ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in ||
273 	    ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events ||
274 	    ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out ||
275 	    ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out)
276 		return -1;
277 
278 	return 0;
279 
280 detach_failed:
281 	WRITE_ENTER(&ifs->ifs_ipf_global);
282 	return -1;
283 }
284 
iplattach(ifs)285 int iplattach(ifs)
286 ipf_stack_t *ifs;
287 {
288 #if SOLARIS2 < 10
289 	int i;
290 #endif
291 	netid_t id = ifs->ifs_netid;
292 
293 #ifdef	IPFDEBUG
294 	cmn_err(CE_CONT, "iplattach()\n");
295 #endif
296 
297 	ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk));
298 	ifs->ifs_fr_flags = IPF_LOGGING;
299 #ifdef _KERNEL
300 	ifs->ifs_fr_update_ipid = 0;
301 #else
302 	ifs->ifs_fr_update_ipid = 1;
303 #endif
304 	ifs->ifs_fr_minttl = 4;
305 	ifs->ifs_fr_icmpminfragmtu = 68;
306 #if defined(IPFILTER_DEFAULT_BLOCK)
307 	ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH;
308 #else
309 	ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH;
310 #endif
311 
312 	bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache));
313 	MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex");
314 	MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex");
315 	RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock");
316 	RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock");
317 	ifs->ifs_ipf_locks_done = 1;
318 
319 	if (fr_initialise(ifs) < 0)
320 		return -1;
321 
322 	/*
323 	 * For incoming packets, we want the GZ-controlled hooks to run before
324 	 * the per-zone hooks, regardless of what order they're are installed.
325 	 * See the "GZ-controlled and per-zone stacks" comment block at the top
326 	 * of this file.
327 	 */
328 #define HOOK_INIT_GZ_BEFORE(x, fn, n, gzn, a)				\
329 	HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs);	\
330 	(x)->h_hint = ifs->ifs_gz_controlled ? HH_BEFORE : HH_AFTER;	\
331 	(x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn);
332 
333 	HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4,
334 		  hook4_nicevents, hook4_nicevents_gz, ifs);
335 	HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_in, ipf_hook4_in,
336 		  hook4_in, hook4_in_gz, ifs);
337 	HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in,
338 		  hook4_loop_in, hook4_loop_in_gz, ifs);
339 
340 	/*
341 	 * For outgoing packets, we want the GZ-controlled hooks to run after
342 	 * the per-zone hooks, regardless of what order they're are installed.
343 	 * See the "GZ-controlled and per-zone stacks" comment block at the top
344 	 * of this file.
345 	 */
346 #define HOOK_INIT_GZ_AFTER(x, fn, n, gzn, a)				\
347 	HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs);	\
348 	(x)->h_hint = ifs->ifs_gz_controlled ? HH_AFTER : HH_BEFORE;	\
349 	(x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn);
350 
351 	HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_out, ipf_hook4_out,
352 		  hook4_out, hook4_out_gz, ifs);
353 	HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out,
354 		  hook4_loop_out, hook4_loop_out_gz, ifs);
355 
356 	/*
357 	 * If we hold this lock over all of the net_hook_register calls, we
358 	 * can cause a deadlock to occur with the following lock ordering:
359 	 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
360 	 * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path)
361 	 */
362 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
363 
364 	/*
365 	 * Add IPv4 hooks
366 	 */
367 	ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET);
368 	if (ifs->ifs_ipf_ipv4 == NULL)
369 		goto hookup_failed;
370 
371 	ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4,
372 	    NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0);
373 	if (!ifs->ifs_hook4_nic_events)
374 		goto hookup_failed;
375 
376 	ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4,
377 	    NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0);
378 	if (!ifs->ifs_hook4_physical_in)
379 		goto hookup_failed;
380 
381 	ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4,
382 	    NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0);
383 	if (!ifs->ifs_hook4_physical_out)
384 		goto hookup_failed;
385 
386 	if (ifs->ifs_ipf_loopback) {
387 		ifs->ifs_hook4_loopback_in = (net_hook_register(
388 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
389 		    ifs->ifs_ipfhook4_loop_in) == 0);
390 		if (!ifs->ifs_hook4_loopback_in)
391 			goto hookup_failed;
392 
393 		ifs->ifs_hook4_loopback_out = (net_hook_register(
394 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
395 		    ifs->ifs_ipfhook4_loop_out) == 0);
396 		if (!ifs->ifs_hook4_loopback_out)
397 			goto hookup_failed;
398 	}
399 
400 	/*
401 	 * Add IPv6 hooks
402 	 */
403 	ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6);
404 	if (ifs->ifs_ipf_ipv6 == NULL)
405 		goto hookup_failed;
406 
407 	HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6,
408 		  hook6_nicevents, hook6_nicevents_gz, ifs);
409 	HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_in, ipf_hook6_in,
410 		  hook6_in, hook6_in_gz, ifs);
411 	HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in,
412 		  hook6_loop_in, hook6_loop_in_gz, ifs);
413 	HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_out, ipf_hook6_out,
414 		  hook6_out, hook6_out_gz, ifs);
415 	HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out,
416 		  hook6_loop_out, hook6_loop_out_gz, ifs);
417 
418 	ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6,
419 	    NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0);
420 	if (!ifs->ifs_hook6_nic_events)
421 		goto hookup_failed;
422 
423 	ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6,
424 	    NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0);
425 	if (!ifs->ifs_hook6_physical_in)
426 		goto hookup_failed;
427 
428 	ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6,
429 	    NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0);
430 	if (!ifs->ifs_hook6_physical_out)
431 		goto hookup_failed;
432 
433 	if (ifs->ifs_ipf_loopback) {
434 		ifs->ifs_hook6_loopback_in = (net_hook_register(
435 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
436 		    ifs->ifs_ipfhook6_loop_in) == 0);
437 		if (!ifs->ifs_hook6_loopback_in)
438 			goto hookup_failed;
439 
440 		ifs->ifs_hook6_loopback_out = (net_hook_register(
441 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
442 		    ifs->ifs_ipfhook6_loop_out) == 0);
443 		if (!ifs->ifs_hook6_loopback_out)
444 			goto hookup_failed;
445 	}
446 
447 	/*
448 	 * Reacquire ipf_global, now it is safe.
449 	 */
450 	WRITE_ENTER(&ifs->ifs_ipf_global);
451 
452 /* Do not use private interface ip_params_arr[] in Solaris 10 */
453 #if SOLARIS2 < 10
454 
455 #if SOLARIS2 >= 8
456 	ip_forwarding = &ip_g_forward;
457 #endif
458 	/*
459 	 * XXX - There is no terminator for this array, so it is not possible
460 	 * to tell if what we are looking for is missing and go off the end
461 	 * of the array.
462 	 */
463 
464 #if SOLARIS2 <= 8
465 	for (i = 0; ; i++) {
466 		if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) {
467 			ip_ttl_ptr = &ip_param_arr[i].ip_param_value;
468 		} else if (!strcmp(ip_param_arr[i].ip_param_name,
469 			    "ip_path_mtu_discovery")) {
470 			ip_mtudisc = &ip_param_arr[i].ip_param_value;
471 		}
472 #if SOLARIS2 < 8
473 		else if (!strcmp(ip_param_arr[i].ip_param_name,
474 			    "ip_forwarding")) {
475 			ip_forwarding = &ip_param_arr[i].ip_param_value;
476 		}
477 #else
478 		else if (!strcmp(ip_param_arr[i].ip_param_name,
479 			    "ip6_forwarding")) {
480 			ip6_forwarding = &ip_param_arr[i].ip_param_value;
481 		}
482 #endif
483 
484 		if (ip_mtudisc != NULL && ip_ttl_ptr != NULL &&
485 #if SOLARIS2 >= 8
486 		    ip6_forwarding != NULL &&
487 #endif
488 		    ip_forwarding != NULL)
489 			break;
490 	}
491 #endif
492 
493 	if (ifs->ifs_fr_control_forwarding & 1) {
494 		if (ip_forwarding != NULL)
495 			*ip_forwarding = 1;
496 #if SOLARIS2 >= 8
497 		if (ip6_forwarding != NULL)
498 			*ip6_forwarding = 1;
499 #endif
500 	}
501 
502 #endif
503 
504 	return 0;
505 hookup_failed:
506 	WRITE_ENTER(&ifs->ifs_ipf_global);
507 	return -1;
508 }
509 
fr_setipfloopback(set,ifs)510 static	int	fr_setipfloopback(set, ifs)
511 int set;
512 ipf_stack_t *ifs;
513 {
514 	if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL)
515 		return EFAULT;
516 
517 	if (set && !ifs->ifs_ipf_loopback) {
518 		ifs->ifs_ipf_loopback = 1;
519 
520 		ifs->ifs_hook4_loopback_in = (net_hook_register(
521 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
522 		    ifs->ifs_ipfhook4_loop_in) == 0);
523 		if (!ifs->ifs_hook4_loopback_in)
524 			return EINVAL;
525 
526 		ifs->ifs_hook4_loopback_out = (net_hook_register(
527 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
528 		    ifs->ifs_ipfhook4_loop_out) == 0);
529 		if (!ifs->ifs_hook4_loopback_out)
530 			return EINVAL;
531 
532 		ifs->ifs_hook6_loopback_in = (net_hook_register(
533 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
534 		    ifs->ifs_ipfhook6_loop_in) == 0);
535 		if (!ifs->ifs_hook6_loopback_in)
536 			return EINVAL;
537 
538 		ifs->ifs_hook6_loopback_out = (net_hook_register(
539 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
540 		    ifs->ifs_ipfhook6_loop_out) == 0);
541 		if (!ifs->ifs_hook6_loopback_out)
542 			return EINVAL;
543 
544 	} else if (!set && ifs->ifs_ipf_loopback) {
545 		ifs->ifs_ipf_loopback = 0;
546 
547 		ifs->ifs_hook4_loopback_in =
548 		    (net_hook_unregister(ifs->ifs_ipf_ipv4,
549 		    NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
550 		if (ifs->ifs_hook4_loopback_in)
551 			return EBUSY;
552 
553 		ifs->ifs_hook4_loopback_out =
554 		    (net_hook_unregister(ifs->ifs_ipf_ipv4,
555 		    NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0);
556 		if (ifs->ifs_hook4_loopback_out)
557 			return EBUSY;
558 
559 		ifs->ifs_hook6_loopback_in =
560 		    (net_hook_unregister(ifs->ifs_ipf_ipv6,
561 		    NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
562 		if (ifs->ifs_hook6_loopback_in)
563 			return EBUSY;
564 
565 		ifs->ifs_hook6_loopback_out =
566 		    (net_hook_unregister(ifs->ifs_ipf_ipv6,
567 		    NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0);
568 		if (ifs->ifs_hook6_loopback_out)
569 			return EBUSY;
570 	}
571 	return 0;
572 }
573 
574 
575 /*
576  * Filter ioctl interface.
577  */
578 /*ARGSUSED*/
iplioctl(dev,cmd,data,mode,cp,rp)579 int iplioctl(dev, cmd, data, mode, cp, rp)
580 dev_t dev;
581 int cmd;
582 #if SOLARIS2 >= 7
583 intptr_t data;
584 #else
585 int *data;
586 #endif
587 int mode;
588 cred_t *cp;
589 int *rp;
590 {
591 	int error = 0, tmp;
592 	friostat_t fio;
593 	minor_t unit;
594 	u_int enable;
595 	ipf_stack_t *ifs;
596 	zoneid_t zid;
597 	ipf_devstate_t *isp;
598 
599 #ifdef	IPFDEBUG
600 	cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
601 		dev, cmd, data, mode, cp, rp);
602 #endif
603 	unit = getminor(dev);
604 
605 	isp = ddi_get_soft_state(ipf_state, unit);
606 	if (isp == NULL)
607 		return ENXIO;
608 	unit = isp->ipfs_minor;
609 
610 	zid = crgetzoneid(cp);
611 	if (cmd == SIOCIPFZONESET) {
612 		if (zid == GLOBAL_ZONEID)
613 			return fr_setzoneid(isp, (caddr_t) data);
614 		return EACCES;
615 	}
616 
617         /*
618 	 * ipf_find_stack returns with a read lock on ifs_ipf_global
619 	 */
620 	ifs = ipf_find_stack(zid, isp);
621 	if (ifs == NULL)
622 		return ENXIO;
623 
624 	if (ifs->ifs_fr_running <= 0) {
625 		if (unit != IPL_LOGIPF) {
626 			RWLOCK_EXIT(&ifs->ifs_ipf_global);
627 			return EIO;
628 		}
629 		if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
630 		    cmd != SIOCIPFSET && cmd != SIOCFRENB &&
631 		    cmd != SIOCGETFS && cmd != SIOCGETFF) {
632 			RWLOCK_EXIT(&ifs->ifs_ipf_global);
633 			return EIO;
634 		}
635 	}
636 
637 	if (ifs->ifs_fr_enable_active != 0) {
638 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
639 		return EBUSY;
640 	}
641 
642 	error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, crgetuid(cp),
643 			       curproc, ifs);
644 	if (error != -1) {
645 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
646 		return error;
647 	}
648 	error = 0;
649 
650 	switch (cmd)
651 	{
652 	case SIOCFRENB :
653 		if (!(mode & FWRITE))
654 			error = EPERM;
655 		else {
656 			error = COPYIN((caddr_t)data, (caddr_t)&enable,
657 				       sizeof(enable));
658 			if (error != 0) {
659 				error = EFAULT;
660 				break;
661 			}
662 
663 			RWLOCK_EXIT(&ifs->ifs_ipf_global);
664 			WRITE_ENTER(&ifs->ifs_ipf_global);
665 
666 			/*
667 			 * We must recheck fr_enable_active here, since we've
668 			 * dropped ifs_ipf_global from R in order to get it
669 			 * exclusively.
670 			 */
671 			if (ifs->ifs_fr_enable_active == 0) {
672 				ifs->ifs_fr_enable_active = 1;
673 				error = fr_enableipf(ifs, enable);
674 				ifs->ifs_fr_enable_active = 0;
675 			}
676 		}
677 		break;
678 	case SIOCIPFSET :
679 		if (!(mode & FWRITE)) {
680 			error = EPERM;
681 			break;
682 		}
683 		/* FALLTHRU */
684 	case SIOCIPFGETNEXT :
685 	case SIOCIPFGET :
686 		error = fr_ipftune(cmd, (void *)data, ifs);
687 		break;
688 	case SIOCSETFF :
689 		if (!(mode & FWRITE))
690 			error = EPERM;
691 		else {
692 			error = COPYIN((caddr_t)data,
693 				       (caddr_t)&ifs->ifs_fr_flags,
694 				       sizeof(ifs->ifs_fr_flags));
695 			if (error != 0)
696 				error = EFAULT;
697 		}
698 		break;
699 	case SIOCIPFLP :
700 		error = COPYIN((caddr_t)data, (caddr_t)&tmp,
701 			       sizeof(tmp));
702 		if (error != 0)
703 			error = EFAULT;
704 		else
705 			error = fr_setipfloopback(tmp, ifs);
706 		break;
707 	case SIOCGETFF :
708 		error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data,
709 				sizeof(ifs->ifs_fr_flags));
710 		if (error != 0)
711 			error = EFAULT;
712 		break;
713 	case SIOCFUNCL :
714 		error = fr_resolvefunc((void *)data);
715 		break;
716 	case SIOCINAFR :
717 	case SIOCRMAFR :
718 	case SIOCADAFR :
719 	case SIOCZRLST :
720 		if (!(mode & FWRITE))
721 			error = EPERM;
722 		else
723 			error = frrequest(unit, cmd, (caddr_t)data,
724 					  ifs->ifs_fr_active, 1, ifs);
725 		break;
726 	case SIOCINIFR :
727 	case SIOCRMIFR :
728 	case SIOCADIFR :
729 		if (!(mode & FWRITE))
730 			error = EPERM;
731 		else
732 			error = frrequest(unit, cmd, (caddr_t)data,
733 					  1 - ifs->ifs_fr_active, 1, ifs);
734 		break;
735 	case SIOCSWAPA :
736 		if (!(mode & FWRITE))
737 			error = EPERM;
738 		else {
739 			WRITE_ENTER(&ifs->ifs_ipf_mutex);
740 			bzero((char *)ifs->ifs_frcache,
741 			    sizeof (ifs->ifs_frcache));
742 			error = COPYOUT((caddr_t)&ifs->ifs_fr_active,
743 					(caddr_t)data,
744 					sizeof(ifs->ifs_fr_active));
745 			if (error != 0)
746 				error = EFAULT;
747 			else
748 				ifs->ifs_fr_active = 1 - ifs->ifs_fr_active;
749 			RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
750 		}
751 		break;
752 	case SIOCGETFS :
753 		fr_getstat(&fio, ifs);
754 		error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT);
755 		break;
756 	case SIOCFRZST :
757 		if (!(mode & FWRITE))
758 			error = EPERM;
759 		else
760 			error = fr_zerostats((caddr_t)data, ifs);
761 		break;
762 	case	SIOCIPFFL :
763 		if (!(mode & FWRITE))
764 			error = EPERM;
765 		else {
766 			error = COPYIN((caddr_t)data, (caddr_t)&tmp,
767 				       sizeof(tmp));
768 			if (!error) {
769 				tmp = frflush(unit, 4, tmp, ifs);
770 				error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
771 						sizeof(tmp));
772 				if (error != 0)
773 					error = EFAULT;
774 			} else
775 				error = EFAULT;
776 		}
777 		break;
778 #ifdef USE_INET6
779 	case	SIOCIPFL6 :
780 		if (!(mode & FWRITE))
781 			error = EPERM;
782 		else {
783 			error = COPYIN((caddr_t)data, (caddr_t)&tmp,
784 				       sizeof(tmp));
785 			if (!error) {
786 				tmp = frflush(unit, 6, tmp, ifs);
787 				error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
788 						sizeof(tmp));
789 				if (error != 0)
790 					error = EFAULT;
791 			} else
792 				error = EFAULT;
793 		}
794 		break;
795 #endif
796 	case SIOCSTLCK :
797 		error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
798 		if (error == 0) {
799 			ifs->ifs_fr_state_lock = tmp;
800 			ifs->ifs_fr_nat_lock = tmp;
801 			ifs->ifs_fr_frag_lock = tmp;
802 			ifs->ifs_fr_auth_lock = tmp;
803 		} else
804 			error = EFAULT;
805 	break;
806 #ifdef	IPFILTER_LOG
807 	case	SIOCIPFFB :
808 		if (!(mode & FWRITE))
809 			error = EPERM;
810 		else {
811 			tmp = ipflog_clear(unit, ifs);
812 			error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
813 				       sizeof(tmp));
814 			if (error)
815 				error = EFAULT;
816 		}
817 		break;
818 #endif /* IPFILTER_LOG */
819 	case SIOCFRSYN :
820 		if (!(mode & FWRITE))
821 			error = EPERM;
822 		else {
823 			RWLOCK_EXIT(&ifs->ifs_ipf_global);
824 			WRITE_ENTER(&ifs->ifs_ipf_global);
825 
826 			frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
827 			fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
828 			fr_nataddrsync(0, NULL, NULL, ifs);
829 			fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
830 			error = 0;
831 		}
832 		break;
833 	case SIOCGFRST :
834 		error = fr_outobj((void *)data, fr_fragstats(ifs),
835 				  IPFOBJ_FRAGSTAT);
836 		break;
837 	case FIONREAD :
838 #ifdef	IPFILTER_LOG
839 		tmp = (int)ifs->ifs_iplused[IPL_LOGIPF];
840 
841 		error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp));
842 		if (error != 0)
843 			error = EFAULT;
844 #endif
845 		break;
846 	case SIOCIPFITER :
847 		error = ipf_frruleiter((caddr_t)data, crgetuid(cp),
848 				       curproc, ifs);
849 		break;
850 
851 	case SIOCGENITER :
852 		error = ipf_genericiter((caddr_t)data, crgetuid(cp),
853 					curproc, ifs);
854 		break;
855 
856 	case SIOCIPFDELTOK :
857 		error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
858 		if (error != 0) {
859 			error = EFAULT;
860 		} else {
861 			error = ipf_deltoken(tmp, crgetuid(cp), curproc, ifs);
862 		}
863 		break;
864 
865 	default :
866 #ifdef	IPFDEBUG
867 		cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p",
868 			cmd, (void *)data);
869 #endif
870 		error = EINVAL;
871 		break;
872 	}
873 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
874 	return error;
875 }
876 
877 
fr_enableipf(ifs,enable)878 static int fr_enableipf(ifs, enable)
879 ipf_stack_t *ifs;
880 int enable;
881 {
882 	int error;
883 
884 	if (!enable) {
885 		error = ipldetach(ifs);
886 		if (error == 0)
887 			ifs->ifs_fr_running = -1;
888 		return error;
889 	}
890 
891 	if (ifs->ifs_fr_running > 0)
892 		return 0;
893 
894 	error = iplattach(ifs);
895 	if (error == 0) {
896 		if (ifs->ifs_fr_timer_id == NULL) {
897 			int hz = drv_usectohz(500000);
898 
899 			ifs->ifs_fr_timer_id = timeout(fr_slowtimer,
900 						       (void *)ifs,
901 						       hz);
902 		}
903 		ifs->ifs_fr_running = 1;
904 	} else {
905 		(void) ipldetach(ifs);
906 	}
907 	return error;
908 }
909 
910 
get_unit(name,v,ifs)911 phy_if_t get_unit(name, v, ifs)
912 char *name;
913 int v;
914 ipf_stack_t *ifs;
915 {
916 	net_handle_t nif;
917 
918   	if (v == 4)
919  		nif = ifs->ifs_ipf_ipv4;
920   	else if (v == 6)
921  		nif = ifs->ifs_ipf_ipv6;
922   	else
923  		return 0;
924 
925  	return (net_phylookup(nif, name));
926 }
927 
928 /*
929  * routines below for saving IP headers to buffer
930  */
931 /*ARGSUSED*/
iplopen(devp,flags,otype,cred)932 int iplopen(devp, flags, otype, cred)
933 dev_t *devp;
934 int flags, otype;
935 cred_t *cred;
936 {
937 	ipf_devstate_t *isp;
938 	minor_t min = getminor(*devp);
939 	minor_t minor;
940 
941 #ifdef	IPFDEBUG
942 	cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
943 #endif
944 	if (!(otype & OTYP_CHR))
945 		return ENXIO;
946 
947 	if (IPL_LOGMAX < min)
948 		return ENXIO;
949 
950 	minor = (minor_t)(uintptr_t)vmem_alloc(ipf_minor, 1,
951 	    VM_BESTFIT | VM_SLEEP);
952 
953 	if (ddi_soft_state_zalloc(ipf_state, minor) != 0) {
954 		vmem_free(ipf_minor, (void *)(uintptr_t)minor, 1);
955 		return ENXIO;
956 	}
957 
958 	*devp = makedevice(getmajor(*devp), minor);
959 	isp = ddi_get_soft_state(ipf_state, minor);
960 	VERIFY(isp != NULL);
961 
962 	isp->ipfs_minor = min;
963 	isp->ipfs_zoneid = IPFS_ZONE_UNSET;
964 
965 	return 0;
966 }
967 
968 
969 /*ARGSUSED*/
iplclose(dev,flags,otype,cred)970 int iplclose(dev, flags, otype, cred)
971 dev_t dev;
972 int flags, otype;
973 cred_t *cred;
974 {
975 	minor_t	min = getminor(dev);
976 
977 #ifdef	IPFDEBUG
978 	cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
979 #endif
980 
981 	if (IPL_LOGMAX < min)
982 		return ENXIO;
983 
984 	ddi_soft_state_free(ipf_state, min);
985 	vmem_free(ipf_minor, (void *)(uintptr_t)min, 1);
986 
987 	return 0;
988 }
989 
990 #ifdef	IPFILTER_LOG
991 /*
992  * iplread/ipllog
993  * both of these must operate with at least splnet() lest they be
994  * called during packet processing and cause an inconsistancy to appear in
995  * the filter lists.
996  */
997 /*ARGSUSED*/
iplread(dev,uio,cp)998 int iplread(dev, uio, cp)
999 dev_t dev;
1000 register struct uio *uio;
1001 cred_t *cp;
1002 {
1003 	ipf_stack_t *ifs;
1004 	int ret;
1005 	minor_t unit;
1006 	ipf_devstate_t *isp;
1007 
1008 	unit = getminor(dev);
1009 	isp = ddi_get_soft_state(ipf_state, unit);
1010 	if (isp == NULL)
1011 		return ENXIO;
1012 	unit = isp->ipfs_minor;
1013 
1014 
1015         /*
1016 	 * ipf_find_stack returns with a read lock on ifs_ipf_global
1017 	 */
1018 	ifs = ipf_find_stack(crgetzoneid(cp), isp);
1019 	if (ifs == NULL)
1020 		return ENXIO;
1021 
1022 # ifdef	IPFDEBUG
1023 	cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
1024 # endif
1025 
1026 	if (ifs->ifs_fr_running < 1) {
1027 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
1028 		return EIO;
1029 	}
1030 
1031 # ifdef	IPFILTER_SYNC
1032 	if (unit == IPL_LOGSYNC) {
1033 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
1034 		return ipfsync_read(uio);
1035 	}
1036 # endif
1037 
1038 	ret = ipflog_read(unit, uio, ifs);
1039 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
1040 	return ret;
1041 }
1042 #endif /* IPFILTER_LOG */
1043 
1044 
1045 /*
1046  * iplread/ipllog
1047  * both of these must operate with at least splnet() lest they be
1048  * called during packet processing and cause an inconsistancy to appear in
1049  * the filter lists.
1050  */
iplwrite(dev,uio,cp)1051 int iplwrite(dev, uio, cp)
1052 dev_t dev;
1053 register struct uio *uio;
1054 cred_t *cp;
1055 {
1056 	ipf_stack_t *ifs;
1057 	minor_t unit;
1058 	ipf_devstate_t *isp;
1059 
1060 	unit = getminor(dev);
1061 	isp = ddi_get_soft_state(ipf_state, unit);
1062 	if (isp == NULL)
1063 		return ENXIO;
1064 	unit = isp->ipfs_minor;
1065 
1066         /*
1067 	 * ipf_find_stack returns with a read lock on ifs_ipf_global
1068 	 */
1069 	ifs = ipf_find_stack(crgetzoneid(cp), isp);
1070 	if (ifs == NULL)
1071 		return ENXIO;
1072 
1073 #ifdef	IPFDEBUG
1074 	cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp);
1075 #endif
1076 
1077 	if (ifs->ifs_fr_running < 1) {
1078 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
1079 		return EIO;
1080 	}
1081 
1082 #ifdef	IPFILTER_SYNC
1083 	if (getminor(dev) == IPL_LOGSYNC) {
1084 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
1085 		return ipfsync_write(uio);
1086 	}
1087 #endif /* IPFILTER_SYNC */
1088 	dev = dev;	/* LINT */
1089 	uio = uio;	/* LINT */
1090 	cp = cp;	/* LINT */
1091 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
1092 	return ENXIO;
1093 }
1094 
1095 
1096 /*
1097  * fr_send_reset - this could conceivably be a call to tcp_respond(), but that
1098  * requires a large amount of setting up and isn't any more efficient.
1099  */
fr_send_reset(fin)1100 int fr_send_reset(fin)
1101 fr_info_t *fin;
1102 {
1103 	tcphdr_t *tcp, *tcp2;
1104 	int tlen, hlen;
1105 	mblk_t *m;
1106 #ifdef	USE_INET6
1107 	ip6_t *ip6;
1108 #endif
1109 	ip_t *ip;
1110 
1111 	tcp = fin->fin_dp;
1112 	if (tcp->th_flags & TH_RST)
1113 		return -1;
1114 
1115 #ifndef	IPFILTER_CKSUM
1116 	if (fr_checkl4sum(fin) == -1)
1117 		return -1;
1118 #endif
1119 
1120 	tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0;
1121 #ifdef	USE_INET6
1122 	if (fin->fin_v == 6)
1123 		hlen = sizeof(ip6_t);
1124 	else
1125 #endif
1126 		hlen = sizeof(ip_t);
1127 	hlen += sizeof(*tcp2);
1128 	if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL)
1129 		return -1;
1130 
1131 	m->b_rptr += 64;
1132 	MTYPE(m) = M_DATA;
1133 	m->b_wptr = m->b_rptr + hlen;
1134 	ip = (ip_t *)m->b_rptr;
1135 	bzero((char *)ip, hlen);
1136 	tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
1137 	tcp2->th_dport = tcp->th_sport;
1138 	tcp2->th_sport = tcp->th_dport;
1139 	if (tcp->th_flags & TH_ACK) {
1140 		tcp2->th_seq = tcp->th_ack;
1141 		tcp2->th_flags = TH_RST;
1142 	} else {
1143 		tcp2->th_ack = ntohl(tcp->th_seq);
1144 		tcp2->th_ack += tlen;
1145 		tcp2->th_ack = htonl(tcp2->th_ack);
1146 		tcp2->th_flags = TH_RST|TH_ACK;
1147 	}
1148 	tcp2->th_off = sizeof(struct tcphdr) >> 2;
1149 
1150 	ip->ip_v = fin->fin_v;
1151 #ifdef	USE_INET6
1152 	if (fin->fin_v == 6) {
1153 		ip6 = (ip6_t *)m->b_rptr;
1154 		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1155 		ip6->ip6_src = fin->fin_dst6.in6;
1156 		ip6->ip6_dst = fin->fin_src6.in6;
1157 		ip6->ip6_plen = htons(sizeof(*tcp));
1158 		ip6->ip6_nxt = IPPROTO_TCP;
1159 		tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2);
1160 	} else
1161 #endif
1162 	{
1163 		ip->ip_src.s_addr = fin->fin_daddr;
1164 		ip->ip_dst.s_addr = fin->fin_saddr;
1165 		ip->ip_id = fr_nextipid(fin);
1166 		ip->ip_hl = sizeof(*ip) >> 2;
1167 		ip->ip_p = IPPROTO_TCP;
1168 		ip->ip_len = sizeof(*ip) + sizeof(*tcp);
1169 		ip->ip_tos = fin->fin_ip->ip_tos;
1170 		tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2);
1171 	}
1172 	return fr_send_ip(fin, m, &m);
1173 }
1174 
1175 /*
1176  * Function:	fr_send_ip
1177  * Returns:	 0: success
1178  *		-1: failed
1179  * Parameters:
1180  *	fin: packet information
1181  *	m: the message block where ip head starts
1182  *
1183  * Send a new packet through the IP stack.
1184  *
1185  * For IPv4 packets, ip_len must be in host byte order, and ip_v,
1186  * ip_ttl, ip_off, and ip_sum are ignored (filled in by this
1187  * function).
1188  *
1189  * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled
1190  * in by this function.
1191  *
1192  * All other portions of the packet must be in on-the-wire format.
1193  */
1194 /*ARGSUSED*/
fr_send_ip(fin,m,mpp)1195 static int fr_send_ip(fin, m, mpp)
1196 fr_info_t *fin;
1197 mblk_t *m, **mpp;
1198 {
1199 	qpktinfo_t qpi, *qpip;
1200 	fr_info_t fnew;
1201 	ip_t *ip;
1202 	int i, hlen;
1203 	ipf_stack_t *ifs = fin->fin_ifs;
1204 
1205 	ip = (ip_t *)m->b_rptr;
1206 	bzero((char *)&fnew, sizeof(fnew));
1207 
1208 #ifdef	USE_INET6
1209 	if (fin->fin_v == 6) {
1210 		ip6_t *ip6;
1211 
1212 		ip6 = (ip6_t *)ip;
1213 		ip6->ip6_vfc = 0x60;
1214 		ip6->ip6_hlim = 127;
1215 		fnew.fin_v = 6;
1216 		hlen = sizeof(*ip6);
1217 		fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen;
1218 	} else
1219 #endif
1220 	{
1221 		fnew.fin_v = 4;
1222 #if SOLARIS2 >= 10
1223 		ip->ip_ttl = 255;
1224 		if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1)
1225 			ip->ip_off = htons(IP_DF);
1226 #else
1227 		if (ip_ttl_ptr != NULL)
1228 			ip->ip_ttl = (u_char)(*ip_ttl_ptr);
1229 		else
1230 			ip->ip_ttl = 63;
1231 		if (ip_mtudisc != NULL)
1232 			ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0);
1233 		else
1234 			ip->ip_off = htons(IP_DF);
1235 #endif
1236 		/*
1237 		 * The dance with byte order and ip_len/ip_off is because in
1238 		 * fr_fastroute, it expects them to be in host byte order but
1239 		 * ipf_cksum expects them to be in network byte order.
1240 		 */
1241 		ip->ip_len = htons(ip->ip_len);
1242 		ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip));
1243 		ip->ip_len = ntohs(ip->ip_len);
1244 		ip->ip_off = ntohs(ip->ip_off);
1245 		hlen = sizeof(*ip);
1246 		fnew.fin_plen = ip->ip_len;
1247 	}
1248 
1249 	qpip = fin->fin_qpi;
1250 	qpi.qpi_off = 0;
1251 	qpi.qpi_ill = qpip->qpi_ill;
1252 	qpi.qpi_m = m;
1253 	qpi.qpi_data = ip;
1254 	fnew.fin_qpi = &qpi;
1255 	fnew.fin_ifp = fin->fin_ifp;
1256 	fnew.fin_flx = FI_NOCKSUM;
1257 	fnew.fin_m = m;
1258 	fnew.fin_qfm = m;
1259 	fnew.fin_ip = ip;
1260 	fnew.fin_mp = mpp;
1261 	fnew.fin_hlen = hlen;
1262 	fnew.fin_dp = (char *)ip + hlen;
1263 	fnew.fin_ifs = fin->fin_ifs;
1264 	(void) fr_makefrip(hlen, ip, &fnew);
1265 
1266 	i = fr_fastroute(m, mpp, &fnew, NULL);
1267 	return i;
1268 }
1269 
1270 
fr_send_icmp_err(type,fin,dst)1271 int fr_send_icmp_err(type, fin, dst)
1272 int type;
1273 fr_info_t *fin;
1274 int dst;
1275 {
1276 	struct in_addr dst4;
1277 	struct icmp *icmp;
1278 	qpktinfo_t *qpi;
1279 	int hlen, code;
1280 	phy_if_t phy;
1281 	u_short sz;
1282 #ifdef	USE_INET6
1283 	mblk_t *mb;
1284 #endif
1285 	mblk_t *m;
1286 #ifdef	USE_INET6
1287 	ip6_t *ip6;
1288 #endif
1289 	ip_t *ip;
1290 	ipf_stack_t *ifs = fin->fin_ifs;
1291 
1292 	if ((type < 0) || (type > ICMP_MAXTYPE))
1293 		return -1;
1294 
1295 	code = fin->fin_icode;
1296 #ifdef USE_INET6
1297 	if ((code < 0) || (code >= ICMP_MAX_UNREACH))
1298 		return -1;
1299 #endif
1300 
1301 #ifndef	IPFILTER_CKSUM
1302 	if (fr_checkl4sum(fin) == -1)
1303 		return -1;
1304 #endif
1305 
1306 	qpi = fin->fin_qpi;
1307 
1308 #ifdef	USE_INET6
1309 	mb = fin->fin_qfm;
1310 
1311 	if (fin->fin_v == 6) {
1312 		sz = sizeof(ip6_t);
1313 		sz += MIN(mb->b_wptr - mb->b_rptr, 512);
1314 		hlen = sizeof(ip6_t);
1315 		type = icmptoicmp6types[type];
1316 		if (type == ICMP6_DST_UNREACH)
1317 			code = icmptoicmp6unreach[code];
1318 	} else
1319 #endif
1320 	{
1321 		if ((fin->fin_p == IPPROTO_ICMP) &&
1322 		    !(fin->fin_flx & FI_SHORT))
1323 			switch (ntohs(fin->fin_data[0]) >> 8)
1324 			{
1325 			case ICMP_ECHO :
1326 			case ICMP_TSTAMP :
1327 			case ICMP_IREQ :
1328 			case ICMP_MASKREQ :
1329 				break;
1330 			default :
1331 				return 0;
1332 			}
1333 
1334 		sz = sizeof(ip_t) * 2;
1335 		sz += 8;		/* 64 bits of data */
1336 		hlen = sizeof(ip_t);
1337 	}
1338 
1339 	sz += offsetof(struct icmp, icmp_ip);
1340 	if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL)
1341 		return -1;
1342 	MTYPE(m) = M_DATA;
1343 	m->b_rptr += 64;
1344 	m->b_wptr = m->b_rptr + sz;
1345 	bzero((char *)m->b_rptr, (size_t)sz);
1346 	ip = (ip_t *)m->b_rptr;
1347 	ip->ip_v = fin->fin_v;
1348 	icmp = (struct icmp *)(m->b_rptr + hlen);
1349 	icmp->icmp_type = type & 0xff;
1350 	icmp->icmp_code = code & 0xff;
1351 	phy = (phy_if_t)qpi->qpi_ill;
1352 	if (type == ICMP_UNREACH && (phy != 0) &&
1353 	    fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
1354 		icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 );
1355 
1356 #ifdef	USE_INET6
1357 	if (fin->fin_v == 6) {
1358 		struct in6_addr dst6;
1359 		int csz;
1360 
1361 		if (dst == 0) {
1362 			ipf_stack_t *ifs = fin->fin_ifs;
1363 
1364 			if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy,
1365 				       (void *)&dst6, NULL, ifs) == -1) {
1366 				FREE_MB_T(m);
1367 				return -1;
1368 			}
1369 		} else
1370 			dst6 = fin->fin_dst6.in6;
1371 
1372 		csz = sz;
1373 		sz -= sizeof(ip6_t);
1374 		ip6 = (ip6_t *)m->b_rptr;
1375 		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1376 		ip6->ip6_plen = htons((u_short)sz);
1377 		ip6->ip6_nxt = IPPROTO_ICMPV6;
1378 		ip6->ip6_src = dst6;
1379 		ip6->ip6_dst = fin->fin_src6.in6;
1380 		sz -= offsetof(struct icmp, icmp_ip);
1381 		bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz);
1382 		icmp->icmp_cksum = csz - sizeof(ip6_t);
1383 	} else
1384 #endif
1385 	{
1386 		ip->ip_hl = sizeof(*ip) >> 2;
1387 		ip->ip_p = IPPROTO_ICMP;
1388 		ip->ip_id = fin->fin_ip->ip_id;
1389 		ip->ip_tos = fin->fin_ip->ip_tos;
1390 		ip->ip_len = (u_short)sz;
1391 		if (dst == 0) {
1392 			ipf_stack_t *ifs = fin->fin_ifs;
1393 
1394 			if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy,
1395 				       (void *)&dst4, NULL, ifs) == -1) {
1396 				FREE_MB_T(m);
1397 				return -1;
1398 			}
1399 		} else {
1400 			dst4 = fin->fin_dst;
1401 		}
1402 		ip->ip_src = dst4;
1403 		ip->ip_dst = fin->fin_src;
1404 		bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip,
1405 		      sizeof(*fin->fin_ip));
1406 		bcopy((char *)fin->fin_ip + fin->fin_hlen,
1407 		      (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8);
1408 		icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len);
1409 		icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off);
1410 		icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
1411 					     sz - sizeof(ip_t));
1412 	}
1413 
1414 	/*
1415 	 * Need to exit out of these so we don't recursively call rw_enter
1416 	 * from fr_qout.
1417 	 */
1418 	return fr_send_ip(fin, m, &m);
1419 }
1420 
1421 #include <sys/time.h>
1422 #include <sys/varargs.h>
1423 
1424 #ifndef _KERNEL
1425 #include <stdio.h>
1426 #endif
1427 
1428 /*
1429  * Return the first IP Address associated with an interface
1430  * For IPv6, we walk through the list of logical interfaces and return
1431  * the address of the first one that isn't a link-local interface.
1432  * We can't assume that it is :1 because another link-local address
1433  * may have been assigned there.
1434  */
1435 /*ARGSUSED*/
fr_ifpaddr(v,atype,ifptr,inp,inpmask,ifs)1436 int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs)
1437 int v, atype;
1438 void *ifptr;
1439 struct in_addr  *inp, *inpmask;
1440 ipf_stack_t *ifs;
1441 {
1442 	struct sockaddr_in6 v6addr[2];
1443 	struct sockaddr_in v4addr[2];
1444 	net_ifaddr_t type[2];
1445 	net_handle_t net_data;
1446 	phy_if_t phyif;
1447 	void *array;
1448 
1449 	switch (v)
1450 	{
1451 	case 4:
1452 		net_data = ifs->ifs_ipf_ipv4;
1453 		array = v4addr;
1454 		break;
1455 	case 6:
1456 		net_data = ifs->ifs_ipf_ipv6;
1457 		array = v6addr;
1458 		break;
1459 	default:
1460 		net_data = NULL;
1461 		break;
1462 	}
1463 
1464 	if (net_data == NULL)
1465 		return -1;
1466 
1467 	phyif = (phy_if_t)ifptr;
1468 
1469 	switch (atype)
1470 	{
1471 	case FRI_PEERADDR :
1472 		type[0] = NA_PEER;
1473 		break;
1474 
1475 	case FRI_BROADCAST :
1476 		type[0] = NA_BROADCAST;
1477 		break;
1478 
1479 	default :
1480 		type[0] = NA_ADDRESS;
1481 		break;
1482 	}
1483 
1484 	type[1] = NA_NETMASK;
1485 
1486 	if (v == 6) {
1487 		lif_if_t idx = 0;
1488 
1489 		do {
1490 			idx = net_lifgetnext(net_data, phyif, idx);
1491 			if (net_getlifaddr(net_data, phyif, idx, 2, type,
1492 					   array) < 0)
1493 				return -1;
1494 			if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) &&
1495 			    !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr))
1496 				break;
1497 		} while (idx != 0);
1498 
1499 		if (idx == 0)
1500 			return -1;
1501 
1502 		return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1],
1503 					inp, inpmask);
1504 	}
1505 
1506 	if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0)
1507 		return -1;
1508 
1509 	return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask);
1510 }
1511 
1512 
fr_newisn(fin)1513 u_32_t fr_newisn(fin)
1514 fr_info_t *fin;
1515 {
1516 	static int iss_seq_off = 0;
1517 	u_char hash[16];
1518 	u_32_t newiss;
1519 	MD5_CTX ctx;
1520 	ipf_stack_t *ifs = fin->fin_ifs;
1521 
1522 	/*
1523 	 * Compute the base value of the ISS.  It is a hash
1524 	 * of (saddr, sport, daddr, dport, secret).
1525 	 */
1526 	MD5Init(&ctx);
1527 
1528 	MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src,
1529 		  sizeof(fin->fin_fi.fi_src));
1530 	MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst,
1531 		  sizeof(fin->fin_fi.fi_dst));
1532 	MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat));
1533 
1534 	MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret));
1535 
1536 	MD5Final(hash, &ctx);
1537 
1538 	bcopy(hash, &newiss, sizeof(newiss));
1539 
1540 	/*
1541 	 * Now increment our "timer", and add it in to
1542 	 * the computed value.
1543 	 *
1544 	 * XXX Use `addin'?
1545 	 * XXX TCP_ISSINCR too large to use?
1546 	 */
1547 	iss_seq_off += 0x00010000;
1548 	newiss += iss_seq_off;
1549 	return newiss;
1550 }
1551 
1552 
1553 /* ------------------------------------------------------------------------ */
1554 /* Function:    fr_nextipid                                                 */
1555 /* Returns:     int - 0 == success, -1 == error (packet should be droppped) */
1556 /* Parameters:  fin(I) - pointer to packet information                      */
1557 /*                                                                          */
1558 /* Returns the next IPv4 ID to use for this packet.                         */
1559 /* ------------------------------------------------------------------------ */
fr_nextipid(fin)1560 u_short fr_nextipid(fin)
1561 fr_info_t *fin;
1562 {
1563 	static u_short ipid = 0;
1564 	u_short id;
1565 	ipf_stack_t *ifs = fin->fin_ifs;
1566 
1567 	MUTEX_ENTER(&ifs->ifs_ipf_rw);
1568 	if (fin->fin_pktnum != 0) {
1569 		id = fin->fin_pktnum & 0xffff;
1570 	} else {
1571 		id = ipid++;
1572 	}
1573 	MUTEX_EXIT(&ifs->ifs_ipf_rw);
1574 
1575 	return id;
1576 }
1577 
1578 
1579 #ifndef IPFILTER_CKSUM
1580 /* ARGSUSED */
1581 #endif
fr_checkv4sum(fin)1582 INLINE void fr_checkv4sum(fin)
1583 fr_info_t *fin;
1584 {
1585 #ifdef IPFILTER_CKSUM
1586 	if (fr_checkl4sum(fin) == -1)
1587 		fin->fin_flx |= FI_BAD;
1588 #endif
1589 }
1590 
1591 
#ifdef USE_INET6
/*
 * Function:	fr_checkv6sum
 * Returns:	void
 * Parameters:
 *	fin: packet information
 *
 * IPv6 counterpart of fr_checkv4sum: when built with IPFILTER_CKSUM, flag
 * the packet FI_BAD if fr_checkl4sum() returns -1; otherwise do nothing.
 */
# ifndef IPFILTER_CKSUM
/* ARGSUSED */
# endif
INLINE void fr_checkv6sum(fin)
fr_info_t *fin;
{
# ifdef IPFILTER_CKSUM
	if (fr_checkl4sum(fin) != -1)
		return;

	fin->fin_flx |= FI_BAD;
# endif
}
#endif /* USE_INET6 */
1605 
1606 
1607 #if (SOLARIS2 < 7)
fr_slowtimer()1608 void fr_slowtimer()
1609 #else
1610 /*ARGSUSED*/
1611 void fr_slowtimer __P((void *arg))
1612 #endif
1613 {
1614 	ipf_stack_t *ifs = arg;
1615 
1616 	READ_ENTER(&ifs->ifs_ipf_global);
1617 	if (ifs->ifs_fr_running != 1) {
1618 		ifs->ifs_fr_timer_id = NULL;
1619 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
1620 		return;
1621 	}
1622 	ipf_expiretokens(ifs);
1623 	fr_fragexpire(ifs);
1624 	fr_timeoutstate(ifs);
1625 	fr_natexpire(ifs);
1626 	fr_authexpire(ifs);
1627 	ifs->ifs_fr_ticks++;
1628 	if (ifs->ifs_fr_running == 1)
1629 		ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg,
1630 		    drv_usectohz(500000));
1631 	else
1632 		ifs->ifs_fr_timer_id = NULL;
1633 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
1634 }
1635 
1636 
1637 /* ------------------------------------------------------------------------ */
1638 /* Function:    fr_pullup                                                   */
1639 /* Returns:     NULL == pullup failed, else pointer to protocol header      */
1640 /* Parameters:  m(I)   - pointer to buffer where data packet starts         */
1641 /*              fin(I) - pointer to packet information                      */
1642 /*              len(I) - number of bytes to pullup                          */
1643 /*                                                                          */
1644 /* Attempt to move at least len bytes (from the start of the buffer) into a */
1645 /* single buffer for ease of access.  Operating system native functions are */
1646 /* used to manage buffers - if necessary.  If the entire packet ends up in  */
1647 /* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has  */
1648 /* not been called.  Both fin_ip and fin_dp are updated before exiting _IF_ */
1649 /* and ONLY if the pullup succeeds.                                         */
1650 /*                                                                          */
1651 /* We assume that 'min' is a pointer to a buffer that is part of the chain  */
1652 /* of buffers that starts at *fin->fin_mp.                                  */
1653 /* ------------------------------------------------------------------------ */
fr_pullup(min,fin,len)1654 void *fr_pullup(min, fin, len)
1655 mb_t *min;
1656 fr_info_t *fin;
1657 int len;
1658 {
1659 	qpktinfo_t *qpi = fin->fin_qpi;
1660 	int out = fin->fin_out, dpoff, ipoff;
1661 	mb_t *m = min, *m1, *m2;
1662 	char *ip;
1663 	uint32_t start, stuff, end, value, flags;
1664 	ipf_stack_t *ifs = fin->fin_ifs;
1665 
1666 	if (m == NULL)
1667 		return NULL;
1668 
1669 	ip = (char *)fin->fin_ip;
1670 	if ((fin->fin_flx & FI_COALESCE) != 0)
1671 		return ip;
1672 
1673 	ipoff = fin->fin_ipoff;
1674 	if (fin->fin_dp != NULL)
1675 		dpoff = (char *)fin->fin_dp - (char *)ip;
1676 	else
1677 		dpoff = 0;
1678 
1679 	if (M_LEN(m) < len + ipoff) {
1680 
1681 		/*
1682 		 * pfil_precheck ensures the IP header is on a 32bit
1683 		 * aligned address so simply fail if that isn't currently
1684 		 * the case (should never happen).
1685 		 */
1686 		int inc = 0;
1687 
1688 		if (ipoff > 0) {
1689 			if ((ipoff & 3) != 0) {
1690 				inc = 4 - (ipoff & 3);
1691 				if (m->b_rptr - inc >= m->b_datap->db_base)
1692 					m->b_rptr -= inc;
1693 				else
1694 					inc = 0;
1695 			}
1696 		}
1697 
1698 		/*
1699 		 * XXX This is here as a work around for a bug with DEBUG
1700 		 * XXX Solaris kernels.  The problem is b_prev is used by IP
1701 		 * XXX code as a way to stash the phyint_index for a packet,
1702 		 * XXX this doesn't get reset by IP but freeb does an ASSERT()
1703 		 * XXX for both of these to be NULL.  See 6442390.
1704 		 */
1705 		m1 = m;
1706 		m2 = m->b_prev;
1707 
1708 		do {
1709 			m1->b_next = NULL;
1710 			m1->b_prev = NULL;
1711 			m1 = m1->b_cont;
1712 		} while (m1);
1713 
1714 		/*
1715 		 * Need to preserve checksum information by copying them
1716 		 * to newmp which heads the pulluped message.
1717 		 */
1718 		hcksum_retrieve(m, NULL, NULL, &start, &stuff, &end,
1719 		    &value, &flags);
1720 
1721 		if (pullupmsg(m, len + ipoff + inc) == 0) {
1722 			ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]);
1723 			FREE_MB_T(*fin->fin_mp);
1724 			*fin->fin_mp = NULL;
1725 			fin->fin_m = NULL;
1726 			fin->fin_ip = NULL;
1727 			fin->fin_dp = NULL;
1728 			qpi->qpi_data = NULL;
1729 			return NULL;
1730 		}
1731 
1732 		(void) hcksum_assoc(m, NULL, NULL, start, stuff, end,
1733 		    value, flags, 0);
1734 
1735 		m->b_prev = m2;
1736 		m->b_rptr += inc;
1737 		fin->fin_m = m;
1738 		ip = MTOD(m, char *) + ipoff;
1739 		qpi->qpi_data = ip;
1740 	}
1741 
1742 	ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]);
1743 	fin->fin_ip = (ip_t *)ip;
1744 	if (fin->fin_dp != NULL)
1745 		fin->fin_dp = (char *)fin->fin_ip + dpoff;
1746 
1747 	if (len == fin->fin_plen)
1748 		fin->fin_flx |= FI_COALESCE;
1749 	return ip;
1750 }
1751 
1752 
1753 /*
1754  * Function:	fr_verifysrc
1755  * Returns:	int (really boolean)
1756  * Parameters:	fin - packet information
1757  *
1758  * Check whether the packet has a valid source address for the interface on
1759  * which the packet arrived, implementing the "fr_chksrc" feature.
1760  * Returns true iff the packet's source address is valid.
1761  */
fr_verifysrc(fin)1762 int fr_verifysrc(fin)
1763 fr_info_t *fin;
1764 {
1765 	net_handle_t net_data_p;
1766 	phy_if_t phy_ifdata_routeto;
1767 	struct sockaddr	sin;
1768 	ipf_stack_t *ifs = fin->fin_ifs;
1769 
1770 	if (fin->fin_v == 4) {
1771 		net_data_p = ifs->ifs_ipf_ipv4;
1772 	} else if (fin->fin_v == 6) {
1773 		net_data_p = ifs->ifs_ipf_ipv6;
1774 	} else {
1775 		return (0);
1776 	}
1777 
1778 	/* Get the index corresponding to the if name */
1779 	sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
1780 	bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr));
1781 	phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL);
1782 
1783 	return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0);
1784 }
1785 
1786 /*
1787  * Return true only if forwarding is enabled on the interface.
1788  */
1789 static int
fr_forwarding_enabled(phy_if_t phyif,net_handle_t ndp)1790 fr_forwarding_enabled(phy_if_t phyif, net_handle_t ndp)
1791 {
1792 	lif_if_t lif;
1793 
1794 	for (lif = net_lifgetnext(ndp, phyif, 0); lif > 0;
1795 	    lif = net_lifgetnext(ndp, phyif, lif)) {
1796 		int res;
1797 		uint64_t flags;
1798 
1799 		res = net_getlifflags(ndp, phyif, lif, &flags);
1800 		if (res != 0)
1801 			return (0);
1802 		if (flags & IFF_ROUTER)
1803 			return (1);
1804 	}
1805 
1806 	return (0);
1807 }
1808 
1809 /*
1810  * Function:	fr_fastroute
1811  * Returns:	 0: success;
1812  *		-1: failed
1813  * Parameters:
1814  *	mb: the message block where ip head starts
1815  *	mpp: the pointer to the pointer of the orignal
1816  *		packet message
1817  *	fin: packet information
1818  *	fdp: destination interface information
1819  *	if it is NULL, no interface information provided.
1820  *
1821  * This function is for fastroute/to/dup-to rules. It calls
1822  * pfil_make_lay2_packet to search route, make lay-2 header
1823  * ,and identify output queue for the IP packet.
1824  * The destination address depends on the following conditions:
1825  * 1: for fastroute rule, fdp is passed in as NULL, so the
1826  *	destination address is the IP Packet's destination address
1827  * 2: for to/dup-to rule, if an ip address is specified after
1828  *	the interface name, this address is the as destination
1829  *	address. Otherwise IP Packet's destination address is used
1830  */
fr_fastroute(mb,mpp,fin,fdp)1831 int fr_fastroute(mb, mpp, fin, fdp)
1832 mblk_t *mb, **mpp;
1833 fr_info_t *fin;
1834 frdest_t *fdp;
1835 {
1836         net_handle_t net_data_p;
1837 	net_inject_t *inj;
1838 	mblk_t *mp = NULL;
1839 	frentry_t *fr = fin->fin_fr;
1840 	qpktinfo_t *qpi;
1841 	ip_t *ip;
1842 
1843 	struct sockaddr_in *sin;
1844 	struct sockaddr_in6 *sin6;
1845 	struct sockaddr *sinp;
1846 	ipf_stack_t *ifs = fin->fin_ifs;
1847 #ifndef	sparc
1848 	u_short __iplen, __ipoff;
1849 #endif
1850 
1851 	if (fin->fin_v == 4) {
1852 		net_data_p = ifs->ifs_ipf_ipv4;
1853 	} else if (fin->fin_v == 6) {
1854 		net_data_p = ifs->ifs_ipf_ipv6;
1855 	} else {
1856 		return (-1);
1857 	}
1858 
1859 	/* Check the src here, fin_ifp is the src interface. */
1860 	if (!fr_forwarding_enabled((phy_if_t)fin->fin_ifp, net_data_p))
1861 		return (-1);
1862 
1863 	inj = net_inject_alloc(NETINFO_VERSION);
1864 	if (inj == NULL)
1865 		return -1;
1866 
1867 	ip = fin->fin_ip;
1868 	qpi = fin->fin_qpi;
1869 
1870 	/*
1871 	 * If this is a duplicate mblk then we want ip to point at that
1872 	 * data, not the original, if and only if it is already pointing at
1873 	 * the current mblk data.
1874 	 *
1875 	 * Otherwise, if it's not a duplicate, and we're not already pointing
1876 	 * at the current mblk data, then we want to ensure that the data
1877 	 * points at ip.
1878 	 */
1879 
1880 	if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) {
1881 		ip = (ip_t *)mb->b_rptr;
1882 	} else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) {
1883 		qpi->qpi_m->b_rptr = (uchar_t *)ip;
1884 		qpi->qpi_off = 0;
1885 	}
1886 
1887 	/*
1888 	 * If there is another M_PROTO, we don't want it
1889 	 */
1890 	if (*mpp != mb) {
1891 		mp = unlinkb(*mpp);
1892 		freeb(*mpp);
1893 		*mpp = mp;
1894 	}
1895 
1896 	sinp = (struct sockaddr *)&inj->ni_addr;
1897 	sin = (struct sockaddr_in *)sinp;
1898 	sin6 = (struct sockaddr_in6 *)sinp;
1899 	bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr));
1900 	inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
1901 	inj->ni_packet = mb;
1902 
1903 	/*
1904 	 * In case we're here due to "to <if>" being used with
1905 	 * "keep state", check that we're going in the correct
1906 	 * direction.
1907 	 */
1908 	if (fdp != NULL) {
1909 		if ((fr != NULL) && (fdp->fd_ifp != NULL) &&
1910 			(fin->fin_rev != 0) && (fdp == &fr->fr_tif))
1911 			goto bad_fastroute;
1912 		inj->ni_physical = (phy_if_t)fdp->fd_ifp;
1913 		if (fin->fin_v == 4) {
1914 			sin->sin_addr = fdp->fd_ip;
1915 		} else {
1916 			sin6->sin6_addr = fdp->fd_ip6.in6;
1917 		}
1918 	} else {
1919 		if (fin->fin_v == 4) {
1920 			sin->sin_addr = ip->ip_dst;
1921 		} else {
1922 			sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst;
1923 		}
1924 		inj->ni_physical = net_routeto(net_data_p, sinp, NULL);
1925 	}
1926 
1927 	/* we're checking the destinatation here */
1928 	if (!fr_forwarding_enabled(inj->ni_physical, net_data_p))
1929 		goto bad_fastroute;
1930 
1931 	/*
1932 	 * Clear the hardware checksum flags from packets that we are doing
1933 	 * input processing on as leaving them set will cause the outgoing
1934 	 * NIC (if it supports hardware checksum) to calculate them anew,
1935 	 * using the old (correct) checksums as the pseudo value to start
1936 	 * from.
1937 	 */
1938 	if (fin->fin_out == 0) {
1939 		DB_CKSUMFLAGS(mb) = 0;
1940 	}
1941 
1942 	*mpp = mb;
1943 
1944 	if (fin->fin_out == 0) {
1945 		void *saveifp;
1946 		u_32_t pass;
1947 
1948 		saveifp = fin->fin_ifp;
1949 		fin->fin_ifp = (void *)inj->ni_physical;
1950 		fin->fin_flx &= ~FI_STATE;
1951 		fin->fin_out = 1;
1952 		(void) fr_acctpkt(fin, &pass);
1953 		fin->fin_fr = NULL;
1954 		if (!fr || !(fr->fr_flags & FR_RETMASK))
1955 			(void) fr_checkstate(fin, &pass);
1956 		if (fr_checknatout(fin, NULL) == -1)
1957 			goto bad_fastroute;
1958 		fin->fin_out = 0;
1959 		fin->fin_ifp = saveifp;
1960 	}
1961 #ifndef	sparc
1962 	if (fin->fin_v == 4) {
1963 		__iplen = (u_short)ip->ip_len,
1964 		__ipoff = (u_short)ip->ip_off;
1965 
1966 		ip->ip_len = htons(__iplen);
1967 		ip->ip_off = htons(__ipoff);
1968 	}
1969 #endif
1970 
1971 	if (net_data_p) {
1972 		if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) {
1973 			net_inject_free(inj);
1974 			return (-1);
1975 		}
1976 	}
1977 
1978 	ifs->ifs_fr_frouteok[0]++;
1979 	net_inject_free(inj);
1980 	return 0;
1981 bad_fastroute:
1982 	net_inject_free(inj);
1983 	freemsg(mb);
1984 	ifs->ifs_fr_frouteok[1]++;
1985 	return -1;
1986 }
1987 
1988 
1989 /* ------------------------------------------------------------------------ */
1990 /* Function:    ipf_hook4_out                                               */
1991 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1992 /* Parameters:  event(I)     - pointer to event                             */
1993 /*              info(I)      - pointer to hook information for firewalling  */
1994 /*                                                                          */
1995 /* Calling ipf_hook.                                                        */
1996 /* ------------------------------------------------------------------------ */
1997 /*ARGSUSED*/
ipf_hook4_out(hook_event_token_t token,hook_data_t info,void * arg)1998 int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg)
1999 {
2000 	return ipf_hook(info, 1, 0, arg);
2001 }
2002 /*ARGSUSED*/
ipf_hook6_out(hook_event_token_t token,hook_data_t info,void * arg)2003 int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg)
2004 {
2005 	return ipf_hook6(info, 1, 0, arg);
2006 }
2007 
2008 /* ------------------------------------------------------------------------ */
2009 /* Function:    ipf_hook4_in                                                */
2010 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2011 /* Parameters:  event(I)     - pointer to event                             */
2012 /*              info(I)      - pointer to hook information for firewalling  */
2013 /*                                                                          */
2014 /* Calling ipf_hook.                                                        */
2015 /* ------------------------------------------------------------------------ */
2016 /*ARGSUSED*/
ipf_hook4_in(hook_event_token_t token,hook_data_t info,void * arg)2017 int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg)
2018 {
2019 	return ipf_hook(info, 0, 0, arg);
2020 }
2021 /*ARGSUSED*/
ipf_hook6_in(hook_event_token_t token,hook_data_t info,void * arg)2022 int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg)
2023 {
2024 	return ipf_hook6(info, 0, 0, arg);
2025 }
2026 
2027 
2028 /* ------------------------------------------------------------------------ */
2029 /* Function:    ipf_hook4_loop_out                                          */
2030 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2031 /* Parameters:  event(I)     - pointer to event                             */
2032 /*              info(I)      - pointer to hook information for firewalling  */
2033 /*                                                                          */
2034 /* Calling ipf_hook.                                                        */
2035 /* ------------------------------------------------------------------------ */
2036 /*ARGSUSED*/
ipf_hook4_loop_out(hook_event_token_t token,hook_data_t info,void * arg)2037 int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
2038 {
2039 	return ipf_hook(info, 1, FI_NOCKSUM, arg);
2040 }
2041 /*ARGSUSED*/
ipf_hook6_loop_out(hook_event_token_t token,hook_data_t info,void * arg)2042 int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
2043 {
2044 	return ipf_hook6(info, 1, FI_NOCKSUM, arg);
2045 }
2046 
2047 /* ------------------------------------------------------------------------ */
2048 /* Function:    ipf_hook4_loop_in                                           */
2049 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2050 /* Parameters:  event(I)     - pointer to event                             */
2051 /*              info(I)      - pointer to hook information for firewalling  */
2052 /*                                                                          */
2053 /* Calling ipf_hook.                                                        */
2054 /* ------------------------------------------------------------------------ */
2055 /*ARGSUSED*/
ipf_hook4_loop_in(hook_event_token_t token,hook_data_t info,void * arg)2056 int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
2057 {
2058 	return ipf_hook(info, 0, FI_NOCKSUM, arg);
2059 }
2060 /*ARGSUSED*/
ipf_hook6_loop_in(hook_event_token_t token,hook_data_t info,void * arg)2061 int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
2062 {
2063 	return ipf_hook6(info, 0, FI_NOCKSUM, arg);
2064 }
2065 
2066 /* ------------------------------------------------------------------------ */
2067 /* Function:    ipf_hook                                                    */
2068 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
2069 /* Parameters:  info(I)      - pointer to hook information for firewalling  */
2070 /*              out(I)       - whether packet is going in or out            */
2071 /*              loopback(I)  - whether packet is a loopback packet or not   */
2072 /*                                                                          */
2073 /* Stepping stone function between the IP mainline and IPFilter.  Extracts  */
2074 /* parameters out of the info structure and forms them up to be useful for  */
2075 /* calling ipfilter.                                                        */
2076 /* ------------------------------------------------------------------------ */
/*
 * IPv4 bridge between a packet hook event and fr_check().
 *
 *	info     - hook data, handled as a hook_pkt_event_t
 *	out      - 0 selects the inbound interface (hpe_ifp), any other
 *	           value the outbound one (hpe_ofp); also passed to
 *	           fr_check()
 *	loopback - extra bits OR'ed into qpi_flags (the loopback wrappers
 *	           in this file pass FI_NOCKSUM here)
 *	arg      - ipf_stack_t instance forwarded to fr_check()
 *
 * Returns the fr_check() result, except that a result of 0 with
 * *hpe_mp == NULL is reported as 1.
 */
int ipf_hook(hook_data_t info, int out, int loopback, void *arg)
{
	hook_pkt_event_t *hpe = (hook_pkt_event_t *)info;
	ipf_stack_t *stack = arg;
	qpktinfo_t pkt;
	phy_if_t nic;
	ip_t *iph;
	int iphlen, verdict;

	ASSERT(hpe != NULL);

	if (out == 0)
		nic = hpe->hpe_ifp;
	else
		nic = hpe->hpe_ofp;

	/* ip_len and ip_off go through ntohs() before the filter runs. */
	iph = hpe->hpe_hdr;
	iph->ip_len = ntohs(iph->ip_len);
	iph->ip_off = ntohs(iph->ip_off);
	iphlen = IPH_HDR_LENGTH(iph);

	/* Describe the packet for fr_check(). */
	pkt.qpi_m = hpe->hpe_mb;
	pkt.qpi_data = hpe->hpe_hdr;
	pkt.qpi_off = (char *)pkt.qpi_data - (char *)hpe->hpe_mb->b_rptr;
	pkt.qpi_ill = (void *)nic;
	pkt.qpi_flags = hpe->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
	if (pkt.qpi_flags != 0)
		pkt.qpi_flags |= FI_MBCAST;
	pkt.qpi_flags |= loopback;

	verdict = fr_check(hpe->hpe_hdr, iphlen, pkt.qpi_ill, out,
	    &pkt, hpe->hpe_mp, stack);

	/* For fastroute cases, fr_check returns 0 with mp set to NULL */
	if (verdict == 0 && *(hpe->hpe_mp) == NULL)
		verdict = 1;

	/* Hand the (possibly changed) mblk and header pointers back to IP. */
	hpe->hpe_mb = pkt.qpi_m;
	hpe->hpe_hdr = pkt.qpi_data;

	if (verdict != 0)
		return (verdict);

	/* Packet continues: run ip_len and ip_off through ntohs() again. */
	iph = pkt.qpi_data;
	iph->ip_len = ntohs(iph->ip_len);
	iph->ip_off = ntohs(iph->ip_off);

	return (verdict);
}
/*
 * IPv6 bridge between a packet hook event and fr_check().
 *
 *	info     - hook data, handled as a hook_pkt_event_t
 *	out      - 0 selects the inbound interface (hpe_ifp), any other
 *	           value the outbound one (hpe_ofp); also passed to
 *	           fr_check()
 *	loopback - extra bits OR'ed into qpi_flags (the loopback wrappers
 *	           in this file pass FI_NOCKSUM here)
 *	arg      - passed through to fr_check() unchanged
 *
 * The header length given to fr_check() is always sizeof (ip6_t).
 * Returns the fr_check() result, except that a result of 0 with
 * *hpe_mp == NULL is reported as 1.
 */
int ipf_hook6(hook_data_t info, int out, int loopback, void *arg)
{
	hook_pkt_event_t *hpe = (hook_pkt_event_t *)info;
	qpktinfo_t pkt;
	phy_if_t nic;
	int ip6hlen, verdict;

	ASSERT(hpe != NULL);

	if (out == 0)
		nic = hpe->hpe_ifp;
	else
		nic = hpe->hpe_ofp;

	ip6hlen = sizeof (ip6_t);

	/* Describe the packet for fr_check(). */
	pkt.qpi_m = hpe->hpe_mb;
	pkt.qpi_data = hpe->hpe_hdr;
	pkt.qpi_off = (char *)pkt.qpi_data - (char *)hpe->hpe_mb->b_rptr;
	pkt.qpi_ill = (void *)nic;
	pkt.qpi_flags = hpe->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
	if (pkt.qpi_flags != 0)
		pkt.qpi_flags |= FI_MBCAST;
	pkt.qpi_flags |= loopback;

	verdict = fr_check(hpe->hpe_hdr, ip6hlen, pkt.qpi_ill, out,
	    &pkt, hpe->hpe_mp, arg);

	/* For fastroute cases, fr_check returns 0 with mp set to NULL */
	if (verdict == 0 && *(hpe->hpe_mp) == NULL)
		verdict = 1;

	/* Hand the (possibly changed) mblk and header pointers back to IP. */
	hpe->hpe_mb = pkt.qpi_m;
	hpe->hpe_hdr = pkt.qpi_data;

	return (verdict);
}
2164 
2165 
2166 /* ------------------------------------------------------------------------ */
2167 /* Function:    ipf_nic_event_v4                                            */
2168 /* Returns:     int - 0 == no problems encountered                          */
2169 /* Parameters:  event(I)     - pointer to event                             */
2170 /*              info(I)      - pointer to information about a NIC event     */
2171 /*                                                                          */
2172 /* Function to receive asynchronous NIC events from IP                      */
2173 /* ------------------------------------------------------------------------ */
2174 /*ARGSUSED*/
ipf_nic_event_v4(hook_event_token_t event,hook_data_t info,void * arg)2175 int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg)
2176 {
2177 	struct sockaddr_in *sin;
2178 	hook_nic_event_t *hn;
2179 	ipf_stack_t *ifs = arg;
2180 	void *new_ifp = NULL;
2181 
2182 	if (ifs->ifs_fr_running <= 0)
2183 		return (0);
2184 
2185 	hn = (hook_nic_event_t *)info;
2186 
2187 	switch (hn->hne_event)
2188 	{
2189 	case NE_PLUMB :
2190 		frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data,
2191 		       ifs);
2192 		fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2193 			      hn->hne_data, ifs);
2194 		fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2195 			     hn->hne_data, ifs);
2196 		break;
2197 
2198 	case NE_UNPLUMB :
2199 		frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2200 		fr_natifpsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL,
2201 			      ifs);
2202 		fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2203 		break;
2204 
2205 	case NE_ADDRESS_CHANGE :
2206 		/*
2207 		 * We only respond to events for logical interface 0 because
2208 		 * IPFilter only uses the first address given to a network
2209 		 * interface.  We check for hne_lif==1 because the netinfo
2210 		 * code maps adds 1 to the lif number so that it can return
2211 		 * 0 to indicate "no more lifs" when walking them.
2212 		 */
2213 		if (hn->hne_lif == 1) {
2214 			frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL,
2215 			    ifs);
2216 			sin = hn->hne_data;
2217 			fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr,
2218 			    ifs);
2219 		}
2220 		break;
2221 
2222 #if SOLARIS2 >= 10
2223 	case NE_IFINDEX_CHANGE :
2224 		WRITE_ENTER(&ifs->ifs_ipf_mutex);
2225 
2226 		if (hn->hne_data != NULL) {
2227 			/*
2228 			 * The netinfo passes interface index as int (hne_data should be
2229 			 * handled as a pointer to int), which is always 32bit. We need to
2230 			 * convert it to void pointer here, since interfaces are
2231 			 * represented as pointers to void in IPF. The pointers are 64 bits
2232 			 * long on 64bit platforms. Doing something like
2233 			 *	(void *)((int) x)
2234 			 * will throw warning:
2235 			 *   "cast to pointer from integer of different size"
2236 			 * during 64bit compilation.
2237 			 *
2238 			 * The line below uses (size_t) to typecast int to
2239 			 * size_t, which might be 64bit/32bit (depending
2240 			 * on architecture). Once we have proper 64bit/32bit
2241 			 * type (size_t), we can safely convert it to void pointer.
2242 			 */
2243 			new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2244 			fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2245 			fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2246 			fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2247 		}
2248 		RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2249 		break;
2250 #endif
2251 
2252 	default :
2253 		break;
2254 	}
2255 
2256 	return 0;
2257 }
2258 
2259 
2260 /* ------------------------------------------------------------------------ */
2261 /* Function:    ipf_nic_event_v6                                            */
2262 /* Returns:     int - 0 == no problems encountered                          */
2263 /* Parameters:  event(I)     - pointer to event                             */
2264 /*              info(I)      - pointer to information about a NIC event     */
2265 /*                                                                          */
2266 /* Function to receive asynchronous NIC events from IP                      */
2267 /* ------------------------------------------------------------------------ */
2268 /*ARGSUSED*/
ipf_nic_event_v6(hook_event_token_t event,hook_data_t info,void * arg)2269 int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg)
2270 {
2271 	struct sockaddr_in6 *sin6;
2272 	hook_nic_event_t *hn;
2273 	ipf_stack_t *ifs = arg;
2274 	void *new_ifp = NULL;
2275 
2276 	if (ifs->ifs_fr_running <= 0)
2277 		return (0);
2278 
2279 	hn = (hook_nic_event_t *)info;
2280 
2281 	switch (hn->hne_event)
2282 	{
2283 	case NE_PLUMB :
2284 		frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2285 		       hn->hne_data, ifs);
2286 		fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2287 			      hn->hne_data, ifs);
2288 		fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2289 			     hn->hne_data, ifs);
2290 		break;
2291 
2292 	case NE_UNPLUMB :
2293 		frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2294 		fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL,
2295 			      ifs);
2296 		fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2297 		break;
2298 
2299 	case NE_ADDRESS_CHANGE :
2300 		if (hn->hne_lif == 1) {
2301 			sin6 = hn->hne_data;
2302 			fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr,
2303 				       ifs);
2304 		}
2305 		break;
2306 
2307 #if SOLARIS2 >= 10
2308 	case NE_IFINDEX_CHANGE :
2309 		WRITE_ENTER(&ifs->ifs_ipf_mutex);
2310 		if (hn->hne_data != NULL) {
2311 			/*
2312 			 * The netinfo passes interface index as int (hne_data should be
2313 			 * handled as a pointer to int), which is always 32bit. We need to
2314 			 * convert it to void pointer here, since interfaces are
2315 			 * represented as pointers to void in IPF. The pointers are 64 bits
2316 			 * long on 64bit platforms. Doing something like
2317 			 *	(void *)((int) x)
2318 			 * will throw warning:
2319 			 *   "cast to pointer from integer of different size"
2320 			 * during 64bit compilation.
2321 			 *
2322 			 * The line below uses (size_t) to typecast int to
2323 			 * size_t, which might be 64bit/32bit (depending
2324 			 * on architecture). Once we have proper 64bit/32bit
2325 			 * type (size_t), we can safely convert it to void pointer.
2326 			 */
2327 			new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2328 			fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2329 			fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2330 			fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2331 		}
2332 		RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2333 		break;
2334 #endif
2335 
2336 	default :
2337 		break;
2338 	}
2339 
2340 	return 0;
2341 }
2342 
2343 /*
2344  * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6()
2345  * are needed in Solaris kernel only. We don't need them in
2346  * ipftest to pretend the ICMP/RST packet was sent as a response.
2347  */
2348 #if defined(_KERNEL) && (SOLARIS2 >= 10)
2349 /* ------------------------------------------------------------------------ */
2350 /* Function:    fr_make_rst                                                 */
2351 /* Returns:     int - 0 on success, -1 on failure			    */
2352 /* Parameters:  fin(I) - pointer to packet information                      */
2353 /*                                                                          */
2354 /* We must alter the original mblks passed to IPF from IP stack via	    */
/* FW_HOOKS. FW_HOOKS interface is powerful, but it has some limitations.   */
/* IPF can basically do only these things with mblk representing a packet:  */
2357 /*	leave it as it is (pass the packet)				    */
2358 /*                                                                          */
2359 /*	discard it (block the packet)					    */
2360 /*                                                                          */
2361 /*	alter it (i.e. NAT)						    */
2362 /*                                                                          */
2363 /* As you can see IPF can not simply discard the mblk and supply a new one  */
2364 /* instead to IP stack via FW_HOOKS.					    */
2365 /*                                                                          */
2366 /* The return-rst action for packets coming via NIC is handled as follows:  */
2367 /*	mblk with packet is discarded					    */
2368 /*                                                                          */
2369 /*	new mblk with RST response is constructed and injected to network   */
2370 /*                                                                          */
2371 /* IPF can't inject packets to loopback interface, this is just another	    */
2372 /* limitation we have to deal with here. The only option to send RST	    */
2373 /* response to offending TCP packet coming via loopback is to alter it.	    */
2374 /*									    */
2375 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on	    */
2376 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to	    */
2377 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers.			    */
2378 /* ------------------------------------------------------------------------ */
fr_make_rst(fin)2379 int fr_make_rst(fin)
2380 fr_info_t *fin;
2381 {
2382 	uint16_t tmp_port;
2383 	int rv = -1;
2384 	uint32_t old_ack;
2385 	tcphdr_t *tcp = NULL;
2386 	struct in_addr tmp_src;
2387 #ifdef USE_INET6
2388 	struct in6_addr	tmp_src6;
2389 #endif
2390 
2391 	ASSERT(fin->fin_p == IPPROTO_TCP);
2392 
2393 	/*
2394 	 * We do not need to adjust chksum, since it is not being checked by
2395 	 * Solaris IP stack for loopback clients.
2396 	 */
2397 	if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) &&
2398 	    ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2399 
2400 		if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2401 			/* Swap IPv4 addresses. */
2402 			tmp_src = fin->fin_ip->ip_src;
2403 			fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2404 			fin->fin_ip->ip_dst = tmp_src;
2405 
2406 			rv = 0;
2407 		}
2408 		else
2409 			tcp = NULL;
2410 	}
2411 #ifdef USE_INET6
2412 	else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) &&
2413 	    ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2414 		/*
2415 		 * We are relying on fact the next header is TCP, which is true
2416 		 * for regular TCP packets coming in over loopback.
2417 		 */
2418 		if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2419 			/* Swap IPv6 addresses. */
2420 			tmp_src6 = fin->fin_ip6->ip6_src;
2421 			fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2422 			fin->fin_ip6->ip6_dst = tmp_src6;
2423 
2424 			rv = 0;
2425 		}
2426 		else
2427 			tcp = NULL;
2428 	}
2429 #endif
2430 
2431 	if (tcp != NULL) {
2432 		/*
2433 		 * Adjust TCP header:
2434 		 *	swap ports,
2435 		 *	set flags,
2436 		 *	set correct ACK number
2437 		 */
2438 		tmp_port = tcp->th_sport;
2439 		tcp->th_sport = tcp->th_dport;
2440 		tcp->th_dport = tmp_port;
2441 		old_ack = tcp->th_ack;
2442 		tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1);
2443 		tcp->th_seq = old_ack;
2444 		tcp->th_flags = TH_RST | TH_ACK;
2445 	}
2446 
2447 	return (rv);
2448 }
2449 
2450 /* ------------------------------------------------------------------------ */
2451 /* Function:    fr_make_icmp_v4                                             */
2452 /* Returns:     int - 0 on success, -1 on failure			    */
2453 /* Parameters:  fin(I) - pointer to packet information                      */
2454 /*                                                                          */
2455 /* Please read comment at fr_make_icmp() wrapper function to get an idea    */
2456 /* what is going to happen here and why. Once you read the comment there,   */
2457 /* continue here with next paragraph.					    */
2458 /*									    */
2459 /* To turn IPv4 packet into ICMPv4 response packet, these things must	    */
2460 /* happen here:								    */
2461 /*	(1) Original mblk is copied (duplicated).			    */
2462 /*                                                                          */
2463 /*	(2) ICMP header is created.					    */
2464 /*                                                                          */
2465 /*	(3) Link ICMP header with copy of original mblk, we have ICMPv4	    */
2466 /*	    data ready then.						    */
2467 /*                                                                          */
2468 /*      (4) Swap IP addresses in original mblk and adjust IP header data.   */
2469 /*                                                                          */
2470 /*	(5) The mblk containing original packet is trimmed to contain IP    */
2471 /*	    header only and ICMP chksum is computed.			    */
2472 /*                                                                          */
2473 /*	(6) The ICMP header we have from (3) is linked to original mblk,    */
2474 /*	    which now contains new IP header. If original packet was spread */
2475 /*	    over several mblks, only the first mblk is kept.		    */
2476 /* ------------------------------------------------------------------------ */
fr_make_icmp_v4(fin)2477 static int fr_make_icmp_v4(fin)
2478 fr_info_t *fin;
2479 {
2480 	struct in_addr tmp_src;
2481 	tcphdr_t *tcp;
2482 	struct icmp *icmp;
2483 	mblk_t *mblk_icmp;
2484 	mblk_t *mblk_ip;
2485 	size_t icmp_pld_len;	/* octets to append to ICMP header */
2486 	size_t orig_iphdr_len;	/* length of IP header only */
2487 	uint32_t sum;
2488 	uint16_t *buf;
2489 	int len;
2490 
2491 
2492 	if (fin->fin_v != 4)
2493 		return (-1);
2494 
2495 	/*
2496 	 * If we are dealing with TCP, then packet must be SYN/FIN to be routed
2497 	 * by IP stack. If it is not SYN/FIN, then we must drop it silently.
2498 	 */
2499 	tcp = (tcphdr_t *) fin->fin_dp;
2500 
2501 	if ((fin->fin_p == IPPROTO_TCP) &&
2502 	    ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2503 		return (-1);
2504 
2505 	/*
2506 	 * Step (1)
2507 	 *
2508 	 * Make copy of original mblk.
2509 	 *
2510 	 * We want to copy as much data as necessary, not less, not more.  The
2511 	 * ICMPv4 payload length for unreachable messages is:
2512 	 *	original IP header + 8 bytes of L4 (if there are any).
2513 	 *
2514 	 * We determine if there are at least 8 bytes of L4 data following IP
2515 	 * header first.
2516 	 */
2517 	icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ?
2518 		ICMPERR_ICMPHLEN : fin->fin_dlen;
2519 	/*
2520 	 * Since we don't want to copy more data than necessary, we must trim
2521 	 * the original mblk here.  The right way (STREAMish) would be to use
2522 	 * adjmsg() to trim it.  However we would have to calculate the length
2523 	 * argument for adjmsg() from pointers we already have here.
2524 	 *
2525 	 * Since we have pointers and offsets, it's faster and easier for
2526 	 * us to just adjust pointers by hand instead of using adjmsg().
2527 	 */
2528 	fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp;
2529 	fin->fin_m->b_wptr += icmp_pld_len;
2530 	icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip;
2531 
2532 	/*
2533 	 * Also we don't want to copy any L2 stuff, which might precede IP
2534 	 * header, so we have have to set b_rptr to point to the start of IP
2535 	 * header.
2536 	 */
2537 	fin->fin_m->b_rptr += fin->fin_ipoff;
2538 	if ((mblk_ip = copyb(fin->fin_m)) == NULL)
2539 		return (-1);
2540 	fin->fin_m->b_rptr -= fin->fin_ipoff;
2541 
2542 	/*
2543 	 * Step (2)
2544 	 *
2545 	 * Create an ICMP header, which will be appened to original mblk later.
2546 	 * ICMP header is just another mblk.
2547 	 */
2548 	mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI);
2549 	if (mblk_icmp == NULL) {
2550 		FREE_MB_T(mblk_ip);
2551 		return (-1);
2552 	}
2553 
2554 	MTYPE(mblk_icmp) = M_DATA;
2555 	icmp = (struct icmp *) mblk_icmp->b_wptr;
2556 	icmp->icmp_type = ICMP_UNREACH;
2557 	icmp->icmp_code = fin->fin_icode & 0xFF;
2558 	icmp->icmp_void = 0;
2559 	icmp->icmp_cksum = 0;
2560 	mblk_icmp->b_wptr += ICMPERR_ICMPHLEN;
2561 
2562 	/*
2563 	 * Step (3)
2564 	 *
2565 	 * Complete ICMP packet - link ICMP header with L4 data from original
2566 	 * IP packet.
2567 	 */
2568 	linkb(mblk_icmp, mblk_ip);
2569 
2570 	/*
2571 	 * Step (4)
2572 	 *
2573 	 * Swap IP addresses and change IP header fields accordingly in
2574 	 * original IP packet.
2575 	 *
2576 	 * There is a rule option return-icmp as a dest for physical
2577 	 * interfaces. This option becomes useless for loopback, since IPF box
2578 	 * uses same address as a loopback destination. We ignore the option
2579 	 * here, the ICMP packet will always look like as it would have been
2580 	 * sent from the original destination host.
2581 	 */
2582 	tmp_src = fin->fin_ip->ip_src;
2583 	fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2584 	fin->fin_ip->ip_dst = tmp_src;
2585 	fin->fin_ip->ip_p = IPPROTO_ICMP;
2586 	fin->fin_ip->ip_sum = 0;
2587 
2588 	/*
2589 	 * Step (5)
2590 	 *
2591 	 * We trim the orignal mblk to hold IP header only.
2592 	 */
2593 	fin->fin_m->b_wptr = fin->fin_dp;
2594 	orig_iphdr_len = fin->fin_m->b_wptr -
2595 			    (fin->fin_m->b_rptr + fin->fin_ipoff);
2596 	fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN +
2597 			    orig_iphdr_len);
2598 
2599 	/*
2600 	 * ICMP chksum calculation. The data we are calculating chksum for are
2601 	 * spread over two mblks, therefore we have to use two for loops.
2602 	 *
2603 	 * First for loop computes chksum part for ICMP header.
2604 	 */
2605 	buf = (uint16_t *) icmp;
2606 	len = ICMPERR_ICMPHLEN;
2607 	for (sum = 0; len > 1; len -= 2)
2608 		sum += *buf++;
2609 
2610 	/*
2611 	 * Here we add chksum part for ICMP payload.
2612 	 */
2613 	len = icmp_pld_len;
2614 	buf = (uint16_t *) mblk_ip->b_rptr;
2615 	for (; len > 1; len -= 2)
2616 		sum += *buf++;
2617 
2618 	/*
2619 	 * Chksum is done.
2620 	 */
2621 	sum = (sum >> 16) + (sum & 0xffff);
2622 	sum += (sum >> 16);
2623 	icmp->icmp_cksum = ~sum;
2624 
2625 	/*
2626 	 * Step (6)
2627 	 *
2628 	 * Release all packet mblks, except the first one.
2629 	 */
2630 	if (fin->fin_m->b_cont != NULL) {
2631 		FREE_MB_T(fin->fin_m->b_cont);
2632 	}
2633 
2634 	/*
2635 	 * Append ICMP payload to first mblk, which already contains new IP
2636 	 * header.
2637 	 */
2638 	linkb(fin->fin_m, mblk_icmp);
2639 
2640 	return (0);
2641 }
2642 
2643 #ifdef USE_INET6
2644 /* ------------------------------------------------------------------------ */
2645 /* Function:    fr_make_icmp_v6                                             */
2646 /* Returns:     int - 0 on success, -1 on failure			    */
2647 /* Parameters:  fin(I) - pointer to packet information                      */
2648 /*									    */
2649 /* Please read comment at fr_make_icmp() wrapper function to get an idea    */
/* what is going to happen here and why. Once you read the comment there,   */
2651 /* continue here with next paragraph.					    */
2652 /*									    */
2653 /* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response.    */
2654 /* The algorithm is fairly simple:					    */
2655 /*	1) We need to get copy of complete mblk.			    */
2656 /*									    */
2657 /*	2) New ICMPv6 header is created.				    */
2658 /*									    */
2659 /*	3) The copy of original mblk with packet is linked to ICMPv6	    */
2660 /*	   header.							    */
2661 /*									    */
2662 /*	4) The checksum must be adjusted.				    */
2663 /*									    */
2664 /*	5) IP addresses in original mblk are swapped and IP header data	    */
2665 /*	   are adjusted (protocol number).				    */
2666 /*									    */
2667 /*	6) Original mblk is trimmed to hold IPv6 header only, then it is    */
2668 /*	   linked with the ICMPv6 data we got from (3).			    */
2669 /* ------------------------------------------------------------------------ */
fr_make_icmp_v6(fin)2670 static int fr_make_icmp_v6(fin)
2671 fr_info_t *fin;
2672 {
2673 	struct icmp6_hdr *icmp6;
2674 	tcphdr_t *tcp;
2675 	struct in6_addr	tmp_src6;
2676 	size_t icmp_pld_len;
2677 	mblk_t *mblk_ip, *mblk_icmp;
2678 
2679 	if (fin->fin_v != 6)
2680 		return (-1);
2681 
2682 	/*
2683 	 * If we are dealing with TCP, then packet must SYN/FIN to be routed by
2684 	 * IP stack. If it is not SYN/FIN, then we must drop it silently.
2685 	 */
2686 	tcp = (tcphdr_t *) fin->fin_dp;
2687 
2688 	if ((fin->fin_p == IPPROTO_TCP) &&
2689 	    ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2690 		return (-1);
2691 
2692 	/*
2693 	 * Step (1)
2694 	 *
2695 	 * We need to copy complete packet in case of IPv6, no trimming is
2696 	 * needed (except the L2 headers).
2697 	 */
2698 	icmp_pld_len = M_LEN(fin->fin_m);
2699 	fin->fin_m->b_rptr += fin->fin_ipoff;
2700 	if ((mblk_ip = copyb(fin->fin_m)) == NULL)
2701 		return (-1);
2702 	fin->fin_m->b_rptr -= fin->fin_ipoff;
2703 
2704 	/*
2705 	 * Step (2)
2706 	 *
2707 	 * Allocate and create ICMP header.
2708 	 */
2709 	mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr),
2710 			BPRI_HI);
2711 
2712 	if (mblk_icmp == NULL)
2713 		return (-1);
2714 
2715 	MTYPE(mblk_icmp) = M_DATA;
2716 	icmp6 =  (struct icmp6_hdr *) mblk_icmp->b_wptr;
2717 	icmp6->icmp6_type = ICMP6_DST_UNREACH;
2718 	icmp6->icmp6_code = fin->fin_icode & 0xFF;
2719 	icmp6->icmp6_data32[0] = 0;
2720 	mblk_icmp->b_wptr += sizeof (struct icmp6_hdr);
2721 
2722 	/*
2723 	 * Step (3)
2724 	 *
2725 	 * Link the copy of IP packet to ICMP header.
2726 	 */
2727 	linkb(mblk_icmp, mblk_ip);
2728 
2729 	/*
2730 	 * Step (4)
2731 	 *
2732 	 * Calculate chksum - this is much more easier task than in case of
2733 	 * IPv4  - ICMPv6 chksum only covers IP addresses, and payload length.
2734 	 * We are making compensation just for change of packet length.
2735 	 */
2736 	icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr);
2737 
2738 	/*
2739 	 * Step (5)
2740 	 *
2741 	 * Swap IP addresses.
2742 	 */
2743 	tmp_src6 = fin->fin_ip6->ip6_src;
2744 	fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2745 	fin->fin_ip6->ip6_dst = tmp_src6;
2746 
2747 	/*
2748 	 * and adjust IP header data.
2749 	 */
2750 	fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6;
2751 	fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr));
2752 
2753 	/*
2754 	 * Step (6)
2755 	 *
2756 	 * We must release all linked mblks from original packet and keep only
2757 	 * the first mblk with IP header to link ICMP data.
2758 	 */
2759 	fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t);
2760 
2761 	if (fin->fin_m->b_cont != NULL) {
2762 		FREE_MB_T(fin->fin_m->b_cont);
2763 	}
2764 
2765 	/*
2766 	 * Append ICMP payload to IP header.
2767 	 */
2768 	linkb(fin->fin_m, mblk_icmp);
2769 
2770 	return (0);
2771 }
2772 #endif	/* USE_INET6 */
2773 
2774 /* ------------------------------------------------------------------------ */
2775 /* Function:    fr_make_icmp                                                */
2776 /* Returns:     int - 0 on success, -1 on failure			    */
2777 /* Parameters:  fin(I) - pointer to packet information                      */
2778 /*                                                                          */
2779 /* We must alter the original mblks passed to IPF from IP stack via	    */
2780 /* FW_HOOKS. The reasons why we must alter packet are discussed within	    */
2781 /* comment at fr_make_rst() function.					    */
2782 /*									    */
2783 /* The fr_make_icmp() function acts as a wrapper, which passes the code	    */
2784 /* execution to	fr_make_icmp_v4() or fr_make_icmp_v6() depending on	    */
2785 /* protocol version. However there are some details, which are common to    */
2786 /* both IP versions. The details are going to be explained here.	    */
2787 /*                                                                          */
2788 /* The packet looks as follows:						    */
2789 /*    xxx | IP hdr | IP payload    ...	| 				    */
2790 /*    ^   ^        ^            	^				    */
2791 /*    |   |        |            	|				    */
2792 /*    |   |        |		fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */
2793 /*    |   |        |							    */
2794 /*    |   |        `- fin_m->fin_dp (in case of IPv4 points to L4 header)   */
2795 /*    |   |								    */
2796 /*    |   `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case  */
2797 /*    |      of loopback)						    */
2798 /*    |   								    */
2799 /*    `- fin_m->b_rptr -  points to L2 header in case of physical NIC	    */
2800 /*                                                                          */
2801 /* All relevant IP headers are pulled up into the first mblk. It happened   */
2802 /* well in advance before the matching rule was found (the rule, which took */
2803 /* us here, to fr_make_icmp() function).				    */
2804 /*                                                                          */
2805 /* Both functions will turn packet passed in fin->fin_m mblk into a new	    */
2806 /* packet. New packet will be represented as chain of mblks.		    */
2807 /* orig mblk |- b_cont ---.						    */
2808 /*    ^                    `-> ICMP hdr |- b_cont--.			    */
2809 /*    |	                          ^	            `-> duped orig mblk	    */
2810 /*    |                           |				^	    */
2811 /*    `- The original mblk        |				|	    */
2812 /*       will be trimmed to       |				|	    */
2813 /*       to contain IP header     |				|	    */
2814 /*       only                     |				|	    */
2815 /*                                |				|	    */
2816 /*                                `- This is newly		|           */
2817 /*                                   allocated mblk to		|	    */
2818 /*                                   hold ICMPv6 data.		|	    */
2819 /*								|	    */
2820 /*								|	    */
2821 /*								|	    */
2822 /*	    This is the copy of original mblk, it will contain -'	    */
/*	    original IP packet in case of ICMPv6. In case of		    */
2824 /*	    ICMPv4 it will contain up to 8 bytes of IP payload		    */
2825 /*	    (TCP/UDP/L4) data from original packet.			    */
2826 /* ------------------------------------------------------------------------ */
fr_make_icmp(fin)2827 int fr_make_icmp(fin)
2828 fr_info_t *fin;
2829 {
2830 	int rv;
2831 
2832 	if (fin->fin_v == 4)
2833 		rv = fr_make_icmp_v4(fin);
2834 #ifdef USE_INET6
2835 	else if (fin->fin_v == 6)
2836 		rv = fr_make_icmp_v6(fin);
2837 #endif
2838 	else
2839 		rv = -1;
2840 
2841 	return (rv);
2842 }
2843 
2844 /* ------------------------------------------------------------------------ */
2845 /* Function:    fr_buf_sum						    */
2846 /* Returns:     unsigned int - sum of buffer buf			    */
2847 /* Parameters:  buf - pointer to buf we want to sum up			    */
2848 /*              len - length of buffer buf				    */
2849 /*                                                                          */
2850 /* Sums buffer buf. The result is used for chksum calculation. The buf	    */
2851 /* argument must be aligned.						    */
2852 /* ------------------------------------------------------------------------ */
/*
 * Add up the buffer as a sequence of 16 bit words read in memory order
 * and return the 32 bit total, without folding carries.  When len is
 * odd, the last byte is added as the first byte of a word whose second
 * byte is zero.  buf must be suitably aligned for uint16_t access.
 */
static uint32_t fr_buf_sum(buf, len)
const void *buf;
unsigned int len;
{
	uint16_t	*word = (uint16_t *)buf;
	uint32_t	total = 0;
	unsigned int	nwords;

	/* Whole 16 bit words first. */
	for (nwords = len / 2; nwords != 0; nwords--)
		total += *word++;

	/* Then the odd trailing byte, if there is one. */
	if ((len % 2) != 0)
		total += htons((*(unsigned char *)word) << 8);

	return (total);
}
2870 
2871 /* ------------------------------------------------------------------------ */
2872 /* Function:    fr_calc_chksum						    */
2873 /* Returns:     void							    */
2874 /* Parameters:  fin - pointer to fr_info_t instance with packet data	    */
2875 /*              pkt - pointer to duplicated packet			    */
2876 /*                                                                          */
2877 /* Calculates all chksums (L3, L4) for packet pkt. Works for both IP	    */
2878 /* versions.								    */
2879 /* ------------------------------------------------------------------------ */
fr_calc_chksum(fin,pkt)2880 void fr_calc_chksum(fin, pkt)
2881 fr_info_t *fin;
2882 mb_t *pkt;
2883 {
2884 	struct pseudo_hdr {
2885 		union {
2886 			struct in_addr	in4;
2887 #ifdef USE_INET6
2888 			struct in6_addr	in6;
2889 #endif
2890 		} src_addr;
2891 		union {
2892 			struct in_addr	in4;
2893 #ifdef USE_INET6
2894 			struct in6_addr	in6;
2895 #endif
2896 		} dst_addr;
2897 		char		zero;
2898 		char		proto;
2899 		uint16_t	len;
2900 	}	phdr;
2901 	uint32_t	sum, ip_sum;
2902 	void	*buf;
2903 	uint16_t	*l4_csum_p;
2904 	tcphdr_t	*tcp;
2905 	udphdr_t	*udp;
2906 	icmphdr_t	*icmp;
2907 #ifdef USE_INET6
2908 	struct icmp6_hdr	*icmp6;
2909 #endif
2910 	ip_t		*ip;
2911 	unsigned int	len;
2912 	int		pld_len;
2913 
2914 	/*
2915 	 * We need to pullup the packet to the single continuous buffer to avoid
2916 	 * potential misaligment of b_rptr member in mblk chain.
2917 	 */
2918 	if (pullupmsg(pkt, -1) == 0) {
2919 		cmn_err(CE_WARN, "Failed to pullup loopback pkt -> chksum"
2920 		    " will not be computed by IPF");
2921 		return;
2922 	}
2923 
2924 	/*
2925 	 * It is guaranteed IP header starts right at b_rptr, because we are
2926 	 * working with a copy of the original packet.
2927 	 *
2928 	 * Compute pseudo header chksum for TCP and UDP.
2929 	 */
2930 	if ((fin->fin_p == IPPROTO_UDP) ||
2931 	    (fin->fin_p == IPPROTO_TCP)) {
2932 		bzero(&phdr, sizeof (phdr));
2933 #ifdef USE_INET6
2934 		if (fin->fin_v == 6) {
2935 			phdr.src_addr.in6 = fin->fin_srcip6;
2936 			phdr.dst_addr.in6 = fin->fin_dstip6;
2937 		} else {
2938 			phdr.src_addr.in4 = fin->fin_src;
2939 			phdr.dst_addr.in4 = fin->fin_dst;
2940 		}
2941 #else
2942 		phdr.src_addr.in4 = fin->fin_src;
2943 		phdr.dst_addr.in4 = fin->fin_dst;
2944 #endif
2945 		phdr.zero = (char) 0;
2946 		phdr.proto = fin->fin_p;
2947 		phdr.len = htons((uint16_t)fin->fin_dlen);
2948 		sum = fr_buf_sum(&phdr, (unsigned int)sizeof (phdr));
2949 	} else {
2950 		sum = 0;
2951 	}
2952 
2953 	/*
2954 	 * Set pointer to the L4 chksum field in the packet, set buf pointer to
2955 	 * the L4 header start.
2956 	 */
2957 	switch (fin->fin_p) {
2958 		case IPPROTO_UDP:
2959 			udp = (udphdr_t *)(pkt->b_rptr + fin->fin_hlen);
2960 			l4_csum_p = &udp->uh_sum;
2961 			buf = udp;
2962 			break;
2963 		case IPPROTO_TCP:
2964 			tcp = (tcphdr_t *)(pkt->b_rptr + fin->fin_hlen);
2965 			l4_csum_p = &tcp->th_sum;
2966 			buf = tcp;
2967 			break;
2968 		case IPPROTO_ICMP:
2969 			icmp = (icmphdr_t *)(pkt->b_rptr + fin->fin_hlen);
2970 			l4_csum_p = &icmp->icmp_cksum;
2971 			buf = icmp;
2972 			break;
2973 #ifdef USE_INET6
2974 		case IPPROTO_ICMPV6:
2975 			icmp6 = (struct icmp6_hdr *)(pkt->b_rptr + fin->fin_hlen);
2976 			l4_csum_p = &icmp6->icmp6_cksum;
2977 			buf = icmp6;
2978 			break;
2979 #endif
2980 		default:
2981 			l4_csum_p = NULL;
2982 	}
2983 
2984 	/*
2985 	 * Compute L4 chksum if needed.
2986 	 */
2987 	if (l4_csum_p != NULL) {
2988 		*l4_csum_p = (uint16_t)0;
2989 		pld_len = fin->fin_dlen;
2990 		len = pkt->b_wptr - (unsigned char *)buf;
2991 		ASSERT(len == pld_len);
2992 		/*
2993 		 * Add payload sum to pseudoheader sum.
2994 		 */
2995 		sum += fr_buf_sum(buf, len);
2996 		while (sum >> 16)
2997 			sum = (sum & 0xFFFF) + (sum >> 16);
2998 
2999 		*l4_csum_p = ~((uint16_t)sum);
3000 		DTRACE_PROBE1(l4_sum, uint16_t, *l4_csum_p);
3001 	}
3002 
3003 	/*
3004 	 * The IP header chksum is needed just for IPv4.
3005 	 */
3006 	if (fin->fin_v == 4) {
3007 		/*
3008 		 * Compute IPv4 header chksum.
3009 		 */
3010 		ip = (ip_t *)pkt->b_rptr;
3011 		ip->ip_sum = (uint16_t)0;
3012 		ip_sum = fr_buf_sum(ip, (unsigned int)fin->fin_hlen);
3013 		while (ip_sum >> 16)
3014 			ip_sum = (ip_sum & 0xFFFF) + (ip_sum >> 16);
3015 
3016 		ip->ip_sum = ~((uint16_t)ip_sum);
3017 		DTRACE_PROBE1(l3_sum, uint16_t, ip->ip_sum);
3018 	}
3019 
3020 	return;
3021 }
3022 
3023 #endif	/* _KERNEL && SOLARIS2 >= 10 */
3024