xref: /titanic_52/usr/src/uts/common/inet/ipf/ip_nat.c (revision c7158ae983f5a04c4a998f468ecefba6d23ba721)
1 /*
2  * Copyright (C) 1995-2004 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
7  * Use is subject to license terms.
8  */
9 
10 #pragma ident	"%Z%%M%	%I%	%E% SMI"$
11 
12 #if defined(KERNEL) || defined(_KERNEL)
13 # undef KERNEL
14 # undef _KERNEL
15 # define        KERNEL	1
16 # define        _KERNEL	1
17 #endif
18 #include <sys/errno.h>
19 #include <sys/types.h>
20 #include <sys/param.h>
21 #include <sys/time.h>
22 #include <sys/file.h>
23 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
24     defined(_KERNEL)
25 # include "opt_ipfilter_log.h"
26 #endif
27 #if !defined(_KERNEL)
28 # include <stdio.h>
29 # include <string.h>
30 # include <stdlib.h>
31 # define _KERNEL
32 # ifdef __OpenBSD__
33 struct file;
34 # endif
35 # include <sys/uio.h>
36 # undef _KERNEL
37 #endif
38 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
39 # include <sys/filio.h>
40 # include <sys/fcntl.h>
41 #else
42 # include <sys/ioctl.h>
43 #endif
44 #if !defined(AIX)
45 # include <sys/fcntl.h>
46 #endif
47 #if !defined(linux)
48 # include <sys/protosw.h>
49 #endif
50 #include <sys/socket.h>
51 #if defined(_KERNEL)
52 # include <sys/systm.h>
53 # if !defined(__SVR4) && !defined(__svr4__)
54 #  include <sys/mbuf.h>
55 # endif
56 #endif
57 #if defined(__SVR4) || defined(__svr4__)
58 # include <sys/filio.h>
59 # include <sys/byteorder.h>
60 # ifdef _KERNEL
61 #  include <sys/dditypes.h>
62 # endif
63 # include <sys/stream.h>
64 # include <sys/kmem.h>
65 #endif
66 #if __FreeBSD_version >= 300000
67 # include <sys/queue.h>
68 #endif
69 #include <net/if.h>
70 #if __FreeBSD_version >= 300000
71 # include <net/if_var.h>
72 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
73 #  include "opt_ipfilter.h"
74 # endif
75 #endif
76 #ifdef sun
77 # include <net/af.h>
78 #endif
79 #include <net/route.h>
80 #include <netinet/in.h>
81 #include <netinet/in_systm.h>
82 #include <netinet/ip.h>
83 
84 #ifdef RFC1825
85 # include <vpn/md5.h>
86 # include <vpn/ipsec.h>
87 extern struct ifnet vpnif;
88 #endif
89 
90 #if !defined(linux)
91 # include <netinet/ip_var.h>
92 #endif
93 #include <netinet/tcp.h>
94 #include <netinet/udp.h>
95 #include <netinet/ip_icmp.h>
96 #include "netinet/ip_compat.h"
97 #include <netinet/tcpip.h>
98 #include "netinet/ip_fil.h"
99 #include "netinet/ip_nat.h"
100 #include "netinet/ip_frag.h"
101 #include "netinet/ip_state.h"
102 #include "netinet/ip_proxy.h"
103 #include "netinet/ipf_stack.h"
104 #ifdef	IPFILTER_SYNC
105 #include "netinet/ip_sync.h"
106 #endif
107 #if (__FreeBSD_version >= 300000)
108 # include <sys/malloc.h>
109 #endif
110 /* END OF INCLUDES */
111 
/* Make SOCKADDR_IN mean struct sockaddr_in, replacing any earlier macro. */
#undef	SOCKADDR_IN
#define	SOCKADDR_IN	struct sockaddr_in

/* Source identification strings; left out when lint is defined. */
#if !defined(lint)
static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 darrenr Exp $";
#endif
119 
120 
121 /* ======================================================================== */
122 /* How the NAT is organised and works.                                      */
123 /*                                                                          */
124 /* Inside (interface y) NAT       Outside (interface x)                     */
125 /* -------------------- -+- -------------------------------------           */
/* Packet going          |   out, processed by fr_checknatout() for x       */
127 /* ------------>         |   ------------>                                  */
128 /* src=10.1.1.1          |   src=192.1.1.1                                  */
129 /*                       |                                                  */
130 /*                       |   in, processed by fr_checknatin() for x         */
131 /* <------------         |   <------------                                  */
132 /* dst=10.1.1.1          |   dst=192.1.1.1                                  */
133 /* -------------------- -+- -------------------------------------           */
134 /* fr_checknatout() - changes ip_src and if required, sport                 */
135 /*             - creates a new mapping, if required.                        */
136 /* fr_checknatin()  - changes ip_dst and if required, dport                 */
137 /*                                                                          */
138 /* In the NAT table, internal source is recorded as "in" and externally     */
139 /* seen as "out".                                                           */
140 /* ======================================================================== */
141 
142 
/*
 * Forward declarations of the functions that are private (static) to this
 * file.  The __P() wrapper around each parameter list is kept as used
 * throughout; NOTE(review): presumably it drops the list for pre-ANSI
 * compilers - confirm against its definition.
 */
static	int	nat_flushtable __P((ipf_stack_t *));
static	int	nat_clearlist __P((ipf_stack_t *));
static	void	nat_addnat __P((struct ipnat *, ipf_stack_t *));
static	void	nat_addrdr __P((struct ipnat *, ipf_stack_t *));
static	void	nat_delete __P((struct nat *, int, ipf_stack_t *));
static	void	nat_delrdr __P((struct ipnat *));
static	void	nat_delnat __P((struct ipnat *));
static	int	fr_natgetent __P((caddr_t, ipf_stack_t *));
static	int	fr_natgetsz __P((caddr_t, ipf_stack_t *));
static	int	fr_natputent __P((caddr_t, int, ipf_stack_t *));
static	void	nat_tabmove __P((nat_t *, ipf_stack_t *));
static	int	nat_match __P((fr_info_t *, ipnat_t *));
static	INLINE	int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
static	INLINE	int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
static	hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
				    struct in_addr, struct in_addr, u_32_t,
				    ipf_stack_t *));
static	INLINE	int nat_icmpquerytype4 __P((int));
static	int	nat_siocaddnat __P((ipnat_t *, ipnat_t **, int,
				    ipf_stack_t *));
static	void	nat_siocdelnat __P((ipnat_t *, ipnat_t **, int,
				    ipf_stack_t *));
static	INLINE	int nat_icmperrortype4 __P((int));
static	INLINE	int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
				      tcphdr_t *, nat_t **, int));
static	INLINE	int nat_resolverule __P((ipnat_t *, ipf_stack_t *));
static	nat_t	*fr_natclone __P((fr_info_t *, nat_t *));
static	void	nat_mssclamp __P((tcphdr_t *, u_32_t, u_short *));
static	INLINE	int nat_wildok __P((nat_t *, int, int, int, int));
static	int	nat_getnext __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
static	int	nat_iterator __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
static	int	nat_extraflush __P((int, ipf_stack_t *));
static	int	nat_earlydrop __P((ipftq_t *, int, ipf_stack_t *));
static	int	nat_flushclosing __P((int, ipf_stack_t *));
178 
/*
 * Below we declare a list of constants used only in the nat_extraflush()
 * routine.  We are placing it here, instead of in nat_extraflush() itself,
 * because we want to make it visible to tools such as mdb, nm etc., so the
 * values can easily be altered during debugging.
 *
 * The four entries are idle periods, each written as IPF_TTLVAL(seconds),
 * listed from the shortest to the longest.
 */
static	const int	idletime_tab[] = {
	IPF_TTLVAL(30),		/* 30 seconds */
	IPF_TTLVAL(1800),	/* 30 minutes */
	IPF_TTLVAL(43200),	/* 12 hours */
	IPF_TTLVAL(345600),	/* 4 days */
};
191 
/*
 * True when the NAT entry (n) has any of the IPN_TCPUDPICMP flags set and
 * its nat_inport differs from its nat_outport.  The argument is expanded
 * three times, so it must not have side effects.
 */
#define NAT_HAS_L4_CHANGED(n)	\
 	(((n)->nat_flags & (IPN_TCPUDPICMP)) && \
 	(n)->nat_inport != (n)->nat_outport)
195 
196 /* ------------------------------------------------------------------------ */
197 /* Function:    fr_natinit                                                  */
198 /* Returns:     int - 0 == success, -1 == failure                           */
199 /* Parameters:  Nil                                                         */
200 /*                                                                          */
201 /* Initialise all of the NAT locks, tables and other structures.            */
202 /* ------------------------------------------------------------------------ */
203 int fr_natinit(ifs)
204 ipf_stack_t *ifs;
205 {
206 	int i;
207 
208 	KMALLOCS(ifs->ifs_nat_table[0], nat_t **,
209 		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
210 	if (ifs->ifs_nat_table[0] != NULL)
211 		bzero((char *)ifs->ifs_nat_table[0],
212 		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
213 	else
214 		return -1;
215 
216 	KMALLOCS(ifs->ifs_nat_table[1], nat_t **,
217 		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
218 	if (ifs->ifs_nat_table[1] != NULL)
219 		bzero((char *)ifs->ifs_nat_table[1],
220 		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
221 	else
222 		return -2;
223 
224 	KMALLOCS(ifs->ifs_nat_rules, ipnat_t **,
225 		 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
226 	if (ifs->ifs_nat_rules != NULL)
227 		bzero((char *)ifs->ifs_nat_rules,
228 		      ifs->ifs_ipf_natrules_sz * sizeof(ipnat_t *));
229 	else
230 		return -3;
231 
232 	KMALLOCS(ifs->ifs_rdr_rules, ipnat_t **,
233 		 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
234 	if (ifs->ifs_rdr_rules != NULL)
235 		bzero((char *)ifs->ifs_rdr_rules,
236 		      ifs->ifs_ipf_rdrrules_sz * sizeof(ipnat_t *));
237 	else
238 		return -4;
239 
240 	KMALLOCS(ifs->ifs_maptable, hostmap_t **,
241 		 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
242 	if (ifs->ifs_maptable != NULL)
243 		bzero((char *)ifs->ifs_maptable,
244 		      sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
245 	else
246 		return -5;
247 
248 	ifs->ifs_ipf_hm_maplist = NULL;
249 
250 	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[0], u_long *,
251 		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
252 	if (ifs->ifs_nat_stats.ns_bucketlen[0] == NULL)
253 		return -1;
254 	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[0],
255 	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
256 
257 	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[1], u_long *,
258 		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
259 	if (ifs->ifs_nat_stats.ns_bucketlen[1] == NULL)
260 		return -1;
261 	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[1],
262 	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
263 
264 	if (ifs->ifs_fr_nat_maxbucket == 0) {
265 		for (i = ifs->ifs_ipf_nattable_sz; i > 0; i >>= 1)
266 			ifs->ifs_fr_nat_maxbucket++;
267 		ifs->ifs_fr_nat_maxbucket *= 2;
268 	}
269 
270 	fr_sttab_init(ifs->ifs_nat_tqb, ifs);
271 	/*
272 	 * Increase this because we may have "keep state" following this too
273 	 * and packet storms can occur if this is removed too quickly.
274 	 */
275 	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcplastack;
276 	ifs->ifs_nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_nat_udptq;
277 	ifs->ifs_nat_udptq.ifq_ttl = ifs->ifs_fr_defnatage;
278 	ifs->ifs_nat_udptq.ifq_ref = 1;
279 	ifs->ifs_nat_udptq.ifq_head = NULL;
280 	ifs->ifs_nat_udptq.ifq_tail = &ifs->ifs_nat_udptq.ifq_head;
281 	MUTEX_INIT(&ifs->ifs_nat_udptq.ifq_lock, "nat ipftq udp tab");
282 	ifs->ifs_nat_udptq.ifq_next = &ifs->ifs_nat_icmptq;
283 	ifs->ifs_nat_icmptq.ifq_ttl = ifs->ifs_fr_defnaticmpage;
284 	ifs->ifs_nat_icmptq.ifq_ref = 1;
285 	ifs->ifs_nat_icmptq.ifq_head = NULL;
286 	ifs->ifs_nat_icmptq.ifq_tail = &ifs->ifs_nat_icmptq.ifq_head;
287 	MUTEX_INIT(&ifs->ifs_nat_icmptq.ifq_lock, "nat icmp ipftq tab");
288 	ifs->ifs_nat_icmptq.ifq_next = &ifs->ifs_nat_iptq;
289 	ifs->ifs_nat_iptq.ifq_ttl = ifs->ifs_fr_defnatipage;
290 	ifs->ifs_nat_iptq.ifq_ref = 1;
291 	ifs->ifs_nat_iptq.ifq_head = NULL;
292 	ifs->ifs_nat_iptq.ifq_tail = &ifs->ifs_nat_iptq.ifq_head;
293 	MUTEX_INIT(&ifs->ifs_nat_iptq.ifq_lock, "nat ip ipftq tab");
294 	ifs->ifs_nat_iptq.ifq_next = NULL;
295 
296 	for (i = 0; i < IPF_TCP_NSTATES; i++) {
297 		if (ifs->ifs_nat_tqb[i].ifq_ttl < ifs->ifs_fr_defnaticmpage)
298 			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnaticmpage;
299 #ifdef LARGE_NAT
300 		else if (ifs->ifs_nat_tqb[i].ifq_ttl > ifs->ifs_fr_defnatage)
301 			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnatage;
302 #endif
303 	}
304 
305 	/*
306 	 * Increase this because we may have "keep state" following
307 	 * this too and packet storms can occur if this is removed
308 	 * too quickly.
309 	 */
310 	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl =
311 	    ifs->ifs_nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
312 
313 	RWLOCK_INIT(&ifs->ifs_ipf_nat, "ipf IP NAT rwlock");
314 	RWLOCK_INIT(&ifs->ifs_ipf_natfrag, "ipf IP NAT-Frag rwlock");
315 	MUTEX_INIT(&ifs->ifs_ipf_nat_new, "ipf nat new mutex");
316 	MUTEX_INIT(&ifs->ifs_ipf_natio, "ipf nat io mutex");
317 
318 	ifs->ifs_fr_nat_init = 1;
319 
320 	return 0;
321 }
322 
323 
324 /* ------------------------------------------------------------------------ */
325 /* Function:    nat_addrdr                                                  */
326 /* Returns:     Nil                                                         */
327 /* Parameters:  n(I) - pointer to NAT rule to add                           */
328 /*                                                                          */
329 /* Adds a redirect rule to the hash table of redirect rules and the list of */
330 /* loaded NAT rules.  Updates the bitmask indicating which netmasks are in  */
331 /* use by redirect rules.                                                   */
332 /* ------------------------------------------------------------------------ */
333 static void nat_addrdr(n, ifs)
334 ipnat_t *n;
335 ipf_stack_t *ifs;
336 {
337 	ipnat_t **np;
338 	u_32_t j;
339 	u_int hv;
340 	int k;
341 
342 	k = count4bits(n->in_outmsk);
343 	if ((k >= 0) && (k != 32))
344 		ifs->ifs_rdr_masks |= 1 << k;
345 	j = (n->in_outip & n->in_outmsk);
346 	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_rdrrules_sz);
347 	np = ifs->ifs_rdr_rules + hv;
348 	while (*np != NULL)
349 		np = &(*np)->in_rnext;
350 	n->in_rnext = NULL;
351 	n->in_prnext = np;
352 	n->in_hv = hv;
353 	*np = n;
354 }
355 
356 
357 /* ------------------------------------------------------------------------ */
358 /* Function:    nat_addnat                                                  */
359 /* Returns:     Nil                                                         */
360 /* Parameters:  n(I) - pointer to NAT rule to add                           */
361 /*                                                                          */
362 /* Adds a NAT map rule to the hash table of rules and the list of  loaded   */
363 /* NAT rules.  Updates the bitmask indicating which netmasks are in use by  */
364 /* redirect rules.                                                          */
365 /* ------------------------------------------------------------------------ */
366 static void nat_addnat(n, ifs)
367 ipnat_t *n;
368 ipf_stack_t *ifs;
369 {
370 	ipnat_t **np;
371 	u_32_t j;
372 	u_int hv;
373 	int k;
374 
375 	k = count4bits(n->in_inmsk);
376 	if ((k >= 0) && (k != 32))
377 		ifs->ifs_nat_masks |= 1 << k;
378 	j = (n->in_inip & n->in_inmsk);
379 	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_natrules_sz);
380 	np = ifs->ifs_nat_rules + hv;
381 	while (*np != NULL)
382 		np = &(*np)->in_mnext;
383 	n->in_mnext = NULL;
384 	n->in_pmnext = np;
385 	n->in_hv = hv;
386 	*np = n;
387 }
388 
389 
390 /* ------------------------------------------------------------------------ */
391 /* Function:    nat_delrdr                                                  */
392 /* Returns:     Nil                                                         */
393 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
394 /*                                                                          */
395 /* Removes a redirect rule from the hash table of redirect rules.           */
396 /* ------------------------------------------------------------------------ */
397 static void nat_delrdr(n)
398 ipnat_t *n;
399 {
400 	if (n->in_rnext)
401 		n->in_rnext->in_prnext = n->in_prnext;
402 	*n->in_prnext = n->in_rnext;
403 }
404 
405 
406 /* ------------------------------------------------------------------------ */
407 /* Function:    nat_delnat                                                  */
408 /* Returns:     Nil                                                         */
409 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
410 /*                                                                          */
411 /* Removes a NAT map rule from the hash table of NAT map rules.             */
412 /* ------------------------------------------------------------------------ */
413 static void nat_delnat(n)
414 ipnat_t *n;
415 {
416 	if (n->in_mnext != NULL)
417 		n->in_mnext->in_pmnext = n->in_pmnext;
418 	*n->in_pmnext = n->in_mnext;
419 }
420 
421 
422 /* ------------------------------------------------------------------------ */
423 /* Function:    nat_hostmap                                                 */
424 /* Returns:     struct hostmap* - NULL if no hostmap could be created,      */
425 /*                                else a pointer to the hostmapping to use  */
426 /* Parameters:  np(I)   - pointer to NAT rule                               */
427 /*              real(I) - real IP address                                   */
428 /*              map(I)  - mapped IP address                                 */
429 /*              port(I) - destination port number                           */
430 /* Write Locks: ipf_nat                                                     */
431 /*                                                                          */
432 /* Check if an ip address has already been allocated for a given mapping    */
433 /* that is not doing port based translation.  If is not yet allocated, then */
434 /* create a new entry if a non-NULL NAT rule pointer has been supplied.     */
435 /* ------------------------------------------------------------------------ */
436 static struct hostmap *nat_hostmap(np, src, dst, map, port, ifs)
437 ipnat_t *np;
438 struct in_addr src;
439 struct in_addr dst;
440 struct in_addr map;
441 u_32_t port;
442 ipf_stack_t *ifs;
443 {
444 	hostmap_t *hm;
445 	u_int hv;
446 
447 	hv = (src.s_addr ^ dst.s_addr);
448 	hv += src.s_addr;
449 	hv += dst.s_addr;
450 	hv %= HOSTMAP_SIZE;
451 	for (hm = ifs->ifs_maptable[hv]; hm; hm = hm->hm_next)
452 		if ((hm->hm_srcip.s_addr == src.s_addr) &&
453 		    (hm->hm_dstip.s_addr == dst.s_addr) &&
454 		    ((np == NULL) || (np == hm->hm_ipnat)) &&
455 		    ((port == 0) || (port == hm->hm_port))) {
456 			hm->hm_ref++;
457 			return hm;
458 		}
459 
460 	if (np == NULL)
461 		return NULL;
462 
463 	KMALLOC(hm, hostmap_t *);
464 	if (hm) {
465 		hm->hm_hnext = ifs->ifs_ipf_hm_maplist;
466 		hm->hm_phnext = &ifs->ifs_ipf_hm_maplist;
467 		if (ifs->ifs_ipf_hm_maplist != NULL)
468 			ifs->ifs_ipf_hm_maplist->hm_phnext = &hm->hm_hnext;
469 		ifs->ifs_ipf_hm_maplist = hm;
470 
471 		hm->hm_next = ifs->ifs_maptable[hv];
472 		hm->hm_pnext = ifs->ifs_maptable + hv;
473 		if (ifs->ifs_maptable[hv] != NULL)
474 			ifs->ifs_maptable[hv]->hm_pnext = &hm->hm_next;
475 		ifs->ifs_maptable[hv] = hm;
476 		hm->hm_ipnat = np;
477 		hm->hm_srcip = src;
478 		hm->hm_dstip = dst;
479 		hm->hm_mapip = map;
480 		hm->hm_ref = 1;
481 		hm->hm_port = port;
482 	}
483 	return hm;
484 }
485 
486 
487 /* ------------------------------------------------------------------------ */
488 /* Function:    fr_hostmapdel                                              */
489 /* Returns:     Nil                                                         */
490 /* Parameters:  hmp(I) - pointer to pointer to hostmap structure            */
491 /* Write Locks: ipf_nat                                                     */
492 /*                                                                          */
493 /* Decrement the references to this hostmap structure by one.  If this      */
494 /* reaches zero then remove it and free it.                                 */
495 /* ------------------------------------------------------------------------ */
496 void fr_hostmapdel(hmp)
497 struct hostmap **hmp;
498 {
499 	struct hostmap *hm;
500 
501 	hm = *hmp;
502 	*hmp = NULL;
503 
504 	hm->hm_ref--;
505 	if (hm->hm_ref == 0) {
506 		if (hm->hm_next)
507 			hm->hm_next->hm_pnext = hm->hm_pnext;
508 		*hm->hm_pnext = hm->hm_next;
509 		if (hm->hm_hnext)
510 			hm->hm_hnext->hm_phnext = hm->hm_phnext;
511 		*hm->hm_phnext = hm->hm_hnext;
512 		KFREE(hm);
513 	}
514 }
515 
516 
517 /* ------------------------------------------------------------------------ */
518 /* Function:    fix_outcksum                                                */
519 /* Returns:     Nil                                                         */
520 /* Parameters:  sp(I)  - location of 16bit checksum to update               */
521 /*              n((I)  - amount to adjust checksum by                       */
522 /*                                                                          */
523 /* Adjusts the 16bit checksum by "n" for packets going out.                 */
524 /* ------------------------------------------------------------------------ */
525 void fix_outcksum(sp, n)
526 u_short *sp;
527 u_32_t n;
528 {
529 	u_short sumshort;
530 	u_32_t sum1;
531 
532 	if (n == 0)
533 		return;
534 
535 	sum1 = (~ntohs(*sp)) & 0xffff;
536 	sum1 += (n);
537 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
538 	/* Again */
539 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
540 	sumshort = ~(u_short)sum1;
541 	*(sp) = htons(sumshort);
542 }
543 
544 
545 /* ------------------------------------------------------------------------ */
546 /* Function:    fix_incksum                                                 */
547 /* Returns:     Nil                                                         */
548 /* Parameters:  sp(I)  - location of 16bit checksum to update               */
549 /*              n((I)  - amount to adjust checksum by                       */
550 /*                                                                          */
551 /* Adjusts the 16bit checksum by "n" for packets going in.                  */
552 /* ------------------------------------------------------------------------ */
553 void fix_incksum(sp, n)
554 u_short *sp;
555 u_32_t n;
556 {
557 	u_short sumshort;
558 	u_32_t sum1;
559 
560 	if (n == 0)
561 		return;
562 
563 	sum1 = (~ntohs(*sp)) & 0xffff;
564 	sum1 += ~(n) & 0xffff;
565 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
566 	/* Again */
567 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
568 	sumshort = ~(u_short)sum1;
569 	*(sp) = htons(sumshort);
570 }
571 
572 
573 /* ------------------------------------------------------------------------ */
574 /* Function:    fix_datacksum                                               */
575 /* Returns:     Nil                                                         */
576 /* Parameters:  sp(I)  - location of 16bit checksum to update               */
577 /*              n((I)  - amount to adjust checksum by                       */
578 /*                                                                          */
579 /* Fix_datacksum is used *only* for the adjustments of checksums in the     */
580 /* data section of an IP packet.                                            */
581 /*                                                                          */
582 /* The only situation in which you need to do this is when NAT'ing an       */
583 /* ICMP error message. Such a message, contains in its body the IP header   */
584 /* of the original IP packet, that causes the error.                        */
585 /*                                                                          */
586 /* You can't use fix_incksum or fix_outcksum in that case, because for the  */
587 /* kernel the data section of the ICMP error is just data, and no special   */
588 /* processing like hardware cksum or ntohs processing have been done by the */
589 /* kernel on the data section.                                              */
590 /* ------------------------------------------------------------------------ */
591 void fix_datacksum(sp, n)
592 u_short *sp;
593 u_32_t n;
594 {
595 	u_short sumshort;
596 	u_32_t sum1;
597 
598 	if (n == 0)
599 		return;
600 
601 	sum1 = (~ntohs(*sp)) & 0xffff;
602 	sum1 += (n);
603 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
604 	/* Again */
605 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
606 	sumshort = ~(u_short)sum1;
607 	*(sp) = htons(sumshort);
608 }
609 
610 
/* ------------------------------------------------------------------------ */
/* Function:    fr_nat_ioctl                                                */
/* Returns:     int - 0 == success, != 0 == failure                         */
/* Parameters:  data(I) - pointer to ioctl data                             */
/*              cmd(I)  - ioctl command integer                             */
/*              mode(I) - file mode bits used with open                     */
/*              uid(I)  - id handed to ipf_findtoken()/ipf_deltoken()       */
/*              ctx(I)  - context handed to the same token routines         */
/*              ifs(I)  - pointer to the ipf_stack_t being operated on      */
/*                                                                          */
/* Processes an ioctl call made to operate on the IP Filter NAT device.     */
/* For SIOCADNAT and SIOCRMNAT the ipf_natio mutex is entered before the    */
/* command switch and exited inside the matching case.  Cases that take     */
/* the ipf_nat lock do so only when NAT_LOCKHELD is absent from mode.       */
/* ------------------------------------------------------------------------ */
int fr_nat_ioctl(data, cmd, mode, uid, ctx, ifs)
ioctlcmd_t cmd;
caddr_t data;
int mode, uid;
void *ctx;
ipf_stack_t *ifs;
{
	ipnat_t *nat, *nt, *n = NULL, **np = NULL;
	int error = 0, ret, arg, getlock;
	ipnat_t natd;

#if (BSD >= 199306) && defined(_KERNEL)
	if ((securelevel >= 2) && (mode & FWRITE))
		return EPERM;
#endif

	/*
	 * getlock records whether the cases below must take the ipf_nat
	 * lock themselves.
	 */
#if defined(__osf__) && defined(_KERNEL)
	getlock = 0;
#else
	getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
#endif

	/*
	 * Only an add needs room for a new rule.  It is allocated up front
	 * and freed at "done" unless nat_siocaddnat() succeeds, in which case
	 * nt is cleared below.
	 */
	nat = NULL;     /* XXX gcc -Wuninitialized */
	if (cmd == (ioctlcmd_t)SIOCADNAT) {
		KMALLOC(nt, ipnat_t *);
	} else {
		nt = NULL;
	}

	/*
	 * Fetch the argument: a rule for add/remove (copied directly when
	 * NAT_SYSSPACE is set in mode), an int for the flush command.
	 */
	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
		if (mode & NAT_SYSSPACE) {
			bcopy(data, (char *)&natd, sizeof(natd));
			error = 0;
		} else {
			error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
		}

	} else if (cmd == (ioctlcmd_t)SIOCIPFFL) { /* SIOCFLNAT & SIOCCNATL */
		/*
		 * NOTE(review): the result of BCOPYIN is not examined here,
		 * yet arg is used by the SIOCIPFFL case below - confirm what
		 * BCOPYIN yields on failure.
		 */
		BCOPYIN(data, &arg, sizeof(arg));
	}

	if (error != 0)
		goto done;

	/*
	 * For add/delete, look to see if the NAT entry is already present
	 */
	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
		nat = &natd;
		if (nat->in_v == 0)	/* For backward compat. */
			nat->in_v = 4;
		nat->in_flags &= IPN_USERFLAGS;
		if ((nat->in_redir & NAT_MAPBLK) == 0) {
			if ((nat->in_flags & IPN_SPLIT) == 0)
				nat->in_inip &= nat->in_inmsk;
			if ((nat->in_flags & IPN_IPRANGE) == 0)
				nat->in_outip &= nat->in_outmsk;
		}
		/*
		 * The mutex entered here stays held until the SIOCADNAT or
		 * SIOCRMNAT case in the switch below exits it.
		 */
		MUTEX_ENTER(&ifs->ifs_ipf_natio);
		/*
		 * Two rules match when the IPN_CMPSIZ bytes starting at
		 * in_flags are equal; for NAT_REDIRECT rules in_pnext must
		 * agree as well.  When the loop ends, n is the match or NULL
		 * and np is the link that holds it (the list's final NULL
		 * link if nothing matched).
		 */
		for (np = &ifs->ifs_nat_list; ((n = *np) != NULL);
		     np = &n->in_next)
			if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
			    IPN_CMPSIZ) == 0) {
				if (nat->in_redir == NAT_REDIRECT &&
				    nat->in_pnext != n->in_pnext)
					continue;
				break;
			}
	}

	switch (cmd)
	{
	case SIOCGENITER :
	    {
		ipfgeniter_t iter;
		ipftoken_t *token;

		error = fr_inobj(data, &iter, IPFOBJ_GENITER);
		if (error != 0)
			break;

		token = ipf_findtoken(iter.igi_type, uid, ctx, ifs);
		if (token != NULL)
			error  = nat_iterator(token, &iter, ifs);
		else
			error = ESRCH;
		/*
		 * NOTE(review): ifs_ipf_tokens is never entered in this
		 * function; presumably ipf_findtoken() returns with it
		 * held - confirm.
		 */
		RWLOCK_EXIT(&ifs->ifs_ipf_tokens);
		break;
	    }
#ifdef  IPFILTER_LOG
	case SIOCIPFFB :
	{
		int tmp;

		if (!(mode & FWRITE))
			error = EPERM;
		else {
			/* Clear the NAT log and copy ipflog_clear()'s result out. */
			tmp = ipflog_clear(IPL_LOGNAT, ifs);
			BCOPYOUT((char *)&tmp, (char *)data, sizeof(tmp));
		}
		break;
	}
	case SIOCSETLG :
		if (!(mode & FWRITE))
			error = EPERM;
		else {
			BCOPYIN((char *)data,
				       (char *)&ifs->ifs_nat_logging,
				sizeof(ifs->ifs_nat_logging));
		}
		break;
	case SIOCGETLG :
		BCOPYOUT((char *)&ifs->ifs_nat_logging, (char *)data,
			sizeof(ifs->ifs_nat_logging));
		break;
	case FIONREAD :
		arg = ifs->ifs_iplused[IPL_LOGNAT];
		BCOPYOUT(&arg, data, sizeof(arg));
		break;
#endif
	case SIOCADNAT :
		/* n != NULL means the search above found an identical rule. */
		if (!(mode & FWRITE)) {
			error = EPERM;
		} else if (n != NULL) {
			error = EEXIST;
		} else if (nt == NULL) {
			error = ENOMEM;
		}
		if (error != 0) {
			MUTEX_EXIT(&ifs->ifs_ipf_natio);
			break;
		}
		bcopy((char *)nat, (char *)nt, sizeof(*n));
		error = nat_siocaddnat(nt, np, getlock, ifs);
		MUTEX_EXIT(&ifs->ifs_ipf_natio);
		/* On success nt is cleared so that "done" does not free it. */
		if (error == 0)
			nt = NULL;
		break;
	case SIOCRMNAT :
		/* n == NULL means the search above found no such rule. */
		if (!(mode & FWRITE)) {
			error = EPERM;
			n = NULL;
		} else if (n == NULL) {
			error = ESRCH;
		}

		if (error != 0) {
			MUTEX_EXIT(&ifs->ifs_ipf_natio);
			break;
		}
		nat_siocdelnat(n, np, getlock, ifs);

		MUTEX_EXIT(&ifs->ifs_ipf_natio);
		n = NULL;
		break;
	case SIOCGNATS :
		/* Refresh the table pointers and sizes before copying out. */
		ifs->ifs_nat_stats.ns_table[0] = ifs->ifs_nat_table[0];
		ifs->ifs_nat_stats.ns_table[1] = ifs->ifs_nat_table[1];
		ifs->ifs_nat_stats.ns_list = ifs->ifs_nat_list;
		ifs->ifs_nat_stats.ns_maptable = ifs->ifs_maptable;
		ifs->ifs_nat_stats.ns_maplist = ifs->ifs_ipf_hm_maplist;
		ifs->ifs_nat_stats.ns_nattab_max = ifs->ifs_ipf_nattable_max;
		ifs->ifs_nat_stats.ns_nattab_sz = ifs->ifs_ipf_nattable_sz;
		ifs->ifs_nat_stats.ns_rultab_sz = ifs->ifs_ipf_natrules_sz;
		ifs->ifs_nat_stats.ns_rdrtab_sz = ifs->ifs_ipf_rdrrules_sz;
		ifs->ifs_nat_stats.ns_hostmap_sz = ifs->ifs_ipf_hostmap_sz;
		ifs->ifs_nat_stats.ns_instances = ifs->ifs_nat_instances;
		ifs->ifs_nat_stats.ns_apslist = ifs->ifs_ap_sess_list;
		error = fr_outobj(data, &ifs->ifs_nat_stats, IPFOBJ_NATSTAT);
		break;
	case SIOCGNATL :
	    {
		natlookup_t nl;

		if (getlock) {
			READ_ENTER(&ifs->ifs_ipf_nat);
		}
		error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
		if (error == 0) {
			if (nat_lookupredir(&nl, ifs) != NULL) {
				error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
			} else {
				error = ESRCH;
			}
		}
		if (getlock) {
			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
		}
		break;
	    }
	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
		if (!(mode & FWRITE)) {
			error = EPERM;
			break;
		}
		if (getlock) {
			WRITE_ENTER(&ifs->ifs_ipf_nat);
		}
		/*
		 * arg selects the operation: 0 calls nat_flushtable(), 1
		 * calls nat_clearlist(), 2 to 4 call nat_extraflush() with
		 * arg - 2; anything else is rejected.  ret is copied out
		 * only when one of them ran.
		 */
		error = 0;
		if (arg == 0)
			ret = nat_flushtable(ifs);
		else if (arg == 1)
			ret = nat_clearlist(ifs);
		else if (arg >= 2 && arg <= 4)
			ret = nat_extraflush(arg - 2, ifs);
		else
			error = EINVAL;
		if (getlock) {
			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
		}
		if (error == 0) {
			BCOPYOUT(&ret, data, sizeof(ret));
		}
		break;
	case SIOCPROXY :
		error = appr_ioctl(data, cmd, mode, ifs);
		break;
	case SIOCSTLCK :
		if (!(mode & FWRITE)) {
			error = EPERM;
		} else {
			fr_lock(data, &ifs->ifs_fr_nat_lock);
		}
		break;
	case SIOCSTPUT :
		if ((mode & FWRITE) != 0) {
			error = fr_natputent(data, getlock, ifs);
		} else {
			error = EACCES;
		}
		break;
	case SIOCSTGSZ :
		/* Refused with EACCES unless ifs_fr_nat_lock is non-zero. */
		if (ifs->ifs_fr_nat_lock) {
			if (getlock) {
				READ_ENTER(&ifs->ifs_ipf_nat);
			}
			error = fr_natgetsz(data, ifs);
			if (getlock) {
				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
			}
		} else
			error = EACCES;
		break;
	case SIOCSTGET :
		/* Refused with EACCES unless ifs_fr_nat_lock is non-zero. */
		if (ifs->ifs_fr_nat_lock) {
			if (getlock) {
				READ_ENTER(&ifs->ifs_ipf_nat);
			}
			error = fr_natgetent(data, ifs);
			if (getlock) {
				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
			}
		} else
			error = EACCES;
		break;
	case SIOCIPFDELTOK :
		(void) BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
		error = ipf_deltoken(arg, uid, ctx, ifs);
		break;
	default :
		error = EINVAL;
		break;
	}
done:
	/* Release the rule buffer unless it was cleared after a successful add. */
	if (nt)
		KFREE(nt);
	return error;
}
888 
889 
890 /* ------------------------------------------------------------------------ */
891 /* Function:    nat_siocaddnat                                              */
892 /* Returns:     int - 0 == success, != 0 == failure                         */
893 /* Parameters:  n(I)       - pointer to new NAT rule                        */
894 /*              np(I)      - pointer to where to insert new NAT rule        */
895 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
896 /* Mutex Locks: ipf_natio                                                   */
897 /*                                                                          */
898 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
899 /* from information passed to the kernel, then add it  to the appropriate   */
900 /* NAT rule table(s).                                                       */
901 /* ------------------------------------------------------------------------ */
902 static int nat_siocaddnat(n, np, getlock, ifs)
903 ipnat_t *n, **np;
904 int getlock;
905 ipf_stack_t *ifs;
906 {
907 	int error = 0, i, j;
908 
909 	if (nat_resolverule(n, ifs) != 0)
910 		return ENOENT;
911 
912 	if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
913 		return EINVAL;
914 
915 	n->in_use = 0;
916 	if (n->in_redir & NAT_MAPBLK)
917 		n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
918 	else if (n->in_flags & IPN_AUTOPORTMAP)
919 		n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
920 	else if (n->in_flags & IPN_IPRANGE)
921 		n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
922 	else if (n->in_flags & IPN_SPLIT)
923 		n->in_space = 2;
924 	else if (n->in_outmsk != 0)
925 		n->in_space = ~ntohl(n->in_outmsk);
926 	else
927 		n->in_space = 1;
928 
929 	/*
930 	 * Calculate the number of valid IP addresses in the output
931 	 * mapping range.  In all cases, the range is inclusive of
932 	 * the start and ending IP addresses.
933 	 * If to a CIDR address, lose 2: broadcast + network address
934 	 *                               (so subtract 1)
935 	 * If to a range, add one.
936 	 * If to a single IP address, set to 1.
937 	 */
938 	if (n->in_space) {
939 		if ((n->in_flags & IPN_IPRANGE) != 0)
940 			n->in_space += 1;
941 		else
942 			n->in_space -= 1;
943 	} else
944 		n->in_space = 1;
945 
946 	if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
947 	    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
948 		n->in_nip = ntohl(n->in_outip) + 1;
949 	else if ((n->in_flags & IPN_SPLIT) &&
950 		 (n->in_redir & NAT_REDIRECT))
951 		n->in_nip = ntohl(n->in_inip);
952 	else
953 		n->in_nip = ntohl(n->in_outip);
954 	if (n->in_redir & NAT_MAP) {
955 		n->in_pnext = ntohs(n->in_pmin);
956 		/*
957 		 * Multiply by the number of ports made available.
958 		 */
959 		if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
960 			n->in_space *= (ntohs(n->in_pmax) -
961 					ntohs(n->in_pmin) + 1);
962 			/*
963 			 * Because two different sources can map to
964 			 * different destinations but use the same
965 			 * local IP#/port #.
966 			 * If the result is smaller than in_space, then
967 			 * we may have wrapped around 32bits.
968 			 */
969 			i = n->in_inmsk;
970 			if ((i != 0) && (i != 0xffffffff)) {
971 				j = n->in_space * (~ntohl(i) + 1);
972 				if (j >= n->in_space)
973 					n->in_space = j;
974 				else
975 					n->in_space = 0xffffffff;
976 			}
977 		}
978 		/*
979 		 * If no protocol is specified, multiple by 256 to allow for
980 		 * at least one IP:IP mapping per protocol.
981 		 */
982 		if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
983 				j = n->in_space * 256;
984 				if (j >= n->in_space)
985 					n->in_space = j;
986 				else
987 					n->in_space = 0xffffffff;
988 		}
989 	}
990 
991 	/* Otherwise, these fields are preset */
992 
993 	if (getlock) {
994 		WRITE_ENTER(&ifs->ifs_ipf_nat);
995 	}
996 	n->in_next = NULL;
997 	*np = n;
998 
999 	if (n->in_age[0] != 0)
1000 	    n->in_tqehead[0] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
1001 						  n->in_age[0], ifs);
1002 
1003 	if (n->in_age[1] != 0)
1004 	    n->in_tqehead[1] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
1005 						  n->in_age[1], ifs);
1006 
1007 	if (n->in_redir & NAT_REDIRECT) {
1008 		n->in_flags &= ~IPN_NOTDST;
1009 		nat_addrdr(n, ifs);
1010 	}
1011 	if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1012 		n->in_flags &= ~IPN_NOTSRC;
1013 		nat_addnat(n, ifs);
1014 	}
1015 	n = NULL;
1016 	ifs->ifs_nat_stats.ns_rules++;
1017 	if (getlock) {
1018 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* WRITE */
1019 	}
1020 
1021 	return error;
1022 }
1023 
1024 
1025 /* ------------------------------------------------------------------------ */
1026 /* Function:    nat_resolvrule                                              */
1027 /* Returns:     int - 0 == success, -1 == failure                           */
1028 /* Parameters:  n(I)  - pointer to NAT rule                                 */
1029 /*                                                                          */
1030 /* Resolve some of the details inside the NAT rule.  Includes resolving	    */
1031 /* any specified interfaces and proxy labels, and determines whether or not */
1032 /* all proxy labels are correctly specified.				    */
1033 /*									    */
1034 /* Called by nat_siocaddnat() (SIOCADNAT) and fr_natputent (SIOCSTPUT).     */
1035 /* ------------------------------------------------------------------------ */
1036 static int nat_resolverule(n, ifs)
1037 ipnat_t *n;
1038 ipf_stack_t *ifs;
1039 {
1040 	n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1041 	n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4, ifs);
1042 
1043 	n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1044 	if (n->in_ifnames[1][0] == '\0') {
1045 		(void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1046 		n->in_ifps[1] = n->in_ifps[0];
1047 	} else {
1048 		n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4, ifs);
1049 	}
1050 
1051 	if (n->in_plabel[0] != '\0') {
1052 		n->in_apr = appr_lookup(n->in_p, n->in_plabel, ifs);
1053 		if (n->in_apr == NULL)
1054 			return -1;
1055 	}
1056 	return 0;
1057 }
1058 
1059 
1060 /* ------------------------------------------------------------------------ */
1061 /* Function:    nat_siocdelnat                                              */
1062 /* Returns:     int - 0 == success, != 0 == failure                         */
1063 /* Parameters:  n(I)       - pointer to new NAT rule                        */
1064 /*              np(I)      - pointer to where to insert new NAT rule        */
1065 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
1066 /* Mutex Locks: ipf_natio                                                   */
1067 /*                                                                          */
1068 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1069 /* from information passed to the kernel, then add it  to the appropriate   */
1070 /* NAT rule table(s).                                                       */
1071 /* ------------------------------------------------------------------------ */
1072 static void nat_siocdelnat(n, np, getlock, ifs)
1073 ipnat_t *n, **np;
1074 int getlock;
1075 ipf_stack_t *ifs;
1076 {
1077 	if (getlock) {
1078 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1079 	}
1080 	if (n->in_redir & NAT_REDIRECT)
1081 		nat_delrdr(n);
1082 	if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1083 		nat_delnat(n);
1084 	if (ifs->ifs_nat_list == NULL) {
1085 		ifs->ifs_nat_masks = 0;
1086 		ifs->ifs_rdr_masks = 0;
1087 	}
1088 
1089 	if (n->in_tqehead[0] != NULL) {
1090 		if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1091 			fr_freetimeoutqueue(n->in_tqehead[0], ifs);
1092 		}
1093 	}
1094 
1095 	if (n->in_tqehead[1] != NULL) {
1096 		if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1097 			fr_freetimeoutqueue(n->in_tqehead[1], ifs);
1098 		}
1099 	}
1100 
1101 	*np = n->in_next;
1102 
1103 	if (n->in_use == 0) {
1104 		if (n->in_apr)
1105 			appr_free(n->in_apr);
1106 		KFREE(n);
1107 		ifs->ifs_nat_stats.ns_rules--;
1108 	} else {
1109 		n->in_flags |= IPN_DELETE;
1110 		n->in_next = NULL;
1111 	}
1112 	if (getlock) {
1113 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* READ/WRITE */
1114 	}
1115 }
1116 
1117 
1118 /* ------------------------------------------------------------------------ */
1119 /* Function:    fr_natgetsz                                                 */
1120 /* Returns:     int - 0 == success, != 0 is the error value.                */
1121 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1122 /*                        get the size of.                                  */
1123 /*                                                                          */
1124 /* Handle SIOCSTGSZ.                                                        */
1125 /* Return the size of the nat list entry to be copied back to user space.   */
1126 /* The size of the entry is stored in the ng_sz field and the enture natget */
1127 /* structure is copied back to the user.                                    */
1128 /* ------------------------------------------------------------------------ */
1129 static int fr_natgetsz(data, ifs)
1130 caddr_t data;
1131 ipf_stack_t *ifs;
1132 {
1133 	ap_session_t *aps;
1134 	nat_t *nat, *n;
1135 	natget_t ng;
1136 
1137 	BCOPYIN(data, &ng, sizeof(ng));
1138 
1139 	nat = ng.ng_ptr;
1140 	if (!nat) {
1141 		nat = ifs->ifs_nat_instances;
1142 		ng.ng_sz = 0;
1143 		/*
1144 		 * Empty list so the size returned is 0.  Simple.
1145 		 */
1146 		if (nat == NULL) {
1147 			BCOPYOUT(&ng, data, sizeof(ng));
1148 			return 0;
1149 		}
1150 	} else {
1151 		/*
1152 		 * Make sure the pointer we're copying from exists in the
1153 		 * current list of entries.  Security precaution to prevent
1154 		 * copying of random kernel data.
1155 		 */
1156 		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1157 			if (n == nat)
1158 				break;
1159 		if (!n)
1160 			return ESRCH;
1161 	}
1162 
1163 	/*
1164 	 * Incluse any space required for proxy data structures.
1165 	 */
1166 	ng.ng_sz = sizeof(nat_save_t);
1167 	aps = nat->nat_aps;
1168 	if (aps != NULL) {
1169 		ng.ng_sz += sizeof(ap_session_t) - 4;
1170 		if (aps->aps_data != 0)
1171 			ng.ng_sz += aps->aps_psiz;
1172 	}
1173 
1174 	BCOPYOUT(&ng, data, sizeof(ng));
1175 	return 0;
1176 }
1177 
1178 
1179 /* ------------------------------------------------------------------------ */
1180 /* Function:    fr_natgetent                                                */
1181 /* Returns:     int - 0 == success, != 0 is the error value.                */
1182 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1183 /*                        to NAT structure to copy out.                     */
1184 /*                                                                          */
1185 /* Handle SIOCSTGET.                                                        */
1186 /* Copies out NAT entry to user space.  Any additional data held for a      */
1187 /* proxy is also copied, as to is the NAT rule which was responsible for it */
1188 /* ------------------------------------------------------------------------ */
1189 static int fr_natgetent(data, ifs)
1190 caddr_t data;
1191 ipf_stack_t *ifs;
1192 {
1193 	int error, outsize;
1194 	ap_session_t *aps;
1195 	nat_save_t *ipn, ipns;
1196 	nat_t *n, *nat;
1197 
1198 	error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1199 	if (error != 0)
1200 		return error;
1201 
1202 	if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1203 		return EINVAL;
1204 
1205 	KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1206 	if (ipn == NULL)
1207 		return ENOMEM;
1208 
1209 	ipn->ipn_dsize = ipns.ipn_dsize;
1210 	nat = ipns.ipn_next;
1211 	if (nat == NULL) {
1212 		nat = ifs->ifs_nat_instances;
1213 		if (nat == NULL) {
1214 			if (ifs->ifs_nat_instances == NULL)
1215 				error = ENOENT;
1216 			goto finished;
1217 		}
1218 	} else {
1219 		/*
1220 		 * Make sure the pointer we're copying from exists in the
1221 		 * current list of entries.  Security precaution to prevent
1222 		 * copying of random kernel data.
1223 		 */
1224 		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1225 			if (n == nat)
1226 				break;
1227 		if (n == NULL) {
1228 			error = ESRCH;
1229 			goto finished;
1230 		}
1231 	}
1232 	ipn->ipn_next = nat->nat_next;
1233 
1234 	/*
1235 	 * Copy the NAT structure.
1236 	 */
1237 	bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1238 
1239 	/*
1240 	 * If we have a pointer to the NAT rule it belongs to, save that too.
1241 	 */
1242 	if (nat->nat_ptr != NULL)
1243 		bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1244 		      sizeof(ipn->ipn_ipnat));
1245 
1246 	/*
1247 	 * If we also know the NAT entry has an associated filter rule,
1248 	 * save that too.
1249 	 */
1250 	if (nat->nat_fr != NULL)
1251 		bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1252 		      sizeof(ipn->ipn_fr));
1253 
1254 	/*
1255 	 * Last but not least, if there is an application proxy session set
1256 	 * up for this NAT entry, then copy that out too, including any
1257 	 * private data saved along side it by the proxy.
1258 	 */
1259 	aps = nat->nat_aps;
1260 	outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1261 	if (aps != NULL) {
1262 		char *s;
1263 
1264 		if (outsize < sizeof(*aps)) {
1265 			error = ENOBUFS;
1266 			goto finished;
1267 		}
1268 
1269 		s = ipn->ipn_data;
1270 		bcopy((char *)aps, s, sizeof(*aps));
1271 		s += sizeof(*aps);
1272 		outsize -= sizeof(*aps);
1273 		if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1274 			bcopy(aps->aps_data, s, aps->aps_psiz);
1275 		else
1276 			error = ENOBUFS;
1277 	}
1278 	if (error == 0) {
1279 		error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1280 	}
1281 
1282 finished:
1283 	if (ipn != NULL) {
1284 		KFREES(ipn, ipns.ipn_dsize);
1285 	}
1286 	return error;
1287 }
1288 
1289 /* ------------------------------------------------------------------------ */
1290 /* Function:    nat_calc_chksum_diffs					    */
1291 /* Returns:     void							    */
1292 /* Parameters:  nat	-	pointer to NAT table entry		    */
1293 /*                                                                          */
1294 /* Function calculates chksum deltas for IP header (nat_ipsumd) and TCP/UDP */
1295 /* headers (nat_sumd). The things for L4 (UDP/TCP) get complicated when     */
1296 /* we are dealing with partial chksum offload. For these cases we need to   */
1297 /* compute a 'partial chksum delta'. The 'partial chksum delta'is stored    */
1298 /* into nat_sumd[1], while ordinary chksum delta for TCP/UDP is in 	    */
1299 /* nat_sumd[0]. 							    */
1300 /*									    */
1301 /* The function accepts initialized NAT table entry and computes the deltas */
1302 /* from nat_inip/nat_outip members. The function is called right before	    */
1303 /* the new entry is inserted into the table.				    */
1304 /*									    */
1305 /* The ipsumd (IP hedaer chksum delta adjustment) is computed as a chksum   */
1306 /* of delta between original and new IP addresses.			    */
1307 /*									    */
1308 /* the nat_sumd[0] (TCP/UDP header chksum delta adjustment) is computed as  */
1309 /* a chkusm of delta between original an new IP addrress:port tupples.	    */
1310 /*									    */
1311 /* Some facts about chksum, we should remember:				    */
1312 /*	IP header chksum covers IP header only				    */
1313 /*									    */
1314 /*	TCP/UDP chksum covers data payload and so called pseudo header	    */
1315 /*		SRC, DST IP address					    */
1316 /*		SRC, DST Port						    */
1317 /*		length of payload					    */
1318 /*									    */
1319 /* The partial chksum delta (nat_sumd[1] is used to adjust db_ckusm16	    */
1320 /* member of dblk_t structure. The db_ckusm16 member is not part of 	    */
1321 /* IP/UDP/TCP header it is 16 bit value computed by NIC driver with partial */
1322 /* chksum offload capacbility for every inbound packet. The db_cksum16 is   */
1323 /* stored along with other IP packet data in dblk_t structure and used in   */
1324 /* for IP/UDP/TCP chksum validation later in ip.c. 			    */
1325 /*									    */
1326 /* The partial chksum delta (adjustment, nat_sumd[1]) is computed as chksum */
1327 /* of delta between new and orig address. NOTE: the order of operands for   */
1328 /* partial delta operation is swapped compared to computing the IP/TCP/UDP  */
1329 /* header adjustment. It is by design see (IP_CKSUM_RECV() macro in ip.c).  */
1330 /*									    */
1331 /* ------------------------------------------------------------------------ */
1332 static void nat_calc_chksum_diffs(nat)
1333 nat_t *nat;
1334 {
1335 	u_32_t	sum_orig = 0;
1336 	u_32_t	sum_changed = 0;
1337 	u_32_t	sumd;
1338 	u_32_t	ipsum_orig = 0;
1339 	u_32_t	ipsum_changed = 0;
1340 
1341 	/*
1342 	 * the switch calculates operands for CALC_SUMD(),
1343 	 * which will compute the partial chksum delta.
1344 	 */
1345 	switch (nat->nat_dir)
1346 	{
1347 	case NAT_INBOUND:
1348 		/*
1349 		 * we are dealing with RDR rule (DST address gets
1350 		 * modified on packet from client)
1351 		 */
1352 		sum_changed = LONG_SUM(ntohl(nat->nat_inip.s_addr));
1353 		sum_orig = LONG_SUM(ntohl(nat->nat_outip.s_addr));
1354 		break;
1355 	case NAT_OUTBOUND:
1356 		/*
1357 		 * we are dealing with MAP rule (SRC address gets
1358 		 * modified on packet from client)
1359 		 */
1360 		sum_changed = LONG_SUM(ntohl(nat->nat_outip.s_addr));
1361 		sum_orig = LONG_SUM(ntohl(nat->nat_inip.s_addr));
1362 		break;
1363 	default: ;
1364 		break;
1365 	}
1366 
1367 	/*
1368 	 * we also preserve CALC_SUMD() operands here, for IP chksum delta
1369 	 * calculation, which happens at the end of function.
1370 	 */
1371 	ipsum_changed = sum_changed;
1372 	ipsum_orig = sum_orig;
1373 	/*
1374 	 * NOTE: the order of operands for partial chksum adjustment
1375 	 * computation has to be swapped!
1376 	 */
1377 	CALC_SUMD(sum_changed, sum_orig, sumd);
1378 	nat->nat_sumd[1] = (sumd & 0xffff) + (sumd >> 16);
1379 
1380 	if (nat->nat_flags & (IPN_TCPUDP | IPN_ICMPQUERY)) {
1381 
1382 		/*
1383 		 * switch calculates operands for CALC_SUMD(), which will
1384 		 * compute the full chksum delta.
1385 		 */
1386 		switch (nat->nat_dir)
1387 		{
1388 		case NAT_INBOUND:
1389 			sum_changed = LONG_SUM(
1390 					ntohl(nat->nat_inip.s_addr) +
1391 					ntohs(nat->nat_inport)
1392 				    );
1393 			sum_orig = LONG_SUM(
1394 					ntohl(nat->nat_outip.s_addr) +
1395 					ntohs(nat->nat_outport)
1396 				    );
1397 			break;
1398 		case NAT_OUTBOUND:
1399 			sum_changed = LONG_SUM(
1400 					ntohl(nat->nat_outip.s_addr) +
1401 					ntohs(nat->nat_outport)
1402 				);
1403 			sum_orig = LONG_SUM(
1404 					ntohl(nat->nat_inip.s_addr) +
1405 					ntohs(nat->nat_inport)
1406 				);
1407 			break;
1408 		default: ;
1409 			break;
1410 		}
1411 
1412 		CALC_SUMD(sum_orig, sum_changed, sumd);
1413 		nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
1414 
1415 		if (!(nat->nat_flags & IPN_TCPUDP)) {
1416 			/*
1417 			 * partial HW chksum offload works for TCP/UDP headers only,
1418 			 * so we need to enforce full chksum adjustment for ICMP
1419 			 */
1420 			nat->nat_sumd[1] = nat->nat_sumd[0];
1421 		}
1422 	}
1423 	else
1424 		nat->nat_sumd[0] = nat->nat_sumd[1];
1425 
1426 	/*
1427 	 * we may reuse the already computed nat_sumd[0] for IP header chksum
1428 	 * adjustment in case the L4 (TCP/UDP header) is not changed by NAT.
1429 	 */
1430 	if (NAT_HAS_L4_CHANGED(nat)) {
1431 		/*
1432 		 * bad luck, NAT changes also the L4 header, use IP addresses
1433 		 * to compute chksum adjustment for IP header.
1434 		 */
1435 		CALC_SUMD(ipsum_orig, ipsum_changed, sumd);
1436 		nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
1437 	}
1438 	else {
1439 		/*
1440 		 * the NAT does not change L4 hdr -> reuse chksum adjustment
1441 		 * for IP hdr.
1442 		 */
1443 		nat->nat_ipsumd = nat->nat_sumd[0];
1444 
1445 		/*
1446 		 * if L4 header does not use chkusm - zero out deltas
1447 		 */
1448 		if (!(nat->nat_flags & IPN_TCPUDPICMP)) {
1449 			nat->nat_sumd[0] = 0;
1450 			nat->nat_sumd[1] = 0;
1451 		}
1452 	}
1453 
1454 	return;
1455 }
1456 
1457 /* ------------------------------------------------------------------------ */
1458 /* Function:    fr_natputent                                                */
1459 /* Returns:     int - 0 == success, != 0 is the error value.                */
1460 /* Parameters:  data(I) -     pointer to natget structure with NAT          */
1461 /*                            structure information to load into the kernel */
1462 /*              getlock(I) - flag indicating whether or not a write lock    */
1463 /*                           on ipf_nat is already held.                    */
1464 /*                                                                          */
1465 /* Handle SIOCSTPUT.                                                        */
1466 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1467 /* firewall rule data structures, if pointers to them indicate so.          */
1468 /* ------------------------------------------------------------------------ */
1469 static int fr_natputent(data, getlock, ifs)
1470 caddr_t data;
1471 int getlock;
1472 ipf_stack_t *ifs;
1473 {
1474 	nat_save_t ipn, *ipnn;
1475 	ap_session_t *aps;
1476 	nat_t *n, *nat;
1477 	frentry_t *fr;
1478 	fr_info_t fin;
1479 	ipnat_t *in;
1480 	int error;
1481 
1482 	error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1483 	if (error != 0)
1484 		return error;
1485 
1486 	/*
1487 	 * Trigger automatic call to nat_extraflush() if the
1488 	 * table has reached capcity specified by hi watermark.
1489 	 */
1490 	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi)
1491 		ifs->ifs_nat_doflush = 1;
1492 
1493 	/*
1494 	 * Initialise early because of code at junkput label.
1495 	 */
1496 	in = NULL;
1497 	aps = NULL;
1498 	nat = NULL;
1499 	ipnn = NULL;
1500 
1501 	/*
1502 	 * New entry, copy in the rest of the NAT entry if it's size is more
1503 	 * than just the nat_t structure.
1504 	 */
1505 	fr = NULL;
1506 	if (ipn.ipn_dsize > sizeof(ipn)) {
1507 		if (ipn.ipn_dsize > 81920) {
1508 			error = ENOMEM;
1509 			goto junkput;
1510 		}
1511 
1512 		KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1513 		if (ipnn == NULL)
1514 			return ENOMEM;
1515 
1516 		error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1517 		if (error != 0) {
1518 			error = EFAULT;
1519 			goto junkput;
1520 		}
1521 	} else
1522 		ipnn = &ipn;
1523 
1524 	KMALLOC(nat, nat_t *);
1525 	if (nat == NULL) {
1526 		error = ENOMEM;
1527 		goto junkput;
1528 	}
1529 
1530 	bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1531 	/*
1532 	 * Initialize all these so that nat_delete() doesn't cause a crash.
1533 	 */
1534 	bzero((char *)nat, offsetof(struct nat, nat_tqe));
1535 	nat->nat_tqe.tqe_pnext = NULL;
1536 	nat->nat_tqe.tqe_next = NULL;
1537 	nat->nat_tqe.tqe_ifq = NULL;
1538 	nat->nat_tqe.tqe_parent = nat;
1539 
1540 	/*
1541 	 * Restore the rule associated with this nat session
1542 	 */
1543 	in = ipnn->ipn_nat.nat_ptr;
1544 	if (in != NULL) {
1545 		KMALLOC(in, ipnat_t *);
1546 		nat->nat_ptr = in;
1547 		if (in == NULL) {
1548 			error = ENOMEM;
1549 			goto junkput;
1550 		}
1551 		bzero((char *)in, offsetof(struct ipnat, in_next6));
1552 		bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1553 		in->in_use = 1;
1554 		in->in_flags |= IPN_DELETE;
1555 
1556 		ATOMIC_INC(ifs->ifs_nat_stats.ns_rules);
1557 
1558 		if (nat_resolverule(in, ifs) != 0) {
1559 			error = ESRCH;
1560 			goto junkput;
1561 		}
1562 	}
1563 
1564 	/*
1565 	 * Check that the NAT entry doesn't already exist in the kernel.
1566 	 */
1567 	bzero((char *)&fin, sizeof(fin));
1568 	fin.fin_p = nat->nat_p;
1569 	fin.fin_ifs = ifs;
1570 	if (nat->nat_dir == NAT_OUTBOUND) {
1571 		fin.fin_data[0] = ntohs(nat->nat_oport);
1572 		fin.fin_data[1] = ntohs(nat->nat_outport);
1573 		fin.fin_ifp = nat->nat_ifps[0];
1574 		if (getlock) {
1575 			READ_ENTER(&ifs->ifs_ipf_nat);
1576 		}
1577 		n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1578 			nat->nat_oip, nat->nat_outip);
1579 		if (getlock) {
1580 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1581 		}
1582 		if (n != NULL) {
1583 			error = EEXIST;
1584 			goto junkput;
1585 		}
1586 	} else if (nat->nat_dir == NAT_INBOUND) {
1587 		fin.fin_data[0] = ntohs(nat->nat_inport);
1588 		fin.fin_data[1] = ntohs(nat->nat_oport);
1589 		fin.fin_ifp = nat->nat_ifps[1];
1590 		if (getlock) {
1591 			READ_ENTER(&ifs->ifs_ipf_nat);
1592 		}
1593 		n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1594 			nat->nat_inip, nat->nat_oip);
1595 		if (getlock) {
1596 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1597 		}
1598 		if (n != NULL) {
1599 			error = EEXIST;
1600 			goto junkput;
1601 		}
1602 	} else {
1603 		error = EINVAL;
1604 		goto junkput;
1605 	}
1606 
1607 	/*
1608 	 * Restore ap_session_t structure.  Include the private data allocated
1609 	 * if it was there.
1610 	 */
1611 	aps = nat->nat_aps;
1612 	if (aps != NULL) {
1613 		KMALLOC(aps, ap_session_t *);
1614 		nat->nat_aps = aps;
1615 		if (aps == NULL) {
1616 			error = ENOMEM;
1617 			goto junkput;
1618 		}
1619 		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1620 		if (in != NULL)
1621 			aps->aps_apr = in->in_apr;
1622 		else
1623 			aps->aps_apr = NULL;
1624 		if (aps->aps_psiz != 0) {
1625 			if (aps->aps_psiz > 81920) {
1626 				error = ENOMEM;
1627 				goto junkput;
1628 			}
1629 			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1630 			if (aps->aps_data == NULL) {
1631 				error = ENOMEM;
1632 				goto junkput;
1633 			}
1634 			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1635 			      aps->aps_psiz);
1636 		} else {
1637 			aps->aps_psiz = 0;
1638 			aps->aps_data = NULL;
1639 		}
1640 	}
1641 
1642 	/*
1643 	 * If there was a filtering rule associated with this entry then
1644 	 * build up a new one.
1645 	 */
1646 	fr = nat->nat_fr;
1647 	if (fr != NULL) {
1648 		if ((nat->nat_flags & SI_NEWFR) != 0) {
1649 			KMALLOC(fr, frentry_t *);
1650 			nat->nat_fr = fr;
1651 			if (fr == NULL) {
1652 				error = ENOMEM;
1653 				goto junkput;
1654 			}
1655 			ipnn->ipn_nat.nat_fr = fr;
1656 			(void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1657 			bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1658 
1659 			fr->fr_ref = 1;
1660 			fr->fr_dsize = 0;
1661 			fr->fr_data = NULL;
1662 			fr->fr_type = FR_T_NONE;
1663 
1664 			MUTEX_NUKE(&fr->fr_lock);
1665 			MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1666 		} else {
1667 			if (getlock) {
1668 				READ_ENTER(&ifs->ifs_ipf_nat);
1669 			}
1670 			for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1671 				if (n->nat_fr == fr)
1672 					break;
1673 
1674 			if (n != NULL) {
1675 				MUTEX_ENTER(&fr->fr_lock);
1676 				fr->fr_ref++;
1677 				MUTEX_EXIT(&fr->fr_lock);
1678 			}
1679 			if (getlock) {
1680 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1681 			}
1682 			if (!n) {
1683 				error = ESRCH;
1684 				goto junkput;
1685 			}
1686 		}
1687 	}
1688 
1689 	if (ipnn != &ipn) {
1690 		KFREES(ipnn, ipn.ipn_dsize);
1691 		ipnn = NULL;
1692 	}
1693 
1694 	nat_calc_chksum_diffs(nat);
1695 
1696 	if (getlock) {
1697 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1698 	}
1699 	error = nat_insert(nat, nat->nat_rev, ifs);
1700 	if ((error == 0) && (aps != NULL)) {
1701 		aps->aps_next = ifs->ifs_ap_sess_list;
1702 		ifs->ifs_ap_sess_list = aps;
1703 	}
1704 	if (getlock) {
1705 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1706 	}
1707 
1708 	if (error == 0)
1709 		return 0;
1710 
1711 	error = ENOMEM;
1712 
1713 junkput:
1714 	if (fr != NULL)
1715 		(void) fr_derefrule(&fr, ifs);
1716 
1717 	if ((ipnn != NULL) && (ipnn != &ipn)) {
1718 		KFREES(ipnn, ipn.ipn_dsize);
1719 	}
1720 	if (nat != NULL) {
1721 		if (aps != NULL) {
1722 			if (aps->aps_data != NULL) {
1723 				KFREES(aps->aps_data, aps->aps_psiz);
1724 			}
1725 			KFREE(aps);
1726 		}
1727 		if (in != NULL) {
1728 			if (in->in_apr)
1729 				appr_free(in->in_apr);
1730 			KFREE(in);
1731 		}
1732 		KFREE(nat);
1733 	}
1734 	return error;
1735 }
1736 
1737 
1738 /* ------------------------------------------------------------------------ */
1739 /* Function:    nat_delete                                                  */
1740 /* Returns:     Nil                                                         */
1741 /* Parameters:  natd(I)    - pointer to NAT structure to delete             */
1742 /*              logtype(I) - type of LOG record to create before deleting   */
1743 /* Write Lock:  ipf_nat                                                     */
1744 /*                                                                          */
1745 /* Delete a nat entry from the various lists and table.  If NAT logging is  */
1746 /* enabled then generate a NAT log record for this event.                   */
1747 /* ------------------------------------------------------------------------ */
1748 static void nat_delete(nat, logtype, ifs)
1749 struct nat *nat;
1750 int logtype;
1751 ipf_stack_t *ifs;
1752 {
1753 	struct ipnat *ipn;
1754 
1755 	if (logtype != 0 && ifs->ifs_nat_logging != 0)
1756 		nat_log(nat, logtype, ifs);
1757 
1758 	/*
1759 	 * Take it as a general indication that all the pointers are set if
1760 	 * nat_pnext is set.
1761 	 */
1762 	if (nat->nat_pnext != NULL) {
1763 		ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1764 		ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
1765 
1766 		*nat->nat_pnext = nat->nat_next;
1767 		if (nat->nat_next != NULL) {
1768 			nat->nat_next->nat_pnext = nat->nat_pnext;
1769 			nat->nat_next = NULL;
1770 		}
1771 		nat->nat_pnext = NULL;
1772 
1773 		*nat->nat_phnext[0] = nat->nat_hnext[0];
1774 		if (nat->nat_hnext[0] != NULL) {
1775 			nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1776 			nat->nat_hnext[0] = NULL;
1777 		}
1778 		nat->nat_phnext[0] = NULL;
1779 
1780 		*nat->nat_phnext[1] = nat->nat_hnext[1];
1781 		if (nat->nat_hnext[1] != NULL) {
1782 			nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1783 			nat->nat_hnext[1] = NULL;
1784 		}
1785 		nat->nat_phnext[1] = NULL;
1786 
1787 		if ((nat->nat_flags & SI_WILDP) != 0)
1788 			ifs->ifs_nat_stats.ns_wilds--;
1789 	}
1790 
1791 	if (nat->nat_me != NULL) {
1792 		*nat->nat_me = NULL;
1793 		nat->nat_me = NULL;
1794 	}
1795 
1796 	fr_deletequeueentry(&nat->nat_tqe);
1797 
1798 	MUTEX_ENTER(&nat->nat_lock);
1799 	if (nat->nat_ref > 1) {
1800 		nat->nat_ref--;
1801 		MUTEX_EXIT(&nat->nat_lock);
1802 		return;
1803 	}
1804 	MUTEX_EXIT(&nat->nat_lock);
1805 
1806 	/*
1807 	 * At this point, nat_ref is 1, doing "--" would make it 0..
1808 	 */
1809 	nat->nat_ref = 0;
1810 
1811 #ifdef	IPFILTER_SYNC
1812 	if (nat->nat_sync)
1813 		ipfsync_del(nat->nat_sync);
1814 #endif
1815 
1816 	if (nat->nat_fr != NULL)
1817 		(void)fr_derefrule(&nat->nat_fr, ifs);
1818 
1819 	if (nat->nat_hm != NULL)
1820 		fr_hostmapdel(&nat->nat_hm);
1821 
1822 	/*
1823 	 * If there is an active reference from the nat entry to its parent
1824 	 * rule, decrement the rule's reference count and free it too if no
1825 	 * longer being used.
1826 	 */
1827 	ipn = nat->nat_ptr;
1828 	if (ipn != NULL) {
1829 		ipn->in_space++;
1830 		ipn->in_use--;
1831 		if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) {
1832 			if (ipn->in_apr)
1833 				appr_free(ipn->in_apr);
1834 			KFREE(ipn);
1835 			ifs->ifs_nat_stats.ns_rules--;
1836 		}
1837 	}
1838 
1839 	MUTEX_DESTROY(&nat->nat_lock);
1840 
1841 	aps_free(nat->nat_aps, ifs);
1842 	ifs->ifs_nat_stats.ns_inuse--;
1843 
1844 	/*
1845 	 * If there's a fragment table entry too for this nat entry, then
1846 	 * dereference that as well.  This is after nat_lock is released
1847 	 * because of Tru64.
1848 	 */
1849 	fr_forgetnat((void *)nat, ifs);
1850 
1851 	KFREE(nat);
1852 }
1853 
1854 
1855 /* ------------------------------------------------------------------------ */
1856 /* Function:    nat_flushtable                                              */
1857 /* Returns:     int - number of NAT rules deleted                           */
1858 /* Parameters:  Nil                                                         */
1859 /*                                                                          */
1860 /* Deletes all currently active NAT sessions.  In deleting each NAT entry a */
1861 /* log record should be emitted in nat_delete() if NAT logging is enabled.  */
1862 /* ------------------------------------------------------------------------ */
1863 /*
1864  * nat_flushtable - clear the NAT table of all mapping entries.
1865  */
1866 static int nat_flushtable(ifs)
1867 ipf_stack_t *ifs;
1868 {
1869 	nat_t *nat;
1870 	int j = 0;
1871 
1872 	/*
1873 	 * ALL NAT mappings deleted, so lets just make the deletions
1874 	 * quicker.
1875 	 */
1876 	if (ifs->ifs_nat_table[0] != NULL)
1877 		bzero((char *)ifs->ifs_nat_table[0],
1878 		      sizeof(ifs->ifs_nat_table[0]) * ifs->ifs_ipf_nattable_sz);
1879 	if (ifs->ifs_nat_table[1] != NULL)
1880 		bzero((char *)ifs->ifs_nat_table[1],
1881 		      sizeof(ifs->ifs_nat_table[1]) * ifs->ifs_ipf_nattable_sz);
1882 
1883 	while ((nat = ifs->ifs_nat_instances) != NULL) {
1884 		nat_delete(nat, NL_FLUSH, ifs);
1885 		j++;
1886 	}
1887 
1888 	return j;
1889 }
1890 
1891 
1892 /* ------------------------------------------------------------------------ */
1893 /* Function:    nat_clearlist                                               */
1894 /* Returns:     int - number of NAT/RDR rules deleted                       */
1895 /* Parameters:  Nil                                                         */
1896 /*                                                                          */
1897 /* Delete all rules in the current list of rules.  There is nothing elegant */
1898 /* about this cleanup: simply free all entries on the list of rules and     */
1899 /* clear out the tables used for hashed NAT rule lookups.                   */
1900 /* ------------------------------------------------------------------------ */
1901 static int nat_clearlist(ifs)
1902 ipf_stack_t *ifs;
1903 {
1904 	ipnat_t *n, **np = &ifs->ifs_nat_list;
1905 	int i = 0;
1906 
1907 	if (ifs->ifs_nat_rules != NULL)
1908 		bzero((char *)ifs->ifs_nat_rules,
1909 		      sizeof(*ifs->ifs_nat_rules) * ifs->ifs_ipf_natrules_sz);
1910 	if (ifs->ifs_rdr_rules != NULL)
1911 		bzero((char *)ifs->ifs_rdr_rules,
1912 		      sizeof(*ifs->ifs_rdr_rules) * ifs->ifs_ipf_rdrrules_sz);
1913 
1914 	while ((n = *np) != NULL) {
1915 		*np = n->in_next;
1916 		if (n->in_use == 0) {
1917 			if (n->in_apr != NULL)
1918 				appr_free(n->in_apr);
1919 			KFREE(n);
1920 			ifs->ifs_nat_stats.ns_rules--;
1921 		} else {
1922 			n->in_flags |= IPN_DELETE;
1923 			n->in_next = NULL;
1924 		}
1925 		i++;
1926 	}
1927 	ifs->ifs_nat_masks = 0;
1928 	ifs->ifs_rdr_masks = 0;
1929 	return i;
1930 }
1931 
1932 
1933 /* ------------------------------------------------------------------------ */
1934 /* Function:    nat_newmap                                                  */
1935 /* Returns:     int - -1 == error, 0 == success                             */
1936 /* Parameters:  fin(I) - pointer to packet information                      */
1937 /*              nat(I) - pointer to NAT entry                               */
1938 /*              ni(I)  - pointer to structure with misc. information needed */
1939 /*                       to create new NAT entry.                           */
1940 /*                                                                          */
1941 /* Given an empty NAT structure, populate it with new information about a   */
1942 /* new NAT session, as defined by the matching NAT rule.                    */
1943 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
1944 /* to the new IP address for the translation.                               */
1945 /* ------------------------------------------------------------------------ */
1946 static INLINE int nat_newmap(fin, nat, ni)
1947 fr_info_t *fin;
1948 nat_t *nat;
1949 natinfo_t *ni;
1950 {
1951 	u_short st_port, dport, sport, port, sp, dp;
1952 	struct in_addr in, inb;
1953 	hostmap_t *hm;
1954 	u_32_t flags;
1955 	u_32_t st_ip;
1956 	ipnat_t *np;
1957 	nat_t *natl;
1958 	int l;
1959 	ipf_stack_t *ifs = fin->fin_ifs;
1960 
1961 	/*
1962 	 * If it's an outbound packet which doesn't match any existing
1963 	 * record, then create a new port
1964 	 */
1965 	l = 0;
1966 	hm = NULL;
1967 	np = ni->nai_np;
1968 	st_ip = np->in_nip;
1969 	st_port = np->in_pnext;
1970 	flags = ni->nai_flags;
1971 	sport = ni->nai_sport;
1972 	dport = ni->nai_dport;
1973 
1974 	/*
1975 	 * Do a loop until we either run out of entries to try or we find
1976 	 * a NAT mapping that isn't currently being used.  This is done
1977 	 * because the change to the source is not (usually) being fixed.
1978 	 */
1979 	do {
1980 		port = 0;
1981 		in.s_addr = htonl(np->in_nip);
1982 		if (l == 0) {
1983 			/*
1984 			 * Check to see if there is an existing NAT
1985 			 * setup for this IP address pair.
1986 			 */
1987 			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1988 					 in, 0, ifs);
1989 			if (hm != NULL)
1990 				in.s_addr = hm->hm_mapip.s_addr;
1991 		} else if ((l == 1) && (hm != NULL)) {
1992 			fr_hostmapdel(&hm);
1993 		}
1994 		in.s_addr = ntohl(in.s_addr);
1995 
1996 		nat->nat_hm = hm;
1997 
1998 		if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
1999 			if (l > 0)
2000 				return -1;
2001 		}
2002 
2003 		if (np->in_redir == NAT_BIMAP &&
2004 		    np->in_inmsk == np->in_outmsk) {
2005 			/*
2006 			 * map the address block in a 1:1 fashion
2007 			 */
2008 			in.s_addr = np->in_outip;
2009 			in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
2010 			in.s_addr = ntohl(in.s_addr);
2011 
2012 		} else if (np->in_redir & NAT_MAPBLK) {
2013 			if ((l >= np->in_ppip) || ((l > 0) &&
2014 			     !(flags & IPN_TCPUDP)))
2015 				return -1;
2016 			/*
2017 			 * map-block - Calculate destination address.
2018 			 */
2019 			in.s_addr = ntohl(fin->fin_saddr);
2020 			in.s_addr &= ntohl(~np->in_inmsk);
2021 			inb.s_addr = in.s_addr;
2022 			in.s_addr /= np->in_ippip;
2023 			in.s_addr &= ntohl(~np->in_outmsk);
2024 			in.s_addr += ntohl(np->in_outip);
2025 			/*
2026 			 * Calculate destination port.
2027 			 */
2028 			if ((flags & IPN_TCPUDP) &&
2029 			    (np->in_ppip != 0)) {
2030 				port = ntohs(sport) + l;
2031 				port %= np->in_ppip;
2032 				port += np->in_ppip *
2033 					(inb.s_addr % np->in_ippip);
2034 				port += MAPBLK_MINPORT;
2035 				port = htons(port);
2036 			}
2037 
2038 		} else if ((np->in_outip == 0) &&
2039 			   (np->in_outmsk == 0xffffffff)) {
2040 			/*
2041 			 * 0/32 - use the interface's IP address.
2042 			 */
2043 			if ((l > 0) ||
2044 			    fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
2045 				       &in, NULL, fin->fin_ifs) == -1)
2046 				return -1;
2047 			in.s_addr = ntohl(in.s_addr);
2048 
2049 		} else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
2050 			/*
2051 			 * 0/0 - use the original source address/port.
2052 			 */
2053 			if (l > 0)
2054 				return -1;
2055 			in.s_addr = ntohl(fin->fin_saddr);
2056 
2057 		} else if ((np->in_outmsk != 0xffffffff) &&
2058 			   (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
2059 			np->in_nip++;
2060 
2061 		natl = NULL;
2062 
2063 		if ((flags & IPN_TCPUDP) &&
2064 		    ((np->in_redir & NAT_MAPBLK) == 0) &&
2065 		    (np->in_flags & IPN_AUTOPORTMAP)) {
2066 			/*
2067 			 * "ports auto" (without map-block)
2068 			 */
2069 			if ((l > 0) && (l % np->in_ppip == 0)) {
2070 				if (l > np->in_space) {
2071 					return -1;
2072 				} else if ((l > np->in_ppip) &&
2073 					   np->in_outmsk != 0xffffffff)
2074 					np->in_nip++;
2075 			}
2076 			if (np->in_ppip != 0) {
2077 				port = ntohs(sport);
2078 				port += (l % np->in_ppip);
2079 				port %= np->in_ppip;
2080 				port += np->in_ppip *
2081 					(ntohl(fin->fin_saddr) %
2082 					 np->in_ippip);
2083 				port += MAPBLK_MINPORT;
2084 				port = htons(port);
2085 			}
2086 
2087 		} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
2088 			   (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
2089 			/*
2090 			 * Standard port translation.  Select next port.
2091 			 */
2092 			port = htons(np->in_pnext++);
2093 
2094 			if (np->in_pnext > ntohs(np->in_pmax)) {
2095 				np->in_pnext = ntohs(np->in_pmin);
2096 				if (np->in_outmsk != 0xffffffff)
2097 					np->in_nip++;
2098 			}
2099 		}
2100 
2101 		if (np->in_flags & IPN_IPRANGE) {
2102 			if (np->in_nip > ntohl(np->in_outmsk))
2103 				np->in_nip = ntohl(np->in_outip);
2104 		} else {
2105 			if ((np->in_outmsk != 0xffffffff) &&
2106 			    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
2107 			    ntohl(np->in_outip))
2108 				np->in_nip = ntohl(np->in_outip) + 1;
2109 		}
2110 
2111 		if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
2112 			port = sport;
2113 
2114 		/*
2115 		 * Here we do a lookup of the connection as seen from
2116 		 * the outside.  If an IP# pair already exists, try
2117 		 * again.  So if you have A->B becomes C->B, you can
2118 		 * also have D->E become C->E but not D->B causing
2119 		 * another C->B.  Also take protocol and ports into
2120 		 * account when determining whether a pre-existing
2121 		 * NAT setup will cause an external conflict where
2122 		 * this is appropriate.
2123 		 */
2124 		inb.s_addr = htonl(in.s_addr);
2125 		sp = fin->fin_data[0];
2126 		dp = fin->fin_data[1];
2127 		fin->fin_data[0] = fin->fin_data[1];
2128 		fin->fin_data[1] = htons(port);
2129 		natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2130 				    (u_int)fin->fin_p, fin->fin_dst, inb);
2131 		fin->fin_data[0] = sp;
2132 		fin->fin_data[1] = dp;
2133 
2134 		/*
2135 		 * Has the search wrapped around and come back to the
2136 		 * start ?
2137 		 */
2138 		if ((natl != NULL) &&
2139 		    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
2140 		    (np->in_nip != 0) && (st_ip == np->in_nip))
2141 			return -1;
2142 		l++;
2143 	} while (natl != NULL);
2144 
2145 	if (np->in_space > 0)
2146 		np->in_space--;
2147 
2148 	/* Setup the NAT table */
2149 	nat->nat_inip = fin->fin_src;
2150 	nat->nat_outip.s_addr = htonl(in.s_addr);
2151 	nat->nat_oip = fin->fin_dst;
2152 	if (nat->nat_hm == NULL)
2153 		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2154 					  nat->nat_outip, 0, ifs);
2155 
2156 	if (flags & IPN_TCPUDP) {
2157 		nat->nat_inport = sport;
2158 		nat->nat_outport = port;	/* sport */
2159 		nat->nat_oport = dport;
2160 		((tcphdr_t *)fin->fin_dp)->th_sport = port;
2161 	} else if (flags & IPN_ICMPQUERY) {
2162 		((icmphdr_t *)fin->fin_dp)->icmp_id = port;
2163 		nat->nat_inport = port;
2164 		nat->nat_outport = port;
2165 	}
2166 
2167 	ni->nai_ip.s_addr = in.s_addr;
2168 	ni->nai_port = port;
2169 	ni->nai_nport = dport;
2170 	return 0;
2171 }
2172 
2173 
2174 /* ------------------------------------------------------------------------ */
2175 /* Function:    nat_newrdr                                                  */
2176 /* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
2177 /*                    allow rule to be moved if IPN_ROUNDR is set.          */
2178 /* Parameters:  fin(I) - pointer to packet information                      */
2179 /*              nat(I) - pointer to NAT entry                               */
2180 /*              ni(I)  - pointer to structure with misc. information needed */
2181 /*                       to create new NAT entry.                           */
2182 /*                                                                          */
2183 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2184 /* to the new IP address for the translation.                               */
2185 /* ------------------------------------------------------------------------ */
2186 static INLINE int nat_newrdr(fin, nat, ni)
2187 fr_info_t *fin;
2188 nat_t *nat;
2189 natinfo_t *ni;
2190 {
2191 	u_short nport, dport, sport;
2192 	struct in_addr in, inb;
2193 	u_short sp, dp;
2194 	hostmap_t *hm;
2195 	u_32_t flags;
2196 	ipnat_t *np;
2197 	nat_t *natl;
2198 	int move;
2199 	ipf_stack_t *ifs = fin->fin_ifs;
2200 
2201 	move = 1;
2202 	hm = NULL;
2203 	in.s_addr = 0;
2204 	np = ni->nai_np;
2205 	flags = ni->nai_flags;
2206 	sport = ni->nai_sport;
2207 	dport = ni->nai_dport;
2208 
2209 	/*
2210 	 * If the matching rule has IPN_STICKY set, then we want to have the
2211 	 * same rule kick in as before.  Why would this happen?  If you have
2212 	 * a collection of rdr rules with "round-robin sticky", the current
2213 	 * packet might match a different one to the previous connection but
2214 	 * we want the same destination to be used.
2215 	 */
2216 	if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) ==
2217 	    (IPN_ROUNDR|IPN_STICKY)) {
2218 		hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2219 				 (u_32_t)dport, ifs);
2220 		if (hm != NULL) {
2221 			in.s_addr = ntohl(hm->hm_mapip.s_addr);
2222 			np = hm->hm_ipnat;
2223 			ni->nai_np = np;
2224 			move = 0;
2225 		}
2226 	}
2227 
2228 	/*
2229 	 * Otherwise, it's an inbound packet. Most likely, we don't
2230 	 * want to rewrite source ports and source addresses. Instead,
2231 	 * we want to rewrite to a fixed internal address and fixed
2232 	 * internal port.
2233 	 */
2234 	if (np->in_flags & IPN_SPLIT) {
2235 		in.s_addr = np->in_nip;
2236 
2237 		if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2238 			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2239 					 in, (u_32_t)dport, ifs);
2240 			if (hm != NULL) {
2241 				in.s_addr = hm->hm_mapip.s_addr;
2242 				move = 0;
2243 			}
2244 		}
2245 
2246 		if (hm == NULL || hm->hm_ref == 1) {
2247 			if (np->in_inip == htonl(in.s_addr)) {
2248 				np->in_nip = ntohl(np->in_inmsk);
2249 				move = 0;
2250 			} else {
2251 				np->in_nip = ntohl(np->in_inip);
2252 			}
2253 		}
2254 
2255 	} else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2256 		/*
2257 		 * 0/32 - use the interface's IP address.
2258 		 */
2259 		if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL,
2260 			   fin->fin_ifs) == -1)
2261 			return -1;
2262 		in.s_addr = ntohl(in.s_addr);
2263 
2264 	} else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2265 		/*
2266 		 * 0/0 - use the original destination address/port.
2267 		 */
2268 		in.s_addr = ntohl(fin->fin_daddr);
2269 
2270 	} else if (np->in_redir == NAT_BIMAP &&
2271 		   np->in_inmsk == np->in_outmsk) {
2272 		/*
2273 		 * map the address block in a 1:1 fashion
2274 		 */
2275 		in.s_addr = np->in_inip;
2276 		in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2277 		in.s_addr = ntohl(in.s_addr);
2278 	} else {
2279 		in.s_addr = ntohl(np->in_inip);
2280 	}
2281 
2282 	if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2283 		nport = dport;
2284 	else {
2285 		/*
2286 		 * Whilst not optimized for the case where
2287 		 * pmin == pmax, the gain is not significant.
2288 		 */
2289 		if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2290 		    (np->in_pmin != np->in_pmax)) {
2291 			nport = ntohs(dport) - ntohs(np->in_pmin) +
2292 				ntohs(np->in_pnext);
2293 			nport = htons(nport);
2294 		} else
2295 			nport = np->in_pnext;
2296 	}
2297 
2298 	/*
2299 	 * When the redirect-to address is set to 0.0.0.0, just
2300 	 * assume a blank `forwarding' of the packet.  We don't
2301 	 * setup any translation for this either.
2302 	 */
2303 	if (in.s_addr == 0) {
2304 		if (nport == dport)
2305 			return -1;
2306 		in.s_addr = ntohl(fin->fin_daddr);
2307 	}
2308 
2309 	/*
2310 	 * Check to see if this redirect mapping already exists and if
2311 	 * it does, return "failure" (allowing it to be created will just
2312 	 * cause one or both of these "connections" to stop working.)
2313 	 */
2314 	inb.s_addr = htonl(in.s_addr);
2315 	sp = fin->fin_data[0];
2316 	dp = fin->fin_data[1];
2317 	fin->fin_data[1] = fin->fin_data[0];
2318 	fin->fin_data[0] = ntohs(nport);
2319 	natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2320 		    (u_int)fin->fin_p, inb, fin->fin_src);
2321 	fin->fin_data[0] = sp;
2322 	fin->fin_data[1] = dp;
2323 	if (natl != NULL)
2324 		return (-1);
2325 
2326 	nat->nat_inip.s_addr = htonl(in.s_addr);
2327 	nat->nat_outip = fin->fin_dst;
2328 	nat->nat_oip = fin->fin_src;
2329 
2330 	ni->nai_ip.s_addr = in.s_addr;
2331 	ni->nai_nport = nport;
2332 	ni->nai_port = sport;
2333 
2334 	if (flags & IPN_TCPUDP) {
2335 		nat->nat_inport = nport;
2336 		nat->nat_outport = dport;
2337 		nat->nat_oport = sport;
2338 		((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2339 	} else if (flags & IPN_ICMPQUERY) {
2340 		((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2341 		nat->nat_inport = nport;
2342 		nat->nat_outport = nport;
2343 	}
2344 
2345 	return move;
2346 }
2347 
2348 /* ------------------------------------------------------------------------ */
2349 /* Function:    nat_new                                                     */
2350 /* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
2351 /*                       else pointer to new NAT structure                  */
2352 /* Parameters:  fin(I)       - pointer to packet information                */
2353 /*              np(I)        - pointer to NAT rule                          */
2354 /*              natsave(I)   - pointer to where to store NAT struct pointer */
2355 /*              flags(I)     - flags describing the current packet          */
2356 /*              direction(I) - direction of packet (in/out)                 */
2357 /* Write Lock:  ipf_nat                                                     */
2358 /*                                                                          */
2359 /* Attempts to create a new NAT entry.  Does not actually change the packet */
2360 /* in any way.                                                              */
2361 /*                                                                          */
2362 /* This fucntion is in three main parts: (1) deal with creating a new NAT   */
2363 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
2364 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2365 /* and (3) building that structure and putting it into the NAT table(s).    */
2366 /* ------------------------------------------------------------------------ */
2367 nat_t *nat_new(fin, np, natsave, flags, direction)
2368 fr_info_t *fin;
2369 ipnat_t *np;
2370 nat_t **natsave;
2371 u_int flags;
2372 int direction;
2373 {
2374 	tcphdr_t *tcp = NULL;
2375 	hostmap_t *hm = NULL;
2376 	nat_t *nat, *natl;
2377 	u_int nflags;
2378 	natinfo_t ni;
2379 	int move;
2380 	ipf_stack_t *ifs = fin->fin_ifs;
2381 
2382 	/*
2383 	 * Trigger automatic call to nat_extraflush() if the
2384 	 * table has reached capcity specified by hi watermark.
2385 	 */
2386 	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi)
2387 		ifs->ifs_nat_doflush = 1;
2388 
2389 	if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) {
2390 		ifs->ifs_nat_stats.ns_memfail++;
2391 		return NULL;
2392 	}
2393 
2394 	move = 1;
2395 	nflags = np->in_flags & flags;
2396 	nflags &= NAT_FROMRULE;
2397 
2398 	ni.nai_np = np;
2399 	ni.nai_nflags = nflags;
2400 	ni.nai_flags = flags;
2401 
2402 	/* Give me a new nat */
2403 	KMALLOC(nat, nat_t *);
2404 	if (nat == NULL) {
2405 		ifs->ifs_nat_stats.ns_memfail++;
2406 		/*
2407 		 * Try to automatically tune the max # of entries in the
2408 		 * table allowed to be less than what will cause kmem_alloc()
2409 		 * to fail and try to eliminate panics due to out of memory
2410 		 * conditions arising.
2411 		 */
2412 		if (ifs->ifs_ipf_nattable_max > ifs->ifs_ipf_nattable_sz) {
2413 			ifs->ifs_ipf_nattable_max = ifs->ifs_nat_stats.ns_inuse - 100;
2414 			printf("ipf_nattable_max reduced to %d\n",
2415 				ifs->ifs_ipf_nattable_max);
2416 		}
2417 		return NULL;
2418 	}
2419 
2420 	if (flags & IPN_TCPUDP) {
2421 		tcp = fin->fin_dp;
2422 		ni.nai_sport = htons(fin->fin_sport);
2423 		ni.nai_dport = htons(fin->fin_dport);
2424 	} else if (flags & IPN_ICMPQUERY) {
2425 		/*
2426 		 * In the ICMP query NAT code, we translate the ICMP id fields
2427 		 * to make them unique. This is indepedent of the ICMP type
2428 		 * (e.g. in the unlikely event that a host sends an echo and
2429 		 * an tstamp request with the same id, both packets will have
2430 		 * their ip address/id field changed in the same way).
2431 		 */
2432 		/* The icmp_id field is used by the sender to identify the
2433 		 * process making the icmp request. (the receiver justs
2434 		 * copies it back in its response). So, it closely matches
2435 		 * the concept of source port. We overlay sport, so we can
2436 		 * maximally reuse the existing code.
2437 		 */
2438 		ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2439 		ni.nai_dport = ni.nai_sport;
2440 	}
2441 
2442 	bzero((char *)nat, sizeof(*nat));
2443 	nat->nat_flags = flags;
2444 	nat->nat_redir = np->in_redir;
2445 
2446 	if ((flags & NAT_SLAVE) == 0) {
2447 		MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
2448 	}
2449 
2450 	/*
2451 	 * Search the current table for a match.
2452 	 */
2453 	if (direction == NAT_OUTBOUND) {
2454 		/*
2455 		 * We can now arrange to call this for the same connection
2456 		 * because ipf_nat_new doesn't protect the code path into
2457 		 * this function.
2458 		 */
2459 		natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2460 				     fin->fin_src, fin->fin_dst);
2461 		if (natl != NULL) {
2462 			KFREE(nat);
2463 			nat = natl;
2464 			goto done;
2465 		}
2466 
2467 		move = nat_newmap(fin, nat, &ni);
2468 		if (move == -1)
2469 			goto badnat;
2470 
2471 		np = ni.nai_np;
2472 	} else {
2473 		/*
2474 		 * NAT_INBOUND is used only for redirects rules
2475 		 */
2476 		natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2477 				    fin->fin_src, fin->fin_dst);
2478 		if (natl != NULL) {
2479 			KFREE(nat);
2480 			nat = natl;
2481 			goto done;
2482 		}
2483 
2484 		move = nat_newrdr(fin, nat, &ni);
2485 		if (move == -1)
2486 			goto badnat;
2487 
2488 		np = ni.nai_np;
2489 	}
2490 
2491 	if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2492 		if (np->in_redir == NAT_REDIRECT) {
2493 			nat_delrdr(np);
2494 			nat_addrdr(np, ifs);
2495 		} else if (np->in_redir == NAT_MAP) {
2496 			nat_delnat(np);
2497 			nat_addnat(np, ifs);
2498 		}
2499 	}
2500 
2501 	if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2502 		goto badnat;
2503 	}
2504 
2505 	nat_calc_chksum_diffs(nat);
2506 
2507 	if (flags & SI_WILDP)
2508 		ifs->ifs_nat_stats.ns_wilds++;
2509 	goto done;
2510 badnat:
2511 	ifs->ifs_nat_stats.ns_badnat++;
2512 	if ((hm = nat->nat_hm) != NULL)
2513 		fr_hostmapdel(&hm);
2514 	KFREE(nat);
2515 	nat = NULL;
2516 done:
2517 	if ((flags & NAT_SLAVE) == 0) {
2518 		MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
2519 	}
2520 	return nat;
2521 }
2522 
2523 
2524 /* ------------------------------------------------------------------------ */
2525 /* Function:    nat_finalise                                                */
2526 /* Returns:     int - 0 == sucess, -1 == failure                            */
2527 /* Parameters:  fin(I) - pointer to packet information                      */
2528 /*              nat(I) - pointer to NAT entry                               */
2529 /*              ni(I)  - pointer to structure with misc. information needed */
2530 /*                       to create new NAT entry.                           */
2531 /* Write Lock:  ipf_nat                                                     */
2532 /*                                                                          */
2533 /* This is the tail end of constructing a new NAT entry and is the same     */
2534 /* for both IPv4 and IPv6.                                                  */
2535 /* ------------------------------------------------------------------------ */
2536 /*ARGSUSED*/
2537 static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction)
2538 fr_info_t *fin;
2539 nat_t *nat;
2540 natinfo_t *ni;
2541 tcphdr_t *tcp;
2542 nat_t **natsave;
2543 int direction;
2544 {
2545 	frentry_t *fr;
2546 	ipnat_t *np;
2547 	ipf_stack_t *ifs = fin->fin_ifs;
2548 
2549 	np = ni->nai_np;
2550 
2551 	COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0], fin->fin_v);
2552 
2553 #ifdef	IPFILTER_SYNC
2554 	if ((nat->nat_flags & SI_CLONE) == 0)
2555 		nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2556 #endif
2557 
2558 	nat->nat_me = natsave;
2559 	nat->nat_dir = direction;
2560 	nat->nat_ifps[0] = np->in_ifps[0];
2561 	nat->nat_ifps[1] = np->in_ifps[1];
2562 	nat->nat_ptr = np;
2563 	nat->nat_p = fin->fin_p;
2564 	nat->nat_mssclamp = np->in_mssclamp;
2565 	fr = fin->fin_fr;
2566 	nat->nat_fr = fr;
2567 
2568 	if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2569 		if (appr_new(fin, nat) == -1)
2570 			return -1;
2571 
2572 	if (nat_insert(nat, fin->fin_rev, ifs) == 0) {
2573 		if (ifs->ifs_nat_logging)
2574 			nat_log(nat, (u_int)np->in_redir, ifs);
2575 		np->in_use++;
2576 		if (fr != NULL) {
2577 			MUTEX_ENTER(&fr->fr_lock);
2578 			fr->fr_ref++;
2579 			MUTEX_EXIT(&fr->fr_lock);
2580 		}
2581 		return 0;
2582 	}
2583 
2584 	/*
2585 	 * nat_insert failed, so cleanup time...
2586 	 */
2587 	return -1;
2588 }
2589 
2590 
2591 /* ------------------------------------------------------------------------ */
2592 /* Function:   nat_insert                                                   */
2593 /* Returns:    int - 0 == sucess, -1 == failure                             */
2594 /* Parameters: nat(I) - pointer to NAT structure                            */
2595 /*             rev(I) - flag indicating forward/reverse direction of packet */
2596 /* Write Lock: ipf_nat                                                      */
2597 /*                                                                          */
2598 /* Insert a NAT entry into the hash tables for searching and add it to the  */
2599 /* list of active NAT entries.  Adjust global counters when complete.       */
2600 /* ------------------------------------------------------------------------ */
2601 int	nat_insert(nat, rev, ifs)
2602 nat_t	*nat;
2603 int	rev;
2604 ipf_stack_t *ifs;
2605 {
2606 	u_int hv1, hv2;
2607 	nat_t **natp;
2608 
2609 	/*
2610 	 * Try and return an error as early as possible, so calculate the hash
2611 	 * entry numbers first and then proceed.
2612 	 */
2613 	if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2614 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2615 				  0xffffffff);
2616 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2617 				  ifs->ifs_ipf_nattable_sz);
2618 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2619 				  0xffffffff);
2620 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2621 				  ifs->ifs_ipf_nattable_sz);
2622 	} else {
2623 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2624 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1,
2625 				  ifs->ifs_ipf_nattable_sz);
2626 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2627 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2,
2628 				  ifs->ifs_ipf_nattable_sz);
2629 	}
2630 
2631 	if (ifs->ifs_nat_stats.ns_bucketlen[0][hv1] >= ifs->ifs_fr_nat_maxbucket ||
2632 	    ifs->ifs_nat_stats.ns_bucketlen[1][hv2] >= ifs->ifs_fr_nat_maxbucket) {
2633 		return -1;
2634 	}
2635 
2636 	nat->nat_hv[0] = hv1;
2637 	nat->nat_hv[1] = hv2;
2638 
2639 	MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2640 
2641 	nat->nat_rev = rev;
2642 	nat->nat_ref = 1;
2643 	nat->nat_bytes[0] = 0;
2644 	nat->nat_pkts[0] = 0;
2645 	nat->nat_bytes[1] = 0;
2646 	nat->nat_pkts[1] = 0;
2647 
2648 	nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2649 	nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4, ifs);
2650 
2651 	if (nat->nat_ifnames[1][0] !='\0') {
2652 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2653 		nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4, ifs);
2654 	} else {
2655 		(void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2656 			       LIFNAMSIZ);
2657 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2658 		nat->nat_ifps[1] = nat->nat_ifps[0];
2659 	}
2660 
2661 	nat->nat_next = ifs->ifs_nat_instances;
2662 	nat->nat_pnext = &ifs->ifs_nat_instances;
2663 	if (ifs->ifs_nat_instances)
2664 		ifs->ifs_nat_instances->nat_pnext = &nat->nat_next;
2665 	ifs->ifs_nat_instances = nat;
2666 
2667 	natp = &ifs->ifs_nat_table[0][hv1];
2668 	if (*natp)
2669 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2670 	nat->nat_phnext[0] = natp;
2671 	nat->nat_hnext[0] = *natp;
2672 	*natp = nat;
2673 	ifs->ifs_nat_stats.ns_bucketlen[0][hv1]++;
2674 
2675 	natp = &ifs->ifs_nat_table[1][hv2];
2676 	if (*natp)
2677 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2678 	nat->nat_phnext[1] = natp;
2679 	nat->nat_hnext[1] = *natp;
2680 	*natp = nat;
2681 	ifs->ifs_nat_stats.ns_bucketlen[1][hv2]++;
2682 
2683 	fr_setnatqueue(nat, rev, ifs);
2684 
2685 	ifs->ifs_nat_stats.ns_added++;
2686 	ifs->ifs_nat_stats.ns_inuse++;
2687 	return 0;
2688 }
2689 
2690 
2691 /* ------------------------------------------------------------------------ */
2692 /* Function:    nat_icmperrorlookup                                         */
2693 /* Returns:     nat_t* - point to matching NAT structure                    */
2694 /* Parameters:  fin(I) - pointer to packet information                      */
2695 /*              dir(I) - direction of packet (in/out)                       */
2696 /*                                                                          */
2697 /* Check if the ICMP error message is related to an existing TCP, UDP or    */
2698 /* ICMP query nat entry.  It is assumed that the packet is already of the   */
2699 /* the required length.                                                     */
2700 /* ------------------------------------------------------------------------ */
2701 nat_t *nat_icmperrorlookup(fin, dir)
2702 fr_info_t *fin;
2703 int dir;
2704 {
2705 	int flags = 0, minlen;
2706 	icmphdr_t *orgicmp;
2707 	tcphdr_t *tcp = NULL;
2708 	u_short data[2];
2709 	nat_t *nat;
2710 	ip_t *oip;
2711 	u_int p;
2712 
2713 	/*
2714 	 * Does it at least have the return (basic) IP header ?
2715 	 * Only a basic IP header (no options) should be with an ICMP error
2716 	 * header.  Also, if it's not an error type, then return.
2717 	 */
2718 	if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2719 		return NULL;
2720 
2721 	/*
2722 	 * Check packet size
2723 	 */
2724 	oip = (ip_t *)((char *)fin->fin_dp + 8);
2725 	minlen = IP_HL(oip) << 2;
2726 	if ((minlen < sizeof(ip_t)) ||
2727 	    (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2728 		return NULL;
2729 	/*
2730 	 * Is the buffer big enough for all of it ?  It's the size of the IP
2731 	 * header claimed in the encapsulated part which is of concern.  It
2732 	 * may be too big to be in this buffer but not so big that it's
2733 	 * outside the ICMP packet, leading to TCP deref's causing problems.
2734 	 * This is possible because we don't know how big oip_hl is when we
2735 	 * do the pullup early in fr_check() and thus can't gaurantee it is
2736 	 * all here now.
2737 	 */
2738 #ifdef  _KERNEL
2739 	{
2740 	mb_t *m;
2741 
2742 	m = fin->fin_m;
2743 # if defined(MENTAT)
2744 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2745 		return NULL;
2746 # else
2747 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2748 	    (char *)fin->fin_ip + M_LEN(m))
2749 		return NULL;
2750 # endif
2751 	}
2752 #endif
2753 
2754 	if (fin->fin_daddr != oip->ip_src.s_addr)
2755 		return NULL;
2756 
2757 	p = oip->ip_p;
2758 	if (p == IPPROTO_TCP)
2759 		flags = IPN_TCP;
2760 	else if (p == IPPROTO_UDP)
2761 		flags = IPN_UDP;
2762 	else if (p == IPPROTO_ICMP) {
2763 		orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2764 
2765 		/* see if this is related to an ICMP query */
2766 		if (nat_icmpquerytype4(orgicmp->icmp_type)) {
2767 			data[0] = fin->fin_data[0];
2768 			data[1] = fin->fin_data[1];
2769 			fin->fin_data[0] = 0;
2770 			fin->fin_data[1] = orgicmp->icmp_id;
2771 
2772 			flags = IPN_ICMPERR|IPN_ICMPQUERY;
2773 			/*
2774 			 * NOTE : dir refers to the direction of the original
2775 			 *        ip packet. By definition the icmp error
2776 			 *        message flows in the opposite direction.
2777 			 */
2778 			if (dir == NAT_INBOUND)
2779 				nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2780 						   oip->ip_src);
2781 			else
2782 				nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2783 						    oip->ip_src);
2784 			fin->fin_data[0] = data[0];
2785 			fin->fin_data[1] = data[1];
2786 			return nat;
2787 		}
2788 	}
2789 
2790 	if (flags & IPN_TCPUDP) {
2791 		minlen += 8;		/* + 64bits of data to get ports */
2792 		if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2793 			return NULL;
2794 
2795 		data[0] = fin->fin_data[0];
2796 		data[1] = fin->fin_data[1];
2797 		tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2798 		fin->fin_data[0] = ntohs(tcp->th_dport);
2799 		fin->fin_data[1] = ntohs(tcp->th_sport);
2800 
2801 		if (dir == NAT_INBOUND) {
2802 			nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2803 					   oip->ip_src);
2804 		} else {
2805 			nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2806 					    oip->ip_src);
2807 		}
2808 		fin->fin_data[0] = data[0];
2809 		fin->fin_data[1] = data[1];
2810 		return nat;
2811 	}
2812 	if (dir == NAT_INBOUND)
2813 		return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2814 	else
2815 		return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2816 }
2817 
2818 
2819 /* ------------------------------------------------------------------------ */
2820 /* Function:    nat_icmperror                                               */
2821 /* Returns:     nat_t* - point to matching NAT structure                    */
2822 /* Parameters:  fin(I)    - pointer to packet information                   */
2823 /*              nflags(I) - NAT flags for this packet                       */
2824 /*              dir(I)    - direction of packet (in/out)                    */
2825 /*                                                                          */
2826 /* Fix up an ICMP packet which is an error message for an existing NAT      */
2827 /* session.  This will correct both packet header data and checksums.       */
2828 /*                                                                          */
2829 /* This should *ONLY* be used for incoming ICMP error packets to make sure  */
2830 /* a NAT'd ICMP packet gets correctly recognised.                           */
2831 /* ------------------------------------------------------------------------ */
2832 nat_t *nat_icmperror(fin, nflags, dir)
2833 fr_info_t *fin;
2834 u_int *nflags;
2835 int dir;
2836 {
2837 	u_32_t sum1, sum2, sumd, psum1, psum2, psumd, sumd2;
2838 	struct in_addr in;
2839 	icmphdr_t *icmp, *orgicmp;
2840 	int dlen;
2841 	udphdr_t *udp;
2842 	tcphdr_t *tcp;
2843 	nat_t *nat;
2844 	ip_t *oip;
2845 	if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
2846 		return NULL;
2847 
2848 	/*
2849 	 * nat_icmperrorlookup() looks up nat entry associated with the
2850 	 * offending IP packet and returns pointer to the entry, or NULL
2851 	 * if packet wasn't natted or for `defective' packets.
2852 	 */
2853 
2854 	if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
2855 		return NULL;
2856 
2857 	sumd2 = 0;
2858 	*nflags = IPN_ICMPERR;
2859 	icmp = fin->fin_dp;
2860 	oip = (ip_t *)&icmp->icmp_ip;
2861 	udp = (udphdr_t *)((((char *)oip) + (IP_HL(oip) << 2)));
2862 	tcp = (tcphdr_t *)udp;
2863 	dlen = fin->fin_plen - ((char *)udp - (char *)fin->fin_ip);
2864 
2865 	/*
2866 	 * Need to adjust ICMP header to include the real IP#'s and
2867 	 * port #'s.  There are three steps required.
2868 	 *
2869 	 * Step 1
2870 	 * Fix the IP addresses in the offending IP packet and update
2871 	 * ip header checksum to compensate for the change.
2872 	 *
2873 	 * No update needed here for icmp_cksum because the ICMP checksum
2874 	 * is calculated over the complete ICMP packet, which includes the
2875 	 * changed oip IP addresses and oip->ip_sum.  These two changes
2876 	 * cancel each other out (if the delta for the IP address is x,
2877 	 * then the delta for ip_sum is minus x).
2878 	 */
2879 
2880 	if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
2881 		sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
2882 		in = nat->nat_inip;
2883 		oip->ip_src = in;
2884 	} else {
2885 		sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
2886 		in = nat->nat_outip;
2887 		oip->ip_dst = in;
2888 	}
2889 
2890 	sum2 = LONG_SUM(ntohl(in.s_addr));
2891 	CALC_SUMD(sum1, sum2, sumd);
2892 	fix_datacksum(&oip->ip_sum, sumd);
2893 
2894 	/*
2895 	 * Step 2
2896 	 * Perform other adjustments based on protocol of offending packet.
2897 	 */
2898 
2899 	switch (oip->ip_p) {
2900 		case IPPROTO_TCP :
2901 		case IPPROTO_UDP :
2902 
2903 			/*
2904 			* For offending TCP/UDP IP packets, translate the ports
2905 			* based on the NAT specification.
2906 			*
2907 			* Advance notice : Now it becomes complicated :-)
2908 			*
2909 			* Since the port and IP addresse fields are both part
2910 			* of the TCP/UDP checksum of the offending IP packet,
2911 			* we need to adjust that checksum as well.
2912 			*
2913 			* To further complicate things, the TCP/UDP checksum
2914 			* may not be present.  We must check to see if the
2915 			* length of the data portion is big enough to hold
2916 			* the checksum.  In the UDP case, a test to determine
2917 			* if the checksum is even set is also required.
2918 			*
2919 			* Any changes to an IP address, port or checksum within
2920 			* the ICMP packet requires a change to icmp_cksum.
2921 			*
2922 			* Be extremely careful here ... The change is dependent
2923 			* upon whether or not the TCP/UPD checksum is present.
2924 			*
2925 			* If TCP/UPD checksum is present, the icmp_cksum must
2926 			* compensate for checksum modification resulting from
2927 			* IP address change only.  Port change and resulting
2928 			* data checksum adjustments cancel each other out.
2929 			*
2930 			* If TCP/UDP checksum is not present, icmp_cksum must
2931 			* compensate for port change only.  The IP address
2932 			* change does not modify anything else in this case.
2933 			*/
2934 
2935 			psum1 = 0;
2936 			psum2 = 0;
2937 			psumd = 0;
2938 
2939 			if ((tcp->th_dport == nat->nat_oport) &&
2940 			    (tcp->th_sport != nat->nat_inport)) {
2941 
2942 				/*
2943 				 * Translate the source port.
2944 				 */
2945 
2946 				psum1 = ntohs(tcp->th_sport);
2947 				psum2 = ntohs(nat->nat_inport);
2948 				tcp->th_sport = nat->nat_inport;
2949 
2950 			} else if ((tcp->th_sport == nat->nat_oport) &&
2951 				    (tcp->th_dport != nat->nat_outport)) {
2952 
2953 				/*
2954 				 * Translate the destination port.
2955 				 */
2956 
2957 				psum1 = ntohs(tcp->th_dport);
2958 				psum2 = ntohs(nat->nat_outport);
2959 				tcp->th_dport = nat->nat_outport;
2960 			}
2961 
2962 			if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) {
2963 
2964 				/*
2965 				 * TCP checksum present.
2966 				 *
2967 				 * Adjust data checksum and icmp checksum to
2968 				 * compensate for any IP address change.
2969 				 */
2970 
2971 				sum1 = ntohs(tcp->th_sum);
2972 				fix_datacksum(&tcp->th_sum, sumd);
2973 				sum2 = ntohs(tcp->th_sum);
2974 				sumd2 = sumd << 1;
2975 				CALC_SUMD(sum1, sum2, sumd);
2976 				sumd2 += sumd;
2977 
2978 				/*
2979 				 * Also make data checksum adjustment to
2980 				 * compensate for any port change.
2981 				 */
2982 
2983 				if (psum1 != psum2) {
2984 					CALC_SUMD(psum1, psum2, psumd);
2985 					fix_datacksum(&tcp->th_sum, psumd);
2986 				}
2987 
2988 			} else if ((oip->ip_p == IPPROTO_UDP) &&
2989 				   (dlen >= 8) && (udp->uh_sum != 0)) {
2990 
2991 				/*
2992 				 * The UDP checksum is present and set.
2993 				 *
2994 				 * Adjust data checksum and icmp checksum to
2995 				 * compensate for any IP address change.
2996 				 */
2997 
2998 				sum1 = ntohs(udp->uh_sum);
2999 				fix_datacksum(&udp->uh_sum, sumd);
3000 				sum2 = ntohs(udp->uh_sum);
3001 				sumd2 = sumd << 1;
3002 				CALC_SUMD(sum1, sum2, sumd);
3003 				sumd2 += sumd;
3004 
3005 				/*
3006 				 * Also make data checksum adjustment to
3007 				 * compensate for any port change.
3008 				 */
3009 
3010 				if (psum1 != psum2) {
3011 					CALC_SUMD(psum1, psum2, psumd);
3012 					fix_datacksum(&udp->uh_sum, psumd);
3013 				}
3014 
3015 			} else {
3016 
3017 				/*
3018 				 * Data checksum was not present.
3019 				 *
3020 				 * Compensate for any port change.
3021 				 */
3022 
3023 				CALC_SUMD(psum2, psum1, psumd);
3024 				sumd2 += psumd;
3025 			}
3026 			break;
3027 
3028 		case IPPROTO_ICMP :
3029 
3030 			orgicmp = (icmphdr_t *)udp;
3031 
3032 			if ((nat->nat_dir == NAT_OUTBOUND) &&
3033 			    (orgicmp->icmp_id != nat->nat_inport) &&
3034 			    (dlen >= 8)) {
3035 
3036 				/*
3037 				 * Fix ICMP checksum (of the offening ICMP
3038 				 * query packet) to compensate the change
3039 				 * in the ICMP id of the offending ICMP
3040 				 * packet.
3041 				 *
3042 				 * Since you modify orgicmp->icmp_id with
3043 				 * a delta (say x) and you compensate that
3044 				 * in origicmp->icmp_cksum with a delta
3045 				 * minus x, you don't have to adjust the
3046 				 * overall icmp->icmp_cksum
3047 				 */
3048 
3049 				sum1 = ntohs(orgicmp->icmp_id);
3050 				sum2 = ntohs(nat->nat_inport);
3051 				CALC_SUMD(sum1, sum2, sumd);
3052 				orgicmp->icmp_id = nat->nat_inport;
3053 				fix_datacksum(&orgicmp->icmp_cksum, sumd);
3054 
3055 			} /* nat_dir can't be NAT_INBOUND for icmp queries */
3056 
3057 			break;
3058 
3059 		default :
3060 
3061 			break;
3062 
3063 	} /* switch (oip->ip_p) */
3064 
3065 	/*
3066 	 * Step 3
3067 	 * Make the adjustments to icmp checksum.
3068 	 */
3069 
3070 	if (sumd2 != 0) {
3071 		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3072 		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3073 		fix_incksum(&icmp->icmp_cksum, sumd2);
3074 	}
3075 	return nat;
3076 }
3077 
3078 
/*
 * NB: these lookups don't lock access to the list; it is assumed that this
 * has already been done!
 */
3083 
3084 /* ------------------------------------------------------------------------ */
3085 /* Function:    nat_inlookup                                                */
3086 /* Returns:     nat_t* - NULL == no match,                                  */
3087 /*                       else pointer to matching NAT entry                 */
3088 /* Parameters:  fin(I)    - pointer to packet information                   */
3089 /*              flags(I)  - NAT flags for this packet                       */
3090 /*              p(I)      - protocol for this packet                        */
3091 /*              src(I)    - source IP address                               */
3092 /*              mapdst(I) - destination IP address                          */
3093 /*                                                                          */
3094 /* Lookup a nat entry based on the mapped destination ip address/port and   */
3095 /* real source address/port.  We use this lookup when receiving a packet,   */
3096 /* we're looking for a table entry, based on the destination address.       */
3097 /*                                                                          */
3098 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3099 /*                                                                          */
3100 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3101 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3102 /*                                                                          */
3103 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3104 /*            the packet is of said protocol                                */
3105 /* ------------------------------------------------------------------------ */
3106 nat_t *nat_inlookup(fin, flags, p, src, mapdst)
3107 fr_info_t *fin;
3108 u_int flags, p;
3109 struct in_addr src , mapdst;
3110 {
3111 	u_short sport, dport;
3112 	ipnat_t *ipn;
3113 	u_int sflags;
3114 	nat_t *nat;
3115 	int nflags;
3116 	u_32_t dst;
3117 	void *ifp;
3118 	u_int hv;
3119 	ipf_stack_t *ifs = fin->fin_ifs;
3120 
3121 	if (fin != NULL)
3122 		ifp = fin->fin_ifp;
3123 	else
3124 		ifp = NULL;
3125 	sport = 0;
3126 	dport = 0;
3127 	dst = mapdst.s_addr;
3128 	sflags = flags & NAT_TCPUDPICMP;
3129 
3130 	switch (p)
3131 	{
3132 	case IPPROTO_TCP :
3133 	case IPPROTO_UDP :
3134 		sport = htons(fin->fin_data[0]);
3135 		dport = htons(fin->fin_data[1]);
3136 		break;
3137 	case IPPROTO_ICMP :
3138 		if (flags & IPN_ICMPERR)
3139 			sport = fin->fin_data[1];
3140 		else
3141 			dport = fin->fin_data[1];
3142 		break;
3143 	default :
3144 		break;
3145 	}
3146 
3147 
3148 	if ((flags & SI_WILDP) != 0)
3149 		goto find_in_wild_ports;
3150 
3151 	hv = NAT_HASH_FN(dst, dport, 0xffffffff);
3152 	hv = NAT_HASH_FN(src.s_addr, hv + sport, ifs->ifs_ipf_nattable_sz);
3153 	nat = ifs->ifs_nat_table[1][hv];
3154 	for (; nat; nat = nat->nat_hnext[1]) {
3155 		if (nat->nat_ifps[0] != NULL) {
3156 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3157 				continue;
3158 		} else if (ifp != NULL)
3159 			nat->nat_ifps[0] = ifp;
3160 
3161 		nflags = nat->nat_flags;
3162 
3163 		if (nat->nat_oip.s_addr == src.s_addr &&
3164 		    nat->nat_outip.s_addr == dst &&
3165 		    (((p == 0) &&
3166 		      (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3167 		     || (p == nat->nat_p))) {
3168 			switch (p)
3169 			{
3170 #if 0
3171 			case IPPROTO_GRE :
3172 				if (nat->nat_call[1] != fin->fin_data[0])
3173 					continue;
3174 				break;
3175 #endif
3176 			case IPPROTO_ICMP :
3177 				if ((flags & IPN_ICMPERR) != 0) {
3178 					if (nat->nat_outport != sport)
3179 						continue;
3180 				} else {
3181 					if (nat->nat_outport != dport)
3182 						continue;
3183 				}
3184 				break;
3185 			case IPPROTO_TCP :
3186 			case IPPROTO_UDP :
3187 				if (nat->nat_oport != sport)
3188 					continue;
3189 				if (nat->nat_outport != dport)
3190 					continue;
3191 				break;
3192 			default :
3193 				break;
3194 			}
3195 
3196 			ipn = nat->nat_ptr;
3197 			if ((ipn != NULL) && (nat->nat_aps != NULL))
3198 				if (appr_match(fin, nat) != 0)
3199 					continue;
3200 			return nat;
3201 		}
3202 	}
3203 
3204 	/*
3205 	 * So if we didn't find it but there are wildcard members in the hash
3206 	 * table, go back and look for them.  We do this search and update here
3207 	 * because it is modifying the NAT table and we want to do this only
3208 	 * for the first packet that matches.  The exception, of course, is
3209 	 * for "dummy" (FI_IGNORE) lookups.
3210 	 */
3211 find_in_wild_ports:
3212 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3213 		return NULL;
3214 	if (ifs->ifs_nat_stats.ns_wilds == 0)
3215 		return NULL;
3216 
3217 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3218 
3219 	hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3220 	hv = NAT_HASH_FN(src.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3221 
3222 	WRITE_ENTER(&ifs->ifs_ipf_nat);
3223 
3224 	nat = ifs->ifs_nat_table[1][hv];
3225 	for (; nat; nat = nat->nat_hnext[1]) {
3226 		if (nat->nat_ifps[0] != NULL) {
3227 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3228 				continue;
3229 		} else if (ifp != NULL)
3230 			nat->nat_ifps[0] = ifp;
3231 
3232 		if (nat->nat_p != fin->fin_p)
3233 			continue;
3234 		if (nat->nat_oip.s_addr != src.s_addr ||
3235 		    nat->nat_outip.s_addr != dst)
3236 			continue;
3237 
3238 		nflags = nat->nat_flags;
3239 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3240 			continue;
3241 
3242 		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3243 			       NAT_INBOUND) == 1) {
3244 			if ((fin->fin_flx & FI_IGNORE) != 0)
3245 				break;
3246 			if ((nflags & SI_CLONE) != 0) {
3247 				nat = fr_natclone(fin, nat);
3248 				if (nat == NULL)
3249 					break;
3250 			} else {
3251 				MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3252 				ifs->ifs_nat_stats.ns_wilds--;
3253 				MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3254 			}
3255 			nat->nat_oport = sport;
3256 			nat->nat_outport = dport;
3257 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3258 			nat_tabmove(nat, ifs);
3259 			break;
3260 		}
3261 	}
3262 
3263 	MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3264 
3265 	return nat;
3266 }
3267 
3268 
3269 /* ------------------------------------------------------------------------ */
3270 /* Function:    nat_tabmove                                                 */
3271 /* Returns:     Nil                                                         */
3272 /* Parameters:  nat(I) - pointer to NAT structure                           */
3273 /* Write Lock:  ipf_nat                                                     */
3274 /*                                                                          */
3275 /* This function is only called for TCP/UDP NAT table entries where the     */
3276 /* original was placed in the table without hashing on the ports and we now */
3277 /* want to include hashing on port numbers.                                 */
3278 /* ------------------------------------------------------------------------ */
3279 static void nat_tabmove(nat, ifs)
3280 nat_t *nat;
3281 ipf_stack_t *ifs;
3282 {
3283 	nat_t **natp;
3284 	u_int hv;
3285 
3286 	if (nat->nat_flags & SI_CLONE)
3287 		return;
3288 
3289 	/*
3290 	 * Remove the NAT entry from the old location
3291 	 */
3292 	if (nat->nat_hnext[0])
3293 		nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3294 	*nat->nat_phnext[0] = nat->nat_hnext[0];
3295 	ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3296 
3297 	if (nat->nat_hnext[1])
3298 		nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3299 	*nat->nat_phnext[1] = nat->nat_hnext[1];
3300 	ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3301 
3302 	/*
3303 	 * Add into the NAT table in the new position
3304 	 */
3305 	hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3306 	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3307 			 ifs->ifs_ipf_nattable_sz);
3308 	nat->nat_hv[0] = hv;
3309 	natp = &ifs->ifs_nat_table[0][hv];
3310 	if (*natp)
3311 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3312 	nat->nat_phnext[0] = natp;
3313 	nat->nat_hnext[0] = *natp;
3314 	*natp = nat;
3315 	ifs->ifs_nat_stats.ns_bucketlen[0][hv]++;
3316 
3317 	hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3318 	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3319 			 ifs->ifs_ipf_nattable_sz);
3320 	nat->nat_hv[1] = hv;
3321 	natp = &ifs->ifs_nat_table[1][hv];
3322 	if (*natp)
3323 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3324 	nat->nat_phnext[1] = natp;
3325 	nat->nat_hnext[1] = *natp;
3326 	*natp = nat;
3327 	ifs->ifs_nat_stats.ns_bucketlen[1][hv]++;
3328 }
3329 
3330 
3331 /* ------------------------------------------------------------------------ */
3332 /* Function:    nat_outlookup                                               */
3333 /* Returns:     nat_t* - NULL == no match,                                  */
3334 /*                       else pointer to matching NAT entry                 */
3335 /* Parameters:  fin(I)   - pointer to packet information                    */
3336 /*              flags(I) - NAT flags for this packet                        */
3337 /*              p(I)     - protocol for this packet                         */
3338 /*              src(I)   - source IP address                                */
3339 /*              dst(I)   - destination IP address                           */
3340 /*              rw(I)    - 1 == write lock on ipf_nat held, 0 == read lock. */
3341 /*                                                                          */
3342 /* Lookup a nat entry based on the source 'real' ip address/port and        */
3343 /* destination address/port.  We use this lookup when sending a packet out, */
3344 /* we're looking for a table entry, based on the source address.            */
3345 /*                                                                          */
3346 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3347 /*                                                                          */
3348 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3349 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3350 /*                                                                          */
3351 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3352 /*            the packet is of said protocol                                */
3353 /* ------------------------------------------------------------------------ */
3354 nat_t *nat_outlookup(fin, flags, p, src, dst)
3355 fr_info_t *fin;
3356 u_int flags, p;
3357 struct in_addr src , dst;
3358 {
3359 	u_short sport, dport;
3360 	u_int sflags;
3361 	ipnat_t *ipn;
3362 	u_32_t srcip;
3363 	nat_t *nat;
3364 	int nflags;
3365 	void *ifp;
3366 	u_int hv;
3367 	ipf_stack_t *ifs = fin->fin_ifs;
3368 
3369 	ifp = fin->fin_ifp;
3370 
3371 	srcip = src.s_addr;
3372 	sflags = flags & IPN_TCPUDPICMP;
3373 	sport = 0;
3374 	dport = 0;
3375 
3376 	switch (p)
3377 	{
3378 	case IPPROTO_TCP :
3379 	case IPPROTO_UDP :
3380 		sport = htons(fin->fin_data[0]);
3381 		dport = htons(fin->fin_data[1]);
3382 		break;
3383 	case IPPROTO_ICMP :
3384 		if (flags & IPN_ICMPERR)
3385 			sport = fin->fin_data[1];
3386 		else
3387 			dport = fin->fin_data[1];
3388 		break;
3389 	default :
3390 		break;
3391 	}
3392 
3393 	if ((flags & SI_WILDP) != 0)
3394 		goto find_out_wild_ports;
3395 
3396 	hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3397 	hv = NAT_HASH_FN(dst.s_addr, hv + dport, ifs->ifs_ipf_nattable_sz);
3398 	nat = ifs->ifs_nat_table[0][hv];
3399 	for (; nat; nat = nat->nat_hnext[0]) {
3400 		if (nat->nat_ifps[1] != NULL) {
3401 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3402 				continue;
3403 		} else if (ifp != NULL)
3404 			nat->nat_ifps[1] = ifp;
3405 
3406 		nflags = nat->nat_flags;
3407 
3408 		if (nat->nat_inip.s_addr == srcip &&
3409 		    nat->nat_oip.s_addr == dst.s_addr &&
3410 		    (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3411 		     || (p == nat->nat_p))) {
3412 			switch (p)
3413 			{
3414 #if 0
3415 			case IPPROTO_GRE :
3416 				if (nat->nat_call[1] != fin->fin_data[0])
3417 					continue;
3418 				break;
3419 #endif
3420 			case IPPROTO_TCP :
3421 			case IPPROTO_UDP :
3422 				if (nat->nat_oport != dport)
3423 					continue;
3424 				if (nat->nat_inport != sport)
3425 					continue;
3426 				break;
3427 			default :
3428 				break;
3429 			}
3430 
3431 			ipn = nat->nat_ptr;
3432 			if ((ipn != NULL) && (nat->nat_aps != NULL))
3433 				if (appr_match(fin, nat) != 0)
3434 					continue;
3435 			return nat;
3436 		}
3437 	}
3438 
3439 	/*
3440 	 * So if we didn't find it but there are wildcard members in the hash
3441 	 * table, go back and look for them.  We do this search and update here
3442 	 * because it is modifying the NAT table and we want to do this only
3443 	 * for the first packet that matches.  The exception, of course, is
3444 	 * for "dummy" (FI_IGNORE) lookups.
3445 	 */
3446 find_out_wild_ports:
3447 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3448 		return NULL;
3449 	if (ifs->ifs_nat_stats.ns_wilds == 0)
3450 		return NULL;
3451 
3452 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3453 
3454 	hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3455 	hv = NAT_HASH_FN(dst.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3456 
3457 	WRITE_ENTER(&ifs->ifs_ipf_nat);
3458 
3459 	nat = ifs->ifs_nat_table[0][hv];
3460 	for (; nat; nat = nat->nat_hnext[0]) {
3461 		if (nat->nat_ifps[1] != NULL) {
3462 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3463 				continue;
3464 		} else if (ifp != NULL)
3465 			nat->nat_ifps[1] = ifp;
3466 
3467 		if (nat->nat_p != fin->fin_p)
3468 			continue;
3469 		if ((nat->nat_inip.s_addr != srcip) ||
3470 		    (nat->nat_oip.s_addr != dst.s_addr))
3471 			continue;
3472 
3473 		nflags = nat->nat_flags;
3474 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3475 			continue;
3476 
3477 		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3478 			       NAT_OUTBOUND) == 1) {
3479 			if ((fin->fin_flx & FI_IGNORE) != 0)
3480 				break;
3481 			if ((nflags & SI_CLONE) != 0) {
3482 				nat = fr_natclone(fin, nat);
3483 				if (nat == NULL)
3484 					break;
3485 			} else {
3486 				MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3487 				ifs->ifs_nat_stats.ns_wilds--;
3488 				MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3489 			}
3490 			nat->nat_inport = sport;
3491 			nat->nat_oport = dport;
3492 			if (nat->nat_outport == 0)
3493 				nat->nat_outport = sport;
3494 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3495 			nat_tabmove(nat, ifs);
3496 			break;
3497 		}
3498 	}
3499 
3500 	MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3501 
3502 	return nat;
3503 }
3504 
3505 
3506 /* ------------------------------------------------------------------------ */
3507 /* Function:    nat_lookupredir                                             */
3508 /* Returns:     nat_t* - NULL == no match,                                  */
3509 /*                       else pointer to matching NAT entry                 */
3510 /* Parameters:  np(I) - pointer to description of packet to find NAT table  */
3511 /*                      entry for.                                          */
3512 /*                                                                          */
3513 /* Lookup the NAT tables to search for a matching redirect                  */
3514 /* ------------------------------------------------------------------------ */
3515 nat_t *nat_lookupredir(np, ifs)
3516 natlookup_t *np;
3517 ipf_stack_t *ifs;
3518 {
3519 	fr_info_t fi;
3520 	nat_t *nat;
3521 
3522 	bzero((char *)&fi, sizeof(fi));
3523 	if (np->nl_flags & IPN_IN) {
3524 		fi.fin_data[0] = ntohs(np->nl_realport);
3525 		fi.fin_data[1] = ntohs(np->nl_outport);
3526 	} else {
3527 		fi.fin_data[0] = ntohs(np->nl_inport);
3528 		fi.fin_data[1] = ntohs(np->nl_outport);
3529 	}
3530 	if (np->nl_flags & IPN_TCP)
3531 		fi.fin_p = IPPROTO_TCP;
3532 	else if (np->nl_flags & IPN_UDP)
3533 		fi.fin_p = IPPROTO_UDP;
3534 	else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3535 		fi.fin_p = IPPROTO_ICMP;
3536 
3537 	fi.fin_ifs = ifs;
3538 	/*
3539 	 * We can do two sorts of lookups:
3540 	 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3541 	 * - default: we have the `in' and `out' address, look for `real'.
3542 	 */
3543 	if (np->nl_flags & IPN_IN) {
3544 		if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3545 					np->nl_realip, np->nl_outip))) {
3546 			np->nl_inip = nat->nat_inip;
3547 			np->nl_inport = nat->nat_inport;
3548 		}
3549 	} else {
3550 		/*
3551 		 * If nl_inip is non null, this is a lookup based on the real
3552 		 * ip address. Else, we use the fake.
3553 		 */
3554 		if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3555 					 np->nl_inip, np->nl_outip))) {
3556 
3557 			if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3558 				fr_info_t fin;
3559 				bzero((char *)&fin, sizeof(fin));
3560 				fin.fin_p = nat->nat_p;
3561 				fin.fin_data[0] = ntohs(nat->nat_outport);
3562 				fin.fin_data[1] = ntohs(nat->nat_oport);
3563 				fin.fin_ifs = ifs;
3564 				if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3565 						 nat->nat_outip,
3566 						 nat->nat_oip) != NULL) {
3567 					np->nl_flags &= ~IPN_FINDFORWARD;
3568 				}
3569 			}
3570 
3571 			np->nl_realip = nat->nat_outip;
3572 			np->nl_realport = nat->nat_outport;
3573 		}
3574  	}
3575 
3576 	return nat;
3577 }
3578 
3579 
3580 /* ------------------------------------------------------------------------ */
3581 /* Function:    nat_match                                                   */
3582 /* Returns:     int - 0 == no match, 1 == match                             */
3583 /* Parameters:  fin(I)   - pointer to packet information                    */
3584 /*              np(I)    - pointer to NAT rule                              */
3585 /*                                                                          */
3586 /* Pull the matching of a packet against a NAT rule out of that complex     */
3587 /* loop inside fr_checknatin() and lay it out properly in its own function. */
3588 /* ------------------------------------------------------------------------ */
3589 static int nat_match(fin, np)
3590 fr_info_t *fin;
3591 ipnat_t *np;
3592 {
3593 	frtuc_t *ft;
3594 
3595 	if (fin->fin_v != 4)
3596 		return 0;
3597 
3598 	if (np->in_p && fin->fin_p != np->in_p)
3599 		return 0;
3600 
3601 	if (fin->fin_out) {
3602 		if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3603 			return 0;
3604 		if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3605 		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3606 			return 0;
3607 		if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3608 		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3609 			return 0;
3610 	} else {
3611 		if (!(np->in_redir & NAT_REDIRECT))
3612 			return 0;
3613 		if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3614 		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3615 			return 0;
3616 		if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3617 		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3618 			return 0;
3619 	}
3620 
3621 	ft = &np->in_tuc;
3622 	if (!(fin->fin_flx & FI_TCPUDP) ||
3623 	    (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3624 		if (ft->ftu_scmp || ft->ftu_dcmp)
3625 			return 0;
3626 		return 1;
3627 	}
3628 
3629 	return fr_tcpudpchk(fin, ft);
3630 }
3631 
3632 
3633 /* ------------------------------------------------------------------------ */
3634 /* Function:    nat_update                                                  */
3635 /* Returns:     Nil                                                         */
3636 /* Parameters:  nat(I)    - pointer to NAT structure                        */
3637 /*              np(I)     - pointer to NAT rule                             */
3638 /*                                                                          */
3639 /* Updates the lifetime of a NAT table entry for non-TCP packets.  Must be  */
3640 /* called with fin_rev updated - i.e. after calling nat_proto().            */
3641 /* ------------------------------------------------------------------------ */
3642 void nat_update(fin, nat, np)
3643 fr_info_t *fin;
3644 nat_t *nat;
3645 ipnat_t *np;
3646 {
3647 	ipftq_t *ifq, *ifq2;
3648 	ipftqent_t *tqe;
3649 	ipf_stack_t *ifs = fin->fin_ifs;
3650 
3651 	MUTEX_ENTER(&nat->nat_lock);
3652 	tqe = &nat->nat_tqe;
3653 	ifq = tqe->tqe_ifq;
3654 
3655 	/*
3656 	 * We allow over-riding of NAT timeouts from NAT rules, even for
3657 	 * TCP, however, if it is TCP and there is no rule timeout set,
3658 	 * then do not update the timeout here.
3659 	 */
3660 	if (np != NULL)
3661 		ifq2 = np->in_tqehead[fin->fin_rev];
3662 	else
3663 		ifq2 = NULL;
3664 
3665 	if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3666 		(void) fr_tcp_age(&nat->nat_tqe, fin, ifs->ifs_nat_tqb, 0);
3667 	} else {
3668 		if (ifq2 == NULL) {
3669 			if (nat->nat_p == IPPROTO_UDP)
3670 				ifq2 = &ifs->ifs_nat_udptq;
3671 			else if (nat->nat_p == IPPROTO_ICMP)
3672 				ifq2 = &ifs->ifs_nat_icmptq;
3673 			else
3674 				ifq2 = &ifs->ifs_nat_iptq;
3675 		}
3676 
3677 		fr_movequeue(tqe, ifq, ifq2, ifs);
3678 	}
3679 	MUTEX_EXIT(&nat->nat_lock);
3680 }
3681 
3682 
3683 /* ------------------------------------------------------------------------ */
3684 /* Function:    fr_checknatout                                              */
3685 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3686 /*                     0 == no packet translation occurred,                 */
3687 /*                     1 == packet was successfully translated.             */
3688 /* Parameters:  fin(I)   - pointer to packet information                    */
3689 /*              passp(I) - pointer to filtering result flags                */
3690 /*                                                                          */
3691 /* Check to see if an outcoming packet should be changed.  ICMP packets are */
3692 /* first checked to see if they match an existing entry (if an error),      */
3693 /* otherwise a search of the current NAT table is made.  If neither results */
3694 /* in a match then a search for a matching NAT rule is made.  Create a new  */
3695 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
3696 /* packet header(s) as required.                                            */
3697 /* ------------------------------------------------------------------------ */
3698 int fr_checknatout(fin, passp)
3699 fr_info_t *fin;
3700 u_32_t *passp;
3701 {
3702 	struct ifnet *ifp, *sifp;
3703 	icmphdr_t *icmp = NULL;
3704 	tcphdr_t *tcp = NULL;
3705 	int rval, natfailed;
3706 	ipnat_t *np = NULL;
3707 	u_int nflags = 0;
3708 	u_32_t ipa, iph;
3709 	int natadd = 1;
3710 	frentry_t *fr;
3711 	nat_t *nat;
3712 	ipf_stack_t *ifs = fin->fin_ifs;
3713 
3714 	if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0)
3715 		return 0;
3716 
3717 	natfailed = 0;
3718 	fr = fin->fin_fr;
3719 	sifp = fin->fin_ifp;
3720 	if ((fr != NULL) && !(fr->fr_flags & FR_DUP) &&
3721 	    fr->fr_tifs[fin->fin_rev].fd_ifp &&
3722 	    fr->fr_tifs[fin->fin_rev].fd_ifp != (void *)-1)
3723 		fin->fin_ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3724 	ifp = fin->fin_ifp;
3725 
3726 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3727 		switch (fin->fin_p)
3728 		{
3729 		case IPPROTO_TCP :
3730 			nflags = IPN_TCP;
3731 			break;
3732 		case IPPROTO_UDP :
3733 			nflags = IPN_UDP;
3734 			break;
3735 		case IPPROTO_ICMP :
3736 			icmp = fin->fin_dp;
3737 
3738 			/*
3739 			 * This is an incoming packet, so the destination is
3740 			 * the icmp_id and the source port equals 0
3741 			 */
3742 			if (nat_icmpquerytype4(icmp->icmp_type))
3743 				nflags = IPN_ICMPQUERY;
3744 			break;
3745 		default :
3746 			break;
3747 		}
3748 
3749 		if ((nflags & IPN_TCPUDP))
3750 			tcp = fin->fin_dp;
3751 	}
3752 
3753 	ipa = fin->fin_saddr;
3754 
3755 	READ_ENTER(&ifs->ifs_ipf_nat);
3756 
3757 	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3758 	    (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3759 		/*EMPTY*/;
3760 	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3761 		natadd = 0;
3762 	else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3763 				      fin->fin_src, fin->fin_dst))) {
3764 		nflags = nat->nat_flags;
3765 	} else {
3766 		u_32_t hv, msk, nmsk;
3767 
3768 		/*
3769 		 * If there is no current entry in the nat table for this IP#,
3770 		 * create one for it (if there is a matching rule).
3771 		 */
3772 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3773 		msk = 0xffffffff;
3774 		nmsk = ifs->ifs_nat_masks;
3775 		WRITE_ENTER(&ifs->ifs_ipf_nat);
3776 maskloop:
3777 		iph = ipa & htonl(msk);
3778 		hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_natrules_sz);
3779 		for (np = ifs->ifs_nat_rules[hv]; np; np = np->in_mnext)
3780 		{
3781 			if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
3782 				continue;
3783 			if (np->in_v != fin->fin_v)
3784 				continue;
3785 			if (np->in_p && (np->in_p != fin->fin_p))
3786 				continue;
3787 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3788 				continue;
3789 			if (np->in_flags & IPN_FILTER) {
3790 				if (!nat_match(fin, np))
3791 					continue;
3792 			} else if ((ipa & np->in_inmsk) != np->in_inip)
3793 				continue;
3794 
3795 			if ((fr != NULL) &&
3796 			    !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3797 				continue;
3798 
3799 			if (*np->in_plabel != '\0') {
3800 				if (((np->in_flags & IPN_FILTER) == 0) &&
3801 				    (np->in_dport != tcp->th_dport))
3802 					continue;
3803 				if (appr_ok(fin, tcp, np) == 0)
3804 					continue;
3805 			}
3806 
3807 			if ((nat = nat_new(fin, np, NULL, nflags,
3808 					   NAT_OUTBOUND))) {
3809 				np->in_hits++;
3810 				break;
3811 			} else
3812 				natfailed = -1;
3813 		}
3814 		if ((np == NULL) && (nmsk != 0)) {
3815 			while (nmsk) {
3816 				msk <<= 1;
3817 				if (nmsk & 0x80000000)
3818 					break;
3819 				nmsk <<= 1;
3820 			}
3821 			if (nmsk != 0) {
3822 				nmsk <<= 1;
3823 				goto maskloop;
3824 			}
3825 		}
3826 		MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3827 	}
3828 
3829 	if (nat != NULL) {
3830 		rval = fr_natout(fin, nat, natadd, nflags);
3831 		if (rval == 1) {
3832 			MUTEX_ENTER(&nat->nat_lock);
3833 			nat->nat_ref++;
3834 			MUTEX_EXIT(&nat->nat_lock);
3835 			nat->nat_touched = ifs->ifs_fr_ticks;
3836 			fin->fin_nat = nat;
3837 		}
3838 	} else
3839 		rval = natfailed;
3840 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3841 
3842 	if (rval == -1) {
3843 		if (passp != NULL)
3844 			*passp = FR_BLOCK;
3845 		fin->fin_flx |= FI_BADNAT;
3846 	}
3847 	fin->fin_ifp = sifp;
3848 	return rval;
3849 }
3850 
3851 /* ------------------------------------------------------------------------ */
3852 /* Function:    fr_natout                                                   */
3853 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3854 /*                     1 == packet was successfully translated.             */
3855 /* Parameters:  fin(I)    - pointer to packet information                   */
3856 /*              nat(I)    - pointer to NAT structure                        */
3857 /*              natadd(I) - flag indicating if it is safe to add frag cache */
3858 /*              nflags(I) - NAT flags set for this packet                   */
3859 /*                                                                          */
3860 /* Translate a packet coming "out" on an interface.                         */
3861 /* ------------------------------------------------------------------------ */
3862 int fr_natout(fin, nat, natadd, nflags)
3863 fr_info_t *fin;
3864 nat_t *nat;
3865 int natadd;
3866 u_32_t nflags;
3867 {
3868 	icmphdr_t *icmp;
3869 	u_short *csump;
3870 	u_32_t sumd;
3871 	tcphdr_t *tcp;
3872 	ipnat_t *np;
3873 	int i;
3874 	ipf_stack_t *ifs = fin->fin_ifs;
3875 
3876 #if SOLARIS && defined(_KERNEL)
3877 	net_data_t net_data_p;
3878 	if (fin->fin_v == 4)
3879 		net_data_p = ifs->ifs_ipf_ipv4;
3880 	else
3881 		net_data_p = ifs->ifs_ipf_ipv6;
3882 #endif
3883 
3884 	tcp = NULL;
3885 	icmp = NULL;
3886 	csump = NULL;
3887 	np = nat->nat_ptr;
3888 
3889 	if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
3890 		(void) fr_nat_newfrag(fin, 0, nat);
3891 
3892 	MUTEX_ENTER(&nat->nat_lock);
3893 	nat->nat_bytes[1] += fin->fin_plen;
3894 	nat->nat_pkts[1]++;
3895 	MUTEX_EXIT(&nat->nat_lock);
3896 
3897 	/*
3898 	 * Fix up checksums, not by recalculating them, but
3899 	 * simply computing adjustments.
3900 	 * This is only done for STREAMS based IP implementations where the
3901 	 * checksum has already been calculated by IP.  In all other cases,
3902 	 * IPFilter is called before the checksum needs calculating so there
3903 	 * is no call to modify whatever is in the header now.
3904 	 */
3905 	ASSERT(fin->fin_m != NULL);
3906 	if (fin->fin_v == 4 && !NET_IS_HCK_L3_FULL(net_data_p, fin->fin_m)) {
3907 		if (nflags == IPN_ICMPERR) {
3908 			u_32_t s1, s2;
3909 
3910 			s1 = LONG_SUM(ntohl(fin->fin_saddr));
3911 			s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
3912 			CALC_SUMD(s1, s2, sumd);
3913 
3914 			fix_outcksum(&fin->fin_ip->ip_sum, sumd);
3915 		}
3916 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
3917     defined(linux) || defined(BRIDGE_IPF)
3918 		else {
3919 			/*
3920 			 * Strictly speaking, this isn't necessary on BSD
3921 			 * kernels because they do checksum calculation after
3922 			 * this code has run BUT if ipfilter is being used
3923 			 * to do NAT as a bridge, that code doesn't exist.
3924 			 */
3925 			if (nat->nat_dir == NAT_OUTBOUND)
3926 				fix_outcksum(&fin->fin_ip->ip_sum,
3927 					    nat->nat_ipsumd);
3928 			else
3929 				fix_incksum(&fin->fin_ip->ip_sum,
3930 				 	   nat->nat_ipsumd);
3931 		}
3932 #endif
3933 	}
3934 
3935 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3936 		if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
3937 			tcp = fin->fin_dp;
3938 
3939 			tcp->th_sport = nat->nat_outport;
3940 			fin->fin_data[0] = ntohs(nat->nat_outport);
3941 		}
3942 
3943 		if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
3944 			icmp = fin->fin_dp;
3945 			icmp->icmp_id = nat->nat_outport;
3946 		}
3947 
3948 		csump = nat_proto(fin, nat, nflags);
3949 	}
3950 
3951 	fin->fin_ip->ip_src = nat->nat_outip;
3952 
3953 	nat_update(fin, nat, np);
3954 
3955 	/*
3956 	 * The above comments do not hold for layer 4 (or higher) checksums...
3957 	 */
3958 	if (csump != NULL && !NET_IS_HCK_L4_FULL(net_data_p, fin->fin_m)) {
3959 		if (nflags & IPN_TCPUDP &&
3960 	   	    NET_IS_HCK_L4_PART(net_data_p, fin->fin_m))
3961 			sumd = nat->nat_sumd[1];
3962 		else
3963 			sumd = nat->nat_sumd[0];
3964 
3965 		if (nat->nat_dir == NAT_OUTBOUND)
3966 			fix_outcksum(csump, sumd);
3967 		else
3968 			fix_incksum(csump, sumd);
3969 	}
3970 #ifdef	IPFILTER_SYNC
3971 	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
3972 #endif
3973 	/* ------------------------------------------------------------- */
3974 	/* A few quick notes:						 */
3975 	/*	Following are test conditions prior to calling the 	 */
3976 	/*	appr_check routine.					 */
3977 	/*								 */
3978 	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
3979 	/*	with a redirect rule, we attempt to match the packet's	 */
3980 	/*	source port against in_dport, otherwise	we'd compare the */
3981 	/*	packet's destination.			 		 */
3982 	/* ------------------------------------------------------------- */
3983 	if ((np != NULL) && (np->in_apr != NULL)) {
3984 		i = appr_check(fin, nat);
3985 		if (i == 0)
3986 			i = 1;
3987 	} else
3988 		i = 1;
3989 	ATOMIC_INCL(ifs->ifs_nat_stats.ns_mapped[1]);
3990 	fin->fin_flx |= FI_NATED;
3991 	return i;
3992 }
3993 
3994 
3995 /* ------------------------------------------------------------------------ */
3996 /* Function:    fr_checknatin                                               */
3997 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3998 /*                     0 == no packet translation occurred,                 */
3999 /*                     1 == packet was successfully translated.             */
4000 /* Parameters:  fin(I)   - pointer to packet information                    */
4001 /*              passp(I) - pointer to filtering result flags                */
4002 /*                                                                          */
4003 /* Check to see if an incoming packet should be changed.  ICMP packets are  */
4004 /* first checked to see if they match an existing entry (if an error),      */
4005 /* otherwise a search of the current NAT table is made.  If neither results */
4006 /* in a match then a search for a matching NAT rule is made.  Create a new  */
4007 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
4008 /* packet header(s) as required.                                            */
4009 /* ------------------------------------------------------------------------ */
4010 int fr_checknatin(fin, passp)
4011 fr_info_t *fin;
4012 u_32_t *passp;
4013 {
4014 	u_int nflags, natadd;
4015 	int rval, natfailed;
4016 	struct ifnet *ifp;
4017 	struct in_addr in;
4018 	icmphdr_t *icmp;
4019 	tcphdr_t *tcp;
4020 	u_short dport;
4021 	ipnat_t *np;
4022 	nat_t *nat;
4023 	u_32_t iph;
4024 	ipf_stack_t *ifs = fin->fin_ifs;
4025 
4026 	if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0)
4027 		return 0;
4028 
4029 	tcp = NULL;
4030 	icmp = NULL;
4031 	dport = 0;
4032 	natadd = 1;
4033 	nflags = 0;
4034 	natfailed = 0;
4035 	ifp = fin->fin_ifp;
4036 
4037 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4038 		switch (fin->fin_p)
4039 		{
4040 		case IPPROTO_TCP :
4041 			nflags = IPN_TCP;
4042 			break;
4043 		case IPPROTO_UDP :
4044 			nflags = IPN_UDP;
4045 			break;
4046 		case IPPROTO_ICMP :
4047 			icmp = fin->fin_dp;
4048 
4049 			/*
4050 			 * This is an incoming packet, so the destination is
4051 			 * the icmp_id and the source port equals 0
4052 			 */
4053 			if (nat_icmpquerytype4(icmp->icmp_type)) {
4054 				nflags = IPN_ICMPQUERY;
4055 				dport = icmp->icmp_id;
4056 			} break;
4057 		default :
4058 			break;
4059 		}
4060 
4061 		if ((nflags & IPN_TCPUDP)) {
4062 			tcp = fin->fin_dp;
4063 			dport = tcp->th_dport;
4064 		}
4065 	}
4066 
4067 	in = fin->fin_dst;
4068 
4069 	READ_ENTER(&ifs->ifs_ipf_nat);
4070 
4071 	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
4072 	    (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
4073 		/*EMPTY*/;
4074 	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
4075 		natadd = 0;
4076 	else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
4077 				     fin->fin_src, in))) {
4078 		nflags = nat->nat_flags;
4079 	} else {
4080 		u_32_t hv, msk, rmsk;
4081 
4082 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4083 		rmsk = ifs->ifs_rdr_masks;
4084 		msk = 0xffffffff;
4085 		WRITE_ENTER(&ifs->ifs_ipf_nat);
4086 		/*
4087 		 * If there is no current entry in the nat table for this IP#,
4088 		 * create one for it (if there is a matching rule).
4089 		 */
4090 maskloop:
4091 		iph = in.s_addr & htonl(msk);
4092 		hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_rdrrules_sz);
4093 		for (np = ifs->ifs_rdr_rules[hv]; np; np = np->in_rnext) {
4094 			if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
4095 				continue;
4096 			if (np->in_v != fin->fin_v)
4097 				continue;
4098 			if (np->in_p && (np->in_p != fin->fin_p))
4099 				continue;
4100 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
4101 				continue;
4102 			if (np->in_flags & IPN_FILTER) {
4103 				if (!nat_match(fin, np))
4104 					continue;
4105 			} else {
4106 				if ((in.s_addr & np->in_outmsk) != np->in_outip)
4107 					continue;
4108 				if (np->in_pmin &&
4109 				    ((ntohs(np->in_pmax) < ntohs(dport)) ||
4110 				     (ntohs(dport) < ntohs(np->in_pmin))))
4111 					continue;
4112 			}
4113 
4114 			if (*np->in_plabel != '\0') {
4115 				if (!appr_ok(fin, tcp, np)) {
4116 					continue;
4117 				}
4118 			}
4119 
4120 			nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
4121 			if (nat != NULL) {
4122 				np->in_hits++;
4123 				break;
4124 			} else
4125 				natfailed = -1;
4126 		}
4127 
4128 		if ((np == NULL) && (rmsk != 0)) {
4129 			while (rmsk) {
4130 				msk <<= 1;
4131 				if (rmsk & 0x80000000)
4132 					break;
4133 				rmsk <<= 1;
4134 			}
4135 			if (rmsk != 0) {
4136 				rmsk <<= 1;
4137 				goto maskloop;
4138 			}
4139 		}
4140 		MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
4141 	}
4142 	if (nat != NULL) {
4143 		rval = fr_natin(fin, nat, natadd, nflags);
4144 		if (rval == 1) {
4145 			MUTEX_ENTER(&nat->nat_lock);
4146 			nat->nat_ref++;
4147 			MUTEX_EXIT(&nat->nat_lock);
4148 			nat->nat_touched = ifs->ifs_fr_ticks;
4149 			fin->fin_nat = nat;
4150 			fin->fin_state = nat->nat_state;
4151 		}
4152 	} else
4153 		rval = natfailed;
4154 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4155 
4156 	if (rval == -1) {
4157 		if (passp != NULL)
4158 			*passp = FR_BLOCK;
4159 		fin->fin_flx |= FI_BADNAT;
4160 	}
4161 	return rval;
4162 }
4163 
4164 
4165 /* ------------------------------------------------------------------------ */
4166 /* Function:    fr_natin                                                    */
4167 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4168 /*                     1 == packet was successfully translated.             */
4169 /* Parameters:  fin(I)    - pointer to packet information                   */
4170 /*              nat(I)    - pointer to NAT structure                        */
4171 /*              natadd(I) - flag indicating if it is safe to add frag cache */
4172 /*              nflags(I) - NAT flags set for this packet                   */
4173 /* Locks Held:  ipf_nat (READ)                                              */
4174 /*                                                                          */
4175 /* Translate a packet coming "in" on an interface.                          */
4176 /* ------------------------------------------------------------------------ */
4177 int fr_natin(fin, nat, natadd, nflags)
4178 fr_info_t *fin;
4179 nat_t *nat;
4180 int natadd;
4181 u_32_t nflags;
4182 {
4183 	icmphdr_t *icmp;
4184 	u_short *csump;
4185 	tcphdr_t *tcp;
4186 	ipnat_t *np;
4187 	int i;
4188 	ipf_stack_t *ifs = fin->fin_ifs;
4189 
4190 #if SOLARIS && defined(_KERNEL)
4191 	net_data_t net_data_p;
4192 	if (fin->fin_v == 4)
4193 		net_data_p = ifs->ifs_ipf_ipv4;
4194 	else
4195 		net_data_p = ifs->ifs_ipf_ipv6;
4196 #endif
4197 
4198 	tcp = NULL;
4199 	csump = NULL;
4200 	np = nat->nat_ptr;
4201 	fin->fin_fr = nat->nat_fr;
4202 
4203 	if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4204 		(void) fr_nat_newfrag(fin, 0, nat);
4205 
4206 	if (np != NULL) {
4207 
4208 	/* ------------------------------------------------------------- */
4209 	/* A few quick notes:						 */
4210 	/*	Following are test conditions prior to calling the 	 */
4211 	/*	appr_check routine.					 */
4212 	/*								 */
4213 	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4214 	/*	with a map rule, we attempt to match the packet's	 */
4215 	/*	source port against in_dport, otherwise	we'd compare the */
4216 	/*	packet's destination.			 		 */
4217 	/* ------------------------------------------------------------- */
4218 		if (np->in_apr != NULL) {
4219 			i = appr_check(fin, nat);
4220 			if (i == -1) {
4221 				return -1;
4222 			}
4223 		}
4224 	}
4225 
4226 #ifdef	IPFILTER_SYNC
4227 	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4228 #endif
4229 
4230 	MUTEX_ENTER(&nat->nat_lock);
4231 	nat->nat_bytes[0] += fin->fin_plen;
4232 	nat->nat_pkts[0]++;
4233 	MUTEX_EXIT(&nat->nat_lock);
4234 
4235 	fin->fin_ip->ip_dst = nat->nat_inip;
4236 	fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4237 	if (nflags & IPN_TCPUDP)
4238 		tcp = fin->fin_dp;
4239 
4240 	/*
4241 	 * Fix up checksums, not by recalculating them, but
4242 	 * simply computing adjustments.
4243 	 * Why only do this for some platforms on inbound packets ?
4244 	 * Because for those that it is done, IP processing is yet to happen
4245 	 * and so the IPv4 header checksum has not yet been evaluated.
4246 	 * Perhaps it should always be done for the benefit of things like
4247 	 * fast forwarding (so that it doesn't need to be recomputed) but with
4248 	 * header checksum offloading, perhaps it is a moot point.
4249 	 */
4250 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4251      defined(__osf__) || defined(linux)
4252 	if (nat->nat_dir == NAT_OUTBOUND)
4253 		fix_incksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
4254 	else
4255 		fix_outcksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
4256 #endif
4257 
4258 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4259 		if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4260 			tcp->th_dport = nat->nat_inport;
4261 			fin->fin_data[1] = ntohs(nat->nat_inport);
4262 		}
4263 
4264 
4265 		if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4266 			icmp = fin->fin_dp;
4267 
4268 			icmp->icmp_id = nat->nat_inport;
4269 		}
4270 
4271 		csump = nat_proto(fin, nat, nflags);
4272 	}
4273 
4274 	nat_update(fin, nat, np);
4275 
4276 	/*
4277 	 * In case they are being forwarded, inbound packets always need to have
4278 	 * their checksum adjusted even if hardware checksum validation said OK.
4279 	 */
4280 	if (csump != NULL) {
4281 		if (nat->nat_dir == NAT_OUTBOUND)
4282 			fix_incksum(csump, nat->nat_sumd[0]);
4283 		else
4284 			fix_outcksum(csump, nat->nat_sumd[0]);
4285 	}
4286 
4287 #if SOLARIS && defined(_KERNEL)
4288 	if (nflags & IPN_TCPUDP &&
4289 	    NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) {
4290 		/*
4291 		 * Need to adjust the partial checksum result stored in
4292 		 * db_cksum16, which will be used for validation in IP.
4293 		 * See IP_CKSUM_RECV().
4294 		 * Adjustment data should be the inverse of the IP address
4295 		 * changes, because db_cksum16 is supposed to be the complement
4296 		 * of the pesudo header.
4297 		 */
4298 		csump = &fin->fin_m->b_datap->db_cksum16;
4299 		if (nat->nat_dir == NAT_OUTBOUND)
4300 			fix_outcksum(csump, nat->nat_sumd[1]);
4301 		else
4302 			fix_incksum(csump, nat->nat_sumd[1]);
4303 	}
4304 #endif
4305 
4306 	ATOMIC_INCL(ifs->ifs_nat_stats.ns_mapped[0]);
4307 	fin->fin_flx |= FI_NATED;
4308 	if (np != NULL && np->in_tag.ipt_num[0] != 0)
4309 		fin->fin_nattag = &np->in_tag;
4310 	return 1;
4311 }
4312 
4313 
4314 /* ------------------------------------------------------------------------ */
4315 /* Function:    nat_proto                                                   */
4316 /* Returns:     u_short* - pointer to transport header checksum to update,  */
4317 /*                         NULL if the transport protocol is not recognised */
4318 /*                         as needing a checksum update.                    */
4319 /* Parameters:  fin(I)    - pointer to packet information                   */
4320 /*              nat(I)    - pointer to NAT structure                        */
4321 /*              nflags(I) - NAT flags set for this packet                   */
4322 /*                                                                          */
4323 /* Return the pointer to the checksum field for each protocol so understood.*/
4324 /* If support for making other changes to a protocol header is required,    */
4325 /* that is not strictly 'address' translation, such as clamping the MSS in  */
4326 /* TCP down to a specific value, then do it from here.                      */
4327 /* ------------------------------------------------------------------------ */
4328 u_short *nat_proto(fin, nat, nflags)
4329 fr_info_t *fin;
4330 nat_t *nat;
4331 u_int nflags;
4332 {
4333 	icmphdr_t *icmp;
4334 	u_short *csump;
4335 	tcphdr_t *tcp;
4336 	udphdr_t *udp;
4337 
4338 	csump = NULL;
4339 	if (fin->fin_out == 0) {
4340 		fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4341 	} else {
4342 		fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4343 	}
4344 
4345 	switch (fin->fin_p)
4346 	{
4347 	case IPPROTO_TCP :
4348 		tcp = fin->fin_dp;
4349 
4350 		csump = &tcp->th_sum;
4351 
4352 		/*
4353 		 * Do a MSS CLAMPING on a SYN packet,
4354 		 * only deal IPv4 for now.
4355 		 */
4356 		if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4357 			nat_mssclamp(tcp, nat->nat_mssclamp, csump);
4358 
4359 		break;
4360 
4361 	case IPPROTO_UDP :
4362 		udp = fin->fin_dp;
4363 
4364 		if (udp->uh_sum)
4365 			csump = &udp->uh_sum;
4366 		break;
4367 
4368 	case IPPROTO_ICMP :
4369 		icmp = fin->fin_dp;
4370 
4371 		if ((nflags & IPN_ICMPQUERY) != 0) {
4372 			if (icmp->icmp_cksum != 0)
4373 				csump = &icmp->icmp_cksum;
4374 		}
4375 		break;
4376 	}
4377 	return csump;
4378 }
4379 
4380 
4381 /* ------------------------------------------------------------------------ */
4382 /* Function:    fr_natunload                                                */
4383 /* Returns:     Nil                                                         */
4384 /* Parameters:  Nil                                                         */
4385 /*                                                                          */
4386 /* Free all memory used by NAT structures allocated at runtime.             */
4387 /* ------------------------------------------------------------------------ */
4388 void fr_natunload(ifs)
4389 ipf_stack_t *ifs;
4390 {
4391 	ipftq_t *ifq, *ifqnext;
4392 
4393 	(void) nat_clearlist(ifs);
4394 	(void) nat_flushtable(ifs);
4395 
4396 	/*
4397 	 * Proxy timeout queues are not cleaned here because although they
4398 	 * exist on the NAT list, appr_unload is called after fr_natunload
4399 	 * and the proxies actually are responsible for them being created.
4400 	 * Should the proxy timeouts have their own list?  There's no real
4401 	 * justification as this is the only complication.
4402 	 */
4403 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4404 		ifqnext = ifq->ifq_next;
4405 		if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4406 		    (fr_deletetimeoutqueue(ifq) == 0))
4407 			fr_freetimeoutqueue(ifq, ifs);
4408 	}
4409 
4410 	if (ifs->ifs_nat_table[0] != NULL) {
4411 		KFREES(ifs->ifs_nat_table[0],
4412 		       sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4413 		ifs->ifs_nat_table[0] = NULL;
4414 	}
4415 	if (ifs->ifs_nat_table[1] != NULL) {
4416 		KFREES(ifs->ifs_nat_table[1],
4417 		       sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4418 		ifs->ifs_nat_table[1] = NULL;
4419 	}
4420 	if (ifs->ifs_nat_rules != NULL) {
4421 		KFREES(ifs->ifs_nat_rules,
4422 		       sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
4423 		ifs->ifs_nat_rules = NULL;
4424 	}
4425 	if (ifs->ifs_rdr_rules != NULL) {
4426 		KFREES(ifs->ifs_rdr_rules,
4427 		       sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
4428 		ifs->ifs_rdr_rules = NULL;
4429 	}
4430 	if (ifs->ifs_maptable != NULL) {
4431 		KFREES(ifs->ifs_maptable,
4432 		       sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
4433 		ifs->ifs_maptable = NULL;
4434 	}
4435 	if (ifs->ifs_nat_stats.ns_bucketlen[0] != NULL) {
4436 		KFREES(ifs->ifs_nat_stats.ns_bucketlen[0],
4437 		       sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4438 		ifs->ifs_nat_stats.ns_bucketlen[0] = NULL;
4439 	}
4440 	if (ifs->ifs_nat_stats.ns_bucketlen[1] != NULL) {
4441 		KFREES(ifs->ifs_nat_stats.ns_bucketlen[1],
4442 		       sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4443 		ifs->ifs_nat_stats.ns_bucketlen[1] = NULL;
4444 	}
4445 
4446 	if (ifs->ifs_fr_nat_maxbucket_reset == 1)
4447 		ifs->ifs_fr_nat_maxbucket = 0;
4448 
4449 	if (ifs->ifs_fr_nat_init == 1) {
4450 		ifs->ifs_fr_nat_init = 0;
4451 		fr_sttab_destroy(ifs->ifs_nat_tqb);
4452 
4453 		RW_DESTROY(&ifs->ifs_ipf_natfrag);
4454 		RW_DESTROY(&ifs->ifs_ipf_nat);
4455 
4456 		MUTEX_DESTROY(&ifs->ifs_ipf_nat_new);
4457 		MUTEX_DESTROY(&ifs->ifs_ipf_natio);
4458 
4459 		MUTEX_DESTROY(&ifs->ifs_nat_udptq.ifq_lock);
4460 		MUTEX_DESTROY(&ifs->ifs_nat_icmptq.ifq_lock);
4461 		MUTEX_DESTROY(&ifs->ifs_nat_iptq.ifq_lock);
4462 	}
4463 }
4464 
4465 
4466 /* ------------------------------------------------------------------------ */
4467 /* Function:    fr_natexpire                                                */
4468 /* Returns:     Nil                                                         */
4469 /* Parameters:  Nil                                                         */
4470 /*                                                                          */
4471 /* Check all of the timeout queues for entries at the top which need to be  */
4472 /* expired.                                                                 */
4473 /* ------------------------------------------------------------------------ */
4474 void fr_natexpire(ifs)
4475 ipf_stack_t *ifs;
4476 {
4477 	ipftq_t *ifq, *ifqnext;
4478 	ipftqent_t *tqe, *tqn;
4479 	int i;
4480 	SPL_INT(s);
4481 
4482 	SPL_NET(s);
4483 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4484 	for (ifq = ifs->ifs_nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4485 		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4486 			if (tqe->tqe_die > ifs->ifs_fr_ticks)
4487 				break;
4488 			tqn = tqe->tqe_next;
4489 			nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4490 		}
4491 	}
4492 
4493 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4494 		ifqnext = ifq->ifq_next;
4495 
4496 		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4497 			if (tqe->tqe_die > ifs->ifs_fr_ticks)
4498 				break;
4499 			tqn = tqe->tqe_next;
4500 			nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4501 		}
4502 	}
4503 
4504 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4505 		ifqnext = ifq->ifq_next;
4506 
4507 		if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4508 		    (ifq->ifq_ref == 0)) {
4509 			fr_freetimeoutqueue(ifq, ifs);
4510 		}
4511 	}
4512 
4513 	if (ifs->ifs_nat_doflush != 0) {
4514 		(void) nat_extraflush(2, ifs);
4515 		ifs->ifs_nat_doflush = 0;
4516 	}
4517 
4518 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4519 	SPL_X(s);
4520 }
4521 
4522 
4523 /* ------------------------------------------------------------------------ */
4524 /* Function:    fr_nataddrsync                                              */
4525 /* Returns:     Nil                                                         */
4526 /* Parameters:  ifp(I) -  pointer to network interface                      */
4527 /*              addr(I) - pointer to new network address                    */
4528 /*                                                                          */
4529 /* Walk through all of the currently active NAT sessions, looking for those */
4530 /* which need to have their translated address updated (where the interface */
4531 /* matches the one passed in) and change it, recalculating the checksum sum */
4532 /* difference too.                                                          */
4533 /* ------------------------------------------------------------------------ */
4534 void fr_nataddrsync(ifp, addr, ifs)
4535 void *ifp;
4536 struct in_addr *addr;
4537 ipf_stack_t *ifs;
4538 {
4539 	u_32_t sum1, sum2, sumd;
4540 	nat_t *nat;
4541 	ipnat_t *np;
4542 	SPL_INT(s);
4543 
4544 	if (ifs->ifs_fr_running <= 0)
4545 		return;
4546 
4547 	SPL_NET(s);
4548 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4549 
4550 	if (ifs->ifs_fr_running <= 0) {
4551 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4552 		return;
4553 	}
4554 
4555 	/*
4556 	 * Change IP addresses for NAT sessions for any protocol except TCP
4557 	 * since it will break the TCP connection anyway.  The only rules
4558 	 * which will get changed are those which are "map ... -> 0/32",
4559 	 * where the rule specifies the address is taken from the interface.
4560 	 */
4561 	for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4562 		if (addr != NULL) {
4563 			if (((ifp != NULL) && ifp != (nat->nat_ifps[0])) ||
4564 			    ((nat->nat_flags & IPN_TCP) != 0))
4565 				continue;
4566 			if (((np = nat->nat_ptr) == NULL) ||
4567 			    (np->in_nip || (np->in_outmsk != 0xffffffff)))
4568 				continue;
4569 
4570 			/*
4571 			 * Change the map-to address to be the same as the
4572 			 * new one.
4573 			 */
4574 			sum1 = nat->nat_outip.s_addr;
4575 			nat->nat_outip = *addr;
4576 			sum2 = nat->nat_outip.s_addr;
4577 
4578 		} else if (((ifp == NULL) || (ifp == nat->nat_ifps[0])) &&
4579 		    !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) &&
4580 		    (np->in_outmsk == 0xffffffff) && !np->in_nip) {
4581 			struct in_addr in;
4582 
4583 			/*
4584 			 * Change the map-to address to be the same as the
4585 			 * new one.
4586 			 */
4587 			sum1 = nat->nat_outip.s_addr;
4588 			if (fr_ifpaddr(4, FRI_NORMAL, nat->nat_ifps[0],
4589 				       &in, NULL, ifs) != -1)
4590 				nat->nat_outip = in;
4591 			sum2 = nat->nat_outip.s_addr;
4592 		} else {
4593 			continue;
4594 		}
4595 
4596 		if (sum1 == sum2)
4597 			continue;
4598 		/*
4599 		 * Readjust the checksum adjustment to take into
4600 		 * account the new IP#.
4601 		 */
4602 		CALC_SUMD(sum1, sum2, sumd);
4603 		/* XXX - dont change for TCP when solaris does
4604 		 * hardware checksumming.
4605 		 */
4606 		sumd += nat->nat_sumd[0];
4607 		nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4608 		nat->nat_sumd[1] = nat->nat_sumd[0];
4609 	}
4610 
4611 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4612 	SPL_X(s);
4613 }
4614 
4615 
4616 /* ------------------------------------------------------------------------ */
4617 /* Function:    fr_natifpsync                                               */
4618 /* Returns:     Nil                                                         */
4619 /* Parameters:  action(I) - how we are syncing                              */
4620 /*              ifp(I)    - pointer to network interface                    */
4621 /*              name(I)   - name of interface to sync to                    */
4622 /*                                                                          */
4623 /* This function is used to resync the mapping of interface names and their */
4624 /* respective 'pointers'.  For "action == IPFSYNC_RESYNC", resync all       */
4625 /* interfaces by doing a new lookup of name to 'pointer'.  For "action ==   */
4626 /* IPFSYNC_NEWIFP", treat ifp as the new pointer value associated with      */
4627 /* "name" and for "action == IPFSYNC_OLDIFP", ifp is a pointer for which    */
4628 /* there is no longer any interface associated with it.                     */
4629 /* ------------------------------------------------------------------------ */
4630 void fr_natifpsync(action, ifp, name, ifs)
4631 int action;
4632 void *ifp;
4633 char *name;
4634 ipf_stack_t *ifs;
4635 {
4636 #if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL)
4637 	int s;
4638 #endif
4639 	nat_t *nat;
4640 	ipnat_t *n;
4641 
4642 	if (ifs->ifs_fr_running <= 0)
4643 		return;
4644 
4645 	SPL_NET(s);
4646 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4647 
4648 	if (ifs->ifs_fr_running <= 0) {
4649 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4650 		return;
4651 	}
4652 
4653 	switch (action)
4654 	{
4655 	case IPFSYNC_RESYNC :
4656 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4657 			if ((ifp == nat->nat_ifps[0]) ||
4658 			    (nat->nat_ifps[0] == (void *)-1)) {
4659 				nat->nat_ifps[0] =
4660 				    fr_resolvenic(nat->nat_ifnames[0], 4, ifs);
4661 			}
4662 
4663 			if ((ifp == nat->nat_ifps[1]) ||
4664 			    (nat->nat_ifps[1] == (void *)-1)) {
4665 				nat->nat_ifps[1] =
4666 				    fr_resolvenic(nat->nat_ifnames[1], 4, ifs);
4667 			}
4668 		}
4669 
4670 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4671 			if (n->in_ifps[0] == ifp ||
4672 			    n->in_ifps[0] == (void *)-1) {
4673 				n->in_ifps[0] =
4674 				    fr_resolvenic(n->in_ifnames[0], 4, ifs);
4675 			}
4676 			if (n->in_ifps[1] == ifp ||
4677 			    n->in_ifps[1] == (void *)-1) {
4678 				n->in_ifps[1] =
4679 				    fr_resolvenic(n->in_ifnames[1], 4, ifs);
4680 			}
4681 		}
4682 		break;
4683 	case IPFSYNC_NEWIFP :
4684 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4685 			if (!strncmp(name, nat->nat_ifnames[0],
4686 				     sizeof(nat->nat_ifnames[0])))
4687 				nat->nat_ifps[0] = ifp;
4688 			if (!strncmp(name, nat->nat_ifnames[1],
4689 				     sizeof(nat->nat_ifnames[1])))
4690 				nat->nat_ifps[1] = ifp;
4691 		}
4692 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4693 			if (!strncmp(name, n->in_ifnames[0],
4694 				     sizeof(n->in_ifnames[0])))
4695 				n->in_ifps[0] = ifp;
4696 			if (!strncmp(name, n->in_ifnames[1],
4697 				     sizeof(n->in_ifnames[1])))
4698 				n->in_ifps[1] = ifp;
4699 		}
4700 		break;
4701 	case IPFSYNC_OLDIFP :
4702 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4703 			if (ifp == nat->nat_ifps[0])
4704 				nat->nat_ifps[0] = (void *)-1;
4705 			if (ifp == nat->nat_ifps[1])
4706 				nat->nat_ifps[1] = (void *)-1;
4707 		}
4708 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4709 			if (n->in_ifps[0] == ifp)
4710 				n->in_ifps[0] = (void *)-1;
4711 			if (n->in_ifps[1] == ifp)
4712 				n->in_ifps[1] = (void *)-1;
4713 		}
4714 		break;
4715 	}
4716 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4717 	SPL_X(s);
4718 }
4719 
4720 
4721 /* ------------------------------------------------------------------------ */
4722 /* Function:    nat_icmpquerytype4                                          */
4723 /* Returns:     int - 1 == success, 0 == failure                            */
4724 /* Parameters:  icmptype(I) - ICMP type number                              */
4725 /*                                                                          */
4726 /* Tests to see if the ICMP type number passed is a query/response type or  */
4727 /* not.                                                                     */
4728 /* ------------------------------------------------------------------------ */
4729 static INLINE int nat_icmpquerytype4(icmptype)
4730 int icmptype;
4731 {
4732 
4733 	/*
4734 	 * For the ICMP query NAT code, it is essential that both the query
4735 	 * and the reply match on the NAT rule. Because the NAT structure
4736 	 * does not keep track of the icmptype, and a single NAT structure
4737 	 * is used for all icmp types with the same src, dest and id, we
4738 	 * simply define the replies as queries as well. The funny thing is,
4739 	 * altough it seems silly to call a reply a query, this is exactly
4740 	 * as it is defined in the IPv4 specification
4741 	 */
4742 
4743 	switch (icmptype)
4744 	{
4745 
4746 	case ICMP_ECHOREPLY:
4747 	case ICMP_ECHO:
4748 	/* route aedvertisement/solliciation is currently unsupported: */
4749 	/* it would require rewriting the ICMP data section            */
4750 	case ICMP_TSTAMP:
4751 	case ICMP_TSTAMPREPLY:
4752 	case ICMP_IREQ:
4753 	case ICMP_IREQREPLY:
4754 	case ICMP_MASKREQ:
4755 	case ICMP_MASKREPLY:
4756 		return 1;
4757 	default:
4758 		return 0;
4759 	}
4760 }
4761 
4762 
4763 /* ------------------------------------------------------------------------ */
4764 /* Function:    nat_log                                                     */
4765 /* Returns:     Nil                                                         */
4766 /* Parameters:  nat(I)  - pointer to NAT structure                          */
4767 /*              type(I) - type of log entry to create                       */
4768 /*                                                                          */
4769 /* Creates a NAT log entry.                                                 */
4770 /* ------------------------------------------------------------------------ */
4771 void nat_log(nat, type, ifs)
4772 struct nat *nat;
4773 u_int type;
4774 ipf_stack_t *ifs;
4775 {
4776 #ifdef	IPFILTER_LOG
4777 # ifndef LARGE_NAT
4778 	struct ipnat *np;
4779 	int rulen;
4780 # endif
4781 	struct natlog natl;
4782 	void *items[1];
4783 	size_t sizes[1];
4784 	int types[1];
4785 
4786 	natl.nl_inip = nat->nat_inip;
4787 	natl.nl_outip = nat->nat_outip;
4788 	natl.nl_origip = nat->nat_oip;
4789 	natl.nl_bytes[0] = nat->nat_bytes[0];
4790 	natl.nl_bytes[1] = nat->nat_bytes[1];
4791 	natl.nl_pkts[0] = nat->nat_pkts[0];
4792 	natl.nl_pkts[1] = nat->nat_pkts[1];
4793 	natl.nl_origport = nat->nat_oport;
4794 	natl.nl_inport = nat->nat_inport;
4795 	natl.nl_outport = nat->nat_outport;
4796 	natl.nl_p = nat->nat_p;
4797 	natl.nl_type = type;
4798 	natl.nl_rule = -1;
4799 # ifndef LARGE_NAT
4800 	if (nat->nat_ptr != NULL) {
4801 		for (rulen = 0, np = ifs->ifs_nat_list; np;
4802 		     np = np->in_next, rulen++)
4803 			if (np == nat->nat_ptr) {
4804 				natl.nl_rule = rulen;
4805 				break;
4806 			}
4807 	}
4808 # endif
4809 	items[0] = &natl;
4810 	sizes[0] = sizeof(natl);
4811 	types[0] = 0;
4812 
4813 	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1, ifs);
4814 #endif
4815 }
4816 
4817 
#if defined(__OpenBSD__)
/* ------------------------------------------------------------------------ */
/* Function:    nat_ifdetach                                                */
/* Returns:     Nil                                                         */
/* Parameters:  ifp(I) - pointer to network interface                       */
/*              ifs(I) - ipf stack instance                                 */
/*                                                                          */
/* Compatibility interface for OpenBSD to trigger the correct updating of   */
/* interface references within IPFilter.                                    */
/* ------------------------------------------------------------------------ */
void nat_ifdetach(ifp, ifs)
void *ifp;
ipf_stack_t *ifs;
{
	/* All of the work is delegated to frsync(). */
	frsync(ifp, ifs);
}
#endif
4835 
4836 
4837 /* ------------------------------------------------------------------------ */
4838 /* Function:    fr_ipnatderef                                               */
4839 /* Returns:     Nil                                                         */
4840 /* Parameters:  isp(I) - pointer to pointer to NAT rule                     */
4841 /* Write Locks: ipf_nat                                                     */
4842 /*                                                                          */
4843 /* ------------------------------------------------------------------------ */
4844 void fr_ipnatderef(inp, ifs)
4845 ipnat_t **inp;
4846 ipf_stack_t *ifs;
4847 {
4848 	ipnat_t *in;
4849 
4850 	in = *inp;
4851 	*inp = NULL;
4852 	in->in_space++;
4853 	in->in_use--;
4854 	if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
4855 		if (in->in_apr)
4856 			appr_free(in->in_apr);
4857 		KFREE(in);
4858 		ifs->ifs_nat_stats.ns_rules--;
4859 #ifdef notdef
4860 #if SOLARIS
4861 		if (ifs->ifs_nat_stats.ns_rules == 0)
4862 			ifs->ifs_pfil_delayed_copy = 1;
4863 #endif
4864 #endif
4865 	}
4866 }
4867 
4868 
4869 /* ------------------------------------------------------------------------ */
4870 /* Function:    fr_natderef                                                 */
4871 /* Returns:     Nil                                                         */
4872 /* Parameters:  isp(I) - pointer to pointer to NAT table entry              */
4873 /*                                                                          */
4874 /* Decrement the reference counter for this NAT table entry and free it if  */
4875 /* there are no more things using it.                                       */
4876 /*                                                                          */
4877 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */
4878 /* structure *because* it only gets called on paths _after_ nat_ref has been*/
4879 /* incremented.  If nat_ref == 1 then we shouldn't decrement it here        */
4880 /* because nat_delete() will do that and send nat_ref to -1.                */
4881 /*                                                                          */
4882 /* Holding the lock on nat_lock is required to serialise nat_delete() being */
4883 /* called from a NAT flush ioctl with a deref happening because of a packet.*/
4884 /* ------------------------------------------------------------------------ */
4885 void fr_natderef(natp, ifs)
4886 nat_t **natp;
4887 ipf_stack_t *ifs;
4888 {
4889 	nat_t *nat;
4890 
4891 	nat = *natp;
4892 	*natp = NULL;
4893 
4894 	MUTEX_ENTER(&nat->nat_lock);
4895 	if (nat->nat_ref > 1) {
4896 		nat->nat_ref--;
4897 		MUTEX_EXIT(&nat->nat_lock);
4898 		return;
4899 	}
4900 	MUTEX_EXIT(&nat->nat_lock);
4901 
4902 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4903 	nat_delete(nat, NL_EXPIRE, ifs);
4904 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4905 }
4906 
4907 
4908 /* ------------------------------------------------------------------------ */
4909 /* Function:    fr_natclone                                                 */
4910 /* Returns:     ipstate_t* - NULL == cloning failed,                        */
4911 /*                           else pointer to new state structure            */
4912 /* Parameters:  fin(I) - pointer to packet information                      */
4913 /*              is(I)  - pointer to master state structure                  */
4914 /* Write Lock:  ipf_nat                                                     */
4915 /*                                                                          */
4916 /* Create a "duplcate" state table entry from the master.                   */
4917 /* ------------------------------------------------------------------------ */
4918 static nat_t *fr_natclone(fin, nat)
4919 fr_info_t *fin;
4920 nat_t *nat;
4921 {
4922 	frentry_t *fr;
4923 	nat_t *clone;
4924 	ipnat_t *np;
4925 	ipf_stack_t *ifs = fin->fin_ifs;
4926 
4927 	KMALLOC(clone, nat_t *);
4928 	if (clone == NULL)
4929 		return NULL;
4930 	bcopy((char *)nat, (char *)clone, sizeof(*clone));
4931 
4932 	MUTEX_NUKE(&clone->nat_lock);
4933 
4934 	clone->nat_aps = NULL;
4935 	/*
4936 	 * Initialize all these so that nat_delete() doesn't cause a crash.
4937 	 */
4938 	clone->nat_tqe.tqe_pnext = NULL;
4939 	clone->nat_tqe.tqe_next = NULL;
4940 	clone->nat_tqe.tqe_ifq = NULL;
4941 	clone->nat_tqe.tqe_parent = clone;
4942 
4943 	clone->nat_flags &= ~SI_CLONE;
4944 	clone->nat_flags |= SI_CLONED;
4945 
4946 	if (clone->nat_hm)
4947 		clone->nat_hm->hm_ref++;
4948 
4949 	if (nat_insert(clone, fin->fin_rev, ifs) == -1) {
4950 		KFREE(clone);
4951 		return NULL;
4952 	}
4953 	np = clone->nat_ptr;
4954 	if (np != NULL) {
4955 		if (ifs->ifs_nat_logging)
4956 			nat_log(clone, (u_int)np->in_redir, ifs);
4957 		np->in_use++;
4958 	}
4959 	fr = clone->nat_fr;
4960 	if (fr != NULL) {
4961 		MUTEX_ENTER(&fr->fr_lock);
4962 		fr->fr_ref++;
4963 		MUTEX_EXIT(&fr->fr_lock);
4964 	}
4965 
4966 	/*
4967 	 * Because the clone is created outside the normal loop of things and
4968 	 * TCP has special needs in terms of state, initialise the timeout
4969 	 * state of the new NAT from here.
4970 	 */
4971 	if (clone->nat_p == IPPROTO_TCP) {
4972 		(void) fr_tcp_age(&clone->nat_tqe, fin, ifs->ifs_nat_tqb,
4973 				  clone->nat_flags);
4974 	}
4975 #ifdef	IPFILTER_SYNC
4976 	clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
4977 #endif
4978 	if (ifs->ifs_nat_logging)
4979 		nat_log(clone, NL_CLONE, ifs);
4980 	return clone;
4981 }
4982 
4983 
4984 /* ------------------------------------------------------------------------ */
4985 /* Function:   nat_wildok                                                   */
4986 /* Returns:    int - 1 == packet's ports match wildcards                    */
4987 /*                   0 == packet's ports don't match wildcards              */
4988 /* Parameters: nat(I)   - NAT entry                                         */
4989 /*             sport(I) - source port                                       */
4990 /*             dport(I) - destination port                                  */
4991 /*             flags(I) - wildcard flags                                    */
4992 /*             dir(I)   - packet direction                                  */
4993 /*                                                                          */
4994 /* Use NAT entry and packet direction to determine which combination of     */
4995 /* wildcard flags should be used.                                           */
4996 /* ------------------------------------------------------------------------ */
4997 static INLINE int nat_wildok(nat, sport, dport, flags, dir)
4998 nat_t *nat;
4999 int sport;
5000 int dport;
5001 int flags;
5002 int dir;
5003 {
5004 	/*
5005 	 * When called by       dir is set to
5006 	 * nat_inlookup         NAT_INBOUND (0)
5007 	 * nat_outlookup        NAT_OUTBOUND (1)
5008 	 *
5009 	 * We simply combine the packet's direction in dir with the original
5010 	 * "intended" direction of that NAT entry in nat->nat_dir to decide
5011 	 * which combination of wildcard flags to allow.
5012 	 */
5013 
5014 	switch ((dir << 1) | nat->nat_dir)
5015 	{
5016 	case 3: /* outbound packet / outbound entry */
5017 		if (((nat->nat_inport == sport) ||
5018 		    (flags & SI_W_SPORT)) &&
5019 		    ((nat->nat_oport == dport) ||
5020 		    (flags & SI_W_DPORT)))
5021 			return 1;
5022 		break;
5023 	case 2: /* outbound packet / inbound entry */
5024 		if (((nat->nat_outport == sport) ||
5025 		    (flags & SI_W_DPORT)) &&
5026 		    ((nat->nat_oport == dport) ||
5027 		    (flags & SI_W_SPORT)))
5028 			return 1;
5029 		break;
5030 	case 1: /* inbound packet / outbound entry */
5031 		if (((nat->nat_oport == sport) ||
5032 		    (flags & SI_W_DPORT)) &&
5033 		    ((nat->nat_outport == dport) ||
5034 		    (flags & SI_W_SPORT)))
5035 			return 1;
5036 		break;
5037 	case 0: /* inbound packet / inbound entry */
5038 		if (((nat->nat_oport == sport) ||
5039 		    (flags & SI_W_SPORT)) &&
5040 		    ((nat->nat_outport == dport) ||
5041 		    (flags & SI_W_DPORT)))
5042 			return 1;
5043 		break;
5044 	default:
5045 		break;
5046 	}
5047 
5048 	return(0);
5049 }
5050 
5051 
5052 /* ------------------------------------------------------------------------ */
5053 /* Function:    nat_mssclamp                                                */
5054 /* Returns:     Nil                                                         */
5055 /* Parameters:  tcp(I)    - pointer to TCP header                           */
5056 /*              maxmss(I) - value to clamp the TCP MSS to                   */
5057 /*              csump(I)  - pointer to TCP checksum                         */
5058 /*                                                                          */
5059 /* Check for MSS option and clamp it if necessary.  If found and changed,   */
5060 /* then the TCP header checksum will be updated to reflect the change in    */
5061 /* the MSS.                                                                 */
5062 /* ------------------------------------------------------------------------ */
5063 static void nat_mssclamp(tcp, maxmss, csump)
5064 tcphdr_t *tcp;
5065 u_32_t maxmss;
5066 u_short *csump;
5067 {
5068 	u_char *cp, *ep, opt;
5069 	int hlen, advance;
5070 	u_32_t mss, sumd;
5071 
5072 	hlen = TCP_OFF(tcp) << 2;
5073 	if (hlen > sizeof(*tcp)) {
5074 		cp = (u_char *)tcp + sizeof(*tcp);
5075 		ep = (u_char *)tcp + hlen;
5076 
5077 		while (cp < ep) {
5078 			opt = cp[0];
5079 			if (opt == TCPOPT_EOL)
5080 				break;
5081 			else if (opt == TCPOPT_NOP) {
5082 				cp++;
5083 				continue;
5084 			}
5085 
5086 			if (cp + 1 >= ep)
5087 				break;
5088 			advance = cp[1];
5089 			if ((cp + advance > ep) || (advance <= 0))
5090 				break;
5091 			switch (opt)
5092 			{
5093 			case TCPOPT_MAXSEG:
5094 				if (advance != 4)
5095 					break;
5096 				mss = cp[2] * 256 + cp[3];
5097 				if (mss > maxmss) {
5098 					cp[2] = maxmss / 256;
5099 					cp[3] = maxmss & 0xff;
5100 					CALC_SUMD(mss, maxmss, sumd);
5101 					fix_outcksum(csump, sumd);
5102 				}
5103 				break;
5104 			default:
5105 				/* ignore unknown options */
5106 				break;
5107 			}
5108 
5109 			cp += advance;
5110 		}
5111 	}
5112 }
5113 
5114 
5115 /* ------------------------------------------------------------------------ */
5116 /* Function:    fr_setnatqueue                                              */
5117 /* Returns:     Nil                                                         */
5118 /* Parameters:  nat(I)- pointer to NAT structure                            */
5119 /*              rev(I) - forward(0) or reverse(1) direction                 */
5120 /* Locks:       ipf_nat (read or write)                                     */
5121 /*                                                                          */
5122 /* Put the NAT entry on its default queue entry, using rev as a helped in   */
5123 /* determining which queue it should be placed on.                          */
5124 /* ------------------------------------------------------------------------ */
5125 void fr_setnatqueue(nat, rev, ifs)
5126 nat_t *nat;
5127 int rev;
5128 ipf_stack_t *ifs;
5129 {
5130 	ipftq_t *oifq, *nifq;
5131 
5132 	if (nat->nat_ptr != NULL)
5133 		nifq = nat->nat_ptr->in_tqehead[rev];
5134 	else
5135 		nifq = NULL;
5136 
5137 	if (nifq == NULL) {
5138 		switch (nat->nat_p)
5139 		{
5140 		case IPPROTO_UDP :
5141 			nifq = &ifs->ifs_nat_udptq;
5142 			break;
5143 		case IPPROTO_ICMP :
5144 			nifq = &ifs->ifs_nat_icmptq;
5145 			break;
5146 		case IPPROTO_TCP :
5147 			nifq = ifs->ifs_nat_tqb + nat->nat_tqe.tqe_state[rev];
5148 			break;
5149 		default :
5150 			nifq = &ifs->ifs_nat_iptq;
5151 			break;
5152 		}
5153 	}
5154 
5155 	oifq = nat->nat_tqe.tqe_ifq;
5156 	/*
5157 	 * If it's currently on a timeout queue, move it from one queue to
5158 	 * another, else put it on the end of the newly determined queue.
5159 	 */
5160 	if (oifq != NULL)
5161 		fr_movequeue(&nat->nat_tqe, oifq, nifq, ifs);
5162 	else
5163 		fr_queueappend(&nat->nat_tqe, nifq, nat, ifs);
5164 	return;
5165 }
5166 
5167 /* ------------------------------------------------------------------------ */
5168 /* Function:    nat_getnext                                                 */
5169 /* Returns:     int - 0 == ok, else error                                   */
5170 /* Parameters:  t(I)   - pointer to ipftoken structure                      */
5171 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5172 /*              ifs - ipf stack instance                                    */
5173 /*                                                                          */
5174 /* Fetch the next nat/ipnat/hostmap structure pointer from the linked list  */
5175 /* and copy it out to the storage space pointed to by itp.  The next item   */
5176 /* in the list to look at is put back in the ipftoken struture.             */
5177 /* ------------------------------------------------------------------------ */
5178 static int nat_getnext(t, itp, ifs)
5179 ipftoken_t *t;
5180 ipfgeniter_t *itp;
5181 ipf_stack_t *ifs;
5182 {
5183 	hostmap_t *hm, *nexthm = NULL, zerohm;
5184 	ipnat_t *ipn, *nextipnat = NULL, zeroipn;
5185 	nat_t *nat, *nextnat = NULL, zeronat;
5186 	int error = 0, count;
5187 	char *dst;
5188 
5189 	if (itp->igi_nitems == 0)
5190 		return EINVAL;
5191 
5192 	READ_ENTER(&ifs->ifs_ipf_nat);
5193 
5194 	/*
5195 	 * Get "previous" entry from the token and find the next entry.
5196 	 */
5197 	switch (itp->igi_type)
5198 	{
5199 	case IPFGENITER_HOSTMAP :
5200 		hm = t->ipt_data;
5201 		if (hm == NULL) {
5202 			nexthm = ifs->ifs_ipf_hm_maplist;
5203 		} else {
5204 			nexthm = hm->hm_next;
5205 		}
5206 		break;
5207 
5208 	case IPFGENITER_IPNAT :
5209 		ipn = t->ipt_data;
5210 		if (ipn == NULL) {
5211 			nextipnat = ifs->ifs_nat_list;
5212 		} else {
5213 			nextipnat = ipn->in_next;
5214 		}
5215 		break;
5216 
5217 	case IPFGENITER_NAT :
5218 		nat = t->ipt_data;
5219 		if (nat == NULL) {
5220 			nextnat = ifs->ifs_nat_instances;
5221 		} else {
5222 			nextnat = nat->nat_next;
5223 		}
5224 		break;
5225 	default :
5226 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5227 		return EINVAL;
5228 	}
5229 
5230 	dst = itp->igi_data;
5231 	for (count = itp->igi_nitems; count > 0; count--) {
5232 		/*
5233 		 * If we found an entry, add a reference to it and update the token.
5234 		 * Otherwise, zero out data to be returned and NULL out token.
5235 		 */
5236 		switch (itp->igi_type)
5237 		{
5238 		case IPFGENITER_HOSTMAP :
5239 			if (nexthm != NULL) {
5240 				ATOMIC_INC32(nexthm->hm_ref);
5241 				t->ipt_data = nexthm;
5242 			} else {
5243 				bzero(&zerohm, sizeof(zerohm));
5244 				nexthm = &zerohm;
5245 				t->ipt_data = NULL;
5246 			}
5247 			break;
5248 		case IPFGENITER_IPNAT :
5249 			if (nextipnat != NULL) {
5250 				ATOMIC_INC32(nextipnat->in_use);
5251 				t->ipt_data = nextipnat;
5252 			} else {
5253 				bzero(&zeroipn, sizeof(zeroipn));
5254 				nextipnat = &zeroipn;
5255 				t->ipt_data = NULL;
5256 			}
5257 			break;
5258 		case IPFGENITER_NAT :
5259 			if (nextnat != NULL) {
5260 				MUTEX_ENTER(&nextnat->nat_lock);
5261 				nextnat->nat_ref++;
5262 				MUTEX_EXIT(&nextnat->nat_lock);
5263 				t->ipt_data = nextnat;
5264 			} else {
5265 				bzero(&zeronat, sizeof(zeronat));
5266 				nextnat = &zeronat;
5267 				t->ipt_data = NULL;
5268 			}
5269 			break;
5270 		default :
5271 			break;
5272 		}
5273 
5274 		/*
5275 		 * Now that we have ref, it's save to give up lock.
5276 		 */
5277 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5278 
5279 		/*
5280 		 * Copy out data and clean up references and token as needed.
5281 		 */
5282 		switch (itp->igi_type)
5283 		{
5284 		case IPFGENITER_HOSTMAP :
5285 			error = COPYOUT(nexthm, dst, sizeof(*nexthm));
5286 			if (error != 0)
5287 				error = EFAULT;
5288 			if (t->ipt_data == NULL) {
5289 				ipf_freetoken(t, ifs);
5290 				break;
5291 			} else {
5292 				if (hm != NULL) {
5293 					WRITE_ENTER(&ifs->ifs_ipf_nat);
5294 					fr_hostmapdel(&hm);
5295 					RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5296 				}
5297 				if (nexthm->hm_next == NULL) {
5298 					ipf_freetoken(t, ifs);
5299 					break;
5300 				}
5301 				dst += sizeof(*nexthm);
5302 				hm = nexthm;
5303 				nexthm = nexthm->hm_next;
5304 			}
5305 			break;
5306 
5307 		case IPFGENITER_IPNAT :
5308 			error = COPYOUT(nextipnat, dst, sizeof(*nextipnat));
5309 			if (error != 0)
5310 				error = EFAULT;
5311 			if (t->ipt_data == NULL) {
5312 				ipf_freetoken(t, ifs);
5313 				break;
5314 			} else {
5315 				if (ipn != NULL) {
5316 					WRITE_ENTER(&ifs->ifs_ipf_nat);
5317 					fr_ipnatderef(&ipn, ifs);
5318 					RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5319 				}
5320 				if (nextipnat->in_next == NULL) {
5321 					ipf_freetoken(t, ifs);
5322 					break;
5323 				}
5324 				dst += sizeof(*nextipnat);
5325 				ipn = nextipnat;
5326 				nextipnat = nextipnat->in_next;
5327 			}
5328 			break;
5329 
5330 		case IPFGENITER_NAT :
5331 			error = COPYOUT(nextnat, dst, sizeof(*nextnat));
5332 			if (error != 0)
5333 				error = EFAULT;
5334 			if (t->ipt_data == NULL) {
5335 				ipf_freetoken(t, ifs);
5336 				break;
5337 			} else {
5338 				if (nat != NULL)
5339 					fr_natderef(&nat, ifs);
5340 				if (nextnat->nat_next == NULL) {
5341 					ipf_freetoken(t, ifs);
5342 					break;
5343 				}
5344 				dst += sizeof(*nextnat);
5345 				nat = nextnat;
5346 				nextnat = nextnat->nat_next;
5347 			}
5348 			break;
5349 		default :
5350 			break;
5351 		}
5352 
5353 		if ((count == 1) || (error != 0))
5354 			break;
5355 
5356 		READ_ENTER(&ifs->ifs_ipf_nat);
5357 	}
5358 
5359 	return error;
5360 }
5361 
5362 
5363 /* ------------------------------------------------------------------------ */
5364 /* Function:    nat_iterator                                                */
5365 /* Returns:     int - 0 == ok, else error                                   */
5366 /* Parameters:  token(I) - pointer to ipftoken structure                    */
5367 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5368 /*                                                                          */
5369 /* This function acts as a handler for the SIOCGENITER ioctls that use a    */
5370 /* generic structure to iterate through a list.  There are three different  */
5371 /* linked lists of NAT related information to go through: NAT rules, active */
5372 /* NAT mappings and the NAT fragment cache.                                 */
5373 /* ------------------------------------------------------------------------ */
5374 static int nat_iterator(token, itp, ifs)
5375 ipftoken_t *token;
5376 ipfgeniter_t *itp;
5377 ipf_stack_t *ifs;
5378 {
5379 	int error;
5380 
5381 	if (itp->igi_data == NULL)
5382 		return EFAULT;
5383 
5384 	token->ipt_subtype = itp->igi_type;
5385 
5386 	switch (itp->igi_type)
5387 	{
5388 	case IPFGENITER_HOSTMAP :
5389 	case IPFGENITER_IPNAT :
5390 	case IPFGENITER_NAT :
5391 		error = nat_getnext(token, itp, ifs);
5392 		break;
5393 	case IPFGENITER_NATFRAG :
5394 		error = fr_nextfrag(token, itp, &ifs->ifs_ipfr_natlist,
5395 				    &ifs->ifs_ipfr_nattail,
5396 				    &ifs->ifs_ipf_natfrag, ifs);
5397 		break;
5398 	default :
5399 		error = EINVAL;
5400 		break;
5401 	}
5402 
5403 	return error;
5404 }
5405 
5406 
5407 /* -------------------------------------------------------------------- */
5408 /* Function:	nat_earlydrop						*/
5409 /* Returns:	number of dropped/removed entries from the queue	*/
5410 /* Parameters:	ifq - pointer to queue with entries to be processed	*/
5411 /*		maxidle - entry must be idle this long to be dropped	*/
5412 /*		ifs - ipf stack instance				*/
5413 /*									*/
5414 /* Function is invoked from nat_extraflush() only.  Removes entries	*/
5415 /* form specified timeout queue, based on how long they've sat idle,	*/
5416 /* without waiting for it to happen on its own.				*/
5417 /* -------------------------------------------------------------------- */
5418 static int nat_earlydrop(ifq, maxidle, ifs)
5419 ipftq_t *ifq;
5420 int maxidle;
5421 ipf_stack_t *ifs;
5422 {
5423 	ipftqent_t *tqe, *tqn;
5424 	nat_t *nat;
5425 	unsigned int dropped;
5426 	int droptick;
5427 
5428 	if (ifq == NULL)
5429 		return (0);
5430 
5431 	dropped = 0;
5432 
5433 	/*
5434 	 * Determine the tick representing the idle time we're interested
5435 	 * in.  If an entry exists in the queue, and it was touched before
5436 	 * that tick, then it's been idle longer than maxidle ... remove it.
5437 	 */
5438 	droptick = ifs->ifs_fr_ticks - maxidle;
5439 	tqn = ifq->ifq_head;
5440 	while ((tqe = tqn) != NULL && tqe->tqe_touched < droptick) {
5441 		tqn = tqe->tqe_next;
5442 		nat = tqe->tqe_parent;
5443 		nat_delete(nat, ISL_EXPIRE, ifs);
5444 		dropped++;
5445 	}
5446 	return (dropped);
5447 }
5448 
5449 
5450 /* --------------------------------------------------------------------- */
5451 /* Function:	nat_flushclosing					 */
5452 /* Returns:	int - number of NAT entries deleted			 */
5453 /* Parameters:	stateval(I) - State at which to start removing entries	 */
5454 /*		ifs - ipf stack instance				 */
5455 /*									 */
5456 /* Remove nat table entries for TCP connections which are in the process */
5457 /* of closing, and are in (or "beyond") state specified by 'stateval'.	 */
5458 /* --------------------------------------------------------------------- */
5459 static int nat_flushclosing(stateval, ifs)
5460 int stateval;
5461 ipf_stack_t *ifs;
5462 {
5463 	ipftq_t *ifq, *ifqn;
5464 	ipftqent_t *tqe, *tqn;
5465 	nat_t *nat;
5466 	int dropped;
5467 
5468 	dropped = 0;
5469 
5470 	/*
5471 	 * Start by deleting any entries in specific timeout queues.
5472 	 */
5473 	ifqn = &ifs->ifs_nat_tqb[stateval];
5474 	while ((ifq = ifqn) != NULL) {
5475 		ifqn = ifq->ifq_next;
5476 		dropped += nat_earlydrop(ifq, (int)0, ifs);
5477 	}
5478 
5479 	/*
5480 	 * Next, look through user defined queues for closing entries.
5481 	 */
5482 	ifqn = ifs->ifs_nat_utqe;
5483 	while ((ifq = ifqn) != NULL) {
5484 		ifqn = ifq->ifq_next;
5485 		tqn = ifq->ifq_head;
5486 		while ((tqe = tqn) != NULL) {
5487 			tqn = tqe->tqe_next;
5488 			nat = tqe->tqe_parent;
5489 			if (nat->nat_p != IPPROTO_TCP)
5490 				continue;
5491 			if ((nat->nat_tcpstate[0] >= stateval) &&
5492 			    (nat->nat_tcpstate[1] >= stateval)) {
5493 				nat_delete(nat, NL_EXPIRE, ifs);
5494 				dropped++;
5495 			}
5496 		}
5497 	}
5498 	return (dropped);
5499 }
5500 
5501 
5502 /* --------------------------------------------------------------------- */
5503 /* Function:	nat_extraflush						 */
5504 /* Returns:	int - number of NAT entries deleted			 */
5505 /* Parameters:	which(I) - how to flush the active NAT table		 */
5506 /*		ifs - ipf stack instance				 */
5507 /* Write Locks:	ipf_nat							 */
5508 /*									 */
5509 /* Flush nat tables.  Three actions currently defined:			 */
5510 /*									 */
5511 /* which == 0 :	Flush all nat table entries.				 */
5512 /*									 */
5513 /* which == 1 :	Flush entries with TCP connections which have started	 */
5514 /*		to close on both ends.					 */
5515 /*									 */
5516 /* which == 2 :	First, flush entries which are "almost" closed.  If that */
5517 /*		does not take us below specified threshold in the table, */
5518 /*		we want to flush entries with TCP connections which have */
5519 /*		been idle for a long time.  Start with connections idle	 */
5520 /*		over 12 hours,  and then work backwards in half hour	 */
5521 /*		increments to at most 30 minutes idle, and finally work	 */
5522 /*		back in 30 second increments to at most 30 seconds.	 */
5523 /* --------------------------------------------------------------------- */
5524 static int nat_extraflush(which, ifs)
5525 int which;
5526 ipf_stack_t *ifs;
5527 {
5528 	ipftq_t *ifq, *ifqn;
5529 	nat_t *nat, **natp;
5530 	int idletime, removed, idle_idx;
5531 	SPL_INT(s);
5532 
5533 	removed = 0;
5534 
5535 	SPL_NET(s);
5536 	switch (which)
5537 	{
5538 	case 0:
5539 		natp = &ifs->ifs_nat_instances;
5540 		while ((nat = *natp) != NULL) {
5541 			natp = &nat->nat_next;
5542 			nat_delete(nat, ISL_FLUSH, ifs);
5543 			removed++;
5544 		}
5545 		break;
5546 
5547 	case 1:
5548 		removed = nat_flushclosing(IPF_TCPS_CLOSE_WAIT, ifs);
5549 		break;
5550 
5551 	case 2:
5552 		removed = nat_flushclosing(IPF_TCPS_FIN_WAIT_2, ifs);
5553 
5554 		/*
5555 		 * Be sure we haven't done this in the last 10 seconds.
5556 		 */
5557 		if (ifs->ifs_fr_ticks - ifs->ifs_nat_last_force_flush <
5558 		    IPF_TTLVAL(10))
5559 			break;
5560 		ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks;
5561 
5562 		/*
5563 		 * Determine initial threshold for minimum idle time based on
5564 		 * how long ipfilter has been running.  Ipfilter needs to have
5565 		 * been up as long as the smallest interval to continue on.
5566 		 *
5567 		 * Minimum idle times stored in idletime_tab and indexed by
5568 		 * idle_idx.  Start at upper end of array and work backwards.
5569 		 *
5570 		 * Once the index is found, set the initial idle time to the
5571 		 * first interval before the current ipfilter run time.
5572 		 */
5573 		if (ifs->ifs_fr_ticks < idletime_tab[0])
5574 			break;  /* switch */
5575 		idle_idx = (sizeof (idletime_tab) / sizeof (int)) - 1;
5576 		if (ifs->ifs_fr_ticks > idletime_tab[idle_idx]) {
5577 			idletime = idletime_tab[idle_idx];
5578 		} else {
5579 			while ((idle_idx > 0) &&
5580 			    (ifs->ifs_fr_ticks < idletime_tab[idle_idx]))
5581 				idle_idx--;
5582 			idletime = (ifs->ifs_fr_ticks /
5583 				    idletime_tab[idle_idx]) *
5584 				    idletime_tab[idle_idx];
5585 		}
5586 
5587 		while ((idle_idx >= 0) &&
5588 		    (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_lo)) {
5589 			/*
5590 			 * Start with appropriate timeout queue.
5591 			 */
5592 			removed += nat_earlydrop(
5593 					&ifs->ifs_nat_tqb[IPF_TCPS_ESTABLISHED],
5594 					idletime, ifs);
5595 
5596 			/*
5597 			 * Make sure we haven't already deleted enough
5598 			 * entries before checking the user defined queues.
5599 			 */
5600 			if (NAT_TAB_WATER_LEVEL(ifs) <=
5601 			    ifs->ifs_nat_flush_lvl_lo)
5602 				break;
5603 
5604 			/*
5605 			 * Next, look through the user defined queues.
5606 			 */
5607 			ifqn = ifs->ifs_nat_utqe;
5608 			while ((ifq = ifqn) != NULL) {
5609 				ifqn = ifq->ifq_next;
5610 				removed += nat_earlydrop(ifq, idletime, ifs);
5611 			}
5612 
5613 			/*
5614 			 * Adjust the granularity of idle time.
5615 			 *
5616 			 * If we reach an interval boundary, we need to
5617 			 * either adjust the idle time accordingly or exit
5618 			 * the loop altogether (if this is very last check).
5619 			 */
5620 			idletime -= idletime_tab[idle_idx];
5621 			if (idletime < idletime_tab[idle_idx]) {
5622 				if (idle_idx != 0) {
5623 					idletime = idletime_tab[idle_idx] -
5624 					    idletime_tab[idle_idx - 1];
5625 					idle_idx--;
5626 				} else {
5627 					break;  /* while */
5628 				}
5629 			}
5630 		}
5631 		break;
5632 	default:
5633 		break;
5634 	}
5635 
5636 	SPL_X(s);
5637 	return (removed);
5638 }
5639