xref: /titanic_52/usr/src/uts/common/inet/ipf/ip_nat.c (revision 7eea693d6b672899726e75993fddc4e95b52647f)
1 /*
2  * Copyright (C) 1995-2004 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
7  * Use is subject to license terms.
8  */
9 
10 #if defined(KERNEL) || defined(_KERNEL)
11 # undef KERNEL
12 # undef _KERNEL
13 # define        KERNEL	1
14 # define        _KERNEL	1
15 #endif
16 #include <sys/errno.h>
17 #include <sys/types.h>
18 #include <sys/param.h>
19 #include <sys/time.h>
20 #include <sys/file.h>
21 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
22     defined(_KERNEL)
23 # include "opt_ipfilter_log.h"
24 #endif
25 #if !defined(_KERNEL)
26 # include <stdio.h>
27 # include <string.h>
28 # include <stdlib.h>
29 # define _KERNEL
30 # ifdef __OpenBSD__
31 struct file;
32 # endif
33 # include <sys/uio.h>
34 # undef _KERNEL
35 #endif
36 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
37 # include <sys/filio.h>
38 # include <sys/fcntl.h>
39 #else
40 # include <sys/ioctl.h>
41 #endif
42 #if !defined(AIX)
43 # include <sys/fcntl.h>
44 #endif
45 #if !defined(linux)
46 # include <sys/protosw.h>
47 #endif
48 #include <sys/socket.h>
49 #if defined(_KERNEL)
50 # include <sys/systm.h>
51 # if !defined(__SVR4) && !defined(__svr4__)
52 #  include <sys/mbuf.h>
53 # endif
54 #endif
55 #if defined(__SVR4) || defined(__svr4__)
56 # include <sys/filio.h>
57 # include <sys/byteorder.h>
58 # ifdef _KERNEL
59 #  include <sys/dditypes.h>
60 # endif
61 # include <sys/stream.h>
62 # include <sys/kmem.h>
63 #endif
64 #if __FreeBSD_version >= 300000
65 # include <sys/queue.h>
66 #endif
67 #include <net/if.h>
68 #if __FreeBSD_version >= 300000
69 # include <net/if_var.h>
70 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
71 #  include "opt_ipfilter.h"
72 # endif
73 #endif
74 #ifdef sun
75 # include <net/af.h>
76 #endif
77 #include <net/route.h>
78 #include <netinet/in.h>
79 #include <netinet/in_systm.h>
80 #include <netinet/ip.h>
81 
82 #ifdef RFC1825
83 # include <vpn/md5.h>
84 # include <vpn/ipsec.h>
85 extern struct ifnet vpnif;
86 #endif
87 
88 #if !defined(linux)
89 # include <netinet/ip_var.h>
90 #endif
91 #include <netinet/tcp.h>
92 #include <netinet/udp.h>
93 #include <netinet/ip_icmp.h>
94 #include "netinet/ip_compat.h"
95 #include <netinet/tcpip.h>
96 #include "netinet/ip_fil.h"
97 #include "netinet/ip_nat.h"
98 #include "netinet/ip_frag.h"
99 #include "netinet/ip_state.h"
100 #include "netinet/ip_proxy.h"
101 #include "netinet/ipf_stack.h"
102 #ifdef	IPFILTER_SYNC
103 #include "netinet/ip_sync.h"
104 #endif
105 #if (__FreeBSD_version >= 300000)
106 # include <sys/malloc.h>
107 #endif
108 /* END OF INCLUDES */
109 
/*
 * Within this file SOCKADDR_IN always names the IPv4 socket address
 * structure; any earlier definition is discarded first.
 */
#undef	SOCKADDR_IN
#define	SOCKADDR_IN	struct sockaddr_in

/*
 * Source identification strings, compiled in unless building under lint.
 * Nothing in the visible part of this file reads them.
 */
#if !defined(lint)
static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 darrenr Exp $";
#endif
117 
118 
119 /* ======================================================================== */
120 /* How the NAT is organised and works.                                      */
121 /*                                                                          */
122 /* Inside (interface y) NAT       Outside (interface x)                     */
123 /* -------------------- -+- -------------------------------------           */
/* Packet going          |   out, processed by fr_checknatout() for x       */
125 /* ------------>         |   ------------>                                  */
126 /* src=10.1.1.1          |   src=192.1.1.1                                  */
127 /*                       |                                                  */
128 /*                       |   in, processed by fr_checknatin() for x         */
129 /* <------------         |   <------------                                  */
130 /* dst=10.1.1.1          |   dst=192.1.1.1                                  */
131 /* -------------------- -+- -------------------------------------           */
132 /* fr_checknatout() - changes ip_src and if required, sport                 */
133 /*             - creates a new mapping, if required.                        */
134 /* fr_checknatin()  - changes ip_dst and if required, dport                 */
135 /*                                                                          */
136 /* In the NAT table, internal source is recorded as "in" and externally     */
137 /* seen as "out".                                                           */
138 /* ======================================================================== */
139 
140 
/*
 * Forward declarations of the functions that are private to this file.
 * The parameter lists are wrapped in __P(), which is defined in a header
 * not shown here (presumably so that pre-ANSI compilers can drop the
 * prototype -- confirm against ip_compat.h).
 */
static	int	nat_flushtable __P((ipf_stack_t *));
static	int	nat_clearlist __P((ipf_stack_t *));
static	void	nat_addnat __P((struct ipnat *, ipf_stack_t *));
static	void	nat_addrdr __P((struct ipnat *, ipf_stack_t *));
static	int	fr_natgetent __P((caddr_t, ipf_stack_t *));
static	int	fr_natgetsz __P((caddr_t, ipf_stack_t *));
static	int	fr_natputent __P((caddr_t, int, ipf_stack_t *));
static	void	nat_tabmove __P((nat_t *, ipf_stack_t *));
static	int	nat_match __P((fr_info_t *, ipnat_t *));
static	INLINE	int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
static	INLINE	int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
static	hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
				    struct in_addr, struct in_addr, u_32_t,
				    ipf_stack_t *));
static	INLINE	int nat_icmpquerytype4 __P((int));
static	int	nat_ruleaddrinit __P((ipnat_t *));
static	int	nat_siocaddnat __P((ipnat_t *, ipnat_t **, int, ipf_stack_t *));
static	void	nat_siocdelnat __P((ipnat_t *, ipnat_t **, int, ipf_stack_t *));
static	INLINE	int nat_icmperrortype4 __P((int));
static	INLINE	int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
				      tcphdr_t *, nat_t **, int));
static	INLINE	int nat_resolverule __P((ipnat_t *, ipf_stack_t *));
static	void	nat_mssclamp __P((tcphdr_t *, u_32_t, u_short *));
static	int	nat_getnext __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
static	int	nat_iterator __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
static	int	nat_extraflush __P((int, ipf_stack_t *));
static	int	nat_earlydrop __P((ipftq_t *, int, ipf_stack_t *));
static	int	nat_flushclosing __P((int, ipf_stack_t *));
169 
170 
/*
 * Below we declare a list of constants used only in the nat_extraflush()
 * routine.  We are placing it here, instead of in nat_extraflush() itself,
 * because we want to make it visible to tools such as mdb, nm etc., so the
 * values can easily be altered during debugging.
 *
 * Each entry is an idle period written in seconds and passed through
 * IPF_TTLVAL() (presumably a conversion from seconds to timer units; the
 * macro is defined outside this file -- confirm).  The entries are listed
 * in increasing order, from 30 seconds up to 4 days.
 */
static	const int	idletime_tab[] = {
	IPF_TTLVAL(30),		/* 30 seconds */
	IPF_TTLVAL(1800),	/* 30 minutes */
	IPF_TTLVAL(43200),	/* 12 hours */
	IPF_TTLVAL(345600),	/* 4 days */
};
183 
/*
 * True when NAT entry "n" has at least one of the IPN_TCPUDPICMP flags set
 * and its nat_inport differs from its nat_outport.  The argument is
 * evaluated more than once, so it must not have side effects.
 */
#define NAT_HAS_L4_CHANGED(n)	\
 	(((n)->nat_flags & (IPN_TCPUDPICMP)) && \
 	(n)->nat_inport != (n)->nat_outport)
187 
188 /* ------------------------------------------------------------------------ */
189 /* Function:    fr_natinit                                                  */
190 /* Returns:     int - 0 == success, -1 == failure                           */
191 /* Parameters:  Nil                                                         */
192 /*                                                                          */
193 /* Initialise all of the NAT locks, tables and other structures.            */
194 /* ------------------------------------------------------------------------ */
195 int fr_natinit(ifs)
196 ipf_stack_t *ifs;
197 {
198 	int i;
199 
200 	KMALLOCS(ifs->ifs_nat_table[0], nat_t **,
201 		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
202 	if (ifs->ifs_nat_table[0] != NULL)
203 		bzero((char *)ifs->ifs_nat_table[0],
204 		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
205 	else
206 		return -1;
207 
208 	KMALLOCS(ifs->ifs_nat_table[1], nat_t **,
209 		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
210 	if (ifs->ifs_nat_table[1] != NULL)
211 		bzero((char *)ifs->ifs_nat_table[1],
212 		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
213 	else
214 		return -2;
215 
216 	KMALLOCS(ifs->ifs_nat_rules, ipnat_t **,
217 		 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
218 	if (ifs->ifs_nat_rules != NULL)
219 		bzero((char *)ifs->ifs_nat_rules,
220 		      ifs->ifs_ipf_natrules_sz * sizeof(ipnat_t *));
221 	else
222 		return -3;
223 
224 	KMALLOCS(ifs->ifs_rdr_rules, ipnat_t **,
225 		 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
226 	if (ifs->ifs_rdr_rules != NULL)
227 		bzero((char *)ifs->ifs_rdr_rules,
228 		      ifs->ifs_ipf_rdrrules_sz * sizeof(ipnat_t *));
229 	else
230 		return -4;
231 
232 	KMALLOCS(ifs->ifs_maptable, hostmap_t **,
233 		 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
234 	if (ifs->ifs_maptable != NULL)
235 		bzero((char *)ifs->ifs_maptable,
236 		      sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
237 	else
238 		return -5;
239 
240 	ifs->ifs_ipf_hm_maplist = NULL;
241 
242 	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[0], u_long *,
243 		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
244 	if (ifs->ifs_nat_stats.ns_bucketlen[0] == NULL)
245 		return -1;
246 	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[0],
247 	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
248 
249 	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[1], u_long *,
250 		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
251 	if (ifs->ifs_nat_stats.ns_bucketlen[1] == NULL)
252 		return -1;
253 	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[1],
254 	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
255 
256 	if (ifs->ifs_fr_nat_maxbucket == 0) {
257 		for (i = ifs->ifs_ipf_nattable_sz; i > 0; i >>= 1)
258 			ifs->ifs_fr_nat_maxbucket++;
259 		ifs->ifs_fr_nat_maxbucket *= 2;
260 	}
261 
262 	fr_sttab_init(ifs->ifs_nat_tqb, ifs);
263 	/*
264 	 * Increase this because we may have "keep state" following this too
265 	 * and packet storms can occur if this is removed too quickly.
266 	 */
267 	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcplastack;
268 	ifs->ifs_nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_nat_udptq;
269 	ifs->ifs_nat_udptq.ifq_ttl = ifs->ifs_fr_defnatage;
270 	ifs->ifs_nat_udptq.ifq_ref = 1;
271 	ifs->ifs_nat_udptq.ifq_head = NULL;
272 	ifs->ifs_nat_udptq.ifq_tail = &ifs->ifs_nat_udptq.ifq_head;
273 	MUTEX_INIT(&ifs->ifs_nat_udptq.ifq_lock, "nat ipftq udp tab");
274 	ifs->ifs_nat_udptq.ifq_next = &ifs->ifs_nat_icmptq;
275 	ifs->ifs_nat_icmptq.ifq_ttl = ifs->ifs_fr_defnaticmpage;
276 	ifs->ifs_nat_icmptq.ifq_ref = 1;
277 	ifs->ifs_nat_icmptq.ifq_head = NULL;
278 	ifs->ifs_nat_icmptq.ifq_tail = &ifs->ifs_nat_icmptq.ifq_head;
279 	MUTEX_INIT(&ifs->ifs_nat_icmptq.ifq_lock, "nat icmp ipftq tab");
280 	ifs->ifs_nat_icmptq.ifq_next = &ifs->ifs_nat_iptq;
281 	ifs->ifs_nat_iptq.ifq_ttl = ifs->ifs_fr_defnatipage;
282 	ifs->ifs_nat_iptq.ifq_ref = 1;
283 	ifs->ifs_nat_iptq.ifq_head = NULL;
284 	ifs->ifs_nat_iptq.ifq_tail = &ifs->ifs_nat_iptq.ifq_head;
285 	MUTEX_INIT(&ifs->ifs_nat_iptq.ifq_lock, "nat ip ipftq tab");
286 	ifs->ifs_nat_iptq.ifq_next = NULL;
287 
288 	for (i = 0; i < IPF_TCP_NSTATES; i++) {
289 		if (ifs->ifs_nat_tqb[i].ifq_ttl < ifs->ifs_fr_defnaticmpage)
290 			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnaticmpage;
291 #ifdef LARGE_NAT
292 		else if (ifs->ifs_nat_tqb[i].ifq_ttl > ifs->ifs_fr_defnatage)
293 			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnatage;
294 #endif
295 	}
296 
297 	/*
298 	 * Increase this because we may have "keep state" following
299 	 * this too and packet storms can occur if this is removed
300 	 * too quickly.
301 	 */
302 	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl =
303 	    ifs->ifs_nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
304 
305 	RWLOCK_INIT(&ifs->ifs_ipf_nat, "ipf IP NAT rwlock");
306 	RWLOCK_INIT(&ifs->ifs_ipf_natfrag, "ipf IP NAT-Frag rwlock");
307 	MUTEX_INIT(&ifs->ifs_ipf_nat_new, "ipf nat new mutex");
308 	MUTEX_INIT(&ifs->ifs_ipf_natio, "ipf nat io mutex");
309 
310 	ifs->ifs_fr_nat_init = 1;
311 
312 	return 0;
313 }
314 
315 
316 /* ------------------------------------------------------------------------ */
317 /* Function:    nat_addrdr                                                  */
318 /* Returns:     Nil                                                         */
319 /* Parameters:  n(I) - pointer to NAT rule to add                           */
320 /*                                                                          */
321 /* Adds a redirect rule to the hash table of redirect rules and the list of */
322 /* loaded NAT rules.  Updates the bitmask indicating which netmasks are in  */
323 /* use by redirect rules.                                                   */
324 /* ------------------------------------------------------------------------ */
325 static void nat_addrdr(n, ifs)
326 ipnat_t *n;
327 ipf_stack_t *ifs;
328 {
329 	ipnat_t **np;
330 	u_32_t j;
331 	u_int hv;
332 	int k;
333 
334 	k = count4bits(n->in_outmsk);
335 	if ((k >= 0) && (k != 32))
336 		ifs->ifs_rdr_masks |= 1 << k;
337 	j = (n->in_outip & n->in_outmsk);
338 	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_rdrrules_sz);
339 	np = ifs->ifs_rdr_rules + hv;
340 	while (*np != NULL)
341 		np = &(*np)->in_rnext;
342 	n->in_rnext = NULL;
343 	n->in_prnext = np;
344 	n->in_hv = hv;
345 	*np = n;
346 }
347 
348 
349 /* ------------------------------------------------------------------------ */
350 /* Function:    nat_addnat                                                  */
351 /* Returns:     Nil                                                         */
352 /* Parameters:  n(I) - pointer to NAT rule to add                           */
353 /*                                                                          */
354 /* Adds a NAT map rule to the hash table of rules and the list of  loaded   */
355 /* NAT rules.  Updates the bitmask indicating which netmasks are in use by  */
356 /* redirect rules.                                                          */
357 /* ------------------------------------------------------------------------ */
358 static void nat_addnat(n, ifs)
359 ipnat_t *n;
360 ipf_stack_t *ifs;
361 {
362 	ipnat_t **np;
363 	u_32_t j;
364 	u_int hv;
365 	int k;
366 
367 	k = count4bits(n->in_inmsk);
368 	if ((k >= 0) && (k != 32))
369 		ifs->ifs_nat_masks |= 1 << k;
370 	j = (n->in_inip & n->in_inmsk);
371 	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_natrules_sz);
372 	np = ifs->ifs_nat_rules + hv;
373 	while (*np != NULL)
374 		np = &(*np)->in_mnext;
375 	n->in_mnext = NULL;
376 	n->in_pmnext = np;
377 	n->in_hv = hv;
378 	*np = n;
379 }
380 
381 
382 /* ------------------------------------------------------------------------ */
383 /* Function:    nat_delrdr                                                  */
384 /* Returns:     Nil                                                         */
385 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
386 /*                                                                          */
387 /* Removes a redirect rule from the hash table of redirect rules.           */
388 /* ------------------------------------------------------------------------ */
389 void nat_delrdr(n)
390 ipnat_t *n;
391 {
392 	if (n->in_rnext)
393 		n->in_rnext->in_prnext = n->in_prnext;
394 	*n->in_prnext = n->in_rnext;
395 }
396 
397 
398 /* ------------------------------------------------------------------------ */
399 /* Function:    nat_delnat                                                  */
400 /* Returns:     Nil                                                         */
401 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
402 /*                                                                          */
403 /* Removes a NAT map rule from the hash table of NAT map rules.             */
404 /* ------------------------------------------------------------------------ */
405 void nat_delnat(n)
406 ipnat_t *n;
407 {
408 	if (n->in_mnext != NULL)
409 		n->in_mnext->in_pmnext = n->in_pmnext;
410 	*n->in_pmnext = n->in_mnext;
411 }
412 
413 
414 /* ------------------------------------------------------------------------ */
415 /* Function:    nat_hostmap                                                 */
416 /* Returns:     struct hostmap* - NULL if no hostmap could be created,      */
417 /*                                else a pointer to the hostmapping to use  */
418 /* Parameters:  np(I)   - pointer to NAT rule                               */
419 /*              real(I) - real IP address                                   */
420 /*              map(I)  - mapped IP address                                 */
421 /*              port(I) - destination port number                           */
422 /* Write Locks: ipf_nat                                                     */
423 /*                                                                          */
424 /* Check if an ip address has already been allocated for a given mapping    */
425 /* that is not doing port based translation.  If is not yet allocated, then */
426 /* create a new entry if a non-NULL NAT rule pointer has been supplied.     */
427 /* ------------------------------------------------------------------------ */
428 static struct hostmap *nat_hostmap(np, src, dst, map, port, ifs)
429 ipnat_t *np;
430 struct in_addr src;
431 struct in_addr dst;
432 struct in_addr map;
433 u_32_t port;
434 ipf_stack_t *ifs;
435 {
436 	hostmap_t *hm;
437 	u_int hv;
438 
439 	hv = (src.s_addr ^ dst.s_addr);
440 	hv += src.s_addr;
441 	hv += dst.s_addr;
442 	hv %= HOSTMAP_SIZE;
443 	for (hm = ifs->ifs_maptable[hv]; hm; hm = hm->hm_next)
444 		if ((hm->hm_srcip.s_addr == src.s_addr) &&
445 		    (hm->hm_dstip.s_addr == dst.s_addr) &&
446 		    ((np == NULL) || (np == hm->hm_ipnat)) &&
447 		    ((port == 0) || (port == hm->hm_port))) {
448 			hm->hm_ref++;
449 			return hm;
450 		}
451 
452 	if (np == NULL)
453 		return NULL;
454 
455 	KMALLOC(hm, hostmap_t *);
456 	if (hm) {
457 		hm->hm_hnext = ifs->ifs_ipf_hm_maplist;
458 		hm->hm_phnext = &ifs->ifs_ipf_hm_maplist;
459 		if (ifs->ifs_ipf_hm_maplist != NULL)
460 			ifs->ifs_ipf_hm_maplist->hm_phnext = &hm->hm_hnext;
461 		ifs->ifs_ipf_hm_maplist = hm;
462 
463 		hm->hm_next = ifs->ifs_maptable[hv];
464 		hm->hm_pnext = ifs->ifs_maptable + hv;
465 		if (ifs->ifs_maptable[hv] != NULL)
466 			ifs->ifs_maptable[hv]->hm_pnext = &hm->hm_next;
467 		ifs->ifs_maptable[hv] = hm;
468 		hm->hm_ipnat = np;
469 		hm->hm_srcip = src;
470 		hm->hm_dstip = dst;
471 		hm->hm_mapip = map;
472 		hm->hm_ref = 1;
473 		hm->hm_port = port;
474 		hm->hm_v = 4;
475 	}
476 	return hm;
477 }
478 
479 
480 /* ------------------------------------------------------------------------ */
481 /* Function:    fr_hostmapdel                                              */
482 /* Returns:     Nil                                                         */
483 /* Parameters:  hmp(I) - pointer to pointer to hostmap structure            */
484 /* Write Locks: ipf_nat                                                     */
485 /*                                                                          */
486 /* Decrement the references to this hostmap structure by one.  If this      */
487 /* reaches zero then remove it and free it.                                 */
488 /* ------------------------------------------------------------------------ */
489 void fr_hostmapdel(hmp)
490 struct hostmap **hmp;
491 {
492 	struct hostmap *hm;
493 
494 	hm = *hmp;
495 	*hmp = NULL;
496 
497 	hm->hm_ref--;
498 	if (hm->hm_ref == 0) {
499 		if (hm->hm_next)
500 			hm->hm_next->hm_pnext = hm->hm_pnext;
501 		*hm->hm_pnext = hm->hm_next;
502 		if (hm->hm_hnext)
503 			hm->hm_hnext->hm_phnext = hm->hm_phnext;
504 		*hm->hm_phnext = hm->hm_hnext;
505 		KFREE(hm);
506 	}
507 }
508 
509 
/* ------------------------------------------------------------------------ */
/* Function:    fix_outcksum                                                */
/* Returns:     Nil                                                         */
/* Parameters:  sp(I)  - location of 16bit checksum to update               */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Adjusts the 16bit checksum by "n" for packets going out.                 */
/* The checksum is read and written in network byte order.  Nothing is      */
/* changed when n is zero.                                                  */
/* ------------------------------------------------------------------------ */
void fix_outcksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_32_t acc;
	u_short result;
	int pass;

	if (n != 0) {
		/* Start from the complement of the stored checksum. */
		acc = (~ntohs(*sp)) & 0xffff;
		acc += n;

		/* Fold the carries back into the low 16 bits, twice. */
		for (pass = 0; pass < 2; pass++)
			acc = (acc & 0xffff) + (acc >> 16);

		/* Complement again and store in network byte order. */
		result = ~(u_short)acc;
		*sp = htons(result);
	}
}
536 
537 
/* ------------------------------------------------------------------------ */
/* Function:    fix_incksum                                                 */
/* Returns:     Nil                                                         */
/* Parameters:  sp(I)  - location of 16bit checksum to update               */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Adjusts the 16bit checksum by "n" for packets going in.                  */
/* Only the low 16 bits of n take part in the adjustment.                   */
/* The checksum is read and written in network byte order.  Nothing is      */
/* changed when n is zero.                                                  */
/* ------------------------------------------------------------------------ */
void fix_incksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_32_t acc;
	u_short result;
	int pass;

	if (n != 0) {
		/* Start from the complement of the stored checksum. */
		acc = (~ntohs(*sp)) & 0xffff;

		/* Add the 16 bit complement of the adjustment. */
		acc += ~n & 0xffff;

		/* Fold the carries back into the low 16 bits, twice. */
		for (pass = 0; pass < 2; pass++)
			acc = (acc & 0xffff) + (acc >> 16);

		/* Complement again and store in network byte order. */
		result = ~(u_short)acc;
		*sp = htons(result);
	}
}
564 
565 
/* ------------------------------------------------------------------------ */
/* Function:    fix_datacksum                                               */
/* Returns:     Nil                                                         */
/* Parameters:  sp(I)  - location of 16bit checksum to update               */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Fix_datacksum is used *only* for the adjustments of checksums in the     */
/* data section of an IP packet.                                            */
/*                                                                          */
/* The only situation in which you need to do this is when NAT'ing an       */
/* ICMP error message. Such a message, contains in its body the IP header   */
/* of the original IP packet, that causes the error.                        */
/*                                                                          */
/* You can't use fix_incksum or fix_outcksum in that case, because for the  */
/* kernel the data section of the ICMP error is just data, and no special   */
/* processing like hardware cksum or ntohs processing have been done by the */
/* kernel on the data section.                                              */
/*                                                                          */
/* The adjustment arithmetic is the same as in fix_outcksum().              */
/* ------------------------------------------------------------------------ */
void fix_datacksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_32_t acc;
	u_short result;
	int pass;

	if (n != 0) {
		/* Start from the complement of the stored checksum. */
		acc = (~ntohs(*sp)) & 0xffff;
		acc += n;

		/* Fold the carries back into the low 16 bits, twice. */
		for (pass = 0; pass < 2; pass++)
			acc = (acc & 0xffff) + (acc >> 16);

		/* Complement again and store in network byte order. */
		result = ~(u_short)acc;
		*sp = htons(result);
	}
}
602 
603 
604 /* ------------------------------------------------------------------------ */
605 /* Function:    fr_nat_ioctl                                                */
606 /* Returns:     int - 0 == success, != 0 == failure                         */
607 /* Parameters:  data(I) - pointer to ioctl data                             */
608 /*              cmd(I)  - ioctl command integer                             */
609 /*              mode(I) - file mode bits used with open                     */
610 /*                                                                          */
611 /* Processes an ioctl call made to operate on the IP Filter NAT device.     */
612 /* ------------------------------------------------------------------------ */
613 int fr_nat_ioctl(data, cmd, mode, uid, ctx, ifs)
614 ioctlcmd_t cmd;
615 caddr_t data;
616 int mode, uid;
617 void *ctx;
618 ipf_stack_t *ifs;
619 {
620 	ipnat_t *nat, *nt, *n = NULL, **np = NULL;
621 	int error = 0, ret, arg, getlock;
622 	ipnat_t natd;
623 
624 #if (BSD >= 199306) && defined(_KERNEL)
625 	if ((securelevel >= 2) && (mode & FWRITE))
626 		return EPERM;
627 #endif
628 
629 #if defined(__osf__) && defined(_KERNEL)
630 	getlock = 0;
631 #else
632 	getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
633 #endif
634 
635 	nat = NULL;     /* XXX gcc -Wuninitialized */
636 	if (cmd == (ioctlcmd_t)SIOCADNAT) {
637 		KMALLOC(nt, ipnat_t *);
638 	} else {
639 		nt = NULL;
640 	}
641 
642 	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
643 		if (mode & NAT_SYSSPACE) {
644 			bcopy(data, (char *)&natd, sizeof(natd));
645 			error = 0;
646 		} else {
647 			error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
648 		}
649 
650 	}
651 
652 	if (error != 0)
653 		goto done;
654 
655 	/*
656 	 * For add/delete, look to see if the NAT entry is already present
657 	 */
658 	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
659 		nat = &natd;
660 		if (nat->in_v == 0)	/* For backward compat. */
661 			nat->in_v = 4;
662 		nat->in_flags &= IPN_USERFLAGS;
663 		if ((nat->in_redir & NAT_MAPBLK) == 0) {
664 			if ((nat->in_flags & IPN_SPLIT) == 0)
665 				nat->in_inip &= nat->in_inmsk;
666 			if ((nat->in_flags & IPN_IPRANGE) == 0)
667 				nat->in_outip &= nat->in_outmsk;
668 		}
669 		MUTEX_ENTER(&ifs->ifs_ipf_natio);
670 		for (np = &ifs->ifs_nat_list; ((n = *np) != NULL);
671 		     np = &n->in_next)
672 			if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
673 			    IPN_CMPSIZ) == 0) {
674 				if (nat->in_redir == NAT_REDIRECT &&
675 				    nat->in_pnext != n->in_pnext)
676 					continue;
677 				break;
678 			}
679 	}
680 
681 	switch (cmd)
682 	{
683 	case SIOCGENITER :
684 	    {
685 		ipfgeniter_t iter;
686 		ipftoken_t *token;
687 
688 		error = fr_inobj(data, &iter, IPFOBJ_GENITER);
689 		if (error != 0)
690 			break;
691 
692 		token = ipf_findtoken(iter.igi_type, uid, ctx, ifs);
693 		if (token != NULL)
694 			error  = nat_iterator(token, &iter, ifs);
695 		else
696 			error = ESRCH;
697 		RWLOCK_EXIT(&ifs->ifs_ipf_tokens);
698 		break;
699 	    }
700 #ifdef  IPFILTER_LOG
701 	case SIOCIPFFB :
702 	{
703 		int tmp;
704 
705 		if (!(mode & FWRITE))
706 			error = EPERM;
707 		else {
708 			tmp = ipflog_clear(IPL_LOGNAT, ifs);
709 			error = BCOPYOUT((char *)&tmp, (char *)data,
710 					sizeof(tmp));
711 			if (error != 0)
712 				error = EFAULT;
713 		}
714 		break;
715 	}
716 	case SIOCSETLG :
717 		if (!(mode & FWRITE)) {
718 			error = EPERM;
719 		} else {
720 			error = BCOPYIN((char *)data,
721 					(char *)&ifs->ifs_nat_logging,
722 					sizeof(ifs->ifs_nat_logging));
723 			if (error != 0)
724 				error = EFAULT;
725 		}
726 		break;
727 	case SIOCGETLG :
728 		error = BCOPYOUT((char *)&ifs->ifs_nat_logging, (char *)data,
729 				sizeof(ifs->ifs_nat_logging));
730 		if (error != 0)
731 			error = EFAULT;
732 		break;
733 	case FIONREAD :
734 		arg = ifs->ifs_iplused[IPL_LOGNAT];
735 		error = BCOPYOUT(&arg, data, sizeof(arg));
736 		if (error != 0)
737 			error = EFAULT;
738 		break;
739 #endif
740 	case SIOCADNAT :
741 		if (!(mode & FWRITE)) {
742 			error = EPERM;
743 		} else if (n != NULL) {
744 			error = EEXIST;
745 		} else if (nt == NULL) {
746 			error = ENOMEM;
747 		}
748 		if (error != 0) {
749 			MUTEX_EXIT(&ifs->ifs_ipf_natio);
750 			break;
751 		}
752 		bcopy((char *)nat, (char *)nt, sizeof(*n));
753 		error = nat_siocaddnat(nt, np, getlock, ifs);
754 		MUTEX_EXIT(&ifs->ifs_ipf_natio);
755 		if (error == 0)
756 			nt = NULL;
757 		break;
758 	case SIOCRMNAT :
759 		if (!(mode & FWRITE)) {
760 			error = EPERM;
761 			n = NULL;
762 		} else if (n == NULL) {
763 			error = ESRCH;
764 		}
765 
766 		if (error != 0) {
767 			MUTEX_EXIT(&ifs->ifs_ipf_natio);
768 			break;
769 		}
770 		nat_siocdelnat(n, np, getlock, ifs);
771 
772 		MUTEX_EXIT(&ifs->ifs_ipf_natio);
773 		n = NULL;
774 		break;
775 	case SIOCGNATS :
776 		ifs->ifs_nat_stats.ns_table[0] = ifs->ifs_nat_table[0];
777 		ifs->ifs_nat_stats.ns_table[1] = ifs->ifs_nat_table[1];
778 		ifs->ifs_nat_stats.ns_list = ifs->ifs_nat_list;
779 		ifs->ifs_nat_stats.ns_maptable = ifs->ifs_maptable;
780 		ifs->ifs_nat_stats.ns_maplist = ifs->ifs_ipf_hm_maplist;
781 		ifs->ifs_nat_stats.ns_nattab_max = ifs->ifs_ipf_nattable_max;
782 		ifs->ifs_nat_stats.ns_nattab_sz = ifs->ifs_ipf_nattable_sz;
783 		ifs->ifs_nat_stats.ns_rultab_sz = ifs->ifs_ipf_natrules_sz;
784 		ifs->ifs_nat_stats.ns_rdrtab_sz = ifs->ifs_ipf_rdrrules_sz;
785 		ifs->ifs_nat_stats.ns_hostmap_sz = ifs->ifs_ipf_hostmap_sz;
786 		ifs->ifs_nat_stats.ns_instances = ifs->ifs_nat_instances;
787 		ifs->ifs_nat_stats.ns_apslist = ifs->ifs_ap_sess_list;
788 		error = fr_outobj(data, &ifs->ifs_nat_stats, IPFOBJ_NATSTAT);
789 		break;
790 	case SIOCGNATL :
791 	    {
792 		natlookup_t nl;
793 
794 		if (getlock) {
795 			READ_ENTER(&ifs->ifs_ipf_nat);
796 		}
797 		error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
798 		if (nl.nl_v != 6)
799 			nl.nl_v = 4;
800 		if (error == 0) {
801 			void *ptr;
802 
803 			switch (nl.nl_v)
804 			{
805 			case 4:
806 				ptr = nat_lookupredir(&nl, ifs);
807 				break;
808 #ifdef	USE_INET6
809 			case 6:
810 				ptr = nat6_lookupredir(&nl, ifs);
811 				break;
812 #endif
813 			default:
814 				ptr = NULL;
815 				break;
816 			}
817 
818 			if (ptr != NULL) {
819 				error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
820 			} else {
821 				error = ESRCH;
822 			}
823 		}
824 		if (getlock) {
825 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
826 		}
827 		break;
828 	    }
829 	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
830 		if (!(mode & FWRITE)) {
831 			error = EPERM;
832 			break;
833 		}
834 		if (getlock) {
835 			WRITE_ENTER(&ifs->ifs_ipf_nat);
836 		}
837 		error = BCOPYIN(data, &arg, sizeof(arg));
838 		if (error != 0) {
839 			error = EFAULT;
840 		} else {
841 			if (arg == 0)
842 				ret = nat_flushtable(ifs);
843 			else if (arg == 1)
844 				ret = nat_clearlist(ifs);
845 			else if (arg >= 2 && arg <= 4)
846 				ret = nat_extraflush(arg - 2, ifs);
847 			else
848 				error = EINVAL;
849 		}
850 		if (getlock) {
851 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
852 		}
853 		if (error == 0) {
854 			error = BCOPYOUT(&ret, data, sizeof(ret));
855 			if (error != 0)
856 				error = EFAULT;
857 		}
858 		break;
859 	case SIOCPROXY :
860 		error = appr_ioctl(data, cmd, mode, ifs);
861 		break;
862 	case SIOCSTLCK :
863 		if (!(mode & FWRITE)) {
864 			error = EPERM;
865 		} else {
866 			error = fr_lock(data, &ifs->ifs_fr_nat_lock);
867 		}
868 		break;
869 	case SIOCSTPUT :
870 		if ((mode & FWRITE) != 0) {
871 			error = fr_natputent(data, getlock, ifs);
872 		} else {
873 			error = EACCES;
874 		}
875 		break;
876 	case SIOCSTGSZ :
877 		if (ifs->ifs_fr_nat_lock) {
878 			if (getlock) {
879 				READ_ENTER(&ifs->ifs_ipf_nat);
880 			}
881 			error = fr_natgetsz(data, ifs);
882 			if (getlock) {
883 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
884 			}
885 		} else
886 			error = EACCES;
887 		break;
888 	case SIOCSTGET :
889 		if (ifs->ifs_fr_nat_lock) {
890 			if (getlock) {
891 				READ_ENTER(&ifs->ifs_ipf_nat);
892 			}
893 			error = fr_natgetent(data, ifs);
894 			if (getlock) {
895 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
896 			}
897 		} else
898 			error = EACCES;
899 		break;
900 	case SIOCIPFDELTOK :
901 		error = BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
902 		if (error != 0) {
903 			error = EFAULT;
904 		} else {
905 			error = ipf_deltoken(arg, uid, ctx, ifs);
906 		}
907 		break;
908 	default :
909 		error = EINVAL;
910 		break;
911 	}
912 done:
913 	if (nt)
914 		KFREE(nt);
915 	return error;
916 }
917 
918 
919 /* ------------------------------------------------------------------------ */
920 /* Function:    nat_siocaddnat                                              */
921 /* Returns:     int - 0 == success, != 0 == failure                         */
922 /* Parameters:  n(I)       - pointer to new NAT rule                        */
923 /*              np(I)      - pointer to where to insert new NAT rule        */
924 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
925 /* Mutex Locks: ipf_natio                                                   */
926 /*                                                                          */
927 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
928 /* from information passed to the kernel, then add it  to the appropriate   */
929 /* NAT rule table(s).                                                       */
930 /* ------------------------------------------------------------------------ */
931 static int nat_siocaddnat(n, np, getlock, ifs)
932 ipnat_t *n, **np;
933 int getlock;
934 ipf_stack_t *ifs;
935 {
936 	int error = 0, i, j;
937 
938 	if (nat_resolverule(n, ifs) != 0)
939 		return ENOENT;
940 
941 	if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
942 		return EINVAL;
943 
944 	n->in_use = 0;
945 	if (n->in_redir & NAT_MAPBLK)
946 		n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
947 	else if (n->in_flags & IPN_AUTOPORTMAP)
948 		n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
949 	else if (n->in_flags & IPN_IPRANGE)
950 		n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
951 	else if (n->in_flags & IPN_SPLIT)
952 		n->in_space = 2;
953 	else if (n->in_outmsk != 0)
954 		n->in_space = ~ntohl(n->in_outmsk);
955 	else
956 		n->in_space = 1;
957 
958 	/*
959 	 * Calculate the number of valid IP addresses in the output
960 	 * mapping range.  In all cases, the range is inclusive of
961 	 * the start and ending IP addresses.
962 	 * If to a CIDR address, lose 2: broadcast + network address
963 	 *                               (so subtract 1)
964 	 * If to a range, add one.
965 	 * If to a single IP address, set to 1.
966 	 */
967 	if (n->in_space) {
968 		if ((n->in_flags & IPN_IPRANGE) != 0)
969 			n->in_space += 1;
970 		else
971 			n->in_space -= 1;
972 	} else
973 		n->in_space = 1;
974 
975 #ifdef	USE_INET6
976 	if (n->in_v == 6 && (n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0 &&
977 	    !IP6_ISONES(&n->in_out[1]) && !IP6_ISZERO(&n->in_out[1]))
978 		IP6_ADD(&n->in_out[0], 1, &n->in_next6)
979 	else if (n->in_v == 6 &&
980 	    (n->in_flags & IPN_SPLIT) && (n->in_redir & NAT_REDIRECT))
981 		n->in_next6 = n->in_in[0];
982 	else if (n->in_v == 6)
983 		n->in_next6 = n->in_out[0];
984 	else
985 #endif
986 	if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
987 	    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
988 		n->in_nip = ntohl(n->in_outip) + 1;
989 	else if ((n->in_flags & IPN_SPLIT) &&
990 		 (n->in_redir & NAT_REDIRECT))
991 		n->in_nip = ntohl(n->in_inip);
992 	else
993 		n->in_nip = ntohl(n->in_outip);
994 
995 	if (n->in_redir & NAT_MAP) {
996 		n->in_pnext = ntohs(n->in_pmin);
997 		/*
998 		 * Multiply by the number of ports made available.
999 		 */
1000 		if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
1001 			n->in_space *= (ntohs(n->in_pmax) -
1002 					ntohs(n->in_pmin) + 1);
1003 			/*
1004 			 * Because two different sources can map to
1005 			 * different destinations but use the same
1006 			 * local IP#/port #.
1007 			 * If the result is smaller than in_space, then
1008 			 * we may have wrapped around 32bits.
1009 			 */
1010 			i = n->in_inmsk;
1011 			if ((i != 0) && (i != 0xffffffff)) {
1012 				j = n->in_space * (~ntohl(i) + 1);
1013 				if (j >= n->in_space)
1014 					n->in_space = j;
1015 				else
1016 					n->in_space = 0xffffffff;
1017 			}
1018 		}
1019 		/*
1020 		 * If no protocol is specified, multiple by 256 to allow for
1021 		 * at least one IP:IP mapping per protocol.
1022 		 */
1023 		if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
1024 				j = n->in_space * 256;
1025 				if (j >= n->in_space)
1026 					n->in_space = j;
1027 				else
1028 					n->in_space = 0xffffffff;
1029 		}
1030 	}
1031 
1032 	/* Otherwise, these fields are preset */
1033 
1034 	if (getlock) {
1035 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1036 	}
1037 	n->in_next = NULL;
1038 	*np = n;
1039 
1040 	if (n->in_age[0] != 0)
1041 	    n->in_tqehead[0] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
1042 						  n->in_age[0], ifs);
1043 
1044 	if (n->in_age[1] != 0)
1045 	    n->in_tqehead[1] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
1046 						  n->in_age[1], ifs);
1047 
1048 	if (n->in_redir & NAT_REDIRECT) {
1049 		n->in_flags &= ~IPN_NOTDST;
1050 		switch (n->in_v)
1051 		{
1052 		case 4 :
1053 			nat_addrdr(n, ifs);
1054 			break;
1055 #ifdef	USE_INET6
1056 		case 6 :
1057 			nat6_addrdr(n, ifs);
1058 			break;
1059 #endif
1060 		default :
1061 			break;
1062 		}
1063 	}
1064 	if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1065 		n->in_flags &= ~IPN_NOTSRC;
1066 		switch (n->in_v)
1067 		{
1068 		case 4 :
1069 			nat_addnat(n, ifs);
1070 			break;
1071 #ifdef	USE_INET6
1072 		case 6 :
1073 			nat6_addnat(n, ifs);
1074 			break;
1075 #endif
1076 		default :
1077 			break;
1078 		}
1079 	}
1080 	n = NULL;
1081 	ifs->ifs_nat_stats.ns_rules++;
1082 	if (getlock) {
1083 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* WRITE */
1084 	}
1085 
1086 	return error;
1087 }
1088 
1089 
1090 /* ------------------------------------------------------------------------ */
1091 /* Function:    nat_resolvrule                                              */
1092 /* Returns:     int - 0 == success, -1 == failure                           */
1093 /* Parameters:  n(I)  - pointer to NAT rule                                 */
1094 /*                                                                          */
1095 /* Resolve some of the details inside the NAT rule.  Includes resolving	    */
1096 /* any specified interfaces and proxy labels, and determines whether or not */
1097 /* all proxy labels are correctly specified.				    */
1098 /*									    */
1099 /* Called by nat_siocaddnat() (SIOCADNAT) and fr_natputent (SIOCSTPUT).     */
1100 /* ------------------------------------------------------------------------ */
1101 static int nat_resolverule(n, ifs)
1102 ipnat_t *n;
1103 ipf_stack_t *ifs;
1104 {
1105 	n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1106 	n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], n->in_v, ifs);
1107 
1108 	n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1109 	if (n->in_ifnames[1][0] == '\0') {
1110 		(void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1111 		n->in_ifps[1] = n->in_ifps[0];
1112 	} else {
1113 		n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], n->in_v, ifs);
1114 	}
1115 
1116 	if (n->in_plabel[0] != '\0') {
1117 		n->in_apr = appr_lookup(n->in_p, n->in_plabel, ifs);
1118 		if (n->in_apr == NULL)
1119 			return -1;
1120 	}
1121 	return 0;
1122 }
1123 
1124 
1125 /* ------------------------------------------------------------------------ */
1126 /* Function:    nat_siocdelnat                                              */
1127 /* Returns:     int - 0 == success, != 0 == failure                         */
1128 /* Parameters:  n(I)       - pointer to new NAT rule                        */
1129 /*              np(I)      - pointer to where to insert new NAT rule        */
1130 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
1131 /* Mutex Locks: ipf_natio                                                   */
1132 /*                                                                          */
1133 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1134 /* from information passed to the kernel, then add it  to the appropriate   */
1135 /* NAT rule table(s).                                                       */
1136 /* ------------------------------------------------------------------------ */
1137 static void nat_siocdelnat(n, np, getlock, ifs)
1138 ipnat_t *n, **np;
1139 int getlock;
1140 ipf_stack_t *ifs;
1141 {
1142 	int i;
1143 
1144 	if (getlock) {
1145 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1146 	}
1147 	if (n->in_redir & NAT_REDIRECT)
1148 		nat_delrdr(n);
1149 	if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1150 		nat_delnat(n);
1151 	if (ifs->ifs_nat_list == NULL) {
1152 		ifs->ifs_nat_masks = 0;
1153 		ifs->ifs_rdr_masks = 0;
1154 		for (i = 0; i < 4; i++) {
1155 			ifs->ifs_nat6_masks[i] = 0;
1156 			ifs->ifs_rdr6_masks[i] = 0;
1157 		}
1158 	}
1159 
1160 	if (n->in_tqehead[0] != NULL) {
1161 		if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1162 			fr_freetimeoutqueue(n->in_tqehead[0], ifs);
1163 		}
1164 	}
1165 
1166 	if (n->in_tqehead[1] != NULL) {
1167 		if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1168 			fr_freetimeoutqueue(n->in_tqehead[1], ifs);
1169 		}
1170 	}
1171 
1172 	*np = n->in_next;
1173 
1174 	if (n->in_use == 0) {
1175 		if (n->in_apr)
1176 			appr_free(n->in_apr);
1177 		KFREE(n);
1178 		ifs->ifs_nat_stats.ns_rules--;
1179 	} else {
1180 		n->in_flags |= IPN_DELETE;
1181 		n->in_next = NULL;
1182 	}
1183 	if (getlock) {
1184 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* READ/WRITE */
1185 	}
1186 }
1187 
1188 
1189 /* ------------------------------------------------------------------------ */
1190 /* Function:    fr_natgetsz                                                 */
1191 /* Returns:     int - 0 == success, != 0 is the error value.                */
1192 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1193 /*                        get the size of.                                  */
1194 /*                                                                          */
1195 /* Handle SIOCSTGSZ.                                                        */
1196 /* Return the size of the nat list entry to be copied back to user space.   */
1197 /* The size of the entry is stored in the ng_sz field and the enture natget */
1198 /* structure is copied back to the user.                                    */
1199 /* ------------------------------------------------------------------------ */
1200 static int fr_natgetsz(data, ifs)
1201 caddr_t data;
1202 ipf_stack_t *ifs;
1203 {
1204 	ap_session_t *aps;
1205 	nat_t *nat, *n;
1206 	natget_t ng;
1207 	int err;
1208 
1209 	err = BCOPYIN(data, &ng, sizeof(ng));
1210 	if (err != 0)
1211 		return EFAULT;
1212 
1213 	nat = ng.ng_ptr;
1214 	if (!nat) {
1215 		nat = ifs->ifs_nat_instances;
1216 		ng.ng_sz = 0;
1217 		/*
1218 		 * Empty list so the size returned is 0.  Simple.
1219 		 */
1220 		if (nat == NULL) {
1221 			err = BCOPYOUT(&ng, data, sizeof(ng));
1222 			if (err != 0) {
1223 				return EFAULT;
1224 			} else {
1225 				return 0;
1226 			}
1227 		}
1228 	} else {
1229 		/*
1230 		 * Make sure the pointer we're copying from exists in the
1231 		 * current list of entries.  Security precaution to prevent
1232 		 * copying of random kernel data.
1233 		 */
1234 		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1235 			if (n == nat)
1236 				break;
1237 		if (!n)
1238 			return ESRCH;
1239 	}
1240 
1241 	/*
1242 	 * Incluse any space required for proxy data structures.
1243 	 */
1244 	ng.ng_sz = sizeof(nat_save_t);
1245 	aps = nat->nat_aps;
1246 	if (aps != NULL) {
1247 		ng.ng_sz += sizeof(ap_session_t) - 4;
1248 		if (aps->aps_data != 0)
1249 			ng.ng_sz += aps->aps_psiz;
1250 	}
1251 
1252 	err = BCOPYOUT(&ng, data, sizeof(ng));
1253 	if (err != 0)
1254 		return EFAULT;
1255 	return 0;
1256 }
1257 
1258 
1259 /* ------------------------------------------------------------------------ */
1260 /* Function:    fr_natgetent                                                */
1261 /* Returns:     int - 0 == success, != 0 is the error value.                */
1262 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1263 /*                        to NAT structure to copy out.                     */
1264 /*                                                                          */
1265 /* Handle SIOCSTGET.                                                        */
1266 /* Copies out NAT entry to user space.  Any additional data held for a      */
1267 /* proxy is also copied, as to is the NAT rule which was responsible for it */
1268 /* ------------------------------------------------------------------------ */
1269 static int fr_natgetent(data, ifs)
1270 caddr_t data;
1271 ipf_stack_t *ifs;
1272 {
1273 	int error, outsize;
1274 	ap_session_t *aps;
1275 	nat_save_t *ipn, ipns;
1276 	nat_t *n, *nat;
1277 
1278 	error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1279 	if (error != 0)
1280 		return error;
1281 
1282 	if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1283 		return EINVAL;
1284 
1285 	KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1286 	if (ipn == NULL)
1287 		return ENOMEM;
1288 
1289 	ipn->ipn_dsize = ipns.ipn_dsize;
1290 	nat = ipns.ipn_next;
1291 	if (nat == NULL) {
1292 		nat = ifs->ifs_nat_instances;
1293 		if (nat == NULL) {
1294 			if (ifs->ifs_nat_instances == NULL)
1295 				error = ENOENT;
1296 			goto finished;
1297 		}
1298 	} else {
1299 		/*
1300 		 * Make sure the pointer we're copying from exists in the
1301 		 * current list of entries.  Security precaution to prevent
1302 		 * copying of random kernel data.
1303 		 */
1304 		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1305 			if (n == nat)
1306 				break;
1307 		if (n == NULL) {
1308 			error = ESRCH;
1309 			goto finished;
1310 		}
1311 	}
1312 	ipn->ipn_next = nat->nat_next;
1313 
1314 	/*
1315 	 * Copy the NAT structure.
1316 	 */
1317 	bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1318 
1319 	/*
1320 	 * If we have a pointer to the NAT rule it belongs to, save that too.
1321 	 */
1322 	if (nat->nat_ptr != NULL)
1323 		bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1324 		      sizeof(ipn->ipn_ipnat));
1325 
1326 	/*
1327 	 * If we also know the NAT entry has an associated filter rule,
1328 	 * save that too.
1329 	 */
1330 	if (nat->nat_fr != NULL)
1331 		bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1332 		      sizeof(ipn->ipn_fr));
1333 
1334 	/*
1335 	 * Last but not least, if there is an application proxy session set
1336 	 * up for this NAT entry, then copy that out too, including any
1337 	 * private data saved along side it by the proxy.
1338 	 */
1339 	aps = nat->nat_aps;
1340 	outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1341 	if (aps != NULL) {
1342 		char *s;
1343 
1344 		if (outsize < sizeof(*aps)) {
1345 			error = ENOBUFS;
1346 			goto finished;
1347 		}
1348 
1349 		s = ipn->ipn_data;
1350 		bcopy((char *)aps, s, sizeof(*aps));
1351 		s += sizeof(*aps);
1352 		outsize -= sizeof(*aps);
1353 		if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1354 			bcopy(aps->aps_data, s, aps->aps_psiz);
1355 		else
1356 			error = ENOBUFS;
1357 	}
1358 	if (error == 0) {
1359 		error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1360 	}
1361 
1362 finished:
1363 	if (ipn != NULL) {
1364 		KFREES(ipn, ipns.ipn_dsize);
1365 	}
1366 	return error;
1367 }
1368 
1369 /* ------------------------------------------------------------------------ */
1370 /* Function:    nat_calc_chksum_diffs					    */
1371 /* Returns:     void							    */
1372 /* Parameters:  nat	-	pointer to NAT table entry		    */
1373 /*                                                                          */
1374 /* Function calculates chksum deltas for IP header (nat_ipsumd) and TCP/UDP */
1375 /* headers (nat_sumd). The things for L4 (UDP/TCP) get complicated when     */
1376 /* we are dealing with partial chksum offload. For these cases we need to   */
1377 /* compute a 'partial chksum delta'. The 'partial chksum delta'is stored    */
1378 /* into nat_sumd[1], while ordinary chksum delta for TCP/UDP is in 	    */
1379 /* nat_sumd[0]. 							    */
1380 /*									    */
1381 /* The function accepts initialized NAT table entry and computes the deltas */
1382 /* from nat_inip/nat_outip members. The function is called right before	    */
1383 /* the new entry is inserted into the table.				    */
1384 /*									    */
1385 /* The ipsumd (IP hedaer chksum delta adjustment) is computed as a chksum   */
1386 /* of delta between original and new IP addresses.			    */
1387 /*									    */
1388 /* the nat_sumd[0] (TCP/UDP header chksum delta adjustment) is computed as  */
1389 /* a chkusm of delta between original an new IP addrress:port tupples.	    */
1390 /*									    */
1391 /* Some facts about chksum, we should remember:				    */
1392 /*	IP header chksum covers IP header only				    */
1393 /*									    */
1394 /*	TCP/UDP chksum covers data payload and so called pseudo header	    */
1395 /*		SRC, DST IP address					    */
1396 /*		SRC, DST Port						    */
1397 /*		length of payload					    */
1398 /*									    */
1399 /* The partial chksum delta (nat_sumd[1] is used to adjust db_ckusm16	    */
1400 /* member of dblk_t structure. The db_ckusm16 member is not part of 	    */
1401 /* IP/UDP/TCP header it is 16 bit value computed by NIC driver with partial */
1402 /* chksum offload capacbility for every inbound packet. The db_cksum16 is   */
1403 /* stored along with other IP packet data in dblk_t structure and used in   */
1404 /* for IP/UDP/TCP chksum validation later in ip.c. 			    */
1405 /*									    */
1406 /* The partial chksum delta (adjustment, nat_sumd[1]) is computed as chksum */
1407 /* of delta between new and orig address. NOTE: the order of operands for   */
1408 /* partial delta operation is swapped compared to computing the IP/TCP/UDP  */
1409 /* header adjustment. It is by design see (IP_CKSUM_RECV() macro in ip.c).  */
1410 /*									    */
1411 /* ------------------------------------------------------------------------ */
1412 void nat_calc_chksum_diffs(nat)
1413 nat_t *nat;
1414 {
1415 	u_32_t	sum_orig = 0;
1416 	u_32_t	sum_changed = 0;
1417 	u_32_t	sumd;
1418 	u_32_t	ipsum_orig = 0;
1419 	u_32_t	ipsum_changed = 0;
1420 
1421 	if (nat->nat_v != 4 && nat->nat_v != 6)
1422 		return;
1423 
1424 	/*
1425 	 * the switch calculates operands for CALC_SUMD(),
1426 	 * which will compute the partial chksum delta.
1427 	 */
1428 	switch (nat->nat_dir)
1429 	{
1430 	case NAT_INBOUND:
1431 		/*
1432 		 * we are dealing with RDR rule (DST address gets
1433 		 * modified on packet from client)
1434 		 */
1435 		if (nat->nat_v == 4) {
1436 			sum_changed = LONG_SUM(ntohl(nat->nat_inip.s_addr));
1437 			sum_orig = LONG_SUM(ntohl(nat->nat_outip.s_addr));
1438 		} else {
1439 			sum_changed = LONG_SUM6(&nat->nat_inip6);
1440 			sum_orig = LONG_SUM6(&nat->nat_outip6);
1441 		}
1442 		break;
1443 	case NAT_OUTBOUND:
1444 		/*
1445 		 * we are dealing with MAP rule (SRC address gets
1446 		 * modified on packet from client)
1447 		 */
1448 		if (nat->nat_v == 4) {
1449 			sum_changed = LONG_SUM(ntohl(nat->nat_outip.s_addr));
1450 			sum_orig = LONG_SUM(ntohl(nat->nat_inip.s_addr));
1451 		} else {
1452 			sum_changed = LONG_SUM6(&nat->nat_outip6);
1453 			sum_orig = LONG_SUM6(&nat->nat_inip6);
1454 		}
1455 		break;
1456 	default: ;
1457 		break;
1458 	}
1459 
1460 	/*
1461 	 * we also preserve CALC_SUMD() operands here, for IP chksum delta
1462 	 * calculation, which happens at the end of function.
1463 	 */
1464 	ipsum_changed = sum_changed;
1465 	ipsum_orig = sum_orig;
1466 	/*
1467 	 * NOTE: the order of operands for partial chksum adjustment
1468 	 * computation has to be swapped!
1469 	 */
1470 	CALC_SUMD(sum_changed, sum_orig, sumd);
1471 	nat->nat_sumd[1] = (sumd & 0xffff) + (sumd >> 16);
1472 
1473 	if (nat->nat_flags & (IPN_TCPUDP | IPN_ICMPQUERY)) {
1474 
1475 		/*
1476 		 * switch calculates operands for CALC_SUMD(), which will
1477 		 * compute the full chksum delta.
1478 		 */
1479 		switch (nat->nat_dir)
1480 		{
1481 		case NAT_INBOUND:
1482 			if (nat->nat_v == 4) {
1483 				sum_changed = LONG_SUM(
1484 				    ntohl(nat->nat_inip.s_addr) +
1485 				    ntohs(nat->nat_inport));
1486 				sum_orig = LONG_SUM(
1487 				    ntohl(nat->nat_outip.s_addr) +
1488 				    ntohs(nat->nat_outport));
1489 			} else {
1490 				sum_changed = LONG_SUM6(&nat->nat_inip6) +
1491 				    ntohs(nat->nat_inport);
1492 				sum_orig = LONG_SUM6(&nat->nat_outip6) +
1493 				    ntohs(nat->nat_outport);
1494 			}
1495 			break;
1496 		case NAT_OUTBOUND:
1497 			if (nat->nat_v == 4) {
1498 				sum_changed = LONG_SUM(
1499 				    ntohl(nat->nat_outip.s_addr) +
1500 				    ntohs(nat->nat_outport));
1501 				sum_orig = LONG_SUM(
1502 				    ntohl(nat->nat_inip.s_addr) +
1503 				    ntohs(nat->nat_inport));
1504 			} else {
1505 				sum_changed = LONG_SUM6(&nat->nat_outip6) +
1506 				    ntohs(nat->nat_outport);
1507 				sum_orig = LONG_SUM6(&nat->nat_inip6) +
1508 				    ntohs(nat->nat_inport);
1509 			}
1510 			break;
1511 		default: ;
1512 			break;
1513 		}
1514 
1515 		CALC_SUMD(sum_orig, sum_changed, sumd);
1516 		nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
1517 
1518 		if (!(nat->nat_flags & IPN_TCPUDP)) {
1519 			/*
1520 			 * partial HW chksum offload works for TCP/UDP headers only,
1521 			 * so we need to enforce full chksum adjustment for ICMP
1522 			 */
1523 			nat->nat_sumd[1] = nat->nat_sumd[0];
1524 		}
1525 	}
1526 	else
1527 		nat->nat_sumd[0] = nat->nat_sumd[1];
1528 
1529 	/*
1530 	 * we may reuse the already computed nat_sumd[0] for IP header chksum
1531 	 * adjustment in case the L4 (TCP/UDP header) is not changed by NAT.
1532 	 */
1533 	if (nat->nat_v == 4) {
1534 		if (NAT_HAS_L4_CHANGED(nat)) {
1535 			/*
1536 			 * bad luck, NAT changes also the L4 header, use IP
1537 			 * addresses to compute chksum adjustment for IP header.
1538 			 */
1539 			CALC_SUMD(ipsum_orig, ipsum_changed, sumd);
1540 			nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
1541 		} else {
1542 			/*
1543 			 * the NAT does not change L4 hdr -> reuse chksum
1544 			 * adjustment for IP hdr.
1545 			 */
1546 			nat->nat_ipsumd = nat->nat_sumd[0];
1547 
1548 			/*
1549 			 * if L4 header does not use chksum - zero out deltas
1550 			 */
1551 			if (!(nat->nat_flags & IPN_TCPUDP)) {
1552 				nat->nat_sumd[0] = 0;
1553 				nat->nat_sumd[1] = 0;
1554 			}
1555 		}
1556 	}
1557 
1558 	return;
1559 }
1560 
1561 /* ------------------------------------------------------------------------ */
1562 /* Function:    fr_natputent                                                */
1563 /* Returns:     int - 0 == success, != 0 is the error value.                */
1564 /* Parameters:  data(I) -     pointer to natget structure with NAT          */
1565 /*                            structure information to load into the kernel */
1566 /*              getlock(I) - flag indicating whether or not a write lock    */
1567 /*                           on ipf_nat is already held.                    */
1568 /*                                                                          */
1569 /* Handle SIOCSTPUT.                                                        */
1570 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1571 /* firewall rule data structures, if pointers to them indicate so.          */
1572 /* ------------------------------------------------------------------------ */
1573 static int fr_natputent(data, getlock, ifs)
1574 caddr_t data;
1575 int getlock;
1576 ipf_stack_t *ifs;
1577 {
1578 	nat_save_t ipn, *ipnn;
1579 	ap_session_t *aps;
1580 	nat_t *n, *nat;
1581 	frentry_t *fr;
1582 	fr_info_t fin;
1583 	ipnat_t *in;
1584 	int error;
1585 
1586 	error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1587 	if (error != 0)
1588 		return error;
1589 
1590 	/*
1591 	 * Trigger automatic call to nat_extraflush() if the
1592 	 * table has reached capcity specified by hi watermark.
1593 	 */
1594 	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi)
1595 		ifs->ifs_nat_doflush = 1;
1596 
1597 	/*
1598 	 * If automatic flushing did not do its job, and the table
1599 	 * has filled up, don't try to create a new entry.
1600 	 */
1601 	if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) {
1602 		ifs->ifs_nat_stats.ns_memfail++;
1603 		return ENOMEM;
1604 	}
1605 
1606 	/*
1607 	 * Initialise early because of code at junkput label.
1608 	 */
1609 	in = NULL;
1610 	aps = NULL;
1611 	nat = NULL;
1612 	ipnn = NULL;
1613 
1614 	/*
1615 	 * New entry, copy in the rest of the NAT entry if it's size is more
1616 	 * than just the nat_t structure.
1617 	 */
1618 	fr = NULL;
1619 	if (ipn.ipn_dsize > sizeof(ipn)) {
1620 		if (ipn.ipn_dsize > 81920) {
1621 			error = ENOMEM;
1622 			goto junkput;
1623 		}
1624 
1625 		KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1626 		if (ipnn == NULL)
1627 			return ENOMEM;
1628 
1629 		error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1630 		if (error != 0) {
1631 			error = EFAULT;
1632 			goto junkput;
1633 		}
1634 	} else
1635 		ipnn = &ipn;
1636 
1637 	KMALLOC(nat, nat_t *);
1638 	if (nat == NULL) {
1639 		error = ENOMEM;
1640 		goto junkput;
1641 	}
1642 
1643 	bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1644 	/*
1645 	 * Initialize all these so that nat_delete() doesn't cause a crash.
1646 	 */
1647 	bzero((char *)nat, offsetof(struct nat, nat_tqe));
1648 	nat->nat_tqe.tqe_pnext = NULL;
1649 	nat->nat_tqe.tqe_next = NULL;
1650 	nat->nat_tqe.tqe_ifq = NULL;
1651 	nat->nat_tqe.tqe_parent = nat;
1652 
1653 	/*
1654 	 * Restore the rule associated with this nat session
1655 	 */
1656 	in = ipnn->ipn_nat.nat_ptr;
1657 	if (in != NULL) {
1658 		KMALLOC(in, ipnat_t *);
1659 		nat->nat_ptr = in;
1660 		if (in == NULL) {
1661 			error = ENOMEM;
1662 			goto junkput;
1663 		}
1664 		bzero((char *)in, offsetof(struct ipnat, in_next6));
1665 		bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1666 		in->in_use = 1;
1667 		in->in_flags |= IPN_DELETE;
1668 
1669 		ATOMIC_INC(ifs->ifs_nat_stats.ns_rules);
1670 
1671 		if (nat_resolverule(in, ifs) != 0) {
1672 			error = ESRCH;
1673 			goto junkput;
1674 		}
1675 	}
1676 
1677 	/*
1678 	 * Check that the NAT entry doesn't already exist in the kernel.
1679 	 */
1680 	if (nat->nat_v != 6)
1681 		nat->nat_v = 4;
1682 	bzero((char *)&fin, sizeof(fin));
1683 	fin.fin_p = nat->nat_p;
1684 	fin.fin_ifs = ifs;
1685 	if (nat->nat_dir == NAT_OUTBOUND) {
1686 		fin.fin_data[0] = ntohs(nat->nat_oport);
1687 		fin.fin_data[1] = ntohs(nat->nat_outport);
1688 		fin.fin_ifp = nat->nat_ifps[0];
1689 		if (getlock) {
1690 			READ_ENTER(&ifs->ifs_ipf_nat);
1691 		}
1692 
1693 		switch (nat->nat_v)
1694 		{
1695 		case 4:
1696 			fin.fin_v = nat->nat_v;
1697 			n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1698 			    nat->nat_oip, nat->nat_outip);
1699 			break;
1700 #ifdef USE_INET6
1701 		case 6:
1702 			n = nat6_inlookup(&fin, nat->nat_flags, fin.fin_p,
1703 			    &nat->nat_oip6.in6, &nat->nat_outip6.in6);
1704 			break;
1705 #endif
1706 		default:
1707 			n = NULL;
1708 			break;
1709 		}
1710 
1711 		if (getlock) {
1712 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1713 		}
1714 		if (n != NULL) {
1715 			error = EEXIST;
1716 			goto junkput;
1717 		}
1718 	} else if (nat->nat_dir == NAT_INBOUND) {
1719 		fin.fin_data[0] = ntohs(nat->nat_inport);
1720 		fin.fin_data[1] = ntohs(nat->nat_oport);
1721 		fin.fin_ifp = nat->nat_ifps[1];
1722 		if (getlock) {
1723 			READ_ENTER(&ifs->ifs_ipf_nat);
1724 		}
1725 
1726 		switch (nat->nat_v)
1727 		{
1728 		case 4:
1729 			n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1730 			    nat->nat_inip, nat->nat_oip);
1731 			break;
1732 #ifdef USE_INET6
1733 		case 6:
1734 			n = nat6_outlookup(&fin, nat->nat_flags, fin.fin_p,
1735 			    &nat->nat_inip6.in6, &nat->nat_oip6.in6);
1736 			break;
1737 #endif
1738 		default:
1739 			n = NULL;
1740 			break;
1741 		}
1742 
1743 		if (getlock) {
1744 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1745 		}
1746 		if (n != NULL) {
1747 			error = EEXIST;
1748 			goto junkput;
1749 		}
1750 	} else {
1751 		error = EINVAL;
1752 		goto junkput;
1753 	}
1754 
1755 	/*
1756 	 * Restore ap_session_t structure.  Include the private data allocated
1757 	 * if it was there.
1758 	 */
1759 	aps = nat->nat_aps;
1760 	if (aps != NULL) {
1761 		KMALLOC(aps, ap_session_t *);
1762 		nat->nat_aps = aps;
1763 		if (aps == NULL) {
1764 			error = ENOMEM;
1765 			goto junkput;
1766 		}
1767 		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1768 		if (in != NULL)
1769 			aps->aps_apr = in->in_apr;
1770 		else
1771 			aps->aps_apr = NULL;
1772 		if (aps->aps_psiz != 0) {
1773 			if (aps->aps_psiz > 81920) {
1774 				error = ENOMEM;
1775 				goto junkput;
1776 			}
1777 			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1778 			if (aps->aps_data == NULL) {
1779 				error = ENOMEM;
1780 				goto junkput;
1781 			}
1782 			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1783 			      aps->aps_psiz);
1784 		} else {
1785 			aps->aps_psiz = 0;
1786 			aps->aps_data = NULL;
1787 		}
1788 	}
1789 
1790 	/*
1791 	 * If there was a filtering rule associated with this entry then
1792 	 * build up a new one.
1793 	 */
1794 	fr = nat->nat_fr;
1795 	if (fr != NULL) {
1796 		if ((nat->nat_flags & SI_NEWFR) != 0) {
1797 			KMALLOC(fr, frentry_t *);
1798 			nat->nat_fr = fr;
1799 			if (fr == NULL) {
1800 				error = ENOMEM;
1801 				goto junkput;
1802 			}
1803 			ipnn->ipn_nat.nat_fr = fr;
1804 			(void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1805 			bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1806 
1807 			fr->fr_ref = 1;
1808 			fr->fr_dsize = 0;
1809 			fr->fr_data = NULL;
1810 			fr->fr_type = FR_T_NONE;
1811 
1812 			MUTEX_NUKE(&fr->fr_lock);
1813 			MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1814 		} else {
1815 			if (getlock) {
1816 				READ_ENTER(&ifs->ifs_ipf_nat);
1817 			}
1818 			for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1819 				if (n->nat_fr == fr)
1820 					break;
1821 
1822 			if (n != NULL) {
1823 				MUTEX_ENTER(&fr->fr_lock);
1824 				fr->fr_ref++;
1825 				MUTEX_EXIT(&fr->fr_lock);
1826 			}
1827 			if (getlock) {
1828 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1829 			}
1830 			if (!n) {
1831 				error = ESRCH;
1832 				goto junkput;
1833 			}
1834 		}
1835 	}
1836 
1837 	if (ipnn != &ipn) {
1838 		KFREES(ipnn, ipn.ipn_dsize);
1839 		ipnn = NULL;
1840 	}
1841 
1842 	nat_calc_chksum_diffs(nat);
1843 
1844 	if (getlock) {
1845 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1846 	}
1847 
1848 	nat_calc_chksum_diffs(nat);
1849 
1850 	switch (nat->nat_v)
1851 	{
1852 	case 4 :
1853 		error = nat_insert(nat, nat->nat_rev, ifs);
1854 		break;
1855 #ifdef USE_INET6
1856 	case 6 :
1857 		error = nat6_insert(nat, nat->nat_rev, ifs);
1858 		break;
1859 #endif
1860 	default :
1861 		break;
1862 	}
1863 
1864 	if ((error == 0) && (aps != NULL)) {
1865 		aps->aps_next = ifs->ifs_ap_sess_list;
1866 		ifs->ifs_ap_sess_list = aps;
1867 	}
1868 	if (getlock) {
1869 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1870 	}
1871 
1872 	if (error == 0)
1873 		return 0;
1874 
1875 	error = ENOMEM;
1876 
1877 junkput:
1878 	if (fr != NULL)
1879 		(void) fr_derefrule(&fr, ifs);
1880 
1881 	if ((ipnn != NULL) && (ipnn != &ipn)) {
1882 		KFREES(ipnn, ipn.ipn_dsize);
1883 	}
1884 	if (nat != NULL) {
1885 		if (aps != NULL) {
1886 			if (aps->aps_data != NULL) {
1887 				KFREES(aps->aps_data, aps->aps_psiz);
1888 			}
1889 			KFREE(aps);
1890 		}
1891 		if (in != NULL) {
1892 			if (in->in_apr)
1893 				appr_free(in->in_apr);
1894 			KFREE(in);
1895 		}
1896 		KFREE(nat);
1897 	}
1898 	return error;
1899 }
1900 
1901 
1902 /* ------------------------------------------------------------------------ */
1903 /* Function:    nat_delete                                                  */
1904 /* Returns:     Nil                                                         */
1905 /* Parameters:  natd(I)    - pointer to NAT structure to delete             */
1906 /*              logtype(I) - type of LOG record to create before deleting   */
1907 /*		ifs - ipf stack instance				    */
1908 /* Write Lock:  ipf_nat                                                     */
1909 /*                                                                          */
1910 /* Delete a nat entry from the various lists and table.  If NAT logging is  */
1911 /* enabled then generate a NAT log record for this event.                   */
1912 /* ------------------------------------------------------------------------ */
1913 void nat_delete(nat, logtype, ifs)
1914 struct nat *nat;
1915 int logtype;
1916 ipf_stack_t *ifs;
1917 {
1918 	struct ipnat *ipn;
1919 	int removed = 0;
1920 
1921 	if (logtype != 0 && ifs->ifs_nat_logging != 0)
1922 		nat_log(nat, logtype, ifs);
1923 
1924 	/*
1925 	 * Start by removing the entry from the hash table of nat entries
1926 	 * so it will not be "used" again.
1927 	 *
1928 	 * It will remain in the "list" of nat entries until all references
1929 	 * have been accounted for.
1930 	 */
1931 	if ((nat->nat_phnext[0] != NULL) && (nat->nat_phnext[1] != NULL)) {
1932 		removed = 1;
1933 
1934 		ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1935 		ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
1936 
1937 		*nat->nat_phnext[0] = nat->nat_hnext[0];
1938 		if (nat->nat_hnext[0] != NULL) {
1939 			nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1940 			nat->nat_hnext[0] = NULL;
1941 		}
1942 		nat->nat_phnext[0] = NULL;
1943 
1944 		*nat->nat_phnext[1] = nat->nat_hnext[1];
1945 		if (nat->nat_hnext[1] != NULL) {
1946 			nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1947 			nat->nat_hnext[1] = NULL;
1948 		}
1949 		nat->nat_phnext[1] = NULL;
1950 
1951 		if ((nat->nat_flags & SI_WILDP) != 0)
1952 			ifs->ifs_nat_stats.ns_wilds--;
1953 	}
1954 
1955 	/*
1956 	 * Next, remove it from the timeout queue it is in.
1957 	 */
1958 	fr_deletequeueentry(&nat->nat_tqe);
1959 
1960 	if (nat->nat_me != NULL) {
1961 		*nat->nat_me = NULL;
1962 		nat->nat_me = NULL;
1963 	}
1964 
1965 	MUTEX_ENTER(&nat->nat_lock);
1966  	if (logtype == NL_DESTROY) {
1967  		/*
1968  		 * NL_DESTROY should only be passed when nat_ref >= 2.
1969  		 * This happens when a nat'd packet is blocked, we have
1970 		 * just created the nat table entry (reason why the ref
1971 		 * count is 2 or higher), but and we want to throw away
1972 		 * that NAT session as result of the blocked packet.
1973  		 */
1974  		if (nat->nat_ref > 2) {
1975  			nat->nat_ref -= 2;
1976  			MUTEX_EXIT(&nat->nat_lock);
1977  			if (removed)
1978  				ifs->ifs_nat_stats.ns_orphans++;
1979  			return;
1980  		}
1981  	} else if (nat->nat_ref > 1) {
1982 		nat->nat_ref--;
1983 		MUTEX_EXIT(&nat->nat_lock);
1984  		if (removed)
1985  			ifs->ifs_nat_stats.ns_orphans++;
1986 		return;
1987 	}
1988 	MUTEX_EXIT(&nat->nat_lock);
1989 
1990 	nat->nat_ref = 0;
1991 
1992 	/*
1993 	 * If entry had already been removed,
1994 	 * it means we're cleaning up an orphan.
1995 	 */
1996  	if (!removed)
1997  		ifs->ifs_nat_stats.ns_orphans--;
1998 
1999 #ifdef	IPFILTER_SYNC
2000 	if (nat->nat_sync)
2001 		ipfsync_del(nat->nat_sync);
2002 #endif
2003 
2004 	/*
2005 	 * Now remove it from master list of nat table entries
2006 	 */
2007 	if (nat->nat_pnext != NULL) {
2008 		*nat->nat_pnext = nat->nat_next;
2009 		if (nat->nat_next != NULL) {
2010 			nat->nat_next->nat_pnext = nat->nat_pnext;
2011 			nat->nat_next = NULL;
2012 		}
2013 		nat->nat_pnext = NULL;
2014 	}
2015 
2016 	if (nat->nat_fr != NULL)
2017 		(void)fr_derefrule(&nat->nat_fr, ifs);
2018 
2019 	if (nat->nat_hm != NULL)
2020 		fr_hostmapdel(&nat->nat_hm);
2021 
2022 	/*
2023 	 * If there is an active reference from the nat entry to its parent
2024 	 * rule, decrement the rule's reference count and free it too if no
2025 	 * longer being used.
2026 	 */
2027 	ipn = nat->nat_ptr;
2028 	if (ipn != NULL) {
2029 		ipn->in_space++;
2030 		ipn->in_use--;
2031 		if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) {
2032 			if (ipn->in_apr)
2033 				appr_free(ipn->in_apr);
2034 			KFREE(ipn);
2035 			ifs->ifs_nat_stats.ns_rules--;
2036 		}
2037 	}
2038 
2039 	MUTEX_DESTROY(&nat->nat_lock);
2040 
2041 	aps_free(nat->nat_aps, ifs);
2042 	ifs->ifs_nat_stats.ns_inuse--;
2043 
2044 	/*
2045 	 * If there's a fragment table entry too for this nat entry, then
2046 	 * dereference that as well.  This is after nat_lock is released
2047 	 * because of Tru64.
2048 	 */
2049 	fr_forgetnat((void *)nat, ifs);
2050 
2051 	KFREE(nat);
2052 }
2053 
2054 
2055 /* ------------------------------------------------------------------------ */
2056 /* Function:    nat_flushtable                                              */
2057 /* Returns:     int - number of NAT rules deleted                           */
2058 /* Parameters:  Nil                                                         */
2059 /*                                                                          */
2060 /* Deletes all currently active NAT sessions.  In deleting each NAT entry a */
2061 /* log record should be emitted in nat_delete() if NAT logging is enabled.  */
2062 /* ------------------------------------------------------------------------ */
2063 /*
2064  * nat_flushtable - clear the NAT table of all mapping entries.
2065  */
2066 static int nat_flushtable(ifs)
2067 ipf_stack_t *ifs;
2068 {
2069 	nat_t *nat;
2070 	int j = 0;
2071 
2072 	/*
2073 	 * ALL NAT mappings deleted, so lets just make the deletions
2074 	 * quicker.
2075 	 */
2076 	if (ifs->ifs_nat_table[0] != NULL)
2077 		bzero((char *)ifs->ifs_nat_table[0],
2078 		      sizeof(ifs->ifs_nat_table[0]) * ifs->ifs_ipf_nattable_sz);
2079 	if (ifs->ifs_nat_table[1] != NULL)
2080 		bzero((char *)ifs->ifs_nat_table[1],
2081 		      sizeof(ifs->ifs_nat_table[1]) * ifs->ifs_ipf_nattable_sz);
2082 
2083 	while ((nat = ifs->ifs_nat_instances) != NULL) {
2084 		nat_delete(nat, NL_FLUSH, ifs);
2085 		j++;
2086 	}
2087 
2088 	return j;
2089 }
2090 
2091 
2092 /* ------------------------------------------------------------------------ */
2093 /* Function:    nat_clearlist                                               */
2094 /* Returns:     int - number of NAT/RDR rules deleted                       */
2095 /* Parameters:  Nil                                                         */
2096 /*                                                                          */
2097 /* Delete all rules in the current list of rules.  There is nothing elegant */
2098 /* about this cleanup: simply free all entries on the list of rules and     */
2099 /* clear out the tables used for hashed NAT rule lookups.                   */
2100 /* ------------------------------------------------------------------------ */
2101 static int nat_clearlist(ifs)
2102 ipf_stack_t *ifs;
2103 {
2104 	ipnat_t *n, **np = &ifs->ifs_nat_list;
2105 	int i = 0;
2106 
2107 	if (ifs->ifs_nat_rules != NULL)
2108 		bzero((char *)ifs->ifs_nat_rules,
2109 		      sizeof(*ifs->ifs_nat_rules) * ifs->ifs_ipf_natrules_sz);
2110 	if (ifs->ifs_rdr_rules != NULL)
2111 		bzero((char *)ifs->ifs_rdr_rules,
2112 		      sizeof(*ifs->ifs_rdr_rules) * ifs->ifs_ipf_rdrrules_sz);
2113 
2114 	while ((n = *np) != NULL) {
2115 		*np = n->in_next;
2116 		if (n->in_use == 0) {
2117 			if (n->in_apr != NULL)
2118 				appr_free(n->in_apr);
2119 			KFREE(n);
2120 			ifs->ifs_nat_stats.ns_rules--;
2121 		} else {
2122 			n->in_flags |= IPN_DELETE;
2123 			n->in_next = NULL;
2124 		}
2125 		i++;
2126 	}
2127 	ifs->ifs_nat_masks = 0;
2128 	ifs->ifs_rdr_masks = 0;
2129 	for (i = 0; i < 4; i++) {
2130 		ifs->ifs_nat6_masks[i] = 0;
2131 		ifs->ifs_rdr6_masks[i] = 0;
2132 	}
2133 	return i;
2134 }
2135 
2136 
2137 /* ------------------------------------------------------------------------ */
2138 /* Function:    nat_newmap                                                  */
2139 /* Returns:     int - -1 == error, 0 == success                             */
2140 /* Parameters:  fin(I) - pointer to packet information                      */
2141 /*              nat(I) - pointer to NAT entry                               */
2142 /*              ni(I)  - pointer to structure with misc. information needed */
2143 /*                       to create new NAT entry.                           */
2144 /*                                                                          */
2145 /* Given an empty NAT structure, populate it with new information about a   */
2146 /* new NAT session, as defined by the matching NAT rule.                    */
2147 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2148 /* to the new IP address for the translation.                               */
2149 /* ------------------------------------------------------------------------ */
2150 static INLINE int nat_newmap(fin, nat, ni)
2151 fr_info_t *fin;
2152 nat_t *nat;
2153 natinfo_t *ni;
2154 {
2155 	u_short st_port, dport, sport, port, sp, dp;
2156 	struct in_addr in, inb;
2157 	hostmap_t *hm;
2158 	u_32_t flags;
2159 	u_32_t st_ip;
2160 	ipnat_t *np;
2161 	nat_t *natl;
2162 	int l;
2163 	ipf_stack_t *ifs = fin->fin_ifs;
2164 
2165 	/*
2166 	 * If it's an outbound packet which doesn't match any existing
2167 	 * record, then create a new port
2168 	 */
2169 	l = 0;
2170 	hm = NULL;
2171 	np = ni->nai_np;
2172 	st_ip = np->in_nip;
2173 	st_port = np->in_pnext;
2174 	flags = ni->nai_flags;
2175 	sport = ni->nai_sport;
2176 	dport = ni->nai_dport;
2177 
2178 	/*
2179 	 * Do a loop until we either run out of entries to try or we find
2180 	 * a NAT mapping that isn't currently being used.  This is done
2181 	 * because the change to the source is not (usually) being fixed.
2182 	 */
2183 	do {
2184 		port = 0;
2185 		in.s_addr = htonl(np->in_nip);
2186 		if (l == 0) {
2187 			/*
2188 			 * Check to see if there is an existing NAT
2189 			 * setup for this IP address pair.
2190 			 */
2191 			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2192 					 in, 0, ifs);
2193 			if (hm != NULL)
2194 				in.s_addr = hm->hm_mapip.s_addr;
2195 		} else if ((l == 1) && (hm != NULL)) {
2196 			fr_hostmapdel(&hm);
2197 		}
2198 		in.s_addr = ntohl(in.s_addr);
2199 
2200 		nat->nat_hm = hm;
2201 
2202 		if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
2203 			if (l > 0)
2204 				return -1;
2205 		}
2206 
2207 		if (np->in_redir == NAT_BIMAP &&
2208 		    np->in_inmsk == np->in_outmsk) {
2209 			/*
2210 			 * map the address block in a 1:1 fashion
2211 			 */
2212 			in.s_addr = np->in_outip;
2213 			in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
2214 			in.s_addr = ntohl(in.s_addr);
2215 
2216 		} else if (np->in_redir & NAT_MAPBLK) {
2217 			if ((l >= np->in_ppip) || ((l > 0) &&
2218 			     !(flags & IPN_TCPUDP)))
2219 				return -1;
2220 			/*
2221 			 * map-block - Calculate destination address.
2222 			 */
2223 			in.s_addr = ntohl(fin->fin_saddr);
2224 			in.s_addr &= ntohl(~np->in_inmsk);
2225 			inb.s_addr = in.s_addr;
2226 			in.s_addr /= np->in_ippip;
2227 			in.s_addr &= ntohl(~np->in_outmsk);
2228 			in.s_addr += ntohl(np->in_outip);
2229 			/*
2230 			 * Calculate destination port.
2231 			 */
2232 			if ((flags & IPN_TCPUDP) &&
2233 			    (np->in_ppip != 0)) {
2234 				port = ntohs(sport) + l;
2235 				port %= np->in_ppip;
2236 				port += np->in_ppip *
2237 					(inb.s_addr % np->in_ippip);
2238 				port += MAPBLK_MINPORT;
2239 				port = htons(port);
2240 			}
2241 
2242 		} else if ((np->in_outip == 0) &&
2243 			   (np->in_outmsk == 0xffffffff)) {
2244 			/*
2245 			 * 0/32 - use the interface's IP address.
2246 			 */
2247 			if ((l > 0) ||
2248 			    fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
2249 				       &in, NULL, fin->fin_ifs) == -1)
2250 				return -1;
2251 			in.s_addr = ntohl(in.s_addr);
2252 
2253 		} else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
2254 			/*
2255 			 * 0/0 - use the original source address/port.
2256 			 */
2257 			if (l > 0)
2258 				return -1;
2259 			in.s_addr = ntohl(fin->fin_saddr);
2260 
2261 		} else if ((np->in_outmsk != 0xffffffff) &&
2262 			   (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
2263 			np->in_nip++;
2264 
2265 		natl = NULL;
2266 
2267 		if ((flags & IPN_TCPUDP) &&
2268 		    ((np->in_redir & NAT_MAPBLK) == 0) &&
2269 		    (np->in_flags & IPN_AUTOPORTMAP)) {
2270 			/*
2271 			 * "ports auto" (without map-block)
2272 			 */
2273 			if ((l > 0) && (l % np->in_ppip == 0)) {
2274 				if (l > np->in_space) {
2275 					return -1;
2276 				} else if ((l > np->in_ppip) &&
2277 					   np->in_outmsk != 0xffffffff)
2278 					np->in_nip++;
2279 			}
2280 			if (np->in_ppip != 0) {
2281 				port = ntohs(sport);
2282 				port += (l % np->in_ppip);
2283 				port %= np->in_ppip;
2284 				port += np->in_ppip *
2285 					(ntohl(fin->fin_saddr) %
2286 					 np->in_ippip);
2287 				port += MAPBLK_MINPORT;
2288 				port = htons(port);
2289 			}
2290 
2291 		} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
2292 			   (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
2293 			/*
2294 			 * Standard port translation.  Select next port.
2295 			 */
2296 			if (np->in_flags & IPN_SEQUENTIAL) {
2297 				port = np->in_pnext;
2298 			} else {
2299 				port = ipf_random() % (ntohs(np->in_pmax) -
2300 						       ntohs(np->in_pmin));
2301 				port += ntohs(np->in_pmin);
2302 			}
2303 			port = htons(port);
2304 			np->in_pnext++;
2305 
2306 			if (np->in_pnext > ntohs(np->in_pmax)) {
2307 				np->in_pnext = ntohs(np->in_pmin);
2308 				if (np->in_outmsk != 0xffffffff)
2309 					np->in_nip++;
2310 			}
2311 		}
2312 
2313 		if (np->in_flags & IPN_IPRANGE) {
2314 			if (np->in_nip > ntohl(np->in_outmsk))
2315 				np->in_nip = ntohl(np->in_outip);
2316 		} else {
2317 			if ((np->in_outmsk != 0xffffffff) &&
2318 			    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
2319 			    ntohl(np->in_outip))
2320 				np->in_nip = ntohl(np->in_outip) + 1;
2321 		}
2322 
2323 		if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
2324 			port = sport;
2325 
2326 		/*
2327 		 * Here we do a lookup of the connection as seen from
2328 		 * the outside.  If an IP# pair already exists, try
2329 		 * again.  So if you have A->B becomes C->B, you can
2330 		 * also have D->E become C->E but not D->B causing
2331 		 * another C->B.  Also take protocol and ports into
2332 		 * account when determining whether a pre-existing
2333 		 * NAT setup will cause an external conflict where
2334 		 * this is appropriate.
2335 		 */
2336 		inb.s_addr = htonl(in.s_addr);
2337 		sp = fin->fin_data[0];
2338 		dp = fin->fin_data[1];
2339 		fin->fin_data[0] = fin->fin_data[1];
2340 		fin->fin_data[1] = htons(port);
2341 		natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2342 				    (u_int)fin->fin_p, fin->fin_dst, inb);
2343 		fin->fin_data[0] = sp;
2344 		fin->fin_data[1] = dp;
2345 
2346 		/*
2347 		 * Has the search wrapped around and come back to the
2348 		 * start ?
2349 		 */
2350 		if ((natl != NULL) &&
2351 		    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
2352 		    (np->in_nip != 0) && (st_ip == np->in_nip))
2353 			return -1;
2354 		l++;
2355 	} while (natl != NULL);
2356 
2357 	if (np->in_space > 0)
2358 		np->in_space--;
2359 
2360 	/* Setup the NAT table */
2361 	nat->nat_inip = fin->fin_src;
2362 	nat->nat_outip.s_addr = htonl(in.s_addr);
2363 	nat->nat_oip = fin->fin_dst;
2364 	if (nat->nat_hm == NULL)
2365 		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2366 					  nat->nat_outip, 0, ifs);
2367 
2368 	if (flags & IPN_TCPUDP) {
2369 		nat->nat_inport = sport;
2370 		nat->nat_outport = port;	/* sport */
2371 		nat->nat_oport = dport;
2372 		((tcphdr_t *)fin->fin_dp)->th_sport = port;
2373 	} else if (flags & IPN_ICMPQUERY) {
2374 		((icmphdr_t *)fin->fin_dp)->icmp_id = port;
2375 		nat->nat_inport = port;
2376 		nat->nat_outport = port;
2377 	}
2378 
2379 	ni->nai_ip.s_addr = in.s_addr;
2380 	ni->nai_port = port;
2381 	ni->nai_nport = dport;
2382 	return 0;
2383 }
2384 
2385 
2386 /* ------------------------------------------------------------------------ */
2387 /* Function:    nat_newrdr                                                  */
2388 /* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
2389 /*                    allow rule to be moved if IPN_ROUNDR is set.          */
2390 /* Parameters:  fin(I) - pointer to packet information                      */
2391 /*              nat(I) - pointer to NAT entry                               */
2392 /*              ni(I)  - pointer to structure with misc. information needed */
2393 /*                       to create new NAT entry.                           */
2394 /*                                                                          */
2395 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2396 /* to the new IP address for the translation.                               */
2397 /* ------------------------------------------------------------------------ */
2398 static INLINE int nat_newrdr(fin, nat, ni)
2399 fr_info_t *fin;
2400 nat_t *nat;
2401 natinfo_t *ni;
2402 {
2403 	u_short nport, dport, sport;
2404 	struct in_addr in, inb;
2405 	u_short sp, dp;
2406 	hostmap_t *hm;
2407 	u_32_t flags;
2408 	ipnat_t *np;
2409 	nat_t *natl;
2410 	int move;
2411 	ipf_stack_t *ifs = fin->fin_ifs;
2412 
2413 	move = 1;
2414 	hm = NULL;
2415 	in.s_addr = 0;
2416 	np = ni->nai_np;
2417 	flags = ni->nai_flags;
2418 	sport = ni->nai_sport;
2419 	dport = ni->nai_dport;
2420 
2421 	/*
2422 	 * If the matching rule has IPN_STICKY set, then we want to have the
2423 	 * same rule kick in as before.  Why would this happen?  If you have
2424 	 * a collection of rdr rules with "round-robin sticky", the current
2425 	 * packet might match a different one to the previous connection but
2426 	 * we want the same destination to be used.
2427 	 */
2428 	if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) ==
2429 	    (IPN_ROUNDR|IPN_STICKY)) {
2430 		hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2431 				 (u_32_t)dport, ifs);
2432 		if (hm != NULL) {
2433 			in.s_addr = ntohl(hm->hm_mapip.s_addr);
2434 			np = hm->hm_ipnat;
2435 			ni->nai_np = np;
2436 			move = 0;
2437 		}
2438 	}
2439 
2440 	/*
2441 	 * Otherwise, it's an inbound packet. Most likely, we don't
2442 	 * want to rewrite source ports and source addresses. Instead,
2443 	 * we want to rewrite to a fixed internal address and fixed
2444 	 * internal port.
2445 	 */
2446 	if (np->in_flags & IPN_SPLIT) {
2447 		in.s_addr = np->in_nip;
2448 
2449 		if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2450 			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2451 					 in, (u_32_t)dport, ifs);
2452 			if (hm != NULL) {
2453 				in.s_addr = hm->hm_mapip.s_addr;
2454 				move = 0;
2455 			}
2456 		}
2457 
2458 		if (hm == NULL || hm->hm_ref == 1) {
2459 			if (np->in_inip == htonl(in.s_addr)) {
2460 				np->in_nip = ntohl(np->in_inmsk);
2461 				move = 0;
2462 			} else {
2463 				np->in_nip = ntohl(np->in_inip);
2464 			}
2465 		}
2466 
2467 	} else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2468 		/*
2469 		 * 0/32 - use the interface's IP address.
2470 		 */
2471 		if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL,
2472 			   fin->fin_ifs) == -1)
2473 			return -1;
2474 		in.s_addr = ntohl(in.s_addr);
2475 
2476 	} else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2477 		/*
2478 		 * 0/0 - use the original destination address/port.
2479 		 */
2480 		in.s_addr = ntohl(fin->fin_daddr);
2481 
2482 	} else if (np->in_redir == NAT_BIMAP &&
2483 		   np->in_inmsk == np->in_outmsk) {
2484 		/*
2485 		 * map the address block in a 1:1 fashion
2486 		 */
2487 		in.s_addr = np->in_inip;
2488 		in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2489 		in.s_addr = ntohl(in.s_addr);
2490 	} else {
2491 		in.s_addr = ntohl(np->in_inip);
2492 	}
2493 
2494 	if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2495 		nport = dport;
2496 	else {
2497 		/*
2498 		 * Whilst not optimized for the case where
2499 		 * pmin == pmax, the gain is not significant.
2500 		 */
2501 		if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2502 		    (np->in_pmin != np->in_pmax)) {
2503 			nport = ntohs(dport) - ntohs(np->in_pmin) +
2504 				ntohs(np->in_pnext);
2505 			nport = htons(nport);
2506 		} else
2507 			nport = np->in_pnext;
2508 	}
2509 
2510 	/*
2511 	 * When the redirect-to address is set to 0.0.0.0, just
2512 	 * assume a blank `forwarding' of the packet.  We don't
2513 	 * setup any translation for this either.
2514 	 */
2515 	if (in.s_addr == 0) {
2516 		if (nport == dport)
2517 			return -1;
2518 		in.s_addr = ntohl(fin->fin_daddr);
2519 	}
2520 
2521 	/*
2522 	 * Check to see if this redirect mapping already exists and if
2523 	 * it does, return "failure" (allowing it to be created will just
2524 	 * cause one or both of these "connections" to stop working.)
2525 	 */
2526 	inb.s_addr = htonl(in.s_addr);
2527 	sp = fin->fin_data[0];
2528 	dp = fin->fin_data[1];
2529 	fin->fin_data[1] = fin->fin_data[0];
2530 	fin->fin_data[0] = ntohs(nport);
2531 	natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2532 		    (u_int)fin->fin_p, inb, fin->fin_src);
2533 	fin->fin_data[0] = sp;
2534 	fin->fin_data[1] = dp;
2535 	if (natl != NULL)
2536 		return (-1);
2537 
2538 	nat->nat_inip.s_addr = htonl(in.s_addr);
2539 	nat->nat_outip = fin->fin_dst;
2540 	nat->nat_oip = fin->fin_src;
2541 
2542 	ni->nai_ip.s_addr = in.s_addr;
2543 	ni->nai_nport = nport;
2544 	ni->nai_port = sport;
2545 
2546 	if (flags & IPN_TCPUDP) {
2547 		nat->nat_inport = nport;
2548 		nat->nat_outport = dport;
2549 		nat->nat_oport = sport;
2550 		((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2551 	} else if (flags & IPN_ICMPQUERY) {
2552 		((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2553 		nat->nat_inport = nport;
2554 		nat->nat_outport = nport;
2555 	}
2556 
2557 	return move;
2558 }
2559 
2560 /* ------------------------------------------------------------------------ */
2561 /* Function:    nat_new                                                     */
2562 /* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
2563 /*                       else pointer to new NAT structure                  */
2564 /* Parameters:  fin(I)       - pointer to packet information                */
2565 /*              np(I)        - pointer to NAT rule                          */
2566 /*              natsave(I)   - pointer to where to store NAT struct pointer */
2567 /*              flags(I)     - flags describing the current packet          */
2568 /*              direction(I) - direction of packet (in/out)                 */
2569 /* Write Lock:  ipf_nat                                                     */
2570 /*                                                                          */
2571 /* Attempts to create a new NAT entry.  Does not actually change the packet */
2572 /* in any way.                                                              */
2573 /*                                                                          */
2574 /* This fucntion is in three main parts: (1) deal with creating a new NAT   */
2575 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
2576 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2577 /* and (3) building that structure and putting it into the NAT table(s).    */
2578 /* ------------------------------------------------------------------------ */
2579 nat_t *nat_new(fin, np, natsave, flags, direction)
2580 fr_info_t *fin;
2581 ipnat_t *np;
2582 nat_t **natsave;
2583 u_int flags;
2584 int direction;
2585 {
2586 	tcphdr_t *tcp = NULL;
2587 	hostmap_t *hm = NULL;
2588 	nat_t *nat, *natl;
2589 	u_int nflags;
2590 	natinfo_t ni;
2591 	int move;
2592 	ipf_stack_t *ifs = fin->fin_ifs;
2593 
2594 	/*
2595 	 * Trigger automatic call to nat_extraflush() if the
2596 	 * table has reached capcity specified by hi watermark.
2597 	 */
2598 	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi)
2599 		ifs->ifs_nat_doflush = 1;
2600 
2601 	/*
2602 	 * If automatic flushing did not do its job, and the table
2603 	 * has filled up, don't try to create a new entry.
2604 	 */
2605 	if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) {
2606 		ifs->ifs_nat_stats.ns_memfail++;
2607 		return NULL;
2608 	}
2609 
2610 	move = 1;
2611 	nflags = np->in_flags & flags;
2612 	nflags &= NAT_FROMRULE;
2613 
2614 	ni.nai_np = np;
2615 	ni.nai_nflags = nflags;
2616 	ni.nai_flags = flags;
2617 
2618 	/* Give me a new nat */
2619 	KMALLOC(nat, nat_t *);
2620 	if (nat == NULL) {
2621 		ifs->ifs_nat_stats.ns_memfail++;
2622 		/*
2623 		 * Try to automatically tune the max # of entries in the
2624 		 * table allowed to be less than what will cause kmem_alloc()
2625 		 * to fail and try to eliminate panics due to out of memory
2626 		 * conditions arising.
2627 		 */
2628 		if (ifs->ifs_ipf_nattable_max > ifs->ifs_ipf_nattable_sz) {
2629 			ifs->ifs_ipf_nattable_max = ifs->ifs_nat_stats.ns_inuse - 100;
2630 			printf("ipf_nattable_max reduced to %d\n",
2631 				ifs->ifs_ipf_nattable_max);
2632 		}
2633 		return NULL;
2634 	}
2635 
2636 	if (flags & IPN_TCPUDP) {
2637 		tcp = fin->fin_dp;
2638 		ni.nai_sport = htons(fin->fin_sport);
2639 		ni.nai_dport = htons(fin->fin_dport);
2640 	} else if (flags & IPN_ICMPQUERY) {
2641 		/*
2642 		 * In the ICMP query NAT code, we translate the ICMP id fields
2643 		 * to make them unique. This is indepedent of the ICMP type
2644 		 * (e.g. in the unlikely event that a host sends an echo and
2645 		 * an tstamp request with the same id, both packets will have
2646 		 * their ip address/id field changed in the same way).
2647 		 */
2648 		/* The icmp_id field is used by the sender to identify the
2649 		 * process making the icmp request. (the receiver justs
2650 		 * copies it back in its response). So, it closely matches
2651 		 * the concept of source port. We overlay sport, so we can
2652 		 * maximally reuse the existing code.
2653 		 */
2654 		ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2655 		ni.nai_dport = ni.nai_sport;
2656 	}
2657 
2658 	bzero((char *)nat, sizeof(*nat));
2659 	nat->nat_flags = flags;
2660 	nat->nat_redir = np->in_redir;
2661 
2662 	if ((flags & NAT_SLAVE) == 0) {
2663 		MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
2664 	}
2665 
2666 	/*
2667 	 * Search the current table for a match.
2668 	 */
2669 	if (direction == NAT_OUTBOUND) {
2670 		/*
2671 		 * We can now arrange to call this for the same connection
2672 		 * because ipf_nat_new doesn't protect the code path into
2673 		 * this function.
2674 		 */
2675 		natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2676 				     fin->fin_src, fin->fin_dst);
2677 		if (natl != NULL) {
2678 			KFREE(nat);
2679 			nat = natl;
2680 			goto done;
2681 		}
2682 
2683 		move = nat_newmap(fin, nat, &ni);
2684 		if (move == -1)
2685 			goto badnat;
2686 
2687 		np = ni.nai_np;
2688 	} else {
2689 		/*
2690 		 * NAT_INBOUND is used only for redirects rules
2691 		 */
2692 		natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2693 				    fin->fin_src, fin->fin_dst);
2694 		if (natl != NULL) {
2695 			KFREE(nat);
2696 			nat = natl;
2697 			goto done;
2698 		}
2699 
2700 		move = nat_newrdr(fin, nat, &ni);
2701 		if (move == -1)
2702 			goto badnat;
2703 
2704 		np = ni.nai_np;
2705 	}
2706 
2707 	if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2708 		if (np->in_redir == NAT_REDIRECT) {
2709 			nat_delrdr(np);
2710 			nat_addrdr(np, ifs);
2711 		} else if (np->in_redir == NAT_MAP) {
2712 			nat_delnat(np);
2713 			nat_addnat(np, ifs);
2714 		}
2715 	}
2716 
2717 	if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2718 		goto badnat;
2719 	}
2720 
2721 	nat_calc_chksum_diffs(nat);
2722 
2723 	if (flags & SI_WILDP)
2724 		ifs->ifs_nat_stats.ns_wilds++;
2725 	fin->fin_flx |= FI_NEWNAT;
2726 	goto done;
2727 badnat:
2728 	ifs->ifs_nat_stats.ns_badnat++;
2729 	if ((hm = nat->nat_hm) != NULL)
2730 		fr_hostmapdel(&hm);
2731 	KFREE(nat);
2732 	nat = NULL;
2733 done:
2734 	if ((flags & NAT_SLAVE) == 0) {
2735 		MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
2736 	}
2737 	return nat;
2738 }
2739 
2740 
2741 /* ------------------------------------------------------------------------ */
2742 /* Function:    nat_finalise                                                */
2743 /* Returns:     int - 0 == sucess, -1 == failure                            */
2744 /* Parameters:  fin(I) - pointer to packet information                      */
2745 /*              nat(I) - pointer to NAT entry                               */
2746 /*              ni(I)  - pointer to structure with misc. information needed */
2747 /*                       to create new NAT entry.                           */
2748 /* Write Lock:  ipf_nat                                                     */
2749 /*                                                                          */
2750 /* This is the tail end of constructing a new NAT entry and is the same     */
2751 /* for both IPv4 and IPv6.                                                  */
2752 /* ------------------------------------------------------------------------ */
2753 /*ARGSUSED*/
2754 static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction)
2755 fr_info_t *fin;
2756 nat_t *nat;
2757 natinfo_t *ni;
2758 tcphdr_t *tcp;
2759 nat_t **natsave;
2760 int direction;
2761 {
2762 	frentry_t *fr;
2763 	ipnat_t *np;
2764 	ipf_stack_t *ifs = fin->fin_ifs;
2765 
2766 	np = ni->nai_np;
2767 
2768 	COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0], fin->fin_v);
2769 
2770 #ifdef	IPFILTER_SYNC
2771 	if ((nat->nat_flags & SI_CLONE) == 0)
2772 		nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2773 #endif
2774 
2775 	nat->nat_me = natsave;
2776 	nat->nat_dir = direction;
2777 	nat->nat_ifps[0] = np->in_ifps[0];
2778 	nat->nat_ifps[1] = np->in_ifps[1];
2779 	nat->nat_ptr = np;
2780 	nat->nat_p = fin->fin_p;
2781 	nat->nat_v = fin->fin_v;
2782 	nat->nat_mssclamp = np->in_mssclamp;
2783 	fr = fin->fin_fr;
2784 	nat->nat_fr = fr;
2785 
2786 	if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2787 		if (appr_new(fin, nat) == -1)
2788 			return -1;
2789 
2790 	if (nat_insert(nat, fin->fin_rev, ifs) == 0) {
2791 		if (ifs->ifs_nat_logging)
2792 			nat_log(nat, (u_int)np->in_redir, ifs);
2793 		np->in_use++;
2794 		if (fr != NULL) {
2795 			MUTEX_ENTER(&fr->fr_lock);
2796 			fr->fr_ref++;
2797 			MUTEX_EXIT(&fr->fr_lock);
2798 		}
2799 		return 0;
2800 	}
2801 
2802 	/*
2803 	 * nat_insert failed, so cleanup time...
2804 	 */
2805 	return -1;
2806 }
2807 
2808 
2809 /* ------------------------------------------------------------------------ */
2810 /* Function:   nat_insert                                                   */
/* Returns:    int - 0 == success, -1 == failure                            */
2812 /* Parameters: nat(I) - pointer to NAT structure                            */
2813 /*             rev(I) - flag indicating forward/reverse direction of packet */
2814 /* Write Lock: ipf_nat                                                      */
2815 /*                                                                          */
2816 /* Insert a NAT entry into the hash tables for searching and add it to the  */
2817 /* list of active NAT entries.  Adjust global counters when complete.       */
2818 /* ------------------------------------------------------------------------ */
2819 int	nat_insert(nat, rev, ifs)
2820 nat_t	*nat;
2821 int	rev;
2822 ipf_stack_t *ifs;
2823 {
2824 	u_int hv1, hv2;
2825 	nat_t **natp;
2826 
2827 	/*
2828 	 * Try and return an error as early as possible, so calculate the hash
2829 	 * entry numbers first and then proceed.
2830 	 */
2831 	if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2832 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2833 				  0xffffffff);
2834 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2835 				  ifs->ifs_ipf_nattable_sz);
2836 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2837 				  0xffffffff);
2838 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2839 				  ifs->ifs_ipf_nattable_sz);
2840 	} else {
2841 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2842 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1,
2843 				  ifs->ifs_ipf_nattable_sz);
2844 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2845 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2,
2846 				  ifs->ifs_ipf_nattable_sz);
2847 	}
2848 
2849 	if (ifs->ifs_nat_stats.ns_bucketlen[0][hv1] >= ifs->ifs_fr_nat_maxbucket ||
2850 	    ifs->ifs_nat_stats.ns_bucketlen[1][hv2] >= ifs->ifs_fr_nat_maxbucket) {
2851 		return -1;
2852 	}
2853 
2854 	nat->nat_hv[0] = hv1;
2855 	nat->nat_hv[1] = hv2;
2856 
2857 	MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2858 
2859 	nat->nat_rev = rev;
2860 	nat->nat_ref = 1;
2861 	nat->nat_bytes[0] = 0;
2862 	nat->nat_pkts[0] = 0;
2863 	nat->nat_bytes[1] = 0;
2864 	nat->nat_pkts[1] = 0;
2865 
2866 	nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2867 	nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4, ifs);
2868 
2869 	if (nat->nat_ifnames[1][0] !='\0') {
2870 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2871 		nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4, ifs);
2872 	} else {
2873 		(void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2874 			       LIFNAMSIZ);
2875 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2876 		nat->nat_ifps[1] = nat->nat_ifps[0];
2877 	}
2878 
2879 	nat->nat_next = ifs->ifs_nat_instances;
2880 	nat->nat_pnext = &ifs->ifs_nat_instances;
2881 	if (ifs->ifs_nat_instances)
2882 		ifs->ifs_nat_instances->nat_pnext = &nat->nat_next;
2883 	ifs->ifs_nat_instances = nat;
2884 
2885 	natp = &ifs->ifs_nat_table[0][hv1];
2886 	if (*natp)
2887 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2888 	nat->nat_phnext[0] = natp;
2889 	nat->nat_hnext[0] = *natp;
2890 	*natp = nat;
2891 	ifs->ifs_nat_stats.ns_bucketlen[0][hv1]++;
2892 
2893 	natp = &ifs->ifs_nat_table[1][hv2];
2894 	if (*natp)
2895 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2896 	nat->nat_phnext[1] = natp;
2897 	nat->nat_hnext[1] = *natp;
2898 	*natp = nat;
2899 	ifs->ifs_nat_stats.ns_bucketlen[1][hv2]++;
2900 
2901 	fr_setnatqueue(nat, rev, ifs);
2902 
2903 	ifs->ifs_nat_stats.ns_added++;
2904 	ifs->ifs_nat_stats.ns_inuse++;
2905 	return 0;
2906 }
2907 
2908 
2909 /* ------------------------------------------------------------------------ */
2910 /* Function:    nat_icmperrorlookup                                         */
2911 /* Returns:     nat_t* - point to matching NAT structure                    */
2912 /* Parameters:  fin(I) - pointer to packet information                      */
2913 /*              dir(I) - direction of packet (in/out)                       */
2914 /*                                                                          */
2915 /* Check if the ICMP error message is related to an existing TCP, UDP or    */
2916 /* ICMP query nat entry.  It is assumed that the packet is already of the   */
/* required length.                                                         */
2918 /* ------------------------------------------------------------------------ */
2919 nat_t *nat_icmperrorlookup(fin, dir)
2920 fr_info_t *fin;
2921 int dir;
2922 {
2923 	int flags = 0, minlen;
2924 	icmphdr_t *orgicmp;
2925 	tcphdr_t *tcp = NULL;
2926 	u_short data[2];
2927 	nat_t *nat;
2928 	ip_t *oip;
2929 	u_int p;
2930 
2931 	/*
2932 	 * Does it at least have the return (basic) IP header ?
2933 	 * Only a basic IP header (no options) should be with an ICMP error
2934 	 * header.  Also, if it's not an error type, then return.
2935 	 */
2936 	if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2937 		return NULL;
2938 
2939 	/*
2940 	 * Check packet size
2941 	 */
2942 	oip = (ip_t *)((char *)fin->fin_dp + 8);
2943 	minlen = IP_HL(oip) << 2;
2944 	if ((minlen < sizeof(ip_t)) ||
2945 	    (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2946 		return NULL;
2947 	/*
2948 	 * Is the buffer big enough for all of it ?  It's the size of the IP
2949 	 * header claimed in the encapsulated part which is of concern.  It
2950 	 * may be too big to be in this buffer but not so big that it's
2951 	 * outside the ICMP packet, leading to TCP deref's causing problems.
2952 	 * This is possible because we don't know how big oip_hl is when we
2953 	 * do the pullup early in fr_check() and thus can't gaurantee it is
2954 	 * all here now.
2955 	 */
2956 #ifdef  _KERNEL
2957 	{
2958 	mb_t *m;
2959 
2960 	m = fin->fin_m;
2961 # if defined(MENTAT)
2962 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2963 		return NULL;
2964 # else
2965 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2966 	    (char *)fin->fin_ip + M_LEN(m))
2967 		return NULL;
2968 # endif
2969 	}
2970 #endif
2971 
2972 	if (fin->fin_daddr != oip->ip_src.s_addr)
2973 		return NULL;
2974 
2975 	p = oip->ip_p;
2976 	if (p == IPPROTO_TCP)
2977 		flags = IPN_TCP;
2978 	else if (p == IPPROTO_UDP)
2979 		flags = IPN_UDP;
2980 	else if (p == IPPROTO_ICMP) {
2981 		orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2982 
2983 		/* see if this is related to an ICMP query */
2984 		if (nat_icmpquerytype4(orgicmp->icmp_type)) {
2985 			data[0] = fin->fin_data[0];
2986 			data[1] = fin->fin_data[1];
2987 			fin->fin_data[0] = 0;
2988 			fin->fin_data[1] = orgicmp->icmp_id;
2989 
2990 			flags = IPN_ICMPERR|IPN_ICMPQUERY;
2991 			/*
2992 			 * NOTE : dir refers to the direction of the original
2993 			 *        ip packet. By definition the icmp error
2994 			 *        message flows in the opposite direction.
2995 			 */
2996 			if (dir == NAT_INBOUND)
2997 				nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2998 						   oip->ip_src);
2999 			else
3000 				nat = nat_outlookup(fin, flags, p, oip->ip_dst,
3001 						    oip->ip_src);
3002 			fin->fin_data[0] = data[0];
3003 			fin->fin_data[1] = data[1];
3004 			return nat;
3005 		}
3006 	}
3007 
3008 	if (flags & IPN_TCPUDP) {
3009 		minlen += 8;		/* + 64bits of data to get ports */
3010 		if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
3011 			return NULL;
3012 
3013 		data[0] = fin->fin_data[0];
3014 		data[1] = fin->fin_data[1];
3015 		tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
3016 		fin->fin_data[0] = ntohs(tcp->th_dport);
3017 		fin->fin_data[1] = ntohs(tcp->th_sport);
3018 
3019 		if (dir == NAT_INBOUND) {
3020 			nat = nat_inlookup(fin, flags, p, oip->ip_dst,
3021 					   oip->ip_src);
3022 		} else {
3023 			nat = nat_outlookup(fin, flags, p, oip->ip_dst,
3024 					    oip->ip_src);
3025 		}
3026 		fin->fin_data[0] = data[0];
3027 		fin->fin_data[1] = data[1];
3028 		return nat;
3029 	}
3030 	if (dir == NAT_INBOUND)
3031 		return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
3032 	else
3033 		return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
3034 }
3035 
3036 
3037 /* ------------------------------------------------------------------------ */
3038 /* Function:    nat_icmperror                                               */
3039 /* Returns:     nat_t* - point to matching NAT structure                    */
3040 /* Parameters:  fin(I)    - pointer to packet information                   */
3041 /*              nflags(I) - NAT flags for this packet                       */
3042 /*              dir(I)    - direction of packet (in/out)                    */
3043 /*                                                                          */
3044 /* Fix up an ICMP packet which is an error message for an existing NAT      */
3045 /* session.  This will correct both packet header data and checksums.       */
3046 /*                                                                          */
3047 /* This should *ONLY* be used for incoming ICMP error packets to make sure  */
3048 /* a NAT'd ICMP packet gets correctly recognised.                           */
3049 /* ------------------------------------------------------------------------ */
3050 nat_t *nat_icmperror(fin, nflags, dir)
3051 fr_info_t *fin;
3052 u_int *nflags;
3053 int dir;
3054 {
3055 	u_32_t sum1, sum2, sumd, psum1, psum2, psumd, sumd2;
3056 	struct in_addr in;
3057 	icmphdr_t *icmp, *orgicmp;
3058 	int dlen;
3059 	udphdr_t *udp;
3060 	tcphdr_t *tcp;
3061 	nat_t *nat;
3062 	ip_t *oip;
3063 	if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
3064 		return NULL;
3065 
3066 	/*
3067 	 * nat_icmperrorlookup() looks up nat entry associated with the
3068 	 * offending IP packet and returns pointer to the entry, or NULL
3069 	 * if packet wasn't natted or for `defective' packets.
3070 	 */
3071 
3072 	if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
3073 		return NULL;
3074 
3075 	sumd2 = 0;
3076 	*nflags = IPN_ICMPERR;
3077 	icmp = fin->fin_dp;
3078 	oip = (ip_t *)&icmp->icmp_ip;
3079 	udp = (udphdr_t *)((((char *)oip) + (IP_HL(oip) << 2)));
3080 	tcp = (tcphdr_t *)udp;
3081 	dlen = fin->fin_plen - ((char *)udp - (char *)fin->fin_ip);
3082 
3083 	/*
3084 	 * Need to adjust ICMP header to include the real IP#'s and
3085 	 * port #'s.  There are three steps required.
3086 	 *
3087 	 * Step 1
3088 	 * Fix the IP addresses in the offending IP packet and update
3089 	 * ip header checksum to compensate for the change.
3090 	 *
3091 	 * No update needed here for icmp_cksum because the ICMP checksum
3092 	 * is calculated over the complete ICMP packet, which includes the
3093 	 * changed oip IP addresses and oip->ip_sum.  These two changes
3094 	 * cancel each other out (if the delta for the IP address is x,
3095 	 * then the delta for ip_sum is minus x).
3096 	 */
3097 
3098 	if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
3099 		sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
3100 		in = nat->nat_inip;
3101 		oip->ip_src = in;
3102 	} else {
3103 		sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
3104 		in = nat->nat_outip;
3105 		oip->ip_dst = in;
3106 	}
3107 
3108 	sum2 = LONG_SUM(ntohl(in.s_addr));
3109 	CALC_SUMD(sum1, sum2, sumd);
3110 	fix_datacksum(&oip->ip_sum, sumd);
3111 
3112 	/*
3113 	 * Step 2
3114 	 * Perform other adjustments based on protocol of offending packet.
3115 	 */
3116 
3117 	switch (oip->ip_p) {
3118 		case IPPROTO_TCP :
3119 		case IPPROTO_UDP :
3120 
3121 			/*
3122 			* For offending TCP/UDP IP packets, translate the ports
3123 			* based on the NAT specification.
3124 			*
3125 			* Advance notice : Now it becomes complicated :-)
3126 			*
3127 			* Since the port and IP addresse fields are both part
3128 			* of the TCP/UDP checksum of the offending IP packet,
3129 			* we need to adjust that checksum as well.
3130 			*
3131 			* To further complicate things, the TCP/UDP checksum
3132 			* may not be present.  We must check to see if the
3133 			* length of the data portion is big enough to hold
3134 			* the checksum.  In the UDP case, a test to determine
3135 			* if the checksum is even set is also required.
3136 			*
3137 			* Any changes to an IP address, port or checksum within
3138 			* the ICMP packet requires a change to icmp_cksum.
3139 			*
3140 			* Be extremely careful here ... The change is dependent
3141 			* upon whether or not the TCP/UPD checksum is present.
3142 			*
3143 			* If TCP/UPD checksum is present, the icmp_cksum must
3144 			* compensate for checksum modification resulting from
3145 			* IP address change only.  Port change and resulting
3146 			* data checksum adjustments cancel each other out.
3147 			*
3148 			* If TCP/UDP checksum is not present, icmp_cksum must
3149 			* compensate for port change only.  The IP address
3150 			* change does not modify anything else in this case.
3151 			*/
3152 
3153 			psum1 = 0;
3154 			psum2 = 0;
3155 			psumd = 0;
3156 
3157 			if ((tcp->th_dport == nat->nat_oport) &&
3158 			    (tcp->th_sport != nat->nat_inport)) {
3159 
3160 				/*
3161 				 * Translate the source port.
3162 				 */
3163 
3164 				psum1 = ntohs(tcp->th_sport);
3165 				psum2 = ntohs(nat->nat_inport);
3166 				tcp->th_sport = nat->nat_inport;
3167 
3168 			} else if ((tcp->th_sport == nat->nat_oport) &&
3169 				    (tcp->th_dport != nat->nat_outport)) {
3170 
3171 				/*
3172 				 * Translate the destination port.
3173 				 */
3174 
3175 				psum1 = ntohs(tcp->th_dport);
3176 				psum2 = ntohs(nat->nat_outport);
3177 				tcp->th_dport = nat->nat_outport;
3178 			}
3179 
3180 			if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) {
3181 
3182 				/*
3183 				 * TCP checksum present.
3184 				 *
3185 				 * Adjust data checksum and icmp checksum to
3186 				 * compensate for any IP address change.
3187 				 */
3188 
3189 				sum1 = ntohs(tcp->th_sum);
3190 				fix_datacksum(&tcp->th_sum, sumd);
3191 				sum2 = ntohs(tcp->th_sum);
3192 				sumd2 = sumd << 1;
3193 				CALC_SUMD(sum1, sum2, sumd);
3194 				sumd2 += sumd;
3195 
3196 				/*
3197 				 * Also make data checksum adjustment to
3198 				 * compensate for any port change.
3199 				 */
3200 
3201 				if (psum1 != psum2) {
3202 					CALC_SUMD(psum1, psum2, psumd);
3203 					fix_datacksum(&tcp->th_sum, psumd);
3204 				}
3205 
3206 			} else if ((oip->ip_p == IPPROTO_UDP) &&
3207 				   (dlen >= 8) && (udp->uh_sum != 0)) {
3208 
3209 				/*
3210 				 * The UDP checksum is present and set.
3211 				 *
3212 				 * Adjust data checksum and icmp checksum to
3213 				 * compensate for any IP address change.
3214 				 */
3215 
3216 				sum1 = ntohs(udp->uh_sum);
3217 				fix_datacksum(&udp->uh_sum, sumd);
3218 				sum2 = ntohs(udp->uh_sum);
3219 				sumd2 = sumd << 1;
3220 				CALC_SUMD(sum1, sum2, sumd);
3221 				sumd2 += sumd;
3222 
3223 				/*
3224 				 * Also make data checksum adjustment to
3225 				 * compensate for any port change.
3226 				 */
3227 
3228 				if (psum1 != psum2) {
3229 					CALC_SUMD(psum1, psum2, psumd);
3230 					fix_datacksum(&udp->uh_sum, psumd);
3231 				}
3232 
3233 			} else {
3234 
3235 				/*
3236 				 * Data checksum was not present.
3237 				 *
3238 				 * Compensate for any port change.
3239 				 */
3240 
3241 				CALC_SUMD(psum2, psum1, psumd);
3242 				sumd2 += psumd;
3243 			}
3244 			break;
3245 
3246 		case IPPROTO_ICMP :
3247 
3248 			orgicmp = (icmphdr_t *)udp;
3249 
3250 			if ((nat->nat_dir == NAT_OUTBOUND) &&
3251 			    (orgicmp->icmp_id != nat->nat_inport) &&
3252 			    (dlen >= 8)) {
3253 
3254 				/*
3255 				 * Fix ICMP checksum (of the offening ICMP
3256 				 * query packet) to compensate the change
3257 				 * in the ICMP id of the offending ICMP
3258 				 * packet.
3259 				 *
3260 				 * Since you modify orgicmp->icmp_id with
3261 				 * a delta (say x) and you compensate that
3262 				 * in origicmp->icmp_cksum with a delta
3263 				 * minus x, you don't have to adjust the
3264 				 * overall icmp->icmp_cksum
3265 				 */
3266 
3267 				sum1 = ntohs(orgicmp->icmp_id);
3268 				sum2 = ntohs(nat->nat_inport);
3269 				CALC_SUMD(sum1, sum2, sumd);
3270 				orgicmp->icmp_id = nat->nat_inport;
3271 				fix_datacksum(&orgicmp->icmp_cksum, sumd);
3272 
3273 			} /* nat_dir can't be NAT_INBOUND for icmp queries */
3274 
3275 			break;
3276 
3277 		default :
3278 
3279 			break;
3280 
3281 	} /* switch (oip->ip_p) */
3282 
3283 	/*
3284 	 * Step 3
3285 	 * Make the adjustments to icmp checksum.
3286 	 */
3287 
3288 	if (sumd2 != 0) {
3289 		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3290 		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3291 		fix_incksum(&icmp->icmp_cksum, sumd2);
3292 	}
3293 	return nat;
3294 }
3295 
3296 
3297 /*
3298  * NB: these lookups don't lock access to the list, it assumed that it has
3299  * already been done!
3300  */
3301 
3302 /* ------------------------------------------------------------------------ */
3303 /* Function:    nat_inlookup                                                */
3304 /* Returns:     nat_t* - NULL == no match,                                  */
3305 /*                       else pointer to matching NAT entry                 */
3306 /* Parameters:  fin(I)    - pointer to packet information                   */
3307 /*              flags(I)  - NAT flags for this packet                       */
3308 /*              p(I)      - protocol for this packet                        */
3309 /*              src(I)    - source IP address                               */
3310 /*              mapdst(I) - destination IP address                          */
3311 /*                                                                          */
3312 /* Lookup a nat entry based on the mapped destination ip address/port and   */
3313 /* real source address/port.  We use this lookup when receiving a packet,   */
3314 /* we're looking for a table entry, based on the destination address.       */
3315 /*                                                                          */
3316 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3317 /*                                                                          */
3318 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3319 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3320 /*                                                                          */
3321 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3322 /*            the packet is of said protocol                                */
3323 /* ------------------------------------------------------------------------ */
3324 nat_t *nat_inlookup(fin, flags, p, src, mapdst)
3325 fr_info_t *fin;
3326 u_int flags, p;
3327 struct in_addr src , mapdst;
3328 {
3329 	u_short sport, dport;
3330 	ipnat_t *ipn;
3331 	u_int sflags;
3332 	nat_t *nat;
3333 	int nflags;
3334 	u_32_t dst;
3335 	void *ifp;
3336 	u_int hv;
3337 	ipf_stack_t *ifs = fin->fin_ifs;
3338 
3339 	if (fin != NULL)
3340 		ifp = fin->fin_ifp;
3341 	else
3342 		ifp = NULL;
3343 	sport = 0;
3344 	dport = 0;
3345 	dst = mapdst.s_addr;
3346 	sflags = flags & NAT_TCPUDPICMP;
3347 
3348 	switch (p)
3349 	{
3350 	case IPPROTO_TCP :
3351 	case IPPROTO_UDP :
3352 		sport = htons(fin->fin_data[0]);
3353 		dport = htons(fin->fin_data[1]);
3354 		break;
3355 	case IPPROTO_ICMP :
3356 		if (flags & IPN_ICMPERR)
3357 			sport = fin->fin_data[1];
3358 		else
3359 			dport = fin->fin_data[1];
3360 		break;
3361 	default :
3362 		break;
3363 	}
3364 
3365 
3366 	if ((flags & SI_WILDP) != 0)
3367 		goto find_in_wild_ports;
3368 
3369 	hv = NAT_HASH_FN(dst, dport, 0xffffffff);
3370 	hv = NAT_HASH_FN(src.s_addr, hv + sport, ifs->ifs_ipf_nattable_sz);
3371 	nat = ifs->ifs_nat_table[1][hv];
3372 	for (; nat; nat = nat->nat_hnext[1]) {
3373 		if (nat->nat_v != 4)
3374 			continue;
3375 
3376 		if (nat->nat_ifps[0] != NULL) {
3377 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3378 				continue;
3379 		} else if (ifp != NULL)
3380 			nat->nat_ifps[0] = ifp;
3381 
3382 		nflags = nat->nat_flags;
3383 
3384 		if (nat->nat_oip.s_addr == src.s_addr &&
3385 		    nat->nat_outip.s_addr == dst &&
3386 		    (((p == 0) &&
3387 		      (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3388 		     || (p == nat->nat_p))) {
3389 			switch (p)
3390 			{
3391 #if 0
3392 			case IPPROTO_GRE :
3393 				if (nat->nat_call[1] != fin->fin_data[0])
3394 					continue;
3395 				break;
3396 #endif
3397 			case IPPROTO_ICMP :
3398 				if ((flags & IPN_ICMPERR) != 0) {
3399 					if (nat->nat_outport != sport)
3400 						continue;
3401 				} else {
3402 					if (nat->nat_outport != dport)
3403 						continue;
3404 				}
3405 				break;
3406 			case IPPROTO_TCP :
3407 			case IPPROTO_UDP :
3408 				if (nat->nat_oport != sport)
3409 					continue;
3410 				if (nat->nat_outport != dport)
3411 					continue;
3412 				break;
3413 			default :
3414 				break;
3415 			}
3416 
3417 			ipn = nat->nat_ptr;
3418 			if ((ipn != NULL) && (nat->nat_aps != NULL))
3419 				if (appr_match(fin, nat) != 0)
3420 					continue;
3421 			return nat;
3422 		}
3423 	}
3424 
3425 	/*
3426 	 * So if we didn't find it but there are wildcard members in the hash
3427 	 * table, go back and look for them.  We do this search and update here
3428 	 * because it is modifying the NAT table and we want to do this only
3429 	 * for the first packet that matches.  The exception, of course, is
3430 	 * for "dummy" (FI_IGNORE) lookups.
3431 	 */
3432 find_in_wild_ports:
3433 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3434 		return NULL;
3435 	if (ifs->ifs_nat_stats.ns_wilds == 0)
3436 		return NULL;
3437 
3438 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3439 
3440 	hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3441 	hv = NAT_HASH_FN(src.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3442 
3443 	WRITE_ENTER(&ifs->ifs_ipf_nat);
3444 
3445 	nat = ifs->ifs_nat_table[1][hv];
3446 	for (; nat; nat = nat->nat_hnext[1]) {
3447 		if (nat->nat_v != 4)
3448 			continue;
3449 
3450 		if (nat->nat_ifps[0] != NULL) {
3451 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3452 				continue;
3453 		} else if (ifp != NULL)
3454 			nat->nat_ifps[0] = ifp;
3455 
3456 		if (nat->nat_p != fin->fin_p)
3457 			continue;
3458 		if (nat->nat_oip.s_addr != src.s_addr ||
3459 		    nat->nat_outip.s_addr != dst)
3460 			continue;
3461 
3462 		nflags = nat->nat_flags;
3463 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3464 			continue;
3465 
3466 		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3467 			       NAT_INBOUND) == 1) {
3468 			if ((fin->fin_flx & FI_IGNORE) != 0)
3469 				break;
3470 			if ((nflags & SI_CLONE) != 0) {
3471 				nat = fr_natclone(fin, nat);
3472 				if (nat == NULL)
3473 					break;
3474 			} else {
3475 				MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3476 				ifs->ifs_nat_stats.ns_wilds--;
3477 				MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3478 			}
3479 			nat->nat_oport = sport;
3480 			nat->nat_outport = dport;
3481 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3482 			nat_tabmove(nat, ifs);
3483 			break;
3484 		}
3485 	}
3486 
3487 	MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3488 
3489 	return nat;
3490 }
3491 
3492 
3493 /* ------------------------------------------------------------------------ */
3494 /* Function:    nat_tabmove                                                 */
3495 /* Returns:     Nil                                                         */
3496 /* Parameters:  nat(I) - pointer to NAT structure                           */
3497 /* Write Lock:  ipf_nat                                                     */
3498 /*                                                                          */
3499 /* This function is only called for TCP/UDP NAT table entries where the     */
3500 /* original was placed in the table without hashing on the ports and we now */
3501 /* want to include hashing on port numbers.                                 */
3502 /* ------------------------------------------------------------------------ */
3503 static void nat_tabmove(nat, ifs)
3504 nat_t *nat;
3505 ipf_stack_t *ifs;
3506 {
3507 	nat_t **natp;
3508 	u_int hv;
3509 
3510 	if (nat->nat_flags & SI_CLONE)
3511 		return;
3512 
3513 	/*
3514 	 * Remove the NAT entry from the old location
3515 	 */
3516 	if (nat->nat_hnext[0])
3517 		nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3518 	*nat->nat_phnext[0] = nat->nat_hnext[0];
3519 	ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3520 
3521 	if (nat->nat_hnext[1])
3522 		nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3523 	*nat->nat_phnext[1] = nat->nat_hnext[1];
3524 	ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3525 
3526 	/*
3527 	 * Add into the NAT table in the new position
3528 	 */
3529 	hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3530 	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3531 			 ifs->ifs_ipf_nattable_sz);
3532 	nat->nat_hv[0] = hv;
3533 	natp = &ifs->ifs_nat_table[0][hv];
3534 	if (*natp)
3535 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3536 	nat->nat_phnext[0] = natp;
3537 	nat->nat_hnext[0] = *natp;
3538 	*natp = nat;
3539 	ifs->ifs_nat_stats.ns_bucketlen[0][hv]++;
3540 
3541 	hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3542 	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3543 			 ifs->ifs_ipf_nattable_sz);
3544 	nat->nat_hv[1] = hv;
3545 	natp = &ifs->ifs_nat_table[1][hv];
3546 	if (*natp)
3547 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3548 	nat->nat_phnext[1] = natp;
3549 	nat->nat_hnext[1] = *natp;
3550 	*natp = nat;
3551 	ifs->ifs_nat_stats.ns_bucketlen[1][hv]++;
3552 }
3553 
3554 
3555 /* ------------------------------------------------------------------------ */
3556 /* Function:    nat_outlookup                                               */
3557 /* Returns:     nat_t* - NULL == no match,                                  */
3558 /*                       else pointer to matching NAT entry                 */
3559 /* Parameters:  fin(I)   - pointer to packet information                    */
3560 /*              flags(I) - NAT flags for this packet                        */
3561 /*              p(I)     - protocol for this packet                         */
3562 /*              src(I)   - source IP address                                */
3563 /*              dst(I)   - destination IP address                           */
3564 /*              rw(I)    - 1 == write lock on ipf_nat held, 0 == read lock. */
3565 /*                                                                          */
3566 /* Lookup a nat entry based on the source 'real' ip address/port and        */
3567 /* destination address/port.  We use this lookup when sending a packet out, */
3568 /* we're looking for a table entry, based on the source address.            */
3569 /*                                                                          */
3570 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3571 /*                                                                          */
3572 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3573 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3574 /*                                                                          */
3575 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3576 /*            the packet is of said protocol                                */
3577 /* ------------------------------------------------------------------------ */
3578 nat_t *nat_outlookup(fin, flags, p, src, dst)
3579 fr_info_t *fin;
3580 u_int flags, p;
3581 struct in_addr src , dst;
3582 {
3583 	u_short sport, dport;
3584 	u_int sflags;
3585 	ipnat_t *ipn;
3586 	u_32_t srcip;
3587 	nat_t *nat;
3588 	int nflags;
3589 	void *ifp;
3590 	u_int hv;
3591 	ipf_stack_t *ifs = fin->fin_ifs;
3592 
3593 	ifp = fin->fin_ifp;
3594 
3595 	srcip = src.s_addr;
3596 	sflags = flags & IPN_TCPUDPICMP;
3597 	sport = 0;
3598 	dport = 0;
3599 
3600 	switch (p)
3601 	{
3602 	case IPPROTO_TCP :
3603 	case IPPROTO_UDP :
3604 		sport = htons(fin->fin_data[0]);
3605 		dport = htons(fin->fin_data[1]);
3606 		break;
3607 	case IPPROTO_ICMP :
3608 		if (flags & IPN_ICMPERR)
3609 			sport = fin->fin_data[1];
3610 		else
3611 			dport = fin->fin_data[1];
3612 		break;
3613 	default :
3614 		break;
3615 	}
3616 
3617 	if ((flags & SI_WILDP) != 0)
3618 		goto find_out_wild_ports;
3619 
3620 	hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3621 	hv = NAT_HASH_FN(dst.s_addr, hv + dport, ifs->ifs_ipf_nattable_sz);
3622 	nat = ifs->ifs_nat_table[0][hv];
3623 	for (; nat; nat = nat->nat_hnext[0]) {
3624 		if (nat->nat_v != 4)
3625 			continue;
3626 
3627 		if (nat->nat_ifps[1] != NULL) {
3628 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3629 				continue;
3630 		} else if (ifp != NULL)
3631 			nat->nat_ifps[1] = ifp;
3632 
3633 		nflags = nat->nat_flags;
3634 
3635 		if (nat->nat_inip.s_addr == srcip &&
3636 		    nat->nat_oip.s_addr == dst.s_addr &&
3637 		    (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3638 		     || (p == nat->nat_p))) {
3639 			switch (p)
3640 			{
3641 #if 0
3642 			case IPPROTO_GRE :
3643 				if (nat->nat_call[1] != fin->fin_data[0])
3644 					continue;
3645 				break;
3646 #endif
3647 			case IPPROTO_TCP :
3648 			case IPPROTO_UDP :
3649 				if (nat->nat_oport != dport)
3650 					continue;
3651 				if (nat->nat_inport != sport)
3652 					continue;
3653 				break;
3654 			default :
3655 				break;
3656 			}
3657 
3658 			ipn = nat->nat_ptr;
3659 			if ((ipn != NULL) && (nat->nat_aps != NULL))
3660 				if (appr_match(fin, nat) != 0)
3661 					continue;
3662 			return nat;
3663 		}
3664 	}
3665 
3666 	/*
3667 	 * So if we didn't find it but there are wildcard members in the hash
3668 	 * table, go back and look for them.  We do this search and update here
3669 	 * because it is modifying the NAT table and we want to do this only
3670 	 * for the first packet that matches.  The exception, of course, is
3671 	 * for "dummy" (FI_IGNORE) lookups.
3672 	 */
3673 find_out_wild_ports:
3674 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3675 		return NULL;
3676 	if (ifs->ifs_nat_stats.ns_wilds == 0)
3677 		return NULL;
3678 
3679 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3680 
3681 	hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3682 	hv = NAT_HASH_FN(dst.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3683 
3684 	WRITE_ENTER(&ifs->ifs_ipf_nat);
3685 
3686 	nat = ifs->ifs_nat_table[0][hv];
3687 	for (; nat; nat = nat->nat_hnext[0]) {
3688 		if (nat->nat_v != 4)
3689 			continue;
3690 
3691 		if (nat->nat_ifps[1] != NULL) {
3692 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3693 				continue;
3694 		} else if (ifp != NULL)
3695 			nat->nat_ifps[1] = ifp;
3696 
3697 		if (nat->nat_p != fin->fin_p)
3698 			continue;
3699 		if ((nat->nat_inip.s_addr != srcip) ||
3700 		    (nat->nat_oip.s_addr != dst.s_addr))
3701 			continue;
3702 
3703 		nflags = nat->nat_flags;
3704 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3705 			continue;
3706 
3707 		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3708 			       NAT_OUTBOUND) == 1) {
3709 			if ((fin->fin_flx & FI_IGNORE) != 0)
3710 				break;
3711 			if ((nflags & SI_CLONE) != 0) {
3712 				nat = fr_natclone(fin, nat);
3713 				if (nat == NULL)
3714 					break;
3715 			} else {
3716 				MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3717 				ifs->ifs_nat_stats.ns_wilds--;
3718 				MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3719 			}
3720 			nat->nat_inport = sport;
3721 			nat->nat_oport = dport;
3722 			if (nat->nat_outport == 0)
3723 				nat->nat_outport = sport;
3724 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3725 			nat_tabmove(nat, ifs);
3726 			break;
3727 		}
3728 	}
3729 
3730 	MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3731 
3732 	return nat;
3733 }
3734 
3735 
3736 /* ------------------------------------------------------------------------ */
3737 /* Function:    nat_lookupredir                                             */
3738 /* Returns:     nat_t* - NULL == no match,                                  */
3739 /*                       else pointer to matching NAT entry                 */
3740 /* Parameters:  np(I) - pointer to description of packet to find NAT table  */
3741 /*                      entry for.                                          */
3742 /*                                                                          */
3743 /* Lookup the NAT tables to search for a matching redirect                  */
3744 /* ------------------------------------------------------------------------ */
3745 nat_t *nat_lookupredir(np, ifs)
3746 natlookup_t *np;
3747 ipf_stack_t *ifs;
3748 {
3749 	fr_info_t fi;
3750 	nat_t *nat;
3751 
3752 	bzero((char *)&fi, sizeof(fi));
3753 	if (np->nl_flags & IPN_IN) {
3754 		fi.fin_data[0] = ntohs(np->nl_realport);
3755 		fi.fin_data[1] = ntohs(np->nl_outport);
3756 	} else {
3757 		fi.fin_data[0] = ntohs(np->nl_inport);
3758 		fi.fin_data[1] = ntohs(np->nl_outport);
3759 	}
3760 	if (np->nl_flags & IPN_TCP)
3761 		fi.fin_p = IPPROTO_TCP;
3762 	else if (np->nl_flags & IPN_UDP)
3763 		fi.fin_p = IPPROTO_UDP;
3764 	else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3765 		fi.fin_p = IPPROTO_ICMP;
3766 
3767 	fi.fin_ifs = ifs;
3768 	/*
3769 	 * We can do two sorts of lookups:
3770 	 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3771 	 * - default: we have the `in' and `out' address, look for `real'.
3772 	 */
3773 	if (np->nl_flags & IPN_IN) {
3774 		if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3775 					np->nl_realip, np->nl_outip))) {
3776 			np->nl_inip = nat->nat_inip;
3777 			np->nl_inport = nat->nat_inport;
3778 		}
3779 	} else {
3780 		/*
3781 		 * If nl_inip is non null, this is a lookup based on the real
3782 		 * ip address. Else, we use the fake.
3783 		 */
3784 		if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3785 					 np->nl_inip, np->nl_outip))) {
3786 
3787 			if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3788 				fr_info_t fin;
3789 				bzero((char *)&fin, sizeof(fin));
3790 				fin.fin_p = nat->nat_p;
3791 				fin.fin_data[0] = ntohs(nat->nat_outport);
3792 				fin.fin_data[1] = ntohs(nat->nat_oport);
3793 				fin.fin_ifs = ifs;
3794 				if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3795 						 nat->nat_outip,
3796 						 nat->nat_oip) != NULL) {
3797 					np->nl_flags &= ~IPN_FINDFORWARD;
3798 				}
3799 			}
3800 
3801 			np->nl_realip = nat->nat_outip;
3802 			np->nl_realport = nat->nat_outport;
3803 		}
3804  	}
3805 
3806 	return nat;
3807 }
3808 
3809 
3810 /* ------------------------------------------------------------------------ */
3811 /* Function:    nat_match                                                   */
3812 /* Returns:     int - 0 == no match, 1 == match                             */
3813 /* Parameters:  fin(I)   - pointer to packet information                    */
3814 /*              np(I)    - pointer to NAT rule                              */
3815 /*                                                                          */
3816 /* Pull the matching of a packet against a NAT rule out of that complex     */
3817 /* loop inside fr_checknatin() and lay it out properly in its own function. */
3818 /* ------------------------------------------------------------------------ */
3819 static int nat_match(fin, np)
3820 fr_info_t *fin;
3821 ipnat_t *np;
3822 {
3823 	frtuc_t *ft;
3824 
3825 	if (fin->fin_v != 4)
3826 		return 0;
3827 
3828 	if (np->in_p && fin->fin_p != np->in_p)
3829 		return 0;
3830 
3831 	if (fin->fin_out) {
3832 		if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3833 			return 0;
3834 		if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3835 		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3836 			return 0;
3837 		if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3838 		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3839 			return 0;
3840 	} else {
3841 		if (!(np->in_redir & NAT_REDIRECT))
3842 			return 0;
3843 		if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3844 		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3845 			return 0;
3846 		if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3847 		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3848 			return 0;
3849 	}
3850 
3851 	ft = &np->in_tuc;
3852 	if (!(fin->fin_flx & FI_TCPUDP) ||
3853 	    (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3854 		if (ft->ftu_scmp || ft->ftu_dcmp)
3855 			return 0;
3856 		return 1;
3857 	}
3858 
3859 	return fr_tcpudpchk(fin, ft);
3860 }
3861 
3862 
3863 /* ------------------------------------------------------------------------ */
3864 /* Function:    nat_update                                                  */
3865 /* Returns:     Nil                                                         */
3866 /* Parameters:	fin(I) - pointer to packet information			    */
3867 /*		nat(I) - pointer to NAT structure			    */
3868 /*              np(I)     - pointer to NAT rule                             */
3869 /* Locks:	nat_lock						    */
3870 /*                                                                          */
3871 /* Updates the lifetime of a NAT table entry for non-TCP packets.  Must be  */
3872 /* called with fin_rev updated - i.e. after calling nat_proto().            */
3873 /* ------------------------------------------------------------------------ */
3874 void nat_update(fin, nat, np)
3875 fr_info_t *fin;
3876 nat_t *nat;
3877 ipnat_t *np;
3878 {
3879 	ipftq_t *ifq, *ifq2;
3880 	ipftqent_t *tqe;
3881 	ipf_stack_t *ifs = fin->fin_ifs;
3882 
3883 	tqe = &nat->nat_tqe;
3884 	ifq = tqe->tqe_ifq;
3885 
3886 	/*
3887 	 * We allow over-riding of NAT timeouts from NAT rules, even for
3888 	 * TCP, however, if it is TCP and there is no rule timeout set,
3889 	 * then do not update the timeout here.
3890 	 */
3891 	if (np != NULL)
3892 		ifq2 = np->in_tqehead[fin->fin_rev];
3893 	else
3894 		ifq2 = NULL;
3895 
3896 	if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3897 		(void) fr_tcp_age(&nat->nat_tqe, fin, ifs->ifs_nat_tqb, 0);
3898 	} else {
3899 		if (ifq2 == NULL) {
3900 			if (nat->nat_p == IPPROTO_UDP)
3901 				ifq2 = &ifs->ifs_nat_udptq;
3902 			else if (nat->nat_p == IPPROTO_ICMP)
3903 				ifq2 = &ifs->ifs_nat_icmptq;
3904 			else
3905 				ifq2 = &ifs->ifs_nat_iptq;
3906 		}
3907 
3908 		fr_movequeue(tqe, ifq, ifq2, ifs);
3909 	}
3910 }
3911 
3912 
3913 /* ------------------------------------------------------------------------ */
3914 /* Function:    fr_checknatout                                              */
3915 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3916 /*                     0 == no packet translation occurred,                 */
3917 /*                     1 == packet was successfully translated.             */
3918 /* Parameters:  fin(I)   - pointer to packet information                    */
3919 /*              passp(I) - pointer to filtering result flags                */
3920 /*                                                                          */
3921 /* Check to see if an outcoming packet should be changed.  ICMP packets are */
3922 /* first checked to see if they match an existing entry (if an error),      */
3923 /* otherwise a search of the current NAT table is made.  If neither results */
3924 /* in a match then a search for a matching NAT rule is made.  Create a new  */
3925 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
3926 /* packet header(s) as required.                                            */
3927 /* ------------------------------------------------------------------------ */
3928 int fr_checknatout(fin, passp)
3929 fr_info_t *fin;
3930 u_32_t *passp;
3931 {
3932 	ipnat_t *np = NULL, *npnext;
3933 	struct ifnet *ifp, *sifp;
3934 	icmphdr_t *icmp = NULL;
3935 	tcphdr_t *tcp = NULL;
3936 	int rval, natfailed;
3937 	u_int nflags = 0;
3938 	u_32_t ipa, iph;
3939 	int natadd = 1;
3940 	frentry_t *fr;
3941 	nat_t *nat;
3942 	ipf_stack_t *ifs = fin->fin_ifs;
3943 
3944 	if (ifs->ifs_fr_nat_lock != 0)
3945 		return 0;
3946 	if (ifs->ifs_nat_stats.ns_rules == 0 && ifs->ifs_nat_instances == NULL)
3947 		return 0;
3948 
3949 	natfailed = 0;
3950 	fr = fin->fin_fr;
3951 	sifp = fin->fin_ifp;
3952 	if ((fr != NULL) && !(fr->fr_flags & FR_DUP) &&
3953 	    fr->fr_tifs[fin->fin_rev].fd_ifp &&
3954 	    fr->fr_tifs[fin->fin_rev].fd_ifp != (void *)-1)
3955 		fin->fin_ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3956 	ifp = fin->fin_ifp;
3957 
3958 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3959 		switch (fin->fin_p)
3960 		{
3961 		case IPPROTO_TCP :
3962 			nflags = IPN_TCP;
3963 			break;
3964 		case IPPROTO_UDP :
3965 			nflags = IPN_UDP;
3966 			break;
3967 		case IPPROTO_ICMP :
3968 			icmp = fin->fin_dp;
3969 
3970 			/*
3971 			 * This is an incoming packet, so the destination is
3972 			 * the icmp_id and the source port equals 0
3973 			 */
3974 			if (nat_icmpquerytype4(icmp->icmp_type))
3975 				nflags = IPN_ICMPQUERY;
3976 			break;
3977 		default :
3978 			break;
3979 		}
3980 
3981 		if ((nflags & IPN_TCPUDP))
3982 			tcp = fin->fin_dp;
3983 	}
3984 
3985 	ipa = fin->fin_saddr;
3986 
3987 	READ_ENTER(&ifs->ifs_ipf_nat);
3988 
3989 	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3990 	    (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3991 		/*EMPTY*/;
3992 	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3993 		natadd = 0;
3994 	else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3995 				      fin->fin_src, fin->fin_dst))) {
3996 		nflags = nat->nat_flags;
3997 	} else {
3998 		u_32_t hv, msk, nmsk;
3999 
4000 		/*
4001 		 * There is no current entry in the nat table for this packet.
4002 		 *
4003 		 * If the packet is a fragment, but not the first fragment,
4004 		 * then don't do anything.  Otherwise, if there is a matching
4005 		 * nat rule, try to create a new nat entry.
4006 		 */
4007 		if ((fin->fin_off != 0) && (fin->fin_flx & FI_TCPUDP))
4008 			goto nonatfrag;
4009 
4010 		msk = 0xffffffff;
4011 		nmsk = ifs->ifs_nat_masks;
4012 maskloop:
4013 		iph = ipa & htonl(msk);
4014 		hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_natrules_sz);
4015 		for (np = ifs->ifs_nat_rules[hv]; np; np = npnext) {
4016 			npnext = np->in_mnext;
4017 			if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
4018 				continue;
4019 			if (np->in_v != fin->fin_v)
4020 				continue;
4021 			if (np->in_p && (np->in_p != fin->fin_p))
4022 				continue;
4023 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
4024 				continue;
4025 			if (np->in_flags & IPN_FILTER) {
4026 				if (!nat_match(fin, np))
4027 					continue;
4028 			} else if ((ipa & np->in_inmsk) != np->in_inip)
4029 				continue;
4030 
4031 			if ((fr != NULL) &&
4032 			    !fr_matchtag(&np->in_tag, &fr->fr_nattag))
4033 				continue;
4034 
4035 			if (*np->in_plabel != '\0') {
4036 				if (((np->in_flags & IPN_FILTER) == 0) &&
4037 				    (np->in_dport != tcp->th_dport))
4038 					continue;
4039 				if (appr_ok(fin, tcp, np) == 0)
4040 					continue;
4041 			}
4042 
4043 			ATOMIC_INC32(np->in_use);
4044 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4045 			WRITE_ENTER(&ifs->ifs_ipf_nat);
4046 			nat = nat_new(fin, np, NULL, nflags, NAT_OUTBOUND);
4047 			if (nat != NULL) {
4048 				np->in_use--;
4049 				np->in_hits++;
4050 				MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
4051 				break;
4052 			}
4053 			natfailed = -1;
4054 			npnext = np->in_mnext;
4055 			fr_ipnatderef(&np, ifs);
4056 			MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
4057 		}
4058 		if ((np == NULL) && (nmsk != 0)) {
4059 			while (nmsk) {
4060 				msk <<= 1;
4061 				if (nmsk & 0x80000000)
4062 					break;
4063 				nmsk <<= 1;
4064 			}
4065 			if (nmsk != 0) {
4066 				nmsk <<= 1;
4067 				goto maskloop;
4068 			}
4069 		}
4070 	}
4071 
4072 nonatfrag:
4073 	if (nat != NULL) {
4074 		rval = fr_natout(fin, nat, natadd, nflags);
4075 		if (rval == 1) {
4076 			MUTEX_ENTER(&nat->nat_lock);
4077 			nat_update(fin, nat, nat->nat_ptr);
4078 			nat->nat_bytes[1] += fin->fin_plen;
4079 			nat->nat_pkts[1]++;
4080 			nat->nat_ref++;
4081 			MUTEX_EXIT(&nat->nat_lock);
4082 			fin->fin_nat = nat;
4083 		}
4084 	} else
4085 		rval = natfailed;
4086 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4087 
4088 	if (rval == -1) {
4089 		if (passp != NULL)
4090 			*passp = FR_BLOCK;
4091 		fin->fin_flx |= FI_BADNAT;
4092 	}
4093 	fin->fin_ifp = sifp;
4094 	return rval;
4095 }
4096 
4097 /* ------------------------------------------------------------------------ */
4098 /* Function:    fr_natout                                                   */
4099 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4100 /*                     1 == packet was successfully translated.             */
4101 /* Parameters:  fin(I)    - pointer to packet information                   */
4102 /*              nat(I)    - pointer to NAT structure                        */
4103 /*              natadd(I) - flag indicating if it is safe to add frag cache */
4104 /*              nflags(I) - NAT flags set for this packet                   */
4105 /*                                                                          */
4106 /* Translate a packet coming "out" on an interface.                         */
4107 /* ------------------------------------------------------------------------ */
4108 int fr_natout(fin, nat, natadd, nflags)
4109 fr_info_t *fin;
4110 nat_t *nat;
4111 int natadd;
4112 u_32_t nflags;
4113 {
4114 	icmphdr_t *icmp;
4115 	u_short *csump;
4116 	u_32_t sumd;
4117 	tcphdr_t *tcp;
4118 	ipnat_t *np;
4119 	int i;
4120 	ipf_stack_t *ifs = fin->fin_ifs;
4121 
4122 	if (fin->fin_v == 6) {
4123 #ifdef	USE_INET6
4124 		return fr_nat6out(fin, nat, natadd, nflags);
4125 #else
4126 		return NULL;
4127 #endif
4128 	}
4129 
4130 #if SOLARIS && defined(_KERNEL)
4131 	net_handle_t net_data_p = ifs->ifs_ipf_ipv4;
4132 #endif
4133 
4134 	tcp = NULL;
4135 	icmp = NULL;
4136 	csump = NULL;
4137 	np = nat->nat_ptr;
4138 
4139 	if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4140 		(void) fr_nat_newfrag(fin, 0, nat);
4141 
4142 	/*
4143 	 * Fix up checksums, not by recalculating them, but
4144 	 * simply computing adjustments.
4145 	 * This is only done for STREAMS based IP implementations where the
4146 	 * checksum has already been calculated by IP.  In all other cases,
4147 	 * IPFilter is called before the checksum needs calculating so there
4148 	 * is no call to modify whatever is in the header now.
4149 	 */
4150 	ASSERT(fin->fin_m != NULL);
4151 	if (fin->fin_v == 4 && !NET_IS_HCK_L3_FULL(net_data_p, fin->fin_m)) {
4152 		if (nflags == IPN_ICMPERR) {
4153 			u_32_t s1, s2;
4154 
4155 			s1 = LONG_SUM(ntohl(fin->fin_saddr));
4156 			s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
4157 			CALC_SUMD(s1, s2, sumd);
4158 
4159 			fix_outcksum(&fin->fin_ip->ip_sum, sumd);
4160 		}
4161 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4162     defined(linux) || defined(BRIDGE_IPF)
4163 		else {
4164 			/*
4165 			 * Strictly speaking, this isn't necessary on BSD
4166 			 * kernels because they do checksum calculation after
4167 			 * this code has run BUT if ipfilter is being used
4168 			 * to do NAT as a bridge, that code doesn't exist.
4169 			 */
4170 			if (nat->nat_dir == NAT_OUTBOUND)
4171 				fix_outcksum(&fin->fin_ip->ip_sum,
4172 					    nat->nat_ipsumd);
4173 			else
4174 				fix_incksum(&fin->fin_ip->ip_sum,
4175 				 	   nat->nat_ipsumd);
4176 		}
4177 #endif
4178 	}
4179 
4180 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4181 		if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
4182 			tcp = fin->fin_dp;
4183 
4184 			tcp->th_sport = nat->nat_outport;
4185 			fin->fin_data[0] = ntohs(nat->nat_outport);
4186 		}
4187 
4188 		if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
4189 			icmp = fin->fin_dp;
4190 			icmp->icmp_id = nat->nat_outport;
4191 		}
4192 
4193 		csump = nat_proto(fin, nat, nflags);
4194 	}
4195 
4196 	fin->fin_ip->ip_src = nat->nat_outip;
4197 
4198 	/*
4199 	 * The above comments do not hold for layer 4 (or higher) checksums...
4200 	 */
4201 	if (csump != NULL && !NET_IS_HCK_L4_FULL(net_data_p, fin->fin_m)) {
4202 		if (nflags & IPN_TCPUDP &&
4203 	   	    NET_IS_HCK_L4_PART(net_data_p, fin->fin_m))
4204 			sumd = nat->nat_sumd[1];
4205 		else
4206 			sumd = nat->nat_sumd[0];
4207 
4208 		if (nat->nat_dir == NAT_OUTBOUND)
4209 			fix_outcksum(csump, sumd);
4210 		else
4211 			fix_incksum(csump, sumd);
4212 	}
4213 #ifdef	IPFILTER_SYNC
4214 	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4215 #endif
4216 	/* ------------------------------------------------------------- */
4217 	/* A few quick notes:						 */
4218 	/*	Following are test conditions prior to calling the 	 */
4219 	/*	appr_check routine.					 */
4220 	/*								 */
4221 	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4222 	/*	with a redirect rule, we attempt to match the packet's	 */
4223 	/*	source port against in_dport, otherwise	we'd compare the */
4224 	/*	packet's destination.			 		 */
4225 	/* ------------------------------------------------------------- */
4226 	if ((np != NULL) && (np->in_apr != NULL)) {
4227 		i = appr_check(fin, nat);
4228 		if (i == 0)
4229 			i = 1;
4230 	} else
4231 		i = 1;
4232 	ifs->ifs_nat_stats.ns_mapped[1]++;
4233 	fin->fin_flx |= FI_NATED;
4234 	return i;
4235 }
4236 
4237 
4238 /* ------------------------------------------------------------------------ */
4239 /* Function:    fr_checknatin                                               */
4240 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4241 /*                     0 == no packet translation occurred,                 */
4242 /*                     1 == packet was successfully translated.             */
4243 /* Parameters:  fin(I)   - pointer to packet information                    */
4244 /*              passp(I) - pointer to filtering result flags                */
4245 /*                                                                          */
4246 /* Check to see if an incoming packet should be changed.  ICMP packets are  */
4247 /* first checked to see if they match an existing entry (if an error),      */
4248 /* otherwise a search of the current NAT table is made.  If neither results */
4249 /* in a match then a search for a matching NAT rule is made.  Create a new  */
4250 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
4251 /* packet header(s) as required.                                            */
4252 /* ------------------------------------------------------------------------ */
4253 int fr_checknatin(fin, passp)
4254 fr_info_t *fin;
4255 u_32_t *passp;
4256 {
4257 	u_int nflags, natadd;
4258 	ipnat_t *np, *npnext;
4259 	int rval, natfailed;
4260 	struct ifnet *ifp;
4261 	struct in_addr in;
4262 	icmphdr_t *icmp;
4263 	tcphdr_t *tcp;
4264 	u_short dport;
4265 	nat_t *nat;
4266 	u_32_t iph;
4267 	ipf_stack_t *ifs = fin->fin_ifs;
4268 
4269 	if (ifs->ifs_fr_nat_lock != 0)
4270 		return 0;
4271 	if (ifs->ifs_nat_stats.ns_rules == 0 && ifs->ifs_nat_instances == NULL)
4272 		return 0;
4273 
4274 	tcp = NULL;
4275 	icmp = NULL;
4276 	dport = 0;
4277 	natadd = 1;
4278 	nflags = 0;
4279 	natfailed = 0;
4280 	ifp = fin->fin_ifp;
4281 
4282 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4283 		switch (fin->fin_p)
4284 		{
4285 		case IPPROTO_TCP :
4286 			nflags = IPN_TCP;
4287 			break;
4288 		case IPPROTO_UDP :
4289 			nflags = IPN_UDP;
4290 			break;
4291 		case IPPROTO_ICMP :
4292 			icmp = fin->fin_dp;
4293 
4294 			/*
4295 			 * This is an incoming packet, so the destination is
4296 			 * the icmp_id and the source port equals 0
4297 			 */
4298 			if (nat_icmpquerytype4(icmp->icmp_type)) {
4299 				nflags = IPN_ICMPQUERY;
4300 				dport = icmp->icmp_id;
4301 			} break;
4302 		default :
4303 			break;
4304 		}
4305 
4306 		if ((nflags & IPN_TCPUDP)) {
4307 			tcp = fin->fin_dp;
4308 			dport = tcp->th_dport;
4309 		}
4310 	}
4311 
4312 	in = fin->fin_dst;
4313 
4314 	READ_ENTER(&ifs->ifs_ipf_nat);
4315 
4316 	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
4317 	    (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
4318 		/*EMPTY*/;
4319 	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
4320 		natadd = 0;
4321 	else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
4322 				     fin->fin_src, in))) {
4323 		nflags = nat->nat_flags;
4324 	} else {
4325 		u_32_t hv, msk, rmsk;
4326 
4327 		/*
4328 		 * There is no current entry in the nat table for this packet.
4329 		 *
4330 		 * If the packet is a fragment, but not the first fragment,
4331 		 * then don't do anything.  Otherwise, if there is a matching
4332 		 * nat rule, try to create a new nat entry.
4333 		 */
4334 		if ((fin->fin_off != 0) && (fin->fin_flx & FI_TCPUDP))
4335 			goto nonatfrag;
4336 
4337 		rmsk = ifs->ifs_rdr_masks;
4338 		msk = 0xffffffff;
4339 maskloop:
4340 		iph = in.s_addr & htonl(msk);
4341 		hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_rdrrules_sz);
4342 		for (np = ifs->ifs_rdr_rules[hv]; np; np = npnext) {
4343 			npnext = np->in_rnext;
4344 			if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
4345 				continue;
4346 			if (np->in_v != fin->fin_v)
4347 				continue;
4348 			if (np->in_p && (np->in_p != fin->fin_p))
4349 				continue;
4350 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
4351 				continue;
4352 			if (np->in_flags & IPN_FILTER) {
4353 				if (!nat_match(fin, np))
4354 					continue;
4355 			} else {
4356 				if ((in.s_addr & np->in_outmsk) != np->in_outip)
4357 					continue;
4358 				if (np->in_pmin &&
4359 				    ((ntohs(np->in_pmax) < ntohs(dport)) ||
4360 				     (ntohs(dport) < ntohs(np->in_pmin))))
4361 					continue;
4362 			}
4363 
4364 			if (*np->in_plabel != '\0') {
4365 				if (!appr_ok(fin, tcp, np)) {
4366 					continue;
4367 				}
4368 			}
4369 
4370 			ATOMIC_INC32(np->in_use);
4371 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4372 			WRITE_ENTER(&ifs->ifs_ipf_nat);
4373 			nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
4374 			if (nat != NULL) {
4375 				np->in_use--;
4376 				np->in_hits++;
4377 				MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
4378 				break;
4379 			}
4380 			natfailed = -1;
4381 			npnext = np->in_rnext;
4382 			fr_ipnatderef(&np, ifs);
4383 			MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
4384 		}
4385 
4386 		if ((np == NULL) && (rmsk != 0)) {
4387 			while (rmsk) {
4388 				msk <<= 1;
4389 				if (rmsk & 0x80000000)
4390 					break;
4391 				rmsk <<= 1;
4392 			}
4393 			if (rmsk != 0) {
4394 				rmsk <<= 1;
4395 				goto maskloop;
4396 			}
4397 		}
4398 	}
4399 
4400 nonatfrag:
4401 	if (nat != NULL) {
4402 		rval = fr_natin(fin, nat, natadd, nflags);
4403 		if (rval == 1) {
4404 			MUTEX_ENTER(&nat->nat_lock);
4405 			nat_update(fin, nat, nat->nat_ptr);
4406 			nat->nat_bytes[0] += fin->fin_plen;
4407 			nat->nat_pkts[0]++;
4408 			nat->nat_ref++;
4409 			MUTEX_EXIT(&nat->nat_lock);
4410 			fin->fin_nat = nat;
4411 			fin->fin_state = nat->nat_state;
4412 		}
4413 	} else
4414 		rval = natfailed;
4415 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4416 
4417 	if (rval == -1) {
4418 		if (passp != NULL)
4419 			*passp = FR_BLOCK;
4420 		fin->fin_flx |= FI_BADNAT;
4421 	}
4422 	return rval;
4423 }
4424 
4425 
4426 /* ------------------------------------------------------------------------ */
4427 /* Function:    fr_natin                                                    */
4428 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4429 /*                     1 == packet was successfully translated.             */
4430 /* Parameters:  fin(I)    - pointer to packet information                   */
4431 /*              nat(I)    - pointer to NAT structure                        */
4432 /*              natadd(I) - flag indicating if it is safe to add frag cache */
4433 /*              nflags(I) - NAT flags set for this packet                   */
4434 /* Locks Held:  ipf_nat (READ)                                              */
4435 /*                                                                          */
4436 /* Translate a packet coming "in" on an interface.                          */
4437 /* ------------------------------------------------------------------------ */
4438 int fr_natin(fin, nat, natadd, nflags)
4439 fr_info_t *fin;
4440 nat_t *nat;
4441 int natadd;
4442 u_32_t nflags;
4443 {
4444 	icmphdr_t *icmp;
4445 	u_short *csump;
4446 	tcphdr_t *tcp;
4447 	ipnat_t *np;
4448 	int i;
4449 	ipf_stack_t *ifs = fin->fin_ifs;
4450 
4451 	if (fin->fin_v == 6) {
4452 #ifdef	USE_INET6
4453 		return fr_nat6in(fin, nat, natadd, nflags);
4454 #else
4455 		return NULL;
4456 #endif
4457 	}
4458 
4459 #if SOLARIS && defined(_KERNEL)
4460 	net_handle_t net_data_p = ifs->ifs_ipf_ipv4;
4461 #endif
4462 
4463 	tcp = NULL;
4464 	csump = NULL;
4465 	np = nat->nat_ptr;
4466 	fin->fin_fr = nat->nat_fr;
4467 
4468 	if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4469 		(void) fr_nat_newfrag(fin, 0, nat);
4470 
4471 	if (np != NULL) {
4472 
4473 	/* ------------------------------------------------------------- */
4474 	/* A few quick notes:						 */
4475 	/*	Following are test conditions prior to calling the 	 */
4476 	/*	appr_check routine.					 */
4477 	/*								 */
4478 	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4479 	/*	with a map rule, we attempt to match the packet's	 */
4480 	/*	source port against in_dport, otherwise	we'd compare the */
4481 	/*	packet's destination.			 		 */
4482 	/* ------------------------------------------------------------- */
4483 		if (np->in_apr != NULL) {
4484 			i = appr_check(fin, nat);
4485 			if (i == -1) {
4486 				return -1;
4487 			}
4488 		}
4489 	}
4490 
4491 #ifdef	IPFILTER_SYNC
4492 	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4493 #endif
4494 
4495 	fin->fin_ip->ip_dst = nat->nat_inip;
4496 	fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4497 	if (nflags & IPN_TCPUDP)
4498 		tcp = fin->fin_dp;
4499 
4500 	/*
4501 	 * Fix up checksums, not by recalculating them, but
4502 	 * simply computing adjustments.
4503 	 * Why only do this for some platforms on inbound packets ?
4504 	 * Because for those that it is done, IP processing is yet to happen
4505 	 * and so the IPv4 header checksum has not yet been evaluated.
4506 	 * Perhaps it should always be done for the benefit of things like
4507 	 * fast forwarding (so that it doesn't need to be recomputed) but with
4508 	 * header checksum offloading, perhaps it is a moot point.
4509 	 */
4510 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4511      defined(__osf__) || defined(linux)
4512 	if (nat->nat_dir == NAT_OUTBOUND)
4513 		fix_incksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
4514 	else
4515 		fix_outcksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
4516 #endif
4517 
4518 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4519 		if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4520 			tcp->th_dport = nat->nat_inport;
4521 			fin->fin_data[1] = ntohs(nat->nat_inport);
4522 		}
4523 
4524 
4525 		if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4526 			icmp = fin->fin_dp;
4527 
4528 			icmp->icmp_id = nat->nat_inport;
4529 		}
4530 
4531 		csump = nat_proto(fin, nat, nflags);
4532 	}
4533 
4534 	/*
4535 	 * In case they are being forwarded, inbound packets always need to have
4536 	 * their checksum adjusted even if hardware checksum validation said OK.
4537 	 */
4538 	if (csump != NULL) {
4539 		if (nat->nat_dir == NAT_OUTBOUND)
4540 			fix_incksum(csump, nat->nat_sumd[0]);
4541 		else
4542 			fix_outcksum(csump, nat->nat_sumd[0]);
4543 	}
4544 
4545 #if SOLARIS && defined(_KERNEL)
4546 	if (nflags & IPN_TCPUDP &&
4547 	    NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) {
4548 		/*
4549 		 * Need to adjust the partial checksum result stored in
4550 		 * db_cksum16, which will be used for validation in IP.
4551 		 * See IP_CKSUM_RECV().
4552 		 * Adjustment data should be the inverse of the IP address
4553 		 * changes, because db_cksum16 is supposed to be the complement
4554 		 * of the pesudo header.
4555 		 */
4556 		csump = &fin->fin_m->b_datap->db_cksum16;
4557 		if (nat->nat_dir == NAT_OUTBOUND)
4558 			fix_outcksum(csump, nat->nat_sumd[1]);
4559 		else
4560 			fix_incksum(csump, nat->nat_sumd[1]);
4561 	}
4562 #endif
4563 
4564 	ifs->ifs_nat_stats.ns_mapped[0]++;
4565 	fin->fin_flx |= FI_NATED;
4566 	if (np != NULL && np->in_tag.ipt_num[0] != 0)
4567 		fin->fin_nattag = &np->in_tag;
4568 	return 1;
4569 }
4570 
4571 
4572 /* ------------------------------------------------------------------------ */
4573 /* Function:    nat_proto                                                   */
4574 /* Returns:     u_short* - pointer to transport header checksum to update,  */
4575 /*                         NULL if the transport protocol is not recognised */
4576 /*                         as needing a checksum update.                    */
4577 /* Parameters:  fin(I)    - pointer to packet information                   */
4578 /*              nat(I)    - pointer to NAT structure                        */
4579 /*              nflags(I) - NAT flags set for this packet                   */
4580 /*                                                                          */
4581 /* Return the pointer to the checksum field for each protocol so understood.*/
4582 /* If support for making other changes to a protocol header is required,    */
4583 /* that is not strictly 'address' translation, such as clamping the MSS in  */
4584 /* TCP down to a specific value, then do it from here.                      */
4585 /* ------------------------------------------------------------------------ */
4586 u_short *nat_proto(fin, nat, nflags)
4587 fr_info_t *fin;
4588 nat_t *nat;
4589 u_int nflags;
4590 {
4591 	icmphdr_t *icmp;
4592 	struct icmp6_hdr *icmp6;
4593 	u_short *csump;
4594 	tcphdr_t *tcp;
4595 	udphdr_t *udp;
4596 
4597 	csump = NULL;
4598 	if (fin->fin_out == 0) {
4599 		fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4600 	} else {
4601 		fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4602 	}
4603 
4604 	switch (fin->fin_p)
4605 	{
4606 	case IPPROTO_TCP :
4607 		tcp = fin->fin_dp;
4608 
4609 		csump = &tcp->th_sum;
4610 
4611 		/*
4612 		 * Do a MSS CLAMPING on a SYN packet,
4613 		 * only deal IPv4 for now.
4614 		 */
4615 		if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4616 			nat_mssclamp(tcp, nat->nat_mssclamp, csump);
4617 
4618 		break;
4619 
4620 	case IPPROTO_UDP :
4621 		udp = fin->fin_dp;
4622 
4623 		if (udp->uh_sum)
4624 			csump = &udp->uh_sum;
4625 		break;
4626 
4627 	case IPPROTO_ICMP :
4628 		icmp = fin->fin_dp;
4629 
4630 		if ((nflags & IPN_ICMPQUERY) != 0) {
4631 			if (icmp->icmp_cksum != 0)
4632 				csump = &icmp->icmp_cksum;
4633 		}
4634 		break;
4635 
4636 	case IPPROTO_ICMPV6 :
4637 		icmp6 = fin->fin_dp;
4638 
4639 		if ((nflags & IPN_ICMPQUERY) != 0) {
4640 			if (icmp6->icmp6_cksum != 0)
4641 				csump = &icmp6->icmp6_cksum;
4642 		}
4643 		break;
4644 	}
4645 	return csump;
4646 }
4647 
4648 
4649 /* ------------------------------------------------------------------------ */
4650 /* Function:    fr_natunload                                                */
4651 /* Returns:     Nil                                                         */
4652 /* Parameters:  Nil                                                         */
4653 /*                                                                          */
4654 /* Free all memory used by NAT structures allocated at runtime.             */
4655 /* ------------------------------------------------------------------------ */
4656 void fr_natunload(ifs)
4657 ipf_stack_t *ifs;
4658 {
4659 	ipftq_t *ifq, *ifqnext;
4660 
4661 	(void) nat_clearlist(ifs);
4662 	(void) nat_flushtable(ifs);
4663 
4664 	/*
4665 	 * Proxy timeout queues are not cleaned here because although they
4666 	 * exist on the NAT list, appr_unload is called after fr_natunload
4667 	 * and the proxies actually are responsible for them being created.
4668 	 * Should the proxy timeouts have their own list?  There's no real
4669 	 * justification as this is the only complication.
4670 	 */
4671 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4672 		ifqnext = ifq->ifq_next;
4673 		if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4674 		    (fr_deletetimeoutqueue(ifq) == 0))
4675 			fr_freetimeoutqueue(ifq, ifs);
4676 	}
4677 
4678 	if (ifs->ifs_nat_table[0] != NULL) {
4679 		KFREES(ifs->ifs_nat_table[0],
4680 		       sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4681 		ifs->ifs_nat_table[0] = NULL;
4682 	}
4683 	if (ifs->ifs_nat_table[1] != NULL) {
4684 		KFREES(ifs->ifs_nat_table[1],
4685 		       sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4686 		ifs->ifs_nat_table[1] = NULL;
4687 	}
4688 	if (ifs->ifs_nat_rules != NULL) {
4689 		KFREES(ifs->ifs_nat_rules,
4690 		       sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
4691 		ifs->ifs_nat_rules = NULL;
4692 	}
4693 	if (ifs->ifs_rdr_rules != NULL) {
4694 		KFREES(ifs->ifs_rdr_rules,
4695 		       sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
4696 		ifs->ifs_rdr_rules = NULL;
4697 	}
4698 	if (ifs->ifs_maptable != NULL) {
4699 		KFREES(ifs->ifs_maptable,
4700 		       sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
4701 		ifs->ifs_maptable = NULL;
4702 	}
4703 	if (ifs->ifs_nat_stats.ns_bucketlen[0] != NULL) {
4704 		KFREES(ifs->ifs_nat_stats.ns_bucketlen[0],
4705 		       sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4706 		ifs->ifs_nat_stats.ns_bucketlen[0] = NULL;
4707 	}
4708 	if (ifs->ifs_nat_stats.ns_bucketlen[1] != NULL) {
4709 		KFREES(ifs->ifs_nat_stats.ns_bucketlen[1],
4710 		       sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4711 		ifs->ifs_nat_stats.ns_bucketlen[1] = NULL;
4712 	}
4713 
4714 	if (ifs->ifs_fr_nat_maxbucket_reset == 1)
4715 		ifs->ifs_fr_nat_maxbucket = 0;
4716 
4717 	if (ifs->ifs_fr_nat_init == 1) {
4718 		ifs->ifs_fr_nat_init = 0;
4719 		fr_sttab_destroy(ifs->ifs_nat_tqb);
4720 
4721 		RW_DESTROY(&ifs->ifs_ipf_natfrag);
4722 		RW_DESTROY(&ifs->ifs_ipf_nat);
4723 
4724 		MUTEX_DESTROY(&ifs->ifs_ipf_nat_new);
4725 		MUTEX_DESTROY(&ifs->ifs_ipf_natio);
4726 
4727 		MUTEX_DESTROY(&ifs->ifs_nat_udptq.ifq_lock);
4728 		MUTEX_DESTROY(&ifs->ifs_nat_icmptq.ifq_lock);
4729 		MUTEX_DESTROY(&ifs->ifs_nat_iptq.ifq_lock);
4730 	}
4731 }
4732 
4733 
4734 /* ------------------------------------------------------------------------ */
4735 /* Function:    fr_natexpire                                                */
4736 /* Returns:     Nil                                                         */
4737 /* Parameters:  Nil                                                         */
4738 /*                                                                          */
4739 /* Check all of the timeout queues for entries at the top which need to be  */
4740 /* expired.                                                                 */
4741 /* ------------------------------------------------------------------------ */
4742 void fr_natexpire(ifs)
4743 ipf_stack_t *ifs;
4744 {
4745 	ipftq_t *ifq, *ifqnext;
4746 	ipftqent_t *tqe, *tqn;
4747 	int i;
4748 	SPL_INT(s);
4749 
4750 	SPL_NET(s);
4751 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4752 	for (ifq = ifs->ifs_nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4753 		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4754 			if (tqe->tqe_die > ifs->ifs_fr_ticks)
4755 				break;
4756 			tqn = tqe->tqe_next;
4757 			nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4758 		}
4759 	}
4760 
4761 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4762 		ifqnext = ifq->ifq_next;
4763 
4764 		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4765 			if (tqe->tqe_die > ifs->ifs_fr_ticks)
4766 				break;
4767 			tqn = tqe->tqe_next;
4768 			nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4769 		}
4770 	}
4771 
4772 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4773 		ifqnext = ifq->ifq_next;
4774 
4775 		if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4776 		    (ifq->ifq_ref == 0)) {
4777 			fr_freetimeoutqueue(ifq, ifs);
4778 		}
4779 	}
4780 
4781 	if (ifs->ifs_nat_doflush != 0) {
4782 		(void) nat_extraflush(2, ifs);
4783 		ifs->ifs_nat_doflush = 0;
4784 	}
4785 
4786 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4787 	SPL_X(s);
4788 }
4789 
4790 
4791 /* ------------------------------------------------------------------------ */
4792 /* Function:    fr_nataddrsync                                              */
4793 /* Returns:     Nil                                                         */
4794 /* Parameters:  ifp(I) -  pointer to network interface                      */
4795 /*              addr(I) - pointer to new network address                    */
4796 /*                                                                          */
4797 /* Walk through all of the currently active NAT sessions, looking for those */
4798 /* which need to have their translated address updated (where the interface */
4799 /* matches the one passed in) and change it, recalculating the checksum sum */
4800 /* difference too.                                                          */
4801 /* ------------------------------------------------------------------------ */
4802 void fr_nataddrsync(v, ifp, addr, ifs)
4803 int v;
4804 void *ifp;
4805 void *addr;
4806 ipf_stack_t *ifs;
4807 {
4808 	u_32_t sum1, sum2, sumd;
4809 	nat_t *nat;
4810 	ipnat_t *np;
4811 	SPL_INT(s);
4812 
4813 	if (ifs->ifs_fr_running <= 0)
4814 		return;
4815 
4816 	SPL_NET(s);
4817 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4818 
4819 	if (ifs->ifs_fr_running <= 0) {
4820 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4821 		return;
4822 	}
4823 
4824 	/*
4825 	 * Change IP addresses for NAT sessions for any protocol except TCP
4826 	 * since it will break the TCP connection anyway.  The only rules
4827 	 * which will get changed are those which are "map ... -> 0/32",
4828 	 * where the rule specifies the address is taken from the interface.
4829 	 */
4830 	for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4831 		if (addr != NULL) {
4832 			if (((ifp != NULL) && ifp != (nat->nat_ifps[0])) ||
4833 			    ((nat->nat_flags & IPN_TCP) != 0))
4834 				continue;
4835 			if ((np = nat->nat_ptr) == NULL)
4836 				continue;
4837 			if (v == 4 && np->in_v == 4) {
4838 				if (np->in_nip || np->in_outmsk != 0xffffffff)
4839 					continue;
4840 				/*
4841 				 * Change the map-to address to be the same as
4842 				 * the new one.
4843 				 */
4844 				sum1 = nat->nat_outip.s_addr;
4845 				nat->nat_outip = *(struct in_addr *)addr;
4846 				sum2 = nat->nat_outip.s_addr;
4847 			} else if (v == 6 && np->in_v == 6) {
4848 				if (!IP6_ISZERO(&np->in_next6.in6) ||
4849 				    !IP6_ISONES(&np->in_out[1].in6))
4850 					continue;
4851 				/*
4852 				 * Change the map-to address to be the same as
4853 				 * the new one.
4854 				 */
4855 				nat->nat_outip6.in6 = *(struct in6_addr *)addr;
4856 			} else
4857 				continue;
4858 
4859 		} else if (((ifp == NULL) || (ifp == nat->nat_ifps[0])) &&
4860 		    !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr)) {
4861 			if (np->in_v == 4 && (v == 4 || v == 0)) {
4862 				struct in_addr in;
4863 				if (np->in_outmsk != 0xffffffff || np->in_nip)
4864 					continue;
4865 				/*
4866 				 * Change the map-to address to be the same as
4867 				 * the new one.
4868 				 */
4869 				sum1 = nat->nat_outip.s_addr;
4870 				if (fr_ifpaddr(4, FRI_NORMAL, nat->nat_ifps[0],
4871 					       &in, NULL, ifs) != -1)
4872 					nat->nat_outip = in;
4873 				sum2 = nat->nat_outip.s_addr;
4874 			} else if (np->in_v == 6 && (v == 6 || v == 0)) {
4875 				struct in6_addr in6;
4876 				if (!IP6_ISZERO(&np->in_next6.in6) ||
4877 				    !IP6_ISONES(&np->in_out[1].in6))
4878 					continue;
4879 				/*
4880 				 * Change the map-to address to be the same as
4881 				 * the new one.
4882 				 */
4883 				if (fr_ifpaddr(6, FRI_NORMAL, nat->nat_ifps[0],
4884 					       (void *)&in6, NULL, ifs) != -1)
4885 					nat->nat_outip6.in6 = in6;
4886 			} else
4887 				continue;
4888 		} else {
4889 			continue;
4890 		}
4891 
4892 		if (sum1 == sum2)
4893 			continue;
4894 		/*
4895 		 * Readjust the checksum adjustment to take into
4896 		 * account the new IP#.
4897 		 */
4898 		CALC_SUMD(sum1, sum2, sumd);
4899 		/* XXX - dont change for TCP when solaris does
4900 		 * hardware checksumming.
4901 		 */
4902 		sumd += nat->nat_sumd[0];
4903 		nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4904 		nat->nat_sumd[1] = nat->nat_sumd[0];
4905 	}
4906 
4907 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4908 	SPL_X(s);
4909 }
4910 
4911 
4912 /* ------------------------------------------------------------------------ */
4913 /* Function:    fr_natifpsync                                               */
4914 /* Returns:     Nil                                                         */
4915 /* Parameters:  action(I) - how we are syncing                              */
4916 /*              ifp(I)    - pointer to network interface                    */
4917 /*              name(I)   - name of interface to sync to                    */
4918 /*                                                                          */
4919 /* This function is used to resync the mapping of interface names and their */
4920 /* respective 'pointers'.  For "action == IPFSYNC_RESYNC", resync all       */
4921 /* interfaces by doing a new lookup of name to 'pointer'.  For "action ==   */
4922 /* IPFSYNC_NEWIFP", treat ifp as the new pointer value associated with      */
4923 /* "name" and for "action == IPFSYNC_OLDIFP", ifp is a pointer for which    */
4924 /* there is no longer any interface associated with it.                     */
4925 /* ------------------------------------------------------------------------ */
4926 void fr_natifpsync(action, v, ifp, name, ifs)
4927 int action, v;
4928 void *ifp;
4929 char *name;
4930 ipf_stack_t *ifs;
4931 {
4932 #if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL)
4933 	int s;
4934 #endif
4935 	nat_t *nat;
4936 	ipnat_t *n;
4937 	int nv;
4938 
4939 	if (ifs->ifs_fr_running <= 0)
4940 		return;
4941 
4942 	SPL_NET(s);
4943 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4944 
4945 	if (ifs->ifs_fr_running <= 0) {
4946 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4947 		return;
4948 	}
4949 
4950 	switch (action)
4951 	{
4952 	case IPFSYNC_RESYNC :
4953 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4954 			nv = (v == 0) ? nat->nat_v : v;
4955 			if (nat->nat_v != nv)
4956 				continue;
4957 			if ((ifp == nat->nat_ifps[0]) ||
4958 			    (nat->nat_ifps[0] == (void *)-1)) {
4959 				nat->nat_ifps[0] =
4960 				    fr_resolvenic(nat->nat_ifnames[0], nv, ifs);
4961 			}
4962 
4963 			if ((ifp == nat->nat_ifps[1]) ||
4964 			    (nat->nat_ifps[1] == (void *)-1)) {
4965 				nat->nat_ifps[1] =
4966 				    fr_resolvenic(nat->nat_ifnames[1], nv, ifs);
4967 			}
4968 		}
4969 
4970 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4971 			nv = (v == 0) ? (int)n->in_v : v;
4972 			if ((int)n->in_v != nv)
4973 				continue;
4974 			if (n->in_ifps[0] == ifp ||
4975 			    n->in_ifps[0] == (void *)-1) {
4976 				n->in_ifps[0] =
4977 				    fr_resolvenic(n->in_ifnames[0], nv, ifs);
4978 			}
4979 			if (n->in_ifps[1] == ifp ||
4980 			    n->in_ifps[1] == (void *)-1) {
4981 				n->in_ifps[1] =
4982 				    fr_resolvenic(n->in_ifnames[1], nv, ifs);
4983 			}
4984 		}
4985 		break;
4986 	case IPFSYNC_NEWIFP :
4987 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4988 			if (nat->nat_v != v)
4989 				continue;
4990 			if (!strncmp(name, nat->nat_ifnames[0],
4991 				     sizeof(nat->nat_ifnames[0])))
4992 				nat->nat_ifps[0] = ifp;
4993 			if (!strncmp(name, nat->nat_ifnames[1],
4994 				     sizeof(nat->nat_ifnames[1])))
4995 				nat->nat_ifps[1] = ifp;
4996 		}
4997 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4998 			if ((int)n->in_v != v)
4999 				continue;
5000 			if (!strncmp(name, n->in_ifnames[0],
5001 				     sizeof(n->in_ifnames[0])))
5002 				n->in_ifps[0] = ifp;
5003 			if (!strncmp(name, n->in_ifnames[1],
5004 				     sizeof(n->in_ifnames[1])))
5005 				n->in_ifps[1] = ifp;
5006 		}
5007 		break;
5008 	case IPFSYNC_OLDIFP :
5009 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
5010 			if (nat->nat_v != v)
5011 				continue;
5012 			if (ifp == nat->nat_ifps[0])
5013 				nat->nat_ifps[0] = (void *)-1;
5014 			if (ifp == nat->nat_ifps[1])
5015 				nat->nat_ifps[1] = (void *)-1;
5016 		}
5017 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
5018 			if ((int)n->in_v != v)
5019 				continue;
5020 			if (n->in_ifps[0] == ifp)
5021 				n->in_ifps[0] = (void *)-1;
5022 			if (n->in_ifps[1] == ifp)
5023 				n->in_ifps[1] = (void *)-1;
5024 		}
5025 		break;
5026 	}
5027 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5028 	SPL_X(s);
5029 }
5030 
5031 
5032 /* ------------------------------------------------------------------------ */
5033 /* Function:    nat_icmpquerytype4                                          */
5034 /* Returns:     int - 1 == success, 0 == failure                            */
5035 /* Parameters:  icmptype(I) - ICMP type number                              */
5036 /*                                                                          */
5037 /* Tests to see if the ICMP type number passed is a query/response type or  */
5038 /* not.                                                                     */
5039 /* ------------------------------------------------------------------------ */
5040 static INLINE int nat_icmpquerytype4(icmptype)
5041 int icmptype;
5042 {
5043 
5044 	/*
5045 	 * For the ICMP query NAT code, it is essential that both the query
5046 	 * and the reply match on the NAT rule. Because the NAT structure
5047 	 * does not keep track of the icmptype, and a single NAT structure
5048 	 * is used for all icmp types with the same src, dest and id, we
5049 	 * simply define the replies as queries as well. The funny thing is,
5050 	 * altough it seems silly to call a reply a query, this is exactly
5051 	 * as it is defined in the IPv4 specification
5052 	 */
5053 
5054 	switch (icmptype)
5055 	{
5056 
5057 	case ICMP_ECHOREPLY:
5058 	case ICMP_ECHO:
5059 	/* route aedvertisement/solliciation is currently unsupported: */
5060 	/* it would require rewriting the ICMP data section            */
5061 	case ICMP_TSTAMP:
5062 	case ICMP_TSTAMPREPLY:
5063 	case ICMP_IREQ:
5064 	case ICMP_IREQREPLY:
5065 	case ICMP_MASKREQ:
5066 	case ICMP_MASKREPLY:
5067 		return 1;
5068 	default:
5069 		return 0;
5070 	}
5071 }
5072 
5073 
5074 /* ------------------------------------------------------------------------ */
5075 /* Function:    nat_log                                                     */
5076 /* Returns:     Nil                                                         */
5077 /* Parameters:  nat(I)  - pointer to NAT structure                          */
5078 /*              type(I) - type of log entry to create                       */
5079 /*                                                                          */
5080 /* Creates a NAT log entry.                                                 */
5081 /* ------------------------------------------------------------------------ */
5082 void nat_log(nat, type, ifs)
5083 struct nat *nat;
5084 u_int type;
5085 ipf_stack_t *ifs;
5086 {
5087 #ifdef	IPFILTER_LOG
5088 # ifndef LARGE_NAT
5089 	struct ipnat *np;
5090 	int rulen;
5091 # endif
5092 	struct natlog natl;
5093 	void *items[1];
5094 	size_t sizes[1];
5095 	int types[1];
5096 
5097 	natl.nlg_inip = nat->nat_inip6;
5098 	natl.nlg_outip = nat->nat_outip6;
5099 	natl.nlg_origip = nat->nat_oip6;
5100 	natl.nlg_bytes[0] = nat->nat_bytes[0];
5101 	natl.nlg_bytes[1] = nat->nat_bytes[1];
5102 	natl.nlg_pkts[0] = nat->nat_pkts[0];
5103 	natl.nlg_pkts[1] = nat->nat_pkts[1];
5104 	natl.nlg_origport = nat->nat_oport;
5105 	natl.nlg_inport = nat->nat_inport;
5106 	natl.nlg_outport = nat->nat_outport;
5107 	natl.nlg_p = nat->nat_p;
5108 	natl.nlg_type = type;
5109 	natl.nlg_rule = -1;
5110 	natl.nlg_v = nat->nat_v;
5111 # ifndef LARGE_NAT
5112 	if (nat->nat_ptr != NULL) {
5113 		for (rulen = 0, np = ifs->ifs_nat_list; np;
5114 		     np = np->in_next, rulen++)
5115 			if (np == nat->nat_ptr) {
5116 				natl.nlg_rule = rulen;
5117 				break;
5118 			}
5119 	}
5120 # endif
5121 	items[0] = &natl;
5122 	sizes[0] = sizeof(natl);
5123 	types[0] = 0;
5124 
5125 	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1, ifs);
5126 #endif
5127 }
5128 
5129 
#if defined(__OpenBSD__)
/* ------------------------------------------------------------------------ */
/* Function:    nat_ifdetach                                                */
/* Returns:     Nil                                                         */
/* Parameters:  ifp(I) - pointer to network interface                       */
/*              ifs(I) - pointer to the IPFilter stack instance             */
/*                                                                          */
/* OpenBSD compatibility hook: hands the interface to frsync() so that      */
/* IPFilter's references to it are brought up to date.                      */
/* ------------------------------------------------------------------------ */
void nat_ifdetach(ifp, ifs)
void *ifp;
ipf_stack_t *ifs;
{
	/* All of the work is delegated. */
	frsync(ifp, ifs);
}
#endif
5147 
5148 
5149 /* ------------------------------------------------------------------------ */
5150 /* Function:    fr_ipnatderef                                               */
5151 /* Returns:     Nil                                                         */
5152 /* Parameters:  inp(I) - pointer to pointer to NAT rule                     */
5153 /* Write Locks: ipf_nat                                                     */
5154 /*                                                                          */
5155 /* ------------------------------------------------------------------------ */
5156 void fr_ipnatderef(inp, ifs)
5157 ipnat_t **inp;
5158 ipf_stack_t *ifs;
5159 {
5160 	ipnat_t *in;
5161 
5162 	in = *inp;
5163 	*inp = NULL;
5164 	in->in_use--;
5165 	if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
5166 		if (in->in_apr)
5167 			appr_free(in->in_apr);
5168 		KFREE(in);
5169 		ifs->ifs_nat_stats.ns_rules--;
5170 #ifdef notdef
5171 #if SOLARIS
5172 		if (ifs->ifs_nat_stats.ns_rules == 0)
5173 			ifs->ifs_pfil_delayed_copy = 1;
5174 #endif
5175 #endif
5176 	}
5177 }
5178 
5179 
5180 /* ------------------------------------------------------------------------ */
5181 /* Function:    fr_natderef                                                 */
5182 /* Returns:     Nil                                                         */
5183 /* Parameters:  isp(I) - pointer to pointer to NAT table entry              */
5184 /*                                                                          */
5185 /* Decrement the reference counter for this NAT table entry and free it if  */
5186 /* there are no more things using it.                                       */
5187 /*                                                                          */
5188 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */
5189 /* structure *because* it only gets called on paths _after_ nat_ref has been*/
5190 /* incremented.  If nat_ref == 1 then we shouldn't decrement it here        */
5191 /* because nat_delete() will do that and send nat_ref to -1.                */
5192 /*                                                                          */
5193 /* Holding the lock on nat_lock is required to serialise nat_delete() being */
5194 /* called from a NAT flush ioctl with a deref happening because of a packet.*/
5195 /* ------------------------------------------------------------------------ */
5196 void fr_natderef(natp, ifs)
5197 nat_t **natp;
5198 ipf_stack_t *ifs;
5199 {
5200 	nat_t *nat;
5201 
5202 	nat = *natp;
5203 	*natp = NULL;
5204 
5205 	MUTEX_ENTER(&nat->nat_lock);
5206 	if (nat->nat_ref > 1) {
5207 		nat->nat_ref--;
5208 		MUTEX_EXIT(&nat->nat_lock);
5209 		return;
5210 	}
5211 	MUTEX_EXIT(&nat->nat_lock);
5212 
5213 	WRITE_ENTER(&ifs->ifs_ipf_nat);
5214 	nat_delete(nat, NL_EXPIRE, ifs);
5215 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5216 }
5217 
5218 
5219 /* ------------------------------------------------------------------------ */
5220 /* Function:    fr_natclone                                                 */
5221 /* Returns:     ipstate_t* - NULL == cloning failed,                        */
5222 /*                           else pointer to new state structure            */
5223 /* Parameters:  fin(I) - pointer to packet information                      */
5224 /*              is(I)  - pointer to master state structure                  */
5225 /* Write Lock:  ipf_nat                                                     */
5226 /*                                                                          */
5227 /* Create a "duplcate" state table entry from the master.                   */
5228 /* ------------------------------------------------------------------------ */
5229 nat_t *fr_natclone(fin, nat)
5230 fr_info_t *fin;
5231 nat_t *nat;
5232 {
5233 	frentry_t *fr;
5234 	nat_t *clone;
5235 	ipnat_t *np;
5236 	ipf_stack_t *ifs = fin->fin_ifs;
5237 
5238 	KMALLOC(clone, nat_t *);
5239 	if (clone == NULL)
5240 		return NULL;
5241 	bcopy((char *)nat, (char *)clone, sizeof(*clone));
5242 
5243 	MUTEX_NUKE(&clone->nat_lock);
5244 
5245 	clone->nat_aps = NULL;
5246 	/*
5247 	 * Initialize all these so that nat_delete() doesn't cause a crash.
5248 	 */
5249 	clone->nat_tqe.tqe_pnext = NULL;
5250 	clone->nat_tqe.tqe_next = NULL;
5251 	clone->nat_tqe.tqe_ifq = NULL;
5252 	clone->nat_tqe.tqe_parent = clone;
5253 
5254 	clone->nat_flags &= ~SI_CLONE;
5255 	clone->nat_flags |= SI_CLONED;
5256 
5257 	if (clone->nat_hm)
5258 		clone->nat_hm->hm_ref++;
5259 
5260 	if (nat_insert(clone, fin->fin_rev, ifs) == -1) {
5261 		KFREE(clone);
5262 		return NULL;
5263 	}
5264 	np = clone->nat_ptr;
5265 	if (np != NULL) {
5266 		if (ifs->ifs_nat_logging)
5267 			nat_log(clone, (u_int)np->in_redir, ifs);
5268 		np->in_use++;
5269 	}
5270 	fr = clone->nat_fr;
5271 	if (fr != NULL) {
5272 		MUTEX_ENTER(&fr->fr_lock);
5273 		fr->fr_ref++;
5274 		MUTEX_EXIT(&fr->fr_lock);
5275 	}
5276 
5277 	/*
5278 	 * Because the clone is created outside the normal loop of things and
5279 	 * TCP has special needs in terms of state, initialise the timeout
5280 	 * state of the new NAT from here.
5281 	 */
5282 	if (clone->nat_p == IPPROTO_TCP) {
5283 		(void) fr_tcp_age(&clone->nat_tqe, fin, ifs->ifs_nat_tqb,
5284 				  clone->nat_flags);
5285 	}
5286 #ifdef	IPFILTER_SYNC
5287 	clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
5288 #endif
5289 	if (ifs->ifs_nat_logging)
5290 		nat_log(clone, NL_CLONE, ifs);
5291 	return clone;
5292 }
5293 
5294 
5295 /* ------------------------------------------------------------------------ */
5296 /* Function:   nat_wildok                                                   */
5297 /* Returns:    int - 1 == packet's ports match wildcards                    */
5298 /*                   0 == packet's ports don't match wildcards              */
5299 /* Parameters: nat(I)   - NAT entry                                         */
5300 /*             sport(I) - source port                                       */
5301 /*             dport(I) - destination port                                  */
5302 /*             flags(I) - wildcard flags                                    */
5303 /*             dir(I)   - packet direction                                  */
5304 /*                                                                          */
5305 /* Use NAT entry and packet direction to determine which combination of     */
5306 /* wildcard flags should be used.                                           */
5307 /* ------------------------------------------------------------------------ */
5308 int nat_wildok(nat, sport, dport, flags, dir)
5309 nat_t *nat;
5310 int sport;
5311 int dport;
5312 int flags;
5313 int dir;
5314 {
5315 	/*
5316 	 * When called by       dir is set to
5317 	 * nat_inlookup         NAT_INBOUND (0)
5318 	 * nat_outlookup        NAT_OUTBOUND (1)
5319 	 *
5320 	 * We simply combine the packet's direction in dir with the original
5321 	 * "intended" direction of that NAT entry in nat->nat_dir to decide
5322 	 * which combination of wildcard flags to allow.
5323 	 */
5324 
5325 	switch ((dir << 1) | nat->nat_dir)
5326 	{
5327 	case 3: /* outbound packet / outbound entry */
5328 		if (((nat->nat_inport == sport) ||
5329 		    (flags & SI_W_SPORT)) &&
5330 		    ((nat->nat_oport == dport) ||
5331 		    (flags & SI_W_DPORT)))
5332 			return 1;
5333 		break;
5334 	case 2: /* outbound packet / inbound entry */
5335 		if (((nat->nat_outport == sport) ||
5336 		    (flags & SI_W_DPORT)) &&
5337 		    ((nat->nat_oport == dport) ||
5338 		    (flags & SI_W_SPORT)))
5339 			return 1;
5340 		break;
5341 	case 1: /* inbound packet / outbound entry */
5342 		if (((nat->nat_oport == sport) ||
5343 		    (flags & SI_W_DPORT)) &&
5344 		    ((nat->nat_outport == dport) ||
5345 		    (flags & SI_W_SPORT)))
5346 			return 1;
5347 		break;
5348 	case 0: /* inbound packet / inbound entry */
5349 		if (((nat->nat_oport == sport) ||
5350 		    (flags & SI_W_SPORT)) &&
5351 		    ((nat->nat_outport == dport) ||
5352 		    (flags & SI_W_DPORT)))
5353 			return 1;
5354 		break;
5355 	default:
5356 		break;
5357 	}
5358 
5359 	return(0);
5360 }
5361 
5362 
5363 /* ------------------------------------------------------------------------ */
5364 /* Function:    nat_mssclamp                                                */
5365 /* Returns:     Nil                                                         */
5366 /* Parameters:  tcp(I)    - pointer to TCP header                           */
5367 /*              maxmss(I) - value to clamp the TCP MSS to                   */
5368 /*              csump(I)  - pointer to TCP checksum                         */
5369 /*                                                                          */
5370 /* Check for MSS option and clamp it if necessary.  If found and changed,   */
5371 /* then the TCP header checksum will be updated to reflect the change in    */
5372 /* the MSS.                                                                 */
5373 /* ------------------------------------------------------------------------ */
5374 static void nat_mssclamp(tcp, maxmss, csump)
5375 tcphdr_t *tcp;
5376 u_32_t maxmss;
5377 u_short *csump;
5378 {
5379 	u_char *cp, *ep, opt;
5380 	int hlen, advance;
5381 	u_32_t mss, sumd;
5382 
5383 	hlen = TCP_OFF(tcp) << 2;
5384 	if (hlen > sizeof(*tcp)) {
5385 		cp = (u_char *)tcp + sizeof(*tcp);
5386 		ep = (u_char *)tcp + hlen;
5387 
5388 		while (cp < ep) {
5389 			opt = cp[0];
5390 			if (opt == TCPOPT_EOL)
5391 				break;
5392 			else if (opt == TCPOPT_NOP) {
5393 				cp++;
5394 				continue;
5395 			}
5396 
5397 			if (cp + 1 >= ep)
5398 				break;
5399 			advance = cp[1];
5400 			if ((cp + advance > ep) || (advance <= 0))
5401 				break;
5402 			switch (opt)
5403 			{
5404 			case TCPOPT_MAXSEG:
5405 				if (advance != 4)
5406 					break;
5407 				mss = cp[2] * 256 + cp[3];
5408 				if (mss > maxmss) {
5409 					cp[2] = maxmss / 256;
5410 					cp[3] = maxmss & 0xff;
5411 					CALC_SUMD(mss, maxmss, sumd);
5412 					fix_outcksum(csump, sumd);
5413 				}
5414 				break;
5415 			default:
5416 				/* ignore unknown options */
5417 				break;
5418 			}
5419 
5420 			cp += advance;
5421 		}
5422 	}
5423 }
5424 
5425 
5426 /* ------------------------------------------------------------------------ */
5427 /* Function:    fr_setnatqueue                                              */
5428 /* Returns:     Nil                                                         */
5429 /* Parameters:  nat(I)- pointer to NAT structure                            */
5430 /*              rev(I) - forward(0) or reverse(1) direction                 */
5431 /* Locks:       ipf_nat (read or write)                                     */
5432 /*                                                                          */
5433 /* Put the NAT entry on its default queue entry, using rev as a helped in   */
5434 /* determining which queue it should be placed on.                          */
5435 /* ------------------------------------------------------------------------ */
5436 void fr_setnatqueue(nat, rev, ifs)
5437 nat_t *nat;
5438 int rev;
5439 ipf_stack_t *ifs;
5440 {
5441 	ipftq_t *oifq, *nifq;
5442 
5443 	if (nat->nat_ptr != NULL)
5444 		nifq = nat->nat_ptr->in_tqehead[rev];
5445 	else
5446 		nifq = NULL;
5447 
5448 	if (nifq == NULL) {
5449 		switch (nat->nat_p)
5450 		{
5451 		case IPPROTO_UDP :
5452 			nifq = &ifs->ifs_nat_udptq;
5453 			break;
5454 		case IPPROTO_ICMP :
5455 			nifq = &ifs->ifs_nat_icmptq;
5456 			break;
5457 		case IPPROTO_TCP :
5458 			nifq = ifs->ifs_nat_tqb + nat->nat_tqe.tqe_state[rev];
5459 			break;
5460 		default :
5461 			nifq = &ifs->ifs_nat_iptq;
5462 			break;
5463 		}
5464 	}
5465 
5466 	oifq = nat->nat_tqe.tqe_ifq;
5467 	/*
5468 	 * If it's currently on a timeout queue, move it from one queue to
5469 	 * another, else put it on the end of the newly determined queue.
5470 	 */
5471 	if (oifq != NULL)
5472 		fr_movequeue(&nat->nat_tqe, oifq, nifq, ifs);
5473 	else
5474 		fr_queueappend(&nat->nat_tqe, nifq, nat, ifs);
5475 	return;
5476 }
5477 
5478 /* ------------------------------------------------------------------------ */
5479 /* Function:    nat_getnext                                                 */
5480 /* Returns:     int - 0 == ok, else error                                   */
5481 /* Parameters:  t(I)   - pointer to ipftoken structure                      */
5482 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5483 /*              ifs - ipf stack instance                                    */
5484 /*                                                                          */
5485 /* Fetch the next nat/ipnat/hostmap structure pointer from the linked list  */
5486 /* and copy it out to the storage space pointed to by itp.  The next item   */
5487 /* in the list to look at is put back in the ipftoken structure.            */
5488 /* ------------------------------------------------------------------------ */
5489 static int nat_getnext(t, itp, ifs)
5490 ipftoken_t *t;
5491 ipfgeniter_t *itp;
5492 ipf_stack_t *ifs;
5493 {
5494 	hostmap_t *hm, *nexthm = NULL, zerohm;
5495 	ipnat_t *ipn, *nextipnat = NULL, zeroipn;
5496 	nat_t *nat, *nextnat = NULL, zeronat;
5497 	int error = 0, count;
5498 	char *dst;
5499 
5500 	if (itp->igi_nitems == 0)
5501 		return EINVAL;
5502 
5503 	READ_ENTER(&ifs->ifs_ipf_nat);
5504 
5505 	/*
5506 	 * Get "previous" entry from the token and find the next entry.
5507 	 */
5508 	switch (itp->igi_type)
5509 	{
5510 	case IPFGENITER_HOSTMAP :
5511 		hm = t->ipt_data;
5512 		if (hm == NULL) {
5513 			nexthm = ifs->ifs_ipf_hm_maplist;
5514 		} else {
5515 			nexthm = hm->hm_next;
5516 		}
5517 		break;
5518 
5519 	case IPFGENITER_IPNAT :
5520 		ipn = t->ipt_data;
5521 		if (ipn == NULL) {
5522 			nextipnat = ifs->ifs_nat_list;
5523 		} else {
5524 			nextipnat = ipn->in_next;
5525 		}
5526 		break;
5527 
5528 	case IPFGENITER_NAT :
5529 		nat = t->ipt_data;
5530 		if (nat == NULL) {
5531 			nextnat = ifs->ifs_nat_instances;
5532 		} else {
5533 			nextnat = nat->nat_next;
5534 		}
5535 		break;
5536 	default :
5537 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5538 		return EINVAL;
5539 	}
5540 
5541 	dst = itp->igi_data;
5542 	for (count = itp->igi_nitems; count > 0; count--) {
5543 		/*
5544 		 * If we found an entry, add a reference to it and update the token.
5545 		 * Otherwise, zero out data to be returned and NULL out token.
5546 		 */
5547 		switch (itp->igi_type)
5548 		{
5549 		case IPFGENITER_HOSTMAP :
5550 			if (nexthm != NULL) {
5551 				ATOMIC_INC32(nexthm->hm_ref);
5552 				t->ipt_data = nexthm;
5553 			} else {
5554 				bzero(&zerohm, sizeof(zerohm));
5555 				nexthm = &zerohm;
5556 				t->ipt_data = NULL;
5557 			}
5558 			break;
5559 		case IPFGENITER_IPNAT :
5560 			if (nextipnat != NULL) {
5561 				ATOMIC_INC32(nextipnat->in_use);
5562 				t->ipt_data = nextipnat;
5563 			} else {
5564 				bzero(&zeroipn, sizeof(zeroipn));
5565 				nextipnat = &zeroipn;
5566 				t->ipt_data = NULL;
5567 			}
5568 			break;
5569 		case IPFGENITER_NAT :
5570 			if (nextnat != NULL) {
5571 				MUTEX_ENTER(&nextnat->nat_lock);
5572 				nextnat->nat_ref++;
5573 				MUTEX_EXIT(&nextnat->nat_lock);
5574 				t->ipt_data = nextnat;
5575 			} else {
5576 				bzero(&zeronat, sizeof(zeronat));
5577 				nextnat = &zeronat;
5578 				t->ipt_data = NULL;
5579 			}
5580 			break;
5581 		default :
5582 			break;
5583 		}
5584 
5585 		/*
5586 		 * Now that we have ref, it's save to give up lock.
5587 		 */
5588 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5589 
5590 		/*
5591 		 * Copy out data and clean up references and token as needed.
5592 		 */
5593 		switch (itp->igi_type)
5594 		{
5595 		case IPFGENITER_HOSTMAP :
5596 			error = COPYOUT(nexthm, dst, sizeof(*nexthm));
5597 			if (error != 0)
5598 				error = EFAULT;
5599 			if (t->ipt_data == NULL) {
5600 				ipf_freetoken(t, ifs);
5601 				break;
5602 			} else {
5603 				if (hm != NULL) {
5604 					WRITE_ENTER(&ifs->ifs_ipf_nat);
5605 					fr_hostmapdel(&hm);
5606 					RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5607 				}
5608 				if (nexthm->hm_next == NULL) {
5609 					ipf_freetoken(t, ifs);
5610 					break;
5611 				}
5612 				dst += sizeof(*nexthm);
5613 				hm = nexthm;
5614 				nexthm = nexthm->hm_next;
5615 			}
5616 			break;
5617 
5618 		case IPFGENITER_IPNAT :
5619 			error = COPYOUT(nextipnat, dst, sizeof(*nextipnat));
5620 			if (error != 0)
5621 				error = EFAULT;
5622 			if (t->ipt_data == NULL) {
5623 				ipf_freetoken(t, ifs);
5624 				break;
5625 			} else {
5626 				if (ipn != NULL) {
5627 					WRITE_ENTER(&ifs->ifs_ipf_nat);
5628 					fr_ipnatderef(&ipn, ifs);
5629 					RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5630 				}
5631 				if (nextipnat->in_next == NULL) {
5632 					ipf_freetoken(t, ifs);
5633 					break;
5634 				}
5635 				dst += sizeof(*nextipnat);
5636 				ipn = nextipnat;
5637 				nextipnat = nextipnat->in_next;
5638 			}
5639 			break;
5640 
5641 		case IPFGENITER_NAT :
5642 			error = COPYOUT(nextnat, dst, sizeof(*nextnat));
5643 			if (error != 0)
5644 				error = EFAULT;
5645 			if (t->ipt_data == NULL) {
5646 				ipf_freetoken(t, ifs);
5647 				break;
5648 			} else {
5649 				if (nat != NULL)
5650 					fr_natderef(&nat, ifs);
5651 				if (nextnat->nat_next == NULL) {
5652 					ipf_freetoken(t, ifs);
5653 					break;
5654 				}
5655 				dst += sizeof(*nextnat);
5656 				nat = nextnat;
5657 				nextnat = nextnat->nat_next;
5658 			}
5659 			break;
5660 		default :
5661 			break;
5662 		}
5663 
5664 		if ((count == 1) || (error != 0))
5665 			break;
5666 
5667 		READ_ENTER(&ifs->ifs_ipf_nat);
5668 	}
5669 
5670 	return error;
5671 }
5672 
5673 
5674 /* ------------------------------------------------------------------------ */
5675 /* Function:    nat_iterator                                                */
5676 /* Returns:     int - 0 == ok, else error                                   */
5677 /* Parameters:  token(I) - pointer to ipftoken structure                    */
5678 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5679 /*                                                                          */
5680 /* This function acts as a handler for the SIOCGENITER ioctls that use a    */
5681 /* generic structure to iterate through a list.  There are three different  */
5682 /* linked lists of NAT related information to go through: NAT rules, active */
5683 /* NAT mappings and the NAT fragment cache.                                 */
5684 /* ------------------------------------------------------------------------ */
5685 static int nat_iterator(token, itp, ifs)
5686 ipftoken_t *token;
5687 ipfgeniter_t *itp;
5688 ipf_stack_t *ifs;
5689 {
5690 	int error;
5691 
5692 	if (itp->igi_data == NULL)
5693 		return EFAULT;
5694 
5695 	token->ipt_subtype = itp->igi_type;
5696 
5697 	switch (itp->igi_type)
5698 	{
5699 	case IPFGENITER_HOSTMAP :
5700 	case IPFGENITER_IPNAT :
5701 	case IPFGENITER_NAT :
5702 		error = nat_getnext(token, itp, ifs);
5703 		break;
5704 	case IPFGENITER_NATFRAG :
5705 		error = fr_nextfrag(token, itp, &ifs->ifs_ipfr_natlist,
5706 				    &ifs->ifs_ipfr_nattail,
5707 				    &ifs->ifs_ipf_natfrag, ifs);
5708 		break;
5709 	default :
5710 		error = EINVAL;
5711 		break;
5712 	}
5713 
5714 	return error;
5715 }
5716 
5717 
5718 /* -------------------------------------------------------------------- */
5719 /* Function:	nat_earlydrop						*/
5720 /* Returns:	number of dropped/removed entries from the queue	*/
5721 /* Parameters:	ifq - pointer to queue with entries to be processed	*/
5722 /*		maxidle - entry must be idle this long to be dropped	*/
5723 /*		ifs - ipf stack instance				*/
5724 /*									*/
5725 /* Function is invoked from nat_extraflush() only.  Removes entries	*/
5726 /* form specified timeout queue, based on how long they've sat idle,	*/
5727 /* without waiting for it to happen on its own.				*/
5728 /* -------------------------------------------------------------------- */
5729 static int nat_earlydrop(ifq, maxidle, ifs)
5730 ipftq_t *ifq;
5731 int maxidle;
5732 ipf_stack_t *ifs;
5733 {
5734 	ipftqent_t *tqe, *tqn;
5735 	nat_t *nat;
5736 	unsigned int dropped;
5737 	int droptick;
5738 
5739 	if (ifq == NULL)
5740 		return (0);
5741 
5742 	dropped = 0;
5743 
5744 	/*
5745 	 * Determine the tick representing the idle time we're interested
5746 	 * in.  If an entry exists in the queue, and it was touched before
5747 	 * that tick, then it's been idle longer than maxidle ... remove it.
5748 	 */
5749 	droptick = ifs->ifs_fr_ticks - maxidle;
5750 	tqn = ifq->ifq_head;
5751 	while ((tqe = tqn) != NULL && tqe->tqe_touched < droptick) {
5752 		tqn = tqe->tqe_next;
5753 		nat = tqe->tqe_parent;
5754 		nat_delete(nat, ISL_EXPIRE, ifs);
5755 		dropped++;
5756 	}
5757 	return (dropped);
5758 }
5759 
5760 
5761 /* --------------------------------------------------------------------- */
5762 /* Function:	nat_flushclosing					 */
5763 /* Returns:	int - number of NAT entries deleted			 */
5764 /* Parameters:	stateval(I) - State at which to start removing entries	 */
5765 /*		ifs - ipf stack instance				 */
5766 /*									 */
5767 /* Remove nat table entries for TCP connections which are in the process */
5768 /* of closing, and are in (or "beyond") state specified by 'stateval'.	 */
5769 /* --------------------------------------------------------------------- */
5770 static int nat_flushclosing(stateval, ifs)
5771 int stateval;
5772 ipf_stack_t *ifs;
5773 {
5774 	ipftq_t *ifq, *ifqn;
5775 	ipftqent_t *tqe, *tqn;
5776 	nat_t *nat;
5777 	int dropped;
5778 
5779 	dropped = 0;
5780 
5781 	/*
5782 	 * Start by deleting any entries in specific timeout queues.
5783 	 */
5784 	ifqn = &ifs->ifs_nat_tqb[stateval];
5785 	while ((ifq = ifqn) != NULL) {
5786 		ifqn = ifq->ifq_next;
5787 		dropped += nat_earlydrop(ifq, (int)0, ifs);
5788 	}
5789 
5790 	/*
5791 	 * Next, look through user defined queues for closing entries.
5792 	 */
5793 	ifqn = ifs->ifs_nat_utqe;
5794 	while ((ifq = ifqn) != NULL) {
5795 		ifqn = ifq->ifq_next;
5796 		tqn = ifq->ifq_head;
5797 		while ((tqe = tqn) != NULL) {
5798 			tqn = tqe->tqe_next;
5799 			nat = tqe->tqe_parent;
5800 			if (nat->nat_p != IPPROTO_TCP)
5801 				continue;
5802 			if ((nat->nat_tcpstate[0] >= stateval) &&
5803 			    (nat->nat_tcpstate[1] >= stateval)) {
5804 				nat_delete(nat, NL_EXPIRE, ifs);
5805 				dropped++;
5806 			}
5807 		}
5808 	}
5809 	return (dropped);
5810 }
5811 
5812 
5813 /* --------------------------------------------------------------------- */
5814 /* Function:	nat_extraflush						 */
5815 /* Returns:	int - number of NAT entries deleted			 */
5816 /* Parameters:	which(I) - how to flush the active NAT table		 */
5817 /*		ifs - ipf stack instance				 */
5818 /* Write Locks:	ipf_nat							 */
5819 /*									 */
5820 /* Flush nat tables.  Three actions currently defined:			 */
5821 /*									 */
5822 /* which == 0 :	Flush all nat table entries.				 */
5823 /*									 */
5824 /* which == 1 :	Flush entries with TCP connections which have started	 */
5825 /*		to close on both ends.					 */
5826 /*									 */
5827 /* which == 2 :	First, flush entries which are "almost" closed.  If that */
5828 /*		does not take us below specified threshold in the table, */
5829 /*		we want to flush entries with TCP connections which have */
5830 /*		been idle for a long time.  Start with connections idle	 */
5831 /*		over 12 hours,  and then work backwards in half hour	 */
5832 /*		increments to at most 30 minutes idle, and finally work	 */
5833 /*		back in 30 second increments to at most 30 seconds.	 */
5834 /* --------------------------------------------------------------------- */
5835 static int nat_extraflush(which, ifs)
5836 int which;
5837 ipf_stack_t *ifs;
5838 {
5839 	ipftq_t *ifq, *ifqn;
5840 	nat_t *nat, **natp;
5841 	int idletime, removed, idle_idx;
5842 	SPL_INT(s);
5843 
5844 	removed = 0;
5845 
5846 	SPL_NET(s);
5847 	switch (which)
5848 	{
5849 	case 0:
5850 		natp = &ifs->ifs_nat_instances;
5851 		while ((nat = *natp) != NULL) {
5852 			natp = &nat->nat_next;
5853 			nat_delete(nat, ISL_FLUSH, ifs);
5854 			removed++;
5855 		}
5856 		break;
5857 
5858 	case 1:
5859 		removed = nat_flushclosing(IPF_TCPS_CLOSE_WAIT, ifs);
5860 		break;
5861 
5862 	case 2:
5863 		removed = nat_flushclosing(IPF_TCPS_FIN_WAIT_2, ifs);
5864 
5865 		/*
5866 		 * Be sure we haven't done this in the last 10 seconds.
5867 		 */
5868 		if (ifs->ifs_fr_ticks - ifs->ifs_nat_last_force_flush <
5869 		    IPF_TTLVAL(10))
5870 			break;
5871 		ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks;
5872 
5873 		/*
5874 		 * Determine initial threshold for minimum idle time based on
5875 		 * how long ipfilter has been running.  Ipfilter needs to have
5876 		 * been up as long as the smallest interval to continue on.
5877 		 *
5878 		 * Minimum idle times stored in idletime_tab and indexed by
5879 		 * idle_idx.  Start at upper end of array and work backwards.
5880 		 *
5881 		 * Once the index is found, set the initial idle time to the
5882 		 * first interval before the current ipfilter run time.
5883 		 */
5884 		if (ifs->ifs_fr_ticks < idletime_tab[0])
5885 			break;  /* switch */
5886 		idle_idx = (sizeof (idletime_tab) / sizeof (int)) - 1;
5887 		if (ifs->ifs_fr_ticks > idletime_tab[idle_idx]) {
5888 			idletime = idletime_tab[idle_idx];
5889 		} else {
5890 			while ((idle_idx > 0) &&
5891 			    (ifs->ifs_fr_ticks < idletime_tab[idle_idx]))
5892 				idle_idx--;
5893 			idletime = (ifs->ifs_fr_ticks /
5894 				    idletime_tab[idle_idx]) *
5895 				    idletime_tab[idle_idx];
5896 		}
5897 
5898 		while ((idle_idx >= 0) &&
5899 		    (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_lo)) {
5900 			/*
5901 			 * Start with appropriate timeout queue.
5902 			 */
5903 			removed += nat_earlydrop(
5904 					&ifs->ifs_nat_tqb[IPF_TCPS_ESTABLISHED],
5905 					idletime, ifs);
5906 
5907 			/*
5908 			 * Make sure we haven't already deleted enough
5909 			 * entries before checking the user defined queues.
5910 			 */
5911 			if (NAT_TAB_WATER_LEVEL(ifs) <=
5912 			    ifs->ifs_nat_flush_lvl_lo)
5913 				break;
5914 
5915 			/*
5916 			 * Next, look through the user defined queues.
5917 			 */
5918 			ifqn = ifs->ifs_nat_utqe;
5919 			while ((ifq = ifqn) != NULL) {
5920 				ifqn = ifq->ifq_next;
5921 				removed += nat_earlydrop(ifq, idletime, ifs);
5922 			}
5923 
5924 			/*
5925 			 * Adjust the granularity of idle time.
5926 			 *
5927 			 * If we reach an interval boundary, we need to
5928 			 * either adjust the idle time accordingly or exit
5929 			 * the loop altogether (if this is very last check).
5930 			 */
5931 			idletime -= idletime_tab[idle_idx];
5932 			if (idletime < idletime_tab[idle_idx]) {
5933 				if (idle_idx != 0) {
5934 					idletime = idletime_tab[idle_idx] -
5935 					    idletime_tab[idle_idx - 1];
5936 					idle_idx--;
5937 				} else {
5938 					break;  /* while */
5939 				}
5940 			}
5941 		}
5942 		break;
5943 	default:
5944 		break;
5945 	}
5946 
5947 	SPL_X(s);
5948 	return (removed);
5949 }
5950