xref: /illumos-gate/usr/src/uts/common/inet/ipf/ip_nat.c (revision de8c4a14ec9a49bad5e62b2cfa6c1ba21de1c708)
1 /*
2  * Copyright (C) 1995-2004 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
7  * Use is subject to license terms.
8  */
9 
10 #if defined(KERNEL) || defined(_KERNEL)
11 # undef KERNEL
12 # undef _KERNEL
13 # define        KERNEL	1
14 # define        _KERNEL	1
15 #endif
16 #include <sys/errno.h>
17 #include <sys/types.h>
18 #include <sys/param.h>
19 #include <sys/time.h>
20 #include <sys/file.h>
21 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
22     defined(_KERNEL)
23 # include "opt_ipfilter_log.h"
24 #endif
25 #if !defined(_KERNEL)
26 # include <stdio.h>
27 # include <string.h>
28 # include <stdlib.h>
29 # define _KERNEL
30 # ifdef __OpenBSD__
31 struct file;
32 # endif
33 # include <sys/uio.h>
34 # undef _KERNEL
35 #endif
36 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
37 # include <sys/filio.h>
38 # include <sys/fcntl.h>
39 #else
40 # include <sys/ioctl.h>
41 #endif
42 #if !defined(AIX)
43 # include <sys/fcntl.h>
44 #endif
45 #if !defined(linux)
46 # include <sys/protosw.h>
47 #endif
48 #include <sys/socket.h>
49 #if defined(_KERNEL)
50 # include <sys/systm.h>
51 # if !defined(__SVR4) && !defined(__svr4__)
52 #  include <sys/mbuf.h>
53 # endif
54 #endif
55 #if defined(__SVR4) || defined(__svr4__)
56 # include <sys/filio.h>
57 # include <sys/byteorder.h>
58 # ifdef _KERNEL
59 #  include <sys/dditypes.h>
60 # endif
61 # include <sys/stream.h>
62 # include <sys/kmem.h>
63 #endif
64 #if __FreeBSD_version >= 300000
65 # include <sys/queue.h>
66 #endif
67 #include <net/if.h>
68 #if __FreeBSD_version >= 300000
69 # include <net/if_var.h>
70 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
71 #  include "opt_ipfilter.h"
72 # endif
73 #endif
74 #ifdef sun
75 # include <net/af.h>
76 #endif
77 #include <net/route.h>
78 #include <netinet/in.h>
79 #include <netinet/in_systm.h>
80 #include <netinet/ip.h>
81 
82 #ifdef RFC1825
83 # include <vpn/md5.h>
84 # include <vpn/ipsec.h>
85 extern struct ifnet vpnif;
86 #endif
87 
88 #if !defined(linux)
89 # include <netinet/ip_var.h>
90 #endif
91 #include <netinet/tcp.h>
92 #include <netinet/udp.h>
93 #include <netinet/ip_icmp.h>
94 #include "netinet/ip_compat.h"
95 #include <netinet/tcpip.h>
96 #include "netinet/ip_fil.h"
97 #include "netinet/ip_nat.h"
98 #include "netinet/ip_frag.h"
99 #include "netinet/ip_state.h"
100 #include "netinet/ip_proxy.h"
101 #include "netinet/ipf_stack.h"
102 #ifdef	IPFILTER_SYNC
103 #include "netinet/ip_sync.h"
104 #endif
105 #if (__FreeBSD_version >= 300000)
106 # include <sys/malloc.h>
107 #endif
108 /* END OF INCLUDES */
109 
110 #undef	SOCKADDR_IN
111 #define	SOCKADDR_IN	struct sockaddr_in
112 
113 #if !defined(lint)
114 static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
115 static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 darrenr Exp $";
116 #endif
117 
118 
119 /* ======================================================================== */
120 /* How the NAT is organised and works.                                      */
121 /*                                                                          */
122 /* Inside (interface y) NAT       Outside (interface x)                     */
123 /* -------------------- -+- -------------------------------------           */
124 /* Packet going          |   out, processed by fr_checknatout() for x       */
125 /* ------------>         |   ------------>                                  */
126 /* src=10.1.1.1          |   src=192.1.1.1                                  */
127 /*                       |                                                  */
128 /*                       |   in, processed by fr_checknatin() for x         */
129 /* <------------         |   <------------                                  */
130 /* dst=10.1.1.1          |   dst=192.1.1.1                                  */
131 /* -------------------- -+- -------------------------------------           */
132 /* fr_checknatout() - changes ip_src and if required, sport                 */
133 /*             - creates a new mapping, if required.                        */
134 /* fr_checknatin()  - changes ip_dst and if required, dport                 */
135 /*                                                                          */
136 /* In the NAT table, internal source is recorded as "in" and externally     */
137 /* seen as "out".                                                           */
138 /* ======================================================================== */
139 
140 
141 static	int	nat_clearlist __P((ipf_stack_t *));
142 static	void	nat_addnat __P((struct ipnat *, ipf_stack_t *));
143 static	void	nat_addrdr __P((struct ipnat *, ipf_stack_t *));
144 static	int	fr_natgetent __P((caddr_t, ipf_stack_t *));
145 static	int	fr_natgetsz __P((caddr_t, ipf_stack_t *));
146 static	int	fr_natputent __P((caddr_t, int, ipf_stack_t *));
147 static	void	nat_tabmove __P((nat_t *, ipf_stack_t *));
148 static	int	nat_match __P((fr_info_t *, ipnat_t *));
149 static	INLINE	int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
150 static	INLINE	int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
151 static	hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
152 				    struct in_addr, struct in_addr, u_32_t,
153 				    ipf_stack_t *));
154 static	INLINE	int nat_icmpquerytype4 __P((int));
155 static	int	nat_ruleaddrinit __P((ipnat_t *));
156 static	int	nat_siocaddnat __P((ipnat_t *, ipnat_t **, int, ipf_stack_t *));
157 static	void	nat_siocdelnat __P((ipnat_t *, ipnat_t **, int, ipf_stack_t *));
158 static	INLINE	int nat_icmperrortype4 __P((int));
159 static	INLINE	int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
160 				      tcphdr_t *, nat_t **, int));
161 static	INLINE	int nat_resolverule __P((ipnat_t *, ipf_stack_t *));
162 static	void	nat_mssclamp __P((tcphdr_t *, u_32_t, u_short *));
163 static	int	nat_getnext __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
164 static	int	nat_iterator __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
165 static	int	nat_flushtable __P((int, ipf_stack_t *));
166 
/*
 * Evaluates to non-zero when the NAT entry "n" has any of the
 * IPN_TCPUDPICMP flag bits set and its nat_inport differs from its
 * nat_outport.  "n" is evaluated more than once, so it must not have
 * side effects.
 */
#define NAT_HAS_L4_CHANGED(n)	\
	((((n)->nat_flags & (IPN_TCPUDPICMP)) != 0) && \
	 ((n)->nat_inport != (n)->nat_outport))
170 
171 
172 /* ------------------------------------------------------------------------ */
173 /* Function:    fr_natinit                                                  */
174 /* Returns:     int - 0 == success, -1 == failure                           */
175 /* Parameters:  Nil                                                         */
176 /*                                                                          */
177 /* Initialise all of the NAT locks, tables and other structures.            */
178 /* ------------------------------------------------------------------------ */
179 int fr_natinit(ifs)
180 ipf_stack_t *ifs;
181 {
182 	int i;
183 
184 	KMALLOCS(ifs->ifs_nat_table[0], nat_t **,
185 		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
186 	if (ifs->ifs_nat_table[0] != NULL)
187 		bzero((char *)ifs->ifs_nat_table[0],
188 		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
189 	else
190 		return -1;
191 
192 	KMALLOCS(ifs->ifs_nat_table[1], nat_t **,
193 		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
194 	if (ifs->ifs_nat_table[1] != NULL)
195 		bzero((char *)ifs->ifs_nat_table[1],
196 		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
197 	else
198 		return -2;
199 
200 	KMALLOCS(ifs->ifs_nat_rules, ipnat_t **,
201 		 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
202 	if (ifs->ifs_nat_rules != NULL)
203 		bzero((char *)ifs->ifs_nat_rules,
204 		      ifs->ifs_ipf_natrules_sz * sizeof(ipnat_t *));
205 	else
206 		return -3;
207 
208 	KMALLOCS(ifs->ifs_rdr_rules, ipnat_t **,
209 		 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
210 	if (ifs->ifs_rdr_rules != NULL)
211 		bzero((char *)ifs->ifs_rdr_rules,
212 		      ifs->ifs_ipf_rdrrules_sz * sizeof(ipnat_t *));
213 	else
214 		return -4;
215 
216 	KMALLOCS(ifs->ifs_maptable, hostmap_t **,
217 		 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
218 	if (ifs->ifs_maptable != NULL)
219 		bzero((char *)ifs->ifs_maptable,
220 		      sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
221 	else
222 		return -5;
223 
224 	ifs->ifs_ipf_hm_maplist = NULL;
225 
226 	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[0], u_long *,
227 		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
228 	if (ifs->ifs_nat_stats.ns_bucketlen[0] == NULL)
229 		return -1;
230 	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[0],
231 	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
232 
233 	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[1], u_long *,
234 		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
235 	if (ifs->ifs_nat_stats.ns_bucketlen[1] == NULL)
236 		return -1;
237 	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[1],
238 	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
239 
240 	if (ifs->ifs_fr_nat_maxbucket == 0) {
241 		for (i = ifs->ifs_ipf_nattable_sz; i > 0; i >>= 1)
242 			ifs->ifs_fr_nat_maxbucket++;
243 		ifs->ifs_fr_nat_maxbucket *= 2;
244 	}
245 
246 	fr_sttab_init(ifs->ifs_nat_tqb, ifs);
247 	/*
248 	 * Increase this because we may have "keep state" following this too
249 	 * and packet storms can occur if this is removed too quickly.
250 	 */
251 	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcplastack;
252 	ifs->ifs_nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_nat_udptq;
253 	ifs->ifs_nat_udptq.ifq_ttl = ifs->ifs_fr_defnatage;
254 	ifs->ifs_nat_udptq.ifq_ref = 1;
255 	ifs->ifs_nat_udptq.ifq_head = NULL;
256 	ifs->ifs_nat_udptq.ifq_tail = &ifs->ifs_nat_udptq.ifq_head;
257 	MUTEX_INIT(&ifs->ifs_nat_udptq.ifq_lock, "nat ipftq udp tab");
258 	ifs->ifs_nat_udptq.ifq_next = &ifs->ifs_nat_icmptq;
259 	ifs->ifs_nat_icmptq.ifq_ttl = ifs->ifs_fr_defnaticmpage;
260 	ifs->ifs_nat_icmptq.ifq_ref = 1;
261 	ifs->ifs_nat_icmptq.ifq_head = NULL;
262 	ifs->ifs_nat_icmptq.ifq_tail = &ifs->ifs_nat_icmptq.ifq_head;
263 	MUTEX_INIT(&ifs->ifs_nat_icmptq.ifq_lock, "nat icmp ipftq tab");
264 	ifs->ifs_nat_icmptq.ifq_next = &ifs->ifs_nat_iptq;
265 	ifs->ifs_nat_iptq.ifq_ttl = ifs->ifs_fr_defnatipage;
266 	ifs->ifs_nat_iptq.ifq_ref = 1;
267 	ifs->ifs_nat_iptq.ifq_head = NULL;
268 	ifs->ifs_nat_iptq.ifq_tail = &ifs->ifs_nat_iptq.ifq_head;
269 	MUTEX_INIT(&ifs->ifs_nat_iptq.ifq_lock, "nat ip ipftq tab");
270 	ifs->ifs_nat_iptq.ifq_next = NULL;
271 
272 	for (i = 0; i < IPF_TCP_NSTATES; i++) {
273 		if (ifs->ifs_nat_tqb[i].ifq_ttl < ifs->ifs_fr_defnaticmpage)
274 			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnaticmpage;
275 #ifdef LARGE_NAT
276 		else if (ifs->ifs_nat_tqb[i].ifq_ttl > ifs->ifs_fr_defnatage)
277 			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnatage;
278 #endif
279 	}
280 
281 	/*
282 	 * Increase this because we may have "keep state" following
283 	 * this too and packet storms can occur if this is removed
284 	 * too quickly.
285 	 */
286 	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl =
287 	    ifs->ifs_nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
288 
289 	RWLOCK_INIT(&ifs->ifs_ipf_nat, "ipf IP NAT rwlock");
290 	RWLOCK_INIT(&ifs->ifs_ipf_natfrag, "ipf IP NAT-Frag rwlock");
291 	MUTEX_INIT(&ifs->ifs_ipf_nat_new, "ipf nat new mutex");
292 	MUTEX_INIT(&ifs->ifs_ipf_natio, "ipf nat io mutex");
293 
294 	ifs->ifs_fr_nat_init = 1;
295 	ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks;
296 	return 0;
297 }
298 
299 
300 /* ------------------------------------------------------------------------ */
301 /* Function:    nat_addrdr                                                  */
302 /* Returns:     Nil                                                         */
303 /* Parameters:  n(I) - pointer to NAT rule to add                           */
304 /*                                                                          */
305 /* Adds a redirect rule to the hash table of redirect rules and the list of */
306 /* loaded NAT rules.  Updates the bitmask indicating which netmasks are in  */
307 /* use by redirect rules.                                                   */
308 /* ------------------------------------------------------------------------ */
309 static void nat_addrdr(n, ifs)
310 ipnat_t *n;
311 ipf_stack_t *ifs;
312 {
313 	ipnat_t **np;
314 	u_32_t j;
315 	u_int hv;
316 	int k;
317 
318 	k = count4bits(n->in_outmsk);
319 	if ((k >= 0) && (k != 32))
320 		ifs->ifs_rdr_masks |= 1 << k;
321 	j = (n->in_outip & n->in_outmsk);
322 	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_rdrrules_sz);
323 	np = ifs->ifs_rdr_rules + hv;
324 	while (*np != NULL)
325 		np = &(*np)->in_rnext;
326 	n->in_rnext = NULL;
327 	n->in_prnext = np;
328 	n->in_hv = hv;
329 	*np = n;
330 }
331 
332 
333 /* ------------------------------------------------------------------------ */
334 /* Function:    nat_addnat                                                  */
335 /* Returns:     Nil                                                         */
336 /* Parameters:  n(I) - pointer to NAT rule to add                           */
337 /*                                                                          */
338 /* Adds a NAT map rule to the hash table of rules and the list of  loaded   */
339 /* NAT rules.  Updates the bitmask indicating which netmasks are in use by  */
340 /* redirect rules.                                                          */
341 /* ------------------------------------------------------------------------ */
342 static void nat_addnat(n, ifs)
343 ipnat_t *n;
344 ipf_stack_t *ifs;
345 {
346 	ipnat_t **np;
347 	u_32_t j;
348 	u_int hv;
349 	int k;
350 
351 	k = count4bits(n->in_inmsk);
352 	if ((k >= 0) && (k != 32))
353 		ifs->ifs_nat_masks |= 1 << k;
354 	j = (n->in_inip & n->in_inmsk);
355 	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_natrules_sz);
356 	np = ifs->ifs_nat_rules + hv;
357 	while (*np != NULL)
358 		np = &(*np)->in_mnext;
359 	n->in_mnext = NULL;
360 	n->in_pmnext = np;
361 	n->in_hv = hv;
362 	*np = n;
363 }
364 
365 
366 /* ------------------------------------------------------------------------ */
367 /* Function:    nat_delrdr                                                  */
368 /* Returns:     Nil                                                         */
369 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
370 /*                                                                          */
371 /* Removes a redirect rule from the hash table of redirect rules.           */
372 /* ------------------------------------------------------------------------ */
373 void nat_delrdr(n)
374 ipnat_t *n;
375 {
376 	if (n->in_rnext)
377 		n->in_rnext->in_prnext = n->in_prnext;
378 	*n->in_prnext = n->in_rnext;
379 }
380 
381 
382 /* ------------------------------------------------------------------------ */
383 /* Function:    nat_delnat                                                  */
384 /* Returns:     Nil                                                         */
385 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
386 /*                                                                          */
387 /* Removes a NAT map rule from the hash table of NAT map rules.             */
388 /* ------------------------------------------------------------------------ */
389 void nat_delnat(n)
390 ipnat_t *n;
391 {
392 	if (n->in_mnext != NULL)
393 		n->in_mnext->in_pmnext = n->in_pmnext;
394 	*n->in_pmnext = n->in_mnext;
395 }
396 
397 
398 /* ------------------------------------------------------------------------ */
399 /* Function:    nat_hostmap                                                 */
400 /* Returns:     struct hostmap* - NULL if no hostmap could be created,      */
401 /*                                else a pointer to the hostmapping to use  */
402 /* Parameters:  np(I)   - pointer to NAT rule                               */
403 /*              real(I) - real IP address                                   */
404 /*              map(I)  - mapped IP address                                 */
405 /*              port(I) - destination port number                           */
406 /* Write Locks: ipf_nat                                                     */
407 /*                                                                          */
408 /* Check if an ip address has already been allocated for a given mapping    */
409 /* that is not doing port based translation.  If is not yet allocated, then */
410 /* create a new entry if a non-NULL NAT rule pointer has been supplied.     */
411 /* ------------------------------------------------------------------------ */
412 static struct hostmap *nat_hostmap(np, src, dst, map, port, ifs)
413 ipnat_t *np;
414 struct in_addr src;
415 struct in_addr dst;
416 struct in_addr map;
417 u_32_t port;
418 ipf_stack_t *ifs;
419 {
420 	hostmap_t *hm;
421 	u_int hv;
422 
423 	hv = (src.s_addr ^ dst.s_addr);
424 	hv += src.s_addr;
425 	hv += dst.s_addr;
426 	hv %= HOSTMAP_SIZE;
427 	for (hm = ifs->ifs_maptable[hv]; hm; hm = hm->hm_next)
428 		if ((hm->hm_srcip.s_addr == src.s_addr) &&
429 		    (hm->hm_dstip.s_addr == dst.s_addr) &&
430 		    ((np == NULL) || (np == hm->hm_ipnat)) &&
431 		    ((port == 0) || (port == hm->hm_port))) {
432 			hm->hm_ref++;
433 			return hm;
434 		}
435 
436 	if (np == NULL)
437 		return NULL;
438 
439 	KMALLOC(hm, hostmap_t *);
440 	if (hm) {
441 		hm->hm_hnext = ifs->ifs_ipf_hm_maplist;
442 		hm->hm_phnext = &ifs->ifs_ipf_hm_maplist;
443 		if (ifs->ifs_ipf_hm_maplist != NULL)
444 			ifs->ifs_ipf_hm_maplist->hm_phnext = &hm->hm_hnext;
445 		ifs->ifs_ipf_hm_maplist = hm;
446 
447 		hm->hm_next = ifs->ifs_maptable[hv];
448 		hm->hm_pnext = ifs->ifs_maptable + hv;
449 		if (ifs->ifs_maptable[hv] != NULL)
450 			ifs->ifs_maptable[hv]->hm_pnext = &hm->hm_next;
451 		ifs->ifs_maptable[hv] = hm;
452 		hm->hm_ipnat = np;
453 		hm->hm_srcip = src;
454 		hm->hm_dstip = dst;
455 		hm->hm_mapip = map;
456 		hm->hm_ref = 1;
457 		hm->hm_port = port;
458 		hm->hm_v = 4;
459 	}
460 	return hm;
461 }
462 
463 
464 /* ------------------------------------------------------------------------ */
465 /* Function:    fr_hostmapdel                                              */
466 /* Returns:     Nil                                                         */
467 /* Parameters:  hmp(I) - pointer to pointer to hostmap structure            */
468 /* Write Locks: ipf_nat                                                     */
469 /*                                                                          */
470 /* Decrement the references to this hostmap structure by one.  If this      */
471 /* reaches zero then remove it and free it.                                 */
472 /* ------------------------------------------------------------------------ */
473 void fr_hostmapdel(hmp)
474 struct hostmap **hmp;
475 {
476 	struct hostmap *hm;
477 
478 	hm = *hmp;
479 	*hmp = NULL;
480 
481 	hm->hm_ref--;
482 	if (hm->hm_ref == 0) {
483 		if (hm->hm_next)
484 			hm->hm_next->hm_pnext = hm->hm_pnext;
485 		*hm->hm_pnext = hm->hm_next;
486 		if (hm->hm_hnext)
487 			hm->hm_hnext->hm_phnext = hm->hm_phnext;
488 		*hm->hm_phnext = hm->hm_hnext;
489 		KFREE(hm);
490 	}
491 }
492 
493 
/* ------------------------------------------------------------------------ */
/* Function:    fix_outcksum                                                */
/* Returns:     Nil                                                         */
/* Parameters:  sp(I)  - location of 16bit checksum to update               */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Adjusts the 16bit checksum by "n" for packets going out.  The stored     */
/* value is in network byte order: it is complemented, "n" is added, the    */
/* carries are folded back into the low 16 bits twice and the complement of */
/* the result is written back.  Nothing is touched when "n" is zero.        */
/* ------------------------------------------------------------------------ */
void fix_outcksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_32_t acc;
	u_short folded;
	int pass;

	if (n == 0)
		return;

	acc = (~ntohs(*sp)) & 0xffff;
	acc += n;
	/* The first fold can itself carry, so fold a second time. */
	for (pass = 0; pass < 2; pass++)
		acc = (acc >> 16) + (acc & 0xffff);
	folded = ~(u_short)acc;
	*sp = htons(folded);
}
520 
521 
/* ------------------------------------------------------------------------ */
/* Function:    fix_incksum                                                 */
/* Returns:     Nil                                                         */
/* Parameters:  sp(I)  - location of 16bit checksum to update               */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Adjusts the 16bit checksum by "n" for packets going in.  The stored      */
/* value is in network byte order: it is complemented, the low 16 bits of   */
/* the complement of "n" are added, the carries are folded back into the    */
/* low 16 bits twice and the complement of the result is written back.      */
/* Nothing is touched when "n" is zero.                                     */
/* ------------------------------------------------------------------------ */
void fix_incksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_32_t acc;
	u_short folded;
	int pass;

	if (n == 0)
		return;

	acc = (~ntohs(*sp)) & 0xffff;
	/* Only the low 16 bits of ~n take part in the adjustment. */
	acc += (~n) & 0xffff;
	/* The first fold can itself carry, so fold a second time. */
	for (pass = 0; pass < 2; pass++)
		acc = (acc >> 16) + (acc & 0xffff);
	folded = ~(u_short)acc;
	*sp = htons(folded);
}
548 
549 
/* ------------------------------------------------------------------------ */
/* Function:    fix_datacksum                                               */
/* Returns:     Nil                                                         */
/* Parameters:  sp(I)  - location of 16bit checksum to update               */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Fix_datacksum is used *only* for the adjustments of checksums in the     */
/* data section of an IP packet.                                            */
/*                                                                          */
/* The only situation in which you need to do this is when NAT'ing an       */
/* ICMP error message.  Such a message contains in its body the IP header   */
/* of the original IP packet that caused the error.                         */
/*                                                                          */
/* You can't use fix_incksum or fix_outcksum in that case, because for the  */
/* kernel the data section of the ICMP error is just data, and no special   */
/* processing like hardware cksum or ntohs processing has been done by the  */
/* kernel on the data section.                                              */
/* ------------------------------------------------------------------------ */
void fix_datacksum(sp, n)
u_short *sp;
u_32_t n;
{
	if (n != 0) {
		u_32_t total;
		u_short result;

		total = (~ntohs(*sp)) & 0xffff;
		total += n;
		/* Fold the carries back in; once more in case that carried. */
		total = (total & 0xffff) + (total >> 16);
		total = (total & 0xffff) + (total >> 16);
		result = ~(u_short)total;
		*sp = htons(result);
	}
}
586 
587 
588 /* ------------------------------------------------------------------------ */
589 /* Function:    fr_nat_ioctl                                                */
590 /* Returns:     int - 0 == success, != 0 == failure                         */
591 /* Parameters:  data(I) - pointer to ioctl data                             */
592 /*              cmd(I)  - ioctl command integer                             */
593 /*              mode(I) - file mode bits used with open                     */
594 /*              uid(I)  - uid of caller                                     */
595 /*              ctx(I)  - pointer to give the uid context                   */
596 /*              ifs     - ipf stack instance                                */
597 /*                                                                          */
598 /* Processes an ioctl call made to operate on the IP Filter NAT device.     */
599 /* ------------------------------------------------------------------------ */
600 int fr_nat_ioctl(data, cmd, mode, uid, ctx, ifs)
601 ioctlcmd_t cmd;
602 caddr_t data;
603 int mode, uid;
604 void *ctx;
605 ipf_stack_t *ifs;
606 {
607 	ipnat_t *nat, *nt, *n = NULL, **np = NULL;
608 	int error = 0, ret, arg, getlock;
609 	ipnat_t natd;
610 
611 #if (BSD >= 199306) && defined(_KERNEL)
612 	if ((securelevel >= 2) && (mode & FWRITE))
613 		return EPERM;
614 #endif
615 
616 #if defined(__osf__) && defined(_KERNEL)
617 	getlock = 0;
618 #else
619 	getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
620 #endif
621 
622 	nat = NULL;     /* XXX gcc -Wuninitialized */
623 	if (cmd == (ioctlcmd_t)SIOCADNAT) {
624 		KMALLOC(nt, ipnat_t *);
625 	} else {
626 		nt = NULL;
627 	}
628 
629 	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
630 		if (mode & NAT_SYSSPACE) {
631 			bcopy(data, (char *)&natd, sizeof(natd));
632 			error = 0;
633 		} else {
634 			error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
635 		}
636 
637 	}
638 
639 	if (error != 0)
640 		goto done;
641 
642 	/*
643 	 * For add/delete, look to see if the NAT entry is already present
644 	 */
645 	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
646 		nat = &natd;
647 		if (nat->in_v == 0)	/* For backward compat. */
648 			nat->in_v = 4;
649 		nat->in_flags &= IPN_USERFLAGS;
650 		if ((nat->in_redir & NAT_MAPBLK) == 0) {
651 			if ((nat->in_flags & IPN_SPLIT) == 0)
652 				nat->in_inip &= nat->in_inmsk;
653 			if ((nat->in_flags & IPN_IPRANGE) == 0)
654 				nat->in_outip &= nat->in_outmsk;
655 		}
656 		MUTEX_ENTER(&ifs->ifs_ipf_natio);
657 		for (np = &ifs->ifs_nat_list; ((n = *np) != NULL);
658 		     np = &n->in_next)
659 			if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
660 			    IPN_CMPSIZ) == 0) {
661 				if (nat->in_redir == NAT_REDIRECT &&
662 				    nat->in_pnext != n->in_pnext)
663 					continue;
664 				break;
665 			}
666 	}
667 
668 	switch (cmd)
669 	{
670 	case SIOCGENITER :
671 	    {
672 		ipfgeniter_t iter;
673 		ipftoken_t *token;
674 
675 		error = fr_inobj(data, &iter, IPFOBJ_GENITER);
676 		if (error != 0)
677 			break;
678 
679 		token = ipf_findtoken(iter.igi_type, uid, ctx, ifs);
680 		if (token != NULL)
681 			error  = nat_iterator(token, &iter, ifs);
682 		else
683 			error = ESRCH;
684 		RWLOCK_EXIT(&ifs->ifs_ipf_tokens);
685 		break;
686 	    }
687 #ifdef  IPFILTER_LOG
688 	case SIOCIPFFB :
689 	{
690 		int tmp;
691 
692 		if (!(mode & FWRITE))
693 			error = EPERM;
694 		else {
695 			tmp = ipflog_clear(IPL_LOGNAT, ifs);
696 			error = BCOPYOUT((char *)&tmp, (char *)data,
697 					sizeof(tmp));
698 			if (error != 0)
699 				error = EFAULT;
700 		}
701 		break;
702 	}
703 	case SIOCSETLG :
704 		if (!(mode & FWRITE)) {
705 			error = EPERM;
706 		} else {
707 			error = BCOPYIN((char *)data,
708 					(char *)&ifs->ifs_nat_logging,
709 					sizeof(ifs->ifs_nat_logging));
710 			if (error != 0)
711 				error = EFAULT;
712 		}
713 		break;
714 	case SIOCGETLG :
715 		error = BCOPYOUT((char *)&ifs->ifs_nat_logging, (char *)data,
716 				sizeof(ifs->ifs_nat_logging));
717 		if (error != 0)
718 			error = EFAULT;
719 		break;
720 	case FIONREAD :
721 		arg = ifs->ifs_iplused[IPL_LOGNAT];
722 		error = BCOPYOUT(&arg, data, sizeof(arg));
723 		if (error != 0)
724 			error = EFAULT;
725 		break;
726 #endif
727 	case SIOCADNAT :
728 		if (!(mode & FWRITE)) {
729 			error = EPERM;
730 		} else if (n != NULL) {
731 			error = EEXIST;
732 		} else if (nt == NULL) {
733 			error = ENOMEM;
734 		}
735 		if (error != 0) {
736 			MUTEX_EXIT(&ifs->ifs_ipf_natio);
737 			break;
738 		}
739 		bcopy((char *)nat, (char *)nt, sizeof(*n));
740 		error = nat_siocaddnat(nt, np, getlock, ifs);
741 		MUTEX_EXIT(&ifs->ifs_ipf_natio);
742 		if (error == 0)
743 			nt = NULL;
744 		break;
745 	case SIOCRMNAT :
746 		if (!(mode & FWRITE)) {
747 			error = EPERM;
748 			n = NULL;
749 		} else if (n == NULL) {
750 			error = ESRCH;
751 		}
752 
753 		if (error != 0) {
754 			MUTEX_EXIT(&ifs->ifs_ipf_natio);
755 			break;
756 		}
757 		nat_siocdelnat(n, np, getlock, ifs);
758 
759 		MUTEX_EXIT(&ifs->ifs_ipf_natio);
760 		n = NULL;
761 		break;
762 	case SIOCGNATS :
763 		ifs->ifs_nat_stats.ns_table[0] = ifs->ifs_nat_table[0];
764 		ifs->ifs_nat_stats.ns_table[1] = ifs->ifs_nat_table[1];
765 		ifs->ifs_nat_stats.ns_list = ifs->ifs_nat_list;
766 		ifs->ifs_nat_stats.ns_maptable = ifs->ifs_maptable;
767 		ifs->ifs_nat_stats.ns_maplist = ifs->ifs_ipf_hm_maplist;
768 		ifs->ifs_nat_stats.ns_nattab_max = ifs->ifs_ipf_nattable_max;
769 		ifs->ifs_nat_stats.ns_nattab_sz = ifs->ifs_ipf_nattable_sz;
770 		ifs->ifs_nat_stats.ns_rultab_sz = ifs->ifs_ipf_natrules_sz;
771 		ifs->ifs_nat_stats.ns_rdrtab_sz = ifs->ifs_ipf_rdrrules_sz;
772 		ifs->ifs_nat_stats.ns_hostmap_sz = ifs->ifs_ipf_hostmap_sz;
773 		ifs->ifs_nat_stats.ns_instances = ifs->ifs_nat_instances;
774 		ifs->ifs_nat_stats.ns_apslist = ifs->ifs_ap_sess_list;
775 		error = fr_outobj(data, &ifs->ifs_nat_stats, IPFOBJ_NATSTAT);
776 		break;
777 	case SIOCGNATL :
778 	    {
779 		natlookup_t nl;
780 
781 		if (getlock) {
782 			READ_ENTER(&ifs->ifs_ipf_nat);
783 		}
784 		error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
785 		if (nl.nl_v != 6)
786 			nl.nl_v = 4;
787 		if (error == 0) {
788 			void *ptr;
789 
790 			switch (nl.nl_v)
791 			{
792 			case 4:
793 				ptr = nat_lookupredir(&nl, ifs);
794 				break;
795 #ifdef	USE_INET6
796 			case 6:
797 				ptr = nat6_lookupredir(&nl, ifs);
798 				break;
799 #endif
800 			default:
801 				ptr = NULL;
802 				break;
803 			}
804 
805 			if (ptr != NULL) {
806 				error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
807 			} else {
808 				error = ESRCH;
809 			}
810 		}
811 		if (getlock) {
812 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
813 		}
814 		break;
815 	    }
816 	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
817 		if (!(mode & FWRITE)) {
818 			error = EPERM;
819 			break;
820 		}
821 		if (getlock) {
822 			WRITE_ENTER(&ifs->ifs_ipf_nat);
823 		}
824 		error = BCOPYIN(data, &arg, sizeof(arg));
825 		if (error != 0) {
826 			error = EFAULT;
827 		} else {
828 			if (arg == FLUSH_LIST)
829 				ret = nat_clearlist(ifs);
830 			else if (VALID_TABLE_FLUSH_OPT(arg))
831 				ret = nat_flushtable(arg, ifs);
832 			else
833 				error = EINVAL;
834 		}
835 		if (getlock) {
836 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
837 		}
838 		if (error == 0) {
839 			error = BCOPYOUT(&ret, data, sizeof(ret));
840 			if (error != 0)
841 				error = EFAULT;
842 		}
843 		break;
844 	case SIOCPROXY :
845 		error = appr_ioctl(data, cmd, mode, ifs);
846 		break;
847 	case SIOCSTLCK :
848 		if (!(mode & FWRITE)) {
849 			error = EPERM;
850 		} else {
851 			error = fr_lock(data, &ifs->ifs_fr_nat_lock);
852 		}
853 		break;
854 	case SIOCSTPUT :
855 		if ((mode & FWRITE) != 0) {
856 			error = fr_natputent(data, getlock, ifs);
857 		} else {
858 			error = EACCES;
859 		}
860 		break;
861 	case SIOCSTGSZ :
862 		if (ifs->ifs_fr_nat_lock) {
863 			if (getlock) {
864 				READ_ENTER(&ifs->ifs_ipf_nat);
865 			}
866 			error = fr_natgetsz(data, ifs);
867 			if (getlock) {
868 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
869 			}
870 		} else
871 			error = EACCES;
872 		break;
873 	case SIOCSTGET :
874 		if (ifs->ifs_fr_nat_lock) {
875 			if (getlock) {
876 				READ_ENTER(&ifs->ifs_ipf_nat);
877 			}
878 			error = fr_natgetent(data, ifs);
879 			if (getlock) {
880 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
881 			}
882 		} else
883 			error = EACCES;
884 		break;
885 	case SIOCIPFDELTOK :
886 		error = BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
887 		if (error != 0) {
888 			error = EFAULT;
889 		} else {
890 			error = ipf_deltoken(arg, uid, ctx, ifs);
891 		}
892 		break;
893 	default :
894 		error = EINVAL;
895 		break;
896 	}
897 done:
898 	if (nt)
899 		KFREE(nt);
900 	return error;
901 }
902 
903 
904 /* ------------------------------------------------------------------------ */
905 /* Function:    nat_siocaddnat                                              */
906 /* Returns:     int - 0 == success, != 0 == failure                         */
907 /* Parameters:  n(I)       - pointer to new NAT rule                        */
908 /*              np(I)      - pointer to where to insert new NAT rule        */
909 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
910 /* Mutex Locks: ipf_natio                                                   */
911 /*                                                                          */
912 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
913 /* from information passed to the kernel, then add it  to the appropriate   */
914 /* NAT rule table(s).                                                       */
915 /* ------------------------------------------------------------------------ */
916 static int nat_siocaddnat(n, np, getlock, ifs)
917 ipnat_t *n, **np;
918 int getlock;
919 ipf_stack_t *ifs;
920 {
921 	int error = 0, i, j;
922 
923 	if (nat_resolverule(n, ifs) != 0)
924 		return ENOENT;
925 
926 	if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
927 		return EINVAL;
928 
929 	n->in_use = 0;
930 	if (n->in_redir & NAT_MAPBLK)
931 		n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
932 	else if (n->in_flags & IPN_AUTOPORTMAP)
933 		n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
934 	else if (n->in_flags & IPN_IPRANGE)
935 		n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
936 	else if (n->in_flags & IPN_SPLIT)
937 		n->in_space = 2;
938 	else if (n->in_outmsk != 0)
939 		n->in_space = ~ntohl(n->in_outmsk);
940 	else
941 		n->in_space = 1;
942 	if (n->in_flags & NAT_TCPUDPICMPQ) {
943 		if (ntohs(n->in_pmax) < ntohs(n->in_pmin))
944 			return EINVAL;
945 	}
946 
947 	/*
948 	 * Calculate the number of valid IP addresses in the output
949 	 * mapping range.  In all cases, the range is inclusive of
950 	 * the start and ending IP addresses.
951 	 * If to a CIDR address, lose 2: broadcast + network address
952 	 *                               (so subtract 1)
953 	 * If to a range, add one.
954 	 * If to a single IP address, set to 1.
955 	 */
956 	if (n->in_space) {
957 		if ((n->in_flags & IPN_IPRANGE) != 0)
958 			n->in_space += 1;
959 		else
960 			n->in_space -= 1;
961 	} else
962 		n->in_space = 1;
963 
964 #ifdef	USE_INET6
965 	if (n->in_v == 6 && (n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0 &&
966 	    !IP6_ISONES(&n->in_out[1]) && !IP6_ISZERO(&n->in_out[1]))
967 		IP6_ADD(&n->in_out[0], 1, &n->in_next6)
968 	else if (n->in_v == 6 &&
969 	    (n->in_flags & IPN_SPLIT) && (n->in_redir & NAT_REDIRECT))
970 		n->in_next6 = n->in_in[0];
971 	else if (n->in_v == 6)
972 		n->in_next6 = n->in_out[0];
973 	else
974 #endif
975 	if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
976 	    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
977 		n->in_nip = ntohl(n->in_outip) + 1;
978 	else if ((n->in_flags & IPN_SPLIT) &&
979 		 (n->in_redir & NAT_REDIRECT))
980 		n->in_nip = ntohl(n->in_inip);
981 	else
982 		n->in_nip = ntohl(n->in_outip);
983 
984 	if (n->in_redir & NAT_MAP) {
985 		n->in_pnext = ntohs(n->in_pmin);
986 		/*
987 		 * Multiply by the number of ports made available.
988 		 */
989 		if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
990 			n->in_space *= (ntohs(n->in_pmax) -
991 					ntohs(n->in_pmin) + 1);
992 			/*
993 			 * Because two different sources can map to
994 			 * different destinations but use the same
995 			 * local IP#/port #.
996 			 * If the result is smaller than in_space, then
997 			 * we may have wrapped around 32bits.
998 			 */
999 			i = n->in_inmsk;
1000 			if ((i != 0) && (i != 0xffffffff)) {
1001 				j = n->in_space * (~ntohl(i) + 1);
1002 				if (j >= n->in_space)
1003 					n->in_space = j;
1004 				else
1005 					n->in_space = 0xffffffff;
1006 			}
1007 		}
1008 		/*
1009 		 * If no protocol is specified, multiple by 256 to allow for
1010 		 * at least one IP:IP mapping per protocol.
1011 		 */
1012 		if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
1013 				j = n->in_space * 256;
1014 				if (j >= n->in_space)
1015 					n->in_space = j;
1016 				else
1017 					n->in_space = 0xffffffff;
1018 		}
1019 	}
1020 
1021 	/* Otherwise, these fields are preset */
1022 
1023 	if (getlock) {
1024 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1025 	}
1026 	n->in_next = NULL;
1027 	*np = n;
1028 
1029 	if (n->in_age[0] != 0)
1030 	    n->in_tqehead[0] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
1031 						  n->in_age[0], ifs);
1032 
1033 	if (n->in_age[1] != 0)
1034 	    n->in_tqehead[1] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
1035 						  n->in_age[1], ifs);
1036 
1037 	if (n->in_redir & NAT_REDIRECT) {
1038 		n->in_flags &= ~IPN_NOTDST;
1039 		switch (n->in_v)
1040 		{
1041 		case 4 :
1042 			nat_addrdr(n, ifs);
1043 			break;
1044 #ifdef	USE_INET6
1045 		case 6 :
1046 			nat6_addrdr(n, ifs);
1047 			break;
1048 #endif
1049 		default :
1050 			break;
1051 		}
1052 	}
1053 	if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1054 		n->in_flags &= ~IPN_NOTSRC;
1055 		switch (n->in_v)
1056 		{
1057 		case 4 :
1058 			nat_addnat(n, ifs);
1059 			break;
1060 #ifdef	USE_INET6
1061 		case 6 :
1062 			nat6_addnat(n, ifs);
1063 			break;
1064 #endif
1065 		default :
1066 			break;
1067 		}
1068 	}
1069 	n = NULL;
1070 	ifs->ifs_nat_stats.ns_rules++;
1071 	if (getlock) {
1072 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* WRITE */
1073 	}
1074 
1075 	return error;
1076 }
1077 
1078 
1079 /* ------------------------------------------------------------------------ */
1080 /* Function:    nat_resolvrule                                              */
1081 /* Returns:     int - 0 == success, -1 == failure                           */
1082 /* Parameters:  n(I)  - pointer to NAT rule                                 */
1083 /*                                                                          */
1084 /* Resolve some of the details inside the NAT rule.  Includes resolving	    */
1085 /* any specified interfaces and proxy labels, and determines whether or not */
1086 /* all proxy labels are correctly specified.				    */
1087 /*									    */
1088 /* Called by nat_siocaddnat() (SIOCADNAT) and fr_natputent (SIOCSTPUT).     */
1089 /* ------------------------------------------------------------------------ */
1090 static int nat_resolverule(n, ifs)
1091 ipnat_t *n;
1092 ipf_stack_t *ifs;
1093 {
1094 	n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1095 	n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], n->in_v, ifs);
1096 
1097 	n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1098 	if (n->in_ifnames[1][0] == '\0') {
1099 		(void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1100 		n->in_ifps[1] = n->in_ifps[0];
1101 	} else {
1102 		n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], n->in_v, ifs);
1103 	}
1104 
1105 	if (n->in_plabel[0] != '\0') {
1106 		n->in_apr = appr_lookup(n->in_p, n->in_plabel, ifs);
1107 		if (n->in_apr == NULL)
1108 			return -1;
1109 	}
1110 	return 0;
1111 }
1112 
1113 
1114 /* ------------------------------------------------------------------------ */
1115 /* Function:    nat_siocdelnat                                              */
1116 /* Returns:     int - 0 == success, != 0 == failure                         */
1117 /* Parameters:  n(I)       - pointer to new NAT rule                        */
1118 /*              np(I)      - pointer to where to insert new NAT rule        */
1119 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
1120 /* Mutex Locks: ipf_natio                                                   */
1121 /*                                                                          */
1122 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1123 /* from information passed to the kernel, then add it  to the appropriate   */
1124 /* NAT rule table(s).                                                       */
1125 /* ------------------------------------------------------------------------ */
1126 static void nat_siocdelnat(n, np, getlock, ifs)
1127 ipnat_t *n, **np;
1128 int getlock;
1129 ipf_stack_t *ifs;
1130 {
1131 	int i;
1132 
1133 	if (getlock) {
1134 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1135 	}
1136 	if (n->in_redir & NAT_REDIRECT)
1137 		nat_delrdr(n);
1138 	if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1139 		nat_delnat(n);
1140 	if (ifs->ifs_nat_list == NULL) {
1141 		ifs->ifs_nat_masks = 0;
1142 		ifs->ifs_rdr_masks = 0;
1143 		for (i = 0; i < 4; i++) {
1144 			ifs->ifs_nat6_masks[i] = 0;
1145 			ifs->ifs_rdr6_masks[i] = 0;
1146 		}
1147 	}
1148 
1149 	if (n->in_tqehead[0] != NULL) {
1150 		if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1151 			fr_freetimeoutqueue(n->in_tqehead[0], ifs);
1152 		}
1153 	}
1154 
1155 	if (n->in_tqehead[1] != NULL) {
1156 		if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1157 			fr_freetimeoutqueue(n->in_tqehead[1], ifs);
1158 		}
1159 	}
1160 
1161 	*np = n->in_next;
1162 
1163 	if (n->in_use == 0) {
1164 		if (n->in_apr)
1165 			appr_free(n->in_apr);
1166 		KFREE(n);
1167 		ifs->ifs_nat_stats.ns_rules--;
1168 	} else {
1169 		n->in_flags |= IPN_DELETE;
1170 		n->in_next = NULL;
1171 	}
1172 	if (getlock) {
1173 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* READ/WRITE */
1174 	}
1175 }
1176 
1177 
1178 /* ------------------------------------------------------------------------ */
1179 /* Function:    fr_natgetsz                                                 */
1180 /* Returns:     int - 0 == success, != 0 is the error value.                */
1181 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1182 /*                        get the size of.                                  */
1183 /*                                                                          */
1184 /* Handle SIOCSTGSZ.                                                        */
1185 /* Return the size of the nat list entry to be copied back to user space.   */
1186 /* The size of the entry is stored in the ng_sz field and the enture natget */
1187 /* structure is copied back to the user.                                    */
1188 /* ------------------------------------------------------------------------ */
1189 static int fr_natgetsz(data, ifs)
1190 caddr_t data;
1191 ipf_stack_t *ifs;
1192 {
1193 	ap_session_t *aps;
1194 	nat_t *nat, *n;
1195 	natget_t ng;
1196 	int err;
1197 
1198 	err = BCOPYIN(data, &ng, sizeof(ng));
1199 	if (err != 0)
1200 		return EFAULT;
1201 
1202 	nat = ng.ng_ptr;
1203 	if (!nat) {
1204 		nat = ifs->ifs_nat_instances;
1205 		ng.ng_sz = 0;
1206 		/*
1207 		 * Empty list so the size returned is 0.  Simple.
1208 		 */
1209 		if (nat == NULL) {
1210 			err = BCOPYOUT(&ng, data, sizeof(ng));
1211 			if (err != 0) {
1212 				return EFAULT;
1213 			} else {
1214 				return 0;
1215 			}
1216 		}
1217 	} else {
1218 		/*
1219 		 * Make sure the pointer we're copying from exists in the
1220 		 * current list of entries.  Security precaution to prevent
1221 		 * copying of random kernel data.
1222 		 */
1223 		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1224 			if (n == nat)
1225 				break;
1226 		if (!n)
1227 			return ESRCH;
1228 	}
1229 
1230 	/*
1231 	 * Incluse any space required for proxy data structures.
1232 	 */
1233 	ng.ng_sz = sizeof(nat_save_t);
1234 	aps = nat->nat_aps;
1235 	if (aps != NULL) {
1236 		ng.ng_sz += sizeof(ap_session_t) - 4;
1237 		if (aps->aps_data != 0)
1238 			ng.ng_sz += aps->aps_psiz;
1239 	}
1240 
1241 	err = BCOPYOUT(&ng, data, sizeof(ng));
1242 	if (err != 0)
1243 		return EFAULT;
1244 	return 0;
1245 }
1246 
1247 
1248 /* ------------------------------------------------------------------------ */
1249 /* Function:    fr_natgetent                                                */
1250 /* Returns:     int - 0 == success, != 0 is the error value.                */
1251 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1252 /*                        to NAT structure to copy out.                     */
1253 /*                                                                          */
1254 /* Handle SIOCSTGET.                                                        */
1255 /* Copies out NAT entry to user space.  Any additional data held for a      */
1256 /* proxy is also copied, as to is the NAT rule which was responsible for it */
1257 /* ------------------------------------------------------------------------ */
1258 static int fr_natgetent(data, ifs)
1259 caddr_t data;
1260 ipf_stack_t *ifs;
1261 {
1262 	int error, outsize;
1263 	ap_session_t *aps;
1264 	nat_save_t *ipn, ipns;
1265 	nat_t *n, *nat;
1266 
1267 	error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1268 	if (error != 0)
1269 		return error;
1270 
1271 	if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1272 		return EINVAL;
1273 
1274 	KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1275 	if (ipn == NULL)
1276 		return ENOMEM;
1277 
1278 	ipn->ipn_dsize = ipns.ipn_dsize;
1279 	nat = ipns.ipn_next;
1280 	if (nat == NULL) {
1281 		nat = ifs->ifs_nat_instances;
1282 		if (nat == NULL) {
1283 			if (ifs->ifs_nat_instances == NULL)
1284 				error = ENOENT;
1285 			goto finished;
1286 		}
1287 	} else {
1288 		/*
1289 		 * Make sure the pointer we're copying from exists in the
1290 		 * current list of entries.  Security precaution to prevent
1291 		 * copying of random kernel data.
1292 		 */
1293 		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1294 			if (n == nat)
1295 				break;
1296 		if (n == NULL) {
1297 			error = ESRCH;
1298 			goto finished;
1299 		}
1300 	}
1301 	ipn->ipn_next = nat->nat_next;
1302 
1303 	/*
1304 	 * Copy the NAT structure.
1305 	 */
1306 	bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1307 
1308 	/*
1309 	 * If we have a pointer to the NAT rule it belongs to, save that too.
1310 	 */
1311 	if (nat->nat_ptr != NULL)
1312 		bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1313 		      sizeof(ipn->ipn_ipnat));
1314 
1315 	/*
1316 	 * If we also know the NAT entry has an associated filter rule,
1317 	 * save that too.
1318 	 */
1319 	if (nat->nat_fr != NULL)
1320 		bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1321 		      sizeof(ipn->ipn_fr));
1322 
1323 	/*
1324 	 * Last but not least, if there is an application proxy session set
1325 	 * up for this NAT entry, then copy that out too, including any
1326 	 * private data saved along side it by the proxy.
1327 	 */
1328 	aps = nat->nat_aps;
1329 	outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1330 	if (aps != NULL) {
1331 		char *s;
1332 
1333 		if (outsize < sizeof(*aps)) {
1334 			error = ENOBUFS;
1335 			goto finished;
1336 		}
1337 
1338 		s = ipn->ipn_data;
1339 		bcopy((char *)aps, s, sizeof(*aps));
1340 		s += sizeof(*aps);
1341 		outsize -= sizeof(*aps);
1342 		if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1343 			bcopy(aps->aps_data, s, aps->aps_psiz);
1344 		else
1345 			error = ENOBUFS;
1346 	}
1347 	if (error == 0) {
1348 		error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1349 	}
1350 
1351 finished:
1352 	if (ipn != NULL) {
1353 		KFREES(ipn, ipns.ipn_dsize);
1354 	}
1355 	return error;
1356 }
1357 
1358 /* ------------------------------------------------------------------------ */
1359 /* Function:    nat_calc_chksum_diffs					    */
1360 /* Returns:     void							    */
1361 /* Parameters:  nat	-	pointer to NAT table entry		    */
1362 /*                                                                          */
1363 /* Function calculates chksum deltas for IP header (nat_ipsumd) and TCP/UDP */
1364 /* headers (nat_sumd). The things for L4 (UDP/TCP) get complicated when     */
1365 /* we are dealing with partial chksum offload. For these cases we need to   */
1366 /* compute a 'partial chksum delta'. The 'partial chksum delta'is stored    */
1367 /* into nat_sumd[1], while ordinary chksum delta for TCP/UDP is in 	    */
1368 /* nat_sumd[0]. 							    */
1369 /*									    */
1370 /* The function accepts initialized NAT table entry and computes the deltas */
1371 /* from nat_inip/nat_outip members. The function is called right before	    */
1372 /* the new entry is inserted into the table.				    */
1373 /*									    */
1374 /* The ipsumd (IP hedaer chksum delta adjustment) is computed as a chksum   */
1375 /* of delta between original and new IP addresses.			    */
1376 /*									    */
1377 /* the nat_sumd[0] (TCP/UDP header chksum delta adjustment) is computed as  */
1378 /* a chkusm of delta between original an new IP addrress:port tupples.	    */
1379 /*									    */
1380 /* Some facts about chksum, we should remember:				    */
1381 /*	IP header chksum covers IP header only				    */
1382 /*									    */
1383 /*	TCP/UDP chksum covers data payload and so called pseudo header	    */
1384 /*		SRC, DST IP address					    */
1385 /*		SRC, DST Port						    */
1386 /*		length of payload					    */
1387 /*									    */
1388 /* The partial chksum delta (nat_sumd[1] is used to adjust db_ckusm16	    */
1389 /* member of dblk_t structure. The db_ckusm16 member is not part of 	    */
1390 /* IP/UDP/TCP header it is 16 bit value computed by NIC driver with partial */
1391 /* chksum offload capacbility for every inbound packet. The db_cksum16 is   */
1392 /* stored along with other IP packet data in dblk_t structure and used in   */
1393 /* for IP/UDP/TCP chksum validation later in ip.c. 			    */
1394 /*									    */
1395 /* The partial chksum delta (adjustment, nat_sumd[1]) is computed as chksum */
1396 /* of delta between new and orig address. NOTE: the order of operands for   */
1397 /* partial delta operation is swapped compared to computing the IP/TCP/UDP  */
1398 /* header adjustment. It is by design see (IP_CKSUM_RECV() macro in ip.c).  */
1399 /*									    */
1400 /* ------------------------------------------------------------------------ */
1401 void nat_calc_chksum_diffs(nat)
1402 nat_t *nat;
1403 {
1404 	u_32_t	sum_orig = 0;
1405 	u_32_t	sum_changed = 0;
1406 	u_32_t	sumd;
1407 	u_32_t	ipsum_orig = 0;
1408 	u_32_t	ipsum_changed = 0;
1409 
1410 	if (nat->nat_v != 4 && nat->nat_v != 6)
1411 		return;
1412 
1413 	/*
1414 	 * the switch calculates operands for CALC_SUMD(),
1415 	 * which will compute the partial chksum delta.
1416 	 */
1417 	switch (nat->nat_dir)
1418 	{
1419 	case NAT_INBOUND:
1420 		/*
1421 		 * we are dealing with RDR rule (DST address gets
1422 		 * modified on packet from client)
1423 		 */
1424 		if (nat->nat_v == 4) {
1425 			sum_changed = LONG_SUM(ntohl(nat->nat_inip.s_addr));
1426 			sum_orig = LONG_SUM(ntohl(nat->nat_outip.s_addr));
1427 		} else {
1428 			sum_changed = LONG_SUM6(&nat->nat_inip6);
1429 			sum_orig = LONG_SUM6(&nat->nat_outip6);
1430 		}
1431 		break;
1432 	case NAT_OUTBOUND:
1433 		/*
1434 		 * we are dealing with MAP rule (SRC address gets
1435 		 * modified on packet from client)
1436 		 */
1437 		if (nat->nat_v == 4) {
1438 			sum_changed = LONG_SUM(ntohl(nat->nat_outip.s_addr));
1439 			sum_orig = LONG_SUM(ntohl(nat->nat_inip.s_addr));
1440 		} else {
1441 			sum_changed = LONG_SUM6(&nat->nat_outip6);
1442 			sum_orig = LONG_SUM6(&nat->nat_inip6);
1443 		}
1444 		break;
1445 	default: ;
1446 		break;
1447 	}
1448 
1449 	/*
1450 	 * we also preserve CALC_SUMD() operands here, for IP chksum delta
1451 	 * calculation, which happens at the end of function.
1452 	 */
1453 	ipsum_changed = sum_changed;
1454 	ipsum_orig = sum_orig;
1455 	/*
1456 	 * NOTE: the order of operands for partial chksum adjustment
1457 	 * computation has to be swapped!
1458 	 */
1459 	CALC_SUMD(sum_changed, sum_orig, sumd);
1460 	nat->nat_sumd[1] = (sumd & 0xffff) + (sumd >> 16);
1461 
1462 	if (nat->nat_flags & (IPN_TCPUDP | IPN_ICMPQUERY)) {
1463 
1464 		/*
1465 		 * switch calculates operands for CALC_SUMD(), which will
1466 		 * compute the full chksum delta.
1467 		 */
1468 		switch (nat->nat_dir)
1469 		{
1470 		case NAT_INBOUND:
1471 			if (nat->nat_v == 4) {
1472 				sum_changed = LONG_SUM(
1473 				    ntohl(nat->nat_inip.s_addr) +
1474 				    ntohs(nat->nat_inport));
1475 				sum_orig = LONG_SUM(
1476 				    ntohl(nat->nat_outip.s_addr) +
1477 				    ntohs(nat->nat_outport));
1478 			} else {
1479 				sum_changed = LONG_SUM6(&nat->nat_inip6) +
1480 				    ntohs(nat->nat_inport);
1481 				sum_orig = LONG_SUM6(&nat->nat_outip6) +
1482 				    ntohs(nat->nat_outport);
1483 			}
1484 			break;
1485 		case NAT_OUTBOUND:
1486 			if (nat->nat_v == 4) {
1487 				sum_changed = LONG_SUM(
1488 				    ntohl(nat->nat_outip.s_addr) +
1489 				    ntohs(nat->nat_outport));
1490 				sum_orig = LONG_SUM(
1491 				    ntohl(nat->nat_inip.s_addr) +
1492 				    ntohs(nat->nat_inport));
1493 			} else {
1494 				sum_changed = LONG_SUM6(&nat->nat_outip6) +
1495 				    ntohs(nat->nat_outport);
1496 				sum_orig = LONG_SUM6(&nat->nat_inip6) +
1497 				    ntohs(nat->nat_inport);
1498 			}
1499 			break;
1500 		default: ;
1501 			break;
1502 		}
1503 
1504 		CALC_SUMD(sum_orig, sum_changed, sumd);
1505 		nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
1506 
1507 		if (!(nat->nat_flags & IPN_TCPUDP)) {
1508 			/*
1509 			 * partial HW chksum offload works for TCP/UDP headers only,
1510 			 * so we need to enforce full chksum adjustment for ICMP
1511 			 */
1512 			nat->nat_sumd[1] = nat->nat_sumd[0];
1513 		}
1514 	}
1515 	else
1516 		nat->nat_sumd[0] = nat->nat_sumd[1];
1517 
1518 	/*
1519 	 * we may reuse the already computed nat_sumd[0] for IP header chksum
1520 	 * adjustment in case the L4 (TCP/UDP header) is not changed by NAT.
1521 	 */
1522 	if (nat->nat_v == 4) {
1523 		if (NAT_HAS_L4_CHANGED(nat)) {
1524 			/*
1525 			 * bad luck, NAT changes also the L4 header, use IP
1526 			 * addresses to compute chksum adjustment for IP header.
1527 			 */
1528 			CALC_SUMD(ipsum_orig, ipsum_changed, sumd);
1529 			nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
1530 		} else {
1531 			/*
1532 			 * the NAT does not change L4 hdr -> reuse chksum
1533 			 * adjustment for IP hdr.
1534 			 */
1535 			nat->nat_ipsumd = nat->nat_sumd[0];
1536 
1537 			/*
1538 			 * if L4 header does not use chksum - zero out deltas
1539 			 */
1540 			if (!(nat->nat_flags & IPN_TCPUDP)) {
1541 				nat->nat_sumd[0] = 0;
1542 				nat->nat_sumd[1] = 0;
1543 			}
1544 		}
1545 	}
1546 
1547 	return;
1548 }
1549 
1550 /* ------------------------------------------------------------------------ */
1551 /* Function:    fr_natputent                                                */
1552 /* Returns:     int - 0 == success, != 0 is the error value.                */
1553 /* Parameters:  data(I)    - pointer to natget structure with NAT           */
1554 /*                           structure information to load into the kernel  */
1555 /*              getlock(I) - flag indicating whether or not a write lock    */
1556 /*                           on ipf_nat is already held.                    */
1557 /*              ifs        - ipf stack instance                             */
1558 /*                                                                          */
1559 /* Handle SIOCSTPUT.                                                        */
1560 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1561 /* firewall rule data structures, if pointers to them indicate so.          */
1562 /* ------------------------------------------------------------------------ */
1563 static int fr_natputent(data, getlock, ifs)
1564 caddr_t data;
1565 int getlock;
1566 ipf_stack_t *ifs;
1567 {
1568 	nat_save_t ipn, *ipnn;
1569 	ap_session_t *aps;
1570 	nat_t *n, *nat;
1571 	frentry_t *fr;
1572 	fr_info_t fin;
1573 	ipnat_t *in;
1574 	int error;
1575 
1576 	error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1577 	if (error != 0)
1578 		return error;
1579 
1580 	/*
1581 	 * Trigger automatic call to nat_flushtable() if the
1582 	 * table has reached capcity specified by hi watermark.
1583 	 */
1584 	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_level_hi)
1585 		ifs->ifs_nat_doflush = 1;
1586 
1587 	/*
1588 	 * If automatic flushing did not do its job, and the table
1589 	 * has filled up, don't try to create a new entry.
1590 	 */
1591 	if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) {
1592 		ifs->ifs_nat_stats.ns_memfail++;
1593 		return ENOMEM;
1594 	}
1595 
1596 	/*
1597 	 * Initialise early because of code at junkput label.
1598 	 */
1599 	in = NULL;
1600 	aps = NULL;
1601 	nat = NULL;
1602 	ipnn = NULL;
1603 
1604 	/*
1605 	 * New entry, copy in the rest of the NAT entry if it's size is more
1606 	 * than just the nat_t structure.
1607 	 */
1608 	fr = NULL;
1609 	if (ipn.ipn_dsize > sizeof(ipn)) {
1610 		if (ipn.ipn_dsize > 81920) {
1611 			error = ENOMEM;
1612 			goto junkput;
1613 		}
1614 
1615 		KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1616 		if (ipnn == NULL)
1617 			return ENOMEM;
1618 
1619 		error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1620 		if (error != 0) {
1621 			error = EFAULT;
1622 			goto junkput;
1623 		}
1624 	} else
1625 		ipnn = &ipn;
1626 
1627 	KMALLOC(nat, nat_t *);
1628 	if (nat == NULL) {
1629 		error = ENOMEM;
1630 		goto junkput;
1631 	}
1632 
1633 	bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1634 	/*
1635 	 * Initialize all these so that nat_delete() doesn't cause a crash.
1636 	 */
1637 	bzero((char *)nat, offsetof(struct nat, nat_tqe));
1638 	nat->nat_tqe.tqe_pnext = NULL;
1639 	nat->nat_tqe.tqe_next = NULL;
1640 	nat->nat_tqe.tqe_ifq = NULL;
1641 	nat->nat_tqe.tqe_parent = nat;
1642 
1643 	/*
1644 	 * Restore the rule associated with this nat session
1645 	 */
1646 	in = ipnn->ipn_nat.nat_ptr;
1647 	if (in != NULL) {
1648 		KMALLOC(in, ipnat_t *);
1649 		nat->nat_ptr = in;
1650 		if (in == NULL) {
1651 			error = ENOMEM;
1652 			goto junkput;
1653 		}
1654 		bzero((char *)in, offsetof(struct ipnat, in_next6));
1655 		bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1656 		in->in_use = 1;
1657 		in->in_flags |= IPN_DELETE;
1658 
1659 		ATOMIC_INC(ifs->ifs_nat_stats.ns_rules);
1660 
1661 		if (nat_resolverule(in, ifs) != 0) {
1662 			error = ESRCH;
1663 			goto junkput;
1664 		}
1665 	}
1666 
1667 	/*
1668 	 * Check that the NAT entry doesn't already exist in the kernel.
1669 	 */
1670 	if (nat->nat_v != 6)
1671 		nat->nat_v = 4;
1672 	bzero((char *)&fin, sizeof(fin));
1673 	fin.fin_p = nat->nat_p;
1674 	fin.fin_ifs = ifs;
1675 	if (nat->nat_dir == NAT_OUTBOUND) {
1676 		fin.fin_data[0] = ntohs(nat->nat_oport);
1677 		fin.fin_data[1] = ntohs(nat->nat_outport);
1678 		fin.fin_ifp = nat->nat_ifps[0];
1679 		if (getlock) {
1680 			READ_ENTER(&ifs->ifs_ipf_nat);
1681 		}
1682 
1683 		switch (nat->nat_v)
1684 		{
1685 		case 4:
1686 			fin.fin_v = nat->nat_v;
1687 			n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1688 			    nat->nat_oip, nat->nat_outip);
1689 			break;
1690 #ifdef USE_INET6
1691 		case 6:
1692 			n = nat6_inlookup(&fin, nat->nat_flags, fin.fin_p,
1693 			    &nat->nat_oip6.in6, &nat->nat_outip6.in6);
1694 			break;
1695 #endif
1696 		default:
1697 			n = NULL;
1698 			break;
1699 		}
1700 
1701 		if (getlock) {
1702 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1703 		}
1704 		if (n != NULL) {
1705 			error = EEXIST;
1706 			goto junkput;
1707 		}
1708 	} else if (nat->nat_dir == NAT_INBOUND) {
1709 		fin.fin_data[0] = ntohs(nat->nat_inport);
1710 		fin.fin_data[1] = ntohs(nat->nat_oport);
1711 		fin.fin_ifp = nat->nat_ifps[1];
1712 		if (getlock) {
1713 			READ_ENTER(&ifs->ifs_ipf_nat);
1714 		}
1715 
1716 		switch (nat->nat_v)
1717 		{
1718 		case 4:
1719 			n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1720 			    nat->nat_inip, nat->nat_oip);
1721 			break;
1722 #ifdef USE_INET6
1723 		case 6:
1724 			n = nat6_outlookup(&fin, nat->nat_flags, fin.fin_p,
1725 			    &nat->nat_inip6.in6, &nat->nat_oip6.in6);
1726 			break;
1727 #endif
1728 		default:
1729 			n = NULL;
1730 			break;
1731 		}
1732 
1733 		if (getlock) {
1734 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1735 		}
1736 		if (n != NULL) {
1737 			error = EEXIST;
1738 			goto junkput;
1739 		}
1740 	} else {
1741 		error = EINVAL;
1742 		goto junkput;
1743 	}
1744 
1745 	/*
1746 	 * Restore ap_session_t structure.  Include the private data allocated
1747 	 * if it was there.
1748 	 */
1749 	aps = nat->nat_aps;
1750 	if (aps != NULL) {
1751 		KMALLOC(aps, ap_session_t *);
1752 		nat->nat_aps = aps;
1753 		if (aps == NULL) {
1754 			error = ENOMEM;
1755 			goto junkput;
1756 		}
1757 		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1758 		if (in != NULL)
1759 			aps->aps_apr = in->in_apr;
1760 		else
1761 			aps->aps_apr = NULL;
1762 		if (aps->aps_psiz != 0) {
1763 			if (aps->aps_psiz > 81920) {
1764 				error = ENOMEM;
1765 				goto junkput;
1766 			}
1767 			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1768 			if (aps->aps_data == NULL) {
1769 				error = ENOMEM;
1770 				goto junkput;
1771 			}
1772 			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1773 			      aps->aps_psiz);
1774 		} else {
1775 			aps->aps_psiz = 0;
1776 			aps->aps_data = NULL;
1777 		}
1778 	}
1779 
1780 	/*
1781 	 * If there was a filtering rule associated with this entry then
1782 	 * build up a new one.
1783 	 */
1784 	fr = nat->nat_fr;
1785 	if (fr != NULL) {
1786 		if ((nat->nat_flags & SI_NEWFR) != 0) {
1787 			KMALLOC(fr, frentry_t *);
1788 			nat->nat_fr = fr;
1789 			if (fr == NULL) {
1790 				error = ENOMEM;
1791 				goto junkput;
1792 			}
1793 			ipnn->ipn_nat.nat_fr = fr;
1794 			(void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1795 			bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1796 
1797 			fr->fr_ref = 1;
1798 			fr->fr_dsize = 0;
1799 			fr->fr_data = NULL;
1800 			fr->fr_type = FR_T_NONE;
1801 
1802 			MUTEX_NUKE(&fr->fr_lock);
1803 			MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1804 		} else {
1805 			if (getlock) {
1806 				READ_ENTER(&ifs->ifs_ipf_nat);
1807 			}
1808 			for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1809 				if (n->nat_fr == fr)
1810 					break;
1811 
1812 			if (n != NULL) {
1813 				MUTEX_ENTER(&fr->fr_lock);
1814 				fr->fr_ref++;
1815 				MUTEX_EXIT(&fr->fr_lock);
1816 			}
1817 			if (getlock) {
1818 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1819 			}
1820 			if (!n) {
1821 				error = ESRCH;
1822 				goto junkput;
1823 			}
1824 		}
1825 	}
1826 
1827 	if (ipnn != &ipn) {
1828 		KFREES(ipnn, ipn.ipn_dsize);
1829 		ipnn = NULL;
1830 	}
1831 
1832 	nat_calc_chksum_diffs(nat);
1833 
1834 	if (getlock) {
1835 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1836 	}
1837 
1838 	nat_calc_chksum_diffs(nat);
1839 
1840 	switch (nat->nat_v)
1841 	{
1842 	case 4 :
1843 		error = nat_insert(nat, nat->nat_rev, ifs);
1844 		break;
1845 #ifdef USE_INET6
1846 	case 6 :
1847 		error = nat6_insert(nat, nat->nat_rev, ifs);
1848 		break;
1849 #endif
1850 	default :
1851 		break;
1852 	}
1853 
1854 	if ((error == 0) && (aps != NULL)) {
1855 		aps->aps_next = ifs->ifs_ap_sess_list;
1856 		ifs->ifs_ap_sess_list = aps;
1857 	}
1858 	if (getlock) {
1859 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1860 	}
1861 
1862 	if (error == 0)
1863 		return 0;
1864 
1865 	error = ENOMEM;
1866 
1867 junkput:
1868 	if (fr != NULL)
1869 		(void) fr_derefrule(&fr, ifs);
1870 
1871 	if ((ipnn != NULL) && (ipnn != &ipn)) {
1872 		KFREES(ipnn, ipn.ipn_dsize);
1873 	}
1874 	if (nat != NULL) {
1875 		if (aps != NULL) {
1876 			if (aps->aps_data != NULL) {
1877 				KFREES(aps->aps_data, aps->aps_psiz);
1878 			}
1879 			KFREE(aps);
1880 		}
1881 		if (in != NULL) {
1882 			if (in->in_apr)
1883 				appr_free(in->in_apr);
1884 			KFREE(in);
1885 		}
1886 		KFREE(nat);
1887 	}
1888 	return error;
1889 }
1890 
1891 
1892 /* ------------------------------------------------------------------------ */
1893 /* Function:    nat_delete                                                  */
1894 /* Returns:     int	- 0 if entry deleted. Otherwise, ref count on entry */
1895 /* Parameters:  nat	- pointer to the NAT entry to delete		    */
1896 /*		logtype	- type of LOG record to create before deleting	    */
1897 /*		ifs	- ipf stack instance				    */
1898 /* Write Lock:  ipf_nat                                                     */
1899 /*                                                                          */
1900 /* Delete a nat entry from the various lists and table.  If NAT logging is  */
1901 /* enabled then generate a NAT log record for this event.                   */
1902 /* ------------------------------------------------------------------------ */
1903 int nat_delete(nat, logtype, ifs)
1904 struct nat *nat;
1905 int logtype;
1906 ipf_stack_t *ifs;
1907 {
1908 	struct ipnat *ipn;
1909 	int removed = 0;
1910 
1911 	if (logtype != 0 && ifs->ifs_nat_logging != 0)
1912 		nat_log(nat, logtype, ifs);
1913 
1914 	/*
1915 	 * Start by removing the entry from the hash table of nat entries
1916 	 * so it will not be "used" again.
1917 	 *
1918 	 * It will remain in the "list" of nat entries until all references
1919 	 * have been accounted for.
1920 	 */
1921 	if ((nat->nat_phnext[0] != NULL) && (nat->nat_phnext[1] != NULL)) {
1922 		removed = 1;
1923 
1924 		ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1925 		ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
1926 
1927 		*nat->nat_phnext[0] = nat->nat_hnext[0];
1928 		if (nat->nat_hnext[0] != NULL) {
1929 			nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1930 			nat->nat_hnext[0] = NULL;
1931 		}
1932 		nat->nat_phnext[0] = NULL;
1933 
1934 		*nat->nat_phnext[1] = nat->nat_hnext[1];
1935 		if (nat->nat_hnext[1] != NULL) {
1936 			nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1937 			nat->nat_hnext[1] = NULL;
1938 		}
1939 		nat->nat_phnext[1] = NULL;
1940 
1941 		if ((nat->nat_flags & SI_WILDP) != 0)
1942 			ifs->ifs_nat_stats.ns_wilds--;
1943 	}
1944 
1945 	/*
1946 	 * Next, remove it from the timeout queue it is in.
1947 	 */
1948 	fr_deletequeueentry(&nat->nat_tqe);
1949 
1950 	if (nat->nat_me != NULL) {
1951 		*nat->nat_me = NULL;
1952 		nat->nat_me = NULL;
1953 	}
1954 
1955 	MUTEX_ENTER(&nat->nat_lock);
1956  	if (nat->nat_ref > 1) {
1957 		nat->nat_ref--;
1958 		MUTEX_EXIT(&nat->nat_lock);
1959  		if (removed)
1960  			ifs->ifs_nat_stats.ns_orphans++;
1961 		return (nat->nat_ref);
1962 	}
1963 	MUTEX_EXIT(&nat->nat_lock);
1964 
1965 	nat->nat_ref = 0;
1966 
1967 	/*
1968 	 * If entry had already been removed,
1969 	 * it means we're cleaning up an orphan.
1970 	 */
1971  	if (!removed)
1972  		ifs->ifs_nat_stats.ns_orphans--;
1973 
1974 #ifdef	IPFILTER_SYNC
1975 	if (nat->nat_sync)
1976 		ipfsync_del(nat->nat_sync);
1977 #endif
1978 
1979 	/*
1980 	 * Now remove it from master list of nat table entries
1981 	 */
1982 	if (nat->nat_pnext != NULL) {
1983 		*nat->nat_pnext = nat->nat_next;
1984 		if (nat->nat_next != NULL) {
1985 			nat->nat_next->nat_pnext = nat->nat_pnext;
1986 			nat->nat_next = NULL;
1987 		}
1988 		nat->nat_pnext = NULL;
1989 	}
1990 
1991 	if (nat->nat_fr != NULL)
1992 		(void)fr_derefrule(&nat->nat_fr, ifs);
1993 
1994 	if (nat->nat_hm != NULL)
1995 		fr_hostmapdel(&nat->nat_hm);
1996 
1997 	/*
1998 	 * If there is an active reference from the nat entry to its parent
1999 	 * rule, decrement the rule's reference count and free it too if no
2000 	 * longer being used.
2001 	 */
2002 	ipn = nat->nat_ptr;
2003 	if (ipn != NULL) {
2004 		ipn->in_space++;
2005 		ipn->in_use--;
2006 		if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) {
2007 			if (ipn->in_apr)
2008 				appr_free(ipn->in_apr);
2009 			KFREE(ipn);
2010 			ifs->ifs_nat_stats.ns_rules--;
2011 		}
2012 	}
2013 
2014 	MUTEX_DESTROY(&nat->nat_lock);
2015 
2016 	aps_free(nat->nat_aps, ifs);
2017 	ifs->ifs_nat_stats.ns_inuse--;
2018 
2019 	/*
2020 	 * If there's a fragment table entry too for this nat entry, then
2021 	 * dereference that as well.  This is after nat_lock is released
2022 	 * because of Tru64.
2023 	 */
2024 	fr_forgetnat((void *)nat, ifs);
2025 
2026 	KFREE(nat);
2027 
2028 	return (0);
2029 }
2030 
2031 
2032 /* ------------------------------------------------------------------------ */
2033 /* Function:    nat_clearlist                                               */
2034 /* Returns:     int - number of NAT/RDR rules deleted                       */
2035 /* Parameters:  Nil                                                         */
2036 /*                                                                          */
2037 /* Delete all rules in the current list of rules.  There is nothing elegant */
2038 /* about this cleanup: simply free all entries on the list of rules and     */
2039 /* clear out the tables used for hashed NAT rule lookups.                   */
2040 /* ------------------------------------------------------------------------ */
2041 static int nat_clearlist(ifs)
2042 ipf_stack_t *ifs;
2043 {
2044 	ipnat_t *n, **np = &ifs->ifs_nat_list;
2045 	int i = 0;
2046 
2047 	if (ifs->ifs_nat_rules != NULL)
2048 		bzero((char *)ifs->ifs_nat_rules,
2049 		      sizeof(*ifs->ifs_nat_rules) * ifs->ifs_ipf_natrules_sz);
2050 	if (ifs->ifs_rdr_rules != NULL)
2051 		bzero((char *)ifs->ifs_rdr_rules,
2052 		      sizeof(*ifs->ifs_rdr_rules) * ifs->ifs_ipf_rdrrules_sz);
2053 
2054 	while ((n = *np) != NULL) {
2055 		*np = n->in_next;
2056 		if (n->in_use == 0) {
2057 			if (n->in_apr != NULL)
2058 				appr_free(n->in_apr);
2059 			KFREE(n);
2060 			ifs->ifs_nat_stats.ns_rules--;
2061 		} else {
2062 			n->in_flags |= IPN_DELETE;
2063 			n->in_next = NULL;
2064 		}
2065 		i++;
2066 	}
2067 	ifs->ifs_nat_masks = 0;
2068 	ifs->ifs_rdr_masks = 0;
2069 	for (i = 0; i < 4; i++) {
2070 		ifs->ifs_nat6_masks[i] = 0;
2071 		ifs->ifs_rdr6_masks[i] = 0;
2072 	}
2073 	return i;
2074 }
2075 
2076 
/* ------------------------------------------------------------------------ */
/* Function:    nat_newmap                                                  */
/* Returns:     int - -1 == error, 0 == success                             */
/* Parameters:  fin(I) - pointer to packet information                      */
/*              nat(I) - pointer to NAT entry                               */
/*              ni(I)  - pointer to structure with misc. information needed */
/*                       to create new NAT entry.                           */
/*                                                                          */
/* Given an empty NAT structure, populate it with new information about a   */
/* new NAT session, as defined by the matching NAT rule.                    */
/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
/* to the new IP address for the translation.                               */
/*                                                                          */
/* Candidate address/port pairs are tried in a loop until nat_inlookup()    */
/* finds no existing session for them.  The rule's allocation state         */
/* (in_nip, in_pnext) is advanced along the way; on success in_space is     */
/* decremented and the chosen port/id is written to the header at fin_dp.   */
/* ------------------------------------------------------------------------ */
static INLINE int nat_newmap(fin, nat, ni)
fr_info_t *fin;
nat_t *nat;
natinfo_t *ni;
{
	u_short st_port, dport, sport, port, sp, dp;
	struct in_addr in, inb;
	hostmap_t *hm;
	u_32_t flags;
	u_32_t st_ip;
	ipnat_t *np;
	nat_t *natl;
	int l;
	ipf_stack_t *ifs = fin->fin_ifs;

	/*
	 * If it's an outbound packet which doesn't match any existing
	 * record, then create a new port
	 */
	l = 0;
	hm = NULL;
	np = ni->nai_np;
	/*
	 * Remember where the rule's address/port allocation stood on entry
	 * so the loop below can tell when it has wrapped all the way round.
	 */
	st_ip = np->in_nip;
	st_port = np->in_pnext;
	flags = ni->nai_flags;
	sport = ni->nai_sport;
	dport = ni->nai_dport;

	/*
	 * Do a loop until we either run out of entries to try or we find
	 * a NAT mapping that isn't currently being used.  This is done
	 * because the change to the source is not (usually) being fixed.
	 */
	do {
		/* l counts attempts; port == 0 means "no port chosen yet". */
		port = 0;
		in.s_addr = htonl(np->in_nip);
		if (l == 0) {
			/*
			 * Check to see if there is an existing NAT
			 * setup for this IP address pair.
			 */
			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
					 in, 0, ifs);
			if (hm != NULL)
				in.s_addr = hm->hm_mapip.s_addr;
		} else if ((l == 1) && (hm != NULL)) {
			/*
			 * The host mapping used on the first attempt
			 * collided, so let go of it.
			 */
			fr_hostmapdel(&hm);
		}
		/* From here on "in" is kept in host byte order. */
		in.s_addr = ntohl(in.s_addr);

		nat->nat_hm = hm;

		if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
			if (l > 0)
				return -1;
		}

		if (np->in_redir == NAT_BIMAP &&
		    np->in_inmsk == np->in_outmsk) {
			/*
			 * map the address block in a 1:1 fashion
			 */
			in.s_addr = np->in_outip;
			in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
			in.s_addr = ntohl(in.s_addr);

		} else if (np->in_redir & NAT_MAPBLK) {
			if ((l >= np->in_ppip) || ((l > 0) &&
			     !(flags & IPN_TCPUDP)))
				return -1;
			/*
			 * map-block - Calculate destination address.
			 */
			in.s_addr = ntohl(fin->fin_saddr);
			in.s_addr &= ntohl(~np->in_inmsk);
			inb.s_addr = in.s_addr;
			/*
			 * NOTE(review): in_ippip is used as a divisor here
			 * (and as a modulus below) without a zero check;
			 * presumably rule setup guarantees it is non-zero
			 * for map-block rules - confirm.
			 */
			in.s_addr /= np->in_ippip;
			in.s_addr &= ntohl(~np->in_outmsk);
			in.s_addr += ntohl(np->in_outip);
			/*
			 * Calculate destination port.
			 */
			if ((flags & IPN_TCPUDP) &&
			    (np->in_ppip != 0)) {
				port = ntohs(sport) + l;
				port %= np->in_ppip;
				port += np->in_ppip *
					(inb.s_addr % np->in_ippip);
				port += MAPBLK_MINPORT;
				port = htons(port);
			}

		} else if ((np->in_outip == 0) &&
			   (np->in_outmsk == 0xffffffff)) {
			/*
			 * 0/32 - use the interface's IP address.
			 */
			if ((l > 0) ||
			    fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
				       &in, NULL, fin->fin_ifs) == -1)
				return -1;
			in.s_addr = ntohl(in.s_addr);

		} else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
			/*
			 * 0/0 - use the original source address/port.
			 */
			if (l > 0)
				return -1;
			in.s_addr = ntohl(fin->fin_saddr);

		} else if ((np->in_outmsk != 0xffffffff) &&
			   (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
			np->in_nip++;

		natl = NULL;

		if ((flags & IPN_TCPUDP) &&
		    ((np->in_redir & NAT_MAPBLK) == 0) &&
		    (np->in_flags & IPN_AUTOPORTMAP)) {
			/*
			 * "ports auto" (without map-block)
			 */
			/*
			 * NOTE(review): "l % np->in_ppip" is evaluated here
			 * before the "in_ppip != 0" test a few lines down;
			 * confirm in_ppip cannot be zero when l > 0.
			 */
			if ((l > 0) && (l % np->in_ppip == 0)) {
				if (l > np->in_space) {
					return -1;
				} else if ((l > np->in_ppip) &&
					   np->in_outmsk != 0xffffffff)
					np->in_nip++;
			}
			if (np->in_ppip != 0) {
				port = ntohs(sport);
				port += (l % np->in_ppip);
				port %= np->in_ppip;
				port += np->in_ppip *
					(ntohl(fin->fin_saddr) %
					 np->in_ippip);
				port += MAPBLK_MINPORT;
				port = htons(port);
			}

		} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
			   (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
			/*
			 * Standard port translation.  Select next port.
			 */
			if (np->in_flags & IPN_SEQUENTIAL) {
				port = np->in_pnext;
			} else {
				port = ipf_random() % (ntohs(np->in_pmax) -
						       ntohs(np->in_pmin) + 1);
				port += ntohs(np->in_pmin);
			}
			port = htons(port);
			np->in_pnext++;

			/*
			 * Past the end of the port range: start again at
			 * in_pmin and, unless the rule maps to a single
			 * address, step on to the next address.
			 */
			if (np->in_pnext > ntohs(np->in_pmax)) {
				np->in_pnext = ntohs(np->in_pmin);
				if (np->in_outmsk != 0xffffffff)
					np->in_nip++;
			}
		}

		/*
		 * Bring in_nip back to the start of the rule's address
		 * pool if the increments above took it past the end.
		 */
		if (np->in_flags & IPN_IPRANGE) {
			if (np->in_nip > ntohl(np->in_outmsk))
				np->in_nip = ntohl(np->in_outip);
		} else {
			if ((np->in_outmsk != 0xffffffff) &&
			    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
			    ntohl(np->in_outip))
				np->in_nip = ntohl(np->in_outip) + 1;
		}

		/* No port was picked above: keep the original source port. */
		if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
			port = sport;

		/*
		 * Here we do a lookup of the connection as seen from
		 * the outside.  If an IP# pair already exists, try
		 * again.  So if you have A->B becomes C->B, you can
		 * also have D->E become C->E but not D->B causing
		 * another C->B.  Also take protocol and ports into
		 * account when determining whether a pre-existing
		 * NAT setup will cause an external conflict where
		 * this is appropriate.
		 */
		inb.s_addr = htonl(in.s_addr);
		/*
		 * fin_data[] is overwritten only for the duration of the
		 * lookup and put back straight afterwards.
		 */
		sp = fin->fin_data[0];
		dp = fin->fin_data[1];
		fin->fin_data[0] = fin->fin_data[1];
		fin->fin_data[1] = htons(port);
		natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
				    (u_int)fin->fin_p, fin->fin_dst, inb);
		fin->fin_data[0] = sp;
		fin->fin_data[1] = dp;

		/*
		 * Has the search wrapped around and come back to the
		 * start ?
		 */
		if ((natl != NULL) &&
		    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
		    (np->in_nip != 0) && (st_ip == np->in_nip))
			return -1;
		l++;
	} while (natl != NULL);

	/* A free mapping was found: it uses up one slot of the rule. */
	if (np->in_space > 0)
		np->in_space--;

	/* Setup the NAT table */
	nat->nat_inip = fin->fin_src;
	nat->nat_outip.s_addr = htonl(in.s_addr);
	nat->nat_oip = fin->fin_dst;
	if (nat->nat_hm == NULL)
		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
					  nat->nat_outip, 0, ifs);

	if (flags & IPN_TCPUDP) {
		nat->nat_inport = sport;
		nat->nat_outport = port;	/* sport */
		nat->nat_oport = dport;
		/* The packet's source port field is rewritten here. */
		((tcphdr_t *)fin->fin_dp)->th_sport = port;
	} else if (flags & IPN_ICMPQUERY) {
		/* For ICMP queries the id field plays the role of the port. */
		((icmphdr_t *)fin->fin_dp)->icmp_id = port;
		nat->nat_inport = port;
		nat->nat_outport = port;
	}

	/* Hand the chosen address (host byte order) and ports back. */
	ni->nai_ip.s_addr = in.s_addr;
	ni->nai_port = port;
	ni->nai_nport = dport;
	return 0;
}
2324 
2325 
/* ------------------------------------------------------------------------ */
/* Function:    nat_newrdr                                                  */
/* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
/*                    allow rule to be moved if IPN_ROUNDR is set.          */
/* Parameters:  fin(I) - pointer to packet information                      */
/*              nat(I) - pointer to NAT entry                               */
/*              ni(I)  - pointer to structure with misc. information needed */
/*                       to create new NAT entry.                           */
/*                                                                          */
/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
/* to the new IP address for the translation.                               */
/*                                                                          */
/* On success the rewritten destination port (or ICMP id) is stored in the  */
/* header at fin_dp and nat_inip/nat_outip/nat_oip are filled in.           */
/* ------------------------------------------------------------------------ */
static INLINE int nat_newrdr(fin, nat, ni)
fr_info_t *fin;
nat_t *nat;
natinfo_t *ni;
{
	u_short nport, dport, sport;
	struct in_addr in, inb;
	u_short sp, dp;
	hostmap_t *hm;
	u_32_t flags;
	ipnat_t *np;
	nat_t *natl;
	int move;
	ipf_stack_t *ifs = fin->fin_ifs;

	/* move is the value returned on success; see "Returns" above. */
	move = 1;
	hm = NULL;
	in.s_addr = 0;
	np = ni->nai_np;
	flags = ni->nai_flags;
	sport = ni->nai_sport;
	dport = ni->nai_dport;

	/*
	 * If the matching rule has IPN_STICKY set, then we want to have the
	 * same rule kick in as before.  Why would this happen?  If you have
	 * a collection of rdr rules with "round-robin sticky", the current
	 * packet might match a different one to the previous connection but
	 * we want the same destination to be used.
	 */
	if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) ==
	    (IPN_ROUNDR|IPN_STICKY)) {
		hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
				 (u_32_t)dport, ifs);
		if (hm != NULL) {
			in.s_addr = ntohl(hm->hm_mapip.s_addr);
			/*
			 * Switch to the rule recorded in the host mapping
			 * and tell the caller about it through ni.
			 */
			np = hm->hm_ipnat;
			ni->nai_np = np;
			move = 0;
		}
	}

	/*
	 * Otherwise, it's an inbound packet. Most likely, we don't
	 * want to rewrite source ports and source addresses. Instead,
	 * we want to rewrite to a fixed internal address and fixed
	 * internal port.
	 */
	if (np->in_flags & IPN_SPLIT) {
		in.s_addr = np->in_nip;

		if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
					 in, (u_32_t)dport, ifs);
			if (hm != NULL) {
				/*
				 * NOTE(review): hm_mapip is copied without
				 * ntohl() here, whereas the round-robin
				 * sticky branch above converts it and "in"
				 * is passed through htonl() further down -
				 * confirm the intended byte order.
				 */
				in.s_addr = hm->hm_mapip.s_addr;
				move = 0;
			}
		}

		/*
		 * Flip the rule's next address between in_inip and
		 * in_inmsk, the two values compared and assigned below.
		 */
		if (hm == NULL || hm->hm_ref == 1) {
			if (np->in_inip == htonl(in.s_addr)) {
				np->in_nip = ntohl(np->in_inmsk);
				move = 0;
			} else {
				np->in_nip = ntohl(np->in_inip);
			}
		}

	} else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
		/*
		 * 0/32 - use the interface's IP address.
		 */
		if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL,
			   fin->fin_ifs) == -1)
			return -1;
		in.s_addr = ntohl(in.s_addr);

	} else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
		/*
		 * 0/0 - use the original destination address/port.
		 */
		in.s_addr = ntohl(fin->fin_daddr);

	} else if (np->in_redir == NAT_BIMAP &&
		   np->in_inmsk == np->in_outmsk) {
		/*
		 * map the address block in a 1:1 fashion
		 */
		in.s_addr = np->in_inip;
		in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
		in.s_addr = ntohl(in.s_addr);
	} else {
		in.s_addr = ntohl(np->in_inip);
	}

	/*
	 * Work out the new destination port: unchanged when the rule has
	 * no port set (in_pnext == 0) or NAT_NOTRULEPORT is given.
	 */
	if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
		nport = dport;
	else {
		/*
		 * Whilst not optimized for the case where
		 * pmin == pmax, the gain is not significant.
		 */
		if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
		    (np->in_pmin != np->in_pmax)) {
			/*
			 * Port range: keep the packet's offset from in_pmin
			 * and apply it to in_pnext.
			 */
			nport = ntohs(dport) - ntohs(np->in_pmin) +
				ntohs(np->in_pnext);
			nport = htons(nport);
		} else
			nport = np->in_pnext;
	}

	/*
	 * When the redirect-to address is set to 0.0.0.0, just
	 * assume a blank `forwarding' of the packet.  We don't
	 * setup any translation for this either.
	 */
	if (in.s_addr == 0) {
		if (nport == dport)
			return -1;
		in.s_addr = ntohl(fin->fin_daddr);
	}

	/*
	 * Check to see if this redirect mapping already exists and if
	 * it does, return "failure" (allowing it to be created will just
	 * cause one or both of these "connections" to stop working.)
	 */
	inb.s_addr = htonl(in.s_addr);
	/*
	 * fin_data[] is overwritten only for the duration of the lookup
	 * and put back straight afterwards.
	 */
	sp = fin->fin_data[0];
	dp = fin->fin_data[1];
	fin->fin_data[1] = fin->fin_data[0];
	fin->fin_data[0] = ntohs(nport);
	natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
		    (u_int)fin->fin_p, inb, fin->fin_src);
	fin->fin_data[0] = sp;
	fin->fin_data[1] = dp;
	if (natl != NULL)
		return (-1);

	nat->nat_inip.s_addr = htonl(in.s_addr);
	nat->nat_outip = fin->fin_dst;
	nat->nat_oip = fin->fin_src;

	/* Hand the chosen address (host byte order) and ports back. */
	ni->nai_ip.s_addr = in.s_addr;
	ni->nai_nport = nport;
	ni->nai_port = sport;

	if (flags & IPN_TCPUDP) {
		nat->nat_inport = nport;
		nat->nat_outport = dport;
		nat->nat_oport = sport;
		/* The packet's destination port field is rewritten here. */
		((tcphdr_t *)fin->fin_dp)->th_dport = nport;
	} else if (flags & IPN_ICMPQUERY) {
		/* For ICMP queries the id field plays the role of the port. */
		((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
		nat->nat_inport = nport;
		nat->nat_outport = nport;
	}

	return move;
}
2499 
2500 /* ------------------------------------------------------------------------ */
2501 /* Function:    nat_new                                                     */
2502 /* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
2503 /*                       else pointer to new NAT structure                  */
2504 /* Parameters:  fin(I)       - pointer to packet information                */
2505 /*              np(I)        - pointer to NAT rule                          */
2506 /*              natsave(I)   - pointer to where to store NAT struct pointer */
2507 /*              flags(I)     - flags describing the current packet          */
2508 /*              direction(I) - direction of packet (in/out)                 */
2509 /* Write Lock:  ipf_nat                                                     */
2510 /*                                                                          */
2511 /* Attempts to create a new NAT entry.  Does not actually change the packet */
2512 /* in any way.                                                              */
2513 /*                                                                          */
2514 /* This fucntion is in three main parts: (1) deal with creating a new NAT   */
2515 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
2516 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2517 /* and (3) building that structure and putting it into the NAT table(s).    */
2518 /* ------------------------------------------------------------------------ */
2519 nat_t *nat_new(fin, np, natsave, flags, direction)
2520 fr_info_t *fin;
2521 ipnat_t *np;
2522 nat_t **natsave;
2523 u_int flags;
2524 int direction;
2525 {
2526 	tcphdr_t *tcp = NULL;
2527 	hostmap_t *hm = NULL;
2528 	nat_t *nat, *natl;
2529 	u_int nflags;
2530 	natinfo_t ni;
2531 	int move;
2532 	ipf_stack_t *ifs = fin->fin_ifs;
2533 
2534 	/*
2535 	 * Trigger automatic call to nat_flushtable() if the
2536 	 * table has reached capcity specified by hi watermark.
2537 	 */
2538 	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_level_hi)
2539 		ifs->ifs_nat_doflush = 1;
2540 
2541 	/*
2542 	 * If automatic flushing did not do its job, and the table
2543 	 * has filled up, don't try to create a new entry.
2544 	 */
2545 	if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) {
2546 		ifs->ifs_nat_stats.ns_memfail++;
2547 		return NULL;
2548 	}
2549 
2550 	move = 1;
2551 	nflags = np->in_flags & flags;
2552 	nflags &= NAT_FROMRULE;
2553 
2554 	ni.nai_np = np;
2555 	ni.nai_nflags = nflags;
2556 	ni.nai_flags = flags;
2557 
2558 	/* Give me a new nat */
2559 	KMALLOC(nat, nat_t *);
2560 	if (nat == NULL) {
2561 		ifs->ifs_nat_stats.ns_memfail++;
2562 		/*
2563 		 * Try to automatically tune the max # of entries in the
2564 		 * table allowed to be less than what will cause kmem_alloc()
2565 		 * to fail and try to eliminate panics due to out of memory
2566 		 * conditions arising.
2567 		 */
2568 		if (ifs->ifs_ipf_nattable_max > ifs->ifs_ipf_nattable_sz) {
2569 			ifs->ifs_ipf_nattable_max = ifs->ifs_nat_stats.ns_inuse - 100;
2570 			printf("ipf_nattable_max reduced to %d\n",
2571 				ifs->ifs_ipf_nattable_max);
2572 		}
2573 		return NULL;
2574 	}
2575 
2576 	if (flags & IPN_TCPUDP) {
2577 		tcp = fin->fin_dp;
2578 		ni.nai_sport = htons(fin->fin_sport);
2579 		ni.nai_dport = htons(fin->fin_dport);
2580 	} else if (flags & IPN_ICMPQUERY) {
2581 		/*
2582 		 * In the ICMP query NAT code, we translate the ICMP id fields
2583 		 * to make them unique. This is indepedent of the ICMP type
2584 		 * (e.g. in the unlikely event that a host sends an echo and
2585 		 * an tstamp request with the same id, both packets will have
2586 		 * their ip address/id field changed in the same way).
2587 		 */
2588 		/* The icmp_id field is used by the sender to identify the
2589 		 * process making the icmp request. (the receiver justs
2590 		 * copies it back in its response). So, it closely matches
2591 		 * the concept of source port. We overlay sport, so we can
2592 		 * maximally reuse the existing code.
2593 		 */
2594 		ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2595 		ni.nai_dport = ni.nai_sport;
2596 	}
2597 
2598 	bzero((char *)nat, sizeof(*nat));
2599 	nat->nat_flags = flags;
2600 	nat->nat_redir = np->in_redir;
2601 
2602 	if ((flags & NAT_SLAVE) == 0) {
2603 		MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
2604 	}
2605 
2606 	/*
2607 	 * Search the current table for a match.
2608 	 */
2609 	if (direction == NAT_OUTBOUND) {
2610 		/*
2611 		 * We can now arrange to call this for the same connection
2612 		 * because ipf_nat_new doesn't protect the code path into
2613 		 * this function.
2614 		 */
2615 		natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2616 				     fin->fin_src, fin->fin_dst);
2617 		if (natl != NULL) {
2618 			KFREE(nat);
2619 			nat = natl;
2620 			goto done;
2621 		}
2622 
2623 		move = nat_newmap(fin, nat, &ni);
2624 		if (move == -1)
2625 			goto badnat;
2626 
2627 		np = ni.nai_np;
2628 	} else {
2629 		/*
2630 		 * NAT_INBOUND is used only for redirects rules
2631 		 */
2632 		natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2633 				    fin->fin_src, fin->fin_dst);
2634 		if (natl != NULL) {
2635 			KFREE(nat);
2636 			nat = natl;
2637 			goto done;
2638 		}
2639 
2640 		move = nat_newrdr(fin, nat, &ni);
2641 		if (move == -1)
2642 			goto badnat;
2643 
2644 		np = ni.nai_np;
2645 	}
2646 
2647 	if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2648 		if (np->in_redir == NAT_REDIRECT) {
2649 			nat_delrdr(np);
2650 			nat_addrdr(np, ifs);
2651 		} else if (np->in_redir == NAT_MAP) {
2652 			nat_delnat(np);
2653 			nat_addnat(np, ifs);
2654 		}
2655 	}
2656 
2657 	if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2658 		goto badnat;
2659 	}
2660 
2661 	nat_calc_chksum_diffs(nat);
2662 
2663 	if (flags & SI_WILDP)
2664 		ifs->ifs_nat_stats.ns_wilds++;
2665 	fin->fin_flx |= FI_NEWNAT;
2666 	goto done;
2667 badnat:
2668 	ifs->ifs_nat_stats.ns_badnat++;
2669 	if ((hm = nat->nat_hm) != NULL)
2670 		fr_hostmapdel(&hm);
2671 	KFREE(nat);
2672 	nat = NULL;
2673 done:
2674 	if ((flags & NAT_SLAVE) == 0) {
2675 		MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
2676 	}
2677 	return nat;
2678 }
2679 
2680 
2681 /* ------------------------------------------------------------------------ */
2682 /* Function:    nat_finalise                                                */
2683 /* Returns:     int - 0 == sucess, -1 == failure                            */
2684 /* Parameters:  fin(I) - pointer to packet information                      */
2685 /*              nat(I) - pointer to NAT entry                               */
2686 /*              ni(I)  - pointer to structure with misc. information needed */
2687 /*                       to create new NAT entry.                           */
2688 /* Write Lock:  ipf_nat                                                     */
2689 /*                                                                          */
2690 /* This is the tail end of constructing a new NAT entry and is the same     */
2691 /* for both IPv4 and IPv6.                                                  */
2692 /* ------------------------------------------------------------------------ */
2693 /*ARGSUSED*/
2694 static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction)
2695 fr_info_t *fin;
2696 nat_t *nat;
2697 natinfo_t *ni;
2698 tcphdr_t *tcp;
2699 nat_t **natsave;
2700 int direction;
2701 {
2702 	frentry_t *fr;
2703 	ipnat_t *np;
2704 	ipf_stack_t *ifs = fin->fin_ifs;
2705 
2706 	np = ni->nai_np;
2707 
2708 	COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0], fin->fin_v);
2709 
2710 #ifdef	IPFILTER_SYNC
2711 	if ((nat->nat_flags & SI_CLONE) == 0)
2712 		nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2713 #endif
2714 
2715 	nat->nat_me = natsave;
2716 	nat->nat_dir = direction;
2717 	nat->nat_ifps[0] = np->in_ifps[0];
2718 	nat->nat_ifps[1] = np->in_ifps[1];
2719 	nat->nat_ptr = np;
2720 	nat->nat_p = fin->fin_p;
2721 	nat->nat_v = fin->fin_v;
2722 	nat->nat_mssclamp = np->in_mssclamp;
2723 	fr = fin->fin_fr;
2724 	nat->nat_fr = fr;
2725 
2726 	if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2727 		if (appr_new(fin, nat) == -1)
2728 			return -1;
2729 
2730 	if (nat_insert(nat, fin->fin_rev, ifs) == 0) {
2731 		if (ifs->ifs_nat_logging)
2732 			nat_log(nat, (u_int)np->in_redir, ifs);
2733 		np->in_use++;
2734 		if (fr != NULL) {
2735 			MUTEX_ENTER(&fr->fr_lock);
2736 			fr->fr_ref++;
2737 			MUTEX_EXIT(&fr->fr_lock);
2738 		}
2739 		return 0;
2740 	}
2741 
2742 	/*
2743 	 * nat_insert failed, so cleanup time...
2744 	 */
2745 	return -1;
2746 }
2747 
2748 
2749 /* ------------------------------------------------------------------------ */
2750 /* Function:   nat_insert                                                   */
2751 /* Returns:    int - 0 == sucess, -1 == failure                             */
2752 /* Parameters: nat(I) - pointer to NAT structure                            */
2753 /*             rev(I) - flag indicating forward/reverse direction of packet */
2754 /* Write Lock: ipf_nat                                                      */
2755 /*                                                                          */
2756 /* Insert a NAT entry into the hash tables for searching and add it to the  */
2757 /* list of active NAT entries.  Adjust global counters when complete.       */
2758 /* ------------------------------------------------------------------------ */
2759 int	nat_insert(nat, rev, ifs)
2760 nat_t	*nat;
2761 int	rev;
2762 ipf_stack_t *ifs;
2763 {
2764 	u_int hv1, hv2;
2765 	nat_t **natp;
2766 
2767 	/*
2768 	 * Try and return an error as early as possible, so calculate the hash
2769 	 * entry numbers first and then proceed.
2770 	 */
2771 	if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2772 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2773 				  0xffffffff);
2774 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2775 				  ifs->ifs_ipf_nattable_sz);
2776 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2777 				  0xffffffff);
2778 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2779 				  ifs->ifs_ipf_nattable_sz);
2780 	} else {
2781 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2782 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1,
2783 				  ifs->ifs_ipf_nattable_sz);
2784 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2785 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2,
2786 				  ifs->ifs_ipf_nattable_sz);
2787 	}
2788 
2789 	if (ifs->ifs_nat_stats.ns_bucketlen[0][hv1] >= ifs->ifs_fr_nat_maxbucket ||
2790 	    ifs->ifs_nat_stats.ns_bucketlen[1][hv2] >= ifs->ifs_fr_nat_maxbucket) {
2791 		return -1;
2792 	}
2793 
2794 	nat->nat_hv[0] = hv1;
2795 	nat->nat_hv[1] = hv2;
2796 
2797 	MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2798 
2799 	nat->nat_rev = rev;
2800 	nat->nat_ref = 1;
2801 	nat->nat_bytes[0] = 0;
2802 	nat->nat_pkts[0] = 0;
2803 	nat->nat_bytes[1] = 0;
2804 	nat->nat_pkts[1] = 0;
2805 
2806 	nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2807 	nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4, ifs);
2808 
2809 	if (nat->nat_ifnames[1][0] !='\0') {
2810 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2811 		nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4, ifs);
2812 	} else {
2813 		(void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2814 			       LIFNAMSIZ);
2815 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2816 		nat->nat_ifps[1] = nat->nat_ifps[0];
2817 	}
2818 
2819 	nat->nat_next = ifs->ifs_nat_instances;
2820 	nat->nat_pnext = &ifs->ifs_nat_instances;
2821 	if (ifs->ifs_nat_instances)
2822 		ifs->ifs_nat_instances->nat_pnext = &nat->nat_next;
2823 	ifs->ifs_nat_instances = nat;
2824 
2825 	natp = &ifs->ifs_nat_table[0][hv1];
2826 	if (*natp)
2827 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2828 	nat->nat_phnext[0] = natp;
2829 	nat->nat_hnext[0] = *natp;
2830 	*natp = nat;
2831 	ifs->ifs_nat_stats.ns_bucketlen[0][hv1]++;
2832 
2833 	natp = &ifs->ifs_nat_table[1][hv2];
2834 	if (*natp)
2835 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2836 	nat->nat_phnext[1] = natp;
2837 	nat->nat_hnext[1] = *natp;
2838 	*natp = nat;
2839 	ifs->ifs_nat_stats.ns_bucketlen[1][hv2]++;
2840 
2841 	fr_setnatqueue(nat, rev, ifs);
2842 
2843 	ifs->ifs_nat_stats.ns_added++;
2844 	ifs->ifs_nat_stats.ns_inuse++;
2845 	return 0;
2846 }
2847 
2848 
2849 /* ------------------------------------------------------------------------ */
2850 /* Function:    nat_icmperrorlookup                                         */
2851 /* Returns:     nat_t* - point to matching NAT structure                    */
2852 /* Parameters:  fin(I) - pointer to packet information                      */
2853 /*              dir(I) - direction of packet (in/out)                       */
2854 /*                                                                          */
2855 /* Check if the ICMP error message is related to an existing TCP, UDP or    */
2856 /* ICMP query nat entry.  It is assumed that the packet is already of the   */
2857 /* the required length.                                                     */
2858 /* ------------------------------------------------------------------------ */
2859 nat_t *nat_icmperrorlookup(fin, dir)
2860 fr_info_t *fin;
2861 int dir;
2862 {
2863 	int flags = 0, minlen;
2864 	icmphdr_t *orgicmp;
2865 	tcphdr_t *tcp = NULL;
2866 	u_short data[2];
2867 	nat_t *nat;
2868 	ip_t *oip;
2869 	u_int p;
2870 
2871 	/*
2872 	 * Does it at least have the return (basic) IP header ?
2873 	 * Only a basic IP header (no options) should be with an ICMP error
2874 	 * header.  Also, if it's not an error type, then return.
2875 	 */
2876 	if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2877 		return NULL;
2878 
2879 	/*
2880 	 * Check packet size
2881 	 */
2882 	oip = (ip_t *)((char *)fin->fin_dp + 8);
2883 	minlen = IP_HL(oip) << 2;
2884 	if ((minlen < sizeof(ip_t)) ||
2885 	    (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2886 		return NULL;
2887 	/*
2888 	 * Is the buffer big enough for all of it ?  It's the size of the IP
2889 	 * header claimed in the encapsulated part which is of concern.  It
2890 	 * may be too big to be in this buffer but not so big that it's
2891 	 * outside the ICMP packet, leading to TCP deref's causing problems.
2892 	 * This is possible because we don't know how big oip_hl is when we
2893 	 * do the pullup early in fr_check() and thus can't gaurantee it is
2894 	 * all here now.
2895 	 */
2896 #ifdef  _KERNEL
2897 	{
2898 	mb_t *m;
2899 
2900 	m = fin->fin_m;
2901 # if defined(MENTAT)
2902 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2903 		return NULL;
2904 # else
2905 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2906 	    (char *)fin->fin_ip + M_LEN(m))
2907 		return NULL;
2908 # endif
2909 	}
2910 #endif
2911 
2912 	if (fin->fin_daddr != oip->ip_src.s_addr)
2913 		return NULL;
2914 
2915 	p = oip->ip_p;
2916 	if (p == IPPROTO_TCP)
2917 		flags = IPN_TCP;
2918 	else if (p == IPPROTO_UDP)
2919 		flags = IPN_UDP;
2920 	else if (p == IPPROTO_ICMP) {
2921 		orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2922 
2923 		/* see if this is related to an ICMP query */
2924 		if (nat_icmpquerytype4(orgicmp->icmp_type)) {
2925 			data[0] = fin->fin_data[0];
2926 			data[1] = fin->fin_data[1];
2927 			fin->fin_data[0] = 0;
2928 			fin->fin_data[1] = orgicmp->icmp_id;
2929 
2930 			flags = IPN_ICMPERR|IPN_ICMPQUERY;
2931 			/*
2932 			 * NOTE : dir refers to the direction of the original
2933 			 *        ip packet. By definition the icmp error
2934 			 *        message flows in the opposite direction.
2935 			 */
2936 			if (dir == NAT_INBOUND)
2937 				nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2938 						   oip->ip_src);
2939 			else
2940 				nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2941 						    oip->ip_src);
2942 			fin->fin_data[0] = data[0];
2943 			fin->fin_data[1] = data[1];
2944 			return nat;
2945 		}
2946 	}
2947 
2948 	if (flags & IPN_TCPUDP) {
2949 		minlen += 8;		/* + 64bits of data to get ports */
2950 		if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2951 			return NULL;
2952 
2953 		data[0] = fin->fin_data[0];
2954 		data[1] = fin->fin_data[1];
2955 		tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2956 		fin->fin_data[0] = ntohs(tcp->th_dport);
2957 		fin->fin_data[1] = ntohs(tcp->th_sport);
2958 
2959 		if (dir == NAT_INBOUND) {
2960 			nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2961 					   oip->ip_src);
2962 		} else {
2963 			nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2964 					    oip->ip_src);
2965 		}
2966 		fin->fin_data[0] = data[0];
2967 		fin->fin_data[1] = data[1];
2968 		return nat;
2969 	}
2970 	if (dir == NAT_INBOUND)
2971 		return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2972 	else
2973 		return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2974 }
2975 
2976 
2977 /* ------------------------------------------------------------------------ */
2978 /* Function:    nat_icmperror                                               */
2979 /* Returns:     nat_t* - point to matching NAT structure                    */
2980 /* Parameters:  fin(I)    - pointer to packet information                   */
2981 /*              nflags(I) - NAT flags for this packet                       */
2982 /*              dir(I)    - direction of packet (in/out)                    */
2983 /*                                                                          */
2984 /* Fix up an ICMP packet which is an error message for an existing NAT      */
2985 /* session.  This will correct both packet header data and checksums.       */
2986 /*                                                                          */
2987 /* This should *ONLY* be used for incoming ICMP error packets to make sure  */
2988 /* a NAT'd ICMP packet gets correctly recognised.                           */
2989 /* ------------------------------------------------------------------------ */
2990 nat_t *nat_icmperror(fin, nflags, dir)
2991 fr_info_t *fin;
2992 u_int *nflags;
2993 int dir;
2994 {
2995 	u_32_t sum1, sum2, sumd, psum1, psum2, psumd, sumd2;
2996 	struct in_addr in;
2997 	icmphdr_t *icmp, *orgicmp;
2998 	int dlen;
2999 	udphdr_t *udp;
3000 	tcphdr_t *tcp;
3001 	nat_t *nat;
3002 	ip_t *oip;
3003 	if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
3004 		return NULL;
3005 
3006 	/*
3007 	 * nat_icmperrorlookup() looks up nat entry associated with the
3008 	 * offending IP packet and returns pointer to the entry, or NULL
3009 	 * if packet wasn't natted or for `defective' packets.
3010 	 */
3011 
3012 	if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
3013 		return NULL;
3014 
3015 	sumd2 = 0;
3016 	*nflags = IPN_ICMPERR;
3017 	icmp = fin->fin_dp;
3018 	oip = (ip_t *)&icmp->icmp_ip;
3019 	udp = (udphdr_t *)((((char *)oip) + (IP_HL(oip) << 2)));
3020 	tcp = (tcphdr_t *)udp;
3021 	dlen = fin->fin_plen - ((char *)udp - (char *)fin->fin_ip);
3022 
3023 	/*
3024 	 * Need to adjust ICMP header to include the real IP#'s and
3025 	 * port #'s.  There are three steps required.
3026 	 *
3027 	 * Step 1
3028 	 * Fix the IP addresses in the offending IP packet and update
3029 	 * ip header checksum to compensate for the change.
3030 	 *
3031 	 * No update needed here for icmp_cksum because the ICMP checksum
3032 	 * is calculated over the complete ICMP packet, which includes the
3033 	 * changed oip IP addresses and oip->ip_sum.  These two changes
3034 	 * cancel each other out (if the delta for the IP address is x,
3035 	 * then the delta for ip_sum is minus x).
3036 	 */
3037 
3038 	if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
3039 		sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
3040 		in = nat->nat_inip;
3041 		oip->ip_src = in;
3042 	} else {
3043 		sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
3044 		in = nat->nat_outip;
3045 		oip->ip_dst = in;
3046 	}
3047 
3048 	sum2 = LONG_SUM(ntohl(in.s_addr));
3049 	CALC_SUMD(sum1, sum2, sumd);
3050 	fix_datacksum(&oip->ip_sum, sumd);
3051 
3052 	/*
3053 	 * Step 2
3054 	 * Perform other adjustments based on protocol of offending packet.
3055 	 */
3056 
3057 	switch (oip->ip_p) {
3058 		case IPPROTO_TCP :
3059 		case IPPROTO_UDP :
3060 
3061 			/*
3062 			* For offending TCP/UDP IP packets, translate the ports
3063 			* based on the NAT specification.
3064 			*
3065 			* Advance notice : Now it becomes complicated :-)
3066 			*
3067 			* Since the port and IP addresse fields are both part
3068 			* of the TCP/UDP checksum of the offending IP packet,
3069 			* we need to adjust that checksum as well.
3070 			*
3071 			* To further complicate things, the TCP/UDP checksum
3072 			* may not be present.  We must check to see if the
3073 			* length of the data portion is big enough to hold
3074 			* the checksum.  In the UDP case, a test to determine
3075 			* if the checksum is even set is also required.
3076 			*
3077 			* Any changes to an IP address, port or checksum within
3078 			* the ICMP packet requires a change to icmp_cksum.
3079 			*
3080 			* Be extremely careful here ... The change is dependent
3081 			* upon whether or not the TCP/UPD checksum is present.
3082 			*
3083 			* If TCP/UPD checksum is present, the icmp_cksum must
3084 			* compensate for checksum modification resulting from
3085 			* IP address change only.  Port change and resulting
3086 			* data checksum adjustments cancel each other out.
3087 			*
3088 			* If TCP/UDP checksum is not present, icmp_cksum must
3089 			* compensate for port change only.  The IP address
3090 			* change does not modify anything else in this case.
3091 			*/
3092 
3093 			psum1 = 0;
3094 			psum2 = 0;
3095 			psumd = 0;
3096 
3097 			if ((tcp->th_dport == nat->nat_oport) &&
3098 			    (tcp->th_sport != nat->nat_inport)) {
3099 
3100 				/*
3101 				 * Translate the source port.
3102 				 */
3103 
3104 				psum1 = ntohs(tcp->th_sport);
3105 				psum2 = ntohs(nat->nat_inport);
3106 				tcp->th_sport = nat->nat_inport;
3107 
3108 			} else if ((tcp->th_sport == nat->nat_oport) &&
3109 				    (tcp->th_dport != nat->nat_outport)) {
3110 
3111 				/*
3112 				 * Translate the destination port.
3113 				 */
3114 
3115 				psum1 = ntohs(tcp->th_dport);
3116 				psum2 = ntohs(nat->nat_outport);
3117 				tcp->th_dport = nat->nat_outport;
3118 			}
3119 
3120 			if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) {
3121 
3122 				/*
3123 				 * TCP checksum present.
3124 				 *
3125 				 * Adjust data checksum and icmp checksum to
3126 				 * compensate for any IP address change.
3127 				 */
3128 
3129 				sum1 = ntohs(tcp->th_sum);
3130 				fix_datacksum(&tcp->th_sum, sumd);
3131 				sum2 = ntohs(tcp->th_sum);
3132 				sumd2 = sumd << 1;
3133 				CALC_SUMD(sum1, sum2, sumd);
3134 				sumd2 += sumd;
3135 
3136 				/*
3137 				 * Also make data checksum adjustment to
3138 				 * compensate for any port change.
3139 				 */
3140 
3141 				if (psum1 != psum2) {
3142 					CALC_SUMD(psum1, psum2, psumd);
3143 					fix_datacksum(&tcp->th_sum, psumd);
3144 				}
3145 
3146 			} else if ((oip->ip_p == IPPROTO_UDP) &&
3147 				   (dlen >= 8) && (udp->uh_sum != 0)) {
3148 
3149 				/*
3150 				 * The UDP checksum is present and set.
3151 				 *
3152 				 * Adjust data checksum and icmp checksum to
3153 				 * compensate for any IP address change.
3154 				 */
3155 
3156 				sum1 = ntohs(udp->uh_sum);
3157 				fix_datacksum(&udp->uh_sum, sumd);
3158 				sum2 = ntohs(udp->uh_sum);
3159 				sumd2 = sumd << 1;
3160 				CALC_SUMD(sum1, sum2, sumd);
3161 				sumd2 += sumd;
3162 
3163 				/*
3164 				 * Also make data checksum adjustment to
3165 				 * compensate for any port change.
3166 				 */
3167 
3168 				if (psum1 != psum2) {
3169 					CALC_SUMD(psum1, psum2, psumd);
3170 					fix_datacksum(&udp->uh_sum, psumd);
3171 				}
3172 
3173 			} else {
3174 
3175 				/*
3176 				 * Data checksum was not present.
3177 				 *
3178 				 * Compensate for any port change.
3179 				 */
3180 
3181 				CALC_SUMD(psum2, psum1, psumd);
3182 				sumd2 += psumd;
3183 			}
3184 			break;
3185 
3186 		case IPPROTO_ICMP :
3187 
3188 			orgicmp = (icmphdr_t *)udp;
3189 
3190 			if ((nat->nat_dir == NAT_OUTBOUND) &&
3191 			    (orgicmp->icmp_id != nat->nat_inport) &&
3192 			    (dlen >= 8)) {
3193 
3194 				/*
3195 				 * Fix ICMP checksum (of the offening ICMP
3196 				 * query packet) to compensate the change
3197 				 * in the ICMP id of the offending ICMP
3198 				 * packet.
3199 				 *
3200 				 * Since you modify orgicmp->icmp_id with
3201 				 * a delta (say x) and you compensate that
3202 				 * in origicmp->icmp_cksum with a delta
3203 				 * minus x, you don't have to adjust the
3204 				 * overall icmp->icmp_cksum
3205 				 */
3206 
3207 				sum1 = ntohs(orgicmp->icmp_id);
3208 				sum2 = ntohs(nat->nat_inport);
3209 				CALC_SUMD(sum1, sum2, sumd);
3210 				orgicmp->icmp_id = nat->nat_inport;
3211 				fix_datacksum(&orgicmp->icmp_cksum, sumd);
3212 
3213 			} /* nat_dir can't be NAT_INBOUND for icmp queries */
3214 
3215 			break;
3216 
3217 		default :
3218 
3219 			break;
3220 
3221 	} /* switch (oip->ip_p) */
3222 
3223 	/*
3224 	 * Step 3
3225 	 * Make the adjustments to icmp checksum.
3226 	 */
3227 
3228 	if (sumd2 != 0) {
3229 		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3230 		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3231 		fix_incksum(&icmp->icmp_cksum, sumd2);
3232 	}
3233 	return nat;
3234 }
3235 
3236 
/*
 * NB: these lookups don't lock access to the list; it is assumed that this
 * has already been done!
 */
3241 
3242 /* ------------------------------------------------------------------------ */
3243 /* Function:    nat_inlookup                                                */
3244 /* Returns:     nat_t* - NULL == no match,                                  */
3245 /*                       else pointer to matching NAT entry                 */
3246 /* Parameters:  fin(I)    - pointer to packet information                   */
3247 /*              flags(I)  - NAT flags for this packet                       */
3248 /*              p(I)      - protocol for this packet                        */
3249 /*              src(I)    - source IP address                               */
3250 /*              mapdst(I) - destination IP address                          */
3251 /*                                                                          */
3252 /* Lookup a nat entry based on the mapped destination ip address/port and   */
3253 /* real source address/port.  We use this lookup when receiving a packet,   */
3254 /* we're looking for a table entry, based on the destination address.       */
3255 /*                                                                          */
3256 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3257 /*                                                                          */
3258 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3259 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3260 /*                                                                          */
3261 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3262 /*            the packet is of said protocol                                */
3263 /* ------------------------------------------------------------------------ */
3264 nat_t *nat_inlookup(fin, flags, p, src, mapdst)
3265 fr_info_t *fin;
3266 u_int flags, p;
3267 struct in_addr src , mapdst;
3268 {
3269 	u_short sport, dport;
3270 	ipnat_t *ipn;
3271 	u_int sflags;
3272 	nat_t *nat;
3273 	int nflags;
3274 	u_32_t dst;
3275 	void *ifp;
3276 	u_int hv;
3277 	ipf_stack_t *ifs = fin->fin_ifs;
3278 
3279 	if (fin != NULL)
3280 		ifp = fin->fin_ifp;
3281 	else
3282 		ifp = NULL;
3283 	sport = 0;
3284 	dport = 0;
3285 	dst = mapdst.s_addr;
3286 	sflags = flags & NAT_TCPUDPICMP;
3287 
3288 	switch (p)
3289 	{
3290 	case IPPROTO_TCP :
3291 	case IPPROTO_UDP :
3292 		sport = htons(fin->fin_data[0]);
3293 		dport = htons(fin->fin_data[1]);
3294 		break;
3295 	case IPPROTO_ICMP :
3296 		if (flags & IPN_ICMPERR)
3297 			sport = fin->fin_data[1];
3298 		else
3299 			dport = fin->fin_data[1];
3300 		break;
3301 	default :
3302 		break;
3303 	}
3304 
3305 
3306 	if ((flags & SI_WILDP) != 0)
3307 		goto find_in_wild_ports;
3308 
3309 	hv = NAT_HASH_FN(dst, dport, 0xffffffff);
3310 	hv = NAT_HASH_FN(src.s_addr, hv + sport, ifs->ifs_ipf_nattable_sz);
3311 	nat = ifs->ifs_nat_table[1][hv];
3312 	for (; nat; nat = nat->nat_hnext[1]) {
3313 		if (nat->nat_v != 4)
3314 			continue;
3315 
3316 		if (nat->nat_ifps[0] != NULL) {
3317 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3318 				continue;
3319 		} else if (ifp != NULL)
3320 			nat->nat_ifps[0] = ifp;
3321 
3322 		nflags = nat->nat_flags;
3323 
3324 		if (nat->nat_oip.s_addr == src.s_addr &&
3325 		    nat->nat_outip.s_addr == dst &&
3326 		    (((p == 0) &&
3327 		      (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3328 		     || (p == nat->nat_p))) {
3329 			switch (p)
3330 			{
3331 #if 0
3332 			case IPPROTO_GRE :
3333 				if (nat->nat_call[1] != fin->fin_data[0])
3334 					continue;
3335 				break;
3336 #endif
3337 			case IPPROTO_ICMP :
3338 				if ((flags & IPN_ICMPERR) != 0) {
3339 					if (nat->nat_outport != sport)
3340 						continue;
3341 				} else {
3342 					if (nat->nat_outport != dport)
3343 						continue;
3344 				}
3345 				break;
3346 			case IPPROTO_TCP :
3347 			case IPPROTO_UDP :
3348 				if (nat->nat_oport != sport)
3349 					continue;
3350 				if (nat->nat_outport != dport)
3351 					continue;
3352 				break;
3353 			default :
3354 				break;
3355 			}
3356 
3357 			ipn = nat->nat_ptr;
3358 			if ((ipn != NULL) && (nat->nat_aps != NULL))
3359 				if (appr_match(fin, nat) != 0)
3360 					continue;
3361 			return nat;
3362 		}
3363 	}
3364 
3365 	/*
3366 	 * So if we didn't find it but there are wildcard members in the hash
3367 	 * table, go back and look for them.  We do this search and update here
3368 	 * because it is modifying the NAT table and we want to do this only
3369 	 * for the first packet that matches.  The exception, of course, is
3370 	 * for "dummy" (FI_IGNORE) lookups.
3371 	 */
3372 find_in_wild_ports:
3373 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3374 		return NULL;
3375 	if (ifs->ifs_nat_stats.ns_wilds == 0)
3376 		return NULL;
3377 
3378 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3379 
3380 	hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3381 	hv = NAT_HASH_FN(src.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3382 
3383 	WRITE_ENTER(&ifs->ifs_ipf_nat);
3384 
3385 	nat = ifs->ifs_nat_table[1][hv];
3386 	for (; nat; nat = nat->nat_hnext[1]) {
3387 		if (nat->nat_v != 4)
3388 			continue;
3389 
3390 		if (nat->nat_ifps[0] != NULL) {
3391 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3392 				continue;
3393 		} else if (ifp != NULL)
3394 			nat->nat_ifps[0] = ifp;
3395 
3396 		if (nat->nat_p != fin->fin_p)
3397 			continue;
3398 		if (nat->nat_oip.s_addr != src.s_addr ||
3399 		    nat->nat_outip.s_addr != dst)
3400 			continue;
3401 
3402 		nflags = nat->nat_flags;
3403 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3404 			continue;
3405 
3406 		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3407 			       NAT_INBOUND) == 1) {
3408 			if ((fin->fin_flx & FI_IGNORE) != 0)
3409 				break;
3410 			if ((nflags & SI_CLONE) != 0) {
3411 				nat = fr_natclone(fin, nat);
3412 				if (nat == NULL)
3413 					break;
3414 			} else {
3415 				MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3416 				ifs->ifs_nat_stats.ns_wilds--;
3417 				MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3418 			}
3419 			nat->nat_oport = sport;
3420 			nat->nat_outport = dport;
3421 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3422 			nat_tabmove(nat, ifs);
3423 			break;
3424 		}
3425 	}
3426 
3427 	MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3428 
3429 	return nat;
3430 }
3431 
3432 
3433 /* ------------------------------------------------------------------------ */
3434 /* Function:    nat_tabmove                                                 */
3435 /* Returns:     Nil                                                         */
3436 /* Parameters:  nat(I) - pointer to NAT structure                           */
3437 /* Write Lock:  ipf_nat                                                     */
3438 /*                                                                          */
3439 /* This function is only called for TCP/UDP NAT table entries where the     */
3440 /* original was placed in the table without hashing on the ports and we now */
3441 /* want to include hashing on port numbers.                                 */
3442 /* ------------------------------------------------------------------------ */
3443 static void nat_tabmove(nat, ifs)
3444 nat_t *nat;
3445 ipf_stack_t *ifs;
3446 {
3447 	nat_t **natp;
3448 	u_int hv;
3449 
3450 	if (nat->nat_flags & SI_CLONE)
3451 		return;
3452 
3453 	/*
3454 	 * Remove the NAT entry from the old location
3455 	 */
3456 	if (nat->nat_hnext[0])
3457 		nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3458 	*nat->nat_phnext[0] = nat->nat_hnext[0];
3459 	ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3460 
3461 	if (nat->nat_hnext[1])
3462 		nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3463 	*nat->nat_phnext[1] = nat->nat_hnext[1];
3464 	ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3465 
3466 	/*
3467 	 * Add into the NAT table in the new position
3468 	 */
3469 	hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3470 	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3471 			 ifs->ifs_ipf_nattable_sz);
3472 	nat->nat_hv[0] = hv;
3473 	natp = &ifs->ifs_nat_table[0][hv];
3474 	if (*natp)
3475 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3476 	nat->nat_phnext[0] = natp;
3477 	nat->nat_hnext[0] = *natp;
3478 	*natp = nat;
3479 	ifs->ifs_nat_stats.ns_bucketlen[0][hv]++;
3480 
3481 	hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3482 	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3483 			 ifs->ifs_ipf_nattable_sz);
3484 	nat->nat_hv[1] = hv;
3485 	natp = &ifs->ifs_nat_table[1][hv];
3486 	if (*natp)
3487 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3488 	nat->nat_phnext[1] = natp;
3489 	nat->nat_hnext[1] = *natp;
3490 	*natp = nat;
3491 	ifs->ifs_nat_stats.ns_bucketlen[1][hv]++;
3492 }
3493 
3494 
3495 /* ------------------------------------------------------------------------ */
3496 /* Function:    nat_outlookup                                               */
3497 /* Returns:     nat_t* - NULL == no match,                                  */
3498 /*                       else pointer to matching NAT entry                 */
3499 /* Parameters:  fin(I)   - pointer to packet information                    */
3500 /*              flags(I) - NAT flags for this packet                        */
3501 /*              p(I)     - protocol for this packet                         */
3502 /*              src(I)   - source IP address                                */
3503 /*              dst(I)   - destination IP address                           */
3504 /*              rw(I)    - 1 == write lock on ipf_nat held, 0 == read lock. */
3505 /*                                                                          */
3506 /* Lookup a nat entry based on the source 'real' ip address/port and        */
3507 /* destination address/port.  We use this lookup when sending a packet out, */
3508 /* we're looking for a table entry, based on the source address.            */
3509 /*                                                                          */
3510 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3511 /*                                                                          */
3512 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3513 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3514 /*                                                                          */
3515 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3516 /*            the packet is of said protocol                                */
3517 /* ------------------------------------------------------------------------ */
3518 nat_t *nat_outlookup(fin, flags, p, src, dst)
3519 fr_info_t *fin;
3520 u_int flags, p;
3521 struct in_addr src , dst;
3522 {
3523 	u_short sport, dport;
3524 	u_int sflags;
3525 	ipnat_t *ipn;
3526 	u_32_t srcip;
3527 	nat_t *nat;
3528 	int nflags;
3529 	void *ifp;
3530 	u_int hv;
3531 	ipf_stack_t *ifs = fin->fin_ifs;
3532 
3533 	ifp = fin->fin_ifp;
3534 
3535 	srcip = src.s_addr;
3536 	sflags = flags & IPN_TCPUDPICMP;
3537 	sport = 0;
3538 	dport = 0;
3539 
3540 	switch (p)
3541 	{
3542 	case IPPROTO_TCP :
3543 	case IPPROTO_UDP :
3544 		sport = htons(fin->fin_data[0]);
3545 		dport = htons(fin->fin_data[1]);
3546 		break;
3547 	case IPPROTO_ICMP :
3548 		if (flags & IPN_ICMPERR)
3549 			sport = fin->fin_data[1];
3550 		else
3551 			dport = fin->fin_data[1];
3552 		break;
3553 	default :
3554 		break;
3555 	}
3556 
3557 	if ((flags & SI_WILDP) != 0)
3558 		goto find_out_wild_ports;
3559 
3560 	hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3561 	hv = NAT_HASH_FN(dst.s_addr, hv + dport, ifs->ifs_ipf_nattable_sz);
3562 	nat = ifs->ifs_nat_table[0][hv];
3563 	for (; nat; nat = nat->nat_hnext[0]) {
3564 		if (nat->nat_v != 4)
3565 			continue;
3566 
3567 		if (nat->nat_ifps[1] != NULL) {
3568 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3569 				continue;
3570 		} else if (ifp != NULL)
3571 			nat->nat_ifps[1] = ifp;
3572 
3573 		nflags = nat->nat_flags;
3574 
3575 		if (nat->nat_inip.s_addr == srcip &&
3576 		    nat->nat_oip.s_addr == dst.s_addr &&
3577 		    (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3578 		     || (p == nat->nat_p))) {
3579 			switch (p)
3580 			{
3581 #if 0
3582 			case IPPROTO_GRE :
3583 				if (nat->nat_call[1] != fin->fin_data[0])
3584 					continue;
3585 				break;
3586 #endif
3587 			case IPPROTO_TCP :
3588 			case IPPROTO_UDP :
3589 				if (nat->nat_oport != dport)
3590 					continue;
3591 				if (nat->nat_inport != sport)
3592 					continue;
3593 				break;
3594 			default :
3595 				break;
3596 			}
3597 
3598 			ipn = nat->nat_ptr;
3599 			if ((ipn != NULL) && (nat->nat_aps != NULL))
3600 				if (appr_match(fin, nat) != 0)
3601 					continue;
3602 			return nat;
3603 		}
3604 	}
3605 
3606 	/*
3607 	 * So if we didn't find it but there are wildcard members in the hash
3608 	 * table, go back and look for them.  We do this search and update here
3609 	 * because it is modifying the NAT table and we want to do this only
3610 	 * for the first packet that matches.  The exception, of course, is
3611 	 * for "dummy" (FI_IGNORE) lookups.
3612 	 */
3613 find_out_wild_ports:
3614 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3615 		return NULL;
3616 	if (ifs->ifs_nat_stats.ns_wilds == 0)
3617 		return NULL;
3618 
3619 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3620 
3621 	hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3622 	hv = NAT_HASH_FN(dst.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3623 
3624 	WRITE_ENTER(&ifs->ifs_ipf_nat);
3625 
3626 	nat = ifs->ifs_nat_table[0][hv];
3627 	for (; nat; nat = nat->nat_hnext[0]) {
3628 		if (nat->nat_v != 4)
3629 			continue;
3630 
3631 		if (nat->nat_ifps[1] != NULL) {
3632 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3633 				continue;
3634 		} else if (ifp != NULL)
3635 			nat->nat_ifps[1] = ifp;
3636 
3637 		if (nat->nat_p != fin->fin_p)
3638 			continue;
3639 		if ((nat->nat_inip.s_addr != srcip) ||
3640 		    (nat->nat_oip.s_addr != dst.s_addr))
3641 			continue;
3642 
3643 		nflags = nat->nat_flags;
3644 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3645 			continue;
3646 
3647 		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3648 			       NAT_OUTBOUND) == 1) {
3649 			if ((fin->fin_flx & FI_IGNORE) != 0)
3650 				break;
3651 			if ((nflags & SI_CLONE) != 0) {
3652 				nat = fr_natclone(fin, nat);
3653 				if (nat == NULL)
3654 					break;
3655 			} else {
3656 				MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3657 				ifs->ifs_nat_stats.ns_wilds--;
3658 				MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3659 			}
3660 			nat->nat_inport = sport;
3661 			nat->nat_oport = dport;
3662 			if (nat->nat_outport == 0)
3663 				nat->nat_outport = sport;
3664 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3665 			nat_tabmove(nat, ifs);
3666 			break;
3667 		}
3668 	}
3669 
3670 	MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3671 
3672 	return nat;
3673 }
3674 
3675 
3676 /* ------------------------------------------------------------------------ */
3677 /* Function:    nat_lookupredir                                             */
3678 /* Returns:     nat_t* - NULL == no match,                                  */
3679 /*                       else pointer to matching NAT entry                 */
3680 /* Parameters:  np(I) - pointer to description of packet to find NAT table  */
3681 /*                      entry for.                                          */
3682 /*                                                                          */
3683 /* Lookup the NAT tables to search for a matching redirect                  */
3684 /* ------------------------------------------------------------------------ */
3685 nat_t *nat_lookupredir(np, ifs)
3686 natlookup_t *np;
3687 ipf_stack_t *ifs;
3688 {
3689 	fr_info_t fi;
3690 	nat_t *nat;
3691 
3692 	bzero((char *)&fi, sizeof(fi));
3693 	if (np->nl_flags & IPN_IN) {
3694 		fi.fin_data[0] = ntohs(np->nl_realport);
3695 		fi.fin_data[1] = ntohs(np->nl_outport);
3696 	} else {
3697 		fi.fin_data[0] = ntohs(np->nl_inport);
3698 		fi.fin_data[1] = ntohs(np->nl_outport);
3699 	}
3700 	if (np->nl_flags & IPN_TCP)
3701 		fi.fin_p = IPPROTO_TCP;
3702 	else if (np->nl_flags & IPN_UDP)
3703 		fi.fin_p = IPPROTO_UDP;
3704 	else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3705 		fi.fin_p = IPPROTO_ICMP;
3706 
3707 	fi.fin_ifs = ifs;
3708 	/*
3709 	 * We can do two sorts of lookups:
3710 	 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3711 	 * - default: we have the `in' and `out' address, look for `real'.
3712 	 */
3713 	if (np->nl_flags & IPN_IN) {
3714 		if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3715 					np->nl_realip, np->nl_outip))) {
3716 			np->nl_inip = nat->nat_inip;
3717 			np->nl_inport = nat->nat_inport;
3718 		}
3719 	} else {
3720 		/*
3721 		 * If nl_inip is non null, this is a lookup based on the real
3722 		 * ip address. Else, we use the fake.
3723 		 */
3724 		if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3725 					 np->nl_inip, np->nl_outip))) {
3726 
3727 			if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3728 				fr_info_t fin;
3729 				bzero((char *)&fin, sizeof(fin));
3730 				fin.fin_p = nat->nat_p;
3731 				fin.fin_data[0] = ntohs(nat->nat_outport);
3732 				fin.fin_data[1] = ntohs(nat->nat_oport);
3733 				fin.fin_ifs = ifs;
3734 				if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3735 						 nat->nat_outip,
3736 						 nat->nat_oip) != NULL) {
3737 					np->nl_flags &= ~IPN_FINDFORWARD;
3738 				}
3739 			}
3740 
3741 			np->nl_realip = nat->nat_outip;
3742 			np->nl_realport = nat->nat_outport;
3743 		}
3744  	}
3745 
3746 	return nat;
3747 }
3748 
3749 
3750 /* ------------------------------------------------------------------------ */
3751 /* Function:    nat_match                                                   */
3752 /* Returns:     int - 0 == no match, 1 == match                             */
3753 /* Parameters:  fin(I)   - pointer to packet information                    */
3754 /*              np(I)    - pointer to NAT rule                              */
3755 /*                                                                          */
3756 /* Pull the matching of a packet against a NAT rule out of that complex     */
3757 /* loop inside fr_checknatin() and lay it out properly in its own function. */
3758 /* ------------------------------------------------------------------------ */
3759 static int nat_match(fin, np)
3760 fr_info_t *fin;
3761 ipnat_t *np;
3762 {
3763 	frtuc_t *ft;
3764 
3765 	if (fin->fin_v != 4)
3766 		return 0;
3767 
3768 	if (np->in_p && fin->fin_p != np->in_p)
3769 		return 0;
3770 
3771 	if (fin->fin_out) {
3772 		if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3773 			return 0;
3774 		if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3775 		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3776 			return 0;
3777 		if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3778 		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3779 			return 0;
3780 	} else {
3781 		if (!(np->in_redir & NAT_REDIRECT))
3782 			return 0;
3783 		if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3784 		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3785 			return 0;
3786 		if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3787 		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3788 			return 0;
3789 	}
3790 
3791 	ft = &np->in_tuc;
3792 	if (!(fin->fin_flx & FI_TCPUDP) ||
3793 	    (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3794 		if (ft->ftu_scmp || ft->ftu_dcmp)
3795 			return 0;
3796 		return 1;
3797 	}
3798 
3799 	return fr_tcpudpchk(fin, ft);
3800 }
3801 
3802 
3803 /* ------------------------------------------------------------------------ */
3804 /* Function:    nat_update                                                  */
3805 /* Returns:     Nil                                                         */
3806 /* Parameters:	fin(I) - pointer to packet information			    */
3807 /*		nat(I) - pointer to NAT structure			    */
3808 /*              np(I)     - pointer to NAT rule                             */
3809 /* Locks:	nat_lock						    */
3810 /*                                                                          */
3811 /* Updates the lifetime of a NAT table entry for non-TCP packets.  Must be  */
3812 /* called with fin_rev updated - i.e. after calling nat_proto().            */
3813 /* ------------------------------------------------------------------------ */
3814 void nat_update(fin, nat, np)
3815 fr_info_t *fin;
3816 nat_t *nat;
3817 ipnat_t *np;
3818 {
3819 	ipftq_t *ifq, *ifq2;
3820 	ipftqent_t *tqe;
3821 	ipf_stack_t *ifs = fin->fin_ifs;
3822 
3823 	tqe = &nat->nat_tqe;
3824 	ifq = tqe->tqe_ifq;
3825 
3826 	/*
3827 	 * We allow over-riding of NAT timeouts from NAT rules, even for
3828 	 * TCP, however, if it is TCP and there is no rule timeout set,
3829 	 * then do not update the timeout here.
3830 	 */
3831 	if (np != NULL)
3832 		ifq2 = np->in_tqehead[fin->fin_rev];
3833 	else
3834 		ifq2 = NULL;
3835 
3836 	if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3837 		(void) fr_tcp_age(&nat->nat_tqe, fin, ifs->ifs_nat_tqb, 0);
3838 	} else {
3839 		if (ifq2 == NULL) {
3840 			if (nat->nat_p == IPPROTO_UDP)
3841 				ifq2 = &ifs->ifs_nat_udptq;
3842 			else if (nat->nat_p == IPPROTO_ICMP)
3843 				ifq2 = &ifs->ifs_nat_icmptq;
3844 			else
3845 				ifq2 = &ifs->ifs_nat_iptq;
3846 		}
3847 
3848 		fr_movequeue(tqe, ifq, ifq2, ifs);
3849 	}
3850 }
3851 
3852 
3853 /* ------------------------------------------------------------------------ */
3854 /* Function:    fr_checknatout                                              */
3855 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3856 /*                     0 == no packet translation occurred,                 */
3857 /*                     1 == packet was successfully translated.             */
3858 /* Parameters:  fin(I)   - pointer to packet information                    */
3859 /*              passp(I) - pointer to filtering result flags                */
3860 /*                                                                          */
3861 /* Check to see if an outcoming packet should be changed.  ICMP packets are */
3862 /* first checked to see if they match an existing entry (if an error),      */
3863 /* otherwise a search of the current NAT table is made.  If neither results */
3864 /* in a match then a search for a matching NAT rule is made.  Create a new  */
3865 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
3866 /* packet header(s) as required.                                            */
3867 /* ------------------------------------------------------------------------ */
3868 int fr_checknatout(fin, passp)
3869 fr_info_t *fin;
3870 u_32_t *passp;
3871 {
3872 	ipnat_t *np = NULL, *npnext;
3873 	struct ifnet *ifp, *sifp;
3874 	icmphdr_t *icmp = NULL;
3875 	tcphdr_t *tcp = NULL;
3876 	int rval, natfailed;
3877 	u_int nflags = 0;
3878 	u_32_t ipa, iph;
3879 	int natadd = 1;
3880 	frentry_t *fr;
3881 	nat_t *nat;
3882 	ipf_stack_t *ifs = fin->fin_ifs;
3883 
3884 	if (ifs->ifs_fr_nat_lock != 0)
3885 		return 0;
3886 	if (ifs->ifs_nat_stats.ns_rules == 0 && ifs->ifs_nat_instances == NULL)
3887 		return 0;
3888 
3889 	natfailed = 0;
3890 	fr = fin->fin_fr;
3891 	sifp = fin->fin_ifp;
3892 	if ((fr != NULL) && !(fr->fr_flags & FR_DUP) &&
3893 	    fr->fr_tifs[fin->fin_rev].fd_ifp &&
3894 	    fr->fr_tifs[fin->fin_rev].fd_ifp != (void *)-1)
3895 		fin->fin_ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3896 	ifp = fin->fin_ifp;
3897 
3898 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3899 		switch (fin->fin_p)
3900 		{
3901 		case IPPROTO_TCP :
3902 			nflags = IPN_TCP;
3903 			break;
3904 		case IPPROTO_UDP :
3905 			nflags = IPN_UDP;
3906 			break;
3907 		case IPPROTO_ICMP :
3908 			icmp = fin->fin_dp;
3909 
3910 			/*
3911 			 * This is an incoming packet, so the destination is
3912 			 * the icmp_id and the source port equals 0
3913 			 */
3914 			if (nat_icmpquerytype4(icmp->icmp_type))
3915 				nflags = IPN_ICMPQUERY;
3916 			break;
3917 		default :
3918 			break;
3919 		}
3920 
3921 		if ((nflags & IPN_TCPUDP))
3922 			tcp = fin->fin_dp;
3923 	}
3924 
3925 	ipa = fin->fin_saddr;
3926 
3927 	READ_ENTER(&ifs->ifs_ipf_nat);
3928 
3929 	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3930 	    (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3931 		/*EMPTY*/;
3932 	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3933 		natadd = 0;
3934 	else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3935 				      fin->fin_src, fin->fin_dst))) {
3936 		nflags = nat->nat_flags;
3937 	} else {
3938 		u_32_t hv, msk, nmsk;
3939 
3940 		/*
3941 		 * There is no current entry in the nat table for this packet.
3942 		 *
3943 		 * If the packet is a fragment, but not the first fragment,
3944 		 * then don't do anything.  Otherwise, if there is a matching
3945 		 * nat rule, try to create a new nat entry.
3946 		 */
3947 		if ((fin->fin_off != 0) && (fin->fin_flx & FI_TCPUDP))
3948 			goto nonatfrag;
3949 
3950 		msk = 0xffffffff;
3951 		nmsk = ifs->ifs_nat_masks;
3952 maskloop:
3953 		iph = ipa & htonl(msk);
3954 		hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_natrules_sz);
3955 		for (np = ifs->ifs_nat_rules[hv]; np; np = npnext) {
3956 			npnext = np->in_mnext;
3957 			if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
3958 				continue;
3959 			if (np->in_v != fin->fin_v)
3960 				continue;
3961 			if (np->in_p && (np->in_p != fin->fin_p))
3962 				continue;
3963 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3964 				continue;
3965 			if (np->in_flags & IPN_FILTER) {
3966 				if (!nat_match(fin, np))
3967 					continue;
3968 			} else if ((ipa & np->in_inmsk) != np->in_inip)
3969 				continue;
3970 
3971 			if ((fr != NULL) &&
3972 			    !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3973 				continue;
3974 
3975 			if (*np->in_plabel != '\0') {
3976 				if (((np->in_flags & IPN_FILTER) == 0) &&
3977 				    (np->in_dport != tcp->th_dport))
3978 					continue;
3979 				if (appr_ok(fin, tcp, np) == 0)
3980 					continue;
3981 			}
3982 
3983 			ATOMIC_INC32(np->in_use);
3984 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3985 			WRITE_ENTER(&ifs->ifs_ipf_nat);
3986 			nat = nat_new(fin, np, NULL, nflags, NAT_OUTBOUND);
3987 			if (nat != NULL) {
3988 				np->in_use--;
3989 				np->in_hits++;
3990 				MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3991 				break;
3992 			}
3993 			natfailed = -1;
3994 			npnext = np->in_mnext;
3995 			fr_ipnatderef(&np, ifs);
3996 			MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3997 		}
3998 		if ((np == NULL) && (nmsk != 0)) {
3999 			while (nmsk) {
4000 				msk <<= 1;
4001 				if (nmsk & 0x80000000)
4002 					break;
4003 				nmsk <<= 1;
4004 			}
4005 			if (nmsk != 0) {
4006 				nmsk <<= 1;
4007 				goto maskloop;
4008 			}
4009 		}
4010 	}
4011 
4012 nonatfrag:
4013 	if (nat != NULL) {
4014 		rval = fr_natout(fin, nat, natadd, nflags);
4015 		if (rval == 1) {
4016 			MUTEX_ENTER(&nat->nat_lock);
4017 			nat_update(fin, nat, nat->nat_ptr);
4018 			nat->nat_bytes[1] += fin->fin_plen;
4019 			nat->nat_pkts[1]++;
4020 			fin->fin_pktnum = nat->nat_pkts[1];
4021 			MUTEX_EXIT(&nat->nat_lock);
4022 		}
4023 	} else
4024 		rval = natfailed;
4025 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4026 
4027 	if (rval == -1) {
4028 		if (passp != NULL)
4029 			*passp = FR_BLOCK;
4030 		fin->fin_flx |= FI_BADNAT;
4031 	}
4032 	fin->fin_ifp = sifp;
4033 	return rval;
4034 }
4035 
4036 /* ------------------------------------------------------------------------ */
4037 /* Function:    fr_natout                                                   */
4038 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4039 /*                     1 == packet was successfully translated.             */
4040 /* Parameters:  fin(I)    - pointer to packet information                   */
4041 /*              nat(I)    - pointer to NAT structure                        */
4042 /*              natadd(I) - flag indicating if it is safe to add frag cache */
4043 /*              nflags(I) - NAT flags set for this packet                   */
4044 /*                                                                          */
4045 /* Translate a packet coming "out" on an interface.                         */
4046 /* ------------------------------------------------------------------------ */
4047 int fr_natout(fin, nat, natadd, nflags)
4048 fr_info_t *fin;
4049 nat_t *nat;
4050 int natadd;
4051 u_32_t nflags;
4052 {
4053 	icmphdr_t *icmp;
4054 	u_short *csump;
4055 	u_32_t sumd;
4056 	tcphdr_t *tcp;
4057 	ipnat_t *np;
4058 	int i;
4059 	ipf_stack_t *ifs = fin->fin_ifs;
4060 
4061 	if (fin->fin_v == 6) {
4062 #ifdef	USE_INET6
4063 		return fr_nat6out(fin, nat, natadd, nflags);
4064 #else
4065 		return NULL;
4066 #endif
4067 	}
4068 
4069 #if SOLARIS && defined(_KERNEL)
4070 	net_handle_t net_data_p = ifs->ifs_ipf_ipv4;
4071 #endif
4072 
4073 	tcp = NULL;
4074 	icmp = NULL;
4075 	csump = NULL;
4076 	np = nat->nat_ptr;
4077 
4078 	if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4079 		(void) fr_nat_newfrag(fin, 0, nat);
4080 
4081 	/*
4082 	 * Fix up checksums, not by recalculating them, but
4083 	 * simply computing adjustments.
4084 	 * This is only done for STREAMS based IP implementations where the
4085 	 * checksum has already been calculated by IP.  In all other cases,
4086 	 * IPFilter is called before the checksum needs calculating so there
4087 	 * is no call to modify whatever is in the header now.
4088 	 */
4089 	ASSERT(fin->fin_m != NULL);
4090 	if (fin->fin_v == 4 && !NET_IS_HCK_L3_FULL(net_data_p, fin->fin_m)) {
4091 		if (nflags == IPN_ICMPERR) {
4092 			u_32_t s1, s2;
4093 
4094 			s1 = LONG_SUM(ntohl(fin->fin_saddr));
4095 			s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
4096 			CALC_SUMD(s1, s2, sumd);
4097 
4098 			fix_outcksum(&fin->fin_ip->ip_sum, sumd);
4099 		}
4100 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4101     defined(linux) || defined(BRIDGE_IPF)
4102 		else {
4103 			/*
4104 			 * Strictly speaking, this isn't necessary on BSD
4105 			 * kernels because they do checksum calculation after
4106 			 * this code has run BUT if ipfilter is being used
4107 			 * to do NAT as a bridge, that code doesn't exist.
4108 			 */
4109 			if (nat->nat_dir == NAT_OUTBOUND)
4110 				fix_outcksum(&fin->fin_ip->ip_sum,
4111 					    nat->nat_ipsumd);
4112 			else
4113 				fix_incksum(&fin->fin_ip->ip_sum,
4114 				 	   nat->nat_ipsumd);
4115 		}
4116 #endif
4117 	}
4118 
4119 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4120 		if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
4121 			tcp = fin->fin_dp;
4122 
4123 			tcp->th_sport = nat->nat_outport;
4124 			fin->fin_data[0] = ntohs(nat->nat_outport);
4125 		}
4126 
4127 		if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
4128 			icmp = fin->fin_dp;
4129 			icmp->icmp_id = nat->nat_outport;
4130 		}
4131 
4132 		csump = nat_proto(fin, nat, nflags);
4133 	}
4134 
4135 	fin->fin_ip->ip_src = nat->nat_outip;
4136 
4137 	/*
4138 	 * The above comments do not hold for layer 4 (or higher) checksums...
4139 	 */
4140 	if (csump != NULL && !NET_IS_HCK_L4_FULL(net_data_p, fin->fin_m)) {
4141 		if (nflags & IPN_TCPUDP &&
4142 	   	    NET_IS_HCK_L4_PART(net_data_p, fin->fin_m))
4143 			sumd = nat->nat_sumd[1];
4144 		else
4145 			sumd = nat->nat_sumd[0];
4146 
4147 		if (nat->nat_dir == NAT_OUTBOUND)
4148 			fix_outcksum(csump, sumd);
4149 		else
4150 			fix_incksum(csump, sumd);
4151 	}
4152 #ifdef	IPFILTER_SYNC
4153 	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4154 #endif
4155 	/* ------------------------------------------------------------- */
4156 	/* A few quick notes:						 */
4157 	/*	Following are test conditions prior to calling the 	 */
4158 	/*	appr_check routine.					 */
4159 	/*								 */
4160 	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4161 	/*	with a redirect rule, we attempt to match the packet's	 */
4162 	/*	source port against in_dport, otherwise	we'd compare the */
4163 	/*	packet's destination.			 		 */
4164 	/* ------------------------------------------------------------- */
4165 	if ((np != NULL) && (np->in_apr != NULL)) {
4166 		i = appr_check(fin, nat);
4167 		if (i == 0)
4168 			i = 1;
4169 	} else
4170 		i = 1;
4171 	ifs->ifs_nat_stats.ns_mapped[1]++;
4172 	fin->fin_flx |= FI_NATED;
4173 	return i;
4174 }
4175 
4176 
4177 /* ------------------------------------------------------------------------ */
4178 /* Function:    fr_checknatin                                               */
4179 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4180 /*                     0 == no packet translation occurred,                 */
4181 /*                     1 == packet was successfully translated.             */
4182 /* Parameters:  fin(I)   - pointer to packet information                    */
4183 /*              passp(I) - pointer to filtering result flags                */
4184 /*                                                                          */
4185 /* Check to see if an incoming packet should be changed.  ICMP packets are  */
4186 /* first checked to see if they match an existing entry (if an error),      */
4187 /* otherwise a search of the current NAT table is made.  If neither results */
4188 /* in a match then a search for a matching NAT rule is made.  Create a new  */
4189 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
4190 /* packet header(s) as required.                                            */
4191 /* ------------------------------------------------------------------------ */
4192 int fr_checknatin(fin, passp)
4193 fr_info_t *fin;
4194 u_32_t *passp;
4195 {
4196 	u_int nflags, natadd;
4197 	ipnat_t *np, *npnext;
4198 	int rval, natfailed;
4199 	struct ifnet *ifp;
4200 	struct in_addr in;
4201 	icmphdr_t *icmp;
4202 	tcphdr_t *tcp;
4203 	u_short dport;
4204 	nat_t *nat;
4205 	u_32_t iph;
4206 	ipf_stack_t *ifs = fin->fin_ifs;
4207 
4208 	if (ifs->ifs_fr_nat_lock != 0)
4209 		return 0;
4210 	if (ifs->ifs_nat_stats.ns_rules == 0 && ifs->ifs_nat_instances == NULL)
4211 		return 0;
4212 
4213 	tcp = NULL;
4214 	icmp = NULL;
4215 	dport = 0;
4216 	natadd = 1;
4217 	nflags = 0;
4218 	natfailed = 0;
4219 	ifp = fin->fin_ifp;
4220 
4221 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4222 		switch (fin->fin_p)
4223 		{
4224 		case IPPROTO_TCP :
4225 			nflags = IPN_TCP;
4226 			break;
4227 		case IPPROTO_UDP :
4228 			nflags = IPN_UDP;
4229 			break;
4230 		case IPPROTO_ICMP :
4231 			icmp = fin->fin_dp;
4232 
4233 			/*
4234 			 * This is an incoming packet, so the destination is
4235 			 * the icmp_id and the source port equals 0
4236 			 */
4237 			if (nat_icmpquerytype4(icmp->icmp_type)) {
4238 				nflags = IPN_ICMPQUERY;
4239 				dport = icmp->icmp_id;
4240 			} break;
4241 		default :
4242 			break;
4243 		}
4244 
4245 		if ((nflags & IPN_TCPUDP)) {
4246 			tcp = fin->fin_dp;
4247 			dport = tcp->th_dport;
4248 		}
4249 	}
4250 
4251 	in = fin->fin_dst;
4252 
4253 	READ_ENTER(&ifs->ifs_ipf_nat);
4254 
4255 	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
4256 	    (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
4257 		/*EMPTY*/;
4258 	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
4259 		natadd = 0;
4260 	else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
4261 				     fin->fin_src, in))) {
4262 		nflags = nat->nat_flags;
4263 	} else {
4264 		u_32_t hv, msk, rmsk;
4265 
4266 		/*
4267 		 * There is no current entry in the nat table for this packet.
4268 		 *
4269 		 * If the packet is a fragment, but not the first fragment,
4270 		 * then don't do anything.  Otherwise, if there is a matching
4271 		 * nat rule, try to create a new nat entry.
4272 		 */
4273 		if ((fin->fin_off != 0) && (fin->fin_flx & FI_TCPUDP))
4274 			goto nonatfrag;
4275 
4276 		rmsk = ifs->ifs_rdr_masks;
4277 		msk = 0xffffffff;
4278 maskloop:
4279 		iph = in.s_addr & htonl(msk);
4280 		hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_rdrrules_sz);
4281 		for (np = ifs->ifs_rdr_rules[hv]; np; np = npnext) {
4282 			npnext = np->in_rnext;
4283 			if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
4284 				continue;
4285 			if (np->in_v != fin->fin_v)
4286 				continue;
4287 			if (np->in_p && (np->in_p != fin->fin_p))
4288 				continue;
4289 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
4290 				continue;
4291 			if (np->in_flags & IPN_FILTER) {
4292 				if (!nat_match(fin, np))
4293 					continue;
4294 			} else {
4295 				if ((in.s_addr & np->in_outmsk) != np->in_outip)
4296 					continue;
4297 				if (np->in_pmin &&
4298 				    ((ntohs(np->in_pmax) < ntohs(dport)) ||
4299 				     (ntohs(dport) < ntohs(np->in_pmin))))
4300 					continue;
4301 			}
4302 
4303 			if (*np->in_plabel != '\0') {
4304 				if (!appr_ok(fin, tcp, np)) {
4305 					continue;
4306 				}
4307 			}
4308 
4309 			ATOMIC_INC32(np->in_use);
4310 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4311 			WRITE_ENTER(&ifs->ifs_ipf_nat);
4312 			nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
4313 			if (nat != NULL) {
4314 				np->in_use--;
4315 				np->in_hits++;
4316 				MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
4317 				break;
4318 			}
4319 			natfailed = -1;
4320 			npnext = np->in_rnext;
4321 			fr_ipnatderef(&np, ifs);
4322 			MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
4323 		}
4324 
4325 		if ((np == NULL) && (rmsk != 0)) {
4326 			while (rmsk) {
4327 				msk <<= 1;
4328 				if (rmsk & 0x80000000)
4329 					break;
4330 				rmsk <<= 1;
4331 			}
4332 			if (rmsk != 0) {
4333 				rmsk <<= 1;
4334 				goto maskloop;
4335 			}
4336 		}
4337 	}
4338 
4339 nonatfrag:
4340 	if (nat != NULL) {
4341 		rval = fr_natin(fin, nat, natadd, nflags);
4342 		if (rval == 1) {
4343 			MUTEX_ENTER(&nat->nat_lock);
4344 			nat_update(fin, nat, nat->nat_ptr);
4345 			nat->nat_bytes[0] += fin->fin_plen;
4346 			nat->nat_pkts[0]++;
4347 			fin->fin_pktnum = nat->nat_pkts[0];
4348 			MUTEX_EXIT(&nat->nat_lock);
4349 		}
4350 	} else
4351 		rval = natfailed;
4352 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4353 
4354 	if (rval == -1) {
4355 		if (passp != NULL)
4356 			*passp = FR_BLOCK;
4357 		fin->fin_flx |= FI_BADNAT;
4358 	}
4359 	return rval;
4360 }
4361 
4362 
4363 /* ------------------------------------------------------------------------ */
4364 /* Function:    fr_natin                                                    */
4365 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4366 /*                     1 == packet was successfully translated.             */
4367 /* Parameters:  fin(I)    - pointer to packet information                   */
4368 /*              nat(I)    - pointer to NAT structure                        */
4369 /*              natadd(I) - flag indicating if it is safe to add frag cache */
4370 /*              nflags(I) - NAT flags set for this packet                   */
4371 /* Locks Held:  ipf_nat (READ)                                              */
4372 /*                                                                          */
4373 /* Translate a packet coming "in" on an interface.                          */
4374 /* ------------------------------------------------------------------------ */
4375 int fr_natin(fin, nat, natadd, nflags)
4376 fr_info_t *fin;
4377 nat_t *nat;
4378 int natadd;
4379 u_32_t nflags;
4380 {
4381 	icmphdr_t *icmp;
4382 	u_short *csump;
4383 	tcphdr_t *tcp;
4384 	ipnat_t *np;
4385 	int i;
4386 	ipf_stack_t *ifs = fin->fin_ifs;
4387 
4388 	if (fin->fin_v == 6) {
4389 #ifdef	USE_INET6
4390 		return fr_nat6in(fin, nat, natadd, nflags);
4391 #else
4392 		return NULL;
4393 #endif
4394 	}
4395 
4396 #if SOLARIS && defined(_KERNEL)
4397 	net_handle_t net_data_p = ifs->ifs_ipf_ipv4;
4398 #endif
4399 
4400 	tcp = NULL;
4401 	csump = NULL;
4402 	np = nat->nat_ptr;
4403 	fin->fin_fr = nat->nat_fr;
4404 
4405 	if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4406 		(void) fr_nat_newfrag(fin, 0, nat);
4407 
4408 	if (np != NULL) {
4409 
4410 	/* ------------------------------------------------------------- */
4411 	/* A few quick notes:						 */
4412 	/*	Following are test conditions prior to calling the 	 */
4413 	/*	appr_check routine.					 */
4414 	/*								 */
4415 	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4416 	/*	with a map rule, we attempt to match the packet's	 */
4417 	/*	source port against in_dport, otherwise	we'd compare the */
4418 	/*	packet's destination.			 		 */
4419 	/* ------------------------------------------------------------- */
4420 		if (np->in_apr != NULL) {
4421 			i = appr_check(fin, nat);
4422 			if (i == -1) {
4423 				return -1;
4424 			}
4425 		}
4426 	}
4427 
4428 #ifdef	IPFILTER_SYNC
4429 	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4430 #endif
4431 
4432 	fin->fin_ip->ip_dst = nat->nat_inip;
4433 	fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4434 	if (nflags & IPN_TCPUDP)
4435 		tcp = fin->fin_dp;
4436 
4437 	/*
4438 	 * Fix up checksums, not by recalculating them, but
4439 	 * simply computing adjustments.
4440 	 * Why only do this for some platforms on inbound packets ?
4441 	 * Because for those that it is done, IP processing is yet to happen
4442 	 * and so the IPv4 header checksum has not yet been evaluated.
4443 	 * Perhaps it should always be done for the benefit of things like
4444 	 * fast forwarding (so that it doesn't need to be recomputed) but with
4445 	 * header checksum offloading, perhaps it is a moot point.
4446 	 */
4447 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4448      defined(__osf__) || defined(linux)
4449 	if (nat->nat_dir == NAT_OUTBOUND)
4450 		fix_incksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
4451 	else
4452 		fix_outcksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
4453 #endif
4454 
4455 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4456 		if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4457 			tcp->th_dport = nat->nat_inport;
4458 			fin->fin_data[1] = ntohs(nat->nat_inport);
4459 		}
4460 
4461 
4462 		if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4463 			icmp = fin->fin_dp;
4464 
4465 			icmp->icmp_id = nat->nat_inport;
4466 		}
4467 
4468 		csump = nat_proto(fin, nat, nflags);
4469 	}
4470 
4471 	/*
4472 	 * In case they are being forwarded, inbound packets always need to have
4473 	 * their checksum adjusted even if hardware checksum validation said OK.
4474 	 */
4475 	if (csump != NULL) {
4476 		if (nat->nat_dir == NAT_OUTBOUND)
4477 			fix_incksum(csump, nat->nat_sumd[0]);
4478 		else
4479 			fix_outcksum(csump, nat->nat_sumd[0]);
4480 	}
4481 
4482 #if SOLARIS && defined(_KERNEL)
4483 	if (nflags & IPN_TCPUDP &&
4484 	    NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) {
4485 		/*
4486 		 * Need to adjust the partial checksum result stored in
4487 		 * db_cksum16, which will be used for validation in IP.
4488 		 * See IP_CKSUM_RECV().
4489 		 * Adjustment data should be the inverse of the IP address
4490 		 * changes, because db_cksum16 is supposed to be the complement
4491 		 * of the pesudo header.
4492 		 */
4493 		csump = &fin->fin_m->b_datap->db_cksum16;
4494 		if (nat->nat_dir == NAT_OUTBOUND)
4495 			fix_outcksum(csump, nat->nat_sumd[1]);
4496 		else
4497 			fix_incksum(csump, nat->nat_sumd[1]);
4498 	}
4499 #endif
4500 
4501 	ifs->ifs_nat_stats.ns_mapped[0]++;
4502 	fin->fin_flx |= FI_NATED;
4503 	if (np != NULL && np->in_tag.ipt_num[0] != 0)
4504 		fin->fin_nattag = &np->in_tag;
4505 	return 1;
4506 }
4507 
4508 
4509 /* ------------------------------------------------------------------------ */
4510 /* Function:    nat_proto                                                   */
4511 /* Returns:     u_short* - pointer to transport header checksum to update,  */
4512 /*                         NULL if the transport protocol is not recognised */
4513 /*                         as needing a checksum update.                    */
4514 /* Parameters:  fin(I)    - pointer to packet information                   */
4515 /*              nat(I)    - pointer to NAT structure                        */
4516 /*              nflags(I) - NAT flags set for this packet                   */
4517 /*                                                                          */
4518 /* Return the pointer to the checksum field for each protocol so understood.*/
4519 /* If support for making other changes to a protocol header is required,    */
4520 /* that is not strictly 'address' translation, such as clamping the MSS in  */
4521 /* TCP down to a specific value, then do it from here.                      */
4522 /* ------------------------------------------------------------------------ */
4523 u_short *nat_proto(fin, nat, nflags)
4524 fr_info_t *fin;
4525 nat_t *nat;
4526 u_int nflags;
4527 {
4528 	icmphdr_t *icmp;
4529 	struct icmp6_hdr *icmp6;
4530 	u_short *csump;
4531 	tcphdr_t *tcp;
4532 	udphdr_t *udp;
4533 
4534 	csump = NULL;
4535 	if (fin->fin_out == 0) {
4536 		fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4537 	} else {
4538 		fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4539 	}
4540 
4541 	switch (fin->fin_p)
4542 	{
4543 	case IPPROTO_TCP :
4544 		tcp = fin->fin_dp;
4545 
4546 		csump = &tcp->th_sum;
4547 
4548 		/*
4549 		 * Do a MSS CLAMPING on a SYN packet,
4550 		 * only deal IPv4 for now.
4551 		 */
4552 		if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4553 			nat_mssclamp(tcp, nat->nat_mssclamp, csump);
4554 
4555 		break;
4556 
4557 	case IPPROTO_UDP :
4558 		udp = fin->fin_dp;
4559 
4560 		if (udp->uh_sum)
4561 			csump = &udp->uh_sum;
4562 		break;
4563 
4564 	case IPPROTO_ICMP :
4565 		icmp = fin->fin_dp;
4566 
4567 		if ((nflags & IPN_ICMPQUERY) != 0) {
4568 			if (icmp->icmp_cksum != 0)
4569 				csump = &icmp->icmp_cksum;
4570 		}
4571 		break;
4572 
4573 	case IPPROTO_ICMPV6 :
4574 		icmp6 = fin->fin_dp;
4575 
4576 		if ((nflags & IPN_ICMPQUERY) != 0) {
4577 			if (icmp6->icmp6_cksum != 0)
4578 				csump = &icmp6->icmp6_cksum;
4579 		}
4580 		break;
4581 	}
4582 	return csump;
4583 }
4584 
4585 
4586 /* ------------------------------------------------------------------------ */
4587 /* Function:    fr_natunload                                                */
4588 /* Returns:     Nil                                                         */
4589 /* Parameters:  ifs - ipf stack instance                                  */
4590 /*                                                                          */
4591 /* Free all memory used by NAT structures allocated at runtime.             */
4592 /* ------------------------------------------------------------------------ */
4593 void fr_natunload(ifs)
4594 ipf_stack_t *ifs;
4595 {
4596 	ipftq_t *ifq, *ifqnext;
4597 
4598 	(void) nat_clearlist(ifs);
4599 	(void) nat_flushtable(FLUSH_TABLE_ALL, ifs);
4600 
4601 	/*
4602 	 * Proxy timeout queues are not cleaned here because although they
4603 	 * exist on the NAT list, appr_unload is called after fr_natunload
4604 	 * and the proxies actually are responsible for them being created.
4605 	 * Should the proxy timeouts have their own list?  There's no real
4606 	 * justification as this is the only complication.
4607 	 */
4608 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4609 		ifqnext = ifq->ifq_next;
4610 		if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4611 		    (fr_deletetimeoutqueue(ifq) == 0))
4612 			fr_freetimeoutqueue(ifq, ifs);
4613 	}
4614 
4615 	if (ifs->ifs_nat_table[0] != NULL) {
4616 		KFREES(ifs->ifs_nat_table[0],
4617 		       sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4618 		ifs->ifs_nat_table[0] = NULL;
4619 	}
4620 	if (ifs->ifs_nat_table[1] != NULL) {
4621 		KFREES(ifs->ifs_nat_table[1],
4622 		       sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4623 		ifs->ifs_nat_table[1] = NULL;
4624 	}
4625 	if (ifs->ifs_nat_rules != NULL) {
4626 		KFREES(ifs->ifs_nat_rules,
4627 		       sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
4628 		ifs->ifs_nat_rules = NULL;
4629 	}
4630 	if (ifs->ifs_rdr_rules != NULL) {
4631 		KFREES(ifs->ifs_rdr_rules,
4632 		       sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
4633 		ifs->ifs_rdr_rules = NULL;
4634 	}
4635 	if (ifs->ifs_maptable != NULL) {
4636 		KFREES(ifs->ifs_maptable,
4637 		       sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
4638 		ifs->ifs_maptable = NULL;
4639 	}
4640 	if (ifs->ifs_nat_stats.ns_bucketlen[0] != NULL) {
4641 		KFREES(ifs->ifs_nat_stats.ns_bucketlen[0],
4642 		       sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4643 		ifs->ifs_nat_stats.ns_bucketlen[0] = NULL;
4644 	}
4645 	if (ifs->ifs_nat_stats.ns_bucketlen[1] != NULL) {
4646 		KFREES(ifs->ifs_nat_stats.ns_bucketlen[1],
4647 		       sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4648 		ifs->ifs_nat_stats.ns_bucketlen[1] = NULL;
4649 	}
4650 
4651 	if (ifs->ifs_fr_nat_maxbucket_reset == 1)
4652 		ifs->ifs_fr_nat_maxbucket = 0;
4653 
4654 	if (ifs->ifs_fr_nat_init == 1) {
4655 		ifs->ifs_fr_nat_init = 0;
4656 		fr_sttab_destroy(ifs->ifs_nat_tqb);
4657 
4658 		RW_DESTROY(&ifs->ifs_ipf_natfrag);
4659 		RW_DESTROY(&ifs->ifs_ipf_nat);
4660 
4661 		MUTEX_DESTROY(&ifs->ifs_ipf_nat_new);
4662 		MUTEX_DESTROY(&ifs->ifs_ipf_natio);
4663 
4664 		MUTEX_DESTROY(&ifs->ifs_nat_udptq.ifq_lock);
4665 		MUTEX_DESTROY(&ifs->ifs_nat_icmptq.ifq_lock);
4666 		MUTEX_DESTROY(&ifs->ifs_nat_iptq.ifq_lock);
4667 	}
4668 }
4669 
4670 
4671 /* ------------------------------------------------------------------------ */
4672 /* Function:    fr_natexpire                                                */
4673 /* Returns:     Nil                                                         */
4674 /* Parameters:  ifs - ipf stack instance                                    */
4675 /*                                                                          */
4676 /* Check all of the timeout queues for entries at the top which need to be  */
4677 /* expired.                                                                 */
4678 /* ------------------------------------------------------------------------ */
4679 void fr_natexpire(ifs)
4680 ipf_stack_t *ifs;
4681 {
4682 	ipftq_t *ifq, *ifqnext;
4683 	ipftqent_t *tqe, *tqn;
4684 	int i;
4685 	SPL_INT(s);
4686 
4687 	SPL_NET(s);
4688 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4689 	for (ifq = ifs->ifs_nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4690 		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4691 			if (tqe->tqe_die > ifs->ifs_fr_ticks)
4692 				break;
4693 			tqn = tqe->tqe_next;
4694 			(void) nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4695 		}
4696 	}
4697 
4698 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4699 		ifqnext = ifq->ifq_next;
4700 
4701 		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4702 			if (tqe->tqe_die > ifs->ifs_fr_ticks)
4703 				break;
4704 			tqn = tqe->tqe_next;
4705 			(void) nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4706 		}
4707 	}
4708 
4709 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4710 		ifqnext = ifq->ifq_next;
4711 
4712 		if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4713 		    (ifq->ifq_ref == 0)) {
4714 			fr_freetimeoutqueue(ifq, ifs);
4715 		}
4716 	}
4717 
4718 	if (ifs->ifs_nat_doflush != 0) {
4719 		(void) nat_flushtable(FLUSH_TABLE_EXTRA, ifs);
4720 		ifs->ifs_nat_doflush = 0;
4721 	}
4722 
4723 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4724 	SPL_X(s);
4725 }
4726 
4727 
4728 /* ------------------------------------------------------------------------ */
4729 /* Function:    fr_nataddrsync                                              */
4730 /* Returns:     Nil                                                         */
4731 /* Parameters:  ifp(I) -  pointer to network interface                      */
4732 /*              addr(I) - pointer to new network address                    */
4733 /*                                                                          */
4734 /* Walk through all of the currently active NAT sessions, looking for those */
4735 /* which need to have their translated address updated (where the interface */
4736 /* matches the one passed in) and change it, recalculating the checksum sum */
4737 /* difference too.                                                          */
4738 /* ------------------------------------------------------------------------ */
4739 void fr_nataddrsync(v, ifp, addr, ifs)
4740 int v;
4741 void *ifp;
4742 void *addr;
4743 ipf_stack_t *ifs;
4744 {
4745 	u_32_t sum1, sum2, sumd;
4746 	nat_t *nat;
4747 	ipnat_t *np;
4748 	SPL_INT(s);
4749 
4750 	if (ifs->ifs_fr_running <= 0)
4751 		return;
4752 
4753 	SPL_NET(s);
4754 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4755 
4756 	if (ifs->ifs_fr_running <= 0) {
4757 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4758 		return;
4759 	}
4760 
4761 	/*
4762 	 * Change IP addresses for NAT sessions for any protocol except TCP
4763 	 * since it will break the TCP connection anyway.  The only rules
4764 	 * which will get changed are those which are "map ... -> 0/32",
4765 	 * where the rule specifies the address is taken from the interface.
4766 	 */
4767 	for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4768 		if (addr != NULL) {
4769 			if (((ifp != NULL) && ifp != (nat->nat_ifps[0])) ||
4770 			    ((nat->nat_flags & IPN_TCP) != 0))
4771 				continue;
4772 			if ((np = nat->nat_ptr) == NULL)
4773 				continue;
4774 			if (v == 4 && np->in_v == 4) {
4775 				if (np->in_nip || np->in_outmsk != 0xffffffff)
4776 					continue;
4777 				/*
4778 				 * Change the map-to address to be the same as
4779 				 * the new one.
4780 				 */
4781 				sum1 = nat->nat_outip.s_addr;
4782 				nat->nat_outip = *(struct in_addr *)addr;
4783 				sum2 = nat->nat_outip.s_addr;
4784 			} else if (v == 6 && np->in_v == 6) {
4785 				if (!IP6_ISZERO(&np->in_next6.in6) ||
4786 				    !IP6_ISONES(&np->in_out[1].in6))
4787 					continue;
4788 				/*
4789 				 * Change the map-to address to be the same as
4790 				 * the new one.
4791 				 */
4792 				nat->nat_outip6.in6 = *(struct in6_addr *)addr;
4793 			} else
4794 				continue;
4795 
4796 		} else if (((ifp == NULL) || (ifp == nat->nat_ifps[0])) &&
4797 		    !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr)) {
4798 			if (np->in_v == 4 && (v == 4 || v == 0)) {
4799 				struct in_addr in;
4800 				if (np->in_outmsk != 0xffffffff || np->in_nip)
4801 					continue;
4802 				/*
4803 				 * Change the map-to address to be the same as
4804 				 * the new one.
4805 				 */
4806 				sum1 = nat->nat_outip.s_addr;
4807 				if (fr_ifpaddr(4, FRI_NORMAL, nat->nat_ifps[0],
4808 					       &in, NULL, ifs) != -1)
4809 					nat->nat_outip = in;
4810 				sum2 = nat->nat_outip.s_addr;
4811 			} else if (np->in_v == 6 && (v == 6 || v == 0)) {
4812 				struct in6_addr in6;
4813 				if (!IP6_ISZERO(&np->in_next6.in6) ||
4814 				    !IP6_ISONES(&np->in_out[1].in6))
4815 					continue;
4816 				/*
4817 				 * Change the map-to address to be the same as
4818 				 * the new one.
4819 				 */
4820 				if (fr_ifpaddr(6, FRI_NORMAL, nat->nat_ifps[0],
4821 					       (void *)&in6, NULL, ifs) != -1)
4822 					nat->nat_outip6.in6 = in6;
4823 			} else
4824 				continue;
4825 		} else {
4826 			continue;
4827 		}
4828 
4829 		if (sum1 == sum2)
4830 			continue;
4831 		/*
4832 		 * Readjust the checksum adjustment to take into
4833 		 * account the new IP#.
4834 		 */
4835 		CALC_SUMD(sum1, sum2, sumd);
4836 		/* XXX - dont change for TCP when solaris does
4837 		 * hardware checksumming.
4838 		 */
4839 		sumd += nat->nat_sumd[0];
4840 		nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4841 		nat->nat_sumd[1] = nat->nat_sumd[0];
4842 	}
4843 
4844 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4845 	SPL_X(s);
4846 }
4847 
4848 
4849 /* ------------------------------------------------------------------------ */
4850 /* Function:    fr_natifpsync                                               */
4851 /* Returns:     Nil                                                         */
4852 /* Parameters:  action(I) - how we are syncing                              */
4853 /*              ifp(I)    - pointer to network interface                    */
4854 /*              name(I)   - name of interface to sync to                    */
4855 /*                                                                          */
4856 /* This function is used to resync the mapping of interface names and their */
4857 /* respective 'pointers'.  For "action == IPFSYNC_RESYNC", resync all       */
4858 /* interfaces by doing a new lookup of name to 'pointer'.  For "action ==   */
4859 /* IPFSYNC_NEWIFP", treat ifp as the new pointer value associated with      */
4860 /* "name" and for "action == IPFSYNC_OLDIFP", ifp is a pointer for which    */
4861 /* there is no longer any interface associated with it.                     */
4862 /* ------------------------------------------------------------------------ */
4863 void fr_natifpsync(action, v, ifp, name, ifs)
4864 int action, v;
4865 void *ifp;
4866 char *name;
4867 ipf_stack_t *ifs;
4868 {
4869 #if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL)
4870 	int s;
4871 #endif
4872 	nat_t *nat;
4873 	ipnat_t *n;
4874 	int nv;
4875 
4876 	if (ifs->ifs_fr_running <= 0)
4877 		return;
4878 
4879 	SPL_NET(s);
4880 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4881 
4882 	if (ifs->ifs_fr_running <= 0) {
4883 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4884 		return;
4885 	}
4886 
4887 	switch (action)
4888 	{
4889 	case IPFSYNC_RESYNC :
4890 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4891 			nv = (v == 0) ? nat->nat_v : v;
4892 			if (nat->nat_v != nv)
4893 				continue;
4894 			if ((ifp == nat->nat_ifps[0]) ||
4895 			    (nat->nat_ifps[0] == (void *)-1)) {
4896 				nat->nat_ifps[0] =
4897 				    fr_resolvenic(nat->nat_ifnames[0], nv, ifs);
4898 			}
4899 
4900 			if ((ifp == nat->nat_ifps[1]) ||
4901 			    (nat->nat_ifps[1] == (void *)-1)) {
4902 				nat->nat_ifps[1] =
4903 				    fr_resolvenic(nat->nat_ifnames[1], nv, ifs);
4904 			}
4905 		}
4906 
4907 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4908 			nv = (v == 0) ? (int)n->in_v : v;
4909 			if ((int)n->in_v != nv)
4910 				continue;
4911 			if (n->in_ifps[0] == ifp ||
4912 			    n->in_ifps[0] == (void *)-1) {
4913 				n->in_ifps[0] =
4914 				    fr_resolvenic(n->in_ifnames[0], nv, ifs);
4915 			}
4916 			if (n->in_ifps[1] == ifp ||
4917 			    n->in_ifps[1] == (void *)-1) {
4918 				n->in_ifps[1] =
4919 				    fr_resolvenic(n->in_ifnames[1], nv, ifs);
4920 			}
4921 		}
4922 		break;
4923 	case IPFSYNC_NEWIFP :
4924 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4925 			if (nat->nat_v != v)
4926 				continue;
4927 			if (!strncmp(name, nat->nat_ifnames[0],
4928 				     sizeof(nat->nat_ifnames[0])))
4929 				nat->nat_ifps[0] = ifp;
4930 			if (!strncmp(name, nat->nat_ifnames[1],
4931 				     sizeof(nat->nat_ifnames[1])))
4932 				nat->nat_ifps[1] = ifp;
4933 		}
4934 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4935 			if ((int)n->in_v != v)
4936 				continue;
4937 			if (!strncmp(name, n->in_ifnames[0],
4938 				     sizeof(n->in_ifnames[0])))
4939 				n->in_ifps[0] = ifp;
4940 			if (!strncmp(name, n->in_ifnames[1],
4941 				     sizeof(n->in_ifnames[1])))
4942 				n->in_ifps[1] = ifp;
4943 		}
4944 		break;
4945 	case IPFSYNC_OLDIFP :
4946 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4947 			if (nat->nat_v != v)
4948 				continue;
4949 			if (ifp == nat->nat_ifps[0])
4950 				nat->nat_ifps[0] = (void *)-1;
4951 			if (ifp == nat->nat_ifps[1])
4952 				nat->nat_ifps[1] = (void *)-1;
4953 		}
4954 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4955 			if ((int)n->in_v != v)
4956 				continue;
4957 			if (n->in_ifps[0] == ifp)
4958 				n->in_ifps[0] = (void *)-1;
4959 			if (n->in_ifps[1] == ifp)
4960 				n->in_ifps[1] = (void *)-1;
4961 		}
4962 		break;
4963 	}
4964 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4965 	SPL_X(s);
4966 }
4967 
4968 
4969 /* ------------------------------------------------------------------------ */
4970 /* Function:    nat_icmpquerytype4                                          */
4971 /* Returns:     int - 1 == success, 0 == failure                            */
4972 /* Parameters:  icmptype(I) - ICMP type number                              */
4973 /*                                                                          */
4974 /* Tests to see if the ICMP type number passed is a query/response type or  */
4975 /* not.                                                                     */
4976 /* ------------------------------------------------------------------------ */
4977 static INLINE int nat_icmpquerytype4(icmptype)
4978 int icmptype;
4979 {
4980 
4981 	/*
4982 	 * For the ICMP query NAT code, it is essential that both the query
4983 	 * and the reply match on the NAT rule. Because the NAT structure
4984 	 * does not keep track of the icmptype, and a single NAT structure
4985 	 * is used for all icmp types with the same src, dest and id, we
4986 	 * simply define the replies as queries as well. The funny thing is,
4987 	 * altough it seems silly to call a reply a query, this is exactly
4988 	 * as it is defined in the IPv4 specification
4989 	 */
4990 
4991 	switch (icmptype)
4992 	{
4993 
4994 	case ICMP_ECHOREPLY:
4995 	case ICMP_ECHO:
4996 	/* route aedvertisement/solliciation is currently unsupported: */
4997 	/* it would require rewriting the ICMP data section            */
4998 	case ICMP_TSTAMP:
4999 	case ICMP_TSTAMPREPLY:
5000 	case ICMP_IREQ:
5001 	case ICMP_IREQREPLY:
5002 	case ICMP_MASKREQ:
5003 	case ICMP_MASKREPLY:
5004 		return 1;
5005 	default:
5006 		return 0;
5007 	}
5008 }
5009 
5010 
5011 /* ------------------------------------------------------------------------ */
5012 /* Function:    nat_log                                                     */
5013 /* Returns:     Nil                                                         */
5014 /* Parameters:  nat(I)  - pointer to NAT structure                          */
5015 /*              type(I) - type of log entry to create                       */
5016 /*                                                                          */
5017 /* Creates a NAT log entry.                                                 */
5018 /* ------------------------------------------------------------------------ */
5019 void nat_log(nat, type, ifs)
5020 struct nat *nat;
5021 u_int type;
5022 ipf_stack_t *ifs;
5023 {
5024 #ifdef	IPFILTER_LOG
5025 # ifndef LARGE_NAT
5026 	struct ipnat *np;
5027 	int rulen;
5028 # endif
5029 	struct natlog natl;
5030 	void *items[1];
5031 	size_t sizes[1];
5032 	int types[1];
5033 
5034 	natl.nlg_inip = nat->nat_inip6;
5035 	natl.nlg_outip = nat->nat_outip6;
5036 	natl.nlg_origip = nat->nat_oip6;
5037 	natl.nlg_bytes[0] = nat->nat_bytes[0];
5038 	natl.nlg_bytes[1] = nat->nat_bytes[1];
5039 	natl.nlg_pkts[0] = nat->nat_pkts[0];
5040 	natl.nlg_pkts[1] = nat->nat_pkts[1];
5041 	natl.nlg_origport = nat->nat_oport;
5042 	natl.nlg_inport = nat->nat_inport;
5043 	natl.nlg_outport = nat->nat_outport;
5044 	natl.nlg_p = nat->nat_p;
5045 	natl.nlg_type = type;
5046 	natl.nlg_rule = -1;
5047 	natl.nlg_v = nat->nat_v;
5048 # ifndef LARGE_NAT
5049 	if (nat->nat_ptr != NULL) {
5050 		for (rulen = 0, np = ifs->ifs_nat_list; np;
5051 		     np = np->in_next, rulen++)
5052 			if (np == nat->nat_ptr) {
5053 				natl.nlg_rule = rulen;
5054 				break;
5055 			}
5056 	}
5057 # endif
5058 	items[0] = &natl;
5059 	sizes[0] = sizeof(natl);
5060 	types[0] = 0;
5061 
5062 	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1, ifs);
5063 #endif
5064 }
5065 
5066 
#if defined(__OpenBSD__)
/* ------------------------------------------------------------------------ */
/* Function:    nat_ifdetach                                                */
/* Returns:     Nil                                                         */
/* Parameters:  ifp(I) - pointer to network interface                       */
/*              ifs    - ipf stack instance                                 */
/*                                                                          */
/* OpenBSD compatibility hook: hands the interface pointer to frsync() so   */
/* that IPFilter refreshes its interface references.                        */
/* ------------------------------------------------------------------------ */
void nat_ifdetach(ifp, ifs)
void *ifp;
ipf_stack_t *ifs;
{
	frsync(ifp, ifs);
}
#endif
5084 
5085 
5086 /* ------------------------------------------------------------------------ */
5087 /* Function:    fr_ipnatderef                                               */
5088 /* Returns:     Nil                                                         */
5089 /* Parameters:  inp(I) - pointer to pointer to NAT rule                     */
5090 /* Write Locks: ipf_nat                                                     */
5091 /*                                                                          */
5092 /* ------------------------------------------------------------------------ */
5093 void fr_ipnatderef(inp, ifs)
5094 ipnat_t **inp;
5095 ipf_stack_t *ifs;
5096 {
5097 	ipnat_t *in;
5098 
5099 	in = *inp;
5100 	*inp = NULL;
5101 	in->in_use--;
5102 	if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
5103 		if (in->in_apr)
5104 			appr_free(in->in_apr);
5105 		KFREE(in);
5106 		ifs->ifs_nat_stats.ns_rules--;
5107 #ifdef notdef
5108 #if SOLARIS
5109 		if (ifs->ifs_nat_stats.ns_rules == 0)
5110 			ifs->ifs_pfil_delayed_copy = 1;
5111 #endif
5112 #endif
5113 	}
5114 }
5115 
5116 
5117 /* ------------------------------------------------------------------------ */
5118 /* Function:    fr_natderef                                                 */
5119 /* Returns:     Nil                                                         */
5120 /* Parameters:  natp - pointer to pointer to NAT table entry                */
5121 /*              ifs  - ipf stack instance                                   */
5122 /*                                                                          */
5123 /* Decrement the reference counter for this NAT table entry and free it if  */
5124 /* there are no more things using it.                                       */
5125 /*                                                                          */
5126 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */
5127 /* structure *because* it only gets called on paths _after_ nat_ref has been*/
5128 /* incremented.  If nat_ref == 1 then we shouldn't decrement it here        */
5129 /* because nat_delete() will do that and send nat_ref to -1.                */
5130 /*                                                                          */
5131 /* Holding the lock on nat_lock is required to serialise nat_delete() being */
5132 /* called from a NAT flush ioctl with a deref happening because of a packet.*/
5133 /* ------------------------------------------------------------------------ */
5134 void fr_natderef(natp, ifs)
5135 nat_t **natp;
5136 ipf_stack_t *ifs;
5137 {
5138 	nat_t *nat;
5139 
5140 	nat = *natp;
5141 	*natp = NULL;
5142 
5143 	MUTEX_ENTER(&nat->nat_lock);
5144 	if (nat->nat_ref > 1) {
5145 		nat->nat_ref--;
5146 		MUTEX_EXIT(&nat->nat_lock);
5147 		return;
5148 	}
5149 	MUTEX_EXIT(&nat->nat_lock);
5150 
5151 	WRITE_ENTER(&ifs->ifs_ipf_nat);
5152 	(void) nat_delete(nat, NL_EXPIRE, ifs);
5153 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5154 }
5155 
5156 
5157 /* ------------------------------------------------------------------------ */
5158 /* Function:    fr_natclone                                                 */
5159 /* Returns:     ipstate_t* - NULL == cloning failed,                        */
5160 /*                           else pointer to new NAT structure              */
5161 /* Parameters:  fin(I)   - pointer to packet information                    */
5162 /*              nat(I)   - pointer to master NAT structure                  */
5163 /* Write Lock:  ipf_nat                                                     */
5164 /*                                                                          */
5165 /* Create a "duplicate" NAT table entry from the master.                    */
5166 /* ------------------------------------------------------------------------ */
5167 nat_t *fr_natclone(fin, nat)
5168 fr_info_t *fin;
5169 nat_t *nat;
5170 {
5171 	frentry_t *fr;
5172 	nat_t *clone;
5173 	ipnat_t *np;
5174 	ipf_stack_t *ifs = fin->fin_ifs;
5175 
5176 	/*
5177 	 * Trigger automatic call to nat_flushtable() if the
5178 	 * table has reached capcity specified by hi watermark.
5179 	 */
5180 	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_level_hi)
5181 		ifs->ifs_nat_doflush = 1;
5182 
5183 	/*
5184 	 * If automatic flushing did not do its job, and the table
5185 	 * has filled up, don't try to create a new entry.
5186 	 */
5187 	if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) {
5188 		ifs->ifs_nat_stats.ns_memfail++;
5189 		return NULL;
5190 	}
5191 
5192 	KMALLOC(clone, nat_t *);
5193 	if (clone == NULL)
5194 		return NULL;
5195 	bcopy((char *)nat, (char *)clone, sizeof(*clone));
5196 
5197 	MUTEX_NUKE(&clone->nat_lock);
5198 
5199 	clone->nat_aps = NULL;
5200 	/*
5201 	 * Initialize all these so that nat_delete() doesn't cause a crash.
5202 	 */
5203 	clone->nat_tqe.tqe_pnext = NULL;
5204 	clone->nat_tqe.tqe_next = NULL;
5205 	clone->nat_tqe.tqe_ifq = NULL;
5206 	clone->nat_tqe.tqe_parent = clone;
5207 
5208 	clone->nat_flags &= ~SI_CLONE;
5209 	clone->nat_flags |= SI_CLONED;
5210 
5211 	if (clone->nat_hm)
5212 		clone->nat_hm->hm_ref++;
5213 
5214 	if (nat_insert(clone, fin->fin_rev, ifs) == -1) {
5215 		KFREE(clone);
5216 		return NULL;
5217 	}
5218 	np = clone->nat_ptr;
5219 	if (np != NULL) {
5220 		if (ifs->ifs_nat_logging)
5221 			nat_log(clone, (u_int)np->in_redir, ifs);
5222 		np->in_use++;
5223 	}
5224 	fr = clone->nat_fr;
5225 	if (fr != NULL) {
5226 		MUTEX_ENTER(&fr->fr_lock);
5227 		fr->fr_ref++;
5228 		MUTEX_EXIT(&fr->fr_lock);
5229 	}
5230 
5231 	/*
5232 	 * Because the clone is created outside the normal loop of things and
5233 	 * TCP has special needs in terms of state, initialise the timeout
5234 	 * state of the new NAT from here.
5235 	 */
5236 	if (clone->nat_p == IPPROTO_TCP) {
5237 		(void) fr_tcp_age(&clone->nat_tqe, fin, ifs->ifs_nat_tqb,
5238 				  clone->nat_flags);
5239 	}
5240 #ifdef	IPFILTER_SYNC
5241 	clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
5242 #endif
5243 	if (ifs->ifs_nat_logging)
5244 		nat_log(clone, NL_CLONE, ifs);
5245 	return clone;
5246 }
5247 
5248 
5249 /* ------------------------------------------------------------------------ */
5250 /* Function:   nat_wildok                                                   */
5251 /* Returns:    int - 1 == packet's ports match wildcards                    */
5252 /*                   0 == packet's ports don't match wildcards              */
5253 /* Parameters: nat(I)   - NAT entry                                         */
5254 /*             sport(I) - source port                                       */
5255 /*             dport(I) - destination port                                  */
5256 /*             flags(I) - wildcard flags                                    */
5257 /*             dir(I)   - packet direction                                  */
5258 /*                                                                          */
5259 /* Use NAT entry and packet direction to determine which combination of     */
5260 /* wildcard flags should be used.                                           */
5261 /* ------------------------------------------------------------------------ */
5262 int nat_wildok(nat, sport, dport, flags, dir)
5263 nat_t *nat;
5264 int sport;
5265 int dport;
5266 int flags;
5267 int dir;
5268 {
5269 	/*
5270 	 * When called by       dir is set to
5271 	 * nat_inlookup         NAT_INBOUND (0)
5272 	 * nat_outlookup        NAT_OUTBOUND (1)
5273 	 *
5274 	 * We simply combine the packet's direction in dir with the original
5275 	 * "intended" direction of that NAT entry in nat->nat_dir to decide
5276 	 * which combination of wildcard flags to allow.
5277 	 */
5278 
5279 	switch ((dir << 1) | nat->nat_dir)
5280 	{
5281 	case 3: /* outbound packet / outbound entry */
5282 		if (((nat->nat_inport == sport) ||
5283 		    (flags & SI_W_SPORT)) &&
5284 		    ((nat->nat_oport == dport) ||
5285 		    (flags & SI_W_DPORT)))
5286 			return 1;
5287 		break;
5288 	case 2: /* outbound packet / inbound entry */
5289 		if (((nat->nat_outport == sport) ||
5290 		    (flags & SI_W_DPORT)) &&
5291 		    ((nat->nat_oport == dport) ||
5292 		    (flags & SI_W_SPORT)))
5293 			return 1;
5294 		break;
5295 	case 1: /* inbound packet / outbound entry */
5296 		if (((nat->nat_oport == sport) ||
5297 		    (flags & SI_W_DPORT)) &&
5298 		    ((nat->nat_outport == dport) ||
5299 		    (flags & SI_W_SPORT)))
5300 			return 1;
5301 		break;
5302 	case 0: /* inbound packet / inbound entry */
5303 		if (((nat->nat_oport == sport) ||
5304 		    (flags & SI_W_SPORT)) &&
5305 		    ((nat->nat_outport == dport) ||
5306 		    (flags & SI_W_DPORT)))
5307 			return 1;
5308 		break;
5309 	default:
5310 		break;
5311 	}
5312 
5313 	return(0);
5314 }
5315 
5316 
5317 /* ------------------------------------------------------------------------ */
5318 /* Function:    nat_mssclamp                                                */
5319 /* Returns:     Nil                                                         */
5320 /* Parameters:  tcp(I)    - pointer to TCP header                           */
5321 /*              maxmss(I) - value to clamp the TCP MSS to                   */
5322 /*              csump(I)  - pointer to TCP checksum                         */
5323 /*                                                                          */
5324 /* Check for MSS option and clamp it if necessary.  If found and changed,   */
5325 /* then the TCP header checksum will be updated to reflect the change in    */
5326 /* the MSS.                                                                 */
5327 /* ------------------------------------------------------------------------ */
5328 static void nat_mssclamp(tcp, maxmss, csump)
5329 tcphdr_t *tcp;
5330 u_32_t maxmss;
5331 u_short *csump;
5332 {
5333 	u_char *cp, *ep, opt;
5334 	int hlen, advance;
5335 	u_32_t mss, sumd;
5336 
5337 	hlen = TCP_OFF(tcp) << 2;
5338 	if (hlen > sizeof(*tcp)) {
5339 		cp = (u_char *)tcp + sizeof(*tcp);
5340 		ep = (u_char *)tcp + hlen;
5341 
5342 		while (cp < ep) {
5343 			opt = cp[0];
5344 			if (opt == TCPOPT_EOL)
5345 				break;
5346 			else if (opt == TCPOPT_NOP) {
5347 				cp++;
5348 				continue;
5349 			}
5350 
5351 			if (cp + 1 >= ep)
5352 				break;
5353 			advance = cp[1];
5354 			if ((cp + advance > ep) || (advance <= 0))
5355 				break;
5356 			switch (opt)
5357 			{
5358 			case TCPOPT_MAXSEG:
5359 				if (advance != 4)
5360 					break;
5361 				mss = cp[2] * 256 + cp[3];
5362 				if (mss > maxmss) {
5363 					cp[2] = maxmss / 256;
5364 					cp[3] = maxmss & 0xff;
5365 					CALC_SUMD(mss, maxmss, sumd);
5366 					fix_outcksum(csump, sumd);
5367 				}
5368 				break;
5369 			default:
5370 				/* ignore unknown options */
5371 				break;
5372 			}
5373 
5374 			cp += advance;
5375 		}
5376 	}
5377 }
5378 
5379 
5380 /* ------------------------------------------------------------------------ */
5381 /* Function:    fr_setnatqueue                                              */
5382 /* Returns:     Nil                                                         */
5383 /* Parameters:  nat(I)- pointer to NAT structure                            */
5384 /*              rev(I) - forward(0) or reverse(1) direction                 */
5385 /* Locks:       ipf_nat (read or write)                                     */
5386 /*                                                                          */
5387 /* Put the NAT entry on its default queue entry, using rev as a helped in   */
5388 /* determining which queue it should be placed on.                          */
5389 /* ------------------------------------------------------------------------ */
5390 void fr_setnatqueue(nat, rev, ifs)
5391 nat_t *nat;
5392 int rev;
5393 ipf_stack_t *ifs;
5394 {
5395 	ipftq_t *oifq, *nifq;
5396 
5397 	if (nat->nat_ptr != NULL)
5398 		nifq = nat->nat_ptr->in_tqehead[rev];
5399 	else
5400 		nifq = NULL;
5401 
5402 	if (nifq == NULL) {
5403 		switch (nat->nat_p)
5404 		{
5405 		case IPPROTO_UDP :
5406 			nifq = &ifs->ifs_nat_udptq;
5407 			break;
5408 		case IPPROTO_ICMP :
5409 			nifq = &ifs->ifs_nat_icmptq;
5410 			break;
5411 		case IPPROTO_TCP :
5412 			nifq = ifs->ifs_nat_tqb + nat->nat_tqe.tqe_state[rev];
5413 			break;
5414 		default :
5415 			nifq = &ifs->ifs_nat_iptq;
5416 			break;
5417 		}
5418 	}
5419 
5420 	oifq = nat->nat_tqe.tqe_ifq;
5421 	/*
5422 	 * If it's currently on a timeout queue, move it from one queue to
5423 	 * another, else put it on the end of the newly determined queue.
5424 	 */
5425 	if (oifq != NULL)
5426 		fr_movequeue(&nat->nat_tqe, oifq, nifq, ifs);
5427 	else
5428 		fr_queueappend(&nat->nat_tqe, nifq, nat, ifs);
5429 	return;
5430 }
5431 
5432 /* ------------------------------------------------------------------------ */
5433 /* Function:    nat_getnext                                                 */
5434 /* Returns:     int - 0 == ok, else error                                   */
5435 /* Parameters:  t(I)   - pointer to ipftoken structure                      */
5436 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5437 /*              ifs - ipf stack instance                                    */
5438 /*                                                                          */
5439 /* Fetch the next nat/ipnat/hostmap structure pointer from the linked list  */
5440 /* and copy it out to the storage space pointed to by itp.  The next item   */
5441 /* in the list to look at is put back in the ipftoken struture.             */
5442 /* ------------------------------------------------------------------------ */
5443 static int nat_getnext(t, itp, ifs)
5444 ipftoken_t *t;
5445 ipfgeniter_t *itp;
5446 ipf_stack_t *ifs;
5447 {
5448 	hostmap_t *hm, *nexthm = NULL, zerohm;
5449 	ipnat_t *ipn, *nextipnat = NULL, zeroipn;
5450 	nat_t *nat, *nextnat = NULL, zeronat;
5451 	int error = 0, count;
5452 	char *dst;
5453 
5454 	if (itp->igi_nitems == 0)
5455 		return EINVAL;
5456 
5457 	READ_ENTER(&ifs->ifs_ipf_nat);
5458 
5459 	/*
5460 	 * Get "previous" entry from the token and find the next entry.
5461 	 */
5462 	switch (itp->igi_type)
5463 	{
5464 	case IPFGENITER_HOSTMAP :
5465 		hm = t->ipt_data;
5466 		if (hm == NULL) {
5467 			nexthm = ifs->ifs_ipf_hm_maplist;
5468 		} else {
5469 			nexthm = hm->hm_next;
5470 		}
5471 		break;
5472 
5473 	case IPFGENITER_IPNAT :
5474 		ipn = t->ipt_data;
5475 		if (ipn == NULL) {
5476 			nextipnat = ifs->ifs_nat_list;
5477 		} else {
5478 			nextipnat = ipn->in_next;
5479 		}
5480 		break;
5481 
5482 	case IPFGENITER_NAT :
5483 		nat = t->ipt_data;
5484 		if (nat == NULL) {
5485 			nextnat = ifs->ifs_nat_instances;
5486 		} else {
5487 			nextnat = nat->nat_next;
5488 		}
5489 		break;
5490 	default :
5491 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5492 		return EINVAL;
5493 	}
5494 
5495 	dst = itp->igi_data;
5496 	for (count = itp->igi_nitems; count > 0; count--) {
5497 		/*
5498 		 * If we found an entry, add a reference to it and update the token.
5499 		 * Otherwise, zero out data to be returned and NULL out token.
5500 		 */
5501 		switch (itp->igi_type)
5502 		{
5503 		case IPFGENITER_HOSTMAP :
5504 			if (nexthm != NULL) {
5505 				ATOMIC_INC32(nexthm->hm_ref);
5506 				t->ipt_data = nexthm;
5507 			} else {
5508 				bzero(&zerohm, sizeof(zerohm));
5509 				nexthm = &zerohm;
5510 				t->ipt_data = NULL;
5511 			}
5512 			break;
5513 		case IPFGENITER_IPNAT :
5514 			if (nextipnat != NULL) {
5515 				ATOMIC_INC32(nextipnat->in_use);
5516 				t->ipt_data = nextipnat;
5517 			} else {
5518 				bzero(&zeroipn, sizeof(zeroipn));
5519 				nextipnat = &zeroipn;
5520 				t->ipt_data = NULL;
5521 			}
5522 			break;
5523 		case IPFGENITER_NAT :
5524 			if (nextnat != NULL) {
5525 				MUTEX_ENTER(&nextnat->nat_lock);
5526 				nextnat->nat_ref++;
5527 				MUTEX_EXIT(&nextnat->nat_lock);
5528 				t->ipt_data = nextnat;
5529 			} else {
5530 				bzero(&zeronat, sizeof(zeronat));
5531 				nextnat = &zeronat;
5532 				t->ipt_data = NULL;
5533 			}
5534 			break;
5535 		default :
5536 			break;
5537 		}
5538 
5539 		/*
5540 		 * Now that we have ref, it's save to give up lock.
5541 		 */
5542 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5543 
5544 		/*
5545 		 * Copy out data and clean up references and token as needed.
5546 		 */
5547 		switch (itp->igi_type)
5548 		{
5549 		case IPFGENITER_HOSTMAP :
5550 			error = COPYOUT(nexthm, dst, sizeof(*nexthm));
5551 			if (error != 0)
5552 				error = EFAULT;
5553 			if (t->ipt_data == NULL) {
5554 				ipf_freetoken(t, ifs);
5555 				break;
5556 			} else {
5557 				if (hm != NULL) {
5558 					WRITE_ENTER(&ifs->ifs_ipf_nat);
5559 					fr_hostmapdel(&hm);
5560 					RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5561 				}
5562 				if (nexthm->hm_next == NULL) {
5563 					ipf_freetoken(t, ifs);
5564 					break;
5565 				}
5566 				dst += sizeof(*nexthm);
5567 				hm = nexthm;
5568 				nexthm = nexthm->hm_next;
5569 			}
5570 			break;
5571 
5572 		case IPFGENITER_IPNAT :
5573 			error = COPYOUT(nextipnat, dst, sizeof(*nextipnat));
5574 			if (error != 0)
5575 				error = EFAULT;
5576 			if (t->ipt_data == NULL) {
5577 				ipf_freetoken(t, ifs);
5578 				break;
5579 			} else {
5580 				if (ipn != NULL) {
5581 					WRITE_ENTER(&ifs->ifs_ipf_nat);
5582 					fr_ipnatderef(&ipn, ifs);
5583 					RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5584 				}
5585 				if (nextipnat->in_next == NULL) {
5586 					ipf_freetoken(t, ifs);
5587 					break;
5588 				}
5589 				dst += sizeof(*nextipnat);
5590 				ipn = nextipnat;
5591 				nextipnat = nextipnat->in_next;
5592 			}
5593 			break;
5594 
5595 		case IPFGENITER_NAT :
5596 			error = COPYOUT(nextnat, dst, sizeof(*nextnat));
5597 			if (error != 0)
5598 				error = EFAULT;
5599 			if (t->ipt_data == NULL) {
5600 				ipf_freetoken(t, ifs);
5601 				break;
5602 			} else {
5603 				if (nat != NULL)
5604 					fr_natderef(&nat, ifs);
5605 				if (nextnat->nat_next == NULL) {
5606 					ipf_freetoken(t, ifs);
5607 					break;
5608 				}
5609 				dst += sizeof(*nextnat);
5610 				nat = nextnat;
5611 				nextnat = nextnat->nat_next;
5612 			}
5613 			break;
5614 		default :
5615 			break;
5616 		}
5617 
5618 		if ((count == 1) || (error != 0))
5619 			break;
5620 
5621 		READ_ENTER(&ifs->ifs_ipf_nat);
5622 	}
5623 
5624 	return error;
5625 }
5626 
5627 
5628 /* ------------------------------------------------------------------------ */
5629 /* Function:    nat_iterator                                                */
5630 /* Returns:     int - 0 == ok, else error                                   */
5631 /* Parameters:  token(I) - pointer to ipftoken structure                    */
5632 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5633 /*                                                                          */
5634 /* This function acts as a handler for the SIOCGENITER ioctls that use a    */
5635 /* generic structure to iterate through a list.  There are three different  */
5636 /* linked lists of NAT related information to go through: NAT rules, active */
5637 /* NAT mappings and the NAT fragment cache.                                 */
5638 /* ------------------------------------------------------------------------ */
5639 static int nat_iterator(token, itp, ifs)
5640 ipftoken_t *token;
5641 ipfgeniter_t *itp;
5642 ipf_stack_t *ifs;
5643 {
5644 	int error;
5645 
5646 	if (itp->igi_data == NULL)
5647 		return EFAULT;
5648 
5649 	token->ipt_subtype = itp->igi_type;
5650 
5651 	switch (itp->igi_type)
5652 	{
5653 	case IPFGENITER_HOSTMAP :
5654 	case IPFGENITER_IPNAT :
5655 	case IPFGENITER_NAT :
5656 		error = nat_getnext(token, itp, ifs);
5657 		break;
5658 	case IPFGENITER_NATFRAG :
5659 		error = fr_nextfrag(token, itp, &ifs->ifs_ipfr_natlist,
5660 				    &ifs->ifs_ipfr_nattail,
5661 				    &ifs->ifs_ipf_natfrag, ifs);
5662 		break;
5663 	default :
5664 		error = EINVAL;
5665 		break;
5666 	}
5667 
5668 	return error;
5669 }
5670 
5671 
5672 /* ---------------------------------------------------------------------- */
5673 /* Function:    nat_flushtable						  */
5674 /* Returns:     int - 0 == success, -1 == failure			  */
5675 /* Parameters:  flush_option - how to flush the active NAT table	  */
5676 /*              ifs - ipf stack instance				  */
5677 /* Write Locks: ipf_nat							  */
5678 /*									  */
5679 /* Flush NAT tables.  Three actions currently defined:                    */
5680 /*									  */
5681 /* FLUSH_TABLE_ALL	: Flush all NAT table entries			  */
5682 /*									  */
5683 /* FLUSH_TABLE_CLOSING	: Flush entries with TCP connections which	  */
5684 /*			  have started to close on both ends using	  */
5685 /*			  ipf_flushclosing().				  */
5686 /*									  */
5687 /* FLUSH_TABLE_EXTRA	: First, flush entries which are "almost" closed. */
5688 /*			  Then, if needed, flush entries with TCP	  */
5689 /*			  connections which have been idle for a long	  */
5690 /*			  time with ipf_extraflush().			  */
5691 /* ---------------------------------------------------------------------- */
5692 static int nat_flushtable(flush_option, ifs)
5693 int flush_option;
5694 ipf_stack_t *ifs;
5695 {
5696         nat_t *nat, *natn;
5697         int removed;
5698         SPL_INT(s);
5699 
5700         removed = 0;
5701 
5702         SPL_NET(s);
5703         switch (flush_option)
5704         {
5705         case FLUSH_TABLE_ALL:
5706 		natn = ifs->ifs_nat_instances;
5707 		while ((nat = natn) != NULL) {
5708 			natn = nat->nat_next;
5709 			if (nat_delete(nat, NL_FLUSH, ifs) == 0)
5710 				removed++;
5711 		}
5712                 break;
5713 
5714         case FLUSH_TABLE_CLOSING:
5715                 removed = ipf_flushclosing(NAT_FLUSH,
5716 					   IPF_TCPS_CLOSE_WAIT,
5717 					   ifs->ifs_nat_tqb,
5718 					   ifs->ifs_nat_utqe,
5719 					   ifs);
5720                 break;
5721 
5722         case FLUSH_TABLE_EXTRA:
5723                 removed = ipf_flushclosing(NAT_FLUSH,
5724 					   IPF_TCPS_FIN_WAIT_2,
5725 					   ifs->ifs_nat_tqb,
5726 					   ifs->ifs_nat_utqe,
5727 					   ifs);
5728 
5729                 /*
5730                  * Be sure we haven't done this in the last 10 seconds.
5731                  */
5732                 if (ifs->ifs_fr_ticks - ifs->ifs_nat_last_force_flush <
5733                     IPF_TTLVAL(10))
5734                         break;
5735                 ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks;
5736                 removed += ipf_extraflush(NAT_FLUSH,
5737 					  &ifs->ifs_nat_tqb[IPF_TCPS_ESTABLISHED],
5738 					  ifs->ifs_nat_utqe,
5739 					  ifs);
5740                 break;
5741 
5742         default: /* Flush Nothing */
5743                 break;
5744         }
5745 
5746         SPL_X(s);
5747         return (removed);
5748 }
5749 
5750 
5751 /* ------------------------------------------------------------------------ */
5752 /* Function:    nat_uncreate                                                */
5753 /* Returns:     Nil                                                         */
5754 /* Parameters:  fin(I) - pointer to packet information                      */
5755 /*                                                                          */
5756 /* This function is used to remove a NAT entry from the NAT table when we   */
5757 /* decide that the create was actually in error. It is thus assumed that    */
5758 /* fin_flx will have both FI_NATED and FI_NATNEW set. Because we're dealing */
5759 /* with the translated packet (not the original), we have to reverse the    */
5760 /* lookup. Although doing the lookup is expensive (relatively speaking), it */
5761 /* is not anticipated that this will be a frequent occurance for normal     */
5762 /* traffic patterns.                                                        */
5763 /* ------------------------------------------------------------------------ */
5764 void nat_uncreate(fin)
5765 fr_info_t *fin;
5766 {
5767 	ipf_stack_t *ifs = fin->fin_ifs;
5768 	int nflags;
5769 	nat_t *nat;
5770 
5771 	switch (fin->fin_p)
5772 	{
5773 	case IPPROTO_TCP :
5774 		nflags = IPN_TCP;
5775 		break;
5776 	case IPPROTO_UDP :
5777 		nflags = IPN_UDP;
5778 		break;
5779 	default :
5780 		nflags = 0;
5781 		break;
5782 	}
5783 
5784 	WRITE_ENTER(&ifs->ifs_ipf_nat);
5785 
5786 	if (fin->fin_out == 0) {
5787 		nat = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
5788 				    fin->fin_dst, fin->fin_src);
5789 	} else {
5790 		nat = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
5791 				   fin->fin_src, fin->fin_dst);
5792 	}
5793 
5794 	if (nat != NULL) {
5795 		ifs->ifs_nat_stats.ns_uncreate[fin->fin_out][0]++;
5796 		nat_delete(nat, NL_DESTROY, ifs);
5797 	} else {
5798 		ifs->ifs_nat_stats.ns_uncreate[fin->fin_out][1]++;
5799 	}
5800 
5801 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5802 }
5803