xref: /illumos-gate/usr/src/uts/common/inet/ipf/ip_nat.c (revision 74e7dc986c89efca1f2e4451c7a572e05e4a6e4f)
1 /*
2  * Copyright (C) 1995-2004 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
7  * Use is subject to license terms.
8  */
9 
10 #if defined(KERNEL) || defined(_KERNEL)
11 # undef KERNEL
12 # undef _KERNEL
13 # define        KERNEL	1
14 # define        _KERNEL	1
15 #endif
16 #include <sys/errno.h>
17 #include <sys/types.h>
18 #include <sys/param.h>
19 #include <sys/time.h>
20 #include <sys/file.h>
21 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
22     defined(_KERNEL)
23 # include "opt_ipfilter_log.h"
24 #endif
25 #if !defined(_KERNEL)
26 # include <stdio.h>
27 # include <string.h>
28 # include <stdlib.h>
29 # define _KERNEL
30 # ifdef __OpenBSD__
31 struct file;
32 # endif
33 # include <sys/uio.h>
34 # undef _KERNEL
35 #endif
36 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
37 # include <sys/filio.h>
38 # include <sys/fcntl.h>
39 #else
40 # include <sys/ioctl.h>
41 #endif
42 #if !defined(AIX)
43 # include <sys/fcntl.h>
44 #endif
45 #if !defined(linux)
46 # include <sys/protosw.h>
47 #endif
48 #include <sys/socket.h>
49 #if defined(_KERNEL)
50 # include <sys/systm.h>
51 # if !defined(__SVR4) && !defined(__svr4__)
52 #  include <sys/mbuf.h>
53 # endif
54 #endif
55 #if defined(__SVR4) || defined(__svr4__)
56 # include <sys/filio.h>
57 # include <sys/byteorder.h>
58 # ifdef _KERNEL
59 #  include <sys/dditypes.h>
60 # endif
61 # include <sys/stream.h>
62 # include <sys/kmem.h>
63 #endif
64 #if __FreeBSD_version >= 300000
65 # include <sys/queue.h>
66 #endif
67 #include <net/if.h>
68 #if __FreeBSD_version >= 300000
69 # include <net/if_var.h>
70 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
71 #  include "opt_ipfilter.h"
72 # endif
73 #endif
74 #ifdef sun
75 # include <net/af.h>
76 #endif
77 #include <net/route.h>
78 #include <netinet/in.h>
79 #include <netinet/in_systm.h>
80 #include <netinet/ip.h>
81 
82 #ifdef RFC1825
83 # include <vpn/md5.h>
84 # include <vpn/ipsec.h>
85 extern struct ifnet vpnif;
86 #endif
87 
88 #if !defined(linux)
89 # include <netinet/ip_var.h>
90 #endif
91 #include <netinet/tcp.h>
92 #include <netinet/udp.h>
93 #include <netinet/ip_icmp.h>
94 #include "netinet/ip_compat.h"
95 #include <netinet/tcpip.h>
96 #include "netinet/ip_fil.h"
97 #include "netinet/ip_nat.h"
98 #include "netinet/ip_frag.h"
99 #include "netinet/ip_state.h"
100 #include "netinet/ip_proxy.h"
101 #include "netinet/ipf_stack.h"
102 #ifdef	IPFILTER_SYNC
103 #include "netinet/ip_sync.h"
104 #endif
105 #if (__FreeBSD_version >= 300000)
106 # include <sys/malloc.h>
107 #endif
108 /* END OF INCLUDES */
109 
110 #undef	SOCKADDR_IN
111 #define	SOCKADDR_IN	struct sockaddr_in
112 
113 #if !defined(lint)
114 static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
115 static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 darrenr Exp $";
116 #endif
117 
118 
119 /* ======================================================================== */
120 /* How the NAT is organised and works.                                      */
121 /*                                                                          */
122 /* Inside (interface y) NAT       Outside (interface x)                     */
123 /* -------------------- -+- -------------------------------------           */
124 /* Packet going          |   out, processed by fr_checknatout() for x       */
125 /* ------------>         |   ------------>                                  */
126 /* src=10.1.1.1          |   src=192.1.1.1                                  */
127 /*                       |                                                  */
128 /*                       |   in, processed by fr_checknatin() for x         */
129 /* <------------         |   <------------                                  */
130 /* dst=10.1.1.1          |   dst=192.1.1.1                                  */
131 /* -------------------- -+- -------------------------------------           */
132 /* fr_checknatout() - changes ip_src and if required, sport                 */
133 /*             - creates a new mapping, if required.                        */
134 /* fr_checknatin()  - changes ip_dst and if required, dport                 */
135 /*                                                                          */
136 /* In the NAT table, internal source is recorded as "in" and externally     */
137 /* seen as "out".                                                           */
138 /* ======================================================================== */
139 
140 
141 static	int	nat_flushtable __P((ipf_stack_t *));
142 static	int	nat_clearlist __P((ipf_stack_t *));
143 static	void	nat_addnat __P((struct ipnat *, ipf_stack_t *));
144 static	void	nat_addrdr __P((struct ipnat *, ipf_stack_t *));
145 static	void	nat_delete __P((struct nat *, int, ipf_stack_t *));
146 static	int	fr_natgetent __P((caddr_t, ipf_stack_t *));
147 static	int	fr_natgetsz __P((caddr_t, ipf_stack_t *));
148 static	int	fr_natputent __P((caddr_t, int, ipf_stack_t *));
149 static	void	nat_tabmove __P((nat_t *, ipf_stack_t *));
150 static	int	nat_match __P((fr_info_t *, ipnat_t *));
151 static	INLINE	int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
152 static	INLINE	int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
153 static	hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
154 				    struct in_addr, struct in_addr, u_32_t,
155 				    ipf_stack_t *));
156 static	INLINE	int nat_icmpquerytype4 __P((int));
157 static	int	nat_ruleaddrinit __P((ipnat_t *));
158 static	int	nat_siocaddnat __P((ipnat_t *, ipnat_t **, int, ipf_stack_t *));
159 static	void	nat_siocdelnat __P((ipnat_t *, ipnat_t **, int, ipf_stack_t *));
160 static	INLINE	int nat_icmperrortype4 __P((int));
161 static	INLINE	int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
162 				      tcphdr_t *, nat_t **, int));
163 static	INLINE	int nat_resolverule __P((ipnat_t *, ipf_stack_t *));
164 static	void	nat_mssclamp __P((tcphdr_t *, u_32_t, u_short *));
165 static	int	nat_getnext __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
166 static	int	nat_iterator __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
167 static	int	nat_extraflush __P((int, ipf_stack_t *));
168 static	int	nat_earlydrop __P((ipftq_t *, int, ipf_stack_t *));
169 static	int	nat_flushclosing __P((int, ipf_stack_t *));
170 
171 
172 /*
173  * Below we declare a list of constants used only in the nat_extraflush()
174  * routine.  We are placing it here, instead of in nat_extraflush() itself,
175  * because we want to make it visible to tools such as mdb, nm etc., so the
176  * values can easily be altered during debugging.
177  */
178 static	const int	idletime_tab[] = {
179 	IPF_TTLVAL(30),		/* 30 seconds */
180 	IPF_TTLVAL(1800),	/* 30 minutes */
181 	IPF_TTLVAL(43200),	/* 12 hours */
182 	IPF_TTLVAL(345600),	/* 4 days */
183 };
184 
/*
 * Non-zero when the NAT entry has any of the IPN_TCPUDPICMP flags set and
 * its nat_inport and nat_outport values differ.
 */
#define NAT_HAS_L4_CHANGED(n)	\
	((((n)->nat_flags & (IPN_TCPUDPICMP)) != 0) && \
	 ((n)->nat_inport != (n)->nat_outport))
188 
189 /* ------------------------------------------------------------------------ */
190 /* Function:    fr_natinit                                                  */
191 /* Returns:     int - 0 == success, -1 == failure                           */
192 /* Parameters:  Nil                                                         */
193 /*                                                                          */
194 /* Initialise all of the NAT locks, tables and other structures.            */
195 /* ------------------------------------------------------------------------ */
196 int fr_natinit(ifs)
197 ipf_stack_t *ifs;
198 {
199 	int i;
200 
201 	KMALLOCS(ifs->ifs_nat_table[0], nat_t **,
202 		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
203 	if (ifs->ifs_nat_table[0] != NULL)
204 		bzero((char *)ifs->ifs_nat_table[0],
205 		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
206 	else
207 		return -1;
208 
209 	KMALLOCS(ifs->ifs_nat_table[1], nat_t **,
210 		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
211 	if (ifs->ifs_nat_table[1] != NULL)
212 		bzero((char *)ifs->ifs_nat_table[1],
213 		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
214 	else
215 		return -2;
216 
217 	KMALLOCS(ifs->ifs_nat_rules, ipnat_t **,
218 		 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
219 	if (ifs->ifs_nat_rules != NULL)
220 		bzero((char *)ifs->ifs_nat_rules,
221 		      ifs->ifs_ipf_natrules_sz * sizeof(ipnat_t *));
222 	else
223 		return -3;
224 
225 	KMALLOCS(ifs->ifs_rdr_rules, ipnat_t **,
226 		 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
227 	if (ifs->ifs_rdr_rules != NULL)
228 		bzero((char *)ifs->ifs_rdr_rules,
229 		      ifs->ifs_ipf_rdrrules_sz * sizeof(ipnat_t *));
230 	else
231 		return -4;
232 
233 	KMALLOCS(ifs->ifs_maptable, hostmap_t **,
234 		 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
235 	if (ifs->ifs_maptable != NULL)
236 		bzero((char *)ifs->ifs_maptable,
237 		      sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
238 	else
239 		return -5;
240 
241 	ifs->ifs_ipf_hm_maplist = NULL;
242 
243 	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[0], u_long *,
244 		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
245 	if (ifs->ifs_nat_stats.ns_bucketlen[0] == NULL)
246 		return -1;
247 	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[0],
248 	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
249 
250 	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[1], u_long *,
251 		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
252 	if (ifs->ifs_nat_stats.ns_bucketlen[1] == NULL)
253 		return -1;
254 	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[1],
255 	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
256 
257 	if (ifs->ifs_fr_nat_maxbucket == 0) {
258 		for (i = ifs->ifs_ipf_nattable_sz; i > 0; i >>= 1)
259 			ifs->ifs_fr_nat_maxbucket++;
260 		ifs->ifs_fr_nat_maxbucket *= 2;
261 	}
262 
263 	fr_sttab_init(ifs->ifs_nat_tqb, ifs);
264 	/*
265 	 * Increase this because we may have "keep state" following this too
266 	 * and packet storms can occur if this is removed too quickly.
267 	 */
268 	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcplastack;
269 	ifs->ifs_nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_nat_udptq;
270 	ifs->ifs_nat_udptq.ifq_ttl = ifs->ifs_fr_defnatage;
271 	ifs->ifs_nat_udptq.ifq_ref = 1;
272 	ifs->ifs_nat_udptq.ifq_head = NULL;
273 	ifs->ifs_nat_udptq.ifq_tail = &ifs->ifs_nat_udptq.ifq_head;
274 	MUTEX_INIT(&ifs->ifs_nat_udptq.ifq_lock, "nat ipftq udp tab");
275 	ifs->ifs_nat_udptq.ifq_next = &ifs->ifs_nat_icmptq;
276 	ifs->ifs_nat_icmptq.ifq_ttl = ifs->ifs_fr_defnaticmpage;
277 	ifs->ifs_nat_icmptq.ifq_ref = 1;
278 	ifs->ifs_nat_icmptq.ifq_head = NULL;
279 	ifs->ifs_nat_icmptq.ifq_tail = &ifs->ifs_nat_icmptq.ifq_head;
280 	MUTEX_INIT(&ifs->ifs_nat_icmptq.ifq_lock, "nat icmp ipftq tab");
281 	ifs->ifs_nat_icmptq.ifq_next = &ifs->ifs_nat_iptq;
282 	ifs->ifs_nat_iptq.ifq_ttl = ifs->ifs_fr_defnatipage;
283 	ifs->ifs_nat_iptq.ifq_ref = 1;
284 	ifs->ifs_nat_iptq.ifq_head = NULL;
285 	ifs->ifs_nat_iptq.ifq_tail = &ifs->ifs_nat_iptq.ifq_head;
286 	MUTEX_INIT(&ifs->ifs_nat_iptq.ifq_lock, "nat ip ipftq tab");
287 	ifs->ifs_nat_iptq.ifq_next = NULL;
288 
289 	for (i = 0; i < IPF_TCP_NSTATES; i++) {
290 		if (ifs->ifs_nat_tqb[i].ifq_ttl < ifs->ifs_fr_defnaticmpage)
291 			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnaticmpage;
292 #ifdef LARGE_NAT
293 		else if (ifs->ifs_nat_tqb[i].ifq_ttl > ifs->ifs_fr_defnatage)
294 			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnatage;
295 #endif
296 	}
297 
298 	/*
299 	 * Increase this because we may have "keep state" following
300 	 * this too and packet storms can occur if this is removed
301 	 * too quickly.
302 	 */
303 	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl =
304 	    ifs->ifs_nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
305 
306 	RWLOCK_INIT(&ifs->ifs_ipf_nat, "ipf IP NAT rwlock");
307 	RWLOCK_INIT(&ifs->ifs_ipf_natfrag, "ipf IP NAT-Frag rwlock");
308 	MUTEX_INIT(&ifs->ifs_ipf_nat_new, "ipf nat new mutex");
309 	MUTEX_INIT(&ifs->ifs_ipf_natio, "ipf nat io mutex");
310 
311 	ifs->ifs_fr_nat_init = 1;
312 
313 	return 0;
314 }
315 
316 
317 /* ------------------------------------------------------------------------ */
318 /* Function:    nat_addrdr                                                  */
319 /* Returns:     Nil                                                         */
320 /* Parameters:  n(I) - pointer to NAT rule to add                           */
321 /*                                                                          */
322 /* Adds a redirect rule to the hash table of redirect rules and the list of */
323 /* loaded NAT rules.  Updates the bitmask indicating which netmasks are in  */
324 /* use by redirect rules.                                                   */
325 /* ------------------------------------------------------------------------ */
326 static void nat_addrdr(n, ifs)
327 ipnat_t *n;
328 ipf_stack_t *ifs;
329 {
330 	ipnat_t **np;
331 	u_32_t j;
332 	u_int hv;
333 	int k;
334 
335 	k = count4bits(n->in_outmsk);
336 	if ((k >= 0) && (k != 32))
337 		ifs->ifs_rdr_masks |= 1 << k;
338 	j = (n->in_outip & n->in_outmsk);
339 	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_rdrrules_sz);
340 	np = ifs->ifs_rdr_rules + hv;
341 	while (*np != NULL)
342 		np = &(*np)->in_rnext;
343 	n->in_rnext = NULL;
344 	n->in_prnext = np;
345 	n->in_hv = hv;
346 	*np = n;
347 }
348 
349 
350 /* ------------------------------------------------------------------------ */
351 /* Function:    nat_addnat                                                  */
352 /* Returns:     Nil                                                         */
353 /* Parameters:  n(I) - pointer to NAT rule to add                           */
354 /*                                                                          */
355 /* Adds a NAT map rule to the hash table of rules and the list of  loaded   */
356 /* NAT rules.  Updates the bitmask indicating which netmasks are in use by  */
357 /* redirect rules.                                                          */
358 /* ------------------------------------------------------------------------ */
359 static void nat_addnat(n, ifs)
360 ipnat_t *n;
361 ipf_stack_t *ifs;
362 {
363 	ipnat_t **np;
364 	u_32_t j;
365 	u_int hv;
366 	int k;
367 
368 	k = count4bits(n->in_inmsk);
369 	if ((k >= 0) && (k != 32))
370 		ifs->ifs_nat_masks |= 1 << k;
371 	j = (n->in_inip & n->in_inmsk);
372 	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_natrules_sz);
373 	np = ifs->ifs_nat_rules + hv;
374 	while (*np != NULL)
375 		np = &(*np)->in_mnext;
376 	n->in_mnext = NULL;
377 	n->in_pmnext = np;
378 	n->in_hv = hv;
379 	*np = n;
380 }
381 
382 
383 /* ------------------------------------------------------------------------ */
384 /* Function:    nat_delrdr                                                  */
385 /* Returns:     Nil                                                         */
386 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
387 /*                                                                          */
388 /* Removes a redirect rule from the hash table of redirect rules.           */
389 /* ------------------------------------------------------------------------ */
390 void nat_delrdr(n)
391 ipnat_t *n;
392 {
393 	if (n->in_rnext)
394 		n->in_rnext->in_prnext = n->in_prnext;
395 	*n->in_prnext = n->in_rnext;
396 }
397 
398 
399 /* ------------------------------------------------------------------------ */
400 /* Function:    nat_delnat                                                  */
401 /* Returns:     Nil                                                         */
402 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
403 /*                                                                          */
404 /* Removes a NAT map rule from the hash table of NAT map rules.             */
405 /* ------------------------------------------------------------------------ */
406 void nat_delnat(n)
407 ipnat_t *n;
408 {
409 	if (n->in_mnext != NULL)
410 		n->in_mnext->in_pmnext = n->in_pmnext;
411 	*n->in_pmnext = n->in_mnext;
412 }
413 
414 
415 /* ------------------------------------------------------------------------ */
416 /* Function:    nat_hostmap                                                 */
417 /* Returns:     struct hostmap* - NULL if no hostmap could be created,      */
418 /*                                else a pointer to the hostmapping to use  */
419 /* Parameters:  np(I)   - pointer to NAT rule                               */
420 /*              real(I) - real IP address                                   */
421 /*              map(I)  - mapped IP address                                 */
422 /*              port(I) - destination port number                           */
423 /* Write Locks: ipf_nat                                                     */
424 /*                                                                          */
425 /* Check if an ip address has already been allocated for a given mapping    */
426 /* that is not doing port based translation.  If is not yet allocated, then */
427 /* create a new entry if a non-NULL NAT rule pointer has been supplied.     */
428 /* ------------------------------------------------------------------------ */
429 static struct hostmap *nat_hostmap(np, src, dst, map, port, ifs)
430 ipnat_t *np;
431 struct in_addr src;
432 struct in_addr dst;
433 struct in_addr map;
434 u_32_t port;
435 ipf_stack_t *ifs;
436 {
437 	hostmap_t *hm;
438 	u_int hv;
439 
440 	hv = (src.s_addr ^ dst.s_addr);
441 	hv += src.s_addr;
442 	hv += dst.s_addr;
443 	hv %= HOSTMAP_SIZE;
444 	for (hm = ifs->ifs_maptable[hv]; hm; hm = hm->hm_next)
445 		if ((hm->hm_srcip.s_addr == src.s_addr) &&
446 		    (hm->hm_dstip.s_addr == dst.s_addr) &&
447 		    ((np == NULL) || (np == hm->hm_ipnat)) &&
448 		    ((port == 0) || (port == hm->hm_port))) {
449 			hm->hm_ref++;
450 			return hm;
451 		}
452 
453 	if (np == NULL)
454 		return NULL;
455 
456 	KMALLOC(hm, hostmap_t *);
457 	if (hm) {
458 		hm->hm_hnext = ifs->ifs_ipf_hm_maplist;
459 		hm->hm_phnext = &ifs->ifs_ipf_hm_maplist;
460 		if (ifs->ifs_ipf_hm_maplist != NULL)
461 			ifs->ifs_ipf_hm_maplist->hm_phnext = &hm->hm_hnext;
462 		ifs->ifs_ipf_hm_maplist = hm;
463 
464 		hm->hm_next = ifs->ifs_maptable[hv];
465 		hm->hm_pnext = ifs->ifs_maptable + hv;
466 		if (ifs->ifs_maptable[hv] != NULL)
467 			ifs->ifs_maptable[hv]->hm_pnext = &hm->hm_next;
468 		ifs->ifs_maptable[hv] = hm;
469 		hm->hm_ipnat = np;
470 		hm->hm_srcip = src;
471 		hm->hm_dstip = dst;
472 		hm->hm_mapip = map;
473 		hm->hm_ref = 1;
474 		hm->hm_port = port;
475 		hm->hm_v = 4;
476 	}
477 	return hm;
478 }
479 
480 
481 /* ------------------------------------------------------------------------ */
482 /* Function:    fr_hostmapdel                                              */
483 /* Returns:     Nil                                                         */
484 /* Parameters:  hmp(I) - pointer to pointer to hostmap structure            */
485 /* Write Locks: ipf_nat                                                     */
486 /*                                                                          */
487 /* Decrement the references to this hostmap structure by one.  If this      */
488 /* reaches zero then remove it and free it.                                 */
489 /* ------------------------------------------------------------------------ */
490 void fr_hostmapdel(hmp)
491 struct hostmap **hmp;
492 {
493 	struct hostmap *hm;
494 
495 	hm = *hmp;
496 	*hmp = NULL;
497 
498 	hm->hm_ref--;
499 	if (hm->hm_ref == 0) {
500 		if (hm->hm_next)
501 			hm->hm_next->hm_pnext = hm->hm_pnext;
502 		*hm->hm_pnext = hm->hm_next;
503 		if (hm->hm_hnext)
504 			hm->hm_hnext->hm_phnext = hm->hm_phnext;
505 		*hm->hm_phnext = hm->hm_hnext;
506 		KFREE(hm);
507 	}
508 }
509 
510 
511 /* ------------------------------------------------------------------------ */
512 /* Function:    fix_outcksum                                                */
513 /* Returns:     Nil                                                         */
514 /* Parameters:  sp(I)  - location of 16bit checksum to update               */
515 /*              n((I)  - amount to adjust checksum by                       */
516 /*                                                                          */
517 /* Adjusts the 16bit checksum by "n" for packets going out.                 */
518 /* ------------------------------------------------------------------------ */
519 void fix_outcksum(sp, n)
520 u_short *sp;
521 u_32_t n;
522 {
523 	u_short sumshort;
524 	u_32_t sum1;
525 
526 	if (n == 0)
527 		return;
528 
529 	sum1 = (~ntohs(*sp)) & 0xffff;
530 	sum1 += (n);
531 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
532 	/* Again */
533 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
534 	sumshort = ~(u_short)sum1;
535 	*(sp) = htons(sumshort);
536 }
537 
538 
539 /* ------------------------------------------------------------------------ */
540 /* Function:    fix_incksum                                                 */
541 /* Returns:     Nil                                                         */
542 /* Parameters:  sp(I)  - location of 16bit checksum to update               */
543 /*              n((I)  - amount to adjust checksum by                       */
544 /*                                                                          */
545 /* Adjusts the 16bit checksum by "n" for packets going in.                  */
546 /* ------------------------------------------------------------------------ */
547 void fix_incksum(sp, n)
548 u_short *sp;
549 u_32_t n;
550 {
551 	u_short sumshort;
552 	u_32_t sum1;
553 
554 	if (n == 0)
555 		return;
556 
557 	sum1 = (~ntohs(*sp)) & 0xffff;
558 	sum1 += ~(n) & 0xffff;
559 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
560 	/* Again */
561 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
562 	sumshort = ~(u_short)sum1;
563 	*(sp) = htons(sumshort);
564 }
565 
566 
567 /* ------------------------------------------------------------------------ */
568 /* Function:    fix_datacksum                                               */
569 /* Returns:     Nil                                                         */
570 /* Parameters:  sp(I)  - location of 16bit checksum to update               */
571 /*              n((I)  - amount to adjust checksum by                       */
572 /*                                                                          */
573 /* Fix_datacksum is used *only* for the adjustments of checksums in the     */
574 /* data section of an IP packet.                                            */
575 /*                                                                          */
576 /* The only situation in which you need to do this is when NAT'ing an       */
577 /* ICMP error message. Such a message, contains in its body the IP header   */
578 /* of the original IP packet, that causes the error.                        */
579 /*                                                                          */
580 /* You can't use fix_incksum or fix_outcksum in that case, because for the  */
581 /* kernel the data section of the ICMP error is just data, and no special   */
582 /* processing like hardware cksum or ntohs processing have been done by the */
583 /* kernel on the data section.                                              */
584 /* ------------------------------------------------------------------------ */
585 void fix_datacksum(sp, n)
586 u_short *sp;
587 u_32_t n;
588 {
589 	u_short sumshort;
590 	u_32_t sum1;
591 
592 	if (n == 0)
593 		return;
594 
595 	sum1 = (~ntohs(*sp)) & 0xffff;
596 	sum1 += (n);
597 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
598 	/* Again */
599 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
600 	sumshort = ~(u_short)sum1;
601 	*(sp) = htons(sumshort);
602 }
603 
604 
605 /* ------------------------------------------------------------------------ */
606 /* Function:    fr_nat_ioctl                                                */
607 /* Returns:     int - 0 == success, != 0 == failure                         */
608 /* Parameters:  data(I) - pointer to ioctl data                             */
609 /*              cmd(I)  - ioctl command integer                             */
610 /*              mode(I) - file mode bits used with open                     */
611 /*                                                                          */
612 /* Processes an ioctl call made to operate on the IP Filter NAT device.     */
613 /* ------------------------------------------------------------------------ */
614 int fr_nat_ioctl(data, cmd, mode, uid, ctx, ifs)
615 ioctlcmd_t cmd;
616 caddr_t data;
617 int mode, uid;
618 void *ctx;
619 ipf_stack_t *ifs;
620 {
621 	ipnat_t *nat, *nt, *n = NULL, **np = NULL;
622 	int error = 0, ret, arg, getlock;
623 	ipnat_t natd;
624 
625 #if (BSD >= 199306) && defined(_KERNEL)
626 	if ((securelevel >= 2) && (mode & FWRITE))
627 		return EPERM;
628 #endif
629 
630 #if defined(__osf__) && defined(_KERNEL)
631 	getlock = 0;
632 #else
633 	getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
634 #endif
635 
636 	nat = NULL;     /* XXX gcc -Wuninitialized */
637 	if (cmd == (ioctlcmd_t)SIOCADNAT) {
638 		KMALLOC(nt, ipnat_t *);
639 	} else {
640 		nt = NULL;
641 	}
642 
643 	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
644 		if (mode & NAT_SYSSPACE) {
645 			bcopy(data, (char *)&natd, sizeof(natd));
646 			error = 0;
647 		} else {
648 			error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
649 		}
650 
651 	} else if (cmd == (ioctlcmd_t)SIOCIPFFL) { /* SIOCFLNAT & SIOCCNATL */
652 		BCOPYIN(data, &arg, sizeof(arg));
653 	}
654 
655 	if (error != 0)
656 		goto done;
657 
658 	/*
659 	 * For add/delete, look to see if the NAT entry is already present
660 	 */
661 	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
662 		nat = &natd;
663 		if (nat->in_v == 0)	/* For backward compat. */
664 			nat->in_v = 4;
665 		nat->in_flags &= IPN_USERFLAGS;
666 		if ((nat->in_redir & NAT_MAPBLK) == 0) {
667 			if ((nat->in_flags & IPN_SPLIT) == 0)
668 				nat->in_inip &= nat->in_inmsk;
669 			if ((nat->in_flags & IPN_IPRANGE) == 0)
670 				nat->in_outip &= nat->in_outmsk;
671 		}
672 		MUTEX_ENTER(&ifs->ifs_ipf_natio);
673 		for (np = &ifs->ifs_nat_list; ((n = *np) != NULL);
674 		     np = &n->in_next)
675 			if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
676 			    IPN_CMPSIZ) == 0) {
677 				if (nat->in_redir == NAT_REDIRECT &&
678 				    nat->in_pnext != n->in_pnext)
679 					continue;
680 				break;
681 			}
682 	}
683 
684 	switch (cmd)
685 	{
686 	case SIOCGENITER :
687 	    {
688 		ipfgeniter_t iter;
689 		ipftoken_t *token;
690 
691 		error = fr_inobj(data, &iter, IPFOBJ_GENITER);
692 		if (error != 0)
693 			break;
694 
695 		token = ipf_findtoken(iter.igi_type, uid, ctx, ifs);
696 		if (token != NULL)
697 			error  = nat_iterator(token, &iter, ifs);
698 		else
699 			error = ESRCH;
700 		RWLOCK_EXIT(&ifs->ifs_ipf_tokens);
701 		break;
702 	    }
703 #ifdef  IPFILTER_LOG
704 	case SIOCIPFFB :
705 	{
706 		int tmp;
707 
708 		if (!(mode & FWRITE))
709 			error = EPERM;
710 		else {
711 			tmp = ipflog_clear(IPL_LOGNAT, ifs);
712 			BCOPYOUT((char *)&tmp, (char *)data, sizeof(tmp));
713 		}
714 		break;
715 	}
716 	case SIOCSETLG :
717 		if (!(mode & FWRITE))
718 			error = EPERM;
719 		else {
720 			BCOPYIN((char *)data,
721 				       (char *)&ifs->ifs_nat_logging,
722 				sizeof(ifs->ifs_nat_logging));
723 		}
724 		break;
725 	case SIOCGETLG :
726 		BCOPYOUT((char *)&ifs->ifs_nat_logging, (char *)data,
727 			sizeof(ifs->ifs_nat_logging));
728 		break;
729 	case FIONREAD :
730 		arg = ifs->ifs_iplused[IPL_LOGNAT];
731 		BCOPYOUT(&arg, data, sizeof(arg));
732 		break;
733 #endif
734 	case SIOCADNAT :
735 		if (!(mode & FWRITE)) {
736 			error = EPERM;
737 		} else if (n != NULL) {
738 			error = EEXIST;
739 		} else if (nt == NULL) {
740 			error = ENOMEM;
741 		}
742 		if (error != 0) {
743 			MUTEX_EXIT(&ifs->ifs_ipf_natio);
744 			break;
745 		}
746 		bcopy((char *)nat, (char *)nt, sizeof(*n));
747 		error = nat_siocaddnat(nt, np, getlock, ifs);
748 		MUTEX_EXIT(&ifs->ifs_ipf_natio);
749 		if (error == 0)
750 			nt = NULL;
751 		break;
752 	case SIOCRMNAT :
753 		if (!(mode & FWRITE)) {
754 			error = EPERM;
755 			n = NULL;
756 		} else if (n == NULL) {
757 			error = ESRCH;
758 		}
759 
760 		if (error != 0) {
761 			MUTEX_EXIT(&ifs->ifs_ipf_natio);
762 			break;
763 		}
764 		nat_siocdelnat(n, np, getlock, ifs);
765 
766 		MUTEX_EXIT(&ifs->ifs_ipf_natio);
767 		n = NULL;
768 		break;
769 	case SIOCGNATS :
770 		ifs->ifs_nat_stats.ns_table[0] = ifs->ifs_nat_table[0];
771 		ifs->ifs_nat_stats.ns_table[1] = ifs->ifs_nat_table[1];
772 		ifs->ifs_nat_stats.ns_list = ifs->ifs_nat_list;
773 		ifs->ifs_nat_stats.ns_maptable = ifs->ifs_maptable;
774 		ifs->ifs_nat_stats.ns_maplist = ifs->ifs_ipf_hm_maplist;
775 		ifs->ifs_nat_stats.ns_nattab_max = ifs->ifs_ipf_nattable_max;
776 		ifs->ifs_nat_stats.ns_nattab_sz = ifs->ifs_ipf_nattable_sz;
777 		ifs->ifs_nat_stats.ns_rultab_sz = ifs->ifs_ipf_natrules_sz;
778 		ifs->ifs_nat_stats.ns_rdrtab_sz = ifs->ifs_ipf_rdrrules_sz;
779 		ifs->ifs_nat_stats.ns_hostmap_sz = ifs->ifs_ipf_hostmap_sz;
780 		ifs->ifs_nat_stats.ns_instances = ifs->ifs_nat_instances;
781 		ifs->ifs_nat_stats.ns_apslist = ifs->ifs_ap_sess_list;
782 		error = fr_outobj(data, &ifs->ifs_nat_stats, IPFOBJ_NATSTAT);
783 		break;
784 	case SIOCGNATL :
785 	    {
786 		natlookup_t nl;
787 
788 		if (getlock) {
789 			READ_ENTER(&ifs->ifs_ipf_nat);
790 		}
791 		error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
792 		if (nl.nl_v != 6)
793 			nl.nl_v = 4;
794 		if (error == 0) {
795 			void *ptr;
796 
797 			switch (nl.nl_v)
798 			{
799 			case 4:
800 				ptr = nat_lookupredir(&nl, ifs);
801 				break;
802 #ifdef	USE_INET6
803 			case 6:
804 				ptr = nat6_lookupredir(&nl, ifs);
805 				break;
806 #endif
807 			default:
808 				ptr = NULL;
809 				break;
810 			}
811 
812 			if (ptr != NULL) {
813 				error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
814 			} else {
815 				error = ESRCH;
816 			}
817 		}
818 		if (getlock) {
819 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
820 		}
821 		break;
822 	    }
823 	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
824 		if (!(mode & FWRITE)) {
825 			error = EPERM;
826 			break;
827 		}
828 		if (getlock) {
829 			WRITE_ENTER(&ifs->ifs_ipf_nat);
830 		}
831 		error = 0;
832 		if (arg == 0)
833 			ret = nat_flushtable(ifs);
834 		else if (arg == 1)
835 			ret = nat_clearlist(ifs);
836 		else if (arg >= 2 && arg <= 4)
837 			ret = nat_extraflush(arg - 2, ifs);
838 		else
839 			error = EINVAL;
840 		if (getlock) {
841 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
842 		}
843 		if (error == 0) {
844 			BCOPYOUT(&ret, data, sizeof(ret));
845 		}
846 		break;
847 	case SIOCPROXY :
848 		error = appr_ioctl(data, cmd, mode, ifs);
849 		break;
850 	case SIOCSTLCK :
851 		if (!(mode & FWRITE)) {
852 			error = EPERM;
853 		} else {
854 			fr_lock(data, &ifs->ifs_fr_nat_lock);
855 		}
856 		break;
857 	case SIOCSTPUT :
858 		if ((mode & FWRITE) != 0) {
859 			error = fr_natputent(data, getlock, ifs);
860 		} else {
861 			error = EACCES;
862 		}
863 		break;
864 	case SIOCSTGSZ :
865 		if (ifs->ifs_fr_nat_lock) {
866 			if (getlock) {
867 				READ_ENTER(&ifs->ifs_ipf_nat);
868 			}
869 			error = fr_natgetsz(data, ifs);
870 			if (getlock) {
871 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
872 			}
873 		} else
874 			error = EACCES;
875 		break;
876 	case SIOCSTGET :
877 		if (ifs->ifs_fr_nat_lock) {
878 			if (getlock) {
879 				READ_ENTER(&ifs->ifs_ipf_nat);
880 			}
881 			error = fr_natgetent(data, ifs);
882 			if (getlock) {
883 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
884 			}
885 		} else
886 			error = EACCES;
887 		break;
888 	case SIOCIPFDELTOK :
889 		(void) BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
890 		error = ipf_deltoken(arg, uid, ctx, ifs);
891 		break;
892 	default :
893 		error = EINVAL;
894 		break;
895 	}
896 done:
897 	if (nt)
898 		KFREE(nt);
899 	return error;
900 }
901 
902 
903 /* ------------------------------------------------------------------------ */
904 /* Function:    nat_siocaddnat                                              */
905 /* Returns:     int - 0 == success, != 0 == failure                         */
906 /* Parameters:  n(I)       - pointer to new NAT rule                        */
907 /*              np(I)      - pointer to where to insert new NAT rule        */
908 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
909 /* Mutex Locks: ipf_natio                                                   */
910 /*                                                                          */
911 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
912 /* from information passed to the kernel, then add it  to the appropriate   */
913 /* NAT rule table(s).                                                       */
914 /* ------------------------------------------------------------------------ */
915 static int nat_siocaddnat(n, np, getlock, ifs)
916 ipnat_t *n, **np;
917 int getlock;
918 ipf_stack_t *ifs;
919 {
920 	int error = 0, i, j;
921 
922 	if (nat_resolverule(n, ifs) != 0)
923 		return ENOENT;
924 
925 	if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
926 		return EINVAL;
927 
928 	n->in_use = 0;
929 	if (n->in_redir & NAT_MAPBLK)
930 		n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
931 	else if (n->in_flags & IPN_AUTOPORTMAP)
932 		n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
933 	else if (n->in_flags & IPN_IPRANGE)
934 		n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
935 	else if (n->in_flags & IPN_SPLIT)
936 		n->in_space = 2;
937 	else if (n->in_outmsk != 0)
938 		n->in_space = ~ntohl(n->in_outmsk);
939 	else
940 		n->in_space = 1;
941 
942 	/*
943 	 * Calculate the number of valid IP addresses in the output
944 	 * mapping range.  In all cases, the range is inclusive of
945 	 * the start and ending IP addresses.
946 	 * If to a CIDR address, lose 2: broadcast + network address
947 	 *                               (so subtract 1)
948 	 * If to a range, add one.
949 	 * If to a single IP address, set to 1.
950 	 */
951 	if (n->in_space) {
952 		if ((n->in_flags & IPN_IPRANGE) != 0)
953 			n->in_space += 1;
954 		else
955 			n->in_space -= 1;
956 	} else
957 		n->in_space = 1;
958 
959 #ifdef	USE_INET6
960 	if (n->in_v == 6 && (n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0 &&
961 	    !IP6_ISONES(&n->in_out[1]) && !IP6_ISZERO(&n->in_out[1]))
962 		IP6_ADD(&n->in_out[0], 1, &n->in_next6)
963 	else if (n->in_v == 6 &&
964 	    (n->in_flags & IPN_SPLIT) && (n->in_redir & NAT_REDIRECT))
965 		n->in_next6 = n->in_in[0];
966 	else if (n->in_v == 6)
967 		n->in_next6 = n->in_out[0];
968 	else
969 #endif
970 	if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
971 	    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
972 		n->in_nip = ntohl(n->in_outip) + 1;
973 	else if ((n->in_flags & IPN_SPLIT) &&
974 		 (n->in_redir & NAT_REDIRECT))
975 		n->in_nip = ntohl(n->in_inip);
976 	else
977 		n->in_nip = ntohl(n->in_outip);
978 
979 	if (n->in_redir & NAT_MAP) {
980 		n->in_pnext = ntohs(n->in_pmin);
981 		/*
982 		 * Multiply by the number of ports made available.
983 		 */
984 		if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
985 			n->in_space *= (ntohs(n->in_pmax) -
986 					ntohs(n->in_pmin) + 1);
987 			/*
988 			 * Because two different sources can map to
989 			 * different destinations but use the same
990 			 * local IP#/port #.
991 			 * If the result is smaller than in_space, then
992 			 * we may have wrapped around 32bits.
993 			 */
994 			i = n->in_inmsk;
995 			if ((i != 0) && (i != 0xffffffff)) {
996 				j = n->in_space * (~ntohl(i) + 1);
997 				if (j >= n->in_space)
998 					n->in_space = j;
999 				else
1000 					n->in_space = 0xffffffff;
1001 			}
1002 		}
1003 		/*
1004 		 * If no protocol is specified, multiple by 256 to allow for
1005 		 * at least one IP:IP mapping per protocol.
1006 		 */
1007 		if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
1008 				j = n->in_space * 256;
1009 				if (j >= n->in_space)
1010 					n->in_space = j;
1011 				else
1012 					n->in_space = 0xffffffff;
1013 		}
1014 	}
1015 
1016 	/* Otherwise, these fields are preset */
1017 
1018 	if (getlock) {
1019 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1020 	}
1021 	n->in_next = NULL;
1022 	*np = n;
1023 
1024 	if (n->in_age[0] != 0)
1025 	    n->in_tqehead[0] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
1026 						  n->in_age[0], ifs);
1027 
1028 	if (n->in_age[1] != 0)
1029 	    n->in_tqehead[1] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
1030 						  n->in_age[1], ifs);
1031 
1032 	if (n->in_redir & NAT_REDIRECT) {
1033 		n->in_flags &= ~IPN_NOTDST;
1034 		switch (n->in_v)
1035 		{
1036 		case 4 :
1037 			nat_addrdr(n, ifs);
1038 			break;
1039 #ifdef	USE_INET6
1040 		case 6 :
1041 			nat6_addrdr(n, ifs);
1042 			break;
1043 #endif
1044 		default :
1045 			break;
1046 		}
1047 	}
1048 	if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1049 		n->in_flags &= ~IPN_NOTSRC;
1050 		switch (n->in_v)
1051 		{
1052 		case 4 :
1053 			nat_addnat(n, ifs);
1054 			break;
1055 #ifdef	USE_INET6
1056 		case 6 :
1057 			nat6_addnat(n, ifs);
1058 			break;
1059 #endif
1060 		default :
1061 			break;
1062 		}
1063 	}
1064 	n = NULL;
1065 	ifs->ifs_nat_stats.ns_rules++;
1066 	if (getlock) {
1067 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* WRITE */
1068 	}
1069 
1070 	return error;
1071 }
1072 
1073 
1074 /* ------------------------------------------------------------------------ */
1075 /* Function:    nat_resolvrule                                              */
1076 /* Returns:     int - 0 == success, -1 == failure                           */
1077 /* Parameters:  n(I)  - pointer to NAT rule                                 */
1078 /*                                                                          */
1079 /* Resolve some of the details inside the NAT rule.  Includes resolving	    */
1080 /* any specified interfaces and proxy labels, and determines whether or not */
1081 /* all proxy labels are correctly specified.				    */
1082 /*									    */
1083 /* Called by nat_siocaddnat() (SIOCADNAT) and fr_natputent (SIOCSTPUT).     */
1084 /* ------------------------------------------------------------------------ */
1085 static int nat_resolverule(n, ifs)
1086 ipnat_t *n;
1087 ipf_stack_t *ifs;
1088 {
1089 	n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1090 	n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], n->in_v, ifs);
1091 
1092 	n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1093 	if (n->in_ifnames[1][0] == '\0') {
1094 		(void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1095 		n->in_ifps[1] = n->in_ifps[0];
1096 	} else {
1097 		n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], n->in_v, ifs);
1098 	}
1099 
1100 	if (n->in_plabel[0] != '\0') {
1101 		n->in_apr = appr_lookup(n->in_p, n->in_plabel, ifs);
1102 		if (n->in_apr == NULL)
1103 			return -1;
1104 	}
1105 	return 0;
1106 }
1107 
1108 
1109 /* ------------------------------------------------------------------------ */
1110 /* Function:    nat_siocdelnat                                              */
1111 /* Returns:     int - 0 == success, != 0 == failure                         */
1112 /* Parameters:  n(I)       - pointer to new NAT rule                        */
1113 /*              np(I)      - pointer to where to insert new NAT rule        */
1114 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
1115 /* Mutex Locks: ipf_natio                                                   */
1116 /*                                                                          */
1117 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1118 /* from information passed to the kernel, then add it  to the appropriate   */
1119 /* NAT rule table(s).                                                       */
1120 /* ------------------------------------------------------------------------ */
1121 static void nat_siocdelnat(n, np, getlock, ifs)
1122 ipnat_t *n, **np;
1123 int getlock;
1124 ipf_stack_t *ifs;
1125 {
1126 	int i;
1127 
1128 	if (getlock) {
1129 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1130 	}
1131 	if (n->in_redir & NAT_REDIRECT)
1132 		nat_delrdr(n);
1133 	if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1134 		nat_delnat(n);
1135 	if (ifs->ifs_nat_list == NULL) {
1136 		ifs->ifs_nat_masks = 0;
1137 		ifs->ifs_rdr_masks = 0;
1138 		for (i = 0; i < 4; i++) {
1139 			ifs->ifs_nat6_masks[i] = 0;
1140 			ifs->ifs_rdr6_masks[i] = 0;
1141 		}
1142 	}
1143 
1144 	if (n->in_tqehead[0] != NULL) {
1145 		if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1146 			fr_freetimeoutqueue(n->in_tqehead[0], ifs);
1147 		}
1148 	}
1149 
1150 	if (n->in_tqehead[1] != NULL) {
1151 		if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1152 			fr_freetimeoutqueue(n->in_tqehead[1], ifs);
1153 		}
1154 	}
1155 
1156 	*np = n->in_next;
1157 
1158 	if (n->in_use == 0) {
1159 		if (n->in_apr)
1160 			appr_free(n->in_apr);
1161 		KFREE(n);
1162 		ifs->ifs_nat_stats.ns_rules--;
1163 	} else {
1164 		n->in_flags |= IPN_DELETE;
1165 		n->in_next = NULL;
1166 	}
1167 	if (getlock) {
1168 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* READ/WRITE */
1169 	}
1170 }
1171 
1172 
1173 /* ------------------------------------------------------------------------ */
1174 /* Function:    fr_natgetsz                                                 */
1175 /* Returns:     int - 0 == success, != 0 is the error value.                */
1176 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1177 /*                        get the size of.                                  */
1178 /*                                                                          */
1179 /* Handle SIOCSTGSZ.                                                        */
1180 /* Return the size of the nat list entry to be copied back to user space.   */
1181 /* The size of the entry is stored in the ng_sz field and the enture natget */
1182 /* structure is copied back to the user.                                    */
1183 /* ------------------------------------------------------------------------ */
1184 static int fr_natgetsz(data, ifs)
1185 caddr_t data;
1186 ipf_stack_t *ifs;
1187 {
1188 	ap_session_t *aps;
1189 	nat_t *nat, *n;
1190 	natget_t ng;
1191 
1192 	BCOPYIN(data, &ng, sizeof(ng));
1193 
1194 	nat = ng.ng_ptr;
1195 	if (!nat) {
1196 		nat = ifs->ifs_nat_instances;
1197 		ng.ng_sz = 0;
1198 		/*
1199 		 * Empty list so the size returned is 0.  Simple.
1200 		 */
1201 		if (nat == NULL) {
1202 			BCOPYOUT(&ng, data, sizeof(ng));
1203 			return 0;
1204 		}
1205 	} else {
1206 		/*
1207 		 * Make sure the pointer we're copying from exists in the
1208 		 * current list of entries.  Security precaution to prevent
1209 		 * copying of random kernel data.
1210 		 */
1211 		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1212 			if (n == nat)
1213 				break;
1214 		if (!n)
1215 			return ESRCH;
1216 	}
1217 
1218 	/*
1219 	 * Incluse any space required for proxy data structures.
1220 	 */
1221 	ng.ng_sz = sizeof(nat_save_t);
1222 	aps = nat->nat_aps;
1223 	if (aps != NULL) {
1224 		ng.ng_sz += sizeof(ap_session_t) - 4;
1225 		if (aps->aps_data != 0)
1226 			ng.ng_sz += aps->aps_psiz;
1227 	}
1228 
1229 	BCOPYOUT(&ng, data, sizeof(ng));
1230 	return 0;
1231 }
1232 
1233 
1234 /* ------------------------------------------------------------------------ */
1235 /* Function:    fr_natgetent                                                */
1236 /* Returns:     int - 0 == success, != 0 is the error value.                */
1237 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1238 /*                        to NAT structure to copy out.                     */
1239 /*                                                                          */
1240 /* Handle SIOCSTGET.                                                        */
1241 /* Copies out NAT entry to user space.  Any additional data held for a      */
1242 /* proxy is also copied, as to is the NAT rule which was responsible for it */
1243 /* ------------------------------------------------------------------------ */
1244 static int fr_natgetent(data, ifs)
1245 caddr_t data;
1246 ipf_stack_t *ifs;
1247 {
1248 	int error, outsize;
1249 	ap_session_t *aps;
1250 	nat_save_t *ipn, ipns;
1251 	nat_t *n, *nat;
1252 
1253 	error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1254 	if (error != 0)
1255 		return error;
1256 
1257 	if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1258 		return EINVAL;
1259 
1260 	KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1261 	if (ipn == NULL)
1262 		return ENOMEM;
1263 
1264 	ipn->ipn_dsize = ipns.ipn_dsize;
1265 	nat = ipns.ipn_next;
1266 	if (nat == NULL) {
1267 		nat = ifs->ifs_nat_instances;
1268 		if (nat == NULL) {
1269 			if (ifs->ifs_nat_instances == NULL)
1270 				error = ENOENT;
1271 			goto finished;
1272 		}
1273 	} else {
1274 		/*
1275 		 * Make sure the pointer we're copying from exists in the
1276 		 * current list of entries.  Security precaution to prevent
1277 		 * copying of random kernel data.
1278 		 */
1279 		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1280 			if (n == nat)
1281 				break;
1282 		if (n == NULL) {
1283 			error = ESRCH;
1284 			goto finished;
1285 		}
1286 	}
1287 	ipn->ipn_next = nat->nat_next;
1288 
1289 	/*
1290 	 * Copy the NAT structure.
1291 	 */
1292 	bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1293 
1294 	/*
1295 	 * If we have a pointer to the NAT rule it belongs to, save that too.
1296 	 */
1297 	if (nat->nat_ptr != NULL)
1298 		bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1299 		      sizeof(ipn->ipn_ipnat));
1300 
1301 	/*
1302 	 * If we also know the NAT entry has an associated filter rule,
1303 	 * save that too.
1304 	 */
1305 	if (nat->nat_fr != NULL)
1306 		bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1307 		      sizeof(ipn->ipn_fr));
1308 
1309 	/*
1310 	 * Last but not least, if there is an application proxy session set
1311 	 * up for this NAT entry, then copy that out too, including any
1312 	 * private data saved along side it by the proxy.
1313 	 */
1314 	aps = nat->nat_aps;
1315 	outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1316 	if (aps != NULL) {
1317 		char *s;
1318 
1319 		if (outsize < sizeof(*aps)) {
1320 			error = ENOBUFS;
1321 			goto finished;
1322 		}
1323 
1324 		s = ipn->ipn_data;
1325 		bcopy((char *)aps, s, sizeof(*aps));
1326 		s += sizeof(*aps);
1327 		outsize -= sizeof(*aps);
1328 		if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1329 			bcopy(aps->aps_data, s, aps->aps_psiz);
1330 		else
1331 			error = ENOBUFS;
1332 	}
1333 	if (error == 0) {
1334 		error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1335 	}
1336 
1337 finished:
1338 	if (ipn != NULL) {
1339 		KFREES(ipn, ipns.ipn_dsize);
1340 	}
1341 	return error;
1342 }
1343 
1344 /* ------------------------------------------------------------------------ */
1345 /* Function:    nat_calc_chksum_diffs					    */
1346 /* Returns:     void							    */
1347 /* Parameters:  nat	-	pointer to NAT table entry		    */
1348 /*                                                                          */
1349 /* Function calculates chksum deltas for IP header (nat_ipsumd) and TCP/UDP */
1350 /* headers (nat_sumd). The things for L4 (UDP/TCP) get complicated when     */
1351 /* we are dealing with partial chksum offload. For these cases we need to   */
1352 /* compute a 'partial chksum delta'. The 'partial chksum delta'is stored    */
1353 /* into nat_sumd[1], while ordinary chksum delta for TCP/UDP is in 	    */
1354 /* nat_sumd[0]. 							    */
1355 /*									    */
1356 /* The function accepts initialized NAT table entry and computes the deltas */
1357 /* from nat_inip/nat_outip members. The function is called right before	    */
1358 /* the new entry is inserted into the table.				    */
1359 /*									    */
1360 /* The ipsumd (IP hedaer chksum delta adjustment) is computed as a chksum   */
1361 /* of delta between original and new IP addresses.			    */
1362 /*									    */
1363 /* the nat_sumd[0] (TCP/UDP header chksum delta adjustment) is computed as  */
1364 /* a chkusm of delta between original an new IP addrress:port tupples.	    */
1365 /*									    */
1366 /* Some facts about chksum, we should remember:				    */
1367 /*	IP header chksum covers IP header only				    */
1368 /*									    */
1369 /*	TCP/UDP chksum covers data payload and so called pseudo header	    */
1370 /*		SRC, DST IP address					    */
1371 /*		SRC, DST Port						    */
1372 /*		length of payload					    */
1373 /*									    */
1374 /* The partial chksum delta (nat_sumd[1] is used to adjust db_ckusm16	    */
1375 /* member of dblk_t structure. The db_ckusm16 member is not part of 	    */
1376 /* IP/UDP/TCP header it is 16 bit value computed by NIC driver with partial */
1377 /* chksum offload capacbility for every inbound packet. The db_cksum16 is   */
1378 /* stored along with other IP packet data in dblk_t structure and used in   */
1379 /* for IP/UDP/TCP chksum validation later in ip.c. 			    */
1380 /*									    */
1381 /* The partial chksum delta (adjustment, nat_sumd[1]) is computed as chksum */
1382 /* of delta between new and orig address. NOTE: the order of operands for   */
1383 /* partial delta operation is swapped compared to computing the IP/TCP/UDP  */
1384 /* header adjustment. It is by design see (IP_CKSUM_RECV() macro in ip.c).  */
1385 /*									    */
1386 /* ------------------------------------------------------------------------ */
1387 void nat_calc_chksum_diffs(nat)
1388 nat_t *nat;
1389 {
1390 	u_32_t	sum_orig = 0;
1391 	u_32_t	sum_changed = 0;
1392 	u_32_t	sumd;
1393 	u_32_t	ipsum_orig = 0;
1394 	u_32_t	ipsum_changed = 0;
1395 
1396 	if (nat->nat_v != 4 && nat->nat_v != 6)
1397 		return;
1398 
1399 	/*
1400 	 * the switch calculates operands for CALC_SUMD(),
1401 	 * which will compute the partial chksum delta.
1402 	 */
1403 	switch (nat->nat_dir)
1404 	{
1405 	case NAT_INBOUND:
1406 		/*
1407 		 * we are dealing with RDR rule (DST address gets
1408 		 * modified on packet from client)
1409 		 */
1410 		if (nat->nat_v == 4) {
1411 			sum_changed = LONG_SUM(ntohl(nat->nat_inip.s_addr));
1412 			sum_orig = LONG_SUM(ntohl(nat->nat_outip.s_addr));
1413 		} else {
1414 			sum_changed = LONG_SUM6(&nat->nat_inip6);
1415 			sum_orig = LONG_SUM6(&nat->nat_outip6);
1416 		}
1417 		break;
1418 	case NAT_OUTBOUND:
1419 		/*
1420 		 * we are dealing with MAP rule (SRC address gets
1421 		 * modified on packet from client)
1422 		 */
1423 		if (nat->nat_v == 4) {
1424 			sum_changed = LONG_SUM(ntohl(nat->nat_outip.s_addr));
1425 			sum_orig = LONG_SUM(ntohl(nat->nat_inip.s_addr));
1426 		} else {
1427 			sum_changed = LONG_SUM6(&nat->nat_outip6);
1428 			sum_orig = LONG_SUM6(&nat->nat_inip6);
1429 		}
1430 		break;
1431 	default: ;
1432 		break;
1433 	}
1434 
1435 	/*
1436 	 * we also preserve CALC_SUMD() operands here, for IP chksum delta
1437 	 * calculation, which happens at the end of function.
1438 	 */
1439 	ipsum_changed = sum_changed;
1440 	ipsum_orig = sum_orig;
1441 	/*
1442 	 * NOTE: the order of operands for partial chksum adjustment
1443 	 * computation has to be swapped!
1444 	 */
1445 	CALC_SUMD(sum_changed, sum_orig, sumd);
1446 	nat->nat_sumd[1] = (sumd & 0xffff) + (sumd >> 16);
1447 
1448 	if (nat->nat_flags & (IPN_TCPUDP | IPN_ICMPQUERY)) {
1449 
1450 		/*
1451 		 * switch calculates operands for CALC_SUMD(), which will
1452 		 * compute the full chksum delta.
1453 		 */
1454 		switch (nat->nat_dir)
1455 		{
1456 		case NAT_INBOUND:
1457 			if (nat->nat_v == 4) {
1458 				sum_changed = LONG_SUM(
1459 				    ntohl(nat->nat_inip.s_addr) +
1460 				    ntohs(nat->nat_inport));
1461 				sum_orig = LONG_SUM(
1462 				    ntohl(nat->nat_outip.s_addr) +
1463 				    ntohs(nat->nat_outport));
1464 			} else {
1465 				sum_changed = LONG_SUM6(&nat->nat_inip6) +
1466 				    ntohs(nat->nat_inport);
1467 				sum_orig = LONG_SUM6(&nat->nat_outip6) +
1468 				    ntohs(nat->nat_outport);
1469 			}
1470 			break;
1471 		case NAT_OUTBOUND:
1472 			if (nat->nat_v == 4) {
1473 				sum_changed = LONG_SUM(
1474 				    ntohl(nat->nat_outip.s_addr) +
1475 				    ntohs(nat->nat_outport));
1476 				sum_orig = LONG_SUM(
1477 				    ntohl(nat->nat_inip.s_addr) +
1478 				    ntohs(nat->nat_inport));
1479 			} else {
1480 				sum_changed = LONG_SUM6(&nat->nat_outip6) +
1481 				    ntohs(nat->nat_outport);
1482 				sum_orig = LONG_SUM6(&nat->nat_inip6) +
1483 				    ntohs(nat->nat_inport);
1484 			}
1485 			break;
1486 		default: ;
1487 			break;
1488 		}
1489 
1490 		CALC_SUMD(sum_orig, sum_changed, sumd);
1491 		nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
1492 
1493 		if (!(nat->nat_flags & IPN_TCPUDP)) {
1494 			/*
1495 			 * partial HW chksum offload works for TCP/UDP headers only,
1496 			 * so we need to enforce full chksum adjustment for ICMP
1497 			 */
1498 			nat->nat_sumd[1] = nat->nat_sumd[0];
1499 		}
1500 	}
1501 	else
1502 		nat->nat_sumd[0] = nat->nat_sumd[1];
1503 
1504 	/*
1505 	 * we may reuse the already computed nat_sumd[0] for IP header chksum
1506 	 * adjustment in case the L4 (TCP/UDP header) is not changed by NAT.
1507 	 */
1508 	if (nat->nat_v == 4) {
1509 		if (NAT_HAS_L4_CHANGED(nat)) {
1510 			/*
1511 			 * bad luck, NAT changes also the L4 header, use IP
1512 			 * addresses to compute chksum adjustment for IP header.
1513 			 */
1514 			CALC_SUMD(ipsum_orig, ipsum_changed, sumd);
1515 			nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
1516 		} else {
1517 			/*
1518 			 * the NAT does not change L4 hdr -> reuse chksum
1519 			 * adjustment for IP hdr.
1520 			 */
1521 			nat->nat_ipsumd = nat->nat_sumd[0];
1522 
1523 			/*
1524 			 * if L4 header does not use chksum - zero out deltas
1525 			 */
1526 			if (!(nat->nat_flags & IPN_TCPUDP)) {
1527 				nat->nat_sumd[0] = 0;
1528 				nat->nat_sumd[1] = 0;
1529 			}
1530 		}
1531 	}
1532 
1533 	return;
1534 }
1535 
1536 /* ------------------------------------------------------------------------ */
1537 /* Function:    fr_natputent                                                */
1538 /* Returns:     int - 0 == success, != 0 is the error value.                */
1539 /* Parameters:  data(I) -     pointer to natget structure with NAT          */
1540 /*                            structure information to load into the kernel */
1541 /*              getlock(I) - flag indicating whether or not a write lock    */
1542 /*                           on ipf_nat is already held.                    */
1543 /*                                                                          */
1544 /* Handle SIOCSTPUT.                                                        */
1545 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1546 /* firewall rule data structures, if pointers to them indicate so.          */
1547 /* ------------------------------------------------------------------------ */
1548 static int fr_natputent(data, getlock, ifs)
1549 caddr_t data;
1550 int getlock;
1551 ipf_stack_t *ifs;
1552 {
1553 	nat_save_t ipn, *ipnn;
1554 	ap_session_t *aps;
1555 	nat_t *n, *nat;
1556 	frentry_t *fr;
1557 	fr_info_t fin;
1558 	ipnat_t *in;
1559 	int error;
1560 
1561 	error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1562 	if (error != 0)
1563 		return error;
1564 
1565 	/*
1566 	 * Trigger automatic call to nat_extraflush() if the
1567 	 * table has reached capcity specified by hi watermark.
1568 	 */
1569 	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi)
1570 		ifs->ifs_nat_doflush = 1;
1571 
1572 	/*
1573 	 * Initialise early because of code at junkput label.
1574 	 */
1575 	in = NULL;
1576 	aps = NULL;
1577 	nat = NULL;
1578 	ipnn = NULL;
1579 
1580 	/*
1581 	 * New entry, copy in the rest of the NAT entry if it's size is more
1582 	 * than just the nat_t structure.
1583 	 */
1584 	fr = NULL;
1585 	if (ipn.ipn_dsize > sizeof(ipn)) {
1586 		if (ipn.ipn_dsize > 81920) {
1587 			error = ENOMEM;
1588 			goto junkput;
1589 		}
1590 
1591 		KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1592 		if (ipnn == NULL)
1593 			return ENOMEM;
1594 
1595 		error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1596 		if (error != 0) {
1597 			error = EFAULT;
1598 			goto junkput;
1599 		}
1600 	} else
1601 		ipnn = &ipn;
1602 
1603 	KMALLOC(nat, nat_t *);
1604 	if (nat == NULL) {
1605 		error = ENOMEM;
1606 		goto junkput;
1607 	}
1608 
1609 	bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1610 	/*
1611 	 * Initialize all these so that nat_delete() doesn't cause a crash.
1612 	 */
1613 	bzero((char *)nat, offsetof(struct nat, nat_tqe));
1614 	nat->nat_tqe.tqe_pnext = NULL;
1615 	nat->nat_tqe.tqe_next = NULL;
1616 	nat->nat_tqe.tqe_ifq = NULL;
1617 	nat->nat_tqe.tqe_parent = nat;
1618 
1619 	/*
1620 	 * Restore the rule associated with this nat session
1621 	 */
1622 	in = ipnn->ipn_nat.nat_ptr;
1623 	if (in != NULL) {
1624 		KMALLOC(in, ipnat_t *);
1625 		nat->nat_ptr = in;
1626 		if (in == NULL) {
1627 			error = ENOMEM;
1628 			goto junkput;
1629 		}
1630 		bzero((char *)in, offsetof(struct ipnat, in_next6));
1631 		bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1632 		in->in_use = 1;
1633 		in->in_flags |= IPN_DELETE;
1634 
1635 		ATOMIC_INC(ifs->ifs_nat_stats.ns_rules);
1636 
1637 		if (nat_resolverule(in, ifs) != 0) {
1638 			error = ESRCH;
1639 			goto junkput;
1640 		}
1641 	}
1642 
1643 	/*
1644 	 * Check that the NAT entry doesn't already exist in the kernel.
1645 	 */
1646 	if (nat->nat_v != 6)
1647 		nat->nat_v = 4;
1648 	bzero((char *)&fin, sizeof(fin));
1649 	fin.fin_p = nat->nat_p;
1650 	fin.fin_ifs = ifs;
1651 	if (nat->nat_dir == NAT_OUTBOUND) {
1652 		fin.fin_data[0] = ntohs(nat->nat_oport);
1653 		fin.fin_data[1] = ntohs(nat->nat_outport);
1654 		fin.fin_ifp = nat->nat_ifps[0];
1655 		if (getlock) {
1656 			READ_ENTER(&ifs->ifs_ipf_nat);
1657 		}
1658 
1659 		switch (nat->nat_v)
1660 		{
1661 		case 4:
1662 			fin.fin_v = nat->nat_v;
1663 			n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1664 			    nat->nat_oip, nat->nat_outip);
1665 			break;
1666 #ifdef USE_INET6
1667 		case 6:
1668 			n = nat6_inlookup(&fin, nat->nat_flags, fin.fin_p,
1669 			    &nat->nat_oip6.in6, &nat->nat_outip6.in6);
1670 			break;
1671 #endif
1672 		default:
1673 			n = NULL;
1674 			break;
1675 		}
1676 
1677 		if (getlock) {
1678 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1679 		}
1680 		if (n != NULL) {
1681 			error = EEXIST;
1682 			goto junkput;
1683 		}
1684 	} else if (nat->nat_dir == NAT_INBOUND) {
1685 		fin.fin_data[0] = ntohs(nat->nat_inport);
1686 		fin.fin_data[1] = ntohs(nat->nat_oport);
1687 		fin.fin_ifp = nat->nat_ifps[1];
1688 		if (getlock) {
1689 			READ_ENTER(&ifs->ifs_ipf_nat);
1690 		}
1691 
1692 		switch (nat->nat_v)
1693 		{
1694 		case 4:
1695 			n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1696 			    nat->nat_inip, nat->nat_oip);
1697 			break;
1698 #ifdef USE_INET6
1699 		case 6:
1700 			n = nat6_outlookup(&fin, nat->nat_flags, fin.fin_p,
1701 			    &nat->nat_inip6.in6, &nat->nat_oip6.in6);
1702 			break;
1703 #endif
1704 		default:
1705 			n = NULL;
1706 			break;
1707 		}
1708 
1709 		if (getlock) {
1710 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1711 		}
1712 		if (n != NULL) {
1713 			error = EEXIST;
1714 			goto junkput;
1715 		}
1716 	} else {
1717 		error = EINVAL;
1718 		goto junkput;
1719 	}
1720 
1721 	/*
1722 	 * Restore ap_session_t structure.  Include the private data allocated
1723 	 * if it was there.
1724 	 */
1725 	aps = nat->nat_aps;
1726 	if (aps != NULL) {
1727 		KMALLOC(aps, ap_session_t *);
1728 		nat->nat_aps = aps;
1729 		if (aps == NULL) {
1730 			error = ENOMEM;
1731 			goto junkput;
1732 		}
1733 		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1734 		if (in != NULL)
1735 			aps->aps_apr = in->in_apr;
1736 		else
1737 			aps->aps_apr = NULL;
1738 		if (aps->aps_psiz != 0) {
1739 			if (aps->aps_psiz > 81920) {
1740 				error = ENOMEM;
1741 				goto junkput;
1742 			}
1743 			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1744 			if (aps->aps_data == NULL) {
1745 				error = ENOMEM;
1746 				goto junkput;
1747 			}
1748 			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1749 			      aps->aps_psiz);
1750 		} else {
1751 			aps->aps_psiz = 0;
1752 			aps->aps_data = NULL;
1753 		}
1754 	}
1755 
1756 	/*
1757 	 * If there was a filtering rule associated with this entry then
1758 	 * build up a new one.
1759 	 */
1760 	fr = nat->nat_fr;
1761 	if (fr != NULL) {
1762 		if ((nat->nat_flags & SI_NEWFR) != 0) {
1763 			KMALLOC(fr, frentry_t *);
1764 			nat->nat_fr = fr;
1765 			if (fr == NULL) {
1766 				error = ENOMEM;
1767 				goto junkput;
1768 			}
1769 			ipnn->ipn_nat.nat_fr = fr;
1770 			(void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1771 			bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1772 
1773 			fr->fr_ref = 1;
1774 			fr->fr_dsize = 0;
1775 			fr->fr_data = NULL;
1776 			fr->fr_type = FR_T_NONE;
1777 
1778 			MUTEX_NUKE(&fr->fr_lock);
1779 			MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1780 		} else {
1781 			if (getlock) {
1782 				READ_ENTER(&ifs->ifs_ipf_nat);
1783 			}
1784 			for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1785 				if (n->nat_fr == fr)
1786 					break;
1787 
1788 			if (n != NULL) {
1789 				MUTEX_ENTER(&fr->fr_lock);
1790 				fr->fr_ref++;
1791 				MUTEX_EXIT(&fr->fr_lock);
1792 			}
1793 			if (getlock) {
1794 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1795 			}
1796 			if (!n) {
1797 				error = ESRCH;
1798 				goto junkput;
1799 			}
1800 		}
1801 	}
1802 
1803 	if (ipnn != &ipn) {
1804 		KFREES(ipnn, ipn.ipn_dsize);
1805 		ipnn = NULL;
1806 	}
1807 
1808 	nat_calc_chksum_diffs(nat);
1809 
1810 	if (getlock) {
1811 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1812 	}
1813 
1814 	nat_calc_chksum_diffs(nat);
1815 
1816 	switch (nat->nat_v)
1817 	{
1818 	case 4 :
1819 		error = nat_insert(nat, nat->nat_rev, ifs);
1820 		break;
1821 #ifdef USE_INET6
1822 	case 6 :
1823 		error = nat6_insert(nat, nat->nat_rev, ifs);
1824 		break;
1825 #endif
1826 	default :
1827 		break;
1828 	}
1829 
1830 	if ((error == 0) && (aps != NULL)) {
1831 		aps->aps_next = ifs->ifs_ap_sess_list;
1832 		ifs->ifs_ap_sess_list = aps;
1833 	}
1834 	if (getlock) {
1835 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1836 	}
1837 
1838 	if (error == 0)
1839 		return 0;
1840 
1841 	error = ENOMEM;
1842 
1843 junkput:
1844 	if (fr != NULL)
1845 		(void) fr_derefrule(&fr, ifs);
1846 
1847 	if ((ipnn != NULL) && (ipnn != &ipn)) {
1848 		KFREES(ipnn, ipn.ipn_dsize);
1849 	}
1850 	if (nat != NULL) {
1851 		if (aps != NULL) {
1852 			if (aps->aps_data != NULL) {
1853 				KFREES(aps->aps_data, aps->aps_psiz);
1854 			}
1855 			KFREE(aps);
1856 		}
1857 		if (in != NULL) {
1858 			if (in->in_apr)
1859 				appr_free(in->in_apr);
1860 			KFREE(in);
1861 		}
1862 		KFREE(nat);
1863 	}
1864 	return error;
1865 }
1866 
1867 
/* ------------------------------------------------------------------------ */
/* Function:    nat_delete                                                  */
/* Returns:     Nil                                                         */
/* Parameters:  nat(I)     - pointer to NAT structure to delete             */
/*              logtype(I) - type of LOG record to create before deleting   */
/*              ifs(I)     - pointer to the ipf stack instance in use       */
/* Write Lock:  ipf_nat                                                     */
/*                                                                          */
/* Delete a nat entry from the various lists and table.  If NAT logging is  */
/* enabled then generate a NAT log record for this event.                   */
/*                                                                          */
/* The entry is always unlinked from the lists, but it is only torn down    */
/* and freed when the caller holds the last reference (nat_ref <= 1);       */
/* otherwise the reference count is decremented and the function returns.   */
/* ------------------------------------------------------------------------ */
static void nat_delete(nat, logtype, ifs)
struct nat *nat;
int logtype;
ipf_stack_t *ifs;
{
	struct ipnat *ipn;

	/* Log first, while the entry still carries all of its details. */
	if (logtype != 0 && ifs->ifs_nat_logging != 0)
		nat_log(nat, logtype, ifs);

	/*
	 * Take it as a general indication that all the pointers are set if
	 * nat_pnext is set.
	 */
	if (nat->nat_pnext != NULL) {
		/* One entry fewer in each of the two hash buckets. */
		ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
		ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;

		/* Unlink from the list of all NAT instances. */
		*nat->nat_pnext = nat->nat_next;
		if (nat->nat_next != NULL) {
			nat->nat_next->nat_pnext = nat->nat_pnext;
			nat->nat_next = NULL;
		}
		nat->nat_pnext = NULL;

		/* Unlink from the hash chain in table 0. */
		*nat->nat_phnext[0] = nat->nat_hnext[0];
		if (nat->nat_hnext[0] != NULL) {
			nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
			nat->nat_hnext[0] = NULL;
		}
		nat->nat_phnext[0] = NULL;

		/* Unlink from the hash chain in table 1. */
		*nat->nat_phnext[1] = nat->nat_hnext[1];
		if (nat->nat_hnext[1] != NULL) {
			nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
			nat->nat_hnext[1] = NULL;
		}
		nat->nat_phnext[1] = NULL;

		if ((nat->nat_flags & SI_WILDP) != 0)
			ifs->ifs_nat_stats.ns_wilds--;
	}

	/*
	 * Clear the external pointer that was registered as referring to
	 * this entry (nat_me) so that it does not dangle.
	 */
	if (nat->nat_me != NULL) {
		*nat->nat_me = NULL;
		nat->nat_me = NULL;
	}

	fr_deletequeueentry(&nat->nat_tqe);

	/*
	 * If someone else still holds a reference, just drop ours; the
	 * entry is already unlinked but the structure stays allocated.
	 */
	MUTEX_ENTER(&nat->nat_lock);
	if (nat->nat_ref > 1) {
		nat->nat_ref--;
		MUTEX_EXIT(&nat->nat_lock);
		return;
	}
	MUTEX_EXIT(&nat->nat_lock);

	/*
	 * At this point, nat_ref is 1, doing "--" would make it 0..
	 */
	nat->nat_ref = 0;

#ifdef	IPFILTER_SYNC
	if (nat->nat_sync)
		ipfsync_del(nat->nat_sync);
#endif

	/* Release the filter rule and host map held by this entry. */
	if (nat->nat_fr != NULL)
		(void)fr_derefrule(&nat->nat_fr, ifs);

	if (nat->nat_hm != NULL)
		fr_hostmapdel(&nat->nat_hm);

	/*
	 * If there is an active reference from the nat entry to its parent
	 * rule, decrement the rule's reference count and free it too if no
	 * longer being used.
	 */
	ipn = nat->nat_ptr;
	if (ipn != NULL) {
		ipn->in_space++;
		ipn->in_use--;
		/*
		 * IPN_DELETE marks a rule whose removal was deferred while
		 * it was still in use (see nat_clearlist()).
		 */
		if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) {
			if (ipn->in_apr)
				appr_free(ipn->in_apr);
			KFREE(ipn);
			ifs->ifs_nat_stats.ns_rules--;
		}
	}

	MUTEX_DESTROY(&nat->nat_lock);

	aps_free(nat->nat_aps, ifs);
	ifs->ifs_nat_stats.ns_inuse--;

	/*
	 * If there's a fragment table entry too for this nat entry, then
	 * dereference that as well.  This is after nat_lock is released
	 * because of Tru64.
	 */
	fr_forgetnat((void *)nat, ifs);

	KFREE(nat);
}
1983 
1984 
1985 /* ------------------------------------------------------------------------ */
1986 /* Function:    nat_flushtable                                              */
1987 /* Returns:     int - number of NAT rules deleted                           */
1988 /* Parameters:  Nil                                                         */
1989 /*                                                                          */
1990 /* Deletes all currently active NAT sessions.  In deleting each NAT entry a */
1991 /* log record should be emitted in nat_delete() if NAT logging is enabled.  */
1992 /* ------------------------------------------------------------------------ */
1993 /*
1994  * nat_flushtable - clear the NAT table of all mapping entries.
1995  */
1996 static int nat_flushtable(ifs)
1997 ipf_stack_t *ifs;
1998 {
1999 	nat_t *nat;
2000 	int j = 0;
2001 
2002 	/*
2003 	 * ALL NAT mappings deleted, so lets just make the deletions
2004 	 * quicker.
2005 	 */
2006 	if (ifs->ifs_nat_table[0] != NULL)
2007 		bzero((char *)ifs->ifs_nat_table[0],
2008 		      sizeof(ifs->ifs_nat_table[0]) * ifs->ifs_ipf_nattable_sz);
2009 	if (ifs->ifs_nat_table[1] != NULL)
2010 		bzero((char *)ifs->ifs_nat_table[1],
2011 		      sizeof(ifs->ifs_nat_table[1]) * ifs->ifs_ipf_nattable_sz);
2012 
2013 	while ((nat = ifs->ifs_nat_instances) != NULL) {
2014 		nat_delete(nat, NL_FLUSH, ifs);
2015 		j++;
2016 	}
2017 
2018 	return j;
2019 }
2020 
2021 
2022 /* ------------------------------------------------------------------------ */
2023 /* Function:    nat_clearlist                                               */
2024 /* Returns:     int - number of NAT/RDR rules deleted                       */
2025 /* Parameters:  Nil                                                         */
2026 /*                                                                          */
2027 /* Delete all rules in the current list of rules.  There is nothing elegant */
2028 /* about this cleanup: simply free all entries on the list of rules and     */
2029 /* clear out the tables used for hashed NAT rule lookups.                   */
2030 /* ------------------------------------------------------------------------ */
2031 static int nat_clearlist(ifs)
2032 ipf_stack_t *ifs;
2033 {
2034 	ipnat_t *n, **np = &ifs->ifs_nat_list;
2035 	int i = 0;
2036 
2037 	if (ifs->ifs_nat_rules != NULL)
2038 		bzero((char *)ifs->ifs_nat_rules,
2039 		      sizeof(*ifs->ifs_nat_rules) * ifs->ifs_ipf_natrules_sz);
2040 	if (ifs->ifs_rdr_rules != NULL)
2041 		bzero((char *)ifs->ifs_rdr_rules,
2042 		      sizeof(*ifs->ifs_rdr_rules) * ifs->ifs_ipf_rdrrules_sz);
2043 
2044 	while ((n = *np) != NULL) {
2045 		*np = n->in_next;
2046 		if (n->in_use == 0) {
2047 			if (n->in_apr != NULL)
2048 				appr_free(n->in_apr);
2049 			KFREE(n);
2050 			ifs->ifs_nat_stats.ns_rules--;
2051 		} else {
2052 			n->in_flags |= IPN_DELETE;
2053 			n->in_next = NULL;
2054 		}
2055 		i++;
2056 	}
2057 	ifs->ifs_nat_masks = 0;
2058 	ifs->ifs_rdr_masks = 0;
2059 	for (i = 0; i < 4; i++) {
2060 		ifs->ifs_nat6_masks[i] = 0;
2061 		ifs->ifs_rdr6_masks[i] = 0;
2062 	}
2063 	return i;
2064 }
2065 
2066 
2067 /* ------------------------------------------------------------------------ */
2068 /* Function:    nat_newmap                                                  */
2069 /* Returns:     int - -1 == error, 0 == success                             */
2070 /* Parameters:  fin(I) - pointer to packet information                      */
2071 /*              nat(I) - pointer to NAT entry                               */
2072 /*              ni(I)  - pointer to structure with misc. information needed */
2073 /*                       to create new NAT entry.                           */
2074 /*                                                                          */
2075 /* Given an empty NAT structure, populate it with new information about a   */
2076 /* new NAT session, as defined by the matching NAT rule.                    */
2077 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2078 /* to the new IP address for the translation.                               */
2079 /* ------------------------------------------------------------------------ */
2080 static INLINE int nat_newmap(fin, nat, ni)
2081 fr_info_t *fin;
2082 nat_t *nat;
2083 natinfo_t *ni;
2084 {
2085 	u_short st_port, dport, sport, port, sp, dp;
2086 	struct in_addr in, inb;
2087 	hostmap_t *hm;
2088 	u_32_t flags;
2089 	u_32_t st_ip;
2090 	ipnat_t *np;
2091 	nat_t *natl;
2092 	int l;
2093 	ipf_stack_t *ifs = fin->fin_ifs;
2094 
2095 	/*
2096 	 * If it's an outbound packet which doesn't match any existing
2097 	 * record, then create a new port
2098 	 */
2099 	l = 0;
2100 	hm = NULL;
2101 	np = ni->nai_np;
2102 	st_ip = np->in_nip;
2103 	st_port = np->in_pnext;
2104 	flags = ni->nai_flags;
2105 	sport = ni->nai_sport;
2106 	dport = ni->nai_dport;
2107 
2108 	/*
2109 	 * Do a loop until we either run out of entries to try or we find
2110 	 * a NAT mapping that isn't currently being used.  This is done
2111 	 * because the change to the source is not (usually) being fixed.
2112 	 */
2113 	do {
2114 		port = 0;
2115 		in.s_addr = htonl(np->in_nip);
2116 		if (l == 0) {
2117 			/*
2118 			 * Check to see if there is an existing NAT
2119 			 * setup for this IP address pair.
2120 			 */
2121 			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2122 					 in, 0, ifs);
2123 			if (hm != NULL)
2124 				in.s_addr = hm->hm_mapip.s_addr;
2125 		} else if ((l == 1) && (hm != NULL)) {
2126 			fr_hostmapdel(&hm);
2127 		}
2128 		in.s_addr = ntohl(in.s_addr);
2129 
2130 		nat->nat_hm = hm;
2131 
2132 		if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
2133 			if (l > 0)
2134 				return -1;
2135 		}
2136 
2137 		if (np->in_redir == NAT_BIMAP &&
2138 		    np->in_inmsk == np->in_outmsk) {
2139 			/*
2140 			 * map the address block in a 1:1 fashion
2141 			 */
2142 			in.s_addr = np->in_outip;
2143 			in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
2144 			in.s_addr = ntohl(in.s_addr);
2145 
2146 		} else if (np->in_redir & NAT_MAPBLK) {
2147 			if ((l >= np->in_ppip) || ((l > 0) &&
2148 			     !(flags & IPN_TCPUDP)))
2149 				return -1;
2150 			/*
2151 			 * map-block - Calculate destination address.
2152 			 */
2153 			in.s_addr = ntohl(fin->fin_saddr);
2154 			in.s_addr &= ntohl(~np->in_inmsk);
2155 			inb.s_addr = in.s_addr;
2156 			in.s_addr /= np->in_ippip;
2157 			in.s_addr &= ntohl(~np->in_outmsk);
2158 			in.s_addr += ntohl(np->in_outip);
2159 			/*
2160 			 * Calculate destination port.
2161 			 */
2162 			if ((flags & IPN_TCPUDP) &&
2163 			    (np->in_ppip != 0)) {
2164 				port = ntohs(sport) + l;
2165 				port %= np->in_ppip;
2166 				port += np->in_ppip *
2167 					(inb.s_addr % np->in_ippip);
2168 				port += MAPBLK_MINPORT;
2169 				port = htons(port);
2170 			}
2171 
2172 		} else if ((np->in_outip == 0) &&
2173 			   (np->in_outmsk == 0xffffffff)) {
2174 			/*
2175 			 * 0/32 - use the interface's IP address.
2176 			 */
2177 			if ((l > 0) ||
2178 			    fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
2179 				       &in, NULL, fin->fin_ifs) == -1)
2180 				return -1;
2181 			in.s_addr = ntohl(in.s_addr);
2182 
2183 		} else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
2184 			/*
2185 			 * 0/0 - use the original source address/port.
2186 			 */
2187 			if (l > 0)
2188 				return -1;
2189 			in.s_addr = ntohl(fin->fin_saddr);
2190 
2191 		} else if ((np->in_outmsk != 0xffffffff) &&
2192 			   (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
2193 			np->in_nip++;
2194 
2195 		natl = NULL;
2196 
2197 		if ((flags & IPN_TCPUDP) &&
2198 		    ((np->in_redir & NAT_MAPBLK) == 0) &&
2199 		    (np->in_flags & IPN_AUTOPORTMAP)) {
2200 			/*
2201 			 * "ports auto" (without map-block)
2202 			 */
2203 			if ((l > 0) && (l % np->in_ppip == 0)) {
2204 				if (l > np->in_space) {
2205 					return -1;
2206 				} else if ((l > np->in_ppip) &&
2207 					   np->in_outmsk != 0xffffffff)
2208 					np->in_nip++;
2209 			}
2210 			if (np->in_ppip != 0) {
2211 				port = ntohs(sport);
2212 				port += (l % np->in_ppip);
2213 				port %= np->in_ppip;
2214 				port += np->in_ppip *
2215 					(ntohl(fin->fin_saddr) %
2216 					 np->in_ippip);
2217 				port += MAPBLK_MINPORT;
2218 				port = htons(port);
2219 			}
2220 
2221 		} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
2222 			   (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
2223 			/*
2224 			 * Standard port translation.  Select next port.
2225 			 */
2226 			if (np->in_flags & IPN_SEQUENTIAL) {
2227 				port = np->in_pnext;
2228 			} else {
2229 				port = ipf_random() % (ntohs(np->in_pmax) -
2230 						       ntohs(np->in_pmin));
2231 				port += ntohs(np->in_pmin);
2232 			}
2233 			port = htons(port);
2234 			np->in_pnext++;
2235 
2236 			if (np->in_pnext > ntohs(np->in_pmax)) {
2237 				np->in_pnext = ntohs(np->in_pmin);
2238 				if (np->in_outmsk != 0xffffffff)
2239 					np->in_nip++;
2240 			}
2241 		}
2242 
2243 		if (np->in_flags & IPN_IPRANGE) {
2244 			if (np->in_nip > ntohl(np->in_outmsk))
2245 				np->in_nip = ntohl(np->in_outip);
2246 		} else {
2247 			if ((np->in_outmsk != 0xffffffff) &&
2248 			    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
2249 			    ntohl(np->in_outip))
2250 				np->in_nip = ntohl(np->in_outip) + 1;
2251 		}
2252 
2253 		if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
2254 			port = sport;
2255 
2256 		/*
2257 		 * Here we do a lookup of the connection as seen from
2258 		 * the outside.  If an IP# pair already exists, try
2259 		 * again.  So if you have A->B becomes C->B, you can
2260 		 * also have D->E become C->E but not D->B causing
2261 		 * another C->B.  Also take protocol and ports into
2262 		 * account when determining whether a pre-existing
2263 		 * NAT setup will cause an external conflict where
2264 		 * this is appropriate.
2265 		 */
2266 		inb.s_addr = htonl(in.s_addr);
2267 		sp = fin->fin_data[0];
2268 		dp = fin->fin_data[1];
2269 		fin->fin_data[0] = fin->fin_data[1];
2270 		fin->fin_data[1] = htons(port);
2271 		natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2272 				    (u_int)fin->fin_p, fin->fin_dst, inb);
2273 		fin->fin_data[0] = sp;
2274 		fin->fin_data[1] = dp;
2275 
2276 		/*
2277 		 * Has the search wrapped around and come back to the
2278 		 * start ?
2279 		 */
2280 		if ((natl != NULL) &&
2281 		    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
2282 		    (np->in_nip != 0) && (st_ip == np->in_nip))
2283 			return -1;
2284 		l++;
2285 	} while (natl != NULL);
2286 
2287 	if (np->in_space > 0)
2288 		np->in_space--;
2289 
2290 	/* Setup the NAT table */
2291 	nat->nat_inip = fin->fin_src;
2292 	nat->nat_outip.s_addr = htonl(in.s_addr);
2293 	nat->nat_oip = fin->fin_dst;
2294 	if (nat->nat_hm == NULL)
2295 		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2296 					  nat->nat_outip, 0, ifs);
2297 
2298 	if (flags & IPN_TCPUDP) {
2299 		nat->nat_inport = sport;
2300 		nat->nat_outport = port;	/* sport */
2301 		nat->nat_oport = dport;
2302 		((tcphdr_t *)fin->fin_dp)->th_sport = port;
2303 	} else if (flags & IPN_ICMPQUERY) {
2304 		((icmphdr_t *)fin->fin_dp)->icmp_id = port;
2305 		nat->nat_inport = port;
2306 		nat->nat_outport = port;
2307 	}
2308 
2309 	ni->nai_ip.s_addr = in.s_addr;
2310 	ni->nai_port = port;
2311 	ni->nai_nport = dport;
2312 	return 0;
2313 }
2314 
2315 
/* ------------------------------------------------------------------------ */
/* Function:    nat_newrdr                                                  */
/* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
/*                    allow rule to be moved if IPN_ROUNDR is set.          */
/* Parameters:  fin(I) - pointer to packet information                      */
/*              nat(I) - pointer to NAT entry                               */
/*              ni(I)  - pointer to structure with misc. information needed */
/*                       to create new NAT entry.                           */
/*                                                                          */
/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
/* to the new IP address for the translation.                               */
/*                                                                          */
/* ni.nai_np may be replaced with the rule recorded in a matching host map  */
/* entry when the rule is both round-robin and sticky.                      */
/* ------------------------------------------------------------------------ */
static INLINE int nat_newrdr(fin, nat, ni)
fr_info_t *fin;
nat_t *nat;
natinfo_t *ni;
{
	u_short nport, dport, sport;
	struct in_addr in, inb;
	u_short sp, dp;
	hostmap_t *hm;
	u_32_t flags;
	ipnat_t *np;
	nat_t *natl;
	int move;
	ipf_stack_t *ifs = fin->fin_ifs;

	move = 1;
	hm = NULL;
	in.s_addr = 0;
	np = ni->nai_np;
	flags = ni->nai_flags;
	sport = ni->nai_sport;
	dport = ni->nai_dport;

	/*
	 * If the matching rule has IPN_STICKY set, then we want to have the
	 * same rule kick in as before.  Why would this happen?  If you have
	 * a collection of rdr rules with "round-robin sticky", the current
	 * packet might match a different one to the previous connection but
	 * we want the same destination to be used.
	 *
	 * NOTE(review): hm is never stored in nat->nat_hm in this function
	 * nor released on the -1 returns below -- confirm whether
	 * nat_hostmap() takes a reference that has to be dropped.
	 */
	if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) ==
	    (IPN_ROUNDR|IPN_STICKY)) {
		hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
				 (u_32_t)dport, ifs);
		if (hm != NULL) {
			in.s_addr = ntohl(hm->hm_mapip.s_addr);
			np = hm->hm_ipnat;
			ni->nai_np = np;
			move = 0;
		}
	}

	/*
	 * Otherwise, it's an inbound packet. Most likely, we don't
	 * want to rewrite source ports and source addresses. Instead,
	 * we want to rewrite to a fixed internal address and fixed
	 * internal port.
	 */
	if (np->in_flags & IPN_SPLIT) {
		in.s_addr = np->in_nip;

		if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
					 in, (u_32_t)dport, ifs);
			if (hm != NULL) {
				in.s_addr = hm->hm_mapip.s_addr;
				move = 0;
			}
		}

		/*
		 * Advance the rule's next-address cursor: in_nip is toggled
		 * between the two addresses held in in_inip and in_inmsk.
		 */
		if (hm == NULL || hm->hm_ref == 1) {
			if (np->in_inip == htonl(in.s_addr)) {
				np->in_nip = ntohl(np->in_inmsk);
				move = 0;
			} else {
				np->in_nip = ntohl(np->in_inip);
			}
		}

	} else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
		/*
		 * 0/32 - use the interface's IP address.
		 */
		if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL,
			   fin->fin_ifs) == -1)
			return -1;
		in.s_addr = ntohl(in.s_addr);

	} else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
		/*
		 * 0/0 - use the original destination address/port.
		 */
		in.s_addr = ntohl(fin->fin_daddr);

	} else if (np->in_redir == NAT_BIMAP &&
		   np->in_inmsk == np->in_outmsk) {
		/*
		 * map the address block in a 1:1 fashion
		 */
		in.s_addr = np->in_inip;
		in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
		in.s_addr = ntohl(in.s_addr);
	} else {
		in.s_addr = ntohl(np->in_inip);
	}

	/* Work out the new destination port (nport, network byte order). */
	if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
		nport = dport;
	else {
		/*
		 * Whilst not optimized for the case where
		 * pmin == pmax, the gain is not significant.
		 */
		if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
		    (np->in_pmin != np->in_pmax)) {
			/* Keep the packet's offset within the port range. */
			nport = ntohs(dport) - ntohs(np->in_pmin) +
				ntohs(np->in_pnext);
			nport = htons(nport);
		} else
			nport = np->in_pnext;
	}

	/*
	 * When the redirect-to address is set to 0.0.0.0, just
	 * assume a blank `forwarding' of the packet.  We don't
	 * setup any translation for this either.
	 */
	if (in.s_addr == 0) {
		if (nport == dport)
			return -1;
		in.s_addr = ntohl(fin->fin_daddr);
	}

	/*
	 * Check to see if this redirect mapping already exists and if
	 * it does, return "failure" (allowing it to be created will just
	 * cause one or both of these "connections" to stop working.)
	 *
	 * fin_data[] is temporarily overwritten for the lookup and restored
	 * straight afterwards.
	 */
	inb.s_addr = htonl(in.s_addr);
	sp = fin->fin_data[0];
	dp = fin->fin_data[1];
	fin->fin_data[1] = fin->fin_data[0];
	fin->fin_data[0] = ntohs(nport);
	natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
		    (u_int)fin->fin_p, inb, fin->fin_src);
	fin->fin_data[0] = sp;
	fin->fin_data[1] = dp;
	if (natl != NULL)
		return (-1);

	/* Record the translation: addresses stored in network byte order. */
	nat->nat_inip.s_addr = htonl(in.s_addr);
	nat->nat_outip = fin->fin_dst;
	nat->nat_oip = fin->fin_src;

	ni->nai_ip.s_addr = in.s_addr;
	ni->nai_nport = nport;
	ni->nai_port = sport;

	if (flags & IPN_TCPUDP) {
		nat->nat_inport = nport;
		nat->nat_outport = dport;
		nat->nat_oport = sport;
		((tcphdr_t *)fin->fin_dp)->th_dport = nport;
	} else if (flags & IPN_ICMPQUERY) {
		((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
		nat->nat_inport = nport;
		nat->nat_outport = nport;
	}

	return move;
}
2489 
2490 /* ------------------------------------------------------------------------ */
2491 /* Function:    nat_new                                                     */
2492 /* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
2493 /*                       else pointer to new NAT structure                  */
2494 /* Parameters:  fin(I)       - pointer to packet information                */
2495 /*              np(I)        - pointer to NAT rule                          */
2496 /*              natsave(I)   - pointer to where to store NAT struct pointer */
2497 /*              flags(I)     - flags describing the current packet          */
2498 /*              direction(I) - direction of packet (in/out)                 */
2499 /* Write Lock:  ipf_nat                                                     */
2500 /*                                                                          */
2501 /* Attempts to create a new NAT entry.  Does not actually change the packet */
2502 /* in any way.                                                              */
2503 /*                                                                          */
2504 /* This fucntion is in three main parts: (1) deal with creating a new NAT   */
2505 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
2506 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2507 /* and (3) building that structure and putting it into the NAT table(s).    */
2508 /* ------------------------------------------------------------------------ */
2509 nat_t *nat_new(fin, np, natsave, flags, direction)
2510 fr_info_t *fin;
2511 ipnat_t *np;
2512 nat_t **natsave;
2513 u_int flags;
2514 int direction;
2515 {
2516 	tcphdr_t *tcp = NULL;
2517 	hostmap_t *hm = NULL;
2518 	nat_t *nat, *natl;
2519 	u_int nflags;
2520 	natinfo_t ni;
2521 	int move;
2522 	ipf_stack_t *ifs = fin->fin_ifs;
2523 
2524 	/*
2525 	 * Trigger automatic call to nat_extraflush() if the
2526 	 * table has reached capcity specified by hi watermark.
2527 	 */
2528 	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi)
2529 		ifs->ifs_nat_doflush = 1;
2530 
2531 	if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) {
2532 		ifs->ifs_nat_stats.ns_memfail++;
2533 		return NULL;
2534 	}
2535 
2536 	move = 1;
2537 	nflags = np->in_flags & flags;
2538 	nflags &= NAT_FROMRULE;
2539 
2540 	ni.nai_np = np;
2541 	ni.nai_nflags = nflags;
2542 	ni.nai_flags = flags;
2543 
2544 	/* Give me a new nat */
2545 	KMALLOC(nat, nat_t *);
2546 	if (nat == NULL) {
2547 		ifs->ifs_nat_stats.ns_memfail++;
2548 		/*
2549 		 * Try to automatically tune the max # of entries in the
2550 		 * table allowed to be less than what will cause kmem_alloc()
2551 		 * to fail and try to eliminate panics due to out of memory
2552 		 * conditions arising.
2553 		 */
2554 		if (ifs->ifs_ipf_nattable_max > ifs->ifs_ipf_nattable_sz) {
2555 			ifs->ifs_ipf_nattable_max = ifs->ifs_nat_stats.ns_inuse - 100;
2556 			printf("ipf_nattable_max reduced to %d\n",
2557 				ifs->ifs_ipf_nattable_max);
2558 		}
2559 		return NULL;
2560 	}
2561 
2562 	if (flags & IPN_TCPUDP) {
2563 		tcp = fin->fin_dp;
2564 		ni.nai_sport = htons(fin->fin_sport);
2565 		ni.nai_dport = htons(fin->fin_dport);
2566 	} else if (flags & IPN_ICMPQUERY) {
2567 		/*
2568 		 * In the ICMP query NAT code, we translate the ICMP id fields
2569 		 * to make them unique. This is indepedent of the ICMP type
2570 		 * (e.g. in the unlikely event that a host sends an echo and
2571 		 * an tstamp request with the same id, both packets will have
2572 		 * their ip address/id field changed in the same way).
2573 		 */
2574 		/* The icmp_id field is used by the sender to identify the
2575 		 * process making the icmp request. (the receiver justs
2576 		 * copies it back in its response). So, it closely matches
2577 		 * the concept of source port. We overlay sport, so we can
2578 		 * maximally reuse the existing code.
2579 		 */
2580 		ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2581 		ni.nai_dport = ni.nai_sport;
2582 	}
2583 
2584 	bzero((char *)nat, sizeof(*nat));
2585 	nat->nat_flags = flags;
2586 	nat->nat_redir = np->in_redir;
2587 
2588 	if ((flags & NAT_SLAVE) == 0) {
2589 		MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
2590 	}
2591 
2592 	/*
2593 	 * Search the current table for a match.
2594 	 */
2595 	if (direction == NAT_OUTBOUND) {
2596 		/*
2597 		 * We can now arrange to call this for the same connection
2598 		 * because ipf_nat_new doesn't protect the code path into
2599 		 * this function.
2600 		 */
2601 		natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2602 				     fin->fin_src, fin->fin_dst);
2603 		if (natl != NULL) {
2604 			KFREE(nat);
2605 			nat = natl;
2606 			goto done;
2607 		}
2608 
2609 		move = nat_newmap(fin, nat, &ni);
2610 		if (move == -1)
2611 			goto badnat;
2612 
2613 		np = ni.nai_np;
2614 	} else {
2615 		/*
2616 		 * NAT_INBOUND is used only for redirects rules
2617 		 */
2618 		natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2619 				    fin->fin_src, fin->fin_dst);
2620 		if (natl != NULL) {
2621 			KFREE(nat);
2622 			nat = natl;
2623 			goto done;
2624 		}
2625 
2626 		move = nat_newrdr(fin, nat, &ni);
2627 		if (move == -1)
2628 			goto badnat;
2629 
2630 		np = ni.nai_np;
2631 	}
2632 
2633 	if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2634 		if (np->in_redir == NAT_REDIRECT) {
2635 			nat_delrdr(np);
2636 			nat_addrdr(np, ifs);
2637 		} else if (np->in_redir == NAT_MAP) {
2638 			nat_delnat(np);
2639 			nat_addnat(np, ifs);
2640 		}
2641 	}
2642 
2643 	if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2644 		goto badnat;
2645 	}
2646 
2647 	nat_calc_chksum_diffs(nat);
2648 
2649 	if (flags & SI_WILDP)
2650 		ifs->ifs_nat_stats.ns_wilds++;
2651 	goto done;
2652 badnat:
2653 	ifs->ifs_nat_stats.ns_badnat++;
2654 	if ((hm = nat->nat_hm) != NULL)
2655 		fr_hostmapdel(&hm);
2656 	KFREE(nat);
2657 	nat = NULL;
2658 done:
2659 	if ((flags & NAT_SLAVE) == 0) {
2660 		MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
2661 	}
2662 	return nat;
2663 }
2664 
2665 
2666 /* ------------------------------------------------------------------------ */
2667 /* Function:    nat_finalise                                                */
2668 /* Returns:     int - 0 == sucess, -1 == failure                            */
2669 /* Parameters:  fin(I) - pointer to packet information                      */
2670 /*              nat(I) - pointer to NAT entry                               */
2671 /*              ni(I)  - pointer to structure with misc. information needed */
2672 /*                       to create new NAT entry.                           */
2673 /* Write Lock:  ipf_nat                                                     */
2674 /*                                                                          */
2675 /* This is the tail end of constructing a new NAT entry and is the same     */
2676 /* for both IPv4 and IPv6.                                                  */
2677 /* ------------------------------------------------------------------------ */
2678 /*ARGSUSED*/
2679 static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction)
2680 fr_info_t *fin;
2681 nat_t *nat;
2682 natinfo_t *ni;
2683 tcphdr_t *tcp;
2684 nat_t **natsave;
2685 int direction;
2686 {
2687 	frentry_t *fr;
2688 	ipnat_t *np;
2689 	ipf_stack_t *ifs = fin->fin_ifs;
2690 
2691 	np = ni->nai_np;
2692 
2693 	COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0], fin->fin_v);
2694 
2695 #ifdef	IPFILTER_SYNC
2696 	if ((nat->nat_flags & SI_CLONE) == 0)
2697 		nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2698 #endif
2699 
2700 	nat->nat_me = natsave;
2701 	nat->nat_dir = direction;
2702 	nat->nat_ifps[0] = np->in_ifps[0];
2703 	nat->nat_ifps[1] = np->in_ifps[1];
2704 	nat->nat_ptr = np;
2705 	nat->nat_p = fin->fin_p;
2706 	nat->nat_v = fin->fin_v;
2707 	nat->nat_mssclamp = np->in_mssclamp;
2708 	fr = fin->fin_fr;
2709 	nat->nat_fr = fr;
2710 
2711 	if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2712 		if (appr_new(fin, nat) == -1)
2713 			return -1;
2714 
2715 	if (nat_insert(nat, fin->fin_rev, ifs) == 0) {
2716 		if (ifs->ifs_nat_logging)
2717 			nat_log(nat, (u_int)np->in_redir, ifs);
2718 		np->in_use++;
2719 		if (fr != NULL) {
2720 			MUTEX_ENTER(&fr->fr_lock);
2721 			fr->fr_ref++;
2722 			MUTEX_EXIT(&fr->fr_lock);
2723 		}
2724 		return 0;
2725 	}
2726 
2727 	/*
2728 	 * nat_insert failed, so cleanup time...
2729 	 */
2730 	return -1;
2731 }
2732 
2733 
2734 /* ------------------------------------------------------------------------ */
2735 /* Function:   nat_insert                                                   */
2736 /* Returns:    int - 0 == sucess, -1 == failure                             */
2737 /* Parameters: nat(I) - pointer to NAT structure                            */
2738 /*             rev(I) - flag indicating forward/reverse direction of packet */
2739 /* Write Lock: ipf_nat                                                      */
2740 /*                                                                          */
2741 /* Insert a NAT entry into the hash tables for searching and add it to the  */
2742 /* list of active NAT entries.  Adjust global counters when complete.       */
2743 /* ------------------------------------------------------------------------ */
2744 int	nat_insert(nat, rev, ifs)
2745 nat_t	*nat;
2746 int	rev;
2747 ipf_stack_t *ifs;
2748 {
2749 	u_int hv1, hv2;
2750 	nat_t **natp;
2751 
2752 	/*
2753 	 * Try and return an error as early as possible, so calculate the hash
2754 	 * entry numbers first and then proceed.
2755 	 */
2756 	if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2757 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2758 				  0xffffffff);
2759 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2760 				  ifs->ifs_ipf_nattable_sz);
2761 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2762 				  0xffffffff);
2763 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2764 				  ifs->ifs_ipf_nattable_sz);
2765 	} else {
2766 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2767 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1,
2768 				  ifs->ifs_ipf_nattable_sz);
2769 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2770 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2,
2771 				  ifs->ifs_ipf_nattable_sz);
2772 	}
2773 
2774 	if (ifs->ifs_nat_stats.ns_bucketlen[0][hv1] >= ifs->ifs_fr_nat_maxbucket ||
2775 	    ifs->ifs_nat_stats.ns_bucketlen[1][hv2] >= ifs->ifs_fr_nat_maxbucket) {
2776 		return -1;
2777 	}
2778 
2779 	nat->nat_hv[0] = hv1;
2780 	nat->nat_hv[1] = hv2;
2781 
2782 	MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2783 
2784 	nat->nat_rev = rev;
2785 	nat->nat_ref = 1;
2786 	nat->nat_bytes[0] = 0;
2787 	nat->nat_pkts[0] = 0;
2788 	nat->nat_bytes[1] = 0;
2789 	nat->nat_pkts[1] = 0;
2790 
2791 	nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2792 	nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4, ifs);
2793 
2794 	if (nat->nat_ifnames[1][0] !='\0') {
2795 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2796 		nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4, ifs);
2797 	} else {
2798 		(void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2799 			       LIFNAMSIZ);
2800 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2801 		nat->nat_ifps[1] = nat->nat_ifps[0];
2802 	}
2803 
2804 	nat->nat_next = ifs->ifs_nat_instances;
2805 	nat->nat_pnext = &ifs->ifs_nat_instances;
2806 	if (ifs->ifs_nat_instances)
2807 		ifs->ifs_nat_instances->nat_pnext = &nat->nat_next;
2808 	ifs->ifs_nat_instances = nat;
2809 
2810 	natp = &ifs->ifs_nat_table[0][hv1];
2811 	if (*natp)
2812 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2813 	nat->nat_phnext[0] = natp;
2814 	nat->nat_hnext[0] = *natp;
2815 	*natp = nat;
2816 	ifs->ifs_nat_stats.ns_bucketlen[0][hv1]++;
2817 
2818 	natp = &ifs->ifs_nat_table[1][hv2];
2819 	if (*natp)
2820 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2821 	nat->nat_phnext[1] = natp;
2822 	nat->nat_hnext[1] = *natp;
2823 	*natp = nat;
2824 	ifs->ifs_nat_stats.ns_bucketlen[1][hv2]++;
2825 
2826 	fr_setnatqueue(nat, rev, ifs);
2827 
2828 	ifs->ifs_nat_stats.ns_added++;
2829 	ifs->ifs_nat_stats.ns_inuse++;
2830 	return 0;
2831 }
2832 
2833 
2834 /* ------------------------------------------------------------------------ */
2835 /* Function:    nat_icmperrorlookup                                         */
2836 /* Returns:     nat_t* - point to matching NAT structure                    */
2837 /* Parameters:  fin(I) - pointer to packet information                      */
2838 /*              dir(I) - direction of packet (in/out)                       */
2839 /*                                                                          */
2840 /* Check if the ICMP error message is related to an existing TCP, UDP or    */
2841 /* ICMP query nat entry.  It is assumed that the packet is already of the   */
2842 /* the required length.                                                     */
2843 /* ------------------------------------------------------------------------ */
2844 nat_t *nat_icmperrorlookup(fin, dir)
2845 fr_info_t *fin;
2846 int dir;
2847 {
2848 	int flags = 0, minlen;
2849 	icmphdr_t *orgicmp;
2850 	tcphdr_t *tcp = NULL;
2851 	u_short data[2];
2852 	nat_t *nat;
2853 	ip_t *oip;
2854 	u_int p;
2855 
2856 	/*
2857 	 * Does it at least have the return (basic) IP header ?
2858 	 * Only a basic IP header (no options) should be with an ICMP error
2859 	 * header.  Also, if it's not an error type, then return.
2860 	 */
2861 	if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2862 		return NULL;
2863 
2864 	/*
2865 	 * Check packet size
2866 	 */
2867 	oip = (ip_t *)((char *)fin->fin_dp + 8);
2868 	minlen = IP_HL(oip) << 2;
2869 	if ((minlen < sizeof(ip_t)) ||
2870 	    (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2871 		return NULL;
2872 	/*
2873 	 * Is the buffer big enough for all of it ?  It's the size of the IP
2874 	 * header claimed in the encapsulated part which is of concern.  It
2875 	 * may be too big to be in this buffer but not so big that it's
2876 	 * outside the ICMP packet, leading to TCP deref's causing problems.
2877 	 * This is possible because we don't know how big oip_hl is when we
2878 	 * do the pullup early in fr_check() and thus can't gaurantee it is
2879 	 * all here now.
2880 	 */
2881 #ifdef  _KERNEL
2882 	{
2883 	mb_t *m;
2884 
2885 	m = fin->fin_m;
2886 # if defined(MENTAT)
2887 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2888 		return NULL;
2889 # else
2890 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2891 	    (char *)fin->fin_ip + M_LEN(m))
2892 		return NULL;
2893 # endif
2894 	}
2895 #endif
2896 
2897 	if (fin->fin_daddr != oip->ip_src.s_addr)
2898 		return NULL;
2899 
2900 	p = oip->ip_p;
2901 	if (p == IPPROTO_TCP)
2902 		flags = IPN_TCP;
2903 	else if (p == IPPROTO_UDP)
2904 		flags = IPN_UDP;
2905 	else if (p == IPPROTO_ICMP) {
2906 		orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2907 
2908 		/* see if this is related to an ICMP query */
2909 		if (nat_icmpquerytype4(orgicmp->icmp_type)) {
2910 			data[0] = fin->fin_data[0];
2911 			data[1] = fin->fin_data[1];
2912 			fin->fin_data[0] = 0;
2913 			fin->fin_data[1] = orgicmp->icmp_id;
2914 
2915 			flags = IPN_ICMPERR|IPN_ICMPQUERY;
2916 			/*
2917 			 * NOTE : dir refers to the direction of the original
2918 			 *        ip packet. By definition the icmp error
2919 			 *        message flows in the opposite direction.
2920 			 */
2921 			if (dir == NAT_INBOUND)
2922 				nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2923 						   oip->ip_src);
2924 			else
2925 				nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2926 						    oip->ip_src);
2927 			fin->fin_data[0] = data[0];
2928 			fin->fin_data[1] = data[1];
2929 			return nat;
2930 		}
2931 	}
2932 
2933 	if (flags & IPN_TCPUDP) {
2934 		minlen += 8;		/* + 64bits of data to get ports */
2935 		if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2936 			return NULL;
2937 
2938 		data[0] = fin->fin_data[0];
2939 		data[1] = fin->fin_data[1];
2940 		tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2941 		fin->fin_data[0] = ntohs(tcp->th_dport);
2942 		fin->fin_data[1] = ntohs(tcp->th_sport);
2943 
2944 		if (dir == NAT_INBOUND) {
2945 			nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2946 					   oip->ip_src);
2947 		} else {
2948 			nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2949 					    oip->ip_src);
2950 		}
2951 		fin->fin_data[0] = data[0];
2952 		fin->fin_data[1] = data[1];
2953 		return nat;
2954 	}
2955 	if (dir == NAT_INBOUND)
2956 		return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2957 	else
2958 		return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2959 }
2960 
2961 
2962 /* ------------------------------------------------------------------------ */
2963 /* Function:    nat_icmperror                                               */
2964 /* Returns:     nat_t* - point to matching NAT structure                    */
2965 /* Parameters:  fin(I)    - pointer to packet information                   */
2966 /*              nflags(I) - NAT flags for this packet                       */
2967 /*              dir(I)    - direction of packet (in/out)                    */
2968 /*                                                                          */
2969 /* Fix up an ICMP packet which is an error message for an existing NAT      */
2970 /* session.  This will correct both packet header data and checksums.       */
2971 /*                                                                          */
2972 /* This should *ONLY* be used for incoming ICMP error packets to make sure  */
2973 /* a NAT'd ICMP packet gets correctly recognised.                           */
2974 /* ------------------------------------------------------------------------ */
2975 nat_t *nat_icmperror(fin, nflags, dir)
2976 fr_info_t *fin;
2977 u_int *nflags;
2978 int dir;
2979 {
2980 	u_32_t sum1, sum2, sumd, psum1, psum2, psumd, sumd2;
2981 	struct in_addr in;
2982 	icmphdr_t *icmp, *orgicmp;
2983 	int dlen;
2984 	udphdr_t *udp;
2985 	tcphdr_t *tcp;
2986 	nat_t *nat;
2987 	ip_t *oip;
2988 	if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
2989 		return NULL;
2990 
2991 	/*
2992 	 * nat_icmperrorlookup() looks up nat entry associated with the
2993 	 * offending IP packet and returns pointer to the entry, or NULL
2994 	 * if packet wasn't natted or for `defective' packets.
2995 	 */
2996 
2997 	if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
2998 		return NULL;
2999 
3000 	sumd2 = 0;
3001 	*nflags = IPN_ICMPERR;
3002 	icmp = fin->fin_dp;
3003 	oip = (ip_t *)&icmp->icmp_ip;
3004 	udp = (udphdr_t *)((((char *)oip) + (IP_HL(oip) << 2)));
3005 	tcp = (tcphdr_t *)udp;
3006 	dlen = fin->fin_plen - ((char *)udp - (char *)fin->fin_ip);
3007 
3008 	/*
3009 	 * Need to adjust ICMP header to include the real IP#'s and
3010 	 * port #'s.  There are three steps required.
3011 	 *
3012 	 * Step 1
3013 	 * Fix the IP addresses in the offending IP packet and update
3014 	 * ip header checksum to compensate for the change.
3015 	 *
3016 	 * No update needed here for icmp_cksum because the ICMP checksum
3017 	 * is calculated over the complete ICMP packet, which includes the
3018 	 * changed oip IP addresses and oip->ip_sum.  These two changes
3019 	 * cancel each other out (if the delta for the IP address is x,
3020 	 * then the delta for ip_sum is minus x).
3021 	 */
3022 
3023 	if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
3024 		sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
3025 		in = nat->nat_inip;
3026 		oip->ip_src = in;
3027 	} else {
3028 		sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
3029 		in = nat->nat_outip;
3030 		oip->ip_dst = in;
3031 	}
3032 
3033 	sum2 = LONG_SUM(ntohl(in.s_addr));
3034 	CALC_SUMD(sum1, sum2, sumd);
3035 	fix_datacksum(&oip->ip_sum, sumd);
3036 
3037 	/*
3038 	 * Step 2
3039 	 * Perform other adjustments based on protocol of offending packet.
3040 	 */
3041 
3042 	switch (oip->ip_p) {
3043 		case IPPROTO_TCP :
3044 		case IPPROTO_UDP :
3045 
3046 			/*
3047 			* For offending TCP/UDP IP packets, translate the ports
3048 			* based on the NAT specification.
3049 			*
3050 			* Advance notice : Now it becomes complicated :-)
3051 			*
3052 			* Since the port and IP addresse fields are both part
3053 			* of the TCP/UDP checksum of the offending IP packet,
3054 			* we need to adjust that checksum as well.
3055 			*
3056 			* To further complicate things, the TCP/UDP checksum
3057 			* may not be present.  We must check to see if the
3058 			* length of the data portion is big enough to hold
3059 			* the checksum.  In the UDP case, a test to determine
3060 			* if the checksum is even set is also required.
3061 			*
3062 			* Any changes to an IP address, port or checksum within
3063 			* the ICMP packet requires a change to icmp_cksum.
3064 			*
3065 			* Be extremely careful here ... The change is dependent
3066 			* upon whether or not the TCP/UPD checksum is present.
3067 			*
3068 			* If TCP/UPD checksum is present, the icmp_cksum must
3069 			* compensate for checksum modification resulting from
3070 			* IP address change only.  Port change and resulting
3071 			* data checksum adjustments cancel each other out.
3072 			*
3073 			* If TCP/UDP checksum is not present, icmp_cksum must
3074 			* compensate for port change only.  The IP address
3075 			* change does not modify anything else in this case.
3076 			*/
3077 
3078 			psum1 = 0;
3079 			psum2 = 0;
3080 			psumd = 0;
3081 
3082 			if ((tcp->th_dport == nat->nat_oport) &&
3083 			    (tcp->th_sport != nat->nat_inport)) {
3084 
3085 				/*
3086 				 * Translate the source port.
3087 				 */
3088 
3089 				psum1 = ntohs(tcp->th_sport);
3090 				psum2 = ntohs(nat->nat_inport);
3091 				tcp->th_sport = nat->nat_inport;
3092 
3093 			} else if ((tcp->th_sport == nat->nat_oport) &&
3094 				    (tcp->th_dport != nat->nat_outport)) {
3095 
3096 				/*
3097 				 * Translate the destination port.
3098 				 */
3099 
3100 				psum1 = ntohs(tcp->th_dport);
3101 				psum2 = ntohs(nat->nat_outport);
3102 				tcp->th_dport = nat->nat_outport;
3103 			}
3104 
3105 			if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) {
3106 
3107 				/*
3108 				 * TCP checksum present.
3109 				 *
3110 				 * Adjust data checksum and icmp checksum to
3111 				 * compensate for any IP address change.
3112 				 */
3113 
3114 				sum1 = ntohs(tcp->th_sum);
3115 				fix_datacksum(&tcp->th_sum, sumd);
3116 				sum2 = ntohs(tcp->th_sum);
3117 				sumd2 = sumd << 1;
3118 				CALC_SUMD(sum1, sum2, sumd);
3119 				sumd2 += sumd;
3120 
3121 				/*
3122 				 * Also make data checksum adjustment to
3123 				 * compensate for any port change.
3124 				 */
3125 
3126 				if (psum1 != psum2) {
3127 					CALC_SUMD(psum1, psum2, psumd);
3128 					fix_datacksum(&tcp->th_sum, psumd);
3129 				}
3130 
3131 			} else if ((oip->ip_p == IPPROTO_UDP) &&
3132 				   (dlen >= 8) && (udp->uh_sum != 0)) {
3133 
3134 				/*
3135 				 * The UDP checksum is present and set.
3136 				 *
3137 				 * Adjust data checksum and icmp checksum to
3138 				 * compensate for any IP address change.
3139 				 */
3140 
3141 				sum1 = ntohs(udp->uh_sum);
3142 				fix_datacksum(&udp->uh_sum, sumd);
3143 				sum2 = ntohs(udp->uh_sum);
3144 				sumd2 = sumd << 1;
3145 				CALC_SUMD(sum1, sum2, sumd);
3146 				sumd2 += sumd;
3147 
3148 				/*
3149 				 * Also make data checksum adjustment to
3150 				 * compensate for any port change.
3151 				 */
3152 
3153 				if (psum1 != psum2) {
3154 					CALC_SUMD(psum1, psum2, psumd);
3155 					fix_datacksum(&udp->uh_sum, psumd);
3156 				}
3157 
3158 			} else {
3159 
3160 				/*
3161 				 * Data checksum was not present.
3162 				 *
3163 				 * Compensate for any port change.
3164 				 */
3165 
3166 				CALC_SUMD(psum2, psum1, psumd);
3167 				sumd2 += psumd;
3168 			}
3169 			break;
3170 
3171 		case IPPROTO_ICMP :
3172 
3173 			orgicmp = (icmphdr_t *)udp;
3174 
3175 			if ((nat->nat_dir == NAT_OUTBOUND) &&
3176 			    (orgicmp->icmp_id != nat->nat_inport) &&
3177 			    (dlen >= 8)) {
3178 
3179 				/*
3180 				 * Fix ICMP checksum (of the offening ICMP
3181 				 * query packet) to compensate the change
3182 				 * in the ICMP id of the offending ICMP
3183 				 * packet.
3184 				 *
3185 				 * Since you modify orgicmp->icmp_id with
3186 				 * a delta (say x) and you compensate that
3187 				 * in origicmp->icmp_cksum with a delta
3188 				 * minus x, you don't have to adjust the
3189 				 * overall icmp->icmp_cksum
3190 				 */
3191 
3192 				sum1 = ntohs(orgicmp->icmp_id);
3193 				sum2 = ntohs(nat->nat_inport);
3194 				CALC_SUMD(sum1, sum2, sumd);
3195 				orgicmp->icmp_id = nat->nat_inport;
3196 				fix_datacksum(&orgicmp->icmp_cksum, sumd);
3197 
3198 			} /* nat_dir can't be NAT_INBOUND for icmp queries */
3199 
3200 			break;
3201 
3202 		default :
3203 
3204 			break;
3205 
3206 	} /* switch (oip->ip_p) */
3207 
3208 	/*
3209 	 * Step 3
3210 	 * Make the adjustments to icmp checksum.
3211 	 */
3212 
3213 	if (sumd2 != 0) {
3214 		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3215 		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3216 		fix_incksum(&icmp->icmp_cksum, sumd2);
3217 	}
3218 	return nat;
3219 }
3220 
3221 
/*
 * NB: these lookups don't lock access to the list; it is assumed that this
 * has already been done!
 */
3226 
3227 /* ------------------------------------------------------------------------ */
3228 /* Function:    nat_inlookup                                                */
3229 /* Returns:     nat_t* - NULL == no match,                                  */
3230 /*                       else pointer to matching NAT entry                 */
3231 /* Parameters:  fin(I)    - pointer to packet information                   */
3232 /*              flags(I)  - NAT flags for this packet                       */
3233 /*              p(I)      - protocol for this packet                        */
3234 /*              src(I)    - source IP address                               */
3235 /*              mapdst(I) - destination IP address                          */
3236 /*                                                                          */
3237 /* Lookup a nat entry based on the mapped destination ip address/port and   */
3238 /* real source address/port.  We use this lookup when receiving a packet,   */
3239 /* we're looking for a table entry, based on the destination address.       */
3240 /*                                                                          */
3241 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3242 /*                                                                          */
3243 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3244 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3245 /*                                                                          */
3246 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3247 /*            the packet is of said protocol                                */
3248 /* ------------------------------------------------------------------------ */
3249 nat_t *nat_inlookup(fin, flags, p, src, mapdst)
3250 fr_info_t *fin;
3251 u_int flags, p;
3252 struct in_addr src , mapdst;
3253 {
3254 	u_short sport, dport;
3255 	ipnat_t *ipn;
3256 	u_int sflags;
3257 	nat_t *nat;
3258 	int nflags;
3259 	u_32_t dst;
3260 	void *ifp;
3261 	u_int hv;
3262 	ipf_stack_t *ifs = fin->fin_ifs;
3263 
3264 	if (fin != NULL)
3265 		ifp = fin->fin_ifp;
3266 	else
3267 		ifp = NULL;
3268 	sport = 0;
3269 	dport = 0;
3270 	dst = mapdst.s_addr;
3271 	sflags = flags & NAT_TCPUDPICMP;
3272 
3273 	switch (p)
3274 	{
3275 	case IPPROTO_TCP :
3276 	case IPPROTO_UDP :
3277 		sport = htons(fin->fin_data[0]);
3278 		dport = htons(fin->fin_data[1]);
3279 		break;
3280 	case IPPROTO_ICMP :
3281 		if (flags & IPN_ICMPERR)
3282 			sport = fin->fin_data[1];
3283 		else
3284 			dport = fin->fin_data[1];
3285 		break;
3286 	default :
3287 		break;
3288 	}
3289 
3290 
3291 	if ((flags & SI_WILDP) != 0)
3292 		goto find_in_wild_ports;
3293 
3294 	hv = NAT_HASH_FN(dst, dport, 0xffffffff);
3295 	hv = NAT_HASH_FN(src.s_addr, hv + sport, ifs->ifs_ipf_nattable_sz);
3296 	nat = ifs->ifs_nat_table[1][hv];
3297 	for (; nat; nat = nat->nat_hnext[1]) {
3298 		if (nat->nat_v != 4)
3299 			continue;
3300 
3301 		if (nat->nat_ifps[0] != NULL) {
3302 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3303 				continue;
3304 		} else if (ifp != NULL)
3305 			nat->nat_ifps[0] = ifp;
3306 
3307 		nflags = nat->nat_flags;
3308 
3309 		if (nat->nat_oip.s_addr == src.s_addr &&
3310 		    nat->nat_outip.s_addr == dst &&
3311 		    (((p == 0) &&
3312 		      (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3313 		     || (p == nat->nat_p))) {
3314 			switch (p)
3315 			{
3316 #if 0
3317 			case IPPROTO_GRE :
3318 				if (nat->nat_call[1] != fin->fin_data[0])
3319 					continue;
3320 				break;
3321 #endif
3322 			case IPPROTO_ICMP :
3323 				if ((flags & IPN_ICMPERR) != 0) {
3324 					if (nat->nat_outport != sport)
3325 						continue;
3326 				} else {
3327 					if (nat->nat_outport != dport)
3328 						continue;
3329 				}
3330 				break;
3331 			case IPPROTO_TCP :
3332 			case IPPROTO_UDP :
3333 				if (nat->nat_oport != sport)
3334 					continue;
3335 				if (nat->nat_outport != dport)
3336 					continue;
3337 				break;
3338 			default :
3339 				break;
3340 			}
3341 
3342 			ipn = nat->nat_ptr;
3343 			if ((ipn != NULL) && (nat->nat_aps != NULL))
3344 				if (appr_match(fin, nat) != 0)
3345 					continue;
3346 			return nat;
3347 		}
3348 	}
3349 
3350 	/*
3351 	 * So if we didn't find it but there are wildcard members in the hash
3352 	 * table, go back and look for them.  We do this search and update here
3353 	 * because it is modifying the NAT table and we want to do this only
3354 	 * for the first packet that matches.  The exception, of course, is
3355 	 * for "dummy" (FI_IGNORE) lookups.
3356 	 */
3357 find_in_wild_ports:
3358 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3359 		return NULL;
3360 	if (ifs->ifs_nat_stats.ns_wilds == 0)
3361 		return NULL;
3362 
3363 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3364 
3365 	hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3366 	hv = NAT_HASH_FN(src.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3367 
3368 	WRITE_ENTER(&ifs->ifs_ipf_nat);
3369 
3370 	nat = ifs->ifs_nat_table[1][hv];
3371 	for (; nat; nat = nat->nat_hnext[1]) {
3372 		if (nat->nat_v != 4)
3373 			continue;
3374 
3375 		if (nat->nat_ifps[0] != NULL) {
3376 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3377 				continue;
3378 		} else if (ifp != NULL)
3379 			nat->nat_ifps[0] = ifp;
3380 
3381 		if (nat->nat_p != fin->fin_p)
3382 			continue;
3383 		if (nat->nat_oip.s_addr != src.s_addr ||
3384 		    nat->nat_outip.s_addr != dst)
3385 			continue;
3386 
3387 		nflags = nat->nat_flags;
3388 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3389 			continue;
3390 
3391 		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3392 			       NAT_INBOUND) == 1) {
3393 			if ((fin->fin_flx & FI_IGNORE) != 0)
3394 				break;
3395 			if ((nflags & SI_CLONE) != 0) {
3396 				nat = fr_natclone(fin, nat);
3397 				if (nat == NULL)
3398 					break;
3399 			} else {
3400 				MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3401 				ifs->ifs_nat_stats.ns_wilds--;
3402 				MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3403 			}
3404 			nat->nat_oport = sport;
3405 			nat->nat_outport = dport;
3406 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3407 			nat_tabmove(nat, ifs);
3408 			break;
3409 		}
3410 	}
3411 
3412 	MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3413 
3414 	return nat;
3415 }
3416 
3417 
3418 /* ------------------------------------------------------------------------ */
3419 /* Function:    nat_tabmove                                                 */
3420 /* Returns:     Nil                                                         */
3421 /* Parameters:  nat(I) - pointer to NAT structure                           */
3422 /* Write Lock:  ipf_nat                                                     */
3423 /*                                                                          */
3424 /* This function is only called for TCP/UDP NAT table entries where the     */
3425 /* original was placed in the table without hashing on the ports and we now */
3426 /* want to include hashing on port numbers.                                 */
3427 /* ------------------------------------------------------------------------ */
3428 static void nat_tabmove(nat, ifs)
3429 nat_t *nat;
3430 ipf_stack_t *ifs;
3431 {
3432 	nat_t **natp;
3433 	u_int hv;
3434 
3435 	if (nat->nat_flags & SI_CLONE)
3436 		return;
3437 
3438 	/*
3439 	 * Remove the NAT entry from the old location
3440 	 */
3441 	if (nat->nat_hnext[0])
3442 		nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3443 	*nat->nat_phnext[0] = nat->nat_hnext[0];
3444 	ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3445 
3446 	if (nat->nat_hnext[1])
3447 		nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3448 	*nat->nat_phnext[1] = nat->nat_hnext[1];
3449 	ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3450 
3451 	/*
3452 	 * Add into the NAT table in the new position
3453 	 */
3454 	hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3455 	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3456 			 ifs->ifs_ipf_nattable_sz);
3457 	nat->nat_hv[0] = hv;
3458 	natp = &ifs->ifs_nat_table[0][hv];
3459 	if (*natp)
3460 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3461 	nat->nat_phnext[0] = natp;
3462 	nat->nat_hnext[0] = *natp;
3463 	*natp = nat;
3464 	ifs->ifs_nat_stats.ns_bucketlen[0][hv]++;
3465 
3466 	hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3467 	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3468 			 ifs->ifs_ipf_nattable_sz);
3469 	nat->nat_hv[1] = hv;
3470 	natp = &ifs->ifs_nat_table[1][hv];
3471 	if (*natp)
3472 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3473 	nat->nat_phnext[1] = natp;
3474 	nat->nat_hnext[1] = *natp;
3475 	*natp = nat;
3476 	ifs->ifs_nat_stats.ns_bucketlen[1][hv]++;
3477 }
3478 
3479 
3480 /* ------------------------------------------------------------------------ */
3481 /* Function:    nat_outlookup                                               */
3482 /* Returns:     nat_t* - NULL == no match,                                  */
3483 /*                       else pointer to matching NAT entry                 */
3484 /* Parameters:  fin(I)   - pointer to packet information                    */
3485 /*              flags(I) - NAT flags for this packet                        */
3486 /*              p(I)     - protocol for this packet                         */
3487 /*              src(I)   - source IP address                                */
3488 /*              dst(I)   - destination IP address                           */
3489 /*              rw(I)    - 1 == write lock on ipf_nat held, 0 == read lock. */
3490 /*                                                                          */
3491 /* Lookup a nat entry based on the source 'real' ip address/port and        */
3492 /* destination address/port.  We use this lookup when sending a packet out, */
3493 /* we're looking for a table entry, based on the source address.            */
3494 /*                                                                          */
3495 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3496 /*                                                                          */
3497 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3498 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3499 /*                                                                          */
3500 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3501 /*            the packet is of said protocol                                */
3502 /* ------------------------------------------------------------------------ */
3503 nat_t *nat_outlookup(fin, flags, p, src, dst)
3504 fr_info_t *fin;
3505 u_int flags, p;
3506 struct in_addr src , dst;
3507 {
3508 	u_short sport, dport;
3509 	u_int sflags;
3510 	ipnat_t *ipn;
3511 	u_32_t srcip;
3512 	nat_t *nat;
3513 	int nflags;
3514 	void *ifp;
3515 	u_int hv;
3516 	ipf_stack_t *ifs = fin->fin_ifs;
3517 
3518 	ifp = fin->fin_ifp;
3519 
3520 	srcip = src.s_addr;
3521 	sflags = flags & IPN_TCPUDPICMP;
3522 	sport = 0;
3523 	dport = 0;
3524 
3525 	switch (p)
3526 	{
3527 	case IPPROTO_TCP :
3528 	case IPPROTO_UDP :
3529 		sport = htons(fin->fin_data[0]);
3530 		dport = htons(fin->fin_data[1]);
3531 		break;
3532 	case IPPROTO_ICMP :
3533 		if (flags & IPN_ICMPERR)
3534 			sport = fin->fin_data[1];
3535 		else
3536 			dport = fin->fin_data[1];
3537 		break;
3538 	default :
3539 		break;
3540 	}
3541 
3542 	if ((flags & SI_WILDP) != 0)
3543 		goto find_out_wild_ports;
3544 
3545 	hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3546 	hv = NAT_HASH_FN(dst.s_addr, hv + dport, ifs->ifs_ipf_nattable_sz);
3547 	nat = ifs->ifs_nat_table[0][hv];
3548 	for (; nat; nat = nat->nat_hnext[0]) {
3549 		if (nat->nat_v != 4)
3550 			continue;
3551 
3552 		if (nat->nat_ifps[1] != NULL) {
3553 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3554 				continue;
3555 		} else if (ifp != NULL)
3556 			nat->nat_ifps[1] = ifp;
3557 
3558 		nflags = nat->nat_flags;
3559 
3560 		if (nat->nat_inip.s_addr == srcip &&
3561 		    nat->nat_oip.s_addr == dst.s_addr &&
3562 		    (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3563 		     || (p == nat->nat_p))) {
3564 			switch (p)
3565 			{
3566 #if 0
3567 			case IPPROTO_GRE :
3568 				if (nat->nat_call[1] != fin->fin_data[0])
3569 					continue;
3570 				break;
3571 #endif
3572 			case IPPROTO_TCP :
3573 			case IPPROTO_UDP :
3574 				if (nat->nat_oport != dport)
3575 					continue;
3576 				if (nat->nat_inport != sport)
3577 					continue;
3578 				break;
3579 			default :
3580 				break;
3581 			}
3582 
3583 			ipn = nat->nat_ptr;
3584 			if ((ipn != NULL) && (nat->nat_aps != NULL))
3585 				if (appr_match(fin, nat) != 0)
3586 					continue;
3587 			return nat;
3588 		}
3589 	}
3590 
3591 	/*
3592 	 * So if we didn't find it but there are wildcard members in the hash
3593 	 * table, go back and look for them.  We do this search and update here
3594 	 * because it is modifying the NAT table and we want to do this only
3595 	 * for the first packet that matches.  The exception, of course, is
3596 	 * for "dummy" (FI_IGNORE) lookups.
3597 	 */
3598 find_out_wild_ports:
3599 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3600 		return NULL;
3601 	if (ifs->ifs_nat_stats.ns_wilds == 0)
3602 		return NULL;
3603 
3604 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3605 
3606 	hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3607 	hv = NAT_HASH_FN(dst.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3608 
3609 	WRITE_ENTER(&ifs->ifs_ipf_nat);
3610 
3611 	nat = ifs->ifs_nat_table[0][hv];
3612 	for (; nat; nat = nat->nat_hnext[0]) {
3613 		if (nat->nat_v != 4)
3614 			continue;
3615 
3616 		if (nat->nat_ifps[1] != NULL) {
3617 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3618 				continue;
3619 		} else if (ifp != NULL)
3620 			nat->nat_ifps[1] = ifp;
3621 
3622 		if (nat->nat_p != fin->fin_p)
3623 			continue;
3624 		if ((nat->nat_inip.s_addr != srcip) ||
3625 		    (nat->nat_oip.s_addr != dst.s_addr))
3626 			continue;
3627 
3628 		nflags = nat->nat_flags;
3629 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3630 			continue;
3631 
3632 		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3633 			       NAT_OUTBOUND) == 1) {
3634 			if ((fin->fin_flx & FI_IGNORE) != 0)
3635 				break;
3636 			if ((nflags & SI_CLONE) != 0) {
3637 				nat = fr_natclone(fin, nat);
3638 				if (nat == NULL)
3639 					break;
3640 			} else {
3641 				MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3642 				ifs->ifs_nat_stats.ns_wilds--;
3643 				MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3644 			}
3645 			nat->nat_inport = sport;
3646 			nat->nat_oport = dport;
3647 			if (nat->nat_outport == 0)
3648 				nat->nat_outport = sport;
3649 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3650 			nat_tabmove(nat, ifs);
3651 			break;
3652 		}
3653 	}
3654 
3655 	MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3656 
3657 	return nat;
3658 }
3659 
3660 
3661 /* ------------------------------------------------------------------------ */
3662 /* Function:    nat_lookupredir                                             */
3663 /* Returns:     nat_t* - NULL == no match,                                  */
3664 /*                       else pointer to matching NAT entry                 */
3665 /* Parameters:  np(I) - pointer to description of packet to find NAT table  */
3666 /*                      entry for.                                          */
3667 /*                                                                          */
3668 /* Lookup the NAT tables to search for a matching redirect                  */
3669 /* ------------------------------------------------------------------------ */
3670 nat_t *nat_lookupredir(np, ifs)
3671 natlookup_t *np;
3672 ipf_stack_t *ifs;
3673 {
3674 	fr_info_t fi;
3675 	nat_t *nat;
3676 
3677 	bzero((char *)&fi, sizeof(fi));
3678 	if (np->nl_flags & IPN_IN) {
3679 		fi.fin_data[0] = ntohs(np->nl_realport);
3680 		fi.fin_data[1] = ntohs(np->nl_outport);
3681 	} else {
3682 		fi.fin_data[0] = ntohs(np->nl_inport);
3683 		fi.fin_data[1] = ntohs(np->nl_outport);
3684 	}
3685 	if (np->nl_flags & IPN_TCP)
3686 		fi.fin_p = IPPROTO_TCP;
3687 	else if (np->nl_flags & IPN_UDP)
3688 		fi.fin_p = IPPROTO_UDP;
3689 	else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3690 		fi.fin_p = IPPROTO_ICMP;
3691 
3692 	fi.fin_ifs = ifs;
3693 	/*
3694 	 * We can do two sorts of lookups:
3695 	 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3696 	 * - default: we have the `in' and `out' address, look for `real'.
3697 	 */
3698 	if (np->nl_flags & IPN_IN) {
3699 		if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3700 					np->nl_realip, np->nl_outip))) {
3701 			np->nl_inip = nat->nat_inip;
3702 			np->nl_inport = nat->nat_inport;
3703 		}
3704 	} else {
3705 		/*
3706 		 * If nl_inip is non null, this is a lookup based on the real
3707 		 * ip address. Else, we use the fake.
3708 		 */
3709 		if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3710 					 np->nl_inip, np->nl_outip))) {
3711 
3712 			if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3713 				fr_info_t fin;
3714 				bzero((char *)&fin, sizeof(fin));
3715 				fin.fin_p = nat->nat_p;
3716 				fin.fin_data[0] = ntohs(nat->nat_outport);
3717 				fin.fin_data[1] = ntohs(nat->nat_oport);
3718 				fin.fin_ifs = ifs;
3719 				if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3720 						 nat->nat_outip,
3721 						 nat->nat_oip) != NULL) {
3722 					np->nl_flags &= ~IPN_FINDFORWARD;
3723 				}
3724 			}
3725 
3726 			np->nl_realip = nat->nat_outip;
3727 			np->nl_realport = nat->nat_outport;
3728 		}
3729  	}
3730 
3731 	return nat;
3732 }
3733 
3734 
3735 /* ------------------------------------------------------------------------ */
3736 /* Function:    nat_match                                                   */
3737 /* Returns:     int - 0 == no match, 1 == match                             */
3738 /* Parameters:  fin(I)   - pointer to packet information                    */
3739 /*              np(I)    - pointer to NAT rule                              */
3740 /*                                                                          */
3741 /* Pull the matching of a packet against a NAT rule out of that complex     */
3742 /* loop inside fr_checknatin() and lay it out properly in its own function. */
3743 /* ------------------------------------------------------------------------ */
3744 static int nat_match(fin, np)
3745 fr_info_t *fin;
3746 ipnat_t *np;
3747 {
3748 	frtuc_t *ft;
3749 
3750 	if (fin->fin_v != 4)
3751 		return 0;
3752 
3753 	if (np->in_p && fin->fin_p != np->in_p)
3754 		return 0;
3755 
3756 	if (fin->fin_out) {
3757 		if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3758 			return 0;
3759 		if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3760 		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3761 			return 0;
3762 		if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3763 		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3764 			return 0;
3765 	} else {
3766 		if (!(np->in_redir & NAT_REDIRECT))
3767 			return 0;
3768 		if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3769 		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3770 			return 0;
3771 		if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3772 		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3773 			return 0;
3774 	}
3775 
3776 	ft = &np->in_tuc;
3777 	if (!(fin->fin_flx & FI_TCPUDP) ||
3778 	    (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3779 		if (ft->ftu_scmp || ft->ftu_dcmp)
3780 			return 0;
3781 		return 1;
3782 	}
3783 
3784 	return fr_tcpudpchk(fin, ft);
3785 }
3786 
3787 
3788 /* ------------------------------------------------------------------------ */
3789 /* Function:    nat_update                                                  */
3790 /* Returns:     Nil                                                         */
3791 /* Parameters:	fin(I) - pointer to packet information			    */
3792 /*		nat(I) - pointer to NAT structure			    */
3793 /*              np(I)     - pointer to NAT rule                             */
3794 /* Locks:	nat_lock						    */
3795 /*                                                                          */
3796 /* Updates the lifetime of a NAT table entry for non-TCP packets.  Must be  */
3797 /* called with fin_rev updated - i.e. after calling nat_proto().            */
3798 /* ------------------------------------------------------------------------ */
3799 void nat_update(fin, nat, np)
3800 fr_info_t *fin;
3801 nat_t *nat;
3802 ipnat_t *np;
3803 {
3804 	ipftq_t *ifq, *ifq2;
3805 	ipftqent_t *tqe;
3806 	ipf_stack_t *ifs = fin->fin_ifs;
3807 
3808 	tqe = &nat->nat_tqe;
3809 	ifq = tqe->tqe_ifq;
3810 
3811 	/*
3812 	 * We allow over-riding of NAT timeouts from NAT rules, even for
3813 	 * TCP, however, if it is TCP and there is no rule timeout set,
3814 	 * then do not update the timeout here.
3815 	 */
3816 	if (np != NULL)
3817 		ifq2 = np->in_tqehead[fin->fin_rev];
3818 	else
3819 		ifq2 = NULL;
3820 
3821 	if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3822 		(void) fr_tcp_age(&nat->nat_tqe, fin, ifs->ifs_nat_tqb, 0);
3823 	} else {
3824 		if (ifq2 == NULL) {
3825 			if (nat->nat_p == IPPROTO_UDP)
3826 				ifq2 = &ifs->ifs_nat_udptq;
3827 			else if (nat->nat_p == IPPROTO_ICMP)
3828 				ifq2 = &ifs->ifs_nat_icmptq;
3829 			else
3830 				ifq2 = &ifs->ifs_nat_iptq;
3831 		}
3832 
3833 		fr_movequeue(tqe, ifq, ifq2, ifs);
3834 	}
3835 }
3836 
3837 
3838 /* ------------------------------------------------------------------------ */
3839 /* Function:    fr_checknatout                                              */
3840 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3841 /*                     0 == no packet translation occurred,                 */
3842 /*                     1 == packet was successfully translated.             */
3843 /* Parameters:  fin(I)   - pointer to packet information                    */
3844 /*              passp(I) - pointer to filtering result flags                */
3845 /*                                                                          */
3846 /* Check to see if an outcoming packet should be changed.  ICMP packets are */
3847 /* first checked to see if they match an existing entry (if an error),      */
3848 /* otherwise a search of the current NAT table is made.  If neither results */
3849 /* in a match then a search for a matching NAT rule is made.  Create a new  */
3850 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
3851 /* packet header(s) as required.                                            */
3852 /* ------------------------------------------------------------------------ */
3853 int fr_checknatout(fin, passp)
3854 fr_info_t *fin;
3855 u_32_t *passp;
3856 {
3857 	ipnat_t *np = NULL, *npnext;
3858 	struct ifnet *ifp, *sifp;
3859 	icmphdr_t *icmp = NULL;
3860 	tcphdr_t *tcp = NULL;
3861 	int rval, natfailed;
3862 	u_int nflags = 0;
3863 	u_32_t ipa, iph;
3864 	int natadd = 1;
3865 	frentry_t *fr;
3866 	nat_t *nat;
3867 	ipf_stack_t *ifs = fin->fin_ifs;
3868 
3869 	if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0)
3870 		return 0;
3871 
3872 	natfailed = 0;
3873 	fr = fin->fin_fr;
3874 	sifp = fin->fin_ifp;
3875 	if ((fr != NULL) && !(fr->fr_flags & FR_DUP) &&
3876 	    fr->fr_tifs[fin->fin_rev].fd_ifp &&
3877 	    fr->fr_tifs[fin->fin_rev].fd_ifp != (void *)-1)
3878 		fin->fin_ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3879 	ifp = fin->fin_ifp;
3880 
3881 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3882 		switch (fin->fin_p)
3883 		{
3884 		case IPPROTO_TCP :
3885 			nflags = IPN_TCP;
3886 			break;
3887 		case IPPROTO_UDP :
3888 			nflags = IPN_UDP;
3889 			break;
3890 		case IPPROTO_ICMP :
3891 			icmp = fin->fin_dp;
3892 
3893 			/*
3894 			 * This is an incoming packet, so the destination is
3895 			 * the icmp_id and the source port equals 0
3896 			 */
3897 			if (nat_icmpquerytype4(icmp->icmp_type))
3898 				nflags = IPN_ICMPQUERY;
3899 			break;
3900 		default :
3901 			break;
3902 		}
3903 
3904 		if ((nflags & IPN_TCPUDP))
3905 			tcp = fin->fin_dp;
3906 	}
3907 
3908 	ipa = fin->fin_saddr;
3909 
3910 	READ_ENTER(&ifs->ifs_ipf_nat);
3911 
3912 	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3913 	    (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3914 		/*EMPTY*/;
3915 	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3916 		natadd = 0;
3917 	else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3918 				      fin->fin_src, fin->fin_dst))) {
3919 		nflags = nat->nat_flags;
3920 	} else {
3921 		u_32_t hv, msk, nmsk;
3922 
3923 		/*
3924 		 * If there is no current entry in the nat table for this IP#,
3925 		 * create one for it (if there is a matching rule).
3926 		 */
3927 		msk = 0xffffffff;
3928 		nmsk = ifs->ifs_nat_masks;
3929 maskloop:
3930 		iph = ipa & htonl(msk);
3931 		hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_natrules_sz);
3932 		for (np = ifs->ifs_nat_rules[hv]; np; np = npnext) {
3933 			npnext = np->in_mnext;
3934 			if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
3935 				continue;
3936 			if (np->in_v != fin->fin_v)
3937 				continue;
3938 			if (np->in_p && (np->in_p != fin->fin_p))
3939 				continue;
3940 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3941 				continue;
3942 			if (np->in_flags & IPN_FILTER) {
3943 				if (!nat_match(fin, np))
3944 					continue;
3945 			} else if ((ipa & np->in_inmsk) != np->in_inip)
3946 				continue;
3947 
3948 			if ((fr != NULL) &&
3949 			    !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3950 				continue;
3951 
3952 			if (*np->in_plabel != '\0') {
3953 				if (((np->in_flags & IPN_FILTER) == 0) &&
3954 				    (np->in_dport != tcp->th_dport))
3955 					continue;
3956 				if (appr_ok(fin, tcp, np) == 0)
3957 					continue;
3958 			}
3959 
3960 			ATOMIC_INC32(np->in_use);
3961 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3962 			WRITE_ENTER(&ifs->ifs_ipf_nat);
3963 			nat = nat_new(fin, np, NULL, nflags, NAT_OUTBOUND);
3964 			if (nat != NULL) {
3965 				np->in_use--;
3966 				np->in_hits++;
3967 				MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3968 				break;
3969 			}
3970 			natfailed = -1;
3971 			npnext = np->in_mnext;
3972 			fr_ipnatderef(&np, ifs);
3973 			MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3974 		}
3975 		if ((np == NULL) && (nmsk != 0)) {
3976 			while (nmsk) {
3977 				msk <<= 1;
3978 				if (nmsk & 0x80000000)
3979 					break;
3980 				nmsk <<= 1;
3981 			}
3982 			if (nmsk != 0) {
3983 				nmsk <<= 1;
3984 				goto maskloop;
3985 			}
3986 		}
3987 	}
3988 
3989 	if (nat != NULL) {
3990 		rval = fr_natout(fin, nat, natadd, nflags);
3991 		if (rval == 1) {
3992 			MUTEX_ENTER(&nat->nat_lock);
3993 			nat_update(fin, nat, nat->nat_ptr);
3994 			nat->nat_bytes[1] += fin->fin_plen;
3995 			nat->nat_pkts[1]++;
3996 			nat->nat_ref++;
3997 			MUTEX_EXIT(&nat->nat_lock);
3998 			nat->nat_touched = ifs->ifs_fr_ticks;
3999 			fin->fin_nat = nat;
4000 		}
4001 	} else
4002 		rval = natfailed;
4003 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4004 
4005 	if (rval == -1) {
4006 		if (passp != NULL)
4007 			*passp = FR_BLOCK;
4008 		fin->fin_flx |= FI_BADNAT;
4009 	}
4010 	fin->fin_ifp = sifp;
4011 	return rval;
4012 }
4013 
4014 /* ------------------------------------------------------------------------ */
4015 /* Function:    fr_natout                                                   */
4016 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4017 /*                     1 == packet was successfully translated.             */
4018 /* Parameters:  fin(I)    - pointer to packet information                   */
4019 /*              nat(I)    - pointer to NAT structure                        */
4020 /*              natadd(I) - flag indicating if it is safe to add frag cache */
4021 /*              nflags(I) - NAT flags set for this packet                   */
4022 /*                                                                          */
4023 /* Translate a packet coming "out" on an interface.                         */
4024 /* ------------------------------------------------------------------------ */
4025 int fr_natout(fin, nat, natadd, nflags)
4026 fr_info_t *fin;
4027 nat_t *nat;
4028 int natadd;
4029 u_32_t nflags;
4030 {
4031 	icmphdr_t *icmp;
4032 	u_short *csump;
4033 	u_32_t sumd;
4034 	tcphdr_t *tcp;
4035 	ipnat_t *np;
4036 	int i;
4037 	ipf_stack_t *ifs = fin->fin_ifs;
4038 
4039 	if (fin->fin_v == 6) {
4040 #ifdef	USE_INET6
4041 		return fr_nat6out(fin, nat, natadd, nflags);
4042 #else
4043 		return NULL;
4044 #endif
4045 	}
4046 
4047 #if SOLARIS && defined(_KERNEL)
4048 	net_data_t net_data_p = ifs->ifs_ipf_ipv4;
4049 #endif
4050 
4051 	tcp = NULL;
4052 	icmp = NULL;
4053 	csump = NULL;
4054 	np = nat->nat_ptr;
4055 
4056 	if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4057 		(void) fr_nat_newfrag(fin, 0, nat);
4058 
4059 	/*
4060 	 * Fix up checksums, not by recalculating them, but
4061 	 * simply computing adjustments.
4062 	 * This is only done for STREAMS based IP implementations where the
4063 	 * checksum has already been calculated by IP.  In all other cases,
4064 	 * IPFilter is called before the checksum needs calculating so there
4065 	 * is no call to modify whatever is in the header now.
4066 	 */
4067 	ASSERT(fin->fin_m != NULL);
4068 	if (fin->fin_v == 4 && !NET_IS_HCK_L3_FULL(net_data_p, fin->fin_m)) {
4069 		if (nflags == IPN_ICMPERR) {
4070 			u_32_t s1, s2;
4071 
4072 			s1 = LONG_SUM(ntohl(fin->fin_saddr));
4073 			s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
4074 			CALC_SUMD(s1, s2, sumd);
4075 
4076 			fix_outcksum(&fin->fin_ip->ip_sum, sumd);
4077 		}
4078 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4079     defined(linux) || defined(BRIDGE_IPF)
4080 		else {
4081 			/*
4082 			 * Strictly speaking, this isn't necessary on BSD
4083 			 * kernels because they do checksum calculation after
4084 			 * this code has run BUT if ipfilter is being used
4085 			 * to do NAT as a bridge, that code doesn't exist.
4086 			 */
4087 			if (nat->nat_dir == NAT_OUTBOUND)
4088 				fix_outcksum(&fin->fin_ip->ip_sum,
4089 					    nat->nat_ipsumd);
4090 			else
4091 				fix_incksum(&fin->fin_ip->ip_sum,
4092 				 	   nat->nat_ipsumd);
4093 		}
4094 #endif
4095 	}
4096 
4097 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4098 		if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
4099 			tcp = fin->fin_dp;
4100 
4101 			tcp->th_sport = nat->nat_outport;
4102 			fin->fin_data[0] = ntohs(nat->nat_outport);
4103 		}
4104 
4105 		if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
4106 			icmp = fin->fin_dp;
4107 			icmp->icmp_id = nat->nat_outport;
4108 		}
4109 
4110 		csump = nat_proto(fin, nat, nflags);
4111 	}
4112 
4113 	fin->fin_ip->ip_src = nat->nat_outip;
4114 
4115 	/*
4116 	 * The above comments do not hold for layer 4 (or higher) checksums...
4117 	 */
4118 	if (csump != NULL && !NET_IS_HCK_L4_FULL(net_data_p, fin->fin_m)) {
4119 		if (nflags & IPN_TCPUDP &&
4120 	   	    NET_IS_HCK_L4_PART(net_data_p, fin->fin_m))
4121 			sumd = nat->nat_sumd[1];
4122 		else
4123 			sumd = nat->nat_sumd[0];
4124 
4125 		if (nat->nat_dir == NAT_OUTBOUND)
4126 			fix_outcksum(csump, sumd);
4127 		else
4128 			fix_incksum(csump, sumd);
4129 	}
4130 #ifdef	IPFILTER_SYNC
4131 	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4132 #endif
4133 	/* ------------------------------------------------------------- */
4134 	/* A few quick notes:						 */
4135 	/*	Following are test conditions prior to calling the 	 */
4136 	/*	appr_check routine.					 */
4137 	/*								 */
4138 	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4139 	/*	with a redirect rule, we attempt to match the packet's	 */
4140 	/*	source port against in_dport, otherwise	we'd compare the */
4141 	/*	packet's destination.			 		 */
4142 	/* ------------------------------------------------------------- */
4143 	if ((np != NULL) && (np->in_apr != NULL)) {
4144 		i = appr_check(fin, nat);
4145 		if (i == 0)
4146 			i = 1;
4147 	} else
4148 		i = 1;
4149 	ifs->ifs_nat_stats.ns_mapped[1]++;
4150 	fin->fin_flx |= FI_NATED;
4151 	return i;
4152 }
4153 
4154 
4155 /* ------------------------------------------------------------------------ */
4156 /* Function:    fr_checknatin                                               */
4157 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4158 /*                     0 == no packet translation occurred,                 */
4159 /*                     1 == packet was successfully translated.             */
4160 /* Parameters:  fin(I)   - pointer to packet information                    */
4161 /*              passp(I) - pointer to filtering result flags                */
4162 /*                                                                          */
4163 /* Check to see if an incoming packet should be changed.  ICMP packets are  */
4164 /* first checked to see if they match an existing entry (if an error),      */
4165 /* otherwise a search of the current NAT table is made.  If neither results */
4166 /* in a match then a search for a matching NAT rule is made.  Create a new  */
4167 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
4168 /* packet header(s) as required.                                            */
4169 /* ------------------------------------------------------------------------ */
4170 int fr_checknatin(fin, passp)
4171 fr_info_t *fin;
4172 u_32_t *passp;
4173 {
4174 	u_int nflags, natadd;
4175 	ipnat_t *np, *npnext;
4176 	int rval, natfailed;
4177 	struct ifnet *ifp;
4178 	struct in_addr in;
4179 	icmphdr_t *icmp;
4180 	tcphdr_t *tcp;
4181 	u_short dport;
4182 	nat_t *nat;
4183 	u_32_t iph;
4184 	ipf_stack_t *ifs = fin->fin_ifs;
4185 
4186 	if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0)
4187 		return 0;
4188 
4189 	tcp = NULL;
4190 	icmp = NULL;
4191 	dport = 0;
4192 	natadd = 1;
4193 	nflags = 0;
4194 	natfailed = 0;
4195 	ifp = fin->fin_ifp;
4196 
4197 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4198 		switch (fin->fin_p)
4199 		{
4200 		case IPPROTO_TCP :
4201 			nflags = IPN_TCP;
4202 			break;
4203 		case IPPROTO_UDP :
4204 			nflags = IPN_UDP;
4205 			break;
4206 		case IPPROTO_ICMP :
4207 			icmp = fin->fin_dp;
4208 
4209 			/*
4210 			 * This is an incoming packet, so the destination is
4211 			 * the icmp_id and the source port equals 0
4212 			 */
4213 			if (nat_icmpquerytype4(icmp->icmp_type)) {
4214 				nflags = IPN_ICMPQUERY;
4215 				dport = icmp->icmp_id;
4216 			} break;
4217 		default :
4218 			break;
4219 		}
4220 
4221 		if ((nflags & IPN_TCPUDP)) {
4222 			tcp = fin->fin_dp;
4223 			dport = tcp->th_dport;
4224 		}
4225 	}
4226 
4227 	in = fin->fin_dst;
4228 
4229 	READ_ENTER(&ifs->ifs_ipf_nat);
4230 
4231 	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
4232 	    (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
4233 		/*EMPTY*/;
4234 	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
4235 		natadd = 0;
4236 	else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
4237 				     fin->fin_src, in))) {
4238 		nflags = nat->nat_flags;
4239 	} else {
4240 		u_32_t hv, msk, rmsk;
4241 
4242 		rmsk = ifs->ifs_rdr_masks;
4243 		msk = 0xffffffff;
4244 		/*
4245 		 * If there is no current entry in the nat table for this IP#,
4246 		 * create one for it (if there is a matching rule).
4247 		 */
4248 maskloop:
4249 		iph = in.s_addr & htonl(msk);
4250 		hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_rdrrules_sz);
4251 		for (np = ifs->ifs_rdr_rules[hv]; np; np = npnext) {
4252 			npnext = np->in_rnext;
4253 			if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
4254 				continue;
4255 			if (np->in_v != fin->fin_v)
4256 				continue;
4257 			if (np->in_p && (np->in_p != fin->fin_p))
4258 				continue;
4259 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
4260 				continue;
4261 			if (np->in_flags & IPN_FILTER) {
4262 				if (!nat_match(fin, np))
4263 					continue;
4264 			} else {
4265 				if ((in.s_addr & np->in_outmsk) != np->in_outip)
4266 					continue;
4267 				if (np->in_pmin &&
4268 				    ((ntohs(np->in_pmax) < ntohs(dport)) ||
4269 				     (ntohs(dport) < ntohs(np->in_pmin))))
4270 					continue;
4271 			}
4272 
4273 			if (*np->in_plabel != '\0') {
4274 				if (!appr_ok(fin, tcp, np)) {
4275 					continue;
4276 				}
4277 			}
4278 
4279 			ATOMIC_INC32(np->in_use);
4280 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4281 			WRITE_ENTER(&ifs->ifs_ipf_nat);
4282 			nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
4283 			if (nat != NULL) {
4284 				np->in_use--;
4285 				np->in_hits++;
4286 				MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
4287 				break;
4288 			}
4289 			natfailed = -1;
4290 			npnext = np->in_rnext;
4291 			fr_ipnatderef(&np, ifs);
4292 			MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
4293 		}
4294 
4295 		if ((np == NULL) && (rmsk != 0)) {
4296 			while (rmsk) {
4297 				msk <<= 1;
4298 				if (rmsk & 0x80000000)
4299 					break;
4300 				rmsk <<= 1;
4301 			}
4302 			if (rmsk != 0) {
4303 				rmsk <<= 1;
4304 				goto maskloop;
4305 			}
4306 		}
4307 	}
4308 	if (nat != NULL) {
4309 		rval = fr_natin(fin, nat, natadd, nflags);
4310 		if (rval == 1) {
4311 			MUTEX_ENTER(&nat->nat_lock);
4312 			nat_update(fin, nat, nat->nat_ptr);
4313 			nat->nat_bytes[0] += fin->fin_plen;
4314 			nat->nat_pkts[0]++;
4315 			nat->nat_ref++;
4316 			MUTEX_EXIT(&nat->nat_lock);
4317 			nat->nat_touched = ifs->ifs_fr_ticks;
4318 			fin->fin_nat = nat;
4319 			fin->fin_state = nat->nat_state;
4320 		}
4321 	} else
4322 		rval = natfailed;
4323 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4324 
4325 	if (rval == -1) {
4326 		if (passp != NULL)
4327 			*passp = FR_BLOCK;
4328 		fin->fin_flx |= FI_BADNAT;
4329 	}
4330 	return rval;
4331 }
4332 
4333 
4334 /* ------------------------------------------------------------------------ */
4335 /* Function:    fr_natin                                                    */
4336 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4337 /*                     1 == packet was successfully translated.             */
4338 /* Parameters:  fin(I)    - pointer to packet information                   */
4339 /*              nat(I)    - pointer to NAT structure                        */
4340 /*              natadd(I) - flag indicating if it is safe to add frag cache */
4341 /*              nflags(I) - NAT flags set for this packet                   */
4342 /* Locks Held:  ipf_nat (READ)                                              */
4343 /*                                                                          */
4344 /* Translate a packet coming "in" on an interface.                          */
4345 /* ------------------------------------------------------------------------ */
4346 int fr_natin(fin, nat, natadd, nflags)
4347 fr_info_t *fin;
4348 nat_t *nat;
4349 int natadd;
4350 u_32_t nflags;
4351 {
4352 	icmphdr_t *icmp;
4353 	u_short *csump;
4354 	tcphdr_t *tcp;
4355 	ipnat_t *np;
4356 	int i;
4357 	ipf_stack_t *ifs = fin->fin_ifs;
4358 
4359 	if (fin->fin_v == 6) {
4360 #ifdef	USE_INET6
4361 		return fr_nat6in(fin, nat, natadd, nflags);
4362 #else
4363 		return NULL;
4364 #endif
4365 	}
4366 
4367 #if SOLARIS && defined(_KERNEL)
4368 	net_data_t net_data_p = ifs->ifs_ipf_ipv4;
4369 #endif
4370 
4371 	tcp = NULL;
4372 	csump = NULL;
4373 	np = nat->nat_ptr;
4374 	fin->fin_fr = nat->nat_fr;
4375 
4376 	if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4377 		(void) fr_nat_newfrag(fin, 0, nat);
4378 
4379 	if (np != NULL) {
4380 
4381 	/* ------------------------------------------------------------- */
4382 	/* A few quick notes:						 */
4383 	/*	Following are test conditions prior to calling the 	 */
4384 	/*	appr_check routine.					 */
4385 	/*								 */
4386 	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4387 	/*	with a map rule, we attempt to match the packet's	 */
4388 	/*	source port against in_dport, otherwise	we'd compare the */
4389 	/*	packet's destination.			 		 */
4390 	/* ------------------------------------------------------------- */
4391 		if (np->in_apr != NULL) {
4392 			i = appr_check(fin, nat);
4393 			if (i == -1) {
4394 				return -1;
4395 			}
4396 		}
4397 	}
4398 
4399 #ifdef	IPFILTER_SYNC
4400 	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4401 #endif
4402 
4403 	fin->fin_ip->ip_dst = nat->nat_inip;
4404 	fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4405 	if (nflags & IPN_TCPUDP)
4406 		tcp = fin->fin_dp;
4407 
4408 	/*
4409 	 * Fix up checksums, not by recalculating them, but
4410 	 * simply computing adjustments.
4411 	 * Why only do this for some platforms on inbound packets ?
4412 	 * Because for those that it is done, IP processing is yet to happen
4413 	 * and so the IPv4 header checksum has not yet been evaluated.
4414 	 * Perhaps it should always be done for the benefit of things like
4415 	 * fast forwarding (so that it doesn't need to be recomputed) but with
4416 	 * header checksum offloading, perhaps it is a moot point.
4417 	 */
4418 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4419      defined(__osf__) || defined(linux)
4420 	if (nat->nat_dir == NAT_OUTBOUND)
4421 		fix_incksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
4422 	else
4423 		fix_outcksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
4424 #endif
4425 
4426 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4427 		if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4428 			tcp->th_dport = nat->nat_inport;
4429 			fin->fin_data[1] = ntohs(nat->nat_inport);
4430 		}
4431 
4432 
4433 		if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4434 			icmp = fin->fin_dp;
4435 
4436 			icmp->icmp_id = nat->nat_inport;
4437 		}
4438 
4439 		csump = nat_proto(fin, nat, nflags);
4440 	}
4441 
4442 	/*
4443 	 * In case they are being forwarded, inbound packets always need to have
4444 	 * their checksum adjusted even if hardware checksum validation said OK.
4445 	 */
4446 	if (csump != NULL) {
4447 		if (nat->nat_dir == NAT_OUTBOUND)
4448 			fix_incksum(csump, nat->nat_sumd[0]);
4449 		else
4450 			fix_outcksum(csump, nat->nat_sumd[0]);
4451 	}
4452 
4453 #if SOLARIS && defined(_KERNEL)
4454 	if (nflags & IPN_TCPUDP &&
4455 	    NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) {
4456 		/*
4457 		 * Need to adjust the partial checksum result stored in
4458 		 * db_cksum16, which will be used for validation in IP.
4459 		 * See IP_CKSUM_RECV().
4460 		 * Adjustment data should be the inverse of the IP address
4461 		 * changes, because db_cksum16 is supposed to be the complement
4462 		 * of the pesudo header.
4463 		 */
4464 		csump = &fin->fin_m->b_datap->db_cksum16;
4465 		if (nat->nat_dir == NAT_OUTBOUND)
4466 			fix_outcksum(csump, nat->nat_sumd[1]);
4467 		else
4468 			fix_incksum(csump, nat->nat_sumd[1]);
4469 	}
4470 #endif
4471 
4472 	ifs->ifs_nat_stats.ns_mapped[0]++;
4473 	fin->fin_flx |= FI_NATED;
4474 	if (np != NULL && np->in_tag.ipt_num[0] != 0)
4475 		fin->fin_nattag = &np->in_tag;
4476 	return 1;
4477 }
4478 
4479 
4480 /* ------------------------------------------------------------------------ */
4481 /* Function:    nat_proto                                                   */
4482 /* Returns:     u_short* - pointer to transport header checksum to update,  */
4483 /*                         NULL if the transport protocol is not recognised */
4484 /*                         as needing a checksum update.                    */
4485 /* Parameters:  fin(I)    - pointer to packet information                   */
4486 /*              nat(I)    - pointer to NAT structure                        */
4487 /*              nflags(I) - NAT flags set for this packet                   */
4488 /*                                                                          */
4489 /* Return the pointer to the checksum field for each protocol so understood.*/
4490 /* If support for making other changes to a protocol header is required,    */
4491 /* that is not strictly 'address' translation, such as clamping the MSS in  */
4492 /* TCP down to a specific value, then do it from here.                      */
4493 /* ------------------------------------------------------------------------ */
4494 u_short *nat_proto(fin, nat, nflags)
4495 fr_info_t *fin;
4496 nat_t *nat;
4497 u_int nflags;
4498 {
4499 	icmphdr_t *icmp;
4500 	struct icmp6_hdr *icmp6;
4501 	u_short *csump;
4502 	tcphdr_t *tcp;
4503 	udphdr_t *udp;
4504 
4505 	csump = NULL;
4506 	if (fin->fin_out == 0) {
4507 		fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4508 	} else {
4509 		fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4510 	}
4511 
4512 	switch (fin->fin_p)
4513 	{
4514 	case IPPROTO_TCP :
4515 		tcp = fin->fin_dp;
4516 
4517 		csump = &tcp->th_sum;
4518 
4519 		/*
4520 		 * Do a MSS CLAMPING on a SYN packet,
4521 		 * only deal IPv4 for now.
4522 		 */
4523 		if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4524 			nat_mssclamp(tcp, nat->nat_mssclamp, csump);
4525 
4526 		break;
4527 
4528 	case IPPROTO_UDP :
4529 		udp = fin->fin_dp;
4530 
4531 		if (udp->uh_sum)
4532 			csump = &udp->uh_sum;
4533 		break;
4534 
4535 	case IPPROTO_ICMP :
4536 		icmp = fin->fin_dp;
4537 
4538 		if ((nflags & IPN_ICMPQUERY) != 0) {
4539 			if (icmp->icmp_cksum != 0)
4540 				csump = &icmp->icmp_cksum;
4541 		}
4542 		break;
4543 
4544 	case IPPROTO_ICMPV6 :
4545 		icmp6 = fin->fin_dp;
4546 
4547 		if ((nflags & IPN_ICMPQUERY) != 0) {
4548 			if (icmp6->icmp6_cksum != 0)
4549 				csump = &icmp6->icmp6_cksum;
4550 		}
4551 		break;
4552 	}
4553 	return csump;
4554 }
4555 
4556 
4557 /* ------------------------------------------------------------------------ */
4558 /* Function:    fr_natunload                                                */
4559 /* Returns:     Nil                                                         */
4560 /* Parameters:  Nil                                                         */
4561 /*                                                                          */
4562 /* Free all memory used by NAT structures allocated at runtime.             */
4563 /* ------------------------------------------------------------------------ */
4564 void fr_natunload(ifs)
4565 ipf_stack_t *ifs;
4566 {
4567 	ipftq_t *ifq, *ifqnext;
4568 
4569 	(void) nat_clearlist(ifs);
4570 	(void) nat_flushtable(ifs);
4571 
4572 	/*
4573 	 * Proxy timeout queues are not cleaned here because although they
4574 	 * exist on the NAT list, appr_unload is called after fr_natunload
4575 	 * and the proxies actually are responsible for them being created.
4576 	 * Should the proxy timeouts have their own list?  There's no real
4577 	 * justification as this is the only complication.
4578 	 */
4579 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4580 		ifqnext = ifq->ifq_next;
4581 		if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4582 		    (fr_deletetimeoutqueue(ifq) == 0))
4583 			fr_freetimeoutqueue(ifq, ifs);
4584 	}
4585 
4586 	if (ifs->ifs_nat_table[0] != NULL) {
4587 		KFREES(ifs->ifs_nat_table[0],
4588 		       sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4589 		ifs->ifs_nat_table[0] = NULL;
4590 	}
4591 	if (ifs->ifs_nat_table[1] != NULL) {
4592 		KFREES(ifs->ifs_nat_table[1],
4593 		       sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4594 		ifs->ifs_nat_table[1] = NULL;
4595 	}
4596 	if (ifs->ifs_nat_rules != NULL) {
4597 		KFREES(ifs->ifs_nat_rules,
4598 		       sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
4599 		ifs->ifs_nat_rules = NULL;
4600 	}
4601 	if (ifs->ifs_rdr_rules != NULL) {
4602 		KFREES(ifs->ifs_rdr_rules,
4603 		       sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
4604 		ifs->ifs_rdr_rules = NULL;
4605 	}
4606 	if (ifs->ifs_maptable != NULL) {
4607 		KFREES(ifs->ifs_maptable,
4608 		       sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
4609 		ifs->ifs_maptable = NULL;
4610 	}
4611 	if (ifs->ifs_nat_stats.ns_bucketlen[0] != NULL) {
4612 		KFREES(ifs->ifs_nat_stats.ns_bucketlen[0],
4613 		       sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4614 		ifs->ifs_nat_stats.ns_bucketlen[0] = NULL;
4615 	}
4616 	if (ifs->ifs_nat_stats.ns_bucketlen[1] != NULL) {
4617 		KFREES(ifs->ifs_nat_stats.ns_bucketlen[1],
4618 		       sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4619 		ifs->ifs_nat_stats.ns_bucketlen[1] = NULL;
4620 	}
4621 
4622 	if (ifs->ifs_fr_nat_maxbucket_reset == 1)
4623 		ifs->ifs_fr_nat_maxbucket = 0;
4624 
4625 	if (ifs->ifs_fr_nat_init == 1) {
4626 		ifs->ifs_fr_nat_init = 0;
4627 		fr_sttab_destroy(ifs->ifs_nat_tqb);
4628 
4629 		RW_DESTROY(&ifs->ifs_ipf_natfrag);
4630 		RW_DESTROY(&ifs->ifs_ipf_nat);
4631 
4632 		MUTEX_DESTROY(&ifs->ifs_ipf_nat_new);
4633 		MUTEX_DESTROY(&ifs->ifs_ipf_natio);
4634 
4635 		MUTEX_DESTROY(&ifs->ifs_nat_udptq.ifq_lock);
4636 		MUTEX_DESTROY(&ifs->ifs_nat_icmptq.ifq_lock);
4637 		MUTEX_DESTROY(&ifs->ifs_nat_iptq.ifq_lock);
4638 	}
4639 }
4640 
4641 
4642 /* ------------------------------------------------------------------------ */
4643 /* Function:    fr_natexpire                                                */
4644 /* Returns:     Nil                                                         */
4645 /* Parameters:  Nil                                                         */
4646 /*                                                                          */
4647 /* Check all of the timeout queues for entries at the top which need to be  */
4648 /* expired.                                                                 */
4649 /* ------------------------------------------------------------------------ */
4650 void fr_natexpire(ifs)
4651 ipf_stack_t *ifs;
4652 {
4653 	ipftq_t *ifq, *ifqnext;
4654 	ipftqent_t *tqe, *tqn;
4655 	int i;
4656 	SPL_INT(s);
4657 
4658 	SPL_NET(s);
4659 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4660 	for (ifq = ifs->ifs_nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4661 		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4662 			if (tqe->tqe_die > ifs->ifs_fr_ticks)
4663 				break;
4664 			tqn = tqe->tqe_next;
4665 			nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4666 		}
4667 	}
4668 
4669 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4670 		ifqnext = ifq->ifq_next;
4671 
4672 		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4673 			if (tqe->tqe_die > ifs->ifs_fr_ticks)
4674 				break;
4675 			tqn = tqe->tqe_next;
4676 			nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4677 		}
4678 	}
4679 
4680 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4681 		ifqnext = ifq->ifq_next;
4682 
4683 		if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4684 		    (ifq->ifq_ref == 0)) {
4685 			fr_freetimeoutqueue(ifq, ifs);
4686 		}
4687 	}
4688 
4689 	if (ifs->ifs_nat_doflush != 0) {
4690 		(void) nat_extraflush(2, ifs);
4691 		ifs->ifs_nat_doflush = 0;
4692 	}
4693 
4694 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4695 	SPL_X(s);
4696 }
4697 
4698 
4699 /* ------------------------------------------------------------------------ */
4700 /* Function:    fr_nataddrsync                                              */
4701 /* Returns:     Nil                                                         */
4702 /* Parameters:  ifp(I) -  pointer to network interface                      */
4703 /*              addr(I) - pointer to new network address                    */
4704 /*                                                                          */
4705 /* Walk through all of the currently active NAT sessions, looking for those */
4706 /* which need to have their translated address updated (where the interface */
4707 /* matches the one passed in) and change it, recalculating the checksum sum */
4708 /* difference too.                                                          */
4709 /* ------------------------------------------------------------------------ */
4710 void fr_nataddrsync(v, ifp, addr, ifs)
4711 int v;
4712 void *ifp;
4713 void *addr;
4714 ipf_stack_t *ifs;
4715 {
4716 	u_32_t sum1, sum2, sumd;
4717 	nat_t *nat;
4718 	ipnat_t *np;
4719 	SPL_INT(s);
4720 
4721 	if (ifs->ifs_fr_running <= 0)
4722 		return;
4723 
4724 	SPL_NET(s);
4725 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4726 
4727 	if (ifs->ifs_fr_running <= 0) {
4728 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4729 		return;
4730 	}
4731 
4732 	/*
4733 	 * Change IP addresses for NAT sessions for any protocol except TCP
4734 	 * since it will break the TCP connection anyway.  The only rules
4735 	 * which will get changed are those which are "map ... -> 0/32",
4736 	 * where the rule specifies the address is taken from the interface.
4737 	 */
4738 	for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4739 		if (addr != NULL) {
4740 			if (((ifp != NULL) && ifp != (nat->nat_ifps[0])) ||
4741 			    ((nat->nat_flags & IPN_TCP) != 0))
4742 				continue;
4743 			if ((np = nat->nat_ptr) == NULL)
4744 				continue;
4745 			if (v == 4 && np->in_v == 4) {
4746 				if (np->in_nip || np->in_outmsk != 0xffffffff)
4747 					continue;
4748 				/*
4749 				 * Change the map-to address to be the same as
4750 				 * the new one.
4751 				 */
4752 				sum1 = nat->nat_outip.s_addr;
4753 				nat->nat_outip = *(struct in_addr *)addr;
4754 				sum2 = nat->nat_outip.s_addr;
4755 			} else if (v == 6 && np->in_v == 6) {
4756 				if (!IP6_ISZERO(&np->in_next6.in6) ||
4757 				    !IP6_ISONES(&np->in_out[1].in6))
4758 					continue;
4759 				/*
4760 				 * Change the map-to address to be the same as
4761 				 * the new one.
4762 				 */
4763 				nat->nat_outip6.in6 = *(struct in6_addr *)addr;
4764 			} else
4765 				continue;
4766 
4767 		} else if (((ifp == NULL) || (ifp == nat->nat_ifps[0])) &&
4768 		    !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr)) {
4769 			if (np->in_v == 4 && (v == 4 || v == 0)) {
4770 				struct in_addr in;
4771 				if (np->in_outmsk != 0xffffffff || np->in_nip)
4772 					continue;
4773 				/*
4774 				 * Change the map-to address to be the same as
4775 				 * the new one.
4776 				 */
4777 				sum1 = nat->nat_outip.s_addr;
4778 				if (fr_ifpaddr(4, FRI_NORMAL, nat->nat_ifps[0],
4779 					       &in, NULL, ifs) != -1)
4780 					nat->nat_outip = in;
4781 				sum2 = nat->nat_outip.s_addr;
4782 			} else if (np->in_v == 6 && (v == 6 || v == 0)) {
4783 				struct in6_addr in6;
4784 				if (!IP6_ISZERO(&np->in_next6.in6) ||
4785 				    !IP6_ISONES(&np->in_out[1].in6))
4786 					continue;
4787 				/*
4788 				 * Change the map-to address to be the same as
4789 				 * the new one.
4790 				 */
4791 				if (fr_ifpaddr(6, FRI_NORMAL, nat->nat_ifps[0],
4792 					       (void *)&in6, NULL, ifs) != -1)
4793 					nat->nat_outip6.in6 = in6;
4794 			} else
4795 				continue;
4796 		} else {
4797 			continue;
4798 		}
4799 
4800 		if (sum1 == sum2)
4801 			continue;
4802 		/*
4803 		 * Readjust the checksum adjustment to take into
4804 		 * account the new IP#.
4805 		 */
4806 		CALC_SUMD(sum1, sum2, sumd);
4807 		/* XXX - dont change for TCP when solaris does
4808 		 * hardware checksumming.
4809 		 */
4810 		sumd += nat->nat_sumd[0];
4811 		nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4812 		nat->nat_sumd[1] = nat->nat_sumd[0];
4813 	}
4814 
4815 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4816 	SPL_X(s);
4817 }
4818 
4819 
4820 /* ------------------------------------------------------------------------ */
4821 /* Function:    fr_natifpsync                                               */
4822 /* Returns:     Nil                                                         */
4823 /* Parameters:  action(I) - how we are syncing                              */
4824 /*              ifp(I)    - pointer to network interface                    */
4825 /*              name(I)   - name of interface to sync to                    */
4826 /*                                                                          */
4827 /* This function is used to resync the mapping of interface names and their */
4828 /* respective 'pointers'.  For "action == IPFSYNC_RESYNC", resync all       */
4829 /* interfaces by doing a new lookup of name to 'pointer'.  For "action ==   */
4830 /* IPFSYNC_NEWIFP", treat ifp as the new pointer value associated with      */
4831 /* "name" and for "action == IPFSYNC_OLDIFP", ifp is a pointer for which    */
4832 /* there is no longer any interface associated with it.                     */
4833 /* ------------------------------------------------------------------------ */
4834 void fr_natifpsync(action, v, ifp, name, ifs)
4835 int action, v;
4836 void *ifp;
4837 char *name;
4838 ipf_stack_t *ifs;
4839 {
4840 #if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL)
4841 	int s;
4842 #endif
4843 	nat_t *nat;
4844 	ipnat_t *n;
4845 	int nv;
4846 
4847 	if (ifs->ifs_fr_running <= 0)
4848 		return;
4849 
4850 	SPL_NET(s);
4851 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4852 
4853 	if (ifs->ifs_fr_running <= 0) {
4854 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4855 		return;
4856 	}
4857 
4858 	switch (action)
4859 	{
4860 	case IPFSYNC_RESYNC :
4861 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4862 			nv = (v == 0) ? nat->nat_v : v;
4863 			if (nat->nat_v != nv)
4864 				continue;
4865 			if ((ifp == nat->nat_ifps[0]) ||
4866 			    (nat->nat_ifps[0] == (void *)-1)) {
4867 				nat->nat_ifps[0] =
4868 				    fr_resolvenic(nat->nat_ifnames[0], nv, ifs);
4869 			}
4870 
4871 			if ((ifp == nat->nat_ifps[1]) ||
4872 			    (nat->nat_ifps[1] == (void *)-1)) {
4873 				nat->nat_ifps[1] =
4874 				    fr_resolvenic(nat->nat_ifnames[1], nv, ifs);
4875 			}
4876 		}
4877 
4878 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4879 			nv = (v == 0) ? (int)n->in_v : v;
4880 			if ((int)n->in_v != nv)
4881 				continue;
4882 			if (n->in_ifps[0] == ifp ||
4883 			    n->in_ifps[0] == (void *)-1) {
4884 				n->in_ifps[0] =
4885 				    fr_resolvenic(n->in_ifnames[0], nv, ifs);
4886 			}
4887 			if (n->in_ifps[1] == ifp ||
4888 			    n->in_ifps[1] == (void *)-1) {
4889 				n->in_ifps[1] =
4890 				    fr_resolvenic(n->in_ifnames[1], nv, ifs);
4891 			}
4892 		}
4893 		break;
4894 	case IPFSYNC_NEWIFP :
4895 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4896 			if (nat->nat_v != v)
4897 				continue;
4898 			if (!strncmp(name, nat->nat_ifnames[0],
4899 				     sizeof(nat->nat_ifnames[0])))
4900 				nat->nat_ifps[0] = ifp;
4901 			if (!strncmp(name, nat->nat_ifnames[1],
4902 				     sizeof(nat->nat_ifnames[1])))
4903 				nat->nat_ifps[1] = ifp;
4904 		}
4905 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4906 			if ((int)n->in_v != v)
4907 				continue;
4908 			if (!strncmp(name, n->in_ifnames[0],
4909 				     sizeof(n->in_ifnames[0])))
4910 				n->in_ifps[0] = ifp;
4911 			if (!strncmp(name, n->in_ifnames[1],
4912 				     sizeof(n->in_ifnames[1])))
4913 				n->in_ifps[1] = ifp;
4914 		}
4915 		break;
4916 	case IPFSYNC_OLDIFP :
4917 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4918 			if (nat->nat_v != v)
4919 				continue;
4920 			if (ifp == nat->nat_ifps[0])
4921 				nat->nat_ifps[0] = (void *)-1;
4922 			if (ifp == nat->nat_ifps[1])
4923 				nat->nat_ifps[1] = (void *)-1;
4924 		}
4925 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4926 			if ((int)n->in_v != v)
4927 				continue;
4928 			if (n->in_ifps[0] == ifp)
4929 				n->in_ifps[0] = (void *)-1;
4930 			if (n->in_ifps[1] == ifp)
4931 				n->in_ifps[1] = (void *)-1;
4932 		}
4933 		break;
4934 	}
4935 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4936 	SPL_X(s);
4937 }
4938 
4939 
4940 /* ------------------------------------------------------------------------ */
4941 /* Function:    nat_icmpquerytype4                                          */
4942 /* Returns:     int - 1 == success, 0 == failure                            */
4943 /* Parameters:  icmptype(I) - ICMP type number                              */
4944 /*                                                                          */
4945 /* Tests to see if the ICMP type number passed is a query/response type or  */
4946 /* not.                                                                     */
4947 /* ------------------------------------------------------------------------ */
4948 static INLINE int nat_icmpquerytype4(icmptype)
4949 int icmptype;
4950 {
4951 
4952 	/*
4953 	 * For the ICMP query NAT code, it is essential that both the query
4954 	 * and the reply match on the NAT rule. Because the NAT structure
4955 	 * does not keep track of the icmptype, and a single NAT structure
4956 	 * is used for all icmp types with the same src, dest and id, we
4957 	 * simply define the replies as queries as well. The funny thing is,
4958 	 * altough it seems silly to call a reply a query, this is exactly
4959 	 * as it is defined in the IPv4 specification
4960 	 */
4961 
4962 	switch (icmptype)
4963 	{
4964 
4965 	case ICMP_ECHOREPLY:
4966 	case ICMP_ECHO:
4967 	/* route aedvertisement/solliciation is currently unsupported: */
4968 	/* it would require rewriting the ICMP data section            */
4969 	case ICMP_TSTAMP:
4970 	case ICMP_TSTAMPREPLY:
4971 	case ICMP_IREQ:
4972 	case ICMP_IREQREPLY:
4973 	case ICMP_MASKREQ:
4974 	case ICMP_MASKREPLY:
4975 		return 1;
4976 	default:
4977 		return 0;
4978 	}
4979 }
4980 
4981 
4982 /* ------------------------------------------------------------------------ */
4983 /* Function:    nat_log                                                     */
4984 /* Returns:     Nil                                                         */
4985 /* Parameters:  nat(I)  - pointer to NAT structure                          */
4986 /*              type(I) - type of log entry to create                       */
4987 /*                                                                          */
4988 /* Creates a NAT log entry.                                                 */
4989 /* ------------------------------------------------------------------------ */
4990 void nat_log(nat, type, ifs)
4991 struct nat *nat;
4992 u_int type;
4993 ipf_stack_t *ifs;
4994 {
4995 #ifdef	IPFILTER_LOG
4996 # ifndef LARGE_NAT
4997 	struct ipnat *np;
4998 	int rulen;
4999 # endif
5000 	struct natlog natl;
5001 	void *items[1];
5002 	size_t sizes[1];
5003 	int types[1];
5004 
5005 	natl.nlg_inip = nat->nat_inip6;
5006 	natl.nlg_outip = nat->nat_outip6;
5007 	natl.nlg_origip = nat->nat_oip6;
5008 	natl.nlg_bytes[0] = nat->nat_bytes[0];
5009 	natl.nlg_bytes[1] = nat->nat_bytes[1];
5010 	natl.nlg_pkts[0] = nat->nat_pkts[0];
5011 	natl.nlg_pkts[1] = nat->nat_pkts[1];
5012 	natl.nlg_origport = nat->nat_oport;
5013 	natl.nlg_inport = nat->nat_inport;
5014 	natl.nlg_outport = nat->nat_outport;
5015 	natl.nlg_p = nat->nat_p;
5016 	natl.nlg_type = type;
5017 	natl.nlg_rule = -1;
5018 	natl.nlg_v = nat->nat_v;
5019 # ifndef LARGE_NAT
5020 	if (nat->nat_ptr != NULL) {
5021 		for (rulen = 0, np = ifs->ifs_nat_list; np;
5022 		     np = np->in_next, rulen++)
5023 			if (np == nat->nat_ptr) {
5024 				natl.nlg_rule = rulen;
5025 				break;
5026 			}
5027 	}
5028 # endif
5029 	items[0] = &natl;
5030 	sizes[0] = sizeof(natl);
5031 	types[0] = 0;
5032 
5033 	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1, ifs);
5034 #endif
5035 }
5036 
5037 
#if defined(__OpenBSD__)
/* ------------------------------------------------------------------------ */
/* Function:    nat_ifdetach                                                */
/* Returns:     Nil                                                         */
/* Parameters:  ifp(I) - pointer to network interface                       */
/*              ifs(I) - pointer to the ipf stack instance                  */
/*                                                                          */
/* Compatibility interface for OpenBSD to trigger the correct updating of   */
/* interface references within IPFilter.                                    */
/* ------------------------------------------------------------------------ */
void nat_ifdetach(ifp, ifs)
void *ifp;
ipf_stack_t *ifs;
{
	/* All of the work is done by the generic resync entry point. */
	frsync(ifp, ifs);
}
#endif
5055 
5056 
5057 /* ------------------------------------------------------------------------ */
5058 /* Function:    fr_ipnatderef                                               */
5059 /* Returns:     Nil                                                         */
5060 /* Parameters:  inp(I) - pointer to pointer to NAT rule                     */
5061 /* Write Locks: ipf_nat                                                     */
5062 /*                                                                          */
5063 /* ------------------------------------------------------------------------ */
5064 void fr_ipnatderef(inp, ifs)
5065 ipnat_t **inp;
5066 ipf_stack_t *ifs;
5067 {
5068 	ipnat_t *in;
5069 
5070 	in = *inp;
5071 	*inp = NULL;
5072 	in->in_use--;
5073 	if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
5074 		if (in->in_apr)
5075 			appr_free(in->in_apr);
5076 		KFREE(in);
5077 		ifs->ifs_nat_stats.ns_rules--;
5078 #ifdef notdef
5079 #if SOLARIS
5080 		if (ifs->ifs_nat_stats.ns_rules == 0)
5081 			ifs->ifs_pfil_delayed_copy = 1;
5082 #endif
5083 #endif
5084 	}
5085 }
5086 
5087 
5088 /* ------------------------------------------------------------------------ */
5089 /* Function:    fr_natderef                                                 */
5090 /* Returns:     Nil                                                         */
5091 /* Parameters:  isp(I) - pointer to pointer to NAT table entry              */
5092 /*                                                                          */
5093 /* Decrement the reference counter for this NAT table entry and free it if  */
5094 /* there are no more things using it.                                       */
5095 /*                                                                          */
5096 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */
5097 /* structure *because* it only gets called on paths _after_ nat_ref has been*/
5098 /* incremented.  If nat_ref == 1 then we shouldn't decrement it here        */
5099 /* because nat_delete() will do that and send nat_ref to -1.                */
5100 /*                                                                          */
5101 /* Holding the lock on nat_lock is required to serialise nat_delete() being */
5102 /* called from a NAT flush ioctl with a deref happening because of a packet.*/
5103 /* ------------------------------------------------------------------------ */
5104 void fr_natderef(natp, ifs)
5105 nat_t **natp;
5106 ipf_stack_t *ifs;
5107 {
5108 	nat_t *nat;
5109 
5110 	nat = *natp;
5111 	*natp = NULL;
5112 
5113 	MUTEX_ENTER(&nat->nat_lock);
5114 	if (nat->nat_ref > 1) {
5115 		nat->nat_ref--;
5116 		MUTEX_EXIT(&nat->nat_lock);
5117 		return;
5118 	}
5119 	MUTEX_EXIT(&nat->nat_lock);
5120 
5121 	WRITE_ENTER(&ifs->ifs_ipf_nat);
5122 	nat_delete(nat, NL_EXPIRE, ifs);
5123 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5124 }
5125 
5126 
5127 /* ------------------------------------------------------------------------ */
5128 /* Function:    fr_natclone                                                 */
5129 /* Returns:     ipstate_t* - NULL == cloning failed,                        */
5130 /*                           else pointer to new state structure            */
5131 /* Parameters:  fin(I) - pointer to packet information                      */
5132 /*              is(I)  - pointer to master state structure                  */
5133 /* Write Lock:  ipf_nat                                                     */
5134 /*                                                                          */
5135 /* Create a "duplcate" state table entry from the master.                   */
5136 /* ------------------------------------------------------------------------ */
5137 nat_t *fr_natclone(fin, nat)
5138 fr_info_t *fin;
5139 nat_t *nat;
5140 {
5141 	frentry_t *fr;
5142 	nat_t *clone;
5143 	ipnat_t *np;
5144 	ipf_stack_t *ifs = fin->fin_ifs;
5145 
5146 	KMALLOC(clone, nat_t *);
5147 	if (clone == NULL)
5148 		return NULL;
5149 	bcopy((char *)nat, (char *)clone, sizeof(*clone));
5150 
5151 	MUTEX_NUKE(&clone->nat_lock);
5152 
5153 	clone->nat_aps = NULL;
5154 	/*
5155 	 * Initialize all these so that nat_delete() doesn't cause a crash.
5156 	 */
5157 	clone->nat_tqe.tqe_pnext = NULL;
5158 	clone->nat_tqe.tqe_next = NULL;
5159 	clone->nat_tqe.tqe_ifq = NULL;
5160 	clone->nat_tqe.tqe_parent = clone;
5161 
5162 	clone->nat_flags &= ~SI_CLONE;
5163 	clone->nat_flags |= SI_CLONED;
5164 
5165 	if (clone->nat_hm)
5166 		clone->nat_hm->hm_ref++;
5167 
5168 	if (nat_insert(clone, fin->fin_rev, ifs) == -1) {
5169 		KFREE(clone);
5170 		return NULL;
5171 	}
5172 	np = clone->nat_ptr;
5173 	if (np != NULL) {
5174 		if (ifs->ifs_nat_logging)
5175 			nat_log(clone, (u_int)np->in_redir, ifs);
5176 		np->in_use++;
5177 	}
5178 	fr = clone->nat_fr;
5179 	if (fr != NULL) {
5180 		MUTEX_ENTER(&fr->fr_lock);
5181 		fr->fr_ref++;
5182 		MUTEX_EXIT(&fr->fr_lock);
5183 	}
5184 
5185 	/*
5186 	 * Because the clone is created outside the normal loop of things and
5187 	 * TCP has special needs in terms of state, initialise the timeout
5188 	 * state of the new NAT from here.
5189 	 */
5190 	if (clone->nat_p == IPPROTO_TCP) {
5191 		(void) fr_tcp_age(&clone->nat_tqe, fin, ifs->ifs_nat_tqb,
5192 				  clone->nat_flags);
5193 	}
5194 #ifdef	IPFILTER_SYNC
5195 	clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
5196 #endif
5197 	if (ifs->ifs_nat_logging)
5198 		nat_log(clone, NL_CLONE, ifs);
5199 	return clone;
5200 }
5201 
5202 
5203 /* ------------------------------------------------------------------------ */
5204 /* Function:   nat_wildok                                                   */
5205 /* Returns:    int - 1 == packet's ports match wildcards                    */
5206 /*                   0 == packet's ports don't match wildcards              */
5207 /* Parameters: nat(I)   - NAT entry                                         */
5208 /*             sport(I) - source port                                       */
5209 /*             dport(I) - destination port                                  */
5210 /*             flags(I) - wildcard flags                                    */
5211 /*             dir(I)   - packet direction                                  */
5212 /*                                                                          */
5213 /* Use NAT entry and packet direction to determine which combination of     */
5214 /* wildcard flags should be used.                                           */
5215 /* ------------------------------------------------------------------------ */
5216 int nat_wildok(nat, sport, dport, flags, dir)
5217 nat_t *nat;
5218 int sport;
5219 int dport;
5220 int flags;
5221 int dir;
5222 {
5223 	/*
5224 	 * When called by       dir is set to
5225 	 * nat_inlookup         NAT_INBOUND (0)
5226 	 * nat_outlookup        NAT_OUTBOUND (1)
5227 	 *
5228 	 * We simply combine the packet's direction in dir with the original
5229 	 * "intended" direction of that NAT entry in nat->nat_dir to decide
5230 	 * which combination of wildcard flags to allow.
5231 	 */
5232 
5233 	switch ((dir << 1) | nat->nat_dir)
5234 	{
5235 	case 3: /* outbound packet / outbound entry */
5236 		if (((nat->nat_inport == sport) ||
5237 		    (flags & SI_W_SPORT)) &&
5238 		    ((nat->nat_oport == dport) ||
5239 		    (flags & SI_W_DPORT)))
5240 			return 1;
5241 		break;
5242 	case 2: /* outbound packet / inbound entry */
5243 		if (((nat->nat_outport == sport) ||
5244 		    (flags & SI_W_DPORT)) &&
5245 		    ((nat->nat_oport == dport) ||
5246 		    (flags & SI_W_SPORT)))
5247 			return 1;
5248 		break;
5249 	case 1: /* inbound packet / outbound entry */
5250 		if (((nat->nat_oport == sport) ||
5251 		    (flags & SI_W_DPORT)) &&
5252 		    ((nat->nat_outport == dport) ||
5253 		    (flags & SI_W_SPORT)))
5254 			return 1;
5255 		break;
5256 	case 0: /* inbound packet / inbound entry */
5257 		if (((nat->nat_oport == sport) ||
5258 		    (flags & SI_W_SPORT)) &&
5259 		    ((nat->nat_outport == dport) ||
5260 		    (flags & SI_W_DPORT)))
5261 			return 1;
5262 		break;
5263 	default:
5264 		break;
5265 	}
5266 
5267 	return(0);
5268 }
5269 
5270 
5271 /* ------------------------------------------------------------------------ */
5272 /* Function:    nat_mssclamp                                                */
5273 /* Returns:     Nil                                                         */
5274 /* Parameters:  tcp(I)    - pointer to TCP header                           */
5275 /*              maxmss(I) - value to clamp the TCP MSS to                   */
5276 /*              csump(I)  - pointer to TCP checksum                         */
5277 /*                                                                          */
5278 /* Check for MSS option and clamp it if necessary.  If found and changed,   */
5279 /* then the TCP header checksum will be updated to reflect the change in    */
5280 /* the MSS.                                                                 */
5281 /* ------------------------------------------------------------------------ */
5282 static void nat_mssclamp(tcp, maxmss, csump)
5283 tcphdr_t *tcp;
5284 u_32_t maxmss;
5285 u_short *csump;
5286 {
5287 	u_char *cp, *ep, opt;
5288 	int hlen, advance;
5289 	u_32_t mss, sumd;
5290 
5291 	hlen = TCP_OFF(tcp) << 2;
5292 	if (hlen > sizeof(*tcp)) {
5293 		cp = (u_char *)tcp + sizeof(*tcp);
5294 		ep = (u_char *)tcp + hlen;
5295 
5296 		while (cp < ep) {
5297 			opt = cp[0];
5298 			if (opt == TCPOPT_EOL)
5299 				break;
5300 			else if (opt == TCPOPT_NOP) {
5301 				cp++;
5302 				continue;
5303 			}
5304 
5305 			if (cp + 1 >= ep)
5306 				break;
5307 			advance = cp[1];
5308 			if ((cp + advance > ep) || (advance <= 0))
5309 				break;
5310 			switch (opt)
5311 			{
5312 			case TCPOPT_MAXSEG:
5313 				if (advance != 4)
5314 					break;
5315 				mss = cp[2] * 256 + cp[3];
5316 				if (mss > maxmss) {
5317 					cp[2] = maxmss / 256;
5318 					cp[3] = maxmss & 0xff;
5319 					CALC_SUMD(mss, maxmss, sumd);
5320 					fix_outcksum(csump, sumd);
5321 				}
5322 				break;
5323 			default:
5324 				/* ignore unknown options */
5325 				break;
5326 			}
5327 
5328 			cp += advance;
5329 		}
5330 	}
5331 }
5332 
5333 
5334 /* ------------------------------------------------------------------------ */
5335 /* Function:    fr_setnatqueue                                              */
5336 /* Returns:     Nil                                                         */
5337 /* Parameters:  nat(I)- pointer to NAT structure                            */
5338 /*              rev(I) - forward(0) or reverse(1) direction                 */
5339 /* Locks:       ipf_nat (read or write)                                     */
5340 /*                                                                          */
5341 /* Put the NAT entry on its default queue entry, using rev as a helped in   */
5342 /* determining which queue it should be placed on.                          */
5343 /* ------------------------------------------------------------------------ */
5344 void fr_setnatqueue(nat, rev, ifs)
5345 nat_t *nat;
5346 int rev;
5347 ipf_stack_t *ifs;
5348 {
5349 	ipftq_t *oifq, *nifq;
5350 
5351 	if (nat->nat_ptr != NULL)
5352 		nifq = nat->nat_ptr->in_tqehead[rev];
5353 	else
5354 		nifq = NULL;
5355 
5356 	if (nifq == NULL) {
5357 		switch (nat->nat_p)
5358 		{
5359 		case IPPROTO_UDP :
5360 			nifq = &ifs->ifs_nat_udptq;
5361 			break;
5362 		case IPPROTO_ICMP :
5363 			nifq = &ifs->ifs_nat_icmptq;
5364 			break;
5365 		case IPPROTO_TCP :
5366 			nifq = ifs->ifs_nat_tqb + nat->nat_tqe.tqe_state[rev];
5367 			break;
5368 		default :
5369 			nifq = &ifs->ifs_nat_iptq;
5370 			break;
5371 		}
5372 	}
5373 
5374 	oifq = nat->nat_tqe.tqe_ifq;
5375 	/*
5376 	 * If it's currently on a timeout queue, move it from one queue to
5377 	 * another, else put it on the end of the newly determined queue.
5378 	 */
5379 	if (oifq != NULL)
5380 		fr_movequeue(&nat->nat_tqe, oifq, nifq, ifs);
5381 	else
5382 		fr_queueappend(&nat->nat_tqe, nifq, nat, ifs);
5383 	return;
5384 }
5385 
5386 /* ------------------------------------------------------------------------ */
5387 /* Function:    nat_getnext                                                 */
5388 /* Returns:     int - 0 == ok, else error                                   */
5389 /* Parameters:  t(I)   - pointer to ipftoken structure                      */
5390 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5391 /*              ifs - ipf stack instance                                    */
5392 /*                                                                          */
5393 /* Fetch the next nat/ipnat/hostmap structure pointer from the linked list  */
5394 /* and copy it out to the storage space pointed to by itp.  The next item   */
5395 /* in the list to look at is put back in the ipftoken struture.             */
5396 /* ------------------------------------------------------------------------ */
5397 static int nat_getnext(t, itp, ifs)
5398 ipftoken_t *t;
5399 ipfgeniter_t *itp;
5400 ipf_stack_t *ifs;
5401 {
5402 	hostmap_t *hm, *nexthm = NULL, zerohm;
5403 	ipnat_t *ipn, *nextipnat = NULL, zeroipn;
5404 	nat_t *nat, *nextnat = NULL, zeronat;
5405 	int error = 0, count;
5406 	char *dst;
5407 
5408 	if (itp->igi_nitems == 0)
5409 		return EINVAL;
5410 
5411 	READ_ENTER(&ifs->ifs_ipf_nat);
5412 
5413 	/*
5414 	 * Get "previous" entry from the token and find the next entry.
5415 	 */
5416 	switch (itp->igi_type)
5417 	{
5418 	case IPFGENITER_HOSTMAP :
5419 		hm = t->ipt_data;
5420 		if (hm == NULL) {
5421 			nexthm = ifs->ifs_ipf_hm_maplist;
5422 		} else {
5423 			nexthm = hm->hm_next;
5424 		}
5425 		break;
5426 
5427 	case IPFGENITER_IPNAT :
5428 		ipn = t->ipt_data;
5429 		if (ipn == NULL) {
5430 			nextipnat = ifs->ifs_nat_list;
5431 		} else {
5432 			nextipnat = ipn->in_next;
5433 		}
5434 		break;
5435 
5436 	case IPFGENITER_NAT :
5437 		nat = t->ipt_data;
5438 		if (nat == NULL) {
5439 			nextnat = ifs->ifs_nat_instances;
5440 		} else {
5441 			nextnat = nat->nat_next;
5442 		}
5443 		break;
5444 	default :
5445 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5446 		return EINVAL;
5447 	}
5448 
5449 	dst = itp->igi_data;
5450 	for (count = itp->igi_nitems; count > 0; count--) {
5451 		/*
5452 		 * If we found an entry, add a reference to it and update the token.
5453 		 * Otherwise, zero out data to be returned and NULL out token.
5454 		 */
5455 		switch (itp->igi_type)
5456 		{
5457 		case IPFGENITER_HOSTMAP :
5458 			if (nexthm != NULL) {
5459 				ATOMIC_INC32(nexthm->hm_ref);
5460 				t->ipt_data = nexthm;
5461 			} else {
5462 				bzero(&zerohm, sizeof(zerohm));
5463 				nexthm = &zerohm;
5464 				t->ipt_data = NULL;
5465 			}
5466 			break;
5467 		case IPFGENITER_IPNAT :
5468 			if (nextipnat != NULL) {
5469 				ATOMIC_INC32(nextipnat->in_use);
5470 				t->ipt_data = nextipnat;
5471 			} else {
5472 				bzero(&zeroipn, sizeof(zeroipn));
5473 				nextipnat = &zeroipn;
5474 				t->ipt_data = NULL;
5475 			}
5476 			break;
5477 		case IPFGENITER_NAT :
5478 			if (nextnat != NULL) {
5479 				MUTEX_ENTER(&nextnat->nat_lock);
5480 				nextnat->nat_ref++;
5481 				MUTEX_EXIT(&nextnat->nat_lock);
5482 				t->ipt_data = nextnat;
5483 			} else {
5484 				bzero(&zeronat, sizeof(zeronat));
5485 				nextnat = &zeronat;
5486 				t->ipt_data = NULL;
5487 			}
5488 			break;
5489 		default :
5490 			break;
5491 		}
5492 
5493 		/*
5494 		 * Now that we have ref, it's save to give up lock.
5495 		 */
5496 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5497 
5498 		/*
5499 		 * Copy out data and clean up references and token as needed.
5500 		 */
5501 		switch (itp->igi_type)
5502 		{
5503 		case IPFGENITER_HOSTMAP :
5504 			error = COPYOUT(nexthm, dst, sizeof(*nexthm));
5505 			if (error != 0)
5506 				error = EFAULT;
5507 			if (t->ipt_data == NULL) {
5508 				ipf_freetoken(t, ifs);
5509 				break;
5510 			} else {
5511 				if (hm != NULL) {
5512 					WRITE_ENTER(&ifs->ifs_ipf_nat);
5513 					fr_hostmapdel(&hm);
5514 					RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5515 				}
5516 				if (nexthm->hm_next == NULL) {
5517 					ipf_freetoken(t, ifs);
5518 					break;
5519 				}
5520 				dst += sizeof(*nexthm);
5521 				hm = nexthm;
5522 				nexthm = nexthm->hm_next;
5523 			}
5524 			break;
5525 
5526 		case IPFGENITER_IPNAT :
5527 			error = COPYOUT(nextipnat, dst, sizeof(*nextipnat));
5528 			if (error != 0)
5529 				error = EFAULT;
5530 			if (t->ipt_data == NULL) {
5531 				ipf_freetoken(t, ifs);
5532 				break;
5533 			} else {
5534 				if (ipn != NULL) {
5535 					WRITE_ENTER(&ifs->ifs_ipf_nat);
5536 					fr_ipnatderef(&ipn, ifs);
5537 					RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5538 				}
5539 				if (nextipnat->in_next == NULL) {
5540 					ipf_freetoken(t, ifs);
5541 					break;
5542 				}
5543 				dst += sizeof(*nextipnat);
5544 				ipn = nextipnat;
5545 				nextipnat = nextipnat->in_next;
5546 			}
5547 			break;
5548 
5549 		case IPFGENITER_NAT :
5550 			error = COPYOUT(nextnat, dst, sizeof(*nextnat));
5551 			if (error != 0)
5552 				error = EFAULT;
5553 			if (t->ipt_data == NULL) {
5554 				ipf_freetoken(t, ifs);
5555 				break;
5556 			} else {
5557 				if (nat != NULL)
5558 					fr_natderef(&nat, ifs);
5559 				if (nextnat->nat_next == NULL) {
5560 					ipf_freetoken(t, ifs);
5561 					break;
5562 				}
5563 				dst += sizeof(*nextnat);
5564 				nat = nextnat;
5565 				nextnat = nextnat->nat_next;
5566 			}
5567 			break;
5568 		default :
5569 			break;
5570 		}
5571 
5572 		if ((count == 1) || (error != 0))
5573 			break;
5574 
5575 		READ_ENTER(&ifs->ifs_ipf_nat);
5576 	}
5577 
5578 	return error;
5579 }
5580 
5581 
5582 /* ------------------------------------------------------------------------ */
5583 /* Function:    nat_iterator                                                */
5584 /* Returns:     int - 0 == ok, else error                                   */
5585 /* Parameters:  token(I) - pointer to ipftoken structure                    */
5586 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5587 /*                                                                          */
5588 /* This function acts as a handler for the SIOCGENITER ioctls that use a    */
5589 /* generic structure to iterate through a list.  There are three different  */
5590 /* linked lists of NAT related information to go through: NAT rules, active */
5591 /* NAT mappings and the NAT fragment cache.                                 */
5592 /* ------------------------------------------------------------------------ */
5593 static int nat_iterator(token, itp, ifs)
5594 ipftoken_t *token;
5595 ipfgeniter_t *itp;
5596 ipf_stack_t *ifs;
5597 {
5598 	int error;
5599 
5600 	if (itp->igi_data == NULL)
5601 		return EFAULT;
5602 
5603 	token->ipt_subtype = itp->igi_type;
5604 
5605 	switch (itp->igi_type)
5606 	{
5607 	case IPFGENITER_HOSTMAP :
5608 	case IPFGENITER_IPNAT :
5609 	case IPFGENITER_NAT :
5610 		error = nat_getnext(token, itp, ifs);
5611 		break;
5612 	case IPFGENITER_NATFRAG :
5613 		error = fr_nextfrag(token, itp, &ifs->ifs_ipfr_natlist,
5614 				    &ifs->ifs_ipfr_nattail,
5615 				    &ifs->ifs_ipf_natfrag, ifs);
5616 		break;
5617 	default :
5618 		error = EINVAL;
5619 		break;
5620 	}
5621 
5622 	return error;
5623 }
5624 
5625 
5626 /* -------------------------------------------------------------------- */
5627 /* Function:	nat_earlydrop						*/
5628 /* Returns:	number of dropped/removed entries from the queue	*/
5629 /* Parameters:	ifq - pointer to queue with entries to be processed	*/
5630 /*		maxidle - entry must be idle this long to be dropped	*/
5631 /*		ifs - ipf stack instance				*/
5632 /*									*/
5633 /* Function is invoked from nat_extraflush() only.  Removes entries	*/
5634 /* form specified timeout queue, based on how long they've sat idle,	*/
5635 /* without waiting for it to happen on its own.				*/
5636 /* -------------------------------------------------------------------- */
5637 static int nat_earlydrop(ifq, maxidle, ifs)
5638 ipftq_t *ifq;
5639 int maxidle;
5640 ipf_stack_t *ifs;
5641 {
5642 	ipftqent_t *tqe, *tqn;
5643 	nat_t *nat;
5644 	unsigned int dropped;
5645 	int droptick;
5646 
5647 	if (ifq == NULL)
5648 		return (0);
5649 
5650 	dropped = 0;
5651 
5652 	/*
5653 	 * Determine the tick representing the idle time we're interested
5654 	 * in.  If an entry exists in the queue, and it was touched before
5655 	 * that tick, then it's been idle longer than maxidle ... remove it.
5656 	 */
5657 	droptick = ifs->ifs_fr_ticks - maxidle;
5658 	tqn = ifq->ifq_head;
5659 	while ((tqe = tqn) != NULL && tqe->tqe_touched < droptick) {
5660 		tqn = tqe->tqe_next;
5661 		nat = tqe->tqe_parent;
5662 		nat_delete(nat, ISL_EXPIRE, ifs);
5663 		dropped++;
5664 	}
5665 	return (dropped);
5666 }
5667 
5668 
5669 /* --------------------------------------------------------------------- */
5670 /* Function:	nat_flushclosing					 */
5671 /* Returns:	int - number of NAT entries deleted			 */
5672 /* Parameters:	stateval(I) - State at which to start removing entries	 */
5673 /*		ifs - ipf stack instance				 */
5674 /*									 */
5675 /* Remove nat table entries for TCP connections which are in the process */
5676 /* of closing, and are in (or "beyond") state specified by 'stateval'.	 */
5677 /* --------------------------------------------------------------------- */
5678 static int nat_flushclosing(stateval, ifs)
5679 int stateval;
5680 ipf_stack_t *ifs;
5681 {
5682 	ipftq_t *ifq, *ifqn;
5683 	ipftqent_t *tqe, *tqn;
5684 	nat_t *nat;
5685 	int dropped;
5686 
5687 	dropped = 0;
5688 
5689 	/*
5690 	 * Start by deleting any entries in specific timeout queues.
5691 	 */
5692 	ifqn = &ifs->ifs_nat_tqb[stateval];
5693 	while ((ifq = ifqn) != NULL) {
5694 		ifqn = ifq->ifq_next;
5695 		dropped += nat_earlydrop(ifq, (int)0, ifs);
5696 	}
5697 
5698 	/*
5699 	 * Next, look through user defined queues for closing entries.
5700 	 */
5701 	ifqn = ifs->ifs_nat_utqe;
5702 	while ((ifq = ifqn) != NULL) {
5703 		ifqn = ifq->ifq_next;
5704 		tqn = ifq->ifq_head;
5705 		while ((tqe = tqn) != NULL) {
5706 			tqn = tqe->tqe_next;
5707 			nat = tqe->tqe_parent;
5708 			if (nat->nat_p != IPPROTO_TCP)
5709 				continue;
5710 			if ((nat->nat_tcpstate[0] >= stateval) &&
5711 			    (nat->nat_tcpstate[1] >= stateval)) {
5712 				nat_delete(nat, NL_EXPIRE, ifs);
5713 				dropped++;
5714 			}
5715 		}
5716 	}
5717 	return (dropped);
5718 }
5719 
5720 
5721 /* --------------------------------------------------------------------- */
5722 /* Function:	nat_extraflush						 */
5723 /* Returns:	int - number of NAT entries deleted			 */
5724 /* Parameters:	which(I) - how to flush the active NAT table		 */
5725 /*		ifs - ipf stack instance				 */
5726 /* Write Locks:	ipf_nat							 */
5727 /*									 */
5728 /* Flush nat tables.  Three actions currently defined:			 */
5729 /*									 */
5730 /* which == 0 :	Flush all nat table entries.				 */
5731 /*									 */
5732 /* which == 1 :	Flush entries with TCP connections which have started	 */
5733 /*		to close on both ends.					 */
5734 /*									 */
5735 /* which == 2 :	First, flush entries which are "almost" closed.  If that */
5736 /*		does not take us below specified threshold in the table, */
5737 /*		we want to flush entries with TCP connections which have */
5738 /*		been idle for a long time.  Start with connections idle	 */
5739 /*		over 12 hours,  and then work backwards in half hour	 */
5740 /*		increments to at most 30 minutes idle, and finally work	 */
5741 /*		back in 30 second increments to at most 30 seconds.	 */
5742 /* --------------------------------------------------------------------- */
5743 static int nat_extraflush(which, ifs)
5744 int which;
5745 ipf_stack_t *ifs;
5746 {
5747 	ipftq_t *ifq, *ifqn;
5748 	nat_t *nat, **natp;
5749 	int idletime, removed, idle_idx;
5750 	SPL_INT(s);
5751 
5752 	removed = 0;
5753 
5754 	SPL_NET(s);
5755 	switch (which)
5756 	{
5757 	case 0:
5758 		natp = &ifs->ifs_nat_instances;
5759 		while ((nat = *natp) != NULL) {
5760 			natp = &nat->nat_next;
5761 			nat_delete(nat, ISL_FLUSH, ifs);
5762 			removed++;
5763 		}
5764 		break;
5765 
5766 	case 1:
5767 		removed = nat_flushclosing(IPF_TCPS_CLOSE_WAIT, ifs);
5768 		break;
5769 
5770 	case 2:
5771 		removed = nat_flushclosing(IPF_TCPS_FIN_WAIT_2, ifs);
5772 
5773 		/*
5774 		 * Be sure we haven't done this in the last 10 seconds.
5775 		 */
5776 		if (ifs->ifs_fr_ticks - ifs->ifs_nat_last_force_flush <
5777 		    IPF_TTLVAL(10))
5778 			break;
5779 		ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks;
5780 
5781 		/*
5782 		 * Determine initial threshold for minimum idle time based on
5783 		 * how long ipfilter has been running.  Ipfilter needs to have
5784 		 * been up as long as the smallest interval to continue on.
5785 		 *
5786 		 * Minimum idle times stored in idletime_tab and indexed by
5787 		 * idle_idx.  Start at upper end of array and work backwards.
5788 		 *
5789 		 * Once the index is found, set the initial idle time to the
5790 		 * first interval before the current ipfilter run time.
5791 		 */
5792 		if (ifs->ifs_fr_ticks < idletime_tab[0])
5793 			break;  /* switch */
5794 		idle_idx = (sizeof (idletime_tab) / sizeof (int)) - 1;
5795 		if (ifs->ifs_fr_ticks > idletime_tab[idle_idx]) {
5796 			idletime = idletime_tab[idle_idx];
5797 		} else {
5798 			while ((idle_idx > 0) &&
5799 			    (ifs->ifs_fr_ticks < idletime_tab[idle_idx]))
5800 				idle_idx--;
5801 			idletime = (ifs->ifs_fr_ticks /
5802 				    idletime_tab[idle_idx]) *
5803 				    idletime_tab[idle_idx];
5804 		}
5805 
5806 		while ((idle_idx >= 0) &&
5807 		    (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_lo)) {
5808 			/*
5809 			 * Start with appropriate timeout queue.
5810 			 */
5811 			removed += nat_earlydrop(
5812 					&ifs->ifs_nat_tqb[IPF_TCPS_ESTABLISHED],
5813 					idletime, ifs);
5814 
5815 			/*
5816 			 * Make sure we haven't already deleted enough
5817 			 * entries before checking the user defined queues.
5818 			 */
5819 			if (NAT_TAB_WATER_LEVEL(ifs) <=
5820 			    ifs->ifs_nat_flush_lvl_lo)
5821 				break;
5822 
5823 			/*
5824 			 * Next, look through the user defined queues.
5825 			 */
5826 			ifqn = ifs->ifs_nat_utqe;
5827 			while ((ifq = ifqn) != NULL) {
5828 				ifqn = ifq->ifq_next;
5829 				removed += nat_earlydrop(ifq, idletime, ifs);
5830 			}
5831 
5832 			/*
5833 			 * Adjust the granularity of idle time.
5834 			 *
5835 			 * If we reach an interval boundary, we need to
5836 			 * either adjust the idle time accordingly or exit
5837 			 * the loop altogether (if this is very last check).
5838 			 */
5839 			idletime -= idletime_tab[idle_idx];
5840 			if (idletime < idletime_tab[idle_idx]) {
5841 				if (idle_idx != 0) {
5842 					idletime = idletime_tab[idle_idx] -
5843 					    idletime_tab[idle_idx - 1];
5844 					idle_idx--;
5845 				} else {
5846 					break;  /* while */
5847 				}
5848 			}
5849 		}
5850 		break;
5851 	default:
5852 		break;
5853 	}
5854 
5855 	SPL_X(s);
5856 	return (removed);
5857 }
5858