xref: /titanic_51/usr/src/uts/common/inet/ipf/ip_nat.c (revision fc3af78a71855c71878866a294572d00e6720533)
1 /*
2  * Copyright (C) 1995-2004 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
7  * Use is subject to license terms.
8  */
9 
10 #if defined(KERNEL) || defined(_KERNEL)
11 # undef KERNEL
12 # undef _KERNEL
13 # define        KERNEL	1
14 # define        _KERNEL	1
15 #endif
16 #include <sys/errno.h>
17 #include <sys/types.h>
18 #include <sys/param.h>
19 #include <sys/time.h>
20 #include <sys/file.h>
21 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
22     defined(_KERNEL)
23 # include "opt_ipfilter_log.h"
24 #endif
25 #if !defined(_KERNEL)
26 # include <stdio.h>
27 # include <string.h>
28 # include <stdlib.h>
29 # define _KERNEL
30 # ifdef __OpenBSD__
31 struct file;
32 # endif
33 # include <sys/uio.h>
34 # undef _KERNEL
35 #endif
36 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
37 # include <sys/filio.h>
38 # include <sys/fcntl.h>
39 #else
40 # include <sys/ioctl.h>
41 #endif
42 #if !defined(AIX)
43 # include <sys/fcntl.h>
44 #endif
45 #if !defined(linux)
46 # include <sys/protosw.h>
47 #endif
48 #include <sys/socket.h>
49 #if defined(_KERNEL)
50 # include <sys/systm.h>
51 # if !defined(__SVR4) && !defined(__svr4__)
52 #  include <sys/mbuf.h>
53 # endif
54 #endif
55 #if defined(__SVR4) || defined(__svr4__)
56 # include <sys/filio.h>
57 # include <sys/byteorder.h>
58 # ifdef _KERNEL
59 #  include <sys/dditypes.h>
60 # endif
61 # include <sys/stream.h>
62 # include <sys/kmem.h>
63 #endif
64 #if __FreeBSD_version >= 300000
65 # include <sys/queue.h>
66 #endif
67 #include <net/if.h>
68 #if __FreeBSD_version >= 300000
69 # include <net/if_var.h>
70 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
71 #  include "opt_ipfilter.h"
72 # endif
73 #endif
74 #ifdef sun
75 # include <net/af.h>
76 #endif
77 #include <net/route.h>
78 #include <netinet/in.h>
79 #include <netinet/in_systm.h>
80 #include <netinet/ip.h>
81 
82 #ifdef RFC1825
83 # include <vpn/md5.h>
84 # include <vpn/ipsec.h>
85 extern struct ifnet vpnif;
86 #endif
87 
88 #if !defined(linux)
89 # include <netinet/ip_var.h>
90 #endif
91 #include <netinet/tcp.h>
92 #include <netinet/udp.h>
93 #include <netinet/ip_icmp.h>
94 #include "netinet/ip_compat.h"
95 #include <netinet/tcpip.h>
96 #include "netinet/ip_fil.h"
97 #include "netinet/ip_nat.h"
98 #include "netinet/ip_frag.h"
99 #include "netinet/ip_state.h"
100 #include "netinet/ip_proxy.h"
101 #include "netinet/ipf_stack.h"
102 #ifdef	IPFILTER_SYNC
103 #include "netinet/ip_sync.h"
104 #endif
105 #if (__FreeBSD_version >= 300000)
106 # include <sys/malloc.h>
107 #endif
108 /* END OF INCLUDES */
109 
110 #undef	SOCKADDR_IN
111 #define	SOCKADDR_IN	struct sockaddr_in
112 
113 #if !defined(lint)
114 static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
115 static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 darrenr Exp $";
116 #endif
117 
118 
119 /* ======================================================================== */
120 /* How the NAT is organised and works.                                      */
121 /*                                                                          */
122 /* Inside (interface y) NAT       Outside (interface x)                     */
123 /* -------------------- -+- -------------------------------------           */
/* Packet going          |   out, processed by fr_checknatout() for x       */
125 /* ------------>         |   ------------>                                  */
126 /* src=10.1.1.1          |   src=192.1.1.1                                  */
127 /*                       |                                                  */
128 /*                       |   in, processed by fr_checknatin() for x         */
129 /* <------------         |   <------------                                  */
130 /* dst=10.1.1.1          |   dst=192.1.1.1                                  */
131 /* -------------------- -+- -------------------------------------           */
132 /* fr_checknatout() - changes ip_src and if required, sport                 */
133 /*             - creates a new mapping, if required.                        */
134 /* fr_checknatin()  - changes ip_dst and if required, dport                 */
135 /*                                                                          */
136 /* In the NAT table, internal source is recorded as "in" and externally     */
137 /* seen as "out".                                                           */
138 /* ======================================================================== */
139 
140 
141 static	int	nat_clearlist __P((ipf_stack_t *));
142 static	void	nat_addnat __P((struct ipnat *, ipf_stack_t *));
143 static	void	nat_addrdr __P((struct ipnat *, ipf_stack_t *));
144 static	int	fr_natgetent __P((caddr_t, ipf_stack_t *));
145 static	int	fr_natgetsz __P((caddr_t, ipf_stack_t *));
146 static	int	fr_natputent __P((caddr_t, int, ipf_stack_t *));
147 static	void	nat_tabmove __P((nat_t *, ipf_stack_t *));
148 static	int	nat_match __P((fr_info_t *, ipnat_t *));
149 static	INLINE	int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
150 static	INLINE	int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
151 static	hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
152 				    struct in_addr, struct in_addr, u_32_t,
153 				    ipf_stack_t *));
154 static	INLINE	int nat_icmpquerytype4 __P((int));
155 static	int	nat_ruleaddrinit __P((ipnat_t *));
156 static	int	nat_siocaddnat __P((ipnat_t *, ipnat_t **, int, ipf_stack_t *));
157 static	void	nat_siocdelnat __P((ipnat_t *, ipnat_t **, int, ipf_stack_t *));
158 static	INLINE	int nat_icmperrortype4 __P((int));
159 static	INLINE	int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
160 				      tcphdr_t *, nat_t **, int));
161 static	INLINE	int nat_resolverule __P((ipnat_t *, ipf_stack_t *));
162 static	void	nat_mssclamp __P((tcphdr_t *, u_32_t, u_short *));
163 static	int	nat_getnext __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
164 static	int	nat_iterator __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
165 static	int	nat_flushtable __P((int, ipf_stack_t *));
166 
/*
 * Evaluates to non-zero when the NAT entry "n" has any of the
 * IPN_TCPUDPICMP bits set in nat_flags and its nat_inport differs from its
 * nat_outport.  Note that "n" is evaluated more than once.
 */
#define NAT_HAS_L4_CHANGED(n)	\
 	(((n)->nat_flags & (IPN_TCPUDPICMP)) && \
 	(n)->nat_inport != (n)->nat_outport)
170 
171 
172 /* ------------------------------------------------------------------------ */
173 /* Function:    fr_natinit                                                  */
174 /* Returns:     int - 0 == success, -1 == failure                           */
175 /* Parameters:  Nil                                                         */
176 /*                                                                          */
177 /* Initialise all of the NAT locks, tables and other structures.            */
178 /* ------------------------------------------------------------------------ */
179 int fr_natinit(ifs)
180 ipf_stack_t *ifs;
181 {
182 	int i;
183 
184 	KMALLOCS(ifs->ifs_nat_table[0], nat_t **,
185 		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
186 	if (ifs->ifs_nat_table[0] != NULL)
187 		bzero((char *)ifs->ifs_nat_table[0],
188 		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
189 	else
190 		return -1;
191 
192 	KMALLOCS(ifs->ifs_nat_table[1], nat_t **,
193 		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
194 	if (ifs->ifs_nat_table[1] != NULL)
195 		bzero((char *)ifs->ifs_nat_table[1],
196 		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
197 	else
198 		return -2;
199 
200 	KMALLOCS(ifs->ifs_nat_rules, ipnat_t **,
201 		 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
202 	if (ifs->ifs_nat_rules != NULL)
203 		bzero((char *)ifs->ifs_nat_rules,
204 		      ifs->ifs_ipf_natrules_sz * sizeof(ipnat_t *));
205 	else
206 		return -3;
207 
208 	KMALLOCS(ifs->ifs_rdr_rules, ipnat_t **,
209 		 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
210 	if (ifs->ifs_rdr_rules != NULL)
211 		bzero((char *)ifs->ifs_rdr_rules,
212 		      ifs->ifs_ipf_rdrrules_sz * sizeof(ipnat_t *));
213 	else
214 		return -4;
215 
216 	KMALLOCS(ifs->ifs_maptable, hostmap_t **,
217 		 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
218 	if (ifs->ifs_maptable != NULL)
219 		bzero((char *)ifs->ifs_maptable,
220 		      sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
221 	else
222 		return -5;
223 
224 	ifs->ifs_ipf_hm_maplist = NULL;
225 
226 	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[0], u_long *,
227 		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
228 	if (ifs->ifs_nat_stats.ns_bucketlen[0] == NULL)
229 		return -1;
230 	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[0],
231 	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
232 
233 	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[1], u_long *,
234 		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
235 	if (ifs->ifs_nat_stats.ns_bucketlen[1] == NULL)
236 		return -1;
237 	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[1],
238 	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
239 
240 	if (ifs->ifs_fr_nat_maxbucket == 0) {
241 		for (i = ifs->ifs_ipf_nattable_sz; i > 0; i >>= 1)
242 			ifs->ifs_fr_nat_maxbucket++;
243 		ifs->ifs_fr_nat_maxbucket *= 2;
244 	}
245 
246 	fr_sttab_init(ifs->ifs_nat_tqb, ifs);
247 	/*
248 	 * Increase this because we may have "keep state" following this too
249 	 * and packet storms can occur if this is removed too quickly.
250 	 */
251 	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcplastack;
252 	ifs->ifs_nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_nat_udptq;
253 	ifs->ifs_nat_udptq.ifq_ttl = ifs->ifs_fr_defnatage;
254 	ifs->ifs_nat_udptq.ifq_ref = 1;
255 	ifs->ifs_nat_udptq.ifq_head = NULL;
256 	ifs->ifs_nat_udptq.ifq_tail = &ifs->ifs_nat_udptq.ifq_head;
257 	MUTEX_INIT(&ifs->ifs_nat_udptq.ifq_lock, "nat ipftq udp tab");
258 	ifs->ifs_nat_udptq.ifq_next = &ifs->ifs_nat_icmptq;
259 	ifs->ifs_nat_icmptq.ifq_ttl = ifs->ifs_fr_defnaticmpage;
260 	ifs->ifs_nat_icmptq.ifq_ref = 1;
261 	ifs->ifs_nat_icmptq.ifq_head = NULL;
262 	ifs->ifs_nat_icmptq.ifq_tail = &ifs->ifs_nat_icmptq.ifq_head;
263 	MUTEX_INIT(&ifs->ifs_nat_icmptq.ifq_lock, "nat icmp ipftq tab");
264 	ifs->ifs_nat_icmptq.ifq_next = &ifs->ifs_nat_iptq;
265 	ifs->ifs_nat_iptq.ifq_ttl = ifs->ifs_fr_defnatipage;
266 	ifs->ifs_nat_iptq.ifq_ref = 1;
267 	ifs->ifs_nat_iptq.ifq_head = NULL;
268 	ifs->ifs_nat_iptq.ifq_tail = &ifs->ifs_nat_iptq.ifq_head;
269 	MUTEX_INIT(&ifs->ifs_nat_iptq.ifq_lock, "nat ip ipftq tab");
270 	ifs->ifs_nat_iptq.ifq_next = NULL;
271 
272 	for (i = 0; i < IPF_TCP_NSTATES; i++) {
273 		if (ifs->ifs_nat_tqb[i].ifq_ttl < ifs->ifs_fr_defnaticmpage)
274 			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnaticmpage;
275 #ifdef LARGE_NAT
276 		else if (ifs->ifs_nat_tqb[i].ifq_ttl > ifs->ifs_fr_defnatage)
277 			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnatage;
278 #endif
279 	}
280 
281 	/*
282 	 * Increase this because we may have "keep state" following
283 	 * this too and packet storms can occur if this is removed
284 	 * too quickly.
285 	 */
286 	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl =
287 	    ifs->ifs_nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
288 
289 	RWLOCK_INIT(&ifs->ifs_ipf_nat, "ipf IP NAT rwlock");
290 	RWLOCK_INIT(&ifs->ifs_ipf_natfrag, "ipf IP NAT-Frag rwlock");
291 	MUTEX_INIT(&ifs->ifs_ipf_nat_new, "ipf nat new mutex");
292 	MUTEX_INIT(&ifs->ifs_ipf_natio, "ipf nat io mutex");
293 
294 	ifs->ifs_fr_nat_init = 1;
295 	ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks;
296 	return 0;
297 }
298 
299 
300 /* ------------------------------------------------------------------------ */
301 /* Function:    nat_addrdr                                                  */
302 /* Returns:     Nil                                                         */
303 /* Parameters:  n(I) - pointer to NAT rule to add                           */
304 /*                                                                          */
305 /* Adds a redirect rule to the hash table of redirect rules and the list of */
306 /* loaded NAT rules.  Updates the bitmask indicating which netmasks are in  */
307 /* use by redirect rules.                                                   */
308 /* ------------------------------------------------------------------------ */
309 static void nat_addrdr(n, ifs)
310 ipnat_t *n;
311 ipf_stack_t *ifs;
312 {
313 	ipnat_t **np;
314 	u_32_t j;
315 	u_int hv;
316 	int k;
317 
318 	k = count4bits(n->in_outmsk);
319 	if ((k >= 0) && (k != 32))
320 		ifs->ifs_rdr_masks |= 1 << k;
321 	j = (n->in_outip & n->in_outmsk);
322 	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_rdrrules_sz);
323 	np = ifs->ifs_rdr_rules + hv;
324 	while (*np != NULL)
325 		np = &(*np)->in_rnext;
326 	n->in_rnext = NULL;
327 	n->in_prnext = np;
328 	n->in_hv = hv;
329 	*np = n;
330 }
331 
332 
333 /* ------------------------------------------------------------------------ */
334 /* Function:    nat_addnat                                                  */
335 /* Returns:     Nil                                                         */
336 /* Parameters:  n(I) - pointer to NAT rule to add                           */
337 /*                                                                          */
338 /* Adds a NAT map rule to the hash table of rules and the list of  loaded   */
339 /* NAT rules.  Updates the bitmask indicating which netmasks are in use by  */
340 /* redirect rules.                                                          */
341 /* ------------------------------------------------------------------------ */
342 static void nat_addnat(n, ifs)
343 ipnat_t *n;
344 ipf_stack_t *ifs;
345 {
346 	ipnat_t **np;
347 	u_32_t j;
348 	u_int hv;
349 	int k;
350 
351 	k = count4bits(n->in_inmsk);
352 	if ((k >= 0) && (k != 32))
353 		ifs->ifs_nat_masks |= 1 << k;
354 	j = (n->in_inip & n->in_inmsk);
355 	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_natrules_sz);
356 	np = ifs->ifs_nat_rules + hv;
357 	while (*np != NULL)
358 		np = &(*np)->in_mnext;
359 	n->in_mnext = NULL;
360 	n->in_pmnext = np;
361 	n->in_hv = hv;
362 	*np = n;
363 }
364 
365 
366 /* ------------------------------------------------------------------------ */
367 /* Function:    nat_delrdr                                                  */
368 /* Returns:     Nil                                                         */
369 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
370 /*                                                                          */
371 /* Removes a redirect rule from the hash table of redirect rules.           */
372 /* ------------------------------------------------------------------------ */
373 void nat_delrdr(n)
374 ipnat_t *n;
375 {
376 	if (n->in_rnext)
377 		n->in_rnext->in_prnext = n->in_prnext;
378 	*n->in_prnext = n->in_rnext;
379 }
380 
381 
382 /* ------------------------------------------------------------------------ */
383 /* Function:    nat_delnat                                                  */
384 /* Returns:     Nil                                                         */
385 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
386 /*                                                                          */
387 /* Removes a NAT map rule from the hash table of NAT map rules.             */
388 /* ------------------------------------------------------------------------ */
389 void nat_delnat(n)
390 ipnat_t *n;
391 {
392 	if (n->in_mnext != NULL)
393 		n->in_mnext->in_pmnext = n->in_pmnext;
394 	*n->in_pmnext = n->in_mnext;
395 }
396 
397 
398 /* ------------------------------------------------------------------------ */
399 /* Function:    nat_hostmap                                                 */
400 /* Returns:     struct hostmap* - NULL if no hostmap could be created,      */
401 /*                                else a pointer to the hostmapping to use  */
402 /* Parameters:  np(I)   - pointer to NAT rule                               */
403 /*              real(I) - real IP address                                   */
404 /*              map(I)  - mapped IP address                                 */
405 /*              port(I) - destination port number                           */
406 /* Write Locks: ipf_nat                                                     */
407 /*                                                                          */
408 /* Check if an ip address has already been allocated for a given mapping    */
409 /* that is not doing port based translation.  If is not yet allocated, then */
410 /* create a new entry if a non-NULL NAT rule pointer has been supplied.     */
411 /* ------------------------------------------------------------------------ */
412 static struct hostmap *nat_hostmap(np, src, dst, map, port, ifs)
413 ipnat_t *np;
414 struct in_addr src;
415 struct in_addr dst;
416 struct in_addr map;
417 u_32_t port;
418 ipf_stack_t *ifs;
419 {
420 	hostmap_t *hm;
421 	u_int hv;
422 
423 	hv = (src.s_addr ^ dst.s_addr);
424 	hv += src.s_addr;
425 	hv += dst.s_addr;
426 	hv %= HOSTMAP_SIZE;
427 	for (hm = ifs->ifs_maptable[hv]; hm; hm = hm->hm_next)
428 		if ((hm->hm_srcip.s_addr == src.s_addr) &&
429 		    (hm->hm_dstip.s_addr == dst.s_addr) &&
430 		    ((np == NULL) || (np == hm->hm_ipnat)) &&
431 		    ((port == 0) || (port == hm->hm_port))) {
432 			hm->hm_ref++;
433 			return hm;
434 		}
435 
436 	if (np == NULL)
437 		return NULL;
438 
439 	KMALLOC(hm, hostmap_t *);
440 	if (hm) {
441 		hm->hm_hnext = ifs->ifs_ipf_hm_maplist;
442 		hm->hm_phnext = &ifs->ifs_ipf_hm_maplist;
443 		if (ifs->ifs_ipf_hm_maplist != NULL)
444 			ifs->ifs_ipf_hm_maplist->hm_phnext = &hm->hm_hnext;
445 		ifs->ifs_ipf_hm_maplist = hm;
446 
447 		hm->hm_next = ifs->ifs_maptable[hv];
448 		hm->hm_pnext = ifs->ifs_maptable + hv;
449 		if (ifs->ifs_maptable[hv] != NULL)
450 			ifs->ifs_maptable[hv]->hm_pnext = &hm->hm_next;
451 		ifs->ifs_maptable[hv] = hm;
452 		hm->hm_ipnat = np;
453 		hm->hm_srcip = src;
454 		hm->hm_dstip = dst;
455 		hm->hm_mapip = map;
456 		hm->hm_ref = 1;
457 		hm->hm_port = port;
458 		hm->hm_v = 4;
459 	}
460 	return hm;
461 }
462 
463 
464 /* ------------------------------------------------------------------------ */
465 /* Function:    fr_hostmapdel                                              */
466 /* Returns:     Nil                                                         */
467 /* Parameters:  hmp(I) - pointer to pointer to hostmap structure            */
468 /* Write Locks: ipf_nat                                                     */
469 /*                                                                          */
470 /* Decrement the references to this hostmap structure by one.  If this      */
471 /* reaches zero then remove it and free it.                                 */
472 /* ------------------------------------------------------------------------ */
473 void fr_hostmapdel(hmp)
474 struct hostmap **hmp;
475 {
476 	struct hostmap *hm;
477 
478 	hm = *hmp;
479 	*hmp = NULL;
480 
481 	hm->hm_ref--;
482 	if (hm->hm_ref == 0) {
483 		if (hm->hm_next)
484 			hm->hm_next->hm_pnext = hm->hm_pnext;
485 		*hm->hm_pnext = hm->hm_next;
486 		if (hm->hm_hnext)
487 			hm->hm_hnext->hm_phnext = hm->hm_phnext;
488 		*hm->hm_phnext = hm->hm_hnext;
489 		KFREE(hm);
490 	}
491 }
492 
493 
/* ------------------------------------------------------------------------ */
/* Function:    fix_outcksum                                                */
/* Returns:     Nil                                                         */
/* Parameters:  sp(I)  - location of 16bit checksum to update               */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Adjusts the 16bit checksum by "n" for packets going out.  The checksum   */
/* is read and written back in network byte order; an "n" of zero leaves    */
/* it untouched.                                                            */
/* ------------------------------------------------------------------------ */
void fix_outcksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_32_t acc;
	u_short result;
	int pass;

	if (n != 0) {
		/* Complement of the stored checksum, plus the adjustment. */
		acc = (~ntohs(*sp)) & 0xffff;
		acc += n;

		/* Fold the carries back into the low 16 bits, twice. */
		for (pass = 0; pass < 2; pass++)
			acc = (acc >> 16) + (acc & 0xffff);

		result = ~(u_short)acc;
		*sp = htons(result);
	}
}
520 
521 
/* ------------------------------------------------------------------------ */
/* Function:    fix_incksum                                                 */
/* Returns:     Nil                                                         */
/* Parameters:  sp(I)  - location of 16bit checksum to update               */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Adjusts the 16bit checksum by "n" for packets going in.  Only the low    */
/* 16 bits of the complement of "n" take part in the adjustment; an "n" of  */
/* zero leaves the checksum untouched.                                      */
/* ------------------------------------------------------------------------ */
void fix_incksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_32_t acc;
	u_short result;
	int pass;

	if (n != 0) {
		/* Complement of the stored checksum, plus ~n (low 16 bits). */
		acc = (~ntohs(*sp)) & 0xffff;
		acc += (~n) & 0xffff;

		/* Fold the carries back into the low 16 bits, twice. */
		for (pass = 0; pass < 2; pass++)
			acc = (acc >> 16) + (acc & 0xffff);

		result = ~(u_short)acc;
		*sp = htons(result);
	}
}
548 
549 
/* ------------------------------------------------------------------------ */
/* Function:    fix_datacksum                                               */
/* Returns:     Nil                                                         */
/* Parameters:  sp(I)  - location of 16bit checksum to update               */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Fix_datacksum is used *only* for the adjustment of checksums in the      */
/* data section of an IP packet.                                            */
/*                                                                          */
/* The only situation in which this is needed is when NAT'ing an ICMP       */
/* error message.  Such a message contains, in its body, the IP header of   */
/* the original IP packet that caused the error.                            */
/*                                                                          */
/* fix_incksum and fix_outcksum cannot be used in that case because, for    */
/* the kernel, the data section of the ICMP error is just data, and no      */
/* special processing such as hardware cksum or ntohs processing has been   */
/* done by the kernel on the data section.                                  */
/* ------------------------------------------------------------------------ */
void fix_datacksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_32_t total;
	u_short cksum;

	/* A zero adjustment leaves the checksum as it is. */
	if (n == 0)
		return;

	total = ((~ntohs(*sp)) & 0xffff) + n;

	/* Two rounds of folding the carries into the low 16 bits. */
	total = (total & 0xffff) + (total >> 16);
	total = (total & 0xffff) + (total >> 16);

	cksum = ~(u_short)total;
	*sp = htons(cksum);
}
586 
587 
588 /* ------------------------------------------------------------------------ */
589 /* Function:    fr_nat_ioctl                                                */
590 /* Returns:     int - 0 == success, != 0 == failure                         */
591 /* Parameters:  data(I) - pointer to ioctl data                             */
592 /*              cmd(I)  - ioctl command integer                             */
593 /*              mode(I) - file mode bits used with open                     */
594 /*              uid(I)  - uid of caller                                     */
595 /*              ctx(I)  - pointer to give the uid context                   */
596 /*              ifs     - ipf stack instance                                */
597 /*                                                                          */
598 /* Processes an ioctl call made to operate on the IP Filter NAT device.     */
599 /* ------------------------------------------------------------------------ */
600 int fr_nat_ioctl(data, cmd, mode, uid, ctx, ifs)
601 ioctlcmd_t cmd;
602 caddr_t data;
603 int mode, uid;
604 void *ctx;
605 ipf_stack_t *ifs;
606 {
607 	ipnat_t *nat, *nt, *n = NULL, **np = NULL;
608 	int error = 0, ret, arg, getlock;
609 	ipnat_t natd;
610 
611 #if (BSD >= 199306) && defined(_KERNEL)
612 	if ((securelevel >= 2) && (mode & FWRITE))
613 		return EPERM;
614 #endif
615 
616 #if defined(__osf__) && defined(_KERNEL)
617 	getlock = 0;
618 #else
619 	getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
620 #endif
621 
622 	nat = NULL;     /* XXX gcc -Wuninitialized */
623 	if (cmd == (ioctlcmd_t)SIOCADNAT) {
624 		KMALLOC(nt, ipnat_t *);
625 	} else {
626 		nt = NULL;
627 	}
628 
629 	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
630 		if (mode & NAT_SYSSPACE) {
631 			bcopy(data, (char *)&natd, sizeof(natd));
632 			error = 0;
633 		} else {
634 			error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
635 		}
636 
637 	}
638 
639 	if (error != 0)
640 		goto done;
641 
642 	/*
643 	 * For add/delete, look to see if the NAT entry is already present
644 	 */
645 	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
646 		nat = &natd;
647 		if (nat->in_v == 0)	/* For backward compat. */
648 			nat->in_v = 4;
649 		nat->in_flags &= IPN_USERFLAGS;
650 		if ((nat->in_redir & NAT_MAPBLK) == 0) {
651 			if ((nat->in_flags & IPN_SPLIT) == 0)
652 				nat->in_inip &= nat->in_inmsk;
653 			if ((nat->in_flags & IPN_IPRANGE) == 0)
654 				nat->in_outip &= nat->in_outmsk;
655 		}
656 		MUTEX_ENTER(&ifs->ifs_ipf_natio);
657 		for (np = &ifs->ifs_nat_list; ((n = *np) != NULL);
658 		     np = &n->in_next)
659 			if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
660 			    IPN_CMPSIZ) == 0) {
661 				if (nat->in_redir == NAT_REDIRECT &&
662 				    nat->in_pnext != n->in_pnext)
663 					continue;
664 				break;
665 			}
666 	}
667 
668 	switch (cmd)
669 	{
670 	case SIOCGENITER :
671 	    {
672 		ipfgeniter_t iter;
673 		ipftoken_t *token;
674 
675 		error = fr_inobj(data, &iter, IPFOBJ_GENITER);
676 		if (error != 0)
677 			break;
678 
679 		token = ipf_findtoken(iter.igi_type, uid, ctx, ifs);
680 		if (token != NULL)
681 			error  = nat_iterator(token, &iter, ifs);
682 		else
683 			error = ESRCH;
684 		RWLOCK_EXIT(&ifs->ifs_ipf_tokens);
685 		break;
686 	    }
687 #ifdef  IPFILTER_LOG
688 	case SIOCIPFFB :
689 	{
690 		int tmp;
691 
692 		if (!(mode & FWRITE))
693 			error = EPERM;
694 		else {
695 			tmp = ipflog_clear(IPL_LOGNAT, ifs);
696 			error = BCOPYOUT((char *)&tmp, (char *)data,
697 					sizeof(tmp));
698 			if (error != 0)
699 				error = EFAULT;
700 		}
701 		break;
702 	}
703 	case SIOCSETLG :
704 		if (!(mode & FWRITE)) {
705 			error = EPERM;
706 		} else {
707 			error = BCOPYIN((char *)data,
708 					(char *)&ifs->ifs_nat_logging,
709 					sizeof(ifs->ifs_nat_logging));
710 			if (error != 0)
711 				error = EFAULT;
712 		}
713 		break;
714 	case SIOCGETLG :
715 		error = BCOPYOUT((char *)&ifs->ifs_nat_logging, (char *)data,
716 				sizeof(ifs->ifs_nat_logging));
717 		if (error != 0)
718 			error = EFAULT;
719 		break;
720 	case FIONREAD :
721 		arg = ifs->ifs_iplused[IPL_LOGNAT];
722 		error = BCOPYOUT(&arg, data, sizeof(arg));
723 		if (error != 0)
724 			error = EFAULT;
725 		break;
726 #endif
727 	case SIOCADNAT :
728 		if (!(mode & FWRITE)) {
729 			error = EPERM;
730 		} else if (n != NULL) {
731 			error = EEXIST;
732 		} else if (nt == NULL) {
733 			error = ENOMEM;
734 		}
735 		if (error != 0) {
736 			MUTEX_EXIT(&ifs->ifs_ipf_natio);
737 			break;
738 		}
739 		bcopy((char *)nat, (char *)nt, sizeof(*n));
740 		error = nat_siocaddnat(nt, np, getlock, ifs);
741 		MUTEX_EXIT(&ifs->ifs_ipf_natio);
742 		if (error == 0)
743 			nt = NULL;
744 		break;
745 	case SIOCRMNAT :
746 		if (!(mode & FWRITE)) {
747 			error = EPERM;
748 			n = NULL;
749 		} else if (n == NULL) {
750 			error = ESRCH;
751 		}
752 
753 		if (error != 0) {
754 			MUTEX_EXIT(&ifs->ifs_ipf_natio);
755 			break;
756 		}
757 		nat_siocdelnat(n, np, getlock, ifs);
758 
759 		MUTEX_EXIT(&ifs->ifs_ipf_natio);
760 		n = NULL;
761 		break;
762 	case SIOCGNATS :
763 		ifs->ifs_nat_stats.ns_table[0] = ifs->ifs_nat_table[0];
764 		ifs->ifs_nat_stats.ns_table[1] = ifs->ifs_nat_table[1];
765 		ifs->ifs_nat_stats.ns_list = ifs->ifs_nat_list;
766 		ifs->ifs_nat_stats.ns_maptable = ifs->ifs_maptable;
767 		ifs->ifs_nat_stats.ns_maplist = ifs->ifs_ipf_hm_maplist;
768 		ifs->ifs_nat_stats.ns_nattab_max = ifs->ifs_ipf_nattable_max;
769 		ifs->ifs_nat_stats.ns_nattab_sz = ifs->ifs_ipf_nattable_sz;
770 		ifs->ifs_nat_stats.ns_rultab_sz = ifs->ifs_ipf_natrules_sz;
771 		ifs->ifs_nat_stats.ns_rdrtab_sz = ifs->ifs_ipf_rdrrules_sz;
772 		ifs->ifs_nat_stats.ns_hostmap_sz = ifs->ifs_ipf_hostmap_sz;
773 		ifs->ifs_nat_stats.ns_instances = ifs->ifs_nat_instances;
774 		ifs->ifs_nat_stats.ns_apslist = ifs->ifs_ap_sess_list;
775 		error = fr_outobj(data, &ifs->ifs_nat_stats, IPFOBJ_NATSTAT);
776 		break;
777 	case SIOCGNATL :
778 	    {
779 		natlookup_t nl;
780 
781 		if (getlock) {
782 			READ_ENTER(&ifs->ifs_ipf_nat);
783 		}
784 		error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
785 		if (nl.nl_v != 6)
786 			nl.nl_v = 4;
787 		if (error == 0) {
788 			void *ptr;
789 
790 			switch (nl.nl_v)
791 			{
792 			case 4:
793 				ptr = nat_lookupredir(&nl, ifs);
794 				break;
795 #ifdef	USE_INET6
796 			case 6:
797 				ptr = nat6_lookupredir(&nl, ifs);
798 				break;
799 #endif
800 			default:
801 				ptr = NULL;
802 				break;
803 			}
804 
805 			if (ptr != NULL) {
806 				error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
807 			} else {
808 				error = ESRCH;
809 			}
810 		}
811 		if (getlock) {
812 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
813 		}
814 		break;
815 	    }
816 	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
817 		if (!(mode & FWRITE)) {
818 			error = EPERM;
819 			break;
820 		}
821 		if (getlock) {
822 			WRITE_ENTER(&ifs->ifs_ipf_nat);
823 		}
824 		error = BCOPYIN(data, &arg, sizeof(arg));
825 		if (error != 0) {
826 			error = EFAULT;
827 		} else {
828 			if (arg == FLUSH_LIST)
829 				ret = nat_clearlist(ifs);
830 			else if (VALID_TABLE_FLUSH_OPT(arg))
831 				ret = nat_flushtable(arg, ifs);
832 			else
833 				error = EINVAL;
834 		}
835 		if (getlock) {
836 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
837 		}
838 		if (error == 0) {
839 			error = BCOPYOUT(&ret, data, sizeof(ret));
840 			if (error != 0)
841 				error = EFAULT;
842 		}
843 		break;
844 	case SIOCPROXY :
845 		error = appr_ioctl(data, cmd, mode, ifs);
846 		break;
847 	case SIOCSTLCK :
848 		if (!(mode & FWRITE)) {
849 			error = EPERM;
850 		} else {
851 			error = fr_lock(data, &ifs->ifs_fr_nat_lock);
852 		}
853 		break;
854 	case SIOCSTPUT :
855 		if ((mode & FWRITE) != 0) {
856 			error = fr_natputent(data, getlock, ifs);
857 		} else {
858 			error = EACCES;
859 		}
860 		break;
861 	case SIOCSTGSZ :
862 		if (ifs->ifs_fr_nat_lock) {
863 			if (getlock) {
864 				READ_ENTER(&ifs->ifs_ipf_nat);
865 			}
866 			error = fr_natgetsz(data, ifs);
867 			if (getlock) {
868 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
869 			}
870 		} else
871 			error = EACCES;
872 		break;
873 	case SIOCSTGET :
874 		if (ifs->ifs_fr_nat_lock) {
875 			if (getlock) {
876 				READ_ENTER(&ifs->ifs_ipf_nat);
877 			}
878 			error = fr_natgetent(data, ifs);
879 			if (getlock) {
880 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
881 			}
882 		} else
883 			error = EACCES;
884 		break;
885 	case SIOCIPFDELTOK :
886 		error = BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
887 		if (error != 0) {
888 			error = EFAULT;
889 		} else {
890 			error = ipf_deltoken(arg, uid, ctx, ifs);
891 		}
892 		break;
893 	default :
894 		error = EINVAL;
895 		break;
896 	}
897 done:
898 	if (nt)
899 		KFREE(nt);
900 	return error;
901 }
902 
903 
904 /* ------------------------------------------------------------------------ */
905 /* Function:    nat_siocaddnat                                              */
906 /* Returns:     int - 0 == success, != 0 == failure                         */
907 /* Parameters:  n(I)       - pointer to new NAT rule                        */
908 /*              np(I)      - pointer to where to insert new NAT rule        */
909 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
910 /* Mutex Locks: ipf_natio                                                   */
911 /*                                                                          */
912 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
913 /* from information passed to the kernel, then add it  to the appropriate   */
914 /* NAT rule table(s).                                                       */
915 /* ------------------------------------------------------------------------ */
916 static int nat_siocaddnat(n, np, getlock, ifs)
917 ipnat_t *n, **np;
918 int getlock;
919 ipf_stack_t *ifs;
920 {
921 	int error = 0, i, j;
922 
923 	if (nat_resolverule(n, ifs) != 0)
924 		return ENOENT;
925 
926 	if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
927 		return EINVAL;
928 
929 	n->in_use = 0;
930 	if (n->in_redir & NAT_MAPBLK)
931 		n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
932 	else if (n->in_flags & IPN_AUTOPORTMAP)
933 		n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
934 	else if (n->in_flags & IPN_IPRANGE)
935 		n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
936 	else if (n->in_flags & IPN_SPLIT)
937 		n->in_space = 2;
938 	else if (n->in_outmsk != 0)
939 		n->in_space = ~ntohl(n->in_outmsk);
940 	else
941 		n->in_space = 1;
942 
943 	/*
944 	 * Calculate the number of valid IP addresses in the output
945 	 * mapping range.  In all cases, the range is inclusive of
946 	 * the start and ending IP addresses.
947 	 * If to a CIDR address, lose 2: broadcast + network address
948 	 *                               (so subtract 1)
949 	 * If to a range, add one.
950 	 * If to a single IP address, set to 1.
951 	 */
952 	if (n->in_space) {
953 		if ((n->in_flags & IPN_IPRANGE) != 0)
954 			n->in_space += 1;
955 		else
956 			n->in_space -= 1;
957 	} else
958 		n->in_space = 1;
959 
960 #ifdef	USE_INET6
961 	if (n->in_v == 6 && (n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0 &&
962 	    !IP6_ISONES(&n->in_out[1]) && !IP6_ISZERO(&n->in_out[1]))
963 		IP6_ADD(&n->in_out[0], 1, &n->in_next6)
964 	else if (n->in_v == 6 &&
965 	    (n->in_flags & IPN_SPLIT) && (n->in_redir & NAT_REDIRECT))
966 		n->in_next6 = n->in_in[0];
967 	else if (n->in_v == 6)
968 		n->in_next6 = n->in_out[0];
969 	else
970 #endif
971 	if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
972 	    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
973 		n->in_nip = ntohl(n->in_outip) + 1;
974 	else if ((n->in_flags & IPN_SPLIT) &&
975 		 (n->in_redir & NAT_REDIRECT))
976 		n->in_nip = ntohl(n->in_inip);
977 	else
978 		n->in_nip = ntohl(n->in_outip);
979 
980 	if (n->in_redir & NAT_MAP) {
981 		n->in_pnext = ntohs(n->in_pmin);
982 		/*
983 		 * Multiply by the number of ports made available.
984 		 */
985 		if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
986 			n->in_space *= (ntohs(n->in_pmax) -
987 					ntohs(n->in_pmin) + 1);
988 			/*
989 			 * Because two different sources can map to
990 			 * different destinations but use the same
991 			 * local IP#/port #.
992 			 * If the result is smaller than in_space, then
993 			 * we may have wrapped around 32bits.
994 			 */
995 			i = n->in_inmsk;
996 			if ((i != 0) && (i != 0xffffffff)) {
997 				j = n->in_space * (~ntohl(i) + 1);
998 				if (j >= n->in_space)
999 					n->in_space = j;
1000 				else
1001 					n->in_space = 0xffffffff;
1002 			}
1003 		}
1004 		/*
1005 		 * If no protocol is specified, multiple by 256 to allow for
1006 		 * at least one IP:IP mapping per protocol.
1007 		 */
1008 		if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
1009 				j = n->in_space * 256;
1010 				if (j >= n->in_space)
1011 					n->in_space = j;
1012 				else
1013 					n->in_space = 0xffffffff;
1014 		}
1015 	}
1016 
1017 	/* Otherwise, these fields are preset */
1018 
1019 	if (getlock) {
1020 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1021 	}
1022 	n->in_next = NULL;
1023 	*np = n;
1024 
1025 	if (n->in_age[0] != 0)
1026 	    n->in_tqehead[0] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
1027 						  n->in_age[0], ifs);
1028 
1029 	if (n->in_age[1] != 0)
1030 	    n->in_tqehead[1] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
1031 						  n->in_age[1], ifs);
1032 
1033 	if (n->in_redir & NAT_REDIRECT) {
1034 		n->in_flags &= ~IPN_NOTDST;
1035 		switch (n->in_v)
1036 		{
1037 		case 4 :
1038 			nat_addrdr(n, ifs);
1039 			break;
1040 #ifdef	USE_INET6
1041 		case 6 :
1042 			nat6_addrdr(n, ifs);
1043 			break;
1044 #endif
1045 		default :
1046 			break;
1047 		}
1048 	}
1049 	if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1050 		n->in_flags &= ~IPN_NOTSRC;
1051 		switch (n->in_v)
1052 		{
1053 		case 4 :
1054 			nat_addnat(n, ifs);
1055 			break;
1056 #ifdef	USE_INET6
1057 		case 6 :
1058 			nat6_addnat(n, ifs);
1059 			break;
1060 #endif
1061 		default :
1062 			break;
1063 		}
1064 	}
1065 	n = NULL;
1066 	ifs->ifs_nat_stats.ns_rules++;
1067 	if (getlock) {
1068 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* WRITE */
1069 	}
1070 
1071 	return error;
1072 }
1073 
1074 
1075 /* ------------------------------------------------------------------------ */
1076 /* Function:    nat_resolvrule                                              */
1077 /* Returns:     int - 0 == success, -1 == failure                           */
1078 /* Parameters:  n(I)  - pointer to NAT rule                                 */
1079 /*                                                                          */
1080 /* Resolve some of the details inside the NAT rule.  Includes resolving	    */
1081 /* any specified interfaces and proxy labels, and determines whether or not */
1082 /* all proxy labels are correctly specified.				    */
1083 /*									    */
1084 /* Called by nat_siocaddnat() (SIOCADNAT) and fr_natputent (SIOCSTPUT).     */
1085 /* ------------------------------------------------------------------------ */
1086 static int nat_resolverule(n, ifs)
1087 ipnat_t *n;
1088 ipf_stack_t *ifs;
1089 {
1090 	n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1091 	n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], n->in_v, ifs);
1092 
1093 	n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1094 	if (n->in_ifnames[1][0] == '\0') {
1095 		(void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1096 		n->in_ifps[1] = n->in_ifps[0];
1097 	} else {
1098 		n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], n->in_v, ifs);
1099 	}
1100 
1101 	if (n->in_plabel[0] != '\0') {
1102 		n->in_apr = appr_lookup(n->in_p, n->in_plabel, ifs);
1103 		if (n->in_apr == NULL)
1104 			return -1;
1105 	}
1106 	return 0;
1107 }
1108 
1109 
1110 /* ------------------------------------------------------------------------ */
1111 /* Function:    nat_siocdelnat                                              */
1112 /* Returns:     int - 0 == success, != 0 == failure                         */
1113 /* Parameters:  n(I)       - pointer to new NAT rule                        */
1114 /*              np(I)      - pointer to where to insert new NAT rule        */
1115 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
1116 /* Mutex Locks: ipf_natio                                                   */
1117 /*                                                                          */
1118 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1119 /* from information passed to the kernel, then add it  to the appropriate   */
1120 /* NAT rule table(s).                                                       */
1121 /* ------------------------------------------------------------------------ */
1122 static void nat_siocdelnat(n, np, getlock, ifs)
1123 ipnat_t *n, **np;
1124 int getlock;
1125 ipf_stack_t *ifs;
1126 {
1127 	int i;
1128 
1129 	if (getlock) {
1130 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1131 	}
1132 	if (n->in_redir & NAT_REDIRECT)
1133 		nat_delrdr(n);
1134 	if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1135 		nat_delnat(n);
1136 	if (ifs->ifs_nat_list == NULL) {
1137 		ifs->ifs_nat_masks = 0;
1138 		ifs->ifs_rdr_masks = 0;
1139 		for (i = 0; i < 4; i++) {
1140 			ifs->ifs_nat6_masks[i] = 0;
1141 			ifs->ifs_rdr6_masks[i] = 0;
1142 		}
1143 	}
1144 
1145 	if (n->in_tqehead[0] != NULL) {
1146 		if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1147 			fr_freetimeoutqueue(n->in_tqehead[0], ifs);
1148 		}
1149 	}
1150 
1151 	if (n->in_tqehead[1] != NULL) {
1152 		if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1153 			fr_freetimeoutqueue(n->in_tqehead[1], ifs);
1154 		}
1155 	}
1156 
1157 	*np = n->in_next;
1158 
1159 	if (n->in_use == 0) {
1160 		if (n->in_apr)
1161 			appr_free(n->in_apr);
1162 		KFREE(n);
1163 		ifs->ifs_nat_stats.ns_rules--;
1164 	} else {
1165 		n->in_flags |= IPN_DELETE;
1166 		n->in_next = NULL;
1167 	}
1168 	if (getlock) {
1169 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* READ/WRITE */
1170 	}
1171 }
1172 
1173 
1174 /* ------------------------------------------------------------------------ */
1175 /* Function:    fr_natgetsz                                                 */
1176 /* Returns:     int - 0 == success, != 0 is the error value.                */
1177 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1178 /*                        get the size of.                                  */
1179 /*                                                                          */
1180 /* Handle SIOCSTGSZ.                                                        */
1181 /* Return the size of the nat list entry to be copied back to user space.   */
1182 /* The size of the entry is stored in the ng_sz field and the enture natget */
1183 /* structure is copied back to the user.                                    */
1184 /* ------------------------------------------------------------------------ */
1185 static int fr_natgetsz(data, ifs)
1186 caddr_t data;
1187 ipf_stack_t *ifs;
1188 {
1189 	ap_session_t *aps;
1190 	nat_t *nat, *n;
1191 	natget_t ng;
1192 	int err;
1193 
1194 	err = BCOPYIN(data, &ng, sizeof(ng));
1195 	if (err != 0)
1196 		return EFAULT;
1197 
1198 	nat = ng.ng_ptr;
1199 	if (!nat) {
1200 		nat = ifs->ifs_nat_instances;
1201 		ng.ng_sz = 0;
1202 		/*
1203 		 * Empty list so the size returned is 0.  Simple.
1204 		 */
1205 		if (nat == NULL) {
1206 			err = BCOPYOUT(&ng, data, sizeof(ng));
1207 			if (err != 0) {
1208 				return EFAULT;
1209 			} else {
1210 				return 0;
1211 			}
1212 		}
1213 	} else {
1214 		/*
1215 		 * Make sure the pointer we're copying from exists in the
1216 		 * current list of entries.  Security precaution to prevent
1217 		 * copying of random kernel data.
1218 		 */
1219 		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1220 			if (n == nat)
1221 				break;
1222 		if (!n)
1223 			return ESRCH;
1224 	}
1225 
1226 	/*
1227 	 * Incluse any space required for proxy data structures.
1228 	 */
1229 	ng.ng_sz = sizeof(nat_save_t);
1230 	aps = nat->nat_aps;
1231 	if (aps != NULL) {
1232 		ng.ng_sz += sizeof(ap_session_t) - 4;
1233 		if (aps->aps_data != 0)
1234 			ng.ng_sz += aps->aps_psiz;
1235 	}
1236 
1237 	err = BCOPYOUT(&ng, data, sizeof(ng));
1238 	if (err != 0)
1239 		return EFAULT;
1240 	return 0;
1241 }
1242 
1243 
1244 /* ------------------------------------------------------------------------ */
1245 /* Function:    fr_natgetent                                                */
1246 /* Returns:     int - 0 == success, != 0 is the error value.                */
1247 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1248 /*                        to NAT structure to copy out.                     */
1249 /*                                                                          */
1250 /* Handle SIOCSTGET.                                                        */
1251 /* Copies out NAT entry to user space.  Any additional data held for a      */
1252 /* proxy is also copied, as to is the NAT rule which was responsible for it */
1253 /* ------------------------------------------------------------------------ */
1254 static int fr_natgetent(data, ifs)
1255 caddr_t data;
1256 ipf_stack_t *ifs;
1257 {
1258 	int error, outsize;
1259 	ap_session_t *aps;
1260 	nat_save_t *ipn, ipns;
1261 	nat_t *n, *nat;
1262 
1263 	error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1264 	if (error != 0)
1265 		return error;
1266 
1267 	if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1268 		return EINVAL;
1269 
1270 	KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1271 	if (ipn == NULL)
1272 		return ENOMEM;
1273 
1274 	ipn->ipn_dsize = ipns.ipn_dsize;
1275 	nat = ipns.ipn_next;
1276 	if (nat == NULL) {
1277 		nat = ifs->ifs_nat_instances;
1278 		if (nat == NULL) {
1279 			if (ifs->ifs_nat_instances == NULL)
1280 				error = ENOENT;
1281 			goto finished;
1282 		}
1283 	} else {
1284 		/*
1285 		 * Make sure the pointer we're copying from exists in the
1286 		 * current list of entries.  Security precaution to prevent
1287 		 * copying of random kernel data.
1288 		 */
1289 		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1290 			if (n == nat)
1291 				break;
1292 		if (n == NULL) {
1293 			error = ESRCH;
1294 			goto finished;
1295 		}
1296 	}
1297 	ipn->ipn_next = nat->nat_next;
1298 
1299 	/*
1300 	 * Copy the NAT structure.
1301 	 */
1302 	bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1303 
1304 	/*
1305 	 * If we have a pointer to the NAT rule it belongs to, save that too.
1306 	 */
1307 	if (nat->nat_ptr != NULL)
1308 		bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1309 		      sizeof(ipn->ipn_ipnat));
1310 
1311 	/*
1312 	 * If we also know the NAT entry has an associated filter rule,
1313 	 * save that too.
1314 	 */
1315 	if (nat->nat_fr != NULL)
1316 		bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1317 		      sizeof(ipn->ipn_fr));
1318 
1319 	/*
1320 	 * Last but not least, if there is an application proxy session set
1321 	 * up for this NAT entry, then copy that out too, including any
1322 	 * private data saved along side it by the proxy.
1323 	 */
1324 	aps = nat->nat_aps;
1325 	outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1326 	if (aps != NULL) {
1327 		char *s;
1328 
1329 		if (outsize < sizeof(*aps)) {
1330 			error = ENOBUFS;
1331 			goto finished;
1332 		}
1333 
1334 		s = ipn->ipn_data;
1335 		bcopy((char *)aps, s, sizeof(*aps));
1336 		s += sizeof(*aps);
1337 		outsize -= sizeof(*aps);
1338 		if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1339 			bcopy(aps->aps_data, s, aps->aps_psiz);
1340 		else
1341 			error = ENOBUFS;
1342 	}
1343 	if (error == 0) {
1344 		error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1345 	}
1346 
1347 finished:
1348 	if (ipn != NULL) {
1349 		KFREES(ipn, ipns.ipn_dsize);
1350 	}
1351 	return error;
1352 }
1353 
1354 /* ------------------------------------------------------------------------ */
1355 /* Function:    nat_calc_chksum_diffs					    */
1356 /* Returns:     void							    */
1357 /* Parameters:  nat	-	pointer to NAT table entry		    */
1358 /*                                                                          */
1359 /* Function calculates chksum deltas for IP header (nat_ipsumd) and TCP/UDP */
1360 /* headers (nat_sumd). The things for L4 (UDP/TCP) get complicated when     */
1361 /* we are dealing with partial chksum offload. For these cases we need to   */
1362 /* compute a 'partial chksum delta'. The 'partial chksum delta'is stored    */
1363 /* into nat_sumd[1], while ordinary chksum delta for TCP/UDP is in 	    */
1364 /* nat_sumd[0]. 							    */
1365 /*									    */
1366 /* The function accepts initialized NAT table entry and computes the deltas */
1367 /* from nat_inip/nat_outip members. The function is called right before	    */
1368 /* the new entry is inserted into the table.				    */
1369 /*									    */
1370 /* The ipsumd (IP hedaer chksum delta adjustment) is computed as a chksum   */
1371 /* of delta between original and new IP addresses.			    */
1372 /*									    */
1373 /* the nat_sumd[0] (TCP/UDP header chksum delta adjustment) is computed as  */
1374 /* a chkusm of delta between original an new IP addrress:port tupples.	    */
1375 /*									    */
1376 /* Some facts about chksum, we should remember:				    */
1377 /*	IP header chksum covers IP header only				    */
1378 /*									    */
1379 /*	TCP/UDP chksum covers data payload and so called pseudo header	    */
1380 /*		SRC, DST IP address					    */
1381 /*		SRC, DST Port						    */
1382 /*		length of payload					    */
1383 /*									    */
1384 /* The partial chksum delta (nat_sumd[1] is used to adjust db_ckusm16	    */
1385 /* member of dblk_t structure. The db_ckusm16 member is not part of 	    */
1386 /* IP/UDP/TCP header it is 16 bit value computed by NIC driver with partial */
1387 /* chksum offload capacbility for every inbound packet. The db_cksum16 is   */
1388 /* stored along with other IP packet data in dblk_t structure and used in   */
1389 /* for IP/UDP/TCP chksum validation later in ip.c. 			    */
1390 /*									    */
1391 /* The partial chksum delta (adjustment, nat_sumd[1]) is computed as chksum */
1392 /* of delta between new and orig address. NOTE: the order of operands for   */
1393 /* partial delta operation is swapped compared to computing the IP/TCP/UDP  */
1394 /* header adjustment. It is by design see (IP_CKSUM_RECV() macro in ip.c).  */
1395 /*									    */
1396 /* ------------------------------------------------------------------------ */
1397 void nat_calc_chksum_diffs(nat)
1398 nat_t *nat;
1399 {
1400 	u_32_t	sum_orig = 0;
1401 	u_32_t	sum_changed = 0;
1402 	u_32_t	sumd;
1403 	u_32_t	ipsum_orig = 0;
1404 	u_32_t	ipsum_changed = 0;
1405 
1406 	if (nat->nat_v != 4 && nat->nat_v != 6)
1407 		return;
1408 
1409 	/*
1410 	 * the switch calculates operands for CALC_SUMD(),
1411 	 * which will compute the partial chksum delta.
1412 	 */
1413 	switch (nat->nat_dir)
1414 	{
1415 	case NAT_INBOUND:
1416 		/*
1417 		 * we are dealing with RDR rule (DST address gets
1418 		 * modified on packet from client)
1419 		 */
1420 		if (nat->nat_v == 4) {
1421 			sum_changed = LONG_SUM(ntohl(nat->nat_inip.s_addr));
1422 			sum_orig = LONG_SUM(ntohl(nat->nat_outip.s_addr));
1423 		} else {
1424 			sum_changed = LONG_SUM6(&nat->nat_inip6);
1425 			sum_orig = LONG_SUM6(&nat->nat_outip6);
1426 		}
1427 		break;
1428 	case NAT_OUTBOUND:
1429 		/*
1430 		 * we are dealing with MAP rule (SRC address gets
1431 		 * modified on packet from client)
1432 		 */
1433 		if (nat->nat_v == 4) {
1434 			sum_changed = LONG_SUM(ntohl(nat->nat_outip.s_addr));
1435 			sum_orig = LONG_SUM(ntohl(nat->nat_inip.s_addr));
1436 		} else {
1437 			sum_changed = LONG_SUM6(&nat->nat_outip6);
1438 			sum_orig = LONG_SUM6(&nat->nat_inip6);
1439 		}
1440 		break;
1441 	default: ;
1442 		break;
1443 	}
1444 
1445 	/*
1446 	 * we also preserve CALC_SUMD() operands here, for IP chksum delta
1447 	 * calculation, which happens at the end of function.
1448 	 */
1449 	ipsum_changed = sum_changed;
1450 	ipsum_orig = sum_orig;
1451 	/*
1452 	 * NOTE: the order of operands for partial chksum adjustment
1453 	 * computation has to be swapped!
1454 	 */
1455 	CALC_SUMD(sum_changed, sum_orig, sumd);
1456 	nat->nat_sumd[1] = (sumd & 0xffff) + (sumd >> 16);
1457 
1458 	if (nat->nat_flags & (IPN_TCPUDP | IPN_ICMPQUERY)) {
1459 
1460 		/*
1461 		 * switch calculates operands for CALC_SUMD(), which will
1462 		 * compute the full chksum delta.
1463 		 */
1464 		switch (nat->nat_dir)
1465 		{
1466 		case NAT_INBOUND:
1467 			if (nat->nat_v == 4) {
1468 				sum_changed = LONG_SUM(
1469 				    ntohl(nat->nat_inip.s_addr) +
1470 				    ntohs(nat->nat_inport));
1471 				sum_orig = LONG_SUM(
1472 				    ntohl(nat->nat_outip.s_addr) +
1473 				    ntohs(nat->nat_outport));
1474 			} else {
1475 				sum_changed = LONG_SUM6(&nat->nat_inip6) +
1476 				    ntohs(nat->nat_inport);
1477 				sum_orig = LONG_SUM6(&nat->nat_outip6) +
1478 				    ntohs(nat->nat_outport);
1479 			}
1480 			break;
1481 		case NAT_OUTBOUND:
1482 			if (nat->nat_v == 4) {
1483 				sum_changed = LONG_SUM(
1484 				    ntohl(nat->nat_outip.s_addr) +
1485 				    ntohs(nat->nat_outport));
1486 				sum_orig = LONG_SUM(
1487 				    ntohl(nat->nat_inip.s_addr) +
1488 				    ntohs(nat->nat_inport));
1489 			} else {
1490 				sum_changed = LONG_SUM6(&nat->nat_outip6) +
1491 				    ntohs(nat->nat_outport);
1492 				sum_orig = LONG_SUM6(&nat->nat_inip6) +
1493 				    ntohs(nat->nat_inport);
1494 			}
1495 			break;
1496 		default: ;
1497 			break;
1498 		}
1499 
1500 		CALC_SUMD(sum_orig, sum_changed, sumd);
1501 		nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
1502 
1503 		if (!(nat->nat_flags & IPN_TCPUDP)) {
1504 			/*
1505 			 * partial HW chksum offload works for TCP/UDP headers only,
1506 			 * so we need to enforce full chksum adjustment for ICMP
1507 			 */
1508 			nat->nat_sumd[1] = nat->nat_sumd[0];
1509 		}
1510 	}
1511 	else
1512 		nat->nat_sumd[0] = nat->nat_sumd[1];
1513 
1514 	/*
1515 	 * we may reuse the already computed nat_sumd[0] for IP header chksum
1516 	 * adjustment in case the L4 (TCP/UDP header) is not changed by NAT.
1517 	 */
1518 	if (nat->nat_v == 4) {
1519 		if (NAT_HAS_L4_CHANGED(nat)) {
1520 			/*
1521 			 * bad luck, NAT changes also the L4 header, use IP
1522 			 * addresses to compute chksum adjustment for IP header.
1523 			 */
1524 			CALC_SUMD(ipsum_orig, ipsum_changed, sumd);
1525 			nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
1526 		} else {
1527 			/*
1528 			 * the NAT does not change L4 hdr -> reuse chksum
1529 			 * adjustment for IP hdr.
1530 			 */
1531 			nat->nat_ipsumd = nat->nat_sumd[0];
1532 
1533 			/*
1534 			 * if L4 header does not use chksum - zero out deltas
1535 			 */
1536 			if (!(nat->nat_flags & IPN_TCPUDP)) {
1537 				nat->nat_sumd[0] = 0;
1538 				nat->nat_sumd[1] = 0;
1539 			}
1540 		}
1541 	}
1542 
1543 	return;
1544 }
1545 
1546 /* ------------------------------------------------------------------------ */
1547 /* Function:    fr_natputent                                                */
1548 /* Returns:     int - 0 == success, != 0 is the error value.                */
1549 /* Parameters:  data(I)    - pointer to natget structure with NAT           */
1550 /*                           structure information to load into the kernel  */
1551 /*              getlock(I) - flag indicating whether or not a write lock    */
1552 /*                           on ipf_nat is already held.                    */
1553 /*              ifs        - ipf stack instance                             */
1554 /*                                                                          */
1555 /* Handle SIOCSTPUT.                                                        */
1556 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1557 /* firewall rule data structures, if pointers to them indicate so.          */
1558 /* ------------------------------------------------------------------------ */
1559 static int fr_natputent(data, getlock, ifs)
1560 caddr_t data;
1561 int getlock;
1562 ipf_stack_t *ifs;
1563 {
1564 	nat_save_t ipn, *ipnn;
1565 	ap_session_t *aps;
1566 	nat_t *n, *nat;
1567 	frentry_t *fr;
1568 	fr_info_t fin;
1569 	ipnat_t *in;
1570 	int error;
1571 
1572 	error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1573 	if (error != 0)
1574 		return error;
1575 
1576 	/*
1577 	 * Trigger automatic call to nat_flushtable() if the
1578 	 * table has reached capcity specified by hi watermark.
1579 	 */
1580 	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_level_hi)
1581 		ifs->ifs_nat_doflush = 1;
1582 
1583 	/*
1584 	 * If automatic flushing did not do its job, and the table
1585 	 * has filled up, don't try to create a new entry.
1586 	 */
1587 	if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) {
1588 		ifs->ifs_nat_stats.ns_memfail++;
1589 		return ENOMEM;
1590 	}
1591 
1592 	/*
1593 	 * Initialise early because of code at junkput label.
1594 	 */
1595 	in = NULL;
1596 	aps = NULL;
1597 	nat = NULL;
1598 	ipnn = NULL;
1599 
1600 	/*
1601 	 * New entry, copy in the rest of the NAT entry if it's size is more
1602 	 * than just the nat_t structure.
1603 	 */
1604 	fr = NULL;
1605 	if (ipn.ipn_dsize > sizeof(ipn)) {
1606 		if (ipn.ipn_dsize > 81920) {
1607 			error = ENOMEM;
1608 			goto junkput;
1609 		}
1610 
1611 		KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1612 		if (ipnn == NULL)
1613 			return ENOMEM;
1614 
1615 		error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1616 		if (error != 0) {
1617 			error = EFAULT;
1618 			goto junkput;
1619 		}
1620 	} else
1621 		ipnn = &ipn;
1622 
1623 	KMALLOC(nat, nat_t *);
1624 	if (nat == NULL) {
1625 		error = ENOMEM;
1626 		goto junkput;
1627 	}
1628 
1629 	bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1630 	/*
1631 	 * Initialize all these so that nat_delete() doesn't cause a crash.
1632 	 */
1633 	bzero((char *)nat, offsetof(struct nat, nat_tqe));
1634 	nat->nat_tqe.tqe_pnext = NULL;
1635 	nat->nat_tqe.tqe_next = NULL;
1636 	nat->nat_tqe.tqe_ifq = NULL;
1637 	nat->nat_tqe.tqe_parent = nat;
1638 
1639 	/*
1640 	 * Restore the rule associated with this nat session
1641 	 */
1642 	in = ipnn->ipn_nat.nat_ptr;
1643 	if (in != NULL) {
1644 		KMALLOC(in, ipnat_t *);
1645 		nat->nat_ptr = in;
1646 		if (in == NULL) {
1647 			error = ENOMEM;
1648 			goto junkput;
1649 		}
1650 		bzero((char *)in, offsetof(struct ipnat, in_next6));
1651 		bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1652 		in->in_use = 1;
1653 		in->in_flags |= IPN_DELETE;
1654 
1655 		ATOMIC_INC(ifs->ifs_nat_stats.ns_rules);
1656 
1657 		if (nat_resolverule(in, ifs) != 0) {
1658 			error = ESRCH;
1659 			goto junkput;
1660 		}
1661 	}
1662 
1663 	/*
1664 	 * Check that the NAT entry doesn't already exist in the kernel.
1665 	 */
1666 	if (nat->nat_v != 6)
1667 		nat->nat_v = 4;
1668 	bzero((char *)&fin, sizeof(fin));
1669 	fin.fin_p = nat->nat_p;
1670 	fin.fin_ifs = ifs;
1671 	if (nat->nat_dir == NAT_OUTBOUND) {
1672 		fin.fin_data[0] = ntohs(nat->nat_oport);
1673 		fin.fin_data[1] = ntohs(nat->nat_outport);
1674 		fin.fin_ifp = nat->nat_ifps[0];
1675 		if (getlock) {
1676 			READ_ENTER(&ifs->ifs_ipf_nat);
1677 		}
1678 
1679 		switch (nat->nat_v)
1680 		{
1681 		case 4:
1682 			fin.fin_v = nat->nat_v;
1683 			n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1684 			    nat->nat_oip, nat->nat_outip);
1685 			break;
1686 #ifdef USE_INET6
1687 		case 6:
1688 			n = nat6_inlookup(&fin, nat->nat_flags, fin.fin_p,
1689 			    &nat->nat_oip6.in6, &nat->nat_outip6.in6);
1690 			break;
1691 #endif
1692 		default:
1693 			n = NULL;
1694 			break;
1695 		}
1696 
1697 		if (getlock) {
1698 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1699 		}
1700 		if (n != NULL) {
1701 			error = EEXIST;
1702 			goto junkput;
1703 		}
1704 	} else if (nat->nat_dir == NAT_INBOUND) {
1705 		fin.fin_data[0] = ntohs(nat->nat_inport);
1706 		fin.fin_data[1] = ntohs(nat->nat_oport);
1707 		fin.fin_ifp = nat->nat_ifps[1];
1708 		if (getlock) {
1709 			READ_ENTER(&ifs->ifs_ipf_nat);
1710 		}
1711 
1712 		switch (nat->nat_v)
1713 		{
1714 		case 4:
1715 			n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1716 			    nat->nat_inip, nat->nat_oip);
1717 			break;
1718 #ifdef USE_INET6
1719 		case 6:
1720 			n = nat6_outlookup(&fin, nat->nat_flags, fin.fin_p,
1721 			    &nat->nat_inip6.in6, &nat->nat_oip6.in6);
1722 			break;
1723 #endif
1724 		default:
1725 			n = NULL;
1726 			break;
1727 		}
1728 
1729 		if (getlock) {
1730 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1731 		}
1732 		if (n != NULL) {
1733 			error = EEXIST;
1734 			goto junkput;
1735 		}
1736 	} else {
1737 		error = EINVAL;
1738 		goto junkput;
1739 	}
1740 
1741 	/*
1742 	 * Restore ap_session_t structure.  Include the private data allocated
1743 	 * if it was there.
1744 	 */
1745 	aps = nat->nat_aps;
1746 	if (aps != NULL) {
1747 		KMALLOC(aps, ap_session_t *);
1748 		nat->nat_aps = aps;
1749 		if (aps == NULL) {
1750 			error = ENOMEM;
1751 			goto junkput;
1752 		}
1753 		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1754 		if (in != NULL)
1755 			aps->aps_apr = in->in_apr;
1756 		else
1757 			aps->aps_apr = NULL;
1758 		if (aps->aps_psiz != 0) {
1759 			if (aps->aps_psiz > 81920) {
1760 				error = ENOMEM;
1761 				goto junkput;
1762 			}
1763 			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1764 			if (aps->aps_data == NULL) {
1765 				error = ENOMEM;
1766 				goto junkput;
1767 			}
1768 			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1769 			      aps->aps_psiz);
1770 		} else {
1771 			aps->aps_psiz = 0;
1772 			aps->aps_data = NULL;
1773 		}
1774 	}
1775 
1776 	/*
1777 	 * If there was a filtering rule associated with this entry then
1778 	 * build up a new one.
1779 	 */
1780 	fr = nat->nat_fr;
1781 	if (fr != NULL) {
1782 		if ((nat->nat_flags & SI_NEWFR) != 0) {
1783 			KMALLOC(fr, frentry_t *);
1784 			nat->nat_fr = fr;
1785 			if (fr == NULL) {
1786 				error = ENOMEM;
1787 				goto junkput;
1788 			}
1789 			ipnn->ipn_nat.nat_fr = fr;
1790 			(void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1791 			bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1792 
1793 			fr->fr_ref = 1;
1794 			fr->fr_dsize = 0;
1795 			fr->fr_data = NULL;
1796 			fr->fr_type = FR_T_NONE;
1797 
1798 			MUTEX_NUKE(&fr->fr_lock);
1799 			MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1800 		} else {
1801 			if (getlock) {
1802 				READ_ENTER(&ifs->ifs_ipf_nat);
1803 			}
1804 			for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1805 				if (n->nat_fr == fr)
1806 					break;
1807 
1808 			if (n != NULL) {
1809 				MUTEX_ENTER(&fr->fr_lock);
1810 				fr->fr_ref++;
1811 				MUTEX_EXIT(&fr->fr_lock);
1812 			}
1813 			if (getlock) {
1814 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1815 			}
1816 			if (!n) {
1817 				error = ESRCH;
1818 				goto junkput;
1819 			}
1820 		}
1821 	}
1822 
1823 	if (ipnn != &ipn) {
1824 		KFREES(ipnn, ipn.ipn_dsize);
1825 		ipnn = NULL;
1826 	}
1827 
1828 	nat_calc_chksum_diffs(nat);
1829 
1830 	if (getlock) {
1831 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1832 	}
1833 
1834 	nat_calc_chksum_diffs(nat);
1835 
1836 	switch (nat->nat_v)
1837 	{
1838 	case 4 :
1839 		error = nat_insert(nat, nat->nat_rev, ifs);
1840 		break;
1841 #ifdef USE_INET6
1842 	case 6 :
1843 		error = nat6_insert(nat, nat->nat_rev, ifs);
1844 		break;
1845 #endif
1846 	default :
1847 		break;
1848 	}
1849 
1850 	if ((error == 0) && (aps != NULL)) {
1851 		aps->aps_next = ifs->ifs_ap_sess_list;
1852 		ifs->ifs_ap_sess_list = aps;
1853 	}
1854 	if (getlock) {
1855 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1856 	}
1857 
1858 	if (error == 0)
1859 		return 0;
1860 
1861 	error = ENOMEM;
1862 
1863 junkput:
1864 	if (fr != NULL)
1865 		(void) fr_derefrule(&fr, ifs);
1866 
1867 	if ((ipnn != NULL) && (ipnn != &ipn)) {
1868 		KFREES(ipnn, ipn.ipn_dsize);
1869 	}
1870 	if (nat != NULL) {
1871 		if (aps != NULL) {
1872 			if (aps->aps_data != NULL) {
1873 				KFREES(aps->aps_data, aps->aps_psiz);
1874 			}
1875 			KFREE(aps);
1876 		}
1877 		if (in != NULL) {
1878 			if (in->in_apr)
1879 				appr_free(in->in_apr);
1880 			KFREE(in);
1881 		}
1882 		KFREE(nat);
1883 	}
1884 	return error;
1885 }
1886 
1887 
1888 /* ------------------------------------------------------------------------ */
1889 /* Function:    nat_delete                                                  */
1890 /* Returns:     int	- 0 if entry deleted. Otherwise, ref count on entry */
1891 /* Parameters:  nat	- pointer to the NAT entry to delete		    */
1892 /*		logtype	- type of LOG record to create before deleting	    */
1893 /*		ifs	- ipf stack instance				    */
1894 /* Write Lock:  ipf_nat                                                     */
1895 /*                                                                          */
1896 /* Delete a nat entry from the various lists and table.  If NAT logging is  */
1897 /* enabled then generate a NAT log record for this event.                   */
1898 /* ------------------------------------------------------------------------ */
1899 int nat_delete(nat, logtype, ifs)
1900 struct nat *nat;
1901 int logtype;
1902 ipf_stack_t *ifs;
1903 {
1904 	struct ipnat *ipn;
1905 	int removed = 0;
1906 
1907 	if (logtype != 0 && ifs->ifs_nat_logging != 0)
1908 		nat_log(nat, logtype, ifs);
1909 
1910 	/*
1911 	 * Start by removing the entry from the hash table of nat entries
1912 	 * so it will not be "used" again.
1913 	 *
1914 	 * It will remain in the "list" of nat entries until all references
1915 	 * have been accounted for.
1916 	 */
1917 	if ((nat->nat_phnext[0] != NULL) && (nat->nat_phnext[1] != NULL)) {
1918 		removed = 1;
1919 
1920 		ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1921 		ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
1922 
1923 		*nat->nat_phnext[0] = nat->nat_hnext[0];
1924 		if (nat->nat_hnext[0] != NULL) {
1925 			nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1926 			nat->nat_hnext[0] = NULL;
1927 		}
1928 		nat->nat_phnext[0] = NULL;
1929 
1930 		*nat->nat_phnext[1] = nat->nat_hnext[1];
1931 		if (nat->nat_hnext[1] != NULL) {
1932 			nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1933 			nat->nat_hnext[1] = NULL;
1934 		}
1935 		nat->nat_phnext[1] = NULL;
1936 
1937 		if ((nat->nat_flags & SI_WILDP) != 0)
1938 			ifs->ifs_nat_stats.ns_wilds--;
1939 	}
1940 
1941 	/*
1942 	 * Next, remove it from the timeout queue it is in.
1943 	 */
1944 	fr_deletequeueentry(&nat->nat_tqe);
1945 
1946 	if (nat->nat_me != NULL) {
1947 		*nat->nat_me = NULL;
1948 		nat->nat_me = NULL;
1949 	}
1950 
1951 	MUTEX_ENTER(&nat->nat_lock);
1952  	if (logtype == NL_DESTROY) {
1953  		/*
1954  		 * NL_DESTROY should only be passed when nat_ref >= 2.
1955  		 * This happens when a nat'd packet is blocked, we have
1956 		 * just created the nat table entry (reason why the ref
1957 		 * count is 2 or higher), but and we want to throw away
1958 		 * that NAT session as result of the blocked packet.
1959  		 */
1960  		if (nat->nat_ref > 2) {
1961  			nat->nat_ref -= 2;
1962  			MUTEX_EXIT(&nat->nat_lock);
1963  			if (removed)
1964  				ifs->ifs_nat_stats.ns_orphans++;
1965  			return (nat->nat_ref);
1966  		}
1967  	} else if (nat->nat_ref > 1) {
1968 		nat->nat_ref--;
1969 		MUTEX_EXIT(&nat->nat_lock);
1970  		if (removed)
1971  			ifs->ifs_nat_stats.ns_orphans++;
1972 		return (nat->nat_ref);
1973 	}
1974 	MUTEX_EXIT(&nat->nat_lock);
1975 
1976 	nat->nat_ref = 0;
1977 
1978 	/*
1979 	 * If entry had already been removed,
1980 	 * it means we're cleaning up an orphan.
1981 	 */
1982  	if (!removed)
1983  		ifs->ifs_nat_stats.ns_orphans--;
1984 
1985 #ifdef	IPFILTER_SYNC
1986 	if (nat->nat_sync)
1987 		ipfsync_del(nat->nat_sync);
1988 #endif
1989 
1990 	/*
1991 	 * Now remove it from master list of nat table entries
1992 	 */
1993 	if (nat->nat_pnext != NULL) {
1994 		*nat->nat_pnext = nat->nat_next;
1995 		if (nat->nat_next != NULL) {
1996 			nat->nat_next->nat_pnext = nat->nat_pnext;
1997 			nat->nat_next = NULL;
1998 		}
1999 		nat->nat_pnext = NULL;
2000 	}
2001 
2002 	if (nat->nat_fr != NULL)
2003 		(void)fr_derefrule(&nat->nat_fr, ifs);
2004 
2005 	if (nat->nat_hm != NULL)
2006 		fr_hostmapdel(&nat->nat_hm);
2007 
2008 	/*
2009 	 * If there is an active reference from the nat entry to its parent
2010 	 * rule, decrement the rule's reference count and free it too if no
2011 	 * longer being used.
2012 	 */
2013 	ipn = nat->nat_ptr;
2014 	if (ipn != NULL) {
2015 		ipn->in_space++;
2016 		ipn->in_use--;
2017 		if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) {
2018 			if (ipn->in_apr)
2019 				appr_free(ipn->in_apr);
2020 			KFREE(ipn);
2021 			ifs->ifs_nat_stats.ns_rules--;
2022 		}
2023 	}
2024 
2025 	MUTEX_DESTROY(&nat->nat_lock);
2026 
2027 	aps_free(nat->nat_aps, ifs);
2028 	ifs->ifs_nat_stats.ns_inuse--;
2029 
2030 	/*
2031 	 * If there's a fragment table entry too for this nat entry, then
2032 	 * dereference that as well.  This is after nat_lock is released
2033 	 * because of Tru64.
2034 	 */
2035 	fr_forgetnat((void *)nat, ifs);
2036 
2037 	KFREE(nat);
2038 
2039 	return (0);
2040 }
2041 
2042 
2043 /* ------------------------------------------------------------------------ */
2044 /* Function:    nat_clearlist                                               */
2045 /* Returns:     int - number of NAT/RDR rules deleted                       */
2046 /* Parameters:  Nil                                                         */
2047 /*                                                                          */
2048 /* Delete all rules in the current list of rules.  There is nothing elegant */
2049 /* about this cleanup: simply free all entries on the list of rules and     */
2050 /* clear out the tables used for hashed NAT rule lookups.                   */
2051 /* ------------------------------------------------------------------------ */
2052 static int nat_clearlist(ifs)
2053 ipf_stack_t *ifs;
2054 {
2055 	ipnat_t *n, **np = &ifs->ifs_nat_list;
2056 	int i = 0;
2057 
2058 	if (ifs->ifs_nat_rules != NULL)
2059 		bzero((char *)ifs->ifs_nat_rules,
2060 		      sizeof(*ifs->ifs_nat_rules) * ifs->ifs_ipf_natrules_sz);
2061 	if (ifs->ifs_rdr_rules != NULL)
2062 		bzero((char *)ifs->ifs_rdr_rules,
2063 		      sizeof(*ifs->ifs_rdr_rules) * ifs->ifs_ipf_rdrrules_sz);
2064 
2065 	while ((n = *np) != NULL) {
2066 		*np = n->in_next;
2067 		if (n->in_use == 0) {
2068 			if (n->in_apr != NULL)
2069 				appr_free(n->in_apr);
2070 			KFREE(n);
2071 			ifs->ifs_nat_stats.ns_rules--;
2072 		} else {
2073 			n->in_flags |= IPN_DELETE;
2074 			n->in_next = NULL;
2075 		}
2076 		i++;
2077 	}
2078 	ifs->ifs_nat_masks = 0;
2079 	ifs->ifs_rdr_masks = 0;
2080 	for (i = 0; i < 4; i++) {
2081 		ifs->ifs_nat6_masks[i] = 0;
2082 		ifs->ifs_rdr6_masks[i] = 0;
2083 	}
2084 	return i;
2085 }
2086 
2087 
2088 /* ------------------------------------------------------------------------ */
2089 /* Function:    nat_newmap                                                  */
2090 /* Returns:     int - -1 == error, 0 == success                             */
2091 /* Parameters:  fin(I) - pointer to packet information                      */
2092 /*              nat(I) - pointer to NAT entry                               */
2093 /*              ni(I)  - pointer to structure with misc. information needed */
2094 /*                       to create new NAT entry.                           */
2095 /*                                                                          */
2096 /* Given an empty NAT structure, populate it with new information about a   */
2097 /* new NAT session, as defined by the matching NAT rule.                    */
2098 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2099 /* to the new IP address for the translation.                               */
2100 /* ------------------------------------------------------------------------ */
2101 static INLINE int nat_newmap(fin, nat, ni)
2102 fr_info_t *fin;
2103 nat_t *nat;
2104 natinfo_t *ni;
2105 {
2106 	u_short st_port, dport, sport, port, sp, dp;
2107 	struct in_addr in, inb;
2108 	hostmap_t *hm;
2109 	u_32_t flags;
2110 	u_32_t st_ip;
2111 	ipnat_t *np;
2112 	nat_t *natl;
2113 	int l;
2114 	ipf_stack_t *ifs = fin->fin_ifs;
2115 
2116 	/*
2117 	 * If it's an outbound packet which doesn't match any existing
2118 	 * record, then create a new port
2119 	 */
2120 	l = 0;
2121 	hm = NULL;
2122 	np = ni->nai_np;
2123 	st_ip = np->in_nip;
2124 	st_port = np->in_pnext;
2125 	flags = ni->nai_flags;
2126 	sport = ni->nai_sport;
2127 	dport = ni->nai_dport;
2128 
2129 	/*
2130 	 * Do a loop until we either run out of entries to try or we find
2131 	 * a NAT mapping that isn't currently being used.  This is done
2132 	 * because the change to the source is not (usually) being fixed.
2133 	 */
2134 	do {
2135 		port = 0;
2136 		in.s_addr = htonl(np->in_nip);
2137 		if (l == 0) {
2138 			/*
2139 			 * Check to see if there is an existing NAT
2140 			 * setup for this IP address pair.
2141 			 */
2142 			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2143 					 in, 0, ifs);
2144 			if (hm != NULL)
2145 				in.s_addr = hm->hm_mapip.s_addr;
2146 		} else if ((l == 1) && (hm != NULL)) {
2147 			fr_hostmapdel(&hm);
2148 		}
2149 		in.s_addr = ntohl(in.s_addr);
2150 
2151 		nat->nat_hm = hm;
2152 
2153 		if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
2154 			if (l > 0)
2155 				return -1;
2156 		}
2157 
2158 		if (np->in_redir == NAT_BIMAP &&
2159 		    np->in_inmsk == np->in_outmsk) {
2160 			/*
2161 			 * map the address block in a 1:1 fashion
2162 			 */
2163 			in.s_addr = np->in_outip;
2164 			in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
2165 			in.s_addr = ntohl(in.s_addr);
2166 
2167 		} else if (np->in_redir & NAT_MAPBLK) {
2168 			if ((l >= np->in_ppip) || ((l > 0) &&
2169 			     !(flags & IPN_TCPUDP)))
2170 				return -1;
2171 			/*
2172 			 * map-block - Calculate destination address.
2173 			 */
2174 			in.s_addr = ntohl(fin->fin_saddr);
2175 			in.s_addr &= ntohl(~np->in_inmsk);
2176 			inb.s_addr = in.s_addr;
2177 			in.s_addr /= np->in_ippip;
2178 			in.s_addr &= ntohl(~np->in_outmsk);
2179 			in.s_addr += ntohl(np->in_outip);
2180 			/*
2181 			 * Calculate destination port.
2182 			 */
2183 			if ((flags & IPN_TCPUDP) &&
2184 			    (np->in_ppip != 0)) {
2185 				port = ntohs(sport) + l;
2186 				port %= np->in_ppip;
2187 				port += np->in_ppip *
2188 					(inb.s_addr % np->in_ippip);
2189 				port += MAPBLK_MINPORT;
2190 				port = htons(port);
2191 			}
2192 
2193 		} else if ((np->in_outip == 0) &&
2194 			   (np->in_outmsk == 0xffffffff)) {
2195 			/*
2196 			 * 0/32 - use the interface's IP address.
2197 			 */
2198 			if ((l > 0) ||
2199 			    fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
2200 				       &in, NULL, fin->fin_ifs) == -1)
2201 				return -1;
2202 			in.s_addr = ntohl(in.s_addr);
2203 
2204 		} else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
2205 			/*
2206 			 * 0/0 - use the original source address/port.
2207 			 */
2208 			if (l > 0)
2209 				return -1;
2210 			in.s_addr = ntohl(fin->fin_saddr);
2211 
2212 		} else if ((np->in_outmsk != 0xffffffff) &&
2213 			   (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
2214 			np->in_nip++;
2215 
2216 		natl = NULL;
2217 
2218 		if ((flags & IPN_TCPUDP) &&
2219 		    ((np->in_redir & NAT_MAPBLK) == 0) &&
2220 		    (np->in_flags & IPN_AUTOPORTMAP)) {
2221 			/*
2222 			 * "ports auto" (without map-block)
2223 			 */
2224 			if ((l > 0) && (l % np->in_ppip == 0)) {
2225 				if (l > np->in_space) {
2226 					return -1;
2227 				} else if ((l > np->in_ppip) &&
2228 					   np->in_outmsk != 0xffffffff)
2229 					np->in_nip++;
2230 			}
2231 			if (np->in_ppip != 0) {
2232 				port = ntohs(sport);
2233 				port += (l % np->in_ppip);
2234 				port %= np->in_ppip;
2235 				port += np->in_ppip *
2236 					(ntohl(fin->fin_saddr) %
2237 					 np->in_ippip);
2238 				port += MAPBLK_MINPORT;
2239 				port = htons(port);
2240 			}
2241 
2242 		} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
2243 			   (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
2244 			/*
2245 			 * Standard port translation.  Select next port.
2246 			 */
2247 			if (np->in_flags & IPN_SEQUENTIAL) {
2248 				port = np->in_pnext;
2249 			} else {
2250 				port = ipf_random() % (ntohs(np->in_pmax) -
2251 						       ntohs(np->in_pmin));
2252 				port += ntohs(np->in_pmin);
2253 			}
2254 			port = htons(port);
2255 			np->in_pnext++;
2256 
2257 			if (np->in_pnext > ntohs(np->in_pmax)) {
2258 				np->in_pnext = ntohs(np->in_pmin);
2259 				if (np->in_outmsk != 0xffffffff)
2260 					np->in_nip++;
2261 			}
2262 		}
2263 
2264 		if (np->in_flags & IPN_IPRANGE) {
2265 			if (np->in_nip > ntohl(np->in_outmsk))
2266 				np->in_nip = ntohl(np->in_outip);
2267 		} else {
2268 			if ((np->in_outmsk != 0xffffffff) &&
2269 			    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
2270 			    ntohl(np->in_outip))
2271 				np->in_nip = ntohl(np->in_outip) + 1;
2272 		}
2273 
2274 		if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
2275 			port = sport;
2276 
2277 		/*
2278 		 * Here we do a lookup of the connection as seen from
2279 		 * the outside.  If an IP# pair already exists, try
2280 		 * again.  So if you have A->B becomes C->B, you can
2281 		 * also have D->E become C->E but not D->B causing
2282 		 * another C->B.  Also take protocol and ports into
2283 		 * account when determining whether a pre-existing
2284 		 * NAT setup will cause an external conflict where
2285 		 * this is appropriate.
2286 		 */
2287 		inb.s_addr = htonl(in.s_addr);
2288 		sp = fin->fin_data[0];
2289 		dp = fin->fin_data[1];
2290 		fin->fin_data[0] = fin->fin_data[1];
2291 		fin->fin_data[1] = htons(port);
2292 		natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2293 				    (u_int)fin->fin_p, fin->fin_dst, inb);
2294 		fin->fin_data[0] = sp;
2295 		fin->fin_data[1] = dp;
2296 
2297 		/*
2298 		 * Has the search wrapped around and come back to the
2299 		 * start ?
2300 		 */
2301 		if ((natl != NULL) &&
2302 		    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
2303 		    (np->in_nip != 0) && (st_ip == np->in_nip))
2304 			return -1;
2305 		l++;
2306 	} while (natl != NULL);
2307 
2308 	if (np->in_space > 0)
2309 		np->in_space--;
2310 
2311 	/* Setup the NAT table */
2312 	nat->nat_inip = fin->fin_src;
2313 	nat->nat_outip.s_addr = htonl(in.s_addr);
2314 	nat->nat_oip = fin->fin_dst;
2315 	if (nat->nat_hm == NULL)
2316 		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2317 					  nat->nat_outip, 0, ifs);
2318 
2319 	if (flags & IPN_TCPUDP) {
2320 		nat->nat_inport = sport;
2321 		nat->nat_outport = port;	/* sport */
2322 		nat->nat_oport = dport;
2323 		((tcphdr_t *)fin->fin_dp)->th_sport = port;
2324 	} else if (flags & IPN_ICMPQUERY) {
2325 		((icmphdr_t *)fin->fin_dp)->icmp_id = port;
2326 		nat->nat_inport = port;
2327 		nat->nat_outport = port;
2328 	}
2329 
2330 	ni->nai_ip.s_addr = in.s_addr;
2331 	ni->nai_port = port;
2332 	ni->nai_nport = dport;
2333 	return 0;
2334 }
2335 
2336 
2337 /* ------------------------------------------------------------------------ */
2338 /* Function:    nat_newrdr                                                  */
2339 /* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
2340 /*                    allow rule to be moved if IPN_ROUNDR is set.          */
2341 /* Parameters:  fin(I) - pointer to packet information                      */
2342 /*              nat(I) - pointer to NAT entry                               */
2343 /*              ni(I)  - pointer to structure with misc. information needed */
2344 /*                       to create new NAT entry.                           */
2345 /*                                                                          */
2346 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2347 /* to the new IP address for the translation.                               */
2348 /* ------------------------------------------------------------------------ */
2349 static INLINE int nat_newrdr(fin, nat, ni)
2350 fr_info_t *fin;
2351 nat_t *nat;
2352 natinfo_t *ni;
2353 {
2354 	u_short nport, dport, sport;
2355 	struct in_addr in, inb;
2356 	u_short sp, dp;
2357 	hostmap_t *hm;
2358 	u_32_t flags;
2359 	ipnat_t *np;
2360 	nat_t *natl;
2361 	int move;
2362 	ipf_stack_t *ifs = fin->fin_ifs;
2363 
2364 	move = 1;
2365 	hm = NULL;
2366 	in.s_addr = 0;
2367 	np = ni->nai_np;
2368 	flags = ni->nai_flags;
2369 	sport = ni->nai_sport;
2370 	dport = ni->nai_dport;
2371 
2372 	/*
2373 	 * If the matching rule has IPN_STICKY set, then we want to have the
2374 	 * same rule kick in as before.  Why would this happen?  If you have
2375 	 * a collection of rdr rules with "round-robin sticky", the current
2376 	 * packet might match a different one to the previous connection but
2377 	 * we want the same destination to be used.
2378 	 */
2379 	if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) ==
2380 	    (IPN_ROUNDR|IPN_STICKY)) {
2381 		hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2382 				 (u_32_t)dport, ifs);
2383 		if (hm != NULL) {
2384 			in.s_addr = ntohl(hm->hm_mapip.s_addr);
2385 			np = hm->hm_ipnat;
2386 			ni->nai_np = np;
2387 			move = 0;
2388 		}
2389 	}
2390 
2391 	/*
2392 	 * Otherwise, it's an inbound packet. Most likely, we don't
2393 	 * want to rewrite source ports and source addresses. Instead,
2394 	 * we want to rewrite to a fixed internal address and fixed
2395 	 * internal port.
2396 	 */
2397 	if (np->in_flags & IPN_SPLIT) {
2398 		in.s_addr = np->in_nip;
2399 
2400 		if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2401 			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2402 					 in, (u_32_t)dport, ifs);
2403 			if (hm != NULL) {
2404 				in.s_addr = hm->hm_mapip.s_addr;
2405 				move = 0;
2406 			}
2407 		}
2408 
2409 		if (hm == NULL || hm->hm_ref == 1) {
2410 			if (np->in_inip == htonl(in.s_addr)) {
2411 				np->in_nip = ntohl(np->in_inmsk);
2412 				move = 0;
2413 			} else {
2414 				np->in_nip = ntohl(np->in_inip);
2415 			}
2416 		}
2417 
2418 	} else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2419 		/*
2420 		 * 0/32 - use the interface's IP address.
2421 		 */
2422 		if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL,
2423 			   fin->fin_ifs) == -1)
2424 			return -1;
2425 		in.s_addr = ntohl(in.s_addr);
2426 
2427 	} else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2428 		/*
2429 		 * 0/0 - use the original destination address/port.
2430 		 */
2431 		in.s_addr = ntohl(fin->fin_daddr);
2432 
2433 	} else if (np->in_redir == NAT_BIMAP &&
2434 		   np->in_inmsk == np->in_outmsk) {
2435 		/*
2436 		 * map the address block in a 1:1 fashion
2437 		 */
2438 		in.s_addr = np->in_inip;
2439 		in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2440 		in.s_addr = ntohl(in.s_addr);
2441 	} else {
2442 		in.s_addr = ntohl(np->in_inip);
2443 	}
2444 
2445 	if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2446 		nport = dport;
2447 	else {
2448 		/*
2449 		 * Whilst not optimized for the case where
2450 		 * pmin == pmax, the gain is not significant.
2451 		 */
2452 		if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2453 		    (np->in_pmin != np->in_pmax)) {
2454 			nport = ntohs(dport) - ntohs(np->in_pmin) +
2455 				ntohs(np->in_pnext);
2456 			nport = htons(nport);
2457 		} else
2458 			nport = np->in_pnext;
2459 	}
2460 
2461 	/*
2462 	 * When the redirect-to address is set to 0.0.0.0, just
2463 	 * assume a blank `forwarding' of the packet.  We don't
2464 	 * setup any translation for this either.
2465 	 */
2466 	if (in.s_addr == 0) {
2467 		if (nport == dport)
2468 			return -1;
2469 		in.s_addr = ntohl(fin->fin_daddr);
2470 	}
2471 
2472 	/*
2473 	 * Check to see if this redirect mapping already exists and if
2474 	 * it does, return "failure" (allowing it to be created will just
2475 	 * cause one or both of these "connections" to stop working.)
2476 	 */
2477 	inb.s_addr = htonl(in.s_addr);
2478 	sp = fin->fin_data[0];
2479 	dp = fin->fin_data[1];
2480 	fin->fin_data[1] = fin->fin_data[0];
2481 	fin->fin_data[0] = ntohs(nport);
2482 	natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2483 		    (u_int)fin->fin_p, inb, fin->fin_src);
2484 	fin->fin_data[0] = sp;
2485 	fin->fin_data[1] = dp;
2486 	if (natl != NULL)
2487 		return (-1);
2488 
2489 	nat->nat_inip.s_addr = htonl(in.s_addr);
2490 	nat->nat_outip = fin->fin_dst;
2491 	nat->nat_oip = fin->fin_src;
2492 
2493 	ni->nai_ip.s_addr = in.s_addr;
2494 	ni->nai_nport = nport;
2495 	ni->nai_port = sport;
2496 
2497 	if (flags & IPN_TCPUDP) {
2498 		nat->nat_inport = nport;
2499 		nat->nat_outport = dport;
2500 		nat->nat_oport = sport;
2501 		((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2502 	} else if (flags & IPN_ICMPQUERY) {
2503 		((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2504 		nat->nat_inport = nport;
2505 		nat->nat_outport = nport;
2506 	}
2507 
2508 	return move;
2509 }
2510 
2511 /* ------------------------------------------------------------------------ */
2512 /* Function:    nat_new                                                     */
2513 /* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
2514 /*                       else pointer to new NAT structure                  */
2515 /* Parameters:  fin(I)       - pointer to packet information                */
2516 /*              np(I)        - pointer to NAT rule                          */
2517 /*              natsave(I)   - pointer to where to store NAT struct pointer */
2518 /*              flags(I)     - flags describing the current packet          */
2519 /*              direction(I) - direction of packet (in/out)                 */
2520 /* Write Lock:  ipf_nat                                                     */
2521 /*                                                                          */
2522 /* Attempts to create a new NAT entry.  Does not actually change the packet */
2523 /* in any way.                                                              */
2524 /*                                                                          */
2525 /* This fucntion is in three main parts: (1) deal with creating a new NAT   */
2526 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
2527 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2528 /* and (3) building that structure and putting it into the NAT table(s).    */
2529 /* ------------------------------------------------------------------------ */
2530 nat_t *nat_new(fin, np, natsave, flags, direction)
2531 fr_info_t *fin;
2532 ipnat_t *np;
2533 nat_t **natsave;
2534 u_int flags;
2535 int direction;
2536 {
2537 	tcphdr_t *tcp = NULL;
2538 	hostmap_t *hm = NULL;
2539 	nat_t *nat, *natl;
2540 	u_int nflags;
2541 	natinfo_t ni;
2542 	int move;
2543 	ipf_stack_t *ifs = fin->fin_ifs;
2544 
2545 	/*
2546 	 * Trigger automatic call to nat_flushtable() if the
2547 	 * table has reached capcity specified by hi watermark.
2548 	 */
2549 	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_level_hi)
2550 		ifs->ifs_nat_doflush = 1;
2551 
2552 	/*
2553 	 * If automatic flushing did not do its job, and the table
2554 	 * has filled up, don't try to create a new entry.
2555 	 */
2556 	if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) {
2557 		ifs->ifs_nat_stats.ns_memfail++;
2558 		return NULL;
2559 	}
2560 
2561 	move = 1;
2562 	nflags = np->in_flags & flags;
2563 	nflags &= NAT_FROMRULE;
2564 
2565 	ni.nai_np = np;
2566 	ni.nai_nflags = nflags;
2567 	ni.nai_flags = flags;
2568 
2569 	/* Give me a new nat */
2570 	KMALLOC(nat, nat_t *);
2571 	if (nat == NULL) {
2572 		ifs->ifs_nat_stats.ns_memfail++;
2573 		/*
2574 		 * Try to automatically tune the max # of entries in the
2575 		 * table allowed to be less than what will cause kmem_alloc()
2576 		 * to fail and try to eliminate panics due to out of memory
2577 		 * conditions arising.
2578 		 */
2579 		if (ifs->ifs_ipf_nattable_max > ifs->ifs_ipf_nattable_sz) {
2580 			ifs->ifs_ipf_nattable_max = ifs->ifs_nat_stats.ns_inuse - 100;
2581 			printf("ipf_nattable_max reduced to %d\n",
2582 				ifs->ifs_ipf_nattable_max);
2583 		}
2584 		return NULL;
2585 	}
2586 
2587 	if (flags & IPN_TCPUDP) {
2588 		tcp = fin->fin_dp;
2589 		ni.nai_sport = htons(fin->fin_sport);
2590 		ni.nai_dport = htons(fin->fin_dport);
2591 	} else if (flags & IPN_ICMPQUERY) {
2592 		/*
2593 		 * In the ICMP query NAT code, we translate the ICMP id fields
2594 		 * to make them unique. This is indepedent of the ICMP type
2595 		 * (e.g. in the unlikely event that a host sends an echo and
2596 		 * an tstamp request with the same id, both packets will have
2597 		 * their ip address/id field changed in the same way).
2598 		 */
2599 		/* The icmp_id field is used by the sender to identify the
2600 		 * process making the icmp request. (the receiver justs
2601 		 * copies it back in its response). So, it closely matches
2602 		 * the concept of source port. We overlay sport, so we can
2603 		 * maximally reuse the existing code.
2604 		 */
2605 		ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2606 		ni.nai_dport = ni.nai_sport;
2607 	}
2608 
2609 	bzero((char *)nat, sizeof(*nat));
2610 	nat->nat_flags = flags;
2611 	nat->nat_redir = np->in_redir;
2612 
2613 	if ((flags & NAT_SLAVE) == 0) {
2614 		MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
2615 	}
2616 
2617 	/*
2618 	 * Search the current table for a match.
2619 	 */
2620 	if (direction == NAT_OUTBOUND) {
2621 		/*
2622 		 * We can now arrange to call this for the same connection
2623 		 * because ipf_nat_new doesn't protect the code path into
2624 		 * this function.
2625 		 */
2626 		natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2627 				     fin->fin_src, fin->fin_dst);
2628 		if (natl != NULL) {
2629 			KFREE(nat);
2630 			nat = natl;
2631 			goto done;
2632 		}
2633 
2634 		move = nat_newmap(fin, nat, &ni);
2635 		if (move == -1)
2636 			goto badnat;
2637 
2638 		np = ni.nai_np;
2639 	} else {
2640 		/*
2641 		 * NAT_INBOUND is used only for redirects rules
2642 		 */
2643 		natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2644 				    fin->fin_src, fin->fin_dst);
2645 		if (natl != NULL) {
2646 			KFREE(nat);
2647 			nat = natl;
2648 			goto done;
2649 		}
2650 
2651 		move = nat_newrdr(fin, nat, &ni);
2652 		if (move == -1)
2653 			goto badnat;
2654 
2655 		np = ni.nai_np;
2656 	}
2657 
2658 	if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2659 		if (np->in_redir == NAT_REDIRECT) {
2660 			nat_delrdr(np);
2661 			nat_addrdr(np, ifs);
2662 		} else if (np->in_redir == NAT_MAP) {
2663 			nat_delnat(np);
2664 			nat_addnat(np, ifs);
2665 		}
2666 	}
2667 
2668 	if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2669 		goto badnat;
2670 	}
2671 
2672 	nat_calc_chksum_diffs(nat);
2673 
2674 	if (flags & SI_WILDP)
2675 		ifs->ifs_nat_stats.ns_wilds++;
2676 	fin->fin_flx |= FI_NEWNAT;
2677 	goto done;
2678 badnat:
2679 	ifs->ifs_nat_stats.ns_badnat++;
2680 	if ((hm = nat->nat_hm) != NULL)
2681 		fr_hostmapdel(&hm);
2682 	KFREE(nat);
2683 	nat = NULL;
2684 done:
2685 	if ((flags & NAT_SLAVE) == 0) {
2686 		MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
2687 	}
2688 	return nat;
2689 }
2690 
2691 
2692 /* ------------------------------------------------------------------------ */
2693 /* Function:    nat_finalise                                                */
2694 /* Returns:     int - 0 == sucess, -1 == failure                            */
2695 /* Parameters:  fin(I) - pointer to packet information                      */
2696 /*              nat(I) - pointer to NAT entry                               */
2697 /*              ni(I)  - pointer to structure with misc. information needed */
2698 /*                       to create new NAT entry.                           */
2699 /* Write Lock:  ipf_nat                                                     */
2700 /*                                                                          */
2701 /* This is the tail end of constructing a new NAT entry and is the same     */
2702 /* for both IPv4 and IPv6.                                                  */
2703 /* ------------------------------------------------------------------------ */
2704 /*ARGSUSED*/
2705 static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction)
2706 fr_info_t *fin;
2707 nat_t *nat;
2708 natinfo_t *ni;
2709 tcphdr_t *tcp;
2710 nat_t **natsave;
2711 int direction;
2712 {
2713 	frentry_t *fr;
2714 	ipnat_t *np;
2715 	ipf_stack_t *ifs = fin->fin_ifs;
2716 
2717 	np = ni->nai_np;
2718 
2719 	COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0], fin->fin_v);
2720 
2721 #ifdef	IPFILTER_SYNC
2722 	if ((nat->nat_flags & SI_CLONE) == 0)
2723 		nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2724 #endif
2725 
2726 	nat->nat_me = natsave;
2727 	nat->nat_dir = direction;
2728 	nat->nat_ifps[0] = np->in_ifps[0];
2729 	nat->nat_ifps[1] = np->in_ifps[1];
2730 	nat->nat_ptr = np;
2731 	nat->nat_p = fin->fin_p;
2732 	nat->nat_v = fin->fin_v;
2733 	nat->nat_mssclamp = np->in_mssclamp;
2734 	fr = fin->fin_fr;
2735 	nat->nat_fr = fr;
2736 
2737 	if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2738 		if (appr_new(fin, nat) == -1)
2739 			return -1;
2740 
2741 	if (nat_insert(nat, fin->fin_rev, ifs) == 0) {
2742 		if (ifs->ifs_nat_logging)
2743 			nat_log(nat, (u_int)np->in_redir, ifs);
2744 		np->in_use++;
2745 		if (fr != NULL) {
2746 			MUTEX_ENTER(&fr->fr_lock);
2747 			fr->fr_ref++;
2748 			MUTEX_EXIT(&fr->fr_lock);
2749 		}
2750 		return 0;
2751 	}
2752 
2753 	/*
2754 	 * nat_insert failed, so cleanup time...
2755 	 */
2756 	return -1;
2757 }
2758 
2759 
2760 /* ------------------------------------------------------------------------ */
2761 /* Function:   nat_insert                                                   */
2762 /* Returns:    int - 0 == sucess, -1 == failure                             */
2763 /* Parameters: nat(I) - pointer to NAT structure                            */
2764 /*             rev(I) - flag indicating forward/reverse direction of packet */
2765 /* Write Lock: ipf_nat                                                      */
2766 /*                                                                          */
2767 /* Insert a NAT entry into the hash tables for searching and add it to the  */
2768 /* list of active NAT entries.  Adjust global counters when complete.       */
2769 /* ------------------------------------------------------------------------ */
2770 int	nat_insert(nat, rev, ifs)
2771 nat_t	*nat;
2772 int	rev;
2773 ipf_stack_t *ifs;
2774 {
2775 	u_int hv1, hv2;
2776 	nat_t **natp;
2777 
2778 	/*
2779 	 * Try and return an error as early as possible, so calculate the hash
2780 	 * entry numbers first and then proceed.
2781 	 */
2782 	if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2783 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2784 				  0xffffffff);
2785 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2786 				  ifs->ifs_ipf_nattable_sz);
2787 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2788 				  0xffffffff);
2789 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2790 				  ifs->ifs_ipf_nattable_sz);
2791 	} else {
2792 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2793 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1,
2794 				  ifs->ifs_ipf_nattable_sz);
2795 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2796 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2,
2797 				  ifs->ifs_ipf_nattable_sz);
2798 	}
2799 
2800 	if (ifs->ifs_nat_stats.ns_bucketlen[0][hv1] >= ifs->ifs_fr_nat_maxbucket ||
2801 	    ifs->ifs_nat_stats.ns_bucketlen[1][hv2] >= ifs->ifs_fr_nat_maxbucket) {
2802 		return -1;
2803 	}
2804 
2805 	nat->nat_hv[0] = hv1;
2806 	nat->nat_hv[1] = hv2;
2807 
2808 	MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2809 
2810 	nat->nat_rev = rev;
2811 	nat->nat_ref = 1;
2812 	nat->nat_bytes[0] = 0;
2813 	nat->nat_pkts[0] = 0;
2814 	nat->nat_bytes[1] = 0;
2815 	nat->nat_pkts[1] = 0;
2816 
2817 	nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2818 	nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4, ifs);
2819 
2820 	if (nat->nat_ifnames[1][0] !='\0') {
2821 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2822 		nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4, ifs);
2823 	} else {
2824 		(void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2825 			       LIFNAMSIZ);
2826 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2827 		nat->nat_ifps[1] = nat->nat_ifps[0];
2828 	}
2829 
2830 	nat->nat_next = ifs->ifs_nat_instances;
2831 	nat->nat_pnext = &ifs->ifs_nat_instances;
2832 	if (ifs->ifs_nat_instances)
2833 		ifs->ifs_nat_instances->nat_pnext = &nat->nat_next;
2834 	ifs->ifs_nat_instances = nat;
2835 
2836 	natp = &ifs->ifs_nat_table[0][hv1];
2837 	if (*natp)
2838 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2839 	nat->nat_phnext[0] = natp;
2840 	nat->nat_hnext[0] = *natp;
2841 	*natp = nat;
2842 	ifs->ifs_nat_stats.ns_bucketlen[0][hv1]++;
2843 
2844 	natp = &ifs->ifs_nat_table[1][hv2];
2845 	if (*natp)
2846 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2847 	nat->nat_phnext[1] = natp;
2848 	nat->nat_hnext[1] = *natp;
2849 	*natp = nat;
2850 	ifs->ifs_nat_stats.ns_bucketlen[1][hv2]++;
2851 
2852 	fr_setnatqueue(nat, rev, ifs);
2853 
2854 	ifs->ifs_nat_stats.ns_added++;
2855 	ifs->ifs_nat_stats.ns_inuse++;
2856 	return 0;
2857 }
2858 
2859 
2860 /* ------------------------------------------------------------------------ */
2861 /* Function:    nat_icmperrorlookup                                         */
2862 /* Returns:     nat_t* - point to matching NAT structure                    */
2863 /* Parameters:  fin(I) - pointer to packet information                      */
2864 /*              dir(I) - direction of packet (in/out)                       */
2865 /*                                                                          */
2866 /* Check if the ICMP error message is related to an existing TCP, UDP or    */
2867 /* ICMP query nat entry.  It is assumed that the packet is already of the   */
2868 /* the required length.                                                     */
2869 /* ------------------------------------------------------------------------ */
2870 nat_t *nat_icmperrorlookup(fin, dir)
2871 fr_info_t *fin;
2872 int dir;
2873 {
2874 	int flags = 0, minlen;
2875 	icmphdr_t *orgicmp;
2876 	tcphdr_t *tcp = NULL;
2877 	u_short data[2];
2878 	nat_t *nat;
2879 	ip_t *oip;
2880 	u_int p;
2881 
2882 	/*
2883 	 * Does it at least have the return (basic) IP header ?
2884 	 * Only a basic IP header (no options) should be with an ICMP error
2885 	 * header.  Also, if it's not an error type, then return.
2886 	 */
2887 	if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2888 		return NULL;
2889 
2890 	/*
2891 	 * Check packet size
2892 	 */
2893 	oip = (ip_t *)((char *)fin->fin_dp + 8);
2894 	minlen = IP_HL(oip) << 2;
2895 	if ((minlen < sizeof(ip_t)) ||
2896 	    (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2897 		return NULL;
2898 	/*
2899 	 * Is the buffer big enough for all of it ?  It's the size of the IP
2900 	 * header claimed in the encapsulated part which is of concern.  It
2901 	 * may be too big to be in this buffer but not so big that it's
2902 	 * outside the ICMP packet, leading to TCP deref's causing problems.
2903 	 * This is possible because we don't know how big oip_hl is when we
2904 	 * do the pullup early in fr_check() and thus can't gaurantee it is
2905 	 * all here now.
2906 	 */
2907 #ifdef  _KERNEL
2908 	{
2909 	mb_t *m;
2910 
2911 	m = fin->fin_m;
2912 # if defined(MENTAT)
2913 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2914 		return NULL;
2915 # else
2916 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2917 	    (char *)fin->fin_ip + M_LEN(m))
2918 		return NULL;
2919 # endif
2920 	}
2921 #endif
2922 
2923 	if (fin->fin_daddr != oip->ip_src.s_addr)
2924 		return NULL;
2925 
2926 	p = oip->ip_p;
2927 	if (p == IPPROTO_TCP)
2928 		flags = IPN_TCP;
2929 	else if (p == IPPROTO_UDP)
2930 		flags = IPN_UDP;
2931 	else if (p == IPPROTO_ICMP) {
2932 		orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2933 
2934 		/* see if this is related to an ICMP query */
2935 		if (nat_icmpquerytype4(orgicmp->icmp_type)) {
2936 			data[0] = fin->fin_data[0];
2937 			data[1] = fin->fin_data[1];
2938 			fin->fin_data[0] = 0;
2939 			fin->fin_data[1] = orgicmp->icmp_id;
2940 
2941 			flags = IPN_ICMPERR|IPN_ICMPQUERY;
2942 			/*
2943 			 * NOTE : dir refers to the direction of the original
2944 			 *        ip packet. By definition the icmp error
2945 			 *        message flows in the opposite direction.
2946 			 */
2947 			if (dir == NAT_INBOUND)
2948 				nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2949 						   oip->ip_src);
2950 			else
2951 				nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2952 						    oip->ip_src);
2953 			fin->fin_data[0] = data[0];
2954 			fin->fin_data[1] = data[1];
2955 			return nat;
2956 		}
2957 	}
2958 
2959 	if (flags & IPN_TCPUDP) {
2960 		minlen += 8;		/* + 64bits of data to get ports */
2961 		if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2962 			return NULL;
2963 
2964 		data[0] = fin->fin_data[0];
2965 		data[1] = fin->fin_data[1];
2966 		tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2967 		fin->fin_data[0] = ntohs(tcp->th_dport);
2968 		fin->fin_data[1] = ntohs(tcp->th_sport);
2969 
2970 		if (dir == NAT_INBOUND) {
2971 			nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2972 					   oip->ip_src);
2973 		} else {
2974 			nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2975 					    oip->ip_src);
2976 		}
2977 		fin->fin_data[0] = data[0];
2978 		fin->fin_data[1] = data[1];
2979 		return nat;
2980 	}
2981 	if (dir == NAT_INBOUND)
2982 		return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2983 	else
2984 		return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2985 }
2986 
2987 
2988 /* ------------------------------------------------------------------------ */
2989 /* Function:    nat_icmperror                                               */
2990 /* Returns:     nat_t* - point to matching NAT structure                    */
2991 /* Parameters:  fin(I)    - pointer to packet information                   */
2992 /*              nflags(I) - NAT flags for this packet                       */
2993 /*              dir(I)    - direction of packet (in/out)                    */
2994 /*                                                                          */
2995 /* Fix up an ICMP packet which is an error message for an existing NAT      */
2996 /* session.  This will correct both packet header data and checksums.       */
2997 /*                                                                          */
2998 /* This should *ONLY* be used for incoming ICMP error packets to make sure  */
2999 /* a NAT'd ICMP packet gets correctly recognised.                           */
3000 /* ------------------------------------------------------------------------ */
3001 nat_t *nat_icmperror(fin, nflags, dir)
3002 fr_info_t *fin;
3003 u_int *nflags;
3004 int dir;
3005 {
3006 	u_32_t sum1, sum2, sumd, psum1, psum2, psumd, sumd2;
3007 	struct in_addr in;
3008 	icmphdr_t *icmp, *orgicmp;
3009 	int dlen;
3010 	udphdr_t *udp;
3011 	tcphdr_t *tcp;
3012 	nat_t *nat;
3013 	ip_t *oip;
3014 	if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
3015 		return NULL;
3016 
3017 	/*
3018 	 * nat_icmperrorlookup() looks up nat entry associated with the
3019 	 * offending IP packet and returns pointer to the entry, or NULL
3020 	 * if packet wasn't natted or for `defective' packets.
3021 	 */
3022 
3023 	if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
3024 		return NULL;
3025 
3026 	sumd2 = 0;
3027 	*nflags = IPN_ICMPERR;
3028 	icmp = fin->fin_dp;
3029 	oip = (ip_t *)&icmp->icmp_ip;
3030 	udp = (udphdr_t *)((((char *)oip) + (IP_HL(oip) << 2)));
3031 	tcp = (tcphdr_t *)udp;
3032 	dlen = fin->fin_plen - ((char *)udp - (char *)fin->fin_ip);
3033 
3034 	/*
3035 	 * Need to adjust ICMP header to include the real IP#'s and
3036 	 * port #'s.  There are three steps required.
3037 	 *
3038 	 * Step 1
3039 	 * Fix the IP addresses in the offending IP packet and update
3040 	 * ip header checksum to compensate for the change.
3041 	 *
3042 	 * No update needed here for icmp_cksum because the ICMP checksum
3043 	 * is calculated over the complete ICMP packet, which includes the
3044 	 * changed oip IP addresses and oip->ip_sum.  These two changes
3045 	 * cancel each other out (if the delta for the IP address is x,
3046 	 * then the delta for ip_sum is minus x).
3047 	 */
3048 
3049 	if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
3050 		sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
3051 		in = nat->nat_inip;
3052 		oip->ip_src = in;
3053 	} else {
3054 		sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
3055 		in = nat->nat_outip;
3056 		oip->ip_dst = in;
3057 	}
3058 
3059 	sum2 = LONG_SUM(ntohl(in.s_addr));
3060 	CALC_SUMD(sum1, sum2, sumd);
3061 	fix_datacksum(&oip->ip_sum, sumd);
3062 
3063 	/*
3064 	 * Step 2
3065 	 * Perform other adjustments based on protocol of offending packet.
3066 	 */
3067 
3068 	switch (oip->ip_p) {
3069 		case IPPROTO_TCP :
3070 		case IPPROTO_UDP :
3071 
3072 			/*
3073 			* For offending TCP/UDP IP packets, translate the ports
3074 			* based on the NAT specification.
3075 			*
3076 			* Advance notice : Now it becomes complicated :-)
3077 			*
3078 			* Since the port and IP addresse fields are both part
3079 			* of the TCP/UDP checksum of the offending IP packet,
3080 			* we need to adjust that checksum as well.
3081 			*
3082 			* To further complicate things, the TCP/UDP checksum
3083 			* may not be present.  We must check to see if the
3084 			* length of the data portion is big enough to hold
3085 			* the checksum.  In the UDP case, a test to determine
3086 			* if the checksum is even set is also required.
3087 			*
3088 			* Any changes to an IP address, port or checksum within
3089 			* the ICMP packet requires a change to icmp_cksum.
3090 			*
3091 			* Be extremely careful here ... The change is dependent
3092 			* upon whether or not the TCP/UPD checksum is present.
3093 			*
3094 			* If TCP/UPD checksum is present, the icmp_cksum must
3095 			* compensate for checksum modification resulting from
3096 			* IP address change only.  Port change and resulting
3097 			* data checksum adjustments cancel each other out.
3098 			*
3099 			* If TCP/UDP checksum is not present, icmp_cksum must
3100 			* compensate for port change only.  The IP address
3101 			* change does not modify anything else in this case.
3102 			*/
3103 
3104 			psum1 = 0;
3105 			psum2 = 0;
3106 			psumd = 0;
3107 
3108 			if ((tcp->th_dport == nat->nat_oport) &&
3109 			    (tcp->th_sport != nat->nat_inport)) {
3110 
3111 				/*
3112 				 * Translate the source port.
3113 				 */
3114 
3115 				psum1 = ntohs(tcp->th_sport);
3116 				psum2 = ntohs(nat->nat_inport);
3117 				tcp->th_sport = nat->nat_inport;
3118 
3119 			} else if ((tcp->th_sport == nat->nat_oport) &&
3120 				    (tcp->th_dport != nat->nat_outport)) {
3121 
3122 				/*
3123 				 * Translate the destination port.
3124 				 */
3125 
3126 				psum1 = ntohs(tcp->th_dport);
3127 				psum2 = ntohs(nat->nat_outport);
3128 				tcp->th_dport = nat->nat_outport;
3129 			}
3130 
3131 			if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) {
3132 
3133 				/*
3134 				 * TCP checksum present.
3135 				 *
3136 				 * Adjust data checksum and icmp checksum to
3137 				 * compensate for any IP address change.
3138 				 */
3139 
3140 				sum1 = ntohs(tcp->th_sum);
3141 				fix_datacksum(&tcp->th_sum, sumd);
3142 				sum2 = ntohs(tcp->th_sum);
3143 				sumd2 = sumd << 1;
3144 				CALC_SUMD(sum1, sum2, sumd);
3145 				sumd2 += sumd;
3146 
3147 				/*
3148 				 * Also make data checksum adjustment to
3149 				 * compensate for any port change.
3150 				 */
3151 
3152 				if (psum1 != psum2) {
3153 					CALC_SUMD(psum1, psum2, psumd);
3154 					fix_datacksum(&tcp->th_sum, psumd);
3155 				}
3156 
3157 			} else if ((oip->ip_p == IPPROTO_UDP) &&
3158 				   (dlen >= 8) && (udp->uh_sum != 0)) {
3159 
3160 				/*
3161 				 * The UDP checksum is present and set.
3162 				 *
3163 				 * Adjust data checksum and icmp checksum to
3164 				 * compensate for any IP address change.
3165 				 */
3166 
3167 				sum1 = ntohs(udp->uh_sum);
3168 				fix_datacksum(&udp->uh_sum, sumd);
3169 				sum2 = ntohs(udp->uh_sum);
3170 				sumd2 = sumd << 1;
3171 				CALC_SUMD(sum1, sum2, sumd);
3172 				sumd2 += sumd;
3173 
3174 				/*
3175 				 * Also make data checksum adjustment to
3176 				 * compensate for any port change.
3177 				 */
3178 
3179 				if (psum1 != psum2) {
3180 					CALC_SUMD(psum1, psum2, psumd);
3181 					fix_datacksum(&udp->uh_sum, psumd);
3182 				}
3183 
3184 			} else {
3185 
3186 				/*
3187 				 * Data checksum was not present.
3188 				 *
3189 				 * Compensate for any port change.
3190 				 */
3191 
3192 				CALC_SUMD(psum2, psum1, psumd);
3193 				sumd2 += psumd;
3194 			}
3195 			break;
3196 
3197 		case IPPROTO_ICMP :
3198 
3199 			orgicmp = (icmphdr_t *)udp;
3200 
3201 			if ((nat->nat_dir == NAT_OUTBOUND) &&
3202 			    (orgicmp->icmp_id != nat->nat_inport) &&
3203 			    (dlen >= 8)) {
3204 
3205 				/*
3206 				 * Fix ICMP checksum (of the offening ICMP
3207 				 * query packet) to compensate the change
3208 				 * in the ICMP id of the offending ICMP
3209 				 * packet.
3210 				 *
3211 				 * Since you modify orgicmp->icmp_id with
3212 				 * a delta (say x) and you compensate that
3213 				 * in origicmp->icmp_cksum with a delta
3214 				 * minus x, you don't have to adjust the
3215 				 * overall icmp->icmp_cksum
3216 				 */
3217 
3218 				sum1 = ntohs(orgicmp->icmp_id);
3219 				sum2 = ntohs(nat->nat_inport);
3220 				CALC_SUMD(sum1, sum2, sumd);
3221 				orgicmp->icmp_id = nat->nat_inport;
3222 				fix_datacksum(&orgicmp->icmp_cksum, sumd);
3223 
3224 			} /* nat_dir can't be NAT_INBOUND for icmp queries */
3225 
3226 			break;
3227 
3228 		default :
3229 
3230 			break;
3231 
3232 	} /* switch (oip->ip_p) */
3233 
3234 	/*
3235 	 * Step 3
3236 	 * Make the adjustments to icmp checksum.
3237 	 */
3238 
3239 	if (sumd2 != 0) {
3240 		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3241 		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3242 		fix_incksum(&icmp->icmp_cksum, sumd2);
3243 	}
3244 	return nat;
3245 }
3246 
3247 
/*
 * NB: these lookups don't lock access to the list; it is assumed that this
 * has already been done!
 */
3252 
3253 /* ------------------------------------------------------------------------ */
3254 /* Function:    nat_inlookup                                                */
3255 /* Returns:     nat_t* - NULL == no match,                                  */
3256 /*                       else pointer to matching NAT entry                 */
3257 /* Parameters:  fin(I)    - pointer to packet information                   */
3258 /*              flags(I)  - NAT flags for this packet                       */
3259 /*              p(I)      - protocol for this packet                        */
3260 /*              src(I)    - source IP address                               */
3261 /*              mapdst(I) - destination IP address                          */
3262 /*                                                                          */
3263 /* Lookup a nat entry based on the mapped destination ip address/port and   */
3264 /* real source address/port.  We use this lookup when receiving a packet,   */
3265 /* we're looking for a table entry, based on the destination address.       */
3266 /*                                                                          */
3267 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3268 /*                                                                          */
3269 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3270 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3271 /*                                                                          */
3272 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3273 /*            the packet is of said protocol                                */
3274 /* ------------------------------------------------------------------------ */
3275 nat_t *nat_inlookup(fin, flags, p, src, mapdst)
3276 fr_info_t *fin;
3277 u_int flags, p;
3278 struct in_addr src , mapdst;
3279 {
3280 	u_short sport, dport;
3281 	ipnat_t *ipn;
3282 	u_int sflags;
3283 	nat_t *nat;
3284 	int nflags;
3285 	u_32_t dst;
3286 	void *ifp;
3287 	u_int hv;
3288 	ipf_stack_t *ifs = fin->fin_ifs;
3289 
3290 	if (fin != NULL)
3291 		ifp = fin->fin_ifp;
3292 	else
3293 		ifp = NULL;
3294 	sport = 0;
3295 	dport = 0;
3296 	dst = mapdst.s_addr;
3297 	sflags = flags & NAT_TCPUDPICMP;
3298 
3299 	switch (p)
3300 	{
3301 	case IPPROTO_TCP :
3302 	case IPPROTO_UDP :
3303 		sport = htons(fin->fin_data[0]);
3304 		dport = htons(fin->fin_data[1]);
3305 		break;
3306 	case IPPROTO_ICMP :
3307 		if (flags & IPN_ICMPERR)
3308 			sport = fin->fin_data[1];
3309 		else
3310 			dport = fin->fin_data[1];
3311 		break;
3312 	default :
3313 		break;
3314 	}
3315 
3316 
3317 	if ((flags & SI_WILDP) != 0)
3318 		goto find_in_wild_ports;
3319 
3320 	hv = NAT_HASH_FN(dst, dport, 0xffffffff);
3321 	hv = NAT_HASH_FN(src.s_addr, hv + sport, ifs->ifs_ipf_nattable_sz);
3322 	nat = ifs->ifs_nat_table[1][hv];
3323 	for (; nat; nat = nat->nat_hnext[1]) {
3324 		if (nat->nat_v != 4)
3325 			continue;
3326 
3327 		if (nat->nat_ifps[0] != NULL) {
3328 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3329 				continue;
3330 		} else if (ifp != NULL)
3331 			nat->nat_ifps[0] = ifp;
3332 
3333 		nflags = nat->nat_flags;
3334 
3335 		if (nat->nat_oip.s_addr == src.s_addr &&
3336 		    nat->nat_outip.s_addr == dst &&
3337 		    (((p == 0) &&
3338 		      (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3339 		     || (p == nat->nat_p))) {
3340 			switch (p)
3341 			{
3342 #if 0
3343 			case IPPROTO_GRE :
3344 				if (nat->nat_call[1] != fin->fin_data[0])
3345 					continue;
3346 				break;
3347 #endif
3348 			case IPPROTO_ICMP :
3349 				if ((flags & IPN_ICMPERR) != 0) {
3350 					if (nat->nat_outport != sport)
3351 						continue;
3352 				} else {
3353 					if (nat->nat_outport != dport)
3354 						continue;
3355 				}
3356 				break;
3357 			case IPPROTO_TCP :
3358 			case IPPROTO_UDP :
3359 				if (nat->nat_oport != sport)
3360 					continue;
3361 				if (nat->nat_outport != dport)
3362 					continue;
3363 				break;
3364 			default :
3365 				break;
3366 			}
3367 
3368 			ipn = nat->nat_ptr;
3369 			if ((ipn != NULL) && (nat->nat_aps != NULL))
3370 				if (appr_match(fin, nat) != 0)
3371 					continue;
3372 			return nat;
3373 		}
3374 	}
3375 
3376 	/*
3377 	 * So if we didn't find it but there are wildcard members in the hash
3378 	 * table, go back and look for them.  We do this search and update here
3379 	 * because it is modifying the NAT table and we want to do this only
3380 	 * for the first packet that matches.  The exception, of course, is
3381 	 * for "dummy" (FI_IGNORE) lookups.
3382 	 */
3383 find_in_wild_ports:
3384 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3385 		return NULL;
3386 	if (ifs->ifs_nat_stats.ns_wilds == 0)
3387 		return NULL;
3388 
3389 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3390 
3391 	hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3392 	hv = NAT_HASH_FN(src.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3393 
3394 	WRITE_ENTER(&ifs->ifs_ipf_nat);
3395 
3396 	nat = ifs->ifs_nat_table[1][hv];
3397 	for (; nat; nat = nat->nat_hnext[1]) {
3398 		if (nat->nat_v != 4)
3399 			continue;
3400 
3401 		if (nat->nat_ifps[0] != NULL) {
3402 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3403 				continue;
3404 		} else if (ifp != NULL)
3405 			nat->nat_ifps[0] = ifp;
3406 
3407 		if (nat->nat_p != fin->fin_p)
3408 			continue;
3409 		if (nat->nat_oip.s_addr != src.s_addr ||
3410 		    nat->nat_outip.s_addr != dst)
3411 			continue;
3412 
3413 		nflags = nat->nat_flags;
3414 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3415 			continue;
3416 
3417 		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3418 			       NAT_INBOUND) == 1) {
3419 			if ((fin->fin_flx & FI_IGNORE) != 0)
3420 				break;
3421 			if ((nflags & SI_CLONE) != 0) {
3422 				nat = fr_natclone(fin, nat);
3423 				if (nat == NULL)
3424 					break;
3425 			} else {
3426 				MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3427 				ifs->ifs_nat_stats.ns_wilds--;
3428 				MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3429 			}
3430 			nat->nat_oport = sport;
3431 			nat->nat_outport = dport;
3432 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3433 			nat_tabmove(nat, ifs);
3434 			break;
3435 		}
3436 	}
3437 
3438 	MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3439 
3440 	return nat;
3441 }
3442 
3443 
3444 /* ------------------------------------------------------------------------ */
3445 /* Function:    nat_tabmove                                                 */
3446 /* Returns:     Nil                                                         */
3447 /* Parameters:  nat(I) - pointer to NAT structure                           */
3448 /* Write Lock:  ipf_nat                                                     */
3449 /*                                                                          */
3450 /* This function is only called for TCP/UDP NAT table entries where the     */
3451 /* original was placed in the table without hashing on the ports and we now */
3452 /* want to include hashing on port numbers.                                 */
3453 /* ------------------------------------------------------------------------ */
3454 static void nat_tabmove(nat, ifs)
3455 nat_t *nat;
3456 ipf_stack_t *ifs;
3457 {
3458 	nat_t **natp;
3459 	u_int hv;
3460 
3461 	if (nat->nat_flags & SI_CLONE)
3462 		return;
3463 
3464 	/*
3465 	 * Remove the NAT entry from the old location
3466 	 */
3467 	if (nat->nat_hnext[0])
3468 		nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3469 	*nat->nat_phnext[0] = nat->nat_hnext[0];
3470 	ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3471 
3472 	if (nat->nat_hnext[1])
3473 		nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3474 	*nat->nat_phnext[1] = nat->nat_hnext[1];
3475 	ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3476 
3477 	/*
3478 	 * Add into the NAT table in the new position
3479 	 */
3480 	hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3481 	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3482 			 ifs->ifs_ipf_nattable_sz);
3483 	nat->nat_hv[0] = hv;
3484 	natp = &ifs->ifs_nat_table[0][hv];
3485 	if (*natp)
3486 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3487 	nat->nat_phnext[0] = natp;
3488 	nat->nat_hnext[0] = *natp;
3489 	*natp = nat;
3490 	ifs->ifs_nat_stats.ns_bucketlen[0][hv]++;
3491 
3492 	hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3493 	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3494 			 ifs->ifs_ipf_nattable_sz);
3495 	nat->nat_hv[1] = hv;
3496 	natp = &ifs->ifs_nat_table[1][hv];
3497 	if (*natp)
3498 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3499 	nat->nat_phnext[1] = natp;
3500 	nat->nat_hnext[1] = *natp;
3501 	*natp = nat;
3502 	ifs->ifs_nat_stats.ns_bucketlen[1][hv]++;
3503 }
3504 
3505 
3506 /* ------------------------------------------------------------------------ */
3507 /* Function:    nat_outlookup                                               */
3508 /* Returns:     nat_t* - NULL == no match,                                  */
3509 /*                       else pointer to matching NAT entry                 */
3510 /* Parameters:  fin(I)   - pointer to packet information                    */
3511 /*              flags(I) - NAT flags for this packet                        */
3512 /*              p(I)     - protocol for this packet                         */
3513 /*              src(I)   - source IP address                                */
3514 /*              dst(I)   - destination IP address                           */
3515 /*              rw(I)    - 1 == write lock on ipf_nat held, 0 == read lock. */
3516 /*                                                                          */
3517 /* Lookup a nat entry based on the source 'real' ip address/port and        */
3518 /* destination address/port.  We use this lookup when sending a packet out, */
3519 /* we're looking for a table entry, based on the source address.            */
3520 /*                                                                          */
3521 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3522 /*                                                                          */
3523 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3524 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3525 /*                                                                          */
3526 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3527 /*            the packet is of said protocol                                */
3528 /* ------------------------------------------------------------------------ */
3529 nat_t *nat_outlookup(fin, flags, p, src, dst)
3530 fr_info_t *fin;
3531 u_int flags, p;
3532 struct in_addr src , dst;
3533 {
3534 	u_short sport, dport;
3535 	u_int sflags;
3536 	ipnat_t *ipn;
3537 	u_32_t srcip;
3538 	nat_t *nat;
3539 	int nflags;
3540 	void *ifp;
3541 	u_int hv;
3542 	ipf_stack_t *ifs = fin->fin_ifs;
3543 
3544 	ifp = fin->fin_ifp;
3545 
3546 	srcip = src.s_addr;
3547 	sflags = flags & IPN_TCPUDPICMP;
3548 	sport = 0;
3549 	dport = 0;
3550 
3551 	switch (p)
3552 	{
3553 	case IPPROTO_TCP :
3554 	case IPPROTO_UDP :
3555 		sport = htons(fin->fin_data[0]);
3556 		dport = htons(fin->fin_data[1]);
3557 		break;
3558 	case IPPROTO_ICMP :
3559 		if (flags & IPN_ICMPERR)
3560 			sport = fin->fin_data[1];
3561 		else
3562 			dport = fin->fin_data[1];
3563 		break;
3564 	default :
3565 		break;
3566 	}
3567 
3568 	if ((flags & SI_WILDP) != 0)
3569 		goto find_out_wild_ports;
3570 
3571 	hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3572 	hv = NAT_HASH_FN(dst.s_addr, hv + dport, ifs->ifs_ipf_nattable_sz);
3573 	nat = ifs->ifs_nat_table[0][hv];
3574 	for (; nat; nat = nat->nat_hnext[0]) {
3575 		if (nat->nat_v != 4)
3576 			continue;
3577 
3578 		if (nat->nat_ifps[1] != NULL) {
3579 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3580 				continue;
3581 		} else if (ifp != NULL)
3582 			nat->nat_ifps[1] = ifp;
3583 
3584 		nflags = nat->nat_flags;
3585 
3586 		if (nat->nat_inip.s_addr == srcip &&
3587 		    nat->nat_oip.s_addr == dst.s_addr &&
3588 		    (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3589 		     || (p == nat->nat_p))) {
3590 			switch (p)
3591 			{
3592 #if 0
3593 			case IPPROTO_GRE :
3594 				if (nat->nat_call[1] != fin->fin_data[0])
3595 					continue;
3596 				break;
3597 #endif
3598 			case IPPROTO_TCP :
3599 			case IPPROTO_UDP :
3600 				if (nat->nat_oport != dport)
3601 					continue;
3602 				if (nat->nat_inport != sport)
3603 					continue;
3604 				break;
3605 			default :
3606 				break;
3607 			}
3608 
3609 			ipn = nat->nat_ptr;
3610 			if ((ipn != NULL) && (nat->nat_aps != NULL))
3611 				if (appr_match(fin, nat) != 0)
3612 					continue;
3613 			return nat;
3614 		}
3615 	}
3616 
3617 	/*
3618 	 * So if we didn't find it but there are wildcard members in the hash
3619 	 * table, go back and look for them.  We do this search and update here
3620 	 * because it is modifying the NAT table and we want to do this only
3621 	 * for the first packet that matches.  The exception, of course, is
3622 	 * for "dummy" (FI_IGNORE) lookups.
3623 	 */
3624 find_out_wild_ports:
3625 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3626 		return NULL;
3627 	if (ifs->ifs_nat_stats.ns_wilds == 0)
3628 		return NULL;
3629 
3630 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3631 
3632 	hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3633 	hv = NAT_HASH_FN(dst.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3634 
3635 	WRITE_ENTER(&ifs->ifs_ipf_nat);
3636 
3637 	nat = ifs->ifs_nat_table[0][hv];
3638 	for (; nat; nat = nat->nat_hnext[0]) {
3639 		if (nat->nat_v != 4)
3640 			continue;
3641 
3642 		if (nat->nat_ifps[1] != NULL) {
3643 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3644 				continue;
3645 		} else if (ifp != NULL)
3646 			nat->nat_ifps[1] = ifp;
3647 
3648 		if (nat->nat_p != fin->fin_p)
3649 			continue;
3650 		if ((nat->nat_inip.s_addr != srcip) ||
3651 		    (nat->nat_oip.s_addr != dst.s_addr))
3652 			continue;
3653 
3654 		nflags = nat->nat_flags;
3655 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3656 			continue;
3657 
3658 		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3659 			       NAT_OUTBOUND) == 1) {
3660 			if ((fin->fin_flx & FI_IGNORE) != 0)
3661 				break;
3662 			if ((nflags & SI_CLONE) != 0) {
3663 				nat = fr_natclone(fin, nat);
3664 				if (nat == NULL)
3665 					break;
3666 			} else {
3667 				MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3668 				ifs->ifs_nat_stats.ns_wilds--;
3669 				MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3670 			}
3671 			nat->nat_inport = sport;
3672 			nat->nat_oport = dport;
3673 			if (nat->nat_outport == 0)
3674 				nat->nat_outport = sport;
3675 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3676 			nat_tabmove(nat, ifs);
3677 			break;
3678 		}
3679 	}
3680 
3681 	MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3682 
3683 	return nat;
3684 }
3685 
3686 
3687 /* ------------------------------------------------------------------------ */
3688 /* Function:    nat_lookupredir                                             */
3689 /* Returns:     nat_t* - NULL == no match,                                  */
3690 /*                       else pointer to matching NAT entry                 */
3691 /* Parameters:  np(I) - pointer to description of packet to find NAT table  */
3692 /*                      entry for.                                          */
3693 /*                                                                          */
3694 /* Lookup the NAT tables to search for a matching redirect                  */
3695 /* ------------------------------------------------------------------------ */
3696 nat_t *nat_lookupredir(np, ifs)
3697 natlookup_t *np;
3698 ipf_stack_t *ifs;
3699 {
3700 	fr_info_t fi;
3701 	nat_t *nat;
3702 
3703 	bzero((char *)&fi, sizeof(fi));
3704 	if (np->nl_flags & IPN_IN) {
3705 		fi.fin_data[0] = ntohs(np->nl_realport);
3706 		fi.fin_data[1] = ntohs(np->nl_outport);
3707 	} else {
3708 		fi.fin_data[0] = ntohs(np->nl_inport);
3709 		fi.fin_data[1] = ntohs(np->nl_outport);
3710 	}
3711 	if (np->nl_flags & IPN_TCP)
3712 		fi.fin_p = IPPROTO_TCP;
3713 	else if (np->nl_flags & IPN_UDP)
3714 		fi.fin_p = IPPROTO_UDP;
3715 	else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3716 		fi.fin_p = IPPROTO_ICMP;
3717 
3718 	fi.fin_ifs = ifs;
3719 	/*
3720 	 * We can do two sorts of lookups:
3721 	 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3722 	 * - default: we have the `in' and `out' address, look for `real'.
3723 	 */
3724 	if (np->nl_flags & IPN_IN) {
3725 		if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3726 					np->nl_realip, np->nl_outip))) {
3727 			np->nl_inip = nat->nat_inip;
3728 			np->nl_inport = nat->nat_inport;
3729 		}
3730 	} else {
3731 		/*
3732 		 * If nl_inip is non null, this is a lookup based on the real
3733 		 * ip address. Else, we use the fake.
3734 		 */
3735 		if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3736 					 np->nl_inip, np->nl_outip))) {
3737 
3738 			if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3739 				fr_info_t fin;
3740 				bzero((char *)&fin, sizeof(fin));
3741 				fin.fin_p = nat->nat_p;
3742 				fin.fin_data[0] = ntohs(nat->nat_outport);
3743 				fin.fin_data[1] = ntohs(nat->nat_oport);
3744 				fin.fin_ifs = ifs;
3745 				if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3746 						 nat->nat_outip,
3747 						 nat->nat_oip) != NULL) {
3748 					np->nl_flags &= ~IPN_FINDFORWARD;
3749 				}
3750 			}
3751 
3752 			np->nl_realip = nat->nat_outip;
3753 			np->nl_realport = nat->nat_outport;
3754 		}
3755  	}
3756 
3757 	return nat;
3758 }
3759 
3760 
3761 /* ------------------------------------------------------------------------ */
3762 /* Function:    nat_match                                                   */
3763 /* Returns:     int - 0 == no match, 1 == match                             */
3764 /* Parameters:  fin(I)   - pointer to packet information                    */
3765 /*              np(I)    - pointer to NAT rule                              */
3766 /*                                                                          */
3767 /* Pull the matching of a packet against a NAT rule out of that complex     */
3768 /* loop inside fr_checknatin() and lay it out properly in its own function. */
3769 /* ------------------------------------------------------------------------ */
3770 static int nat_match(fin, np)
3771 fr_info_t *fin;
3772 ipnat_t *np;
3773 {
3774 	frtuc_t *ft;
3775 
3776 	if (fin->fin_v != 4)
3777 		return 0;
3778 
3779 	if (np->in_p && fin->fin_p != np->in_p)
3780 		return 0;
3781 
3782 	if (fin->fin_out) {
3783 		if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3784 			return 0;
3785 		if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3786 		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3787 			return 0;
3788 		if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3789 		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3790 			return 0;
3791 	} else {
3792 		if (!(np->in_redir & NAT_REDIRECT))
3793 			return 0;
3794 		if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3795 		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3796 			return 0;
3797 		if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3798 		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3799 			return 0;
3800 	}
3801 
3802 	ft = &np->in_tuc;
3803 	if (!(fin->fin_flx & FI_TCPUDP) ||
3804 	    (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3805 		if (ft->ftu_scmp || ft->ftu_dcmp)
3806 			return 0;
3807 		return 1;
3808 	}
3809 
3810 	return fr_tcpudpchk(fin, ft);
3811 }
3812 
3813 
3814 /* ------------------------------------------------------------------------ */
3815 /* Function:    nat_update                                                  */
3816 /* Returns:     Nil                                                         */
3817 /* Parameters:	fin(I) - pointer to packet information			    */
3818 /*		nat(I) - pointer to NAT structure			    */
3819 /*              np(I)     - pointer to NAT rule                             */
3820 /* Locks:	nat_lock						    */
3821 /*                                                                          */
3822 /* Updates the lifetime of a NAT table entry for non-TCP packets.  Must be  */
3823 /* called with fin_rev updated - i.e. after calling nat_proto().            */
3824 /* ------------------------------------------------------------------------ */
3825 void nat_update(fin, nat, np)
3826 fr_info_t *fin;
3827 nat_t *nat;
3828 ipnat_t *np;
3829 {
3830 	ipftq_t *ifq, *ifq2;
3831 	ipftqent_t *tqe;
3832 	ipf_stack_t *ifs = fin->fin_ifs;
3833 
3834 	tqe = &nat->nat_tqe;
3835 	ifq = tqe->tqe_ifq;
3836 
3837 	/*
3838 	 * We allow over-riding of NAT timeouts from NAT rules, even for
3839 	 * TCP, however, if it is TCP and there is no rule timeout set,
3840 	 * then do not update the timeout here.
3841 	 */
3842 	if (np != NULL)
3843 		ifq2 = np->in_tqehead[fin->fin_rev];
3844 	else
3845 		ifq2 = NULL;
3846 
3847 	if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3848 		(void) fr_tcp_age(&nat->nat_tqe, fin, ifs->ifs_nat_tqb, 0);
3849 	} else {
3850 		if (ifq2 == NULL) {
3851 			if (nat->nat_p == IPPROTO_UDP)
3852 				ifq2 = &ifs->ifs_nat_udptq;
3853 			else if (nat->nat_p == IPPROTO_ICMP)
3854 				ifq2 = &ifs->ifs_nat_icmptq;
3855 			else
3856 				ifq2 = &ifs->ifs_nat_iptq;
3857 		}
3858 
3859 		fr_movequeue(tqe, ifq, ifq2, ifs);
3860 	}
3861 }
3862 
3863 
3864 /* ------------------------------------------------------------------------ */
3865 /* Function:    fr_checknatout                                              */
3866 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3867 /*                     0 == no packet translation occurred,                 */
3868 /*                     1 == packet was successfully translated.             */
3869 /* Parameters:  fin(I)   - pointer to packet information                    */
3870 /*              passp(I) - pointer to filtering result flags                */
3871 /*                                                                          */
3872 /* Check to see if an outcoming packet should be changed.  ICMP packets are */
3873 /* first checked to see if they match an existing entry (if an error),      */
3874 /* otherwise a search of the current NAT table is made.  If neither results */
3875 /* in a match then a search for a matching NAT rule is made.  Create a new  */
3876 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
3877 /* packet header(s) as required.                                            */
3878 /* ------------------------------------------------------------------------ */
3879 int fr_checknatout(fin, passp)
3880 fr_info_t *fin;
3881 u_32_t *passp;
3882 {
3883 	ipnat_t *np = NULL, *npnext;
3884 	struct ifnet *ifp, *sifp;
3885 	icmphdr_t *icmp = NULL;
3886 	tcphdr_t *tcp = NULL;
3887 	int rval, natfailed;
3888 	u_int nflags = 0;
3889 	u_32_t ipa, iph;
3890 	int natadd = 1;
3891 	frentry_t *fr;
3892 	nat_t *nat;
3893 	ipf_stack_t *ifs = fin->fin_ifs;
3894 
3895 	if (ifs->ifs_fr_nat_lock != 0)
3896 		return 0;
3897 	if (ifs->ifs_nat_stats.ns_rules == 0 && ifs->ifs_nat_instances == NULL)
3898 		return 0;
3899 
3900 	natfailed = 0;
3901 	fr = fin->fin_fr;
3902 	sifp = fin->fin_ifp;
3903 	if ((fr != NULL) && !(fr->fr_flags & FR_DUP) &&
3904 	    fr->fr_tifs[fin->fin_rev].fd_ifp &&
3905 	    fr->fr_tifs[fin->fin_rev].fd_ifp != (void *)-1)
3906 		fin->fin_ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3907 	ifp = fin->fin_ifp;
3908 
3909 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3910 		switch (fin->fin_p)
3911 		{
3912 		case IPPROTO_TCP :
3913 			nflags = IPN_TCP;
3914 			break;
3915 		case IPPROTO_UDP :
3916 			nflags = IPN_UDP;
3917 			break;
3918 		case IPPROTO_ICMP :
3919 			icmp = fin->fin_dp;
3920 
3921 			/*
3922 			 * This is an incoming packet, so the destination is
3923 			 * the icmp_id and the source port equals 0
3924 			 */
3925 			if (nat_icmpquerytype4(icmp->icmp_type))
3926 				nflags = IPN_ICMPQUERY;
3927 			break;
3928 		default :
3929 			break;
3930 		}
3931 
3932 		if ((nflags & IPN_TCPUDP))
3933 			tcp = fin->fin_dp;
3934 	}
3935 
3936 	ipa = fin->fin_saddr;
3937 
3938 	READ_ENTER(&ifs->ifs_ipf_nat);
3939 
3940 	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3941 	    (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3942 		/*EMPTY*/;
3943 	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3944 		natadd = 0;
3945 	else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3946 				      fin->fin_src, fin->fin_dst))) {
3947 		nflags = nat->nat_flags;
3948 	} else {
3949 		u_32_t hv, msk, nmsk;
3950 
3951 		/*
3952 		 * There is no current entry in the nat table for this packet.
3953 		 *
3954 		 * If the packet is a fragment, but not the first fragment,
3955 		 * then don't do anything.  Otherwise, if there is a matching
3956 		 * nat rule, try to create a new nat entry.
3957 		 */
3958 		if ((fin->fin_off != 0) && (fin->fin_flx & FI_TCPUDP))
3959 			goto nonatfrag;
3960 
3961 		msk = 0xffffffff;
3962 		nmsk = ifs->ifs_nat_masks;
3963 maskloop:
3964 		iph = ipa & htonl(msk);
3965 		hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_natrules_sz);
3966 		for (np = ifs->ifs_nat_rules[hv]; np; np = npnext) {
3967 			npnext = np->in_mnext;
3968 			if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
3969 				continue;
3970 			if (np->in_v != fin->fin_v)
3971 				continue;
3972 			if (np->in_p && (np->in_p != fin->fin_p))
3973 				continue;
3974 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3975 				continue;
3976 			if (np->in_flags & IPN_FILTER) {
3977 				if (!nat_match(fin, np))
3978 					continue;
3979 			} else if ((ipa & np->in_inmsk) != np->in_inip)
3980 				continue;
3981 
3982 			if ((fr != NULL) &&
3983 			    !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3984 				continue;
3985 
3986 			if (*np->in_plabel != '\0') {
3987 				if (((np->in_flags & IPN_FILTER) == 0) &&
3988 				    (np->in_dport != tcp->th_dport))
3989 					continue;
3990 				if (appr_ok(fin, tcp, np) == 0)
3991 					continue;
3992 			}
3993 
3994 			ATOMIC_INC32(np->in_use);
3995 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3996 			WRITE_ENTER(&ifs->ifs_ipf_nat);
3997 			nat = nat_new(fin, np, NULL, nflags, NAT_OUTBOUND);
3998 			if (nat != NULL) {
3999 				np->in_use--;
4000 				np->in_hits++;
4001 				MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
4002 				break;
4003 			}
4004 			natfailed = -1;
4005 			npnext = np->in_mnext;
4006 			fr_ipnatderef(&np, ifs);
4007 			MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
4008 		}
4009 		if ((np == NULL) && (nmsk != 0)) {
4010 			while (nmsk) {
4011 				msk <<= 1;
4012 				if (nmsk & 0x80000000)
4013 					break;
4014 				nmsk <<= 1;
4015 			}
4016 			if (nmsk != 0) {
4017 				nmsk <<= 1;
4018 				goto maskloop;
4019 			}
4020 		}
4021 	}
4022 
4023 nonatfrag:
4024 	if (nat != NULL) {
4025 		rval = fr_natout(fin, nat, natadd, nflags);
4026 		if (rval == 1) {
4027 			MUTEX_ENTER(&nat->nat_lock);
4028 			nat_update(fin, nat, nat->nat_ptr);
4029 			nat->nat_bytes[1] += fin->fin_plen;
4030 			nat->nat_pkts[1]++;
4031 			nat->nat_ref++;
4032 			MUTEX_EXIT(&nat->nat_lock);
4033 			fin->fin_nat = nat;
4034 		}
4035 	} else
4036 		rval = natfailed;
4037 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4038 
4039 	if (rval == -1) {
4040 		if (passp != NULL)
4041 			*passp = FR_BLOCK;
4042 		fin->fin_flx |= FI_BADNAT;
4043 	}
4044 	fin->fin_ifp = sifp;
4045 	return rval;
4046 }
4047 
4048 /* ------------------------------------------------------------------------ */
4049 /* Function:    fr_natout                                                   */
4050 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4051 /*                     1 == packet was successfully translated.             */
4052 /* Parameters:  fin(I)    - pointer to packet information                   */
4053 /*              nat(I)    - pointer to NAT structure                        */
4054 /*              natadd(I) - flag indicating if it is safe to add frag cache */
4055 /*              nflags(I) - NAT flags set for this packet                   */
4056 /*                                                                          */
4057 /* Translate a packet coming "out" on an interface.                         */
4058 /* ------------------------------------------------------------------------ */
4059 int fr_natout(fin, nat, natadd, nflags)
4060 fr_info_t *fin;
4061 nat_t *nat;
4062 int natadd;
4063 u_32_t nflags;
4064 {
4065 	icmphdr_t *icmp;
4066 	u_short *csump;
4067 	u_32_t sumd;
4068 	tcphdr_t *tcp;
4069 	ipnat_t *np;
4070 	int i;
4071 	ipf_stack_t *ifs = fin->fin_ifs;
4072 
4073 	if (fin->fin_v == 6) {
4074 #ifdef	USE_INET6
4075 		return fr_nat6out(fin, nat, natadd, nflags);
4076 #else
4077 		return NULL;
4078 #endif
4079 	}
4080 
4081 #if SOLARIS && defined(_KERNEL)
4082 	net_handle_t net_data_p = ifs->ifs_ipf_ipv4;
4083 #endif
4084 
4085 	tcp = NULL;
4086 	icmp = NULL;
4087 	csump = NULL;
4088 	np = nat->nat_ptr;
4089 
4090 	if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4091 		(void) fr_nat_newfrag(fin, 0, nat);
4092 
4093 	/*
4094 	 * Fix up checksums, not by recalculating them, but
4095 	 * simply computing adjustments.
4096 	 * This is only done for STREAMS based IP implementations where the
4097 	 * checksum has already been calculated by IP.  In all other cases,
4098 	 * IPFilter is called before the checksum needs calculating so there
4099 	 * is no call to modify whatever is in the header now.
4100 	 */
4101 	ASSERT(fin->fin_m != NULL);
4102 	if (fin->fin_v == 4 && !NET_IS_HCK_L3_FULL(net_data_p, fin->fin_m)) {
4103 		if (nflags == IPN_ICMPERR) {
4104 			u_32_t s1, s2;
4105 
4106 			s1 = LONG_SUM(ntohl(fin->fin_saddr));
4107 			s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
4108 			CALC_SUMD(s1, s2, sumd);
4109 
4110 			fix_outcksum(&fin->fin_ip->ip_sum, sumd);
4111 		}
4112 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4113     defined(linux) || defined(BRIDGE_IPF)
4114 		else {
4115 			/*
4116 			 * Strictly speaking, this isn't necessary on BSD
4117 			 * kernels because they do checksum calculation after
4118 			 * this code has run BUT if ipfilter is being used
4119 			 * to do NAT as a bridge, that code doesn't exist.
4120 			 */
4121 			if (nat->nat_dir == NAT_OUTBOUND)
4122 				fix_outcksum(&fin->fin_ip->ip_sum,
4123 					    nat->nat_ipsumd);
4124 			else
4125 				fix_incksum(&fin->fin_ip->ip_sum,
4126 				 	   nat->nat_ipsumd);
4127 		}
4128 #endif
4129 	}
4130 
4131 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4132 		if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
4133 			tcp = fin->fin_dp;
4134 
4135 			tcp->th_sport = nat->nat_outport;
4136 			fin->fin_data[0] = ntohs(nat->nat_outport);
4137 		}
4138 
4139 		if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
4140 			icmp = fin->fin_dp;
4141 			icmp->icmp_id = nat->nat_outport;
4142 		}
4143 
4144 		csump = nat_proto(fin, nat, nflags);
4145 	}
4146 
4147 	fin->fin_ip->ip_src = nat->nat_outip;
4148 
4149 	/*
4150 	 * The above comments do not hold for layer 4 (or higher) checksums...
4151 	 */
4152 	if (csump != NULL && !NET_IS_HCK_L4_FULL(net_data_p, fin->fin_m)) {
4153 		if (nflags & IPN_TCPUDP &&
4154 	   	    NET_IS_HCK_L4_PART(net_data_p, fin->fin_m))
4155 			sumd = nat->nat_sumd[1];
4156 		else
4157 			sumd = nat->nat_sumd[0];
4158 
4159 		if (nat->nat_dir == NAT_OUTBOUND)
4160 			fix_outcksum(csump, sumd);
4161 		else
4162 			fix_incksum(csump, sumd);
4163 	}
4164 #ifdef	IPFILTER_SYNC
4165 	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4166 #endif
4167 	/* ------------------------------------------------------------- */
4168 	/* A few quick notes:						 */
4169 	/*	Following are test conditions prior to calling the 	 */
4170 	/*	appr_check routine.					 */
4171 	/*								 */
4172 	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4173 	/*	with a redirect rule, we attempt to match the packet's	 */
4174 	/*	source port against in_dport, otherwise	we'd compare the */
4175 	/*	packet's destination.			 		 */
4176 	/* ------------------------------------------------------------- */
4177 	if ((np != NULL) && (np->in_apr != NULL)) {
4178 		i = appr_check(fin, nat);
4179 		if (i == 0)
4180 			i = 1;
4181 	} else
4182 		i = 1;
4183 	ifs->ifs_nat_stats.ns_mapped[1]++;
4184 	fin->fin_flx |= FI_NATED;
4185 	return i;
4186 }
4187 
4188 
4189 /* ------------------------------------------------------------------------ */
4190 /* Function:    fr_checknatin                                               */
4191 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4192 /*                     0 == no packet translation occurred,                 */
4193 /*                     1 == packet was successfully translated.             */
4194 /* Parameters:  fin(I)   - pointer to packet information                    */
4195 /*              passp(I) - pointer to filtering result flags                */
4196 /*                                                                          */
4197 /* Check to see if an incoming packet should be changed.  ICMP packets are  */
4198 /* first checked to see if they match an existing entry (if an error),      */
4199 /* otherwise a search of the current NAT table is made.  If neither results */
4200 /* in a match then a search for a matching NAT rule is made.  Create a new  */
4201 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
4202 /* packet header(s) as required.                                            */
4203 /* ------------------------------------------------------------------------ */
4204 int fr_checknatin(fin, passp)
4205 fr_info_t *fin;
4206 u_32_t *passp;
4207 {
4208 	u_int nflags, natadd;
4209 	ipnat_t *np, *npnext;
4210 	int rval, natfailed;
4211 	struct ifnet *ifp;
4212 	struct in_addr in;
4213 	icmphdr_t *icmp;
4214 	tcphdr_t *tcp;
4215 	u_short dport;
4216 	nat_t *nat;
4217 	u_32_t iph;
4218 	ipf_stack_t *ifs = fin->fin_ifs;
4219 
4220 	if (ifs->ifs_fr_nat_lock != 0)
4221 		return 0;
4222 	if (ifs->ifs_nat_stats.ns_rules == 0 && ifs->ifs_nat_instances == NULL)
4223 		return 0;
4224 
4225 	tcp = NULL;
4226 	icmp = NULL;
4227 	dport = 0;
4228 	natadd = 1;
4229 	nflags = 0;
4230 	natfailed = 0;
4231 	ifp = fin->fin_ifp;
4232 
4233 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4234 		switch (fin->fin_p)
4235 		{
4236 		case IPPROTO_TCP :
4237 			nflags = IPN_TCP;
4238 			break;
4239 		case IPPROTO_UDP :
4240 			nflags = IPN_UDP;
4241 			break;
4242 		case IPPROTO_ICMP :
4243 			icmp = fin->fin_dp;
4244 
4245 			/*
4246 			 * This is an incoming packet, so the destination is
4247 			 * the icmp_id and the source port equals 0
4248 			 */
4249 			if (nat_icmpquerytype4(icmp->icmp_type)) {
4250 				nflags = IPN_ICMPQUERY;
4251 				dport = icmp->icmp_id;
4252 			} break;
4253 		default :
4254 			break;
4255 		}
4256 
4257 		if ((nflags & IPN_TCPUDP)) {
4258 			tcp = fin->fin_dp;
4259 			dport = tcp->th_dport;
4260 		}
4261 	}
4262 
4263 	in = fin->fin_dst;
4264 
4265 	READ_ENTER(&ifs->ifs_ipf_nat);
4266 
4267 	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
4268 	    (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
4269 		/*EMPTY*/;
4270 	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
4271 		natadd = 0;
4272 	else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
4273 				     fin->fin_src, in))) {
4274 		nflags = nat->nat_flags;
4275 	} else {
4276 		u_32_t hv, msk, rmsk;
4277 
4278 		/*
4279 		 * There is no current entry in the nat table for this packet.
4280 		 *
4281 		 * If the packet is a fragment, but not the first fragment,
4282 		 * then don't do anything.  Otherwise, if there is a matching
4283 		 * nat rule, try to create a new nat entry.
4284 		 */
4285 		if ((fin->fin_off != 0) && (fin->fin_flx & FI_TCPUDP))
4286 			goto nonatfrag;
4287 
4288 		rmsk = ifs->ifs_rdr_masks;
4289 		msk = 0xffffffff;
4290 maskloop:
4291 		iph = in.s_addr & htonl(msk);
4292 		hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_rdrrules_sz);
4293 		for (np = ifs->ifs_rdr_rules[hv]; np; np = npnext) {
4294 			npnext = np->in_rnext;
4295 			if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
4296 				continue;
4297 			if (np->in_v != fin->fin_v)
4298 				continue;
4299 			if (np->in_p && (np->in_p != fin->fin_p))
4300 				continue;
4301 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
4302 				continue;
4303 			if (np->in_flags & IPN_FILTER) {
4304 				if (!nat_match(fin, np))
4305 					continue;
4306 			} else {
4307 				if ((in.s_addr & np->in_outmsk) != np->in_outip)
4308 					continue;
4309 				if (np->in_pmin &&
4310 				    ((ntohs(np->in_pmax) < ntohs(dport)) ||
4311 				     (ntohs(dport) < ntohs(np->in_pmin))))
4312 					continue;
4313 			}
4314 
4315 			if (*np->in_plabel != '\0') {
4316 				if (!appr_ok(fin, tcp, np)) {
4317 					continue;
4318 				}
4319 			}
4320 
4321 			ATOMIC_INC32(np->in_use);
4322 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4323 			WRITE_ENTER(&ifs->ifs_ipf_nat);
4324 			nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
4325 			if (nat != NULL) {
4326 				np->in_use--;
4327 				np->in_hits++;
4328 				MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
4329 				break;
4330 			}
4331 			natfailed = -1;
4332 			npnext = np->in_rnext;
4333 			fr_ipnatderef(&np, ifs);
4334 			MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
4335 		}
4336 
4337 		if ((np == NULL) && (rmsk != 0)) {
4338 			while (rmsk) {
4339 				msk <<= 1;
4340 				if (rmsk & 0x80000000)
4341 					break;
4342 				rmsk <<= 1;
4343 			}
4344 			if (rmsk != 0) {
4345 				rmsk <<= 1;
4346 				goto maskloop;
4347 			}
4348 		}
4349 	}
4350 
4351 nonatfrag:
4352 	if (nat != NULL) {
4353 		rval = fr_natin(fin, nat, natadd, nflags);
4354 		if (rval == 1) {
4355 			MUTEX_ENTER(&nat->nat_lock);
4356 			nat_update(fin, nat, nat->nat_ptr);
4357 			nat->nat_bytes[0] += fin->fin_plen;
4358 			nat->nat_pkts[0]++;
4359 			nat->nat_ref++;
4360 			MUTEX_EXIT(&nat->nat_lock);
4361 			fin->fin_nat = nat;
4362 			fin->fin_state = nat->nat_state;
4363 		}
4364 	} else
4365 		rval = natfailed;
4366 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4367 
4368 	if (rval == -1) {
4369 		if (passp != NULL)
4370 			*passp = FR_BLOCK;
4371 		fin->fin_flx |= FI_BADNAT;
4372 	}
4373 	return rval;
4374 }
4375 
4376 
4377 /* ------------------------------------------------------------------------ */
4378 /* Function:    fr_natin                                                    */
4379 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4380 /*                     1 == packet was successfully translated.             */
4381 /* Parameters:  fin(I)    - pointer to packet information                   */
4382 /*              nat(I)    - pointer to NAT structure                        */
4383 /*              natadd(I) - flag indicating if it is safe to add frag cache */
4384 /*              nflags(I) - NAT flags set for this packet                   */
4385 /* Locks Held:  ipf_nat (READ)                                              */
4386 /*                                                                          */
4387 /* Translate a packet coming "in" on an interface.                          */
4388 /* ------------------------------------------------------------------------ */
4389 int fr_natin(fin, nat, natadd, nflags)
4390 fr_info_t *fin;
4391 nat_t *nat;
4392 int natadd;
4393 u_32_t nflags;
4394 {
4395 	icmphdr_t *icmp;
4396 	u_short *csump;
4397 	tcphdr_t *tcp;
4398 	ipnat_t *np;
4399 	int i;
4400 	ipf_stack_t *ifs = fin->fin_ifs;
4401 
4402 	if (fin->fin_v == 6) {
4403 #ifdef	USE_INET6
4404 		return fr_nat6in(fin, nat, natadd, nflags);
4405 #else
4406 		return NULL;
4407 #endif
4408 	}
4409 
4410 #if SOLARIS && defined(_KERNEL)
4411 	net_handle_t net_data_p = ifs->ifs_ipf_ipv4;
4412 #endif
4413 
4414 	tcp = NULL;
4415 	csump = NULL;
4416 	np = nat->nat_ptr;
4417 	fin->fin_fr = nat->nat_fr;
4418 
4419 	if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4420 		(void) fr_nat_newfrag(fin, 0, nat);
4421 
4422 	if (np != NULL) {
4423 
4424 	/* ------------------------------------------------------------- */
4425 	/* A few quick notes:						 */
4426 	/*	Following are test conditions prior to calling the 	 */
4427 	/*	appr_check routine.					 */
4428 	/*								 */
4429 	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4430 	/*	with a map rule, we attempt to match the packet's	 */
4431 	/*	source port against in_dport, otherwise	we'd compare the */
4432 	/*	packet's destination.			 		 */
4433 	/* ------------------------------------------------------------- */
4434 		if (np->in_apr != NULL) {
4435 			i = appr_check(fin, nat);
4436 			if (i == -1) {
4437 				return -1;
4438 			}
4439 		}
4440 	}
4441 
4442 #ifdef	IPFILTER_SYNC
4443 	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4444 #endif
4445 
4446 	fin->fin_ip->ip_dst = nat->nat_inip;
4447 	fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4448 	if (nflags & IPN_TCPUDP)
4449 		tcp = fin->fin_dp;
4450 
4451 	/*
4452 	 * Fix up checksums, not by recalculating them, but
4453 	 * simply computing adjustments.
4454 	 * Why only do this for some platforms on inbound packets ?
4455 	 * Because for those that it is done, IP processing is yet to happen
4456 	 * and so the IPv4 header checksum has not yet been evaluated.
4457 	 * Perhaps it should always be done for the benefit of things like
4458 	 * fast forwarding (so that it doesn't need to be recomputed) but with
4459 	 * header checksum offloading, perhaps it is a moot point.
4460 	 */
4461 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4462      defined(__osf__) || defined(linux)
4463 	if (nat->nat_dir == NAT_OUTBOUND)
4464 		fix_incksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
4465 	else
4466 		fix_outcksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
4467 #endif
4468 
4469 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4470 		if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4471 			tcp->th_dport = nat->nat_inport;
4472 			fin->fin_data[1] = ntohs(nat->nat_inport);
4473 		}
4474 
4475 
4476 		if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4477 			icmp = fin->fin_dp;
4478 
4479 			icmp->icmp_id = nat->nat_inport;
4480 		}
4481 
4482 		csump = nat_proto(fin, nat, nflags);
4483 	}
4484 
4485 	/*
4486 	 * In case they are being forwarded, inbound packets always need to have
4487 	 * their checksum adjusted even if hardware checksum validation said OK.
4488 	 */
4489 	if (csump != NULL) {
4490 		if (nat->nat_dir == NAT_OUTBOUND)
4491 			fix_incksum(csump, nat->nat_sumd[0]);
4492 		else
4493 			fix_outcksum(csump, nat->nat_sumd[0]);
4494 	}
4495 
4496 #if SOLARIS && defined(_KERNEL)
4497 	if (nflags & IPN_TCPUDP &&
4498 	    NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) {
4499 		/*
4500 		 * Need to adjust the partial checksum result stored in
4501 		 * db_cksum16, which will be used for validation in IP.
4502 		 * See IP_CKSUM_RECV().
4503 		 * Adjustment data should be the inverse of the IP address
4504 		 * changes, because db_cksum16 is supposed to be the complement
4505 		 * of the pesudo header.
4506 		 */
4507 		csump = &fin->fin_m->b_datap->db_cksum16;
4508 		if (nat->nat_dir == NAT_OUTBOUND)
4509 			fix_outcksum(csump, nat->nat_sumd[1]);
4510 		else
4511 			fix_incksum(csump, nat->nat_sumd[1]);
4512 	}
4513 #endif
4514 
4515 	ifs->ifs_nat_stats.ns_mapped[0]++;
4516 	fin->fin_flx |= FI_NATED;
4517 	if (np != NULL && np->in_tag.ipt_num[0] != 0)
4518 		fin->fin_nattag = &np->in_tag;
4519 	return 1;
4520 }
4521 
4522 
4523 /* ------------------------------------------------------------------------ */
4524 /* Function:    nat_proto                                                   */
4525 /* Returns:     u_short* - pointer to transport header checksum to update,  */
4526 /*                         NULL if the transport protocol is not recognised */
4527 /*                         as needing a checksum update.                    */
4528 /* Parameters:  fin(I)    - pointer to packet information                   */
4529 /*              nat(I)    - pointer to NAT structure                        */
4530 /*              nflags(I) - NAT flags set for this packet                   */
4531 /*                                                                          */
4532 /* Return the pointer to the checksum field for each protocol so understood.*/
4533 /* If support for making other changes to a protocol header is required,    */
4534 /* that is not strictly 'address' translation, such as clamping the MSS in  */
4535 /* TCP down to a specific value, then do it from here.                      */
4536 /* ------------------------------------------------------------------------ */
4537 u_short *nat_proto(fin, nat, nflags)
4538 fr_info_t *fin;
4539 nat_t *nat;
4540 u_int nflags;
4541 {
4542 	icmphdr_t *icmp;
4543 	struct icmp6_hdr *icmp6;
4544 	u_short *csump;
4545 	tcphdr_t *tcp;
4546 	udphdr_t *udp;
4547 
4548 	csump = NULL;
4549 	if (fin->fin_out == 0) {
4550 		fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4551 	} else {
4552 		fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4553 	}
4554 
4555 	switch (fin->fin_p)
4556 	{
4557 	case IPPROTO_TCP :
4558 		tcp = fin->fin_dp;
4559 
4560 		csump = &tcp->th_sum;
4561 
4562 		/*
4563 		 * Do a MSS CLAMPING on a SYN packet,
4564 		 * only deal IPv4 for now.
4565 		 */
4566 		if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4567 			nat_mssclamp(tcp, nat->nat_mssclamp, csump);
4568 
4569 		break;
4570 
4571 	case IPPROTO_UDP :
4572 		udp = fin->fin_dp;
4573 
4574 		if (udp->uh_sum)
4575 			csump = &udp->uh_sum;
4576 		break;
4577 
4578 	case IPPROTO_ICMP :
4579 		icmp = fin->fin_dp;
4580 
4581 		if ((nflags & IPN_ICMPQUERY) != 0) {
4582 			if (icmp->icmp_cksum != 0)
4583 				csump = &icmp->icmp_cksum;
4584 		}
4585 		break;
4586 
4587 	case IPPROTO_ICMPV6 :
4588 		icmp6 = fin->fin_dp;
4589 
4590 		if ((nflags & IPN_ICMPQUERY) != 0) {
4591 			if (icmp6->icmp6_cksum != 0)
4592 				csump = &icmp6->icmp6_cksum;
4593 		}
4594 		break;
4595 	}
4596 	return csump;
4597 }
4598 
4599 
4600 /* ------------------------------------------------------------------------ */
4601 /* Function:    fr_natunload                                                */
4602 /* Returns:     Nil                                                         */
4603 /* Parameters:  ifs - ipf stack instance                                  */
4604 /*                                                                          */
4605 /* Free all memory used by NAT structures allocated at runtime.             */
4606 /* ------------------------------------------------------------------------ */
4607 void fr_natunload(ifs)
4608 ipf_stack_t *ifs;
4609 {
4610 	ipftq_t *ifq, *ifqnext;
4611 
4612 	(void) nat_clearlist(ifs);
4613 	(void) nat_flushtable(FLUSH_TABLE_ALL, ifs);
4614 
4615 	/*
4616 	 * Proxy timeout queues are not cleaned here because although they
4617 	 * exist on the NAT list, appr_unload is called after fr_natunload
4618 	 * and the proxies actually are responsible for them being created.
4619 	 * Should the proxy timeouts have their own list?  There's no real
4620 	 * justification as this is the only complication.
4621 	 */
4622 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4623 		ifqnext = ifq->ifq_next;
4624 		if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4625 		    (fr_deletetimeoutqueue(ifq) == 0))
4626 			fr_freetimeoutqueue(ifq, ifs);
4627 	}
4628 
4629 	if (ifs->ifs_nat_table[0] != NULL) {
4630 		KFREES(ifs->ifs_nat_table[0],
4631 		       sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4632 		ifs->ifs_nat_table[0] = NULL;
4633 	}
4634 	if (ifs->ifs_nat_table[1] != NULL) {
4635 		KFREES(ifs->ifs_nat_table[1],
4636 		       sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4637 		ifs->ifs_nat_table[1] = NULL;
4638 	}
4639 	if (ifs->ifs_nat_rules != NULL) {
4640 		KFREES(ifs->ifs_nat_rules,
4641 		       sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
4642 		ifs->ifs_nat_rules = NULL;
4643 	}
4644 	if (ifs->ifs_rdr_rules != NULL) {
4645 		KFREES(ifs->ifs_rdr_rules,
4646 		       sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
4647 		ifs->ifs_rdr_rules = NULL;
4648 	}
4649 	if (ifs->ifs_maptable != NULL) {
4650 		KFREES(ifs->ifs_maptable,
4651 		       sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
4652 		ifs->ifs_maptable = NULL;
4653 	}
4654 	if (ifs->ifs_nat_stats.ns_bucketlen[0] != NULL) {
4655 		KFREES(ifs->ifs_nat_stats.ns_bucketlen[0],
4656 		       sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4657 		ifs->ifs_nat_stats.ns_bucketlen[0] = NULL;
4658 	}
4659 	if (ifs->ifs_nat_stats.ns_bucketlen[1] != NULL) {
4660 		KFREES(ifs->ifs_nat_stats.ns_bucketlen[1],
4661 		       sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4662 		ifs->ifs_nat_stats.ns_bucketlen[1] = NULL;
4663 	}
4664 
4665 	if (ifs->ifs_fr_nat_maxbucket_reset == 1)
4666 		ifs->ifs_fr_nat_maxbucket = 0;
4667 
4668 	if (ifs->ifs_fr_nat_init == 1) {
4669 		ifs->ifs_fr_nat_init = 0;
4670 		fr_sttab_destroy(ifs->ifs_nat_tqb);
4671 
4672 		RW_DESTROY(&ifs->ifs_ipf_natfrag);
4673 		RW_DESTROY(&ifs->ifs_ipf_nat);
4674 
4675 		MUTEX_DESTROY(&ifs->ifs_ipf_nat_new);
4676 		MUTEX_DESTROY(&ifs->ifs_ipf_natio);
4677 
4678 		MUTEX_DESTROY(&ifs->ifs_nat_udptq.ifq_lock);
4679 		MUTEX_DESTROY(&ifs->ifs_nat_icmptq.ifq_lock);
4680 		MUTEX_DESTROY(&ifs->ifs_nat_iptq.ifq_lock);
4681 	}
4682 }
4683 
4684 
4685 /* ------------------------------------------------------------------------ */
4686 /* Function:    fr_natexpire                                                */
4687 /* Returns:     Nil                                                         */
4688 /* Parameters:  ifs - ipf stack instance                                    */
4689 /*                                                                          */
4690 /* Check all of the timeout queues for entries at the top which need to be  */
4691 /* expired.                                                                 */
4692 /* ------------------------------------------------------------------------ */
4693 void fr_natexpire(ifs)
4694 ipf_stack_t *ifs;
4695 {
4696 	ipftq_t *ifq, *ifqnext;
4697 	ipftqent_t *tqe, *tqn;
4698 	int i;
4699 	SPL_INT(s);
4700 
4701 	SPL_NET(s);
4702 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4703 	for (ifq = ifs->ifs_nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4704 		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4705 			if (tqe->tqe_die > ifs->ifs_fr_ticks)
4706 				break;
4707 			tqn = tqe->tqe_next;
4708 			(void) nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4709 		}
4710 	}
4711 
4712 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4713 		ifqnext = ifq->ifq_next;
4714 
4715 		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4716 			if (tqe->tqe_die > ifs->ifs_fr_ticks)
4717 				break;
4718 			tqn = tqe->tqe_next;
4719 			(void) nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4720 		}
4721 	}
4722 
4723 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4724 		ifqnext = ifq->ifq_next;
4725 
4726 		if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4727 		    (ifq->ifq_ref == 0)) {
4728 			fr_freetimeoutqueue(ifq, ifs);
4729 		}
4730 	}
4731 
4732 	if (ifs->ifs_nat_doflush != 0) {
4733 		(void) nat_flushtable(FLUSH_TABLE_EXTRA, ifs);
4734 		ifs->ifs_nat_doflush = 0;
4735 	}
4736 
4737 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4738 	SPL_X(s);
4739 }
4740 
4741 
4742 /* ------------------------------------------------------------------------ */
4743 /* Function:    fr_nataddrsync                                              */
4744 /* Returns:     Nil                                                         */
4745 /* Parameters:  ifp(I) -  pointer to network interface                      */
4746 /*              addr(I) - pointer to new network address                    */
4747 /*                                                                          */
4748 /* Walk through all of the currently active NAT sessions, looking for those */
4749 /* which need to have their translated address updated (where the interface */
4750 /* matches the one passed in) and change it, recalculating the checksum sum */
4751 /* difference too.                                                          */
4752 /* ------------------------------------------------------------------------ */
4753 void fr_nataddrsync(v, ifp, addr, ifs)
4754 int v;
4755 void *ifp;
4756 void *addr;
4757 ipf_stack_t *ifs;
4758 {
4759 	u_32_t sum1, sum2, sumd;
4760 	nat_t *nat;
4761 	ipnat_t *np;
4762 	SPL_INT(s);
4763 
4764 	if (ifs->ifs_fr_running <= 0)
4765 		return;
4766 
4767 	SPL_NET(s);
4768 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4769 
4770 	if (ifs->ifs_fr_running <= 0) {
4771 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4772 		return;
4773 	}
4774 
4775 	/*
4776 	 * Change IP addresses for NAT sessions for any protocol except TCP
4777 	 * since it will break the TCP connection anyway.  The only rules
4778 	 * which will get changed are those which are "map ... -> 0/32",
4779 	 * where the rule specifies the address is taken from the interface.
4780 	 */
4781 	for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4782 		if (addr != NULL) {
4783 			if (((ifp != NULL) && ifp != (nat->nat_ifps[0])) ||
4784 			    ((nat->nat_flags & IPN_TCP) != 0))
4785 				continue;
4786 			if ((np = nat->nat_ptr) == NULL)
4787 				continue;
4788 			if (v == 4 && np->in_v == 4) {
4789 				if (np->in_nip || np->in_outmsk != 0xffffffff)
4790 					continue;
4791 				/*
4792 				 * Change the map-to address to be the same as
4793 				 * the new one.
4794 				 */
4795 				sum1 = nat->nat_outip.s_addr;
4796 				nat->nat_outip = *(struct in_addr *)addr;
4797 				sum2 = nat->nat_outip.s_addr;
4798 			} else if (v == 6 && np->in_v == 6) {
4799 				if (!IP6_ISZERO(&np->in_next6.in6) ||
4800 				    !IP6_ISONES(&np->in_out[1].in6))
4801 					continue;
4802 				/*
4803 				 * Change the map-to address to be the same as
4804 				 * the new one.
4805 				 */
4806 				nat->nat_outip6.in6 = *(struct in6_addr *)addr;
4807 			} else
4808 				continue;
4809 
4810 		} else if (((ifp == NULL) || (ifp == nat->nat_ifps[0])) &&
4811 		    !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr)) {
4812 			if (np->in_v == 4 && (v == 4 || v == 0)) {
4813 				struct in_addr in;
4814 				if (np->in_outmsk != 0xffffffff || np->in_nip)
4815 					continue;
4816 				/*
4817 				 * Change the map-to address to be the same as
4818 				 * the new one.
4819 				 */
4820 				sum1 = nat->nat_outip.s_addr;
4821 				if (fr_ifpaddr(4, FRI_NORMAL, nat->nat_ifps[0],
4822 					       &in, NULL, ifs) != -1)
4823 					nat->nat_outip = in;
4824 				sum2 = nat->nat_outip.s_addr;
4825 			} else if (np->in_v == 6 && (v == 6 || v == 0)) {
4826 				struct in6_addr in6;
4827 				if (!IP6_ISZERO(&np->in_next6.in6) ||
4828 				    !IP6_ISONES(&np->in_out[1].in6))
4829 					continue;
4830 				/*
4831 				 * Change the map-to address to be the same as
4832 				 * the new one.
4833 				 */
4834 				if (fr_ifpaddr(6, FRI_NORMAL, nat->nat_ifps[0],
4835 					       (void *)&in6, NULL, ifs) != -1)
4836 					nat->nat_outip6.in6 = in6;
4837 			} else
4838 				continue;
4839 		} else {
4840 			continue;
4841 		}
4842 
4843 		if (sum1 == sum2)
4844 			continue;
4845 		/*
4846 		 * Readjust the checksum adjustment to take into
4847 		 * account the new IP#.
4848 		 */
4849 		CALC_SUMD(sum1, sum2, sumd);
4850 		/* XXX - dont change for TCP when solaris does
4851 		 * hardware checksumming.
4852 		 */
4853 		sumd += nat->nat_sumd[0];
4854 		nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4855 		nat->nat_sumd[1] = nat->nat_sumd[0];
4856 	}
4857 
4858 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4859 	SPL_X(s);
4860 }
4861 
4862 
4863 /* ------------------------------------------------------------------------ */
4864 /* Function:    fr_natifpsync                                               */
4865 /* Returns:     Nil                                                         */
4866 /* Parameters:  action(I) - how we are syncing                              */
4867 /*              ifp(I)    - pointer to network interface                    */
4868 /*              name(I)   - name of interface to sync to                    */
4869 /*                                                                          */
4870 /* This function is used to resync the mapping of interface names and their */
4871 /* respective 'pointers'.  For "action == IPFSYNC_RESYNC", resync all       */
4872 /* interfaces by doing a new lookup of name to 'pointer'.  For "action ==   */
4873 /* IPFSYNC_NEWIFP", treat ifp as the new pointer value associated with      */
4874 /* "name" and for "action == IPFSYNC_OLDIFP", ifp is a pointer for which    */
4875 /* there is no longer any interface associated with it.                     */
4876 /* ------------------------------------------------------------------------ */
4877 void fr_natifpsync(action, v, ifp, name, ifs)
4878 int action, v;
4879 void *ifp;
4880 char *name;
4881 ipf_stack_t *ifs;
4882 {
4883 #if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL)
4884 	int s;
4885 #endif
4886 	nat_t *nat;
4887 	ipnat_t *n;
4888 	int nv;
4889 
4890 	if (ifs->ifs_fr_running <= 0)
4891 		return;
4892 
4893 	SPL_NET(s);
4894 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4895 
4896 	if (ifs->ifs_fr_running <= 0) {
4897 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4898 		return;
4899 	}
4900 
4901 	switch (action)
4902 	{
4903 	case IPFSYNC_RESYNC :
4904 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4905 			nv = (v == 0) ? nat->nat_v : v;
4906 			if (nat->nat_v != nv)
4907 				continue;
4908 			if ((ifp == nat->nat_ifps[0]) ||
4909 			    (nat->nat_ifps[0] == (void *)-1)) {
4910 				nat->nat_ifps[0] =
4911 				    fr_resolvenic(nat->nat_ifnames[0], nv, ifs);
4912 			}
4913 
4914 			if ((ifp == nat->nat_ifps[1]) ||
4915 			    (nat->nat_ifps[1] == (void *)-1)) {
4916 				nat->nat_ifps[1] =
4917 				    fr_resolvenic(nat->nat_ifnames[1], nv, ifs);
4918 			}
4919 		}
4920 
4921 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4922 			nv = (v == 0) ? (int)n->in_v : v;
4923 			if ((int)n->in_v != nv)
4924 				continue;
4925 			if (n->in_ifps[0] == ifp ||
4926 			    n->in_ifps[0] == (void *)-1) {
4927 				n->in_ifps[0] =
4928 				    fr_resolvenic(n->in_ifnames[0], nv, ifs);
4929 			}
4930 			if (n->in_ifps[1] == ifp ||
4931 			    n->in_ifps[1] == (void *)-1) {
4932 				n->in_ifps[1] =
4933 				    fr_resolvenic(n->in_ifnames[1], nv, ifs);
4934 			}
4935 		}
4936 		break;
4937 	case IPFSYNC_NEWIFP :
4938 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4939 			if (nat->nat_v != v)
4940 				continue;
4941 			if (!strncmp(name, nat->nat_ifnames[0],
4942 				     sizeof(nat->nat_ifnames[0])))
4943 				nat->nat_ifps[0] = ifp;
4944 			if (!strncmp(name, nat->nat_ifnames[1],
4945 				     sizeof(nat->nat_ifnames[1])))
4946 				nat->nat_ifps[1] = ifp;
4947 		}
4948 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4949 			if ((int)n->in_v != v)
4950 				continue;
4951 			if (!strncmp(name, n->in_ifnames[0],
4952 				     sizeof(n->in_ifnames[0])))
4953 				n->in_ifps[0] = ifp;
4954 			if (!strncmp(name, n->in_ifnames[1],
4955 				     sizeof(n->in_ifnames[1])))
4956 				n->in_ifps[1] = ifp;
4957 		}
4958 		break;
4959 	case IPFSYNC_OLDIFP :
4960 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4961 			if (nat->nat_v != v)
4962 				continue;
4963 			if (ifp == nat->nat_ifps[0])
4964 				nat->nat_ifps[0] = (void *)-1;
4965 			if (ifp == nat->nat_ifps[1])
4966 				nat->nat_ifps[1] = (void *)-1;
4967 		}
4968 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4969 			if ((int)n->in_v != v)
4970 				continue;
4971 			if (n->in_ifps[0] == ifp)
4972 				n->in_ifps[0] = (void *)-1;
4973 			if (n->in_ifps[1] == ifp)
4974 				n->in_ifps[1] = (void *)-1;
4975 		}
4976 		break;
4977 	}
4978 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4979 	SPL_X(s);
4980 }
4981 
4982 
4983 /* ------------------------------------------------------------------------ */
4984 /* Function:    nat_icmpquerytype4                                          */
4985 /* Returns:     int - 1 == success, 0 == failure                            */
4986 /* Parameters:  icmptype(I) - ICMP type number                              */
4987 /*                                                                          */
4988 /* Tests to see if the ICMP type number passed is a query/response type or  */
4989 /* not.                                                                     */
4990 /* ------------------------------------------------------------------------ */
4991 static INLINE int nat_icmpquerytype4(icmptype)
4992 int icmptype;
4993 {
4994 
4995 	/*
4996 	 * For the ICMP query NAT code, it is essential that both the query
4997 	 * and the reply match on the NAT rule. Because the NAT structure
4998 	 * does not keep track of the icmptype, and a single NAT structure
4999 	 * is used for all icmp types with the same src, dest and id, we
5000 	 * simply define the replies as queries as well. The funny thing is,
5001 	 * altough it seems silly to call a reply a query, this is exactly
5002 	 * as it is defined in the IPv4 specification
5003 	 */
5004 
5005 	switch (icmptype)
5006 	{
5007 
5008 	case ICMP_ECHOREPLY:
5009 	case ICMP_ECHO:
5010 	/* route aedvertisement/solliciation is currently unsupported: */
5011 	/* it would require rewriting the ICMP data section            */
5012 	case ICMP_TSTAMP:
5013 	case ICMP_TSTAMPREPLY:
5014 	case ICMP_IREQ:
5015 	case ICMP_IREQREPLY:
5016 	case ICMP_MASKREQ:
5017 	case ICMP_MASKREPLY:
5018 		return 1;
5019 	default:
5020 		return 0;
5021 	}
5022 }
5023 
5024 
5025 /* ------------------------------------------------------------------------ */
5026 /* Function:    nat_log                                                     */
5027 /* Returns:     Nil                                                         */
5028 /* Parameters:  nat(I)  - pointer to NAT structure                          */
5029 /*              type(I) - type of log entry to create                       */
5030 /*                                                                          */
5031 /* Creates a NAT log entry.                                                 */
5032 /* ------------------------------------------------------------------------ */
5033 void nat_log(nat, type, ifs)
5034 struct nat *nat;
5035 u_int type;
5036 ipf_stack_t *ifs;
5037 {
5038 #ifdef	IPFILTER_LOG
5039 # ifndef LARGE_NAT
5040 	struct ipnat *np;
5041 	int rulen;
5042 # endif
5043 	struct natlog natl;
5044 	void *items[1];
5045 	size_t sizes[1];
5046 	int types[1];
5047 
5048 	natl.nlg_inip = nat->nat_inip6;
5049 	natl.nlg_outip = nat->nat_outip6;
5050 	natl.nlg_origip = nat->nat_oip6;
5051 	natl.nlg_bytes[0] = nat->nat_bytes[0];
5052 	natl.nlg_bytes[1] = nat->nat_bytes[1];
5053 	natl.nlg_pkts[0] = nat->nat_pkts[0];
5054 	natl.nlg_pkts[1] = nat->nat_pkts[1];
5055 	natl.nlg_origport = nat->nat_oport;
5056 	natl.nlg_inport = nat->nat_inport;
5057 	natl.nlg_outport = nat->nat_outport;
5058 	natl.nlg_p = nat->nat_p;
5059 	natl.nlg_type = type;
5060 	natl.nlg_rule = -1;
5061 	natl.nlg_v = nat->nat_v;
5062 # ifndef LARGE_NAT
5063 	if (nat->nat_ptr != NULL) {
5064 		for (rulen = 0, np = ifs->ifs_nat_list; np;
5065 		     np = np->in_next, rulen++)
5066 			if (np == nat->nat_ptr) {
5067 				natl.nlg_rule = rulen;
5068 				break;
5069 			}
5070 	}
5071 # endif
5072 	items[0] = &natl;
5073 	sizes[0] = sizeof(natl);
5074 	types[0] = 0;
5075 
5076 	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1, ifs);
5077 #endif
5078 }
5079 
5080 
#if defined(__OpenBSD__)
/* ------------------------------------------------------------------------ */
/* Function:    nat_ifdetach                                                */
/* Returns:     Nil                                                         */
/* Parameters:  ifp(I) - pointer to network interface                       */
/*              ifs    - ipf stack instance                                 */
/*                                                                          */
/* OpenBSD compatibility entry point: forwards to frsync() so that          */
/* interface references within IPFilter get updated.                        */
/* ------------------------------------------------------------------------ */
void nat_ifdetach(ifp, ifs)
void *ifp;
ipf_stack_t *ifs;
{
	frsync(ifp, ifs);
}
#endif
5098 
5099 
5100 /* ------------------------------------------------------------------------ */
5101 /* Function:    fr_ipnatderef                                               */
5102 /* Returns:     Nil                                                         */
5103 /* Parameters:  inp(I) - pointer to pointer to NAT rule                     */
5104 /* Write Locks: ipf_nat                                                     */
5105 /*                                                                          */
5106 /* ------------------------------------------------------------------------ */
5107 void fr_ipnatderef(inp, ifs)
5108 ipnat_t **inp;
5109 ipf_stack_t *ifs;
5110 {
5111 	ipnat_t *in;
5112 
5113 	in = *inp;
5114 	*inp = NULL;
5115 	in->in_use--;
5116 	if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
5117 		if (in->in_apr)
5118 			appr_free(in->in_apr);
5119 		KFREE(in);
5120 		ifs->ifs_nat_stats.ns_rules--;
5121 #ifdef notdef
5122 #if SOLARIS
5123 		if (ifs->ifs_nat_stats.ns_rules == 0)
5124 			ifs->ifs_pfil_delayed_copy = 1;
5125 #endif
5126 #endif
5127 	}
5128 }
5129 
5130 
5131 /* ------------------------------------------------------------------------ */
5132 /* Function:    fr_natderef                                                 */
5133 /* Returns:     Nil                                                         */
5134 /* Parameters:  natp - pointer to pointer to NAT table entry                */
5135 /*              ifs  - ipf stack instance                                   */
5136 /*                                                                          */
5137 /* Decrement the reference counter for this NAT table entry and free it if  */
5138 /* there are no more things using it.                                       */
5139 /*                                                                          */
5140 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */
5141 /* structure *because* it only gets called on paths _after_ nat_ref has been*/
5142 /* incremented.  If nat_ref == 1 then we shouldn't decrement it here        */
5143 /* because nat_delete() will do that and send nat_ref to -1.                */
5144 /*                                                                          */
5145 /* Holding the lock on nat_lock is required to serialise nat_delete() being */
5146 /* called from a NAT flush ioctl with a deref happening because of a packet.*/
5147 /* ------------------------------------------------------------------------ */
5148 void fr_natderef(natp, ifs)
5149 nat_t **natp;
5150 ipf_stack_t *ifs;
5151 {
5152 	nat_t *nat;
5153 
5154 	nat = *natp;
5155 	*natp = NULL;
5156 
5157 	MUTEX_ENTER(&nat->nat_lock);
5158 	if (nat->nat_ref > 1) {
5159 		nat->nat_ref--;
5160 		MUTEX_EXIT(&nat->nat_lock);
5161 		return;
5162 	}
5163 	MUTEX_EXIT(&nat->nat_lock);
5164 
5165 	WRITE_ENTER(&ifs->ifs_ipf_nat);
5166 	(void) nat_delete(nat, NL_EXPIRE, ifs);
5167 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5168 }
5169 
5170 
5171 /* ------------------------------------------------------------------------ */
5172 /* Function:    fr_natclone                                                 */
5173 /* Returns:     ipstate_t* - NULL == cloning failed,                        */
5174 /*                           else pointer to new NAT structure              */
5175 /* Parameters:  fin(I)   - pointer to packet information                    */
5176 /*              nat(I)   - pointer to master NAT structure                  */
5177 /* Write Lock:  ipf_nat                                                     */
5178 /*                                                                          */
5179 /* Create a "duplicate" NAT table entry from the master.                    */
5180 /* ------------------------------------------------------------------------ */
5181 nat_t *fr_natclone(fin, nat)
5182 fr_info_t *fin;
5183 nat_t *nat;
5184 {
5185 	frentry_t *fr;
5186 	nat_t *clone;
5187 	ipnat_t *np;
5188 	ipf_stack_t *ifs = fin->fin_ifs;
5189 
5190 	/*
5191 	 * Trigger automatic call to nat_flushtable() if the
5192 	 * table has reached capcity specified by hi watermark.
5193 	 */
5194 	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_level_hi)
5195 		ifs->ifs_nat_doflush = 1;
5196 
5197 	/*
5198 	 * If automatic flushing did not do its job, and the table
5199 	 * has filled up, don't try to create a new entry.
5200 	 */
5201 	if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) {
5202 		ifs->ifs_nat_stats.ns_memfail++;
5203 		return NULL;
5204 	}
5205 
5206 	KMALLOC(clone, nat_t *);
5207 	if (clone == NULL)
5208 		return NULL;
5209 	bcopy((char *)nat, (char *)clone, sizeof(*clone));
5210 
5211 	MUTEX_NUKE(&clone->nat_lock);
5212 
5213 	clone->nat_aps = NULL;
5214 	/*
5215 	 * Initialize all these so that nat_delete() doesn't cause a crash.
5216 	 */
5217 	clone->nat_tqe.tqe_pnext = NULL;
5218 	clone->nat_tqe.tqe_next = NULL;
5219 	clone->nat_tqe.tqe_ifq = NULL;
5220 	clone->nat_tqe.tqe_parent = clone;
5221 
5222 	clone->nat_flags &= ~SI_CLONE;
5223 	clone->nat_flags |= SI_CLONED;
5224 
5225 	if (clone->nat_hm)
5226 		clone->nat_hm->hm_ref++;
5227 
5228 	if (nat_insert(clone, fin->fin_rev, ifs) == -1) {
5229 		KFREE(clone);
5230 		return NULL;
5231 	}
5232 	np = clone->nat_ptr;
5233 	if (np != NULL) {
5234 		if (ifs->ifs_nat_logging)
5235 			nat_log(clone, (u_int)np->in_redir, ifs);
5236 		np->in_use++;
5237 	}
5238 	fr = clone->nat_fr;
5239 	if (fr != NULL) {
5240 		MUTEX_ENTER(&fr->fr_lock);
5241 		fr->fr_ref++;
5242 		MUTEX_EXIT(&fr->fr_lock);
5243 	}
5244 
5245 	/*
5246 	 * Because the clone is created outside the normal loop of things and
5247 	 * TCP has special needs in terms of state, initialise the timeout
5248 	 * state of the new NAT from here.
5249 	 */
5250 	if (clone->nat_p == IPPROTO_TCP) {
5251 		(void) fr_tcp_age(&clone->nat_tqe, fin, ifs->ifs_nat_tqb,
5252 				  clone->nat_flags);
5253 	}
5254 #ifdef	IPFILTER_SYNC
5255 	clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
5256 #endif
5257 	if (ifs->ifs_nat_logging)
5258 		nat_log(clone, NL_CLONE, ifs);
5259 	return clone;
5260 }
5261 
5262 
5263 /* ------------------------------------------------------------------------ */
5264 /* Function:   nat_wildok                                                   */
5265 /* Returns:    int - 1 == packet's ports match wildcards                    */
5266 /*                   0 == packet's ports don't match wildcards              */
5267 /* Parameters: nat(I)   - NAT entry                                         */
5268 /*             sport(I) - source port                                       */
5269 /*             dport(I) - destination port                                  */
5270 /*             flags(I) - wildcard flags                                    */
5271 /*             dir(I)   - packet direction                                  */
5272 /*                                                                          */
5273 /* Use NAT entry and packet direction to determine which combination of     */
5274 /* wildcard flags should be used.                                           */
5275 /* ------------------------------------------------------------------------ */
5276 int nat_wildok(nat, sport, dport, flags, dir)
5277 nat_t *nat;
5278 int sport;
5279 int dport;
5280 int flags;
5281 int dir;
5282 {
5283 	/*
5284 	 * When called by       dir is set to
5285 	 * nat_inlookup         NAT_INBOUND (0)
5286 	 * nat_outlookup        NAT_OUTBOUND (1)
5287 	 *
5288 	 * We simply combine the packet's direction in dir with the original
5289 	 * "intended" direction of that NAT entry in nat->nat_dir to decide
5290 	 * which combination of wildcard flags to allow.
5291 	 */
5292 
5293 	switch ((dir << 1) | nat->nat_dir)
5294 	{
5295 	case 3: /* outbound packet / outbound entry */
5296 		if (((nat->nat_inport == sport) ||
5297 		    (flags & SI_W_SPORT)) &&
5298 		    ((nat->nat_oport == dport) ||
5299 		    (flags & SI_W_DPORT)))
5300 			return 1;
5301 		break;
5302 	case 2: /* outbound packet / inbound entry */
5303 		if (((nat->nat_outport == sport) ||
5304 		    (flags & SI_W_DPORT)) &&
5305 		    ((nat->nat_oport == dport) ||
5306 		    (flags & SI_W_SPORT)))
5307 			return 1;
5308 		break;
5309 	case 1: /* inbound packet / outbound entry */
5310 		if (((nat->nat_oport == sport) ||
5311 		    (flags & SI_W_DPORT)) &&
5312 		    ((nat->nat_outport == dport) ||
5313 		    (flags & SI_W_SPORT)))
5314 			return 1;
5315 		break;
5316 	case 0: /* inbound packet / inbound entry */
5317 		if (((nat->nat_oport == sport) ||
5318 		    (flags & SI_W_SPORT)) &&
5319 		    ((nat->nat_outport == dport) ||
5320 		    (flags & SI_W_DPORT)))
5321 			return 1;
5322 		break;
5323 	default:
5324 		break;
5325 	}
5326 
5327 	return(0);
5328 }
5329 
5330 
5331 /* ------------------------------------------------------------------------ */
5332 /* Function:    nat_mssclamp                                                */
5333 /* Returns:     Nil                                                         */
5334 /* Parameters:  tcp(I)    - pointer to TCP header                           */
5335 /*              maxmss(I) - value to clamp the TCP MSS to                   */
5336 /*              csump(I)  - pointer to TCP checksum                         */
5337 /*                                                                          */
5338 /* Check for MSS option and clamp it if necessary.  If found and changed,   */
5339 /* then the TCP header checksum will be updated to reflect the change in    */
5340 /* the MSS.                                                                 */
5341 /* ------------------------------------------------------------------------ */
5342 static void nat_mssclamp(tcp, maxmss, csump)
5343 tcphdr_t *tcp;
5344 u_32_t maxmss;
5345 u_short *csump;
5346 {
5347 	u_char *cp, *ep, opt;
5348 	int hlen, advance;
5349 	u_32_t mss, sumd;
5350 
5351 	hlen = TCP_OFF(tcp) << 2;
5352 	if (hlen > sizeof(*tcp)) {
5353 		cp = (u_char *)tcp + sizeof(*tcp);
5354 		ep = (u_char *)tcp + hlen;
5355 
5356 		while (cp < ep) {
5357 			opt = cp[0];
5358 			if (opt == TCPOPT_EOL)
5359 				break;
5360 			else if (opt == TCPOPT_NOP) {
5361 				cp++;
5362 				continue;
5363 			}
5364 
5365 			if (cp + 1 >= ep)
5366 				break;
5367 			advance = cp[1];
5368 			if ((cp + advance > ep) || (advance <= 0))
5369 				break;
5370 			switch (opt)
5371 			{
5372 			case TCPOPT_MAXSEG:
5373 				if (advance != 4)
5374 					break;
5375 				mss = cp[2] * 256 + cp[3];
5376 				if (mss > maxmss) {
5377 					cp[2] = maxmss / 256;
5378 					cp[3] = maxmss & 0xff;
5379 					CALC_SUMD(mss, maxmss, sumd);
5380 					fix_outcksum(csump, sumd);
5381 				}
5382 				break;
5383 			default:
5384 				/* ignore unknown options */
5385 				break;
5386 			}
5387 
5388 			cp += advance;
5389 		}
5390 	}
5391 }
5392 
5393 
5394 /* ------------------------------------------------------------------------ */
5395 /* Function:    fr_setnatqueue                                              */
5396 /* Returns:     Nil                                                         */
5397 /* Parameters:  nat(I)- pointer to NAT structure                            */
5398 /*              rev(I) - forward(0) or reverse(1) direction                 */
5399 /* Locks:       ipf_nat (read or write)                                     */
5400 /*                                                                          */
5401 /* Put the NAT entry on its default queue entry, using rev as a helped in   */
5402 /* determining which queue it should be placed on.                          */
5403 /* ------------------------------------------------------------------------ */
5404 void fr_setnatqueue(nat, rev, ifs)
5405 nat_t *nat;
5406 int rev;
5407 ipf_stack_t *ifs;
5408 {
5409 	ipftq_t *oifq, *nifq;
5410 
5411 	if (nat->nat_ptr != NULL)
5412 		nifq = nat->nat_ptr->in_tqehead[rev];
5413 	else
5414 		nifq = NULL;
5415 
5416 	if (nifq == NULL) {
5417 		switch (nat->nat_p)
5418 		{
5419 		case IPPROTO_UDP :
5420 			nifq = &ifs->ifs_nat_udptq;
5421 			break;
5422 		case IPPROTO_ICMP :
5423 			nifq = &ifs->ifs_nat_icmptq;
5424 			break;
5425 		case IPPROTO_TCP :
5426 			nifq = ifs->ifs_nat_tqb + nat->nat_tqe.tqe_state[rev];
5427 			break;
5428 		default :
5429 			nifq = &ifs->ifs_nat_iptq;
5430 			break;
5431 		}
5432 	}
5433 
5434 	oifq = nat->nat_tqe.tqe_ifq;
5435 	/*
5436 	 * If it's currently on a timeout queue, move it from one queue to
5437 	 * another, else put it on the end of the newly determined queue.
5438 	 */
5439 	if (oifq != NULL)
5440 		fr_movequeue(&nat->nat_tqe, oifq, nifq, ifs);
5441 	else
5442 		fr_queueappend(&nat->nat_tqe, nifq, nat, ifs);
5443 	return;
5444 }
5445 
/* ------------------------------------------------------------------------ */
/* Function:    nat_getnext                                                 */
/* Returns:     int - 0 == ok, else error (EINVAL for a zero item count or  */
/*                    an unknown iterator type, EFAULT if a copy out fails) */
/* Parameters:  t(I)   - pointer to ipftoken structure                      */
/*              itp(I) - pointer to ipfgeniter_t structure                  */
/*              ifs - ipf stack instance                                    */
/*                                                                          */
/* Fetch the next nat/ipnat/hostmap structure pointer from the linked list  */
/* and copy it out to the storage space pointed to by itp.  The next item   */
/* in the list to look at is put back in the ipftoken structure.  Up to     */
/* itp->igi_nitems entries are copied out, one after another.               */
/* ------------------------------------------------------------------------ */
static int nat_getnext(t, itp, ifs)
ipftoken_t *t;
ipfgeniter_t *itp;
ipf_stack_t *ifs;
{
	/*
	 * For each list type: the entry returned by the previous step, the
	 * entry to return now, and a zero-filled stand-in that is copied out
	 * when there is no entry to return.
	 */
	hostmap_t *hm, *nexthm = NULL, zerohm;
	ipnat_t *ipn, *nextipnat = NULL, zeroipn;
	nat_t *nat, *nextnat = NULL, zeronat;
	int error = 0, count;
	char *dst;

	if (itp->igi_nitems == 0)
		return EINVAL;

	READ_ENTER(&ifs->ifs_ipf_nat);

	/*
	 * Get "previous" entry from the token and find the next entry.
	 * A NULL token value means start from the head of the list.
	 */
	switch (itp->igi_type)
	{
	case IPFGENITER_HOSTMAP :
		hm = t->ipt_data;
		if (hm == NULL) {
			nexthm = ifs->ifs_ipf_hm_maplist;
		} else {
			nexthm = hm->hm_next;
		}
		break;

	case IPFGENITER_IPNAT :
		ipn = t->ipt_data;
		if (ipn == NULL) {
			nextipnat = ifs->ifs_nat_list;
		} else {
			nextipnat = ipn->in_next;
		}
		break;

	case IPFGENITER_NAT :
		nat = t->ipt_data;
		if (nat == NULL) {
			nextnat = ifs->ifs_nat_instances;
		} else {
			nextnat = nat->nat_next;
		}
		break;
	default :
		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
		return EINVAL;
	}

	/* dst advances through the caller's buffer, one structure at a time. */
	dst = itp->igi_data;
	for (count = itp->igi_nitems; count > 0; count--) {
		/*
		 * If we found an entry, add a reference to it and update the token.
		 * Otherwise, zero out data to be returned and NULL out token.
		 */
		switch (itp->igi_type)
		{
		case IPFGENITER_HOSTMAP :
			if (nexthm != NULL) {
				ATOMIC_INC32(nexthm->hm_ref);
				t->ipt_data = nexthm;
			} else {
				bzero(&zerohm, sizeof(zerohm));
				nexthm = &zerohm;
				t->ipt_data = NULL;
			}
			break;
		case IPFGENITER_IPNAT :
			if (nextipnat != NULL) {
				ATOMIC_INC32(nextipnat->in_use);
				t->ipt_data = nextipnat;
			} else {
				bzero(&zeroipn, sizeof(zeroipn));
				nextipnat = &zeroipn;
				t->ipt_data = NULL;
			}
			break;
		case IPFGENITER_NAT :
			if (nextnat != NULL) {
				MUTEX_ENTER(&nextnat->nat_lock);
				nextnat->nat_ref++;
				MUTEX_EXIT(&nextnat->nat_lock);
				t->ipt_data = nextnat;
			} else {
				bzero(&zeronat, sizeof(zeronat));
				nextnat = &zeronat;
				t->ipt_data = NULL;
			}
			break;
		default :
			break;
		}

		/*
		 * Now that we have a reference, it's safe to give up the lock.
		 */
		RWLOCK_EXIT(&ifs->ifs_ipf_nat);

		/*
		 * Copy out data and clean up references and token as needed.
		 * In each case below: the token is freed when there was no
		 * entry to return (token data is NULL) or when the entry just
		 * copied out is the last one in its list; otherwise the
		 * reference held on the previous entry is dropped and the
		 * iteration variables are advanced by one.
		 */
		switch (itp->igi_type)
		{
		case IPFGENITER_HOSTMAP :
			error = COPYOUT(nexthm, dst, sizeof(*nexthm));
			if (error != 0)
				error = EFAULT;
			if (t->ipt_data == NULL) {
				ipf_freetoken(t, ifs);
				break;
			} else {
				if (hm != NULL) {
					/* Dropping the reference needs the write lock. */
					WRITE_ENTER(&ifs->ifs_ipf_nat);
					fr_hostmapdel(&hm);
					RWLOCK_EXIT(&ifs->ifs_ipf_nat);
				}
				if (nexthm->hm_next == NULL) {
					ipf_freetoken(t, ifs);
					break;
				}
				dst += sizeof(*nexthm);
				hm = nexthm;
				nexthm = nexthm->hm_next;
			}
			break;

		case IPFGENITER_IPNAT :
			error = COPYOUT(nextipnat, dst, sizeof(*nextipnat));
			if (error != 0)
				error = EFAULT;
			if (t->ipt_data == NULL) {
				ipf_freetoken(t, ifs);
				break;
			} else {
				if (ipn != NULL) {
					/* Dropping the reference needs the write lock. */
					WRITE_ENTER(&ifs->ifs_ipf_nat);
					fr_ipnatderef(&ipn, ifs);
					RWLOCK_EXIT(&ifs->ifs_ipf_nat);
				}
				if (nextipnat->in_next == NULL) {
					ipf_freetoken(t, ifs);
					break;
				}
				dst += sizeof(*nextipnat);
				ipn = nextipnat;
				nextipnat = nextipnat->in_next;
			}
			break;

		case IPFGENITER_NAT :
			error = COPYOUT(nextnat, dst, sizeof(*nextnat));
			if (error != 0)
				error = EFAULT;
			if (t->ipt_data == NULL) {
				ipf_freetoken(t, ifs);
				break;
			} else {
				/* fr_natderef() takes whatever locks it needs itself. */
				if (nat != NULL)
					fr_natderef(&nat, ifs);
				if (nextnat->nat_next == NULL) {
					ipf_freetoken(t, ifs);
					break;
				}
				dst += sizeof(*nextnat);
				nat = nextnat;
				nextnat = nextnat->nat_next;
			}
			break;
		default :
			break;
		}

		/*
		 * Stop after the last requested item or on a copy out error;
		 * the lock is not held at this point, so it is only taken
		 * again when another iteration follows.
		 */
		if ((count == 1) || (error != 0))
			break;

		READ_ENTER(&ifs->ifs_ipf_nat);
	}

	return error;
}
5640 
5641 
5642 /* ------------------------------------------------------------------------ */
5643 /* Function:    nat_iterator                                                */
5644 /* Returns:     int - 0 == ok, else error                                   */
5645 /* Parameters:  token(I) - pointer to ipftoken structure                    */
5646 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5647 /*                                                                          */
5648 /* This function acts as a handler for the SIOCGENITER ioctls that use a    */
5649 /* generic structure to iterate through a list.  There are three different  */
5650 /* linked lists of NAT related information to go through: NAT rules, active */
5651 /* NAT mappings and the NAT fragment cache.                                 */
5652 /* ------------------------------------------------------------------------ */
5653 static int nat_iterator(token, itp, ifs)
5654 ipftoken_t *token;
5655 ipfgeniter_t *itp;
5656 ipf_stack_t *ifs;
5657 {
5658 	int error;
5659 
5660 	if (itp->igi_data == NULL)
5661 		return EFAULT;
5662 
5663 	token->ipt_subtype = itp->igi_type;
5664 
5665 	switch (itp->igi_type)
5666 	{
5667 	case IPFGENITER_HOSTMAP :
5668 	case IPFGENITER_IPNAT :
5669 	case IPFGENITER_NAT :
5670 		error = nat_getnext(token, itp, ifs);
5671 		break;
5672 	case IPFGENITER_NATFRAG :
5673 		error = fr_nextfrag(token, itp, &ifs->ifs_ipfr_natlist,
5674 				    &ifs->ifs_ipfr_nattail,
5675 				    &ifs->ifs_ipf_natfrag, ifs);
5676 		break;
5677 	default :
5678 		error = EINVAL;
5679 		break;
5680 	}
5681 
5682 	return error;
5683 }
5684 
5685 
5686 /* ---------------------------------------------------------------------- */
5687 /* Function:    nat_flushtable						  */
5688 /* Returns:     int - 0 == success, -1 == failure			  */
5689 /* Parameters:  flush_option - how to flush the active NAT table	  */
5690 /*              ifs - ipf stack instance				  */
5691 /* Write Locks: ipf_nat							  */
5692 /*									  */
5693 /* Flush NAT tables.  Three actions currently defined:                    */
5694 /*									  */
5695 /* FLUSH_TABLE_ALL	: Flush all NAT table entries			  */
5696 /*									  */
5697 /* FLUSH_TABLE_CLOSING	: Flush entries with TCP connections which	  */
5698 /*			  have started to close on both ends using	  */
5699 /*			  ipf_flushclosing().				  */
5700 /*									  */
5701 /* FLUSH_TABLE_EXTRA	: First, flush entries which are "almost" closed. */
5702 /*			  Then, if needed, flush entries with TCP	  */
5703 /*			  connections which have been idle for a long	  */
5704 /*			  time with ipf_extraflush().			  */
5705 /* ---------------------------------------------------------------------- */
5706 static int nat_flushtable(flush_option, ifs)
5707 int flush_option;
5708 ipf_stack_t *ifs;
5709 {
5710         nat_t *nat, *natn;
5711         int removed;
5712         SPL_INT(s);
5713 
5714         removed = 0;
5715 
5716         SPL_NET(s);
5717         switch (flush_option)
5718         {
5719         case FLUSH_TABLE_ALL:
5720 		natn = ifs->ifs_nat_instances;
5721 		while ((nat = natn) != NULL) {
5722 			natn = nat->nat_next;
5723 			if (nat_delete(nat, NL_FLUSH, ifs) == 0)
5724 				removed++;
5725 		}
5726                 break;
5727 
5728         case FLUSH_TABLE_CLOSING:
5729                 removed = ipf_flushclosing(NAT_FLUSH,
5730 					   IPF_TCPS_CLOSE_WAIT,
5731 					   ifs->ifs_nat_tqb,
5732 					   ifs->ifs_nat_utqe,
5733 					   ifs);
5734                 break;
5735 
5736         case FLUSH_TABLE_EXTRA:
5737                 removed = ipf_flushclosing(NAT_FLUSH,
5738 					   IPF_TCPS_FIN_WAIT_2,
5739 					   ifs->ifs_nat_tqb,
5740 					   ifs->ifs_nat_utqe,
5741 					   ifs);
5742 
5743                 /*
5744                  * Be sure we haven't done this in the last 10 seconds.
5745                  */
5746                 if (ifs->ifs_fr_ticks - ifs->ifs_nat_last_force_flush <
5747                     IPF_TTLVAL(10))
5748                         break;
5749                 ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks;
5750                 removed += ipf_extraflush(NAT_FLUSH,
5751 					  &ifs->ifs_nat_tqb[IPF_TCPS_ESTABLISHED],
5752 					  ifs->ifs_nat_utqe,
5753 					  ifs);
5754                 break;
5755 
5756         default: /* Flush Nothing */
5757                 break;
5758         }
5759 
5760         SPL_X(s);
5761         return (removed);
5762 }
5763