xref: /titanic_44/usr/src/uts/common/inet/ipf/ip_nat.c (revision 3441f6a1af86b9b2f883f3323bf02c9dd0f7a94d)
1 /*
2  * Copyright (C) 1995-2003 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
7  * Use is subject to license terms.
8  */
9 
10 #pragma ident	"%Z%%M%	%I%	%E% SMI"$
11 
12 #if defined(KERNEL) || defined(_KERNEL)
13 # undef KERNEL
14 # undef _KERNEL
15 # define        KERNEL	1
16 # define        _KERNEL	1
17 #endif
18 #include <sys/errno.h>
19 #include <sys/types.h>
20 #include <sys/param.h>
21 #include <sys/time.h>
22 #include <sys/file.h>
23 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
24     defined(_KERNEL)
25 # include "opt_ipfilter_log.h"
26 #endif
27 #if !defined(_KERNEL)
28 # include <stdio.h>
29 # include <string.h>
30 # include <stdlib.h>
31 # define _KERNEL
32 # ifdef __OpenBSD__
33 struct file;
34 # endif
35 # include <sys/uio.h>
36 # undef _KERNEL
37 #endif
38 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
39 # include <sys/filio.h>
40 # include <sys/fcntl.h>
41 #else
42 # include <sys/ioctl.h>
43 #endif
44 #if !defined(AIX)
45 # include <sys/fcntl.h>
46 #endif
47 #if !defined(linux)
48 # include <sys/protosw.h>
49 #endif
50 #include <sys/socket.h>
51 #if defined(_KERNEL)
52 # include <sys/systm.h>
53 # if !defined(__SVR4) && !defined(__svr4__)
54 #  include <sys/mbuf.h>
55 # endif
56 #endif
57 #if defined(__SVR4) || defined(__svr4__)
58 # include <sys/filio.h>
59 # include <sys/byteorder.h>
60 # ifdef _KERNEL
61 #  include <sys/dditypes.h>
62 # endif
63 # include <sys/stream.h>
64 # include <sys/kmem.h>
65 #endif
66 #if __FreeBSD_version >= 300000
67 # include <sys/queue.h>
68 #endif
69 #include <net/if.h>
70 #if __FreeBSD_version >= 300000
71 # include <net/if_var.h>
72 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
73 #  include "opt_ipfilter.h"
74 # endif
75 #endif
76 #ifdef sun
77 # include <net/af.h>
78 #endif
79 #include <net/route.h>
80 #include <netinet/in.h>
81 #include <netinet/in_systm.h>
82 #include <netinet/ip.h>
83 
84 #ifdef RFC1825
85 # include <vpn/md5.h>
86 # include <vpn/ipsec.h>
87 extern struct ifnet vpnif;
88 #endif
89 
90 #if !defined(linux)
91 # include <netinet/ip_var.h>
92 #endif
93 #include <netinet/tcp.h>
94 #include <netinet/udp.h>
95 #include <netinet/ip_icmp.h>
96 #include "netinet/ip_compat.h"
97 #include <netinet/tcpip.h>
98 #include "netinet/ip_fil.h"
99 #include "netinet/ip_nat.h"
100 #include "netinet/ip_frag.h"
101 #include "netinet/ip_state.h"
102 #include "netinet/ip_proxy.h"
103 #include "netinet/ipf_stack.h"
104 #ifdef	IPFILTER_SYNC
105 #include "netinet/ip_sync.h"
106 #endif
107 #if (__FreeBSD_version >= 300000)
108 # include <sys/malloc.h>
109 #endif
110 /* END OF INCLUDES */
111 
112 #undef	SOCKADDR_IN
113 #define	SOCKADDR_IN	struct sockaddr_in
114 
115 #if !defined(lint)
116 static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
117 static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 darrenr Exp $";
118 #endif
119 
120 
121 /* ======================================================================== */
122 /* How the NAT is organised and works.                                      */
123 /*                                                                          */
124 /* Inside (interface y) NAT       Outside (interface x)                     */
125 /* -------------------- -+- -------------------------------------           */
/* Packet going          |   out, processed by fr_checknatout() for x       */
127 /* ------------>         |   ------------>                                  */
128 /* src=10.1.1.1          |   src=192.1.1.1                                  */
129 /*                       |                                                  */
130 /*                       |   in, processed by fr_checknatin() for x         */
131 /* <------------         |   <------------                                  */
132 /* dst=10.1.1.1          |   dst=192.1.1.1                                  */
133 /* -------------------- -+- -------------------------------------           */
134 /* fr_checknatout() - changes ip_src and if required, sport                 */
135 /*             - creates a new mapping, if required.                        */
136 /* fr_checknatin()  - changes ip_dst and if required, dport                 */
137 /*                                                                          */
138 /* In the NAT table, internal source is recorded as "in" and externally     */
139 /* seen as "out".                                                           */
140 /* ======================================================================== */
141 
142 
143 
144 static	int	nat_flushtable __P((ipf_stack_t *));
145 static	int	nat_clearlist __P((ipf_stack_t *));
146 static	void	nat_addnat __P((struct ipnat *, ipf_stack_t *));
147 static	void	nat_addrdr __P((struct ipnat *, ipf_stack_t *));
148 static	void	nat_delete __P((struct nat *, int, ipf_stack_t *));
149 static	void	nat_delrdr __P((struct ipnat *));
150 static	void	nat_delnat __P((struct ipnat *));
151 static	int	fr_natgetent __P((caddr_t, ipf_stack_t *));
152 static	int	fr_natgetsz __P((caddr_t, ipf_stack_t *));
153 static	int	fr_natputent __P((caddr_t, int, ipf_stack_t *));
154 static	void	nat_tabmove __P((nat_t *, ipf_stack_t *));
155 static	int	nat_match __P((fr_info_t *, ipnat_t *));
156 static	INLINE	int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
157 static	INLINE	int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
158 static	hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
159 				    struct in_addr, struct in_addr, u_32_t,
160 				    ipf_stack_t *));
161 static	void	nat_hostmapdel __P((struct hostmap *));
162 static	INLINE	int nat_icmpquerytype4 __P((int));
163 static	int	nat_siocaddnat __P((ipnat_t *, ipnat_t **, int,
164 				    ipf_stack_t *));
165 static	void	nat_siocdelnat __P((ipnat_t *, ipnat_t **, int,
166 				    ipf_stack_t *));
167 static	INLINE	int nat_icmperrortype4 __P((int));
168 static	INLINE	int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
169 				      tcphdr_t *, nat_t **, int));
170 static	INLINE	int nat_resolverule __P((ipnat_t *, ipf_stack_t *));
171 static	nat_t	*fr_natclone __P((fr_info_t *, nat_t *));
172 static	void	nat_mssclamp __P((tcphdr_t *, u_32_t, u_short *));
173 static	INLINE	int nat_wildok __P((nat_t *, int, int, int, int));
174 static	int	nat_getnext __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
175 static	int	nat_iterator __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
176 
177 
178 /* ------------------------------------------------------------------------ */
179 /* Function:    fr_natinit                                                  */
180 /* Returns:     int - 0 == success, -1 == failure                           */
181 /* Parameters:  Nil                                                         */
182 /*                                                                          */
183 /* Initialise all of the NAT locks, tables and other structures.            */
184 /* ------------------------------------------------------------------------ */
185 int fr_natinit(ifs)
186 ipf_stack_t *ifs;
187 {
188 	int i;
189 
190 	KMALLOCS(ifs->ifs_nat_table[0], nat_t **,
191 		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
192 	if (ifs->ifs_nat_table[0] != NULL)
193 		bzero((char *)ifs->ifs_nat_table[0],
194 		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
195 	else
196 		return -1;
197 
198 	KMALLOCS(ifs->ifs_nat_table[1], nat_t **,
199 		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
200 	if (ifs->ifs_nat_table[1] != NULL)
201 		bzero((char *)ifs->ifs_nat_table[1],
202 		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
203 	else
204 		return -2;
205 
206 	KMALLOCS(ifs->ifs_nat_rules, ipnat_t **,
207 		 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
208 	if (ifs->ifs_nat_rules != NULL)
209 		bzero((char *)ifs->ifs_nat_rules,
210 		      ifs->ifs_ipf_natrules_sz * sizeof(ipnat_t *));
211 	else
212 		return -3;
213 
214 	KMALLOCS(ifs->ifs_rdr_rules, ipnat_t **,
215 		 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
216 	if (ifs->ifs_rdr_rules != NULL)
217 		bzero((char *)ifs->ifs_rdr_rules,
218 		      ifs->ifs_ipf_rdrrules_sz * sizeof(ipnat_t *));
219 	else
220 		return -4;
221 
222 	KMALLOCS(ifs->ifs_maptable, hostmap_t **,
223 		 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
224 	if (ifs->ifs_maptable != NULL)
225 		bzero((char *)ifs->ifs_maptable,
226 		      sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
227 	else
228 		return -5;
229 
230 	ifs->ifs_ipf_hm_maplist = NULL;
231 
232 	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[0], u_long *,
233 		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
234 	if (ifs->ifs_nat_stats.ns_bucketlen[0] == NULL)
235 		return -1;
236 	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[0],
237 	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
238 
239 	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[1], u_long *,
240 		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
241 	if (ifs->ifs_nat_stats.ns_bucketlen[1] == NULL)
242 		return -1;
243 	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[1],
244 	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
245 
246 	if (ifs->ifs_fr_nat_maxbucket == 0) {
247 		for (i = ifs->ifs_ipf_nattable_sz; i > 0; i >>= 1)
248 			ifs->ifs_fr_nat_maxbucket++;
249 		ifs->ifs_fr_nat_maxbucket *= 2;
250 	}
251 
252 	fr_sttab_init(ifs->ifs_nat_tqb, ifs);
253 	/*
254 	 * Increase this because we may have "keep state" following this too
255 	 * and packet storms can occur if this is removed too quickly.
256 	 */
257 	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcplastack;
258 	ifs->ifs_nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_nat_udptq;
259 	ifs->ifs_nat_udptq.ifq_ttl = ifs->ifs_fr_defnatage;
260 	ifs->ifs_nat_udptq.ifq_ref = 1;
261 	ifs->ifs_nat_udptq.ifq_head = NULL;
262 	ifs->ifs_nat_udptq.ifq_tail = &ifs->ifs_nat_udptq.ifq_head;
263 	MUTEX_INIT(&ifs->ifs_nat_udptq.ifq_lock, "nat ipftq udp tab");
264 	ifs->ifs_nat_udptq.ifq_next = &ifs->ifs_nat_icmptq;
265 	ifs->ifs_nat_icmptq.ifq_ttl = ifs->ifs_fr_defnaticmpage;
266 	ifs->ifs_nat_icmptq.ifq_ref = 1;
267 	ifs->ifs_nat_icmptq.ifq_head = NULL;
268 	ifs->ifs_nat_icmptq.ifq_tail = &ifs->ifs_nat_icmptq.ifq_head;
269 	MUTEX_INIT(&ifs->ifs_nat_icmptq.ifq_lock, "nat icmp ipftq tab");
270 	ifs->ifs_nat_icmptq.ifq_next = &ifs->ifs_nat_iptq;
271 	ifs->ifs_nat_iptq.ifq_ttl = ifs->ifs_fr_defnatipage;
272 	ifs->ifs_nat_iptq.ifq_ref = 1;
273 	ifs->ifs_nat_iptq.ifq_head = NULL;
274 	ifs->ifs_nat_iptq.ifq_tail = &ifs->ifs_nat_iptq.ifq_head;
275 	MUTEX_INIT(&ifs->ifs_nat_iptq.ifq_lock, "nat ip ipftq tab");
276 	ifs->ifs_nat_iptq.ifq_next = NULL;
277 
278 	for (i = 0; i < IPF_TCP_NSTATES; i++) {
279 		if (ifs->ifs_nat_tqb[i].ifq_ttl < ifs->ifs_fr_defnaticmpage)
280 			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnaticmpage;
281 #ifdef LARGE_NAT
282 		else if (ifs->ifs_nat_tqb[i].ifq_ttl > ifs->ifs_fr_defnatage)
283 			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnatage;
284 #endif
285 	}
286 
287 	/*
288 	 * Increase this because we may have "keep state" following
289 	 * this too and packet storms can occur if this is removed
290 	 * too quickly.
291 	 */
292 	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl =
293 	    ifs->ifs_nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
294 
295 	RWLOCK_INIT(&ifs->ifs_ipf_nat, "ipf IP NAT rwlock");
296 	RWLOCK_INIT(&ifs->ifs_ipf_natfrag, "ipf IP NAT-Frag rwlock");
297 	MUTEX_INIT(&ifs->ifs_ipf_nat_new, "ipf nat new mutex");
298 	MUTEX_INIT(&ifs->ifs_ipf_natio, "ipf nat io mutex");
299 
300 	ifs->ifs_fr_nat_init = 1;
301 
302 	return 0;
303 }
304 
305 
306 /* ------------------------------------------------------------------------ */
307 /* Function:    nat_addrdr                                                  */
308 /* Returns:     Nil                                                         */
309 /* Parameters:  n(I) - pointer to NAT rule to add                           */
310 /*                                                                          */
311 /* Adds a redirect rule to the hash table of redirect rules and the list of */
312 /* loaded NAT rules.  Updates the bitmask indicating which netmasks are in  */
313 /* use by redirect rules.                                                   */
314 /* ------------------------------------------------------------------------ */
315 static void nat_addrdr(n, ifs)
316 ipnat_t *n;
317 ipf_stack_t *ifs;
318 {
319 	ipnat_t **np;
320 	u_32_t j;
321 	u_int hv;
322 	int k;
323 
324 	k = count4bits(n->in_outmsk);
325 	if ((k >= 0) && (k != 32))
326 		ifs->ifs_rdr_masks |= 1 << k;
327 	j = (n->in_outip & n->in_outmsk);
328 	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_rdrrules_sz);
329 	np = ifs->ifs_rdr_rules + hv;
330 	while (*np != NULL)
331 		np = &(*np)->in_rnext;
332 	n->in_rnext = NULL;
333 	n->in_prnext = np;
334 	n->in_hv = hv;
335 	*np = n;
336 }
337 
338 
339 /* ------------------------------------------------------------------------ */
340 /* Function:    nat_addnat                                                  */
341 /* Returns:     Nil                                                         */
342 /* Parameters:  n(I) - pointer to NAT rule to add                           */
343 /*                                                                          */
344 /* Adds a NAT map rule to the hash table of rules and the list of  loaded   */
345 /* NAT rules.  Updates the bitmask indicating which netmasks are in use by  */
346 /* redirect rules.                                                          */
347 /* ------------------------------------------------------------------------ */
348 static void nat_addnat(n, ifs)
349 ipnat_t *n;
350 ipf_stack_t *ifs;
351 {
352 	ipnat_t **np;
353 	u_32_t j;
354 	u_int hv;
355 	int k;
356 
357 	k = count4bits(n->in_inmsk);
358 	if ((k >= 0) && (k != 32))
359 		ifs->ifs_nat_masks |= 1 << k;
360 	j = (n->in_inip & n->in_inmsk);
361 	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_natrules_sz);
362 	np = ifs->ifs_nat_rules + hv;
363 	while (*np != NULL)
364 		np = &(*np)->in_mnext;
365 	n->in_mnext = NULL;
366 	n->in_pmnext = np;
367 	n->in_hv = hv;
368 	*np = n;
369 }
370 
371 
372 /* ------------------------------------------------------------------------ */
373 /* Function:    nat_delrdr                                                  */
374 /* Returns:     Nil                                                         */
375 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
376 /*                                                                          */
377 /* Removes a redirect rule from the hash table of redirect rules.           */
378 /* ------------------------------------------------------------------------ */
379 static void nat_delrdr(n)
380 ipnat_t *n;
381 {
382 	if (n->in_rnext)
383 		n->in_rnext->in_prnext = n->in_prnext;
384 	*n->in_prnext = n->in_rnext;
385 }
386 
387 
388 /* ------------------------------------------------------------------------ */
389 /* Function:    nat_delnat                                                  */
390 /* Returns:     Nil                                                         */
391 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
392 /*                                                                          */
393 /* Removes a NAT map rule from the hash table of NAT map rules.             */
394 /* ------------------------------------------------------------------------ */
395 static void nat_delnat(n)
396 ipnat_t *n;
397 {
398 	if (n->in_mnext != NULL)
399 		n->in_mnext->in_pmnext = n->in_pmnext;
400 	*n->in_pmnext = n->in_mnext;
401 }
402 
403 
404 /* ------------------------------------------------------------------------ */
405 /* Function:    nat_hostmap                                                 */
406 /* Returns:     struct hostmap* - NULL if no hostmap could be created,      */
407 /*                                else a pointer to the hostmapping to use  */
408 /* Parameters:  np(I)   - pointer to NAT rule                               */
409 /*              real(I) - real IP address                                   */
410 /*              map(I)  - mapped IP address                                 */
411 /*              port(I) - destination port number                           */
412 /* Write Locks: ipf_nat                                                     */
413 /*                                                                          */
414 /* Check if an ip address has already been allocated for a given mapping    */
415 /* that is not doing port based translation.  If is not yet allocated, then */
416 /* create a new entry if a non-NULL NAT rule pointer has been supplied.     */
417 /* ------------------------------------------------------------------------ */
418 static struct hostmap *nat_hostmap(np, src, dst, map, port, ifs)
419 ipnat_t *np;
420 struct in_addr src;
421 struct in_addr dst;
422 struct in_addr map;
423 u_32_t port;
424 ipf_stack_t *ifs;
425 {
426 	hostmap_t *hm;
427 	u_int hv;
428 
429 	hv = (src.s_addr ^ dst.s_addr);
430 	hv += src.s_addr;
431 	hv += dst.s_addr;
432 	hv %= HOSTMAP_SIZE;
433 	for (hm = ifs->ifs_maptable[hv]; hm; hm = hm->hm_next)
434 		if ((hm->hm_srcip.s_addr == src.s_addr) &&
435 		    (hm->hm_dstip.s_addr == dst.s_addr) &&
436 		    ((np == NULL) || (np == hm->hm_ipnat)) &&
437 		    ((port == 0) || (port == hm->hm_port))) {
438 			hm->hm_ref++;
439 			return hm;
440 		}
441 
442 	if (np == NULL)
443 		return NULL;
444 
445 	KMALLOC(hm, hostmap_t *);
446 	if (hm) {
447 		hm->hm_hnext = ifs->ifs_ipf_hm_maplist;
448 		hm->hm_phnext = &ifs->ifs_ipf_hm_maplist;
449 		if (ifs->ifs_ipf_hm_maplist != NULL)
450 			ifs->ifs_ipf_hm_maplist->hm_phnext = &hm->hm_hnext;
451 		ifs->ifs_ipf_hm_maplist = hm;
452 
453 		hm->hm_next = ifs->ifs_maptable[hv];
454 		hm->hm_pnext = ifs->ifs_maptable + hv;
455 		if (ifs->ifs_maptable[hv] != NULL)
456 			ifs->ifs_maptable[hv]->hm_pnext = &hm->hm_next;
457 		ifs->ifs_maptable[hv] = hm;
458 		hm->hm_ipnat = np;
459 		hm->hm_srcip = src;
460 		hm->hm_dstip = dst;
461 		hm->hm_mapip = map;
462 		hm->hm_ref = 1;
463 		hm->hm_port = port;
464 	}
465 	return hm;
466 }
467 
468 
469 /* ------------------------------------------------------------------------ */
470 /* Function:    nat_hostmapdel                                              */
471 /* Returns:     Nil                                                         */
472 /* Parameters:  hm(I) - pointer to hostmap structure                        */
473 /* Write Locks: ipf_nat                                                     */
474 /*                                                                          */
475 /* Decrement the references to this hostmap structure by one.  If this      */
476 /* reaches zero then remove it and free it.                                 */
477 /* ------------------------------------------------------------------------ */
478 static void nat_hostmapdel(hm)
479 struct hostmap *hm;
480 {
481 	hm->hm_ref--;
482 	if (hm->hm_ref == 0) {
483 		if (hm->hm_next)
484 			hm->hm_next->hm_pnext = hm->hm_pnext;
485 		*hm->hm_pnext = hm->hm_next;
486 		if (hm->hm_hnext)
487 			hm->hm_hnext->hm_phnext = hm->hm_phnext;
488 		*hm->hm_phnext = hm->hm_hnext;
489 		KFREE(hm);
490 	}
491 }
492 
493 void fr_hostmapderef(hmp)
494 struct hostmap **hmp;
495 {
496 	struct hostmap *hm;
497 
498 	hm = *hmp;
499 	*hmp = NULL;
500 	hm->hm_ref--;
501 	if (hm->hm_ref == 0)
502 		nat_hostmapdel(hm);
503 }
504 
505 
/* ------------------------------------------------------------------------ */
/* Function:    fix_outcksum                                                */
/* Returns:     Nil                                                         */
/* Parameters:  sp(I)  - location of 16bit checksum to update               */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Adjusts the 16bit checksum by "n" for packets going out.                 */
/* Nothing is written when n is zero.                                       */
/* ------------------------------------------------------------------------ */
void fix_outcksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_32_t acc;
	int pass;

	if (n != 0) {
		/* Add n to the complement of the stored checksum. */
		acc = (~ntohs(*sp)) & 0xffff;
		acc += n;

		/* Fold the carries back into the low 16 bits, twice. */
		for (pass = 0; pass < 2; pass++)
			acc = (acc & 0xffff) + (acc >> 16);

		/* Store the complement of the result in network order. */
		*sp = htons((u_short)~acc);
	}
}
532 
533 
/* ------------------------------------------------------------------------ */
/* Function:    fix_incksum                                                 */
/* Returns:     Nil                                                         */
/* Parameters:  sp(I)  - location of 16bit checksum to update               */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Adjusts the 16bit checksum by "n" for packets going in.                  */
/* Nothing is written when n is zero.                                       */
/* ------------------------------------------------------------------------ */
void fix_incksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_32_t acc;

	if (n == 0)
		return;

	/*
	 * Add the low 16 bits of the complement of n to the complement of
	 * the stored checksum.
	 */
	acc = (~ntohs(*sp)) & 0xffff;
	acc += ~n & 0xffff;

	/* Fold the carries back into the low 16 bits, twice. */
	acc = (acc & 0xffff) + (acc >> 16);
	acc = (acc & 0xffff) + (acc >> 16);

	/* Store the complement of the result in network order. */
	*sp = htons((u_short)~acc);
}
560 
561 
/* ------------------------------------------------------------------------ */
/* Function:    fix_datacksum                                               */
/* Returns:     Nil                                                         */
/* Parameters:  sp(I)  - location of 16bit checksum to update               */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Fix_datacksum is used *only* for the adjustments of checksums in the     */
/* data section of an IP packet.                                            */
/*                                                                          */
/* The only situation in which you need to do this is when NAT'ing an       */
/* ICMP error message.  Such a message contains in its body the IP header   */
/* of the original IP packet that caused the error.                         */
/*                                                                          */
/* You can't use fix_incksum or fix_outcksum in that case, because for the  */
/* kernel the data section of the ICMP error is just data, and no special   */
/* processing like hardware cksum or ntohs processing has been done by the  */
/* kernel on the data section.                                              */
/* ------------------------------------------------------------------------ */
void fix_datacksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_32_t total;
	u_short result;

	if (n == 0)
		return;

	/* Add n to the complement of the stored checksum. */
	total = ((~ntohs(*sp)) & 0xffff) + n;

	/* Fold the carries back into the low 16 bits, twice. */
	total = (total & 0xffff) + (total >> 16);
	total = (total & 0xffff) + (total >> 16);

	/* Store the complement of the result in network order. */
	result = (u_short)~total;
	*sp = htons(result);
}
598 
599 
600 /* ------------------------------------------------------------------------ */
601 /* Function:    fr_nat_ioctl                                                */
602 /* Returns:     int - 0 == success, != 0 == failure                         */
603 /* Parameters:  data(I) - pointer to ioctl data                             */
604 /*              cmd(I)  - ioctl command integer                             */
605 /*              mode(I) - file mode bits used with open                     */
606 /*                                                                          */
607 /* Processes an ioctl call made to operate on the IP Filter NAT device.     */
608 /* ------------------------------------------------------------------------ */
609 int fr_nat_ioctl(data, cmd, mode, uid, ctx, ifs)
610 ioctlcmd_t cmd;
611 caddr_t data;
612 int mode, uid;
613 void *ctx;
614 ipf_stack_t *ifs;
615 {
616 	ipnat_t *nat, *nt, *n = NULL, **np = NULL;
617 	int error = 0, ret, arg, getlock;
618 	ipnat_t natd;
619 
620 #if (BSD >= 199306) && defined(_KERNEL)
621 	if ((securelevel >= 2) && (mode & FWRITE))
622 		return EPERM;
623 #endif
624 
625 #if defined(__osf__) && defined(_KERNEL)
626 	getlock = 0;
627 #else
628 	getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
629 #endif
630 
631 	nat = NULL;     /* XXX gcc -Wuninitialized */
632 	if (cmd == (ioctlcmd_t)SIOCADNAT) {
633 		KMALLOC(nt, ipnat_t *);
634 	} else {
635 		nt = NULL;
636 	}
637 
638 	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
639 		if (mode & NAT_SYSSPACE) {
640 			bcopy(data, (char *)&natd, sizeof(natd));
641 			error = 0;
642 		} else {
643 			error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
644 		}
645 
646 	} else if (cmd == (ioctlcmd_t)SIOCIPFFL) { /* SIOCFLNAT & SIOCCNATL */
647 		BCOPYIN(data, &arg, sizeof(arg));
648 	}
649 
650 	if (error != 0)
651 		goto done;
652 
653 	/*
654 	 * For add/delete, look to see if the NAT entry is already present
655 	 */
656 	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
657 		nat = &natd;
658 		if (nat->in_v == 0)	/* For backward compat. */
659 			nat->in_v = 4;
660 		nat->in_flags &= IPN_USERFLAGS;
661 		if ((nat->in_redir & NAT_MAPBLK) == 0) {
662 			if ((nat->in_flags & IPN_SPLIT) == 0)
663 				nat->in_inip &= nat->in_inmsk;
664 			if ((nat->in_flags & IPN_IPRANGE) == 0)
665 				nat->in_outip &= nat->in_outmsk;
666 		}
667 		MUTEX_ENTER(&ifs->ifs_ipf_natio);
668 		for (np = &ifs->ifs_nat_list; ((n = *np) != NULL);
669 		     np = &n->in_next)
670 			if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
671 					IPN_CMPSIZ))
672 				break;
673 	}
674 
675 	switch (cmd)
676 	{
677 	case SIOCGENITER :
678 	    {
679 		ipfgeniter_t iter;
680 		ipftoken_t *token;
681 
682 		error = fr_inobj(data, &iter, IPFOBJ_GENITER);
683 		if (error != 0)
684 			break;
685 
686 		token = ipf_findtoken(iter.igi_type, uid, ctx, ifs);
687 		if (token != NULL)
688 			error  = nat_iterator(token, &iter, ifs);
689 		else
690 			error = ESRCH;
691 		RWLOCK_EXIT(&ifs->ifs_ipf_tokens);
692 		break;
693 	    }
694 #ifdef  IPFILTER_LOG
695 	case SIOCIPFFB :
696 	{
697 		int tmp;
698 
699 		if (!(mode & FWRITE))
700 			error = EPERM;
701 		else {
702 			tmp = ipflog_clear(IPL_LOGNAT, ifs);
703 			BCOPYOUT((char *)&tmp, (char *)data, sizeof(tmp));
704 		}
705 		break;
706 	}
707 	case SIOCSETLG :
708 		if (!(mode & FWRITE))
709 			error = EPERM;
710 		else {
711 			BCOPYIN((char *)data,
712 				       (char *)&ifs->ifs_nat_logging,
713 				sizeof(ifs->ifs_nat_logging));
714 		}
715 		break;
716 	case SIOCGETLG :
717 		BCOPYOUT((char *)&ifs->ifs_nat_logging, (char *)data,
718 			sizeof(ifs->ifs_nat_logging));
719 		break;
720 	case FIONREAD :
721 		arg = ifs->ifs_iplused[IPL_LOGNAT];
722 		BCOPYOUT(&arg, data, sizeof(arg));
723 		break;
724 #endif
725 	case SIOCADNAT :
726 		if (!(mode & FWRITE)) {
727 			error = EPERM;
728 		} else if (n != NULL) {
729 			error = EEXIST;
730 		} else if (nt == NULL) {
731 			error = ENOMEM;
732 		}
733 		if (error != 0) {
734 			MUTEX_EXIT(&ifs->ifs_ipf_natio);
735 			break;
736 		}
737 		bcopy((char *)nat, (char *)nt, sizeof(*n));
738 		error = nat_siocaddnat(nt, np, getlock, ifs);
739 		MUTEX_EXIT(&ifs->ifs_ipf_natio);
740 		if (error == 0)
741 			nt = NULL;
742 		break;
743 	case SIOCRMNAT :
744 		if (!(mode & FWRITE)) {
745 			error = EPERM;
746 			n = NULL;
747 		} else if (n == NULL) {
748 			error = ESRCH;
749 		}
750 
751 		if (error != 0) {
752 			MUTEX_EXIT(&ifs->ifs_ipf_natio);
753 			break;
754 		}
755 		nat_siocdelnat(n, np, getlock, ifs);
756 
757 		MUTEX_EXIT(&ifs->ifs_ipf_natio);
758 		n = NULL;
759 		break;
760 	case SIOCGNATS :
761 		ifs->ifs_nat_stats.ns_table[0] = ifs->ifs_nat_table[0];
762 		ifs->ifs_nat_stats.ns_table[1] = ifs->ifs_nat_table[1];
763 		ifs->ifs_nat_stats.ns_list = ifs->ifs_nat_list;
764 		ifs->ifs_nat_stats.ns_maptable = ifs->ifs_maptable;
765 		ifs->ifs_nat_stats.ns_maplist = ifs->ifs_ipf_hm_maplist;
766 		ifs->ifs_nat_stats.ns_nattab_max = ifs->ifs_ipf_nattable_max;
767 		ifs->ifs_nat_stats.ns_nattab_sz = ifs->ifs_ipf_nattable_sz;
768 		ifs->ifs_nat_stats.ns_rultab_sz = ifs->ifs_ipf_natrules_sz;
769 		ifs->ifs_nat_stats.ns_rdrtab_sz = ifs->ifs_ipf_rdrrules_sz;
770 		ifs->ifs_nat_stats.ns_hostmap_sz = ifs->ifs_ipf_hostmap_sz;
771 		ifs->ifs_nat_stats.ns_instances = ifs->ifs_nat_instances;
772 		ifs->ifs_nat_stats.ns_apslist = ifs->ifs_ap_sess_list;
773 		error = fr_outobj(data, &ifs->ifs_nat_stats, IPFOBJ_NATSTAT);
774 		break;
775 	case SIOCGNATL :
776 	    {
777 		natlookup_t nl;
778 
779 		if (getlock) {
780 			READ_ENTER(&ifs->ifs_ipf_nat);
781 		}
782 		error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
783 		if (error == 0) {
784 			if (nat_lookupredir(&nl, ifs) != NULL) {
785 				error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
786 			} else {
787 				error = ESRCH;
788 			}
789 		}
790 		if (getlock) {
791 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
792 		}
793 		break;
794 	    }
795 	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
796 		if (!(mode & FWRITE)) {
797 			error = EPERM;
798 			break;
799 		}
800 		if (getlock) {
801 			WRITE_ENTER(&ifs->ifs_ipf_nat);
802 		}
803 		error = 0;
804 		if (arg == 0)
805 			ret = nat_flushtable(ifs);
806 		else if (arg == 1)
807 			ret = nat_clearlist(ifs);
808 		else
809 			error = EINVAL;
810 		if (getlock) {
811 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
812 		}
813 		if (error == 0) {
814 			BCOPYOUT(&ret, data, sizeof(ret));
815 		}
816 		break;
817 	case SIOCPROXY :
818 		error = appr_ioctl(data, cmd, mode, ifs);
819 		break;
820 	case SIOCSTLCK :
821 		if (!(mode & FWRITE)) {
822 			error = EPERM;
823 		} else {
824 			fr_lock(data, &ifs->ifs_fr_nat_lock);
825 		}
826 		break;
827 	case SIOCSTPUT :
828 		if ((mode & FWRITE) != 0) {
829 			error = fr_natputent(data, getlock, ifs);
830 		} else {
831 			error = EACCES;
832 		}
833 		break;
834 	case SIOCSTGSZ :
835 		if (ifs->ifs_fr_nat_lock) {
836 			if (getlock) {
837 				READ_ENTER(&ifs->ifs_ipf_nat);
838 			}
839 			error = fr_natgetsz(data, ifs);
840 			if (getlock) {
841 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
842 			}
843 		} else
844 			error = EACCES;
845 		break;
846 	case SIOCSTGET :
847 		if (ifs->ifs_fr_nat_lock) {
848 			if (getlock) {
849 				READ_ENTER(&ifs->ifs_ipf_nat);
850 			}
851 			error = fr_natgetent(data, ifs);
852 			if (getlock) {
853 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
854 			}
855 		} else
856 			error = EACCES;
857 		break;
858 	case SIOCIPFDELTOK :
859 		(void) BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
860 		error = ipf_deltoken(arg, uid, ctx, ifs);
861 		break;
862 	default :
863 		error = EINVAL;
864 		break;
865 	}
866 done:
867 	if (nt)
868 		KFREE(nt);
869 	return error;
870 }
871 
872 
873 /* ------------------------------------------------------------------------ */
874 /* Function:    nat_siocaddnat                                              */
875 /* Returns:     int - 0 == success, != 0 == failure                         */
876 /* Parameters:  n(I)       - pointer to new NAT rule                        */
877 /*              np(I)      - pointer to where to insert new NAT rule        */
878 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
879 /* Mutex Locks: ipf_natio                                                   */
880 /*                                                                          */
881 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
882 /* from information passed to the kernel, then add it  to the appropriate   */
883 /* NAT rule table(s).                                                       */
884 /* ------------------------------------------------------------------------ */
885 static int nat_siocaddnat(n, np, getlock, ifs)
886 ipnat_t *n, **np;
887 int getlock;
888 ipf_stack_t *ifs;
889 {
890 	int error = 0, i, j;
891 
892 	if (nat_resolverule(n, ifs) != 0)
893 		return ENOENT;
894 
895 	if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
896 		return EINVAL;
897 
898 	n->in_use = 0;
899 	if (n->in_redir & NAT_MAPBLK)
900 		n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
901 	else if (n->in_flags & IPN_AUTOPORTMAP)
902 		n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
903 	else if (n->in_flags & IPN_IPRANGE)
904 		n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
905 	else if (n->in_flags & IPN_SPLIT)
906 		n->in_space = 2;
907 	else if (n->in_outmsk != 0)
908 		n->in_space = ~ntohl(n->in_outmsk);
909 	else
910 		n->in_space = 1;
911 
912 	/*
913 	 * Calculate the number of valid IP addresses in the output
914 	 * mapping range.  In all cases, the range is inclusive of
915 	 * the start and ending IP addresses.
916 	 * If to a CIDR address, lose 2: broadcast + network address
917 	 *                               (so subtract 1)
918 	 * If to a range, add one.
919 	 * If to a single IP address, set to 1.
920 	 */
921 	if (n->in_space) {
922 		if ((n->in_flags & IPN_IPRANGE) != 0)
923 			n->in_space += 1;
924 		else
925 			n->in_space -= 1;
926 	} else
927 		n->in_space = 1;
928 
929 	if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
930 	    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
931 		n->in_nip = ntohl(n->in_outip) + 1;
932 	else if ((n->in_flags & IPN_SPLIT) &&
933 		 (n->in_redir & NAT_REDIRECT))
934 		n->in_nip = ntohl(n->in_inip);
935 	else
936 		n->in_nip = ntohl(n->in_outip);
937 	if (n->in_redir & NAT_MAP) {
938 		n->in_pnext = ntohs(n->in_pmin);
939 		/*
940 		 * Multiply by the number of ports made available.
941 		 */
942 		if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
943 			n->in_space *= (ntohs(n->in_pmax) -
944 					ntohs(n->in_pmin) + 1);
945 			/*
946 			 * Because two different sources can map to
947 			 * different destinations but use the same
948 			 * local IP#/port #.
949 			 * If the result is smaller than in_space, then
950 			 * we may have wrapped around 32bits.
951 			 */
952 			i = n->in_inmsk;
953 			if ((i != 0) && (i != 0xffffffff)) {
954 				j = n->in_space * (~ntohl(i) + 1);
955 				if (j >= n->in_space)
956 					n->in_space = j;
957 				else
958 					n->in_space = 0xffffffff;
959 			}
960 		}
961 		/*
962 		 * If no protocol is specified, multiple by 256 to allow for
963 		 * at least one IP:IP mapping per protocol.
964 		 */
965 		if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
966 				j = n->in_space * 256;
967 				if (j >= n->in_space)
968 					n->in_space = j;
969 				else
970 					n->in_space = 0xffffffff;
971 		}
972 	}
973 
974 	/* Otherwise, these fields are preset */
975 
976 	if (getlock) {
977 		WRITE_ENTER(&ifs->ifs_ipf_nat);
978 	}
979 	n->in_next = NULL;
980 	*np = n;
981 
982 	if (n->in_age[0] != 0)
983 	    n->in_tqehead[0] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
984 						  n->in_age[0], ifs);
985 
986 	if (n->in_age[1] != 0)
987 	    n->in_tqehead[1] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
988 						  n->in_age[1], ifs);
989 
990 	if (n->in_redir & NAT_REDIRECT) {
991 		n->in_flags &= ~IPN_NOTDST;
992 		nat_addrdr(n, ifs);
993 	}
994 	if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
995 		n->in_flags &= ~IPN_NOTSRC;
996 		nat_addnat(n, ifs);
997 	}
998 	n = NULL;
999 	ifs->ifs_nat_stats.ns_rules++;
1000 	if (getlock) {
1001 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* WRITE */
1002 	}
1003 
1004 	return error;
1005 }
1006 
1007 
1008 /* ------------------------------------------------------------------------ */
1009 /* Function:    nat_resolvrule                                              */
1010 /* Returns:     int - 0 == success, -1 == failure                           */
1011 /* Parameters:  n(I)  - pointer to NAT rule                                 */
1012 /*                                                                          */
1013 /* Resolve some of the details inside the NAT rule.  Includes resolving	    */
1014 /* any specified interfaces and proxy labels, and determines whether or not */
1015 /* all proxy labels are correctly specified.				    */
1016 /*									    */
1017 /* Called by nat_siocaddnat() (SIOCADNAT) and fr_natputent (SIOCSTPUT).     */
1018 /* ------------------------------------------------------------------------ */
1019 static int nat_resolverule(n, ifs)
1020 ipnat_t *n;
1021 ipf_stack_t *ifs;
1022 {
1023 	n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1024 	n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4, ifs);
1025 
1026 	n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1027 	if (n->in_ifnames[1][0] == '\0') {
1028 		(void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1029 		n->in_ifps[1] = n->in_ifps[0];
1030 	} else {
1031 		n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4, ifs);
1032 	}
1033 
1034 	if (n->in_plabel[0] != '\0') {
1035 		n->in_apr = appr_lookup(n->in_p, n->in_plabel, ifs);
1036 		if (n->in_apr == NULL)
1037 			return -1;
1038 	}
1039 	return 0;
1040 }
1041 
1042 
1043 /* ------------------------------------------------------------------------ */
1044 /* Function:    nat_siocdelnat                                              */
1045 /* Returns:     int - 0 == success, != 0 == failure                         */
1046 /* Parameters:  n(I)       - pointer to new NAT rule                        */
1047 /*              np(I)      - pointer to where to insert new NAT rule        */
1048 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
1049 /* Mutex Locks: ipf_natio                                                   */
1050 /*                                                                          */
1051 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1052 /* from information passed to the kernel, then add it  to the appropriate   */
1053 /* NAT rule table(s).                                                       */
1054 /* ------------------------------------------------------------------------ */
1055 static void nat_siocdelnat(n, np, getlock, ifs)
1056 ipnat_t *n, **np;
1057 int getlock;
1058 ipf_stack_t *ifs;
1059 {
1060 	if (getlock) {
1061 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1062 	}
1063 	if (n->in_redir & NAT_REDIRECT)
1064 		nat_delrdr(n);
1065 	if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1066 		nat_delnat(n);
1067 	if (ifs->ifs_nat_list == NULL) {
1068 		ifs->ifs_nat_masks = 0;
1069 		ifs->ifs_rdr_masks = 0;
1070 	}
1071 
1072 	if (n->in_tqehead[0] != NULL) {
1073 		if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1074 			fr_freetimeoutqueue(n->in_tqehead[1], ifs);
1075 		}
1076 	}
1077 
1078 	if (n->in_tqehead[1] != NULL) {
1079 		if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1080 			fr_freetimeoutqueue(n->in_tqehead[1], ifs);
1081 		}
1082 	}
1083 
1084 	*np = n->in_next;
1085 
1086 	if (n->in_use == 0) {
1087 		if (n->in_apr)
1088 			appr_free(n->in_apr);
1089 		KFREE(n);
1090 		ifs->ifs_nat_stats.ns_rules--;
1091 	} else {
1092 		n->in_flags |= IPN_DELETE;
1093 		n->in_next = NULL;
1094 	}
1095 	if (getlock) {
1096 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* READ/WRITE */
1097 	}
1098 }
1099 
1100 
1101 /* ------------------------------------------------------------------------ */
1102 /* Function:    fr_natgetsz                                                 */
1103 /* Returns:     int - 0 == success, != 0 is the error value.                */
1104 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1105 /*                        get the size of.                                  */
1106 /*                                                                          */
1107 /* Handle SIOCSTGSZ.                                                        */
1108 /* Return the size of the nat list entry to be copied back to user space.   */
1109 /* The size of the entry is stored in the ng_sz field and the enture natget */
1110 /* structure is copied back to the user.                                    */
1111 /* ------------------------------------------------------------------------ */
1112 static int fr_natgetsz(data, ifs)
1113 caddr_t data;
1114 ipf_stack_t *ifs;
1115 {
1116 	ap_session_t *aps;
1117 	nat_t *nat, *n;
1118 	natget_t ng;
1119 
1120 	BCOPYIN(data, &ng, sizeof(ng));
1121 
1122 	nat = ng.ng_ptr;
1123 	if (!nat) {
1124 		nat = ifs->ifs_nat_instances;
1125 		ng.ng_sz = 0;
1126 		/*
1127 		 * Empty list so the size returned is 0.  Simple.
1128 		 */
1129 		if (nat == NULL) {
1130 			BCOPYOUT(&ng, data, sizeof(ng));
1131 			return 0;
1132 		}
1133 	} else {
1134 		/*
1135 		 * Make sure the pointer we're copying from exists in the
1136 		 * current list of entries.  Security precaution to prevent
1137 		 * copying of random kernel data.
1138 		 */
1139 		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1140 			if (n == nat)
1141 				break;
1142 		if (!n)
1143 			return ESRCH;
1144 	}
1145 
1146 	/*
1147 	 * Incluse any space required for proxy data structures.
1148 	 */
1149 	ng.ng_sz = sizeof(nat_save_t);
1150 	aps = nat->nat_aps;
1151 	if (aps != NULL) {
1152 		ng.ng_sz += sizeof(ap_session_t) - 4;
1153 		if (aps->aps_data != 0)
1154 			ng.ng_sz += aps->aps_psiz;
1155 	}
1156 
1157 	BCOPYOUT(&ng, data, sizeof(ng));
1158 	return 0;
1159 }
1160 
1161 
1162 /* ------------------------------------------------------------------------ */
1163 /* Function:    fr_natgetent                                                */
1164 /* Returns:     int - 0 == success, != 0 is the error value.                */
1165 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1166 /*                        to NAT structure to copy out.                     */
1167 /*                                                                          */
1168 /* Handle SIOCSTGET.                                                        */
1169 /* Copies out NAT entry to user space.  Any additional data held for a      */
1170 /* proxy is also copied, as to is the NAT rule which was responsible for it */
1171 /* ------------------------------------------------------------------------ */
1172 static int fr_natgetent(data, ifs)
1173 caddr_t data;
1174 ipf_stack_t *ifs;
1175 {
1176 	int error, outsize;
1177 	ap_session_t *aps;
1178 	nat_save_t *ipn, ipns;
1179 	nat_t *n, *nat;
1180 
1181 	error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1182 	if (error != 0)
1183 		return error;
1184 
1185 	if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1186 		return EINVAL;
1187 
1188 	KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1189 	if (ipn == NULL)
1190 		return ENOMEM;
1191 
1192 	ipn->ipn_dsize = ipns.ipn_dsize;
1193 	nat = ipns.ipn_next;
1194 	if (nat == NULL) {
1195 		nat = ifs->ifs_nat_instances;
1196 		if (nat == NULL) {
1197 			if (ifs->ifs_nat_instances == NULL)
1198 				error = ENOENT;
1199 			goto finished;
1200 		}
1201 	} else {
1202 		/*
1203 		 * Make sure the pointer we're copying from exists in the
1204 		 * current list of entries.  Security precaution to prevent
1205 		 * copying of random kernel data.
1206 		 */
1207 		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1208 			if (n == nat)
1209 				break;
1210 		if (n == NULL) {
1211 			error = ESRCH;
1212 			goto finished;
1213 		}
1214 	}
1215 	ipn->ipn_next = nat->nat_next;
1216 
1217 	/*
1218 	 * Copy the NAT structure.
1219 	 */
1220 	bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1221 
1222 	/*
1223 	 * If we have a pointer to the NAT rule it belongs to, save that too.
1224 	 */
1225 	if (nat->nat_ptr != NULL)
1226 		bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1227 		      sizeof(ipn->ipn_ipnat));
1228 
1229 	/*
1230 	 * If we also know the NAT entry has an associated filter rule,
1231 	 * save that too.
1232 	 */
1233 	if (nat->nat_fr != NULL)
1234 		bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1235 		      sizeof(ipn->ipn_fr));
1236 
1237 	/*
1238 	 * Last but not least, if there is an application proxy session set
1239 	 * up for this NAT entry, then copy that out too, including any
1240 	 * private data saved along side it by the proxy.
1241 	 */
1242 	aps = nat->nat_aps;
1243 	outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1244 	if (aps != NULL) {
1245 		char *s;
1246 
1247 		if (outsize < sizeof(*aps)) {
1248 			error = ENOBUFS;
1249 			goto finished;
1250 		}
1251 
1252 		s = ipn->ipn_data;
1253 		bcopy((char *)aps, s, sizeof(*aps));
1254 		s += sizeof(*aps);
1255 		outsize -= sizeof(*aps);
1256 		if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1257 			bcopy(aps->aps_data, s, aps->aps_psiz);
1258 		else
1259 			error = ENOBUFS;
1260 	}
1261 	if (error == 0) {
1262 		error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1263 	}
1264 
1265 finished:
1266 	if (ipn != NULL) {
1267 		KFREES(ipn, ipns.ipn_dsize);
1268 	}
1269 	return error;
1270 }
1271 
1272 
1273 /* ------------------------------------------------------------------------ */
1274 /* Function:    fr_natputent                                                */
1275 /* Returns:     int - 0 == success, != 0 is the error value.                */
1276 /* Parameters:  data(I) -     pointer to natget structure with NAT          */
1277 /*                            structure information to load into the kernel */
1278 /*              getlock(I) - flag indicating whether or not a write lock    */
1279 /*                           on ipf_nat is already held.                    */
1280 /*                                                                          */
1281 /* Handle SIOCSTPUT.                                                        */
1282 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1283 /* firewall rule data structures, if pointers to them indicate so.          */
1284 /* ------------------------------------------------------------------------ */
1285 static int fr_natputent(data, getlock, ifs)
1286 caddr_t data;
1287 int getlock;
1288 ipf_stack_t *ifs;
1289 {
1290 	nat_save_t ipn, *ipnn;
1291 	ap_session_t *aps;
1292 	nat_t *n, *nat;
1293 	frentry_t *fr;
1294 	fr_info_t fin;
1295 	ipnat_t *in;
1296 	int error;
1297 
1298 	error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1299 	if (error != 0)
1300 		return error;
1301 
1302 	/*
1303 	 * Initialise early because of code at junkput label.
1304 	 */
1305 	in = NULL;
1306 	aps = NULL;
1307 	nat = NULL;
1308 	ipnn = NULL;
1309 
1310 	/*
1311 	 * New entry, copy in the rest of the NAT entry if it's size is more
1312 	 * than just the nat_t structure.
1313 	 */
1314 	fr = NULL;
1315 	if (ipn.ipn_dsize > sizeof(ipn)) {
1316 		if (ipn.ipn_dsize > 81920) {
1317 			error = ENOMEM;
1318 			goto junkput;
1319 		}
1320 
1321 		KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1322 		if (ipnn == NULL)
1323 			return ENOMEM;
1324 
1325 		error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1326 		if (error != 0) {
1327 			error = EFAULT;
1328 			goto junkput;
1329 		}
1330 	} else
1331 		ipnn = &ipn;
1332 
1333 	KMALLOC(nat, nat_t *);
1334 	if (nat == NULL) {
1335 		error = ENOMEM;
1336 		goto junkput;
1337 	}
1338 
1339 	bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1340 	/*
1341 	 * Initialize all these so that nat_delete() doesn't cause a crash.
1342 	 */
1343 	bzero((char *)nat, offsetof(struct nat, nat_tqe));
1344 	nat->nat_tqe.tqe_pnext = NULL;
1345 	nat->nat_tqe.tqe_next = NULL;
1346 	nat->nat_tqe.tqe_ifq = NULL;
1347 	nat->nat_tqe.tqe_parent = nat;
1348 
1349 	/*
1350 	 * Restore the rule associated with this nat session
1351 	 */
1352 	in = ipnn->ipn_nat.nat_ptr;
1353 	if (in != NULL) {
1354 		KMALLOC(in, ipnat_t *);
1355 		nat->nat_ptr = in;
1356 		if (in == NULL) {
1357 			error = ENOMEM;
1358 			goto junkput;
1359 		}
1360 		bzero((char *)in, offsetof(struct ipnat, in_next6));
1361 		bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1362 		in->in_use = 1;
1363 		in->in_flags |= IPN_DELETE;
1364 
1365 		ATOMIC_INC(ifs->ifs_nat_stats.ns_rules);
1366 
1367 		if (nat_resolverule(in, ifs) != 0) {
1368 			error = ESRCH;
1369 			goto junkput;
1370 		}
1371 	}
1372 
1373 	/*
1374 	 * Check that the NAT entry doesn't already exist in the kernel.
1375 	 */
1376 	bzero((char *)&fin, sizeof(fin));
1377 	fin.fin_p = nat->nat_p;
1378 	fin.fin_ifs = ifs;
1379 	if (nat->nat_dir == NAT_OUTBOUND) {
1380 		fin.fin_data[0] = ntohs(nat->nat_oport);
1381 		fin.fin_data[1] = ntohs(nat->nat_outport);
1382 		fin.fin_ifp = nat->nat_ifps[0];
1383 		if (getlock) {
1384 			READ_ENTER(&ifs->ifs_ipf_nat);
1385 		}
1386 		n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1387 			nat->nat_oip, nat->nat_outip);
1388 		if (getlock) {
1389 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1390 		}
1391 		if (n != NULL) {
1392 			error = EEXIST;
1393 			goto junkput;
1394 		}
1395 	} else if (nat->nat_dir == NAT_INBOUND) {
1396 		fin.fin_data[0] = ntohs(nat->nat_inport);
1397 		fin.fin_data[1] = ntohs(nat->nat_oport);
1398 		fin.fin_ifp = nat->nat_ifps[1];
1399 		if (getlock) {
1400 			READ_ENTER(&ifs->ifs_ipf_nat);
1401 		}
1402 		n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1403 			nat->nat_inip, nat->nat_oip);
1404 		if (getlock) {
1405 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1406 		}
1407 		if (n != NULL) {
1408 			error = EEXIST;
1409 			goto junkput;
1410 		}
1411 	} else {
1412 		error = EINVAL;
1413 		goto junkput;
1414 	}
1415 
1416 	/*
1417 	 * Restore ap_session_t structure.  Include the private data allocated
1418 	 * if it was there.
1419 	 */
1420 	aps = nat->nat_aps;
1421 	if (aps != NULL) {
1422 		KMALLOC(aps, ap_session_t *);
1423 		nat->nat_aps = aps;
1424 		if (aps == NULL) {
1425 			error = ENOMEM;
1426 			goto junkput;
1427 		}
1428 		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1429 		if (in != NULL)
1430 			aps->aps_apr = in->in_apr;
1431 		else
1432 			aps->aps_apr = NULL;
1433 		if (aps->aps_psiz != 0) {
1434 			if (aps->aps_psiz > 81920) {
1435 				error = ENOMEM;
1436 				goto junkput;
1437 			}
1438 			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1439 			if (aps->aps_data == NULL) {
1440 				error = ENOMEM;
1441 				goto junkput;
1442 			}
1443 			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1444 			      aps->aps_psiz);
1445 		} else {
1446 			aps->aps_psiz = 0;
1447 			aps->aps_data = NULL;
1448 		}
1449 	}
1450 
1451 	/*
1452 	 * If there was a filtering rule associated with this entry then
1453 	 * build up a new one.
1454 	 */
1455 	fr = nat->nat_fr;
1456 	if (fr != NULL) {
1457 		if ((nat->nat_flags & SI_NEWFR) != 0) {
1458 			KMALLOC(fr, frentry_t *);
1459 			nat->nat_fr = fr;
1460 			if (fr == NULL) {
1461 				error = ENOMEM;
1462 				goto junkput;
1463 			}
1464 			ipnn->ipn_nat.nat_fr = fr;
1465 			(void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1466 			bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1467 
1468 			fr->fr_ref = 1;
1469 			fr->fr_dsize = 0;
1470 			fr->fr_data = NULL;
1471 			fr->fr_type = FR_T_NONE;
1472 
1473 			MUTEX_NUKE(&fr->fr_lock);
1474 			MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1475 		} else {
1476 			if (getlock) {
1477 				READ_ENTER(&ifs->ifs_ipf_nat);
1478 			}
1479 			for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1480 				if (n->nat_fr == fr)
1481 					break;
1482 
1483 			if (n != NULL) {
1484 				MUTEX_ENTER(&fr->fr_lock);
1485 				fr->fr_ref++;
1486 				MUTEX_EXIT(&fr->fr_lock);
1487 			}
1488 			if (getlock) {
1489 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1490 			}
1491 			if (!n) {
1492 				error = ESRCH;
1493 				goto junkput;
1494 			}
1495 		}
1496 	}
1497 
1498 	if (ipnn != &ipn) {
1499 		KFREES(ipnn, ipn.ipn_dsize);
1500 		ipnn = NULL;
1501 	}
1502 
1503 	if (getlock) {
1504 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1505 	}
1506 	error = nat_insert(nat, nat->nat_rev, ifs);
1507 	if ((error == 0) && (aps != NULL)) {
1508 		aps->aps_next = ifs->ifs_ap_sess_list;
1509 		ifs->ifs_ap_sess_list = aps;
1510 	}
1511 	if (getlock) {
1512 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1513 	}
1514 
1515 	if (error == 0)
1516 		return 0;
1517 
1518 	error = ENOMEM;
1519 
1520 junkput:
1521 	if (fr != NULL)
1522 		(void) fr_derefrule(&fr, ifs);
1523 
1524 	if ((ipnn != NULL) && (ipnn != &ipn)) {
1525 		KFREES(ipnn, ipn.ipn_dsize);
1526 	}
1527 	if (nat != NULL) {
1528 		if (aps != NULL) {
1529 			if (aps->aps_data != NULL) {
1530 				KFREES(aps->aps_data, aps->aps_psiz);
1531 			}
1532 			KFREE(aps);
1533 		}
1534 		if (in != NULL) {
1535 			if (in->in_apr)
1536 				appr_free(in->in_apr);
1537 			KFREE(in);
1538 		}
1539 		KFREE(nat);
1540 	}
1541 	return error;
1542 }
1543 
1544 
1545 /* ------------------------------------------------------------------------ */
1546 /* Function:    nat_delete                                                  */
1547 /* Returns:     Nil                                                         */
1548 /* Parameters:  natd(I)    - pointer to NAT structure to delete             */
1549 /*              logtype(I) - type of LOG record to create before deleting   */
1550 /* Write Lock:  ipf_nat                                                     */
1551 /*                                                                          */
1552 /* Delete a nat entry from the various lists and table.  If NAT logging is  */
1553 /* enabled then generate a NAT log record for this event.                   */
1554 /* ------------------------------------------------------------------------ */
1555 static void nat_delete(nat, logtype, ifs)
1556 struct nat *nat;
1557 int logtype;
1558 ipf_stack_t *ifs;
1559 {
1560 	struct ipnat *ipn;
1561 
1562 	if (logtype != 0 && ifs->ifs_nat_logging != 0)
1563 		nat_log(nat, logtype, ifs);
1564 
1565 	MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
1566 
1567 	/*
1568 	 * Take it as a general indication that all the pointers are set if
1569 	 * nat_pnext is set.
1570 	 */
1571 	if (nat->nat_pnext != NULL) {
1572 		ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1573 		ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
1574 
1575 		*nat->nat_pnext = nat->nat_next;
1576 		if (nat->nat_next != NULL) {
1577 			nat->nat_next->nat_pnext = nat->nat_pnext;
1578 			nat->nat_next = NULL;
1579 		}
1580 		nat->nat_pnext = NULL;
1581 
1582 		*nat->nat_phnext[0] = nat->nat_hnext[0];
1583 		if (nat->nat_hnext[0] != NULL) {
1584 			nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1585 			nat->nat_hnext[0] = NULL;
1586 		}
1587 		nat->nat_phnext[0] = NULL;
1588 
1589 		*nat->nat_phnext[1] = nat->nat_hnext[1];
1590 		if (nat->nat_hnext[1] != NULL) {
1591 			nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1592 			nat->nat_hnext[1] = NULL;
1593 		}
1594 		nat->nat_phnext[1] = NULL;
1595 
1596 		if ((nat->nat_flags & SI_WILDP) != 0)
1597 			ifs->ifs_nat_stats.ns_wilds--;
1598 	}
1599 
1600 	if (nat->nat_me != NULL) {
1601 		*nat->nat_me = NULL;
1602 		nat->nat_me = NULL;
1603 	}
1604 
1605 	fr_deletequeueentry(&nat->nat_tqe);
1606 
1607 	nat->nat_ref--;
1608 	if (nat->nat_ref > 0) {
1609 		MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
1610 		return;
1611 	}
1612 
1613 #ifdef	IPFILTER_SYNC
1614 	if (nat->nat_sync)
1615 		ipfsync_del(nat->nat_sync);
1616 #endif
1617 
1618 	if (nat->nat_fr != NULL)
1619 		(void)fr_derefrule(&nat->nat_fr, ifs);
1620 
1621 	if (nat->nat_hm != NULL)
1622 		nat_hostmapdel(nat->nat_hm);
1623 
1624 	/*
1625 	 * If there is an active reference from the nat entry to its parent
1626 	 * rule, decrement the rule's reference count and free it too if no
1627 	 * longer being used.
1628 	 */
1629 	ipn = nat->nat_ptr;
1630 	if (ipn != NULL) {
1631 		ipn->in_space++;
1632 		ipn->in_use--;
1633 		if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) {
1634 			if (ipn->in_apr)
1635 				appr_free(ipn->in_apr);
1636 			KFREE(ipn);
1637 			ifs->ifs_nat_stats.ns_rules--;
1638 		}
1639 	}
1640 
1641 	MUTEX_DESTROY(&nat->nat_lock);
1642 
1643 	aps_free(nat->nat_aps, ifs);
1644 	ifs->ifs_nat_stats.ns_inuse--;
1645 	MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
1646 
1647 	/*
1648 	 * If there's a fragment table entry too for this nat entry, then
1649 	 * dereference that as well.  This is after nat_lock is released
1650 	 * because of Tru64.
1651 	 */
1652 	fr_forgetnat((void *)nat, ifs);
1653 
1654 	KFREE(nat);
1655 }
1656 
1657 
1658 /* ------------------------------------------------------------------------ */
1659 /* Function:    nat_flushtable                                              */
1660 /* Returns:     int - number of NAT rules deleted                           */
1661 /* Parameters:  Nil                                                         */
1662 /*                                                                          */
1663 /* Deletes all currently active NAT sessions.  In deleting each NAT entry a */
1664 /* log record should be emitted in nat_delete() if NAT logging is enabled.  */
1665 /* ------------------------------------------------------------------------ */
1666 /*
1667  * nat_flushtable - clear the NAT table of all mapping entries.
1668  */
1669 static int nat_flushtable(ifs)
1670 ipf_stack_t *ifs;
1671 {
1672 	nat_t *nat;
1673 	int j = 0;
1674 
1675 	/*
1676 	 * ALL NAT mappings deleted, so lets just make the deletions
1677 	 * quicker.
1678 	 */
1679 	if (ifs->ifs_nat_table[0] != NULL)
1680 		bzero((char *)ifs->ifs_nat_table[0],
1681 		      sizeof(ifs->ifs_nat_table[0]) * ifs->ifs_ipf_nattable_sz);
1682 	if (ifs->ifs_nat_table[1] != NULL)
1683 		bzero((char *)ifs->ifs_nat_table[1],
1684 		      sizeof(ifs->ifs_nat_table[1]) * ifs->ifs_ipf_nattable_sz);
1685 
1686 	while ((nat = ifs->ifs_nat_instances) != NULL) {
1687 		nat_delete(nat, NL_FLUSH, ifs);
1688 		j++;
1689 	}
1690 
1691 	ifs->ifs_nat_stats.ns_inuse = 0;
1692 	return j;
1693 }
1694 
1695 
1696 /* ------------------------------------------------------------------------ */
1697 /* Function:    nat_clearlist                                               */
1698 /* Returns:     int - number of NAT/RDR rules deleted                       */
1699 /* Parameters:  Nil                                                         */
1700 /*                                                                          */
1701 /* Delete all rules in the current list of rules.  There is nothing elegant */
1702 /* about this cleanup: simply free all entries on the list of rules and     */
1703 /* clear out the tables used for hashed NAT rule lookups.                   */
1704 /* ------------------------------------------------------------------------ */
1705 static int nat_clearlist(ifs)
1706 ipf_stack_t *ifs;
1707 {
1708 	ipnat_t *n, **np = &ifs->ifs_nat_list;
1709 	int i = 0;
1710 
1711 	if (ifs->ifs_nat_rules != NULL)
1712 		bzero((char *)ifs->ifs_nat_rules,
1713 		      sizeof(*ifs->ifs_nat_rules) * ifs->ifs_ipf_natrules_sz);
1714 	if (ifs->ifs_rdr_rules != NULL)
1715 		bzero((char *)ifs->ifs_rdr_rules,
1716 		      sizeof(*ifs->ifs_rdr_rules) * ifs->ifs_ipf_rdrrules_sz);
1717 
1718 	while ((n = *np) != NULL) {
1719 		*np = n->in_next;
1720 		if (n->in_use == 0) {
1721 			if (n->in_apr != NULL)
1722 				appr_free(n->in_apr);
1723 			KFREE(n);
1724 			ifs->ifs_nat_stats.ns_rules--;
1725 		} else {
1726 			n->in_flags |= IPN_DELETE;
1727 			n->in_next = NULL;
1728 		}
1729 		i++;
1730 	}
1731 	ifs->ifs_nat_masks = 0;
1732 	ifs->ifs_rdr_masks = 0;
1733 	return i;
1734 }
1735 
1736 
1737 /* ------------------------------------------------------------------------ */
1738 /* Function:    nat_newmap                                                  */
1739 /* Returns:     int - -1 == error, 0 == success                             */
1740 /* Parameters:  fin(I) - pointer to packet information                      */
1741 /*              nat(I) - pointer to NAT entry                               */
1742 /*              ni(I)  - pointer to structure with misc. information needed */
1743 /*                       to create new NAT entry.                           */
1744 /*                                                                          */
1745 /* Given an empty NAT structure, populate it with new information about a   */
1746 /* new NAT session, as defined by the matching NAT rule.                    */
1747 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
1748 /* to the new IP address for the translation.                               */
1749 /* ------------------------------------------------------------------------ */
1750 static INLINE int nat_newmap(fin, nat, ni)
1751 fr_info_t *fin;
1752 nat_t *nat;
1753 natinfo_t *ni;
1754 {
1755 	u_short st_port, dport, sport, port, sp, dp;
1756 	struct in_addr in, inb;
1757 	hostmap_t *hm;
1758 	u_32_t flags;
1759 	u_32_t st_ip;
1760 	ipnat_t *np;
1761 	nat_t *natl;
1762 	int l;
1763 	ipf_stack_t *ifs = fin->fin_ifs;
1764 
1765 	/*
1766 	 * If it's an outbound packet which doesn't match any existing
1767 	 * record, then create a new port
1768 	 */
1769 	l = 0;
1770 	hm = NULL;
1771 	np = ni->nai_np;
1772 	st_ip = np->in_nip;
1773 	st_port = np->in_pnext;
1774 	flags = ni->nai_flags;
1775 	sport = ni->nai_sport;
1776 	dport = ni->nai_dport;
1777 
1778 	/*
1779 	 * Do a loop until we either run out of entries to try or we find
1780 	 * a NAT mapping that isn't currently being used.  This is done
1781 	 * because the change to the source is not (usually) being fixed.
1782 	 */
1783 	do {
1784 		port = 0;
1785 		in.s_addr = htonl(np->in_nip);
1786 		if (l == 0) {
1787 			/*
1788 			 * Check to see if there is an existing NAT
1789 			 * setup for this IP address pair.
1790 			 */
1791 			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1792 					 in, 0, ifs);
1793 			if (hm != NULL)
1794 				in.s_addr = hm->hm_mapip.s_addr;
1795 		} else if ((l == 1) && (hm != NULL)) {
1796 			nat_hostmapdel(hm);
1797 			hm = NULL;
1798 		}
1799 		in.s_addr = ntohl(in.s_addr);
1800 
1801 		nat->nat_hm = hm;
1802 
1803 		if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
1804 			if (l > 0)
1805 				return -1;
1806 		}
1807 
1808 		if (np->in_redir == NAT_BIMAP &&
1809 		    np->in_inmsk == np->in_outmsk) {
1810 			/*
1811 			 * map the address block in a 1:1 fashion
1812 			 */
1813 			in.s_addr = np->in_outip;
1814 			in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
1815 			in.s_addr = ntohl(in.s_addr);
1816 
1817 		} else if (np->in_redir & NAT_MAPBLK) {
1818 			if ((l >= np->in_ppip) || ((l > 0) &&
1819 			     !(flags & IPN_TCPUDP)))
1820 				return -1;
1821 			/*
1822 			 * map-block - Calculate destination address.
1823 			 */
1824 			in.s_addr = ntohl(fin->fin_saddr);
1825 			in.s_addr &= ntohl(~np->in_inmsk);
1826 			inb.s_addr = in.s_addr;
1827 			in.s_addr /= np->in_ippip;
1828 			in.s_addr &= ntohl(~np->in_outmsk);
1829 			in.s_addr += ntohl(np->in_outip);
1830 			/*
1831 			 * Calculate destination port.
1832 			 */
1833 			if ((flags & IPN_TCPUDP) &&
1834 			    (np->in_ppip != 0)) {
1835 				port = ntohs(sport) + l;
1836 				port %= np->in_ppip;
1837 				port += np->in_ppip *
1838 					(inb.s_addr % np->in_ippip);
1839 				port += MAPBLK_MINPORT;
1840 				port = htons(port);
1841 			}
1842 
1843 		} else if ((np->in_outip == 0) &&
1844 			   (np->in_outmsk == 0xffffffff)) {
1845 			/*
1846 			 * 0/32 - use the interface's IP address.
1847 			 */
1848 			if ((l > 0) ||
1849 			    fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
1850 				       &in, NULL, fin->fin_ifs) == -1)
1851 				return -1;
1852 			in.s_addr = ntohl(in.s_addr);
1853 
1854 		} else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
1855 			/*
1856 			 * 0/0 - use the original source address/port.
1857 			 */
1858 			if (l > 0)
1859 				return -1;
1860 			in.s_addr = ntohl(fin->fin_saddr);
1861 
1862 		} else if ((np->in_outmsk != 0xffffffff) &&
1863 			   (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
1864 			np->in_nip++;
1865 
1866 		natl = NULL;
1867 
1868 		if ((flags & IPN_TCPUDP) &&
1869 		    ((np->in_redir & NAT_MAPBLK) == 0) &&
1870 		    (np->in_flags & IPN_AUTOPORTMAP)) {
1871 			/*
1872 			 * "ports auto" (without map-block)
1873 			 */
1874 			if ((l > 0) && (l % np->in_ppip == 0)) {
1875 				if (l > np->in_space) {
1876 					return -1;
1877 				} else if ((l > np->in_ppip) &&
1878 					   np->in_outmsk != 0xffffffff)
1879 					np->in_nip++;
1880 			}
1881 			if (np->in_ppip != 0) {
1882 				port = ntohs(sport);
1883 				port += (l % np->in_ppip);
1884 				port %= np->in_ppip;
1885 				port += np->in_ppip *
1886 					(ntohl(fin->fin_saddr) %
1887 					 np->in_ippip);
1888 				port += MAPBLK_MINPORT;
1889 				port = htons(port);
1890 			}
1891 
1892 		} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
1893 			   (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
1894 			/*
1895 			 * Standard port translation.  Select next port.
1896 			 */
1897 			port = htons(np->in_pnext++);
1898 
1899 			if (np->in_pnext > ntohs(np->in_pmax)) {
1900 				np->in_pnext = ntohs(np->in_pmin);
1901 				if (np->in_outmsk != 0xffffffff)
1902 					np->in_nip++;
1903 			}
1904 		}
1905 
1906 		if (np->in_flags & IPN_IPRANGE) {
1907 			if (np->in_nip > ntohl(np->in_outmsk))
1908 				np->in_nip = ntohl(np->in_outip);
1909 		} else {
1910 			if ((np->in_outmsk != 0xffffffff) &&
1911 			    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
1912 			    ntohl(np->in_outip))
1913 				np->in_nip = ntohl(np->in_outip) + 1;
1914 		}
1915 
1916 		if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
1917 			port = sport;
1918 
1919 		/*
1920 		 * Here we do a lookup of the connection as seen from
1921 		 * the outside.  If an IP# pair already exists, try
1922 		 * again.  So if you have A->B becomes C->B, you can
1923 		 * also have D->E become C->E but not D->B causing
1924 		 * another C->B.  Also take protocol and ports into
1925 		 * account when determining whether a pre-existing
1926 		 * NAT setup will cause an external conflict where
1927 		 * this is appropriate.
1928 		 */
1929 		inb.s_addr = htonl(in.s_addr);
1930 		sp = fin->fin_data[0];
1931 		dp = fin->fin_data[1];
1932 		fin->fin_data[0] = fin->fin_data[1];
1933 		fin->fin_data[1] = htons(port);
1934 		natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
1935 				    (u_int)fin->fin_p, fin->fin_dst, inb);
1936 		fin->fin_data[0] = sp;
1937 		fin->fin_data[1] = dp;
1938 
1939 		/*
1940 		 * Has the search wrapped around and come back to the
1941 		 * start ?
1942 		 */
1943 		if ((natl != NULL) &&
1944 		    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
1945 		    (np->in_nip != 0) && (st_ip == np->in_nip))
1946 			return -1;
1947 		l++;
1948 	} while (natl != NULL);
1949 
1950 	if (np->in_space > 0)
1951 		np->in_space--;
1952 
1953 	/* Setup the NAT table */
1954 	nat->nat_inip = fin->fin_src;
1955 	nat->nat_outip.s_addr = htonl(in.s_addr);
1956 	nat->nat_oip = fin->fin_dst;
1957 	if (nat->nat_hm == NULL)
1958 		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1959 					  nat->nat_outip, 0, ifs);
1960 
1961 	/*
1962 	 * The ICMP checksum does not have a pseudo header containing
1963 	 * the IP addresses
1964 	 */
1965 	ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1966 	ni->nai_sum2 = LONG_SUM(in.s_addr);
1967 	if ((flags & IPN_TCPUDP)) {
1968 		ni->nai_sum1 += ntohs(sport);
1969 		ni->nai_sum2 += ntohs(port);
1970 	}
1971 
1972 	if (flags & IPN_TCPUDP) {
1973 		nat->nat_inport = sport;
1974 		nat->nat_outport = port;	/* sport */
1975 		nat->nat_oport = dport;
1976 		((tcphdr_t *)fin->fin_dp)->th_sport = port;
1977 	} else if (flags & IPN_ICMPQUERY) {
1978 		((icmphdr_t *)fin->fin_dp)->icmp_id = port;
1979 		nat->nat_inport = port;
1980 		nat->nat_outport = port;
1981 	}
1982 
1983 	ni->nai_ip.s_addr = in.s_addr;
1984 	ni->nai_port = port;
1985 	ni->nai_nport = dport;
1986 	return 0;
1987 }
1988 
1989 
1990 /* ------------------------------------------------------------------------ */
1991 /* Function:    nat_newrdr                                                  */
1992 /* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
1993 /*                    allow rule to be moved if IPN_ROUNDR is set.          */
1994 /* Parameters:  fin(I) - pointer to packet information                      */
1995 /*              nat(I) - pointer to NAT entry                               */
1996 /*              ni(I)  - pointer to structure with misc. information needed */
1997 /*                       to create new NAT entry.                           */
1998 /*                                                                          */
1999 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2000 /* to the new IP address for the translation.                               */
2001 /* ------------------------------------------------------------------------ */
2002 static INLINE int nat_newrdr(fin, nat, ni)
2003 fr_info_t *fin;
2004 nat_t *nat;
2005 natinfo_t *ni;
2006 {
2007 	u_short nport, dport, sport;
2008 	struct in_addr in;
2009 	hostmap_t *hm;
2010 	u_32_t flags;
2011 	ipnat_t *np;
2012 	int move;
2013 	ipf_stack_t *ifs = fin->fin_ifs;
2014 
2015 	move = 1;
2016 	hm = NULL;
2017 	in.s_addr = 0;
2018 	np = ni->nai_np;
2019 	flags = ni->nai_flags;
2020 	sport = ni->nai_sport;
2021 	dport = ni->nai_dport;
2022 
2023 	/*
2024 	 * If the matching rule has IPN_STICKY set, then we want to have the
2025 	 * same rule kick in as before.  Why would this happen?  If you have
2026 	 * a collection of rdr rules with "round-robin sticky", the current
2027 	 * packet might match a different one to the previous connection but
2028 	 * we want the same destination to be used.
2029 	 */
2030 	if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) ==
2031 	    (IPN_ROUNDR|IPN_STICKY)) {
2032 		hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2033 				 (u_32_t)dport, ifs);
2034 		if (hm != NULL) {
2035 			in.s_addr = ntohl(hm->hm_mapip.s_addr);
2036 			np = hm->hm_ipnat;
2037 			ni->nai_np = np;
2038 			move = 0;
2039 		}
2040 	}
2041 
2042 	/*
2043 	 * Otherwise, it's an inbound packet. Most likely, we don't
2044 	 * want to rewrite source ports and source addresses. Instead,
2045 	 * we want to rewrite to a fixed internal address and fixed
2046 	 * internal port.
2047 	 */
2048 	if (np->in_flags & IPN_SPLIT) {
2049 		in.s_addr = np->in_nip;
2050 
2051 		if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2052 			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2053 					 in, (u_32_t)dport, ifs);
2054 			if (hm != NULL) {
2055 				in.s_addr = hm->hm_mapip.s_addr;
2056 				move = 0;
2057 			}
2058 		}
2059 
2060 		if (hm == NULL || hm->hm_ref == 1) {
2061 			if (np->in_inip == htonl(in.s_addr)) {
2062 				np->in_nip = ntohl(np->in_inmsk);
2063 				move = 0;
2064 			} else {
2065 				np->in_nip = ntohl(np->in_inip);
2066 			}
2067 		}
2068 
2069 	} else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2070 		/*
2071 		 * 0/32 - use the interface's IP address.
2072 		 */
2073 		if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL,
2074 			   fin->fin_ifs) == -1)
2075 			return -1;
2076 		in.s_addr = ntohl(in.s_addr);
2077 
2078 	} else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2079 		/*
2080 		 * 0/0 - use the original destination address/port.
2081 		 */
2082 		in.s_addr = ntohl(fin->fin_daddr);
2083 
2084 	} else if (np->in_redir == NAT_BIMAP &&
2085 		   np->in_inmsk == np->in_outmsk) {
2086 		/*
2087 		 * map the address block in a 1:1 fashion
2088 		 */
2089 		in.s_addr = np->in_inip;
2090 		in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2091 		in.s_addr = ntohl(in.s_addr);
2092 	} else {
2093 		in.s_addr = ntohl(np->in_inip);
2094 	}
2095 
2096 	if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2097 		nport = dport;
2098 	else {
2099 		/*
2100 		 * Whilst not optimized for the case where
2101 		 * pmin == pmax, the gain is not significant.
2102 		 */
2103 		if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2104 		    (np->in_pmin != np->in_pmax)) {
2105 			nport = ntohs(dport) - ntohs(np->in_pmin) +
2106 				ntohs(np->in_pnext);
2107 			nport = htons(nport);
2108 		} else
2109 			nport = np->in_pnext;
2110 	}
2111 
2112 	/*
2113 	 * When the redirect-to address is set to 0.0.0.0, just
2114 	 * assume a blank `forwarding' of the packet.  We don't
2115 	 * setup any translation for this either.
2116 	 */
2117 	if (in.s_addr == 0) {
2118 		if (nport == dport)
2119 			return -1;
2120 		in.s_addr = ntohl(fin->fin_daddr);
2121 	}
2122 
2123 	nat->nat_inip.s_addr = htonl(in.s_addr);
2124 	nat->nat_outip = fin->fin_dst;
2125 	nat->nat_oip = fin->fin_src;
2126 
2127 	ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
2128 	ni->nai_sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
2129 
2130 	ni->nai_ip.s_addr = in.s_addr;
2131 	ni->nai_nport = nport;
2132 	ni->nai_port = sport;
2133 
2134 	if (flags & IPN_TCPUDP) {
2135 		nat->nat_inport = nport;
2136 		nat->nat_outport = dport;
2137 		nat->nat_oport = sport;
2138 		((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2139 	} else if (flags & IPN_ICMPQUERY) {
2140 		((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2141 		nat->nat_inport = nport;
2142 		nat->nat_outport = nport;
2143 	}
2144 
2145 	return move;
2146 }
2147 
2148 /* ------------------------------------------------------------------------ */
2149 /* Function:    nat_new                                                     */
2150 /* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
2151 /*                       else pointer to new NAT structure                  */
2152 /* Parameters:  fin(I)       - pointer to packet information                */
2153 /*              np(I)        - pointer to NAT rule                          */
2154 /*              natsave(I)   - pointer to where to store NAT struct pointer */
2155 /*              flags(I)     - flags describing the current packet          */
2156 /*              direction(I) - direction of packet (in/out)                 */
2157 /* Write Lock:  ipf_nat                                                     */
2158 /*                                                                          */
2159 /* Attempts to create a new NAT entry.  Does not actually change the packet */
2160 /* in any way.                                                              */
2161 /*                                                                          */
2162 /* This fucntion is in three main parts: (1) deal with creating a new NAT   */
2163 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
2164 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2165 /* and (3) building that structure and putting it into the NAT table(s).    */
2166 /* ------------------------------------------------------------------------ */
2167 nat_t *nat_new(fin, np, natsave, flags, direction)
2168 fr_info_t *fin;
2169 ipnat_t *np;
2170 nat_t **natsave;
2171 u_int flags;
2172 int direction;
2173 {
2174 	u_short port = 0, sport = 0, dport = 0, nport = 0;
2175 	tcphdr_t *tcp = NULL;
2176 	hostmap_t *hm = NULL;
2177 	struct in_addr in;
2178 	nat_t *nat, *natl;
2179 	u_int nflags;
2180 	natinfo_t ni;
2181 	u_32_t sumd;
2182 	int move;
2183 	ipf_stack_t *ifs = fin->fin_ifs;
2184 
2185 	if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) {
2186 		ifs->ifs_nat_stats.ns_memfail++;
2187 		return NULL;
2188 	}
2189 
2190 	move = 1;
2191 	nflags = np->in_flags & flags;
2192 	nflags &= NAT_FROMRULE;
2193 
2194 	ni.nai_np = np;
2195 	ni.nai_nflags = nflags;
2196 	ni.nai_flags = flags;
2197 
2198 	/* Give me a new nat */
2199 	KMALLOC(nat, nat_t *);
2200 	if (nat == NULL) {
2201 		ifs->ifs_nat_stats.ns_memfail++;
2202 		/*
2203 		 * Try to automatically tune the max # of entries in the
2204 		 * table allowed to be less than what will cause kmem_alloc()
2205 		 * to fail and try to eliminate panics due to out of memory
2206 		 * conditions arising.
2207 		 */
2208 		if (ifs->ifs_ipf_nattable_max > ifs->ifs_ipf_nattable_sz) {
2209 			ifs->ifs_ipf_nattable_max = ifs->ifs_nat_stats.ns_inuse - 100;
2210 			printf("ipf_nattable_max reduced to %d\n",
2211 				ifs->ifs_ipf_nattable_max);
2212 		}
2213 		return NULL;
2214 	}
2215 
2216 	if (flags & IPN_TCPUDP) {
2217 		tcp = fin->fin_dp;
2218 		ni.nai_sport = htons(fin->fin_sport);
2219 		ni.nai_dport = htons(fin->fin_dport);
2220 	} else if (flags & IPN_ICMPQUERY) {
2221 		/*
2222 		 * In the ICMP query NAT code, we translate the ICMP id fields
2223 		 * to make them unique. This is indepedent of the ICMP type
2224 		 * (e.g. in the unlikely event that a host sends an echo and
2225 		 * an tstamp request with the same id, both packets will have
2226 		 * their ip address/id field changed in the same way).
2227 		 */
2228 		/* The icmp_id field is used by the sender to identify the
2229 		 * process making the icmp request. (the receiver justs
2230 		 * copies it back in its response). So, it closely matches
2231 		 * the concept of source port. We overlay sport, so we can
2232 		 * maximally reuse the existing code.
2233 		 */
2234 		ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2235 		ni.nai_dport = ni.nai_sport;
2236 	}
2237 
2238 	bzero((char *)nat, sizeof(*nat));
2239 	nat->nat_flags = flags;
2240 	nat->nat_redir = np->in_redir;
2241 
2242 	if ((flags & NAT_SLAVE) == 0) {
2243 		MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
2244 	}
2245 
2246 	/*
2247 	 * Search the current table for a match.
2248 	 */
2249 	if (direction == NAT_OUTBOUND) {
2250 		/*
2251 		 * We can now arrange to call this for the same connection
2252 		 * because ipf_nat_new doesn't protect the code path into
2253 		 * this function.
2254 		 */
2255 		natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2256 				     fin->fin_src, fin->fin_dst);
2257 		if (natl != NULL) {
2258 			KFREE(nat);
2259 			nat = natl;
2260 			goto done;
2261 		}
2262 
2263 		move = nat_newmap(fin, nat, &ni);
2264 		if (move == -1)
2265 			goto badnat;
2266 
2267 		np = ni.nai_np;
2268 		in = ni.nai_ip;
2269 	} else {
2270 		/*
2271 		 * NAT_INBOUND is used only for redirects rules
2272 		 */
2273 		natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2274 				    fin->fin_src, fin->fin_dst);
2275 		if (natl != NULL) {
2276 			KFREE(nat);
2277 			nat = natl;
2278 			goto done;
2279 		}
2280 
2281 		move = nat_newrdr(fin, nat, &ni);
2282 		if (move == -1)
2283 			goto badnat;
2284 
2285 		np = ni.nai_np;
2286 		in = ni.nai_ip;
2287 	}
2288 	port = ni.nai_port;
2289 	nport = ni.nai_nport;
2290 
2291 	if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2292 		if (np->in_redir == NAT_REDIRECT) {
2293 			nat_delrdr(np);
2294 			nat_addrdr(np, ifs);
2295 		} else if (np->in_redir == NAT_MAP) {
2296 			nat_delnat(np);
2297 			nat_addnat(np, ifs);
2298 		}
2299 	}
2300 
2301 	if (flags & IPN_TCPUDP) {
2302 		sport = ni.nai_sport;
2303 		dport = ni.nai_dport;
2304 	} else if (flags & IPN_ICMPQUERY) {
2305 		sport = ni.nai_sport;
2306 		dport = 0;
2307 	}
2308 
2309 	/*
2310 	 * nat_sumd[0] stores adjustment value including both IP address and
2311 	 * port number changes. nat_sumd[1] stores adjustment value only for
2312 	 * IP address changes, to be used for pseudo header adjustment, in
2313 	 * case hardware partial checksum offload is offered.
2314 	 */
2315 	CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2316 	nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2317 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
2318 	if (flags & IPN_TCPUDP) {
2319 		ni.nai_sum1 = LONG_SUM(in.s_addr);
2320 		if (direction == NAT_OUTBOUND)
2321 			ni.nai_sum2 = LONG_SUM(ntohl(fin->fin_saddr));
2322 		else
2323 			ni.nai_sum2 = LONG_SUM(ntohl(fin->fin_daddr));
2324 
2325 		CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2326 		nat->nat_sumd[1] = (sumd & 0xffff) + (sumd >> 16);
2327 	} else
2328 #endif
2329 		nat->nat_sumd[1] = nat->nat_sumd[0];
2330 
2331 	if ((flags & IPN_TCPUDPICMP) && ((sport != port) || (dport != nport))) {
2332 		if (direction == NAT_OUTBOUND)
2333 			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2334 		else
2335 			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr));
2336 
2337 		ni.nai_sum2 = LONG_SUM(in.s_addr);
2338 
2339 		CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2340 		nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
2341 	} else {
2342 		nat->nat_ipsumd = nat->nat_sumd[0];
2343 		if (!(flags & IPN_TCPUDPICMP)) {
2344 			nat->nat_sumd[0] = 0;
2345 			nat->nat_sumd[1] = 0;
2346 		}
2347 	}
2348 
2349 	if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2350 		goto badnat;
2351 	}
2352 	if (flags & SI_WILDP)
2353 		ifs->ifs_nat_stats.ns_wilds++;
2354 	goto done;
2355 badnat:
2356 	ifs->ifs_nat_stats.ns_badnat++;
2357 	if ((hm = nat->nat_hm) != NULL)
2358 		nat_hostmapdel(hm);
2359 	KFREE(nat);
2360 	nat = NULL;
2361 done:
2362 	if ((flags & NAT_SLAVE) == 0) {
2363 		MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
2364 	}
2365 	return nat;
2366 }
2367 
2368 
2369 /* ------------------------------------------------------------------------ */
2370 /* Function:    nat_finalise                                                */
2371 /* Returns:     int - 0 == sucess, -1 == failure                            */
2372 /* Parameters:  fin(I) - pointer to packet information                      */
2373 /*              nat(I) - pointer to NAT entry                               */
2374 /*              ni(I)  - pointer to structure with misc. information needed */
2375 /*                       to create new NAT entry.                           */
2376 /* Write Lock:  ipf_nat                                                     */
2377 /*                                                                          */
2378 /* This is the tail end of constructing a new NAT entry and is the same     */
2379 /* for both IPv4 and IPv6.                                                  */
2380 /* ------------------------------------------------------------------------ */
2381 /*ARGSUSED*/
2382 static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction)
2383 fr_info_t *fin;
2384 nat_t *nat;
2385 natinfo_t *ni;
2386 tcphdr_t *tcp;
2387 nat_t **natsave;
2388 int direction;
2389 {
2390 	frentry_t *fr;
2391 	ipnat_t *np;
2392 	ipf_stack_t *ifs = fin->fin_ifs;
2393 
2394 	np = ni->nai_np;
2395 
2396 	COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0], fin->fin_v);
2397 
2398 #ifdef	IPFILTER_SYNC
2399 	if ((nat->nat_flags & SI_CLONE) == 0)
2400 		nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2401 #endif
2402 
2403 	nat->nat_me = natsave;
2404 	nat->nat_dir = direction;
2405 	nat->nat_ifps[0] = np->in_ifps[0];
2406 	nat->nat_ifps[1] = np->in_ifps[1];
2407 	nat->nat_ptr = np;
2408 	nat->nat_p = fin->fin_p;
2409 	nat->nat_mssclamp = np->in_mssclamp;
2410 	fr = fin->fin_fr;
2411 	nat->nat_fr = fr;
2412 
2413 	if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2414 		if (appr_new(fin, nat) == -1)
2415 			return -1;
2416 
2417 	if (nat_insert(nat, fin->fin_rev, ifs) == 0) {
2418 		if (ifs->ifs_nat_logging)
2419 			nat_log(nat, (u_int)np->in_redir, ifs);
2420 		np->in_use++;
2421 		if (fr != NULL) {
2422 			MUTEX_ENTER(&fr->fr_lock);
2423 			fr->fr_ref++;
2424 			MUTEX_EXIT(&fr->fr_lock);
2425 		}
2426 		return 0;
2427 	}
2428 
2429 	/*
2430 	 * nat_insert failed, so cleanup time...
2431 	 */
2432 	return -1;
2433 }
2434 
2435 
2436 /* ------------------------------------------------------------------------ */
2437 /* Function:   nat_insert                                                   */
2438 /* Returns:    int - 0 == sucess, -1 == failure                             */
2439 /* Parameters: nat(I) - pointer to NAT structure                            */
2440 /*             rev(I) - flag indicating forward/reverse direction of packet */
2441 /* Write Lock: ipf_nat                                                      */
2442 /*                                                                          */
2443 /* Insert a NAT entry into the hash tables for searching and add it to the  */
2444 /* list of active NAT entries.  Adjust global counters when complete.       */
2445 /* ------------------------------------------------------------------------ */
2446 int	nat_insert(nat, rev, ifs)
2447 nat_t	*nat;
2448 int	rev;
2449 ipf_stack_t *ifs;
2450 {
2451 	u_int hv1, hv2;
2452 	nat_t **natp;
2453 
2454 	/*
2455 	 * Try and return an error as early as possible, so calculate the hash
2456 	 * entry numbers first and then proceed.
2457 	 */
2458 	if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2459 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2460 				  0xffffffff);
2461 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2462 				  ifs->ifs_ipf_nattable_sz);
2463 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2464 				  0xffffffff);
2465 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2466 				  ifs->ifs_ipf_nattable_sz);
2467 	} else {
2468 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2469 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1,
2470 				  ifs->ifs_ipf_nattable_sz);
2471 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2472 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2,
2473 				  ifs->ifs_ipf_nattable_sz);
2474 	}
2475 
2476 	if (ifs->ifs_nat_stats.ns_bucketlen[0][hv1] >= ifs->ifs_fr_nat_maxbucket ||
2477 	    ifs->ifs_nat_stats.ns_bucketlen[1][hv2] >= ifs->ifs_fr_nat_maxbucket) {
2478 		return -1;
2479 	}
2480 
2481 	nat->nat_hv[0] = hv1;
2482 	nat->nat_hv[1] = hv2;
2483 
2484 	MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2485 
2486 	nat->nat_rev = rev;
2487 	nat->nat_ref = 1;
2488 	nat->nat_bytes[0] = 0;
2489 	nat->nat_pkts[0] = 0;
2490 	nat->nat_bytes[1] = 0;
2491 	nat->nat_pkts[1] = 0;
2492 
2493 	nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2494 	nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4, ifs);
2495 
2496 	if (nat->nat_ifnames[1][0] !='\0') {
2497 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2498 		nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4, ifs);
2499 	} else {
2500 		(void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2501 			       LIFNAMSIZ);
2502 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2503 		nat->nat_ifps[1] = nat->nat_ifps[0];
2504 	}
2505 
2506 	nat->nat_next = ifs->ifs_nat_instances;
2507 	nat->nat_pnext = &ifs->ifs_nat_instances;
2508 	if (ifs->ifs_nat_instances)
2509 		ifs->ifs_nat_instances->nat_pnext = &nat->nat_next;
2510 	ifs->ifs_nat_instances = nat;
2511 
2512 	natp = &ifs->ifs_nat_table[0][hv1];
2513 	if (*natp)
2514 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2515 	nat->nat_phnext[0] = natp;
2516 	nat->nat_hnext[0] = *natp;
2517 	*natp = nat;
2518 	ifs->ifs_nat_stats.ns_bucketlen[0][hv1]++;
2519 
2520 	natp = &ifs->ifs_nat_table[1][hv2];
2521 	if (*natp)
2522 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2523 	nat->nat_phnext[1] = natp;
2524 	nat->nat_hnext[1] = *natp;
2525 	*natp = nat;
2526 	ifs->ifs_nat_stats.ns_bucketlen[1][hv2]++;
2527 
2528 	fr_setnatqueue(nat, rev, ifs);
2529 
2530 	ifs->ifs_nat_stats.ns_added++;
2531 	ifs->ifs_nat_stats.ns_inuse++;
2532 	return 0;
2533 }
2534 
2535 
2536 /* ------------------------------------------------------------------------ */
2537 /* Function:    nat_icmperrorlookup                                         */
2538 /* Returns:     nat_t* - point to matching NAT structure                    */
2539 /* Parameters:  fin(I) - pointer to packet information                      */
2540 /*              dir(I) - direction of packet (in/out)                       */
2541 /*                                                                          */
2542 /* Check if the ICMP error message is related to an existing TCP, UDP or    */
2543 /* ICMP query nat entry.  It is assumed that the packet is already of the   */
2544 /* the required length.                                                     */
2545 /* ------------------------------------------------------------------------ */
2546 nat_t *nat_icmperrorlookup(fin, dir)
2547 fr_info_t *fin;
2548 int dir;
2549 {
2550 	int flags = 0, minlen;
2551 	icmphdr_t *orgicmp;
2552 	tcphdr_t *tcp = NULL;
2553 	u_short data[2];
2554 	nat_t *nat;
2555 	ip_t *oip;
2556 	u_int p;
2557 
2558 	/*
2559 	 * Does it at least have the return (basic) IP header ?
2560 	 * Only a basic IP header (no options) should be with an ICMP error
2561 	 * header.  Also, if it's not an error type, then return.
2562 	 */
2563 	if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2564 		return NULL;
2565 
2566 	/*
2567 	 * Check packet size
2568 	 */
2569 	oip = (ip_t *)((char *)fin->fin_dp + 8);
2570 	minlen = IP_HL(oip) << 2;
2571 	if ((minlen < sizeof(ip_t)) ||
2572 	    (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2573 		return NULL;
2574 	/*
2575 	 * Is the buffer big enough for all of it ?  It's the size of the IP
2576 	 * header claimed in the encapsulated part which is of concern.  It
2577 	 * may be too big to be in this buffer but not so big that it's
2578 	 * outside the ICMP packet, leading to TCP deref's causing problems.
2579 	 * This is possible because we don't know how big oip_hl is when we
2580 	 * do the pullup early in fr_check() and thus can't gaurantee it is
2581 	 * all here now.
2582 	 */
2583 #ifdef  _KERNEL
2584 	{
2585 	mb_t *m;
2586 
2587 	m = fin->fin_m;
2588 # if defined(MENTAT)
2589 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2590 		return NULL;
2591 # else
2592 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2593 	    (char *)fin->fin_ip + M_LEN(m))
2594 		return NULL;
2595 # endif
2596 	}
2597 #endif
2598 
2599 	if (fin->fin_daddr != oip->ip_src.s_addr)
2600 		return NULL;
2601 
2602 	p = oip->ip_p;
2603 	if (p == IPPROTO_TCP)
2604 		flags = IPN_TCP;
2605 	else if (p == IPPROTO_UDP)
2606 		flags = IPN_UDP;
2607 	else if (p == IPPROTO_ICMP) {
2608 		orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2609 
2610 		/* see if this is related to an ICMP query */
2611 		if (nat_icmpquerytype4(orgicmp->icmp_type)) {
2612 			data[0] = fin->fin_data[0];
2613 			data[1] = fin->fin_data[1];
2614 			fin->fin_data[0] = 0;
2615 			fin->fin_data[1] = orgicmp->icmp_id;
2616 
2617 			flags = IPN_ICMPERR|IPN_ICMPQUERY;
2618 			/*
2619 			 * NOTE : dir refers to the direction of the original
2620 			 *        ip packet. By definition the icmp error
2621 			 *        message flows in the opposite direction.
2622 			 */
2623 			if (dir == NAT_INBOUND)
2624 				nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2625 						   oip->ip_src);
2626 			else
2627 				nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2628 						    oip->ip_src);
2629 			fin->fin_data[0] = data[0];
2630 			fin->fin_data[1] = data[1];
2631 			return nat;
2632 		}
2633 	}
2634 
2635 	if (flags & IPN_TCPUDP) {
2636 		minlen += 8;		/* + 64bits of data to get ports */
2637 		if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2638 			return NULL;
2639 
2640 		data[0] = fin->fin_data[0];
2641 		data[1] = fin->fin_data[1];
2642 		tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2643 		fin->fin_data[0] = ntohs(tcp->th_dport);
2644 		fin->fin_data[1] = ntohs(tcp->th_sport);
2645 
2646 		if (dir == NAT_INBOUND) {
2647 			nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2648 					   oip->ip_src);
2649 		} else {
2650 			nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2651 					    oip->ip_src);
2652 		}
2653 		fin->fin_data[0] = data[0];
2654 		fin->fin_data[1] = data[1];
2655 		return nat;
2656 	}
2657 	if (dir == NAT_INBOUND)
2658 		return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2659 	else
2660 		return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2661 }
2662 
2663 
2664 /* ------------------------------------------------------------------------ */
2665 /* Function:    nat_icmperror                                               */
2666 /* Returns:     nat_t* - point to matching NAT structure                    */
2667 /* Parameters:  fin(I)    - pointer to packet information                   */
2668 /*              nflags(I) - NAT flags for this packet                       */
2669 /*              dir(I)    - direction of packet (in/out)                    */
2670 /*                                                                          */
2671 /* Fix up an ICMP packet which is an error message for an existing NAT      */
2672 /* session.  This will correct both packet header data and checksums.       */
2673 /*                                                                          */
2674 /* This should *ONLY* be used for incoming ICMP error packets to make sure  */
2675 /* a NAT'd ICMP packet gets correctly recognised.                           */
2676 /* ------------------------------------------------------------------------ */
2677 nat_t *nat_icmperror(fin, nflags, dir)
2678 fr_info_t *fin;
2679 u_int *nflags;
2680 int dir;
2681 {
2682 	u_32_t sum1, sum2, sumd, psum1, psum2, psumd, sumd2;
2683 	struct in_addr in;
2684 	icmphdr_t *icmp, *orgicmp;
2685 	int dlen;
2686 	udphdr_t *udp;
2687 	tcphdr_t *tcp;
2688 	nat_t *nat;
2689 	ip_t *oip;
2690 	if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
2691 		return NULL;
2692 
2693 	/*
2694 	 * nat_icmperrorlookup() looks up nat entry associated with the
2695 	 * offending IP packet and returns pointer to the entry, or NULL
2696 	 * if packet wasn't natted or for `defective' packets.
2697 	 */
2698 
2699 	if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
2700 		return NULL;
2701 
2702 	sumd2 = 0;
2703 	*nflags = IPN_ICMPERR;
2704 	icmp = fin->fin_dp;
2705 	oip = (ip_t *)&icmp->icmp_ip;
2706 	udp = (udphdr_t *)((((char *)oip) + (IP_HL(oip) << 2)));
2707 	tcp = (tcphdr_t *)udp;
2708 	dlen = fin->fin_plen - ((char *)udp - (char *)fin->fin_ip);
2709 
2710 	/*
2711 	 * Need to adjust ICMP header to include the real IP#'s and
2712 	 * port #'s.  There are three steps required.
2713 	 *
2714 	 * Step 1
2715 	 * Fix the IP addresses in the offending IP packet and update
2716 	 * ip header checksum to compensate for the change.
2717 	 *
2718 	 * No update needed here for icmp_cksum because the ICMP checksum
2719 	 * is calculated over the complete ICMP packet, which includes the
2720 	 * changed oip IP addresses and oip->ip_sum.  These two changes
2721 	 * cancel each other out (if the delta for the IP address is x,
2722 	 * then the delta for ip_sum is minus x).
2723 	 */
2724 
2725 	if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
2726 		sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
2727 		in = nat->nat_inip;
2728 		oip->ip_src = in;
2729 	} else {
2730 		sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
2731 		in = nat->nat_outip;
2732 		oip->ip_dst = in;
2733 	}
2734 
2735 	sum2 = LONG_SUM(ntohl(in.s_addr));
2736 	CALC_SUMD(sum1, sum2, sumd);
2737 	fix_datacksum(&oip->ip_sum, sumd);
2738 
2739 	/*
2740 	 * Step 2
2741 	 * Perform other adjustments based on protocol of offending packet.
2742 	 */
2743 
2744 	switch (oip->ip_p) {
2745 		case IPPROTO_TCP :
2746 		case IPPROTO_UDP :
2747 
2748 			/*
2749 			* For offending TCP/UDP IP packets, translate the ports
2750 			* based on the NAT specification.
2751 			*
2752 			* Advance notice : Now it becomes complicated :-)
2753 			*
2754 			* Since the port and IP addresse fields are both part
2755 			* of the TCP/UDP checksum of the offending IP packet,
2756 			* we need to adjust that checksum as well.
2757 			*
2758 			* To further complicate things, the TCP/UDP checksum
2759 			* may not be present.  We must check to see if the
2760 			* length of the data portion is big enough to hold
2761 			* the checksum.  In the UDP case, a test to determine
2762 			* if the checksum is even set is also required.
2763 			*
2764 			* Any changes to an IP address, port or checksum within
2765 			* the ICMP packet requires a change to icmp_cksum.
2766 			*
2767 			* Be extremely careful here ... The change is dependent
2768 			* upon whether or not the TCP/UPD checksum is present.
2769 			*
2770 			* If TCP/UPD checksum is present, the icmp_cksum must
2771 			* compensate for checksum modification resulting from
2772 			* IP address change only.  Port change and resulting
2773 			* data checksum adjustments cancel each other out.
2774 			*
2775 			* If TCP/UDP checksum is not present, icmp_cksum must
2776 			* compensate for port change only.  The IP address
2777 			* change does not modify anything else in this case.
2778 			*/
2779 
2780 			psum1 = 0;
2781 			psum2 = 0;
2782 			psumd = 0;
2783 
2784 			if ((tcp->th_dport == nat->nat_oport) &&
2785 			    (tcp->th_sport != nat->nat_inport)) {
2786 
2787 				/*
2788 				 * Translate the source port.
2789 				 */
2790 
2791 				psum1 = ntohs(tcp->th_sport);
2792 				psum2 = ntohs(nat->nat_inport);
2793 				tcp->th_sport = nat->nat_inport;
2794 
2795 			} else if ((tcp->th_sport == nat->nat_oport) &&
2796 				    (tcp->th_dport != nat->nat_outport)) {
2797 
2798 				/*
2799 				 * Translate the destination port.
2800 				 */
2801 
2802 				psum1 = ntohs(tcp->th_dport);
2803 				psum2 = ntohs(nat->nat_outport);
2804 				tcp->th_dport = nat->nat_outport;
2805 			}
2806 
2807 			if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) {
2808 
2809 				/*
2810 				 * TCP checksum present.
2811 				 *
2812 				 * Adjust data checksum and icmp checksum to
2813 				 * compensate for any IP address change.
2814 				 */
2815 
2816 				sum1 = ntohs(tcp->th_sum);
2817 				fix_datacksum(&tcp->th_sum, sumd);
2818 				sum2 = ntohs(tcp->th_sum);
2819 				sumd2 = sumd << 1;
2820 				CALC_SUMD(sum1, sum2, sumd);
2821 				sumd2 += sumd;
2822 
2823 				/*
2824 				 * Also make data checksum adjustment to
2825 				 * compensate for any port change.
2826 				 */
2827 
2828 				if (psum1 != psum2) {
2829 					CALC_SUMD(psum1, psum2, psumd);
2830 					fix_datacksum(&tcp->th_sum, psumd);
2831 				}
2832 
2833 			} else if ((oip->ip_p == IPPROTO_UDP) &&
2834 				   (dlen >= 8) && (udp->uh_sum != 0)) {
2835 
2836 				/*
2837 				 * The UDP checksum is present and set.
2838 				 *
2839 				 * Adjust data checksum and icmp checksum to
2840 				 * compensate for any IP address change.
2841 				 */
2842 
2843 				sum1 = ntohs(udp->uh_sum);
2844 				fix_datacksum(&udp->uh_sum, sumd);
2845 				sum2 = ntohs(udp->uh_sum);
2846 				sumd2 = sumd << 1;
2847 				CALC_SUMD(sum1, sum2, sumd);
2848 				sumd2 += sumd;
2849 
2850 				/*
2851 				 * Also make data checksum adjustment to
2852 				 * compensate for any port change.
2853 				 */
2854 
2855 				if (psum1 != psum2) {
2856 					CALC_SUMD(psum1, psum2, psumd);
2857 					fix_datacksum(&udp->uh_sum, psumd);
2858 				}
2859 
2860 			} else {
2861 
2862 				/*
2863 				 * Data checksum was not present.
2864 				 *
2865 				 * Compensate for any port change.
2866 				 */
2867 
2868 				CALC_SUMD(psum2, psum1, psumd);
2869 				sumd2 += psumd;
2870 			}
2871 			break;
2872 
2873 		case IPPROTO_ICMP :
2874 
2875 			orgicmp = (icmphdr_t *)udp;
2876 
2877 			if ((nat->nat_dir == NAT_OUTBOUND) &&
2878 			    (orgicmp->icmp_id != nat->nat_inport) &&
2879 			    (dlen >= 8)) {
2880 
2881 				/*
2882 				 * Fix ICMP checksum (of the offening ICMP
2883 				 * query packet) to compensate the change
2884 				 * in the ICMP id of the offending ICMP
2885 				 * packet.
2886 				 *
2887 				 * Since you modify orgicmp->icmp_id with
2888 				 * a delta (say x) and you compensate that
2889 				 * in origicmp->icmp_cksum with a delta
2890 				 * minus x, you don't have to adjust the
2891 				 * overall icmp->icmp_cksum
2892 				 */
2893 
2894 				sum1 = ntohs(orgicmp->icmp_id);
2895 				sum2 = ntohs(nat->nat_inport);
2896 				CALC_SUMD(sum1, sum2, sumd);
2897 				orgicmp->icmp_id = nat->nat_inport;
2898 				fix_datacksum(&orgicmp->icmp_cksum, sumd);
2899 
2900 			} /* nat_dir can't be NAT_INBOUND for icmp queries */
2901 
2902 			break;
2903 
2904 		default :
2905 
2906 			break;
2907 
2908 	} /* switch (oip->ip_p) */
2909 
2910 	/*
2911 	 * Step 3
2912 	 * Make the adjustments to icmp checksum.
2913 	 */
2914 
2915 	if (sumd2 != 0) {
2916 		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
2917 		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
2918 		fix_incksum(&icmp->icmp_cksum, sumd2);
2919 	}
2920 	return nat;
2921 }
2922 
2923 
/*
 * NB: these lookups don't lock access to the list; it is assumed that this
 * has already been done!
 */
2928 
2929 /* ------------------------------------------------------------------------ */
2930 /* Function:    nat_inlookup                                                */
2931 /* Returns:     nat_t* - NULL == no match,                                  */
2932 /*                       else pointer to matching NAT entry                 */
2933 /* Parameters:  fin(I)    - pointer to packet information                   */
2934 /*              flags(I)  - NAT flags for this packet                       */
2935 /*              p(I)      - protocol for this packet                        */
2936 /*              src(I)    - source IP address                               */
2937 /*              mapdst(I) - destination IP address                          */
2938 /*                                                                          */
2939 /* Lookup a nat entry based on the mapped destination ip address/port and   */
2940 /* real source address/port.  We use this lookup when receiving a packet,   */
2941 /* we're looking for a table entry, based on the destination address.       */
2942 /*                                                                          */
2943 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
2944 /*                                                                          */
2945 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
2946 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
2947 /*                                                                          */
2948 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
2949 /*            the packet is of said protocol                                */
2950 /* ------------------------------------------------------------------------ */
2951 nat_t *nat_inlookup(fin, flags, p, src, mapdst)
2952 fr_info_t *fin;
2953 u_int flags, p;
2954 struct in_addr src , mapdst;
2955 {
2956 	u_short sport, dport;
2957 	ipnat_t *ipn;
2958 	u_int sflags;
2959 	nat_t *nat;
2960 	int nflags;
2961 	u_32_t dst;
2962 	void *ifp;
2963 	u_int hv;
2964 	ipf_stack_t *ifs = fin->fin_ifs;
2965 
2966 	if (fin != NULL)
2967 		ifp = fin->fin_ifp;
2968 	else
2969 		ifp = NULL;
2970 	sport = 0;
2971 	dport = 0;
2972 	dst = mapdst.s_addr;
2973 	sflags = flags & NAT_TCPUDPICMP;
2974 
2975 	switch (p)
2976 	{
2977 	case IPPROTO_TCP :
2978 	case IPPROTO_UDP :
2979 		sport = htons(fin->fin_data[0]);
2980 		dport = htons(fin->fin_data[1]);
2981 		break;
2982 	case IPPROTO_ICMP :
2983 		if (flags & IPN_ICMPERR)
2984 			sport = fin->fin_data[1];
2985 		else
2986 			dport = fin->fin_data[1];
2987 		break;
2988 	default :
2989 		break;
2990 	}
2991 
2992 
2993 	if ((flags & SI_WILDP) != 0)
2994 		goto find_in_wild_ports;
2995 
2996 	hv = NAT_HASH_FN(dst, dport, 0xffffffff);
2997 	hv = NAT_HASH_FN(src.s_addr, hv + sport, ifs->ifs_ipf_nattable_sz);
2998 	nat = ifs->ifs_nat_table[1][hv];
2999 	for (; nat; nat = nat->nat_hnext[1]) {
3000 		if (nat->nat_ifps[0] != NULL) {
3001 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3002 				continue;
3003 		} else if (ifp != NULL)
3004 			nat->nat_ifps[0] = ifp;
3005 
3006 		nflags = nat->nat_flags;
3007 
3008 		if (nat->nat_oip.s_addr == src.s_addr &&
3009 		    nat->nat_outip.s_addr == dst &&
3010 		    (((p == 0) &&
3011 		      (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3012 		     || (p == nat->nat_p))) {
3013 			switch (p)
3014 			{
3015 #if 0
3016 			case IPPROTO_GRE :
3017 				if (nat->nat_call[1] != fin->fin_data[0])
3018 					continue;
3019 				break;
3020 #endif
3021 			case IPPROTO_ICMP :
3022 				if ((flags & IPN_ICMPERR) != 0) {
3023 					if (nat->nat_outport != sport)
3024 						continue;
3025 				} else {
3026 					if (nat->nat_outport != dport)
3027 						continue;
3028 				}
3029 				break;
3030 			case IPPROTO_TCP :
3031 			case IPPROTO_UDP :
3032 				if (nat->nat_oport != sport)
3033 					continue;
3034 				if (nat->nat_outport != dport)
3035 					continue;
3036 				break;
3037 			default :
3038 				break;
3039 			}
3040 
3041 			ipn = nat->nat_ptr;
3042 			if ((ipn != NULL) && (nat->nat_aps != NULL))
3043 				if (appr_match(fin, nat) != 0)
3044 					continue;
3045 			return nat;
3046 		}
3047 	}
3048 
3049 	/*
3050 	 * So if we didn't find it but there are wildcard members in the hash
3051 	 * table, go back and look for them.  We do this search and update here
3052 	 * because it is modifying the NAT table and we want to do this only
3053 	 * for the first packet that matches.  The exception, of course, is
3054 	 * for "dummy" (FI_IGNORE) lookups.
3055 	 */
3056 find_in_wild_ports:
3057 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3058 		return NULL;
3059 	if (ifs->ifs_nat_stats.ns_wilds == 0)
3060 		return NULL;
3061 
3062 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3063 
3064 	hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3065 	hv = NAT_HASH_FN(src.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3066 
3067 	WRITE_ENTER(&ifs->ifs_ipf_nat);
3068 
3069 	nat = ifs->ifs_nat_table[1][hv];
3070 	for (; nat; nat = nat->nat_hnext[1]) {
3071 		if (nat->nat_ifps[0] != NULL) {
3072 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3073 				continue;
3074 		} else if (ifp != NULL)
3075 			nat->nat_ifps[0] = ifp;
3076 
3077 		if (nat->nat_p != fin->fin_p)
3078 			continue;
3079 		if (nat->nat_oip.s_addr != src.s_addr ||
3080 		    nat->nat_outip.s_addr != dst)
3081 			continue;
3082 
3083 		nflags = nat->nat_flags;
3084 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3085 			continue;
3086 
3087 		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3088 			       NAT_INBOUND) == 1) {
3089 			if ((fin->fin_flx & FI_IGNORE) != 0)
3090 				break;
3091 			if ((nflags & SI_CLONE) != 0) {
3092 				nat = fr_natclone(fin, nat);
3093 				if (nat == NULL)
3094 					break;
3095 			} else {
3096 				MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3097 				ifs->ifs_nat_stats.ns_wilds--;
3098 				MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3099 			}
3100 			nat->nat_oport = sport;
3101 			nat->nat_outport = dport;
3102 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3103 			nat_tabmove(nat, ifs);
3104 			break;
3105 		}
3106 	}
3107 
3108 	MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3109 
3110 	return nat;
3111 }
3112 
3113 
3114 /* ------------------------------------------------------------------------ */
3115 /* Function:    nat_tabmove                                                 */
3116 /* Returns:     Nil                                                         */
3117 /* Parameters:  nat(I) - pointer to NAT structure                           */
3118 /* Write Lock:  ipf_nat                                                     */
3119 /*                                                                          */
3120 /* This function is only called for TCP/UDP NAT table entries where the     */
3121 /* original was placed in the table without hashing on the ports and we now */
3122 /* want to include hashing on port numbers.                                 */
3123 /* ------------------------------------------------------------------------ */
3124 static void nat_tabmove(nat, ifs)
3125 nat_t *nat;
3126 ipf_stack_t *ifs;
3127 {
3128 	nat_t **natp;
3129 	u_int hv;
3130 
3131 	if (nat->nat_flags & SI_CLONE)
3132 		return;
3133 
3134 	/*
3135 	 * Remove the NAT entry from the old location
3136 	 */
3137 	if (nat->nat_hnext[0])
3138 		nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3139 	*nat->nat_phnext[0] = nat->nat_hnext[0];
3140 	ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3141 
3142 	if (nat->nat_hnext[1])
3143 		nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3144 	*nat->nat_phnext[1] = nat->nat_hnext[1];
3145 	ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3146 
3147 	/*
3148 	 * Add into the NAT table in the new position
3149 	 */
3150 	hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3151 	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3152 			 ifs->ifs_ipf_nattable_sz);
3153 	nat->nat_hv[0] = hv;
3154 	natp = &ifs->ifs_nat_table[0][hv];
3155 	if (*natp)
3156 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3157 	nat->nat_phnext[0] = natp;
3158 	nat->nat_hnext[0] = *natp;
3159 	*natp = nat;
3160 	ifs->ifs_nat_stats.ns_bucketlen[0][hv]++;
3161 
3162 	hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3163 	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3164 			 ifs->ifs_ipf_nattable_sz);
3165 	nat->nat_hv[1] = hv;
3166 	natp = &ifs->ifs_nat_table[1][hv];
3167 	if (*natp)
3168 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3169 	nat->nat_phnext[1] = natp;
3170 	nat->nat_hnext[1] = *natp;
3171 	*natp = nat;
3172 	ifs->ifs_nat_stats.ns_bucketlen[1][hv]++;
3173 }
3174 
3175 
3176 /* ------------------------------------------------------------------------ */
3177 /* Function:    nat_outlookup                                               */
3178 /* Returns:     nat_t* - NULL == no match,                                  */
3179 /*                       else pointer to matching NAT entry                 */
3180 /* Parameters:  fin(I)   - pointer to packet information                    */
3181 /*              flags(I) - NAT flags for this packet                        */
3182 /*              p(I)     - protocol for this packet                         */
3183 /*              src(I)   - source IP address                                */
3184 /*              dst(I)   - destination IP address                           */
3185 /*              rw(I)    - 1 == write lock on ipf_nat held, 0 == read lock. */
3186 /*                                                                          */
3187 /* Lookup a nat entry based on the source 'real' ip address/port and        */
3188 /* destination address/port.  We use this lookup when sending a packet out, */
3189 /* we're looking for a table entry, based on the source address.            */
3190 /*                                                                          */
3191 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3192 /*                                                                          */
3193 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3194 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3195 /*                                                                          */
3196 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3197 /*            the packet is of said protocol                                */
3198 /* ------------------------------------------------------------------------ */
3199 nat_t *nat_outlookup(fin, flags, p, src, dst)
3200 fr_info_t *fin;
3201 u_int flags, p;
3202 struct in_addr src , dst;
3203 {
3204 	u_short sport, dport;
3205 	u_int sflags;
3206 	ipnat_t *ipn;
3207 	u_32_t srcip;
3208 	nat_t *nat;
3209 	int nflags;
3210 	void *ifp;
3211 	u_int hv;
3212 	ipf_stack_t *ifs = fin->fin_ifs;
3213 
3214 	ifp = fin->fin_ifp;
3215 
3216 	srcip = src.s_addr;
3217 	sflags = flags & IPN_TCPUDPICMP;
3218 	sport = 0;
3219 	dport = 0;
3220 
3221 	switch (p)
3222 	{
3223 	case IPPROTO_TCP :
3224 	case IPPROTO_UDP :
3225 		sport = htons(fin->fin_data[0]);
3226 		dport = htons(fin->fin_data[1]);
3227 		break;
3228 	case IPPROTO_ICMP :
3229 		if (flags & IPN_ICMPERR)
3230 			sport = fin->fin_data[1];
3231 		else
3232 			dport = fin->fin_data[1];
3233 		break;
3234 	default :
3235 		break;
3236 	}
3237 
3238 	if ((flags & SI_WILDP) != 0)
3239 		goto find_out_wild_ports;
3240 
3241 	hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3242 	hv = NAT_HASH_FN(dst.s_addr, hv + dport, ifs->ifs_ipf_nattable_sz);
3243 	nat = ifs->ifs_nat_table[0][hv];
3244 	for (; nat; nat = nat->nat_hnext[0]) {
3245 		if (nat->nat_ifps[1] != NULL) {
3246 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3247 				continue;
3248 		} else if (ifp != NULL)
3249 			nat->nat_ifps[1] = ifp;
3250 
3251 		nflags = nat->nat_flags;
3252 
3253 		if (nat->nat_inip.s_addr == srcip &&
3254 		    nat->nat_oip.s_addr == dst.s_addr &&
3255 		    (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3256 		     || (p == nat->nat_p))) {
3257 			switch (p)
3258 			{
3259 #if 0
3260 			case IPPROTO_GRE :
3261 				if (nat->nat_call[1] != fin->fin_data[0])
3262 					continue;
3263 				break;
3264 #endif
3265 			case IPPROTO_TCP :
3266 			case IPPROTO_UDP :
3267 				if (nat->nat_oport != dport)
3268 					continue;
3269 				if (nat->nat_inport != sport)
3270 					continue;
3271 				break;
3272 			default :
3273 				break;
3274 			}
3275 
3276 			ipn = nat->nat_ptr;
3277 			if ((ipn != NULL) && (nat->nat_aps != NULL))
3278 				if (appr_match(fin, nat) != 0)
3279 					continue;
3280 			return nat;
3281 		}
3282 	}
3283 
3284 	/*
3285 	 * So if we didn't find it but there are wildcard members in the hash
3286 	 * table, go back and look for them.  We do this search and update here
3287 	 * because it is modifying the NAT table and we want to do this only
3288 	 * for the first packet that matches.  The exception, of course, is
3289 	 * for "dummy" (FI_IGNORE) lookups.
3290 	 */
3291 find_out_wild_ports:
3292 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3293 		return NULL;
3294 	if (ifs->ifs_nat_stats.ns_wilds == 0)
3295 		return NULL;
3296 
3297 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3298 
3299 	hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3300 	hv = NAT_HASH_FN(dst.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3301 
3302 	WRITE_ENTER(&ifs->ifs_ipf_nat);
3303 
3304 	nat = ifs->ifs_nat_table[0][hv];
3305 	for (; nat; nat = nat->nat_hnext[0]) {
3306 		if (nat->nat_ifps[1] != NULL) {
3307 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3308 				continue;
3309 		} else if (ifp != NULL)
3310 			nat->nat_ifps[1] = ifp;
3311 
3312 		if (nat->nat_p != fin->fin_p)
3313 			continue;
3314 		if ((nat->nat_inip.s_addr != srcip) ||
3315 		    (nat->nat_oip.s_addr != dst.s_addr))
3316 			continue;
3317 
3318 		nflags = nat->nat_flags;
3319 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3320 			continue;
3321 
3322 		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3323 			       NAT_OUTBOUND) == 1) {
3324 			if ((fin->fin_flx & FI_IGNORE) != 0)
3325 				break;
3326 			if ((nflags & SI_CLONE) != 0) {
3327 				nat = fr_natclone(fin, nat);
3328 				if (nat == NULL)
3329 					break;
3330 			} else {
3331 				MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3332 				ifs->ifs_nat_stats.ns_wilds--;
3333 				MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3334 			}
3335 			nat->nat_inport = sport;
3336 			nat->nat_oport = dport;
3337 			if (nat->nat_outport == 0)
3338 				nat->nat_outport = sport;
3339 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3340 			nat_tabmove(nat, ifs);
3341 			break;
3342 		}
3343 	}
3344 
3345 	MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3346 
3347 	return nat;
3348 }
3349 
3350 
3351 /* ------------------------------------------------------------------------ */
3352 /* Function:    nat_lookupredir                                             */
3353 /* Returns:     nat_t* - NULL == no match,                                  */
3354 /*                       else pointer to matching NAT entry                 */
3355 /* Parameters:  np(I) - pointer to description of packet to find NAT table  */
3356 /*                      entry for.                                          */
3357 /*                                                                          */
3358 /* Lookup the NAT tables to search for a matching redirect                  */
3359 /* ------------------------------------------------------------------------ */
3360 nat_t *nat_lookupredir(np, ifs)
3361 natlookup_t *np;
3362 ipf_stack_t *ifs;
3363 {
3364 	fr_info_t fi;
3365 	nat_t *nat;
3366 
3367 	bzero((char *)&fi, sizeof(fi));
3368 	if (np->nl_flags & IPN_IN) {
3369 		fi.fin_data[0] = ntohs(np->nl_realport);
3370 		fi.fin_data[1] = ntohs(np->nl_outport);
3371 	} else {
3372 		fi.fin_data[0] = ntohs(np->nl_inport);
3373 		fi.fin_data[1] = ntohs(np->nl_outport);
3374 	}
3375 	if (np->nl_flags & IPN_TCP)
3376 		fi.fin_p = IPPROTO_TCP;
3377 	else if (np->nl_flags & IPN_UDP)
3378 		fi.fin_p = IPPROTO_UDP;
3379 	else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3380 		fi.fin_p = IPPROTO_ICMP;
3381 
3382 	fi.fin_ifs = ifs;
3383 	/*
3384 	 * We can do two sorts of lookups:
3385 	 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3386 	 * - default: we have the `in' and `out' address, look for `real'.
3387 	 */
3388 	if (np->nl_flags & IPN_IN) {
3389 		if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3390 					np->nl_realip, np->nl_outip))) {
3391 			np->nl_inip = nat->nat_inip;
3392 			np->nl_inport = nat->nat_inport;
3393 		}
3394 	} else {
3395 		/*
3396 		 * If nl_inip is non null, this is a lookup based on the real
3397 		 * ip address. Else, we use the fake.
3398 		 */
3399 		if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3400 					 np->nl_inip, np->nl_outip))) {
3401 
3402 			if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3403 				fr_info_t fin;
3404 				bzero((char *)&fin, sizeof(fin));
3405 				fin.fin_p = nat->nat_p;
3406 				fin.fin_data[0] = ntohs(nat->nat_outport);
3407 				fin.fin_data[1] = ntohs(nat->nat_oport);
3408 				fin.fin_ifs = ifs;
3409 				if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3410 						 nat->nat_outip,
3411 						 nat->nat_oip) != NULL) {
3412 					np->nl_flags &= ~IPN_FINDFORWARD;
3413 				}
3414 			}
3415 
3416 			np->nl_realip = nat->nat_outip;
3417 			np->nl_realport = nat->nat_outport;
3418 		}
3419  	}
3420 
3421 	return nat;
3422 }
3423 
3424 
3425 /* ------------------------------------------------------------------------ */
3426 /* Function:    nat_match                                                   */
3427 /* Returns:     int - 0 == no match, 1 == match                             */
3428 /* Parameters:  fin(I)   - pointer to packet information                    */
3429 /*              np(I)    - pointer to NAT rule                              */
3430 /*                                                                          */
3431 /* Pull the matching of a packet against a NAT rule out of that complex     */
3432 /* loop inside fr_checknatin() and lay it out properly in its own function. */
3433 /* ------------------------------------------------------------------------ */
3434 static int nat_match(fin, np)
3435 fr_info_t *fin;
3436 ipnat_t *np;
3437 {
3438 	frtuc_t *ft;
3439 
3440 	if (fin->fin_v != 4)
3441 		return 0;
3442 
3443 	if (np->in_p && fin->fin_p != np->in_p)
3444 		return 0;
3445 
3446 	if (fin->fin_out) {
3447 		if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3448 			return 0;
3449 		if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3450 		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3451 			return 0;
3452 		if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3453 		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3454 			return 0;
3455 	} else {
3456 		if (!(np->in_redir & NAT_REDIRECT))
3457 			return 0;
3458 		if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3459 		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3460 			return 0;
3461 		if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3462 		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3463 			return 0;
3464 	}
3465 
3466 	ft = &np->in_tuc;
3467 	if (!(fin->fin_flx & FI_TCPUDP) ||
3468 	    (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3469 		if (ft->ftu_scmp || ft->ftu_dcmp)
3470 			return 0;
3471 		return 1;
3472 	}
3473 
3474 	return fr_tcpudpchk(fin, ft);
3475 }
3476 
3477 
3478 /* ------------------------------------------------------------------------ */
3479 /* Function:    nat_update                                                  */
3480 /* Returns:     Nil                                                         */
3481 /* Parameters:  nat(I)    - pointer to NAT structure                        */
3482 /*              np(I)     - pointer to NAT rule                             */
3483 /*                                                                          */
3484 /* Updates the lifetime of a NAT table entry for non-TCP packets.  Must be  */
3485 /* called with fin_rev updated - i.e. after calling nat_proto().            */
3486 /* ------------------------------------------------------------------------ */
3487 void nat_update(fin, nat, np)
3488 fr_info_t *fin;
3489 nat_t *nat;
3490 ipnat_t *np;
3491 {
3492 	ipftq_t *ifq, *ifq2;
3493 	ipftqent_t *tqe;
3494 	ipf_stack_t *ifs = fin->fin_ifs;
3495 
3496 	MUTEX_ENTER(&nat->nat_lock);
3497 	tqe = &nat->nat_tqe;
3498 	ifq = tqe->tqe_ifq;
3499 
3500 	/*
3501 	 * We allow over-riding of NAT timeouts from NAT rules, even for
3502 	 * TCP, however, if it is TCP and there is no rule timeout set,
3503 	 * then do not update the timeout here.
3504 	 */
3505 	if (np != NULL)
3506 		ifq2 = np->in_tqehead[fin->fin_rev];
3507 	else
3508 		ifq2 = NULL;
3509 
3510 	if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3511 		(void) fr_tcp_age(&nat->nat_tqe, fin, ifs->ifs_nat_tqb, 0);
3512 	} else {
3513 		if (ifq2 == NULL) {
3514 			if (nat->nat_p == IPPROTO_UDP)
3515 				ifq2 = &ifs->ifs_nat_udptq;
3516 			else if (nat->nat_p == IPPROTO_ICMP)
3517 				ifq2 = &ifs->ifs_nat_icmptq;
3518 			else
3519 				ifq2 = &ifs->ifs_nat_iptq;
3520 		}
3521 
3522 		fr_movequeue(tqe, ifq, ifq2, ifs);
3523 	}
3524 	MUTEX_EXIT(&nat->nat_lock);
3525 }
3526 
3527 
3528 /* ------------------------------------------------------------------------ */
3529 /* Function:    fr_checknatout                                              */
3530 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3531 /*                     0 == no packet translation occurred,                 */
3532 /*                     1 == packet was successfully translated.             */
3533 /* Parameters:  fin(I)   - pointer to packet information                    */
3534 /*              passp(I) - pointer to filtering result flags                */
3535 /*                                                                          */
3536 /* Check to see if an outcoming packet should be changed.  ICMP packets are */
3537 /* first checked to see if they match an existing entry (if an error),      */
3538 /* otherwise a search of the current NAT table is made.  If neither results */
3539 /* in a match then a search for a matching NAT rule is made.  Create a new  */
3540 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
3541 /* packet header(s) as required.                                            */
3542 /* ------------------------------------------------------------------------ */
3543 int fr_checknatout(fin, passp)
3544 fr_info_t *fin;
3545 u_32_t *passp;
3546 {
3547 	struct ifnet *ifp, *sifp;
3548 	icmphdr_t *icmp = NULL;
3549 	tcphdr_t *tcp = NULL;
3550 	int rval, natfailed;
3551 	ipnat_t *np = NULL;
3552 	u_int nflags = 0;
3553 	u_32_t ipa, iph;
3554 	int natadd = 1;
3555 	frentry_t *fr;
3556 	nat_t *nat;
3557 	ipf_stack_t *ifs = fin->fin_ifs;
3558 
3559 	if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0)
3560 		return 0;
3561 
3562 	natfailed = 0;
3563 	fr = fin->fin_fr;
3564 	sifp = fin->fin_ifp;
3565 	if ((fr != NULL) && !(fr->fr_flags & FR_DUP) &&
3566 	    fr->fr_tifs[fin->fin_rev].fd_ifp &&
3567 	    fr->fr_tifs[fin->fin_rev].fd_ifp != (void *)-1)
3568 		fin->fin_ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3569 	ifp = fin->fin_ifp;
3570 
3571 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3572 		switch (fin->fin_p)
3573 		{
3574 		case IPPROTO_TCP :
3575 			nflags = IPN_TCP;
3576 			break;
3577 		case IPPROTO_UDP :
3578 			nflags = IPN_UDP;
3579 			break;
3580 		case IPPROTO_ICMP :
3581 			icmp = fin->fin_dp;
3582 
3583 			/*
3584 			 * This is an incoming packet, so the destination is
3585 			 * the icmp_id and the source port equals 0
3586 			 */
3587 			if (nat_icmpquerytype4(icmp->icmp_type))
3588 				nflags = IPN_ICMPQUERY;
3589 			break;
3590 		default :
3591 			break;
3592 		}
3593 
3594 		if ((nflags & IPN_TCPUDP))
3595 			tcp = fin->fin_dp;
3596 	}
3597 
3598 	ipa = fin->fin_saddr;
3599 
3600 	READ_ENTER(&ifs->ifs_ipf_nat);
3601 
3602 	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3603 	    (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3604 		/*EMPTY*/;
3605 	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3606 		natadd = 0;
3607 	else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3608 				      fin->fin_src, fin->fin_dst))) {
3609 		nflags = nat->nat_flags;
3610 	} else {
3611 		u_32_t hv, msk, nmsk;
3612 
3613 		/*
3614 		 * If there is no current entry in the nat table for this IP#,
3615 		 * create one for it (if there is a matching rule).
3616 		 */
3617 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3618 		msk = 0xffffffff;
3619 		nmsk = ifs->ifs_nat_masks;
3620 		WRITE_ENTER(&ifs->ifs_ipf_nat);
3621 maskloop:
3622 		iph = ipa & htonl(msk);
3623 		hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_natrules_sz);
3624 		for (np = ifs->ifs_nat_rules[hv]; np; np = np->in_mnext)
3625 		{
3626 			if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
3627 				continue;
3628 			if (np->in_v != fin->fin_v)
3629 				continue;
3630 			if (np->in_p && (np->in_p != fin->fin_p))
3631 				continue;
3632 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3633 				continue;
3634 			if (np->in_flags & IPN_FILTER) {
3635 				if (!nat_match(fin, np))
3636 					continue;
3637 			} else if ((ipa & np->in_inmsk) != np->in_inip)
3638 				continue;
3639 
3640 			if ((fr != NULL) &&
3641 			    !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3642 				continue;
3643 
3644 			if (*np->in_plabel != '\0') {
3645 				if (((np->in_flags & IPN_FILTER) == 0) &&
3646 				    (np->in_dport != tcp->th_dport))
3647 					continue;
3648 				if (appr_ok(fin, tcp, np) == 0)
3649 					continue;
3650 			}
3651 
3652 			if ((nat = nat_new(fin, np, NULL, nflags,
3653 					   NAT_OUTBOUND))) {
3654 				np->in_hits++;
3655 				break;
3656 			} else
3657 				natfailed = -1;
3658 		}
3659 		if ((np == NULL) && (nmsk != 0)) {
3660 			while (nmsk) {
3661 				msk <<= 1;
3662 				if (nmsk & 0x80000000)
3663 					break;
3664 				nmsk <<= 1;
3665 			}
3666 			if (nmsk != 0) {
3667 				nmsk <<= 1;
3668 				goto maskloop;
3669 			}
3670 		}
3671 		MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3672 	}
3673 
3674 	if (nat != NULL) {
3675 		rval = fr_natout(fin, nat, natadd, nflags);
3676 		if (rval == 1) {
3677 			MUTEX_ENTER(&nat->nat_lock);
3678 			nat->nat_ref++;
3679 			MUTEX_EXIT(&nat->nat_lock);
3680 			fin->fin_nat = nat;
3681 		}
3682 	} else
3683 		rval = natfailed;
3684 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3685 
3686 	if (rval == -1) {
3687 		if (passp != NULL)
3688 			*passp = FR_BLOCK;
3689 		fin->fin_flx |= FI_BADNAT;
3690 	}
3691 	fin->fin_ifp = sifp;
3692 	return rval;
3693 }
3694 
3695 /* ------------------------------------------------------------------------ */
3696 /* Function:    fr_natout                                                   */
3697 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3698 /*                     1 == packet was successfully translated.             */
3699 /* Parameters:  fin(I)    - pointer to packet information                   */
3700 /*              nat(I)    - pointer to NAT structure                        */
3701 /*              natadd(I) - flag indicating if it is safe to add frag cache */
3702 /*              nflags(I) - NAT flags set for this packet                   */
3703 /*                                                                          */
3704 /* Translate a packet coming "out" on an interface.                         */
3705 /* ------------------------------------------------------------------------ */
3706 int fr_natout(fin, nat, natadd, nflags)
3707 fr_info_t *fin;
3708 nat_t *nat;
3709 int natadd;
3710 u_32_t nflags;
3711 {
3712 	icmphdr_t *icmp;
3713 	u_short *csump;
3714 	u_32_t sumd;
3715 	tcphdr_t *tcp;
3716 	ipnat_t *np;
3717 	int i;
3718 	ipf_stack_t *ifs = fin->fin_ifs;
3719 
3720 #if SOLARIS && defined(_KERNEL)
3721 	net_data_t net_data_p;
3722 	if (fin->fin_v == 4)
3723 		net_data_p = ifs->ifs_ipf_ipv4;
3724 	else
3725 		net_data_p = ifs->ifs_ipf_ipv6;
3726 #endif
3727 
3728 	tcp = NULL;
3729 	icmp = NULL;
3730 	csump = NULL;
3731 	np = nat->nat_ptr;
3732 
3733 	if ((natadd != 0) && (fin->fin_flx & FI_FRAG) && (np != NULL))
3734 		(void) fr_nat_newfrag(fin, 0, nat);
3735 
3736 	MUTEX_ENTER(&nat->nat_lock);
3737 	nat->nat_bytes[1] += fin->fin_plen;
3738 	nat->nat_pkts[1]++;
3739 	MUTEX_EXIT(&nat->nat_lock);
3740 
3741 	/*
3742 	 * Fix up checksums, not by recalculating them, but
3743 	 * simply computing adjustments.
3744 	 * This is only done for STREAMS based IP implementations where the
3745 	 * checksum has already been calculated by IP.  In all other cases,
3746 	 * IPFilter is called before the checksum needs calculating so there
3747 	 * is no call to modify whatever is in the header now.
3748 	 */
3749 	ASSERT(fin->fin_m != NULL);
3750 	if (fin->fin_v == 4 && !NET_IS_HCK_L3_FULL(net_data_p, fin->fin_m)) {
3751 		if (nflags == IPN_ICMPERR) {
3752 			u_32_t s1, s2;
3753 
3754 			s1 = LONG_SUM(ntohl(fin->fin_saddr));
3755 			s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
3756 			CALC_SUMD(s1, s2, sumd);
3757 
3758 			fix_outcksum(&fin->fin_ip->ip_sum, sumd);
3759 		}
3760 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
3761     defined(linux) || defined(BRIDGE_IPF)
3762 		else {
3763 			/*
3764 			 * Strictly speaking, this isn't necessary on BSD
3765 			 * kernels because they do checksum calculation after
3766 			 * this code has run BUT if ipfilter is being used
3767 			 * to do NAT as a bridge, that code doesn't exist.
3768 			 */
3769 			if (nat->nat_dir == NAT_OUTBOUND)
3770 				fix_outcksum(&fin->fin_ip->ip_sum,
3771 					    nat->nat_ipsumd);
3772 			else
3773 				fix_incksum(&fin->fin_ip->ip_sum,
3774 				 	   nat->nat_ipsumd);
3775 		}
3776 #endif
3777 	}
3778 
3779 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3780 		if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
3781 			tcp = fin->fin_dp;
3782 
3783 			tcp->th_sport = nat->nat_outport;
3784 			fin->fin_data[0] = ntohs(nat->nat_outport);
3785 		}
3786 
3787 		if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
3788 			icmp = fin->fin_dp;
3789 			icmp->icmp_id = nat->nat_outport;
3790 		}
3791 
3792 		csump = nat_proto(fin, nat, nflags);
3793 	}
3794 
3795 	fin->fin_ip->ip_src = nat->nat_outip;
3796 
3797 	nat_update(fin, nat, np);
3798 
3799 	/*
3800 	 * The above comments do not hold for layer 4 (or higher) checksums...
3801 	 */
3802 	if (csump != NULL && !NET_IS_HCK_L4_FULL(net_data_p, fin->fin_m)) {
3803 		if (nflags & IPN_TCPUDP &&
3804 	   	    NET_IS_HCK_L4_PART(net_data_p, fin->fin_m))
3805 			sumd = nat->nat_sumd[1];
3806 		else
3807 			sumd = nat->nat_sumd[0];
3808 
3809 		if (nat->nat_dir == NAT_OUTBOUND)
3810 			fix_outcksum(csump, sumd);
3811 		else
3812 			fix_incksum(csump, sumd);
3813 	}
3814 #ifdef	IPFILTER_SYNC
3815 	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
3816 #endif
3817 	/* ------------------------------------------------------------- */
3818 	/* A few quick notes:						 */
3819 	/*	Following are test conditions prior to calling the 	 */
3820 	/*	appr_check routine.					 */
3821 	/*								 */
3822 	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
3823 	/*	with a redirect rule, we attempt to match the packet's	 */
3824 	/*	source port against in_dport, otherwise	we'd compare the */
3825 	/*	packet's destination.			 		 */
3826 	/* ------------------------------------------------------------- */
3827 	if ((np != NULL) && (np->in_apr != NULL)) {
3828 		i = appr_check(fin, nat);
3829 		if (i == 0)
3830 			i = 1;
3831 	} else
3832 		i = 1;
3833 	ATOMIC_INCL(ifs->ifs_nat_stats.ns_mapped[1]);
3834 	fin->fin_flx |= FI_NATED;
3835 	return i;
3836 }
3837 
3838 
3839 /* ------------------------------------------------------------------------ */
3840 /* Function:    fr_checknatin                                               */
3841 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3842 /*                     0 == no packet translation occurred,                 */
3843 /*                     1 == packet was successfully translated.             */
3844 /* Parameters:  fin(I)   - pointer to packet information                    */
3845 /*              passp(I) - pointer to filtering result flags                */
3846 /*                                                                          */
3847 /* Check to see if an incoming packet should be changed.  ICMP packets are  */
3848 /* first checked to see if they match an existing entry (if an error),      */
3849 /* otherwise a search of the current NAT table is made.  If neither results */
3850 /* in a match then a search for a matching NAT rule is made.  Create a new  */
3851 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
3852 /* packet header(s) as required.                                            */
3853 /* ------------------------------------------------------------------------ */
3854 int fr_checknatin(fin, passp)
3855 fr_info_t *fin;
3856 u_32_t *passp;
3857 {
3858 	u_int nflags, natadd;
3859 	int rval, natfailed;
3860 	struct ifnet *ifp;
3861 	struct in_addr in;
3862 	icmphdr_t *icmp;
3863 	tcphdr_t *tcp;
3864 	u_short dport;
3865 	ipnat_t *np;
3866 	nat_t *nat;
3867 	u_32_t iph;
3868 	ipf_stack_t *ifs = fin->fin_ifs;
3869 
3870 	if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0)
3871 		return 0;
3872 
3873 	tcp = NULL;
3874 	icmp = NULL;
3875 	dport = 0;
3876 	natadd = 1;
3877 	nflags = 0;
3878 	natfailed = 0;
3879 	ifp = fin->fin_ifp;
3880 
3881 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3882 		switch (fin->fin_p)
3883 		{
3884 		case IPPROTO_TCP :
3885 			nflags = IPN_TCP;
3886 			break;
3887 		case IPPROTO_UDP :
3888 			nflags = IPN_UDP;
3889 			break;
3890 		case IPPROTO_ICMP :
3891 			icmp = fin->fin_dp;
3892 
3893 			/*
3894 			 * This is an incoming packet, so the destination is
3895 			 * the icmp_id and the source port equals 0
3896 			 */
3897 			if (nat_icmpquerytype4(icmp->icmp_type)) {
3898 				nflags = IPN_ICMPQUERY;
3899 				dport = icmp->icmp_id;
3900 			} break;
3901 		default :
3902 			break;
3903 		}
3904 
3905 		if ((nflags & IPN_TCPUDP)) {
3906 			tcp = fin->fin_dp;
3907 			dport = tcp->th_dport;
3908 		}
3909 	}
3910 
3911 	in = fin->fin_dst;
3912 
3913 	READ_ENTER(&ifs->ifs_ipf_nat);
3914 
3915 	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3916 	    (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
3917 		/*EMPTY*/;
3918 	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3919 		natadd = 0;
3920 	else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3921 				     fin->fin_src, in))) {
3922 		nflags = nat->nat_flags;
3923 	} else {
3924 		u_32_t hv, msk, rmsk;
3925 
3926 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3927 		rmsk = ifs->ifs_rdr_masks;
3928 		msk = 0xffffffff;
3929 		WRITE_ENTER(&ifs->ifs_ipf_nat);
3930 		/*
3931 		 * If there is no current entry in the nat table for this IP#,
3932 		 * create one for it (if there is a matching rule).
3933 		 */
3934 maskloop:
3935 		iph = in.s_addr & htonl(msk);
3936 		hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_rdrrules_sz);
3937 		for (np = ifs->ifs_rdr_rules[hv]; np; np = np->in_rnext) {
3938 			if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
3939 				continue;
3940 			if (np->in_v != fin->fin_v)
3941 				continue;
3942 			if (np->in_p && (np->in_p != fin->fin_p))
3943 				continue;
3944 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3945 				continue;
3946 			if (np->in_flags & IPN_FILTER) {
3947 				if (!nat_match(fin, np))
3948 					continue;
3949 			} else {
3950 				if ((in.s_addr & np->in_outmsk) != np->in_outip)
3951 					continue;
3952 				if (np->in_pmin &&
3953 				    ((ntohs(np->in_pmax) < ntohs(dport)) ||
3954 				     (ntohs(dport) < ntohs(np->in_pmin))))
3955 					continue;
3956 			}
3957 
3958 			if (*np->in_plabel != '\0') {
3959 				if (!appr_ok(fin, tcp, np)) {
3960 					continue;
3961 				}
3962 			}
3963 
3964 			nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
3965 			if (nat != NULL) {
3966 				np->in_hits++;
3967 				break;
3968 			} else
3969 				natfailed = -1;
3970 		}
3971 
3972 		if ((np == NULL) && (rmsk != 0)) {
3973 			while (rmsk) {
3974 				msk <<= 1;
3975 				if (rmsk & 0x80000000)
3976 					break;
3977 				rmsk <<= 1;
3978 			}
3979 			if (rmsk != 0) {
3980 				rmsk <<= 1;
3981 				goto maskloop;
3982 			}
3983 		}
3984 		MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3985 	}
3986 	if (nat != NULL) {
3987 		rval = fr_natin(fin, nat, natadd, nflags);
3988 		if (rval == 1) {
3989 			MUTEX_ENTER(&nat->nat_lock);
3990 			nat->nat_ref++;
3991 			MUTEX_EXIT(&nat->nat_lock);
3992 			fin->fin_nat = nat;
3993 			fin->fin_state = nat->nat_state;
3994 		}
3995 	} else
3996 		rval = natfailed;
3997 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3998 
3999 	if (rval == -1) {
4000 		if (passp != NULL)
4001 			*passp = FR_BLOCK;
4002 		fin->fin_flx |= FI_BADNAT;
4003 	}
4004 	return rval;
4005 }
4006 
4007 
4008 /* ------------------------------------------------------------------------ */
4009 /* Function:    fr_natin                                                    */
4010 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4011 /*                     1 == packet was successfully translated.             */
4012 /* Parameters:  fin(I)    - pointer to packet information                   */
4013 /*              nat(I)    - pointer to NAT structure                        */
4014 /*              natadd(I) - flag indicating if it is safe to add frag cache */
4015 /*              nflags(I) - NAT flags set for this packet                   */
4016 /* Locks Held:  ipf_nat (READ)                                              */
4017 /*                                                                          */
4018 /* Translate a packet coming "in" on an interface.                          */
4019 /* ------------------------------------------------------------------------ */
4020 int fr_natin(fin, nat, natadd, nflags)
4021 fr_info_t *fin;
4022 nat_t *nat;
4023 int natadd;
4024 u_32_t nflags;
4025 {
4026 	icmphdr_t *icmp;
4027 	u_short *csump, *csump1;
4028 	u_32_t sumd;
4029 	tcphdr_t *tcp;
4030 	ipnat_t *np;
4031 	int i;
4032 	ipf_stack_t *ifs = fin->fin_ifs;
4033 
4034 #if SOLARIS && defined(_KERNEL)
4035 	net_data_t net_data_p;
4036 	if (fin->fin_v == 4)
4037 		net_data_p = ifs->ifs_ipf_ipv4;
4038 	else
4039 		net_data_p = ifs->ifs_ipf_ipv6;
4040 #endif
4041 
4042 	tcp = NULL;
4043 	csump = NULL;
4044 	np = nat->nat_ptr;
4045 	fin->fin_fr = nat->nat_fr;
4046 
4047 	if (np != NULL) {
4048 		if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4049 			(void) fr_nat_newfrag(fin, 0, nat);
4050 
4051 	/* ------------------------------------------------------------- */
4052 	/* A few quick notes:						 */
4053 	/*	Following are test conditions prior to calling the 	 */
4054 	/*	appr_check routine.					 */
4055 	/*								 */
4056 	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4057 	/*	with a map rule, we attempt to match the packet's	 */
4058 	/*	source port against in_dport, otherwise	we'd compare the */
4059 	/*	packet's destination.			 		 */
4060 	/* ------------------------------------------------------------- */
4061 		if (np->in_apr != NULL) {
4062 			i = appr_check(fin, nat);
4063 			if (i == -1) {
4064 				return -1;
4065 			}
4066 		}
4067 	}
4068 
4069 #ifdef	IPFILTER_SYNC
4070 	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4071 #endif
4072 
4073 	MUTEX_ENTER(&nat->nat_lock);
4074 	nat->nat_bytes[0] += fin->fin_plen;
4075 	nat->nat_pkts[0]++;
4076 	MUTEX_EXIT(&nat->nat_lock);
4077 
4078 	fin->fin_ip->ip_dst = nat->nat_inip;
4079 	fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4080 	if (nflags & IPN_TCPUDP)
4081 		tcp = fin->fin_dp;
4082 
4083 	/*
4084 	 * Fix up checksums, not by recalculating them, but
4085 	 * simply computing adjustments.
4086 	 * Why only do this for some platforms on inbound packets ?
4087 	 * Because for those that it is done, IP processing is yet to happen
4088 	 * and so the IPv4 header checksum has not yet been evaluated.
4089 	 * Perhaps it should always be done for the benefit of things like
4090 	 * fast forwarding (so that it doesn't need to be recomputed) but with
4091 	 * header checksum offloading, perhaps it is a moot point.
4092 	 */
4093 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4094      defined(__osf__) || defined(linux)
4095 	if (nat->nat_dir == NAT_OUTBOUND)
4096 		fix_incksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
4097 	else
4098 		fix_outcksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
4099 #endif
4100 
4101 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4102 		if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4103 			tcp->th_dport = nat->nat_inport;
4104 			fin->fin_data[1] = ntohs(nat->nat_inport);
4105 		}
4106 
4107 
4108 		if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4109 			icmp = fin->fin_dp;
4110 
4111 			icmp->icmp_id = nat->nat_inport;
4112 		}
4113 
4114 		csump = nat_proto(fin, nat, nflags);
4115 	}
4116 
4117 	nat_update(fin, nat, np);
4118 
4119 #if SOLARIS && defined(_KERNEL)
4120 	if (nflags & IPN_TCPUDP &&
4121 	    NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) {
4122 		sumd = nat->nat_sumd[1];
4123 		csump1 = &(fin->fin_m->b_datap->db_struioun.cksum.cksum_val.u16);
4124 		if (csump1 != NULL) {
4125 			if (nat->nat_dir == NAT_OUTBOUND)
4126 				fix_incksum(csump1, sumd);
4127 			else
4128 				fix_outcksum(csump1, sumd);
4129 		}
4130 	} else
4131 #endif
4132 		sumd = nat->nat_sumd[0];
4133 
4134 	/*
4135 	 * Inbound packets always need to have their address adjusted in case
4136 	 * code following this validates it.
4137 	 */
4138 	if (csump != NULL) {
4139 		if (nat->nat_dir == NAT_OUTBOUND)
4140 			fix_incksum(csump, sumd);
4141 		else
4142 			fix_outcksum(csump, sumd);
4143 	}
4144 	ATOMIC_INCL(ifs->ifs_nat_stats.ns_mapped[0]);
4145 	fin->fin_flx |= FI_NATED;
4146 	if (np != NULL && np->in_tag.ipt_num[0] != 0)
4147 		fin->fin_nattag = &np->in_tag;
4148 	return 1;
4149 }
4150 
4151 
4152 /* ------------------------------------------------------------------------ */
4153 /* Function:    nat_proto                                                   */
4154 /* Returns:     u_short* - pointer to transport header checksum to update,  */
4155 /*                         NULL if the transport protocol is not recognised */
4156 /*                         as needing a checksum update.                    */
4157 /* Parameters:  fin(I)    - pointer to packet information                   */
4158 /*              nat(I)    - pointer to NAT structure                        */
4159 /*              nflags(I) - NAT flags set for this packet                   */
4160 /*                                                                          */
4161 /* Return the pointer to the checksum field for each protocol so understood.*/
4162 /* If support for making other changes to a protocol header is required,    */
4163 /* that is not strictly 'address' translation, such as clamping the MSS in  */
4164 /* TCP down to a specific value, then do it from here.                      */
4165 /* ------------------------------------------------------------------------ */
4166 u_short *nat_proto(fin, nat, nflags)
4167 fr_info_t *fin;
4168 nat_t *nat;
4169 u_int nflags;
4170 {
4171 	icmphdr_t *icmp;
4172 	u_short *csump;
4173 	tcphdr_t *tcp;
4174 	udphdr_t *udp;
4175 
4176 	csump = NULL;
4177 	if (fin->fin_out == 0) {
4178 		fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4179 	} else {
4180 		fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4181 	}
4182 
4183 	switch (fin->fin_p)
4184 	{
4185 	case IPPROTO_TCP :
4186 		tcp = fin->fin_dp;
4187 
4188 		csump = &tcp->th_sum;
4189 
4190 		/*
4191 		 * Do a MSS CLAMPING on a SYN packet,
4192 		 * only deal IPv4 for now.
4193 		 */
4194 		if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4195 			nat_mssclamp(tcp, nat->nat_mssclamp, csump);
4196 
4197 		break;
4198 
4199 	case IPPROTO_UDP :
4200 		udp = fin->fin_dp;
4201 
4202 		if (udp->uh_sum)
4203 			csump = &udp->uh_sum;
4204 		break;
4205 
4206 	case IPPROTO_ICMP :
4207 		icmp = fin->fin_dp;
4208 
4209 		if ((nflags & IPN_ICMPQUERY) != 0) {
4210 			if (icmp->icmp_cksum != 0)
4211 				csump = &icmp->icmp_cksum;
4212 		}
4213 		break;
4214 	}
4215 	return csump;
4216 }
4217 
4218 
4219 /* ------------------------------------------------------------------------ */
4220 /* Function:    fr_natunload                                                */
4221 /* Returns:     Nil                                                         */
4222 /* Parameters:  Nil                                                         */
4223 /*                                                                          */
4224 /* Free all memory used by NAT structures allocated at runtime.             */
4225 /* ------------------------------------------------------------------------ */
4226 void fr_natunload(ifs)
4227 ipf_stack_t *ifs;
4228 {
4229 	ipftq_t *ifq, *ifqnext;
4230 
4231 	(void) nat_clearlist(ifs);
4232 	(void) nat_flushtable(ifs);
4233 
4234 	/*
4235 	 * Proxy timeout queues are not cleaned here because although they
4236 	 * exist on the NAT list, appr_unload is called after fr_natunload
4237 	 * and the proxies actually are responsible for them being created.
4238 	 * Should the proxy timeouts have their own list?  There's no real
4239 	 * justification as this is the only complication.
4240 	 */
4241 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4242 		ifqnext = ifq->ifq_next;
4243 		if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4244 		    (fr_deletetimeoutqueue(ifq) == 0))
4245 			fr_freetimeoutqueue(ifq, ifs);
4246 	}
4247 
4248 	if (ifs->ifs_nat_table[0] != NULL) {
4249 		KFREES(ifs->ifs_nat_table[0],
4250 		       sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4251 		ifs->ifs_nat_table[0] = NULL;
4252 	}
4253 	if (ifs->ifs_nat_table[1] != NULL) {
4254 		KFREES(ifs->ifs_nat_table[1],
4255 		       sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4256 		ifs->ifs_nat_table[1] = NULL;
4257 	}
4258 	if (ifs->ifs_nat_rules != NULL) {
4259 		KFREES(ifs->ifs_nat_rules,
4260 		       sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
4261 		ifs->ifs_nat_rules = NULL;
4262 	}
4263 	if (ifs->ifs_rdr_rules != NULL) {
4264 		KFREES(ifs->ifs_rdr_rules,
4265 		       sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
4266 		ifs->ifs_rdr_rules = NULL;
4267 	}
4268 	if (ifs->ifs_maptable != NULL) {
4269 		KFREES(ifs->ifs_maptable,
4270 		       sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
4271 		ifs->ifs_maptable = NULL;
4272 	}
4273 	if (ifs->ifs_nat_stats.ns_bucketlen[0] != NULL) {
4274 		KFREES(ifs->ifs_nat_stats.ns_bucketlen[0],
4275 		       sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4276 		ifs->ifs_nat_stats.ns_bucketlen[0] = NULL;
4277 	}
4278 	if (ifs->ifs_nat_stats.ns_bucketlen[1] != NULL) {
4279 		KFREES(ifs->ifs_nat_stats.ns_bucketlen[1],
4280 		       sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4281 		ifs->ifs_nat_stats.ns_bucketlen[1] = NULL;
4282 	}
4283 
4284 	if (ifs->ifs_fr_nat_maxbucket_reset == 1)
4285 		ifs->ifs_fr_nat_maxbucket = 0;
4286 
4287 	if (ifs->ifs_fr_nat_init == 1) {
4288 		ifs->ifs_fr_nat_init = 0;
4289 		fr_sttab_destroy(ifs->ifs_nat_tqb);
4290 
4291 		RW_DESTROY(&ifs->ifs_ipf_natfrag);
4292 		RW_DESTROY(&ifs->ifs_ipf_nat);
4293 
4294 		MUTEX_DESTROY(&ifs->ifs_ipf_nat_new);
4295 		MUTEX_DESTROY(&ifs->ifs_ipf_natio);
4296 
4297 		MUTEX_DESTROY(&ifs->ifs_nat_udptq.ifq_lock);
4298 		MUTEX_DESTROY(&ifs->ifs_nat_icmptq.ifq_lock);
4299 		MUTEX_DESTROY(&ifs->ifs_nat_iptq.ifq_lock);
4300 	}
4301 }
4302 
4303 
4304 /* ------------------------------------------------------------------------ */
4305 /* Function:    fr_natexpire                                                */
4306 /* Returns:     Nil                                                         */
4307 /* Parameters:  Nil                                                         */
4308 /*                                                                          */
4309 /* Check all of the timeout queues for entries at the top which need to be  */
4310 /* expired.                                                                 */
4311 /* ------------------------------------------------------------------------ */
4312 void fr_natexpire(ifs)
4313 ipf_stack_t *ifs;
4314 {
4315 	ipftq_t *ifq, *ifqnext;
4316 	ipftqent_t *tqe, *tqn;
4317 	int i;
4318 	SPL_INT(s);
4319 
4320 	SPL_NET(s);
4321 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4322 	for (ifq = ifs->ifs_nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4323 		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4324 			if (tqe->tqe_die > ifs->ifs_fr_ticks)
4325 				break;
4326 			tqn = tqe->tqe_next;
4327 			nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4328 		}
4329 	}
4330 
4331 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4332 		ifqnext = ifq->ifq_next;
4333 
4334 		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4335 			if (tqe->tqe_die > ifs->ifs_fr_ticks)
4336 				break;
4337 			tqn = tqe->tqe_next;
4338 			nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4339 		}
4340 	}
4341 
4342 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4343 		ifqnext = ifq->ifq_next;
4344 
4345 		if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4346 		    (ifq->ifq_ref == 0)) {
4347 			fr_freetimeoutqueue(ifq, ifs);
4348 		}
4349 	}
4350 
4351 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4352 	SPL_X(s);
4353 }
4354 
4355 
4356 /* ------------------------------------------------------------------------ */
4357 /* Function:    fr_nataddrsync                                              */
4358 /* Returns:     Nil                                                         */
4359 /* Parameters:  ifp(I) -  pointer to network interface                      */
4360 /*              addr(I) - pointer to new network address                    */
4361 /*                                                                          */
4362 /* Walk through all of the currently active NAT sessions, looking for those */
4363 /* which need to have their translated address updated (where the interface */
4364 /* matches the one passed in) and change it, recalculating the checksum sum */
4365 /* difference too.                                                          */
4366 /* ------------------------------------------------------------------------ */
4367 void fr_nataddrsync(ifp, addr, ifs)
4368 void *ifp;
4369 struct in_addr *addr;
4370 ipf_stack_t *ifs;
4371 {
4372 	u_32_t sum1, sum2, sumd;
4373 	nat_t *nat;
4374 	ipnat_t *np;
4375 	SPL_INT(s);
4376 
4377 	if (ifs->ifs_fr_running <= 0)
4378 		return;
4379 
4380 	SPL_NET(s);
4381 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4382 
4383 	if (ifs->ifs_fr_running <= 0) {
4384 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4385 		return;
4386 	}
4387 
4388 	/*
4389 	 * Change IP addresses for NAT sessions for any protocol except TCP
4390 	 * since it will break the TCP connection anyway.  The only rules
4391 	 * which will get changed are those which are "map ... -> 0/32",
4392 	 * where the rule specifies the address is taken from the interface.
4393 	 */
4394 	for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4395 		if (addr != NULL) {
4396 			if (((ifp != NULL) && ifp != (nat->nat_ifps[0])) ||
4397 			    ((nat->nat_flags & IPN_TCP) != 0))
4398 				continue;
4399 			if (((np = nat->nat_ptr) == NULL) ||
4400 			    (np->in_nip || (np->in_outmsk != 0xffffffff)))
4401 				continue;
4402 
4403 			/*
4404 			 * Change the map-to address to be the same as the
4405 			 * new one.
4406 			 */
4407 			sum1 = nat->nat_outip.s_addr;
4408 			nat->nat_outip = *addr;
4409 			sum2 = nat->nat_outip.s_addr;
4410 
4411 		} else if (((ifp == NULL) || (ifp == nat->nat_ifps[0])) &&
4412 		    !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) &&
4413 		    (np->in_outmsk == 0xffffffff) && !np->in_nip) {
4414 			struct in_addr in;
4415 
4416 			/*
4417 			 * Change the map-to address to be the same as the
4418 			 * new one.
4419 			 */
4420 			sum1 = nat->nat_outip.s_addr;
4421 			if (fr_ifpaddr(4, FRI_NORMAL, nat->nat_ifps[0],
4422 				       &in, NULL, ifs) != -1)
4423 				nat->nat_outip = in;
4424 			sum2 = nat->nat_outip.s_addr;
4425 		} else {
4426 			continue;
4427 		}
4428 
4429 		if (sum1 == sum2)
4430 			continue;
4431 		/*
4432 		 * Readjust the checksum adjustment to take into
4433 		 * account the new IP#.
4434 		 */
4435 		CALC_SUMD(sum1, sum2, sumd);
4436 		/* XXX - dont change for TCP when solaris does
4437 		 * hardware checksumming.
4438 		 */
4439 		sumd += nat->nat_sumd[0];
4440 		nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4441 		nat->nat_sumd[1] = nat->nat_sumd[0];
4442 	}
4443 
4444 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4445 	SPL_X(s);
4446 }
4447 
4448 
4449 /* ------------------------------------------------------------------------ */
4450 /* Function:    fr_natifpsync                                               */
4451 /* Returns:     Nil                                                         */
4452 /* Parameters:  action(I) - how we are syncing                              */
4453 /*              ifp(I)    - pointer to network interface                    */
4454 /*              name(I)   - name of interface to sync to                    */
4455 /*                                                                          */
4456 /* This function is used to resync the mapping of interface names and their */
4457 /* respective 'pointers'.  For "action == IPFSYNC_RESYNC", resync all       */
4458 /* interfaces by doing a new lookup of name to 'pointer'.  For "action ==   */
4459 /* IPFSYNC_NEWIFP", treat ifp as the new pointer value associated with      */
4460 /* "name" and for "action == IPFSYNC_OLDIFP", ifp is a pointer for which    */
4461 /* there is no longer any interface associated with it.                     */
4462 /* ------------------------------------------------------------------------ */
4463 void fr_natifpsync(action, ifp, name, ifs)
4464 int action;
4465 void *ifp;
4466 char *name;
4467 ipf_stack_t *ifs;
4468 {
4469 #if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL)
4470 	int s;
4471 #endif
4472 	nat_t *nat;
4473 	ipnat_t *n;
4474 
4475 	if (ifs->ifs_fr_running <= 0)
4476 		return;
4477 
4478 	SPL_NET(s);
4479 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4480 
4481 	if (ifs->ifs_fr_running <= 0) {
4482 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4483 		return;
4484 	}
4485 
4486 	switch (action)
4487 	{
4488 	case IPFSYNC_RESYNC :
4489 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4490 			if ((ifp == nat->nat_ifps[0]) ||
4491 			    (nat->nat_ifps[0] == (void *)-1)) {
4492 				nat->nat_ifps[0] =
4493 				    fr_resolvenic(nat->nat_ifnames[0], 4, ifs);
4494 			}
4495 
4496 			if ((ifp == nat->nat_ifps[1]) ||
4497 			    (nat->nat_ifps[1] == (void *)-1)) {
4498 				nat->nat_ifps[1] =
4499 				    fr_resolvenic(nat->nat_ifnames[1], 4, ifs);
4500 			}
4501 		}
4502 
4503 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4504 			if (n->in_ifps[0] == ifp ||
4505 			    n->in_ifps[0] == (void *)-1) {
4506 				n->in_ifps[0] =
4507 				    fr_resolvenic(n->in_ifnames[0], 4, ifs);
4508 			}
4509 			if (n->in_ifps[1] == ifp ||
4510 			    n->in_ifps[1] == (void *)-1) {
4511 				n->in_ifps[1] =
4512 				    fr_resolvenic(n->in_ifnames[1], 4, ifs);
4513 			}
4514 		}
4515 		break;
4516 	case IPFSYNC_NEWIFP :
4517 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4518 			if (!strncmp(name, nat->nat_ifnames[0],
4519 				     sizeof(nat->nat_ifnames[0])))
4520 				nat->nat_ifps[0] = ifp;
4521 			if (!strncmp(name, nat->nat_ifnames[1],
4522 				     sizeof(nat->nat_ifnames[1])))
4523 				nat->nat_ifps[1] = ifp;
4524 		}
4525 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4526 			if (!strncmp(name, n->in_ifnames[0],
4527 				     sizeof(n->in_ifnames[0])))
4528 				n->in_ifps[0] = ifp;
4529 			if (!strncmp(name, n->in_ifnames[1],
4530 				     sizeof(n->in_ifnames[1])))
4531 				n->in_ifps[1] = ifp;
4532 		}
4533 		break;
4534 	case IPFSYNC_OLDIFP :
4535 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4536 			if (ifp == nat->nat_ifps[0])
4537 				nat->nat_ifps[0] = (void *)-1;
4538 			if (ifp == nat->nat_ifps[1])
4539 				nat->nat_ifps[1] = (void *)-1;
4540 		}
4541 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4542 			if (n->in_ifps[0] == ifp)
4543 				n->in_ifps[0] = (void *)-1;
4544 			if (n->in_ifps[1] == ifp)
4545 				n->in_ifps[1] = (void *)-1;
4546 		}
4547 		break;
4548 	}
4549 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4550 	SPL_X(s);
4551 }
4552 
4553 
4554 /* ------------------------------------------------------------------------ */
4555 /* Function:    nat_icmpquerytype4                                          */
4556 /* Returns:     int - 1 == success, 0 == failure                            */
4557 /* Parameters:  icmptype(I) - ICMP type number                              */
4558 /*                                                                          */
4559 /* Tests to see if the ICMP type number passed is a query/response type or  */
4560 /* not.                                                                     */
4561 /* ------------------------------------------------------------------------ */
4562 static INLINE int nat_icmpquerytype4(icmptype)
4563 int icmptype;
4564 {
4565 
4566 	/*
4567 	 * For the ICMP query NAT code, it is essential that both the query
4568 	 * and the reply match on the NAT rule. Because the NAT structure
4569 	 * does not keep track of the icmptype, and a single NAT structure
4570 	 * is used for all icmp types with the same src, dest and id, we
4571 	 * simply define the replies as queries as well. The funny thing is,
4572 	 * altough it seems silly to call a reply a query, this is exactly
4573 	 * as it is defined in the IPv4 specification
4574 	 */
4575 
4576 	switch (icmptype)
4577 	{
4578 
4579 	case ICMP_ECHOREPLY:
4580 	case ICMP_ECHO:
4581 	/* route aedvertisement/solliciation is currently unsupported: */
4582 	/* it would require rewriting the ICMP data section            */
4583 	case ICMP_TSTAMP:
4584 	case ICMP_TSTAMPREPLY:
4585 	case ICMP_IREQ:
4586 	case ICMP_IREQREPLY:
4587 	case ICMP_MASKREQ:
4588 	case ICMP_MASKREPLY:
4589 		return 1;
4590 	default:
4591 		return 0;
4592 	}
4593 }
4594 
4595 
4596 /* ------------------------------------------------------------------------ */
4597 /* Function:    nat_log                                                     */
4598 /* Returns:     Nil                                                         */
4599 /* Parameters:  nat(I)  - pointer to NAT structure                          */
4600 /*              type(I) - type of log entry to create                       */
4601 /*                                                                          */
4602 /* Creates a NAT log entry.                                                 */
4603 /* ------------------------------------------------------------------------ */
4604 void nat_log(nat, type, ifs)
4605 struct nat *nat;
4606 u_int type;
4607 ipf_stack_t *ifs;
4608 {
4609 #ifdef	IPFILTER_LOG
4610 # ifndef LARGE_NAT
4611 	struct ipnat *np;
4612 	int rulen;
4613 # endif
4614 	struct natlog natl;
4615 	void *items[1];
4616 	size_t sizes[1];
4617 	int types[1];
4618 
4619 	natl.nl_inip = nat->nat_inip;
4620 	natl.nl_outip = nat->nat_outip;
4621 	natl.nl_origip = nat->nat_oip;
4622 	natl.nl_bytes[0] = nat->nat_bytes[0];
4623 	natl.nl_bytes[1] = nat->nat_bytes[1];
4624 	natl.nl_pkts[0] = nat->nat_pkts[0];
4625 	natl.nl_pkts[1] = nat->nat_pkts[1];
4626 	natl.nl_origport = nat->nat_oport;
4627 	natl.nl_inport = nat->nat_inport;
4628 	natl.nl_outport = nat->nat_outport;
4629 	natl.nl_p = nat->nat_p;
4630 	natl.nl_type = type;
4631 	natl.nl_rule = -1;
4632 # ifndef LARGE_NAT
4633 	if (nat->nat_ptr != NULL) {
4634 		for (rulen = 0, np = ifs->ifs_nat_list; np;
4635 		     np = np->in_next, rulen++)
4636 			if (np == nat->nat_ptr) {
4637 				natl.nl_rule = rulen;
4638 				break;
4639 			}
4640 	}
4641 # endif
4642 	items[0] = &natl;
4643 	sizes[0] = sizeof(natl);
4644 	types[0] = 0;
4645 
4646 	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1, ifs);
4647 #endif
4648 }
4649 
4650 
#if defined(__OpenBSD__)
/* ------------------------------------------------------------------------ */
/* Function:    nat_ifdetach                                                */
/* Returns:     Nil                                                         */
/* Parameters:  ifp(I) - pointer to network interface                       */
/*              ifs(I) - ipf stack instance, passed through to frsync()     */
/*                                                                          */
/* Compatibility interface for OpenBSD to trigger the correct updating of   */
/* interface references within IPFilter; it simply calls frsync().          */
/* ------------------------------------------------------------------------ */
void nat_ifdetach(ifp, ifs)
void *ifp;
ipf_stack_t *ifs;
{
	/* All of the work is delegated to frsync(). */
	frsync(ifp, ifs);
}
#endif
4668 
4669 
4670 /* ------------------------------------------------------------------------ */
4671 /* Function:    fr_ipnatderef                                               */
4672 /* Returns:     Nil                                                         */
4673 /* Parameters:  isp(I) - pointer to pointer to NAT rule                     */
4674 /* Write Locks: ipf_nat                                                     */
4675 /*                                                                          */
4676 /* ------------------------------------------------------------------------ */
4677 void fr_ipnatderef(inp, ifs)
4678 ipnat_t **inp;
4679 ipf_stack_t *ifs;
4680 {
4681 	ipnat_t *in;
4682 
4683 	in = *inp;
4684 	*inp = NULL;
4685 	in->in_space++;
4686 	in->in_use--;
4687 	if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
4688 		if (in->in_apr)
4689 			appr_free(in->in_apr);
4690 		KFREE(in);
4691 		ifs->ifs_nat_stats.ns_rules--;
4692 #ifdef notdef
4693 #if SOLARIS
4694 		if (ifs->ifs_nat_stats.ns_rules == 0)
4695 			ifs->ifs_pfil_delayed_copy = 1;
4696 #endif
4697 #endif
4698 	}
4699 }
4700 
4701 
4702 /* ------------------------------------------------------------------------ */
4703 /* Function:    fr_natderef                                                 */
4704 /* Returns:     Nil                                                         */
4705 /* Parameters:  isp(I) - pointer to pointer to NAT table entry              */
4706 /*                                                                          */
4707 /* Decrement the reference counter for this NAT table entry and free it if  */
4708 /* there are no more things using it.                                       */
4709 /* ------------------------------------------------------------------------ */
4710 void fr_natderef(natp, ifs)
4711 nat_t **natp;
4712 ipf_stack_t *ifs;
4713 {
4714 	nat_t *nat;
4715 
4716 	nat = *natp;
4717 	*natp = NULL;
4718 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4719 	nat->nat_ref--;
4720 	if (nat->nat_ref == 0)
4721 	    nat_delete(nat, NL_EXPIRE, ifs);
4722 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4723 }
4724 
4725 
4726 /* ------------------------------------------------------------------------ */
4727 /* Function:    fr_natclone                                                 */
4728 /* Returns:     ipstate_t* - NULL == cloning failed,                        */
4729 /*                           else pointer to new state structure            */
4730 /* Parameters:  fin(I) - pointer to packet information                      */
4731 /*              is(I)  - pointer to master state structure                  */
4732 /* Write Lock:  ipf_nat                                                     */
4733 /*                                                                          */
4734 /* Create a "duplcate" state table entry from the master.                   */
4735 /* ------------------------------------------------------------------------ */
4736 static nat_t *fr_natclone(fin, nat)
4737 fr_info_t *fin;
4738 nat_t *nat;
4739 {
4740 	frentry_t *fr;
4741 	nat_t *clone;
4742 	ipnat_t *np;
4743 	ipf_stack_t *ifs = fin->fin_ifs;
4744 
4745 	KMALLOC(clone, nat_t *);
4746 	if (clone == NULL)
4747 		return NULL;
4748 	bcopy((char *)nat, (char *)clone, sizeof(*clone));
4749 
4750 	MUTEX_NUKE(&clone->nat_lock);
4751 
4752 	clone->nat_aps = NULL;
4753 	/*
4754 	 * Initialize all these so that nat_delete() doesn't cause a crash.
4755 	 */
4756 	clone->nat_tqe.tqe_pnext = NULL;
4757 	clone->nat_tqe.tqe_next = NULL;
4758 	clone->nat_tqe.tqe_ifq = NULL;
4759 	clone->nat_tqe.tqe_parent = clone;
4760 
4761 	clone->nat_flags &= ~SI_CLONE;
4762 	clone->nat_flags |= SI_CLONED;
4763 
4764 	if (clone->nat_hm)
4765 		clone->nat_hm->hm_ref++;
4766 
4767 	if (nat_insert(clone, fin->fin_rev, ifs) == -1) {
4768 		KFREE(clone);
4769 		return NULL;
4770 	}
4771 	np = clone->nat_ptr;
4772 	if (np != NULL) {
4773 		if (ifs->ifs_nat_logging)
4774 			nat_log(clone, (u_int)np->in_redir, ifs);
4775 		np->in_use++;
4776 	}
4777 	fr = clone->nat_fr;
4778 	if (fr != NULL) {
4779 		MUTEX_ENTER(&fr->fr_lock);
4780 		fr->fr_ref++;
4781 		MUTEX_EXIT(&fr->fr_lock);
4782 	}
4783 
4784 	/*
4785 	 * Because the clone is created outside the normal loop of things and
4786 	 * TCP has special needs in terms of state, initialise the timeout
4787 	 * state of the new NAT from here.
4788 	 */
4789 	if (clone->nat_p == IPPROTO_TCP) {
4790 		(void) fr_tcp_age(&clone->nat_tqe, fin, ifs->ifs_nat_tqb,
4791 				  clone->nat_flags);
4792 	}
4793 #ifdef	IPFILTER_SYNC
4794 	clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
4795 #endif
4796 	if (ifs->ifs_nat_logging)
4797 		nat_log(clone, NL_CLONE, ifs);
4798 	return clone;
4799 }
4800 
4801 
4802 /* ------------------------------------------------------------------------ */
4803 /* Function:   nat_wildok                                                   */
4804 /* Returns:    int - 1 == packet's ports match wildcards                    */
4805 /*                   0 == packet's ports don't match wildcards              */
4806 /* Parameters: nat(I)   - NAT entry                                         */
4807 /*             sport(I) - source port                                       */
4808 /*             dport(I) - destination port                                  */
4809 /*             flags(I) - wildcard flags                                    */
4810 /*             dir(I)   - packet direction                                  */
4811 /*                                                                          */
4812 /* Use NAT entry and packet direction to determine which combination of     */
4813 /* wildcard flags should be used.                                           */
4814 /* ------------------------------------------------------------------------ */
4815 static INLINE int nat_wildok(nat, sport, dport, flags, dir)
4816 nat_t *nat;
4817 int sport;
4818 int dport;
4819 int flags;
4820 int dir;
4821 {
4822 	/*
4823 	 * When called by       dir is set to
4824 	 * nat_inlookup         NAT_INBOUND (0)
4825 	 * nat_outlookup        NAT_OUTBOUND (1)
4826 	 *
4827 	 * We simply combine the packet's direction in dir with the original
4828 	 * "intended" direction of that NAT entry in nat->nat_dir to decide
4829 	 * which combination of wildcard flags to allow.
4830 	 */
4831 
4832 	switch ((dir << 1) | nat->nat_dir)
4833 	{
4834 	case 3: /* outbound packet / outbound entry */
4835 		if (((nat->nat_inport == sport) ||
4836 		    (flags & SI_W_SPORT)) &&
4837 		    ((nat->nat_oport == dport) ||
4838 		    (flags & SI_W_DPORT)))
4839 			return 1;
4840 		break;
4841 	case 2: /* outbound packet / inbound entry */
4842 		if (((nat->nat_outport == sport) ||
4843 		    (flags & SI_W_DPORT)) &&
4844 		    ((nat->nat_oport == dport) ||
4845 		    (flags & SI_W_SPORT)))
4846 			return 1;
4847 		break;
4848 	case 1: /* inbound packet / outbound entry */
4849 		if (((nat->nat_oport == sport) ||
4850 		    (flags & SI_W_DPORT)) &&
4851 		    ((nat->nat_outport == dport) ||
4852 		    (flags & SI_W_SPORT)))
4853 			return 1;
4854 		break;
4855 	case 0: /* inbound packet / inbound entry */
4856 		if (((nat->nat_oport == sport) ||
4857 		    (flags & SI_W_SPORT)) &&
4858 		    ((nat->nat_outport == dport) ||
4859 		    (flags & SI_W_DPORT)))
4860 			return 1;
4861 		break;
4862 	default:
4863 		break;
4864 	}
4865 
4866 	return(0);
4867 }
4868 
4869 
4870 /* ------------------------------------------------------------------------ */
4871 /* Function:    nat_mssclamp                                                */
4872 /* Returns:     Nil                                                         */
4873 /* Parameters:  tcp(I)    - pointer to TCP header                           */
4874 /*              maxmss(I) - value to clamp the TCP MSS to                   */
4875 /*              csump(I)  - pointer to TCP checksum                         */
4876 /*                                                                          */
4877 /* Check for MSS option and clamp it if necessary.  If found and changed,   */
4878 /* then the TCP header checksum will be updated to reflect the change in    */
4879 /* the MSS.                                                                 */
4880 /* ------------------------------------------------------------------------ */
4881 static void nat_mssclamp(tcp, maxmss, csump)
4882 tcphdr_t *tcp;
4883 u_32_t maxmss;
4884 u_short *csump;
4885 {
4886 	u_char *cp, *ep, opt;
4887 	int hlen, advance;
4888 	u_32_t mss, sumd;
4889 
4890 	hlen = TCP_OFF(tcp) << 2;
4891 	if (hlen > sizeof(*tcp)) {
4892 		cp = (u_char *)tcp + sizeof(*tcp);
4893 		ep = (u_char *)tcp + hlen;
4894 
4895 		while (cp < ep) {
4896 			opt = cp[0];
4897 			if (opt == TCPOPT_EOL)
4898 				break;
4899 			else if (opt == TCPOPT_NOP) {
4900 				cp++;
4901 				continue;
4902 			}
4903 
4904 			if (cp + 1 >= ep)
4905 				break;
4906 			advance = cp[1];
4907 			if ((cp + advance > ep) || (advance <= 0))
4908 				break;
4909 			switch (opt)
4910 			{
4911 			case TCPOPT_MAXSEG:
4912 				if (advance != 4)
4913 					break;
4914 				mss = cp[2] * 256 + cp[3];
4915 				if (mss > maxmss) {
4916 					cp[2] = maxmss / 256;
4917 					cp[3] = maxmss & 0xff;
4918 					CALC_SUMD(mss, maxmss, sumd);
4919 					fix_outcksum(csump, sumd);
4920 				}
4921 				break;
4922 			default:
4923 				/* ignore unknown options */
4924 				break;
4925 			}
4926 
4927 			cp += advance;
4928 		}
4929 	}
4930 }
4931 
4932 
4933 /* ------------------------------------------------------------------------ */
4934 /* Function:    fr_setnatqueue                                              */
4935 /* Returns:     Nil                                                         */
4936 /* Parameters:  nat(I)- pointer to NAT structure                            */
4937 /*              rev(I) - forward(0) or reverse(1) direction                 */
4938 /* Locks:       ipf_nat (read or write)                                     */
4939 /*                                                                          */
4940 /* Put the NAT entry on its default queue entry, using rev as a helped in   */
4941 /* determining which queue it should be placed on.                          */
4942 /* ------------------------------------------------------------------------ */
4943 void fr_setnatqueue(nat, rev, ifs)
4944 nat_t *nat;
4945 int rev;
4946 ipf_stack_t *ifs;
4947 {
4948 	ipftq_t *oifq, *nifq;
4949 
4950 	if (nat->nat_ptr != NULL)
4951 		nifq = nat->nat_ptr->in_tqehead[rev];
4952 	else
4953 		nifq = NULL;
4954 
4955 	if (nifq == NULL) {
4956 		switch (nat->nat_p)
4957 		{
4958 		case IPPROTO_UDP :
4959 			nifq = &ifs->ifs_nat_udptq;
4960 			break;
4961 		case IPPROTO_ICMP :
4962 			nifq = &ifs->ifs_nat_icmptq;
4963 			break;
4964 		case IPPROTO_TCP :
4965 			nifq = ifs->ifs_nat_tqb + nat->nat_tqe.tqe_state[rev];
4966 			break;
4967 		default :
4968 			nifq = &ifs->ifs_nat_iptq;
4969 			break;
4970 		}
4971 	}
4972 
4973 	oifq = nat->nat_tqe.tqe_ifq;
4974 	/*
4975 	 * If it's currently on a timeout queue, move it from one queue to
4976 	 * another, else put it on the end of the newly determined queue.
4977 	 */
4978 	if (oifq != NULL)
4979 		fr_movequeue(&nat->nat_tqe, oifq, nifq, ifs);
4980 	else
4981 		fr_queueappend(&nat->nat_tqe, nifq, nat, ifs);
4982 	return;
4983 }
4984 
4985 /* Function:    nat_getnext                                                 */
4986 /* Returns:     int - 0 == ok, else error                                   */
4987 /* Parameters:  t(I)   - pointer to ipftoken structure                      */
4988 /*              itp(I) - pointer to ipfgeniter_t structure                  */
4989 /*                                                                          */
4990 /* Fetch the next nat/ipnat structure pointer from the linked list and      */
4991 /* copy it out to the storage space pointed to by itp_data.  The next item  */
4992 /* in the list to look at is put back in the ipftoken struture.             */
4993 /* If we call ipf_freetoken, the accompanying pointer is set to NULL because*/
4994 /* ipf_freetoken will call a deref function for us and we dont want to call */
4995 /* that twice (second time would be in the second switch statement below.   */
4996 /* ------------------------------------------------------------------------ */
4997 static int nat_getnext(t, itp, ifs)
4998 ipftoken_t *t;
4999 ipfgeniter_t *itp;
5000 ipf_stack_t *ifs;
5001 {
5002 	hostmap_t *hm, *nexthm = NULL, zerohm;
5003 	ipnat_t *ipn, *nextipnat = NULL, zeroipn;
5004 	nat_t *nat, *nextnat = NULL, zeronat;
5005 	int error = 0;
5006 
5007 	READ_ENTER(&ifs->ifs_ipf_nat);
5008 	switch (itp->igi_type)
5009 	{
5010 	case IPFGENITER_HOSTMAP :
5011 		hm = t->ipt_data;
5012 		if (hm == NULL) {
5013 			nexthm = ifs->ifs_ipf_hm_maplist;
5014 		} else {
5015 			nexthm = hm->hm_hnext;
5016 		}
5017 		if (nexthm != NULL) {
5018 			if (nexthm->hm_hnext == NULL) {
5019 				t->ipt_alive = 0;
5020 				ipf_unlinktoken(t, ifs);
5021 				KFREE(t);
5022 			} else {
5023 				/*MUTEX_ENTER(&nexthm->hm_lock);*/
5024 				nexthm->hm_ref++;
5025 				/*MUTEX_EXIT(&nextipnat->hm_lock);*/
5026 			}
5027 
5028 		} else {
5029 			bzero(&zerohm, sizeof(zerohm));
5030 			nexthm = &zerohm;
5031 			ipf_freetoken(t, ifs);
5032 		}
5033 		break;
5034 
5035 	case IPFGENITER_IPNAT :
5036 		ipn = t->ipt_data;
5037 		if (ipn == NULL) {
5038 			nextipnat = ifs->ifs_nat_list;
5039 		} else {
5040 			nextipnat = ipn->in_next;
5041 		}
5042 		if (nextipnat != NULL) {
5043 			if (nextipnat->in_next == NULL) {
5044 				t->ipt_alive = 0;
5045 				ipf_unlinktoken(t, ifs);
5046 				KFREE(t);
5047 			} else {
5048 				/* MUTEX_ENTER(&nextipnat->in_lock); */
5049 				nextipnat->in_use++;
5050 				/* MUTEX_EXIT(&nextipnat->in_lock); */
5051 			}
5052 		} else {
5053 			bzero(&zeroipn, sizeof(zeroipn));
5054 			nextipnat = &zeroipn;
5055 			ipf_freetoken(t, ifs);
5056 		}
5057 		break;
5058 
5059 	case IPFGENITER_NAT :
5060 		nat = t->ipt_data;
5061 		if (nat == NULL) {
5062 			nextnat = ifs->ifs_nat_instances;
5063 		} else {
5064 			nextnat = nat->nat_next;
5065 		}
5066 		if (nextnat != NULL) {
5067 			if (nextnat->nat_next == NULL) {
5068 				t->ipt_alive = 0;
5069 				ipf_unlinktoken(t, ifs);
5070 				KFREE(t);
5071 			} else {
5072 				MUTEX_ENTER(&nextnat->nat_lock);
5073 				nextnat->nat_ref++;
5074 				MUTEX_EXIT(&nextnat->nat_lock);
5075 			}
5076 		} else {
5077 			bzero(&zeronat, sizeof(zeronat));
5078 			nextnat = &zeronat;
5079 			ipf_freetoken(t, ifs);
5080 		}
5081 		break;
5082 	}
5083 
5084 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5085 
5086 	switch (itp->igi_type)
5087 	{
5088 	case IPFGENITER_HOSTMAP :
5089 		if (hm != NULL) {
5090 			WRITE_ENTER(&ifs->ifs_ipf_nat);
5091 			fr_hostmapderef(&hm);
5092 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5093 		}
5094 		if (nexthm->hm_hnext != NULL)
5095 			t->ipt_data = nexthm;
5096 		error = COPYOUT(nexthm, itp->igi_data, sizeof(*nexthm));
5097 		if (error != 0)
5098 			error = EFAULT;
5099 		break;
5100 
5101 	case IPFGENITER_IPNAT :
5102 		if (ipn != NULL)
5103 			fr_ipnatderef(&ipn, ifs);
5104 		if (nextipnat->in_next != NULL)
5105 			t->ipt_data = nextipnat;
5106 		error = COPYOUT(nextipnat, itp->igi_data, sizeof(*nextipnat));
5107 		if (error != 0)
5108 			error = EFAULT;
5109 		break;
5110 
5111 	case IPFGENITER_NAT :
5112 		if (nat != NULL)
5113 			fr_natderef(&nat, ifs);
5114 		if (nextnat->nat_next != NULL)
5115 			t->ipt_data = nextnat;
5116 		error = COPYOUT(nextnat, itp->igi_data, sizeof(*nextnat));
5117 		if (error != 0)
5118 			error = EFAULT;
5119 		break;
5120 	}
5121 
5122 	return error;
5123 }
5124 
5125 
5126 /* ------------------------------------------------------------------------ */
5127 /* Function:    nat_iterator                                                */
5128 /* Returns:     int - 0 == ok, else error                                   */
5129 /* Parameters:  token(I) - pointer to ipftoken structure                    */
5130 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5131 /*                                                                          */
5132 /* This function acts as a handler for the SIOCGENITER ioctls that use a    */
5133 /* generic structure to iterate through a list.  There are three different  */
5134 /* linked lists of NAT related information to go through: NAT rules, active */
5135 /* NAT mappings and the NAT fragment cache.                                 */
5136 /* ------------------------------------------------------------------------ */
5137 static int nat_iterator(token, itp, ifs)
5138 ipftoken_t *token;
5139 ipfgeniter_t *itp;
5140 ipf_stack_t *ifs;
5141 {
5142 	int error;
5143 
5144 	if (itp->igi_data == NULL)
5145 		return EFAULT;
5146 
5147 	token->ipt_subtype = itp->igi_type;
5148 
5149 	switch (itp->igi_type)
5150 	{
5151 	case IPFGENITER_HOSTMAP :
5152 	case IPFGENITER_IPNAT :
5153 	case IPFGENITER_NAT :
5154 		error = nat_getnext(token, itp, ifs);
5155 		break;
5156 	case IPFGENITER_NATFRAG :
5157 		error = fr_nextfrag(token, itp, &ifs->ifs_ipfr_natlist,
5158 				    &ifs->ifs_ipfr_nattail,
5159 				    &ifs->ifs_ipf_natfrag, ifs);
5160 		break;
5161 	default :
5162 		error = EINVAL;
5163 		break;
5164 	}
5165 
5166 	return error;
5167 }
5168