xref: /illumos-gate/usr/src/uts/common/inet/ipf/ip_nat.c (revision bb0ade0978a02d3fe0b0165cd4725fdcb593fbfb)
1 /*
2  * Copyright (C) 1995-2004 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
7  * Use is subject to license terms.
8  */
9 
#pragma ident	"%Z%%M%	%I%	%E% SMI"
11 
12 #if defined(KERNEL) || defined(_KERNEL)
13 # undef KERNEL
14 # undef _KERNEL
15 # define        KERNEL	1
16 # define        _KERNEL	1
17 #endif
18 #include <sys/errno.h>
19 #include <sys/types.h>
20 #include <sys/param.h>
21 #include <sys/time.h>
22 #include <sys/file.h>
23 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
24     defined(_KERNEL)
25 # include "opt_ipfilter_log.h"
26 #endif
27 #if !defined(_KERNEL)
28 # include <stdio.h>
29 # include <string.h>
30 # include <stdlib.h>
31 # define _KERNEL
32 # ifdef __OpenBSD__
33 struct file;
34 # endif
35 # include <sys/uio.h>
36 # undef _KERNEL
37 #endif
38 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
39 # include <sys/filio.h>
40 # include <sys/fcntl.h>
41 #else
42 # include <sys/ioctl.h>
43 #endif
44 #if !defined(AIX)
45 # include <sys/fcntl.h>
46 #endif
47 #if !defined(linux)
48 # include <sys/protosw.h>
49 #endif
50 #include <sys/socket.h>
51 #if defined(_KERNEL)
52 # include <sys/systm.h>
53 # if !defined(__SVR4) && !defined(__svr4__)
54 #  include <sys/mbuf.h>
55 # endif
56 #endif
57 #if defined(__SVR4) || defined(__svr4__)
58 # include <sys/filio.h>
59 # include <sys/byteorder.h>
60 # ifdef _KERNEL
61 #  include <sys/dditypes.h>
62 # endif
63 # include <sys/stream.h>
64 # include <sys/kmem.h>
65 #endif
66 #if __FreeBSD_version >= 300000
67 # include <sys/queue.h>
68 #endif
69 #include <net/if.h>
70 #if __FreeBSD_version >= 300000
71 # include <net/if_var.h>
72 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
73 #  include "opt_ipfilter.h"
74 # endif
75 #endif
76 #ifdef sun
77 # include <net/af.h>
78 #endif
79 #include <net/route.h>
80 #include <netinet/in.h>
81 #include <netinet/in_systm.h>
82 #include <netinet/ip.h>
83 
84 #ifdef RFC1825
85 # include <vpn/md5.h>
86 # include <vpn/ipsec.h>
87 extern struct ifnet vpnif;
88 #endif
89 
90 #if !defined(linux)
91 # include <netinet/ip_var.h>
92 #endif
93 #include <netinet/tcp.h>
94 #include <netinet/udp.h>
95 #include <netinet/ip_icmp.h>
96 #include "netinet/ip_compat.h"
97 #include <netinet/tcpip.h>
98 #include "netinet/ip_fil.h"
99 #include "netinet/ip_nat.h"
100 #include "netinet/ip_frag.h"
101 #include "netinet/ip_state.h"
102 #include "netinet/ip_proxy.h"
103 #include "netinet/ipf_stack.h"
104 #ifdef	IPFILTER_SYNC
105 #include "netinet/ip_sync.h"
106 #endif
107 #if (__FreeBSD_version >= 300000)
108 # include <sys/malloc.h>
109 #endif
110 /* END OF INCLUDES */
111 
/* Local shorthand for the IPv4 socket address structure. */
#ifdef	SOCKADDR_IN
# undef	SOCKADDR_IN
#endif
#define	SOCKADDR_IN	struct sockaddr_in
114 
#ifndef lint
/* SCCS and RCS source identification strings for this file. */
static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 darrenr Exp $";
#endif
119 
120 
/* ======================================================================== */
/* How the NAT is organised and works.                                      */
/*                                                                          */
/* Inside (interface y) NAT       Outside (interface x)                     */
/* -------------------- -+- -------------------------------------           */
/* Packet going          |   out, processed by fr_checknatout() for x       */
/* ------------>         |   ------------>                                  */
/* src=10.1.1.1          |   src=192.1.1.1                                  */
/*                       |                                                  */
/*                       |   in, processed by fr_checknatin() for x         */
/* <------------         |   <------------                                  */
/* dst=10.1.1.1          |   dst=192.1.1.1                                  */
/* -------------------- -+- -------------------------------------           */
/* fr_checknatout() - changes ip_src and if required, sport                 */
/*             - creates a new mapping, if required.                        */
/* fr_checknatin()  - changes ip_dst and if required, dport                 */
/*                                                                          */
/* In the NAT table, internal source is recorded as "in" and externally     */
/* seen as "out".                                                           */
/* ======================================================================== */
141 
142 
143 static	int	nat_flushtable __P((ipf_stack_t *));
144 static	int	nat_clearlist __P((ipf_stack_t *));
145 static	void	nat_addnat __P((struct ipnat *, ipf_stack_t *));
146 static	void	nat_addrdr __P((struct ipnat *, ipf_stack_t *));
147 static	void	nat_delete __P((struct nat *, int, ipf_stack_t *));
148 static	int	fr_natgetent __P((caddr_t, ipf_stack_t *));
149 static	int	fr_natgetsz __P((caddr_t, ipf_stack_t *));
150 static	int	fr_natputent __P((caddr_t, int, ipf_stack_t *));
151 static	void	nat_tabmove __P((nat_t *, ipf_stack_t *));
152 static	int	nat_match __P((fr_info_t *, ipnat_t *));
153 static	INLINE	int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
154 static	INLINE	int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
155 static	hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
156 				    struct in_addr, struct in_addr, u_32_t,
157 				    ipf_stack_t *));
158 static	INLINE	int nat_icmpquerytype4 __P((int));
159 static	int	nat_ruleaddrinit __P((ipnat_t *));
160 static	int	nat_siocaddnat __P((ipnat_t *, ipnat_t **, int, ipf_stack_t *));
161 static	void	nat_siocdelnat __P((ipnat_t *, ipnat_t **, int, ipf_stack_t *));
162 static	INLINE	int nat_icmperrortype4 __P((int));
163 static	INLINE	int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
164 				      tcphdr_t *, nat_t **, int));
165 static	INLINE	int nat_resolverule __P((ipnat_t *, ipf_stack_t *));
166 static	void	nat_mssclamp __P((tcphdr_t *, u_32_t, u_short *));
167 static	int	nat_getnext __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
168 static	int	nat_iterator __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
169 static	int	nat_extraflush __P((int, ipf_stack_t *));
170 static	int	nat_earlydrop __P((ipftq_t *, int, ipf_stack_t *));
171 static	int	nat_flushclosing __P((int, ipf_stack_t *));
172 
173 
174 /*
175  * Below we declare a list of constants used only in the nat_extraflush()
176  * routine.  We are placing it here, instead of in nat_extraflush() itself,
177  * because we want to make it visible to tools such as mdb, nm etc., so the
178  * values can easily be altered during debugging.
179  */
180 static	const int	idletime_tab[] = {
181 	IPF_TTLVAL(30),		/* 30 seconds */
182 	IPF_TTLVAL(1800),	/* 30 minutes */
183 	IPF_TTLVAL(43200),	/* 12 hours */
184 	IPF_TTLVAL(345600),	/* 4 days */
185 };
186 
/*
 * Non-zero when NAT entry "n" has any of the IPN_TCPUDPICMP flags set and
 * its nat_inport and nat_outport values differ.
 */
#define NAT_HAS_L4_CHANGED(n)					\
	((((n)->nat_flags) & (IPN_TCPUDPICMP)) &&		\
	 ((n)->nat_inport != (n)->nat_outport))
190 
191 /* ------------------------------------------------------------------------ */
192 /* Function:    fr_natinit                                                  */
193 /* Returns:     int - 0 == success, -1 == failure                           */
194 /* Parameters:  Nil                                                         */
195 /*                                                                          */
196 /* Initialise all of the NAT locks, tables and other structures.            */
197 /* ------------------------------------------------------------------------ */
198 int fr_natinit(ifs)
199 ipf_stack_t *ifs;
200 {
201 	int i;
202 
203 	KMALLOCS(ifs->ifs_nat_table[0], nat_t **,
204 		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
205 	if (ifs->ifs_nat_table[0] != NULL)
206 		bzero((char *)ifs->ifs_nat_table[0],
207 		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
208 	else
209 		return -1;
210 
211 	KMALLOCS(ifs->ifs_nat_table[1], nat_t **,
212 		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
213 	if (ifs->ifs_nat_table[1] != NULL)
214 		bzero((char *)ifs->ifs_nat_table[1],
215 		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
216 	else
217 		return -2;
218 
219 	KMALLOCS(ifs->ifs_nat_rules, ipnat_t **,
220 		 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
221 	if (ifs->ifs_nat_rules != NULL)
222 		bzero((char *)ifs->ifs_nat_rules,
223 		      ifs->ifs_ipf_natrules_sz * sizeof(ipnat_t *));
224 	else
225 		return -3;
226 
227 	KMALLOCS(ifs->ifs_rdr_rules, ipnat_t **,
228 		 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
229 	if (ifs->ifs_rdr_rules != NULL)
230 		bzero((char *)ifs->ifs_rdr_rules,
231 		      ifs->ifs_ipf_rdrrules_sz * sizeof(ipnat_t *));
232 	else
233 		return -4;
234 
235 	KMALLOCS(ifs->ifs_maptable, hostmap_t **,
236 		 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
237 	if (ifs->ifs_maptable != NULL)
238 		bzero((char *)ifs->ifs_maptable,
239 		      sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
240 	else
241 		return -5;
242 
243 	ifs->ifs_ipf_hm_maplist = NULL;
244 
245 	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[0], u_long *,
246 		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
247 	if (ifs->ifs_nat_stats.ns_bucketlen[0] == NULL)
248 		return -1;
249 	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[0],
250 	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
251 
252 	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[1], u_long *,
253 		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
254 	if (ifs->ifs_nat_stats.ns_bucketlen[1] == NULL)
255 		return -1;
256 	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[1],
257 	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
258 
259 	if (ifs->ifs_fr_nat_maxbucket == 0) {
260 		for (i = ifs->ifs_ipf_nattable_sz; i > 0; i >>= 1)
261 			ifs->ifs_fr_nat_maxbucket++;
262 		ifs->ifs_fr_nat_maxbucket *= 2;
263 	}
264 
265 	fr_sttab_init(ifs->ifs_nat_tqb, ifs);
266 	/*
267 	 * Increase this because we may have "keep state" following this too
268 	 * and packet storms can occur if this is removed too quickly.
269 	 */
270 	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcplastack;
271 	ifs->ifs_nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_nat_udptq;
272 	ifs->ifs_nat_udptq.ifq_ttl = ifs->ifs_fr_defnatage;
273 	ifs->ifs_nat_udptq.ifq_ref = 1;
274 	ifs->ifs_nat_udptq.ifq_head = NULL;
275 	ifs->ifs_nat_udptq.ifq_tail = &ifs->ifs_nat_udptq.ifq_head;
276 	MUTEX_INIT(&ifs->ifs_nat_udptq.ifq_lock, "nat ipftq udp tab");
277 	ifs->ifs_nat_udptq.ifq_next = &ifs->ifs_nat_icmptq;
278 	ifs->ifs_nat_icmptq.ifq_ttl = ifs->ifs_fr_defnaticmpage;
279 	ifs->ifs_nat_icmptq.ifq_ref = 1;
280 	ifs->ifs_nat_icmptq.ifq_head = NULL;
281 	ifs->ifs_nat_icmptq.ifq_tail = &ifs->ifs_nat_icmptq.ifq_head;
282 	MUTEX_INIT(&ifs->ifs_nat_icmptq.ifq_lock, "nat icmp ipftq tab");
283 	ifs->ifs_nat_icmptq.ifq_next = &ifs->ifs_nat_iptq;
284 	ifs->ifs_nat_iptq.ifq_ttl = ifs->ifs_fr_defnatipage;
285 	ifs->ifs_nat_iptq.ifq_ref = 1;
286 	ifs->ifs_nat_iptq.ifq_head = NULL;
287 	ifs->ifs_nat_iptq.ifq_tail = &ifs->ifs_nat_iptq.ifq_head;
288 	MUTEX_INIT(&ifs->ifs_nat_iptq.ifq_lock, "nat ip ipftq tab");
289 	ifs->ifs_nat_iptq.ifq_next = NULL;
290 
291 	for (i = 0; i < IPF_TCP_NSTATES; i++) {
292 		if (ifs->ifs_nat_tqb[i].ifq_ttl < ifs->ifs_fr_defnaticmpage)
293 			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnaticmpage;
294 #ifdef LARGE_NAT
295 		else if (ifs->ifs_nat_tqb[i].ifq_ttl > ifs->ifs_fr_defnatage)
296 			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnatage;
297 #endif
298 	}
299 
300 	/*
301 	 * Increase this because we may have "keep state" following
302 	 * this too and packet storms can occur if this is removed
303 	 * too quickly.
304 	 */
305 	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl =
306 	    ifs->ifs_nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
307 
308 	RWLOCK_INIT(&ifs->ifs_ipf_nat, "ipf IP NAT rwlock");
309 	RWLOCK_INIT(&ifs->ifs_ipf_natfrag, "ipf IP NAT-Frag rwlock");
310 	MUTEX_INIT(&ifs->ifs_ipf_nat_new, "ipf nat new mutex");
311 	MUTEX_INIT(&ifs->ifs_ipf_natio, "ipf nat io mutex");
312 
313 	ifs->ifs_fr_nat_init = 1;
314 
315 	return 0;
316 }
317 
318 
319 /* ------------------------------------------------------------------------ */
320 /* Function:    nat_addrdr                                                  */
321 /* Returns:     Nil                                                         */
322 /* Parameters:  n(I) - pointer to NAT rule to add                           */
323 /*                                                                          */
324 /* Adds a redirect rule to the hash table of redirect rules and the list of */
325 /* loaded NAT rules.  Updates the bitmask indicating which netmasks are in  */
326 /* use by redirect rules.                                                   */
327 /* ------------------------------------------------------------------------ */
328 static void nat_addrdr(n, ifs)
329 ipnat_t *n;
330 ipf_stack_t *ifs;
331 {
332 	ipnat_t **np;
333 	u_32_t j;
334 	u_int hv;
335 	int k;
336 
337 	k = count4bits(n->in_outmsk);
338 	if ((k >= 0) && (k != 32))
339 		ifs->ifs_rdr_masks |= 1 << k;
340 	j = (n->in_outip & n->in_outmsk);
341 	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_rdrrules_sz);
342 	np = ifs->ifs_rdr_rules + hv;
343 	while (*np != NULL)
344 		np = &(*np)->in_rnext;
345 	n->in_rnext = NULL;
346 	n->in_prnext = np;
347 	n->in_hv = hv;
348 	*np = n;
349 }
350 
351 
352 /* ------------------------------------------------------------------------ */
353 /* Function:    nat_addnat                                                  */
354 /* Returns:     Nil                                                         */
355 /* Parameters:  n(I) - pointer to NAT rule to add                           */
356 /*                                                                          */
357 /* Adds a NAT map rule to the hash table of rules and the list of  loaded   */
358 /* NAT rules.  Updates the bitmask indicating which netmasks are in use by  */
359 /* redirect rules.                                                          */
360 /* ------------------------------------------------------------------------ */
361 static void nat_addnat(n, ifs)
362 ipnat_t *n;
363 ipf_stack_t *ifs;
364 {
365 	ipnat_t **np;
366 	u_32_t j;
367 	u_int hv;
368 	int k;
369 
370 	k = count4bits(n->in_inmsk);
371 	if ((k >= 0) && (k != 32))
372 		ifs->ifs_nat_masks |= 1 << k;
373 	j = (n->in_inip & n->in_inmsk);
374 	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_natrules_sz);
375 	np = ifs->ifs_nat_rules + hv;
376 	while (*np != NULL)
377 		np = &(*np)->in_mnext;
378 	n->in_mnext = NULL;
379 	n->in_pmnext = np;
380 	n->in_hv = hv;
381 	*np = n;
382 }
383 
384 
385 /* ------------------------------------------------------------------------ */
386 /* Function:    nat_delrdr                                                  */
387 /* Returns:     Nil                                                         */
388 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
389 /*                                                                          */
390 /* Removes a redirect rule from the hash table of redirect rules.           */
391 /* ------------------------------------------------------------------------ */
392 void nat_delrdr(n)
393 ipnat_t *n;
394 {
395 	if (n->in_rnext)
396 		n->in_rnext->in_prnext = n->in_prnext;
397 	*n->in_prnext = n->in_rnext;
398 }
399 
400 
401 /* ------------------------------------------------------------------------ */
402 /* Function:    nat_delnat                                                  */
403 /* Returns:     Nil                                                         */
404 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
405 /*                                                                          */
406 /* Removes a NAT map rule from the hash table of NAT map rules.             */
407 /* ------------------------------------------------------------------------ */
408 void nat_delnat(n)
409 ipnat_t *n;
410 {
411 	if (n->in_mnext != NULL)
412 		n->in_mnext->in_pmnext = n->in_pmnext;
413 	*n->in_pmnext = n->in_mnext;
414 }
415 
416 
417 /* ------------------------------------------------------------------------ */
418 /* Function:    nat_hostmap                                                 */
419 /* Returns:     struct hostmap* - NULL if no hostmap could be created,      */
420 /*                                else a pointer to the hostmapping to use  */
421 /* Parameters:  np(I)   - pointer to NAT rule                               */
422 /*              real(I) - real IP address                                   */
423 /*              map(I)  - mapped IP address                                 */
424 /*              port(I) - destination port number                           */
425 /* Write Locks: ipf_nat                                                     */
426 /*                                                                          */
427 /* Check if an ip address has already been allocated for a given mapping    */
428 /* that is not doing port based translation.  If is not yet allocated, then */
429 /* create a new entry if a non-NULL NAT rule pointer has been supplied.     */
430 /* ------------------------------------------------------------------------ */
431 static struct hostmap *nat_hostmap(np, src, dst, map, port, ifs)
432 ipnat_t *np;
433 struct in_addr src;
434 struct in_addr dst;
435 struct in_addr map;
436 u_32_t port;
437 ipf_stack_t *ifs;
438 {
439 	hostmap_t *hm;
440 	u_int hv;
441 
442 	hv = (src.s_addr ^ dst.s_addr);
443 	hv += src.s_addr;
444 	hv += dst.s_addr;
445 	hv %= HOSTMAP_SIZE;
446 	for (hm = ifs->ifs_maptable[hv]; hm; hm = hm->hm_next)
447 		if ((hm->hm_srcip.s_addr == src.s_addr) &&
448 		    (hm->hm_dstip.s_addr == dst.s_addr) &&
449 		    ((np == NULL) || (np == hm->hm_ipnat)) &&
450 		    ((port == 0) || (port == hm->hm_port))) {
451 			hm->hm_ref++;
452 			return hm;
453 		}
454 
455 	if (np == NULL)
456 		return NULL;
457 
458 	KMALLOC(hm, hostmap_t *);
459 	if (hm) {
460 		hm->hm_hnext = ifs->ifs_ipf_hm_maplist;
461 		hm->hm_phnext = &ifs->ifs_ipf_hm_maplist;
462 		if (ifs->ifs_ipf_hm_maplist != NULL)
463 			ifs->ifs_ipf_hm_maplist->hm_phnext = &hm->hm_hnext;
464 		ifs->ifs_ipf_hm_maplist = hm;
465 
466 		hm->hm_next = ifs->ifs_maptable[hv];
467 		hm->hm_pnext = ifs->ifs_maptable + hv;
468 		if (ifs->ifs_maptable[hv] != NULL)
469 			ifs->ifs_maptable[hv]->hm_pnext = &hm->hm_next;
470 		ifs->ifs_maptable[hv] = hm;
471 		hm->hm_ipnat = np;
472 		hm->hm_srcip = src;
473 		hm->hm_dstip = dst;
474 		hm->hm_mapip = map;
475 		hm->hm_ref = 1;
476 		hm->hm_port = port;
477 		hm->hm_v = 4;
478 	}
479 	return hm;
480 }
481 
482 
483 /* ------------------------------------------------------------------------ */
484 /* Function:    fr_hostmapdel                                              */
485 /* Returns:     Nil                                                         */
486 /* Parameters:  hmp(I) - pointer to pointer to hostmap structure            */
487 /* Write Locks: ipf_nat                                                     */
488 /*                                                                          */
489 /* Decrement the references to this hostmap structure by one.  If this      */
490 /* reaches zero then remove it and free it.                                 */
491 /* ------------------------------------------------------------------------ */
492 void fr_hostmapdel(hmp)
493 struct hostmap **hmp;
494 {
495 	struct hostmap *hm;
496 
497 	hm = *hmp;
498 	*hmp = NULL;
499 
500 	hm->hm_ref--;
501 	if (hm->hm_ref == 0) {
502 		if (hm->hm_next)
503 			hm->hm_next->hm_pnext = hm->hm_pnext;
504 		*hm->hm_pnext = hm->hm_next;
505 		if (hm->hm_hnext)
506 			hm->hm_hnext->hm_phnext = hm->hm_phnext;
507 		*hm->hm_phnext = hm->hm_hnext;
508 		KFREE(hm);
509 	}
510 }
511 
512 
/* ------------------------------------------------------------------------ */
/* Function:    fix_outcksum                                                */
/* Returns:     Nil                                                         */
/* Parameters:  sp(I)  - location of 16bit checksum to update               */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Adjusts the 16bit checksum at "sp" by adding "n" with end-around carry,  */
/* for packets going out.  Nothing is done when "n" is zero.                */
/* ------------------------------------------------------------------------ */
void fix_outcksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_32_t acc;

	if (n != 0) {
		/* Recover the un-complemented sum in host order, then add. */
		acc = (u_32_t)ntohs(*sp) ^ 0xffff;
		acc += n;

		/* Wrap any carry out of the low 16 bits back in. */
		while ((acc >> 16) != 0)
			acc = (acc & 0xffff) + (acc >> 16);

		/* Complement again and store in network order. */
		*sp = htons((u_short)(acc ^ 0xffff));
	}
}
539 
540 
/* ------------------------------------------------------------------------ */
/* Function:    fix_incksum                                                 */
/* Returns:     Nil                                                         */
/* Parameters:  sp(I)  - location of 16bit checksum to update               */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Adjusts the 16bit checksum at "sp" by adding the complement of the low   */
/* 16 bits of "n", for packets going in.  Nothing is done when "n" is zero. */
/* ------------------------------------------------------------------------ */
void fix_incksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_32_t total;

	if (n == 0)
		return;

	/* Un-complemented checksum, in host order. */
	total = (u_32_t)(u_short)~ntohs(*sp);
	total += (~n) & 0xffff;

	/* Two folds are enough to absorb every carry from a 32bit sum. */
	total = (total & 0xffff) + (total >> 16);
	total = (total & 0xffff) + (total >> 16);

	*sp = htons((u_short)~total);
}
567 
568 
/* ------------------------------------------------------------------------ */
/* Function:    fix_datacksum                                               */
/* Returns:     Nil                                                         */
/* Parameters:  sp(I)  - location of 16bit checksum to update               */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Fix_datacksum is used *only* for the adjustments of checksums in the     */
/* data section of an IP packet.                                            */
/*                                                                          */
/* The only situation in which you need to do this is when NAT'ing an       */
/* ICMP error message.  Such a message contains, in its body, the IP header */
/* of the original IP packet that caused the error.                         */
/*                                                                          */
/* You can't use fix_incksum or fix_outcksum in that case, because for the  */
/* kernel the data section of the ICMP error is just data, and no special   */
/* processing like hardware cksum or ntohs processing have been done by the */
/* kernel on the data section.                                              */
/* ------------------------------------------------------------------------ */
void fix_datacksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_32_t sum;
	int pass;

	if (n == 0)
		return;

	/* Un-complemented checksum in host order, plus the adjustment. */
	sum = (u_32_t)ntohs(*sp) ^ 0xffff;
	sum += n;

	/* Fold the upper half into the lower half, twice. */
	for (pass = 0; pass < 2; pass++)
		sum = (sum & 0xffff) + (sum >> 16);

	*sp = htons((u_short)~sum);
}
605 
606 
607 /* ------------------------------------------------------------------------ */
608 /* Function:    fr_nat_ioctl                                                */
609 /* Returns:     int - 0 == success, != 0 == failure                         */
610 /* Parameters:  data(I) - pointer to ioctl data                             */
611 /*              cmd(I)  - ioctl command integer                             */
612 /*              mode(I) - file mode bits used with open                     */
613 /*                                                                          */
614 /* Processes an ioctl call made to operate on the IP Filter NAT device.     */
615 /* ------------------------------------------------------------------------ */
616 int fr_nat_ioctl(data, cmd, mode, uid, ctx, ifs)
617 ioctlcmd_t cmd;
618 caddr_t data;
619 int mode, uid;
620 void *ctx;
621 ipf_stack_t *ifs;
622 {
623 	ipnat_t *nat, *nt, *n = NULL, **np = NULL;
624 	int error = 0, ret, arg, getlock;
625 	ipnat_t natd;
626 
627 #if (BSD >= 199306) && defined(_KERNEL)
628 	if ((securelevel >= 2) && (mode & FWRITE))
629 		return EPERM;
630 #endif
631 
632 #if defined(__osf__) && defined(_KERNEL)
633 	getlock = 0;
634 #else
635 	getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
636 #endif
637 
638 	nat = NULL;     /* XXX gcc -Wuninitialized */
639 	if (cmd == (ioctlcmd_t)SIOCADNAT) {
640 		KMALLOC(nt, ipnat_t *);
641 	} else {
642 		nt = NULL;
643 	}
644 
645 	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
646 		if (mode & NAT_SYSSPACE) {
647 			bcopy(data, (char *)&natd, sizeof(natd));
648 			error = 0;
649 		} else {
650 			error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
651 		}
652 
653 	} else if (cmd == (ioctlcmd_t)SIOCIPFFL) { /* SIOCFLNAT & SIOCCNATL */
654 		BCOPYIN(data, &arg, sizeof(arg));
655 	}
656 
657 	if (error != 0)
658 		goto done;
659 
660 	/*
661 	 * For add/delete, look to see if the NAT entry is already present
662 	 */
663 	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
664 		nat = &natd;
665 		if (nat->in_v == 0)	/* For backward compat. */
666 			nat->in_v = 4;
667 		nat->in_flags &= IPN_USERFLAGS;
668 		if ((nat->in_redir & NAT_MAPBLK) == 0) {
669 			if ((nat->in_flags & IPN_SPLIT) == 0)
670 				nat->in_inip &= nat->in_inmsk;
671 			if ((nat->in_flags & IPN_IPRANGE) == 0)
672 				nat->in_outip &= nat->in_outmsk;
673 		}
674 		MUTEX_ENTER(&ifs->ifs_ipf_natio);
675 		for (np = &ifs->ifs_nat_list; ((n = *np) != NULL);
676 		     np = &n->in_next)
677 			if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
678 			    IPN_CMPSIZ) == 0) {
679 				if (nat->in_redir == NAT_REDIRECT &&
680 				    nat->in_pnext != n->in_pnext)
681 					continue;
682 				break;
683 			}
684 	}
685 
686 	switch (cmd)
687 	{
688 	case SIOCGENITER :
689 	    {
690 		ipfgeniter_t iter;
691 		ipftoken_t *token;
692 
693 		error = fr_inobj(data, &iter, IPFOBJ_GENITER);
694 		if (error != 0)
695 			break;
696 
697 		token = ipf_findtoken(iter.igi_type, uid, ctx, ifs);
698 		if (token != NULL)
699 			error  = nat_iterator(token, &iter, ifs);
700 		else
701 			error = ESRCH;
702 		RWLOCK_EXIT(&ifs->ifs_ipf_tokens);
703 		break;
704 	    }
705 #ifdef  IPFILTER_LOG
706 	case SIOCIPFFB :
707 	{
708 		int tmp;
709 
710 		if (!(mode & FWRITE))
711 			error = EPERM;
712 		else {
713 			tmp = ipflog_clear(IPL_LOGNAT, ifs);
714 			BCOPYOUT((char *)&tmp, (char *)data, sizeof(tmp));
715 		}
716 		break;
717 	}
718 	case SIOCSETLG :
719 		if (!(mode & FWRITE))
720 			error = EPERM;
721 		else {
722 			BCOPYIN((char *)data,
723 				       (char *)&ifs->ifs_nat_logging,
724 				sizeof(ifs->ifs_nat_logging));
725 		}
726 		break;
727 	case SIOCGETLG :
728 		BCOPYOUT((char *)&ifs->ifs_nat_logging, (char *)data,
729 			sizeof(ifs->ifs_nat_logging));
730 		break;
731 	case FIONREAD :
732 		arg = ifs->ifs_iplused[IPL_LOGNAT];
733 		BCOPYOUT(&arg, data, sizeof(arg));
734 		break;
735 #endif
736 	case SIOCADNAT :
737 		if (!(mode & FWRITE)) {
738 			error = EPERM;
739 		} else if (n != NULL) {
740 			error = EEXIST;
741 		} else if (nt == NULL) {
742 			error = ENOMEM;
743 		}
744 		if (error != 0) {
745 			MUTEX_EXIT(&ifs->ifs_ipf_natio);
746 			break;
747 		}
748 		bcopy((char *)nat, (char *)nt, sizeof(*n));
749 		error = nat_siocaddnat(nt, np, getlock, ifs);
750 		MUTEX_EXIT(&ifs->ifs_ipf_natio);
751 		if (error == 0)
752 			nt = NULL;
753 		break;
754 	case SIOCRMNAT :
755 		if (!(mode & FWRITE)) {
756 			error = EPERM;
757 			n = NULL;
758 		} else if (n == NULL) {
759 			error = ESRCH;
760 		}
761 
762 		if (error != 0) {
763 			MUTEX_EXIT(&ifs->ifs_ipf_natio);
764 			break;
765 		}
766 		nat_siocdelnat(n, np, getlock, ifs);
767 
768 		MUTEX_EXIT(&ifs->ifs_ipf_natio);
769 		n = NULL;
770 		break;
771 	case SIOCGNATS :
772 		ifs->ifs_nat_stats.ns_table[0] = ifs->ifs_nat_table[0];
773 		ifs->ifs_nat_stats.ns_table[1] = ifs->ifs_nat_table[1];
774 		ifs->ifs_nat_stats.ns_list = ifs->ifs_nat_list;
775 		ifs->ifs_nat_stats.ns_maptable = ifs->ifs_maptable;
776 		ifs->ifs_nat_stats.ns_maplist = ifs->ifs_ipf_hm_maplist;
777 		ifs->ifs_nat_stats.ns_nattab_max = ifs->ifs_ipf_nattable_max;
778 		ifs->ifs_nat_stats.ns_nattab_sz = ifs->ifs_ipf_nattable_sz;
779 		ifs->ifs_nat_stats.ns_rultab_sz = ifs->ifs_ipf_natrules_sz;
780 		ifs->ifs_nat_stats.ns_rdrtab_sz = ifs->ifs_ipf_rdrrules_sz;
781 		ifs->ifs_nat_stats.ns_hostmap_sz = ifs->ifs_ipf_hostmap_sz;
782 		ifs->ifs_nat_stats.ns_instances = ifs->ifs_nat_instances;
783 		ifs->ifs_nat_stats.ns_apslist = ifs->ifs_ap_sess_list;
784 		error = fr_outobj(data, &ifs->ifs_nat_stats, IPFOBJ_NATSTAT);
785 		break;
786 	case SIOCGNATL :
787 	    {
788 		natlookup_t nl;
789 
790 		if (getlock) {
791 			READ_ENTER(&ifs->ifs_ipf_nat);
792 		}
793 		error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
794 		if (nl.nl_v != 6)
795 			nl.nl_v = 4;
796 		if (error == 0) {
797 			void *ptr;
798 
799 			switch (nl.nl_v)
800 			{
801 			case 4:
802 				ptr = nat_lookupredir(&nl, ifs);
803 				break;
804 #ifdef	USE_INET6
805 			case 6:
806 				ptr = nat6_lookupredir(&nl, ifs);
807 				break;
808 #endif
809 			default:
810 				ptr = NULL;
811 				break;
812 			}
813 
814 			if (ptr != NULL) {
815 				error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
816 			} else {
817 				error = ESRCH;
818 			}
819 		}
820 		if (getlock) {
821 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
822 		}
823 		break;
824 	    }
825 	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
826 		if (!(mode & FWRITE)) {
827 			error = EPERM;
828 			break;
829 		}
830 		if (getlock) {
831 			WRITE_ENTER(&ifs->ifs_ipf_nat);
832 		}
833 		error = 0;
834 		if (arg == 0)
835 			ret = nat_flushtable(ifs);
836 		else if (arg == 1)
837 			ret = nat_clearlist(ifs);
838 		else if (arg >= 2 && arg <= 4)
839 			ret = nat_extraflush(arg - 2, ifs);
840 		else
841 			error = EINVAL;
842 		if (getlock) {
843 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
844 		}
845 		if (error == 0) {
846 			BCOPYOUT(&ret, data, sizeof(ret));
847 		}
848 		break;
849 	case SIOCPROXY :
850 		error = appr_ioctl(data, cmd, mode, ifs);
851 		break;
852 	case SIOCSTLCK :
853 		if (!(mode & FWRITE)) {
854 			error = EPERM;
855 		} else {
856 			fr_lock(data, &ifs->ifs_fr_nat_lock);
857 		}
858 		break;
859 	case SIOCSTPUT :
860 		if ((mode & FWRITE) != 0) {
861 			error = fr_natputent(data, getlock, ifs);
862 		} else {
863 			error = EACCES;
864 		}
865 		break;
866 	case SIOCSTGSZ :
867 		if (ifs->ifs_fr_nat_lock) {
868 			if (getlock) {
869 				READ_ENTER(&ifs->ifs_ipf_nat);
870 			}
871 			error = fr_natgetsz(data, ifs);
872 			if (getlock) {
873 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
874 			}
875 		} else
876 			error = EACCES;
877 		break;
878 	case SIOCSTGET :
879 		if (ifs->ifs_fr_nat_lock) {
880 			if (getlock) {
881 				READ_ENTER(&ifs->ifs_ipf_nat);
882 			}
883 			error = fr_natgetent(data, ifs);
884 			if (getlock) {
885 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
886 			}
887 		} else
888 			error = EACCES;
889 		break;
890 	case SIOCIPFDELTOK :
891 		(void) BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
892 		error = ipf_deltoken(arg, uid, ctx, ifs);
893 		break;
894 	default :
895 		error = EINVAL;
896 		break;
897 	}
898 done:
899 	if (nt)
900 		KFREE(nt);
901 	return error;
902 }
903 
904 
905 /* ------------------------------------------------------------------------ */
906 /* Function:    nat_siocaddnat                                              */
907 /* Returns:     int - 0 == success, != 0 == failure                         */
908 /* Parameters:  n(I)       - pointer to new NAT rule                        */
909 /*              np(I)      - pointer to where to insert new NAT rule        */
910 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
911 /* Mutex Locks: ipf_natio                                                   */
912 /*                                                                          */
913 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
914 /* from information passed to the kernel, then add it  to the appropriate   */
915 /* NAT rule table(s).                                                       */
916 /* ------------------------------------------------------------------------ */
917 static int nat_siocaddnat(n, np, getlock, ifs)
918 ipnat_t *n, **np;
919 int getlock;
920 ipf_stack_t *ifs;
921 {
922 	int error = 0, i, j;
923 
924 	if (nat_resolverule(n, ifs) != 0)
925 		return ENOENT;
926 
927 	if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
928 		return EINVAL;
929 
930 	n->in_use = 0;
931 	if (n->in_redir & NAT_MAPBLK)
932 		n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
933 	else if (n->in_flags & IPN_AUTOPORTMAP)
934 		n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
935 	else if (n->in_flags & IPN_IPRANGE)
936 		n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
937 	else if (n->in_flags & IPN_SPLIT)
938 		n->in_space = 2;
939 	else if (n->in_outmsk != 0)
940 		n->in_space = ~ntohl(n->in_outmsk);
941 	else
942 		n->in_space = 1;
943 
944 	/*
945 	 * Calculate the number of valid IP addresses in the output
946 	 * mapping range.  In all cases, the range is inclusive of
947 	 * the start and ending IP addresses.
948 	 * If to a CIDR address, lose 2: broadcast + network address
949 	 *                               (so subtract 1)
950 	 * If to a range, add one.
951 	 * If to a single IP address, set to 1.
952 	 */
953 	if (n->in_space) {
954 		if ((n->in_flags & IPN_IPRANGE) != 0)
955 			n->in_space += 1;
956 		else
957 			n->in_space -= 1;
958 	} else
959 		n->in_space = 1;
960 
961 #ifdef	USE_INET6
962 	if (n->in_v == 6 && (n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0 &&
963 	    !IP6_ISONES(&n->in_out[1]) && !IP6_ISZERO(&n->in_out[1]))
964 		IP6_ADD(&n->in_out[0], 1, &n->in_next6)
965 	else if (n->in_v == 6 &&
966 	    (n->in_flags & IPN_SPLIT) && (n->in_redir & NAT_REDIRECT))
967 		n->in_next6 = n->in_in[0];
968 	else if (n->in_v == 6)
969 		n->in_next6 = n->in_out[0];
970 	else
971 #endif
972 	if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
973 	    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
974 		n->in_nip = ntohl(n->in_outip) + 1;
975 	else if ((n->in_flags & IPN_SPLIT) &&
976 		 (n->in_redir & NAT_REDIRECT))
977 		n->in_nip = ntohl(n->in_inip);
978 	else
979 		n->in_nip = ntohl(n->in_outip);
980 
981 	if (n->in_redir & NAT_MAP) {
982 		n->in_pnext = ntohs(n->in_pmin);
983 		/*
984 		 * Multiply by the number of ports made available.
985 		 */
986 		if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
987 			n->in_space *= (ntohs(n->in_pmax) -
988 					ntohs(n->in_pmin) + 1);
989 			/*
990 			 * Because two different sources can map to
991 			 * different destinations but use the same
992 			 * local IP#/port #.
993 			 * If the result is smaller than in_space, then
994 			 * we may have wrapped around 32bits.
995 			 */
996 			i = n->in_inmsk;
997 			if ((i != 0) && (i != 0xffffffff)) {
998 				j = n->in_space * (~ntohl(i) + 1);
999 				if (j >= n->in_space)
1000 					n->in_space = j;
1001 				else
1002 					n->in_space = 0xffffffff;
1003 			}
1004 		}
1005 		/*
1006 		 * If no protocol is specified, multiple by 256 to allow for
1007 		 * at least one IP:IP mapping per protocol.
1008 		 */
1009 		if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
1010 				j = n->in_space * 256;
1011 				if (j >= n->in_space)
1012 					n->in_space = j;
1013 				else
1014 					n->in_space = 0xffffffff;
1015 		}
1016 	}
1017 
1018 	/* Otherwise, these fields are preset */
1019 
1020 	if (getlock) {
1021 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1022 	}
1023 	n->in_next = NULL;
1024 	*np = n;
1025 
1026 	if (n->in_age[0] != 0)
1027 	    n->in_tqehead[0] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
1028 						  n->in_age[0], ifs);
1029 
1030 	if (n->in_age[1] != 0)
1031 	    n->in_tqehead[1] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
1032 						  n->in_age[1], ifs);
1033 
1034 	if (n->in_redir & NAT_REDIRECT) {
1035 		n->in_flags &= ~IPN_NOTDST;
1036 		switch (n->in_v)
1037 		{
1038 		case 4 :
1039 			nat_addrdr(n, ifs);
1040 			break;
1041 #ifdef	USE_INET6
1042 		case 6 :
1043 			nat6_addrdr(n, ifs);
1044 			break;
1045 #endif
1046 		default :
1047 			break;
1048 		}
1049 	}
1050 	if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1051 		n->in_flags &= ~IPN_NOTSRC;
1052 		switch (n->in_v)
1053 		{
1054 		case 4 :
1055 			nat_addnat(n, ifs);
1056 			break;
1057 #ifdef	USE_INET6
1058 		case 6 :
1059 			nat6_addnat(n, ifs);
1060 			break;
1061 #endif
1062 		default :
1063 			break;
1064 		}
1065 	}
1066 	n = NULL;
1067 	ifs->ifs_nat_stats.ns_rules++;
1068 	if (getlock) {
1069 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* WRITE */
1070 	}
1071 
1072 	return error;
1073 }
1074 
1075 
1076 /* ------------------------------------------------------------------------ */
1077 /* Function:    nat_resolvrule                                              */
1078 /* Returns:     int - 0 == success, -1 == failure                           */
1079 /* Parameters:  n(I)  - pointer to NAT rule                                 */
1080 /*                                                                          */
1081 /* Resolve some of the details inside the NAT rule.  Includes resolving	    */
1082 /* any specified interfaces and proxy labels, and determines whether or not */
1083 /* all proxy labels are correctly specified.				    */
1084 /*									    */
1085 /* Called by nat_siocaddnat() (SIOCADNAT) and fr_natputent (SIOCSTPUT).     */
1086 /* ------------------------------------------------------------------------ */
1087 static int nat_resolverule(n, ifs)
1088 ipnat_t *n;
1089 ipf_stack_t *ifs;
1090 {
1091 	n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1092 	n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], n->in_v, ifs);
1093 
1094 	n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1095 	if (n->in_ifnames[1][0] == '\0') {
1096 		(void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1097 		n->in_ifps[1] = n->in_ifps[0];
1098 	} else {
1099 		n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], n->in_v, ifs);
1100 	}
1101 
1102 	if (n->in_plabel[0] != '\0') {
1103 		n->in_apr = appr_lookup(n->in_p, n->in_plabel, ifs);
1104 		if (n->in_apr == NULL)
1105 			return -1;
1106 	}
1107 	return 0;
1108 }
1109 
1110 
1111 /* ------------------------------------------------------------------------ */
1112 /* Function:    nat_siocdelnat                                              */
1113 /* Returns:     int - 0 == success, != 0 == failure                         */
1114 /* Parameters:  n(I)       - pointer to new NAT rule                        */
1115 /*              np(I)      - pointer to where to insert new NAT rule        */
1116 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
1117 /* Mutex Locks: ipf_natio                                                   */
1118 /*                                                                          */
1119 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1120 /* from information passed to the kernel, then add it  to the appropriate   */
1121 /* NAT rule table(s).                                                       */
1122 /* ------------------------------------------------------------------------ */
1123 static void nat_siocdelnat(n, np, getlock, ifs)
1124 ipnat_t *n, **np;
1125 int getlock;
1126 ipf_stack_t *ifs;
1127 {
1128 	int i;
1129 
1130 	if (getlock) {
1131 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1132 	}
1133 	if (n->in_redir & NAT_REDIRECT)
1134 		nat_delrdr(n);
1135 	if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1136 		nat_delnat(n);
1137 	if (ifs->ifs_nat_list == NULL) {
1138 		ifs->ifs_nat_masks = 0;
1139 		ifs->ifs_rdr_masks = 0;
1140 		for (i = 0; i < 4; i++) {
1141 			ifs->ifs_nat6_masks[i] = 0;
1142 			ifs->ifs_rdr6_masks[i] = 0;
1143 		}
1144 	}
1145 
1146 	if (n->in_tqehead[0] != NULL) {
1147 		if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1148 			fr_freetimeoutqueue(n->in_tqehead[0], ifs);
1149 		}
1150 	}
1151 
1152 	if (n->in_tqehead[1] != NULL) {
1153 		if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1154 			fr_freetimeoutqueue(n->in_tqehead[1], ifs);
1155 		}
1156 	}
1157 
1158 	*np = n->in_next;
1159 
1160 	if (n->in_use == 0) {
1161 		if (n->in_apr)
1162 			appr_free(n->in_apr);
1163 		KFREE(n);
1164 		ifs->ifs_nat_stats.ns_rules--;
1165 	} else {
1166 		n->in_flags |= IPN_DELETE;
1167 		n->in_next = NULL;
1168 	}
1169 	if (getlock) {
1170 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* READ/WRITE */
1171 	}
1172 }
1173 
1174 
1175 /* ------------------------------------------------------------------------ */
1176 /* Function:    fr_natgetsz                                                 */
1177 /* Returns:     int - 0 == success, != 0 is the error value.                */
1178 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1179 /*                        get the size of.                                  */
1180 /*                                                                          */
1181 /* Handle SIOCSTGSZ.                                                        */
1182 /* Return the size of the nat list entry to be copied back to user space.   */
1183 /* The size of the entry is stored in the ng_sz field and the enture natget */
1184 /* structure is copied back to the user.                                    */
1185 /* ------------------------------------------------------------------------ */
1186 static int fr_natgetsz(data, ifs)
1187 caddr_t data;
1188 ipf_stack_t *ifs;
1189 {
1190 	ap_session_t *aps;
1191 	nat_t *nat, *n;
1192 	natget_t ng;
1193 
1194 	BCOPYIN(data, &ng, sizeof(ng));
1195 
1196 	nat = ng.ng_ptr;
1197 	if (!nat) {
1198 		nat = ifs->ifs_nat_instances;
1199 		ng.ng_sz = 0;
1200 		/*
1201 		 * Empty list so the size returned is 0.  Simple.
1202 		 */
1203 		if (nat == NULL) {
1204 			BCOPYOUT(&ng, data, sizeof(ng));
1205 			return 0;
1206 		}
1207 	} else {
1208 		/*
1209 		 * Make sure the pointer we're copying from exists in the
1210 		 * current list of entries.  Security precaution to prevent
1211 		 * copying of random kernel data.
1212 		 */
1213 		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1214 			if (n == nat)
1215 				break;
1216 		if (!n)
1217 			return ESRCH;
1218 	}
1219 
1220 	/*
1221 	 * Incluse any space required for proxy data structures.
1222 	 */
1223 	ng.ng_sz = sizeof(nat_save_t);
1224 	aps = nat->nat_aps;
1225 	if (aps != NULL) {
1226 		ng.ng_sz += sizeof(ap_session_t) - 4;
1227 		if (aps->aps_data != 0)
1228 			ng.ng_sz += aps->aps_psiz;
1229 	}
1230 
1231 	BCOPYOUT(&ng, data, sizeof(ng));
1232 	return 0;
1233 }
1234 
1235 
1236 /* ------------------------------------------------------------------------ */
1237 /* Function:    fr_natgetent                                                */
1238 /* Returns:     int - 0 == success, != 0 is the error value.                */
1239 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1240 /*                        to NAT structure to copy out.                     */
1241 /*                                                                          */
1242 /* Handle SIOCSTGET.                                                        */
1243 /* Copies out NAT entry to user space.  Any additional data held for a      */
1244 /* proxy is also copied, as to is the NAT rule which was responsible for it */
1245 /* ------------------------------------------------------------------------ */
1246 static int fr_natgetent(data, ifs)
1247 caddr_t data;
1248 ipf_stack_t *ifs;
1249 {
1250 	int error, outsize;
1251 	ap_session_t *aps;
1252 	nat_save_t *ipn, ipns;
1253 	nat_t *n, *nat;
1254 
1255 	error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1256 	if (error != 0)
1257 		return error;
1258 
1259 	if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1260 		return EINVAL;
1261 
1262 	KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1263 	if (ipn == NULL)
1264 		return ENOMEM;
1265 
1266 	ipn->ipn_dsize = ipns.ipn_dsize;
1267 	nat = ipns.ipn_next;
1268 	if (nat == NULL) {
1269 		nat = ifs->ifs_nat_instances;
1270 		if (nat == NULL) {
1271 			if (ifs->ifs_nat_instances == NULL)
1272 				error = ENOENT;
1273 			goto finished;
1274 		}
1275 	} else {
1276 		/*
1277 		 * Make sure the pointer we're copying from exists in the
1278 		 * current list of entries.  Security precaution to prevent
1279 		 * copying of random kernel data.
1280 		 */
1281 		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1282 			if (n == nat)
1283 				break;
1284 		if (n == NULL) {
1285 			error = ESRCH;
1286 			goto finished;
1287 		}
1288 	}
1289 	ipn->ipn_next = nat->nat_next;
1290 
1291 	/*
1292 	 * Copy the NAT structure.
1293 	 */
1294 	bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1295 
1296 	/*
1297 	 * If we have a pointer to the NAT rule it belongs to, save that too.
1298 	 */
1299 	if (nat->nat_ptr != NULL)
1300 		bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1301 		      sizeof(ipn->ipn_ipnat));
1302 
1303 	/*
1304 	 * If we also know the NAT entry has an associated filter rule,
1305 	 * save that too.
1306 	 */
1307 	if (nat->nat_fr != NULL)
1308 		bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1309 		      sizeof(ipn->ipn_fr));
1310 
1311 	/*
1312 	 * Last but not least, if there is an application proxy session set
1313 	 * up for this NAT entry, then copy that out too, including any
1314 	 * private data saved along side it by the proxy.
1315 	 */
1316 	aps = nat->nat_aps;
1317 	outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1318 	if (aps != NULL) {
1319 		char *s;
1320 
1321 		if (outsize < sizeof(*aps)) {
1322 			error = ENOBUFS;
1323 			goto finished;
1324 		}
1325 
1326 		s = ipn->ipn_data;
1327 		bcopy((char *)aps, s, sizeof(*aps));
1328 		s += sizeof(*aps);
1329 		outsize -= sizeof(*aps);
1330 		if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1331 			bcopy(aps->aps_data, s, aps->aps_psiz);
1332 		else
1333 			error = ENOBUFS;
1334 	}
1335 	if (error == 0) {
1336 		error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1337 	}
1338 
1339 finished:
1340 	if (ipn != NULL) {
1341 		KFREES(ipn, ipns.ipn_dsize);
1342 	}
1343 	return error;
1344 }
1345 
1346 /* ------------------------------------------------------------------------ */
1347 /* Function:    nat_calc_chksum_diffs					    */
1348 /* Returns:     void							    */
1349 /* Parameters:  nat	-	pointer to NAT table entry		    */
1350 /*                                                                          */
1351 /* Function calculates chksum deltas for IP header (nat_ipsumd) and TCP/UDP */
1352 /* headers (nat_sumd). The things for L4 (UDP/TCP) get complicated when     */
1353 /* we are dealing with partial chksum offload. For these cases we need to   */
1354 /* compute a 'partial chksum delta'. The 'partial chksum delta'is stored    */
1355 /* into nat_sumd[1], while ordinary chksum delta for TCP/UDP is in 	    */
1356 /* nat_sumd[0]. 							    */
1357 /*									    */
1358 /* The function accepts initialized NAT table entry and computes the deltas */
1359 /* from nat_inip/nat_outip members. The function is called right before	    */
1360 /* the new entry is inserted into the table.				    */
1361 /*									    */
1362 /* The ipsumd (IP hedaer chksum delta adjustment) is computed as a chksum   */
1363 /* of delta between original and new IP addresses.			    */
1364 /*									    */
1365 /* the nat_sumd[0] (TCP/UDP header chksum delta adjustment) is computed as  */
1366 /* a chkusm of delta between original an new IP addrress:port tupples.	    */
1367 /*									    */
1368 /* Some facts about chksum, we should remember:				    */
1369 /*	IP header chksum covers IP header only				    */
1370 /*									    */
1371 /*	TCP/UDP chksum covers data payload and so called pseudo header	    */
1372 /*		SRC, DST IP address					    */
1373 /*		SRC, DST Port						    */
1374 /*		length of payload					    */
1375 /*									    */
1376 /* The partial chksum delta (nat_sumd[1] is used to adjust db_ckusm16	    */
1377 /* member of dblk_t structure. The db_ckusm16 member is not part of 	    */
1378 /* IP/UDP/TCP header it is 16 bit value computed by NIC driver with partial */
1379 /* chksum offload capacbility for every inbound packet. The db_cksum16 is   */
1380 /* stored along with other IP packet data in dblk_t structure and used in   */
1381 /* for IP/UDP/TCP chksum validation later in ip.c. 			    */
1382 /*									    */
1383 /* The partial chksum delta (adjustment, nat_sumd[1]) is computed as chksum */
1384 /* of delta between new and orig address. NOTE: the order of operands for   */
1385 /* partial delta operation is swapped compared to computing the IP/TCP/UDP  */
1386 /* header adjustment. It is by design see (IP_CKSUM_RECV() macro in ip.c).  */
1387 /*									    */
1388 /* ------------------------------------------------------------------------ */
1389 void nat_calc_chksum_diffs(nat)
1390 nat_t *nat;
1391 {
1392 	u_32_t	sum_orig = 0;
1393 	u_32_t	sum_changed = 0;
1394 	u_32_t	sumd;
1395 	u_32_t	ipsum_orig = 0;
1396 	u_32_t	ipsum_changed = 0;
1397 
1398 	if (nat->nat_v != 4 && nat->nat_v != 6)
1399 		return;
1400 
1401 	/*
1402 	 * the switch calculates operands for CALC_SUMD(),
1403 	 * which will compute the partial chksum delta.
1404 	 */
1405 	switch (nat->nat_dir)
1406 	{
1407 	case NAT_INBOUND:
1408 		/*
1409 		 * we are dealing with RDR rule (DST address gets
1410 		 * modified on packet from client)
1411 		 */
1412 		if (nat->nat_v == 4) {
1413 			sum_changed = LONG_SUM(ntohl(nat->nat_inip.s_addr));
1414 			sum_orig = LONG_SUM(ntohl(nat->nat_outip.s_addr));
1415 		} else {
1416 			sum_changed = LONG_SUM6(&nat->nat_inip6);
1417 			sum_orig = LONG_SUM6(&nat->nat_outip6);
1418 		}
1419 		break;
1420 	case NAT_OUTBOUND:
1421 		/*
1422 		 * we are dealing with MAP rule (SRC address gets
1423 		 * modified on packet from client)
1424 		 */
1425 		if (nat->nat_v == 4) {
1426 			sum_changed = LONG_SUM(ntohl(nat->nat_outip.s_addr));
1427 			sum_orig = LONG_SUM(ntohl(nat->nat_inip.s_addr));
1428 		} else {
1429 			sum_changed = LONG_SUM6(&nat->nat_outip6);
1430 			sum_orig = LONG_SUM6(&nat->nat_inip6);
1431 		}
1432 		break;
1433 	default: ;
1434 		break;
1435 	}
1436 
1437 	/*
1438 	 * we also preserve CALC_SUMD() operands here, for IP chksum delta
1439 	 * calculation, which happens at the end of function.
1440 	 */
1441 	ipsum_changed = sum_changed;
1442 	ipsum_orig = sum_orig;
1443 	/*
1444 	 * NOTE: the order of operands for partial chksum adjustment
1445 	 * computation has to be swapped!
1446 	 */
1447 	CALC_SUMD(sum_changed, sum_orig, sumd);
1448 	nat->nat_sumd[1] = (sumd & 0xffff) + (sumd >> 16);
1449 
1450 	if (nat->nat_flags & (IPN_TCPUDP | IPN_ICMPQUERY)) {
1451 
1452 		/*
1453 		 * switch calculates operands for CALC_SUMD(), which will
1454 		 * compute the full chksum delta.
1455 		 */
1456 		switch (nat->nat_dir)
1457 		{
1458 		case NAT_INBOUND:
1459 			if (nat->nat_v == 4) {
1460 				sum_changed = LONG_SUM(
1461 				    ntohl(nat->nat_inip.s_addr) +
1462 				    ntohs(nat->nat_inport));
1463 				sum_orig = LONG_SUM(
1464 				    ntohl(nat->nat_outip.s_addr) +
1465 				    ntohs(nat->nat_outport));
1466 			} else {
1467 				sum_changed = LONG_SUM6(&nat->nat_inip6) +
1468 				    ntohs(nat->nat_inport);
1469 				sum_orig = LONG_SUM6(&nat->nat_outip6) +
1470 				    ntohs(nat->nat_outport);
1471 			}
1472 			break;
1473 		case NAT_OUTBOUND:
1474 			if (nat->nat_v == 4) {
1475 				sum_changed = LONG_SUM(
1476 				    ntohl(nat->nat_outip.s_addr) +
1477 				    ntohs(nat->nat_outport));
1478 				sum_orig = LONG_SUM(
1479 				    ntohl(nat->nat_inip.s_addr) +
1480 				    ntohs(nat->nat_inport));
1481 			} else {
1482 				sum_changed = LONG_SUM6(&nat->nat_outip6) +
1483 				    ntohs(nat->nat_outport);
1484 				sum_orig = LONG_SUM6(&nat->nat_inip6) +
1485 				    ntohs(nat->nat_inport);
1486 			}
1487 			break;
1488 		default: ;
1489 			break;
1490 		}
1491 
1492 		CALC_SUMD(sum_orig, sum_changed, sumd);
1493 		nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
1494 
1495 		if (!(nat->nat_flags & IPN_TCPUDP)) {
1496 			/*
1497 			 * partial HW chksum offload works for TCP/UDP headers only,
1498 			 * so we need to enforce full chksum adjustment for ICMP
1499 			 */
1500 			nat->nat_sumd[1] = nat->nat_sumd[0];
1501 		}
1502 	}
1503 	else
1504 		nat->nat_sumd[0] = nat->nat_sumd[1];
1505 
1506 	/*
1507 	 * we may reuse the already computed nat_sumd[0] for IP header chksum
1508 	 * adjustment in case the L4 (TCP/UDP header) is not changed by NAT.
1509 	 */
1510 	if (nat->nat_v == 4) {
1511 		if (NAT_HAS_L4_CHANGED(nat)) {
1512 			/*
1513 			 * bad luck, NAT changes also the L4 header, use IP
1514 			 * addresses to compute chksum adjustment for IP header.
1515 			 */
1516 			CALC_SUMD(ipsum_orig, ipsum_changed, sumd);
1517 			nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
1518 		} else {
1519 			/*
1520 			 * the NAT does not change L4 hdr -> reuse chksum
1521 			 * adjustment for IP hdr.
1522 			 */
1523 			nat->nat_ipsumd = nat->nat_sumd[0];
1524 
1525 			/*
1526 			 * if L4 header does not use chksum - zero out deltas
1527 			 */
1528 			if (!(nat->nat_flags & IPN_TCPUDP)) {
1529 				nat->nat_sumd[0] = 0;
1530 				nat->nat_sumd[1] = 0;
1531 			}
1532 		}
1533 	}
1534 
1535 	return;
1536 }
1537 
1538 /* ------------------------------------------------------------------------ */
1539 /* Function:    fr_natputent                                                */
1540 /* Returns:     int - 0 == success, != 0 is the error value.                */
1541 /* Parameters:  data(I) -     pointer to natget structure with NAT          */
1542 /*                            structure information to load into the kernel */
1543 /*              getlock(I) - flag indicating whether or not a write lock    */
1544 /*                           on ipf_nat is already held.                    */
1545 /*                                                                          */
1546 /* Handle SIOCSTPUT.                                                        */
1547 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1548 /* firewall rule data structures, if pointers to them indicate so.          */
1549 /* ------------------------------------------------------------------------ */
1550 static int fr_natputent(data, getlock, ifs)
1551 caddr_t data;
1552 int getlock;
1553 ipf_stack_t *ifs;
1554 {
1555 	nat_save_t ipn, *ipnn;
1556 	ap_session_t *aps;
1557 	nat_t *n, *nat;
1558 	frentry_t *fr;
1559 	fr_info_t fin;
1560 	ipnat_t *in;
1561 	int error;
1562 
1563 	error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1564 	if (error != 0)
1565 		return error;
1566 
1567 	/*
1568 	 * Trigger automatic call to nat_extraflush() if the
1569 	 * table has reached capcity specified by hi watermark.
1570 	 */
1571 	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi)
1572 		ifs->ifs_nat_doflush = 1;
1573 
1574 	/*
1575 	 * Initialise early because of code at junkput label.
1576 	 */
1577 	in = NULL;
1578 	aps = NULL;
1579 	nat = NULL;
1580 	ipnn = NULL;
1581 
1582 	/*
1583 	 * New entry, copy in the rest of the NAT entry if it's size is more
1584 	 * than just the nat_t structure.
1585 	 */
1586 	fr = NULL;
1587 	if (ipn.ipn_dsize > sizeof(ipn)) {
1588 		if (ipn.ipn_dsize > 81920) {
1589 			error = ENOMEM;
1590 			goto junkput;
1591 		}
1592 
1593 		KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1594 		if (ipnn == NULL)
1595 			return ENOMEM;
1596 
1597 		error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1598 		if (error != 0) {
1599 			error = EFAULT;
1600 			goto junkput;
1601 		}
1602 	} else
1603 		ipnn = &ipn;
1604 
1605 	KMALLOC(nat, nat_t *);
1606 	if (nat == NULL) {
1607 		error = ENOMEM;
1608 		goto junkput;
1609 	}
1610 
1611 	bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1612 	/*
1613 	 * Initialize all these so that nat_delete() doesn't cause a crash.
1614 	 */
1615 	bzero((char *)nat, offsetof(struct nat, nat_tqe));
1616 	nat->nat_tqe.tqe_pnext = NULL;
1617 	nat->nat_tqe.tqe_next = NULL;
1618 	nat->nat_tqe.tqe_ifq = NULL;
1619 	nat->nat_tqe.tqe_parent = nat;
1620 
1621 	/*
1622 	 * Restore the rule associated with this nat session
1623 	 */
1624 	in = ipnn->ipn_nat.nat_ptr;
1625 	if (in != NULL) {
1626 		KMALLOC(in, ipnat_t *);
1627 		nat->nat_ptr = in;
1628 		if (in == NULL) {
1629 			error = ENOMEM;
1630 			goto junkput;
1631 		}
1632 		bzero((char *)in, offsetof(struct ipnat, in_next6));
1633 		bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1634 		in->in_use = 1;
1635 		in->in_flags |= IPN_DELETE;
1636 
1637 		ATOMIC_INC(ifs->ifs_nat_stats.ns_rules);
1638 
1639 		if (nat_resolverule(in, ifs) != 0) {
1640 			error = ESRCH;
1641 			goto junkput;
1642 		}
1643 	}
1644 
1645 	/*
1646 	 * Check that the NAT entry doesn't already exist in the kernel.
1647 	 */
1648 	if (nat->nat_v != 6)
1649 		nat->nat_v = 4;
1650 	bzero((char *)&fin, sizeof(fin));
1651 	fin.fin_p = nat->nat_p;
1652 	fin.fin_ifs = ifs;
1653 	if (nat->nat_dir == NAT_OUTBOUND) {
1654 		fin.fin_data[0] = ntohs(nat->nat_oport);
1655 		fin.fin_data[1] = ntohs(nat->nat_outport);
1656 		fin.fin_ifp = nat->nat_ifps[0];
1657 		if (getlock) {
1658 			READ_ENTER(&ifs->ifs_ipf_nat);
1659 		}
1660 
1661 		switch (nat->nat_v)
1662 		{
1663 		case 4:
1664 			fin.fin_v = nat->nat_v;
1665 			n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1666 			    nat->nat_oip, nat->nat_outip);
1667 			break;
1668 #ifdef USE_INET6
1669 		case 6:
1670 			n = nat6_inlookup(&fin, nat->nat_flags, fin.fin_p,
1671 			    &nat->nat_oip6.in6, &nat->nat_outip6.in6);
1672 			break;
1673 #endif
1674 		default:
1675 			n = NULL;
1676 			break;
1677 		}
1678 
1679 		if (getlock) {
1680 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1681 		}
1682 		if (n != NULL) {
1683 			error = EEXIST;
1684 			goto junkput;
1685 		}
1686 	} else if (nat->nat_dir == NAT_INBOUND) {
1687 		fin.fin_data[0] = ntohs(nat->nat_inport);
1688 		fin.fin_data[1] = ntohs(nat->nat_oport);
1689 		fin.fin_ifp = nat->nat_ifps[1];
1690 		if (getlock) {
1691 			READ_ENTER(&ifs->ifs_ipf_nat);
1692 		}
1693 
1694 		switch (nat->nat_v)
1695 		{
1696 		case 4:
1697 			n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1698 			    nat->nat_inip, nat->nat_oip);
1699 			break;
1700 #ifdef USE_INET6
1701 		case 6:
1702 			n = nat6_outlookup(&fin, nat->nat_flags, fin.fin_p,
1703 			    &nat->nat_inip6.in6, &nat->nat_oip6.in6);
1704 			break;
1705 #endif
1706 		default:
1707 			n = NULL;
1708 			break;
1709 		}
1710 
1711 		if (getlock) {
1712 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1713 		}
1714 		if (n != NULL) {
1715 			error = EEXIST;
1716 			goto junkput;
1717 		}
1718 	} else {
1719 		error = EINVAL;
1720 		goto junkput;
1721 	}
1722 
1723 	/*
1724 	 * Restore ap_session_t structure.  Include the private data allocated
1725 	 * if it was there.
1726 	 */
1727 	aps = nat->nat_aps;
1728 	if (aps != NULL) {
1729 		KMALLOC(aps, ap_session_t *);
1730 		nat->nat_aps = aps;
1731 		if (aps == NULL) {
1732 			error = ENOMEM;
1733 			goto junkput;
1734 		}
1735 		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1736 		if (in != NULL)
1737 			aps->aps_apr = in->in_apr;
1738 		else
1739 			aps->aps_apr = NULL;
1740 		if (aps->aps_psiz != 0) {
1741 			if (aps->aps_psiz > 81920) {
1742 				error = ENOMEM;
1743 				goto junkput;
1744 			}
1745 			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1746 			if (aps->aps_data == NULL) {
1747 				error = ENOMEM;
1748 				goto junkput;
1749 			}
1750 			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1751 			      aps->aps_psiz);
1752 		} else {
1753 			aps->aps_psiz = 0;
1754 			aps->aps_data = NULL;
1755 		}
1756 	}
1757 
1758 	/*
1759 	 * If there was a filtering rule associated with this entry then
1760 	 * build up a new one.
1761 	 */
1762 	fr = nat->nat_fr;
1763 	if (fr != NULL) {
1764 		if ((nat->nat_flags & SI_NEWFR) != 0) {
1765 			KMALLOC(fr, frentry_t *);
1766 			nat->nat_fr = fr;
1767 			if (fr == NULL) {
1768 				error = ENOMEM;
1769 				goto junkput;
1770 			}
1771 			ipnn->ipn_nat.nat_fr = fr;
1772 			(void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1773 			bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1774 
1775 			fr->fr_ref = 1;
1776 			fr->fr_dsize = 0;
1777 			fr->fr_data = NULL;
1778 			fr->fr_type = FR_T_NONE;
1779 
1780 			MUTEX_NUKE(&fr->fr_lock);
1781 			MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1782 		} else {
1783 			if (getlock) {
1784 				READ_ENTER(&ifs->ifs_ipf_nat);
1785 			}
1786 			for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1787 				if (n->nat_fr == fr)
1788 					break;
1789 
1790 			if (n != NULL) {
1791 				MUTEX_ENTER(&fr->fr_lock);
1792 				fr->fr_ref++;
1793 				MUTEX_EXIT(&fr->fr_lock);
1794 			}
1795 			if (getlock) {
1796 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1797 			}
1798 			if (!n) {
1799 				error = ESRCH;
1800 				goto junkput;
1801 			}
1802 		}
1803 	}
1804 
1805 	if (ipnn != &ipn) {
1806 		KFREES(ipnn, ipn.ipn_dsize);
1807 		ipnn = NULL;
1808 	}
1809 
1810 	nat_calc_chksum_diffs(nat);
1811 
1812 	if (getlock) {
1813 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1814 	}
1815 
1816 	nat_calc_chksum_diffs(nat);
1817 
1818 	switch (nat->nat_v)
1819 	{
1820 	case 4 :
1821 		error = nat_insert(nat, nat->nat_rev, ifs);
1822 		break;
1823 #ifdef USE_INET6
1824 	case 6 :
1825 		error = nat6_insert(nat, nat->nat_rev, ifs);
1826 		break;
1827 #endif
1828 	default :
1829 		break;
1830 	}
1831 
1832 	if ((error == 0) && (aps != NULL)) {
1833 		aps->aps_next = ifs->ifs_ap_sess_list;
1834 		ifs->ifs_ap_sess_list = aps;
1835 	}
1836 	if (getlock) {
1837 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1838 	}
1839 
1840 	if (error == 0)
1841 		return 0;
1842 
1843 	error = ENOMEM;
1844 
1845 junkput:
1846 	if (fr != NULL)
1847 		(void) fr_derefrule(&fr, ifs);
1848 
1849 	if ((ipnn != NULL) && (ipnn != &ipn)) {
1850 		KFREES(ipnn, ipn.ipn_dsize);
1851 	}
1852 	if (nat != NULL) {
1853 		if (aps != NULL) {
1854 			if (aps->aps_data != NULL) {
1855 				KFREES(aps->aps_data, aps->aps_psiz);
1856 			}
1857 			KFREE(aps);
1858 		}
1859 		if (in != NULL) {
1860 			if (in->in_apr)
1861 				appr_free(in->in_apr);
1862 			KFREE(in);
1863 		}
1864 		KFREE(nat);
1865 	}
1866 	return error;
1867 }
1868 
1869 
1870 /* ------------------------------------------------------------------------ */
1871 /* Function:    nat_delete                                                  */
1872 /* Returns:     Nil                                                         */
1873 /* Parameters:  natd(I)    - pointer to NAT structure to delete             */
1874 /*              logtype(I) - type of LOG record to create before deleting   */
1875 /* Write Lock:  ipf_nat                                                     */
1876 /*                                                                          */
1877 /* Delete a nat entry from the various lists and table.  If NAT logging is  */
1878 /* enabled then generate a NAT log record for this event.                   */
1879 /* ------------------------------------------------------------------------ */
1880 static void nat_delete(nat, logtype, ifs)
1881 struct nat *nat;
1882 int logtype;
1883 ipf_stack_t *ifs;
1884 {
1885 	struct ipnat *ipn;
1886 
1887 	if (logtype != 0 && ifs->ifs_nat_logging != 0)
1888 		nat_log(nat, logtype, ifs);
1889 
1890 	/*
1891 	 * Take it as a general indication that all the pointers are set if
1892 	 * nat_pnext is set.
1893 	 */
1894 	if (nat->nat_pnext != NULL) {
1895 		ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1896 		ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
1897 
1898 		*nat->nat_pnext = nat->nat_next;
1899 		if (nat->nat_next != NULL) {
1900 			nat->nat_next->nat_pnext = nat->nat_pnext;
1901 			nat->nat_next = NULL;
1902 		}
1903 		nat->nat_pnext = NULL;
1904 
1905 		*nat->nat_phnext[0] = nat->nat_hnext[0];
1906 		if (nat->nat_hnext[0] != NULL) {
1907 			nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1908 			nat->nat_hnext[0] = NULL;
1909 		}
1910 		nat->nat_phnext[0] = NULL;
1911 
1912 		*nat->nat_phnext[1] = nat->nat_hnext[1];
1913 		if (nat->nat_hnext[1] != NULL) {
1914 			nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1915 			nat->nat_hnext[1] = NULL;
1916 		}
1917 		nat->nat_phnext[1] = NULL;
1918 
1919 		if ((nat->nat_flags & SI_WILDP) != 0)
1920 			ifs->ifs_nat_stats.ns_wilds--;
1921 	}
1922 
1923 	if (nat->nat_me != NULL) {
1924 		*nat->nat_me = NULL;
1925 		nat->nat_me = NULL;
1926 	}
1927 
1928 	fr_deletequeueentry(&nat->nat_tqe);
1929 
1930 	MUTEX_ENTER(&nat->nat_lock);
1931 	if (nat->nat_ref > 1) {
1932 		nat->nat_ref--;
1933 		MUTEX_EXIT(&nat->nat_lock);
1934 		return;
1935 	}
1936 	MUTEX_EXIT(&nat->nat_lock);
1937 
1938 	/*
1939 	 * At this point, nat_ref is 1, doing "--" would make it 0..
1940 	 */
1941 	nat->nat_ref = 0;
1942 
1943 #ifdef	IPFILTER_SYNC
1944 	if (nat->nat_sync)
1945 		ipfsync_del(nat->nat_sync);
1946 #endif
1947 
1948 	if (nat->nat_fr != NULL)
1949 		(void)fr_derefrule(&nat->nat_fr, ifs);
1950 
1951 	if (nat->nat_hm != NULL)
1952 		fr_hostmapdel(&nat->nat_hm);
1953 
1954 	/*
1955 	 * If there is an active reference from the nat entry to its parent
1956 	 * rule, decrement the rule's reference count and free it too if no
1957 	 * longer being used.
1958 	 */
1959 	ipn = nat->nat_ptr;
1960 	if (ipn != NULL) {
1961 		ipn->in_space++;
1962 		ipn->in_use--;
1963 		if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) {
1964 			if (ipn->in_apr)
1965 				appr_free(ipn->in_apr);
1966 			KFREE(ipn);
1967 			ifs->ifs_nat_stats.ns_rules--;
1968 		}
1969 	}
1970 
1971 	MUTEX_DESTROY(&nat->nat_lock);
1972 
1973 	aps_free(nat->nat_aps, ifs);
1974 	ifs->ifs_nat_stats.ns_inuse--;
1975 
1976 	/*
1977 	 * If there's a fragment table entry too for this nat entry, then
1978 	 * dereference that as well.  This is after nat_lock is released
1979 	 * because of Tru64.
1980 	 */
1981 	fr_forgetnat((void *)nat, ifs);
1982 
1983 	KFREE(nat);
1984 }
1985 
1986 
1987 /* ------------------------------------------------------------------------ */
1988 /* Function:    nat_flushtable                                              */
1989 /* Returns:     int - number of NAT rules deleted                           */
1990 /* Parameters:  Nil                                                         */
1991 /*                                                                          */
1992 /* Deletes all currently active NAT sessions.  In deleting each NAT entry a */
1993 /* log record should be emitted in nat_delete() if NAT logging is enabled.  */
1994 /* ------------------------------------------------------------------------ */
1995 /*
1996  * nat_flushtable - clear the NAT table of all mapping entries.
1997  */
1998 static int nat_flushtable(ifs)
1999 ipf_stack_t *ifs;
2000 {
2001 	nat_t *nat;
2002 	int j = 0;
2003 
2004 	/*
2005 	 * ALL NAT mappings deleted, so lets just make the deletions
2006 	 * quicker.
2007 	 */
2008 	if (ifs->ifs_nat_table[0] != NULL)
2009 		bzero((char *)ifs->ifs_nat_table[0],
2010 		      sizeof(ifs->ifs_nat_table[0]) * ifs->ifs_ipf_nattable_sz);
2011 	if (ifs->ifs_nat_table[1] != NULL)
2012 		bzero((char *)ifs->ifs_nat_table[1],
2013 		      sizeof(ifs->ifs_nat_table[1]) * ifs->ifs_ipf_nattable_sz);
2014 
2015 	while ((nat = ifs->ifs_nat_instances) != NULL) {
2016 		nat_delete(nat, NL_FLUSH, ifs);
2017 		j++;
2018 	}
2019 
2020 	return j;
2021 }
2022 
2023 
2024 /* ------------------------------------------------------------------------ */
2025 /* Function:    nat_clearlist                                               */
2026 /* Returns:     int - number of NAT/RDR rules deleted                       */
2027 /* Parameters:  Nil                                                         */
2028 /*                                                                          */
2029 /* Delete all rules in the current list of rules.  There is nothing elegant */
2030 /* about this cleanup: simply free all entries on the list of rules and     */
2031 /* clear out the tables used for hashed NAT rule lookups.                   */
2032 /* ------------------------------------------------------------------------ */
2033 static int nat_clearlist(ifs)
2034 ipf_stack_t *ifs;
2035 {
2036 	ipnat_t *n, **np = &ifs->ifs_nat_list;
2037 	int i = 0;
2038 
2039 	if (ifs->ifs_nat_rules != NULL)
2040 		bzero((char *)ifs->ifs_nat_rules,
2041 		      sizeof(*ifs->ifs_nat_rules) * ifs->ifs_ipf_natrules_sz);
2042 	if (ifs->ifs_rdr_rules != NULL)
2043 		bzero((char *)ifs->ifs_rdr_rules,
2044 		      sizeof(*ifs->ifs_rdr_rules) * ifs->ifs_ipf_rdrrules_sz);
2045 
2046 	while ((n = *np) != NULL) {
2047 		*np = n->in_next;
2048 		if (n->in_use == 0) {
2049 			if (n->in_apr != NULL)
2050 				appr_free(n->in_apr);
2051 			KFREE(n);
2052 			ifs->ifs_nat_stats.ns_rules--;
2053 		} else {
2054 			n->in_flags |= IPN_DELETE;
2055 			n->in_next = NULL;
2056 		}
2057 		i++;
2058 	}
2059 	ifs->ifs_nat_masks = 0;
2060 	ifs->ifs_rdr_masks = 0;
2061 	for (i = 0; i < 4; i++) {
2062 		ifs->ifs_nat6_masks[i] = 0;
2063 		ifs->ifs_rdr6_masks[i] = 0;
2064 	}
2065 	return i;
2066 }
2067 
2068 
2069 /* ------------------------------------------------------------------------ */
2070 /* Function:    nat_newmap                                                  */
2071 /* Returns:     int - -1 == error, 0 == success                             */
2072 /* Parameters:  fin(I) - pointer to packet information                      */
2073 /*              nat(I) - pointer to NAT entry                               */
2074 /*              ni(I)  - pointer to structure with misc. information needed */
2075 /*                       to create new NAT entry.                           */
2076 /*                                                                          */
2077 /* Given an empty NAT structure, populate it with new information about a   */
2078 /* new NAT session, as defined by the matching NAT rule.                    */
2079 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2080 /* to the new IP address for the translation.                               */
2081 /* ------------------------------------------------------------------------ */
2082 static INLINE int nat_newmap(fin, nat, ni)
2083 fr_info_t *fin;
2084 nat_t *nat;
2085 natinfo_t *ni;
2086 {
2087 	u_short st_port, dport, sport, port, sp, dp;
2088 	struct in_addr in, inb;
2089 	hostmap_t *hm;
2090 	u_32_t flags;
2091 	u_32_t st_ip;
2092 	ipnat_t *np;
2093 	nat_t *natl;
2094 	int l;
2095 	ipf_stack_t *ifs = fin->fin_ifs;
2096 
2097 	/*
2098 	 * If it's an outbound packet which doesn't match any existing
2099 	 * record, then create a new port
2100 	 */
2101 	l = 0;
2102 	hm = NULL;
2103 	np = ni->nai_np;
2104 	st_ip = np->in_nip;
2105 	st_port = np->in_pnext;
2106 	flags = ni->nai_flags;
2107 	sport = ni->nai_sport;
2108 	dport = ni->nai_dport;
2109 
2110 	/*
2111 	 * Do a loop until we either run out of entries to try or we find
2112 	 * a NAT mapping that isn't currently being used.  This is done
2113 	 * because the change to the source is not (usually) being fixed.
2114 	 */
2115 	do {
2116 		port = 0;
2117 		in.s_addr = htonl(np->in_nip);
2118 		if (l == 0) {
2119 			/*
2120 			 * Check to see if there is an existing NAT
2121 			 * setup for this IP address pair.
2122 			 */
2123 			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2124 					 in, 0, ifs);
2125 			if (hm != NULL)
2126 				in.s_addr = hm->hm_mapip.s_addr;
2127 		} else if ((l == 1) && (hm != NULL)) {
2128 			fr_hostmapdel(&hm);
2129 		}
2130 		in.s_addr = ntohl(in.s_addr);
2131 
2132 		nat->nat_hm = hm;
2133 
2134 		if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
2135 			if (l > 0)
2136 				return -1;
2137 		}
2138 
2139 		if (np->in_redir == NAT_BIMAP &&
2140 		    np->in_inmsk == np->in_outmsk) {
2141 			/*
2142 			 * map the address block in a 1:1 fashion
2143 			 */
2144 			in.s_addr = np->in_outip;
2145 			in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
2146 			in.s_addr = ntohl(in.s_addr);
2147 
2148 		} else if (np->in_redir & NAT_MAPBLK) {
2149 			if ((l >= np->in_ppip) || ((l > 0) &&
2150 			     !(flags & IPN_TCPUDP)))
2151 				return -1;
2152 			/*
2153 			 * map-block - Calculate destination address.
2154 			 */
2155 			in.s_addr = ntohl(fin->fin_saddr);
2156 			in.s_addr &= ntohl(~np->in_inmsk);
2157 			inb.s_addr = in.s_addr;
2158 			in.s_addr /= np->in_ippip;
2159 			in.s_addr &= ntohl(~np->in_outmsk);
2160 			in.s_addr += ntohl(np->in_outip);
2161 			/*
2162 			 * Calculate destination port.
2163 			 */
2164 			if ((flags & IPN_TCPUDP) &&
2165 			    (np->in_ppip != 0)) {
2166 				port = ntohs(sport) + l;
2167 				port %= np->in_ppip;
2168 				port += np->in_ppip *
2169 					(inb.s_addr % np->in_ippip);
2170 				port += MAPBLK_MINPORT;
2171 				port = htons(port);
2172 			}
2173 
2174 		} else if ((np->in_outip == 0) &&
2175 			   (np->in_outmsk == 0xffffffff)) {
2176 			/*
2177 			 * 0/32 - use the interface's IP address.
2178 			 */
2179 			if ((l > 0) ||
2180 			    fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
2181 				       &in, NULL, fin->fin_ifs) == -1)
2182 				return -1;
2183 			in.s_addr = ntohl(in.s_addr);
2184 
2185 		} else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
2186 			/*
2187 			 * 0/0 - use the original source address/port.
2188 			 */
2189 			if (l > 0)
2190 				return -1;
2191 			in.s_addr = ntohl(fin->fin_saddr);
2192 
2193 		} else if ((np->in_outmsk != 0xffffffff) &&
2194 			   (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
2195 			np->in_nip++;
2196 
2197 		natl = NULL;
2198 
2199 		if ((flags & IPN_TCPUDP) &&
2200 		    ((np->in_redir & NAT_MAPBLK) == 0) &&
2201 		    (np->in_flags & IPN_AUTOPORTMAP)) {
2202 			/*
2203 			 * "ports auto" (without map-block)
2204 			 */
2205 			if ((l > 0) && (l % np->in_ppip == 0)) {
2206 				if (l > np->in_space) {
2207 					return -1;
2208 				} else if ((l > np->in_ppip) &&
2209 					   np->in_outmsk != 0xffffffff)
2210 					np->in_nip++;
2211 			}
2212 			if (np->in_ppip != 0) {
2213 				port = ntohs(sport);
2214 				port += (l % np->in_ppip);
2215 				port %= np->in_ppip;
2216 				port += np->in_ppip *
2217 					(ntohl(fin->fin_saddr) %
2218 					 np->in_ippip);
2219 				port += MAPBLK_MINPORT;
2220 				port = htons(port);
2221 			}
2222 
2223 		} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
2224 			   (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
2225 			/*
2226 			 * Standard port translation.  Select next port.
2227 			 */
2228 			if (np->in_flags & IPN_SEQUENTIAL) {
2229 				port = np->in_pnext;
2230 			} else {
2231 				port = ipf_random() % (ntohs(np->in_pmax) -
2232 						       ntohs(np->in_pmin));
2233 				port += ntohs(np->in_pmin);
2234 			}
2235 			port = htons(port);
2236 			np->in_pnext++;
2237 
2238 			if (np->in_pnext > ntohs(np->in_pmax)) {
2239 				np->in_pnext = ntohs(np->in_pmin);
2240 				if (np->in_outmsk != 0xffffffff)
2241 					np->in_nip++;
2242 			}
2243 		}
2244 
2245 		if (np->in_flags & IPN_IPRANGE) {
2246 			if (np->in_nip > ntohl(np->in_outmsk))
2247 				np->in_nip = ntohl(np->in_outip);
2248 		} else {
2249 			if ((np->in_outmsk != 0xffffffff) &&
2250 			    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
2251 			    ntohl(np->in_outip))
2252 				np->in_nip = ntohl(np->in_outip) + 1;
2253 		}
2254 
2255 		if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
2256 			port = sport;
2257 
2258 		/*
2259 		 * Here we do a lookup of the connection as seen from
2260 		 * the outside.  If an IP# pair already exists, try
2261 		 * again.  So if you have A->B becomes C->B, you can
2262 		 * also have D->E become C->E but not D->B causing
2263 		 * another C->B.  Also take protocol and ports into
2264 		 * account when determining whether a pre-existing
2265 		 * NAT setup will cause an external conflict where
2266 		 * this is appropriate.
2267 		 */
2268 		inb.s_addr = htonl(in.s_addr);
2269 		sp = fin->fin_data[0];
2270 		dp = fin->fin_data[1];
2271 		fin->fin_data[0] = fin->fin_data[1];
2272 		fin->fin_data[1] = htons(port);
2273 		natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2274 				    (u_int)fin->fin_p, fin->fin_dst, inb);
2275 		fin->fin_data[0] = sp;
2276 		fin->fin_data[1] = dp;
2277 
2278 		/*
2279 		 * Has the search wrapped around and come back to the
2280 		 * start ?
2281 		 */
2282 		if ((natl != NULL) &&
2283 		    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
2284 		    (np->in_nip != 0) && (st_ip == np->in_nip))
2285 			return -1;
2286 		l++;
2287 	} while (natl != NULL);
2288 
2289 	if (np->in_space > 0)
2290 		np->in_space--;
2291 
2292 	/* Setup the NAT table */
2293 	nat->nat_inip = fin->fin_src;
2294 	nat->nat_outip.s_addr = htonl(in.s_addr);
2295 	nat->nat_oip = fin->fin_dst;
2296 	if (nat->nat_hm == NULL)
2297 		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2298 					  nat->nat_outip, 0, ifs);
2299 
2300 	if (flags & IPN_TCPUDP) {
2301 		nat->nat_inport = sport;
2302 		nat->nat_outport = port;	/* sport */
2303 		nat->nat_oport = dport;
2304 		((tcphdr_t *)fin->fin_dp)->th_sport = port;
2305 	} else if (flags & IPN_ICMPQUERY) {
2306 		((icmphdr_t *)fin->fin_dp)->icmp_id = port;
2307 		nat->nat_inport = port;
2308 		nat->nat_outport = port;
2309 	}
2310 
2311 	ni->nai_ip.s_addr = in.s_addr;
2312 	ni->nai_port = port;
2313 	ni->nai_nport = dport;
2314 	return 0;
2315 }
2316 
2317 
2318 /* ------------------------------------------------------------------------ */
2319 /* Function:    nat_newrdr                                                  */
2320 /* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
2321 /*                    allow rule to be moved if IPN_ROUNDR is set.          */
2322 /* Parameters:  fin(I) - pointer to packet information                      */
2323 /*              nat(I) - pointer to NAT entry                               */
2324 /*              ni(I)  - pointer to structure with misc. information needed */
2325 /*                       to create new NAT entry.                           */
2326 /*                                                                          */
2327 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2328 /* to the new IP address for the translation.                               */
2329 /* ------------------------------------------------------------------------ */
2330 static INLINE int nat_newrdr(fin, nat, ni)
2331 fr_info_t *fin;
2332 nat_t *nat;
2333 natinfo_t *ni;
2334 {
2335 	u_short nport, dport, sport;
2336 	struct in_addr in, inb;
2337 	u_short sp, dp;
2338 	hostmap_t *hm;
2339 	u_32_t flags;
2340 	ipnat_t *np;
2341 	nat_t *natl;
2342 	int move;
2343 	ipf_stack_t *ifs = fin->fin_ifs;
2344 
2345 	move = 1;
2346 	hm = NULL;
2347 	in.s_addr = 0;
2348 	np = ni->nai_np;
2349 	flags = ni->nai_flags;
2350 	sport = ni->nai_sport;
2351 	dport = ni->nai_dport;
2352 
2353 	/*
2354 	 * If the matching rule has IPN_STICKY set, then we want to have the
2355 	 * same rule kick in as before.  Why would this happen?  If you have
2356 	 * a collection of rdr rules with "round-robin sticky", the current
2357 	 * packet might match a different one to the previous connection but
2358 	 * we want the same destination to be used.
2359 	 */
2360 	if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) ==
2361 	    (IPN_ROUNDR|IPN_STICKY)) {
2362 		hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2363 				 (u_32_t)dport, ifs);
2364 		if (hm != NULL) {
2365 			in.s_addr = ntohl(hm->hm_mapip.s_addr);
2366 			np = hm->hm_ipnat;
2367 			ni->nai_np = np;
2368 			move = 0;
2369 		}
2370 	}
2371 
2372 	/*
2373 	 * Otherwise, it's an inbound packet. Most likely, we don't
2374 	 * want to rewrite source ports and source addresses. Instead,
2375 	 * we want to rewrite to a fixed internal address and fixed
2376 	 * internal port.
2377 	 */
2378 	if (np->in_flags & IPN_SPLIT) {
2379 		in.s_addr = np->in_nip;
2380 
2381 		if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2382 			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2383 					 in, (u_32_t)dport, ifs);
2384 			if (hm != NULL) {
2385 				in.s_addr = hm->hm_mapip.s_addr;
2386 				move = 0;
2387 			}
2388 		}
2389 
2390 		if (hm == NULL || hm->hm_ref == 1) {
2391 			if (np->in_inip == htonl(in.s_addr)) {
2392 				np->in_nip = ntohl(np->in_inmsk);
2393 				move = 0;
2394 			} else {
2395 				np->in_nip = ntohl(np->in_inip);
2396 			}
2397 		}
2398 
2399 	} else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2400 		/*
2401 		 * 0/32 - use the interface's IP address.
2402 		 */
2403 		if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL,
2404 			   fin->fin_ifs) == -1)
2405 			return -1;
2406 		in.s_addr = ntohl(in.s_addr);
2407 
2408 	} else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2409 		/*
2410 		 * 0/0 - use the original destination address/port.
2411 		 */
2412 		in.s_addr = ntohl(fin->fin_daddr);
2413 
2414 	} else if (np->in_redir == NAT_BIMAP &&
2415 		   np->in_inmsk == np->in_outmsk) {
2416 		/*
2417 		 * map the address block in a 1:1 fashion
2418 		 */
2419 		in.s_addr = np->in_inip;
2420 		in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2421 		in.s_addr = ntohl(in.s_addr);
2422 	} else {
2423 		in.s_addr = ntohl(np->in_inip);
2424 	}
2425 
2426 	if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2427 		nport = dport;
2428 	else {
2429 		/*
2430 		 * Whilst not optimized for the case where
2431 		 * pmin == pmax, the gain is not significant.
2432 		 */
2433 		if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2434 		    (np->in_pmin != np->in_pmax)) {
2435 			nport = ntohs(dport) - ntohs(np->in_pmin) +
2436 				ntohs(np->in_pnext);
2437 			nport = htons(nport);
2438 		} else
2439 			nport = np->in_pnext;
2440 	}
2441 
2442 	/*
2443 	 * When the redirect-to address is set to 0.0.0.0, just
2444 	 * assume a blank `forwarding' of the packet.  We don't
2445 	 * setup any translation for this either.
2446 	 */
2447 	if (in.s_addr == 0) {
2448 		if (nport == dport)
2449 			return -1;
2450 		in.s_addr = ntohl(fin->fin_daddr);
2451 	}
2452 
2453 	/*
2454 	 * Check to see if this redirect mapping already exists and if
2455 	 * it does, return "failure" (allowing it to be created will just
2456 	 * cause one or both of these "connections" to stop working.)
2457 	 */
2458 	inb.s_addr = htonl(in.s_addr);
2459 	sp = fin->fin_data[0];
2460 	dp = fin->fin_data[1];
2461 	fin->fin_data[1] = fin->fin_data[0];
2462 	fin->fin_data[0] = ntohs(nport);
2463 	natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2464 		    (u_int)fin->fin_p, inb, fin->fin_src);
2465 	fin->fin_data[0] = sp;
2466 	fin->fin_data[1] = dp;
2467 	if (natl != NULL)
2468 		return (-1);
2469 
2470 	nat->nat_inip.s_addr = htonl(in.s_addr);
2471 	nat->nat_outip = fin->fin_dst;
2472 	nat->nat_oip = fin->fin_src;
2473 
2474 	ni->nai_ip.s_addr = in.s_addr;
2475 	ni->nai_nport = nport;
2476 	ni->nai_port = sport;
2477 
2478 	if (flags & IPN_TCPUDP) {
2479 		nat->nat_inport = nport;
2480 		nat->nat_outport = dport;
2481 		nat->nat_oport = sport;
2482 		((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2483 	} else if (flags & IPN_ICMPQUERY) {
2484 		((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2485 		nat->nat_inport = nport;
2486 		nat->nat_outport = nport;
2487 	}
2488 
2489 	return move;
2490 }
2491 
2492 /* ------------------------------------------------------------------------ */
2493 /* Function:    nat_new                                                     */
2494 /* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
2495 /*                       else pointer to new NAT structure                  */
2496 /* Parameters:  fin(I)       - pointer to packet information                */
2497 /*              np(I)        - pointer to NAT rule                          */
2498 /*              natsave(I)   - pointer to where to store NAT struct pointer */
2499 /*              flags(I)     - flags describing the current packet          */
2500 /*              direction(I) - direction of packet (in/out)                 */
2501 /* Write Lock:  ipf_nat                                                     */
2502 /*                                                                          */
2503 /* Attempts to create a new NAT entry.  Does not actually change the packet */
2504 /* in any way.                                                              */
2505 /*                                                                          */
2506 /* This fucntion is in three main parts: (1) deal with creating a new NAT   */
2507 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
2508 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2509 /* and (3) building that structure and putting it into the NAT table(s).    */
2510 /* ------------------------------------------------------------------------ */
2511 nat_t *nat_new(fin, np, natsave, flags, direction)
2512 fr_info_t *fin;
2513 ipnat_t *np;
2514 nat_t **natsave;
2515 u_int flags;
2516 int direction;
2517 {
2518 	tcphdr_t *tcp = NULL;
2519 	hostmap_t *hm = NULL;
2520 	nat_t *nat, *natl;
2521 	u_int nflags;
2522 	natinfo_t ni;
2523 	int move;
2524 	ipf_stack_t *ifs = fin->fin_ifs;
2525 
2526 	/*
2527 	 * Trigger automatic call to nat_extraflush() if the
2528 	 * table has reached capcity specified by hi watermark.
2529 	 */
2530 	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi)
2531 		ifs->ifs_nat_doflush = 1;
2532 
2533 	if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) {
2534 		ifs->ifs_nat_stats.ns_memfail++;
2535 		return NULL;
2536 	}
2537 
2538 	move = 1;
2539 	nflags = np->in_flags & flags;
2540 	nflags &= NAT_FROMRULE;
2541 
2542 	ni.nai_np = np;
2543 	ni.nai_nflags = nflags;
2544 	ni.nai_flags = flags;
2545 
2546 	/* Give me a new nat */
2547 	KMALLOC(nat, nat_t *);
2548 	if (nat == NULL) {
2549 		ifs->ifs_nat_stats.ns_memfail++;
2550 		/*
2551 		 * Try to automatically tune the max # of entries in the
2552 		 * table allowed to be less than what will cause kmem_alloc()
2553 		 * to fail and try to eliminate panics due to out of memory
2554 		 * conditions arising.
2555 		 */
2556 		if (ifs->ifs_ipf_nattable_max > ifs->ifs_ipf_nattable_sz) {
2557 			ifs->ifs_ipf_nattable_max = ifs->ifs_nat_stats.ns_inuse - 100;
2558 			printf("ipf_nattable_max reduced to %d\n",
2559 				ifs->ifs_ipf_nattable_max);
2560 		}
2561 		return NULL;
2562 	}
2563 
2564 	if (flags & IPN_TCPUDP) {
2565 		tcp = fin->fin_dp;
2566 		ni.nai_sport = htons(fin->fin_sport);
2567 		ni.nai_dport = htons(fin->fin_dport);
2568 	} else if (flags & IPN_ICMPQUERY) {
2569 		/*
2570 		 * In the ICMP query NAT code, we translate the ICMP id fields
2571 		 * to make them unique. This is indepedent of the ICMP type
2572 		 * (e.g. in the unlikely event that a host sends an echo and
2573 		 * an tstamp request with the same id, both packets will have
2574 		 * their ip address/id field changed in the same way).
2575 		 */
2576 		/* The icmp_id field is used by the sender to identify the
2577 		 * process making the icmp request. (the receiver justs
2578 		 * copies it back in its response). So, it closely matches
2579 		 * the concept of source port. We overlay sport, so we can
2580 		 * maximally reuse the existing code.
2581 		 */
2582 		ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2583 		ni.nai_dport = ni.nai_sport;
2584 	}
2585 
2586 	bzero((char *)nat, sizeof(*nat));
2587 	nat->nat_flags = flags;
2588 	nat->nat_redir = np->in_redir;
2589 
2590 	if ((flags & NAT_SLAVE) == 0) {
2591 		MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
2592 	}
2593 
2594 	/*
2595 	 * Search the current table for a match.
2596 	 */
2597 	if (direction == NAT_OUTBOUND) {
2598 		/*
2599 		 * We can now arrange to call this for the same connection
2600 		 * because ipf_nat_new doesn't protect the code path into
2601 		 * this function.
2602 		 */
2603 		natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2604 				     fin->fin_src, fin->fin_dst);
2605 		if (natl != NULL) {
2606 			KFREE(nat);
2607 			nat = natl;
2608 			goto done;
2609 		}
2610 
2611 		move = nat_newmap(fin, nat, &ni);
2612 		if (move == -1)
2613 			goto badnat;
2614 
2615 		np = ni.nai_np;
2616 	} else {
2617 		/*
2618 		 * NAT_INBOUND is used only for redirects rules
2619 		 */
2620 		natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2621 				    fin->fin_src, fin->fin_dst);
2622 		if (natl != NULL) {
2623 			KFREE(nat);
2624 			nat = natl;
2625 			goto done;
2626 		}
2627 
2628 		move = nat_newrdr(fin, nat, &ni);
2629 		if (move == -1)
2630 			goto badnat;
2631 
2632 		np = ni.nai_np;
2633 	}
2634 
2635 	if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2636 		if (np->in_redir == NAT_REDIRECT) {
2637 			nat_delrdr(np);
2638 			nat_addrdr(np, ifs);
2639 		} else if (np->in_redir == NAT_MAP) {
2640 			nat_delnat(np);
2641 			nat_addnat(np, ifs);
2642 		}
2643 	}
2644 
2645 	if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2646 		goto badnat;
2647 	}
2648 
2649 	nat_calc_chksum_diffs(nat);
2650 
2651 	if (flags & SI_WILDP)
2652 		ifs->ifs_nat_stats.ns_wilds++;
2653 	goto done;
2654 badnat:
2655 	ifs->ifs_nat_stats.ns_badnat++;
2656 	if ((hm = nat->nat_hm) != NULL)
2657 		fr_hostmapdel(&hm);
2658 	KFREE(nat);
2659 	nat = NULL;
2660 done:
2661 	if ((flags & NAT_SLAVE) == 0) {
2662 		MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
2663 	}
2664 	return nat;
2665 }
2666 
2667 
2668 /* ------------------------------------------------------------------------ */
2669 /* Function:    nat_finalise                                                */
2670 /* Returns:     int - 0 == sucess, -1 == failure                            */
2671 /* Parameters:  fin(I) - pointer to packet information                      */
2672 /*              nat(I) - pointer to NAT entry                               */
2673 /*              ni(I)  - pointer to structure with misc. information needed */
2674 /*                       to create new NAT entry.                           */
2675 /* Write Lock:  ipf_nat                                                     */
2676 /*                                                                          */
2677 /* This is the tail end of constructing a new NAT entry and is the same     */
2678 /* for both IPv4 and IPv6.                                                  */
2679 /* ------------------------------------------------------------------------ */
2680 /*ARGSUSED*/
2681 static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction)
2682 fr_info_t *fin;
2683 nat_t *nat;
2684 natinfo_t *ni;
2685 tcphdr_t *tcp;
2686 nat_t **natsave;
2687 int direction;
2688 {
2689 	frentry_t *fr;
2690 	ipnat_t *np;
2691 	ipf_stack_t *ifs = fin->fin_ifs;
2692 
2693 	np = ni->nai_np;
2694 
2695 	COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0], fin->fin_v);
2696 
2697 #ifdef	IPFILTER_SYNC
2698 	if ((nat->nat_flags & SI_CLONE) == 0)
2699 		nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2700 #endif
2701 
2702 	nat->nat_me = natsave;
2703 	nat->nat_dir = direction;
2704 	nat->nat_ifps[0] = np->in_ifps[0];
2705 	nat->nat_ifps[1] = np->in_ifps[1];
2706 	nat->nat_ptr = np;
2707 	nat->nat_p = fin->fin_p;
2708 	nat->nat_v = fin->fin_v;
2709 	nat->nat_mssclamp = np->in_mssclamp;
2710 	fr = fin->fin_fr;
2711 	nat->nat_fr = fr;
2712 
2713 	if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2714 		if (appr_new(fin, nat) == -1)
2715 			return -1;
2716 
2717 	if (nat_insert(nat, fin->fin_rev, ifs) == 0) {
2718 		if (ifs->ifs_nat_logging)
2719 			nat_log(nat, (u_int)np->in_redir, ifs);
2720 		np->in_use++;
2721 		if (fr != NULL) {
2722 			MUTEX_ENTER(&fr->fr_lock);
2723 			fr->fr_ref++;
2724 			MUTEX_EXIT(&fr->fr_lock);
2725 		}
2726 		return 0;
2727 	}
2728 
2729 	/*
2730 	 * nat_insert failed, so cleanup time...
2731 	 */
2732 	return -1;
2733 }
2734 
2735 
2736 /* ------------------------------------------------------------------------ */
2737 /* Function:   nat_insert                                                   */
2738 /* Returns:    int - 0 == sucess, -1 == failure                             */
2739 /* Parameters: nat(I) - pointer to NAT structure                            */
2740 /*             rev(I) - flag indicating forward/reverse direction of packet */
2741 /* Write Lock: ipf_nat                                                      */
2742 /*                                                                          */
2743 /* Insert a NAT entry into the hash tables for searching and add it to the  */
2744 /* list of active NAT entries.  Adjust global counters when complete.       */
2745 /* ------------------------------------------------------------------------ */
2746 int	nat_insert(nat, rev, ifs)
2747 nat_t	*nat;
2748 int	rev;
2749 ipf_stack_t *ifs;
2750 {
2751 	u_int hv1, hv2;
2752 	nat_t **natp;
2753 
2754 	/*
2755 	 * Try and return an error as early as possible, so calculate the hash
2756 	 * entry numbers first and then proceed.
2757 	 */
2758 	if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2759 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2760 				  0xffffffff);
2761 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2762 				  ifs->ifs_ipf_nattable_sz);
2763 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2764 				  0xffffffff);
2765 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2766 				  ifs->ifs_ipf_nattable_sz);
2767 	} else {
2768 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2769 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1,
2770 				  ifs->ifs_ipf_nattable_sz);
2771 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2772 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2,
2773 				  ifs->ifs_ipf_nattable_sz);
2774 	}
2775 
2776 	if (ifs->ifs_nat_stats.ns_bucketlen[0][hv1] >= ifs->ifs_fr_nat_maxbucket ||
2777 	    ifs->ifs_nat_stats.ns_bucketlen[1][hv2] >= ifs->ifs_fr_nat_maxbucket) {
2778 		return -1;
2779 	}
2780 
2781 	nat->nat_hv[0] = hv1;
2782 	nat->nat_hv[1] = hv2;
2783 
2784 	MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2785 
2786 	nat->nat_rev = rev;
2787 	nat->nat_ref = 1;
2788 	nat->nat_bytes[0] = 0;
2789 	nat->nat_pkts[0] = 0;
2790 	nat->nat_bytes[1] = 0;
2791 	nat->nat_pkts[1] = 0;
2792 
2793 	nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2794 	nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4, ifs);
2795 
2796 	if (nat->nat_ifnames[1][0] !='\0') {
2797 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2798 		nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4, ifs);
2799 	} else {
2800 		(void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2801 			       LIFNAMSIZ);
2802 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2803 		nat->nat_ifps[1] = nat->nat_ifps[0];
2804 	}
2805 
2806 	nat->nat_next = ifs->ifs_nat_instances;
2807 	nat->nat_pnext = &ifs->ifs_nat_instances;
2808 	if (ifs->ifs_nat_instances)
2809 		ifs->ifs_nat_instances->nat_pnext = &nat->nat_next;
2810 	ifs->ifs_nat_instances = nat;
2811 
2812 	natp = &ifs->ifs_nat_table[0][hv1];
2813 	if (*natp)
2814 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2815 	nat->nat_phnext[0] = natp;
2816 	nat->nat_hnext[0] = *natp;
2817 	*natp = nat;
2818 	ifs->ifs_nat_stats.ns_bucketlen[0][hv1]++;
2819 
2820 	natp = &ifs->ifs_nat_table[1][hv2];
2821 	if (*natp)
2822 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2823 	nat->nat_phnext[1] = natp;
2824 	nat->nat_hnext[1] = *natp;
2825 	*natp = nat;
2826 	ifs->ifs_nat_stats.ns_bucketlen[1][hv2]++;
2827 
2828 	fr_setnatqueue(nat, rev, ifs);
2829 
2830 	ifs->ifs_nat_stats.ns_added++;
2831 	ifs->ifs_nat_stats.ns_inuse++;
2832 	return 0;
2833 }
2834 
2835 
2836 /* ------------------------------------------------------------------------ */
2837 /* Function:    nat_icmperrorlookup                                         */
2838 /* Returns:     nat_t* - point to matching NAT structure                    */
2839 /* Parameters:  fin(I) - pointer to packet information                      */
2840 /*              dir(I) - direction of packet (in/out)                       */
2841 /*                                                                          */
2842 /* Check if the ICMP error message is related to an existing TCP, UDP or    */
2843 /* ICMP query nat entry.  It is assumed that the packet is already of the   */
2844 /* the required length.                                                     */
2845 /* ------------------------------------------------------------------------ */
2846 nat_t *nat_icmperrorlookup(fin, dir)
2847 fr_info_t *fin;
2848 int dir;
2849 {
2850 	int flags = 0, minlen;
2851 	icmphdr_t *orgicmp;
2852 	tcphdr_t *tcp = NULL;
2853 	u_short data[2];
2854 	nat_t *nat;
2855 	ip_t *oip;
2856 	u_int p;
2857 
2858 	/*
2859 	 * Does it at least have the return (basic) IP header ?
2860 	 * Only a basic IP header (no options) should be with an ICMP error
2861 	 * header.  Also, if it's not an error type, then return.
2862 	 */
2863 	if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2864 		return NULL;
2865 
2866 	/*
2867 	 * Check packet size
2868 	 */
2869 	oip = (ip_t *)((char *)fin->fin_dp + 8);
2870 	minlen = IP_HL(oip) << 2;
2871 	if ((minlen < sizeof(ip_t)) ||
2872 	    (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2873 		return NULL;
2874 	/*
2875 	 * Is the buffer big enough for all of it ?  It's the size of the IP
2876 	 * header claimed in the encapsulated part which is of concern.  It
2877 	 * may be too big to be in this buffer but not so big that it's
2878 	 * outside the ICMP packet, leading to TCP deref's causing problems.
2879 	 * This is possible because we don't know how big oip_hl is when we
2880 	 * do the pullup early in fr_check() and thus can't gaurantee it is
2881 	 * all here now.
2882 	 */
2883 #ifdef  _KERNEL
2884 	{
2885 	mb_t *m;
2886 
2887 	m = fin->fin_m;
2888 # if defined(MENTAT)
2889 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2890 		return NULL;
2891 # else
2892 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2893 	    (char *)fin->fin_ip + M_LEN(m))
2894 		return NULL;
2895 # endif
2896 	}
2897 #endif
2898 
2899 	if (fin->fin_daddr != oip->ip_src.s_addr)
2900 		return NULL;
2901 
2902 	p = oip->ip_p;
2903 	if (p == IPPROTO_TCP)
2904 		flags = IPN_TCP;
2905 	else if (p == IPPROTO_UDP)
2906 		flags = IPN_UDP;
2907 	else if (p == IPPROTO_ICMP) {
2908 		orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2909 
2910 		/* see if this is related to an ICMP query */
2911 		if (nat_icmpquerytype4(orgicmp->icmp_type)) {
2912 			data[0] = fin->fin_data[0];
2913 			data[1] = fin->fin_data[1];
2914 			fin->fin_data[0] = 0;
2915 			fin->fin_data[1] = orgicmp->icmp_id;
2916 
2917 			flags = IPN_ICMPERR|IPN_ICMPQUERY;
2918 			/*
2919 			 * NOTE : dir refers to the direction of the original
2920 			 *        ip packet. By definition the icmp error
2921 			 *        message flows in the opposite direction.
2922 			 */
2923 			if (dir == NAT_INBOUND)
2924 				nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2925 						   oip->ip_src);
2926 			else
2927 				nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2928 						    oip->ip_src);
2929 			fin->fin_data[0] = data[0];
2930 			fin->fin_data[1] = data[1];
2931 			return nat;
2932 		}
2933 	}
2934 
2935 	if (flags & IPN_TCPUDP) {
2936 		minlen += 8;		/* + 64bits of data to get ports */
2937 		if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2938 			return NULL;
2939 
2940 		data[0] = fin->fin_data[0];
2941 		data[1] = fin->fin_data[1];
2942 		tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2943 		fin->fin_data[0] = ntohs(tcp->th_dport);
2944 		fin->fin_data[1] = ntohs(tcp->th_sport);
2945 
2946 		if (dir == NAT_INBOUND) {
2947 			nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2948 					   oip->ip_src);
2949 		} else {
2950 			nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2951 					    oip->ip_src);
2952 		}
2953 		fin->fin_data[0] = data[0];
2954 		fin->fin_data[1] = data[1];
2955 		return nat;
2956 	}
2957 	if (dir == NAT_INBOUND)
2958 		return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2959 	else
2960 		return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2961 }
2962 
2963 
2964 /* ------------------------------------------------------------------------ */
2965 /* Function:    nat_icmperror                                               */
2966 /* Returns:     nat_t* - point to matching NAT structure                    */
2967 /* Parameters:  fin(I)    - pointer to packet information                   */
2968 /*              nflags(I) - NAT flags for this packet                       */
2969 /*              dir(I)    - direction of packet (in/out)                    */
2970 /*                                                                          */
2971 /* Fix up an ICMP packet which is an error message for an existing NAT      */
2972 /* session.  This will correct both packet header data and checksums.       */
2973 /*                                                                          */
2974 /* This should *ONLY* be used for incoming ICMP error packets to make sure  */
2975 /* a NAT'd ICMP packet gets correctly recognised.                           */
2976 /* ------------------------------------------------------------------------ */
2977 nat_t *nat_icmperror(fin, nflags, dir)
2978 fr_info_t *fin;
2979 u_int *nflags;
2980 int dir;
2981 {
2982 	u_32_t sum1, sum2, sumd, psum1, psum2, psumd, sumd2;
2983 	struct in_addr in;
2984 	icmphdr_t *icmp, *orgicmp;
2985 	int dlen;
2986 	udphdr_t *udp;
2987 	tcphdr_t *tcp;
2988 	nat_t *nat;
2989 	ip_t *oip;
2990 	if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
2991 		return NULL;
2992 
2993 	/*
2994 	 * nat_icmperrorlookup() looks up nat entry associated with the
2995 	 * offending IP packet and returns pointer to the entry, or NULL
2996 	 * if packet wasn't natted or for `defective' packets.
2997 	 */
2998 
2999 	if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
3000 		return NULL;
3001 
3002 	sumd2 = 0;
3003 	*nflags = IPN_ICMPERR;
3004 	icmp = fin->fin_dp;
3005 	oip = (ip_t *)&icmp->icmp_ip;
3006 	udp = (udphdr_t *)((((char *)oip) + (IP_HL(oip) << 2)));
3007 	tcp = (tcphdr_t *)udp;
3008 	dlen = fin->fin_plen - ((char *)udp - (char *)fin->fin_ip);
3009 
3010 	/*
3011 	 * Need to adjust ICMP header to include the real IP#'s and
3012 	 * port #'s.  There are three steps required.
3013 	 *
3014 	 * Step 1
3015 	 * Fix the IP addresses in the offending IP packet and update
3016 	 * ip header checksum to compensate for the change.
3017 	 *
3018 	 * No update needed here for icmp_cksum because the ICMP checksum
3019 	 * is calculated over the complete ICMP packet, which includes the
3020 	 * changed oip IP addresses and oip->ip_sum.  These two changes
3021 	 * cancel each other out (if the delta for the IP address is x,
3022 	 * then the delta for ip_sum is minus x).
3023 	 */
3024 
3025 	if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
3026 		sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
3027 		in = nat->nat_inip;
3028 		oip->ip_src = in;
3029 	} else {
3030 		sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
3031 		in = nat->nat_outip;
3032 		oip->ip_dst = in;
3033 	}
3034 
3035 	sum2 = LONG_SUM(ntohl(in.s_addr));
3036 	CALC_SUMD(sum1, sum2, sumd);
3037 	fix_datacksum(&oip->ip_sum, sumd);
3038 
3039 	/*
3040 	 * Step 2
3041 	 * Perform other adjustments based on protocol of offending packet.
3042 	 */
3043 
3044 	switch (oip->ip_p) {
3045 		case IPPROTO_TCP :
3046 		case IPPROTO_UDP :
3047 
3048 			/*
3049 			* For offending TCP/UDP IP packets, translate the ports
3050 			* based on the NAT specification.
3051 			*
3052 			* Advance notice : Now it becomes complicated :-)
3053 			*
3054 			* Since the port and IP addresse fields are both part
3055 			* of the TCP/UDP checksum of the offending IP packet,
3056 			* we need to adjust that checksum as well.
3057 			*
3058 			* To further complicate things, the TCP/UDP checksum
3059 			* may not be present.  We must check to see if the
3060 			* length of the data portion is big enough to hold
3061 			* the checksum.  In the UDP case, a test to determine
3062 			* if the checksum is even set is also required.
3063 			*
3064 			* Any changes to an IP address, port or checksum within
3065 			* the ICMP packet requires a change to icmp_cksum.
3066 			*
3067 			* Be extremely careful here ... The change is dependent
3068 			* upon whether or not the TCP/UPD checksum is present.
3069 			*
3070 			* If TCP/UPD checksum is present, the icmp_cksum must
3071 			* compensate for checksum modification resulting from
3072 			* IP address change only.  Port change and resulting
3073 			* data checksum adjustments cancel each other out.
3074 			*
3075 			* If TCP/UDP checksum is not present, icmp_cksum must
3076 			* compensate for port change only.  The IP address
3077 			* change does not modify anything else in this case.
3078 			*/
3079 
3080 			psum1 = 0;
3081 			psum2 = 0;
3082 			psumd = 0;
3083 
3084 			if ((tcp->th_dport == nat->nat_oport) &&
3085 			    (tcp->th_sport != nat->nat_inport)) {
3086 
3087 				/*
3088 				 * Translate the source port.
3089 				 */
3090 
3091 				psum1 = ntohs(tcp->th_sport);
3092 				psum2 = ntohs(nat->nat_inport);
3093 				tcp->th_sport = nat->nat_inport;
3094 
3095 			} else if ((tcp->th_sport == nat->nat_oport) &&
3096 				    (tcp->th_dport != nat->nat_outport)) {
3097 
3098 				/*
3099 				 * Translate the destination port.
3100 				 */
3101 
3102 				psum1 = ntohs(tcp->th_dport);
3103 				psum2 = ntohs(nat->nat_outport);
3104 				tcp->th_dport = nat->nat_outport;
3105 			}
3106 
3107 			if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) {
3108 
3109 				/*
3110 				 * TCP checksum present.
3111 				 *
3112 				 * Adjust data checksum and icmp checksum to
3113 				 * compensate for any IP address change.
3114 				 */
3115 
3116 				sum1 = ntohs(tcp->th_sum);
3117 				fix_datacksum(&tcp->th_sum, sumd);
3118 				sum2 = ntohs(tcp->th_sum);
3119 				sumd2 = sumd << 1;
3120 				CALC_SUMD(sum1, sum2, sumd);
3121 				sumd2 += sumd;
3122 
3123 				/*
3124 				 * Also make data checksum adjustment to
3125 				 * compensate for any port change.
3126 				 */
3127 
3128 				if (psum1 != psum2) {
3129 					CALC_SUMD(psum1, psum2, psumd);
3130 					fix_datacksum(&tcp->th_sum, psumd);
3131 				}
3132 
3133 			} else if ((oip->ip_p == IPPROTO_UDP) &&
3134 				   (dlen >= 8) && (udp->uh_sum != 0)) {
3135 
3136 				/*
3137 				 * The UDP checksum is present and set.
3138 				 *
3139 				 * Adjust data checksum and icmp checksum to
3140 				 * compensate for any IP address change.
3141 				 */
3142 
3143 				sum1 = ntohs(udp->uh_sum);
3144 				fix_datacksum(&udp->uh_sum, sumd);
3145 				sum2 = ntohs(udp->uh_sum);
3146 				sumd2 = sumd << 1;
3147 				CALC_SUMD(sum1, sum2, sumd);
3148 				sumd2 += sumd;
3149 
3150 				/*
3151 				 * Also make data checksum adjustment to
3152 				 * compensate for any port change.
3153 				 */
3154 
3155 				if (psum1 != psum2) {
3156 					CALC_SUMD(psum1, psum2, psumd);
3157 					fix_datacksum(&udp->uh_sum, psumd);
3158 				}
3159 
3160 			} else {
3161 
3162 				/*
3163 				 * Data checksum was not present.
3164 				 *
3165 				 * Compensate for any port change.
3166 				 */
3167 
3168 				CALC_SUMD(psum2, psum1, psumd);
3169 				sumd2 += psumd;
3170 			}
3171 			break;
3172 
3173 		case IPPROTO_ICMP :
3174 
3175 			orgicmp = (icmphdr_t *)udp;
3176 
3177 			if ((nat->nat_dir == NAT_OUTBOUND) &&
3178 			    (orgicmp->icmp_id != nat->nat_inport) &&
3179 			    (dlen >= 8)) {
3180 
3181 				/*
3182 				 * Fix ICMP checksum (of the offening ICMP
3183 				 * query packet) to compensate the change
3184 				 * in the ICMP id of the offending ICMP
3185 				 * packet.
3186 				 *
3187 				 * Since you modify orgicmp->icmp_id with
3188 				 * a delta (say x) and you compensate that
3189 				 * in origicmp->icmp_cksum with a delta
3190 				 * minus x, you don't have to adjust the
3191 				 * overall icmp->icmp_cksum
3192 				 */
3193 
3194 				sum1 = ntohs(orgicmp->icmp_id);
3195 				sum2 = ntohs(nat->nat_inport);
3196 				CALC_SUMD(sum1, sum2, sumd);
3197 				orgicmp->icmp_id = nat->nat_inport;
3198 				fix_datacksum(&orgicmp->icmp_cksum, sumd);
3199 
3200 			} /* nat_dir can't be NAT_INBOUND for icmp queries */
3201 
3202 			break;
3203 
3204 		default :
3205 
3206 			break;
3207 
3208 	} /* switch (oip->ip_p) */
3209 
3210 	/*
3211 	 * Step 3
3212 	 * Make the adjustments to icmp checksum.
3213 	 */
3214 
3215 	if (sumd2 != 0) {
3216 		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3217 		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3218 		fix_incksum(&icmp->icmp_cksum, sumd2);
3219 	}
3220 	return nat;
3221 }
3222 
3223 
/*
 * NB: these lookups don't lock access to the list; it is assumed that this
 * has already been done!
 */
3228 
3229 /* ------------------------------------------------------------------------ */
3230 /* Function:    nat_inlookup                                                */
3231 /* Returns:     nat_t* - NULL == no match,                                  */
3232 /*                       else pointer to matching NAT entry                 */
3233 /* Parameters:  fin(I)    - pointer to packet information                   */
3234 /*              flags(I)  - NAT flags for this packet                       */
3235 /*              p(I)      - protocol for this packet                        */
3236 /*              src(I)    - source IP address                               */
3237 /*              mapdst(I) - destination IP address                          */
3238 /*                                                                          */
3239 /* Lookup a nat entry based on the mapped destination ip address/port and   */
3240 /* real source address/port.  We use this lookup when receiving a packet,   */
3241 /* we're looking for a table entry, based on the destination address.       */
3242 /*                                                                          */
3243 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3244 /*                                                                          */
3245 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3246 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3247 /*                                                                          */
3248 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3249 /*            the packet is of said protocol                                */
3250 /* ------------------------------------------------------------------------ */
3251 nat_t *nat_inlookup(fin, flags, p, src, mapdst)
3252 fr_info_t *fin;
3253 u_int flags, p;
3254 struct in_addr src , mapdst;
3255 {
3256 	u_short sport, dport;
3257 	ipnat_t *ipn;
3258 	u_int sflags;
3259 	nat_t *nat;
3260 	int nflags;
3261 	u_32_t dst;
3262 	void *ifp;
3263 	u_int hv;
3264 	ipf_stack_t *ifs = fin->fin_ifs;
3265 
3266 	if (fin != NULL)
3267 		ifp = fin->fin_ifp;
3268 	else
3269 		ifp = NULL;
3270 	sport = 0;
3271 	dport = 0;
3272 	dst = mapdst.s_addr;
3273 	sflags = flags & NAT_TCPUDPICMP;
3274 
3275 	switch (p)
3276 	{
3277 	case IPPROTO_TCP :
3278 	case IPPROTO_UDP :
3279 		sport = htons(fin->fin_data[0]);
3280 		dport = htons(fin->fin_data[1]);
3281 		break;
3282 	case IPPROTO_ICMP :
3283 		if (flags & IPN_ICMPERR)
3284 			sport = fin->fin_data[1];
3285 		else
3286 			dport = fin->fin_data[1];
3287 		break;
3288 	default :
3289 		break;
3290 	}
3291 
3292 
3293 	if ((flags & SI_WILDP) != 0)
3294 		goto find_in_wild_ports;
3295 
3296 	hv = NAT_HASH_FN(dst, dport, 0xffffffff);
3297 	hv = NAT_HASH_FN(src.s_addr, hv + sport, ifs->ifs_ipf_nattable_sz);
3298 	nat = ifs->ifs_nat_table[1][hv];
3299 	for (; nat; nat = nat->nat_hnext[1]) {
3300 		if (nat->nat_v != 4)
3301 			continue;
3302 
3303 		if (nat->nat_ifps[0] != NULL) {
3304 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3305 				continue;
3306 		} else if (ifp != NULL)
3307 			nat->nat_ifps[0] = ifp;
3308 
3309 		nflags = nat->nat_flags;
3310 
3311 		if (nat->nat_oip.s_addr == src.s_addr &&
3312 		    nat->nat_outip.s_addr == dst &&
3313 		    (((p == 0) &&
3314 		      (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3315 		     || (p == nat->nat_p))) {
3316 			switch (p)
3317 			{
3318 #if 0
3319 			case IPPROTO_GRE :
3320 				if (nat->nat_call[1] != fin->fin_data[0])
3321 					continue;
3322 				break;
3323 #endif
3324 			case IPPROTO_ICMP :
3325 				if ((flags & IPN_ICMPERR) != 0) {
3326 					if (nat->nat_outport != sport)
3327 						continue;
3328 				} else {
3329 					if (nat->nat_outport != dport)
3330 						continue;
3331 				}
3332 				break;
3333 			case IPPROTO_TCP :
3334 			case IPPROTO_UDP :
3335 				if (nat->nat_oport != sport)
3336 					continue;
3337 				if (nat->nat_outport != dport)
3338 					continue;
3339 				break;
3340 			default :
3341 				break;
3342 			}
3343 
3344 			ipn = nat->nat_ptr;
3345 			if ((ipn != NULL) && (nat->nat_aps != NULL))
3346 				if (appr_match(fin, nat) != 0)
3347 					continue;
3348 			return nat;
3349 		}
3350 	}
3351 
3352 	/*
3353 	 * So if we didn't find it but there are wildcard members in the hash
3354 	 * table, go back and look for them.  We do this search and update here
3355 	 * because it is modifying the NAT table and we want to do this only
3356 	 * for the first packet that matches.  The exception, of course, is
3357 	 * for "dummy" (FI_IGNORE) lookups.
3358 	 */
3359 find_in_wild_ports:
3360 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3361 		return NULL;
3362 	if (ifs->ifs_nat_stats.ns_wilds == 0)
3363 		return NULL;
3364 
3365 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3366 
3367 	hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3368 	hv = NAT_HASH_FN(src.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3369 
3370 	WRITE_ENTER(&ifs->ifs_ipf_nat);
3371 
3372 	nat = ifs->ifs_nat_table[1][hv];
3373 	for (; nat; nat = nat->nat_hnext[1]) {
3374 		if (nat->nat_v != 4)
3375 			continue;
3376 
3377 		if (nat->nat_ifps[0] != NULL) {
3378 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3379 				continue;
3380 		} else if (ifp != NULL)
3381 			nat->nat_ifps[0] = ifp;
3382 
3383 		if (nat->nat_p != fin->fin_p)
3384 			continue;
3385 		if (nat->nat_oip.s_addr != src.s_addr ||
3386 		    nat->nat_outip.s_addr != dst)
3387 			continue;
3388 
3389 		nflags = nat->nat_flags;
3390 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3391 			continue;
3392 
3393 		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3394 			       NAT_INBOUND) == 1) {
3395 			if ((fin->fin_flx & FI_IGNORE) != 0)
3396 				break;
3397 			if ((nflags & SI_CLONE) != 0) {
3398 				nat = fr_natclone(fin, nat);
3399 				if (nat == NULL)
3400 					break;
3401 			} else {
3402 				MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3403 				ifs->ifs_nat_stats.ns_wilds--;
3404 				MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3405 			}
3406 			nat->nat_oport = sport;
3407 			nat->nat_outport = dport;
3408 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3409 			nat_tabmove(nat, ifs);
3410 			break;
3411 		}
3412 	}
3413 
3414 	MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3415 
3416 	return nat;
3417 }
3418 
3419 
3420 /* ------------------------------------------------------------------------ */
3421 /* Function:    nat_tabmove                                                 */
3422 /* Returns:     Nil                                                         */
3423 /* Parameters:  nat(I) - pointer to NAT structure                           */
3424 /* Write Lock:  ipf_nat                                                     */
3425 /*                                                                          */
3426 /* This function is only called for TCP/UDP NAT table entries where the     */
3427 /* original was placed in the table without hashing on the ports and we now */
3428 /* want to include hashing on port numbers.                                 */
3429 /* ------------------------------------------------------------------------ */
3430 static void nat_tabmove(nat, ifs)
3431 nat_t *nat;
3432 ipf_stack_t *ifs;
3433 {
3434 	nat_t **natp;
3435 	u_int hv;
3436 
3437 	if (nat->nat_flags & SI_CLONE)
3438 		return;
3439 
3440 	/*
3441 	 * Remove the NAT entry from the old location
3442 	 */
3443 	if (nat->nat_hnext[0])
3444 		nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3445 	*nat->nat_phnext[0] = nat->nat_hnext[0];
3446 	ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3447 
3448 	if (nat->nat_hnext[1])
3449 		nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3450 	*nat->nat_phnext[1] = nat->nat_hnext[1];
3451 	ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3452 
3453 	/*
3454 	 * Add into the NAT table in the new position
3455 	 */
3456 	hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3457 	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3458 			 ifs->ifs_ipf_nattable_sz);
3459 	nat->nat_hv[0] = hv;
3460 	natp = &ifs->ifs_nat_table[0][hv];
3461 	if (*natp)
3462 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3463 	nat->nat_phnext[0] = natp;
3464 	nat->nat_hnext[0] = *natp;
3465 	*natp = nat;
3466 	ifs->ifs_nat_stats.ns_bucketlen[0][hv]++;
3467 
3468 	hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3469 	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3470 			 ifs->ifs_ipf_nattable_sz);
3471 	nat->nat_hv[1] = hv;
3472 	natp = &ifs->ifs_nat_table[1][hv];
3473 	if (*natp)
3474 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3475 	nat->nat_phnext[1] = natp;
3476 	nat->nat_hnext[1] = *natp;
3477 	*natp = nat;
3478 	ifs->ifs_nat_stats.ns_bucketlen[1][hv]++;
3479 }
3480 
3481 
3482 /* ------------------------------------------------------------------------ */
3483 /* Function:    nat_outlookup                                               */
3484 /* Returns:     nat_t* - NULL == no match,                                  */
3485 /*                       else pointer to matching NAT entry                 */
3486 /* Parameters:  fin(I)   - pointer to packet information                    */
3487 /*              flags(I) - NAT flags for this packet                        */
3488 /*              p(I)     - protocol for this packet                         */
3489 /*              src(I)   - source IP address                                */
3490 /*              dst(I)   - destination IP address                           */
3491 /*              rw(I)    - 1 == write lock on ipf_nat held, 0 == read lock. */
3492 /*                                                                          */
3493 /* Lookup a nat entry based on the source 'real' ip address/port and        */
3494 /* destination address/port.  We use this lookup when sending a packet out, */
3495 /* we're looking for a table entry, based on the source address.            */
3496 /*                                                                          */
3497 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3498 /*                                                                          */
3499 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3500 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3501 /*                                                                          */
3502 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3503 /*            the packet is of said protocol                                */
3504 /* ------------------------------------------------------------------------ */
3505 nat_t *nat_outlookup(fin, flags, p, src, dst)
3506 fr_info_t *fin;
3507 u_int flags, p;
3508 struct in_addr src , dst;
3509 {
3510 	u_short sport, dport;
3511 	u_int sflags;
3512 	ipnat_t *ipn;
3513 	u_32_t srcip;
3514 	nat_t *nat;
3515 	int nflags;
3516 	void *ifp;
3517 	u_int hv;
3518 	ipf_stack_t *ifs = fin->fin_ifs;
3519 
3520 	ifp = fin->fin_ifp;
3521 
3522 	srcip = src.s_addr;
3523 	sflags = flags & IPN_TCPUDPICMP;
3524 	sport = 0;
3525 	dport = 0;
3526 
3527 	switch (p)
3528 	{
3529 	case IPPROTO_TCP :
3530 	case IPPROTO_UDP :
3531 		sport = htons(fin->fin_data[0]);
3532 		dport = htons(fin->fin_data[1]);
3533 		break;
3534 	case IPPROTO_ICMP :
3535 		if (flags & IPN_ICMPERR)
3536 			sport = fin->fin_data[1];
3537 		else
3538 			dport = fin->fin_data[1];
3539 		break;
3540 	default :
3541 		break;
3542 	}
3543 
3544 	if ((flags & SI_WILDP) != 0)
3545 		goto find_out_wild_ports;
3546 
3547 	hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3548 	hv = NAT_HASH_FN(dst.s_addr, hv + dport, ifs->ifs_ipf_nattable_sz);
3549 	nat = ifs->ifs_nat_table[0][hv];
3550 	for (; nat; nat = nat->nat_hnext[0]) {
3551 		if (nat->nat_v != 4)
3552 			continue;
3553 
3554 		if (nat->nat_ifps[1] != NULL) {
3555 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3556 				continue;
3557 		} else if (ifp != NULL)
3558 			nat->nat_ifps[1] = ifp;
3559 
3560 		nflags = nat->nat_flags;
3561 
3562 		if (nat->nat_inip.s_addr == srcip &&
3563 		    nat->nat_oip.s_addr == dst.s_addr &&
3564 		    (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3565 		     || (p == nat->nat_p))) {
3566 			switch (p)
3567 			{
3568 #if 0
3569 			case IPPROTO_GRE :
3570 				if (nat->nat_call[1] != fin->fin_data[0])
3571 					continue;
3572 				break;
3573 #endif
3574 			case IPPROTO_TCP :
3575 			case IPPROTO_UDP :
3576 				if (nat->nat_oport != dport)
3577 					continue;
3578 				if (nat->nat_inport != sport)
3579 					continue;
3580 				break;
3581 			default :
3582 				break;
3583 			}
3584 
3585 			ipn = nat->nat_ptr;
3586 			if ((ipn != NULL) && (nat->nat_aps != NULL))
3587 				if (appr_match(fin, nat) != 0)
3588 					continue;
3589 			return nat;
3590 		}
3591 	}
3592 
3593 	/*
3594 	 * So if we didn't find it but there are wildcard members in the hash
3595 	 * table, go back and look for them.  We do this search and update here
3596 	 * because it is modifying the NAT table and we want to do this only
3597 	 * for the first packet that matches.  The exception, of course, is
3598 	 * for "dummy" (FI_IGNORE) lookups.
3599 	 */
3600 find_out_wild_ports:
3601 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3602 		return NULL;
3603 	if (ifs->ifs_nat_stats.ns_wilds == 0)
3604 		return NULL;
3605 
3606 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3607 
3608 	hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3609 	hv = NAT_HASH_FN(dst.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3610 
3611 	WRITE_ENTER(&ifs->ifs_ipf_nat);
3612 
3613 	nat = ifs->ifs_nat_table[0][hv];
3614 	for (; nat; nat = nat->nat_hnext[0]) {
3615 		if (nat->nat_v != 4)
3616 			continue;
3617 
3618 		if (nat->nat_ifps[1] != NULL) {
3619 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3620 				continue;
3621 		} else if (ifp != NULL)
3622 			nat->nat_ifps[1] = ifp;
3623 
3624 		if (nat->nat_p != fin->fin_p)
3625 			continue;
3626 		if ((nat->nat_inip.s_addr != srcip) ||
3627 		    (nat->nat_oip.s_addr != dst.s_addr))
3628 			continue;
3629 
3630 		nflags = nat->nat_flags;
3631 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3632 			continue;
3633 
3634 		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3635 			       NAT_OUTBOUND) == 1) {
3636 			if ((fin->fin_flx & FI_IGNORE) != 0)
3637 				break;
3638 			if ((nflags & SI_CLONE) != 0) {
3639 				nat = fr_natclone(fin, nat);
3640 				if (nat == NULL)
3641 					break;
3642 			} else {
3643 				MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3644 				ifs->ifs_nat_stats.ns_wilds--;
3645 				MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3646 			}
3647 			nat->nat_inport = sport;
3648 			nat->nat_oport = dport;
3649 			if (nat->nat_outport == 0)
3650 				nat->nat_outport = sport;
3651 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3652 			nat_tabmove(nat, ifs);
3653 			break;
3654 		}
3655 	}
3656 
3657 	MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3658 
3659 	return nat;
3660 }
3661 
3662 
3663 /* ------------------------------------------------------------------------ */
3664 /* Function:    nat_lookupredir                                             */
3665 /* Returns:     nat_t* - NULL == no match,                                  */
3666 /*                       else pointer to matching NAT entry                 */
3667 /* Parameters:  np(I) - pointer to description of packet to find NAT table  */
3668 /*                      entry for.                                          */
3669 /*                                                                          */
3670 /* Lookup the NAT tables to search for a matching redirect                  */
3671 /* ------------------------------------------------------------------------ */
3672 nat_t *nat_lookupredir(np, ifs)
3673 natlookup_t *np;
3674 ipf_stack_t *ifs;
3675 {
3676 	fr_info_t fi;
3677 	nat_t *nat;
3678 
3679 	bzero((char *)&fi, sizeof(fi));
3680 	if (np->nl_flags & IPN_IN) {
3681 		fi.fin_data[0] = ntohs(np->nl_realport);
3682 		fi.fin_data[1] = ntohs(np->nl_outport);
3683 	} else {
3684 		fi.fin_data[0] = ntohs(np->nl_inport);
3685 		fi.fin_data[1] = ntohs(np->nl_outport);
3686 	}
3687 	if (np->nl_flags & IPN_TCP)
3688 		fi.fin_p = IPPROTO_TCP;
3689 	else if (np->nl_flags & IPN_UDP)
3690 		fi.fin_p = IPPROTO_UDP;
3691 	else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3692 		fi.fin_p = IPPROTO_ICMP;
3693 
3694 	fi.fin_ifs = ifs;
3695 	/*
3696 	 * We can do two sorts of lookups:
3697 	 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3698 	 * - default: we have the `in' and `out' address, look for `real'.
3699 	 */
3700 	if (np->nl_flags & IPN_IN) {
3701 		if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3702 					np->nl_realip, np->nl_outip))) {
3703 			np->nl_inip = nat->nat_inip;
3704 			np->nl_inport = nat->nat_inport;
3705 		}
3706 	} else {
3707 		/*
3708 		 * If nl_inip is non null, this is a lookup based on the real
3709 		 * ip address. Else, we use the fake.
3710 		 */
3711 		if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3712 					 np->nl_inip, np->nl_outip))) {
3713 
3714 			if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3715 				fr_info_t fin;
3716 				bzero((char *)&fin, sizeof(fin));
3717 				fin.fin_p = nat->nat_p;
3718 				fin.fin_data[0] = ntohs(nat->nat_outport);
3719 				fin.fin_data[1] = ntohs(nat->nat_oport);
3720 				fin.fin_ifs = ifs;
3721 				if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3722 						 nat->nat_outip,
3723 						 nat->nat_oip) != NULL) {
3724 					np->nl_flags &= ~IPN_FINDFORWARD;
3725 				}
3726 			}
3727 
3728 			np->nl_realip = nat->nat_outip;
3729 			np->nl_realport = nat->nat_outport;
3730 		}
3731  	}
3732 
3733 	return nat;
3734 }
3735 
3736 
3737 /* ------------------------------------------------------------------------ */
3738 /* Function:    nat_match                                                   */
3739 /* Returns:     int - 0 == no match, 1 == match                             */
3740 /* Parameters:  fin(I)   - pointer to packet information                    */
3741 /*              np(I)    - pointer to NAT rule                              */
3742 /*                                                                          */
3743 /* Pull the matching of a packet against a NAT rule out of that complex     */
3744 /* loop inside fr_checknatin() and lay it out properly in its own function. */
3745 /* ------------------------------------------------------------------------ */
3746 static int nat_match(fin, np)
3747 fr_info_t *fin;
3748 ipnat_t *np;
3749 {
3750 	frtuc_t *ft;
3751 
3752 	if (fin->fin_v != 4)
3753 		return 0;
3754 
3755 	if (np->in_p && fin->fin_p != np->in_p)
3756 		return 0;
3757 
3758 	if (fin->fin_out) {
3759 		if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3760 			return 0;
3761 		if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3762 		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3763 			return 0;
3764 		if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3765 		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3766 			return 0;
3767 	} else {
3768 		if (!(np->in_redir & NAT_REDIRECT))
3769 			return 0;
3770 		if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3771 		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3772 			return 0;
3773 		if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3774 		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3775 			return 0;
3776 	}
3777 
3778 	ft = &np->in_tuc;
3779 	if (!(fin->fin_flx & FI_TCPUDP) ||
3780 	    (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3781 		if (ft->ftu_scmp || ft->ftu_dcmp)
3782 			return 0;
3783 		return 1;
3784 	}
3785 
3786 	return fr_tcpudpchk(fin, ft);
3787 }
3788 
3789 
3790 /* ------------------------------------------------------------------------ */
3791 /* Function:    nat_update                                                  */
3792 /* Returns:     Nil                                                         */
3793 /* Parameters:  nat(I)    - pointer to NAT structure                        */
3794 /*              np(I)     - pointer to NAT rule                             */
3795 /*                                                                          */
3796 /* Updates the lifetime of a NAT table entry for non-TCP packets.  Must be  */
3797 /* called with fin_rev updated - i.e. after calling nat_proto().            */
3798 /* ------------------------------------------------------------------------ */
3799 void nat_update(fin, nat, np)
3800 fr_info_t *fin;
3801 nat_t *nat;
3802 ipnat_t *np;
3803 {
3804 	ipftq_t *ifq, *ifq2;
3805 	ipftqent_t *tqe;
3806 	ipf_stack_t *ifs = fin->fin_ifs;
3807 
3808 	MUTEX_ENTER(&nat->nat_lock);
3809 	tqe = &nat->nat_tqe;
3810 	ifq = tqe->tqe_ifq;
3811 
3812 	/*
3813 	 * We allow over-riding of NAT timeouts from NAT rules, even for
3814 	 * TCP, however, if it is TCP and there is no rule timeout set,
3815 	 * then do not update the timeout here.
3816 	 */
3817 	if (np != NULL)
3818 		ifq2 = np->in_tqehead[fin->fin_rev];
3819 	else
3820 		ifq2 = NULL;
3821 
3822 	if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3823 		(void) fr_tcp_age(&nat->nat_tqe, fin, ifs->ifs_nat_tqb, 0);
3824 	} else {
3825 		if (ifq2 == NULL) {
3826 			if (nat->nat_p == IPPROTO_UDP)
3827 				ifq2 = &ifs->ifs_nat_udptq;
3828 			else if (nat->nat_p == IPPROTO_ICMP)
3829 				ifq2 = &ifs->ifs_nat_icmptq;
3830 			else
3831 				ifq2 = &ifs->ifs_nat_iptq;
3832 		}
3833 
3834 		fr_movequeue(tqe, ifq, ifq2, ifs);
3835 	}
3836 	MUTEX_EXIT(&nat->nat_lock);
3837 }
3838 
3839 
3840 /* ------------------------------------------------------------------------ */
3841 /* Function:    fr_checknatout                                              */
3842 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3843 /*                     0 == no packet translation occurred,                 */
3844 /*                     1 == packet was successfully translated.             */
3845 /* Parameters:  fin(I)   - pointer to packet information                    */
3846 /*              passp(I) - pointer to filtering result flags                */
3847 /*                                                                          */
3848 /* Check to see if an outcoming packet should be changed.  ICMP packets are */
3849 /* first checked to see if they match an existing entry (if an error),      */
3850 /* otherwise a search of the current NAT table is made.  If neither results */
3851 /* in a match then a search for a matching NAT rule is made.  Create a new  */
3852 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
3853 /* packet header(s) as required.                                            */
3854 /* ------------------------------------------------------------------------ */
3855 int fr_checknatout(fin, passp)
3856 fr_info_t *fin;
3857 u_32_t *passp;
3858 {
3859 	ipnat_t *np = NULL, *npnext;
3860 	struct ifnet *ifp, *sifp;
3861 	icmphdr_t *icmp = NULL;
3862 	tcphdr_t *tcp = NULL;
3863 	int rval, natfailed;
3864 	u_int nflags = 0;
3865 	u_32_t ipa, iph;
3866 	int natadd = 1;
3867 	frentry_t *fr;
3868 	nat_t *nat;
3869 	ipf_stack_t *ifs = fin->fin_ifs;
3870 
3871 	if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0)
3872 		return 0;
3873 
3874 	natfailed = 0;
3875 	fr = fin->fin_fr;
3876 	sifp = fin->fin_ifp;
3877 	if ((fr != NULL) && !(fr->fr_flags & FR_DUP) &&
3878 	    fr->fr_tifs[fin->fin_rev].fd_ifp &&
3879 	    fr->fr_tifs[fin->fin_rev].fd_ifp != (void *)-1)
3880 		fin->fin_ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3881 	ifp = fin->fin_ifp;
3882 
3883 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3884 		switch (fin->fin_p)
3885 		{
3886 		case IPPROTO_TCP :
3887 			nflags = IPN_TCP;
3888 			break;
3889 		case IPPROTO_UDP :
3890 			nflags = IPN_UDP;
3891 			break;
3892 		case IPPROTO_ICMP :
3893 			icmp = fin->fin_dp;
3894 
3895 			/*
3896 			 * This is an incoming packet, so the destination is
3897 			 * the icmp_id and the source port equals 0
3898 			 */
3899 			if (nat_icmpquerytype4(icmp->icmp_type))
3900 				nflags = IPN_ICMPQUERY;
3901 			break;
3902 		default :
3903 			break;
3904 		}
3905 
3906 		if ((nflags & IPN_TCPUDP))
3907 			tcp = fin->fin_dp;
3908 	}
3909 
3910 	ipa = fin->fin_saddr;
3911 
3912 	READ_ENTER(&ifs->ifs_ipf_nat);
3913 
3914 	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3915 	    (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3916 		/*EMPTY*/;
3917 	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3918 		natadd = 0;
3919 	else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3920 				      fin->fin_src, fin->fin_dst))) {
3921 		nflags = nat->nat_flags;
3922 	} else {
3923 		u_32_t hv, msk, nmsk;
3924 
3925 		/*
3926 		 * If there is no current entry in the nat table for this IP#,
3927 		 * create one for it (if there is a matching rule).
3928 		 */
3929 		msk = 0xffffffff;
3930 		nmsk = ifs->ifs_nat_masks;
3931 maskloop:
3932 		iph = ipa & htonl(msk);
3933 		hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_natrules_sz);
3934 		for (np = ifs->ifs_nat_rules[hv]; np; np = npnext) {
3935 			npnext = np->in_mnext;
3936 			if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
3937 				continue;
3938 			if (np->in_v != fin->fin_v)
3939 				continue;
3940 			if (np->in_p && (np->in_p != fin->fin_p))
3941 				continue;
3942 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3943 				continue;
3944 			if (np->in_flags & IPN_FILTER) {
3945 				if (!nat_match(fin, np))
3946 					continue;
3947 			} else if ((ipa & np->in_inmsk) != np->in_inip)
3948 				continue;
3949 
3950 			if ((fr != NULL) &&
3951 			    !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3952 				continue;
3953 
3954 			if (*np->in_plabel != '\0') {
3955 				if (((np->in_flags & IPN_FILTER) == 0) &&
3956 				    (np->in_dport != tcp->th_dport))
3957 					continue;
3958 				if (appr_ok(fin, tcp, np) == 0)
3959 					continue;
3960 			}
3961 
3962 			ATOMIC_INC32(np->in_use);
3963 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3964 			WRITE_ENTER(&ifs->ifs_ipf_nat);
3965 			nat = nat_new(fin, np, NULL, nflags, NAT_OUTBOUND);
3966 			if (nat != NULL) {
3967 				np->in_use--;
3968 				np->in_hits++;
3969 				MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3970 				break;
3971 			}
3972 			natfailed = -1;
3973 			npnext = np->in_mnext;
3974 			fr_ipnatderef(&np, ifs);
3975 			MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3976 		}
3977 		if ((np == NULL) && (nmsk != 0)) {
3978 			while (nmsk) {
3979 				msk <<= 1;
3980 				if (nmsk & 0x80000000)
3981 					break;
3982 				nmsk <<= 1;
3983 			}
3984 			if (nmsk != 0) {
3985 				nmsk <<= 1;
3986 				goto maskloop;
3987 			}
3988 		}
3989 	}
3990 
3991 	if (nat != NULL) {
3992 		rval = fr_natout(fin, nat, natadd, nflags);
3993 		if (rval == 1) {
3994 			MUTEX_ENTER(&nat->nat_lock);
3995 			nat->nat_ref++;
3996 			MUTEX_EXIT(&nat->nat_lock);
3997 			nat->nat_touched = ifs->ifs_fr_ticks;
3998 			fin->fin_nat = nat;
3999 		}
4000 	} else
4001 		rval = natfailed;
4002 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4003 
4004 	if (rval == -1) {
4005 		if (passp != NULL)
4006 			*passp = FR_BLOCK;
4007 		fin->fin_flx |= FI_BADNAT;
4008 	}
4009 	fin->fin_ifp = sifp;
4010 	return rval;
4011 }
4012 
4013 /* ------------------------------------------------------------------------ */
4014 /* Function:    fr_natout                                                   */
4015 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4016 /*                     1 == packet was successfully translated.             */
4017 /* Parameters:  fin(I)    - pointer to packet information                   */
4018 /*              nat(I)    - pointer to NAT structure                        */
4019 /*              natadd(I) - flag indicating if it is safe to add frag cache */
4020 /*              nflags(I) - NAT flags set for this packet                   */
4021 /*                                                                          */
4022 /* Translate a packet coming "out" on an interface.                         */
4023 /* ------------------------------------------------------------------------ */
4024 int fr_natout(fin, nat, natadd, nflags)
4025 fr_info_t *fin;
4026 nat_t *nat;
4027 int natadd;
4028 u_32_t nflags;
4029 {
4030 	icmphdr_t *icmp;
4031 	u_short *csump;
4032 	u_32_t sumd;
4033 	tcphdr_t *tcp;
4034 	ipnat_t *np;
4035 	int i;
4036 	ipf_stack_t *ifs = fin->fin_ifs;
4037 
4038 	if (fin->fin_v == 6) {
4039 #ifdef	USE_INET6
4040 		return fr_nat6out(fin, nat, natadd, nflags);
4041 #else
4042 		return NULL;
4043 #endif
4044 	}
4045 
4046 #if SOLARIS && defined(_KERNEL)
4047 	net_data_t net_data_p = ifs->ifs_ipf_ipv4;
4048 #endif
4049 
4050 	tcp = NULL;
4051 	icmp = NULL;
4052 	csump = NULL;
4053 	np = nat->nat_ptr;
4054 
4055 	if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4056 		(void) fr_nat_newfrag(fin, 0, nat);
4057 
4058 	MUTEX_ENTER(&nat->nat_lock);
4059 	nat->nat_bytes[1] += fin->fin_plen;
4060 	nat->nat_pkts[1]++;
4061 	MUTEX_EXIT(&nat->nat_lock);
4062 
4063 	/*
4064 	 * Fix up checksums, not by recalculating them, but
4065 	 * simply computing adjustments.
4066 	 * This is only done for STREAMS based IP implementations where the
4067 	 * checksum has already been calculated by IP.  In all other cases,
4068 	 * IPFilter is called before the checksum needs calculating so there
4069 	 * is no call to modify whatever is in the header now.
4070 	 */
4071 	ASSERT(fin->fin_m != NULL);
4072 	if (fin->fin_v == 4 && !NET_IS_HCK_L3_FULL(net_data_p, fin->fin_m)) {
4073 		if (nflags == IPN_ICMPERR) {
4074 			u_32_t s1, s2;
4075 
4076 			s1 = LONG_SUM(ntohl(fin->fin_saddr));
4077 			s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
4078 			CALC_SUMD(s1, s2, sumd);
4079 
4080 			fix_outcksum(&fin->fin_ip->ip_sum, sumd);
4081 		}
4082 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4083     defined(linux) || defined(BRIDGE_IPF)
4084 		else {
4085 			/*
4086 			 * Strictly speaking, this isn't necessary on BSD
4087 			 * kernels because they do checksum calculation after
4088 			 * this code has run BUT if ipfilter is being used
4089 			 * to do NAT as a bridge, that code doesn't exist.
4090 			 */
4091 			if (nat->nat_dir == NAT_OUTBOUND)
4092 				fix_outcksum(&fin->fin_ip->ip_sum,
4093 					    nat->nat_ipsumd);
4094 			else
4095 				fix_incksum(&fin->fin_ip->ip_sum,
4096 				 	   nat->nat_ipsumd);
4097 		}
4098 #endif
4099 	}
4100 
4101 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4102 		if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
4103 			tcp = fin->fin_dp;
4104 
4105 			tcp->th_sport = nat->nat_outport;
4106 			fin->fin_data[0] = ntohs(nat->nat_outport);
4107 		}
4108 
4109 		if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
4110 			icmp = fin->fin_dp;
4111 			icmp->icmp_id = nat->nat_outport;
4112 		}
4113 
4114 		csump = nat_proto(fin, nat, nflags);
4115 	}
4116 
4117 	fin->fin_ip->ip_src = nat->nat_outip;
4118 
4119 	nat_update(fin, nat, np);
4120 
4121 	/*
4122 	 * The above comments do not hold for layer 4 (or higher) checksums...
4123 	 */
4124 	if (csump != NULL && !NET_IS_HCK_L4_FULL(net_data_p, fin->fin_m)) {
4125 		if (nflags & IPN_TCPUDP &&
4126 	   	    NET_IS_HCK_L4_PART(net_data_p, fin->fin_m))
4127 			sumd = nat->nat_sumd[1];
4128 		else
4129 			sumd = nat->nat_sumd[0];
4130 
4131 		if (nat->nat_dir == NAT_OUTBOUND)
4132 			fix_outcksum(csump, sumd);
4133 		else
4134 			fix_incksum(csump, sumd);
4135 	}
4136 #ifdef	IPFILTER_SYNC
4137 	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4138 #endif
4139 	/* ------------------------------------------------------------- */
4140 	/* A few quick notes:						 */
4141 	/*	Following are test conditions prior to calling the 	 */
4142 	/*	appr_check routine.					 */
4143 	/*								 */
4144 	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4145 	/*	with a redirect rule, we attempt to match the packet's	 */
4146 	/*	source port against in_dport, otherwise	we'd compare the */
4147 	/*	packet's destination.			 		 */
4148 	/* ------------------------------------------------------------- */
4149 	if ((np != NULL) && (np->in_apr != NULL)) {
4150 		i = appr_check(fin, nat);
4151 		if (i == 0)
4152 			i = 1;
4153 	} else
4154 		i = 1;
4155 	ifs->ifs_nat_stats.ns_mapped[1]++;
4156 	fin->fin_flx |= FI_NATED;
4157 	return i;
4158 }
4159 
4160 
4161 /* ------------------------------------------------------------------------ */
4162 /* Function:    fr_checknatin                                               */
4163 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4164 /*                     0 == no packet translation occurred,                 */
4165 /*                     1 == packet was successfully translated.             */
4166 /* Parameters:  fin(I)   - pointer to packet information                    */
4167 /*              passp(I) - pointer to filtering result flags                */
4168 /*                                                                          */
4169 /* Check to see if an incoming packet should be changed.  ICMP packets are  */
4170 /* first checked to see if they match an existing entry (if an error),      */
4171 /* otherwise a search of the current NAT table is made.  If neither results */
4172 /* in a match then a search for a matching NAT rule is made.  Create a new  */
4173 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
4174 /* packet header(s) as required.                                            */
4175 /* ------------------------------------------------------------------------ */
4176 int fr_checknatin(fin, passp)
4177 fr_info_t *fin;
4178 u_32_t *passp;
4179 {
4180 	u_int nflags, natadd;
4181 	ipnat_t *np, *npnext;
4182 	int rval, natfailed;
4183 	struct ifnet *ifp;
4184 	struct in_addr in;
4185 	icmphdr_t *icmp;
4186 	tcphdr_t *tcp;
4187 	u_short dport;
4188 	nat_t *nat;
4189 	u_32_t iph;
4190 	ipf_stack_t *ifs = fin->fin_ifs;
4191 
4192 	if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0)
4193 		return 0;
4194 
4195 	tcp = NULL;
4196 	icmp = NULL;
4197 	dport = 0;
4198 	natadd = 1;
4199 	nflags = 0;
4200 	natfailed = 0;
4201 	ifp = fin->fin_ifp;
4202 
4203 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4204 		switch (fin->fin_p)
4205 		{
4206 		case IPPROTO_TCP :
4207 			nflags = IPN_TCP;
4208 			break;
4209 		case IPPROTO_UDP :
4210 			nflags = IPN_UDP;
4211 			break;
4212 		case IPPROTO_ICMP :
4213 			icmp = fin->fin_dp;
4214 
4215 			/*
4216 			 * This is an incoming packet, so the destination is
4217 			 * the icmp_id and the source port equals 0
4218 			 */
4219 			if (nat_icmpquerytype4(icmp->icmp_type)) {
4220 				nflags = IPN_ICMPQUERY;
4221 				dport = icmp->icmp_id;
4222 			} break;
4223 		default :
4224 			break;
4225 		}
4226 
4227 		if ((nflags & IPN_TCPUDP)) {
4228 			tcp = fin->fin_dp;
4229 			dport = tcp->th_dport;
4230 		}
4231 	}
4232 
4233 	in = fin->fin_dst;
4234 
4235 	READ_ENTER(&ifs->ifs_ipf_nat);
4236 
4237 	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
4238 	    (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
4239 		/*EMPTY*/;
4240 	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
4241 		natadd = 0;
4242 	else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
4243 				     fin->fin_src, in))) {
4244 		nflags = nat->nat_flags;
4245 	} else {
4246 		u_32_t hv, msk, rmsk;
4247 
4248 		rmsk = ifs->ifs_rdr_masks;
4249 		msk = 0xffffffff;
4250 		/*
4251 		 * If there is no current entry in the nat table for this IP#,
4252 		 * create one for it (if there is a matching rule).
4253 		 */
4254 maskloop:
4255 		iph = in.s_addr & htonl(msk);
4256 		hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_rdrrules_sz);
4257 		for (np = ifs->ifs_rdr_rules[hv]; np; np = npnext) {
4258 			npnext = np->in_rnext;
4259 			if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
4260 				continue;
4261 			if (np->in_v != fin->fin_v)
4262 				continue;
4263 			if (np->in_p && (np->in_p != fin->fin_p))
4264 				continue;
4265 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
4266 				continue;
4267 			if (np->in_flags & IPN_FILTER) {
4268 				if (!nat_match(fin, np))
4269 					continue;
4270 			} else {
4271 				if ((in.s_addr & np->in_outmsk) != np->in_outip)
4272 					continue;
4273 				if (np->in_pmin &&
4274 				    ((ntohs(np->in_pmax) < ntohs(dport)) ||
4275 				     (ntohs(dport) < ntohs(np->in_pmin))))
4276 					continue;
4277 			}
4278 
4279 			if (*np->in_plabel != '\0') {
4280 				if (!appr_ok(fin, tcp, np)) {
4281 					continue;
4282 				}
4283 			}
4284 
4285 			ATOMIC_INC32(np->in_use);
4286 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4287 			WRITE_ENTER(&ifs->ifs_ipf_nat);
4288 			nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
4289 			if (nat != NULL) {
4290 				np->in_use--;
4291 				np->in_hits++;
4292 				MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
4293 				break;
4294 			}
4295 			natfailed = -1;
4296 			npnext = np->in_rnext;
4297 			fr_ipnatderef(&np, ifs);
4298 			MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
4299 		}
4300 
4301 		if ((np == NULL) && (rmsk != 0)) {
4302 			while (rmsk) {
4303 				msk <<= 1;
4304 				if (rmsk & 0x80000000)
4305 					break;
4306 				rmsk <<= 1;
4307 			}
4308 			if (rmsk != 0) {
4309 				rmsk <<= 1;
4310 				goto maskloop;
4311 			}
4312 		}
4313 	}
4314 	if (nat != NULL) {
4315 		rval = fr_natin(fin, nat, natadd, nflags);
4316 		if (rval == 1) {
4317 			MUTEX_ENTER(&nat->nat_lock);
4318 			nat->nat_ref++;
4319 			MUTEX_EXIT(&nat->nat_lock);
4320 			nat->nat_touched = ifs->ifs_fr_ticks;
4321 			fin->fin_nat = nat;
4322 			fin->fin_state = nat->nat_state;
4323 		}
4324 	} else
4325 		rval = natfailed;
4326 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4327 
4328 	if (rval == -1) {
4329 		if (passp != NULL)
4330 			*passp = FR_BLOCK;
4331 		fin->fin_flx |= FI_BADNAT;
4332 	}
4333 	return rval;
4334 }
4335 
4336 
4337 /* ------------------------------------------------------------------------ */
4338 /* Function:    fr_natin                                                    */
4339 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4340 /*                     1 == packet was successfully translated.             */
4341 /* Parameters:  fin(I)    - pointer to packet information                   */
4342 /*              nat(I)    - pointer to NAT structure                        */
4343 /*              natadd(I) - flag indicating if it is safe to add frag cache */
4344 /*              nflags(I) - NAT flags set for this packet                   */
4345 /* Locks Held:  ipf_nat (READ)                                              */
4346 /*                                                                          */
4347 /* Translate a packet coming "in" on an interface.                          */
4348 /* ------------------------------------------------------------------------ */
4349 int fr_natin(fin, nat, natadd, nflags)
4350 fr_info_t *fin;
4351 nat_t *nat;
4352 int natadd;
4353 u_32_t nflags;
4354 {
4355 	icmphdr_t *icmp;
4356 	u_short *csump;
4357 	tcphdr_t *tcp;
4358 	ipnat_t *np;
4359 	int i;
4360 	ipf_stack_t *ifs = fin->fin_ifs;
4361 
4362 	if (fin->fin_v == 6) {
4363 #ifdef	USE_INET6
4364 		return fr_nat6in(fin, nat, natadd, nflags);
4365 #else
4366 		return NULL;
4367 #endif
4368 	}
4369 
4370 #if SOLARIS && defined(_KERNEL)
4371 	net_data_t net_data_p = ifs->ifs_ipf_ipv4;
4372 #endif
4373 
4374 	tcp = NULL;
4375 	csump = NULL;
4376 	np = nat->nat_ptr;
4377 	fin->fin_fr = nat->nat_fr;
4378 
4379 	if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4380 		(void) fr_nat_newfrag(fin, 0, nat);
4381 
4382 	if (np != NULL) {
4383 
4384 	/* ------------------------------------------------------------- */
4385 	/* A few quick notes:						 */
4386 	/*	Following are test conditions prior to calling the 	 */
4387 	/*	appr_check routine.					 */
4388 	/*								 */
4389 	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4390 	/*	with a map rule, we attempt to match the packet's	 */
4391 	/*	source port against in_dport, otherwise	we'd compare the */
4392 	/*	packet's destination.			 		 */
4393 	/* ------------------------------------------------------------- */
4394 		if (np->in_apr != NULL) {
4395 			i = appr_check(fin, nat);
4396 			if (i == -1) {
4397 				return -1;
4398 			}
4399 		}
4400 	}
4401 
4402 #ifdef	IPFILTER_SYNC
4403 	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4404 #endif
4405 
4406 	MUTEX_ENTER(&nat->nat_lock);
4407 	nat->nat_bytes[0] += fin->fin_plen;
4408 	nat->nat_pkts[0]++;
4409 	MUTEX_EXIT(&nat->nat_lock);
4410 
4411 	fin->fin_ip->ip_dst = nat->nat_inip;
4412 	fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4413 	if (nflags & IPN_TCPUDP)
4414 		tcp = fin->fin_dp;
4415 
4416 	/*
4417 	 * Fix up checksums, not by recalculating them, but
4418 	 * simply computing adjustments.
4419 	 * Why only do this for some platforms on inbound packets ?
4420 	 * Because for those that it is done, IP processing is yet to happen
4421 	 * and so the IPv4 header checksum has not yet been evaluated.
4422 	 * Perhaps it should always be done for the benefit of things like
4423 	 * fast forwarding (so that it doesn't need to be recomputed) but with
4424 	 * header checksum offloading, perhaps it is a moot point.
4425 	 */
4426 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4427      defined(__osf__) || defined(linux)
4428 	if (nat->nat_dir == NAT_OUTBOUND)
4429 		fix_incksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
4430 	else
4431 		fix_outcksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
4432 #endif
4433 
4434 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4435 		if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4436 			tcp->th_dport = nat->nat_inport;
4437 			fin->fin_data[1] = ntohs(nat->nat_inport);
4438 		}
4439 
4440 
4441 		if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4442 			icmp = fin->fin_dp;
4443 
4444 			icmp->icmp_id = nat->nat_inport;
4445 		}
4446 
4447 		csump = nat_proto(fin, nat, nflags);
4448 	}
4449 
4450 	nat_update(fin, nat, np);
4451 
4452 	/*
4453 	 * In case they are being forwarded, inbound packets always need to have
4454 	 * their checksum adjusted even if hardware checksum validation said OK.
4455 	 */
4456 	if (csump != NULL) {
4457 		if (nat->nat_dir == NAT_OUTBOUND)
4458 			fix_incksum(csump, nat->nat_sumd[0]);
4459 		else
4460 			fix_outcksum(csump, nat->nat_sumd[0]);
4461 	}
4462 
4463 #if SOLARIS && defined(_KERNEL)
4464 	if (nflags & IPN_TCPUDP &&
4465 	    NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) {
4466 		/*
4467 		 * Need to adjust the partial checksum result stored in
4468 		 * db_cksum16, which will be used for validation in IP.
4469 		 * See IP_CKSUM_RECV().
4470 		 * Adjustment data should be the inverse of the IP address
4471 		 * changes, because db_cksum16 is supposed to be the complement
4472 		 * of the pesudo header.
4473 		 */
4474 		csump = &fin->fin_m->b_datap->db_cksum16;
4475 		if (nat->nat_dir == NAT_OUTBOUND)
4476 			fix_outcksum(csump, nat->nat_sumd[1]);
4477 		else
4478 			fix_incksum(csump, nat->nat_sumd[1]);
4479 	}
4480 #endif
4481 
4482 	ifs->ifs_nat_stats.ns_mapped[0]++;
4483 	fin->fin_flx |= FI_NATED;
4484 	if (np != NULL && np->in_tag.ipt_num[0] != 0)
4485 		fin->fin_nattag = &np->in_tag;
4486 	return 1;
4487 }
4488 
4489 
4490 /* ------------------------------------------------------------------------ */
4491 /* Function:    nat_proto                                                   */
4492 /* Returns:     u_short* - pointer to transport header checksum to update,  */
4493 /*                         NULL if the transport protocol is not recognised */
4494 /*                         as needing a checksum update.                    */
4495 /* Parameters:  fin(I)    - pointer to packet information                   */
4496 /*              nat(I)    - pointer to NAT structure                        */
4497 /*              nflags(I) - NAT flags set for this packet                   */
4498 /*                                                                          */
4499 /* Return the pointer to the checksum field for each protocol so understood.*/
4500 /* If support for making other changes to a protocol header is required,    */
4501 /* that is not strictly 'address' translation, such as clamping the MSS in  */
4502 /* TCP down to a specific value, then do it from here.                      */
4503 /* ------------------------------------------------------------------------ */
4504 u_short *nat_proto(fin, nat, nflags)
4505 fr_info_t *fin;
4506 nat_t *nat;
4507 u_int nflags;
4508 {
4509 	icmphdr_t *icmp;
4510 	struct icmp6_hdr *icmp6;
4511 	u_short *csump;
4512 	tcphdr_t *tcp;
4513 	udphdr_t *udp;
4514 
4515 	csump = NULL;
4516 	if (fin->fin_out == 0) {
4517 		fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4518 	} else {
4519 		fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4520 	}
4521 
4522 	switch (fin->fin_p)
4523 	{
4524 	case IPPROTO_TCP :
4525 		tcp = fin->fin_dp;
4526 
4527 		csump = &tcp->th_sum;
4528 
4529 		/*
4530 		 * Do a MSS CLAMPING on a SYN packet,
4531 		 * only deal IPv4 for now.
4532 		 */
4533 		if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4534 			nat_mssclamp(tcp, nat->nat_mssclamp, csump);
4535 
4536 		break;
4537 
4538 	case IPPROTO_UDP :
4539 		udp = fin->fin_dp;
4540 
4541 		if (udp->uh_sum)
4542 			csump = &udp->uh_sum;
4543 		break;
4544 
4545 	case IPPROTO_ICMP :
4546 		icmp = fin->fin_dp;
4547 
4548 		if ((nflags & IPN_ICMPQUERY) != 0) {
4549 			if (icmp->icmp_cksum != 0)
4550 				csump = &icmp->icmp_cksum;
4551 		}
4552 		break;
4553 
4554 	case IPPROTO_ICMPV6 :
4555 		icmp6 = fin->fin_dp;
4556 
4557 		if ((nflags & IPN_ICMPQUERY) != 0) {
4558 			if (icmp6->icmp6_cksum != 0)
4559 				csump = &icmp6->icmp6_cksum;
4560 		}
4561 		break;
4562 	}
4563 	return csump;
4564 }
4565 
4566 
4567 /* ------------------------------------------------------------------------ */
4568 /* Function:    fr_natunload                                                */
4569 /* Returns:     Nil                                                         */
4570 /* Parameters:  Nil                                                         */
4571 /*                                                                          */
4572 /* Free all memory used by NAT structures allocated at runtime.             */
4573 /* ------------------------------------------------------------------------ */
4574 void fr_natunload(ifs)
4575 ipf_stack_t *ifs;
4576 {
4577 	ipftq_t *ifq, *ifqnext;
4578 
4579 	(void) nat_clearlist(ifs);
4580 	(void) nat_flushtable(ifs);
4581 
4582 	/*
4583 	 * Proxy timeout queues are not cleaned here because although they
4584 	 * exist on the NAT list, appr_unload is called after fr_natunload
4585 	 * and the proxies actually are responsible for them being created.
4586 	 * Should the proxy timeouts have their own list?  There's no real
4587 	 * justification as this is the only complication.
4588 	 */
4589 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4590 		ifqnext = ifq->ifq_next;
4591 		if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4592 		    (fr_deletetimeoutqueue(ifq) == 0))
4593 			fr_freetimeoutqueue(ifq, ifs);
4594 	}
4595 
4596 	if (ifs->ifs_nat_table[0] != NULL) {
4597 		KFREES(ifs->ifs_nat_table[0],
4598 		       sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4599 		ifs->ifs_nat_table[0] = NULL;
4600 	}
4601 	if (ifs->ifs_nat_table[1] != NULL) {
4602 		KFREES(ifs->ifs_nat_table[1],
4603 		       sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4604 		ifs->ifs_nat_table[1] = NULL;
4605 	}
4606 	if (ifs->ifs_nat_rules != NULL) {
4607 		KFREES(ifs->ifs_nat_rules,
4608 		       sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
4609 		ifs->ifs_nat_rules = NULL;
4610 	}
4611 	if (ifs->ifs_rdr_rules != NULL) {
4612 		KFREES(ifs->ifs_rdr_rules,
4613 		       sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
4614 		ifs->ifs_rdr_rules = NULL;
4615 	}
4616 	if (ifs->ifs_maptable != NULL) {
4617 		KFREES(ifs->ifs_maptable,
4618 		       sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
4619 		ifs->ifs_maptable = NULL;
4620 	}
4621 	if (ifs->ifs_nat_stats.ns_bucketlen[0] != NULL) {
4622 		KFREES(ifs->ifs_nat_stats.ns_bucketlen[0],
4623 		       sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4624 		ifs->ifs_nat_stats.ns_bucketlen[0] = NULL;
4625 	}
4626 	if (ifs->ifs_nat_stats.ns_bucketlen[1] != NULL) {
4627 		KFREES(ifs->ifs_nat_stats.ns_bucketlen[1],
4628 		       sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4629 		ifs->ifs_nat_stats.ns_bucketlen[1] = NULL;
4630 	}
4631 
4632 	if (ifs->ifs_fr_nat_maxbucket_reset == 1)
4633 		ifs->ifs_fr_nat_maxbucket = 0;
4634 
4635 	if (ifs->ifs_fr_nat_init == 1) {
4636 		ifs->ifs_fr_nat_init = 0;
4637 		fr_sttab_destroy(ifs->ifs_nat_tqb);
4638 
4639 		RW_DESTROY(&ifs->ifs_ipf_natfrag);
4640 		RW_DESTROY(&ifs->ifs_ipf_nat);
4641 
4642 		MUTEX_DESTROY(&ifs->ifs_ipf_nat_new);
4643 		MUTEX_DESTROY(&ifs->ifs_ipf_natio);
4644 
4645 		MUTEX_DESTROY(&ifs->ifs_nat_udptq.ifq_lock);
4646 		MUTEX_DESTROY(&ifs->ifs_nat_icmptq.ifq_lock);
4647 		MUTEX_DESTROY(&ifs->ifs_nat_iptq.ifq_lock);
4648 	}
4649 }
4650 
4651 
4652 /* ------------------------------------------------------------------------ */
4653 /* Function:    fr_natexpire                                                */
4654 /* Returns:     Nil                                                         */
4655 /* Parameters:  Nil                                                         */
4656 /*                                                                          */
4657 /* Check all of the timeout queues for entries at the top which need to be  */
4658 /* expired.                                                                 */
4659 /* ------------------------------------------------------------------------ */
4660 void fr_natexpire(ifs)
4661 ipf_stack_t *ifs;
4662 {
4663 	ipftq_t *ifq, *ifqnext;
4664 	ipftqent_t *tqe, *tqn;
4665 	int i;
4666 	SPL_INT(s);
4667 
4668 	SPL_NET(s);
4669 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4670 	for (ifq = ifs->ifs_nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4671 		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4672 			if (tqe->tqe_die > ifs->ifs_fr_ticks)
4673 				break;
4674 			tqn = tqe->tqe_next;
4675 			nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4676 		}
4677 	}
4678 
4679 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4680 		ifqnext = ifq->ifq_next;
4681 
4682 		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4683 			if (tqe->tqe_die > ifs->ifs_fr_ticks)
4684 				break;
4685 			tqn = tqe->tqe_next;
4686 			nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4687 		}
4688 	}
4689 
4690 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4691 		ifqnext = ifq->ifq_next;
4692 
4693 		if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4694 		    (ifq->ifq_ref == 0)) {
4695 			fr_freetimeoutqueue(ifq, ifs);
4696 		}
4697 	}
4698 
4699 	if (ifs->ifs_nat_doflush != 0) {
4700 		(void) nat_extraflush(2, ifs);
4701 		ifs->ifs_nat_doflush = 0;
4702 	}
4703 
4704 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4705 	SPL_X(s);
4706 }
4707 
4708 
4709 /* ------------------------------------------------------------------------ */
4710 /* Function:    fr_nataddrsync                                              */
4711 /* Returns:     Nil                                                         */
4712 /* Parameters:  ifp(I) -  pointer to network interface                      */
4713 /*              addr(I) - pointer to new network address                    */
4714 /*                                                                          */
4715 /* Walk through all of the currently active NAT sessions, looking for those */
4716 /* which need to have their translated address updated (where the interface */
4717 /* matches the one passed in) and change it, recalculating the checksum sum */
4718 /* difference too.                                                          */
4719 /* ------------------------------------------------------------------------ */
4720 void fr_nataddrsync(v, ifp, addr, ifs)
4721 int v;
4722 void *ifp;
4723 void *addr;
4724 ipf_stack_t *ifs;
4725 {
4726 	u_32_t sum1, sum2, sumd;
4727 	nat_t *nat;
4728 	ipnat_t *np;
4729 	SPL_INT(s);
4730 
4731 	if (ifs->ifs_fr_running <= 0)
4732 		return;
4733 
4734 	SPL_NET(s);
4735 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4736 
4737 	if (ifs->ifs_fr_running <= 0) {
4738 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4739 		return;
4740 	}
4741 
4742 	/*
4743 	 * Change IP addresses for NAT sessions for any protocol except TCP
4744 	 * since it will break the TCP connection anyway.  The only rules
4745 	 * which will get changed are those which are "map ... -> 0/32",
4746 	 * where the rule specifies the address is taken from the interface.
4747 	 */
4748 	for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4749 		if (addr != NULL) {
4750 			if (((ifp != NULL) && ifp != (nat->nat_ifps[0])) ||
4751 			    ((nat->nat_flags & IPN_TCP) != 0))
4752 				continue;
4753 			if ((np = nat->nat_ptr) == NULL)
4754 				continue;
4755 			if (v == 4 && np->in_v == 4) {
4756 				if (np->in_nip || np->in_outmsk != 0xffffffff)
4757 					continue;
4758 				/*
4759 				 * Change the map-to address to be the same as
4760 				 * the new one.
4761 				 */
4762 				sum1 = nat->nat_outip.s_addr;
4763 				nat->nat_outip = *(struct in_addr *)addr;
4764 				sum2 = nat->nat_outip.s_addr;
4765 			} else if (v == 6 && np->in_v == 6) {
4766 				if (!IP6_ISZERO(&np->in_next6.in6) ||
4767 				    !IP6_ISONES(&np->in_out[1].in6))
4768 					continue;
4769 				/*
4770 				 * Change the map-to address to be the same as
4771 				 * the new one.
4772 				 */
4773 				nat->nat_outip6.in6 = *(struct in6_addr *)addr;
4774 			} else
4775 				continue;
4776 
4777 		} else if (((ifp == NULL) || (ifp == nat->nat_ifps[0])) &&
4778 		    !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr)) {
4779 			if (np->in_v == 4 && (v == 4 || v == 0)) {
4780 				struct in_addr in;
4781 				if (np->in_outmsk != 0xffffffff || np->in_nip)
4782 					continue;
4783 				/*
4784 				 * Change the map-to address to be the same as
4785 				 * the new one.
4786 				 */
4787 				sum1 = nat->nat_outip.s_addr;
4788 				if (fr_ifpaddr(4, FRI_NORMAL, nat->nat_ifps[0],
4789 					       &in, NULL, ifs) != -1)
4790 					nat->nat_outip = in;
4791 				sum2 = nat->nat_outip.s_addr;
4792 			} else if (np->in_v == 6 && (v == 6 || v == 0)) {
4793 				struct in6_addr in6;
4794 				if (!IP6_ISZERO(&np->in_next6.in6) ||
4795 				    !IP6_ISONES(&np->in_out[1].in6))
4796 					continue;
4797 				/*
4798 				 * Change the map-to address to be the same as
4799 				 * the new one.
4800 				 */
4801 				if (fr_ifpaddr(6, FRI_NORMAL, nat->nat_ifps[0],
4802 					       (void *)&in6, NULL, ifs) != -1)
4803 					nat->nat_outip6.in6 = in6;
4804 			} else
4805 				continue;
4806 		} else {
4807 			continue;
4808 		}
4809 
4810 		if (sum1 == sum2)
4811 			continue;
4812 		/*
4813 		 * Readjust the checksum adjustment to take into
4814 		 * account the new IP#.
4815 		 */
4816 		CALC_SUMD(sum1, sum2, sumd);
4817 		/* XXX - dont change for TCP when solaris does
4818 		 * hardware checksumming.
4819 		 */
4820 		sumd += nat->nat_sumd[0];
4821 		nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4822 		nat->nat_sumd[1] = nat->nat_sumd[0];
4823 	}
4824 
4825 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4826 	SPL_X(s);
4827 }
4828 
4829 
4830 /* ------------------------------------------------------------------------ */
4831 /* Function:    fr_natifpsync                                               */
4832 /* Returns:     Nil                                                         */
4833 /* Parameters:  action(I) - how we are syncing                              */
4834 /*              ifp(I)    - pointer to network interface                    */
4835 /*              name(I)   - name of interface to sync to                    */
4836 /*                                                                          */
4837 /* This function is used to resync the mapping of interface names and their */
4838 /* respective 'pointers'.  For "action == IPFSYNC_RESYNC", resync all       */
4839 /* interfaces by doing a new lookup of name to 'pointer'.  For "action ==   */
4840 /* IPFSYNC_NEWIFP", treat ifp as the new pointer value associated with      */
4841 /* "name" and for "action == IPFSYNC_OLDIFP", ifp is a pointer for which    */
4842 /* there is no longer any interface associated with it.                     */
4843 /* ------------------------------------------------------------------------ */
4844 void fr_natifpsync(action, v, ifp, name, ifs)
4845 int action, v;
4846 void *ifp;
4847 char *name;
4848 ipf_stack_t *ifs;
4849 {
4850 #if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL)
4851 	int s;
4852 #endif
4853 	nat_t *nat;
4854 	ipnat_t *n;
4855 	int nv;
4856 
4857 	if (ifs->ifs_fr_running <= 0)
4858 		return;
4859 
4860 	SPL_NET(s);
4861 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4862 
4863 	if (ifs->ifs_fr_running <= 0) {
4864 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4865 		return;
4866 	}
4867 
4868 	switch (action)
4869 	{
4870 	case IPFSYNC_RESYNC :
4871 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4872 			nv = (v == 0) ? nat->nat_v : v;
4873 			if (nat->nat_v != nv)
4874 				continue;
4875 			if ((ifp == nat->nat_ifps[0]) ||
4876 			    (nat->nat_ifps[0] == (void *)-1)) {
4877 				nat->nat_ifps[0] =
4878 				    fr_resolvenic(nat->nat_ifnames[0], nv, ifs);
4879 			}
4880 
4881 			if ((ifp == nat->nat_ifps[1]) ||
4882 			    (nat->nat_ifps[1] == (void *)-1)) {
4883 				nat->nat_ifps[1] =
4884 				    fr_resolvenic(nat->nat_ifnames[1], nv, ifs);
4885 			}
4886 		}
4887 
4888 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4889 			nv = (v == 0) ? (int)n->in_v : v;
4890 			if ((int)n->in_v != nv)
4891 				continue;
4892 			if (n->in_ifps[0] == ifp ||
4893 			    n->in_ifps[0] == (void *)-1) {
4894 				n->in_ifps[0] =
4895 				    fr_resolvenic(n->in_ifnames[0], nv, ifs);
4896 			}
4897 			if (n->in_ifps[1] == ifp ||
4898 			    n->in_ifps[1] == (void *)-1) {
4899 				n->in_ifps[1] =
4900 				    fr_resolvenic(n->in_ifnames[1], nv, ifs);
4901 			}
4902 		}
4903 		break;
4904 	case IPFSYNC_NEWIFP :
4905 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4906 			if (nat->nat_v != v)
4907 				continue;
4908 			if (!strncmp(name, nat->nat_ifnames[0],
4909 				     sizeof(nat->nat_ifnames[0])))
4910 				nat->nat_ifps[0] = ifp;
4911 			if (!strncmp(name, nat->nat_ifnames[1],
4912 				     sizeof(nat->nat_ifnames[1])))
4913 				nat->nat_ifps[1] = ifp;
4914 		}
4915 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4916 			if ((int)n->in_v != v)
4917 				continue;
4918 			if (!strncmp(name, n->in_ifnames[0],
4919 				     sizeof(n->in_ifnames[0])))
4920 				n->in_ifps[0] = ifp;
4921 			if (!strncmp(name, n->in_ifnames[1],
4922 				     sizeof(n->in_ifnames[1])))
4923 				n->in_ifps[1] = ifp;
4924 		}
4925 		break;
4926 	case IPFSYNC_OLDIFP :
4927 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4928 			if (nat->nat_v != v)
4929 				continue;
4930 			if (ifp == nat->nat_ifps[0])
4931 				nat->nat_ifps[0] = (void *)-1;
4932 			if (ifp == nat->nat_ifps[1])
4933 				nat->nat_ifps[1] = (void *)-1;
4934 		}
4935 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4936 			if ((int)n->in_v != v)
4937 				continue;
4938 			if (n->in_ifps[0] == ifp)
4939 				n->in_ifps[0] = (void *)-1;
4940 			if (n->in_ifps[1] == ifp)
4941 				n->in_ifps[1] = (void *)-1;
4942 		}
4943 		break;
4944 	}
4945 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4946 	SPL_X(s);
4947 }
4948 
4949 
4950 /* ------------------------------------------------------------------------ */
4951 /* Function:    nat_icmpquerytype4                                          */
4952 /* Returns:     int - 1 == success, 0 == failure                            */
4953 /* Parameters:  icmptype(I) - ICMP type number                              */
4954 /*                                                                          */
4955 /* Tests to see if the ICMP type number passed is a query/response type or  */
4956 /* not.                                                                     */
4957 /* ------------------------------------------------------------------------ */
4958 static INLINE int nat_icmpquerytype4(icmptype)
4959 int icmptype;
4960 {
4961 
4962 	/*
4963 	 * For the ICMP query NAT code, it is essential that both the query
4964 	 * and the reply match on the NAT rule. Because the NAT structure
4965 	 * does not keep track of the icmptype, and a single NAT structure
4966 	 * is used for all icmp types with the same src, dest and id, we
4967 	 * simply define the replies as queries as well. The funny thing is,
4968 	 * altough it seems silly to call a reply a query, this is exactly
4969 	 * as it is defined in the IPv4 specification
4970 	 */
4971 
4972 	switch (icmptype)
4973 	{
4974 
4975 	case ICMP_ECHOREPLY:
4976 	case ICMP_ECHO:
4977 	/* route aedvertisement/solliciation is currently unsupported: */
4978 	/* it would require rewriting the ICMP data section            */
4979 	case ICMP_TSTAMP:
4980 	case ICMP_TSTAMPREPLY:
4981 	case ICMP_IREQ:
4982 	case ICMP_IREQREPLY:
4983 	case ICMP_MASKREQ:
4984 	case ICMP_MASKREPLY:
4985 		return 1;
4986 	default:
4987 		return 0;
4988 	}
4989 }
4990 
4991 
4992 /* ------------------------------------------------------------------------ */
4993 /* Function:    nat_log                                                     */
4994 /* Returns:     Nil                                                         */
4995 /* Parameters:  nat(I)  - pointer to NAT structure                          */
4996 /*              type(I) - type of log entry to create                       */
4997 /*                                                                          */
4998 /* Creates a NAT log entry.                                                 */
4999 /* ------------------------------------------------------------------------ */
5000 void nat_log(nat, type, ifs)
5001 struct nat *nat;
5002 u_int type;
5003 ipf_stack_t *ifs;
5004 {
5005 #ifdef	IPFILTER_LOG
5006 # ifndef LARGE_NAT
5007 	struct ipnat *np;
5008 	int rulen;
5009 # endif
5010 	struct natlog natl;
5011 	void *items[1];
5012 	size_t sizes[1];
5013 	int types[1];
5014 
5015 	natl.nlg_inip = nat->nat_inip6;
5016 	natl.nlg_outip = nat->nat_outip6;
5017 	natl.nlg_origip = nat->nat_oip6;
5018 	natl.nlg_bytes[0] = nat->nat_bytes[0];
5019 	natl.nlg_bytes[1] = nat->nat_bytes[1];
5020 	natl.nlg_pkts[0] = nat->nat_pkts[0];
5021 	natl.nlg_pkts[1] = nat->nat_pkts[1];
5022 	natl.nlg_origport = nat->nat_oport;
5023 	natl.nlg_inport = nat->nat_inport;
5024 	natl.nlg_outport = nat->nat_outport;
5025 	natl.nlg_p = nat->nat_p;
5026 	natl.nlg_type = type;
5027 	natl.nlg_rule = -1;
5028 	natl.nlg_v = nat->nat_v;
5029 # ifndef LARGE_NAT
5030 	if (nat->nat_ptr != NULL) {
5031 		for (rulen = 0, np = ifs->ifs_nat_list; np;
5032 		     np = np->in_next, rulen++)
5033 			if (np == nat->nat_ptr) {
5034 				natl.nlg_rule = rulen;
5035 				break;
5036 			}
5037 	}
5038 # endif
5039 	items[0] = &natl;
5040 	sizes[0] = sizeof(natl);
5041 	types[0] = 0;
5042 
5043 	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1, ifs);
5044 #endif
5045 }
5046 
5047 
#if defined(__OpenBSD__)
/* ------------------------------------------------------------------------ */
/* Function:    nat_ifdetach                                                */
/* Returns:     Nil                                                         */
/* Parameters:  ifp(I) - pointer to network interface                       */
/*              ifs(I) - ipf stack instance                                 */
/*                                                                          */
/* Compatibility interface for OpenBSD to trigger the correct updating of   */
/* interface references within IPFilter.                                    */
/* ------------------------------------------------------------------------ */
void nat_ifdetach(ifp, ifs)
void *ifp;
ipf_stack_t *ifs;
{
	/* All of the work is delegated to frsync(). */
	frsync(ifp, ifs);
}
#endif
5065 
5066 
5067 /* ------------------------------------------------------------------------ */
5068 /* Function:    fr_ipnatderef                                               */
5069 /* Returns:     Nil                                                         */
5070 /* Parameters:  inp(I) - pointer to pointer to NAT rule                     */
5071 /* Write Locks: ipf_nat                                                     */
5072 /*                                                                          */
5073 /* ------------------------------------------------------------------------ */
5074 void fr_ipnatderef(inp, ifs)
5075 ipnat_t **inp;
5076 ipf_stack_t *ifs;
5077 {
5078 	ipnat_t *in;
5079 
5080 	in = *inp;
5081 	*inp = NULL;
5082 	in->in_use--;
5083 	if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
5084 		if (in->in_apr)
5085 			appr_free(in->in_apr);
5086 		KFREE(in);
5087 		ifs->ifs_nat_stats.ns_rules--;
5088 #ifdef notdef
5089 #if SOLARIS
5090 		if (ifs->ifs_nat_stats.ns_rules == 0)
5091 			ifs->ifs_pfil_delayed_copy = 1;
5092 #endif
5093 #endif
5094 	}
5095 }
5096 
5097 
5098 /* ------------------------------------------------------------------------ */
5099 /* Function:    fr_natderef                                                 */
5100 /* Returns:     Nil                                                         */
5101 /* Parameters:  isp(I) - pointer to pointer to NAT table entry              */
5102 /*                                                                          */
5103 /* Decrement the reference counter for this NAT table entry and free it if  */
5104 /* there are no more things using it.                                       */
5105 /*                                                                          */
5106 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */
5107 /* structure *because* it only gets called on paths _after_ nat_ref has been*/
5108 /* incremented.  If nat_ref == 1 then we shouldn't decrement it here        */
5109 /* because nat_delete() will do that and send nat_ref to -1.                */
5110 /*                                                                          */
5111 /* Holding the lock on nat_lock is required to serialise nat_delete() being */
5112 /* called from a NAT flush ioctl with a deref happening because of a packet.*/
5113 /* ------------------------------------------------------------------------ */
5114 void fr_natderef(natp, ifs)
5115 nat_t **natp;
5116 ipf_stack_t *ifs;
5117 {
5118 	nat_t *nat;
5119 
5120 	nat = *natp;
5121 	*natp = NULL;
5122 
5123 	MUTEX_ENTER(&nat->nat_lock);
5124 	if (nat->nat_ref > 1) {
5125 		nat->nat_ref--;
5126 		MUTEX_EXIT(&nat->nat_lock);
5127 		return;
5128 	}
5129 	MUTEX_EXIT(&nat->nat_lock);
5130 
5131 	WRITE_ENTER(&ifs->ifs_ipf_nat);
5132 	nat_delete(nat, NL_EXPIRE, ifs);
5133 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5134 }
5135 
5136 
5137 /* ------------------------------------------------------------------------ */
5138 /* Function:    fr_natclone                                                 */
5139 /* Returns:     ipstate_t* - NULL == cloning failed,                        */
5140 /*                           else pointer to new state structure            */
5141 /* Parameters:  fin(I) - pointer to packet information                      */
5142 /*              is(I)  - pointer to master state structure                  */
5143 /* Write Lock:  ipf_nat                                                     */
5144 /*                                                                          */
5145 /* Create a "duplcate" state table entry from the master.                   */
5146 /* ------------------------------------------------------------------------ */
5147 nat_t *fr_natclone(fin, nat)
5148 fr_info_t *fin;
5149 nat_t *nat;
5150 {
5151 	frentry_t *fr;
5152 	nat_t *clone;
5153 	ipnat_t *np;
5154 	ipf_stack_t *ifs = fin->fin_ifs;
5155 
5156 	KMALLOC(clone, nat_t *);
5157 	if (clone == NULL)
5158 		return NULL;
5159 	bcopy((char *)nat, (char *)clone, sizeof(*clone));
5160 
5161 	MUTEX_NUKE(&clone->nat_lock);
5162 
5163 	clone->nat_aps = NULL;
5164 	/*
5165 	 * Initialize all these so that nat_delete() doesn't cause a crash.
5166 	 */
5167 	clone->nat_tqe.tqe_pnext = NULL;
5168 	clone->nat_tqe.tqe_next = NULL;
5169 	clone->nat_tqe.tqe_ifq = NULL;
5170 	clone->nat_tqe.tqe_parent = clone;
5171 
5172 	clone->nat_flags &= ~SI_CLONE;
5173 	clone->nat_flags |= SI_CLONED;
5174 
5175 	if (clone->nat_hm)
5176 		clone->nat_hm->hm_ref++;
5177 
5178 	if (nat_insert(clone, fin->fin_rev, ifs) == -1) {
5179 		KFREE(clone);
5180 		return NULL;
5181 	}
5182 	np = clone->nat_ptr;
5183 	if (np != NULL) {
5184 		if (ifs->ifs_nat_logging)
5185 			nat_log(clone, (u_int)np->in_redir, ifs);
5186 		np->in_use++;
5187 	}
5188 	fr = clone->nat_fr;
5189 	if (fr != NULL) {
5190 		MUTEX_ENTER(&fr->fr_lock);
5191 		fr->fr_ref++;
5192 		MUTEX_EXIT(&fr->fr_lock);
5193 	}
5194 
5195 	/*
5196 	 * Because the clone is created outside the normal loop of things and
5197 	 * TCP has special needs in terms of state, initialise the timeout
5198 	 * state of the new NAT from here.
5199 	 */
5200 	if (clone->nat_p == IPPROTO_TCP) {
5201 		(void) fr_tcp_age(&clone->nat_tqe, fin, ifs->ifs_nat_tqb,
5202 				  clone->nat_flags);
5203 	}
5204 #ifdef	IPFILTER_SYNC
5205 	clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
5206 #endif
5207 	if (ifs->ifs_nat_logging)
5208 		nat_log(clone, NL_CLONE, ifs);
5209 	return clone;
5210 }
5211 
5212 
5213 /* ------------------------------------------------------------------------ */
5214 /* Function:   nat_wildok                                                   */
5215 /* Returns:    int - 1 == packet's ports match wildcards                    */
5216 /*                   0 == packet's ports don't match wildcards              */
5217 /* Parameters: nat(I)   - NAT entry                                         */
5218 /*             sport(I) - source port                                       */
5219 /*             dport(I) - destination port                                  */
5220 /*             flags(I) - wildcard flags                                    */
5221 /*             dir(I)   - packet direction                                  */
5222 /*                                                                          */
5223 /* Use NAT entry and packet direction to determine which combination of     */
5224 /* wildcard flags should be used.                                           */
5225 /* ------------------------------------------------------------------------ */
5226 int nat_wildok(nat, sport, dport, flags, dir)
5227 nat_t *nat;
5228 int sport;
5229 int dport;
5230 int flags;
5231 int dir;
5232 {
5233 	/*
5234 	 * When called by       dir is set to
5235 	 * nat_inlookup         NAT_INBOUND (0)
5236 	 * nat_outlookup        NAT_OUTBOUND (1)
5237 	 *
5238 	 * We simply combine the packet's direction in dir with the original
5239 	 * "intended" direction of that NAT entry in nat->nat_dir to decide
5240 	 * which combination of wildcard flags to allow.
5241 	 */
5242 
5243 	switch ((dir << 1) | nat->nat_dir)
5244 	{
5245 	case 3: /* outbound packet / outbound entry */
5246 		if (((nat->nat_inport == sport) ||
5247 		    (flags & SI_W_SPORT)) &&
5248 		    ((nat->nat_oport == dport) ||
5249 		    (flags & SI_W_DPORT)))
5250 			return 1;
5251 		break;
5252 	case 2: /* outbound packet / inbound entry */
5253 		if (((nat->nat_outport == sport) ||
5254 		    (flags & SI_W_DPORT)) &&
5255 		    ((nat->nat_oport == dport) ||
5256 		    (flags & SI_W_SPORT)))
5257 			return 1;
5258 		break;
5259 	case 1: /* inbound packet / outbound entry */
5260 		if (((nat->nat_oport == sport) ||
5261 		    (flags & SI_W_DPORT)) &&
5262 		    ((nat->nat_outport == dport) ||
5263 		    (flags & SI_W_SPORT)))
5264 			return 1;
5265 		break;
5266 	case 0: /* inbound packet / inbound entry */
5267 		if (((nat->nat_oport == sport) ||
5268 		    (flags & SI_W_SPORT)) &&
5269 		    ((nat->nat_outport == dport) ||
5270 		    (flags & SI_W_DPORT)))
5271 			return 1;
5272 		break;
5273 	default:
5274 		break;
5275 	}
5276 
5277 	return(0);
5278 }
5279 
5280 
5281 /* ------------------------------------------------------------------------ */
5282 /* Function:    nat_mssclamp                                                */
5283 /* Returns:     Nil                                                         */
5284 /* Parameters:  tcp(I)    - pointer to TCP header                           */
5285 /*              maxmss(I) - value to clamp the TCP MSS to                   */
5286 /*              csump(I)  - pointer to TCP checksum                         */
5287 /*                                                                          */
5288 /* Check for MSS option and clamp it if necessary.  If found and changed,   */
5289 /* then the TCP header checksum will be updated to reflect the change in    */
5290 /* the MSS.                                                                 */
5291 /* ------------------------------------------------------------------------ */
5292 static void nat_mssclamp(tcp, maxmss, csump)
5293 tcphdr_t *tcp;
5294 u_32_t maxmss;
5295 u_short *csump;
5296 {
5297 	u_char *cp, *ep, opt;
5298 	int hlen, advance;
5299 	u_32_t mss, sumd;
5300 
5301 	hlen = TCP_OFF(tcp) << 2;
5302 	if (hlen > sizeof(*tcp)) {
5303 		cp = (u_char *)tcp + sizeof(*tcp);
5304 		ep = (u_char *)tcp + hlen;
5305 
5306 		while (cp < ep) {
5307 			opt = cp[0];
5308 			if (opt == TCPOPT_EOL)
5309 				break;
5310 			else if (opt == TCPOPT_NOP) {
5311 				cp++;
5312 				continue;
5313 			}
5314 
5315 			if (cp + 1 >= ep)
5316 				break;
5317 			advance = cp[1];
5318 			if ((cp + advance > ep) || (advance <= 0))
5319 				break;
5320 			switch (opt)
5321 			{
5322 			case TCPOPT_MAXSEG:
5323 				if (advance != 4)
5324 					break;
5325 				mss = cp[2] * 256 + cp[3];
5326 				if (mss > maxmss) {
5327 					cp[2] = maxmss / 256;
5328 					cp[3] = maxmss & 0xff;
5329 					CALC_SUMD(mss, maxmss, sumd);
5330 					fix_outcksum(csump, sumd);
5331 				}
5332 				break;
5333 			default:
5334 				/* ignore unknown options */
5335 				break;
5336 			}
5337 
5338 			cp += advance;
5339 		}
5340 	}
5341 }
5342 
5343 
5344 /* ------------------------------------------------------------------------ */
5345 /* Function:    fr_setnatqueue                                              */
5346 /* Returns:     Nil                                                         */
5347 /* Parameters:  nat(I)- pointer to NAT structure                            */
5348 /*              rev(I) - forward(0) or reverse(1) direction                 */
5349 /* Locks:       ipf_nat (read or write)                                     */
5350 /*                                                                          */
5351 /* Put the NAT entry on its default queue entry, using rev as a helped in   */
5352 /* determining which queue it should be placed on.                          */
5353 /* ------------------------------------------------------------------------ */
5354 void fr_setnatqueue(nat, rev, ifs)
5355 nat_t *nat;
5356 int rev;
5357 ipf_stack_t *ifs;
5358 {
5359 	ipftq_t *oifq, *nifq;
5360 
5361 	if (nat->nat_ptr != NULL)
5362 		nifq = nat->nat_ptr->in_tqehead[rev];
5363 	else
5364 		nifq = NULL;
5365 
5366 	if (nifq == NULL) {
5367 		switch (nat->nat_p)
5368 		{
5369 		case IPPROTO_UDP :
5370 			nifq = &ifs->ifs_nat_udptq;
5371 			break;
5372 		case IPPROTO_ICMP :
5373 			nifq = &ifs->ifs_nat_icmptq;
5374 			break;
5375 		case IPPROTO_TCP :
5376 			nifq = ifs->ifs_nat_tqb + nat->nat_tqe.tqe_state[rev];
5377 			break;
5378 		default :
5379 			nifq = &ifs->ifs_nat_iptq;
5380 			break;
5381 		}
5382 	}
5383 
5384 	oifq = nat->nat_tqe.tqe_ifq;
5385 	/*
5386 	 * If it's currently on a timeout queue, move it from one queue to
5387 	 * another, else put it on the end of the newly determined queue.
5388 	 */
5389 	if (oifq != NULL)
5390 		fr_movequeue(&nat->nat_tqe, oifq, nifq, ifs);
5391 	else
5392 		fr_queueappend(&nat->nat_tqe, nifq, nat, ifs);
5393 	return;
5394 }
5395 
5396 /* ------------------------------------------------------------------------ */
5397 /* Function:    nat_getnext                                                 */
5398 /* Returns:     int - 0 == ok, else error                                   */
5399 /* Parameters:  t(I)   - pointer to ipftoken structure                      */
5400 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5401 /*              ifs - ipf stack instance                                    */
5402 /*                                                                          */
5403 /* Fetch the next nat/ipnat/hostmap structure pointer from the linked list  */
5404 /* and copy it out to the storage space pointed to by itp.  The next item   */
5405 /* in the list to look at is put back in the ipftoken struture.             */
5406 /* ------------------------------------------------------------------------ */
5407 static int nat_getnext(t, itp, ifs)
5408 ipftoken_t *t;
5409 ipfgeniter_t *itp;
5410 ipf_stack_t *ifs;
5411 {
5412 	hostmap_t *hm, *nexthm = NULL, zerohm;
5413 	ipnat_t *ipn, *nextipnat = NULL, zeroipn;
5414 	nat_t *nat, *nextnat = NULL, zeronat;
5415 	int error = 0, count;
5416 	char *dst;
5417 
5418 	if (itp->igi_nitems == 0)
5419 		return EINVAL;
5420 
5421 	READ_ENTER(&ifs->ifs_ipf_nat);
5422 
5423 	/*
5424 	 * Get "previous" entry from the token and find the next entry.
5425 	 */
5426 	switch (itp->igi_type)
5427 	{
5428 	case IPFGENITER_HOSTMAP :
5429 		hm = t->ipt_data;
5430 		if (hm == NULL) {
5431 			nexthm = ifs->ifs_ipf_hm_maplist;
5432 		} else {
5433 			nexthm = hm->hm_next;
5434 		}
5435 		break;
5436 
5437 	case IPFGENITER_IPNAT :
5438 		ipn = t->ipt_data;
5439 		if (ipn == NULL) {
5440 			nextipnat = ifs->ifs_nat_list;
5441 		} else {
5442 			nextipnat = ipn->in_next;
5443 		}
5444 		break;
5445 
5446 	case IPFGENITER_NAT :
5447 		nat = t->ipt_data;
5448 		if (nat == NULL) {
5449 			nextnat = ifs->ifs_nat_instances;
5450 		} else {
5451 			nextnat = nat->nat_next;
5452 		}
5453 		break;
5454 	default :
5455 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5456 		return EINVAL;
5457 	}
5458 
5459 	dst = itp->igi_data;
5460 	for (count = itp->igi_nitems; count > 0; count--) {
5461 		/*
5462 		 * If we found an entry, add a reference to it and update the token.
5463 		 * Otherwise, zero out data to be returned and NULL out token.
5464 		 */
5465 		switch (itp->igi_type)
5466 		{
5467 		case IPFGENITER_HOSTMAP :
5468 			if (nexthm != NULL) {
5469 				ATOMIC_INC32(nexthm->hm_ref);
5470 				t->ipt_data = nexthm;
5471 			} else {
5472 				bzero(&zerohm, sizeof(zerohm));
5473 				nexthm = &zerohm;
5474 				t->ipt_data = NULL;
5475 			}
5476 			break;
5477 		case IPFGENITER_IPNAT :
5478 			if (nextipnat != NULL) {
5479 				ATOMIC_INC32(nextipnat->in_use);
5480 				t->ipt_data = nextipnat;
5481 			} else {
5482 				bzero(&zeroipn, sizeof(zeroipn));
5483 				nextipnat = &zeroipn;
5484 				t->ipt_data = NULL;
5485 			}
5486 			break;
5487 		case IPFGENITER_NAT :
5488 			if (nextnat != NULL) {
5489 				MUTEX_ENTER(&nextnat->nat_lock);
5490 				nextnat->nat_ref++;
5491 				MUTEX_EXIT(&nextnat->nat_lock);
5492 				t->ipt_data = nextnat;
5493 			} else {
5494 				bzero(&zeronat, sizeof(zeronat));
5495 				nextnat = &zeronat;
5496 				t->ipt_data = NULL;
5497 			}
5498 			break;
5499 		default :
5500 			break;
5501 		}
5502 
5503 		/*
5504 		 * Now that we have ref, it's save to give up lock.
5505 		 */
5506 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5507 
5508 		/*
5509 		 * Copy out data and clean up references and token as needed.
5510 		 */
5511 		switch (itp->igi_type)
5512 		{
5513 		case IPFGENITER_HOSTMAP :
5514 			error = COPYOUT(nexthm, dst, sizeof(*nexthm));
5515 			if (error != 0)
5516 				error = EFAULT;
5517 			if (t->ipt_data == NULL) {
5518 				ipf_freetoken(t, ifs);
5519 				break;
5520 			} else {
5521 				if (hm != NULL) {
5522 					WRITE_ENTER(&ifs->ifs_ipf_nat);
5523 					fr_hostmapdel(&hm);
5524 					RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5525 				}
5526 				if (nexthm->hm_next == NULL) {
5527 					ipf_freetoken(t, ifs);
5528 					break;
5529 				}
5530 				dst += sizeof(*nexthm);
5531 				hm = nexthm;
5532 				nexthm = nexthm->hm_next;
5533 			}
5534 			break;
5535 
5536 		case IPFGENITER_IPNAT :
5537 			error = COPYOUT(nextipnat, dst, sizeof(*nextipnat));
5538 			if (error != 0)
5539 				error = EFAULT;
5540 			if (t->ipt_data == NULL) {
5541 				ipf_freetoken(t, ifs);
5542 				break;
5543 			} else {
5544 				if (ipn != NULL) {
5545 					WRITE_ENTER(&ifs->ifs_ipf_nat);
5546 					fr_ipnatderef(&ipn, ifs);
5547 					RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5548 				}
5549 				if (nextipnat->in_next == NULL) {
5550 					ipf_freetoken(t, ifs);
5551 					break;
5552 				}
5553 				dst += sizeof(*nextipnat);
5554 				ipn = nextipnat;
5555 				nextipnat = nextipnat->in_next;
5556 			}
5557 			break;
5558 
5559 		case IPFGENITER_NAT :
5560 			error = COPYOUT(nextnat, dst, sizeof(*nextnat));
5561 			if (error != 0)
5562 				error = EFAULT;
5563 			if (t->ipt_data == NULL) {
5564 				ipf_freetoken(t, ifs);
5565 				break;
5566 			} else {
5567 				if (nat != NULL)
5568 					fr_natderef(&nat, ifs);
5569 				if (nextnat->nat_next == NULL) {
5570 					ipf_freetoken(t, ifs);
5571 					break;
5572 				}
5573 				dst += sizeof(*nextnat);
5574 				nat = nextnat;
5575 				nextnat = nextnat->nat_next;
5576 			}
5577 			break;
5578 		default :
5579 			break;
5580 		}
5581 
5582 		if ((count == 1) || (error != 0))
5583 			break;
5584 
5585 		READ_ENTER(&ifs->ifs_ipf_nat);
5586 	}
5587 
5588 	return error;
5589 }
5590 
5591 
5592 /* ------------------------------------------------------------------------ */
5593 /* Function:    nat_iterator                                                */
5594 /* Returns:     int - 0 == ok, else error                                   */
5595 /* Parameters:  token(I) - pointer to ipftoken structure                    */
5596 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5597 /*                                                                          */
5598 /* This function acts as a handler for the SIOCGENITER ioctls that use a    */
5599 /* generic structure to iterate through a list.  There are three different  */
5600 /* linked lists of NAT related information to go through: NAT rules, active */
5601 /* NAT mappings and the NAT fragment cache.                                 */
5602 /* ------------------------------------------------------------------------ */
5603 static int nat_iterator(token, itp, ifs)
5604 ipftoken_t *token;
5605 ipfgeniter_t *itp;
5606 ipf_stack_t *ifs;
5607 {
5608 	int error;
5609 
5610 	if (itp->igi_data == NULL)
5611 		return EFAULT;
5612 
5613 	token->ipt_subtype = itp->igi_type;
5614 
5615 	switch (itp->igi_type)
5616 	{
5617 	case IPFGENITER_HOSTMAP :
5618 	case IPFGENITER_IPNAT :
5619 	case IPFGENITER_NAT :
5620 		error = nat_getnext(token, itp, ifs);
5621 		break;
5622 	case IPFGENITER_NATFRAG :
5623 		error = fr_nextfrag(token, itp, &ifs->ifs_ipfr_natlist,
5624 				    &ifs->ifs_ipfr_nattail,
5625 				    &ifs->ifs_ipf_natfrag, ifs);
5626 		break;
5627 	default :
5628 		error = EINVAL;
5629 		break;
5630 	}
5631 
5632 	return error;
5633 }
5634 
5635 
5636 /* -------------------------------------------------------------------- */
5637 /* Function:	nat_earlydrop						*/
5638 /* Returns:	number of dropped/removed entries from the queue	*/
5639 /* Parameters:	ifq - pointer to queue with entries to be processed	*/
5640 /*		maxidle - entry must be idle this long to be dropped	*/
5641 /*		ifs - ipf stack instance				*/
5642 /*									*/
5643 /* Function is invoked from nat_extraflush() only.  Removes entries	*/
5644 /* form specified timeout queue, based on how long they've sat idle,	*/
5645 /* without waiting for it to happen on its own.				*/
5646 /* -------------------------------------------------------------------- */
5647 static int nat_earlydrop(ifq, maxidle, ifs)
5648 ipftq_t *ifq;
5649 int maxidle;
5650 ipf_stack_t *ifs;
5651 {
5652 	ipftqent_t *tqe, *tqn;
5653 	nat_t *nat;
5654 	unsigned int dropped;
5655 	int droptick;
5656 
5657 	if (ifq == NULL)
5658 		return (0);
5659 
5660 	dropped = 0;
5661 
5662 	/*
5663 	 * Determine the tick representing the idle time we're interested
5664 	 * in.  If an entry exists in the queue, and it was touched before
5665 	 * that tick, then it's been idle longer than maxidle ... remove it.
5666 	 */
5667 	droptick = ifs->ifs_fr_ticks - maxidle;
5668 	tqn = ifq->ifq_head;
5669 	while ((tqe = tqn) != NULL && tqe->tqe_touched < droptick) {
5670 		tqn = tqe->tqe_next;
5671 		nat = tqe->tqe_parent;
5672 		nat_delete(nat, ISL_EXPIRE, ifs);
5673 		dropped++;
5674 	}
5675 	return (dropped);
5676 }
5677 
5678 
5679 /* --------------------------------------------------------------------- */
5680 /* Function:	nat_flushclosing					 */
5681 /* Returns:	int - number of NAT entries deleted			 */
5682 /* Parameters:	stateval(I) - State at which to start removing entries	 */
5683 /*		ifs - ipf stack instance				 */
5684 /*									 */
5685 /* Remove nat table entries for TCP connections which are in the process */
5686 /* of closing, and are in (or "beyond") state specified by 'stateval'.	 */
5687 /* --------------------------------------------------------------------- */
5688 static int nat_flushclosing(stateval, ifs)
5689 int stateval;
5690 ipf_stack_t *ifs;
5691 {
5692 	ipftq_t *ifq, *ifqn;
5693 	ipftqent_t *tqe, *tqn;
5694 	nat_t *nat;
5695 	int dropped;
5696 
5697 	dropped = 0;
5698 
5699 	/*
5700 	 * Start by deleting any entries in specific timeout queues.
5701 	 */
5702 	ifqn = &ifs->ifs_nat_tqb[stateval];
5703 	while ((ifq = ifqn) != NULL) {
5704 		ifqn = ifq->ifq_next;
5705 		dropped += nat_earlydrop(ifq, (int)0, ifs);
5706 	}
5707 
5708 	/*
5709 	 * Next, look through user defined queues for closing entries.
5710 	 */
5711 	ifqn = ifs->ifs_nat_utqe;
5712 	while ((ifq = ifqn) != NULL) {
5713 		ifqn = ifq->ifq_next;
5714 		tqn = ifq->ifq_head;
5715 		while ((tqe = tqn) != NULL) {
5716 			tqn = tqe->tqe_next;
5717 			nat = tqe->tqe_parent;
5718 			if (nat->nat_p != IPPROTO_TCP)
5719 				continue;
5720 			if ((nat->nat_tcpstate[0] >= stateval) &&
5721 			    (nat->nat_tcpstate[1] >= stateval)) {
5722 				nat_delete(nat, NL_EXPIRE, ifs);
5723 				dropped++;
5724 			}
5725 		}
5726 	}
5727 	return (dropped);
5728 }
5729 
5730 
5731 /* --------------------------------------------------------------------- */
5732 /* Function:	nat_extraflush						 */
5733 /* Returns:	int - number of NAT entries deleted			 */
5734 /* Parameters:	which(I) - how to flush the active NAT table		 */
5735 /*		ifs - ipf stack instance				 */
5736 /* Write Locks:	ipf_nat							 */
5737 /*									 */
5738 /* Flush nat tables.  Three actions currently defined:			 */
5739 /*									 */
5740 /* which == 0 :	Flush all nat table entries.				 */
5741 /*									 */
5742 /* which == 1 :	Flush entries with TCP connections which have started	 */
5743 /*		to close on both ends.					 */
5744 /*									 */
5745 /* which == 2 :	First, flush entries which are "almost" closed.  If that */
5746 /*		does not take us below specified threshold in the table, */
5747 /*		we want to flush entries with TCP connections which have */
5748 /*		been idle for a long time.  Start with connections idle	 */
5749 /*		over 12 hours,  and then work backwards in half hour	 */
5750 /*		increments to at most 30 minutes idle, and finally work	 */
5751 /*		back in 30 second increments to at most 30 seconds.	 */
5752 /* --------------------------------------------------------------------- */
5753 static int nat_extraflush(which, ifs)
5754 int which;
5755 ipf_stack_t *ifs;
5756 {
5757 	ipftq_t *ifq, *ifqn;
5758 	nat_t *nat, **natp;
5759 	int idletime, removed, idle_idx;
5760 	SPL_INT(s);
5761 
5762 	removed = 0;
5763 
5764 	SPL_NET(s);
5765 	switch (which)
5766 	{
5767 	case 0:
5768 		natp = &ifs->ifs_nat_instances;
5769 		while ((nat = *natp) != NULL) {
5770 			natp = &nat->nat_next;
5771 			nat_delete(nat, ISL_FLUSH, ifs);
5772 			removed++;
5773 		}
5774 		break;
5775 
5776 	case 1:
5777 		removed = nat_flushclosing(IPF_TCPS_CLOSE_WAIT, ifs);
5778 		break;
5779 
5780 	case 2:
5781 		removed = nat_flushclosing(IPF_TCPS_FIN_WAIT_2, ifs);
5782 
5783 		/*
5784 		 * Be sure we haven't done this in the last 10 seconds.
5785 		 */
5786 		if (ifs->ifs_fr_ticks - ifs->ifs_nat_last_force_flush <
5787 		    IPF_TTLVAL(10))
5788 			break;
5789 		ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks;
5790 
5791 		/*
5792 		 * Determine initial threshold for minimum idle time based on
5793 		 * how long ipfilter has been running.  Ipfilter needs to have
5794 		 * been up as long as the smallest interval to continue on.
5795 		 *
5796 		 * Minimum idle times stored in idletime_tab and indexed by
5797 		 * idle_idx.  Start at upper end of array and work backwards.
5798 		 *
5799 		 * Once the index is found, set the initial idle time to the
5800 		 * first interval before the current ipfilter run time.
5801 		 */
5802 		if (ifs->ifs_fr_ticks < idletime_tab[0])
5803 			break;  /* switch */
5804 		idle_idx = (sizeof (idletime_tab) / sizeof (int)) - 1;
5805 		if (ifs->ifs_fr_ticks > idletime_tab[idle_idx]) {
5806 			idletime = idletime_tab[idle_idx];
5807 		} else {
5808 			while ((idle_idx > 0) &&
5809 			    (ifs->ifs_fr_ticks < idletime_tab[idle_idx]))
5810 				idle_idx--;
5811 			idletime = (ifs->ifs_fr_ticks /
5812 				    idletime_tab[idle_idx]) *
5813 				    idletime_tab[idle_idx];
5814 		}
5815 
5816 		while ((idle_idx >= 0) &&
5817 		    (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_lo)) {
5818 			/*
5819 			 * Start with appropriate timeout queue.
5820 			 */
5821 			removed += nat_earlydrop(
5822 					&ifs->ifs_nat_tqb[IPF_TCPS_ESTABLISHED],
5823 					idletime, ifs);
5824 
5825 			/*
5826 			 * Make sure we haven't already deleted enough
5827 			 * entries before checking the user defined queues.
5828 			 */
5829 			if (NAT_TAB_WATER_LEVEL(ifs) <=
5830 			    ifs->ifs_nat_flush_lvl_lo)
5831 				break;
5832 
5833 			/*
5834 			 * Next, look through the user defined queues.
5835 			 */
5836 			ifqn = ifs->ifs_nat_utqe;
5837 			while ((ifq = ifqn) != NULL) {
5838 				ifqn = ifq->ifq_next;
5839 				removed += nat_earlydrop(ifq, idletime, ifs);
5840 			}
5841 
5842 			/*
5843 			 * Adjust the granularity of idle time.
5844 			 *
5845 			 * If we reach an interval boundary, we need to
5846 			 * either adjust the idle time accordingly or exit
5847 			 * the loop altogether (if this is very last check).
5848 			 */
5849 			idletime -= idletime_tab[idle_idx];
5850 			if (idletime < idletime_tab[idle_idx]) {
5851 				if (idle_idx != 0) {
5852 					idletime = idletime_tab[idle_idx] -
5853 					    idletime_tab[idle_idx - 1];
5854 					idle_idx--;
5855 				} else {
5856 					break;  /* while */
5857 				}
5858 			}
5859 		}
5860 		break;
5861 	default:
5862 		break;
5863 	}
5864 
5865 	SPL_X(s);
5866 	return (removed);
5867 }
5868