1 /* 2 * Copyright (C) 1995-2004 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 7 * Use is subject to license terms. 8 */ 9 10 #pragma ident "%Z%%M% %I% %E% SMI"$ 11 12 #if defined(KERNEL) || defined(_KERNEL) 13 # undef KERNEL 14 # undef _KERNEL 15 # define KERNEL 1 16 # define _KERNEL 1 17 #endif 18 #include <sys/errno.h> 19 #include <sys/types.h> 20 #include <sys/param.h> 21 #include <sys/time.h> 22 #include <sys/file.h> 23 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ 24 defined(_KERNEL) 25 # include "opt_ipfilter_log.h" 26 #endif 27 #if !defined(_KERNEL) 28 # include <stdio.h> 29 # include <string.h> 30 # include <stdlib.h> 31 # define _KERNEL 32 # ifdef __OpenBSD__ 33 struct file; 34 # endif 35 # include <sys/uio.h> 36 # undef _KERNEL 37 #endif 38 #if defined(_KERNEL) && (__FreeBSD_version >= 220000) 39 # include <sys/filio.h> 40 # include <sys/fcntl.h> 41 #else 42 # include <sys/ioctl.h> 43 #endif 44 #if !defined(AIX) 45 # include <sys/fcntl.h> 46 #endif 47 #if !defined(linux) 48 # include <sys/protosw.h> 49 #endif 50 #include <sys/socket.h> 51 #if defined(_KERNEL) 52 # include <sys/systm.h> 53 # if !defined(__SVR4) && !defined(__svr4__) 54 # include <sys/mbuf.h> 55 # endif 56 #endif 57 #if defined(__SVR4) || defined(__svr4__) 58 # include <sys/filio.h> 59 # include <sys/byteorder.h> 60 # ifdef _KERNEL 61 # include <sys/dditypes.h> 62 # endif 63 # include <sys/stream.h> 64 # include <sys/kmem.h> 65 #endif 66 #if __FreeBSD_version >= 300000 67 # include <sys/queue.h> 68 #endif 69 #include <net/if.h> 70 #if __FreeBSD_version >= 300000 71 # include <net/if_var.h> 72 # if defined(_KERNEL) && !defined(IPFILTER_LKM) 73 # include "opt_ipfilter.h" 74 # endif 75 #endif 76 #ifdef sun 77 # include <net/af.h> 78 #endif 79 #include <net/route.h> 80 #include <netinet/in.h> 81 #include <netinet/in_systm.h> 82 #include <netinet/ip.h> 83 84 #ifdef 
RFC1825 85 # include <vpn/md5.h> 86 # include <vpn/ipsec.h> 87 extern struct ifnet vpnif; 88 #endif 89 90 #if !defined(linux) 91 # include <netinet/ip_var.h> 92 #endif 93 #include <netinet/tcp.h> 94 #include <netinet/udp.h> 95 #include <netinet/ip_icmp.h> 96 #include "netinet/ip_compat.h" 97 #include <netinet/tcpip.h> 98 #include "netinet/ip_fil.h" 99 #include "netinet/ip_nat.h" 100 #include "netinet/ip_frag.h" 101 #include "netinet/ip_state.h" 102 #include "netinet/ip_proxy.h" 103 #include "netinet/ipf_stack.h" 104 #ifdef IPFILTER_SYNC 105 #include "netinet/ip_sync.h" 106 #endif 107 #if (__FreeBSD_version >= 300000) 108 # include <sys/malloc.h> 109 #endif 110 /* END OF INCLUDES */ 111 112 #undef SOCKADDR_IN 113 #define SOCKADDR_IN struct sockaddr_in 114 115 #if !defined(lint) 116 static const char sccsid[] = "@(#)ip_nat.c 1.11 6/5/96 (C) 1995 Darren Reed"; 117 static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 darrenr Exp $"; 118 #endif 119 120 121 /* ======================================================================== */ 122 /* How the NAT is organised and works. */ 123 /* */ 124 /* Inside (interface y) NAT Outside (interface x) */ 125 /* -------------------- -+- ------------------------------------- */ 126 /* Packet going | out, processsed by fr_checknatout() for x */ 127 /* ------------> | ------------> */ 128 /* src=10.1.1.1 | src=192.1.1.1 */ 129 /* | */ 130 /* | in, processed by fr_checknatin() for x */ 131 /* <------------ | <------------ */ 132 /* dst=10.1.1.1 | dst=192.1.1.1 */ 133 /* -------------------- -+- ------------------------------------- */ 134 /* fr_checknatout() - changes ip_src and if required, sport */ 135 /* - creates a new mapping, if required. */ 136 /* fr_checknatin() - changes ip_dst and if required, dport */ 137 /* */ 138 /* In the NAT table, internal source is recorded as "in" and externally */ 139 /* seen as "out". 
*/ 140 /* ======================================================================== */ 141 142 143 static int nat_flushtable __P((ipf_stack_t *)); 144 static int nat_clearlist __P((ipf_stack_t *)); 145 static void nat_addnat __P((struct ipnat *, ipf_stack_t *)); 146 static void nat_addrdr __P((struct ipnat *, ipf_stack_t *)); 147 static void nat_delete __P((struct nat *, int, ipf_stack_t *)); 148 static void nat_delrdr __P((struct ipnat *)); 149 static void nat_delnat __P((struct ipnat *)); 150 static int fr_natgetent __P((caddr_t, ipf_stack_t *)); 151 static int fr_natgetsz __P((caddr_t, ipf_stack_t *)); 152 static int fr_natputent __P((caddr_t, int, ipf_stack_t *)); 153 static void nat_tabmove __P((nat_t *, ipf_stack_t *)); 154 static int nat_match __P((fr_info_t *, ipnat_t *)); 155 static INLINE int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *)); 156 static INLINE int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *)); 157 static hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr, 158 struct in_addr, struct in_addr, u_32_t, 159 ipf_stack_t *)); 160 static INLINE int nat_icmpquerytype4 __P((int)); 161 static int nat_siocaddnat __P((ipnat_t *, ipnat_t **, int, 162 ipf_stack_t *)); 163 static void nat_siocdelnat __P((ipnat_t *, ipnat_t **, int, 164 ipf_stack_t *)); 165 static INLINE int nat_icmperrortype4 __P((int)); 166 static INLINE int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *, 167 tcphdr_t *, nat_t **, int)); 168 static INLINE int nat_resolverule __P((ipnat_t *, ipf_stack_t *)); 169 static nat_t *fr_natclone __P((fr_info_t *, nat_t *)); 170 static void nat_mssclamp __P((tcphdr_t *, u_32_t, u_short *)); 171 static INLINE int nat_wildok __P((nat_t *, int, int, int, int)); 172 static int nat_getnext __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 173 static int nat_iterator __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 174 static int nat_extraflush __P((int, ipf_stack_t *)); 175 static int nat_earlydrop __P((ipftq_t *, int, ipf_stack_t 
*)); 176 static int nat_flushclosing __P((int, ipf_stack_t *)); 177 178 179 /* 180 * Below we declare a list of constants used only in the nat_extraflush() 181 * routine. We are placing it here, instead of in nat_extraflush() itself, 182 * because we want to make it visible to tools such as mdb, nm etc., so the 183 * values can easily be altered during debugging. 184 */ 185 static const int idletime_tab[] = { 186 IPF_TTLVAL(30), /* 30 seconds */ 187 IPF_TTLVAL(1800), /* 30 minutes */ 188 IPF_TTLVAL(43200), /* 12 hours */ 189 IPF_TTLVAL(345600), /* 4 days */ 190 }; 191 192 #define NAT_HAS_L4_CHANGED(n) \ 193 (((n)->nat_flags & (IPN_TCPUDPICMP)) && \ 194 (n)->nat_inport != (n)->nat_outport) 195 196 /* ------------------------------------------------------------------------ */ 197 /* Function: fr_natinit */ 198 /* Returns: int - 0 == success, -1 == failure */ 199 /* Parameters: Nil */ 200 /* */ 201 /* Initialise all of the NAT locks, tables and other structures. */ 202 /* ------------------------------------------------------------------------ */ 203 int fr_natinit(ifs) 204 ipf_stack_t *ifs; 205 { 206 int i; 207 208 KMALLOCS(ifs->ifs_nat_table[0], nat_t **, 209 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 210 if (ifs->ifs_nat_table[0] != NULL) 211 bzero((char *)ifs->ifs_nat_table[0], 212 ifs->ifs_ipf_nattable_sz * sizeof(nat_t *)); 213 else 214 return -1; 215 216 KMALLOCS(ifs->ifs_nat_table[1], nat_t **, 217 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 218 if (ifs->ifs_nat_table[1] != NULL) 219 bzero((char *)ifs->ifs_nat_table[1], 220 ifs->ifs_ipf_nattable_sz * sizeof(nat_t *)); 221 else 222 return -2; 223 224 KMALLOCS(ifs->ifs_nat_rules, ipnat_t **, 225 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz); 226 if (ifs->ifs_nat_rules != NULL) 227 bzero((char *)ifs->ifs_nat_rules, 228 ifs->ifs_ipf_natrules_sz * sizeof(ipnat_t *)); 229 else 230 return -3; 231 232 KMALLOCS(ifs->ifs_rdr_rules, ipnat_t **, 233 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz); 234 if 
(ifs->ifs_rdr_rules != NULL) 235 bzero((char *)ifs->ifs_rdr_rules, 236 ifs->ifs_ipf_rdrrules_sz * sizeof(ipnat_t *)); 237 else 238 return -4; 239 240 KMALLOCS(ifs->ifs_maptable, hostmap_t **, 241 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); 242 if (ifs->ifs_maptable != NULL) 243 bzero((char *)ifs->ifs_maptable, 244 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); 245 else 246 return -5; 247 248 ifs->ifs_ipf_hm_maplist = NULL; 249 250 KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[0], u_long *, 251 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 252 if (ifs->ifs_nat_stats.ns_bucketlen[0] == NULL) 253 return -1; 254 bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[0], 255 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 256 257 KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[1], u_long *, 258 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 259 if (ifs->ifs_nat_stats.ns_bucketlen[1] == NULL) 260 return -1; 261 bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[1], 262 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 263 264 if (ifs->ifs_fr_nat_maxbucket == 0) { 265 for (i = ifs->ifs_ipf_nattable_sz; i > 0; i >>= 1) 266 ifs->ifs_fr_nat_maxbucket++; 267 ifs->ifs_fr_nat_maxbucket *= 2; 268 } 269 270 fr_sttab_init(ifs->ifs_nat_tqb, ifs); 271 /* 272 * Increase this because we may have "keep state" following this too 273 * and packet storms can occur if this is removed too quickly. 
274 */ 275 ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcplastack; 276 ifs->ifs_nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_nat_udptq; 277 ifs->ifs_nat_udptq.ifq_ttl = ifs->ifs_fr_defnatage; 278 ifs->ifs_nat_udptq.ifq_ref = 1; 279 ifs->ifs_nat_udptq.ifq_head = NULL; 280 ifs->ifs_nat_udptq.ifq_tail = &ifs->ifs_nat_udptq.ifq_head; 281 MUTEX_INIT(&ifs->ifs_nat_udptq.ifq_lock, "nat ipftq udp tab"); 282 ifs->ifs_nat_udptq.ifq_next = &ifs->ifs_nat_icmptq; 283 ifs->ifs_nat_icmptq.ifq_ttl = ifs->ifs_fr_defnaticmpage; 284 ifs->ifs_nat_icmptq.ifq_ref = 1; 285 ifs->ifs_nat_icmptq.ifq_head = NULL; 286 ifs->ifs_nat_icmptq.ifq_tail = &ifs->ifs_nat_icmptq.ifq_head; 287 MUTEX_INIT(&ifs->ifs_nat_icmptq.ifq_lock, "nat icmp ipftq tab"); 288 ifs->ifs_nat_icmptq.ifq_next = &ifs->ifs_nat_iptq; 289 ifs->ifs_nat_iptq.ifq_ttl = ifs->ifs_fr_defnatipage; 290 ifs->ifs_nat_iptq.ifq_ref = 1; 291 ifs->ifs_nat_iptq.ifq_head = NULL; 292 ifs->ifs_nat_iptq.ifq_tail = &ifs->ifs_nat_iptq.ifq_head; 293 MUTEX_INIT(&ifs->ifs_nat_iptq.ifq_lock, "nat ip ipftq tab"); 294 ifs->ifs_nat_iptq.ifq_next = NULL; 295 296 for (i = 0; i < IPF_TCP_NSTATES; i++) { 297 if (ifs->ifs_nat_tqb[i].ifq_ttl < ifs->ifs_fr_defnaticmpage) 298 ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnaticmpage; 299 #ifdef LARGE_NAT 300 else if (ifs->ifs_nat_tqb[i].ifq_ttl > ifs->ifs_fr_defnatage) 301 ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnatage; 302 #endif 303 } 304 305 /* 306 * Increase this because we may have "keep state" following 307 * this too and packet storms can occur if this is removed 308 * too quickly. 
309 */ 310 ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = 311 ifs->ifs_nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl; 312 313 RWLOCK_INIT(&ifs->ifs_ipf_nat, "ipf IP NAT rwlock"); 314 RWLOCK_INIT(&ifs->ifs_ipf_natfrag, "ipf IP NAT-Frag rwlock"); 315 MUTEX_INIT(&ifs->ifs_ipf_nat_new, "ipf nat new mutex"); 316 MUTEX_INIT(&ifs->ifs_ipf_natio, "ipf nat io mutex"); 317 318 ifs->ifs_fr_nat_init = 1; 319 320 return 0; 321 } 322 323 324 /* ------------------------------------------------------------------------ */ 325 /* Function: nat_addrdr */ 326 /* Returns: Nil */ 327 /* Parameters: n(I) - pointer to NAT rule to add */ 328 /* */ 329 /* Adds a redirect rule to the hash table of redirect rules and the list of */ 330 /* loaded NAT rules. Updates the bitmask indicating which netmasks are in */ 331 /* use by redirect rules. */ 332 /* ------------------------------------------------------------------------ */ 333 static void nat_addrdr(n, ifs) 334 ipnat_t *n; 335 ipf_stack_t *ifs; 336 { 337 ipnat_t **np; 338 u_32_t j; 339 u_int hv; 340 int k; 341 342 k = count4bits(n->in_outmsk); 343 if ((k >= 0) && (k != 32)) 344 ifs->ifs_rdr_masks |= 1 << k; 345 j = (n->in_outip & n->in_outmsk); 346 hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_rdrrules_sz); 347 np = ifs->ifs_rdr_rules + hv; 348 while (*np != NULL) 349 np = &(*np)->in_rnext; 350 n->in_rnext = NULL; 351 n->in_prnext = np; 352 n->in_hv = hv; 353 *np = n; 354 } 355 356 357 /* ------------------------------------------------------------------------ */ 358 /* Function: nat_addnat */ 359 /* Returns: Nil */ 360 /* Parameters: n(I) - pointer to NAT rule to add */ 361 /* */ 362 /* Adds a NAT map rule to the hash table of rules and the list of loaded */ 363 /* NAT rules. Updates the bitmask indicating which netmasks are in use by */ 364 /* redirect rules. 
*/ 365 /* ------------------------------------------------------------------------ */ 366 static void nat_addnat(n, ifs) 367 ipnat_t *n; 368 ipf_stack_t *ifs; 369 { 370 ipnat_t **np; 371 u_32_t j; 372 u_int hv; 373 int k; 374 375 k = count4bits(n->in_inmsk); 376 if ((k >= 0) && (k != 32)) 377 ifs->ifs_nat_masks |= 1 << k; 378 j = (n->in_inip & n->in_inmsk); 379 hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_natrules_sz); 380 np = ifs->ifs_nat_rules + hv; 381 while (*np != NULL) 382 np = &(*np)->in_mnext; 383 n->in_mnext = NULL; 384 n->in_pmnext = np; 385 n->in_hv = hv; 386 *np = n; 387 } 388 389 390 /* ------------------------------------------------------------------------ */ 391 /* Function: nat_delrdr */ 392 /* Returns: Nil */ 393 /* Parameters: n(I) - pointer to NAT rule to delete */ 394 /* */ 395 /* Removes a redirect rule from the hash table of redirect rules. */ 396 /* ------------------------------------------------------------------------ */ 397 static void nat_delrdr(n) 398 ipnat_t *n; 399 { 400 if (n->in_rnext) 401 n->in_rnext->in_prnext = n->in_prnext; 402 *n->in_prnext = n->in_rnext; 403 } 404 405 406 /* ------------------------------------------------------------------------ */ 407 /* Function: nat_delnat */ 408 /* Returns: Nil */ 409 /* Parameters: n(I) - pointer to NAT rule to delete */ 410 /* */ 411 /* Removes a NAT map rule from the hash table of NAT map rules. 
*/ 412 /* ------------------------------------------------------------------------ */ 413 static void nat_delnat(n) 414 ipnat_t *n; 415 { 416 if (n->in_mnext != NULL) 417 n->in_mnext->in_pmnext = n->in_pmnext; 418 *n->in_pmnext = n->in_mnext; 419 } 420 421 422 /* ------------------------------------------------------------------------ */ 423 /* Function: nat_hostmap */ 424 /* Returns: struct hostmap* - NULL if no hostmap could be created, */ 425 /* else a pointer to the hostmapping to use */ 426 /* Parameters: np(I) - pointer to NAT rule */ 427 /* real(I) - real IP address */ 428 /* map(I) - mapped IP address */ 429 /* port(I) - destination port number */ 430 /* Write Locks: ipf_nat */ 431 /* */ 432 /* Check if an ip address has already been allocated for a given mapping */ 433 /* that is not doing port based translation. If is not yet allocated, then */ 434 /* create a new entry if a non-NULL NAT rule pointer has been supplied. */ 435 /* ------------------------------------------------------------------------ */ 436 static struct hostmap *nat_hostmap(np, src, dst, map, port, ifs) 437 ipnat_t *np; 438 struct in_addr src; 439 struct in_addr dst; 440 struct in_addr map; 441 u_32_t port; 442 ipf_stack_t *ifs; 443 { 444 hostmap_t *hm; 445 u_int hv; 446 447 hv = (src.s_addr ^ dst.s_addr); 448 hv += src.s_addr; 449 hv += dst.s_addr; 450 hv %= HOSTMAP_SIZE; 451 for (hm = ifs->ifs_maptable[hv]; hm; hm = hm->hm_next) 452 if ((hm->hm_srcip.s_addr == src.s_addr) && 453 (hm->hm_dstip.s_addr == dst.s_addr) && 454 ((np == NULL) || (np == hm->hm_ipnat)) && 455 ((port == 0) || (port == hm->hm_port))) { 456 hm->hm_ref++; 457 return hm; 458 } 459 460 if (np == NULL) 461 return NULL; 462 463 KMALLOC(hm, hostmap_t *); 464 if (hm) { 465 hm->hm_hnext = ifs->ifs_ipf_hm_maplist; 466 hm->hm_phnext = &ifs->ifs_ipf_hm_maplist; 467 if (ifs->ifs_ipf_hm_maplist != NULL) 468 ifs->ifs_ipf_hm_maplist->hm_phnext = &hm->hm_hnext; 469 ifs->ifs_ipf_hm_maplist = hm; 470 471 hm->hm_next = 
ifs->ifs_maptable[hv]; 472 hm->hm_pnext = ifs->ifs_maptable + hv; 473 if (ifs->ifs_maptable[hv] != NULL) 474 ifs->ifs_maptable[hv]->hm_pnext = &hm->hm_next; 475 ifs->ifs_maptable[hv] = hm; 476 hm->hm_ipnat = np; 477 hm->hm_srcip = src; 478 hm->hm_dstip = dst; 479 hm->hm_mapip = map; 480 hm->hm_ref = 1; 481 hm->hm_port = port; 482 } 483 return hm; 484 } 485 486 487 /* ------------------------------------------------------------------------ */ 488 /* Function: fr_hostmapdel */ 489 /* Returns: Nil */ 490 /* Parameters: hmp(I) - pointer to pointer to hostmap structure */ 491 /* Write Locks: ipf_nat */ 492 /* */ 493 /* Decrement the references to this hostmap structure by one. If this */ 494 /* reaches zero then remove it and free it. */ 495 /* ------------------------------------------------------------------------ */ 496 void fr_hostmapdel(hmp) 497 struct hostmap **hmp; 498 { 499 struct hostmap *hm; 500 501 hm = *hmp; 502 *hmp = NULL; 503 504 hm->hm_ref--; 505 if (hm->hm_ref == 0) { 506 if (hm->hm_next) 507 hm->hm_next->hm_pnext = hm->hm_pnext; 508 *hm->hm_pnext = hm->hm_next; 509 if (hm->hm_hnext) 510 hm->hm_hnext->hm_phnext = hm->hm_phnext; 511 *hm->hm_phnext = hm->hm_hnext; 512 KFREE(hm); 513 } 514 } 515 516 517 /* ------------------------------------------------------------------------ */ 518 /* Function: fix_outcksum */ 519 /* Returns: Nil */ 520 /* Parameters: sp(I) - location of 16bit checksum to update */ 521 /* n((I) - amount to adjust checksum by */ 522 /* */ 523 /* Adjusts the 16bit checksum by "n" for packets going out. 
*/ 524 /* ------------------------------------------------------------------------ */ 525 void fix_outcksum(sp, n) 526 u_short *sp; 527 u_32_t n; 528 { 529 u_short sumshort; 530 u_32_t sum1; 531 532 if (n == 0) 533 return; 534 535 sum1 = (~ntohs(*sp)) & 0xffff; 536 sum1 += (n); 537 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 538 /* Again */ 539 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 540 sumshort = ~(u_short)sum1; 541 *(sp) = htons(sumshort); 542 } 543 544 545 /* ------------------------------------------------------------------------ */ 546 /* Function: fix_incksum */ 547 /* Returns: Nil */ 548 /* Parameters: sp(I) - location of 16bit checksum to update */ 549 /* n((I) - amount to adjust checksum by */ 550 /* */ 551 /* Adjusts the 16bit checksum by "n" for packets going in. */ 552 /* ------------------------------------------------------------------------ */ 553 void fix_incksum(sp, n) 554 u_short *sp; 555 u_32_t n; 556 { 557 u_short sumshort; 558 u_32_t sum1; 559 560 if (n == 0) 561 return; 562 563 sum1 = (~ntohs(*sp)) & 0xffff; 564 sum1 += ~(n) & 0xffff; 565 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 566 /* Again */ 567 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 568 sumshort = ~(u_short)sum1; 569 *(sp) = htons(sumshort); 570 } 571 572 573 /* ------------------------------------------------------------------------ */ 574 /* Function: fix_datacksum */ 575 /* Returns: Nil */ 576 /* Parameters: sp(I) - location of 16bit checksum to update */ 577 /* n((I) - amount to adjust checksum by */ 578 /* */ 579 /* Fix_datacksum is used *only* for the adjustments of checksums in the */ 580 /* data section of an IP packet. */ 581 /* */ 582 /* The only situation in which you need to do this is when NAT'ing an */ 583 /* ICMP error message. Such a message, contains in its body the IP header */ 584 /* of the original IP packet, that causes the error. 
*/ 585 /* */ 586 /* You can't use fix_incksum or fix_outcksum in that case, because for the */ 587 /* kernel the data section of the ICMP error is just data, and no special */ 588 /* processing like hardware cksum or ntohs processing have been done by the */ 589 /* kernel on the data section. */ 590 /* ------------------------------------------------------------------------ */ 591 void fix_datacksum(sp, n) 592 u_short *sp; 593 u_32_t n; 594 { 595 u_short sumshort; 596 u_32_t sum1; 597 598 if (n == 0) 599 return; 600 601 sum1 = (~ntohs(*sp)) & 0xffff; 602 sum1 += (n); 603 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 604 /* Again */ 605 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 606 sumshort = ~(u_short)sum1; 607 *(sp) = htons(sumshort); 608 } 609 610 611 /* ------------------------------------------------------------------------ */ 612 /* Function: fr_nat_ioctl */ 613 /* Returns: int - 0 == success, != 0 == failure */ 614 /* Parameters: data(I) - pointer to ioctl data */ 615 /* cmd(I) - ioctl command integer */ 616 /* mode(I) - file mode bits used with open */ 617 /* */ 618 /* Processes an ioctl call made to operate on the IP Filter NAT device. */ 619 /* ------------------------------------------------------------------------ */ 620 int fr_nat_ioctl(data, cmd, mode, uid, ctx, ifs) 621 ioctlcmd_t cmd; 622 caddr_t data; 623 int mode, uid; 624 void *ctx; 625 ipf_stack_t *ifs; 626 { 627 ipnat_t *nat, *nt, *n = NULL, **np = NULL; 628 int error = 0, ret, arg, getlock; 629 ipnat_t natd; 630 631 #if (BSD >= 199306) && defined(_KERNEL) 632 if ((securelevel >= 2) && (mode & FWRITE)) 633 return EPERM; 634 #endif 635 636 #if defined(__osf__) && defined(_KERNEL) 637 getlock = 0; 638 #else 639 getlock = (mode & NAT_LOCKHELD) ? 
0 : 1; 640 #endif 641 642 nat = NULL; /* XXX gcc -Wuninitialized */ 643 if (cmd == (ioctlcmd_t)SIOCADNAT) { 644 KMALLOC(nt, ipnat_t *); 645 } else { 646 nt = NULL; 647 } 648 649 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) { 650 if (mode & NAT_SYSSPACE) { 651 bcopy(data, (char *)&natd, sizeof(natd)); 652 error = 0; 653 } else { 654 error = fr_inobj(data, &natd, IPFOBJ_IPNAT); 655 } 656 657 } else if (cmd == (ioctlcmd_t)SIOCIPFFL) { /* SIOCFLNAT & SIOCCNATL */ 658 BCOPYIN(data, &arg, sizeof(arg)); 659 } 660 661 if (error != 0) 662 goto done; 663 664 /* 665 * For add/delete, look to see if the NAT entry is already present 666 */ 667 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) { 668 nat = &natd; 669 if (nat->in_v == 0) /* For backward compat. */ 670 nat->in_v = 4; 671 nat->in_flags &= IPN_USERFLAGS; 672 if ((nat->in_redir & NAT_MAPBLK) == 0) { 673 if ((nat->in_flags & IPN_SPLIT) == 0) 674 nat->in_inip &= nat->in_inmsk; 675 if ((nat->in_flags & IPN_IPRANGE) == 0) 676 nat->in_outip &= nat->in_outmsk; 677 } 678 MUTEX_ENTER(&ifs->ifs_ipf_natio); 679 for (np = &ifs->ifs_nat_list; ((n = *np) != NULL); 680 np = &n->in_next) 681 if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags, 682 IPN_CMPSIZ) == 0) { 683 if (nat->in_redir == NAT_REDIRECT && 684 nat->in_pnext != n->in_pnext) 685 continue; 686 break; 687 } 688 } 689 690 switch (cmd) 691 { 692 case SIOCGENITER : 693 { 694 ipfgeniter_t iter; 695 ipftoken_t *token; 696 697 error = fr_inobj(data, &iter, IPFOBJ_GENITER); 698 if (error != 0) 699 break; 700 701 token = ipf_findtoken(iter.igi_type, uid, ctx, ifs); 702 if (token != NULL) 703 error = nat_iterator(token, &iter, ifs); 704 else 705 error = ESRCH; 706 RWLOCK_EXIT(&ifs->ifs_ipf_tokens); 707 break; 708 } 709 #ifdef IPFILTER_LOG 710 case SIOCIPFFB : 711 { 712 int tmp; 713 714 if (!(mode & FWRITE)) 715 error = EPERM; 716 else { 717 tmp = ipflog_clear(IPL_LOGNAT, ifs); 718 BCOPYOUT((char *)&tmp, (char *)data, sizeof(tmp)); 
719 } 720 break; 721 } 722 case SIOCSETLG : 723 if (!(mode & FWRITE)) 724 error = EPERM; 725 else { 726 BCOPYIN((char *)data, 727 (char *)&ifs->ifs_nat_logging, 728 sizeof(ifs->ifs_nat_logging)); 729 } 730 break; 731 case SIOCGETLG : 732 BCOPYOUT((char *)&ifs->ifs_nat_logging, (char *)data, 733 sizeof(ifs->ifs_nat_logging)); 734 break; 735 case FIONREAD : 736 arg = ifs->ifs_iplused[IPL_LOGNAT]; 737 BCOPYOUT(&arg, data, sizeof(arg)); 738 break; 739 #endif 740 case SIOCADNAT : 741 if (!(mode & FWRITE)) { 742 error = EPERM; 743 } else if (n != NULL) { 744 error = EEXIST; 745 } else if (nt == NULL) { 746 error = ENOMEM; 747 } 748 if (error != 0) { 749 MUTEX_EXIT(&ifs->ifs_ipf_natio); 750 break; 751 } 752 bcopy((char *)nat, (char *)nt, sizeof(*n)); 753 error = nat_siocaddnat(nt, np, getlock, ifs); 754 MUTEX_EXIT(&ifs->ifs_ipf_natio); 755 if (error == 0) 756 nt = NULL; 757 break; 758 case SIOCRMNAT : 759 if (!(mode & FWRITE)) { 760 error = EPERM; 761 n = NULL; 762 } else if (n == NULL) { 763 error = ESRCH; 764 } 765 766 if (error != 0) { 767 MUTEX_EXIT(&ifs->ifs_ipf_natio); 768 break; 769 } 770 nat_siocdelnat(n, np, getlock, ifs); 771 772 MUTEX_EXIT(&ifs->ifs_ipf_natio); 773 n = NULL; 774 break; 775 case SIOCGNATS : 776 ifs->ifs_nat_stats.ns_table[0] = ifs->ifs_nat_table[0]; 777 ifs->ifs_nat_stats.ns_table[1] = ifs->ifs_nat_table[1]; 778 ifs->ifs_nat_stats.ns_list = ifs->ifs_nat_list; 779 ifs->ifs_nat_stats.ns_maptable = ifs->ifs_maptable; 780 ifs->ifs_nat_stats.ns_maplist = ifs->ifs_ipf_hm_maplist; 781 ifs->ifs_nat_stats.ns_nattab_max = ifs->ifs_ipf_nattable_max; 782 ifs->ifs_nat_stats.ns_nattab_sz = ifs->ifs_ipf_nattable_sz; 783 ifs->ifs_nat_stats.ns_rultab_sz = ifs->ifs_ipf_natrules_sz; 784 ifs->ifs_nat_stats.ns_rdrtab_sz = ifs->ifs_ipf_rdrrules_sz; 785 ifs->ifs_nat_stats.ns_hostmap_sz = ifs->ifs_ipf_hostmap_sz; 786 ifs->ifs_nat_stats.ns_instances = ifs->ifs_nat_instances; 787 ifs->ifs_nat_stats.ns_apslist = ifs->ifs_ap_sess_list; 788 error = fr_outobj(data, 
&ifs->ifs_nat_stats, IPFOBJ_NATSTAT); 789 break; 790 case SIOCGNATL : 791 { 792 natlookup_t nl; 793 794 if (getlock) { 795 READ_ENTER(&ifs->ifs_ipf_nat); 796 } 797 error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP); 798 if (error == 0) { 799 if (nat_lookupredir(&nl, ifs) != NULL) { 800 error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP); 801 } else { 802 error = ESRCH; 803 } 804 } 805 if (getlock) { 806 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 807 } 808 break; 809 } 810 case SIOCIPFFL : /* old SIOCFLNAT & SIOCCNATL */ 811 if (!(mode & FWRITE)) { 812 error = EPERM; 813 break; 814 } 815 if (getlock) { 816 WRITE_ENTER(&ifs->ifs_ipf_nat); 817 } 818 error = 0; 819 if (arg == 0) 820 ret = nat_flushtable(ifs); 821 else if (arg == 1) 822 ret = nat_clearlist(ifs); 823 else if (arg >= 2 && arg <= 4) 824 ret = nat_extraflush(arg - 2, ifs); 825 else 826 error = EINVAL; 827 if (getlock) { 828 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 829 } 830 if (error == 0) { 831 BCOPYOUT(&ret, data, sizeof(ret)); 832 } 833 break; 834 case SIOCPROXY : 835 error = appr_ioctl(data, cmd, mode, ifs); 836 break; 837 case SIOCSTLCK : 838 if (!(mode & FWRITE)) { 839 error = EPERM; 840 } else { 841 fr_lock(data, &ifs->ifs_fr_nat_lock); 842 } 843 break; 844 case SIOCSTPUT : 845 if ((mode & FWRITE) != 0) { 846 error = fr_natputent(data, getlock, ifs); 847 } else { 848 error = EACCES; 849 } 850 break; 851 case SIOCSTGSZ : 852 if (ifs->ifs_fr_nat_lock) { 853 if (getlock) { 854 READ_ENTER(&ifs->ifs_ipf_nat); 855 } 856 error = fr_natgetsz(data, ifs); 857 if (getlock) { 858 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 859 } 860 } else 861 error = EACCES; 862 break; 863 case SIOCSTGET : 864 if (ifs->ifs_fr_nat_lock) { 865 if (getlock) { 866 READ_ENTER(&ifs->ifs_ipf_nat); 867 } 868 error = fr_natgetent(data, ifs); 869 if (getlock) { 870 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 871 } 872 } else 873 error = EACCES; 874 break; 875 case SIOCIPFDELTOK : 876 (void) BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg)); 877 error = ipf_deltoken(arg, uid, ctx, ifs); 878 
break; 879 default : 880 error = EINVAL; 881 break; 882 } 883 done: 884 if (nt) 885 KFREE(nt); 886 return error; 887 } 888 889 890 /* ------------------------------------------------------------------------ */ 891 /* Function: nat_siocaddnat */ 892 /* Returns: int - 0 == success, != 0 == failure */ 893 /* Parameters: n(I) - pointer to new NAT rule */ 894 /* np(I) - pointer to where to insert new NAT rule */ 895 /* getlock(I) - flag indicating if lock on ipf_nat is held */ 896 /* Mutex Locks: ipf_natio */ 897 /* */ 898 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ 899 /* from information passed to the kernel, then add it to the appropriate */ 900 /* NAT rule table(s). */ 901 /* ------------------------------------------------------------------------ */ 902 static int nat_siocaddnat(n, np, getlock, ifs) 903 ipnat_t *n, **np; 904 int getlock; 905 ipf_stack_t *ifs; 906 { 907 int error = 0, i, j; 908 909 if (nat_resolverule(n, ifs) != 0) 910 return ENOENT; 911 912 if ((n->in_age[0] == 0) && (n->in_age[1] != 0)) 913 return EINVAL; 914 915 n->in_use = 0; 916 if (n->in_redir & NAT_MAPBLK) 917 n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk); 918 else if (n->in_flags & IPN_AUTOPORTMAP) 919 n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk); 920 else if (n->in_flags & IPN_IPRANGE) 921 n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip); 922 else if (n->in_flags & IPN_SPLIT) 923 n->in_space = 2; 924 else if (n->in_outmsk != 0) 925 n->in_space = ~ntohl(n->in_outmsk); 926 else 927 n->in_space = 1; 928 929 /* 930 * Calculate the number of valid IP addresses in the output 931 * mapping range. In all cases, the range is inclusive of 932 * the start and ending IP addresses. 933 * If to a CIDR address, lose 2: broadcast + network address 934 * (so subtract 1) 935 * If to a range, add one. 936 * If to a single IP address, set to 1. 
937 */ 938 if (n->in_space) { 939 if ((n->in_flags & IPN_IPRANGE) != 0) 940 n->in_space += 1; 941 else 942 n->in_space -= 1; 943 } else 944 n->in_space = 1; 945 946 if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) && 947 ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0)) 948 n->in_nip = ntohl(n->in_outip) + 1; 949 else if ((n->in_flags & IPN_SPLIT) && 950 (n->in_redir & NAT_REDIRECT)) 951 n->in_nip = ntohl(n->in_inip); 952 else 953 n->in_nip = ntohl(n->in_outip); 954 if (n->in_redir & NAT_MAP) { 955 n->in_pnext = ntohs(n->in_pmin); 956 /* 957 * Multiply by the number of ports made available. 958 */ 959 if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) { 960 n->in_space *= (ntohs(n->in_pmax) - 961 ntohs(n->in_pmin) + 1); 962 /* 963 * Because two different sources can map to 964 * different destinations but use the same 965 * local IP#/port #. 966 * If the result is smaller than in_space, then 967 * we may have wrapped around 32bits. 968 */ 969 i = n->in_inmsk; 970 if ((i != 0) && (i != 0xffffffff)) { 971 j = n->in_space * (~ntohl(i) + 1); 972 if (j >= n->in_space) 973 n->in_space = j; 974 else 975 n->in_space = 0xffffffff; 976 } 977 } 978 /* 979 * If no protocol is specified, multiple by 256 to allow for 980 * at least one IP:IP mapping per protocol. 
981 */ 982 if ((n->in_flags & IPN_TCPUDPICMP) == 0) { 983 j = n->in_space * 256; 984 if (j >= n->in_space) 985 n->in_space = j; 986 else 987 n->in_space = 0xffffffff; 988 } 989 } 990 991 /* Otherwise, these fields are preset */ 992 993 if (getlock) { 994 WRITE_ENTER(&ifs->ifs_ipf_nat); 995 } 996 n->in_next = NULL; 997 *np = n; 998 999 if (n->in_age[0] != 0) 1000 n->in_tqehead[0] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe, 1001 n->in_age[0], ifs); 1002 1003 if (n->in_age[1] != 0) 1004 n->in_tqehead[1] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe, 1005 n->in_age[1], ifs); 1006 1007 if (n->in_redir & NAT_REDIRECT) { 1008 n->in_flags &= ~IPN_NOTDST; 1009 nat_addrdr(n, ifs); 1010 } 1011 if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) { 1012 n->in_flags &= ~IPN_NOTSRC; 1013 nat_addnat(n, ifs); 1014 } 1015 n = NULL; 1016 ifs->ifs_nat_stats.ns_rules++; 1017 if (getlock) { 1018 RWLOCK_EXIT(&ifs->ifs_ipf_nat); /* WRITE */ 1019 } 1020 1021 return error; 1022 } 1023 1024 1025 /* ------------------------------------------------------------------------ */ 1026 /* Function: nat_resolvrule */ 1027 /* Returns: int - 0 == success, -1 == failure */ 1028 /* Parameters: n(I) - pointer to NAT rule */ 1029 /* */ 1030 /* Resolve some of the details inside the NAT rule. Includes resolving */ 1031 /* any specified interfaces and proxy labels, and determines whether or not */ 1032 /* all proxy labels are correctly specified. */ 1033 /* */ 1034 /* Called by nat_siocaddnat() (SIOCADNAT) and fr_natputent (SIOCSTPUT). 
*/ 1035 /* ------------------------------------------------------------------------ */ 1036 static int nat_resolverule(n, ifs) 1037 ipnat_t *n; 1038 ipf_stack_t *ifs; 1039 { 1040 n->in_ifnames[0][LIFNAMSIZ - 1] = '\0'; 1041 n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4, ifs); 1042 1043 n->in_ifnames[1][LIFNAMSIZ - 1] = '\0'; 1044 if (n->in_ifnames[1][0] == '\0') { 1045 (void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ); 1046 n->in_ifps[1] = n->in_ifps[0]; 1047 } else { 1048 n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4, ifs); 1049 } 1050 1051 if (n->in_plabel[0] != '\0') { 1052 n->in_apr = appr_lookup(n->in_p, n->in_plabel, ifs); 1053 if (n->in_apr == NULL) 1054 return -1; 1055 } 1056 return 0; 1057 } 1058 1059 1060 /* ------------------------------------------------------------------------ */ 1061 /* Function: nat_siocdelnat */ 1062 /* Returns: int - 0 == success, != 0 == failure */ 1063 /* Parameters: n(I) - pointer to new NAT rule */ 1064 /* np(I) - pointer to where to insert new NAT rule */ 1065 /* getlock(I) - flag indicating if lock on ipf_nat is held */ 1066 /* Mutex Locks: ipf_natio */ 1067 /* */ 1068 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ 1069 /* from information passed to the kernel, then add it to the appropriate */ 1070 /* NAT rule table(s). 
*/ 1071 /* ------------------------------------------------------------------------ */ 1072 static void nat_siocdelnat(n, np, getlock, ifs) 1073 ipnat_t *n, **np; 1074 int getlock; 1075 ipf_stack_t *ifs; 1076 { 1077 if (getlock) { 1078 WRITE_ENTER(&ifs->ifs_ipf_nat); 1079 } 1080 if (n->in_redir & NAT_REDIRECT) 1081 nat_delrdr(n); 1082 if (n->in_redir & (NAT_MAPBLK|NAT_MAP)) 1083 nat_delnat(n); 1084 if (ifs->ifs_nat_list == NULL) { 1085 ifs->ifs_nat_masks = 0; 1086 ifs->ifs_rdr_masks = 0; 1087 } 1088 1089 if (n->in_tqehead[0] != NULL) { 1090 if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) { 1091 fr_freetimeoutqueue(n->in_tqehead[0], ifs); 1092 } 1093 } 1094 1095 if (n->in_tqehead[1] != NULL) { 1096 if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) { 1097 fr_freetimeoutqueue(n->in_tqehead[1], ifs); 1098 } 1099 } 1100 1101 *np = n->in_next; 1102 1103 if (n->in_use == 0) { 1104 if (n->in_apr) 1105 appr_free(n->in_apr); 1106 KFREE(n); 1107 ifs->ifs_nat_stats.ns_rules--; 1108 } else { 1109 n->in_flags |= IPN_DELETE; 1110 n->in_next = NULL; 1111 } 1112 if (getlock) { 1113 RWLOCK_EXIT(&ifs->ifs_ipf_nat); /* READ/WRITE */ 1114 } 1115 } 1116 1117 1118 /* ------------------------------------------------------------------------ */ 1119 /* Function: fr_natgetsz */ 1120 /* Returns: int - 0 == success, != 0 is the error value. */ 1121 /* Parameters: data(I) - pointer to natget structure with kernel pointer */ 1122 /* get the size of. */ 1123 /* */ 1124 /* Handle SIOCSTGSZ. */ 1125 /* Return the size of the nat list entry to be copied back to user space. */ 1126 /* The size of the entry is stored in the ng_sz field and the enture natget */ 1127 /* structure is copied back to the user. 
*/ 1128 /* ------------------------------------------------------------------------ */ 1129 static int fr_natgetsz(data, ifs) 1130 caddr_t data; 1131 ipf_stack_t *ifs; 1132 { 1133 ap_session_t *aps; 1134 nat_t *nat, *n; 1135 natget_t ng; 1136 1137 BCOPYIN(data, &ng, sizeof(ng)); 1138 1139 nat = ng.ng_ptr; 1140 if (!nat) { 1141 nat = ifs->ifs_nat_instances; 1142 ng.ng_sz = 0; 1143 /* 1144 * Empty list so the size returned is 0. Simple. 1145 */ 1146 if (nat == NULL) { 1147 BCOPYOUT(&ng, data, sizeof(ng)); 1148 return 0; 1149 } 1150 } else { 1151 /* 1152 * Make sure the pointer we're copying from exists in the 1153 * current list of entries. Security precaution to prevent 1154 * copying of random kernel data. 1155 */ 1156 for (n = ifs->ifs_nat_instances; n; n = n->nat_next) 1157 if (n == nat) 1158 break; 1159 if (!n) 1160 return ESRCH; 1161 } 1162 1163 /* 1164 * Incluse any space required for proxy data structures. 1165 */ 1166 ng.ng_sz = sizeof(nat_save_t); 1167 aps = nat->nat_aps; 1168 if (aps != NULL) { 1169 ng.ng_sz += sizeof(ap_session_t) - 4; 1170 if (aps->aps_data != 0) 1171 ng.ng_sz += aps->aps_psiz; 1172 } 1173 1174 BCOPYOUT(&ng, data, sizeof(ng)); 1175 return 0; 1176 } 1177 1178 1179 /* ------------------------------------------------------------------------ */ 1180 /* Function: fr_natgetent */ 1181 /* Returns: int - 0 == success, != 0 is the error value. */ 1182 /* Parameters: data(I) - pointer to natget structure with kernel pointer */ 1183 /* to NAT structure to copy out. */ 1184 /* */ 1185 /* Handle SIOCSTGET. */ 1186 /* Copies out NAT entry to user space. 
Any additional data held for a */ 1187 /* proxy is also copied, as to is the NAT rule which was responsible for it */ 1188 /* ------------------------------------------------------------------------ */ 1189 static int fr_natgetent(data, ifs) 1190 caddr_t data; 1191 ipf_stack_t *ifs; 1192 { 1193 int error, outsize; 1194 ap_session_t *aps; 1195 nat_save_t *ipn, ipns; 1196 nat_t *n, *nat; 1197 1198 error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE); 1199 if (error != 0) 1200 return error; 1201 1202 if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920)) 1203 return EINVAL; 1204 1205 KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize); 1206 if (ipn == NULL) 1207 return ENOMEM; 1208 1209 ipn->ipn_dsize = ipns.ipn_dsize; 1210 nat = ipns.ipn_next; 1211 if (nat == NULL) { 1212 nat = ifs->ifs_nat_instances; 1213 if (nat == NULL) { 1214 if (ifs->ifs_nat_instances == NULL) 1215 error = ENOENT; 1216 goto finished; 1217 } 1218 } else { 1219 /* 1220 * Make sure the pointer we're copying from exists in the 1221 * current list of entries. Security precaution to prevent 1222 * copying of random kernel data. 1223 */ 1224 for (n = ifs->ifs_nat_instances; n; n = n->nat_next) 1225 if (n == nat) 1226 break; 1227 if (n == NULL) { 1228 error = ESRCH; 1229 goto finished; 1230 } 1231 } 1232 ipn->ipn_next = nat->nat_next; 1233 1234 /* 1235 * Copy the NAT structure. 1236 */ 1237 bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat)); 1238 1239 /* 1240 * If we have a pointer to the NAT rule it belongs to, save that too. 1241 */ 1242 if (nat->nat_ptr != NULL) 1243 bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat, 1244 sizeof(ipn->ipn_ipnat)); 1245 1246 /* 1247 * If we also know the NAT entry has an associated filter rule, 1248 * save that too. 
1249 */ 1250 if (nat->nat_fr != NULL) 1251 bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr, 1252 sizeof(ipn->ipn_fr)); 1253 1254 /* 1255 * Last but not least, if there is an application proxy session set 1256 * up for this NAT entry, then copy that out too, including any 1257 * private data saved along side it by the proxy. 1258 */ 1259 aps = nat->nat_aps; 1260 outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data); 1261 if (aps != NULL) { 1262 char *s; 1263 1264 if (outsize < sizeof(*aps)) { 1265 error = ENOBUFS; 1266 goto finished; 1267 } 1268 1269 s = ipn->ipn_data; 1270 bcopy((char *)aps, s, sizeof(*aps)); 1271 s += sizeof(*aps); 1272 outsize -= sizeof(*aps); 1273 if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz)) 1274 bcopy(aps->aps_data, s, aps->aps_psiz); 1275 else 1276 error = ENOBUFS; 1277 } 1278 if (error == 0) { 1279 error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize); 1280 } 1281 1282 finished: 1283 if (ipn != NULL) { 1284 KFREES(ipn, ipns.ipn_dsize); 1285 } 1286 return error; 1287 } 1288 1289 /* ------------------------------------------------------------------------ */ 1290 /* Function: nat_calc_chksum_diffs */ 1291 /* Returns: void */ 1292 /* Parameters: nat - pointer to NAT table entry */ 1293 /* */ 1294 /* Function calculates chksum deltas for IP header (nat_ipsumd) and TCP/UDP */ 1295 /* headers (nat_sumd). The things for L4 (UDP/TCP) get complicated when */ 1296 /* we are dealing with partial chksum offload. For these cases we need to */ 1297 /* compute a 'partial chksum delta'. The 'partial chksum delta'is stored */ 1298 /* into nat_sumd[1], while ordinary chksum delta for TCP/UDP is in */ 1299 /* nat_sumd[0]. */ 1300 /* */ 1301 /* The function accepts initialized NAT table entry and computes the deltas */ 1302 /* from nat_inip/nat_outip members. The function is called right before */ 1303 /* the new entry is inserted into the table. 
*/ 1304 /* */ 1305 /* The ipsumd (IP hedaer chksum delta adjustment) is computed as a chksum */ 1306 /* of delta between original and new IP addresses. */ 1307 /* */ 1308 /* the nat_sumd[0] (TCP/UDP header chksum delta adjustment) is computed as */ 1309 /* a chkusm of delta between original an new IP addrress:port tupples. */ 1310 /* */ 1311 /* Some facts about chksum, we should remember: */ 1312 /* IP header chksum covers IP header only */ 1313 /* */ 1314 /* TCP/UDP chksum covers data payload and so called pseudo header */ 1315 /* SRC, DST IP address */ 1316 /* SRC, DST Port */ 1317 /* length of payload */ 1318 /* */ 1319 /* The partial chksum delta (nat_sumd[1] is used to adjust db_ckusm16 */ 1320 /* member of dblk_t structure. The db_ckusm16 member is not part of */ 1321 /* IP/UDP/TCP header it is 16 bit value computed by NIC driver with partial */ 1322 /* chksum offload capacbility for every inbound packet. The db_cksum16 is */ 1323 /* stored along with other IP packet data in dblk_t structure and used in */ 1324 /* for IP/UDP/TCP chksum validation later in ip.c. */ 1325 /* */ 1326 /* The partial chksum delta (adjustment, nat_sumd[1]) is computed as chksum */ 1327 /* of delta between new and orig address. NOTE: the order of operands for */ 1328 /* partial delta operation is swapped compared to computing the IP/TCP/UDP */ 1329 /* header adjustment. It is by design see (IP_CKSUM_RECV() macro in ip.c). */ 1330 /* */ 1331 /* ------------------------------------------------------------------------ */ 1332 static void nat_calc_chksum_diffs(nat) 1333 nat_t *nat; 1334 { 1335 u_32_t sum_orig = 0; 1336 u_32_t sum_changed = 0; 1337 u_32_t sumd; 1338 u_32_t ipsum_orig = 0; 1339 u_32_t ipsum_changed = 0; 1340 1341 /* 1342 * the switch calculates operands for CALC_SUMD(), 1343 * which will compute the partial chksum delta. 
1344 */ 1345 switch (nat->nat_dir) 1346 { 1347 case NAT_INBOUND: 1348 /* 1349 * we are dealing with RDR rule (DST address gets 1350 * modified on packet from client) 1351 */ 1352 sum_changed = LONG_SUM(ntohl(nat->nat_inip.s_addr)); 1353 sum_orig = LONG_SUM(ntohl(nat->nat_outip.s_addr)); 1354 break; 1355 case NAT_OUTBOUND: 1356 /* 1357 * we are dealing with MAP rule (SRC address gets 1358 * modified on packet from client) 1359 */ 1360 sum_changed = LONG_SUM(ntohl(nat->nat_outip.s_addr)); 1361 sum_orig = LONG_SUM(ntohl(nat->nat_inip.s_addr)); 1362 break; 1363 default: ; 1364 break; 1365 } 1366 1367 /* 1368 * we also preserve CALC_SUMD() operands here, for IP chksum delta 1369 * calculation, which happens at the end of function. 1370 */ 1371 ipsum_changed = sum_changed; 1372 ipsum_orig = sum_orig; 1373 /* 1374 * NOTE: the order of operands for partial chksum adjustment 1375 * computation has to be swapped! 1376 */ 1377 CALC_SUMD(sum_changed, sum_orig, sumd); 1378 nat->nat_sumd[1] = (sumd & 0xffff) + (sumd >> 16); 1379 1380 if (nat->nat_flags & (IPN_TCPUDP | IPN_ICMPQUERY)) { 1381 1382 /* 1383 * switch calculates operands for CALC_SUMD(), which will 1384 * compute the full chksum delta. 
1385 */ 1386 switch (nat->nat_dir) 1387 { 1388 case NAT_INBOUND: 1389 sum_changed = LONG_SUM( 1390 ntohl(nat->nat_inip.s_addr) + 1391 ntohs(nat->nat_inport) 1392 ); 1393 sum_orig = LONG_SUM( 1394 ntohl(nat->nat_outip.s_addr) + 1395 ntohs(nat->nat_outport) 1396 ); 1397 break; 1398 case NAT_OUTBOUND: 1399 sum_changed = LONG_SUM( 1400 ntohl(nat->nat_outip.s_addr) + 1401 ntohs(nat->nat_outport) 1402 ); 1403 sum_orig = LONG_SUM( 1404 ntohl(nat->nat_inip.s_addr) + 1405 ntohs(nat->nat_inport) 1406 ); 1407 break; 1408 default: ; 1409 break; 1410 } 1411 1412 CALC_SUMD(sum_orig, sum_changed, sumd); 1413 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); 1414 1415 if (!(nat->nat_flags & IPN_TCPUDP)) { 1416 /* 1417 * partial HW chksum offload works for TCP/UDP headers only, 1418 * so we need to enforce full chksum adjustment for ICMP 1419 */ 1420 nat->nat_sumd[1] = nat->nat_sumd[0]; 1421 } 1422 } 1423 else 1424 nat->nat_sumd[0] = nat->nat_sumd[1]; 1425 1426 /* 1427 * we may reuse the already computed nat_sumd[0] for IP header chksum 1428 * adjustment in case the L4 (TCP/UDP header) is not changed by NAT. 1429 */ 1430 if (NAT_HAS_L4_CHANGED(nat)) { 1431 /* 1432 * bad luck, NAT changes also the L4 header, use IP addresses 1433 * to compute chksum adjustment for IP header. 1434 */ 1435 CALC_SUMD(ipsum_orig, ipsum_changed, sumd); 1436 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16); 1437 } 1438 else { 1439 /* 1440 * the NAT does not change L4 hdr -> reuse chksum adjustment 1441 * for IP hdr. 1442 */ 1443 nat->nat_ipsumd = nat->nat_sumd[0]; 1444 1445 /* 1446 * if L4 header does not use chkusm - zero out deltas 1447 */ 1448 if (!(nat->nat_flags & IPN_TCPUDPICMP)) { 1449 nat->nat_sumd[0] = 0; 1450 nat->nat_sumd[1] = 0; 1451 } 1452 } 1453 1454 return; 1455 } 1456 1457 /* ------------------------------------------------------------------------ */ 1458 /* Function: fr_natputent */ 1459 /* Returns: int - 0 == success, != 0 is the error value. 
*/ 1460 /* Parameters: data(I) - pointer to natget structure with NAT */ 1461 /* structure information to load into the kernel */ 1462 /* getlock(I) - flag indicating whether or not a write lock */ 1463 /* on ipf_nat is already held. */ 1464 /* */ 1465 /* Handle SIOCSTPUT. */ 1466 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */ 1467 /* firewall rule data structures, if pointers to them indicate so. */ 1468 /* ------------------------------------------------------------------------ */ 1469 static int fr_natputent(data, getlock, ifs) 1470 caddr_t data; 1471 int getlock; 1472 ipf_stack_t *ifs; 1473 { 1474 nat_save_t ipn, *ipnn; 1475 ap_session_t *aps; 1476 nat_t *n, *nat; 1477 frentry_t *fr; 1478 fr_info_t fin; 1479 ipnat_t *in; 1480 int error; 1481 1482 error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE); 1483 if (error != 0) 1484 return error; 1485 1486 /* 1487 * Trigger automatic call to nat_extraflush() if the 1488 * table has reached capcity specified by hi watermark. 1489 */ 1490 if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi) 1491 ifs->ifs_nat_doflush = 1; 1492 1493 /* 1494 * Initialise early because of code at junkput label. 1495 */ 1496 in = NULL; 1497 aps = NULL; 1498 nat = NULL; 1499 ipnn = NULL; 1500 1501 /* 1502 * New entry, copy in the rest of the NAT entry if it's size is more 1503 * than just the nat_t structure. 
1504 */ 1505 fr = NULL; 1506 if (ipn.ipn_dsize > sizeof(ipn)) { 1507 if (ipn.ipn_dsize > 81920) { 1508 error = ENOMEM; 1509 goto junkput; 1510 } 1511 1512 KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize); 1513 if (ipnn == NULL) 1514 return ENOMEM; 1515 1516 error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize); 1517 if (error != 0) { 1518 error = EFAULT; 1519 goto junkput; 1520 } 1521 } else 1522 ipnn = &ipn; 1523 1524 KMALLOC(nat, nat_t *); 1525 if (nat == NULL) { 1526 error = ENOMEM; 1527 goto junkput; 1528 } 1529 1530 bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat)); 1531 /* 1532 * Initialize all these so that nat_delete() doesn't cause a crash. 1533 */ 1534 bzero((char *)nat, offsetof(struct nat, nat_tqe)); 1535 nat->nat_tqe.tqe_pnext = NULL; 1536 nat->nat_tqe.tqe_next = NULL; 1537 nat->nat_tqe.tqe_ifq = NULL; 1538 nat->nat_tqe.tqe_parent = nat; 1539 1540 /* 1541 * Restore the rule associated with this nat session 1542 */ 1543 in = ipnn->ipn_nat.nat_ptr; 1544 if (in != NULL) { 1545 KMALLOC(in, ipnat_t *); 1546 nat->nat_ptr = in; 1547 if (in == NULL) { 1548 error = ENOMEM; 1549 goto junkput; 1550 } 1551 bzero((char *)in, offsetof(struct ipnat, in_next6)); 1552 bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in)); 1553 in->in_use = 1; 1554 in->in_flags |= IPN_DELETE; 1555 1556 ATOMIC_INC(ifs->ifs_nat_stats.ns_rules); 1557 1558 if (nat_resolverule(in, ifs) != 0) { 1559 error = ESRCH; 1560 goto junkput; 1561 } 1562 } 1563 1564 /* 1565 * Check that the NAT entry doesn't already exist in the kernel. 
1566 */ 1567 bzero((char *)&fin, sizeof(fin)); 1568 fin.fin_p = nat->nat_p; 1569 fin.fin_ifs = ifs; 1570 if (nat->nat_dir == NAT_OUTBOUND) { 1571 fin.fin_data[0] = ntohs(nat->nat_oport); 1572 fin.fin_data[1] = ntohs(nat->nat_outport); 1573 fin.fin_ifp = nat->nat_ifps[0]; 1574 if (getlock) { 1575 READ_ENTER(&ifs->ifs_ipf_nat); 1576 } 1577 n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p, 1578 nat->nat_oip, nat->nat_outip); 1579 if (getlock) { 1580 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1581 } 1582 if (n != NULL) { 1583 error = EEXIST; 1584 goto junkput; 1585 } 1586 } else if (nat->nat_dir == NAT_INBOUND) { 1587 fin.fin_data[0] = ntohs(nat->nat_inport); 1588 fin.fin_data[1] = ntohs(nat->nat_oport); 1589 fin.fin_ifp = nat->nat_ifps[1]; 1590 if (getlock) { 1591 READ_ENTER(&ifs->ifs_ipf_nat); 1592 } 1593 n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p, 1594 nat->nat_inip, nat->nat_oip); 1595 if (getlock) { 1596 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1597 } 1598 if (n != NULL) { 1599 error = EEXIST; 1600 goto junkput; 1601 } 1602 } else { 1603 error = EINVAL; 1604 goto junkput; 1605 } 1606 1607 /* 1608 * Restore ap_session_t structure. Include the private data allocated 1609 * if it was there. 
1610 */ 1611 aps = nat->nat_aps; 1612 if (aps != NULL) { 1613 KMALLOC(aps, ap_session_t *); 1614 nat->nat_aps = aps; 1615 if (aps == NULL) { 1616 error = ENOMEM; 1617 goto junkput; 1618 } 1619 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps)); 1620 if (in != NULL) 1621 aps->aps_apr = in->in_apr; 1622 else 1623 aps->aps_apr = NULL; 1624 if (aps->aps_psiz != 0) { 1625 if (aps->aps_psiz > 81920) { 1626 error = ENOMEM; 1627 goto junkput; 1628 } 1629 KMALLOCS(aps->aps_data, void *, aps->aps_psiz); 1630 if (aps->aps_data == NULL) { 1631 error = ENOMEM; 1632 goto junkput; 1633 } 1634 bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data, 1635 aps->aps_psiz); 1636 } else { 1637 aps->aps_psiz = 0; 1638 aps->aps_data = NULL; 1639 } 1640 } 1641 1642 /* 1643 * If there was a filtering rule associated with this entry then 1644 * build up a new one. 1645 */ 1646 fr = nat->nat_fr; 1647 if (fr != NULL) { 1648 if ((nat->nat_flags & SI_NEWFR) != 0) { 1649 KMALLOC(fr, frentry_t *); 1650 nat->nat_fr = fr; 1651 if (fr == NULL) { 1652 error = ENOMEM; 1653 goto junkput; 1654 } 1655 ipnn->ipn_nat.nat_fr = fr; 1656 (void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE); 1657 bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr)); 1658 1659 fr->fr_ref = 1; 1660 fr->fr_dsize = 0; 1661 fr->fr_data = NULL; 1662 fr->fr_type = FR_T_NONE; 1663 1664 MUTEX_NUKE(&fr->fr_lock); 1665 MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock"); 1666 } else { 1667 if (getlock) { 1668 READ_ENTER(&ifs->ifs_ipf_nat); 1669 } 1670 for (n = ifs->ifs_nat_instances; n; n = n->nat_next) 1671 if (n->nat_fr == fr) 1672 break; 1673 1674 if (n != NULL) { 1675 MUTEX_ENTER(&fr->fr_lock); 1676 fr->fr_ref++; 1677 MUTEX_EXIT(&fr->fr_lock); 1678 } 1679 if (getlock) { 1680 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1681 } 1682 if (!n) { 1683 error = ESRCH; 1684 goto junkput; 1685 } 1686 } 1687 } 1688 1689 if (ipnn != &ipn) { 1690 KFREES(ipnn, ipn.ipn_dsize); 1691 ipnn = NULL; 1692 } 1693 1694 nat_calc_chksum_diffs(nat); 1695 1696 if (getlock) { 1697 
WRITE_ENTER(&ifs->ifs_ipf_nat); 1698 } 1699 error = nat_insert(nat, nat->nat_rev, ifs); 1700 if ((error == 0) && (aps != NULL)) { 1701 aps->aps_next = ifs->ifs_ap_sess_list; 1702 ifs->ifs_ap_sess_list = aps; 1703 } 1704 if (getlock) { 1705 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1706 } 1707 1708 if (error == 0) 1709 return 0; 1710 1711 error = ENOMEM; 1712 1713 junkput: 1714 if (fr != NULL) 1715 (void) fr_derefrule(&fr, ifs); 1716 1717 if ((ipnn != NULL) && (ipnn != &ipn)) { 1718 KFREES(ipnn, ipn.ipn_dsize); 1719 } 1720 if (nat != NULL) { 1721 if (aps != NULL) { 1722 if (aps->aps_data != NULL) { 1723 KFREES(aps->aps_data, aps->aps_psiz); 1724 } 1725 KFREE(aps); 1726 } 1727 if (in != NULL) { 1728 if (in->in_apr) 1729 appr_free(in->in_apr); 1730 KFREE(in); 1731 } 1732 KFREE(nat); 1733 } 1734 return error; 1735 } 1736 1737 1738 /* ------------------------------------------------------------------------ */ 1739 /* Function: nat_delete */ 1740 /* Returns: Nil */ 1741 /* Parameters: natd(I) - pointer to NAT structure to delete */ 1742 /* logtype(I) - type of LOG record to create before deleting */ 1743 /* Write Lock: ipf_nat */ 1744 /* */ 1745 /* Delete a nat entry from the various lists and table. If NAT logging is */ 1746 /* enabled then generate a NAT log record for this event. */ 1747 /* ------------------------------------------------------------------------ */ 1748 static void nat_delete(nat, logtype, ifs) 1749 struct nat *nat; 1750 int logtype; 1751 ipf_stack_t *ifs; 1752 { 1753 struct ipnat *ipn; 1754 1755 if (logtype != 0 && ifs->ifs_nat_logging != 0) 1756 nat_log(nat, logtype, ifs); 1757 1758 /* 1759 * Take it as a general indication that all the pointers are set if 1760 * nat_pnext is set. 
1761 */ 1762 if (nat->nat_pnext != NULL) { 1763 ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--; 1764 ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--; 1765 1766 *nat->nat_pnext = nat->nat_next; 1767 if (nat->nat_next != NULL) { 1768 nat->nat_next->nat_pnext = nat->nat_pnext; 1769 nat->nat_next = NULL; 1770 } 1771 nat->nat_pnext = NULL; 1772 1773 *nat->nat_phnext[0] = nat->nat_hnext[0]; 1774 if (nat->nat_hnext[0] != NULL) { 1775 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; 1776 nat->nat_hnext[0] = NULL; 1777 } 1778 nat->nat_phnext[0] = NULL; 1779 1780 *nat->nat_phnext[1] = nat->nat_hnext[1]; 1781 if (nat->nat_hnext[1] != NULL) { 1782 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; 1783 nat->nat_hnext[1] = NULL; 1784 } 1785 nat->nat_phnext[1] = NULL; 1786 1787 if ((nat->nat_flags & SI_WILDP) != 0) 1788 ifs->ifs_nat_stats.ns_wilds--; 1789 } 1790 1791 if (nat->nat_me != NULL) { 1792 *nat->nat_me = NULL; 1793 nat->nat_me = NULL; 1794 } 1795 1796 fr_deletequeueentry(&nat->nat_tqe); 1797 1798 MUTEX_ENTER(&nat->nat_lock); 1799 if (nat->nat_ref > 1) { 1800 nat->nat_ref--; 1801 MUTEX_EXIT(&nat->nat_lock); 1802 return; 1803 } 1804 MUTEX_EXIT(&nat->nat_lock); 1805 1806 /* 1807 * At this point, nat_ref is 1, doing "--" would make it 0.. 1808 */ 1809 nat->nat_ref = 0; 1810 1811 #ifdef IPFILTER_SYNC 1812 if (nat->nat_sync) 1813 ipfsync_del(nat->nat_sync); 1814 #endif 1815 1816 if (nat->nat_fr != NULL) 1817 (void)fr_derefrule(&nat->nat_fr, ifs); 1818 1819 if (nat->nat_hm != NULL) 1820 fr_hostmapdel(&nat->nat_hm); 1821 1822 /* 1823 * If there is an active reference from the nat entry to its parent 1824 * rule, decrement the rule's reference count and free it too if no 1825 * longer being used. 
1826 */ 1827 ipn = nat->nat_ptr; 1828 if (ipn != NULL) { 1829 ipn->in_space++; 1830 ipn->in_use--; 1831 if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) { 1832 if (ipn->in_apr) 1833 appr_free(ipn->in_apr); 1834 KFREE(ipn); 1835 ifs->ifs_nat_stats.ns_rules--; 1836 } 1837 } 1838 1839 MUTEX_DESTROY(&nat->nat_lock); 1840 1841 aps_free(nat->nat_aps, ifs); 1842 ifs->ifs_nat_stats.ns_inuse--; 1843 1844 /* 1845 * If there's a fragment table entry too for this nat entry, then 1846 * dereference that as well. This is after nat_lock is released 1847 * because of Tru64. 1848 */ 1849 fr_forgetnat((void *)nat, ifs); 1850 1851 KFREE(nat); 1852 } 1853 1854 1855 /* ------------------------------------------------------------------------ */ 1856 /* Function: nat_flushtable */ 1857 /* Returns: int - number of NAT rules deleted */ 1858 /* Parameters: Nil */ 1859 /* */ 1860 /* Deletes all currently active NAT sessions. In deleting each NAT entry a */ 1861 /* log record should be emitted in nat_delete() if NAT logging is enabled. */ 1862 /* ------------------------------------------------------------------------ */ 1863 /* 1864 * nat_flushtable - clear the NAT table of all mapping entries. 1865 */ 1866 static int nat_flushtable(ifs) 1867 ipf_stack_t *ifs; 1868 { 1869 nat_t *nat; 1870 int j = 0; 1871 1872 /* 1873 * ALL NAT mappings deleted, so lets just make the deletions 1874 * quicker. 
1875 */ 1876 if (ifs->ifs_nat_table[0] != NULL) 1877 bzero((char *)ifs->ifs_nat_table[0], 1878 sizeof(ifs->ifs_nat_table[0]) * ifs->ifs_ipf_nattable_sz); 1879 if (ifs->ifs_nat_table[1] != NULL) 1880 bzero((char *)ifs->ifs_nat_table[1], 1881 sizeof(ifs->ifs_nat_table[1]) * ifs->ifs_ipf_nattable_sz); 1882 1883 while ((nat = ifs->ifs_nat_instances) != NULL) { 1884 nat_delete(nat, NL_FLUSH, ifs); 1885 j++; 1886 } 1887 1888 return j; 1889 } 1890 1891 1892 /* ------------------------------------------------------------------------ */ 1893 /* Function: nat_clearlist */ 1894 /* Returns: int - number of NAT/RDR rules deleted */ 1895 /* Parameters: Nil */ 1896 /* */ 1897 /* Delete all rules in the current list of rules. There is nothing elegant */ 1898 /* about this cleanup: simply free all entries on the list of rules and */ 1899 /* clear out the tables used for hashed NAT rule lookups. */ 1900 /* ------------------------------------------------------------------------ */ 1901 static int nat_clearlist(ifs) 1902 ipf_stack_t *ifs; 1903 { 1904 ipnat_t *n, **np = &ifs->ifs_nat_list; 1905 int i = 0; 1906 1907 if (ifs->ifs_nat_rules != NULL) 1908 bzero((char *)ifs->ifs_nat_rules, 1909 sizeof(*ifs->ifs_nat_rules) * ifs->ifs_ipf_natrules_sz); 1910 if (ifs->ifs_rdr_rules != NULL) 1911 bzero((char *)ifs->ifs_rdr_rules, 1912 sizeof(*ifs->ifs_rdr_rules) * ifs->ifs_ipf_rdrrules_sz); 1913 1914 while ((n = *np) != NULL) { 1915 *np = n->in_next; 1916 if (n->in_use == 0) { 1917 if (n->in_apr != NULL) 1918 appr_free(n->in_apr); 1919 KFREE(n); 1920 ifs->ifs_nat_stats.ns_rules--; 1921 } else { 1922 n->in_flags |= IPN_DELETE; 1923 n->in_next = NULL; 1924 } 1925 i++; 1926 } 1927 ifs->ifs_nat_masks = 0; 1928 ifs->ifs_rdr_masks = 0; 1929 return i; 1930 } 1931 1932 1933 /* ------------------------------------------------------------------------ */ 1934 /* Function: nat_newmap */ 1935 /* Returns: int - -1 == error, 0 == success */ 1936 /* Parameters: fin(I) - pointer to packet information */ 1937 /* 
nat(I) - pointer to NAT entry */ 1938 /* ni(I) - pointer to structure with misc. information needed */ 1939 /* to create new NAT entry. */ 1940 /* */ 1941 /* Given an empty NAT structure, populate it with new information about a */ 1942 /* new NAT session, as defined by the matching NAT rule. */ 1943 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/ 1944 /* to the new IP address for the translation. */ 1945 /* ------------------------------------------------------------------------ */ 1946 static INLINE int nat_newmap(fin, nat, ni) 1947 fr_info_t *fin; 1948 nat_t *nat; 1949 natinfo_t *ni; 1950 { 1951 u_short st_port, dport, sport, port, sp, dp; 1952 struct in_addr in, inb; 1953 hostmap_t *hm; 1954 u_32_t flags; 1955 u_32_t st_ip; 1956 ipnat_t *np; 1957 nat_t *natl; 1958 int l; 1959 ipf_stack_t *ifs = fin->fin_ifs; 1960 1961 /* 1962 * If it's an outbound packet which doesn't match any existing 1963 * record, then create a new port 1964 */ 1965 l = 0; 1966 hm = NULL; 1967 np = ni->nai_np; 1968 st_ip = np->in_nip; 1969 st_port = np->in_pnext; 1970 flags = ni->nai_flags; 1971 sport = ni->nai_sport; 1972 dport = ni->nai_dport; 1973 1974 /* 1975 * Do a loop until we either run out of entries to try or we find 1976 * a NAT mapping that isn't currently being used. This is done 1977 * because the change to the source is not (usually) being fixed. 1978 */ 1979 do { 1980 port = 0; 1981 in.s_addr = htonl(np->in_nip); 1982 if (l == 0) { 1983 /* 1984 * Check to see if there is an existing NAT 1985 * setup for this IP address pair. 
1986 */ 1987 hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, 1988 in, 0, ifs); 1989 if (hm != NULL) 1990 in.s_addr = hm->hm_mapip.s_addr; 1991 } else if ((l == 1) && (hm != NULL)) { 1992 fr_hostmapdel(&hm); 1993 } 1994 in.s_addr = ntohl(in.s_addr); 1995 1996 nat->nat_hm = hm; 1997 1998 if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) { 1999 if (l > 0) 2000 return -1; 2001 } 2002 2003 if (np->in_redir == NAT_BIMAP && 2004 np->in_inmsk == np->in_outmsk) { 2005 /* 2006 * map the address block in a 1:1 fashion 2007 */ 2008 in.s_addr = np->in_outip; 2009 in.s_addr |= fin->fin_saddr & ~np->in_inmsk; 2010 in.s_addr = ntohl(in.s_addr); 2011 2012 } else if (np->in_redir & NAT_MAPBLK) { 2013 if ((l >= np->in_ppip) || ((l > 0) && 2014 !(flags & IPN_TCPUDP))) 2015 return -1; 2016 /* 2017 * map-block - Calculate destination address. 2018 */ 2019 in.s_addr = ntohl(fin->fin_saddr); 2020 in.s_addr &= ntohl(~np->in_inmsk); 2021 inb.s_addr = in.s_addr; 2022 in.s_addr /= np->in_ippip; 2023 in.s_addr &= ntohl(~np->in_outmsk); 2024 in.s_addr += ntohl(np->in_outip); 2025 /* 2026 * Calculate destination port. 2027 */ 2028 if ((flags & IPN_TCPUDP) && 2029 (np->in_ppip != 0)) { 2030 port = ntohs(sport) + l; 2031 port %= np->in_ppip; 2032 port += np->in_ppip * 2033 (inb.s_addr % np->in_ippip); 2034 port += MAPBLK_MINPORT; 2035 port = htons(port); 2036 } 2037 2038 } else if ((np->in_outip == 0) && 2039 (np->in_outmsk == 0xffffffff)) { 2040 /* 2041 * 0/32 - use the interface's IP address. 2042 */ 2043 if ((l > 0) || 2044 fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, 2045 &in, NULL, fin->fin_ifs) == -1) 2046 return -1; 2047 in.s_addr = ntohl(in.s_addr); 2048 2049 } else if ((np->in_outip == 0) && (np->in_outmsk == 0)) { 2050 /* 2051 * 0/0 - use the original source address/port. 
2052 */ 2053 if (l > 0) 2054 return -1; 2055 in.s_addr = ntohl(fin->fin_saddr); 2056 2057 } else if ((np->in_outmsk != 0xffffffff) && 2058 (np->in_pnext == 0) && ((l > 0) || (hm == NULL))) 2059 np->in_nip++; 2060 2061 natl = NULL; 2062 2063 if ((flags & IPN_TCPUDP) && 2064 ((np->in_redir & NAT_MAPBLK) == 0) && 2065 (np->in_flags & IPN_AUTOPORTMAP)) { 2066 /* 2067 * "ports auto" (without map-block) 2068 */ 2069 if ((l > 0) && (l % np->in_ppip == 0)) { 2070 if (l > np->in_space) { 2071 return -1; 2072 } else if ((l > np->in_ppip) && 2073 np->in_outmsk != 0xffffffff) 2074 np->in_nip++; 2075 } 2076 if (np->in_ppip != 0) { 2077 port = ntohs(sport); 2078 port += (l % np->in_ppip); 2079 port %= np->in_ppip; 2080 port += np->in_ppip * 2081 (ntohl(fin->fin_saddr) % 2082 np->in_ippip); 2083 port += MAPBLK_MINPORT; 2084 port = htons(port); 2085 } 2086 2087 } else if (((np->in_redir & NAT_MAPBLK) == 0) && 2088 (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) { 2089 /* 2090 * Standard port translation. Select next port. 2091 */ 2092 port = htons(np->in_pnext++); 2093 2094 if (np->in_pnext > ntohs(np->in_pmax)) { 2095 np->in_pnext = ntohs(np->in_pmin); 2096 if (np->in_outmsk != 0xffffffff) 2097 np->in_nip++; 2098 } 2099 } 2100 2101 if (np->in_flags & IPN_IPRANGE) { 2102 if (np->in_nip > ntohl(np->in_outmsk)) 2103 np->in_nip = ntohl(np->in_outip); 2104 } else { 2105 if ((np->in_outmsk != 0xffffffff) && 2106 ((np->in_nip + 1) & ntohl(np->in_outmsk)) > 2107 ntohl(np->in_outip)) 2108 np->in_nip = ntohl(np->in_outip) + 1; 2109 } 2110 2111 if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY))) 2112 port = sport; 2113 2114 /* 2115 * Here we do a lookup of the connection as seen from 2116 * the outside. If an IP# pair already exists, try 2117 * again. So if you have A->B becomes C->B, you can 2118 * also have D->E become C->E but not D->B causing 2119 * another C->B. 
Also take protocol and ports into 2120 * account when determining whether a pre-existing 2121 * NAT setup will cause an external conflict where 2122 * this is appropriate. 2123 */ 2124 inb.s_addr = htonl(in.s_addr); 2125 sp = fin->fin_data[0]; 2126 dp = fin->fin_data[1]; 2127 fin->fin_data[0] = fin->fin_data[1]; 2128 fin->fin_data[1] = htons(port); 2129 natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH), 2130 (u_int)fin->fin_p, fin->fin_dst, inb); 2131 fin->fin_data[0] = sp; 2132 fin->fin_data[1] = dp; 2133 2134 /* 2135 * Has the search wrapped around and come back to the 2136 * start ? 2137 */ 2138 if ((natl != NULL) && 2139 (np->in_pnext != 0) && (st_port == np->in_pnext) && 2140 (np->in_nip != 0) && (st_ip == np->in_nip)) 2141 return -1; 2142 l++; 2143 } while (natl != NULL); 2144 2145 if (np->in_space > 0) 2146 np->in_space--; 2147 2148 /* Setup the NAT table */ 2149 nat->nat_inip = fin->fin_src; 2150 nat->nat_outip.s_addr = htonl(in.s_addr); 2151 nat->nat_oip = fin->fin_dst; 2152 if (nat->nat_hm == NULL) 2153 nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, 2154 nat->nat_outip, 0, ifs); 2155 2156 if (flags & IPN_TCPUDP) { 2157 nat->nat_inport = sport; 2158 nat->nat_outport = port; /* sport */ 2159 nat->nat_oport = dport; 2160 ((tcphdr_t *)fin->fin_dp)->th_sport = port; 2161 } else if (flags & IPN_ICMPQUERY) { 2162 ((icmphdr_t *)fin->fin_dp)->icmp_id = port; 2163 nat->nat_inport = port; 2164 nat->nat_outport = port; 2165 } 2166 2167 ni->nai_ip.s_addr = in.s_addr; 2168 ni->nai_port = port; 2169 ni->nai_nport = dport; 2170 return 0; 2171 } 2172 2173 2174 /* ------------------------------------------------------------------------ */ 2175 /* Function: nat_newrdr */ 2176 /* Returns: int - -1 == error, 0 == success (no move), 1 == success and */ 2177 /* allow rule to be moved if IPN_ROUNDR is set. */ 2178 /* Parameters: fin(I) - pointer to packet information */ 2179 /* nat(I) - pointer to NAT entry */ 2180 /* ni(I) - pointer to structure with misc. 
information needed */
/*                       to create new NAT entry.                           */
/*                                                                          */
/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
/* to the new IP address for the translation.                               */
/*                                                                          */
/* Chooses the internal address (in) and port (nport) for a new redirect,   */
/* refuses the mapping if nat_outlookup() already finds an entry for it,    */
/* and then fills in the address/port fields of nat and ni.  For TCP/UDP    */
/* and ICMP query packets the chosen port/id is also written into the       */
/* transport header that fin->fin_dp points at.                             */
/* ------------------------------------------------------------------------ */
static INLINE int nat_newrdr(fin, nat, ni)
fr_info_t *fin;
nat_t *nat;
natinfo_t *ni;
{
	u_short nport, dport, sport;
	struct in_addr in, inb;
	u_short sp, dp;
	hostmap_t *hm;
	u_32_t flags;
	ipnat_t *np;
	nat_t *natl;
	int move;
	ipf_stack_t *ifs = fin->fin_ifs;

	/* move == 1 tells the caller it may rotate a round-robin rule. */
	move = 1;
	hm = NULL;
	in.s_addr = 0;
	np = ni->nai_np;
	flags = ni->nai_flags;
	sport = ni->nai_sport;
	dport = ni->nai_dport;

	/*
	 * If the matching rule has IPN_STICKY set, then we want to have the
	 * same rule kick in as before.  Why would this happen?  If you have
	 * a collection of rdr rules with "round-robin sticky", the current
	 * packet might match a different one to the previous connection but
	 * we want the same destination to be used.
	 */
	/*
	 * NOTE(review): hm is neither stored in nat nor released anywhere in
	 * this function; confirm that whatever nat_hostmap() hands back is
	 * accounted for elsewhere.
	 */
	if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) ==
	    (IPN_ROUNDR|IPN_STICKY)) {
		hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
				 (u_32_t)dport, ifs);
		if (hm != NULL) {
			/* Reuse the address and the rule of the earlier match. */
			in.s_addr = ntohl(hm->hm_mapip.s_addr);
			np = hm->hm_ipnat;
			ni->nai_np = np;
			move = 0;
		}
	}

	/*
	 * Otherwise, it's an inbound packet. Most likely, we don't
	 * want to rewrite source ports and source addresses. Instead,
	 * we want to rewrite to a fixed internal address and fixed
	 * internal port.
	 */
	if (np->in_flags & IPN_SPLIT) {
		/* Split rule: hand out whatever in_nip currently holds. */
		in.s_addr = np->in_nip;

		if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
					 in, (u_32_t)dport, ifs);
			if (hm != NULL) {
				in.s_addr = hm->hm_mapip.s_addr;
				move = 0;
			}
		}

		/*
		 * Advance in_nip for the next caller: it alternates between
		 * in_inip and the value kept in in_inmsk (both converted to
		 * host byte order).  Skipped when an existing host map with
		 * more than one reference was reused.
		 */
		if (hm == NULL || hm->hm_ref == 1) {
			if (np->in_inip == htonl(in.s_addr)) {
				np->in_nip = ntohl(np->in_inmsk);
				move = 0;
			} else {
				np->in_nip = ntohl(np->in_inip);
			}
		}

	} else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
		/*
		 * 0/32 - use the interface's IP address.
		 */
		if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL,
			   fin->fin_ifs) == -1)
			return -1;
		in.s_addr = ntohl(in.s_addr);

	} else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
		/*
		 * 0/0 - use the original destination address/port.
		 */
		in.s_addr = ntohl(fin->fin_daddr);

	} else if (np->in_redir == NAT_BIMAP &&
		   np->in_inmsk == np->in_outmsk) {
		/*
		 * map the address block in a 1:1 fashion
		 */
		in.s_addr = np->in_inip;
		in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
		in.s_addr = ntohl(in.s_addr);
	} else {
		in.s_addr = ntohl(np->in_inip);
	}

	/*
	 * Port selection: keep the packet's destination port when the rule
	 * has no port of its own (in_pnext == 0) or NAT_NOTRULEPORT is set.
	 */
	if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
		nport = dport;
	else {
		/*
		 * Whilst not optimized for the case where
		 * pmin == pmax, the gain is not significant.
		 */
		if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
		    (np->in_pmin != np->in_pmax)) {
			/*
			 * Port range: keep the packet's offset from in_pmin
			 * and apply it on top of in_pnext.
			 */
			nport = ntohs(dport) - ntohs(np->in_pmin) +
				ntohs(np->in_pnext);
			nport = htons(nport);
		} else
			nport = np->in_pnext;
	}

	/*
	 * When the redirect-to address is set to 0.0.0.0, just
	 * assume a blank `forwarding' of the packet.  We don't
	 * setup any translation for this either.
	 */
	if (in.s_addr == 0) {
		if (nport == dport)
			return -1;
		in.s_addr = ntohl(fin->fin_daddr);
	}

	/*
	 * Check to see if this redirect mapping already exists and if
	 * it does, return "failure" (allowing it to be created will just
	 * cause one or both of these "connections" to stop working.)
	 */
	/*
	 * fin_data[] is temporarily overwritten with the post-translation
	 * port pair for the lookup and restored straight afterwards.
	 */
	inb.s_addr = htonl(in.s_addr);
	sp = fin->fin_data[0];
	dp = fin->fin_data[1];
	fin->fin_data[1] = fin->fin_data[0];
	fin->fin_data[0] = ntohs(nport);
	natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
		    (u_int)fin->fin_p, inb, fin->fin_src);
	fin->fin_data[0] = sp;
	fin->fin_data[1] = dp;
	if (natl != NULL)
		return (-1);

	/* nat_inip is stored in network byte order, ni->nai_ip in host order. */
	nat->nat_inip.s_addr = htonl(in.s_addr);
	nat->nat_outip = fin->fin_dst;
	nat->nat_oip = fin->fin_src;

	ni->nai_ip.s_addr = in.s_addr;
	ni->nai_nport = nport;
	ni->nai_port = sport;

	if (flags & IPN_TCPUDP) {
		nat->nat_inport = nport;
		nat->nat_outport = dport;
		nat->nat_oport = sport;
		/* The packet's destination port is rewritten here. */
		((tcphdr_t *)fin->fin_dp)->th_dport = nport;
	} else if (flags & IPN_ICMPQUERY) {
		/* For ICMP queries the id field takes the place of a port. */
		((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
		nat->nat_inport = nport;
		nat->nat_outport = nport;
	}

	return move;
}

/* ------------------------------------------------------------------------ */
/* Function:    nat_new                                                     */
/* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
/*                       else pointer to new NAT structure                  */
/* Parameters:  fin(I)       - pointer to packet information                */
/*              np(I)        - pointer to NAT rule                          */
/*              natsave(I)   - pointer to where to store NAT struct pointer */
/*              flags(I)     - flags describing the current packet          */
/*              direction(I) - direction of packet (in/out)                 */
/* Write Lock:  ipf_nat                                                     */
/*                                                                          */
/* Attempts to create a new NAT entry.  Does not actually change the packet */
/* in any way.
*/ 2361 /* */ 2362 /* This fucntion is in three main parts: (1) deal with creating a new NAT */ 2363 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with */ 2364 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */ 2365 /* and (3) building that structure and putting it into the NAT table(s). */ 2366 /* ------------------------------------------------------------------------ */ 2367 nat_t *nat_new(fin, np, natsave, flags, direction) 2368 fr_info_t *fin; 2369 ipnat_t *np; 2370 nat_t **natsave; 2371 u_int flags; 2372 int direction; 2373 { 2374 tcphdr_t *tcp = NULL; 2375 hostmap_t *hm = NULL; 2376 nat_t *nat, *natl; 2377 u_int nflags; 2378 natinfo_t ni; 2379 int move; 2380 ipf_stack_t *ifs = fin->fin_ifs; 2381 2382 /* 2383 * Trigger automatic call to nat_extraflush() if the 2384 * table has reached capcity specified by hi watermark. 2385 */ 2386 if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi) 2387 ifs->ifs_nat_doflush = 1; 2388 2389 if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) { 2390 ifs->ifs_nat_stats.ns_memfail++; 2391 return NULL; 2392 } 2393 2394 move = 1; 2395 nflags = np->in_flags & flags; 2396 nflags &= NAT_FROMRULE; 2397 2398 ni.nai_np = np; 2399 ni.nai_nflags = nflags; 2400 ni.nai_flags = flags; 2401 2402 /* Give me a new nat */ 2403 KMALLOC(nat, nat_t *); 2404 if (nat == NULL) { 2405 ifs->ifs_nat_stats.ns_memfail++; 2406 /* 2407 * Try to automatically tune the max # of entries in the 2408 * table allowed to be less than what will cause kmem_alloc() 2409 * to fail and try to eliminate panics due to out of memory 2410 * conditions arising. 
2411 */ 2412 if (ifs->ifs_ipf_nattable_max > ifs->ifs_ipf_nattable_sz) { 2413 ifs->ifs_ipf_nattable_max = ifs->ifs_nat_stats.ns_inuse - 100; 2414 printf("ipf_nattable_max reduced to %d\n", 2415 ifs->ifs_ipf_nattable_max); 2416 } 2417 return NULL; 2418 } 2419 2420 if (flags & IPN_TCPUDP) { 2421 tcp = fin->fin_dp; 2422 ni.nai_sport = htons(fin->fin_sport); 2423 ni.nai_dport = htons(fin->fin_dport); 2424 } else if (flags & IPN_ICMPQUERY) { 2425 /* 2426 * In the ICMP query NAT code, we translate the ICMP id fields 2427 * to make them unique. This is indepedent of the ICMP type 2428 * (e.g. in the unlikely event that a host sends an echo and 2429 * an tstamp request with the same id, both packets will have 2430 * their ip address/id field changed in the same way). 2431 */ 2432 /* The icmp_id field is used by the sender to identify the 2433 * process making the icmp request. (the receiver justs 2434 * copies it back in its response). So, it closely matches 2435 * the concept of source port. We overlay sport, so we can 2436 * maximally reuse the existing code. 2437 */ 2438 ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id; 2439 ni.nai_dport = ni.nai_sport; 2440 } 2441 2442 bzero((char *)nat, sizeof(*nat)); 2443 nat->nat_flags = flags; 2444 nat->nat_redir = np->in_redir; 2445 2446 if ((flags & NAT_SLAVE) == 0) { 2447 MUTEX_ENTER(&ifs->ifs_ipf_nat_new); 2448 } 2449 2450 /* 2451 * Search the current table for a match. 2452 */ 2453 if (direction == NAT_OUTBOUND) { 2454 /* 2455 * We can now arrange to call this for the same connection 2456 * because ipf_nat_new doesn't protect the code path into 2457 * this function. 
2458 */ 2459 natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p, 2460 fin->fin_src, fin->fin_dst); 2461 if (natl != NULL) { 2462 KFREE(nat); 2463 nat = natl; 2464 goto done; 2465 } 2466 2467 move = nat_newmap(fin, nat, &ni); 2468 if (move == -1) 2469 goto badnat; 2470 2471 np = ni.nai_np; 2472 } else { 2473 /* 2474 * NAT_INBOUND is used only for redirects rules 2475 */ 2476 natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p, 2477 fin->fin_src, fin->fin_dst); 2478 if (natl != NULL) { 2479 KFREE(nat); 2480 nat = natl; 2481 goto done; 2482 } 2483 2484 move = nat_newrdr(fin, nat, &ni); 2485 if (move == -1) 2486 goto badnat; 2487 2488 np = ni.nai_np; 2489 } 2490 2491 if ((move == 1) && (np->in_flags & IPN_ROUNDR)) { 2492 if (np->in_redir == NAT_REDIRECT) { 2493 nat_delrdr(np); 2494 nat_addrdr(np, ifs); 2495 } else if (np->in_redir == NAT_MAP) { 2496 nat_delnat(np); 2497 nat_addnat(np, ifs); 2498 } 2499 } 2500 2501 if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) { 2502 goto badnat; 2503 } 2504 2505 nat_calc_chksum_diffs(nat); 2506 2507 if (flags & SI_WILDP) 2508 ifs->ifs_nat_stats.ns_wilds++; 2509 goto done; 2510 badnat: 2511 ifs->ifs_nat_stats.ns_badnat++; 2512 if ((hm = nat->nat_hm) != NULL) 2513 fr_hostmapdel(&hm); 2514 KFREE(nat); 2515 nat = NULL; 2516 done: 2517 if ((flags & NAT_SLAVE) == 0) { 2518 MUTEX_EXIT(&ifs->ifs_ipf_nat_new); 2519 } 2520 return nat; 2521 } 2522 2523 2524 /* ------------------------------------------------------------------------ */ 2525 /* Function: nat_finalise */ 2526 /* Returns: int - 0 == sucess, -1 == failure */ 2527 /* Parameters: fin(I) - pointer to packet information */ 2528 /* nat(I) - pointer to NAT entry */ 2529 /* ni(I) - pointer to structure with misc. information needed */ 2530 /* to create new NAT entry. */ 2531 /* Write Lock: ipf_nat */ 2532 /* */ 2533 /* This is the tail end of constructing a new NAT entry and is the same */ 2534 /* for both IPv4 and IPv6. 
*/ 2535 /* ------------------------------------------------------------------------ */ 2536 /*ARGSUSED*/ 2537 static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction) 2538 fr_info_t *fin; 2539 nat_t *nat; 2540 natinfo_t *ni; 2541 tcphdr_t *tcp; 2542 nat_t **natsave; 2543 int direction; 2544 { 2545 frentry_t *fr; 2546 ipnat_t *np; 2547 ipf_stack_t *ifs = fin->fin_ifs; 2548 2549 np = ni->nai_np; 2550 2551 COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0], fin->fin_v); 2552 2553 #ifdef IPFILTER_SYNC 2554 if ((nat->nat_flags & SI_CLONE) == 0) 2555 nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat); 2556 #endif 2557 2558 nat->nat_me = natsave; 2559 nat->nat_dir = direction; 2560 nat->nat_ifps[0] = np->in_ifps[0]; 2561 nat->nat_ifps[1] = np->in_ifps[1]; 2562 nat->nat_ptr = np; 2563 nat->nat_p = fin->fin_p; 2564 nat->nat_mssclamp = np->in_mssclamp; 2565 fr = fin->fin_fr; 2566 nat->nat_fr = fr; 2567 2568 if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0)) 2569 if (appr_new(fin, nat) == -1) 2570 return -1; 2571 2572 if (nat_insert(nat, fin->fin_rev, ifs) == 0) { 2573 if (ifs->ifs_nat_logging) 2574 nat_log(nat, (u_int)np->in_redir, ifs); 2575 np->in_use++; 2576 if (fr != NULL) { 2577 MUTEX_ENTER(&fr->fr_lock); 2578 fr->fr_ref++; 2579 MUTEX_EXIT(&fr->fr_lock); 2580 } 2581 return 0; 2582 } 2583 2584 /* 2585 * nat_insert failed, so cleanup time... 2586 */ 2587 return -1; 2588 } 2589 2590 2591 /* ------------------------------------------------------------------------ */ 2592 /* Function: nat_insert */ 2593 /* Returns: int - 0 == sucess, -1 == failure */ 2594 /* Parameters: nat(I) - pointer to NAT structure */ 2595 /* rev(I) - flag indicating forward/reverse direction of packet */ 2596 /* Write Lock: ipf_nat */ 2597 /* */ 2598 /* Insert a NAT entry into the hash tables for searching and add it to the */ 2599 /* list of active NAT entries. Adjust global counters when complete. 
*/ 2600 /* ------------------------------------------------------------------------ */ 2601 int nat_insert(nat, rev, ifs) 2602 nat_t *nat; 2603 int rev; 2604 ipf_stack_t *ifs; 2605 { 2606 u_int hv1, hv2; 2607 nat_t **natp; 2608 2609 /* 2610 * Try and return an error as early as possible, so calculate the hash 2611 * entry numbers first and then proceed. 2612 */ 2613 if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) { 2614 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 2615 0xffffffff); 2616 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport, 2617 ifs->ifs_ipf_nattable_sz); 2618 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 2619 0xffffffff); 2620 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport, 2621 ifs->ifs_ipf_nattable_sz); 2622 } else { 2623 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff); 2624 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, 2625 ifs->ifs_ipf_nattable_sz); 2626 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff); 2627 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, 2628 ifs->ifs_ipf_nattable_sz); 2629 } 2630 2631 if (ifs->ifs_nat_stats.ns_bucketlen[0][hv1] >= ifs->ifs_fr_nat_maxbucket || 2632 ifs->ifs_nat_stats.ns_bucketlen[1][hv2] >= ifs->ifs_fr_nat_maxbucket) { 2633 return -1; 2634 } 2635 2636 nat->nat_hv[0] = hv1; 2637 nat->nat_hv[1] = hv2; 2638 2639 MUTEX_INIT(&nat->nat_lock, "nat entry lock"); 2640 2641 nat->nat_rev = rev; 2642 nat->nat_ref = 1; 2643 nat->nat_bytes[0] = 0; 2644 nat->nat_pkts[0] = 0; 2645 nat->nat_bytes[1] = 0; 2646 nat->nat_pkts[1] = 0; 2647 2648 nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0'; 2649 nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4, ifs); 2650 2651 if (nat->nat_ifnames[1][0] !='\0') { 2652 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; 2653 nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4, ifs); 2654 } else { 2655 (void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0], 2656 LIFNAMSIZ); 2657 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; 2658 nat->nat_ifps[1] 
= nat->nat_ifps[0]; 2659 } 2660 2661 nat->nat_next = ifs->ifs_nat_instances; 2662 nat->nat_pnext = &ifs->ifs_nat_instances; 2663 if (ifs->ifs_nat_instances) 2664 ifs->ifs_nat_instances->nat_pnext = &nat->nat_next; 2665 ifs->ifs_nat_instances = nat; 2666 2667 natp = &ifs->ifs_nat_table[0][hv1]; 2668 if (*natp) 2669 (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; 2670 nat->nat_phnext[0] = natp; 2671 nat->nat_hnext[0] = *natp; 2672 *natp = nat; 2673 ifs->ifs_nat_stats.ns_bucketlen[0][hv1]++; 2674 2675 natp = &ifs->ifs_nat_table[1][hv2]; 2676 if (*natp) 2677 (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; 2678 nat->nat_phnext[1] = natp; 2679 nat->nat_hnext[1] = *natp; 2680 *natp = nat; 2681 ifs->ifs_nat_stats.ns_bucketlen[1][hv2]++; 2682 2683 fr_setnatqueue(nat, rev, ifs); 2684 2685 ifs->ifs_nat_stats.ns_added++; 2686 ifs->ifs_nat_stats.ns_inuse++; 2687 return 0; 2688 } 2689 2690 2691 /* ------------------------------------------------------------------------ */ 2692 /* Function: nat_icmperrorlookup */ 2693 /* Returns: nat_t* - point to matching NAT structure */ 2694 /* Parameters: fin(I) - pointer to packet information */ 2695 /* dir(I) - direction of packet (in/out) */ 2696 /* */ 2697 /* Check if the ICMP error message is related to an existing TCP, UDP or */ 2698 /* ICMP query nat entry. It is assumed that the packet is already of the */ 2699 /* the required length. */ 2700 /* ------------------------------------------------------------------------ */ 2701 nat_t *nat_icmperrorlookup(fin, dir) 2702 fr_info_t *fin; 2703 int dir; 2704 { 2705 int flags = 0, minlen; 2706 icmphdr_t *orgicmp; 2707 tcphdr_t *tcp = NULL; 2708 u_short data[2]; 2709 nat_t *nat; 2710 ip_t *oip; 2711 u_int p; 2712 2713 /* 2714 * Does it at least have the return (basic) IP header ? 2715 * Only a basic IP header (no options) should be with an ICMP error 2716 * header. Also, if it's not an error type, then return. 
2717 */ 2718 if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR)) 2719 return NULL; 2720 2721 /* 2722 * Check packet size 2723 */ 2724 oip = (ip_t *)((char *)fin->fin_dp + 8); 2725 minlen = IP_HL(oip) << 2; 2726 if ((minlen < sizeof(ip_t)) || 2727 (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)) 2728 return NULL; 2729 /* 2730 * Is the buffer big enough for all of it ? It's the size of the IP 2731 * header claimed in the encapsulated part which is of concern. It 2732 * may be too big to be in this buffer but not so big that it's 2733 * outside the ICMP packet, leading to TCP deref's causing problems. 2734 * This is possible because we don't know how big oip_hl is when we 2735 * do the pullup early in fr_check() and thus can't gaurantee it is 2736 * all here now. 2737 */ 2738 #ifdef _KERNEL 2739 { 2740 mb_t *m; 2741 2742 m = fin->fin_m; 2743 # if defined(MENTAT) 2744 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr) 2745 return NULL; 2746 # else 2747 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > 2748 (char *)fin->fin_ip + M_LEN(m)) 2749 return NULL; 2750 # endif 2751 } 2752 #endif 2753 2754 if (fin->fin_daddr != oip->ip_src.s_addr) 2755 return NULL; 2756 2757 p = oip->ip_p; 2758 if (p == IPPROTO_TCP) 2759 flags = IPN_TCP; 2760 else if (p == IPPROTO_UDP) 2761 flags = IPN_UDP; 2762 else if (p == IPPROTO_ICMP) { 2763 orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2764 2765 /* see if this is related to an ICMP query */ 2766 if (nat_icmpquerytype4(orgicmp->icmp_type)) { 2767 data[0] = fin->fin_data[0]; 2768 data[1] = fin->fin_data[1]; 2769 fin->fin_data[0] = 0; 2770 fin->fin_data[1] = orgicmp->icmp_id; 2771 2772 flags = IPN_ICMPERR|IPN_ICMPQUERY; 2773 /* 2774 * NOTE : dir refers to the direction of the original 2775 * ip packet. By definition the icmp error 2776 * message flows in the opposite direction. 
2777 */ 2778 if (dir == NAT_INBOUND) 2779 nat = nat_inlookup(fin, flags, p, oip->ip_dst, 2780 oip->ip_src); 2781 else 2782 nat = nat_outlookup(fin, flags, p, oip->ip_dst, 2783 oip->ip_src); 2784 fin->fin_data[0] = data[0]; 2785 fin->fin_data[1] = data[1]; 2786 return nat; 2787 } 2788 } 2789 2790 if (flags & IPN_TCPUDP) { 2791 minlen += 8; /* + 64bits of data to get ports */ 2792 if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen) 2793 return NULL; 2794 2795 data[0] = fin->fin_data[0]; 2796 data[1] = fin->fin_data[1]; 2797 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2798 fin->fin_data[0] = ntohs(tcp->th_dport); 2799 fin->fin_data[1] = ntohs(tcp->th_sport); 2800 2801 if (dir == NAT_INBOUND) { 2802 nat = nat_inlookup(fin, flags, p, oip->ip_dst, 2803 oip->ip_src); 2804 } else { 2805 nat = nat_outlookup(fin, flags, p, oip->ip_dst, 2806 oip->ip_src); 2807 } 2808 fin->fin_data[0] = data[0]; 2809 fin->fin_data[1] = data[1]; 2810 return nat; 2811 } 2812 if (dir == NAT_INBOUND) 2813 return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src); 2814 else 2815 return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src); 2816 } 2817 2818 2819 /* ------------------------------------------------------------------------ */ 2820 /* Function: nat_icmperror */ 2821 /* Returns: nat_t* - point to matching NAT structure */ 2822 /* Parameters: fin(I) - pointer to packet information */ 2823 /* nflags(I) - NAT flags for this packet */ 2824 /* dir(I) - direction of packet (in/out) */ 2825 /* */ 2826 /* Fix up an ICMP packet which is an error message for an existing NAT */ 2827 /* session. This will correct both packet header data and checksums. */ 2828 /* */ 2829 /* This should *ONLY* be used for incoming ICMP error packets to make sure */ 2830 /* a NAT'd ICMP packet gets correctly recognised. 
*/
/* ------------------------------------------------------------------------ */
nat_t *nat_icmperror(fin, nflags, dir)
fr_info_t *fin;
u_int *nflags;
int dir;
{
	u_32_t sum1, sum2, sumd, psum1, psum2, psumd, sumd2;
	struct in_addr in;
	icmphdr_t *icmp, *orgicmp;
	int dlen;
	udphdr_t *udp;
	tcphdr_t *tcp;
	nat_t *nat;
	ip_t *oip;

	/* Short packets and fragment bodies are never touched. */
	if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
		return NULL;

	/*
	 * nat_icmperrorlookup() looks up nat entry associated with the
	 * offending IP packet and returns pointer to the entry, or NULL
	 * if packet wasn't natted or for `defective' packets.
	 */

	if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
		return NULL;

	/*
	 * sumd2 accumulates the correction that Step 3 applies to the outer
	 * ICMP checksum.  udp and tcp both point at the first byte after the
	 * embedded IP header, and dlen is the number of packet bytes from
	 * that point to the end of the packet.
	 */
	sumd2 = 0;
	*nflags = IPN_ICMPERR;
	icmp = fin->fin_dp;
	oip = (ip_t *)&icmp->icmp_ip;
	udp = (udphdr_t *)((((char *)oip) + (IP_HL(oip) << 2)));
	tcp = (tcphdr_t *)udp;
	dlen = fin->fin_plen - ((char *)udp - (char *)fin->fin_ip);

	/*
	 * Need to adjust ICMP header to include the real IP#'s and
	 * port #'s.  There are three steps required.
	 *
	 * Step 1
	 * Fix the IP addresses in the offending IP packet and update
	 * ip header checksum to compensate for the change.
	 *
	 * No update needed here for icmp_cksum because the ICMP checksum
	 * is calculated over the complete ICMP packet, which includes the
	 * changed oip IP addresses and oip->ip_sum.  These two changes
	 * cancel each other out (if the delta for the IP address is x,
	 * then the delta for ip_sum is minus x).
	 */

	if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
		/* Embedded source becomes the entry's nat_inip. */
		sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
		in = nat->nat_inip;
		oip->ip_src = in;
	} else {
		/* Otherwise the embedded destination becomes nat_outip. */
		sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
		in = nat->nat_outip;
		oip->ip_dst = in;
	}

	sum2 = LONG_SUM(ntohl(in.s_addr));
	CALC_SUMD(sum1, sum2, sumd);
	fix_datacksum(&oip->ip_sum, sumd);

	/*
	 * Step 2
	 * Perform other adjustments based on protocol of offending packet.
	 */

	switch (oip->ip_p) {
		case IPPROTO_TCP :
		case IPPROTO_UDP :

			/*
			 * For offending TCP/UDP IP packets, translate the ports
			 * based on the NAT specification.
			 *
			 * Advance notice : Now it becomes complicated :-)
			 *
			 * Since the port and IP address fields are both part
			 * of the TCP/UDP checksum of the offending IP packet,
			 * we need to adjust that checksum as well.
			 *
			 * To further complicate things, the TCP/UDP checksum
			 * may not be present.  We must check to see if the
			 * length of the data portion is big enough to hold
			 * the checksum.  In the UDP case, a test to determine
			 * if the checksum is even set is also required.
			 *
			 * Any changes to an IP address, port or checksum within
			 * the ICMP packet requires a change to icmp_cksum.
			 *
			 * Be extremely careful here ... The change is dependent
			 * upon whether or not the TCP/UDP checksum is present.
			 *
			 * If TCP/UDP checksum is present, the icmp_cksum must
			 * compensate for checksum modification resulting from
			 * IP address change only.  Port change and resulting
			 * data checksum adjustments cancel each other out.
			 *
			 * If TCP/UDP checksum is not present, icmp_cksum must
			 * compensate for port change only.  The IP address
			 * change does not modify anything else in this case.
			 */

			/* psum1/psum2: old and new port, host byte order. */
			psum1 = 0;
			psum2 = 0;
			psumd = 0;

			if ((tcp->th_dport == nat->nat_oport) &&
			    (tcp->th_sport != nat->nat_inport)) {

				/*
				 * Translate the source port.
				 */

				psum1 = ntohs(tcp->th_sport);
				psum2 = ntohs(nat->nat_inport);
				tcp->th_sport = nat->nat_inport;

			} else if ((tcp->th_sport == nat->nat_oport) &&
			    (tcp->th_dport != nat->nat_outport)) {

				/*
				 * Translate the destination port.
				 */

				psum1 = ntohs(tcp->th_dport);
				psum2 = ntohs(nat->nat_outport);
				tcp->th_dport = nat->nat_outport;
			}

			/*
			 * 18 bytes of TCP header are needed before th_sum
			 * (bytes 16-17) is part of the packet.
			 */
			if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) {

				/*
				 * TCP checksum present.
				 *
				 * Adjust data checksum and icmp checksum to
				 * compensate for any IP address change.
				 */

				/*
				 * sumd still holds the Step 1 address delta
				 * here; CALC_SUMD below overwrites it with
				 * the observed change in th_sum.
				 */
				sum1 = ntohs(tcp->th_sum);
				fix_datacksum(&tcp->th_sum, sumd);
				sum2 = ntohs(tcp->th_sum);
				sumd2 = sumd << 1;
				CALC_SUMD(sum1, sum2, sumd);
				sumd2 += sumd;

				/*
				 * Also make data checksum adjustment to
				 * compensate for any port change.
				 */

				if (psum1 != psum2) {
					CALC_SUMD(psum1, psum2, psumd);
					fix_datacksum(&tcp->th_sum, psumd);
				}

			} else if ((oip->ip_p == IPPROTO_UDP) &&
				   (dlen >= 8) && (udp->uh_sum != 0)) {

				/*
				 * The UDP checksum is present and set.
				 *
				 * Adjust data checksum and icmp checksum to
				 * compensate for any IP address change.
				 */

				/* Same sequence as the TCP case, on uh_sum. */
				sum1 = ntohs(udp->uh_sum);
				fix_datacksum(&udp->uh_sum, sumd);
				sum2 = ntohs(udp->uh_sum);
				sumd2 = sumd << 1;
				CALC_SUMD(sum1, sum2, sumd);
				sumd2 += sumd;

				/*
				 * Also make data checksum adjustment to
				 * compensate for any port change.
				 */

				if (psum1 != psum2) {
					CALC_SUMD(psum1, psum2, psumd);
					fix_datacksum(&udp->uh_sum, psumd);
				}

			} else {

				/*
				 * Data checksum was not present.
				 *
				 * Compensate for any port change.
				 */

				/*
				 * Note the argument order: (psum2, psum1),
				 * the reverse of the branches above.
				 */
				CALC_SUMD(psum2, psum1, psumd);
				sumd2 += psumd;
			}
			break;

		case IPPROTO_ICMP :

			/* The embedded payload is an ICMP header. */
			orgicmp = (icmphdr_t *)udp;

			if ((nat->nat_dir == NAT_OUTBOUND) &&
			    (orgicmp->icmp_id != nat->nat_inport) &&
			    (dlen >= 8)) {

				/*
				 * Fix ICMP checksum (of the offending ICMP
				 * query packet) to compensate the change
				 * in the ICMP id of the offending ICMP
				 * packet.
				 *
				 * Since you modify orgicmp->icmp_id with
				 * a delta (say x) and you compensate that
				 * in origicmp->icmp_cksum with a delta
				 * minus x, you don't have to adjust the
				 * overall icmp->icmp_cksum
				 */

				sum1 = ntohs(orgicmp->icmp_id);
				sum2 = ntohs(nat->nat_inport);
				CALC_SUMD(sum1, sum2, sumd);
				orgicmp->icmp_id = nat->nat_inport;
				fix_datacksum(&orgicmp->icmp_cksum, sumd);

			} /* nat_dir can't be NAT_INBOUND for icmp queries */

			break;

		default :

			break;

	} /* switch (oip->ip_p) */

	/*
	 * Step 3
	 * Make the adjustments to icmp checksum.
	 */

	if (sumd2 != 0) {
		/* Fold the accumulated delta down to 16 bits, twice. */
		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
		fix_incksum(&icmp->icmp_cksum, sumd2);
	}
	return nat;
}


/*
 * NB: these lookups don't lock access to the list, it assumed that it has
 * already been done!
3082 */ 3083 3084 /* ------------------------------------------------------------------------ */ 3085 /* Function: nat_inlookup */ 3086 /* Returns: nat_t* - NULL == no match, */ 3087 /* else pointer to matching NAT entry */ 3088 /* Parameters: fin(I) - pointer to packet information */ 3089 /* flags(I) - NAT flags for this packet */ 3090 /* p(I) - protocol for this packet */ 3091 /* src(I) - source IP address */ 3092 /* mapdst(I) - destination IP address */ 3093 /* */ 3094 /* Lookup a nat entry based on the mapped destination ip address/port and */ 3095 /* real source address/port. We use this lookup when receiving a packet, */ 3096 /* we're looking for a table entry, based on the destination address. */ 3097 /* */ 3098 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */ 3099 /* */ 3100 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */ 3101 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */ 3102 /* */ 3103 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */ 3104 /* the packet is of said protocol */ 3105 /* ------------------------------------------------------------------------ */ 3106 nat_t *nat_inlookup(fin, flags, p, src, mapdst) 3107 fr_info_t *fin; 3108 u_int flags, p; 3109 struct in_addr src , mapdst; 3110 { 3111 u_short sport, dport; 3112 ipnat_t *ipn; 3113 u_int sflags; 3114 nat_t *nat; 3115 int nflags; 3116 u_32_t dst; 3117 void *ifp; 3118 u_int hv; 3119 ipf_stack_t *ifs = fin->fin_ifs; 3120 3121 if (fin != NULL) 3122 ifp = fin->fin_ifp; 3123 else 3124 ifp = NULL; 3125 sport = 0; 3126 dport = 0; 3127 dst = mapdst.s_addr; 3128 sflags = flags & NAT_TCPUDPICMP; 3129 3130 switch (p) 3131 { 3132 case IPPROTO_TCP : 3133 case IPPROTO_UDP : 3134 sport = htons(fin->fin_data[0]); 3135 dport = htons(fin->fin_data[1]); 3136 break; 3137 case IPPROTO_ICMP : 3138 if (flags & IPN_ICMPERR) 3139 sport = fin->fin_data[1]; 3140 else 3141 dport = fin->fin_data[1]; 3142 break; 3143 default : 3144 break; 
3145 } 3146 3147 3148 if ((flags & SI_WILDP) != 0) 3149 goto find_in_wild_ports; 3150 3151 hv = NAT_HASH_FN(dst, dport, 0xffffffff); 3152 hv = NAT_HASH_FN(src.s_addr, hv + sport, ifs->ifs_ipf_nattable_sz); 3153 nat = ifs->ifs_nat_table[1][hv]; 3154 for (; nat; nat = nat->nat_hnext[1]) { 3155 if (nat->nat_ifps[0] != NULL) { 3156 if ((ifp != NULL) && (ifp != nat->nat_ifps[0])) 3157 continue; 3158 } else if (ifp != NULL) 3159 nat->nat_ifps[0] = ifp; 3160 3161 nflags = nat->nat_flags; 3162 3163 if (nat->nat_oip.s_addr == src.s_addr && 3164 nat->nat_outip.s_addr == dst && 3165 (((p == 0) && 3166 (sflags == (nat->nat_flags & IPN_TCPUDPICMP))) 3167 || (p == nat->nat_p))) { 3168 switch (p) 3169 { 3170 #if 0 3171 case IPPROTO_GRE : 3172 if (nat->nat_call[1] != fin->fin_data[0]) 3173 continue; 3174 break; 3175 #endif 3176 case IPPROTO_ICMP : 3177 if ((flags & IPN_ICMPERR) != 0) { 3178 if (nat->nat_outport != sport) 3179 continue; 3180 } else { 3181 if (nat->nat_outport != dport) 3182 continue; 3183 } 3184 break; 3185 case IPPROTO_TCP : 3186 case IPPROTO_UDP : 3187 if (nat->nat_oport != sport) 3188 continue; 3189 if (nat->nat_outport != dport) 3190 continue; 3191 break; 3192 default : 3193 break; 3194 } 3195 3196 ipn = nat->nat_ptr; 3197 if ((ipn != NULL) && (nat->nat_aps != NULL)) 3198 if (appr_match(fin, nat) != 0) 3199 continue; 3200 return nat; 3201 } 3202 } 3203 3204 /* 3205 * So if we didn't find it but there are wildcard members in the hash 3206 * table, go back and look for them. We do this search and update here 3207 * because it is modifying the NAT table and we want to do this only 3208 * for the first packet that matches. The exception, of course, is 3209 * for "dummy" (FI_IGNORE) lookups. 
3210 */ 3211 find_in_wild_ports: 3212 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) 3213 return NULL; 3214 if (ifs->ifs_nat_stats.ns_wilds == 0) 3215 return NULL; 3216 3217 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3218 3219 hv = NAT_HASH_FN(dst, 0, 0xffffffff); 3220 hv = NAT_HASH_FN(src.s_addr, hv, ifs->ifs_ipf_nattable_sz); 3221 3222 WRITE_ENTER(&ifs->ifs_ipf_nat); 3223 3224 nat = ifs->ifs_nat_table[1][hv]; 3225 for (; nat; nat = nat->nat_hnext[1]) { 3226 if (nat->nat_ifps[0] != NULL) { 3227 if ((ifp != NULL) && (ifp != nat->nat_ifps[0])) 3228 continue; 3229 } else if (ifp != NULL) 3230 nat->nat_ifps[0] = ifp; 3231 3232 if (nat->nat_p != fin->fin_p) 3233 continue; 3234 if (nat->nat_oip.s_addr != src.s_addr || 3235 nat->nat_outip.s_addr != dst) 3236 continue; 3237 3238 nflags = nat->nat_flags; 3239 if (!(nflags & (NAT_TCPUDP|SI_WILDP))) 3240 continue; 3241 3242 if (nat_wildok(nat, (int)sport, (int)dport, nflags, 3243 NAT_INBOUND) == 1) { 3244 if ((fin->fin_flx & FI_IGNORE) != 0) 3245 break; 3246 if ((nflags & SI_CLONE) != 0) { 3247 nat = fr_natclone(fin, nat); 3248 if (nat == NULL) 3249 break; 3250 } else { 3251 MUTEX_ENTER(&ifs->ifs_ipf_nat_new); 3252 ifs->ifs_nat_stats.ns_wilds--; 3253 MUTEX_EXIT(&ifs->ifs_ipf_nat_new); 3254 } 3255 nat->nat_oport = sport; 3256 nat->nat_outport = dport; 3257 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); 3258 nat_tabmove(nat, ifs); 3259 break; 3260 } 3261 } 3262 3263 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3264 3265 return nat; 3266 } 3267 3268 3269 /* ------------------------------------------------------------------------ */ 3270 /* Function: nat_tabmove */ 3271 /* Returns: Nil */ 3272 /* Parameters: nat(I) - pointer to NAT structure */ 3273 /* Write Lock: ipf_nat */ 3274 /* */ 3275 /* This function is only called for TCP/UDP NAT table entries where the */ 3276 /* original was placed in the table without hashing on the ports and we now */ 3277 /* want to include hashing on port numbers. 
*/ 3278 /* ------------------------------------------------------------------------ */ 3279 static void nat_tabmove(nat, ifs) 3280 nat_t *nat; 3281 ipf_stack_t *ifs; 3282 { 3283 nat_t **natp; 3284 u_int hv; 3285 3286 if (nat->nat_flags & SI_CLONE) 3287 return; 3288 3289 /* 3290 * Remove the NAT entry from the old location 3291 */ 3292 if (nat->nat_hnext[0]) 3293 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; 3294 *nat->nat_phnext[0] = nat->nat_hnext[0]; 3295 ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--; 3296 3297 if (nat->nat_hnext[1]) 3298 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; 3299 *nat->nat_phnext[1] = nat->nat_hnext[1]; 3300 ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--; 3301 3302 /* 3303 * Add into the NAT table in the new position 3304 */ 3305 hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff); 3306 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport, 3307 ifs->ifs_ipf_nattable_sz); 3308 nat->nat_hv[0] = hv; 3309 natp = &ifs->ifs_nat_table[0][hv]; 3310 if (*natp) 3311 (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; 3312 nat->nat_phnext[0] = natp; 3313 nat->nat_hnext[0] = *natp; 3314 *natp = nat; 3315 ifs->ifs_nat_stats.ns_bucketlen[0][hv]++; 3316 3317 hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff); 3318 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport, 3319 ifs->ifs_ipf_nattable_sz); 3320 nat->nat_hv[1] = hv; 3321 natp = &ifs->ifs_nat_table[1][hv]; 3322 if (*natp) 3323 (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; 3324 nat->nat_phnext[1] = natp; 3325 nat->nat_hnext[1] = *natp; 3326 *natp = nat; 3327 ifs->ifs_nat_stats.ns_bucketlen[1][hv]++; 3328 } 3329 3330 3331 /* ------------------------------------------------------------------------ */ 3332 /* Function: nat_outlookup */ 3333 /* Returns: nat_t* - NULL == no match, */ 3334 /* else pointer to matching NAT entry */ 3335 /* Parameters: fin(I) - pointer to packet information */ 3336 /* flags(I) - NAT flags for this packet 
*/ 3337 /* p(I) - protocol for this packet */ 3338 /* src(I) - source IP address */ 3339 /* dst(I) - destination IP address */ 3340 /* rw(I) - 1 == write lock on ipf_nat held, 0 == read lock. */ 3341 /* */ 3342 /* Lookup a nat entry based on the source 'real' ip address/port and */ 3343 /* destination address/port. We use this lookup when sending a packet out, */ 3344 /* we're looking for a table entry, based on the source address. */ 3345 /* */ 3346 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */ 3347 /* */ 3348 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */ 3349 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */ 3350 /* */ 3351 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */ 3352 /* the packet is of said protocol */ 3353 /* ------------------------------------------------------------------------ */ 3354 nat_t *nat_outlookup(fin, flags, p, src, dst) 3355 fr_info_t *fin; 3356 u_int flags, p; 3357 struct in_addr src , dst; 3358 { 3359 u_short sport, dport; 3360 u_int sflags; 3361 ipnat_t *ipn; 3362 u_32_t srcip; 3363 nat_t *nat; 3364 int nflags; 3365 void *ifp; 3366 u_int hv; 3367 ipf_stack_t *ifs = fin->fin_ifs; 3368 3369 ifp = fin->fin_ifp; 3370 3371 srcip = src.s_addr; 3372 sflags = flags & IPN_TCPUDPICMP; 3373 sport = 0; 3374 dport = 0; 3375 3376 switch (p) 3377 { 3378 case IPPROTO_TCP : 3379 case IPPROTO_UDP : 3380 sport = htons(fin->fin_data[0]); 3381 dport = htons(fin->fin_data[1]); 3382 break; 3383 case IPPROTO_ICMP : 3384 if (flags & IPN_ICMPERR) 3385 sport = fin->fin_data[1]; 3386 else 3387 dport = fin->fin_data[1]; 3388 break; 3389 default : 3390 break; 3391 } 3392 3393 if ((flags & SI_WILDP) != 0) 3394 goto find_out_wild_ports; 3395 3396 hv = NAT_HASH_FN(srcip, sport, 0xffffffff); 3397 hv = NAT_HASH_FN(dst.s_addr, hv + dport, ifs->ifs_ipf_nattable_sz); 3398 nat = ifs->ifs_nat_table[0][hv]; 3399 for (; nat; nat = nat->nat_hnext[0]) { 3400 if (nat->nat_ifps[1] != NULL) { 
3401 if ((ifp != NULL) && (ifp != nat->nat_ifps[1])) 3402 continue; 3403 } else if (ifp != NULL) 3404 nat->nat_ifps[1] = ifp; 3405 3406 nflags = nat->nat_flags; 3407 3408 if (nat->nat_inip.s_addr == srcip && 3409 nat->nat_oip.s_addr == dst.s_addr && 3410 (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP))) 3411 || (p == nat->nat_p))) { 3412 switch (p) 3413 { 3414 #if 0 3415 case IPPROTO_GRE : 3416 if (nat->nat_call[1] != fin->fin_data[0]) 3417 continue; 3418 break; 3419 #endif 3420 case IPPROTO_TCP : 3421 case IPPROTO_UDP : 3422 if (nat->nat_oport != dport) 3423 continue; 3424 if (nat->nat_inport != sport) 3425 continue; 3426 break; 3427 default : 3428 break; 3429 } 3430 3431 ipn = nat->nat_ptr; 3432 if ((ipn != NULL) && (nat->nat_aps != NULL)) 3433 if (appr_match(fin, nat) != 0) 3434 continue; 3435 return nat; 3436 } 3437 } 3438 3439 /* 3440 * So if we didn't find it but there are wildcard members in the hash 3441 * table, go back and look for them. We do this search and update here 3442 * because it is modifying the NAT table and we want to do this only 3443 * for the first packet that matches. The exception, of course, is 3444 * for "dummy" (FI_IGNORE) lookups. 
3445 */ 3446 find_out_wild_ports: 3447 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) 3448 return NULL; 3449 if (ifs->ifs_nat_stats.ns_wilds == 0) 3450 return NULL; 3451 3452 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3453 3454 hv = NAT_HASH_FN(srcip, 0, 0xffffffff); 3455 hv = NAT_HASH_FN(dst.s_addr, hv, ifs->ifs_ipf_nattable_sz); 3456 3457 WRITE_ENTER(&ifs->ifs_ipf_nat); 3458 3459 nat = ifs->ifs_nat_table[0][hv]; 3460 for (; nat; nat = nat->nat_hnext[0]) { 3461 if (nat->nat_ifps[1] != NULL) { 3462 if ((ifp != NULL) && (ifp != nat->nat_ifps[1])) 3463 continue; 3464 } else if (ifp != NULL) 3465 nat->nat_ifps[1] = ifp; 3466 3467 if (nat->nat_p != fin->fin_p) 3468 continue; 3469 if ((nat->nat_inip.s_addr != srcip) || 3470 (nat->nat_oip.s_addr != dst.s_addr)) 3471 continue; 3472 3473 nflags = nat->nat_flags; 3474 if (!(nflags & (NAT_TCPUDP|SI_WILDP))) 3475 continue; 3476 3477 if (nat_wildok(nat, (int)sport, (int)dport, nflags, 3478 NAT_OUTBOUND) == 1) { 3479 if ((fin->fin_flx & FI_IGNORE) != 0) 3480 break; 3481 if ((nflags & SI_CLONE) != 0) { 3482 nat = fr_natclone(fin, nat); 3483 if (nat == NULL) 3484 break; 3485 } else { 3486 MUTEX_ENTER(&ifs->ifs_ipf_nat_new); 3487 ifs->ifs_nat_stats.ns_wilds--; 3488 MUTEX_EXIT(&ifs->ifs_ipf_nat_new); 3489 } 3490 nat->nat_inport = sport; 3491 nat->nat_oport = dport; 3492 if (nat->nat_outport == 0) 3493 nat->nat_outport = sport; 3494 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); 3495 nat_tabmove(nat, ifs); 3496 break; 3497 } 3498 } 3499 3500 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3501 3502 return nat; 3503 } 3504 3505 3506 /* ------------------------------------------------------------------------ */ 3507 /* Function: nat_lookupredir */ 3508 /* Returns: nat_t* - NULL == no match, */ 3509 /* else pointer to matching NAT entry */ 3510 /* Parameters: np(I) - pointer to description of packet to find NAT table */ 3511 /* entry for. 
*/ 3512 /* */ 3513 /* Lookup the NAT tables to search for a matching redirect */ 3514 /* ------------------------------------------------------------------------ */ 3515 nat_t *nat_lookupredir(np, ifs) 3516 natlookup_t *np; 3517 ipf_stack_t *ifs; 3518 { 3519 fr_info_t fi; 3520 nat_t *nat; 3521 3522 bzero((char *)&fi, sizeof(fi)); 3523 if (np->nl_flags & IPN_IN) { 3524 fi.fin_data[0] = ntohs(np->nl_realport); 3525 fi.fin_data[1] = ntohs(np->nl_outport); 3526 } else { 3527 fi.fin_data[0] = ntohs(np->nl_inport); 3528 fi.fin_data[1] = ntohs(np->nl_outport); 3529 } 3530 if (np->nl_flags & IPN_TCP) 3531 fi.fin_p = IPPROTO_TCP; 3532 else if (np->nl_flags & IPN_UDP) 3533 fi.fin_p = IPPROTO_UDP; 3534 else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY)) 3535 fi.fin_p = IPPROTO_ICMP; 3536 3537 fi.fin_ifs = ifs; 3538 /* 3539 * We can do two sorts of lookups: 3540 * - IPN_IN: we have the `real' and `out' address, look for `in'. 3541 * - default: we have the `in' and `out' address, look for `real'. 3542 */ 3543 if (np->nl_flags & IPN_IN) { 3544 if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p, 3545 np->nl_realip, np->nl_outip))) { 3546 np->nl_inip = nat->nat_inip; 3547 np->nl_inport = nat->nat_inport; 3548 } 3549 } else { 3550 /* 3551 * If nl_inip is non null, this is a lookup based on the real 3552 * ip address. Else, we use the fake. 
3553 */ 3554 if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p, 3555 np->nl_inip, np->nl_outip))) { 3556 3557 if ((np->nl_flags & IPN_FINDFORWARD) != 0) { 3558 fr_info_t fin; 3559 bzero((char *)&fin, sizeof(fin)); 3560 fin.fin_p = nat->nat_p; 3561 fin.fin_data[0] = ntohs(nat->nat_outport); 3562 fin.fin_data[1] = ntohs(nat->nat_oport); 3563 fin.fin_ifs = ifs; 3564 if (nat_inlookup(&fin, np->nl_flags, fin.fin_p, 3565 nat->nat_outip, 3566 nat->nat_oip) != NULL) { 3567 np->nl_flags &= ~IPN_FINDFORWARD; 3568 } 3569 } 3570 3571 np->nl_realip = nat->nat_outip; 3572 np->nl_realport = nat->nat_outport; 3573 } 3574 } 3575 3576 return nat; 3577 } 3578 3579 3580 /* ------------------------------------------------------------------------ */ 3581 /* Function: nat_match */ 3582 /* Returns: int - 0 == no match, 1 == match */ 3583 /* Parameters: fin(I) - pointer to packet information */ 3584 /* np(I) - pointer to NAT rule */ 3585 /* */ 3586 /* Pull the matching of a packet against a NAT rule out of that complex */ 3587 /* loop inside fr_checknatin() and lay it out properly in its own function. 
*/ 3588 /* ------------------------------------------------------------------------ */ 3589 static int nat_match(fin, np) 3590 fr_info_t *fin; 3591 ipnat_t *np; 3592 { 3593 frtuc_t *ft; 3594 3595 if (fin->fin_v != 4) 3596 return 0; 3597 3598 if (np->in_p && fin->fin_p != np->in_p) 3599 return 0; 3600 3601 if (fin->fin_out) { 3602 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK))) 3603 return 0; 3604 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip) 3605 ^ ((np->in_flags & IPN_NOTSRC) != 0)) 3606 return 0; 3607 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip) 3608 ^ ((np->in_flags & IPN_NOTDST) != 0)) 3609 return 0; 3610 } else { 3611 if (!(np->in_redir & NAT_REDIRECT)) 3612 return 0; 3613 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip) 3614 ^ ((np->in_flags & IPN_NOTSRC) != 0)) 3615 return 0; 3616 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip) 3617 ^ ((np->in_flags & IPN_NOTDST) != 0)) 3618 return 0; 3619 } 3620 3621 ft = &np->in_tuc; 3622 if (!(fin->fin_flx & FI_TCPUDP) || 3623 (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) { 3624 if (ft->ftu_scmp || ft->ftu_dcmp) 3625 return 0; 3626 return 1; 3627 } 3628 3629 return fr_tcpudpchk(fin, ft); 3630 } 3631 3632 3633 /* ------------------------------------------------------------------------ */ 3634 /* Function: nat_update */ 3635 /* Returns: Nil */ 3636 /* Parameters: nat(I) - pointer to NAT structure */ 3637 /* np(I) - pointer to NAT rule */ 3638 /* */ 3639 /* Updates the lifetime of a NAT table entry for non-TCP packets. Must be */ 3640 /* called with fin_rev updated - i.e. after calling nat_proto(). 
*/ 3641 /* ------------------------------------------------------------------------ */ 3642 void nat_update(fin, nat, np) 3643 fr_info_t *fin; 3644 nat_t *nat; 3645 ipnat_t *np; 3646 { 3647 ipftq_t *ifq, *ifq2; 3648 ipftqent_t *tqe; 3649 ipf_stack_t *ifs = fin->fin_ifs; 3650 3651 MUTEX_ENTER(&nat->nat_lock); 3652 tqe = &nat->nat_tqe; 3653 ifq = tqe->tqe_ifq; 3654 3655 /* 3656 * We allow over-riding of NAT timeouts from NAT rules, even for 3657 * TCP, however, if it is TCP and there is no rule timeout set, 3658 * then do not update the timeout here. 3659 */ 3660 if (np != NULL) 3661 ifq2 = np->in_tqehead[fin->fin_rev]; 3662 else 3663 ifq2 = NULL; 3664 3665 if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) { 3666 (void) fr_tcp_age(&nat->nat_tqe, fin, ifs->ifs_nat_tqb, 0); 3667 } else { 3668 if (ifq2 == NULL) { 3669 if (nat->nat_p == IPPROTO_UDP) 3670 ifq2 = &ifs->ifs_nat_udptq; 3671 else if (nat->nat_p == IPPROTO_ICMP) 3672 ifq2 = &ifs->ifs_nat_icmptq; 3673 else 3674 ifq2 = &ifs->ifs_nat_iptq; 3675 } 3676 3677 fr_movequeue(tqe, ifq, ifq2, ifs); 3678 } 3679 MUTEX_EXIT(&nat->nat_lock); 3680 } 3681 3682 3683 /* ------------------------------------------------------------------------ */ 3684 /* Function: fr_checknatout */ 3685 /* Returns: int - -1 == packet failed NAT checks so block it, */ 3686 /* 0 == no packet translation occurred, */ 3687 /* 1 == packet was successfully translated. */ 3688 /* Parameters: fin(I) - pointer to packet information */ 3689 /* passp(I) - pointer to filtering result flags */ 3690 /* */ 3691 /* Check to see if an outcoming packet should be changed. ICMP packets are */ 3692 /* first checked to see if they match an existing entry (if an error), */ 3693 /* otherwise a search of the current NAT table is made. If neither results */ 3694 /* in a match then a search for a matching NAT rule is made. Create a new */ 3695 /* NAT entry if a we matched a NAT rule. Lastly, actually change the */ 3696 /* packet header(s) as required. 
*/ 3697 /* ------------------------------------------------------------------------ */ 3698 int fr_checknatout(fin, passp) 3699 fr_info_t *fin; 3700 u_32_t *passp; 3701 { 3702 ipnat_t *np = NULL, *npnext; 3703 struct ifnet *ifp, *sifp; 3704 icmphdr_t *icmp = NULL; 3705 tcphdr_t *tcp = NULL; 3706 int rval, natfailed; 3707 u_int nflags = 0; 3708 u_32_t ipa, iph; 3709 int natadd = 1; 3710 frentry_t *fr; 3711 nat_t *nat; 3712 ipf_stack_t *ifs = fin->fin_ifs; 3713 3714 if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0) 3715 return 0; 3716 3717 natfailed = 0; 3718 fr = fin->fin_fr; 3719 sifp = fin->fin_ifp; 3720 if ((fr != NULL) && !(fr->fr_flags & FR_DUP) && 3721 fr->fr_tifs[fin->fin_rev].fd_ifp && 3722 fr->fr_tifs[fin->fin_rev].fd_ifp != (void *)-1) 3723 fin->fin_ifp = fr->fr_tifs[fin->fin_rev].fd_ifp; 3724 ifp = fin->fin_ifp; 3725 3726 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 3727 switch (fin->fin_p) 3728 { 3729 case IPPROTO_TCP : 3730 nflags = IPN_TCP; 3731 break; 3732 case IPPROTO_UDP : 3733 nflags = IPN_UDP; 3734 break; 3735 case IPPROTO_ICMP : 3736 icmp = fin->fin_dp; 3737 3738 /* 3739 * This is an incoming packet, so the destination is 3740 * the icmp_id and the source port equals 0 3741 */ 3742 if (nat_icmpquerytype4(icmp->icmp_type)) 3743 nflags = IPN_ICMPQUERY; 3744 break; 3745 default : 3746 break; 3747 } 3748 3749 if ((nflags & IPN_TCPUDP)) 3750 tcp = fin->fin_dp; 3751 } 3752 3753 ipa = fin->fin_saddr; 3754 3755 READ_ENTER(&ifs->ifs_ipf_nat); 3756 3757 if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && 3758 (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND))) 3759 /*EMPTY*/; 3760 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin))) 3761 natadd = 0; 3762 else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p, 3763 fin->fin_src, fin->fin_dst))) { 3764 nflags = nat->nat_flags; 3765 } else { 3766 u_32_t hv, msk, nmsk; 3767 3768 /* 3769 * If there is no current entry in the nat table for 
this IP#, 3770 * create one for it (if there is a matching rule). 3771 */ 3772 msk = 0xffffffff; 3773 nmsk = ifs->ifs_nat_masks; 3774 maskloop: 3775 iph = ipa & htonl(msk); 3776 hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_natrules_sz); 3777 for (np = ifs->ifs_nat_rules[hv]; np; np = npnext) { 3778 npnext = np->in_mnext; 3779 if ((np->in_ifps[1] && (np->in_ifps[1] != ifp))) 3780 continue; 3781 if (np->in_v != fin->fin_v) 3782 continue; 3783 if (np->in_p && (np->in_p != fin->fin_p)) 3784 continue; 3785 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags)) 3786 continue; 3787 if (np->in_flags & IPN_FILTER) { 3788 if (!nat_match(fin, np)) 3789 continue; 3790 } else if ((ipa & np->in_inmsk) != np->in_inip) 3791 continue; 3792 3793 if ((fr != NULL) && 3794 !fr_matchtag(&np->in_tag, &fr->fr_nattag)) 3795 continue; 3796 3797 if (*np->in_plabel != '\0') { 3798 if (((np->in_flags & IPN_FILTER) == 0) && 3799 (np->in_dport != tcp->th_dport)) 3800 continue; 3801 if (appr_ok(fin, tcp, np) == 0) 3802 continue; 3803 } 3804 3805 ATOMIC_INC32(np->in_use); 3806 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3807 WRITE_ENTER(&ifs->ifs_ipf_nat); 3808 nat = nat_new(fin, np, NULL, nflags, NAT_OUTBOUND); 3809 if (nat != NULL) { 3810 np->in_use--; 3811 np->in_hits++; 3812 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3813 break; 3814 } 3815 natfailed = -1; 3816 npnext = np->in_mnext; 3817 fr_ipnatderef(&np, ifs); 3818 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3819 } 3820 if ((np == NULL) && (nmsk != 0)) { 3821 while (nmsk) { 3822 msk <<= 1; 3823 if (nmsk & 0x80000000) 3824 break; 3825 nmsk <<= 1; 3826 } 3827 if (nmsk != 0) { 3828 nmsk <<= 1; 3829 goto maskloop; 3830 } 3831 } 3832 } 3833 3834 if (nat != NULL) { 3835 rval = fr_natout(fin, nat, natadd, nflags); 3836 if (rval == 1) { 3837 MUTEX_ENTER(&nat->nat_lock); 3838 nat->nat_ref++; 3839 MUTEX_EXIT(&nat->nat_lock); 3840 nat->nat_touched = ifs->ifs_fr_ticks; 3841 fin->fin_nat = nat; 3842 } 3843 } else 3844 rval = natfailed; 3845 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3846 3847 if 
(rval == -1) { 3848 if (passp != NULL) 3849 *passp = FR_BLOCK; 3850 fin->fin_flx |= FI_BADNAT; 3851 } 3852 fin->fin_ifp = sifp; 3853 return rval; 3854 } 3855 3856 /* ------------------------------------------------------------------------ */ 3857 /* Function: fr_natout */ 3858 /* Returns: int - -1 == packet failed NAT checks so block it, */ 3859 /* 1 == packet was successfully translated. */ 3860 /* Parameters: fin(I) - pointer to packet information */ 3861 /* nat(I) - pointer to NAT structure */ 3862 /* natadd(I) - flag indicating if it is safe to add frag cache */ 3863 /* nflags(I) - NAT flags set for this packet */ 3864 /* */ 3865 /* Translate a packet coming "out" on an interface. */ 3866 /* ------------------------------------------------------------------------ */ 3867 int fr_natout(fin, nat, natadd, nflags) 3868 fr_info_t *fin; 3869 nat_t *nat; 3870 int natadd; 3871 u_32_t nflags; 3872 { 3873 icmphdr_t *icmp; 3874 u_short *csump; 3875 u_32_t sumd; 3876 tcphdr_t *tcp; 3877 ipnat_t *np; 3878 int i; 3879 ipf_stack_t *ifs = fin->fin_ifs; 3880 3881 #if SOLARIS && defined(_KERNEL) 3882 net_data_t net_data_p; 3883 if (fin->fin_v == 4) 3884 net_data_p = ifs->ifs_ipf_ipv4; 3885 else 3886 net_data_p = ifs->ifs_ipf_ipv6; 3887 #endif 3888 3889 tcp = NULL; 3890 icmp = NULL; 3891 csump = NULL; 3892 np = nat->nat_ptr; 3893 3894 if ((natadd != 0) && (fin->fin_flx & FI_FRAG)) 3895 (void) fr_nat_newfrag(fin, 0, nat); 3896 3897 MUTEX_ENTER(&nat->nat_lock); 3898 nat->nat_bytes[1] += fin->fin_plen; 3899 nat->nat_pkts[1]++; 3900 MUTEX_EXIT(&nat->nat_lock); 3901 3902 /* 3903 * Fix up checksums, not by recalculating them, but 3904 * simply computing adjustments. 3905 * This is only done for STREAMS based IP implementations where the 3906 * checksum has already been calculated by IP. In all other cases, 3907 * IPFilter is called before the checksum needs calculating so there 3908 * is no call to modify whatever is in the header now. 
3909 */ 3910 ASSERT(fin->fin_m != NULL); 3911 if (fin->fin_v == 4 && !NET_IS_HCK_L3_FULL(net_data_p, fin->fin_m)) { 3912 if (nflags == IPN_ICMPERR) { 3913 u_32_t s1, s2; 3914 3915 s1 = LONG_SUM(ntohl(fin->fin_saddr)); 3916 s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr)); 3917 CALC_SUMD(s1, s2, sumd); 3918 3919 fix_outcksum(&fin->fin_ip->ip_sum, sumd); 3920 } 3921 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ 3922 defined(linux) || defined(BRIDGE_IPF) 3923 else { 3924 /* 3925 * Strictly speaking, this isn't necessary on BSD 3926 * kernels because they do checksum calculation after 3927 * this code has run BUT if ipfilter is being used 3928 * to do NAT as a bridge, that code doesn't exist. 3929 */ 3930 if (nat->nat_dir == NAT_OUTBOUND) 3931 fix_outcksum(&fin->fin_ip->ip_sum, 3932 nat->nat_ipsumd); 3933 else 3934 fix_incksum(&fin->fin_ip->ip_sum, 3935 nat->nat_ipsumd); 3936 } 3937 #endif 3938 } 3939 3940 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 3941 if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) { 3942 tcp = fin->fin_dp; 3943 3944 tcp->th_sport = nat->nat_outport; 3945 fin->fin_data[0] = ntohs(nat->nat_outport); 3946 } 3947 3948 if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) { 3949 icmp = fin->fin_dp; 3950 icmp->icmp_id = nat->nat_outport; 3951 } 3952 3953 csump = nat_proto(fin, nat, nflags); 3954 } 3955 3956 fin->fin_ip->ip_src = nat->nat_outip; 3957 3958 nat_update(fin, nat, np); 3959 3960 /* 3961 * The above comments do not hold for layer 4 (or higher) checksums... 
3962 */ 3963 if (csump != NULL && !NET_IS_HCK_L4_FULL(net_data_p, fin->fin_m)) { 3964 if (nflags & IPN_TCPUDP && 3965 NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) 3966 sumd = nat->nat_sumd[1]; 3967 else 3968 sumd = nat->nat_sumd[0]; 3969 3970 if (nat->nat_dir == NAT_OUTBOUND) 3971 fix_outcksum(csump, sumd); 3972 else 3973 fix_incksum(csump, sumd); 3974 } 3975 #ifdef IPFILTER_SYNC 3976 ipfsync_update(SMC_NAT, fin, nat->nat_sync); 3977 #endif 3978 /* ------------------------------------------------------------- */ 3979 /* A few quick notes: */ 3980 /* Following are test conditions prior to calling the */ 3981 /* appr_check routine. */ 3982 /* */ 3983 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */ 3984 /* with a redirect rule, we attempt to match the packet's */ 3985 /* source port against in_dport, otherwise we'd compare the */ 3986 /* packet's destination. */ 3987 /* ------------------------------------------------------------- */ 3988 if ((np != NULL) && (np->in_apr != NULL)) { 3989 i = appr_check(fin, nat); 3990 if (i == 0) 3991 i = 1; 3992 } else 3993 i = 1; 3994 ifs->ifs_nat_stats.ns_mapped[1]++; 3995 fin->fin_flx |= FI_NATED; 3996 return i; 3997 } 3998 3999 4000 /* ------------------------------------------------------------------------ */ 4001 /* Function: fr_checknatin */ 4002 /* Returns: int - -1 == packet failed NAT checks so block it, */ 4003 /* 0 == no packet translation occurred, */ 4004 /* 1 == packet was successfully translated. */ 4005 /* Parameters: fin(I) - pointer to packet information */ 4006 /* passp(I) - pointer to filtering result flags */ 4007 /* */ 4008 /* Check to see if an incoming packet should be changed. ICMP packets are */ 4009 /* first checked to see if they match an existing entry (if an error), */ 4010 /* otherwise a search of the current NAT table is made. If neither results */ 4011 /* in a match then a search for a matching NAT rule is made. Create a new */ 4012 /* NAT entry if a we matched a NAT rule. 
Lastly, actually change the */ 4013 /* packet header(s) as required. */ 4014 /* ------------------------------------------------------------------------ */ 4015 int fr_checknatin(fin, passp) 4016 fr_info_t *fin; 4017 u_32_t *passp; 4018 { 4019 u_int nflags, natadd; 4020 ipnat_t *np, *npnext; 4021 int rval, natfailed; 4022 struct ifnet *ifp; 4023 struct in_addr in; 4024 icmphdr_t *icmp; 4025 tcphdr_t *tcp; 4026 u_short dport; 4027 nat_t *nat; 4028 u_32_t iph; 4029 ipf_stack_t *ifs = fin->fin_ifs; 4030 4031 if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0) 4032 return 0; 4033 4034 tcp = NULL; 4035 icmp = NULL; 4036 dport = 0; 4037 natadd = 1; 4038 nflags = 0; 4039 natfailed = 0; 4040 ifp = fin->fin_ifp; 4041 4042 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 4043 switch (fin->fin_p) 4044 { 4045 case IPPROTO_TCP : 4046 nflags = IPN_TCP; 4047 break; 4048 case IPPROTO_UDP : 4049 nflags = IPN_UDP; 4050 break; 4051 case IPPROTO_ICMP : 4052 icmp = fin->fin_dp; 4053 4054 /* 4055 * This is an incoming packet, so the destination is 4056 * the icmp_id and the source port equals 0 4057 */ 4058 if (nat_icmpquerytype4(icmp->icmp_type)) { 4059 nflags = IPN_ICMPQUERY; 4060 dport = icmp->icmp_id; 4061 } break; 4062 default : 4063 break; 4064 } 4065 4066 if ((nflags & IPN_TCPUDP)) { 4067 tcp = fin->fin_dp; 4068 dport = tcp->th_dport; 4069 } 4070 } 4071 4072 in = fin->fin_dst; 4073 4074 READ_ENTER(&ifs->ifs_ipf_nat); 4075 4076 if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && 4077 (nat = nat_icmperror(fin, &nflags, NAT_INBOUND))) 4078 /*EMPTY*/; 4079 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin))) 4080 natadd = 0; 4081 else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p, 4082 fin->fin_src, in))) { 4083 nflags = nat->nat_flags; 4084 } else { 4085 u_32_t hv, msk, rmsk; 4086 4087 rmsk = ifs->ifs_rdr_masks; 4088 msk = 0xffffffff; 4089 /* 4090 * If there is no current entry in the nat table for this IP#, 4091 
* create one for it (if there is a matching rule). 4092 */ 4093 maskloop: 4094 iph = in.s_addr & htonl(msk); 4095 hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_rdrrules_sz); 4096 for (np = ifs->ifs_rdr_rules[hv]; np; np = npnext) { 4097 npnext = np->in_rnext; 4098 if (np->in_ifps[0] && (np->in_ifps[0] != ifp)) 4099 continue; 4100 if (np->in_v != fin->fin_v) 4101 continue; 4102 if (np->in_p && (np->in_p != fin->fin_p)) 4103 continue; 4104 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags)) 4105 continue; 4106 if (np->in_flags & IPN_FILTER) { 4107 if (!nat_match(fin, np)) 4108 continue; 4109 } else { 4110 if ((in.s_addr & np->in_outmsk) != np->in_outip) 4111 continue; 4112 if (np->in_pmin && 4113 ((ntohs(np->in_pmax) < ntohs(dport)) || 4114 (ntohs(dport) < ntohs(np->in_pmin)))) 4115 continue; 4116 } 4117 4118 if (*np->in_plabel != '\0') { 4119 if (!appr_ok(fin, tcp, np)) { 4120 continue; 4121 } 4122 } 4123 4124 ATOMIC_INC32(np->in_use); 4125 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4126 WRITE_ENTER(&ifs->ifs_ipf_nat); 4127 nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND); 4128 if (nat != NULL) { 4129 np->in_use--; 4130 np->in_hits++; 4131 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 4132 break; 4133 } 4134 natfailed = -1; 4135 npnext = np->in_rnext; 4136 fr_ipnatderef(&np, ifs); 4137 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 4138 } 4139 4140 if ((np == NULL) && (rmsk != 0)) { 4141 while (rmsk) { 4142 msk <<= 1; 4143 if (rmsk & 0x80000000) 4144 break; 4145 rmsk <<= 1; 4146 } 4147 if (rmsk != 0) { 4148 rmsk <<= 1; 4149 goto maskloop; 4150 } 4151 } 4152 } 4153 if (nat != NULL) { 4154 rval = fr_natin(fin, nat, natadd, nflags); 4155 if (rval == 1) { 4156 MUTEX_ENTER(&nat->nat_lock); 4157 nat->nat_ref++; 4158 MUTEX_EXIT(&nat->nat_lock); 4159 nat->nat_touched = ifs->ifs_fr_ticks; 4160 fin->fin_nat = nat; 4161 fin->fin_state = nat->nat_state; 4162 } 4163 } else 4164 rval = natfailed; 4165 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4166 4167 if (rval == -1) { 4168 if (passp != NULL) 4169 *passp = FR_BLOCK; 4170 
fin->fin_flx |= FI_BADNAT; 4171 } 4172 return rval; 4173 } 4174 4175 4176 /* ------------------------------------------------------------------------ */ 4177 /* Function: fr_natin */ 4178 /* Returns: int - -1 == packet failed NAT checks so block it, */ 4179 /* 1 == packet was successfully translated. */ 4180 /* Parameters: fin(I) - pointer to packet information */ 4181 /* nat(I) - pointer to NAT structure */ 4182 /* natadd(I) - flag indicating if it is safe to add frag cache */ 4183 /* nflags(I) - NAT flags set for this packet */ 4184 /* Locks Held: ipf_nat (READ) */ 4185 /* */ 4186 /* Translate a packet coming "in" on an interface. */ 4187 /* ------------------------------------------------------------------------ */ 4188 int fr_natin(fin, nat, natadd, nflags) 4189 fr_info_t *fin; 4190 nat_t *nat; 4191 int natadd; 4192 u_32_t nflags; 4193 { 4194 icmphdr_t *icmp; 4195 u_short *csump; 4196 tcphdr_t *tcp; 4197 ipnat_t *np; 4198 int i; 4199 ipf_stack_t *ifs = fin->fin_ifs; 4200 4201 #if SOLARIS && defined(_KERNEL) 4202 net_data_t net_data_p; 4203 if (fin->fin_v == 4) 4204 net_data_p = ifs->ifs_ipf_ipv4; 4205 else 4206 net_data_p = ifs->ifs_ipf_ipv6; 4207 #endif 4208 4209 tcp = NULL; 4210 csump = NULL; 4211 np = nat->nat_ptr; 4212 fin->fin_fr = nat->nat_fr; 4213 4214 if ((natadd != 0) && (fin->fin_flx & FI_FRAG)) 4215 (void) fr_nat_newfrag(fin, 0, nat); 4216 4217 if (np != NULL) { 4218 4219 /* ------------------------------------------------------------- */ 4220 /* A few quick notes: */ 4221 /* Following are test conditions prior to calling the */ 4222 /* appr_check routine. */ 4223 /* */ 4224 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */ 4225 /* with a map rule, we attempt to match the packet's */ 4226 /* source port against in_dport, otherwise we'd compare the */ 4227 /* packet's destination. 
*/ 4228 /* ------------------------------------------------------------- */ 4229 if (np->in_apr != NULL) { 4230 i = appr_check(fin, nat); 4231 if (i == -1) { 4232 return -1; 4233 } 4234 } 4235 } 4236 4237 #ifdef IPFILTER_SYNC 4238 ipfsync_update(SMC_NAT, fin, nat->nat_sync); 4239 #endif 4240 4241 MUTEX_ENTER(&nat->nat_lock); 4242 nat->nat_bytes[0] += fin->fin_plen; 4243 nat->nat_pkts[0]++; 4244 MUTEX_EXIT(&nat->nat_lock); 4245 4246 fin->fin_ip->ip_dst = nat->nat_inip; 4247 fin->fin_fi.fi_daddr = nat->nat_inip.s_addr; 4248 if (nflags & IPN_TCPUDP) 4249 tcp = fin->fin_dp; 4250 4251 /* 4252 * Fix up checksums, not by recalculating them, but 4253 * simply computing adjustments. 4254 * Why only do this for some platforms on inbound packets ? 4255 * Because for those that it is done, IP processing is yet to happen 4256 * and so the IPv4 header checksum has not yet been evaluated. 4257 * Perhaps it should always be done for the benefit of things like 4258 * fast forwarding (so that it doesn't need to be recomputed) but with 4259 * header checksum offloading, perhaps it is a moot point. 
4260 */ 4261 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ 4262 defined(__osf__) || defined(linux) 4263 if (nat->nat_dir == NAT_OUTBOUND) 4264 fix_incksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd); 4265 else 4266 fix_outcksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd); 4267 #endif 4268 4269 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 4270 if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) { 4271 tcp->th_dport = nat->nat_inport; 4272 fin->fin_data[1] = ntohs(nat->nat_inport); 4273 } 4274 4275 4276 if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) { 4277 icmp = fin->fin_dp; 4278 4279 icmp->icmp_id = nat->nat_inport; 4280 } 4281 4282 csump = nat_proto(fin, nat, nflags); 4283 } 4284 4285 nat_update(fin, nat, np); 4286 4287 /* 4288 * In case they are being forwarded, inbound packets always need to have 4289 * their checksum adjusted even if hardware checksum validation said OK. 4290 */ 4291 if (csump != NULL) { 4292 if (nat->nat_dir == NAT_OUTBOUND) 4293 fix_incksum(csump, nat->nat_sumd[0]); 4294 else 4295 fix_outcksum(csump, nat->nat_sumd[0]); 4296 } 4297 4298 #if SOLARIS && defined(_KERNEL) 4299 if (nflags & IPN_TCPUDP && 4300 NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) { 4301 /* 4302 * Need to adjust the partial checksum result stored in 4303 * db_cksum16, which will be used for validation in IP. 4304 * See IP_CKSUM_RECV(). 4305 * Adjustment data should be the inverse of the IP address 4306 * changes, because db_cksum16 is supposed to be the complement 4307 * of the pesudo header. 
4308 */ 4309 csump = &fin->fin_m->b_datap->db_cksum16; 4310 if (nat->nat_dir == NAT_OUTBOUND) 4311 fix_outcksum(csump, nat->nat_sumd[1]); 4312 else 4313 fix_incksum(csump, nat->nat_sumd[1]); 4314 } 4315 #endif 4316 4317 ifs->ifs_nat_stats.ns_mapped[0]++; 4318 fin->fin_flx |= FI_NATED; 4319 if (np != NULL && np->in_tag.ipt_num[0] != 0) 4320 fin->fin_nattag = &np->in_tag; 4321 return 1; 4322 } 4323 4324 4325 /* ------------------------------------------------------------------------ */ 4326 /* Function: nat_proto */ 4327 /* Returns: u_short* - pointer to transport header checksum to update, */ 4328 /* NULL if the transport protocol is not recognised */ 4329 /* as needing a checksum update. */ 4330 /* Parameters: fin(I) - pointer to packet information */ 4331 /* nat(I) - pointer to NAT structure */ 4332 /* nflags(I) - NAT flags set for this packet */ 4333 /* */ 4334 /* Return the pointer to the checksum field for each protocol so understood.*/ 4335 /* If support for making other changes to a protocol header is required, */ 4336 /* that is not strictly 'address' translation, such as clamping the MSS in */ 4337 /* TCP down to a specific value, then do it from here. */ 4338 /* ------------------------------------------------------------------------ */ 4339 u_short *nat_proto(fin, nat, nflags) 4340 fr_info_t *fin; 4341 nat_t *nat; 4342 u_int nflags; 4343 { 4344 icmphdr_t *icmp; 4345 u_short *csump; 4346 tcphdr_t *tcp; 4347 udphdr_t *udp; 4348 4349 csump = NULL; 4350 if (fin->fin_out == 0) { 4351 fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND); 4352 } else { 4353 fin->fin_rev = (nat->nat_dir == NAT_INBOUND); 4354 } 4355 4356 switch (fin->fin_p) 4357 { 4358 case IPPROTO_TCP : 4359 tcp = fin->fin_dp; 4360 4361 csump = &tcp->th_sum; 4362 4363 /* 4364 * Do a MSS CLAMPING on a SYN packet, 4365 * only deal IPv4 for now. 
4366 */ 4367 if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0) 4368 nat_mssclamp(tcp, nat->nat_mssclamp, csump); 4369 4370 break; 4371 4372 case IPPROTO_UDP : 4373 udp = fin->fin_dp; 4374 4375 if (udp->uh_sum) 4376 csump = &udp->uh_sum; 4377 break; 4378 4379 case IPPROTO_ICMP : 4380 icmp = fin->fin_dp; 4381 4382 if ((nflags & IPN_ICMPQUERY) != 0) { 4383 if (icmp->icmp_cksum != 0) 4384 csump = &icmp->icmp_cksum; 4385 } 4386 break; 4387 } 4388 return csump; 4389 } 4390 4391 4392 /* ------------------------------------------------------------------------ */ 4393 /* Function: fr_natunload */ 4394 /* Returns: Nil */ 4395 /* Parameters: Nil */ 4396 /* */ 4397 /* Free all memory used by NAT structures allocated at runtime. */ 4398 /* ------------------------------------------------------------------------ */ 4399 void fr_natunload(ifs) 4400 ipf_stack_t *ifs; 4401 { 4402 ipftq_t *ifq, *ifqnext; 4403 4404 (void) nat_clearlist(ifs); 4405 (void) nat_flushtable(ifs); 4406 4407 /* 4408 * Proxy timeout queues are not cleaned here because although they 4409 * exist on the NAT list, appr_unload is called after fr_natunload 4410 * and the proxies actually are responsible for them being created. 4411 * Should the proxy timeouts have their own list? There's no real 4412 * justification as this is the only complication. 
4413 */ 4414 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { 4415 ifqnext = ifq->ifq_next; 4416 if (((ifq->ifq_flags & IFQF_PROXY) == 0) && 4417 (fr_deletetimeoutqueue(ifq) == 0)) 4418 fr_freetimeoutqueue(ifq, ifs); 4419 } 4420 4421 if (ifs->ifs_nat_table[0] != NULL) { 4422 KFREES(ifs->ifs_nat_table[0], 4423 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 4424 ifs->ifs_nat_table[0] = NULL; 4425 } 4426 if (ifs->ifs_nat_table[1] != NULL) { 4427 KFREES(ifs->ifs_nat_table[1], 4428 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 4429 ifs->ifs_nat_table[1] = NULL; 4430 } 4431 if (ifs->ifs_nat_rules != NULL) { 4432 KFREES(ifs->ifs_nat_rules, 4433 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz); 4434 ifs->ifs_nat_rules = NULL; 4435 } 4436 if (ifs->ifs_rdr_rules != NULL) { 4437 KFREES(ifs->ifs_rdr_rules, 4438 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz); 4439 ifs->ifs_rdr_rules = NULL; 4440 } 4441 if (ifs->ifs_maptable != NULL) { 4442 KFREES(ifs->ifs_maptable, 4443 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); 4444 ifs->ifs_maptable = NULL; 4445 } 4446 if (ifs->ifs_nat_stats.ns_bucketlen[0] != NULL) { 4447 KFREES(ifs->ifs_nat_stats.ns_bucketlen[0], 4448 sizeof(u_long *) * ifs->ifs_ipf_nattable_sz); 4449 ifs->ifs_nat_stats.ns_bucketlen[0] = NULL; 4450 } 4451 if (ifs->ifs_nat_stats.ns_bucketlen[1] != NULL) { 4452 KFREES(ifs->ifs_nat_stats.ns_bucketlen[1], 4453 sizeof(u_long *) * ifs->ifs_ipf_nattable_sz); 4454 ifs->ifs_nat_stats.ns_bucketlen[1] = NULL; 4455 } 4456 4457 if (ifs->ifs_fr_nat_maxbucket_reset == 1) 4458 ifs->ifs_fr_nat_maxbucket = 0; 4459 4460 if (ifs->ifs_fr_nat_init == 1) { 4461 ifs->ifs_fr_nat_init = 0; 4462 fr_sttab_destroy(ifs->ifs_nat_tqb); 4463 4464 RW_DESTROY(&ifs->ifs_ipf_natfrag); 4465 RW_DESTROY(&ifs->ifs_ipf_nat); 4466 4467 MUTEX_DESTROY(&ifs->ifs_ipf_nat_new); 4468 MUTEX_DESTROY(&ifs->ifs_ipf_natio); 4469 4470 MUTEX_DESTROY(&ifs->ifs_nat_udptq.ifq_lock); 4471 MUTEX_DESTROY(&ifs->ifs_nat_icmptq.ifq_lock); 4472 
MUTEX_DESTROY(&ifs->ifs_nat_iptq.ifq_lock); 4473 } 4474 } 4475 4476 4477 /* ------------------------------------------------------------------------ */ 4478 /* Function: fr_natexpire */ 4479 /* Returns: Nil */ 4480 /* Parameters: Nil */ 4481 /* */ 4482 /* Check all of the timeout queues for entries at the top which need to be */ 4483 /* expired. */ 4484 /* ------------------------------------------------------------------------ */ 4485 void fr_natexpire(ifs) 4486 ipf_stack_t *ifs; 4487 { 4488 ipftq_t *ifq, *ifqnext; 4489 ipftqent_t *tqe, *tqn; 4490 int i; 4491 SPL_INT(s); 4492 4493 SPL_NET(s); 4494 WRITE_ENTER(&ifs->ifs_ipf_nat); 4495 for (ifq = ifs->ifs_nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) { 4496 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { 4497 if (tqe->tqe_die > ifs->ifs_fr_ticks) 4498 break; 4499 tqn = tqe->tqe_next; 4500 nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs); 4501 } 4502 } 4503 4504 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { 4505 ifqnext = ifq->ifq_next; 4506 4507 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { 4508 if (tqe->tqe_die > ifs->ifs_fr_ticks) 4509 break; 4510 tqn = tqe->tqe_next; 4511 nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs); 4512 } 4513 } 4514 4515 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { 4516 ifqnext = ifq->ifq_next; 4517 4518 if (((ifq->ifq_flags & IFQF_DELETE) != 0) && 4519 (ifq->ifq_ref == 0)) { 4520 fr_freetimeoutqueue(ifq, ifs); 4521 } 4522 } 4523 4524 if (ifs->ifs_nat_doflush != 0) { 4525 (void) nat_extraflush(2, ifs); 4526 ifs->ifs_nat_doflush = 0; 4527 } 4528 4529 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4530 SPL_X(s); 4531 } 4532 4533 4534 /* ------------------------------------------------------------------------ */ 4535 /* Function: fr_nataddrsync */ 4536 /* Returns: Nil */ 4537 /* Parameters: ifp(I) - pointer to network interface */ 4538 /* addr(I) - pointer to new network address */ 4539 /* */ 4540 /* Walk through all of the currently active NAT sessions, looking for 
those */ 4541 /* which need to have their translated address updated (where the interface */ 4542 /* matches the one passed in) and change it, recalculating the checksum sum */ 4543 /* difference too. */ 4544 /* ------------------------------------------------------------------------ */ 4545 void fr_nataddrsync(ifp, addr, ifs) 4546 void *ifp; 4547 struct in_addr *addr; 4548 ipf_stack_t *ifs; 4549 { 4550 u_32_t sum1, sum2, sumd; 4551 nat_t *nat; 4552 ipnat_t *np; 4553 SPL_INT(s); 4554 4555 if (ifs->ifs_fr_running <= 0) 4556 return; 4557 4558 SPL_NET(s); 4559 WRITE_ENTER(&ifs->ifs_ipf_nat); 4560 4561 if (ifs->ifs_fr_running <= 0) { 4562 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4563 return; 4564 } 4565 4566 /* 4567 * Change IP addresses for NAT sessions for any protocol except TCP 4568 * since it will break the TCP connection anyway. The only rules 4569 * which will get changed are those which are "map ... -> 0/32", 4570 * where the rule specifies the address is taken from the interface. 4571 */ 4572 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4573 if (addr != NULL) { 4574 if (((ifp != NULL) && ifp != (nat->nat_ifps[0])) || 4575 ((nat->nat_flags & IPN_TCP) != 0)) 4576 continue; 4577 if (((np = nat->nat_ptr) == NULL) || 4578 (np->in_nip || (np->in_outmsk != 0xffffffff))) 4579 continue; 4580 4581 /* 4582 * Change the map-to address to be the same as the 4583 * new one. 4584 */ 4585 sum1 = nat->nat_outip.s_addr; 4586 nat->nat_outip = *addr; 4587 sum2 = nat->nat_outip.s_addr; 4588 4589 } else if (((ifp == NULL) || (ifp == nat->nat_ifps[0])) && 4590 !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) && 4591 (np->in_outmsk == 0xffffffff) && !np->in_nip) { 4592 struct in_addr in; 4593 4594 /* 4595 * Change the map-to address to be the same as the 4596 * new one. 
4597 */ 4598 sum1 = nat->nat_outip.s_addr; 4599 if (fr_ifpaddr(4, FRI_NORMAL, nat->nat_ifps[0], 4600 &in, NULL, ifs) != -1) 4601 nat->nat_outip = in; 4602 sum2 = nat->nat_outip.s_addr; 4603 } else { 4604 continue; 4605 } 4606 4607 if (sum1 == sum2) 4608 continue; 4609 /* 4610 * Readjust the checksum adjustment to take into 4611 * account the new IP#. 4612 */ 4613 CALC_SUMD(sum1, sum2, sumd); 4614 /* XXX - dont change for TCP when solaris does 4615 * hardware checksumming. 4616 */ 4617 sumd += nat->nat_sumd[0]; 4618 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); 4619 nat->nat_sumd[1] = nat->nat_sumd[0]; 4620 } 4621 4622 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4623 SPL_X(s); 4624 } 4625 4626 4627 /* ------------------------------------------------------------------------ */ 4628 /* Function: fr_natifpsync */ 4629 /* Returns: Nil */ 4630 /* Parameters: action(I) - how we are syncing */ 4631 /* ifp(I) - pointer to network interface */ 4632 /* name(I) - name of interface to sync to */ 4633 /* */ 4634 /* This function is used to resync the mapping of interface names and their */ 4635 /* respective 'pointers'. For "action == IPFSYNC_RESYNC", resync all */ 4636 /* interfaces by doing a new lookup of name to 'pointer'. For "action == */ 4637 /* IPFSYNC_NEWIFP", treat ifp as the new pointer value associated with */ 4638 /* "name" and for "action == IPFSYNC_OLDIFP", ifp is a pointer for which */ 4639 /* there is no longer any interface associated with it. 
*/ 4640 /* ------------------------------------------------------------------------ */ 4641 void fr_natifpsync(action, ifp, name, ifs) 4642 int action; 4643 void *ifp; 4644 char *name; 4645 ipf_stack_t *ifs; 4646 { 4647 #if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL) 4648 int s; 4649 #endif 4650 nat_t *nat; 4651 ipnat_t *n; 4652 4653 if (ifs->ifs_fr_running <= 0) 4654 return; 4655 4656 SPL_NET(s); 4657 WRITE_ENTER(&ifs->ifs_ipf_nat); 4658 4659 if (ifs->ifs_fr_running <= 0) { 4660 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4661 return; 4662 } 4663 4664 switch (action) 4665 { 4666 case IPFSYNC_RESYNC : 4667 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4668 if ((ifp == nat->nat_ifps[0]) || 4669 (nat->nat_ifps[0] == (void *)-1)) { 4670 nat->nat_ifps[0] = 4671 fr_resolvenic(nat->nat_ifnames[0], 4, ifs); 4672 } 4673 4674 if ((ifp == nat->nat_ifps[1]) || 4675 (nat->nat_ifps[1] == (void *)-1)) { 4676 nat->nat_ifps[1] = 4677 fr_resolvenic(nat->nat_ifnames[1], 4, ifs); 4678 } 4679 } 4680 4681 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { 4682 if (n->in_ifps[0] == ifp || 4683 n->in_ifps[0] == (void *)-1) { 4684 n->in_ifps[0] = 4685 fr_resolvenic(n->in_ifnames[0], 4, ifs); 4686 } 4687 if (n->in_ifps[1] == ifp || 4688 n->in_ifps[1] == (void *)-1) { 4689 n->in_ifps[1] = 4690 fr_resolvenic(n->in_ifnames[1], 4, ifs); 4691 } 4692 } 4693 break; 4694 case IPFSYNC_NEWIFP : 4695 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4696 if (!strncmp(name, nat->nat_ifnames[0], 4697 sizeof(nat->nat_ifnames[0]))) 4698 nat->nat_ifps[0] = ifp; 4699 if (!strncmp(name, nat->nat_ifnames[1], 4700 sizeof(nat->nat_ifnames[1]))) 4701 nat->nat_ifps[1] = ifp; 4702 } 4703 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { 4704 if (!strncmp(name, n->in_ifnames[0], 4705 sizeof(n->in_ifnames[0]))) 4706 n->in_ifps[0] = ifp; 4707 if (!strncmp(name, n->in_ifnames[1], 4708 sizeof(n->in_ifnames[1]))) 4709 n->in_ifps[1] = ifp; 4710 } 4711 break; 4712 case 
IPFSYNC_OLDIFP : 4713 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4714 if (ifp == nat->nat_ifps[0]) 4715 nat->nat_ifps[0] = (void *)-1; 4716 if (ifp == nat->nat_ifps[1]) 4717 nat->nat_ifps[1] = (void *)-1; 4718 } 4719 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { 4720 if (n->in_ifps[0] == ifp) 4721 n->in_ifps[0] = (void *)-1; 4722 if (n->in_ifps[1] == ifp) 4723 n->in_ifps[1] = (void *)-1; 4724 } 4725 break; 4726 } 4727 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4728 SPL_X(s); 4729 } 4730 4731 4732 /* ------------------------------------------------------------------------ */ 4733 /* Function: nat_icmpquerytype4 */ 4734 /* Returns: int - 1 == success, 0 == failure */ 4735 /* Parameters: icmptype(I) - ICMP type number */ 4736 /* */ 4737 /* Tests to see if the ICMP type number passed is a query/response type or */ 4738 /* not. */ 4739 /* ------------------------------------------------------------------------ */ 4740 static INLINE int nat_icmpquerytype4(icmptype) 4741 int icmptype; 4742 { 4743 4744 /* 4745 * For the ICMP query NAT code, it is essential that both the query 4746 * and the reply match on the NAT rule. Because the NAT structure 4747 * does not keep track of the icmptype, and a single NAT structure 4748 * is used for all icmp types with the same src, dest and id, we 4749 * simply define the replies as queries as well. 
The funny thing is, 4750 * altough it seems silly to call a reply a query, this is exactly 4751 * as it is defined in the IPv4 specification 4752 */ 4753 4754 switch (icmptype) 4755 { 4756 4757 case ICMP_ECHOREPLY: 4758 case ICMP_ECHO: 4759 /* route aedvertisement/solliciation is currently unsupported: */ 4760 /* it would require rewriting the ICMP data section */ 4761 case ICMP_TSTAMP: 4762 case ICMP_TSTAMPREPLY: 4763 case ICMP_IREQ: 4764 case ICMP_IREQREPLY: 4765 case ICMP_MASKREQ: 4766 case ICMP_MASKREPLY: 4767 return 1; 4768 default: 4769 return 0; 4770 } 4771 } 4772 4773 4774 /* ------------------------------------------------------------------------ */ 4775 /* Function: nat_log */ 4776 /* Returns: Nil */ 4777 /* Parameters: nat(I) - pointer to NAT structure */ 4778 /* type(I) - type of log entry to create */ 4779 /* */ 4780 /* Creates a NAT log entry. */ 4781 /* ------------------------------------------------------------------------ */ 4782 void nat_log(nat, type, ifs) 4783 struct nat *nat; 4784 u_int type; 4785 ipf_stack_t *ifs; 4786 { 4787 #ifdef IPFILTER_LOG 4788 # ifndef LARGE_NAT 4789 struct ipnat *np; 4790 int rulen; 4791 # endif 4792 struct natlog natl; 4793 void *items[1]; 4794 size_t sizes[1]; 4795 int types[1]; 4796 4797 natl.nl_inip = nat->nat_inip; 4798 natl.nl_outip = nat->nat_outip; 4799 natl.nl_origip = nat->nat_oip; 4800 natl.nl_bytes[0] = nat->nat_bytes[0]; 4801 natl.nl_bytes[1] = nat->nat_bytes[1]; 4802 natl.nl_pkts[0] = nat->nat_pkts[0]; 4803 natl.nl_pkts[1] = nat->nat_pkts[1]; 4804 natl.nl_origport = nat->nat_oport; 4805 natl.nl_inport = nat->nat_inport; 4806 natl.nl_outport = nat->nat_outport; 4807 natl.nl_p = nat->nat_p; 4808 natl.nl_type = type; 4809 natl.nl_rule = -1; 4810 # ifndef LARGE_NAT 4811 if (nat->nat_ptr != NULL) { 4812 for (rulen = 0, np = ifs->ifs_nat_list; np; 4813 np = np->in_next, rulen++) 4814 if (np == nat->nat_ptr) { 4815 natl.nl_rule = rulen; 4816 break; 4817 } 4818 } 4819 # endif 4820 items[0] = &natl; 4821 sizes[0] 
= sizeof(natl); 4822 types[0] = 0; 4823 4824 (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1, ifs); 4825 #endif 4826 } 4827 4828 4829 #if defined(__OpenBSD__) 4830 /* ------------------------------------------------------------------------ */ 4831 /* Function: nat_ifdetach */ 4832 /* Returns: Nil */ 4833 /* Parameters: ifp(I) - pointer to network interface */ 4834 /* */ 4835 /* Compatibility interface for OpenBSD to trigger the correct updating of */ 4836 /* interface references within IPFilter. */ 4837 /* ------------------------------------------------------------------------ */ 4838 void nat_ifdetach(ifp, ifs) 4839 void *ifp; 4840 ipf_stack_t *ifs; 4841 { 4842 frsync(ifp, ifs); 4843 return; 4844 } 4845 #endif 4846 4847 4848 /* ------------------------------------------------------------------------ */ 4849 /* Function: fr_ipnatderef */ 4850 /* Returns: Nil */ 4851 /* Parameters: inp(I) - pointer to pointer to NAT rule */ 4852 /* Write Locks: ipf_nat */ 4853 /* */ 4854 /* ------------------------------------------------------------------------ */ 4855 void fr_ipnatderef(inp, ifs) 4856 ipnat_t **inp; 4857 ipf_stack_t *ifs; 4858 { 4859 ipnat_t *in; 4860 4861 in = *inp; 4862 *inp = NULL; 4863 in->in_use--; 4864 if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) { 4865 if (in->in_apr) 4866 appr_free(in->in_apr); 4867 KFREE(in); 4868 ifs->ifs_nat_stats.ns_rules--; 4869 #ifdef notdef 4870 #if SOLARIS 4871 if (ifs->ifs_nat_stats.ns_rules == 0) 4872 ifs->ifs_pfil_delayed_copy = 1; 4873 #endif 4874 #endif 4875 } 4876 } 4877 4878 4879 /* ------------------------------------------------------------------------ */ 4880 /* Function: fr_natderef */ 4881 /* Returns: Nil */ 4882 /* Parameters: isp(I) - pointer to pointer to NAT table entry */ 4883 /* */ 4884 /* Decrement the reference counter for this NAT table entry and free it if */ 4885 /* there are no more things using it. 
*/ 4886 /* */ 4887 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */ 4888 /* structure *because* it only gets called on paths _after_ nat_ref has been*/ 4889 /* incremented. If nat_ref == 1 then we shouldn't decrement it here */ 4890 /* because nat_delete() will do that and send nat_ref to -1. */ 4891 /* */ 4892 /* Holding the lock on nat_lock is required to serialise nat_delete() being */ 4893 /* called from a NAT flush ioctl with a deref happening because of a packet.*/ 4894 /* ------------------------------------------------------------------------ */ 4895 void fr_natderef(natp, ifs) 4896 nat_t **natp; 4897 ipf_stack_t *ifs; 4898 { 4899 nat_t *nat; 4900 4901 nat = *natp; 4902 *natp = NULL; 4903 4904 MUTEX_ENTER(&nat->nat_lock); 4905 if (nat->nat_ref > 1) { 4906 nat->nat_ref--; 4907 MUTEX_EXIT(&nat->nat_lock); 4908 return; 4909 } 4910 MUTEX_EXIT(&nat->nat_lock); 4911 4912 WRITE_ENTER(&ifs->ifs_ipf_nat); 4913 nat_delete(nat, NL_EXPIRE, ifs); 4914 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4915 } 4916 4917 4918 /* ------------------------------------------------------------------------ */ 4919 /* Function: fr_natclone */ 4920 /* Returns: ipstate_t* - NULL == cloning failed, */ 4921 /* else pointer to new state structure */ 4922 /* Parameters: fin(I) - pointer to packet information */ 4923 /* is(I) - pointer to master state structure */ 4924 /* Write Lock: ipf_nat */ 4925 /* */ 4926 /* Create a "duplcate" state table entry from the master. 
*/ 4927 /* ------------------------------------------------------------------------ */ 4928 static nat_t *fr_natclone(fin, nat) 4929 fr_info_t *fin; 4930 nat_t *nat; 4931 { 4932 frentry_t *fr; 4933 nat_t *clone; 4934 ipnat_t *np; 4935 ipf_stack_t *ifs = fin->fin_ifs; 4936 4937 KMALLOC(clone, nat_t *); 4938 if (clone == NULL) 4939 return NULL; 4940 bcopy((char *)nat, (char *)clone, sizeof(*clone)); 4941 4942 MUTEX_NUKE(&clone->nat_lock); 4943 4944 clone->nat_aps = NULL; 4945 /* 4946 * Initialize all these so that nat_delete() doesn't cause a crash. 4947 */ 4948 clone->nat_tqe.tqe_pnext = NULL; 4949 clone->nat_tqe.tqe_next = NULL; 4950 clone->nat_tqe.tqe_ifq = NULL; 4951 clone->nat_tqe.tqe_parent = clone; 4952 4953 clone->nat_flags &= ~SI_CLONE; 4954 clone->nat_flags |= SI_CLONED; 4955 4956 if (clone->nat_hm) 4957 clone->nat_hm->hm_ref++; 4958 4959 if (nat_insert(clone, fin->fin_rev, ifs) == -1) { 4960 KFREE(clone); 4961 return NULL; 4962 } 4963 np = clone->nat_ptr; 4964 if (np != NULL) { 4965 if (ifs->ifs_nat_logging) 4966 nat_log(clone, (u_int)np->in_redir, ifs); 4967 np->in_use++; 4968 } 4969 fr = clone->nat_fr; 4970 if (fr != NULL) { 4971 MUTEX_ENTER(&fr->fr_lock); 4972 fr->fr_ref++; 4973 MUTEX_EXIT(&fr->fr_lock); 4974 } 4975 4976 /* 4977 * Because the clone is created outside the normal loop of things and 4978 * TCP has special needs in terms of state, initialise the timeout 4979 * state of the new NAT from here. 
4980 */ 4981 if (clone->nat_p == IPPROTO_TCP) { 4982 (void) fr_tcp_age(&clone->nat_tqe, fin, ifs->ifs_nat_tqb, 4983 clone->nat_flags); 4984 } 4985 #ifdef IPFILTER_SYNC 4986 clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone); 4987 #endif 4988 if (ifs->ifs_nat_logging) 4989 nat_log(clone, NL_CLONE, ifs); 4990 return clone; 4991 } 4992 4993 4994 /* ------------------------------------------------------------------------ */ 4995 /* Function: nat_wildok */ 4996 /* Returns: int - 1 == packet's ports match wildcards */ 4997 /* 0 == packet's ports don't match wildcards */ 4998 /* Parameters: nat(I) - NAT entry */ 4999 /* sport(I) - source port */ 5000 /* dport(I) - destination port */ 5001 /* flags(I) - wildcard flags */ 5002 /* dir(I) - packet direction */ 5003 /* */ 5004 /* Use NAT entry and packet direction to determine which combination of */ 5005 /* wildcard flags should be used. */ 5006 /* ------------------------------------------------------------------------ */ 5007 static INLINE int nat_wildok(nat, sport, dport, flags, dir) 5008 nat_t *nat; 5009 int sport; 5010 int dport; 5011 int flags; 5012 int dir; 5013 { 5014 /* 5015 * When called by dir is set to 5016 * nat_inlookup NAT_INBOUND (0) 5017 * nat_outlookup NAT_OUTBOUND (1) 5018 * 5019 * We simply combine the packet's direction in dir with the original 5020 * "intended" direction of that NAT entry in nat->nat_dir to decide 5021 * which combination of wildcard flags to allow. 
5022 */ 5023 5024 switch ((dir << 1) | nat->nat_dir) 5025 { 5026 case 3: /* outbound packet / outbound entry */ 5027 if (((nat->nat_inport == sport) || 5028 (flags & SI_W_SPORT)) && 5029 ((nat->nat_oport == dport) || 5030 (flags & SI_W_DPORT))) 5031 return 1; 5032 break; 5033 case 2: /* outbound packet / inbound entry */ 5034 if (((nat->nat_outport == sport) || 5035 (flags & SI_W_DPORT)) && 5036 ((nat->nat_oport == dport) || 5037 (flags & SI_W_SPORT))) 5038 return 1; 5039 break; 5040 case 1: /* inbound packet / outbound entry */ 5041 if (((nat->nat_oport == sport) || 5042 (flags & SI_W_DPORT)) && 5043 ((nat->nat_outport == dport) || 5044 (flags & SI_W_SPORT))) 5045 return 1; 5046 break; 5047 case 0: /* inbound packet / inbound entry */ 5048 if (((nat->nat_oport == sport) || 5049 (flags & SI_W_SPORT)) && 5050 ((nat->nat_outport == dport) || 5051 (flags & SI_W_DPORT))) 5052 return 1; 5053 break; 5054 default: 5055 break; 5056 } 5057 5058 return(0); 5059 } 5060 5061 5062 /* ------------------------------------------------------------------------ */ 5063 /* Function: nat_mssclamp */ 5064 /* Returns: Nil */ 5065 /* Parameters: tcp(I) - pointer to TCP header */ 5066 /* maxmss(I) - value to clamp the TCP MSS to */ 5067 /* csump(I) - pointer to TCP checksum */ 5068 /* */ 5069 /* Check for MSS option and clamp it if necessary. If found and changed, */ 5070 /* then the TCP header checksum will be updated to reflect the change in */ 5071 /* the MSS. 
*/ 5072 /* ------------------------------------------------------------------------ */ 5073 static void nat_mssclamp(tcp, maxmss, csump) 5074 tcphdr_t *tcp; 5075 u_32_t maxmss; 5076 u_short *csump; 5077 { 5078 u_char *cp, *ep, opt; 5079 int hlen, advance; 5080 u_32_t mss, sumd; 5081 5082 hlen = TCP_OFF(tcp) << 2; 5083 if (hlen > sizeof(*tcp)) { 5084 cp = (u_char *)tcp + sizeof(*tcp); 5085 ep = (u_char *)tcp + hlen; 5086 5087 while (cp < ep) { 5088 opt = cp[0]; 5089 if (opt == TCPOPT_EOL) 5090 break; 5091 else if (opt == TCPOPT_NOP) { 5092 cp++; 5093 continue; 5094 } 5095 5096 if (cp + 1 >= ep) 5097 break; 5098 advance = cp[1]; 5099 if ((cp + advance > ep) || (advance <= 0)) 5100 break; 5101 switch (opt) 5102 { 5103 case TCPOPT_MAXSEG: 5104 if (advance != 4) 5105 break; 5106 mss = cp[2] * 256 + cp[3]; 5107 if (mss > maxmss) { 5108 cp[2] = maxmss / 256; 5109 cp[3] = maxmss & 0xff; 5110 CALC_SUMD(mss, maxmss, sumd); 5111 fix_outcksum(csump, sumd); 5112 } 5113 break; 5114 default: 5115 /* ignore unknown options */ 5116 break; 5117 } 5118 5119 cp += advance; 5120 } 5121 } 5122 } 5123 5124 5125 /* ------------------------------------------------------------------------ */ 5126 /* Function: fr_setnatqueue */ 5127 /* Returns: Nil */ 5128 /* Parameters: nat(I)- pointer to NAT structure */ 5129 /* rev(I) - forward(0) or reverse(1) direction */ 5130 /* Locks: ipf_nat (read or write) */ 5131 /* */ 5132 /* Put the NAT entry on its default queue entry, using rev as a helped in */ 5133 /* determining which queue it should be placed on. 
*/ 5134 /* ------------------------------------------------------------------------ */ 5135 void fr_setnatqueue(nat, rev, ifs) 5136 nat_t *nat; 5137 int rev; 5138 ipf_stack_t *ifs; 5139 { 5140 ipftq_t *oifq, *nifq; 5141 5142 if (nat->nat_ptr != NULL) 5143 nifq = nat->nat_ptr->in_tqehead[rev]; 5144 else 5145 nifq = NULL; 5146 5147 if (nifq == NULL) { 5148 switch (nat->nat_p) 5149 { 5150 case IPPROTO_UDP : 5151 nifq = &ifs->ifs_nat_udptq; 5152 break; 5153 case IPPROTO_ICMP : 5154 nifq = &ifs->ifs_nat_icmptq; 5155 break; 5156 case IPPROTO_TCP : 5157 nifq = ifs->ifs_nat_tqb + nat->nat_tqe.tqe_state[rev]; 5158 break; 5159 default : 5160 nifq = &ifs->ifs_nat_iptq; 5161 break; 5162 } 5163 } 5164 5165 oifq = nat->nat_tqe.tqe_ifq; 5166 /* 5167 * If it's currently on a timeout queue, move it from one queue to 5168 * another, else put it on the end of the newly determined queue. 5169 */ 5170 if (oifq != NULL) 5171 fr_movequeue(&nat->nat_tqe, oifq, nifq, ifs); 5172 else 5173 fr_queueappend(&nat->nat_tqe, nifq, nat, ifs); 5174 return; 5175 } 5176 5177 /* ------------------------------------------------------------------------ */ 5178 /* Function: nat_getnext */ 5179 /* Returns: int - 0 == ok, else error */ 5180 /* Parameters: t(I) - pointer to ipftoken structure */ 5181 /* itp(I) - pointer to ipfgeniter_t structure */ 5182 /* ifs - ipf stack instance */ 5183 /* */ 5184 /* Fetch the next nat/ipnat/hostmap structure pointer from the linked list */ 5185 /* and copy it out to the storage space pointed to by itp. The next item */ 5186 /* in the list to look at is put back in the ipftoken struture. 
*/ 5187 /* ------------------------------------------------------------------------ */ 5188 static int nat_getnext(t, itp, ifs) 5189 ipftoken_t *t; 5190 ipfgeniter_t *itp; 5191 ipf_stack_t *ifs; 5192 { 5193 hostmap_t *hm, *nexthm = NULL, zerohm; 5194 ipnat_t *ipn, *nextipnat = NULL, zeroipn; 5195 nat_t *nat, *nextnat = NULL, zeronat; 5196 int error = 0, count; 5197 char *dst; 5198 5199 if (itp->igi_nitems == 0) 5200 return EINVAL; 5201 5202 READ_ENTER(&ifs->ifs_ipf_nat); 5203 5204 /* 5205 * Get "previous" entry from the token and find the next entry. 5206 */ 5207 switch (itp->igi_type) 5208 { 5209 case IPFGENITER_HOSTMAP : 5210 hm = t->ipt_data; 5211 if (hm == NULL) { 5212 nexthm = ifs->ifs_ipf_hm_maplist; 5213 } else { 5214 nexthm = hm->hm_next; 5215 } 5216 break; 5217 5218 case IPFGENITER_IPNAT : 5219 ipn = t->ipt_data; 5220 if (ipn == NULL) { 5221 nextipnat = ifs->ifs_nat_list; 5222 } else { 5223 nextipnat = ipn->in_next; 5224 } 5225 break; 5226 5227 case IPFGENITER_NAT : 5228 nat = t->ipt_data; 5229 if (nat == NULL) { 5230 nextnat = ifs->ifs_nat_instances; 5231 } else { 5232 nextnat = nat->nat_next; 5233 } 5234 break; 5235 default : 5236 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5237 return EINVAL; 5238 } 5239 5240 dst = itp->igi_data; 5241 for (count = itp->igi_nitems; count > 0; count--) { 5242 /* 5243 * If we found an entry, add a reference to it and update the token. 5244 * Otherwise, zero out data to be returned and NULL out token. 
5245 */ 5246 switch (itp->igi_type) 5247 { 5248 case IPFGENITER_HOSTMAP : 5249 if (nexthm != NULL) { 5250 ATOMIC_INC32(nexthm->hm_ref); 5251 t->ipt_data = nexthm; 5252 } else { 5253 bzero(&zerohm, sizeof(zerohm)); 5254 nexthm = &zerohm; 5255 t->ipt_data = NULL; 5256 } 5257 break; 5258 case IPFGENITER_IPNAT : 5259 if (nextipnat != NULL) { 5260 ATOMIC_INC32(nextipnat->in_use); 5261 t->ipt_data = nextipnat; 5262 } else { 5263 bzero(&zeroipn, sizeof(zeroipn)); 5264 nextipnat = &zeroipn; 5265 t->ipt_data = NULL; 5266 } 5267 break; 5268 case IPFGENITER_NAT : 5269 if (nextnat != NULL) { 5270 MUTEX_ENTER(&nextnat->nat_lock); 5271 nextnat->nat_ref++; 5272 MUTEX_EXIT(&nextnat->nat_lock); 5273 t->ipt_data = nextnat; 5274 } else { 5275 bzero(&zeronat, sizeof(zeronat)); 5276 nextnat = &zeronat; 5277 t->ipt_data = NULL; 5278 } 5279 break; 5280 default : 5281 break; 5282 } 5283 5284 /* 5285 * Now that we have ref, it's save to give up lock. 5286 */ 5287 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5288 5289 /* 5290 * Copy out data and clean up references and token as needed. 
5291 */ 5292 switch (itp->igi_type) 5293 { 5294 case IPFGENITER_HOSTMAP : 5295 error = COPYOUT(nexthm, dst, sizeof(*nexthm)); 5296 if (error != 0) 5297 error = EFAULT; 5298 if (t->ipt_data == NULL) { 5299 ipf_freetoken(t, ifs); 5300 break; 5301 } else { 5302 if (hm != NULL) { 5303 WRITE_ENTER(&ifs->ifs_ipf_nat); 5304 fr_hostmapdel(&hm); 5305 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5306 } 5307 if (nexthm->hm_next == NULL) { 5308 ipf_freetoken(t, ifs); 5309 break; 5310 } 5311 dst += sizeof(*nexthm); 5312 hm = nexthm; 5313 nexthm = nexthm->hm_next; 5314 } 5315 break; 5316 5317 case IPFGENITER_IPNAT : 5318 error = COPYOUT(nextipnat, dst, sizeof(*nextipnat)); 5319 if (error != 0) 5320 error = EFAULT; 5321 if (t->ipt_data == NULL) { 5322 ipf_freetoken(t, ifs); 5323 break; 5324 } else { 5325 if (ipn != NULL) { 5326 WRITE_ENTER(&ifs->ifs_ipf_nat); 5327 fr_ipnatderef(&ipn, ifs); 5328 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5329 } 5330 if (nextipnat->in_next == NULL) { 5331 ipf_freetoken(t, ifs); 5332 break; 5333 } 5334 dst += sizeof(*nextipnat); 5335 ipn = nextipnat; 5336 nextipnat = nextipnat->in_next; 5337 } 5338 break; 5339 5340 case IPFGENITER_NAT : 5341 error = COPYOUT(nextnat, dst, sizeof(*nextnat)); 5342 if (error != 0) 5343 error = EFAULT; 5344 if (t->ipt_data == NULL) { 5345 ipf_freetoken(t, ifs); 5346 break; 5347 } else { 5348 if (nat != NULL) 5349 fr_natderef(&nat, ifs); 5350 if (nextnat->nat_next == NULL) { 5351 ipf_freetoken(t, ifs); 5352 break; 5353 } 5354 dst += sizeof(*nextnat); 5355 nat = nextnat; 5356 nextnat = nextnat->nat_next; 5357 } 5358 break; 5359 default : 5360 break; 5361 } 5362 5363 if ((count == 1) || (error != 0)) 5364 break; 5365 5366 READ_ENTER(&ifs->ifs_ipf_nat); 5367 } 5368 5369 return error; 5370 } 5371 5372 5373 /* ------------------------------------------------------------------------ */ 5374 /* Function: nat_iterator */ 5375 /* Returns: int - 0 == ok, else error */ 5376 /* Parameters: token(I) - pointer to ipftoken structure */ 5377 /* itp(I) - pointer 
to ipfgeniter_t structure */ 5378 /* */ 5379 /* This function acts as a handler for the SIOCGENITER ioctls that use a */ 5380 /* generic structure to iterate through a list. There are three different */ 5381 /* linked lists of NAT related information to go through: NAT rules, active */ 5382 /* NAT mappings and the NAT fragment cache. */ 5383 /* ------------------------------------------------------------------------ */ 5384 static int nat_iterator(token, itp, ifs) 5385 ipftoken_t *token; 5386 ipfgeniter_t *itp; 5387 ipf_stack_t *ifs; 5388 { 5389 int error; 5390 5391 if (itp->igi_data == NULL) 5392 return EFAULT; 5393 5394 token->ipt_subtype = itp->igi_type; 5395 5396 switch (itp->igi_type) 5397 { 5398 case IPFGENITER_HOSTMAP : 5399 case IPFGENITER_IPNAT : 5400 case IPFGENITER_NAT : 5401 error = nat_getnext(token, itp, ifs); 5402 break; 5403 case IPFGENITER_NATFRAG : 5404 error = fr_nextfrag(token, itp, &ifs->ifs_ipfr_natlist, 5405 &ifs->ifs_ipfr_nattail, 5406 &ifs->ifs_ipf_natfrag, ifs); 5407 break; 5408 default : 5409 error = EINVAL; 5410 break; 5411 } 5412 5413 return error; 5414 } 5415 5416 5417 /* -------------------------------------------------------------------- */ 5418 /* Function: nat_earlydrop */ 5419 /* Returns: number of dropped/removed entries from the queue */ 5420 /* Parameters: ifq - pointer to queue with entries to be processed */ 5421 /* maxidle - entry must be idle this long to be dropped */ 5422 /* ifs - ipf stack instance */ 5423 /* */ 5424 /* Function is invoked from nat_extraflush() only. Removes entries */ 5425 /* form specified timeout queue, based on how long they've sat idle, */ 5426 /* without waiting for it to happen on its own. 
*/ 5427 /* -------------------------------------------------------------------- */ 5428 static int nat_earlydrop(ifq, maxidle, ifs) 5429 ipftq_t *ifq; 5430 int maxidle; 5431 ipf_stack_t *ifs; 5432 { 5433 ipftqent_t *tqe, *tqn; 5434 nat_t *nat; 5435 unsigned int dropped; 5436 int droptick; 5437 5438 if (ifq == NULL) 5439 return (0); 5440 5441 dropped = 0; 5442 5443 /* 5444 * Determine the tick representing the idle time we're interested 5445 * in. If an entry exists in the queue, and it was touched before 5446 * that tick, then it's been idle longer than maxidle ... remove it. 5447 */ 5448 droptick = ifs->ifs_fr_ticks - maxidle; 5449 tqn = ifq->ifq_head; 5450 while ((tqe = tqn) != NULL && tqe->tqe_touched < droptick) { 5451 tqn = tqe->tqe_next; 5452 nat = tqe->tqe_parent; 5453 nat_delete(nat, ISL_EXPIRE, ifs); 5454 dropped++; 5455 } 5456 return (dropped); 5457 } 5458 5459 5460 /* --------------------------------------------------------------------- */ 5461 /* Function: nat_flushclosing */ 5462 /* Returns: int - number of NAT entries deleted */ 5463 /* Parameters: stateval(I) - State at which to start removing entries */ 5464 /* ifs - ipf stack instance */ 5465 /* */ 5466 /* Remove nat table entries for TCP connections which are in the process */ 5467 /* of closing, and are in (or "beyond") state specified by 'stateval'. */ 5468 /* --------------------------------------------------------------------- */ 5469 static int nat_flushclosing(stateval, ifs) 5470 int stateval; 5471 ipf_stack_t *ifs; 5472 { 5473 ipftq_t *ifq, *ifqn; 5474 ipftqent_t *tqe, *tqn; 5475 nat_t *nat; 5476 int dropped; 5477 5478 dropped = 0; 5479 5480 /* 5481 * Start by deleting any entries in specific timeout queues. 5482 */ 5483 ifqn = &ifs->ifs_nat_tqb[stateval]; 5484 while ((ifq = ifqn) != NULL) { 5485 ifqn = ifq->ifq_next; 5486 dropped += nat_earlydrop(ifq, (int)0, ifs); 5487 } 5488 5489 /* 5490 * Next, look through user defined queues for closing entries. 
5491 */ 5492 ifqn = ifs->ifs_nat_utqe; 5493 while ((ifq = ifqn) != NULL) { 5494 ifqn = ifq->ifq_next; 5495 tqn = ifq->ifq_head; 5496 while ((tqe = tqn) != NULL) { 5497 tqn = tqe->tqe_next; 5498 nat = tqe->tqe_parent; 5499 if (nat->nat_p != IPPROTO_TCP) 5500 continue; 5501 if ((nat->nat_tcpstate[0] >= stateval) && 5502 (nat->nat_tcpstate[1] >= stateval)) { 5503 nat_delete(nat, NL_EXPIRE, ifs); 5504 dropped++; 5505 } 5506 } 5507 } 5508 return (dropped); 5509 } 5510 5511 5512 /* --------------------------------------------------------------------- */ 5513 /* Function: nat_extraflush */ 5514 /* Returns: int - number of NAT entries deleted */ 5515 /* Parameters: which(I) - how to flush the active NAT table */ 5516 /* ifs - ipf stack instance */ 5517 /* Write Locks: ipf_nat */ 5518 /* */ 5519 /* Flush nat tables. Three actions currently defined: */ 5520 /* */ 5521 /* which == 0 : Flush all nat table entries. */ 5522 /* */ 5523 /* which == 1 : Flush entries with TCP connections which have started */ 5524 /* to close on both ends. */ 5525 /* */ 5526 /* which == 2 : First, flush entries which are "almost" closed. If that */ 5527 /* does not take us below specified threshold in the table, */ 5528 /* we want to flush entries with TCP connections which have */ 5529 /* been idle for a long time. Start with connections idle */ 5530 /* over 12 hours, and then work backwards in half hour */ 5531 /* increments to at most 30 minutes idle, and finally work */ 5532 /* back in 30 second increments to at most 30 seconds. 
*/ 5533 /* --------------------------------------------------------------------- */ 5534 static int nat_extraflush(which, ifs) 5535 int which; 5536 ipf_stack_t *ifs; 5537 { 5538 ipftq_t *ifq, *ifqn; 5539 nat_t *nat, **natp; 5540 int idletime, removed, idle_idx; 5541 SPL_INT(s); 5542 5543 removed = 0; 5544 5545 SPL_NET(s); 5546 switch (which) 5547 { 5548 case 0: 5549 natp = &ifs->ifs_nat_instances; 5550 while ((nat = *natp) != NULL) { 5551 natp = &nat->nat_next; 5552 nat_delete(nat, ISL_FLUSH, ifs); 5553 removed++; 5554 } 5555 break; 5556 5557 case 1: 5558 removed = nat_flushclosing(IPF_TCPS_CLOSE_WAIT, ifs); 5559 break; 5560 5561 case 2: 5562 removed = nat_flushclosing(IPF_TCPS_FIN_WAIT_2, ifs); 5563 5564 /* 5565 * Be sure we haven't done this in the last 10 seconds. 5566 */ 5567 if (ifs->ifs_fr_ticks - ifs->ifs_nat_last_force_flush < 5568 IPF_TTLVAL(10)) 5569 break; 5570 ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks; 5571 5572 /* 5573 * Determine initial threshold for minimum idle time based on 5574 * how long ipfilter has been running. Ipfilter needs to have 5575 * been up as long as the smallest interval to continue on. 5576 * 5577 * Minimum idle times stored in idletime_tab and indexed by 5578 * idle_idx. Start at upper end of array and work backwards. 5579 * 5580 * Once the index is found, set the initial idle time to the 5581 * first interval before the current ipfilter run time. 
5582 */ 5583 if (ifs->ifs_fr_ticks < idletime_tab[0]) 5584 break; /* switch */ 5585 idle_idx = (sizeof (idletime_tab) / sizeof (int)) - 1; 5586 if (ifs->ifs_fr_ticks > idletime_tab[idle_idx]) { 5587 idletime = idletime_tab[idle_idx]; 5588 } else { 5589 while ((idle_idx > 0) && 5590 (ifs->ifs_fr_ticks < idletime_tab[idle_idx])) 5591 idle_idx--; 5592 idletime = (ifs->ifs_fr_ticks / 5593 idletime_tab[idle_idx]) * 5594 idletime_tab[idle_idx]; 5595 } 5596 5597 while ((idle_idx >= 0) && 5598 (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_lo)) { 5599 /* 5600 * Start with appropriate timeout queue. 5601 */ 5602 removed += nat_earlydrop( 5603 &ifs->ifs_nat_tqb[IPF_TCPS_ESTABLISHED], 5604 idletime, ifs); 5605 5606 /* 5607 * Make sure we haven't already deleted enough 5608 * entries before checking the user defined queues. 5609 */ 5610 if (NAT_TAB_WATER_LEVEL(ifs) <= 5611 ifs->ifs_nat_flush_lvl_lo) 5612 break; 5613 5614 /* 5615 * Next, look through the user defined queues. 5616 */ 5617 ifqn = ifs->ifs_nat_utqe; 5618 while ((ifq = ifqn) != NULL) { 5619 ifqn = ifq->ifq_next; 5620 removed += nat_earlydrop(ifq, idletime, ifs); 5621 } 5622 5623 /* 5624 * Adjust the granularity of idle time. 5625 * 5626 * If we reach an interval boundary, we need to 5627 * either adjust the idle time accordingly or exit 5628 * the loop altogether (if this is very last check). 5629 */ 5630 idletime -= idletime_tab[idle_idx]; 5631 if (idletime < idletime_tab[idle_idx]) { 5632 if (idle_idx != 0) { 5633 idletime = idletime_tab[idle_idx] - 5634 idletime_tab[idle_idx - 1]; 5635 idle_idx--; 5636 } else { 5637 break; /* while */ 5638 } 5639 } 5640 } 5641 break; 5642 default: 5643 break; 5644 } 5645 5646 SPL_X(s); 5647 return (removed); 5648 } 5649