1 /* 2 * Copyright (C) 1995-2004 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 7 * Use is subject to license terms. 8 */ 9 10 #if defined(KERNEL) || defined(_KERNEL) 11 # undef KERNEL 12 # undef _KERNEL 13 # define KERNEL 1 14 # define _KERNEL 1 15 #endif 16 #include <sys/errno.h> 17 #include <sys/types.h> 18 #include <sys/param.h> 19 #include <sys/time.h> 20 #include <sys/file.h> 21 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ 22 defined(_KERNEL) 23 # include "opt_ipfilter_log.h" 24 #endif 25 #if !defined(_KERNEL) 26 # include <stdio.h> 27 # include <string.h> 28 # include <stdlib.h> 29 # define _KERNEL 30 # ifdef __OpenBSD__ 31 struct file; 32 # endif 33 # include <sys/uio.h> 34 # undef _KERNEL 35 #endif 36 #if defined(_KERNEL) && (__FreeBSD_version >= 220000) 37 # include <sys/filio.h> 38 # include <sys/fcntl.h> 39 #else 40 # include <sys/ioctl.h> 41 #endif 42 #if !defined(AIX) 43 # include <sys/fcntl.h> 44 #endif 45 #if !defined(linux) 46 # include <sys/protosw.h> 47 #endif 48 #include <sys/socket.h> 49 #if defined(_KERNEL) 50 # include <sys/systm.h> 51 # if !defined(__SVR4) && !defined(__svr4__) 52 # include <sys/mbuf.h> 53 # endif 54 #endif 55 #if defined(__SVR4) || defined(__svr4__) 56 # include <sys/filio.h> 57 # include <sys/byteorder.h> 58 # ifdef _KERNEL 59 # include <sys/dditypes.h> 60 # endif 61 # include <sys/stream.h> 62 # include <sys/kmem.h> 63 #endif 64 #if __FreeBSD_version >= 300000 65 # include <sys/queue.h> 66 #endif 67 #include <net/if.h> 68 #if __FreeBSD_version >= 300000 69 # include <net/if_var.h> 70 # if defined(_KERNEL) && !defined(IPFILTER_LKM) 71 # include "opt_ipfilter.h" 72 # endif 73 #endif 74 #ifdef sun 75 # include <net/af.h> 76 #endif 77 #include <net/route.h> 78 #include <netinet/in.h> 79 #include <netinet/in_systm.h> 80 #include <netinet/ip.h> 81 82 #ifdef RFC1825 83 # include <vpn/md5.h> 84 # 
include <vpn/ipsec.h> 85 extern struct ifnet vpnif; 86 #endif 87 88 #if !defined(linux) 89 # include <netinet/ip_var.h> 90 #endif 91 #include <netinet/tcp.h> 92 #include <netinet/udp.h> 93 #include <netinet/ip_icmp.h> 94 #include "netinet/ip_compat.h" 95 #include <netinet/tcpip.h> 96 #include "netinet/ip_fil.h" 97 #include "netinet/ip_nat.h" 98 #include "netinet/ip_frag.h" 99 #include "netinet/ip_state.h" 100 #include "netinet/ip_proxy.h" 101 #include "netinet/ipf_stack.h" 102 #ifdef IPFILTER_SYNC 103 #include "netinet/ip_sync.h" 104 #endif 105 #if (__FreeBSD_version >= 300000) 106 # include <sys/malloc.h> 107 #endif 108 /* END OF INCLUDES */ 109 110 #undef SOCKADDR_IN 111 #define SOCKADDR_IN struct sockaddr_in 112 113 #if !defined(lint) 114 static const char sccsid[] = "@(#)ip_nat.c 1.11 6/5/96 (C) 1995 Darren Reed"; 115 static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 darrenr Exp $"; 116 #endif 117 118 119 /* ======================================================================== */ 120 /* How the NAT is organised and works. */ 121 /* */ 122 /* Inside (interface y) NAT Outside (interface x) */ 123 /* -------------------- -+- ------------------------------------- */ 124 /* Packet going | out, processed by fr_checknatout() for x */ 125 /* ------------> | ------------> */ 126 /* src=10.1.1.1 | src=192.1.1.1 */ 127 /* | */ 128 /* | in, processed by fr_checknatin() for x */ 129 /* <------------ | <------------ */ 130 /* dst=10.1.1.1 | dst=192.1.1.1 */ 131 /* -------------------- -+- ------------------------------------- */ 132 /* fr_checknatout() - changes ip_src and if required, sport */ 133 /* - creates a new mapping, if required. */ 134 /* fr_checknatin() - changes ip_dst and if required, dport */ 135 /* */ 136 /* In the NAT table, internal source is recorded as "in" and externally */ 137 /* seen as "out". 
*/ 138 /* ======================================================================== */ 139 140 141 static int nat_flushtable __P((ipf_stack_t *)); 142 static int nat_clearlist __P((ipf_stack_t *)); 143 static void nat_addnat __P((struct ipnat *, ipf_stack_t *)); 144 static void nat_addrdr __P((struct ipnat *, ipf_stack_t *)); 145 static void nat_delete __P((struct nat *, int, ipf_stack_t *)); 146 static int fr_natgetent __P((caddr_t, ipf_stack_t *)); 147 static int fr_natgetsz __P((caddr_t, ipf_stack_t *)); 148 static int fr_natputent __P((caddr_t, int, ipf_stack_t *)); 149 static void nat_tabmove __P((nat_t *, ipf_stack_t *)); 150 static int nat_match __P((fr_info_t *, ipnat_t *)); 151 static INLINE int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *)); 152 static INLINE int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *)); 153 static hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr, 154 struct in_addr, struct in_addr, u_32_t, 155 ipf_stack_t *)); 156 static INLINE int nat_icmpquerytype4 __P((int)); 157 static int nat_ruleaddrinit __P((ipnat_t *)); 158 static int nat_siocaddnat __P((ipnat_t *, ipnat_t **, int, ipf_stack_t *)); 159 static void nat_siocdelnat __P((ipnat_t *, ipnat_t **, int, ipf_stack_t *)); 160 static INLINE int nat_icmperrortype4 __P((int)); 161 static INLINE int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *, 162 tcphdr_t *, nat_t **, int)); 163 static INLINE int nat_resolverule __P((ipnat_t *, ipf_stack_t *)); 164 static void nat_mssclamp __P((tcphdr_t *, u_32_t, u_short *)); 165 static int nat_getnext __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 166 static int nat_iterator __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 167 static int nat_extraflush __P((int, ipf_stack_t *)); 168 static int nat_earlydrop __P((ipftq_t *, int, ipf_stack_t *)); 169 static int nat_flushclosing __P((int, ipf_stack_t *)); 170 171 172 /* 173 * Below we declare a list of constants used only in the nat_extraflush() 174 * routine. 
We are placing it here, instead of in nat_extraflush() itself, 175 * because we want to make it visible to tools such as mdb, nm etc., so the 176 * values can easily be altered during debugging. 177 */ 178 static const int idletime_tab[] = { 179 IPF_TTLVAL(30), /* 30 seconds */ 180 IPF_TTLVAL(1800), /* 30 minutes */ 181 IPF_TTLVAL(43200), /* 12 hours */ 182 IPF_TTLVAL(345600), /* 4 days */ 183 }; 184 185 #define NAT_HAS_L4_CHANGED(n) \ 186 (((n)->nat_flags & (IPN_TCPUDPICMP)) && \ 187 (n)->nat_inport != (n)->nat_outport) 188 189 /* ------------------------------------------------------------------------ */ 190 /* Function: fr_natinit */ 191 /* Returns: int - 0 == success, -1 == failure */ 192 /* Parameters: Nil */ 193 /* */ 194 /* Initialise all of the NAT locks, tables and other structures. */ 195 /* ------------------------------------------------------------------------ */ 196 int fr_natinit(ifs) 197 ipf_stack_t *ifs; 198 { 199 int i; 200 201 KMALLOCS(ifs->ifs_nat_table[0], nat_t **, 202 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 203 if (ifs->ifs_nat_table[0] != NULL) 204 bzero((char *)ifs->ifs_nat_table[0], 205 ifs->ifs_ipf_nattable_sz * sizeof(nat_t *)); 206 else 207 return -1; 208 209 KMALLOCS(ifs->ifs_nat_table[1], nat_t **, 210 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 211 if (ifs->ifs_nat_table[1] != NULL) 212 bzero((char *)ifs->ifs_nat_table[1], 213 ifs->ifs_ipf_nattable_sz * sizeof(nat_t *)); 214 else 215 return -2; 216 217 KMALLOCS(ifs->ifs_nat_rules, ipnat_t **, 218 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz); 219 if (ifs->ifs_nat_rules != NULL) 220 bzero((char *)ifs->ifs_nat_rules, 221 ifs->ifs_ipf_natrules_sz * sizeof(ipnat_t *)); 222 else 223 return -3; 224 225 KMALLOCS(ifs->ifs_rdr_rules, ipnat_t **, 226 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz); 227 if (ifs->ifs_rdr_rules != NULL) 228 bzero((char *)ifs->ifs_rdr_rules, 229 ifs->ifs_ipf_rdrrules_sz * sizeof(ipnat_t *)); 230 else 231 return -4; 232 233 KMALLOCS(ifs->ifs_maptable, hostmap_t 
**, 234 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); 235 if (ifs->ifs_maptable != NULL) 236 bzero((char *)ifs->ifs_maptable, 237 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); 238 else 239 return -5; 240 241 ifs->ifs_ipf_hm_maplist = NULL; 242 243 KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[0], u_long *, 244 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 245 if (ifs->ifs_nat_stats.ns_bucketlen[0] == NULL) 246 return -1; 247 bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[0], 248 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 249 250 KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[1], u_long *, 251 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 252 if (ifs->ifs_nat_stats.ns_bucketlen[1] == NULL) 253 return -1; 254 bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[1], 255 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 256 257 if (ifs->ifs_fr_nat_maxbucket == 0) { 258 for (i = ifs->ifs_ipf_nattable_sz; i > 0; i >>= 1) 259 ifs->ifs_fr_nat_maxbucket++; 260 ifs->ifs_fr_nat_maxbucket *= 2; 261 } 262 263 fr_sttab_init(ifs->ifs_nat_tqb, ifs); 264 /* 265 * Increase this because we may have "keep state" following this too 266 * and packet storms can occur if this is removed too quickly. 
267 */ 268 ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcplastack; 269 ifs->ifs_nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_nat_udptq; 270 ifs->ifs_nat_udptq.ifq_ttl = ifs->ifs_fr_defnatage; 271 ifs->ifs_nat_udptq.ifq_ref = 1; 272 ifs->ifs_nat_udptq.ifq_head = NULL; 273 ifs->ifs_nat_udptq.ifq_tail = &ifs->ifs_nat_udptq.ifq_head; 274 MUTEX_INIT(&ifs->ifs_nat_udptq.ifq_lock, "nat ipftq udp tab"); 275 ifs->ifs_nat_udptq.ifq_next = &ifs->ifs_nat_icmptq; 276 ifs->ifs_nat_icmptq.ifq_ttl = ifs->ifs_fr_defnaticmpage; 277 ifs->ifs_nat_icmptq.ifq_ref = 1; 278 ifs->ifs_nat_icmptq.ifq_head = NULL; 279 ifs->ifs_nat_icmptq.ifq_tail = &ifs->ifs_nat_icmptq.ifq_head; 280 MUTEX_INIT(&ifs->ifs_nat_icmptq.ifq_lock, "nat icmp ipftq tab"); 281 ifs->ifs_nat_icmptq.ifq_next = &ifs->ifs_nat_iptq; 282 ifs->ifs_nat_iptq.ifq_ttl = ifs->ifs_fr_defnatipage; 283 ifs->ifs_nat_iptq.ifq_ref = 1; 284 ifs->ifs_nat_iptq.ifq_head = NULL; 285 ifs->ifs_nat_iptq.ifq_tail = &ifs->ifs_nat_iptq.ifq_head; 286 MUTEX_INIT(&ifs->ifs_nat_iptq.ifq_lock, "nat ip ipftq tab"); 287 ifs->ifs_nat_iptq.ifq_next = NULL; 288 289 for (i = 0; i < IPF_TCP_NSTATES; i++) { 290 if (ifs->ifs_nat_tqb[i].ifq_ttl < ifs->ifs_fr_defnaticmpage) 291 ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnaticmpage; 292 #ifdef LARGE_NAT 293 else if (ifs->ifs_nat_tqb[i].ifq_ttl > ifs->ifs_fr_defnatage) 294 ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnatage; 295 #endif 296 } 297 298 /* 299 * Increase this because we may have "keep state" following 300 * this too and packet storms can occur if this is removed 301 * too quickly. 
302 */ 303 ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = 304 ifs->ifs_nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl; 305 306 RWLOCK_INIT(&ifs->ifs_ipf_nat, "ipf IP NAT rwlock"); 307 RWLOCK_INIT(&ifs->ifs_ipf_natfrag, "ipf IP NAT-Frag rwlock"); 308 MUTEX_INIT(&ifs->ifs_ipf_nat_new, "ipf nat new mutex"); 309 MUTEX_INIT(&ifs->ifs_ipf_natio, "ipf nat io mutex"); 310 311 ifs->ifs_fr_nat_init = 1; 312 313 return 0; 314 } 315 316 317 /* ------------------------------------------------------------------------ */ 318 /* Function: nat_addrdr */ 319 /* Returns: Nil */ 320 /* Parameters: n(I) - pointer to NAT rule to add */ 321 /* */ 322 /* Adds a redirect rule to the hash table of redirect rules and the list of */ 323 /* loaded NAT rules. Updates the bitmask indicating which netmasks are in */ 324 /* use by redirect rules. */ 325 /* ------------------------------------------------------------------------ */ 326 static void nat_addrdr(n, ifs) 327 ipnat_t *n; 328 ipf_stack_t *ifs; 329 { 330 ipnat_t **np; 331 u_32_t j; 332 u_int hv; 333 int k; 334 335 k = count4bits(n->in_outmsk); 336 if ((k >= 0) && (k != 32)) 337 ifs->ifs_rdr_masks |= 1 << k; 338 j = (n->in_outip & n->in_outmsk); 339 hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_rdrrules_sz); 340 np = ifs->ifs_rdr_rules + hv; 341 while (*np != NULL) 342 np = &(*np)->in_rnext; 343 n->in_rnext = NULL; 344 n->in_prnext = np; 345 n->in_hv = hv; 346 *np = n; 347 } 348 349 350 /* ------------------------------------------------------------------------ */ 351 /* Function: nat_addnat */ 352 /* Returns: Nil */ 353 /* Parameters: n(I) - pointer to NAT rule to add */ 354 /* */ 355 /* Adds a NAT map rule to the hash table of rules and the list of loaded */ 356 /* NAT rules. Updates the bitmask indicating which netmasks are in use by */ 357 /* redirect rules. 
*/ 358 /* ------------------------------------------------------------------------ */ 359 static void nat_addnat(n, ifs) 360 ipnat_t *n; 361 ipf_stack_t *ifs; 362 { 363 ipnat_t **np; 364 u_32_t j; 365 u_int hv; 366 int k; 367 368 k = count4bits(n->in_inmsk); 369 if ((k >= 0) && (k != 32)) 370 ifs->ifs_nat_masks |= 1 << k; 371 j = (n->in_inip & n->in_inmsk); 372 hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_natrules_sz); 373 np = ifs->ifs_nat_rules + hv; 374 while (*np != NULL) 375 np = &(*np)->in_mnext; 376 n->in_mnext = NULL; 377 n->in_pmnext = np; 378 n->in_hv = hv; 379 *np = n; 380 } 381 382 383 /* ------------------------------------------------------------------------ */ 384 /* Function: nat_delrdr */ 385 /* Returns: Nil */ 386 /* Parameters: n(I) - pointer to NAT rule to delete */ 387 /* */ 388 /* Removes a redirect rule from the hash table of redirect rules. */ 389 /* ------------------------------------------------------------------------ */ 390 void nat_delrdr(n) 391 ipnat_t *n; 392 { 393 if (n->in_rnext) 394 n->in_rnext->in_prnext = n->in_prnext; 395 *n->in_prnext = n->in_rnext; 396 } 397 398 399 /* ------------------------------------------------------------------------ */ 400 /* Function: nat_delnat */ 401 /* Returns: Nil */ 402 /* Parameters: n(I) - pointer to NAT rule to delete */ 403 /* */ 404 /* Removes a NAT map rule from the hash table of NAT map rules. 
*/ 405 /* ------------------------------------------------------------------------ */ 406 void nat_delnat(n) 407 ipnat_t *n; 408 { 409 if (n->in_mnext != NULL) 410 n->in_mnext->in_pmnext = n->in_pmnext; 411 *n->in_pmnext = n->in_mnext; 412 } 413 414 415 /* ------------------------------------------------------------------------ */ 416 /* Function: nat_hostmap */ 417 /* Returns: struct hostmap* - NULL if no hostmap could be created, */ 418 /* else a pointer to the hostmapping to use */ 419 /* Parameters: np(I) - pointer to NAT rule */ 420 /* real(I) - real IP address */ 421 /* map(I) - mapped IP address */ 422 /* port(I) - destination port number */ 423 /* Write Locks: ipf_nat */ 424 /* */ 425 /* Check if an ip address has already been allocated for a given mapping */ 426 /* that is not doing port based translation. If is not yet allocated, then */ 427 /* create a new entry if a non-NULL NAT rule pointer has been supplied. */ 428 /* ------------------------------------------------------------------------ */ 429 static struct hostmap *nat_hostmap(np, src, dst, map, port, ifs) 430 ipnat_t *np; 431 struct in_addr src; 432 struct in_addr dst; 433 struct in_addr map; 434 u_32_t port; 435 ipf_stack_t *ifs; 436 { 437 hostmap_t *hm; 438 u_int hv; 439 440 hv = (src.s_addr ^ dst.s_addr); 441 hv += src.s_addr; 442 hv += dst.s_addr; 443 hv %= HOSTMAP_SIZE; 444 for (hm = ifs->ifs_maptable[hv]; hm; hm = hm->hm_next) 445 if ((hm->hm_srcip.s_addr == src.s_addr) && 446 (hm->hm_dstip.s_addr == dst.s_addr) && 447 ((np == NULL) || (np == hm->hm_ipnat)) && 448 ((port == 0) || (port == hm->hm_port))) { 449 hm->hm_ref++; 450 return hm; 451 } 452 453 if (np == NULL) 454 return NULL; 455 456 KMALLOC(hm, hostmap_t *); 457 if (hm) { 458 hm->hm_hnext = ifs->ifs_ipf_hm_maplist; 459 hm->hm_phnext = &ifs->ifs_ipf_hm_maplist; 460 if (ifs->ifs_ipf_hm_maplist != NULL) 461 ifs->ifs_ipf_hm_maplist->hm_phnext = &hm->hm_hnext; 462 ifs->ifs_ipf_hm_maplist = hm; 463 464 hm->hm_next = 
ifs->ifs_maptable[hv]; 465 hm->hm_pnext = ifs->ifs_maptable + hv; 466 if (ifs->ifs_maptable[hv] != NULL) 467 ifs->ifs_maptable[hv]->hm_pnext = &hm->hm_next; 468 ifs->ifs_maptable[hv] = hm; 469 hm->hm_ipnat = np; 470 hm->hm_srcip = src; 471 hm->hm_dstip = dst; 472 hm->hm_mapip = map; 473 hm->hm_ref = 1; 474 hm->hm_port = port; 475 hm->hm_v = 4; 476 } 477 return hm; 478 } 479 480 481 /* ------------------------------------------------------------------------ */ 482 /* Function: fr_hostmapdel */ 483 /* Returns: Nil */ 484 /* Parameters: hmp(I) - pointer to pointer to hostmap structure */ 485 /* Write Locks: ipf_nat */ 486 /* */ 487 /* Decrement the references to this hostmap structure by one. If this */ 488 /* reaches zero then remove it and free it. */ 489 /* ------------------------------------------------------------------------ */ 490 void fr_hostmapdel(hmp) 491 struct hostmap **hmp; 492 { 493 struct hostmap *hm; 494 495 hm = *hmp; 496 *hmp = NULL; 497 498 hm->hm_ref--; 499 if (hm->hm_ref == 0) { 500 if (hm->hm_next) 501 hm->hm_next->hm_pnext = hm->hm_pnext; 502 *hm->hm_pnext = hm->hm_next; 503 if (hm->hm_hnext) 504 hm->hm_hnext->hm_phnext = hm->hm_phnext; 505 *hm->hm_phnext = hm->hm_hnext; 506 KFREE(hm); 507 } 508 } 509 510 511 /* ------------------------------------------------------------------------ */ 512 /* Function: fix_outcksum */ 513 /* Returns: Nil */ 514 /* Parameters: sp(I) - location of 16bit checksum to update */ 515 /* n((I) - amount to adjust checksum by */ 516 /* */ 517 /* Adjusts the 16bit checksum by "n" for packets going out. 
*/ 518 /* ------------------------------------------------------------------------ */ 519 void fix_outcksum(sp, n) 520 u_short *sp; 521 u_32_t n; 522 { 523 u_short sumshort; 524 u_32_t sum1; 525 526 if (n == 0) 527 return; 528 529 sum1 = (~ntohs(*sp)) & 0xffff; 530 sum1 += (n); 531 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 532 /* Again */ 533 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 534 sumshort = ~(u_short)sum1; 535 *(sp) = htons(sumshort); 536 } 537 538 539 /* ------------------------------------------------------------------------ */ 540 /* Function: fix_incksum */ 541 /* Returns: Nil */ 542 /* Parameters: sp(I) - location of 16bit checksum to update */ 543 /* n((I) - amount to adjust checksum by */ 544 /* */ 545 /* Adjusts the 16bit checksum by "n" for packets going in. */ 546 /* ------------------------------------------------------------------------ */ 547 void fix_incksum(sp, n) 548 u_short *sp; 549 u_32_t n; 550 { 551 u_short sumshort; 552 u_32_t sum1; 553 554 if (n == 0) 555 return; 556 557 sum1 = (~ntohs(*sp)) & 0xffff; 558 sum1 += ~(n) & 0xffff; 559 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 560 /* Again */ 561 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 562 sumshort = ~(u_short)sum1; 563 *(sp) = htons(sumshort); 564 } 565 566 567 /* ------------------------------------------------------------------------ */ 568 /* Function: fix_datacksum */ 569 /* Returns: Nil */ 570 /* Parameters: sp(I) - location of 16bit checksum to update */ 571 /* n((I) - amount to adjust checksum by */ 572 /* */ 573 /* Fix_datacksum is used *only* for the adjustments of checksums in the */ 574 /* data section of an IP packet. */ 575 /* */ 576 /* The only situation in which you need to do this is when NAT'ing an */ 577 /* ICMP error message. Such a message, contains in its body the IP header */ 578 /* of the original IP packet, that causes the error. 
*/ 579 /* */ 580 /* You can't use fix_incksum or fix_outcksum in that case, because for the */ 581 /* kernel the data section of the ICMP error is just data, and no special */ 582 /* processing like hardware cksum or ntohs processing have been done by the */ 583 /* kernel on the data section. */ 584 /* ------------------------------------------------------------------------ */ 585 void fix_datacksum(sp, n) 586 u_short *sp; 587 u_32_t n; 588 { 589 u_short sumshort; 590 u_32_t sum1; 591 592 if (n == 0) 593 return; 594 595 sum1 = (~ntohs(*sp)) & 0xffff; 596 sum1 += (n); 597 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 598 /* Again */ 599 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 600 sumshort = ~(u_short)sum1; 601 *(sp) = htons(sumshort); 602 } 603 604 605 /* ------------------------------------------------------------------------ */ 606 /* Function: fr_nat_ioctl */ 607 /* Returns: int - 0 == success, != 0 == failure */ 608 /* Parameters: data(I) - pointer to ioctl data */ 609 /* cmd(I) - ioctl command integer */ 610 /* mode(I) - file mode bits used with open */ 611 /* */ 612 /* Processes an ioctl call made to operate on the IP Filter NAT device. */ 613 /* ------------------------------------------------------------------------ */ 614 int fr_nat_ioctl(data, cmd, mode, uid, ctx, ifs) 615 ioctlcmd_t cmd; 616 caddr_t data; 617 int mode, uid; 618 void *ctx; 619 ipf_stack_t *ifs; 620 { 621 ipnat_t *nat, *nt, *n = NULL, **np = NULL; 622 int error = 0, ret, arg, getlock; 623 ipnat_t natd; 624 625 #if (BSD >= 199306) && defined(_KERNEL) 626 if ((securelevel >= 2) && (mode & FWRITE)) 627 return EPERM; 628 #endif 629 630 #if defined(__osf__) && defined(_KERNEL) 631 getlock = 0; 632 #else 633 getlock = (mode & NAT_LOCKHELD) ? 
0 : 1; 634 #endif 635 636 nat = NULL; /* XXX gcc -Wuninitialized */ 637 if (cmd == (ioctlcmd_t)SIOCADNAT) { 638 KMALLOC(nt, ipnat_t *); 639 } else { 640 nt = NULL; 641 } 642 643 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) { 644 if (mode & NAT_SYSSPACE) { 645 bcopy(data, (char *)&natd, sizeof(natd)); 646 error = 0; 647 } else { 648 error = fr_inobj(data, &natd, IPFOBJ_IPNAT); 649 } 650 651 } else if (cmd == (ioctlcmd_t)SIOCIPFFL) { /* SIOCFLNAT & SIOCCNATL */ 652 BCOPYIN(data, &arg, sizeof(arg)); 653 } 654 655 if (error != 0) 656 goto done; 657 658 /* 659 * For add/delete, look to see if the NAT entry is already present 660 */ 661 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) { 662 nat = &natd; 663 if (nat->in_v == 0) /* For backward compat. */ 664 nat->in_v = 4; 665 nat->in_flags &= IPN_USERFLAGS; 666 if ((nat->in_redir & NAT_MAPBLK) == 0) { 667 if ((nat->in_flags & IPN_SPLIT) == 0) 668 nat->in_inip &= nat->in_inmsk; 669 if ((nat->in_flags & IPN_IPRANGE) == 0) 670 nat->in_outip &= nat->in_outmsk; 671 } 672 MUTEX_ENTER(&ifs->ifs_ipf_natio); 673 for (np = &ifs->ifs_nat_list; ((n = *np) != NULL); 674 np = &n->in_next) 675 if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags, 676 IPN_CMPSIZ) == 0) { 677 if (nat->in_redir == NAT_REDIRECT && 678 nat->in_pnext != n->in_pnext) 679 continue; 680 break; 681 } 682 } 683 684 switch (cmd) 685 { 686 case SIOCGENITER : 687 { 688 ipfgeniter_t iter; 689 ipftoken_t *token; 690 691 error = fr_inobj(data, &iter, IPFOBJ_GENITER); 692 if (error != 0) 693 break; 694 695 token = ipf_findtoken(iter.igi_type, uid, ctx, ifs); 696 if (token != NULL) 697 error = nat_iterator(token, &iter, ifs); 698 else 699 error = ESRCH; 700 RWLOCK_EXIT(&ifs->ifs_ipf_tokens); 701 break; 702 } 703 #ifdef IPFILTER_LOG 704 case SIOCIPFFB : 705 { 706 int tmp; 707 708 if (!(mode & FWRITE)) 709 error = EPERM; 710 else { 711 tmp = ipflog_clear(IPL_LOGNAT, ifs); 712 BCOPYOUT((char *)&tmp, (char *)data, sizeof(tmp)); 
713 } 714 break; 715 } 716 case SIOCSETLG : 717 if (!(mode & FWRITE)) 718 error = EPERM; 719 else { 720 BCOPYIN((char *)data, 721 (char *)&ifs->ifs_nat_logging, 722 sizeof(ifs->ifs_nat_logging)); 723 } 724 break; 725 case SIOCGETLG : 726 BCOPYOUT((char *)&ifs->ifs_nat_logging, (char *)data, 727 sizeof(ifs->ifs_nat_logging)); 728 break; 729 case FIONREAD : 730 arg = ifs->ifs_iplused[IPL_LOGNAT]; 731 BCOPYOUT(&arg, data, sizeof(arg)); 732 break; 733 #endif 734 case SIOCADNAT : 735 if (!(mode & FWRITE)) { 736 error = EPERM; 737 } else if (n != NULL) { 738 error = EEXIST; 739 } else if (nt == NULL) { 740 error = ENOMEM; 741 } 742 if (error != 0) { 743 MUTEX_EXIT(&ifs->ifs_ipf_natio); 744 break; 745 } 746 bcopy((char *)nat, (char *)nt, sizeof(*n)); 747 error = nat_siocaddnat(nt, np, getlock, ifs); 748 MUTEX_EXIT(&ifs->ifs_ipf_natio); 749 if (error == 0) 750 nt = NULL; 751 break; 752 case SIOCRMNAT : 753 if (!(mode & FWRITE)) { 754 error = EPERM; 755 n = NULL; 756 } else if (n == NULL) { 757 error = ESRCH; 758 } 759 760 if (error != 0) { 761 MUTEX_EXIT(&ifs->ifs_ipf_natio); 762 break; 763 } 764 nat_siocdelnat(n, np, getlock, ifs); 765 766 MUTEX_EXIT(&ifs->ifs_ipf_natio); 767 n = NULL; 768 break; 769 case SIOCGNATS : 770 ifs->ifs_nat_stats.ns_table[0] = ifs->ifs_nat_table[0]; 771 ifs->ifs_nat_stats.ns_table[1] = ifs->ifs_nat_table[1]; 772 ifs->ifs_nat_stats.ns_list = ifs->ifs_nat_list; 773 ifs->ifs_nat_stats.ns_maptable = ifs->ifs_maptable; 774 ifs->ifs_nat_stats.ns_maplist = ifs->ifs_ipf_hm_maplist; 775 ifs->ifs_nat_stats.ns_nattab_max = ifs->ifs_ipf_nattable_max; 776 ifs->ifs_nat_stats.ns_nattab_sz = ifs->ifs_ipf_nattable_sz; 777 ifs->ifs_nat_stats.ns_rultab_sz = ifs->ifs_ipf_natrules_sz; 778 ifs->ifs_nat_stats.ns_rdrtab_sz = ifs->ifs_ipf_rdrrules_sz; 779 ifs->ifs_nat_stats.ns_hostmap_sz = ifs->ifs_ipf_hostmap_sz; 780 ifs->ifs_nat_stats.ns_instances = ifs->ifs_nat_instances; 781 ifs->ifs_nat_stats.ns_apslist = ifs->ifs_ap_sess_list; 782 error = fr_outobj(data, 
&ifs->ifs_nat_stats, IPFOBJ_NATSTAT); 783 break; 784 case SIOCGNATL : 785 { 786 natlookup_t nl; 787 788 if (getlock) { 789 READ_ENTER(&ifs->ifs_ipf_nat); 790 } 791 error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP); 792 if (nl.nl_v != 6) 793 nl.nl_v = 4; 794 if (error == 0) { 795 void *ptr; 796 797 switch (nl.nl_v) 798 { 799 case 4: 800 ptr = nat_lookupredir(&nl, ifs); 801 break; 802 #ifdef USE_INET6 803 case 6: 804 ptr = nat6_lookupredir(&nl, ifs); 805 break; 806 #endif 807 default: 808 ptr = NULL; 809 break; 810 } 811 812 if (ptr != NULL) { 813 error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP); 814 } else { 815 error = ESRCH; 816 } 817 } 818 if (getlock) { 819 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 820 } 821 break; 822 } 823 case SIOCIPFFL : /* old SIOCFLNAT & SIOCCNATL */ 824 if (!(mode & FWRITE)) { 825 error = EPERM; 826 break; 827 } 828 if (getlock) { 829 WRITE_ENTER(&ifs->ifs_ipf_nat); 830 } 831 error = 0; 832 if (arg == 0) 833 ret = nat_flushtable(ifs); 834 else if (arg == 1) 835 ret = nat_clearlist(ifs); 836 else if (arg >= 2 && arg <= 4) 837 ret = nat_extraflush(arg - 2, ifs); 838 else 839 error = EINVAL; 840 if (getlock) { 841 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 842 } 843 if (error == 0) { 844 BCOPYOUT(&ret, data, sizeof(ret)); 845 } 846 break; 847 case SIOCPROXY : 848 error = appr_ioctl(data, cmd, mode, ifs); 849 break; 850 case SIOCSTLCK : 851 if (!(mode & FWRITE)) { 852 error = EPERM; 853 } else { 854 fr_lock(data, &ifs->ifs_fr_nat_lock); 855 } 856 break; 857 case SIOCSTPUT : 858 if ((mode & FWRITE) != 0) { 859 error = fr_natputent(data, getlock, ifs); 860 } else { 861 error = EACCES; 862 } 863 break; 864 case SIOCSTGSZ : 865 if (ifs->ifs_fr_nat_lock) { 866 if (getlock) { 867 READ_ENTER(&ifs->ifs_ipf_nat); 868 } 869 error = fr_natgetsz(data, ifs); 870 if (getlock) { 871 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 872 } 873 } else 874 error = EACCES; 875 break; 876 case SIOCSTGET : 877 if (ifs->ifs_fr_nat_lock) { 878 if (getlock) { 879 READ_ENTER(&ifs->ifs_ipf_nat); 880 } 881 error = 
fr_natgetent(data, ifs); 882 if (getlock) { 883 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 884 } 885 } else 886 error = EACCES; 887 break; 888 case SIOCIPFDELTOK : 889 (void) BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg)); 890 error = ipf_deltoken(arg, uid, ctx, ifs); 891 break; 892 default : 893 error = EINVAL; 894 break; 895 } 896 done: 897 if (nt) 898 KFREE(nt); 899 return error; 900 } 901 902 903 /* ------------------------------------------------------------------------ */ 904 /* Function: nat_siocaddnat */ 905 /* Returns: int - 0 == success, != 0 == failure */ 906 /* Parameters: n(I) - pointer to new NAT rule */ 907 /* np(I) - pointer to where to insert new NAT rule */ 908 /* getlock(I) - flag indicating if lock on ipf_nat is held */ 909 /* Mutex Locks: ipf_natio */ 910 /* */ 911 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ 912 /* from information passed to the kernel, then add it to the appropriate */ 913 /* NAT rule table(s). */ 914 /* ------------------------------------------------------------------------ */ 915 static int nat_siocaddnat(n, np, getlock, ifs) 916 ipnat_t *n, **np; 917 int getlock; 918 ipf_stack_t *ifs; 919 { 920 int error = 0, i, j; 921 922 if (nat_resolverule(n, ifs) != 0) 923 return ENOENT; 924 925 if ((n->in_age[0] == 0) && (n->in_age[1] != 0)) 926 return EINVAL; 927 928 n->in_use = 0; 929 if (n->in_redir & NAT_MAPBLK) 930 n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk); 931 else if (n->in_flags & IPN_AUTOPORTMAP) 932 n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk); 933 else if (n->in_flags & IPN_IPRANGE) 934 n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip); 935 else if (n->in_flags & IPN_SPLIT) 936 n->in_space = 2; 937 else if (n->in_outmsk != 0) 938 n->in_space = ~ntohl(n->in_outmsk); 939 else 940 n->in_space = 1; 941 942 /* 943 * Calculate the number of valid IP addresses in the output 944 * mapping range. In all cases, the range is inclusive of 945 * the start and ending IP addresses. 
946 * If to a CIDR address, lose 2: broadcast + network address 947 * (so subtract 1) 948 * If to a range, add one. 949 * If to a single IP address, set to 1. 950 */ 951 if (n->in_space) { 952 if ((n->in_flags & IPN_IPRANGE) != 0) 953 n->in_space += 1; 954 else 955 n->in_space -= 1; 956 } else 957 n->in_space = 1; 958 959 #ifdef USE_INET6 960 if (n->in_v == 6 && (n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0 && 961 !IP6_ISONES(&n->in_out[1]) && !IP6_ISZERO(&n->in_out[1])) 962 IP6_ADD(&n->in_out[0], 1, &n->in_next6) 963 else if (n->in_v == 6 && 964 (n->in_flags & IPN_SPLIT) && (n->in_redir & NAT_REDIRECT)) 965 n->in_next6 = n->in_in[0]; 966 else if (n->in_v == 6) 967 n->in_next6 = n->in_out[0]; 968 else 969 #endif 970 if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) && 971 ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0)) 972 n->in_nip = ntohl(n->in_outip) + 1; 973 else if ((n->in_flags & IPN_SPLIT) && 974 (n->in_redir & NAT_REDIRECT)) 975 n->in_nip = ntohl(n->in_inip); 976 else 977 n->in_nip = ntohl(n->in_outip); 978 979 if (n->in_redir & NAT_MAP) { 980 n->in_pnext = ntohs(n->in_pmin); 981 /* 982 * Multiply by the number of ports made available. 983 */ 984 if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) { 985 n->in_space *= (ntohs(n->in_pmax) - 986 ntohs(n->in_pmin) + 1); 987 /* 988 * Because two different sources can map to 989 * different destinations but use the same 990 * local IP#/port #. 991 * If the result is smaller than in_space, then 992 * we may have wrapped around 32bits. 993 */ 994 i = n->in_inmsk; 995 if ((i != 0) && (i != 0xffffffff)) { 996 j = n->in_space * (~ntohl(i) + 1); 997 if (j >= n->in_space) 998 n->in_space = j; 999 else 1000 n->in_space = 0xffffffff; 1001 } 1002 } 1003 /* 1004 * If no protocol is specified, multiple by 256 to allow for 1005 * at least one IP:IP mapping per protocol. 
1006 */ 1007 if ((n->in_flags & IPN_TCPUDPICMP) == 0) { 1008 j = n->in_space * 256; 1009 if (j >= n->in_space) 1010 n->in_space = j; 1011 else 1012 n->in_space = 0xffffffff; 1013 } 1014 } 1015 1016 /* Otherwise, these fields are preset */ 1017 1018 if (getlock) { 1019 WRITE_ENTER(&ifs->ifs_ipf_nat); 1020 } 1021 n->in_next = NULL; 1022 *np = n; 1023 1024 if (n->in_age[0] != 0) 1025 n->in_tqehead[0] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe, 1026 n->in_age[0], ifs); 1027 1028 if (n->in_age[1] != 0) 1029 n->in_tqehead[1] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe, 1030 n->in_age[1], ifs); 1031 1032 if (n->in_redir & NAT_REDIRECT) { 1033 n->in_flags &= ~IPN_NOTDST; 1034 switch (n->in_v) 1035 { 1036 case 4 : 1037 nat_addrdr(n, ifs); 1038 break; 1039 #ifdef USE_INET6 1040 case 6 : 1041 nat6_addrdr(n, ifs); 1042 break; 1043 #endif 1044 default : 1045 break; 1046 } 1047 } 1048 if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) { 1049 n->in_flags &= ~IPN_NOTSRC; 1050 switch (n->in_v) 1051 { 1052 case 4 : 1053 nat_addnat(n, ifs); 1054 break; 1055 #ifdef USE_INET6 1056 case 6 : 1057 nat6_addnat(n, ifs); 1058 break; 1059 #endif 1060 default : 1061 break; 1062 } 1063 } 1064 n = NULL; 1065 ifs->ifs_nat_stats.ns_rules++; 1066 if (getlock) { 1067 RWLOCK_EXIT(&ifs->ifs_ipf_nat); /* WRITE */ 1068 } 1069 1070 return error; 1071 } 1072 1073 1074 /* ------------------------------------------------------------------------ */ 1075 /* Function: nat_resolvrule */ 1076 /* Returns: int - 0 == success, -1 == failure */ 1077 /* Parameters: n(I) - pointer to NAT rule */ 1078 /* */ 1079 /* Resolve some of the details inside the NAT rule. Includes resolving */ 1080 /* any specified interfaces and proxy labels, and determines whether or not */ 1081 /* all proxy labels are correctly specified. */ 1082 /* */ 1083 /* Called by nat_siocaddnat() (SIOCADNAT) and fr_natputent (SIOCSTPUT). 
*/ 1084 /* ------------------------------------------------------------------------ */ 1085 static int nat_resolverule(n, ifs) 1086 ipnat_t *n; 1087 ipf_stack_t *ifs; 1088 { 1089 n->in_ifnames[0][LIFNAMSIZ - 1] = '\0'; 1090 n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], n->in_v, ifs); 1091 1092 n->in_ifnames[1][LIFNAMSIZ - 1] = '\0'; 1093 if (n->in_ifnames[1][0] == '\0') { 1094 (void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ); 1095 n->in_ifps[1] = n->in_ifps[0]; 1096 } else { 1097 n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], n->in_v, ifs); 1098 } 1099 1100 if (n->in_plabel[0] != '\0') { 1101 n->in_apr = appr_lookup(n->in_p, n->in_plabel, ifs); 1102 if (n->in_apr == NULL) 1103 return -1; 1104 } 1105 return 0; 1106 } 1107 1108 1109 /* ------------------------------------------------------------------------ */ 1110 /* Function: nat_siocdelnat */ 1111 /* Returns: int - 0 == success, != 0 == failure */ 1112 /* Parameters: n(I) - pointer to new NAT rule */ 1113 /* np(I) - pointer to where to insert new NAT rule */ 1114 /* getlock(I) - flag indicating if lock on ipf_nat is held */ 1115 /* Mutex Locks: ipf_natio */ 1116 /* */ 1117 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ 1118 /* from information passed to the kernel, then add it to the appropriate */ 1119 /* NAT rule table(s). 
*/ 1120 /* ------------------------------------------------------------------------ */ 1121 static void nat_siocdelnat(n, np, getlock, ifs) 1122 ipnat_t *n, **np; 1123 int getlock; 1124 ipf_stack_t *ifs; 1125 { 1126 int i; 1127 1128 if (getlock) { 1129 WRITE_ENTER(&ifs->ifs_ipf_nat); 1130 } 1131 if (n->in_redir & NAT_REDIRECT) 1132 nat_delrdr(n); 1133 if (n->in_redir & (NAT_MAPBLK|NAT_MAP)) 1134 nat_delnat(n); 1135 if (ifs->ifs_nat_list == NULL) { 1136 ifs->ifs_nat_masks = 0; 1137 ifs->ifs_rdr_masks = 0; 1138 for (i = 0; i < 4; i++) { 1139 ifs->ifs_nat6_masks[i] = 0; 1140 ifs->ifs_rdr6_masks[i] = 0; 1141 } 1142 } 1143 1144 if (n->in_tqehead[0] != NULL) { 1145 if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) { 1146 fr_freetimeoutqueue(n->in_tqehead[0], ifs); 1147 } 1148 } 1149 1150 if (n->in_tqehead[1] != NULL) { 1151 if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) { 1152 fr_freetimeoutqueue(n->in_tqehead[1], ifs); 1153 } 1154 } 1155 1156 *np = n->in_next; 1157 1158 if (n->in_use == 0) { 1159 if (n->in_apr) 1160 appr_free(n->in_apr); 1161 KFREE(n); 1162 ifs->ifs_nat_stats.ns_rules--; 1163 } else { 1164 n->in_flags |= IPN_DELETE; 1165 n->in_next = NULL; 1166 } 1167 if (getlock) { 1168 RWLOCK_EXIT(&ifs->ifs_ipf_nat); /* READ/WRITE */ 1169 } 1170 } 1171 1172 1173 /* ------------------------------------------------------------------------ */ 1174 /* Function: fr_natgetsz */ 1175 /* Returns: int - 0 == success, != 0 is the error value. */ 1176 /* Parameters: data(I) - pointer to natget structure with kernel pointer */ 1177 /* get the size of. */ 1178 /* */ 1179 /* Handle SIOCSTGSZ. */ 1180 /* Return the size of the nat list entry to be copied back to user space. */ 1181 /* The size of the entry is stored in the ng_sz field and the enture natget */ 1182 /* structure is copied back to the user. 
*/ 1183 /* ------------------------------------------------------------------------ */ 1184 static int fr_natgetsz(data, ifs) 1185 caddr_t data; 1186 ipf_stack_t *ifs; 1187 { 1188 ap_session_t *aps; 1189 nat_t *nat, *n; 1190 natget_t ng; 1191 1192 BCOPYIN(data, &ng, sizeof(ng)); 1193 1194 nat = ng.ng_ptr; 1195 if (!nat) { 1196 nat = ifs->ifs_nat_instances; 1197 ng.ng_sz = 0; 1198 /* 1199 * Empty list so the size returned is 0. Simple. 1200 */ 1201 if (nat == NULL) { 1202 BCOPYOUT(&ng, data, sizeof(ng)); 1203 return 0; 1204 } 1205 } else { 1206 /* 1207 * Make sure the pointer we're copying from exists in the 1208 * current list of entries. Security precaution to prevent 1209 * copying of random kernel data. 1210 */ 1211 for (n = ifs->ifs_nat_instances; n; n = n->nat_next) 1212 if (n == nat) 1213 break; 1214 if (!n) 1215 return ESRCH; 1216 } 1217 1218 /* 1219 * Incluse any space required for proxy data structures. 1220 */ 1221 ng.ng_sz = sizeof(nat_save_t); 1222 aps = nat->nat_aps; 1223 if (aps != NULL) { 1224 ng.ng_sz += sizeof(ap_session_t) - 4; 1225 if (aps->aps_data != 0) 1226 ng.ng_sz += aps->aps_psiz; 1227 } 1228 1229 BCOPYOUT(&ng, data, sizeof(ng)); 1230 return 0; 1231 } 1232 1233 1234 /* ------------------------------------------------------------------------ */ 1235 /* Function: fr_natgetent */ 1236 /* Returns: int - 0 == success, != 0 is the error value. */ 1237 /* Parameters: data(I) - pointer to natget structure with kernel pointer */ 1238 /* to NAT structure to copy out. */ 1239 /* */ 1240 /* Handle SIOCSTGET. */ 1241 /* Copies out NAT entry to user space. 
Any additional data held for a */ 1242 /* proxy is also copied, as to is the NAT rule which was responsible for it */ 1243 /* ------------------------------------------------------------------------ */ 1244 static int fr_natgetent(data, ifs) 1245 caddr_t data; 1246 ipf_stack_t *ifs; 1247 { 1248 int error, outsize; 1249 ap_session_t *aps; 1250 nat_save_t *ipn, ipns; 1251 nat_t *n, *nat; 1252 1253 error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE); 1254 if (error != 0) 1255 return error; 1256 1257 if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920)) 1258 return EINVAL; 1259 1260 KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize); 1261 if (ipn == NULL) 1262 return ENOMEM; 1263 1264 ipn->ipn_dsize = ipns.ipn_dsize; 1265 nat = ipns.ipn_next; 1266 if (nat == NULL) { 1267 nat = ifs->ifs_nat_instances; 1268 if (nat == NULL) { 1269 if (ifs->ifs_nat_instances == NULL) 1270 error = ENOENT; 1271 goto finished; 1272 } 1273 } else { 1274 /* 1275 * Make sure the pointer we're copying from exists in the 1276 * current list of entries. Security precaution to prevent 1277 * copying of random kernel data. 1278 */ 1279 for (n = ifs->ifs_nat_instances; n; n = n->nat_next) 1280 if (n == nat) 1281 break; 1282 if (n == NULL) { 1283 error = ESRCH; 1284 goto finished; 1285 } 1286 } 1287 ipn->ipn_next = nat->nat_next; 1288 1289 /* 1290 * Copy the NAT structure. 1291 */ 1292 bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat)); 1293 1294 /* 1295 * If we have a pointer to the NAT rule it belongs to, save that too. 1296 */ 1297 if (nat->nat_ptr != NULL) 1298 bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat, 1299 sizeof(ipn->ipn_ipnat)); 1300 1301 /* 1302 * If we also know the NAT entry has an associated filter rule, 1303 * save that too. 
1304 */ 1305 if (nat->nat_fr != NULL) 1306 bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr, 1307 sizeof(ipn->ipn_fr)); 1308 1309 /* 1310 * Last but not least, if there is an application proxy session set 1311 * up for this NAT entry, then copy that out too, including any 1312 * private data saved along side it by the proxy. 1313 */ 1314 aps = nat->nat_aps; 1315 outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data); 1316 if (aps != NULL) { 1317 char *s; 1318 1319 if (outsize < sizeof(*aps)) { 1320 error = ENOBUFS; 1321 goto finished; 1322 } 1323 1324 s = ipn->ipn_data; 1325 bcopy((char *)aps, s, sizeof(*aps)); 1326 s += sizeof(*aps); 1327 outsize -= sizeof(*aps); 1328 if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz)) 1329 bcopy(aps->aps_data, s, aps->aps_psiz); 1330 else 1331 error = ENOBUFS; 1332 } 1333 if (error == 0) { 1334 error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize); 1335 } 1336 1337 finished: 1338 if (ipn != NULL) { 1339 KFREES(ipn, ipns.ipn_dsize); 1340 } 1341 return error; 1342 } 1343 1344 /* ------------------------------------------------------------------------ */ 1345 /* Function: nat_calc_chksum_diffs */ 1346 /* Returns: void */ 1347 /* Parameters: nat - pointer to NAT table entry */ 1348 /* */ 1349 /* Function calculates chksum deltas for IP header (nat_ipsumd) and TCP/UDP */ 1350 /* headers (nat_sumd). The things for L4 (UDP/TCP) get complicated when */ 1351 /* we are dealing with partial chksum offload. For these cases we need to */ 1352 /* compute a 'partial chksum delta'. The 'partial chksum delta'is stored */ 1353 /* into nat_sumd[1], while ordinary chksum delta for TCP/UDP is in */ 1354 /* nat_sumd[0]. */ 1355 /* */ 1356 /* The function accepts initialized NAT table entry and computes the deltas */ 1357 /* from nat_inip/nat_outip members. The function is called right before */ 1358 /* the new entry is inserted into the table. 
*/ 1359 /* */ 1360 /* The ipsumd (IP hedaer chksum delta adjustment) is computed as a chksum */ 1361 /* of delta between original and new IP addresses. */ 1362 /* */ 1363 /* the nat_sumd[0] (TCP/UDP header chksum delta adjustment) is computed as */ 1364 /* a chkusm of delta between original an new IP addrress:port tupples. */ 1365 /* */ 1366 /* Some facts about chksum, we should remember: */ 1367 /* IP header chksum covers IP header only */ 1368 /* */ 1369 /* TCP/UDP chksum covers data payload and so called pseudo header */ 1370 /* SRC, DST IP address */ 1371 /* SRC, DST Port */ 1372 /* length of payload */ 1373 /* */ 1374 /* The partial chksum delta (nat_sumd[1] is used to adjust db_ckusm16 */ 1375 /* member of dblk_t structure. The db_ckusm16 member is not part of */ 1376 /* IP/UDP/TCP header it is 16 bit value computed by NIC driver with partial */ 1377 /* chksum offload capacbility for every inbound packet. The db_cksum16 is */ 1378 /* stored along with other IP packet data in dblk_t structure and used in */ 1379 /* for IP/UDP/TCP chksum validation later in ip.c. */ 1380 /* */ 1381 /* The partial chksum delta (adjustment, nat_sumd[1]) is computed as chksum */ 1382 /* of delta between new and orig address. NOTE: the order of operands for */ 1383 /* partial delta operation is swapped compared to computing the IP/TCP/UDP */ 1384 /* header adjustment. It is by design see (IP_CKSUM_RECV() macro in ip.c). */ 1385 /* */ 1386 /* ------------------------------------------------------------------------ */ 1387 void nat_calc_chksum_diffs(nat) 1388 nat_t *nat; 1389 { 1390 u_32_t sum_orig = 0; 1391 u_32_t sum_changed = 0; 1392 u_32_t sumd; 1393 u_32_t ipsum_orig = 0; 1394 u_32_t ipsum_changed = 0; 1395 1396 if (nat->nat_v != 4 && nat->nat_v != 6) 1397 return; 1398 1399 /* 1400 * the switch calculates operands for CALC_SUMD(), 1401 * which will compute the partial chksum delta. 
1402 */ 1403 switch (nat->nat_dir) 1404 { 1405 case NAT_INBOUND: 1406 /* 1407 * we are dealing with RDR rule (DST address gets 1408 * modified on packet from client) 1409 */ 1410 if (nat->nat_v == 4) { 1411 sum_changed = LONG_SUM(ntohl(nat->nat_inip.s_addr)); 1412 sum_orig = LONG_SUM(ntohl(nat->nat_outip.s_addr)); 1413 } else { 1414 sum_changed = LONG_SUM6(&nat->nat_inip6); 1415 sum_orig = LONG_SUM6(&nat->nat_outip6); 1416 } 1417 break; 1418 case NAT_OUTBOUND: 1419 /* 1420 * we are dealing with MAP rule (SRC address gets 1421 * modified on packet from client) 1422 */ 1423 if (nat->nat_v == 4) { 1424 sum_changed = LONG_SUM(ntohl(nat->nat_outip.s_addr)); 1425 sum_orig = LONG_SUM(ntohl(nat->nat_inip.s_addr)); 1426 } else { 1427 sum_changed = LONG_SUM6(&nat->nat_outip6); 1428 sum_orig = LONG_SUM6(&nat->nat_inip6); 1429 } 1430 break; 1431 default: ; 1432 break; 1433 } 1434 1435 /* 1436 * we also preserve CALC_SUMD() operands here, for IP chksum delta 1437 * calculation, which happens at the end of function. 1438 */ 1439 ipsum_changed = sum_changed; 1440 ipsum_orig = sum_orig; 1441 /* 1442 * NOTE: the order of operands for partial chksum adjustment 1443 * computation has to be swapped! 1444 */ 1445 CALC_SUMD(sum_changed, sum_orig, sumd); 1446 nat->nat_sumd[1] = (sumd & 0xffff) + (sumd >> 16); 1447 1448 if (nat->nat_flags & (IPN_TCPUDP | IPN_ICMPQUERY)) { 1449 1450 /* 1451 * switch calculates operands for CALC_SUMD(), which will 1452 * compute the full chksum delta. 
1453 */ 1454 switch (nat->nat_dir) 1455 { 1456 case NAT_INBOUND: 1457 if (nat->nat_v == 4) { 1458 sum_changed = LONG_SUM( 1459 ntohl(nat->nat_inip.s_addr) + 1460 ntohs(nat->nat_inport)); 1461 sum_orig = LONG_SUM( 1462 ntohl(nat->nat_outip.s_addr) + 1463 ntohs(nat->nat_outport)); 1464 } else { 1465 sum_changed = LONG_SUM6(&nat->nat_inip6) + 1466 ntohs(nat->nat_inport); 1467 sum_orig = LONG_SUM6(&nat->nat_outip6) + 1468 ntohs(nat->nat_outport); 1469 } 1470 break; 1471 case NAT_OUTBOUND: 1472 if (nat->nat_v == 4) { 1473 sum_changed = LONG_SUM( 1474 ntohl(nat->nat_outip.s_addr) + 1475 ntohs(nat->nat_outport)); 1476 sum_orig = LONG_SUM( 1477 ntohl(nat->nat_inip.s_addr) + 1478 ntohs(nat->nat_inport)); 1479 } else { 1480 sum_changed = LONG_SUM6(&nat->nat_outip6) + 1481 ntohs(nat->nat_outport); 1482 sum_orig = LONG_SUM6(&nat->nat_inip6) + 1483 ntohs(nat->nat_inport); 1484 } 1485 break; 1486 default: ; 1487 break; 1488 } 1489 1490 CALC_SUMD(sum_orig, sum_changed, sumd); 1491 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); 1492 1493 if (!(nat->nat_flags & IPN_TCPUDP)) { 1494 /* 1495 * partial HW chksum offload works for TCP/UDP headers only, 1496 * so we need to enforce full chksum adjustment for ICMP 1497 */ 1498 nat->nat_sumd[1] = nat->nat_sumd[0]; 1499 } 1500 } 1501 else 1502 nat->nat_sumd[0] = nat->nat_sumd[1]; 1503 1504 /* 1505 * we may reuse the already computed nat_sumd[0] for IP header chksum 1506 * adjustment in case the L4 (TCP/UDP header) is not changed by NAT. 1507 */ 1508 if (nat->nat_v == 4) { 1509 if (NAT_HAS_L4_CHANGED(nat)) { 1510 /* 1511 * bad luck, NAT changes also the L4 header, use IP 1512 * addresses to compute chksum adjustment for IP header. 1513 */ 1514 CALC_SUMD(ipsum_orig, ipsum_changed, sumd); 1515 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16); 1516 } else { 1517 /* 1518 * the NAT does not change L4 hdr -> reuse chksum 1519 * adjustment for IP hdr. 
1520 */ 1521 nat->nat_ipsumd = nat->nat_sumd[0]; 1522 1523 /* 1524 * if L4 header does not use chksum - zero out deltas 1525 */ 1526 if (!(nat->nat_flags & IPN_TCPUDP)) { 1527 nat->nat_sumd[0] = 0; 1528 nat->nat_sumd[1] = 0; 1529 } 1530 } 1531 } 1532 1533 return; 1534 } 1535 1536 /* ------------------------------------------------------------------------ */ 1537 /* Function: fr_natputent */ 1538 /* Returns: int - 0 == success, != 0 is the error value. */ 1539 /* Parameters: data(I) - pointer to natget structure with NAT */ 1540 /* structure information to load into the kernel */ 1541 /* getlock(I) - flag indicating whether or not a write lock */ 1542 /* on ipf_nat is already held. */ 1543 /* */ 1544 /* Handle SIOCSTPUT. */ 1545 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */ 1546 /* firewall rule data structures, if pointers to them indicate so. */ 1547 /* ------------------------------------------------------------------------ */ 1548 static int fr_natputent(data, getlock, ifs) 1549 caddr_t data; 1550 int getlock; 1551 ipf_stack_t *ifs; 1552 { 1553 nat_save_t ipn, *ipnn; 1554 ap_session_t *aps; 1555 nat_t *n, *nat; 1556 frentry_t *fr; 1557 fr_info_t fin; 1558 ipnat_t *in; 1559 int error; 1560 1561 error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE); 1562 if (error != 0) 1563 return error; 1564 1565 /* 1566 * Trigger automatic call to nat_extraflush() if the 1567 * table has reached capcity specified by hi watermark. 1568 */ 1569 if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi) 1570 ifs->ifs_nat_doflush = 1; 1571 1572 /* 1573 * Initialise early because of code at junkput label. 1574 */ 1575 in = NULL; 1576 aps = NULL; 1577 nat = NULL; 1578 ipnn = NULL; 1579 1580 /* 1581 * New entry, copy in the rest of the NAT entry if it's size is more 1582 * than just the nat_t structure. 
1583 */ 1584 fr = NULL; 1585 if (ipn.ipn_dsize > sizeof(ipn)) { 1586 if (ipn.ipn_dsize > 81920) { 1587 error = ENOMEM; 1588 goto junkput; 1589 } 1590 1591 KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize); 1592 if (ipnn == NULL) 1593 return ENOMEM; 1594 1595 error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize); 1596 if (error != 0) { 1597 error = EFAULT; 1598 goto junkput; 1599 } 1600 } else 1601 ipnn = &ipn; 1602 1603 KMALLOC(nat, nat_t *); 1604 if (nat == NULL) { 1605 error = ENOMEM; 1606 goto junkput; 1607 } 1608 1609 bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat)); 1610 /* 1611 * Initialize all these so that nat_delete() doesn't cause a crash. 1612 */ 1613 bzero((char *)nat, offsetof(struct nat, nat_tqe)); 1614 nat->nat_tqe.tqe_pnext = NULL; 1615 nat->nat_tqe.tqe_next = NULL; 1616 nat->nat_tqe.tqe_ifq = NULL; 1617 nat->nat_tqe.tqe_parent = nat; 1618 1619 /* 1620 * Restore the rule associated with this nat session 1621 */ 1622 in = ipnn->ipn_nat.nat_ptr; 1623 if (in != NULL) { 1624 KMALLOC(in, ipnat_t *); 1625 nat->nat_ptr = in; 1626 if (in == NULL) { 1627 error = ENOMEM; 1628 goto junkput; 1629 } 1630 bzero((char *)in, offsetof(struct ipnat, in_next6)); 1631 bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in)); 1632 in->in_use = 1; 1633 in->in_flags |= IPN_DELETE; 1634 1635 ATOMIC_INC(ifs->ifs_nat_stats.ns_rules); 1636 1637 if (nat_resolverule(in, ifs) != 0) { 1638 error = ESRCH; 1639 goto junkput; 1640 } 1641 } 1642 1643 /* 1644 * Check that the NAT entry doesn't already exist in the kernel. 
1645 */ 1646 if (nat->nat_v != 6) 1647 nat->nat_v = 4; 1648 bzero((char *)&fin, sizeof(fin)); 1649 fin.fin_p = nat->nat_p; 1650 fin.fin_ifs = ifs; 1651 if (nat->nat_dir == NAT_OUTBOUND) { 1652 fin.fin_data[0] = ntohs(nat->nat_oport); 1653 fin.fin_data[1] = ntohs(nat->nat_outport); 1654 fin.fin_ifp = nat->nat_ifps[0]; 1655 if (getlock) { 1656 READ_ENTER(&ifs->ifs_ipf_nat); 1657 } 1658 1659 switch (nat->nat_v) 1660 { 1661 case 4: 1662 fin.fin_v = nat->nat_v; 1663 n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p, 1664 nat->nat_oip, nat->nat_outip); 1665 break; 1666 #ifdef USE_INET6 1667 case 6: 1668 n = nat6_inlookup(&fin, nat->nat_flags, fin.fin_p, 1669 &nat->nat_oip6.in6, &nat->nat_outip6.in6); 1670 break; 1671 #endif 1672 default: 1673 n = NULL; 1674 break; 1675 } 1676 1677 if (getlock) { 1678 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1679 } 1680 if (n != NULL) { 1681 error = EEXIST; 1682 goto junkput; 1683 } 1684 } else if (nat->nat_dir == NAT_INBOUND) { 1685 fin.fin_data[0] = ntohs(nat->nat_inport); 1686 fin.fin_data[1] = ntohs(nat->nat_oport); 1687 fin.fin_ifp = nat->nat_ifps[1]; 1688 if (getlock) { 1689 READ_ENTER(&ifs->ifs_ipf_nat); 1690 } 1691 1692 switch (nat->nat_v) 1693 { 1694 case 4: 1695 n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p, 1696 nat->nat_inip, nat->nat_oip); 1697 break; 1698 #ifdef USE_INET6 1699 case 6: 1700 n = nat6_outlookup(&fin, nat->nat_flags, fin.fin_p, 1701 &nat->nat_inip6.in6, &nat->nat_oip6.in6); 1702 break; 1703 #endif 1704 default: 1705 n = NULL; 1706 break; 1707 } 1708 1709 if (getlock) { 1710 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1711 } 1712 if (n != NULL) { 1713 error = EEXIST; 1714 goto junkput; 1715 } 1716 } else { 1717 error = EINVAL; 1718 goto junkput; 1719 } 1720 1721 /* 1722 * Restore ap_session_t structure. Include the private data allocated 1723 * if it was there. 
1724 */ 1725 aps = nat->nat_aps; 1726 if (aps != NULL) { 1727 KMALLOC(aps, ap_session_t *); 1728 nat->nat_aps = aps; 1729 if (aps == NULL) { 1730 error = ENOMEM; 1731 goto junkput; 1732 } 1733 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps)); 1734 if (in != NULL) 1735 aps->aps_apr = in->in_apr; 1736 else 1737 aps->aps_apr = NULL; 1738 if (aps->aps_psiz != 0) { 1739 if (aps->aps_psiz > 81920) { 1740 error = ENOMEM; 1741 goto junkput; 1742 } 1743 KMALLOCS(aps->aps_data, void *, aps->aps_psiz); 1744 if (aps->aps_data == NULL) { 1745 error = ENOMEM; 1746 goto junkput; 1747 } 1748 bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data, 1749 aps->aps_psiz); 1750 } else { 1751 aps->aps_psiz = 0; 1752 aps->aps_data = NULL; 1753 } 1754 } 1755 1756 /* 1757 * If there was a filtering rule associated with this entry then 1758 * build up a new one. 1759 */ 1760 fr = nat->nat_fr; 1761 if (fr != NULL) { 1762 if ((nat->nat_flags & SI_NEWFR) != 0) { 1763 KMALLOC(fr, frentry_t *); 1764 nat->nat_fr = fr; 1765 if (fr == NULL) { 1766 error = ENOMEM; 1767 goto junkput; 1768 } 1769 ipnn->ipn_nat.nat_fr = fr; 1770 (void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE); 1771 bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr)); 1772 1773 fr->fr_ref = 1; 1774 fr->fr_dsize = 0; 1775 fr->fr_data = NULL; 1776 fr->fr_type = FR_T_NONE; 1777 1778 MUTEX_NUKE(&fr->fr_lock); 1779 MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock"); 1780 } else { 1781 if (getlock) { 1782 READ_ENTER(&ifs->ifs_ipf_nat); 1783 } 1784 for (n = ifs->ifs_nat_instances; n; n = n->nat_next) 1785 if (n->nat_fr == fr) 1786 break; 1787 1788 if (n != NULL) { 1789 MUTEX_ENTER(&fr->fr_lock); 1790 fr->fr_ref++; 1791 MUTEX_EXIT(&fr->fr_lock); 1792 } 1793 if (getlock) { 1794 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1795 } 1796 if (!n) { 1797 error = ESRCH; 1798 goto junkput; 1799 } 1800 } 1801 } 1802 1803 if (ipnn != &ipn) { 1804 KFREES(ipnn, ipn.ipn_dsize); 1805 ipnn = NULL; 1806 } 1807 1808 nat_calc_chksum_diffs(nat); 1809 1810 if (getlock) { 1811 
WRITE_ENTER(&ifs->ifs_ipf_nat); 1812 } 1813 1814 nat_calc_chksum_diffs(nat); 1815 1816 switch (nat->nat_v) 1817 { 1818 case 4 : 1819 error = nat_insert(nat, nat->nat_rev, ifs); 1820 break; 1821 #ifdef USE_INET6 1822 case 6 : 1823 error = nat6_insert(nat, nat->nat_rev, ifs); 1824 break; 1825 #endif 1826 default : 1827 break; 1828 } 1829 1830 if ((error == 0) && (aps != NULL)) { 1831 aps->aps_next = ifs->ifs_ap_sess_list; 1832 ifs->ifs_ap_sess_list = aps; 1833 } 1834 if (getlock) { 1835 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1836 } 1837 1838 if (error == 0) 1839 return 0; 1840 1841 error = ENOMEM; 1842 1843 junkput: 1844 if (fr != NULL) 1845 (void) fr_derefrule(&fr, ifs); 1846 1847 if ((ipnn != NULL) && (ipnn != &ipn)) { 1848 KFREES(ipnn, ipn.ipn_dsize); 1849 } 1850 if (nat != NULL) { 1851 if (aps != NULL) { 1852 if (aps->aps_data != NULL) { 1853 KFREES(aps->aps_data, aps->aps_psiz); 1854 } 1855 KFREE(aps); 1856 } 1857 if (in != NULL) { 1858 if (in->in_apr) 1859 appr_free(in->in_apr); 1860 KFREE(in); 1861 } 1862 KFREE(nat); 1863 } 1864 return error; 1865 } 1866 1867 1868 /* ------------------------------------------------------------------------ */ 1869 /* Function: nat_delete */ 1870 /* Returns: Nil */ 1871 /* Parameters: natd(I) - pointer to NAT structure to delete */ 1872 /* logtype(I) - type of LOG record to create before deleting */ 1873 /* Write Lock: ipf_nat */ 1874 /* */ 1875 /* Delete a nat entry from the various lists and table. If NAT logging is */ 1876 /* enabled then generate a NAT log record for this event. */ 1877 /* ------------------------------------------------------------------------ */ 1878 static void nat_delete(nat, logtype, ifs) 1879 struct nat *nat; 1880 int logtype; 1881 ipf_stack_t *ifs; 1882 { 1883 struct ipnat *ipn; 1884 1885 if (logtype != 0 && ifs->ifs_nat_logging != 0) 1886 nat_log(nat, logtype, ifs); 1887 1888 /* 1889 * Take it as a general indication that all the pointers are set if 1890 * nat_pnext is set. 
1891 */ 1892 if (nat->nat_pnext != NULL) { 1893 ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--; 1894 ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--; 1895 1896 *nat->nat_pnext = nat->nat_next; 1897 if (nat->nat_next != NULL) { 1898 nat->nat_next->nat_pnext = nat->nat_pnext; 1899 nat->nat_next = NULL; 1900 } 1901 nat->nat_pnext = NULL; 1902 1903 *nat->nat_phnext[0] = nat->nat_hnext[0]; 1904 if (nat->nat_hnext[0] != NULL) { 1905 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; 1906 nat->nat_hnext[0] = NULL; 1907 } 1908 nat->nat_phnext[0] = NULL; 1909 1910 *nat->nat_phnext[1] = nat->nat_hnext[1]; 1911 if (nat->nat_hnext[1] != NULL) { 1912 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; 1913 nat->nat_hnext[1] = NULL; 1914 } 1915 nat->nat_phnext[1] = NULL; 1916 1917 if ((nat->nat_flags & SI_WILDP) != 0) 1918 ifs->ifs_nat_stats.ns_wilds--; 1919 } 1920 1921 if (nat->nat_me != NULL) { 1922 *nat->nat_me = NULL; 1923 nat->nat_me = NULL; 1924 } 1925 1926 fr_deletequeueentry(&nat->nat_tqe); 1927 1928 MUTEX_ENTER(&nat->nat_lock); 1929 if (nat->nat_ref > 1) { 1930 nat->nat_ref--; 1931 MUTEX_EXIT(&nat->nat_lock); 1932 return; 1933 } 1934 MUTEX_EXIT(&nat->nat_lock); 1935 1936 /* 1937 * At this point, nat_ref is 1, doing "--" would make it 0.. 1938 */ 1939 nat->nat_ref = 0; 1940 1941 #ifdef IPFILTER_SYNC 1942 if (nat->nat_sync) 1943 ipfsync_del(nat->nat_sync); 1944 #endif 1945 1946 if (nat->nat_fr != NULL) 1947 (void)fr_derefrule(&nat->nat_fr, ifs); 1948 1949 if (nat->nat_hm != NULL) 1950 fr_hostmapdel(&nat->nat_hm); 1951 1952 /* 1953 * If there is an active reference from the nat entry to its parent 1954 * rule, decrement the rule's reference count and free it too if no 1955 * longer being used. 
1956 */ 1957 ipn = nat->nat_ptr; 1958 if (ipn != NULL) { 1959 ipn->in_space++; 1960 ipn->in_use--; 1961 if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) { 1962 if (ipn->in_apr) 1963 appr_free(ipn->in_apr); 1964 KFREE(ipn); 1965 ifs->ifs_nat_stats.ns_rules--; 1966 } 1967 } 1968 1969 MUTEX_DESTROY(&nat->nat_lock); 1970 1971 aps_free(nat->nat_aps, ifs); 1972 ifs->ifs_nat_stats.ns_inuse--; 1973 1974 /* 1975 * If there's a fragment table entry too for this nat entry, then 1976 * dereference that as well. This is after nat_lock is released 1977 * because of Tru64. 1978 */ 1979 fr_forgetnat((void *)nat, ifs); 1980 1981 KFREE(nat); 1982 } 1983 1984 1985 /* ------------------------------------------------------------------------ */ 1986 /* Function: nat_flushtable */ 1987 /* Returns: int - number of NAT rules deleted */ 1988 /* Parameters: Nil */ 1989 /* */ 1990 /* Deletes all currently active NAT sessions. In deleting each NAT entry a */ 1991 /* log record should be emitted in nat_delete() if NAT logging is enabled. */ 1992 /* ------------------------------------------------------------------------ */ 1993 /* 1994 * nat_flushtable - clear the NAT table of all mapping entries. 1995 */ 1996 static int nat_flushtable(ifs) 1997 ipf_stack_t *ifs; 1998 { 1999 nat_t *nat; 2000 int j = 0; 2001 2002 /* 2003 * ALL NAT mappings deleted, so lets just make the deletions 2004 * quicker. 
2005 */ 2006 if (ifs->ifs_nat_table[0] != NULL) 2007 bzero((char *)ifs->ifs_nat_table[0], 2008 sizeof(ifs->ifs_nat_table[0]) * ifs->ifs_ipf_nattable_sz); 2009 if (ifs->ifs_nat_table[1] != NULL) 2010 bzero((char *)ifs->ifs_nat_table[1], 2011 sizeof(ifs->ifs_nat_table[1]) * ifs->ifs_ipf_nattable_sz); 2012 2013 while ((nat = ifs->ifs_nat_instances) != NULL) { 2014 nat_delete(nat, NL_FLUSH, ifs); 2015 j++; 2016 } 2017 2018 return j; 2019 } 2020 2021 2022 /* ------------------------------------------------------------------------ */ 2023 /* Function: nat_clearlist */ 2024 /* Returns: int - number of NAT/RDR rules deleted */ 2025 /* Parameters: Nil */ 2026 /* */ 2027 /* Delete all rules in the current list of rules. There is nothing elegant */ 2028 /* about this cleanup: simply free all entries on the list of rules and */ 2029 /* clear out the tables used for hashed NAT rule lookups. */ 2030 /* ------------------------------------------------------------------------ */ 2031 static int nat_clearlist(ifs) 2032 ipf_stack_t *ifs; 2033 { 2034 ipnat_t *n, **np = &ifs->ifs_nat_list; 2035 int i = 0; 2036 2037 if (ifs->ifs_nat_rules != NULL) 2038 bzero((char *)ifs->ifs_nat_rules, 2039 sizeof(*ifs->ifs_nat_rules) * ifs->ifs_ipf_natrules_sz); 2040 if (ifs->ifs_rdr_rules != NULL) 2041 bzero((char *)ifs->ifs_rdr_rules, 2042 sizeof(*ifs->ifs_rdr_rules) * ifs->ifs_ipf_rdrrules_sz); 2043 2044 while ((n = *np) != NULL) { 2045 *np = n->in_next; 2046 if (n->in_use == 0) { 2047 if (n->in_apr != NULL) 2048 appr_free(n->in_apr); 2049 KFREE(n); 2050 ifs->ifs_nat_stats.ns_rules--; 2051 } else { 2052 n->in_flags |= IPN_DELETE; 2053 n->in_next = NULL; 2054 } 2055 i++; 2056 } 2057 ifs->ifs_nat_masks = 0; 2058 ifs->ifs_rdr_masks = 0; 2059 for (i = 0; i < 4; i++) { 2060 ifs->ifs_nat6_masks[i] = 0; 2061 ifs->ifs_rdr6_masks[i] = 0; 2062 } 2063 return i; 2064 } 2065 2066 2067 /* ------------------------------------------------------------------------ */ 2068 /* Function: nat_newmap */ 2069 /* Returns: 
int - -1 == error, 0 == success */ 2070 /* Parameters: fin(I) - pointer to packet information */ 2071 /* nat(I) - pointer to NAT entry */ 2072 /* ni(I) - pointer to structure with misc. information needed */ 2073 /* to create new NAT entry. */ 2074 /* */ 2075 /* Given an empty NAT structure, populate it with new information about a */ 2076 /* new NAT session, as defined by the matching NAT rule. */ 2077 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/ 2078 /* to the new IP address for the translation. */ 2079 /* ------------------------------------------------------------------------ */ 2080 static INLINE int nat_newmap(fin, nat, ni) 2081 fr_info_t *fin; 2082 nat_t *nat; 2083 natinfo_t *ni; 2084 { 2085 u_short st_port, dport, sport, port, sp, dp; 2086 struct in_addr in, inb; 2087 hostmap_t *hm; 2088 u_32_t flags; 2089 u_32_t st_ip; 2090 ipnat_t *np; 2091 nat_t *natl; 2092 int l; 2093 ipf_stack_t *ifs = fin->fin_ifs; 2094 2095 /* 2096 * If it's an outbound packet which doesn't match any existing 2097 * record, then create a new port 2098 */ 2099 l = 0; 2100 hm = NULL; 2101 np = ni->nai_np; 2102 st_ip = np->in_nip; 2103 st_port = np->in_pnext; 2104 flags = ni->nai_flags; 2105 sport = ni->nai_sport; 2106 dport = ni->nai_dport; 2107 2108 /* 2109 * Do a loop until we either run out of entries to try or we find 2110 * a NAT mapping that isn't currently being used. This is done 2111 * because the change to the source is not (usually) being fixed. 2112 */ 2113 do { 2114 port = 0; 2115 in.s_addr = htonl(np->in_nip); 2116 if (l == 0) { 2117 /* 2118 * Check to see if there is an existing NAT 2119 * setup for this IP address pair. 
2120 */ 2121 hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, 2122 in, 0, ifs); 2123 if (hm != NULL) 2124 in.s_addr = hm->hm_mapip.s_addr; 2125 } else if ((l == 1) && (hm != NULL)) { 2126 fr_hostmapdel(&hm); 2127 } 2128 in.s_addr = ntohl(in.s_addr); 2129 2130 nat->nat_hm = hm; 2131 2132 if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) { 2133 if (l > 0) 2134 return -1; 2135 } 2136 2137 if (np->in_redir == NAT_BIMAP && 2138 np->in_inmsk == np->in_outmsk) { 2139 /* 2140 * map the address block in a 1:1 fashion 2141 */ 2142 in.s_addr = np->in_outip; 2143 in.s_addr |= fin->fin_saddr & ~np->in_inmsk; 2144 in.s_addr = ntohl(in.s_addr); 2145 2146 } else if (np->in_redir & NAT_MAPBLK) { 2147 if ((l >= np->in_ppip) || ((l > 0) && 2148 !(flags & IPN_TCPUDP))) 2149 return -1; 2150 /* 2151 * map-block - Calculate destination address. 2152 */ 2153 in.s_addr = ntohl(fin->fin_saddr); 2154 in.s_addr &= ntohl(~np->in_inmsk); 2155 inb.s_addr = in.s_addr; 2156 in.s_addr /= np->in_ippip; 2157 in.s_addr &= ntohl(~np->in_outmsk); 2158 in.s_addr += ntohl(np->in_outip); 2159 /* 2160 * Calculate destination port. 2161 */ 2162 if ((flags & IPN_TCPUDP) && 2163 (np->in_ppip != 0)) { 2164 port = ntohs(sport) + l; 2165 port %= np->in_ppip; 2166 port += np->in_ppip * 2167 (inb.s_addr % np->in_ippip); 2168 port += MAPBLK_MINPORT; 2169 port = htons(port); 2170 } 2171 2172 } else if ((np->in_outip == 0) && 2173 (np->in_outmsk == 0xffffffff)) { 2174 /* 2175 * 0/32 - use the interface's IP address. 2176 */ 2177 if ((l > 0) || 2178 fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, 2179 &in, NULL, fin->fin_ifs) == -1) 2180 return -1; 2181 in.s_addr = ntohl(in.s_addr); 2182 2183 } else if ((np->in_outip == 0) && (np->in_outmsk == 0)) { 2184 /* 2185 * 0/0 - use the original source address/port. 
2186 */ 2187 if (l > 0) 2188 return -1; 2189 in.s_addr = ntohl(fin->fin_saddr); 2190 2191 } else if ((np->in_outmsk != 0xffffffff) && 2192 (np->in_pnext == 0) && ((l > 0) || (hm == NULL))) 2193 np->in_nip++; 2194 2195 natl = NULL; 2196 2197 if ((flags & IPN_TCPUDP) && 2198 ((np->in_redir & NAT_MAPBLK) == 0) && 2199 (np->in_flags & IPN_AUTOPORTMAP)) { 2200 /* 2201 * "ports auto" (without map-block) 2202 */ 2203 if ((l > 0) && (l % np->in_ppip == 0)) { 2204 if (l > np->in_space) { 2205 return -1; 2206 } else if ((l > np->in_ppip) && 2207 np->in_outmsk != 0xffffffff) 2208 np->in_nip++; 2209 } 2210 if (np->in_ppip != 0) { 2211 port = ntohs(sport); 2212 port += (l % np->in_ppip); 2213 port %= np->in_ppip; 2214 port += np->in_ppip * 2215 (ntohl(fin->fin_saddr) % 2216 np->in_ippip); 2217 port += MAPBLK_MINPORT; 2218 port = htons(port); 2219 } 2220 2221 } else if (((np->in_redir & NAT_MAPBLK) == 0) && 2222 (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) { 2223 /* 2224 * Standard port translation. Select next port. 2225 */ 2226 if (np->in_flags & IPN_SEQUENTIAL) { 2227 port = np->in_pnext; 2228 } else { 2229 port = ipf_random() % (ntohs(np->in_pmax) - 2230 ntohs(np->in_pmin)); 2231 port += ntohs(np->in_pmin); 2232 } 2233 port = htons(port); 2234 np->in_pnext++; 2235 2236 if (np->in_pnext > ntohs(np->in_pmax)) { 2237 np->in_pnext = ntohs(np->in_pmin); 2238 if (np->in_outmsk != 0xffffffff) 2239 np->in_nip++; 2240 } 2241 } 2242 2243 if (np->in_flags & IPN_IPRANGE) { 2244 if (np->in_nip > ntohl(np->in_outmsk)) 2245 np->in_nip = ntohl(np->in_outip); 2246 } else { 2247 if ((np->in_outmsk != 0xffffffff) && 2248 ((np->in_nip + 1) & ntohl(np->in_outmsk)) > 2249 ntohl(np->in_outip)) 2250 np->in_nip = ntohl(np->in_outip) + 1; 2251 } 2252 2253 if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY))) 2254 port = sport; 2255 2256 /* 2257 * Here we do a lookup of the connection as seen from 2258 * the outside. If an IP# pair already exists, try 2259 * again. 
So if you have A->B becomes C->B, you can 2260 * also have D->E become C->E but not D->B causing 2261 * another C->B. Also take protocol and ports into 2262 * account when determining whether a pre-existing 2263 * NAT setup will cause an external conflict where 2264 * this is appropriate. 2265 */ 2266 inb.s_addr = htonl(in.s_addr); 2267 sp = fin->fin_data[0]; 2268 dp = fin->fin_data[1]; 2269 fin->fin_data[0] = fin->fin_data[1]; 2270 fin->fin_data[1] = htons(port); 2271 natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH), 2272 (u_int)fin->fin_p, fin->fin_dst, inb); 2273 fin->fin_data[0] = sp; 2274 fin->fin_data[1] = dp; 2275 2276 /* 2277 * Has the search wrapped around and come back to the 2278 * start ? 2279 */ 2280 if ((natl != NULL) && 2281 (np->in_pnext != 0) && (st_port == np->in_pnext) && 2282 (np->in_nip != 0) && (st_ip == np->in_nip)) 2283 return -1; 2284 l++; 2285 } while (natl != NULL); 2286 2287 if (np->in_space > 0) 2288 np->in_space--; 2289 2290 /* Setup the NAT table */ 2291 nat->nat_inip = fin->fin_src; 2292 nat->nat_outip.s_addr = htonl(in.s_addr); 2293 nat->nat_oip = fin->fin_dst; 2294 if (nat->nat_hm == NULL) 2295 nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, 2296 nat->nat_outip, 0, ifs); 2297 2298 if (flags & IPN_TCPUDP) { 2299 nat->nat_inport = sport; 2300 nat->nat_outport = port; /* sport */ 2301 nat->nat_oport = dport; 2302 ((tcphdr_t *)fin->fin_dp)->th_sport = port; 2303 } else if (flags & IPN_ICMPQUERY) { 2304 ((icmphdr_t *)fin->fin_dp)->icmp_id = port; 2305 nat->nat_inport = port; 2306 nat->nat_outport = port; 2307 } 2308 2309 ni->nai_ip.s_addr = in.s_addr; 2310 ni->nai_port = port; 2311 ni->nai_nport = dport; 2312 return 0; 2313 } 2314 2315 2316 /* ------------------------------------------------------------------------ */ 2317 /* Function: nat_newrdr */ 2318 /* Returns: int - -1 == error, 0 == success (no move), 1 == success and */ 2319 /* allow rule to be moved if IPN_ROUNDR is set. 
*/ 2320 /* Parameters: fin(I) - pointer to packet information */ 2321 /* nat(I) - pointer to NAT entry */ 2322 /* ni(I) - pointer to structure with misc. information needed */ 2323 /* to create new NAT entry. */ 2324 /* */ 2325 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/ 2326 /* to the new IP address for the translation. */ 2327 /* ------------------------------------------------------------------------ */ 2328 static INLINE int nat_newrdr(fin, nat, ni) 2329 fr_info_t *fin; 2330 nat_t *nat; 2331 natinfo_t *ni; 2332 { 2333 u_short nport, dport, sport; 2334 struct in_addr in, inb; 2335 u_short sp, dp; 2336 hostmap_t *hm; 2337 u_32_t flags; 2338 ipnat_t *np; 2339 nat_t *natl; 2340 int move; 2341 ipf_stack_t *ifs = fin->fin_ifs; 2342 2343 move = 1; 2344 hm = NULL; 2345 in.s_addr = 0; 2346 np = ni->nai_np; 2347 flags = ni->nai_flags; 2348 sport = ni->nai_sport; 2349 dport = ni->nai_dport; 2350 2351 /* 2352 * If the matching rule has IPN_STICKY set, then we want to have the 2353 * same rule kick in as before. Why would this happen? If you have 2354 * a collection of rdr rules with "round-robin sticky", the current 2355 * packet might match a different one to the previous connection but 2356 * we want the same destination to be used. 2357 */ 2358 if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == 2359 (IPN_ROUNDR|IPN_STICKY)) { 2360 hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in, 2361 (u_32_t)dport, ifs); 2362 if (hm != NULL) { 2363 in.s_addr = ntohl(hm->hm_mapip.s_addr); 2364 np = hm->hm_ipnat; 2365 ni->nai_np = np; 2366 move = 0; 2367 } 2368 } 2369 2370 /* 2371 * Otherwise, it's an inbound packet. Most likely, we don't 2372 * want to rewrite source ports and source addresses. Instead, 2373 * we want to rewrite to a fixed internal address and fixed 2374 * internal port. 
2375 */ 2376 if (np->in_flags & IPN_SPLIT) { 2377 in.s_addr = np->in_nip; 2378 2379 if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) { 2380 hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, 2381 in, (u_32_t)dport, ifs); 2382 if (hm != NULL) { 2383 in.s_addr = hm->hm_mapip.s_addr; 2384 move = 0; 2385 } 2386 } 2387 2388 if (hm == NULL || hm->hm_ref == 1) { 2389 if (np->in_inip == htonl(in.s_addr)) { 2390 np->in_nip = ntohl(np->in_inmsk); 2391 move = 0; 2392 } else { 2393 np->in_nip = ntohl(np->in_inip); 2394 } 2395 } 2396 2397 } else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) { 2398 /* 2399 * 0/32 - use the interface's IP address. 2400 */ 2401 if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL, 2402 fin->fin_ifs) == -1) 2403 return -1; 2404 in.s_addr = ntohl(in.s_addr); 2405 2406 } else if ((np->in_inip == 0) && (np->in_inmsk== 0)) { 2407 /* 2408 * 0/0 - use the original destination address/port. 2409 */ 2410 in.s_addr = ntohl(fin->fin_daddr); 2411 2412 } else if (np->in_redir == NAT_BIMAP && 2413 np->in_inmsk == np->in_outmsk) { 2414 /* 2415 * map the address block in a 1:1 fashion 2416 */ 2417 in.s_addr = np->in_inip; 2418 in.s_addr |= fin->fin_daddr & ~np->in_inmsk; 2419 in.s_addr = ntohl(in.s_addr); 2420 } else { 2421 in.s_addr = ntohl(np->in_inip); 2422 } 2423 2424 if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0)) 2425 nport = dport; 2426 else { 2427 /* 2428 * Whilst not optimized for the case where 2429 * pmin == pmax, the gain is not significant. 2430 */ 2431 if (((np->in_flags & IPN_FIXEDDPORT) == 0) && 2432 (np->in_pmin != np->in_pmax)) { 2433 nport = ntohs(dport) - ntohs(np->in_pmin) + 2434 ntohs(np->in_pnext); 2435 nport = htons(nport); 2436 } else 2437 nport = np->in_pnext; 2438 } 2439 2440 /* 2441 * When the redirect-to address is set to 0.0.0.0, just 2442 * assume a blank `forwarding' of the packet. We don't 2443 * setup any translation for this either. 
2444 */ 2445 if (in.s_addr == 0) { 2446 if (nport == dport) 2447 return -1; 2448 in.s_addr = ntohl(fin->fin_daddr); 2449 } 2450 2451 /* 2452 * Check to see if this redirect mapping already exists and if 2453 * it does, return "failure" (allowing it to be created will just 2454 * cause one or both of these "connections" to stop working.) 2455 */ 2456 inb.s_addr = htonl(in.s_addr); 2457 sp = fin->fin_data[0]; 2458 dp = fin->fin_data[1]; 2459 fin->fin_data[1] = fin->fin_data[0]; 2460 fin->fin_data[0] = ntohs(nport); 2461 natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH), 2462 (u_int)fin->fin_p, inb, fin->fin_src); 2463 fin->fin_data[0] = sp; 2464 fin->fin_data[1] = dp; 2465 if (natl != NULL) 2466 return (-1); 2467 2468 nat->nat_inip.s_addr = htonl(in.s_addr); 2469 nat->nat_outip = fin->fin_dst; 2470 nat->nat_oip = fin->fin_src; 2471 2472 ni->nai_ip.s_addr = in.s_addr; 2473 ni->nai_nport = nport; 2474 ni->nai_port = sport; 2475 2476 if (flags & IPN_TCPUDP) { 2477 nat->nat_inport = nport; 2478 nat->nat_outport = dport; 2479 nat->nat_oport = sport; 2480 ((tcphdr_t *)fin->fin_dp)->th_dport = nport; 2481 } else if (flags & IPN_ICMPQUERY) { 2482 ((icmphdr_t *)fin->fin_dp)->icmp_id = nport; 2483 nat->nat_inport = nport; 2484 nat->nat_outport = nport; 2485 } 2486 2487 return move; 2488 } 2489 2490 /* ------------------------------------------------------------------------ */ 2491 /* Function: nat_new */ 2492 /* Returns: nat_t* - NULL == failure to create new NAT structure, */ 2493 /* else pointer to new NAT structure */ 2494 /* Parameters: fin(I) - pointer to packet information */ 2495 /* np(I) - pointer to NAT rule */ 2496 /* natsave(I) - pointer to where to store NAT struct pointer */ 2497 /* flags(I) - flags describing the current packet */ 2498 /* direction(I) - direction of packet (in/out) */ 2499 /* Write Lock: ipf_nat */ 2500 /* */ 2501 /* Attempts to create a new NAT entry. Does not actually change the packet */ 2502 /* in any way. 
*/ 2503 /* */ 2504 /* This fucntion is in three main parts: (1) deal with creating a new NAT */ 2505 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with */ 2506 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */ 2507 /* and (3) building that structure and putting it into the NAT table(s). */ 2508 /* ------------------------------------------------------------------------ */ 2509 nat_t *nat_new(fin, np, natsave, flags, direction) 2510 fr_info_t *fin; 2511 ipnat_t *np; 2512 nat_t **natsave; 2513 u_int flags; 2514 int direction; 2515 { 2516 tcphdr_t *tcp = NULL; 2517 hostmap_t *hm = NULL; 2518 nat_t *nat, *natl; 2519 u_int nflags; 2520 natinfo_t ni; 2521 int move; 2522 ipf_stack_t *ifs = fin->fin_ifs; 2523 2524 /* 2525 * Trigger automatic call to nat_extraflush() if the 2526 * table has reached capcity specified by hi watermark. 2527 */ 2528 if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi) 2529 ifs->ifs_nat_doflush = 1; 2530 2531 if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) { 2532 ifs->ifs_nat_stats.ns_memfail++; 2533 return NULL; 2534 } 2535 2536 move = 1; 2537 nflags = np->in_flags & flags; 2538 nflags &= NAT_FROMRULE; 2539 2540 ni.nai_np = np; 2541 ni.nai_nflags = nflags; 2542 ni.nai_flags = flags; 2543 2544 /* Give me a new nat */ 2545 KMALLOC(nat, nat_t *); 2546 if (nat == NULL) { 2547 ifs->ifs_nat_stats.ns_memfail++; 2548 /* 2549 * Try to automatically tune the max # of entries in the 2550 * table allowed to be less than what will cause kmem_alloc() 2551 * to fail and try to eliminate panics due to out of memory 2552 * conditions arising. 
2553 */ 2554 if (ifs->ifs_ipf_nattable_max > ifs->ifs_ipf_nattable_sz) { 2555 ifs->ifs_ipf_nattable_max = ifs->ifs_nat_stats.ns_inuse - 100; 2556 printf("ipf_nattable_max reduced to %d\n", 2557 ifs->ifs_ipf_nattable_max); 2558 } 2559 return NULL; 2560 } 2561 2562 if (flags & IPN_TCPUDP) { 2563 tcp = fin->fin_dp; 2564 ni.nai_sport = htons(fin->fin_sport); 2565 ni.nai_dport = htons(fin->fin_dport); 2566 } else if (flags & IPN_ICMPQUERY) { 2567 /* 2568 * In the ICMP query NAT code, we translate the ICMP id fields 2569 * to make them unique. This is indepedent of the ICMP type 2570 * (e.g. in the unlikely event that a host sends an echo and 2571 * an tstamp request with the same id, both packets will have 2572 * their ip address/id field changed in the same way). 2573 */ 2574 /* The icmp_id field is used by the sender to identify the 2575 * process making the icmp request. (the receiver justs 2576 * copies it back in its response). So, it closely matches 2577 * the concept of source port. We overlay sport, so we can 2578 * maximally reuse the existing code. 2579 */ 2580 ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id; 2581 ni.nai_dport = ni.nai_sport; 2582 } 2583 2584 bzero((char *)nat, sizeof(*nat)); 2585 nat->nat_flags = flags; 2586 nat->nat_redir = np->in_redir; 2587 2588 if ((flags & NAT_SLAVE) == 0) { 2589 MUTEX_ENTER(&ifs->ifs_ipf_nat_new); 2590 } 2591 2592 /* 2593 * Search the current table for a match. 2594 */ 2595 if (direction == NAT_OUTBOUND) { 2596 /* 2597 * We can now arrange to call this for the same connection 2598 * because ipf_nat_new doesn't protect the code path into 2599 * this function. 
2600 */ 2601 natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p, 2602 fin->fin_src, fin->fin_dst); 2603 if (natl != NULL) { 2604 KFREE(nat); 2605 nat = natl; 2606 goto done; 2607 } 2608 2609 move = nat_newmap(fin, nat, &ni); 2610 if (move == -1) 2611 goto badnat; 2612 2613 np = ni.nai_np; 2614 } else { 2615 /* 2616 * NAT_INBOUND is used only for redirects rules 2617 */ 2618 natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p, 2619 fin->fin_src, fin->fin_dst); 2620 if (natl != NULL) { 2621 KFREE(nat); 2622 nat = natl; 2623 goto done; 2624 } 2625 2626 move = nat_newrdr(fin, nat, &ni); 2627 if (move == -1) 2628 goto badnat; 2629 2630 np = ni.nai_np; 2631 } 2632 2633 if ((move == 1) && (np->in_flags & IPN_ROUNDR)) { 2634 if (np->in_redir == NAT_REDIRECT) { 2635 nat_delrdr(np); 2636 nat_addrdr(np, ifs); 2637 } else if (np->in_redir == NAT_MAP) { 2638 nat_delnat(np); 2639 nat_addnat(np, ifs); 2640 } 2641 } 2642 2643 if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) { 2644 goto badnat; 2645 } 2646 2647 nat_calc_chksum_diffs(nat); 2648 2649 if (flags & SI_WILDP) 2650 ifs->ifs_nat_stats.ns_wilds++; 2651 goto done; 2652 badnat: 2653 ifs->ifs_nat_stats.ns_badnat++; 2654 if ((hm = nat->nat_hm) != NULL) 2655 fr_hostmapdel(&hm); 2656 KFREE(nat); 2657 nat = NULL; 2658 done: 2659 if ((flags & NAT_SLAVE) == 0) { 2660 MUTEX_EXIT(&ifs->ifs_ipf_nat_new); 2661 } 2662 return nat; 2663 } 2664 2665 2666 /* ------------------------------------------------------------------------ */ 2667 /* Function: nat_finalise */ 2668 /* Returns: int - 0 == sucess, -1 == failure */ 2669 /* Parameters: fin(I) - pointer to packet information */ 2670 /* nat(I) - pointer to NAT entry */ 2671 /* ni(I) - pointer to structure with misc. information needed */ 2672 /* to create new NAT entry. */ 2673 /* Write Lock: ipf_nat */ 2674 /* */ 2675 /* This is the tail end of constructing a new NAT entry and is the same */ 2676 /* for both IPv4 and IPv6. 
*/ 2677 /* ------------------------------------------------------------------------ */ 2678 /*ARGSUSED*/ 2679 static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction) 2680 fr_info_t *fin; 2681 nat_t *nat; 2682 natinfo_t *ni; 2683 tcphdr_t *tcp; 2684 nat_t **natsave; 2685 int direction; 2686 { 2687 frentry_t *fr; 2688 ipnat_t *np; 2689 ipf_stack_t *ifs = fin->fin_ifs; 2690 2691 np = ni->nai_np; 2692 2693 COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0], fin->fin_v); 2694 2695 #ifdef IPFILTER_SYNC 2696 if ((nat->nat_flags & SI_CLONE) == 0) 2697 nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat); 2698 #endif 2699 2700 nat->nat_me = natsave; 2701 nat->nat_dir = direction; 2702 nat->nat_ifps[0] = np->in_ifps[0]; 2703 nat->nat_ifps[1] = np->in_ifps[1]; 2704 nat->nat_ptr = np; 2705 nat->nat_p = fin->fin_p; 2706 nat->nat_v = fin->fin_v; 2707 nat->nat_mssclamp = np->in_mssclamp; 2708 fr = fin->fin_fr; 2709 nat->nat_fr = fr; 2710 2711 if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0)) 2712 if (appr_new(fin, nat) == -1) 2713 return -1; 2714 2715 if (nat_insert(nat, fin->fin_rev, ifs) == 0) { 2716 if (ifs->ifs_nat_logging) 2717 nat_log(nat, (u_int)np->in_redir, ifs); 2718 np->in_use++; 2719 if (fr != NULL) { 2720 MUTEX_ENTER(&fr->fr_lock); 2721 fr->fr_ref++; 2722 MUTEX_EXIT(&fr->fr_lock); 2723 } 2724 return 0; 2725 } 2726 2727 /* 2728 * nat_insert failed, so cleanup time... 2729 */ 2730 return -1; 2731 } 2732 2733 2734 /* ------------------------------------------------------------------------ */ 2735 /* Function: nat_insert */ 2736 /* Returns: int - 0 == sucess, -1 == failure */ 2737 /* Parameters: nat(I) - pointer to NAT structure */ 2738 /* rev(I) - flag indicating forward/reverse direction of packet */ 2739 /* Write Lock: ipf_nat */ 2740 /* */ 2741 /* Insert a NAT entry into the hash tables for searching and add it to the */ 2742 /* list of active NAT entries. Adjust global counters when complete. 
*/ 2743 /* ------------------------------------------------------------------------ */ 2744 int nat_insert(nat, rev, ifs) 2745 nat_t *nat; 2746 int rev; 2747 ipf_stack_t *ifs; 2748 { 2749 u_int hv1, hv2; 2750 nat_t **natp; 2751 2752 /* 2753 * Try and return an error as early as possible, so calculate the hash 2754 * entry numbers first and then proceed. 2755 */ 2756 if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) { 2757 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 2758 0xffffffff); 2759 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport, 2760 ifs->ifs_ipf_nattable_sz); 2761 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 2762 0xffffffff); 2763 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport, 2764 ifs->ifs_ipf_nattable_sz); 2765 } else { 2766 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff); 2767 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, 2768 ifs->ifs_ipf_nattable_sz); 2769 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff); 2770 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, 2771 ifs->ifs_ipf_nattable_sz); 2772 } 2773 2774 if (ifs->ifs_nat_stats.ns_bucketlen[0][hv1] >= ifs->ifs_fr_nat_maxbucket || 2775 ifs->ifs_nat_stats.ns_bucketlen[1][hv2] >= ifs->ifs_fr_nat_maxbucket) { 2776 return -1; 2777 } 2778 2779 nat->nat_hv[0] = hv1; 2780 nat->nat_hv[1] = hv2; 2781 2782 MUTEX_INIT(&nat->nat_lock, "nat entry lock"); 2783 2784 nat->nat_rev = rev; 2785 nat->nat_ref = 1; 2786 nat->nat_bytes[0] = 0; 2787 nat->nat_pkts[0] = 0; 2788 nat->nat_bytes[1] = 0; 2789 nat->nat_pkts[1] = 0; 2790 2791 nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0'; 2792 nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4, ifs); 2793 2794 if (nat->nat_ifnames[1][0] !='\0') { 2795 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; 2796 nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4, ifs); 2797 } else { 2798 (void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0], 2799 LIFNAMSIZ); 2800 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; 2801 nat->nat_ifps[1] 
= nat->nat_ifps[0]; 2802 } 2803 2804 nat->nat_next = ifs->ifs_nat_instances; 2805 nat->nat_pnext = &ifs->ifs_nat_instances; 2806 if (ifs->ifs_nat_instances) 2807 ifs->ifs_nat_instances->nat_pnext = &nat->nat_next; 2808 ifs->ifs_nat_instances = nat; 2809 2810 natp = &ifs->ifs_nat_table[0][hv1]; 2811 if (*natp) 2812 (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; 2813 nat->nat_phnext[0] = natp; 2814 nat->nat_hnext[0] = *natp; 2815 *natp = nat; 2816 ifs->ifs_nat_stats.ns_bucketlen[0][hv1]++; 2817 2818 natp = &ifs->ifs_nat_table[1][hv2]; 2819 if (*natp) 2820 (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; 2821 nat->nat_phnext[1] = natp; 2822 nat->nat_hnext[1] = *natp; 2823 *natp = nat; 2824 ifs->ifs_nat_stats.ns_bucketlen[1][hv2]++; 2825 2826 fr_setnatqueue(nat, rev, ifs); 2827 2828 ifs->ifs_nat_stats.ns_added++; 2829 ifs->ifs_nat_stats.ns_inuse++; 2830 return 0; 2831 } 2832 2833 2834 /* ------------------------------------------------------------------------ */ 2835 /* Function: nat_icmperrorlookup */ 2836 /* Returns: nat_t* - point to matching NAT structure */ 2837 /* Parameters: fin(I) - pointer to packet information */ 2838 /* dir(I) - direction of packet (in/out) */ 2839 /* */ 2840 /* Check if the ICMP error message is related to an existing TCP, UDP or */ 2841 /* ICMP query nat entry. It is assumed that the packet is already of the */ 2842 /* the required length. */ 2843 /* ------------------------------------------------------------------------ */ 2844 nat_t *nat_icmperrorlookup(fin, dir) 2845 fr_info_t *fin; 2846 int dir; 2847 { 2848 int flags = 0, minlen; 2849 icmphdr_t *orgicmp; 2850 tcphdr_t *tcp = NULL; 2851 u_short data[2]; 2852 nat_t *nat; 2853 ip_t *oip; 2854 u_int p; 2855 2856 /* 2857 * Does it at least have the return (basic) IP header ? 2858 * Only a basic IP header (no options) should be with an ICMP error 2859 * header. Also, if it's not an error type, then return. 
2860 */ 2861 if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR)) 2862 return NULL; 2863 2864 /* 2865 * Check packet size 2866 */ 2867 oip = (ip_t *)((char *)fin->fin_dp + 8); 2868 minlen = IP_HL(oip) << 2; 2869 if ((minlen < sizeof(ip_t)) || 2870 (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)) 2871 return NULL; 2872 /* 2873 * Is the buffer big enough for all of it ? It's the size of the IP 2874 * header claimed in the encapsulated part which is of concern. It 2875 * may be too big to be in this buffer but not so big that it's 2876 * outside the ICMP packet, leading to TCP deref's causing problems. 2877 * This is possible because we don't know how big oip_hl is when we 2878 * do the pullup early in fr_check() and thus can't gaurantee it is 2879 * all here now. 2880 */ 2881 #ifdef _KERNEL 2882 { 2883 mb_t *m; 2884 2885 m = fin->fin_m; 2886 # if defined(MENTAT) 2887 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr) 2888 return NULL; 2889 # else 2890 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > 2891 (char *)fin->fin_ip + M_LEN(m)) 2892 return NULL; 2893 # endif 2894 } 2895 #endif 2896 2897 if (fin->fin_daddr != oip->ip_src.s_addr) 2898 return NULL; 2899 2900 p = oip->ip_p; 2901 if (p == IPPROTO_TCP) 2902 flags = IPN_TCP; 2903 else if (p == IPPROTO_UDP) 2904 flags = IPN_UDP; 2905 else if (p == IPPROTO_ICMP) { 2906 orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2907 2908 /* see if this is related to an ICMP query */ 2909 if (nat_icmpquerytype4(orgicmp->icmp_type)) { 2910 data[0] = fin->fin_data[0]; 2911 data[1] = fin->fin_data[1]; 2912 fin->fin_data[0] = 0; 2913 fin->fin_data[1] = orgicmp->icmp_id; 2914 2915 flags = IPN_ICMPERR|IPN_ICMPQUERY; 2916 /* 2917 * NOTE : dir refers to the direction of the original 2918 * ip packet. By definition the icmp error 2919 * message flows in the opposite direction. 
2920 */ 2921 if (dir == NAT_INBOUND) 2922 nat = nat_inlookup(fin, flags, p, oip->ip_dst, 2923 oip->ip_src); 2924 else 2925 nat = nat_outlookup(fin, flags, p, oip->ip_dst, 2926 oip->ip_src); 2927 fin->fin_data[0] = data[0]; 2928 fin->fin_data[1] = data[1]; 2929 return nat; 2930 } 2931 } 2932 2933 if (flags & IPN_TCPUDP) { 2934 minlen += 8; /* + 64bits of data to get ports */ 2935 if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen) 2936 return NULL; 2937 2938 data[0] = fin->fin_data[0]; 2939 data[1] = fin->fin_data[1]; 2940 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2941 fin->fin_data[0] = ntohs(tcp->th_dport); 2942 fin->fin_data[1] = ntohs(tcp->th_sport); 2943 2944 if (dir == NAT_INBOUND) { 2945 nat = nat_inlookup(fin, flags, p, oip->ip_dst, 2946 oip->ip_src); 2947 } else { 2948 nat = nat_outlookup(fin, flags, p, oip->ip_dst, 2949 oip->ip_src); 2950 } 2951 fin->fin_data[0] = data[0]; 2952 fin->fin_data[1] = data[1]; 2953 return nat; 2954 } 2955 if (dir == NAT_INBOUND) 2956 return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src); 2957 else 2958 return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src); 2959 } 2960 2961 2962 /* ------------------------------------------------------------------------ */ 2963 /* Function: nat_icmperror */ 2964 /* Returns: nat_t* - point to matching NAT structure */ 2965 /* Parameters: fin(I) - pointer to packet information */ 2966 /* nflags(I) - NAT flags for this packet */ 2967 /* dir(I) - direction of packet (in/out) */ 2968 /* */ 2969 /* Fix up an ICMP packet which is an error message for an existing NAT */ 2970 /* session. This will correct both packet header data and checksums. */ 2971 /* */ 2972 /* This should *ONLY* be used for incoming ICMP error packets to make sure */ 2973 /* a NAT'd ICMP packet gets correctly recognised. 
*/ 2974 /* ------------------------------------------------------------------------ */ 2975 nat_t *nat_icmperror(fin, nflags, dir) 2976 fr_info_t *fin; 2977 u_int *nflags; 2978 int dir; 2979 { 2980 u_32_t sum1, sum2, sumd, psum1, psum2, psumd, sumd2; 2981 struct in_addr in; 2982 icmphdr_t *icmp, *orgicmp; 2983 int dlen; 2984 udphdr_t *udp; 2985 tcphdr_t *tcp; 2986 nat_t *nat; 2987 ip_t *oip; 2988 if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) 2989 return NULL; 2990 2991 /* 2992 * nat_icmperrorlookup() looks up nat entry associated with the 2993 * offending IP packet and returns pointer to the entry, or NULL 2994 * if packet wasn't natted or for `defective' packets. 2995 */ 2996 2997 if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir))) 2998 return NULL; 2999 3000 sumd2 = 0; 3001 *nflags = IPN_ICMPERR; 3002 icmp = fin->fin_dp; 3003 oip = (ip_t *)&icmp->icmp_ip; 3004 udp = (udphdr_t *)((((char *)oip) + (IP_HL(oip) << 2))); 3005 tcp = (tcphdr_t *)udp; 3006 dlen = fin->fin_plen - ((char *)udp - (char *)fin->fin_ip); 3007 3008 /* 3009 * Need to adjust ICMP header to include the real IP#'s and 3010 * port #'s. There are three steps required. 3011 * 3012 * Step 1 3013 * Fix the IP addresses in the offending IP packet and update 3014 * ip header checksum to compensate for the change. 3015 * 3016 * No update needed here for icmp_cksum because the ICMP checksum 3017 * is calculated over the complete ICMP packet, which includes the 3018 * changed oip IP addresses and oip->ip_sum. These two changes 3019 * cancel each other out (if the delta for the IP address is x, 3020 * then the delta for ip_sum is minus x). 
3021 */ 3022 3023 if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) { 3024 sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr)); 3025 in = nat->nat_inip; 3026 oip->ip_src = in; 3027 } else { 3028 sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr)); 3029 in = nat->nat_outip; 3030 oip->ip_dst = in; 3031 } 3032 3033 sum2 = LONG_SUM(ntohl(in.s_addr)); 3034 CALC_SUMD(sum1, sum2, sumd); 3035 fix_datacksum(&oip->ip_sum, sumd); 3036 3037 /* 3038 * Step 2 3039 * Perform other adjustments based on protocol of offending packet. 3040 */ 3041 3042 switch (oip->ip_p) { 3043 case IPPROTO_TCP : 3044 case IPPROTO_UDP : 3045 3046 /* 3047 * For offending TCP/UDP IP packets, translate the ports 3048 * based on the NAT specification. 3049 * 3050 * Advance notice : Now it becomes complicated :-) 3051 * 3052 * Since the port and IP addresse fields are both part 3053 * of the TCP/UDP checksum of the offending IP packet, 3054 * we need to adjust that checksum as well. 3055 * 3056 * To further complicate things, the TCP/UDP checksum 3057 * may not be present. We must check to see if the 3058 * length of the data portion is big enough to hold 3059 * the checksum. In the UDP case, a test to determine 3060 * if the checksum is even set is also required. 3061 * 3062 * Any changes to an IP address, port or checksum within 3063 * the ICMP packet requires a change to icmp_cksum. 3064 * 3065 * Be extremely careful here ... The change is dependent 3066 * upon whether or not the TCP/UPD checksum is present. 3067 * 3068 * If TCP/UPD checksum is present, the icmp_cksum must 3069 * compensate for checksum modification resulting from 3070 * IP address change only. Port change and resulting 3071 * data checksum adjustments cancel each other out. 3072 * 3073 * If TCP/UDP checksum is not present, icmp_cksum must 3074 * compensate for port change only. The IP address 3075 * change does not modify anything else in this case. 
3076 */ 3077 3078 psum1 = 0; 3079 psum2 = 0; 3080 psumd = 0; 3081 3082 if ((tcp->th_dport == nat->nat_oport) && 3083 (tcp->th_sport != nat->nat_inport)) { 3084 3085 /* 3086 * Translate the source port. 3087 */ 3088 3089 psum1 = ntohs(tcp->th_sport); 3090 psum2 = ntohs(nat->nat_inport); 3091 tcp->th_sport = nat->nat_inport; 3092 3093 } else if ((tcp->th_sport == nat->nat_oport) && 3094 (tcp->th_dport != nat->nat_outport)) { 3095 3096 /* 3097 * Translate the destination port. 3098 */ 3099 3100 psum1 = ntohs(tcp->th_dport); 3101 psum2 = ntohs(nat->nat_outport); 3102 tcp->th_dport = nat->nat_outport; 3103 } 3104 3105 if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) { 3106 3107 /* 3108 * TCP checksum present. 3109 * 3110 * Adjust data checksum and icmp checksum to 3111 * compensate for any IP address change. 3112 */ 3113 3114 sum1 = ntohs(tcp->th_sum); 3115 fix_datacksum(&tcp->th_sum, sumd); 3116 sum2 = ntohs(tcp->th_sum); 3117 sumd2 = sumd << 1; 3118 CALC_SUMD(sum1, sum2, sumd); 3119 sumd2 += sumd; 3120 3121 /* 3122 * Also make data checksum adjustment to 3123 * compensate for any port change. 3124 */ 3125 3126 if (psum1 != psum2) { 3127 CALC_SUMD(psum1, psum2, psumd); 3128 fix_datacksum(&tcp->th_sum, psumd); 3129 } 3130 3131 } else if ((oip->ip_p == IPPROTO_UDP) && 3132 (dlen >= 8) && (udp->uh_sum != 0)) { 3133 3134 /* 3135 * The UDP checksum is present and set. 3136 * 3137 * Adjust data checksum and icmp checksum to 3138 * compensate for any IP address change. 3139 */ 3140 3141 sum1 = ntohs(udp->uh_sum); 3142 fix_datacksum(&udp->uh_sum, sumd); 3143 sum2 = ntohs(udp->uh_sum); 3144 sumd2 = sumd << 1; 3145 CALC_SUMD(sum1, sum2, sumd); 3146 sumd2 += sumd; 3147 3148 /* 3149 * Also make data checksum adjustment to 3150 * compensate for any port change. 3151 */ 3152 3153 if (psum1 != psum2) { 3154 CALC_SUMD(psum1, psum2, psumd); 3155 fix_datacksum(&udp->uh_sum, psumd); 3156 } 3157 3158 } else { 3159 3160 /* 3161 * Data checksum was not present. 
3162 * 3163 * Compensate for any port change. 3164 */ 3165 3166 CALC_SUMD(psum2, psum1, psumd); 3167 sumd2 += psumd; 3168 } 3169 break; 3170 3171 case IPPROTO_ICMP : 3172 3173 orgicmp = (icmphdr_t *)udp; 3174 3175 if ((nat->nat_dir == NAT_OUTBOUND) && 3176 (orgicmp->icmp_id != nat->nat_inport) && 3177 (dlen >= 8)) { 3178 3179 /* 3180 * Fix ICMP checksum (of the offening ICMP 3181 * query packet) to compensate the change 3182 * in the ICMP id of the offending ICMP 3183 * packet. 3184 * 3185 * Since you modify orgicmp->icmp_id with 3186 * a delta (say x) and you compensate that 3187 * in origicmp->icmp_cksum with a delta 3188 * minus x, you don't have to adjust the 3189 * overall icmp->icmp_cksum 3190 */ 3191 3192 sum1 = ntohs(orgicmp->icmp_id); 3193 sum2 = ntohs(nat->nat_inport); 3194 CALC_SUMD(sum1, sum2, sumd); 3195 orgicmp->icmp_id = nat->nat_inport; 3196 fix_datacksum(&orgicmp->icmp_cksum, sumd); 3197 3198 } /* nat_dir can't be NAT_INBOUND for icmp queries */ 3199 3200 break; 3201 3202 default : 3203 3204 break; 3205 3206 } /* switch (oip->ip_p) */ 3207 3208 /* 3209 * Step 3 3210 * Make the adjustments to icmp checksum. 3211 */ 3212 3213 if (sumd2 != 0) { 3214 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); 3215 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); 3216 fix_incksum(&icmp->icmp_cksum, sumd2); 3217 } 3218 return nat; 3219 } 3220 3221 3222 /* 3223 * NB: these lookups don't lock access to the list, it assumed that it has 3224 * already been done! 
3225 */ 3226 3227 /* ------------------------------------------------------------------------ */ 3228 /* Function: nat_inlookup */ 3229 /* Returns: nat_t* - NULL == no match, */ 3230 /* else pointer to matching NAT entry */ 3231 /* Parameters: fin(I) - pointer to packet information */ 3232 /* flags(I) - NAT flags for this packet */ 3233 /* p(I) - protocol for this packet */ 3234 /* src(I) - source IP address */ 3235 /* mapdst(I) - destination IP address */ 3236 /* */ 3237 /* Lookup a nat entry based on the mapped destination ip address/port and */ 3238 /* real source address/port. We use this lookup when receiving a packet, */ 3239 /* we're looking for a table entry, based on the destination address. */ 3240 /* */ 3241 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */ 3242 /* */ 3243 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */ 3244 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */ 3245 /* */ 3246 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */ 3247 /* the packet is of said protocol */ 3248 /* ------------------------------------------------------------------------ */ 3249 nat_t *nat_inlookup(fin, flags, p, src, mapdst) 3250 fr_info_t *fin; 3251 u_int flags, p; 3252 struct in_addr src , mapdst; 3253 { 3254 u_short sport, dport; 3255 ipnat_t *ipn; 3256 u_int sflags; 3257 nat_t *nat; 3258 int nflags; 3259 u_32_t dst; 3260 void *ifp; 3261 u_int hv; 3262 ipf_stack_t *ifs = fin->fin_ifs; 3263 3264 if (fin != NULL) 3265 ifp = fin->fin_ifp; 3266 else 3267 ifp = NULL; 3268 sport = 0; 3269 dport = 0; 3270 dst = mapdst.s_addr; 3271 sflags = flags & NAT_TCPUDPICMP; 3272 3273 switch (p) 3274 { 3275 case IPPROTO_TCP : 3276 case IPPROTO_UDP : 3277 sport = htons(fin->fin_data[0]); 3278 dport = htons(fin->fin_data[1]); 3279 break; 3280 case IPPROTO_ICMP : 3281 if (flags & IPN_ICMPERR) 3282 sport = fin->fin_data[1]; 3283 else 3284 dport = fin->fin_data[1]; 3285 break; 3286 default : 3287 break; 
3288 } 3289 3290 3291 if ((flags & SI_WILDP) != 0) 3292 goto find_in_wild_ports; 3293 3294 hv = NAT_HASH_FN(dst, dport, 0xffffffff); 3295 hv = NAT_HASH_FN(src.s_addr, hv + sport, ifs->ifs_ipf_nattable_sz); 3296 nat = ifs->ifs_nat_table[1][hv]; 3297 for (; nat; nat = nat->nat_hnext[1]) { 3298 if (nat->nat_v != 4) 3299 continue; 3300 3301 if (nat->nat_ifps[0] != NULL) { 3302 if ((ifp != NULL) && (ifp != nat->nat_ifps[0])) 3303 continue; 3304 } else if (ifp != NULL) 3305 nat->nat_ifps[0] = ifp; 3306 3307 nflags = nat->nat_flags; 3308 3309 if (nat->nat_oip.s_addr == src.s_addr && 3310 nat->nat_outip.s_addr == dst && 3311 (((p == 0) && 3312 (sflags == (nat->nat_flags & IPN_TCPUDPICMP))) 3313 || (p == nat->nat_p))) { 3314 switch (p) 3315 { 3316 #if 0 3317 case IPPROTO_GRE : 3318 if (nat->nat_call[1] != fin->fin_data[0]) 3319 continue; 3320 break; 3321 #endif 3322 case IPPROTO_ICMP : 3323 if ((flags & IPN_ICMPERR) != 0) { 3324 if (nat->nat_outport != sport) 3325 continue; 3326 } else { 3327 if (nat->nat_outport != dport) 3328 continue; 3329 } 3330 break; 3331 case IPPROTO_TCP : 3332 case IPPROTO_UDP : 3333 if (nat->nat_oport != sport) 3334 continue; 3335 if (nat->nat_outport != dport) 3336 continue; 3337 break; 3338 default : 3339 break; 3340 } 3341 3342 ipn = nat->nat_ptr; 3343 if ((ipn != NULL) && (nat->nat_aps != NULL)) 3344 if (appr_match(fin, nat) != 0) 3345 continue; 3346 return nat; 3347 } 3348 } 3349 3350 /* 3351 * So if we didn't find it but there are wildcard members in the hash 3352 * table, go back and look for them. We do this search and update here 3353 * because it is modifying the NAT table and we want to do this only 3354 * for the first packet that matches. The exception, of course, is 3355 * for "dummy" (FI_IGNORE) lookups. 
3356 */ 3357 find_in_wild_ports: 3358 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) 3359 return NULL; 3360 if (ifs->ifs_nat_stats.ns_wilds == 0) 3361 return NULL; 3362 3363 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3364 3365 hv = NAT_HASH_FN(dst, 0, 0xffffffff); 3366 hv = NAT_HASH_FN(src.s_addr, hv, ifs->ifs_ipf_nattable_sz); 3367 3368 WRITE_ENTER(&ifs->ifs_ipf_nat); 3369 3370 nat = ifs->ifs_nat_table[1][hv]; 3371 for (; nat; nat = nat->nat_hnext[1]) { 3372 if (nat->nat_v != 4) 3373 continue; 3374 3375 if (nat->nat_ifps[0] != NULL) { 3376 if ((ifp != NULL) && (ifp != nat->nat_ifps[0])) 3377 continue; 3378 } else if (ifp != NULL) 3379 nat->nat_ifps[0] = ifp; 3380 3381 if (nat->nat_p != fin->fin_p) 3382 continue; 3383 if (nat->nat_oip.s_addr != src.s_addr || 3384 nat->nat_outip.s_addr != dst) 3385 continue; 3386 3387 nflags = nat->nat_flags; 3388 if (!(nflags & (NAT_TCPUDP|SI_WILDP))) 3389 continue; 3390 3391 if (nat_wildok(nat, (int)sport, (int)dport, nflags, 3392 NAT_INBOUND) == 1) { 3393 if ((fin->fin_flx & FI_IGNORE) != 0) 3394 break; 3395 if ((nflags & SI_CLONE) != 0) { 3396 nat = fr_natclone(fin, nat); 3397 if (nat == NULL) 3398 break; 3399 } else { 3400 MUTEX_ENTER(&ifs->ifs_ipf_nat_new); 3401 ifs->ifs_nat_stats.ns_wilds--; 3402 MUTEX_EXIT(&ifs->ifs_ipf_nat_new); 3403 } 3404 nat->nat_oport = sport; 3405 nat->nat_outport = dport; 3406 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); 3407 nat_tabmove(nat, ifs); 3408 break; 3409 } 3410 } 3411 3412 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3413 3414 return nat; 3415 } 3416 3417 3418 /* ------------------------------------------------------------------------ */ 3419 /* Function: nat_tabmove */ 3420 /* Returns: Nil */ 3421 /* Parameters: nat(I) - pointer to NAT structure */ 3422 /* Write Lock: ipf_nat */ 3423 /* */ 3424 /* This function is only called for TCP/UDP NAT table entries where the */ 3425 /* original was placed in the table without hashing on the ports and we now */ 3426 /* want to include hashing on port numbers. 
*/ 3427 /* ------------------------------------------------------------------------ */ 3428 static void nat_tabmove(nat, ifs) 3429 nat_t *nat; 3430 ipf_stack_t *ifs; 3431 { 3432 nat_t **natp; 3433 u_int hv; 3434 3435 if (nat->nat_flags & SI_CLONE) 3436 return; 3437 3438 /* 3439 * Remove the NAT entry from the old location 3440 */ 3441 if (nat->nat_hnext[0]) 3442 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; 3443 *nat->nat_phnext[0] = nat->nat_hnext[0]; 3444 ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--; 3445 3446 if (nat->nat_hnext[1]) 3447 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; 3448 *nat->nat_phnext[1] = nat->nat_hnext[1]; 3449 ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--; 3450 3451 /* 3452 * Add into the NAT table in the new position 3453 */ 3454 hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff); 3455 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport, 3456 ifs->ifs_ipf_nattable_sz); 3457 nat->nat_hv[0] = hv; 3458 natp = &ifs->ifs_nat_table[0][hv]; 3459 if (*natp) 3460 (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; 3461 nat->nat_phnext[0] = natp; 3462 nat->nat_hnext[0] = *natp; 3463 *natp = nat; 3464 ifs->ifs_nat_stats.ns_bucketlen[0][hv]++; 3465 3466 hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff); 3467 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport, 3468 ifs->ifs_ipf_nattable_sz); 3469 nat->nat_hv[1] = hv; 3470 natp = &ifs->ifs_nat_table[1][hv]; 3471 if (*natp) 3472 (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; 3473 nat->nat_phnext[1] = natp; 3474 nat->nat_hnext[1] = *natp; 3475 *natp = nat; 3476 ifs->ifs_nat_stats.ns_bucketlen[1][hv]++; 3477 } 3478 3479 3480 /* ------------------------------------------------------------------------ */ 3481 /* Function: nat_outlookup */ 3482 /* Returns: nat_t* - NULL == no match, */ 3483 /* else pointer to matching NAT entry */ 3484 /* Parameters: fin(I) - pointer to packet information */ 3485 /* flags(I) - NAT flags for this packet 
*/ 3486 /* p(I) - protocol for this packet */ 3487 /* src(I) - source IP address */ 3488 /* dst(I) - destination IP address */ 3489 /* rw(I) - 1 == write lock on ipf_nat held, 0 == read lock. */ 3490 /* */ 3491 /* Lookup a nat entry based on the source 'real' ip address/port and */ 3492 /* destination address/port. We use this lookup when sending a packet out, */ 3493 /* we're looking for a table entry, based on the source address. */ 3494 /* */ 3495 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */ 3496 /* */ 3497 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */ 3498 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */ 3499 /* */ 3500 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */ 3501 /* the packet is of said protocol */ 3502 /* ------------------------------------------------------------------------ */ 3503 nat_t *nat_outlookup(fin, flags, p, src, dst) 3504 fr_info_t *fin; 3505 u_int flags, p; 3506 struct in_addr src , dst; 3507 { 3508 u_short sport, dport; 3509 u_int sflags; 3510 ipnat_t *ipn; 3511 u_32_t srcip; 3512 nat_t *nat; 3513 int nflags; 3514 void *ifp; 3515 u_int hv; 3516 ipf_stack_t *ifs = fin->fin_ifs; 3517 3518 ifp = fin->fin_ifp; 3519 3520 srcip = src.s_addr; 3521 sflags = flags & IPN_TCPUDPICMP; 3522 sport = 0; 3523 dport = 0; 3524 3525 switch (p) 3526 { 3527 case IPPROTO_TCP : 3528 case IPPROTO_UDP : 3529 sport = htons(fin->fin_data[0]); 3530 dport = htons(fin->fin_data[1]); 3531 break; 3532 case IPPROTO_ICMP : 3533 if (flags & IPN_ICMPERR) 3534 sport = fin->fin_data[1]; 3535 else 3536 dport = fin->fin_data[1]; 3537 break; 3538 default : 3539 break; 3540 } 3541 3542 if ((flags & SI_WILDP) != 0) 3543 goto find_out_wild_ports; 3544 3545 hv = NAT_HASH_FN(srcip, sport, 0xffffffff); 3546 hv = NAT_HASH_FN(dst.s_addr, hv + dport, ifs->ifs_ipf_nattable_sz); 3547 nat = ifs->ifs_nat_table[0][hv]; 3548 for (; nat; nat = nat->nat_hnext[0]) { 3549 if (nat->nat_v != 4) 3550 
continue; 3551 3552 if (nat->nat_ifps[1] != NULL) { 3553 if ((ifp != NULL) && (ifp != nat->nat_ifps[1])) 3554 continue; 3555 } else if (ifp != NULL) 3556 nat->nat_ifps[1] = ifp; 3557 3558 nflags = nat->nat_flags; 3559 3560 if (nat->nat_inip.s_addr == srcip && 3561 nat->nat_oip.s_addr == dst.s_addr && 3562 (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP))) 3563 || (p == nat->nat_p))) { 3564 switch (p) 3565 { 3566 #if 0 3567 case IPPROTO_GRE : 3568 if (nat->nat_call[1] != fin->fin_data[0]) 3569 continue; 3570 break; 3571 #endif 3572 case IPPROTO_TCP : 3573 case IPPROTO_UDP : 3574 if (nat->nat_oport != dport) 3575 continue; 3576 if (nat->nat_inport != sport) 3577 continue; 3578 break; 3579 default : 3580 break; 3581 } 3582 3583 ipn = nat->nat_ptr; 3584 if ((ipn != NULL) && (nat->nat_aps != NULL)) 3585 if (appr_match(fin, nat) != 0) 3586 continue; 3587 return nat; 3588 } 3589 } 3590 3591 /* 3592 * So if we didn't find it but there are wildcard members in the hash 3593 * table, go back and look for them. We do this search and update here 3594 * because it is modifying the NAT table and we want to do this only 3595 * for the first packet that matches. The exception, of course, is 3596 * for "dummy" (FI_IGNORE) lookups. 
3597 */ 3598 find_out_wild_ports: 3599 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) 3600 return NULL; 3601 if (ifs->ifs_nat_stats.ns_wilds == 0) 3602 return NULL; 3603 3604 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3605 3606 hv = NAT_HASH_FN(srcip, 0, 0xffffffff); 3607 hv = NAT_HASH_FN(dst.s_addr, hv, ifs->ifs_ipf_nattable_sz); 3608 3609 WRITE_ENTER(&ifs->ifs_ipf_nat); 3610 3611 nat = ifs->ifs_nat_table[0][hv]; 3612 for (; nat; nat = nat->nat_hnext[0]) { 3613 if (nat->nat_v != 4) 3614 continue; 3615 3616 if (nat->nat_ifps[1] != NULL) { 3617 if ((ifp != NULL) && (ifp != nat->nat_ifps[1])) 3618 continue; 3619 } else if (ifp != NULL) 3620 nat->nat_ifps[1] = ifp; 3621 3622 if (nat->nat_p != fin->fin_p) 3623 continue; 3624 if ((nat->nat_inip.s_addr != srcip) || 3625 (nat->nat_oip.s_addr != dst.s_addr)) 3626 continue; 3627 3628 nflags = nat->nat_flags; 3629 if (!(nflags & (NAT_TCPUDP|SI_WILDP))) 3630 continue; 3631 3632 if (nat_wildok(nat, (int)sport, (int)dport, nflags, 3633 NAT_OUTBOUND) == 1) { 3634 if ((fin->fin_flx & FI_IGNORE) != 0) 3635 break; 3636 if ((nflags & SI_CLONE) != 0) { 3637 nat = fr_natclone(fin, nat); 3638 if (nat == NULL) 3639 break; 3640 } else { 3641 MUTEX_ENTER(&ifs->ifs_ipf_nat_new); 3642 ifs->ifs_nat_stats.ns_wilds--; 3643 MUTEX_EXIT(&ifs->ifs_ipf_nat_new); 3644 } 3645 nat->nat_inport = sport; 3646 nat->nat_oport = dport; 3647 if (nat->nat_outport == 0) 3648 nat->nat_outport = sport; 3649 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); 3650 nat_tabmove(nat, ifs); 3651 break; 3652 } 3653 } 3654 3655 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3656 3657 return nat; 3658 } 3659 3660 3661 /* ------------------------------------------------------------------------ */ 3662 /* Function: nat_lookupredir */ 3663 /* Returns: nat_t* - NULL == no match, */ 3664 /* else pointer to matching NAT entry */ 3665 /* Parameters: np(I) - pointer to description of packet to find NAT table */ 3666 /* entry for. 
*/ 3667 /* */ 3668 /* Lookup the NAT tables to search for a matching redirect */ 3669 /* ------------------------------------------------------------------------ */ 3670 nat_t *nat_lookupredir(np, ifs) 3671 natlookup_t *np; 3672 ipf_stack_t *ifs; 3673 { 3674 fr_info_t fi; 3675 nat_t *nat; 3676 3677 bzero((char *)&fi, sizeof(fi)); 3678 if (np->nl_flags & IPN_IN) { 3679 fi.fin_data[0] = ntohs(np->nl_realport); 3680 fi.fin_data[1] = ntohs(np->nl_outport); 3681 } else { 3682 fi.fin_data[0] = ntohs(np->nl_inport); 3683 fi.fin_data[1] = ntohs(np->nl_outport); 3684 } 3685 if (np->nl_flags & IPN_TCP) 3686 fi.fin_p = IPPROTO_TCP; 3687 else if (np->nl_flags & IPN_UDP) 3688 fi.fin_p = IPPROTO_UDP; 3689 else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY)) 3690 fi.fin_p = IPPROTO_ICMP; 3691 3692 fi.fin_ifs = ifs; 3693 /* 3694 * We can do two sorts of lookups: 3695 * - IPN_IN: we have the `real' and `out' address, look for `in'. 3696 * - default: we have the `in' and `out' address, look for `real'. 3697 */ 3698 if (np->nl_flags & IPN_IN) { 3699 if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p, 3700 np->nl_realip, np->nl_outip))) { 3701 np->nl_inip = nat->nat_inip; 3702 np->nl_inport = nat->nat_inport; 3703 } 3704 } else { 3705 /* 3706 * If nl_inip is non null, this is a lookup based on the real 3707 * ip address. Else, we use the fake. 
3708 */ 3709 if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p, 3710 np->nl_inip, np->nl_outip))) { 3711 3712 if ((np->nl_flags & IPN_FINDFORWARD) != 0) { 3713 fr_info_t fin; 3714 bzero((char *)&fin, sizeof(fin)); 3715 fin.fin_p = nat->nat_p; 3716 fin.fin_data[0] = ntohs(nat->nat_outport); 3717 fin.fin_data[1] = ntohs(nat->nat_oport); 3718 fin.fin_ifs = ifs; 3719 if (nat_inlookup(&fin, np->nl_flags, fin.fin_p, 3720 nat->nat_outip, 3721 nat->nat_oip) != NULL) { 3722 np->nl_flags &= ~IPN_FINDFORWARD; 3723 } 3724 } 3725 3726 np->nl_realip = nat->nat_outip; 3727 np->nl_realport = nat->nat_outport; 3728 } 3729 } 3730 3731 return nat; 3732 } 3733 3734 3735 /* ------------------------------------------------------------------------ */ 3736 /* Function: nat_match */ 3737 /* Returns: int - 0 == no match, 1 == match */ 3738 /* Parameters: fin(I) - pointer to packet information */ 3739 /* np(I) - pointer to NAT rule */ 3740 /* */ 3741 /* Pull the matching of a packet against a NAT rule out of that complex */ 3742 /* loop inside fr_checknatin() and lay it out properly in its own function. 
*/ 3743 /* ------------------------------------------------------------------------ */ 3744 static int nat_match(fin, np) 3745 fr_info_t *fin; 3746 ipnat_t *np; 3747 { 3748 frtuc_t *ft; 3749 3750 if (fin->fin_v != 4) 3751 return 0; 3752 3753 if (np->in_p && fin->fin_p != np->in_p) 3754 return 0; 3755 3756 if (fin->fin_out) { 3757 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK))) 3758 return 0; 3759 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip) 3760 ^ ((np->in_flags & IPN_NOTSRC) != 0)) 3761 return 0; 3762 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip) 3763 ^ ((np->in_flags & IPN_NOTDST) != 0)) 3764 return 0; 3765 } else { 3766 if (!(np->in_redir & NAT_REDIRECT)) 3767 return 0; 3768 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip) 3769 ^ ((np->in_flags & IPN_NOTSRC) != 0)) 3770 return 0; 3771 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip) 3772 ^ ((np->in_flags & IPN_NOTDST) != 0)) 3773 return 0; 3774 } 3775 3776 ft = &np->in_tuc; 3777 if (!(fin->fin_flx & FI_TCPUDP) || 3778 (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) { 3779 if (ft->ftu_scmp || ft->ftu_dcmp) 3780 return 0; 3781 return 1; 3782 } 3783 3784 return fr_tcpudpchk(fin, ft); 3785 } 3786 3787 3788 /* ------------------------------------------------------------------------ */ 3789 /* Function: nat_update */ 3790 /* Returns: Nil */ 3791 /* Parameters: fin(I) - pointer to packet information */ 3792 /* nat(I) - pointer to NAT structure */ 3793 /* np(I) - pointer to NAT rule */ 3794 /* Locks: nat_lock */ 3795 /* */ 3796 /* Updates the lifetime of a NAT table entry for non-TCP packets. Must be */ 3797 /* called with fin_rev updated - i.e. after calling nat_proto(). 
*/ 3798 /* ------------------------------------------------------------------------ */ 3799 void nat_update(fin, nat, np) 3800 fr_info_t *fin; 3801 nat_t *nat; 3802 ipnat_t *np; 3803 { 3804 ipftq_t *ifq, *ifq2; 3805 ipftqent_t *tqe; 3806 ipf_stack_t *ifs = fin->fin_ifs; 3807 3808 tqe = &nat->nat_tqe; 3809 ifq = tqe->tqe_ifq; 3810 3811 /* 3812 * We allow over-riding of NAT timeouts from NAT rules, even for 3813 * TCP, however, if it is TCP and there is no rule timeout set, 3814 * then do not update the timeout here. 3815 */ 3816 if (np != NULL) 3817 ifq2 = np->in_tqehead[fin->fin_rev]; 3818 else 3819 ifq2 = NULL; 3820 3821 if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) { 3822 (void) fr_tcp_age(&nat->nat_tqe, fin, ifs->ifs_nat_tqb, 0); 3823 } else { 3824 if (ifq2 == NULL) { 3825 if (nat->nat_p == IPPROTO_UDP) 3826 ifq2 = &ifs->ifs_nat_udptq; 3827 else if (nat->nat_p == IPPROTO_ICMP) 3828 ifq2 = &ifs->ifs_nat_icmptq; 3829 else 3830 ifq2 = &ifs->ifs_nat_iptq; 3831 } 3832 3833 fr_movequeue(tqe, ifq, ifq2, ifs); 3834 } 3835 } 3836 3837 3838 /* ------------------------------------------------------------------------ */ 3839 /* Function: fr_checknatout */ 3840 /* Returns: int - -1 == packet failed NAT checks so block it, */ 3841 /* 0 == no packet translation occurred, */ 3842 /* 1 == packet was successfully translated. */ 3843 /* Parameters: fin(I) - pointer to packet information */ 3844 /* passp(I) - pointer to filtering result flags */ 3845 /* */ 3846 /* Check to see if an outcoming packet should be changed. ICMP packets are */ 3847 /* first checked to see if they match an existing entry (if an error), */ 3848 /* otherwise a search of the current NAT table is made. If neither results */ 3849 /* in a match then a search for a matching NAT rule is made. Create a new */ 3850 /* NAT entry if a we matched a NAT rule. Lastly, actually change the */ 3851 /* packet header(s) as required. 
*/ 3852 /* ------------------------------------------------------------------------ */ 3853 int fr_checknatout(fin, passp) 3854 fr_info_t *fin; 3855 u_32_t *passp; 3856 { 3857 ipnat_t *np = NULL, *npnext; 3858 struct ifnet *ifp, *sifp; 3859 icmphdr_t *icmp = NULL; 3860 tcphdr_t *tcp = NULL; 3861 int rval, natfailed; 3862 u_int nflags = 0; 3863 u_32_t ipa, iph; 3864 int natadd = 1; 3865 frentry_t *fr; 3866 nat_t *nat; 3867 ipf_stack_t *ifs = fin->fin_ifs; 3868 3869 if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0) 3870 return 0; 3871 3872 natfailed = 0; 3873 fr = fin->fin_fr; 3874 sifp = fin->fin_ifp; 3875 if ((fr != NULL) && !(fr->fr_flags & FR_DUP) && 3876 fr->fr_tifs[fin->fin_rev].fd_ifp && 3877 fr->fr_tifs[fin->fin_rev].fd_ifp != (void *)-1) 3878 fin->fin_ifp = fr->fr_tifs[fin->fin_rev].fd_ifp; 3879 ifp = fin->fin_ifp; 3880 3881 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 3882 switch (fin->fin_p) 3883 { 3884 case IPPROTO_TCP : 3885 nflags = IPN_TCP; 3886 break; 3887 case IPPROTO_UDP : 3888 nflags = IPN_UDP; 3889 break; 3890 case IPPROTO_ICMP : 3891 icmp = fin->fin_dp; 3892 3893 /* 3894 * This is an incoming packet, so the destination is 3895 * the icmp_id and the source port equals 0 3896 */ 3897 if (nat_icmpquerytype4(icmp->icmp_type)) 3898 nflags = IPN_ICMPQUERY; 3899 break; 3900 default : 3901 break; 3902 } 3903 3904 if ((nflags & IPN_TCPUDP)) 3905 tcp = fin->fin_dp; 3906 } 3907 3908 ipa = fin->fin_saddr; 3909 3910 READ_ENTER(&ifs->ifs_ipf_nat); 3911 3912 if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && 3913 (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND))) 3914 /*EMPTY*/; 3915 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin))) 3916 natadd = 0; 3917 else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p, 3918 fin->fin_src, fin->fin_dst))) { 3919 nflags = nat->nat_flags; 3920 } else { 3921 u_32_t hv, msk, nmsk; 3922 3923 /* 3924 * If there is no current entry in the nat table for 
this IP#, 3925 * create one for it (if there is a matching rule). 3926 */ 3927 msk = 0xffffffff; 3928 nmsk = ifs->ifs_nat_masks; 3929 maskloop: 3930 iph = ipa & htonl(msk); 3931 hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_natrules_sz); 3932 for (np = ifs->ifs_nat_rules[hv]; np; np = npnext) { 3933 npnext = np->in_mnext; 3934 if ((np->in_ifps[1] && (np->in_ifps[1] != ifp))) 3935 continue; 3936 if (np->in_v != fin->fin_v) 3937 continue; 3938 if (np->in_p && (np->in_p != fin->fin_p)) 3939 continue; 3940 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags)) 3941 continue; 3942 if (np->in_flags & IPN_FILTER) { 3943 if (!nat_match(fin, np)) 3944 continue; 3945 } else if ((ipa & np->in_inmsk) != np->in_inip) 3946 continue; 3947 3948 if ((fr != NULL) && 3949 !fr_matchtag(&np->in_tag, &fr->fr_nattag)) 3950 continue; 3951 3952 if (*np->in_plabel != '\0') { 3953 if (((np->in_flags & IPN_FILTER) == 0) && 3954 (np->in_dport != tcp->th_dport)) 3955 continue; 3956 if (appr_ok(fin, tcp, np) == 0) 3957 continue; 3958 } 3959 3960 ATOMIC_INC32(np->in_use); 3961 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3962 WRITE_ENTER(&ifs->ifs_ipf_nat); 3963 nat = nat_new(fin, np, NULL, nflags, NAT_OUTBOUND); 3964 if (nat != NULL) { 3965 np->in_use--; 3966 np->in_hits++; 3967 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3968 break; 3969 } 3970 natfailed = -1; 3971 npnext = np->in_mnext; 3972 fr_ipnatderef(&np, ifs); 3973 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3974 } 3975 if ((np == NULL) && (nmsk != 0)) { 3976 while (nmsk) { 3977 msk <<= 1; 3978 if (nmsk & 0x80000000) 3979 break; 3980 nmsk <<= 1; 3981 } 3982 if (nmsk != 0) { 3983 nmsk <<= 1; 3984 goto maskloop; 3985 } 3986 } 3987 } 3988 3989 if (nat != NULL) { 3990 rval = fr_natout(fin, nat, natadd, nflags); 3991 if (rval == 1) { 3992 MUTEX_ENTER(&nat->nat_lock); 3993 nat_update(fin, nat, nat->nat_ptr); 3994 nat->nat_bytes[1] += fin->fin_plen; 3995 nat->nat_pkts[1]++; 3996 nat->nat_ref++; 3997 MUTEX_EXIT(&nat->nat_lock); 3998 nat->nat_touched = ifs->ifs_fr_ticks; 3999 
fin->fin_nat = nat; 4000 } 4001 } else 4002 rval = natfailed; 4003 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4004 4005 if (rval == -1) { 4006 if (passp != NULL) 4007 *passp = FR_BLOCK; 4008 fin->fin_flx |= FI_BADNAT; 4009 } 4010 fin->fin_ifp = sifp; 4011 return rval; 4012 } 4013 4014 /* ------------------------------------------------------------------------ */ 4015 /* Function: fr_natout */ 4016 /* Returns: int - -1 == packet failed NAT checks so block it, */ 4017 /* 1 == packet was successfully translated. */ 4018 /* Parameters: fin(I) - pointer to packet information */ 4019 /* nat(I) - pointer to NAT structure */ 4020 /* natadd(I) - flag indicating if it is safe to add frag cache */ 4021 /* nflags(I) - NAT flags set for this packet */ 4022 /* */ 4023 /* Translate a packet coming "out" on an interface. */ 4024 /* ------------------------------------------------------------------------ */ 4025 int fr_natout(fin, nat, natadd, nflags) 4026 fr_info_t *fin; 4027 nat_t *nat; 4028 int natadd; 4029 u_32_t nflags; 4030 { 4031 icmphdr_t *icmp; 4032 u_short *csump; 4033 u_32_t sumd; 4034 tcphdr_t *tcp; 4035 ipnat_t *np; 4036 int i; 4037 ipf_stack_t *ifs = fin->fin_ifs; 4038 4039 if (fin->fin_v == 6) { 4040 #ifdef USE_INET6 4041 return fr_nat6out(fin, nat, natadd, nflags); 4042 #else 4043 return NULL; 4044 #endif 4045 } 4046 4047 #if SOLARIS && defined(_KERNEL) 4048 net_data_t net_data_p = ifs->ifs_ipf_ipv4; 4049 #endif 4050 4051 tcp = NULL; 4052 icmp = NULL; 4053 csump = NULL; 4054 np = nat->nat_ptr; 4055 4056 if ((natadd != 0) && (fin->fin_flx & FI_FRAG)) 4057 (void) fr_nat_newfrag(fin, 0, nat); 4058 4059 /* 4060 * Fix up checksums, not by recalculating them, but 4061 * simply computing adjustments. 4062 * This is only done for STREAMS based IP implementations where the 4063 * checksum has already been calculated by IP. In all other cases, 4064 * IPFilter is called before the checksum needs calculating so there 4065 * is no call to modify whatever is in the header now. 
4066 */ 4067 ASSERT(fin->fin_m != NULL); 4068 if (fin->fin_v == 4 && !NET_IS_HCK_L3_FULL(net_data_p, fin->fin_m)) { 4069 if (nflags == IPN_ICMPERR) { 4070 u_32_t s1, s2; 4071 4072 s1 = LONG_SUM(ntohl(fin->fin_saddr)); 4073 s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr)); 4074 CALC_SUMD(s1, s2, sumd); 4075 4076 fix_outcksum(&fin->fin_ip->ip_sum, sumd); 4077 } 4078 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ 4079 defined(linux) || defined(BRIDGE_IPF) 4080 else { 4081 /* 4082 * Strictly speaking, this isn't necessary on BSD 4083 * kernels because they do checksum calculation after 4084 * this code has run BUT if ipfilter is being used 4085 * to do NAT as a bridge, that code doesn't exist. 4086 */ 4087 if (nat->nat_dir == NAT_OUTBOUND) 4088 fix_outcksum(&fin->fin_ip->ip_sum, 4089 nat->nat_ipsumd); 4090 else 4091 fix_incksum(&fin->fin_ip->ip_sum, 4092 nat->nat_ipsumd); 4093 } 4094 #endif 4095 } 4096 4097 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 4098 if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) { 4099 tcp = fin->fin_dp; 4100 4101 tcp->th_sport = nat->nat_outport; 4102 fin->fin_data[0] = ntohs(nat->nat_outport); 4103 } 4104 4105 if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) { 4106 icmp = fin->fin_dp; 4107 icmp->icmp_id = nat->nat_outport; 4108 } 4109 4110 csump = nat_proto(fin, nat, nflags); 4111 } 4112 4113 fin->fin_ip->ip_src = nat->nat_outip; 4114 4115 /* 4116 * The above comments do not hold for layer 4 (or higher) checksums... 
4117 */ 4118 if (csump != NULL && !NET_IS_HCK_L4_FULL(net_data_p, fin->fin_m)) { 4119 if (nflags & IPN_TCPUDP && 4120 NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) 4121 sumd = nat->nat_sumd[1]; 4122 else 4123 sumd = nat->nat_sumd[0]; 4124 4125 if (nat->nat_dir == NAT_OUTBOUND) 4126 fix_outcksum(csump, sumd); 4127 else 4128 fix_incksum(csump, sumd); 4129 } 4130 #ifdef IPFILTER_SYNC 4131 ipfsync_update(SMC_NAT, fin, nat->nat_sync); 4132 #endif 4133 /* ------------------------------------------------------------- */ 4134 /* A few quick notes: */ 4135 /* Following are test conditions prior to calling the */ 4136 /* appr_check routine. */ 4137 /* */ 4138 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */ 4139 /* with a redirect rule, we attempt to match the packet's */ 4140 /* source port against in_dport, otherwise we'd compare the */ 4141 /* packet's destination. */ 4142 /* ------------------------------------------------------------- */ 4143 if ((np != NULL) && (np->in_apr != NULL)) { 4144 i = appr_check(fin, nat); 4145 if (i == 0) 4146 i = 1; 4147 } else 4148 i = 1; 4149 ifs->ifs_nat_stats.ns_mapped[1]++; 4150 fin->fin_flx |= FI_NATED; 4151 return i; 4152 } 4153 4154 4155 /* ------------------------------------------------------------------------ */ 4156 /* Function: fr_checknatin */ 4157 /* Returns: int - -1 == packet failed NAT checks so block it, */ 4158 /* 0 == no packet translation occurred, */ 4159 /* 1 == packet was successfully translated. */ 4160 /* Parameters: fin(I) - pointer to packet information */ 4161 /* passp(I) - pointer to filtering result flags */ 4162 /* */ 4163 /* Check to see if an incoming packet should be changed. ICMP packets are */ 4164 /* first checked to see if they match an existing entry (if an error), */ 4165 /* otherwise a search of the current NAT table is made. If neither results */ 4166 /* in a match then a search for a matching NAT rule is made. Create a new */ 4167 /* NAT entry if a we matched a NAT rule. 
Lastly, actually change the */ 4168 /* packet header(s) as required. */ 4169 /* ------------------------------------------------------------------------ */ 4170 int fr_checknatin(fin, passp) 4171 fr_info_t *fin; 4172 u_32_t *passp; 4173 { 4174 u_int nflags, natadd; 4175 ipnat_t *np, *npnext; 4176 int rval, natfailed; 4177 struct ifnet *ifp; 4178 struct in_addr in; 4179 icmphdr_t *icmp; 4180 tcphdr_t *tcp; 4181 u_short dport; 4182 nat_t *nat; 4183 u_32_t iph; 4184 ipf_stack_t *ifs = fin->fin_ifs; 4185 4186 if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0) 4187 return 0; 4188 4189 tcp = NULL; 4190 icmp = NULL; 4191 dport = 0; 4192 natadd = 1; 4193 nflags = 0; 4194 natfailed = 0; 4195 ifp = fin->fin_ifp; 4196 4197 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 4198 switch (fin->fin_p) 4199 { 4200 case IPPROTO_TCP : 4201 nflags = IPN_TCP; 4202 break; 4203 case IPPROTO_UDP : 4204 nflags = IPN_UDP; 4205 break; 4206 case IPPROTO_ICMP : 4207 icmp = fin->fin_dp; 4208 4209 /* 4210 * This is an incoming packet, so the destination is 4211 * the icmp_id and the source port equals 0 4212 */ 4213 if (nat_icmpquerytype4(icmp->icmp_type)) { 4214 nflags = IPN_ICMPQUERY; 4215 dport = icmp->icmp_id; 4216 } break; 4217 default : 4218 break; 4219 } 4220 4221 if ((nflags & IPN_TCPUDP)) { 4222 tcp = fin->fin_dp; 4223 dport = tcp->th_dport; 4224 } 4225 } 4226 4227 in = fin->fin_dst; 4228 4229 READ_ENTER(&ifs->ifs_ipf_nat); 4230 4231 if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && 4232 (nat = nat_icmperror(fin, &nflags, NAT_INBOUND))) 4233 /*EMPTY*/; 4234 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin))) 4235 natadd = 0; 4236 else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p, 4237 fin->fin_src, in))) { 4238 nflags = nat->nat_flags; 4239 } else { 4240 u_32_t hv, msk, rmsk; 4241 4242 rmsk = ifs->ifs_rdr_masks; 4243 msk = 0xffffffff; 4244 /* 4245 * If there is no current entry in the nat table for this IP#, 4246 
* create one for it (if there is a matching rule). 4247 */ 4248 maskloop: 4249 iph = in.s_addr & htonl(msk); 4250 hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_rdrrules_sz); 4251 for (np = ifs->ifs_rdr_rules[hv]; np; np = npnext) { 4252 npnext = np->in_rnext; 4253 if (np->in_ifps[0] && (np->in_ifps[0] != ifp)) 4254 continue; 4255 if (np->in_v != fin->fin_v) 4256 continue; 4257 if (np->in_p && (np->in_p != fin->fin_p)) 4258 continue; 4259 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags)) 4260 continue; 4261 if (np->in_flags & IPN_FILTER) { 4262 if (!nat_match(fin, np)) 4263 continue; 4264 } else { 4265 if ((in.s_addr & np->in_outmsk) != np->in_outip) 4266 continue; 4267 if (np->in_pmin && 4268 ((ntohs(np->in_pmax) < ntohs(dport)) || 4269 (ntohs(dport) < ntohs(np->in_pmin)))) 4270 continue; 4271 } 4272 4273 if (*np->in_plabel != '\0') { 4274 if (!appr_ok(fin, tcp, np)) { 4275 continue; 4276 } 4277 } 4278 4279 ATOMIC_INC32(np->in_use); 4280 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4281 WRITE_ENTER(&ifs->ifs_ipf_nat); 4282 nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND); 4283 if (nat != NULL) { 4284 np->in_use--; 4285 np->in_hits++; 4286 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 4287 break; 4288 } 4289 natfailed = -1; 4290 npnext = np->in_rnext; 4291 fr_ipnatderef(&np, ifs); 4292 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 4293 } 4294 4295 if ((np == NULL) && (rmsk != 0)) { 4296 while (rmsk) { 4297 msk <<= 1; 4298 if (rmsk & 0x80000000) 4299 break; 4300 rmsk <<= 1; 4301 } 4302 if (rmsk != 0) { 4303 rmsk <<= 1; 4304 goto maskloop; 4305 } 4306 } 4307 } 4308 if (nat != NULL) { 4309 rval = fr_natin(fin, nat, natadd, nflags); 4310 if (rval == 1) { 4311 MUTEX_ENTER(&nat->nat_lock); 4312 nat_update(fin, nat, nat->nat_ptr); 4313 nat->nat_bytes[0] += fin->fin_plen; 4314 nat->nat_pkts[0]++; 4315 nat->nat_ref++; 4316 MUTEX_EXIT(&nat->nat_lock); 4317 nat->nat_touched = ifs->ifs_fr_ticks; 4318 fin->fin_nat = nat; 4319 fin->fin_state = nat->nat_state; 4320 } 4321 } else 4322 rval = natfailed; 4323 
RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4324 4325 if (rval == -1) { 4326 if (passp != NULL) 4327 *passp = FR_BLOCK; 4328 fin->fin_flx |= FI_BADNAT; 4329 } 4330 return rval; 4331 } 4332 4333 4334 /* ------------------------------------------------------------------------ */ 4335 /* Function: fr_natin */ 4336 /* Returns: int - -1 == packet failed NAT checks so block it, */ 4337 /* 1 == packet was successfully translated. */ 4338 /* Parameters: fin(I) - pointer to packet information */ 4339 /* nat(I) - pointer to NAT structure */ 4340 /* natadd(I) - flag indicating if it is safe to add frag cache */ 4341 /* nflags(I) - NAT flags set for this packet */ 4342 /* Locks Held: ipf_nat (READ) */ 4343 /* */ 4344 /* Translate a packet coming "in" on an interface. */ 4345 /* ------------------------------------------------------------------------ */ 4346 int fr_natin(fin, nat, natadd, nflags) 4347 fr_info_t *fin; 4348 nat_t *nat; 4349 int natadd; 4350 u_32_t nflags; 4351 { 4352 icmphdr_t *icmp; 4353 u_short *csump; 4354 tcphdr_t *tcp; 4355 ipnat_t *np; 4356 int i; 4357 ipf_stack_t *ifs = fin->fin_ifs; 4358 4359 if (fin->fin_v == 6) { 4360 #ifdef USE_INET6 4361 return fr_nat6in(fin, nat, natadd, nflags); 4362 #else 4363 return NULL; 4364 #endif 4365 } 4366 4367 #if SOLARIS && defined(_KERNEL) 4368 net_data_t net_data_p = ifs->ifs_ipf_ipv4; 4369 #endif 4370 4371 tcp = NULL; 4372 csump = NULL; 4373 np = nat->nat_ptr; 4374 fin->fin_fr = nat->nat_fr; 4375 4376 if ((natadd != 0) && (fin->fin_flx & FI_FRAG)) 4377 (void) fr_nat_newfrag(fin, 0, nat); 4378 4379 if (np != NULL) { 4380 4381 /* ------------------------------------------------------------- */ 4382 /* A few quick notes: */ 4383 /* Following are test conditions prior to calling the */ 4384 /* appr_check routine. */ 4385 /* */ 4386 /* A NULL tcp indicates a non TCP/UDP packet. 
When dealing */ 4387 /* with a map rule, we attempt to match the packet's */ 4388 /* source port against in_dport, otherwise we'd compare the */ 4389 /* packet's destination. */ 4390 /* ------------------------------------------------------------- */ 4391 if (np->in_apr != NULL) { 4392 i = appr_check(fin, nat); 4393 if (i == -1) { 4394 return -1; 4395 } 4396 } 4397 } 4398 4399 #ifdef IPFILTER_SYNC 4400 ipfsync_update(SMC_NAT, fin, nat->nat_sync); 4401 #endif 4402 4403 fin->fin_ip->ip_dst = nat->nat_inip; 4404 fin->fin_fi.fi_daddr = nat->nat_inip.s_addr; 4405 if (nflags & IPN_TCPUDP) 4406 tcp = fin->fin_dp; 4407 4408 /* 4409 * Fix up checksums, not by recalculating them, but 4410 * simply computing adjustments. 4411 * Why only do this for some platforms on inbound packets ? 4412 * Because for those that it is done, IP processing is yet to happen 4413 * and so the IPv4 header checksum has not yet been evaluated. 4414 * Perhaps it should always be done for the benefit of things like 4415 * fast forwarding (so that it doesn't need to be recomputed) but with 4416 * header checksum offloading, perhaps it is a moot point. 
4417 */ 4418 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ 4419 defined(__osf__) || defined(linux) 4420 if (nat->nat_dir == NAT_OUTBOUND) 4421 fix_incksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd); 4422 else 4423 fix_outcksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd); 4424 #endif 4425 4426 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 4427 if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) { 4428 tcp->th_dport = nat->nat_inport; 4429 fin->fin_data[1] = ntohs(nat->nat_inport); 4430 } 4431 4432 4433 if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) { 4434 icmp = fin->fin_dp; 4435 4436 icmp->icmp_id = nat->nat_inport; 4437 } 4438 4439 csump = nat_proto(fin, nat, nflags); 4440 } 4441 4442 /* 4443 * In case they are being forwarded, inbound packets always need to have 4444 * their checksum adjusted even if hardware checksum validation said OK. 4445 */ 4446 if (csump != NULL) { 4447 if (nat->nat_dir == NAT_OUTBOUND) 4448 fix_incksum(csump, nat->nat_sumd[0]); 4449 else 4450 fix_outcksum(csump, nat->nat_sumd[0]); 4451 } 4452 4453 #if SOLARIS && defined(_KERNEL) 4454 if (nflags & IPN_TCPUDP && 4455 NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) { 4456 /* 4457 * Need to adjust the partial checksum result stored in 4458 * db_cksum16, which will be used for validation in IP. 4459 * See IP_CKSUM_RECV(). 4460 * Adjustment data should be the inverse of the IP address 4461 * changes, because db_cksum16 is supposed to be the complement 4462 * of the pesudo header. 
4463 */ 4464 csump = &fin->fin_m->b_datap->db_cksum16; 4465 if (nat->nat_dir == NAT_OUTBOUND) 4466 fix_outcksum(csump, nat->nat_sumd[1]); 4467 else 4468 fix_incksum(csump, nat->nat_sumd[1]); 4469 } 4470 #endif 4471 4472 ifs->ifs_nat_stats.ns_mapped[0]++; 4473 fin->fin_flx |= FI_NATED; 4474 if (np != NULL && np->in_tag.ipt_num[0] != 0) 4475 fin->fin_nattag = &np->in_tag; 4476 return 1; 4477 } 4478 4479 4480 /* ------------------------------------------------------------------------ */ 4481 /* Function: nat_proto */ 4482 /* Returns: u_short* - pointer to transport header checksum to update, */ 4483 /* NULL if the transport protocol is not recognised */ 4484 /* as needing a checksum update. */ 4485 /* Parameters: fin(I) - pointer to packet information */ 4486 /* nat(I) - pointer to NAT structure */ 4487 /* nflags(I) - NAT flags set for this packet */ 4488 /* */ 4489 /* Return the pointer to the checksum field for each protocol so understood.*/ 4490 /* If support for making other changes to a protocol header is required, */ 4491 /* that is not strictly 'address' translation, such as clamping the MSS in */ 4492 /* TCP down to a specific value, then do it from here. */ 4493 /* ------------------------------------------------------------------------ */ 4494 u_short *nat_proto(fin, nat, nflags) 4495 fr_info_t *fin; 4496 nat_t *nat; 4497 u_int nflags; 4498 { 4499 icmphdr_t *icmp; 4500 struct icmp6_hdr *icmp6; 4501 u_short *csump; 4502 tcphdr_t *tcp; 4503 udphdr_t *udp; 4504 4505 csump = NULL; 4506 if (fin->fin_out == 0) { 4507 fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND); 4508 } else { 4509 fin->fin_rev = (nat->nat_dir == NAT_INBOUND); 4510 } 4511 4512 switch (fin->fin_p) 4513 { 4514 case IPPROTO_TCP : 4515 tcp = fin->fin_dp; 4516 4517 csump = &tcp->th_sum; 4518 4519 /* 4520 * Do a MSS CLAMPING on a SYN packet, 4521 * only deal IPv4 for now. 
4522 */ 4523 if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0) 4524 nat_mssclamp(tcp, nat->nat_mssclamp, csump); 4525 4526 break; 4527 4528 case IPPROTO_UDP : 4529 udp = fin->fin_dp; 4530 4531 if (udp->uh_sum) 4532 csump = &udp->uh_sum; 4533 break; 4534 4535 case IPPROTO_ICMP : 4536 icmp = fin->fin_dp; 4537 4538 if ((nflags & IPN_ICMPQUERY) != 0) { 4539 if (icmp->icmp_cksum != 0) 4540 csump = &icmp->icmp_cksum; 4541 } 4542 break; 4543 4544 case IPPROTO_ICMPV6 : 4545 icmp6 = fin->fin_dp; 4546 4547 if ((nflags & IPN_ICMPQUERY) != 0) { 4548 if (icmp6->icmp6_cksum != 0) 4549 csump = &icmp6->icmp6_cksum; 4550 } 4551 break; 4552 } 4553 return csump; 4554 } 4555 4556 4557 /* ------------------------------------------------------------------------ */ 4558 /* Function: fr_natunload */ 4559 /* Returns: Nil */ 4560 /* Parameters: Nil */ 4561 /* */ 4562 /* Free all memory used by NAT structures allocated at runtime. */ 4563 /* ------------------------------------------------------------------------ */ 4564 void fr_natunload(ifs) 4565 ipf_stack_t *ifs; 4566 { 4567 ipftq_t *ifq, *ifqnext; 4568 4569 (void) nat_clearlist(ifs); 4570 (void) nat_flushtable(ifs); 4571 4572 /* 4573 * Proxy timeout queues are not cleaned here because although they 4574 * exist on the NAT list, appr_unload is called after fr_natunload 4575 * and the proxies actually are responsible for them being created. 4576 * Should the proxy timeouts have their own list? There's no real 4577 * justification as this is the only complication. 
4578 */ 4579 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { 4580 ifqnext = ifq->ifq_next; 4581 if (((ifq->ifq_flags & IFQF_PROXY) == 0) && 4582 (fr_deletetimeoutqueue(ifq) == 0)) 4583 fr_freetimeoutqueue(ifq, ifs); 4584 } 4585 4586 if (ifs->ifs_nat_table[0] != NULL) { 4587 KFREES(ifs->ifs_nat_table[0], 4588 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 4589 ifs->ifs_nat_table[0] = NULL; 4590 } 4591 if (ifs->ifs_nat_table[1] != NULL) { 4592 KFREES(ifs->ifs_nat_table[1], 4593 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 4594 ifs->ifs_nat_table[1] = NULL; 4595 } 4596 if (ifs->ifs_nat_rules != NULL) { 4597 KFREES(ifs->ifs_nat_rules, 4598 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz); 4599 ifs->ifs_nat_rules = NULL; 4600 } 4601 if (ifs->ifs_rdr_rules != NULL) { 4602 KFREES(ifs->ifs_rdr_rules, 4603 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz); 4604 ifs->ifs_rdr_rules = NULL; 4605 } 4606 if (ifs->ifs_maptable != NULL) { 4607 KFREES(ifs->ifs_maptable, 4608 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); 4609 ifs->ifs_maptable = NULL; 4610 } 4611 if (ifs->ifs_nat_stats.ns_bucketlen[0] != NULL) { 4612 KFREES(ifs->ifs_nat_stats.ns_bucketlen[0], 4613 sizeof(u_long *) * ifs->ifs_ipf_nattable_sz); 4614 ifs->ifs_nat_stats.ns_bucketlen[0] = NULL; 4615 } 4616 if (ifs->ifs_nat_stats.ns_bucketlen[1] != NULL) { 4617 KFREES(ifs->ifs_nat_stats.ns_bucketlen[1], 4618 sizeof(u_long *) * ifs->ifs_ipf_nattable_sz); 4619 ifs->ifs_nat_stats.ns_bucketlen[1] = NULL; 4620 } 4621 4622 if (ifs->ifs_fr_nat_maxbucket_reset == 1) 4623 ifs->ifs_fr_nat_maxbucket = 0; 4624 4625 if (ifs->ifs_fr_nat_init == 1) { 4626 ifs->ifs_fr_nat_init = 0; 4627 fr_sttab_destroy(ifs->ifs_nat_tqb); 4628 4629 RW_DESTROY(&ifs->ifs_ipf_natfrag); 4630 RW_DESTROY(&ifs->ifs_ipf_nat); 4631 4632 MUTEX_DESTROY(&ifs->ifs_ipf_nat_new); 4633 MUTEX_DESTROY(&ifs->ifs_ipf_natio); 4634 4635 MUTEX_DESTROY(&ifs->ifs_nat_udptq.ifq_lock); 4636 MUTEX_DESTROY(&ifs->ifs_nat_icmptq.ifq_lock); 4637 
MUTEX_DESTROY(&ifs->ifs_nat_iptq.ifq_lock); 4638 } 4639 } 4640 4641 4642 /* ------------------------------------------------------------------------ */ 4643 /* Function: fr_natexpire */ 4644 /* Returns: Nil */ 4645 /* Parameters: Nil */ 4646 /* */ 4647 /* Check all of the timeout queues for entries at the top which need to be */ 4648 /* expired. */ 4649 /* ------------------------------------------------------------------------ */ 4650 void fr_natexpire(ifs) 4651 ipf_stack_t *ifs; 4652 { 4653 ipftq_t *ifq, *ifqnext; 4654 ipftqent_t *tqe, *tqn; 4655 int i; 4656 SPL_INT(s); 4657 4658 SPL_NET(s); 4659 WRITE_ENTER(&ifs->ifs_ipf_nat); 4660 for (ifq = ifs->ifs_nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) { 4661 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { 4662 if (tqe->tqe_die > ifs->ifs_fr_ticks) 4663 break; 4664 tqn = tqe->tqe_next; 4665 nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs); 4666 } 4667 } 4668 4669 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { 4670 ifqnext = ifq->ifq_next; 4671 4672 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { 4673 if (tqe->tqe_die > ifs->ifs_fr_ticks) 4674 break; 4675 tqn = tqe->tqe_next; 4676 nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs); 4677 } 4678 } 4679 4680 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { 4681 ifqnext = ifq->ifq_next; 4682 4683 if (((ifq->ifq_flags & IFQF_DELETE) != 0) && 4684 (ifq->ifq_ref == 0)) { 4685 fr_freetimeoutqueue(ifq, ifs); 4686 } 4687 } 4688 4689 if (ifs->ifs_nat_doflush != 0) { 4690 (void) nat_extraflush(2, ifs); 4691 ifs->ifs_nat_doflush = 0; 4692 } 4693 4694 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4695 SPL_X(s); 4696 } 4697 4698 4699 /* ------------------------------------------------------------------------ */ 4700 /* Function: fr_nataddrsync */ 4701 /* Returns: Nil */ 4702 /* Parameters: ifp(I) - pointer to network interface */ 4703 /* addr(I) - pointer to new network address */ 4704 /* */ 4705 /* Walk through all of the currently active NAT sessions, looking for 
those */ 4706 /* which need to have their translated address updated (where the interface */ 4707 /* matches the one passed in) and change it, recalculating the checksum sum */ 4708 /* difference too. */ 4709 /* ------------------------------------------------------------------------ */ 4710 void fr_nataddrsync(v, ifp, addr, ifs) 4711 int v; 4712 void *ifp; 4713 void *addr; 4714 ipf_stack_t *ifs; 4715 { 4716 u_32_t sum1, sum2, sumd; 4717 nat_t *nat; 4718 ipnat_t *np; 4719 SPL_INT(s); 4720 4721 if (ifs->ifs_fr_running <= 0) 4722 return; 4723 4724 SPL_NET(s); 4725 WRITE_ENTER(&ifs->ifs_ipf_nat); 4726 4727 if (ifs->ifs_fr_running <= 0) { 4728 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4729 return; 4730 } 4731 4732 /* 4733 * Change IP addresses for NAT sessions for any protocol except TCP 4734 * since it will break the TCP connection anyway. The only rules 4735 * which will get changed are those which are "map ... -> 0/32", 4736 * where the rule specifies the address is taken from the interface. 4737 */ 4738 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4739 if (addr != NULL) { 4740 if (((ifp != NULL) && ifp != (nat->nat_ifps[0])) || 4741 ((nat->nat_flags & IPN_TCP) != 0)) 4742 continue; 4743 if ((np = nat->nat_ptr) == NULL) 4744 continue; 4745 if (v == 4 && np->in_v == 4) { 4746 if (np->in_nip || np->in_outmsk != 0xffffffff) 4747 continue; 4748 /* 4749 * Change the map-to address to be the same as 4750 * the new one. 4751 */ 4752 sum1 = nat->nat_outip.s_addr; 4753 nat->nat_outip = *(struct in_addr *)addr; 4754 sum2 = nat->nat_outip.s_addr; 4755 } else if (v == 6 && np->in_v == 6) { 4756 if (!IP6_ISZERO(&np->in_next6.in6) || 4757 !IP6_ISONES(&np->in_out[1].in6)) 4758 continue; 4759 /* 4760 * Change the map-to address to be the same as 4761 * the new one. 
4762 */ 4763 nat->nat_outip6.in6 = *(struct in6_addr *)addr; 4764 } else 4765 continue; 4766 4767 } else if (((ifp == NULL) || (ifp == nat->nat_ifps[0])) && 4768 !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr)) { 4769 if (np->in_v == 4 && (v == 4 || v == 0)) { 4770 struct in_addr in; 4771 if (np->in_outmsk != 0xffffffff || np->in_nip) 4772 continue; 4773 /* 4774 * Change the map-to address to be the same as 4775 * the new one. 4776 */ 4777 sum1 = nat->nat_outip.s_addr; 4778 if (fr_ifpaddr(4, FRI_NORMAL, nat->nat_ifps[0], 4779 &in, NULL, ifs) != -1) 4780 nat->nat_outip = in; 4781 sum2 = nat->nat_outip.s_addr; 4782 } else if (np->in_v == 6 && (v == 6 || v == 0)) { 4783 struct in6_addr in6; 4784 if (!IP6_ISZERO(&np->in_next6.in6) || 4785 !IP6_ISONES(&np->in_out[1].in6)) 4786 continue; 4787 /* 4788 * Change the map-to address to be the same as 4789 * the new one. 4790 */ 4791 if (fr_ifpaddr(6, FRI_NORMAL, nat->nat_ifps[0], 4792 (void *)&in6, NULL, ifs) != -1) 4793 nat->nat_outip6.in6 = in6; 4794 } else 4795 continue; 4796 } else { 4797 continue; 4798 } 4799 4800 if (sum1 == sum2) 4801 continue; 4802 /* 4803 * Readjust the checksum adjustment to take into 4804 * account the new IP#. 4805 */ 4806 CALC_SUMD(sum1, sum2, sumd); 4807 /* XXX - dont change for TCP when solaris does 4808 * hardware checksumming. 4809 */ 4810 sumd += nat->nat_sumd[0]; 4811 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); 4812 nat->nat_sumd[1] = nat->nat_sumd[0]; 4813 } 4814 4815 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4816 SPL_X(s); 4817 } 4818 4819 4820 /* ------------------------------------------------------------------------ */ 4821 /* Function: fr_natifpsync */ 4822 /* Returns: Nil */ 4823 /* Parameters: action(I) - how we are syncing */ 4824 /* ifp(I) - pointer to network interface */ 4825 /* name(I) - name of interface to sync to */ 4826 /* */ 4827 /* This function is used to resync the mapping of interface names and their */ 4828 /* respective 'pointers'. 
For "action == IPFSYNC_RESYNC", resync all */ 4829 /* interfaces by doing a new lookup of name to 'pointer'. For "action == */ 4830 /* IPFSYNC_NEWIFP", treat ifp as the new pointer value associated with */ 4831 /* "name" and for "action == IPFSYNC_OLDIFP", ifp is a pointer for which */ 4832 /* there is no longer any interface associated with it. */ 4833 /* ------------------------------------------------------------------------ */ 4834 void fr_natifpsync(action, v, ifp, name, ifs) 4835 int action, v; 4836 void *ifp; 4837 char *name; 4838 ipf_stack_t *ifs; 4839 { 4840 #if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL) 4841 int s; 4842 #endif 4843 nat_t *nat; 4844 ipnat_t *n; 4845 int nv; 4846 4847 if (ifs->ifs_fr_running <= 0) 4848 return; 4849 4850 SPL_NET(s); 4851 WRITE_ENTER(&ifs->ifs_ipf_nat); 4852 4853 if (ifs->ifs_fr_running <= 0) { 4854 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4855 return; 4856 } 4857 4858 switch (action) 4859 { 4860 case IPFSYNC_RESYNC : 4861 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4862 nv = (v == 0) ? nat->nat_v : v; 4863 if (nat->nat_v != nv) 4864 continue; 4865 if ((ifp == nat->nat_ifps[0]) || 4866 (nat->nat_ifps[0] == (void *)-1)) { 4867 nat->nat_ifps[0] = 4868 fr_resolvenic(nat->nat_ifnames[0], nv, ifs); 4869 } 4870 4871 if ((ifp == nat->nat_ifps[1]) || 4872 (nat->nat_ifps[1] == (void *)-1)) { 4873 nat->nat_ifps[1] = 4874 fr_resolvenic(nat->nat_ifnames[1], nv, ifs); 4875 } 4876 } 4877 4878 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { 4879 nv = (v == 0) ? 
(int)n->in_v : v; 4880 if ((int)n->in_v != nv) 4881 continue; 4882 if (n->in_ifps[0] == ifp || 4883 n->in_ifps[0] == (void *)-1) { 4884 n->in_ifps[0] = 4885 fr_resolvenic(n->in_ifnames[0], nv, ifs); 4886 } 4887 if (n->in_ifps[1] == ifp || 4888 n->in_ifps[1] == (void *)-1) { 4889 n->in_ifps[1] = 4890 fr_resolvenic(n->in_ifnames[1], nv, ifs); 4891 } 4892 } 4893 break; 4894 case IPFSYNC_NEWIFP : 4895 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4896 if (nat->nat_v != v) 4897 continue; 4898 if (!strncmp(name, nat->nat_ifnames[0], 4899 sizeof(nat->nat_ifnames[0]))) 4900 nat->nat_ifps[0] = ifp; 4901 if (!strncmp(name, nat->nat_ifnames[1], 4902 sizeof(nat->nat_ifnames[1]))) 4903 nat->nat_ifps[1] = ifp; 4904 } 4905 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { 4906 if ((int)n->in_v != v) 4907 continue; 4908 if (!strncmp(name, n->in_ifnames[0], 4909 sizeof(n->in_ifnames[0]))) 4910 n->in_ifps[0] = ifp; 4911 if (!strncmp(name, n->in_ifnames[1], 4912 sizeof(n->in_ifnames[1]))) 4913 n->in_ifps[1] = ifp; 4914 } 4915 break; 4916 case IPFSYNC_OLDIFP : 4917 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4918 if (nat->nat_v != v) 4919 continue; 4920 if (ifp == nat->nat_ifps[0]) 4921 nat->nat_ifps[0] = (void *)-1; 4922 if (ifp == nat->nat_ifps[1]) 4923 nat->nat_ifps[1] = (void *)-1; 4924 } 4925 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { 4926 if ((int)n->in_v != v) 4927 continue; 4928 if (n->in_ifps[0] == ifp) 4929 n->in_ifps[0] = (void *)-1; 4930 if (n->in_ifps[1] == ifp) 4931 n->in_ifps[1] = (void *)-1; 4932 } 4933 break; 4934 } 4935 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4936 SPL_X(s); 4937 } 4938 4939 4940 /* ------------------------------------------------------------------------ */ 4941 /* Function: nat_icmpquerytype4 */ 4942 /* Returns: int - 1 == success, 0 == failure */ 4943 /* Parameters: icmptype(I) - ICMP type number */ 4944 /* */ 4945 /* Tests to see if the ICMP type number passed is a query/response type or */ 4946 
/* not. */ 4947 /* ------------------------------------------------------------------------ */ 4948 static INLINE int nat_icmpquerytype4(icmptype) 4949 int icmptype; 4950 { 4951 4952 /* 4953 * For the ICMP query NAT code, it is essential that both the query 4954 * and the reply match on the NAT rule. Because the NAT structure 4955 * does not keep track of the icmptype, and a single NAT structure 4956 * is used for all icmp types with the same src, dest and id, we 4957 * simply define the replies as queries as well. The funny thing is, 4958 * altough it seems silly to call a reply a query, this is exactly 4959 * as it is defined in the IPv4 specification 4960 */ 4961 4962 switch (icmptype) 4963 { 4964 4965 case ICMP_ECHOREPLY: 4966 case ICMP_ECHO: 4967 /* route aedvertisement/solliciation is currently unsupported: */ 4968 /* it would require rewriting the ICMP data section */ 4969 case ICMP_TSTAMP: 4970 case ICMP_TSTAMPREPLY: 4971 case ICMP_IREQ: 4972 case ICMP_IREQREPLY: 4973 case ICMP_MASKREQ: 4974 case ICMP_MASKREPLY: 4975 return 1; 4976 default: 4977 return 0; 4978 } 4979 } 4980 4981 4982 /* ------------------------------------------------------------------------ */ 4983 /* Function: nat_log */ 4984 /* Returns: Nil */ 4985 /* Parameters: nat(I) - pointer to NAT structure */ 4986 /* type(I) - type of log entry to create */ 4987 /* */ 4988 /* Creates a NAT log entry. 
*/ 4989 /* ------------------------------------------------------------------------ */ 4990 void nat_log(nat, type, ifs) 4991 struct nat *nat; 4992 u_int type; 4993 ipf_stack_t *ifs; 4994 { 4995 #ifdef IPFILTER_LOG 4996 # ifndef LARGE_NAT 4997 struct ipnat *np; 4998 int rulen; 4999 # endif 5000 struct natlog natl; 5001 void *items[1]; 5002 size_t sizes[1]; 5003 int types[1]; 5004 5005 natl.nlg_inip = nat->nat_inip6; 5006 natl.nlg_outip = nat->nat_outip6; 5007 natl.nlg_origip = nat->nat_oip6; 5008 natl.nlg_bytes[0] = nat->nat_bytes[0]; 5009 natl.nlg_bytes[1] = nat->nat_bytes[1]; 5010 natl.nlg_pkts[0] = nat->nat_pkts[0]; 5011 natl.nlg_pkts[1] = nat->nat_pkts[1]; 5012 natl.nlg_origport = nat->nat_oport; 5013 natl.nlg_inport = nat->nat_inport; 5014 natl.nlg_outport = nat->nat_outport; 5015 natl.nlg_p = nat->nat_p; 5016 natl.nlg_type = type; 5017 natl.nlg_rule = -1; 5018 natl.nlg_v = nat->nat_v; 5019 # ifndef LARGE_NAT 5020 if (nat->nat_ptr != NULL) { 5021 for (rulen = 0, np = ifs->ifs_nat_list; np; 5022 np = np->in_next, rulen++) 5023 if (np == nat->nat_ptr) { 5024 natl.nlg_rule = rulen; 5025 break; 5026 } 5027 } 5028 # endif 5029 items[0] = &natl; 5030 sizes[0] = sizeof(natl); 5031 types[0] = 0; 5032 5033 (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1, ifs); 5034 #endif 5035 } 5036 5037 5038 #if defined(__OpenBSD__) 5039 /* ------------------------------------------------------------------------ */ 5040 /* Function: nat_ifdetach */ 5041 /* Returns: Nil */ 5042 /* Parameters: ifp(I) - pointer to network interface */ 5043 /* */ 5044 /* Compatibility interface for OpenBSD to trigger the correct updating of */ 5045 /* interface references within IPFilter. 
*/ 5046 /* ------------------------------------------------------------------------ */ 5047 void nat_ifdetach(ifp, ifs) 5048 void *ifp; 5049 ipf_stack_t *ifs; 5050 { 5051 frsync(ifp, ifs); 5052 return; 5053 } 5054 #endif 5055 5056 5057 /* ------------------------------------------------------------------------ */ 5058 /* Function: fr_ipnatderef */ 5059 /* Returns: Nil */ 5060 /* Parameters: inp(I) - pointer to pointer to NAT rule */ 5061 /* Write Locks: ipf_nat */ 5062 /* */ 5063 /* ------------------------------------------------------------------------ */ 5064 void fr_ipnatderef(inp, ifs) 5065 ipnat_t **inp; 5066 ipf_stack_t *ifs; 5067 { 5068 ipnat_t *in; 5069 5070 in = *inp; 5071 *inp = NULL; 5072 in->in_use--; 5073 if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) { 5074 if (in->in_apr) 5075 appr_free(in->in_apr); 5076 KFREE(in); 5077 ifs->ifs_nat_stats.ns_rules--; 5078 #ifdef notdef 5079 #if SOLARIS 5080 if (ifs->ifs_nat_stats.ns_rules == 0) 5081 ifs->ifs_pfil_delayed_copy = 1; 5082 #endif 5083 #endif 5084 } 5085 } 5086 5087 5088 /* ------------------------------------------------------------------------ */ 5089 /* Function: fr_natderef */ 5090 /* Returns: Nil */ 5091 /* Parameters: isp(I) - pointer to pointer to NAT table entry */ 5092 /* */ 5093 /* Decrement the reference counter for this NAT table entry and free it if */ 5094 /* there are no more things using it. */ 5095 /* */ 5096 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */ 5097 /* structure *because* it only gets called on paths _after_ nat_ref has been*/ 5098 /* incremented. If nat_ref == 1 then we shouldn't decrement it here */ 5099 /* because nat_delete() will do that and send nat_ref to -1. 
*/ 5100 /* */ 5101 /* Holding the lock on nat_lock is required to serialise nat_delete() being */ 5102 /* called from a NAT flush ioctl with a deref happening because of a packet.*/ 5103 /* ------------------------------------------------------------------------ */ 5104 void fr_natderef(natp, ifs) 5105 nat_t **natp; 5106 ipf_stack_t *ifs; 5107 { 5108 nat_t *nat; 5109 5110 nat = *natp; 5111 *natp = NULL; 5112 5113 MUTEX_ENTER(&nat->nat_lock); 5114 if (nat->nat_ref > 1) { 5115 nat->nat_ref--; 5116 MUTEX_EXIT(&nat->nat_lock); 5117 return; 5118 } 5119 MUTEX_EXIT(&nat->nat_lock); 5120 5121 WRITE_ENTER(&ifs->ifs_ipf_nat); 5122 nat_delete(nat, NL_EXPIRE, ifs); 5123 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5124 } 5125 5126 5127 /* ------------------------------------------------------------------------ */ 5128 /* Function: fr_natclone */ 5129 /* Returns: ipstate_t* - NULL == cloning failed, */ 5130 /* else pointer to new state structure */ 5131 /* Parameters: fin(I) - pointer to packet information */ 5132 /* is(I) - pointer to master state structure */ 5133 /* Write Lock: ipf_nat */ 5134 /* */ 5135 /* Create a "duplcate" state table entry from the master. */ 5136 /* ------------------------------------------------------------------------ */ 5137 nat_t *fr_natclone(fin, nat) 5138 fr_info_t *fin; 5139 nat_t *nat; 5140 { 5141 frentry_t *fr; 5142 nat_t *clone; 5143 ipnat_t *np; 5144 ipf_stack_t *ifs = fin->fin_ifs; 5145 5146 KMALLOC(clone, nat_t *); 5147 if (clone == NULL) 5148 return NULL; 5149 bcopy((char *)nat, (char *)clone, sizeof(*clone)); 5150 5151 MUTEX_NUKE(&clone->nat_lock); 5152 5153 clone->nat_aps = NULL; 5154 /* 5155 * Initialize all these so that nat_delete() doesn't cause a crash. 
5156 */ 5157 clone->nat_tqe.tqe_pnext = NULL; 5158 clone->nat_tqe.tqe_next = NULL; 5159 clone->nat_tqe.tqe_ifq = NULL; 5160 clone->nat_tqe.tqe_parent = clone; 5161 5162 clone->nat_flags &= ~SI_CLONE; 5163 clone->nat_flags |= SI_CLONED; 5164 5165 if (clone->nat_hm) 5166 clone->nat_hm->hm_ref++; 5167 5168 if (nat_insert(clone, fin->fin_rev, ifs) == -1) { 5169 KFREE(clone); 5170 return NULL; 5171 } 5172 np = clone->nat_ptr; 5173 if (np != NULL) { 5174 if (ifs->ifs_nat_logging) 5175 nat_log(clone, (u_int)np->in_redir, ifs); 5176 np->in_use++; 5177 } 5178 fr = clone->nat_fr; 5179 if (fr != NULL) { 5180 MUTEX_ENTER(&fr->fr_lock); 5181 fr->fr_ref++; 5182 MUTEX_EXIT(&fr->fr_lock); 5183 } 5184 5185 /* 5186 * Because the clone is created outside the normal loop of things and 5187 * TCP has special needs in terms of state, initialise the timeout 5188 * state of the new NAT from here. 5189 */ 5190 if (clone->nat_p == IPPROTO_TCP) { 5191 (void) fr_tcp_age(&clone->nat_tqe, fin, ifs->ifs_nat_tqb, 5192 clone->nat_flags); 5193 } 5194 #ifdef IPFILTER_SYNC 5195 clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone); 5196 #endif 5197 if (ifs->ifs_nat_logging) 5198 nat_log(clone, NL_CLONE, ifs); 5199 return clone; 5200 } 5201 5202 5203 /* ------------------------------------------------------------------------ */ 5204 /* Function: nat_wildok */ 5205 /* Returns: int - 1 == packet's ports match wildcards */ 5206 /* 0 == packet's ports don't match wildcards */ 5207 /* Parameters: nat(I) - NAT entry */ 5208 /* sport(I) - source port */ 5209 /* dport(I) - destination port */ 5210 /* flags(I) - wildcard flags */ 5211 /* dir(I) - packet direction */ 5212 /* */ 5213 /* Use NAT entry and packet direction to determine which combination of */ 5214 /* wildcard flags should be used. 
*/ 5215 /* ------------------------------------------------------------------------ */ 5216 int nat_wildok(nat, sport, dport, flags, dir) 5217 nat_t *nat; 5218 int sport; 5219 int dport; 5220 int flags; 5221 int dir; 5222 { 5223 /* 5224 * When called by dir is set to 5225 * nat_inlookup NAT_INBOUND (0) 5226 * nat_outlookup NAT_OUTBOUND (1) 5227 * 5228 * We simply combine the packet's direction in dir with the original 5229 * "intended" direction of that NAT entry in nat->nat_dir to decide 5230 * which combination of wildcard flags to allow. 5231 */ 5232 5233 switch ((dir << 1) | nat->nat_dir) 5234 { 5235 case 3: /* outbound packet / outbound entry */ 5236 if (((nat->nat_inport == sport) || 5237 (flags & SI_W_SPORT)) && 5238 ((nat->nat_oport == dport) || 5239 (flags & SI_W_DPORT))) 5240 return 1; 5241 break; 5242 case 2: /* outbound packet / inbound entry */ 5243 if (((nat->nat_outport == sport) || 5244 (flags & SI_W_DPORT)) && 5245 ((nat->nat_oport == dport) || 5246 (flags & SI_W_SPORT))) 5247 return 1; 5248 break; 5249 case 1: /* inbound packet / outbound entry */ 5250 if (((nat->nat_oport == sport) || 5251 (flags & SI_W_DPORT)) && 5252 ((nat->nat_outport == dport) || 5253 (flags & SI_W_SPORT))) 5254 return 1; 5255 break; 5256 case 0: /* inbound packet / inbound entry */ 5257 if (((nat->nat_oport == sport) || 5258 (flags & SI_W_SPORT)) && 5259 ((nat->nat_outport == dport) || 5260 (flags & SI_W_DPORT))) 5261 return 1; 5262 break; 5263 default: 5264 break; 5265 } 5266 5267 return(0); 5268 } 5269 5270 5271 /* ------------------------------------------------------------------------ */ 5272 /* Function: nat_mssclamp */ 5273 /* Returns: Nil */ 5274 /* Parameters: tcp(I) - pointer to TCP header */ 5275 /* maxmss(I) - value to clamp the TCP MSS to */ 5276 /* csump(I) - pointer to TCP checksum */ 5277 /* */ 5278 /* Check for MSS option and clamp it if necessary. 
If found and changed, */ 5279 /* then the TCP header checksum will be updated to reflect the change in */ 5280 /* the MSS. */ 5281 /* ------------------------------------------------------------------------ */ 5282 static void nat_mssclamp(tcp, maxmss, csump) 5283 tcphdr_t *tcp; 5284 u_32_t maxmss; 5285 u_short *csump; 5286 { 5287 u_char *cp, *ep, opt; 5288 int hlen, advance; 5289 u_32_t mss, sumd; 5290 5291 hlen = TCP_OFF(tcp) << 2; 5292 if (hlen > sizeof(*tcp)) { 5293 cp = (u_char *)tcp + sizeof(*tcp); 5294 ep = (u_char *)tcp + hlen; 5295 5296 while (cp < ep) { 5297 opt = cp[0]; 5298 if (opt == TCPOPT_EOL) 5299 break; 5300 else if (opt == TCPOPT_NOP) { 5301 cp++; 5302 continue; 5303 } 5304 5305 if (cp + 1 >= ep) 5306 break; 5307 advance = cp[1]; 5308 if ((cp + advance > ep) || (advance <= 0)) 5309 break; 5310 switch (opt) 5311 { 5312 case TCPOPT_MAXSEG: 5313 if (advance != 4) 5314 break; 5315 mss = cp[2] * 256 + cp[3]; 5316 if (mss > maxmss) { 5317 cp[2] = maxmss / 256; 5318 cp[3] = maxmss & 0xff; 5319 CALC_SUMD(mss, maxmss, sumd); 5320 fix_outcksum(csump, sumd); 5321 } 5322 break; 5323 default: 5324 /* ignore unknown options */ 5325 break; 5326 } 5327 5328 cp += advance; 5329 } 5330 } 5331 } 5332 5333 5334 /* ------------------------------------------------------------------------ */ 5335 /* Function: fr_setnatqueue */ 5336 /* Returns: Nil */ 5337 /* Parameters: nat(I)- pointer to NAT structure */ 5338 /* rev(I) - forward(0) or reverse(1) direction */ 5339 /* Locks: ipf_nat (read or write) */ 5340 /* */ 5341 /* Put the NAT entry on its default queue entry, using rev as a helped in */ 5342 /* determining which queue it should be placed on. 
*/ 5343 /* ------------------------------------------------------------------------ */ 5344 void fr_setnatqueue(nat, rev, ifs) 5345 nat_t *nat; 5346 int rev; 5347 ipf_stack_t *ifs; 5348 { 5349 ipftq_t *oifq, *nifq; 5350 5351 if (nat->nat_ptr != NULL) 5352 nifq = nat->nat_ptr->in_tqehead[rev]; 5353 else 5354 nifq = NULL; 5355 5356 if (nifq == NULL) { 5357 switch (nat->nat_p) 5358 { 5359 case IPPROTO_UDP : 5360 nifq = &ifs->ifs_nat_udptq; 5361 break; 5362 case IPPROTO_ICMP : 5363 nifq = &ifs->ifs_nat_icmptq; 5364 break; 5365 case IPPROTO_TCP : 5366 nifq = ifs->ifs_nat_tqb + nat->nat_tqe.tqe_state[rev]; 5367 break; 5368 default : 5369 nifq = &ifs->ifs_nat_iptq; 5370 break; 5371 } 5372 } 5373 5374 oifq = nat->nat_tqe.tqe_ifq; 5375 /* 5376 * If it's currently on a timeout queue, move it from one queue to 5377 * another, else put it on the end of the newly determined queue. 5378 */ 5379 if (oifq != NULL) 5380 fr_movequeue(&nat->nat_tqe, oifq, nifq, ifs); 5381 else 5382 fr_queueappend(&nat->nat_tqe, nifq, nat, ifs); 5383 return; 5384 } 5385 5386 /* ------------------------------------------------------------------------ */ 5387 /* Function: nat_getnext */ 5388 /* Returns: int - 0 == ok, else error */ 5389 /* Parameters: t(I) - pointer to ipftoken structure */ 5390 /* itp(I) - pointer to ipfgeniter_t structure */ 5391 /* ifs - ipf stack instance */ 5392 /* */ 5393 /* Fetch the next nat/ipnat/hostmap structure pointer from the linked list */ 5394 /* and copy it out to the storage space pointed to by itp. The next item */ 5395 /* in the list to look at is put back in the ipftoken struture. 
*/ 5396 /* ------------------------------------------------------------------------ */ 5397 static int nat_getnext(t, itp, ifs) 5398 ipftoken_t *t; 5399 ipfgeniter_t *itp; 5400 ipf_stack_t *ifs; 5401 { 5402 hostmap_t *hm, *nexthm = NULL, zerohm; 5403 ipnat_t *ipn, *nextipnat = NULL, zeroipn; 5404 nat_t *nat, *nextnat = NULL, zeronat; 5405 int error = 0, count; 5406 char *dst; 5407 5408 if (itp->igi_nitems == 0) 5409 return EINVAL; 5410 5411 READ_ENTER(&ifs->ifs_ipf_nat); 5412 5413 /* 5414 * Get "previous" entry from the token and find the next entry. 5415 */ 5416 switch (itp->igi_type) 5417 { 5418 case IPFGENITER_HOSTMAP : 5419 hm = t->ipt_data; 5420 if (hm == NULL) { 5421 nexthm = ifs->ifs_ipf_hm_maplist; 5422 } else { 5423 nexthm = hm->hm_next; 5424 } 5425 break; 5426 5427 case IPFGENITER_IPNAT : 5428 ipn = t->ipt_data; 5429 if (ipn == NULL) { 5430 nextipnat = ifs->ifs_nat_list; 5431 } else { 5432 nextipnat = ipn->in_next; 5433 } 5434 break; 5435 5436 case IPFGENITER_NAT : 5437 nat = t->ipt_data; 5438 if (nat == NULL) { 5439 nextnat = ifs->ifs_nat_instances; 5440 } else { 5441 nextnat = nat->nat_next; 5442 } 5443 break; 5444 default : 5445 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5446 return EINVAL; 5447 } 5448 5449 dst = itp->igi_data; 5450 for (count = itp->igi_nitems; count > 0; count--) { 5451 /* 5452 * If we found an entry, add a reference to it and update the token. 5453 * Otherwise, zero out data to be returned and NULL out token. 
5454 */ 5455 switch (itp->igi_type) 5456 { 5457 case IPFGENITER_HOSTMAP : 5458 if (nexthm != NULL) { 5459 ATOMIC_INC32(nexthm->hm_ref); 5460 t->ipt_data = nexthm; 5461 } else { 5462 bzero(&zerohm, sizeof(zerohm)); 5463 nexthm = &zerohm; 5464 t->ipt_data = NULL; 5465 } 5466 break; 5467 case IPFGENITER_IPNAT : 5468 if (nextipnat != NULL) { 5469 ATOMIC_INC32(nextipnat->in_use); 5470 t->ipt_data = nextipnat; 5471 } else { 5472 bzero(&zeroipn, sizeof(zeroipn)); 5473 nextipnat = &zeroipn; 5474 t->ipt_data = NULL; 5475 } 5476 break; 5477 case IPFGENITER_NAT : 5478 if (nextnat != NULL) { 5479 MUTEX_ENTER(&nextnat->nat_lock); 5480 nextnat->nat_ref++; 5481 MUTEX_EXIT(&nextnat->nat_lock); 5482 t->ipt_data = nextnat; 5483 } else { 5484 bzero(&zeronat, sizeof(zeronat)); 5485 nextnat = &zeronat; 5486 t->ipt_data = NULL; 5487 } 5488 break; 5489 default : 5490 break; 5491 } 5492 5493 /* 5494 * Now that we have ref, it's save to give up lock. 5495 */ 5496 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5497 5498 /* 5499 * Copy out data and clean up references and token as needed. 
5500 */ 5501 switch (itp->igi_type) 5502 { 5503 case IPFGENITER_HOSTMAP : 5504 error = COPYOUT(nexthm, dst, sizeof(*nexthm)); 5505 if (error != 0) 5506 error = EFAULT; 5507 if (t->ipt_data == NULL) { 5508 ipf_freetoken(t, ifs); 5509 break; 5510 } else { 5511 if (hm != NULL) { 5512 WRITE_ENTER(&ifs->ifs_ipf_nat); 5513 fr_hostmapdel(&hm); 5514 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5515 } 5516 if (nexthm->hm_next == NULL) { 5517 ipf_freetoken(t, ifs); 5518 break; 5519 } 5520 dst += sizeof(*nexthm); 5521 hm = nexthm; 5522 nexthm = nexthm->hm_next; 5523 } 5524 break; 5525 5526 case IPFGENITER_IPNAT : 5527 error = COPYOUT(nextipnat, dst, sizeof(*nextipnat)); 5528 if (error != 0) 5529 error = EFAULT; 5530 if (t->ipt_data == NULL) { 5531 ipf_freetoken(t, ifs); 5532 break; 5533 } else { 5534 if (ipn != NULL) { 5535 WRITE_ENTER(&ifs->ifs_ipf_nat); 5536 fr_ipnatderef(&ipn, ifs); 5537 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5538 } 5539 if (nextipnat->in_next == NULL) { 5540 ipf_freetoken(t, ifs); 5541 break; 5542 } 5543 dst += sizeof(*nextipnat); 5544 ipn = nextipnat; 5545 nextipnat = nextipnat->in_next; 5546 } 5547 break; 5548 5549 case IPFGENITER_NAT : 5550 error = COPYOUT(nextnat, dst, sizeof(*nextnat)); 5551 if (error != 0) 5552 error = EFAULT; 5553 if (t->ipt_data == NULL) { 5554 ipf_freetoken(t, ifs); 5555 break; 5556 } else { 5557 if (nat != NULL) 5558 fr_natderef(&nat, ifs); 5559 if (nextnat->nat_next == NULL) { 5560 ipf_freetoken(t, ifs); 5561 break; 5562 } 5563 dst += sizeof(*nextnat); 5564 nat = nextnat; 5565 nextnat = nextnat->nat_next; 5566 } 5567 break; 5568 default : 5569 break; 5570 } 5571 5572 if ((count == 1) || (error != 0)) 5573 break; 5574 5575 READ_ENTER(&ifs->ifs_ipf_nat); 5576 } 5577 5578 return error; 5579 } 5580 5581 5582 /* ------------------------------------------------------------------------ */ 5583 /* Function: nat_iterator */ 5584 /* Returns: int - 0 == ok, else error */ 5585 /* Parameters: token(I) - pointer to ipftoken structure */ 5586 /* itp(I) - pointer 
to ipfgeniter_t structure */ 5587 /* */ 5588 /* This function acts as a handler for the SIOCGENITER ioctls that use a */ 5589 /* generic structure to iterate through a list. There are three different */ 5590 /* linked lists of NAT related information to go through: NAT rules, active */ 5591 /* NAT mappings and the NAT fragment cache. */ 5592 /* ------------------------------------------------------------------------ */ 5593 static int nat_iterator(token, itp, ifs) 5594 ipftoken_t *token; 5595 ipfgeniter_t *itp; 5596 ipf_stack_t *ifs; 5597 { 5598 int error; 5599 5600 if (itp->igi_data == NULL) 5601 return EFAULT; 5602 5603 token->ipt_subtype = itp->igi_type; 5604 5605 switch (itp->igi_type) 5606 { 5607 case IPFGENITER_HOSTMAP : 5608 case IPFGENITER_IPNAT : 5609 case IPFGENITER_NAT : 5610 error = nat_getnext(token, itp, ifs); 5611 break; 5612 case IPFGENITER_NATFRAG : 5613 error = fr_nextfrag(token, itp, &ifs->ifs_ipfr_natlist, 5614 &ifs->ifs_ipfr_nattail, 5615 &ifs->ifs_ipf_natfrag, ifs); 5616 break; 5617 default : 5618 error = EINVAL; 5619 break; 5620 } 5621 5622 return error; 5623 } 5624 5625 5626 /* -------------------------------------------------------------------- */ 5627 /* Function: nat_earlydrop */ 5628 /* Returns: number of dropped/removed entries from the queue */ 5629 /* Parameters: ifq - pointer to queue with entries to be processed */ 5630 /* maxidle - entry must be idle this long to be dropped */ 5631 /* ifs - ipf stack instance */ 5632 /* */ 5633 /* Function is invoked from nat_extraflush() only. Removes entries */ 5634 /* form specified timeout queue, based on how long they've sat idle, */ 5635 /* without waiting for it to happen on its own. 
*/ 5636 /* -------------------------------------------------------------------- */ 5637 static int nat_earlydrop(ifq, maxidle, ifs) 5638 ipftq_t *ifq; 5639 int maxidle; 5640 ipf_stack_t *ifs; 5641 { 5642 ipftqent_t *tqe, *tqn; 5643 nat_t *nat; 5644 unsigned int dropped; 5645 int droptick; 5646 5647 if (ifq == NULL) 5648 return (0); 5649 5650 dropped = 0; 5651 5652 /* 5653 * Determine the tick representing the idle time we're interested 5654 * in. If an entry exists in the queue, and it was touched before 5655 * that tick, then it's been idle longer than maxidle ... remove it. 5656 */ 5657 droptick = ifs->ifs_fr_ticks - maxidle; 5658 tqn = ifq->ifq_head; 5659 while ((tqe = tqn) != NULL && tqe->tqe_touched < droptick) { 5660 tqn = tqe->tqe_next; 5661 nat = tqe->tqe_parent; 5662 nat_delete(nat, ISL_EXPIRE, ifs); 5663 dropped++; 5664 } 5665 return (dropped); 5666 } 5667 5668 5669 /* --------------------------------------------------------------------- */ 5670 /* Function: nat_flushclosing */ 5671 /* Returns: int - number of NAT entries deleted */ 5672 /* Parameters: stateval(I) - State at which to start removing entries */ 5673 /* ifs - ipf stack instance */ 5674 /* */ 5675 /* Remove nat table entries for TCP connections which are in the process */ 5676 /* of closing, and are in (or "beyond") state specified by 'stateval'. */ 5677 /* --------------------------------------------------------------------- */ 5678 static int nat_flushclosing(stateval, ifs) 5679 int stateval; 5680 ipf_stack_t *ifs; 5681 { 5682 ipftq_t *ifq, *ifqn; 5683 ipftqent_t *tqe, *tqn; 5684 nat_t *nat; 5685 int dropped; 5686 5687 dropped = 0; 5688 5689 /* 5690 * Start by deleting any entries in specific timeout queues. 5691 */ 5692 ifqn = &ifs->ifs_nat_tqb[stateval]; 5693 while ((ifq = ifqn) != NULL) { 5694 ifqn = ifq->ifq_next; 5695 dropped += nat_earlydrop(ifq, (int)0, ifs); 5696 } 5697 5698 /* 5699 * Next, look through user defined queues for closing entries. 
5700 */ 5701 ifqn = ifs->ifs_nat_utqe; 5702 while ((ifq = ifqn) != NULL) { 5703 ifqn = ifq->ifq_next; 5704 tqn = ifq->ifq_head; 5705 while ((tqe = tqn) != NULL) { 5706 tqn = tqe->tqe_next; 5707 nat = tqe->tqe_parent; 5708 if (nat->nat_p != IPPROTO_TCP) 5709 continue; 5710 if ((nat->nat_tcpstate[0] >= stateval) && 5711 (nat->nat_tcpstate[1] >= stateval)) { 5712 nat_delete(nat, NL_EXPIRE, ifs); 5713 dropped++; 5714 } 5715 } 5716 } 5717 return (dropped); 5718 } 5719 5720 5721 /* --------------------------------------------------------------------- */ 5722 /* Function: nat_extraflush */ 5723 /* Returns: int - number of NAT entries deleted */ 5724 /* Parameters: which(I) - how to flush the active NAT table */ 5725 /* ifs - ipf stack instance */ 5726 /* Write Locks: ipf_nat */ 5727 /* */ 5728 /* Flush nat tables. Three actions currently defined: */ 5729 /* */ 5730 /* which == 0 : Flush all nat table entries. */ 5731 /* */ 5732 /* which == 1 : Flush entries with TCP connections which have started */ 5733 /* to close on both ends. */ 5734 /* */ 5735 /* which == 2 : First, flush entries which are "almost" closed. If that */ 5736 /* does not take us below specified threshold in the table, */ 5737 /* we want to flush entries with TCP connections which have */ 5738 /* been idle for a long time. Start with connections idle */ 5739 /* over 12 hours, and then work backwards in half hour */ 5740 /* increments to at most 30 minutes idle, and finally work */ 5741 /* back in 30 second increments to at most 30 seconds. 
*/ 5742 /* --------------------------------------------------------------------- */ 5743 static int nat_extraflush(which, ifs) 5744 int which; 5745 ipf_stack_t *ifs; 5746 { 5747 ipftq_t *ifq, *ifqn; 5748 nat_t *nat, **natp; 5749 int idletime, removed, idle_idx; 5750 SPL_INT(s); 5751 5752 removed = 0; 5753 5754 SPL_NET(s); 5755 switch (which) 5756 { 5757 case 0: 5758 natp = &ifs->ifs_nat_instances; 5759 while ((nat = *natp) != NULL) { 5760 natp = &nat->nat_next; 5761 nat_delete(nat, ISL_FLUSH, ifs); 5762 removed++; 5763 } 5764 break; 5765 5766 case 1: 5767 removed = nat_flushclosing(IPF_TCPS_CLOSE_WAIT, ifs); 5768 break; 5769 5770 case 2: 5771 removed = nat_flushclosing(IPF_TCPS_FIN_WAIT_2, ifs); 5772 5773 /* 5774 * Be sure we haven't done this in the last 10 seconds. 5775 */ 5776 if (ifs->ifs_fr_ticks - ifs->ifs_nat_last_force_flush < 5777 IPF_TTLVAL(10)) 5778 break; 5779 ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks; 5780 5781 /* 5782 * Determine initial threshold for minimum idle time based on 5783 * how long ipfilter has been running. Ipfilter needs to have 5784 * been up as long as the smallest interval to continue on. 5785 * 5786 * Minimum idle times stored in idletime_tab and indexed by 5787 * idle_idx. Start at upper end of array and work backwards. 5788 * 5789 * Once the index is found, set the initial idle time to the 5790 * first interval before the current ipfilter run time. 
5791 */ 5792 if (ifs->ifs_fr_ticks < idletime_tab[0]) 5793 break; /* switch */ 5794 idle_idx = (sizeof (idletime_tab) / sizeof (int)) - 1; 5795 if (ifs->ifs_fr_ticks > idletime_tab[idle_idx]) { 5796 idletime = idletime_tab[idle_idx]; 5797 } else { 5798 while ((idle_idx > 0) && 5799 (ifs->ifs_fr_ticks < idletime_tab[idle_idx])) 5800 idle_idx--; 5801 idletime = (ifs->ifs_fr_ticks / 5802 idletime_tab[idle_idx]) * 5803 idletime_tab[idle_idx]; 5804 } 5805 5806 while ((idle_idx >= 0) && 5807 (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_lo)) { 5808 /* 5809 * Start with appropriate timeout queue. 5810 */ 5811 removed += nat_earlydrop( 5812 &ifs->ifs_nat_tqb[IPF_TCPS_ESTABLISHED], 5813 idletime, ifs); 5814 5815 /* 5816 * Make sure we haven't already deleted enough 5817 * entries before checking the user defined queues. 5818 */ 5819 if (NAT_TAB_WATER_LEVEL(ifs) <= 5820 ifs->ifs_nat_flush_lvl_lo) 5821 break; 5822 5823 /* 5824 * Next, look through the user defined queues. 5825 */ 5826 ifqn = ifs->ifs_nat_utqe; 5827 while ((ifq = ifqn) != NULL) { 5828 ifqn = ifq->ifq_next; 5829 removed += nat_earlydrop(ifq, idletime, ifs); 5830 } 5831 5832 /* 5833 * Adjust the granularity of idle time. 5834 * 5835 * If we reach an interval boundary, we need to 5836 * either adjust the idle time accordingly or exit 5837 * the loop altogether (if this is very last check). 5838 */ 5839 idletime -= idletime_tab[idle_idx]; 5840 if (idletime < idletime_tab[idle_idx]) { 5841 if (idle_idx != 0) { 5842 idletime = idletime_tab[idle_idx] - 5843 idletime_tab[idle_idx - 1]; 5844 idle_idx--; 5845 } else { 5846 break; /* while */ 5847 } 5848 } 5849 } 5850 break; 5851 default: 5852 break; 5853 } 5854 5855 SPL_X(s); 5856 return (removed); 5857 } 5858