1 /* 2 * Copyright (C) 1995-2004 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 7 * Use is subject to license terms. 8 */ 9 10 #pragma ident "%Z%%M% %I% %E% SMI"$ 11 12 #if defined(KERNEL) || defined(_KERNEL) 13 # undef KERNEL 14 # undef _KERNEL 15 # define KERNEL 1 16 # define _KERNEL 1 17 #endif 18 #include <sys/errno.h> 19 #include <sys/types.h> 20 #include <sys/param.h> 21 #include <sys/time.h> 22 #include <sys/file.h> 23 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ 24 defined(_KERNEL) 25 # include "opt_ipfilter_log.h" 26 #endif 27 #if !defined(_KERNEL) 28 # include <stdio.h> 29 # include <string.h> 30 # include <stdlib.h> 31 # define _KERNEL 32 # ifdef __OpenBSD__ 33 struct file; 34 # endif 35 # include <sys/uio.h> 36 # undef _KERNEL 37 #endif 38 #if defined(_KERNEL) && (__FreeBSD_version >= 220000) 39 # include <sys/filio.h> 40 # include <sys/fcntl.h> 41 #else 42 # include <sys/ioctl.h> 43 #endif 44 #if !defined(AIX) 45 # include <sys/fcntl.h> 46 #endif 47 #if !defined(linux) 48 # include <sys/protosw.h> 49 #endif 50 #include <sys/socket.h> 51 #if defined(_KERNEL) 52 # include <sys/systm.h> 53 # if !defined(__SVR4) && !defined(__svr4__) 54 # include <sys/mbuf.h> 55 # endif 56 #endif 57 #if defined(__SVR4) || defined(__svr4__) 58 # include <sys/filio.h> 59 # include <sys/byteorder.h> 60 # ifdef _KERNEL 61 # include <sys/dditypes.h> 62 # endif 63 # include <sys/stream.h> 64 # include <sys/kmem.h> 65 #endif 66 #if __FreeBSD_version >= 300000 67 # include <sys/queue.h> 68 #endif 69 #include <net/if.h> 70 #if __FreeBSD_version >= 300000 71 # include <net/if_var.h> 72 # if defined(_KERNEL) && !defined(IPFILTER_LKM) 73 # include "opt_ipfilter.h" 74 # endif 75 #endif 76 #ifdef sun 77 # include <net/af.h> 78 #endif 79 #include <net/route.h> 80 #include <netinet/in.h> 81 #include <netinet/in_systm.h> 82 #include <netinet/ip.h> 83 84 #ifdef RFC1825 85 # include <vpn/md5.h> 86 # include <vpn/ipsec.h> 87 extern struct ifnet vpnif; 88 #endif 89 90 #if !defined(linux) 91 # include <netinet/ip_var.h> 92 #endif 93 #include <netinet/tcp.h> 94 #include <netinet/udp.h> 95 #include <netinet/ip_icmp.h> 96 #include "netinet/ip_compat.h" 97 #include <netinet/tcpip.h> 98 #include "netinet/ip_fil.h" 99 #include "netinet/ip_nat.h" 100 #include "netinet/ip_frag.h" 101 #include "netinet/ip_state.h" 102 #include "netinet/ip_proxy.h" 103 #include "netinet/ipf_stack.h" 104 #ifdef IPFILTER_SYNC 105 #include "netinet/ip_sync.h" 106 #endif 107 #if (__FreeBSD_version >= 300000) 108 # include <sys/malloc.h> 109 #endif 110 /* END OF INCLUDES */ 111 112 #undef SOCKADDR_IN 113 #define SOCKADDR_IN struct sockaddr_in 114 115 #if !defined(lint) 116 static const char sccsid[] = "@(#)ip_nat.c 1.11 6/5/96 (C) 1995 Darren Reed"; 117 static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 darrenr Exp $"; 118 #endif 119 120 121 /* ======================================================================== */ 122 /* How the NAT is organised and works. */ 123 /* */ 124 /* Inside (interface y) NAT Outside (interface x) */ 125 /* -------------------- -+- ------------------------------------- */ 126 /* Packet going | out, processsed by fr_checknatout() for x */ 127 /* ------------> | ------------> */ 128 /* src=10.1.1.1 | src=192.1.1.1 */ 129 /* | */ 130 /* | in, processed by fr_checknatin() for x */ 131 /* <------------ | <------------ */ 132 /* dst=10.1.1.1 | dst=192.1.1.1 */ 133 /* -------------------- -+- ------------------------------------- */ 134 /* fr_checknatout() - changes ip_src and if required, sport */ 135 /* - creates a new mapping, if required. */ 136 /* fr_checknatin() - changes ip_dst and if required, dport */ 137 /* */ 138 /* In the NAT table, internal source is recorded as "in" and externally */ 139 /* seen as "out". */ 140 /* ======================================================================== */ 141 142 143 static int nat_flushtable __P((ipf_stack_t *)); 144 static int nat_clearlist __P((ipf_stack_t *)); 145 static void nat_addnat __P((struct ipnat *, ipf_stack_t *)); 146 static void nat_addrdr __P((struct ipnat *, ipf_stack_t *)); 147 static void nat_delete __P((struct nat *, int, ipf_stack_t *)); 148 static int fr_natgetent __P((caddr_t, ipf_stack_t *)); 149 static int fr_natgetsz __P((caddr_t, ipf_stack_t *)); 150 static int fr_natputent __P((caddr_t, int, ipf_stack_t *)); 151 static void nat_tabmove __P((nat_t *, ipf_stack_t *)); 152 static int nat_match __P((fr_info_t *, ipnat_t *)); 153 static INLINE int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *)); 154 static INLINE int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *)); 155 static hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr, 156 struct in_addr, struct in_addr, u_32_t, 157 ipf_stack_t *)); 158 static INLINE int nat_icmpquerytype4 __P((int)); 159 static int nat_ruleaddrinit __P((ipnat_t *)); 160 static int nat_siocaddnat __P((ipnat_t *, ipnat_t **, int, ipf_stack_t *)); 161 static void nat_siocdelnat __P((ipnat_t *, ipnat_t **, int, ipf_stack_t *)); 162 static INLINE int nat_icmperrortype4 __P((int)); 163 static INLINE int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *, 164 tcphdr_t *, nat_t **, int)); 165 static INLINE int nat_resolverule __P((ipnat_t *, ipf_stack_t *)); 166 static void nat_mssclamp __P((tcphdr_t *, u_32_t, u_short *)); 167 static int nat_getnext __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 168 static int nat_iterator __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 169 static int nat_extraflush __P((int, ipf_stack_t *)); 170 static int nat_earlydrop __P((ipftq_t *, int, ipf_stack_t *)); 171 static int nat_flushclosing __P((int, ipf_stack_t *)); 172 173 174 /* 175 * Below we declare a list of constants used only in the nat_extraflush() 176 * routine. We are placing it here, instead of in nat_extraflush() itself, 177 * because we want to make it visible to tools such as mdb, nm etc., so the 178 * values can easily be altered during debugging. 179 */ 180 static const int idletime_tab[] = { 181 IPF_TTLVAL(30), /* 30 seconds */ 182 IPF_TTLVAL(1800), /* 30 minutes */ 183 IPF_TTLVAL(43200), /* 12 hours */ 184 IPF_TTLVAL(345600), /* 4 days */ 185 }; 186 187 #define NAT_HAS_L4_CHANGED(n) \ 188 (((n)->nat_flags & (IPN_TCPUDPICMP)) && \ 189 (n)->nat_inport != (n)->nat_outport) 190 191 /* ------------------------------------------------------------------------ */ 192 /* Function: fr_natinit */ 193 /* Returns: int - 0 == success, -1 == failure */ 194 /* Parameters: Nil */ 195 /* */ 196 /* Initialise all of the NAT locks, tables and other structures. */ 197 /* ------------------------------------------------------------------------ */ 198 int fr_natinit(ifs) 199 ipf_stack_t *ifs; 200 { 201 int i; 202 203 KMALLOCS(ifs->ifs_nat_table[0], nat_t **, 204 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 205 if (ifs->ifs_nat_table[0] != NULL) 206 bzero((char *)ifs->ifs_nat_table[0], 207 ifs->ifs_ipf_nattable_sz * sizeof(nat_t *)); 208 else 209 return -1; 210 211 KMALLOCS(ifs->ifs_nat_table[1], nat_t **, 212 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 213 if (ifs->ifs_nat_table[1] != NULL) 214 bzero((char *)ifs->ifs_nat_table[1], 215 ifs->ifs_ipf_nattable_sz * sizeof(nat_t *)); 216 else 217 return -2; 218 219 KMALLOCS(ifs->ifs_nat_rules, ipnat_t **, 220 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz); 221 if (ifs->ifs_nat_rules != NULL) 222 bzero((char *)ifs->ifs_nat_rules, 223 ifs->ifs_ipf_natrules_sz * sizeof(ipnat_t *)); 224 else 225 return -3; 226 227 KMALLOCS(ifs->ifs_rdr_rules, ipnat_t **, 228 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz); 229 if (ifs->ifs_rdr_rules != NULL) 230 bzero((char *)ifs->ifs_rdr_rules, 231 ifs->ifs_ipf_rdrrules_sz * sizeof(ipnat_t *)); 232 else 233 return -4; 234 235 KMALLOCS(ifs->ifs_maptable, hostmap_t **, 236 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); 237 if (ifs->ifs_maptable != NULL) 238 bzero((char *)ifs->ifs_maptable, 239 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); 240 else 241 return -5; 242 243 ifs->ifs_ipf_hm_maplist = NULL; 244 245 KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[0], u_long *, 246 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 247 if (ifs->ifs_nat_stats.ns_bucketlen[0] == NULL) 248 return -1; 249 bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[0], 250 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 251 252 KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[1], u_long *, 253 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 254 if (ifs->ifs_nat_stats.ns_bucketlen[1] == NULL) 255 return -1; 256 bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[1], 257 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 258 259 if (ifs->ifs_fr_nat_maxbucket == 0) { 260 for (i = ifs->ifs_ipf_nattable_sz; i > 0; i >>= 1) 261 ifs->ifs_fr_nat_maxbucket++; 262 ifs->ifs_fr_nat_maxbucket *= 2; 263 } 264 265 fr_sttab_init(ifs->ifs_nat_tqb, ifs); 266 /* 267 * Increase this because we may have "keep state" following this too 268 * and packet storms can occur if this is removed too quickly. 269 */ 270 ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcplastack; 271 ifs->ifs_nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_nat_udptq; 272 ifs->ifs_nat_udptq.ifq_ttl = ifs->ifs_fr_defnatage; 273 ifs->ifs_nat_udptq.ifq_ref = 1; 274 ifs->ifs_nat_udptq.ifq_head = NULL; 275 ifs->ifs_nat_udptq.ifq_tail = &ifs->ifs_nat_udptq.ifq_head; 276 MUTEX_INIT(&ifs->ifs_nat_udptq.ifq_lock, "nat ipftq udp tab"); 277 ifs->ifs_nat_udptq.ifq_next = &ifs->ifs_nat_icmptq; 278 ifs->ifs_nat_icmptq.ifq_ttl = ifs->ifs_fr_defnaticmpage; 279 ifs->ifs_nat_icmptq.ifq_ref = 1; 280 ifs->ifs_nat_icmptq.ifq_head = NULL; 281 ifs->ifs_nat_icmptq.ifq_tail = &ifs->ifs_nat_icmptq.ifq_head; 282 MUTEX_INIT(&ifs->ifs_nat_icmptq.ifq_lock, "nat icmp ipftq tab"); 283 ifs->ifs_nat_icmptq.ifq_next = &ifs->ifs_nat_iptq; 284 ifs->ifs_nat_iptq.ifq_ttl = ifs->ifs_fr_defnatipage; 285 ifs->ifs_nat_iptq.ifq_ref = 1; 286 ifs->ifs_nat_iptq.ifq_head = NULL; 287 ifs->ifs_nat_iptq.ifq_tail = &ifs->ifs_nat_iptq.ifq_head; 288 MUTEX_INIT(&ifs->ifs_nat_iptq.ifq_lock, "nat ip ipftq tab"); 289 ifs->ifs_nat_iptq.ifq_next = NULL; 290 291 for (i = 0; i < IPF_TCP_NSTATES; i++) { 292 if (ifs->ifs_nat_tqb[i].ifq_ttl < ifs->ifs_fr_defnaticmpage) 293 ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnaticmpage; 294 #ifdef LARGE_NAT 295 else if (ifs->ifs_nat_tqb[i].ifq_ttl > ifs->ifs_fr_defnatage) 296 ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnatage; 297 #endif 298 } 299 300 /* 301 * Increase this because we may have "keep state" following 302 * this too and packet storms can occur if this is removed 303 * too quickly. 304 */ 305 ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = 306 ifs->ifs_nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl; 307 308 RWLOCK_INIT(&ifs->ifs_ipf_nat, "ipf IP NAT rwlock"); 309 RWLOCK_INIT(&ifs->ifs_ipf_natfrag, "ipf IP NAT-Frag rwlock"); 310 MUTEX_INIT(&ifs->ifs_ipf_nat_new, "ipf nat new mutex"); 311 MUTEX_INIT(&ifs->ifs_ipf_natio, "ipf nat io mutex"); 312 313 ifs->ifs_fr_nat_init = 1; 314 315 return 0; 316 } 317 318 319 /* ------------------------------------------------------------------------ */ 320 /* Function: nat_addrdr */ 321 /* Returns: Nil */ 322 /* Parameters: n(I) - pointer to NAT rule to add */ 323 /* */ 324 /* Adds a redirect rule to the hash table of redirect rules and the list of */ 325 /* loaded NAT rules. Updates the bitmask indicating which netmasks are in */ 326 /* use by redirect rules. */ 327 /* ------------------------------------------------------------------------ */ 328 static void nat_addrdr(n, ifs) 329 ipnat_t *n; 330 ipf_stack_t *ifs; 331 { 332 ipnat_t **np; 333 u_32_t j; 334 u_int hv; 335 int k; 336 337 k = count4bits(n->in_outmsk); 338 if ((k >= 0) && (k != 32)) 339 ifs->ifs_rdr_masks |= 1 << k; 340 j = (n->in_outip & n->in_outmsk); 341 hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_rdrrules_sz); 342 np = ifs->ifs_rdr_rules + hv; 343 while (*np != NULL) 344 np = &(*np)->in_rnext; 345 n->in_rnext = NULL; 346 n->in_prnext = np; 347 n->in_hv = hv; 348 *np = n; 349 } 350 351 352 /* ------------------------------------------------------------------------ */ 353 /* Function: nat_addnat */ 354 /* Returns: Nil */ 355 /* Parameters: n(I) - pointer to NAT rule to add */ 356 /* */ 357 /* Adds a NAT map rule to the hash table of rules and the list of loaded */ 358 /* NAT rules. Updates the bitmask indicating which netmasks are in use by */ 359 /* redirect rules. */ 360 /* ------------------------------------------------------------------------ */ 361 static void nat_addnat(n, ifs) 362 ipnat_t *n; 363 ipf_stack_t *ifs; 364 { 365 ipnat_t **np; 366 u_32_t j; 367 u_int hv; 368 int k; 369 370 k = count4bits(n->in_inmsk); 371 if ((k >= 0) && (k != 32)) 372 ifs->ifs_nat_masks |= 1 << k; 373 j = (n->in_inip & n->in_inmsk); 374 hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_natrules_sz); 375 np = ifs->ifs_nat_rules + hv; 376 while (*np != NULL) 377 np = &(*np)->in_mnext; 378 n->in_mnext = NULL; 379 n->in_pmnext = np; 380 n->in_hv = hv; 381 *np = n; 382 } 383 384 385 /* ------------------------------------------------------------------------ */ 386 /* Function: nat_delrdr */ 387 /* Returns: Nil */ 388 /* Parameters: n(I) - pointer to NAT rule to delete */ 389 /* */ 390 /* Removes a redirect rule from the hash table of redirect rules. */ 391 /* ------------------------------------------------------------------------ */ 392 void nat_delrdr(n) 393 ipnat_t *n; 394 { 395 if (n->in_rnext) 396 n->in_rnext->in_prnext = n->in_prnext; 397 *n->in_prnext = n->in_rnext; 398 } 399 400 401 /* ------------------------------------------------------------------------ */ 402 /* Function: nat_delnat */ 403 /* Returns: Nil */ 404 /* Parameters: n(I) - pointer to NAT rule to delete */ 405 /* */ 406 /* Removes a NAT map rule from the hash table of NAT map rules. */ 407 /* ------------------------------------------------------------------------ */ 408 void nat_delnat(n) 409 ipnat_t *n; 410 { 411 if (n->in_mnext != NULL) 412 n->in_mnext->in_pmnext = n->in_pmnext; 413 *n->in_pmnext = n->in_mnext; 414 } 415 416 417 /* ------------------------------------------------------------------------ */ 418 /* Function: nat_hostmap */ 419 /* Returns: struct hostmap* - NULL if no hostmap could be created, */ 420 /* else a pointer to the hostmapping to use */ 421 /* Parameters: np(I) - pointer to NAT rule */ 422 /* real(I) - real IP address */ 423 /* map(I) - mapped IP address */ 424 /* port(I) - destination port number */ 425 /* Write Locks: ipf_nat */ 426 /* */ 427 /* Check if an ip address has already been allocated for a given mapping */ 428 /* that is not doing port based translation. If is not yet allocated, then */ 429 /* create a new entry if a non-NULL NAT rule pointer has been supplied. */ 430 /* ------------------------------------------------------------------------ */ 431 static struct hostmap *nat_hostmap(np, src, dst, map, port, ifs) 432 ipnat_t *np; 433 struct in_addr src; 434 struct in_addr dst; 435 struct in_addr map; 436 u_32_t port; 437 ipf_stack_t *ifs; 438 { 439 hostmap_t *hm; 440 u_int hv; 441 442 hv = (src.s_addr ^ dst.s_addr); 443 hv += src.s_addr; 444 hv += dst.s_addr; 445 hv %= HOSTMAP_SIZE; 446 for (hm = ifs->ifs_maptable[hv]; hm; hm = hm->hm_next) 447 if ((hm->hm_srcip.s_addr == src.s_addr) && 448 (hm->hm_dstip.s_addr == dst.s_addr) && 449 ((np == NULL) || (np == hm->hm_ipnat)) && 450 ((port == 0) || (port == hm->hm_port))) { 451 hm->hm_ref++; 452 return hm; 453 } 454 455 if (np == NULL) 456 return NULL; 457 458 KMALLOC(hm, hostmap_t *); 459 if (hm) { 460 hm->hm_hnext = ifs->ifs_ipf_hm_maplist; 461 hm->hm_phnext = &ifs->ifs_ipf_hm_maplist; 462 if (ifs->ifs_ipf_hm_maplist != NULL) 463 ifs->ifs_ipf_hm_maplist->hm_phnext = &hm->hm_hnext; 464 ifs->ifs_ipf_hm_maplist = hm; 465 466 hm->hm_next = ifs->ifs_maptable[hv]; 467 hm->hm_pnext = ifs->ifs_maptable + hv; 468 if (ifs->ifs_maptable[hv] != NULL) 469 ifs->ifs_maptable[hv]->hm_pnext = &hm->hm_next; 470 ifs->ifs_maptable[hv] = hm; 471 hm->hm_ipnat = np; 472 hm->hm_srcip = src; 473 hm->hm_dstip = dst; 474 hm->hm_mapip = map; 475 hm->hm_ref = 1; 476 hm->hm_port = port; 477 hm->hm_v = 4; 478 } 479 return hm; 480 } 481 482 483 /* ------------------------------------------------------------------------ */ 484 /* Function: fr_hostmapdel */ 485 /* Returns: Nil */ 486 /* Parameters: hmp(I) - pointer to pointer to hostmap structure */ 487 /* Write Locks: ipf_nat */ 488 /* */ 489 /* Decrement the references to this hostmap structure by one. If this */ 490 /* reaches zero then remove it and free it. */ 491 /* ------------------------------------------------------------------------ */ 492 void fr_hostmapdel(hmp) 493 struct hostmap **hmp; 494 { 495 struct hostmap *hm; 496 497 hm = *hmp; 498 *hmp = NULL; 499 500 hm->hm_ref--; 501 if (hm->hm_ref == 0) { 502 if (hm->hm_next) 503 hm->hm_next->hm_pnext = hm->hm_pnext; 504 *hm->hm_pnext = hm->hm_next; 505 if (hm->hm_hnext) 506 hm->hm_hnext->hm_phnext = hm->hm_phnext; 507 *hm->hm_phnext = hm->hm_hnext; 508 KFREE(hm); 509 } 510 } 511 512 513 /* ------------------------------------------------------------------------ */ 514 /* Function: fix_outcksum */ 515 /* Returns: Nil */ 516 /* Parameters: sp(I) - location of 16bit checksum to update */ 517 /* n((I) - amount to adjust checksum by */ 518 /* */ 519 /* Adjusts the 16bit checksum by "n" for packets going out. */ 520 /* ------------------------------------------------------------------------ */ 521 void fix_outcksum(sp, n) 522 u_short *sp; 523 u_32_t n; 524 { 525 u_short sumshort; 526 u_32_t sum1; 527 528 if (n == 0) 529 return; 530 531 sum1 = (~ntohs(*sp)) & 0xffff; 532 sum1 += (n); 533 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 534 /* Again */ 535 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 536 sumshort = ~(u_short)sum1; 537 *(sp) = htons(sumshort); 538 } 539 540 541 /* ------------------------------------------------------------------------ */ 542 /* Function: fix_incksum */ 543 /* Returns: Nil */ 544 /* Parameters: sp(I) - location of 16bit checksum to update */ 545 /* n((I) - amount to adjust checksum by */ 546 /* */ 547 /* Adjusts the 16bit checksum by "n" for packets going in. */ 548 /* ------------------------------------------------------------------------ */ 549 void fix_incksum(sp, n) 550 u_short *sp; 551 u_32_t n; 552 { 553 u_short sumshort; 554 u_32_t sum1; 555 556 if (n == 0) 557 return; 558 559 sum1 = (~ntohs(*sp)) & 0xffff; 560 sum1 += ~(n) & 0xffff; 561 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 562 /* Again */ 563 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 564 sumshort = ~(u_short)sum1; 565 *(sp) = htons(sumshort); 566 } 567 568 569 /* ------------------------------------------------------------------------ */ 570 /* Function: fix_datacksum */ 571 /* Returns: Nil */ 572 /* Parameters: sp(I) - location of 16bit checksum to update */ 573 /* n((I) - amount to adjust checksum by */ 574 /* */ 575 /* Fix_datacksum is used *only* for the adjustments of checksums in the */ 576 /* data section of an IP packet. */ 577 /* */ 578 /* The only situation in which you need to do this is when NAT'ing an */ 579 /* ICMP error message. Such a message, contains in its body the IP header */ 580 /* of the original IP packet, that causes the error. */ 581 /* */ 582 /* You can't use fix_incksum or fix_outcksum in that case, because for the */ 583 /* kernel the data section of the ICMP error is just data, and no special */ 584 /* processing like hardware cksum or ntohs processing have been done by the */ 585 /* kernel on the data section. */ 586 /* ------------------------------------------------------------------------ */ 587 void fix_datacksum(sp, n) 588 u_short *sp; 589 u_32_t n; 590 { 591 u_short sumshort; 592 u_32_t sum1; 593 594 if (n == 0) 595 return; 596 597 sum1 = (~ntohs(*sp)) & 0xffff; 598 sum1 += (n); 599 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 600 /* Again */ 601 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 602 sumshort = ~(u_short)sum1; 603 *(sp) = htons(sumshort); 604 } 605 606 607 /* ------------------------------------------------------------------------ */ 608 /* Function: fr_nat_ioctl */ 609 /* Returns: int - 0 == success, != 0 == failure */ 610 /* Parameters: data(I) - pointer to ioctl data */ 611 /* cmd(I) - ioctl command integer */ 612 /* mode(I) - file mode bits used with open */ 613 /* */ 614 /* Processes an ioctl call made to operate on the IP Filter NAT device. */ 615 /* ------------------------------------------------------------------------ */ 616 int fr_nat_ioctl(data, cmd, mode, uid, ctx, ifs) 617 ioctlcmd_t cmd; 618 caddr_t data; 619 int mode, uid; 620 void *ctx; 621 ipf_stack_t *ifs; 622 { 623 ipnat_t *nat, *nt, *n = NULL, **np = NULL; 624 int error = 0, ret, arg, getlock; 625 ipnat_t natd; 626 627 #if (BSD >= 199306) && defined(_KERNEL) 628 if ((securelevel >= 2) && (mode & FWRITE)) 629 return EPERM; 630 #endif 631 632 #if defined(__osf__) && defined(_KERNEL) 633 getlock = 0; 634 #else 635 getlock = (mode & NAT_LOCKHELD) ? 0 : 1; 636 #endif 637 638 nat = NULL; /* XXX gcc -Wuninitialized */ 639 if (cmd == (ioctlcmd_t)SIOCADNAT) { 640 KMALLOC(nt, ipnat_t *); 641 } else { 642 nt = NULL; 643 } 644 645 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) { 646 if (mode & NAT_SYSSPACE) { 647 bcopy(data, (char *)&natd, sizeof(natd)); 648 error = 0; 649 } else { 650 error = fr_inobj(data, &natd, IPFOBJ_IPNAT); 651 } 652 653 } else if (cmd == (ioctlcmd_t)SIOCIPFFL) { /* SIOCFLNAT & SIOCCNATL */ 654 BCOPYIN(data, &arg, sizeof(arg)); 655 } 656 657 if (error != 0) 658 goto done; 659 660 /* 661 * For add/delete, look to see if the NAT entry is already present 662 */ 663 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) { 664 nat = &natd; 665 if (nat->in_v == 0) /* For backward compat. */ 666 nat->in_v = 4; 667 nat->in_flags &= IPN_USERFLAGS; 668 if ((nat->in_redir & NAT_MAPBLK) == 0) { 669 if ((nat->in_flags & IPN_SPLIT) == 0) 670 nat->in_inip &= nat->in_inmsk; 671 if ((nat->in_flags & IPN_IPRANGE) == 0) 672 nat->in_outip &= nat->in_outmsk; 673 } 674 MUTEX_ENTER(&ifs->ifs_ipf_natio); 675 for (np = &ifs->ifs_nat_list; ((n = *np) != NULL); 676 np = &n->in_next) 677 if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags, 678 IPN_CMPSIZ) == 0) { 679 if (nat->in_redir == NAT_REDIRECT && 680 nat->in_pnext != n->in_pnext) 681 continue; 682 break; 683 } 684 } 685 686 switch (cmd) 687 { 688 case SIOCGENITER : 689 { 690 ipfgeniter_t iter; 691 ipftoken_t *token; 692 693 error = fr_inobj(data, &iter, IPFOBJ_GENITER); 694 if (error != 0) 695 break; 696 697 token = ipf_findtoken(iter.igi_type, uid, ctx, ifs); 698 if (token != NULL) 699 error = nat_iterator(token, &iter, ifs); 700 else 701 error = ESRCH; 702 RWLOCK_EXIT(&ifs->ifs_ipf_tokens); 703 break; 704 } 705 #ifdef IPFILTER_LOG 706 case SIOCIPFFB : 707 { 708 int tmp; 709 710 if (!(mode & FWRITE)) 711 error = EPERM; 712 else { 713 tmp = ipflog_clear(IPL_LOGNAT, ifs); 714 BCOPYOUT((char *)&tmp, (char *)data, sizeof(tmp)); 715 } 716 break; 717 } 718 case SIOCSETLG : 719 if (!(mode & FWRITE)) 720 error = EPERM; 721 else { 722 BCOPYIN((char *)data, 723 (char *)&ifs->ifs_nat_logging, 724 sizeof(ifs->ifs_nat_logging)); 725 } 726 break; 727 case SIOCGETLG : 728 BCOPYOUT((char *)&ifs->ifs_nat_logging, (char *)data, 729 sizeof(ifs->ifs_nat_logging)); 730 break; 731 case FIONREAD : 732 arg = ifs->ifs_iplused[IPL_LOGNAT]; 733 BCOPYOUT(&arg, data, sizeof(arg)); 734 break; 735 #endif 736 case SIOCADNAT : 737 if (!(mode & FWRITE)) { 738 error = EPERM; 739 } else if (n != NULL) { 740 error = EEXIST; 741 } else if (nt == NULL) { 742 error = ENOMEM; 743 } 744 if (error != 0) { 745 MUTEX_EXIT(&ifs->ifs_ipf_natio); 746 break; 747 } 748 bcopy((char *)nat, (char *)nt, sizeof(*n)); 749 error = nat_siocaddnat(nt, np, getlock, ifs); 750 MUTEX_EXIT(&ifs->ifs_ipf_natio); 751 if (error == 0) 752 nt = NULL; 753 break; 754 case SIOCRMNAT : 755 if (!(mode & FWRITE)) { 756 error = EPERM; 757 n = NULL; 758 } else if (n == NULL) { 759 error = ESRCH; 760 } 761 762 if (error != 0) { 763 MUTEX_EXIT(&ifs->ifs_ipf_natio); 764 break; 765 } 766 nat_siocdelnat(n, np, getlock, ifs); 767 768 MUTEX_EXIT(&ifs->ifs_ipf_natio); 769 n = NULL; 770 break; 771 case SIOCGNATS : 772 ifs->ifs_nat_stats.ns_table[0] = ifs->ifs_nat_table[0]; 773 ifs->ifs_nat_stats.ns_table[1] = ifs->ifs_nat_table[1]; 774 ifs->ifs_nat_stats.ns_list = ifs->ifs_nat_list; 775 ifs->ifs_nat_stats.ns_maptable = ifs->ifs_maptable; 776 ifs->ifs_nat_stats.ns_maplist = ifs->ifs_ipf_hm_maplist; 777 ifs->ifs_nat_stats.ns_nattab_max = ifs->ifs_ipf_nattable_max; 778 ifs->ifs_nat_stats.ns_nattab_sz = ifs->ifs_ipf_nattable_sz; 779 ifs->ifs_nat_stats.ns_rultab_sz = ifs->ifs_ipf_natrules_sz; 780 ifs->ifs_nat_stats.ns_rdrtab_sz = ifs->ifs_ipf_rdrrules_sz; 781 ifs->ifs_nat_stats.ns_hostmap_sz = ifs->ifs_ipf_hostmap_sz; 782 ifs->ifs_nat_stats.ns_instances = ifs->ifs_nat_instances; 783 ifs->ifs_nat_stats.ns_apslist = ifs->ifs_ap_sess_list; 784 error = fr_outobj(data, &ifs->ifs_nat_stats, IPFOBJ_NATSTAT); 785 break; 786 case SIOCGNATL : 787 { 788 natlookup_t nl; 789 790 if (getlock) { 791 READ_ENTER(&ifs->ifs_ipf_nat); 792 } 793 error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP); 794 if (nl.nl_v != 6) 795 nl.nl_v = 4; 796 if (error == 0) { 797 void *ptr; 798 799 switch (nl.nl_v) 800 { 801 case 4: 802 ptr = nat_lookupredir(&nl, ifs); 803 break; 804 #ifdef USE_INET6 805 case 6: 806 ptr = nat6_lookupredir(&nl, ifs); 807 break; 808 #endif 809 default: 810 ptr = NULL; 811 break; 812 } 813 814 if (ptr != NULL) { 815 error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP); 816 } else { 817 error = ESRCH; 818 } 819 } 820 if (getlock) { 821 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 822 } 823 break; 824 } 825 case SIOCIPFFL : /* old SIOCFLNAT & SIOCCNATL */ 826 if (!(mode & FWRITE)) { 827 error = EPERM; 828 break; 829 } 830 if (getlock) { 831 WRITE_ENTER(&ifs->ifs_ipf_nat); 832 } 833 error = 0; 834 if (arg == 0) 835 ret = nat_flushtable(ifs); 836 else if (arg == 1) 837 ret = nat_clearlist(ifs); 838 else if (arg >= 2 && arg <= 4) 839 ret = nat_extraflush(arg - 2, ifs); 840 else 841 error = EINVAL; 842 if (getlock) { 843 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 844 } 845 if (error == 0) { 846 BCOPYOUT(&ret, data, sizeof(ret)); 847 } 848 break; 849 case SIOCPROXY : 850 error = appr_ioctl(data, cmd, mode, ifs); 851 break; 852 case SIOCSTLCK : 853 if (!(mode & FWRITE)) { 854 error = EPERM; 855 } else { 856 fr_lock(data, &ifs->ifs_fr_nat_lock); 857 } 858 break; 859 case SIOCSTPUT : 860 if ((mode & FWRITE) != 0) { 861 error = fr_natputent(data, getlock, ifs); 862 } else { 863 error = EACCES; 864 } 865 break; 866 case SIOCSTGSZ : 867 if (ifs->ifs_fr_nat_lock) { 868 if (getlock) { 869 READ_ENTER(&ifs->ifs_ipf_nat); 870 } 871 error = fr_natgetsz(data, ifs); 872 if (getlock) { 873 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 874 } 875 } else 876 error = EACCES; 877 break; 878 case SIOCSTGET : 879 if (ifs->ifs_fr_nat_lock) { 880 if (getlock) { 881 READ_ENTER(&ifs->ifs_ipf_nat); 882 } 883 error = fr_natgetent(data, ifs); 884 if (getlock) { 885 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 886 } 887 } else 888 error = EACCES; 889 break; 890 case SIOCIPFDELTOK : 891 (void) BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg)); 892 error = ipf_deltoken(arg, uid, ctx, ifs); 893 break; 894 default : 895 error = EINVAL; 896 break; 897 } 898 done: 899 if (nt) 900 KFREE(nt); 901 return error; 902 } 903 904 905 /* ------------------------------------------------------------------------ */ 906 /* Function: nat_siocaddnat */ 907 /* Returns: int - 0 == success, != 0 == failure */ 908 /* Parameters: n(I) - pointer to new NAT rule */ 909 /* np(I) - pointer to where to insert new NAT rule */ 910 /* getlock(I) - flag indicating if lock on ipf_nat is held */ 911 /* Mutex Locks: ipf_natio */ 912 /* */ 913 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ 914 /* from information passed to the kernel, then add it to the appropriate */ 915 /* NAT rule table(s). */ 916 /* ------------------------------------------------------------------------ */ 917 static int nat_siocaddnat(n, np, getlock, ifs) 918 ipnat_t *n, **np; 919 int getlock; 920 ipf_stack_t *ifs; 921 { 922 int error = 0, i, j; 923 924 if (nat_resolverule(n, ifs) != 0) 925 return ENOENT; 926 927 if ((n->in_age[0] == 0) && (n->in_age[1] != 0)) 928 return EINVAL; 929 930 n->in_use = 0; 931 if (n->in_redir & NAT_MAPBLK) 932 n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk); 933 else if (n->in_flags & IPN_AUTOPORTMAP) 934 n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk); 935 else if (n->in_flags & IPN_IPRANGE) 936 n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip); 937 else if (n->in_flags & IPN_SPLIT) 938 n->in_space = 2; 939 else if (n->in_outmsk != 0) 940 n->in_space = ~ntohl(n->in_outmsk); 941 else 942 n->in_space = 1; 943 944 /* 945 * Calculate the number of valid IP addresses in the output 946 * mapping range. In all cases, the range is inclusive of 947 * the start and ending IP addresses. 948 * If to a CIDR address, lose 2: broadcast + network address 949 * (so subtract 1) 950 * If to a range, add one. 951 * If to a single IP address, set to 1. 952 */ 953 if (n->in_space) { 954 if ((n->in_flags & IPN_IPRANGE) != 0) 955 n->in_space += 1; 956 else 957 n->in_space -= 1; 958 } else 959 n->in_space = 1; 960 961 #ifdef USE_INET6 962 if (n->in_v == 6 && (n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0 && 963 !IP6_ISONES(&n->in_out[1]) && !IP6_ISZERO(&n->in_out[1])) 964 IP6_ADD(&n->in_out[0], 1, &n->in_next6) 965 else if (n->in_v == 6 && 966 (n->in_flags & IPN_SPLIT) && (n->in_redir & NAT_REDIRECT)) 967 n->in_next6 = n->in_in[0]; 968 else if (n->in_v == 6) 969 n->in_next6 = n->in_out[0]; 970 else 971 #endif 972 if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) && 973 ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0)) 974 n->in_nip = ntohl(n->in_outip) + 1; 975 else if ((n->in_flags & IPN_SPLIT) && 976 (n->in_redir & NAT_REDIRECT)) 977 n->in_nip = ntohl(n->in_inip); 978 else 979 n->in_nip = ntohl(n->in_outip); 980 981 if (n->in_redir & NAT_MAP) { 982 n->in_pnext = ntohs(n->in_pmin); 983 /* 984 * Multiply by the number of ports made available. 985 */ 986 if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) { 987 n->in_space *= (ntohs(n->in_pmax) - 988 ntohs(n->in_pmin) + 1); 989 /* 990 * Because two different sources can map to 991 * different destinations but use the same 992 * local IP#/port #. 993 * If the result is smaller than in_space, then 994 * we may have wrapped around 32bits. 995 */ 996 i = n->in_inmsk; 997 if ((i != 0) && (i != 0xffffffff)) { 998 j = n->in_space * (~ntohl(i) + 1); 999 if (j >= n->in_space) 1000 n->in_space = j; 1001 else 1002 n->in_space = 0xffffffff; 1003 } 1004 } 1005 /* 1006 * If no protocol is specified, multiple by 256 to allow for 1007 * at least one IP:IP mapping per protocol. 1008 */ 1009 if ((n->in_flags & IPN_TCPUDPICMP) == 0) { 1010 j = n->in_space * 256; 1011 if (j >= n->in_space) 1012 n->in_space = j; 1013 else 1014 n->in_space = 0xffffffff; 1015 } 1016 } 1017 1018 /* Otherwise, these fields are preset */ 1019 1020 if (getlock) { 1021 WRITE_ENTER(&ifs->ifs_ipf_nat); 1022 } 1023 n->in_next = NULL; 1024 *np = n; 1025 1026 if (n->in_age[0] != 0) 1027 n->in_tqehead[0] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe, 1028 n->in_age[0], ifs); 1029 1030 if (n->in_age[1] != 0) 1031 n->in_tqehead[1] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe, 1032 n->in_age[1], ifs); 1033 1034 if (n->in_redir & NAT_REDIRECT) { 1035 n->in_flags &= ~IPN_NOTDST; 1036 switch (n->in_v) 1037 { 1038 case 4 : 1039 nat_addrdr(n, ifs); 1040 break; 1041 #ifdef USE_INET6 1042 case 6 : 1043 nat6_addrdr(n, ifs); 1044 break; 1045 #endif 1046 default : 1047 break; 1048 } 1049 } 1050 if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) { 1051 n->in_flags &= ~IPN_NOTSRC; 1052 switch (n->in_v) 1053 { 1054 case 4 : 1055 nat_addnat(n, ifs); 1056 break; 1057 #ifdef USE_INET6 1058 case 6 : 1059 nat6_addnat(n, ifs); 1060 break; 1061 #endif 1062 default : 1063 break; 1064 } 1065 } 1066 n = NULL; 1067 ifs->ifs_nat_stats.ns_rules++; 1068 if (getlock) { 1069 RWLOCK_EXIT(&ifs->ifs_ipf_nat); /* WRITE */ 1070 } 1071 1072 return error; 1073 } 1074 1075 1076 /* ------------------------------------------------------------------------ */ 1077 /* Function: nat_resolvrule */ 1078 /* Returns: int - 0 == success, -1 == failure */ 1079 /* Parameters: n(I) - pointer to NAT rule */ 1080 /* */ 1081 /* Resolve some of the details inside the NAT rule. Includes resolving */ 1082 /* any specified interfaces and proxy labels, and determines whether or not */ 1083 /* all proxy labels are correctly specified. */ 1084 /* */ 1085 /* Called by nat_siocaddnat() (SIOCADNAT) and fr_natputent (SIOCSTPUT). */ 1086 /* ------------------------------------------------------------------------ */ 1087 static int nat_resolverule(n, ifs) 1088 ipnat_t *n; 1089 ipf_stack_t *ifs; 1090 { 1091 n->in_ifnames[0][LIFNAMSIZ - 1] = '\0'; 1092 n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], n->in_v, ifs); 1093 1094 n->in_ifnames[1][LIFNAMSIZ - 1] = '\0'; 1095 if (n->in_ifnames[1][0] == '\0') { 1096 (void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ); 1097 n->in_ifps[1] = n->in_ifps[0]; 1098 } else { 1099 n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], n->in_v, ifs); 1100 } 1101 1102 if (n->in_plabel[0] != '\0') { 1103 n->in_apr = appr_lookup(n->in_p, n->in_plabel, ifs); 1104 if (n->in_apr == NULL) 1105 return -1; 1106 } 1107 return 0; 1108 } 1109 1110 1111 /* ------------------------------------------------------------------------ */ 1112 /* Function: nat_siocdelnat */ 1113 /* Returns: int - 0 == success, != 0 == failure */ 1114 /* Parameters: n(I) - pointer to new NAT rule */ 1115 /* np(I) - pointer to where to insert new NAT rule */ 1116 /* getlock(I) - flag indicating if lock on ipf_nat is held */ 1117 /* Mutex Locks: ipf_natio */ 1118 /* */ 1119 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ 1120 /* from information passed to the kernel, then add it to the appropriate */ 1121 /* NAT rule table(s). */ 1122 /* ------------------------------------------------------------------------ */ 1123 static void nat_siocdelnat(n, np, getlock, ifs) 1124 ipnat_t *n, **np; 1125 int getlock; 1126 ipf_stack_t *ifs; 1127 { 1128 int i; 1129 1130 if (getlock) { 1131 WRITE_ENTER(&ifs->ifs_ipf_nat); 1132 } 1133 if (n->in_redir & NAT_REDIRECT) 1134 nat_delrdr(n); 1135 if (n->in_redir & (NAT_MAPBLK|NAT_MAP)) 1136 nat_delnat(n); 1137 if (ifs->ifs_nat_list == NULL) { 1138 ifs->ifs_nat_masks = 0; 1139 ifs->ifs_rdr_masks = 0; 1140 for (i = 0; i < 4; i++) { 1141 ifs->ifs_nat6_masks[i] = 0; 1142 ifs->ifs_rdr6_masks[i] = 0; 1143 } 1144 } 1145 1146 if (n->in_tqehead[0] != NULL) { 1147 if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) { 1148 fr_freetimeoutqueue(n->in_tqehead[0], ifs); 1149 } 1150 } 1151 1152 if (n->in_tqehead[1] != NULL) { 1153 if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) { 1154 fr_freetimeoutqueue(n->in_tqehead[1], ifs); 1155 } 1156 } 1157 1158 *np = n->in_next; 1159 1160 if (n->in_use == 0) { 1161 if (n->in_apr) 1162 appr_free(n->in_apr); 1163 KFREE(n); 1164 ifs->ifs_nat_stats.ns_rules--; 1165 } else { 1166 n->in_flags |= IPN_DELETE; 1167 n->in_next = NULL; 1168 } 1169 if (getlock) { 1170 RWLOCK_EXIT(&ifs->ifs_ipf_nat); /* READ/WRITE */ 1171 } 1172 } 1173 1174 1175 /* ------------------------------------------------------------------------ */ 1176 /* Function: fr_natgetsz */ 1177 /* Returns: int - 0 == success, != 0 is the error value. */ 1178 /* Parameters: data(I) - pointer to natget structure with kernel pointer */ 1179 /* get the size of. */ 1180 /* */ 1181 /* Handle SIOCSTGSZ. */ 1182 /* Return the size of the nat list entry to be copied back to user space. */ 1183 /* The size of the entry is stored in the ng_sz field and the enture natget */ 1184 /* structure is copied back to the user. */ 1185 /* ------------------------------------------------------------------------ */ 1186 static int fr_natgetsz(data, ifs) 1187 caddr_t data; 1188 ipf_stack_t *ifs; 1189 { 1190 ap_session_t *aps; 1191 nat_t *nat, *n; 1192 natget_t ng; 1193 1194 BCOPYIN(data, &ng, sizeof(ng)); 1195 1196 nat = ng.ng_ptr; 1197 if (!nat) { 1198 nat = ifs->ifs_nat_instances; 1199 ng.ng_sz = 0; 1200 /* 1201 * Empty list so the size returned is 0. Simple. 1202 */ 1203 if (nat == NULL) { 1204 BCOPYOUT(&ng, data, sizeof(ng)); 1205 return 0; 1206 } 1207 } else { 1208 /* 1209 * Make sure the pointer we're copying from exists in the 1210 * current list of entries. Security precaution to prevent 1211 * copying of random kernel data. 1212 */ 1213 for (n = ifs->ifs_nat_instances; n; n = n->nat_next) 1214 if (n == nat) 1215 break; 1216 if (!n) 1217 return ESRCH; 1218 } 1219 1220 /* 1221 * Incluse any space required for proxy data structures. 1222 */ 1223 ng.ng_sz = sizeof(nat_save_t); 1224 aps = nat->nat_aps; 1225 if (aps != NULL) { 1226 ng.ng_sz += sizeof(ap_session_t) - 4; 1227 if (aps->aps_data != 0) 1228 ng.ng_sz += aps->aps_psiz; 1229 } 1230 1231 BCOPYOUT(&ng, data, sizeof(ng)); 1232 return 0; 1233 } 1234 1235 1236 /* ------------------------------------------------------------------------ */ 1237 /* Function: fr_natgetent */ 1238 /* Returns: int - 0 == success, != 0 is the error value. */ 1239 /* Parameters: data(I) - pointer to natget structure with kernel pointer */ 1240 /* to NAT structure to copy out. */ 1241 /* */ 1242 /* Handle SIOCSTGET. */ 1243 /* Copies out NAT entry to user space. Any additional data held for a */ 1244 /* proxy is also copied, as to is the NAT rule which was responsible for it */ 1245 /* ------------------------------------------------------------------------ */ 1246 static int fr_natgetent(data, ifs) 1247 caddr_t data; 1248 ipf_stack_t *ifs; 1249 { 1250 int error, outsize; 1251 ap_session_t *aps; 1252 nat_save_t *ipn, ipns; 1253 nat_t *n, *nat; 1254 1255 error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE); 1256 if (error != 0) 1257 return error; 1258 1259 if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920)) 1260 return EINVAL; 1261 1262 KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize); 1263 if (ipn == NULL) 1264 return ENOMEM; 1265 1266 ipn->ipn_dsize = ipns.ipn_dsize; 1267 nat = ipns.ipn_next; 1268 if (nat == NULL) { 1269 nat = ifs->ifs_nat_instances; 1270 if (nat == NULL) { 1271 if (ifs->ifs_nat_instances == NULL) 1272 error = ENOENT; 1273 goto finished; 1274 } 1275 } else { 1276 /* 1277 * Make sure the pointer we're copying from exists in the 1278 * current list of entries. Security precaution to prevent 1279 * copying of random kernel data. 1280 */ 1281 for (n = ifs->ifs_nat_instances; n; n = n->nat_next) 1282 if (n == nat) 1283 break; 1284 if (n == NULL) { 1285 error = ESRCH; 1286 goto finished; 1287 } 1288 } 1289 ipn->ipn_next = nat->nat_next; 1290 1291 /* 1292 * Copy the NAT structure. 1293 */ 1294 bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat)); 1295 1296 /* 1297 * If we have a pointer to the NAT rule it belongs to, save that too. 1298 */ 1299 if (nat->nat_ptr != NULL) 1300 bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat, 1301 sizeof(ipn->ipn_ipnat)); 1302 1303 /* 1304 * If we also know the NAT entry has an associated filter rule, 1305 * save that too. 1306 */ 1307 if (nat->nat_fr != NULL) 1308 bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr, 1309 sizeof(ipn->ipn_fr)); 1310 1311 /* 1312 * Last but not least, if there is an application proxy session set 1313 * up for this NAT entry, then copy that out too, including any 1314 * private data saved along side it by the proxy. 1315 */ 1316 aps = nat->nat_aps; 1317 outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data); 1318 if (aps != NULL) { 1319 char *s; 1320 1321 if (outsize < sizeof(*aps)) { 1322 error = ENOBUFS; 1323 goto finished; 1324 } 1325 1326 s = ipn->ipn_data; 1327 bcopy((char *)aps, s, sizeof(*aps)); 1328 s += sizeof(*aps); 1329 outsize -= sizeof(*aps); 1330 if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz)) 1331 bcopy(aps->aps_data, s, aps->aps_psiz); 1332 else 1333 error = ENOBUFS; 1334 } 1335 if (error == 0) { 1336 error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize); 1337 } 1338 1339 finished: 1340 if (ipn != NULL) { 1341 KFREES(ipn, ipns.ipn_dsize); 1342 } 1343 return error; 1344 } 1345 1346 /* ------------------------------------------------------------------------ */ 1347 /* Function: nat_calc_chksum_diffs */ 1348 /* Returns: void */ 1349 /* Parameters: nat - pointer to NAT table entry */ 1350 /* */ 1351 /* Function calculates chksum deltas for IP header (nat_ipsumd) and TCP/UDP */ 1352 /* headers (nat_sumd). The things for L4 (UDP/TCP) get complicated when */ 1353 /* we are dealing with partial chksum offload. For these cases we need to */ 1354 /* compute a 'partial chksum delta'. The 'partial chksum delta'is stored */ 1355 /* into nat_sumd[1], while ordinary chksum delta for TCP/UDP is in */ 1356 /* nat_sumd[0]. */ 1357 /* */ 1358 /* The function accepts initialized NAT table entry and computes the deltas */ 1359 /* from nat_inip/nat_outip members. The function is called right before */ 1360 /* the new entry is inserted into the table. */ 1361 /* */ 1362 /* The ipsumd (IP hedaer chksum delta adjustment) is computed as a chksum */ 1363 /* of delta between original and new IP addresses. */ 1364 /* */ 1365 /* the nat_sumd[0] (TCP/UDP header chksum delta adjustment) is computed as */ 1366 /* a chkusm of delta between original an new IP addrress:port tupples. */ 1367 /* */ 1368 /* Some facts about chksum, we should remember: */ 1369 /* IP header chksum covers IP header only */ 1370 /* */ 1371 /* TCP/UDP chksum covers data payload and so called pseudo header */ 1372 /* SRC, DST IP address */ 1373 /* SRC, DST Port */ 1374 /* length of payload */ 1375 /* */ 1376 /* The partial chksum delta (nat_sumd[1] is used to adjust db_ckusm16 */ 1377 /* member of dblk_t structure. The db_ckusm16 member is not part of */ 1378 /* IP/UDP/TCP header it is 16 bit value computed by NIC driver with partial */ 1379 /* chksum offload capacbility for every inbound packet. The db_cksum16 is */ 1380 /* stored along with other IP packet data in dblk_t structure and used in */ 1381 /* for IP/UDP/TCP chksum validation later in ip.c. */ 1382 /* */ 1383 /* The partial chksum delta (adjustment, nat_sumd[1]) is computed as chksum */ 1384 /* of delta between new and orig address. NOTE: the order of operands for */ 1385 /* partial delta operation is swapped compared to computing the IP/TCP/UDP */ 1386 /* header adjustment. It is by design see (IP_CKSUM_RECV() macro in ip.c). */ 1387 /* */ 1388 /* ------------------------------------------------------------------------ */ 1389 void nat_calc_chksum_diffs(nat) 1390 nat_t *nat; 1391 { 1392 u_32_t sum_orig = 0; 1393 u_32_t sum_changed = 0; 1394 u_32_t sumd; 1395 u_32_t ipsum_orig = 0; 1396 u_32_t ipsum_changed = 0; 1397 1398 if (nat->nat_v != 4 && nat->nat_v != 6) 1399 return; 1400 1401 /* 1402 * the switch calculates operands for CALC_SUMD(), 1403 * which will compute the partial chksum delta. 1404 */ 1405 switch (nat->nat_dir) 1406 { 1407 case NAT_INBOUND: 1408 /* 1409 * we are dealing with RDR rule (DST address gets 1410 * modified on packet from client) 1411 */ 1412 if (nat->nat_v == 4) { 1413 sum_changed = LONG_SUM(ntohl(nat->nat_inip.s_addr)); 1414 sum_orig = LONG_SUM(ntohl(nat->nat_outip.s_addr)); 1415 } else { 1416 sum_changed = LONG_SUM6(&nat->nat_inip6); 1417 sum_orig = LONG_SUM6(&nat->nat_outip6); 1418 } 1419 break; 1420 case NAT_OUTBOUND: 1421 /* 1422 * we are dealing with MAP rule (SRC address gets 1423 * modified on packet from client) 1424 */ 1425 if (nat->nat_v == 4) { 1426 sum_changed = LONG_SUM(ntohl(nat->nat_outip.s_addr)); 1427 sum_orig = LONG_SUM(ntohl(nat->nat_inip.s_addr)); 1428 } else { 1429 sum_changed = LONG_SUM6(&nat->nat_outip6); 1430 sum_orig = LONG_SUM6(&nat->nat_inip6); 1431 } 1432 break; 1433 default: ; 1434 break; 1435 } 1436 1437 /* 1438 * we also preserve CALC_SUMD() operands here, for IP chksum delta 1439 * calculation, which happens at the end of function. 1440 */ 1441 ipsum_changed = sum_changed; 1442 ipsum_orig = sum_orig; 1443 /* 1444 * NOTE: the order of operands for partial chksum adjustment 1445 * computation has to be swapped! 1446 */ 1447 CALC_SUMD(sum_changed, sum_orig, sumd); 1448 nat->nat_sumd[1] = (sumd & 0xffff) + (sumd >> 16); 1449 1450 if (nat->nat_flags & (IPN_TCPUDP | IPN_ICMPQUERY)) { 1451 1452 /* 1453 * switch calculates operands for CALC_SUMD(), which will 1454 * compute the full chksum delta. 1455 */ 1456 switch (nat->nat_dir) 1457 { 1458 case NAT_INBOUND: 1459 if (nat->nat_v == 4) { 1460 sum_changed = LONG_SUM( 1461 ntohl(nat->nat_inip.s_addr) + 1462 ntohs(nat->nat_inport)); 1463 sum_orig = LONG_SUM( 1464 ntohl(nat->nat_outip.s_addr) + 1465 ntohs(nat->nat_outport)); 1466 } else { 1467 sum_changed = LONG_SUM6(&nat->nat_inip6) + 1468 ntohs(nat->nat_inport); 1469 sum_orig = LONG_SUM6(&nat->nat_outip6) + 1470 ntohs(nat->nat_outport); 1471 } 1472 break; 1473 case NAT_OUTBOUND: 1474 if (nat->nat_v == 4) { 1475 sum_changed = LONG_SUM( 1476 ntohl(nat->nat_outip.s_addr) + 1477 ntohs(nat->nat_outport)); 1478 sum_orig = LONG_SUM( 1479 ntohl(nat->nat_inip.s_addr) + 1480 ntohs(nat->nat_inport)); 1481 } else { 1482 sum_changed = LONG_SUM6(&nat->nat_outip6) + 1483 ntohs(nat->nat_outport); 1484 sum_orig = LONG_SUM6(&nat->nat_inip6) + 1485 ntohs(nat->nat_inport); 1486 } 1487 break; 1488 default: ; 1489 break; 1490 } 1491 1492 CALC_SUMD(sum_orig, sum_changed, sumd); 1493 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); 1494 1495 if (!(nat->nat_flags & IPN_TCPUDP)) { 1496 /* 1497 * partial HW chksum offload works for TCP/UDP headers only, 1498 * so we need to enforce full chksum adjustment for ICMP 1499 */ 1500 nat->nat_sumd[1] = nat->nat_sumd[0]; 1501 } 1502 } 1503 else 1504 nat->nat_sumd[0] = nat->nat_sumd[1]; 1505 1506 /* 1507 * we may reuse the already computed nat_sumd[0] for IP header chksum 1508 * adjustment in case the L4 (TCP/UDP header) is not changed by NAT. 1509 */ 1510 if (nat->nat_v == 4) { 1511 if (NAT_HAS_L4_CHANGED(nat)) { 1512 /* 1513 * bad luck, NAT changes also the L4 header, use IP 1514 * addresses to compute chksum adjustment for IP header. 1515 */ 1516 CALC_SUMD(ipsum_orig, ipsum_changed, sumd); 1517 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16); 1518 } else { 1519 /* 1520 * the NAT does not change L4 hdr -> reuse chksum 1521 * adjustment for IP hdr. 1522 */ 1523 nat->nat_ipsumd = nat->nat_sumd[0]; 1524 1525 /* 1526 * if L4 header does not use chksum - zero out deltas 1527 */ 1528 if (!(nat->nat_flags & IPN_TCPUDP)) { 1529 nat->nat_sumd[0] = 0; 1530 nat->nat_sumd[1] = 0; 1531 } 1532 } 1533 } 1534 1535 return; 1536 } 1537 1538 /* ------------------------------------------------------------------------ */ 1539 /* Function: fr_natputent */ 1540 /* Returns: int - 0 == success, != 0 is the error value. */ 1541 /* Parameters: data(I) - pointer to natget structure with NAT */ 1542 /* structure information to load into the kernel */ 1543 /* getlock(I) - flag indicating whether or not a write lock */ 1544 /* on ipf_nat is already held. */ 1545 /* */ 1546 /* Handle SIOCSTPUT. */ 1547 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */ 1548 /* firewall rule data structures, if pointers to them indicate so. */ 1549 /* ------------------------------------------------------------------------ */ 1550 static int fr_natputent(data, getlock, ifs) 1551 caddr_t data; 1552 int getlock; 1553 ipf_stack_t *ifs; 1554 { 1555 nat_save_t ipn, *ipnn; 1556 ap_session_t *aps; 1557 nat_t *n, *nat; 1558 frentry_t *fr; 1559 fr_info_t fin; 1560 ipnat_t *in; 1561 int error; 1562 1563 error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE); 1564 if (error != 0) 1565 return error; 1566 1567 /* 1568 * Trigger automatic call to nat_extraflush() if the 1569 * table has reached capcity specified by hi watermark. 1570 */ 1571 if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi) 1572 ifs->ifs_nat_doflush = 1; 1573 1574 /* 1575 * Initialise early because of code at junkput label. 1576 */ 1577 in = NULL; 1578 aps = NULL; 1579 nat = NULL; 1580 ipnn = NULL; 1581 1582 /* 1583 * New entry, copy in the rest of the NAT entry if it's size is more 1584 * than just the nat_t structure. 1585 */ 1586 fr = NULL; 1587 if (ipn.ipn_dsize > sizeof(ipn)) { 1588 if (ipn.ipn_dsize > 81920) { 1589 error = ENOMEM; 1590 goto junkput; 1591 } 1592 1593 KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize); 1594 if (ipnn == NULL) 1595 return ENOMEM; 1596 1597 error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize); 1598 if (error != 0) { 1599 error = EFAULT; 1600 goto junkput; 1601 } 1602 } else 1603 ipnn = &ipn; 1604 1605 KMALLOC(nat, nat_t *); 1606 if (nat == NULL) { 1607 error = ENOMEM; 1608 goto junkput; 1609 } 1610 1611 bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat)); 1612 /* 1613 * Initialize all these so that nat_delete() doesn't cause a crash. 1614 */ 1615 bzero((char *)nat, offsetof(struct nat, nat_tqe)); 1616 nat->nat_tqe.tqe_pnext = NULL; 1617 nat->nat_tqe.tqe_next = NULL; 1618 nat->nat_tqe.tqe_ifq = NULL; 1619 nat->nat_tqe.tqe_parent = nat; 1620 1621 /* 1622 * Restore the rule associated with this nat session 1623 */ 1624 in = ipnn->ipn_nat.nat_ptr; 1625 if (in != NULL) { 1626 KMALLOC(in, ipnat_t *); 1627 nat->nat_ptr = in; 1628 if (in == NULL) { 1629 error = ENOMEM; 1630 goto junkput; 1631 } 1632 bzero((char *)in, offsetof(struct ipnat, in_next6)); 1633 bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in)); 1634 in->in_use = 1; 1635 in->in_flags |= IPN_DELETE; 1636 1637 ATOMIC_INC(ifs->ifs_nat_stats.ns_rules); 1638 1639 if (nat_resolverule(in, ifs) != 0) { 1640 error = ESRCH; 1641 goto junkput; 1642 } 1643 } 1644 1645 /* 1646 * Check that the NAT entry doesn't already exist in the kernel. 1647 */ 1648 if (nat->nat_v != 6) 1649 nat->nat_v = 4; 1650 bzero((char *)&fin, sizeof(fin)); 1651 fin.fin_p = nat->nat_p; 1652 fin.fin_ifs = ifs; 1653 if (nat->nat_dir == NAT_OUTBOUND) { 1654 fin.fin_data[0] = ntohs(nat->nat_oport); 1655 fin.fin_data[1] = ntohs(nat->nat_outport); 1656 fin.fin_ifp = nat->nat_ifps[0]; 1657 if (getlock) { 1658 READ_ENTER(&ifs->ifs_ipf_nat); 1659 } 1660 1661 switch (nat->nat_v) 1662 { 1663 case 4: 1664 fin.fin_v = nat->nat_v; 1665 n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p, 1666 nat->nat_oip, nat->nat_outip); 1667 break; 1668 #ifdef USE_INET6 1669 case 6: 1670 n = nat6_inlookup(&fin, nat->nat_flags, fin.fin_p, 1671 &nat->nat_oip6.in6, &nat->nat_outip6.in6); 1672 break; 1673 #endif 1674 default: 1675 n = NULL; 1676 break; 1677 } 1678 1679 if (getlock) { 1680 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1681 } 1682 if (n != NULL) { 1683 error = EEXIST; 1684 goto junkput; 1685 } 1686 } else if (nat->nat_dir == NAT_INBOUND) { 1687 fin.fin_data[0] = ntohs(nat->nat_inport); 1688 fin.fin_data[1] = ntohs(nat->nat_oport); 1689 fin.fin_ifp = nat->nat_ifps[1]; 1690 if (getlock) { 1691 READ_ENTER(&ifs->ifs_ipf_nat); 1692 } 1693 1694 switch (nat->nat_v) 1695 { 1696 case 4: 1697 n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p, 1698 nat->nat_inip, nat->nat_oip); 1699 break; 1700 #ifdef USE_INET6 1701 case 6: 1702 n = nat6_outlookup(&fin, nat->nat_flags, fin.fin_p, 1703 &nat->nat_inip6.in6, &nat->nat_oip6.in6); 1704 break; 1705 #endif 1706 default: 1707 n = NULL; 1708 break; 1709 } 1710 1711 if (getlock) { 1712 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1713 } 1714 if (n != NULL) { 1715 error = EEXIST; 1716 goto junkput; 1717 } 1718 } else { 1719 error = EINVAL; 1720 goto junkput; 1721 } 1722 1723 /* 1724 * Restore ap_session_t structure. Include the private data allocated 1725 * if it was there. 1726 */ 1727 aps = nat->nat_aps; 1728 if (aps != NULL) { 1729 KMALLOC(aps, ap_session_t *); 1730 nat->nat_aps = aps; 1731 if (aps == NULL) { 1732 error = ENOMEM; 1733 goto junkput; 1734 } 1735 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps)); 1736 if (in != NULL) 1737 aps->aps_apr = in->in_apr; 1738 else 1739 aps->aps_apr = NULL; 1740 if (aps->aps_psiz != 0) { 1741 if (aps->aps_psiz > 81920) { 1742 error = ENOMEM; 1743 goto junkput; 1744 } 1745 KMALLOCS(aps->aps_data, void *, aps->aps_psiz); 1746 if (aps->aps_data == NULL) { 1747 error = ENOMEM; 1748 goto junkput; 1749 } 1750 bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data, 1751 aps->aps_psiz); 1752 } else { 1753 aps->aps_psiz = 0; 1754 aps->aps_data = NULL; 1755 } 1756 } 1757 1758 /* 1759 * If there was a filtering rule associated with this entry then 1760 * build up a new one. 1761 */ 1762 fr = nat->nat_fr; 1763 if (fr != NULL) { 1764 if ((nat->nat_flags & SI_NEWFR) != 0) { 1765 KMALLOC(fr, frentry_t *); 1766 nat->nat_fr = fr; 1767 if (fr == NULL) { 1768 error = ENOMEM; 1769 goto junkput; 1770 } 1771 ipnn->ipn_nat.nat_fr = fr; 1772 (void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE); 1773 bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr)); 1774 1775 fr->fr_ref = 1; 1776 fr->fr_dsize = 0; 1777 fr->fr_data = NULL; 1778 fr->fr_type = FR_T_NONE; 1779 1780 MUTEX_NUKE(&fr->fr_lock); 1781 MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock"); 1782 } else { 1783 if (getlock) { 1784 READ_ENTER(&ifs->ifs_ipf_nat); 1785 } 1786 for (n = ifs->ifs_nat_instances; n; n = n->nat_next) 1787 if (n->nat_fr == fr) 1788 break; 1789 1790 if (n != NULL) { 1791 MUTEX_ENTER(&fr->fr_lock); 1792 fr->fr_ref++; 1793 MUTEX_EXIT(&fr->fr_lock); 1794 } 1795 if (getlock) { 1796 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1797 } 1798 if (!n) { 1799 error = ESRCH; 1800 goto junkput; 1801 } 1802 } 1803 } 1804 1805 if (ipnn != &ipn) { 1806 KFREES(ipnn, ipn.ipn_dsize); 1807 ipnn = NULL; 1808 } 1809 1810 nat_calc_chksum_diffs(nat); 1811 1812 if (getlock) { 1813 WRITE_ENTER(&ifs->ifs_ipf_nat); 1814 } 1815 1816 nat_calc_chksum_diffs(nat); 1817 1818 switch (nat->nat_v) 1819 { 1820 case 4 : 1821 error = nat_insert(nat, nat->nat_rev, ifs); 1822 break; 1823 #ifdef USE_INET6 1824 case 6 : 1825 error = nat6_insert(nat, nat->nat_rev, ifs); 1826 break; 1827 #endif 1828 default : 1829 break; 1830 } 1831 1832 if ((error == 0) && (aps != NULL)) { 1833 aps->aps_next = ifs->ifs_ap_sess_list; 1834 ifs->ifs_ap_sess_list = aps; 1835 } 1836 if (getlock) { 1837 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1838 } 1839 1840 if (error == 0) 1841 return 0; 1842 1843 error = ENOMEM; 1844 1845 junkput: 1846 if (fr != NULL) 1847 (void) fr_derefrule(&fr, ifs); 1848 1849 if ((ipnn != NULL) && (ipnn != &ipn)) { 1850 KFREES(ipnn, ipn.ipn_dsize); 1851 } 1852 if (nat != NULL) { 1853 if (aps != NULL) { 1854 if (aps->aps_data != NULL) { 1855 KFREES(aps->aps_data, aps->aps_psiz); 1856 } 1857 KFREE(aps); 1858 } 1859 if (in != NULL) { 1860 if (in->in_apr) 1861 appr_free(in->in_apr); 1862 KFREE(in); 1863 } 1864 KFREE(nat); 1865 } 1866 return error; 1867 } 1868 1869 1870 /* ------------------------------------------------------------------------ */ 1871 /* Function: nat_delete */ 1872 /* Returns: Nil */ 1873 /* Parameters: natd(I) - pointer to NAT structure to delete */ 1874 /* logtype(I) - type of LOG record to create before deleting */ 1875 /* Write Lock: ipf_nat */ 1876 /* */ 1877 /* Delete a nat entry from the various lists and table. If NAT logging is */ 1878 /* enabled then generate a NAT log record for this event. */ 1879 /* ------------------------------------------------------------------------ */ 1880 static void nat_delete(nat, logtype, ifs) 1881 struct nat *nat; 1882 int logtype; 1883 ipf_stack_t *ifs; 1884 { 1885 struct ipnat *ipn; 1886 1887 if (logtype != 0 && ifs->ifs_nat_logging != 0) 1888 nat_log(nat, logtype, ifs); 1889 1890 /* 1891 * Take it as a general indication that all the pointers are set if 1892 * nat_pnext is set. 1893 */ 1894 if (nat->nat_pnext != NULL) { 1895 ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--; 1896 ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--; 1897 1898 *nat->nat_pnext = nat->nat_next; 1899 if (nat->nat_next != NULL) { 1900 nat->nat_next->nat_pnext = nat->nat_pnext; 1901 nat->nat_next = NULL; 1902 } 1903 nat->nat_pnext = NULL; 1904 1905 *nat->nat_phnext[0] = nat->nat_hnext[0]; 1906 if (nat->nat_hnext[0] != NULL) { 1907 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; 1908 nat->nat_hnext[0] = NULL; 1909 } 1910 nat->nat_phnext[0] = NULL; 1911 1912 *nat->nat_phnext[1] = nat->nat_hnext[1]; 1913 if (nat->nat_hnext[1] != NULL) { 1914 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; 1915 nat->nat_hnext[1] = NULL; 1916 } 1917 nat->nat_phnext[1] = NULL; 1918 1919 if ((nat->nat_flags & SI_WILDP) != 0) 1920 ifs->ifs_nat_stats.ns_wilds--; 1921 } 1922 1923 if (nat->nat_me != NULL) { 1924 *nat->nat_me = NULL; 1925 nat->nat_me = NULL; 1926 } 1927 1928 fr_deletequeueentry(&nat->nat_tqe); 1929 1930 MUTEX_ENTER(&nat->nat_lock); 1931 if (nat->nat_ref > 1) { 1932 nat->nat_ref--; 1933 MUTEX_EXIT(&nat->nat_lock); 1934 return; 1935 } 1936 MUTEX_EXIT(&nat->nat_lock); 1937 1938 /* 1939 * At this point, nat_ref is 1, doing "--" would make it 0.. 1940 */ 1941 nat->nat_ref = 0; 1942 1943 #ifdef IPFILTER_SYNC 1944 if (nat->nat_sync) 1945 ipfsync_del(nat->nat_sync); 1946 #endif 1947 1948 if (nat->nat_fr != NULL) 1949 (void)fr_derefrule(&nat->nat_fr, ifs); 1950 1951 if (nat->nat_hm != NULL) 1952 fr_hostmapdel(&nat->nat_hm); 1953 1954 /* 1955 * If there is an active reference from the nat entry to its parent 1956 * rule, decrement the rule's reference count and free it too if no 1957 * longer being used. 1958 */ 1959 ipn = nat->nat_ptr; 1960 if (ipn != NULL) { 1961 ipn->in_space++; 1962 ipn->in_use--; 1963 if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) { 1964 if (ipn->in_apr) 1965 appr_free(ipn->in_apr); 1966 KFREE(ipn); 1967 ifs->ifs_nat_stats.ns_rules--; 1968 } 1969 } 1970 1971 MUTEX_DESTROY(&nat->nat_lock); 1972 1973 aps_free(nat->nat_aps, ifs); 1974 ifs->ifs_nat_stats.ns_inuse--; 1975 1976 /* 1977 * If there's a fragment table entry too for this nat entry, then 1978 * dereference that as well. This is after nat_lock is released 1979 * because of Tru64. 1980 */ 1981 fr_forgetnat((void *)nat, ifs); 1982 1983 KFREE(nat); 1984 } 1985 1986 1987 /* ------------------------------------------------------------------------ */ 1988 /* Function: nat_flushtable */ 1989 /* Returns: int - number of NAT rules deleted */ 1990 /* Parameters: Nil */ 1991 /* */ 1992 /* Deletes all currently active NAT sessions. In deleting each NAT entry a */ 1993 /* log record should be emitted in nat_delete() if NAT logging is enabled. */ 1994 /* ------------------------------------------------------------------------ */ 1995 /* 1996 * nat_flushtable - clear the NAT table of all mapping entries. 1997 */ 1998 static int nat_flushtable(ifs) 1999 ipf_stack_t *ifs; 2000 { 2001 nat_t *nat; 2002 int j = 0; 2003 2004 /* 2005 * ALL NAT mappings deleted, so lets just make the deletions 2006 * quicker. 2007 */ 2008 if (ifs->ifs_nat_table[0] != NULL) 2009 bzero((char *)ifs->ifs_nat_table[0], 2010 sizeof(ifs->ifs_nat_table[0]) * ifs->ifs_ipf_nattable_sz); 2011 if (ifs->ifs_nat_table[1] != NULL) 2012 bzero((char *)ifs->ifs_nat_table[1], 2013 sizeof(ifs->ifs_nat_table[1]) * ifs->ifs_ipf_nattable_sz); 2014 2015 while ((nat = ifs->ifs_nat_instances) != NULL) { 2016 nat_delete(nat, NL_FLUSH, ifs); 2017 j++; 2018 } 2019 2020 return j; 2021 } 2022 2023 2024 /* ------------------------------------------------------------------------ */ 2025 /* Function: nat_clearlist */ 2026 /* Returns: int - number of NAT/RDR rules deleted */ 2027 /* Parameters: Nil */ 2028 /* */ 2029 /* Delete all rules in the current list of rules. There is nothing elegant */ 2030 /* about this cleanup: simply free all entries on the list of rules and */ 2031 /* clear out the tables used for hashed NAT rule lookups. */ 2032 /* ------------------------------------------------------------------------ */ 2033 static int nat_clearlist(ifs) 2034 ipf_stack_t *ifs; 2035 { 2036 ipnat_t *n, **np = &ifs->ifs_nat_list; 2037 int i = 0; 2038 2039 if (ifs->ifs_nat_rules != NULL) 2040 bzero((char *)ifs->ifs_nat_rules, 2041 sizeof(*ifs->ifs_nat_rules) * ifs->ifs_ipf_natrules_sz); 2042 if (ifs->ifs_rdr_rules != NULL) 2043 bzero((char *)ifs->ifs_rdr_rules, 2044 sizeof(*ifs->ifs_rdr_rules) * ifs->ifs_ipf_rdrrules_sz); 2045 2046 while ((n = *np) != NULL) { 2047 *np = n->in_next; 2048 if (n->in_use == 0) { 2049 if (n->in_apr != NULL) 2050 appr_free(n->in_apr); 2051 KFREE(n); 2052 ifs->ifs_nat_stats.ns_rules--; 2053 } else { 2054 n->in_flags |= IPN_DELETE; 2055 n->in_next = NULL; 2056 } 2057 i++; 2058 } 2059 ifs->ifs_nat_masks = 0; 2060 ifs->ifs_rdr_masks = 0; 2061 for (i = 0; i < 4; i++) { 2062 ifs->ifs_nat6_masks[i] = 0; 2063 ifs->ifs_rdr6_masks[i] = 0; 2064 } 2065 return i; 2066 } 2067 2068 2069 /* ------------------------------------------------------------------------ */ 2070 /* Function: nat_newmap */ 2071 /* Returns: int - -1 == error, 0 == success */ 2072 /* Parameters: fin(I) - pointer to packet information */ 2073 /* nat(I) - pointer to NAT entry */ 2074 /* ni(I) - pointer to structure with misc. information needed */ 2075 /* to create new NAT entry. */ 2076 /* */ 2077 /* Given an empty NAT structure, populate it with new information about a */ 2078 /* new NAT session, as defined by the matching NAT rule. */ 2079 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/ 2080 /* to the new IP address for the translation. */ 2081 /* ------------------------------------------------------------------------ */ 2082 static INLINE int nat_newmap(fin, nat, ni) 2083 fr_info_t *fin; 2084 nat_t *nat; 2085 natinfo_t *ni; 2086 { 2087 u_short st_port, dport, sport, port, sp, dp; 2088 struct in_addr in, inb; 2089 hostmap_t *hm; 2090 u_32_t flags; 2091 u_32_t st_ip; 2092 ipnat_t *np; 2093 nat_t *natl; 2094 int l; 2095 ipf_stack_t *ifs = fin->fin_ifs; 2096 2097 /* 2098 * If it's an outbound packet which doesn't match any existing 2099 * record, then create a new port 2100 */ 2101 l = 0; 2102 hm = NULL; 2103 np = ni->nai_np; 2104 st_ip = np->in_nip; 2105 st_port = np->in_pnext; 2106 flags = ni->nai_flags; 2107 sport = ni->nai_sport; 2108 dport = ni->nai_dport; 2109 2110 /* 2111 * Do a loop until we either run out of entries to try or we find 2112 * a NAT mapping that isn't currently being used. This is done 2113 * because the change to the source is not (usually) being fixed. 2114 */ 2115 do { 2116 port = 0; 2117 in.s_addr = htonl(np->in_nip); 2118 if (l == 0) { 2119 /* 2120 * Check to see if there is an existing NAT 2121 * setup for this IP address pair. 2122 */ 2123 hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, 2124 in, 0, ifs); 2125 if (hm != NULL) 2126 in.s_addr = hm->hm_mapip.s_addr; 2127 } else if ((l == 1) && (hm != NULL)) { 2128 fr_hostmapdel(&hm); 2129 } 2130 in.s_addr = ntohl(in.s_addr); 2131 2132 nat->nat_hm = hm; 2133 2134 if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) { 2135 if (l > 0) 2136 return -1; 2137 } 2138 2139 if (np->in_redir == NAT_BIMAP && 2140 np->in_inmsk == np->in_outmsk) { 2141 /* 2142 * map the address block in a 1:1 fashion 2143 */ 2144 in.s_addr = np->in_outip; 2145 in.s_addr |= fin->fin_saddr & ~np->in_inmsk; 2146 in.s_addr = ntohl(in.s_addr); 2147 2148 } else if (np->in_redir & NAT_MAPBLK) { 2149 if ((l >= np->in_ppip) || ((l > 0) && 2150 !(flags & IPN_TCPUDP))) 2151 return -1; 2152 /* 2153 * map-block - Calculate destination address. 2154 */ 2155 in.s_addr = ntohl(fin->fin_saddr); 2156 in.s_addr &= ntohl(~np->in_inmsk); 2157 inb.s_addr = in.s_addr; 2158 in.s_addr /= np->in_ippip; 2159 in.s_addr &= ntohl(~np->in_outmsk); 2160 in.s_addr += ntohl(np->in_outip); 2161 /* 2162 * Calculate destination port. 2163 */ 2164 if ((flags & IPN_TCPUDP) && 2165 (np->in_ppip != 0)) { 2166 port = ntohs(sport) + l; 2167 port %= np->in_ppip; 2168 port += np->in_ppip * 2169 (inb.s_addr % np->in_ippip); 2170 port += MAPBLK_MINPORT; 2171 port = htons(port); 2172 } 2173 2174 } else if ((np->in_outip == 0) && 2175 (np->in_outmsk == 0xffffffff)) { 2176 /* 2177 * 0/32 - use the interface's IP address. 2178 */ 2179 if ((l > 0) || 2180 fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, 2181 &in, NULL, fin->fin_ifs) == -1) 2182 return -1; 2183 in.s_addr = ntohl(in.s_addr); 2184 2185 } else if ((np->in_outip == 0) && (np->in_outmsk == 0)) { 2186 /* 2187 * 0/0 - use the original source address/port. 2188 */ 2189 if (l > 0) 2190 return -1; 2191 in.s_addr = ntohl(fin->fin_saddr); 2192 2193 } else if ((np->in_outmsk != 0xffffffff) && 2194 (np->in_pnext == 0) && ((l > 0) || (hm == NULL))) 2195 np->in_nip++; 2196 2197 natl = NULL; 2198 2199 if ((flags & IPN_TCPUDP) && 2200 ((np->in_redir & NAT_MAPBLK) == 0) && 2201 (np->in_flags & IPN_AUTOPORTMAP)) { 2202 /* 2203 * "ports auto" (without map-block) 2204 */ 2205 if ((l > 0) && (l % np->in_ppip == 0)) { 2206 if (l > np->in_space) { 2207 return -1; 2208 } else if ((l > np->in_ppip) && 2209 np->in_outmsk != 0xffffffff) 2210 np->in_nip++; 2211 } 2212 if (np->in_ppip != 0) { 2213 port = ntohs(sport); 2214 port += (l % np->in_ppip); 2215 port %= np->in_ppip; 2216 port += np->in_ppip * 2217 (ntohl(fin->fin_saddr) % 2218 np->in_ippip); 2219 port += MAPBLK_MINPORT; 2220 port = htons(port); 2221 } 2222 2223 } else if (((np->in_redir & NAT_MAPBLK) == 0) && 2224 (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) { 2225 /* 2226 * Standard port translation. Select next port. 2227 */ 2228 if (np->in_flags & IPN_SEQUENTIAL) { 2229 port = np->in_pnext; 2230 } else { 2231 port = ipf_random() % (ntohs(np->in_pmax) - 2232 ntohs(np->in_pmin)); 2233 port += ntohs(np->in_pmin); 2234 } 2235 port = htons(port); 2236 np->in_pnext++; 2237 2238 if (np->in_pnext > ntohs(np->in_pmax)) { 2239 np->in_pnext = ntohs(np->in_pmin); 2240 if (np->in_outmsk != 0xffffffff) 2241 np->in_nip++; 2242 } 2243 } 2244 2245 if (np->in_flags & IPN_IPRANGE) { 2246 if (np->in_nip > ntohl(np->in_outmsk)) 2247 np->in_nip = ntohl(np->in_outip); 2248 } else { 2249 if ((np->in_outmsk != 0xffffffff) && 2250 ((np->in_nip + 1) & ntohl(np->in_outmsk)) > 2251 ntohl(np->in_outip)) 2252 np->in_nip = ntohl(np->in_outip) + 1; 2253 } 2254 2255 if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY))) 2256 port = sport; 2257 2258 /* 2259 * Here we do a lookup of the connection as seen from 2260 * the outside. If an IP# pair already exists, try 2261 * again. So if you have A->B becomes C->B, you can 2262 * also have D->E become C->E but not D->B causing 2263 * another C->B. Also take protocol and ports into 2264 * account when determining whether a pre-existing 2265 * NAT setup will cause an external conflict where 2266 * this is appropriate. 2267 */ 2268 inb.s_addr = htonl(in.s_addr); 2269 sp = fin->fin_data[0]; 2270 dp = fin->fin_data[1]; 2271 fin->fin_data[0] = fin->fin_data[1]; 2272 fin->fin_data[1] = htons(port); 2273 natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH), 2274 (u_int)fin->fin_p, fin->fin_dst, inb); 2275 fin->fin_data[0] = sp; 2276 fin->fin_data[1] = dp; 2277 2278 /* 2279 * Has the search wrapped around and come back to the 2280 * start ? 2281 */ 2282 if ((natl != NULL) && 2283 (np->in_pnext != 0) && (st_port == np->in_pnext) && 2284 (np->in_nip != 0) && (st_ip == np->in_nip)) 2285 return -1; 2286 l++; 2287 } while (natl != NULL); 2288 2289 if (np->in_space > 0) 2290 np->in_space--; 2291 2292 /* Setup the NAT table */ 2293 nat->nat_inip = fin->fin_src; 2294 nat->nat_outip.s_addr = htonl(in.s_addr); 2295 nat->nat_oip = fin->fin_dst; 2296 if (nat->nat_hm == NULL) 2297 nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, 2298 nat->nat_outip, 0, ifs); 2299 2300 if (flags & IPN_TCPUDP) { 2301 nat->nat_inport = sport; 2302 nat->nat_outport = port; /* sport */ 2303 nat->nat_oport = dport; 2304 ((tcphdr_t *)fin->fin_dp)->th_sport = port; 2305 } else if (flags & IPN_ICMPQUERY) { 2306 ((icmphdr_t *)fin->fin_dp)->icmp_id = port; 2307 nat->nat_inport = port; 2308 nat->nat_outport = port; 2309 } 2310 2311 ni->nai_ip.s_addr = in.s_addr; 2312 ni->nai_port = port; 2313 ni->nai_nport = dport; 2314 return 0; 2315 } 2316 2317 2318 /* ------------------------------------------------------------------------ */ 2319 /* Function: nat_newrdr */ 2320 /* Returns: int - -1 == error, 0 == success (no move), 1 == success and */ 2321 /* allow rule to be moved if IPN_ROUNDR is set. */ 2322 /* Parameters: fin(I) - pointer to packet information */ 2323 /* nat(I) - pointer to NAT entry */ 2324 /* ni(I) - pointer to structure with misc. information needed */ 2325 /* to create new NAT entry. */ 2326 /* */ 2327 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/ 2328 /* to the new IP address for the translation. */ 2329 /* ------------------------------------------------------------------------ */ 2330 static INLINE int nat_newrdr(fin, nat, ni) 2331 fr_info_t *fin; 2332 nat_t *nat; 2333 natinfo_t *ni; 2334 { 2335 u_short nport, dport, sport; 2336 struct in_addr in, inb; 2337 u_short sp, dp; 2338 hostmap_t *hm; 2339 u_32_t flags; 2340 ipnat_t *np; 2341 nat_t *natl; 2342 int move; 2343 ipf_stack_t *ifs = fin->fin_ifs; 2344 2345 move = 1; 2346 hm = NULL; 2347 in.s_addr = 0; 2348 np = ni->nai_np; 2349 flags = ni->nai_flags; 2350 sport = ni->nai_sport; 2351 dport = ni->nai_dport; 2352 2353 /* 2354 * If the matching rule has IPN_STICKY set, then we want to have the 2355 * same rule kick in as before. Why would this happen? If you have 2356 * a collection of rdr rules with "round-robin sticky", the current 2357 * packet might match a different one to the previous connection but 2358 * we want the same destination to be used. 2359 */ 2360 if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == 2361 (IPN_ROUNDR|IPN_STICKY)) { 2362 hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in, 2363 (u_32_t)dport, ifs); 2364 if (hm != NULL) { 2365 in.s_addr = ntohl(hm->hm_mapip.s_addr); 2366 np = hm->hm_ipnat; 2367 ni->nai_np = np; 2368 move = 0; 2369 } 2370 } 2371 2372 /* 2373 * Otherwise, it's an inbound packet. Most likely, we don't 2374 * want to rewrite source ports and source addresses. Instead, 2375 * we want to rewrite to a fixed internal address and fixed 2376 * internal port. 2377 */ 2378 if (np->in_flags & IPN_SPLIT) { 2379 in.s_addr = np->in_nip; 2380 2381 if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) { 2382 hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, 2383 in, (u_32_t)dport, ifs); 2384 if (hm != NULL) { 2385 in.s_addr = hm->hm_mapip.s_addr; 2386 move = 0; 2387 } 2388 } 2389 2390 if (hm == NULL || hm->hm_ref == 1) { 2391 if (np->in_inip == htonl(in.s_addr)) { 2392 np->in_nip = ntohl(np->in_inmsk); 2393 move = 0; 2394 } else { 2395 np->in_nip = ntohl(np->in_inip); 2396 } 2397 } 2398 2399 } else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) { 2400 /* 2401 * 0/32 - use the interface's IP address. 2402 */ 2403 if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL, 2404 fin->fin_ifs) == -1) 2405 return -1; 2406 in.s_addr = ntohl(in.s_addr); 2407 2408 } else if ((np->in_inip == 0) && (np->in_inmsk== 0)) { 2409 /* 2410 * 0/0 - use the original destination address/port. 2411 */ 2412 in.s_addr = ntohl(fin->fin_daddr); 2413 2414 } else if (np->in_redir == NAT_BIMAP && 2415 np->in_inmsk == np->in_outmsk) { 2416 /* 2417 * map the address block in a 1:1 fashion 2418 */ 2419 in.s_addr = np->in_inip; 2420 in.s_addr |= fin->fin_daddr & ~np->in_inmsk; 2421 in.s_addr = ntohl(in.s_addr); 2422 } else { 2423 in.s_addr = ntohl(np->in_inip); 2424 } 2425 2426 if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0)) 2427 nport = dport; 2428 else { 2429 /* 2430 * Whilst not optimized for the case where 2431 * pmin == pmax, the gain is not significant. 2432 */ 2433 if (((np->in_flags & IPN_FIXEDDPORT) == 0) && 2434 (np->in_pmin != np->in_pmax)) { 2435 nport = ntohs(dport) - ntohs(np->in_pmin) + 2436 ntohs(np->in_pnext); 2437 nport = htons(nport); 2438 } else 2439 nport = np->in_pnext; 2440 } 2441 2442 /* 2443 * When the redirect-to address is set to 0.0.0.0, just 2444 * assume a blank `forwarding' of the packet. We don't 2445 * setup any translation for this either. 2446 */ 2447 if (in.s_addr == 0) { 2448 if (nport == dport) 2449 return -1; 2450 in.s_addr = ntohl(fin->fin_daddr); 2451 } 2452 2453 /* 2454 * Check to see if this redirect mapping already exists and if 2455 * it does, return "failure" (allowing it to be created will just 2456 * cause one or both of these "connections" to stop working.) 2457 */ 2458 inb.s_addr = htonl(in.s_addr); 2459 sp = fin->fin_data[0]; 2460 dp = fin->fin_data[1]; 2461 fin->fin_data[1] = fin->fin_data[0]; 2462 fin->fin_data[0] = ntohs(nport); 2463 natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH), 2464 (u_int)fin->fin_p, inb, fin->fin_src); 2465 fin->fin_data[0] = sp; 2466 fin->fin_data[1] = dp; 2467 if (natl != NULL) 2468 return (-1); 2469 2470 nat->nat_inip.s_addr = htonl(in.s_addr); 2471 nat->nat_outip = fin->fin_dst; 2472 nat->nat_oip = fin->fin_src; 2473 2474 ni->nai_ip.s_addr = in.s_addr; 2475 ni->nai_nport = nport; 2476 ni->nai_port = sport; 2477 2478 if (flags & IPN_TCPUDP) { 2479 nat->nat_inport = nport; 2480 nat->nat_outport = dport; 2481 nat->nat_oport = sport; 2482 ((tcphdr_t *)fin->fin_dp)->th_dport = nport; 2483 } else if (flags & IPN_ICMPQUERY) { 2484 ((icmphdr_t *)fin->fin_dp)->icmp_id = nport; 2485 nat->nat_inport = nport; 2486 nat->nat_outport = nport; 2487 } 2488 2489 return move; 2490 } 2491 2492 /* ------------------------------------------------------------------------ */ 2493 /* Function: nat_new */ 2494 /* Returns: nat_t* - NULL == failure to create new NAT structure, */ 2495 /* else pointer to new NAT structure */ 2496 /* Parameters: fin(I) - pointer to packet information */ 2497 /* np(I) - pointer to NAT rule */ 2498 /* natsave(I) - pointer to where to store NAT struct pointer */ 2499 /* flags(I) - flags describing the current packet */ 2500 /* direction(I) - direction of packet (in/out) */ 2501 /* Write Lock: ipf_nat */ 2502 /* */ 2503 /* Attempts to create a new NAT entry. Does not actually change the packet */ 2504 /* in any way. */ 2505 /* */ 2506 /* This fucntion is in three main parts: (1) deal with creating a new NAT */ 2507 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with */ 2508 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */ 2509 /* and (3) building that structure and putting it into the NAT table(s). */ 2510 /* ------------------------------------------------------------------------ */ 2511 nat_t *nat_new(fin, np, natsave, flags, direction) 2512 fr_info_t *fin; 2513 ipnat_t *np; 2514 nat_t **natsave; 2515 u_int flags; 2516 int direction; 2517 { 2518 tcphdr_t *tcp = NULL; 2519 hostmap_t *hm = NULL; 2520 nat_t *nat, *natl; 2521 u_int nflags; 2522 natinfo_t ni; 2523 int move; 2524 ipf_stack_t *ifs = fin->fin_ifs; 2525 2526 /* 2527 * Trigger automatic call to nat_extraflush() if the 2528 * table has reached capcity specified by hi watermark. 2529 */ 2530 if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi) 2531 ifs->ifs_nat_doflush = 1; 2532 2533 if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) { 2534 ifs->ifs_nat_stats.ns_memfail++; 2535 return NULL; 2536 } 2537 2538 move = 1; 2539 nflags = np->in_flags & flags; 2540 nflags &= NAT_FROMRULE; 2541 2542 ni.nai_np = np; 2543 ni.nai_nflags = nflags; 2544 ni.nai_flags = flags; 2545 2546 /* Give me a new nat */ 2547 KMALLOC(nat, nat_t *); 2548 if (nat == NULL) { 2549 ifs->ifs_nat_stats.ns_memfail++; 2550 /* 2551 * Try to automatically tune the max # of entries in the 2552 * table allowed to be less than what will cause kmem_alloc() 2553 * to fail and try to eliminate panics due to out of memory 2554 * conditions arising. 2555 */ 2556 if (ifs->ifs_ipf_nattable_max > ifs->ifs_ipf_nattable_sz) { 2557 ifs->ifs_ipf_nattable_max = ifs->ifs_nat_stats.ns_inuse - 100; 2558 printf("ipf_nattable_max reduced to %d\n", 2559 ifs->ifs_ipf_nattable_max); 2560 } 2561 return NULL; 2562 } 2563 2564 if (flags & IPN_TCPUDP) { 2565 tcp = fin->fin_dp; 2566 ni.nai_sport = htons(fin->fin_sport); 2567 ni.nai_dport = htons(fin->fin_dport); 2568 } else if (flags & IPN_ICMPQUERY) { 2569 /* 2570 * In the ICMP query NAT code, we translate the ICMP id fields 2571 * to make them unique. This is indepedent of the ICMP type 2572 * (e.g. in the unlikely event that a host sends an echo and 2573 * an tstamp request with the same id, both packets will have 2574 * their ip address/id field changed in the same way). 2575 */ 2576 /* The icmp_id field is used by the sender to identify the 2577 * process making the icmp request. (the receiver justs 2578 * copies it back in its response). So, it closely matches 2579 * the concept of source port. We overlay sport, so we can 2580 * maximally reuse the existing code. 2581 */ 2582 ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id; 2583 ni.nai_dport = ni.nai_sport; 2584 } 2585 2586 bzero((char *)nat, sizeof(*nat)); 2587 nat->nat_flags = flags; 2588 nat->nat_redir = np->in_redir; 2589 2590 if ((flags & NAT_SLAVE) == 0) { 2591 MUTEX_ENTER(&ifs->ifs_ipf_nat_new); 2592 } 2593 2594 /* 2595 * Search the current table for a match. 2596 */ 2597 if (direction == NAT_OUTBOUND) { 2598 /* 2599 * We can now arrange to call this for the same connection 2600 * because ipf_nat_new doesn't protect the code path into 2601 * this function. 2602 */ 2603 natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p, 2604 fin->fin_src, fin->fin_dst); 2605 if (natl != NULL) { 2606 KFREE(nat); 2607 nat = natl; 2608 goto done; 2609 } 2610 2611 move = nat_newmap(fin, nat, &ni); 2612 if (move == -1) 2613 goto badnat; 2614 2615 np = ni.nai_np; 2616 } else { 2617 /* 2618 * NAT_INBOUND is used only for redirects rules 2619 */ 2620 natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p, 2621 fin->fin_src, fin->fin_dst); 2622 if (natl != NULL) { 2623 KFREE(nat); 2624 nat = natl; 2625 goto done; 2626 } 2627 2628 move = nat_newrdr(fin, nat, &ni); 2629 if (move == -1) 2630 goto badnat; 2631 2632 np = ni.nai_np; 2633 } 2634 2635 if ((move == 1) && (np->in_flags & IPN_ROUNDR)) { 2636 if (np->in_redir == NAT_REDIRECT) { 2637 nat_delrdr(np); 2638 nat_addrdr(np, ifs); 2639 } else if (np->in_redir == NAT_MAP) { 2640 nat_delnat(np); 2641 nat_addnat(np, ifs); 2642 } 2643 } 2644 2645 if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) { 2646 goto badnat; 2647 } 2648 2649 nat_calc_chksum_diffs(nat); 2650 2651 if (flags & SI_WILDP) 2652 ifs->ifs_nat_stats.ns_wilds++; 2653 goto done; 2654 badnat: 2655 ifs->ifs_nat_stats.ns_badnat++; 2656 if ((hm = nat->nat_hm) != NULL) 2657 fr_hostmapdel(&hm); 2658 KFREE(nat); 2659 nat = NULL; 2660 done: 2661 if ((flags & NAT_SLAVE) == 0) { 2662 MUTEX_EXIT(&ifs->ifs_ipf_nat_new); 2663 } 2664 return nat; 2665 } 2666 2667 2668 /* ------------------------------------------------------------------------ */ 2669 /* Function: nat_finalise */ 2670 /* Returns: int - 0 == sucess, -1 == failure */ 2671 /* Parameters: fin(I) - pointer to packet information */ 2672 /* nat(I) - pointer to NAT entry */ 2673 /* ni(I) - pointer to structure with misc. information needed */ 2674 /* to create new NAT entry. */ 2675 /* Write Lock: ipf_nat */ 2676 /* */ 2677 /* This is the tail end of constructing a new NAT entry and is the same */ 2678 /* for both IPv4 and IPv6. */ 2679 /* ------------------------------------------------------------------------ */ 2680 /*ARGSUSED*/ 2681 static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction) 2682 fr_info_t *fin; 2683 nat_t *nat; 2684 natinfo_t *ni; 2685 tcphdr_t *tcp; 2686 nat_t **natsave; 2687 int direction; 2688 { 2689 frentry_t *fr; 2690 ipnat_t *np; 2691 ipf_stack_t *ifs = fin->fin_ifs; 2692 2693 np = ni->nai_np; 2694 2695 COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0], fin->fin_v); 2696 2697 #ifdef IPFILTER_SYNC 2698 if ((nat->nat_flags & SI_CLONE) == 0) 2699 nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat); 2700 #endif 2701 2702 nat->nat_me = natsave; 2703 nat->nat_dir = direction; 2704 nat->nat_ifps[0] = np->in_ifps[0]; 2705 nat->nat_ifps[1] = np->in_ifps[1]; 2706 nat->nat_ptr = np; 2707 nat->nat_p = fin->fin_p; 2708 nat->nat_v = fin->fin_v; 2709 nat->nat_mssclamp = np->in_mssclamp; 2710 fr = fin->fin_fr; 2711 nat->nat_fr = fr; 2712 2713 if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0)) 2714 if (appr_new(fin, nat) == -1) 2715 return -1; 2716 2717 if (nat_insert(nat, fin->fin_rev, ifs) == 0) { 2718 if (ifs->ifs_nat_logging) 2719 nat_log(nat, (u_int)np->in_redir, ifs); 2720 np->in_use++; 2721 if (fr != NULL) { 2722 MUTEX_ENTER(&fr->fr_lock); 2723 fr->fr_ref++; 2724 MUTEX_EXIT(&fr->fr_lock); 2725 } 2726 return 0; 2727 } 2728 2729 /* 2730 * nat_insert failed, so cleanup time... 2731 */ 2732 return -1; 2733 } 2734 2735 2736 /* ------------------------------------------------------------------------ */ 2737 /* Function: nat_insert */ 2738 /* Returns: int - 0 == sucess, -1 == failure */ 2739 /* Parameters: nat(I) - pointer to NAT structure */ 2740 /* rev(I) - flag indicating forward/reverse direction of packet */ 2741 /* Write Lock: ipf_nat */ 2742 /* */ 2743 /* Insert a NAT entry into the hash tables for searching and add it to the */ 2744 /* list of active NAT entries. Adjust global counters when complete. */ 2745 /* ------------------------------------------------------------------------ */ 2746 int nat_insert(nat, rev, ifs) 2747 nat_t *nat; 2748 int rev; 2749 ipf_stack_t *ifs; 2750 { 2751 u_int hv1, hv2; 2752 nat_t **natp; 2753 2754 /* 2755 * Try and return an error as early as possible, so calculate the hash 2756 * entry numbers first and then proceed. 2757 */ 2758 if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) { 2759 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 2760 0xffffffff); 2761 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport, 2762 ifs->ifs_ipf_nattable_sz); 2763 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 2764 0xffffffff); 2765 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport, 2766 ifs->ifs_ipf_nattable_sz); 2767 } else { 2768 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff); 2769 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, 2770 ifs->ifs_ipf_nattable_sz); 2771 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff); 2772 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, 2773 ifs->ifs_ipf_nattable_sz); 2774 } 2775 2776 if (ifs->ifs_nat_stats.ns_bucketlen[0][hv1] >= ifs->ifs_fr_nat_maxbucket || 2777 ifs->ifs_nat_stats.ns_bucketlen[1][hv2] >= ifs->ifs_fr_nat_maxbucket) { 2778 return -1; 2779 } 2780 2781 nat->nat_hv[0] = hv1; 2782 nat->nat_hv[1] = hv2; 2783 2784 MUTEX_INIT(&nat->nat_lock, "nat entry lock"); 2785 2786 nat->nat_rev = rev; 2787 nat->nat_ref = 1; 2788 nat->nat_bytes[0] = 0; 2789 nat->nat_pkts[0] = 0; 2790 nat->nat_bytes[1] = 0; 2791 nat->nat_pkts[1] = 0; 2792 2793 nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0'; 2794 nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4, ifs); 2795 2796 if (nat->nat_ifnames[1][0] !='\0') { 2797 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; 2798 nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4, ifs); 2799 } else { 2800 (void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0], 2801 LIFNAMSIZ); 2802 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; 2803 nat->nat_ifps[1] = nat->nat_ifps[0]; 2804 } 2805 2806 nat->nat_next = ifs->ifs_nat_instances; 2807 nat->nat_pnext = &ifs->ifs_nat_instances; 2808 if (ifs->ifs_nat_instances) 2809 ifs->ifs_nat_instances->nat_pnext = &nat->nat_next; 2810 ifs->ifs_nat_instances = nat; 2811 2812 natp = &ifs->ifs_nat_table[0][hv1]; 2813 if (*natp) 2814 (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; 2815 nat->nat_phnext[0] = natp; 2816 nat->nat_hnext[0] = *natp; 2817 *natp = nat; 2818 ifs->ifs_nat_stats.ns_bucketlen[0][hv1]++; 2819 2820 natp = &ifs->ifs_nat_table[1][hv2]; 2821 if (*natp) 2822 (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; 2823 nat->nat_phnext[1] = natp; 2824 nat->nat_hnext[1] = *natp; 2825 *natp = nat; 2826 ifs->ifs_nat_stats.ns_bucketlen[1][hv2]++; 2827 2828 fr_setnatqueue(nat, rev, ifs); 2829 2830 ifs->ifs_nat_stats.ns_added++; 2831 ifs->ifs_nat_stats.ns_inuse++; 2832 return 0; 2833 } 2834 2835 2836 /* ------------------------------------------------------------------------ */ 2837 /* Function: nat_icmperrorlookup */ 2838 /* Returns: nat_t* - point to matching NAT structure */ 2839 /* Parameters: fin(I) - pointer to packet information */ 2840 /* dir(I) - direction of packet (in/out) */ 2841 /* */ 2842 /* Check if the ICMP error message is related to an existing TCP, UDP or */ 2843 /* ICMP query nat entry. It is assumed that the packet is already of the */ 2844 /* the required length. */ 2845 /* ------------------------------------------------------------------------ */ 2846 nat_t *nat_icmperrorlookup(fin, dir) 2847 fr_info_t *fin; 2848 int dir; 2849 { 2850 int flags = 0, minlen; 2851 icmphdr_t *orgicmp; 2852 tcphdr_t *tcp = NULL; 2853 u_short data[2]; 2854 nat_t *nat; 2855 ip_t *oip; 2856 u_int p; 2857 2858 /* 2859 * Does it at least have the return (basic) IP header ? 2860 * Only a basic IP header (no options) should be with an ICMP error 2861 * header. Also, if it's not an error type, then return. 2862 */ 2863 if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR)) 2864 return NULL; 2865 2866 /* 2867 * Check packet size 2868 */ 2869 oip = (ip_t *)((char *)fin->fin_dp + 8); 2870 minlen = IP_HL(oip) << 2; 2871 if ((minlen < sizeof(ip_t)) || 2872 (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)) 2873 return NULL; 2874 /* 2875 * Is the buffer big enough for all of it ? It's the size of the IP 2876 * header claimed in the encapsulated part which is of concern. It 2877 * may be too big to be in this buffer but not so big that it's 2878 * outside the ICMP packet, leading to TCP deref's causing problems. 2879 * This is possible because we don't know how big oip_hl is when we 2880 * do the pullup early in fr_check() and thus can't gaurantee it is 2881 * all here now. 2882 */ 2883 #ifdef _KERNEL 2884 { 2885 mb_t *m; 2886 2887 m = fin->fin_m; 2888 # if defined(MENTAT) 2889 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr) 2890 return NULL; 2891 # else 2892 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > 2893 (char *)fin->fin_ip + M_LEN(m)) 2894 return NULL; 2895 # endif 2896 } 2897 #endif 2898 2899 if (fin->fin_daddr != oip->ip_src.s_addr) 2900 return NULL; 2901 2902 p = oip->ip_p; 2903 if (p == IPPROTO_TCP) 2904 flags = IPN_TCP; 2905 else if (p == IPPROTO_UDP) 2906 flags = IPN_UDP; 2907 else if (p == IPPROTO_ICMP) { 2908 orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2909 2910 /* see if this is related to an ICMP query */ 2911 if (nat_icmpquerytype4(orgicmp->icmp_type)) { 2912 data[0] = fin->fin_data[0]; 2913 data[1] = fin->fin_data[1]; 2914 fin->fin_data[0] = 0; 2915 fin->fin_data[1] = orgicmp->icmp_id; 2916 2917 flags = IPN_ICMPERR|IPN_ICMPQUERY; 2918 /* 2919 * NOTE : dir refers to the direction of the original 2920 * ip packet. By definition the icmp error 2921 * message flows in the opposite direction. 2922 */ 2923 if (dir == NAT_INBOUND) 2924 nat = nat_inlookup(fin, flags, p, oip->ip_dst, 2925 oip->ip_src); 2926 else 2927 nat = nat_outlookup(fin, flags, p, oip->ip_dst, 2928 oip->ip_src); 2929 fin->fin_data[0] = data[0]; 2930 fin->fin_data[1] = data[1]; 2931 return nat; 2932 } 2933 } 2934 2935 if (flags & IPN_TCPUDP) { 2936 minlen += 8; /* + 64bits of data to get ports */ 2937 if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen) 2938 return NULL; 2939 2940 data[0] = fin->fin_data[0]; 2941 data[1] = fin->fin_data[1]; 2942 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2943 fin->fin_data[0] = ntohs(tcp->th_dport); 2944 fin->fin_data[1] = ntohs(tcp->th_sport); 2945 2946 if (dir == NAT_INBOUND) { 2947 nat = nat_inlookup(fin, flags, p, oip->ip_dst, 2948 oip->ip_src); 2949 } else { 2950 nat = nat_outlookup(fin, flags, p, oip->ip_dst, 2951 oip->ip_src); 2952 } 2953 fin->fin_data[0] = data[0]; 2954 fin->fin_data[1] = data[1]; 2955 return nat; 2956 } 2957 if (dir == NAT_INBOUND) 2958 return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src); 2959 else 2960 return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src); 2961 } 2962 2963 2964 /* ------------------------------------------------------------------------ */ 2965 /* Function: nat_icmperror */ 2966 /* Returns: nat_t* - point to matching NAT structure */ 2967 /* Parameters: fin(I) - pointer to packet information */ 2968 /* nflags(I) - NAT flags for this packet */ 2969 /* dir(I) - direction of packet (in/out) */ 2970 /* */ 2971 /* Fix up an ICMP packet which is an error message for an existing NAT */ 2972 /* session. This will correct both packet header data and checksums. */ 2973 /* */ 2974 /* This should *ONLY* be used for incoming ICMP error packets to make sure */ 2975 /* a NAT'd ICMP packet gets correctly recognised. */ 2976 /* ------------------------------------------------------------------------ */ 2977 nat_t *nat_icmperror(fin, nflags, dir) 2978 fr_info_t *fin; 2979 u_int *nflags; 2980 int dir; 2981 { 2982 u_32_t sum1, sum2, sumd, psum1, psum2, psumd, sumd2; 2983 struct in_addr in; 2984 icmphdr_t *icmp, *orgicmp; 2985 int dlen; 2986 udphdr_t *udp; 2987 tcphdr_t *tcp; 2988 nat_t *nat; 2989 ip_t *oip; 2990 if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) 2991 return NULL; 2992 2993 /* 2994 * nat_icmperrorlookup() looks up nat entry associated with the 2995 * offending IP packet and returns pointer to the entry, or NULL 2996 * if packet wasn't natted or for `defective' packets. 2997 */ 2998 2999 if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir))) 3000 return NULL; 3001 3002 sumd2 = 0; 3003 *nflags = IPN_ICMPERR; 3004 icmp = fin->fin_dp; 3005 oip = (ip_t *)&icmp->icmp_ip; 3006 udp = (udphdr_t *)((((char *)oip) + (IP_HL(oip) << 2))); 3007 tcp = (tcphdr_t *)udp; 3008 dlen = fin->fin_plen - ((char *)udp - (char *)fin->fin_ip); 3009 3010 /* 3011 * Need to adjust ICMP header to include the real IP#'s and 3012 * port #'s. There are three steps required. 3013 * 3014 * Step 1 3015 * Fix the IP addresses in the offending IP packet and update 3016 * ip header checksum to compensate for the change. 3017 * 3018 * No update needed here for icmp_cksum because the ICMP checksum 3019 * is calculated over the complete ICMP packet, which includes the 3020 * changed oip IP addresses and oip->ip_sum. These two changes 3021 * cancel each other out (if the delta for the IP address is x, 3022 * then the delta for ip_sum is minus x). 3023 */ 3024 3025 if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) { 3026 sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr)); 3027 in = nat->nat_inip; 3028 oip->ip_src = in; 3029 } else { 3030 sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr)); 3031 in = nat->nat_outip; 3032 oip->ip_dst = in; 3033 } 3034 3035 sum2 = LONG_SUM(ntohl(in.s_addr)); 3036 CALC_SUMD(sum1, sum2, sumd); 3037 fix_datacksum(&oip->ip_sum, sumd); 3038 3039 /* 3040 * Step 2 3041 * Perform other adjustments based on protocol of offending packet. 3042 */ 3043 3044 switch (oip->ip_p) { 3045 case IPPROTO_TCP : 3046 case IPPROTO_UDP : 3047 3048 /* 3049 * For offending TCP/UDP IP packets, translate the ports 3050 * based on the NAT specification. 3051 * 3052 * Advance notice : Now it becomes complicated :-) 3053 * 3054 * Since the port and IP addresse fields are both part 3055 * of the TCP/UDP checksum of the offending IP packet, 3056 * we need to adjust that checksum as well. 3057 * 3058 * To further complicate things, the TCP/UDP checksum 3059 * may not be present. We must check to see if the 3060 * length of the data portion is big enough to hold 3061 * the checksum. In the UDP case, a test to determine 3062 * if the checksum is even set is also required. 3063 * 3064 * Any changes to an IP address, port or checksum within 3065 * the ICMP packet requires a change to icmp_cksum. 3066 * 3067 * Be extremely careful here ... The change is dependent 3068 * upon whether or not the TCP/UPD checksum is present. 3069 * 3070 * If TCP/UPD checksum is present, the icmp_cksum must 3071 * compensate for checksum modification resulting from 3072 * IP address change only. Port change and resulting 3073 * data checksum adjustments cancel each other out. 3074 * 3075 * If TCP/UDP checksum is not present, icmp_cksum must 3076 * compensate for port change only. The IP address 3077 * change does not modify anything else in this case. 3078 */ 3079 3080 psum1 = 0; 3081 psum2 = 0; 3082 psumd = 0; 3083 3084 if ((tcp->th_dport == nat->nat_oport) && 3085 (tcp->th_sport != nat->nat_inport)) { 3086 3087 /* 3088 * Translate the source port. 3089 */ 3090 3091 psum1 = ntohs(tcp->th_sport); 3092 psum2 = ntohs(nat->nat_inport); 3093 tcp->th_sport = nat->nat_inport; 3094 3095 } else if ((tcp->th_sport == nat->nat_oport) && 3096 (tcp->th_dport != nat->nat_outport)) { 3097 3098 /* 3099 * Translate the destination port. 3100 */ 3101 3102 psum1 = ntohs(tcp->th_dport); 3103 psum2 = ntohs(nat->nat_outport); 3104 tcp->th_dport = nat->nat_outport; 3105 } 3106 3107 if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) { 3108 3109 /* 3110 * TCP checksum present. 3111 * 3112 * Adjust data checksum and icmp checksum to 3113 * compensate for any IP address change. 3114 */ 3115 3116 sum1 = ntohs(tcp->th_sum); 3117 fix_datacksum(&tcp->th_sum, sumd); 3118 sum2 = ntohs(tcp->th_sum); 3119 sumd2 = sumd << 1; 3120 CALC_SUMD(sum1, sum2, sumd); 3121 sumd2 += sumd; 3122 3123 /* 3124 * Also make data checksum adjustment to 3125 * compensate for any port change. 3126 */ 3127 3128 if (psum1 != psum2) { 3129 CALC_SUMD(psum1, psum2, psumd); 3130 fix_datacksum(&tcp->th_sum, psumd); 3131 } 3132 3133 } else if ((oip->ip_p == IPPROTO_UDP) && 3134 (dlen >= 8) && (udp->uh_sum != 0)) { 3135 3136 /* 3137 * The UDP checksum is present and set. 3138 * 3139 * Adjust data checksum and icmp checksum to 3140 * compensate for any IP address change. 3141 */ 3142 3143 sum1 = ntohs(udp->uh_sum); 3144 fix_datacksum(&udp->uh_sum, sumd); 3145 sum2 = ntohs(udp->uh_sum); 3146 sumd2 = sumd << 1; 3147 CALC_SUMD(sum1, sum2, sumd); 3148 sumd2 += sumd; 3149 3150 /* 3151 * Also make data checksum adjustment to 3152 * compensate for any port change. 3153 */ 3154 3155 if (psum1 != psum2) { 3156 CALC_SUMD(psum1, psum2, psumd); 3157 fix_datacksum(&udp->uh_sum, psumd); 3158 } 3159 3160 } else { 3161 3162 /* 3163 * Data checksum was not present. 3164 * 3165 * Compensate for any port change. 3166 */ 3167 3168 CALC_SUMD(psum2, psum1, psumd); 3169 sumd2 += psumd; 3170 } 3171 break; 3172 3173 case IPPROTO_ICMP : 3174 3175 orgicmp = (icmphdr_t *)udp; 3176 3177 if ((nat->nat_dir == NAT_OUTBOUND) && 3178 (orgicmp->icmp_id != nat->nat_inport) && 3179 (dlen >= 8)) { 3180 3181 /* 3182 * Fix ICMP checksum (of the offening ICMP 3183 * query packet) to compensate the change 3184 * in the ICMP id of the offending ICMP 3185 * packet. 3186 * 3187 * Since you modify orgicmp->icmp_id with 3188 * a delta (say x) and you compensate that 3189 * in origicmp->icmp_cksum with a delta 3190 * minus x, you don't have to adjust the 3191 * overall icmp->icmp_cksum 3192 */ 3193 3194 sum1 = ntohs(orgicmp->icmp_id); 3195 sum2 = ntohs(nat->nat_inport); 3196 CALC_SUMD(sum1, sum2, sumd); 3197 orgicmp->icmp_id = nat->nat_inport; 3198 fix_datacksum(&orgicmp->icmp_cksum, sumd); 3199 3200 } /* nat_dir can't be NAT_INBOUND for icmp queries */ 3201 3202 break; 3203 3204 default : 3205 3206 break; 3207 3208 } /* switch (oip->ip_p) */ 3209 3210 /* 3211 * Step 3 3212 * Make the adjustments to icmp checksum. 3213 */ 3214 3215 if (sumd2 != 0) { 3216 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); 3217 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); 3218 fix_incksum(&icmp->icmp_cksum, sumd2); 3219 } 3220 return nat; 3221 } 3222 3223 3224 /* 3225 * NB: these lookups don't lock access to the list, it assumed that it has 3226 * already been done! 3227 */ 3228 3229 /* ------------------------------------------------------------------------ */ 3230 /* Function: nat_inlookup */ 3231 /* Returns: nat_t* - NULL == no match, */ 3232 /* else pointer to matching NAT entry */ 3233 /* Parameters: fin(I) - pointer to packet information */ 3234 /* flags(I) - NAT flags for this packet */ 3235 /* p(I) - protocol for this packet */ 3236 /* src(I) - source IP address */ 3237 /* mapdst(I) - destination IP address */ 3238 /* */ 3239 /* Lookup a nat entry based on the mapped destination ip address/port and */ 3240 /* real source address/port. We use this lookup when receiving a packet, */ 3241 /* we're looking for a table entry, based on the destination address. */ 3242 /* */ 3243 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */ 3244 /* */ 3245 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */ 3246 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */ 3247 /* */ 3248 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */ 3249 /* the packet is of said protocol */ 3250 /* ------------------------------------------------------------------------ */ 3251 nat_t *nat_inlookup(fin, flags, p, src, mapdst) 3252 fr_info_t *fin; 3253 u_int flags, p; 3254 struct in_addr src , mapdst; 3255 { 3256 u_short sport, dport; 3257 ipnat_t *ipn; 3258 u_int sflags; 3259 nat_t *nat; 3260 int nflags; 3261 u_32_t dst; 3262 void *ifp; 3263 u_int hv; 3264 ipf_stack_t *ifs = fin->fin_ifs; 3265 3266 if (fin != NULL) 3267 ifp = fin->fin_ifp; 3268 else 3269 ifp = NULL; 3270 sport = 0; 3271 dport = 0; 3272 dst = mapdst.s_addr; 3273 sflags = flags & NAT_TCPUDPICMP; 3274 3275 switch (p) 3276 { 3277 case IPPROTO_TCP : 3278 case IPPROTO_UDP : 3279 sport = htons(fin->fin_data[0]); 3280 dport = htons(fin->fin_data[1]); 3281 break; 3282 case IPPROTO_ICMP : 3283 if (flags & IPN_ICMPERR) 3284 sport = fin->fin_data[1]; 3285 else 3286 dport = fin->fin_data[1]; 3287 break; 3288 default : 3289 break; 3290 } 3291 3292 3293 if ((flags & SI_WILDP) != 0) 3294 goto find_in_wild_ports; 3295 3296 hv = NAT_HASH_FN(dst, dport, 0xffffffff); 3297 hv = NAT_HASH_FN(src.s_addr, hv + sport, ifs->ifs_ipf_nattable_sz); 3298 nat = ifs->ifs_nat_table[1][hv]; 3299 for (; nat; nat = nat->nat_hnext[1]) { 3300 if (nat->nat_v != 4) 3301 continue; 3302 3303 if (nat->nat_ifps[0] != NULL) { 3304 if ((ifp != NULL) && (ifp != nat->nat_ifps[0])) 3305 continue; 3306 } else if (ifp != NULL) 3307 nat->nat_ifps[0] = ifp; 3308 3309 nflags = nat->nat_flags; 3310 3311 if (nat->nat_oip.s_addr == src.s_addr && 3312 nat->nat_outip.s_addr == dst && 3313 (((p == 0) && 3314 (sflags == (nat->nat_flags & IPN_TCPUDPICMP))) 3315 || (p == nat->nat_p))) { 3316 switch (p) 3317 { 3318 #if 0 3319 case IPPROTO_GRE : 3320 if (nat->nat_call[1] != fin->fin_data[0]) 3321 continue; 3322 break; 3323 #endif 3324 case IPPROTO_ICMP : 3325 if ((flags & IPN_ICMPERR) != 0) { 3326 if (nat->nat_outport != sport) 3327 continue; 3328 } else { 3329 if (nat->nat_outport != dport) 3330 continue; 3331 } 3332 break; 3333 case IPPROTO_TCP : 3334 case IPPROTO_UDP : 3335 if (nat->nat_oport != sport) 3336 continue; 3337 if (nat->nat_outport != dport) 3338 continue; 3339 break; 3340 default : 3341 break; 3342 } 3343 3344 ipn = nat->nat_ptr; 3345 if ((ipn != NULL) && (nat->nat_aps != NULL)) 3346 if (appr_match(fin, nat) != 0) 3347 continue; 3348 return nat; 3349 } 3350 } 3351 3352 /* 3353 * So if we didn't find it but there are wildcard members in the hash 3354 * table, go back and look for them. We do this search and update here 3355 * because it is modifying the NAT table and we want to do this only 3356 * for the first packet that matches. The exception, of course, is 3357 * for "dummy" (FI_IGNORE) lookups. 3358 */ 3359 find_in_wild_ports: 3360 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) 3361 return NULL; 3362 if (ifs->ifs_nat_stats.ns_wilds == 0) 3363 return NULL; 3364 3365 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3366 3367 hv = NAT_HASH_FN(dst, 0, 0xffffffff); 3368 hv = NAT_HASH_FN(src.s_addr, hv, ifs->ifs_ipf_nattable_sz); 3369 3370 WRITE_ENTER(&ifs->ifs_ipf_nat); 3371 3372 nat = ifs->ifs_nat_table[1][hv]; 3373 for (; nat; nat = nat->nat_hnext[1]) { 3374 if (nat->nat_v != 4) 3375 continue; 3376 3377 if (nat->nat_ifps[0] != NULL) { 3378 if ((ifp != NULL) && (ifp != nat->nat_ifps[0])) 3379 continue; 3380 } else if (ifp != NULL) 3381 nat->nat_ifps[0] = ifp; 3382 3383 if (nat->nat_p != fin->fin_p) 3384 continue; 3385 if (nat->nat_oip.s_addr != src.s_addr || 3386 nat->nat_outip.s_addr != dst) 3387 continue; 3388 3389 nflags = nat->nat_flags; 3390 if (!(nflags & (NAT_TCPUDP|SI_WILDP))) 3391 continue; 3392 3393 if (nat_wildok(nat, (int)sport, (int)dport, nflags, 3394 NAT_INBOUND) == 1) { 3395 if ((fin->fin_flx & FI_IGNORE) != 0) 3396 break; 3397 if ((nflags & SI_CLONE) != 0) { 3398 nat = fr_natclone(fin, nat); 3399 if (nat == NULL) 3400 break; 3401 } else { 3402 MUTEX_ENTER(&ifs->ifs_ipf_nat_new); 3403 ifs->ifs_nat_stats.ns_wilds--; 3404 MUTEX_EXIT(&ifs->ifs_ipf_nat_new); 3405 } 3406 nat->nat_oport = sport; 3407 nat->nat_outport = dport; 3408 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); 3409 nat_tabmove(nat, ifs); 3410 break; 3411 } 3412 } 3413 3414 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3415 3416 return nat; 3417 } 3418 3419 3420 /* ------------------------------------------------------------------------ */ 3421 /* Function: nat_tabmove */ 3422 /* Returns: Nil */ 3423 /* Parameters: nat(I) - pointer to NAT structure */ 3424 /* Write Lock: ipf_nat */ 3425 /* */ 3426 /* This function is only called for TCP/UDP NAT table entries where the */ 3427 /* original was placed in the table without hashing on the ports and we now */ 3428 /* want to include hashing on port numbers. */ 3429 /* ------------------------------------------------------------------------ */ 3430 static void nat_tabmove(nat, ifs) 3431 nat_t *nat; 3432 ipf_stack_t *ifs; 3433 { 3434 nat_t **natp; 3435 u_int hv; 3436 3437 if (nat->nat_flags & SI_CLONE) 3438 return; 3439 3440 /* 3441 * Remove the NAT entry from the old location 3442 */ 3443 if (nat->nat_hnext[0]) 3444 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; 3445 *nat->nat_phnext[0] = nat->nat_hnext[0]; 3446 ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--; 3447 3448 if (nat->nat_hnext[1]) 3449 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; 3450 *nat->nat_phnext[1] = nat->nat_hnext[1]; 3451 ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--; 3452 3453 /* 3454 * Add into the NAT table in the new position 3455 */ 3456 hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff); 3457 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport, 3458 ifs->ifs_ipf_nattable_sz); 3459 nat->nat_hv[0] = hv; 3460 natp = &ifs->ifs_nat_table[0][hv]; 3461 if (*natp) 3462 (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; 3463 nat->nat_phnext[0] = natp; 3464 nat->nat_hnext[0] = *natp; 3465 *natp = nat; 3466 ifs->ifs_nat_stats.ns_bucketlen[0][hv]++; 3467 3468 hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff); 3469 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport, 3470 ifs->ifs_ipf_nattable_sz); 3471 nat->nat_hv[1] = hv; 3472 natp = &ifs->ifs_nat_table[1][hv]; 3473 if (*natp) 3474 (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; 3475 nat->nat_phnext[1] = natp; 3476 nat->nat_hnext[1] = *natp; 3477 *natp = nat; 3478 ifs->ifs_nat_stats.ns_bucketlen[1][hv]++; 3479 } 3480 3481 3482 /* ------------------------------------------------------------------------ */ 3483 /* Function: nat_outlookup */ 3484 /* Returns: nat_t* - NULL == no match, */ 3485 /* else pointer to matching NAT entry */ 3486 /* Parameters: fin(I) - pointer to packet information */ 3487 /* flags(I) - NAT flags for this packet */ 3488 /* p(I) - protocol for this packet */ 3489 /* src(I) - source IP address */ 3490 /* dst(I) - destination IP address */ 3491 /* rw(I) - 1 == write lock on ipf_nat held, 0 == read lock. */ 3492 /* */ 3493 /* Lookup a nat entry based on the source 'real' ip address/port and */ 3494 /* destination address/port. We use this lookup when sending a packet out, */ 3495 /* we're looking for a table entry, based on the source address. */ 3496 /* */ 3497 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */ 3498 /* */ 3499 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */ 3500 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */ 3501 /* */ 3502 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */ 3503 /* the packet is of said protocol */ 3504 /* ------------------------------------------------------------------------ */ 3505 nat_t *nat_outlookup(fin, flags, p, src, dst) 3506 fr_info_t *fin; 3507 u_int flags, p; 3508 struct in_addr src , dst; 3509 { 3510 u_short sport, dport; 3511 u_int sflags; 3512 ipnat_t *ipn; 3513 u_32_t srcip; 3514 nat_t *nat; 3515 int nflags; 3516 void *ifp; 3517 u_int hv; 3518 ipf_stack_t *ifs = fin->fin_ifs; 3519 3520 ifp = fin->fin_ifp; 3521 3522 srcip = src.s_addr; 3523 sflags = flags & IPN_TCPUDPICMP; 3524 sport = 0; 3525 dport = 0; 3526 3527 switch (p) 3528 { 3529 case IPPROTO_TCP : 3530 case IPPROTO_UDP : 3531 sport = htons(fin->fin_data[0]); 3532 dport = htons(fin->fin_data[1]); 3533 break; 3534 case IPPROTO_ICMP : 3535 if (flags & IPN_ICMPERR) 3536 sport = fin->fin_data[1]; 3537 else 3538 dport = fin->fin_data[1]; 3539 break; 3540 default : 3541 break; 3542 } 3543 3544 if ((flags & SI_WILDP) != 0) 3545 goto find_out_wild_ports; 3546 3547 hv = NAT_HASH_FN(srcip, sport, 0xffffffff); 3548 hv = NAT_HASH_FN(dst.s_addr, hv + dport, ifs->ifs_ipf_nattable_sz); 3549 nat = ifs->ifs_nat_table[0][hv]; 3550 for (; nat; nat = nat->nat_hnext[0]) { 3551 if (nat->nat_v != 4) 3552 continue; 3553 3554 if (nat->nat_ifps[1] != NULL) { 3555 if ((ifp != NULL) && (ifp != nat->nat_ifps[1])) 3556 continue; 3557 } else if (ifp != NULL) 3558 nat->nat_ifps[1] = ifp; 3559 3560 nflags = nat->nat_flags; 3561 3562 if (nat->nat_inip.s_addr == srcip && 3563 nat->nat_oip.s_addr == dst.s_addr && 3564 (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP))) 3565 || (p == nat->nat_p))) { 3566 switch (p) 3567 { 3568 #if 0 3569 case IPPROTO_GRE : 3570 if (nat->nat_call[1] != fin->fin_data[0]) 3571 continue; 3572 break; 3573 #endif 3574 case IPPROTO_TCP : 3575 case IPPROTO_UDP : 3576 if (nat->nat_oport != dport) 3577 continue; 3578 if (nat->nat_inport != sport) 3579 continue; 3580 break; 3581 default : 3582 break; 3583 } 3584 3585 ipn = nat->nat_ptr; 3586 if ((ipn != NULL) && (nat->nat_aps != NULL)) 3587 if (appr_match(fin, nat) != 0) 3588 continue; 3589 return nat; 3590 } 3591 } 3592 3593 /* 3594 * So if we didn't find it but there are wildcard members in the hash 3595 * table, go back and look for them. We do this search and update here 3596 * because it is modifying the NAT table and we want to do this only 3597 * for the first packet that matches. The exception, of course, is 3598 * for "dummy" (FI_IGNORE) lookups. 3599 */ 3600 find_out_wild_ports: 3601 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) 3602 return NULL; 3603 if (ifs->ifs_nat_stats.ns_wilds == 0) 3604 return NULL; 3605 3606 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3607 3608 hv = NAT_HASH_FN(srcip, 0, 0xffffffff); 3609 hv = NAT_HASH_FN(dst.s_addr, hv, ifs->ifs_ipf_nattable_sz); 3610 3611 WRITE_ENTER(&ifs->ifs_ipf_nat); 3612 3613 nat = ifs->ifs_nat_table[0][hv]; 3614 for (; nat; nat = nat->nat_hnext[0]) { 3615 if (nat->nat_v != 4) 3616 continue; 3617 3618 if (nat->nat_ifps[1] != NULL) { 3619 if ((ifp != NULL) && (ifp != nat->nat_ifps[1])) 3620 continue; 3621 } else if (ifp != NULL) 3622 nat->nat_ifps[1] = ifp; 3623 3624 if (nat->nat_p != fin->fin_p) 3625 continue; 3626 if ((nat->nat_inip.s_addr != srcip) || 3627 (nat->nat_oip.s_addr != dst.s_addr)) 3628 continue; 3629 3630 nflags = nat->nat_flags; 3631 if (!(nflags & (NAT_TCPUDP|SI_WILDP))) 3632 continue; 3633 3634 if (nat_wildok(nat, (int)sport, (int)dport, nflags, 3635 NAT_OUTBOUND) == 1) { 3636 if ((fin->fin_flx & FI_IGNORE) != 0) 3637 break; 3638 if ((nflags & SI_CLONE) != 0) { 3639 nat = fr_natclone(fin, nat); 3640 if (nat == NULL) 3641 break; 3642 } else { 3643 MUTEX_ENTER(&ifs->ifs_ipf_nat_new); 3644 ifs->ifs_nat_stats.ns_wilds--; 3645 MUTEX_EXIT(&ifs->ifs_ipf_nat_new); 3646 } 3647 nat->nat_inport = sport; 3648 nat->nat_oport = dport; 3649 if (nat->nat_outport == 0) 3650 nat->nat_outport = sport; 3651 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); 3652 nat_tabmove(nat, ifs); 3653 break; 3654 } 3655 } 3656 3657 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3658 3659 return nat; 3660 } 3661 3662 3663 /* ------------------------------------------------------------------------ */ 3664 /* Function: nat_lookupredir */ 3665 /* Returns: nat_t* - NULL == no match, */ 3666 /* else pointer to matching NAT entry */ 3667 /* Parameters: np(I) - pointer to description of packet to find NAT table */ 3668 /* entry for. */ 3669 /* */ 3670 /* Lookup the NAT tables to search for a matching redirect */ 3671 /* ------------------------------------------------------------------------ */ 3672 nat_t *nat_lookupredir(np, ifs) 3673 natlookup_t *np; 3674 ipf_stack_t *ifs; 3675 { 3676 fr_info_t fi; 3677 nat_t *nat; 3678 3679 bzero((char *)&fi, sizeof(fi)); 3680 if (np->nl_flags & IPN_IN) { 3681 fi.fin_data[0] = ntohs(np->nl_realport); 3682 fi.fin_data[1] = ntohs(np->nl_outport); 3683 } else { 3684 fi.fin_data[0] = ntohs(np->nl_inport); 3685 fi.fin_data[1] = ntohs(np->nl_outport); 3686 } 3687 if (np->nl_flags & IPN_TCP) 3688 fi.fin_p = IPPROTO_TCP; 3689 else if (np->nl_flags & IPN_UDP) 3690 fi.fin_p = IPPROTO_UDP; 3691 else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY)) 3692 fi.fin_p = IPPROTO_ICMP; 3693 3694 fi.fin_ifs = ifs; 3695 /* 3696 * We can do two sorts of lookups: 3697 * - IPN_IN: we have the `real' and `out' address, look for `in'. 3698 * - default: we have the `in' and `out' address, look for `real'. 3699 */ 3700 if (np->nl_flags & IPN_IN) { 3701 if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p, 3702 np->nl_realip, np->nl_outip))) { 3703 np->nl_inip = nat->nat_inip; 3704 np->nl_inport = nat->nat_inport; 3705 } 3706 } else { 3707 /* 3708 * If nl_inip is non null, this is a lookup based on the real 3709 * ip address. Else, we use the fake. 3710 */ 3711 if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p, 3712 np->nl_inip, np->nl_outip))) { 3713 3714 if ((np->nl_flags & IPN_FINDFORWARD) != 0) { 3715 fr_info_t fin; 3716 bzero((char *)&fin, sizeof(fin)); 3717 fin.fin_p = nat->nat_p; 3718 fin.fin_data[0] = ntohs(nat->nat_outport); 3719 fin.fin_data[1] = ntohs(nat->nat_oport); 3720 fin.fin_ifs = ifs; 3721 if (nat_inlookup(&fin, np->nl_flags, fin.fin_p, 3722 nat->nat_outip, 3723 nat->nat_oip) != NULL) { 3724 np->nl_flags &= ~IPN_FINDFORWARD; 3725 } 3726 } 3727 3728 np->nl_realip = nat->nat_outip; 3729 np->nl_realport = nat->nat_outport; 3730 } 3731 } 3732 3733 return nat; 3734 } 3735 3736 3737 /* ------------------------------------------------------------------------ */ 3738 /* Function: nat_match */ 3739 /* Returns: int - 0 == no match, 1 == match */ 3740 /* Parameters: fin(I) - pointer to packet information */ 3741 /* np(I) - pointer to NAT rule */ 3742 /* */ 3743 /* Pull the matching of a packet against a NAT rule out of that complex */ 3744 /* loop inside fr_checknatin() and lay it out properly in its own function. */ 3745 /* ------------------------------------------------------------------------ */ 3746 static int nat_match(fin, np) 3747 fr_info_t *fin; 3748 ipnat_t *np; 3749 { 3750 frtuc_t *ft; 3751 3752 if (fin->fin_v != 4) 3753 return 0; 3754 3755 if (np->in_p && fin->fin_p != np->in_p) 3756 return 0; 3757 3758 if (fin->fin_out) { 3759 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK))) 3760 return 0; 3761 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip) 3762 ^ ((np->in_flags & IPN_NOTSRC) != 0)) 3763 return 0; 3764 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip) 3765 ^ ((np->in_flags & IPN_NOTDST) != 0)) 3766 return 0; 3767 } else { 3768 if (!(np->in_redir & NAT_REDIRECT)) 3769 return 0; 3770 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip) 3771 ^ ((np->in_flags & IPN_NOTSRC) != 0)) 3772 return 0; 3773 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip) 3774 ^ ((np->in_flags & IPN_NOTDST) != 0)) 3775 return 0; 3776 } 3777 3778 ft = &np->in_tuc; 3779 if (!(fin->fin_flx & FI_TCPUDP) || 3780 (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) { 3781 if (ft->ftu_scmp || ft->ftu_dcmp) 3782 return 0; 3783 return 1; 3784 } 3785 3786 return fr_tcpudpchk(fin, ft); 3787 } 3788 3789 3790 /* ------------------------------------------------------------------------ */ 3791 /* Function: nat_update */ 3792 /* Returns: Nil */ 3793 /* Parameters: nat(I) - pointer to NAT structure */ 3794 /* np(I) - pointer to NAT rule */ 3795 /* */ 3796 /* Updates the lifetime of a NAT table entry for non-TCP packets. Must be */ 3797 /* called with fin_rev updated - i.e. after calling nat_proto(). */ 3798 /* ------------------------------------------------------------------------ */ 3799 void nat_update(fin, nat, np) 3800 fr_info_t *fin; 3801 nat_t *nat; 3802 ipnat_t *np; 3803 { 3804 ipftq_t *ifq, *ifq2; 3805 ipftqent_t *tqe; 3806 ipf_stack_t *ifs = fin->fin_ifs; 3807 3808 MUTEX_ENTER(&nat->nat_lock); 3809 tqe = &nat->nat_tqe; 3810 ifq = tqe->tqe_ifq; 3811 3812 /* 3813 * We allow over-riding of NAT timeouts from NAT rules, even for 3814 * TCP, however, if it is TCP and there is no rule timeout set, 3815 * then do not update the timeout here. 3816 */ 3817 if (np != NULL) 3818 ifq2 = np->in_tqehead[fin->fin_rev]; 3819 else 3820 ifq2 = NULL; 3821 3822 if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) { 3823 (void) fr_tcp_age(&nat->nat_tqe, fin, ifs->ifs_nat_tqb, 0); 3824 } else { 3825 if (ifq2 == NULL) { 3826 if (nat->nat_p == IPPROTO_UDP) 3827 ifq2 = &ifs->ifs_nat_udptq; 3828 else if (nat->nat_p == IPPROTO_ICMP) 3829 ifq2 = &ifs->ifs_nat_icmptq; 3830 else 3831 ifq2 = &ifs->ifs_nat_iptq; 3832 } 3833 3834 fr_movequeue(tqe, ifq, ifq2, ifs); 3835 } 3836 MUTEX_EXIT(&nat->nat_lock); 3837 } 3838 3839 3840 /* ------------------------------------------------------------------------ */ 3841 /* Function: fr_checknatout */ 3842 /* Returns: int - -1 == packet failed NAT checks so block it, */ 3843 /* 0 == no packet translation occurred, */ 3844 /* 1 == packet was successfully translated. */ 3845 /* Parameters: fin(I) - pointer to packet information */ 3846 /* passp(I) - pointer to filtering result flags */ 3847 /* */ 3848 /* Check to see if an outcoming packet should be changed. ICMP packets are */ 3849 /* first checked to see if they match an existing entry (if an error), */ 3850 /* otherwise a search of the current NAT table is made. If neither results */ 3851 /* in a match then a search for a matching NAT rule is made. Create a new */ 3852 /* NAT entry if a we matched a NAT rule. Lastly, actually change the */ 3853 /* packet header(s) as required. */ 3854 /* ------------------------------------------------------------------------ */ 3855 int fr_checknatout(fin, passp) 3856 fr_info_t *fin; 3857 u_32_t *passp; 3858 { 3859 ipnat_t *np = NULL, *npnext; 3860 struct ifnet *ifp, *sifp; 3861 icmphdr_t *icmp = NULL; 3862 tcphdr_t *tcp = NULL; 3863 int rval, natfailed; 3864 u_int nflags = 0; 3865 u_32_t ipa, iph; 3866 int natadd = 1; 3867 frentry_t *fr; 3868 nat_t *nat; 3869 ipf_stack_t *ifs = fin->fin_ifs; 3870 3871 if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0) 3872 return 0; 3873 3874 natfailed = 0; 3875 fr = fin->fin_fr; 3876 sifp = fin->fin_ifp; 3877 if ((fr != NULL) && !(fr->fr_flags & FR_DUP) && 3878 fr->fr_tifs[fin->fin_rev].fd_ifp && 3879 fr->fr_tifs[fin->fin_rev].fd_ifp != (void *)-1) 3880 fin->fin_ifp = fr->fr_tifs[fin->fin_rev].fd_ifp; 3881 ifp = fin->fin_ifp; 3882 3883 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 3884 switch (fin->fin_p) 3885 { 3886 case IPPROTO_TCP : 3887 nflags = IPN_TCP; 3888 break; 3889 case IPPROTO_UDP : 3890 nflags = IPN_UDP; 3891 break; 3892 case IPPROTO_ICMP : 3893 icmp = fin->fin_dp; 3894 3895 /* 3896 * This is an incoming packet, so the destination is 3897 * the icmp_id and the source port equals 0 3898 */ 3899 if (nat_icmpquerytype4(icmp->icmp_type)) 3900 nflags = IPN_ICMPQUERY; 3901 break; 3902 default : 3903 break; 3904 } 3905 3906 if ((nflags & IPN_TCPUDP)) 3907 tcp = fin->fin_dp; 3908 } 3909 3910 ipa = fin->fin_saddr; 3911 3912 READ_ENTER(&ifs->ifs_ipf_nat); 3913 3914 if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && 3915 (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND))) 3916 /*EMPTY*/; 3917 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin))) 3918 natadd = 0; 3919 else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p, 3920 fin->fin_src, fin->fin_dst))) { 3921 nflags = nat->nat_flags; 3922 } else { 3923 u_32_t hv, msk, nmsk; 3924 3925 /* 3926 * If there is no current entry in the nat table for this IP#, 3927 * create one for it (if there is a matching rule). 3928 */ 3929 msk = 0xffffffff; 3930 nmsk = ifs->ifs_nat_masks; 3931 maskloop: 3932 iph = ipa & htonl(msk); 3933 hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_natrules_sz); 3934 for (np = ifs->ifs_nat_rules[hv]; np; np = npnext) { 3935 npnext = np->in_mnext; 3936 if ((np->in_ifps[1] && (np->in_ifps[1] != ifp))) 3937 continue; 3938 if (np->in_v != fin->fin_v) 3939 continue; 3940 if (np->in_p && (np->in_p != fin->fin_p)) 3941 continue; 3942 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags)) 3943 continue; 3944 if (np->in_flags & IPN_FILTER) { 3945 if (!nat_match(fin, np)) 3946 continue; 3947 } else if ((ipa & np->in_inmsk) != np->in_inip) 3948 continue; 3949 3950 if ((fr != NULL) && 3951 !fr_matchtag(&np->in_tag, &fr->fr_nattag)) 3952 continue; 3953 3954 if (*np->in_plabel != '\0') { 3955 if (((np->in_flags & IPN_FILTER) == 0) && 3956 (np->in_dport != tcp->th_dport)) 3957 continue; 3958 if (appr_ok(fin, tcp, np) == 0) 3959 continue; 3960 } 3961 3962 ATOMIC_INC32(np->in_use); 3963 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3964 WRITE_ENTER(&ifs->ifs_ipf_nat); 3965 nat = nat_new(fin, np, NULL, nflags, NAT_OUTBOUND); 3966 if (nat != NULL) { 3967 np->in_use--; 3968 np->in_hits++; 3969 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3970 break; 3971 } 3972 natfailed = -1; 3973 npnext = np->in_mnext; 3974 fr_ipnatderef(&np, ifs); 3975 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3976 } 3977 if ((np == NULL) && (nmsk != 0)) { 3978 while (nmsk) { 3979 msk <<= 1; 3980 if (nmsk & 0x80000000) 3981 break; 3982 nmsk <<= 1; 3983 } 3984 if (nmsk != 0) { 3985 nmsk <<= 1; 3986 goto maskloop; 3987 } 3988 } 3989 } 3990 3991 if (nat != NULL) { 3992 rval = fr_natout(fin, nat, natadd, nflags); 3993 if (rval == 1) { 3994 MUTEX_ENTER(&nat->nat_lock); 3995 nat->nat_ref++; 3996 MUTEX_EXIT(&nat->nat_lock); 3997 nat->nat_touched = ifs->ifs_fr_ticks; 3998 fin->fin_nat = nat; 3999 } 4000 } else 4001 rval = natfailed; 4002 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4003 4004 if (rval == -1) { 4005 if (passp != NULL) 4006 *passp = FR_BLOCK; 4007 fin->fin_flx |= FI_BADNAT; 4008 } 4009 fin->fin_ifp = sifp; 4010 return rval; 4011 } 4012 4013 /* ------------------------------------------------------------------------ */ 4014 /* Function: fr_natout */ 4015 /* Returns: int - -1 == packet failed NAT checks so block it, */ 4016 /* 1 == packet was successfully translated. */ 4017 /* Parameters: fin(I) - pointer to packet information */ 4018 /* nat(I) - pointer to NAT structure */ 4019 /* natadd(I) - flag indicating if it is safe to add frag cache */ 4020 /* nflags(I) - NAT flags set for this packet */ 4021 /* */ 4022 /* Translate a packet coming "out" on an interface. */ 4023 /* ------------------------------------------------------------------------ */ 4024 int fr_natout(fin, nat, natadd, nflags) 4025 fr_info_t *fin; 4026 nat_t *nat; 4027 int natadd; 4028 u_32_t nflags; 4029 { 4030 icmphdr_t *icmp; 4031 u_short *csump; 4032 u_32_t sumd; 4033 tcphdr_t *tcp; 4034 ipnat_t *np; 4035 int i; 4036 ipf_stack_t *ifs = fin->fin_ifs; 4037 4038 if (fin->fin_v == 6) { 4039 #ifdef USE_INET6 4040 return fr_nat6out(fin, nat, natadd, nflags); 4041 #else 4042 return NULL; 4043 #endif 4044 } 4045 4046 #if SOLARIS && defined(_KERNEL) 4047 net_data_t net_data_p = ifs->ifs_ipf_ipv4; 4048 #endif 4049 4050 tcp = NULL; 4051 icmp = NULL; 4052 csump = NULL; 4053 np = nat->nat_ptr; 4054 4055 if ((natadd != 0) && (fin->fin_flx & FI_FRAG)) 4056 (void) fr_nat_newfrag(fin, 0, nat); 4057 4058 MUTEX_ENTER(&nat->nat_lock); 4059 nat->nat_bytes[1] += fin->fin_plen; 4060 nat->nat_pkts[1]++; 4061 MUTEX_EXIT(&nat->nat_lock); 4062 4063 /* 4064 * Fix up checksums, not by recalculating them, but 4065 * simply computing adjustments. 4066 * This is only done for STREAMS based IP implementations where the 4067 * checksum has already been calculated by IP. In all other cases, 4068 * IPFilter is called before the checksum needs calculating so there 4069 * is no call to modify whatever is in the header now. 4070 */ 4071 ASSERT(fin->fin_m != NULL); 4072 if (fin->fin_v == 4 && !NET_IS_HCK_L3_FULL(net_data_p, fin->fin_m)) { 4073 if (nflags == IPN_ICMPERR) { 4074 u_32_t s1, s2; 4075 4076 s1 = LONG_SUM(ntohl(fin->fin_saddr)); 4077 s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr)); 4078 CALC_SUMD(s1, s2, sumd); 4079 4080 fix_outcksum(&fin->fin_ip->ip_sum, sumd); 4081 } 4082 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ 4083 defined(linux) || defined(BRIDGE_IPF) 4084 else { 4085 /* 4086 * Strictly speaking, this isn't necessary on BSD 4087 * kernels because they do checksum calculation after 4088 * this code has run BUT if ipfilter is being used 4089 * to do NAT as a bridge, that code doesn't exist. 4090 */ 4091 if (nat->nat_dir == NAT_OUTBOUND) 4092 fix_outcksum(&fin->fin_ip->ip_sum, 4093 nat->nat_ipsumd); 4094 else 4095 fix_incksum(&fin->fin_ip->ip_sum, 4096 nat->nat_ipsumd); 4097 } 4098 #endif 4099 } 4100 4101 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 4102 if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) { 4103 tcp = fin->fin_dp; 4104 4105 tcp->th_sport = nat->nat_outport; 4106 fin->fin_data[0] = ntohs(nat->nat_outport); 4107 } 4108 4109 if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) { 4110 icmp = fin->fin_dp; 4111 icmp->icmp_id = nat->nat_outport; 4112 } 4113 4114 csump = nat_proto(fin, nat, nflags); 4115 } 4116 4117 fin->fin_ip->ip_src = nat->nat_outip; 4118 4119 nat_update(fin, nat, np); 4120 4121 /* 4122 * The above comments do not hold for layer 4 (or higher) checksums... 4123 */ 4124 if (csump != NULL && !NET_IS_HCK_L4_FULL(net_data_p, fin->fin_m)) { 4125 if (nflags & IPN_TCPUDP && 4126 NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) 4127 sumd = nat->nat_sumd[1]; 4128 else 4129 sumd = nat->nat_sumd[0]; 4130 4131 if (nat->nat_dir == NAT_OUTBOUND) 4132 fix_outcksum(csump, sumd); 4133 else 4134 fix_incksum(csump, sumd); 4135 } 4136 #ifdef IPFILTER_SYNC 4137 ipfsync_update(SMC_NAT, fin, nat->nat_sync); 4138 #endif 4139 /* ------------------------------------------------------------- */ 4140 /* A few quick notes: */ 4141 /* Following are test conditions prior to calling the */ 4142 /* appr_check routine. */ 4143 /* */ 4144 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */ 4145 /* with a redirect rule, we attempt to match the packet's */ 4146 /* source port against in_dport, otherwise we'd compare the */ 4147 /* packet's destination. */ 4148 /* ------------------------------------------------------------- */ 4149 if ((np != NULL) && (np->in_apr != NULL)) { 4150 i = appr_check(fin, nat); 4151 if (i == 0) 4152 i = 1; 4153 } else 4154 i = 1; 4155 ifs->ifs_nat_stats.ns_mapped[1]++; 4156 fin->fin_flx |= FI_NATED; 4157 return i; 4158 } 4159 4160 4161 /* ------------------------------------------------------------------------ */ 4162 /* Function: fr_checknatin */ 4163 /* Returns: int - -1 == packet failed NAT checks so block it, */ 4164 /* 0 == no packet translation occurred, */ 4165 /* 1 == packet was successfully translated. */ 4166 /* Parameters: fin(I) - pointer to packet information */ 4167 /* passp(I) - pointer to filtering result flags */ 4168 /* */ 4169 /* Check to see if an incoming packet should be changed. ICMP packets are */ 4170 /* first checked to see if they match an existing entry (if an error), */ 4171 /* otherwise a search of the current NAT table is made. If neither results */ 4172 /* in a match then a search for a matching NAT rule is made. Create a new */ 4173 /* NAT entry if a we matched a NAT rule. Lastly, actually change the */ 4174 /* packet header(s) as required. */ 4175 /* ------------------------------------------------------------------------ */ 4176 int fr_checknatin(fin, passp) 4177 fr_info_t *fin; 4178 u_32_t *passp; 4179 { 4180 u_int nflags, natadd; 4181 ipnat_t *np, *npnext; 4182 int rval, natfailed; 4183 struct ifnet *ifp; 4184 struct in_addr in; 4185 icmphdr_t *icmp; 4186 tcphdr_t *tcp; 4187 u_short dport; 4188 nat_t *nat; 4189 u_32_t iph; 4190 ipf_stack_t *ifs = fin->fin_ifs; 4191 4192 if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0) 4193 return 0; 4194 4195 tcp = NULL; 4196 icmp = NULL; 4197 dport = 0; 4198 natadd = 1; 4199 nflags = 0; 4200 natfailed = 0; 4201 ifp = fin->fin_ifp; 4202 4203 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 4204 switch (fin->fin_p) 4205 { 4206 case IPPROTO_TCP : 4207 nflags = IPN_TCP; 4208 break; 4209 case IPPROTO_UDP : 4210 nflags = IPN_UDP; 4211 break; 4212 case IPPROTO_ICMP : 4213 icmp = fin->fin_dp; 4214 4215 /* 4216 * This is an incoming packet, so the destination is 4217 * the icmp_id and the source port equals 0 4218 */ 4219 if (nat_icmpquerytype4(icmp->icmp_type)) { 4220 nflags = IPN_ICMPQUERY; 4221 dport = icmp->icmp_id; 4222 } break; 4223 default : 4224 break; 4225 } 4226 4227 if ((nflags & IPN_TCPUDP)) { 4228 tcp = fin->fin_dp; 4229 dport = tcp->th_dport; 4230 } 4231 } 4232 4233 in = fin->fin_dst; 4234 4235 READ_ENTER(&ifs->ifs_ipf_nat); 4236 4237 if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && 4238 (nat = nat_icmperror(fin, &nflags, NAT_INBOUND))) 4239 /*EMPTY*/; 4240 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin))) 4241 natadd = 0; 4242 else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p, 4243 fin->fin_src, in))) { 4244 nflags = nat->nat_flags; 4245 } else { 4246 u_32_t hv, msk, rmsk; 4247 4248 rmsk = ifs->ifs_rdr_masks; 4249 msk = 0xffffffff; 4250 /* 4251 * If there is no current entry in the nat table for this IP#, 4252 * create one for it (if there is a matching rule). 4253 */ 4254 maskloop: 4255 iph = in.s_addr & htonl(msk); 4256 hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_rdrrules_sz); 4257 for (np = ifs->ifs_rdr_rules[hv]; np; np = npnext) { 4258 npnext = np->in_rnext; 4259 if (np->in_ifps[0] && (np->in_ifps[0] != ifp)) 4260 continue; 4261 if (np->in_v != fin->fin_v) 4262 continue; 4263 if (np->in_p && (np->in_p != fin->fin_p)) 4264 continue; 4265 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags)) 4266 continue; 4267 if (np->in_flags & IPN_FILTER) { 4268 if (!nat_match(fin, np)) 4269 continue; 4270 } else { 4271 if ((in.s_addr & np->in_outmsk) != np->in_outip) 4272 continue; 4273 if (np->in_pmin && 4274 ((ntohs(np->in_pmax) < ntohs(dport)) || 4275 (ntohs(dport) < ntohs(np->in_pmin)))) 4276 continue; 4277 } 4278 4279 if (*np->in_plabel != '\0') { 4280 if (!appr_ok(fin, tcp, np)) { 4281 continue; 4282 } 4283 } 4284 4285 ATOMIC_INC32(np->in_use); 4286 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4287 WRITE_ENTER(&ifs->ifs_ipf_nat); 4288 nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND); 4289 if (nat != NULL) { 4290 np->in_use--; 4291 np->in_hits++; 4292 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 4293 break; 4294 } 4295 natfailed = -1; 4296 npnext = np->in_rnext; 4297 fr_ipnatderef(&np, ifs); 4298 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 4299 } 4300 4301 if ((np == NULL) && (rmsk != 0)) { 4302 while (rmsk) { 4303 msk <<= 1; 4304 if (rmsk & 0x80000000) 4305 break; 4306 rmsk <<= 1; 4307 } 4308 if (rmsk != 0) { 4309 rmsk <<= 1; 4310 goto maskloop; 4311 } 4312 } 4313 } 4314 if (nat != NULL) { 4315 rval = fr_natin(fin, nat, natadd, nflags); 4316 if (rval == 1) { 4317 MUTEX_ENTER(&nat->nat_lock); 4318 nat->nat_ref++; 4319 MUTEX_EXIT(&nat->nat_lock); 4320 nat->nat_touched = ifs->ifs_fr_ticks; 4321 fin->fin_nat = nat; 4322 fin->fin_state = nat->nat_state; 4323 } 4324 } else 4325 rval = natfailed; 4326 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4327 4328 if (rval == -1) { 4329 if (passp != NULL) 4330 *passp = FR_BLOCK; 4331 fin->fin_flx |= FI_BADNAT; 4332 } 4333 return rval; 4334 } 4335 4336 4337 /* ------------------------------------------------------------------------ */ 4338 /* Function: fr_natin */ 4339 /* Returns: int - -1 == packet failed NAT checks so block it, */ 4340 /* 1 == packet was successfully translated. */ 4341 /* Parameters: fin(I) - pointer to packet information */ 4342 /* nat(I) - pointer to NAT structure */ 4343 /* natadd(I) - flag indicating if it is safe to add frag cache */ 4344 /* nflags(I) - NAT flags set for this packet */ 4345 /* Locks Held: ipf_nat (READ) */ 4346 /* */ 4347 /* Translate a packet coming "in" on an interface. */ 4348 /* ------------------------------------------------------------------------ */ 4349 int fr_natin(fin, nat, natadd, nflags) 4350 fr_info_t *fin; 4351 nat_t *nat; 4352 int natadd; 4353 u_32_t nflags; 4354 { 4355 icmphdr_t *icmp; 4356 u_short *csump; 4357 tcphdr_t *tcp; 4358 ipnat_t *np; 4359 int i; 4360 ipf_stack_t *ifs = fin->fin_ifs; 4361 4362 if (fin->fin_v == 6) { 4363 #ifdef USE_INET6 4364 return fr_nat6in(fin, nat, natadd, nflags); 4365 #else 4366 return NULL; 4367 #endif 4368 } 4369 4370 #if SOLARIS && defined(_KERNEL) 4371 net_data_t net_data_p = ifs->ifs_ipf_ipv4; 4372 #endif 4373 4374 tcp = NULL; 4375 csump = NULL; 4376 np = nat->nat_ptr; 4377 fin->fin_fr = nat->nat_fr; 4378 4379 if ((natadd != 0) && (fin->fin_flx & FI_FRAG)) 4380 (void) fr_nat_newfrag(fin, 0, nat); 4381 4382 if (np != NULL) { 4383 4384 /* ------------------------------------------------------------- */ 4385 /* A few quick notes: */ 4386 /* Following are test conditions prior to calling the */ 4387 /* appr_check routine. */ 4388 /* */ 4389 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */ 4390 /* with a map rule, we attempt to match the packet's */ 4391 /* source port against in_dport, otherwise we'd compare the */ 4392 /* packet's destination. */ 4393 /* ------------------------------------------------------------- */ 4394 if (np->in_apr != NULL) { 4395 i = appr_check(fin, nat); 4396 if (i == -1) { 4397 return -1; 4398 } 4399 } 4400 } 4401 4402 #ifdef IPFILTER_SYNC 4403 ipfsync_update(SMC_NAT, fin, nat->nat_sync); 4404 #endif 4405 4406 MUTEX_ENTER(&nat->nat_lock); 4407 nat->nat_bytes[0] += fin->fin_plen; 4408 nat->nat_pkts[0]++; 4409 MUTEX_EXIT(&nat->nat_lock); 4410 4411 fin->fin_ip->ip_dst = nat->nat_inip; 4412 fin->fin_fi.fi_daddr = nat->nat_inip.s_addr; 4413 if (nflags & IPN_TCPUDP) 4414 tcp = fin->fin_dp; 4415 4416 /* 4417 * Fix up checksums, not by recalculating them, but 4418 * simply computing adjustments. 4419 * Why only do this for some platforms on inbound packets ? 4420 * Because for those that it is done, IP processing is yet to happen 4421 * and so the IPv4 header checksum has not yet been evaluated. 4422 * Perhaps it should always be done for the benefit of things like 4423 * fast forwarding (so that it doesn't need to be recomputed) but with 4424 * header checksum offloading, perhaps it is a moot point. 4425 */ 4426 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ 4427 defined(__osf__) || defined(linux) 4428 if (nat->nat_dir == NAT_OUTBOUND) 4429 fix_incksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd); 4430 else 4431 fix_outcksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd); 4432 #endif 4433 4434 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 4435 if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) { 4436 tcp->th_dport = nat->nat_inport; 4437 fin->fin_data[1] = ntohs(nat->nat_inport); 4438 } 4439 4440 4441 if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) { 4442 icmp = fin->fin_dp; 4443 4444 icmp->icmp_id = nat->nat_inport; 4445 } 4446 4447 csump = nat_proto(fin, nat, nflags); 4448 } 4449 4450 nat_update(fin, nat, np); 4451 4452 /* 4453 * In case they are being forwarded, inbound packets always need to have 4454 * their checksum adjusted even if hardware checksum validation said OK. 4455 */ 4456 if (csump != NULL) { 4457 if (nat->nat_dir == NAT_OUTBOUND) 4458 fix_incksum(csump, nat->nat_sumd[0]); 4459 else 4460 fix_outcksum(csump, nat->nat_sumd[0]); 4461 } 4462 4463 #if SOLARIS && defined(_KERNEL) 4464 if (nflags & IPN_TCPUDP && 4465 NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) { 4466 /* 4467 * Need to adjust the partial checksum result stored in 4468 * db_cksum16, which will be used for validation in IP. 4469 * See IP_CKSUM_RECV(). 4470 * Adjustment data should be the inverse of the IP address 4471 * changes, because db_cksum16 is supposed to be the complement 4472 * of the pesudo header. 4473 */ 4474 csump = &fin->fin_m->b_datap->db_cksum16; 4475 if (nat->nat_dir == NAT_OUTBOUND) 4476 fix_outcksum(csump, nat->nat_sumd[1]); 4477 else 4478 fix_incksum(csump, nat->nat_sumd[1]); 4479 } 4480 #endif 4481 4482 ifs->ifs_nat_stats.ns_mapped[0]++; 4483 fin->fin_flx |= FI_NATED; 4484 if (np != NULL && np->in_tag.ipt_num[0] != 0) 4485 fin->fin_nattag = &np->in_tag; 4486 return 1; 4487 } 4488 4489 4490 /* ------------------------------------------------------------------------ */ 4491 /* Function: nat_proto */ 4492 /* Returns: u_short* - pointer to transport header checksum to update, */ 4493 /* NULL if the transport protocol is not recognised */ 4494 /* as needing a checksum update. */ 4495 /* Parameters: fin(I) - pointer to packet information */ 4496 /* nat(I) - pointer to NAT structure */ 4497 /* nflags(I) - NAT flags set for this packet */ 4498 /* */ 4499 /* Return the pointer to the checksum field for each protocol so understood.*/ 4500 /* If support for making other changes to a protocol header is required, */ 4501 /* that is not strictly 'address' translation, such as clamping the MSS in */ 4502 /* TCP down to a specific value, then do it from here. */ 4503 /* ------------------------------------------------------------------------ */ 4504 u_short *nat_proto(fin, nat, nflags) 4505 fr_info_t *fin; 4506 nat_t *nat; 4507 u_int nflags; 4508 { 4509 icmphdr_t *icmp; 4510 struct icmp6_hdr *icmp6; 4511 u_short *csump; 4512 tcphdr_t *tcp; 4513 udphdr_t *udp; 4514 4515 csump = NULL; 4516 if (fin->fin_out == 0) { 4517 fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND); 4518 } else { 4519 fin->fin_rev = (nat->nat_dir == NAT_INBOUND); 4520 } 4521 4522 switch (fin->fin_p) 4523 { 4524 case IPPROTO_TCP : 4525 tcp = fin->fin_dp; 4526 4527 csump = &tcp->th_sum; 4528 4529 /* 4530 * Do a MSS CLAMPING on a SYN packet, 4531 * only deal IPv4 for now. 4532 */ 4533 if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0) 4534 nat_mssclamp(tcp, nat->nat_mssclamp, csump); 4535 4536 break; 4537 4538 case IPPROTO_UDP : 4539 udp = fin->fin_dp; 4540 4541 if (udp->uh_sum) 4542 csump = &udp->uh_sum; 4543 break; 4544 4545 case IPPROTO_ICMP : 4546 icmp = fin->fin_dp; 4547 4548 if ((nflags & IPN_ICMPQUERY) != 0) { 4549 if (icmp->icmp_cksum != 0) 4550 csump = &icmp->icmp_cksum; 4551 } 4552 break; 4553 4554 case IPPROTO_ICMPV6 : 4555 icmp6 = fin->fin_dp; 4556 4557 if ((nflags & IPN_ICMPQUERY) != 0) { 4558 if (icmp6->icmp6_cksum != 0) 4559 csump = &icmp6->icmp6_cksum; 4560 } 4561 break; 4562 } 4563 return csump; 4564 } 4565 4566 4567 /* ------------------------------------------------------------------------ */ 4568 /* Function: fr_natunload */ 4569 /* Returns: Nil */ 4570 /* Parameters: Nil */ 4571 /* */ 4572 /* Free all memory used by NAT structures allocated at runtime. */ 4573 /* ------------------------------------------------------------------------ */ 4574 void fr_natunload(ifs) 4575 ipf_stack_t *ifs; 4576 { 4577 ipftq_t *ifq, *ifqnext; 4578 4579 (void) nat_clearlist(ifs); 4580 (void) nat_flushtable(ifs); 4581 4582 /* 4583 * Proxy timeout queues are not cleaned here because although they 4584 * exist on the NAT list, appr_unload is called after fr_natunload 4585 * and the proxies actually are responsible for them being created. 4586 * Should the proxy timeouts have their own list? There's no real 4587 * justification as this is the only complication. 4588 */ 4589 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { 4590 ifqnext = ifq->ifq_next; 4591 if (((ifq->ifq_flags & IFQF_PROXY) == 0) && 4592 (fr_deletetimeoutqueue(ifq) == 0)) 4593 fr_freetimeoutqueue(ifq, ifs); 4594 } 4595 4596 if (ifs->ifs_nat_table[0] != NULL) { 4597 KFREES(ifs->ifs_nat_table[0], 4598 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 4599 ifs->ifs_nat_table[0] = NULL; 4600 } 4601 if (ifs->ifs_nat_table[1] != NULL) { 4602 KFREES(ifs->ifs_nat_table[1], 4603 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 4604 ifs->ifs_nat_table[1] = NULL; 4605 } 4606 if (ifs->ifs_nat_rules != NULL) { 4607 KFREES(ifs->ifs_nat_rules, 4608 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz); 4609 ifs->ifs_nat_rules = NULL; 4610 } 4611 if (ifs->ifs_rdr_rules != NULL) { 4612 KFREES(ifs->ifs_rdr_rules, 4613 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz); 4614 ifs->ifs_rdr_rules = NULL; 4615 } 4616 if (ifs->ifs_maptable != NULL) { 4617 KFREES(ifs->ifs_maptable, 4618 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); 4619 ifs->ifs_maptable = NULL; 4620 } 4621 if (ifs->ifs_nat_stats.ns_bucketlen[0] != NULL) { 4622 KFREES(ifs->ifs_nat_stats.ns_bucketlen[0], 4623 sizeof(u_long *) * ifs->ifs_ipf_nattable_sz); 4624 ifs->ifs_nat_stats.ns_bucketlen[0] = NULL; 4625 } 4626 if (ifs->ifs_nat_stats.ns_bucketlen[1] != NULL) { 4627 KFREES(ifs->ifs_nat_stats.ns_bucketlen[1], 4628 sizeof(u_long *) * ifs->ifs_ipf_nattable_sz); 4629 ifs->ifs_nat_stats.ns_bucketlen[1] = NULL; 4630 } 4631 4632 if (ifs->ifs_fr_nat_maxbucket_reset == 1) 4633 ifs->ifs_fr_nat_maxbucket = 0; 4634 4635 if (ifs->ifs_fr_nat_init == 1) { 4636 ifs->ifs_fr_nat_init = 0; 4637 fr_sttab_destroy(ifs->ifs_nat_tqb); 4638 4639 RW_DESTROY(&ifs->ifs_ipf_natfrag); 4640 RW_DESTROY(&ifs->ifs_ipf_nat); 4641 4642 MUTEX_DESTROY(&ifs->ifs_ipf_nat_new); 4643 MUTEX_DESTROY(&ifs->ifs_ipf_natio); 4644 4645 MUTEX_DESTROY(&ifs->ifs_nat_udptq.ifq_lock); 4646 MUTEX_DESTROY(&ifs->ifs_nat_icmptq.ifq_lock); 4647 MUTEX_DESTROY(&ifs->ifs_nat_iptq.ifq_lock); 4648 } 4649 } 4650 4651 4652 /* ------------------------------------------------------------------------ */ 4653 /* Function: fr_natexpire */ 4654 /* Returns: Nil */ 4655 /* Parameters: Nil */ 4656 /* */ 4657 /* Check all of the timeout queues for entries at the top which need to be */ 4658 /* expired. */ 4659 /* ------------------------------------------------------------------------ */ 4660 void fr_natexpire(ifs) 4661 ipf_stack_t *ifs; 4662 { 4663 ipftq_t *ifq, *ifqnext; 4664 ipftqent_t *tqe, *tqn; 4665 int i; 4666 SPL_INT(s); 4667 4668 SPL_NET(s); 4669 WRITE_ENTER(&ifs->ifs_ipf_nat); 4670 for (ifq = ifs->ifs_nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) { 4671 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { 4672 if (tqe->tqe_die > ifs->ifs_fr_ticks) 4673 break; 4674 tqn = tqe->tqe_next; 4675 nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs); 4676 } 4677 } 4678 4679 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { 4680 ifqnext = ifq->ifq_next; 4681 4682 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { 4683 if (tqe->tqe_die > ifs->ifs_fr_ticks) 4684 break; 4685 tqn = tqe->tqe_next; 4686 nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs); 4687 } 4688 } 4689 4690 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { 4691 ifqnext = ifq->ifq_next; 4692 4693 if (((ifq->ifq_flags & IFQF_DELETE) != 0) && 4694 (ifq->ifq_ref == 0)) { 4695 fr_freetimeoutqueue(ifq, ifs); 4696 } 4697 } 4698 4699 if (ifs->ifs_nat_doflush != 0) { 4700 (void) nat_extraflush(2, ifs); 4701 ifs->ifs_nat_doflush = 0; 4702 } 4703 4704 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4705 SPL_X(s); 4706 } 4707 4708 4709 /* ------------------------------------------------------------------------ */ 4710 /* Function: fr_nataddrsync */ 4711 /* Returns: Nil */ 4712 /* Parameters: ifp(I) - pointer to network interface */ 4713 /* addr(I) - pointer to new network address */ 4714 /* */ 4715 /* Walk through all of the currently active NAT sessions, looking for those */ 4716 /* which need to have their translated address updated (where the interface */ 4717 /* matches the one passed in) and change it, recalculating the checksum sum */ 4718 /* difference too. */ 4719 /* ------------------------------------------------------------------------ */ 4720 void fr_nataddrsync(v, ifp, addr, ifs) 4721 int v; 4722 void *ifp; 4723 void *addr; 4724 ipf_stack_t *ifs; 4725 { 4726 u_32_t sum1, sum2, sumd; 4727 nat_t *nat; 4728 ipnat_t *np; 4729 SPL_INT(s); 4730 4731 if (ifs->ifs_fr_running <= 0) 4732 return; 4733 4734 SPL_NET(s); 4735 WRITE_ENTER(&ifs->ifs_ipf_nat); 4736 4737 if (ifs->ifs_fr_running <= 0) { 4738 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4739 return; 4740 } 4741 4742 /* 4743 * Change IP addresses for NAT sessions for any protocol except TCP 4744 * since it will break the TCP connection anyway. The only rules 4745 * which will get changed are those which are "map ... -> 0/32", 4746 * where the rule specifies the address is taken from the interface. 4747 */ 4748 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4749 if (addr != NULL) { 4750 if (((ifp != NULL) && ifp != (nat->nat_ifps[0])) || 4751 ((nat->nat_flags & IPN_TCP) != 0)) 4752 continue; 4753 if ((np = nat->nat_ptr) == NULL) 4754 continue; 4755 if (v == 4 && np->in_v == 4) { 4756 if (np->in_nip || np->in_outmsk != 0xffffffff) 4757 continue; 4758 /* 4759 * Change the map-to address to be the same as 4760 * the new one. 4761 */ 4762 sum1 = nat->nat_outip.s_addr; 4763 nat->nat_outip = *(struct in_addr *)addr; 4764 sum2 = nat->nat_outip.s_addr; 4765 } else if (v == 6 && np->in_v == 6) { 4766 if (!IP6_ISZERO(&np->in_next6.in6) || 4767 !IP6_ISONES(&np->in_out[1].in6)) 4768 continue; 4769 /* 4770 * Change the map-to address to be the same as 4771 * the new one. 4772 */ 4773 nat->nat_outip6.in6 = *(struct in6_addr *)addr; 4774 } else 4775 continue; 4776 4777 } else if (((ifp == NULL) || (ifp == nat->nat_ifps[0])) && 4778 !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr)) { 4779 if (np->in_v == 4 && (v == 4 || v == 0)) { 4780 struct in_addr in; 4781 if (np->in_outmsk != 0xffffffff || np->in_nip) 4782 continue; 4783 /* 4784 * Change the map-to address to be the same as 4785 * the new one. 4786 */ 4787 sum1 = nat->nat_outip.s_addr; 4788 if (fr_ifpaddr(4, FRI_NORMAL, nat->nat_ifps[0], 4789 &in, NULL, ifs) != -1) 4790 nat->nat_outip = in; 4791 sum2 = nat->nat_outip.s_addr; 4792 } else if (np->in_v == 6 && (v == 6 || v == 0)) { 4793 struct in6_addr in6; 4794 if (!IP6_ISZERO(&np->in_next6.in6) || 4795 !IP6_ISONES(&np->in_out[1].in6)) 4796 continue; 4797 /* 4798 * Change the map-to address to be the same as 4799 * the new one. 4800 */ 4801 if (fr_ifpaddr(6, FRI_NORMAL, nat->nat_ifps[0], 4802 (void *)&in6, NULL, ifs) != -1) 4803 nat->nat_outip6.in6 = in6; 4804 } else 4805 continue; 4806 } else { 4807 continue; 4808 } 4809 4810 if (sum1 == sum2) 4811 continue; 4812 /* 4813 * Readjust the checksum adjustment to take into 4814 * account the new IP#. 4815 */ 4816 CALC_SUMD(sum1, sum2, sumd); 4817 /* XXX - dont change for TCP when solaris does 4818 * hardware checksumming. 4819 */ 4820 sumd += nat->nat_sumd[0]; 4821 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); 4822 nat->nat_sumd[1] = nat->nat_sumd[0]; 4823 } 4824 4825 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4826 SPL_X(s); 4827 } 4828 4829 4830 /* ------------------------------------------------------------------------ */ 4831 /* Function: fr_natifpsync */ 4832 /* Returns: Nil */ 4833 /* Parameters: action(I) - how we are syncing */ 4834 /* ifp(I) - pointer to network interface */ 4835 /* name(I) - name of interface to sync to */ 4836 /* */ 4837 /* This function is used to resync the mapping of interface names and their */ 4838 /* respective 'pointers'. For "action == IPFSYNC_RESYNC", resync all */ 4839 /* interfaces by doing a new lookup of name to 'pointer'. For "action == */ 4840 /* IPFSYNC_NEWIFP", treat ifp as the new pointer value associated with */ 4841 /* "name" and for "action == IPFSYNC_OLDIFP", ifp is a pointer for which */ 4842 /* there is no longer any interface associated with it. */ 4843 /* ------------------------------------------------------------------------ */ 4844 void fr_natifpsync(action, v, ifp, name, ifs) 4845 int action, v; 4846 void *ifp; 4847 char *name; 4848 ipf_stack_t *ifs; 4849 { 4850 #if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL) 4851 int s; 4852 #endif 4853 nat_t *nat; 4854 ipnat_t *n; 4855 int nv; 4856 4857 if (ifs->ifs_fr_running <= 0) 4858 return; 4859 4860 SPL_NET(s); 4861 WRITE_ENTER(&ifs->ifs_ipf_nat); 4862 4863 if (ifs->ifs_fr_running <= 0) { 4864 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4865 return; 4866 } 4867 4868 switch (action) 4869 { 4870 case IPFSYNC_RESYNC : 4871 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4872 nv = (v == 0) ? nat->nat_v : v; 4873 if (nat->nat_v != nv) 4874 continue; 4875 if ((ifp == nat->nat_ifps[0]) || 4876 (nat->nat_ifps[0] == (void *)-1)) { 4877 nat->nat_ifps[0] = 4878 fr_resolvenic(nat->nat_ifnames[0], nv, ifs); 4879 } 4880 4881 if ((ifp == nat->nat_ifps[1]) || 4882 (nat->nat_ifps[1] == (void *)-1)) { 4883 nat->nat_ifps[1] = 4884 fr_resolvenic(nat->nat_ifnames[1], nv, ifs); 4885 } 4886 } 4887 4888 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { 4889 nv = (v == 0) ? (int)n->in_v : v; 4890 if ((int)n->in_v != nv) 4891 continue; 4892 if (n->in_ifps[0] == ifp || 4893 n->in_ifps[0] == (void *)-1) { 4894 n->in_ifps[0] = 4895 fr_resolvenic(n->in_ifnames[0], nv, ifs); 4896 } 4897 if (n->in_ifps[1] == ifp || 4898 n->in_ifps[1] == (void *)-1) { 4899 n->in_ifps[1] = 4900 fr_resolvenic(n->in_ifnames[1], nv, ifs); 4901 } 4902 } 4903 break; 4904 case IPFSYNC_NEWIFP : 4905 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4906 if (nat->nat_v != v) 4907 continue; 4908 if (!strncmp(name, nat->nat_ifnames[0], 4909 sizeof(nat->nat_ifnames[0]))) 4910 nat->nat_ifps[0] = ifp; 4911 if (!strncmp(name, nat->nat_ifnames[1], 4912 sizeof(nat->nat_ifnames[1]))) 4913 nat->nat_ifps[1] = ifp; 4914 } 4915 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { 4916 if ((int)n->in_v != v) 4917 continue; 4918 if (!strncmp(name, n->in_ifnames[0], 4919 sizeof(n->in_ifnames[0]))) 4920 n->in_ifps[0] = ifp; 4921 if (!strncmp(name, n->in_ifnames[1], 4922 sizeof(n->in_ifnames[1]))) 4923 n->in_ifps[1] = ifp; 4924 } 4925 break; 4926 case IPFSYNC_OLDIFP : 4927 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4928 if (nat->nat_v != v) 4929 continue; 4930 if (ifp == nat->nat_ifps[0]) 4931 nat->nat_ifps[0] = (void *)-1; 4932 if (ifp == nat->nat_ifps[1]) 4933 nat->nat_ifps[1] = (void *)-1; 4934 } 4935 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { 4936 if ((int)n->in_v != v) 4937 continue; 4938 if (n->in_ifps[0] == ifp) 4939 n->in_ifps[0] = (void *)-1; 4940 if (n->in_ifps[1] == ifp) 4941 n->in_ifps[1] = (void *)-1; 4942 } 4943 break; 4944 } 4945 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4946 SPL_X(s); 4947 } 4948 4949 4950 /* ------------------------------------------------------------------------ */ 4951 /* Function: nat_icmpquerytype4 */ 4952 /* Returns: int - 1 == success, 0 == failure */ 4953 /* Parameters: icmptype(I) - ICMP type number */ 4954 /* */ 4955 /* Tests to see if the ICMP type number passed is a query/response type or */ 4956 /* not. */ 4957 /* ------------------------------------------------------------------------ */ 4958 static INLINE int nat_icmpquerytype4(icmptype) 4959 int icmptype; 4960 { 4961 4962 /* 4963 * For the ICMP query NAT code, it is essential that both the query 4964 * and the reply match on the NAT rule. Because the NAT structure 4965 * does not keep track of the icmptype, and a single NAT structure 4966 * is used for all icmp types with the same src, dest and id, we 4967 * simply define the replies as queries as well. The funny thing is, 4968 * altough it seems silly to call a reply a query, this is exactly 4969 * as it is defined in the IPv4 specification 4970 */ 4971 4972 switch (icmptype) 4973 { 4974 4975 case ICMP_ECHOREPLY: 4976 case ICMP_ECHO: 4977 /* route aedvertisement/solliciation is currently unsupported: */ 4978 /* it would require rewriting the ICMP data section */ 4979 case ICMP_TSTAMP: 4980 case ICMP_TSTAMPREPLY: 4981 case ICMP_IREQ: 4982 case ICMP_IREQREPLY: 4983 case ICMP_MASKREQ: 4984 case ICMP_MASKREPLY: 4985 return 1; 4986 default: 4987 return 0; 4988 } 4989 } 4990 4991 4992 /* ------------------------------------------------------------------------ */ 4993 /* Function: nat_log */ 4994 /* Returns: Nil */ 4995 /* Parameters: nat(I) - pointer to NAT structure */ 4996 /* type(I) - type of log entry to create */ 4997 /* */ 4998 /* Creates a NAT log entry. */ 4999 /* ------------------------------------------------------------------------ */ 5000 void nat_log(nat, type, ifs) 5001 struct nat *nat; 5002 u_int type; 5003 ipf_stack_t *ifs; 5004 { 5005 #ifdef IPFILTER_LOG 5006 # ifndef LARGE_NAT 5007 struct ipnat *np; 5008 int rulen; 5009 # endif 5010 struct natlog natl; 5011 void *items[1]; 5012 size_t sizes[1]; 5013 int types[1]; 5014 5015 natl.nlg_inip = nat->nat_inip6; 5016 natl.nlg_outip = nat->nat_outip6; 5017 natl.nlg_origip = nat->nat_oip6; 5018 natl.nlg_bytes[0] = nat->nat_bytes[0]; 5019 natl.nlg_bytes[1] = nat->nat_bytes[1]; 5020 natl.nlg_pkts[0] = nat->nat_pkts[0]; 5021 natl.nlg_pkts[1] = nat->nat_pkts[1]; 5022 natl.nlg_origport = nat->nat_oport; 5023 natl.nlg_inport = nat->nat_inport; 5024 natl.nlg_outport = nat->nat_outport; 5025 natl.nlg_p = nat->nat_p; 5026 natl.nlg_type = type; 5027 natl.nlg_rule = -1; 5028 natl.nlg_v = nat->nat_v; 5029 # ifndef LARGE_NAT 5030 if (nat->nat_ptr != NULL) { 5031 for (rulen = 0, np = ifs->ifs_nat_list; np; 5032 np = np->in_next, rulen++) 5033 if (np == nat->nat_ptr) { 5034 natl.nlg_rule = rulen; 5035 break; 5036 } 5037 } 5038 # endif 5039 items[0] = &natl; 5040 sizes[0] = sizeof(natl); 5041 types[0] = 0; 5042 5043 (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1, ifs); 5044 #endif 5045 } 5046 5047 5048 #if defined(__OpenBSD__) 5049 /* ------------------------------------------------------------------------ */ 5050 /* Function: nat_ifdetach */ 5051 /* Returns: Nil */ 5052 /* Parameters: ifp(I) - pointer to network interface */ 5053 /* */ 5054 /* Compatibility interface for OpenBSD to trigger the correct updating of */ 5055 /* interface references within IPFilter. */ 5056 /* ------------------------------------------------------------------------ */ 5057 void nat_ifdetach(ifp, ifs) 5058 void *ifp; 5059 ipf_stack_t *ifs; 5060 { 5061 frsync(ifp, ifs); 5062 return; 5063 } 5064 #endif 5065 5066 5067 /* ------------------------------------------------------------------------ */ 5068 /* Function: fr_ipnatderef */ 5069 /* Returns: Nil */ 5070 /* Parameters: inp(I) - pointer to pointer to NAT rule */ 5071 /* Write Locks: ipf_nat */ 5072 /* */ 5073 /* ------------------------------------------------------------------------ */ 5074 void fr_ipnatderef(inp, ifs) 5075 ipnat_t **inp; 5076 ipf_stack_t *ifs; 5077 { 5078 ipnat_t *in; 5079 5080 in = *inp; 5081 *inp = NULL; 5082 in->in_use--; 5083 if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) { 5084 if (in->in_apr) 5085 appr_free(in->in_apr); 5086 KFREE(in); 5087 ifs->ifs_nat_stats.ns_rules--; 5088 #ifdef notdef 5089 #if SOLARIS 5090 if (ifs->ifs_nat_stats.ns_rules == 0) 5091 ifs->ifs_pfil_delayed_copy = 1; 5092 #endif 5093 #endif 5094 } 5095 } 5096 5097 5098 /* ------------------------------------------------------------------------ */ 5099 /* Function: fr_natderef */ 5100 /* Returns: Nil */ 5101 /* Parameters: isp(I) - pointer to pointer to NAT table entry */ 5102 /* */ 5103 /* Decrement the reference counter for this NAT table entry and free it if */ 5104 /* there are no more things using it. */ 5105 /* */ 5106 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */ 5107 /* structure *because* it only gets called on paths _after_ nat_ref has been*/ 5108 /* incremented. If nat_ref == 1 then we shouldn't decrement it here */ 5109 /* because nat_delete() will do that and send nat_ref to -1. */ 5110 /* */ 5111 /* Holding the lock on nat_lock is required to serialise nat_delete() being */ 5112 /* called from a NAT flush ioctl with a deref happening because of a packet.*/ 5113 /* ------------------------------------------------------------------------ */ 5114 void fr_natderef(natp, ifs) 5115 nat_t **natp; 5116 ipf_stack_t *ifs; 5117 { 5118 nat_t *nat; 5119 5120 nat = *natp; 5121 *natp = NULL; 5122 5123 MUTEX_ENTER(&nat->nat_lock); 5124 if (nat->nat_ref > 1) { 5125 nat->nat_ref--; 5126 MUTEX_EXIT(&nat->nat_lock); 5127 return; 5128 } 5129 MUTEX_EXIT(&nat->nat_lock); 5130 5131 WRITE_ENTER(&ifs->ifs_ipf_nat); 5132 nat_delete(nat, NL_EXPIRE, ifs); 5133 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5134 } 5135 5136 5137 /* ------------------------------------------------------------------------ */ 5138 /* Function: fr_natclone */ 5139 /* Returns: ipstate_t* - NULL == cloning failed, */ 5140 /* else pointer to new state structure */ 5141 /* Parameters: fin(I) - pointer to packet information */ 5142 /* is(I) - pointer to master state structure */ 5143 /* Write Lock: ipf_nat */ 5144 /* */ 5145 /* Create a "duplcate" state table entry from the master. */ 5146 /* ------------------------------------------------------------------------ */ 5147 nat_t *fr_natclone(fin, nat) 5148 fr_info_t *fin; 5149 nat_t *nat; 5150 { 5151 frentry_t *fr; 5152 nat_t *clone; 5153 ipnat_t *np; 5154 ipf_stack_t *ifs = fin->fin_ifs; 5155 5156 KMALLOC(clone, nat_t *); 5157 if (clone == NULL) 5158 return NULL; 5159 bcopy((char *)nat, (char *)clone, sizeof(*clone)); 5160 5161 MUTEX_NUKE(&clone->nat_lock); 5162 5163 clone->nat_aps = NULL; 5164 /* 5165 * Initialize all these so that nat_delete() doesn't cause a crash. 5166 */ 5167 clone->nat_tqe.tqe_pnext = NULL; 5168 clone->nat_tqe.tqe_next = NULL; 5169 clone->nat_tqe.tqe_ifq = NULL; 5170 clone->nat_tqe.tqe_parent = clone; 5171 5172 clone->nat_flags &= ~SI_CLONE; 5173 clone->nat_flags |= SI_CLONED; 5174 5175 if (clone->nat_hm) 5176 clone->nat_hm->hm_ref++; 5177 5178 if (nat_insert(clone, fin->fin_rev, ifs) == -1) { 5179 KFREE(clone); 5180 return NULL; 5181 } 5182 np = clone->nat_ptr; 5183 if (np != NULL) { 5184 if (ifs->ifs_nat_logging) 5185 nat_log(clone, (u_int)np->in_redir, ifs); 5186 np->in_use++; 5187 } 5188 fr = clone->nat_fr; 5189 if (fr != NULL) { 5190 MUTEX_ENTER(&fr->fr_lock); 5191 fr->fr_ref++; 5192 MUTEX_EXIT(&fr->fr_lock); 5193 } 5194 5195 /* 5196 * Because the clone is created outside the normal loop of things and 5197 * TCP has special needs in terms of state, initialise the timeout 5198 * state of the new NAT from here. 5199 */ 5200 if (clone->nat_p == IPPROTO_TCP) { 5201 (void) fr_tcp_age(&clone->nat_tqe, fin, ifs->ifs_nat_tqb, 5202 clone->nat_flags); 5203 } 5204 #ifdef IPFILTER_SYNC 5205 clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone); 5206 #endif 5207 if (ifs->ifs_nat_logging) 5208 nat_log(clone, NL_CLONE, ifs); 5209 return clone; 5210 } 5211 5212 5213 /* ------------------------------------------------------------------------ */ 5214 /* Function: nat_wildok */ 5215 /* Returns: int - 1 == packet's ports match wildcards */ 5216 /* 0 == packet's ports don't match wildcards */ 5217 /* Parameters: nat(I) - NAT entry */ 5218 /* sport(I) - source port */ 5219 /* dport(I) - destination port */ 5220 /* flags(I) - wildcard flags */ 5221 /* dir(I) - packet direction */ 5222 /* */ 5223 /* Use NAT entry and packet direction to determine which combination of */ 5224 /* wildcard flags should be used. */ 5225 /* ------------------------------------------------------------------------ */ 5226 int nat_wildok(nat, sport, dport, flags, dir) 5227 nat_t *nat; 5228 int sport; 5229 int dport; 5230 int flags; 5231 int dir; 5232 { 5233 /* 5234 * When called by dir is set to 5235 * nat_inlookup NAT_INBOUND (0) 5236 * nat_outlookup NAT_OUTBOUND (1) 5237 * 5238 * We simply combine the packet's direction in dir with the original 5239 * "intended" direction of that NAT entry in nat->nat_dir to decide 5240 * which combination of wildcard flags to allow. 5241 */ 5242 5243 switch ((dir << 1) | nat->nat_dir) 5244 { 5245 case 3: /* outbound packet / outbound entry */ 5246 if (((nat->nat_inport == sport) || 5247 (flags & SI_W_SPORT)) && 5248 ((nat->nat_oport == dport) || 5249 (flags & SI_W_DPORT))) 5250 return 1; 5251 break; 5252 case 2: /* outbound packet / inbound entry */ 5253 if (((nat->nat_outport == sport) || 5254 (flags & SI_W_DPORT)) && 5255 ((nat->nat_oport == dport) || 5256 (flags & SI_W_SPORT))) 5257 return 1; 5258 break; 5259 case 1: /* inbound packet / outbound entry */ 5260 if (((nat->nat_oport == sport) || 5261 (flags & SI_W_DPORT)) && 5262 ((nat->nat_outport == dport) || 5263 (flags & SI_W_SPORT))) 5264 return 1; 5265 break; 5266 case 0: /* inbound packet / inbound entry */ 5267 if (((nat->nat_oport == sport) || 5268 (flags & SI_W_SPORT)) && 5269 ((nat->nat_outport == dport) || 5270 (flags & SI_W_DPORT))) 5271 return 1; 5272 break; 5273 default: 5274 break; 5275 } 5276 5277 return(0); 5278 } 5279 5280 5281 /* ------------------------------------------------------------------------ */ 5282 /* Function: nat_mssclamp */ 5283 /* Returns: Nil */ 5284 /* Parameters: tcp(I) - pointer to TCP header */ 5285 /* maxmss(I) - value to clamp the TCP MSS to */ 5286 /* csump(I) - pointer to TCP checksum */ 5287 /* */ 5288 /* Check for MSS option and clamp it if necessary. If found and changed, */ 5289 /* then the TCP header checksum will be updated to reflect the change in */ 5290 /* the MSS. */ 5291 /* ------------------------------------------------------------------------ */ 5292 static void nat_mssclamp(tcp, maxmss, csump) 5293 tcphdr_t *tcp; 5294 u_32_t maxmss; 5295 u_short *csump; 5296 { 5297 u_char *cp, *ep, opt; 5298 int hlen, advance; 5299 u_32_t mss, sumd; 5300 5301 hlen = TCP_OFF(tcp) << 2; 5302 if (hlen > sizeof(*tcp)) { 5303 cp = (u_char *)tcp + sizeof(*tcp); 5304 ep = (u_char *)tcp + hlen; 5305 5306 while (cp < ep) { 5307 opt = cp[0]; 5308 if (opt == TCPOPT_EOL) 5309 break; 5310 else if (opt == TCPOPT_NOP) { 5311 cp++; 5312 continue; 5313 } 5314 5315 if (cp + 1 >= ep) 5316 break; 5317 advance = cp[1]; 5318 if ((cp + advance > ep) || (advance <= 0)) 5319 break; 5320 switch (opt) 5321 { 5322 case TCPOPT_MAXSEG: 5323 if (advance != 4) 5324 break; 5325 mss = cp[2] * 256 + cp[3]; 5326 if (mss > maxmss) { 5327 cp[2] = maxmss / 256; 5328 cp[3] = maxmss & 0xff; 5329 CALC_SUMD(mss, maxmss, sumd); 5330 fix_outcksum(csump, sumd); 5331 } 5332 break; 5333 default: 5334 /* ignore unknown options */ 5335 break; 5336 } 5337 5338 cp += advance; 5339 } 5340 } 5341 } 5342 5343 5344 /* ------------------------------------------------------------------------ */ 5345 /* Function: fr_setnatqueue */ 5346 /* Returns: Nil */ 5347 /* Parameters: nat(I)- pointer to NAT structure */ 5348 /* rev(I) - forward(0) or reverse(1) direction */ 5349 /* Locks: ipf_nat (read or write) */ 5350 /* */ 5351 /* Put the NAT entry on its default queue entry, using rev as a helped in */ 5352 /* determining which queue it should be placed on. */ 5353 /* ------------------------------------------------------------------------ */ 5354 void fr_setnatqueue(nat, rev, ifs) 5355 nat_t *nat; 5356 int rev; 5357 ipf_stack_t *ifs; 5358 { 5359 ipftq_t *oifq, *nifq; 5360 5361 if (nat->nat_ptr != NULL) 5362 nifq = nat->nat_ptr->in_tqehead[rev]; 5363 else 5364 nifq = NULL; 5365 5366 if (nifq == NULL) { 5367 switch (nat->nat_p) 5368 { 5369 case IPPROTO_UDP : 5370 nifq = &ifs->ifs_nat_udptq; 5371 break; 5372 case IPPROTO_ICMP : 5373 nifq = &ifs->ifs_nat_icmptq; 5374 break; 5375 case IPPROTO_TCP : 5376 nifq = ifs->ifs_nat_tqb + nat->nat_tqe.tqe_state[rev]; 5377 break; 5378 default : 5379 nifq = &ifs->ifs_nat_iptq; 5380 break; 5381 } 5382 } 5383 5384 oifq = nat->nat_tqe.tqe_ifq; 5385 /* 5386 * If it's currently on a timeout queue, move it from one queue to 5387 * another, else put it on the end of the newly determined queue. 5388 */ 5389 if (oifq != NULL) 5390 fr_movequeue(&nat->nat_tqe, oifq, nifq, ifs); 5391 else 5392 fr_queueappend(&nat->nat_tqe, nifq, nat, ifs); 5393 return; 5394 } 5395 5396 /* ------------------------------------------------------------------------ */ 5397 /* Function: nat_getnext */ 5398 /* Returns: int - 0 == ok, else error */ 5399 /* Parameters: t(I) - pointer to ipftoken structure */ 5400 /* itp(I) - pointer to ipfgeniter_t structure */ 5401 /* ifs - ipf stack instance */ 5402 /* */ 5403 /* Fetch the next nat/ipnat/hostmap structure pointer from the linked list */ 5404 /* and copy it out to the storage space pointed to by itp. The next item */ 5405 /* in the list to look at is put back in the ipftoken struture. */ 5406 /* ------------------------------------------------------------------------ */ 5407 static int nat_getnext(t, itp, ifs) 5408 ipftoken_t *t; 5409 ipfgeniter_t *itp; 5410 ipf_stack_t *ifs; 5411 { 5412 hostmap_t *hm, *nexthm = NULL, zerohm; 5413 ipnat_t *ipn, *nextipnat = NULL, zeroipn; 5414 nat_t *nat, *nextnat = NULL, zeronat; 5415 int error = 0, count; 5416 char *dst; 5417 5418 if (itp->igi_nitems == 0) 5419 return EINVAL; 5420 5421 READ_ENTER(&ifs->ifs_ipf_nat); 5422 5423 /* 5424 * Get "previous" entry from the token and find the next entry. 5425 */ 5426 switch (itp->igi_type) 5427 { 5428 case IPFGENITER_HOSTMAP : 5429 hm = t->ipt_data; 5430 if (hm == NULL) { 5431 nexthm = ifs->ifs_ipf_hm_maplist; 5432 } else { 5433 nexthm = hm->hm_next; 5434 } 5435 break; 5436 5437 case IPFGENITER_IPNAT : 5438 ipn = t->ipt_data; 5439 if (ipn == NULL) { 5440 nextipnat = ifs->ifs_nat_list; 5441 } else { 5442 nextipnat = ipn->in_next; 5443 } 5444 break; 5445 5446 case IPFGENITER_NAT : 5447 nat = t->ipt_data; 5448 if (nat == NULL) { 5449 nextnat = ifs->ifs_nat_instances; 5450 } else { 5451 nextnat = nat->nat_next; 5452 } 5453 break; 5454 default : 5455 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5456 return EINVAL; 5457 } 5458 5459 dst = itp->igi_data; 5460 for (count = itp->igi_nitems; count > 0; count--) { 5461 /* 5462 * If we found an entry, add a reference to it and update the token. 5463 * Otherwise, zero out data to be returned and NULL out token. 5464 */ 5465 switch (itp->igi_type) 5466 { 5467 case IPFGENITER_HOSTMAP : 5468 if (nexthm != NULL) { 5469 ATOMIC_INC32(nexthm->hm_ref); 5470 t->ipt_data = nexthm; 5471 } else { 5472 bzero(&zerohm, sizeof(zerohm)); 5473 nexthm = &zerohm; 5474 t->ipt_data = NULL; 5475 } 5476 break; 5477 case IPFGENITER_IPNAT : 5478 if (nextipnat != NULL) { 5479 ATOMIC_INC32(nextipnat->in_use); 5480 t->ipt_data = nextipnat; 5481 } else { 5482 bzero(&zeroipn, sizeof(zeroipn)); 5483 nextipnat = &zeroipn; 5484 t->ipt_data = NULL; 5485 } 5486 break; 5487 case IPFGENITER_NAT : 5488 if (nextnat != NULL) { 5489 MUTEX_ENTER(&nextnat->nat_lock); 5490 nextnat->nat_ref++; 5491 MUTEX_EXIT(&nextnat->nat_lock); 5492 t->ipt_data = nextnat; 5493 } else { 5494 bzero(&zeronat, sizeof(zeronat)); 5495 nextnat = &zeronat; 5496 t->ipt_data = NULL; 5497 } 5498 break; 5499 default : 5500 break; 5501 } 5502 5503 /* 5504 * Now that we have ref, it's save to give up lock. 5505 */ 5506 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5507 5508 /* 5509 * Copy out data and clean up references and token as needed. 5510 */ 5511 switch (itp->igi_type) 5512 { 5513 case IPFGENITER_HOSTMAP : 5514 error = COPYOUT(nexthm, dst, sizeof(*nexthm)); 5515 if (error != 0) 5516 error = EFAULT; 5517 if (t->ipt_data == NULL) { 5518 ipf_freetoken(t, ifs); 5519 break; 5520 } else { 5521 if (hm != NULL) { 5522 WRITE_ENTER(&ifs->ifs_ipf_nat); 5523 fr_hostmapdel(&hm); 5524 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5525 } 5526 if (nexthm->hm_next == NULL) { 5527 ipf_freetoken(t, ifs); 5528 break; 5529 } 5530 dst += sizeof(*nexthm); 5531 hm = nexthm; 5532 nexthm = nexthm->hm_next; 5533 } 5534 break; 5535 5536 case IPFGENITER_IPNAT : 5537 error = COPYOUT(nextipnat, dst, sizeof(*nextipnat)); 5538 if (error != 0) 5539 error = EFAULT; 5540 if (t->ipt_data == NULL) { 5541 ipf_freetoken(t, ifs); 5542 break; 5543 } else { 5544 if (ipn != NULL) { 5545 WRITE_ENTER(&ifs->ifs_ipf_nat); 5546 fr_ipnatderef(&ipn, ifs); 5547 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5548 } 5549 if (nextipnat->in_next == NULL) { 5550 ipf_freetoken(t, ifs); 5551 break; 5552 } 5553 dst += sizeof(*nextipnat); 5554 ipn = nextipnat; 5555 nextipnat = nextipnat->in_next; 5556 } 5557 break; 5558 5559 case IPFGENITER_NAT : 5560 error = COPYOUT(nextnat, dst, sizeof(*nextnat)); 5561 if (error != 0) 5562 error = EFAULT; 5563 if (t->ipt_data == NULL) { 5564 ipf_freetoken(t, ifs); 5565 break; 5566 } else { 5567 if (nat != NULL) 5568 fr_natderef(&nat, ifs); 5569 if (nextnat->nat_next == NULL) { 5570 ipf_freetoken(t, ifs); 5571 break; 5572 } 5573 dst += sizeof(*nextnat); 5574 nat = nextnat; 5575 nextnat = nextnat->nat_next; 5576 } 5577 break; 5578 default : 5579 break; 5580 } 5581 5582 if ((count == 1) || (error != 0)) 5583 break; 5584 5585 READ_ENTER(&ifs->ifs_ipf_nat); 5586 } 5587 5588 return error; 5589 } 5590 5591 5592 /* ------------------------------------------------------------------------ */ 5593 /* Function: nat_iterator */ 5594 /* Returns: int - 0 == ok, else error */ 5595 /* Parameters: token(I) - pointer to ipftoken structure */ 5596 /* itp(I) - pointer to ipfgeniter_t structure */ 5597 /* */ 5598 /* This function acts as a handler for the SIOCGENITER ioctls that use a */ 5599 /* generic structure to iterate through a list. There are three different */ 5600 /* linked lists of NAT related information to go through: NAT rules, active */ 5601 /* NAT mappings and the NAT fragment cache. */ 5602 /* ------------------------------------------------------------------------ */ 5603 static int nat_iterator(token, itp, ifs) 5604 ipftoken_t *token; 5605 ipfgeniter_t *itp; 5606 ipf_stack_t *ifs; 5607 { 5608 int error; 5609 5610 if (itp->igi_data == NULL) 5611 return EFAULT; 5612 5613 token->ipt_subtype = itp->igi_type; 5614 5615 switch (itp->igi_type) 5616 { 5617 case IPFGENITER_HOSTMAP : 5618 case IPFGENITER_IPNAT : 5619 case IPFGENITER_NAT : 5620 error = nat_getnext(token, itp, ifs); 5621 break; 5622 case IPFGENITER_NATFRAG : 5623 error = fr_nextfrag(token, itp, &ifs->ifs_ipfr_natlist, 5624 &ifs->ifs_ipfr_nattail, 5625 &ifs->ifs_ipf_natfrag, ifs); 5626 break; 5627 default : 5628 error = EINVAL; 5629 break; 5630 } 5631 5632 return error; 5633 } 5634 5635 5636 /* -------------------------------------------------------------------- */ 5637 /* Function: nat_earlydrop */ 5638 /* Returns: number of dropped/removed entries from the queue */ 5639 /* Parameters: ifq - pointer to queue with entries to be processed */ 5640 /* maxidle - entry must be idle this long to be dropped */ 5641 /* ifs - ipf stack instance */ 5642 /* */ 5643 /* Function is invoked from nat_extraflush() only. Removes entries */ 5644 /* form specified timeout queue, based on how long they've sat idle, */ 5645 /* without waiting for it to happen on its own. */ 5646 /* -------------------------------------------------------------------- */ 5647 static int nat_earlydrop(ifq, maxidle, ifs) 5648 ipftq_t *ifq; 5649 int maxidle; 5650 ipf_stack_t *ifs; 5651 { 5652 ipftqent_t *tqe, *tqn; 5653 nat_t *nat; 5654 unsigned int dropped; 5655 int droptick; 5656 5657 if (ifq == NULL) 5658 return (0); 5659 5660 dropped = 0; 5661 5662 /* 5663 * Determine the tick representing the idle time we're interested 5664 * in. If an entry exists in the queue, and it was touched before 5665 * that tick, then it's been idle longer than maxidle ... remove it. 5666 */ 5667 droptick = ifs->ifs_fr_ticks - maxidle; 5668 tqn = ifq->ifq_head; 5669 while ((tqe = tqn) != NULL && tqe->tqe_touched < droptick) { 5670 tqn = tqe->tqe_next; 5671 nat = tqe->tqe_parent; 5672 nat_delete(nat, ISL_EXPIRE, ifs); 5673 dropped++; 5674 } 5675 return (dropped); 5676 } 5677 5678 5679 /* --------------------------------------------------------------------- */ 5680 /* Function: nat_flushclosing */ 5681 /* Returns: int - number of NAT entries deleted */ 5682 /* Parameters: stateval(I) - State at which to start removing entries */ 5683 /* ifs - ipf stack instance */ 5684 /* */ 5685 /* Remove nat table entries for TCP connections which are in the process */ 5686 /* of closing, and are in (or "beyond") state specified by 'stateval'. */ 5687 /* --------------------------------------------------------------------- */ 5688 static int nat_flushclosing(stateval, ifs) 5689 int stateval; 5690 ipf_stack_t *ifs; 5691 { 5692 ipftq_t *ifq, *ifqn; 5693 ipftqent_t *tqe, *tqn; 5694 nat_t *nat; 5695 int dropped; 5696 5697 dropped = 0; 5698 5699 /* 5700 * Start by deleting any entries in specific timeout queues. 5701 */ 5702 ifqn = &ifs->ifs_nat_tqb[stateval]; 5703 while ((ifq = ifqn) != NULL) { 5704 ifqn = ifq->ifq_next; 5705 dropped += nat_earlydrop(ifq, (int)0, ifs); 5706 } 5707 5708 /* 5709 * Next, look through user defined queues for closing entries. 5710 */ 5711 ifqn = ifs->ifs_nat_utqe; 5712 while ((ifq = ifqn) != NULL) { 5713 ifqn = ifq->ifq_next; 5714 tqn = ifq->ifq_head; 5715 while ((tqe = tqn) != NULL) { 5716 tqn = tqe->tqe_next; 5717 nat = tqe->tqe_parent; 5718 if (nat->nat_p != IPPROTO_TCP) 5719 continue; 5720 if ((nat->nat_tcpstate[0] >= stateval) && 5721 (nat->nat_tcpstate[1] >= stateval)) { 5722 nat_delete(nat, NL_EXPIRE, ifs); 5723 dropped++; 5724 } 5725 } 5726 } 5727 return (dropped); 5728 } 5729 5730 5731 /* --------------------------------------------------------------------- */ 5732 /* Function: nat_extraflush */ 5733 /* Returns: int - number of NAT entries deleted */ 5734 /* Parameters: which(I) - how to flush the active NAT table */ 5735 /* ifs - ipf stack instance */ 5736 /* Write Locks: ipf_nat */ 5737 /* */ 5738 /* Flush nat tables. Three actions currently defined: */ 5739 /* */ 5740 /* which == 0 : Flush all nat table entries. */ 5741 /* */ 5742 /* which == 1 : Flush entries with TCP connections which have started */ 5743 /* to close on both ends. */ 5744 /* */ 5745 /* which == 2 : First, flush entries which are "almost" closed. If that */ 5746 /* does not take us below specified threshold in the table, */ 5747 /* we want to flush entries with TCP connections which have */ 5748 /* been idle for a long time. Start with connections idle */ 5749 /* over 12 hours, and then work backwards in half hour */ 5750 /* increments to at most 30 minutes idle, and finally work */ 5751 /* back in 30 second increments to at most 30 seconds. */ 5752 /* --------------------------------------------------------------------- */ 5753 static int nat_extraflush(which, ifs) 5754 int which; 5755 ipf_stack_t *ifs; 5756 { 5757 ipftq_t *ifq, *ifqn; 5758 nat_t *nat, **natp; 5759 int idletime, removed, idle_idx; 5760 SPL_INT(s); 5761 5762 removed = 0; 5763 5764 SPL_NET(s); 5765 switch (which) 5766 { 5767 case 0: 5768 natp = &ifs->ifs_nat_instances; 5769 while ((nat = *natp) != NULL) { 5770 natp = &nat->nat_next; 5771 nat_delete(nat, ISL_FLUSH, ifs); 5772 removed++; 5773 } 5774 break; 5775 5776 case 1: 5777 removed = nat_flushclosing(IPF_TCPS_CLOSE_WAIT, ifs); 5778 break; 5779 5780 case 2: 5781 removed = nat_flushclosing(IPF_TCPS_FIN_WAIT_2, ifs); 5782 5783 /* 5784 * Be sure we haven't done this in the last 10 seconds. 5785 */ 5786 if (ifs->ifs_fr_ticks - ifs->ifs_nat_last_force_flush < 5787 IPF_TTLVAL(10)) 5788 break; 5789 ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks; 5790 5791 /* 5792 * Determine initial threshold for minimum idle time based on 5793 * how long ipfilter has been running. Ipfilter needs to have 5794 * been up as long as the smallest interval to continue on. 5795 * 5796 * Minimum idle times stored in idletime_tab and indexed by 5797 * idle_idx. Start at upper end of array and work backwards. 5798 * 5799 * Once the index is found, set the initial idle time to the 5800 * first interval before the current ipfilter run time. 5801 */ 5802 if (ifs->ifs_fr_ticks < idletime_tab[0]) 5803 break; /* switch */ 5804 idle_idx = (sizeof (idletime_tab) / sizeof (int)) - 1; 5805 if (ifs->ifs_fr_ticks > idletime_tab[idle_idx]) { 5806 idletime = idletime_tab[idle_idx]; 5807 } else { 5808 while ((idle_idx > 0) && 5809 (ifs->ifs_fr_ticks < idletime_tab[idle_idx])) 5810 idle_idx--; 5811 idletime = (ifs->ifs_fr_ticks / 5812 idletime_tab[idle_idx]) * 5813 idletime_tab[idle_idx]; 5814 } 5815 5816 while ((idle_idx >= 0) && 5817 (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_lo)) { 5818 /* 5819 * Start with appropriate timeout queue. 5820 */ 5821 removed += nat_earlydrop( 5822 &ifs->ifs_nat_tqb[IPF_TCPS_ESTABLISHED], 5823 idletime, ifs); 5824 5825 /* 5826 * Make sure we haven't already deleted enough 5827 * entries before checking the user defined queues. 5828 */ 5829 if (NAT_TAB_WATER_LEVEL(ifs) <= 5830 ifs->ifs_nat_flush_lvl_lo) 5831 break; 5832 5833 /* 5834 * Next, look through the user defined queues. 5835 */ 5836 ifqn = ifs->ifs_nat_utqe; 5837 while ((ifq = ifqn) != NULL) { 5838 ifqn = ifq->ifq_next; 5839 removed += nat_earlydrop(ifq, idletime, ifs); 5840 } 5841 5842 /* 5843 * Adjust the granularity of idle time. 5844 * 5845 * If we reach an interval boundary, we need to 5846 * either adjust the idle time accordingly or exit 5847 * the loop altogether (if this is very last check). 5848 */ 5849 idletime -= idletime_tab[idle_idx]; 5850 if (idletime < idletime_tab[idle_idx]) { 5851 if (idle_idx != 0) { 5852 idletime = idletime_tab[idle_idx] - 5853 idletime_tab[idle_idx - 1]; 5854 idle_idx--; 5855 } else { 5856 break; /* while */ 5857 } 5858 } 5859 } 5860 break; 5861 default: 5862 break; 5863 } 5864 5865 SPL_X(s); 5866 return (removed); 5867 } 5868