1 /* 2 * Copyright (C) 1995-2004 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 7 * Use is subject to license terms. 8 */ 9 10 #pragma ident "%Z%%M% %I% %E% SMI"$ 11 12 #if defined(KERNEL) || defined(_KERNEL) 13 # undef KERNEL 14 # undef _KERNEL 15 # define KERNEL 1 16 # define _KERNEL 1 17 #endif 18 #include <sys/errno.h> 19 #include <sys/types.h> 20 #include <sys/param.h> 21 #include <sys/time.h> 22 #include <sys/file.h> 23 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ 24 defined(_KERNEL) 25 # include "opt_ipfilter_log.h" 26 #endif 27 #if !defined(_KERNEL) 28 # include <stdio.h> 29 # include <string.h> 30 # include <stdlib.h> 31 # define _KERNEL 32 # ifdef __OpenBSD__ 33 struct file; 34 # endif 35 # include <sys/uio.h> 36 # undef _KERNEL 37 #endif 38 #if defined(_KERNEL) && (__FreeBSD_version >= 220000) 39 # include <sys/filio.h> 40 # include <sys/fcntl.h> 41 #else 42 # include <sys/ioctl.h> 43 #endif 44 #if !defined(AIX) 45 # include <sys/fcntl.h> 46 #endif 47 #if !defined(linux) 48 # include <sys/protosw.h> 49 #endif 50 #include <sys/socket.h> 51 #if defined(_KERNEL) 52 # include <sys/systm.h> 53 # if !defined(__SVR4) && !defined(__svr4__) 54 # include <sys/mbuf.h> 55 # endif 56 #endif 57 #if defined(__SVR4) || defined(__svr4__) 58 # include <sys/filio.h> 59 # include <sys/byteorder.h> 60 # ifdef _KERNEL 61 # include <sys/dditypes.h> 62 # endif 63 # include <sys/stream.h> 64 # include <sys/kmem.h> 65 #endif 66 #if __FreeBSD_version >= 300000 67 # include <sys/queue.h> 68 #endif 69 #include <net/if.h> 70 #if __FreeBSD_version >= 300000 71 # include <net/if_var.h> 72 # if defined(_KERNEL) && !defined(IPFILTER_LKM) 73 # include "opt_ipfilter.h" 74 # endif 75 #endif 76 #ifdef sun 77 # include <net/af.h> 78 #endif 79 #include <net/route.h> 80 #include <netinet/in.h> 81 #include <netinet/in_systm.h> 82 #include <netinet/ip.h> 83 84 #ifdef RFC1825 85 # include <vpn/md5.h> 86 # include <vpn/ipsec.h> 87 extern struct ifnet vpnif; 88 #endif 89 90 #if !defined(linux) 91 # include <netinet/ip_var.h> 92 #endif 93 #include <netinet/tcp.h> 94 #include <netinet/udp.h> 95 #include <netinet/ip_icmp.h> 96 #include "netinet/ip_compat.h" 97 #include <netinet/tcpip.h> 98 #include "netinet/ip_fil.h" 99 #include "netinet/ip_nat.h" 100 #include "netinet/ip_frag.h" 101 #include "netinet/ip_state.h" 102 #include "netinet/ip_proxy.h" 103 #include "netinet/ipf_stack.h" 104 #ifdef IPFILTER_SYNC 105 #include "netinet/ip_sync.h" 106 #endif 107 #if (__FreeBSD_version >= 300000) 108 # include <sys/malloc.h> 109 #endif 110 /* END OF INCLUDES */ 111 112 #undef SOCKADDR_IN 113 #define SOCKADDR_IN struct sockaddr_in 114 115 #if !defined(lint) 116 static const char sccsid[] = "@(#)ip_nat.c 1.11 6/5/96 (C) 1995 Darren Reed"; 117 static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 darrenr Exp $"; 118 #endif 119 120 121 /* ======================================================================== */ 122 /* How the NAT is organised and works. */ 123 /* */ 124 /* Inside (interface y) NAT Outside (interface x) */ 125 /* -------------------- -+- ------------------------------------- */ 126 /* Packet going | out, processsed by fr_checknatout() for x */ 127 /* ------------> | ------------> */ 128 /* src=10.1.1.1 | src=192.1.1.1 */ 129 /* | */ 130 /* | in, processed by fr_checknatin() for x */ 131 /* <------------ | <------------ */ 132 /* dst=10.1.1.1 | dst=192.1.1.1 */ 133 /* -------------------- -+- ------------------------------------- */ 134 /* fr_checknatout() - changes ip_src and if required, sport */ 135 /* - creates a new mapping, if required. */ 136 /* fr_checknatin() - changes ip_dst and if required, dport */ 137 /* */ 138 /* In the NAT table, internal source is recorded as "in" and externally */ 139 /* seen as "out". */ 140 /* ======================================================================== */ 141 142 143 static int nat_flushtable __P((ipf_stack_t *)); 144 static int nat_clearlist __P((ipf_stack_t *)); 145 static void nat_addnat __P((struct ipnat *, ipf_stack_t *)); 146 static void nat_addrdr __P((struct ipnat *, ipf_stack_t *)); 147 static void nat_delete __P((struct nat *, int, ipf_stack_t *)); 148 static int fr_natgetent __P((caddr_t, ipf_stack_t *)); 149 static int fr_natgetsz __P((caddr_t, ipf_stack_t *)); 150 static int fr_natputent __P((caddr_t, int, ipf_stack_t *)); 151 static void nat_tabmove __P((nat_t *, ipf_stack_t *)); 152 static int nat_match __P((fr_info_t *, ipnat_t *)); 153 static INLINE int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *)); 154 static INLINE int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *)); 155 static hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr, 156 struct in_addr, struct in_addr, u_32_t, 157 ipf_stack_t *)); 158 static INLINE int nat_icmpquerytype4 __P((int)); 159 static int nat_ruleaddrinit __P((ipnat_t *)); 160 static int nat_siocaddnat __P((ipnat_t *, ipnat_t **, int, ipf_stack_t *)); 161 static void nat_siocdelnat __P((ipnat_t *, ipnat_t **, int, ipf_stack_t *)); 162 static INLINE int nat_icmperrortype4 __P((int)); 163 static INLINE int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *, 164 tcphdr_t *, nat_t **, int)); 165 static INLINE int nat_resolverule __P((ipnat_t *, ipf_stack_t *)); 166 static void nat_mssclamp __P((tcphdr_t *, u_32_t, u_short *)); 167 static int nat_getnext __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 168 static int nat_iterator __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 169 static int nat_extraflush __P((int, ipf_stack_t *)); 170 static int nat_earlydrop __P((ipftq_t *, int, ipf_stack_t *)); 171 static int nat_flushclosing __P((int, ipf_stack_t *)); 172 173 174 /* 175 * Below we declare a list of constants used only in the nat_extraflush() 176 * routine. We are placing it here, instead of in nat_extraflush() itself, 177 * because we want to make it visible to tools such as mdb, nm etc., so the 178 * values can easily be altered during debugging. 179 */ 180 static const int idletime_tab[] = { 181 IPF_TTLVAL(30), /* 30 seconds */ 182 IPF_TTLVAL(1800), /* 30 minutes */ 183 IPF_TTLVAL(43200), /* 12 hours */ 184 IPF_TTLVAL(345600), /* 4 days */ 185 }; 186 187 #define NAT_HAS_L4_CHANGED(n) \ 188 (((n)->nat_flags & (IPN_TCPUDPICMP)) && \ 189 (n)->nat_inport != (n)->nat_outport) 190 191 /* ------------------------------------------------------------------------ */ 192 /* Function: fr_natinit */ 193 /* Returns: int - 0 == success, -1 == failure */ 194 /* Parameters: Nil */ 195 /* */ 196 /* Initialise all of the NAT locks, tables and other structures. */ 197 /* ------------------------------------------------------------------------ */ 198 int fr_natinit(ifs) 199 ipf_stack_t *ifs; 200 { 201 int i; 202 203 KMALLOCS(ifs->ifs_nat_table[0], nat_t **, 204 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 205 if (ifs->ifs_nat_table[0] != NULL) 206 bzero((char *)ifs->ifs_nat_table[0], 207 ifs->ifs_ipf_nattable_sz * sizeof(nat_t *)); 208 else 209 return -1; 210 211 KMALLOCS(ifs->ifs_nat_table[1], nat_t **, 212 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 213 if (ifs->ifs_nat_table[1] != NULL) 214 bzero((char *)ifs->ifs_nat_table[1], 215 ifs->ifs_ipf_nattable_sz * sizeof(nat_t *)); 216 else 217 return -2; 218 219 KMALLOCS(ifs->ifs_nat_rules, ipnat_t **, 220 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz); 221 if (ifs->ifs_nat_rules != NULL) 222 bzero((char *)ifs->ifs_nat_rules, 223 ifs->ifs_ipf_natrules_sz * sizeof(ipnat_t *)); 224 else 225 return -3; 226 227 KMALLOCS(ifs->ifs_rdr_rules, ipnat_t **, 228 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz); 229 if (ifs->ifs_rdr_rules != NULL) 230 bzero((char *)ifs->ifs_rdr_rules, 231 ifs->ifs_ipf_rdrrules_sz * sizeof(ipnat_t *)); 232 else 233 return -4; 234 235 KMALLOCS(ifs->ifs_maptable, hostmap_t **, 236 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); 237 if (ifs->ifs_maptable != NULL) 238 bzero((char *)ifs->ifs_maptable, 239 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); 240 else 241 return -5; 242 243 ifs->ifs_ipf_hm_maplist = NULL; 244 245 KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[0], u_long *, 246 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 247 if (ifs->ifs_nat_stats.ns_bucketlen[0] == NULL) 248 return -1; 249 bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[0], 250 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 251 252 KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[1], u_long *, 253 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 254 if (ifs->ifs_nat_stats.ns_bucketlen[1] == NULL) 255 return -1; 256 bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[1], 257 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 258 259 if (ifs->ifs_fr_nat_maxbucket == 0) { 260 for (i = ifs->ifs_ipf_nattable_sz; i > 0; i >>= 1) 261 ifs->ifs_fr_nat_maxbucket++; 262 ifs->ifs_fr_nat_maxbucket *= 2; 263 } 264 265 fr_sttab_init(ifs->ifs_nat_tqb, ifs); 266 /* 267 * Increase this because we may have "keep state" following this too 268 * and packet storms can occur if this is removed too quickly. 269 */ 270 ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcplastack; 271 ifs->ifs_nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_nat_udptq; 272 ifs->ifs_nat_udptq.ifq_ttl = ifs->ifs_fr_defnatage; 273 ifs->ifs_nat_udptq.ifq_ref = 1; 274 ifs->ifs_nat_udptq.ifq_head = NULL; 275 ifs->ifs_nat_udptq.ifq_tail = &ifs->ifs_nat_udptq.ifq_head; 276 MUTEX_INIT(&ifs->ifs_nat_udptq.ifq_lock, "nat ipftq udp tab"); 277 ifs->ifs_nat_udptq.ifq_next = &ifs->ifs_nat_icmptq; 278 ifs->ifs_nat_icmptq.ifq_ttl = ifs->ifs_fr_defnaticmpage; 279 ifs->ifs_nat_icmptq.ifq_ref = 1; 280 ifs->ifs_nat_icmptq.ifq_head = NULL; 281 ifs->ifs_nat_icmptq.ifq_tail = &ifs->ifs_nat_icmptq.ifq_head; 282 MUTEX_INIT(&ifs->ifs_nat_icmptq.ifq_lock, "nat icmp ipftq tab"); 283 ifs->ifs_nat_icmptq.ifq_next = &ifs->ifs_nat_iptq; 284 ifs->ifs_nat_iptq.ifq_ttl = ifs->ifs_fr_defnatipage; 285 ifs->ifs_nat_iptq.ifq_ref = 1; 286 ifs->ifs_nat_iptq.ifq_head = NULL; 287 ifs->ifs_nat_iptq.ifq_tail = &ifs->ifs_nat_iptq.ifq_head; 288 MUTEX_INIT(&ifs->ifs_nat_iptq.ifq_lock, "nat ip ipftq tab"); 289 ifs->ifs_nat_iptq.ifq_next = NULL; 290 291 for (i = 0; i < IPF_TCP_NSTATES; i++) { 292 if (ifs->ifs_nat_tqb[i].ifq_ttl < ifs->ifs_fr_defnaticmpage) 293 ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnaticmpage; 294 #ifdef LARGE_NAT 295 else if (ifs->ifs_nat_tqb[i].ifq_ttl > ifs->ifs_fr_defnatage) 296 ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnatage; 297 #endif 298 } 299 300 /* 301 * Increase this because we may have "keep state" following 302 * this too and packet storms can occur if this is removed 303 * too quickly. 304 */ 305 ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = 306 ifs->ifs_nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl; 307 308 RWLOCK_INIT(&ifs->ifs_ipf_nat, "ipf IP NAT rwlock"); 309 RWLOCK_INIT(&ifs->ifs_ipf_natfrag, "ipf IP NAT-Frag rwlock"); 310 MUTEX_INIT(&ifs->ifs_ipf_nat_new, "ipf nat new mutex"); 311 MUTEX_INIT(&ifs->ifs_ipf_natio, "ipf nat io mutex"); 312 313 ifs->ifs_fr_nat_init = 1; 314 315 return 0; 316 } 317 318 319 /* ------------------------------------------------------------------------ */ 320 /* Function: nat_addrdr */ 321 /* Returns: Nil */ 322 /* Parameters: n(I) - pointer to NAT rule to add */ 323 /* */ 324 /* Adds a redirect rule to the hash table of redirect rules and the list of */ 325 /* loaded NAT rules. Updates the bitmask indicating which netmasks are in */ 326 /* use by redirect rules. */ 327 /* ------------------------------------------------------------------------ */ 328 static void nat_addrdr(n, ifs) 329 ipnat_t *n; 330 ipf_stack_t *ifs; 331 { 332 ipnat_t **np; 333 u_32_t j; 334 u_int hv; 335 int k; 336 337 k = count4bits(n->in_outmsk); 338 if ((k >= 0) && (k != 32)) 339 ifs->ifs_rdr_masks |= 1 << k; 340 j = (n->in_outip & n->in_outmsk); 341 hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_rdrrules_sz); 342 np = ifs->ifs_rdr_rules + hv; 343 while (*np != NULL) 344 np = &(*np)->in_rnext; 345 n->in_rnext = NULL; 346 n->in_prnext = np; 347 n->in_hv = hv; 348 *np = n; 349 } 350 351 352 /* ------------------------------------------------------------------------ */ 353 /* Function: nat_addnat */ 354 /* Returns: Nil */ 355 /* Parameters: n(I) - pointer to NAT rule to add */ 356 /* */ 357 /* Adds a NAT map rule to the hash table of rules and the list of loaded */ 358 /* NAT rules. Updates the bitmask indicating which netmasks are in use by */ 359 /* redirect rules. */ 360 /* ------------------------------------------------------------------------ */ 361 static void nat_addnat(n, ifs) 362 ipnat_t *n; 363 ipf_stack_t *ifs; 364 { 365 ipnat_t **np; 366 u_32_t j; 367 u_int hv; 368 int k; 369 370 k = count4bits(n->in_inmsk); 371 if ((k >= 0) && (k != 32)) 372 ifs->ifs_nat_masks |= 1 << k; 373 j = (n->in_inip & n->in_inmsk); 374 hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_natrules_sz); 375 np = ifs->ifs_nat_rules + hv; 376 while (*np != NULL) 377 np = &(*np)->in_mnext; 378 n->in_mnext = NULL; 379 n->in_pmnext = np; 380 n->in_hv = hv; 381 *np = n; 382 } 383 384 385 /* ------------------------------------------------------------------------ */ 386 /* Function: nat_delrdr */ 387 /* Returns: Nil */ 388 /* Parameters: n(I) - pointer to NAT rule to delete */ 389 /* */ 390 /* Removes a redirect rule from the hash table of redirect rules. */ 391 /* ------------------------------------------------------------------------ */ 392 void nat_delrdr(n) 393 ipnat_t *n; 394 { 395 if (n->in_rnext) 396 n->in_rnext->in_prnext = n->in_prnext; 397 *n->in_prnext = n->in_rnext; 398 } 399 400 401 /* ------------------------------------------------------------------------ */ 402 /* Function: nat_delnat */ 403 /* Returns: Nil */ 404 /* Parameters: n(I) - pointer to NAT rule to delete */ 405 /* */ 406 /* Removes a NAT map rule from the hash table of NAT map rules. */ 407 /* ------------------------------------------------------------------------ */ 408 void nat_delnat(n) 409 ipnat_t *n; 410 { 411 if (n->in_mnext != NULL) 412 n->in_mnext->in_pmnext = n->in_pmnext; 413 *n->in_pmnext = n->in_mnext; 414 } 415 416 417 /* ------------------------------------------------------------------------ */ 418 /* Function: nat_hostmap */ 419 /* Returns: struct hostmap* - NULL if no hostmap could be created, */ 420 /* else a pointer to the hostmapping to use */ 421 /* Parameters: np(I) - pointer to NAT rule */ 422 /* real(I) - real IP address */ 423 /* map(I) - mapped IP address */ 424 /* port(I) - destination port number */ 425 /* Write Locks: ipf_nat */ 426 /* */ 427 /* Check if an ip address has already been allocated for a given mapping */ 428 /* that is not doing port based translation. If is not yet allocated, then */ 429 /* create a new entry if a non-NULL NAT rule pointer has been supplied. */ 430 /* ------------------------------------------------------------------------ */ 431 static struct hostmap *nat_hostmap(np, src, dst, map, port, ifs) 432 ipnat_t *np; 433 struct in_addr src; 434 struct in_addr dst; 435 struct in_addr map; 436 u_32_t port; 437 ipf_stack_t *ifs; 438 { 439 hostmap_t *hm; 440 u_int hv; 441 442 hv = (src.s_addr ^ dst.s_addr); 443 hv += src.s_addr; 444 hv += dst.s_addr; 445 hv %= HOSTMAP_SIZE; 446 for (hm = ifs->ifs_maptable[hv]; hm; hm = hm->hm_next) 447 if ((hm->hm_srcip.s_addr == src.s_addr) && 448 (hm->hm_dstip.s_addr == dst.s_addr) && 449 ((np == NULL) || (np == hm->hm_ipnat)) && 450 ((port == 0) || (port == hm->hm_port))) { 451 hm->hm_ref++; 452 return hm; 453 } 454 455 if (np == NULL) 456 return NULL; 457 458 KMALLOC(hm, hostmap_t *); 459 if (hm) { 460 hm->hm_hnext = ifs->ifs_ipf_hm_maplist; 461 hm->hm_phnext = &ifs->ifs_ipf_hm_maplist; 462 if (ifs->ifs_ipf_hm_maplist != NULL) 463 ifs->ifs_ipf_hm_maplist->hm_phnext = &hm->hm_hnext; 464 ifs->ifs_ipf_hm_maplist = hm; 465 466 hm->hm_next = ifs->ifs_maptable[hv]; 467 hm->hm_pnext = ifs->ifs_maptable + hv; 468 if (ifs->ifs_maptable[hv] != NULL) 469 ifs->ifs_maptable[hv]->hm_pnext = &hm->hm_next; 470 ifs->ifs_maptable[hv] = hm; 471 hm->hm_ipnat = np; 472 hm->hm_srcip = src; 473 hm->hm_dstip = dst; 474 hm->hm_mapip = map; 475 hm->hm_ref = 1; 476 hm->hm_port = port; 477 hm->hm_v = 4; 478 } 479 return hm; 480 } 481 482 483 /* ------------------------------------------------------------------------ */ 484 /* Function: fr_hostmapdel */ 485 /* Returns: Nil */ 486 /* Parameters: hmp(I) - pointer to pointer to hostmap structure */ 487 /* Write Locks: ipf_nat */ 488 /* */ 489 /* Decrement the references to this hostmap structure by one. If this */ 490 /* reaches zero then remove it and free it. */ 491 /* ------------------------------------------------------------------------ */ 492 void fr_hostmapdel(hmp) 493 struct hostmap **hmp; 494 { 495 struct hostmap *hm; 496 497 hm = *hmp; 498 *hmp = NULL; 499 500 hm->hm_ref--; 501 if (hm->hm_ref == 0) { 502 if (hm->hm_next) 503 hm->hm_next->hm_pnext = hm->hm_pnext; 504 *hm->hm_pnext = hm->hm_next; 505 if (hm->hm_hnext) 506 hm->hm_hnext->hm_phnext = hm->hm_phnext; 507 *hm->hm_phnext = hm->hm_hnext; 508 KFREE(hm); 509 } 510 } 511 512 513 /* ------------------------------------------------------------------------ */ 514 /* Function: fix_outcksum */ 515 /* Returns: Nil */ 516 /* Parameters: sp(I) - location of 16bit checksum to update */ 517 /* n((I) - amount to adjust checksum by */ 518 /* */ 519 /* Adjusts the 16bit checksum by "n" for packets going out. */ 520 /* ------------------------------------------------------------------------ */ 521 void fix_outcksum(sp, n) 522 u_short *sp; 523 u_32_t n; 524 { 525 u_short sumshort; 526 u_32_t sum1; 527 528 if (n == 0) 529 return; 530 531 sum1 = (~ntohs(*sp)) & 0xffff; 532 sum1 += (n); 533 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 534 /* Again */ 535 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 536 sumshort = ~(u_short)sum1; 537 *(sp) = htons(sumshort); 538 } 539 540 541 /* ------------------------------------------------------------------------ */ 542 /* Function: fix_incksum */ 543 /* Returns: Nil */ 544 /* Parameters: sp(I) - location of 16bit checksum to update */ 545 /* n((I) - amount to adjust checksum by */ 546 /* */ 547 /* Adjusts the 16bit checksum by "n" for packets going in. */ 548 /* ------------------------------------------------------------------------ */ 549 void fix_incksum(sp, n) 550 u_short *sp; 551 u_32_t n; 552 { 553 u_short sumshort; 554 u_32_t sum1; 555 556 if (n == 0) 557 return; 558 559 sum1 = (~ntohs(*sp)) & 0xffff; 560 sum1 += ~(n) & 0xffff; 561 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 562 /* Again */ 563 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 564 sumshort = ~(u_short)sum1; 565 *(sp) = htons(sumshort); 566 } 567 568 569 /* ------------------------------------------------------------------------ */ 570 /* Function: fix_datacksum */ 571 /* Returns: Nil */ 572 /* Parameters: sp(I) - location of 16bit checksum to update */ 573 /* n((I) - amount to adjust checksum by */ 574 /* */ 575 /* Fix_datacksum is used *only* for the adjustments of checksums in the */ 576 /* data section of an IP packet. */ 577 /* */ 578 /* The only situation in which you need to do this is when NAT'ing an */ 579 /* ICMP error message. Such a message, contains in its body the IP header */ 580 /* of the original IP packet, that causes the error. */ 581 /* */ 582 /* You can't use fix_incksum or fix_outcksum in that case, because for the */ 583 /* kernel the data section of the ICMP error is just data, and no special */ 584 /* processing like hardware cksum or ntohs processing have been done by the */ 585 /* kernel on the data section. */ 586 /* ------------------------------------------------------------------------ */ 587 void fix_datacksum(sp, n) 588 u_short *sp; 589 u_32_t n; 590 { 591 u_short sumshort; 592 u_32_t sum1; 593 594 if (n == 0) 595 return; 596 597 sum1 = (~ntohs(*sp)) & 0xffff; 598 sum1 += (n); 599 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 600 /* Again */ 601 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 602 sumshort = ~(u_short)sum1; 603 *(sp) = htons(sumshort); 604 } 605 606 607 /* ------------------------------------------------------------------------ */ 608 /* Function: fr_nat_ioctl */ 609 /* Returns: int - 0 == success, != 0 == failure */ 610 /* Parameters: data(I) - pointer to ioctl data */ 611 /* cmd(I) - ioctl command integer */ 612 /* mode(I) - file mode bits used with open */ 613 /* */ 614 /* Processes an ioctl call made to operate on the IP Filter NAT device. */ 615 /* ------------------------------------------------------------------------ */ 616 int fr_nat_ioctl(data, cmd, mode, uid, ctx, ifs) 617 ioctlcmd_t cmd; 618 caddr_t data; 619 int mode, uid; 620 void *ctx; 621 ipf_stack_t *ifs; 622 { 623 ipnat_t *nat, *nt, *n = NULL, **np = NULL; 624 int error = 0, ret, arg, getlock; 625 ipnat_t natd; 626 627 #if (BSD >= 199306) && defined(_KERNEL) 628 if ((securelevel >= 2) && (mode & FWRITE)) 629 return EPERM; 630 #endif 631 632 #if defined(__osf__) && defined(_KERNEL) 633 getlock = 0; 634 #else 635 getlock = (mode & NAT_LOCKHELD) ? 0 : 1; 636 #endif 637 638 nat = NULL; /* XXX gcc -Wuninitialized */ 639 if (cmd == (ioctlcmd_t)SIOCADNAT) { 640 KMALLOC(nt, ipnat_t *); 641 } else { 642 nt = NULL; 643 } 644 645 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) { 646 if (mode & NAT_SYSSPACE) { 647 bcopy(data, (char *)&natd, sizeof(natd)); 648 error = 0; 649 } else { 650 error = fr_inobj(data, &natd, IPFOBJ_IPNAT); 651 } 652 653 } else if (cmd == (ioctlcmd_t)SIOCIPFFL) { /* SIOCFLNAT & SIOCCNATL */ 654 BCOPYIN(data, &arg, sizeof(arg)); 655 } 656 657 if (error != 0) 658 goto done; 659 660 /* 661 * For add/delete, look to see if the NAT entry is already present 662 */ 663 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) { 664 nat = &natd; 665 if (nat->in_v == 0) /* For backward compat. */ 666 nat->in_v = 4; 667 nat->in_flags &= IPN_USERFLAGS; 668 if ((nat->in_redir & NAT_MAPBLK) == 0) { 669 if ((nat->in_flags & IPN_SPLIT) == 0) 670 nat->in_inip &= nat->in_inmsk; 671 if ((nat->in_flags & IPN_IPRANGE) == 0) 672 nat->in_outip &= nat->in_outmsk; 673 } 674 MUTEX_ENTER(&ifs->ifs_ipf_natio); 675 for (np = &ifs->ifs_nat_list; ((n = *np) != NULL); 676 np = &n->in_next) 677 if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags, 678 IPN_CMPSIZ) == 0) { 679 if (nat->in_redir == NAT_REDIRECT && 680 nat->in_pnext != n->in_pnext) 681 continue; 682 break; 683 } 684 } 685 686 switch (cmd) 687 { 688 case SIOCGENITER : 689 { 690 ipfgeniter_t iter; 691 ipftoken_t *token; 692 693 error = fr_inobj(data, &iter, IPFOBJ_GENITER); 694 if (error != 0) 695 break; 696 697 token = ipf_findtoken(iter.igi_type, uid, ctx, ifs); 698 if (token != NULL) 699 error = nat_iterator(token, &iter, ifs); 700 else 701 error = ESRCH; 702 RWLOCK_EXIT(&ifs->ifs_ipf_tokens); 703 break; 704 } 705 #ifdef IPFILTER_LOG 706 case SIOCIPFFB : 707 { 708 int tmp; 709 710 if (!(mode & FWRITE)) 711 error = EPERM; 712 else { 713 tmp = ipflog_clear(IPL_LOGNAT, ifs); 714 BCOPYOUT((char *)&tmp, (char *)data, sizeof(tmp)); 715 } 716 break; 717 } 718 case SIOCSETLG : 719 if (!(mode & FWRITE)) 720 error = EPERM; 721 else { 722 BCOPYIN((char *)data, 723 (char *)&ifs->ifs_nat_logging, 724 sizeof(ifs->ifs_nat_logging)); 725 } 726 break; 727 case SIOCGETLG : 728 BCOPYOUT((char *)&ifs->ifs_nat_logging, (char *)data, 729 sizeof(ifs->ifs_nat_logging)); 730 break; 731 case FIONREAD : 732 arg = ifs->ifs_iplused[IPL_LOGNAT]; 733 BCOPYOUT(&arg, data, sizeof(arg)); 734 break; 735 #endif 736 case SIOCADNAT : 737 if (!(mode & FWRITE)) { 738 error = EPERM; 739 } else if (n != NULL) { 740 error = EEXIST; 741 } else if (nt == NULL) { 742 error = ENOMEM; 743 } 744 if (error != 0) { 745 MUTEX_EXIT(&ifs->ifs_ipf_natio); 746 break; 747 } 748 bcopy((char *)nat, (char *)nt, sizeof(*n)); 749 error = nat_siocaddnat(nt, np, getlock, ifs); 750 MUTEX_EXIT(&ifs->ifs_ipf_natio); 751 if (error == 0) 752 nt = NULL; 753 break; 754 case SIOCRMNAT : 755 if (!(mode & FWRITE)) { 756 error = EPERM; 757 n = NULL; 758 } else if (n == NULL) { 759 error = ESRCH; 760 } 761 762 if (error != 0) { 763 MUTEX_EXIT(&ifs->ifs_ipf_natio); 764 break; 765 } 766 nat_siocdelnat(n, np, getlock, ifs); 767 768 MUTEX_EXIT(&ifs->ifs_ipf_natio); 769 n = NULL; 770 break; 771 case SIOCGNATS : 772 ifs->ifs_nat_stats.ns_table[0] = ifs->ifs_nat_table[0]; 773 ifs->ifs_nat_stats.ns_table[1] = ifs->ifs_nat_table[1]; 774 ifs->ifs_nat_stats.ns_list = ifs->ifs_nat_list; 775 ifs->ifs_nat_stats.ns_maptable = ifs->ifs_maptable; 776 ifs->ifs_nat_stats.ns_maplist = ifs->ifs_ipf_hm_maplist; 777 ifs->ifs_nat_stats.ns_nattab_max = ifs->ifs_ipf_nattable_max; 778 ifs->ifs_nat_stats.ns_nattab_sz = ifs->ifs_ipf_nattable_sz; 779 ifs->ifs_nat_stats.ns_rultab_sz = ifs->ifs_ipf_natrules_sz; 780 ifs->ifs_nat_stats.ns_rdrtab_sz = ifs->ifs_ipf_rdrrules_sz; 781 ifs->ifs_nat_stats.ns_hostmap_sz = ifs->ifs_ipf_hostmap_sz; 782 ifs->ifs_nat_stats.ns_instances = ifs->ifs_nat_instances; 783 ifs->ifs_nat_stats.ns_apslist = ifs->ifs_ap_sess_list; 784 error = fr_outobj(data, &ifs->ifs_nat_stats, IPFOBJ_NATSTAT); 785 break; 786 case SIOCGNATL : 787 { 788 natlookup_t nl; 789 790 if (getlock) { 791 READ_ENTER(&ifs->ifs_ipf_nat); 792 } 793 error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP); 794 if (nl.nl_v != 6) 795 nl.nl_v = 4; 796 if (error == 0) { 797 void *ptr; 798 799 switch (nl.nl_v) 800 { 801 case 4: 802 ptr = nat_lookupredir(&nl, ifs); 803 break; 804 #ifdef USE_INET6 805 case 6: 806 ptr = nat6_lookupredir(&nl, ifs); 807 break; 808 #endif 809 default: 810 ptr = NULL; 811 break; 812 } 813 814 if (ptr != NULL) { 815 error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP); 816 } else { 817 error = ESRCH; 818 } 819 } 820 if (getlock) { 821 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 822 } 823 break; 824 } 825 case SIOCIPFFL : /* old SIOCFLNAT & SIOCCNATL */ 826 if (!(mode & FWRITE)) { 827 error = EPERM; 828 break; 829 } 830 if (getlock) { 831 WRITE_ENTER(&ifs->ifs_ipf_nat); 832 } 833 error = 0; 834 if (arg == 0) 835 ret = nat_flushtable(ifs); 836 else if (arg == 1) 837 ret = nat_clearlist(ifs); 838 else if (arg >= 2 && arg <= 4) 839 ret = nat_extraflush(arg - 2, ifs); 840 else 841 error = EINVAL; 842 if (getlock) { 843 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 844 } 845 if (error == 0) { 846 BCOPYOUT(&ret, data, sizeof(ret)); 847 } 848 break; 849 case SIOCPROXY : 850 error = appr_ioctl(data, cmd, mode, ifs); 851 break; 852 case SIOCSTLCK : 853 if (!(mode & FWRITE)) { 854 error = EPERM; 855 } else { 856 fr_lock(data, &ifs->ifs_fr_nat_lock); 857 } 858 break; 859 case SIOCSTPUT : 860 if ((mode & FWRITE) != 0) { 861 error = fr_natputent(data, getlock, ifs); 862 } else { 863 error = EACCES; 864 } 865 break; 866 case SIOCSTGSZ : 867 if (ifs->ifs_fr_nat_lock) { 868 if (getlock) { 869 READ_ENTER(&ifs->ifs_ipf_nat); 870 } 871 error = fr_natgetsz(data, ifs); 872 if (getlock) { 873 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 874 } 875 } else 876 error = EACCES; 877 break; 878 case SIOCSTGET : 879 if (ifs->ifs_fr_nat_lock) { 880 if (getlock) { 881 READ_ENTER(&ifs->ifs_ipf_nat); 882 } 883 error = fr_natgetent(data, ifs); 884 if (getlock) { 885 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 886 } 887 } else 888 error = EACCES; 889 break; 890 case SIOCIPFDELTOK : 891 (void) BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg)); 892 error = ipf_deltoken(arg, uid, ctx, ifs); 893 break; 894 default : 895 error = EINVAL; 896 break; 897 } 898 done: 899 if (nt) 900 KFREE(nt); 901 return error; 902 } 903 904 905 /* ------------------------------------------------------------------------ */ 906 /* Function: nat_siocaddnat */ 907 /* Returns: int - 0 == success, != 0 == failure */ 908 /* Parameters: n(I) - pointer to new NAT rule */ 909 /* np(I) - pointer to where to insert new NAT rule */ 910 /* getlock(I) - flag indicating if lock on ipf_nat is held */ 911 /* Mutex Locks: ipf_natio */ 912 /* */ 913 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ 914 /* from information passed to the kernel, then add it to the appropriate */ 915 /* NAT rule table(s). */ 916 /* ------------------------------------------------------------------------ */ 917 static int nat_siocaddnat(n, np, getlock, ifs) 918 ipnat_t *n, **np; 919 int getlock; 920 ipf_stack_t *ifs; 921 { 922 int error = 0, i, j; 923 924 if (nat_resolverule(n, ifs) != 0) 925 return ENOENT; 926 927 if ((n->in_age[0] == 0) && (n->in_age[1] != 0)) 928 return EINVAL; 929 930 n->in_use = 0; 931 if (n->in_redir & NAT_MAPBLK) 932 n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk); 933 else if (n->in_flags & IPN_AUTOPORTMAP) 934 n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk); 935 else if (n->in_flags & IPN_IPRANGE) 936 n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip); 937 else if (n->in_flags & IPN_SPLIT) 938 n->in_space = 2; 939 else if (n->in_outmsk != 0) 940 n->in_space = ~ntohl(n->in_outmsk); 941 else 942 n->in_space = 1; 943 944 /* 945 * Calculate the number of valid IP addresses in the output 946 * mapping range. In all cases, the range is inclusive of 947 * the start and ending IP addresses. 948 * If to a CIDR address, lose 2: broadcast + network address 949 * (so subtract 1) 950 * If to a range, add one. 951 * If to a single IP address, set to 1. 952 */ 953 if (n->in_space) { 954 if ((n->in_flags & IPN_IPRANGE) != 0) 955 n->in_space += 1; 956 else 957 n->in_space -= 1; 958 } else 959 n->in_space = 1; 960 961 #ifdef USE_INET6 962 if (n->in_v == 6 && (n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0 && 963 !IP6_ISONES(&n->in_out[1]) && !IP6_ISZERO(&n->in_out[1])) 964 IP6_ADD(&n->in_out[0], 1, &n->in_next6) 965 else if (n->in_v == 6 && 966 (n->in_flags & IPN_SPLIT) && (n->in_redir & NAT_REDIRECT)) 967 n->in_next6 = n->in_in[0]; 968 else if (n->in_v == 6) 969 n->in_next6 = n->in_out[0]; 970 else 971 #endif 972 if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) && 973 ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0)) 974 n->in_nip = ntohl(n->in_outip) + 1; 975 else if ((n->in_flags & IPN_SPLIT) && 976 (n->in_redir & NAT_REDIRECT)) 977 n->in_nip = ntohl(n->in_inip); 978 else 979 n->in_nip = ntohl(n->in_outip); 980 981 if (n->in_redir & NAT_MAP) { 982 n->in_pnext = ntohs(n->in_pmin); 983 /* 984 * Multiply by the number of ports made available. 985 */ 986 if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) { 987 n->in_space *= (ntohs(n->in_pmax) - 988 ntohs(n->in_pmin) + 1); 989 /* 990 * Because two different sources can map to 991 * different destinations but use the same 992 * local IP#/port #. 993 * If the result is smaller than in_space, then 994 * we may have wrapped around 32bits. 995 */ 996 i = n->in_inmsk; 997 if ((i != 0) && (i != 0xffffffff)) { 998 j = n->in_space * (~ntohl(i) + 1); 999 if (j >= n->in_space) 1000 n->in_space = j; 1001 else 1002 n->in_space = 0xffffffff; 1003 } 1004 } 1005 /* 1006 * If no protocol is specified, multiple by 256 to allow for 1007 * at least one IP:IP mapping per protocol. 1008 */ 1009 if ((n->in_flags & IPN_TCPUDPICMP) == 0) { 1010 j = n->in_space * 256; 1011 if (j >= n->in_space) 1012 n->in_space = j; 1013 else 1014 n->in_space = 0xffffffff; 1015 } 1016 } 1017 1018 /* Otherwise, these fields are preset */ 1019 1020 if (getlock) { 1021 WRITE_ENTER(&ifs->ifs_ipf_nat); 1022 } 1023 n->in_next = NULL; 1024 *np = n; 1025 1026 if (n->in_age[0] != 0) 1027 n->in_tqehead[0] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe, 1028 n->in_age[0], ifs); 1029 1030 if (n->in_age[1] != 0) 1031 n->in_tqehead[1] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe, 1032 n->in_age[1], ifs); 1033 1034 if (n->in_redir & NAT_REDIRECT) { 1035 n->in_flags &= ~IPN_NOTDST; 1036 switch (n->in_v) 1037 { 1038 case 4 : 1039 nat_addrdr(n, ifs); 1040 break; 1041 #ifdef USE_INET6 1042 case 6 : 1043 nat6_addrdr(n, ifs); 1044 break; 1045 #endif 1046 default : 1047 break; 1048 } 1049 } 1050 if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) { 1051 n->in_flags &= ~IPN_NOTSRC; 1052 switch (n->in_v) 1053 { 1054 case 4 : 1055 nat_addnat(n, ifs); 1056 break; 1057 #ifdef USE_INET6 1058 case 6 : 1059 nat6_addnat(n, ifs); 1060 break; 1061 #endif 1062 default : 1063 break; 1064 } 1065 } 1066 n = NULL; 1067 ifs->ifs_nat_stats.ns_rules++; 1068 if (getlock) { 1069 RWLOCK_EXIT(&ifs->ifs_ipf_nat); /* WRITE */ 1070 } 1071 1072 return error; 1073 } 1074 1075 1076 /* ------------------------------------------------------------------------ */ 1077 /* Function: nat_resolvrule */ 1078 /* Returns: int - 0 == success, -1 == failure */ 1079 /* Parameters: n(I) - pointer to NAT rule */ 1080 /* */ 1081 /* Resolve some of the details inside the NAT rule. Includes resolving */ 1082 /* any specified interfaces and proxy labels, and determines whether or not */ 1083 /* all proxy labels are correctly specified. */ 1084 /* */ 1085 /* Called by nat_siocaddnat() (SIOCADNAT) and fr_natputent (SIOCSTPUT). */ 1086 /* ------------------------------------------------------------------------ */ 1087 static int nat_resolverule(n, ifs) 1088 ipnat_t *n; 1089 ipf_stack_t *ifs; 1090 { 1091 n->in_ifnames[0][LIFNAMSIZ - 1] = '\0'; 1092 n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], n->in_v, ifs); 1093 1094 n->in_ifnames[1][LIFNAMSIZ - 1] = '\0'; 1095 if (n->in_ifnames[1][0] == '\0') { 1096 (void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ); 1097 n->in_ifps[1] = n->in_ifps[0]; 1098 } else { 1099 n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], n->in_v, ifs); 1100 } 1101 1102 if (n->in_plabel[0] != '\0') { 1103 n->in_apr = appr_lookup(n->in_p, n->in_plabel, ifs); 1104 if (n->in_apr == NULL) 1105 return -1; 1106 } 1107 return 0; 1108 } 1109 1110 1111 /* ------------------------------------------------------------------------ */ 1112 /* Function: nat_siocdelnat */ 1113 /* Returns: int - 0 == success, != 0 == failure */ 1114 /* Parameters: n(I) - pointer to new NAT rule */ 1115 /* np(I) - pointer to where to insert new NAT rule */ 1116 /* getlock(I) - flag indicating if lock on ipf_nat is held */ 1117 /* Mutex Locks: ipf_natio */ 1118 /* */ 1119 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ 1120 /* from information passed to the kernel, then add it to the appropriate */ 1121 /* NAT rule table(s). */ 1122 /* ------------------------------------------------------------------------ */ 1123 static void nat_siocdelnat(n, np, getlock, ifs) 1124 ipnat_t *n, **np; 1125 int getlock; 1126 ipf_stack_t *ifs; 1127 { 1128 int i; 1129 1130 if (getlock) { 1131 WRITE_ENTER(&ifs->ifs_ipf_nat); 1132 } 1133 if (n->in_redir & NAT_REDIRECT) 1134 nat_delrdr(n); 1135 if (n->in_redir & (NAT_MAPBLK|NAT_MAP)) 1136 nat_delnat(n); 1137 if (ifs->ifs_nat_list == NULL) { 1138 ifs->ifs_nat_masks = 0; 1139 ifs->ifs_rdr_masks = 0; 1140 for (i = 0; i < 4; i++) { 1141 ifs->ifs_nat6_masks[i] = 0; 1142 ifs->ifs_rdr6_masks[i] = 0; 1143 } 1144 } 1145 1146 if (n->in_tqehead[0] != NULL) { 1147 if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) { 1148 fr_freetimeoutqueue(n->in_tqehead[0], ifs); 1149 } 1150 } 1151 1152 if (n->in_tqehead[1] != NULL) { 1153 if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) { 1154 fr_freetimeoutqueue(n->in_tqehead[1], ifs); 1155 } 1156 } 1157 1158 *np = n->in_next; 1159 1160 if (n->in_use == 0) { 1161 if (n->in_apr) 1162 appr_free(n->in_apr); 1163 KFREE(n); 1164 ifs->ifs_nat_stats.ns_rules--; 1165 } else { 1166 n->in_flags |= IPN_DELETE; 1167 n->in_next = NULL; 1168 } 1169 if (getlock) { 1170 RWLOCK_EXIT(&ifs->ifs_ipf_nat); /* READ/WRITE */ 1171 } 1172 } 1173 1174 1175 /* ------------------------------------------------------------------------ */ 1176 /* Function: fr_natgetsz */ 1177 /* Returns: int - 0 == success, != 0 is the error value. */ 1178 /* Parameters: data(I) - pointer to natget structure with kernel pointer */ 1179 /* get the size of. */ 1180 /* */ 1181 /* Handle SIOCSTGSZ. */ 1182 /* Return the size of the nat list entry to be copied back to user space. */ 1183 /* The size of the entry is stored in the ng_sz field and the enture natget */ 1184 /* structure is copied back to the user. */ 1185 /* ------------------------------------------------------------------------ */ 1186 static int fr_natgetsz(data, ifs) 1187 caddr_t data; 1188 ipf_stack_t *ifs; 1189 { 1190 ap_session_t *aps; 1191 nat_t *nat, *n; 1192 natget_t ng; 1193 1194 BCOPYIN(data, &ng, sizeof(ng)); 1195 1196 nat = ng.ng_ptr; 1197 if (!nat) { 1198 nat = ifs->ifs_nat_instances; 1199 ng.ng_sz = 0; 1200 /* 1201 * Empty list so the size returned is 0. Simple. 1202 */ 1203 if (nat == NULL) { 1204 BCOPYOUT(&ng, data, sizeof(ng)); 1205 return 0; 1206 } 1207 } else { 1208 /* 1209 * Make sure the pointer we're copying from exists in the 1210 * current list of entries. Security precaution to prevent 1211 * copying of random kernel data. 1212 */ 1213 for (n = ifs->ifs_nat_instances; n; n = n->nat_next) 1214 if (n == nat) 1215 break; 1216 if (!n) 1217 return ESRCH; 1218 } 1219 1220 /* 1221 * Incluse any space required for proxy data structures. 1222 */ 1223 ng.ng_sz = sizeof(nat_save_t); 1224 aps = nat->nat_aps; 1225 if (aps != NULL) { 1226 ng.ng_sz += sizeof(ap_session_t) - 4; 1227 if (aps->aps_data != 0) 1228 ng.ng_sz += aps->aps_psiz; 1229 } 1230 1231 BCOPYOUT(&ng, data, sizeof(ng)); 1232 return 0; 1233 } 1234 1235 1236 /* ------------------------------------------------------------------------ */ 1237 /* Function: fr_natgetent */ 1238 /* Returns: int - 0 == success, != 0 is the error value. */ 1239 /* Parameters: data(I) - pointer to natget structure with kernel pointer */ 1240 /* to NAT structure to copy out. */ 1241 /* */ 1242 /* Handle SIOCSTGET. */ 1243 /* Copies out NAT entry to user space. Any additional data held for a */ 1244 /* proxy is also copied, as to is the NAT rule which was responsible for it */ 1245 /* ------------------------------------------------------------------------ */ 1246 static int fr_natgetent(data, ifs) 1247 caddr_t data; 1248 ipf_stack_t *ifs; 1249 { 1250 int error, outsize; 1251 ap_session_t *aps; 1252 nat_save_t *ipn, ipns; 1253 nat_t *n, *nat; 1254 1255 error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE); 1256 if (error != 0) 1257 return error; 1258 1259 if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920)) 1260 return EINVAL; 1261 1262 KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize); 1263 if (ipn == NULL) 1264 return ENOMEM; 1265 1266 ipn->ipn_dsize = ipns.ipn_dsize; 1267 nat = ipns.ipn_next; 1268 if (nat == NULL) { 1269 nat = ifs->ifs_nat_instances; 1270 if (nat == NULL) { 1271 if (ifs->ifs_nat_instances == NULL) 1272 error = ENOENT; 1273 goto finished; 1274 } 1275 } else { 1276 /* 1277 * Make sure the pointer we're copying from exists in the 1278 * current list of entries. Security precaution to prevent 1279 * copying of random kernel data. 1280 */ 1281 for (n = ifs->ifs_nat_instances; n; n = n->nat_next) 1282 if (n == nat) 1283 break; 1284 if (n == NULL) { 1285 error = ESRCH; 1286 goto finished; 1287 } 1288 } 1289 ipn->ipn_next = nat->nat_next; 1290 1291 /* 1292 * Copy the NAT structure. 1293 */ 1294 bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat)); 1295 1296 /* 1297 * If we have a pointer to the NAT rule it belongs to, save that too. 1298 */ 1299 if (nat->nat_ptr != NULL) 1300 bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat, 1301 sizeof(ipn->ipn_ipnat)); 1302 1303 /* 1304 * If we also know the NAT entry has an associated filter rule, 1305 * save that too. 1306 */ 1307 if (nat->nat_fr != NULL) 1308 bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr, 1309 sizeof(ipn->ipn_fr)); 1310 1311 /* 1312 * Last but not least, if there is an application proxy session set 1313 * up for this NAT entry, then copy that out too, including any 1314 * private data saved along side it by the proxy. 1315 */ 1316 aps = nat->nat_aps; 1317 outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data); 1318 if (aps != NULL) { 1319 char *s; 1320 1321 if (outsize < sizeof(*aps)) { 1322 error = ENOBUFS; 1323 goto finished; 1324 } 1325 1326 s = ipn->ipn_data; 1327 bcopy((char *)aps, s, sizeof(*aps)); 1328 s += sizeof(*aps); 1329 outsize -= sizeof(*aps); 1330 if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz)) 1331 bcopy(aps->aps_data, s, aps->aps_psiz); 1332 else 1333 error = ENOBUFS; 1334 } 1335 if (error == 0) { 1336 error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize); 1337 } 1338 1339 finished: 1340 if (ipn != NULL) { 1341 KFREES(ipn, ipns.ipn_dsize); 1342 } 1343 return error; 1344 } 1345 1346 /* ------------------------------------------------------------------------ */ 1347 /* Function: nat_calc_chksum_diffs */ 1348 /* Returns: void */ 1349 /* Parameters: nat - pointer to NAT table entry */ 1350 /* */ 1351 /* Function calculates chksum deltas for IP header (nat_ipsumd) and TCP/UDP */ 1352 /* headers (nat_sumd). The things for L4 (UDP/TCP) get complicated when */ 1353 /* we are dealing with partial chksum offload. For these cases we need to */ 1354 /* compute a 'partial chksum delta'. The 'partial chksum delta'is stored */ 1355 /* into nat_sumd[1], while ordinary chksum delta for TCP/UDP is in */ 1356 /* nat_sumd[0]. */ 1357 /* */ 1358 /* The function accepts initialized NAT table entry and computes the deltas */ 1359 /* from nat_inip/nat_outip members. The function is called right before */ 1360 /* the new entry is inserted into the table. */ 1361 /* */ 1362 /* The ipsumd (IP hedaer chksum delta adjustment) is computed as a chksum */ 1363 /* of delta between original and new IP addresses. */ 1364 /* */ 1365 /* the nat_sumd[0] (TCP/UDP header chksum delta adjustment) is computed as */ 1366 /* a chkusm of delta between original an new IP addrress:port tupples. */ 1367 /* */ 1368 /* Some facts about chksum, we should remember: */ 1369 /* IP header chksum covers IP header only */ 1370 /* */ 1371 /* TCP/UDP chksum covers data payload and so called pseudo header */ 1372 /* SRC, DST IP address */ 1373 /* SRC, DST Port */ 1374 /* length of payload */ 1375 /* */ 1376 /* The partial chksum delta (nat_sumd[1] is used to adjust db_ckusm16 */ 1377 /* member of dblk_t structure. The db_ckusm16 member is not part of */ 1378 /* IP/UDP/TCP header it is 16 bit value computed by NIC driver with partial */ 1379 /* chksum offload capacbility for every inbound packet. The db_cksum16 is */ 1380 /* stored along with other IP packet data in dblk_t structure and used in */ 1381 /* for IP/UDP/TCP chksum validation later in ip.c. */ 1382 /* */ 1383 /* The partial chksum delta (adjustment, nat_sumd[1]) is computed as chksum */ 1384 /* of delta between new and orig address. NOTE: the order of operands for */ 1385 /* partial delta operation is swapped compared to computing the IP/TCP/UDP */ 1386 /* header adjustment. It is by design see (IP_CKSUM_RECV() macro in ip.c). */ 1387 /* */ 1388 /* ------------------------------------------------------------------------ */ 1389 void nat_calc_chksum_diffs(nat) 1390 nat_t *nat; 1391 { 1392 u_32_t sum_orig = 0; 1393 u_32_t sum_changed = 0; 1394 u_32_t sumd; 1395 u_32_t ipsum_orig = 0; 1396 u_32_t ipsum_changed = 0; 1397 1398 if (nat->nat_v != 4 && nat->nat_v != 6) 1399 return; 1400 1401 /* 1402 * the switch calculates operands for CALC_SUMD(), 1403 * which will compute the partial chksum delta. 1404 */ 1405 switch (nat->nat_dir) 1406 { 1407 case NAT_INBOUND: 1408 /* 1409 * we are dealing with RDR rule (DST address gets 1410 * modified on packet from client) 1411 */ 1412 if (nat->nat_v == 4) { 1413 sum_changed = LONG_SUM(ntohl(nat->nat_inip.s_addr)); 1414 sum_orig = LONG_SUM(ntohl(nat->nat_outip.s_addr)); 1415 } else { 1416 sum_changed = LONG_SUM6(&nat->nat_inip6); 1417 sum_orig = LONG_SUM6(&nat->nat_outip6); 1418 } 1419 break; 1420 case NAT_OUTBOUND: 1421 /* 1422 * we are dealing with MAP rule (SRC address gets 1423 * modified on packet from client) 1424 */ 1425 if (nat->nat_v == 4) { 1426 sum_changed = LONG_SUM(ntohl(nat->nat_outip.s_addr)); 1427 sum_orig = LONG_SUM(ntohl(nat->nat_inip.s_addr)); 1428 } else { 1429 sum_changed = LONG_SUM6(&nat->nat_outip6); 1430 sum_orig = LONG_SUM6(&nat->nat_inip6); 1431 } 1432 break; 1433 default: ; 1434 break; 1435 } 1436 1437 /* 1438 * we also preserve CALC_SUMD() operands here, for IP chksum delta 1439 * calculation, which happens at the end of function. 1440 */ 1441 ipsum_changed = sum_changed; 1442 ipsum_orig = sum_orig; 1443 /* 1444 * NOTE: the order of operands for partial chksum adjustment 1445 * computation has to be swapped! 1446 */ 1447 CALC_SUMD(sum_changed, sum_orig, sumd); 1448 nat->nat_sumd[1] = (sumd & 0xffff) + (sumd >> 16); 1449 1450 if (nat->nat_flags & (IPN_TCPUDP | IPN_ICMPQUERY)) { 1451 1452 /* 1453 * switch calculates operands for CALC_SUMD(), which will 1454 * compute the full chksum delta. 1455 */ 1456 switch (nat->nat_dir) 1457 { 1458 case NAT_INBOUND: 1459 if (nat->nat_v == 4) { 1460 sum_changed = LONG_SUM( 1461 ntohl(nat->nat_inip.s_addr) + 1462 ntohs(nat->nat_inport)); 1463 sum_orig = LONG_SUM( 1464 ntohl(nat->nat_outip.s_addr) + 1465 ntohs(nat->nat_outport)); 1466 } else { 1467 sum_changed = LONG_SUM6(&nat->nat_inip6) + 1468 ntohs(nat->nat_inport); 1469 sum_orig = LONG_SUM6(&nat->nat_outip6) + 1470 ntohs(nat->nat_outport); 1471 } 1472 break; 1473 case NAT_OUTBOUND: 1474 if (nat->nat_v == 4) { 1475 sum_changed = LONG_SUM( 1476 ntohl(nat->nat_outip.s_addr) + 1477 ntohs(nat->nat_outport)); 1478 sum_orig = LONG_SUM( 1479 ntohl(nat->nat_inip.s_addr) + 1480 ntohs(nat->nat_inport)); 1481 } else { 1482 sum_changed = LONG_SUM6(&nat->nat_outip6) + 1483 ntohs(nat->nat_outport); 1484 sum_orig = LONG_SUM6(&nat->nat_inip6) + 1485 ntohs(nat->nat_inport); 1486 } 1487 break; 1488 default: ; 1489 break; 1490 } 1491 1492 CALC_SUMD(sum_orig, sum_changed, sumd); 1493 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); 1494 1495 if (!(nat->nat_flags & IPN_TCPUDP)) { 1496 /* 1497 * partial HW chksum offload works for TCP/UDP headers only, 1498 * so we need to enforce full chksum adjustment for ICMP 1499 */ 1500 nat->nat_sumd[1] = nat->nat_sumd[0]; 1501 } 1502 } 1503 else 1504 nat->nat_sumd[0] = nat->nat_sumd[1]; 1505 1506 /* 1507 * we may reuse the already computed nat_sumd[0] for IP header chksum 1508 * adjustment in case the L4 (TCP/UDP header) is not changed by NAT. 1509 */ 1510 if (nat->nat_v == 4) { 1511 if (NAT_HAS_L4_CHANGED(nat)) { 1512 /* 1513 * bad luck, NAT changes also the L4 header, use IP 1514 * addresses to compute chksum adjustment for IP header. 1515 */ 1516 CALC_SUMD(ipsum_orig, ipsum_changed, sumd); 1517 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16); 1518 } else { 1519 /* 1520 * the NAT does not change L4 hdr -> reuse chksum 1521 * adjustment for IP hdr. 1522 */ 1523 nat->nat_ipsumd = nat->nat_sumd[0]; 1524 1525 /* 1526 * if L4 header does not use chksum - zero out deltas 1527 */ 1528 if (!(nat->nat_flags & IPN_TCPUDP)) { 1529 nat->nat_sumd[0] = 0; 1530 nat->nat_sumd[1] = 0; 1531 } 1532 } 1533 } 1534 1535 return; 1536 } 1537 1538 /* ------------------------------------------------------------------------ */ 1539 /* Function: fr_natputent */ 1540 /* Returns: int - 0 == success, != 0 is the error value. */ 1541 /* Parameters: data(I) - pointer to natget structure with NAT */ 1542 /* structure information to load into the kernel */ 1543 /* getlock(I) - flag indicating whether or not a write lock */ 1544 /* on ipf_nat is already held. */ 1545 /* */ 1546 /* Handle SIOCSTPUT. */ 1547 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */ 1548 /* firewall rule data structures, if pointers to them indicate so. */ 1549 /* ------------------------------------------------------------------------ */ 1550 static int fr_natputent(data, getlock, ifs) 1551 caddr_t data; 1552 int getlock; 1553 ipf_stack_t *ifs; 1554 { 1555 nat_save_t ipn, *ipnn; 1556 ap_session_t *aps; 1557 nat_t *n, *nat; 1558 frentry_t *fr; 1559 fr_info_t fin; 1560 ipnat_t *in; 1561 int error; 1562 1563 error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE); 1564 if (error != 0) 1565 return error; 1566 1567 /* 1568 * Trigger automatic call to nat_extraflush() if the 1569 * table has reached capcity specified by hi watermark. 1570 */ 1571 if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi) 1572 ifs->ifs_nat_doflush = 1; 1573 1574 /* 1575 * Initialise early because of code at junkput label. 1576 */ 1577 in = NULL; 1578 aps = NULL; 1579 nat = NULL; 1580 ipnn = NULL; 1581 1582 /* 1583 * New entry, copy in the rest of the NAT entry if it's size is more 1584 * than just the nat_t structure. 1585 */ 1586 fr = NULL; 1587 if (ipn.ipn_dsize > sizeof(ipn)) { 1588 if (ipn.ipn_dsize > 81920) { 1589 error = ENOMEM; 1590 goto junkput; 1591 } 1592 1593 KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize); 1594 if (ipnn == NULL) 1595 return ENOMEM; 1596 1597 error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize); 1598 if (error != 0) { 1599 error = EFAULT; 1600 goto junkput; 1601 } 1602 } else 1603 ipnn = &ipn; 1604 1605 KMALLOC(nat, nat_t *); 1606 if (nat == NULL) { 1607 error = ENOMEM; 1608 goto junkput; 1609 } 1610 1611 bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat)); 1612 /* 1613 * Initialize all these so that nat_delete() doesn't cause a crash. 1614 */ 1615 bzero((char *)nat, offsetof(struct nat, nat_tqe)); 1616 nat->nat_tqe.tqe_pnext = NULL; 1617 nat->nat_tqe.tqe_next = NULL; 1618 nat->nat_tqe.tqe_ifq = NULL; 1619 nat->nat_tqe.tqe_parent = nat; 1620 1621 /* 1622 * Restore the rule associated with this nat session 1623 */ 1624 in = ipnn->ipn_nat.nat_ptr; 1625 if (in != NULL) { 1626 KMALLOC(in, ipnat_t *); 1627 nat->nat_ptr = in; 1628 if (in == NULL) { 1629 error = ENOMEM; 1630 goto junkput; 1631 } 1632 bzero((char *)in, offsetof(struct ipnat, in_next6)); 1633 bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in)); 1634 in->in_use = 1; 1635 in->in_flags |= IPN_DELETE; 1636 1637 ATOMIC_INC(ifs->ifs_nat_stats.ns_rules); 1638 1639 if (nat_resolverule(in, ifs) != 0) { 1640 error = ESRCH; 1641 goto junkput; 1642 } 1643 } 1644 1645 /* 1646 * Check that the NAT entry doesn't already exist in the kernel. 1647 */ 1648 if (nat->nat_v != 6) 1649 nat->nat_v = 4; 1650 bzero((char *)&fin, sizeof(fin)); 1651 fin.fin_p = nat->nat_p; 1652 fin.fin_ifs = ifs; 1653 if (nat->nat_dir == NAT_OUTBOUND) { 1654 fin.fin_data[0] = ntohs(nat->nat_oport); 1655 fin.fin_data[1] = ntohs(nat->nat_outport); 1656 fin.fin_ifp = nat->nat_ifps[0]; 1657 if (getlock) { 1658 READ_ENTER(&ifs->ifs_ipf_nat); 1659 } 1660 1661 switch (nat->nat_v) 1662 { 1663 case 4: 1664 fin.fin_v = nat->nat_v; 1665 n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p, 1666 nat->nat_oip, nat->nat_outip); 1667 break; 1668 #ifdef USE_INET6 1669 case 6: 1670 n = nat6_inlookup(&fin, nat->nat_flags, fin.fin_p, 1671 &nat->nat_oip6.in6, &nat->nat_outip6.in6); 1672 break; 1673 #endif 1674 default: 1675 n = NULL; 1676 break; 1677 } 1678 1679 if (getlock) { 1680 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1681 } 1682 if (n != NULL) { 1683 error = EEXIST; 1684 goto junkput; 1685 } 1686 } else if (nat->nat_dir == NAT_INBOUND) { 1687 fin.fin_data[0] = ntohs(nat->nat_inport); 1688 fin.fin_data[1] = ntohs(nat->nat_oport); 1689 fin.fin_ifp = nat->nat_ifps[1]; 1690 if (getlock) { 1691 READ_ENTER(&ifs->ifs_ipf_nat); 1692 } 1693 1694 switch (nat->nat_v) 1695 { 1696 case 4: 1697 n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p, 1698 nat->nat_inip, nat->nat_oip); 1699 break; 1700 #ifdef USE_INET6 1701 case 6: 1702 n = nat6_outlookup(&fin, nat->nat_flags, fin.fin_p, 1703 &nat->nat_inip6.in6, &nat->nat_oip6.in6); 1704 break; 1705 #endif 1706 default: 1707 n = NULL; 1708 break; 1709 } 1710 1711 if (getlock) { 1712 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1713 } 1714 if (n != NULL) { 1715 error = EEXIST; 1716 goto junkput; 1717 } 1718 } else { 1719 error = EINVAL; 1720 goto junkput; 1721 } 1722 1723 /* 1724 * Restore ap_session_t structure. Include the private data allocated 1725 * if it was there. 1726 */ 1727 aps = nat->nat_aps; 1728 if (aps != NULL) { 1729 KMALLOC(aps, ap_session_t *); 1730 nat->nat_aps = aps; 1731 if (aps == NULL) { 1732 error = ENOMEM; 1733 goto junkput; 1734 } 1735 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps)); 1736 if (in != NULL) 1737 aps->aps_apr = in->in_apr; 1738 else 1739 aps->aps_apr = NULL; 1740 if (aps->aps_psiz != 0) { 1741 if (aps->aps_psiz > 81920) { 1742 error = ENOMEM; 1743 goto junkput; 1744 } 1745 KMALLOCS(aps->aps_data, void *, aps->aps_psiz); 1746 if (aps->aps_data == NULL) { 1747 error = ENOMEM; 1748 goto junkput; 1749 } 1750 bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data, 1751 aps->aps_psiz); 1752 } else { 1753 aps->aps_psiz = 0; 1754 aps->aps_data = NULL; 1755 } 1756 } 1757 1758 /* 1759 * If there was a filtering rule associated with this entry then 1760 * build up a new one. 1761 */ 1762 fr = nat->nat_fr; 1763 if (fr != NULL) { 1764 if ((nat->nat_flags & SI_NEWFR) != 0) { 1765 KMALLOC(fr, frentry_t *); 1766 nat->nat_fr = fr; 1767 if (fr == NULL) { 1768 error = ENOMEM; 1769 goto junkput; 1770 } 1771 ipnn->ipn_nat.nat_fr = fr; 1772 (void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE); 1773 bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr)); 1774 1775 fr->fr_ref = 1; 1776 fr->fr_dsize = 0; 1777 fr->fr_data = NULL; 1778 fr->fr_type = FR_T_NONE; 1779 1780 MUTEX_NUKE(&fr->fr_lock); 1781 MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock"); 1782 } else { 1783 if (getlock) { 1784 READ_ENTER(&ifs->ifs_ipf_nat); 1785 } 1786 for (n = ifs->ifs_nat_instances; n; n = n->nat_next) 1787 if (n->nat_fr == fr) 1788 break; 1789 1790 if (n != NULL) { 1791 MUTEX_ENTER(&fr->fr_lock); 1792 fr->fr_ref++; 1793 MUTEX_EXIT(&fr->fr_lock); 1794 } 1795 if (getlock) { 1796 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1797 } 1798 if (!n) { 1799 error = ESRCH; 1800 goto junkput; 1801 } 1802 } 1803 } 1804 1805 if (ipnn != &ipn) { 1806 KFREES(ipnn, ipn.ipn_dsize); 1807 ipnn = NULL; 1808 } 1809 1810 nat_calc_chksum_diffs(nat); 1811 1812 if (getlock) { 1813 WRITE_ENTER(&ifs->ifs_ipf_nat); 1814 } 1815 1816 nat_calc_chksum_diffs(nat); 1817 1818 switch (nat->nat_v) 1819 { 1820 case 4 : 1821 error = nat_insert(nat, nat->nat_rev, ifs); 1822 break; 1823 #ifdef USE_INET6 1824 case 6 : 1825 error = nat6_insert(nat, nat->nat_rev, ifs); 1826 break; 1827 #endif 1828 default : 1829 break; 1830 } 1831 1832 if ((error == 0) && (aps != NULL)) { 1833 aps->aps_next = ifs->ifs_ap_sess_list; 1834 ifs->ifs_ap_sess_list = aps; 1835 } 1836 if (getlock) { 1837 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1838 } 1839 1840 if (error == 0) 1841 return 0; 1842 1843 error = ENOMEM; 1844 1845 junkput: 1846 if (fr != NULL) 1847 (void) fr_derefrule(&fr, ifs); 1848 1849 if ((ipnn != NULL) && (ipnn != &ipn)) { 1850 KFREES(ipnn, ipn.ipn_dsize); 1851 } 1852 if (nat != NULL) { 1853 if (aps != NULL) { 1854 if (aps->aps_data != NULL) { 1855 KFREES(aps->aps_data, aps->aps_psiz); 1856 } 1857 KFREE(aps); 1858 } 1859 if (in != NULL) { 1860 if (in->in_apr) 1861 appr_free(in->in_apr); 1862 KFREE(in); 1863 } 1864 KFREE(nat); 1865 } 1866 return error; 1867 } 1868 1869 1870 /* ------------------------------------------------------------------------ */ 1871 /* Function: nat_delete */ 1872 /* Returns: Nil */ 1873 /* Parameters: natd(I) - pointer to NAT structure to delete */ 1874 /* logtype(I) - type of LOG record to create before deleting */ 1875 /* Write Lock: ipf_nat */ 1876 /* */ 1877 /* Delete a nat entry from the various lists and table. If NAT logging is */ 1878 /* enabled then generate a NAT log record for this event. */ 1879 /* ------------------------------------------------------------------------ */ 1880 static void nat_delete(nat, logtype, ifs) 1881 struct nat *nat; 1882 int logtype; 1883 ipf_stack_t *ifs; 1884 { 1885 struct ipnat *ipn; 1886 1887 if (logtype != 0 && ifs->ifs_nat_logging != 0) 1888 nat_log(nat, logtype, ifs); 1889 1890 /* 1891 * Take it as a general indication that all the pointers are set if 1892 * nat_pnext is set. 1893 */ 1894 if (nat->nat_pnext != NULL) { 1895 ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--; 1896 ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--; 1897 1898 *nat->nat_pnext = nat->nat_next; 1899 if (nat->nat_next != NULL) { 1900 nat->nat_next->nat_pnext = nat->nat_pnext; 1901 nat->nat_next = NULL; 1902 } 1903 nat->nat_pnext = NULL; 1904 1905 *nat->nat_phnext[0] = nat->nat_hnext[0]; 1906 if (nat->nat_hnext[0] != NULL) { 1907 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; 1908 nat->nat_hnext[0] = NULL; 1909 } 1910 nat->nat_phnext[0] = NULL; 1911 1912 *nat->nat_phnext[1] = nat->nat_hnext[1]; 1913 if (nat->nat_hnext[1] != NULL) { 1914 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; 1915 nat->nat_hnext[1] = NULL; 1916 } 1917 nat->nat_phnext[1] = NULL; 1918 1919 if ((nat->nat_flags & SI_WILDP) != 0) 1920 ifs->ifs_nat_stats.ns_wilds--; 1921 } 1922 1923 if (nat->nat_me != NULL) { 1924 *nat->nat_me = NULL; 1925 nat->nat_me = NULL; 1926 } 1927 1928 fr_deletequeueentry(&nat->nat_tqe); 1929 1930 MUTEX_ENTER(&nat->nat_lock); 1931 if (nat->nat_ref > 1) { 1932 nat->nat_ref--; 1933 MUTEX_EXIT(&nat->nat_lock); 1934 return; 1935 } 1936 MUTEX_EXIT(&nat->nat_lock); 1937 1938 /* 1939 * At this point, nat_ref is 1, doing "--" would make it 0.. 1940 */ 1941 nat->nat_ref = 0; 1942 1943 #ifdef IPFILTER_SYNC 1944 if (nat->nat_sync) 1945 ipfsync_del(nat->nat_sync); 1946 #endif 1947 1948 if (nat->nat_fr != NULL) 1949 (void)fr_derefrule(&nat->nat_fr, ifs); 1950 1951 if (nat->nat_hm != NULL) 1952 fr_hostmapdel(&nat->nat_hm); 1953 1954 /* 1955 * If there is an active reference from the nat entry to its parent 1956 * rule, decrement the rule's reference count and free it too if no 1957 * longer being used. 1958 */ 1959 ipn = nat->nat_ptr; 1960 if (ipn != NULL) { 1961 ipn->in_space++; 1962 ipn->in_use--; 1963 if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) { 1964 if (ipn->in_apr) 1965 appr_free(ipn->in_apr); 1966 KFREE(ipn); 1967 ifs->ifs_nat_stats.ns_rules--; 1968 } 1969 } 1970 1971 MUTEX_DESTROY(&nat->nat_lock); 1972 1973 aps_free(nat->nat_aps, ifs); 1974 ifs->ifs_nat_stats.ns_inuse--; 1975 1976 /* 1977 * If there's a fragment table entry too for this nat entry, then 1978 * dereference that as well. This is after nat_lock is released 1979 * because of Tru64. 1980 */ 1981 fr_forgetnat((void *)nat, ifs); 1982 1983 KFREE(nat); 1984 } 1985 1986 1987 /* ------------------------------------------------------------------------ */ 1988 /* Function: nat_flushtable */ 1989 /* Returns: int - number of NAT rules deleted */ 1990 /* Parameters: Nil */ 1991 /* */ 1992 /* Deletes all currently active NAT sessions. In deleting each NAT entry a */ 1993 /* log record should be emitted in nat_delete() if NAT logging is enabled. */ 1994 /* ------------------------------------------------------------------------ */ 1995 /* 1996 * nat_flushtable - clear the NAT table of all mapping entries. 1997 */ 1998 static int nat_flushtable(ifs) 1999 ipf_stack_t *ifs; 2000 { 2001 nat_t *nat; 2002 int j = 0; 2003 2004 /* 2005 * ALL NAT mappings deleted, so lets just make the deletions 2006 * quicker. 2007 */ 2008 if (ifs->ifs_nat_table[0] != NULL) 2009 bzero((char *)ifs->ifs_nat_table[0], 2010 sizeof(ifs->ifs_nat_table[0]) * ifs->ifs_ipf_nattable_sz); 2011 if (ifs->ifs_nat_table[1] != NULL) 2012 bzero((char *)ifs->ifs_nat_table[1], 2013 sizeof(ifs->ifs_nat_table[1]) * ifs->ifs_ipf_nattable_sz); 2014 2015 while ((nat = ifs->ifs_nat_instances) != NULL) { 2016 nat_delete(nat, NL_FLUSH, ifs); 2017 j++; 2018 } 2019 2020 return j; 2021 } 2022 2023 2024 /* ------------------------------------------------------------------------ */ 2025 /* Function: nat_clearlist */ 2026 /* Returns: int - number of NAT/RDR rules deleted */ 2027 /* Parameters: Nil */ 2028 /* */ 2029 /* Delete all rules in the current list of rules. There is nothing elegant */ 2030 /* about this cleanup: simply free all entries on the list of rules and */ 2031 /* clear out the tables used for hashed NAT rule lookups. */ 2032 /* ------------------------------------------------------------------------ */ 2033 static int nat_clearlist(ifs) 2034 ipf_stack_t *ifs; 2035 { 2036 ipnat_t *n, **np = &ifs->ifs_nat_list; 2037 int i = 0; 2038 2039 if (ifs->ifs_nat_rules != NULL) 2040 bzero((char *)ifs->ifs_nat_rules, 2041 sizeof(*ifs->ifs_nat_rules) * ifs->ifs_ipf_natrules_sz); 2042 if (ifs->ifs_rdr_rules != NULL) 2043 bzero((char *)ifs->ifs_rdr_rules, 2044 sizeof(*ifs->ifs_rdr_rules) * ifs->ifs_ipf_rdrrules_sz); 2045 2046 while ((n = *np) != NULL) { 2047 *np = n->in_next; 2048 if (n->in_use == 0) { 2049 if (n->in_apr != NULL) 2050 appr_free(n->in_apr); 2051 KFREE(n); 2052 ifs->ifs_nat_stats.ns_rules--; 2053 } else { 2054 n->in_flags |= IPN_DELETE; 2055 n->in_next = NULL; 2056 } 2057 i++; 2058 } 2059 ifs->ifs_nat_masks = 0; 2060 ifs->ifs_rdr_masks = 0; 2061 for (i = 0; i < 4; i++) { 2062 ifs->ifs_nat6_masks[i] = 0; 2063 ifs->ifs_rdr6_masks[i] = 0; 2064 } 2065 return i; 2066 } 2067 2068 2069 /* ------------------------------------------------------------------------ */ 2070 /* Function: nat_newmap */ 2071 /* Returns: int - -1 == error, 0 == success */ 2072 /* Parameters: fin(I) - pointer to packet information */ 2073 /* nat(I) - pointer to NAT entry */ 2074 /* ni(I) - pointer to structure with misc. information needed */ 2075 /* to create new NAT entry. */ 2076 /* */ 2077 /* Given an empty NAT structure, populate it with new information about a */ 2078 /* new NAT session, as defined by the matching NAT rule. */ 2079 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/ 2080 /* to the new IP address for the translation. */ 2081 /* ------------------------------------------------------------------------ */ 2082 static INLINE int nat_newmap(fin, nat, ni) 2083 fr_info_t *fin; 2084 nat_t *nat; 2085 natinfo_t *ni; 2086 { 2087 u_short st_port, dport, sport, port, sp, dp; 2088 struct in_addr in, inb; 2089 hostmap_t *hm; 2090 u_32_t flags; 2091 u_32_t st_ip; 2092 ipnat_t *np; 2093 nat_t *natl; 2094 int l; 2095 ipf_stack_t *ifs = fin->fin_ifs; 2096 2097 /* 2098 * If it's an outbound packet which doesn't match any existing 2099 * record, then create a new port 2100 */ 2101 l = 0; 2102 hm = NULL; 2103 np = ni->nai_np; 2104 st_ip = np->in_nip; 2105 st_port = np->in_pnext; 2106 flags = ni->nai_flags; 2107 sport = ni->nai_sport; 2108 dport = ni->nai_dport; 2109 2110 /* 2111 * Do a loop until we either run out of entries to try or we find 2112 * a NAT mapping that isn't currently being used. This is done 2113 * because the change to the source is not (usually) being fixed. 2114 */ 2115 do { 2116 port = 0; 2117 in.s_addr = htonl(np->in_nip); 2118 if (l == 0) { 2119 /* 2120 * Check to see if there is an existing NAT 2121 * setup for this IP address pair. 2122 */ 2123 hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, 2124 in, 0, ifs); 2125 if (hm != NULL) 2126 in.s_addr = hm->hm_mapip.s_addr; 2127 } else if ((l == 1) && (hm != NULL)) { 2128 fr_hostmapdel(&hm); 2129 } 2130 in.s_addr = ntohl(in.s_addr); 2131 2132 nat->nat_hm = hm; 2133 2134 if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) { 2135 if (l > 0) 2136 return -1; 2137 } 2138 2139 if (np->in_redir == NAT_BIMAP && 2140 np->in_inmsk == np->in_outmsk) { 2141 /* 2142 * map the address block in a 1:1 fashion 2143 */ 2144 in.s_addr = np->in_outip; 2145 in.s_addr |= fin->fin_saddr & ~np->in_inmsk; 2146 in.s_addr = ntohl(in.s_addr); 2147 2148 } else if (np->in_redir & NAT_MAPBLK) { 2149 if ((l >= np->in_ppip) || ((l > 0) && 2150 !(flags & IPN_TCPUDP))) 2151 return -1; 2152 /* 2153 * map-block - Calculate destination address. 2154 */ 2155 in.s_addr = ntohl(fin->fin_saddr); 2156 in.s_addr &= ntohl(~np->in_inmsk); 2157 inb.s_addr = in.s_addr; 2158 in.s_addr /= np->in_ippip; 2159 in.s_addr &= ntohl(~np->in_outmsk); 2160 in.s_addr += ntohl(np->in_outip); 2161 /* 2162 * Calculate destination port. 2163 */ 2164 if ((flags & IPN_TCPUDP) && 2165 (np->in_ppip != 0)) { 2166 port = ntohs(sport) + l; 2167 port %= np->in_ppip; 2168 port += np->in_ppip * 2169 (inb.s_addr % np->in_ippip); 2170 port += MAPBLK_MINPORT; 2171 port = htons(port); 2172 } 2173 2174 } else if ((np->in_outip == 0) && 2175 (np->in_outmsk == 0xffffffff)) { 2176 /* 2177 * 0/32 - use the interface's IP address. 2178 */ 2179 if ((l > 0) || 2180 fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, 2181 &in, NULL, fin->fin_ifs) == -1) 2182 return -1; 2183 in.s_addr = ntohl(in.s_addr); 2184 2185 } else if ((np->in_outip == 0) && (np->in_outmsk == 0)) { 2186 /* 2187 * 0/0 - use the original source address/port. 2188 */ 2189 if (l > 0) 2190 return -1; 2191 in.s_addr = ntohl(fin->fin_saddr); 2192 2193 } else if ((np->in_outmsk != 0xffffffff) && 2194 (np->in_pnext == 0) && ((l > 0) || (hm == NULL))) 2195 np->in_nip++; 2196 2197 natl = NULL; 2198 2199 if ((flags & IPN_TCPUDP) && 2200 ((np->in_redir & NAT_MAPBLK) == 0) && 2201 (np->in_flags & IPN_AUTOPORTMAP)) { 2202 /* 2203 * "ports auto" (without map-block) 2204 */ 2205 if ((l > 0) && (l % np->in_ppip == 0)) { 2206 if (l > np->in_space) { 2207 return -1; 2208 } else if ((l > np->in_ppip) && 2209 np->in_outmsk != 0xffffffff) 2210 np->in_nip++; 2211 } 2212 if (np->in_ppip != 0) { 2213 port = ntohs(sport); 2214 port += (l % np->in_ppip); 2215 port %= np->in_ppip; 2216 port += np->in_ppip * 2217 (ntohl(fin->fin_saddr) % 2218 np->in_ippip); 2219 port += MAPBLK_MINPORT; 2220 port = htons(port); 2221 } 2222 2223 } else if (((np->in_redir & NAT_MAPBLK) == 0) && 2224 (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) { 2225 /* 2226 * Standard port translation. Select next port. 2227 */ 2228 port = htons(np->in_pnext++); 2229 2230 if (np->in_pnext > ntohs(np->in_pmax)) { 2231 np->in_pnext = ntohs(np->in_pmin); 2232 if (np->in_outmsk != 0xffffffff) 2233 np->in_nip++; 2234 } 2235 } 2236 2237 if (np->in_flags & IPN_IPRANGE) { 2238 if (np->in_nip > ntohl(np->in_outmsk)) 2239 np->in_nip = ntohl(np->in_outip); 2240 } else { 2241 if ((np->in_outmsk != 0xffffffff) && 2242 ((np->in_nip + 1) & ntohl(np->in_outmsk)) > 2243 ntohl(np->in_outip)) 2244 np->in_nip = ntohl(np->in_outip) + 1; 2245 } 2246 2247 if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY))) 2248 port = sport; 2249 2250 /* 2251 * Here we do a lookup of the connection as seen from 2252 * the outside. If an IP# pair already exists, try 2253 * again. So if you have A->B becomes C->B, you can 2254 * also have D->E become C->E but not D->B causing 2255 * another C->B. Also take protocol and ports into 2256 * account when determining whether a pre-existing 2257 * NAT setup will cause an external conflict where 2258 * this is appropriate. 2259 */ 2260 inb.s_addr = htonl(in.s_addr); 2261 sp = fin->fin_data[0]; 2262 dp = fin->fin_data[1]; 2263 fin->fin_data[0] = fin->fin_data[1]; 2264 fin->fin_data[1] = htons(port); 2265 natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH), 2266 (u_int)fin->fin_p, fin->fin_dst, inb); 2267 fin->fin_data[0] = sp; 2268 fin->fin_data[1] = dp; 2269 2270 /* 2271 * Has the search wrapped around and come back to the 2272 * start ? 2273 */ 2274 if ((natl != NULL) && 2275 (np->in_pnext != 0) && (st_port == np->in_pnext) && 2276 (np->in_nip != 0) && (st_ip == np->in_nip)) 2277 return -1; 2278 l++; 2279 } while (natl != NULL); 2280 2281 if (np->in_space > 0) 2282 np->in_space--; 2283 2284 /* Setup the NAT table */ 2285 nat->nat_inip = fin->fin_src; 2286 nat->nat_outip.s_addr = htonl(in.s_addr); 2287 nat->nat_oip = fin->fin_dst; 2288 if (nat->nat_hm == NULL) 2289 nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, 2290 nat->nat_outip, 0, ifs); 2291 2292 if (flags & IPN_TCPUDP) { 2293 nat->nat_inport = sport; 2294 nat->nat_outport = port; /* sport */ 2295 nat->nat_oport = dport; 2296 ((tcphdr_t *)fin->fin_dp)->th_sport = port; 2297 } else if (flags & IPN_ICMPQUERY) { 2298 ((icmphdr_t *)fin->fin_dp)->icmp_id = port; 2299 nat->nat_inport = port; 2300 nat->nat_outport = port; 2301 } 2302 2303 ni->nai_ip.s_addr = in.s_addr; 2304 ni->nai_port = port; 2305 ni->nai_nport = dport; 2306 return 0; 2307 } 2308 2309 2310 /* ------------------------------------------------------------------------ */ 2311 /* Function: nat_newrdr */ 2312 /* Returns: int - -1 == error, 0 == success (no move), 1 == success and */ 2313 /* allow rule to be moved if IPN_ROUNDR is set. */ 2314 /* Parameters: fin(I) - pointer to packet information */ 2315 /* nat(I) - pointer to NAT entry */ 2316 /* ni(I) - pointer to structure with misc. information needed */ 2317 /* to create new NAT entry. */ 2318 /* */ 2319 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/ 2320 /* to the new IP address for the translation. */ 2321 /* ------------------------------------------------------------------------ */ 2322 static INLINE int nat_newrdr(fin, nat, ni) 2323 fr_info_t *fin; 2324 nat_t *nat; 2325 natinfo_t *ni; 2326 { 2327 u_short nport, dport, sport; 2328 struct in_addr in, inb; 2329 u_short sp, dp; 2330 hostmap_t *hm; 2331 u_32_t flags; 2332 ipnat_t *np; 2333 nat_t *natl; 2334 int move; 2335 ipf_stack_t *ifs = fin->fin_ifs; 2336 2337 move = 1; 2338 hm = NULL; 2339 in.s_addr = 0; 2340 np = ni->nai_np; 2341 flags = ni->nai_flags; 2342 sport = ni->nai_sport; 2343 dport = ni->nai_dport; 2344 2345 /* 2346 * If the matching rule has IPN_STICKY set, then we want to have the 2347 * same rule kick in as before. Why would this happen? If you have 2348 * a collection of rdr rules with "round-robin sticky", the current 2349 * packet might match a different one to the previous connection but 2350 * we want the same destination to be used. 2351 */ 2352 if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == 2353 (IPN_ROUNDR|IPN_STICKY)) { 2354 hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in, 2355 (u_32_t)dport, ifs); 2356 if (hm != NULL) { 2357 in.s_addr = ntohl(hm->hm_mapip.s_addr); 2358 np = hm->hm_ipnat; 2359 ni->nai_np = np; 2360 move = 0; 2361 } 2362 } 2363 2364 /* 2365 * Otherwise, it's an inbound packet. Most likely, we don't 2366 * want to rewrite source ports and source addresses. Instead, 2367 * we want to rewrite to a fixed internal address and fixed 2368 * internal port. 2369 */ 2370 if (np->in_flags & IPN_SPLIT) { 2371 in.s_addr = np->in_nip; 2372 2373 if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) { 2374 hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, 2375 in, (u_32_t)dport, ifs); 2376 if (hm != NULL) { 2377 in.s_addr = hm->hm_mapip.s_addr; 2378 move = 0; 2379 } 2380 } 2381 2382 if (hm == NULL || hm->hm_ref == 1) { 2383 if (np->in_inip == htonl(in.s_addr)) { 2384 np->in_nip = ntohl(np->in_inmsk); 2385 move = 0; 2386 } else { 2387 np->in_nip = ntohl(np->in_inip); 2388 } 2389 } 2390 2391 } else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) { 2392 /* 2393 * 0/32 - use the interface's IP address. 2394 */ 2395 if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL, 2396 fin->fin_ifs) == -1) 2397 return -1; 2398 in.s_addr = ntohl(in.s_addr); 2399 2400 } else if ((np->in_inip == 0) && (np->in_inmsk== 0)) { 2401 /* 2402 * 0/0 - use the original destination address/port. 2403 */ 2404 in.s_addr = ntohl(fin->fin_daddr); 2405 2406 } else if (np->in_redir == NAT_BIMAP && 2407 np->in_inmsk == np->in_outmsk) { 2408 /* 2409 * map the address block in a 1:1 fashion 2410 */ 2411 in.s_addr = np->in_inip; 2412 in.s_addr |= fin->fin_daddr & ~np->in_inmsk; 2413 in.s_addr = ntohl(in.s_addr); 2414 } else { 2415 in.s_addr = ntohl(np->in_inip); 2416 } 2417 2418 if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0)) 2419 nport = dport; 2420 else { 2421 /* 2422 * Whilst not optimized for the case where 2423 * pmin == pmax, the gain is not significant. 2424 */ 2425 if (((np->in_flags & IPN_FIXEDDPORT) == 0) && 2426 (np->in_pmin != np->in_pmax)) { 2427 nport = ntohs(dport) - ntohs(np->in_pmin) + 2428 ntohs(np->in_pnext); 2429 nport = htons(nport); 2430 } else 2431 nport = np->in_pnext; 2432 } 2433 2434 /* 2435 * When the redirect-to address is set to 0.0.0.0, just 2436 * assume a blank `forwarding' of the packet. We don't 2437 * setup any translation for this either. 2438 */ 2439 if (in.s_addr == 0) { 2440 if (nport == dport) 2441 return -1; 2442 in.s_addr = ntohl(fin->fin_daddr); 2443 } 2444 2445 /* 2446 * Check to see if this redirect mapping already exists and if 2447 * it does, return "failure" (allowing it to be created will just 2448 * cause one or both of these "connections" to stop working.) 2449 */ 2450 inb.s_addr = htonl(in.s_addr); 2451 sp = fin->fin_data[0]; 2452 dp = fin->fin_data[1]; 2453 fin->fin_data[1] = fin->fin_data[0]; 2454 fin->fin_data[0] = ntohs(nport); 2455 natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH), 2456 (u_int)fin->fin_p, inb, fin->fin_src); 2457 fin->fin_data[0] = sp; 2458 fin->fin_data[1] = dp; 2459 if (natl != NULL) 2460 return (-1); 2461 2462 nat->nat_inip.s_addr = htonl(in.s_addr); 2463 nat->nat_outip = fin->fin_dst; 2464 nat->nat_oip = fin->fin_src; 2465 2466 ni->nai_ip.s_addr = in.s_addr; 2467 ni->nai_nport = nport; 2468 ni->nai_port = sport; 2469 2470 if (flags & IPN_TCPUDP) { 2471 nat->nat_inport = nport; 2472 nat->nat_outport = dport; 2473 nat->nat_oport = sport; 2474 ((tcphdr_t *)fin->fin_dp)->th_dport = nport; 2475 } else if (flags & IPN_ICMPQUERY) { 2476 ((icmphdr_t *)fin->fin_dp)->icmp_id = nport; 2477 nat->nat_inport = nport; 2478 nat->nat_outport = nport; 2479 } 2480 2481 return move; 2482 } 2483 2484 /* ------------------------------------------------------------------------ */ 2485 /* Function: nat_new */ 2486 /* Returns: nat_t* - NULL == failure to create new NAT structure, */ 2487 /* else pointer to new NAT structure */ 2488 /* Parameters: fin(I) - pointer to packet information */ 2489 /* np(I) - pointer to NAT rule */ 2490 /* natsave(I) - pointer to where to store NAT struct pointer */ 2491 /* flags(I) - flags describing the current packet */ 2492 /* direction(I) - direction of packet (in/out) */ 2493 /* Write Lock: ipf_nat */ 2494 /* */ 2495 /* Attempts to create a new NAT entry. Does not actually change the packet */ 2496 /* in any way. */ 2497 /* */ 2498 /* This fucntion is in three main parts: (1) deal with creating a new NAT */ 2499 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with */ 2500 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */ 2501 /* and (3) building that structure and putting it into the NAT table(s). */ 2502 /* ------------------------------------------------------------------------ */ 2503 nat_t *nat_new(fin, np, natsave, flags, direction) 2504 fr_info_t *fin; 2505 ipnat_t *np; 2506 nat_t **natsave; 2507 u_int flags; 2508 int direction; 2509 { 2510 tcphdr_t *tcp = NULL; 2511 hostmap_t *hm = NULL; 2512 nat_t *nat, *natl; 2513 u_int nflags; 2514 natinfo_t ni; 2515 int move; 2516 ipf_stack_t *ifs = fin->fin_ifs; 2517 2518 /* 2519 * Trigger automatic call to nat_extraflush() if the 2520 * table has reached capcity specified by hi watermark. 2521 */ 2522 if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi) 2523 ifs->ifs_nat_doflush = 1; 2524 2525 if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) { 2526 ifs->ifs_nat_stats.ns_memfail++; 2527 return NULL; 2528 } 2529 2530 move = 1; 2531 nflags = np->in_flags & flags; 2532 nflags &= NAT_FROMRULE; 2533 2534 ni.nai_np = np; 2535 ni.nai_nflags = nflags; 2536 ni.nai_flags = flags; 2537 2538 /* Give me a new nat */ 2539 KMALLOC(nat, nat_t *); 2540 if (nat == NULL) { 2541 ifs->ifs_nat_stats.ns_memfail++; 2542 /* 2543 * Try to automatically tune the max # of entries in the 2544 * table allowed to be less than what will cause kmem_alloc() 2545 * to fail and try to eliminate panics due to out of memory 2546 * conditions arising. 2547 */ 2548 if (ifs->ifs_ipf_nattable_max > ifs->ifs_ipf_nattable_sz) { 2549 ifs->ifs_ipf_nattable_max = ifs->ifs_nat_stats.ns_inuse - 100; 2550 printf("ipf_nattable_max reduced to %d\n", 2551 ifs->ifs_ipf_nattable_max); 2552 } 2553 return NULL; 2554 } 2555 2556 if (flags & IPN_TCPUDP) { 2557 tcp = fin->fin_dp; 2558 ni.nai_sport = htons(fin->fin_sport); 2559 ni.nai_dport = htons(fin->fin_dport); 2560 } else if (flags & IPN_ICMPQUERY) { 2561 /* 2562 * In the ICMP query NAT code, we translate the ICMP id fields 2563 * to make them unique. This is indepedent of the ICMP type 2564 * (e.g. in the unlikely event that a host sends an echo and 2565 * an tstamp request with the same id, both packets will have 2566 * their ip address/id field changed in the same way). 2567 */ 2568 /* The icmp_id field is used by the sender to identify the 2569 * process making the icmp request. (the receiver justs 2570 * copies it back in its response). So, it closely matches 2571 * the concept of source port. We overlay sport, so we can 2572 * maximally reuse the existing code. 2573 */ 2574 ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id; 2575 ni.nai_dport = ni.nai_sport; 2576 } 2577 2578 bzero((char *)nat, sizeof(*nat)); 2579 nat->nat_flags = flags; 2580 nat->nat_redir = np->in_redir; 2581 2582 if ((flags & NAT_SLAVE) == 0) { 2583 MUTEX_ENTER(&ifs->ifs_ipf_nat_new); 2584 } 2585 2586 /* 2587 * Search the current table for a match. 2588 */ 2589 if (direction == NAT_OUTBOUND) { 2590 /* 2591 * We can now arrange to call this for the same connection 2592 * because ipf_nat_new doesn't protect the code path into 2593 * this function. 2594 */ 2595 natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p, 2596 fin->fin_src, fin->fin_dst); 2597 if (natl != NULL) { 2598 KFREE(nat); 2599 nat = natl; 2600 goto done; 2601 } 2602 2603 move = nat_newmap(fin, nat, &ni); 2604 if (move == -1) 2605 goto badnat; 2606 2607 np = ni.nai_np; 2608 } else { 2609 /* 2610 * NAT_INBOUND is used only for redirects rules 2611 */ 2612 natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p, 2613 fin->fin_src, fin->fin_dst); 2614 if (natl != NULL) { 2615 KFREE(nat); 2616 nat = natl; 2617 goto done; 2618 } 2619 2620 move = nat_newrdr(fin, nat, &ni); 2621 if (move == -1) 2622 goto badnat; 2623 2624 np = ni.nai_np; 2625 } 2626 2627 if ((move == 1) && (np->in_flags & IPN_ROUNDR)) { 2628 if (np->in_redir == NAT_REDIRECT) { 2629 nat_delrdr(np); 2630 nat_addrdr(np, ifs); 2631 } else if (np->in_redir == NAT_MAP) { 2632 nat_delnat(np); 2633 nat_addnat(np, ifs); 2634 } 2635 } 2636 2637 if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) { 2638 goto badnat; 2639 } 2640 2641 nat_calc_chksum_diffs(nat); 2642 2643 if (flags & SI_WILDP) 2644 ifs->ifs_nat_stats.ns_wilds++; 2645 goto done; 2646 badnat: 2647 ifs->ifs_nat_stats.ns_badnat++; 2648 if ((hm = nat->nat_hm) != NULL) 2649 fr_hostmapdel(&hm); 2650 KFREE(nat); 2651 nat = NULL; 2652 done: 2653 if ((flags & NAT_SLAVE) == 0) { 2654 MUTEX_EXIT(&ifs->ifs_ipf_nat_new); 2655 } 2656 return nat; 2657 } 2658 2659 2660 /* ------------------------------------------------------------------------ */ 2661 /* Function: nat_finalise */ 2662 /* Returns: int - 0 == sucess, -1 == failure */ 2663 /* Parameters: fin(I) - pointer to packet information */ 2664 /* nat(I) - pointer to NAT entry */ 2665 /* ni(I) - pointer to structure with misc. information needed */ 2666 /* to create new NAT entry. */ 2667 /* Write Lock: ipf_nat */ 2668 /* */ 2669 /* This is the tail end of constructing a new NAT entry and is the same */ 2670 /* for both IPv4 and IPv6. */ 2671 /* ------------------------------------------------------------------------ */ 2672 /*ARGSUSED*/ 2673 static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction) 2674 fr_info_t *fin; 2675 nat_t *nat; 2676 natinfo_t *ni; 2677 tcphdr_t *tcp; 2678 nat_t **natsave; 2679 int direction; 2680 { 2681 frentry_t *fr; 2682 ipnat_t *np; 2683 ipf_stack_t *ifs = fin->fin_ifs; 2684 2685 np = ni->nai_np; 2686 2687 COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0], fin->fin_v); 2688 2689 #ifdef IPFILTER_SYNC 2690 if ((nat->nat_flags & SI_CLONE) == 0) 2691 nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat); 2692 #endif 2693 2694 nat->nat_me = natsave; 2695 nat->nat_dir = direction; 2696 nat->nat_ifps[0] = np->in_ifps[0]; 2697 nat->nat_ifps[1] = np->in_ifps[1]; 2698 nat->nat_ptr = np; 2699 nat->nat_p = fin->fin_p; 2700 nat->nat_v = fin->fin_v; 2701 nat->nat_mssclamp = np->in_mssclamp; 2702 fr = fin->fin_fr; 2703 nat->nat_fr = fr; 2704 2705 if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0)) 2706 if (appr_new(fin, nat) == -1) 2707 return -1; 2708 2709 if (nat_insert(nat, fin->fin_rev, ifs) == 0) { 2710 if (ifs->ifs_nat_logging) 2711 nat_log(nat, (u_int)np->in_redir, ifs); 2712 np->in_use++; 2713 if (fr != NULL) { 2714 MUTEX_ENTER(&fr->fr_lock); 2715 fr->fr_ref++; 2716 MUTEX_EXIT(&fr->fr_lock); 2717 } 2718 return 0; 2719 } 2720 2721 /* 2722 * nat_insert failed, so cleanup time... 2723 */ 2724 return -1; 2725 } 2726 2727 2728 /* ------------------------------------------------------------------------ */ 2729 /* Function: nat_insert */ 2730 /* Returns: int - 0 == sucess, -1 == failure */ 2731 /* Parameters: nat(I) - pointer to NAT structure */ 2732 /* rev(I) - flag indicating forward/reverse direction of packet */ 2733 /* Write Lock: ipf_nat */ 2734 /* */ 2735 /* Insert a NAT entry into the hash tables for searching and add it to the */ 2736 /* list of active NAT entries. Adjust global counters when complete. */ 2737 /* ------------------------------------------------------------------------ */ 2738 int nat_insert(nat, rev, ifs) 2739 nat_t *nat; 2740 int rev; 2741 ipf_stack_t *ifs; 2742 { 2743 u_int hv1, hv2; 2744 nat_t **natp; 2745 2746 /* 2747 * Try and return an error as early as possible, so calculate the hash 2748 * entry numbers first and then proceed. 2749 */ 2750 if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) { 2751 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 2752 0xffffffff); 2753 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport, 2754 ifs->ifs_ipf_nattable_sz); 2755 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 2756 0xffffffff); 2757 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport, 2758 ifs->ifs_ipf_nattable_sz); 2759 } else { 2760 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff); 2761 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, 2762 ifs->ifs_ipf_nattable_sz); 2763 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff); 2764 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, 2765 ifs->ifs_ipf_nattable_sz); 2766 } 2767 2768 if (ifs->ifs_nat_stats.ns_bucketlen[0][hv1] >= ifs->ifs_fr_nat_maxbucket || 2769 ifs->ifs_nat_stats.ns_bucketlen[1][hv2] >= ifs->ifs_fr_nat_maxbucket) { 2770 return -1; 2771 } 2772 2773 nat->nat_hv[0] = hv1; 2774 nat->nat_hv[1] = hv2; 2775 2776 MUTEX_INIT(&nat->nat_lock, "nat entry lock"); 2777 2778 nat->nat_rev = rev; 2779 nat->nat_ref = 1; 2780 nat->nat_bytes[0] = 0; 2781 nat->nat_pkts[0] = 0; 2782 nat->nat_bytes[1] = 0; 2783 nat->nat_pkts[1] = 0; 2784 2785 nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0'; 2786 nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4, ifs); 2787 2788 if (nat->nat_ifnames[1][0] !='\0') { 2789 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; 2790 nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4, ifs); 2791 } else { 2792 (void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0], 2793 LIFNAMSIZ); 2794 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; 2795 nat->nat_ifps[1] = nat->nat_ifps[0]; 2796 } 2797 2798 nat->nat_next = ifs->ifs_nat_instances; 2799 nat->nat_pnext = &ifs->ifs_nat_instances; 2800 if (ifs->ifs_nat_instances) 2801 ifs->ifs_nat_instances->nat_pnext = &nat->nat_next; 2802 ifs->ifs_nat_instances = nat; 2803 2804 natp = &ifs->ifs_nat_table[0][hv1]; 2805 if (*natp) 2806 (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; 2807 nat->nat_phnext[0] = natp; 2808 nat->nat_hnext[0] = *natp; 2809 *natp = nat; 2810 ifs->ifs_nat_stats.ns_bucketlen[0][hv1]++; 2811 2812 natp = &ifs->ifs_nat_table[1][hv2]; 2813 if (*natp) 2814 (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; 2815 nat->nat_phnext[1] = natp; 2816 nat->nat_hnext[1] = *natp; 2817 *natp = nat; 2818 ifs->ifs_nat_stats.ns_bucketlen[1][hv2]++; 2819 2820 fr_setnatqueue(nat, rev, ifs); 2821 2822 ifs->ifs_nat_stats.ns_added++; 2823 ifs->ifs_nat_stats.ns_inuse++; 2824 return 0; 2825 } 2826 2827 2828 /* ------------------------------------------------------------------------ */ 2829 /* Function: nat_icmperrorlookup */ 2830 /* Returns: nat_t* - point to matching NAT structure */ 2831 /* Parameters: fin(I) - pointer to packet information */ 2832 /* dir(I) - direction of packet (in/out) */ 2833 /* */ 2834 /* Check if the ICMP error message is related to an existing TCP, UDP or */ 2835 /* ICMP query nat entry. It is assumed that the packet is already of the */ 2836 /* the required length. */ 2837 /* ------------------------------------------------------------------------ */ 2838 nat_t *nat_icmperrorlookup(fin, dir) 2839 fr_info_t *fin; 2840 int dir; 2841 { 2842 int flags = 0, minlen; 2843 icmphdr_t *orgicmp; 2844 tcphdr_t *tcp = NULL; 2845 u_short data[2]; 2846 nat_t *nat; 2847 ip_t *oip; 2848 u_int p; 2849 2850 /* 2851 * Does it at least have the return (basic) IP header ? 2852 * Only a basic IP header (no options) should be with an ICMP error 2853 * header. Also, if it's not an error type, then return. 2854 */ 2855 if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR)) 2856 return NULL; 2857 2858 /* 2859 * Check packet size 2860 */ 2861 oip = (ip_t *)((char *)fin->fin_dp + 8); 2862 minlen = IP_HL(oip) << 2; 2863 if ((minlen < sizeof(ip_t)) || 2864 (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)) 2865 return NULL; 2866 /* 2867 * Is the buffer big enough for all of it ? It's the size of the IP 2868 * header claimed in the encapsulated part which is of concern. It 2869 * may be too big to be in this buffer but not so big that it's 2870 * outside the ICMP packet, leading to TCP deref's causing problems. 2871 * This is possible because we don't know how big oip_hl is when we 2872 * do the pullup early in fr_check() and thus can't gaurantee it is 2873 * all here now. 2874 */ 2875 #ifdef _KERNEL 2876 { 2877 mb_t *m; 2878 2879 m = fin->fin_m; 2880 # if defined(MENTAT) 2881 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr) 2882 return NULL; 2883 # else 2884 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > 2885 (char *)fin->fin_ip + M_LEN(m)) 2886 return NULL; 2887 # endif 2888 } 2889 #endif 2890 2891 if (fin->fin_daddr != oip->ip_src.s_addr) 2892 return NULL; 2893 2894 p = oip->ip_p; 2895 if (p == IPPROTO_TCP) 2896 flags = IPN_TCP; 2897 else if (p == IPPROTO_UDP) 2898 flags = IPN_UDP; 2899 else if (p == IPPROTO_ICMP) { 2900 orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2901 2902 /* see if this is related to an ICMP query */ 2903 if (nat_icmpquerytype4(orgicmp->icmp_type)) { 2904 data[0] = fin->fin_data[0]; 2905 data[1] = fin->fin_data[1]; 2906 fin->fin_data[0] = 0; 2907 fin->fin_data[1] = orgicmp->icmp_id; 2908 2909 flags = IPN_ICMPERR|IPN_ICMPQUERY; 2910 /* 2911 * NOTE : dir refers to the direction of the original 2912 * ip packet. By definition the icmp error 2913 * message flows in the opposite direction. 2914 */ 2915 if (dir == NAT_INBOUND) 2916 nat = nat_inlookup(fin, flags, p, oip->ip_dst, 2917 oip->ip_src); 2918 else 2919 nat = nat_outlookup(fin, flags, p, oip->ip_dst, 2920 oip->ip_src); 2921 fin->fin_data[0] = data[0]; 2922 fin->fin_data[1] = data[1]; 2923 return nat; 2924 } 2925 } 2926 2927 if (flags & IPN_TCPUDP) { 2928 minlen += 8; /* + 64bits of data to get ports */ 2929 if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen) 2930 return NULL; 2931 2932 data[0] = fin->fin_data[0]; 2933 data[1] = fin->fin_data[1]; 2934 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2935 fin->fin_data[0] = ntohs(tcp->th_dport); 2936 fin->fin_data[1] = ntohs(tcp->th_sport); 2937 2938 if (dir == NAT_INBOUND) { 2939 nat = nat_inlookup(fin, flags, p, oip->ip_dst, 2940 oip->ip_src); 2941 } else { 2942 nat = nat_outlookup(fin, flags, p, oip->ip_dst, 2943 oip->ip_src); 2944 } 2945 fin->fin_data[0] = data[0]; 2946 fin->fin_data[1] = data[1]; 2947 return nat; 2948 } 2949 if (dir == NAT_INBOUND) 2950 return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src); 2951 else 2952 return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src); 2953 } 2954 2955 2956 /* ------------------------------------------------------------------------ */ 2957 /* Function: nat_icmperror */ 2958 /* Returns: nat_t* - point to matching NAT structure */ 2959 /* Parameters: fin(I) - pointer to packet information */ 2960 /* nflags(I) - NAT flags for this packet */ 2961 /* dir(I) - direction of packet (in/out) */ 2962 /* */ 2963 /* Fix up an ICMP packet which is an error message for an existing NAT */ 2964 /* session. This will correct both packet header data and checksums. */ 2965 /* */ 2966 /* This should *ONLY* be used for incoming ICMP error packets to make sure */ 2967 /* a NAT'd ICMP packet gets correctly recognised. */ 2968 /* ------------------------------------------------------------------------ */ 2969 nat_t *nat_icmperror(fin, nflags, dir) 2970 fr_info_t *fin; 2971 u_int *nflags; 2972 int dir; 2973 { 2974 u_32_t sum1, sum2, sumd, psum1, psum2, psumd, sumd2; 2975 struct in_addr in; 2976 icmphdr_t *icmp, *orgicmp; 2977 int dlen; 2978 udphdr_t *udp; 2979 tcphdr_t *tcp; 2980 nat_t *nat; 2981 ip_t *oip; 2982 if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) 2983 return NULL; 2984 2985 /* 2986 * nat_icmperrorlookup() looks up nat entry associated with the 2987 * offending IP packet and returns pointer to the entry, or NULL 2988 * if packet wasn't natted or for `defective' packets. 2989 */ 2990 2991 if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir))) 2992 return NULL; 2993 2994 sumd2 = 0; 2995 *nflags = IPN_ICMPERR; 2996 icmp = fin->fin_dp; 2997 oip = (ip_t *)&icmp->icmp_ip; 2998 udp = (udphdr_t *)((((char *)oip) + (IP_HL(oip) << 2))); 2999 tcp = (tcphdr_t *)udp; 3000 dlen = fin->fin_plen - ((char *)udp - (char *)fin->fin_ip); 3001 3002 /* 3003 * Need to adjust ICMP header to include the real IP#'s and 3004 * port #'s. There are three steps required. 3005 * 3006 * Step 1 3007 * Fix the IP addresses in the offending IP packet and update 3008 * ip header checksum to compensate for the change. 3009 * 3010 * No update needed here for icmp_cksum because the ICMP checksum 3011 * is calculated over the complete ICMP packet, which includes the 3012 * changed oip IP addresses and oip->ip_sum. These two changes 3013 * cancel each other out (if the delta for the IP address is x, 3014 * then the delta for ip_sum is minus x). 3015 */ 3016 3017 if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) { 3018 sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr)); 3019 in = nat->nat_inip; 3020 oip->ip_src = in; 3021 } else { 3022 sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr)); 3023 in = nat->nat_outip; 3024 oip->ip_dst = in; 3025 } 3026 3027 sum2 = LONG_SUM(ntohl(in.s_addr)); 3028 CALC_SUMD(sum1, sum2, sumd); 3029 fix_datacksum(&oip->ip_sum, sumd); 3030 3031 /* 3032 * Step 2 3033 * Perform other adjustments based on protocol of offending packet. 3034 */ 3035 3036 switch (oip->ip_p) { 3037 case IPPROTO_TCP : 3038 case IPPROTO_UDP : 3039 3040 /* 3041 * For offending TCP/UDP IP packets, translate the ports 3042 * based on the NAT specification. 3043 * 3044 * Advance notice : Now it becomes complicated :-) 3045 * 3046 * Since the port and IP addresse fields are both part 3047 * of the TCP/UDP checksum of the offending IP packet, 3048 * we need to adjust that checksum as well. 3049 * 3050 * To further complicate things, the TCP/UDP checksum 3051 * may not be present. We must check to see if the 3052 * length of the data portion is big enough to hold 3053 * the checksum. In the UDP case, a test to determine 3054 * if the checksum is even set is also required. 3055 * 3056 * Any changes to an IP address, port or checksum within 3057 * the ICMP packet requires a change to icmp_cksum. 3058 * 3059 * Be extremely careful here ... The change is dependent 3060 * upon whether or not the TCP/UPD checksum is present. 3061 * 3062 * If TCP/UPD checksum is present, the icmp_cksum must 3063 * compensate for checksum modification resulting from 3064 * IP address change only. Port change and resulting 3065 * data checksum adjustments cancel each other out. 3066 * 3067 * If TCP/UDP checksum is not present, icmp_cksum must 3068 * compensate for port change only. The IP address 3069 * change does not modify anything else in this case. 3070 */ 3071 3072 psum1 = 0; 3073 psum2 = 0; 3074 psumd = 0; 3075 3076 if ((tcp->th_dport == nat->nat_oport) && 3077 (tcp->th_sport != nat->nat_inport)) { 3078 3079 /* 3080 * Translate the source port. 3081 */ 3082 3083 psum1 = ntohs(tcp->th_sport); 3084 psum2 = ntohs(nat->nat_inport); 3085 tcp->th_sport = nat->nat_inport; 3086 3087 } else if ((tcp->th_sport == nat->nat_oport) && 3088 (tcp->th_dport != nat->nat_outport)) { 3089 3090 /* 3091 * Translate the destination port. 3092 */ 3093 3094 psum1 = ntohs(tcp->th_dport); 3095 psum2 = ntohs(nat->nat_outport); 3096 tcp->th_dport = nat->nat_outport; 3097 } 3098 3099 if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) { 3100 3101 /* 3102 * TCP checksum present. 3103 * 3104 * Adjust data checksum and icmp checksum to 3105 * compensate for any IP address change. 3106 */ 3107 3108 sum1 = ntohs(tcp->th_sum); 3109 fix_datacksum(&tcp->th_sum, sumd); 3110 sum2 = ntohs(tcp->th_sum); 3111 sumd2 = sumd << 1; 3112 CALC_SUMD(sum1, sum2, sumd); 3113 sumd2 += sumd; 3114 3115 /* 3116 * Also make data checksum adjustment to 3117 * compensate for any port change. 3118 */ 3119 3120 if (psum1 != psum2) { 3121 CALC_SUMD(psum1, psum2, psumd); 3122 fix_datacksum(&tcp->th_sum, psumd); 3123 } 3124 3125 } else if ((oip->ip_p == IPPROTO_UDP) && 3126 (dlen >= 8) && (udp->uh_sum != 0)) { 3127 3128 /* 3129 * The UDP checksum is present and set. 3130 * 3131 * Adjust data checksum and icmp checksum to 3132 * compensate for any IP address change. 3133 */ 3134 3135 sum1 = ntohs(udp->uh_sum); 3136 fix_datacksum(&udp->uh_sum, sumd); 3137 sum2 = ntohs(udp->uh_sum); 3138 sumd2 = sumd << 1; 3139 CALC_SUMD(sum1, sum2, sumd); 3140 sumd2 += sumd; 3141 3142 /* 3143 * Also make data checksum adjustment to 3144 * compensate for any port change. 3145 */ 3146 3147 if (psum1 != psum2) { 3148 CALC_SUMD(psum1, psum2, psumd); 3149 fix_datacksum(&udp->uh_sum, psumd); 3150 } 3151 3152 } else { 3153 3154 /* 3155 * Data checksum was not present. 3156 * 3157 * Compensate for any port change. 3158 */ 3159 3160 CALC_SUMD(psum2, psum1, psumd); 3161 sumd2 += psumd; 3162 } 3163 break; 3164 3165 case IPPROTO_ICMP : 3166 3167 orgicmp = (icmphdr_t *)udp; 3168 3169 if ((nat->nat_dir == NAT_OUTBOUND) && 3170 (orgicmp->icmp_id != nat->nat_inport) && 3171 (dlen >= 8)) { 3172 3173 /* 3174 * Fix ICMP checksum (of the offening ICMP 3175 * query packet) to compensate the change 3176 * in the ICMP id of the offending ICMP 3177 * packet. 3178 * 3179 * Since you modify orgicmp->icmp_id with 3180 * a delta (say x) and you compensate that 3181 * in origicmp->icmp_cksum with a delta 3182 * minus x, you don't have to adjust the 3183 * overall icmp->icmp_cksum 3184 */ 3185 3186 sum1 = ntohs(orgicmp->icmp_id); 3187 sum2 = ntohs(nat->nat_inport); 3188 CALC_SUMD(sum1, sum2, sumd); 3189 orgicmp->icmp_id = nat->nat_inport; 3190 fix_datacksum(&orgicmp->icmp_cksum, sumd); 3191 3192 } /* nat_dir can't be NAT_INBOUND for icmp queries */ 3193 3194 break; 3195 3196 default : 3197 3198 break; 3199 3200 } /* switch (oip->ip_p) */ 3201 3202 /* 3203 * Step 3 3204 * Make the adjustments to icmp checksum. 3205 */ 3206 3207 if (sumd2 != 0) { 3208 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); 3209 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); 3210 fix_incksum(&icmp->icmp_cksum, sumd2); 3211 } 3212 return nat; 3213 } 3214 3215 3216 /* 3217 * NB: these lookups don't lock access to the list, it assumed that it has 3218 * already been done! 3219 */ 3220 3221 /* ------------------------------------------------------------------------ */ 3222 /* Function: nat_inlookup */ 3223 /* Returns: nat_t* - NULL == no match, */ 3224 /* else pointer to matching NAT entry */ 3225 /* Parameters: fin(I) - pointer to packet information */ 3226 /* flags(I) - NAT flags for this packet */ 3227 /* p(I) - protocol for this packet */ 3228 /* src(I) - source IP address */ 3229 /* mapdst(I) - destination IP address */ 3230 /* */ 3231 /* Lookup a nat entry based on the mapped destination ip address/port and */ 3232 /* real source address/port. We use this lookup when receiving a packet, */ 3233 /* we're looking for a table entry, based on the destination address. */ 3234 /* */ 3235 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */ 3236 /* */ 3237 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */ 3238 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */ 3239 /* */ 3240 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */ 3241 /* the packet is of said protocol */ 3242 /* ------------------------------------------------------------------------ */ 3243 nat_t *nat_inlookup(fin, flags, p, src, mapdst) 3244 fr_info_t *fin; 3245 u_int flags, p; 3246 struct in_addr src , mapdst; 3247 { 3248 u_short sport, dport; 3249 ipnat_t *ipn; 3250 u_int sflags; 3251 nat_t *nat; 3252 int nflags; 3253 u_32_t dst; 3254 void *ifp; 3255 u_int hv; 3256 ipf_stack_t *ifs = fin->fin_ifs; 3257 3258 if (fin != NULL) 3259 ifp = fin->fin_ifp; 3260 else 3261 ifp = NULL; 3262 sport = 0; 3263 dport = 0; 3264 dst = mapdst.s_addr; 3265 sflags = flags & NAT_TCPUDPICMP; 3266 3267 switch (p) 3268 { 3269 case IPPROTO_TCP : 3270 case IPPROTO_UDP : 3271 sport = htons(fin->fin_data[0]); 3272 dport = htons(fin->fin_data[1]); 3273 break; 3274 case IPPROTO_ICMP : 3275 if (flags & IPN_ICMPERR) 3276 sport = fin->fin_data[1]; 3277 else 3278 dport = fin->fin_data[1]; 3279 break; 3280 default : 3281 break; 3282 } 3283 3284 3285 if ((flags & SI_WILDP) != 0) 3286 goto find_in_wild_ports; 3287 3288 hv = NAT_HASH_FN(dst, dport, 0xffffffff); 3289 hv = NAT_HASH_FN(src.s_addr, hv + sport, ifs->ifs_ipf_nattable_sz); 3290 nat = ifs->ifs_nat_table[1][hv]; 3291 for (; nat; nat = nat->nat_hnext[1]) { 3292 if (nat->nat_v != 4) 3293 continue; 3294 3295 if (nat->nat_ifps[0] != NULL) { 3296 if ((ifp != NULL) && (ifp != nat->nat_ifps[0])) 3297 continue; 3298 } else if (ifp != NULL) 3299 nat->nat_ifps[0] = ifp; 3300 3301 nflags = nat->nat_flags; 3302 3303 if (nat->nat_oip.s_addr == src.s_addr && 3304 nat->nat_outip.s_addr == dst && 3305 (((p == 0) && 3306 (sflags == (nat->nat_flags & IPN_TCPUDPICMP))) 3307 || (p == nat->nat_p))) { 3308 switch (p) 3309 { 3310 #if 0 3311 case IPPROTO_GRE : 3312 if (nat->nat_call[1] != fin->fin_data[0]) 3313 continue; 3314 break; 3315 #endif 3316 case IPPROTO_ICMP : 3317 if ((flags & IPN_ICMPERR) != 0) { 3318 if (nat->nat_outport != sport) 3319 continue; 3320 } else { 3321 if (nat->nat_outport != dport) 3322 continue; 3323 } 3324 break; 3325 case IPPROTO_TCP : 3326 case IPPROTO_UDP : 3327 if (nat->nat_oport != sport) 3328 continue; 3329 if (nat->nat_outport != dport) 3330 continue; 3331 break; 3332 default : 3333 break; 3334 } 3335 3336 ipn = nat->nat_ptr; 3337 if ((ipn != NULL) && (nat->nat_aps != NULL)) 3338 if (appr_match(fin, nat) != 0) 3339 continue; 3340 return nat; 3341 } 3342 } 3343 3344 /* 3345 * So if we didn't find it but there are wildcard members in the hash 3346 * table, go back and look for them. We do this search and update here 3347 * because it is modifying the NAT table and we want to do this only 3348 * for the first packet that matches. The exception, of course, is 3349 * for "dummy" (FI_IGNORE) lookups. 3350 */ 3351 find_in_wild_ports: 3352 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) 3353 return NULL; 3354 if (ifs->ifs_nat_stats.ns_wilds == 0) 3355 return NULL; 3356 3357 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3358 3359 hv = NAT_HASH_FN(dst, 0, 0xffffffff); 3360 hv = NAT_HASH_FN(src.s_addr, hv, ifs->ifs_ipf_nattable_sz); 3361 3362 WRITE_ENTER(&ifs->ifs_ipf_nat); 3363 3364 nat = ifs->ifs_nat_table[1][hv]; 3365 for (; nat; nat = nat->nat_hnext[1]) { 3366 if (nat->nat_v != 4) 3367 continue; 3368 3369 if (nat->nat_ifps[0] != NULL) { 3370 if ((ifp != NULL) && (ifp != nat->nat_ifps[0])) 3371 continue; 3372 } else if (ifp != NULL) 3373 nat->nat_ifps[0] = ifp; 3374 3375 if (nat->nat_p != fin->fin_p) 3376 continue; 3377 if (nat->nat_oip.s_addr != src.s_addr || 3378 nat->nat_outip.s_addr != dst) 3379 continue; 3380 3381 nflags = nat->nat_flags; 3382 if (!(nflags & (NAT_TCPUDP|SI_WILDP))) 3383 continue; 3384 3385 if (nat_wildok(nat, (int)sport, (int)dport, nflags, 3386 NAT_INBOUND) == 1) { 3387 if ((fin->fin_flx & FI_IGNORE) != 0) 3388 break; 3389 if ((nflags & SI_CLONE) != 0) { 3390 nat = fr_natclone(fin, nat); 3391 if (nat == NULL) 3392 break; 3393 } else { 3394 MUTEX_ENTER(&ifs->ifs_ipf_nat_new); 3395 ifs->ifs_nat_stats.ns_wilds--; 3396 MUTEX_EXIT(&ifs->ifs_ipf_nat_new); 3397 } 3398 nat->nat_oport = sport; 3399 nat->nat_outport = dport; 3400 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); 3401 nat_tabmove(nat, ifs); 3402 break; 3403 } 3404 } 3405 3406 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3407 3408 return nat; 3409 } 3410 3411 3412 /* ------------------------------------------------------------------------ */ 3413 /* Function: nat_tabmove */ 3414 /* Returns: Nil */ 3415 /* Parameters: nat(I) - pointer to NAT structure */ 3416 /* Write Lock: ipf_nat */ 3417 /* */ 3418 /* This function is only called for TCP/UDP NAT table entries where the */ 3419 /* original was placed in the table without hashing on the ports and we now */ 3420 /* want to include hashing on port numbers. */ 3421 /* ------------------------------------------------------------------------ */ 3422 static void nat_tabmove(nat, ifs) 3423 nat_t *nat; 3424 ipf_stack_t *ifs; 3425 { 3426 nat_t **natp; 3427 u_int hv; 3428 3429 if (nat->nat_flags & SI_CLONE) 3430 return; 3431 3432 /* 3433 * Remove the NAT entry from the old location 3434 */ 3435 if (nat->nat_hnext[0]) 3436 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; 3437 *nat->nat_phnext[0] = nat->nat_hnext[0]; 3438 ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--; 3439 3440 if (nat->nat_hnext[1]) 3441 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; 3442 *nat->nat_phnext[1] = nat->nat_hnext[1]; 3443 ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--; 3444 3445 /* 3446 * Add into the NAT table in the new position 3447 */ 3448 hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff); 3449 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport, 3450 ifs->ifs_ipf_nattable_sz); 3451 nat->nat_hv[0] = hv; 3452 natp = &ifs->ifs_nat_table[0][hv]; 3453 if (*natp) 3454 (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; 3455 nat->nat_phnext[0] = natp; 3456 nat->nat_hnext[0] = *natp; 3457 *natp = nat; 3458 ifs->ifs_nat_stats.ns_bucketlen[0][hv]++; 3459 3460 hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff); 3461 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport, 3462 ifs->ifs_ipf_nattable_sz); 3463 nat->nat_hv[1] = hv; 3464 natp = &ifs->ifs_nat_table[1][hv]; 3465 if (*natp) 3466 (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; 3467 nat->nat_phnext[1] = natp; 3468 nat->nat_hnext[1] = *natp; 3469 *natp = nat; 3470 ifs->ifs_nat_stats.ns_bucketlen[1][hv]++; 3471 } 3472 3473 3474 /* ------------------------------------------------------------------------ */ 3475 /* Function: nat_outlookup */ 3476 /* Returns: nat_t* - NULL == no match, */ 3477 /* else pointer to matching NAT entry */ 3478 /* Parameters: fin(I) - pointer to packet information */ 3479 /* flags(I) - NAT flags for this packet */ 3480 /* p(I) - protocol for this packet */ 3481 /* src(I) - source IP address */ 3482 /* dst(I) - destination IP address */ 3483 /* rw(I) - 1 == write lock on ipf_nat held, 0 == read lock. */ 3484 /* */ 3485 /* Lookup a nat entry based on the source 'real' ip address/port and */ 3486 /* destination address/port. We use this lookup when sending a packet out, */ 3487 /* we're looking for a table entry, based on the source address. */ 3488 /* */ 3489 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */ 3490 /* */ 3491 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */ 3492 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */ 3493 /* */ 3494 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */ 3495 /* the packet is of said protocol */ 3496 /* ------------------------------------------------------------------------ */ 3497 nat_t *nat_outlookup(fin, flags, p, src, dst) 3498 fr_info_t *fin; 3499 u_int flags, p; 3500 struct in_addr src , dst; 3501 { 3502 u_short sport, dport; 3503 u_int sflags; 3504 ipnat_t *ipn; 3505 u_32_t srcip; 3506 nat_t *nat; 3507 int nflags; 3508 void *ifp; 3509 u_int hv; 3510 ipf_stack_t *ifs = fin->fin_ifs; 3511 3512 ifp = fin->fin_ifp; 3513 3514 srcip = src.s_addr; 3515 sflags = flags & IPN_TCPUDPICMP; 3516 sport = 0; 3517 dport = 0; 3518 3519 switch (p) 3520 { 3521 case IPPROTO_TCP : 3522 case IPPROTO_UDP : 3523 sport = htons(fin->fin_data[0]); 3524 dport = htons(fin->fin_data[1]); 3525 break; 3526 case IPPROTO_ICMP : 3527 if (flags & IPN_ICMPERR) 3528 sport = fin->fin_data[1]; 3529 else 3530 dport = fin->fin_data[1]; 3531 break; 3532 default : 3533 break; 3534 } 3535 3536 if ((flags & SI_WILDP) != 0) 3537 goto find_out_wild_ports; 3538 3539 hv = NAT_HASH_FN(srcip, sport, 0xffffffff); 3540 hv = NAT_HASH_FN(dst.s_addr, hv + dport, ifs->ifs_ipf_nattable_sz); 3541 nat = ifs->ifs_nat_table[0][hv]; 3542 for (; nat; nat = nat->nat_hnext[0]) { 3543 if (nat->nat_v != 4) 3544 continue; 3545 3546 if (nat->nat_ifps[1] != NULL) { 3547 if ((ifp != NULL) && (ifp != nat->nat_ifps[1])) 3548 continue; 3549 } else if (ifp != NULL) 3550 nat->nat_ifps[1] = ifp; 3551 3552 nflags = nat->nat_flags; 3553 3554 if (nat->nat_inip.s_addr == srcip && 3555 nat->nat_oip.s_addr == dst.s_addr && 3556 (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP))) 3557 || (p == nat->nat_p))) { 3558 switch (p) 3559 { 3560 #if 0 3561 case IPPROTO_GRE : 3562 if (nat->nat_call[1] != fin->fin_data[0]) 3563 continue; 3564 break; 3565 #endif 3566 case IPPROTO_TCP : 3567 case IPPROTO_UDP : 3568 if (nat->nat_oport != dport) 3569 continue; 3570 if (nat->nat_inport != sport) 3571 continue; 3572 break; 3573 default : 3574 break; 3575 } 3576 3577 ipn = nat->nat_ptr; 3578 if ((ipn != NULL) && (nat->nat_aps != NULL)) 3579 if (appr_match(fin, nat) != 0) 3580 continue; 3581 return nat; 3582 } 3583 } 3584 3585 /* 3586 * So if we didn't find it but there are wildcard members in the hash 3587 * table, go back and look for them. We do this search and update here 3588 * because it is modifying the NAT table and we want to do this only 3589 * for the first packet that matches. The exception, of course, is 3590 * for "dummy" (FI_IGNORE) lookups. 3591 */ 3592 find_out_wild_ports: 3593 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) 3594 return NULL; 3595 if (ifs->ifs_nat_stats.ns_wilds == 0) 3596 return NULL; 3597 3598 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3599 3600 hv = NAT_HASH_FN(srcip, 0, 0xffffffff); 3601 hv = NAT_HASH_FN(dst.s_addr, hv, ifs->ifs_ipf_nattable_sz); 3602 3603 WRITE_ENTER(&ifs->ifs_ipf_nat); 3604 3605 nat = ifs->ifs_nat_table[0][hv]; 3606 for (; nat; nat = nat->nat_hnext[0]) { 3607 if (nat->nat_v != 4) 3608 continue; 3609 3610 if (nat->nat_ifps[1] != NULL) { 3611 if ((ifp != NULL) && (ifp != nat->nat_ifps[1])) 3612 continue; 3613 } else if (ifp != NULL) 3614 nat->nat_ifps[1] = ifp; 3615 3616 if (nat->nat_p != fin->fin_p) 3617 continue; 3618 if ((nat->nat_inip.s_addr != srcip) || 3619 (nat->nat_oip.s_addr != dst.s_addr)) 3620 continue; 3621 3622 nflags = nat->nat_flags; 3623 if (!(nflags & (NAT_TCPUDP|SI_WILDP))) 3624 continue; 3625 3626 if (nat_wildok(nat, (int)sport, (int)dport, nflags, 3627 NAT_OUTBOUND) == 1) { 3628 if ((fin->fin_flx & FI_IGNORE) != 0) 3629 break; 3630 if ((nflags & SI_CLONE) != 0) { 3631 nat = fr_natclone(fin, nat); 3632 if (nat == NULL) 3633 break; 3634 } else { 3635 MUTEX_ENTER(&ifs->ifs_ipf_nat_new); 3636 ifs->ifs_nat_stats.ns_wilds--; 3637 MUTEX_EXIT(&ifs->ifs_ipf_nat_new); 3638 } 3639 nat->nat_inport = sport; 3640 nat->nat_oport = dport; 3641 if (nat->nat_outport == 0) 3642 nat->nat_outport = sport; 3643 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); 3644 nat_tabmove(nat, ifs); 3645 break; 3646 } 3647 } 3648 3649 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3650 3651 return nat; 3652 } 3653 3654 3655 /* ------------------------------------------------------------------------ */ 3656 /* Function: nat_lookupredir */ 3657 /* Returns: nat_t* - NULL == no match, */ 3658 /* else pointer to matching NAT entry */ 3659 /* Parameters: np(I) - pointer to description of packet to find NAT table */ 3660 /* entry for. */ 3661 /* */ 3662 /* Lookup the NAT tables to search for a matching redirect */ 3663 /* ------------------------------------------------------------------------ */ 3664 nat_t *nat_lookupredir(np, ifs) 3665 natlookup_t *np; 3666 ipf_stack_t *ifs; 3667 { 3668 fr_info_t fi; 3669 nat_t *nat; 3670 3671 bzero((char *)&fi, sizeof(fi)); 3672 if (np->nl_flags & IPN_IN) { 3673 fi.fin_data[0] = ntohs(np->nl_realport); 3674 fi.fin_data[1] = ntohs(np->nl_outport); 3675 } else { 3676 fi.fin_data[0] = ntohs(np->nl_inport); 3677 fi.fin_data[1] = ntohs(np->nl_outport); 3678 } 3679 if (np->nl_flags & IPN_TCP) 3680 fi.fin_p = IPPROTO_TCP; 3681 else if (np->nl_flags & IPN_UDP) 3682 fi.fin_p = IPPROTO_UDP; 3683 else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY)) 3684 fi.fin_p = IPPROTO_ICMP; 3685 3686 fi.fin_ifs = ifs; 3687 /* 3688 * We can do two sorts of lookups: 3689 * - IPN_IN: we have the `real' and `out' address, look for `in'. 3690 * - default: we have the `in' and `out' address, look for `real'. 3691 */ 3692 if (np->nl_flags & IPN_IN) { 3693 if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p, 3694 np->nl_realip, np->nl_outip))) { 3695 np->nl_inip = nat->nat_inip; 3696 np->nl_inport = nat->nat_inport; 3697 } 3698 } else { 3699 /* 3700 * If nl_inip is non null, this is a lookup based on the real 3701 * ip address. Else, we use the fake. 3702 */ 3703 if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p, 3704 np->nl_inip, np->nl_outip))) { 3705 3706 if ((np->nl_flags & IPN_FINDFORWARD) != 0) { 3707 fr_info_t fin; 3708 bzero((char *)&fin, sizeof(fin)); 3709 fin.fin_p = nat->nat_p; 3710 fin.fin_data[0] = ntohs(nat->nat_outport); 3711 fin.fin_data[1] = ntohs(nat->nat_oport); 3712 fin.fin_ifs = ifs; 3713 if (nat_inlookup(&fin, np->nl_flags, fin.fin_p, 3714 nat->nat_outip, 3715 nat->nat_oip) != NULL) { 3716 np->nl_flags &= ~IPN_FINDFORWARD; 3717 } 3718 } 3719 3720 np->nl_realip = nat->nat_outip; 3721 np->nl_realport = nat->nat_outport; 3722 } 3723 } 3724 3725 return nat; 3726 } 3727 3728 3729 /* ------------------------------------------------------------------------ */ 3730 /* Function: nat_match */ 3731 /* Returns: int - 0 == no match, 1 == match */ 3732 /* Parameters: fin(I) - pointer to packet information */ 3733 /* np(I) - pointer to NAT rule */ 3734 /* */ 3735 /* Pull the matching of a packet against a NAT rule out of that complex */ 3736 /* loop inside fr_checknatin() and lay it out properly in its own function. */ 3737 /* ------------------------------------------------------------------------ */ 3738 static int nat_match(fin, np) 3739 fr_info_t *fin; 3740 ipnat_t *np; 3741 { 3742 frtuc_t *ft; 3743 3744 if (fin->fin_v != 4) 3745 return 0; 3746 3747 if (np->in_p && fin->fin_p != np->in_p) 3748 return 0; 3749 3750 if (fin->fin_out) { 3751 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK))) 3752 return 0; 3753 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip) 3754 ^ ((np->in_flags & IPN_NOTSRC) != 0)) 3755 return 0; 3756 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip) 3757 ^ ((np->in_flags & IPN_NOTDST) != 0)) 3758 return 0; 3759 } else { 3760 if (!(np->in_redir & NAT_REDIRECT)) 3761 return 0; 3762 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip) 3763 ^ ((np->in_flags & IPN_NOTSRC) != 0)) 3764 return 0; 3765 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip) 3766 ^ ((np->in_flags & IPN_NOTDST) != 0)) 3767 return 0; 3768 } 3769 3770 ft = &np->in_tuc; 3771 if (!(fin->fin_flx & FI_TCPUDP) || 3772 (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) { 3773 if (ft->ftu_scmp || ft->ftu_dcmp) 3774 return 0; 3775 return 1; 3776 } 3777 3778 return fr_tcpudpchk(fin, ft); 3779 } 3780 3781 3782 /* ------------------------------------------------------------------------ */ 3783 /* Function: nat_update */ 3784 /* Returns: Nil */ 3785 /* Parameters: nat(I) - pointer to NAT structure */ 3786 /* np(I) - pointer to NAT rule */ 3787 /* */ 3788 /* Updates the lifetime of a NAT table entry for non-TCP packets. Must be */ 3789 /* called with fin_rev updated - i.e. after calling nat_proto(). */ 3790 /* ------------------------------------------------------------------------ */ 3791 void nat_update(fin, nat, np) 3792 fr_info_t *fin; 3793 nat_t *nat; 3794 ipnat_t *np; 3795 { 3796 ipftq_t *ifq, *ifq2; 3797 ipftqent_t *tqe; 3798 ipf_stack_t *ifs = fin->fin_ifs; 3799 3800 MUTEX_ENTER(&nat->nat_lock); 3801 tqe = &nat->nat_tqe; 3802 ifq = tqe->tqe_ifq; 3803 3804 /* 3805 * We allow over-riding of NAT timeouts from NAT rules, even for 3806 * TCP, however, if it is TCP and there is no rule timeout set, 3807 * then do not update the timeout here. 3808 */ 3809 if (np != NULL) 3810 ifq2 = np->in_tqehead[fin->fin_rev]; 3811 else 3812 ifq2 = NULL; 3813 3814 if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) { 3815 (void) fr_tcp_age(&nat->nat_tqe, fin, ifs->ifs_nat_tqb, 0); 3816 } else { 3817 if (ifq2 == NULL) { 3818 if (nat->nat_p == IPPROTO_UDP) 3819 ifq2 = &ifs->ifs_nat_udptq; 3820 else if (nat->nat_p == IPPROTO_ICMP) 3821 ifq2 = &ifs->ifs_nat_icmptq; 3822 else 3823 ifq2 = &ifs->ifs_nat_iptq; 3824 } 3825 3826 fr_movequeue(tqe, ifq, ifq2, ifs); 3827 } 3828 MUTEX_EXIT(&nat->nat_lock); 3829 } 3830 3831 3832 /* ------------------------------------------------------------------------ */ 3833 /* Function: fr_checknatout */ 3834 /* Returns: int - -1 == packet failed NAT checks so block it, */ 3835 /* 0 == no packet translation occurred, */ 3836 /* 1 == packet was successfully translated. */ 3837 /* Parameters: fin(I) - pointer to packet information */ 3838 /* passp(I) - pointer to filtering result flags */ 3839 /* */ 3840 /* Check to see if an outcoming packet should be changed. ICMP packets are */ 3841 /* first checked to see if they match an existing entry (if an error), */ 3842 /* otherwise a search of the current NAT table is made. If neither results */ 3843 /* in a match then a search for a matching NAT rule is made. Create a new */ 3844 /* NAT entry if a we matched a NAT rule. Lastly, actually change the */ 3845 /* packet header(s) as required. */ 3846 /* ------------------------------------------------------------------------ */ 3847 int fr_checknatout(fin, passp) 3848 fr_info_t *fin; 3849 u_32_t *passp; 3850 { 3851 ipnat_t *np = NULL, *npnext; 3852 struct ifnet *ifp, *sifp; 3853 icmphdr_t *icmp = NULL; 3854 tcphdr_t *tcp = NULL; 3855 int rval, natfailed; 3856 u_int nflags = 0; 3857 u_32_t ipa, iph; 3858 int natadd = 1; 3859 frentry_t *fr; 3860 nat_t *nat; 3861 ipf_stack_t *ifs = fin->fin_ifs; 3862 3863 if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0) 3864 return 0; 3865 3866 natfailed = 0; 3867 fr = fin->fin_fr; 3868 sifp = fin->fin_ifp; 3869 if ((fr != NULL) && !(fr->fr_flags & FR_DUP) && 3870 fr->fr_tifs[fin->fin_rev].fd_ifp && 3871 fr->fr_tifs[fin->fin_rev].fd_ifp != (void *)-1) 3872 fin->fin_ifp = fr->fr_tifs[fin->fin_rev].fd_ifp; 3873 ifp = fin->fin_ifp; 3874 3875 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 3876 switch (fin->fin_p) 3877 { 3878 case IPPROTO_TCP : 3879 nflags = IPN_TCP; 3880 break; 3881 case IPPROTO_UDP : 3882 nflags = IPN_UDP; 3883 break; 3884 case IPPROTO_ICMP : 3885 icmp = fin->fin_dp; 3886 3887 /* 3888 * This is an incoming packet, so the destination is 3889 * the icmp_id and the source port equals 0 3890 */ 3891 if (nat_icmpquerytype4(icmp->icmp_type)) 3892 nflags = IPN_ICMPQUERY; 3893 break; 3894 default : 3895 break; 3896 } 3897 3898 if ((nflags & IPN_TCPUDP)) 3899 tcp = fin->fin_dp; 3900 } 3901 3902 ipa = fin->fin_saddr; 3903 3904 READ_ENTER(&ifs->ifs_ipf_nat); 3905 3906 if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && 3907 (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND))) 3908 /*EMPTY*/; 3909 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin))) 3910 natadd = 0; 3911 else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p, 3912 fin->fin_src, fin->fin_dst))) { 3913 nflags = nat->nat_flags; 3914 } else { 3915 u_32_t hv, msk, nmsk; 3916 3917 /* 3918 * If there is no current entry in the nat table for this IP#, 3919 * create one for it (if there is a matching rule). 3920 */ 3921 msk = 0xffffffff; 3922 nmsk = ifs->ifs_nat_masks; 3923 maskloop: 3924 iph = ipa & htonl(msk); 3925 hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_natrules_sz); 3926 for (np = ifs->ifs_nat_rules[hv]; np; np = npnext) { 3927 npnext = np->in_mnext; 3928 if ((np->in_ifps[1] && (np->in_ifps[1] != ifp))) 3929 continue; 3930 if (np->in_v != fin->fin_v) 3931 continue; 3932 if (np->in_p && (np->in_p != fin->fin_p)) 3933 continue; 3934 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags)) 3935 continue; 3936 if (np->in_flags & IPN_FILTER) { 3937 if (!nat_match(fin, np)) 3938 continue; 3939 } else if ((ipa & np->in_inmsk) != np->in_inip) 3940 continue; 3941 3942 if ((fr != NULL) && 3943 !fr_matchtag(&np->in_tag, &fr->fr_nattag)) 3944 continue; 3945 3946 if (*np->in_plabel != '\0') { 3947 if (((np->in_flags & IPN_FILTER) == 0) && 3948 (np->in_dport != tcp->th_dport)) 3949 continue; 3950 if (appr_ok(fin, tcp, np) == 0) 3951 continue; 3952 } 3953 3954 ATOMIC_INC32(np->in_use); 3955 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3956 WRITE_ENTER(&ifs->ifs_ipf_nat); 3957 nat = nat_new(fin, np, NULL, nflags, NAT_OUTBOUND); 3958 if (nat != NULL) { 3959 np->in_use--; 3960 np->in_hits++; 3961 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3962 break; 3963 } 3964 natfailed = -1; 3965 npnext = np->in_mnext; 3966 fr_ipnatderef(&np, ifs); 3967 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3968 } 3969 if ((np == NULL) && (nmsk != 0)) { 3970 while (nmsk) { 3971 msk <<= 1; 3972 if (nmsk & 0x80000000) 3973 break; 3974 nmsk <<= 1; 3975 } 3976 if (nmsk != 0) { 3977 nmsk <<= 1; 3978 goto maskloop; 3979 } 3980 } 3981 } 3982 3983 if (nat != NULL) { 3984 rval = fr_natout(fin, nat, natadd, nflags); 3985 if (rval == 1) { 3986 MUTEX_ENTER(&nat->nat_lock); 3987 nat->nat_ref++; 3988 MUTEX_EXIT(&nat->nat_lock); 3989 nat->nat_touched = ifs->ifs_fr_ticks; 3990 fin->fin_nat = nat; 3991 } 3992 } else 3993 rval = natfailed; 3994 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3995 3996 if (rval == -1) { 3997 if (passp != NULL) 3998 *passp = FR_BLOCK; 3999 fin->fin_flx |= FI_BADNAT; 4000 } 4001 fin->fin_ifp = sifp; 4002 return rval; 4003 } 4004 4005 /* ------------------------------------------------------------------------ */ 4006 /* Function: fr_natout */ 4007 /* Returns: int - -1 == packet failed NAT checks so block it, */ 4008 /* 1 == packet was successfully translated. */ 4009 /* Parameters: fin(I) - pointer to packet information */ 4010 /* nat(I) - pointer to NAT structure */ 4011 /* natadd(I) - flag indicating if it is safe to add frag cache */ 4012 /* nflags(I) - NAT flags set for this packet */ 4013 /* */ 4014 /* Translate a packet coming "out" on an interface. */ 4015 /* ------------------------------------------------------------------------ */ 4016 int fr_natout(fin, nat, natadd, nflags) 4017 fr_info_t *fin; 4018 nat_t *nat; 4019 int natadd; 4020 u_32_t nflags; 4021 { 4022 icmphdr_t *icmp; 4023 u_short *csump; 4024 u_32_t sumd; 4025 tcphdr_t *tcp; 4026 ipnat_t *np; 4027 int i; 4028 ipf_stack_t *ifs = fin->fin_ifs; 4029 4030 if (fin->fin_v == 6) { 4031 #ifdef USE_INET6 4032 return fr_nat6out(fin, nat, natadd, nflags); 4033 #else 4034 return NULL; 4035 #endif 4036 } 4037 4038 #if SOLARIS && defined(_KERNEL) 4039 net_data_t net_data_p = ifs->ifs_ipf_ipv4; 4040 #endif 4041 4042 tcp = NULL; 4043 icmp = NULL; 4044 csump = NULL; 4045 np = nat->nat_ptr; 4046 4047 if ((natadd != 0) && (fin->fin_flx & FI_FRAG)) 4048 (void) fr_nat_newfrag(fin, 0, nat); 4049 4050 MUTEX_ENTER(&nat->nat_lock); 4051 nat->nat_bytes[1] += fin->fin_plen; 4052 nat->nat_pkts[1]++; 4053 MUTEX_EXIT(&nat->nat_lock); 4054 4055 /* 4056 * Fix up checksums, not by recalculating them, but 4057 * simply computing adjustments. 4058 * This is only done for STREAMS based IP implementations where the 4059 * checksum has already been calculated by IP. In all other cases, 4060 * IPFilter is called before the checksum needs calculating so there 4061 * is no call to modify whatever is in the header now. 4062 */ 4063 ASSERT(fin->fin_m != NULL); 4064 if (fin->fin_v == 4 && !NET_IS_HCK_L3_FULL(net_data_p, fin->fin_m)) { 4065 if (nflags == IPN_ICMPERR) { 4066 u_32_t s1, s2; 4067 4068 s1 = LONG_SUM(ntohl(fin->fin_saddr)); 4069 s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr)); 4070 CALC_SUMD(s1, s2, sumd); 4071 4072 fix_outcksum(&fin->fin_ip->ip_sum, sumd); 4073 } 4074 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ 4075 defined(linux) || defined(BRIDGE_IPF) 4076 else { 4077 /* 4078 * Strictly speaking, this isn't necessary on BSD 4079 * kernels because they do checksum calculation after 4080 * this code has run BUT if ipfilter is being used 4081 * to do NAT as a bridge, that code doesn't exist. 4082 */ 4083 if (nat->nat_dir == NAT_OUTBOUND) 4084 fix_outcksum(&fin->fin_ip->ip_sum, 4085 nat->nat_ipsumd); 4086 else 4087 fix_incksum(&fin->fin_ip->ip_sum, 4088 nat->nat_ipsumd); 4089 } 4090 #endif 4091 } 4092 4093 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 4094 if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) { 4095 tcp = fin->fin_dp; 4096 4097 tcp->th_sport = nat->nat_outport; 4098 fin->fin_data[0] = ntohs(nat->nat_outport); 4099 } 4100 4101 if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) { 4102 icmp = fin->fin_dp; 4103 icmp->icmp_id = nat->nat_outport; 4104 } 4105 4106 csump = nat_proto(fin, nat, nflags); 4107 } 4108 4109 fin->fin_ip->ip_src = nat->nat_outip; 4110 4111 nat_update(fin, nat, np); 4112 4113 /* 4114 * The above comments do not hold for layer 4 (or higher) checksums... 4115 */ 4116 if (csump != NULL && !NET_IS_HCK_L4_FULL(net_data_p, fin->fin_m)) { 4117 if (nflags & IPN_TCPUDP && 4118 NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) 4119 sumd = nat->nat_sumd[1]; 4120 else 4121 sumd = nat->nat_sumd[0]; 4122 4123 if (nat->nat_dir == NAT_OUTBOUND) 4124 fix_outcksum(csump, sumd); 4125 else 4126 fix_incksum(csump, sumd); 4127 } 4128 #ifdef IPFILTER_SYNC 4129 ipfsync_update(SMC_NAT, fin, nat->nat_sync); 4130 #endif 4131 /* ------------------------------------------------------------- */ 4132 /* A few quick notes: */ 4133 /* Following are test conditions prior to calling the */ 4134 /* appr_check routine. */ 4135 /* */ 4136 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */ 4137 /* with a redirect rule, we attempt to match the packet's */ 4138 /* source port against in_dport, otherwise we'd compare the */ 4139 /* packet's destination. */ 4140 /* ------------------------------------------------------------- */ 4141 if ((np != NULL) && (np->in_apr != NULL)) { 4142 i = appr_check(fin, nat); 4143 if (i == 0) 4144 i = 1; 4145 } else 4146 i = 1; 4147 ifs->ifs_nat_stats.ns_mapped[1]++; 4148 fin->fin_flx |= FI_NATED; 4149 return i; 4150 } 4151 4152 4153 /* ------------------------------------------------------------------------ */ 4154 /* Function: fr_checknatin */ 4155 /* Returns: int - -1 == packet failed NAT checks so block it, */ 4156 /* 0 == no packet translation occurred, */ 4157 /* 1 == packet was successfully translated. */ 4158 /* Parameters: fin(I) - pointer to packet information */ 4159 /* passp(I) - pointer to filtering result flags */ 4160 /* */ 4161 /* Check to see if an incoming packet should be changed. ICMP packets are */ 4162 /* first checked to see if they match an existing entry (if an error), */ 4163 /* otherwise a search of the current NAT table is made. If neither results */ 4164 /* in a match then a search for a matching NAT rule is made. Create a new */ 4165 /* NAT entry if a we matched a NAT rule. Lastly, actually change the */ 4166 /* packet header(s) as required. */ 4167 /* ------------------------------------------------------------------------ */ 4168 int fr_checknatin(fin, passp) 4169 fr_info_t *fin; 4170 u_32_t *passp; 4171 { 4172 u_int nflags, natadd; 4173 ipnat_t *np, *npnext; 4174 int rval, natfailed; 4175 struct ifnet *ifp; 4176 struct in_addr in; 4177 icmphdr_t *icmp; 4178 tcphdr_t *tcp; 4179 u_short dport; 4180 nat_t *nat; 4181 u_32_t iph; 4182 ipf_stack_t *ifs = fin->fin_ifs; 4183 4184 if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0) 4185 return 0; 4186 4187 tcp = NULL; 4188 icmp = NULL; 4189 dport = 0; 4190 natadd = 1; 4191 nflags = 0; 4192 natfailed = 0; 4193 ifp = fin->fin_ifp; 4194 4195 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 4196 switch (fin->fin_p) 4197 { 4198 case IPPROTO_TCP : 4199 nflags = IPN_TCP; 4200 break; 4201 case IPPROTO_UDP : 4202 nflags = IPN_UDP; 4203 break; 4204 case IPPROTO_ICMP : 4205 icmp = fin->fin_dp; 4206 4207 /* 4208 * This is an incoming packet, so the destination is 4209 * the icmp_id and the source port equals 0 4210 */ 4211 if (nat_icmpquerytype4(icmp->icmp_type)) { 4212 nflags = IPN_ICMPQUERY; 4213 dport = icmp->icmp_id; 4214 } break; 4215 default : 4216 break; 4217 } 4218 4219 if ((nflags & IPN_TCPUDP)) { 4220 tcp = fin->fin_dp; 4221 dport = tcp->th_dport; 4222 } 4223 } 4224 4225 in = fin->fin_dst; 4226 4227 READ_ENTER(&ifs->ifs_ipf_nat); 4228 4229 if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && 4230 (nat = nat_icmperror(fin, &nflags, NAT_INBOUND))) 4231 /*EMPTY*/; 4232 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin))) 4233 natadd = 0; 4234 else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p, 4235 fin->fin_src, in))) { 4236 nflags = nat->nat_flags; 4237 } else { 4238 u_32_t hv, msk, rmsk; 4239 4240 rmsk = ifs->ifs_rdr_masks; 4241 msk = 0xffffffff; 4242 /* 4243 * If there is no current entry in the nat table for this IP#, 4244 * create one for it (if there is a matching rule). 4245 */ 4246 maskloop: 4247 iph = in.s_addr & htonl(msk); 4248 hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_rdrrules_sz); 4249 for (np = ifs->ifs_rdr_rules[hv]; np; np = npnext) { 4250 npnext = np->in_rnext; 4251 if (np->in_ifps[0] && (np->in_ifps[0] != ifp)) 4252 continue; 4253 if (np->in_v != fin->fin_v) 4254 continue; 4255 if (np->in_p && (np->in_p != fin->fin_p)) 4256 continue; 4257 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags)) 4258 continue; 4259 if (np->in_flags & IPN_FILTER) { 4260 if (!nat_match(fin, np)) 4261 continue; 4262 } else { 4263 if ((in.s_addr & np->in_outmsk) != np->in_outip) 4264 continue; 4265 if (np->in_pmin && 4266 ((ntohs(np->in_pmax) < ntohs(dport)) || 4267 (ntohs(dport) < ntohs(np->in_pmin)))) 4268 continue; 4269 } 4270 4271 if (*np->in_plabel != '\0') { 4272 if (!appr_ok(fin, tcp, np)) { 4273 continue; 4274 } 4275 } 4276 4277 ATOMIC_INC32(np->in_use); 4278 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4279 WRITE_ENTER(&ifs->ifs_ipf_nat); 4280 nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND); 4281 if (nat != NULL) { 4282 np->in_use--; 4283 np->in_hits++; 4284 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 4285 break; 4286 } 4287 natfailed = -1; 4288 npnext = np->in_rnext; 4289 fr_ipnatderef(&np, ifs); 4290 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 4291 } 4292 4293 if ((np == NULL) && (rmsk != 0)) { 4294 while (rmsk) { 4295 msk <<= 1; 4296 if (rmsk & 0x80000000) 4297 break; 4298 rmsk <<= 1; 4299 } 4300 if (rmsk != 0) { 4301 rmsk <<= 1; 4302 goto maskloop; 4303 } 4304 } 4305 } 4306 if (nat != NULL) { 4307 rval = fr_natin(fin, nat, natadd, nflags); 4308 if (rval == 1) { 4309 MUTEX_ENTER(&nat->nat_lock); 4310 nat->nat_ref++; 4311 MUTEX_EXIT(&nat->nat_lock); 4312 nat->nat_touched = ifs->ifs_fr_ticks; 4313 fin->fin_nat = nat; 4314 fin->fin_state = nat->nat_state; 4315 } 4316 } else 4317 rval = natfailed; 4318 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4319 4320 if (rval == -1) { 4321 if (passp != NULL) 4322 *passp = FR_BLOCK; 4323 fin->fin_flx |= FI_BADNAT; 4324 } 4325 return rval; 4326 } 4327 4328 4329 /* ------------------------------------------------------------------------ */ 4330 /* Function: fr_natin */ 4331 /* Returns: int - -1 == packet failed NAT checks so block it, */ 4332 /* 1 == packet was successfully translated. */ 4333 /* Parameters: fin(I) - pointer to packet information */ 4334 /* nat(I) - pointer to NAT structure */ 4335 /* natadd(I) - flag indicating if it is safe to add frag cache */ 4336 /* nflags(I) - NAT flags set for this packet */ 4337 /* Locks Held: ipf_nat (READ) */ 4338 /* */ 4339 /* Translate a packet coming "in" on an interface. */ 4340 /* ------------------------------------------------------------------------ */ 4341 int fr_natin(fin, nat, natadd, nflags) 4342 fr_info_t *fin; 4343 nat_t *nat; 4344 int natadd; 4345 u_32_t nflags; 4346 { 4347 icmphdr_t *icmp; 4348 u_short *csump; 4349 tcphdr_t *tcp; 4350 ipnat_t *np; 4351 int i; 4352 ipf_stack_t *ifs = fin->fin_ifs; 4353 4354 if (fin->fin_v == 6) { 4355 #ifdef USE_INET6 4356 return fr_nat6in(fin, nat, natadd, nflags); 4357 #else 4358 return NULL; 4359 #endif 4360 } 4361 4362 #if SOLARIS && defined(_KERNEL) 4363 net_data_t net_data_p = ifs->ifs_ipf_ipv4; 4364 #endif 4365 4366 tcp = NULL; 4367 csump = NULL; 4368 np = nat->nat_ptr; 4369 fin->fin_fr = nat->nat_fr; 4370 4371 if ((natadd != 0) && (fin->fin_flx & FI_FRAG)) 4372 (void) fr_nat_newfrag(fin, 0, nat); 4373 4374 if (np != NULL) { 4375 4376 /* ------------------------------------------------------------- */ 4377 /* A few quick notes: */ 4378 /* Following are test conditions prior to calling the */ 4379 /* appr_check routine. */ 4380 /* */ 4381 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */ 4382 /* with a map rule, we attempt to match the packet's */ 4383 /* source port against in_dport, otherwise we'd compare the */ 4384 /* packet's destination. */ 4385 /* ------------------------------------------------------------- */ 4386 if (np->in_apr != NULL) { 4387 i = appr_check(fin, nat); 4388 if (i == -1) { 4389 return -1; 4390 } 4391 } 4392 } 4393 4394 #ifdef IPFILTER_SYNC 4395 ipfsync_update(SMC_NAT, fin, nat->nat_sync); 4396 #endif 4397 4398 MUTEX_ENTER(&nat->nat_lock); 4399 nat->nat_bytes[0] += fin->fin_plen; 4400 nat->nat_pkts[0]++; 4401 MUTEX_EXIT(&nat->nat_lock); 4402 4403 fin->fin_ip->ip_dst = nat->nat_inip; 4404 fin->fin_fi.fi_daddr = nat->nat_inip.s_addr; 4405 if (nflags & IPN_TCPUDP) 4406 tcp = fin->fin_dp; 4407 4408 /* 4409 * Fix up checksums, not by recalculating them, but 4410 * simply computing adjustments. 4411 * Why only do this for some platforms on inbound packets ? 4412 * Because for those that it is done, IP processing is yet to happen 4413 * and so the IPv4 header checksum has not yet been evaluated. 4414 * Perhaps it should always be done for the benefit of things like 4415 * fast forwarding (so that it doesn't need to be recomputed) but with 4416 * header checksum offloading, perhaps it is a moot point. 4417 */ 4418 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ 4419 defined(__osf__) || defined(linux) 4420 if (nat->nat_dir == NAT_OUTBOUND) 4421 fix_incksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd); 4422 else 4423 fix_outcksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd); 4424 #endif 4425 4426 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 4427 if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) { 4428 tcp->th_dport = nat->nat_inport; 4429 fin->fin_data[1] = ntohs(nat->nat_inport); 4430 } 4431 4432 4433 if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) { 4434 icmp = fin->fin_dp; 4435 4436 icmp->icmp_id = nat->nat_inport; 4437 } 4438 4439 csump = nat_proto(fin, nat, nflags); 4440 } 4441 4442 nat_update(fin, nat, np); 4443 4444 /* 4445 * In case they are being forwarded, inbound packets always need to have 4446 * their checksum adjusted even if hardware checksum validation said OK. 4447 */ 4448 if (csump != NULL) { 4449 if (nat->nat_dir == NAT_OUTBOUND) 4450 fix_incksum(csump, nat->nat_sumd[0]); 4451 else 4452 fix_outcksum(csump, nat->nat_sumd[0]); 4453 } 4454 4455 #if SOLARIS && defined(_KERNEL) 4456 if (nflags & IPN_TCPUDP && 4457 NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) { 4458 /* 4459 * Need to adjust the partial checksum result stored in 4460 * db_cksum16, which will be used for validation in IP. 4461 * See IP_CKSUM_RECV(). 4462 * Adjustment data should be the inverse of the IP address 4463 * changes, because db_cksum16 is supposed to be the complement 4464 * of the pesudo header. 4465 */ 4466 csump = &fin->fin_m->b_datap->db_cksum16; 4467 if (nat->nat_dir == NAT_OUTBOUND) 4468 fix_outcksum(csump, nat->nat_sumd[1]); 4469 else 4470 fix_incksum(csump, nat->nat_sumd[1]); 4471 } 4472 #endif 4473 4474 ifs->ifs_nat_stats.ns_mapped[0]++; 4475 fin->fin_flx |= FI_NATED; 4476 if (np != NULL && np->in_tag.ipt_num[0] != 0) 4477 fin->fin_nattag = &np->in_tag; 4478 return 1; 4479 } 4480 4481 4482 /* ------------------------------------------------------------------------ */ 4483 /* Function: nat_proto */ 4484 /* Returns: u_short* - pointer to transport header checksum to update, */ 4485 /* NULL if the transport protocol is not recognised */ 4486 /* as needing a checksum update. */ 4487 /* Parameters: fin(I) - pointer to packet information */ 4488 /* nat(I) - pointer to NAT structure */ 4489 /* nflags(I) - NAT flags set for this packet */ 4490 /* */ 4491 /* Return the pointer to the checksum field for each protocol so understood.*/ 4492 /* If support for making other changes to a protocol header is required, */ 4493 /* that is not strictly 'address' translation, such as clamping the MSS in */ 4494 /* TCP down to a specific value, then do it from here. */ 4495 /* ------------------------------------------------------------------------ */ 4496 u_short *nat_proto(fin, nat, nflags) 4497 fr_info_t *fin; 4498 nat_t *nat; 4499 u_int nflags; 4500 { 4501 icmphdr_t *icmp; 4502 struct icmp6_hdr *icmp6; 4503 u_short *csump; 4504 tcphdr_t *tcp; 4505 udphdr_t *udp; 4506 4507 csump = NULL; 4508 if (fin->fin_out == 0) { 4509 fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND); 4510 } else { 4511 fin->fin_rev = (nat->nat_dir == NAT_INBOUND); 4512 } 4513 4514 switch (fin->fin_p) 4515 { 4516 case IPPROTO_TCP : 4517 tcp = fin->fin_dp; 4518 4519 csump = &tcp->th_sum; 4520 4521 /* 4522 * Do a MSS CLAMPING on a SYN packet, 4523 * only deal IPv4 for now. 4524 */ 4525 if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0) 4526 nat_mssclamp(tcp, nat->nat_mssclamp, csump); 4527 4528 break; 4529 4530 case IPPROTO_UDP : 4531 udp = fin->fin_dp; 4532 4533 if (udp->uh_sum) 4534 csump = &udp->uh_sum; 4535 break; 4536 4537 case IPPROTO_ICMP : 4538 icmp = fin->fin_dp; 4539 4540 if ((nflags & IPN_ICMPQUERY) != 0) { 4541 if (icmp->icmp_cksum != 0) 4542 csump = &icmp->icmp_cksum; 4543 } 4544 break; 4545 4546 case IPPROTO_ICMPV6 : 4547 icmp6 = fin->fin_dp; 4548 4549 if ((nflags & IPN_ICMPQUERY) != 0) { 4550 if (icmp6->icmp6_cksum != 0) 4551 csump = &icmp6->icmp6_cksum; 4552 } 4553 break; 4554 } 4555 return csump; 4556 } 4557 4558 4559 /* ------------------------------------------------------------------------ */ 4560 /* Function: fr_natunload */ 4561 /* Returns: Nil */ 4562 /* Parameters: Nil */ 4563 /* */ 4564 /* Free all memory used by NAT structures allocated at runtime. */ 4565 /* ------------------------------------------------------------------------ */ 4566 void fr_natunload(ifs) 4567 ipf_stack_t *ifs; 4568 { 4569 ipftq_t *ifq, *ifqnext; 4570 4571 (void) nat_clearlist(ifs); 4572 (void) nat_flushtable(ifs); 4573 4574 /* 4575 * Proxy timeout queues are not cleaned here because although they 4576 * exist on the NAT list, appr_unload is called after fr_natunload 4577 * and the proxies actually are responsible for them being created. 4578 * Should the proxy timeouts have their own list? There's no real 4579 * justification as this is the only complication. 4580 */ 4581 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { 4582 ifqnext = ifq->ifq_next; 4583 if (((ifq->ifq_flags & IFQF_PROXY) == 0) && 4584 (fr_deletetimeoutqueue(ifq) == 0)) 4585 fr_freetimeoutqueue(ifq, ifs); 4586 } 4587 4588 if (ifs->ifs_nat_table[0] != NULL) { 4589 KFREES(ifs->ifs_nat_table[0], 4590 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 4591 ifs->ifs_nat_table[0] = NULL; 4592 } 4593 if (ifs->ifs_nat_table[1] != NULL) { 4594 KFREES(ifs->ifs_nat_table[1], 4595 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 4596 ifs->ifs_nat_table[1] = NULL; 4597 } 4598 if (ifs->ifs_nat_rules != NULL) { 4599 KFREES(ifs->ifs_nat_rules, 4600 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz); 4601 ifs->ifs_nat_rules = NULL; 4602 } 4603 if (ifs->ifs_rdr_rules != NULL) { 4604 KFREES(ifs->ifs_rdr_rules, 4605 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz); 4606 ifs->ifs_rdr_rules = NULL; 4607 } 4608 if (ifs->ifs_maptable != NULL) { 4609 KFREES(ifs->ifs_maptable, 4610 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); 4611 ifs->ifs_maptable = NULL; 4612 } 4613 if (ifs->ifs_nat_stats.ns_bucketlen[0] != NULL) { 4614 KFREES(ifs->ifs_nat_stats.ns_bucketlen[0], 4615 sizeof(u_long *) * ifs->ifs_ipf_nattable_sz); 4616 ifs->ifs_nat_stats.ns_bucketlen[0] = NULL; 4617 } 4618 if (ifs->ifs_nat_stats.ns_bucketlen[1] != NULL) { 4619 KFREES(ifs->ifs_nat_stats.ns_bucketlen[1], 4620 sizeof(u_long *) * ifs->ifs_ipf_nattable_sz); 4621 ifs->ifs_nat_stats.ns_bucketlen[1] = NULL; 4622 } 4623 4624 if (ifs->ifs_fr_nat_maxbucket_reset == 1) 4625 ifs->ifs_fr_nat_maxbucket = 0; 4626 4627 if (ifs->ifs_fr_nat_init == 1) { 4628 ifs->ifs_fr_nat_init = 0; 4629 fr_sttab_destroy(ifs->ifs_nat_tqb); 4630 4631 RW_DESTROY(&ifs->ifs_ipf_natfrag); 4632 RW_DESTROY(&ifs->ifs_ipf_nat); 4633 4634 MUTEX_DESTROY(&ifs->ifs_ipf_nat_new); 4635 MUTEX_DESTROY(&ifs->ifs_ipf_natio); 4636 4637 MUTEX_DESTROY(&ifs->ifs_nat_udptq.ifq_lock); 4638 MUTEX_DESTROY(&ifs->ifs_nat_icmptq.ifq_lock); 4639 MUTEX_DESTROY(&ifs->ifs_nat_iptq.ifq_lock); 4640 } 4641 } 4642 4643 4644 /* ------------------------------------------------------------------------ */ 4645 /* Function: fr_natexpire */ 4646 /* Returns: Nil */ 4647 /* Parameters: Nil */ 4648 /* */ 4649 /* Check all of the timeout queues for entries at the top which need to be */ 4650 /* expired. */ 4651 /* ------------------------------------------------------------------------ */ 4652 void fr_natexpire(ifs) 4653 ipf_stack_t *ifs; 4654 { 4655 ipftq_t *ifq, *ifqnext; 4656 ipftqent_t *tqe, *tqn; 4657 int i; 4658 SPL_INT(s); 4659 4660 SPL_NET(s); 4661 WRITE_ENTER(&ifs->ifs_ipf_nat); 4662 for (ifq = ifs->ifs_nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) { 4663 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { 4664 if (tqe->tqe_die > ifs->ifs_fr_ticks) 4665 break; 4666 tqn = tqe->tqe_next; 4667 nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs); 4668 } 4669 } 4670 4671 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { 4672 ifqnext = ifq->ifq_next; 4673 4674 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { 4675 if (tqe->tqe_die > ifs->ifs_fr_ticks) 4676 break; 4677 tqn = tqe->tqe_next; 4678 nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs); 4679 } 4680 } 4681 4682 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { 4683 ifqnext = ifq->ifq_next; 4684 4685 if (((ifq->ifq_flags & IFQF_DELETE) != 0) && 4686 (ifq->ifq_ref == 0)) { 4687 fr_freetimeoutqueue(ifq, ifs); 4688 } 4689 } 4690 4691 if (ifs->ifs_nat_doflush != 0) { 4692 (void) nat_extraflush(2, ifs); 4693 ifs->ifs_nat_doflush = 0; 4694 } 4695 4696 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4697 SPL_X(s); 4698 } 4699 4700 4701 /* ------------------------------------------------------------------------ */ 4702 /* Function: fr_nataddrsync */ 4703 /* Returns: Nil */ 4704 /* Parameters: ifp(I) - pointer to network interface */ 4705 /* addr(I) - pointer to new network address */ 4706 /* */ 4707 /* Walk through all of the currently active NAT sessions, looking for those */ 4708 /* which need to have their translated address updated (where the interface */ 4709 /* matches the one passed in) and change it, recalculating the checksum sum */ 4710 /* difference too. */ 4711 /* ------------------------------------------------------------------------ */ 4712 void fr_nataddrsync(v, ifp, addr, ifs) 4713 int v; 4714 void *ifp; 4715 void *addr; 4716 ipf_stack_t *ifs; 4717 { 4718 u_32_t sum1, sum2, sumd; 4719 nat_t *nat; 4720 ipnat_t *np; 4721 SPL_INT(s); 4722 4723 if (ifs->ifs_fr_running <= 0) 4724 return; 4725 4726 SPL_NET(s); 4727 WRITE_ENTER(&ifs->ifs_ipf_nat); 4728 4729 if (ifs->ifs_fr_running <= 0) { 4730 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4731 return; 4732 } 4733 4734 /* 4735 * Change IP addresses for NAT sessions for any protocol except TCP 4736 * since it will break the TCP connection anyway. The only rules 4737 * which will get changed are those which are "map ... -> 0/32", 4738 * where the rule specifies the address is taken from the interface. 4739 */ 4740 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4741 if (addr != NULL) { 4742 if (((ifp != NULL) && ifp != (nat->nat_ifps[0])) || 4743 ((nat->nat_flags & IPN_TCP) != 0)) 4744 continue; 4745 if ((np = nat->nat_ptr) == NULL) 4746 continue; 4747 if (v == 4 && np->in_v == 4) { 4748 if (np->in_nip || np->in_outmsk != 0xffffffff) 4749 continue; 4750 /* 4751 * Change the map-to address to be the same as 4752 * the new one. 4753 */ 4754 sum1 = nat->nat_outip.s_addr; 4755 nat->nat_outip = *(struct in_addr *)addr; 4756 sum2 = nat->nat_outip.s_addr; 4757 } else if (v == 6 && np->in_v == 6) { 4758 if (!IP6_ISZERO(&np->in_next6.in6) || 4759 !IP6_ISONES(&np->in_out[1].in6)) 4760 continue; 4761 /* 4762 * Change the map-to address to be the same as 4763 * the new one. 4764 */ 4765 nat->nat_outip6.in6 = *(struct in6_addr *)addr; 4766 } else 4767 continue; 4768 4769 } else if (((ifp == NULL) || (ifp == nat->nat_ifps[0])) && 4770 !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr)) { 4771 if (np->in_v == 4 && (v == 4 || v == 0)) { 4772 struct in_addr in; 4773 if (np->in_outmsk != 0xffffffff || np->in_nip) 4774 continue; 4775 /* 4776 * Change the map-to address to be the same as 4777 * the new one. 4778 */ 4779 sum1 = nat->nat_outip.s_addr; 4780 if (fr_ifpaddr(4, FRI_NORMAL, nat->nat_ifps[0], 4781 &in, NULL, ifs) != -1) 4782 nat->nat_outip = in; 4783 sum2 = nat->nat_outip.s_addr; 4784 } else if (np->in_v == 6 && (v == 6 || v == 0)) { 4785 struct in6_addr in6; 4786 if (!IP6_ISZERO(&np->in_next6.in6) || 4787 !IP6_ISONES(&np->in_out[1].in6)) 4788 continue; 4789 /* 4790 * Change the map-to address to be the same as 4791 * the new one. 4792 */ 4793 if (fr_ifpaddr(6, FRI_NORMAL, nat->nat_ifps[0], 4794 (void *)&in6, NULL, ifs) != -1) 4795 nat->nat_outip6.in6 = in6; 4796 } else 4797 continue; 4798 } else { 4799 continue; 4800 } 4801 4802 if (sum1 == sum2) 4803 continue; 4804 /* 4805 * Readjust the checksum adjustment to take into 4806 * account the new IP#. 4807 */ 4808 CALC_SUMD(sum1, sum2, sumd); 4809 /* XXX - dont change for TCP when solaris does 4810 * hardware checksumming. 4811 */ 4812 sumd += nat->nat_sumd[0]; 4813 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); 4814 nat->nat_sumd[1] = nat->nat_sumd[0]; 4815 } 4816 4817 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4818 SPL_X(s); 4819 } 4820 4821 4822 /* ------------------------------------------------------------------------ */ 4823 /* Function: fr_natifpsync */ 4824 /* Returns: Nil */ 4825 /* Parameters: action(I) - how we are syncing */ 4826 /* ifp(I) - pointer to network interface */ 4827 /* name(I) - name of interface to sync to */ 4828 /* */ 4829 /* This function is used to resync the mapping of interface names and their */ 4830 /* respective 'pointers'. For "action == IPFSYNC_RESYNC", resync all */ 4831 /* interfaces by doing a new lookup of name to 'pointer'. For "action == */ 4832 /* IPFSYNC_NEWIFP", treat ifp as the new pointer value associated with */ 4833 /* "name" and for "action == IPFSYNC_OLDIFP", ifp is a pointer for which */ 4834 /* there is no longer any interface associated with it. */ 4835 /* ------------------------------------------------------------------------ */ 4836 void fr_natifpsync(action, v, ifp, name, ifs) 4837 int action, v; 4838 void *ifp; 4839 char *name; 4840 ipf_stack_t *ifs; 4841 { 4842 #if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL) 4843 int s; 4844 #endif 4845 nat_t *nat; 4846 ipnat_t *n; 4847 int nv; 4848 4849 if (ifs->ifs_fr_running <= 0) 4850 return; 4851 4852 SPL_NET(s); 4853 WRITE_ENTER(&ifs->ifs_ipf_nat); 4854 4855 if (ifs->ifs_fr_running <= 0) { 4856 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4857 return; 4858 } 4859 4860 switch (action) 4861 { 4862 case IPFSYNC_RESYNC : 4863 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4864 nv = (v == 0) ? nat->nat_v : v; 4865 if (nat->nat_v != nv) 4866 continue; 4867 if ((ifp == nat->nat_ifps[0]) || 4868 (nat->nat_ifps[0] == (void *)-1)) { 4869 nat->nat_ifps[0] = 4870 fr_resolvenic(nat->nat_ifnames[0], nv, ifs); 4871 } 4872 4873 if ((ifp == nat->nat_ifps[1]) || 4874 (nat->nat_ifps[1] == (void *)-1)) { 4875 nat->nat_ifps[1] = 4876 fr_resolvenic(nat->nat_ifnames[1], nv, ifs); 4877 } 4878 } 4879 4880 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { 4881 nv = (v == 0) ? (int)n->in_v : v; 4882 if ((int)n->in_v != nv) 4883 continue; 4884 if (n->in_ifps[0] == ifp || 4885 n->in_ifps[0] == (void *)-1) { 4886 n->in_ifps[0] = 4887 fr_resolvenic(n->in_ifnames[0], nv, ifs); 4888 } 4889 if (n->in_ifps[1] == ifp || 4890 n->in_ifps[1] == (void *)-1) { 4891 n->in_ifps[1] = 4892 fr_resolvenic(n->in_ifnames[1], nv, ifs); 4893 } 4894 } 4895 break; 4896 case IPFSYNC_NEWIFP : 4897 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4898 if (nat->nat_v != v) 4899 continue; 4900 if (!strncmp(name, nat->nat_ifnames[0], 4901 sizeof(nat->nat_ifnames[0]))) 4902 nat->nat_ifps[0] = ifp; 4903 if (!strncmp(name, nat->nat_ifnames[1], 4904 sizeof(nat->nat_ifnames[1]))) 4905 nat->nat_ifps[1] = ifp; 4906 } 4907 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { 4908 if ((int)n->in_v != v) 4909 continue; 4910 if (!strncmp(name, n->in_ifnames[0], 4911 sizeof(n->in_ifnames[0]))) 4912 n->in_ifps[0] = ifp; 4913 if (!strncmp(name, n->in_ifnames[1], 4914 sizeof(n->in_ifnames[1]))) 4915 n->in_ifps[1] = ifp; 4916 } 4917 break; 4918 case IPFSYNC_OLDIFP : 4919 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4920 if (nat->nat_v != v) 4921 continue; 4922 if (ifp == nat->nat_ifps[0]) 4923 nat->nat_ifps[0] = (void *)-1; 4924 if (ifp == nat->nat_ifps[1]) 4925 nat->nat_ifps[1] = (void *)-1; 4926 } 4927 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { 4928 if ((int)n->in_v != v) 4929 continue; 4930 if (n->in_ifps[0] == ifp) 4931 n->in_ifps[0] = (void *)-1; 4932 if (n->in_ifps[1] == ifp) 4933 n->in_ifps[1] = (void *)-1; 4934 } 4935 break; 4936 } 4937 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4938 SPL_X(s); 4939 } 4940 4941 4942 /* ------------------------------------------------------------------------ */ 4943 /* Function: nat_icmpquerytype4 */ 4944 /* Returns: int - 1 == success, 0 == failure */ 4945 /* Parameters: icmptype(I) - ICMP type number */ 4946 /* */ 4947 /* Tests to see if the ICMP type number passed is a query/response type or */ 4948 /* not. */ 4949 /* ------------------------------------------------------------------------ */ 4950 static INLINE int nat_icmpquerytype4(icmptype) 4951 int icmptype; 4952 { 4953 4954 /* 4955 * For the ICMP query NAT code, it is essential that both the query 4956 * and the reply match on the NAT rule. Because the NAT structure 4957 * does not keep track of the icmptype, and a single NAT structure 4958 * is used for all icmp types with the same src, dest and id, we 4959 * simply define the replies as queries as well. The funny thing is, 4960 * altough it seems silly to call a reply a query, this is exactly 4961 * as it is defined in the IPv4 specification 4962 */ 4963 4964 switch (icmptype) 4965 { 4966 4967 case ICMP_ECHOREPLY: 4968 case ICMP_ECHO: 4969 /* route aedvertisement/solliciation is currently unsupported: */ 4970 /* it would require rewriting the ICMP data section */ 4971 case ICMP_TSTAMP: 4972 case ICMP_TSTAMPREPLY: 4973 case ICMP_IREQ: 4974 case ICMP_IREQREPLY: 4975 case ICMP_MASKREQ: 4976 case ICMP_MASKREPLY: 4977 return 1; 4978 default: 4979 return 0; 4980 } 4981 } 4982 4983 4984 /* ------------------------------------------------------------------------ */ 4985 /* Function: nat_log */ 4986 /* Returns: Nil */ 4987 /* Parameters: nat(I) - pointer to NAT structure */ 4988 /* type(I) - type of log entry to create */ 4989 /* */ 4990 /* Creates a NAT log entry. */ 4991 /* ------------------------------------------------------------------------ */ 4992 void nat_log(nat, type, ifs) 4993 struct nat *nat; 4994 u_int type; 4995 ipf_stack_t *ifs; 4996 { 4997 #ifdef IPFILTER_LOG 4998 # ifndef LARGE_NAT 4999 struct ipnat *np; 5000 int rulen; 5001 # endif 5002 struct natlog natl; 5003 void *items[1]; 5004 size_t sizes[1]; 5005 int types[1]; 5006 5007 natl.nlg_inip = nat->nat_inip6; 5008 natl.nlg_outip = nat->nat_outip6; 5009 natl.nlg_origip = nat->nat_oip6; 5010 natl.nlg_bytes[0] = nat->nat_bytes[0]; 5011 natl.nlg_bytes[1] = nat->nat_bytes[1]; 5012 natl.nlg_pkts[0] = nat->nat_pkts[0]; 5013 natl.nlg_pkts[1] = nat->nat_pkts[1]; 5014 natl.nlg_origport = nat->nat_oport; 5015 natl.nlg_inport = nat->nat_inport; 5016 natl.nlg_outport = nat->nat_outport; 5017 natl.nlg_p = nat->nat_p; 5018 natl.nlg_type = type; 5019 natl.nlg_rule = -1; 5020 natl.nlg_v = nat->nat_v; 5021 # ifndef LARGE_NAT 5022 if (nat->nat_ptr != NULL) { 5023 for (rulen = 0, np = ifs->ifs_nat_list; np; 5024 np = np->in_next, rulen++) 5025 if (np == nat->nat_ptr) { 5026 natl.nlg_rule = rulen; 5027 break; 5028 } 5029 } 5030 # endif 5031 items[0] = &natl; 5032 sizes[0] = sizeof(natl); 5033 types[0] = 0; 5034 5035 (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1, ifs); 5036 #endif 5037 } 5038 5039 5040 #if defined(__OpenBSD__) 5041 /* ------------------------------------------------------------------------ */ 5042 /* Function: nat_ifdetach */ 5043 /* Returns: Nil */ 5044 /* Parameters: ifp(I) - pointer to network interface */ 5045 /* */ 5046 /* Compatibility interface for OpenBSD to trigger the correct updating of */ 5047 /* interface references within IPFilter. */ 5048 /* ------------------------------------------------------------------------ */ 5049 void nat_ifdetach(ifp, ifs) 5050 void *ifp; 5051 ipf_stack_t *ifs; 5052 { 5053 frsync(ifp, ifs); 5054 return; 5055 } 5056 #endif 5057 5058 5059 /* ------------------------------------------------------------------------ */ 5060 /* Function: fr_ipnatderef */ 5061 /* Returns: Nil */ 5062 /* Parameters: inp(I) - pointer to pointer to NAT rule */ 5063 /* Write Locks: ipf_nat */ 5064 /* */ 5065 /* ------------------------------------------------------------------------ */ 5066 void fr_ipnatderef(inp, ifs) 5067 ipnat_t **inp; 5068 ipf_stack_t *ifs; 5069 { 5070 ipnat_t *in; 5071 5072 in = *inp; 5073 *inp = NULL; 5074 in->in_use--; 5075 if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) { 5076 if (in->in_apr) 5077 appr_free(in->in_apr); 5078 KFREE(in); 5079 ifs->ifs_nat_stats.ns_rules--; 5080 #ifdef notdef 5081 #if SOLARIS 5082 if (ifs->ifs_nat_stats.ns_rules == 0) 5083 ifs->ifs_pfil_delayed_copy = 1; 5084 #endif 5085 #endif 5086 } 5087 } 5088 5089 5090 /* ------------------------------------------------------------------------ */ 5091 /* Function: fr_natderef */ 5092 /* Returns: Nil */ 5093 /* Parameters: isp(I) - pointer to pointer to NAT table entry */ 5094 /* */ 5095 /* Decrement the reference counter for this NAT table entry and free it if */ 5096 /* there are no more things using it. */ 5097 /* */ 5098 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */ 5099 /* structure *because* it only gets called on paths _after_ nat_ref has been*/ 5100 /* incremented. If nat_ref == 1 then we shouldn't decrement it here */ 5101 /* because nat_delete() will do that and send nat_ref to -1. */ 5102 /* */ 5103 /* Holding the lock on nat_lock is required to serialise nat_delete() being */ 5104 /* called from a NAT flush ioctl with a deref happening because of a packet.*/ 5105 /* ------------------------------------------------------------------------ */ 5106 void fr_natderef(natp, ifs) 5107 nat_t **natp; 5108 ipf_stack_t *ifs; 5109 { 5110 nat_t *nat; 5111 5112 nat = *natp; 5113 *natp = NULL; 5114 5115 MUTEX_ENTER(&nat->nat_lock); 5116 if (nat->nat_ref > 1) { 5117 nat->nat_ref--; 5118 MUTEX_EXIT(&nat->nat_lock); 5119 return; 5120 } 5121 MUTEX_EXIT(&nat->nat_lock); 5122 5123 WRITE_ENTER(&ifs->ifs_ipf_nat); 5124 nat_delete(nat, NL_EXPIRE, ifs); 5125 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5126 } 5127 5128 5129 /* ------------------------------------------------------------------------ */ 5130 /* Function: fr_natclone */ 5131 /* Returns: ipstate_t* - NULL == cloning failed, */ 5132 /* else pointer to new state structure */ 5133 /* Parameters: fin(I) - pointer to packet information */ 5134 /* is(I) - pointer to master state structure */ 5135 /* Write Lock: ipf_nat */ 5136 /* */ 5137 /* Create a "duplcate" state table entry from the master. */ 5138 /* ------------------------------------------------------------------------ */ 5139 nat_t *fr_natclone(fin, nat) 5140 fr_info_t *fin; 5141 nat_t *nat; 5142 { 5143 frentry_t *fr; 5144 nat_t *clone; 5145 ipnat_t *np; 5146 ipf_stack_t *ifs = fin->fin_ifs; 5147 5148 KMALLOC(clone, nat_t *); 5149 if (clone == NULL) 5150 return NULL; 5151 bcopy((char *)nat, (char *)clone, sizeof(*clone)); 5152 5153 MUTEX_NUKE(&clone->nat_lock); 5154 5155 clone->nat_aps = NULL; 5156 /* 5157 * Initialize all these so that nat_delete() doesn't cause a crash. 5158 */ 5159 clone->nat_tqe.tqe_pnext = NULL; 5160 clone->nat_tqe.tqe_next = NULL; 5161 clone->nat_tqe.tqe_ifq = NULL; 5162 clone->nat_tqe.tqe_parent = clone; 5163 5164 clone->nat_flags &= ~SI_CLONE; 5165 clone->nat_flags |= SI_CLONED; 5166 5167 if (clone->nat_hm) 5168 clone->nat_hm->hm_ref++; 5169 5170 if (nat_insert(clone, fin->fin_rev, ifs) == -1) { 5171 KFREE(clone); 5172 return NULL; 5173 } 5174 np = clone->nat_ptr; 5175 if (np != NULL) { 5176 if (ifs->ifs_nat_logging) 5177 nat_log(clone, (u_int)np->in_redir, ifs); 5178 np->in_use++; 5179 } 5180 fr = clone->nat_fr; 5181 if (fr != NULL) { 5182 MUTEX_ENTER(&fr->fr_lock); 5183 fr->fr_ref++; 5184 MUTEX_EXIT(&fr->fr_lock); 5185 } 5186 5187 /* 5188 * Because the clone is created outside the normal loop of things and 5189 * TCP has special needs in terms of state, initialise the timeout 5190 * state of the new NAT from here. 5191 */ 5192 if (clone->nat_p == IPPROTO_TCP) { 5193 (void) fr_tcp_age(&clone->nat_tqe, fin, ifs->ifs_nat_tqb, 5194 clone->nat_flags); 5195 } 5196 #ifdef IPFILTER_SYNC 5197 clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone); 5198 #endif 5199 if (ifs->ifs_nat_logging) 5200 nat_log(clone, NL_CLONE, ifs); 5201 return clone; 5202 } 5203 5204 5205 /* ------------------------------------------------------------------------ */ 5206 /* Function: nat_wildok */ 5207 /* Returns: int - 1 == packet's ports match wildcards */ 5208 /* 0 == packet's ports don't match wildcards */ 5209 /* Parameters: nat(I) - NAT entry */ 5210 /* sport(I) - source port */ 5211 /* dport(I) - destination port */ 5212 /* flags(I) - wildcard flags */ 5213 /* dir(I) - packet direction */ 5214 /* */ 5215 /* Use NAT entry and packet direction to determine which combination of */ 5216 /* wildcard flags should be used. */ 5217 /* ------------------------------------------------------------------------ */ 5218 int nat_wildok(nat, sport, dport, flags, dir) 5219 nat_t *nat; 5220 int sport; 5221 int dport; 5222 int flags; 5223 int dir; 5224 { 5225 /* 5226 * When called by dir is set to 5227 * nat_inlookup NAT_INBOUND (0) 5228 * nat_outlookup NAT_OUTBOUND (1) 5229 * 5230 * We simply combine the packet's direction in dir with the original 5231 * "intended" direction of that NAT entry in nat->nat_dir to decide 5232 * which combination of wildcard flags to allow. 5233 */ 5234 5235 switch ((dir << 1) | nat->nat_dir) 5236 { 5237 case 3: /* outbound packet / outbound entry */ 5238 if (((nat->nat_inport == sport) || 5239 (flags & SI_W_SPORT)) && 5240 ((nat->nat_oport == dport) || 5241 (flags & SI_W_DPORT))) 5242 return 1; 5243 break; 5244 case 2: /* outbound packet / inbound entry */ 5245 if (((nat->nat_outport == sport) || 5246 (flags & SI_W_DPORT)) && 5247 ((nat->nat_oport == dport) || 5248 (flags & SI_W_SPORT))) 5249 return 1; 5250 break; 5251 case 1: /* inbound packet / outbound entry */ 5252 if (((nat->nat_oport == sport) || 5253 (flags & SI_W_DPORT)) && 5254 ((nat->nat_outport == dport) || 5255 (flags & SI_W_SPORT))) 5256 return 1; 5257 break; 5258 case 0: /* inbound packet / inbound entry */ 5259 if (((nat->nat_oport == sport) || 5260 (flags & SI_W_SPORT)) && 5261 ((nat->nat_outport == dport) || 5262 (flags & SI_W_DPORT))) 5263 return 1; 5264 break; 5265 default: 5266 break; 5267 } 5268 5269 return(0); 5270 } 5271 5272 5273 /* ------------------------------------------------------------------------ */ 5274 /* Function: nat_mssclamp */ 5275 /* Returns: Nil */ 5276 /* Parameters: tcp(I) - pointer to TCP header */ 5277 /* maxmss(I) - value to clamp the TCP MSS to */ 5278 /* csump(I) - pointer to TCP checksum */ 5279 /* */ 5280 /* Check for MSS option and clamp it if necessary. If found and changed, */ 5281 /* then the TCP header checksum will be updated to reflect the change in */ 5282 /* the MSS. */ 5283 /* ------------------------------------------------------------------------ */ 5284 static void nat_mssclamp(tcp, maxmss, csump) 5285 tcphdr_t *tcp; 5286 u_32_t maxmss; 5287 u_short *csump; 5288 { 5289 u_char *cp, *ep, opt; 5290 int hlen, advance; 5291 u_32_t mss, sumd; 5292 5293 hlen = TCP_OFF(tcp) << 2; 5294 if (hlen > sizeof(*tcp)) { 5295 cp = (u_char *)tcp + sizeof(*tcp); 5296 ep = (u_char *)tcp + hlen; 5297 5298 while (cp < ep) { 5299 opt = cp[0]; 5300 if (opt == TCPOPT_EOL) 5301 break; 5302 else if (opt == TCPOPT_NOP) { 5303 cp++; 5304 continue; 5305 } 5306 5307 if (cp + 1 >= ep) 5308 break; 5309 advance = cp[1]; 5310 if ((cp + advance > ep) || (advance <= 0)) 5311 break; 5312 switch (opt) 5313 { 5314 case TCPOPT_MAXSEG: 5315 if (advance != 4) 5316 break; 5317 mss = cp[2] * 256 + cp[3]; 5318 if (mss > maxmss) { 5319 cp[2] = maxmss / 256; 5320 cp[3] = maxmss & 0xff; 5321 CALC_SUMD(mss, maxmss, sumd); 5322 fix_outcksum(csump, sumd); 5323 } 5324 break; 5325 default: 5326 /* ignore unknown options */ 5327 break; 5328 } 5329 5330 cp += advance; 5331 } 5332 } 5333 } 5334 5335 5336 /* ------------------------------------------------------------------------ */ 5337 /* Function: fr_setnatqueue */ 5338 /* Returns: Nil */ 5339 /* Parameters: nat(I)- pointer to NAT structure */ 5340 /* rev(I) - forward(0) or reverse(1) direction */ 5341 /* Locks: ipf_nat (read or write) */ 5342 /* */ 5343 /* Put the NAT entry on its default queue entry, using rev as a helped in */ 5344 /* determining which queue it should be placed on. */ 5345 /* ------------------------------------------------------------------------ */ 5346 void fr_setnatqueue(nat, rev, ifs) 5347 nat_t *nat; 5348 int rev; 5349 ipf_stack_t *ifs; 5350 { 5351 ipftq_t *oifq, *nifq; 5352 5353 if (nat->nat_ptr != NULL) 5354 nifq = nat->nat_ptr->in_tqehead[rev]; 5355 else 5356 nifq = NULL; 5357 5358 if (nifq == NULL) { 5359 switch (nat->nat_p) 5360 { 5361 case IPPROTO_UDP : 5362 nifq = &ifs->ifs_nat_udptq; 5363 break; 5364 case IPPROTO_ICMP : 5365 nifq = &ifs->ifs_nat_icmptq; 5366 break; 5367 case IPPROTO_TCP : 5368 nifq = ifs->ifs_nat_tqb + nat->nat_tqe.tqe_state[rev]; 5369 break; 5370 default : 5371 nifq = &ifs->ifs_nat_iptq; 5372 break; 5373 } 5374 } 5375 5376 oifq = nat->nat_tqe.tqe_ifq; 5377 /* 5378 * If it's currently on a timeout queue, move it from one queue to 5379 * another, else put it on the end of the newly determined queue. 5380 */ 5381 if (oifq != NULL) 5382 fr_movequeue(&nat->nat_tqe, oifq, nifq, ifs); 5383 else 5384 fr_queueappend(&nat->nat_tqe, nifq, nat, ifs); 5385 return; 5386 } 5387 5388 /* ------------------------------------------------------------------------ */ 5389 /* Function: nat_getnext */ 5390 /* Returns: int - 0 == ok, else error */ 5391 /* Parameters: t(I) - pointer to ipftoken structure */ 5392 /* itp(I) - pointer to ipfgeniter_t structure */ 5393 /* ifs - ipf stack instance */ 5394 /* */ 5395 /* Fetch the next nat/ipnat/hostmap structure pointer from the linked list */ 5396 /* and copy it out to the storage space pointed to by itp. The next item */ 5397 /* in the list to look at is put back in the ipftoken struture. */ 5398 /* ------------------------------------------------------------------------ */ 5399 static int nat_getnext(t, itp, ifs) 5400 ipftoken_t *t; 5401 ipfgeniter_t *itp; 5402 ipf_stack_t *ifs; 5403 { 5404 hostmap_t *hm, *nexthm = NULL, zerohm; 5405 ipnat_t *ipn, *nextipnat = NULL, zeroipn; 5406 nat_t *nat, *nextnat = NULL, zeronat; 5407 int error = 0, count; 5408 char *dst; 5409 5410 if (itp->igi_nitems == 0) 5411 return EINVAL; 5412 5413 READ_ENTER(&ifs->ifs_ipf_nat); 5414 5415 /* 5416 * Get "previous" entry from the token and find the next entry. 5417 */ 5418 switch (itp->igi_type) 5419 { 5420 case IPFGENITER_HOSTMAP : 5421 hm = t->ipt_data; 5422 if (hm == NULL) { 5423 nexthm = ifs->ifs_ipf_hm_maplist; 5424 } else { 5425 nexthm = hm->hm_next; 5426 } 5427 break; 5428 5429 case IPFGENITER_IPNAT : 5430 ipn = t->ipt_data; 5431 if (ipn == NULL) { 5432 nextipnat = ifs->ifs_nat_list; 5433 } else { 5434 nextipnat = ipn->in_next; 5435 } 5436 break; 5437 5438 case IPFGENITER_NAT : 5439 nat = t->ipt_data; 5440 if (nat == NULL) { 5441 nextnat = ifs->ifs_nat_instances; 5442 } else { 5443 nextnat = nat->nat_next; 5444 } 5445 break; 5446 default : 5447 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5448 return EINVAL; 5449 } 5450 5451 dst = itp->igi_data; 5452 for (count = itp->igi_nitems; count > 0; count--) { 5453 /* 5454 * If we found an entry, add a reference to it and update the token. 5455 * Otherwise, zero out data to be returned and NULL out token. 5456 */ 5457 switch (itp->igi_type) 5458 { 5459 case IPFGENITER_HOSTMAP : 5460 if (nexthm != NULL) { 5461 ATOMIC_INC32(nexthm->hm_ref); 5462 t->ipt_data = nexthm; 5463 } else { 5464 bzero(&zerohm, sizeof(zerohm)); 5465 nexthm = &zerohm; 5466 t->ipt_data = NULL; 5467 } 5468 break; 5469 case IPFGENITER_IPNAT : 5470 if (nextipnat != NULL) { 5471 ATOMIC_INC32(nextipnat->in_use); 5472 t->ipt_data = nextipnat; 5473 } else { 5474 bzero(&zeroipn, sizeof(zeroipn)); 5475 nextipnat = &zeroipn; 5476 t->ipt_data = NULL; 5477 } 5478 break; 5479 case IPFGENITER_NAT : 5480 if (nextnat != NULL) { 5481 MUTEX_ENTER(&nextnat->nat_lock); 5482 nextnat->nat_ref++; 5483 MUTEX_EXIT(&nextnat->nat_lock); 5484 t->ipt_data = nextnat; 5485 } else { 5486 bzero(&zeronat, sizeof(zeronat)); 5487 nextnat = &zeronat; 5488 t->ipt_data = NULL; 5489 } 5490 break; 5491 default : 5492 break; 5493 } 5494 5495 /* 5496 * Now that we have ref, it's save to give up lock. 5497 */ 5498 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5499 5500 /* 5501 * Copy out data and clean up references and token as needed. 5502 */ 5503 switch (itp->igi_type) 5504 { 5505 case IPFGENITER_HOSTMAP : 5506 error = COPYOUT(nexthm, dst, sizeof(*nexthm)); 5507 if (error != 0) 5508 error = EFAULT; 5509 if (t->ipt_data == NULL) { 5510 ipf_freetoken(t, ifs); 5511 break; 5512 } else { 5513 if (hm != NULL) { 5514 WRITE_ENTER(&ifs->ifs_ipf_nat); 5515 fr_hostmapdel(&hm); 5516 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5517 } 5518 if (nexthm->hm_next == NULL) { 5519 ipf_freetoken(t, ifs); 5520 break; 5521 } 5522 dst += sizeof(*nexthm); 5523 hm = nexthm; 5524 nexthm = nexthm->hm_next; 5525 } 5526 break; 5527 5528 case IPFGENITER_IPNAT : 5529 error = COPYOUT(nextipnat, dst, sizeof(*nextipnat)); 5530 if (error != 0) 5531 error = EFAULT; 5532 if (t->ipt_data == NULL) { 5533 ipf_freetoken(t, ifs); 5534 break; 5535 } else { 5536 if (ipn != NULL) { 5537 WRITE_ENTER(&ifs->ifs_ipf_nat); 5538 fr_ipnatderef(&ipn, ifs); 5539 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5540 } 5541 if (nextipnat->in_next == NULL) { 5542 ipf_freetoken(t, ifs); 5543 break; 5544 } 5545 dst += sizeof(*nextipnat); 5546 ipn = nextipnat; 5547 nextipnat = nextipnat->in_next; 5548 } 5549 break; 5550 5551 case IPFGENITER_NAT : 5552 error = COPYOUT(nextnat, dst, sizeof(*nextnat)); 5553 if (error != 0) 5554 error = EFAULT; 5555 if (t->ipt_data == NULL) { 5556 ipf_freetoken(t, ifs); 5557 break; 5558 } else { 5559 if (nat != NULL) 5560 fr_natderef(&nat, ifs); 5561 if (nextnat->nat_next == NULL) { 5562 ipf_freetoken(t, ifs); 5563 break; 5564 } 5565 dst += sizeof(*nextnat); 5566 nat = nextnat; 5567 nextnat = nextnat->nat_next; 5568 } 5569 break; 5570 default : 5571 break; 5572 } 5573 5574 if ((count == 1) || (error != 0)) 5575 break; 5576 5577 READ_ENTER(&ifs->ifs_ipf_nat); 5578 } 5579 5580 return error; 5581 } 5582 5583 5584 /* ------------------------------------------------------------------------ */ 5585 /* Function: nat_iterator */ 5586 /* Returns: int - 0 == ok, else error */ 5587 /* Parameters: token(I) - pointer to ipftoken structure */ 5588 /* itp(I) - pointer to ipfgeniter_t structure */ 5589 /* */ 5590 /* This function acts as a handler for the SIOCGENITER ioctls that use a */ 5591 /* generic structure to iterate through a list. There are three different */ 5592 /* linked lists of NAT related information to go through: NAT rules, active */ 5593 /* NAT mappings and the NAT fragment cache. */ 5594 /* ------------------------------------------------------------------------ */ 5595 static int nat_iterator(token, itp, ifs) 5596 ipftoken_t *token; 5597 ipfgeniter_t *itp; 5598 ipf_stack_t *ifs; 5599 { 5600 int error; 5601 5602 if (itp->igi_data == NULL) 5603 return EFAULT; 5604 5605 token->ipt_subtype = itp->igi_type; 5606 5607 switch (itp->igi_type) 5608 { 5609 case IPFGENITER_HOSTMAP : 5610 case IPFGENITER_IPNAT : 5611 case IPFGENITER_NAT : 5612 error = nat_getnext(token, itp, ifs); 5613 break; 5614 case IPFGENITER_NATFRAG : 5615 error = fr_nextfrag(token, itp, &ifs->ifs_ipfr_natlist, 5616 &ifs->ifs_ipfr_nattail, 5617 &ifs->ifs_ipf_natfrag, ifs); 5618 break; 5619 default : 5620 error = EINVAL; 5621 break; 5622 } 5623 5624 return error; 5625 } 5626 5627 5628 /* -------------------------------------------------------------------- */ 5629 /* Function: nat_earlydrop */ 5630 /* Returns: number of dropped/removed entries from the queue */ 5631 /* Parameters: ifq - pointer to queue with entries to be processed */ 5632 /* maxidle - entry must be idle this long to be dropped */ 5633 /* ifs - ipf stack instance */ 5634 /* */ 5635 /* Function is invoked from nat_extraflush() only. Removes entries */ 5636 /* form specified timeout queue, based on how long they've sat idle, */ 5637 /* without waiting for it to happen on its own. */ 5638 /* -------------------------------------------------------------------- */ 5639 static int nat_earlydrop(ifq, maxidle, ifs) 5640 ipftq_t *ifq; 5641 int maxidle; 5642 ipf_stack_t *ifs; 5643 { 5644 ipftqent_t *tqe, *tqn; 5645 nat_t *nat; 5646 unsigned int dropped; 5647 int droptick; 5648 5649 if (ifq == NULL) 5650 return (0); 5651 5652 dropped = 0; 5653 5654 /* 5655 * Determine the tick representing the idle time we're interested 5656 * in. If an entry exists in the queue, and it was touched before 5657 * that tick, then it's been idle longer than maxidle ... remove it. 5658 */ 5659 droptick = ifs->ifs_fr_ticks - maxidle; 5660 tqn = ifq->ifq_head; 5661 while ((tqe = tqn) != NULL && tqe->tqe_touched < droptick) { 5662 tqn = tqe->tqe_next; 5663 nat = tqe->tqe_parent; 5664 nat_delete(nat, ISL_EXPIRE, ifs); 5665 dropped++; 5666 } 5667 return (dropped); 5668 } 5669 5670 5671 /* --------------------------------------------------------------------- */ 5672 /* Function: nat_flushclosing */ 5673 /* Returns: int - number of NAT entries deleted */ 5674 /* Parameters: stateval(I) - State at which to start removing entries */ 5675 /* ifs - ipf stack instance */ 5676 /* */ 5677 /* Remove nat table entries for TCP connections which are in the process */ 5678 /* of closing, and are in (or "beyond") state specified by 'stateval'. */ 5679 /* --------------------------------------------------------------------- */ 5680 static int nat_flushclosing(stateval, ifs) 5681 int stateval; 5682 ipf_stack_t *ifs; 5683 { 5684 ipftq_t *ifq, *ifqn; 5685 ipftqent_t *tqe, *tqn; 5686 nat_t *nat; 5687 int dropped; 5688 5689 dropped = 0; 5690 5691 /* 5692 * Start by deleting any entries in specific timeout queues. 5693 */ 5694 ifqn = &ifs->ifs_nat_tqb[stateval]; 5695 while ((ifq = ifqn) != NULL) { 5696 ifqn = ifq->ifq_next; 5697 dropped += nat_earlydrop(ifq, (int)0, ifs); 5698 } 5699 5700 /* 5701 * Next, look through user defined queues for closing entries. 5702 */ 5703 ifqn = ifs->ifs_nat_utqe; 5704 while ((ifq = ifqn) != NULL) { 5705 ifqn = ifq->ifq_next; 5706 tqn = ifq->ifq_head; 5707 while ((tqe = tqn) != NULL) { 5708 tqn = tqe->tqe_next; 5709 nat = tqe->tqe_parent; 5710 if (nat->nat_p != IPPROTO_TCP) 5711 continue; 5712 if ((nat->nat_tcpstate[0] >= stateval) && 5713 (nat->nat_tcpstate[1] >= stateval)) { 5714 nat_delete(nat, NL_EXPIRE, ifs); 5715 dropped++; 5716 } 5717 } 5718 } 5719 return (dropped); 5720 } 5721 5722 5723 /* --------------------------------------------------------------------- */ 5724 /* Function: nat_extraflush */ 5725 /* Returns: int - number of NAT entries deleted */ 5726 /* Parameters: which(I) - how to flush the active NAT table */ 5727 /* ifs - ipf stack instance */ 5728 /* Write Locks: ipf_nat */ 5729 /* */ 5730 /* Flush nat tables. Three actions currently defined: */ 5731 /* */ 5732 /* which == 0 : Flush all nat table entries. */ 5733 /* */ 5734 /* which == 1 : Flush entries with TCP connections which have started */ 5735 /* to close on both ends. */ 5736 /* */ 5737 /* which == 2 : First, flush entries which are "almost" closed. If that */ 5738 /* does not take us below specified threshold in the table, */ 5739 /* we want to flush entries with TCP connections which have */ 5740 /* been idle for a long time. Start with connections idle */ 5741 /* over 12 hours, and then work backwards in half hour */ 5742 /* increments to at most 30 minutes idle, and finally work */ 5743 /* back in 30 second increments to at most 30 seconds. */ 5744 /* --------------------------------------------------------------------- */ 5745 static int nat_extraflush(which, ifs) 5746 int which; 5747 ipf_stack_t *ifs; 5748 { 5749 ipftq_t *ifq, *ifqn; 5750 nat_t *nat, **natp; 5751 int idletime, removed, idle_idx; 5752 SPL_INT(s); 5753 5754 removed = 0; 5755 5756 SPL_NET(s); 5757 switch (which) 5758 { 5759 case 0: 5760 natp = &ifs->ifs_nat_instances; 5761 while ((nat = *natp) != NULL) { 5762 natp = &nat->nat_next; 5763 nat_delete(nat, ISL_FLUSH, ifs); 5764 removed++; 5765 } 5766 break; 5767 5768 case 1: 5769 removed = nat_flushclosing(IPF_TCPS_CLOSE_WAIT, ifs); 5770 break; 5771 5772 case 2: 5773 removed = nat_flushclosing(IPF_TCPS_FIN_WAIT_2, ifs); 5774 5775 /* 5776 * Be sure we haven't done this in the last 10 seconds. 5777 */ 5778 if (ifs->ifs_fr_ticks - ifs->ifs_nat_last_force_flush < 5779 IPF_TTLVAL(10)) 5780 break; 5781 ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks; 5782 5783 /* 5784 * Determine initial threshold for minimum idle time based on 5785 * how long ipfilter has been running. Ipfilter needs to have 5786 * been up as long as the smallest interval to continue on. 5787 * 5788 * Minimum idle times stored in idletime_tab and indexed by 5789 * idle_idx. Start at upper end of array and work backwards. 5790 * 5791 * Once the index is found, set the initial idle time to the 5792 * first interval before the current ipfilter run time. 5793 */ 5794 if (ifs->ifs_fr_ticks < idletime_tab[0]) 5795 break; /* switch */ 5796 idle_idx = (sizeof (idletime_tab) / sizeof (int)) - 1; 5797 if (ifs->ifs_fr_ticks > idletime_tab[idle_idx]) { 5798 idletime = idletime_tab[idle_idx]; 5799 } else { 5800 while ((idle_idx > 0) && 5801 (ifs->ifs_fr_ticks < idletime_tab[idle_idx])) 5802 idle_idx--; 5803 idletime = (ifs->ifs_fr_ticks / 5804 idletime_tab[idle_idx]) * 5805 idletime_tab[idle_idx]; 5806 } 5807 5808 while ((idle_idx >= 0) && 5809 (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_lo)) { 5810 /* 5811 * Start with appropriate timeout queue. 5812 */ 5813 removed += nat_earlydrop( 5814 &ifs->ifs_nat_tqb[IPF_TCPS_ESTABLISHED], 5815 idletime, ifs); 5816 5817 /* 5818 * Make sure we haven't already deleted enough 5819 * entries before checking the user defined queues. 5820 */ 5821 if (NAT_TAB_WATER_LEVEL(ifs) <= 5822 ifs->ifs_nat_flush_lvl_lo) 5823 break; 5824 5825 /* 5826 * Next, look through the user defined queues. 5827 */ 5828 ifqn = ifs->ifs_nat_utqe; 5829 while ((ifq = ifqn) != NULL) { 5830 ifqn = ifq->ifq_next; 5831 removed += nat_earlydrop(ifq, idletime, ifs); 5832 } 5833 5834 /* 5835 * Adjust the granularity of idle time. 5836 * 5837 * If we reach an interval boundary, we need to 5838 * either adjust the idle time accordingly or exit 5839 * the loop altogether (if this is very last check). 5840 */ 5841 idletime -= idletime_tab[idle_idx]; 5842 if (idletime < idletime_tab[idle_idx]) { 5843 if (idle_idx != 0) { 5844 idletime = idletime_tab[idle_idx] - 5845 idletime_tab[idle_idx - 1]; 5846 idle_idx--; 5847 } else { 5848 break; /* while */ 5849 } 5850 } 5851 } 5852 break; 5853 default: 5854 break; 5855 } 5856 5857 SPL_X(s); 5858 return (removed); 5859 } 5860