1 /* 2 * Copyright (C) 1995-2003 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 7 * Use is subject to license terms. 8 */ 9 10 #pragma ident "%Z%%M% %I% %E% SMI"$ 11 12 #if defined(KERNEL) || defined(_KERNEL) 13 # undef KERNEL 14 # undef _KERNEL 15 # define KERNEL 1 16 # define _KERNEL 1 17 #endif 18 #include <sys/errno.h> 19 #include <sys/types.h> 20 #include <sys/param.h> 21 #include <sys/time.h> 22 #include <sys/file.h> 23 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ 24 defined(_KERNEL) 25 # include "opt_ipfilter_log.h" 26 #endif 27 #if !defined(_KERNEL) 28 # include <stdio.h> 29 # include <string.h> 30 # include <stdlib.h> 31 # define _KERNEL 32 # ifdef __OpenBSD__ 33 struct file; 34 # endif 35 # include <sys/uio.h> 36 # undef _KERNEL 37 #endif 38 #if defined(_KERNEL) && (__FreeBSD_version >= 220000) 39 # include <sys/filio.h> 40 # include <sys/fcntl.h> 41 #else 42 # include <sys/ioctl.h> 43 #endif 44 #if !defined(AIX) 45 # include <sys/fcntl.h> 46 #endif 47 #if !defined(linux) 48 # include <sys/protosw.h> 49 #endif 50 #include <sys/socket.h> 51 #if defined(_KERNEL) 52 # include <sys/systm.h> 53 # if !defined(__SVR4) && !defined(__svr4__) 54 # include <sys/mbuf.h> 55 # endif 56 #endif 57 #if defined(__SVR4) || defined(__svr4__) 58 # include <sys/filio.h> 59 # include <sys/byteorder.h> 60 # ifdef _KERNEL 61 # include <sys/dditypes.h> 62 # endif 63 # include <sys/stream.h> 64 # include <sys/kmem.h> 65 #endif 66 #if __FreeBSD_version >= 300000 67 # include <sys/queue.h> 68 #endif 69 #include <net/if.h> 70 #if __FreeBSD_version >= 300000 71 # include <net/if_var.h> 72 # if defined(_KERNEL) && !defined(IPFILTER_LKM) 73 # include "opt_ipfilter.h" 74 # endif 75 #endif 76 #ifdef sun 77 # include <net/af.h> 78 #endif 79 #include <net/route.h> 80 #include <netinet/in.h> 81 #include <netinet/in_systm.h> 82 #include <netinet/ip.h> 83 84 #ifdef 
RFC1825 85 # include <vpn/md5.h> 86 # include <vpn/ipsec.h> 87 extern struct ifnet vpnif; 88 #endif 89 90 #if !defined(linux) 91 # include <netinet/ip_var.h> 92 #endif 93 #include <netinet/tcp.h> 94 #include <netinet/udp.h> 95 #include <netinet/ip_icmp.h> 96 #include "netinet/ip_compat.h" 97 #include <netinet/tcpip.h> 98 #include "netinet/ip_fil.h" 99 #include "netinet/ip_nat.h" 100 #include "netinet/ip_frag.h" 101 #include "netinet/ip_state.h" 102 #include "netinet/ip_proxy.h" 103 #include "netinet/ipf_stack.h" 104 #ifdef IPFILTER_SYNC 105 #include "netinet/ip_sync.h" 106 #endif 107 #if (__FreeBSD_version >= 300000) 108 # include <sys/malloc.h> 109 #endif 110 /* END OF INCLUDES */ 111 112 #undef SOCKADDR_IN 113 #define SOCKADDR_IN struct sockaddr_in 114 115 #if !defined(lint) 116 static const char sccsid[] = "@(#)ip_nat.c 1.11 6/5/96 (C) 1995 Darren Reed"; 117 static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 darrenr Exp $"; 118 #endif 119 120 121 /* ======================================================================== */ 122 /* How the NAT is organised and works. */ 123 /* */ 124 /* Inside (interface y) NAT Outside (interface x) */ 125 /* -------------------- -+- ------------------------------------- */ 126 /* Packet going | out, processsed by fr_checknatout() for x */ 127 /* ------------> | ------------> */ 128 /* src=10.1.1.1 | src=192.1.1.1 */ 129 /* | */ 130 /* | in, processed by fr_checknatin() for x */ 131 /* <------------ | <------------ */ 132 /* dst=10.1.1.1 | dst=192.1.1.1 */ 133 /* -------------------- -+- ------------------------------------- */ 134 /* fr_checknatout() - changes ip_src and if required, sport */ 135 /* - creates a new mapping, if required. */ 136 /* fr_checknatin() - changes ip_dst and if required, dport */ 137 /* */ 138 /* In the NAT table, internal source is recorded as "in" and externally */ 139 /* seen as "out". 
*/ 140 /* ======================================================================== */ 141 142 143 static int nat_flushtable __P((ipf_stack_t *)); 144 static int nat_clearlist __P((ipf_stack_t *)); 145 static void nat_addnat __P((struct ipnat *, ipf_stack_t *)); 146 static void nat_addrdr __P((struct ipnat *, ipf_stack_t *)); 147 static void nat_delete __P((struct nat *, int, ipf_stack_t *)); 148 static void nat_delrdr __P((struct ipnat *)); 149 static void nat_delnat __P((struct ipnat *)); 150 static int fr_natgetent __P((caddr_t, ipf_stack_t *)); 151 static int fr_natgetsz __P((caddr_t, ipf_stack_t *)); 152 static int fr_natputent __P((caddr_t, int, ipf_stack_t *)); 153 static void nat_tabmove __P((nat_t *, ipf_stack_t *)); 154 static int nat_match __P((fr_info_t *, ipnat_t *)); 155 static INLINE int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *)); 156 static INLINE int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *)); 157 static hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr, 158 struct in_addr, struct in_addr, u_32_t, 159 ipf_stack_t *)); 160 static INLINE int nat_icmpquerytype4 __P((int)); 161 static int nat_siocaddnat __P((ipnat_t *, ipnat_t **, int, 162 ipf_stack_t *)); 163 static void nat_siocdelnat __P((ipnat_t *, ipnat_t **, int, 164 ipf_stack_t *)); 165 static INLINE int nat_icmperrortype4 __P((int)); 166 static INLINE int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *, 167 tcphdr_t *, nat_t **, int)); 168 static INLINE int nat_resolverule __P((ipnat_t *, ipf_stack_t *)); 169 static nat_t *fr_natclone __P((fr_info_t *, nat_t *)); 170 static void nat_mssclamp __P((tcphdr_t *, u_32_t, u_short *)); 171 static INLINE int nat_wildok __P((nat_t *, int, int, int, int)); 172 static int nat_getnext __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 173 static int nat_iterator __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 174 static int nat_extraflush __P((int, ipf_stack_t *)); 175 static int nat_earlydrop __P((ipftq_t *, int, ipf_stack_t 
*)); 176 static int nat_flushclosing __P((int, ipf_stack_t *)); 177 178 179 /* 180 * Below we declare a list of constants used only in the nat_extraflush() 181 * routine. We are placing it here, instead of in nat_extraflush() itself, 182 * because we want to make it visible to tools such as mdb, nm etc., so the 183 * values can easily be altered during debugging. 184 */ 185 static const int idletime_tab[] = { 186 IPF_TTLVAL(30), /* 30 seconds */ 187 IPF_TTLVAL(1800), /* 30 minutes */ 188 IPF_TTLVAL(43200), /* 12 hours */ 189 IPF_TTLVAL(345600), /* 4 days */ 190 }; 191 192 193 /* ------------------------------------------------------------------------ */ 194 /* Function: fr_natinit */ 195 /* Returns: int - 0 == success, -1 == failure */ 196 /* Parameters: Nil */ 197 /* */ 198 /* Initialise all of the NAT locks, tables and other structures. */ 199 /* ------------------------------------------------------------------------ */ 200 int fr_natinit(ifs) 201 ipf_stack_t *ifs; 202 { 203 int i; 204 205 KMALLOCS(ifs->ifs_nat_table[0], nat_t **, 206 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 207 if (ifs->ifs_nat_table[0] != NULL) 208 bzero((char *)ifs->ifs_nat_table[0], 209 ifs->ifs_ipf_nattable_sz * sizeof(nat_t *)); 210 else 211 return -1; 212 213 KMALLOCS(ifs->ifs_nat_table[1], nat_t **, 214 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 215 if (ifs->ifs_nat_table[1] != NULL) 216 bzero((char *)ifs->ifs_nat_table[1], 217 ifs->ifs_ipf_nattable_sz * sizeof(nat_t *)); 218 else 219 return -2; 220 221 KMALLOCS(ifs->ifs_nat_rules, ipnat_t **, 222 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz); 223 if (ifs->ifs_nat_rules != NULL) 224 bzero((char *)ifs->ifs_nat_rules, 225 ifs->ifs_ipf_natrules_sz * sizeof(ipnat_t *)); 226 else 227 return -3; 228 229 KMALLOCS(ifs->ifs_rdr_rules, ipnat_t **, 230 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz); 231 if (ifs->ifs_rdr_rules != NULL) 232 bzero((char *)ifs->ifs_rdr_rules, 233 ifs->ifs_ipf_rdrrules_sz * sizeof(ipnat_t *)); 234 else 235 return 
-4; 236 237 KMALLOCS(ifs->ifs_maptable, hostmap_t **, 238 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); 239 if (ifs->ifs_maptable != NULL) 240 bzero((char *)ifs->ifs_maptable, 241 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); 242 else 243 return -5; 244 245 ifs->ifs_ipf_hm_maplist = NULL; 246 247 KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[0], u_long *, 248 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 249 if (ifs->ifs_nat_stats.ns_bucketlen[0] == NULL) 250 return -1; 251 bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[0], 252 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 253 254 KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[1], u_long *, 255 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 256 if (ifs->ifs_nat_stats.ns_bucketlen[1] == NULL) 257 return -1; 258 bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[1], 259 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 260 261 if (ifs->ifs_fr_nat_maxbucket == 0) { 262 for (i = ifs->ifs_ipf_nattable_sz; i > 0; i >>= 1) 263 ifs->ifs_fr_nat_maxbucket++; 264 ifs->ifs_fr_nat_maxbucket *= 2; 265 } 266 267 fr_sttab_init(ifs->ifs_nat_tqb, ifs); 268 /* 269 * Increase this because we may have "keep state" following this too 270 * and packet storms can occur if this is removed too quickly. 
271 */ 272 ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcplastack; 273 ifs->ifs_nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_nat_udptq; 274 ifs->ifs_nat_udptq.ifq_ttl = ifs->ifs_fr_defnatage; 275 ifs->ifs_nat_udptq.ifq_ref = 1; 276 ifs->ifs_nat_udptq.ifq_head = NULL; 277 ifs->ifs_nat_udptq.ifq_tail = &ifs->ifs_nat_udptq.ifq_head; 278 MUTEX_INIT(&ifs->ifs_nat_udptq.ifq_lock, "nat ipftq udp tab"); 279 ifs->ifs_nat_udptq.ifq_next = &ifs->ifs_nat_icmptq; 280 ifs->ifs_nat_icmptq.ifq_ttl = ifs->ifs_fr_defnaticmpage; 281 ifs->ifs_nat_icmptq.ifq_ref = 1; 282 ifs->ifs_nat_icmptq.ifq_head = NULL; 283 ifs->ifs_nat_icmptq.ifq_tail = &ifs->ifs_nat_icmptq.ifq_head; 284 MUTEX_INIT(&ifs->ifs_nat_icmptq.ifq_lock, "nat icmp ipftq tab"); 285 ifs->ifs_nat_icmptq.ifq_next = &ifs->ifs_nat_iptq; 286 ifs->ifs_nat_iptq.ifq_ttl = ifs->ifs_fr_defnatipage; 287 ifs->ifs_nat_iptq.ifq_ref = 1; 288 ifs->ifs_nat_iptq.ifq_head = NULL; 289 ifs->ifs_nat_iptq.ifq_tail = &ifs->ifs_nat_iptq.ifq_head; 290 MUTEX_INIT(&ifs->ifs_nat_iptq.ifq_lock, "nat ip ipftq tab"); 291 ifs->ifs_nat_iptq.ifq_next = NULL; 292 293 for (i = 0; i < IPF_TCP_NSTATES; i++) { 294 if (ifs->ifs_nat_tqb[i].ifq_ttl < ifs->ifs_fr_defnaticmpage) 295 ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnaticmpage; 296 #ifdef LARGE_NAT 297 else if (ifs->ifs_nat_tqb[i].ifq_ttl > ifs->ifs_fr_defnatage) 298 ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnatage; 299 #endif 300 } 301 302 /* 303 * Increase this because we may have "keep state" following 304 * this too and packet storms can occur if this is removed 305 * too quickly. 
306 */ 307 ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = 308 ifs->ifs_nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl; 309 310 RWLOCK_INIT(&ifs->ifs_ipf_nat, "ipf IP NAT rwlock"); 311 RWLOCK_INIT(&ifs->ifs_ipf_natfrag, "ipf IP NAT-Frag rwlock"); 312 MUTEX_INIT(&ifs->ifs_ipf_nat_new, "ipf nat new mutex"); 313 MUTEX_INIT(&ifs->ifs_ipf_natio, "ipf nat io mutex"); 314 315 ifs->ifs_fr_nat_init = 1; 316 317 return 0; 318 } 319 320 321 /* ------------------------------------------------------------------------ */ 322 /* Function: nat_addrdr */ 323 /* Returns: Nil */ 324 /* Parameters: n(I) - pointer to NAT rule to add */ 325 /* */ 326 /* Adds a redirect rule to the hash table of redirect rules and the list of */ 327 /* loaded NAT rules. Updates the bitmask indicating which netmasks are in */ 328 /* use by redirect rules. */ 329 /* ------------------------------------------------------------------------ */ 330 static void nat_addrdr(n, ifs) 331 ipnat_t *n; 332 ipf_stack_t *ifs; 333 { 334 ipnat_t **np; 335 u_32_t j; 336 u_int hv; 337 int k; 338 339 k = count4bits(n->in_outmsk); 340 if ((k >= 0) && (k != 32)) 341 ifs->ifs_rdr_masks |= 1 << k; 342 j = (n->in_outip & n->in_outmsk); 343 hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_rdrrules_sz); 344 np = ifs->ifs_rdr_rules + hv; 345 while (*np != NULL) 346 np = &(*np)->in_rnext; 347 n->in_rnext = NULL; 348 n->in_prnext = np; 349 n->in_hv = hv; 350 *np = n; 351 } 352 353 354 /* ------------------------------------------------------------------------ */ 355 /* Function: nat_addnat */ 356 /* Returns: Nil */ 357 /* Parameters: n(I) - pointer to NAT rule to add */ 358 /* */ 359 /* Adds a NAT map rule to the hash table of rules and the list of loaded */ 360 /* NAT rules. Updates the bitmask indicating which netmasks are in use by */ 361 /* redirect rules. 
*/ 362 /* ------------------------------------------------------------------------ */ 363 static void nat_addnat(n, ifs) 364 ipnat_t *n; 365 ipf_stack_t *ifs; 366 { 367 ipnat_t **np; 368 u_32_t j; 369 u_int hv; 370 int k; 371 372 k = count4bits(n->in_inmsk); 373 if ((k >= 0) && (k != 32)) 374 ifs->ifs_nat_masks |= 1 << k; 375 j = (n->in_inip & n->in_inmsk); 376 hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_natrules_sz); 377 np = ifs->ifs_nat_rules + hv; 378 while (*np != NULL) 379 np = &(*np)->in_mnext; 380 n->in_mnext = NULL; 381 n->in_pmnext = np; 382 n->in_hv = hv; 383 *np = n; 384 } 385 386 387 /* ------------------------------------------------------------------------ */ 388 /* Function: nat_delrdr */ 389 /* Returns: Nil */ 390 /* Parameters: n(I) - pointer to NAT rule to delete */ 391 /* */ 392 /* Removes a redirect rule from the hash table of redirect rules. */ 393 /* ------------------------------------------------------------------------ */ 394 static void nat_delrdr(n) 395 ipnat_t *n; 396 { 397 if (n->in_rnext) 398 n->in_rnext->in_prnext = n->in_prnext; 399 *n->in_prnext = n->in_rnext; 400 } 401 402 403 /* ------------------------------------------------------------------------ */ 404 /* Function: nat_delnat */ 405 /* Returns: Nil */ 406 /* Parameters: n(I) - pointer to NAT rule to delete */ 407 /* */ 408 /* Removes a NAT map rule from the hash table of NAT map rules. 
*/ 409 /* ------------------------------------------------------------------------ */ 410 static void nat_delnat(n) 411 ipnat_t *n; 412 { 413 if (n->in_mnext != NULL) 414 n->in_mnext->in_pmnext = n->in_pmnext; 415 *n->in_pmnext = n->in_mnext; 416 } 417 418 419 /* ------------------------------------------------------------------------ */ 420 /* Function: nat_hostmap */ 421 /* Returns: struct hostmap* - NULL if no hostmap could be created, */ 422 /* else a pointer to the hostmapping to use */ 423 /* Parameters: np(I) - pointer to NAT rule */ 424 /* real(I) - real IP address */ 425 /* map(I) - mapped IP address */ 426 /* port(I) - destination port number */ 427 /* Write Locks: ipf_nat */ 428 /* */ 429 /* Check if an ip address has already been allocated for a given mapping */ 430 /* that is not doing port based translation. If is not yet allocated, then */ 431 /* create a new entry if a non-NULL NAT rule pointer has been supplied. */ 432 /* ------------------------------------------------------------------------ */ 433 static struct hostmap *nat_hostmap(np, src, dst, map, port, ifs) 434 ipnat_t *np; 435 struct in_addr src; 436 struct in_addr dst; 437 struct in_addr map; 438 u_32_t port; 439 ipf_stack_t *ifs; 440 { 441 hostmap_t *hm; 442 u_int hv; 443 444 hv = (src.s_addr ^ dst.s_addr); 445 hv += src.s_addr; 446 hv += dst.s_addr; 447 hv %= HOSTMAP_SIZE; 448 for (hm = ifs->ifs_maptable[hv]; hm; hm = hm->hm_next) 449 if ((hm->hm_srcip.s_addr == src.s_addr) && 450 (hm->hm_dstip.s_addr == dst.s_addr) && 451 ((np == NULL) || (np == hm->hm_ipnat)) && 452 ((port == 0) || (port == hm->hm_port))) { 453 hm->hm_ref++; 454 return hm; 455 } 456 457 if (np == NULL) 458 return NULL; 459 460 KMALLOC(hm, hostmap_t *); 461 if (hm) { 462 hm->hm_hnext = ifs->ifs_ipf_hm_maplist; 463 hm->hm_phnext = &ifs->ifs_ipf_hm_maplist; 464 if (ifs->ifs_ipf_hm_maplist != NULL) 465 ifs->ifs_ipf_hm_maplist->hm_phnext = &hm->hm_hnext; 466 ifs->ifs_ipf_hm_maplist = hm; 467 468 hm->hm_next = 
ifs->ifs_maptable[hv]; 469 hm->hm_pnext = ifs->ifs_maptable + hv; 470 if (ifs->ifs_maptable[hv] != NULL) 471 ifs->ifs_maptable[hv]->hm_pnext = &hm->hm_next; 472 ifs->ifs_maptable[hv] = hm; 473 hm->hm_ipnat = np; 474 hm->hm_srcip = src; 475 hm->hm_dstip = dst; 476 hm->hm_mapip = map; 477 hm->hm_ref = 1; 478 hm->hm_port = port; 479 } 480 return hm; 481 } 482 483 484 /* ------------------------------------------------------------------------ */ 485 /* Function: fr_hostmapdel */ 486 /* Returns: Nil */ 487 /* Parameters: hmp(I) - pointer to pointer to hostmap structure */ 488 /* Write Locks: ipf_nat */ 489 /* */ 490 /* Decrement the references to this hostmap structure by one. If this */ 491 /* reaches zero then remove it and free it. */ 492 /* ------------------------------------------------------------------------ */ 493 void fr_hostmapdel(hmp) 494 struct hostmap **hmp; 495 { 496 struct hostmap *hm; 497 498 hm = *hmp; 499 *hmp = NULL; 500 501 hm->hm_ref--; 502 if (hm->hm_ref == 0) { 503 if (hm->hm_next) 504 hm->hm_next->hm_pnext = hm->hm_pnext; 505 *hm->hm_pnext = hm->hm_next; 506 if (hm->hm_hnext) 507 hm->hm_hnext->hm_phnext = hm->hm_phnext; 508 *hm->hm_phnext = hm->hm_hnext; 509 KFREE(hm); 510 } 511 } 512 513 514 /* ------------------------------------------------------------------------ */ 515 /* Function: fix_outcksum */ 516 /* Returns: Nil */ 517 /* Parameters: sp(I) - location of 16bit checksum to update */ 518 /* n((I) - amount to adjust checksum by */ 519 /* */ 520 /* Adjusts the 16bit checksum by "n" for packets going out. 
*/ 521 /* ------------------------------------------------------------------------ */ 522 void fix_outcksum(sp, n) 523 u_short *sp; 524 u_32_t n; 525 { 526 u_short sumshort; 527 u_32_t sum1; 528 529 if (n == 0) 530 return; 531 532 sum1 = (~ntohs(*sp)) & 0xffff; 533 sum1 += (n); 534 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 535 /* Again */ 536 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 537 sumshort = ~(u_short)sum1; 538 *(sp) = htons(sumshort); 539 } 540 541 542 /* ------------------------------------------------------------------------ */ 543 /* Function: fix_incksum */ 544 /* Returns: Nil */ 545 /* Parameters: sp(I) - location of 16bit checksum to update */ 546 /* n((I) - amount to adjust checksum by */ 547 /* */ 548 /* Adjusts the 16bit checksum by "n" for packets going in. */ 549 /* ------------------------------------------------------------------------ */ 550 void fix_incksum(sp, n) 551 u_short *sp; 552 u_32_t n; 553 { 554 u_short sumshort; 555 u_32_t sum1; 556 557 if (n == 0) 558 return; 559 560 sum1 = (~ntohs(*sp)) & 0xffff; 561 sum1 += ~(n) & 0xffff; 562 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 563 /* Again */ 564 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 565 sumshort = ~(u_short)sum1; 566 *(sp) = htons(sumshort); 567 } 568 569 570 /* ------------------------------------------------------------------------ */ 571 /* Function: fix_datacksum */ 572 /* Returns: Nil */ 573 /* Parameters: sp(I) - location of 16bit checksum to update */ 574 /* n((I) - amount to adjust checksum by */ 575 /* */ 576 /* Fix_datacksum is used *only* for the adjustments of checksums in the */ 577 /* data section of an IP packet. */ 578 /* */ 579 /* The only situation in which you need to do this is when NAT'ing an */ 580 /* ICMP error message. Such a message, contains in its body the IP header */ 581 /* of the original IP packet, that causes the error. 
/*                                                                          */
/* You can't use fix_incksum or fix_outcksum in that case, because for the  */
/* kernel the data section of the ICMP error is just data, and no special   */
/* processing like hardware cksum or ntohs processing have been done by the */
/* kernel on the data section.                                              */
/* ------------------------------------------------------------------------ */
void fix_datacksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_short sumshort;
	u_32_t sum1;

	/* A zero adjustment leaves the checksum untouched. */
	if (n == 0)
		return;

	/* Same arithmetic as fix_outcksum(): add "n", fold carries twice. */
	sum1 = (~ntohs(*sp)) & 0xffff;
	sum1 += (n);
	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
	/* Again */
	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
	sumshort = ~(u_short)sum1;
	*(sp) = htons(sumshort);
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_nat_ioctl                                                */
/* Returns:     int - 0 == success, != 0 == failure                         */
/* Parameters:  data(I) - pointer to ioctl data                             */
/*              cmd(I)  - ioctl command integer                             */
/*              mode(I) - file mode bits used with open; NAT_LOCKHELD and   */
/*                        NAT_SYSSPACE are also tested in this value        */
/*              uid(I)  - passed to ipf_findtoken() and ipf_deltoken()      */
/*              ctx(I)  - passed to ipf_findtoken() and ipf_deltoken()      */
/*              ifs(I)  - pointer to the ipf_stack_t being operated on      */
/*                                                                          */
/* Processes an ioctl call made to operate on the IP Filter NAT device.     */
/*                                                                          */
/* For SIOCADNAT and SIOCRMNAT the ipf_natio mutex is entered while the     */
/* rule list is searched, before the switch statement, and every path       */
/* through those two cases must exit it again.  When NAT_LOCKHELD is set in */
/* "mode" the ipf_nat rwlock is not taken by this function.                 */
/* ------------------------------------------------------------------------ */
int fr_nat_ioctl(data, cmd, mode, uid, ctx, ifs)
ioctlcmd_t cmd;
caddr_t data;
int mode, uid;
void *ctx;
ipf_stack_t *ifs;
{
	ipnat_t *nat, *nt, *n = NULL, **np = NULL;
	int error = 0, ret, arg, getlock;
	ipnat_t natd;

#if (BSD >= 199306) && defined(_KERNEL)
	if ((securelevel >= 2) && (mode & FWRITE))
		return EPERM;
#endif

	/* getlock != 0 means this function must take ipf_nat itself. */
#if defined(__osf__) && defined(_KERNEL)
	getlock = 0;
#else
	getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
#endif

	nat = NULL;     /* XXX gcc -Wuninitialized */
	/*
	 * A new rule needs storage that outlives this call; allocate it up
	 * front.  A failed allocation is reported as ENOMEM in the SIOCADNAT
	 * case below.
	 */
	if (cmd == (ioctlcmd_t)SIOCADNAT) {
		KMALLOC(nt, ipnat_t *);
	} else {
		nt = NULL;
	}

	/*
	 * Fetch the caller's argument: a complete rule for add/remove, or an
	 * integer selector for flush.
	 */
	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
		if (mode & NAT_SYSSPACE) {
			bcopy(data, (char *)&natd, sizeof(natd));
			error = 0;
		} else {
			error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
		}

	} else if (cmd == (ioctlcmd_t)SIOCIPFFL) { /* SIOCFLNAT & SIOCCNATL */
		/*
		 * NOTE(review): the outcome of this copy is not checked, so
		 * "arg" looks to be used uninitialised below if it fails -
		 * confirm against the BCOPYIN definition.
		 */
		BCOPYIN(data, &arg, sizeof(arg));
	}

	if (error != 0)
		goto done;

	/*
	 * For add/delete, look to see if the NAT entry is already present
	 */
	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
		nat = &natd;
		if (nat->in_v == 0)	/* For backward compat. */
			nat->in_v = 4;
		nat->in_flags &= IPN_USERFLAGS;
		/* Reduce the addresses to their network part before comparing. */
		if ((nat->in_redir & NAT_MAPBLK) == 0) {
			if ((nat->in_flags & IPN_SPLIT) == 0)
				nat->in_inip &= nat->in_inmsk;
			if ((nat->in_flags & IPN_IPRANGE) == 0)
				nat->in_outip &= nat->in_outmsk;
		}
		/*
		 * Held from here until the SIOCADNAT/SIOCRMNAT case is done.
		 * On exit from the loop, n is the matching rule (or NULL) and
		 * np the link that points at it (or the list's tail link).
		 */
		MUTEX_ENTER(&ifs->ifs_ipf_natio);
		for (np = &ifs->ifs_nat_list; ((n = *np) != NULL);
		     np = &n->in_next)
			if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
					IPN_CMPSIZ))
				break;
	}

	switch (cmd)
	{
	case SIOCGENITER :
	    {
		ipfgeniter_t iter;
		ipftoken_t *token;

		error = fr_inobj(data, &iter, IPFOBJ_GENITER);
		if (error != 0)
			break;

		token = ipf_findtoken(iter.igi_type, uid, ctx, ifs);
		if (token != NULL)
			error  = nat_iterator(token, &iter, ifs);
		else
			error = ESRCH;
		/*
		 * Released whether or not a token was found; presumably
		 * ipf_findtoken() returns with ifs_ipf_tokens held - confirm.
		 */
		RWLOCK_EXIT(&ifs->ifs_ipf_tokens);
		break;
	    }
#ifdef  IPFILTER_LOG
	case SIOCIPFFB :
	{
		int tmp;

		if (!(mode & FWRITE))
			error = EPERM;
		else {
			/* Clear the NAT log and hand the result back. */
			tmp = ipflog_clear(IPL_LOGNAT, ifs);
			BCOPYOUT((char *)&tmp, (char *)data, sizeof(tmp));
		}
		break;
	}
	case SIOCSETLG :
		if (!(mode & FWRITE))
			error = EPERM;
		else {
			BCOPYIN((char *)data,
				       (char *)&ifs->ifs_nat_logging,
				       sizeof(ifs->ifs_nat_logging));
		}
		break;
	case SIOCGETLG :
		BCOPYOUT((char *)&ifs->ifs_nat_logging, (char *)data,
			sizeof(ifs->ifs_nat_logging));
		break;
	case FIONREAD :
		arg = ifs->ifs_iplused[IPL_LOGNAT];
		BCOPYOUT(&arg, data, sizeof(arg));
		break;
#endif
	case SIOCADNAT :
		/* ipf_natio is held on entry to this case. */
		if (!(mode & FWRITE)) {
			error = EPERM;
		} else if (n != NULL) {
			error = EEXIST;
		} else if (nt == NULL) {
			error = ENOMEM;
		}
		if (error != 0) {
			MUTEX_EXIT(&ifs->ifs_ipf_natio);
			break;
		}
		/* n is NULL here; sizeof(*n) only supplies sizeof(ipnat_t). */
		bcopy((char *)nat, (char *)nt, sizeof(*n));
		error = nat_siocaddnat(nt, np, getlock, ifs);
		MUTEX_EXIT(&ifs->ifs_ipf_natio);
		/* On success the rule list owns nt; keep "done" from freeing it. */
		if (error == 0)
			nt = NULL;
		break;
	case SIOCRMNAT :
		/* ipf_natio is held on entry to this case. */
		if (!(mode & FWRITE)) {
			error = EPERM;
			n = NULL;
		} else if (n == NULL) {
			error = ESRCH;
		}

		if (error != 0) {
			MUTEX_EXIT(&ifs->ifs_ipf_natio);
			break;
		}
		nat_siocdelnat(n, np, getlock, ifs);

		MUTEX_EXIT(&ifs->ifs_ipf_natio);
		n = NULL;
		break;
	case SIOCGNATS :
		/* Refresh the pointer and size fields before copying out. */
		ifs->ifs_nat_stats.ns_table[0] = ifs->ifs_nat_table[0];
		ifs->ifs_nat_stats.ns_table[1] = ifs->ifs_nat_table[1];
		ifs->ifs_nat_stats.ns_list = ifs->ifs_nat_list;
		ifs->ifs_nat_stats.ns_maptable = ifs->ifs_maptable;
		ifs->ifs_nat_stats.ns_maplist = ifs->ifs_ipf_hm_maplist;
		ifs->ifs_nat_stats.ns_nattab_max = ifs->ifs_ipf_nattable_max;
		ifs->ifs_nat_stats.ns_nattab_sz = ifs->ifs_ipf_nattable_sz;
		ifs->ifs_nat_stats.ns_rultab_sz = ifs->ifs_ipf_natrules_sz;
		ifs->ifs_nat_stats.ns_rdrtab_sz = ifs->ifs_ipf_rdrrules_sz;
		ifs->ifs_nat_stats.ns_hostmap_sz = ifs->ifs_ipf_hostmap_sz;
		ifs->ifs_nat_stats.ns_instances = ifs->ifs_nat_instances;
		ifs->ifs_nat_stats.ns_apslist = ifs->ifs_ap_sess_list;
		error = fr_outobj(data, &ifs->ifs_nat_stats, IPFOBJ_NATSTAT);
		break;
	case SIOCGNATL :
	    {
		natlookup_t nl;

		if (getlock) {
			READ_ENTER(&ifs->ifs_ipf_nat);
		}
		error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
		if (error == 0) {
			if (nat_lookupredir(&nl, ifs) != NULL) {
				error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
			} else {
				error = ESRCH;
			}
		}
		if (getlock) {
			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
		}
		break;
	    }
	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
		if (!(mode & FWRITE)) {
			error = EPERM;
			break;
		}
		if (getlock) {
			WRITE_ENTER(&ifs->ifs_ipf_nat);
		}
		error = 0;
		/*
		 * arg selects the operation: 0 flushes the session table,
		 * 1 clears the rule list, 2-4 are passed to nat_extraflush()
		 * as 0-2.  "ret" is only copied out when error stays 0.
		 */
		if (arg == 0)
			ret = nat_flushtable(ifs);
		else if (arg == 1)
			ret = nat_clearlist(ifs);
		else if (arg >= 2 && arg <= 4)
			ret = nat_extraflush(arg - 2, ifs);
		else
			error = EINVAL;
		if (getlock) {
			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
		}
		if (error == 0) {
			BCOPYOUT(&ret, data, sizeof(ret));
		}
		break;
	case SIOCPROXY :
		error = appr_ioctl(data, cmd, mode, ifs);
		break;
	case SIOCSTLCK :
		if (!(mode & FWRITE)) {
			error = EPERM;
		} else {
			fr_lock(data, &ifs->ifs_fr_nat_lock);
		}
		break;
	case SIOCSTPUT :
		if ((mode & FWRITE) != 0) {
			error = fr_natputent(data, getlock, ifs);
		} else {
			error = EACCES;
		}
		break;
	case SIOCSTGSZ :
		/* Only permitted while ifs_fr_nat_lock is non-zero. */
		if (ifs->ifs_fr_nat_lock) {
			if (getlock) {
				READ_ENTER(&ifs->ifs_ipf_nat);
			}
			error = fr_natgetsz(data, ifs);
			if (getlock) {
				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
			}
		} else
			error = EACCES;
		break;
	case SIOCSTGET :
		/* Only permitted while ifs_fr_nat_lock is non-zero. */
		if (ifs->ifs_fr_nat_lock) {
			if (getlock) {
				READ_ENTER(&ifs->ifs_ipf_nat);
			}
			error = fr_natgetent(data, ifs);
			if (getlock) {
				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
			}
		} else
			error = EACCES;
		break;
	case SIOCIPFDELTOK :
		(void) BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
		error = ipf_deltoken(arg, uid, ctx, ifs);
		break;
	default :
		error = EINVAL;
		break;
	}
done:
	/* Non-NULL only when a SIOCADNAT allocation was not handed over. */
	if (nt)
		KFREE(nt);
	return error;
}


/* ------------------------------------------------------------------------ */
/* Function:    nat_siocaddnat                                              */
/* Returns:     int - 0 == success, != 0 == failure                         */
/* Parameters:  n(I)       - pointer to new NAT rule                        */
/*              np(I)      - pointer to where to insert new NAT rule        */
/*              getlock(I) - non-zero if this function must take the write  */
/*                           lock on ipf_nat itself                         */
/*              ifs(I)     - pointer to the ipf_stack_t being operated on   */
/* Mutex Locks: ipf_natio                                                   */
/*                                                                          */
/* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
/* from information passed to the kernel, then add it to the appropriate    */
/* NAT rule table(s).  Returns ENOENT if the rule cannot be resolved and    */
/* EINVAL if in_age[1] is set while in_age[0] is zero.                      */
/* ------------------------------------------------------------------------ */
static int nat_siocaddnat(n, np, getlock, ifs)
ipnat_t *n, **np;
int getlock;
ipf_stack_t *ifs;
{
	int error = 0, i, j;

	if (nat_resolverule(n, ifs) != 0)
		return ENOENT;

	if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
		return EINVAL;

	/*
	 * First estimate of the size of the translation space, based on the
	 * kind of rule.
	 */
	n->in_use = 0;
	if (n->in_redir & NAT_MAPBLK)
		n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
	else if (n->in_flags & IPN_AUTOPORTMAP)
		n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
	else if (n->in_flags & IPN_IPRANGE)
		n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
	else if (n->in_flags & IPN_SPLIT)
		n->in_space = 2;
	else if (n->in_outmsk != 0)
		n->in_space = ~ntohl(n->in_outmsk);
	else
		n->in_space = 1;

	/*
	 * Calculate the number of valid IP addresses in the output
	 * mapping range.  In all cases, the range is inclusive of
	 * the start and ending IP addresses.
	 * If to a CIDR address, lose 2: broadcast + network address
	 *                               (so subtract 1)
	 * If to a range, add one.
	 * If to a single IP address, set to 1.
	 */
	if (n->in_space) {
		if ((n->in_flags & IPN_IPRANGE) != 0)
			n->in_space += 1;
		else
			n->in_space -= 1;
	} else
		n->in_space = 1;

	/*
	 * Pick the first address to hand out (in_nip, host byte order).
	 */
	if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
	    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
		n->in_nip = ntohl(n->in_outip) + 1;
	else if ((n->in_flags & IPN_SPLIT) &&
		 (n->in_redir & NAT_REDIRECT))
		n->in_nip = ntohl(n->in_inip);
	else
		n->in_nip = ntohl(n->in_outip);
	if (n->in_redir & NAT_MAP) {
		n->in_pnext = ntohs(n->in_pmin);
		/*
		 * Multiply by the number of ports made available.
		 */
		if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
			n->in_space *= (ntohs(n->in_pmax) -
					ntohs(n->in_pmin) + 1);
			/*
			 * Because two different sources can map to
			 * different destinations but use the same
			 * local IP#/port #.
			 * If the result is smaller than in_space, then
			 * we may have wrapped around 32bits.
			 */
			i = n->in_inmsk;
			if ((i != 0) && (i != 0xffffffff)) {
				j = n->in_space * (~ntohl(i) + 1);
				if (j >= n->in_space)
					n->in_space = j;
				else
					n->in_space = 0xffffffff;
			}
		}
		/*
		 * If no protocol is specified, multiply by 256 to allow for
		 * at least one IP:IP mapping per protocol.
		 */
		if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
				j = n->in_space * 256;
				if (j >= n->in_space)
					n->in_space = j;
				else
					n->in_space = 0xffffffff;
		}
	}

	/* Otherwise, these fields are preset */

	if (getlock) {
		WRITE_ENTER(&ifs->ifs_ipf_nat);
	}
	/* Link the rule in at the position the caller located. */
	n->in_next = NULL;
	*np = n;

	if (n->in_age[0] != 0)
		n->in_tqehead[0] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
						      n->in_age[0], ifs);

	if (n->in_age[1] != 0)
		n->in_tqehead[1] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
						      n->in_age[1], ifs);

	/* A rule can be hashed into the rdr table, the map table, or both. */
	if (n->in_redir & NAT_REDIRECT) {
		n->in_flags &= ~IPN_NOTDST;
		nat_addrdr(n, ifs);
	}
	if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
		n->in_flags &= ~IPN_NOTSRC;
		nat_addnat(n, ifs);
	}
	n = NULL;
	ifs->ifs_nat_stats.ns_rules++;
	if (getlock) {
		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* WRITE */
	}

	return error;
}


/* ------------------------------------------------------------------------ */
/* Function:    nat_resolverule                                             */
/* Returns:     int - 0 == success, -1 == failure                           */
/* Parameters:  n(I)   - pointer to NAT rule                                */
/*              ifs(I) - pointer to the ipf_stack_t being operated on       */
/*                                                                          */
/* Resolve some of the details inside the NAT rule.  Includes resolving     */
/* any specified interfaces and proxy labels, and determines whether or not */
/* all proxy labels are correctly specified.                                */
/*                                                                          */
/* Called by nat_siocaddnat() (SIOCADNAT) and fr_natputent (SIOCSTPUT).
*/ 1028 /* ------------------------------------------------------------------------ */ 1029 static int nat_resolverule(n, ifs) 1030 ipnat_t *n; 1031 ipf_stack_t *ifs; 1032 { 1033 n->in_ifnames[0][LIFNAMSIZ - 1] = '\0'; 1034 n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4, ifs); 1035 1036 n->in_ifnames[1][LIFNAMSIZ - 1] = '\0'; 1037 if (n->in_ifnames[1][0] == '\0') { 1038 (void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ); 1039 n->in_ifps[1] = n->in_ifps[0]; 1040 } else { 1041 n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4, ifs); 1042 } 1043 1044 if (n->in_plabel[0] != '\0') { 1045 n->in_apr = appr_lookup(n->in_p, n->in_plabel, ifs); 1046 if (n->in_apr == NULL) 1047 return -1; 1048 } 1049 return 0; 1050 } 1051 1052 1053 /* ------------------------------------------------------------------------ */ 1054 /* Function: nat_siocdelnat */ 1055 /* Returns: int - 0 == success, != 0 == failure */ 1056 /* Parameters: n(I) - pointer to new NAT rule */ 1057 /* np(I) - pointer to where to insert new NAT rule */ 1058 /* getlock(I) - flag indicating if lock on ipf_nat is held */ 1059 /* Mutex Locks: ipf_natio */ 1060 /* */ 1061 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ 1062 /* from information passed to the kernel, then add it to the appropriate */ 1063 /* NAT rule table(s). 
*/ 1064 /* ------------------------------------------------------------------------ */ 1065 static void nat_siocdelnat(n, np, getlock, ifs) 1066 ipnat_t *n, **np; 1067 int getlock; 1068 ipf_stack_t *ifs; 1069 { 1070 if (getlock) { 1071 WRITE_ENTER(&ifs->ifs_ipf_nat); 1072 } 1073 if (n->in_redir & NAT_REDIRECT) 1074 nat_delrdr(n); 1075 if (n->in_redir & (NAT_MAPBLK|NAT_MAP)) 1076 nat_delnat(n); 1077 if (ifs->ifs_nat_list == NULL) { 1078 ifs->ifs_nat_masks = 0; 1079 ifs->ifs_rdr_masks = 0; 1080 } 1081 1082 if (n->in_tqehead[0] != NULL) { 1083 if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) { 1084 fr_freetimeoutqueue(n->in_tqehead[0], ifs); 1085 } 1086 } 1087 1088 if (n->in_tqehead[1] != NULL) { 1089 if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) { 1090 fr_freetimeoutqueue(n->in_tqehead[1], ifs); 1091 } 1092 } 1093 1094 *np = n->in_next; 1095 1096 if (n->in_use == 0) { 1097 if (n->in_apr) 1098 appr_free(n->in_apr); 1099 KFREE(n); 1100 ifs->ifs_nat_stats.ns_rules--; 1101 } else { 1102 n->in_flags |= IPN_DELETE; 1103 n->in_next = NULL; 1104 } 1105 if (getlock) { 1106 RWLOCK_EXIT(&ifs->ifs_ipf_nat); /* READ/WRITE */ 1107 } 1108 } 1109 1110 1111 /* ------------------------------------------------------------------------ */ 1112 /* Function: fr_natgetsz */ 1113 /* Returns: int - 0 == success, != 0 is the error value. */ 1114 /* Parameters: data(I) - pointer to natget structure with kernel pointer */ 1115 /* get the size of. */ 1116 /* */ 1117 /* Handle SIOCSTGSZ. */ 1118 /* Return the size of the nat list entry to be copied back to user space. */ 1119 /* The size of the entry is stored in the ng_sz field and the enture natget */ 1120 /* structure is copied back to the user. 
*/ 1121 /* ------------------------------------------------------------------------ */ 1122 static int fr_natgetsz(data, ifs) 1123 caddr_t data; 1124 ipf_stack_t *ifs; 1125 { 1126 ap_session_t *aps; 1127 nat_t *nat, *n; 1128 natget_t ng; 1129 1130 BCOPYIN(data, &ng, sizeof(ng)); 1131 1132 nat = ng.ng_ptr; 1133 if (!nat) { 1134 nat = ifs->ifs_nat_instances; 1135 ng.ng_sz = 0; 1136 /* 1137 * Empty list so the size returned is 0. Simple. 1138 */ 1139 if (nat == NULL) { 1140 BCOPYOUT(&ng, data, sizeof(ng)); 1141 return 0; 1142 } 1143 } else { 1144 /* 1145 * Make sure the pointer we're copying from exists in the 1146 * current list of entries. Security precaution to prevent 1147 * copying of random kernel data. 1148 */ 1149 for (n = ifs->ifs_nat_instances; n; n = n->nat_next) 1150 if (n == nat) 1151 break; 1152 if (!n) 1153 return ESRCH; 1154 } 1155 1156 /* 1157 * Incluse any space required for proxy data structures. 1158 */ 1159 ng.ng_sz = sizeof(nat_save_t); 1160 aps = nat->nat_aps; 1161 if (aps != NULL) { 1162 ng.ng_sz += sizeof(ap_session_t) - 4; 1163 if (aps->aps_data != 0) 1164 ng.ng_sz += aps->aps_psiz; 1165 } 1166 1167 BCOPYOUT(&ng, data, sizeof(ng)); 1168 return 0; 1169 } 1170 1171 1172 /* ------------------------------------------------------------------------ */ 1173 /* Function: fr_natgetent */ 1174 /* Returns: int - 0 == success, != 0 is the error value. */ 1175 /* Parameters: data(I) - pointer to natget structure with kernel pointer */ 1176 /* to NAT structure to copy out. */ 1177 /* */ 1178 /* Handle SIOCSTGET. */ 1179 /* Copies out NAT entry to user space. 
Any additional data held for a */ 1180 /* proxy is also copied, as to is the NAT rule which was responsible for it */ 1181 /* ------------------------------------------------------------------------ */ 1182 static int fr_natgetent(data, ifs) 1183 caddr_t data; 1184 ipf_stack_t *ifs; 1185 { 1186 int error, outsize; 1187 ap_session_t *aps; 1188 nat_save_t *ipn, ipns; 1189 nat_t *n, *nat; 1190 1191 error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE); 1192 if (error != 0) 1193 return error; 1194 1195 if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920)) 1196 return EINVAL; 1197 1198 KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize); 1199 if (ipn == NULL) 1200 return ENOMEM; 1201 1202 ipn->ipn_dsize = ipns.ipn_dsize; 1203 nat = ipns.ipn_next; 1204 if (nat == NULL) { 1205 nat = ifs->ifs_nat_instances; 1206 if (nat == NULL) { 1207 if (ifs->ifs_nat_instances == NULL) 1208 error = ENOENT; 1209 goto finished; 1210 } 1211 } else { 1212 /* 1213 * Make sure the pointer we're copying from exists in the 1214 * current list of entries. Security precaution to prevent 1215 * copying of random kernel data. 1216 */ 1217 for (n = ifs->ifs_nat_instances; n; n = n->nat_next) 1218 if (n == nat) 1219 break; 1220 if (n == NULL) { 1221 error = ESRCH; 1222 goto finished; 1223 } 1224 } 1225 ipn->ipn_next = nat->nat_next; 1226 1227 /* 1228 * Copy the NAT structure. 1229 */ 1230 bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat)); 1231 1232 /* 1233 * If we have a pointer to the NAT rule it belongs to, save that too. 1234 */ 1235 if (nat->nat_ptr != NULL) 1236 bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat, 1237 sizeof(ipn->ipn_ipnat)); 1238 1239 /* 1240 * If we also know the NAT entry has an associated filter rule, 1241 * save that too. 
1242 */ 1243 if (nat->nat_fr != NULL) 1244 bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr, 1245 sizeof(ipn->ipn_fr)); 1246 1247 /* 1248 * Last but not least, if there is an application proxy session set 1249 * up for this NAT entry, then copy that out too, including any 1250 * private data saved along side it by the proxy. 1251 */ 1252 aps = nat->nat_aps; 1253 outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data); 1254 if (aps != NULL) { 1255 char *s; 1256 1257 if (outsize < sizeof(*aps)) { 1258 error = ENOBUFS; 1259 goto finished; 1260 } 1261 1262 s = ipn->ipn_data; 1263 bcopy((char *)aps, s, sizeof(*aps)); 1264 s += sizeof(*aps); 1265 outsize -= sizeof(*aps); 1266 if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz)) 1267 bcopy(aps->aps_data, s, aps->aps_psiz); 1268 else 1269 error = ENOBUFS; 1270 } 1271 if (error == 0) { 1272 error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize); 1273 } 1274 1275 finished: 1276 if (ipn != NULL) { 1277 KFREES(ipn, ipns.ipn_dsize); 1278 } 1279 return error; 1280 } 1281 1282 1283 /* ------------------------------------------------------------------------ */ 1284 /* Function: fr_natputent */ 1285 /* Returns: int - 0 == success, != 0 is the error value. */ 1286 /* Parameters: data(I) - pointer to natget structure with NAT */ 1287 /* structure information to load into the kernel */ 1288 /* getlock(I) - flag indicating whether or not a write lock */ 1289 /* on ipf_nat is already held. */ 1290 /* */ 1291 /* Handle SIOCSTPUT. */ 1292 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */ 1293 /* firewall rule data structures, if pointers to them indicate so. 
*/ 1294 /* ------------------------------------------------------------------------ */ 1295 static int fr_natputent(data, getlock, ifs) 1296 caddr_t data; 1297 int getlock; 1298 ipf_stack_t *ifs; 1299 { 1300 nat_save_t ipn, *ipnn; 1301 ap_session_t *aps; 1302 nat_t *n, *nat; 1303 frentry_t *fr; 1304 fr_info_t fin; 1305 ipnat_t *in; 1306 int error; 1307 1308 error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE); 1309 if (error != 0) 1310 return error; 1311 1312 /* 1313 * Trigger automatic call to nat_extraflush() if the 1314 * table has reached capcity specified by hi watermark. 1315 */ 1316 if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi) 1317 ifs->ifs_nat_doflush = 1; 1318 1319 /* 1320 * Initialise early because of code at junkput label. 1321 */ 1322 in = NULL; 1323 aps = NULL; 1324 nat = NULL; 1325 ipnn = NULL; 1326 1327 /* 1328 * New entry, copy in the rest of the NAT entry if it's size is more 1329 * than just the nat_t structure. 1330 */ 1331 fr = NULL; 1332 if (ipn.ipn_dsize > sizeof(ipn)) { 1333 if (ipn.ipn_dsize > 81920) { 1334 error = ENOMEM; 1335 goto junkput; 1336 } 1337 1338 KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize); 1339 if (ipnn == NULL) 1340 return ENOMEM; 1341 1342 error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize); 1343 if (error != 0) { 1344 error = EFAULT; 1345 goto junkput; 1346 } 1347 } else 1348 ipnn = &ipn; 1349 1350 KMALLOC(nat, nat_t *); 1351 if (nat == NULL) { 1352 error = ENOMEM; 1353 goto junkput; 1354 } 1355 1356 bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat)); 1357 /* 1358 * Initialize all these so that nat_delete() doesn't cause a crash. 
1359 */ 1360 bzero((char *)nat, offsetof(struct nat, nat_tqe)); 1361 nat->nat_tqe.tqe_pnext = NULL; 1362 nat->nat_tqe.tqe_next = NULL; 1363 nat->nat_tqe.tqe_ifq = NULL; 1364 nat->nat_tqe.tqe_parent = nat; 1365 1366 /* 1367 * Restore the rule associated with this nat session 1368 */ 1369 in = ipnn->ipn_nat.nat_ptr; 1370 if (in != NULL) { 1371 KMALLOC(in, ipnat_t *); 1372 nat->nat_ptr = in; 1373 if (in == NULL) { 1374 error = ENOMEM; 1375 goto junkput; 1376 } 1377 bzero((char *)in, offsetof(struct ipnat, in_next6)); 1378 bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in)); 1379 in->in_use = 1; 1380 in->in_flags |= IPN_DELETE; 1381 1382 ATOMIC_INC(ifs->ifs_nat_stats.ns_rules); 1383 1384 if (nat_resolverule(in, ifs) != 0) { 1385 error = ESRCH; 1386 goto junkput; 1387 } 1388 } 1389 1390 /* 1391 * Check that the NAT entry doesn't already exist in the kernel. 1392 */ 1393 bzero((char *)&fin, sizeof(fin)); 1394 fin.fin_p = nat->nat_p; 1395 fin.fin_ifs = ifs; 1396 if (nat->nat_dir == NAT_OUTBOUND) { 1397 fin.fin_data[0] = ntohs(nat->nat_oport); 1398 fin.fin_data[1] = ntohs(nat->nat_outport); 1399 fin.fin_ifp = nat->nat_ifps[0]; 1400 if (getlock) { 1401 READ_ENTER(&ifs->ifs_ipf_nat); 1402 } 1403 n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p, 1404 nat->nat_oip, nat->nat_outip); 1405 if (getlock) { 1406 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1407 } 1408 if (n != NULL) { 1409 error = EEXIST; 1410 goto junkput; 1411 } 1412 } else if (nat->nat_dir == NAT_INBOUND) { 1413 fin.fin_data[0] = ntohs(nat->nat_inport); 1414 fin.fin_data[1] = ntohs(nat->nat_oport); 1415 fin.fin_ifp = nat->nat_ifps[1]; 1416 if (getlock) { 1417 READ_ENTER(&ifs->ifs_ipf_nat); 1418 } 1419 n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p, 1420 nat->nat_inip, nat->nat_oip); 1421 if (getlock) { 1422 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1423 } 1424 if (n != NULL) { 1425 error = EEXIST; 1426 goto junkput; 1427 } 1428 } else { 1429 error = EINVAL; 1430 goto junkput; 1431 } 1432 1433 /* 1434 * Restore ap_session_t 
structure. Include the private data allocated 1435 * if it was there. 1436 */ 1437 aps = nat->nat_aps; 1438 if (aps != NULL) { 1439 KMALLOC(aps, ap_session_t *); 1440 nat->nat_aps = aps; 1441 if (aps == NULL) { 1442 error = ENOMEM; 1443 goto junkput; 1444 } 1445 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps)); 1446 if (in != NULL) 1447 aps->aps_apr = in->in_apr; 1448 else 1449 aps->aps_apr = NULL; 1450 if (aps->aps_psiz != 0) { 1451 if (aps->aps_psiz > 81920) { 1452 error = ENOMEM; 1453 goto junkput; 1454 } 1455 KMALLOCS(aps->aps_data, void *, aps->aps_psiz); 1456 if (aps->aps_data == NULL) { 1457 error = ENOMEM; 1458 goto junkput; 1459 } 1460 bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data, 1461 aps->aps_psiz); 1462 } else { 1463 aps->aps_psiz = 0; 1464 aps->aps_data = NULL; 1465 } 1466 } 1467 1468 /* 1469 * If there was a filtering rule associated with this entry then 1470 * build up a new one. 1471 */ 1472 fr = nat->nat_fr; 1473 if (fr != NULL) { 1474 if ((nat->nat_flags & SI_NEWFR) != 0) { 1475 KMALLOC(fr, frentry_t *); 1476 nat->nat_fr = fr; 1477 if (fr == NULL) { 1478 error = ENOMEM; 1479 goto junkput; 1480 } 1481 ipnn->ipn_nat.nat_fr = fr; 1482 (void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE); 1483 bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr)); 1484 1485 fr->fr_ref = 1; 1486 fr->fr_dsize = 0; 1487 fr->fr_data = NULL; 1488 fr->fr_type = FR_T_NONE; 1489 1490 MUTEX_NUKE(&fr->fr_lock); 1491 MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock"); 1492 } else { 1493 if (getlock) { 1494 READ_ENTER(&ifs->ifs_ipf_nat); 1495 } 1496 for (n = ifs->ifs_nat_instances; n; n = n->nat_next) 1497 if (n->nat_fr == fr) 1498 break; 1499 1500 if (n != NULL) { 1501 MUTEX_ENTER(&fr->fr_lock); 1502 fr->fr_ref++; 1503 MUTEX_EXIT(&fr->fr_lock); 1504 } 1505 if (getlock) { 1506 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1507 } 1508 if (!n) { 1509 error = ESRCH; 1510 goto junkput; 1511 } 1512 } 1513 } 1514 1515 if (ipnn != &ipn) { 1516 KFREES(ipnn, ipn.ipn_dsize); 1517 ipnn = NULL; 1518 } 1519 
1520 if (getlock) { 1521 WRITE_ENTER(&ifs->ifs_ipf_nat); 1522 } 1523 error = nat_insert(nat, nat->nat_rev, ifs); 1524 if ((error == 0) && (aps != NULL)) { 1525 aps->aps_next = ifs->ifs_ap_sess_list; 1526 ifs->ifs_ap_sess_list = aps; 1527 } 1528 if (getlock) { 1529 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1530 } 1531 1532 if (error == 0) 1533 return 0; 1534 1535 error = ENOMEM; 1536 1537 junkput: 1538 if (fr != NULL) 1539 (void) fr_derefrule(&fr, ifs); 1540 1541 if ((ipnn != NULL) && (ipnn != &ipn)) { 1542 KFREES(ipnn, ipn.ipn_dsize); 1543 } 1544 if (nat != NULL) { 1545 if (aps != NULL) { 1546 if (aps->aps_data != NULL) { 1547 KFREES(aps->aps_data, aps->aps_psiz); 1548 } 1549 KFREE(aps); 1550 } 1551 if (in != NULL) { 1552 if (in->in_apr) 1553 appr_free(in->in_apr); 1554 KFREE(in); 1555 } 1556 KFREE(nat); 1557 } 1558 return error; 1559 } 1560 1561 1562 /* ------------------------------------------------------------------------ */ 1563 /* Function: nat_delete */ 1564 /* Returns: Nil */ 1565 /* Parameters: natd(I) - pointer to NAT structure to delete */ 1566 /* logtype(I) - type of LOG record to create before deleting */ 1567 /* Write Lock: ipf_nat */ 1568 /* */ 1569 /* Delete a nat entry from the various lists and table. If NAT logging is */ 1570 /* enabled then generate a NAT log record for this event. */ 1571 /* ------------------------------------------------------------------------ */ 1572 static void nat_delete(nat, logtype, ifs) 1573 struct nat *nat; 1574 int logtype; 1575 ipf_stack_t *ifs; 1576 { 1577 struct ipnat *ipn; 1578 1579 if (logtype != 0 && ifs->ifs_nat_logging != 0) 1580 nat_log(nat, logtype, ifs); 1581 1582 /* 1583 * Take it as a general indication that all the pointers are set if 1584 * nat_pnext is set. 
1585 */ 1586 if (nat->nat_pnext != NULL) { 1587 ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--; 1588 ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--; 1589 1590 *nat->nat_pnext = nat->nat_next; 1591 if (nat->nat_next != NULL) { 1592 nat->nat_next->nat_pnext = nat->nat_pnext; 1593 nat->nat_next = NULL; 1594 } 1595 nat->nat_pnext = NULL; 1596 1597 *nat->nat_phnext[0] = nat->nat_hnext[0]; 1598 if (nat->nat_hnext[0] != NULL) { 1599 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; 1600 nat->nat_hnext[0] = NULL; 1601 } 1602 nat->nat_phnext[0] = NULL; 1603 1604 *nat->nat_phnext[1] = nat->nat_hnext[1]; 1605 if (nat->nat_hnext[1] != NULL) { 1606 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; 1607 nat->nat_hnext[1] = NULL; 1608 } 1609 nat->nat_phnext[1] = NULL; 1610 1611 if ((nat->nat_flags & SI_WILDP) != 0) 1612 ifs->ifs_nat_stats.ns_wilds--; 1613 } 1614 1615 if (nat->nat_me != NULL) { 1616 *nat->nat_me = NULL; 1617 nat->nat_me = NULL; 1618 } 1619 1620 fr_deletequeueentry(&nat->nat_tqe); 1621 1622 MUTEX_ENTER(&nat->nat_lock); 1623 if (nat->nat_ref > 1) { 1624 nat->nat_ref--; 1625 MUTEX_EXIT(&nat->nat_lock); 1626 return; 1627 } 1628 MUTEX_EXIT(&nat->nat_lock); 1629 1630 /* 1631 * At this point, nat_ref is 1, doing "--" would make it 0.. 1632 */ 1633 nat->nat_ref = 0; 1634 1635 #ifdef IPFILTER_SYNC 1636 if (nat->nat_sync) 1637 ipfsync_del(nat->nat_sync); 1638 #endif 1639 1640 if (nat->nat_fr != NULL) 1641 (void)fr_derefrule(&nat->nat_fr, ifs); 1642 1643 if (nat->nat_hm != NULL) 1644 fr_hostmapdel(&nat->nat_hm); 1645 1646 /* 1647 * If there is an active reference from the nat entry to its parent 1648 * rule, decrement the rule's reference count and free it too if no 1649 * longer being used. 
1650 */ 1651 ipn = nat->nat_ptr; 1652 if (ipn != NULL) { 1653 ipn->in_space++; 1654 ipn->in_use--; 1655 if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) { 1656 if (ipn->in_apr) 1657 appr_free(ipn->in_apr); 1658 KFREE(ipn); 1659 ifs->ifs_nat_stats.ns_rules--; 1660 } 1661 } 1662 1663 MUTEX_DESTROY(&nat->nat_lock); 1664 1665 aps_free(nat->nat_aps, ifs); 1666 ifs->ifs_nat_stats.ns_inuse--; 1667 1668 /* 1669 * If there's a fragment table entry too for this nat entry, then 1670 * dereference that as well. This is after nat_lock is released 1671 * because of Tru64. 1672 */ 1673 fr_forgetnat((void *)nat, ifs); 1674 1675 KFREE(nat); 1676 } 1677 1678 1679 /* ------------------------------------------------------------------------ */ 1680 /* Function: nat_flushtable */ 1681 /* Returns: int - number of NAT rules deleted */ 1682 /* Parameters: Nil */ 1683 /* */ 1684 /* Deletes all currently active NAT sessions. In deleting each NAT entry a */ 1685 /* log record should be emitted in nat_delete() if NAT logging is enabled. */ 1686 /* ------------------------------------------------------------------------ */ 1687 /* 1688 * nat_flushtable - clear the NAT table of all mapping entries. 1689 */ 1690 static int nat_flushtable(ifs) 1691 ipf_stack_t *ifs; 1692 { 1693 nat_t *nat; 1694 int j = 0; 1695 1696 /* 1697 * ALL NAT mappings deleted, so lets just make the deletions 1698 * quicker. 
1699 */ 1700 if (ifs->ifs_nat_table[0] != NULL) 1701 bzero((char *)ifs->ifs_nat_table[0], 1702 sizeof(ifs->ifs_nat_table[0]) * ifs->ifs_ipf_nattable_sz); 1703 if (ifs->ifs_nat_table[1] != NULL) 1704 bzero((char *)ifs->ifs_nat_table[1], 1705 sizeof(ifs->ifs_nat_table[1]) * ifs->ifs_ipf_nattable_sz); 1706 1707 while ((nat = ifs->ifs_nat_instances) != NULL) { 1708 nat_delete(nat, NL_FLUSH, ifs); 1709 j++; 1710 } 1711 1712 ifs->ifs_nat_stats.ns_inuse = 0; 1713 return j; 1714 } 1715 1716 1717 /* ------------------------------------------------------------------------ */ 1718 /* Function: nat_clearlist */ 1719 /* Returns: int - number of NAT/RDR rules deleted */ 1720 /* Parameters: Nil */ 1721 /* */ 1722 /* Delete all rules in the current list of rules. There is nothing elegant */ 1723 /* about this cleanup: simply free all entries on the list of rules and */ 1724 /* clear out the tables used for hashed NAT rule lookups. */ 1725 /* ------------------------------------------------------------------------ */ 1726 static int nat_clearlist(ifs) 1727 ipf_stack_t *ifs; 1728 { 1729 ipnat_t *n, **np = &ifs->ifs_nat_list; 1730 int i = 0; 1731 1732 if (ifs->ifs_nat_rules != NULL) 1733 bzero((char *)ifs->ifs_nat_rules, 1734 sizeof(*ifs->ifs_nat_rules) * ifs->ifs_ipf_natrules_sz); 1735 if (ifs->ifs_rdr_rules != NULL) 1736 bzero((char *)ifs->ifs_rdr_rules, 1737 sizeof(*ifs->ifs_rdr_rules) * ifs->ifs_ipf_rdrrules_sz); 1738 1739 while ((n = *np) != NULL) { 1740 *np = n->in_next; 1741 if (n->in_use == 0) { 1742 if (n->in_apr != NULL) 1743 appr_free(n->in_apr); 1744 KFREE(n); 1745 ifs->ifs_nat_stats.ns_rules--; 1746 } else { 1747 n->in_flags |= IPN_DELETE; 1748 n->in_next = NULL; 1749 } 1750 i++; 1751 } 1752 ifs->ifs_nat_masks = 0; 1753 ifs->ifs_rdr_masks = 0; 1754 return i; 1755 } 1756 1757 1758 /* ------------------------------------------------------------------------ */ 1759 /* Function: nat_newmap */ 1760 /* Returns: int - -1 == error, 0 == success */ 1761 /* Parameters: fin(I) - 
pointer to packet information */
/*              nat(I) - pointer to NAT entry                               */
/*              ni(I)  - pointer to structure with misc. information needed */
/*                       to create new NAT entry.                           */
/*                                                                          */
/* Given an empty NAT structure, populate it with new information about a   */
/* new NAT session, as defined by the matching NAT rule.                    */
/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
/* to the new IP address for the translation.                               */
/* ------------------------------------------------------------------------ */
static INLINE int nat_newmap(fin, nat, ni)
fr_info_t *fin;
nat_t *nat;
natinfo_t *ni;
{
	u_short st_port, dport, sport, port, sp, dp;
	struct in_addr in, inb;
	hostmap_t *hm;
	u_32_t flags;
	u_32_t st_ip;
	ipnat_t *np;
	nat_t *natl;
	int l;
	ipf_stack_t *ifs = fin->fin_ifs;

	/*
	 * If it's an outbound packet which doesn't match any existing
	 * record, then create a new port
	 *
	 * "l" counts the attempts made by the loop below; st_ip and st_port
	 * remember where the rule's address/port cursor stood on entry so
	 * that a full wrap-around can be detected.
	 */
	l = 0;
	hm = NULL;
	np = ni->nai_np;
	st_ip = np->in_nip;
	st_port = np->in_pnext;
	flags = ni->nai_flags;
	sport = ni->nai_sport;
	dport = ni->nai_dport;

	/*
	 * Do a loop until we either run out of entries to try or we find
	 * a NAT mapping that isn't currently being used.  This is done
	 * because the change to the source is not (usually) being fixed.
	 */
	do {
		port = 0;
		in.s_addr = htonl(np->in_nip);
		if (l == 0) {
			/*
			 * Check to see if there is an existing NAT
			 * setup for this IP address pair.
			 */
			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
					 in, 0, ifs);
			if (hm != NULL)
				in.s_addr = hm->hm_mapip.s_addr;
		} else if ((l == 1) && (hm != NULL)) {
			/* The host mapping led to a clash: give it up. */
			fr_hostmapdel(&hm);
		}
		/* From here on "in" is kept in host byte order. */
		in.s_addr = ntohl(in.s_addr);

		nat->nat_hm = hm;

		/* A single address without port cursor allows one attempt. */
		if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
			if (l > 0)
				return -1;
		}

		if (np->in_redir == NAT_BIMAP &&
		    np->in_inmsk == np->in_outmsk) {
			/*
			 * map the address block in a 1:1 fashion
			 */
			in.s_addr = np->in_outip;
			in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
			in.s_addr = ntohl(in.s_addr);

		} else if (np->in_redir & NAT_MAPBLK) {
			if ((l >= np->in_ppip) || ((l > 0) &&
			     !(flags & IPN_TCPUDP)))
				return -1;
			/*
			 * map-block - Calculate destination address.
			 */
			in.s_addr = ntohl(fin->fin_saddr);
			in.s_addr &= ntohl(~np->in_inmsk);
			inb.s_addr = in.s_addr;
			in.s_addr /= np->in_ippip;
			in.s_addr &= ntohl(~np->in_outmsk);
			in.s_addr += ntohl(np->in_outip);
			/*
			 * Calculate destination port.
			 */
			if ((flags & IPN_TCPUDP) &&
			    (np->in_ppip != 0)) {
				port = ntohs(sport) + l;
				port %= np->in_ppip;
				port += np->in_ppip *
					(inb.s_addr % np->in_ippip);
				port += MAPBLK_MINPORT;
				port = htons(port);
			}

		} else if ((np->in_outip == 0) &&
			   (np->in_outmsk == 0xffffffff)) {
			/*
			 * 0/32 - use the interface's IP address.
			 */
			if ((l > 0) ||
			    fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
				       &in, NULL, fin->fin_ifs) == -1)
				return -1;
			in.s_addr = ntohl(in.s_addr);

		} else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
			/*
			 * 0/0 - use the original source address/port.
			 */
			if (l > 0)
				return -1;
			in.s_addr = ntohl(fin->fin_saddr);

		} else if ((np->in_outmsk != 0xffffffff) &&
			   (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
			np->in_nip++;

		natl = NULL;

		if ((flags & IPN_TCPUDP) &&
		    ((np->in_redir & NAT_MAPBLK) == 0) &&
		    (np->in_flags & IPN_AUTOPORTMAP)) {
			/*
			 * "ports auto" (without map-block)
			 *
			 * NOTE(review): the modulo in the next test is not
			 * protected against in_ppip being 0, although the
			 * statement after it is - confirm that in_ppip
			 * cannot be 0 for a rule with IPN_AUTOPORTMAP set.
			 */
			if ((l > 0) && (l % np->in_ppip == 0)) {
				if (l > np->in_space) {
					return -1;
				} else if ((l > np->in_ppip) &&
					   np->in_outmsk != 0xffffffff)
					np->in_nip++;
			}
			if (np->in_ppip != 0) {
				port = ntohs(sport);
				port += (l % np->in_ppip);
				port %= np->in_ppip;
				port += np->in_ppip *
					(ntohl(fin->fin_saddr) %
					 np->in_ippip);
				port += MAPBLK_MINPORT;
				port = htons(port);
			}

		} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
			   (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
			/*
			 * Standard port translation.  Select next port.
			 */
			port = htons(np->in_pnext++);

			/* Wrap the port cursor and step to the next address. */
			if (np->in_pnext > ntohs(np->in_pmax)) {
				np->in_pnext = ntohs(np->in_pmin);
				if (np->in_outmsk != 0xffffffff)
					np->in_nip++;
			}
		}

		/* Keep the address cursor inside the rule's address set. */
		if (np->in_flags & IPN_IPRANGE) {
			if (np->in_nip > ntohl(np->in_outmsk))
				np->in_nip = ntohl(np->in_outip);
		} else {
			if ((np->in_outmsk != 0xffffffff) &&
			    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
			    ntohl(np->in_outip))
				np->in_nip = ntohl(np->in_outip) + 1;
		}

		/* No port was chosen above: keep the original one. */
		if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
			port = sport;

		/*
		 * Here we do a lookup of the connection as seen from
		 * the outside.  If an IP# pair already exists, try
		 * again.  So if you have A->B becomes C->B, you can
		 * also have D->E become C->E but not D->B causing
		 * another C->B.  Also take protocol and ports into
		 * account when determining whether a pre-existing
		 * NAT setup will cause an external conflict where
		 * this is appropriate.
		 *
		 * fin_data[] is temporarily rewritten for the lookup and
		 * restored from sp/dp straight afterwards.
		 */
		inb.s_addr = htonl(in.s_addr);
		sp = fin->fin_data[0];
		dp = fin->fin_data[1];
		fin->fin_data[0] = fin->fin_data[1];
		fin->fin_data[1] = htons(port);
		natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
				    (u_int)fin->fin_p, fin->fin_dst, inb);
		fin->fin_data[0] = sp;
		fin->fin_data[1] = dp;

		/*
		 * Has the search wrapped around and come back to the
		 * start ?
		 */
		if ((natl != NULL) &&
		    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
		    (np->in_nip != 0) && (st_ip == np->in_nip))
			return -1;
		l++;
	} while (natl != NULL);

	if (np->in_space > 0)
		np->in_space--;

	/* Setup the NAT table */
	nat->nat_inip = fin->fin_src;
	nat->nat_outip.s_addr = htonl(in.s_addr);
	nat->nat_oip = fin->fin_dst;
	if (nat->nat_hm == NULL)
		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
					  nat->nat_outip, 0, ifs);

	/*
	 * The ICMP checksum does not have a pseudo header containing
	 * the IP addresses
	 */
	ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
	ni->nai_sum2 = LONG_SUM(in.s_addr);
	if ((flags & IPN_TCPUDP)) {
		ni->nai_sum1 += ntohs(sport);
		ni->nai_sum2 += ntohs(port);
	}

	/* Record the ports in the entry and rewrite the packet header. */
	if (flags & IPN_TCPUDP) {
		nat->nat_inport = sport;
		nat->nat_outport = port;	/* sport */
		nat->nat_oport = dport;
		((tcphdr_t *)fin->fin_dp)->th_sport = port;
	} else if (flags & IPN_ICMPQUERY) {
		((icmphdr_t *)fin->fin_dp)->icmp_id = port;
		nat->nat_inport = port;
		nat->nat_outport = port;
	}

	/* Hand the chosen address (host byte order) and ports back. */
	ni->nai_ip.s_addr = in.s_addr;
	ni->nai_port = port;
	ni->nai_nport = dport;
	return 0;
}


/* ------------------------------------------------------------------------ */
/*
Function:    nat_newrdr                                                  */
/* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
/*                    allow rule to be moved if IPN_ROUNDR is set.          */
/* Parameters:  fin(I) - pointer to packet information                      */
/*              nat(I) - pointer to NAT entry                               */
/*              ni(I)  - pointer to structure with misc. information needed */
/*                       to create new NAT entry.                           */
/*                                                                          */
/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
/* to the new IP address for the translation.                               */
/* ------------------------------------------------------------------------ */
static INLINE int nat_newrdr(fin, nat, ni)
fr_info_t *fin;
nat_t *nat;
natinfo_t *ni;
{
	u_short nport, dport, sport;
	struct in_addr in;
	hostmap_t *hm;
	u_32_t flags;
	ipnat_t *np;
	int move;
	ipf_stack_t *ifs = fin->fin_ifs;

	/* "move" is the return value: 1 unless a branch below clears it. */
	move = 1;
	hm = NULL;
	in.s_addr = 0;
	np = ni->nai_np;
	flags = ni->nai_flags;
	sport = ni->nai_sport;
	dport = ni->nai_dport;

	/*
	 * If the matching rule has IPN_STICKY set, then we want to have the
	 * same rule kick in as before.  Why would this happen?  If you have
	 * a collection of rdr rules with "round-robin sticky", the current
	 * packet might match a different one to the previous connection but
	 * we want the same destination to be used.
	 */
	if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) ==
	    (IPN_ROUNDR|IPN_STICKY)) {
		hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
				 (u_32_t)dport, ifs);
		if (hm != NULL) {
			/* Switch to the rule recorded in the host mapping. */
			in.s_addr = ntohl(hm->hm_mapip.s_addr);
			np = hm->hm_ipnat;
			ni->nai_np = np;
			move = 0;
		}
	}

	/*
	 * Otherwise, it's an inbound packet.  Most likely, we don't
	 * want to rewrite source ports and source addresses.  Instead,
	 * we want to rewrite to a fixed internal address and fixed
	 * internal port.
	 */
	if (np->in_flags & IPN_SPLIT) {
		in.s_addr = np->in_nip;

		if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
					 in, (u_32_t)dport, ifs);
			if (hm != NULL) {
				in.s_addr = hm->hm_mapip.s_addr;
				move = 0;
			}
		}

		/*
		 * Point in_nip at the other of the rule's two destinations
		 * (in_inip / in_inmsk) for the next connection.
		 */
		if (hm == NULL || hm->hm_ref == 1) {
			if (np->in_inip == htonl(in.s_addr)) {
				np->in_nip = ntohl(np->in_inmsk);
				move = 0;
			} else {
				np->in_nip = ntohl(np->in_inip);
			}
		}

	} else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
		/*
		 * 0/32 - use the interface's IP address.
		 */
		if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL,
			       fin->fin_ifs) == -1)
			return -1;
		in.s_addr = ntohl(in.s_addr);

	} else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
		/*
		 * 0/0 - use the original destination address/port.
		 */
		in.s_addr = ntohl(fin->fin_daddr);

	} else if (np->in_redir == NAT_BIMAP &&
		   np->in_inmsk == np->in_outmsk) {
		/*
		 * map the address block in a 1:1 fashion
		 */
		in.s_addr = np->in_inip;
		in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
		in.s_addr = ntohl(in.s_addr);
	} else {
		in.s_addr = ntohl(np->in_inip);
	}

	/* Work out the new destination port. */
	if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
		nport = dport;
	else {
		/*
		 * Whilst not optimized for the case where
		 * pmin == pmax, the gain is not significant.
		 */
		if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
		    (np->in_pmin != np->in_pmax)) {
			/* Keep the offset of dport within pmin..pmax. */
			nport = ntohs(dport) - ntohs(np->in_pmin) +
				ntohs(np->in_pnext);
			nport = htons(nport);
		} else
			nport = np->in_pnext;
	}

	/*
	 * When the redirect-to address is set to 0.0.0.0, just
	 * assume a blank `forwarding' of the packet.  We don't
	 * setup any translation for this either.
	 */
	if (in.s_addr == 0) {
		if (nport == dport)
			return -1;
		in.s_addr = ntohl(fin->fin_daddr);
	}

	nat->nat_inip.s_addr = htonl(in.s_addr);
	nat->nat_outip = fin->fin_dst;
	nat->nat_oip = fin->fin_src;

	ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
	ni->nai_sum2 = LONG_SUM(in.s_addr) + ntohs(nport);

	/* Hand the chosen address (host byte order) and ports back. */
	ni->nai_ip.s_addr = in.s_addr;
	ni->nai_nport = nport;
	ni->nai_port = sport;

	/* Record the ports in the entry and rewrite the packet header. */
	if (flags & IPN_TCPUDP) {
		nat->nat_inport = nport;
		nat->nat_outport = dport;
		nat->nat_oport = sport;
		((tcphdr_t *)fin->fin_dp)->th_dport = nport;
	} else if (flags & IPN_ICMPQUERY) {
		((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
		nat->nat_inport = nport;
		nat->nat_outport = nport;
	}

	return move;
}

/* ------------------------------------------------------------------------ */
/* Function:    nat_new                                                     */
/* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
/*                       else pointer to new NAT structure                  */
/* Parameters:  fin(I)       - pointer to packet information                */
/*              np(I)        - pointer to NAT rule                          */
/*              natsave(I)   - pointer to where to store NAT struct pointer */
/*              flags(I)     - flags describing the current packet          */
/*              direction(I) - direction of packet (in/out)                 */
/* Write Lock:  ipf_nat                                                     */
/*                                                                          */
/* Attempts to create a new NAT entry.  Does not actually change the packet */
/* in any way.                                                              */
/*                                                                          */
/* This function is in three main parts: (1) deal with creating a new NAT   */
/* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
/* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
/* and (3) building that structure and putting it into the NAT table(s).
*/ 2186 /* ------------------------------------------------------------------------ */ 2187 nat_t *nat_new(fin, np, natsave, flags, direction) 2188 fr_info_t *fin; 2189 ipnat_t *np; 2190 nat_t **natsave; 2191 u_int flags; 2192 int direction; 2193 { 2194 u_short port = 0, sport = 0, dport = 0, nport = 0; 2195 tcphdr_t *tcp = NULL; 2196 hostmap_t *hm = NULL; 2197 struct in_addr in; 2198 nat_t *nat, *natl; 2199 u_int nflags; 2200 natinfo_t ni; 2201 u_32_t sumd; 2202 int move; 2203 ipf_stack_t *ifs = fin->fin_ifs; 2204 2205 /* 2206 * Trigger automatic call to nat_extraflush() if the 2207 * table has reached capcity specified by hi watermark. 2208 */ 2209 if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi) 2210 ifs->ifs_nat_doflush = 1; 2211 2212 if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) { 2213 ifs->ifs_nat_stats.ns_memfail++; 2214 return NULL; 2215 } 2216 2217 move = 1; 2218 nflags = np->in_flags & flags; 2219 nflags &= NAT_FROMRULE; 2220 2221 ni.nai_np = np; 2222 ni.nai_nflags = nflags; 2223 ni.nai_flags = flags; 2224 2225 /* Give me a new nat */ 2226 KMALLOC(nat, nat_t *); 2227 if (nat == NULL) { 2228 ifs->ifs_nat_stats.ns_memfail++; 2229 /* 2230 * Try to automatically tune the max # of entries in the 2231 * table allowed to be less than what will cause kmem_alloc() 2232 * to fail and try to eliminate panics due to out of memory 2233 * conditions arising. 2234 */ 2235 if (ifs->ifs_ipf_nattable_max > ifs->ifs_ipf_nattable_sz) { 2236 ifs->ifs_ipf_nattable_max = ifs->ifs_nat_stats.ns_inuse - 100; 2237 printf("ipf_nattable_max reduced to %d\n", 2238 ifs->ifs_ipf_nattable_max); 2239 } 2240 return NULL; 2241 } 2242 2243 if (flags & IPN_TCPUDP) { 2244 tcp = fin->fin_dp; 2245 ni.nai_sport = htons(fin->fin_sport); 2246 ni.nai_dport = htons(fin->fin_dport); 2247 } else if (flags & IPN_ICMPQUERY) { 2248 /* 2249 * In the ICMP query NAT code, we translate the ICMP id fields 2250 * to make them unique. This is indepedent of the ICMP type 2251 * (e.g. 
in the unlikely event that a host sends an echo and 2252 * an tstamp request with the same id, both packets will have 2253 * their ip address/id field changed in the same way). 2254 */ 2255 /* The icmp_id field is used by the sender to identify the 2256 * process making the icmp request. (the receiver justs 2257 * copies it back in its response). So, it closely matches 2258 * the concept of source port. We overlay sport, so we can 2259 * maximally reuse the existing code. 2260 */ 2261 ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id; 2262 ni.nai_dport = ni.nai_sport; 2263 } 2264 2265 bzero((char *)nat, sizeof(*nat)); 2266 nat->nat_flags = flags; 2267 nat->nat_redir = np->in_redir; 2268 2269 if ((flags & NAT_SLAVE) == 0) { 2270 MUTEX_ENTER(&ifs->ifs_ipf_nat_new); 2271 } 2272 2273 /* 2274 * Search the current table for a match. 2275 */ 2276 if (direction == NAT_OUTBOUND) { 2277 /* 2278 * We can now arrange to call this for the same connection 2279 * because ipf_nat_new doesn't protect the code path into 2280 * this function. 
2281 */ 2282 natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p, 2283 fin->fin_src, fin->fin_dst); 2284 if (natl != NULL) { 2285 KFREE(nat); 2286 nat = natl; 2287 goto done; 2288 } 2289 2290 move = nat_newmap(fin, nat, &ni); 2291 if (move == -1) 2292 goto badnat; 2293 2294 np = ni.nai_np; 2295 in = ni.nai_ip; 2296 } else { 2297 /* 2298 * NAT_INBOUND is used only for redirects rules 2299 */ 2300 natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p, 2301 fin->fin_src, fin->fin_dst); 2302 if (natl != NULL) { 2303 KFREE(nat); 2304 nat = natl; 2305 goto done; 2306 } 2307 2308 move = nat_newrdr(fin, nat, &ni); 2309 if (move == -1) 2310 goto badnat; 2311 2312 np = ni.nai_np; 2313 in = ni.nai_ip; 2314 } 2315 port = ni.nai_port; 2316 nport = ni.nai_nport; 2317 2318 if ((move == 1) && (np->in_flags & IPN_ROUNDR)) { 2319 if (np->in_redir == NAT_REDIRECT) { 2320 nat_delrdr(np); 2321 nat_addrdr(np, ifs); 2322 } else if (np->in_redir == NAT_MAP) { 2323 nat_delnat(np); 2324 nat_addnat(np, ifs); 2325 } 2326 } 2327 2328 if (flags & IPN_TCPUDP) { 2329 sport = ni.nai_sport; 2330 dport = ni.nai_dport; 2331 } else if (flags & IPN_ICMPQUERY) { 2332 sport = ni.nai_sport; 2333 dport = 0; 2334 } 2335 2336 /* 2337 * nat_sumd[0] stores adjustment value including both IP address and 2338 * port number changes. nat_sumd[1] stores adjustment value only for 2339 * IP address changes, to be used for pseudo header adjustment, in 2340 * case hardware partial checksum offload is offered. 
2341 */ 2342 CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd); 2343 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); 2344 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) 2345 if (flags & IPN_TCPUDP) { 2346 ni.nai_sum1 = LONG_SUM(in.s_addr); 2347 if (direction == NAT_OUTBOUND) 2348 ni.nai_sum2 = LONG_SUM(ntohl(fin->fin_saddr)); 2349 else 2350 ni.nai_sum2 = LONG_SUM(ntohl(fin->fin_daddr)); 2351 2352 CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd); 2353 nat->nat_sumd[1] = (sumd & 0xffff) + (sumd >> 16); 2354 } else 2355 #endif 2356 nat->nat_sumd[1] = nat->nat_sumd[0]; 2357 2358 if ((flags & IPN_TCPUDPICMP) && ((sport != port) || (dport != nport))) { 2359 if (direction == NAT_OUTBOUND) 2360 ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr)); 2361 else 2362 ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)); 2363 2364 ni.nai_sum2 = LONG_SUM(in.s_addr); 2365 2366 CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd); 2367 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16); 2368 } else { 2369 nat->nat_ipsumd = nat->nat_sumd[0]; 2370 if (!(flags & IPN_TCPUDPICMP)) { 2371 nat->nat_sumd[0] = 0; 2372 nat->nat_sumd[1] = 0; 2373 } 2374 } 2375 2376 if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) { 2377 goto badnat; 2378 } 2379 if (flags & SI_WILDP) 2380 ifs->ifs_nat_stats.ns_wilds++; 2381 goto done; 2382 badnat: 2383 ifs->ifs_nat_stats.ns_badnat++; 2384 if ((hm = nat->nat_hm) != NULL) 2385 fr_hostmapdel(&hm); 2386 KFREE(nat); 2387 nat = NULL; 2388 done: 2389 if ((flags & NAT_SLAVE) == 0) { 2390 MUTEX_EXIT(&ifs->ifs_ipf_nat_new); 2391 } 2392 return nat; 2393 } 2394 2395 2396 /* ------------------------------------------------------------------------ */ 2397 /* Function: nat_finalise */ 2398 /* Returns: int - 0 == sucess, -1 == failure */ 2399 /* Parameters: fin(I) - pointer to packet information */ 2400 /* nat(I) - pointer to NAT entry */ 2401 /* ni(I) - pointer to structure with misc. information needed */ 2402 /* to create new NAT entry. 
*/ 2403 /* Write Lock: ipf_nat */ 2404 /* */ 2405 /* This is the tail end of constructing a new NAT entry and is the same */ 2406 /* for both IPv4 and IPv6. */ 2407 /* ------------------------------------------------------------------------ */ 2408 /*ARGSUSED*/ 2409 static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction) 2410 fr_info_t *fin; 2411 nat_t *nat; 2412 natinfo_t *ni; 2413 tcphdr_t *tcp; 2414 nat_t **natsave; 2415 int direction; 2416 { 2417 frentry_t *fr; 2418 ipnat_t *np; 2419 ipf_stack_t *ifs = fin->fin_ifs; 2420 2421 np = ni->nai_np; 2422 2423 COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0], fin->fin_v); 2424 2425 #ifdef IPFILTER_SYNC 2426 if ((nat->nat_flags & SI_CLONE) == 0) 2427 nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat); 2428 #endif 2429 2430 nat->nat_me = natsave; 2431 nat->nat_dir = direction; 2432 nat->nat_ifps[0] = np->in_ifps[0]; 2433 nat->nat_ifps[1] = np->in_ifps[1]; 2434 nat->nat_ptr = np; 2435 nat->nat_p = fin->fin_p; 2436 nat->nat_mssclamp = np->in_mssclamp; 2437 fr = fin->fin_fr; 2438 nat->nat_fr = fr; 2439 2440 if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0)) 2441 if (appr_new(fin, nat) == -1) 2442 return -1; 2443 2444 if (nat_insert(nat, fin->fin_rev, ifs) == 0) { 2445 if (ifs->ifs_nat_logging) 2446 nat_log(nat, (u_int)np->in_redir, ifs); 2447 np->in_use++; 2448 if (fr != NULL) { 2449 MUTEX_ENTER(&fr->fr_lock); 2450 fr->fr_ref++; 2451 MUTEX_EXIT(&fr->fr_lock); 2452 } 2453 return 0; 2454 } 2455 2456 /* 2457 * nat_insert failed, so cleanup time... 
2458 */ 2459 return -1; 2460 } 2461 2462 2463 /* ------------------------------------------------------------------------ */ 2464 /* Function: nat_insert */ 2465 /* Returns: int - 0 == sucess, -1 == failure */ 2466 /* Parameters: nat(I) - pointer to NAT structure */ 2467 /* rev(I) - flag indicating forward/reverse direction of packet */ 2468 /* Write Lock: ipf_nat */ 2469 /* */ 2470 /* Insert a NAT entry into the hash tables for searching and add it to the */ 2471 /* list of active NAT entries. Adjust global counters when complete. */ 2472 /* ------------------------------------------------------------------------ */ 2473 int nat_insert(nat, rev, ifs) 2474 nat_t *nat; 2475 int rev; 2476 ipf_stack_t *ifs; 2477 { 2478 u_int hv1, hv2; 2479 nat_t **natp; 2480 2481 /* 2482 * Try and return an error as early as possible, so calculate the hash 2483 * entry numbers first and then proceed. 2484 */ 2485 if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) { 2486 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 2487 0xffffffff); 2488 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport, 2489 ifs->ifs_ipf_nattable_sz); 2490 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 2491 0xffffffff); 2492 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport, 2493 ifs->ifs_ipf_nattable_sz); 2494 } else { 2495 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff); 2496 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, 2497 ifs->ifs_ipf_nattable_sz); 2498 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff); 2499 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, 2500 ifs->ifs_ipf_nattable_sz); 2501 } 2502 2503 if (ifs->ifs_nat_stats.ns_bucketlen[0][hv1] >= ifs->ifs_fr_nat_maxbucket || 2504 ifs->ifs_nat_stats.ns_bucketlen[1][hv2] >= ifs->ifs_fr_nat_maxbucket) { 2505 return -1; 2506 } 2507 2508 nat->nat_hv[0] = hv1; 2509 nat->nat_hv[1] = hv2; 2510 2511 MUTEX_INIT(&nat->nat_lock, "nat entry lock"); 2512 2513 nat->nat_rev = rev; 2514 nat->nat_ref = 1; 2515 
nat->nat_bytes[0] = 0; 2516 nat->nat_pkts[0] = 0; 2517 nat->nat_bytes[1] = 0; 2518 nat->nat_pkts[1] = 0; 2519 2520 nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0'; 2521 nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4, ifs); 2522 2523 if (nat->nat_ifnames[1][0] !='\0') { 2524 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; 2525 nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4, ifs); 2526 } else { 2527 (void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0], 2528 LIFNAMSIZ); 2529 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; 2530 nat->nat_ifps[1] = nat->nat_ifps[0]; 2531 } 2532 2533 nat->nat_next = ifs->ifs_nat_instances; 2534 nat->nat_pnext = &ifs->ifs_nat_instances; 2535 if (ifs->ifs_nat_instances) 2536 ifs->ifs_nat_instances->nat_pnext = &nat->nat_next; 2537 ifs->ifs_nat_instances = nat; 2538 2539 natp = &ifs->ifs_nat_table[0][hv1]; 2540 if (*natp) 2541 (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; 2542 nat->nat_phnext[0] = natp; 2543 nat->nat_hnext[0] = *natp; 2544 *natp = nat; 2545 ifs->ifs_nat_stats.ns_bucketlen[0][hv1]++; 2546 2547 natp = &ifs->ifs_nat_table[1][hv2]; 2548 if (*natp) 2549 (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; 2550 nat->nat_phnext[1] = natp; 2551 nat->nat_hnext[1] = *natp; 2552 *natp = nat; 2553 ifs->ifs_nat_stats.ns_bucketlen[1][hv2]++; 2554 2555 fr_setnatqueue(nat, rev, ifs); 2556 2557 ifs->ifs_nat_stats.ns_added++; 2558 ifs->ifs_nat_stats.ns_inuse++; 2559 return 0; 2560 } 2561 2562 2563 /* ------------------------------------------------------------------------ */ 2564 /* Function: nat_icmperrorlookup */ 2565 /* Returns: nat_t* - point to matching NAT structure */ 2566 /* Parameters: fin(I) - pointer to packet information */ 2567 /* dir(I) - direction of packet (in/out) */ 2568 /* */ 2569 /* Check if the ICMP error message is related to an existing TCP, UDP or */ 2570 /* ICMP query nat entry. It is assumed that the packet is already of the */ 2571 /* the required length. 
*/ 2572 /* ------------------------------------------------------------------------ */ 2573 nat_t *nat_icmperrorlookup(fin, dir) 2574 fr_info_t *fin; 2575 int dir; 2576 { 2577 int flags = 0, minlen; 2578 icmphdr_t *orgicmp; 2579 tcphdr_t *tcp = NULL; 2580 u_short data[2]; 2581 nat_t *nat; 2582 ip_t *oip; 2583 u_int p; 2584 2585 /* 2586 * Does it at least have the return (basic) IP header ? 2587 * Only a basic IP header (no options) should be with an ICMP error 2588 * header. Also, if it's not an error type, then return. 2589 */ 2590 if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR)) 2591 return NULL; 2592 2593 /* 2594 * Check packet size 2595 */ 2596 oip = (ip_t *)((char *)fin->fin_dp + 8); 2597 minlen = IP_HL(oip) << 2; 2598 if ((minlen < sizeof(ip_t)) || 2599 (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)) 2600 return NULL; 2601 /* 2602 * Is the buffer big enough for all of it ? It's the size of the IP 2603 * header claimed in the encapsulated part which is of concern. It 2604 * may be too big to be in this buffer but not so big that it's 2605 * outside the ICMP packet, leading to TCP deref's causing problems. 2606 * This is possible because we don't know how big oip_hl is when we 2607 * do the pullup early in fr_check() and thus can't gaurantee it is 2608 * all here now. 
2609 */ 2610 #ifdef _KERNEL 2611 { 2612 mb_t *m; 2613 2614 m = fin->fin_m; 2615 # if defined(MENTAT) 2616 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr) 2617 return NULL; 2618 # else 2619 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > 2620 (char *)fin->fin_ip + M_LEN(m)) 2621 return NULL; 2622 # endif 2623 } 2624 #endif 2625 2626 if (fin->fin_daddr != oip->ip_src.s_addr) 2627 return NULL; 2628 2629 p = oip->ip_p; 2630 if (p == IPPROTO_TCP) 2631 flags = IPN_TCP; 2632 else if (p == IPPROTO_UDP) 2633 flags = IPN_UDP; 2634 else if (p == IPPROTO_ICMP) { 2635 orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2636 2637 /* see if this is related to an ICMP query */ 2638 if (nat_icmpquerytype4(orgicmp->icmp_type)) { 2639 data[0] = fin->fin_data[0]; 2640 data[1] = fin->fin_data[1]; 2641 fin->fin_data[0] = 0; 2642 fin->fin_data[1] = orgicmp->icmp_id; 2643 2644 flags = IPN_ICMPERR|IPN_ICMPQUERY; 2645 /* 2646 * NOTE : dir refers to the direction of the original 2647 * ip packet. By definition the icmp error 2648 * message flows in the opposite direction. 
2649 */ 2650 if (dir == NAT_INBOUND) 2651 nat = nat_inlookup(fin, flags, p, oip->ip_dst, 2652 oip->ip_src); 2653 else 2654 nat = nat_outlookup(fin, flags, p, oip->ip_dst, 2655 oip->ip_src); 2656 fin->fin_data[0] = data[0]; 2657 fin->fin_data[1] = data[1]; 2658 return nat; 2659 } 2660 } 2661 2662 if (flags & IPN_TCPUDP) { 2663 minlen += 8; /* + 64bits of data to get ports */ 2664 if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen) 2665 return NULL; 2666 2667 data[0] = fin->fin_data[0]; 2668 data[1] = fin->fin_data[1]; 2669 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2670 fin->fin_data[0] = ntohs(tcp->th_dport); 2671 fin->fin_data[1] = ntohs(tcp->th_sport); 2672 2673 if (dir == NAT_INBOUND) { 2674 nat = nat_inlookup(fin, flags, p, oip->ip_dst, 2675 oip->ip_src); 2676 } else { 2677 nat = nat_outlookup(fin, flags, p, oip->ip_dst, 2678 oip->ip_src); 2679 } 2680 fin->fin_data[0] = data[0]; 2681 fin->fin_data[1] = data[1]; 2682 return nat; 2683 } 2684 if (dir == NAT_INBOUND) 2685 return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src); 2686 else 2687 return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src); 2688 } 2689 2690 2691 /* ------------------------------------------------------------------------ */ 2692 /* Function: nat_icmperror */ 2693 /* Returns: nat_t* - point to matching NAT structure */ 2694 /* Parameters: fin(I) - pointer to packet information */ 2695 /* nflags(I) - NAT flags for this packet */ 2696 /* dir(I) - direction of packet (in/out) */ 2697 /* */ 2698 /* Fix up an ICMP packet which is an error message for an existing NAT */ 2699 /* session. This will correct both packet header data and checksums. */ 2700 /* */ 2701 /* This should *ONLY* be used for incoming ICMP error packets to make sure */ 2702 /* a NAT'd ICMP packet gets correctly recognised. 
*/ 2703 /* ------------------------------------------------------------------------ */ 2704 nat_t *nat_icmperror(fin, nflags, dir) 2705 fr_info_t *fin; 2706 u_int *nflags; 2707 int dir; 2708 { 2709 u_32_t sum1, sum2, sumd, psum1, psum2, psumd, sumd2; 2710 struct in_addr in; 2711 icmphdr_t *icmp, *orgicmp; 2712 int dlen; 2713 udphdr_t *udp; 2714 tcphdr_t *tcp; 2715 nat_t *nat; 2716 ip_t *oip; 2717 if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) 2718 return NULL; 2719 2720 /* 2721 * nat_icmperrorlookup() looks up nat entry associated with the 2722 * offending IP packet and returns pointer to the entry, or NULL 2723 * if packet wasn't natted or for `defective' packets. 2724 */ 2725 2726 if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir))) 2727 return NULL; 2728 2729 sumd2 = 0; 2730 *nflags = IPN_ICMPERR; 2731 icmp = fin->fin_dp; 2732 oip = (ip_t *)&icmp->icmp_ip; 2733 udp = (udphdr_t *)((((char *)oip) + (IP_HL(oip) << 2))); 2734 tcp = (tcphdr_t *)udp; 2735 dlen = fin->fin_plen - ((char *)udp - (char *)fin->fin_ip); 2736 2737 /* 2738 * Need to adjust ICMP header to include the real IP#'s and 2739 * port #'s. There are three steps required. 2740 * 2741 * Step 1 2742 * Fix the IP addresses in the offending IP packet and update 2743 * ip header checksum to compensate for the change. 2744 * 2745 * No update needed here for icmp_cksum because the ICMP checksum 2746 * is calculated over the complete ICMP packet, which includes the 2747 * changed oip IP addresses and oip->ip_sum. These two changes 2748 * cancel each other out (if the delta for the IP address is x, 2749 * then the delta for ip_sum is minus x). 
2750 */ 2751 2752 if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) { 2753 sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr)); 2754 in = nat->nat_inip; 2755 oip->ip_src = in; 2756 } else { 2757 sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr)); 2758 in = nat->nat_outip; 2759 oip->ip_dst = in; 2760 } 2761 2762 sum2 = LONG_SUM(ntohl(in.s_addr)); 2763 CALC_SUMD(sum1, sum2, sumd); 2764 fix_datacksum(&oip->ip_sum, sumd); 2765 2766 /* 2767 * Step 2 2768 * Perform other adjustments based on protocol of offending packet. 2769 */ 2770 2771 switch (oip->ip_p) { 2772 case IPPROTO_TCP : 2773 case IPPROTO_UDP : 2774 2775 /* 2776 * For offending TCP/UDP IP packets, translate the ports 2777 * based on the NAT specification. 2778 * 2779 * Advance notice : Now it becomes complicated :-) 2780 * 2781 * Since the port and IP addresse fields are both part 2782 * of the TCP/UDP checksum of the offending IP packet, 2783 * we need to adjust that checksum as well. 2784 * 2785 * To further complicate things, the TCP/UDP checksum 2786 * may not be present. We must check to see if the 2787 * length of the data portion is big enough to hold 2788 * the checksum. In the UDP case, a test to determine 2789 * if the checksum is even set is also required. 2790 * 2791 * Any changes to an IP address, port or checksum within 2792 * the ICMP packet requires a change to icmp_cksum. 2793 * 2794 * Be extremely careful here ... The change is dependent 2795 * upon whether or not the TCP/UPD checksum is present. 2796 * 2797 * If TCP/UPD checksum is present, the icmp_cksum must 2798 * compensate for checksum modification resulting from 2799 * IP address change only. Port change and resulting 2800 * data checksum adjustments cancel each other out. 2801 * 2802 * If TCP/UDP checksum is not present, icmp_cksum must 2803 * compensate for port change only. The IP address 2804 * change does not modify anything else in this case. 
2805 */ 2806 2807 psum1 = 0; 2808 psum2 = 0; 2809 psumd = 0; 2810 2811 if ((tcp->th_dport == nat->nat_oport) && 2812 (tcp->th_sport != nat->nat_inport)) { 2813 2814 /* 2815 * Translate the source port. 2816 */ 2817 2818 psum1 = ntohs(tcp->th_sport); 2819 psum2 = ntohs(nat->nat_inport); 2820 tcp->th_sport = nat->nat_inport; 2821 2822 } else if ((tcp->th_sport == nat->nat_oport) && 2823 (tcp->th_dport != nat->nat_outport)) { 2824 2825 /* 2826 * Translate the destination port. 2827 */ 2828 2829 psum1 = ntohs(tcp->th_dport); 2830 psum2 = ntohs(nat->nat_outport); 2831 tcp->th_dport = nat->nat_outport; 2832 } 2833 2834 if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) { 2835 2836 /* 2837 * TCP checksum present. 2838 * 2839 * Adjust data checksum and icmp checksum to 2840 * compensate for any IP address change. 2841 */ 2842 2843 sum1 = ntohs(tcp->th_sum); 2844 fix_datacksum(&tcp->th_sum, sumd); 2845 sum2 = ntohs(tcp->th_sum); 2846 sumd2 = sumd << 1; 2847 CALC_SUMD(sum1, sum2, sumd); 2848 sumd2 += sumd; 2849 2850 /* 2851 * Also make data checksum adjustment to 2852 * compensate for any port change. 2853 */ 2854 2855 if (psum1 != psum2) { 2856 CALC_SUMD(psum1, psum2, psumd); 2857 fix_datacksum(&tcp->th_sum, psumd); 2858 } 2859 2860 } else if ((oip->ip_p == IPPROTO_UDP) && 2861 (dlen >= 8) && (udp->uh_sum != 0)) { 2862 2863 /* 2864 * The UDP checksum is present and set. 2865 * 2866 * Adjust data checksum and icmp checksum to 2867 * compensate for any IP address change. 2868 */ 2869 2870 sum1 = ntohs(udp->uh_sum); 2871 fix_datacksum(&udp->uh_sum, sumd); 2872 sum2 = ntohs(udp->uh_sum); 2873 sumd2 = sumd << 1; 2874 CALC_SUMD(sum1, sum2, sumd); 2875 sumd2 += sumd; 2876 2877 /* 2878 * Also make data checksum adjustment to 2879 * compensate for any port change. 2880 */ 2881 2882 if (psum1 != psum2) { 2883 CALC_SUMD(psum1, psum2, psumd); 2884 fix_datacksum(&udp->uh_sum, psumd); 2885 } 2886 2887 } else { 2888 2889 /* 2890 * Data checksum was not present. 
2891 * 2892 * Compensate for any port change. 2893 */ 2894 2895 CALC_SUMD(psum2, psum1, psumd); 2896 sumd2 += psumd; 2897 } 2898 break; 2899 2900 case IPPROTO_ICMP : 2901 2902 orgicmp = (icmphdr_t *)udp; 2903 2904 if ((nat->nat_dir == NAT_OUTBOUND) && 2905 (orgicmp->icmp_id != nat->nat_inport) && 2906 (dlen >= 8)) { 2907 2908 /* 2909 * Fix ICMP checksum (of the offening ICMP 2910 * query packet) to compensate the change 2911 * in the ICMP id of the offending ICMP 2912 * packet. 2913 * 2914 * Since you modify orgicmp->icmp_id with 2915 * a delta (say x) and you compensate that 2916 * in origicmp->icmp_cksum with a delta 2917 * minus x, you don't have to adjust the 2918 * overall icmp->icmp_cksum 2919 */ 2920 2921 sum1 = ntohs(orgicmp->icmp_id); 2922 sum2 = ntohs(nat->nat_inport); 2923 CALC_SUMD(sum1, sum2, sumd); 2924 orgicmp->icmp_id = nat->nat_inport; 2925 fix_datacksum(&orgicmp->icmp_cksum, sumd); 2926 2927 } /* nat_dir can't be NAT_INBOUND for icmp queries */ 2928 2929 break; 2930 2931 default : 2932 2933 break; 2934 2935 } /* switch (oip->ip_p) */ 2936 2937 /* 2938 * Step 3 2939 * Make the adjustments to icmp checksum. 2940 */ 2941 2942 if (sumd2 != 0) { 2943 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); 2944 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); 2945 fix_incksum(&icmp->icmp_cksum, sumd2); 2946 } 2947 return nat; 2948 } 2949 2950 2951 /* 2952 * NB: these lookups don't lock access to the list, it assumed that it has 2953 * already been done! 
2954 */ 2955 2956 /* ------------------------------------------------------------------------ */ 2957 /* Function: nat_inlookup */ 2958 /* Returns: nat_t* - NULL == no match, */ 2959 /* else pointer to matching NAT entry */ 2960 /* Parameters: fin(I) - pointer to packet information */ 2961 /* flags(I) - NAT flags for this packet */ 2962 /* p(I) - protocol for this packet */ 2963 /* src(I) - source IP address */ 2964 /* mapdst(I) - destination IP address */ 2965 /* */ 2966 /* Lookup a nat entry based on the mapped destination ip address/port and */ 2967 /* real source address/port. We use this lookup when receiving a packet, */ 2968 /* we're looking for a table entry, based on the destination address. */ 2969 /* */ 2970 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */ 2971 /* */ 2972 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */ 2973 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */ 2974 /* */ 2975 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */ 2976 /* the packet is of said protocol */ 2977 /* ------------------------------------------------------------------------ */ 2978 nat_t *nat_inlookup(fin, flags, p, src, mapdst) 2979 fr_info_t *fin; 2980 u_int flags, p; 2981 struct in_addr src , mapdst; 2982 { 2983 u_short sport, dport; 2984 ipnat_t *ipn; 2985 u_int sflags; 2986 nat_t *nat; 2987 int nflags; 2988 u_32_t dst; 2989 void *ifp; 2990 u_int hv; 2991 ipf_stack_t *ifs = fin->fin_ifs; 2992 2993 if (fin != NULL) 2994 ifp = fin->fin_ifp; 2995 else 2996 ifp = NULL; 2997 sport = 0; 2998 dport = 0; 2999 dst = mapdst.s_addr; 3000 sflags = flags & NAT_TCPUDPICMP; 3001 3002 switch (p) 3003 { 3004 case IPPROTO_TCP : 3005 case IPPROTO_UDP : 3006 sport = htons(fin->fin_data[0]); 3007 dport = htons(fin->fin_data[1]); 3008 break; 3009 case IPPROTO_ICMP : 3010 if (flags & IPN_ICMPERR) 3011 sport = fin->fin_data[1]; 3012 else 3013 dport = fin->fin_data[1]; 3014 break; 3015 default : 3016 break; 
3017 } 3018 3019 3020 if ((flags & SI_WILDP) != 0) 3021 goto find_in_wild_ports; 3022 3023 hv = NAT_HASH_FN(dst, dport, 0xffffffff); 3024 hv = NAT_HASH_FN(src.s_addr, hv + sport, ifs->ifs_ipf_nattable_sz); 3025 nat = ifs->ifs_nat_table[1][hv]; 3026 for (; nat; nat = nat->nat_hnext[1]) { 3027 if (nat->nat_ifps[0] != NULL) { 3028 if ((ifp != NULL) && (ifp != nat->nat_ifps[0])) 3029 continue; 3030 } else if (ifp != NULL) 3031 nat->nat_ifps[0] = ifp; 3032 3033 nflags = nat->nat_flags; 3034 3035 if (nat->nat_oip.s_addr == src.s_addr && 3036 nat->nat_outip.s_addr == dst && 3037 (((p == 0) && 3038 (sflags == (nat->nat_flags & IPN_TCPUDPICMP))) 3039 || (p == nat->nat_p))) { 3040 switch (p) 3041 { 3042 #if 0 3043 case IPPROTO_GRE : 3044 if (nat->nat_call[1] != fin->fin_data[0]) 3045 continue; 3046 break; 3047 #endif 3048 case IPPROTO_ICMP : 3049 if ((flags & IPN_ICMPERR) != 0) { 3050 if (nat->nat_outport != sport) 3051 continue; 3052 } else { 3053 if (nat->nat_outport != dport) 3054 continue; 3055 } 3056 break; 3057 case IPPROTO_TCP : 3058 case IPPROTO_UDP : 3059 if (nat->nat_oport != sport) 3060 continue; 3061 if (nat->nat_outport != dport) 3062 continue; 3063 break; 3064 default : 3065 break; 3066 } 3067 3068 ipn = nat->nat_ptr; 3069 if ((ipn != NULL) && (nat->nat_aps != NULL)) 3070 if (appr_match(fin, nat) != 0) 3071 continue; 3072 return nat; 3073 } 3074 } 3075 3076 /* 3077 * So if we didn't find it but there are wildcard members in the hash 3078 * table, go back and look for them. We do this search and update here 3079 * because it is modifying the NAT table and we want to do this only 3080 * for the first packet that matches. The exception, of course, is 3081 * for "dummy" (FI_IGNORE) lookups. 
3082 */ 3083 find_in_wild_ports: 3084 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) 3085 return NULL; 3086 if (ifs->ifs_nat_stats.ns_wilds == 0) 3087 return NULL; 3088 3089 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3090 3091 hv = NAT_HASH_FN(dst, 0, 0xffffffff); 3092 hv = NAT_HASH_FN(src.s_addr, hv, ifs->ifs_ipf_nattable_sz); 3093 3094 WRITE_ENTER(&ifs->ifs_ipf_nat); 3095 3096 nat = ifs->ifs_nat_table[1][hv]; 3097 for (; nat; nat = nat->nat_hnext[1]) { 3098 if (nat->nat_ifps[0] != NULL) { 3099 if ((ifp != NULL) && (ifp != nat->nat_ifps[0])) 3100 continue; 3101 } else if (ifp != NULL) 3102 nat->nat_ifps[0] = ifp; 3103 3104 if (nat->nat_p != fin->fin_p) 3105 continue; 3106 if (nat->nat_oip.s_addr != src.s_addr || 3107 nat->nat_outip.s_addr != dst) 3108 continue; 3109 3110 nflags = nat->nat_flags; 3111 if (!(nflags & (NAT_TCPUDP|SI_WILDP))) 3112 continue; 3113 3114 if (nat_wildok(nat, (int)sport, (int)dport, nflags, 3115 NAT_INBOUND) == 1) { 3116 if ((fin->fin_flx & FI_IGNORE) != 0) 3117 break; 3118 if ((nflags & SI_CLONE) != 0) { 3119 nat = fr_natclone(fin, nat); 3120 if (nat == NULL) 3121 break; 3122 } else { 3123 MUTEX_ENTER(&ifs->ifs_ipf_nat_new); 3124 ifs->ifs_nat_stats.ns_wilds--; 3125 MUTEX_EXIT(&ifs->ifs_ipf_nat_new); 3126 } 3127 nat->nat_oport = sport; 3128 nat->nat_outport = dport; 3129 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); 3130 nat_tabmove(nat, ifs); 3131 break; 3132 } 3133 } 3134 3135 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3136 3137 return nat; 3138 } 3139 3140 3141 /* ------------------------------------------------------------------------ */ 3142 /* Function: nat_tabmove */ 3143 /* Returns: Nil */ 3144 /* Parameters: nat(I) - pointer to NAT structure */ 3145 /* Write Lock: ipf_nat */ 3146 /* */ 3147 /* This function is only called for TCP/UDP NAT table entries where the */ 3148 /* original was placed in the table without hashing on the ports and we now */ 3149 /* want to include hashing on port numbers. 
*/ 3150 /* ------------------------------------------------------------------------ */ 3151 static void nat_tabmove(nat, ifs) 3152 nat_t *nat; 3153 ipf_stack_t *ifs; 3154 { 3155 nat_t **natp; 3156 u_int hv; 3157 3158 if (nat->nat_flags & SI_CLONE) 3159 return; 3160 3161 /* 3162 * Remove the NAT entry from the old location 3163 */ 3164 if (nat->nat_hnext[0]) 3165 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; 3166 *nat->nat_phnext[0] = nat->nat_hnext[0]; 3167 ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--; 3168 3169 if (nat->nat_hnext[1]) 3170 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; 3171 *nat->nat_phnext[1] = nat->nat_hnext[1]; 3172 ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--; 3173 3174 /* 3175 * Add into the NAT table in the new position 3176 */ 3177 hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff); 3178 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport, 3179 ifs->ifs_ipf_nattable_sz); 3180 nat->nat_hv[0] = hv; 3181 natp = &ifs->ifs_nat_table[0][hv]; 3182 if (*natp) 3183 (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; 3184 nat->nat_phnext[0] = natp; 3185 nat->nat_hnext[0] = *natp; 3186 *natp = nat; 3187 ifs->ifs_nat_stats.ns_bucketlen[0][hv]++; 3188 3189 hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff); 3190 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport, 3191 ifs->ifs_ipf_nattable_sz); 3192 nat->nat_hv[1] = hv; 3193 natp = &ifs->ifs_nat_table[1][hv]; 3194 if (*natp) 3195 (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; 3196 nat->nat_phnext[1] = natp; 3197 nat->nat_hnext[1] = *natp; 3198 *natp = nat; 3199 ifs->ifs_nat_stats.ns_bucketlen[1][hv]++; 3200 } 3201 3202 3203 /* ------------------------------------------------------------------------ */ 3204 /* Function: nat_outlookup */ 3205 /* Returns: nat_t* - NULL == no match, */ 3206 /* else pointer to matching NAT entry */ 3207 /* Parameters: fin(I) - pointer to packet information */ 3208 /* flags(I) - NAT flags for this packet 
*/ 3209 /* p(I) - protocol for this packet */ 3210 /* src(I) - source IP address */ 3211 /* dst(I) - destination IP address */ 3212 /* rw(I) - 1 == write lock on ipf_nat held, 0 == read lock. */ 3213 /* */ 3214 /* Lookup a nat entry based on the source 'real' ip address/port and */ 3215 /* destination address/port. We use this lookup when sending a packet out, */ 3216 /* we're looking for a table entry, based on the source address. */ 3217 /* */ 3218 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */ 3219 /* */ 3220 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */ 3221 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */ 3222 /* */ 3223 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */ 3224 /* the packet is of said protocol */ 3225 /* ------------------------------------------------------------------------ */ 3226 nat_t *nat_outlookup(fin, flags, p, src, dst) 3227 fr_info_t *fin; 3228 u_int flags, p; 3229 struct in_addr src , dst; 3230 { 3231 u_short sport, dport; 3232 u_int sflags; 3233 ipnat_t *ipn; 3234 u_32_t srcip; 3235 nat_t *nat; 3236 int nflags; 3237 void *ifp; 3238 u_int hv; 3239 ipf_stack_t *ifs = fin->fin_ifs; 3240 3241 ifp = fin->fin_ifp; 3242 3243 srcip = src.s_addr; 3244 sflags = flags & IPN_TCPUDPICMP; 3245 sport = 0; 3246 dport = 0; 3247 3248 switch (p) 3249 { 3250 case IPPROTO_TCP : 3251 case IPPROTO_UDP : 3252 sport = htons(fin->fin_data[0]); 3253 dport = htons(fin->fin_data[1]); 3254 break; 3255 case IPPROTO_ICMP : 3256 if (flags & IPN_ICMPERR) 3257 sport = fin->fin_data[1]; 3258 else 3259 dport = fin->fin_data[1]; 3260 break; 3261 default : 3262 break; 3263 } 3264 3265 if ((flags & SI_WILDP) != 0) 3266 goto find_out_wild_ports; 3267 3268 hv = NAT_HASH_FN(srcip, sport, 0xffffffff); 3269 hv = NAT_HASH_FN(dst.s_addr, hv + dport, ifs->ifs_ipf_nattable_sz); 3270 nat = ifs->ifs_nat_table[0][hv]; 3271 for (; nat; nat = nat->nat_hnext[0]) { 3272 if (nat->nat_ifps[1] != NULL) { 
3273 if ((ifp != NULL) && (ifp != nat->nat_ifps[1])) 3274 continue; 3275 } else if (ifp != NULL) 3276 nat->nat_ifps[1] = ifp; 3277 3278 nflags = nat->nat_flags; 3279 3280 if (nat->nat_inip.s_addr == srcip && 3281 nat->nat_oip.s_addr == dst.s_addr && 3282 (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP))) 3283 || (p == nat->nat_p))) { 3284 switch (p) 3285 { 3286 #if 0 3287 case IPPROTO_GRE : 3288 if (nat->nat_call[1] != fin->fin_data[0]) 3289 continue; 3290 break; 3291 #endif 3292 case IPPROTO_TCP : 3293 case IPPROTO_UDP : 3294 if (nat->nat_oport != dport) 3295 continue; 3296 if (nat->nat_inport != sport) 3297 continue; 3298 break; 3299 default : 3300 break; 3301 } 3302 3303 ipn = nat->nat_ptr; 3304 if ((ipn != NULL) && (nat->nat_aps != NULL)) 3305 if (appr_match(fin, nat) != 0) 3306 continue; 3307 return nat; 3308 } 3309 } 3310 3311 /* 3312 * So if we didn't find it but there are wildcard members in the hash 3313 * table, go back and look for them. We do this search and update here 3314 * because it is modifying the NAT table and we want to do this only 3315 * for the first packet that matches. The exception, of course, is 3316 * for "dummy" (FI_IGNORE) lookups. 
3317 */ 3318 find_out_wild_ports: 3319 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) 3320 return NULL; 3321 if (ifs->ifs_nat_stats.ns_wilds == 0) 3322 return NULL; 3323 3324 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3325 3326 hv = NAT_HASH_FN(srcip, 0, 0xffffffff); 3327 hv = NAT_HASH_FN(dst.s_addr, hv, ifs->ifs_ipf_nattable_sz); 3328 3329 WRITE_ENTER(&ifs->ifs_ipf_nat); 3330 3331 nat = ifs->ifs_nat_table[0][hv]; 3332 for (; nat; nat = nat->nat_hnext[0]) { 3333 if (nat->nat_ifps[1] != NULL) { 3334 if ((ifp != NULL) && (ifp != nat->nat_ifps[1])) 3335 continue; 3336 } else if (ifp != NULL) 3337 nat->nat_ifps[1] = ifp; 3338 3339 if (nat->nat_p != fin->fin_p) 3340 continue; 3341 if ((nat->nat_inip.s_addr != srcip) || 3342 (nat->nat_oip.s_addr != dst.s_addr)) 3343 continue; 3344 3345 nflags = nat->nat_flags; 3346 if (!(nflags & (NAT_TCPUDP|SI_WILDP))) 3347 continue; 3348 3349 if (nat_wildok(nat, (int)sport, (int)dport, nflags, 3350 NAT_OUTBOUND) == 1) { 3351 if ((fin->fin_flx & FI_IGNORE) != 0) 3352 break; 3353 if ((nflags & SI_CLONE) != 0) { 3354 nat = fr_natclone(fin, nat); 3355 if (nat == NULL) 3356 break; 3357 } else { 3358 MUTEX_ENTER(&ifs->ifs_ipf_nat_new); 3359 ifs->ifs_nat_stats.ns_wilds--; 3360 MUTEX_EXIT(&ifs->ifs_ipf_nat_new); 3361 } 3362 nat->nat_inport = sport; 3363 nat->nat_oport = dport; 3364 if (nat->nat_outport == 0) 3365 nat->nat_outport = sport; 3366 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); 3367 nat_tabmove(nat, ifs); 3368 break; 3369 } 3370 } 3371 3372 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3373 3374 return nat; 3375 } 3376 3377 3378 /* ------------------------------------------------------------------------ */ 3379 /* Function: nat_lookupredir */ 3380 /* Returns: nat_t* - NULL == no match, */ 3381 /* else pointer to matching NAT entry */ 3382 /* Parameters: np(I) - pointer to description of packet to find NAT table */ 3383 /* entry for. 
*/ 3384 /* */ 3385 /* Lookup the NAT tables to search for a matching redirect */ 3386 /* ------------------------------------------------------------------------ */ 3387 nat_t *nat_lookupredir(np, ifs) 3388 natlookup_t *np; 3389 ipf_stack_t *ifs; 3390 { 3391 fr_info_t fi; 3392 nat_t *nat; 3393 3394 bzero((char *)&fi, sizeof(fi)); 3395 if (np->nl_flags & IPN_IN) { 3396 fi.fin_data[0] = ntohs(np->nl_realport); 3397 fi.fin_data[1] = ntohs(np->nl_outport); 3398 } else { 3399 fi.fin_data[0] = ntohs(np->nl_inport); 3400 fi.fin_data[1] = ntohs(np->nl_outport); 3401 } 3402 if (np->nl_flags & IPN_TCP) 3403 fi.fin_p = IPPROTO_TCP; 3404 else if (np->nl_flags & IPN_UDP) 3405 fi.fin_p = IPPROTO_UDP; 3406 else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY)) 3407 fi.fin_p = IPPROTO_ICMP; 3408 3409 fi.fin_ifs = ifs; 3410 /* 3411 * We can do two sorts of lookups: 3412 * - IPN_IN: we have the `real' and `out' address, look for `in'. 3413 * - default: we have the `in' and `out' address, look for `real'. 3414 */ 3415 if (np->nl_flags & IPN_IN) { 3416 if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p, 3417 np->nl_realip, np->nl_outip))) { 3418 np->nl_inip = nat->nat_inip; 3419 np->nl_inport = nat->nat_inport; 3420 } 3421 } else { 3422 /* 3423 * If nl_inip is non null, this is a lookup based on the real 3424 * ip address. Else, we use the fake. 
3425 */ 3426 if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p, 3427 np->nl_inip, np->nl_outip))) { 3428 3429 if ((np->nl_flags & IPN_FINDFORWARD) != 0) { 3430 fr_info_t fin; 3431 bzero((char *)&fin, sizeof(fin)); 3432 fin.fin_p = nat->nat_p; 3433 fin.fin_data[0] = ntohs(nat->nat_outport); 3434 fin.fin_data[1] = ntohs(nat->nat_oport); 3435 fin.fin_ifs = ifs; 3436 if (nat_inlookup(&fin, np->nl_flags, fin.fin_p, 3437 nat->nat_outip, 3438 nat->nat_oip) != NULL) { 3439 np->nl_flags &= ~IPN_FINDFORWARD; 3440 } 3441 } 3442 3443 np->nl_realip = nat->nat_outip; 3444 np->nl_realport = nat->nat_outport; 3445 } 3446 } 3447 3448 return nat; 3449 } 3450 3451 3452 /* ------------------------------------------------------------------------ */ 3453 /* Function: nat_match */ 3454 /* Returns: int - 0 == no match, 1 == match */ 3455 /* Parameters: fin(I) - pointer to packet information */ 3456 /* np(I) - pointer to NAT rule */ 3457 /* */ 3458 /* Pull the matching of a packet against a NAT rule out of that complex */ 3459 /* loop inside fr_checknatin() and lay it out properly in its own function. 
*/ 3460 /* ------------------------------------------------------------------------ */ 3461 static int nat_match(fin, np) 3462 fr_info_t *fin; 3463 ipnat_t *np; 3464 { 3465 frtuc_t *ft; 3466 3467 if (fin->fin_v != 4) 3468 return 0; 3469 3470 if (np->in_p && fin->fin_p != np->in_p) 3471 return 0; 3472 3473 if (fin->fin_out) { 3474 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK))) 3475 return 0; 3476 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip) 3477 ^ ((np->in_flags & IPN_NOTSRC) != 0)) 3478 return 0; 3479 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip) 3480 ^ ((np->in_flags & IPN_NOTDST) != 0)) 3481 return 0; 3482 } else { 3483 if (!(np->in_redir & NAT_REDIRECT)) 3484 return 0; 3485 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip) 3486 ^ ((np->in_flags & IPN_NOTSRC) != 0)) 3487 return 0; 3488 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip) 3489 ^ ((np->in_flags & IPN_NOTDST) != 0)) 3490 return 0; 3491 } 3492 3493 ft = &np->in_tuc; 3494 if (!(fin->fin_flx & FI_TCPUDP) || 3495 (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) { 3496 if (ft->ftu_scmp || ft->ftu_dcmp) 3497 return 0; 3498 return 1; 3499 } 3500 3501 return fr_tcpudpchk(fin, ft); 3502 } 3503 3504 3505 /* ------------------------------------------------------------------------ */ 3506 /* Function: nat_update */ 3507 /* Returns: Nil */ 3508 /* Parameters: nat(I) - pointer to NAT structure */ 3509 /* np(I) - pointer to NAT rule */ 3510 /* */ 3511 /* Updates the lifetime of a NAT table entry for non-TCP packets. Must be */ 3512 /* called with fin_rev updated - i.e. after calling nat_proto(). 
*/ 3513 /* ------------------------------------------------------------------------ */ 3514 void nat_update(fin, nat, np) 3515 fr_info_t *fin; 3516 nat_t *nat; 3517 ipnat_t *np; 3518 { 3519 ipftq_t *ifq, *ifq2; 3520 ipftqent_t *tqe; 3521 ipf_stack_t *ifs = fin->fin_ifs; 3522 3523 MUTEX_ENTER(&nat->nat_lock); 3524 tqe = &nat->nat_tqe; 3525 ifq = tqe->tqe_ifq; 3526 3527 /* 3528 * We allow over-riding of NAT timeouts from NAT rules, even for 3529 * TCP, however, if it is TCP and there is no rule timeout set, 3530 * then do not update the timeout here. 3531 */ 3532 if (np != NULL) 3533 ifq2 = np->in_tqehead[fin->fin_rev]; 3534 else 3535 ifq2 = NULL; 3536 3537 if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) { 3538 (void) fr_tcp_age(&nat->nat_tqe, fin, ifs->ifs_nat_tqb, 0); 3539 } else { 3540 if (ifq2 == NULL) { 3541 if (nat->nat_p == IPPROTO_UDP) 3542 ifq2 = &ifs->ifs_nat_udptq; 3543 else if (nat->nat_p == IPPROTO_ICMP) 3544 ifq2 = &ifs->ifs_nat_icmptq; 3545 else 3546 ifq2 = &ifs->ifs_nat_iptq; 3547 } 3548 3549 fr_movequeue(tqe, ifq, ifq2, ifs); 3550 } 3551 MUTEX_EXIT(&nat->nat_lock); 3552 } 3553 3554 3555 /* ------------------------------------------------------------------------ */ 3556 /* Function: fr_checknatout */ 3557 /* Returns: int - -1 == packet failed NAT checks so block it, */ 3558 /* 0 == no packet translation occurred, */ 3559 /* 1 == packet was successfully translated. */ 3560 /* Parameters: fin(I) - pointer to packet information */ 3561 /* passp(I) - pointer to filtering result flags */ 3562 /* */ 3563 /* Check to see if an outcoming packet should be changed. ICMP packets are */ 3564 /* first checked to see if they match an existing entry (if an error), */ 3565 /* otherwise a search of the current NAT table is made. If neither results */ 3566 /* in a match then a search for a matching NAT rule is made. Create a new */ 3567 /* NAT entry if a we matched a NAT rule. Lastly, actually change the */ 3568 /* packet header(s) as required. 
*/ 3569 /* ------------------------------------------------------------------------ */ 3570 int fr_checknatout(fin, passp) 3571 fr_info_t *fin; 3572 u_32_t *passp; 3573 { 3574 struct ifnet *ifp, *sifp; 3575 icmphdr_t *icmp = NULL; 3576 tcphdr_t *tcp = NULL; 3577 int rval, natfailed; 3578 ipnat_t *np = NULL; 3579 u_int nflags = 0; 3580 u_32_t ipa, iph; 3581 int natadd = 1; 3582 frentry_t *fr; 3583 nat_t *nat; 3584 ipf_stack_t *ifs = fin->fin_ifs; 3585 3586 if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0) 3587 return 0; 3588 3589 natfailed = 0; 3590 fr = fin->fin_fr; 3591 sifp = fin->fin_ifp; 3592 if ((fr != NULL) && !(fr->fr_flags & FR_DUP) && 3593 fr->fr_tifs[fin->fin_rev].fd_ifp && 3594 fr->fr_tifs[fin->fin_rev].fd_ifp != (void *)-1) 3595 fin->fin_ifp = fr->fr_tifs[fin->fin_rev].fd_ifp; 3596 ifp = fin->fin_ifp; 3597 3598 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 3599 switch (fin->fin_p) 3600 { 3601 case IPPROTO_TCP : 3602 nflags = IPN_TCP; 3603 break; 3604 case IPPROTO_UDP : 3605 nflags = IPN_UDP; 3606 break; 3607 case IPPROTO_ICMP : 3608 icmp = fin->fin_dp; 3609 3610 /* 3611 * This is an incoming packet, so the destination is 3612 * the icmp_id and the source port equals 0 3613 */ 3614 if (nat_icmpquerytype4(icmp->icmp_type)) 3615 nflags = IPN_ICMPQUERY; 3616 break; 3617 default : 3618 break; 3619 } 3620 3621 if ((nflags & IPN_TCPUDP)) 3622 tcp = fin->fin_dp; 3623 } 3624 3625 ipa = fin->fin_saddr; 3626 3627 READ_ENTER(&ifs->ifs_ipf_nat); 3628 3629 if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && 3630 (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND))) 3631 /*EMPTY*/; 3632 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin))) 3633 natadd = 0; 3634 else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p, 3635 fin->fin_src, fin->fin_dst))) { 3636 nflags = nat->nat_flags; 3637 } else { 3638 u_32_t hv, msk, nmsk; 3639 3640 /* 3641 * If there is no current entry in the nat table for this IP#, 
3642 * create one for it (if there is a matching rule). 3643 */ 3644 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3645 msk = 0xffffffff; 3646 nmsk = ifs->ifs_nat_masks; 3647 WRITE_ENTER(&ifs->ifs_ipf_nat); 3648 maskloop: 3649 iph = ipa & htonl(msk); 3650 hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_natrules_sz); 3651 for (np = ifs->ifs_nat_rules[hv]; np; np = np->in_mnext) 3652 { 3653 if ((np->in_ifps[1] && (np->in_ifps[1] != ifp))) 3654 continue; 3655 if (np->in_v != fin->fin_v) 3656 continue; 3657 if (np->in_p && (np->in_p != fin->fin_p)) 3658 continue; 3659 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags)) 3660 continue; 3661 if (np->in_flags & IPN_FILTER) { 3662 if (!nat_match(fin, np)) 3663 continue; 3664 } else if ((ipa & np->in_inmsk) != np->in_inip) 3665 continue; 3666 3667 if ((fr != NULL) && 3668 !fr_matchtag(&np->in_tag, &fr->fr_nattag)) 3669 continue; 3670 3671 if (*np->in_plabel != '\0') { 3672 if (((np->in_flags & IPN_FILTER) == 0) && 3673 (np->in_dport != tcp->th_dport)) 3674 continue; 3675 if (appr_ok(fin, tcp, np) == 0) 3676 continue; 3677 } 3678 3679 if ((nat = nat_new(fin, np, NULL, nflags, 3680 NAT_OUTBOUND))) { 3681 np->in_hits++; 3682 break; 3683 } else 3684 natfailed = -1; 3685 } 3686 if ((np == NULL) && (nmsk != 0)) { 3687 while (nmsk) { 3688 msk <<= 1; 3689 if (nmsk & 0x80000000) 3690 break; 3691 nmsk <<= 1; 3692 } 3693 if (nmsk != 0) { 3694 nmsk <<= 1; 3695 goto maskloop; 3696 } 3697 } 3698 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3699 } 3700 3701 if (nat != NULL) { 3702 rval = fr_natout(fin, nat, natadd, nflags); 3703 if (rval == 1) { 3704 MUTEX_ENTER(&nat->nat_lock); 3705 nat->nat_ref++; 3706 MUTEX_EXIT(&nat->nat_lock); 3707 nat->nat_touched = ifs->ifs_fr_ticks; 3708 fin->fin_nat = nat; 3709 } 3710 } else 3711 rval = natfailed; 3712 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3713 3714 if (rval == -1) { 3715 if (passp != NULL) 3716 *passp = FR_BLOCK; 3717 fin->fin_flx |= FI_BADNAT; 3718 } 3719 fin->fin_ifp = sifp; 3720 return rval; 3721 } 3722 3723 /* 
------------------------------------------------------------------------ */ 3724 /* Function: fr_natout */ 3725 /* Returns: int - -1 == packet failed NAT checks so block it, */ 3726 /* 1 == packet was successfully translated. */ 3727 /* Parameters: fin(I) - pointer to packet information */ 3728 /* nat(I) - pointer to NAT structure */ 3729 /* natadd(I) - flag indicating if it is safe to add frag cache */ 3730 /* nflags(I) - NAT flags set for this packet */ 3731 /* */ 3732 /* Translate a packet coming "out" on an interface. */ 3733 /* ------------------------------------------------------------------------ */ 3734 int fr_natout(fin, nat, natadd, nflags) 3735 fr_info_t *fin; 3736 nat_t *nat; 3737 int natadd; 3738 u_32_t nflags; 3739 { 3740 icmphdr_t *icmp; 3741 u_short *csump; 3742 u_32_t sumd; 3743 tcphdr_t *tcp; 3744 ipnat_t *np; 3745 int i; 3746 ipf_stack_t *ifs = fin->fin_ifs; 3747 3748 #if SOLARIS && defined(_KERNEL) 3749 net_data_t net_data_p; 3750 if (fin->fin_v == 4) 3751 net_data_p = ifs->ifs_ipf_ipv4; 3752 else 3753 net_data_p = ifs->ifs_ipf_ipv6; 3754 #endif 3755 3756 tcp = NULL; 3757 icmp = NULL; 3758 csump = NULL; 3759 np = nat->nat_ptr; 3760 3761 if ((natadd != 0) && (fin->fin_flx & FI_FRAG)) 3762 (void) fr_nat_newfrag(fin, 0, nat); 3763 3764 MUTEX_ENTER(&nat->nat_lock); 3765 nat->nat_bytes[1] += fin->fin_plen; 3766 nat->nat_pkts[1]++; 3767 MUTEX_EXIT(&nat->nat_lock); 3768 3769 /* 3770 * Fix up checksums, not by recalculating them, but 3771 * simply computing adjustments. 3772 * This is only done for STREAMS based IP implementations where the 3773 * checksum has already been calculated by IP. In all other cases, 3774 * IPFilter is called before the checksum needs calculating so there 3775 * is no call to modify whatever is in the header now. 
3776 */ 3777 ASSERT(fin->fin_m != NULL); 3778 if (fin->fin_v == 4 && !NET_IS_HCK_L3_FULL(net_data_p, fin->fin_m)) { 3779 if (nflags == IPN_ICMPERR) { 3780 u_32_t s1, s2; 3781 3782 s1 = LONG_SUM(ntohl(fin->fin_saddr)); 3783 s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr)); 3784 CALC_SUMD(s1, s2, sumd); 3785 3786 fix_outcksum(&fin->fin_ip->ip_sum, sumd); 3787 } 3788 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ 3789 defined(linux) || defined(BRIDGE_IPF) 3790 else { 3791 /* 3792 * Strictly speaking, this isn't necessary on BSD 3793 * kernels because they do checksum calculation after 3794 * this code has run BUT if ipfilter is being used 3795 * to do NAT as a bridge, that code doesn't exist. 3796 */ 3797 if (nat->nat_dir == NAT_OUTBOUND) 3798 fix_outcksum(&fin->fin_ip->ip_sum, 3799 nat->nat_ipsumd); 3800 else 3801 fix_incksum(&fin->fin_ip->ip_sum, 3802 nat->nat_ipsumd); 3803 } 3804 #endif 3805 } 3806 3807 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 3808 if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) { 3809 tcp = fin->fin_dp; 3810 3811 tcp->th_sport = nat->nat_outport; 3812 fin->fin_data[0] = ntohs(nat->nat_outport); 3813 } 3814 3815 if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) { 3816 icmp = fin->fin_dp; 3817 icmp->icmp_id = nat->nat_outport; 3818 } 3819 3820 csump = nat_proto(fin, nat, nflags); 3821 } 3822 3823 fin->fin_ip->ip_src = nat->nat_outip; 3824 3825 nat_update(fin, nat, np); 3826 3827 /* 3828 * The above comments do not hold for layer 4 (or higher) checksums... 
3829 */ 3830 if (csump != NULL && !NET_IS_HCK_L4_FULL(net_data_p, fin->fin_m)) { 3831 if (nflags & IPN_TCPUDP && 3832 NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) 3833 sumd = nat->nat_sumd[1]; 3834 else 3835 sumd = nat->nat_sumd[0]; 3836 3837 if (nat->nat_dir == NAT_OUTBOUND) 3838 fix_outcksum(csump, sumd); 3839 else 3840 fix_incksum(csump, sumd); 3841 } 3842 #ifdef IPFILTER_SYNC 3843 ipfsync_update(SMC_NAT, fin, nat->nat_sync); 3844 #endif 3845 /* ------------------------------------------------------------- */ 3846 /* A few quick notes: */ 3847 /* Following are test conditions prior to calling the */ 3848 /* appr_check routine. */ 3849 /* */ 3850 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */ 3851 /* with a redirect rule, we attempt to match the packet's */ 3852 /* source port against in_dport, otherwise we'd compare the */ 3853 /* packet's destination. */ 3854 /* ------------------------------------------------------------- */ 3855 if ((np != NULL) && (np->in_apr != NULL)) { 3856 i = appr_check(fin, nat); 3857 if (i == 0) 3858 i = 1; 3859 } else 3860 i = 1; 3861 ATOMIC_INCL(ifs->ifs_nat_stats.ns_mapped[1]); 3862 fin->fin_flx |= FI_NATED; 3863 return i; 3864 } 3865 3866 3867 /* ------------------------------------------------------------------------ */ 3868 /* Function: fr_checknatin */ 3869 /* Returns: int - -1 == packet failed NAT checks so block it, */ 3870 /* 0 == no packet translation occurred, */ 3871 /* 1 == packet was successfully translated. */ 3872 /* Parameters: fin(I) - pointer to packet information */ 3873 /* passp(I) - pointer to filtering result flags */ 3874 /* */ 3875 /* Check to see if an incoming packet should be changed. ICMP packets are */ 3876 /* first checked to see if they match an existing entry (if an error), */ 3877 /* otherwise a search of the current NAT table is made. If neither results */ 3878 /* in a match then a search for a matching NAT rule is made. Create a new */ 3879 /* NAT entry if a we matched a NAT rule. 
Lastly, actually change the */ 3880 /* packet header(s) as required. */ 3881 /* ------------------------------------------------------------------------ */ 3882 int fr_checknatin(fin, passp) 3883 fr_info_t *fin; 3884 u_32_t *passp; 3885 { 3886 u_int nflags, natadd; 3887 int rval, natfailed; 3888 struct ifnet *ifp; 3889 struct in_addr in; 3890 icmphdr_t *icmp; 3891 tcphdr_t *tcp; 3892 u_short dport; 3893 ipnat_t *np; 3894 nat_t *nat; 3895 u_32_t iph; 3896 ipf_stack_t *ifs = fin->fin_ifs; 3897 3898 if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0) 3899 return 0; 3900 3901 tcp = NULL; 3902 icmp = NULL; 3903 dport = 0; 3904 natadd = 1; 3905 nflags = 0; 3906 natfailed = 0; 3907 ifp = fin->fin_ifp; 3908 3909 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 3910 switch (fin->fin_p) 3911 { 3912 case IPPROTO_TCP : 3913 nflags = IPN_TCP; 3914 break; 3915 case IPPROTO_UDP : 3916 nflags = IPN_UDP; 3917 break; 3918 case IPPROTO_ICMP : 3919 icmp = fin->fin_dp; 3920 3921 /* 3922 * This is an incoming packet, so the destination is 3923 * the icmp_id and the source port equals 0 3924 */ 3925 if (nat_icmpquerytype4(icmp->icmp_type)) { 3926 nflags = IPN_ICMPQUERY; 3927 dport = icmp->icmp_id; 3928 } break; 3929 default : 3930 break; 3931 } 3932 3933 if ((nflags & IPN_TCPUDP)) { 3934 tcp = fin->fin_dp; 3935 dport = tcp->th_dport; 3936 } 3937 } 3938 3939 in = fin->fin_dst; 3940 3941 READ_ENTER(&ifs->ifs_ipf_nat); 3942 3943 if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && 3944 (nat = nat_icmperror(fin, &nflags, NAT_INBOUND))) 3945 /*EMPTY*/; 3946 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin))) 3947 natadd = 0; 3948 else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p, 3949 fin->fin_src, in))) { 3950 nflags = nat->nat_flags; 3951 } else { 3952 u_32_t hv, msk, rmsk; 3953 3954 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3955 rmsk = ifs->ifs_rdr_masks; 3956 msk = 0xffffffff; 3957 WRITE_ENTER(&ifs->ifs_ipf_nat); 3958 /* 3959 * 
If there is no current entry in the nat table for this IP#, 3960 * create one for it (if there is a matching rule). 3961 */ 3962 maskloop: 3963 iph = in.s_addr & htonl(msk); 3964 hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_rdrrules_sz); 3965 for (np = ifs->ifs_rdr_rules[hv]; np; np = np->in_rnext) { 3966 if (np->in_ifps[0] && (np->in_ifps[0] != ifp)) 3967 continue; 3968 if (np->in_v != fin->fin_v) 3969 continue; 3970 if (np->in_p && (np->in_p != fin->fin_p)) 3971 continue; 3972 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags)) 3973 continue; 3974 if (np->in_flags & IPN_FILTER) { 3975 if (!nat_match(fin, np)) 3976 continue; 3977 } else { 3978 if ((in.s_addr & np->in_outmsk) != np->in_outip) 3979 continue; 3980 if (np->in_pmin && 3981 ((ntohs(np->in_pmax) < ntohs(dport)) || 3982 (ntohs(dport) < ntohs(np->in_pmin)))) 3983 continue; 3984 } 3985 3986 if (*np->in_plabel != '\0') { 3987 if (!appr_ok(fin, tcp, np)) { 3988 continue; 3989 } 3990 } 3991 3992 nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND); 3993 if (nat != NULL) { 3994 np->in_hits++; 3995 break; 3996 } else 3997 natfailed = -1; 3998 } 3999 4000 if ((np == NULL) && (rmsk != 0)) { 4001 while (rmsk) { 4002 msk <<= 1; 4003 if (rmsk & 0x80000000) 4004 break; 4005 rmsk <<= 1; 4006 } 4007 if (rmsk != 0) { 4008 rmsk <<= 1; 4009 goto maskloop; 4010 } 4011 } 4012 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 4013 } 4014 if (nat != NULL) { 4015 rval = fr_natin(fin, nat, natadd, nflags); 4016 if (rval == 1) { 4017 MUTEX_ENTER(&nat->nat_lock); 4018 nat->nat_ref++; 4019 MUTEX_EXIT(&nat->nat_lock); 4020 nat->nat_touched = ifs->ifs_fr_ticks; 4021 fin->fin_nat = nat; 4022 fin->fin_state = nat->nat_state; 4023 } 4024 } else 4025 rval = natfailed; 4026 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4027 4028 if (rval == -1) { 4029 if (passp != NULL) 4030 *passp = FR_BLOCK; 4031 fin->fin_flx |= FI_BADNAT; 4032 } 4033 return rval; 4034 } 4035 4036 4037 /* ------------------------------------------------------------------------ */ 4038 /* Function: 
fr_natin */ 4039 /* Returns: int - -1 == packet failed NAT checks so block it, */ 4040 /* 1 == packet was successfully translated. */ 4041 /* Parameters: fin(I) - pointer to packet information */ 4042 /* nat(I) - pointer to NAT structure */ 4043 /* natadd(I) - flag indicating if it is safe to add frag cache */ 4044 /* nflags(I) - NAT flags set for this packet */ 4045 /* Locks Held: ipf_nat (READ) */ 4046 /* */ 4047 /* Translate a packet coming "in" on an interface. */ 4048 /* ------------------------------------------------------------------------ */ 4049 int fr_natin(fin, nat, natadd, nflags) 4050 fr_info_t *fin; 4051 nat_t *nat; 4052 int natadd; 4053 u_32_t nflags; 4054 { 4055 icmphdr_t *icmp; 4056 u_short *csump; 4057 tcphdr_t *tcp; 4058 ipnat_t *np; 4059 int i; 4060 ipf_stack_t *ifs = fin->fin_ifs; 4061 4062 #if SOLARIS && defined(_KERNEL) 4063 net_data_t net_data_p; 4064 if (fin->fin_v == 4) 4065 net_data_p = ifs->ifs_ipf_ipv4; 4066 else 4067 net_data_p = ifs->ifs_ipf_ipv6; 4068 #endif 4069 4070 tcp = NULL; 4071 csump = NULL; 4072 np = nat->nat_ptr; 4073 fin->fin_fr = nat->nat_fr; 4074 4075 if ((natadd != 0) && (fin->fin_flx & FI_FRAG)) 4076 (void) fr_nat_newfrag(fin, 0, nat); 4077 4078 if (np != NULL) { 4079 4080 /* ------------------------------------------------------------- */ 4081 /* A few quick notes: */ 4082 /* Following are test conditions prior to calling the */ 4083 /* appr_check routine. */ 4084 /* */ 4085 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */ 4086 /* with a map rule, we attempt to match the packet's */ 4087 /* source port against in_dport, otherwise we'd compare the */ 4088 /* packet's destination. 
*/ 4089 /* ------------------------------------------------------------- */ 4090 if (np->in_apr != NULL) { 4091 i = appr_check(fin, nat); 4092 if (i == -1) { 4093 return -1; 4094 } 4095 } 4096 } 4097 4098 #ifdef IPFILTER_SYNC 4099 ipfsync_update(SMC_NAT, fin, nat->nat_sync); 4100 #endif 4101 4102 MUTEX_ENTER(&nat->nat_lock); 4103 nat->nat_bytes[0] += fin->fin_plen; 4104 nat->nat_pkts[0]++; 4105 MUTEX_EXIT(&nat->nat_lock); 4106 4107 fin->fin_ip->ip_dst = nat->nat_inip; 4108 fin->fin_fi.fi_daddr = nat->nat_inip.s_addr; 4109 if (nflags & IPN_TCPUDP) 4110 tcp = fin->fin_dp; 4111 4112 /* 4113 * Fix up checksums, not by recalculating them, but 4114 * simply computing adjustments. 4115 * Why only do this for some platforms on inbound packets ? 4116 * Because for those that it is done, IP processing is yet to happen 4117 * and so the IPv4 header checksum has not yet been evaluated. 4118 * Perhaps it should always be done for the benefit of things like 4119 * fast forwarding (so that it doesn't need to be recomputed) but with 4120 * header checksum offloading, perhaps it is a moot point. 
4121 */ 4122 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ 4123 defined(__osf__) || defined(linux) 4124 if (nat->nat_dir == NAT_OUTBOUND) 4125 fix_incksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd); 4126 else 4127 fix_outcksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd); 4128 #endif 4129 4130 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 4131 if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) { 4132 tcp->th_dport = nat->nat_inport; 4133 fin->fin_data[1] = ntohs(nat->nat_inport); 4134 } 4135 4136 4137 if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) { 4138 icmp = fin->fin_dp; 4139 4140 icmp->icmp_id = nat->nat_inport; 4141 } 4142 4143 csump = nat_proto(fin, nat, nflags); 4144 } 4145 4146 nat_update(fin, nat, np); 4147 4148 /* 4149 * In case they are being forwarded, inbound packets always need to have 4150 * their checksum adjusted even if hardware checksum validation said OK. 4151 */ 4152 if (csump != NULL) { 4153 if (nat->nat_dir == NAT_OUTBOUND) 4154 fix_incksum(csump, nat->nat_sumd[0]); 4155 else 4156 fix_outcksum(csump, nat->nat_sumd[0]); 4157 } 4158 4159 #if SOLARIS && defined(_KERNEL) 4160 if (nflags & IPN_TCPUDP && 4161 NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) { 4162 /* 4163 * Need to adjust the partial checksum result stored in 4164 * db_cksum16, which will be used for validation in IP. 4165 * See IP_CKSUM_RECV(). 4166 * Adjustment data should be the inverse of the IP address 4167 * changes, because db_cksum16 is supposed to be the complement 4168 * of the pesudo header. 
4169 */ 4170 csump = &fin->fin_m->b_datap->db_cksum16; 4171 if (nat->nat_dir == NAT_OUTBOUND) 4172 fix_outcksum(csump, nat->nat_sumd[1]); 4173 else 4174 fix_incksum(csump, nat->nat_sumd[1]); 4175 } 4176 #endif 4177 4178 ATOMIC_INCL(ifs->ifs_nat_stats.ns_mapped[0]); 4179 fin->fin_flx |= FI_NATED; 4180 if (np != NULL && np->in_tag.ipt_num[0] != 0) 4181 fin->fin_nattag = &np->in_tag; 4182 return 1; 4183 } 4184 4185 4186 /* ------------------------------------------------------------------------ */ 4187 /* Function: nat_proto */ 4188 /* Returns: u_short* - pointer to transport header checksum to update, */ 4189 /* NULL if the transport protocol is not recognised */ 4190 /* as needing a checksum update. */ 4191 /* Parameters: fin(I) - pointer to packet information */ 4192 /* nat(I) - pointer to NAT structure */ 4193 /* nflags(I) - NAT flags set for this packet */ 4194 /* */ 4195 /* Return the pointer to the checksum field for each protocol so understood.*/ 4196 /* If support for making other changes to a protocol header is required, */ 4197 /* that is not strictly 'address' translation, such as clamping the MSS in */ 4198 /* TCP down to a specific value, then do it from here. */ 4199 /* ------------------------------------------------------------------------ */ 4200 u_short *nat_proto(fin, nat, nflags) 4201 fr_info_t *fin; 4202 nat_t *nat; 4203 u_int nflags; 4204 { 4205 icmphdr_t *icmp; 4206 u_short *csump; 4207 tcphdr_t *tcp; 4208 udphdr_t *udp; 4209 4210 csump = NULL; 4211 if (fin->fin_out == 0) { 4212 fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND); 4213 } else { 4214 fin->fin_rev = (nat->nat_dir == NAT_INBOUND); 4215 } 4216 4217 switch (fin->fin_p) 4218 { 4219 case IPPROTO_TCP : 4220 tcp = fin->fin_dp; 4221 4222 csump = &tcp->th_sum; 4223 4224 /* 4225 * Do a MSS CLAMPING on a SYN packet, 4226 * only deal IPv4 for now. 
4227 */ 4228 if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0) 4229 nat_mssclamp(tcp, nat->nat_mssclamp, csump); 4230 4231 break; 4232 4233 case IPPROTO_UDP : 4234 udp = fin->fin_dp; 4235 4236 if (udp->uh_sum) 4237 csump = &udp->uh_sum; 4238 break; 4239 4240 case IPPROTO_ICMP : 4241 icmp = fin->fin_dp; 4242 4243 if ((nflags & IPN_ICMPQUERY) != 0) { 4244 if (icmp->icmp_cksum != 0) 4245 csump = &icmp->icmp_cksum; 4246 } 4247 break; 4248 } 4249 return csump; 4250 } 4251 4252 4253 /* ------------------------------------------------------------------------ */ 4254 /* Function: fr_natunload */ 4255 /* Returns: Nil */ 4256 /* Parameters: Nil */ 4257 /* */ 4258 /* Free all memory used by NAT structures allocated at runtime. */ 4259 /* ------------------------------------------------------------------------ */ 4260 void fr_natunload(ifs) 4261 ipf_stack_t *ifs; 4262 { 4263 ipftq_t *ifq, *ifqnext; 4264 4265 (void) nat_clearlist(ifs); 4266 (void) nat_flushtable(ifs); 4267 4268 /* 4269 * Proxy timeout queues are not cleaned here because although they 4270 * exist on the NAT list, appr_unload is called after fr_natunload 4271 * and the proxies actually are responsible for them being created. 4272 * Should the proxy timeouts have their own list? There's no real 4273 * justification as this is the only complication. 
4274 */ 4275 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { 4276 ifqnext = ifq->ifq_next; 4277 if (((ifq->ifq_flags & IFQF_PROXY) == 0) && 4278 (fr_deletetimeoutqueue(ifq) == 0)) 4279 fr_freetimeoutqueue(ifq, ifs); 4280 } 4281 4282 if (ifs->ifs_nat_table[0] != NULL) { 4283 KFREES(ifs->ifs_nat_table[0], 4284 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 4285 ifs->ifs_nat_table[0] = NULL; 4286 } 4287 if (ifs->ifs_nat_table[1] != NULL) { 4288 KFREES(ifs->ifs_nat_table[1], 4289 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 4290 ifs->ifs_nat_table[1] = NULL; 4291 } 4292 if (ifs->ifs_nat_rules != NULL) { 4293 KFREES(ifs->ifs_nat_rules, 4294 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz); 4295 ifs->ifs_nat_rules = NULL; 4296 } 4297 if (ifs->ifs_rdr_rules != NULL) { 4298 KFREES(ifs->ifs_rdr_rules, 4299 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz); 4300 ifs->ifs_rdr_rules = NULL; 4301 } 4302 if (ifs->ifs_maptable != NULL) { 4303 KFREES(ifs->ifs_maptable, 4304 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); 4305 ifs->ifs_maptable = NULL; 4306 } 4307 if (ifs->ifs_nat_stats.ns_bucketlen[0] != NULL) { 4308 KFREES(ifs->ifs_nat_stats.ns_bucketlen[0], 4309 sizeof(u_long *) * ifs->ifs_ipf_nattable_sz); 4310 ifs->ifs_nat_stats.ns_bucketlen[0] = NULL; 4311 } 4312 if (ifs->ifs_nat_stats.ns_bucketlen[1] != NULL) { 4313 KFREES(ifs->ifs_nat_stats.ns_bucketlen[1], 4314 sizeof(u_long *) * ifs->ifs_ipf_nattable_sz); 4315 ifs->ifs_nat_stats.ns_bucketlen[1] = NULL; 4316 } 4317 4318 if (ifs->ifs_fr_nat_maxbucket_reset == 1) 4319 ifs->ifs_fr_nat_maxbucket = 0; 4320 4321 if (ifs->ifs_fr_nat_init == 1) { 4322 ifs->ifs_fr_nat_init = 0; 4323 fr_sttab_destroy(ifs->ifs_nat_tqb); 4324 4325 RW_DESTROY(&ifs->ifs_ipf_natfrag); 4326 RW_DESTROY(&ifs->ifs_ipf_nat); 4327 4328 MUTEX_DESTROY(&ifs->ifs_ipf_nat_new); 4329 MUTEX_DESTROY(&ifs->ifs_ipf_natio); 4330 4331 MUTEX_DESTROY(&ifs->ifs_nat_udptq.ifq_lock); 4332 MUTEX_DESTROY(&ifs->ifs_nat_icmptq.ifq_lock); 4333 
MUTEX_DESTROY(&ifs->ifs_nat_iptq.ifq_lock); 4334 } 4335 } 4336 4337 4338 /* ------------------------------------------------------------------------ */ 4339 /* Function: fr_natexpire */ 4340 /* Returns: Nil */ 4341 /* Parameters: Nil */ 4342 /* */ 4343 /* Check all of the timeout queues for entries at the top which need to be */ 4344 /* expired. */ 4345 /* ------------------------------------------------------------------------ */ 4346 void fr_natexpire(ifs) 4347 ipf_stack_t *ifs; 4348 { 4349 ipftq_t *ifq, *ifqnext; 4350 ipftqent_t *tqe, *tqn; 4351 int i; 4352 SPL_INT(s); 4353 4354 SPL_NET(s); 4355 WRITE_ENTER(&ifs->ifs_ipf_nat); 4356 for (ifq = ifs->ifs_nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) { 4357 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { 4358 if (tqe->tqe_die > ifs->ifs_fr_ticks) 4359 break; 4360 tqn = tqe->tqe_next; 4361 nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs); 4362 } 4363 } 4364 4365 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { 4366 ifqnext = ifq->ifq_next; 4367 4368 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { 4369 if (tqe->tqe_die > ifs->ifs_fr_ticks) 4370 break; 4371 tqn = tqe->tqe_next; 4372 nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs); 4373 } 4374 } 4375 4376 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { 4377 ifqnext = ifq->ifq_next; 4378 4379 if (((ifq->ifq_flags & IFQF_DELETE) != 0) && 4380 (ifq->ifq_ref == 0)) { 4381 fr_freetimeoutqueue(ifq, ifs); 4382 } 4383 } 4384 4385 if (ifs->ifs_nat_doflush != 0) { 4386 (void) nat_extraflush(2, ifs); 4387 ifs->ifs_nat_doflush = 0; 4388 } 4389 4390 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4391 SPL_X(s); 4392 } 4393 4394 4395 /* ------------------------------------------------------------------------ */ 4396 /* Function: fr_nataddrsync */ 4397 /* Returns: Nil */ 4398 /* Parameters: ifp(I) - pointer to network interface */ 4399 /* addr(I) - pointer to new network address */ 4400 /* */ 4401 /* Walk through all of the currently active NAT sessions, looking for 
those */ 4402 /* which need to have their translated address updated (where the interface */ 4403 /* matches the one passed in) and change it, recalculating the checksum sum */ 4404 /* difference too. */ 4405 /* ------------------------------------------------------------------------ */ 4406 void fr_nataddrsync(ifp, addr, ifs) 4407 void *ifp; 4408 struct in_addr *addr; 4409 ipf_stack_t *ifs; 4410 { 4411 u_32_t sum1, sum2, sumd; 4412 nat_t *nat; 4413 ipnat_t *np; 4414 SPL_INT(s); 4415 4416 if (ifs->ifs_fr_running <= 0) 4417 return; 4418 4419 SPL_NET(s); 4420 WRITE_ENTER(&ifs->ifs_ipf_nat); 4421 4422 if (ifs->ifs_fr_running <= 0) { 4423 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4424 return; 4425 } 4426 4427 /* 4428 * Change IP addresses for NAT sessions for any protocol except TCP 4429 * since it will break the TCP connection anyway. The only rules 4430 * which will get changed are those which are "map ... -> 0/32", 4431 * where the rule specifies the address is taken from the interface. 4432 */ 4433 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4434 if (addr != NULL) { 4435 if (((ifp != NULL) && ifp != (nat->nat_ifps[0])) || 4436 ((nat->nat_flags & IPN_TCP) != 0)) 4437 continue; 4438 if (((np = nat->nat_ptr) == NULL) || 4439 (np->in_nip || (np->in_outmsk != 0xffffffff))) 4440 continue; 4441 4442 /* 4443 * Change the map-to address to be the same as the 4444 * new one. 4445 */ 4446 sum1 = nat->nat_outip.s_addr; 4447 nat->nat_outip = *addr; 4448 sum2 = nat->nat_outip.s_addr; 4449 4450 } else if (((ifp == NULL) || (ifp == nat->nat_ifps[0])) && 4451 !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) && 4452 (np->in_outmsk == 0xffffffff) && !np->in_nip) { 4453 struct in_addr in; 4454 4455 /* 4456 * Change the map-to address to be the same as the 4457 * new one. 
4458 */ 4459 sum1 = nat->nat_outip.s_addr; 4460 if (fr_ifpaddr(4, FRI_NORMAL, nat->nat_ifps[0], 4461 &in, NULL, ifs) != -1) 4462 nat->nat_outip = in; 4463 sum2 = nat->nat_outip.s_addr; 4464 } else { 4465 continue; 4466 } 4467 4468 if (sum1 == sum2) 4469 continue; 4470 /* 4471 * Readjust the checksum adjustment to take into 4472 * account the new IP#. 4473 */ 4474 CALC_SUMD(sum1, sum2, sumd); 4475 /* XXX - dont change for TCP when solaris does 4476 * hardware checksumming. 4477 */ 4478 sumd += nat->nat_sumd[0]; 4479 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); 4480 nat->nat_sumd[1] = nat->nat_sumd[0]; 4481 } 4482 4483 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4484 SPL_X(s); 4485 } 4486 4487 4488 /* ------------------------------------------------------------------------ */ 4489 /* Function: fr_natifpsync */ 4490 /* Returns: Nil */ 4491 /* Parameters: action(I) - how we are syncing */ 4492 /* ifp(I) - pointer to network interface */ 4493 /* name(I) - name of interface to sync to */ 4494 /* */ 4495 /* This function is used to resync the mapping of interface names and their */ 4496 /* respective 'pointers'. For "action == IPFSYNC_RESYNC", resync all */ 4497 /* interfaces by doing a new lookup of name to 'pointer'. For "action == */ 4498 /* IPFSYNC_NEWIFP", treat ifp as the new pointer value associated with */ 4499 /* "name" and for "action == IPFSYNC_OLDIFP", ifp is a pointer for which */ 4500 /* there is no longer any interface associated with it. 
*/ 4501 /* ------------------------------------------------------------------------ */ 4502 void fr_natifpsync(action, ifp, name, ifs) 4503 int action; 4504 void *ifp; 4505 char *name; 4506 ipf_stack_t *ifs; 4507 { 4508 #if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL) 4509 int s; 4510 #endif 4511 nat_t *nat; 4512 ipnat_t *n; 4513 4514 if (ifs->ifs_fr_running <= 0) 4515 return; 4516 4517 SPL_NET(s); 4518 WRITE_ENTER(&ifs->ifs_ipf_nat); 4519 4520 if (ifs->ifs_fr_running <= 0) { 4521 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4522 return; 4523 } 4524 4525 switch (action) 4526 { 4527 case IPFSYNC_RESYNC : 4528 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4529 if ((ifp == nat->nat_ifps[0]) || 4530 (nat->nat_ifps[0] == (void *)-1)) { 4531 nat->nat_ifps[0] = 4532 fr_resolvenic(nat->nat_ifnames[0], 4, ifs); 4533 } 4534 4535 if ((ifp == nat->nat_ifps[1]) || 4536 (nat->nat_ifps[1] == (void *)-1)) { 4537 nat->nat_ifps[1] = 4538 fr_resolvenic(nat->nat_ifnames[1], 4, ifs); 4539 } 4540 } 4541 4542 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { 4543 if (n->in_ifps[0] == ifp || 4544 n->in_ifps[0] == (void *)-1) { 4545 n->in_ifps[0] = 4546 fr_resolvenic(n->in_ifnames[0], 4, ifs); 4547 } 4548 if (n->in_ifps[1] == ifp || 4549 n->in_ifps[1] == (void *)-1) { 4550 n->in_ifps[1] = 4551 fr_resolvenic(n->in_ifnames[1], 4, ifs); 4552 } 4553 } 4554 break; 4555 case IPFSYNC_NEWIFP : 4556 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4557 if (!strncmp(name, nat->nat_ifnames[0], 4558 sizeof(nat->nat_ifnames[0]))) 4559 nat->nat_ifps[0] = ifp; 4560 if (!strncmp(name, nat->nat_ifnames[1], 4561 sizeof(nat->nat_ifnames[1]))) 4562 nat->nat_ifps[1] = ifp; 4563 } 4564 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { 4565 if (!strncmp(name, n->in_ifnames[0], 4566 sizeof(n->in_ifnames[0]))) 4567 n->in_ifps[0] = ifp; 4568 if (!strncmp(name, n->in_ifnames[1], 4569 sizeof(n->in_ifnames[1]))) 4570 n->in_ifps[1] = ifp; 4571 } 4572 break; 4573 case 
IPFSYNC_OLDIFP : 4574 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4575 if (ifp == nat->nat_ifps[0]) 4576 nat->nat_ifps[0] = (void *)-1; 4577 if (ifp == nat->nat_ifps[1]) 4578 nat->nat_ifps[1] = (void *)-1; 4579 } 4580 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { 4581 if (n->in_ifps[0] == ifp) 4582 n->in_ifps[0] = (void *)-1; 4583 if (n->in_ifps[1] == ifp) 4584 n->in_ifps[1] = (void *)-1; 4585 } 4586 break; 4587 } 4588 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4589 SPL_X(s); 4590 } 4591 4592 4593 /* ------------------------------------------------------------------------ */ 4594 /* Function: nat_icmpquerytype4 */ 4595 /* Returns: int - 1 == success, 0 == failure */ 4596 /* Parameters: icmptype(I) - ICMP type number */ 4597 /* */ 4598 /* Tests to see if the ICMP type number passed is a query/response type or */ 4599 /* not. */ 4600 /* ------------------------------------------------------------------------ */ 4601 static INLINE int nat_icmpquerytype4(icmptype) 4602 int icmptype; 4603 { 4604 4605 /* 4606 * For the ICMP query NAT code, it is essential that both the query 4607 * and the reply match on the NAT rule. Because the NAT structure 4608 * does not keep track of the icmptype, and a single NAT structure 4609 * is used for all icmp types with the same src, dest and id, we 4610 * simply define the replies as queries as well. 
The funny thing is, 4611 * altough it seems silly to call a reply a query, this is exactly 4612 * as it is defined in the IPv4 specification 4613 */ 4614 4615 switch (icmptype) 4616 { 4617 4618 case ICMP_ECHOREPLY: 4619 case ICMP_ECHO: 4620 /* route aedvertisement/solliciation is currently unsupported: */ 4621 /* it would require rewriting the ICMP data section */ 4622 case ICMP_TSTAMP: 4623 case ICMP_TSTAMPREPLY: 4624 case ICMP_IREQ: 4625 case ICMP_IREQREPLY: 4626 case ICMP_MASKREQ: 4627 case ICMP_MASKREPLY: 4628 return 1; 4629 default: 4630 return 0; 4631 } 4632 } 4633 4634 4635 /* ------------------------------------------------------------------------ */ 4636 /* Function: nat_log */ 4637 /* Returns: Nil */ 4638 /* Parameters: nat(I) - pointer to NAT structure */ 4639 /* type(I) - type of log entry to create */ 4640 /* */ 4641 /* Creates a NAT log entry. */ 4642 /* ------------------------------------------------------------------------ */ 4643 void nat_log(nat, type, ifs) 4644 struct nat *nat; 4645 u_int type; 4646 ipf_stack_t *ifs; 4647 { 4648 #ifdef IPFILTER_LOG 4649 # ifndef LARGE_NAT 4650 struct ipnat *np; 4651 int rulen; 4652 # endif 4653 struct natlog natl; 4654 void *items[1]; 4655 size_t sizes[1]; 4656 int types[1]; 4657 4658 natl.nl_inip = nat->nat_inip; 4659 natl.nl_outip = nat->nat_outip; 4660 natl.nl_origip = nat->nat_oip; 4661 natl.nl_bytes[0] = nat->nat_bytes[0]; 4662 natl.nl_bytes[1] = nat->nat_bytes[1]; 4663 natl.nl_pkts[0] = nat->nat_pkts[0]; 4664 natl.nl_pkts[1] = nat->nat_pkts[1]; 4665 natl.nl_origport = nat->nat_oport; 4666 natl.nl_inport = nat->nat_inport; 4667 natl.nl_outport = nat->nat_outport; 4668 natl.nl_p = nat->nat_p; 4669 natl.nl_type = type; 4670 natl.nl_rule = -1; 4671 # ifndef LARGE_NAT 4672 if (nat->nat_ptr != NULL) { 4673 for (rulen = 0, np = ifs->ifs_nat_list; np; 4674 np = np->in_next, rulen++) 4675 if (np == nat->nat_ptr) { 4676 natl.nl_rule = rulen; 4677 break; 4678 } 4679 } 4680 # endif 4681 items[0] = &natl; 4682 sizes[0] 
= sizeof(natl); 4683 types[0] = 0; 4684 4685 (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1, ifs); 4686 #endif 4687 } 4688 4689 4690 #if defined(__OpenBSD__) 4691 /* ------------------------------------------------------------------------ */ 4692 /* Function: nat_ifdetach */ 4693 /* Returns: Nil */ 4694 /* Parameters: ifp(I) - pointer to network interface */ 4695 /* */ 4696 /* Compatibility interface for OpenBSD to trigger the correct updating of */ 4697 /* interface references within IPFilter. */ 4698 /* ------------------------------------------------------------------------ */ 4699 void nat_ifdetach(ifp, ifs) 4700 void *ifp; 4701 ipf_stack_t *ifs; 4702 { 4703 frsync(ifp, ifs); 4704 return; 4705 } 4706 #endif 4707 4708 4709 /* ------------------------------------------------------------------------ */ 4710 /* Function: fr_ipnatderef */ 4711 /* Returns: Nil */ 4712 /* Parameters: isp(I) - pointer to pointer to NAT rule */ 4713 /* Write Locks: ipf_nat */ 4714 /* */ 4715 /* ------------------------------------------------------------------------ */ 4716 void fr_ipnatderef(inp, ifs) 4717 ipnat_t **inp; 4718 ipf_stack_t *ifs; 4719 { 4720 ipnat_t *in; 4721 4722 in = *inp; 4723 *inp = NULL; 4724 in->in_space++; 4725 in->in_use--; 4726 if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) { 4727 if (in->in_apr) 4728 appr_free(in->in_apr); 4729 KFREE(in); 4730 ifs->ifs_nat_stats.ns_rules--; 4731 #ifdef notdef 4732 #if SOLARIS 4733 if (ifs->ifs_nat_stats.ns_rules == 0) 4734 ifs->ifs_pfil_delayed_copy = 1; 4735 #endif 4736 #endif 4737 } 4738 } 4739 4740 4741 /* ------------------------------------------------------------------------ */ 4742 /* Function: fr_natderef */ 4743 /* Returns: Nil */ 4744 /* Parameters: isp(I) - pointer to pointer to NAT table entry */ 4745 /* */ 4746 /* Decrement the reference counter for this NAT table entry and free it if */ 4747 /* there are no more things using it. 
*/ 4748 /* */ 4749 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */ 4750 /* structure *because* it only gets called on paths _after_ nat_ref has been*/ 4751 /* incremented. If nat_ref == 1 then we shouldn't decrement it here */ 4752 /* because nat_delete() will do that and send nat_ref to -1. */ 4753 /* */ 4754 /* Holding the lock on nat_lock is required to serialise nat_delete() being */ 4755 /* called from a NAT flush ioctl with a deref happening because of a packet.*/ 4756 /* ------------------------------------------------------------------------ */ 4757 void fr_natderef(natp, ifs) 4758 nat_t **natp; 4759 ipf_stack_t *ifs; 4760 { 4761 nat_t *nat; 4762 4763 nat = *natp; 4764 *natp = NULL; 4765 4766 MUTEX_ENTER(&nat->nat_lock); 4767 if (nat->nat_ref > 1) { 4768 nat->nat_ref--; 4769 MUTEX_EXIT(&nat->nat_lock); 4770 return; 4771 } 4772 MUTEX_EXIT(&nat->nat_lock); 4773 4774 WRITE_ENTER(&ifs->ifs_ipf_nat); 4775 nat_delete(nat, NL_EXPIRE, ifs); 4776 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4777 } 4778 4779 4780 /* ------------------------------------------------------------------------ */ 4781 /* Function: fr_natclone */ 4782 /* Returns: ipstate_t* - NULL == cloning failed, */ 4783 /* else pointer to new state structure */ 4784 /* Parameters: fin(I) - pointer to packet information */ 4785 /* is(I) - pointer to master state structure */ 4786 /* Write Lock: ipf_nat */ 4787 /* */ 4788 /* Create a "duplcate" state table entry from the master. 
*/ 4789 /* ------------------------------------------------------------------------ */ 4790 static nat_t *fr_natclone(fin, nat) 4791 fr_info_t *fin; 4792 nat_t *nat; 4793 { 4794 frentry_t *fr; 4795 nat_t *clone; 4796 ipnat_t *np; 4797 ipf_stack_t *ifs = fin->fin_ifs; 4798 4799 KMALLOC(clone, nat_t *); 4800 if (clone == NULL) 4801 return NULL; 4802 bcopy((char *)nat, (char *)clone, sizeof(*clone)); 4803 4804 MUTEX_NUKE(&clone->nat_lock); 4805 4806 clone->nat_aps = NULL; 4807 /* 4808 * Initialize all these so that nat_delete() doesn't cause a crash. 4809 */ 4810 clone->nat_tqe.tqe_pnext = NULL; 4811 clone->nat_tqe.tqe_next = NULL; 4812 clone->nat_tqe.tqe_ifq = NULL; 4813 clone->nat_tqe.tqe_parent = clone; 4814 4815 clone->nat_flags &= ~SI_CLONE; 4816 clone->nat_flags |= SI_CLONED; 4817 4818 if (clone->nat_hm) 4819 clone->nat_hm->hm_ref++; 4820 4821 if (nat_insert(clone, fin->fin_rev, ifs) == -1) { 4822 KFREE(clone); 4823 return NULL; 4824 } 4825 np = clone->nat_ptr; 4826 if (np != NULL) { 4827 if (ifs->ifs_nat_logging) 4828 nat_log(clone, (u_int)np->in_redir, ifs); 4829 np->in_use++; 4830 } 4831 fr = clone->nat_fr; 4832 if (fr != NULL) { 4833 MUTEX_ENTER(&fr->fr_lock); 4834 fr->fr_ref++; 4835 MUTEX_EXIT(&fr->fr_lock); 4836 } 4837 4838 /* 4839 * Because the clone is created outside the normal loop of things and 4840 * TCP has special needs in terms of state, initialise the timeout 4841 * state of the new NAT from here. 
4842 */ 4843 if (clone->nat_p == IPPROTO_TCP) { 4844 (void) fr_tcp_age(&clone->nat_tqe, fin, ifs->ifs_nat_tqb, 4845 clone->nat_flags); 4846 } 4847 #ifdef IPFILTER_SYNC 4848 clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone); 4849 #endif 4850 if (ifs->ifs_nat_logging) 4851 nat_log(clone, NL_CLONE, ifs); 4852 return clone; 4853 } 4854 4855 4856 /* ------------------------------------------------------------------------ */ 4857 /* Function: nat_wildok */ 4858 /* Returns: int - 1 == packet's ports match wildcards */ 4859 /* 0 == packet's ports don't match wildcards */ 4860 /* Parameters: nat(I) - NAT entry */ 4861 /* sport(I) - source port */ 4862 /* dport(I) - destination port */ 4863 /* flags(I) - wildcard flags */ 4864 /* dir(I) - packet direction */ 4865 /* */ 4866 /* Use NAT entry and packet direction to determine which combination of */ 4867 /* wildcard flags should be used. */ 4868 /* ------------------------------------------------------------------------ */ 4869 static INLINE int nat_wildok(nat, sport, dport, flags, dir) 4870 nat_t *nat; 4871 int sport; 4872 int dport; 4873 int flags; 4874 int dir; 4875 { 4876 /* 4877 * When called by dir is set to 4878 * nat_inlookup NAT_INBOUND (0) 4879 * nat_outlookup NAT_OUTBOUND (1) 4880 * 4881 * We simply combine the packet's direction in dir with the original 4882 * "intended" direction of that NAT entry in nat->nat_dir to decide 4883 * which combination of wildcard flags to allow. 
4884 */ 4885 4886 switch ((dir << 1) | nat->nat_dir) 4887 { 4888 case 3: /* outbound packet / outbound entry */ 4889 if (((nat->nat_inport == sport) || 4890 (flags & SI_W_SPORT)) && 4891 ((nat->nat_oport == dport) || 4892 (flags & SI_W_DPORT))) 4893 return 1; 4894 break; 4895 case 2: /* outbound packet / inbound entry */ 4896 if (((nat->nat_outport == sport) || 4897 (flags & SI_W_DPORT)) && 4898 ((nat->nat_oport == dport) || 4899 (flags & SI_W_SPORT))) 4900 return 1; 4901 break; 4902 case 1: /* inbound packet / outbound entry */ 4903 if (((nat->nat_oport == sport) || 4904 (flags & SI_W_DPORT)) && 4905 ((nat->nat_outport == dport) || 4906 (flags & SI_W_SPORT))) 4907 return 1; 4908 break; 4909 case 0: /* inbound packet / inbound entry */ 4910 if (((nat->nat_oport == sport) || 4911 (flags & SI_W_SPORT)) && 4912 ((nat->nat_outport == dport) || 4913 (flags & SI_W_DPORT))) 4914 return 1; 4915 break; 4916 default: 4917 break; 4918 } 4919 4920 return(0); 4921 } 4922 4923 4924 /* ------------------------------------------------------------------------ */ 4925 /* Function: nat_mssclamp */ 4926 /* Returns: Nil */ 4927 /* Parameters: tcp(I) - pointer to TCP header */ 4928 /* maxmss(I) - value to clamp the TCP MSS to */ 4929 /* csump(I) - pointer to TCP checksum */ 4930 /* */ 4931 /* Check for MSS option and clamp it if necessary. If found and changed, */ 4932 /* then the TCP header checksum will be updated to reflect the change in */ 4933 /* the MSS. 
*/ 4934 /* ------------------------------------------------------------------------ */ 4935 static void nat_mssclamp(tcp, maxmss, csump) 4936 tcphdr_t *tcp; 4937 u_32_t maxmss; 4938 u_short *csump; 4939 { 4940 u_char *cp, *ep, opt; 4941 int hlen, advance; 4942 u_32_t mss, sumd; 4943 4944 hlen = TCP_OFF(tcp) << 2; 4945 if (hlen > sizeof(*tcp)) { 4946 cp = (u_char *)tcp + sizeof(*tcp); 4947 ep = (u_char *)tcp + hlen; 4948 4949 while (cp < ep) { 4950 opt = cp[0]; 4951 if (opt == TCPOPT_EOL) 4952 break; 4953 else if (opt == TCPOPT_NOP) { 4954 cp++; 4955 continue; 4956 } 4957 4958 if (cp + 1 >= ep) 4959 break; 4960 advance = cp[1]; 4961 if ((cp + advance > ep) || (advance <= 0)) 4962 break; 4963 switch (opt) 4964 { 4965 case TCPOPT_MAXSEG: 4966 if (advance != 4) 4967 break; 4968 mss = cp[2] * 256 + cp[3]; 4969 if (mss > maxmss) { 4970 cp[2] = maxmss / 256; 4971 cp[3] = maxmss & 0xff; 4972 CALC_SUMD(mss, maxmss, sumd); 4973 fix_outcksum(csump, sumd); 4974 } 4975 break; 4976 default: 4977 /* ignore unknown options */ 4978 break; 4979 } 4980 4981 cp += advance; 4982 } 4983 } 4984 } 4985 4986 4987 /* ------------------------------------------------------------------------ */ 4988 /* Function: fr_setnatqueue */ 4989 /* Returns: Nil */ 4990 /* Parameters: nat(I)- pointer to NAT structure */ 4991 /* rev(I) - forward(0) or reverse(1) direction */ 4992 /* Locks: ipf_nat (read or write) */ 4993 /* */ 4994 /* Put the NAT entry on its default queue entry, using rev as a helped in */ 4995 /* determining which queue it should be placed on. 
*/ 4996 /* ------------------------------------------------------------------------ */ 4997 void fr_setnatqueue(nat, rev, ifs) 4998 nat_t *nat; 4999 int rev; 5000 ipf_stack_t *ifs; 5001 { 5002 ipftq_t *oifq, *nifq; 5003 5004 if (nat->nat_ptr != NULL) 5005 nifq = nat->nat_ptr->in_tqehead[rev]; 5006 else 5007 nifq = NULL; 5008 5009 if (nifq == NULL) { 5010 switch (nat->nat_p) 5011 { 5012 case IPPROTO_UDP : 5013 nifq = &ifs->ifs_nat_udptq; 5014 break; 5015 case IPPROTO_ICMP : 5016 nifq = &ifs->ifs_nat_icmptq; 5017 break; 5018 case IPPROTO_TCP : 5019 nifq = ifs->ifs_nat_tqb + nat->nat_tqe.tqe_state[rev]; 5020 break; 5021 default : 5022 nifq = &ifs->ifs_nat_iptq; 5023 break; 5024 } 5025 } 5026 5027 oifq = nat->nat_tqe.tqe_ifq; 5028 /* 5029 * If it's currently on a timeout queue, move it from one queue to 5030 * another, else put it on the end of the newly determined queue. 5031 */ 5032 if (oifq != NULL) 5033 fr_movequeue(&nat->nat_tqe, oifq, nifq, ifs); 5034 else 5035 fr_queueappend(&nat->nat_tqe, nifq, nat, ifs); 5036 return; 5037 } 5038 5039 /* ------------------------------------------------------------------------ */ 5040 /* Function: nat_getnext */ 5041 /* Returns: int - 0 == ok, else error */ 5042 /* Parameters: t(I) - pointer to ipftoken structure */ 5043 /* itp(I) - pointer to ipfgeniter_t structure */ 5044 /* ifs - ipf stack instance */ 5045 /* */ 5046 /* Fetch the next nat/ipnat/hostmap structure pointer from the linked list */ 5047 /* and copy it out to the storage space pointed to by itp. The next item */ 5048 /* in the list to look at is put back in the ipftoken struture. 
*/ 5049 /* ------------------------------------------------------------------------ */ 5050 static int nat_getnext(t, itp, ifs) 5051 ipftoken_t *t; 5052 ipfgeniter_t *itp; 5053 ipf_stack_t *ifs; 5054 { 5055 hostmap_t *hm, *nexthm = NULL, zerohm; 5056 ipnat_t *ipn, *nextipnat = NULL, zeroipn; 5057 nat_t *nat, *nextnat = NULL, zeronat; 5058 int error = 0, count; 5059 char *dst; 5060 5061 if (itp->igi_nitems == 0) 5062 return EINVAL; 5063 5064 READ_ENTER(&ifs->ifs_ipf_nat); 5065 5066 switch (itp->igi_type) 5067 { 5068 case IPFGENITER_HOSTMAP : 5069 hm = t->ipt_data; 5070 if (hm == NULL) { 5071 nexthm = ifs->ifs_ipf_hm_maplist; 5072 } else { 5073 nexthm = hm->hm_next; 5074 } 5075 break; 5076 5077 case IPFGENITER_IPNAT : 5078 ipn = t->ipt_data; 5079 if (ipn == NULL) { 5080 nextipnat = ifs->ifs_nat_list; 5081 } else { 5082 nextipnat = ipn->in_next; 5083 } 5084 break; 5085 5086 case IPFGENITER_NAT : 5087 nat = t->ipt_data; 5088 if (nat == NULL) { 5089 nextnat = ifs->ifs_nat_instances; 5090 } else { 5091 nextnat = nat->nat_next; 5092 } 5093 break; 5094 default : 5095 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5096 return EINVAL; 5097 } 5098 5099 dst = itp->igi_data; 5100 for (count = itp->igi_nitems; count > 0; count--) { 5101 switch (itp->igi_type) 5102 { 5103 case IPFGENITER_HOSTMAP : 5104 if (nexthm != NULL) { 5105 ATOMIC_INC32(nexthm->hm_ref); 5106 t->ipt_data = nexthm; 5107 } else { 5108 bzero(&zerohm, sizeof(zerohm)); 5109 nexthm = &zerohm; 5110 count = 1; 5111 t->ipt_data = NULL; 5112 } 5113 break; 5114 case IPFGENITER_IPNAT : 5115 if (nextipnat != NULL) { 5116 ATOMIC_INC32(nextipnat->in_use); 5117 t->ipt_data = nextipnat; 5118 } else { 5119 bzero(&zeroipn, sizeof(zeroipn)); 5120 nextipnat = &zeroipn; 5121 count = 1; 5122 t->ipt_data = NULL; 5123 } 5124 break; 5125 case IPFGENITER_NAT : 5126 if (nextnat != NULL) { 5127 MUTEX_ENTER(&nextnat->nat_lock); 5128 nextnat->nat_ref++; 5129 MUTEX_EXIT(&nextnat->nat_lock); 5130 t->ipt_data = nextnat; 5131 } else { 5132 bzero(&zeronat, 
sizeof(zeronat)); 5133 nextnat = &zeronat; 5134 count = 1; 5135 t->ipt_data = NULL; 5136 } 5137 break; 5138 default : 5139 break; 5140 } 5141 5142 /* 5143 * We can safely release our hold on ipf_nat. 5144 */ 5145 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5146 5147 switch (itp->igi_type) 5148 { 5149 case IPFGENITER_HOSTMAP : 5150 if (hm != NULL) { 5151 WRITE_ENTER(&ifs->ifs_ipf_nat); 5152 fr_hostmapdel(&hm); 5153 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5154 } 5155 error = COPYOUT(nexthm, dst, sizeof(*nexthm)); 5156 if (error != 0) { 5157 error = EFAULT; 5158 } else { 5159 dst += sizeof(*nexthm); 5160 hm = nexthm; 5161 nexthm = nexthm->hm_next; 5162 } 5163 break; 5164 case IPFGENITER_IPNAT : 5165 if (ipn != NULL) { 5166 WRITE_ENTER(&ifs->ifs_ipf_nat); 5167 fr_ipnatderef(&ipn, ifs); 5168 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5169 } 5170 error = COPYOUT(nextipnat, dst, sizeof(*nextipnat)); 5171 if (error != 0) { 5172 error = EFAULT; 5173 } else { 5174 dst += sizeof(*nextipnat); 5175 ipn = nextipnat; 5176 nextipnat = nextipnat->in_next; 5177 } 5178 break; 5179 case IPFGENITER_NAT : 5180 if (nat != NULL) { 5181 fr_natderef(&nat, ifs); 5182 } 5183 error = COPYOUT(nextnat, dst, sizeof(*nextnat)); 5184 if (error != 0) { 5185 error = EFAULT; 5186 } else { 5187 dst += sizeof(*nextnat); 5188 nat = nextnat; 5189 nextnat = nextnat->nat_next; 5190 } 5191 break; 5192 default : 5193 break; 5194 } 5195 5196 if ((count == 1) || (error != 0)) 5197 break; 5198 5199 READ_ENTER(&ifs->ifs_ipf_nat); 5200 } 5201 5202 return error; 5203 } 5204 5205 5206 /* ------------------------------------------------------------------------ */ 5207 /* Function: nat_iterator */ 5208 /* Returns: int - 0 == ok, else error */ 5209 /* Parameters: token(I) - pointer to ipftoken structure */ 5210 /* itp(I) - pointer to ipfgeniter_t structure */ 5211 /* */ 5212 /* This function acts as a handler for the SIOCGENITER ioctls that use a */ 5213 /* generic structure to iterate through a list. 
There are three different */ 5214 /* linked lists of NAT related information to go through: NAT rules, active */ 5215 /* NAT mappings and the NAT fragment cache. */ 5216 /* ------------------------------------------------------------------------ */ 5217 static int nat_iterator(token, itp, ifs) 5218 ipftoken_t *token; 5219 ipfgeniter_t *itp; 5220 ipf_stack_t *ifs; 5221 { 5222 int error; 5223 5224 if (itp->igi_data == NULL) 5225 return EFAULT; 5226 5227 token->ipt_subtype = itp->igi_type; 5228 5229 switch (itp->igi_type) 5230 { 5231 case IPFGENITER_HOSTMAP : 5232 case IPFGENITER_IPNAT : 5233 case IPFGENITER_NAT : 5234 error = nat_getnext(token, itp, ifs); 5235 break; 5236 case IPFGENITER_NATFRAG : 5237 error = fr_nextfrag(token, itp, &ifs->ifs_ipfr_natlist, 5238 &ifs->ifs_ipfr_nattail, 5239 &ifs->ifs_ipf_natfrag, ifs); 5240 break; 5241 default : 5242 error = EINVAL; 5243 break; 5244 } 5245 5246 return error; 5247 } 5248 5249 5250 /* -------------------------------------------------------------------- */ 5251 /* Function: nat_earlydrop */ 5252 /* Returns: number of dropped/removed entries from the queue */ 5253 /* Parameters: ifq - pointer to queue with entries to be processed */ 5254 /* maxidle - entry must be idle this long to be dropped */ 5255 /* ifs - ipf stack instance */ 5256 /* */ 5257 /* Function is invoked from nat_extraflush() only. Removes entries */ 5258 /* form specified timeout queue, based on how long they've sat idle, */ 5259 /* without waiting for it to happen on its own. */ 5260 /* -------------------------------------------------------------------- */ 5261 static int nat_earlydrop(ifq, maxidle, ifs) 5262 ipftq_t *ifq; 5263 int maxidle; 5264 ipf_stack_t *ifs; 5265 { 5266 ipftqent_t *tqe, *tqn; 5267 nat_t *nat; 5268 unsigned int dropped; 5269 int droptick; 5270 5271 if (ifq == NULL) 5272 return (0); 5273 5274 dropped = 0; 5275 5276 /* 5277 * Determine the tick representing the idle time we're interested 5278 * in. 
If an entry exists in the queue, and it was touched before 5279 * that tick, then it's been idle longer than maxidle ... remove it. 5280 */ 5281 droptick = ifs->ifs_fr_ticks - maxidle; 5282 tqn = ifq->ifq_head; 5283 while ((tqe = tqn) != NULL && tqe->tqe_touched < droptick) { 5284 tqn = tqe->tqe_next; 5285 nat = tqe->tqe_parent; 5286 nat_delete(nat, ISL_EXPIRE, ifs); 5287 dropped++; 5288 } 5289 return (dropped); 5290 } 5291 5292 5293 /* --------------------------------------------------------------------- */ 5294 /* Function: nat_flushclosing */ 5295 /* Returns: int - number of NAT entries deleted */ 5296 /* Parameters: stateval(I) - State at which to start removing entries */ 5297 /* ifs - ipf stack instance */ 5298 /* */ 5299 /* Remove nat table entries for TCP connections which are in the process */ 5300 /* of closing, and are in (or "beyond") state specified by 'stateval'. */ 5301 /* --------------------------------------------------------------------- */ 5302 static int nat_flushclosing(stateval, ifs) 5303 int stateval; 5304 ipf_stack_t *ifs; 5305 { 5306 ipftq_t *ifq, *ifqn; 5307 ipftqent_t *tqe, *tqn; 5308 nat_t *nat; 5309 int dropped; 5310 5311 dropped = 0; 5312 5313 /* 5314 * Start by deleting any entries in specific timeout queues. 5315 */ 5316 ifqn = &ifs->ifs_nat_tqb[stateval]; 5317 while ((ifq = ifqn) != NULL) { 5318 ifqn = ifq->ifq_next; 5319 dropped += nat_earlydrop(ifq, (int)0, ifs); 5320 } 5321 5322 /* 5323 * Next, look through user defined queues for closing entries. 
5324 */ 5325 ifqn = ifs->ifs_nat_utqe; 5326 while ((ifq = ifqn) != NULL) { 5327 ifqn = ifq->ifq_next; 5328 tqn = ifq->ifq_head; 5329 while ((tqe = tqn) != NULL) { 5330 tqn = tqe->tqe_next; 5331 nat = tqe->tqe_parent; 5332 if (nat->nat_p != IPPROTO_TCP) 5333 continue; 5334 if ((nat->nat_tcpstate[0] >= stateval) && 5335 (nat->nat_tcpstate[1] >= stateval)) { 5336 nat_delete(nat, NL_EXPIRE, ifs); 5337 dropped++; 5338 } 5339 } 5340 } 5341 return (dropped); 5342 } 5343 5344 5345 /* --------------------------------------------------------------------- */ 5346 /* Function: nat_extraflush */ 5347 /* Returns: int - number of NAT entries deleted */ 5348 /* Parameters: which(I) - how to flush the active NAT table */ 5349 /* ifs - ipf stack instance */ 5350 /* Write Locks: ipf_nat */ 5351 /* */ 5352 /* Flush nat tables. Three actions currently defined: */ 5353 /* */ 5354 /* which == 0 : Flush all nat table entries. */ 5355 /* */ 5356 /* which == 1 : Flush entries with TCP connections which have started */ 5357 /* to close on both ends. */ 5358 /* */ 5359 /* which == 2 : First, flush entries which are "almost" closed. If that */ 5360 /* does not take us below specified threshold in the table, */ 5361 /* we want to flush entries with TCP connections which have */ 5362 /* been idle for a long time. Start with connections idle */ 5363 /* over 12 hours, and then work backwards in half hour */ 5364 /* increments to at most 30 minutes idle, and finally work */ 5365 /* back in 30 second increments to at most 30 seconds. 
*/ 5366 /* --------------------------------------------------------------------- */ 5367 static int nat_extraflush(which, ifs) 5368 int which; 5369 ipf_stack_t *ifs; 5370 { 5371 ipftq_t *ifq, *ifqn; 5372 nat_t *nat, **natp; 5373 int idletime, removed, idle_idx; 5374 SPL_INT(s); 5375 5376 removed = 0; 5377 5378 SPL_NET(s); 5379 switch (which) 5380 { 5381 case 0: 5382 natp = &ifs->ifs_nat_instances; 5383 while ((nat = *natp) != NULL) { 5384 natp = &nat->nat_next; 5385 nat_delete(nat, ISL_FLUSH, ifs); 5386 removed++; 5387 } 5388 break; 5389 5390 case 1: 5391 removed = nat_flushclosing(IPF_TCPS_CLOSE_WAIT, ifs); 5392 break; 5393 5394 case 2: 5395 removed = nat_flushclosing(IPF_TCPS_FIN_WAIT_2, ifs); 5396 5397 /* 5398 * Be sure we haven't done this in the last 10 seconds. 5399 */ 5400 if (ifs->ifs_fr_ticks - ifs->ifs_nat_last_force_flush < 5401 IPF_TTLVAL(10)) 5402 break; 5403 ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks; 5404 5405 /* 5406 * Determine initial threshold for minimum idle time based on 5407 * how long ipfilter has been running. Ipfilter needs to have 5408 * been up as long as the smallest interval to continue on. 5409 * 5410 * Minimum idle times stored in idletime_tab and indexed by 5411 * idle_idx. Start at upper end of array and work backwards. 5412 * 5413 * Once the index is found, set the initial idle time to the 5414 * first interval before the current ipfilter run time. 
5415 */ 5416 if (ifs->ifs_fr_ticks < idletime_tab[0]) 5417 break; /* switch */ 5418 idle_idx = (sizeof (idletime_tab) / sizeof (int)) - 1; 5419 if (ifs->ifs_fr_ticks > idletime_tab[idle_idx]) { 5420 idletime = idletime_tab[idle_idx]; 5421 } else { 5422 while ((idle_idx > 0) && 5423 (ifs->ifs_fr_ticks < idletime_tab[idle_idx])) 5424 idle_idx--; 5425 idletime = (ifs->ifs_fr_ticks / 5426 idletime_tab[idle_idx]) * 5427 idletime_tab[idle_idx]; 5428 } 5429 5430 while ((idle_idx >= 0) && 5431 (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_lo)) { 5432 /* 5433 * Start with appropriate timeout queue. 5434 */ 5435 removed += nat_earlydrop( 5436 &ifs->ifs_nat_tqb[IPF_TCPS_ESTABLISHED], 5437 idletime, ifs); 5438 5439 /* 5440 * Make sure we haven't already deleted enough 5441 * entries before checking the user defined queues. 5442 */ 5443 if (NAT_TAB_WATER_LEVEL(ifs) <= 5444 ifs->ifs_nat_flush_lvl_lo) 5445 break; 5446 5447 /* 5448 * Next, look through the user defined queues. 5449 */ 5450 ifqn = ifs->ifs_nat_utqe; 5451 while ((ifq = ifqn) != NULL) { 5452 ifqn = ifq->ifq_next; 5453 removed += nat_earlydrop(ifq, idletime, ifs); 5454 } 5455 5456 /* 5457 * Adjust the granularity of idle time. 5458 * 5459 * If we reach an interval boundary, we need to 5460 * either adjust the idle time accordingly or exit 5461 * the loop altogether (if this is very last check). 5462 */ 5463 idletime -= idletime_tab[idle_idx]; 5464 if (idletime < idletime_tab[idle_idx]) { 5465 if (idle_idx != 0) { 5466 idletime = idletime_tab[idle_idx] - 5467 idletime_tab[idle_idx - 1]; 5468 idle_idx--; 5469 } else { 5470 break; /* while */ 5471 } 5472 } 5473 } 5474 break; 5475 default: 5476 break; 5477 } 5478 5479 SPL_X(s); 5480 return (removed); 5481 } 5482