1 /* 2 * Copyright (C) 1995-2004 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 7 * Use is subject to license terms. 8 */ 9 10 #if defined(KERNEL) || defined(_KERNEL) 11 # undef KERNEL 12 # undef _KERNEL 13 # define KERNEL 1 14 # define _KERNEL 1 15 #endif 16 #include <sys/errno.h> 17 #include <sys/types.h> 18 #include <sys/param.h> 19 #include <sys/time.h> 20 #include <sys/file.h> 21 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ 22 defined(_KERNEL) 23 # include "opt_ipfilter_log.h" 24 #endif 25 #if !defined(_KERNEL) 26 # include <stdio.h> 27 # include <string.h> 28 # include <stdlib.h> 29 # define _KERNEL 30 # ifdef __OpenBSD__ 31 struct file; 32 # endif 33 # include <sys/uio.h> 34 # undef _KERNEL 35 #endif 36 #if defined(_KERNEL) && (__FreeBSD_version >= 220000) 37 # include <sys/filio.h> 38 # include <sys/fcntl.h> 39 #else 40 # include <sys/ioctl.h> 41 #endif 42 #if !defined(AIX) 43 # include <sys/fcntl.h> 44 #endif 45 #if !defined(linux) 46 # include <sys/protosw.h> 47 #endif 48 #include <sys/socket.h> 49 #if defined(_KERNEL) 50 # include <sys/systm.h> 51 # if !defined(__SVR4) && !defined(__svr4__) 52 # include <sys/mbuf.h> 53 # endif 54 #endif 55 #if defined(__SVR4) || defined(__svr4__) 56 # include <sys/filio.h> 57 # include <sys/byteorder.h> 58 # ifdef _KERNEL 59 # include <sys/dditypes.h> 60 # endif 61 # include <sys/stream.h> 62 # include <sys/kmem.h> 63 #endif 64 #if __FreeBSD_version >= 300000 65 # include <sys/queue.h> 66 #endif 67 #include <net/if.h> 68 #if __FreeBSD_version >= 300000 69 # include <net/if_var.h> 70 # if defined(_KERNEL) && !defined(IPFILTER_LKM) 71 # include "opt_ipfilter.h" 72 # endif 73 #endif 74 #ifdef sun 75 # include <net/af.h> 76 #endif 77 #include <net/route.h> 78 #include <netinet/in.h> 79 #include <netinet/in_systm.h> 80 #include <netinet/ip.h> 81 82 #ifdef RFC1825 83 # include <vpn/md5.h> 84 # include <vpn/ipsec.h> 85 extern struct ifnet vpnif; 86 #endif 87 88 #if !defined(linux) 89 # include <netinet/ip_var.h> 90 #endif 91 #include <netinet/tcp.h> 92 #include <netinet/udp.h> 93 #include <netinet/ip_icmp.h> 94 #include "netinet/ip_compat.h" 95 #include <netinet/tcpip.h> 96 #include "netinet/ip_fil.h" 97 #include "netinet/ip_nat.h" 98 #include "netinet/ip_frag.h" 99 #include "netinet/ip_state.h" 100 #include "netinet/ip_proxy.h" 101 #include "netinet/ipf_stack.h" 102 #ifdef IPFILTER_SYNC 103 #include "netinet/ip_sync.h" 104 #endif 105 #if (__FreeBSD_version >= 300000) 106 # include <sys/malloc.h> 107 #endif 108 /* END OF INCLUDES */ 109 110 #undef SOCKADDR_IN 111 #define SOCKADDR_IN struct sockaddr_in 112 113 #if !defined(lint) 114 static const char sccsid[] = "@(#)ip_nat.c 1.11 6/5/96 (C) 1995 Darren Reed"; 115 static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 darrenr Exp $"; 116 #endif 117 118 119 /* ======================================================================== */ 120 /* How the NAT is organised and works. */ 121 /* */ 122 /* Inside (interface y) NAT Outside (interface x) */ 123 /* -------------------- -+- ------------------------------------- */ 124 /* Packet going | out, processsed by fr_checknatout() for x */ 125 /* ------------> | ------------> */ 126 /* src=10.1.1.1 | src=192.1.1.1 */ 127 /* | */ 128 /* | in, processed by fr_checknatin() for x */ 129 /* <------------ | <------------ */ 130 /* dst=10.1.1.1 | dst=192.1.1.1 */ 131 /* -------------------- -+- ------------------------------------- */ 132 /* fr_checknatout() - changes ip_src and if required, sport */ 133 /* - creates a new mapping, if required. */ 134 /* fr_checknatin() - changes ip_dst and if required, dport */ 135 /* */ 136 /* In the NAT table, internal source is recorded as "in" and externally */ 137 /* seen as "out". */ 138 /* ======================================================================== */ 139 140 141 static int nat_flushtable __P((ipf_stack_t *)); 142 static int nat_clearlist __P((ipf_stack_t *)); 143 static void nat_addnat __P((struct ipnat *, ipf_stack_t *)); 144 static void nat_addrdr __P((struct ipnat *, ipf_stack_t *)); 145 static int fr_natgetent __P((caddr_t, ipf_stack_t *)); 146 static int fr_natgetsz __P((caddr_t, ipf_stack_t *)); 147 static int fr_natputent __P((caddr_t, int, ipf_stack_t *)); 148 static void nat_tabmove __P((nat_t *, ipf_stack_t *)); 149 static int nat_match __P((fr_info_t *, ipnat_t *)); 150 static INLINE int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *)); 151 static INLINE int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *)); 152 static hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr, 153 struct in_addr, struct in_addr, u_32_t, 154 ipf_stack_t *)); 155 static INLINE int nat_icmpquerytype4 __P((int)); 156 static int nat_ruleaddrinit __P((ipnat_t *)); 157 static int nat_siocaddnat __P((ipnat_t *, ipnat_t **, int, ipf_stack_t *)); 158 static void nat_siocdelnat __P((ipnat_t *, ipnat_t **, int, ipf_stack_t *)); 159 static INLINE int nat_icmperrortype4 __P((int)); 160 static INLINE int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *, 161 tcphdr_t *, nat_t **, int)); 162 static INLINE int nat_resolverule __P((ipnat_t *, ipf_stack_t *)); 163 static void nat_mssclamp __P((tcphdr_t *, u_32_t, u_short *)); 164 static int nat_getnext __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 165 static int nat_iterator __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 166 static int nat_extraflush __P((int, ipf_stack_t *)); 167 static int nat_earlydrop __P((ipftq_t *, int, ipf_stack_t *)); 168 static int nat_flushclosing __P((int, ipf_stack_t *)); 169 170 171 /* 172 * Below we declare a list of constants used only in the nat_extraflush() 173 * routine. We are placing it here, instead of in nat_extraflush() itself, 174 * because we want to make it visible to tools such as mdb, nm etc., so the 175 * values can easily be altered during debugging. 176 */ 177 static const int idletime_tab[] = { 178 IPF_TTLVAL(30), /* 30 seconds */ 179 IPF_TTLVAL(1800), /* 30 minutes */ 180 IPF_TTLVAL(43200), /* 12 hours */ 181 IPF_TTLVAL(345600), /* 4 days */ 182 }; 183 184 #define NAT_HAS_L4_CHANGED(n) \ 185 (((n)->nat_flags & (IPN_TCPUDPICMP)) && \ 186 (n)->nat_inport != (n)->nat_outport) 187 188 /* ------------------------------------------------------------------------ */ 189 /* Function: fr_natinit */ 190 /* Returns: int - 0 == success, -1 == failure */ 191 /* Parameters: Nil */ 192 /* */ 193 /* Initialise all of the NAT locks, tables and other structures. */ 194 /* ------------------------------------------------------------------------ */ 195 int fr_natinit(ifs) 196 ipf_stack_t *ifs; 197 { 198 int i; 199 200 KMALLOCS(ifs->ifs_nat_table[0], nat_t **, 201 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 202 if (ifs->ifs_nat_table[0] != NULL) 203 bzero((char *)ifs->ifs_nat_table[0], 204 ifs->ifs_ipf_nattable_sz * sizeof(nat_t *)); 205 else 206 return -1; 207 208 KMALLOCS(ifs->ifs_nat_table[1], nat_t **, 209 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 210 if (ifs->ifs_nat_table[1] != NULL) 211 bzero((char *)ifs->ifs_nat_table[1], 212 ifs->ifs_ipf_nattable_sz * sizeof(nat_t *)); 213 else 214 return -2; 215 216 KMALLOCS(ifs->ifs_nat_rules, ipnat_t **, 217 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz); 218 if (ifs->ifs_nat_rules != NULL) 219 bzero((char *)ifs->ifs_nat_rules, 220 ifs->ifs_ipf_natrules_sz * sizeof(ipnat_t *)); 221 else 222 return -3; 223 224 KMALLOCS(ifs->ifs_rdr_rules, ipnat_t **, 225 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz); 226 if (ifs->ifs_rdr_rules != NULL) 227 bzero((char *)ifs->ifs_rdr_rules, 228 ifs->ifs_ipf_rdrrules_sz * sizeof(ipnat_t *)); 229 else 230 return -4; 231 232 KMALLOCS(ifs->ifs_maptable, hostmap_t **, 233 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); 234 if (ifs->ifs_maptable != NULL) 235 bzero((char *)ifs->ifs_maptable, 236 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); 237 else 238 return -5; 239 240 ifs->ifs_ipf_hm_maplist = NULL; 241 242 KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[0], u_long *, 243 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 244 if (ifs->ifs_nat_stats.ns_bucketlen[0] == NULL) 245 return -1; 246 bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[0], 247 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 248 249 KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[1], u_long *, 250 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 251 if (ifs->ifs_nat_stats.ns_bucketlen[1] == NULL) 252 return -1; 253 bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[1], 254 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 255 256 if (ifs->ifs_fr_nat_maxbucket == 0) { 257 for (i = ifs->ifs_ipf_nattable_sz; i > 0; i >>= 1) 258 ifs->ifs_fr_nat_maxbucket++; 259 ifs->ifs_fr_nat_maxbucket *= 2; 260 } 261 262 fr_sttab_init(ifs->ifs_nat_tqb, ifs); 263 /* 264 * Increase this because we may have "keep state" following this too 265 * and packet storms can occur if this is removed too quickly. 266 */ 267 ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcplastack; 268 ifs->ifs_nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_nat_udptq; 269 ifs->ifs_nat_udptq.ifq_ttl = ifs->ifs_fr_defnatage; 270 ifs->ifs_nat_udptq.ifq_ref = 1; 271 ifs->ifs_nat_udptq.ifq_head = NULL; 272 ifs->ifs_nat_udptq.ifq_tail = &ifs->ifs_nat_udptq.ifq_head; 273 MUTEX_INIT(&ifs->ifs_nat_udptq.ifq_lock, "nat ipftq udp tab"); 274 ifs->ifs_nat_udptq.ifq_next = &ifs->ifs_nat_icmptq; 275 ifs->ifs_nat_icmptq.ifq_ttl = ifs->ifs_fr_defnaticmpage; 276 ifs->ifs_nat_icmptq.ifq_ref = 1; 277 ifs->ifs_nat_icmptq.ifq_head = NULL; 278 ifs->ifs_nat_icmptq.ifq_tail = &ifs->ifs_nat_icmptq.ifq_head; 279 MUTEX_INIT(&ifs->ifs_nat_icmptq.ifq_lock, "nat icmp ipftq tab"); 280 ifs->ifs_nat_icmptq.ifq_next = &ifs->ifs_nat_iptq; 281 ifs->ifs_nat_iptq.ifq_ttl = ifs->ifs_fr_defnatipage; 282 ifs->ifs_nat_iptq.ifq_ref = 1; 283 ifs->ifs_nat_iptq.ifq_head = NULL; 284 ifs->ifs_nat_iptq.ifq_tail = &ifs->ifs_nat_iptq.ifq_head; 285 MUTEX_INIT(&ifs->ifs_nat_iptq.ifq_lock, "nat ip ipftq tab"); 286 ifs->ifs_nat_iptq.ifq_next = NULL; 287 288 for (i = 0; i < IPF_TCP_NSTATES; i++) { 289 if (ifs->ifs_nat_tqb[i].ifq_ttl < ifs->ifs_fr_defnaticmpage) 290 ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnaticmpage; 291 #ifdef LARGE_NAT 292 else if (ifs->ifs_nat_tqb[i].ifq_ttl > ifs->ifs_fr_defnatage) 293 ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnatage; 294 #endif 295 } 296 297 /* 298 * Increase this because we may have "keep state" following 299 * this too and packet storms can occur if this is removed 300 * too quickly. 301 */ 302 ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = 303 ifs->ifs_nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl; 304 305 RWLOCK_INIT(&ifs->ifs_ipf_nat, "ipf IP NAT rwlock"); 306 RWLOCK_INIT(&ifs->ifs_ipf_natfrag, "ipf IP NAT-Frag rwlock"); 307 MUTEX_INIT(&ifs->ifs_ipf_nat_new, "ipf nat new mutex"); 308 MUTEX_INIT(&ifs->ifs_ipf_natio, "ipf nat io mutex"); 309 310 ifs->ifs_fr_nat_init = 1; 311 312 return 0; 313 } 314 315 316 /* ------------------------------------------------------------------------ */ 317 /* Function: nat_addrdr */ 318 /* Returns: Nil */ 319 /* Parameters: n(I) - pointer to NAT rule to add */ 320 /* */ 321 /* Adds a redirect rule to the hash table of redirect rules and the list of */ 322 /* loaded NAT rules. Updates the bitmask indicating which netmasks are in */ 323 /* use by redirect rules. */ 324 /* ------------------------------------------------------------------------ */ 325 static void nat_addrdr(n, ifs) 326 ipnat_t *n; 327 ipf_stack_t *ifs; 328 { 329 ipnat_t **np; 330 u_32_t j; 331 u_int hv; 332 int k; 333 334 k = count4bits(n->in_outmsk); 335 if ((k >= 0) && (k != 32)) 336 ifs->ifs_rdr_masks |= 1 << k; 337 j = (n->in_outip & n->in_outmsk); 338 hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_rdrrules_sz); 339 np = ifs->ifs_rdr_rules + hv; 340 while (*np != NULL) 341 np = &(*np)->in_rnext; 342 n->in_rnext = NULL; 343 n->in_prnext = np; 344 n->in_hv = hv; 345 *np = n; 346 } 347 348 349 /* ------------------------------------------------------------------------ */ 350 /* Function: nat_addnat */ 351 /* Returns: Nil */ 352 /* Parameters: n(I) - pointer to NAT rule to add */ 353 /* */ 354 /* Adds a NAT map rule to the hash table of rules and the list of loaded */ 355 /* NAT rules. Updates the bitmask indicating which netmasks are in use by */ 356 /* redirect rules. */ 357 /* ------------------------------------------------------------------------ */ 358 static void nat_addnat(n, ifs) 359 ipnat_t *n; 360 ipf_stack_t *ifs; 361 { 362 ipnat_t **np; 363 u_32_t j; 364 u_int hv; 365 int k; 366 367 k = count4bits(n->in_inmsk); 368 if ((k >= 0) && (k != 32)) 369 ifs->ifs_nat_masks |= 1 << k; 370 j = (n->in_inip & n->in_inmsk); 371 hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_natrules_sz); 372 np = ifs->ifs_nat_rules + hv; 373 while (*np != NULL) 374 np = &(*np)->in_mnext; 375 n->in_mnext = NULL; 376 n->in_pmnext = np; 377 n->in_hv = hv; 378 *np = n; 379 } 380 381 382 /* ------------------------------------------------------------------------ */ 383 /* Function: nat_delrdr */ 384 /* Returns: Nil */ 385 /* Parameters: n(I) - pointer to NAT rule to delete */ 386 /* */ 387 /* Removes a redirect rule from the hash table of redirect rules. */ 388 /* ------------------------------------------------------------------------ */ 389 void nat_delrdr(n) 390 ipnat_t *n; 391 { 392 if (n->in_rnext) 393 n->in_rnext->in_prnext = n->in_prnext; 394 *n->in_prnext = n->in_rnext; 395 } 396 397 398 /* ------------------------------------------------------------------------ */ 399 /* Function: nat_delnat */ 400 /* Returns: Nil */ 401 /* Parameters: n(I) - pointer to NAT rule to delete */ 402 /* */ 403 /* Removes a NAT map rule from the hash table of NAT map rules. */ 404 /* ------------------------------------------------------------------------ */ 405 void nat_delnat(n) 406 ipnat_t *n; 407 { 408 if (n->in_mnext != NULL) 409 n->in_mnext->in_pmnext = n->in_pmnext; 410 *n->in_pmnext = n->in_mnext; 411 } 412 413 414 /* ------------------------------------------------------------------------ */ 415 /* Function: nat_hostmap */ 416 /* Returns: struct hostmap* - NULL if no hostmap could be created, */ 417 /* else a pointer to the hostmapping to use */ 418 /* Parameters: np(I) - pointer to NAT rule */ 419 /* real(I) - real IP address */ 420 /* map(I) - mapped IP address */ 421 /* port(I) - destination port number */ 422 /* Write Locks: ipf_nat */ 423 /* */ 424 /* Check if an ip address has already been allocated for a given mapping */ 425 /* that is not doing port based translation. If is not yet allocated, then */ 426 /* create a new entry if a non-NULL NAT rule pointer has been supplied. */ 427 /* ------------------------------------------------------------------------ */ 428 static struct hostmap *nat_hostmap(np, src, dst, map, port, ifs) 429 ipnat_t *np; 430 struct in_addr src; 431 struct in_addr dst; 432 struct in_addr map; 433 u_32_t port; 434 ipf_stack_t *ifs; 435 { 436 hostmap_t *hm; 437 u_int hv; 438 439 hv = (src.s_addr ^ dst.s_addr); 440 hv += src.s_addr; 441 hv += dst.s_addr; 442 hv %= HOSTMAP_SIZE; 443 for (hm = ifs->ifs_maptable[hv]; hm; hm = hm->hm_next) 444 if ((hm->hm_srcip.s_addr == src.s_addr) && 445 (hm->hm_dstip.s_addr == dst.s_addr) && 446 ((np == NULL) || (np == hm->hm_ipnat)) && 447 ((port == 0) || (port == hm->hm_port))) { 448 hm->hm_ref++; 449 return hm; 450 } 451 452 if (np == NULL) 453 return NULL; 454 455 KMALLOC(hm, hostmap_t *); 456 if (hm) { 457 hm->hm_hnext = ifs->ifs_ipf_hm_maplist; 458 hm->hm_phnext = &ifs->ifs_ipf_hm_maplist; 459 if (ifs->ifs_ipf_hm_maplist != NULL) 460 ifs->ifs_ipf_hm_maplist->hm_phnext = &hm->hm_hnext; 461 ifs->ifs_ipf_hm_maplist = hm; 462 463 hm->hm_next = ifs->ifs_maptable[hv]; 464 hm->hm_pnext = ifs->ifs_maptable + hv; 465 if (ifs->ifs_maptable[hv] != NULL) 466 ifs->ifs_maptable[hv]->hm_pnext = &hm->hm_next; 467 ifs->ifs_maptable[hv] = hm; 468 hm->hm_ipnat = np; 469 hm->hm_srcip = src; 470 hm->hm_dstip = dst; 471 hm->hm_mapip = map; 472 hm->hm_ref = 1; 473 hm->hm_port = port; 474 hm->hm_v = 4; 475 } 476 return hm; 477 } 478 479 480 /* ------------------------------------------------------------------------ */ 481 /* Function: fr_hostmapdel */ 482 /* Returns: Nil */ 483 /* Parameters: hmp(I) - pointer to pointer to hostmap structure */ 484 /* Write Locks: ipf_nat */ 485 /* */ 486 /* Decrement the references to this hostmap structure by one. If this */ 487 /* reaches zero then remove it and free it. */ 488 /* ------------------------------------------------------------------------ */ 489 void fr_hostmapdel(hmp) 490 struct hostmap **hmp; 491 { 492 struct hostmap *hm; 493 494 hm = *hmp; 495 *hmp = NULL; 496 497 hm->hm_ref--; 498 if (hm->hm_ref == 0) { 499 if (hm->hm_next) 500 hm->hm_next->hm_pnext = hm->hm_pnext; 501 *hm->hm_pnext = hm->hm_next; 502 if (hm->hm_hnext) 503 hm->hm_hnext->hm_phnext = hm->hm_phnext; 504 *hm->hm_phnext = hm->hm_hnext; 505 KFREE(hm); 506 } 507 } 508 509 510 /* ------------------------------------------------------------------------ */ 511 /* Function: fix_outcksum */ 512 /* Returns: Nil */ 513 /* Parameters: sp(I) - location of 16bit checksum to update */ 514 /* n((I) - amount to adjust checksum by */ 515 /* */ 516 /* Adjusts the 16bit checksum by "n" for packets going out. */ 517 /* ------------------------------------------------------------------------ */ 518 void fix_outcksum(sp, n) 519 u_short *sp; 520 u_32_t n; 521 { 522 u_short sumshort; 523 u_32_t sum1; 524 525 if (n == 0) 526 return; 527 528 sum1 = (~ntohs(*sp)) & 0xffff; 529 sum1 += (n); 530 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 531 /* Again */ 532 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 533 sumshort = ~(u_short)sum1; 534 *(sp) = htons(sumshort); 535 } 536 537 538 /* ------------------------------------------------------------------------ */ 539 /* Function: fix_incksum */ 540 /* Returns: Nil */ 541 /* Parameters: sp(I) - location of 16bit checksum to update */ 542 /* n((I) - amount to adjust checksum by */ 543 /* */ 544 /* Adjusts the 16bit checksum by "n" for packets going in. */ 545 /* ------------------------------------------------------------------------ */ 546 void fix_incksum(sp, n) 547 u_short *sp; 548 u_32_t n; 549 { 550 u_short sumshort; 551 u_32_t sum1; 552 553 if (n == 0) 554 return; 555 556 sum1 = (~ntohs(*sp)) & 0xffff; 557 sum1 += ~(n) & 0xffff; 558 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 559 /* Again */ 560 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 561 sumshort = ~(u_short)sum1; 562 *(sp) = htons(sumshort); 563 } 564 565 566 /* ------------------------------------------------------------------------ */ 567 /* Function: fix_datacksum */ 568 /* Returns: Nil */ 569 /* Parameters: sp(I) - location of 16bit checksum to update */ 570 /* n((I) - amount to adjust checksum by */ 571 /* */ 572 /* Fix_datacksum is used *only* for the adjustments of checksums in the */ 573 /* data section of an IP packet. */ 574 /* */ 575 /* The only situation in which you need to do this is when NAT'ing an */ 576 /* ICMP error message. Such a message, contains in its body the IP header */ 577 /* of the original IP packet, that causes the error. */ 578 /* */ 579 /* You can't use fix_incksum or fix_outcksum in that case, because for the */ 580 /* kernel the data section of the ICMP error is just data, and no special */ 581 /* processing like hardware cksum or ntohs processing have been done by the */ 582 /* kernel on the data section. */ 583 /* ------------------------------------------------------------------------ */ 584 void fix_datacksum(sp, n) 585 u_short *sp; 586 u_32_t n; 587 { 588 u_short sumshort; 589 u_32_t sum1; 590 591 if (n == 0) 592 return; 593 594 sum1 = (~ntohs(*sp)) & 0xffff; 595 sum1 += (n); 596 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 597 /* Again */ 598 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 599 sumshort = ~(u_short)sum1; 600 *(sp) = htons(sumshort); 601 } 602 603 604 /* ------------------------------------------------------------------------ */ 605 /* Function: fr_nat_ioctl */ 606 /* Returns: int - 0 == success, != 0 == failure */ 607 /* Parameters: data(I) - pointer to ioctl data */ 608 /* cmd(I) - ioctl command integer */ 609 /* mode(I) - file mode bits used with open */ 610 /* */ 611 /* Processes an ioctl call made to operate on the IP Filter NAT device. */ 612 /* ------------------------------------------------------------------------ */ 613 int fr_nat_ioctl(data, cmd, mode, uid, ctx, ifs) 614 ioctlcmd_t cmd; 615 caddr_t data; 616 int mode, uid; 617 void *ctx; 618 ipf_stack_t *ifs; 619 { 620 ipnat_t *nat, *nt, *n = NULL, **np = NULL; 621 int error = 0, ret, arg, getlock; 622 ipnat_t natd; 623 624 #if (BSD >= 199306) && defined(_KERNEL) 625 if ((securelevel >= 2) && (mode & FWRITE)) 626 return EPERM; 627 #endif 628 629 #if defined(__osf__) && defined(_KERNEL) 630 getlock = 0; 631 #else 632 getlock = (mode & NAT_LOCKHELD) ? 0 : 1; 633 #endif 634 635 nat = NULL; /* XXX gcc -Wuninitialized */ 636 if (cmd == (ioctlcmd_t)SIOCADNAT) { 637 KMALLOC(nt, ipnat_t *); 638 } else { 639 nt = NULL; 640 } 641 642 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) { 643 if (mode & NAT_SYSSPACE) { 644 bcopy(data, (char *)&natd, sizeof(natd)); 645 error = 0; 646 } else { 647 error = fr_inobj(data, &natd, IPFOBJ_IPNAT); 648 } 649 650 } 651 652 if (error != 0) 653 goto done; 654 655 /* 656 * For add/delete, look to see if the NAT entry is already present 657 */ 658 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) { 659 nat = &natd; 660 if (nat->in_v == 0) /* For backward compat. */ 661 nat->in_v = 4; 662 nat->in_flags &= IPN_USERFLAGS; 663 if ((nat->in_redir & NAT_MAPBLK) == 0) { 664 if ((nat->in_flags & IPN_SPLIT) == 0) 665 nat->in_inip &= nat->in_inmsk; 666 if ((nat->in_flags & IPN_IPRANGE) == 0) 667 nat->in_outip &= nat->in_outmsk; 668 } 669 MUTEX_ENTER(&ifs->ifs_ipf_natio); 670 for (np = &ifs->ifs_nat_list; ((n = *np) != NULL); 671 np = &n->in_next) 672 if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags, 673 IPN_CMPSIZ) == 0) { 674 if (nat->in_redir == NAT_REDIRECT && 675 nat->in_pnext != n->in_pnext) 676 continue; 677 break; 678 } 679 } 680 681 switch (cmd) 682 { 683 case SIOCGENITER : 684 { 685 ipfgeniter_t iter; 686 ipftoken_t *token; 687 688 error = fr_inobj(data, &iter, IPFOBJ_GENITER); 689 if (error != 0) 690 break; 691 692 token = ipf_findtoken(iter.igi_type, uid, ctx, ifs); 693 if (token != NULL) 694 error = nat_iterator(token, &iter, ifs); 695 else 696 error = ESRCH; 697 RWLOCK_EXIT(&ifs->ifs_ipf_tokens); 698 break; 699 } 700 #ifdef IPFILTER_LOG 701 case SIOCIPFFB : 702 { 703 int tmp; 704 705 if (!(mode & FWRITE)) 706 error = EPERM; 707 else { 708 tmp = ipflog_clear(IPL_LOGNAT, ifs); 709 error = BCOPYOUT((char *)&tmp, (char *)data, 710 sizeof(tmp)); 711 if (error != 0) 712 error = EFAULT; 713 } 714 break; 715 } 716 case SIOCSETLG : 717 if (!(mode & FWRITE)) { 718 error = EPERM; 719 } else { 720 error = BCOPYIN((char *)data, 721 (char *)&ifs->ifs_nat_logging, 722 sizeof(ifs->ifs_nat_logging)); 723 if (error != 0) 724 error = EFAULT; 725 } 726 break; 727 case SIOCGETLG : 728 error = BCOPYOUT((char *)&ifs->ifs_nat_logging, (char *)data, 729 sizeof(ifs->ifs_nat_logging)); 730 if (error != 0) 731 error = EFAULT; 732 break; 733 case FIONREAD : 734 arg = ifs->ifs_iplused[IPL_LOGNAT]; 735 error = BCOPYOUT(&arg, data, sizeof(arg)); 736 if (error != 0) 737 error = EFAULT; 738 break; 739 #endif 740 case SIOCADNAT : 741 if (!(mode & FWRITE)) { 742 error = EPERM; 743 } else if (n != NULL) { 744 error = EEXIST; 745 } else if (nt == NULL) { 746 error = ENOMEM; 747 } 748 if (error != 0) { 749 MUTEX_EXIT(&ifs->ifs_ipf_natio); 750 break; 751 } 752 bcopy((char *)nat, (char *)nt, sizeof(*n)); 753 error = nat_siocaddnat(nt, np, getlock, ifs); 754 MUTEX_EXIT(&ifs->ifs_ipf_natio); 755 if (error == 0) 756 nt = NULL; 757 break; 758 case SIOCRMNAT : 759 if (!(mode & FWRITE)) { 760 error = EPERM; 761 n = NULL; 762 } else if (n == NULL) { 763 error = ESRCH; 764 } 765 766 if (error != 0) { 767 MUTEX_EXIT(&ifs->ifs_ipf_natio); 768 break; 769 } 770 nat_siocdelnat(n, np, getlock, ifs); 771 772 MUTEX_EXIT(&ifs->ifs_ipf_natio); 773 n = NULL; 774 break; 775 case SIOCGNATS : 776 ifs->ifs_nat_stats.ns_table[0] = ifs->ifs_nat_table[0]; 777 ifs->ifs_nat_stats.ns_table[1] = ifs->ifs_nat_table[1]; 778 ifs->ifs_nat_stats.ns_list = ifs->ifs_nat_list; 779 ifs->ifs_nat_stats.ns_maptable = ifs->ifs_maptable; 780 ifs->ifs_nat_stats.ns_maplist = ifs->ifs_ipf_hm_maplist; 781 ifs->ifs_nat_stats.ns_nattab_max = ifs->ifs_ipf_nattable_max; 782 ifs->ifs_nat_stats.ns_nattab_sz = ifs->ifs_ipf_nattable_sz; 783 ifs->ifs_nat_stats.ns_rultab_sz = ifs->ifs_ipf_natrules_sz; 784 ifs->ifs_nat_stats.ns_rdrtab_sz = ifs->ifs_ipf_rdrrules_sz; 785 ifs->ifs_nat_stats.ns_hostmap_sz = ifs->ifs_ipf_hostmap_sz; 786 ifs->ifs_nat_stats.ns_instances = ifs->ifs_nat_instances; 787 ifs->ifs_nat_stats.ns_apslist = ifs->ifs_ap_sess_list; 788 error = fr_outobj(data, &ifs->ifs_nat_stats, IPFOBJ_NATSTAT); 789 break; 790 case SIOCGNATL : 791 { 792 natlookup_t nl; 793 794 if (getlock) { 795 READ_ENTER(&ifs->ifs_ipf_nat); 796 } 797 error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP); 798 if (nl.nl_v != 6) 799 nl.nl_v = 4; 800 if (error == 0) { 801 void *ptr; 802 803 switch (nl.nl_v) 804 { 805 case 4: 806 ptr = nat_lookupredir(&nl, ifs); 807 break; 808 #ifdef USE_INET6 809 case 6: 810 ptr = nat6_lookupredir(&nl, ifs); 811 break; 812 #endif 813 default: 814 ptr = NULL; 815 break; 816 } 817 818 if (ptr != NULL) { 819 error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP); 820 } else { 821 error = ESRCH; 822 } 823 } 824 if (getlock) { 825 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 826 } 827 break; 828 } 829 case SIOCIPFFL : /* old SIOCFLNAT & SIOCCNATL */ 830 if (!(mode & FWRITE)) { 831 error = EPERM; 832 break; 833 } 834 if (getlock) { 835 WRITE_ENTER(&ifs->ifs_ipf_nat); 836 } 837 error = BCOPYIN(data, &arg, sizeof(arg)); 838 if (error != 0) { 839 error = EFAULT; 840 } else { 841 if (arg == 0) 842 ret = nat_flushtable(ifs); 843 else if (arg == 1) 844 ret = nat_clearlist(ifs); 845 else if (arg >= 2 && arg <= 4) 846 ret = nat_extraflush(arg - 2, ifs); 847 else 848 error = EINVAL; 849 } 850 if (getlock) { 851 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 852 } 853 if (error == 0) { 854 error = BCOPYOUT(&ret, data, sizeof(ret)); 855 if (error != 0) 856 error = EFAULT; 857 } 858 break; 859 case SIOCPROXY : 860 error = appr_ioctl(data, cmd, mode, ifs); 861 break; 862 case SIOCSTLCK : 863 if (!(mode & FWRITE)) { 864 error = EPERM; 865 } else { 866 error = fr_lock(data, &ifs->ifs_fr_nat_lock); 867 } 868 break; 869 case SIOCSTPUT : 870 if ((mode & FWRITE) != 0) { 871 error = fr_natputent(data, getlock, ifs); 872 } else { 873 error = EACCES; 874 } 875 break; 876 case SIOCSTGSZ : 877 if (ifs->ifs_fr_nat_lock) { 878 if (getlock) { 879 READ_ENTER(&ifs->ifs_ipf_nat); 880 } 881 error = fr_natgetsz(data, ifs); 882 if (getlock) { 883 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 884 } 885 } else 886 error = EACCES; 887 break; 888 case SIOCSTGET : 889 if (ifs->ifs_fr_nat_lock) { 890 if (getlock) { 891 READ_ENTER(&ifs->ifs_ipf_nat); 892 } 893 error = fr_natgetent(data, ifs); 894 if (getlock) { 895 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 896 } 897 } else 898 error = EACCES; 899 break; 900 case SIOCIPFDELTOK : 901 error = BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg)); 902 if (error != 0) { 903 error = EFAULT; 904 } else { 905 error = ipf_deltoken(arg, uid, ctx, ifs); 906 } 907 break; 908 default : 909 error = EINVAL; 910 break; 911 } 912 done: 913 if (nt) 914 KFREE(nt); 915 return error; 916 } 917 918 919 /* ------------------------------------------------------------------------ */ 920 /* Function: nat_siocaddnat */ 921 /* Returns: int - 0 == success, != 0 == failure */ 922 /* Parameters: n(I) - pointer to new NAT rule */ 923 /* np(I) - pointer to where to insert new NAT rule */ 924 /* getlock(I) - flag indicating if lock on ipf_nat is held */ 925 /* Mutex Locks: ipf_natio */ 926 /* */ 927 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ 928 /* from information passed to the kernel, then add it to the appropriate */ 929 /* NAT rule table(s). */ 930 /* ------------------------------------------------------------------------ */ 931 static int nat_siocaddnat(n, np, getlock, ifs) 932 ipnat_t *n, **np; 933 int getlock; 934 ipf_stack_t *ifs; 935 { 936 int error = 0, i, j; 937 938 if (nat_resolverule(n, ifs) != 0) 939 return ENOENT; 940 941 if ((n->in_age[0] == 0) && (n->in_age[1] != 0)) 942 return EINVAL; 943 944 n->in_use = 0; 945 if (n->in_redir & NAT_MAPBLK) 946 n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk); 947 else if (n->in_flags & IPN_AUTOPORTMAP) 948 n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk); 949 else if (n->in_flags & IPN_IPRANGE) 950 n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip); 951 else if (n->in_flags & IPN_SPLIT) 952 n->in_space = 2; 953 else if (n->in_outmsk != 0) 954 n->in_space = ~ntohl(n->in_outmsk); 955 else 956 n->in_space = 1; 957 958 /* 959 * Calculate the number of valid IP addresses in the output 960 * mapping range. In all cases, the range is inclusive of 961 * the start and ending IP addresses. 962 * If to a CIDR address, lose 2: broadcast + network address 963 * (so subtract 1) 964 * If to a range, add one. 965 * If to a single IP address, set to 1. 966 */ 967 if (n->in_space) { 968 if ((n->in_flags & IPN_IPRANGE) != 0) 969 n->in_space += 1; 970 else 971 n->in_space -= 1; 972 } else 973 n->in_space = 1; 974 975 #ifdef USE_INET6 976 if (n->in_v == 6 && (n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0 && 977 !IP6_ISONES(&n->in_out[1]) && !IP6_ISZERO(&n->in_out[1])) 978 IP6_ADD(&n->in_out[0], 1, &n->in_next6) 979 else if (n->in_v == 6 && 980 (n->in_flags & IPN_SPLIT) && (n->in_redir & NAT_REDIRECT)) 981 n->in_next6 = n->in_in[0]; 982 else if (n->in_v == 6) 983 n->in_next6 = n->in_out[0]; 984 else 985 #endif 986 if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) && 987 ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0)) 988 n->in_nip = ntohl(n->in_outip) + 1; 989 else if ((n->in_flags & IPN_SPLIT) && 990 (n->in_redir & NAT_REDIRECT)) 991 n->in_nip = ntohl(n->in_inip); 992 else 993 n->in_nip = ntohl(n->in_outip); 994 995 if (n->in_redir & NAT_MAP) { 996 n->in_pnext = ntohs(n->in_pmin); 997 /* 998 * Multiply by the number of ports made available. 999 */ 1000 if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) { 1001 n->in_space *= (ntohs(n->in_pmax) - 1002 ntohs(n->in_pmin) + 1); 1003 /* 1004 * Because two different sources can map to 1005 * different destinations but use the same 1006 * local IP#/port #. 1007 * If the result is smaller than in_space, then 1008 * we may have wrapped around 32bits. 1009 */ 1010 i = n->in_inmsk; 1011 if ((i != 0) && (i != 0xffffffff)) { 1012 j = n->in_space * (~ntohl(i) + 1); 1013 if (j >= n->in_space) 1014 n->in_space = j; 1015 else 1016 n->in_space = 0xffffffff; 1017 } 1018 } 1019 /* 1020 * If no protocol is specified, multiple by 256 to allow for 1021 * at least one IP:IP mapping per protocol. 1022 */ 1023 if ((n->in_flags & IPN_TCPUDPICMP) == 0) { 1024 j = n->in_space * 256; 1025 if (j >= n->in_space) 1026 n->in_space = j; 1027 else 1028 n->in_space = 0xffffffff; 1029 } 1030 } 1031 1032 /* Otherwise, these fields are preset */ 1033 1034 if (getlock) { 1035 WRITE_ENTER(&ifs->ifs_ipf_nat); 1036 } 1037 n->in_next = NULL; 1038 *np = n; 1039 1040 if (n->in_age[0] != 0) 1041 n->in_tqehead[0] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe, 1042 n->in_age[0], ifs); 1043 1044 if (n->in_age[1] != 0) 1045 n->in_tqehead[1] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe, 1046 n->in_age[1], ifs); 1047 1048 if (n->in_redir & NAT_REDIRECT) { 1049 n->in_flags &= ~IPN_NOTDST; 1050 switch (n->in_v) 1051 { 1052 case 4 : 1053 nat_addrdr(n, ifs); 1054 break; 1055 #ifdef USE_INET6 1056 case 6 : 1057 nat6_addrdr(n, ifs); 1058 break; 1059 #endif 1060 default : 1061 break; 1062 } 1063 } 1064 if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) { 1065 n->in_flags &= ~IPN_NOTSRC; 1066 switch (n->in_v) 1067 { 1068 case 4 : 1069 nat_addnat(n, ifs); 1070 break; 1071 #ifdef USE_INET6 1072 case 6 : 1073 nat6_addnat(n, ifs); 1074 break; 1075 #endif 1076 default : 1077 break; 1078 } 1079 } 1080 n = NULL; 1081 ifs->ifs_nat_stats.ns_rules++; 1082 if (getlock) { 1083 RWLOCK_EXIT(&ifs->ifs_ipf_nat); /* WRITE */ 1084 } 1085 1086 return error; 1087 } 1088 1089 1090 /* ------------------------------------------------------------------------ */ 1091 /* Function: nat_resolvrule */ 1092 /* Returns: int - 0 == success, -1 == failure */ 1093 /* Parameters: n(I) - pointer to NAT rule */ 1094 /* */ 1095 /* Resolve some of the details inside the NAT rule. Includes resolving */ 1096 /* any specified interfaces and proxy labels, and determines whether or not */ 1097 /* all proxy labels are correctly specified. */ 1098 /* */ 1099 /* Called by nat_siocaddnat() (SIOCADNAT) and fr_natputent (SIOCSTPUT). */ 1100 /* ------------------------------------------------------------------------ */ 1101 static int nat_resolverule(n, ifs) 1102 ipnat_t *n; 1103 ipf_stack_t *ifs; 1104 { 1105 n->in_ifnames[0][LIFNAMSIZ - 1] = '\0'; 1106 n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], n->in_v, ifs); 1107 1108 n->in_ifnames[1][LIFNAMSIZ - 1] = '\0'; 1109 if (n->in_ifnames[1][0] == '\0') { 1110 (void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ); 1111 n->in_ifps[1] = n->in_ifps[0]; 1112 } else { 1113 n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], n->in_v, ifs); 1114 } 1115 1116 if (n->in_plabel[0] != '\0') { 1117 n->in_apr = appr_lookup(n->in_p, n->in_plabel, ifs); 1118 if (n->in_apr == NULL) 1119 return -1; 1120 } 1121 return 0; 1122 } 1123 1124 1125 /* ------------------------------------------------------------------------ */ 1126 /* Function: nat_siocdelnat */ 1127 /* Returns: int - 0 == success, != 0 == failure */ 1128 /* Parameters: n(I) - pointer to new NAT rule */ 1129 /* np(I) - pointer to where to insert new NAT rule */ 1130 /* getlock(I) - flag indicating if lock on ipf_nat is held */ 1131 /* Mutex Locks: ipf_natio */ 1132 /* */ 1133 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ 1134 /* from information passed to the kernel, then add it to the appropriate */ 1135 /* NAT rule table(s). */ 1136 /* ------------------------------------------------------------------------ */ 1137 static void nat_siocdelnat(n, np, getlock, ifs) 1138 ipnat_t *n, **np; 1139 int getlock; 1140 ipf_stack_t *ifs; 1141 { 1142 int i; 1143 1144 if (getlock) { 1145 WRITE_ENTER(&ifs->ifs_ipf_nat); 1146 } 1147 if (n->in_redir & NAT_REDIRECT) 1148 nat_delrdr(n); 1149 if (n->in_redir & (NAT_MAPBLK|NAT_MAP)) 1150 nat_delnat(n); 1151 if (ifs->ifs_nat_list == NULL) { 1152 ifs->ifs_nat_masks = 0; 1153 ifs->ifs_rdr_masks = 0; 1154 for (i = 0; i < 4; i++) { 1155 ifs->ifs_nat6_masks[i] = 0; 1156 ifs->ifs_rdr6_masks[i] = 0; 1157 } 1158 } 1159 1160 if (n->in_tqehead[0] != NULL) { 1161 if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) { 1162 fr_freetimeoutqueue(n->in_tqehead[0], ifs); 1163 } 1164 } 1165 1166 if (n->in_tqehead[1] != NULL) { 1167 if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) { 1168 fr_freetimeoutqueue(n->in_tqehead[1], ifs); 1169 } 1170 } 1171 1172 *np = n->in_next; 1173 1174 if (n->in_use == 0) { 1175 if (n->in_apr) 1176 appr_free(n->in_apr); 1177 KFREE(n); 1178 ifs->ifs_nat_stats.ns_rules--; 1179 } else { 1180 n->in_flags |= IPN_DELETE; 1181 n->in_next = NULL; 1182 } 1183 if (getlock) { 1184 RWLOCK_EXIT(&ifs->ifs_ipf_nat); /* READ/WRITE */ 1185 } 1186 } 1187 1188 1189 /* ------------------------------------------------------------------------ */ 1190 /* Function: fr_natgetsz */ 1191 /* Returns: int - 0 == success, != 0 is the error value. */ 1192 /* Parameters: data(I) - pointer to natget structure with kernel pointer */ 1193 /* get the size of. */ 1194 /* */ 1195 /* Handle SIOCSTGSZ. */ 1196 /* Return the size of the nat list entry to be copied back to user space. */ 1197 /* The size of the entry is stored in the ng_sz field and the enture natget */ 1198 /* structure is copied back to the user. */ 1199 /* ------------------------------------------------------------------------ */ 1200 static int fr_natgetsz(data, ifs) 1201 caddr_t data; 1202 ipf_stack_t *ifs; 1203 { 1204 ap_session_t *aps; 1205 nat_t *nat, *n; 1206 natget_t ng; 1207 int err; 1208 1209 err = BCOPYIN(data, &ng, sizeof(ng)); 1210 if (err != 0) 1211 return EFAULT; 1212 1213 nat = ng.ng_ptr; 1214 if (!nat) { 1215 nat = ifs->ifs_nat_instances; 1216 ng.ng_sz = 0; 1217 /* 1218 * Empty list so the size returned is 0. Simple. 1219 */ 1220 if (nat == NULL) { 1221 err = BCOPYOUT(&ng, data, sizeof(ng)); 1222 if (err != 0) { 1223 return EFAULT; 1224 } else { 1225 return 0; 1226 } 1227 } 1228 } else { 1229 /* 1230 * Make sure the pointer we're copying from exists in the 1231 * current list of entries. Security precaution to prevent 1232 * copying of random kernel data. 1233 */ 1234 for (n = ifs->ifs_nat_instances; n; n = n->nat_next) 1235 if (n == nat) 1236 break; 1237 if (!n) 1238 return ESRCH; 1239 } 1240 1241 /* 1242 * Incluse any space required for proxy data structures. 1243 */ 1244 ng.ng_sz = sizeof(nat_save_t); 1245 aps = nat->nat_aps; 1246 if (aps != NULL) { 1247 ng.ng_sz += sizeof(ap_session_t) - 4; 1248 if (aps->aps_data != 0) 1249 ng.ng_sz += aps->aps_psiz; 1250 } 1251 1252 err = BCOPYOUT(&ng, data, sizeof(ng)); 1253 if (err != 0) 1254 return EFAULT; 1255 return 0; 1256 } 1257 1258 1259 /* ------------------------------------------------------------------------ */ 1260 /* Function: fr_natgetent */ 1261 /* Returns: int - 0 == success, != 0 is the error value. */ 1262 /* Parameters: data(I) - pointer to natget structure with kernel pointer */ 1263 /* to NAT structure to copy out. */ 1264 /* */ 1265 /* Handle SIOCSTGET. */ 1266 /* Copies out NAT entry to user space. Any additional data held for a */ 1267 /* proxy is also copied, as to is the NAT rule which was responsible for it */ 1268 /* ------------------------------------------------------------------------ */ 1269 static int fr_natgetent(data, ifs) 1270 caddr_t data; 1271 ipf_stack_t *ifs; 1272 { 1273 int error, outsize; 1274 ap_session_t *aps; 1275 nat_save_t *ipn, ipns; 1276 nat_t *n, *nat; 1277 1278 error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE); 1279 if (error != 0) 1280 return error; 1281 1282 if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920)) 1283 return EINVAL; 1284 1285 KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize); 1286 if (ipn == NULL) 1287 return ENOMEM; 1288 1289 ipn->ipn_dsize = ipns.ipn_dsize; 1290 nat = ipns.ipn_next; 1291 if (nat == NULL) { 1292 nat = ifs->ifs_nat_instances; 1293 if (nat == NULL) { 1294 if (ifs->ifs_nat_instances == NULL) 1295 error = ENOENT; 1296 goto finished; 1297 } 1298 } else { 1299 /* 1300 * Make sure the pointer we're copying from exists in the 1301 * current list of entries. Security precaution to prevent 1302 * copying of random kernel data. 1303 */ 1304 for (n = ifs->ifs_nat_instances; n; n = n->nat_next) 1305 if (n == nat) 1306 break; 1307 if (n == NULL) { 1308 error = ESRCH; 1309 goto finished; 1310 } 1311 } 1312 ipn->ipn_next = nat->nat_next; 1313 1314 /* 1315 * Copy the NAT structure. 1316 */ 1317 bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat)); 1318 1319 /* 1320 * If we have a pointer to the NAT rule it belongs to, save that too. 1321 */ 1322 if (nat->nat_ptr != NULL) 1323 bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat, 1324 sizeof(ipn->ipn_ipnat)); 1325 1326 /* 1327 * If we also know the NAT entry has an associated filter rule, 1328 * save that too. 1329 */ 1330 if (nat->nat_fr != NULL) 1331 bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr, 1332 sizeof(ipn->ipn_fr)); 1333 1334 /* 1335 * Last but not least, if there is an application proxy session set 1336 * up for this NAT entry, then copy that out too, including any 1337 * private data saved along side it by the proxy. 1338 */ 1339 aps = nat->nat_aps; 1340 outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data); 1341 if (aps != NULL) { 1342 char *s; 1343 1344 if (outsize < sizeof(*aps)) { 1345 error = ENOBUFS; 1346 goto finished; 1347 } 1348 1349 s = ipn->ipn_data; 1350 bcopy((char *)aps, s, sizeof(*aps)); 1351 s += sizeof(*aps); 1352 outsize -= sizeof(*aps); 1353 if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz)) 1354 bcopy(aps->aps_data, s, aps->aps_psiz); 1355 else 1356 error = ENOBUFS; 1357 } 1358 if (error == 0) { 1359 error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize); 1360 } 1361 1362 finished: 1363 if (ipn != NULL) { 1364 KFREES(ipn, ipns.ipn_dsize); 1365 } 1366 return error; 1367 } 1368 1369 /* ------------------------------------------------------------------------ */ 1370 /* Function: nat_calc_chksum_diffs */ 1371 /* Returns: void */ 1372 /* Parameters: nat - pointer to NAT table entry */ 1373 /* */ 1374 /* Function calculates chksum deltas for IP header (nat_ipsumd) and TCP/UDP */ 1375 /* headers (nat_sumd). The things for L4 (UDP/TCP) get complicated when */ 1376 /* we are dealing with partial chksum offload. For these cases we need to */ 1377 /* compute a 'partial chksum delta'. The 'partial chksum delta'is stored */ 1378 /* into nat_sumd[1], while ordinary chksum delta for TCP/UDP is in */ 1379 /* nat_sumd[0]. */ 1380 /* */ 1381 /* The function accepts initialized NAT table entry and computes the deltas */ 1382 /* from nat_inip/nat_outip members. The function is called right before */ 1383 /* the new entry is inserted into the table. */ 1384 /* */ 1385 /* The ipsumd (IP hedaer chksum delta adjustment) is computed as a chksum */ 1386 /* of delta between original and new IP addresses. */ 1387 /* */ 1388 /* the nat_sumd[0] (TCP/UDP header chksum delta adjustment) is computed as */ 1389 /* a chkusm of delta between original an new IP addrress:port tupples. */ 1390 /* */ 1391 /* Some facts about chksum, we should remember: */ 1392 /* IP header chksum covers IP header only */ 1393 /* */ 1394 /* TCP/UDP chksum covers data payload and so called pseudo header */ 1395 /* SRC, DST IP address */ 1396 /* SRC, DST Port */ 1397 /* length of payload */ 1398 /* */ 1399 /* The partial chksum delta (nat_sumd[1] is used to adjust db_ckusm16 */ 1400 /* member of dblk_t structure. The db_ckusm16 member is not part of */ 1401 /* IP/UDP/TCP header it is 16 bit value computed by NIC driver with partial */ 1402 /* chksum offload capacbility for every inbound packet. The db_cksum16 is */ 1403 /* stored along with other IP packet data in dblk_t structure and used in */ 1404 /* for IP/UDP/TCP chksum validation later in ip.c. */ 1405 /* */ 1406 /* The partial chksum delta (adjustment, nat_sumd[1]) is computed as chksum */ 1407 /* of delta between new and orig address. NOTE: the order of operands for */ 1408 /* partial delta operation is swapped compared to computing the IP/TCP/UDP */ 1409 /* header adjustment. It is by design see (IP_CKSUM_RECV() macro in ip.c). */ 1410 /* */ 1411 /* ------------------------------------------------------------------------ */ 1412 void nat_calc_chksum_diffs(nat) 1413 nat_t *nat; 1414 { 1415 u_32_t sum_orig = 0; 1416 u_32_t sum_changed = 0; 1417 u_32_t sumd; 1418 u_32_t ipsum_orig = 0; 1419 u_32_t ipsum_changed = 0; 1420 1421 if (nat->nat_v != 4 && nat->nat_v != 6) 1422 return; 1423 1424 /* 1425 * the switch calculates operands for CALC_SUMD(), 1426 * which will compute the partial chksum delta. 1427 */ 1428 switch (nat->nat_dir) 1429 { 1430 case NAT_INBOUND: 1431 /* 1432 * we are dealing with RDR rule (DST address gets 1433 * modified on packet from client) 1434 */ 1435 if (nat->nat_v == 4) { 1436 sum_changed = LONG_SUM(ntohl(nat->nat_inip.s_addr)); 1437 sum_orig = LONG_SUM(ntohl(nat->nat_outip.s_addr)); 1438 } else { 1439 sum_changed = LONG_SUM6(&nat->nat_inip6); 1440 sum_orig = LONG_SUM6(&nat->nat_outip6); 1441 } 1442 break; 1443 case NAT_OUTBOUND: 1444 /* 1445 * we are dealing with MAP rule (SRC address gets 1446 * modified on packet from client) 1447 */ 1448 if (nat->nat_v == 4) { 1449 sum_changed = LONG_SUM(ntohl(nat->nat_outip.s_addr)); 1450 sum_orig = LONG_SUM(ntohl(nat->nat_inip.s_addr)); 1451 } else { 1452 sum_changed = LONG_SUM6(&nat->nat_outip6); 1453 sum_orig = LONG_SUM6(&nat->nat_inip6); 1454 } 1455 break; 1456 default: ; 1457 break; 1458 } 1459 1460 /* 1461 * we also preserve CALC_SUMD() operands here, for IP chksum delta 1462 * calculation, which happens at the end of function. 1463 */ 1464 ipsum_changed = sum_changed; 1465 ipsum_orig = sum_orig; 1466 /* 1467 * NOTE: the order of operands for partial chksum adjustment 1468 * computation has to be swapped! 1469 */ 1470 CALC_SUMD(sum_changed, sum_orig, sumd); 1471 nat->nat_sumd[1] = (sumd & 0xffff) + (sumd >> 16); 1472 1473 if (nat->nat_flags & (IPN_TCPUDP | IPN_ICMPQUERY)) { 1474 1475 /* 1476 * switch calculates operands for CALC_SUMD(), which will 1477 * compute the full chksum delta. 1478 */ 1479 switch (nat->nat_dir) 1480 { 1481 case NAT_INBOUND: 1482 if (nat->nat_v == 4) { 1483 sum_changed = LONG_SUM( 1484 ntohl(nat->nat_inip.s_addr) + 1485 ntohs(nat->nat_inport)); 1486 sum_orig = LONG_SUM( 1487 ntohl(nat->nat_outip.s_addr) + 1488 ntohs(nat->nat_outport)); 1489 } else { 1490 sum_changed = LONG_SUM6(&nat->nat_inip6) + 1491 ntohs(nat->nat_inport); 1492 sum_orig = LONG_SUM6(&nat->nat_outip6) + 1493 ntohs(nat->nat_outport); 1494 } 1495 break; 1496 case NAT_OUTBOUND: 1497 if (nat->nat_v == 4) { 1498 sum_changed = LONG_SUM( 1499 ntohl(nat->nat_outip.s_addr) + 1500 ntohs(nat->nat_outport)); 1501 sum_orig = LONG_SUM( 1502 ntohl(nat->nat_inip.s_addr) + 1503 ntohs(nat->nat_inport)); 1504 } else { 1505 sum_changed = LONG_SUM6(&nat->nat_outip6) + 1506 ntohs(nat->nat_outport); 1507 sum_orig = LONG_SUM6(&nat->nat_inip6) + 1508 ntohs(nat->nat_inport); 1509 } 1510 break; 1511 default: ; 1512 break; 1513 } 1514 1515 CALC_SUMD(sum_orig, sum_changed, sumd); 1516 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); 1517 1518 if (!(nat->nat_flags & IPN_TCPUDP)) { 1519 /* 1520 * partial HW chksum offload works for TCP/UDP headers only, 1521 * so we need to enforce full chksum adjustment for ICMP 1522 */ 1523 nat->nat_sumd[1] = nat->nat_sumd[0]; 1524 } 1525 } 1526 else 1527 nat->nat_sumd[0] = nat->nat_sumd[1]; 1528 1529 /* 1530 * we may reuse the already computed nat_sumd[0] for IP header chksum 1531 * adjustment in case the L4 (TCP/UDP header) is not changed by NAT. 1532 */ 1533 if (nat->nat_v == 4) { 1534 if (NAT_HAS_L4_CHANGED(nat)) { 1535 /* 1536 * bad luck, NAT changes also the L4 header, use IP 1537 * addresses to compute chksum adjustment for IP header. 1538 */ 1539 CALC_SUMD(ipsum_orig, ipsum_changed, sumd); 1540 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16); 1541 } else { 1542 /* 1543 * the NAT does not change L4 hdr -> reuse chksum 1544 * adjustment for IP hdr. 1545 */ 1546 nat->nat_ipsumd = nat->nat_sumd[0]; 1547 1548 /* 1549 * if L4 header does not use chksum - zero out deltas 1550 */ 1551 if (!(nat->nat_flags & IPN_TCPUDP)) { 1552 nat->nat_sumd[0] = 0; 1553 nat->nat_sumd[1] = 0; 1554 } 1555 } 1556 } 1557 1558 return; 1559 } 1560 1561 /* ------------------------------------------------------------------------ */ 1562 /* Function: fr_natputent */ 1563 /* Returns: int - 0 == success, != 0 is the error value. */ 1564 /* Parameters: data(I) - pointer to natget structure with NAT */ 1565 /* structure information to load into the kernel */ 1566 /* getlock(I) - flag indicating whether or not a write lock */ 1567 /* on ipf_nat is already held. */ 1568 /* */ 1569 /* Handle SIOCSTPUT. */ 1570 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */ 1571 /* firewall rule data structures, if pointers to them indicate so. */ 1572 /* ------------------------------------------------------------------------ */ 1573 static int fr_natputent(data, getlock, ifs) 1574 caddr_t data; 1575 int getlock; 1576 ipf_stack_t *ifs; 1577 { 1578 nat_save_t ipn, *ipnn; 1579 ap_session_t *aps; 1580 nat_t *n, *nat; 1581 frentry_t *fr; 1582 fr_info_t fin; 1583 ipnat_t *in; 1584 int error; 1585 1586 error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE); 1587 if (error != 0) 1588 return error; 1589 1590 /* 1591 * Trigger automatic call to nat_extraflush() if the 1592 * table has reached capcity specified by hi watermark. 1593 */ 1594 if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi) 1595 ifs->ifs_nat_doflush = 1; 1596 1597 /* 1598 * If automatic flushing did not do its job, and the table 1599 * has filled up, don't try to create a new entry. 1600 */ 1601 if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) { 1602 ifs->ifs_nat_stats.ns_memfail++; 1603 return ENOMEM; 1604 } 1605 1606 /* 1607 * Initialise early because of code at junkput label. 1608 */ 1609 in = NULL; 1610 aps = NULL; 1611 nat = NULL; 1612 ipnn = NULL; 1613 1614 /* 1615 * New entry, copy in the rest of the NAT entry if it's size is more 1616 * than just the nat_t structure. 1617 */ 1618 fr = NULL; 1619 if (ipn.ipn_dsize > sizeof(ipn)) { 1620 if (ipn.ipn_dsize > 81920) { 1621 error = ENOMEM; 1622 goto junkput; 1623 } 1624 1625 KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize); 1626 if (ipnn == NULL) 1627 return ENOMEM; 1628 1629 error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize); 1630 if (error != 0) { 1631 error = EFAULT; 1632 goto junkput; 1633 } 1634 } else 1635 ipnn = &ipn; 1636 1637 KMALLOC(nat, nat_t *); 1638 if (nat == NULL) { 1639 error = ENOMEM; 1640 goto junkput; 1641 } 1642 1643 bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat)); 1644 /* 1645 * Initialize all these so that nat_delete() doesn't cause a crash. 1646 */ 1647 bzero((char *)nat, offsetof(struct nat, nat_tqe)); 1648 nat->nat_tqe.tqe_pnext = NULL; 1649 nat->nat_tqe.tqe_next = NULL; 1650 nat->nat_tqe.tqe_ifq = NULL; 1651 nat->nat_tqe.tqe_parent = nat; 1652 1653 /* 1654 * Restore the rule associated with this nat session 1655 */ 1656 in = ipnn->ipn_nat.nat_ptr; 1657 if (in != NULL) { 1658 KMALLOC(in, ipnat_t *); 1659 nat->nat_ptr = in; 1660 if (in == NULL) { 1661 error = ENOMEM; 1662 goto junkput; 1663 } 1664 bzero((char *)in, offsetof(struct ipnat, in_next6)); 1665 bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in)); 1666 in->in_use = 1; 1667 in->in_flags |= IPN_DELETE; 1668 1669 ATOMIC_INC(ifs->ifs_nat_stats.ns_rules); 1670 1671 if (nat_resolverule(in, ifs) != 0) { 1672 error = ESRCH; 1673 goto junkput; 1674 } 1675 } 1676 1677 /* 1678 * Check that the NAT entry doesn't already exist in the kernel. 1679 */ 1680 if (nat->nat_v != 6) 1681 nat->nat_v = 4; 1682 bzero((char *)&fin, sizeof(fin)); 1683 fin.fin_p = nat->nat_p; 1684 fin.fin_ifs = ifs; 1685 if (nat->nat_dir == NAT_OUTBOUND) { 1686 fin.fin_data[0] = ntohs(nat->nat_oport); 1687 fin.fin_data[1] = ntohs(nat->nat_outport); 1688 fin.fin_ifp = nat->nat_ifps[0]; 1689 if (getlock) { 1690 READ_ENTER(&ifs->ifs_ipf_nat); 1691 } 1692 1693 switch (nat->nat_v) 1694 { 1695 case 4: 1696 fin.fin_v = nat->nat_v; 1697 n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p, 1698 nat->nat_oip, nat->nat_outip); 1699 break; 1700 #ifdef USE_INET6 1701 case 6: 1702 n = nat6_inlookup(&fin, nat->nat_flags, fin.fin_p, 1703 &nat->nat_oip6.in6, &nat->nat_outip6.in6); 1704 break; 1705 #endif 1706 default: 1707 n = NULL; 1708 break; 1709 } 1710 1711 if (getlock) { 1712 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1713 } 1714 if (n != NULL) { 1715 error = EEXIST; 1716 goto junkput; 1717 } 1718 } else if (nat->nat_dir == NAT_INBOUND) { 1719 fin.fin_data[0] = ntohs(nat->nat_inport); 1720 fin.fin_data[1] = ntohs(nat->nat_oport); 1721 fin.fin_ifp = nat->nat_ifps[1]; 1722 if (getlock) { 1723 READ_ENTER(&ifs->ifs_ipf_nat); 1724 } 1725 1726 switch (nat->nat_v) 1727 { 1728 case 4: 1729 n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p, 1730 nat->nat_inip, nat->nat_oip); 1731 break; 1732 #ifdef USE_INET6 1733 case 6: 1734 n = nat6_outlookup(&fin, nat->nat_flags, fin.fin_p, 1735 &nat->nat_inip6.in6, &nat->nat_oip6.in6); 1736 break; 1737 #endif 1738 default: 1739 n = NULL; 1740 break; 1741 } 1742 1743 if (getlock) { 1744 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1745 } 1746 if (n != NULL) { 1747 error = EEXIST; 1748 goto junkput; 1749 } 1750 } else { 1751 error = EINVAL; 1752 goto junkput; 1753 } 1754 1755 /* 1756 * Restore ap_session_t structure. Include the private data allocated 1757 * if it was there. 1758 */ 1759 aps = nat->nat_aps; 1760 if (aps != NULL) { 1761 KMALLOC(aps, ap_session_t *); 1762 nat->nat_aps = aps; 1763 if (aps == NULL) { 1764 error = ENOMEM; 1765 goto junkput; 1766 } 1767 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps)); 1768 if (in != NULL) 1769 aps->aps_apr = in->in_apr; 1770 else 1771 aps->aps_apr = NULL; 1772 if (aps->aps_psiz != 0) { 1773 if (aps->aps_psiz > 81920) { 1774 error = ENOMEM; 1775 goto junkput; 1776 } 1777 KMALLOCS(aps->aps_data, void *, aps->aps_psiz); 1778 if (aps->aps_data == NULL) { 1779 error = ENOMEM; 1780 goto junkput; 1781 } 1782 bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data, 1783 aps->aps_psiz); 1784 } else { 1785 aps->aps_psiz = 0; 1786 aps->aps_data = NULL; 1787 } 1788 } 1789 1790 /* 1791 * If there was a filtering rule associated with this entry then 1792 * build up a new one. 1793 */ 1794 fr = nat->nat_fr; 1795 if (fr != NULL) { 1796 if ((nat->nat_flags & SI_NEWFR) != 0) { 1797 KMALLOC(fr, frentry_t *); 1798 nat->nat_fr = fr; 1799 if (fr == NULL) { 1800 error = ENOMEM; 1801 goto junkput; 1802 } 1803 ipnn->ipn_nat.nat_fr = fr; 1804 (void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE); 1805 bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr)); 1806 1807 fr->fr_ref = 1; 1808 fr->fr_dsize = 0; 1809 fr->fr_data = NULL; 1810 fr->fr_type = FR_T_NONE; 1811 1812 MUTEX_NUKE(&fr->fr_lock); 1813 MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock"); 1814 } else { 1815 if (getlock) { 1816 READ_ENTER(&ifs->ifs_ipf_nat); 1817 } 1818 for (n = ifs->ifs_nat_instances; n; n = n->nat_next) 1819 if (n->nat_fr == fr) 1820 break; 1821 1822 if (n != NULL) { 1823 MUTEX_ENTER(&fr->fr_lock); 1824 fr->fr_ref++; 1825 MUTEX_EXIT(&fr->fr_lock); 1826 } 1827 if (getlock) { 1828 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1829 } 1830 if (!n) { 1831 error = ESRCH; 1832 goto junkput; 1833 } 1834 } 1835 } 1836 1837 if (ipnn != &ipn) { 1838 KFREES(ipnn, ipn.ipn_dsize); 1839 ipnn = NULL; 1840 } 1841 1842 nat_calc_chksum_diffs(nat); 1843 1844 if (getlock) { 1845 WRITE_ENTER(&ifs->ifs_ipf_nat); 1846 } 1847 1848 nat_calc_chksum_diffs(nat); 1849 1850 switch (nat->nat_v) 1851 { 1852 case 4 : 1853 error = nat_insert(nat, nat->nat_rev, ifs); 1854 break; 1855 #ifdef USE_INET6 1856 case 6 : 1857 error = nat6_insert(nat, nat->nat_rev, ifs); 1858 break; 1859 #endif 1860 default : 1861 break; 1862 } 1863 1864 if ((error == 0) && (aps != NULL)) { 1865 aps->aps_next = ifs->ifs_ap_sess_list; 1866 ifs->ifs_ap_sess_list = aps; 1867 } 1868 if (getlock) { 1869 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1870 } 1871 1872 if (error == 0) 1873 return 0; 1874 1875 error = ENOMEM; 1876 1877 junkput: 1878 if (fr != NULL) 1879 (void) fr_derefrule(&fr, ifs); 1880 1881 if ((ipnn != NULL) && (ipnn != &ipn)) { 1882 KFREES(ipnn, ipn.ipn_dsize); 1883 } 1884 if (nat != NULL) { 1885 if (aps != NULL) { 1886 if (aps->aps_data != NULL) { 1887 KFREES(aps->aps_data, aps->aps_psiz); 1888 } 1889 KFREE(aps); 1890 } 1891 if (in != NULL) { 1892 if (in->in_apr) 1893 appr_free(in->in_apr); 1894 KFREE(in); 1895 } 1896 KFREE(nat); 1897 } 1898 return error; 1899 } 1900 1901 1902 /* ------------------------------------------------------------------------ */ 1903 /* Function: nat_delete */ 1904 /* Returns: Nil */ 1905 /* Parameters: natd(I) - pointer to NAT structure to delete */ 1906 /* logtype(I) - type of LOG record to create before deleting */ 1907 /* ifs - ipf stack instance */ 1908 /* Write Lock: ipf_nat */ 1909 /* */ 1910 /* Delete a nat entry from the various lists and table. If NAT logging is */ 1911 /* enabled then generate a NAT log record for this event. */ 1912 /* ------------------------------------------------------------------------ */ 1913 void nat_delete(nat, logtype, ifs) 1914 struct nat *nat; 1915 int logtype; 1916 ipf_stack_t *ifs; 1917 { 1918 struct ipnat *ipn; 1919 int removed = 0; 1920 1921 if (logtype != 0 && ifs->ifs_nat_logging != 0) 1922 nat_log(nat, logtype, ifs); 1923 1924 /* 1925 * Start by removing the entry from the hash table of nat entries 1926 * so it will not be "used" again. 1927 * 1928 * It will remain in the "list" of nat entries until all references 1929 * have been accounted for. 1930 */ 1931 if ((nat->nat_phnext[0] != NULL) && (nat->nat_phnext[1] != NULL)) { 1932 removed = 1; 1933 1934 ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--; 1935 ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--; 1936 1937 *nat->nat_phnext[0] = nat->nat_hnext[0]; 1938 if (nat->nat_hnext[0] != NULL) { 1939 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; 1940 nat->nat_hnext[0] = NULL; 1941 } 1942 nat->nat_phnext[0] = NULL; 1943 1944 *nat->nat_phnext[1] = nat->nat_hnext[1]; 1945 if (nat->nat_hnext[1] != NULL) { 1946 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; 1947 nat->nat_hnext[1] = NULL; 1948 } 1949 nat->nat_phnext[1] = NULL; 1950 1951 if ((nat->nat_flags & SI_WILDP) != 0) 1952 ifs->ifs_nat_stats.ns_wilds--; 1953 } 1954 1955 /* 1956 * Next, remove it from the timeout queue it is in. 1957 */ 1958 fr_deletequeueentry(&nat->nat_tqe); 1959 1960 if (nat->nat_me != NULL) { 1961 *nat->nat_me = NULL; 1962 nat->nat_me = NULL; 1963 } 1964 1965 MUTEX_ENTER(&nat->nat_lock); 1966 if (logtype == NL_DESTROY) { 1967 /* 1968 * NL_DESTROY should only be passed when nat_ref >= 2. 1969 * This happens when a nat'd packet is blocked, we have 1970 * just created the nat table entry (reason why the ref 1971 * count is 2 or higher), but and we want to throw away 1972 * that NAT session as result of the blocked packet. 1973 */ 1974 if (nat->nat_ref > 2) { 1975 nat->nat_ref -= 2; 1976 MUTEX_EXIT(&nat->nat_lock); 1977 if (removed) 1978 ifs->ifs_nat_stats.ns_orphans++; 1979 return; 1980 } 1981 } else if (nat->nat_ref > 1) { 1982 nat->nat_ref--; 1983 MUTEX_EXIT(&nat->nat_lock); 1984 if (removed) 1985 ifs->ifs_nat_stats.ns_orphans++; 1986 return; 1987 } 1988 MUTEX_EXIT(&nat->nat_lock); 1989 1990 nat->nat_ref = 0; 1991 1992 /* 1993 * If entry had already been removed, 1994 * it means we're cleaning up an orphan. 1995 */ 1996 if (!removed) 1997 ifs->ifs_nat_stats.ns_orphans--; 1998 1999 #ifdef IPFILTER_SYNC 2000 if (nat->nat_sync) 2001 ipfsync_del(nat->nat_sync); 2002 #endif 2003 2004 /* 2005 * Now remove it from master list of nat table entries 2006 */ 2007 if (nat->nat_pnext != NULL) { 2008 *nat->nat_pnext = nat->nat_next; 2009 if (nat->nat_next != NULL) { 2010 nat->nat_next->nat_pnext = nat->nat_pnext; 2011 nat->nat_next = NULL; 2012 } 2013 nat->nat_pnext = NULL; 2014 } 2015 2016 if (nat->nat_fr != NULL) 2017 (void)fr_derefrule(&nat->nat_fr, ifs); 2018 2019 if (nat->nat_hm != NULL) 2020 fr_hostmapdel(&nat->nat_hm); 2021 2022 /* 2023 * If there is an active reference from the nat entry to its parent 2024 * rule, decrement the rule's reference count and free it too if no 2025 * longer being used. 2026 */ 2027 ipn = nat->nat_ptr; 2028 if (ipn != NULL) { 2029 ipn->in_space++; 2030 ipn->in_use--; 2031 if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) { 2032 if (ipn->in_apr) 2033 appr_free(ipn->in_apr); 2034 KFREE(ipn); 2035 ifs->ifs_nat_stats.ns_rules--; 2036 } 2037 } 2038 2039 MUTEX_DESTROY(&nat->nat_lock); 2040 2041 aps_free(nat->nat_aps, ifs); 2042 ifs->ifs_nat_stats.ns_inuse--; 2043 2044 /* 2045 * If there's a fragment table entry too for this nat entry, then 2046 * dereference that as well. This is after nat_lock is released 2047 * because of Tru64. 2048 */ 2049 fr_forgetnat((void *)nat, ifs); 2050 2051 KFREE(nat); 2052 } 2053 2054 2055 /* ------------------------------------------------------------------------ */ 2056 /* Function: nat_flushtable */ 2057 /* Returns: int - number of NAT rules deleted */ 2058 /* Parameters: Nil */ 2059 /* */ 2060 /* Deletes all currently active NAT sessions. In deleting each NAT entry a */ 2061 /* log record should be emitted in nat_delete() if NAT logging is enabled. */ 2062 /* ------------------------------------------------------------------------ */ 2063 /* 2064 * nat_flushtable - clear the NAT table of all mapping entries. 2065 */ 2066 static int nat_flushtable(ifs) 2067 ipf_stack_t *ifs; 2068 { 2069 nat_t *nat; 2070 int j = 0; 2071 2072 /* 2073 * ALL NAT mappings deleted, so lets just make the deletions 2074 * quicker. 2075 */ 2076 if (ifs->ifs_nat_table[0] != NULL) 2077 bzero((char *)ifs->ifs_nat_table[0], 2078 sizeof(ifs->ifs_nat_table[0]) * ifs->ifs_ipf_nattable_sz); 2079 if (ifs->ifs_nat_table[1] != NULL) 2080 bzero((char *)ifs->ifs_nat_table[1], 2081 sizeof(ifs->ifs_nat_table[1]) * ifs->ifs_ipf_nattable_sz); 2082 2083 while ((nat = ifs->ifs_nat_instances) != NULL) { 2084 nat_delete(nat, NL_FLUSH, ifs); 2085 j++; 2086 } 2087 2088 return j; 2089 } 2090 2091 2092 /* ------------------------------------------------------------------------ */ 2093 /* Function: nat_clearlist */ 2094 /* Returns: int - number of NAT/RDR rules deleted */ 2095 /* Parameters: Nil */ 2096 /* */ 2097 /* Delete all rules in the current list of rules. There is nothing elegant */ 2098 /* about this cleanup: simply free all entries on the list of rules and */ 2099 /* clear out the tables used for hashed NAT rule lookups. */ 2100 /* ------------------------------------------------------------------------ */ 2101 static int nat_clearlist(ifs) 2102 ipf_stack_t *ifs; 2103 { 2104 ipnat_t *n, **np = &ifs->ifs_nat_list; 2105 int i = 0; 2106 2107 if (ifs->ifs_nat_rules != NULL) 2108 bzero((char *)ifs->ifs_nat_rules, 2109 sizeof(*ifs->ifs_nat_rules) * ifs->ifs_ipf_natrules_sz); 2110 if (ifs->ifs_rdr_rules != NULL) 2111 bzero((char *)ifs->ifs_rdr_rules, 2112 sizeof(*ifs->ifs_rdr_rules) * ifs->ifs_ipf_rdrrules_sz); 2113 2114 while ((n = *np) != NULL) { 2115 *np = n->in_next; 2116 if (n->in_use == 0) { 2117 if (n->in_apr != NULL) 2118 appr_free(n->in_apr); 2119 KFREE(n); 2120 ifs->ifs_nat_stats.ns_rules--; 2121 } else { 2122 n->in_flags |= IPN_DELETE; 2123 n->in_next = NULL; 2124 } 2125 i++; 2126 } 2127 ifs->ifs_nat_masks = 0; 2128 ifs->ifs_rdr_masks = 0; 2129 for (i = 0; i < 4; i++) { 2130 ifs->ifs_nat6_masks[i] = 0; 2131 ifs->ifs_rdr6_masks[i] = 0; 2132 } 2133 return i; 2134 } 2135 2136 2137 /* ------------------------------------------------------------------------ */ 2138 /* Function: nat_newmap */ 2139 /* Returns: int - -1 == error, 0 == success */ 2140 /* Parameters: fin(I) - pointer to packet information */ 2141 /* nat(I) - pointer to NAT entry */ 2142 /* ni(I) - pointer to structure with misc. information needed */ 2143 /* to create new NAT entry. */ 2144 /* */ 2145 /* Given an empty NAT structure, populate it with new information about a */ 2146 /* new NAT session, as defined by the matching NAT rule. */ 2147 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/ 2148 /* to the new IP address for the translation. */ 2149 /* ------------------------------------------------------------------------ */ 2150 static INLINE int nat_newmap(fin, nat, ni) 2151 fr_info_t *fin; 2152 nat_t *nat; 2153 natinfo_t *ni; 2154 { 2155 u_short st_port, dport, sport, port, sp, dp; 2156 struct in_addr in, inb; 2157 hostmap_t *hm; 2158 u_32_t flags; 2159 u_32_t st_ip; 2160 ipnat_t *np; 2161 nat_t *natl; 2162 int l; 2163 ipf_stack_t *ifs = fin->fin_ifs; 2164 2165 /* 2166 * If it's an outbound packet which doesn't match any existing 2167 * record, then create a new port 2168 */ 2169 l = 0; 2170 hm = NULL; 2171 np = ni->nai_np; 2172 st_ip = np->in_nip; 2173 st_port = np->in_pnext; 2174 flags = ni->nai_flags; 2175 sport = ni->nai_sport; 2176 dport = ni->nai_dport; 2177 2178 /* 2179 * Do a loop until we either run out of entries to try or we find 2180 * a NAT mapping that isn't currently being used. This is done 2181 * because the change to the source is not (usually) being fixed. 2182 */ 2183 do { 2184 port = 0; 2185 in.s_addr = htonl(np->in_nip); 2186 if (l == 0) { 2187 /* 2188 * Check to see if there is an existing NAT 2189 * setup for this IP address pair. 2190 */ 2191 hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, 2192 in, 0, ifs); 2193 if (hm != NULL) 2194 in.s_addr = hm->hm_mapip.s_addr; 2195 } else if ((l == 1) && (hm != NULL)) { 2196 fr_hostmapdel(&hm); 2197 } 2198 in.s_addr = ntohl(in.s_addr); 2199 2200 nat->nat_hm = hm; 2201 2202 if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) { 2203 if (l > 0) 2204 return -1; 2205 } 2206 2207 if (np->in_redir == NAT_BIMAP && 2208 np->in_inmsk == np->in_outmsk) { 2209 /* 2210 * map the address block in a 1:1 fashion 2211 */ 2212 in.s_addr = np->in_outip; 2213 in.s_addr |= fin->fin_saddr & ~np->in_inmsk; 2214 in.s_addr = ntohl(in.s_addr); 2215 2216 } else if (np->in_redir & NAT_MAPBLK) { 2217 if ((l >= np->in_ppip) || ((l > 0) && 2218 !(flags & IPN_TCPUDP))) 2219 return -1; 2220 /* 2221 * map-block - Calculate destination address. 2222 */ 2223 in.s_addr = ntohl(fin->fin_saddr); 2224 in.s_addr &= ntohl(~np->in_inmsk); 2225 inb.s_addr = in.s_addr; 2226 in.s_addr /= np->in_ippip; 2227 in.s_addr &= ntohl(~np->in_outmsk); 2228 in.s_addr += ntohl(np->in_outip); 2229 /* 2230 * Calculate destination port. 2231 */ 2232 if ((flags & IPN_TCPUDP) && 2233 (np->in_ppip != 0)) { 2234 port = ntohs(sport) + l; 2235 port %= np->in_ppip; 2236 port += np->in_ppip * 2237 (inb.s_addr % np->in_ippip); 2238 port += MAPBLK_MINPORT; 2239 port = htons(port); 2240 } 2241 2242 } else if ((np->in_outip == 0) && 2243 (np->in_outmsk == 0xffffffff)) { 2244 /* 2245 * 0/32 - use the interface's IP address. 2246 */ 2247 if ((l > 0) || 2248 fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, 2249 &in, NULL, fin->fin_ifs) == -1) 2250 return -1; 2251 in.s_addr = ntohl(in.s_addr); 2252 2253 } else if ((np->in_outip == 0) && (np->in_outmsk == 0)) { 2254 /* 2255 * 0/0 - use the original source address/port. 2256 */ 2257 if (l > 0) 2258 return -1; 2259 in.s_addr = ntohl(fin->fin_saddr); 2260 2261 } else if ((np->in_outmsk != 0xffffffff) && 2262 (np->in_pnext == 0) && ((l > 0) || (hm == NULL))) 2263 np->in_nip++; 2264 2265 natl = NULL; 2266 2267 if ((flags & IPN_TCPUDP) && 2268 ((np->in_redir & NAT_MAPBLK) == 0) && 2269 (np->in_flags & IPN_AUTOPORTMAP)) { 2270 /* 2271 * "ports auto" (without map-block) 2272 */ 2273 if ((l > 0) && (l % np->in_ppip == 0)) { 2274 if (l > np->in_space) { 2275 return -1; 2276 } else if ((l > np->in_ppip) && 2277 np->in_outmsk != 0xffffffff) 2278 np->in_nip++; 2279 } 2280 if (np->in_ppip != 0) { 2281 port = ntohs(sport); 2282 port += (l % np->in_ppip); 2283 port %= np->in_ppip; 2284 port += np->in_ppip * 2285 (ntohl(fin->fin_saddr) % 2286 np->in_ippip); 2287 port += MAPBLK_MINPORT; 2288 port = htons(port); 2289 } 2290 2291 } else if (((np->in_redir & NAT_MAPBLK) == 0) && 2292 (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) { 2293 /* 2294 * Standard port translation. Select next port. 2295 */ 2296 if (np->in_flags & IPN_SEQUENTIAL) { 2297 port = np->in_pnext; 2298 } else { 2299 port = ipf_random() % (ntohs(np->in_pmax) - 2300 ntohs(np->in_pmin)); 2301 port += ntohs(np->in_pmin); 2302 } 2303 port = htons(port); 2304 np->in_pnext++; 2305 2306 if (np->in_pnext > ntohs(np->in_pmax)) { 2307 np->in_pnext = ntohs(np->in_pmin); 2308 if (np->in_outmsk != 0xffffffff) 2309 np->in_nip++; 2310 } 2311 } 2312 2313 if (np->in_flags & IPN_IPRANGE) { 2314 if (np->in_nip > ntohl(np->in_outmsk)) 2315 np->in_nip = ntohl(np->in_outip); 2316 } else { 2317 if ((np->in_outmsk != 0xffffffff) && 2318 ((np->in_nip + 1) & ntohl(np->in_outmsk)) > 2319 ntohl(np->in_outip)) 2320 np->in_nip = ntohl(np->in_outip) + 1; 2321 } 2322 2323 if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY))) 2324 port = sport; 2325 2326 /* 2327 * Here we do a lookup of the connection as seen from 2328 * the outside. If an IP# pair already exists, try 2329 * again. So if you have A->B becomes C->B, you can 2330 * also have D->E become C->E but not D->B causing 2331 * another C->B. Also take protocol and ports into 2332 * account when determining whether a pre-existing 2333 * NAT setup will cause an external conflict where 2334 * this is appropriate. 2335 */ 2336 inb.s_addr = htonl(in.s_addr); 2337 sp = fin->fin_data[0]; 2338 dp = fin->fin_data[1]; 2339 fin->fin_data[0] = fin->fin_data[1]; 2340 fin->fin_data[1] = htons(port); 2341 natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH), 2342 (u_int)fin->fin_p, fin->fin_dst, inb); 2343 fin->fin_data[0] = sp; 2344 fin->fin_data[1] = dp; 2345 2346 /* 2347 * Has the search wrapped around and come back to the 2348 * start ? 2349 */ 2350 if ((natl != NULL) && 2351 (np->in_pnext != 0) && (st_port == np->in_pnext) && 2352 (np->in_nip != 0) && (st_ip == np->in_nip)) 2353 return -1; 2354 l++; 2355 } while (natl != NULL); 2356 2357 if (np->in_space > 0) 2358 np->in_space--; 2359 2360 /* Setup the NAT table */ 2361 nat->nat_inip = fin->fin_src; 2362 nat->nat_outip.s_addr = htonl(in.s_addr); 2363 nat->nat_oip = fin->fin_dst; 2364 if (nat->nat_hm == NULL) 2365 nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, 2366 nat->nat_outip, 0, ifs); 2367 2368 if (flags & IPN_TCPUDP) { 2369 nat->nat_inport = sport; 2370 nat->nat_outport = port; /* sport */ 2371 nat->nat_oport = dport; 2372 ((tcphdr_t *)fin->fin_dp)->th_sport = port; 2373 } else if (flags & IPN_ICMPQUERY) { 2374 ((icmphdr_t *)fin->fin_dp)->icmp_id = port; 2375 nat->nat_inport = port; 2376 nat->nat_outport = port; 2377 } 2378 2379 ni->nai_ip.s_addr = in.s_addr; 2380 ni->nai_port = port; 2381 ni->nai_nport = dport; 2382 return 0; 2383 } 2384 2385 2386 /* ------------------------------------------------------------------------ */ 2387 /* Function: nat_newrdr */ 2388 /* Returns: int - -1 == error, 0 == success (no move), 1 == success and */ 2389 /* allow rule to be moved if IPN_ROUNDR is set. */ 2390 /* Parameters: fin(I) - pointer to packet information */ 2391 /* nat(I) - pointer to NAT entry */ 2392 /* ni(I) - pointer to structure with misc. information needed */ 2393 /* to create new NAT entry. */ 2394 /* */ 2395 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/ 2396 /* to the new IP address for the translation. */ 2397 /* ------------------------------------------------------------------------ */ 2398 static INLINE int nat_newrdr(fin, nat, ni) 2399 fr_info_t *fin; 2400 nat_t *nat; 2401 natinfo_t *ni; 2402 { 2403 u_short nport, dport, sport; 2404 struct in_addr in, inb; 2405 u_short sp, dp; 2406 hostmap_t *hm; 2407 u_32_t flags; 2408 ipnat_t *np; 2409 nat_t *natl; 2410 int move; 2411 ipf_stack_t *ifs = fin->fin_ifs; 2412 2413 move = 1; 2414 hm = NULL; 2415 in.s_addr = 0; 2416 np = ni->nai_np; 2417 flags = ni->nai_flags; 2418 sport = ni->nai_sport; 2419 dport = ni->nai_dport; 2420 2421 /* 2422 * If the matching rule has IPN_STICKY set, then we want to have the 2423 * same rule kick in as before. Why would this happen? If you have 2424 * a collection of rdr rules with "round-robin sticky", the current 2425 * packet might match a different one to the previous connection but 2426 * we want the same destination to be used. 2427 */ 2428 if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == 2429 (IPN_ROUNDR|IPN_STICKY)) { 2430 hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in, 2431 (u_32_t)dport, ifs); 2432 if (hm != NULL) { 2433 in.s_addr = ntohl(hm->hm_mapip.s_addr); 2434 np = hm->hm_ipnat; 2435 ni->nai_np = np; 2436 move = 0; 2437 } 2438 } 2439 2440 /* 2441 * Otherwise, it's an inbound packet. Most likely, we don't 2442 * want to rewrite source ports and source addresses. Instead, 2443 * we want to rewrite to a fixed internal address and fixed 2444 * internal port. 2445 */ 2446 if (np->in_flags & IPN_SPLIT) { 2447 in.s_addr = np->in_nip; 2448 2449 if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) { 2450 hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, 2451 in, (u_32_t)dport, ifs); 2452 if (hm != NULL) { 2453 in.s_addr = hm->hm_mapip.s_addr; 2454 move = 0; 2455 } 2456 } 2457 2458 if (hm == NULL || hm->hm_ref == 1) { 2459 if (np->in_inip == htonl(in.s_addr)) { 2460 np->in_nip = ntohl(np->in_inmsk); 2461 move = 0; 2462 } else { 2463 np->in_nip = ntohl(np->in_inip); 2464 } 2465 } 2466 2467 } else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) { 2468 /* 2469 * 0/32 - use the interface's IP address. 2470 */ 2471 if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL, 2472 fin->fin_ifs) == -1) 2473 return -1; 2474 in.s_addr = ntohl(in.s_addr); 2475 2476 } else if ((np->in_inip == 0) && (np->in_inmsk== 0)) { 2477 /* 2478 * 0/0 - use the original destination address/port. 2479 */ 2480 in.s_addr = ntohl(fin->fin_daddr); 2481 2482 } else if (np->in_redir == NAT_BIMAP && 2483 np->in_inmsk == np->in_outmsk) { 2484 /* 2485 * map the address block in a 1:1 fashion 2486 */ 2487 in.s_addr = np->in_inip; 2488 in.s_addr |= fin->fin_daddr & ~np->in_inmsk; 2489 in.s_addr = ntohl(in.s_addr); 2490 } else { 2491 in.s_addr = ntohl(np->in_inip); 2492 } 2493 2494 if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0)) 2495 nport = dport; 2496 else { 2497 /* 2498 * Whilst not optimized for the case where 2499 * pmin == pmax, the gain is not significant. 2500 */ 2501 if (((np->in_flags & IPN_FIXEDDPORT) == 0) && 2502 (np->in_pmin != np->in_pmax)) { 2503 nport = ntohs(dport) - ntohs(np->in_pmin) + 2504 ntohs(np->in_pnext); 2505 nport = htons(nport); 2506 } else 2507 nport = np->in_pnext; 2508 } 2509 2510 /* 2511 * When the redirect-to address is set to 0.0.0.0, just 2512 * assume a blank `forwarding' of the packet. We don't 2513 * setup any translation for this either. 2514 */ 2515 if (in.s_addr == 0) { 2516 if (nport == dport) 2517 return -1; 2518 in.s_addr = ntohl(fin->fin_daddr); 2519 } 2520 2521 /* 2522 * Check to see if this redirect mapping already exists and if 2523 * it does, return "failure" (allowing it to be created will just 2524 * cause one or both of these "connections" to stop working.) 2525 */ 2526 inb.s_addr = htonl(in.s_addr); 2527 sp = fin->fin_data[0]; 2528 dp = fin->fin_data[1]; 2529 fin->fin_data[1] = fin->fin_data[0]; 2530 fin->fin_data[0] = ntohs(nport); 2531 natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH), 2532 (u_int)fin->fin_p, inb, fin->fin_src); 2533 fin->fin_data[0] = sp; 2534 fin->fin_data[1] = dp; 2535 if (natl != NULL) 2536 return (-1); 2537 2538 nat->nat_inip.s_addr = htonl(in.s_addr); 2539 nat->nat_outip = fin->fin_dst; 2540 nat->nat_oip = fin->fin_src; 2541 2542 ni->nai_ip.s_addr = in.s_addr; 2543 ni->nai_nport = nport; 2544 ni->nai_port = sport; 2545 2546 if (flags & IPN_TCPUDP) { 2547 nat->nat_inport = nport; 2548 nat->nat_outport = dport; 2549 nat->nat_oport = sport; 2550 ((tcphdr_t *)fin->fin_dp)->th_dport = nport; 2551 } else if (flags & IPN_ICMPQUERY) { 2552 ((icmphdr_t *)fin->fin_dp)->icmp_id = nport; 2553 nat->nat_inport = nport; 2554 nat->nat_outport = nport; 2555 } 2556 2557 return move; 2558 } 2559 2560 /* ------------------------------------------------------------------------ */ 2561 /* Function: nat_new */ 2562 /* Returns: nat_t* - NULL == failure to create new NAT structure, */ 2563 /* else pointer to new NAT structure */ 2564 /* Parameters: fin(I) - pointer to packet information */ 2565 /* np(I) - pointer to NAT rule */ 2566 /* natsave(I) - pointer to where to store NAT struct pointer */ 2567 /* flags(I) - flags describing the current packet */ 2568 /* direction(I) - direction of packet (in/out) */ 2569 /* Write Lock: ipf_nat */ 2570 /* */ 2571 /* Attempts to create a new NAT entry. Does not actually change the packet */ 2572 /* in any way. */ 2573 /* */ 2574 /* This fucntion is in three main parts: (1) deal with creating a new NAT */ 2575 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with */ 2576 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */ 2577 /* and (3) building that structure and putting it into the NAT table(s). */ 2578 /* ------------------------------------------------------------------------ */ 2579 nat_t *nat_new(fin, np, natsave, flags, direction) 2580 fr_info_t *fin; 2581 ipnat_t *np; 2582 nat_t **natsave; 2583 u_int flags; 2584 int direction; 2585 { 2586 tcphdr_t *tcp = NULL; 2587 hostmap_t *hm = NULL; 2588 nat_t *nat, *natl; 2589 u_int nflags; 2590 natinfo_t ni; 2591 int move; 2592 ipf_stack_t *ifs = fin->fin_ifs; 2593 2594 /* 2595 * Trigger automatic call to nat_extraflush() if the 2596 * table has reached capcity specified by hi watermark. 2597 */ 2598 if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi) 2599 ifs->ifs_nat_doflush = 1; 2600 2601 /* 2602 * If automatic flushing did not do its job, and the table 2603 * has filled up, don't try to create a new entry. 2604 */ 2605 if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) { 2606 ifs->ifs_nat_stats.ns_memfail++; 2607 return NULL; 2608 } 2609 2610 move = 1; 2611 nflags = np->in_flags & flags; 2612 nflags &= NAT_FROMRULE; 2613 2614 ni.nai_np = np; 2615 ni.nai_nflags = nflags; 2616 ni.nai_flags = flags; 2617 2618 /* Give me a new nat */ 2619 KMALLOC(nat, nat_t *); 2620 if (nat == NULL) { 2621 ifs->ifs_nat_stats.ns_memfail++; 2622 /* 2623 * Try to automatically tune the max # of entries in the 2624 * table allowed to be less than what will cause kmem_alloc() 2625 * to fail and try to eliminate panics due to out of memory 2626 * conditions arising. 2627 */ 2628 if (ifs->ifs_ipf_nattable_max > ifs->ifs_ipf_nattable_sz) { 2629 ifs->ifs_ipf_nattable_max = ifs->ifs_nat_stats.ns_inuse - 100; 2630 printf("ipf_nattable_max reduced to %d\n", 2631 ifs->ifs_ipf_nattable_max); 2632 } 2633 return NULL; 2634 } 2635 2636 if (flags & IPN_TCPUDP) { 2637 tcp = fin->fin_dp; 2638 ni.nai_sport = htons(fin->fin_sport); 2639 ni.nai_dport = htons(fin->fin_dport); 2640 } else if (flags & IPN_ICMPQUERY) { 2641 /* 2642 * In the ICMP query NAT code, we translate the ICMP id fields 2643 * to make them unique. This is indepedent of the ICMP type 2644 * (e.g. in the unlikely event that a host sends an echo and 2645 * an tstamp request with the same id, both packets will have 2646 * their ip address/id field changed in the same way). 2647 */ 2648 /* The icmp_id field is used by the sender to identify the 2649 * process making the icmp request. (the receiver justs 2650 * copies it back in its response). So, it closely matches 2651 * the concept of source port. We overlay sport, so we can 2652 * maximally reuse the existing code. 2653 */ 2654 ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id; 2655 ni.nai_dport = ni.nai_sport; 2656 } 2657 2658 bzero((char *)nat, sizeof(*nat)); 2659 nat->nat_flags = flags; 2660 nat->nat_redir = np->in_redir; 2661 2662 if ((flags & NAT_SLAVE) == 0) { 2663 MUTEX_ENTER(&ifs->ifs_ipf_nat_new); 2664 } 2665 2666 /* 2667 * Search the current table for a match. 2668 */ 2669 if (direction == NAT_OUTBOUND) { 2670 /* 2671 * We can now arrange to call this for the same connection 2672 * because ipf_nat_new doesn't protect the code path into 2673 * this function. 2674 */ 2675 natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p, 2676 fin->fin_src, fin->fin_dst); 2677 if (natl != NULL) { 2678 KFREE(nat); 2679 nat = natl; 2680 goto done; 2681 } 2682 2683 move = nat_newmap(fin, nat, &ni); 2684 if (move == -1) 2685 goto badnat; 2686 2687 np = ni.nai_np; 2688 } else { 2689 /* 2690 * NAT_INBOUND is used only for redirects rules 2691 */ 2692 natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p, 2693 fin->fin_src, fin->fin_dst); 2694 if (natl != NULL) { 2695 KFREE(nat); 2696 nat = natl; 2697 goto done; 2698 } 2699 2700 move = nat_newrdr(fin, nat, &ni); 2701 if (move == -1) 2702 goto badnat; 2703 2704 np = ni.nai_np; 2705 } 2706 2707 if ((move == 1) && (np->in_flags & IPN_ROUNDR)) { 2708 if (np->in_redir == NAT_REDIRECT) { 2709 nat_delrdr(np); 2710 nat_addrdr(np, ifs); 2711 } else if (np->in_redir == NAT_MAP) { 2712 nat_delnat(np); 2713 nat_addnat(np, ifs); 2714 } 2715 } 2716 2717 if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) { 2718 goto badnat; 2719 } 2720 2721 nat_calc_chksum_diffs(nat); 2722 2723 if (flags & SI_WILDP) 2724 ifs->ifs_nat_stats.ns_wilds++; 2725 fin->fin_flx |= FI_NEWNAT; 2726 goto done; 2727 badnat: 2728 ifs->ifs_nat_stats.ns_badnat++; 2729 if ((hm = nat->nat_hm) != NULL) 2730 fr_hostmapdel(&hm); 2731 KFREE(nat); 2732 nat = NULL; 2733 done: 2734 if ((flags & NAT_SLAVE) == 0) { 2735 MUTEX_EXIT(&ifs->ifs_ipf_nat_new); 2736 } 2737 return nat; 2738 } 2739 2740 2741 /* ------------------------------------------------------------------------ */ 2742 /* Function: nat_finalise */ 2743 /* Returns: int - 0 == sucess, -1 == failure */ 2744 /* Parameters: fin(I) - pointer to packet information */ 2745 /* nat(I) - pointer to NAT entry */ 2746 /* ni(I) - pointer to structure with misc. information needed */ 2747 /* to create new NAT entry. */ 2748 /* Write Lock: ipf_nat */ 2749 /* */ 2750 /* This is the tail end of constructing a new NAT entry and is the same */ 2751 /* for both IPv4 and IPv6. */ 2752 /* ------------------------------------------------------------------------ */ 2753 /*ARGSUSED*/ 2754 static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction) 2755 fr_info_t *fin; 2756 nat_t *nat; 2757 natinfo_t *ni; 2758 tcphdr_t *tcp; 2759 nat_t **natsave; 2760 int direction; 2761 { 2762 frentry_t *fr; 2763 ipnat_t *np; 2764 ipf_stack_t *ifs = fin->fin_ifs; 2765 2766 np = ni->nai_np; 2767 2768 COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0], fin->fin_v); 2769 2770 #ifdef IPFILTER_SYNC 2771 if ((nat->nat_flags & SI_CLONE) == 0) 2772 nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat); 2773 #endif 2774 2775 nat->nat_me = natsave; 2776 nat->nat_dir = direction; 2777 nat->nat_ifps[0] = np->in_ifps[0]; 2778 nat->nat_ifps[1] = np->in_ifps[1]; 2779 nat->nat_ptr = np; 2780 nat->nat_p = fin->fin_p; 2781 nat->nat_v = fin->fin_v; 2782 nat->nat_mssclamp = np->in_mssclamp; 2783 fr = fin->fin_fr; 2784 nat->nat_fr = fr; 2785 2786 if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0)) 2787 if (appr_new(fin, nat) == -1) 2788 return -1; 2789 2790 if (nat_insert(nat, fin->fin_rev, ifs) == 0) { 2791 if (ifs->ifs_nat_logging) 2792 nat_log(nat, (u_int)np->in_redir, ifs); 2793 np->in_use++; 2794 if (fr != NULL) { 2795 MUTEX_ENTER(&fr->fr_lock); 2796 fr->fr_ref++; 2797 MUTEX_EXIT(&fr->fr_lock); 2798 } 2799 return 0; 2800 } 2801 2802 /* 2803 * nat_insert failed, so cleanup time... 2804 */ 2805 return -1; 2806 } 2807 2808 2809 /* ------------------------------------------------------------------------ */ 2810 /* Function: nat_insert */ 2811 /* Returns: int - 0 == sucess, -1 == failure */ 2812 /* Parameters: nat(I) - pointer to NAT structure */ 2813 /* rev(I) - flag indicating forward/reverse direction of packet */ 2814 /* Write Lock: ipf_nat */ 2815 /* */ 2816 /* Insert a NAT entry into the hash tables for searching and add it to the */ 2817 /* list of active NAT entries. Adjust global counters when complete. */ 2818 /* ------------------------------------------------------------------------ */ 2819 int nat_insert(nat, rev, ifs) 2820 nat_t *nat; 2821 int rev; 2822 ipf_stack_t *ifs; 2823 { 2824 u_int hv1, hv2; 2825 nat_t **natp; 2826 2827 /* 2828 * Try and return an error as early as possible, so calculate the hash 2829 * entry numbers first and then proceed. 2830 */ 2831 if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) { 2832 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 2833 0xffffffff); 2834 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport, 2835 ifs->ifs_ipf_nattable_sz); 2836 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 2837 0xffffffff); 2838 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport, 2839 ifs->ifs_ipf_nattable_sz); 2840 } else { 2841 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff); 2842 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, 2843 ifs->ifs_ipf_nattable_sz); 2844 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff); 2845 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, 2846 ifs->ifs_ipf_nattable_sz); 2847 } 2848 2849 if (ifs->ifs_nat_stats.ns_bucketlen[0][hv1] >= ifs->ifs_fr_nat_maxbucket || 2850 ifs->ifs_nat_stats.ns_bucketlen[1][hv2] >= ifs->ifs_fr_nat_maxbucket) { 2851 return -1; 2852 } 2853 2854 nat->nat_hv[0] = hv1; 2855 nat->nat_hv[1] = hv2; 2856 2857 MUTEX_INIT(&nat->nat_lock, "nat entry lock"); 2858 2859 nat->nat_rev = rev; 2860 nat->nat_ref = 1; 2861 nat->nat_bytes[0] = 0; 2862 nat->nat_pkts[0] = 0; 2863 nat->nat_bytes[1] = 0; 2864 nat->nat_pkts[1] = 0; 2865 2866 nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0'; 2867 nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4, ifs); 2868 2869 if (nat->nat_ifnames[1][0] !='\0') { 2870 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; 2871 nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4, ifs); 2872 } else { 2873 (void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0], 2874 LIFNAMSIZ); 2875 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; 2876 nat->nat_ifps[1] = nat->nat_ifps[0]; 2877 } 2878 2879 nat->nat_next = ifs->ifs_nat_instances; 2880 nat->nat_pnext = &ifs->ifs_nat_instances; 2881 if (ifs->ifs_nat_instances) 2882 ifs->ifs_nat_instances->nat_pnext = &nat->nat_next; 2883 ifs->ifs_nat_instances = nat; 2884 2885 natp = &ifs->ifs_nat_table[0][hv1]; 2886 if (*natp) 2887 (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; 2888 nat->nat_phnext[0] = natp; 2889 nat->nat_hnext[0] = *natp; 2890 *natp = nat; 2891 ifs->ifs_nat_stats.ns_bucketlen[0][hv1]++; 2892 2893 natp = &ifs->ifs_nat_table[1][hv2]; 2894 if (*natp) 2895 (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; 2896 nat->nat_phnext[1] = natp; 2897 nat->nat_hnext[1] = *natp; 2898 *natp = nat; 2899 ifs->ifs_nat_stats.ns_bucketlen[1][hv2]++; 2900 2901 fr_setnatqueue(nat, rev, ifs); 2902 2903 ifs->ifs_nat_stats.ns_added++; 2904 ifs->ifs_nat_stats.ns_inuse++; 2905 return 0; 2906 } 2907 2908 2909 /* ------------------------------------------------------------------------ */ 2910 /* Function: nat_icmperrorlookup */ 2911 /* Returns: nat_t* - point to matching NAT structure */ 2912 /* Parameters: fin(I) - pointer to packet information */ 2913 /* dir(I) - direction of packet (in/out) */ 2914 /* */ 2915 /* Check if the ICMP error message is related to an existing TCP, UDP or */ 2916 /* ICMP query nat entry. It is assumed that the packet is already of the */ 2917 /* the required length. */ 2918 /* ------------------------------------------------------------------------ */ 2919 nat_t *nat_icmperrorlookup(fin, dir) 2920 fr_info_t *fin; 2921 int dir; 2922 { 2923 int flags = 0, minlen; 2924 icmphdr_t *orgicmp; 2925 tcphdr_t *tcp = NULL; 2926 u_short data[2]; 2927 nat_t *nat; 2928 ip_t *oip; 2929 u_int p; 2930 2931 /* 2932 * Does it at least have the return (basic) IP header ? 2933 * Only a basic IP header (no options) should be with an ICMP error 2934 * header. Also, if it's not an error type, then return. 2935 */ 2936 if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR)) 2937 return NULL; 2938 2939 /* 2940 * Check packet size 2941 */ 2942 oip = (ip_t *)((char *)fin->fin_dp + 8); 2943 minlen = IP_HL(oip) << 2; 2944 if ((minlen < sizeof(ip_t)) || 2945 (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)) 2946 return NULL; 2947 /* 2948 * Is the buffer big enough for all of it ? It's the size of the IP 2949 * header claimed in the encapsulated part which is of concern. It 2950 * may be too big to be in this buffer but not so big that it's 2951 * outside the ICMP packet, leading to TCP deref's causing problems. 2952 * This is possible because we don't know how big oip_hl is when we 2953 * do the pullup early in fr_check() and thus can't gaurantee it is 2954 * all here now. 2955 */ 2956 #ifdef _KERNEL 2957 { 2958 mb_t *m; 2959 2960 m = fin->fin_m; 2961 # if defined(MENTAT) 2962 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr) 2963 return NULL; 2964 # else 2965 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > 2966 (char *)fin->fin_ip + M_LEN(m)) 2967 return NULL; 2968 # endif 2969 } 2970 #endif 2971 2972 if (fin->fin_daddr != oip->ip_src.s_addr) 2973 return NULL; 2974 2975 p = oip->ip_p; 2976 if (p == IPPROTO_TCP) 2977 flags = IPN_TCP; 2978 else if (p == IPPROTO_UDP) 2979 flags = IPN_UDP; 2980 else if (p == IPPROTO_ICMP) { 2981 orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2982 2983 /* see if this is related to an ICMP query */ 2984 if (nat_icmpquerytype4(orgicmp->icmp_type)) { 2985 data[0] = fin->fin_data[0]; 2986 data[1] = fin->fin_data[1]; 2987 fin->fin_data[0] = 0; 2988 fin->fin_data[1] = orgicmp->icmp_id; 2989 2990 flags = IPN_ICMPERR|IPN_ICMPQUERY; 2991 /* 2992 * NOTE : dir refers to the direction of the original 2993 * ip packet. By definition the icmp error 2994 * message flows in the opposite direction. 2995 */ 2996 if (dir == NAT_INBOUND) 2997 nat = nat_inlookup(fin, flags, p, oip->ip_dst, 2998 oip->ip_src); 2999 else 3000 nat = nat_outlookup(fin, flags, p, oip->ip_dst, 3001 oip->ip_src); 3002 fin->fin_data[0] = data[0]; 3003 fin->fin_data[1] = data[1]; 3004 return nat; 3005 } 3006 } 3007 3008 if (flags & IPN_TCPUDP) { 3009 minlen += 8; /* + 64bits of data to get ports */ 3010 if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen) 3011 return NULL; 3012 3013 data[0] = fin->fin_data[0]; 3014 data[1] = fin->fin_data[1]; 3015 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 3016 fin->fin_data[0] = ntohs(tcp->th_dport); 3017 fin->fin_data[1] = ntohs(tcp->th_sport); 3018 3019 if (dir == NAT_INBOUND) { 3020 nat = nat_inlookup(fin, flags, p, oip->ip_dst, 3021 oip->ip_src); 3022 } else { 3023 nat = nat_outlookup(fin, flags, p, oip->ip_dst, 3024 oip->ip_src); 3025 } 3026 fin->fin_data[0] = data[0]; 3027 fin->fin_data[1] = data[1]; 3028 return nat; 3029 } 3030 if (dir == NAT_INBOUND) 3031 return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src); 3032 else 3033 return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src); 3034 } 3035 3036 3037 /* ------------------------------------------------------------------------ */ 3038 /* Function: nat_icmperror */ 3039 /* Returns: nat_t* - point to matching NAT structure */ 3040 /* Parameters: fin(I) - pointer to packet information */ 3041 /* nflags(I) - NAT flags for this packet */ 3042 /* dir(I) - direction of packet (in/out) */ 3043 /* */ 3044 /* Fix up an ICMP packet which is an error message for an existing NAT */ 3045 /* session. This will correct both packet header data and checksums. */ 3046 /* */ 3047 /* This should *ONLY* be used for incoming ICMP error packets to make sure */ 3048 /* a NAT'd ICMP packet gets correctly recognised. */ 3049 /* ------------------------------------------------------------------------ */ 3050 nat_t *nat_icmperror(fin, nflags, dir) 3051 fr_info_t *fin; 3052 u_int *nflags; 3053 int dir; 3054 { 3055 u_32_t sum1, sum2, sumd, psum1, psum2, psumd, sumd2; 3056 struct in_addr in; 3057 icmphdr_t *icmp, *orgicmp; 3058 int dlen; 3059 udphdr_t *udp; 3060 tcphdr_t *tcp; 3061 nat_t *nat; 3062 ip_t *oip; 3063 if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) 3064 return NULL; 3065 3066 /* 3067 * nat_icmperrorlookup() looks up nat entry associated with the 3068 * offending IP packet and returns pointer to the entry, or NULL 3069 * if packet wasn't natted or for `defective' packets. 3070 */ 3071 3072 if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir))) 3073 return NULL; 3074 3075 sumd2 = 0; 3076 *nflags = IPN_ICMPERR; 3077 icmp = fin->fin_dp; 3078 oip = (ip_t *)&icmp->icmp_ip; 3079 udp = (udphdr_t *)((((char *)oip) + (IP_HL(oip) << 2))); 3080 tcp = (tcphdr_t *)udp; 3081 dlen = fin->fin_plen - ((char *)udp - (char *)fin->fin_ip); 3082 3083 /* 3084 * Need to adjust ICMP header to include the real IP#'s and 3085 * port #'s. There are three steps required. 3086 * 3087 * Step 1 3088 * Fix the IP addresses in the offending IP packet and update 3089 * ip header checksum to compensate for the change. 3090 * 3091 * No update needed here for icmp_cksum because the ICMP checksum 3092 * is calculated over the complete ICMP packet, which includes the 3093 * changed oip IP addresses and oip->ip_sum. These two changes 3094 * cancel each other out (if the delta for the IP address is x, 3095 * then the delta for ip_sum is minus x). 3096 */ 3097 3098 if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) { 3099 sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr)); 3100 in = nat->nat_inip; 3101 oip->ip_src = in; 3102 } else { 3103 sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr)); 3104 in = nat->nat_outip; 3105 oip->ip_dst = in; 3106 } 3107 3108 sum2 = LONG_SUM(ntohl(in.s_addr)); 3109 CALC_SUMD(sum1, sum2, sumd); 3110 fix_datacksum(&oip->ip_sum, sumd); 3111 3112 /* 3113 * Step 2 3114 * Perform other adjustments based on protocol of offending packet. 3115 */ 3116 3117 switch (oip->ip_p) { 3118 case IPPROTO_TCP : 3119 case IPPROTO_UDP : 3120 3121 /* 3122 * For offending TCP/UDP IP packets, translate the ports 3123 * based on the NAT specification. 3124 * 3125 * Advance notice : Now it becomes complicated :-) 3126 * 3127 * Since the port and IP addresse fields are both part 3128 * of the TCP/UDP checksum of the offending IP packet, 3129 * we need to adjust that checksum as well. 3130 * 3131 * To further complicate things, the TCP/UDP checksum 3132 * may not be present. We must check to see if the 3133 * length of the data portion is big enough to hold 3134 * the checksum. In the UDP case, a test to determine 3135 * if the checksum is even set is also required. 3136 * 3137 * Any changes to an IP address, port or checksum within 3138 * the ICMP packet requires a change to icmp_cksum. 3139 * 3140 * Be extremely careful here ... The change is dependent 3141 * upon whether or not the TCP/UPD checksum is present. 3142 * 3143 * If TCP/UPD checksum is present, the icmp_cksum must 3144 * compensate for checksum modification resulting from 3145 * IP address change only. Port change and resulting 3146 * data checksum adjustments cancel each other out. 3147 * 3148 * If TCP/UDP checksum is not present, icmp_cksum must 3149 * compensate for port change only. The IP address 3150 * change does not modify anything else in this case. 3151 */ 3152 3153 psum1 = 0; 3154 psum2 = 0; 3155 psumd = 0; 3156 3157 if ((tcp->th_dport == nat->nat_oport) && 3158 (tcp->th_sport != nat->nat_inport)) { 3159 3160 /* 3161 * Translate the source port. 3162 */ 3163 3164 psum1 = ntohs(tcp->th_sport); 3165 psum2 = ntohs(nat->nat_inport); 3166 tcp->th_sport = nat->nat_inport; 3167 3168 } else if ((tcp->th_sport == nat->nat_oport) && 3169 (tcp->th_dport != nat->nat_outport)) { 3170 3171 /* 3172 * Translate the destination port. 3173 */ 3174 3175 psum1 = ntohs(tcp->th_dport); 3176 psum2 = ntohs(nat->nat_outport); 3177 tcp->th_dport = nat->nat_outport; 3178 } 3179 3180 if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) { 3181 3182 /* 3183 * TCP checksum present. 3184 * 3185 * Adjust data checksum and icmp checksum to 3186 * compensate for any IP address change. 3187 */ 3188 3189 sum1 = ntohs(tcp->th_sum); 3190 fix_datacksum(&tcp->th_sum, sumd); 3191 sum2 = ntohs(tcp->th_sum); 3192 sumd2 = sumd << 1; 3193 CALC_SUMD(sum1, sum2, sumd); 3194 sumd2 += sumd; 3195 3196 /* 3197 * Also make data checksum adjustment to 3198 * compensate for any port change. 3199 */ 3200 3201 if (psum1 != psum2) { 3202 CALC_SUMD(psum1, psum2, psumd); 3203 fix_datacksum(&tcp->th_sum, psumd); 3204 } 3205 3206 } else if ((oip->ip_p == IPPROTO_UDP) && 3207 (dlen >= 8) && (udp->uh_sum != 0)) { 3208 3209 /* 3210 * The UDP checksum is present and set. 3211 * 3212 * Adjust data checksum and icmp checksum to 3213 * compensate for any IP address change. 3214 */ 3215 3216 sum1 = ntohs(udp->uh_sum); 3217 fix_datacksum(&udp->uh_sum, sumd); 3218 sum2 = ntohs(udp->uh_sum); 3219 sumd2 = sumd << 1; 3220 CALC_SUMD(sum1, sum2, sumd); 3221 sumd2 += sumd; 3222 3223 /* 3224 * Also make data checksum adjustment to 3225 * compensate for any port change. 3226 */ 3227 3228 if (psum1 != psum2) { 3229 CALC_SUMD(psum1, psum2, psumd); 3230 fix_datacksum(&udp->uh_sum, psumd); 3231 } 3232 3233 } else { 3234 3235 /* 3236 * Data checksum was not present. 3237 * 3238 * Compensate for any port change. 3239 */ 3240 3241 CALC_SUMD(psum2, psum1, psumd); 3242 sumd2 += psumd; 3243 } 3244 break; 3245 3246 case IPPROTO_ICMP : 3247 3248 orgicmp = (icmphdr_t *)udp; 3249 3250 if ((nat->nat_dir == NAT_OUTBOUND) && 3251 (orgicmp->icmp_id != nat->nat_inport) && 3252 (dlen >= 8)) { 3253 3254 /* 3255 * Fix ICMP checksum (of the offening ICMP 3256 * query packet) to compensate the change 3257 * in the ICMP id of the offending ICMP 3258 * packet. 3259 * 3260 * Since you modify orgicmp->icmp_id with 3261 * a delta (say x) and you compensate that 3262 * in origicmp->icmp_cksum with a delta 3263 * minus x, you don't have to adjust the 3264 * overall icmp->icmp_cksum 3265 */ 3266 3267 sum1 = ntohs(orgicmp->icmp_id); 3268 sum2 = ntohs(nat->nat_inport); 3269 CALC_SUMD(sum1, sum2, sumd); 3270 orgicmp->icmp_id = nat->nat_inport; 3271 fix_datacksum(&orgicmp->icmp_cksum, sumd); 3272 3273 } /* nat_dir can't be NAT_INBOUND for icmp queries */ 3274 3275 break; 3276 3277 default : 3278 3279 break; 3280 3281 } /* switch (oip->ip_p) */ 3282 3283 /* 3284 * Step 3 3285 * Make the adjustments to icmp checksum. 3286 */ 3287 3288 if (sumd2 != 0) { 3289 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); 3290 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); 3291 fix_incksum(&icmp->icmp_cksum, sumd2); 3292 } 3293 return nat; 3294 } 3295 3296 3297 /* 3298 * NB: these lookups don't lock access to the list, it assumed that it has 3299 * already been done! 3300 */ 3301 3302 /* ------------------------------------------------------------------------ */ 3303 /* Function: nat_inlookup */ 3304 /* Returns: nat_t* - NULL == no match, */ 3305 /* else pointer to matching NAT entry */ 3306 /* Parameters: fin(I) - pointer to packet information */ 3307 /* flags(I) - NAT flags for this packet */ 3308 /* p(I) - protocol for this packet */ 3309 /* src(I) - source IP address */ 3310 /* mapdst(I) - destination IP address */ 3311 /* */ 3312 /* Lookup a nat entry based on the mapped destination ip address/port and */ 3313 /* real source address/port. We use this lookup when receiving a packet, */ 3314 /* we're looking for a table entry, based on the destination address. */ 3315 /* */ 3316 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */ 3317 /* */ 3318 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */ 3319 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */ 3320 /* */ 3321 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */ 3322 /* the packet is of said protocol */ 3323 /* ------------------------------------------------------------------------ */ 3324 nat_t *nat_inlookup(fin, flags, p, src, mapdst) 3325 fr_info_t *fin; 3326 u_int flags, p; 3327 struct in_addr src , mapdst; 3328 { 3329 u_short sport, dport; 3330 ipnat_t *ipn; 3331 u_int sflags; 3332 nat_t *nat; 3333 int nflags; 3334 u_32_t dst; 3335 void *ifp; 3336 u_int hv; 3337 ipf_stack_t *ifs = fin->fin_ifs; 3338 3339 if (fin != NULL) 3340 ifp = fin->fin_ifp; 3341 else 3342 ifp = NULL; 3343 sport = 0; 3344 dport = 0; 3345 dst = mapdst.s_addr; 3346 sflags = flags & NAT_TCPUDPICMP; 3347 3348 switch (p) 3349 { 3350 case IPPROTO_TCP : 3351 case IPPROTO_UDP : 3352 sport = htons(fin->fin_data[0]); 3353 dport = htons(fin->fin_data[1]); 3354 break; 3355 case IPPROTO_ICMP : 3356 if (flags & IPN_ICMPERR) 3357 sport = fin->fin_data[1]; 3358 else 3359 dport = fin->fin_data[1]; 3360 break; 3361 default : 3362 break; 3363 } 3364 3365 3366 if ((flags & SI_WILDP) != 0) 3367 goto find_in_wild_ports; 3368 3369 hv = NAT_HASH_FN(dst, dport, 0xffffffff); 3370 hv = NAT_HASH_FN(src.s_addr, hv + sport, ifs->ifs_ipf_nattable_sz); 3371 nat = ifs->ifs_nat_table[1][hv]; 3372 for (; nat; nat = nat->nat_hnext[1]) { 3373 if (nat->nat_v != 4) 3374 continue; 3375 3376 if (nat->nat_ifps[0] != NULL) { 3377 if ((ifp != NULL) && (ifp != nat->nat_ifps[0])) 3378 continue; 3379 } else if (ifp != NULL) 3380 nat->nat_ifps[0] = ifp; 3381 3382 nflags = nat->nat_flags; 3383 3384 if (nat->nat_oip.s_addr == src.s_addr && 3385 nat->nat_outip.s_addr == dst && 3386 (((p == 0) && 3387 (sflags == (nat->nat_flags & IPN_TCPUDPICMP))) 3388 || (p == nat->nat_p))) { 3389 switch (p) 3390 { 3391 #if 0 3392 case IPPROTO_GRE : 3393 if (nat->nat_call[1] != fin->fin_data[0]) 3394 continue; 3395 break; 3396 #endif 3397 case IPPROTO_ICMP : 3398 if ((flags & IPN_ICMPERR) != 0) { 3399 if (nat->nat_outport != sport) 3400 continue; 3401 } else { 3402 if (nat->nat_outport != dport) 3403 continue; 3404 } 3405 break; 3406 case IPPROTO_TCP : 3407 case IPPROTO_UDP : 3408 if (nat->nat_oport != sport) 3409 continue; 3410 if (nat->nat_outport != dport) 3411 continue; 3412 break; 3413 default : 3414 break; 3415 } 3416 3417 ipn = nat->nat_ptr; 3418 if ((ipn != NULL) && (nat->nat_aps != NULL)) 3419 if (appr_match(fin, nat) != 0) 3420 continue; 3421 return nat; 3422 } 3423 } 3424 3425 /* 3426 * So if we didn't find it but there are wildcard members in the hash 3427 * table, go back and look for them. We do this search and update here 3428 * because it is modifying the NAT table and we want to do this only 3429 * for the first packet that matches. The exception, of course, is 3430 * for "dummy" (FI_IGNORE) lookups. 3431 */ 3432 find_in_wild_ports: 3433 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) 3434 return NULL; 3435 if (ifs->ifs_nat_stats.ns_wilds == 0) 3436 return NULL; 3437 3438 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3439 3440 hv = NAT_HASH_FN(dst, 0, 0xffffffff); 3441 hv = NAT_HASH_FN(src.s_addr, hv, ifs->ifs_ipf_nattable_sz); 3442 3443 WRITE_ENTER(&ifs->ifs_ipf_nat); 3444 3445 nat = ifs->ifs_nat_table[1][hv]; 3446 for (; nat; nat = nat->nat_hnext[1]) { 3447 if (nat->nat_v != 4) 3448 continue; 3449 3450 if (nat->nat_ifps[0] != NULL) { 3451 if ((ifp != NULL) && (ifp != nat->nat_ifps[0])) 3452 continue; 3453 } else if (ifp != NULL) 3454 nat->nat_ifps[0] = ifp; 3455 3456 if (nat->nat_p != fin->fin_p) 3457 continue; 3458 if (nat->nat_oip.s_addr != src.s_addr || 3459 nat->nat_outip.s_addr != dst) 3460 continue; 3461 3462 nflags = nat->nat_flags; 3463 if (!(nflags & (NAT_TCPUDP|SI_WILDP))) 3464 continue; 3465 3466 if (nat_wildok(nat, (int)sport, (int)dport, nflags, 3467 NAT_INBOUND) == 1) { 3468 if ((fin->fin_flx & FI_IGNORE) != 0) 3469 break; 3470 if ((nflags & SI_CLONE) != 0) { 3471 nat = fr_natclone(fin, nat); 3472 if (nat == NULL) 3473 break; 3474 } else { 3475 MUTEX_ENTER(&ifs->ifs_ipf_nat_new); 3476 ifs->ifs_nat_stats.ns_wilds--; 3477 MUTEX_EXIT(&ifs->ifs_ipf_nat_new); 3478 } 3479 nat->nat_oport = sport; 3480 nat->nat_outport = dport; 3481 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); 3482 nat_tabmove(nat, ifs); 3483 break; 3484 } 3485 } 3486 3487 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3488 3489 return nat; 3490 } 3491 3492 3493 /* ------------------------------------------------------------------------ */ 3494 /* Function: nat_tabmove */ 3495 /* Returns: Nil */ 3496 /* Parameters: nat(I) - pointer to NAT structure */ 3497 /* Write Lock: ipf_nat */ 3498 /* */ 3499 /* This function is only called for TCP/UDP NAT table entries where the */ 3500 /* original was placed in the table without hashing on the ports and we now */ 3501 /* want to include hashing on port numbers. */ 3502 /* ------------------------------------------------------------------------ */ 3503 static void nat_tabmove(nat, ifs) 3504 nat_t *nat; 3505 ipf_stack_t *ifs; 3506 { 3507 nat_t **natp; 3508 u_int hv; 3509 3510 if (nat->nat_flags & SI_CLONE) 3511 return; 3512 3513 /* 3514 * Remove the NAT entry from the old location 3515 */ 3516 if (nat->nat_hnext[0]) 3517 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; 3518 *nat->nat_phnext[0] = nat->nat_hnext[0]; 3519 ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--; 3520 3521 if (nat->nat_hnext[1]) 3522 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; 3523 *nat->nat_phnext[1] = nat->nat_hnext[1]; 3524 ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--; 3525 3526 /* 3527 * Add into the NAT table in the new position 3528 */ 3529 hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff); 3530 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport, 3531 ifs->ifs_ipf_nattable_sz); 3532 nat->nat_hv[0] = hv; 3533 natp = &ifs->ifs_nat_table[0][hv]; 3534 if (*natp) 3535 (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; 3536 nat->nat_phnext[0] = natp; 3537 nat->nat_hnext[0] = *natp; 3538 *natp = nat; 3539 ifs->ifs_nat_stats.ns_bucketlen[0][hv]++; 3540 3541 hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff); 3542 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport, 3543 ifs->ifs_ipf_nattable_sz); 3544 nat->nat_hv[1] = hv; 3545 natp = &ifs->ifs_nat_table[1][hv]; 3546 if (*natp) 3547 (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; 3548 nat->nat_phnext[1] = natp; 3549 nat->nat_hnext[1] = *natp; 3550 *natp = nat; 3551 ifs->ifs_nat_stats.ns_bucketlen[1][hv]++; 3552 } 3553 3554 3555 /* ------------------------------------------------------------------------ */ 3556 /* Function: nat_outlookup */ 3557 /* Returns: nat_t* - NULL == no match, */ 3558 /* else pointer to matching NAT entry */ 3559 /* Parameters: fin(I) - pointer to packet information */ 3560 /* flags(I) - NAT flags for this packet */ 3561 /* p(I) - protocol for this packet */ 3562 /* src(I) - source IP address */ 3563 /* dst(I) - destination IP address */ 3564 /* rw(I) - 1 == write lock on ipf_nat held, 0 == read lock. */ 3565 /* */ 3566 /* Lookup a nat entry based on the source 'real' ip address/port and */ 3567 /* destination address/port. We use this lookup when sending a packet out, */ 3568 /* we're looking for a table entry, based on the source address. */ 3569 /* */ 3570 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */ 3571 /* */ 3572 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */ 3573 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */ 3574 /* */ 3575 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */ 3576 /* the packet is of said protocol */ 3577 /* ------------------------------------------------------------------------ */ 3578 nat_t *nat_outlookup(fin, flags, p, src, dst) 3579 fr_info_t *fin; 3580 u_int flags, p; 3581 struct in_addr src , dst; 3582 { 3583 u_short sport, dport; 3584 u_int sflags; 3585 ipnat_t *ipn; 3586 u_32_t srcip; 3587 nat_t *nat; 3588 int nflags; 3589 void *ifp; 3590 u_int hv; 3591 ipf_stack_t *ifs = fin->fin_ifs; 3592 3593 ifp = fin->fin_ifp; 3594 3595 srcip = src.s_addr; 3596 sflags = flags & IPN_TCPUDPICMP; 3597 sport = 0; 3598 dport = 0; 3599 3600 switch (p) 3601 { 3602 case IPPROTO_TCP : 3603 case IPPROTO_UDP : 3604 sport = htons(fin->fin_data[0]); 3605 dport = htons(fin->fin_data[1]); 3606 break; 3607 case IPPROTO_ICMP : 3608 if (flags & IPN_ICMPERR) 3609 sport = fin->fin_data[1]; 3610 else 3611 dport = fin->fin_data[1]; 3612 break; 3613 default : 3614 break; 3615 } 3616 3617 if ((flags & SI_WILDP) != 0) 3618 goto find_out_wild_ports; 3619 3620 hv = NAT_HASH_FN(srcip, sport, 0xffffffff); 3621 hv = NAT_HASH_FN(dst.s_addr, hv + dport, ifs->ifs_ipf_nattable_sz); 3622 nat = ifs->ifs_nat_table[0][hv]; 3623 for (; nat; nat = nat->nat_hnext[0]) { 3624 if (nat->nat_v != 4) 3625 continue; 3626 3627 if (nat->nat_ifps[1] != NULL) { 3628 if ((ifp != NULL) && (ifp != nat->nat_ifps[1])) 3629 continue; 3630 } else if (ifp != NULL) 3631 nat->nat_ifps[1] = ifp; 3632 3633 nflags = nat->nat_flags; 3634 3635 if (nat->nat_inip.s_addr == srcip && 3636 nat->nat_oip.s_addr == dst.s_addr && 3637 (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP))) 3638 || (p == nat->nat_p))) { 3639 switch (p) 3640 { 3641 #if 0 3642 case IPPROTO_GRE : 3643 if (nat->nat_call[1] != fin->fin_data[0]) 3644 continue; 3645 break; 3646 #endif 3647 case IPPROTO_TCP : 3648 case IPPROTO_UDP : 3649 if (nat->nat_oport != dport) 3650 continue; 3651 if (nat->nat_inport != sport) 3652 continue; 3653 break; 3654 default : 3655 break; 3656 } 3657 3658 ipn = nat->nat_ptr; 3659 if ((ipn != NULL) && (nat->nat_aps != NULL)) 3660 if (appr_match(fin, nat) != 0) 3661 continue; 3662 return nat; 3663 } 3664 } 3665 3666 /* 3667 * So if we didn't find it but there are wildcard members in the hash 3668 * table, go back and look for them. We do this search and update here 3669 * because it is modifying the NAT table and we want to do this only 3670 * for the first packet that matches. The exception, of course, is 3671 * for "dummy" (FI_IGNORE) lookups. 3672 */ 3673 find_out_wild_ports: 3674 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) 3675 return NULL; 3676 if (ifs->ifs_nat_stats.ns_wilds == 0) 3677 return NULL; 3678 3679 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3680 3681 hv = NAT_HASH_FN(srcip, 0, 0xffffffff); 3682 hv = NAT_HASH_FN(dst.s_addr, hv, ifs->ifs_ipf_nattable_sz); 3683 3684 WRITE_ENTER(&ifs->ifs_ipf_nat); 3685 3686 nat = ifs->ifs_nat_table[0][hv]; 3687 for (; nat; nat = nat->nat_hnext[0]) { 3688 if (nat->nat_v != 4) 3689 continue; 3690 3691 if (nat->nat_ifps[1] != NULL) { 3692 if ((ifp != NULL) && (ifp != nat->nat_ifps[1])) 3693 continue; 3694 } else if (ifp != NULL) 3695 nat->nat_ifps[1] = ifp; 3696 3697 if (nat->nat_p != fin->fin_p) 3698 continue; 3699 if ((nat->nat_inip.s_addr != srcip) || 3700 (nat->nat_oip.s_addr != dst.s_addr)) 3701 continue; 3702 3703 nflags = nat->nat_flags; 3704 if (!(nflags & (NAT_TCPUDP|SI_WILDP))) 3705 continue; 3706 3707 if (nat_wildok(nat, (int)sport, (int)dport, nflags, 3708 NAT_OUTBOUND) == 1) { 3709 if ((fin->fin_flx & FI_IGNORE) != 0) 3710 break; 3711 if ((nflags & SI_CLONE) != 0) { 3712 nat = fr_natclone(fin, nat); 3713 if (nat == NULL) 3714 break; 3715 } else { 3716 MUTEX_ENTER(&ifs->ifs_ipf_nat_new); 3717 ifs->ifs_nat_stats.ns_wilds--; 3718 MUTEX_EXIT(&ifs->ifs_ipf_nat_new); 3719 } 3720 nat->nat_inport = sport; 3721 nat->nat_oport = dport; 3722 if (nat->nat_outport == 0) 3723 nat->nat_outport = sport; 3724 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); 3725 nat_tabmove(nat, ifs); 3726 break; 3727 } 3728 } 3729 3730 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3731 3732 return nat; 3733 } 3734 3735 3736 /* ------------------------------------------------------------------------ */ 3737 /* Function: nat_lookupredir */ 3738 /* Returns: nat_t* - NULL == no match, */ 3739 /* else pointer to matching NAT entry */ 3740 /* Parameters: np(I) - pointer to description of packet to find NAT table */ 3741 /* entry for. */ 3742 /* */ 3743 /* Lookup the NAT tables to search for a matching redirect */ 3744 /* ------------------------------------------------------------------------ */ 3745 nat_t *nat_lookupredir(np, ifs) 3746 natlookup_t *np; 3747 ipf_stack_t *ifs; 3748 { 3749 fr_info_t fi; 3750 nat_t *nat; 3751 3752 bzero((char *)&fi, sizeof(fi)); 3753 if (np->nl_flags & IPN_IN) { 3754 fi.fin_data[0] = ntohs(np->nl_realport); 3755 fi.fin_data[1] = ntohs(np->nl_outport); 3756 } else { 3757 fi.fin_data[0] = ntohs(np->nl_inport); 3758 fi.fin_data[1] = ntohs(np->nl_outport); 3759 } 3760 if (np->nl_flags & IPN_TCP) 3761 fi.fin_p = IPPROTO_TCP; 3762 else if (np->nl_flags & IPN_UDP) 3763 fi.fin_p = IPPROTO_UDP; 3764 else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY)) 3765 fi.fin_p = IPPROTO_ICMP; 3766 3767 fi.fin_ifs = ifs; 3768 /* 3769 * We can do two sorts of lookups: 3770 * - IPN_IN: we have the `real' and `out' address, look for `in'. 3771 * - default: we have the `in' and `out' address, look for `real'. 3772 */ 3773 if (np->nl_flags & IPN_IN) { 3774 if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p, 3775 np->nl_realip, np->nl_outip))) { 3776 np->nl_inip = nat->nat_inip; 3777 np->nl_inport = nat->nat_inport; 3778 } 3779 } else { 3780 /* 3781 * If nl_inip is non null, this is a lookup based on the real 3782 * ip address. Else, we use the fake. 3783 */ 3784 if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p, 3785 np->nl_inip, np->nl_outip))) { 3786 3787 if ((np->nl_flags & IPN_FINDFORWARD) != 0) { 3788 fr_info_t fin; 3789 bzero((char *)&fin, sizeof(fin)); 3790 fin.fin_p = nat->nat_p; 3791 fin.fin_data[0] = ntohs(nat->nat_outport); 3792 fin.fin_data[1] = ntohs(nat->nat_oport); 3793 fin.fin_ifs = ifs; 3794 if (nat_inlookup(&fin, np->nl_flags, fin.fin_p, 3795 nat->nat_outip, 3796 nat->nat_oip) != NULL) { 3797 np->nl_flags &= ~IPN_FINDFORWARD; 3798 } 3799 } 3800 3801 np->nl_realip = nat->nat_outip; 3802 np->nl_realport = nat->nat_outport; 3803 } 3804 } 3805 3806 return nat; 3807 } 3808 3809 3810 /* ------------------------------------------------------------------------ */ 3811 /* Function: nat_match */ 3812 /* Returns: int - 0 == no match, 1 == match */ 3813 /* Parameters: fin(I) - pointer to packet information */ 3814 /* np(I) - pointer to NAT rule */ 3815 /* */ 3816 /* Pull the matching of a packet against a NAT rule out of that complex */ 3817 /* loop inside fr_checknatin() and lay it out properly in its own function. */ 3818 /* ------------------------------------------------------------------------ */ 3819 static int nat_match(fin, np) 3820 fr_info_t *fin; 3821 ipnat_t *np; 3822 { 3823 frtuc_t *ft; 3824 3825 if (fin->fin_v != 4) 3826 return 0; 3827 3828 if (np->in_p && fin->fin_p != np->in_p) 3829 return 0; 3830 3831 if (fin->fin_out) { 3832 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK))) 3833 return 0; 3834 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip) 3835 ^ ((np->in_flags & IPN_NOTSRC) != 0)) 3836 return 0; 3837 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip) 3838 ^ ((np->in_flags & IPN_NOTDST) != 0)) 3839 return 0; 3840 } else { 3841 if (!(np->in_redir & NAT_REDIRECT)) 3842 return 0; 3843 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip) 3844 ^ ((np->in_flags & IPN_NOTSRC) != 0)) 3845 return 0; 3846 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip) 3847 ^ ((np->in_flags & IPN_NOTDST) != 0)) 3848 return 0; 3849 } 3850 3851 ft = &np->in_tuc; 3852 if (!(fin->fin_flx & FI_TCPUDP) || 3853 (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) { 3854 if (ft->ftu_scmp || ft->ftu_dcmp) 3855 return 0; 3856 return 1; 3857 } 3858 3859 return fr_tcpudpchk(fin, ft); 3860 } 3861 3862 3863 /* ------------------------------------------------------------------------ */ 3864 /* Function: nat_update */ 3865 /* Returns: Nil */ 3866 /* Parameters: fin(I) - pointer to packet information */ 3867 /* nat(I) - pointer to NAT structure */ 3868 /* np(I) - pointer to NAT rule */ 3869 /* Locks: nat_lock */ 3870 /* */ 3871 /* Updates the lifetime of a NAT table entry for non-TCP packets. Must be */ 3872 /* called with fin_rev updated - i.e. after calling nat_proto(). */ 3873 /* ------------------------------------------------------------------------ */ 3874 void nat_update(fin, nat, np) 3875 fr_info_t *fin; 3876 nat_t *nat; 3877 ipnat_t *np; 3878 { 3879 ipftq_t *ifq, *ifq2; 3880 ipftqent_t *tqe; 3881 ipf_stack_t *ifs = fin->fin_ifs; 3882 3883 tqe = &nat->nat_tqe; 3884 ifq = tqe->tqe_ifq; 3885 3886 /* 3887 * We allow over-riding of NAT timeouts from NAT rules, even for 3888 * TCP, however, if it is TCP and there is no rule timeout set, 3889 * then do not update the timeout here. 3890 */ 3891 if (np != NULL) 3892 ifq2 = np->in_tqehead[fin->fin_rev]; 3893 else 3894 ifq2 = NULL; 3895 3896 if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) { 3897 (void) fr_tcp_age(&nat->nat_tqe, fin, ifs->ifs_nat_tqb, 0); 3898 } else { 3899 if (ifq2 == NULL) { 3900 if (nat->nat_p == IPPROTO_UDP) 3901 ifq2 = &ifs->ifs_nat_udptq; 3902 else if (nat->nat_p == IPPROTO_ICMP) 3903 ifq2 = &ifs->ifs_nat_icmptq; 3904 else 3905 ifq2 = &ifs->ifs_nat_iptq; 3906 } 3907 3908 fr_movequeue(tqe, ifq, ifq2, ifs); 3909 } 3910 } 3911 3912 3913 /* ------------------------------------------------------------------------ */ 3914 /* Function: fr_checknatout */ 3915 /* Returns: int - -1 == packet failed NAT checks so block it, */ 3916 /* 0 == no packet translation occurred, */ 3917 /* 1 == packet was successfully translated. */ 3918 /* Parameters: fin(I) - pointer to packet information */ 3919 /* passp(I) - pointer to filtering result flags */ 3920 /* */ 3921 /* Check to see if an outcoming packet should be changed. ICMP packets are */ 3922 /* first checked to see if they match an existing entry (if an error), */ 3923 /* otherwise a search of the current NAT table is made. If neither results */ 3924 /* in a match then a search for a matching NAT rule is made. Create a new */ 3925 /* NAT entry if a we matched a NAT rule. Lastly, actually change the */ 3926 /* packet header(s) as required. */ 3927 /* ------------------------------------------------------------------------ */ 3928 int fr_checknatout(fin, passp) 3929 fr_info_t *fin; 3930 u_32_t *passp; 3931 { 3932 ipnat_t *np = NULL, *npnext; 3933 struct ifnet *ifp, *sifp; 3934 icmphdr_t *icmp = NULL; 3935 tcphdr_t *tcp = NULL; 3936 int rval, natfailed; 3937 u_int nflags = 0; 3938 u_32_t ipa, iph; 3939 int natadd = 1; 3940 frentry_t *fr; 3941 nat_t *nat; 3942 ipf_stack_t *ifs = fin->fin_ifs; 3943 3944 if (ifs->ifs_fr_nat_lock != 0) 3945 return 0; 3946 if (ifs->ifs_nat_stats.ns_rules == 0 && ifs->ifs_nat_instances == NULL) 3947 return 0; 3948 3949 natfailed = 0; 3950 fr = fin->fin_fr; 3951 sifp = fin->fin_ifp; 3952 if ((fr != NULL) && !(fr->fr_flags & FR_DUP) && 3953 fr->fr_tifs[fin->fin_rev].fd_ifp && 3954 fr->fr_tifs[fin->fin_rev].fd_ifp != (void *)-1) 3955 fin->fin_ifp = fr->fr_tifs[fin->fin_rev].fd_ifp; 3956 ifp = fin->fin_ifp; 3957 3958 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 3959 switch (fin->fin_p) 3960 { 3961 case IPPROTO_TCP : 3962 nflags = IPN_TCP; 3963 break; 3964 case IPPROTO_UDP : 3965 nflags = IPN_UDP; 3966 break; 3967 case IPPROTO_ICMP : 3968 icmp = fin->fin_dp; 3969 3970 /* 3971 * This is an incoming packet, so the destination is 3972 * the icmp_id and the source port equals 0 3973 */ 3974 if (nat_icmpquerytype4(icmp->icmp_type)) 3975 nflags = IPN_ICMPQUERY; 3976 break; 3977 default : 3978 break; 3979 } 3980 3981 if ((nflags & IPN_TCPUDP)) 3982 tcp = fin->fin_dp; 3983 } 3984 3985 ipa = fin->fin_saddr; 3986 3987 READ_ENTER(&ifs->ifs_ipf_nat); 3988 3989 if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && 3990 (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND))) 3991 /*EMPTY*/; 3992 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin))) 3993 natadd = 0; 3994 else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p, 3995 fin->fin_src, fin->fin_dst))) { 3996 nflags = nat->nat_flags; 3997 } else { 3998 u_32_t hv, msk, nmsk; 3999 4000 /* 4001 * There is no current entry in the nat table for this packet. 4002 * 4003 * If the packet is a fragment, but not the first fragment, 4004 * then don't do anything. Otherwise, if there is a matching 4005 * nat rule, try to create a new nat entry. 4006 */ 4007 if ((fin->fin_off != 0) && (fin->fin_flx & FI_TCPUDP)) 4008 goto nonatfrag; 4009 4010 msk = 0xffffffff; 4011 nmsk = ifs->ifs_nat_masks; 4012 maskloop: 4013 iph = ipa & htonl(msk); 4014 hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_natrules_sz); 4015 for (np = ifs->ifs_nat_rules[hv]; np; np = npnext) { 4016 npnext = np->in_mnext; 4017 if ((np->in_ifps[1] && (np->in_ifps[1] != ifp))) 4018 continue; 4019 if (np->in_v != fin->fin_v) 4020 continue; 4021 if (np->in_p && (np->in_p != fin->fin_p)) 4022 continue; 4023 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags)) 4024 continue; 4025 if (np->in_flags & IPN_FILTER) { 4026 if (!nat_match(fin, np)) 4027 continue; 4028 } else if ((ipa & np->in_inmsk) != np->in_inip) 4029 continue; 4030 4031 if ((fr != NULL) && 4032 !fr_matchtag(&np->in_tag, &fr->fr_nattag)) 4033 continue; 4034 4035 if (*np->in_plabel != '\0') { 4036 if (((np->in_flags & IPN_FILTER) == 0) && 4037 (np->in_dport != tcp->th_dport)) 4038 continue; 4039 if (appr_ok(fin, tcp, np) == 0) 4040 continue; 4041 } 4042 4043 ATOMIC_INC32(np->in_use); 4044 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4045 WRITE_ENTER(&ifs->ifs_ipf_nat); 4046 nat = nat_new(fin, np, NULL, nflags, NAT_OUTBOUND); 4047 if (nat != NULL) { 4048 np->in_use--; 4049 np->in_hits++; 4050 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 4051 break; 4052 } 4053 natfailed = -1; 4054 npnext = np->in_mnext; 4055 fr_ipnatderef(&np, ifs); 4056 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 4057 } 4058 if ((np == NULL) && (nmsk != 0)) { 4059 while (nmsk) { 4060 msk <<= 1; 4061 if (nmsk & 0x80000000) 4062 break; 4063 nmsk <<= 1; 4064 } 4065 if (nmsk != 0) { 4066 nmsk <<= 1; 4067 goto maskloop; 4068 } 4069 } 4070 } 4071 4072 nonatfrag: 4073 if (nat != NULL) { 4074 rval = fr_natout(fin, nat, natadd, nflags); 4075 if (rval == 1) { 4076 MUTEX_ENTER(&nat->nat_lock); 4077 nat_update(fin, nat, nat->nat_ptr); 4078 nat->nat_bytes[1] += fin->fin_plen; 4079 nat->nat_pkts[1]++; 4080 nat->nat_ref++; 4081 MUTEX_EXIT(&nat->nat_lock); 4082 fin->fin_nat = nat; 4083 } 4084 } else 4085 rval = natfailed; 4086 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4087 4088 if (rval == -1) { 4089 if (passp != NULL) 4090 *passp = FR_BLOCK; 4091 fin->fin_flx |= FI_BADNAT; 4092 } 4093 fin->fin_ifp = sifp; 4094 return rval; 4095 } 4096 4097 /* ------------------------------------------------------------------------ */ 4098 /* Function: fr_natout */ 4099 /* Returns: int - -1 == packet failed NAT checks so block it, */ 4100 /* 1 == packet was successfully translated. */ 4101 /* Parameters: fin(I) - pointer to packet information */ 4102 /* nat(I) - pointer to NAT structure */ 4103 /* natadd(I) - flag indicating if it is safe to add frag cache */ 4104 /* nflags(I) - NAT flags set for this packet */ 4105 /* */ 4106 /* Translate a packet coming "out" on an interface. */ 4107 /* ------------------------------------------------------------------------ */ 4108 int fr_natout(fin, nat, natadd, nflags) 4109 fr_info_t *fin; 4110 nat_t *nat; 4111 int natadd; 4112 u_32_t nflags; 4113 { 4114 icmphdr_t *icmp; 4115 u_short *csump; 4116 u_32_t sumd; 4117 tcphdr_t *tcp; 4118 ipnat_t *np; 4119 int i; 4120 ipf_stack_t *ifs = fin->fin_ifs; 4121 4122 if (fin->fin_v == 6) { 4123 #ifdef USE_INET6 4124 return fr_nat6out(fin, nat, natadd, nflags); 4125 #else 4126 return NULL; 4127 #endif 4128 } 4129 4130 #if SOLARIS && defined(_KERNEL) 4131 net_handle_t net_data_p = ifs->ifs_ipf_ipv4; 4132 #endif 4133 4134 tcp = NULL; 4135 icmp = NULL; 4136 csump = NULL; 4137 np = nat->nat_ptr; 4138 4139 if ((natadd != 0) && (fin->fin_flx & FI_FRAG)) 4140 (void) fr_nat_newfrag(fin, 0, nat); 4141 4142 /* 4143 * Fix up checksums, not by recalculating them, but 4144 * simply computing adjustments. 4145 * This is only done for STREAMS based IP implementations where the 4146 * checksum has already been calculated by IP. In all other cases, 4147 * IPFilter is called before the checksum needs calculating so there 4148 * is no call to modify whatever is in the header now. 4149 */ 4150 ASSERT(fin->fin_m != NULL); 4151 if (fin->fin_v == 4 && !NET_IS_HCK_L3_FULL(net_data_p, fin->fin_m)) { 4152 if (nflags == IPN_ICMPERR) { 4153 u_32_t s1, s2; 4154 4155 s1 = LONG_SUM(ntohl(fin->fin_saddr)); 4156 s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr)); 4157 CALC_SUMD(s1, s2, sumd); 4158 4159 fix_outcksum(&fin->fin_ip->ip_sum, sumd); 4160 } 4161 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ 4162 defined(linux) || defined(BRIDGE_IPF) 4163 else { 4164 /* 4165 * Strictly speaking, this isn't necessary on BSD 4166 * kernels because they do checksum calculation after 4167 * this code has run BUT if ipfilter is being used 4168 * to do NAT as a bridge, that code doesn't exist. 4169 */ 4170 if (nat->nat_dir == NAT_OUTBOUND) 4171 fix_outcksum(&fin->fin_ip->ip_sum, 4172 nat->nat_ipsumd); 4173 else 4174 fix_incksum(&fin->fin_ip->ip_sum, 4175 nat->nat_ipsumd); 4176 } 4177 #endif 4178 } 4179 4180 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 4181 if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) { 4182 tcp = fin->fin_dp; 4183 4184 tcp->th_sport = nat->nat_outport; 4185 fin->fin_data[0] = ntohs(nat->nat_outport); 4186 } 4187 4188 if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) { 4189 icmp = fin->fin_dp; 4190 icmp->icmp_id = nat->nat_outport; 4191 } 4192 4193 csump = nat_proto(fin, nat, nflags); 4194 } 4195 4196 fin->fin_ip->ip_src = nat->nat_outip; 4197 4198 /* 4199 * The above comments do not hold for layer 4 (or higher) checksums... 4200 */ 4201 if (csump != NULL && !NET_IS_HCK_L4_FULL(net_data_p, fin->fin_m)) { 4202 if (nflags & IPN_TCPUDP && 4203 NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) 4204 sumd = nat->nat_sumd[1]; 4205 else 4206 sumd = nat->nat_sumd[0]; 4207 4208 if (nat->nat_dir == NAT_OUTBOUND) 4209 fix_outcksum(csump, sumd); 4210 else 4211 fix_incksum(csump, sumd); 4212 } 4213 #ifdef IPFILTER_SYNC 4214 ipfsync_update(SMC_NAT, fin, nat->nat_sync); 4215 #endif 4216 /* ------------------------------------------------------------- */ 4217 /* A few quick notes: */ 4218 /* Following are test conditions prior to calling the */ 4219 /* appr_check routine. */ 4220 /* */ 4221 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */ 4222 /* with a redirect rule, we attempt to match the packet's */ 4223 /* source port against in_dport, otherwise we'd compare the */ 4224 /* packet's destination. */ 4225 /* ------------------------------------------------------------- */ 4226 if ((np != NULL) && (np->in_apr != NULL)) { 4227 i = appr_check(fin, nat); 4228 if (i == 0) 4229 i = 1; 4230 } else 4231 i = 1; 4232 ifs->ifs_nat_stats.ns_mapped[1]++; 4233 fin->fin_flx |= FI_NATED; 4234 return i; 4235 } 4236 4237 4238 /* ------------------------------------------------------------------------ */ 4239 /* Function: fr_checknatin */ 4240 /* Returns: int - -1 == packet failed NAT checks so block it, */ 4241 /* 0 == no packet translation occurred, */ 4242 /* 1 == packet was successfully translated. */ 4243 /* Parameters: fin(I) - pointer to packet information */ 4244 /* passp(I) - pointer to filtering result flags */ 4245 /* */ 4246 /* Check to see if an incoming packet should be changed. ICMP packets are */ 4247 /* first checked to see if they match an existing entry (if an error), */ 4248 /* otherwise a search of the current NAT table is made. If neither results */ 4249 /* in a match then a search for a matching NAT rule is made. Create a new */ 4250 /* NAT entry if a we matched a NAT rule. Lastly, actually change the */ 4251 /* packet header(s) as required. */ 4252 /* ------------------------------------------------------------------------ */ 4253 int fr_checknatin(fin, passp) 4254 fr_info_t *fin; 4255 u_32_t *passp; 4256 { 4257 u_int nflags, natadd; 4258 ipnat_t *np, *npnext; 4259 int rval, natfailed; 4260 struct ifnet *ifp; 4261 struct in_addr in; 4262 icmphdr_t *icmp; 4263 tcphdr_t *tcp; 4264 u_short dport; 4265 nat_t *nat; 4266 u_32_t iph; 4267 ipf_stack_t *ifs = fin->fin_ifs; 4268 4269 if (ifs->ifs_fr_nat_lock != 0) 4270 return 0; 4271 if (ifs->ifs_nat_stats.ns_rules == 0 && ifs->ifs_nat_instances == NULL) 4272 return 0; 4273 4274 tcp = NULL; 4275 icmp = NULL; 4276 dport = 0; 4277 natadd = 1; 4278 nflags = 0; 4279 natfailed = 0; 4280 ifp = fin->fin_ifp; 4281 4282 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 4283 switch (fin->fin_p) 4284 { 4285 case IPPROTO_TCP : 4286 nflags = IPN_TCP; 4287 break; 4288 case IPPROTO_UDP : 4289 nflags = IPN_UDP; 4290 break; 4291 case IPPROTO_ICMP : 4292 icmp = fin->fin_dp; 4293 4294 /* 4295 * This is an incoming packet, so the destination is 4296 * the icmp_id and the source port equals 0 4297 */ 4298 if (nat_icmpquerytype4(icmp->icmp_type)) { 4299 nflags = IPN_ICMPQUERY; 4300 dport = icmp->icmp_id; 4301 } break; 4302 default : 4303 break; 4304 } 4305 4306 if ((nflags & IPN_TCPUDP)) { 4307 tcp = fin->fin_dp; 4308 dport = tcp->th_dport; 4309 } 4310 } 4311 4312 in = fin->fin_dst; 4313 4314 READ_ENTER(&ifs->ifs_ipf_nat); 4315 4316 if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && 4317 (nat = nat_icmperror(fin, &nflags, NAT_INBOUND))) 4318 /*EMPTY*/; 4319 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin))) 4320 natadd = 0; 4321 else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p, 4322 fin->fin_src, in))) { 4323 nflags = nat->nat_flags; 4324 } else { 4325 u_32_t hv, msk, rmsk; 4326 4327 /* 4328 * There is no current entry in the nat table for this packet. 4329 * 4330 * If the packet is a fragment, but not the first fragment, 4331 * then don't do anything. Otherwise, if there is a matching 4332 * nat rule, try to create a new nat entry. 4333 */ 4334 if ((fin->fin_off != 0) && (fin->fin_flx & FI_TCPUDP)) 4335 goto nonatfrag; 4336 4337 rmsk = ifs->ifs_rdr_masks; 4338 msk = 0xffffffff; 4339 maskloop: 4340 iph = in.s_addr & htonl(msk); 4341 hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_rdrrules_sz); 4342 for (np = ifs->ifs_rdr_rules[hv]; np; np = npnext) { 4343 npnext = np->in_rnext; 4344 if (np->in_ifps[0] && (np->in_ifps[0] != ifp)) 4345 continue; 4346 if (np->in_v != fin->fin_v) 4347 continue; 4348 if (np->in_p && (np->in_p != fin->fin_p)) 4349 continue; 4350 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags)) 4351 continue; 4352 if (np->in_flags & IPN_FILTER) { 4353 if (!nat_match(fin, np)) 4354 continue; 4355 } else { 4356 if ((in.s_addr & np->in_outmsk) != np->in_outip) 4357 continue; 4358 if (np->in_pmin && 4359 ((ntohs(np->in_pmax) < ntohs(dport)) || 4360 (ntohs(dport) < ntohs(np->in_pmin)))) 4361 continue; 4362 } 4363 4364 if (*np->in_plabel != '\0') { 4365 if (!appr_ok(fin, tcp, np)) { 4366 continue; 4367 } 4368 } 4369 4370 ATOMIC_INC32(np->in_use); 4371 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4372 WRITE_ENTER(&ifs->ifs_ipf_nat); 4373 nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND); 4374 if (nat != NULL) { 4375 np->in_use--; 4376 np->in_hits++; 4377 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 4378 break; 4379 } 4380 natfailed = -1; 4381 npnext = np->in_rnext; 4382 fr_ipnatderef(&np, ifs); 4383 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 4384 } 4385 4386 if ((np == NULL) && (rmsk != 0)) { 4387 while (rmsk) { 4388 msk <<= 1; 4389 if (rmsk & 0x80000000) 4390 break; 4391 rmsk <<= 1; 4392 } 4393 if (rmsk != 0) { 4394 rmsk <<= 1; 4395 goto maskloop; 4396 } 4397 } 4398 } 4399 4400 nonatfrag: 4401 if (nat != NULL) { 4402 rval = fr_natin(fin, nat, natadd, nflags); 4403 if (rval == 1) { 4404 MUTEX_ENTER(&nat->nat_lock); 4405 nat_update(fin, nat, nat->nat_ptr); 4406 nat->nat_bytes[0] += fin->fin_plen; 4407 nat->nat_pkts[0]++; 4408 nat->nat_ref++; 4409 MUTEX_EXIT(&nat->nat_lock); 4410 fin->fin_nat = nat; 4411 fin->fin_state = nat->nat_state; 4412 } 4413 } else 4414 rval = natfailed; 4415 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4416 4417 if (rval == -1) { 4418 if (passp != NULL) 4419 *passp = FR_BLOCK; 4420 fin->fin_flx |= FI_BADNAT; 4421 } 4422 return rval; 4423 } 4424 4425 4426 /* ------------------------------------------------------------------------ */ 4427 /* Function: fr_natin */ 4428 /* Returns: int - -1 == packet failed NAT checks so block it, */ 4429 /* 1 == packet was successfully translated. */ 4430 /* Parameters: fin(I) - pointer to packet information */ 4431 /* nat(I) - pointer to NAT structure */ 4432 /* natadd(I) - flag indicating if it is safe to add frag cache */ 4433 /* nflags(I) - NAT flags set for this packet */ 4434 /* Locks Held: ipf_nat (READ) */ 4435 /* */ 4436 /* Translate a packet coming "in" on an interface. */ 4437 /* ------------------------------------------------------------------------ */ 4438 int fr_natin(fin, nat, natadd, nflags) 4439 fr_info_t *fin; 4440 nat_t *nat; 4441 int natadd; 4442 u_32_t nflags; 4443 { 4444 icmphdr_t *icmp; 4445 u_short *csump; 4446 tcphdr_t *tcp; 4447 ipnat_t *np; 4448 int i; 4449 ipf_stack_t *ifs = fin->fin_ifs; 4450 4451 if (fin->fin_v == 6) { 4452 #ifdef USE_INET6 4453 return fr_nat6in(fin, nat, natadd, nflags); 4454 #else 4455 return NULL; 4456 #endif 4457 } 4458 4459 #if SOLARIS && defined(_KERNEL) 4460 net_handle_t net_data_p = ifs->ifs_ipf_ipv4; 4461 #endif 4462 4463 tcp = NULL; 4464 csump = NULL; 4465 np = nat->nat_ptr; 4466 fin->fin_fr = nat->nat_fr; 4467 4468 if ((natadd != 0) && (fin->fin_flx & FI_FRAG)) 4469 (void) fr_nat_newfrag(fin, 0, nat); 4470 4471 if (np != NULL) { 4472 4473 /* ------------------------------------------------------------- */ 4474 /* A few quick notes: */ 4475 /* Following are test conditions prior to calling the */ 4476 /* appr_check routine. */ 4477 /* */ 4478 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */ 4479 /* with a map rule, we attempt to match the packet's */ 4480 /* source port against in_dport, otherwise we'd compare the */ 4481 /* packet's destination. */ 4482 /* ------------------------------------------------------------- */ 4483 if (np->in_apr != NULL) { 4484 i = appr_check(fin, nat); 4485 if (i == -1) { 4486 return -1; 4487 } 4488 } 4489 } 4490 4491 #ifdef IPFILTER_SYNC 4492 ipfsync_update(SMC_NAT, fin, nat->nat_sync); 4493 #endif 4494 4495 fin->fin_ip->ip_dst = nat->nat_inip; 4496 fin->fin_fi.fi_daddr = nat->nat_inip.s_addr; 4497 if (nflags & IPN_TCPUDP) 4498 tcp = fin->fin_dp; 4499 4500 /* 4501 * Fix up checksums, not by recalculating them, but 4502 * simply computing adjustments. 4503 * Why only do this for some platforms on inbound packets ? 4504 * Because for those that it is done, IP processing is yet to happen 4505 * and so the IPv4 header checksum has not yet been evaluated. 4506 * Perhaps it should always be done for the benefit of things like 4507 * fast forwarding (so that it doesn't need to be recomputed) but with 4508 * header checksum offloading, perhaps it is a moot point. 4509 */ 4510 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ 4511 defined(__osf__) || defined(linux) 4512 if (nat->nat_dir == NAT_OUTBOUND) 4513 fix_incksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd); 4514 else 4515 fix_outcksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd); 4516 #endif 4517 4518 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 4519 if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) { 4520 tcp->th_dport = nat->nat_inport; 4521 fin->fin_data[1] = ntohs(nat->nat_inport); 4522 } 4523 4524 4525 if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) { 4526 icmp = fin->fin_dp; 4527 4528 icmp->icmp_id = nat->nat_inport; 4529 } 4530 4531 csump = nat_proto(fin, nat, nflags); 4532 } 4533 4534 /* 4535 * In case they are being forwarded, inbound packets always need to have 4536 * their checksum adjusted even if hardware checksum validation said OK. 4537 */ 4538 if (csump != NULL) { 4539 if (nat->nat_dir == NAT_OUTBOUND) 4540 fix_incksum(csump, nat->nat_sumd[0]); 4541 else 4542 fix_outcksum(csump, nat->nat_sumd[0]); 4543 } 4544 4545 #if SOLARIS && defined(_KERNEL) 4546 if (nflags & IPN_TCPUDP && 4547 NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) { 4548 /* 4549 * Need to adjust the partial checksum result stored in 4550 * db_cksum16, which will be used for validation in IP. 4551 * See IP_CKSUM_RECV(). 4552 * Adjustment data should be the inverse of the IP address 4553 * changes, because db_cksum16 is supposed to be the complement 4554 * of the pesudo header. 4555 */ 4556 csump = &fin->fin_m->b_datap->db_cksum16; 4557 if (nat->nat_dir == NAT_OUTBOUND) 4558 fix_outcksum(csump, nat->nat_sumd[1]); 4559 else 4560 fix_incksum(csump, nat->nat_sumd[1]); 4561 } 4562 #endif 4563 4564 ifs->ifs_nat_stats.ns_mapped[0]++; 4565 fin->fin_flx |= FI_NATED; 4566 if (np != NULL && np->in_tag.ipt_num[0] != 0) 4567 fin->fin_nattag = &np->in_tag; 4568 return 1; 4569 } 4570 4571 4572 /* ------------------------------------------------------------------------ */ 4573 /* Function: nat_proto */ 4574 /* Returns: u_short* - pointer to transport header checksum to update, */ 4575 /* NULL if the transport protocol is not recognised */ 4576 /* as needing a checksum update. */ 4577 /* Parameters: fin(I) - pointer to packet information */ 4578 /* nat(I) - pointer to NAT structure */ 4579 /* nflags(I) - NAT flags set for this packet */ 4580 /* */ 4581 /* Return the pointer to the checksum field for each protocol so understood.*/ 4582 /* If support for making other changes to a protocol header is required, */ 4583 /* that is not strictly 'address' translation, such as clamping the MSS in */ 4584 /* TCP down to a specific value, then do it from here. */ 4585 /* ------------------------------------------------------------------------ */ 4586 u_short *nat_proto(fin, nat, nflags) 4587 fr_info_t *fin; 4588 nat_t *nat; 4589 u_int nflags; 4590 { 4591 icmphdr_t *icmp; 4592 struct icmp6_hdr *icmp6; 4593 u_short *csump; 4594 tcphdr_t *tcp; 4595 udphdr_t *udp; 4596 4597 csump = NULL; 4598 if (fin->fin_out == 0) { 4599 fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND); 4600 } else { 4601 fin->fin_rev = (nat->nat_dir == NAT_INBOUND); 4602 } 4603 4604 switch (fin->fin_p) 4605 { 4606 case IPPROTO_TCP : 4607 tcp = fin->fin_dp; 4608 4609 csump = &tcp->th_sum; 4610 4611 /* 4612 * Do a MSS CLAMPING on a SYN packet, 4613 * only deal IPv4 for now. 4614 */ 4615 if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0) 4616 nat_mssclamp(tcp, nat->nat_mssclamp, csump); 4617 4618 break; 4619 4620 case IPPROTO_UDP : 4621 udp = fin->fin_dp; 4622 4623 if (udp->uh_sum) 4624 csump = &udp->uh_sum; 4625 break; 4626 4627 case IPPROTO_ICMP : 4628 icmp = fin->fin_dp; 4629 4630 if ((nflags & IPN_ICMPQUERY) != 0) { 4631 if (icmp->icmp_cksum != 0) 4632 csump = &icmp->icmp_cksum; 4633 } 4634 break; 4635 4636 case IPPROTO_ICMPV6 : 4637 icmp6 = fin->fin_dp; 4638 4639 if ((nflags & IPN_ICMPQUERY) != 0) { 4640 if (icmp6->icmp6_cksum != 0) 4641 csump = &icmp6->icmp6_cksum; 4642 } 4643 break; 4644 } 4645 return csump; 4646 } 4647 4648 4649 /* ------------------------------------------------------------------------ */ 4650 /* Function: fr_natunload */ 4651 /* Returns: Nil */ 4652 /* Parameters: Nil */ 4653 /* */ 4654 /* Free all memory used by NAT structures allocated at runtime. */ 4655 /* ------------------------------------------------------------------------ */ 4656 void fr_natunload(ifs) 4657 ipf_stack_t *ifs; 4658 { 4659 ipftq_t *ifq, *ifqnext; 4660 4661 (void) nat_clearlist(ifs); 4662 (void) nat_flushtable(ifs); 4663 4664 /* 4665 * Proxy timeout queues are not cleaned here because although they 4666 * exist on the NAT list, appr_unload is called after fr_natunload 4667 * and the proxies actually are responsible for them being created. 4668 * Should the proxy timeouts have their own list? There's no real 4669 * justification as this is the only complication. 4670 */ 4671 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { 4672 ifqnext = ifq->ifq_next; 4673 if (((ifq->ifq_flags & IFQF_PROXY) == 0) && 4674 (fr_deletetimeoutqueue(ifq) == 0)) 4675 fr_freetimeoutqueue(ifq, ifs); 4676 } 4677 4678 if (ifs->ifs_nat_table[0] != NULL) { 4679 KFREES(ifs->ifs_nat_table[0], 4680 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 4681 ifs->ifs_nat_table[0] = NULL; 4682 } 4683 if (ifs->ifs_nat_table[1] != NULL) { 4684 KFREES(ifs->ifs_nat_table[1], 4685 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 4686 ifs->ifs_nat_table[1] = NULL; 4687 } 4688 if (ifs->ifs_nat_rules != NULL) { 4689 KFREES(ifs->ifs_nat_rules, 4690 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz); 4691 ifs->ifs_nat_rules = NULL; 4692 } 4693 if (ifs->ifs_rdr_rules != NULL) { 4694 KFREES(ifs->ifs_rdr_rules, 4695 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz); 4696 ifs->ifs_rdr_rules = NULL; 4697 } 4698 if (ifs->ifs_maptable != NULL) { 4699 KFREES(ifs->ifs_maptable, 4700 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); 4701 ifs->ifs_maptable = NULL; 4702 } 4703 if (ifs->ifs_nat_stats.ns_bucketlen[0] != NULL) { 4704 KFREES(ifs->ifs_nat_stats.ns_bucketlen[0], 4705 sizeof(u_long *) * ifs->ifs_ipf_nattable_sz); 4706 ifs->ifs_nat_stats.ns_bucketlen[0] = NULL; 4707 } 4708 if (ifs->ifs_nat_stats.ns_bucketlen[1] != NULL) { 4709 KFREES(ifs->ifs_nat_stats.ns_bucketlen[1], 4710 sizeof(u_long *) * ifs->ifs_ipf_nattable_sz); 4711 ifs->ifs_nat_stats.ns_bucketlen[1] = NULL; 4712 } 4713 4714 if (ifs->ifs_fr_nat_maxbucket_reset == 1) 4715 ifs->ifs_fr_nat_maxbucket = 0; 4716 4717 if (ifs->ifs_fr_nat_init == 1) { 4718 ifs->ifs_fr_nat_init = 0; 4719 fr_sttab_destroy(ifs->ifs_nat_tqb); 4720 4721 RW_DESTROY(&ifs->ifs_ipf_natfrag); 4722 RW_DESTROY(&ifs->ifs_ipf_nat); 4723 4724 MUTEX_DESTROY(&ifs->ifs_ipf_nat_new); 4725 MUTEX_DESTROY(&ifs->ifs_ipf_natio); 4726 4727 MUTEX_DESTROY(&ifs->ifs_nat_udptq.ifq_lock); 4728 MUTEX_DESTROY(&ifs->ifs_nat_icmptq.ifq_lock); 4729 MUTEX_DESTROY(&ifs->ifs_nat_iptq.ifq_lock); 4730 } 4731 } 4732 4733 4734 /* ------------------------------------------------------------------------ */ 4735 /* Function: fr_natexpire */ 4736 /* Returns: Nil */ 4737 /* Parameters: Nil */ 4738 /* */ 4739 /* Check all of the timeout queues for entries at the top which need to be */ 4740 /* expired. */ 4741 /* ------------------------------------------------------------------------ */ 4742 void fr_natexpire(ifs) 4743 ipf_stack_t *ifs; 4744 { 4745 ipftq_t *ifq, *ifqnext; 4746 ipftqent_t *tqe, *tqn; 4747 int i; 4748 SPL_INT(s); 4749 4750 SPL_NET(s); 4751 WRITE_ENTER(&ifs->ifs_ipf_nat); 4752 for (ifq = ifs->ifs_nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) { 4753 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { 4754 if (tqe->tqe_die > ifs->ifs_fr_ticks) 4755 break; 4756 tqn = tqe->tqe_next; 4757 nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs); 4758 } 4759 } 4760 4761 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { 4762 ifqnext = ifq->ifq_next; 4763 4764 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { 4765 if (tqe->tqe_die > ifs->ifs_fr_ticks) 4766 break; 4767 tqn = tqe->tqe_next; 4768 nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs); 4769 } 4770 } 4771 4772 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { 4773 ifqnext = ifq->ifq_next; 4774 4775 if (((ifq->ifq_flags & IFQF_DELETE) != 0) && 4776 (ifq->ifq_ref == 0)) { 4777 fr_freetimeoutqueue(ifq, ifs); 4778 } 4779 } 4780 4781 if (ifs->ifs_nat_doflush != 0) { 4782 (void) nat_extraflush(2, ifs); 4783 ifs->ifs_nat_doflush = 0; 4784 } 4785 4786 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4787 SPL_X(s); 4788 } 4789 4790 4791 /* ------------------------------------------------------------------------ */ 4792 /* Function: fr_nataddrsync */ 4793 /* Returns: Nil */ 4794 /* Parameters: ifp(I) - pointer to network interface */ 4795 /* addr(I) - pointer to new network address */ 4796 /* */ 4797 /* Walk through all of the currently active NAT sessions, looking for those */ 4798 /* which need to have their translated address updated (where the interface */ 4799 /* matches the one passed in) and change it, recalculating the checksum sum */ 4800 /* difference too. */ 4801 /* ------------------------------------------------------------------------ */ 4802 void fr_nataddrsync(v, ifp, addr, ifs) 4803 int v; 4804 void *ifp; 4805 void *addr; 4806 ipf_stack_t *ifs; 4807 { 4808 u_32_t sum1, sum2, sumd; 4809 nat_t *nat; 4810 ipnat_t *np; 4811 SPL_INT(s); 4812 4813 if (ifs->ifs_fr_running <= 0) 4814 return; 4815 4816 SPL_NET(s); 4817 WRITE_ENTER(&ifs->ifs_ipf_nat); 4818 4819 if (ifs->ifs_fr_running <= 0) { 4820 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4821 return; 4822 } 4823 4824 /* 4825 * Change IP addresses for NAT sessions for any protocol except TCP 4826 * since it will break the TCP connection anyway. The only rules 4827 * which will get changed are those which are "map ... -> 0/32", 4828 * where the rule specifies the address is taken from the interface. 4829 */ 4830 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4831 if (addr != NULL) { 4832 if (((ifp != NULL) && ifp != (nat->nat_ifps[0])) || 4833 ((nat->nat_flags & IPN_TCP) != 0)) 4834 continue; 4835 if ((np = nat->nat_ptr) == NULL) 4836 continue; 4837 if (v == 4 && np->in_v == 4) { 4838 if (np->in_nip || np->in_outmsk != 0xffffffff) 4839 continue; 4840 /* 4841 * Change the map-to address to be the same as 4842 * the new one. 4843 */ 4844 sum1 = nat->nat_outip.s_addr; 4845 nat->nat_outip = *(struct in_addr *)addr; 4846 sum2 = nat->nat_outip.s_addr; 4847 } else if (v == 6 && np->in_v == 6) { 4848 if (!IP6_ISZERO(&np->in_next6.in6) || 4849 !IP6_ISONES(&np->in_out[1].in6)) 4850 continue; 4851 /* 4852 * Change the map-to address to be the same as 4853 * the new one. 4854 */ 4855 nat->nat_outip6.in6 = *(struct in6_addr *)addr; 4856 } else 4857 continue; 4858 4859 } else if (((ifp == NULL) || (ifp == nat->nat_ifps[0])) && 4860 !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr)) { 4861 if (np->in_v == 4 && (v == 4 || v == 0)) { 4862 struct in_addr in; 4863 if (np->in_outmsk != 0xffffffff || np->in_nip) 4864 continue; 4865 /* 4866 * Change the map-to address to be the same as 4867 * the new one. 4868 */ 4869 sum1 = nat->nat_outip.s_addr; 4870 if (fr_ifpaddr(4, FRI_NORMAL, nat->nat_ifps[0], 4871 &in, NULL, ifs) != -1) 4872 nat->nat_outip = in; 4873 sum2 = nat->nat_outip.s_addr; 4874 } else if (np->in_v == 6 && (v == 6 || v == 0)) { 4875 struct in6_addr in6; 4876 if (!IP6_ISZERO(&np->in_next6.in6) || 4877 !IP6_ISONES(&np->in_out[1].in6)) 4878 continue; 4879 /* 4880 * Change the map-to address to be the same as 4881 * the new one. 4882 */ 4883 if (fr_ifpaddr(6, FRI_NORMAL, nat->nat_ifps[0], 4884 (void *)&in6, NULL, ifs) != -1) 4885 nat->nat_outip6.in6 = in6; 4886 } else 4887 continue; 4888 } else { 4889 continue; 4890 } 4891 4892 if (sum1 == sum2) 4893 continue; 4894 /* 4895 * Readjust the checksum adjustment to take into 4896 * account the new IP#. 4897 */ 4898 CALC_SUMD(sum1, sum2, sumd); 4899 /* XXX - dont change for TCP when solaris does 4900 * hardware checksumming. 4901 */ 4902 sumd += nat->nat_sumd[0]; 4903 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); 4904 nat->nat_sumd[1] = nat->nat_sumd[0]; 4905 } 4906 4907 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4908 SPL_X(s); 4909 } 4910 4911 4912 /* ------------------------------------------------------------------------ */ 4913 /* Function: fr_natifpsync */ 4914 /* Returns: Nil */ 4915 /* Parameters: action(I) - how we are syncing */ 4916 /* ifp(I) - pointer to network interface */ 4917 /* name(I) - name of interface to sync to */ 4918 /* */ 4919 /* This function is used to resync the mapping of interface names and their */ 4920 /* respective 'pointers'. For "action == IPFSYNC_RESYNC", resync all */ 4921 /* interfaces by doing a new lookup of name to 'pointer'. For "action == */ 4922 /* IPFSYNC_NEWIFP", treat ifp as the new pointer value associated with */ 4923 /* "name" and for "action == IPFSYNC_OLDIFP", ifp is a pointer for which */ 4924 /* there is no longer any interface associated with it. */ 4925 /* ------------------------------------------------------------------------ */ 4926 void fr_natifpsync(action, v, ifp, name, ifs) 4927 int action, v; 4928 void *ifp; 4929 char *name; 4930 ipf_stack_t *ifs; 4931 { 4932 #if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL) 4933 int s; 4934 #endif 4935 nat_t *nat; 4936 ipnat_t *n; 4937 int nv; 4938 4939 if (ifs->ifs_fr_running <= 0) 4940 return; 4941 4942 SPL_NET(s); 4943 WRITE_ENTER(&ifs->ifs_ipf_nat); 4944 4945 if (ifs->ifs_fr_running <= 0) { 4946 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4947 return; 4948 } 4949 4950 switch (action) 4951 { 4952 case IPFSYNC_RESYNC : 4953 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4954 nv = (v == 0) ? nat->nat_v : v; 4955 if (nat->nat_v != nv) 4956 continue; 4957 if ((ifp == nat->nat_ifps[0]) || 4958 (nat->nat_ifps[0] == (void *)-1)) { 4959 nat->nat_ifps[0] = 4960 fr_resolvenic(nat->nat_ifnames[0], nv, ifs); 4961 } 4962 4963 if ((ifp == nat->nat_ifps[1]) || 4964 (nat->nat_ifps[1] == (void *)-1)) { 4965 nat->nat_ifps[1] = 4966 fr_resolvenic(nat->nat_ifnames[1], nv, ifs); 4967 } 4968 } 4969 4970 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { 4971 nv = (v == 0) ? (int)n->in_v : v; 4972 if ((int)n->in_v != nv) 4973 continue; 4974 if (n->in_ifps[0] == ifp || 4975 n->in_ifps[0] == (void *)-1) { 4976 n->in_ifps[0] = 4977 fr_resolvenic(n->in_ifnames[0], nv, ifs); 4978 } 4979 if (n->in_ifps[1] == ifp || 4980 n->in_ifps[1] == (void *)-1) { 4981 n->in_ifps[1] = 4982 fr_resolvenic(n->in_ifnames[1], nv, ifs); 4983 } 4984 } 4985 break; 4986 case IPFSYNC_NEWIFP : 4987 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4988 if (nat->nat_v != v) 4989 continue; 4990 if (!strncmp(name, nat->nat_ifnames[0], 4991 sizeof(nat->nat_ifnames[0]))) 4992 nat->nat_ifps[0] = ifp; 4993 if (!strncmp(name, nat->nat_ifnames[1], 4994 sizeof(nat->nat_ifnames[1]))) 4995 nat->nat_ifps[1] = ifp; 4996 } 4997 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { 4998 if ((int)n->in_v != v) 4999 continue; 5000 if (!strncmp(name, n->in_ifnames[0], 5001 sizeof(n->in_ifnames[0]))) 5002 n->in_ifps[0] = ifp; 5003 if (!strncmp(name, n->in_ifnames[1], 5004 sizeof(n->in_ifnames[1]))) 5005 n->in_ifps[1] = ifp; 5006 } 5007 break; 5008 case IPFSYNC_OLDIFP : 5009 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 5010 if (nat->nat_v != v) 5011 continue; 5012 if (ifp == nat->nat_ifps[0]) 5013 nat->nat_ifps[0] = (void *)-1; 5014 if (ifp == nat->nat_ifps[1]) 5015 nat->nat_ifps[1] = (void *)-1; 5016 } 5017 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { 5018 if ((int)n->in_v != v) 5019 continue; 5020 if (n->in_ifps[0] == ifp) 5021 n->in_ifps[0] = (void *)-1; 5022 if (n->in_ifps[1] == ifp) 5023 n->in_ifps[1] = (void *)-1; 5024 } 5025 break; 5026 } 5027 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5028 SPL_X(s); 5029 } 5030 5031 5032 /* ------------------------------------------------------------------------ */ 5033 /* Function: nat_icmpquerytype4 */ 5034 /* Returns: int - 1 == success, 0 == failure */ 5035 /* Parameters: icmptype(I) - ICMP type number */ 5036 /* */ 5037 /* Tests to see if the ICMP type number passed is a query/response type or */ 5038 /* not. */ 5039 /* ------------------------------------------------------------------------ */ 5040 static INLINE int nat_icmpquerytype4(icmptype) 5041 int icmptype; 5042 { 5043 5044 /* 5045 * For the ICMP query NAT code, it is essential that both the query 5046 * and the reply match on the NAT rule. Because the NAT structure 5047 * does not keep track of the icmptype, and a single NAT structure 5048 * is used for all icmp types with the same src, dest and id, we 5049 * simply define the replies as queries as well. The funny thing is, 5050 * altough it seems silly to call a reply a query, this is exactly 5051 * as it is defined in the IPv4 specification 5052 */ 5053 5054 switch (icmptype) 5055 { 5056 5057 case ICMP_ECHOREPLY: 5058 case ICMP_ECHO: 5059 /* route aedvertisement/solliciation is currently unsupported: */ 5060 /* it would require rewriting the ICMP data section */ 5061 case ICMP_TSTAMP: 5062 case ICMP_TSTAMPREPLY: 5063 case ICMP_IREQ: 5064 case ICMP_IREQREPLY: 5065 case ICMP_MASKREQ: 5066 case ICMP_MASKREPLY: 5067 return 1; 5068 default: 5069 return 0; 5070 } 5071 } 5072 5073 5074 /* ------------------------------------------------------------------------ */ 5075 /* Function: nat_log */ 5076 /* Returns: Nil */ 5077 /* Parameters: nat(I) - pointer to NAT structure */ 5078 /* type(I) - type of log entry to create */ 5079 /* */ 5080 /* Creates a NAT log entry. */ 5081 /* ------------------------------------------------------------------------ */ 5082 void nat_log(nat, type, ifs) 5083 struct nat *nat; 5084 u_int type; 5085 ipf_stack_t *ifs; 5086 { 5087 #ifdef IPFILTER_LOG 5088 # ifndef LARGE_NAT 5089 struct ipnat *np; 5090 int rulen; 5091 # endif 5092 struct natlog natl; 5093 void *items[1]; 5094 size_t sizes[1]; 5095 int types[1]; 5096 5097 natl.nlg_inip = nat->nat_inip6; 5098 natl.nlg_outip = nat->nat_outip6; 5099 natl.nlg_origip = nat->nat_oip6; 5100 natl.nlg_bytes[0] = nat->nat_bytes[0]; 5101 natl.nlg_bytes[1] = nat->nat_bytes[1]; 5102 natl.nlg_pkts[0] = nat->nat_pkts[0]; 5103 natl.nlg_pkts[1] = nat->nat_pkts[1]; 5104 natl.nlg_origport = nat->nat_oport; 5105 natl.nlg_inport = nat->nat_inport; 5106 natl.nlg_outport = nat->nat_outport; 5107 natl.nlg_p = nat->nat_p; 5108 natl.nlg_type = type; 5109 natl.nlg_rule = -1; 5110 natl.nlg_v = nat->nat_v; 5111 # ifndef LARGE_NAT 5112 if (nat->nat_ptr != NULL) { 5113 for (rulen = 0, np = ifs->ifs_nat_list; np; 5114 np = np->in_next, rulen++) 5115 if (np == nat->nat_ptr) { 5116 natl.nlg_rule = rulen; 5117 break; 5118 } 5119 } 5120 # endif 5121 items[0] = &natl; 5122 sizes[0] = sizeof(natl); 5123 types[0] = 0; 5124 5125 (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1, ifs); 5126 #endif 5127 } 5128 5129 5130 #if defined(__OpenBSD__) 5131 /* ------------------------------------------------------------------------ */ 5132 /* Function: nat_ifdetach */ 5133 /* Returns: Nil */ 5134 /* Parameters: ifp(I) - pointer to network interface */ 5135 /* */ 5136 /* Compatibility interface for OpenBSD to trigger the correct updating of */ 5137 /* interface references within IPFilter. */ 5138 /* ------------------------------------------------------------------------ */ 5139 void nat_ifdetach(ifp, ifs) 5140 void *ifp; 5141 ipf_stack_t *ifs; 5142 { 5143 frsync(ifp, ifs); 5144 return; 5145 } 5146 #endif 5147 5148 5149 /* ------------------------------------------------------------------------ */ 5150 /* Function: fr_ipnatderef */ 5151 /* Returns: Nil */ 5152 /* Parameters: inp(I) - pointer to pointer to NAT rule */ 5153 /* Write Locks: ipf_nat */ 5154 /* */ 5155 /* ------------------------------------------------------------------------ */ 5156 void fr_ipnatderef(inp, ifs) 5157 ipnat_t **inp; 5158 ipf_stack_t *ifs; 5159 { 5160 ipnat_t *in; 5161 5162 in = *inp; 5163 *inp = NULL; 5164 in->in_use--; 5165 if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) { 5166 if (in->in_apr) 5167 appr_free(in->in_apr); 5168 KFREE(in); 5169 ifs->ifs_nat_stats.ns_rules--; 5170 #ifdef notdef 5171 #if SOLARIS 5172 if (ifs->ifs_nat_stats.ns_rules == 0) 5173 ifs->ifs_pfil_delayed_copy = 1; 5174 #endif 5175 #endif 5176 } 5177 } 5178 5179 5180 /* ------------------------------------------------------------------------ */ 5181 /* Function: fr_natderef */ 5182 /* Returns: Nil */ 5183 /* Parameters: isp(I) - pointer to pointer to NAT table entry */ 5184 /* */ 5185 /* Decrement the reference counter for this NAT table entry and free it if */ 5186 /* there are no more things using it. */ 5187 /* */ 5188 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */ 5189 /* structure *because* it only gets called on paths _after_ nat_ref has been*/ 5190 /* incremented. If nat_ref == 1 then we shouldn't decrement it here */ 5191 /* because nat_delete() will do that and send nat_ref to -1. */ 5192 /* */ 5193 /* Holding the lock on nat_lock is required to serialise nat_delete() being */ 5194 /* called from a NAT flush ioctl with a deref happening because of a packet.*/ 5195 /* ------------------------------------------------------------------------ */ 5196 void fr_natderef(natp, ifs) 5197 nat_t **natp; 5198 ipf_stack_t *ifs; 5199 { 5200 nat_t *nat; 5201 5202 nat = *natp; 5203 *natp = NULL; 5204 5205 MUTEX_ENTER(&nat->nat_lock); 5206 if (nat->nat_ref > 1) { 5207 nat->nat_ref--; 5208 MUTEX_EXIT(&nat->nat_lock); 5209 return; 5210 } 5211 MUTEX_EXIT(&nat->nat_lock); 5212 5213 WRITE_ENTER(&ifs->ifs_ipf_nat); 5214 nat_delete(nat, NL_EXPIRE, ifs); 5215 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5216 } 5217 5218 5219 /* ------------------------------------------------------------------------ */ 5220 /* Function: fr_natclone */ 5221 /* Returns: ipstate_t* - NULL == cloning failed, */ 5222 /* else pointer to new state structure */ 5223 /* Parameters: fin(I) - pointer to packet information */ 5224 /* is(I) - pointer to master state structure */ 5225 /* Write Lock: ipf_nat */ 5226 /* */ 5227 /* Create a "duplcate" state table entry from the master. */ 5228 /* ------------------------------------------------------------------------ */ 5229 nat_t *fr_natclone(fin, nat) 5230 fr_info_t *fin; 5231 nat_t *nat; 5232 { 5233 frentry_t *fr; 5234 nat_t *clone; 5235 ipnat_t *np; 5236 ipf_stack_t *ifs = fin->fin_ifs; 5237 5238 KMALLOC(clone, nat_t *); 5239 if (clone == NULL) 5240 return NULL; 5241 bcopy((char *)nat, (char *)clone, sizeof(*clone)); 5242 5243 MUTEX_NUKE(&clone->nat_lock); 5244 5245 clone->nat_aps = NULL; 5246 /* 5247 * Initialize all these so that nat_delete() doesn't cause a crash. 5248 */ 5249 clone->nat_tqe.tqe_pnext = NULL; 5250 clone->nat_tqe.tqe_next = NULL; 5251 clone->nat_tqe.tqe_ifq = NULL; 5252 clone->nat_tqe.tqe_parent = clone; 5253 5254 clone->nat_flags &= ~SI_CLONE; 5255 clone->nat_flags |= SI_CLONED; 5256 5257 if (clone->nat_hm) 5258 clone->nat_hm->hm_ref++; 5259 5260 if (nat_insert(clone, fin->fin_rev, ifs) == -1) { 5261 KFREE(clone); 5262 return NULL; 5263 } 5264 np = clone->nat_ptr; 5265 if (np != NULL) { 5266 if (ifs->ifs_nat_logging) 5267 nat_log(clone, (u_int)np->in_redir, ifs); 5268 np->in_use++; 5269 } 5270 fr = clone->nat_fr; 5271 if (fr != NULL) { 5272 MUTEX_ENTER(&fr->fr_lock); 5273 fr->fr_ref++; 5274 MUTEX_EXIT(&fr->fr_lock); 5275 } 5276 5277 /* 5278 * Because the clone is created outside the normal loop of things and 5279 * TCP has special needs in terms of state, initialise the timeout 5280 * state of the new NAT from here. 5281 */ 5282 if (clone->nat_p == IPPROTO_TCP) { 5283 (void) fr_tcp_age(&clone->nat_tqe, fin, ifs->ifs_nat_tqb, 5284 clone->nat_flags); 5285 } 5286 #ifdef IPFILTER_SYNC 5287 clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone); 5288 #endif 5289 if (ifs->ifs_nat_logging) 5290 nat_log(clone, NL_CLONE, ifs); 5291 return clone; 5292 } 5293 5294 5295 /* ------------------------------------------------------------------------ */ 5296 /* Function: nat_wildok */ 5297 /* Returns: int - 1 == packet's ports match wildcards */ 5298 /* 0 == packet's ports don't match wildcards */ 5299 /* Parameters: nat(I) - NAT entry */ 5300 /* sport(I) - source port */ 5301 /* dport(I) - destination port */ 5302 /* flags(I) - wildcard flags */ 5303 /* dir(I) - packet direction */ 5304 /* */ 5305 /* Use NAT entry and packet direction to determine which combination of */ 5306 /* wildcard flags should be used. */ 5307 /* ------------------------------------------------------------------------ */ 5308 int nat_wildok(nat, sport, dport, flags, dir) 5309 nat_t *nat; 5310 int sport; 5311 int dport; 5312 int flags; 5313 int dir; 5314 { 5315 /* 5316 * When called by dir is set to 5317 * nat_inlookup NAT_INBOUND (0) 5318 * nat_outlookup NAT_OUTBOUND (1) 5319 * 5320 * We simply combine the packet's direction in dir with the original 5321 * "intended" direction of that NAT entry in nat->nat_dir to decide 5322 * which combination of wildcard flags to allow. 5323 */ 5324 5325 switch ((dir << 1) | nat->nat_dir) 5326 { 5327 case 3: /* outbound packet / outbound entry */ 5328 if (((nat->nat_inport == sport) || 5329 (flags & SI_W_SPORT)) && 5330 ((nat->nat_oport == dport) || 5331 (flags & SI_W_DPORT))) 5332 return 1; 5333 break; 5334 case 2: /* outbound packet / inbound entry */ 5335 if (((nat->nat_outport == sport) || 5336 (flags & SI_W_DPORT)) && 5337 ((nat->nat_oport == dport) || 5338 (flags & SI_W_SPORT))) 5339 return 1; 5340 break; 5341 case 1: /* inbound packet / outbound entry */ 5342 if (((nat->nat_oport == sport) || 5343 (flags & SI_W_DPORT)) && 5344 ((nat->nat_outport == dport) || 5345 (flags & SI_W_SPORT))) 5346 return 1; 5347 break; 5348 case 0: /* inbound packet / inbound entry */ 5349 if (((nat->nat_oport == sport) || 5350 (flags & SI_W_SPORT)) && 5351 ((nat->nat_outport == dport) || 5352 (flags & SI_W_DPORT))) 5353 return 1; 5354 break; 5355 default: 5356 break; 5357 } 5358 5359 return(0); 5360 } 5361 5362 5363 /* ------------------------------------------------------------------------ */ 5364 /* Function: nat_mssclamp */ 5365 /* Returns: Nil */ 5366 /* Parameters: tcp(I) - pointer to TCP header */ 5367 /* maxmss(I) - value to clamp the TCP MSS to */ 5368 /* csump(I) - pointer to TCP checksum */ 5369 /* */ 5370 /* Check for MSS option and clamp it if necessary. If found and changed, */ 5371 /* then the TCP header checksum will be updated to reflect the change in */ 5372 /* the MSS. */ 5373 /* ------------------------------------------------------------------------ */ 5374 static void nat_mssclamp(tcp, maxmss, csump) 5375 tcphdr_t *tcp; 5376 u_32_t maxmss; 5377 u_short *csump; 5378 { 5379 u_char *cp, *ep, opt; 5380 int hlen, advance; 5381 u_32_t mss, sumd; 5382 5383 hlen = TCP_OFF(tcp) << 2; 5384 if (hlen > sizeof(*tcp)) { 5385 cp = (u_char *)tcp + sizeof(*tcp); 5386 ep = (u_char *)tcp + hlen; 5387 5388 while (cp < ep) { 5389 opt = cp[0]; 5390 if (opt == TCPOPT_EOL) 5391 break; 5392 else if (opt == TCPOPT_NOP) { 5393 cp++; 5394 continue; 5395 } 5396 5397 if (cp + 1 >= ep) 5398 break; 5399 advance = cp[1]; 5400 if ((cp + advance > ep) || (advance <= 0)) 5401 break; 5402 switch (opt) 5403 { 5404 case TCPOPT_MAXSEG: 5405 if (advance != 4) 5406 break; 5407 mss = cp[2] * 256 + cp[3]; 5408 if (mss > maxmss) { 5409 cp[2] = maxmss / 256; 5410 cp[3] = maxmss & 0xff; 5411 CALC_SUMD(mss, maxmss, sumd); 5412 fix_outcksum(csump, sumd); 5413 } 5414 break; 5415 default: 5416 /* ignore unknown options */ 5417 break; 5418 } 5419 5420 cp += advance; 5421 } 5422 } 5423 } 5424 5425 5426 /* ------------------------------------------------------------------------ */ 5427 /* Function: fr_setnatqueue */ 5428 /* Returns: Nil */ 5429 /* Parameters: nat(I)- pointer to NAT structure */ 5430 /* rev(I) - forward(0) or reverse(1) direction */ 5431 /* Locks: ipf_nat (read or write) */ 5432 /* */ 5433 /* Put the NAT entry on its default queue entry, using rev as a helped in */ 5434 /* determining which queue it should be placed on. */ 5435 /* ------------------------------------------------------------------------ */ 5436 void fr_setnatqueue(nat, rev, ifs) 5437 nat_t *nat; 5438 int rev; 5439 ipf_stack_t *ifs; 5440 { 5441 ipftq_t *oifq, *nifq; 5442 5443 if (nat->nat_ptr != NULL) 5444 nifq = nat->nat_ptr->in_tqehead[rev]; 5445 else 5446 nifq = NULL; 5447 5448 if (nifq == NULL) { 5449 switch (nat->nat_p) 5450 { 5451 case IPPROTO_UDP : 5452 nifq = &ifs->ifs_nat_udptq; 5453 break; 5454 case IPPROTO_ICMP : 5455 nifq = &ifs->ifs_nat_icmptq; 5456 break; 5457 case IPPROTO_TCP : 5458 nifq = ifs->ifs_nat_tqb + nat->nat_tqe.tqe_state[rev]; 5459 break; 5460 default : 5461 nifq = &ifs->ifs_nat_iptq; 5462 break; 5463 } 5464 } 5465 5466 oifq = nat->nat_tqe.tqe_ifq; 5467 /* 5468 * If it's currently on a timeout queue, move it from one queue to 5469 * another, else put it on the end of the newly determined queue. 5470 */ 5471 if (oifq != NULL) 5472 fr_movequeue(&nat->nat_tqe, oifq, nifq, ifs); 5473 else 5474 fr_queueappend(&nat->nat_tqe, nifq, nat, ifs); 5475 return; 5476 } 5477 5478 /* ------------------------------------------------------------------------ */ 5479 /* Function: nat_getnext */ 5480 /* Returns: int - 0 == ok, else error */ 5481 /* Parameters: t(I) - pointer to ipftoken structure */ 5482 /* itp(I) - pointer to ipfgeniter_t structure */ 5483 /* ifs - ipf stack instance */ 5484 /* */ 5485 /* Fetch the next nat/ipnat/hostmap structure pointer from the linked list */ 5486 /* and copy it out to the storage space pointed to by itp. The next item */ 5487 /* in the list to look at is put back in the ipftoken struture. */ 5488 /* ------------------------------------------------------------------------ */ 5489 static int nat_getnext(t, itp, ifs) 5490 ipftoken_t *t; 5491 ipfgeniter_t *itp; 5492 ipf_stack_t *ifs; 5493 { 5494 hostmap_t *hm, *nexthm = NULL, zerohm; 5495 ipnat_t *ipn, *nextipnat = NULL, zeroipn; 5496 nat_t *nat, *nextnat = NULL, zeronat; 5497 int error = 0, count; 5498 char *dst; 5499 5500 if (itp->igi_nitems == 0) 5501 return EINVAL; 5502 5503 READ_ENTER(&ifs->ifs_ipf_nat); 5504 5505 /* 5506 * Get "previous" entry from the token and find the next entry. 5507 */ 5508 switch (itp->igi_type) 5509 { 5510 case IPFGENITER_HOSTMAP : 5511 hm = t->ipt_data; 5512 if (hm == NULL) { 5513 nexthm = ifs->ifs_ipf_hm_maplist; 5514 } else { 5515 nexthm = hm->hm_next; 5516 } 5517 break; 5518 5519 case IPFGENITER_IPNAT : 5520 ipn = t->ipt_data; 5521 if (ipn == NULL) { 5522 nextipnat = ifs->ifs_nat_list; 5523 } else { 5524 nextipnat = ipn->in_next; 5525 } 5526 break; 5527 5528 case IPFGENITER_NAT : 5529 nat = t->ipt_data; 5530 if (nat == NULL) { 5531 nextnat = ifs->ifs_nat_instances; 5532 } else { 5533 nextnat = nat->nat_next; 5534 } 5535 break; 5536 default : 5537 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5538 return EINVAL; 5539 } 5540 5541 dst = itp->igi_data; 5542 for (count = itp->igi_nitems; count > 0; count--) { 5543 /* 5544 * If we found an entry, add a reference to it and update the token. 5545 * Otherwise, zero out data to be returned and NULL out token. 5546 */ 5547 switch (itp->igi_type) 5548 { 5549 case IPFGENITER_HOSTMAP : 5550 if (nexthm != NULL) { 5551 ATOMIC_INC32(nexthm->hm_ref); 5552 t->ipt_data = nexthm; 5553 } else { 5554 bzero(&zerohm, sizeof(zerohm)); 5555 nexthm = &zerohm; 5556 t->ipt_data = NULL; 5557 } 5558 break; 5559 case IPFGENITER_IPNAT : 5560 if (nextipnat != NULL) { 5561 ATOMIC_INC32(nextipnat->in_use); 5562 t->ipt_data = nextipnat; 5563 } else { 5564 bzero(&zeroipn, sizeof(zeroipn)); 5565 nextipnat = &zeroipn; 5566 t->ipt_data = NULL; 5567 } 5568 break; 5569 case IPFGENITER_NAT : 5570 if (nextnat != NULL) { 5571 MUTEX_ENTER(&nextnat->nat_lock); 5572 nextnat->nat_ref++; 5573 MUTEX_EXIT(&nextnat->nat_lock); 5574 t->ipt_data = nextnat; 5575 } else { 5576 bzero(&zeronat, sizeof(zeronat)); 5577 nextnat = &zeronat; 5578 t->ipt_data = NULL; 5579 } 5580 break; 5581 default : 5582 break; 5583 } 5584 5585 /* 5586 * Now that we have ref, it's save to give up lock. 5587 */ 5588 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5589 5590 /* 5591 * Copy out data and clean up references and token as needed. 5592 */ 5593 switch (itp->igi_type) 5594 { 5595 case IPFGENITER_HOSTMAP : 5596 error = COPYOUT(nexthm, dst, sizeof(*nexthm)); 5597 if (error != 0) 5598 error = EFAULT; 5599 if (t->ipt_data == NULL) { 5600 ipf_freetoken(t, ifs); 5601 break; 5602 } else { 5603 if (hm != NULL) { 5604 WRITE_ENTER(&ifs->ifs_ipf_nat); 5605 fr_hostmapdel(&hm); 5606 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5607 } 5608 if (nexthm->hm_next == NULL) { 5609 ipf_freetoken(t, ifs); 5610 break; 5611 } 5612 dst += sizeof(*nexthm); 5613 hm = nexthm; 5614 nexthm = nexthm->hm_next; 5615 } 5616 break; 5617 5618 case IPFGENITER_IPNAT : 5619 error = COPYOUT(nextipnat, dst, sizeof(*nextipnat)); 5620 if (error != 0) 5621 error = EFAULT; 5622 if (t->ipt_data == NULL) { 5623 ipf_freetoken(t, ifs); 5624 break; 5625 } else { 5626 if (ipn != NULL) { 5627 WRITE_ENTER(&ifs->ifs_ipf_nat); 5628 fr_ipnatderef(&ipn, ifs); 5629 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5630 } 5631 if (nextipnat->in_next == NULL) { 5632 ipf_freetoken(t, ifs); 5633 break; 5634 } 5635 dst += sizeof(*nextipnat); 5636 ipn = nextipnat; 5637 nextipnat = nextipnat->in_next; 5638 } 5639 break; 5640 5641 case IPFGENITER_NAT : 5642 error = COPYOUT(nextnat, dst, sizeof(*nextnat)); 5643 if (error != 0) 5644 error = EFAULT; 5645 if (t->ipt_data == NULL) { 5646 ipf_freetoken(t, ifs); 5647 break; 5648 } else { 5649 if (nat != NULL) 5650 fr_natderef(&nat, ifs); 5651 if (nextnat->nat_next == NULL) { 5652 ipf_freetoken(t, ifs); 5653 break; 5654 } 5655 dst += sizeof(*nextnat); 5656 nat = nextnat; 5657 nextnat = nextnat->nat_next; 5658 } 5659 break; 5660 default : 5661 break; 5662 } 5663 5664 if ((count == 1) || (error != 0)) 5665 break; 5666 5667 READ_ENTER(&ifs->ifs_ipf_nat); 5668 } 5669 5670 return error; 5671 } 5672 5673 5674 /* ------------------------------------------------------------------------ */ 5675 /* Function: nat_iterator */ 5676 /* Returns: int - 0 == ok, else error */ 5677 /* Parameters: token(I) - pointer to ipftoken structure */ 5678 /* itp(I) - pointer to ipfgeniter_t structure */ 5679 /* */ 5680 /* This function acts as a handler for the SIOCGENITER ioctls that use a */ 5681 /* generic structure to iterate through a list. There are three different */ 5682 /* linked lists of NAT related information to go through: NAT rules, active */ 5683 /* NAT mappings and the NAT fragment cache. */ 5684 /* ------------------------------------------------------------------------ */ 5685 static int nat_iterator(token, itp, ifs) 5686 ipftoken_t *token; 5687 ipfgeniter_t *itp; 5688 ipf_stack_t *ifs; 5689 { 5690 int error; 5691 5692 if (itp->igi_data == NULL) 5693 return EFAULT; 5694 5695 token->ipt_subtype = itp->igi_type; 5696 5697 switch (itp->igi_type) 5698 { 5699 case IPFGENITER_HOSTMAP : 5700 case IPFGENITER_IPNAT : 5701 case IPFGENITER_NAT : 5702 error = nat_getnext(token, itp, ifs); 5703 break; 5704 case IPFGENITER_NATFRAG : 5705 error = fr_nextfrag(token, itp, &ifs->ifs_ipfr_natlist, 5706 &ifs->ifs_ipfr_nattail, 5707 &ifs->ifs_ipf_natfrag, ifs); 5708 break; 5709 default : 5710 error = EINVAL; 5711 break; 5712 } 5713 5714 return error; 5715 } 5716 5717 5718 /* -------------------------------------------------------------------- */ 5719 /* Function: nat_earlydrop */ 5720 /* Returns: number of dropped/removed entries from the queue */ 5721 /* Parameters: ifq - pointer to queue with entries to be processed */ 5722 /* maxidle - entry must be idle this long to be dropped */ 5723 /* ifs - ipf stack instance */ 5724 /* */ 5725 /* Function is invoked from nat_extraflush() only. Removes entries */ 5726 /* form specified timeout queue, based on how long they've sat idle, */ 5727 /* without waiting for it to happen on its own. */ 5728 /* -------------------------------------------------------------------- */ 5729 static int nat_earlydrop(ifq, maxidle, ifs) 5730 ipftq_t *ifq; 5731 int maxidle; 5732 ipf_stack_t *ifs; 5733 { 5734 ipftqent_t *tqe, *tqn; 5735 nat_t *nat; 5736 unsigned int dropped; 5737 int droptick; 5738 5739 if (ifq == NULL) 5740 return (0); 5741 5742 dropped = 0; 5743 5744 /* 5745 * Determine the tick representing the idle time we're interested 5746 * in. If an entry exists in the queue, and it was touched before 5747 * that tick, then it's been idle longer than maxidle ... remove it. 5748 */ 5749 droptick = ifs->ifs_fr_ticks - maxidle; 5750 tqn = ifq->ifq_head; 5751 while ((tqe = tqn) != NULL && tqe->tqe_touched < droptick) { 5752 tqn = tqe->tqe_next; 5753 nat = tqe->tqe_parent; 5754 nat_delete(nat, ISL_EXPIRE, ifs); 5755 dropped++; 5756 } 5757 return (dropped); 5758 } 5759 5760 5761 /* --------------------------------------------------------------------- */ 5762 /* Function: nat_flushclosing */ 5763 /* Returns: int - number of NAT entries deleted */ 5764 /* Parameters: stateval(I) - State at which to start removing entries */ 5765 /* ifs - ipf stack instance */ 5766 /* */ 5767 /* Remove nat table entries for TCP connections which are in the process */ 5768 /* of closing, and are in (or "beyond") state specified by 'stateval'. */ 5769 /* --------------------------------------------------------------------- */ 5770 static int nat_flushclosing(stateval, ifs) 5771 int stateval; 5772 ipf_stack_t *ifs; 5773 { 5774 ipftq_t *ifq, *ifqn; 5775 ipftqent_t *tqe, *tqn; 5776 nat_t *nat; 5777 int dropped; 5778 5779 dropped = 0; 5780 5781 /* 5782 * Start by deleting any entries in specific timeout queues. 5783 */ 5784 ifqn = &ifs->ifs_nat_tqb[stateval]; 5785 while ((ifq = ifqn) != NULL) { 5786 ifqn = ifq->ifq_next; 5787 dropped += nat_earlydrop(ifq, (int)0, ifs); 5788 } 5789 5790 /* 5791 * Next, look through user defined queues for closing entries. 5792 */ 5793 ifqn = ifs->ifs_nat_utqe; 5794 while ((ifq = ifqn) != NULL) { 5795 ifqn = ifq->ifq_next; 5796 tqn = ifq->ifq_head; 5797 while ((tqe = tqn) != NULL) { 5798 tqn = tqe->tqe_next; 5799 nat = tqe->tqe_parent; 5800 if (nat->nat_p != IPPROTO_TCP) 5801 continue; 5802 if ((nat->nat_tcpstate[0] >= stateval) && 5803 (nat->nat_tcpstate[1] >= stateval)) { 5804 nat_delete(nat, NL_EXPIRE, ifs); 5805 dropped++; 5806 } 5807 } 5808 } 5809 return (dropped); 5810 } 5811 5812 5813 /* --------------------------------------------------------------------- */ 5814 /* Function: nat_extraflush */ 5815 /* Returns: int - number of NAT entries deleted */ 5816 /* Parameters: which(I) - how to flush the active NAT table */ 5817 /* ifs - ipf stack instance */ 5818 /* Write Locks: ipf_nat */ 5819 /* */ 5820 /* Flush nat tables. Three actions currently defined: */ 5821 /* */ 5822 /* which == 0 : Flush all nat table entries. */ 5823 /* */ 5824 /* which == 1 : Flush entries with TCP connections which have started */ 5825 /* to close on both ends. */ 5826 /* */ 5827 /* which == 2 : First, flush entries which are "almost" closed. If that */ 5828 /* does not take us below specified threshold in the table, */ 5829 /* we want to flush entries with TCP connections which have */ 5830 /* been idle for a long time. Start with connections idle */ 5831 /* over 12 hours, and then work backwards in half hour */ 5832 /* increments to at most 30 minutes idle, and finally work */ 5833 /* back in 30 second increments to at most 30 seconds. */ 5834 /* --------------------------------------------------------------------- */ 5835 static int nat_extraflush(which, ifs) 5836 int which; 5837 ipf_stack_t *ifs; 5838 { 5839 ipftq_t *ifq, *ifqn; 5840 nat_t *nat, **natp; 5841 int idletime, removed, idle_idx; 5842 SPL_INT(s); 5843 5844 removed = 0; 5845 5846 SPL_NET(s); 5847 switch (which) 5848 { 5849 case 0: 5850 natp = &ifs->ifs_nat_instances; 5851 while ((nat = *natp) != NULL) { 5852 natp = &nat->nat_next; 5853 nat_delete(nat, ISL_FLUSH, ifs); 5854 removed++; 5855 } 5856 break; 5857 5858 case 1: 5859 removed = nat_flushclosing(IPF_TCPS_CLOSE_WAIT, ifs); 5860 break; 5861 5862 case 2: 5863 removed = nat_flushclosing(IPF_TCPS_FIN_WAIT_2, ifs); 5864 5865 /* 5866 * Be sure we haven't done this in the last 10 seconds. 5867 */ 5868 if (ifs->ifs_fr_ticks - ifs->ifs_nat_last_force_flush < 5869 IPF_TTLVAL(10)) 5870 break; 5871 ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks; 5872 5873 /* 5874 * Determine initial threshold for minimum idle time based on 5875 * how long ipfilter has been running. Ipfilter needs to have 5876 * been up as long as the smallest interval to continue on. 5877 * 5878 * Minimum idle times stored in idletime_tab and indexed by 5879 * idle_idx. Start at upper end of array and work backwards. 5880 * 5881 * Once the index is found, set the initial idle time to the 5882 * first interval before the current ipfilter run time. 5883 */ 5884 if (ifs->ifs_fr_ticks < idletime_tab[0]) 5885 break; /* switch */ 5886 idle_idx = (sizeof (idletime_tab) / sizeof (int)) - 1; 5887 if (ifs->ifs_fr_ticks > idletime_tab[idle_idx]) { 5888 idletime = idletime_tab[idle_idx]; 5889 } else { 5890 while ((idle_idx > 0) && 5891 (ifs->ifs_fr_ticks < idletime_tab[idle_idx])) 5892 idle_idx--; 5893 idletime = (ifs->ifs_fr_ticks / 5894 idletime_tab[idle_idx]) * 5895 idletime_tab[idle_idx]; 5896 } 5897 5898 while ((idle_idx >= 0) && 5899 (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_lo)) { 5900 /* 5901 * Start with appropriate timeout queue. 5902 */ 5903 removed += nat_earlydrop( 5904 &ifs->ifs_nat_tqb[IPF_TCPS_ESTABLISHED], 5905 idletime, ifs); 5906 5907 /* 5908 * Make sure we haven't already deleted enough 5909 * entries before checking the user defined queues. 5910 */ 5911 if (NAT_TAB_WATER_LEVEL(ifs) <= 5912 ifs->ifs_nat_flush_lvl_lo) 5913 break; 5914 5915 /* 5916 * Next, look through the user defined queues. 5917 */ 5918 ifqn = ifs->ifs_nat_utqe; 5919 while ((ifq = ifqn) != NULL) { 5920 ifqn = ifq->ifq_next; 5921 removed += nat_earlydrop(ifq, idletime, ifs); 5922 } 5923 5924 /* 5925 * Adjust the granularity of idle time. 5926 * 5927 * If we reach an interval boundary, we need to 5928 * either adjust the idle time accordingly or exit 5929 * the loop altogether (if this is very last check). 5930 */ 5931 idletime -= idletime_tab[idle_idx]; 5932 if (idletime < idletime_tab[idle_idx]) { 5933 if (idle_idx != 0) { 5934 idletime = idletime_tab[idle_idx] - 5935 idletime_tab[idle_idx - 1]; 5936 idle_idx--; 5937 } else { 5938 break; /* while */ 5939 } 5940 } 5941 } 5942 break; 5943 default: 5944 break; 5945 } 5946 5947 SPL_X(s); 5948 return (removed); 5949 } 5950