1 /* 2 * Copyright (C) 1995-2003 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 7 * Use is subject to license terms. 8 */ 9 10 #pragma ident "%Z%%M% %I% %E% SMI"$ 11 12 #if defined(KERNEL) || defined(_KERNEL) 13 # undef KERNEL 14 # undef _KERNEL 15 # define KERNEL 1 16 # define _KERNEL 1 17 #endif 18 #include <sys/errno.h> 19 #include <sys/types.h> 20 #include <sys/param.h> 21 #include <sys/time.h> 22 #include <sys/file.h> 23 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ 24 defined(_KERNEL) 25 # include "opt_ipfilter_log.h" 26 #endif 27 #if !defined(_KERNEL) 28 # include <stdio.h> 29 # include <string.h> 30 # include <stdlib.h> 31 # define _KERNEL 32 # ifdef __OpenBSD__ 33 struct file; 34 # endif 35 # include <sys/uio.h> 36 # undef _KERNEL 37 #endif 38 #if defined(_KERNEL) && (__FreeBSD_version >= 220000) 39 # include <sys/filio.h> 40 # include <sys/fcntl.h> 41 #else 42 # include <sys/ioctl.h> 43 #endif 44 #if !defined(AIX) 45 # include <sys/fcntl.h> 46 #endif 47 #if !defined(linux) 48 # include <sys/protosw.h> 49 #endif 50 #include <sys/socket.h> 51 #if defined(_KERNEL) 52 # include <sys/systm.h> 53 # if !defined(__SVR4) && !defined(__svr4__) 54 # include <sys/mbuf.h> 55 # endif 56 #endif 57 #if defined(__SVR4) || defined(__svr4__) 58 # include <sys/filio.h> 59 # include <sys/byteorder.h> 60 # ifdef _KERNEL 61 # include <sys/dditypes.h> 62 # endif 63 # include <sys/stream.h> 64 # include <sys/kmem.h> 65 #endif 66 #if __FreeBSD_version >= 300000 67 # include <sys/queue.h> 68 #endif 69 #include <net/if.h> 70 #if __FreeBSD_version >= 300000 71 # include <net/if_var.h> 72 # if defined(_KERNEL) && !defined(IPFILTER_LKM) 73 # include "opt_ipfilter.h" 74 # endif 75 #endif 76 #ifdef sun 77 # include <net/af.h> 78 #endif 79 #include <net/route.h> 80 #include <netinet/in.h> 81 #include <netinet/in_systm.h> 82 #include <netinet/ip.h> 83 84 #ifdef RFC1825 85 # include <vpn/md5.h> 86 # include <vpn/ipsec.h> 87 extern struct ifnet vpnif; 88 #endif 89 90 #if !defined(linux) 91 # include <netinet/ip_var.h> 92 #endif 93 #include <netinet/tcp.h> 94 #include <netinet/udp.h> 95 #include <netinet/ip_icmp.h> 96 #include "netinet/ip_compat.h" 97 #include <netinet/tcpip.h> 98 #include "netinet/ip_fil.h" 99 #include "netinet/ip_nat.h" 100 #include "netinet/ip_frag.h" 101 #include "netinet/ip_state.h" 102 #include "netinet/ip_proxy.h" 103 #include "netinet/ipf_stack.h" 104 #ifdef IPFILTER_SYNC 105 #include "netinet/ip_sync.h" 106 #endif 107 #if (__FreeBSD_version >= 300000) 108 # include <sys/malloc.h> 109 #endif 110 /* END OF INCLUDES */ 111 112 #undef SOCKADDR_IN 113 #define SOCKADDR_IN struct sockaddr_in 114 115 #if !defined(lint) 116 static const char sccsid[] = "@(#)ip_nat.c 1.11 6/5/96 (C) 1995 Darren Reed"; 117 static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 darrenr Exp $"; 118 #endif 119 120 121 /* ======================================================================== */ 122 /* How the NAT is organised and works. */ 123 /* */ 124 /* Inside (interface y) NAT Outside (interface x) */ 125 /* -------------------- -+- ------------------------------------- */ 126 /* Packet going | out, processsed by fr_checknatout() for x */ 127 /* ------------> | ------------> */ 128 /* src=10.1.1.1 | src=192.1.1.1 */ 129 /* | */ 130 /* | in, processed by fr_checknatin() for x */ 131 /* <------------ | <------------ */ 132 /* dst=10.1.1.1 | dst=192.1.1.1 */ 133 /* -------------------- -+- ------------------------------------- */ 134 /* fr_checknatout() - changes ip_src and if required, sport */ 135 /* - creates a new mapping, if required. */ 136 /* fr_checknatin() - changes ip_dst and if required, dport */ 137 /* */ 138 /* In the NAT table, internal source is recorded as "in" and externally */ 139 /* seen as "out". */ 140 /* ======================================================================== */ 141 142 143 static int nat_flushtable __P((ipf_stack_t *)); 144 static int nat_clearlist __P((ipf_stack_t *)); 145 static void nat_addnat __P((struct ipnat *, ipf_stack_t *)); 146 static void nat_addrdr __P((struct ipnat *, ipf_stack_t *)); 147 static void nat_delete __P((struct nat *, int, ipf_stack_t *)); 148 static void nat_delrdr __P((struct ipnat *)); 149 static void nat_delnat __P((struct ipnat *)); 150 static int fr_natgetent __P((caddr_t, ipf_stack_t *)); 151 static int fr_natgetsz __P((caddr_t, ipf_stack_t *)); 152 static int fr_natputent __P((caddr_t, int, ipf_stack_t *)); 153 static void nat_tabmove __P((nat_t *, ipf_stack_t *)); 154 static int nat_match __P((fr_info_t *, ipnat_t *)); 155 static INLINE int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *)); 156 static INLINE int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *)); 157 static hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr, 158 struct in_addr, struct in_addr, u_32_t, 159 ipf_stack_t *)); 160 static void nat_hostmapdel __P((struct hostmap *)); 161 static INLINE int nat_icmpquerytype4 __P((int)); 162 static int nat_siocaddnat __P((ipnat_t *, ipnat_t **, int, 163 ipf_stack_t *)); 164 static void nat_siocdelnat __P((ipnat_t *, ipnat_t **, int, 165 ipf_stack_t *)); 166 static INLINE int nat_icmperrortype4 __P((int)); 167 static INLINE int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *, 168 tcphdr_t *, nat_t **, int)); 169 static INLINE int nat_resolverule __P((ipnat_t *, ipf_stack_t *)); 170 static nat_t *fr_natclone __P((fr_info_t *, nat_t *)); 171 static void nat_mssclamp __P((tcphdr_t *, u_32_t, u_short *)); 172 static INLINE int nat_wildok __P((nat_t *, int, int, int, int)); 173 static int nat_getnext __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 174 static int nat_iterator __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 175 static int nat_extraflush __P((int, ipf_stack_t *)); 176 static int nat_earlydrop __P((ipftq_t *, int, ipf_stack_t *)); 177 static int nat_flushclosing __P((int, ipf_stack_t *)); 178 179 180 /* 181 * Below we declare a list of constants used only in the nat_extraflush() 182 * routine. We are placing it here, instead of in nat_extraflush() itself, 183 * because we want to make it visible to tools such as mdb, nm etc., so the 184 * values can easily be altered during debugging. 185 */ 186 static const int idletime_tab[] = { 187 IPF_TTLVAL(30), /* 30 seconds */ 188 IPF_TTLVAL(1800), /* 30 minutes */ 189 IPF_TTLVAL(43200), /* 12 hours */ 190 IPF_TTLVAL(345600), /* 4 days */ 191 }; 192 193 194 /* ------------------------------------------------------------------------ */ 195 /* Function: fr_natinit */ 196 /* Returns: int - 0 == success, -1 == failure */ 197 /* Parameters: Nil */ 198 /* */ 199 /* Initialise all of the NAT locks, tables and other structures. */ 200 /* ------------------------------------------------------------------------ */ 201 int fr_natinit(ifs) 202 ipf_stack_t *ifs; 203 { 204 int i; 205 206 KMALLOCS(ifs->ifs_nat_table[0], nat_t **, 207 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 208 if (ifs->ifs_nat_table[0] != NULL) 209 bzero((char *)ifs->ifs_nat_table[0], 210 ifs->ifs_ipf_nattable_sz * sizeof(nat_t *)); 211 else 212 return -1; 213 214 KMALLOCS(ifs->ifs_nat_table[1], nat_t **, 215 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 216 if (ifs->ifs_nat_table[1] != NULL) 217 bzero((char *)ifs->ifs_nat_table[1], 218 ifs->ifs_ipf_nattable_sz * sizeof(nat_t *)); 219 else 220 return -2; 221 222 KMALLOCS(ifs->ifs_nat_rules, ipnat_t **, 223 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz); 224 if (ifs->ifs_nat_rules != NULL) 225 bzero((char *)ifs->ifs_nat_rules, 226 ifs->ifs_ipf_natrules_sz * sizeof(ipnat_t *)); 227 else 228 return -3; 229 230 KMALLOCS(ifs->ifs_rdr_rules, ipnat_t **, 231 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz); 232 if (ifs->ifs_rdr_rules != NULL) 233 bzero((char *)ifs->ifs_rdr_rules, 234 ifs->ifs_ipf_rdrrules_sz * sizeof(ipnat_t *)); 235 else 236 return -4; 237 238 KMALLOCS(ifs->ifs_maptable, hostmap_t **, 239 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); 240 if (ifs->ifs_maptable != NULL) 241 bzero((char *)ifs->ifs_maptable, 242 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); 243 else 244 return -5; 245 246 ifs->ifs_ipf_hm_maplist = NULL; 247 248 KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[0], u_long *, 249 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 250 if (ifs->ifs_nat_stats.ns_bucketlen[0] == NULL) 251 return -1; 252 bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[0], 253 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 254 255 KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[1], u_long *, 256 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 257 if (ifs->ifs_nat_stats.ns_bucketlen[1] == NULL) 258 return -1; 259 bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[1], 260 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 261 262 if (ifs->ifs_fr_nat_maxbucket == 0) { 263 for (i = ifs->ifs_ipf_nattable_sz; i > 0; i >>= 1) 264 ifs->ifs_fr_nat_maxbucket++; 265 ifs->ifs_fr_nat_maxbucket *= 2; 266 } 267 268 fr_sttab_init(ifs->ifs_nat_tqb, ifs); 269 /* 270 * Increase this because we may have "keep state" following this too 271 * and packet storms can occur if this is removed too quickly. 272 */ 273 ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcplastack; 274 ifs->ifs_nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_nat_udptq; 275 ifs->ifs_nat_udptq.ifq_ttl = ifs->ifs_fr_defnatage; 276 ifs->ifs_nat_udptq.ifq_ref = 1; 277 ifs->ifs_nat_udptq.ifq_head = NULL; 278 ifs->ifs_nat_udptq.ifq_tail = &ifs->ifs_nat_udptq.ifq_head; 279 MUTEX_INIT(&ifs->ifs_nat_udptq.ifq_lock, "nat ipftq udp tab"); 280 ifs->ifs_nat_udptq.ifq_next = &ifs->ifs_nat_icmptq; 281 ifs->ifs_nat_icmptq.ifq_ttl = ifs->ifs_fr_defnaticmpage; 282 ifs->ifs_nat_icmptq.ifq_ref = 1; 283 ifs->ifs_nat_icmptq.ifq_head = NULL; 284 ifs->ifs_nat_icmptq.ifq_tail = &ifs->ifs_nat_icmptq.ifq_head; 285 MUTEX_INIT(&ifs->ifs_nat_icmptq.ifq_lock, "nat icmp ipftq tab"); 286 ifs->ifs_nat_icmptq.ifq_next = &ifs->ifs_nat_iptq; 287 ifs->ifs_nat_iptq.ifq_ttl = ifs->ifs_fr_defnatipage; 288 ifs->ifs_nat_iptq.ifq_ref = 1; 289 ifs->ifs_nat_iptq.ifq_head = NULL; 290 ifs->ifs_nat_iptq.ifq_tail = &ifs->ifs_nat_iptq.ifq_head; 291 MUTEX_INIT(&ifs->ifs_nat_iptq.ifq_lock, "nat ip ipftq tab"); 292 ifs->ifs_nat_iptq.ifq_next = NULL; 293 294 for (i = 0; i < IPF_TCP_NSTATES; i++) { 295 if (ifs->ifs_nat_tqb[i].ifq_ttl < ifs->ifs_fr_defnaticmpage) 296 ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnaticmpage; 297 #ifdef LARGE_NAT 298 else if (ifs->ifs_nat_tqb[i].ifq_ttl > ifs->ifs_fr_defnatage) 299 ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnatage; 300 #endif 301 } 302 303 /* 304 * Increase this because we may have "keep state" following 305 * this too and packet storms can occur if this is removed 306 * too quickly. 307 */ 308 ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = 309 ifs->ifs_nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl; 310 311 RWLOCK_INIT(&ifs->ifs_ipf_nat, "ipf IP NAT rwlock"); 312 RWLOCK_INIT(&ifs->ifs_ipf_natfrag, "ipf IP NAT-Frag rwlock"); 313 MUTEX_INIT(&ifs->ifs_ipf_nat_new, "ipf nat new mutex"); 314 MUTEX_INIT(&ifs->ifs_ipf_natio, "ipf nat io mutex"); 315 316 ifs->ifs_fr_nat_init = 1; 317 318 return 0; 319 } 320 321 322 /* ------------------------------------------------------------------------ */ 323 /* Function: nat_addrdr */ 324 /* Returns: Nil */ 325 /* Parameters: n(I) - pointer to NAT rule to add */ 326 /* */ 327 /* Adds a redirect rule to the hash table of redirect rules and the list of */ 328 /* loaded NAT rules. Updates the bitmask indicating which netmasks are in */ 329 /* use by redirect rules. */ 330 /* ------------------------------------------------------------------------ */ 331 static void nat_addrdr(n, ifs) 332 ipnat_t *n; 333 ipf_stack_t *ifs; 334 { 335 ipnat_t **np; 336 u_32_t j; 337 u_int hv; 338 int k; 339 340 k = count4bits(n->in_outmsk); 341 if ((k >= 0) && (k != 32)) 342 ifs->ifs_rdr_masks |= 1 << k; 343 j = (n->in_outip & n->in_outmsk); 344 hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_rdrrules_sz); 345 np = ifs->ifs_rdr_rules + hv; 346 while (*np != NULL) 347 np = &(*np)->in_rnext; 348 n->in_rnext = NULL; 349 n->in_prnext = np; 350 n->in_hv = hv; 351 *np = n; 352 } 353 354 355 /* ------------------------------------------------------------------------ */ 356 /* Function: nat_addnat */ 357 /* Returns: Nil */ 358 /* Parameters: n(I) - pointer to NAT rule to add */ 359 /* */ 360 /* Adds a NAT map rule to the hash table of rules and the list of loaded */ 361 /* NAT rules. Updates the bitmask indicating which netmasks are in use by */ 362 /* redirect rules. */ 363 /* ------------------------------------------------------------------------ */ 364 static void nat_addnat(n, ifs) 365 ipnat_t *n; 366 ipf_stack_t *ifs; 367 { 368 ipnat_t **np; 369 u_32_t j; 370 u_int hv; 371 int k; 372 373 k = count4bits(n->in_inmsk); 374 if ((k >= 0) && (k != 32)) 375 ifs->ifs_nat_masks |= 1 << k; 376 j = (n->in_inip & n->in_inmsk); 377 hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_natrules_sz); 378 np = ifs->ifs_nat_rules + hv; 379 while (*np != NULL) 380 np = &(*np)->in_mnext; 381 n->in_mnext = NULL; 382 n->in_pmnext = np; 383 n->in_hv = hv; 384 *np = n; 385 } 386 387 388 /* ------------------------------------------------------------------------ */ 389 /* Function: nat_delrdr */ 390 /* Returns: Nil */ 391 /* Parameters: n(I) - pointer to NAT rule to delete */ 392 /* */ 393 /* Removes a redirect rule from the hash table of redirect rules. */ 394 /* ------------------------------------------------------------------------ */ 395 static void nat_delrdr(n) 396 ipnat_t *n; 397 { 398 if (n->in_rnext) 399 n->in_rnext->in_prnext = n->in_prnext; 400 *n->in_prnext = n->in_rnext; 401 } 402 403 404 /* ------------------------------------------------------------------------ */ 405 /* Function: nat_delnat */ 406 /* Returns: Nil */ 407 /* Parameters: n(I) - pointer to NAT rule to delete */ 408 /* */ 409 /* Removes a NAT map rule from the hash table of NAT map rules. */ 410 /* ------------------------------------------------------------------------ */ 411 static void nat_delnat(n) 412 ipnat_t *n; 413 { 414 if (n->in_mnext != NULL) 415 n->in_mnext->in_pmnext = n->in_pmnext; 416 *n->in_pmnext = n->in_mnext; 417 } 418 419 420 /* ------------------------------------------------------------------------ */ 421 /* Function: nat_hostmap */ 422 /* Returns: struct hostmap* - NULL if no hostmap could be created, */ 423 /* else a pointer to the hostmapping to use */ 424 /* Parameters: np(I) - pointer to NAT rule */ 425 /* real(I) - real IP address */ 426 /* map(I) - mapped IP address */ 427 /* port(I) - destination port number */ 428 /* Write Locks: ipf_nat */ 429 /* */ 430 /* Check if an ip address has already been allocated for a given mapping */ 431 /* that is not doing port based translation. If is not yet allocated, then */ 432 /* create a new entry if a non-NULL NAT rule pointer has been supplied. */ 433 /* ------------------------------------------------------------------------ */ 434 static struct hostmap *nat_hostmap(np, src, dst, map, port, ifs) 435 ipnat_t *np; 436 struct in_addr src; 437 struct in_addr dst; 438 struct in_addr map; 439 u_32_t port; 440 ipf_stack_t *ifs; 441 { 442 hostmap_t *hm; 443 u_int hv; 444 445 hv = (src.s_addr ^ dst.s_addr); 446 hv += src.s_addr; 447 hv += dst.s_addr; 448 hv %= HOSTMAP_SIZE; 449 for (hm = ifs->ifs_maptable[hv]; hm; hm = hm->hm_next) 450 if ((hm->hm_srcip.s_addr == src.s_addr) && 451 (hm->hm_dstip.s_addr == dst.s_addr) && 452 ((np == NULL) || (np == hm->hm_ipnat)) && 453 ((port == 0) || (port == hm->hm_port))) { 454 hm->hm_ref++; 455 return hm; 456 } 457 458 if (np == NULL) 459 return NULL; 460 461 KMALLOC(hm, hostmap_t *); 462 if (hm) { 463 hm->hm_hnext = ifs->ifs_ipf_hm_maplist; 464 hm->hm_phnext = &ifs->ifs_ipf_hm_maplist; 465 if (ifs->ifs_ipf_hm_maplist != NULL) 466 ifs->ifs_ipf_hm_maplist->hm_phnext = &hm->hm_hnext; 467 ifs->ifs_ipf_hm_maplist = hm; 468 469 hm->hm_next = ifs->ifs_maptable[hv]; 470 hm->hm_pnext = ifs->ifs_maptable + hv; 471 if (ifs->ifs_maptable[hv] != NULL) 472 ifs->ifs_maptable[hv]->hm_pnext = &hm->hm_next; 473 ifs->ifs_maptable[hv] = hm; 474 hm->hm_ipnat = np; 475 hm->hm_srcip = src; 476 hm->hm_dstip = dst; 477 hm->hm_mapip = map; 478 hm->hm_ref = 1; 479 hm->hm_port = port; 480 } 481 return hm; 482 } 483 484 485 /* ------------------------------------------------------------------------ */ 486 /* Function: nat_hostmapdel */ 487 /* Returns: Nil */ 488 /* Parameters: hm(I) - pointer to hostmap structure */ 489 /* Write Locks: ipf_nat */ 490 /* */ 491 /* Decrement the references to this hostmap structure by one. If this */ 492 /* reaches zero then remove it and free it. */ 493 /* ------------------------------------------------------------------------ */ 494 static void nat_hostmapdel(hm) 495 struct hostmap *hm; 496 { 497 hm->hm_ref--; 498 if (hm->hm_ref == 0) { 499 if (hm->hm_next) 500 hm->hm_next->hm_pnext = hm->hm_pnext; 501 *hm->hm_pnext = hm->hm_next; 502 if (hm->hm_hnext) 503 hm->hm_hnext->hm_phnext = hm->hm_phnext; 504 *hm->hm_phnext = hm->hm_hnext; 505 KFREE(hm); 506 } 507 } 508 509 void fr_hostmapderef(hmp) 510 struct hostmap **hmp; 511 { 512 struct hostmap *hm; 513 514 hm = *hmp; 515 *hmp = NULL; 516 hm->hm_ref--; 517 if (hm->hm_ref == 0) 518 nat_hostmapdel(hm); 519 } 520 521 522 /* ------------------------------------------------------------------------ */ 523 /* Function: fix_outcksum */ 524 /* Returns: Nil */ 525 /* Parameters: sp(I) - location of 16bit checksum to update */ 526 /* n((I) - amount to adjust checksum by */ 527 /* */ 528 /* Adjusts the 16bit checksum by "n" for packets going out. */ 529 /* ------------------------------------------------------------------------ */ 530 void fix_outcksum(sp, n) 531 u_short *sp; 532 u_32_t n; 533 { 534 u_short sumshort; 535 u_32_t sum1; 536 537 if (n == 0) 538 return; 539 540 sum1 = (~ntohs(*sp)) & 0xffff; 541 sum1 += (n); 542 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 543 /* Again */ 544 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 545 sumshort = ~(u_short)sum1; 546 *(sp) = htons(sumshort); 547 } 548 549 550 /* ------------------------------------------------------------------------ */ 551 /* Function: fix_incksum */ 552 /* Returns: Nil */ 553 /* Parameters: sp(I) - location of 16bit checksum to update */ 554 /* n((I) - amount to adjust checksum by */ 555 /* */ 556 /* Adjusts the 16bit checksum by "n" for packets going in. */ 557 /* ------------------------------------------------------------------------ */ 558 void fix_incksum(sp, n) 559 u_short *sp; 560 u_32_t n; 561 { 562 u_short sumshort; 563 u_32_t sum1; 564 565 if (n == 0) 566 return; 567 568 sum1 = (~ntohs(*sp)) & 0xffff; 569 sum1 += ~(n) & 0xffff; 570 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 571 /* Again */ 572 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 573 sumshort = ~(u_short)sum1; 574 *(sp) = htons(sumshort); 575 } 576 577 578 /* ------------------------------------------------------------------------ */ 579 /* Function: fix_datacksum */ 580 /* Returns: Nil */ 581 /* Parameters: sp(I) - location of 16bit checksum to update */ 582 /* n((I) - amount to adjust checksum by */ 583 /* */ 584 /* Fix_datacksum is used *only* for the adjustments of checksums in the */ 585 /* data section of an IP packet. */ 586 /* */ 587 /* The only situation in which you need to do this is when NAT'ing an */ 588 /* ICMP error message. Such a message, contains in its body the IP header */ 589 /* of the original IP packet, that causes the error. */ 590 /* */ 591 /* You can't use fix_incksum or fix_outcksum in that case, because for the */ 592 /* kernel the data section of the ICMP error is just data, and no special */ 593 /* processing like hardware cksum or ntohs processing have been done by the */ 594 /* kernel on the data section. */ 595 /* ------------------------------------------------------------------------ */ 596 void fix_datacksum(sp, n) 597 u_short *sp; 598 u_32_t n; 599 { 600 u_short sumshort; 601 u_32_t sum1; 602 603 if (n == 0) 604 return; 605 606 sum1 = (~ntohs(*sp)) & 0xffff; 607 sum1 += (n); 608 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 609 /* Again */ 610 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 611 sumshort = ~(u_short)sum1; 612 *(sp) = htons(sumshort); 613 } 614 615 616 /* ------------------------------------------------------------------------ */ 617 /* Function: fr_nat_ioctl */ 618 /* Returns: int - 0 == success, != 0 == failure */ 619 /* Parameters: data(I) - pointer to ioctl data */ 620 /* cmd(I) - ioctl command integer */ 621 /* mode(I) - file mode bits used with open */ 622 /* */ 623 /* Processes an ioctl call made to operate on the IP Filter NAT device. */ 624 /* ------------------------------------------------------------------------ */ 625 int fr_nat_ioctl(data, cmd, mode, uid, ctx, ifs) 626 ioctlcmd_t cmd; 627 caddr_t data; 628 int mode, uid; 629 void *ctx; 630 ipf_stack_t *ifs; 631 { 632 ipnat_t *nat, *nt, *n = NULL, **np = NULL; 633 int error = 0, ret, arg, getlock; 634 ipnat_t natd; 635 636 #if (BSD >= 199306) && defined(_KERNEL) 637 if ((securelevel >= 2) && (mode & FWRITE)) 638 return EPERM; 639 #endif 640 641 #if defined(__osf__) && defined(_KERNEL) 642 getlock = 0; 643 #else 644 getlock = (mode & NAT_LOCKHELD) ? 0 : 1; 645 #endif 646 647 nat = NULL; /* XXX gcc -Wuninitialized */ 648 if (cmd == (ioctlcmd_t)SIOCADNAT) { 649 KMALLOC(nt, ipnat_t *); 650 } else { 651 nt = NULL; 652 } 653 654 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) { 655 if (mode & NAT_SYSSPACE) { 656 bcopy(data, (char *)&natd, sizeof(natd)); 657 error = 0; 658 } else { 659 error = fr_inobj(data, &natd, IPFOBJ_IPNAT); 660 } 661 662 } else if (cmd == (ioctlcmd_t)SIOCIPFFL) { /* SIOCFLNAT & SIOCCNATL */ 663 BCOPYIN(data, &arg, sizeof(arg)); 664 } 665 666 if (error != 0) 667 goto done; 668 669 /* 670 * For add/delete, look to see if the NAT entry is already present 671 */ 672 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) { 673 nat = &natd; 674 if (nat->in_v == 0) /* For backward compat. */ 675 nat->in_v = 4; 676 nat->in_flags &= IPN_USERFLAGS; 677 if ((nat->in_redir & NAT_MAPBLK) == 0) { 678 if ((nat->in_flags & IPN_SPLIT) == 0) 679 nat->in_inip &= nat->in_inmsk; 680 if ((nat->in_flags & IPN_IPRANGE) == 0) 681 nat->in_outip &= nat->in_outmsk; 682 } 683 MUTEX_ENTER(&ifs->ifs_ipf_natio); 684 for (np = &ifs->ifs_nat_list; ((n = *np) != NULL); 685 np = &n->in_next) 686 if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags, 687 IPN_CMPSIZ)) 688 break; 689 } 690 691 switch (cmd) 692 { 693 case SIOCGENITER : 694 { 695 ipfgeniter_t iter; 696 ipftoken_t *token; 697 698 error = fr_inobj(data, &iter, IPFOBJ_GENITER); 699 if (error != 0) 700 break; 701 702 token = ipf_findtoken(iter.igi_type, uid, ctx, ifs); 703 if (token != NULL) 704 error = nat_iterator(token, &iter, ifs); 705 else 706 error = ESRCH; 707 RWLOCK_EXIT(&ifs->ifs_ipf_tokens); 708 break; 709 } 710 #ifdef IPFILTER_LOG 711 case SIOCIPFFB : 712 { 713 int tmp; 714 715 if (!(mode & FWRITE)) 716 error = EPERM; 717 else { 718 tmp = ipflog_clear(IPL_LOGNAT, ifs); 719 BCOPYOUT((char *)&tmp, (char *)data, sizeof(tmp)); 720 } 721 break; 722 } 723 case SIOCSETLG : 724 if (!(mode & FWRITE)) 725 error = EPERM; 726 else { 727 BCOPYIN((char *)data, 728 (char *)&ifs->ifs_nat_logging, 729 sizeof(ifs->ifs_nat_logging)); 730 } 731 break; 732 case SIOCGETLG : 733 BCOPYOUT((char *)&ifs->ifs_nat_logging, (char *)data, 734 sizeof(ifs->ifs_nat_logging)); 735 break; 736 case FIONREAD : 737 arg = ifs->ifs_iplused[IPL_LOGNAT]; 738 BCOPYOUT(&arg, data, sizeof(arg)); 739 break; 740 #endif 741 case SIOCADNAT : 742 if (!(mode & FWRITE)) { 743 error = EPERM; 744 } else if (n != NULL) { 745 error = EEXIST; 746 } else if (nt == NULL) { 747 error = ENOMEM; 748 } 749 if (error != 0) { 750 MUTEX_EXIT(&ifs->ifs_ipf_natio); 751 break; 752 } 753 bcopy((char *)nat, (char *)nt, sizeof(*n)); 754 error = nat_siocaddnat(nt, np, getlock, ifs); 755 MUTEX_EXIT(&ifs->ifs_ipf_natio); 756 if (error == 0) 757 nt = NULL; 758 break; 759 case SIOCRMNAT : 760 if (!(mode & FWRITE)) { 761 error = EPERM; 762 n = NULL; 763 } else if (n == NULL) { 764 error = ESRCH; 765 } 766 767 if (error != 0) { 768 MUTEX_EXIT(&ifs->ifs_ipf_natio); 769 break; 770 } 771 nat_siocdelnat(n, np, getlock, ifs); 772 773 MUTEX_EXIT(&ifs->ifs_ipf_natio); 774 n = NULL; 775 break; 776 case SIOCGNATS : 777 ifs->ifs_nat_stats.ns_table[0] = ifs->ifs_nat_table[0]; 778 ifs->ifs_nat_stats.ns_table[1] = ifs->ifs_nat_table[1]; 779 ifs->ifs_nat_stats.ns_list = ifs->ifs_nat_list; 780 ifs->ifs_nat_stats.ns_maptable = ifs->ifs_maptable; 781 ifs->ifs_nat_stats.ns_maplist = ifs->ifs_ipf_hm_maplist; 782 ifs->ifs_nat_stats.ns_nattab_max = ifs->ifs_ipf_nattable_max; 783 ifs->ifs_nat_stats.ns_nattab_sz = ifs->ifs_ipf_nattable_sz; 784 ifs->ifs_nat_stats.ns_rultab_sz = ifs->ifs_ipf_natrules_sz; 785 ifs->ifs_nat_stats.ns_rdrtab_sz = ifs->ifs_ipf_rdrrules_sz; 786 ifs->ifs_nat_stats.ns_hostmap_sz = ifs->ifs_ipf_hostmap_sz; 787 ifs->ifs_nat_stats.ns_instances = ifs->ifs_nat_instances; 788 ifs->ifs_nat_stats.ns_apslist = ifs->ifs_ap_sess_list; 789 error = fr_outobj(data, &ifs->ifs_nat_stats, IPFOBJ_NATSTAT); 790 break; 791 case SIOCGNATL : 792 { 793 natlookup_t nl; 794 795 if (getlock) { 796 READ_ENTER(&ifs->ifs_ipf_nat); 797 } 798 error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP); 799 if (error == 0) { 800 if (nat_lookupredir(&nl, ifs) != NULL) { 801 error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP); 802 } else { 803 error = ESRCH; 804 } 805 } 806 if (getlock) { 807 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 808 } 809 break; 810 } 811 case SIOCIPFFL : /* old SIOCFLNAT & SIOCCNATL */ 812 if (!(mode & FWRITE)) { 813 error = EPERM; 814 break; 815 } 816 if (getlock) { 817 WRITE_ENTER(&ifs->ifs_ipf_nat); 818 } 819 error = 0; 820 if (arg == 0) 821 ret = nat_flushtable(ifs); 822 else if (arg == 1) 823 ret = nat_clearlist(ifs); 824 else if (arg >= 2 && arg <= 4) 825 ret = nat_extraflush(arg - 2, ifs); 826 else 827 error = EINVAL; 828 if (getlock) { 829 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 830 } 831 if (error == 0) { 832 BCOPYOUT(&ret, data, sizeof(ret)); 833 } 834 break; 835 case SIOCPROXY : 836 error = appr_ioctl(data, cmd, mode, ifs); 837 break; 838 case SIOCSTLCK : 839 if (!(mode & FWRITE)) { 840 error = EPERM; 841 } else { 842 fr_lock(data, &ifs->ifs_fr_nat_lock); 843 } 844 break; 845 case SIOCSTPUT : 846 if ((mode & FWRITE) != 0) { 847 error = fr_natputent(data, getlock, ifs); 848 } else { 849 error = EACCES; 850 } 851 break; 852 case SIOCSTGSZ : 853 if (ifs->ifs_fr_nat_lock) { 854 if (getlock) { 855 READ_ENTER(&ifs->ifs_ipf_nat); 856 } 857 error = fr_natgetsz(data, ifs); 858 if (getlock) { 859 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 860 } 861 } else 862 error = EACCES; 863 break; 864 case SIOCSTGET : 865 if (ifs->ifs_fr_nat_lock) { 866 if (getlock) { 867 READ_ENTER(&ifs->ifs_ipf_nat); 868 } 869 error = fr_natgetent(data, ifs); 870 if (getlock) { 871 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 872 } 873 } else 874 error = EACCES; 875 break; 876 case SIOCIPFDELTOK : 877 (void) BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg)); 878 error = ipf_deltoken(arg, uid, ctx, ifs); 879 break; 880 default : 881 error = EINVAL; 882 break; 883 } 884 done: 885 if (nt) 886 KFREE(nt); 887 return error; 888 } 889 890 891 /* ------------------------------------------------------------------------ */ 892 /* Function: nat_siocaddnat */ 893 /* Returns: int - 0 == success, != 0 == failure */ 894 /* Parameters: n(I) - pointer to new NAT rule */ 895 /* np(I) - pointer to where to insert new NAT rule */ 896 /* getlock(I) - flag indicating if lock on ipf_nat is held */ 897 /* Mutex Locks: ipf_natio */ 898 /* */ 899 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ 900 /* from information passed to the kernel, then add it to the appropriate */ 901 /* NAT rule table(s). */ 902 /* ------------------------------------------------------------------------ */ 903 static int nat_siocaddnat(n, np, getlock, ifs) 904 ipnat_t *n, **np; 905 int getlock; 906 ipf_stack_t *ifs; 907 { 908 int error = 0, i, j; 909 910 if (nat_resolverule(n, ifs) != 0) 911 return ENOENT; 912 913 if ((n->in_age[0] == 0) && (n->in_age[1] != 0)) 914 return EINVAL; 915 916 n->in_use = 0; 917 if (n->in_redir & NAT_MAPBLK) 918 n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk); 919 else if (n->in_flags & IPN_AUTOPORTMAP) 920 n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk); 921 else if (n->in_flags & IPN_IPRANGE) 922 n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip); 923 else if (n->in_flags & IPN_SPLIT) 924 n->in_space = 2; 925 else if (n->in_outmsk != 0) 926 n->in_space = ~ntohl(n->in_outmsk); 927 else 928 n->in_space = 1; 929 930 /* 931 * Calculate the number of valid IP addresses in the output 932 * mapping range. In all cases, the range is inclusive of 933 * the start and ending IP addresses. 934 * If to a CIDR address, lose 2: broadcast + network address 935 * (so subtract 1) 936 * If to a range, add one. 937 * If to a single IP address, set to 1. 938 */ 939 if (n->in_space) { 940 if ((n->in_flags & IPN_IPRANGE) != 0) 941 n->in_space += 1; 942 else 943 n->in_space -= 1; 944 } else 945 n->in_space = 1; 946 947 if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) && 948 ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0)) 949 n->in_nip = ntohl(n->in_outip) + 1; 950 else if ((n->in_flags & IPN_SPLIT) && 951 (n->in_redir & NAT_REDIRECT)) 952 n->in_nip = ntohl(n->in_inip); 953 else 954 n->in_nip = ntohl(n->in_outip); 955 if (n->in_redir & NAT_MAP) { 956 n->in_pnext = ntohs(n->in_pmin); 957 /* 958 * Multiply by the number of ports made available. 959 */ 960 if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) { 961 n->in_space *= (ntohs(n->in_pmax) - 962 ntohs(n->in_pmin) + 1); 963 /* 964 * Because two different sources can map to 965 * different destinations but use the same 966 * local IP#/port #. 967 * If the result is smaller than in_space, then 968 * we may have wrapped around 32bits. 969 */ 970 i = n->in_inmsk; 971 if ((i != 0) && (i != 0xffffffff)) { 972 j = n->in_space * (~ntohl(i) + 1); 973 if (j >= n->in_space) 974 n->in_space = j; 975 else 976 n->in_space = 0xffffffff; 977 } 978 } 979 /* 980 * If no protocol is specified, multiple by 256 to allow for 981 * at least one IP:IP mapping per protocol. 982 */ 983 if ((n->in_flags & IPN_TCPUDPICMP) == 0) { 984 j = n->in_space * 256; 985 if (j >= n->in_space) 986 n->in_space = j; 987 else 988 n->in_space = 0xffffffff; 989 } 990 } 991 992 /* Otherwise, these fields are preset */ 993 994 if (getlock) { 995 WRITE_ENTER(&ifs->ifs_ipf_nat); 996 } 997 n->in_next = NULL; 998 *np = n; 999 1000 if (n->in_age[0] != 0) 1001 n->in_tqehead[0] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe, 1002 n->in_age[0], ifs); 1003 1004 if (n->in_age[1] != 0) 1005 n->in_tqehead[1] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe, 1006 n->in_age[1], ifs); 1007 1008 if (n->in_redir & NAT_REDIRECT) { 1009 n->in_flags &= ~IPN_NOTDST; 1010 nat_addrdr(n, ifs); 1011 } 1012 if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) { 1013 n->in_flags &= ~IPN_NOTSRC; 1014 nat_addnat(n, ifs); 1015 } 1016 n = NULL; 1017 ifs->ifs_nat_stats.ns_rules++; 1018 if (getlock) { 1019 RWLOCK_EXIT(&ifs->ifs_ipf_nat); /* WRITE */ 1020 } 1021 1022 return error; 1023 } 1024 1025 1026 /* ------------------------------------------------------------------------ */ 1027 /* Function: nat_resolvrule */ 1028 /* Returns: int - 0 == success, -1 == failure */ 1029 /* Parameters: n(I) - pointer to NAT rule */ 1030 /* */ 1031 /* Resolve some of the details inside the NAT rule. Includes resolving */ 1032 /* any specified interfaces and proxy labels, and determines whether or not */ 1033 /* all proxy labels are correctly specified. */ 1034 /* */ 1035 /* Called by nat_siocaddnat() (SIOCADNAT) and fr_natputent (SIOCSTPUT). */ 1036 /* ------------------------------------------------------------------------ */ 1037 static int nat_resolverule(n, ifs) 1038 ipnat_t *n; 1039 ipf_stack_t *ifs; 1040 { 1041 n->in_ifnames[0][LIFNAMSIZ - 1] = '\0'; 1042 n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4, ifs); 1043 1044 n->in_ifnames[1][LIFNAMSIZ - 1] = '\0'; 1045 if (n->in_ifnames[1][0] == '\0') { 1046 (void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ); 1047 n->in_ifps[1] = n->in_ifps[0]; 1048 } else { 1049 n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4, ifs); 1050 } 1051 1052 if (n->in_plabel[0] != '\0') { 1053 n->in_apr = appr_lookup(n->in_p, n->in_plabel, ifs); 1054 if (n->in_apr == NULL) 1055 return -1; 1056 } 1057 return 0; 1058 } 1059 1060 1061 /* ------------------------------------------------------------------------ */ 1062 /* Function: nat_siocdelnat */ 1063 /* Returns: int - 0 == success, != 0 == failure */ 1064 /* Parameters: n(I) - pointer to new NAT rule */ 1065 /* np(I) - pointer to where to insert new NAT rule */ 1066 /* getlock(I) - flag indicating if lock on ipf_nat is held */ 1067 /* Mutex Locks: ipf_natio */ 1068 /* */ 1069 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ 1070 /* from information passed to the kernel, then add it to the appropriate */ 1071 /* NAT rule table(s). */ 1072 /* ------------------------------------------------------------------------ */ 1073 static void nat_siocdelnat(n, np, getlock, ifs) 1074 ipnat_t *n, **np; 1075 int getlock; 1076 ipf_stack_t *ifs; 1077 { 1078 if (getlock) { 1079 WRITE_ENTER(&ifs->ifs_ipf_nat); 1080 } 1081 if (n->in_redir & NAT_REDIRECT) 1082 nat_delrdr(n); 1083 if (n->in_redir & (NAT_MAPBLK|NAT_MAP)) 1084 nat_delnat(n); 1085 if (ifs->ifs_nat_list == NULL) { 1086 ifs->ifs_nat_masks = 0; 1087 ifs->ifs_rdr_masks = 0; 1088 } 1089 1090 if (n->in_tqehead[0] != NULL) { 1091 if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) { 1092 fr_freetimeoutqueue(n->in_tqehead[0], ifs); 1093 } 1094 } 1095 1096 if (n->in_tqehead[1] != NULL) { 1097 if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) { 1098 fr_freetimeoutqueue(n->in_tqehead[1], ifs); 1099 } 1100 } 1101 1102 *np = n->in_next; 1103 1104 if (n->in_use == 0) { 1105 if (n->in_apr) 1106 appr_free(n->in_apr); 1107 KFREE(n); 1108 ifs->ifs_nat_stats.ns_rules--; 1109 } else { 1110 n->in_flags |= IPN_DELETE; 1111 n->in_next = NULL; 1112 } 1113 if (getlock) { 1114 RWLOCK_EXIT(&ifs->ifs_ipf_nat); /* READ/WRITE */ 1115 } 1116 } 1117 1118 1119 /* ------------------------------------------------------------------------ */ 1120 /* Function: fr_natgetsz */ 1121 /* Returns: int - 0 == success, != 0 is the error value. */ 1122 /* Parameters: data(I) - pointer to natget structure with kernel pointer */ 1123 /* get the size of. */ 1124 /* */ 1125 /* Handle SIOCSTGSZ. */ 1126 /* Return the size of the nat list entry to be copied back to user space. */ 1127 /* The size of the entry is stored in the ng_sz field and the enture natget */ 1128 /* structure is copied back to the user. */ 1129 /* ------------------------------------------------------------------------ */ 1130 static int fr_natgetsz(data, ifs) 1131 caddr_t data; 1132 ipf_stack_t *ifs; 1133 { 1134 ap_session_t *aps; 1135 nat_t *nat, *n; 1136 natget_t ng; 1137 1138 BCOPYIN(data, &ng, sizeof(ng)); 1139 1140 nat = ng.ng_ptr; 1141 if (!nat) { 1142 nat = ifs->ifs_nat_instances; 1143 ng.ng_sz = 0; 1144 /* 1145 * Empty list so the size returned is 0. Simple. 1146 */ 1147 if (nat == NULL) { 1148 BCOPYOUT(&ng, data, sizeof(ng)); 1149 return 0; 1150 } 1151 } else { 1152 /* 1153 * Make sure the pointer we're copying from exists in the 1154 * current list of entries. Security precaution to prevent 1155 * copying of random kernel data. 1156 */ 1157 for (n = ifs->ifs_nat_instances; n; n = n->nat_next) 1158 if (n == nat) 1159 break; 1160 if (!n) 1161 return ESRCH; 1162 } 1163 1164 /* 1165 * Incluse any space required for proxy data structures. 1166 */ 1167 ng.ng_sz = sizeof(nat_save_t); 1168 aps = nat->nat_aps; 1169 if (aps != NULL) { 1170 ng.ng_sz += sizeof(ap_session_t) - 4; 1171 if (aps->aps_data != 0) 1172 ng.ng_sz += aps->aps_psiz; 1173 } 1174 1175 BCOPYOUT(&ng, data, sizeof(ng)); 1176 return 0; 1177 } 1178 1179 1180 /* ------------------------------------------------------------------------ */ 1181 /* Function: fr_natgetent */ 1182 /* Returns: int - 0 == success, != 0 is the error value. */ 1183 /* Parameters: data(I) - pointer to natget structure with kernel pointer */ 1184 /* to NAT structure to copy out. */ 1185 /* */ 1186 /* Handle SIOCSTGET. */ 1187 /* Copies out NAT entry to user space. Any additional data held for a */ 1188 /* proxy is also copied, as to is the NAT rule which was responsible for it */ 1189 /* ------------------------------------------------------------------------ */ 1190 static int fr_natgetent(data, ifs) 1191 caddr_t data; 1192 ipf_stack_t *ifs; 1193 { 1194 int error, outsize; 1195 ap_session_t *aps; 1196 nat_save_t *ipn, ipns; 1197 nat_t *n, *nat; 1198 1199 error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE); 1200 if (error != 0) 1201 return error; 1202 1203 if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920)) 1204 return EINVAL; 1205 1206 KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize); 1207 if (ipn == NULL) 1208 return ENOMEM; 1209 1210 ipn->ipn_dsize = ipns.ipn_dsize; 1211 nat = ipns.ipn_next; 1212 if (nat == NULL) { 1213 nat = ifs->ifs_nat_instances; 1214 if (nat == NULL) { 1215 if (ifs->ifs_nat_instances == NULL) 1216 error = ENOENT; 1217 goto finished; 1218 } 1219 } else { 1220 /* 1221 * Make sure the pointer we're copying from exists in the 1222 * current list of entries. Security precaution to prevent 1223 * copying of random kernel data. 1224 */ 1225 for (n = ifs->ifs_nat_instances; n; n = n->nat_next) 1226 if (n == nat) 1227 break; 1228 if (n == NULL) { 1229 error = ESRCH; 1230 goto finished; 1231 } 1232 } 1233 ipn->ipn_next = nat->nat_next; 1234 1235 /* 1236 * Copy the NAT structure. 1237 */ 1238 bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat)); 1239 1240 /* 1241 * If we have a pointer to the NAT rule it belongs to, save that too. 1242 */ 1243 if (nat->nat_ptr != NULL) 1244 bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat, 1245 sizeof(ipn->ipn_ipnat)); 1246 1247 /* 1248 * If we also know the NAT entry has an associated filter rule, 1249 * save that too. 1250 */ 1251 if (nat->nat_fr != NULL) 1252 bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr, 1253 sizeof(ipn->ipn_fr)); 1254 1255 /* 1256 * Last but not least, if there is an application proxy session set 1257 * up for this NAT entry, then copy that out too, including any 1258 * private data saved along side it by the proxy. 1259 */ 1260 aps = nat->nat_aps; 1261 outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data); 1262 if (aps != NULL) { 1263 char *s; 1264 1265 if (outsize < sizeof(*aps)) { 1266 error = ENOBUFS; 1267 goto finished; 1268 } 1269 1270 s = ipn->ipn_data; 1271 bcopy((char *)aps, s, sizeof(*aps)); 1272 s += sizeof(*aps); 1273 outsize -= sizeof(*aps); 1274 if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz)) 1275 bcopy(aps->aps_data, s, aps->aps_psiz); 1276 else 1277 error = ENOBUFS; 1278 } 1279 if (error == 0) { 1280 error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize); 1281 } 1282 1283 finished: 1284 if (ipn != NULL) { 1285 KFREES(ipn, ipns.ipn_dsize); 1286 } 1287 return error; 1288 } 1289 1290 1291 /* ------------------------------------------------------------------------ */ 1292 /* Function: fr_natputent */ 1293 /* Returns: int - 0 == success, != 0 is the error value. */ 1294 /* Parameters: data(I) - pointer to natget structure with NAT */ 1295 /* structure information to load into the kernel */ 1296 /* getlock(I) - flag indicating whether or not a write lock */ 1297 /* on ipf_nat is already held. */ 1298 /* */ 1299 /* Handle SIOCSTPUT. */ 1300 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */ 1301 /* firewall rule data structures, if pointers to them indicate so. */ 1302 /* ------------------------------------------------------------------------ */ 1303 static int fr_natputent(data, getlock, ifs) 1304 caddr_t data; 1305 int getlock; 1306 ipf_stack_t *ifs; 1307 { 1308 nat_save_t ipn, *ipnn; 1309 ap_session_t *aps; 1310 nat_t *n, *nat; 1311 frentry_t *fr; 1312 fr_info_t fin; 1313 ipnat_t *in; 1314 int error; 1315 1316 error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE); 1317 if (error != 0) 1318 return error; 1319 1320 /* 1321 * Trigger automatic call to nat_extraflush() if the 1322 * table has reached capcity specified by hi watermark. 1323 */ 1324 if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi) 1325 ifs->ifs_nat_doflush = 1; 1326 1327 /* 1328 * Initialise early because of code at junkput label. 1329 */ 1330 in = NULL; 1331 aps = NULL; 1332 nat = NULL; 1333 ipnn = NULL; 1334 1335 /* 1336 * New entry, copy in the rest of the NAT entry if it's size is more 1337 * than just the nat_t structure. 1338 */ 1339 fr = NULL; 1340 if (ipn.ipn_dsize > sizeof(ipn)) { 1341 if (ipn.ipn_dsize > 81920) { 1342 error = ENOMEM; 1343 goto junkput; 1344 } 1345 1346 KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize); 1347 if (ipnn == NULL) 1348 return ENOMEM; 1349 1350 error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize); 1351 if (error != 0) { 1352 error = EFAULT; 1353 goto junkput; 1354 } 1355 } else 1356 ipnn = &ipn; 1357 1358 KMALLOC(nat, nat_t *); 1359 if (nat == NULL) { 1360 error = ENOMEM; 1361 goto junkput; 1362 } 1363 1364 bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat)); 1365 /* 1366 * Initialize all these so that nat_delete() doesn't cause a crash. 1367 */ 1368 bzero((char *)nat, offsetof(struct nat, nat_tqe)); 1369 nat->nat_tqe.tqe_pnext = NULL; 1370 nat->nat_tqe.tqe_next = NULL; 1371 nat->nat_tqe.tqe_ifq = NULL; 1372 nat->nat_tqe.tqe_parent = nat; 1373 1374 /* 1375 * Restore the rule associated with this nat session 1376 */ 1377 in = ipnn->ipn_nat.nat_ptr; 1378 if (in != NULL) { 1379 KMALLOC(in, ipnat_t *); 1380 nat->nat_ptr = in; 1381 if (in == NULL) { 1382 error = ENOMEM; 1383 goto junkput; 1384 } 1385 bzero((char *)in, offsetof(struct ipnat, in_next6)); 1386 bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in)); 1387 in->in_use = 1; 1388 in->in_flags |= IPN_DELETE; 1389 1390 ATOMIC_INC(ifs->ifs_nat_stats.ns_rules); 1391 1392 if (nat_resolverule(in, ifs) != 0) { 1393 error = ESRCH; 1394 goto junkput; 1395 } 1396 } 1397 1398 /* 1399 * Check that the NAT entry doesn't already exist in the kernel. 1400 */ 1401 bzero((char *)&fin, sizeof(fin)); 1402 fin.fin_p = nat->nat_p; 1403 fin.fin_ifs = ifs; 1404 if (nat->nat_dir == NAT_OUTBOUND) { 1405 fin.fin_data[0] = ntohs(nat->nat_oport); 1406 fin.fin_data[1] = ntohs(nat->nat_outport); 1407 fin.fin_ifp = nat->nat_ifps[0]; 1408 if (getlock) { 1409 READ_ENTER(&ifs->ifs_ipf_nat); 1410 } 1411 n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p, 1412 nat->nat_oip, nat->nat_outip); 1413 if (getlock) { 1414 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1415 } 1416 if (n != NULL) { 1417 error = EEXIST; 1418 goto junkput; 1419 } 1420 } else if (nat->nat_dir == NAT_INBOUND) { 1421 fin.fin_data[0] = ntohs(nat->nat_inport); 1422 fin.fin_data[1] = ntohs(nat->nat_oport); 1423 fin.fin_ifp = nat->nat_ifps[1]; 1424 if (getlock) { 1425 READ_ENTER(&ifs->ifs_ipf_nat); 1426 } 1427 n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p, 1428 nat->nat_inip, nat->nat_oip); 1429 if (getlock) { 1430 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1431 } 1432 if (n != NULL) { 1433 error = EEXIST; 1434 goto junkput; 1435 } 1436 } else { 1437 error = EINVAL; 1438 goto junkput; 1439 } 1440 1441 /* 1442 * Restore ap_session_t structure. Include the private data allocated 1443 * if it was there. 1444 */ 1445 aps = nat->nat_aps; 1446 if (aps != NULL) { 1447 KMALLOC(aps, ap_session_t *); 1448 nat->nat_aps = aps; 1449 if (aps == NULL) { 1450 error = ENOMEM; 1451 goto junkput; 1452 } 1453 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps)); 1454 if (in != NULL) 1455 aps->aps_apr = in->in_apr; 1456 else 1457 aps->aps_apr = NULL; 1458 if (aps->aps_psiz != 0) { 1459 if (aps->aps_psiz > 81920) { 1460 error = ENOMEM; 1461 goto junkput; 1462 } 1463 KMALLOCS(aps->aps_data, void *, aps->aps_psiz); 1464 if (aps->aps_data == NULL) { 1465 error = ENOMEM; 1466 goto junkput; 1467 } 1468 bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data, 1469 aps->aps_psiz); 1470 } else { 1471 aps->aps_psiz = 0; 1472 aps->aps_data = NULL; 1473 } 1474 } 1475 1476 /* 1477 * If there was a filtering rule associated with this entry then 1478 * build up a new one. 1479 */ 1480 fr = nat->nat_fr; 1481 if (fr != NULL) { 1482 if ((nat->nat_flags & SI_NEWFR) != 0) { 1483 KMALLOC(fr, frentry_t *); 1484 nat->nat_fr = fr; 1485 if (fr == NULL) { 1486 error = ENOMEM; 1487 goto junkput; 1488 } 1489 ipnn->ipn_nat.nat_fr = fr; 1490 (void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE); 1491 bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr)); 1492 1493 fr->fr_ref = 1; 1494 fr->fr_dsize = 0; 1495 fr->fr_data = NULL; 1496 fr->fr_type = FR_T_NONE; 1497 1498 MUTEX_NUKE(&fr->fr_lock); 1499 MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock"); 1500 } else { 1501 if (getlock) { 1502 READ_ENTER(&ifs->ifs_ipf_nat); 1503 } 1504 for (n = ifs->ifs_nat_instances; n; n = n->nat_next) 1505 if (n->nat_fr == fr) 1506 break; 1507 1508 if (n != NULL) { 1509 MUTEX_ENTER(&fr->fr_lock); 1510 fr->fr_ref++; 1511 MUTEX_EXIT(&fr->fr_lock); 1512 } 1513 if (getlock) { 1514 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1515 } 1516 if (!n) { 1517 error = ESRCH; 1518 goto junkput; 1519 } 1520 } 1521 } 1522 1523 if (ipnn != &ipn) { 1524 KFREES(ipnn, ipn.ipn_dsize); 1525 ipnn = NULL; 1526 } 1527 1528 if (getlock) { 1529 WRITE_ENTER(&ifs->ifs_ipf_nat); 1530 } 1531 error = nat_insert(nat, nat->nat_rev, ifs); 1532 if ((error == 0) && (aps != NULL)) { 1533 aps->aps_next = ifs->ifs_ap_sess_list; 1534 ifs->ifs_ap_sess_list = aps; 1535 } 1536 if (getlock) { 1537 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1538 } 1539 1540 if (error == 0) 1541 return 0; 1542 1543 error = ENOMEM; 1544 1545 junkput: 1546 if (fr != NULL) 1547 (void) fr_derefrule(&fr, ifs); 1548 1549 if ((ipnn != NULL) && (ipnn != &ipn)) { 1550 KFREES(ipnn, ipn.ipn_dsize); 1551 } 1552 if (nat != NULL) { 1553 if (aps != NULL) { 1554 if (aps->aps_data != NULL) { 1555 KFREES(aps->aps_data, aps->aps_psiz); 1556 } 1557 KFREE(aps); 1558 } 1559 if (in != NULL) { 1560 if (in->in_apr) 1561 appr_free(in->in_apr); 1562 KFREE(in); 1563 } 1564 KFREE(nat); 1565 } 1566 return error; 1567 } 1568 1569 1570 /* ------------------------------------------------------------------------ */ 1571 /* Function: nat_delete */ 1572 /* Returns: Nil */ 1573 /* Parameters: natd(I) - pointer to NAT structure to delete */ 1574 /* logtype(I) - type of LOG record to create before deleting */ 1575 /* Write Lock: ipf_nat */ 1576 /* */ 1577 /* Delete a nat entry from the various lists and table. If NAT logging is */ 1578 /* enabled then generate a NAT log record for this event. */ 1579 /* ------------------------------------------------------------------------ */ 1580 static void nat_delete(nat, logtype, ifs) 1581 struct nat *nat; 1582 int logtype; 1583 ipf_stack_t *ifs; 1584 { 1585 struct ipnat *ipn; 1586 1587 if (logtype != 0 && ifs->ifs_nat_logging != 0) 1588 nat_log(nat, logtype, ifs); 1589 1590 /* 1591 * Take it as a general indication that all the pointers are set if 1592 * nat_pnext is set. 1593 */ 1594 if (nat->nat_pnext != NULL) { 1595 ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--; 1596 ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--; 1597 1598 *nat->nat_pnext = nat->nat_next; 1599 if (nat->nat_next != NULL) { 1600 nat->nat_next->nat_pnext = nat->nat_pnext; 1601 nat->nat_next = NULL; 1602 } 1603 nat->nat_pnext = NULL; 1604 1605 *nat->nat_phnext[0] = nat->nat_hnext[0]; 1606 if (nat->nat_hnext[0] != NULL) { 1607 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; 1608 nat->nat_hnext[0] = NULL; 1609 } 1610 nat->nat_phnext[0] = NULL; 1611 1612 *nat->nat_phnext[1] = nat->nat_hnext[1]; 1613 if (nat->nat_hnext[1] != NULL) { 1614 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; 1615 nat->nat_hnext[1] = NULL; 1616 } 1617 nat->nat_phnext[1] = NULL; 1618 1619 if ((nat->nat_flags & SI_WILDP) != 0) 1620 ifs->ifs_nat_stats.ns_wilds--; 1621 } 1622 1623 if (nat->nat_me != NULL) { 1624 *nat->nat_me = NULL; 1625 nat->nat_me = NULL; 1626 } 1627 1628 fr_deletequeueentry(&nat->nat_tqe); 1629 1630 MUTEX_ENTER(&nat->nat_lock); 1631 if (nat->nat_ref > 1) { 1632 nat->nat_ref--; 1633 MUTEX_EXIT(&nat->nat_lock); 1634 return; 1635 } 1636 MUTEX_EXIT(&nat->nat_lock); 1637 1638 /* 1639 * At this point, nat_ref is 1, doing "--" would make it 0.. 1640 */ 1641 nat->nat_ref = 0; 1642 1643 #ifdef IPFILTER_SYNC 1644 if (nat->nat_sync) 1645 ipfsync_del(nat->nat_sync); 1646 #endif 1647 1648 if (nat->nat_fr != NULL) 1649 (void)fr_derefrule(&nat->nat_fr, ifs); 1650 1651 if (nat->nat_hm != NULL) 1652 nat_hostmapdel(nat->nat_hm); 1653 1654 /* 1655 * If there is an active reference from the nat entry to its parent 1656 * rule, decrement the rule's reference count and free it too if no 1657 * longer being used. 1658 */ 1659 ipn = nat->nat_ptr; 1660 if (ipn != NULL) { 1661 ipn->in_space++; 1662 ipn->in_use--; 1663 if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) { 1664 if (ipn->in_apr) 1665 appr_free(ipn->in_apr); 1666 KFREE(ipn); 1667 ifs->ifs_nat_stats.ns_rules--; 1668 } 1669 } 1670 1671 MUTEX_DESTROY(&nat->nat_lock); 1672 1673 aps_free(nat->nat_aps, ifs); 1674 ifs->ifs_nat_stats.ns_inuse--; 1675 1676 /* 1677 * If there's a fragment table entry too for this nat entry, then 1678 * dereference that as well. This is after nat_lock is released 1679 * because of Tru64. 1680 */ 1681 fr_forgetnat((void *)nat, ifs); 1682 1683 KFREE(nat); 1684 } 1685 1686 1687 /* ------------------------------------------------------------------------ */ 1688 /* Function: nat_flushtable */ 1689 /* Returns: int - number of NAT rules deleted */ 1690 /* Parameters: Nil */ 1691 /* */ 1692 /* Deletes all currently active NAT sessions. In deleting each NAT entry a */ 1693 /* log record should be emitted in nat_delete() if NAT logging is enabled. */ 1694 /* ------------------------------------------------------------------------ */ 1695 /* 1696 * nat_flushtable - clear the NAT table of all mapping entries. 1697 */ 1698 static int nat_flushtable(ifs) 1699 ipf_stack_t *ifs; 1700 { 1701 nat_t *nat; 1702 int j = 0; 1703 1704 /* 1705 * ALL NAT mappings deleted, so lets just make the deletions 1706 * quicker. 1707 */ 1708 if (ifs->ifs_nat_table[0] != NULL) 1709 bzero((char *)ifs->ifs_nat_table[0], 1710 sizeof(ifs->ifs_nat_table[0]) * ifs->ifs_ipf_nattable_sz); 1711 if (ifs->ifs_nat_table[1] != NULL) 1712 bzero((char *)ifs->ifs_nat_table[1], 1713 sizeof(ifs->ifs_nat_table[1]) * ifs->ifs_ipf_nattable_sz); 1714 1715 while ((nat = ifs->ifs_nat_instances) != NULL) { 1716 nat_delete(nat, NL_FLUSH, ifs); 1717 j++; 1718 } 1719 1720 ifs->ifs_nat_stats.ns_inuse = 0; 1721 return j; 1722 } 1723 1724 1725 /* ------------------------------------------------------------------------ */ 1726 /* Function: nat_clearlist */ 1727 /* Returns: int - number of NAT/RDR rules deleted */ 1728 /* Parameters: Nil */ 1729 /* */ 1730 /* Delete all rules in the current list of rules. There is nothing elegant */ 1731 /* about this cleanup: simply free all entries on the list of rules and */ 1732 /* clear out the tables used for hashed NAT rule lookups. */ 1733 /* ------------------------------------------------------------------------ */ 1734 static int nat_clearlist(ifs) 1735 ipf_stack_t *ifs; 1736 { 1737 ipnat_t *n, **np = &ifs->ifs_nat_list; 1738 int i = 0; 1739 1740 if (ifs->ifs_nat_rules != NULL) 1741 bzero((char *)ifs->ifs_nat_rules, 1742 sizeof(*ifs->ifs_nat_rules) * ifs->ifs_ipf_natrules_sz); 1743 if (ifs->ifs_rdr_rules != NULL) 1744 bzero((char *)ifs->ifs_rdr_rules, 1745 sizeof(*ifs->ifs_rdr_rules) * ifs->ifs_ipf_rdrrules_sz); 1746 1747 while ((n = *np) != NULL) { 1748 *np = n->in_next; 1749 if (n->in_use == 0) { 1750 if (n->in_apr != NULL) 1751 appr_free(n->in_apr); 1752 KFREE(n); 1753 ifs->ifs_nat_stats.ns_rules--; 1754 } else { 1755 n->in_flags |= IPN_DELETE; 1756 n->in_next = NULL; 1757 } 1758 i++; 1759 } 1760 ifs->ifs_nat_masks = 0; 1761 ifs->ifs_rdr_masks = 0; 1762 return i; 1763 } 1764 1765 1766 /* ------------------------------------------------------------------------ */ 1767 /* Function: nat_newmap */ 1768 /* Returns: int - -1 == error, 0 == success */ 1769 /* Parameters: fin(I) - pointer to packet information */ 1770 /* nat(I) - pointer to NAT entry */ 1771 /* ni(I) - pointer to structure with misc. information needed */ 1772 /* to create new NAT entry. */ 1773 /* */ 1774 /* Given an empty NAT structure, populate it with new information about a */ 1775 /* new NAT session, as defined by the matching NAT rule. */ 1776 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/ 1777 /* to the new IP address for the translation. */ 1778 /* ------------------------------------------------------------------------ */ 1779 static INLINE int nat_newmap(fin, nat, ni) 1780 fr_info_t *fin; 1781 nat_t *nat; 1782 natinfo_t *ni; 1783 { 1784 u_short st_port, dport, sport, port, sp, dp; 1785 struct in_addr in, inb; 1786 hostmap_t *hm; 1787 u_32_t flags; 1788 u_32_t st_ip; 1789 ipnat_t *np; 1790 nat_t *natl; 1791 int l; 1792 ipf_stack_t *ifs = fin->fin_ifs; 1793 1794 /* 1795 * If it's an outbound packet which doesn't match any existing 1796 * record, then create a new port 1797 */ 1798 l = 0; 1799 hm = NULL; 1800 np = ni->nai_np; 1801 st_ip = np->in_nip; 1802 st_port = np->in_pnext; 1803 flags = ni->nai_flags; 1804 sport = ni->nai_sport; 1805 dport = ni->nai_dport; 1806 1807 /* 1808 * Do a loop until we either run out of entries to try or we find 1809 * a NAT mapping that isn't currently being used. This is done 1810 * because the change to the source is not (usually) being fixed. 1811 */ 1812 do { 1813 port = 0; 1814 in.s_addr = htonl(np->in_nip); 1815 if (l == 0) { 1816 /* 1817 * Check to see if there is an existing NAT 1818 * setup for this IP address pair. 1819 */ 1820 hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, 1821 in, 0, ifs); 1822 if (hm != NULL) 1823 in.s_addr = hm->hm_mapip.s_addr; 1824 } else if ((l == 1) && (hm != NULL)) { 1825 nat_hostmapdel(hm); 1826 hm = NULL; 1827 } 1828 in.s_addr = ntohl(in.s_addr); 1829 1830 nat->nat_hm = hm; 1831 1832 if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) { 1833 if (l > 0) 1834 return -1; 1835 } 1836 1837 if (np->in_redir == NAT_BIMAP && 1838 np->in_inmsk == np->in_outmsk) { 1839 /* 1840 * map the address block in a 1:1 fashion 1841 */ 1842 in.s_addr = np->in_outip; 1843 in.s_addr |= fin->fin_saddr & ~np->in_inmsk; 1844 in.s_addr = ntohl(in.s_addr); 1845 1846 } else if (np->in_redir & NAT_MAPBLK) { 1847 if ((l >= np->in_ppip) || ((l > 0) && 1848 !(flags & IPN_TCPUDP))) 1849 return -1; 1850 /* 1851 * map-block - Calculate destination address. 1852 */ 1853 in.s_addr = ntohl(fin->fin_saddr); 1854 in.s_addr &= ntohl(~np->in_inmsk); 1855 inb.s_addr = in.s_addr; 1856 in.s_addr /= np->in_ippip; 1857 in.s_addr &= ntohl(~np->in_outmsk); 1858 in.s_addr += ntohl(np->in_outip); 1859 /* 1860 * Calculate destination port. 1861 */ 1862 if ((flags & IPN_TCPUDP) && 1863 (np->in_ppip != 0)) { 1864 port = ntohs(sport) + l; 1865 port %= np->in_ppip; 1866 port += np->in_ppip * 1867 (inb.s_addr % np->in_ippip); 1868 port += MAPBLK_MINPORT; 1869 port = htons(port); 1870 } 1871 1872 } else if ((np->in_outip == 0) && 1873 (np->in_outmsk == 0xffffffff)) { 1874 /* 1875 * 0/32 - use the interface's IP address. 1876 */ 1877 if ((l > 0) || 1878 fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, 1879 &in, NULL, fin->fin_ifs) == -1) 1880 return -1; 1881 in.s_addr = ntohl(in.s_addr); 1882 1883 } else if ((np->in_outip == 0) && (np->in_outmsk == 0)) { 1884 /* 1885 * 0/0 - use the original source address/port. 1886 */ 1887 if (l > 0) 1888 return -1; 1889 in.s_addr = ntohl(fin->fin_saddr); 1890 1891 } else if ((np->in_outmsk != 0xffffffff) && 1892 (np->in_pnext == 0) && ((l > 0) || (hm == NULL))) 1893 np->in_nip++; 1894 1895 natl = NULL; 1896 1897 if ((flags & IPN_TCPUDP) && 1898 ((np->in_redir & NAT_MAPBLK) == 0) && 1899 (np->in_flags & IPN_AUTOPORTMAP)) { 1900 /* 1901 * "ports auto" (without map-block) 1902 */ 1903 if ((l > 0) && (l % np->in_ppip == 0)) { 1904 if (l > np->in_space) { 1905 return -1; 1906 } else if ((l > np->in_ppip) && 1907 np->in_outmsk != 0xffffffff) 1908 np->in_nip++; 1909 } 1910 if (np->in_ppip != 0) { 1911 port = ntohs(sport); 1912 port += (l % np->in_ppip); 1913 port %= np->in_ppip; 1914 port += np->in_ppip * 1915 (ntohl(fin->fin_saddr) % 1916 np->in_ippip); 1917 port += MAPBLK_MINPORT; 1918 port = htons(port); 1919 } 1920 1921 } else if (((np->in_redir & NAT_MAPBLK) == 0) && 1922 (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) { 1923 /* 1924 * Standard port translation. Select next port. 1925 */ 1926 port = htons(np->in_pnext++); 1927 1928 if (np->in_pnext > ntohs(np->in_pmax)) { 1929 np->in_pnext = ntohs(np->in_pmin); 1930 if (np->in_outmsk != 0xffffffff) 1931 np->in_nip++; 1932 } 1933 } 1934 1935 if (np->in_flags & IPN_IPRANGE) { 1936 if (np->in_nip > ntohl(np->in_outmsk)) 1937 np->in_nip = ntohl(np->in_outip); 1938 } else { 1939 if ((np->in_outmsk != 0xffffffff) && 1940 ((np->in_nip + 1) & ntohl(np->in_outmsk)) > 1941 ntohl(np->in_outip)) 1942 np->in_nip = ntohl(np->in_outip) + 1; 1943 } 1944 1945 if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY))) 1946 port = sport; 1947 1948 /* 1949 * Here we do a lookup of the connection as seen from 1950 * the outside. If an IP# pair already exists, try 1951 * again. So if you have A->B becomes C->B, you can 1952 * also have D->E become C->E but not D->B causing 1953 * another C->B. Also take protocol and ports into 1954 * account when determining whether a pre-existing 1955 * NAT setup will cause an external conflict where 1956 * this is appropriate. 1957 */ 1958 inb.s_addr = htonl(in.s_addr); 1959 sp = fin->fin_data[0]; 1960 dp = fin->fin_data[1]; 1961 fin->fin_data[0] = fin->fin_data[1]; 1962 fin->fin_data[1] = htons(port); 1963 natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH), 1964 (u_int)fin->fin_p, fin->fin_dst, inb); 1965 fin->fin_data[0] = sp; 1966 fin->fin_data[1] = dp; 1967 1968 /* 1969 * Has the search wrapped around and come back to the 1970 * start ? 1971 */ 1972 if ((natl != NULL) && 1973 (np->in_pnext != 0) && (st_port == np->in_pnext) && 1974 (np->in_nip != 0) && (st_ip == np->in_nip)) 1975 return -1; 1976 l++; 1977 } while (natl != NULL); 1978 1979 if (np->in_space > 0) 1980 np->in_space--; 1981 1982 /* Setup the NAT table */ 1983 nat->nat_inip = fin->fin_src; 1984 nat->nat_outip.s_addr = htonl(in.s_addr); 1985 nat->nat_oip = fin->fin_dst; 1986 if (nat->nat_hm == NULL) 1987 nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, 1988 nat->nat_outip, 0, ifs); 1989 1990 /* 1991 * The ICMP checksum does not have a pseudo header containing 1992 * the IP addresses 1993 */ 1994 ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr)); 1995 ni->nai_sum2 = LONG_SUM(in.s_addr); 1996 if ((flags & IPN_TCPUDP)) { 1997 ni->nai_sum1 += ntohs(sport); 1998 ni->nai_sum2 += ntohs(port); 1999 } 2000 2001 if (flags & IPN_TCPUDP) { 2002 nat->nat_inport = sport; 2003 nat->nat_outport = port; /* sport */ 2004 nat->nat_oport = dport; 2005 ((tcphdr_t *)fin->fin_dp)->th_sport = port; 2006 } else if (flags & IPN_ICMPQUERY) { 2007 ((icmphdr_t *)fin->fin_dp)->icmp_id = port; 2008 nat->nat_inport = port; 2009 nat->nat_outport = port; 2010 } 2011 2012 ni->nai_ip.s_addr = in.s_addr; 2013 ni->nai_port = port; 2014 ni->nai_nport = dport; 2015 return 0; 2016 } 2017 2018 2019 /* ------------------------------------------------------------------------ */ 2020 /* Function: nat_newrdr */ 2021 /* Returns: int - -1 == error, 0 == success (no move), 1 == success and */ 2022 /* allow rule to be moved if IPN_ROUNDR is set. */ 2023 /* Parameters: fin(I) - pointer to packet information */ 2024 /* nat(I) - pointer to NAT entry */ 2025 /* ni(I) - pointer to structure with misc. information needed */ 2026 /* to create new NAT entry. */ 2027 /* */ 2028 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/ 2029 /* to the new IP address for the translation. */ 2030 /* ------------------------------------------------------------------------ */ 2031 static INLINE int nat_newrdr(fin, nat, ni) 2032 fr_info_t *fin; 2033 nat_t *nat; 2034 natinfo_t *ni; 2035 { 2036 u_short nport, dport, sport; 2037 struct in_addr in; 2038 hostmap_t *hm; 2039 u_32_t flags; 2040 ipnat_t *np; 2041 int move; 2042 ipf_stack_t *ifs = fin->fin_ifs; 2043 2044 move = 1; 2045 hm = NULL; 2046 in.s_addr = 0; 2047 np = ni->nai_np; 2048 flags = ni->nai_flags; 2049 sport = ni->nai_sport; 2050 dport = ni->nai_dport; 2051 2052 /* 2053 * If the matching rule has IPN_STICKY set, then we want to have the 2054 * same rule kick in as before. Why would this happen? If you have 2055 * a collection of rdr rules with "round-robin sticky", the current 2056 * packet might match a different one to the previous connection but 2057 * we want the same destination to be used. 2058 */ 2059 if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == 2060 (IPN_ROUNDR|IPN_STICKY)) { 2061 hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in, 2062 (u_32_t)dport, ifs); 2063 if (hm != NULL) { 2064 in.s_addr = ntohl(hm->hm_mapip.s_addr); 2065 np = hm->hm_ipnat; 2066 ni->nai_np = np; 2067 move = 0; 2068 } 2069 } 2070 2071 /* 2072 * Otherwise, it's an inbound packet. Most likely, we don't 2073 * want to rewrite source ports and source addresses. Instead, 2074 * we want to rewrite to a fixed internal address and fixed 2075 * internal port. 2076 */ 2077 if (np->in_flags & IPN_SPLIT) { 2078 in.s_addr = np->in_nip; 2079 2080 if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) { 2081 hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, 2082 in, (u_32_t)dport, ifs); 2083 if (hm != NULL) { 2084 in.s_addr = hm->hm_mapip.s_addr; 2085 move = 0; 2086 } 2087 } 2088 2089 if (hm == NULL || hm->hm_ref == 1) { 2090 if (np->in_inip == htonl(in.s_addr)) { 2091 np->in_nip = ntohl(np->in_inmsk); 2092 move = 0; 2093 } else { 2094 np->in_nip = ntohl(np->in_inip); 2095 } 2096 } 2097 2098 } else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) { 2099 /* 2100 * 0/32 - use the interface's IP address. 2101 */ 2102 if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL, 2103 fin->fin_ifs) == -1) 2104 return -1; 2105 in.s_addr = ntohl(in.s_addr); 2106 2107 } else if ((np->in_inip == 0) && (np->in_inmsk== 0)) { 2108 /* 2109 * 0/0 - use the original destination address/port. 2110 */ 2111 in.s_addr = ntohl(fin->fin_daddr); 2112 2113 } else if (np->in_redir == NAT_BIMAP && 2114 np->in_inmsk == np->in_outmsk) { 2115 /* 2116 * map the address block in a 1:1 fashion 2117 */ 2118 in.s_addr = np->in_inip; 2119 in.s_addr |= fin->fin_daddr & ~np->in_inmsk; 2120 in.s_addr = ntohl(in.s_addr); 2121 } else { 2122 in.s_addr = ntohl(np->in_inip); 2123 } 2124 2125 if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0)) 2126 nport = dport; 2127 else { 2128 /* 2129 * Whilst not optimized for the case where 2130 * pmin == pmax, the gain is not significant. 2131 */ 2132 if (((np->in_flags & IPN_FIXEDDPORT) == 0) && 2133 (np->in_pmin != np->in_pmax)) { 2134 nport = ntohs(dport) - ntohs(np->in_pmin) + 2135 ntohs(np->in_pnext); 2136 nport = htons(nport); 2137 } else 2138 nport = np->in_pnext; 2139 } 2140 2141 /* 2142 * When the redirect-to address is set to 0.0.0.0, just 2143 * assume a blank `forwarding' of the packet. We don't 2144 * setup any translation for this either. 2145 */ 2146 if (in.s_addr == 0) { 2147 if (nport == dport) 2148 return -1; 2149 in.s_addr = ntohl(fin->fin_daddr); 2150 } 2151 2152 nat->nat_inip.s_addr = htonl(in.s_addr); 2153 nat->nat_outip = fin->fin_dst; 2154 nat->nat_oip = fin->fin_src; 2155 2156 ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport); 2157 ni->nai_sum2 = LONG_SUM(in.s_addr) + ntohs(nport); 2158 2159 ni->nai_ip.s_addr = in.s_addr; 2160 ni->nai_nport = nport; 2161 ni->nai_port = sport; 2162 2163 if (flags & IPN_TCPUDP) { 2164 nat->nat_inport = nport; 2165 nat->nat_outport = dport; 2166 nat->nat_oport = sport; 2167 ((tcphdr_t *)fin->fin_dp)->th_dport = nport; 2168 } else if (flags & IPN_ICMPQUERY) { 2169 ((icmphdr_t *)fin->fin_dp)->icmp_id = nport; 2170 nat->nat_inport = nport; 2171 nat->nat_outport = nport; 2172 } 2173 2174 return move; 2175 } 2176 2177 /* ------------------------------------------------------------------------ */ 2178 /* Function: nat_new */ 2179 /* Returns: nat_t* - NULL == failure to create new NAT structure, */ 2180 /* else pointer to new NAT structure */ 2181 /* Parameters: fin(I) - pointer to packet information */ 2182 /* np(I) - pointer to NAT rule */ 2183 /* natsave(I) - pointer to where to store NAT struct pointer */ 2184 /* flags(I) - flags describing the current packet */ 2185 /* direction(I) - direction of packet (in/out) */ 2186 /* Write Lock: ipf_nat */ 2187 /* */ 2188 /* Attempts to create a new NAT entry. Does not actually change the packet */ 2189 /* in any way. */ 2190 /* */ 2191 /* This fucntion is in three main parts: (1) deal with creating a new NAT */ 2192 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with */ 2193 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */ 2194 /* and (3) building that structure and putting it into the NAT table(s). */ 2195 /* ------------------------------------------------------------------------ */ 2196 nat_t *nat_new(fin, np, natsave, flags, direction) 2197 fr_info_t *fin; 2198 ipnat_t *np; 2199 nat_t **natsave; 2200 u_int flags; 2201 int direction; 2202 { 2203 u_short port = 0, sport = 0, dport = 0, nport = 0; 2204 tcphdr_t *tcp = NULL; 2205 hostmap_t *hm = NULL; 2206 struct in_addr in; 2207 nat_t *nat, *natl; 2208 u_int nflags; 2209 natinfo_t ni; 2210 u_32_t sumd; 2211 int move; 2212 ipf_stack_t *ifs = fin->fin_ifs; 2213 2214 /* 2215 * Trigger automatic call to nat_extraflush() if the 2216 * table has reached capcity specified by hi watermark. 2217 */ 2218 if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi) 2219 ifs->ifs_nat_doflush = 1; 2220 2221 if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) { 2222 ifs->ifs_nat_stats.ns_memfail++; 2223 return NULL; 2224 } 2225 2226 move = 1; 2227 nflags = np->in_flags & flags; 2228 nflags &= NAT_FROMRULE; 2229 2230 ni.nai_np = np; 2231 ni.nai_nflags = nflags; 2232 ni.nai_flags = flags; 2233 2234 /* Give me a new nat */ 2235 KMALLOC(nat, nat_t *); 2236 if (nat == NULL) { 2237 ifs->ifs_nat_stats.ns_memfail++; 2238 /* 2239 * Try to automatically tune the max # of entries in the 2240 * table allowed to be less than what will cause kmem_alloc() 2241 * to fail and try to eliminate panics due to out of memory 2242 * conditions arising. 2243 */ 2244 if (ifs->ifs_ipf_nattable_max > ifs->ifs_ipf_nattable_sz) { 2245 ifs->ifs_ipf_nattable_max = ifs->ifs_nat_stats.ns_inuse - 100; 2246 printf("ipf_nattable_max reduced to %d\n", 2247 ifs->ifs_ipf_nattable_max); 2248 } 2249 return NULL; 2250 } 2251 2252 if (flags & IPN_TCPUDP) { 2253 tcp = fin->fin_dp; 2254 ni.nai_sport = htons(fin->fin_sport); 2255 ni.nai_dport = htons(fin->fin_dport); 2256 } else if (flags & IPN_ICMPQUERY) { 2257 /* 2258 * In the ICMP query NAT code, we translate the ICMP id fields 2259 * to make them unique. This is indepedent of the ICMP type 2260 * (e.g. in the unlikely event that a host sends an echo and 2261 * an tstamp request with the same id, both packets will have 2262 * their ip address/id field changed in the same way). 2263 */ 2264 /* The icmp_id field is used by the sender to identify the 2265 * process making the icmp request. (the receiver justs 2266 * copies it back in its response). So, it closely matches 2267 * the concept of source port. We overlay sport, so we can 2268 * maximally reuse the existing code. 2269 */ 2270 ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id; 2271 ni.nai_dport = ni.nai_sport; 2272 } 2273 2274 bzero((char *)nat, sizeof(*nat)); 2275 nat->nat_flags = flags; 2276 nat->nat_redir = np->in_redir; 2277 2278 if ((flags & NAT_SLAVE) == 0) { 2279 MUTEX_ENTER(&ifs->ifs_ipf_nat_new); 2280 } 2281 2282 /* 2283 * Search the current table for a match. 2284 */ 2285 if (direction == NAT_OUTBOUND) { 2286 /* 2287 * We can now arrange to call this for the same connection 2288 * because ipf_nat_new doesn't protect the code path into 2289 * this function. 2290 */ 2291 natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p, 2292 fin->fin_src, fin->fin_dst); 2293 if (natl != NULL) { 2294 KFREE(nat); 2295 nat = natl; 2296 goto done; 2297 } 2298 2299 move = nat_newmap(fin, nat, &ni); 2300 if (move == -1) 2301 goto badnat; 2302 2303 np = ni.nai_np; 2304 in = ni.nai_ip; 2305 } else { 2306 /* 2307 * NAT_INBOUND is used only for redirects rules 2308 */ 2309 natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p, 2310 fin->fin_src, fin->fin_dst); 2311 if (natl != NULL) { 2312 KFREE(nat); 2313 nat = natl; 2314 goto done; 2315 } 2316 2317 move = nat_newrdr(fin, nat, &ni); 2318 if (move == -1) 2319 goto badnat; 2320 2321 np = ni.nai_np; 2322 in = ni.nai_ip; 2323 } 2324 port = ni.nai_port; 2325 nport = ni.nai_nport; 2326 2327 if ((move == 1) && (np->in_flags & IPN_ROUNDR)) { 2328 if (np->in_redir == NAT_REDIRECT) { 2329 nat_delrdr(np); 2330 nat_addrdr(np, ifs); 2331 } else if (np->in_redir == NAT_MAP) { 2332 nat_delnat(np); 2333 nat_addnat(np, ifs); 2334 } 2335 } 2336 2337 if (flags & IPN_TCPUDP) { 2338 sport = ni.nai_sport; 2339 dport = ni.nai_dport; 2340 } else if (flags & IPN_ICMPQUERY) { 2341 sport = ni.nai_sport; 2342 dport = 0; 2343 } 2344 2345 /* 2346 * nat_sumd[0] stores adjustment value including both IP address and 2347 * port number changes. nat_sumd[1] stores adjustment value only for 2348 * IP address changes, to be used for pseudo header adjustment, in 2349 * case hardware partial checksum offload is offered. 2350 */ 2351 CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd); 2352 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); 2353 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) 2354 if (flags & IPN_TCPUDP) { 2355 ni.nai_sum1 = LONG_SUM(in.s_addr); 2356 if (direction == NAT_OUTBOUND) 2357 ni.nai_sum2 = LONG_SUM(ntohl(fin->fin_saddr)); 2358 else 2359 ni.nai_sum2 = LONG_SUM(ntohl(fin->fin_daddr)); 2360 2361 CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd); 2362 nat->nat_sumd[1] = (sumd & 0xffff) + (sumd >> 16); 2363 } else 2364 #endif 2365 nat->nat_sumd[1] = nat->nat_sumd[0]; 2366 2367 if ((flags & IPN_TCPUDPICMP) && ((sport != port) || (dport != nport))) { 2368 if (direction == NAT_OUTBOUND) 2369 ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr)); 2370 else 2371 ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)); 2372 2373 ni.nai_sum2 = LONG_SUM(in.s_addr); 2374 2375 CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd); 2376 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16); 2377 } else { 2378 nat->nat_ipsumd = nat->nat_sumd[0]; 2379 if (!(flags & IPN_TCPUDPICMP)) { 2380 nat->nat_sumd[0] = 0; 2381 nat->nat_sumd[1] = 0; 2382 } 2383 } 2384 2385 if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) { 2386 goto badnat; 2387 } 2388 if (flags & SI_WILDP) 2389 ifs->ifs_nat_stats.ns_wilds++; 2390 goto done; 2391 badnat: 2392 ifs->ifs_nat_stats.ns_badnat++; 2393 if ((hm = nat->nat_hm) != NULL) 2394 nat_hostmapdel(hm); 2395 KFREE(nat); 2396 nat = NULL; 2397 done: 2398 if ((flags & NAT_SLAVE) == 0) { 2399 MUTEX_EXIT(&ifs->ifs_ipf_nat_new); 2400 } 2401 return nat; 2402 } 2403 2404 2405 /* ------------------------------------------------------------------------ */ 2406 /* Function: nat_finalise */ 2407 /* Returns: int - 0 == sucess, -1 == failure */ 2408 /* Parameters: fin(I) - pointer to packet information */ 2409 /* nat(I) - pointer to NAT entry */ 2410 /* ni(I) - pointer to structure with misc. information needed */ 2411 /* to create new NAT entry. */ 2412 /* Write Lock: ipf_nat */ 2413 /* */ 2414 /* This is the tail end of constructing a new NAT entry and is the same */ 2415 /* for both IPv4 and IPv6. */ 2416 /* ------------------------------------------------------------------------ */ 2417 /*ARGSUSED*/ 2418 static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction) 2419 fr_info_t *fin; 2420 nat_t *nat; 2421 natinfo_t *ni; 2422 tcphdr_t *tcp; 2423 nat_t **natsave; 2424 int direction; 2425 { 2426 frentry_t *fr; 2427 ipnat_t *np; 2428 ipf_stack_t *ifs = fin->fin_ifs; 2429 2430 np = ni->nai_np; 2431 2432 COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0], fin->fin_v); 2433 2434 #ifdef IPFILTER_SYNC 2435 if ((nat->nat_flags & SI_CLONE) == 0) 2436 nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat); 2437 #endif 2438 2439 nat->nat_me = natsave; 2440 nat->nat_dir = direction; 2441 nat->nat_ifps[0] = np->in_ifps[0]; 2442 nat->nat_ifps[1] = np->in_ifps[1]; 2443 nat->nat_ptr = np; 2444 nat->nat_p = fin->fin_p; 2445 nat->nat_mssclamp = np->in_mssclamp; 2446 fr = fin->fin_fr; 2447 nat->nat_fr = fr; 2448 2449 if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0)) 2450 if (appr_new(fin, nat) == -1) 2451 return -1; 2452 2453 if (nat_insert(nat, fin->fin_rev, ifs) == 0) { 2454 if (ifs->ifs_nat_logging) 2455 nat_log(nat, (u_int)np->in_redir, ifs); 2456 np->in_use++; 2457 if (fr != NULL) { 2458 MUTEX_ENTER(&fr->fr_lock); 2459 fr->fr_ref++; 2460 MUTEX_EXIT(&fr->fr_lock); 2461 } 2462 return 0; 2463 } 2464 2465 /* 2466 * nat_insert failed, so cleanup time... 2467 */ 2468 return -1; 2469 } 2470 2471 2472 /* ------------------------------------------------------------------------ */ 2473 /* Function: nat_insert */ 2474 /* Returns: int - 0 == sucess, -1 == failure */ 2475 /* Parameters: nat(I) - pointer to NAT structure */ 2476 /* rev(I) - flag indicating forward/reverse direction of packet */ 2477 /* Write Lock: ipf_nat */ 2478 /* */ 2479 /* Insert a NAT entry into the hash tables for searching and add it to the */ 2480 /* list of active NAT entries. Adjust global counters when complete. */ 2481 /* ------------------------------------------------------------------------ */ 2482 int nat_insert(nat, rev, ifs) 2483 nat_t *nat; 2484 int rev; 2485 ipf_stack_t *ifs; 2486 { 2487 u_int hv1, hv2; 2488 nat_t **natp; 2489 2490 /* 2491 * Try and return an error as early as possible, so calculate the hash 2492 * entry numbers first and then proceed. 2493 */ 2494 if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) { 2495 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 2496 0xffffffff); 2497 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport, 2498 ifs->ifs_ipf_nattable_sz); 2499 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 2500 0xffffffff); 2501 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport, 2502 ifs->ifs_ipf_nattable_sz); 2503 } else { 2504 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff); 2505 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, 2506 ifs->ifs_ipf_nattable_sz); 2507 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff); 2508 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, 2509 ifs->ifs_ipf_nattable_sz); 2510 } 2511 2512 if (ifs->ifs_nat_stats.ns_bucketlen[0][hv1] >= ifs->ifs_fr_nat_maxbucket || 2513 ifs->ifs_nat_stats.ns_bucketlen[1][hv2] >= ifs->ifs_fr_nat_maxbucket) { 2514 return -1; 2515 } 2516 2517 nat->nat_hv[0] = hv1; 2518 nat->nat_hv[1] = hv2; 2519 2520 MUTEX_INIT(&nat->nat_lock, "nat entry lock"); 2521 2522 nat->nat_rev = rev; 2523 nat->nat_ref = 1; 2524 nat->nat_bytes[0] = 0; 2525 nat->nat_pkts[0] = 0; 2526 nat->nat_bytes[1] = 0; 2527 nat->nat_pkts[1] = 0; 2528 2529 nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0'; 2530 nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4, ifs); 2531 2532 if (nat->nat_ifnames[1][0] !='\0') { 2533 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; 2534 nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4, ifs); 2535 } else { 2536 (void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0], 2537 LIFNAMSIZ); 2538 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; 2539 nat->nat_ifps[1] = nat->nat_ifps[0]; 2540 } 2541 2542 nat->nat_next = ifs->ifs_nat_instances; 2543 nat->nat_pnext = &ifs->ifs_nat_instances; 2544 if (ifs->ifs_nat_instances) 2545 ifs->ifs_nat_instances->nat_pnext = &nat->nat_next; 2546 ifs->ifs_nat_instances = nat; 2547 2548 natp = &ifs->ifs_nat_table[0][hv1]; 2549 if (*natp) 2550 (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; 2551 nat->nat_phnext[0] = natp; 2552 nat->nat_hnext[0] = *natp; 2553 *natp = nat; 2554 ifs->ifs_nat_stats.ns_bucketlen[0][hv1]++; 2555 2556 natp = &ifs->ifs_nat_table[1][hv2]; 2557 if (*natp) 2558 (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; 2559 nat->nat_phnext[1] = natp; 2560 nat->nat_hnext[1] = *natp; 2561 *natp = nat; 2562 ifs->ifs_nat_stats.ns_bucketlen[1][hv2]++; 2563 2564 fr_setnatqueue(nat, rev, ifs); 2565 2566 ifs->ifs_nat_stats.ns_added++; 2567 ifs->ifs_nat_stats.ns_inuse++; 2568 return 0; 2569 } 2570 2571 2572 /* ------------------------------------------------------------------------ */ 2573 /* Function: nat_icmperrorlookup */ 2574 /* Returns: nat_t* - point to matching NAT structure */ 2575 /* Parameters: fin(I) - pointer to packet information */ 2576 /* dir(I) - direction of packet (in/out) */ 2577 /* */ 2578 /* Check if the ICMP error message is related to an existing TCP, UDP or */ 2579 /* ICMP query nat entry. It is assumed that the packet is already of the */ 2580 /* the required length. */ 2581 /* ------------------------------------------------------------------------ */ 2582 nat_t *nat_icmperrorlookup(fin, dir) 2583 fr_info_t *fin; 2584 int dir; 2585 { 2586 int flags = 0, minlen; 2587 icmphdr_t *orgicmp; 2588 tcphdr_t *tcp = NULL; 2589 u_short data[2]; 2590 nat_t *nat; 2591 ip_t *oip; 2592 u_int p; 2593 2594 /* 2595 * Does it at least have the return (basic) IP header ? 2596 * Only a basic IP header (no options) should be with an ICMP error 2597 * header. Also, if it's not an error type, then return. 2598 */ 2599 if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR)) 2600 return NULL; 2601 2602 /* 2603 * Check packet size 2604 */ 2605 oip = (ip_t *)((char *)fin->fin_dp + 8); 2606 minlen = IP_HL(oip) << 2; 2607 if ((minlen < sizeof(ip_t)) || 2608 (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)) 2609 return NULL; 2610 /* 2611 * Is the buffer big enough for all of it ? It's the size of the IP 2612 * header claimed in the encapsulated part which is of concern. It 2613 * may be too big to be in this buffer but not so big that it's 2614 * outside the ICMP packet, leading to TCP deref's causing problems. 2615 * This is possible because we don't know how big oip_hl is when we 2616 * do the pullup early in fr_check() and thus can't gaurantee it is 2617 * all here now. 2618 */ 2619 #ifdef _KERNEL 2620 { 2621 mb_t *m; 2622 2623 m = fin->fin_m; 2624 # if defined(MENTAT) 2625 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr) 2626 return NULL; 2627 # else 2628 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > 2629 (char *)fin->fin_ip + M_LEN(m)) 2630 return NULL; 2631 # endif 2632 } 2633 #endif 2634 2635 if (fin->fin_daddr != oip->ip_src.s_addr) 2636 return NULL; 2637 2638 p = oip->ip_p; 2639 if (p == IPPROTO_TCP) 2640 flags = IPN_TCP; 2641 else if (p == IPPROTO_UDP) 2642 flags = IPN_UDP; 2643 else if (p == IPPROTO_ICMP) { 2644 orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2645 2646 /* see if this is related to an ICMP query */ 2647 if (nat_icmpquerytype4(orgicmp->icmp_type)) { 2648 data[0] = fin->fin_data[0]; 2649 data[1] = fin->fin_data[1]; 2650 fin->fin_data[0] = 0; 2651 fin->fin_data[1] = orgicmp->icmp_id; 2652 2653 flags = IPN_ICMPERR|IPN_ICMPQUERY; 2654 /* 2655 * NOTE : dir refers to the direction of the original 2656 * ip packet. By definition the icmp error 2657 * message flows in the opposite direction. 2658 */ 2659 if (dir == NAT_INBOUND) 2660 nat = nat_inlookup(fin, flags, p, oip->ip_dst, 2661 oip->ip_src); 2662 else 2663 nat = nat_outlookup(fin, flags, p, oip->ip_dst, 2664 oip->ip_src); 2665 fin->fin_data[0] = data[0]; 2666 fin->fin_data[1] = data[1]; 2667 return nat; 2668 } 2669 } 2670 2671 if (flags & IPN_TCPUDP) { 2672 minlen += 8; /* + 64bits of data to get ports */ 2673 if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen) 2674 return NULL; 2675 2676 data[0] = fin->fin_data[0]; 2677 data[1] = fin->fin_data[1]; 2678 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2679 fin->fin_data[0] = ntohs(tcp->th_dport); 2680 fin->fin_data[1] = ntohs(tcp->th_sport); 2681 2682 if (dir == NAT_INBOUND) { 2683 nat = nat_inlookup(fin, flags, p, oip->ip_dst, 2684 oip->ip_src); 2685 } else { 2686 nat = nat_outlookup(fin, flags, p, oip->ip_dst, 2687 oip->ip_src); 2688 } 2689 fin->fin_data[0] = data[0]; 2690 fin->fin_data[1] = data[1]; 2691 return nat; 2692 } 2693 if (dir == NAT_INBOUND) 2694 return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src); 2695 else 2696 return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src); 2697 } 2698 2699 2700 /* ------------------------------------------------------------------------ */ 2701 /* Function: nat_icmperror */ 2702 /* Returns: nat_t* - point to matching NAT structure */ 2703 /* Parameters: fin(I) - pointer to packet information */ 2704 /* nflags(I) - NAT flags for this packet */ 2705 /* dir(I) - direction of packet (in/out) */ 2706 /* */ 2707 /* Fix up an ICMP packet which is an error message for an existing NAT */ 2708 /* session. This will correct both packet header data and checksums. */ 2709 /* */ 2710 /* This should *ONLY* be used for incoming ICMP error packets to make sure */ 2711 /* a NAT'd ICMP packet gets correctly recognised. */ 2712 /* ------------------------------------------------------------------------ */ 2713 nat_t *nat_icmperror(fin, nflags, dir) 2714 fr_info_t *fin; 2715 u_int *nflags; 2716 int dir; 2717 { 2718 u_32_t sum1, sum2, sumd, psum1, psum2, psumd, sumd2; 2719 struct in_addr in; 2720 icmphdr_t *icmp, *orgicmp; 2721 int dlen; 2722 udphdr_t *udp; 2723 tcphdr_t *tcp; 2724 nat_t *nat; 2725 ip_t *oip; 2726 if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) 2727 return NULL; 2728 2729 /* 2730 * nat_icmperrorlookup() looks up nat entry associated with the 2731 * offending IP packet and returns pointer to the entry, or NULL 2732 * if packet wasn't natted or for `defective' packets. 2733 */ 2734 2735 if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir))) 2736 return NULL; 2737 2738 sumd2 = 0; 2739 *nflags = IPN_ICMPERR; 2740 icmp = fin->fin_dp; 2741 oip = (ip_t *)&icmp->icmp_ip; 2742 udp = (udphdr_t *)((((char *)oip) + (IP_HL(oip) << 2))); 2743 tcp = (tcphdr_t *)udp; 2744 dlen = fin->fin_plen - ((char *)udp - (char *)fin->fin_ip); 2745 2746 /* 2747 * Need to adjust ICMP header to include the real IP#'s and 2748 * port #'s. There are three steps required. 2749 * 2750 * Step 1 2751 * Fix the IP addresses in the offending IP packet and update 2752 * ip header checksum to compensate for the change. 2753 * 2754 * No update needed here for icmp_cksum because the ICMP checksum 2755 * is calculated over the complete ICMP packet, which includes the 2756 * changed oip IP addresses and oip->ip_sum. These two changes 2757 * cancel each other out (if the delta for the IP address is x, 2758 * then the delta for ip_sum is minus x). 2759 */ 2760 2761 if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) { 2762 sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr)); 2763 in = nat->nat_inip; 2764 oip->ip_src = in; 2765 } else { 2766 sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr)); 2767 in = nat->nat_outip; 2768 oip->ip_dst = in; 2769 } 2770 2771 sum2 = LONG_SUM(ntohl(in.s_addr)); 2772 CALC_SUMD(sum1, sum2, sumd); 2773 fix_datacksum(&oip->ip_sum, sumd); 2774 2775 /* 2776 * Step 2 2777 * Perform other adjustments based on protocol of offending packet. 2778 */ 2779 2780 switch (oip->ip_p) { 2781 case IPPROTO_TCP : 2782 case IPPROTO_UDP : 2783 2784 /* 2785 * For offending TCP/UDP IP packets, translate the ports 2786 * based on the NAT specification. 2787 * 2788 * Advance notice : Now it becomes complicated :-) 2789 * 2790 * Since the port and IP addresse fields are both part 2791 * of the TCP/UDP checksum of the offending IP packet, 2792 * we need to adjust that checksum as well. 2793 * 2794 * To further complicate things, the TCP/UDP checksum 2795 * may not be present. We must check to see if the 2796 * length of the data portion is big enough to hold 2797 * the checksum. In the UDP case, a test to determine 2798 * if the checksum is even set is also required. 2799 * 2800 * Any changes to an IP address, port or checksum within 2801 * the ICMP packet requires a change to icmp_cksum. 2802 * 2803 * Be extremely careful here ... The change is dependent 2804 * upon whether or not the TCP/UPD checksum is present. 2805 * 2806 * If TCP/UPD checksum is present, the icmp_cksum must 2807 * compensate for checksum modification resulting from 2808 * IP address change only. Port change and resulting 2809 * data checksum adjustments cancel each other out. 2810 * 2811 * If TCP/UDP checksum is not present, icmp_cksum must 2812 * compensate for port change only. The IP address 2813 * change does not modify anything else in this case. 2814 */ 2815 2816 psum1 = 0; 2817 psum2 = 0; 2818 psumd = 0; 2819 2820 if ((tcp->th_dport == nat->nat_oport) && 2821 (tcp->th_sport != nat->nat_inport)) { 2822 2823 /* 2824 * Translate the source port. 2825 */ 2826 2827 psum1 = ntohs(tcp->th_sport); 2828 psum2 = ntohs(nat->nat_inport); 2829 tcp->th_sport = nat->nat_inport; 2830 2831 } else if ((tcp->th_sport == nat->nat_oport) && 2832 (tcp->th_dport != nat->nat_outport)) { 2833 2834 /* 2835 * Translate the destination port. 2836 */ 2837 2838 psum1 = ntohs(tcp->th_dport); 2839 psum2 = ntohs(nat->nat_outport); 2840 tcp->th_dport = nat->nat_outport; 2841 } 2842 2843 if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) { 2844 2845 /* 2846 * TCP checksum present. 2847 * 2848 * Adjust data checksum and icmp checksum to 2849 * compensate for any IP address change. 2850 */ 2851 2852 sum1 = ntohs(tcp->th_sum); 2853 fix_datacksum(&tcp->th_sum, sumd); 2854 sum2 = ntohs(tcp->th_sum); 2855 sumd2 = sumd << 1; 2856 CALC_SUMD(sum1, sum2, sumd); 2857 sumd2 += sumd; 2858 2859 /* 2860 * Also make data checksum adjustment to 2861 * compensate for any port change. 2862 */ 2863 2864 if (psum1 != psum2) { 2865 CALC_SUMD(psum1, psum2, psumd); 2866 fix_datacksum(&tcp->th_sum, psumd); 2867 } 2868 2869 } else if ((oip->ip_p == IPPROTO_UDP) && 2870 (dlen >= 8) && (udp->uh_sum != 0)) { 2871 2872 /* 2873 * The UDP checksum is present and set. 2874 * 2875 * Adjust data checksum and icmp checksum to 2876 * compensate for any IP address change. 2877 */ 2878 2879 sum1 = ntohs(udp->uh_sum); 2880 fix_datacksum(&udp->uh_sum, sumd); 2881 sum2 = ntohs(udp->uh_sum); 2882 sumd2 = sumd << 1; 2883 CALC_SUMD(sum1, sum2, sumd); 2884 sumd2 += sumd; 2885 2886 /* 2887 * Also make data checksum adjustment to 2888 * compensate for any port change. 2889 */ 2890 2891 if (psum1 != psum2) { 2892 CALC_SUMD(psum1, psum2, psumd); 2893 fix_datacksum(&udp->uh_sum, psumd); 2894 } 2895 2896 } else { 2897 2898 /* 2899 * Data checksum was not present. 2900 * 2901 * Compensate for any port change. 2902 */ 2903 2904 CALC_SUMD(psum2, psum1, psumd); 2905 sumd2 += psumd; 2906 } 2907 break; 2908 2909 case IPPROTO_ICMP : 2910 2911 orgicmp = (icmphdr_t *)udp; 2912 2913 if ((nat->nat_dir == NAT_OUTBOUND) && 2914 (orgicmp->icmp_id != nat->nat_inport) && 2915 (dlen >= 8)) { 2916 2917 /* 2918 * Fix ICMP checksum (of the offening ICMP 2919 * query packet) to compensate the change 2920 * in the ICMP id of the offending ICMP 2921 * packet. 2922 * 2923 * Since you modify orgicmp->icmp_id with 2924 * a delta (say x) and you compensate that 2925 * in origicmp->icmp_cksum with a delta 2926 * minus x, you don't have to adjust the 2927 * overall icmp->icmp_cksum 2928 */ 2929 2930 sum1 = ntohs(orgicmp->icmp_id); 2931 sum2 = ntohs(nat->nat_inport); 2932 CALC_SUMD(sum1, sum2, sumd); 2933 orgicmp->icmp_id = nat->nat_inport; 2934 fix_datacksum(&orgicmp->icmp_cksum, sumd); 2935 2936 } /* nat_dir can't be NAT_INBOUND for icmp queries */ 2937 2938 break; 2939 2940 default : 2941 2942 break; 2943 2944 } /* switch (oip->ip_p) */ 2945 2946 /* 2947 * Step 3 2948 * Make the adjustments to icmp checksum. 2949 */ 2950 2951 if (sumd2 != 0) { 2952 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); 2953 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); 2954 fix_incksum(&icmp->icmp_cksum, sumd2); 2955 } 2956 return nat; 2957 } 2958 2959 2960 /* 2961 * NB: these lookups don't lock access to the list, it assumed that it has 2962 * already been done! 2963 */ 2964 2965 /* ------------------------------------------------------------------------ */ 2966 /* Function: nat_inlookup */ 2967 /* Returns: nat_t* - NULL == no match, */ 2968 /* else pointer to matching NAT entry */ 2969 /* Parameters: fin(I) - pointer to packet information */ 2970 /* flags(I) - NAT flags for this packet */ 2971 /* p(I) - protocol for this packet */ 2972 /* src(I) - source IP address */ 2973 /* mapdst(I) - destination IP address */ 2974 /* */ 2975 /* Lookup a nat entry based on the mapped destination ip address/port and */ 2976 /* real source address/port. We use this lookup when receiving a packet, */ 2977 /* we're looking for a table entry, based on the destination address. */ 2978 /* */ 2979 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */ 2980 /* */ 2981 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */ 2982 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */ 2983 /* */ 2984 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */ 2985 /* the packet is of said protocol */ 2986 /* ------------------------------------------------------------------------ */ 2987 nat_t *nat_inlookup(fin, flags, p, src, mapdst) 2988 fr_info_t *fin; 2989 u_int flags, p; 2990 struct in_addr src , mapdst; 2991 { 2992 u_short sport, dport; 2993 ipnat_t *ipn; 2994 u_int sflags; 2995 nat_t *nat; 2996 int nflags; 2997 u_32_t dst; 2998 void *ifp; 2999 u_int hv; 3000 ipf_stack_t *ifs = fin->fin_ifs; 3001 3002 if (fin != NULL) 3003 ifp = fin->fin_ifp; 3004 else 3005 ifp = NULL; 3006 sport = 0; 3007 dport = 0; 3008 dst = mapdst.s_addr; 3009 sflags = flags & NAT_TCPUDPICMP; 3010 3011 switch (p) 3012 { 3013 case IPPROTO_TCP : 3014 case IPPROTO_UDP : 3015 sport = htons(fin->fin_data[0]); 3016 dport = htons(fin->fin_data[1]); 3017 break; 3018 case IPPROTO_ICMP : 3019 if (flags & IPN_ICMPERR) 3020 sport = fin->fin_data[1]; 3021 else 3022 dport = fin->fin_data[1]; 3023 break; 3024 default : 3025 break; 3026 } 3027 3028 3029 if ((flags & SI_WILDP) != 0) 3030 goto find_in_wild_ports; 3031 3032 hv = NAT_HASH_FN(dst, dport, 0xffffffff); 3033 hv = NAT_HASH_FN(src.s_addr, hv + sport, ifs->ifs_ipf_nattable_sz); 3034 nat = ifs->ifs_nat_table[1][hv]; 3035 for (; nat; nat = nat->nat_hnext[1]) { 3036 if (nat->nat_ifps[0] != NULL) { 3037 if ((ifp != NULL) && (ifp != nat->nat_ifps[0])) 3038 continue; 3039 } else if (ifp != NULL) 3040 nat->nat_ifps[0] = ifp; 3041 3042 nflags = nat->nat_flags; 3043 3044 if (nat->nat_oip.s_addr == src.s_addr && 3045 nat->nat_outip.s_addr == dst && 3046 (((p == 0) && 3047 (sflags == (nat->nat_flags & IPN_TCPUDPICMP))) 3048 || (p == nat->nat_p))) { 3049 switch (p) 3050 { 3051 #if 0 3052 case IPPROTO_GRE : 3053 if (nat->nat_call[1] != fin->fin_data[0]) 3054 continue; 3055 break; 3056 #endif 3057 case IPPROTO_ICMP : 3058 if ((flags & IPN_ICMPERR) != 0) { 3059 if (nat->nat_outport != sport) 3060 continue; 3061 } else { 3062 if (nat->nat_outport != dport) 3063 continue; 3064 } 3065 break; 3066 case IPPROTO_TCP : 3067 case IPPROTO_UDP : 3068 if (nat->nat_oport != sport) 3069 continue; 3070 if (nat->nat_outport != dport) 3071 continue; 3072 break; 3073 default : 3074 break; 3075 } 3076 3077 ipn = nat->nat_ptr; 3078 if ((ipn != NULL) && (nat->nat_aps != NULL)) 3079 if (appr_match(fin, nat) != 0) 3080 continue; 3081 return nat; 3082 } 3083 } 3084 3085 /* 3086 * So if we didn't find it but there are wildcard members in the hash 3087 * table, go back and look for them. We do this search and update here 3088 * because it is modifying the NAT table and we want to do this only 3089 * for the first packet that matches. The exception, of course, is 3090 * for "dummy" (FI_IGNORE) lookups. 3091 */ 3092 find_in_wild_ports: 3093 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) 3094 return NULL; 3095 if (ifs->ifs_nat_stats.ns_wilds == 0) 3096 return NULL; 3097 3098 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3099 3100 hv = NAT_HASH_FN(dst, 0, 0xffffffff); 3101 hv = NAT_HASH_FN(src.s_addr, hv, ifs->ifs_ipf_nattable_sz); 3102 3103 WRITE_ENTER(&ifs->ifs_ipf_nat); 3104 3105 nat = ifs->ifs_nat_table[1][hv]; 3106 for (; nat; nat = nat->nat_hnext[1]) { 3107 if (nat->nat_ifps[0] != NULL) { 3108 if ((ifp != NULL) && (ifp != nat->nat_ifps[0])) 3109 continue; 3110 } else if (ifp != NULL) 3111 nat->nat_ifps[0] = ifp; 3112 3113 if (nat->nat_p != fin->fin_p) 3114 continue; 3115 if (nat->nat_oip.s_addr != src.s_addr || 3116 nat->nat_outip.s_addr != dst) 3117 continue; 3118 3119 nflags = nat->nat_flags; 3120 if (!(nflags & (NAT_TCPUDP|SI_WILDP))) 3121 continue; 3122 3123 if (nat_wildok(nat, (int)sport, (int)dport, nflags, 3124 NAT_INBOUND) == 1) { 3125 if ((fin->fin_flx & FI_IGNORE) != 0) 3126 break; 3127 if ((nflags & SI_CLONE) != 0) { 3128 nat = fr_natclone(fin, nat); 3129 if (nat == NULL) 3130 break; 3131 } else { 3132 MUTEX_ENTER(&ifs->ifs_ipf_nat_new); 3133 ifs->ifs_nat_stats.ns_wilds--; 3134 MUTEX_EXIT(&ifs->ifs_ipf_nat_new); 3135 } 3136 nat->nat_oport = sport; 3137 nat->nat_outport = dport; 3138 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); 3139 nat_tabmove(nat, ifs); 3140 break; 3141 } 3142 } 3143 3144 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3145 3146 return nat; 3147 } 3148 3149 3150 /* ------------------------------------------------------------------------ */ 3151 /* Function: nat_tabmove */ 3152 /* Returns: Nil */ 3153 /* Parameters: nat(I) - pointer to NAT structure */ 3154 /* Write Lock: ipf_nat */ 3155 /* */ 3156 /* This function is only called for TCP/UDP NAT table entries where the */ 3157 /* original was placed in the table without hashing on the ports and we now */ 3158 /* want to include hashing on port numbers. */ 3159 /* ------------------------------------------------------------------------ */ 3160 static void nat_tabmove(nat, ifs) 3161 nat_t *nat; 3162 ipf_stack_t *ifs; 3163 { 3164 nat_t **natp; 3165 u_int hv; 3166 3167 if (nat->nat_flags & SI_CLONE) 3168 return; 3169 3170 /* 3171 * Remove the NAT entry from the old location 3172 */ 3173 if (nat->nat_hnext[0]) 3174 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; 3175 *nat->nat_phnext[0] = nat->nat_hnext[0]; 3176 ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--; 3177 3178 if (nat->nat_hnext[1]) 3179 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; 3180 *nat->nat_phnext[1] = nat->nat_hnext[1]; 3181 ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--; 3182 3183 /* 3184 * Add into the NAT table in the new position 3185 */ 3186 hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff); 3187 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport, 3188 ifs->ifs_ipf_nattable_sz); 3189 nat->nat_hv[0] = hv; 3190 natp = &ifs->ifs_nat_table[0][hv]; 3191 if (*natp) 3192 (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; 3193 nat->nat_phnext[0] = natp; 3194 nat->nat_hnext[0] = *natp; 3195 *natp = nat; 3196 ifs->ifs_nat_stats.ns_bucketlen[0][hv]++; 3197 3198 hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff); 3199 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport, 3200 ifs->ifs_ipf_nattable_sz); 3201 nat->nat_hv[1] = hv; 3202 natp = &ifs->ifs_nat_table[1][hv]; 3203 if (*natp) 3204 (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; 3205 nat->nat_phnext[1] = natp; 3206 nat->nat_hnext[1] = *natp; 3207 *natp = nat; 3208 ifs->ifs_nat_stats.ns_bucketlen[1][hv]++; 3209 } 3210 3211 3212 /* ------------------------------------------------------------------------ */ 3213 /* Function: nat_outlookup */ 3214 /* Returns: nat_t* - NULL == no match, */ 3215 /* else pointer to matching NAT entry */ 3216 /* Parameters: fin(I) - pointer to packet information */ 3217 /* flags(I) - NAT flags for this packet */ 3218 /* p(I) - protocol for this packet */ 3219 /* src(I) - source IP address */ 3220 /* dst(I) - destination IP address */ 3221 /* rw(I) - 1 == write lock on ipf_nat held, 0 == read lock. */ 3222 /* */ 3223 /* Lookup a nat entry based on the source 'real' ip address/port and */ 3224 /* destination address/port. We use this lookup when sending a packet out, */ 3225 /* we're looking for a table entry, based on the source address. */ 3226 /* */ 3227 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */ 3228 /* */ 3229 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */ 3230 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */ 3231 /* */ 3232 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */ 3233 /* the packet is of said protocol */ 3234 /* ------------------------------------------------------------------------ */ 3235 nat_t *nat_outlookup(fin, flags, p, src, dst) 3236 fr_info_t *fin; 3237 u_int flags, p; 3238 struct in_addr src , dst; 3239 { 3240 u_short sport, dport; 3241 u_int sflags; 3242 ipnat_t *ipn; 3243 u_32_t srcip; 3244 nat_t *nat; 3245 int nflags; 3246 void *ifp; 3247 u_int hv; 3248 ipf_stack_t *ifs = fin->fin_ifs; 3249 3250 ifp = fin->fin_ifp; 3251 3252 srcip = src.s_addr; 3253 sflags = flags & IPN_TCPUDPICMP; 3254 sport = 0; 3255 dport = 0; 3256 3257 switch (p) 3258 { 3259 case IPPROTO_TCP : 3260 case IPPROTO_UDP : 3261 sport = htons(fin->fin_data[0]); 3262 dport = htons(fin->fin_data[1]); 3263 break; 3264 case IPPROTO_ICMP : 3265 if (flags & IPN_ICMPERR) 3266 sport = fin->fin_data[1]; 3267 else 3268 dport = fin->fin_data[1]; 3269 break; 3270 default : 3271 break; 3272 } 3273 3274 if ((flags & SI_WILDP) != 0) 3275 goto find_out_wild_ports; 3276 3277 hv = NAT_HASH_FN(srcip, sport, 0xffffffff); 3278 hv = NAT_HASH_FN(dst.s_addr, hv + dport, ifs->ifs_ipf_nattable_sz); 3279 nat = ifs->ifs_nat_table[0][hv]; 3280 for (; nat; nat = nat->nat_hnext[0]) { 3281 if (nat->nat_ifps[1] != NULL) { 3282 if ((ifp != NULL) && (ifp != nat->nat_ifps[1])) 3283 continue; 3284 } else if (ifp != NULL) 3285 nat->nat_ifps[1] = ifp; 3286 3287 nflags = nat->nat_flags; 3288 3289 if (nat->nat_inip.s_addr == srcip && 3290 nat->nat_oip.s_addr == dst.s_addr && 3291 (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP))) 3292 || (p == nat->nat_p))) { 3293 switch (p) 3294 { 3295 #if 0 3296 case IPPROTO_GRE : 3297 if (nat->nat_call[1] != fin->fin_data[0]) 3298 continue; 3299 break; 3300 #endif 3301 case IPPROTO_TCP : 3302 case IPPROTO_UDP : 3303 if (nat->nat_oport != dport) 3304 continue; 3305 if (nat->nat_inport != sport) 3306 continue; 3307 break; 3308 default : 3309 break; 3310 } 3311 3312 ipn = nat->nat_ptr; 3313 if ((ipn != NULL) && (nat->nat_aps != NULL)) 3314 if (appr_match(fin, nat) != 0) 3315 continue; 3316 return nat; 3317 } 3318 } 3319 3320 /* 3321 * So if we didn't find it but there are wildcard members in the hash 3322 * table, go back and look for them. We do this search and update here 3323 * because it is modifying the NAT table and we want to do this only 3324 * for the first packet that matches. The exception, of course, is 3325 * for "dummy" (FI_IGNORE) lookups. 3326 */ 3327 find_out_wild_ports: 3328 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) 3329 return NULL; 3330 if (ifs->ifs_nat_stats.ns_wilds == 0) 3331 return NULL; 3332 3333 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3334 3335 hv = NAT_HASH_FN(srcip, 0, 0xffffffff); 3336 hv = NAT_HASH_FN(dst.s_addr, hv, ifs->ifs_ipf_nattable_sz); 3337 3338 WRITE_ENTER(&ifs->ifs_ipf_nat); 3339 3340 nat = ifs->ifs_nat_table[0][hv]; 3341 for (; nat; nat = nat->nat_hnext[0]) { 3342 if (nat->nat_ifps[1] != NULL) { 3343 if ((ifp != NULL) && (ifp != nat->nat_ifps[1])) 3344 continue; 3345 } else if (ifp != NULL) 3346 nat->nat_ifps[1] = ifp; 3347 3348 if (nat->nat_p != fin->fin_p) 3349 continue; 3350 if ((nat->nat_inip.s_addr != srcip) || 3351 (nat->nat_oip.s_addr != dst.s_addr)) 3352 continue; 3353 3354 nflags = nat->nat_flags; 3355 if (!(nflags & (NAT_TCPUDP|SI_WILDP))) 3356 continue; 3357 3358 if (nat_wildok(nat, (int)sport, (int)dport, nflags, 3359 NAT_OUTBOUND) == 1) { 3360 if ((fin->fin_flx & FI_IGNORE) != 0) 3361 break; 3362 if ((nflags & SI_CLONE) != 0) { 3363 nat = fr_natclone(fin, nat); 3364 if (nat == NULL) 3365 break; 3366 } else { 3367 MUTEX_ENTER(&ifs->ifs_ipf_nat_new); 3368 ifs->ifs_nat_stats.ns_wilds--; 3369 MUTEX_EXIT(&ifs->ifs_ipf_nat_new); 3370 } 3371 nat->nat_inport = sport; 3372 nat->nat_oport = dport; 3373 if (nat->nat_outport == 0) 3374 nat->nat_outport = sport; 3375 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); 3376 nat_tabmove(nat, ifs); 3377 break; 3378 } 3379 } 3380 3381 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3382 3383 return nat; 3384 } 3385 3386 3387 /* ------------------------------------------------------------------------ */ 3388 /* Function: nat_lookupredir */ 3389 /* Returns: nat_t* - NULL == no match, */ 3390 /* else pointer to matching NAT entry */ 3391 /* Parameters: np(I) - pointer to description of packet to find NAT table */ 3392 /* entry for. */ 3393 /* */ 3394 /* Lookup the NAT tables to search for a matching redirect */ 3395 /* ------------------------------------------------------------------------ */ 3396 nat_t *nat_lookupredir(np, ifs) 3397 natlookup_t *np; 3398 ipf_stack_t *ifs; 3399 { 3400 fr_info_t fi; 3401 nat_t *nat; 3402 3403 bzero((char *)&fi, sizeof(fi)); 3404 if (np->nl_flags & IPN_IN) { 3405 fi.fin_data[0] = ntohs(np->nl_realport); 3406 fi.fin_data[1] = ntohs(np->nl_outport); 3407 } else { 3408 fi.fin_data[0] = ntohs(np->nl_inport); 3409 fi.fin_data[1] = ntohs(np->nl_outport); 3410 } 3411 if (np->nl_flags & IPN_TCP) 3412 fi.fin_p = IPPROTO_TCP; 3413 else if (np->nl_flags & IPN_UDP) 3414 fi.fin_p = IPPROTO_UDP; 3415 else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY)) 3416 fi.fin_p = IPPROTO_ICMP; 3417 3418 fi.fin_ifs = ifs; 3419 /* 3420 * We can do two sorts of lookups: 3421 * - IPN_IN: we have the `real' and `out' address, look for `in'. 3422 * - default: we have the `in' and `out' address, look for `real'. 3423 */ 3424 if (np->nl_flags & IPN_IN) { 3425 if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p, 3426 np->nl_realip, np->nl_outip))) { 3427 np->nl_inip = nat->nat_inip; 3428 np->nl_inport = nat->nat_inport; 3429 } 3430 } else { 3431 /* 3432 * If nl_inip is non null, this is a lookup based on the real 3433 * ip address. Else, we use the fake. 3434 */ 3435 if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p, 3436 np->nl_inip, np->nl_outip))) { 3437 3438 if ((np->nl_flags & IPN_FINDFORWARD) != 0) { 3439 fr_info_t fin; 3440 bzero((char *)&fin, sizeof(fin)); 3441 fin.fin_p = nat->nat_p; 3442 fin.fin_data[0] = ntohs(nat->nat_outport); 3443 fin.fin_data[1] = ntohs(nat->nat_oport); 3444 fin.fin_ifs = ifs; 3445 if (nat_inlookup(&fin, np->nl_flags, fin.fin_p, 3446 nat->nat_outip, 3447 nat->nat_oip) != NULL) { 3448 np->nl_flags &= ~IPN_FINDFORWARD; 3449 } 3450 } 3451 3452 np->nl_realip = nat->nat_outip; 3453 np->nl_realport = nat->nat_outport; 3454 } 3455 } 3456 3457 return nat; 3458 } 3459 3460 3461 /* ------------------------------------------------------------------------ */ 3462 /* Function: nat_match */ 3463 /* Returns: int - 0 == no match, 1 == match */ 3464 /* Parameters: fin(I) - pointer to packet information */ 3465 /* np(I) - pointer to NAT rule */ 3466 /* */ 3467 /* Pull the matching of a packet against a NAT rule out of that complex */ 3468 /* loop inside fr_checknatin() and lay it out properly in its own function. */ 3469 /* ------------------------------------------------------------------------ */ 3470 static int nat_match(fin, np) 3471 fr_info_t *fin; 3472 ipnat_t *np; 3473 { 3474 frtuc_t *ft; 3475 3476 if (fin->fin_v != 4) 3477 return 0; 3478 3479 if (np->in_p && fin->fin_p != np->in_p) 3480 return 0; 3481 3482 if (fin->fin_out) { 3483 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK))) 3484 return 0; 3485 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip) 3486 ^ ((np->in_flags & IPN_NOTSRC) != 0)) 3487 return 0; 3488 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip) 3489 ^ ((np->in_flags & IPN_NOTDST) != 0)) 3490 return 0; 3491 } else { 3492 if (!(np->in_redir & NAT_REDIRECT)) 3493 return 0; 3494 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip) 3495 ^ ((np->in_flags & IPN_NOTSRC) != 0)) 3496 return 0; 3497 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip) 3498 ^ ((np->in_flags & IPN_NOTDST) != 0)) 3499 return 0; 3500 } 3501 3502 ft = &np->in_tuc; 3503 if (!(fin->fin_flx & FI_TCPUDP) || 3504 (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) { 3505 if (ft->ftu_scmp || ft->ftu_dcmp) 3506 return 0; 3507 return 1; 3508 } 3509 3510 return fr_tcpudpchk(fin, ft); 3511 } 3512 3513 3514 /* ------------------------------------------------------------------------ */ 3515 /* Function: nat_update */ 3516 /* Returns: Nil */ 3517 /* Parameters: nat(I) - pointer to NAT structure */ 3518 /* np(I) - pointer to NAT rule */ 3519 /* */ 3520 /* Updates the lifetime of a NAT table entry for non-TCP packets. Must be */ 3521 /* called with fin_rev updated - i.e. after calling nat_proto(). */ 3522 /* ------------------------------------------------------------------------ */ 3523 void nat_update(fin, nat, np) 3524 fr_info_t *fin; 3525 nat_t *nat; 3526 ipnat_t *np; 3527 { 3528 ipftq_t *ifq, *ifq2; 3529 ipftqent_t *tqe; 3530 ipf_stack_t *ifs = fin->fin_ifs; 3531 3532 MUTEX_ENTER(&nat->nat_lock); 3533 tqe = &nat->nat_tqe; 3534 ifq = tqe->tqe_ifq; 3535 3536 /* 3537 * We allow over-riding of NAT timeouts from NAT rules, even for 3538 * TCP, however, if it is TCP and there is no rule timeout set, 3539 * then do not update the timeout here. 3540 */ 3541 if (np != NULL) 3542 ifq2 = np->in_tqehead[fin->fin_rev]; 3543 else 3544 ifq2 = NULL; 3545 3546 if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) { 3547 (void) fr_tcp_age(&nat->nat_tqe, fin, ifs->ifs_nat_tqb, 0); 3548 } else { 3549 if (ifq2 == NULL) { 3550 if (nat->nat_p == IPPROTO_UDP) 3551 ifq2 = &ifs->ifs_nat_udptq; 3552 else if (nat->nat_p == IPPROTO_ICMP) 3553 ifq2 = &ifs->ifs_nat_icmptq; 3554 else 3555 ifq2 = &ifs->ifs_nat_iptq; 3556 } 3557 3558 fr_movequeue(tqe, ifq, ifq2, ifs); 3559 } 3560 MUTEX_EXIT(&nat->nat_lock); 3561 } 3562 3563 3564 /* ------------------------------------------------------------------------ */ 3565 /* Function: fr_checknatout */ 3566 /* Returns: int - -1 == packet failed NAT checks so block it, */ 3567 /* 0 == no packet translation occurred, */ 3568 /* 1 == packet was successfully translated. */ 3569 /* Parameters: fin(I) - pointer to packet information */ 3570 /* passp(I) - pointer to filtering result flags */ 3571 /* */ 3572 /* Check to see if an outcoming packet should be changed. ICMP packets are */ 3573 /* first checked to see if they match an existing entry (if an error), */ 3574 /* otherwise a search of the current NAT table is made. If neither results */ 3575 /* in a match then a search for a matching NAT rule is made. Create a new */ 3576 /* NAT entry if a we matched a NAT rule. Lastly, actually change the */ 3577 /* packet header(s) as required. */ 3578 /* ------------------------------------------------------------------------ */ 3579 int fr_checknatout(fin, passp) 3580 fr_info_t *fin; 3581 u_32_t *passp; 3582 { 3583 struct ifnet *ifp, *sifp; 3584 icmphdr_t *icmp = NULL; 3585 tcphdr_t *tcp = NULL; 3586 int rval, natfailed; 3587 ipnat_t *np = NULL; 3588 u_int nflags = 0; 3589 u_32_t ipa, iph; 3590 int natadd = 1; 3591 frentry_t *fr; 3592 nat_t *nat; 3593 ipf_stack_t *ifs = fin->fin_ifs; 3594 3595 if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0) 3596 return 0; 3597 3598 natfailed = 0; 3599 fr = fin->fin_fr; 3600 sifp = fin->fin_ifp; 3601 if ((fr != NULL) && !(fr->fr_flags & FR_DUP) && 3602 fr->fr_tifs[fin->fin_rev].fd_ifp && 3603 fr->fr_tifs[fin->fin_rev].fd_ifp != (void *)-1) 3604 fin->fin_ifp = fr->fr_tifs[fin->fin_rev].fd_ifp; 3605 ifp = fin->fin_ifp; 3606 3607 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 3608 switch (fin->fin_p) 3609 { 3610 case IPPROTO_TCP : 3611 nflags = IPN_TCP; 3612 break; 3613 case IPPROTO_UDP : 3614 nflags = IPN_UDP; 3615 break; 3616 case IPPROTO_ICMP : 3617 icmp = fin->fin_dp; 3618 3619 /* 3620 * This is an incoming packet, so the destination is 3621 * the icmp_id and the source port equals 0 3622 */ 3623 if (nat_icmpquerytype4(icmp->icmp_type)) 3624 nflags = IPN_ICMPQUERY; 3625 break; 3626 default : 3627 break; 3628 } 3629 3630 if ((nflags & IPN_TCPUDP)) 3631 tcp = fin->fin_dp; 3632 } 3633 3634 ipa = fin->fin_saddr; 3635 3636 READ_ENTER(&ifs->ifs_ipf_nat); 3637 3638 if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && 3639 (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND))) 3640 /*EMPTY*/; 3641 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin))) 3642 natadd = 0; 3643 else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p, 3644 fin->fin_src, fin->fin_dst))) { 3645 nflags = nat->nat_flags; 3646 } else { 3647 u_32_t hv, msk, nmsk; 3648 3649 /* 3650 * If there is no current entry in the nat table for this IP#, 3651 * create one for it (if there is a matching rule). 3652 */ 3653 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3654 msk = 0xffffffff; 3655 nmsk = ifs->ifs_nat_masks; 3656 WRITE_ENTER(&ifs->ifs_ipf_nat); 3657 maskloop: 3658 iph = ipa & htonl(msk); 3659 hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_natrules_sz); 3660 for (np = ifs->ifs_nat_rules[hv]; np; np = np->in_mnext) 3661 { 3662 if ((np->in_ifps[1] && (np->in_ifps[1] != ifp))) 3663 continue; 3664 if (np->in_v != fin->fin_v) 3665 continue; 3666 if (np->in_p && (np->in_p != fin->fin_p)) 3667 continue; 3668 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags)) 3669 continue; 3670 if (np->in_flags & IPN_FILTER) { 3671 if (!nat_match(fin, np)) 3672 continue; 3673 } else if ((ipa & np->in_inmsk) != np->in_inip) 3674 continue; 3675 3676 if ((fr != NULL) && 3677 !fr_matchtag(&np->in_tag, &fr->fr_nattag)) 3678 continue; 3679 3680 if (*np->in_plabel != '\0') { 3681 if (((np->in_flags & IPN_FILTER) == 0) && 3682 (np->in_dport != tcp->th_dport)) 3683 continue; 3684 if (appr_ok(fin, tcp, np) == 0) 3685 continue; 3686 } 3687 3688 if ((nat = nat_new(fin, np, NULL, nflags, 3689 NAT_OUTBOUND))) { 3690 np->in_hits++; 3691 break; 3692 } else 3693 natfailed = -1; 3694 } 3695 if ((np == NULL) && (nmsk != 0)) { 3696 while (nmsk) { 3697 msk <<= 1; 3698 if (nmsk & 0x80000000) 3699 break; 3700 nmsk <<= 1; 3701 } 3702 if (nmsk != 0) { 3703 nmsk <<= 1; 3704 goto maskloop; 3705 } 3706 } 3707 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3708 } 3709 3710 if (nat != NULL) { 3711 rval = fr_natout(fin, nat, natadd, nflags); 3712 if (rval == 1) { 3713 MUTEX_ENTER(&nat->nat_lock); 3714 nat->nat_ref++; 3715 MUTEX_EXIT(&nat->nat_lock); 3716 nat->nat_touched = ifs->ifs_fr_ticks; 3717 fin->fin_nat = nat; 3718 } 3719 } else 3720 rval = natfailed; 3721 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3722 3723 if (rval == -1) { 3724 if (passp != NULL) 3725 *passp = FR_BLOCK; 3726 fin->fin_flx |= FI_BADNAT; 3727 } 3728 fin->fin_ifp = sifp; 3729 return rval; 3730 } 3731 3732 /* ------------------------------------------------------------------------ */ 3733 /* Function: fr_natout */ 3734 /* Returns: int - -1 == packet failed NAT checks so block it, */ 3735 /* 1 == packet was successfully translated. */ 3736 /* Parameters: fin(I) - pointer to packet information */ 3737 /* nat(I) - pointer to NAT structure */ 3738 /* natadd(I) - flag indicating if it is safe to add frag cache */ 3739 /* nflags(I) - NAT flags set for this packet */ 3740 /* */ 3741 /* Translate a packet coming "out" on an interface. */ 3742 /* ------------------------------------------------------------------------ */ 3743 int fr_natout(fin, nat, natadd, nflags) 3744 fr_info_t *fin; 3745 nat_t *nat; 3746 int natadd; 3747 u_32_t nflags; 3748 { 3749 icmphdr_t *icmp; 3750 u_short *csump; 3751 u_32_t sumd; 3752 tcphdr_t *tcp; 3753 ipnat_t *np; 3754 int i; 3755 ipf_stack_t *ifs = fin->fin_ifs; 3756 3757 #if SOLARIS && defined(_KERNEL) 3758 net_data_t net_data_p; 3759 if (fin->fin_v == 4) 3760 net_data_p = ifs->ifs_ipf_ipv4; 3761 else 3762 net_data_p = ifs->ifs_ipf_ipv6; 3763 #endif 3764 3765 tcp = NULL; 3766 icmp = NULL; 3767 csump = NULL; 3768 np = nat->nat_ptr; 3769 3770 if ((natadd != 0) && (fin->fin_flx & FI_FRAG)) 3771 (void) fr_nat_newfrag(fin, 0, nat); 3772 3773 MUTEX_ENTER(&nat->nat_lock); 3774 nat->nat_bytes[1] += fin->fin_plen; 3775 nat->nat_pkts[1]++; 3776 MUTEX_EXIT(&nat->nat_lock); 3777 3778 /* 3779 * Fix up checksums, not by recalculating them, but 3780 * simply computing adjustments. 3781 * This is only done for STREAMS based IP implementations where the 3782 * checksum has already been calculated by IP. In all other cases, 3783 * IPFilter is called before the checksum needs calculating so there 3784 * is no call to modify whatever is in the header now. 3785 */ 3786 ASSERT(fin->fin_m != NULL); 3787 if (fin->fin_v == 4 && !NET_IS_HCK_L3_FULL(net_data_p, fin->fin_m)) { 3788 if (nflags == IPN_ICMPERR) { 3789 u_32_t s1, s2; 3790 3791 s1 = LONG_SUM(ntohl(fin->fin_saddr)); 3792 s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr)); 3793 CALC_SUMD(s1, s2, sumd); 3794 3795 fix_outcksum(&fin->fin_ip->ip_sum, sumd); 3796 } 3797 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ 3798 defined(linux) || defined(BRIDGE_IPF) 3799 else { 3800 /* 3801 * Strictly speaking, this isn't necessary on BSD 3802 * kernels because they do checksum calculation after 3803 * this code has run BUT if ipfilter is being used 3804 * to do NAT as a bridge, that code doesn't exist. 3805 */ 3806 if (nat->nat_dir == NAT_OUTBOUND) 3807 fix_outcksum(&fin->fin_ip->ip_sum, 3808 nat->nat_ipsumd); 3809 else 3810 fix_incksum(&fin->fin_ip->ip_sum, 3811 nat->nat_ipsumd); 3812 } 3813 #endif 3814 } 3815 3816 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 3817 if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) { 3818 tcp = fin->fin_dp; 3819 3820 tcp->th_sport = nat->nat_outport; 3821 fin->fin_data[0] = ntohs(nat->nat_outport); 3822 } 3823 3824 if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) { 3825 icmp = fin->fin_dp; 3826 icmp->icmp_id = nat->nat_outport; 3827 } 3828 3829 csump = nat_proto(fin, nat, nflags); 3830 } 3831 3832 fin->fin_ip->ip_src = nat->nat_outip; 3833 3834 nat_update(fin, nat, np); 3835 3836 /* 3837 * The above comments do not hold for layer 4 (or higher) checksums... 3838 */ 3839 if (csump != NULL && !NET_IS_HCK_L4_FULL(net_data_p, fin->fin_m)) { 3840 if (nflags & IPN_TCPUDP && 3841 NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) 3842 sumd = nat->nat_sumd[1]; 3843 else 3844 sumd = nat->nat_sumd[0]; 3845 3846 if (nat->nat_dir == NAT_OUTBOUND) 3847 fix_outcksum(csump, sumd); 3848 else 3849 fix_incksum(csump, sumd); 3850 } 3851 #ifdef IPFILTER_SYNC 3852 ipfsync_update(SMC_NAT, fin, nat->nat_sync); 3853 #endif 3854 /* ------------------------------------------------------------- */ 3855 /* A few quick notes: */ 3856 /* Following are test conditions prior to calling the */ 3857 /* appr_check routine. */ 3858 /* */ 3859 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */ 3860 /* with a redirect rule, we attempt to match the packet's */ 3861 /* source port against in_dport, otherwise we'd compare the */ 3862 /* packet's destination. */ 3863 /* ------------------------------------------------------------- */ 3864 if ((np != NULL) && (np->in_apr != NULL)) { 3865 i = appr_check(fin, nat); 3866 if (i == 0) 3867 i = 1; 3868 } else 3869 i = 1; 3870 ATOMIC_INCL(ifs->ifs_nat_stats.ns_mapped[1]); 3871 fin->fin_flx |= FI_NATED; 3872 return i; 3873 } 3874 3875 3876 /* ------------------------------------------------------------------------ */ 3877 /* Function: fr_checknatin */ 3878 /* Returns: int - -1 == packet failed NAT checks so block it, */ 3879 /* 0 == no packet translation occurred, */ 3880 /* 1 == packet was successfully translated. */ 3881 /* Parameters: fin(I) - pointer to packet information */ 3882 /* passp(I) - pointer to filtering result flags */ 3883 /* */ 3884 /* Check to see if an incoming packet should be changed. ICMP packets are */ 3885 /* first checked to see if they match an existing entry (if an error), */ 3886 /* otherwise a search of the current NAT table is made. If neither results */ 3887 /* in a match then a search for a matching NAT rule is made. Create a new */ 3888 /* NAT entry if a we matched a NAT rule. Lastly, actually change the */ 3889 /* packet header(s) as required. */ 3890 /* ------------------------------------------------------------------------ */ 3891 int fr_checknatin(fin, passp) 3892 fr_info_t *fin; 3893 u_32_t *passp; 3894 { 3895 u_int nflags, natadd; 3896 int rval, natfailed; 3897 struct ifnet *ifp; 3898 struct in_addr in; 3899 icmphdr_t *icmp; 3900 tcphdr_t *tcp; 3901 u_short dport; 3902 ipnat_t *np; 3903 nat_t *nat; 3904 u_32_t iph; 3905 ipf_stack_t *ifs = fin->fin_ifs; 3906 3907 if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0) 3908 return 0; 3909 3910 tcp = NULL; 3911 icmp = NULL; 3912 dport = 0; 3913 natadd = 1; 3914 nflags = 0; 3915 natfailed = 0; 3916 ifp = fin->fin_ifp; 3917 3918 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 3919 switch (fin->fin_p) 3920 { 3921 case IPPROTO_TCP : 3922 nflags = IPN_TCP; 3923 break; 3924 case IPPROTO_UDP : 3925 nflags = IPN_UDP; 3926 break; 3927 case IPPROTO_ICMP : 3928 icmp = fin->fin_dp; 3929 3930 /* 3931 * This is an incoming packet, so the destination is 3932 * the icmp_id and the source port equals 0 3933 */ 3934 if (nat_icmpquerytype4(icmp->icmp_type)) { 3935 nflags = IPN_ICMPQUERY; 3936 dport = icmp->icmp_id; 3937 } break; 3938 default : 3939 break; 3940 } 3941 3942 if ((nflags & IPN_TCPUDP)) { 3943 tcp = fin->fin_dp; 3944 dport = tcp->th_dport; 3945 } 3946 } 3947 3948 in = fin->fin_dst; 3949 3950 READ_ENTER(&ifs->ifs_ipf_nat); 3951 3952 if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && 3953 (nat = nat_icmperror(fin, &nflags, NAT_INBOUND))) 3954 /*EMPTY*/; 3955 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin))) 3956 natadd = 0; 3957 else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p, 3958 fin->fin_src, in))) { 3959 nflags = nat->nat_flags; 3960 } else { 3961 u_32_t hv, msk, rmsk; 3962 3963 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3964 rmsk = ifs->ifs_rdr_masks; 3965 msk = 0xffffffff; 3966 WRITE_ENTER(&ifs->ifs_ipf_nat); 3967 /* 3968 * If there is no current entry in the nat table for this IP#, 3969 * create one for it (if there is a matching rule). 3970 */ 3971 maskloop: 3972 iph = in.s_addr & htonl(msk); 3973 hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_rdrrules_sz); 3974 for (np = ifs->ifs_rdr_rules[hv]; np; np = np->in_rnext) { 3975 if (np->in_ifps[0] && (np->in_ifps[0] != ifp)) 3976 continue; 3977 if (np->in_v != fin->fin_v) 3978 continue; 3979 if (np->in_p && (np->in_p != fin->fin_p)) 3980 continue; 3981 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags)) 3982 continue; 3983 if (np->in_flags & IPN_FILTER) { 3984 if (!nat_match(fin, np)) 3985 continue; 3986 } else { 3987 if ((in.s_addr & np->in_outmsk) != np->in_outip) 3988 continue; 3989 if (np->in_pmin && 3990 ((ntohs(np->in_pmax) < ntohs(dport)) || 3991 (ntohs(dport) < ntohs(np->in_pmin)))) 3992 continue; 3993 } 3994 3995 if (*np->in_plabel != '\0') { 3996 if (!appr_ok(fin, tcp, np)) { 3997 continue; 3998 } 3999 } 4000 4001 nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND); 4002 if (nat != NULL) { 4003 np->in_hits++; 4004 break; 4005 } else 4006 natfailed = -1; 4007 } 4008 4009 if ((np == NULL) && (rmsk != 0)) { 4010 while (rmsk) { 4011 msk <<= 1; 4012 if (rmsk & 0x80000000) 4013 break; 4014 rmsk <<= 1; 4015 } 4016 if (rmsk != 0) { 4017 rmsk <<= 1; 4018 goto maskloop; 4019 } 4020 } 4021 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 4022 } 4023 if (nat != NULL) { 4024 rval = fr_natin(fin, nat, natadd, nflags); 4025 if (rval == 1) { 4026 MUTEX_ENTER(&nat->nat_lock); 4027 nat->nat_ref++; 4028 MUTEX_EXIT(&nat->nat_lock); 4029 nat->nat_touched = ifs->ifs_fr_ticks; 4030 fin->fin_nat = nat; 4031 fin->fin_state = nat->nat_state; 4032 } 4033 } else 4034 rval = natfailed; 4035 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4036 4037 if (rval == -1) { 4038 if (passp != NULL) 4039 *passp = FR_BLOCK; 4040 fin->fin_flx |= FI_BADNAT; 4041 } 4042 return rval; 4043 } 4044 4045 4046 /* ------------------------------------------------------------------------ */ 4047 /* Function: fr_natin */ 4048 /* Returns: int - -1 == packet failed NAT checks so block it, */ 4049 /* 1 == packet was successfully translated. */ 4050 /* Parameters: fin(I) - pointer to packet information */ 4051 /* nat(I) - pointer to NAT structure */ 4052 /* natadd(I) - flag indicating if it is safe to add frag cache */ 4053 /* nflags(I) - NAT flags set for this packet */ 4054 /* Locks Held: ipf_nat (READ) */ 4055 /* */ 4056 /* Translate a packet coming "in" on an interface. */ 4057 /* ------------------------------------------------------------------------ */ 4058 int fr_natin(fin, nat, natadd, nflags) 4059 fr_info_t *fin; 4060 nat_t *nat; 4061 int natadd; 4062 u_32_t nflags; 4063 { 4064 icmphdr_t *icmp; 4065 u_short *csump; 4066 tcphdr_t *tcp; 4067 ipnat_t *np; 4068 int i; 4069 ipf_stack_t *ifs = fin->fin_ifs; 4070 4071 #if SOLARIS && defined(_KERNEL) 4072 net_data_t net_data_p; 4073 if (fin->fin_v == 4) 4074 net_data_p = ifs->ifs_ipf_ipv4; 4075 else 4076 net_data_p = ifs->ifs_ipf_ipv6; 4077 #endif 4078 4079 tcp = NULL; 4080 csump = NULL; 4081 np = nat->nat_ptr; 4082 fin->fin_fr = nat->nat_fr; 4083 4084 if ((natadd != 0) && (fin->fin_flx & FI_FRAG)) 4085 (void) fr_nat_newfrag(fin, 0, nat); 4086 4087 if (np != NULL) { 4088 4089 /* ------------------------------------------------------------- */ 4090 /* A few quick notes: */ 4091 /* Following are test conditions prior to calling the */ 4092 /* appr_check routine. */ 4093 /* */ 4094 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */ 4095 /* with a map rule, we attempt to match the packet's */ 4096 /* source port against in_dport, otherwise we'd compare the */ 4097 /* packet's destination. */ 4098 /* ------------------------------------------------------------- */ 4099 if (np->in_apr != NULL) { 4100 i = appr_check(fin, nat); 4101 if (i == -1) { 4102 return -1; 4103 } 4104 } 4105 } 4106 4107 #ifdef IPFILTER_SYNC 4108 ipfsync_update(SMC_NAT, fin, nat->nat_sync); 4109 #endif 4110 4111 MUTEX_ENTER(&nat->nat_lock); 4112 nat->nat_bytes[0] += fin->fin_plen; 4113 nat->nat_pkts[0]++; 4114 MUTEX_EXIT(&nat->nat_lock); 4115 4116 fin->fin_ip->ip_dst = nat->nat_inip; 4117 fin->fin_fi.fi_daddr = nat->nat_inip.s_addr; 4118 if (nflags & IPN_TCPUDP) 4119 tcp = fin->fin_dp; 4120 4121 /* 4122 * Fix up checksums, not by recalculating them, but 4123 * simply computing adjustments. 4124 * Why only do this for some platforms on inbound packets ? 4125 * Because for those that it is done, IP processing is yet to happen 4126 * and so the IPv4 header checksum has not yet been evaluated. 4127 * Perhaps it should always be done for the benefit of things like 4128 * fast forwarding (so that it doesn't need to be recomputed) but with 4129 * header checksum offloading, perhaps it is a moot point. 4130 */ 4131 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ 4132 defined(__osf__) || defined(linux) 4133 if (nat->nat_dir == NAT_OUTBOUND) 4134 fix_incksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd); 4135 else 4136 fix_outcksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd); 4137 #endif 4138 4139 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 4140 if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) { 4141 tcp->th_dport = nat->nat_inport; 4142 fin->fin_data[1] = ntohs(nat->nat_inport); 4143 } 4144 4145 4146 if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) { 4147 icmp = fin->fin_dp; 4148 4149 icmp->icmp_id = nat->nat_inport; 4150 } 4151 4152 csump = nat_proto(fin, nat, nflags); 4153 } 4154 4155 nat_update(fin, nat, np); 4156 4157 /* 4158 * In case they are being forwarded, inbound packets always need to have 4159 * their checksum adjusted even if hardware checksum validation said OK. 4160 */ 4161 if (csump != NULL) { 4162 if (nat->nat_dir == NAT_OUTBOUND) 4163 fix_incksum(csump, nat->nat_sumd[0]); 4164 else 4165 fix_outcksum(csump, nat->nat_sumd[0]); 4166 } 4167 4168 #if SOLARIS && defined(_KERNEL) 4169 if (nflags & IPN_TCPUDP && 4170 NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) { 4171 /* 4172 * Need to adjust the partial checksum result stored in 4173 * db_cksum16, which will be used for validation in IP. 4174 * See IP_CKSUM_RECV(). 4175 * Adjustment data should be the inverse of the IP address 4176 * changes, because db_cksum16 is supposed to be the complement 4177 * of the pesudo header. 4178 */ 4179 csump = &fin->fin_m->b_datap->db_cksum16; 4180 if (nat->nat_dir == NAT_OUTBOUND) 4181 fix_outcksum(csump, nat->nat_sumd[1]); 4182 else 4183 fix_incksum(csump, nat->nat_sumd[1]); 4184 } 4185 #endif 4186 4187 ATOMIC_INCL(ifs->ifs_nat_stats.ns_mapped[0]); 4188 fin->fin_flx |= FI_NATED; 4189 if (np != NULL && np->in_tag.ipt_num[0] != 0) 4190 fin->fin_nattag = &np->in_tag; 4191 return 1; 4192 } 4193 4194 4195 /* ------------------------------------------------------------------------ */ 4196 /* Function: nat_proto */ 4197 /* Returns: u_short* - pointer to transport header checksum to update, */ 4198 /* NULL if the transport protocol is not recognised */ 4199 /* as needing a checksum update. */ 4200 /* Parameters: fin(I) - pointer to packet information */ 4201 /* nat(I) - pointer to NAT structure */ 4202 /* nflags(I) - NAT flags set for this packet */ 4203 /* */ 4204 /* Return the pointer to the checksum field for each protocol so understood.*/ 4205 /* If support for making other changes to a protocol header is required, */ 4206 /* that is not strictly 'address' translation, such as clamping the MSS in */ 4207 /* TCP down to a specific value, then do it from here. */ 4208 /* ------------------------------------------------------------------------ */ 4209 u_short *nat_proto(fin, nat, nflags) 4210 fr_info_t *fin; 4211 nat_t *nat; 4212 u_int nflags; 4213 { 4214 icmphdr_t *icmp; 4215 u_short *csump; 4216 tcphdr_t *tcp; 4217 udphdr_t *udp; 4218 4219 csump = NULL; 4220 if (fin->fin_out == 0) { 4221 fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND); 4222 } else { 4223 fin->fin_rev = (nat->nat_dir == NAT_INBOUND); 4224 } 4225 4226 switch (fin->fin_p) 4227 { 4228 case IPPROTO_TCP : 4229 tcp = fin->fin_dp; 4230 4231 csump = &tcp->th_sum; 4232 4233 /* 4234 * Do a MSS CLAMPING on a SYN packet, 4235 * only deal IPv4 for now. 4236 */ 4237 if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0) 4238 nat_mssclamp(tcp, nat->nat_mssclamp, csump); 4239 4240 break; 4241 4242 case IPPROTO_UDP : 4243 udp = fin->fin_dp; 4244 4245 if (udp->uh_sum) 4246 csump = &udp->uh_sum; 4247 break; 4248 4249 case IPPROTO_ICMP : 4250 icmp = fin->fin_dp; 4251 4252 if ((nflags & IPN_ICMPQUERY) != 0) { 4253 if (icmp->icmp_cksum != 0) 4254 csump = &icmp->icmp_cksum; 4255 } 4256 break; 4257 } 4258 return csump; 4259 } 4260 4261 4262 /* ------------------------------------------------------------------------ */ 4263 /* Function: fr_natunload */ 4264 /* Returns: Nil */ 4265 /* Parameters: Nil */ 4266 /* */ 4267 /* Free all memory used by NAT structures allocated at runtime. */ 4268 /* ------------------------------------------------------------------------ */ 4269 void fr_natunload(ifs) 4270 ipf_stack_t *ifs; 4271 { 4272 ipftq_t *ifq, *ifqnext; 4273 4274 (void) nat_clearlist(ifs); 4275 (void) nat_flushtable(ifs); 4276 4277 /* 4278 * Proxy timeout queues are not cleaned here because although they 4279 * exist on the NAT list, appr_unload is called after fr_natunload 4280 * and the proxies actually are responsible for them being created. 4281 * Should the proxy timeouts have their own list? There's no real 4282 * justification as this is the only complication. 4283 */ 4284 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { 4285 ifqnext = ifq->ifq_next; 4286 if (((ifq->ifq_flags & IFQF_PROXY) == 0) && 4287 (fr_deletetimeoutqueue(ifq) == 0)) 4288 fr_freetimeoutqueue(ifq, ifs); 4289 } 4290 4291 if (ifs->ifs_nat_table[0] != NULL) { 4292 KFREES(ifs->ifs_nat_table[0], 4293 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 4294 ifs->ifs_nat_table[0] = NULL; 4295 } 4296 if (ifs->ifs_nat_table[1] != NULL) { 4297 KFREES(ifs->ifs_nat_table[1], 4298 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 4299 ifs->ifs_nat_table[1] = NULL; 4300 } 4301 if (ifs->ifs_nat_rules != NULL) { 4302 KFREES(ifs->ifs_nat_rules, 4303 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz); 4304 ifs->ifs_nat_rules = NULL; 4305 } 4306 if (ifs->ifs_rdr_rules != NULL) { 4307 KFREES(ifs->ifs_rdr_rules, 4308 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz); 4309 ifs->ifs_rdr_rules = NULL; 4310 } 4311 if (ifs->ifs_maptable != NULL) { 4312 KFREES(ifs->ifs_maptable, 4313 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); 4314 ifs->ifs_maptable = NULL; 4315 } 4316 if (ifs->ifs_nat_stats.ns_bucketlen[0] != NULL) { 4317 KFREES(ifs->ifs_nat_stats.ns_bucketlen[0], 4318 sizeof(u_long *) * ifs->ifs_ipf_nattable_sz); 4319 ifs->ifs_nat_stats.ns_bucketlen[0] = NULL; 4320 } 4321 if (ifs->ifs_nat_stats.ns_bucketlen[1] != NULL) { 4322 KFREES(ifs->ifs_nat_stats.ns_bucketlen[1], 4323 sizeof(u_long *) * ifs->ifs_ipf_nattable_sz); 4324 ifs->ifs_nat_stats.ns_bucketlen[1] = NULL; 4325 } 4326 4327 if (ifs->ifs_fr_nat_maxbucket_reset == 1) 4328 ifs->ifs_fr_nat_maxbucket = 0; 4329 4330 if (ifs->ifs_fr_nat_init == 1) { 4331 ifs->ifs_fr_nat_init = 0; 4332 fr_sttab_destroy(ifs->ifs_nat_tqb); 4333 4334 RW_DESTROY(&ifs->ifs_ipf_natfrag); 4335 RW_DESTROY(&ifs->ifs_ipf_nat); 4336 4337 MUTEX_DESTROY(&ifs->ifs_ipf_nat_new); 4338 MUTEX_DESTROY(&ifs->ifs_ipf_natio); 4339 4340 MUTEX_DESTROY(&ifs->ifs_nat_udptq.ifq_lock); 4341 MUTEX_DESTROY(&ifs->ifs_nat_icmptq.ifq_lock); 4342 MUTEX_DESTROY(&ifs->ifs_nat_iptq.ifq_lock); 4343 } 4344 } 4345 4346 4347 /* ------------------------------------------------------------------------ */ 4348 /* Function: fr_natexpire */ 4349 /* Returns: Nil */ 4350 /* Parameters: Nil */ 4351 /* */ 4352 /* Check all of the timeout queues for entries at the top which need to be */ 4353 /* expired. */ 4354 /* ------------------------------------------------------------------------ */ 4355 void fr_natexpire(ifs) 4356 ipf_stack_t *ifs; 4357 { 4358 ipftq_t *ifq, *ifqnext; 4359 ipftqent_t *tqe, *tqn; 4360 int i; 4361 SPL_INT(s); 4362 4363 SPL_NET(s); 4364 WRITE_ENTER(&ifs->ifs_ipf_nat); 4365 for (ifq = ifs->ifs_nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) { 4366 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { 4367 if (tqe->tqe_die > ifs->ifs_fr_ticks) 4368 break; 4369 tqn = tqe->tqe_next; 4370 nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs); 4371 } 4372 } 4373 4374 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { 4375 ifqnext = ifq->ifq_next; 4376 4377 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { 4378 if (tqe->tqe_die > ifs->ifs_fr_ticks) 4379 break; 4380 tqn = tqe->tqe_next; 4381 nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs); 4382 } 4383 } 4384 4385 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { 4386 ifqnext = ifq->ifq_next; 4387 4388 if (((ifq->ifq_flags & IFQF_DELETE) != 0) && 4389 (ifq->ifq_ref == 0)) { 4390 fr_freetimeoutqueue(ifq, ifs); 4391 } 4392 } 4393 4394 if (ifs->ifs_nat_doflush != 0) { 4395 (void) nat_extraflush(2, ifs); 4396 ifs->ifs_nat_doflush = 0; 4397 } 4398 4399 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4400 SPL_X(s); 4401 } 4402 4403 4404 /* ------------------------------------------------------------------------ */ 4405 /* Function: fr_nataddrsync */ 4406 /* Returns: Nil */ 4407 /* Parameters: ifp(I) - pointer to network interface */ 4408 /* addr(I) - pointer to new network address */ 4409 /* */ 4410 /* Walk through all of the currently active NAT sessions, looking for those */ 4411 /* which need to have their translated address updated (where the interface */ 4412 /* matches the one passed in) and change it, recalculating the checksum sum */ 4413 /* difference too. */ 4414 /* ------------------------------------------------------------------------ */ 4415 void fr_nataddrsync(ifp, addr, ifs) 4416 void *ifp; 4417 struct in_addr *addr; 4418 ipf_stack_t *ifs; 4419 { 4420 u_32_t sum1, sum2, sumd; 4421 nat_t *nat; 4422 ipnat_t *np; 4423 SPL_INT(s); 4424 4425 if (ifs->ifs_fr_running <= 0) 4426 return; 4427 4428 SPL_NET(s); 4429 WRITE_ENTER(&ifs->ifs_ipf_nat); 4430 4431 if (ifs->ifs_fr_running <= 0) { 4432 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4433 return; 4434 } 4435 4436 /* 4437 * Change IP addresses for NAT sessions for any protocol except TCP 4438 * since it will break the TCP connection anyway. The only rules 4439 * which will get changed are those which are "map ... -> 0/32", 4440 * where the rule specifies the address is taken from the interface. 4441 */ 4442 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4443 if (addr != NULL) { 4444 if (((ifp != NULL) && ifp != (nat->nat_ifps[0])) || 4445 ((nat->nat_flags & IPN_TCP) != 0)) 4446 continue; 4447 if (((np = nat->nat_ptr) == NULL) || 4448 (np->in_nip || (np->in_outmsk != 0xffffffff))) 4449 continue; 4450 4451 /* 4452 * Change the map-to address to be the same as the 4453 * new one. 4454 */ 4455 sum1 = nat->nat_outip.s_addr; 4456 nat->nat_outip = *addr; 4457 sum2 = nat->nat_outip.s_addr; 4458 4459 } else if (((ifp == NULL) || (ifp == nat->nat_ifps[0])) && 4460 !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) && 4461 (np->in_outmsk == 0xffffffff) && !np->in_nip) { 4462 struct in_addr in; 4463 4464 /* 4465 * Change the map-to address to be the same as the 4466 * new one. 4467 */ 4468 sum1 = nat->nat_outip.s_addr; 4469 if (fr_ifpaddr(4, FRI_NORMAL, nat->nat_ifps[0], 4470 &in, NULL, ifs) != -1) 4471 nat->nat_outip = in; 4472 sum2 = nat->nat_outip.s_addr; 4473 } else { 4474 continue; 4475 } 4476 4477 if (sum1 == sum2) 4478 continue; 4479 /* 4480 * Readjust the checksum adjustment to take into 4481 * account the new IP#. 4482 */ 4483 CALC_SUMD(sum1, sum2, sumd); 4484 /* XXX - dont change for TCP when solaris does 4485 * hardware checksumming. 4486 */ 4487 sumd += nat->nat_sumd[0]; 4488 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); 4489 nat->nat_sumd[1] = nat->nat_sumd[0]; 4490 } 4491 4492 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4493 SPL_X(s); 4494 } 4495 4496 4497 /* ------------------------------------------------------------------------ */ 4498 /* Function: fr_natifpsync */ 4499 /* Returns: Nil */ 4500 /* Parameters: action(I) - how we are syncing */ 4501 /* ifp(I) - pointer to network interface */ 4502 /* name(I) - name of interface to sync to */ 4503 /* */ 4504 /* This function is used to resync the mapping of interface names and their */ 4505 /* respective 'pointers'. For "action == IPFSYNC_RESYNC", resync all */ 4506 /* interfaces by doing a new lookup of name to 'pointer'. For "action == */ 4507 /* IPFSYNC_NEWIFP", treat ifp as the new pointer value associated with */ 4508 /* "name" and for "action == IPFSYNC_OLDIFP", ifp is a pointer for which */ 4509 /* there is no longer any interface associated with it. */ 4510 /* ------------------------------------------------------------------------ */ 4511 void fr_natifpsync(action, ifp, name, ifs) 4512 int action; 4513 void *ifp; 4514 char *name; 4515 ipf_stack_t *ifs; 4516 { 4517 #if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL) 4518 int s; 4519 #endif 4520 nat_t *nat; 4521 ipnat_t *n; 4522 4523 if (ifs->ifs_fr_running <= 0) 4524 return; 4525 4526 SPL_NET(s); 4527 WRITE_ENTER(&ifs->ifs_ipf_nat); 4528 4529 if (ifs->ifs_fr_running <= 0) { 4530 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4531 return; 4532 } 4533 4534 switch (action) 4535 { 4536 case IPFSYNC_RESYNC : 4537 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4538 if ((ifp == nat->nat_ifps[0]) || 4539 (nat->nat_ifps[0] == (void *)-1)) { 4540 nat->nat_ifps[0] = 4541 fr_resolvenic(nat->nat_ifnames[0], 4, ifs); 4542 } 4543 4544 if ((ifp == nat->nat_ifps[1]) || 4545 (nat->nat_ifps[1] == (void *)-1)) { 4546 nat->nat_ifps[1] = 4547 fr_resolvenic(nat->nat_ifnames[1], 4, ifs); 4548 } 4549 } 4550 4551 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { 4552 if (n->in_ifps[0] == ifp || 4553 n->in_ifps[0] == (void *)-1) { 4554 n->in_ifps[0] = 4555 fr_resolvenic(n->in_ifnames[0], 4, ifs); 4556 } 4557 if (n->in_ifps[1] == ifp || 4558 n->in_ifps[1] == (void *)-1) { 4559 n->in_ifps[1] = 4560 fr_resolvenic(n->in_ifnames[1], 4, ifs); 4561 } 4562 } 4563 break; 4564 case IPFSYNC_NEWIFP : 4565 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4566 if (!strncmp(name, nat->nat_ifnames[0], 4567 sizeof(nat->nat_ifnames[0]))) 4568 nat->nat_ifps[0] = ifp; 4569 if (!strncmp(name, nat->nat_ifnames[1], 4570 sizeof(nat->nat_ifnames[1]))) 4571 nat->nat_ifps[1] = ifp; 4572 } 4573 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { 4574 if (!strncmp(name, n->in_ifnames[0], 4575 sizeof(n->in_ifnames[0]))) 4576 n->in_ifps[0] = ifp; 4577 if (!strncmp(name, n->in_ifnames[1], 4578 sizeof(n->in_ifnames[1]))) 4579 n->in_ifps[1] = ifp; 4580 } 4581 break; 4582 case IPFSYNC_OLDIFP : 4583 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4584 if (ifp == nat->nat_ifps[0]) 4585 nat->nat_ifps[0] = (void *)-1; 4586 if (ifp == nat->nat_ifps[1]) 4587 nat->nat_ifps[1] = (void *)-1; 4588 } 4589 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { 4590 if (n->in_ifps[0] == ifp) 4591 n->in_ifps[0] = (void *)-1; 4592 if (n->in_ifps[1] == ifp) 4593 n->in_ifps[1] = (void *)-1; 4594 } 4595 break; 4596 } 4597 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4598 SPL_X(s); 4599 } 4600 4601 4602 /* ------------------------------------------------------------------------ */ 4603 /* Function: nat_icmpquerytype4 */ 4604 /* Returns: int - 1 == success, 0 == failure */ 4605 /* Parameters: icmptype(I) - ICMP type number */ 4606 /* */ 4607 /* Tests to see if the ICMP type number passed is a query/response type or */ 4608 /* not. */ 4609 /* ------------------------------------------------------------------------ */ 4610 static INLINE int nat_icmpquerytype4(icmptype) 4611 int icmptype; 4612 { 4613 4614 /* 4615 * For the ICMP query NAT code, it is essential that both the query 4616 * and the reply match on the NAT rule. Because the NAT structure 4617 * does not keep track of the icmptype, and a single NAT structure 4618 * is used for all icmp types with the same src, dest and id, we 4619 * simply define the replies as queries as well. The funny thing is, 4620 * altough it seems silly to call a reply a query, this is exactly 4621 * as it is defined in the IPv4 specification 4622 */ 4623 4624 switch (icmptype) 4625 { 4626 4627 case ICMP_ECHOREPLY: 4628 case ICMP_ECHO: 4629 /* route aedvertisement/solliciation is currently unsupported: */ 4630 /* it would require rewriting the ICMP data section */ 4631 case ICMP_TSTAMP: 4632 case ICMP_TSTAMPREPLY: 4633 case ICMP_IREQ: 4634 case ICMP_IREQREPLY: 4635 case ICMP_MASKREQ: 4636 case ICMP_MASKREPLY: 4637 return 1; 4638 default: 4639 return 0; 4640 } 4641 } 4642 4643 4644 /* ------------------------------------------------------------------------ */ 4645 /* Function: nat_log */ 4646 /* Returns: Nil */ 4647 /* Parameters: nat(I) - pointer to NAT structure */ 4648 /* type(I) - type of log entry to create */ 4649 /* */ 4650 /* Creates a NAT log entry. */ 4651 /* ------------------------------------------------------------------------ */ 4652 void nat_log(nat, type, ifs) 4653 struct nat *nat; 4654 u_int type; 4655 ipf_stack_t *ifs; 4656 { 4657 #ifdef IPFILTER_LOG 4658 # ifndef LARGE_NAT 4659 struct ipnat *np; 4660 int rulen; 4661 # endif 4662 struct natlog natl; 4663 void *items[1]; 4664 size_t sizes[1]; 4665 int types[1]; 4666 4667 natl.nl_inip = nat->nat_inip; 4668 natl.nl_outip = nat->nat_outip; 4669 natl.nl_origip = nat->nat_oip; 4670 natl.nl_bytes[0] = nat->nat_bytes[0]; 4671 natl.nl_bytes[1] = nat->nat_bytes[1]; 4672 natl.nl_pkts[0] = nat->nat_pkts[0]; 4673 natl.nl_pkts[1] = nat->nat_pkts[1]; 4674 natl.nl_origport = nat->nat_oport; 4675 natl.nl_inport = nat->nat_inport; 4676 natl.nl_outport = nat->nat_outport; 4677 natl.nl_p = nat->nat_p; 4678 natl.nl_type = type; 4679 natl.nl_rule = -1; 4680 # ifndef LARGE_NAT 4681 if (nat->nat_ptr != NULL) { 4682 for (rulen = 0, np = ifs->ifs_nat_list; np; 4683 np = np->in_next, rulen++) 4684 if (np == nat->nat_ptr) { 4685 natl.nl_rule = rulen; 4686 break; 4687 } 4688 } 4689 # endif 4690 items[0] = &natl; 4691 sizes[0] = sizeof(natl); 4692 types[0] = 0; 4693 4694 (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1, ifs); 4695 #endif 4696 } 4697 4698 4699 #if defined(__OpenBSD__) 4700 /* ------------------------------------------------------------------------ */ 4701 /* Function: nat_ifdetach */ 4702 /* Returns: Nil */ 4703 /* Parameters: ifp(I) - pointer to network interface */ 4704 /* */ 4705 /* Compatibility interface for OpenBSD to trigger the correct updating of */ 4706 /* interface references within IPFilter. */ 4707 /* ------------------------------------------------------------------------ */ 4708 void nat_ifdetach(ifp, ifs) 4709 void *ifp; 4710 ipf_stack_t *ifs; 4711 { 4712 frsync(ifp, ifs); 4713 return; 4714 } 4715 #endif 4716 4717 4718 /* ------------------------------------------------------------------------ */ 4719 /* Function: fr_ipnatderef */ 4720 /* Returns: Nil */ 4721 /* Parameters: isp(I) - pointer to pointer to NAT rule */ 4722 /* Write Locks: ipf_nat */ 4723 /* */ 4724 /* ------------------------------------------------------------------------ */ 4725 void fr_ipnatderef(inp, ifs) 4726 ipnat_t **inp; 4727 ipf_stack_t *ifs; 4728 { 4729 ipnat_t *in; 4730 4731 in = *inp; 4732 *inp = NULL; 4733 in->in_space++; 4734 in->in_use--; 4735 if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) { 4736 if (in->in_apr) 4737 appr_free(in->in_apr); 4738 KFREE(in); 4739 ifs->ifs_nat_stats.ns_rules--; 4740 #ifdef notdef 4741 #if SOLARIS 4742 if (ifs->ifs_nat_stats.ns_rules == 0) 4743 ifs->ifs_pfil_delayed_copy = 1; 4744 #endif 4745 #endif 4746 } 4747 } 4748 4749 4750 /* ------------------------------------------------------------------------ */ 4751 /* Function: fr_natderef */ 4752 /* Returns: Nil */ 4753 /* Parameters: isp(I) - pointer to pointer to NAT table entry */ 4754 /* */ 4755 /* Decrement the reference counter for this NAT table entry and free it if */ 4756 /* there are no more things using it. */ 4757 /* */ 4758 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */ 4759 /* structure *because* it only gets called on paths _after_ nat_ref has been*/ 4760 /* incremented. If nat_ref == 1 then we shouldn't decrement it here */ 4761 /* because nat_delete() will do that and send nat_ref to -1. */ 4762 /* */ 4763 /* Holding the lock on nat_lock is required to serialise nat_delete() being */ 4764 /* called from a NAT flush ioctl with a deref happening because of a packet.*/ 4765 /* ------------------------------------------------------------------------ */ 4766 void fr_natderef(natp, ifs) 4767 nat_t **natp; 4768 ipf_stack_t *ifs; 4769 { 4770 nat_t *nat; 4771 4772 nat = *natp; 4773 *natp = NULL; 4774 4775 MUTEX_ENTER(&nat->nat_lock); 4776 if (nat->nat_ref > 1) { 4777 nat->nat_ref--; 4778 MUTEX_EXIT(&nat->nat_lock); 4779 return; 4780 } 4781 MUTEX_EXIT(&nat->nat_lock); 4782 4783 WRITE_ENTER(&ifs->ifs_ipf_nat); 4784 nat_delete(nat, NL_EXPIRE, ifs); 4785 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4786 } 4787 4788 4789 /* ------------------------------------------------------------------------ */ 4790 /* Function: fr_natclone */ 4791 /* Returns: ipstate_t* - NULL == cloning failed, */ 4792 /* else pointer to new state structure */ 4793 /* Parameters: fin(I) - pointer to packet information */ 4794 /* is(I) - pointer to master state structure */ 4795 /* Write Lock: ipf_nat */ 4796 /* */ 4797 /* Create a "duplcate" state table entry from the master. */ 4798 /* ------------------------------------------------------------------------ */ 4799 static nat_t *fr_natclone(fin, nat) 4800 fr_info_t *fin; 4801 nat_t *nat; 4802 { 4803 frentry_t *fr; 4804 nat_t *clone; 4805 ipnat_t *np; 4806 ipf_stack_t *ifs = fin->fin_ifs; 4807 4808 KMALLOC(clone, nat_t *); 4809 if (clone == NULL) 4810 return NULL; 4811 bcopy((char *)nat, (char *)clone, sizeof(*clone)); 4812 4813 MUTEX_NUKE(&clone->nat_lock); 4814 4815 clone->nat_aps = NULL; 4816 /* 4817 * Initialize all these so that nat_delete() doesn't cause a crash. 4818 */ 4819 clone->nat_tqe.tqe_pnext = NULL; 4820 clone->nat_tqe.tqe_next = NULL; 4821 clone->nat_tqe.tqe_ifq = NULL; 4822 clone->nat_tqe.tqe_parent = clone; 4823 4824 clone->nat_flags &= ~SI_CLONE; 4825 clone->nat_flags |= SI_CLONED; 4826 4827 if (clone->nat_hm) 4828 clone->nat_hm->hm_ref++; 4829 4830 if (nat_insert(clone, fin->fin_rev, ifs) == -1) { 4831 KFREE(clone); 4832 return NULL; 4833 } 4834 np = clone->nat_ptr; 4835 if (np != NULL) { 4836 if (ifs->ifs_nat_logging) 4837 nat_log(clone, (u_int)np->in_redir, ifs); 4838 np->in_use++; 4839 } 4840 fr = clone->nat_fr; 4841 if (fr != NULL) { 4842 MUTEX_ENTER(&fr->fr_lock); 4843 fr->fr_ref++; 4844 MUTEX_EXIT(&fr->fr_lock); 4845 } 4846 4847 /* 4848 * Because the clone is created outside the normal loop of things and 4849 * TCP has special needs in terms of state, initialise the timeout 4850 * state of the new NAT from here. 4851 */ 4852 if (clone->nat_p == IPPROTO_TCP) { 4853 (void) fr_tcp_age(&clone->nat_tqe, fin, ifs->ifs_nat_tqb, 4854 clone->nat_flags); 4855 } 4856 #ifdef IPFILTER_SYNC 4857 clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone); 4858 #endif 4859 if (ifs->ifs_nat_logging) 4860 nat_log(clone, NL_CLONE, ifs); 4861 return clone; 4862 } 4863 4864 4865 /* ------------------------------------------------------------------------ */ 4866 /* Function: nat_wildok */ 4867 /* Returns: int - 1 == packet's ports match wildcards */ 4868 /* 0 == packet's ports don't match wildcards */ 4869 /* Parameters: nat(I) - NAT entry */ 4870 /* sport(I) - source port */ 4871 /* dport(I) - destination port */ 4872 /* flags(I) - wildcard flags */ 4873 /* dir(I) - packet direction */ 4874 /* */ 4875 /* Use NAT entry and packet direction to determine which combination of */ 4876 /* wildcard flags should be used. */ 4877 /* ------------------------------------------------------------------------ */ 4878 static INLINE int nat_wildok(nat, sport, dport, flags, dir) 4879 nat_t *nat; 4880 int sport; 4881 int dport; 4882 int flags; 4883 int dir; 4884 { 4885 /* 4886 * When called by dir is set to 4887 * nat_inlookup NAT_INBOUND (0) 4888 * nat_outlookup NAT_OUTBOUND (1) 4889 * 4890 * We simply combine the packet's direction in dir with the original 4891 * "intended" direction of that NAT entry in nat->nat_dir to decide 4892 * which combination of wildcard flags to allow. 4893 */ 4894 4895 switch ((dir << 1) | nat->nat_dir) 4896 { 4897 case 3: /* outbound packet / outbound entry */ 4898 if (((nat->nat_inport == sport) || 4899 (flags & SI_W_SPORT)) && 4900 ((nat->nat_oport == dport) || 4901 (flags & SI_W_DPORT))) 4902 return 1; 4903 break; 4904 case 2: /* outbound packet / inbound entry */ 4905 if (((nat->nat_outport == sport) || 4906 (flags & SI_W_DPORT)) && 4907 ((nat->nat_oport == dport) || 4908 (flags & SI_W_SPORT))) 4909 return 1; 4910 break; 4911 case 1: /* inbound packet / outbound entry */ 4912 if (((nat->nat_oport == sport) || 4913 (flags & SI_W_DPORT)) && 4914 ((nat->nat_outport == dport) || 4915 (flags & SI_W_SPORT))) 4916 return 1; 4917 break; 4918 case 0: /* inbound packet / inbound entry */ 4919 if (((nat->nat_oport == sport) || 4920 (flags & SI_W_SPORT)) && 4921 ((nat->nat_outport == dport) || 4922 (flags & SI_W_DPORT))) 4923 return 1; 4924 break; 4925 default: 4926 break; 4927 } 4928 4929 return(0); 4930 } 4931 4932 4933 /* ------------------------------------------------------------------------ */ 4934 /* Function: nat_mssclamp */ 4935 /* Returns: Nil */ 4936 /* Parameters: tcp(I) - pointer to TCP header */ 4937 /* maxmss(I) - value to clamp the TCP MSS to */ 4938 /* csump(I) - pointer to TCP checksum */ 4939 /* */ 4940 /* Check for MSS option and clamp it if necessary. If found and changed, */ 4941 /* then the TCP header checksum will be updated to reflect the change in */ 4942 /* the MSS. */ 4943 /* ------------------------------------------------------------------------ */ 4944 static void nat_mssclamp(tcp, maxmss, csump) 4945 tcphdr_t *tcp; 4946 u_32_t maxmss; 4947 u_short *csump; 4948 { 4949 u_char *cp, *ep, opt; 4950 int hlen, advance; 4951 u_32_t mss, sumd; 4952 4953 hlen = TCP_OFF(tcp) << 2; 4954 if (hlen > sizeof(*tcp)) { 4955 cp = (u_char *)tcp + sizeof(*tcp); 4956 ep = (u_char *)tcp + hlen; 4957 4958 while (cp < ep) { 4959 opt = cp[0]; 4960 if (opt == TCPOPT_EOL) 4961 break; 4962 else if (opt == TCPOPT_NOP) { 4963 cp++; 4964 continue; 4965 } 4966 4967 if (cp + 1 >= ep) 4968 break; 4969 advance = cp[1]; 4970 if ((cp + advance > ep) || (advance <= 0)) 4971 break; 4972 switch (opt) 4973 { 4974 case TCPOPT_MAXSEG: 4975 if (advance != 4) 4976 break; 4977 mss = cp[2] * 256 + cp[3]; 4978 if (mss > maxmss) { 4979 cp[2] = maxmss / 256; 4980 cp[3] = maxmss & 0xff; 4981 CALC_SUMD(mss, maxmss, sumd); 4982 fix_outcksum(csump, sumd); 4983 } 4984 break; 4985 default: 4986 /* ignore unknown options */ 4987 break; 4988 } 4989 4990 cp += advance; 4991 } 4992 } 4993 } 4994 4995 4996 /* ------------------------------------------------------------------------ */ 4997 /* Function: fr_setnatqueue */ 4998 /* Returns: Nil */ 4999 /* Parameters: nat(I)- pointer to NAT structure */ 5000 /* rev(I) - forward(0) or reverse(1) direction */ 5001 /* Locks: ipf_nat (read or write) */ 5002 /* */ 5003 /* Put the NAT entry on its default queue entry, using rev as a helped in */ 5004 /* determining which queue it should be placed on. */ 5005 /* ------------------------------------------------------------------------ */ 5006 void fr_setnatqueue(nat, rev, ifs) 5007 nat_t *nat; 5008 int rev; 5009 ipf_stack_t *ifs; 5010 { 5011 ipftq_t *oifq, *nifq; 5012 5013 if (nat->nat_ptr != NULL) 5014 nifq = nat->nat_ptr->in_tqehead[rev]; 5015 else 5016 nifq = NULL; 5017 5018 if (nifq == NULL) { 5019 switch (nat->nat_p) 5020 { 5021 case IPPROTO_UDP : 5022 nifq = &ifs->ifs_nat_udptq; 5023 break; 5024 case IPPROTO_ICMP : 5025 nifq = &ifs->ifs_nat_icmptq; 5026 break; 5027 case IPPROTO_TCP : 5028 nifq = ifs->ifs_nat_tqb + nat->nat_tqe.tqe_state[rev]; 5029 break; 5030 default : 5031 nifq = &ifs->ifs_nat_iptq; 5032 break; 5033 } 5034 } 5035 5036 oifq = nat->nat_tqe.tqe_ifq; 5037 /* 5038 * If it's currently on a timeout queue, move it from one queue to 5039 * another, else put it on the end of the newly determined queue. 5040 */ 5041 if (oifq != NULL) 5042 fr_movequeue(&nat->nat_tqe, oifq, nifq, ifs); 5043 else 5044 fr_queueappend(&nat->nat_tqe, nifq, nat, ifs); 5045 return; 5046 } 5047 5048 /* Function: nat_getnext */ 5049 /* Returns: int - 0 == ok, else error */ 5050 /* Parameters: t(I) - pointer to ipftoken structure */ 5051 /* itp(I) - pointer to ipfgeniter_t structure */ 5052 /* */ 5053 /* Fetch the next nat/ipnat structure pointer from the linked list and */ 5054 /* copy it out to the storage space pointed to by itp_data. The next item */ 5055 /* in the list to look at is put back in the ipftoken struture. */ 5056 /* If we call ipf_freetoken, the accompanying pointer is set to NULL because*/ 5057 /* ipf_freetoken will call a deref function for us and we dont want to call */ 5058 /* that twice (second time would be in the second switch statement below. */ 5059 /* ------------------------------------------------------------------------ */ 5060 static int nat_getnext(t, itp, ifs) 5061 ipftoken_t *t; 5062 ipfgeniter_t *itp; 5063 ipf_stack_t *ifs; 5064 { 5065 hostmap_t *hm, *nexthm = NULL, zerohm; 5066 ipnat_t *ipn, *nextipnat = NULL, zeroipn; 5067 nat_t *nat, *nextnat = NULL, zeronat; 5068 int error = 0; 5069 5070 READ_ENTER(&ifs->ifs_ipf_nat); 5071 switch (itp->igi_type) 5072 { 5073 case IPFGENITER_HOSTMAP : 5074 hm = t->ipt_data; 5075 if (hm == NULL) { 5076 nexthm = ifs->ifs_ipf_hm_maplist; 5077 } else { 5078 nexthm = hm->hm_hnext; 5079 } 5080 if (nexthm != NULL) { 5081 if (nexthm->hm_hnext == NULL) { 5082 t->ipt_alive = 0; 5083 ipf_unlinktoken(t, ifs); 5084 KFREE(t); 5085 } else { 5086 /*MUTEX_ENTER(&nexthm->hm_lock);*/ 5087 nexthm->hm_ref++; 5088 /*MUTEX_EXIT(&nextipnat->hm_lock);*/ 5089 } 5090 5091 } else { 5092 bzero(&zerohm, sizeof(zerohm)); 5093 nexthm = &zerohm; 5094 ipf_freetoken(t, ifs); 5095 } 5096 break; 5097 5098 case IPFGENITER_IPNAT : 5099 ipn = t->ipt_data; 5100 if (ipn == NULL) { 5101 nextipnat = ifs->ifs_nat_list; 5102 } else { 5103 nextipnat = ipn->in_next; 5104 } 5105 if (nextipnat != NULL) { 5106 if (nextipnat->in_next == NULL) { 5107 t->ipt_alive = 0; 5108 ipf_unlinktoken(t, ifs); 5109 KFREE(t); 5110 } else { 5111 /* MUTEX_ENTER(&nextipnat->in_lock); */ 5112 nextipnat->in_use++; 5113 /* MUTEX_EXIT(&nextipnat->in_lock); */ 5114 } 5115 } else { 5116 bzero(&zeroipn, sizeof(zeroipn)); 5117 nextipnat = &zeroipn; 5118 ipf_freetoken(t, ifs); 5119 } 5120 break; 5121 5122 case IPFGENITER_NAT : 5123 nat = t->ipt_data; 5124 if (nat == NULL) { 5125 nextnat = ifs->ifs_nat_instances; 5126 } else { 5127 nextnat = nat->nat_next; 5128 } 5129 if (nextnat != NULL) { 5130 if (nextnat->nat_next == NULL) { 5131 t->ipt_alive = 0; 5132 ipf_unlinktoken(t, ifs); 5133 KFREE(t); 5134 } else { 5135 MUTEX_ENTER(&nextnat->nat_lock); 5136 nextnat->nat_ref++; 5137 MUTEX_EXIT(&nextnat->nat_lock); 5138 } 5139 } else { 5140 bzero(&zeronat, sizeof(zeronat)); 5141 nextnat = &zeronat; 5142 ipf_freetoken(t, ifs); 5143 } 5144 break; 5145 } 5146 5147 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5148 5149 switch (itp->igi_type) 5150 { 5151 case IPFGENITER_HOSTMAP : 5152 if (hm != NULL) { 5153 WRITE_ENTER(&ifs->ifs_ipf_nat); 5154 fr_hostmapderef(&hm); 5155 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5156 } 5157 if (nexthm->hm_hnext != NULL) 5158 t->ipt_data = nexthm; 5159 error = COPYOUT(nexthm, itp->igi_data, sizeof(*nexthm)); 5160 if (error != 0) 5161 error = EFAULT; 5162 break; 5163 5164 case IPFGENITER_IPNAT : 5165 if (ipn != NULL) 5166 fr_ipnatderef(&ipn, ifs); 5167 if (nextipnat->in_next != NULL) 5168 t->ipt_data = nextipnat; 5169 error = COPYOUT(nextipnat, itp->igi_data, sizeof(*nextipnat)); 5170 if (error != 0) 5171 error = EFAULT; 5172 break; 5173 5174 case IPFGENITER_NAT : 5175 if (nat != NULL) 5176 fr_natderef(&nat, ifs); 5177 if (nextnat->nat_next != NULL) 5178 t->ipt_data = nextnat; 5179 error = COPYOUT(nextnat, itp->igi_data, sizeof(*nextnat)); 5180 if (error != 0) 5181 error = EFAULT; 5182 break; 5183 } 5184 5185 return error; 5186 } 5187 5188 5189 /* ------------------------------------------------------------------------ */ 5190 /* Function: nat_iterator */ 5191 /* Returns: int - 0 == ok, else error */ 5192 /* Parameters: token(I) - pointer to ipftoken structure */ 5193 /* itp(I) - pointer to ipfgeniter_t structure */ 5194 /* */ 5195 /* This function acts as a handler for the SIOCGENITER ioctls that use a */ 5196 /* generic structure to iterate through a list. There are three different */ 5197 /* linked lists of NAT related information to go through: NAT rules, active */ 5198 /* NAT mappings and the NAT fragment cache. */ 5199 /* ------------------------------------------------------------------------ */ 5200 static int nat_iterator(token, itp, ifs) 5201 ipftoken_t *token; 5202 ipfgeniter_t *itp; 5203 ipf_stack_t *ifs; 5204 { 5205 int error; 5206 5207 if (itp->igi_data == NULL) 5208 return EFAULT; 5209 5210 token->ipt_subtype = itp->igi_type; 5211 5212 switch (itp->igi_type) 5213 { 5214 case IPFGENITER_HOSTMAP : 5215 case IPFGENITER_IPNAT : 5216 case IPFGENITER_NAT : 5217 error = nat_getnext(token, itp, ifs); 5218 break; 5219 case IPFGENITER_NATFRAG : 5220 error = fr_nextfrag(token, itp, &ifs->ifs_ipfr_natlist, 5221 &ifs->ifs_ipfr_nattail, 5222 &ifs->ifs_ipf_natfrag, ifs); 5223 break; 5224 default : 5225 error = EINVAL; 5226 break; 5227 } 5228 5229 return error; 5230 } 5231 5232 5233 /* -------------------------------------------------------------------- */ 5234 /* Function: nat_earlydrop */ 5235 /* Returns: number of dropped/removed entries from the queue */ 5236 /* Parameters: ifq - pointer to queue with entries to be processed */ 5237 /* maxidle - entry must be idle this long to be dropped */ 5238 /* ifs - ipf stack instance */ 5239 /* */ 5240 /* Function is invoked from nat_extraflush() only. Removes entries */ 5241 /* form specified timeout queue, based on how long they've sat idle, */ 5242 /* without waiting for it to happen on its own. */ 5243 /* -------------------------------------------------------------------- */ 5244 static int nat_earlydrop(ifq, maxidle, ifs) 5245 ipftq_t *ifq; 5246 int maxidle; 5247 ipf_stack_t *ifs; 5248 { 5249 ipftqent_t *tqe, *tqn; 5250 nat_t *nat; 5251 unsigned int dropped; 5252 int droptick; 5253 5254 if (ifq == NULL) 5255 return (0); 5256 5257 dropped = 0; 5258 5259 /* 5260 * Determine the tick representing the idle time we're interested 5261 * in. If an entry exists in the queue, and it was touched before 5262 * that tick, then it's been idle longer than maxidle ... remove it. 5263 */ 5264 droptick = ifs->ifs_fr_ticks - maxidle; 5265 tqn = ifq->ifq_head; 5266 while ((tqe = tqn) != NULL && tqe->tqe_touched < droptick) { 5267 tqn = tqe->tqe_next; 5268 nat = tqe->tqe_parent; 5269 nat_delete(nat, ISL_EXPIRE, ifs); 5270 dropped++; 5271 } 5272 return (dropped); 5273 } 5274 5275 5276 /* --------------------------------------------------------------------- */ 5277 /* Function: nat_flushclosing */ 5278 /* Returns: int - number of NAT entries deleted */ 5279 /* Parameters: stateval(I) - State at which to start removing entries */ 5280 /* ifs - ipf stack instance */ 5281 /* */ 5282 /* Remove nat table entries for TCP connections which are in the process */ 5283 /* of closing, and are in (or "beyond") state specified by 'stateval'. */ 5284 /* --------------------------------------------------------------------- */ 5285 static int nat_flushclosing(stateval, ifs) 5286 int stateval; 5287 ipf_stack_t *ifs; 5288 { 5289 ipftq_t *ifq, *ifqn; 5290 ipftqent_t *tqe, *tqn; 5291 nat_t *nat; 5292 int dropped; 5293 5294 dropped = 0; 5295 5296 /* 5297 * Start by deleting any entries in specific timeout queues. 5298 */ 5299 ifqn = &ifs->ifs_nat_tqb[stateval]; 5300 while ((ifq = ifqn) != NULL) { 5301 ifqn = ifq->ifq_next; 5302 dropped += nat_earlydrop(ifq, (int)0, ifs); 5303 } 5304 5305 /* 5306 * Next, look through user defined queues for closing entries. 5307 */ 5308 ifqn = ifs->ifs_nat_utqe; 5309 while ((ifq = ifqn) != NULL) { 5310 ifqn = ifq->ifq_next; 5311 tqn = ifq->ifq_head; 5312 while ((tqe = tqn) != NULL) { 5313 tqn = tqe->tqe_next; 5314 nat = tqe->tqe_parent; 5315 if (nat->nat_p != IPPROTO_TCP) 5316 continue; 5317 if ((nat->nat_tcpstate[0] >= stateval) && 5318 (nat->nat_tcpstate[1] >= stateval)) { 5319 nat_delete(nat, NL_EXPIRE, ifs); 5320 dropped++; 5321 } 5322 } 5323 } 5324 return (dropped); 5325 } 5326 5327 5328 /* --------------------------------------------------------------------- */ 5329 /* Function: nat_extraflush */ 5330 /* Returns: int - number of NAT entries deleted */ 5331 /* Parameters: which(I) - how to flush the active NAT table */ 5332 /* ifs - ipf stack instance */ 5333 /* Write Locks: ipf_nat */ 5334 /* */ 5335 /* Flush nat tables. Three actions currently defined: */ 5336 /* */ 5337 /* which == 0 : Flush all nat table entries. */ 5338 /* */ 5339 /* which == 1 : Flush entries with TCP connections which have started */ 5340 /* to close on both ends. */ 5341 /* */ 5342 /* which == 2 : First, flush entries which are "almost" closed. If that */ 5343 /* does not take us below specified threshold in the table, */ 5344 /* we want to flush entries with TCP connections which have */ 5345 /* been idle for a long time. Start with connections idle */ 5346 /* over 12 hours, and then work backwards in half hour */ 5347 /* increments to at most 30 minutes idle, and finally work */ 5348 /* back in 30 second increments to at most 30 seconds. */ 5349 /* --------------------------------------------------------------------- */ 5350 static int nat_extraflush(which, ifs) 5351 int which; 5352 ipf_stack_t *ifs; 5353 { 5354 ipftq_t *ifq, *ifqn; 5355 nat_t *nat, **natp; 5356 int idletime, removed, idle_idx; 5357 SPL_INT(s); 5358 5359 removed = 0; 5360 5361 SPL_NET(s); 5362 switch (which) 5363 { 5364 case 0: 5365 natp = &ifs->ifs_nat_instances; 5366 while ((nat = *natp) != NULL) { 5367 natp = &nat->nat_next; 5368 nat_delete(nat, ISL_FLUSH, ifs); 5369 removed++; 5370 } 5371 break; 5372 5373 case 1: 5374 removed = nat_flushclosing(IPF_TCPS_CLOSE_WAIT, ifs); 5375 break; 5376 5377 case 2: 5378 removed = nat_flushclosing(IPF_TCPS_FIN_WAIT_2, ifs); 5379 5380 /* 5381 * Be sure we haven't done this in the last 10 seconds. 5382 */ 5383 if (ifs->ifs_fr_ticks - ifs->ifs_nat_last_force_flush < 5384 IPF_TTLVAL(10)) 5385 break; 5386 ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks; 5387 5388 /* 5389 * Determine initial threshold for minimum idle time based on 5390 * how long ipfilter has been running. Ipfilter needs to have 5391 * been up as long as the smallest interval to continue on. 5392 * 5393 * Minimum idle times stored in idletime_tab and indexed by 5394 * idle_idx. Start at upper end of array and work backwards. 5395 * 5396 * Once the index is found, set the initial idle time to the 5397 * first interval before the current ipfilter run time. 5398 */ 5399 if (ifs->ifs_fr_ticks < idletime_tab[0]) 5400 break; /* switch */ 5401 idle_idx = (sizeof (idletime_tab) / sizeof (int)) - 1; 5402 if (ifs->ifs_fr_ticks > idletime_tab[idle_idx]) { 5403 idletime = idletime_tab[idle_idx]; 5404 } else { 5405 while ((idle_idx > 0) && 5406 (ifs->ifs_fr_ticks < idletime_tab[idle_idx])) 5407 idle_idx--; 5408 idletime = (ifs->ifs_fr_ticks / 5409 idletime_tab[idle_idx]) * 5410 idletime_tab[idle_idx]; 5411 } 5412 5413 while ((idle_idx >= 0) && 5414 (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_lo)) { 5415 /* 5416 * Start with appropriate timeout queue. 5417 */ 5418 removed += nat_earlydrop( 5419 &ifs->ifs_nat_tqb[IPF_TCPS_ESTABLISHED], 5420 idletime, ifs); 5421 5422 /* 5423 * Make sure we haven't already deleted enough 5424 * entries before checking the user defined queues. 5425 */ 5426 if (NAT_TAB_WATER_LEVEL(ifs) <= 5427 ifs->ifs_nat_flush_lvl_lo) 5428 break; 5429 5430 /* 5431 * Next, look through the user defined queues. 5432 */ 5433 ifqn = ifs->ifs_nat_utqe; 5434 while ((ifq = ifqn) != NULL) { 5435 ifqn = ifq->ifq_next; 5436 removed += nat_earlydrop(ifq, idletime, ifs); 5437 } 5438 5439 /* 5440 * Adjust the granularity of idle time. 5441 * 5442 * If we reach an interval boundary, we need to 5443 * either adjust the idle time accordingly or exit 5444 * the loop altogether (if this is very last check). 5445 */ 5446 idletime -= idletime_tab[idle_idx]; 5447 if (idletime < idletime_tab[idle_idx]) { 5448 if (idle_idx != 0) { 5449 idletime = idletime_tab[idle_idx] - 5450 idletime_tab[idle_idx - 1]; 5451 idle_idx--; 5452 } else { 5453 break; /* while */ 5454 } 5455 } 5456 } 5457 break; 5458 default: 5459 break; 5460 } 5461 5462 SPL_X(s); 5463 return (removed); 5464 } 5465