1 /* 2 * Copyright (C) 1995-2003 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 7 * Use is subject to license terms. 8 */ 9 10 #pragma ident "%Z%%M% %I% %E% SMI"$ 11 12 #if defined(KERNEL) || defined(_KERNEL) 13 # undef KERNEL 14 # undef _KERNEL 15 # define KERNEL 1 16 # define _KERNEL 1 17 #endif 18 #include <sys/errno.h> 19 #include <sys/types.h> 20 #include <sys/param.h> 21 #include <sys/time.h> 22 #include <sys/file.h> 23 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ 24 defined(_KERNEL) 25 # include "opt_ipfilter_log.h" 26 #endif 27 #if !defined(_KERNEL) 28 # include <stdio.h> 29 # include <string.h> 30 # include <stdlib.h> 31 # define _KERNEL 32 # ifdef __OpenBSD__ 33 struct file; 34 # endif 35 # include <sys/uio.h> 36 # undef _KERNEL 37 #endif 38 #if defined(_KERNEL) && (__FreeBSD_version >= 220000) 39 # include <sys/filio.h> 40 # include <sys/fcntl.h> 41 #else 42 # include <sys/ioctl.h> 43 #endif 44 #if !defined(AIX) 45 # include <sys/fcntl.h> 46 #endif 47 #if !defined(linux) 48 # include <sys/protosw.h> 49 #endif 50 #include <sys/socket.h> 51 #if defined(_KERNEL) 52 # include <sys/systm.h> 53 # if !defined(__SVR4) && !defined(__svr4__) 54 # include <sys/mbuf.h> 55 # endif 56 #endif 57 #if defined(__SVR4) || defined(__svr4__) 58 # include <sys/filio.h> 59 # include <sys/byteorder.h> 60 # ifdef _KERNEL 61 # include <sys/dditypes.h> 62 # endif 63 # include <sys/stream.h> 64 # include <sys/kmem.h> 65 #endif 66 #if __FreeBSD_version >= 300000 67 # include <sys/queue.h> 68 #endif 69 #include <net/if.h> 70 #if __FreeBSD_version >= 300000 71 # include <net/if_var.h> 72 # if defined(_KERNEL) && !defined(IPFILTER_LKM) 73 # include "opt_ipfilter.h" 74 # endif 75 #endif 76 #ifdef sun 77 # include <net/af.h> 78 #endif 79 #include <net/route.h> 80 #include <netinet/in.h> 81 #include <netinet/in_systm.h> 82 #include <netinet/ip.h> 83 84 #ifdef 
RFC1825 85 # include <vpn/md5.h> 86 # include <vpn/ipsec.h> 87 extern struct ifnet vpnif; 88 #endif 89 90 #if !defined(linux) 91 # include <netinet/ip_var.h> 92 #endif 93 #include <netinet/tcp.h> 94 #include <netinet/udp.h> 95 #include <netinet/ip_icmp.h> 96 #include "netinet/ip_compat.h" 97 #include <netinet/tcpip.h> 98 #include "netinet/ip_fil.h" 99 #include "netinet/ip_nat.h" 100 #include "netinet/ip_frag.h" 101 #include "netinet/ip_state.h" 102 #include "netinet/ip_proxy.h" 103 #include "netinet/ipf_stack.h" 104 #ifdef IPFILTER_SYNC 105 #include "netinet/ip_sync.h" 106 #endif 107 #if (__FreeBSD_version >= 300000) 108 # include <sys/malloc.h> 109 #endif 110 /* END OF INCLUDES */ 111 112 #undef SOCKADDR_IN 113 #define SOCKADDR_IN struct sockaddr_in 114 115 #if !defined(lint) 116 static const char sccsid[] = "@(#)ip_nat.c 1.11 6/5/96 (C) 1995 Darren Reed"; 117 static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 darrenr Exp $"; 118 #endif 119 120 121 /* ======================================================================== */ 122 /* How the NAT is organised and works. */ 123 /* */ 124 /* Inside (interface y) NAT Outside (interface x) */ 125 /* -------------------- -+- ------------------------------------- */ 126 /* Packet going | out, processed by fr_checknatout() for x */ 127 /* ------------> | ------------> */ 128 /* src=10.1.1.1 | src=192.1.1.1 */ 129 /* | */ 130 /* | in, processed by fr_checknatin() for x */ 131 /* <------------ | <------------ */ 132 /* dst=10.1.1.1 | dst=192.1.1.1 */ 133 /* -------------------- -+- ------------------------------------- */ 134 /* fr_checknatout() - changes ip_src and if required, sport */ 135 /* - creates a new mapping, if required. */ 136 /* fr_checknatin() - changes ip_dst and if required, dport */ 137 /* */ 138 /* In the NAT table, internal source is recorded as "in" and externally */ 139 /* seen as "out". 
*/ 140 /* ======================================================================== */ 141 142 143 static int nat_flushtable __P((ipf_stack_t *)); 144 static int nat_clearlist __P((ipf_stack_t *)); 145 static void nat_addnat __P((struct ipnat *, ipf_stack_t *)); 146 static void nat_addrdr __P((struct ipnat *, ipf_stack_t *)); 147 static void nat_delete __P((struct nat *, int, ipf_stack_t *)); 148 static void nat_delrdr __P((struct ipnat *)); 149 static void nat_delnat __P((struct ipnat *)); 150 static int fr_natgetent __P((caddr_t, ipf_stack_t *)); 151 static int fr_natgetsz __P((caddr_t, ipf_stack_t *)); 152 static int fr_natputent __P((caddr_t, int, ipf_stack_t *)); 153 static void nat_tabmove __P((nat_t *, ipf_stack_t *)); 154 static int nat_match __P((fr_info_t *, ipnat_t *)); 155 static INLINE int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *)); 156 static INLINE int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *)); 157 static hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr, 158 struct in_addr, struct in_addr, u_32_t, 159 ipf_stack_t *)); 160 static INLINE int nat_icmpquerytype4 __P((int)); 161 static int nat_siocaddnat __P((ipnat_t *, ipnat_t **, int, 162 ipf_stack_t *)); 163 static void nat_siocdelnat __P((ipnat_t *, ipnat_t **, int, 164 ipf_stack_t *)); 165 static INLINE int nat_icmperrortype4 __P((int)); 166 static INLINE int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *, 167 tcphdr_t *, nat_t **, int)); 168 static INLINE int nat_resolverule __P((ipnat_t *, ipf_stack_t *)); 169 static nat_t *fr_natclone __P((fr_info_t *, nat_t *)); 170 static void nat_mssclamp __P((tcphdr_t *, u_32_t, u_short *)); 171 static INLINE int nat_wildok __P((nat_t *, int, int, int, int)); 172 static int nat_getnext __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 173 static int nat_iterator __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 174 static int nat_extraflush __P((int, ipf_stack_t *)); 175 static int nat_earlydrop __P((ipftq_t *, int, ipf_stack_t 
*)); 176 static int nat_flushclosing __P((int, ipf_stack_t *)); 177 178 179 /* 180 * Below we declare a list of constants used only in the nat_extraflush() 181 * routine. We are placing it here, instead of in nat_extraflush() itself, 182 * because we want to make it visible to tools such as mdb, nm etc., so the 183 * values can easily be altered during debugging. 184 */ 185 static const int idletime_tab[] = { 186 IPF_TTLVAL(30), /* 30 seconds */ 187 IPF_TTLVAL(1800), /* 30 minutes */ 188 IPF_TTLVAL(43200), /* 12 hours */ 189 IPF_TTLVAL(345600), /* 4 days */ 190 }; 191 192 #define NAT_HAS_L4_CHANGED(n) \ 193 (((n)->nat_flags & (IPN_TCPUDPICMP | IPN_ICMPQUERY)) && \ 194 (n)->nat_inport != (n)->nat_outport) 195 196 197 /* ------------------------------------------------------------------------ */ 198 /* Function: fr_natinit */ 199 /* Returns: int - 0 == success, -1 == failure */ 200 /* Parameters: Nil */ 201 /* */ 202 /* Initialise all of the NAT locks, tables and other structures. */ 203 /* ------------------------------------------------------------------------ */ 204 int fr_natinit(ifs) 205 ipf_stack_t *ifs; 206 { 207 int i; 208 209 KMALLOCS(ifs->ifs_nat_table[0], nat_t **, 210 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 211 if (ifs->ifs_nat_table[0] != NULL) 212 bzero((char *)ifs->ifs_nat_table[0], 213 ifs->ifs_ipf_nattable_sz * sizeof(nat_t *)); 214 else 215 return -1; 216 217 KMALLOCS(ifs->ifs_nat_table[1], nat_t **, 218 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 219 if (ifs->ifs_nat_table[1] != NULL) 220 bzero((char *)ifs->ifs_nat_table[1], 221 ifs->ifs_ipf_nattable_sz * sizeof(nat_t *)); 222 else 223 return -2; 224 225 KMALLOCS(ifs->ifs_nat_rules, ipnat_t **, 226 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz); 227 if (ifs->ifs_nat_rules != NULL) 228 bzero((char *)ifs->ifs_nat_rules, 229 ifs->ifs_ipf_natrules_sz * sizeof(ipnat_t *)); 230 else 231 return -3; 232 233 KMALLOCS(ifs->ifs_rdr_rules, ipnat_t **, 234 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz); 235 
if (ifs->ifs_rdr_rules != NULL) 236 bzero((char *)ifs->ifs_rdr_rules, 237 ifs->ifs_ipf_rdrrules_sz * sizeof(ipnat_t *)); 238 else 239 return -4; 240 241 KMALLOCS(ifs->ifs_maptable, hostmap_t **, 242 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); 243 if (ifs->ifs_maptable != NULL) 244 bzero((char *)ifs->ifs_maptable, 245 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); 246 else 247 return -5; 248 249 ifs->ifs_ipf_hm_maplist = NULL; 250 251 KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[0], u_long *, 252 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 253 if (ifs->ifs_nat_stats.ns_bucketlen[0] == NULL) 254 return -1; 255 bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[0], 256 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 257 258 KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[1], u_long *, 259 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 260 if (ifs->ifs_nat_stats.ns_bucketlen[1] == NULL) 261 return -1; 262 bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[1], 263 ifs->ifs_ipf_nattable_sz * sizeof(u_long)); 264 265 if (ifs->ifs_fr_nat_maxbucket == 0) { 266 for (i = ifs->ifs_ipf_nattable_sz; i > 0; i >>= 1) 267 ifs->ifs_fr_nat_maxbucket++; 268 ifs->ifs_fr_nat_maxbucket *= 2; 269 } 270 271 fr_sttab_init(ifs->ifs_nat_tqb, ifs); 272 /* 273 * Increase this because we may have "keep state" following this too 274 * and packet storms can occur if this is removed too quickly. 
275 */ 276 ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcplastack; 277 ifs->ifs_nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_nat_udptq; 278 ifs->ifs_nat_udptq.ifq_ttl = ifs->ifs_fr_defnatage; 279 ifs->ifs_nat_udptq.ifq_ref = 1; 280 ifs->ifs_nat_udptq.ifq_head = NULL; 281 ifs->ifs_nat_udptq.ifq_tail = &ifs->ifs_nat_udptq.ifq_head; 282 MUTEX_INIT(&ifs->ifs_nat_udptq.ifq_lock, "nat ipftq udp tab"); 283 ifs->ifs_nat_udptq.ifq_next = &ifs->ifs_nat_icmptq; 284 ifs->ifs_nat_icmptq.ifq_ttl = ifs->ifs_fr_defnaticmpage; 285 ifs->ifs_nat_icmptq.ifq_ref = 1; 286 ifs->ifs_nat_icmptq.ifq_head = NULL; 287 ifs->ifs_nat_icmptq.ifq_tail = &ifs->ifs_nat_icmptq.ifq_head; 288 MUTEX_INIT(&ifs->ifs_nat_icmptq.ifq_lock, "nat icmp ipftq tab"); 289 ifs->ifs_nat_icmptq.ifq_next = &ifs->ifs_nat_iptq; 290 ifs->ifs_nat_iptq.ifq_ttl = ifs->ifs_fr_defnatipage; 291 ifs->ifs_nat_iptq.ifq_ref = 1; 292 ifs->ifs_nat_iptq.ifq_head = NULL; 293 ifs->ifs_nat_iptq.ifq_tail = &ifs->ifs_nat_iptq.ifq_head; 294 MUTEX_INIT(&ifs->ifs_nat_iptq.ifq_lock, "nat ip ipftq tab"); 295 ifs->ifs_nat_iptq.ifq_next = NULL; 296 297 for (i = 0; i < IPF_TCP_NSTATES; i++) { 298 if (ifs->ifs_nat_tqb[i].ifq_ttl < ifs->ifs_fr_defnaticmpage) 299 ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnaticmpage; 300 #ifdef LARGE_NAT 301 else if (ifs->ifs_nat_tqb[i].ifq_ttl > ifs->ifs_fr_defnatage) 302 ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnatage; 303 #endif 304 } 305 306 /* 307 * Increase this because we may have "keep state" following 308 * this too and packet storms can occur if this is removed 309 * too quickly. 
310 */ 311 ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = 312 ifs->ifs_nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl; 313 314 RWLOCK_INIT(&ifs->ifs_ipf_nat, "ipf IP NAT rwlock"); 315 RWLOCK_INIT(&ifs->ifs_ipf_natfrag, "ipf IP NAT-Frag rwlock"); 316 MUTEX_INIT(&ifs->ifs_ipf_nat_new, "ipf nat new mutex"); 317 MUTEX_INIT(&ifs->ifs_ipf_natio, "ipf nat io mutex"); 318 319 ifs->ifs_fr_nat_init = 1; 320 321 return 0; 322 } 323 324 325 /* ------------------------------------------------------------------------ */ 326 /* Function: nat_addrdr */ 327 /* Returns: Nil */ 328 /* Parameters: n(I) - pointer to NAT rule to add */ 329 /* */ 330 /* Adds a redirect rule to the hash table of redirect rules and the list of */ 331 /* loaded NAT rules. Updates the bitmask indicating which netmasks are in */ 332 /* use by redirect rules. */ 333 /* ------------------------------------------------------------------------ */ 334 static void nat_addrdr(n, ifs) 335 ipnat_t *n; 336 ipf_stack_t *ifs; 337 { 338 ipnat_t **np; 339 u_32_t j; 340 u_int hv; 341 int k; 342 343 k = count4bits(n->in_outmsk); 344 if ((k >= 0) && (k != 32)) 345 ifs->ifs_rdr_masks |= 1 << k; 346 j = (n->in_outip & n->in_outmsk); 347 hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_rdrrules_sz); 348 np = ifs->ifs_rdr_rules + hv; 349 while (*np != NULL) 350 np = &(*np)->in_rnext; 351 n->in_rnext = NULL; 352 n->in_prnext = np; 353 n->in_hv = hv; 354 *np = n; 355 } 356 357 358 /* ------------------------------------------------------------------------ */ 359 /* Function: nat_addnat */ 360 /* Returns: Nil */ 361 /* Parameters: n(I) - pointer to NAT rule to add */ 362 /* */ 363 /* Adds a NAT map rule to the hash table of rules and the list of loaded */ 364 /* NAT rules. Updates the bitmask indicating which netmasks are in use by */ 365 /* redirect rules. 
*/ 366 /* ------------------------------------------------------------------------ */ 367 static void nat_addnat(n, ifs) 368 ipnat_t *n; 369 ipf_stack_t *ifs; 370 { 371 ipnat_t **np; 372 u_32_t j; 373 u_int hv; 374 int k; 375 376 k = count4bits(n->in_inmsk); 377 if ((k >= 0) && (k != 32)) 378 ifs->ifs_nat_masks |= 1 << k; 379 j = (n->in_inip & n->in_inmsk); 380 hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_natrules_sz); 381 np = ifs->ifs_nat_rules + hv; 382 while (*np != NULL) 383 np = &(*np)->in_mnext; 384 n->in_mnext = NULL; 385 n->in_pmnext = np; 386 n->in_hv = hv; 387 *np = n; 388 } 389 390 391 /* ------------------------------------------------------------------------ */ 392 /* Function: nat_delrdr */ 393 /* Returns: Nil */ 394 /* Parameters: n(I) - pointer to NAT rule to delete */ 395 /* */ 396 /* Removes a redirect rule from the hash table of redirect rules. */ 397 /* ------------------------------------------------------------------------ */ 398 static void nat_delrdr(n) 399 ipnat_t *n; 400 { 401 if (n->in_rnext) 402 n->in_rnext->in_prnext = n->in_prnext; 403 *n->in_prnext = n->in_rnext; 404 } 405 406 407 /* ------------------------------------------------------------------------ */ 408 /* Function: nat_delnat */ 409 /* Returns: Nil */ 410 /* Parameters: n(I) - pointer to NAT rule to delete */ 411 /* */ 412 /* Removes a NAT map rule from the hash table of NAT map rules. 
*/ 413 /* ------------------------------------------------------------------------ */ 414 static void nat_delnat(n) 415 ipnat_t *n; 416 { 417 if (n->in_mnext != NULL) 418 n->in_mnext->in_pmnext = n->in_pmnext; 419 *n->in_pmnext = n->in_mnext; 420 } 421 422 423 /* ------------------------------------------------------------------------ */ 424 /* Function: nat_hostmap */ 425 /* Returns: struct hostmap* - NULL if no hostmap could be created, */ 426 /* else a pointer to the hostmapping to use */ 427 /* Parameters: np(I) - pointer to NAT rule */ 428 /* real(I) - real IP address */ 429 /* map(I) - mapped IP address */ 430 /* port(I) - destination port number */ 431 /* Write Locks: ipf_nat */ 432 /* */ 433 /* Check if an ip address has already been allocated for a given mapping */ 434 /* that is not doing port based translation. If is not yet allocated, then */ 435 /* create a new entry if a non-NULL NAT rule pointer has been supplied. */ 436 /* ------------------------------------------------------------------------ */ 437 static struct hostmap *nat_hostmap(np, src, dst, map, port, ifs) 438 ipnat_t *np; 439 struct in_addr src; 440 struct in_addr dst; 441 struct in_addr map; 442 u_32_t port; 443 ipf_stack_t *ifs; 444 { 445 hostmap_t *hm; 446 u_int hv; 447 448 hv = (src.s_addr ^ dst.s_addr); 449 hv += src.s_addr; 450 hv += dst.s_addr; 451 hv %= HOSTMAP_SIZE; 452 for (hm = ifs->ifs_maptable[hv]; hm; hm = hm->hm_next) 453 if ((hm->hm_srcip.s_addr == src.s_addr) && 454 (hm->hm_dstip.s_addr == dst.s_addr) && 455 ((np == NULL) || (np == hm->hm_ipnat)) && 456 ((port == 0) || (port == hm->hm_port))) { 457 hm->hm_ref++; 458 return hm; 459 } 460 461 if (np == NULL) 462 return NULL; 463 464 KMALLOC(hm, hostmap_t *); 465 if (hm) { 466 hm->hm_hnext = ifs->ifs_ipf_hm_maplist; 467 hm->hm_phnext = &ifs->ifs_ipf_hm_maplist; 468 if (ifs->ifs_ipf_hm_maplist != NULL) 469 ifs->ifs_ipf_hm_maplist->hm_phnext = &hm->hm_hnext; 470 ifs->ifs_ipf_hm_maplist = hm; 471 472 hm->hm_next = 
ifs->ifs_maptable[hv]; 473 hm->hm_pnext = ifs->ifs_maptable + hv; 474 if (ifs->ifs_maptable[hv] != NULL) 475 ifs->ifs_maptable[hv]->hm_pnext = &hm->hm_next; 476 ifs->ifs_maptable[hv] = hm; 477 hm->hm_ipnat = np; 478 hm->hm_srcip = src; 479 hm->hm_dstip = dst; 480 hm->hm_mapip = map; 481 hm->hm_ref = 1; 482 hm->hm_port = port; 483 } 484 return hm; 485 } 486 487 488 /* ------------------------------------------------------------------------ */ 489 /* Function: fr_hostmapdel */ 490 /* Returns: Nil */ 491 /* Parameters: hmp(I) - pointer to pointer to hostmap structure */ 492 /* Write Locks: ipf_nat */ 493 /* */ 494 /* Decrement the references to this hostmap structure by one. If this */ 495 /* reaches zero then remove it and free it. */ 496 /* ------------------------------------------------------------------------ */ 497 void fr_hostmapdel(hmp) 498 struct hostmap **hmp; 499 { 500 struct hostmap *hm; 501 502 hm = *hmp; 503 *hmp = NULL; 504 505 hm->hm_ref--; 506 if (hm->hm_ref == 0) { 507 if (hm->hm_next) 508 hm->hm_next->hm_pnext = hm->hm_pnext; 509 *hm->hm_pnext = hm->hm_next; 510 if (hm->hm_hnext) 511 hm->hm_hnext->hm_phnext = hm->hm_phnext; 512 *hm->hm_phnext = hm->hm_hnext; 513 KFREE(hm); 514 } 515 } 516 517 518 /* ------------------------------------------------------------------------ */ 519 /* Function: fix_outcksum */ 520 /* Returns: Nil */ 521 /* Parameters: sp(I) - location of 16bit checksum to update */ 522 /* n((I) - amount to adjust checksum by */ 523 /* */ 524 /* Adjusts the 16bit checksum by "n" for packets going out. 
*/ 525 /* ------------------------------------------------------------------------ */ 526 void fix_outcksum(sp, n) 527 u_short *sp; 528 u_32_t n; 529 { 530 u_short sumshort; 531 u_32_t sum1; 532 533 if (n == 0) 534 return; 535 536 sum1 = (~ntohs(*sp)) & 0xffff; 537 sum1 += (n); 538 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 539 /* Again */ 540 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 541 sumshort = ~(u_short)sum1; 542 *(sp) = htons(sumshort); 543 } 544 545 546 /* ------------------------------------------------------------------------ */ 547 /* Function: fix_incksum */ 548 /* Returns: Nil */ 549 /* Parameters: sp(I) - location of 16bit checksum to update */ 550 /* n((I) - amount to adjust checksum by */ 551 /* */ 552 /* Adjusts the 16bit checksum by "n" for packets going in. */ 553 /* ------------------------------------------------------------------------ */ 554 void fix_incksum(sp, n) 555 u_short *sp; 556 u_32_t n; 557 { 558 u_short sumshort; 559 u_32_t sum1; 560 561 if (n == 0) 562 return; 563 564 sum1 = (~ntohs(*sp)) & 0xffff; 565 sum1 += ~(n) & 0xffff; 566 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 567 /* Again */ 568 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 569 sumshort = ~(u_short)sum1; 570 *(sp) = htons(sumshort); 571 } 572 573 574 /* ------------------------------------------------------------------------ */ 575 /* Function: fix_datacksum */ 576 /* Returns: Nil */ 577 /* Parameters: sp(I) - location of 16bit checksum to update */ 578 /* n((I) - amount to adjust checksum by */ 579 /* */ 580 /* Fix_datacksum is used *only* for the adjustments of checksums in the */ 581 /* data section of an IP packet. */ 582 /* */ 583 /* The only situation in which you need to do this is when NAT'ing an */ 584 /* ICMP error message. Such a message, contains in its body the IP header */ 585 /* of the original IP packet, that causes the error. 
*/ 586 /* */ 587 /* You can't use fix_incksum or fix_outcksum in that case, because for the */ 588 /* kernel the data section of the ICMP error is just data, and no special */ 589 /* processing like hardware cksum or ntohs processing have been done by the */ 590 /* kernel on the data section. */ 591 /* ------------------------------------------------------------------------ */ 592 void fix_datacksum(sp, n) 593 u_short *sp; 594 u_32_t n; 595 { 596 u_short sumshort; 597 u_32_t sum1; 598 599 if (n == 0) 600 return; 601 602 sum1 = (~ntohs(*sp)) & 0xffff; 603 sum1 += (n); 604 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 605 /* Again */ 606 sum1 = (sum1 >> 16) + (sum1 & 0xffff); 607 sumshort = ~(u_short)sum1; 608 *(sp) = htons(sumshort); 609 } 610 611 612 /* ------------------------------------------------------------------------ */ 613 /* Function: fr_nat_ioctl */ 614 /* Returns: int - 0 == success, != 0 == failure */ 615 /* Parameters: data(I) - pointer to ioctl data */ 616 /* cmd(I) - ioctl command integer */ 617 /* mode(I) - file mode bits used with open */ 618 /* */ 619 /* Processes an ioctl call made to operate on the IP Filter NAT device. */ 620 /* ------------------------------------------------------------------------ */ 621 int fr_nat_ioctl(data, cmd, mode, uid, ctx, ifs) 622 ioctlcmd_t cmd; 623 caddr_t data; 624 int mode, uid; 625 void *ctx; 626 ipf_stack_t *ifs; 627 { 628 ipnat_t *nat, *nt, *n = NULL, **np = NULL; 629 int error = 0, ret, arg, getlock; 630 ipnat_t natd; 631 632 #if (BSD >= 199306) && defined(_KERNEL) 633 if ((securelevel >= 2) && (mode & FWRITE)) 634 return EPERM; 635 #endif 636 637 #if defined(__osf__) && defined(_KERNEL) 638 getlock = 0; 639 #else 640 getlock = (mode & NAT_LOCKHELD) ? 
0 : 1; 641 #endif 642 643 nat = NULL; /* XXX gcc -Wuninitialized */ 644 if (cmd == (ioctlcmd_t)SIOCADNAT) { 645 KMALLOC(nt, ipnat_t *); 646 } else { 647 nt = NULL; 648 } 649 650 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) { 651 if (mode & NAT_SYSSPACE) { 652 bcopy(data, (char *)&natd, sizeof(natd)); 653 error = 0; 654 } else { 655 error = fr_inobj(data, &natd, IPFOBJ_IPNAT); 656 } 657 658 } else if (cmd == (ioctlcmd_t)SIOCIPFFL) { /* SIOCFLNAT & SIOCCNATL */ 659 BCOPYIN(data, &arg, sizeof(arg)); 660 } 661 662 if (error != 0) 663 goto done; 664 665 /* 666 * For add/delete, look to see if the NAT entry is already present 667 */ 668 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) { 669 nat = &natd; 670 if (nat->in_v == 0) /* For backward compat. */ 671 nat->in_v = 4; 672 nat->in_flags &= IPN_USERFLAGS; 673 if ((nat->in_redir & NAT_MAPBLK) == 0) { 674 if ((nat->in_flags & IPN_SPLIT) == 0) 675 nat->in_inip &= nat->in_inmsk; 676 if ((nat->in_flags & IPN_IPRANGE) == 0) 677 nat->in_outip &= nat->in_outmsk; 678 } 679 MUTEX_ENTER(&ifs->ifs_ipf_natio); 680 for (np = &ifs->ifs_nat_list; ((n = *np) != NULL); 681 np = &n->in_next) 682 if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags, 683 IPN_CMPSIZ) == 0) { 684 if (nat->in_redir == NAT_REDIRECT && 685 nat->in_pnext != n->in_pnext) 686 continue; 687 break; 688 } 689 } 690 691 switch (cmd) 692 { 693 case SIOCGENITER : 694 { 695 ipfgeniter_t iter; 696 ipftoken_t *token; 697 698 error = fr_inobj(data, &iter, IPFOBJ_GENITER); 699 if (error != 0) 700 break; 701 702 token = ipf_findtoken(iter.igi_type, uid, ctx, ifs); 703 if (token != NULL) 704 error = nat_iterator(token, &iter, ifs); 705 else 706 error = ESRCH; 707 RWLOCK_EXIT(&ifs->ifs_ipf_tokens); 708 break; 709 } 710 #ifdef IPFILTER_LOG 711 case SIOCIPFFB : 712 { 713 int tmp; 714 715 if (!(mode & FWRITE)) 716 error = EPERM; 717 else { 718 tmp = ipflog_clear(IPL_LOGNAT, ifs); 719 BCOPYOUT((char *)&tmp, (char *)data, sizeof(tmp)); 
720 } 721 break; 722 } 723 case SIOCSETLG : 724 if (!(mode & FWRITE)) 725 error = EPERM; 726 else { 727 BCOPYIN((char *)data, 728 (char *)&ifs->ifs_nat_logging, 729 sizeof(ifs->ifs_nat_logging)); 730 } 731 break; 732 case SIOCGETLG : 733 BCOPYOUT((char *)&ifs->ifs_nat_logging, (char *)data, 734 sizeof(ifs->ifs_nat_logging)); 735 break; 736 case FIONREAD : 737 arg = ifs->ifs_iplused[IPL_LOGNAT]; 738 BCOPYOUT(&arg, data, sizeof(arg)); 739 break; 740 #endif 741 case SIOCADNAT : 742 if (!(mode & FWRITE)) { 743 error = EPERM; 744 } else if (n != NULL) { 745 error = EEXIST; 746 } else if (nt == NULL) { 747 error = ENOMEM; 748 } 749 if (error != 0) { 750 MUTEX_EXIT(&ifs->ifs_ipf_natio); 751 break; 752 } 753 bcopy((char *)nat, (char *)nt, sizeof(*n)); 754 error = nat_siocaddnat(nt, np, getlock, ifs); 755 MUTEX_EXIT(&ifs->ifs_ipf_natio); 756 if (error == 0) 757 nt = NULL; 758 break; 759 case SIOCRMNAT : 760 if (!(mode & FWRITE)) { 761 error = EPERM; 762 n = NULL; 763 } else if (n == NULL) { 764 error = ESRCH; 765 } 766 767 if (error != 0) { 768 MUTEX_EXIT(&ifs->ifs_ipf_natio); 769 break; 770 } 771 nat_siocdelnat(n, np, getlock, ifs); 772 773 MUTEX_EXIT(&ifs->ifs_ipf_natio); 774 n = NULL; 775 break; 776 case SIOCGNATS : 777 ifs->ifs_nat_stats.ns_table[0] = ifs->ifs_nat_table[0]; 778 ifs->ifs_nat_stats.ns_table[1] = ifs->ifs_nat_table[1]; 779 ifs->ifs_nat_stats.ns_list = ifs->ifs_nat_list; 780 ifs->ifs_nat_stats.ns_maptable = ifs->ifs_maptable; 781 ifs->ifs_nat_stats.ns_maplist = ifs->ifs_ipf_hm_maplist; 782 ifs->ifs_nat_stats.ns_nattab_max = ifs->ifs_ipf_nattable_max; 783 ifs->ifs_nat_stats.ns_nattab_sz = ifs->ifs_ipf_nattable_sz; 784 ifs->ifs_nat_stats.ns_rultab_sz = ifs->ifs_ipf_natrules_sz; 785 ifs->ifs_nat_stats.ns_rdrtab_sz = ifs->ifs_ipf_rdrrules_sz; 786 ifs->ifs_nat_stats.ns_hostmap_sz = ifs->ifs_ipf_hostmap_sz; 787 ifs->ifs_nat_stats.ns_instances = ifs->ifs_nat_instances; 788 ifs->ifs_nat_stats.ns_apslist = ifs->ifs_ap_sess_list; 789 error = fr_outobj(data, 
&ifs->ifs_nat_stats, IPFOBJ_NATSTAT); 790 break; 791 case SIOCGNATL : 792 { 793 natlookup_t nl; 794 795 if (getlock) { 796 READ_ENTER(&ifs->ifs_ipf_nat); 797 } 798 error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP); 799 if (error == 0) { 800 if (nat_lookupredir(&nl, ifs) != NULL) { 801 error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP); 802 } else { 803 error = ESRCH; 804 } 805 } 806 if (getlock) { 807 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 808 } 809 break; 810 } 811 case SIOCIPFFL : /* old SIOCFLNAT & SIOCCNATL */ 812 if (!(mode & FWRITE)) { 813 error = EPERM; 814 break; 815 } 816 if (getlock) { 817 WRITE_ENTER(&ifs->ifs_ipf_nat); 818 } 819 error = 0; 820 if (arg == 0) 821 ret = nat_flushtable(ifs); 822 else if (arg == 1) 823 ret = nat_clearlist(ifs); 824 else if (arg >= 2 && arg <= 4) 825 ret = nat_extraflush(arg - 2, ifs); 826 else 827 error = EINVAL; 828 if (getlock) { 829 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 830 } 831 if (error == 0) { 832 BCOPYOUT(&ret, data, sizeof(ret)); 833 } 834 break; 835 case SIOCPROXY : 836 error = appr_ioctl(data, cmd, mode, ifs); 837 break; 838 case SIOCSTLCK : 839 if (!(mode & FWRITE)) { 840 error = EPERM; 841 } else { 842 fr_lock(data, &ifs->ifs_fr_nat_lock); 843 } 844 break; 845 case SIOCSTPUT : 846 if ((mode & FWRITE) != 0) { 847 error = fr_natputent(data, getlock, ifs); 848 } else { 849 error = EACCES; 850 } 851 break; 852 case SIOCSTGSZ : 853 if (ifs->ifs_fr_nat_lock) { 854 if (getlock) { 855 READ_ENTER(&ifs->ifs_ipf_nat); 856 } 857 error = fr_natgetsz(data, ifs); 858 if (getlock) { 859 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 860 } 861 } else 862 error = EACCES; 863 break; 864 case SIOCSTGET : 865 if (ifs->ifs_fr_nat_lock) { 866 if (getlock) { 867 READ_ENTER(&ifs->ifs_ipf_nat); 868 } 869 error = fr_natgetent(data, ifs); 870 if (getlock) { 871 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 872 } 873 } else 874 error = EACCES; 875 break; 876 case SIOCIPFDELTOK : 877 (void) BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg)); 878 error = ipf_deltoken(arg, uid, ctx, ifs); 879 
break; 880 default : 881 error = EINVAL; 882 break; 883 } 884 done: 885 if (nt) 886 KFREE(nt); 887 return error; 888 } 889 890 891 /* ------------------------------------------------------------------------ */ 892 /* Function: nat_siocaddnat */ 893 /* Returns: int - 0 == success, != 0 == failure */ 894 /* Parameters: n(I) - pointer to new NAT rule */ 895 /* np(I) - pointer to where to insert new NAT rule */ 896 /* getlock(I) - flag indicating if lock on ipf_nat is held */ 897 /* Mutex Locks: ipf_natio */ 898 /* */ 899 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ 900 /* from information passed to the kernel, then add it to the appropriate */ 901 /* NAT rule table(s). */ 902 /* ------------------------------------------------------------------------ */ 903 static int nat_siocaddnat(n, np, getlock, ifs) 904 ipnat_t *n, **np; 905 int getlock; 906 ipf_stack_t *ifs; 907 { 908 int error = 0, i, j; 909 910 if (nat_resolverule(n, ifs) != 0) 911 return ENOENT; 912 913 if ((n->in_age[0] == 0) && (n->in_age[1] != 0)) 914 return EINVAL; 915 916 n->in_use = 0; 917 if (n->in_redir & NAT_MAPBLK) 918 n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk); 919 else if (n->in_flags & IPN_AUTOPORTMAP) 920 n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk); 921 else if (n->in_flags & IPN_IPRANGE) 922 n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip); 923 else if (n->in_flags & IPN_SPLIT) 924 n->in_space = 2; 925 else if (n->in_outmsk != 0) 926 n->in_space = ~ntohl(n->in_outmsk); 927 else 928 n->in_space = 1; 929 930 /* 931 * Calculate the number of valid IP addresses in the output 932 * mapping range. In all cases, the range is inclusive of 933 * the start and ending IP addresses. 934 * If to a CIDR address, lose 2: broadcast + network address 935 * (so subtract 1) 936 * If to a range, add one. 937 * If to a single IP address, set to 1. 
938 */ 939 if (n->in_space) { 940 if ((n->in_flags & IPN_IPRANGE) != 0) 941 n->in_space += 1; 942 else 943 n->in_space -= 1; 944 } else 945 n->in_space = 1; 946 947 if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) && 948 ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0)) 949 n->in_nip = ntohl(n->in_outip) + 1; 950 else if ((n->in_flags & IPN_SPLIT) && 951 (n->in_redir & NAT_REDIRECT)) 952 n->in_nip = ntohl(n->in_inip); 953 else 954 n->in_nip = ntohl(n->in_outip); 955 if (n->in_redir & NAT_MAP) { 956 n->in_pnext = ntohs(n->in_pmin); 957 /* 958 * Multiply by the number of ports made available. 959 */ 960 if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) { 961 n->in_space *= (ntohs(n->in_pmax) - 962 ntohs(n->in_pmin) + 1); 963 /* 964 * Because two different sources can map to 965 * different destinations but use the same 966 * local IP#/port #. 967 * If the result is smaller than in_space, then 968 * we may have wrapped around 32bits. 969 */ 970 i = n->in_inmsk; 971 if ((i != 0) && (i != 0xffffffff)) { 972 j = n->in_space * (~ntohl(i) + 1); 973 if (j >= n->in_space) 974 n->in_space = j; 975 else 976 n->in_space = 0xffffffff; 977 } 978 } 979 /* 980 * If no protocol is specified, multiple by 256 to allow for 981 * at least one IP:IP mapping per protocol. 
982 */ 983 if ((n->in_flags & IPN_TCPUDPICMP) == 0) { 984 j = n->in_space * 256; 985 if (j >= n->in_space) 986 n->in_space = j; 987 else 988 n->in_space = 0xffffffff; 989 } 990 } 991 992 /* Otherwise, these fields are preset */ 993 994 if (getlock) { 995 WRITE_ENTER(&ifs->ifs_ipf_nat); 996 } 997 n->in_next = NULL; 998 *np = n; 999 1000 if (n->in_age[0] != 0) 1001 n->in_tqehead[0] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe, 1002 n->in_age[0], ifs); 1003 1004 if (n->in_age[1] != 0) 1005 n->in_tqehead[1] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe, 1006 n->in_age[1], ifs); 1007 1008 if (n->in_redir & NAT_REDIRECT) { 1009 n->in_flags &= ~IPN_NOTDST; 1010 nat_addrdr(n, ifs); 1011 } 1012 if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) { 1013 n->in_flags &= ~IPN_NOTSRC; 1014 nat_addnat(n, ifs); 1015 } 1016 n = NULL; 1017 ifs->ifs_nat_stats.ns_rules++; 1018 if (getlock) { 1019 RWLOCK_EXIT(&ifs->ifs_ipf_nat); /* WRITE */ 1020 } 1021 1022 return error; 1023 } 1024 1025 1026 /* ------------------------------------------------------------------------ */ 1027 /* Function: nat_resolvrule */ 1028 /* Returns: int - 0 == success, -1 == failure */ 1029 /* Parameters: n(I) - pointer to NAT rule */ 1030 /* */ 1031 /* Resolve some of the details inside the NAT rule. Includes resolving */ 1032 /* any specified interfaces and proxy labels, and determines whether or not */ 1033 /* all proxy labels are correctly specified. */ 1034 /* */ 1035 /* Called by nat_siocaddnat() (SIOCADNAT) and fr_natputent (SIOCSTPUT). 
*/ 1036 /* ------------------------------------------------------------------------ */ 1037 static int nat_resolverule(n, ifs) 1038 ipnat_t *n; 1039 ipf_stack_t *ifs; 1040 { 1041 n->in_ifnames[0][LIFNAMSIZ - 1] = '\0'; 1042 n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4, ifs); 1043 1044 n->in_ifnames[1][LIFNAMSIZ - 1] = '\0'; 1045 if (n->in_ifnames[1][0] == '\0') { 1046 (void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ); 1047 n->in_ifps[1] = n->in_ifps[0]; 1048 } else { 1049 n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4, ifs); 1050 } 1051 1052 if (n->in_plabel[0] != '\0') { 1053 n->in_apr = appr_lookup(n->in_p, n->in_plabel, ifs); 1054 if (n->in_apr == NULL) 1055 return -1; 1056 } 1057 return 0; 1058 } 1059 1060 1061 /* ------------------------------------------------------------------------ */ 1062 /* Function: nat_siocdelnat */ 1063 /* Returns: int - 0 == success, != 0 == failure */ 1064 /* Parameters: n(I) - pointer to new NAT rule */ 1065 /* np(I) - pointer to where to insert new NAT rule */ 1066 /* getlock(I) - flag indicating if lock on ipf_nat is held */ 1067 /* Mutex Locks: ipf_natio */ 1068 /* */ 1069 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ 1070 /* from information passed to the kernel, then add it to the appropriate */ 1071 /* NAT rule table(s). 
*/ 1072 /* ------------------------------------------------------------------------ */ 1073 static void nat_siocdelnat(n, np, getlock, ifs) 1074 ipnat_t *n, **np; 1075 int getlock; 1076 ipf_stack_t *ifs; 1077 { 1078 if (getlock) { 1079 WRITE_ENTER(&ifs->ifs_ipf_nat); 1080 } 1081 if (n->in_redir & NAT_REDIRECT) 1082 nat_delrdr(n); 1083 if (n->in_redir & (NAT_MAPBLK|NAT_MAP)) 1084 nat_delnat(n); 1085 if (ifs->ifs_nat_list == NULL) { 1086 ifs->ifs_nat_masks = 0; 1087 ifs->ifs_rdr_masks = 0; 1088 } 1089 1090 if (n->in_tqehead[0] != NULL) { 1091 if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) { 1092 fr_freetimeoutqueue(n->in_tqehead[0], ifs); 1093 } 1094 } 1095 1096 if (n->in_tqehead[1] != NULL) { 1097 if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) { 1098 fr_freetimeoutqueue(n->in_tqehead[1], ifs); 1099 } 1100 } 1101 1102 *np = n->in_next; 1103 1104 if (n->in_use == 0) { 1105 if (n->in_apr) 1106 appr_free(n->in_apr); 1107 KFREE(n); 1108 ifs->ifs_nat_stats.ns_rules--; 1109 } else { 1110 n->in_flags |= IPN_DELETE; 1111 n->in_next = NULL; 1112 } 1113 if (getlock) { 1114 RWLOCK_EXIT(&ifs->ifs_ipf_nat); /* READ/WRITE */ 1115 } 1116 } 1117 1118 1119 /* ------------------------------------------------------------------------ */ 1120 /* Function: fr_natgetsz */ 1121 /* Returns: int - 0 == success, != 0 is the error value. */ 1122 /* Parameters: data(I) - pointer to natget structure with kernel pointer */ 1123 /* get the size of. */ 1124 /* */ 1125 /* Handle SIOCSTGSZ. */ 1126 /* Return the size of the nat list entry to be copied back to user space. */ 1127 /* The size of the entry is stored in the ng_sz field and the enture natget */ 1128 /* structure is copied back to the user. 
*/ 1129 /* ------------------------------------------------------------------------ */ 1130 static int fr_natgetsz(data, ifs) 1131 caddr_t data; 1132 ipf_stack_t *ifs; 1133 { 1134 ap_session_t *aps; 1135 nat_t *nat, *n; 1136 natget_t ng; 1137 1138 BCOPYIN(data, &ng, sizeof(ng)); 1139 1140 nat = ng.ng_ptr; 1141 if (!nat) { 1142 nat = ifs->ifs_nat_instances; 1143 ng.ng_sz = 0; 1144 /* 1145 * Empty list so the size returned is 0. Simple. 1146 */ 1147 if (nat == NULL) { 1148 BCOPYOUT(&ng, data, sizeof(ng)); 1149 return 0; 1150 } 1151 } else { 1152 /* 1153 * Make sure the pointer we're copying from exists in the 1154 * current list of entries. Security precaution to prevent 1155 * copying of random kernel data. 1156 */ 1157 for (n = ifs->ifs_nat_instances; n; n = n->nat_next) 1158 if (n == nat) 1159 break; 1160 if (!n) 1161 return ESRCH; 1162 } 1163 1164 /* 1165 * Incluse any space required for proxy data structures. 1166 */ 1167 ng.ng_sz = sizeof(nat_save_t); 1168 aps = nat->nat_aps; 1169 if (aps != NULL) { 1170 ng.ng_sz += sizeof(ap_session_t) - 4; 1171 if (aps->aps_data != 0) 1172 ng.ng_sz += aps->aps_psiz; 1173 } 1174 1175 BCOPYOUT(&ng, data, sizeof(ng)); 1176 return 0; 1177 } 1178 1179 1180 /* ------------------------------------------------------------------------ */ 1181 /* Function: fr_natgetent */ 1182 /* Returns: int - 0 == success, != 0 is the error value. */ 1183 /* Parameters: data(I) - pointer to natget structure with kernel pointer */ 1184 /* to NAT structure to copy out. */ 1185 /* */ 1186 /* Handle SIOCSTGET. */ 1187 /* Copies out NAT entry to user space. 
Any additional data held for a */ 1188 /* proxy is also copied, as to is the NAT rule which was responsible for it */ 1189 /* ------------------------------------------------------------------------ */ 1190 static int fr_natgetent(data, ifs) 1191 caddr_t data; 1192 ipf_stack_t *ifs; 1193 { 1194 int error, outsize; 1195 ap_session_t *aps; 1196 nat_save_t *ipn, ipns; 1197 nat_t *n, *nat; 1198 1199 error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE); 1200 if (error != 0) 1201 return error; 1202 1203 if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920)) 1204 return EINVAL; 1205 1206 KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize); 1207 if (ipn == NULL) 1208 return ENOMEM; 1209 1210 ipn->ipn_dsize = ipns.ipn_dsize; 1211 nat = ipns.ipn_next; 1212 if (nat == NULL) { 1213 nat = ifs->ifs_nat_instances; 1214 if (nat == NULL) { 1215 if (ifs->ifs_nat_instances == NULL) 1216 error = ENOENT; 1217 goto finished; 1218 } 1219 } else { 1220 /* 1221 * Make sure the pointer we're copying from exists in the 1222 * current list of entries. Security precaution to prevent 1223 * copying of random kernel data. 1224 */ 1225 for (n = ifs->ifs_nat_instances; n; n = n->nat_next) 1226 if (n == nat) 1227 break; 1228 if (n == NULL) { 1229 error = ESRCH; 1230 goto finished; 1231 } 1232 } 1233 ipn->ipn_next = nat->nat_next; 1234 1235 /* 1236 * Copy the NAT structure. 1237 */ 1238 bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat)); 1239 1240 /* 1241 * If we have a pointer to the NAT rule it belongs to, save that too. 1242 */ 1243 if (nat->nat_ptr != NULL) 1244 bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat, 1245 sizeof(ipn->ipn_ipnat)); 1246 1247 /* 1248 * If we also know the NAT entry has an associated filter rule, 1249 * save that too. 
1250 */ 1251 if (nat->nat_fr != NULL) 1252 bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr, 1253 sizeof(ipn->ipn_fr)); 1254 1255 /* 1256 * Last but not least, if there is an application proxy session set 1257 * up for this NAT entry, then copy that out too, including any 1258 * private data saved along side it by the proxy. 1259 */ 1260 aps = nat->nat_aps; 1261 outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data); 1262 if (aps != NULL) { 1263 char *s; 1264 1265 if (outsize < sizeof(*aps)) { 1266 error = ENOBUFS; 1267 goto finished; 1268 } 1269 1270 s = ipn->ipn_data; 1271 bcopy((char *)aps, s, sizeof(*aps)); 1272 s += sizeof(*aps); 1273 outsize -= sizeof(*aps); 1274 if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz)) 1275 bcopy(aps->aps_data, s, aps->aps_psiz); 1276 else 1277 error = ENOBUFS; 1278 } 1279 if (error == 0) { 1280 error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize); 1281 } 1282 1283 finished: 1284 if (ipn != NULL) { 1285 KFREES(ipn, ipns.ipn_dsize); 1286 } 1287 return error; 1288 } 1289 1290 /* ------------------------------------------------------------------------ */ 1291 /* Function: nat_calc_chksum_diffs */ 1292 /* Returns: void */ 1293 /* Parameters: nat - pointer to NAT table entry */ 1294 /* */ 1295 /* Function calculates chksum deltas for IP header (nat_ipsumd) and TCP/UDP */ 1296 /* headers (nat_sumd). The things for L4 (UDP/TCP) get complicated when */ 1297 /* we are dealing with partial chksum offload. For these cases we need to */ 1298 /* compute a 'partial chksum delta'. The 'partial chksum delta'is stored */ 1299 /* into nat_sumd[1], while ordinary chksum delta for TCP/UDP is in */ 1300 /* nat_sumd[0]. */ 1301 /* */ 1302 /* The function accepts initialized NAT table entry and computes the deltas */ 1303 /* from nat_inip/nat_outip members. The function is called right before */ 1304 /* the new entry is inserted into the table. 
*/ 1305 /* */ 1306 /* The ipsumd (IP hedaer chksum delta adjustment) is computed as a chksum */ 1307 /* of delta between original and new IP addresses. */ 1308 /* */ 1309 /* the nat_sumd[0] (TCP/UDP header chksum delta adjustment) is computed as */ 1310 /* a chkusm of delta between original an new IP addrress:port tupples. */ 1311 /* */ 1312 /* Some facts about chksum, we should remember: */ 1313 /* IP header chksum covers IP header only */ 1314 /* */ 1315 /* TCP/UDP chksum covers data payload and so called pseudo header */ 1316 /* SRC, DST IP address */ 1317 /* SRC, DST Port */ 1318 /* length of payload */ 1319 /* */ 1320 /* The partial chksum delta (nat_sumd[1] is used to adjust db_ckusm16 */ 1321 /* member of dblk_t structure. The db_ckusm16 member is not part of */ 1322 /* IP/UDP/TCP header it is 16 bit value computed by NIC driver with partial */ 1323 /* chksum offload capacbility for every inbound packet. The db_cksum16 is */ 1324 /* stored along with other IP packet data in dblk_t structure and used in */ 1325 /* for IP/UDP/TCP chksum validation later in ip.c. */ 1326 /* */ 1327 /* The partial chksum delta (adjustment, nat_sumd[1]) is computed as chksum */ 1328 /* of delta between new and orig address. NOTE: the order of operands for */ 1329 /* partial delta operation is swapped compared to computing the IP/TCP/UDP */ 1330 /* header adjustment. It is by design see (IP_CKSUM_RECV() macro in ip.c). */ 1331 /* */ 1332 /* ------------------------------------------------------------------------ */ 1333 static void nat_calc_chksum_diffs(nat) 1334 nat_t *nat; 1335 { 1336 u_32_t sum_orig = 0; 1337 u_32_t sum_changed = 0; 1338 u_32_t sumd; 1339 u_32_t ipsum_orig = 0; 1340 u_32_t ipsum_changed = 0; 1341 1342 /* 1343 * the switch calculates operands for CALC_SUMD(), 1344 * which will compute the partial chksum delta. 
1345 */ 1346 switch (nat->nat_dir) 1347 { 1348 case NAT_INBOUND: 1349 /* 1350 * we are dealing with RDR rule (DST address gets 1351 * modified on packet from client) 1352 */ 1353 sum_changed = LONG_SUM(ntohl(nat->nat_inip.s_addr)); 1354 sum_orig = LONG_SUM(ntohl(nat->nat_outip.s_addr)); 1355 break; 1356 case NAT_OUTBOUND: 1357 /* 1358 * we are dealing with MAP rule (SRC address gets 1359 * modified on packet from client) 1360 */ 1361 sum_changed = LONG_SUM(ntohl(nat->nat_outip.s_addr)); 1362 sum_orig = LONG_SUM(ntohl(nat->nat_inip.s_addr)); 1363 break; 1364 default: ; 1365 break; 1366 } 1367 1368 /* 1369 * we also preserve CALC_SUMD() operands here, for IP chksum delta 1370 * calculation, which happens at the end of function. 1371 */ 1372 ipsum_changed = sum_changed; 1373 ipsum_orig = sum_orig; 1374 /* 1375 * NOTE: the order of operands for partial chksum adjustment 1376 * computation has to be swapped! 1377 */ 1378 CALC_SUMD(sum_changed, sum_orig, sumd); 1379 nat->nat_sumd[1] = (sumd & 0xffff) + (sumd >> 16); 1380 1381 if (nat->nat_p == IPPROTO_TCP || nat->nat_p == IPPROTO_UDP) { 1382 1383 /* 1384 * switch calculates operands for CALC_SUMD(), which will 1385 * compute the full chksum delta. 
1386 */ 1387 switch (nat->nat_dir) 1388 { 1389 case NAT_INBOUND: 1390 sum_changed = LONG_SUM( 1391 ntohl(nat->nat_inip.s_addr) + 1392 ntohs(nat->nat_inport) 1393 ); 1394 sum_orig = LONG_SUM( 1395 ntohl(nat->nat_outip.s_addr) + 1396 ntohs(nat->nat_outport) 1397 ); 1398 break; 1399 case NAT_OUTBOUND: 1400 sum_changed = LONG_SUM( 1401 ntohl(nat->nat_outip.s_addr) + 1402 ntohs(nat->nat_outport) 1403 ); 1404 sum_orig = LONG_SUM( 1405 ntohl(nat->nat_inip.s_addr) + 1406 ntohs(nat->nat_inport) 1407 ); 1408 break; 1409 default: ; 1410 break; 1411 } 1412 1413 CALC_SUMD(sum_orig, sum_changed, sumd); 1414 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); 1415 } 1416 else 1417 nat->nat_sumd[0] = nat->nat_sumd[1]; 1418 1419 /* 1420 * we may reuse the already computed nat_sumd[0] for IP header chksum 1421 * adjustment in case the L4 (TCP/UDP header) is not changed by NAT. 1422 */ 1423 if (NAT_HAS_L4_CHANGED(nat)) { 1424 /* 1425 * bad luck, NAT changes also the L4 header, use IP addresses 1426 * to compute chksum adjustment for IP header. 1427 */ 1428 CALC_SUMD(ipsum_orig, ipsum_changed, sumd); 1429 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16); 1430 } 1431 else { 1432 /* 1433 * the NAT does not change L4 hdr -> reuse chksum adjustment 1434 * for IP hdr. 1435 */ 1436 nat->nat_ipsumd = nat->nat_sumd[0]; 1437 } 1438 1439 return; 1440 } 1441 1442 /* ------------------------------------------------------------------------ */ 1443 /* Function: fr_natputent */ 1444 /* Returns: int - 0 == success, != 0 is the error value. */ 1445 /* Parameters: data(I) - pointer to natget structure with NAT */ 1446 /* structure information to load into the kernel */ 1447 /* getlock(I) - flag indicating whether or not a write lock */ 1448 /* on ipf_nat is already held. */ 1449 /* */ 1450 /* Handle SIOCSTPUT. */ 1451 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */ 1452 /* firewall rule data structures, if pointers to them indicate so. 
*/ 1453 /* ------------------------------------------------------------------------ */ 1454 static int fr_natputent(data, getlock, ifs) 1455 caddr_t data; 1456 int getlock; 1457 ipf_stack_t *ifs; 1458 { 1459 nat_save_t ipn, *ipnn; 1460 ap_session_t *aps; 1461 nat_t *n, *nat; 1462 frentry_t *fr; 1463 fr_info_t fin; 1464 ipnat_t *in; 1465 int error; 1466 1467 error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE); 1468 if (error != 0) 1469 return error; 1470 1471 /* 1472 * Trigger automatic call to nat_extraflush() if the 1473 * table has reached capcity specified by hi watermark. 1474 */ 1475 if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi) 1476 ifs->ifs_nat_doflush = 1; 1477 1478 /* 1479 * Initialise early because of code at junkput label. 1480 */ 1481 in = NULL; 1482 aps = NULL; 1483 nat = NULL; 1484 ipnn = NULL; 1485 1486 /* 1487 * New entry, copy in the rest of the NAT entry if it's size is more 1488 * than just the nat_t structure. 1489 */ 1490 fr = NULL; 1491 if (ipn.ipn_dsize > sizeof(ipn)) { 1492 if (ipn.ipn_dsize > 81920) { 1493 error = ENOMEM; 1494 goto junkput; 1495 } 1496 1497 KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize); 1498 if (ipnn == NULL) 1499 return ENOMEM; 1500 1501 error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize); 1502 if (error != 0) { 1503 error = EFAULT; 1504 goto junkput; 1505 } 1506 } else 1507 ipnn = &ipn; 1508 1509 KMALLOC(nat, nat_t *); 1510 if (nat == NULL) { 1511 error = ENOMEM; 1512 goto junkput; 1513 } 1514 1515 bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat)); 1516 /* 1517 * Initialize all these so that nat_delete() doesn't cause a crash. 
1518 */ 1519 bzero((char *)nat, offsetof(struct nat, nat_tqe)); 1520 nat->nat_tqe.tqe_pnext = NULL; 1521 nat->nat_tqe.tqe_next = NULL; 1522 nat->nat_tqe.tqe_ifq = NULL; 1523 nat->nat_tqe.tqe_parent = nat; 1524 1525 /* 1526 * Restore the rule associated with this nat session 1527 */ 1528 in = ipnn->ipn_nat.nat_ptr; 1529 if (in != NULL) { 1530 KMALLOC(in, ipnat_t *); 1531 nat->nat_ptr = in; 1532 if (in == NULL) { 1533 error = ENOMEM; 1534 goto junkput; 1535 } 1536 bzero((char *)in, offsetof(struct ipnat, in_next6)); 1537 bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in)); 1538 in->in_use = 1; 1539 in->in_flags |= IPN_DELETE; 1540 1541 ATOMIC_INC(ifs->ifs_nat_stats.ns_rules); 1542 1543 if (nat_resolverule(in, ifs) != 0) { 1544 error = ESRCH; 1545 goto junkput; 1546 } 1547 } 1548 1549 /* 1550 * Check that the NAT entry doesn't already exist in the kernel. 1551 */ 1552 bzero((char *)&fin, sizeof(fin)); 1553 fin.fin_p = nat->nat_p; 1554 fin.fin_ifs = ifs; 1555 if (nat->nat_dir == NAT_OUTBOUND) { 1556 fin.fin_data[0] = ntohs(nat->nat_oport); 1557 fin.fin_data[1] = ntohs(nat->nat_outport); 1558 fin.fin_ifp = nat->nat_ifps[0]; 1559 if (getlock) { 1560 READ_ENTER(&ifs->ifs_ipf_nat); 1561 } 1562 n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p, 1563 nat->nat_oip, nat->nat_outip); 1564 if (getlock) { 1565 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1566 } 1567 if (n != NULL) { 1568 error = EEXIST; 1569 goto junkput; 1570 } 1571 } else if (nat->nat_dir == NAT_INBOUND) { 1572 fin.fin_data[0] = ntohs(nat->nat_inport); 1573 fin.fin_data[1] = ntohs(nat->nat_oport); 1574 fin.fin_ifp = nat->nat_ifps[1]; 1575 if (getlock) { 1576 READ_ENTER(&ifs->ifs_ipf_nat); 1577 } 1578 n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p, 1579 nat->nat_inip, nat->nat_oip); 1580 if (getlock) { 1581 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1582 } 1583 if (n != NULL) { 1584 error = EEXIST; 1585 goto junkput; 1586 } 1587 } else { 1588 error = EINVAL; 1589 goto junkput; 1590 } 1591 1592 /* 1593 * Restore ap_session_t 
structure. Include the private data allocated 1594 * if it was there. 1595 */ 1596 aps = nat->nat_aps; 1597 if (aps != NULL) { 1598 KMALLOC(aps, ap_session_t *); 1599 nat->nat_aps = aps; 1600 if (aps == NULL) { 1601 error = ENOMEM; 1602 goto junkput; 1603 } 1604 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps)); 1605 if (in != NULL) 1606 aps->aps_apr = in->in_apr; 1607 else 1608 aps->aps_apr = NULL; 1609 if (aps->aps_psiz != 0) { 1610 if (aps->aps_psiz > 81920) { 1611 error = ENOMEM; 1612 goto junkput; 1613 } 1614 KMALLOCS(aps->aps_data, void *, aps->aps_psiz); 1615 if (aps->aps_data == NULL) { 1616 error = ENOMEM; 1617 goto junkput; 1618 } 1619 bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data, 1620 aps->aps_psiz); 1621 } else { 1622 aps->aps_psiz = 0; 1623 aps->aps_data = NULL; 1624 } 1625 } 1626 1627 /* 1628 * If there was a filtering rule associated with this entry then 1629 * build up a new one. 1630 */ 1631 fr = nat->nat_fr; 1632 if (fr != NULL) { 1633 if ((nat->nat_flags & SI_NEWFR) != 0) { 1634 KMALLOC(fr, frentry_t *); 1635 nat->nat_fr = fr; 1636 if (fr == NULL) { 1637 error = ENOMEM; 1638 goto junkput; 1639 } 1640 ipnn->ipn_nat.nat_fr = fr; 1641 (void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE); 1642 bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr)); 1643 1644 fr->fr_ref = 1; 1645 fr->fr_dsize = 0; 1646 fr->fr_data = NULL; 1647 fr->fr_type = FR_T_NONE; 1648 1649 MUTEX_NUKE(&fr->fr_lock); 1650 MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock"); 1651 } else { 1652 if (getlock) { 1653 READ_ENTER(&ifs->ifs_ipf_nat); 1654 } 1655 for (n = ifs->ifs_nat_instances; n; n = n->nat_next) 1656 if (n->nat_fr == fr) 1657 break; 1658 1659 if (n != NULL) { 1660 MUTEX_ENTER(&fr->fr_lock); 1661 fr->fr_ref++; 1662 MUTEX_EXIT(&fr->fr_lock); 1663 } 1664 if (getlock) { 1665 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1666 } 1667 if (!n) { 1668 error = ESRCH; 1669 goto junkput; 1670 } 1671 } 1672 } 1673 1674 if (ipnn != &ipn) { 1675 KFREES(ipnn, ipn.ipn_dsize); 1676 ipnn = NULL; 1677 } 1678 
1679 nat_calc_chksum_diffs(nat); 1680 1681 if (getlock) { 1682 WRITE_ENTER(&ifs->ifs_ipf_nat); 1683 } 1684 error = nat_insert(nat, nat->nat_rev, ifs); 1685 if ((error == 0) && (aps != NULL)) { 1686 aps->aps_next = ifs->ifs_ap_sess_list; 1687 ifs->ifs_ap_sess_list = aps; 1688 } 1689 if (getlock) { 1690 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 1691 } 1692 1693 if (error == 0) 1694 return 0; 1695 1696 error = ENOMEM; 1697 1698 junkput: 1699 if (fr != NULL) 1700 (void) fr_derefrule(&fr, ifs); 1701 1702 if ((ipnn != NULL) && (ipnn != &ipn)) { 1703 KFREES(ipnn, ipn.ipn_dsize); 1704 } 1705 if (nat != NULL) { 1706 if (aps != NULL) { 1707 if (aps->aps_data != NULL) { 1708 KFREES(aps->aps_data, aps->aps_psiz); 1709 } 1710 KFREE(aps); 1711 } 1712 if (in != NULL) { 1713 if (in->in_apr) 1714 appr_free(in->in_apr); 1715 KFREE(in); 1716 } 1717 KFREE(nat); 1718 } 1719 return error; 1720 } 1721 1722 1723 /* ------------------------------------------------------------------------ */ 1724 /* Function: nat_delete */ 1725 /* Returns: Nil */ 1726 /* Parameters: natd(I) - pointer to NAT structure to delete */ 1727 /* logtype(I) - type of LOG record to create before deleting */ 1728 /* Write Lock: ipf_nat */ 1729 /* */ 1730 /* Delete a nat entry from the various lists and table. If NAT logging is */ 1731 /* enabled then generate a NAT log record for this event. */ 1732 /* ------------------------------------------------------------------------ */ 1733 static void nat_delete(nat, logtype, ifs) 1734 struct nat *nat; 1735 int logtype; 1736 ipf_stack_t *ifs; 1737 { 1738 struct ipnat *ipn; 1739 1740 if (logtype != 0 && ifs->ifs_nat_logging != 0) 1741 nat_log(nat, logtype, ifs); 1742 1743 /* 1744 * Take it as a general indication that all the pointers are set if 1745 * nat_pnext is set. 
1746 */ 1747 if (nat->nat_pnext != NULL) { 1748 ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--; 1749 ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--; 1750 1751 *nat->nat_pnext = nat->nat_next; 1752 if (nat->nat_next != NULL) { 1753 nat->nat_next->nat_pnext = nat->nat_pnext; 1754 nat->nat_next = NULL; 1755 } 1756 nat->nat_pnext = NULL; 1757 1758 *nat->nat_phnext[0] = nat->nat_hnext[0]; 1759 if (nat->nat_hnext[0] != NULL) { 1760 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; 1761 nat->nat_hnext[0] = NULL; 1762 } 1763 nat->nat_phnext[0] = NULL; 1764 1765 *nat->nat_phnext[1] = nat->nat_hnext[1]; 1766 if (nat->nat_hnext[1] != NULL) { 1767 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; 1768 nat->nat_hnext[1] = NULL; 1769 } 1770 nat->nat_phnext[1] = NULL; 1771 1772 if ((nat->nat_flags & SI_WILDP) != 0) 1773 ifs->ifs_nat_stats.ns_wilds--; 1774 } 1775 1776 if (nat->nat_me != NULL) { 1777 *nat->nat_me = NULL; 1778 nat->nat_me = NULL; 1779 } 1780 1781 fr_deletequeueentry(&nat->nat_tqe); 1782 1783 MUTEX_ENTER(&nat->nat_lock); 1784 if (nat->nat_ref > 1) { 1785 nat->nat_ref--; 1786 MUTEX_EXIT(&nat->nat_lock); 1787 return; 1788 } 1789 MUTEX_EXIT(&nat->nat_lock); 1790 1791 /* 1792 * At this point, nat_ref is 1, doing "--" would make it 0.. 1793 */ 1794 nat->nat_ref = 0; 1795 1796 #ifdef IPFILTER_SYNC 1797 if (nat->nat_sync) 1798 ipfsync_del(nat->nat_sync); 1799 #endif 1800 1801 if (nat->nat_fr != NULL) 1802 (void)fr_derefrule(&nat->nat_fr, ifs); 1803 1804 if (nat->nat_hm != NULL) 1805 fr_hostmapdel(&nat->nat_hm); 1806 1807 /* 1808 * If there is an active reference from the nat entry to its parent 1809 * rule, decrement the rule's reference count and free it too if no 1810 * longer being used. 
1811 */ 1812 ipn = nat->nat_ptr; 1813 if (ipn != NULL) { 1814 ipn->in_space++; 1815 ipn->in_use--; 1816 if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) { 1817 if (ipn->in_apr) 1818 appr_free(ipn->in_apr); 1819 KFREE(ipn); 1820 ifs->ifs_nat_stats.ns_rules--; 1821 } 1822 } 1823 1824 MUTEX_DESTROY(&nat->nat_lock); 1825 1826 aps_free(nat->nat_aps, ifs); 1827 ifs->ifs_nat_stats.ns_inuse--; 1828 1829 /* 1830 * If there's a fragment table entry too for this nat entry, then 1831 * dereference that as well. This is after nat_lock is released 1832 * because of Tru64. 1833 */ 1834 fr_forgetnat((void *)nat, ifs); 1835 1836 KFREE(nat); 1837 } 1838 1839 1840 /* ------------------------------------------------------------------------ */ 1841 /* Function: nat_flushtable */ 1842 /* Returns: int - number of NAT rules deleted */ 1843 /* Parameters: Nil */ 1844 /* */ 1845 /* Deletes all currently active NAT sessions. In deleting each NAT entry a */ 1846 /* log record should be emitted in nat_delete() if NAT logging is enabled. */ 1847 /* ------------------------------------------------------------------------ */ 1848 /* 1849 * nat_flushtable - clear the NAT table of all mapping entries. 1850 */ 1851 static int nat_flushtable(ifs) 1852 ipf_stack_t *ifs; 1853 { 1854 nat_t *nat; 1855 int j = 0; 1856 1857 /* 1858 * ALL NAT mappings deleted, so lets just make the deletions 1859 * quicker. 
1860 */ 1861 if (ifs->ifs_nat_table[0] != NULL) 1862 bzero((char *)ifs->ifs_nat_table[0], 1863 sizeof(ifs->ifs_nat_table[0]) * ifs->ifs_ipf_nattable_sz); 1864 if (ifs->ifs_nat_table[1] != NULL) 1865 bzero((char *)ifs->ifs_nat_table[1], 1866 sizeof(ifs->ifs_nat_table[1]) * ifs->ifs_ipf_nattable_sz); 1867 1868 while ((nat = ifs->ifs_nat_instances) != NULL) { 1869 nat_delete(nat, NL_FLUSH, ifs); 1870 j++; 1871 } 1872 1873 return j; 1874 } 1875 1876 1877 /* ------------------------------------------------------------------------ */ 1878 /* Function: nat_clearlist */ 1879 /* Returns: int - number of NAT/RDR rules deleted */ 1880 /* Parameters: Nil */ 1881 /* */ 1882 /* Delete all rules in the current list of rules. There is nothing elegant */ 1883 /* about this cleanup: simply free all entries on the list of rules and */ 1884 /* clear out the tables used for hashed NAT rule lookups. */ 1885 /* ------------------------------------------------------------------------ */ 1886 static int nat_clearlist(ifs) 1887 ipf_stack_t *ifs; 1888 { 1889 ipnat_t *n, **np = &ifs->ifs_nat_list; 1890 int i = 0; 1891 1892 if (ifs->ifs_nat_rules != NULL) 1893 bzero((char *)ifs->ifs_nat_rules, 1894 sizeof(*ifs->ifs_nat_rules) * ifs->ifs_ipf_natrules_sz); 1895 if (ifs->ifs_rdr_rules != NULL) 1896 bzero((char *)ifs->ifs_rdr_rules, 1897 sizeof(*ifs->ifs_rdr_rules) * ifs->ifs_ipf_rdrrules_sz); 1898 1899 while ((n = *np) != NULL) { 1900 *np = n->in_next; 1901 if (n->in_use == 0) { 1902 if (n->in_apr != NULL) 1903 appr_free(n->in_apr); 1904 KFREE(n); 1905 ifs->ifs_nat_stats.ns_rules--; 1906 } else { 1907 n->in_flags |= IPN_DELETE; 1908 n->in_next = NULL; 1909 } 1910 i++; 1911 } 1912 ifs->ifs_nat_masks = 0; 1913 ifs->ifs_rdr_masks = 0; 1914 return i; 1915 } 1916 1917 1918 /* ------------------------------------------------------------------------ */ 1919 /* Function: nat_newmap */ 1920 /* Returns: int - -1 == error, 0 == success */ 1921 /* Parameters: fin(I) - pointer to packet information */ 1922 /* 
             nat(I) - pointer to NAT entry                               */
/*              ni(I)  - pointer to structure with misc. information needed */
/*                       to create new NAT entry.                           */
/*                                                                          */
/* Given an empty NAT structure, populate it with new information about a   */
/* new NAT session, as defined by the matching NAT rule.                    */
/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
/* to the new IP address for the translation.                               */
/*                                                                          */
/* Candidate address/port pairs are tried in a loop (counter "l") until     */
/* nat_inlookup() finds no existing session that would clash with the       */
/* candidate.  The rule's in_nip/in_pnext cursors are advanced as           */
/* candidates are consumed.  On success the chosen port is also written     */
/* into the packet's L4 header via fin_dp (th_sport or icmp_id).            */
/*                                                                          */
/* NOTE(review): in_ippip and in_ppip are used as divisors below; the only  */
/* zero checks visible here guard some of those uses -- confirm that rule   */
/* validation guarantees the rest.                                          */
/* ------------------------------------------------------------------------ */
static INLINE int nat_newmap(fin, nat, ni)
fr_info_t *fin;
nat_t *nat;
natinfo_t *ni;
{
	u_short st_port, dport, sport, port, sp, dp;
	struct in_addr in, inb;
	hostmap_t *hm;
	u_32_t flags;
	u_32_t st_ip;
	ipnat_t *np;
	nat_t *natl;
	int l;
	ipf_stack_t *ifs = fin->fin_ifs;

	/*
	 * If it's an outbound packet which doesn't match any existing
	 * record, then create a new port
	 */
	l = 0;
	hm = NULL;
	np = ni->nai_np;
	/* Remember where the cursors started so wrap-around can be detected. */
	st_ip = np->in_nip;
	st_port = np->in_pnext;
	flags = ni->nai_flags;
	sport = ni->nai_sport;
	dport = ni->nai_dport;

	/*
	 * Do a loop until we either run out of entries to try or we find
	 * a NAT mapping that isn't currently being used.  This is done
	 * because the change to the source is not (usually) being fixed.
	 */
	do {
		port = 0;
		in.s_addr = htonl(np->in_nip);
		if (l == 0) {
			/*
			 * Check to see if there is an existing NAT
			 * setup for this IP address pair.
			 */
			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
					 in, 0, ifs);
			if (hm != NULL)
				in.s_addr = hm->hm_mapip.s_addr;
		} else if ((l == 1) && (hm != NULL)) {
			/*
			 * The first attempt (made with the host map's
			 * address) clashed, so give the host map back.
			 */
			fr_hostmapdel(&hm);
		}
		/* From here on "in" is kept in host byte order. */
		in.s_addr = ntohl(in.s_addr);

		nat->nat_hm = hm;

		/* Single address, no port range: only one attempt possible. */
		if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
			if (l > 0)
				return -1;
		}

		if (np->in_redir == NAT_BIMAP &&
		    np->in_inmsk == np->in_outmsk) {
			/*
			 * map the address block in a 1:1 fashion
			 */
			in.s_addr = np->in_outip;
			in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
			in.s_addr = ntohl(in.s_addr);

		} else if (np->in_redir & NAT_MAPBLK) {
			if ((l >= np->in_ppip) || ((l > 0) &&
			     !(flags & IPN_TCPUDP)))
				return -1;
			/*
			 * map-block - Calculate destination address.
			 */
			in.s_addr = ntohl(fin->fin_saddr);
			in.s_addr &= ntohl(~np->in_inmsk);
			inb.s_addr = in.s_addr;
			in.s_addr /= np->in_ippip;
			in.s_addr &= ntohl(~np->in_outmsk);
			in.s_addr += ntohl(np->in_outip);
			/*
			 * Calculate destination port.
			 */
			if ((flags & IPN_TCPUDP) &&
			    (np->in_ppip != 0)) {
				port = ntohs(sport) + l;
				port %= np->in_ppip;
				port += np->in_ppip *
					(inb.s_addr % np->in_ippip);
				port += MAPBLK_MINPORT;
				port = htons(port);
			}

		} else if ((np->in_outip == 0) &&
			   (np->in_outmsk == 0xffffffff)) {
			/*
			 * 0/32 - use the interface's IP address.
			 */
			if ((l > 0) ||
			    fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
				       &in, NULL, fin->fin_ifs) == -1)
				return -1;
			in.s_addr = ntohl(in.s_addr);

		} else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
			/*
			 * 0/0 - use the original source address/port.
			 */
			if (l > 0)
				return -1;
			in.s_addr = ntohl(fin->fin_saddr);

		} else if ((np->in_outmsk != 0xffffffff) &&
			   (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
			np->in_nip++;

		natl = NULL;

		if ((flags & IPN_TCPUDP) &&
		    ((np->in_redir & NAT_MAPBLK) == 0) &&
		    (np->in_flags & IPN_AUTOPORTMAP)) {
			/*
			 * "ports auto" (without map-block)
			 */
			if ((l > 0) && (l % np->in_ppip == 0)) {
				if (l > np->in_space) {
					return -1;
				} else if ((l > np->in_ppip) &&
					   np->in_outmsk != 0xffffffff)
					np->in_nip++;
			}
			if (np->in_ppip != 0) {
				port = ntohs(sport);
				port += (l % np->in_ppip);
				port %= np->in_ppip;
				port += np->in_ppip *
					(ntohl(fin->fin_saddr) %
					 np->in_ippip);
				port += MAPBLK_MINPORT;
				port = htons(port);
			}

		} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
			   (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
			/*
			 * Standard port translation.  Select next port.
			 */
			port = htons(np->in_pnext++);

			/* Port range exhausted: wrap and move to next address. */
			if (np->in_pnext > ntohs(np->in_pmax)) {
				np->in_pnext = ntohs(np->in_pmin);
				if (np->in_outmsk != 0xffffffff)
					np->in_nip++;
			}
		}

		/*
		 * Wrap the address cursor back to the start of the rule's
		 * address range/block once it has run past the end.
		 */
		if (np->in_flags & IPN_IPRANGE) {
			if (np->in_nip > ntohl(np->in_outmsk))
				np->in_nip = ntohl(np->in_outip);
		} else {
			if ((np->in_outmsk != 0xffffffff) &&
			    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
			    ntohl(np->in_outip))
				np->in_nip = ntohl(np->in_outip) + 1;
		}

		/* No port chosen above: keep the packet's own source port. */
		if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
			port = sport;

		/*
		 * Here we do a lookup of the connection as seen from
		 * the outside.  If an IP# pair already exists, try
		 * again.  So if you have A->B becomes C->B, you can
		 * also have D->E become C->E but not D->B causing
		 * another C->B.  Also take protocol and ports into
		 * account when determining whether a pre-existing
		 * NAT setup will cause an external conflict where
		 * this is appropriate.
		 */
		inb.s_addr = htonl(in.s_addr);
		/*
		 * fin_data[] is temporarily rewritten for the lookup and
		 * restored straight afterwards.
		 */
		sp = fin->fin_data[0];
		dp = fin->fin_data[1];
		fin->fin_data[0] = fin->fin_data[1];
		fin->fin_data[1] = htons(port);
		natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
				    (u_int)fin->fin_p, fin->fin_dst, inb);
		fin->fin_data[0] = sp;
		fin->fin_data[1] = dp;

		/*
		 * Has the search wrapped around and come back to the
		 * start ?
		 */
		if ((natl != NULL) &&
		    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
		    (np->in_nip != 0) && (st_ip == np->in_nip))
			return -1;
		l++;
	} while (natl != NULL);

	if (np->in_space > 0)
		np->in_space--;

	/* Setup the NAT table */
	nat->nat_inip = fin->fin_src;
	nat->nat_outip.s_addr = htonl(in.s_addr);
	nat->nat_oip = fin->fin_dst;
	if (nat->nat_hm == NULL)
		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
					  nat->nat_outip, 0, ifs);

	if (flags & IPN_TCPUDP) {
		nat->nat_inport = sport;
		nat->nat_outport = port;	/* sport */
		nat->nat_oport = dport;
		((tcphdr_t *)fin->fin_dp)->th_sport = port;
	} else if (flags & IPN_ICMPQUERY) {
		((icmphdr_t *)fin->fin_dp)->icmp_id = port;
		nat->nat_inport = port;
		nat->nat_outport = port;
	}

	/* Hand the results back: address in host order (see header). */
	ni->nai_ip.s_addr = in.s_addr;
	ni->nai_port = port;
	ni->nai_nport = dport;
	return 0;
}


/* ------------------------------------------------------------------------ */
/* Function:    nat_newrdr                                                  */
/* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
/*                    allow rule to be moved if IPN_ROUNDR is set.          */
/* Parameters:  fin(I) - pointer to packet information                      */
/*              nat(I) - pointer to NAT entry                               */
/*              ni(I)  - pointer to structure with misc.
 information needed */
/*                       to create new NAT entry.                           */
/*                                                                          */
/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
/* to the new IP address for the translation.                               */
/*                                                                          */
/* Works out the new destination address and port for a redirected packet   */
/* from the matching rule, refuses the translation (-1) if nat_outlookup()  */
/* already finds a session for it, and otherwise fills in the address/port  */
/* members of "nat" and "ni".  The new destination port is also written     */
/* into the packet's L4 header via fin_dp (th_dport or icmp_id).            */
/* ------------------------------------------------------------------------ */
static INLINE int nat_newrdr(fin, nat, ni)
fr_info_t *fin;
nat_t *nat;
natinfo_t *ni;
{
	u_short nport, dport, sport;
	struct in_addr in, inb;
	u_short sp, dp;
	hostmap_t *hm;
	u_32_t flags;
	ipnat_t *np;
	nat_t *natl;
	int move;
	ipf_stack_t *ifs = fin->fin_ifs;

	/* "move" becomes the return value on success; see header. */
	move = 1;
	hm = NULL;
	in.s_addr = 0;
	np = ni->nai_np;
	flags = ni->nai_flags;
	sport = ni->nai_sport;
	dport = ni->nai_dport;

	/*
	 * If the matching rule has IPN_STICKY set, then we want to have the
	 * same rule kick in as before.  Why would this happen?  If you have
	 * a collection of rdr rules with "round-robin sticky", the current
	 * packet might match a different one to the previous connection but
	 * we want the same destination to be used.
	 */
	if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) ==
	    (IPN_ROUNDR|IPN_STICKY)) {
		hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
				 (u_32_t)dport, ifs);
		if (hm != NULL) {
			/*
			 * Switch to the rule recorded in the host map and
			 * tell the caller about it through ni.
			 */
			in.s_addr = ntohl(hm->hm_mapip.s_addr);
			np = hm->hm_ipnat;
			ni->nai_np = np;
			move = 0;
		}
	}

	/*
	 * Otherwise, it's an inbound packet.  Most likely, we don't
	 * want to rewrite source ports and source addresses.  Instead,
	 * we want to rewrite to a fixed internal address and fixed
	 * internal port.
	 */
	if (np->in_flags & IPN_SPLIT) {
		in.s_addr = np->in_nip;

		if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
					 in, (u_32_t)dport, ifs);
			if (hm != NULL) {
				in.s_addr = hm->hm_mapip.s_addr;
				move = 0;
			}
		}

		/*
		 * Flip the rule's in_nip cursor between the two values held
		 * in in_inip and in_inmsk, unless an already-shared host
		 * map (hm_ref != 1) was found.
		 */
		if (hm == NULL || hm->hm_ref == 1) {
			if (np->in_inip == htonl(in.s_addr)) {
				np->in_nip = ntohl(np->in_inmsk);
				move = 0;
			} else {
				np->in_nip = ntohl(np->in_inip);
			}
		}

	} else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
		/*
		 * 0/32 - use the interface's IP address.
		 */
		if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL,
			       fin->fin_ifs) == -1)
			return -1;
		in.s_addr = ntohl(in.s_addr);

	} else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
		/*
		 * 0/0 - use the original destination address/port.
		 */
		in.s_addr = ntohl(fin->fin_daddr);

	} else if (np->in_redir == NAT_BIMAP &&
		   np->in_inmsk == np->in_outmsk) {
		/*
		 * map the address block in a 1:1 fashion
		 */
		in.s_addr = np->in_inip;
		in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
		in.s_addr = ntohl(in.s_addr);
	} else {
		in.s_addr = ntohl(np->in_inip);
	}

	/*
	 * Pick the new destination port: unchanged when the rule has no
	 * port (in_pnext == 0) or NAT_NOTRULEPORT is set.
	 */
	if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
		nport = dport;
	else {
		/*
		 * Whilst not optimized for the case where
		 * pmin == pmax, the gain is not significant.
		 */
		if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
		    (np->in_pmin != np->in_pmax)) {
			/* Keep the port's offset within the matched range. */
			nport = ntohs(dport) - ntohs(np->in_pmin) +
				ntohs(np->in_pnext);
			nport = htons(nport);
		} else
			nport = np->in_pnext;
	}

	/*
	 * When the redirect-to address is set to 0.0.0.0, just
	 * assume a blank `forwarding' of the packet.  We don't
	 * setup any translation for this either.
	 */
	if (in.s_addr == 0) {
		if (nport == dport)
			return -1;
		in.s_addr = ntohl(fin->fin_daddr);
	}

	/*
	 * Check to see if this redirect mapping already exists and if
	 * it does, return "failure" (allowing it to be created will just
	 * cause one or both of these "connections" to stop working.)
	 */
	inb.s_addr = htonl(in.s_addr);
	/*
	 * fin_data[] is temporarily rewritten for the lookup and restored
	 * straight afterwards.
	 */
	sp = fin->fin_data[0];
	dp = fin->fin_data[1];
	fin->fin_data[1] = fin->fin_data[0];
	fin->fin_data[0] = ntohs(nport);
	natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
		    (u_int)fin->fin_p, inb, fin->fin_src);
	fin->fin_data[0] = sp;
	fin->fin_data[1] = dp;
	if (natl != NULL)
		return (-1);

	nat->nat_inip.s_addr = htonl(in.s_addr);
	nat->nat_outip = fin->fin_dst;
	nat->nat_oip = fin->fin_src;

	/* Hand the results back: address in host order (see header). */
	ni->nai_ip.s_addr = in.s_addr;
	ni->nai_nport = nport;
	ni->nai_port = sport;

	if (flags & IPN_TCPUDP) {
		nat->nat_inport = nport;
		nat->nat_outport = dport;
		nat->nat_oport = sport;
		((tcphdr_t *)fin->fin_dp)->th_dport = nport;
	} else if (flags & IPN_ICMPQUERY) {
		((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
		nat->nat_inport = nport;
		nat->nat_outport = nport;
	}

	return move;
}

/* ------------------------------------------------------------------------ */
/* Function:    nat_new                                                     */
/* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
/*                       else pointer to new NAT structure                  */
/* Parameters:  fin(I)       - pointer to packet information                */
/*              np(I)        - pointer to NAT rule                          */
/*              natsave(I)   - pointer to where to store NAT struct pointer */
/*              flags(I)     - flags describing the current packet          */
/*              direction(I) - direction of packet (in/out)                 */
/* Write Lock:  ipf_nat                                                     */
/*                                                                          */
/* Attempts to create a new NAT entry.  Does not actually change the packet */
/* in any way.
*/ 2346 /* */ 2347 /* This fucntion is in three main parts: (1) deal with creating a new NAT */ 2348 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with */ 2349 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */ 2350 /* and (3) building that structure and putting it into the NAT table(s). */ 2351 /* ------------------------------------------------------------------------ */ 2352 nat_t *nat_new(fin, np, natsave, flags, direction) 2353 fr_info_t *fin; 2354 ipnat_t *np; 2355 nat_t **natsave; 2356 u_int flags; 2357 int direction; 2358 { 2359 tcphdr_t *tcp = NULL; 2360 hostmap_t *hm = NULL; 2361 nat_t *nat, *natl; 2362 u_int nflags; 2363 natinfo_t ni; 2364 int move; 2365 ipf_stack_t *ifs = fin->fin_ifs; 2366 2367 /* 2368 * Trigger automatic call to nat_extraflush() if the 2369 * table has reached capcity specified by hi watermark. 2370 */ 2371 if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi) 2372 ifs->ifs_nat_doflush = 1; 2373 2374 if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) { 2375 ifs->ifs_nat_stats.ns_memfail++; 2376 return NULL; 2377 } 2378 2379 move = 1; 2380 nflags = np->in_flags & flags; 2381 nflags &= NAT_FROMRULE; 2382 2383 ni.nai_np = np; 2384 ni.nai_nflags = nflags; 2385 ni.nai_flags = flags; 2386 2387 /* Give me a new nat */ 2388 KMALLOC(nat, nat_t *); 2389 if (nat == NULL) { 2390 ifs->ifs_nat_stats.ns_memfail++; 2391 /* 2392 * Try to automatically tune the max # of entries in the 2393 * table allowed to be less than what will cause kmem_alloc() 2394 * to fail and try to eliminate panics due to out of memory 2395 * conditions arising. 
2396 */ 2397 if (ifs->ifs_ipf_nattable_max > ifs->ifs_ipf_nattable_sz) { 2398 ifs->ifs_ipf_nattable_max = ifs->ifs_nat_stats.ns_inuse - 100; 2399 printf("ipf_nattable_max reduced to %d\n", 2400 ifs->ifs_ipf_nattable_max); 2401 } 2402 return NULL; 2403 } 2404 2405 if (flags & IPN_TCPUDP) { 2406 tcp = fin->fin_dp; 2407 ni.nai_sport = htons(fin->fin_sport); 2408 ni.nai_dport = htons(fin->fin_dport); 2409 } else if (flags & IPN_ICMPQUERY) { 2410 /* 2411 * In the ICMP query NAT code, we translate the ICMP id fields 2412 * to make them unique. This is indepedent of the ICMP type 2413 * (e.g. in the unlikely event that a host sends an echo and 2414 * an tstamp request with the same id, both packets will have 2415 * their ip address/id field changed in the same way). 2416 */ 2417 /* The icmp_id field is used by the sender to identify the 2418 * process making the icmp request. (the receiver justs 2419 * copies it back in its response). So, it closely matches 2420 * the concept of source port. We overlay sport, so we can 2421 * maximally reuse the existing code. 2422 */ 2423 ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id; 2424 ni.nai_dport = ni.nai_sport; 2425 } 2426 2427 bzero((char *)nat, sizeof(*nat)); 2428 nat->nat_flags = flags; 2429 nat->nat_redir = np->in_redir; 2430 2431 if ((flags & NAT_SLAVE) == 0) { 2432 MUTEX_ENTER(&ifs->ifs_ipf_nat_new); 2433 } 2434 2435 /* 2436 * Search the current table for a match. 2437 */ 2438 if (direction == NAT_OUTBOUND) { 2439 /* 2440 * We can now arrange to call this for the same connection 2441 * because ipf_nat_new doesn't protect the code path into 2442 * this function. 
2443 */ 2444 natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p, 2445 fin->fin_src, fin->fin_dst); 2446 if (natl != NULL) { 2447 KFREE(nat); 2448 nat = natl; 2449 goto done; 2450 } 2451 2452 move = nat_newmap(fin, nat, &ni); 2453 if (move == -1) 2454 goto badnat; 2455 2456 np = ni.nai_np; 2457 } else { 2458 /* 2459 * NAT_INBOUND is used only for redirects rules 2460 */ 2461 natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p, 2462 fin->fin_src, fin->fin_dst); 2463 if (natl != NULL) { 2464 KFREE(nat); 2465 nat = natl; 2466 goto done; 2467 } 2468 2469 move = nat_newrdr(fin, nat, &ni); 2470 if (move == -1) 2471 goto badnat; 2472 2473 np = ni.nai_np; 2474 } 2475 2476 if ((move == 1) && (np->in_flags & IPN_ROUNDR)) { 2477 if (np->in_redir == NAT_REDIRECT) { 2478 nat_delrdr(np); 2479 nat_addrdr(np, ifs); 2480 } else if (np->in_redir == NAT_MAP) { 2481 nat_delnat(np); 2482 nat_addnat(np, ifs); 2483 } 2484 } 2485 2486 if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) { 2487 goto badnat; 2488 } 2489 2490 nat_calc_chksum_diffs(nat); 2491 2492 if (flags & SI_WILDP) 2493 ifs->ifs_nat_stats.ns_wilds++; 2494 goto done; 2495 badnat: 2496 ifs->ifs_nat_stats.ns_badnat++; 2497 if ((hm = nat->nat_hm) != NULL) 2498 fr_hostmapdel(&hm); 2499 KFREE(nat); 2500 nat = NULL; 2501 done: 2502 if ((flags & NAT_SLAVE) == 0) { 2503 MUTEX_EXIT(&ifs->ifs_ipf_nat_new); 2504 } 2505 return nat; 2506 } 2507 2508 2509 /* ------------------------------------------------------------------------ */ 2510 /* Function: nat_finalise */ 2511 /* Returns: int - 0 == sucess, -1 == failure */ 2512 /* Parameters: fin(I) - pointer to packet information */ 2513 /* nat(I) - pointer to NAT entry */ 2514 /* ni(I) - pointer to structure with misc. information needed */ 2515 /* to create new NAT entry. */ 2516 /* Write Lock: ipf_nat */ 2517 /* */ 2518 /* This is the tail end of constructing a new NAT entry and is the same */ 2519 /* for both IPv4 and IPv6. 
*/ 2520 /* ------------------------------------------------------------------------ */ 2521 /*ARGSUSED*/ 2522 static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction) 2523 fr_info_t *fin; 2524 nat_t *nat; 2525 natinfo_t *ni; 2526 tcphdr_t *tcp; 2527 nat_t **natsave; 2528 int direction; 2529 { 2530 frentry_t *fr; 2531 ipnat_t *np; 2532 ipf_stack_t *ifs = fin->fin_ifs; 2533 2534 np = ni->nai_np; 2535 2536 COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0], fin->fin_v); 2537 2538 #ifdef IPFILTER_SYNC 2539 if ((nat->nat_flags & SI_CLONE) == 0) 2540 nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat); 2541 #endif 2542 2543 nat->nat_me = natsave; 2544 nat->nat_dir = direction; 2545 nat->nat_ifps[0] = np->in_ifps[0]; 2546 nat->nat_ifps[1] = np->in_ifps[1]; 2547 nat->nat_ptr = np; 2548 nat->nat_p = fin->fin_p; 2549 nat->nat_mssclamp = np->in_mssclamp; 2550 fr = fin->fin_fr; 2551 nat->nat_fr = fr; 2552 2553 if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0)) 2554 if (appr_new(fin, nat) == -1) 2555 return -1; 2556 2557 if (nat_insert(nat, fin->fin_rev, ifs) == 0) { 2558 if (ifs->ifs_nat_logging) 2559 nat_log(nat, (u_int)np->in_redir, ifs); 2560 np->in_use++; 2561 if (fr != NULL) { 2562 MUTEX_ENTER(&fr->fr_lock); 2563 fr->fr_ref++; 2564 MUTEX_EXIT(&fr->fr_lock); 2565 } 2566 return 0; 2567 } 2568 2569 /* 2570 * nat_insert failed, so cleanup time... 2571 */ 2572 return -1; 2573 } 2574 2575 2576 /* ------------------------------------------------------------------------ */ 2577 /* Function: nat_insert */ 2578 /* Returns: int - 0 == sucess, -1 == failure */ 2579 /* Parameters: nat(I) - pointer to NAT structure */ 2580 /* rev(I) - flag indicating forward/reverse direction of packet */ 2581 /* Write Lock: ipf_nat */ 2582 /* */ 2583 /* Insert a NAT entry into the hash tables for searching and add it to the */ 2584 /* list of active NAT entries. Adjust global counters when complete. 
*/ 2585 /* ------------------------------------------------------------------------ */ 2586 int nat_insert(nat, rev, ifs) 2587 nat_t *nat; 2588 int rev; 2589 ipf_stack_t *ifs; 2590 { 2591 u_int hv1, hv2; 2592 nat_t **natp; 2593 2594 /* 2595 * Try and return an error as early as possible, so calculate the hash 2596 * entry numbers first and then proceed. 2597 */ 2598 if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) { 2599 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 2600 0xffffffff); 2601 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport, 2602 ifs->ifs_ipf_nattable_sz); 2603 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 2604 0xffffffff); 2605 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport, 2606 ifs->ifs_ipf_nattable_sz); 2607 } else { 2608 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff); 2609 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, 2610 ifs->ifs_ipf_nattable_sz); 2611 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff); 2612 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, 2613 ifs->ifs_ipf_nattable_sz); 2614 } 2615 2616 if (ifs->ifs_nat_stats.ns_bucketlen[0][hv1] >= ifs->ifs_fr_nat_maxbucket || 2617 ifs->ifs_nat_stats.ns_bucketlen[1][hv2] >= ifs->ifs_fr_nat_maxbucket) { 2618 return -1; 2619 } 2620 2621 nat->nat_hv[0] = hv1; 2622 nat->nat_hv[1] = hv2; 2623 2624 MUTEX_INIT(&nat->nat_lock, "nat entry lock"); 2625 2626 nat->nat_rev = rev; 2627 nat->nat_ref = 1; 2628 nat->nat_bytes[0] = 0; 2629 nat->nat_pkts[0] = 0; 2630 nat->nat_bytes[1] = 0; 2631 nat->nat_pkts[1] = 0; 2632 2633 nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0'; 2634 nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4, ifs); 2635 2636 if (nat->nat_ifnames[1][0] !='\0') { 2637 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; 2638 nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4, ifs); 2639 } else { 2640 (void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0], 2641 LIFNAMSIZ); 2642 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; 2643 nat->nat_ifps[1] 
= nat->nat_ifps[0]; 2644 } 2645 2646 nat->nat_next = ifs->ifs_nat_instances; 2647 nat->nat_pnext = &ifs->ifs_nat_instances; 2648 if (ifs->ifs_nat_instances) 2649 ifs->ifs_nat_instances->nat_pnext = &nat->nat_next; 2650 ifs->ifs_nat_instances = nat; 2651 2652 natp = &ifs->ifs_nat_table[0][hv1]; 2653 if (*natp) 2654 (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; 2655 nat->nat_phnext[0] = natp; 2656 nat->nat_hnext[0] = *natp; 2657 *natp = nat; 2658 ifs->ifs_nat_stats.ns_bucketlen[0][hv1]++; 2659 2660 natp = &ifs->ifs_nat_table[1][hv2]; 2661 if (*natp) 2662 (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; 2663 nat->nat_phnext[1] = natp; 2664 nat->nat_hnext[1] = *natp; 2665 *natp = nat; 2666 ifs->ifs_nat_stats.ns_bucketlen[1][hv2]++; 2667 2668 fr_setnatqueue(nat, rev, ifs); 2669 2670 ifs->ifs_nat_stats.ns_added++; 2671 ifs->ifs_nat_stats.ns_inuse++; 2672 return 0; 2673 } 2674 2675 2676 /* ------------------------------------------------------------------------ */ 2677 /* Function: nat_icmperrorlookup */ 2678 /* Returns: nat_t* - point to matching NAT structure */ 2679 /* Parameters: fin(I) - pointer to packet information */ 2680 /* dir(I) - direction of packet (in/out) */ 2681 /* */ 2682 /* Check if the ICMP error message is related to an existing TCP, UDP or */ 2683 /* ICMP query nat entry. It is assumed that the packet is already of the */ 2684 /* the required length. */ 2685 /* ------------------------------------------------------------------------ */ 2686 nat_t *nat_icmperrorlookup(fin, dir) 2687 fr_info_t *fin; 2688 int dir; 2689 { 2690 int flags = 0, minlen; 2691 icmphdr_t *orgicmp; 2692 tcphdr_t *tcp = NULL; 2693 u_short data[2]; 2694 nat_t *nat; 2695 ip_t *oip; 2696 u_int p; 2697 2698 /* 2699 * Does it at least have the return (basic) IP header ? 2700 * Only a basic IP header (no options) should be with an ICMP error 2701 * header. Also, if it's not an error type, then return. 
2702 */ 2703 if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR)) 2704 return NULL; 2705 2706 /* 2707 * Check packet size 2708 */ 2709 oip = (ip_t *)((char *)fin->fin_dp + 8); 2710 minlen = IP_HL(oip) << 2; 2711 if ((minlen < sizeof(ip_t)) || 2712 (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)) 2713 return NULL; 2714 /* 2715 * Is the buffer big enough for all of it ? It's the size of the IP 2716 * header claimed in the encapsulated part which is of concern. It 2717 * may be too big to be in this buffer but not so big that it's 2718 * outside the ICMP packet, leading to TCP deref's causing problems. 2719 * This is possible because we don't know how big oip_hl is when we 2720 * do the pullup early in fr_check() and thus can't gaurantee it is 2721 * all here now. 2722 */ 2723 #ifdef _KERNEL 2724 { 2725 mb_t *m; 2726 2727 m = fin->fin_m; 2728 # if defined(MENTAT) 2729 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr) 2730 return NULL; 2731 # else 2732 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > 2733 (char *)fin->fin_ip + M_LEN(m)) 2734 return NULL; 2735 # endif 2736 } 2737 #endif 2738 2739 if (fin->fin_daddr != oip->ip_src.s_addr) 2740 return NULL; 2741 2742 p = oip->ip_p; 2743 if (p == IPPROTO_TCP) 2744 flags = IPN_TCP; 2745 else if (p == IPPROTO_UDP) 2746 flags = IPN_UDP; 2747 else if (p == IPPROTO_ICMP) { 2748 orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2749 2750 /* see if this is related to an ICMP query */ 2751 if (nat_icmpquerytype4(orgicmp->icmp_type)) { 2752 data[0] = fin->fin_data[0]; 2753 data[1] = fin->fin_data[1]; 2754 fin->fin_data[0] = 0; 2755 fin->fin_data[1] = orgicmp->icmp_id; 2756 2757 flags = IPN_ICMPERR|IPN_ICMPQUERY; 2758 /* 2759 * NOTE : dir refers to the direction of the original 2760 * ip packet. By definition the icmp error 2761 * message flows in the opposite direction. 
2762 */ 2763 if (dir == NAT_INBOUND) 2764 nat = nat_inlookup(fin, flags, p, oip->ip_dst, 2765 oip->ip_src); 2766 else 2767 nat = nat_outlookup(fin, flags, p, oip->ip_dst, 2768 oip->ip_src); 2769 fin->fin_data[0] = data[0]; 2770 fin->fin_data[1] = data[1]; 2771 return nat; 2772 } 2773 } 2774 2775 if (flags & IPN_TCPUDP) { 2776 minlen += 8; /* + 64bits of data to get ports */ 2777 if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen) 2778 return NULL; 2779 2780 data[0] = fin->fin_data[0]; 2781 data[1] = fin->fin_data[1]; 2782 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2783 fin->fin_data[0] = ntohs(tcp->th_dport); 2784 fin->fin_data[1] = ntohs(tcp->th_sport); 2785 2786 if (dir == NAT_INBOUND) { 2787 nat = nat_inlookup(fin, flags, p, oip->ip_dst, 2788 oip->ip_src); 2789 } else { 2790 nat = nat_outlookup(fin, flags, p, oip->ip_dst, 2791 oip->ip_src); 2792 } 2793 fin->fin_data[0] = data[0]; 2794 fin->fin_data[1] = data[1]; 2795 return nat; 2796 } 2797 if (dir == NAT_INBOUND) 2798 return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src); 2799 else 2800 return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src); 2801 } 2802 2803 2804 /* ------------------------------------------------------------------------ */ 2805 /* Function: nat_icmperror */ 2806 /* Returns: nat_t* - point to matching NAT structure */ 2807 /* Parameters: fin(I) - pointer to packet information */ 2808 /* nflags(I) - NAT flags for this packet */ 2809 /* dir(I) - direction of packet (in/out) */ 2810 /* */ 2811 /* Fix up an ICMP packet which is an error message for an existing NAT */ 2812 /* session. This will correct both packet header data and checksums. */ 2813 /* */ 2814 /* This should *ONLY* be used for incoming ICMP error packets to make sure */ 2815 /* a NAT'd ICMP packet gets correctly recognised. 
*/ 2816 /* ------------------------------------------------------------------------ */ 2817 nat_t *nat_icmperror(fin, nflags, dir) 2818 fr_info_t *fin; 2819 u_int *nflags; 2820 int dir; 2821 { 2822 u_32_t sum1, sum2, sumd, psum1, psum2, psumd, sumd2; 2823 struct in_addr in; 2824 icmphdr_t *icmp, *orgicmp; 2825 int dlen; 2826 udphdr_t *udp; 2827 tcphdr_t *tcp; 2828 nat_t *nat; 2829 ip_t *oip; 2830 if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) 2831 return NULL; 2832 2833 /* 2834 * nat_icmperrorlookup() looks up nat entry associated with the 2835 * offending IP packet and returns pointer to the entry, or NULL 2836 * if packet wasn't natted or for `defective' packets. 2837 */ 2838 2839 if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir))) 2840 return NULL; 2841 2842 sumd2 = 0; 2843 *nflags = IPN_ICMPERR; 2844 icmp = fin->fin_dp; 2845 oip = (ip_t *)&icmp->icmp_ip; 2846 udp = (udphdr_t *)((((char *)oip) + (IP_HL(oip) << 2))); 2847 tcp = (tcphdr_t *)udp; 2848 dlen = fin->fin_plen - ((char *)udp - (char *)fin->fin_ip); 2849 2850 /* 2851 * Need to adjust ICMP header to include the real IP#'s and 2852 * port #'s. There are three steps required. 2853 * 2854 * Step 1 2855 * Fix the IP addresses in the offending IP packet and update 2856 * ip header checksum to compensate for the change. 2857 * 2858 * No update needed here for icmp_cksum because the ICMP checksum 2859 * is calculated over the complete ICMP packet, which includes the 2860 * changed oip IP addresses and oip->ip_sum. These two changes 2861 * cancel each other out (if the delta for the IP address is x, 2862 * then the delta for ip_sum is minus x). 
2863 */ 2864 2865 if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) { 2866 sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr)); 2867 in = nat->nat_inip; 2868 oip->ip_src = in; 2869 } else { 2870 sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr)); 2871 in = nat->nat_outip; 2872 oip->ip_dst = in; 2873 } 2874 2875 sum2 = LONG_SUM(ntohl(in.s_addr)); 2876 CALC_SUMD(sum1, sum2, sumd); 2877 fix_datacksum(&oip->ip_sum, sumd); 2878 2879 /* 2880 * Step 2 2881 * Perform other adjustments based on protocol of offending packet. 2882 */ 2883 2884 switch (oip->ip_p) { 2885 case IPPROTO_TCP : 2886 case IPPROTO_UDP : 2887 2888 /* 2889 * For offending TCP/UDP IP packets, translate the ports 2890 * based on the NAT specification. 2891 * 2892 * Advance notice : Now it becomes complicated :-) 2893 * 2894 * Since the port and IP addresse fields are both part 2895 * of the TCP/UDP checksum of the offending IP packet, 2896 * we need to adjust that checksum as well. 2897 * 2898 * To further complicate things, the TCP/UDP checksum 2899 * may not be present. We must check to see if the 2900 * length of the data portion is big enough to hold 2901 * the checksum. In the UDP case, a test to determine 2902 * if the checksum is even set is also required. 2903 * 2904 * Any changes to an IP address, port or checksum within 2905 * the ICMP packet requires a change to icmp_cksum. 2906 * 2907 * Be extremely careful here ... The change is dependent 2908 * upon whether or not the TCP/UPD checksum is present. 2909 * 2910 * If TCP/UPD checksum is present, the icmp_cksum must 2911 * compensate for checksum modification resulting from 2912 * IP address change only. Port change and resulting 2913 * data checksum adjustments cancel each other out. 2914 * 2915 * If TCP/UDP checksum is not present, icmp_cksum must 2916 * compensate for port change only. The IP address 2917 * change does not modify anything else in this case. 
2918 */ 2919 2920 psum1 = 0; 2921 psum2 = 0; 2922 psumd = 0; 2923 2924 if ((tcp->th_dport == nat->nat_oport) && 2925 (tcp->th_sport != nat->nat_inport)) { 2926 2927 /* 2928 * Translate the source port. 2929 */ 2930 2931 psum1 = ntohs(tcp->th_sport); 2932 psum2 = ntohs(nat->nat_inport); 2933 tcp->th_sport = nat->nat_inport; 2934 2935 } else if ((tcp->th_sport == nat->nat_oport) && 2936 (tcp->th_dport != nat->nat_outport)) { 2937 2938 /* 2939 * Translate the destination port. 2940 */ 2941 2942 psum1 = ntohs(tcp->th_dport); 2943 psum2 = ntohs(nat->nat_outport); 2944 tcp->th_dport = nat->nat_outport; 2945 } 2946 2947 if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) { 2948 2949 /* 2950 * TCP checksum present. 2951 * 2952 * Adjust data checksum and icmp checksum to 2953 * compensate for any IP address change. 2954 */ 2955 2956 sum1 = ntohs(tcp->th_sum); 2957 fix_datacksum(&tcp->th_sum, sumd); 2958 sum2 = ntohs(tcp->th_sum); 2959 sumd2 = sumd << 1; 2960 CALC_SUMD(sum1, sum2, sumd); 2961 sumd2 += sumd; 2962 2963 /* 2964 * Also make data checksum adjustment to 2965 * compensate for any port change. 2966 */ 2967 2968 if (psum1 != psum2) { 2969 CALC_SUMD(psum1, psum2, psumd); 2970 fix_datacksum(&tcp->th_sum, psumd); 2971 } 2972 2973 } else if ((oip->ip_p == IPPROTO_UDP) && 2974 (dlen >= 8) && (udp->uh_sum != 0)) { 2975 2976 /* 2977 * The UDP checksum is present and set. 2978 * 2979 * Adjust data checksum and icmp checksum to 2980 * compensate for any IP address change. 2981 */ 2982 2983 sum1 = ntohs(udp->uh_sum); 2984 fix_datacksum(&udp->uh_sum, sumd); 2985 sum2 = ntohs(udp->uh_sum); 2986 sumd2 = sumd << 1; 2987 CALC_SUMD(sum1, sum2, sumd); 2988 sumd2 += sumd; 2989 2990 /* 2991 * Also make data checksum adjustment to 2992 * compensate for any port change. 2993 */ 2994 2995 if (psum1 != psum2) { 2996 CALC_SUMD(psum1, psum2, psumd); 2997 fix_datacksum(&udp->uh_sum, psumd); 2998 } 2999 3000 } else { 3001 3002 /* 3003 * Data checksum was not present. 
3004 * 3005 * Compensate for any port change. 3006 */ 3007 3008 CALC_SUMD(psum2, psum1, psumd); 3009 sumd2 += psumd; 3010 } 3011 break; 3012 3013 case IPPROTO_ICMP : 3014 3015 orgicmp = (icmphdr_t *)udp; 3016 3017 if ((nat->nat_dir == NAT_OUTBOUND) && 3018 (orgicmp->icmp_id != nat->nat_inport) && 3019 (dlen >= 8)) { 3020 3021 /* 3022 * Fix ICMP checksum (of the offening ICMP 3023 * query packet) to compensate the change 3024 * in the ICMP id of the offending ICMP 3025 * packet. 3026 * 3027 * Since you modify orgicmp->icmp_id with 3028 * a delta (say x) and you compensate that 3029 * in origicmp->icmp_cksum with a delta 3030 * minus x, you don't have to adjust the 3031 * overall icmp->icmp_cksum 3032 */ 3033 3034 sum1 = ntohs(orgicmp->icmp_id); 3035 sum2 = ntohs(nat->nat_inport); 3036 CALC_SUMD(sum1, sum2, sumd); 3037 orgicmp->icmp_id = nat->nat_inport; 3038 fix_datacksum(&orgicmp->icmp_cksum, sumd); 3039 3040 } /* nat_dir can't be NAT_INBOUND for icmp queries */ 3041 3042 break; 3043 3044 default : 3045 3046 break; 3047 3048 } /* switch (oip->ip_p) */ 3049 3050 /* 3051 * Step 3 3052 * Make the adjustments to icmp checksum. 3053 */ 3054 3055 if (sumd2 != 0) { 3056 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); 3057 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); 3058 fix_incksum(&icmp->icmp_cksum, sumd2); 3059 } 3060 return nat; 3061 } 3062 3063 3064 /* 3065 * NB: these lookups don't lock access to the list, it assumed that it has 3066 * already been done! 
3067 */ 3068 3069 /* ------------------------------------------------------------------------ */ 3070 /* Function: nat_inlookup */ 3071 /* Returns: nat_t* - NULL == no match, */ 3072 /* else pointer to matching NAT entry */ 3073 /* Parameters: fin(I) - pointer to packet information */ 3074 /* flags(I) - NAT flags for this packet */ 3075 /* p(I) - protocol for this packet */ 3076 /* src(I) - source IP address */ 3077 /* mapdst(I) - destination IP address */ 3078 /* */ 3079 /* Lookup a nat entry based on the mapped destination ip address/port and */ 3080 /* real source address/port. We use this lookup when receiving a packet, */ 3081 /* we're looking for a table entry, based on the destination address. */ 3082 /* */ 3083 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */ 3084 /* */ 3085 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */ 3086 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */ 3087 /* */ 3088 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */ 3089 /* the packet is of said protocol */ 3090 /* ------------------------------------------------------------------------ */ 3091 nat_t *nat_inlookup(fin, flags, p, src, mapdst) 3092 fr_info_t *fin; 3093 u_int flags, p; 3094 struct in_addr src , mapdst; 3095 { 3096 u_short sport, dport; 3097 ipnat_t *ipn; 3098 u_int sflags; 3099 nat_t *nat; 3100 int nflags; 3101 u_32_t dst; 3102 void *ifp; 3103 u_int hv; 3104 ipf_stack_t *ifs = fin->fin_ifs; 3105 3106 if (fin != NULL) 3107 ifp = fin->fin_ifp; 3108 else 3109 ifp = NULL; 3110 sport = 0; 3111 dport = 0; 3112 dst = mapdst.s_addr; 3113 sflags = flags & NAT_TCPUDPICMP; 3114 3115 switch (p) 3116 { 3117 case IPPROTO_TCP : 3118 case IPPROTO_UDP : 3119 sport = htons(fin->fin_data[0]); 3120 dport = htons(fin->fin_data[1]); 3121 break; 3122 case IPPROTO_ICMP : 3123 if (flags & IPN_ICMPERR) 3124 sport = fin->fin_data[1]; 3125 else 3126 dport = fin->fin_data[1]; 3127 break; 3128 default : 3129 break; 
3130 } 3131 3132 3133 if ((flags & SI_WILDP) != 0) 3134 goto find_in_wild_ports; 3135 3136 hv = NAT_HASH_FN(dst, dport, 0xffffffff); 3137 hv = NAT_HASH_FN(src.s_addr, hv + sport, ifs->ifs_ipf_nattable_sz); 3138 nat = ifs->ifs_nat_table[1][hv]; 3139 for (; nat; nat = nat->nat_hnext[1]) { 3140 if (nat->nat_ifps[0] != NULL) { 3141 if ((ifp != NULL) && (ifp != nat->nat_ifps[0])) 3142 continue; 3143 } else if (ifp != NULL) 3144 nat->nat_ifps[0] = ifp; 3145 3146 nflags = nat->nat_flags; 3147 3148 if (nat->nat_oip.s_addr == src.s_addr && 3149 nat->nat_outip.s_addr == dst && 3150 (((p == 0) && 3151 (sflags == (nat->nat_flags & IPN_TCPUDPICMP))) 3152 || (p == nat->nat_p))) { 3153 switch (p) 3154 { 3155 #if 0 3156 case IPPROTO_GRE : 3157 if (nat->nat_call[1] != fin->fin_data[0]) 3158 continue; 3159 break; 3160 #endif 3161 case IPPROTO_ICMP : 3162 if ((flags & IPN_ICMPERR) != 0) { 3163 if (nat->nat_outport != sport) 3164 continue; 3165 } else { 3166 if (nat->nat_outport != dport) 3167 continue; 3168 } 3169 break; 3170 case IPPROTO_TCP : 3171 case IPPROTO_UDP : 3172 if (nat->nat_oport != sport) 3173 continue; 3174 if (nat->nat_outport != dport) 3175 continue; 3176 break; 3177 default : 3178 break; 3179 } 3180 3181 ipn = nat->nat_ptr; 3182 if ((ipn != NULL) && (nat->nat_aps != NULL)) 3183 if (appr_match(fin, nat) != 0) 3184 continue; 3185 return nat; 3186 } 3187 } 3188 3189 /* 3190 * So if we didn't find it but there are wildcard members in the hash 3191 * table, go back and look for them. We do this search and update here 3192 * because it is modifying the NAT table and we want to do this only 3193 * for the first packet that matches. The exception, of course, is 3194 * for "dummy" (FI_IGNORE) lookups. 
3195 */ 3196 find_in_wild_ports: 3197 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) 3198 return NULL; 3199 if (ifs->ifs_nat_stats.ns_wilds == 0) 3200 return NULL; 3201 3202 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3203 3204 hv = NAT_HASH_FN(dst, 0, 0xffffffff); 3205 hv = NAT_HASH_FN(src.s_addr, hv, ifs->ifs_ipf_nattable_sz); 3206 3207 WRITE_ENTER(&ifs->ifs_ipf_nat); 3208 3209 nat = ifs->ifs_nat_table[1][hv]; 3210 for (; nat; nat = nat->nat_hnext[1]) { 3211 if (nat->nat_ifps[0] != NULL) { 3212 if ((ifp != NULL) && (ifp != nat->nat_ifps[0])) 3213 continue; 3214 } else if (ifp != NULL) 3215 nat->nat_ifps[0] = ifp; 3216 3217 if (nat->nat_p != fin->fin_p) 3218 continue; 3219 if (nat->nat_oip.s_addr != src.s_addr || 3220 nat->nat_outip.s_addr != dst) 3221 continue; 3222 3223 nflags = nat->nat_flags; 3224 if (!(nflags & (NAT_TCPUDP|SI_WILDP))) 3225 continue; 3226 3227 if (nat_wildok(nat, (int)sport, (int)dport, nflags, 3228 NAT_INBOUND) == 1) { 3229 if ((fin->fin_flx & FI_IGNORE) != 0) 3230 break; 3231 if ((nflags & SI_CLONE) != 0) { 3232 nat = fr_natclone(fin, nat); 3233 if (nat == NULL) 3234 break; 3235 } else { 3236 MUTEX_ENTER(&ifs->ifs_ipf_nat_new); 3237 ifs->ifs_nat_stats.ns_wilds--; 3238 MUTEX_EXIT(&ifs->ifs_ipf_nat_new); 3239 } 3240 nat->nat_oport = sport; 3241 nat->nat_outport = dport; 3242 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); 3243 nat_tabmove(nat, ifs); 3244 break; 3245 } 3246 } 3247 3248 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3249 3250 return nat; 3251 } 3252 3253 3254 /* ------------------------------------------------------------------------ */ 3255 /* Function: nat_tabmove */ 3256 /* Returns: Nil */ 3257 /* Parameters: nat(I) - pointer to NAT structure */ 3258 /* Write Lock: ipf_nat */ 3259 /* */ 3260 /* This function is only called for TCP/UDP NAT table entries where the */ 3261 /* original was placed in the table without hashing on the ports and we now */ 3262 /* want to include hashing on port numbers. 
*/ 3263 /* ------------------------------------------------------------------------ */ 3264 static void nat_tabmove(nat, ifs) 3265 nat_t *nat; 3266 ipf_stack_t *ifs; 3267 { 3268 nat_t **natp; 3269 u_int hv; 3270 3271 if (nat->nat_flags & SI_CLONE) 3272 return; 3273 3274 /* 3275 * Remove the NAT entry from the old location 3276 */ 3277 if (nat->nat_hnext[0]) 3278 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; 3279 *nat->nat_phnext[0] = nat->nat_hnext[0]; 3280 ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--; 3281 3282 if (nat->nat_hnext[1]) 3283 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; 3284 *nat->nat_phnext[1] = nat->nat_hnext[1]; 3285 ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--; 3286 3287 /* 3288 * Add into the NAT table in the new position 3289 */ 3290 hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff); 3291 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport, 3292 ifs->ifs_ipf_nattable_sz); 3293 nat->nat_hv[0] = hv; 3294 natp = &ifs->ifs_nat_table[0][hv]; 3295 if (*natp) 3296 (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; 3297 nat->nat_phnext[0] = natp; 3298 nat->nat_hnext[0] = *natp; 3299 *natp = nat; 3300 ifs->ifs_nat_stats.ns_bucketlen[0][hv]++; 3301 3302 hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff); 3303 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport, 3304 ifs->ifs_ipf_nattable_sz); 3305 nat->nat_hv[1] = hv; 3306 natp = &ifs->ifs_nat_table[1][hv]; 3307 if (*natp) 3308 (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; 3309 nat->nat_phnext[1] = natp; 3310 nat->nat_hnext[1] = *natp; 3311 *natp = nat; 3312 ifs->ifs_nat_stats.ns_bucketlen[1][hv]++; 3313 } 3314 3315 3316 /* ------------------------------------------------------------------------ */ 3317 /* Function: nat_outlookup */ 3318 /* Returns: nat_t* - NULL == no match, */ 3319 /* else pointer to matching NAT entry */ 3320 /* Parameters: fin(I) - pointer to packet information */ 3321 /* flags(I) - NAT flags for this packet 
*/ 3322 /* p(I) - protocol for this packet */ 3323 /* src(I) - source IP address */ 3324 /* dst(I) - destination IP address */ 3325 /* rw(I) - 1 == write lock on ipf_nat held, 0 == read lock. */ 3326 /* */ 3327 /* Lookup a nat entry based on the source 'real' ip address/port and */ 3328 /* destination address/port. We use this lookup when sending a packet out, */ 3329 /* we're looking for a table entry, based on the source address. */ 3330 /* */ 3331 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */ 3332 /* */ 3333 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */ 3334 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */ 3335 /* */ 3336 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */ 3337 /* the packet is of said protocol */ 3338 /* ------------------------------------------------------------------------ */ 3339 nat_t *nat_outlookup(fin, flags, p, src, dst) 3340 fr_info_t *fin; 3341 u_int flags, p; 3342 struct in_addr src , dst; 3343 { 3344 u_short sport, dport; 3345 u_int sflags; 3346 ipnat_t *ipn; 3347 u_32_t srcip; 3348 nat_t *nat; 3349 int nflags; 3350 void *ifp; 3351 u_int hv; 3352 ipf_stack_t *ifs = fin->fin_ifs; 3353 3354 ifp = fin->fin_ifp; 3355 3356 srcip = src.s_addr; 3357 sflags = flags & IPN_TCPUDPICMP; 3358 sport = 0; 3359 dport = 0; 3360 3361 switch (p) 3362 { 3363 case IPPROTO_TCP : 3364 case IPPROTO_UDP : 3365 sport = htons(fin->fin_data[0]); 3366 dport = htons(fin->fin_data[1]); 3367 break; 3368 case IPPROTO_ICMP : 3369 if (flags & IPN_ICMPERR) 3370 sport = fin->fin_data[1]; 3371 else 3372 dport = fin->fin_data[1]; 3373 break; 3374 default : 3375 break; 3376 } 3377 3378 if ((flags & SI_WILDP) != 0) 3379 goto find_out_wild_ports; 3380 3381 hv = NAT_HASH_FN(srcip, sport, 0xffffffff); 3382 hv = NAT_HASH_FN(dst.s_addr, hv + dport, ifs->ifs_ipf_nattable_sz); 3383 nat = ifs->ifs_nat_table[0][hv]; 3384 for (; nat; nat = nat->nat_hnext[0]) { 3385 if (nat->nat_ifps[1] != NULL) { 
3386 if ((ifp != NULL) && (ifp != nat->nat_ifps[1])) 3387 continue; 3388 } else if (ifp != NULL) 3389 nat->nat_ifps[1] = ifp; 3390 3391 nflags = nat->nat_flags; 3392 3393 if (nat->nat_inip.s_addr == srcip && 3394 nat->nat_oip.s_addr == dst.s_addr && 3395 (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP))) 3396 || (p == nat->nat_p))) { 3397 switch (p) 3398 { 3399 #if 0 3400 case IPPROTO_GRE : 3401 if (nat->nat_call[1] != fin->fin_data[0]) 3402 continue; 3403 break; 3404 #endif 3405 case IPPROTO_TCP : 3406 case IPPROTO_UDP : 3407 if (nat->nat_oport != dport) 3408 continue; 3409 if (nat->nat_inport != sport) 3410 continue; 3411 break; 3412 default : 3413 break; 3414 } 3415 3416 ipn = nat->nat_ptr; 3417 if ((ipn != NULL) && (nat->nat_aps != NULL)) 3418 if (appr_match(fin, nat) != 0) 3419 continue; 3420 return nat; 3421 } 3422 } 3423 3424 /* 3425 * So if we didn't find it but there are wildcard members in the hash 3426 * table, go back and look for them. We do this search and update here 3427 * because it is modifying the NAT table and we want to do this only 3428 * for the first packet that matches. The exception, of course, is 3429 * for "dummy" (FI_IGNORE) lookups. 
3430 */ 3431 find_out_wild_ports: 3432 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) 3433 return NULL; 3434 if (ifs->ifs_nat_stats.ns_wilds == 0) 3435 return NULL; 3436 3437 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3438 3439 hv = NAT_HASH_FN(srcip, 0, 0xffffffff); 3440 hv = NAT_HASH_FN(dst.s_addr, hv, ifs->ifs_ipf_nattable_sz); 3441 3442 WRITE_ENTER(&ifs->ifs_ipf_nat); 3443 3444 nat = ifs->ifs_nat_table[0][hv]; 3445 for (; nat; nat = nat->nat_hnext[0]) { 3446 if (nat->nat_ifps[1] != NULL) { 3447 if ((ifp != NULL) && (ifp != nat->nat_ifps[1])) 3448 continue; 3449 } else if (ifp != NULL) 3450 nat->nat_ifps[1] = ifp; 3451 3452 if (nat->nat_p != fin->fin_p) 3453 continue; 3454 if ((nat->nat_inip.s_addr != srcip) || 3455 (nat->nat_oip.s_addr != dst.s_addr)) 3456 continue; 3457 3458 nflags = nat->nat_flags; 3459 if (!(nflags & (NAT_TCPUDP|SI_WILDP))) 3460 continue; 3461 3462 if (nat_wildok(nat, (int)sport, (int)dport, nflags, 3463 NAT_OUTBOUND) == 1) { 3464 if ((fin->fin_flx & FI_IGNORE) != 0) 3465 break; 3466 if ((nflags & SI_CLONE) != 0) { 3467 nat = fr_natclone(fin, nat); 3468 if (nat == NULL) 3469 break; 3470 } else { 3471 MUTEX_ENTER(&ifs->ifs_ipf_nat_new); 3472 ifs->ifs_nat_stats.ns_wilds--; 3473 MUTEX_EXIT(&ifs->ifs_ipf_nat_new); 3474 } 3475 nat->nat_inport = sport; 3476 nat->nat_oport = dport; 3477 if (nat->nat_outport == 0) 3478 nat->nat_outport = sport; 3479 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); 3480 nat_tabmove(nat, ifs); 3481 break; 3482 } 3483 } 3484 3485 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3486 3487 return nat; 3488 } 3489 3490 3491 /* ------------------------------------------------------------------------ */ 3492 /* Function: nat_lookupredir */ 3493 /* Returns: nat_t* - NULL == no match, */ 3494 /* else pointer to matching NAT entry */ 3495 /* Parameters: np(I) - pointer to description of packet to find NAT table */ 3496 /* entry for. 
*/ 3497 /* */ 3498 /* Lookup the NAT tables to search for a matching redirect */ 3499 /* ------------------------------------------------------------------------ */ 3500 nat_t *nat_lookupredir(np, ifs) 3501 natlookup_t *np; 3502 ipf_stack_t *ifs; 3503 { 3504 fr_info_t fi; 3505 nat_t *nat; 3506 3507 bzero((char *)&fi, sizeof(fi)); 3508 if (np->nl_flags & IPN_IN) { 3509 fi.fin_data[0] = ntohs(np->nl_realport); 3510 fi.fin_data[1] = ntohs(np->nl_outport); 3511 } else { 3512 fi.fin_data[0] = ntohs(np->nl_inport); 3513 fi.fin_data[1] = ntohs(np->nl_outport); 3514 } 3515 if (np->nl_flags & IPN_TCP) 3516 fi.fin_p = IPPROTO_TCP; 3517 else if (np->nl_flags & IPN_UDP) 3518 fi.fin_p = IPPROTO_UDP; 3519 else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY)) 3520 fi.fin_p = IPPROTO_ICMP; 3521 3522 fi.fin_ifs = ifs; 3523 /* 3524 * We can do two sorts of lookups: 3525 * - IPN_IN: we have the `real' and `out' address, look for `in'. 3526 * - default: we have the `in' and `out' address, look for `real'. 3527 */ 3528 if (np->nl_flags & IPN_IN) { 3529 if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p, 3530 np->nl_realip, np->nl_outip))) { 3531 np->nl_inip = nat->nat_inip; 3532 np->nl_inport = nat->nat_inport; 3533 } 3534 } else { 3535 /* 3536 * If nl_inip is non null, this is a lookup based on the real 3537 * ip address. Else, we use the fake. 
3538 */ 3539 if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p, 3540 np->nl_inip, np->nl_outip))) { 3541 3542 if ((np->nl_flags & IPN_FINDFORWARD) != 0) { 3543 fr_info_t fin; 3544 bzero((char *)&fin, sizeof(fin)); 3545 fin.fin_p = nat->nat_p; 3546 fin.fin_data[0] = ntohs(nat->nat_outport); 3547 fin.fin_data[1] = ntohs(nat->nat_oport); 3548 fin.fin_ifs = ifs; 3549 if (nat_inlookup(&fin, np->nl_flags, fin.fin_p, 3550 nat->nat_outip, 3551 nat->nat_oip) != NULL) { 3552 np->nl_flags &= ~IPN_FINDFORWARD; 3553 } 3554 } 3555 3556 np->nl_realip = nat->nat_outip; 3557 np->nl_realport = nat->nat_outport; 3558 } 3559 } 3560 3561 return nat; 3562 } 3563 3564 3565 /* ------------------------------------------------------------------------ */ 3566 /* Function: nat_match */ 3567 /* Returns: int - 0 == no match, 1 == match */ 3568 /* Parameters: fin(I) - pointer to packet information */ 3569 /* np(I) - pointer to NAT rule */ 3570 /* */ 3571 /* Pull the matching of a packet against a NAT rule out of that complex */ 3572 /* loop inside fr_checknatin() and lay it out properly in its own function. 
*/ 3573 /* ------------------------------------------------------------------------ */ 3574 static int nat_match(fin, np) 3575 fr_info_t *fin; 3576 ipnat_t *np; 3577 { 3578 frtuc_t *ft; 3579 3580 if (fin->fin_v != 4) 3581 return 0; 3582 3583 if (np->in_p && fin->fin_p != np->in_p) 3584 return 0; 3585 3586 if (fin->fin_out) { 3587 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK))) 3588 return 0; 3589 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip) 3590 ^ ((np->in_flags & IPN_NOTSRC) != 0)) 3591 return 0; 3592 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip) 3593 ^ ((np->in_flags & IPN_NOTDST) != 0)) 3594 return 0; 3595 } else { 3596 if (!(np->in_redir & NAT_REDIRECT)) 3597 return 0; 3598 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip) 3599 ^ ((np->in_flags & IPN_NOTSRC) != 0)) 3600 return 0; 3601 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip) 3602 ^ ((np->in_flags & IPN_NOTDST) != 0)) 3603 return 0; 3604 } 3605 3606 ft = &np->in_tuc; 3607 if (!(fin->fin_flx & FI_TCPUDP) || 3608 (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) { 3609 if (ft->ftu_scmp || ft->ftu_dcmp) 3610 return 0; 3611 return 1; 3612 } 3613 3614 return fr_tcpudpchk(fin, ft); 3615 } 3616 3617 3618 /* ------------------------------------------------------------------------ */ 3619 /* Function: nat_update */ 3620 /* Returns: Nil */ 3621 /* Parameters: nat(I) - pointer to NAT structure */ 3622 /* np(I) - pointer to NAT rule */ 3623 /* */ 3624 /* Updates the lifetime of a NAT table entry for non-TCP packets. Must be */ 3625 /* called with fin_rev updated - i.e. after calling nat_proto(). 
*/ 3626 /* ------------------------------------------------------------------------ */ 3627 void nat_update(fin, nat, np) 3628 fr_info_t *fin; 3629 nat_t *nat; 3630 ipnat_t *np; 3631 { 3632 ipftq_t *ifq, *ifq2; 3633 ipftqent_t *tqe; 3634 ipf_stack_t *ifs = fin->fin_ifs; 3635 3636 MUTEX_ENTER(&nat->nat_lock); 3637 tqe = &nat->nat_tqe; 3638 ifq = tqe->tqe_ifq; 3639 3640 /* 3641 * We allow over-riding of NAT timeouts from NAT rules, even for 3642 * TCP, however, if it is TCP and there is no rule timeout set, 3643 * then do not update the timeout here. 3644 */ 3645 if (np != NULL) 3646 ifq2 = np->in_tqehead[fin->fin_rev]; 3647 else 3648 ifq2 = NULL; 3649 3650 if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) { 3651 (void) fr_tcp_age(&nat->nat_tqe, fin, ifs->ifs_nat_tqb, 0); 3652 } else { 3653 if (ifq2 == NULL) { 3654 if (nat->nat_p == IPPROTO_UDP) 3655 ifq2 = &ifs->ifs_nat_udptq; 3656 else if (nat->nat_p == IPPROTO_ICMP) 3657 ifq2 = &ifs->ifs_nat_icmptq; 3658 else 3659 ifq2 = &ifs->ifs_nat_iptq; 3660 } 3661 3662 fr_movequeue(tqe, ifq, ifq2, ifs); 3663 } 3664 MUTEX_EXIT(&nat->nat_lock); 3665 } 3666 3667 3668 /* ------------------------------------------------------------------------ */ 3669 /* Function: fr_checknatout */ 3670 /* Returns: int - -1 == packet failed NAT checks so block it, */ 3671 /* 0 == no packet translation occurred, */ 3672 /* 1 == packet was successfully translated. */ 3673 /* Parameters: fin(I) - pointer to packet information */ 3674 /* passp(I) - pointer to filtering result flags */ 3675 /* */ 3676 /* Check to see if an outcoming packet should be changed. ICMP packets are */ 3677 /* first checked to see if they match an existing entry (if an error), */ 3678 /* otherwise a search of the current NAT table is made. If neither results */ 3679 /* in a match then a search for a matching NAT rule is made. Create a new */ 3680 /* NAT entry if a we matched a NAT rule. Lastly, actually change the */ 3681 /* packet header(s) as required. 
*/ 3682 /* ------------------------------------------------------------------------ */ 3683 int fr_checknatout(fin, passp) 3684 fr_info_t *fin; 3685 u_32_t *passp; 3686 { 3687 struct ifnet *ifp, *sifp; 3688 icmphdr_t *icmp = NULL; 3689 tcphdr_t *tcp = NULL; 3690 int rval, natfailed; 3691 ipnat_t *np = NULL; 3692 u_int nflags = 0; 3693 u_32_t ipa, iph; 3694 int natadd = 1; 3695 frentry_t *fr; 3696 nat_t *nat; 3697 ipf_stack_t *ifs = fin->fin_ifs; 3698 3699 if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0) 3700 return 0; 3701 3702 natfailed = 0; 3703 fr = fin->fin_fr; 3704 sifp = fin->fin_ifp; 3705 if ((fr != NULL) && !(fr->fr_flags & FR_DUP) && 3706 fr->fr_tifs[fin->fin_rev].fd_ifp && 3707 fr->fr_tifs[fin->fin_rev].fd_ifp != (void *)-1) 3708 fin->fin_ifp = fr->fr_tifs[fin->fin_rev].fd_ifp; 3709 ifp = fin->fin_ifp; 3710 3711 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 3712 switch (fin->fin_p) 3713 { 3714 case IPPROTO_TCP : 3715 nflags = IPN_TCP; 3716 break; 3717 case IPPROTO_UDP : 3718 nflags = IPN_UDP; 3719 break; 3720 case IPPROTO_ICMP : 3721 icmp = fin->fin_dp; 3722 3723 /* 3724 * This is an incoming packet, so the destination is 3725 * the icmp_id and the source port equals 0 3726 */ 3727 if (nat_icmpquerytype4(icmp->icmp_type)) 3728 nflags = IPN_ICMPQUERY; 3729 break; 3730 default : 3731 break; 3732 } 3733 3734 if ((nflags & IPN_TCPUDP)) 3735 tcp = fin->fin_dp; 3736 } 3737 3738 ipa = fin->fin_saddr; 3739 3740 READ_ENTER(&ifs->ifs_ipf_nat); 3741 3742 if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && 3743 (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND))) 3744 /*EMPTY*/; 3745 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin))) 3746 natadd = 0; 3747 else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p, 3748 fin->fin_src, fin->fin_dst))) { 3749 nflags = nat->nat_flags; 3750 } else { 3751 u_32_t hv, msk, nmsk; 3752 3753 /* 3754 * If there is no current entry in the nat table for this IP#, 
3755 * create one for it (if there is a matching rule). 3756 */ 3757 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3758 msk = 0xffffffff; 3759 nmsk = ifs->ifs_nat_masks; 3760 WRITE_ENTER(&ifs->ifs_ipf_nat); 3761 maskloop: 3762 iph = ipa & htonl(msk); 3763 hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_natrules_sz); 3764 for (np = ifs->ifs_nat_rules[hv]; np; np = np->in_mnext) 3765 { 3766 if ((np->in_ifps[1] && (np->in_ifps[1] != ifp))) 3767 continue; 3768 if (np->in_v != fin->fin_v) 3769 continue; 3770 if (np->in_p && (np->in_p != fin->fin_p)) 3771 continue; 3772 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags)) 3773 continue; 3774 if (np->in_flags & IPN_FILTER) { 3775 if (!nat_match(fin, np)) 3776 continue; 3777 } else if ((ipa & np->in_inmsk) != np->in_inip) 3778 continue; 3779 3780 if ((fr != NULL) && 3781 !fr_matchtag(&np->in_tag, &fr->fr_nattag)) 3782 continue; 3783 3784 if (*np->in_plabel != '\0') { 3785 if (((np->in_flags & IPN_FILTER) == 0) && 3786 (np->in_dport != tcp->th_dport)) 3787 continue; 3788 if (appr_ok(fin, tcp, np) == 0) 3789 continue; 3790 } 3791 3792 if ((nat = nat_new(fin, np, NULL, nflags, 3793 NAT_OUTBOUND))) { 3794 np->in_hits++; 3795 break; 3796 } else 3797 natfailed = -1; 3798 } 3799 if ((np == NULL) && (nmsk != 0)) { 3800 while (nmsk) { 3801 msk <<= 1; 3802 if (nmsk & 0x80000000) 3803 break; 3804 nmsk <<= 1; 3805 } 3806 if (nmsk != 0) { 3807 nmsk <<= 1; 3808 goto maskloop; 3809 } 3810 } 3811 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 3812 } 3813 3814 if (nat != NULL) { 3815 rval = fr_natout(fin, nat, natadd, nflags); 3816 if (rval == 1) { 3817 MUTEX_ENTER(&nat->nat_lock); 3818 nat->nat_ref++; 3819 MUTEX_EXIT(&nat->nat_lock); 3820 nat->nat_touched = ifs->ifs_fr_ticks; 3821 fin->fin_nat = nat; 3822 } 3823 } else 3824 rval = natfailed; 3825 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 3826 3827 if (rval == -1) { 3828 if (passp != NULL) 3829 *passp = FR_BLOCK; 3830 fin->fin_flx |= FI_BADNAT; 3831 } 3832 fin->fin_ifp = sifp; 3833 return rval; 3834 } 3835 3836 /* 
------------------------------------------------------------------------ */ 3837 /* Function: fr_natout */ 3838 /* Returns: int - -1 == packet failed NAT checks so block it, */ 3839 /* 1 == packet was successfully translated. */ 3840 /* Parameters: fin(I) - pointer to packet information */ 3841 /* nat(I) - pointer to NAT structure */ 3842 /* natadd(I) - flag indicating if it is safe to add frag cache */ 3843 /* nflags(I) - NAT flags set for this packet */ 3844 /* */ 3845 /* Translate a packet coming "out" on an interface. */ 3846 /* ------------------------------------------------------------------------ */ 3847 int fr_natout(fin, nat, natadd, nflags) 3848 fr_info_t *fin; 3849 nat_t *nat; 3850 int natadd; 3851 u_32_t nflags; 3852 { 3853 icmphdr_t *icmp; 3854 u_short *csump; 3855 u_32_t sumd; 3856 tcphdr_t *tcp; 3857 ipnat_t *np; 3858 int i; 3859 ipf_stack_t *ifs = fin->fin_ifs; 3860 3861 #if SOLARIS && defined(_KERNEL) 3862 net_data_t net_data_p; 3863 if (fin->fin_v == 4) 3864 net_data_p = ifs->ifs_ipf_ipv4; 3865 else 3866 net_data_p = ifs->ifs_ipf_ipv6; 3867 #endif 3868 3869 tcp = NULL; 3870 icmp = NULL; 3871 csump = NULL; 3872 np = nat->nat_ptr; 3873 3874 if ((natadd != 0) && (fin->fin_flx & FI_FRAG)) 3875 (void) fr_nat_newfrag(fin, 0, nat); 3876 3877 MUTEX_ENTER(&nat->nat_lock); 3878 nat->nat_bytes[1] += fin->fin_plen; 3879 nat->nat_pkts[1]++; 3880 MUTEX_EXIT(&nat->nat_lock); 3881 3882 /* 3883 * Fix up checksums, not by recalculating them, but 3884 * simply computing adjustments. 3885 * This is only done for STREAMS based IP implementations where the 3886 * checksum has already been calculated by IP. In all other cases, 3887 * IPFilter is called before the checksum needs calculating so there 3888 * is no call to modify whatever is in the header now. 
3889 */ 3890 ASSERT(fin->fin_m != NULL); 3891 if (fin->fin_v == 4 && !NET_IS_HCK_L3_FULL(net_data_p, fin->fin_m)) { 3892 if (nflags == IPN_ICMPERR) { 3893 u_32_t s1, s2; 3894 3895 s1 = LONG_SUM(ntohl(fin->fin_saddr)); 3896 s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr)); 3897 CALC_SUMD(s1, s2, sumd); 3898 3899 fix_outcksum(&fin->fin_ip->ip_sum, sumd); 3900 } 3901 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ 3902 defined(linux) || defined(BRIDGE_IPF) 3903 else { 3904 /* 3905 * Strictly speaking, this isn't necessary on BSD 3906 * kernels because they do checksum calculation after 3907 * this code has run BUT if ipfilter is being used 3908 * to do NAT as a bridge, that code doesn't exist. 3909 */ 3910 if (nat->nat_dir == NAT_OUTBOUND) 3911 fix_outcksum(&fin->fin_ip->ip_sum, 3912 nat->nat_ipsumd); 3913 else 3914 fix_incksum(&fin->fin_ip->ip_sum, 3915 nat->nat_ipsumd); 3916 } 3917 #endif 3918 } 3919 3920 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 3921 if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) { 3922 tcp = fin->fin_dp; 3923 3924 tcp->th_sport = nat->nat_outport; 3925 fin->fin_data[0] = ntohs(nat->nat_outport); 3926 } 3927 3928 if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) { 3929 icmp = fin->fin_dp; 3930 icmp->icmp_id = nat->nat_outport; 3931 } 3932 3933 csump = nat_proto(fin, nat, nflags); 3934 } 3935 3936 fin->fin_ip->ip_src = nat->nat_outip; 3937 3938 nat_update(fin, nat, np); 3939 3940 /* 3941 * The above comments do not hold for layer 4 (or higher) checksums... 
3942 */ 3943 if (csump != NULL && !NET_IS_HCK_L4_FULL(net_data_p, fin->fin_m)) { 3944 if (nflags & IPN_TCPUDP && 3945 NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) 3946 sumd = nat->nat_sumd[1]; 3947 else 3948 sumd = nat->nat_sumd[0]; 3949 3950 if (nat->nat_dir == NAT_OUTBOUND) 3951 fix_outcksum(csump, sumd); 3952 else 3953 fix_incksum(csump, sumd); 3954 } 3955 #ifdef IPFILTER_SYNC 3956 ipfsync_update(SMC_NAT, fin, nat->nat_sync); 3957 #endif 3958 /* ------------------------------------------------------------- */ 3959 /* A few quick notes: */ 3960 /* Following are test conditions prior to calling the */ 3961 /* appr_check routine. */ 3962 /* */ 3963 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */ 3964 /* with a redirect rule, we attempt to match the packet's */ 3965 /* source port against in_dport, otherwise we'd compare the */ 3966 /* packet's destination. */ 3967 /* ------------------------------------------------------------- */ 3968 if ((np != NULL) && (np->in_apr != NULL)) { 3969 i = appr_check(fin, nat); 3970 if (i == 0) 3971 i = 1; 3972 } else 3973 i = 1; 3974 ATOMIC_INCL(ifs->ifs_nat_stats.ns_mapped[1]); 3975 fin->fin_flx |= FI_NATED; 3976 return i; 3977 } 3978 3979 3980 /* ------------------------------------------------------------------------ */ 3981 /* Function: fr_checknatin */ 3982 /* Returns: int - -1 == packet failed NAT checks so block it, */ 3983 /* 0 == no packet translation occurred, */ 3984 /* 1 == packet was successfully translated. */ 3985 /* Parameters: fin(I) - pointer to packet information */ 3986 /* passp(I) - pointer to filtering result flags */ 3987 /* */ 3988 /* Check to see if an incoming packet should be changed. ICMP packets are */ 3989 /* first checked to see if they match an existing entry (if an error), */ 3990 /* otherwise a search of the current NAT table is made. If neither results */ 3991 /* in a match then a search for a matching NAT rule is made. Create a new */ 3992 /* NAT entry if a we matched a NAT rule. 
Lastly, actually change the */ 3993 /* packet header(s) as required. */ 3994 /* ------------------------------------------------------------------------ */ 3995 int fr_checknatin(fin, passp) 3996 fr_info_t *fin; 3997 u_32_t *passp; 3998 { 3999 u_int nflags, natadd; 4000 int rval, natfailed; 4001 struct ifnet *ifp; 4002 struct in_addr in; 4003 icmphdr_t *icmp; 4004 tcphdr_t *tcp; 4005 u_short dport; 4006 ipnat_t *np; 4007 nat_t *nat; 4008 u_32_t iph; 4009 ipf_stack_t *ifs = fin->fin_ifs; 4010 4011 if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0) 4012 return 0; 4013 4014 tcp = NULL; 4015 icmp = NULL; 4016 dport = 0; 4017 natadd = 1; 4018 nflags = 0; 4019 natfailed = 0; 4020 ifp = fin->fin_ifp; 4021 4022 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 4023 switch (fin->fin_p) 4024 { 4025 case IPPROTO_TCP : 4026 nflags = IPN_TCP; 4027 break; 4028 case IPPROTO_UDP : 4029 nflags = IPN_UDP; 4030 break; 4031 case IPPROTO_ICMP : 4032 icmp = fin->fin_dp; 4033 4034 /* 4035 * This is an incoming packet, so the destination is 4036 * the icmp_id and the source port equals 0 4037 */ 4038 if (nat_icmpquerytype4(icmp->icmp_type)) { 4039 nflags = IPN_ICMPQUERY; 4040 dport = icmp->icmp_id; 4041 } break; 4042 default : 4043 break; 4044 } 4045 4046 if ((nflags & IPN_TCPUDP)) { 4047 tcp = fin->fin_dp; 4048 dport = tcp->th_dport; 4049 } 4050 } 4051 4052 in = fin->fin_dst; 4053 4054 READ_ENTER(&ifs->ifs_ipf_nat); 4055 4056 if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && 4057 (nat = nat_icmperror(fin, &nflags, NAT_INBOUND))) 4058 /*EMPTY*/; 4059 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin))) 4060 natadd = 0; 4061 else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p, 4062 fin->fin_src, in))) { 4063 nflags = nat->nat_flags; 4064 } else { 4065 u_32_t hv, msk, rmsk; 4066 4067 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4068 rmsk = ifs->ifs_rdr_masks; 4069 msk = 0xffffffff; 4070 WRITE_ENTER(&ifs->ifs_ipf_nat); 4071 /* 4072 * 
If there is no current entry in the nat table for this IP#, 4073 * create one for it (if there is a matching rule). 4074 */ 4075 maskloop: 4076 iph = in.s_addr & htonl(msk); 4077 hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_rdrrules_sz); 4078 for (np = ifs->ifs_rdr_rules[hv]; np; np = np->in_rnext) { 4079 if (np->in_ifps[0] && (np->in_ifps[0] != ifp)) 4080 continue; 4081 if (np->in_v != fin->fin_v) 4082 continue; 4083 if (np->in_p && (np->in_p != fin->fin_p)) 4084 continue; 4085 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags)) 4086 continue; 4087 if (np->in_flags & IPN_FILTER) { 4088 if (!nat_match(fin, np)) 4089 continue; 4090 } else { 4091 if ((in.s_addr & np->in_outmsk) != np->in_outip) 4092 continue; 4093 if (np->in_pmin && 4094 ((ntohs(np->in_pmax) < ntohs(dport)) || 4095 (ntohs(dport) < ntohs(np->in_pmin)))) 4096 continue; 4097 } 4098 4099 if (*np->in_plabel != '\0') { 4100 if (!appr_ok(fin, tcp, np)) { 4101 continue; 4102 } 4103 } 4104 4105 nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND); 4106 if (nat != NULL) { 4107 np->in_hits++; 4108 break; 4109 } else 4110 natfailed = -1; 4111 } 4112 4113 if ((np == NULL) && (rmsk != 0)) { 4114 while (rmsk) { 4115 msk <<= 1; 4116 if (rmsk & 0x80000000) 4117 break; 4118 rmsk <<= 1; 4119 } 4120 if (rmsk != 0) { 4121 rmsk <<= 1; 4122 goto maskloop; 4123 } 4124 } 4125 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat); 4126 } 4127 if (nat != NULL) { 4128 rval = fr_natin(fin, nat, natadd, nflags); 4129 if (rval == 1) { 4130 MUTEX_ENTER(&nat->nat_lock); 4131 nat->nat_ref++; 4132 MUTEX_EXIT(&nat->nat_lock); 4133 nat->nat_touched = ifs->ifs_fr_ticks; 4134 fin->fin_nat = nat; 4135 fin->fin_state = nat->nat_state; 4136 } 4137 } else 4138 rval = natfailed; 4139 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4140 4141 if (rval == -1) { 4142 if (passp != NULL) 4143 *passp = FR_BLOCK; 4144 fin->fin_flx |= FI_BADNAT; 4145 } 4146 return rval; 4147 } 4148 4149 4150 /* ------------------------------------------------------------------------ */ 4151 /* Function: 
fr_natin */ 4152 /* Returns: int - -1 == packet failed NAT checks so block it, */ 4153 /* 1 == packet was successfully translated. */ 4154 /* Parameters: fin(I) - pointer to packet information */ 4155 /* nat(I) - pointer to NAT structure */ 4156 /* natadd(I) - flag indicating if it is safe to add frag cache */ 4157 /* nflags(I) - NAT flags set for this packet */ 4158 /* Locks Held: ipf_nat (READ) */ 4159 /* */ 4160 /* Translate a packet coming "in" on an interface. */ 4161 /* ------------------------------------------------------------------------ */ 4162 int fr_natin(fin, nat, natadd, nflags) 4163 fr_info_t *fin; 4164 nat_t *nat; 4165 int natadd; 4166 u_32_t nflags; 4167 { 4168 icmphdr_t *icmp; 4169 u_short *csump; 4170 tcphdr_t *tcp; 4171 ipnat_t *np; 4172 int i; 4173 ipf_stack_t *ifs = fin->fin_ifs; 4174 4175 #if SOLARIS && defined(_KERNEL) 4176 net_data_t net_data_p; 4177 if (fin->fin_v == 4) 4178 net_data_p = ifs->ifs_ipf_ipv4; 4179 else 4180 net_data_p = ifs->ifs_ipf_ipv6; 4181 #endif 4182 4183 tcp = NULL; 4184 csump = NULL; 4185 np = nat->nat_ptr; 4186 fin->fin_fr = nat->nat_fr; 4187 4188 if ((natadd != 0) && (fin->fin_flx & FI_FRAG)) 4189 (void) fr_nat_newfrag(fin, 0, nat); 4190 4191 if (np != NULL) { 4192 4193 /* ------------------------------------------------------------- */ 4194 /* A few quick notes: */ 4195 /* Following are test conditions prior to calling the */ 4196 /* appr_check routine. */ 4197 /* */ 4198 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */ 4199 /* with a map rule, we attempt to match the packet's */ 4200 /* source port against in_dport, otherwise we'd compare the */ 4201 /* packet's destination. 
*/ 4202 /* ------------------------------------------------------------- */ 4203 if (np->in_apr != NULL) { 4204 i = appr_check(fin, nat); 4205 if (i == -1) { 4206 return -1; 4207 } 4208 } 4209 } 4210 4211 #ifdef IPFILTER_SYNC 4212 ipfsync_update(SMC_NAT, fin, nat->nat_sync); 4213 #endif 4214 4215 MUTEX_ENTER(&nat->nat_lock); 4216 nat->nat_bytes[0] += fin->fin_plen; 4217 nat->nat_pkts[0]++; 4218 MUTEX_EXIT(&nat->nat_lock); 4219 4220 fin->fin_ip->ip_dst = nat->nat_inip; 4221 fin->fin_fi.fi_daddr = nat->nat_inip.s_addr; 4222 if (nflags & IPN_TCPUDP) 4223 tcp = fin->fin_dp; 4224 4225 /* 4226 * Fix up checksums, not by recalculating them, but 4227 * simply computing adjustments. 4228 * Why only do this for some platforms on inbound packets ? 4229 * Because for those that it is done, IP processing is yet to happen 4230 * and so the IPv4 header checksum has not yet been evaluated. 4231 * Perhaps it should always be done for the benefit of things like 4232 * fast forwarding (so that it doesn't need to be recomputed) but with 4233 * header checksum offloading, perhaps it is a moot point. 
4234 */ 4235 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ 4236 defined(__osf__) || defined(linux) 4237 if (nat->nat_dir == NAT_OUTBOUND) 4238 fix_incksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd); 4239 else 4240 fix_outcksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd); 4241 #endif 4242 4243 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { 4244 if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) { 4245 tcp->th_dport = nat->nat_inport; 4246 fin->fin_data[1] = ntohs(nat->nat_inport); 4247 } 4248 4249 4250 if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) { 4251 icmp = fin->fin_dp; 4252 4253 icmp->icmp_id = nat->nat_inport; 4254 } 4255 4256 csump = nat_proto(fin, nat, nflags); 4257 } 4258 4259 nat_update(fin, nat, np); 4260 4261 /* 4262 * In case they are being forwarded, inbound packets always need to have 4263 * their checksum adjusted even if hardware checksum validation said OK. 4264 */ 4265 if (csump != NULL) { 4266 if (nat->nat_dir == NAT_OUTBOUND) 4267 fix_incksum(csump, nat->nat_sumd[0]); 4268 else 4269 fix_outcksum(csump, nat->nat_sumd[0]); 4270 } 4271 4272 #if SOLARIS && defined(_KERNEL) 4273 if (nflags & IPN_TCPUDP && 4274 NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) { 4275 /* 4276 * Need to adjust the partial checksum result stored in 4277 * db_cksum16, which will be used for validation in IP. 4278 * See IP_CKSUM_RECV(). 4279 * Adjustment data should be the inverse of the IP address 4280 * changes, because db_cksum16 is supposed to be the complement 4281 * of the pesudo header. 
4282 */ 4283 csump = &fin->fin_m->b_datap->db_cksum16; 4284 if (nat->nat_dir == NAT_OUTBOUND) 4285 fix_outcksum(csump, nat->nat_sumd[1]); 4286 else 4287 fix_incksum(csump, nat->nat_sumd[1]); 4288 } 4289 #endif 4290 4291 ATOMIC_INCL(ifs->ifs_nat_stats.ns_mapped[0]); 4292 fin->fin_flx |= FI_NATED; 4293 if (np != NULL && np->in_tag.ipt_num[0] != 0) 4294 fin->fin_nattag = &np->in_tag; 4295 return 1; 4296 } 4297 4298 4299 /* ------------------------------------------------------------------------ */ 4300 /* Function: nat_proto */ 4301 /* Returns: u_short* - pointer to transport header checksum to update, */ 4302 /* NULL if the transport protocol is not recognised */ 4303 /* as needing a checksum update. */ 4304 /* Parameters: fin(I) - pointer to packet information */ 4305 /* nat(I) - pointer to NAT structure */ 4306 /* nflags(I) - NAT flags set for this packet */ 4307 /* */ 4308 /* Return the pointer to the checksum field for each protocol so understood.*/ 4309 /* If support for making other changes to a protocol header is required, */ 4310 /* that is not strictly 'address' translation, such as clamping the MSS in */ 4311 /* TCP down to a specific value, then do it from here. */ 4312 /* ------------------------------------------------------------------------ */ 4313 u_short *nat_proto(fin, nat, nflags) 4314 fr_info_t *fin; 4315 nat_t *nat; 4316 u_int nflags; 4317 { 4318 icmphdr_t *icmp; 4319 u_short *csump; 4320 tcphdr_t *tcp; 4321 udphdr_t *udp; 4322 4323 csump = NULL; 4324 if (fin->fin_out == 0) { 4325 fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND); 4326 } else { 4327 fin->fin_rev = (nat->nat_dir == NAT_INBOUND); 4328 } 4329 4330 switch (fin->fin_p) 4331 { 4332 case IPPROTO_TCP : 4333 tcp = fin->fin_dp; 4334 4335 csump = &tcp->th_sum; 4336 4337 /* 4338 * Do a MSS CLAMPING on a SYN packet, 4339 * only deal IPv4 for now. 
4340 */ 4341 if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0) 4342 nat_mssclamp(tcp, nat->nat_mssclamp, csump); 4343 4344 break; 4345 4346 case IPPROTO_UDP : 4347 udp = fin->fin_dp; 4348 4349 if (udp->uh_sum) 4350 csump = &udp->uh_sum; 4351 break; 4352 4353 case IPPROTO_ICMP : 4354 icmp = fin->fin_dp; 4355 4356 if ((nflags & IPN_ICMPQUERY) != 0) { 4357 if (icmp->icmp_cksum != 0) 4358 csump = &icmp->icmp_cksum; 4359 } 4360 break; 4361 } 4362 return csump; 4363 } 4364 4365 4366 /* ------------------------------------------------------------------------ */ 4367 /* Function: fr_natunload */ 4368 /* Returns: Nil */ 4369 /* Parameters: Nil */ 4370 /* */ 4371 /* Free all memory used by NAT structures allocated at runtime. */ 4372 /* ------------------------------------------------------------------------ */ 4373 void fr_natunload(ifs) 4374 ipf_stack_t *ifs; 4375 { 4376 ipftq_t *ifq, *ifqnext; 4377 4378 (void) nat_clearlist(ifs); 4379 (void) nat_flushtable(ifs); 4380 4381 /* 4382 * Proxy timeout queues are not cleaned here because although they 4383 * exist on the NAT list, appr_unload is called after fr_natunload 4384 * and the proxies actually are responsible for them being created. 4385 * Should the proxy timeouts have their own list? There's no real 4386 * justification as this is the only complication. 
4387 */ 4388 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { 4389 ifqnext = ifq->ifq_next; 4390 if (((ifq->ifq_flags & IFQF_PROXY) == 0) && 4391 (fr_deletetimeoutqueue(ifq) == 0)) 4392 fr_freetimeoutqueue(ifq, ifs); 4393 } 4394 4395 if (ifs->ifs_nat_table[0] != NULL) { 4396 KFREES(ifs->ifs_nat_table[0], 4397 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 4398 ifs->ifs_nat_table[0] = NULL; 4399 } 4400 if (ifs->ifs_nat_table[1] != NULL) { 4401 KFREES(ifs->ifs_nat_table[1], 4402 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz); 4403 ifs->ifs_nat_table[1] = NULL; 4404 } 4405 if (ifs->ifs_nat_rules != NULL) { 4406 KFREES(ifs->ifs_nat_rules, 4407 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz); 4408 ifs->ifs_nat_rules = NULL; 4409 } 4410 if (ifs->ifs_rdr_rules != NULL) { 4411 KFREES(ifs->ifs_rdr_rules, 4412 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz); 4413 ifs->ifs_rdr_rules = NULL; 4414 } 4415 if (ifs->ifs_maptable != NULL) { 4416 KFREES(ifs->ifs_maptable, 4417 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz); 4418 ifs->ifs_maptable = NULL; 4419 } 4420 if (ifs->ifs_nat_stats.ns_bucketlen[0] != NULL) { 4421 KFREES(ifs->ifs_nat_stats.ns_bucketlen[0], 4422 sizeof(u_long *) * ifs->ifs_ipf_nattable_sz); 4423 ifs->ifs_nat_stats.ns_bucketlen[0] = NULL; 4424 } 4425 if (ifs->ifs_nat_stats.ns_bucketlen[1] != NULL) { 4426 KFREES(ifs->ifs_nat_stats.ns_bucketlen[1], 4427 sizeof(u_long *) * ifs->ifs_ipf_nattable_sz); 4428 ifs->ifs_nat_stats.ns_bucketlen[1] = NULL; 4429 } 4430 4431 if (ifs->ifs_fr_nat_maxbucket_reset == 1) 4432 ifs->ifs_fr_nat_maxbucket = 0; 4433 4434 if (ifs->ifs_fr_nat_init == 1) { 4435 ifs->ifs_fr_nat_init = 0; 4436 fr_sttab_destroy(ifs->ifs_nat_tqb); 4437 4438 RW_DESTROY(&ifs->ifs_ipf_natfrag); 4439 RW_DESTROY(&ifs->ifs_ipf_nat); 4440 4441 MUTEX_DESTROY(&ifs->ifs_ipf_nat_new); 4442 MUTEX_DESTROY(&ifs->ifs_ipf_natio); 4443 4444 MUTEX_DESTROY(&ifs->ifs_nat_udptq.ifq_lock); 4445 MUTEX_DESTROY(&ifs->ifs_nat_icmptq.ifq_lock); 4446 
MUTEX_DESTROY(&ifs->ifs_nat_iptq.ifq_lock); 4447 } 4448 } 4449 4450 4451 /* ------------------------------------------------------------------------ */ 4452 /* Function: fr_natexpire */ 4453 /* Returns: Nil */ 4454 /* Parameters: Nil */ 4455 /* */ 4456 /* Check all of the timeout queues for entries at the top which need to be */ 4457 /* expired. */ 4458 /* ------------------------------------------------------------------------ */ 4459 void fr_natexpire(ifs) 4460 ipf_stack_t *ifs; 4461 { 4462 ipftq_t *ifq, *ifqnext; 4463 ipftqent_t *tqe, *tqn; 4464 int i; 4465 SPL_INT(s); 4466 4467 SPL_NET(s); 4468 WRITE_ENTER(&ifs->ifs_ipf_nat); 4469 for (ifq = ifs->ifs_nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) { 4470 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { 4471 if (tqe->tqe_die > ifs->ifs_fr_ticks) 4472 break; 4473 tqn = tqe->tqe_next; 4474 nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs); 4475 } 4476 } 4477 4478 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { 4479 ifqnext = ifq->ifq_next; 4480 4481 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { 4482 if (tqe->tqe_die > ifs->ifs_fr_ticks) 4483 break; 4484 tqn = tqe->tqe_next; 4485 nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs); 4486 } 4487 } 4488 4489 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) { 4490 ifqnext = ifq->ifq_next; 4491 4492 if (((ifq->ifq_flags & IFQF_DELETE) != 0) && 4493 (ifq->ifq_ref == 0)) { 4494 fr_freetimeoutqueue(ifq, ifs); 4495 } 4496 } 4497 4498 if (ifs->ifs_nat_doflush != 0) { 4499 (void) nat_extraflush(2, ifs); 4500 ifs->ifs_nat_doflush = 0; 4501 } 4502 4503 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4504 SPL_X(s); 4505 } 4506 4507 4508 /* ------------------------------------------------------------------------ */ 4509 /* Function: fr_nataddrsync */ 4510 /* Returns: Nil */ 4511 /* Parameters: ifp(I) - pointer to network interface */ 4512 /* addr(I) - pointer to new network address */ 4513 /* */ 4514 /* Walk through all of the currently active NAT sessions, looking for 
those */ 4515 /* which need to have their translated address updated (where the interface */ 4516 /* matches the one passed in) and change it, recalculating the checksum sum */ 4517 /* difference too. */ 4518 /* ------------------------------------------------------------------------ */ 4519 void fr_nataddrsync(ifp, addr, ifs) 4520 void *ifp; 4521 struct in_addr *addr; 4522 ipf_stack_t *ifs; 4523 { 4524 u_32_t sum1, sum2, sumd; 4525 nat_t *nat; 4526 ipnat_t *np; 4527 SPL_INT(s); 4528 4529 if (ifs->ifs_fr_running <= 0) 4530 return; 4531 4532 SPL_NET(s); 4533 WRITE_ENTER(&ifs->ifs_ipf_nat); 4534 4535 if (ifs->ifs_fr_running <= 0) { 4536 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4537 return; 4538 } 4539 4540 /* 4541 * Change IP addresses for NAT sessions for any protocol except TCP 4542 * since it will break the TCP connection anyway. The only rules 4543 * which will get changed are those which are "map ... -> 0/32", 4544 * where the rule specifies the address is taken from the interface. 4545 */ 4546 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4547 if (addr != NULL) { 4548 if (((ifp != NULL) && ifp != (nat->nat_ifps[0])) || 4549 ((nat->nat_flags & IPN_TCP) != 0)) 4550 continue; 4551 if (((np = nat->nat_ptr) == NULL) || 4552 (np->in_nip || (np->in_outmsk != 0xffffffff))) 4553 continue; 4554 4555 /* 4556 * Change the map-to address to be the same as the 4557 * new one. 4558 */ 4559 sum1 = nat->nat_outip.s_addr; 4560 nat->nat_outip = *addr; 4561 sum2 = nat->nat_outip.s_addr; 4562 4563 } else if (((ifp == NULL) || (ifp == nat->nat_ifps[0])) && 4564 !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) && 4565 (np->in_outmsk == 0xffffffff) && !np->in_nip) { 4566 struct in_addr in; 4567 4568 /* 4569 * Change the map-to address to be the same as the 4570 * new one. 
4571 */ 4572 sum1 = nat->nat_outip.s_addr; 4573 if (fr_ifpaddr(4, FRI_NORMAL, nat->nat_ifps[0], 4574 &in, NULL, ifs) != -1) 4575 nat->nat_outip = in; 4576 sum2 = nat->nat_outip.s_addr; 4577 } else { 4578 continue; 4579 } 4580 4581 if (sum1 == sum2) 4582 continue; 4583 /* 4584 * Readjust the checksum adjustment to take into 4585 * account the new IP#. 4586 */ 4587 CALC_SUMD(sum1, sum2, sumd); 4588 /* XXX - dont change for TCP when solaris does 4589 * hardware checksumming. 4590 */ 4591 sumd += nat->nat_sumd[0]; 4592 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); 4593 nat->nat_sumd[1] = nat->nat_sumd[0]; 4594 } 4595 4596 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4597 SPL_X(s); 4598 } 4599 4600 4601 /* ------------------------------------------------------------------------ */ 4602 /* Function: fr_natifpsync */ 4603 /* Returns: Nil */ 4604 /* Parameters: action(I) - how we are syncing */ 4605 /* ifp(I) - pointer to network interface */ 4606 /* name(I) - name of interface to sync to */ 4607 /* */ 4608 /* This function is used to resync the mapping of interface names and their */ 4609 /* respective 'pointers'. For "action == IPFSYNC_RESYNC", resync all */ 4610 /* interfaces by doing a new lookup of name to 'pointer'. For "action == */ 4611 /* IPFSYNC_NEWIFP", treat ifp as the new pointer value associated with */ 4612 /* "name" and for "action == IPFSYNC_OLDIFP", ifp is a pointer for which */ 4613 /* there is no longer any interface associated with it. 
*/ 4614 /* ------------------------------------------------------------------------ */ 4615 void fr_natifpsync(action, ifp, name, ifs) 4616 int action; 4617 void *ifp; 4618 char *name; 4619 ipf_stack_t *ifs; 4620 { 4621 #if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL) 4622 int s; 4623 #endif 4624 nat_t *nat; 4625 ipnat_t *n; 4626 4627 if (ifs->ifs_fr_running <= 0) 4628 return; 4629 4630 SPL_NET(s); 4631 WRITE_ENTER(&ifs->ifs_ipf_nat); 4632 4633 if (ifs->ifs_fr_running <= 0) { 4634 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4635 return; 4636 } 4637 4638 switch (action) 4639 { 4640 case IPFSYNC_RESYNC : 4641 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4642 if ((ifp == nat->nat_ifps[0]) || 4643 (nat->nat_ifps[0] == (void *)-1)) { 4644 nat->nat_ifps[0] = 4645 fr_resolvenic(nat->nat_ifnames[0], 4, ifs); 4646 } 4647 4648 if ((ifp == nat->nat_ifps[1]) || 4649 (nat->nat_ifps[1] == (void *)-1)) { 4650 nat->nat_ifps[1] = 4651 fr_resolvenic(nat->nat_ifnames[1], 4, ifs); 4652 } 4653 } 4654 4655 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { 4656 if (n->in_ifps[0] == ifp || 4657 n->in_ifps[0] == (void *)-1) { 4658 n->in_ifps[0] = 4659 fr_resolvenic(n->in_ifnames[0], 4, ifs); 4660 } 4661 if (n->in_ifps[1] == ifp || 4662 n->in_ifps[1] == (void *)-1) { 4663 n->in_ifps[1] = 4664 fr_resolvenic(n->in_ifnames[1], 4, ifs); 4665 } 4666 } 4667 break; 4668 case IPFSYNC_NEWIFP : 4669 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4670 if (!strncmp(name, nat->nat_ifnames[0], 4671 sizeof(nat->nat_ifnames[0]))) 4672 nat->nat_ifps[0] = ifp; 4673 if (!strncmp(name, nat->nat_ifnames[1], 4674 sizeof(nat->nat_ifnames[1]))) 4675 nat->nat_ifps[1] = ifp; 4676 } 4677 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { 4678 if (!strncmp(name, n->in_ifnames[0], 4679 sizeof(n->in_ifnames[0]))) 4680 n->in_ifps[0] = ifp; 4681 if (!strncmp(name, n->in_ifnames[1], 4682 sizeof(n->in_ifnames[1]))) 4683 n->in_ifps[1] = ifp; 4684 } 4685 break; 4686 case 
IPFSYNC_OLDIFP : 4687 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) { 4688 if (ifp == nat->nat_ifps[0]) 4689 nat->nat_ifps[0] = (void *)-1; 4690 if (ifp == nat->nat_ifps[1]) 4691 nat->nat_ifps[1] = (void *)-1; 4692 } 4693 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) { 4694 if (n->in_ifps[0] == ifp) 4695 n->in_ifps[0] = (void *)-1; 4696 if (n->in_ifps[1] == ifp) 4697 n->in_ifps[1] = (void *)-1; 4698 } 4699 break; 4700 } 4701 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4702 SPL_X(s); 4703 } 4704 4705 4706 /* ------------------------------------------------------------------------ */ 4707 /* Function: nat_icmpquerytype4 */ 4708 /* Returns: int - 1 == success, 0 == failure */ 4709 /* Parameters: icmptype(I) - ICMP type number */ 4710 /* */ 4711 /* Tests to see if the ICMP type number passed is a query/response type or */ 4712 /* not. */ 4713 /* ------------------------------------------------------------------------ */ 4714 static INLINE int nat_icmpquerytype4(icmptype) 4715 int icmptype; 4716 { 4717 4718 /* 4719 * For the ICMP query NAT code, it is essential that both the query 4720 * and the reply match on the NAT rule. Because the NAT structure 4721 * does not keep track of the icmptype, and a single NAT structure 4722 * is used for all icmp types with the same src, dest and id, we 4723 * simply define the replies as queries as well. 
The funny thing is, 4724 * altough it seems silly to call a reply a query, this is exactly 4725 * as it is defined in the IPv4 specification 4726 */ 4727 4728 switch (icmptype) 4729 { 4730 4731 case ICMP_ECHOREPLY: 4732 case ICMP_ECHO: 4733 /* route aedvertisement/solliciation is currently unsupported: */ 4734 /* it would require rewriting the ICMP data section */ 4735 case ICMP_TSTAMP: 4736 case ICMP_TSTAMPREPLY: 4737 case ICMP_IREQ: 4738 case ICMP_IREQREPLY: 4739 case ICMP_MASKREQ: 4740 case ICMP_MASKREPLY: 4741 return 1; 4742 default: 4743 return 0; 4744 } 4745 } 4746 4747 4748 /* ------------------------------------------------------------------------ */ 4749 /* Function: nat_log */ 4750 /* Returns: Nil */ 4751 /* Parameters: nat(I) - pointer to NAT structure */ 4752 /* type(I) - type of log entry to create */ 4753 /* */ 4754 /* Creates a NAT log entry. */ 4755 /* ------------------------------------------------------------------------ */ 4756 void nat_log(nat, type, ifs) 4757 struct nat *nat; 4758 u_int type; 4759 ipf_stack_t *ifs; 4760 { 4761 #ifdef IPFILTER_LOG 4762 # ifndef LARGE_NAT 4763 struct ipnat *np; 4764 int rulen; 4765 # endif 4766 struct natlog natl; 4767 void *items[1]; 4768 size_t sizes[1]; 4769 int types[1]; 4770 4771 natl.nl_inip = nat->nat_inip; 4772 natl.nl_outip = nat->nat_outip; 4773 natl.nl_origip = nat->nat_oip; 4774 natl.nl_bytes[0] = nat->nat_bytes[0]; 4775 natl.nl_bytes[1] = nat->nat_bytes[1]; 4776 natl.nl_pkts[0] = nat->nat_pkts[0]; 4777 natl.nl_pkts[1] = nat->nat_pkts[1]; 4778 natl.nl_origport = nat->nat_oport; 4779 natl.nl_inport = nat->nat_inport; 4780 natl.nl_outport = nat->nat_outport; 4781 natl.nl_p = nat->nat_p; 4782 natl.nl_type = type; 4783 natl.nl_rule = -1; 4784 # ifndef LARGE_NAT 4785 if (nat->nat_ptr != NULL) { 4786 for (rulen = 0, np = ifs->ifs_nat_list; np; 4787 np = np->in_next, rulen++) 4788 if (np == nat->nat_ptr) { 4789 natl.nl_rule = rulen; 4790 break; 4791 } 4792 } 4793 # endif 4794 items[0] = &natl; 4795 sizes[0] 
= sizeof(natl); 4796 types[0] = 0; 4797 4798 (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1, ifs); 4799 #endif 4800 } 4801 4802 4803 #if defined(__OpenBSD__) 4804 /* ------------------------------------------------------------------------ */ 4805 /* Function: nat_ifdetach */ 4806 /* Returns: Nil */ 4807 /* Parameters: ifp(I) - pointer to network interface */ 4808 /* */ 4809 /* Compatibility interface for OpenBSD to trigger the correct updating of */ 4810 /* interface references within IPFilter. */ 4811 /* ------------------------------------------------------------------------ */ 4812 void nat_ifdetach(ifp, ifs) 4813 void *ifp; 4814 ipf_stack_t *ifs; 4815 { 4816 frsync(ifp, ifs); 4817 return; 4818 } 4819 #endif 4820 4821 4822 /* ------------------------------------------------------------------------ */ 4823 /* Function: fr_ipnatderef */ 4824 /* Returns: Nil */ 4825 /* Parameters: isp(I) - pointer to pointer to NAT rule */ 4826 /* Write Locks: ipf_nat */ 4827 /* */ 4828 /* ------------------------------------------------------------------------ */ 4829 void fr_ipnatderef(inp, ifs) 4830 ipnat_t **inp; 4831 ipf_stack_t *ifs; 4832 { 4833 ipnat_t *in; 4834 4835 in = *inp; 4836 *inp = NULL; 4837 in->in_space++; 4838 in->in_use--; 4839 if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) { 4840 if (in->in_apr) 4841 appr_free(in->in_apr); 4842 KFREE(in); 4843 ifs->ifs_nat_stats.ns_rules--; 4844 #ifdef notdef 4845 #if SOLARIS 4846 if (ifs->ifs_nat_stats.ns_rules == 0) 4847 ifs->ifs_pfil_delayed_copy = 1; 4848 #endif 4849 #endif 4850 } 4851 } 4852 4853 4854 /* ------------------------------------------------------------------------ */ 4855 /* Function: fr_natderef */ 4856 /* Returns: Nil */ 4857 /* Parameters: isp(I) - pointer to pointer to NAT table entry */ 4858 /* */ 4859 /* Decrement the reference counter for this NAT table entry and free it if */ 4860 /* there are no more things using it. 
*/ 4861 /* */ 4862 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */ 4863 /* structure *because* it only gets called on paths _after_ nat_ref has been*/ 4864 /* incremented. If nat_ref == 1 then we shouldn't decrement it here */ 4865 /* because nat_delete() will do that and send nat_ref to -1. */ 4866 /* */ 4867 /* Holding the lock on nat_lock is required to serialise nat_delete() being */ 4868 /* called from a NAT flush ioctl with a deref happening because of a packet.*/ 4869 /* ------------------------------------------------------------------------ */ 4870 void fr_natderef(natp, ifs) 4871 nat_t **natp; 4872 ipf_stack_t *ifs; 4873 { 4874 nat_t *nat; 4875 4876 nat = *natp; 4877 *natp = NULL; 4878 4879 MUTEX_ENTER(&nat->nat_lock); 4880 if (nat->nat_ref > 1) { 4881 nat->nat_ref--; 4882 MUTEX_EXIT(&nat->nat_lock); 4883 return; 4884 } 4885 MUTEX_EXIT(&nat->nat_lock); 4886 4887 WRITE_ENTER(&ifs->ifs_ipf_nat); 4888 nat_delete(nat, NL_EXPIRE, ifs); 4889 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 4890 } 4891 4892 4893 /* ------------------------------------------------------------------------ */ 4894 /* Function: fr_natclone */ 4895 /* Returns: ipstate_t* - NULL == cloning failed, */ 4896 /* else pointer to new state structure */ 4897 /* Parameters: fin(I) - pointer to packet information */ 4898 /* is(I) - pointer to master state structure */ 4899 /* Write Lock: ipf_nat */ 4900 /* */ 4901 /* Create a "duplcate" state table entry from the master. 
*/ 4902 /* ------------------------------------------------------------------------ */ 4903 static nat_t *fr_natclone(fin, nat) 4904 fr_info_t *fin; 4905 nat_t *nat; 4906 { 4907 frentry_t *fr; 4908 nat_t *clone; 4909 ipnat_t *np; 4910 ipf_stack_t *ifs = fin->fin_ifs; 4911 4912 KMALLOC(clone, nat_t *); 4913 if (clone == NULL) 4914 return NULL; 4915 bcopy((char *)nat, (char *)clone, sizeof(*clone)); 4916 4917 MUTEX_NUKE(&clone->nat_lock); 4918 4919 clone->nat_aps = NULL; 4920 /* 4921 * Initialize all these so that nat_delete() doesn't cause a crash. 4922 */ 4923 clone->nat_tqe.tqe_pnext = NULL; 4924 clone->nat_tqe.tqe_next = NULL; 4925 clone->nat_tqe.tqe_ifq = NULL; 4926 clone->nat_tqe.tqe_parent = clone; 4927 4928 clone->nat_flags &= ~SI_CLONE; 4929 clone->nat_flags |= SI_CLONED; 4930 4931 if (clone->nat_hm) 4932 clone->nat_hm->hm_ref++; 4933 4934 if (nat_insert(clone, fin->fin_rev, ifs) == -1) { 4935 KFREE(clone); 4936 return NULL; 4937 } 4938 np = clone->nat_ptr; 4939 if (np != NULL) { 4940 if (ifs->ifs_nat_logging) 4941 nat_log(clone, (u_int)np->in_redir, ifs); 4942 np->in_use++; 4943 } 4944 fr = clone->nat_fr; 4945 if (fr != NULL) { 4946 MUTEX_ENTER(&fr->fr_lock); 4947 fr->fr_ref++; 4948 MUTEX_EXIT(&fr->fr_lock); 4949 } 4950 4951 /* 4952 * Because the clone is created outside the normal loop of things and 4953 * TCP has special needs in terms of state, initialise the timeout 4954 * state of the new NAT from here. 
4955 */ 4956 if (clone->nat_p == IPPROTO_TCP) { 4957 (void) fr_tcp_age(&clone->nat_tqe, fin, ifs->ifs_nat_tqb, 4958 clone->nat_flags); 4959 } 4960 #ifdef IPFILTER_SYNC 4961 clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone); 4962 #endif 4963 if (ifs->ifs_nat_logging) 4964 nat_log(clone, NL_CLONE, ifs); 4965 return clone; 4966 } 4967 4968 4969 /* ------------------------------------------------------------------------ */ 4970 /* Function: nat_wildok */ 4971 /* Returns: int - 1 == packet's ports match wildcards */ 4972 /* 0 == packet's ports don't match wildcards */ 4973 /* Parameters: nat(I) - NAT entry */ 4974 /* sport(I) - source port */ 4975 /* dport(I) - destination port */ 4976 /* flags(I) - wildcard flags */ 4977 /* dir(I) - packet direction */ 4978 /* */ 4979 /* Use NAT entry and packet direction to determine which combination of */ 4980 /* wildcard flags should be used. */ 4981 /* ------------------------------------------------------------------------ */ 4982 static INLINE int nat_wildok(nat, sport, dport, flags, dir) 4983 nat_t *nat; 4984 int sport; 4985 int dport; 4986 int flags; 4987 int dir; 4988 { 4989 /* 4990 * When called by dir is set to 4991 * nat_inlookup NAT_INBOUND (0) 4992 * nat_outlookup NAT_OUTBOUND (1) 4993 * 4994 * We simply combine the packet's direction in dir with the original 4995 * "intended" direction of that NAT entry in nat->nat_dir to decide 4996 * which combination of wildcard flags to allow. 
4997 */ 4998 4999 switch ((dir << 1) | nat->nat_dir) 5000 { 5001 case 3: /* outbound packet / outbound entry */ 5002 if (((nat->nat_inport == sport) || 5003 (flags & SI_W_SPORT)) && 5004 ((nat->nat_oport == dport) || 5005 (flags & SI_W_DPORT))) 5006 return 1; 5007 break; 5008 case 2: /* outbound packet / inbound entry */ 5009 if (((nat->nat_outport == sport) || 5010 (flags & SI_W_DPORT)) && 5011 ((nat->nat_oport == dport) || 5012 (flags & SI_W_SPORT))) 5013 return 1; 5014 break; 5015 case 1: /* inbound packet / outbound entry */ 5016 if (((nat->nat_oport == sport) || 5017 (flags & SI_W_DPORT)) && 5018 ((nat->nat_outport == dport) || 5019 (flags & SI_W_SPORT))) 5020 return 1; 5021 break; 5022 case 0: /* inbound packet / inbound entry */ 5023 if (((nat->nat_oport == sport) || 5024 (flags & SI_W_SPORT)) && 5025 ((nat->nat_outport == dport) || 5026 (flags & SI_W_DPORT))) 5027 return 1; 5028 break; 5029 default: 5030 break; 5031 } 5032 5033 return(0); 5034 } 5035 5036 5037 /* ------------------------------------------------------------------------ */ 5038 /* Function: nat_mssclamp */ 5039 /* Returns: Nil */ 5040 /* Parameters: tcp(I) - pointer to TCP header */ 5041 /* maxmss(I) - value to clamp the TCP MSS to */ 5042 /* csump(I) - pointer to TCP checksum */ 5043 /* */ 5044 /* Check for MSS option and clamp it if necessary. If found and changed, */ 5045 /* then the TCP header checksum will be updated to reflect the change in */ 5046 /* the MSS. 
*/ 5047 /* ------------------------------------------------------------------------ */ 5048 static void nat_mssclamp(tcp, maxmss, csump) 5049 tcphdr_t *tcp; 5050 u_32_t maxmss; 5051 u_short *csump; 5052 { 5053 u_char *cp, *ep, opt; 5054 int hlen, advance; 5055 u_32_t mss, sumd; 5056 5057 hlen = TCP_OFF(tcp) << 2; 5058 if (hlen > sizeof(*tcp)) { 5059 cp = (u_char *)tcp + sizeof(*tcp); 5060 ep = (u_char *)tcp + hlen; 5061 5062 while (cp < ep) { 5063 opt = cp[0]; 5064 if (opt == TCPOPT_EOL) 5065 break; 5066 else if (opt == TCPOPT_NOP) { 5067 cp++; 5068 continue; 5069 } 5070 5071 if (cp + 1 >= ep) 5072 break; 5073 advance = cp[1]; 5074 if ((cp + advance > ep) || (advance <= 0)) 5075 break; 5076 switch (opt) 5077 { 5078 case TCPOPT_MAXSEG: 5079 if (advance != 4) 5080 break; 5081 mss = cp[2] * 256 + cp[3]; 5082 if (mss > maxmss) { 5083 cp[2] = maxmss / 256; 5084 cp[3] = maxmss & 0xff; 5085 CALC_SUMD(mss, maxmss, sumd); 5086 fix_outcksum(csump, sumd); 5087 } 5088 break; 5089 default: 5090 /* ignore unknown options */ 5091 break; 5092 } 5093 5094 cp += advance; 5095 } 5096 } 5097 } 5098 5099 5100 /* ------------------------------------------------------------------------ */ 5101 /* Function: fr_setnatqueue */ 5102 /* Returns: Nil */ 5103 /* Parameters: nat(I)- pointer to NAT structure */ 5104 /* rev(I) - forward(0) or reverse(1) direction */ 5105 /* Locks: ipf_nat (read or write) */ 5106 /* */ 5107 /* Put the NAT entry on its default queue entry, using rev as a helped in */ 5108 /* determining which queue it should be placed on. 
*/ 5109 /* ------------------------------------------------------------------------ */ 5110 void fr_setnatqueue(nat, rev, ifs) 5111 nat_t *nat; 5112 int rev; 5113 ipf_stack_t *ifs; 5114 { 5115 ipftq_t *oifq, *nifq; 5116 5117 if (nat->nat_ptr != NULL) 5118 nifq = nat->nat_ptr->in_tqehead[rev]; 5119 else 5120 nifq = NULL; 5121 5122 if (nifq == NULL) { 5123 switch (nat->nat_p) 5124 { 5125 case IPPROTO_UDP : 5126 nifq = &ifs->ifs_nat_udptq; 5127 break; 5128 case IPPROTO_ICMP : 5129 nifq = &ifs->ifs_nat_icmptq; 5130 break; 5131 case IPPROTO_TCP : 5132 nifq = ifs->ifs_nat_tqb + nat->nat_tqe.tqe_state[rev]; 5133 break; 5134 default : 5135 nifq = &ifs->ifs_nat_iptq; 5136 break; 5137 } 5138 } 5139 5140 oifq = nat->nat_tqe.tqe_ifq; 5141 /* 5142 * If it's currently on a timeout queue, move it from one queue to 5143 * another, else put it on the end of the newly determined queue. 5144 */ 5145 if (oifq != NULL) 5146 fr_movequeue(&nat->nat_tqe, oifq, nifq, ifs); 5147 else 5148 fr_queueappend(&nat->nat_tqe, nifq, nat, ifs); 5149 return; 5150 } 5151 5152 /* ------------------------------------------------------------------------ */ 5153 /* Function: nat_getnext */ 5154 /* Returns: int - 0 == ok, else error */ 5155 /* Parameters: t(I) - pointer to ipftoken structure */ 5156 /* itp(I) - pointer to ipfgeniter_t structure */ 5157 /* ifs - ipf stack instance */ 5158 /* */ 5159 /* Fetch the next nat/ipnat/hostmap structure pointer from the linked list */ 5160 /* and copy it out to the storage space pointed to by itp. The next item */ 5161 /* in the list to look at is put back in the ipftoken struture. 
*/ 5162 /* ------------------------------------------------------------------------ */ 5163 static int nat_getnext(t, itp, ifs) 5164 ipftoken_t *t; 5165 ipfgeniter_t *itp; 5166 ipf_stack_t *ifs; 5167 { 5168 hostmap_t *hm, *nexthm = NULL, zerohm; 5169 ipnat_t *ipn, *nextipnat = NULL, zeroipn; 5170 nat_t *nat, *nextnat = NULL, zeronat; 5171 int error = 0, count; 5172 char *dst; 5173 5174 if (itp->igi_nitems == 0) 5175 return EINVAL; 5176 5177 READ_ENTER(&ifs->ifs_ipf_nat); 5178 5179 switch (itp->igi_type) 5180 { 5181 case IPFGENITER_HOSTMAP : 5182 hm = t->ipt_data; 5183 if (hm == NULL) { 5184 nexthm = ifs->ifs_ipf_hm_maplist; 5185 } else { 5186 nexthm = hm->hm_next; 5187 } 5188 break; 5189 5190 case IPFGENITER_IPNAT : 5191 ipn = t->ipt_data; 5192 if (ipn == NULL) { 5193 nextipnat = ifs->ifs_nat_list; 5194 } else { 5195 nextipnat = ipn->in_next; 5196 } 5197 break; 5198 5199 case IPFGENITER_NAT : 5200 nat = t->ipt_data; 5201 if (nat == NULL) { 5202 nextnat = ifs->ifs_nat_instances; 5203 } else { 5204 nextnat = nat->nat_next; 5205 } 5206 break; 5207 default : 5208 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5209 return EINVAL; 5210 } 5211 5212 dst = itp->igi_data; 5213 for (count = itp->igi_nitems; count > 0; count--) { 5214 switch (itp->igi_type) 5215 { 5216 case IPFGENITER_HOSTMAP : 5217 if (nexthm != NULL) { 5218 ATOMIC_INC32(nexthm->hm_ref); 5219 t->ipt_data = nexthm; 5220 } else { 5221 bzero(&zerohm, sizeof(zerohm)); 5222 nexthm = &zerohm; 5223 count = 1; 5224 t->ipt_data = NULL; 5225 } 5226 break; 5227 case IPFGENITER_IPNAT : 5228 if (nextipnat != NULL) { 5229 ATOMIC_INC32(nextipnat->in_use); 5230 t->ipt_data = nextipnat; 5231 } else { 5232 bzero(&zeroipn, sizeof(zeroipn)); 5233 nextipnat = &zeroipn; 5234 count = 1; 5235 t->ipt_data = NULL; 5236 } 5237 break; 5238 case IPFGENITER_NAT : 5239 if (nextnat != NULL) { 5240 MUTEX_ENTER(&nextnat->nat_lock); 5241 nextnat->nat_ref++; 5242 MUTEX_EXIT(&nextnat->nat_lock); 5243 t->ipt_data = nextnat; 5244 } else { 5245 bzero(&zeronat, 
sizeof(zeronat)); 5246 nextnat = &zeronat; 5247 count = 1; 5248 t->ipt_data = NULL; 5249 } 5250 break; 5251 default : 5252 break; 5253 } 5254 5255 /* 5256 * We can safely release our hold on ipf_nat. 5257 */ 5258 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5259 5260 switch (itp->igi_type) 5261 { 5262 case IPFGENITER_HOSTMAP : 5263 if (hm != NULL) { 5264 WRITE_ENTER(&ifs->ifs_ipf_nat); 5265 fr_hostmapdel(&hm); 5266 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5267 } 5268 error = COPYOUT(nexthm, dst, sizeof(*nexthm)); 5269 if (error != 0) { 5270 error = EFAULT; 5271 } else { 5272 dst += sizeof(*nexthm); 5273 hm = nexthm; 5274 nexthm = nexthm->hm_next; 5275 } 5276 break; 5277 case IPFGENITER_IPNAT : 5278 if (ipn != NULL) { 5279 WRITE_ENTER(&ifs->ifs_ipf_nat); 5280 fr_ipnatderef(&ipn, ifs); 5281 RWLOCK_EXIT(&ifs->ifs_ipf_nat); 5282 } 5283 error = COPYOUT(nextipnat, dst, sizeof(*nextipnat)); 5284 if (error != 0) { 5285 error = EFAULT; 5286 } else { 5287 dst += sizeof(*nextipnat); 5288 ipn = nextipnat; 5289 nextipnat = nextipnat->in_next; 5290 } 5291 break; 5292 case IPFGENITER_NAT : 5293 if (nat != NULL) { 5294 fr_natderef(&nat, ifs); 5295 } 5296 error = COPYOUT(nextnat, dst, sizeof(*nextnat)); 5297 if (error != 0) { 5298 error = EFAULT; 5299 } else { 5300 dst += sizeof(*nextnat); 5301 nat = nextnat; 5302 nextnat = nextnat->nat_next; 5303 } 5304 break; 5305 default : 5306 break; 5307 } 5308 5309 if ((count == 1) || (error != 0)) 5310 break; 5311 5312 READ_ENTER(&ifs->ifs_ipf_nat); 5313 } 5314 5315 return error; 5316 } 5317 5318 5319 /* ------------------------------------------------------------------------ */ 5320 /* Function: nat_iterator */ 5321 /* Returns: int - 0 == ok, else error */ 5322 /* Parameters: token(I) - pointer to ipftoken structure */ 5323 /* itp(I) - pointer to ipfgeniter_t structure */ 5324 /* */ 5325 /* This function acts as a handler for the SIOCGENITER ioctls that use a */ 5326 /* generic structure to iterate through a list. 
There are three different */ 5327 /* linked lists of NAT related information to go through: NAT rules, active */ 5328 /* NAT mappings and the NAT fragment cache. */ 5329 /* ------------------------------------------------------------------------ */ 5330 static int nat_iterator(token, itp, ifs) 5331 ipftoken_t *token; 5332 ipfgeniter_t *itp; 5333 ipf_stack_t *ifs; 5334 { 5335 int error; 5336 5337 if (itp->igi_data == NULL) 5338 return EFAULT; 5339 5340 token->ipt_subtype = itp->igi_type; 5341 5342 switch (itp->igi_type) 5343 { 5344 case IPFGENITER_HOSTMAP : 5345 case IPFGENITER_IPNAT : 5346 case IPFGENITER_NAT : 5347 error = nat_getnext(token, itp, ifs); 5348 break; 5349 case IPFGENITER_NATFRAG : 5350 error = fr_nextfrag(token, itp, &ifs->ifs_ipfr_natlist, 5351 &ifs->ifs_ipfr_nattail, 5352 &ifs->ifs_ipf_natfrag, ifs); 5353 break; 5354 default : 5355 error = EINVAL; 5356 break; 5357 } 5358 5359 return error; 5360 } 5361 5362 5363 /* -------------------------------------------------------------------- */ 5364 /* Function: nat_earlydrop */ 5365 /* Returns: number of dropped/removed entries from the queue */ 5366 /* Parameters: ifq - pointer to queue with entries to be processed */ 5367 /* maxidle - entry must be idle this long to be dropped */ 5368 /* ifs - ipf stack instance */ 5369 /* */ 5370 /* Function is invoked from nat_extraflush() only. Removes entries */ 5371 /* form specified timeout queue, based on how long they've sat idle, */ 5372 /* without waiting for it to happen on its own. */ 5373 /* -------------------------------------------------------------------- */ 5374 static int nat_earlydrop(ifq, maxidle, ifs) 5375 ipftq_t *ifq; 5376 int maxidle; 5377 ipf_stack_t *ifs; 5378 { 5379 ipftqent_t *tqe, *tqn; 5380 nat_t *nat; 5381 unsigned int dropped; 5382 int droptick; 5383 5384 if (ifq == NULL) 5385 return (0); 5386 5387 dropped = 0; 5388 5389 /* 5390 * Determine the tick representing the idle time we're interested 5391 * in. 
If an entry exists in the queue, and it was touched before 5392 * that tick, then it's been idle longer than maxidle ... remove it. 5393 */ 5394 droptick = ifs->ifs_fr_ticks - maxidle; 5395 tqn = ifq->ifq_head; 5396 while ((tqe = tqn) != NULL && tqe->tqe_touched < droptick) { 5397 tqn = tqe->tqe_next; 5398 nat = tqe->tqe_parent; 5399 nat_delete(nat, ISL_EXPIRE, ifs); 5400 dropped++; 5401 } 5402 return (dropped); 5403 } 5404 5405 5406 /* --------------------------------------------------------------------- */ 5407 /* Function: nat_flushclosing */ 5408 /* Returns: int - number of NAT entries deleted */ 5409 /* Parameters: stateval(I) - State at which to start removing entries */ 5410 /* ifs - ipf stack instance */ 5411 /* */ 5412 /* Remove nat table entries for TCP connections which are in the process */ 5413 /* of closing, and are in (or "beyond") state specified by 'stateval'. */ 5414 /* --------------------------------------------------------------------- */ 5415 static int nat_flushclosing(stateval, ifs) 5416 int stateval; 5417 ipf_stack_t *ifs; 5418 { 5419 ipftq_t *ifq, *ifqn; 5420 ipftqent_t *tqe, *tqn; 5421 nat_t *nat; 5422 int dropped; 5423 5424 dropped = 0; 5425 5426 /* 5427 * Start by deleting any entries in specific timeout queues. 5428 */ 5429 ifqn = &ifs->ifs_nat_tqb[stateval]; 5430 while ((ifq = ifqn) != NULL) { 5431 ifqn = ifq->ifq_next; 5432 dropped += nat_earlydrop(ifq, (int)0, ifs); 5433 } 5434 5435 /* 5436 * Next, look through user defined queues for closing entries. 
5437 */ 5438 ifqn = ifs->ifs_nat_utqe; 5439 while ((ifq = ifqn) != NULL) { 5440 ifqn = ifq->ifq_next; 5441 tqn = ifq->ifq_head; 5442 while ((tqe = tqn) != NULL) { 5443 tqn = tqe->tqe_next; 5444 nat = tqe->tqe_parent; 5445 if (nat->nat_p != IPPROTO_TCP) 5446 continue; 5447 if ((nat->nat_tcpstate[0] >= stateval) && 5448 (nat->nat_tcpstate[1] >= stateval)) { 5449 nat_delete(nat, NL_EXPIRE, ifs); 5450 dropped++; 5451 } 5452 } 5453 } 5454 return (dropped); 5455 } 5456 5457 5458 /* --------------------------------------------------------------------- */ 5459 /* Function: nat_extraflush */ 5460 /* Returns: int - number of NAT entries deleted */ 5461 /* Parameters: which(I) - how to flush the active NAT table */ 5462 /* ifs - ipf stack instance */ 5463 /* Write Locks: ipf_nat */ 5464 /* */ 5465 /* Flush nat tables. Three actions currently defined: */ 5466 /* */ 5467 /* which == 0 : Flush all nat table entries. */ 5468 /* */ 5469 /* which == 1 : Flush entries with TCP connections which have started */ 5470 /* to close on both ends. */ 5471 /* */ 5472 /* which == 2 : First, flush entries which are "almost" closed. If that */ 5473 /* does not take us below specified threshold in the table, */ 5474 /* we want to flush entries with TCP connections which have */ 5475 /* been idle for a long time. Start with connections idle */ 5476 /* over 12 hours, and then work backwards in half hour */ 5477 /* increments to at most 30 minutes idle, and finally work */ 5478 /* back in 30 second increments to at most 30 seconds. 
*/ 5479 /* --------------------------------------------------------------------- */ 5480 static int nat_extraflush(which, ifs) 5481 int which; 5482 ipf_stack_t *ifs; 5483 { 5484 ipftq_t *ifq, *ifqn; 5485 nat_t *nat, **natp; 5486 int idletime, removed, idle_idx; 5487 SPL_INT(s); 5488 5489 removed = 0; 5490 5491 SPL_NET(s); 5492 switch (which) 5493 { 5494 case 0: 5495 natp = &ifs->ifs_nat_instances; 5496 while ((nat = *natp) != NULL) { 5497 natp = &nat->nat_next; 5498 nat_delete(nat, ISL_FLUSH, ifs); 5499 removed++; 5500 } 5501 break; 5502 5503 case 1: 5504 removed = nat_flushclosing(IPF_TCPS_CLOSE_WAIT, ifs); 5505 break; 5506 5507 case 2: 5508 removed = nat_flushclosing(IPF_TCPS_FIN_WAIT_2, ifs); 5509 5510 /* 5511 * Be sure we haven't done this in the last 10 seconds. 5512 */ 5513 if (ifs->ifs_fr_ticks - ifs->ifs_nat_last_force_flush < 5514 IPF_TTLVAL(10)) 5515 break; 5516 ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks; 5517 5518 /* 5519 * Determine initial threshold for minimum idle time based on 5520 * how long ipfilter has been running. Ipfilter needs to have 5521 * been up as long as the smallest interval to continue on. 5522 * 5523 * Minimum idle times stored in idletime_tab and indexed by 5524 * idle_idx. Start at upper end of array and work backwards. 5525 * 5526 * Once the index is found, set the initial idle time to the 5527 * first interval before the current ipfilter run time. 
5528 */ 5529 if (ifs->ifs_fr_ticks < idletime_tab[0]) 5530 break; /* switch */ 5531 idle_idx = (sizeof (idletime_tab) / sizeof (int)) - 1; 5532 if (ifs->ifs_fr_ticks > idletime_tab[idle_idx]) { 5533 idletime = idletime_tab[idle_idx]; 5534 } else { 5535 while ((idle_idx > 0) && 5536 (ifs->ifs_fr_ticks < idletime_tab[idle_idx])) 5537 idle_idx--; 5538 idletime = (ifs->ifs_fr_ticks / 5539 idletime_tab[idle_idx]) * 5540 idletime_tab[idle_idx]; 5541 } 5542 5543 while ((idle_idx >= 0) && 5544 (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_lo)) { 5545 /* 5546 * Start with appropriate timeout queue. 5547 */ 5548 removed += nat_earlydrop( 5549 &ifs->ifs_nat_tqb[IPF_TCPS_ESTABLISHED], 5550 idletime, ifs); 5551 5552 /* 5553 * Make sure we haven't already deleted enough 5554 * entries before checking the user defined queues. 5555 */ 5556 if (NAT_TAB_WATER_LEVEL(ifs) <= 5557 ifs->ifs_nat_flush_lvl_lo) 5558 break; 5559 5560 /* 5561 * Next, look through the user defined queues. 5562 */ 5563 ifqn = ifs->ifs_nat_utqe; 5564 while ((ifq = ifqn) != NULL) { 5565 ifqn = ifq->ifq_next; 5566 removed += nat_earlydrop(ifq, idletime, ifs); 5567 } 5568 5569 /* 5570 * Adjust the granularity of idle time. 5571 * 5572 * If we reach an interval boundary, we need to 5573 * either adjust the idle time accordingly or exit 5574 * the loop altogether (if this is very last check). 5575 */ 5576 idletime -= idletime_tab[idle_idx]; 5577 if (idletime < idletime_tab[idle_idx]) { 5578 if (idle_idx != 0) { 5579 idletime = idletime_tab[idle_idx] - 5580 idletime_tab[idle_idx - 1]; 5581 idle_idx--; 5582 } else { 5583 break; /* while */ 5584 } 5585 } 5586 } 5587 break; 5588 default: 5589 break; 5590 } 5591 5592 SPL_X(s); 5593 return (removed); 5594 } 5595