1 /* 2 * Copyright (C) 1995-2003 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 7 * Use is subject to license terms. 8 */ 9 10 #if defined(KERNEL) || defined(_KERNEL) 11 # undef KERNEL 12 # undef _KERNEL 13 # define KERNEL 1 14 # define _KERNEL 1 15 #endif 16 #include <sys/errno.h> 17 #include <sys/types.h> 18 #include <sys/param.h> 19 #include <sys/file.h> 20 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ 21 defined(_KERNEL) 22 # include "opt_ipfilter_log.h" 23 #endif 24 #if defined(_KERNEL) && defined(__FreeBSD_version) && \ 25 (__FreeBSD_version >= 400000) && !defined(KLD_MODULE) 26 #include "opt_inet6.h" 27 #endif 28 #if !defined(_KERNEL) && !defined(__KERNEL__) 29 # include <stdio.h> 30 # include <stdlib.h> 31 # include <string.h> 32 # define _KERNEL 33 # ifdef __OpenBSD__ 34 struct file; 35 # endif 36 # include <sys/uio.h> 37 # undef _KERNEL 38 #endif 39 #if defined(_KERNEL) && (__FreeBSD_version >= 220000) 40 # include <sys/filio.h> 41 # include <sys/fcntl.h> 42 # if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM) 43 # include "opt_ipfilter.h" 44 # endif 45 #else 46 # include <sys/ioctl.h> 47 #endif 48 #include <sys/time.h> 49 #if !defined(linux) 50 # include <sys/protosw.h> 51 #endif 52 #include <sys/socket.h> 53 #if defined(_KERNEL) 54 # include <sys/systm.h> 55 # if !defined(__SVR4) && !defined(__svr4__) 56 # include <sys/mbuf.h> 57 # endif 58 #endif 59 #if defined(__SVR4) || defined(__svr4__) 60 # include <sys/filio.h> 61 # include <sys/byteorder.h> 62 # ifdef _KERNEL 63 # include <sys/dditypes.h> 64 # endif 65 # include <sys/stream.h> 66 # include <sys/kmem.h> 67 #endif 68 69 #include <net/if.h> 70 #ifdef sun 71 # include <net/af.h> 72 #endif 73 #include <net/route.h> 74 #include <netinet/in.h> 75 #include <netinet/in_systm.h> 76 #include <netinet/ip.h> 77 #include <netinet/tcp.h> 78 #if !defined(linux) 79 # include 
<netinet/ip_var.h> 80 #endif 81 #if !defined(__hpux) && !defined(linux) 82 # include <netinet/tcp_fsm.h> 83 #endif 84 #include <netinet/udp.h> 85 #include <netinet/ip_icmp.h> 86 #include "netinet/ip_compat.h" 87 #include <netinet/tcpip.h> 88 #include "netinet/ip_fil.h" 89 #include "netinet/ip_nat.h" 90 #include "netinet/ip_frag.h" 91 #include "netinet/ip_state.h" 92 #include "netinet/ip_proxy.h" 93 #include "netinet/ipf_stack.h" 94 #ifdef IPFILTER_SYNC 95 #include "netinet/ip_sync.h" 96 #endif 97 #ifdef IPFILTER_SCAN 98 #include "netinet/ip_scan.h" 99 #endif 100 #ifdef USE_INET6 101 #include <netinet/icmp6.h> 102 #endif 103 #if (__FreeBSD_version >= 300000) 104 # include <sys/malloc.h> 105 # if defined(_KERNEL) && !defined(IPFILTER_LKM) 106 # include <sys/libkern.h> 107 # include <sys/systm.h> 108 # endif 109 #endif 110 /* END OF INCLUDES */ 111 112 113 #if !defined(lint) 114 static const char sccsid[] = "@(#)ip_state.c 1.8 6/5/96 (C) 1993-2000 Darren Reed"; 115 static const char rcsid[] = "@(#)$Id: ip_state.c,v 2.186.2.36 2005/08/11 19:58:03 darrenr Exp $"; 116 #endif 117 118 #ifdef USE_INET6 119 static ipstate_t *fr_checkicmp6matchingstate __P((fr_info_t *)); 120 #endif 121 static ipstate_t *fr_matchsrcdst __P((fr_info_t *, ipstate_t *, i6addr_t *, 122 i6addr_t *, tcphdr_t *, u_32_t)); 123 static ipstate_t *fr_checkicmpmatchingstate __P((fr_info_t *)); 124 static int fr_state_flush __P((int, int, ipf_stack_t *)); 125 static ips_stat_t *fr_statetstats __P((ipf_stack_t *)); 126 static int fr_state_remove __P((caddr_t, ipf_stack_t *)); 127 static void fr_ipsmove __P((ipstate_t *, u_int, ipf_stack_t *)); 128 static int fr_tcpstate __P((fr_info_t *, tcphdr_t *, ipstate_t *)); 129 static int fr_tcpoptions __P((fr_info_t *, tcphdr_t *, tcpdata_t *)); 130 static ipstate_t *fr_stclone __P((fr_info_t *, tcphdr_t *, ipstate_t *)); 131 static void fr_fixinisn __P((fr_info_t *, ipstate_t *)); 132 static void fr_fixoutisn __P((fr_info_t *, ipstate_t *)); 133 static void 
fr_checknewisn __P((fr_info_t *, ipstate_t *));
static int fr_stateiter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));

int fr_stputent __P((caddr_t, ipf_stack_t *));
int fr_stgetent __P((caddr_t, ipf_stack_t *));

/* Time spans, converted with IPF_TTLVAL(). */
#define	ONE_DAY			IPF_TTLVAL(1 * 86400)	/* 1 day */
#define	FIVE_DAYS		(5 * ONE_DAY)
/*
 * Add to hash value x the seed entry selected by (x % table size), then
 * reduce the sum modulo the table size again.
 */
#define	DOUBLE_HASH(x, ifs)	\
    (((x) + ifs->ifs_ips_seed[(x) % ifs->ifs_fr_statesize]) % ifs->ifs_fr_statesize)


/* ------------------------------------------------------------------------ */
/* Function:    fr_stateinit                                                */
/* Returns:     int - 0 == success, negative == failure (-1 or -2)          */
/* Parameters:  ifs - ipf stack instance                                    */
/*                                                                          */
/* Initialise all the global variables used within the state code.          */
/* This action also includes initialising locks.                            */
/* Arrays already allocated when a later step fails are not freed here.     */
/* ------------------------------------------------------------------------ */
int fr_stateinit(ifs)
ipf_stack_t *ifs;
{
#if defined(NEED_LOCAL_RAND) || !defined(_KERNEL)
	struct timeval tv;
#endif
	int i;

	/* Hash table of state entries, zeroed so every bucket starts empty. */
	KMALLOCS(ifs->ifs_ips_table, ipstate_t **,
		 ifs->ifs_fr_statesize * sizeof(ipstate_t *));
	if (ifs->ifs_ips_table == NULL)
		return -1;
	bzero((char *)ifs->ifs_ips_table,
	      ifs->ifs_fr_statesize * sizeof(ipstate_t *));

	/* One seed per hash bucket, consumed by DOUBLE_HASH(). */
	KMALLOCS(ifs->ifs_ips_seed, u_long *,
		 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed));
	if (ifs->ifs_ips_seed == NULL)
		return -2;
#if defined(NEED_LOCAL_RAND) || !defined(_KERNEL)
	tv.tv_sec = 0;
	GETKTIME(&tv);
#endif
	for (i = 0; i < ifs->ifs_fr_statesize; i++) {
		/*
		 * XXX - ips_seed[X] should be a random number of sorts.
		 */
#if !defined(NEED_LOCAL_RAND) && defined(_KERNEL)
		ifs->ifs_ips_seed[i] = ipf_random();
#else
		/*
		 * No ipf_random() here: mix the address of the seed array,
		 * the index, the table size, tv_sec and fr_statemax instead.
		 */
		ifs->ifs_ips_seed[i] = ((u_long)ifs->ifs_ips_seed + i) *
				    ifs->ifs_fr_statesize;
		ifs->ifs_ips_seed[i] += tv.tv_sec;
		ifs->ifs_ips_seed[i] *= (u_long)ifs->ifs_ips_seed;
		ifs->ifs_ips_seed[i] ^= 0x5a5aa5a5;
		ifs->ifs_ips_seed[i] *= ifs->ifs_fr_statemax;
#endif
	}

	/* fill icmp reply type table: -1 marks a type with no reply type */
	for (i = 0; i <= ICMP_MAXTYPE; i++)
		icmpreplytype4[i] = -1;
	icmpreplytype4[ICMP_ECHO] = ICMP_ECHOREPLY;
	icmpreplytype4[ICMP_TSTAMP] = ICMP_TSTAMPREPLY;
	icmpreplytype4[ICMP_IREQ] = ICMP_IREQREPLY;
	icmpreplytype4[ICMP_MASKREQ] = ICMP_MASKREPLY;
#ifdef USE_INET6
	/* fill icmp reply type table */
	for (i = 0; i <= ICMP6_MAXTYPE; i++)
		icmpreplytype6[i] = -1;
	icmpreplytype6[ICMP6_ECHO_REQUEST] = ICMP6_ECHO_REPLY;
	icmpreplytype6[ICMP6_MEMBERSHIP_QUERY] = ICMP6_MEMBERSHIP_REPORT;
	icmpreplytype6[ICMP6_NI_QUERY] = ICMP6_NI_REPLY;
	icmpreplytype6[ND_ROUTER_SOLICIT] = ND_ROUTER_ADVERT;
	icmpreplytype6[ND_NEIGHBOR_SOLICIT] = ND_NEIGHBOR_ADVERT;
#endif

	/* Per-bucket entry counters, one per hash bucket, zeroed. */
	KMALLOCS(ifs->ifs_ips_stats.iss_bucketlen, u_long *,
		 ifs->ifs_fr_statesize * sizeof(u_long));
	if (ifs->ifs_ips_stats.iss_bucketlen == NULL)
		return -1;
	bzero((char *)ifs->ifs_ips_stats.iss_bucketlen,
	      ifs->ifs_fr_statesize * sizeof(u_long));

	/*
	 * If no bucket limit has been set, default it to twice the number of
	 * right shifts needed to reduce the table size to zero.
	 */
	if (ifs->ifs_fr_state_maxbucket == 0) {
		for (i = ifs->ifs_fr_statesize; i > 0; i >>= 1)
			ifs->ifs_fr_state_maxbucket++;
		ifs->ifs_fr_state_maxbucket *= 2;
	}

	/*
	 * Chain the fixed timeout queues after the last entry of
	 * ifs_ips_tqtqb, in the order udp, udpack, icmp, icmpack, ip,
	 * delete.  Each queue starts empty with a reference count of 1.
	 */
	fr_sttab_init(ifs->ifs_ips_tqtqb, ifs);
	ifs->ifs_ips_tqtqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_ips_udptq;
	ifs->ifs_ips_udptq.ifq_ttl = (u_long)ifs->ifs_fr_udptimeout;
	ifs->ifs_ips_udptq.ifq_ref = 1;
	ifs->ifs_ips_udptq.ifq_head = NULL;
	ifs->ifs_ips_udptq.ifq_tail = &ifs->ifs_ips_udptq.ifq_head;
	MUTEX_INIT(&ifs->ifs_ips_udptq.ifq_lock, "ipftq udp tab");
	ifs->ifs_ips_udptq.ifq_next = &ifs->ifs_ips_udpacktq;
	ifs->ifs_ips_udpacktq.ifq_ttl = (u_long)ifs->ifs_fr_udpacktimeout;
	ifs->ifs_ips_udpacktq.ifq_ref = 1;
	ifs->ifs_ips_udpacktq.ifq_head = NULL;
	ifs->ifs_ips_udpacktq.ifq_tail = &ifs->ifs_ips_udpacktq.ifq_head;
	MUTEX_INIT(&ifs->ifs_ips_udpacktq.ifq_lock, "ipftq udpack tab");
	ifs->ifs_ips_udpacktq.ifq_next = &ifs->ifs_ips_icmptq;
	ifs->ifs_ips_icmptq.ifq_ttl = (u_long)ifs->ifs_fr_icmptimeout;
	ifs->ifs_ips_icmptq.ifq_ref = 1;
	ifs->ifs_ips_icmptq.ifq_head = NULL;
	ifs->ifs_ips_icmptq.ifq_tail = &ifs->ifs_ips_icmptq.ifq_head;
	MUTEX_INIT(&ifs->ifs_ips_icmptq.ifq_lock, "ipftq icmp tab");
	ifs->ifs_ips_icmptq.ifq_next = &ifs->ifs_ips_icmpacktq;
	ifs->ifs_ips_icmpacktq.ifq_ttl = (u_long)ifs->ifs_fr_icmpacktimeout;
	ifs->ifs_ips_icmpacktq.ifq_ref = 1;
	ifs->ifs_ips_icmpacktq.ifq_head = NULL;
	ifs->ifs_ips_icmpacktq.ifq_tail = &ifs->ifs_ips_icmpacktq.ifq_head;
	MUTEX_INIT(&ifs->ifs_ips_icmpacktq.ifq_lock, "ipftq icmpack tab");
	ifs->ifs_ips_icmpacktq.ifq_next = &ifs->ifs_ips_iptq;
	ifs->ifs_ips_iptq.ifq_ttl = (u_long)ifs->ifs_fr_iptimeout;
	ifs->ifs_ips_iptq.ifq_ref = 1;
	ifs->ifs_ips_iptq.ifq_head = NULL;
	ifs->ifs_ips_iptq.ifq_tail = &ifs->ifs_ips_iptq.ifq_head;
	MUTEX_INIT(&ifs->ifs_ips_iptq.ifq_lock, "ipftq ip tab");
	ifs->ifs_ips_iptq.ifq_next = &ifs->ifs_ips_deletetq;
	/* entry's ttl in deletetq is just 1 tick */
	ifs->ifs_ips_deletetq.ifq_ttl = (u_long) 1;
	ifs->ifs_ips_deletetq.ifq_ref = 1;
	ifs->ifs_ips_deletetq.ifq_head = NULL;
	ifs->ifs_ips_deletetq.ifq_tail = &ifs->ifs_ips_deletetq.ifq_head;
	MUTEX_INIT(&ifs->ifs_ips_deletetq.ifq_lock, "state delete queue");
	ifs->ifs_ips_deletetq.ifq_next = NULL;

	RWLOCK_INIT(&ifs->ifs_ipf_state, "ipf IP state rwlock");
	MUTEX_INIT(&ifs->ifs_ipf_stinsert, "ipf state insert mutex");
	/* From here on fr_stateunload() knows the locks need destroying. */
	ifs->ifs_fr_state_init = 1;

	ifs->ifs_ips_last_force_flush = ifs->ifs_fr_ticks;
	return 0;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_stateunload                                              */
/* Returns:     Nil                                                         */
/* Parameters:  ifs - ipf stack instance                                    */
/*                                                                          */
/* Release and destroy any resources acquired or initialised so that        */
/* IPFilter can be unloaded or re-initialised.                              */
/* ------------------------------------------------------------------------ */
void fr_stateunload(ifs)
ipf_stack_t *ifs;
{
	ipftq_t *ifq, *ifqnext;
	ipstate_t *is;

	/* Delete state entries from the head of the list until none remain. */
	while ((is = ifs->ifs_ips_list) != NULL)
	    (void) fr_delstate(is, 0, ifs);

	/*
	 * Proxy timeout queues are not cleaned here because although they
	 * exist on the state list, appr_unload is called after fr_stateunload
	 * and the proxies actually are responsible for them being created.
	 * Should the proxy timeouts have their own list?  There's no real
	 * justification as this is the only complication.
	 */
	for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) {
		/* Fetch the successor first: ifq may be freed below. */
		ifqnext = ifq->ifq_next;
		if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
		    (fr_deletetimeoutqueue(ifq) == 0))
			fr_freetimeoutqueue(ifq, ifs);
	}

	ifs->ifs_ips_stats.iss_inuse = 0;
	ifs->ifs_ips_num = 0;

	/* The queue locks only exist if fr_stateinit() ran to completion. */
	if (ifs->ifs_fr_state_init == 1) {
		fr_sttab_destroy(ifs->ifs_ips_tqtqb);
		MUTEX_DESTROY(&ifs->ifs_ips_udptq.ifq_lock);
		MUTEX_DESTROY(&ifs->ifs_ips_icmptq.ifq_lock);
		MUTEX_DESTROY(&ifs->ifs_ips_udpacktq.ifq_lock);
		MUTEX_DESTROY(&ifs->ifs_ips_icmpacktq.ifq_lock);
		MUTEX_DESTROY(&ifs->ifs_ips_iptq.ifq_lock);
		MUTEX_DESTROY(&ifs->ifs_ips_deletetq.ifq_lock);
	}

	/* Free each array that was allocated and clear its pointer. */
	if (ifs->ifs_ips_table != NULL) {
		KFREES(ifs->ifs_ips_table,
		       ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_table));
		ifs->ifs_ips_table = NULL;
	}

	if (ifs->ifs_ips_seed != NULL) {
		KFREES(ifs->ifs_ips_seed,
		       ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed));
		ifs->ifs_ips_seed = NULL;
	}

	if (ifs->ifs_ips_stats.iss_bucketlen != NULL) {
		KFREES(ifs->ifs_ips_stats.iss_bucketlen,
		       ifs->ifs_fr_statesize * sizeof(u_long));
		ifs->ifs_ips_stats.iss_bucketlen = NULL;
	}

	/* Zero the bucket limit so the next fr_stateinit() recomputes it. */
	if (ifs->ifs_fr_state_maxbucket_reset == 1)
		ifs->ifs_fr_state_maxbucket = 0;

	if (ifs->ifs_fr_state_init == 1) {
		ifs->ifs_fr_state_init = 0;
		RW_DESTROY(&ifs->ifs_ipf_state);
		MUTEX_DESTROY(&ifs->ifs_ipf_stinsert);
	}
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_statetstats                                              */
/* Returns:     ips_stat_t* - pointer to state stats structure              */
/* Parameters:  ifs - ipf stack instance                                    */
/*                                                                          */
/* Put all the current numbers and pointers into a single struct and return */
/* a pointer to it.                                                         */
/* ------------------------------------------------------------------------ */
static ips_stat_t *fr_statetstats(ifs)
ipf_stack_t *ifs;
{
	/* Refresh the snapshot fields kept inside the stack's own stats. */
	ifs->ifs_ips_stats.iss_active = ifs->ifs_ips_num;
	ifs->ifs_ips_stats.iss_statesize = ifs->ifs_fr_statesize;
	ifs->ifs_ips_stats.iss_statemax = ifs->ifs_fr_statemax;
	ifs->ifs_ips_stats.iss_table = ifs->ifs_ips_table;
	ifs->ifs_ips_stats.iss_list = ifs->ifs_ips_list;
	ifs->ifs_ips_stats.iss_ticks = ifs->ifs_fr_ticks;
	return &ifs->ifs_ips_stats;
}

/* ------------------------------------------------------------------------ */
/* Function:    fr_state_remove                                             */
/* Returns:     int - 0 == success, != 0 == failure                         */
/* Parameters:  data(I) - pointer to state structure to delete from table   */
/*              ifs - ipf stack instance                                    */
/*                                                                          */
/* Search for a state structure that matches the one passed, according to   */
/* the IP addresses and other protocol specific information.
*/ 373 /* ------------------------------------------------------------------------ */ 374 static int fr_state_remove(data, ifs) 375 caddr_t data; 376 ipf_stack_t *ifs; 377 { 378 ipstate_t *sp, st; 379 int error; 380 381 sp = &st; 382 error = fr_inobj(data, &st, IPFOBJ_IPSTATE); 383 if (error) 384 return EFAULT; 385 386 WRITE_ENTER(&ifs->ifs_ipf_state); 387 for (sp = ifs->ifs_ips_list; sp; sp = sp->is_next) 388 if ((sp->is_p == st.is_p) && (sp->is_v == st.is_v) && 389 !bcmp((caddr_t)&sp->is_src, (caddr_t)&st.is_src, 390 sizeof(st.is_src)) && 391 !bcmp((caddr_t)&sp->is_dst, (caddr_t)&st.is_dst, 392 sizeof(st.is_dst)) && 393 !bcmp((caddr_t)&sp->is_ps, (caddr_t)&st.is_ps, 394 sizeof(st.is_ps))) { 395 (void) fr_delstate(sp, ISL_REMOVE, ifs); 396 RWLOCK_EXIT(&ifs->ifs_ipf_state); 397 return 0; 398 } 399 RWLOCK_EXIT(&ifs->ifs_ipf_state); 400 return ESRCH; 401 } 402 403 404 /* ------------------------------------------------------------------------ */ 405 /* Function: fr_state_ioctl */ 406 /* Returns: int - 0 == success, != 0 == failure */ 407 /* Parameters: data(I) - pointer to ioctl data */ 408 /* cmd(I) - ioctl command integer */ 409 /* mode(I) - file mode bits used with open */ 410 /* uid(I) - uid of caller */ 411 /* ctx(I) - pointer to give the uid context */ 412 /* ifs - ipf stack instance */ 413 /* */ 414 /* Processes an ioctl call made to operate on the IP Filter state device. */ 415 /* ------------------------------------------------------------------------ */ 416 int fr_state_ioctl(data, cmd, mode, uid, ctx, ifs) 417 caddr_t data; 418 ioctlcmd_t cmd; 419 int mode, uid; 420 void *ctx; 421 ipf_stack_t *ifs; 422 { 423 int arg, ret, error = 0; 424 425 switch (cmd) 426 { 427 /* 428 * Delete an entry from the state table. 
429 */ 430 case SIOCDELST : 431 error = fr_state_remove(data, ifs); 432 break; 433 /* 434 * Flush the state table 435 */ 436 case SIOCIPFFL : 437 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 438 if (error != 0) { 439 error = EFAULT; 440 } else { 441 if (VALID_TABLE_FLUSH_OPT(arg)) { 442 WRITE_ENTER(&ifs->ifs_ipf_state); 443 ret = fr_state_flush(arg, 4, ifs); 444 RWLOCK_EXIT(&ifs->ifs_ipf_state); 445 error = BCOPYOUT((char *)&ret, data, 446 sizeof(ret)); 447 if (error != 0) 448 return EFAULT; 449 } else { 450 error = EINVAL; 451 } 452 } 453 break; 454 455 #ifdef USE_INET6 456 case SIOCIPFL6 : 457 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 458 if (error != 0) { 459 error = EFAULT; 460 } else { 461 if (VALID_TABLE_FLUSH_OPT(arg)) { 462 WRITE_ENTER(&ifs->ifs_ipf_state); 463 ret = fr_state_flush(arg, 6, ifs); 464 RWLOCK_EXIT(&ifs->ifs_ipf_state); 465 error = BCOPYOUT((char *)&ret, data, 466 sizeof(ret)); 467 if (error != 0) 468 return EFAULT; 469 } else { 470 error = EINVAL; 471 } 472 } 473 break; 474 #endif 475 #ifdef IPFILTER_LOG 476 /* 477 * Flush the state log. 478 */ 479 case SIOCIPFFB : 480 if (!(mode & FWRITE)) 481 error = EPERM; 482 else { 483 int tmp; 484 485 tmp = ipflog_clear(IPL_LOGSTATE, ifs); 486 error = BCOPYOUT((char *)&tmp, data, sizeof(tmp)); 487 if (error != 0) 488 error = EFAULT; 489 } 490 break; 491 /* 492 * Turn logging of state information on/off. 493 */ 494 case SIOCSETLG : 495 if (!(mode & FWRITE)) { 496 error = EPERM; 497 } else { 498 error = BCOPYIN((char *)data, 499 (char *)&ifs->ifs_ipstate_logging, 500 sizeof(ifs->ifs_ipstate_logging)); 501 if (error != 0) 502 error = EFAULT; 503 } 504 break; 505 /* 506 * Return the current state of logging. 507 */ 508 case SIOCGETLG : 509 error = BCOPYOUT((char *)&ifs->ifs_ipstate_logging, 510 (char *)data, 511 sizeof(ifs->ifs_ipstate_logging)); 512 if (error != 0) 513 error = EFAULT; 514 break; 515 /* 516 * Return the number of bytes currently waiting to be read. 
517 */ 518 case FIONREAD : 519 arg = ifs->ifs_iplused[IPL_LOGSTATE]; /* returned in an int */ 520 error = BCOPYOUT((char *)&arg, data, sizeof(arg)); 521 if (error != 0) 522 error = EFAULT; 523 break; 524 #endif 525 /* 526 * Get the current state statistics. 527 */ 528 case SIOCGETFS : 529 error = fr_outobj(data, fr_statetstats(ifs), IPFOBJ_STATESTAT); 530 break; 531 /* 532 * Lock/Unlock the state table. (Locking prevents any changes, which 533 * means no packets match). 534 */ 535 case SIOCSTLCK : 536 if (!(mode & FWRITE)) { 537 error = EPERM; 538 } else { 539 error = fr_lock(data, &ifs->ifs_fr_state_lock); 540 } 541 break; 542 /* 543 * Add an entry to the current state table. 544 */ 545 case SIOCSTPUT : 546 if (!ifs->ifs_fr_state_lock || !(mode & FWRITE)) { 547 error = EACCES; 548 break; 549 } 550 error = fr_stputent(data, ifs); 551 break; 552 /* 553 * Get a state table entry. 554 */ 555 case SIOCSTGET : 556 if (!ifs->ifs_fr_state_lock) { 557 error = EACCES; 558 break; 559 } 560 error = fr_stgetent(data, ifs); 561 break; 562 563 case SIOCGENITER : 564 { 565 ipftoken_t *token; 566 ipfgeniter_t iter; 567 568 error = fr_inobj(data, &iter, IPFOBJ_GENITER); 569 if (error != 0) 570 break; 571 572 token = ipf_findtoken(IPFGENITER_STATE, uid, ctx, ifs); 573 if (token != NULL) 574 error = fr_stateiter(token, &iter, ifs); 575 else 576 error = ESRCH; 577 RWLOCK_EXIT(&ifs->ifs_ipf_tokens); 578 break; 579 } 580 581 case SIOCIPFDELTOK : 582 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 583 if (error != 0) { 584 error = EFAULT; 585 } else { 586 error = ipf_deltoken(arg, uid, ctx, ifs); 587 } 588 break; 589 590 default : 591 error = EINVAL; 592 break; 593 } 594 return error; 595 } 596 597 598 /* ------------------------------------------------------------------------ */ 599 /* Function: fr_stgetent */ 600 /* Returns: int - 0 == success, != 0 == failure */ 601 /* Parameters: data(I) - pointer to state structure to retrieve from table */ 602 /* */ 603 /* Copy out state 
information from the kernel to a user space process. If */ 604 /* there is a filter rule associated with the state entry, copy that out */ 605 /* as well. The entry to copy out is taken from the value of "ips_next" in */ 606 /* the struct passed in and if not null and not found in the list of current*/ 607 /* state entries, the retrieval fails. */ 608 /* ------------------------------------------------------------------------ */ 609 int fr_stgetent(data, ifs) 610 caddr_t data; 611 ipf_stack_t *ifs; 612 { 613 ipstate_t *is, *isn; 614 ipstate_save_t ips; 615 int error; 616 617 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); 618 if (error) 619 return EFAULT; 620 621 isn = ips.ips_next; 622 if (isn == NULL) { 623 isn = ifs->ifs_ips_list; 624 if (isn == NULL) { 625 if (ips.ips_next == NULL) 626 return ENOENT; 627 return 0; 628 } 629 } else { 630 /* 631 * Make sure the pointer we're copying from exists in the 632 * current list of entries. Security precaution to prevent 633 * copying of random kernel data. 634 */ 635 for (is = ifs->ifs_ips_list; is; is = is->is_next) 636 if (is == isn) 637 break; 638 if (!is) 639 return ESRCH; 640 } 641 ips.ips_next = isn->is_next; 642 bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is)); 643 ips.ips_rule = isn->is_rule; 644 if (isn->is_rule != NULL) 645 bcopy((char *)isn->is_rule, (char *)&ips.ips_fr, 646 sizeof(ips.ips_fr)); 647 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); 648 if (error) 649 return EFAULT; 650 return 0; 651 } 652 653 654 /* ------------------------------------------------------------------------ */ 655 /* Function: fr_stputent */ 656 /* Returns: int - 0 == success, != 0 == failure */ 657 /* Parameters: data(I) - pointer to state information struct */ 658 /* ifs - ipf stack instance */ 659 /* */ 660 /* This function implements the SIOCSTPUT ioctl: insert a state entry into */ 661 /* the state table. If the state info. 
includes a pointer to a filter rule */ 662 /* then also add in an orphaned rule (will not show up in any "ipfstat -io" */ 663 /* output. */ 664 /* ------------------------------------------------------------------------ */ 665 int fr_stputent(data, ifs) 666 caddr_t data; 667 ipf_stack_t *ifs; 668 { 669 ipstate_t *is, *isn; 670 ipstate_save_t ips; 671 int error, i; 672 frentry_t *fr; 673 char *name; 674 675 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); 676 if (error) 677 return EFAULT; 678 679 /* 680 * Trigger automatic call to fr_state_flush() if the 681 * table has reached capacity specified by hi watermark. 682 */ 683 if (ST_TAB_WATER_LEVEL(ifs) > ifs->ifs_state_flush_level_hi) 684 ifs->ifs_fr_state_doflush = 1; 685 686 /* 687 * If automatic flushing did not do its job, and the table 688 * has filled up, don't try to create a new entry. 689 */ 690 if (ifs->ifs_ips_num >= ifs->ifs_fr_statemax) { 691 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 692 return ENOMEM; 693 } 694 695 KMALLOC(isn, ipstate_t *); 696 if (isn == NULL) 697 return ENOMEM; 698 699 bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn)); 700 bzero((char *)isn, offsetof(struct ipstate, is_pkts)); 701 isn->is_sti.tqe_pnext = NULL; 702 isn->is_sti.tqe_next = NULL; 703 isn->is_sti.tqe_ifq = NULL; 704 isn->is_sti.tqe_parent = isn; 705 isn->is_ifp[0] = NULL; 706 isn->is_ifp[1] = NULL; 707 isn->is_ifp[2] = NULL; 708 isn->is_ifp[3] = NULL; 709 isn->is_sync = NULL; 710 fr = ips.ips_rule; 711 712 if (fr == NULL) { 713 READ_ENTER(&ifs->ifs_ipf_state); 714 fr_stinsert(isn, 0, ifs); 715 MUTEX_EXIT(&isn->is_lock); 716 RWLOCK_EXIT(&ifs->ifs_ipf_state); 717 return 0; 718 } 719 720 if (isn->is_flags & SI_NEWFR) { 721 KMALLOC(fr, frentry_t *); 722 if (fr == NULL) { 723 KFREE(isn); 724 return ENOMEM; 725 } 726 bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr)); 727 isn->is_rule = fr; 728 ips.ips_is.is_rule = fr; 729 MUTEX_NUKE(&fr->fr_lock); 730 MUTEX_INIT(&fr->fr_lock, "state filter rule lock"); 731 732 /* 733 * Look 
up all the interface names in the rule. 734 */ 735 for (i = 0; i < 4; i++) { 736 name = fr->fr_ifnames[i]; 737 fr->fr_ifas[i] = fr_resolvenic(name, fr->fr_v, ifs); 738 name = isn->is_ifname[i]; 739 isn->is_ifp[i] = fr_resolvenic(name, isn->is_v, ifs); 740 } 741 742 fr->fr_ref = 0; 743 fr->fr_dsize = 0; 744 fr->fr_data = NULL; 745 fr->fr_type = FR_T_NONE; 746 747 fr_resolvedest(&fr->fr_tif, fr->fr_v, ifs); 748 fr_resolvedest(&fr->fr_dif, fr->fr_v, ifs); 749 fr_resolvedest(&fr->fr_rif, fr->fr_v, ifs); 750 751 /* 752 * send a copy back to userland of what we ended up 753 * to allow for verification. 754 */ 755 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); 756 if (error) { 757 KFREE(isn); 758 MUTEX_DESTROY(&fr->fr_lock); 759 KFREE(fr); 760 return EFAULT; 761 } 762 READ_ENTER(&ifs->ifs_ipf_state); 763 fr_stinsert(isn, 0, ifs); 764 MUTEX_EXIT(&isn->is_lock); 765 RWLOCK_EXIT(&ifs->ifs_ipf_state); 766 767 } else { 768 READ_ENTER(&ifs->ifs_ipf_state); 769 for (is = ifs->ifs_ips_list; is; is = is->is_next) 770 if (is->is_rule == fr) { 771 fr_stinsert(isn, 0, ifs); 772 MUTEX_EXIT(&isn->is_lock); 773 break; 774 } 775 776 if (is == NULL) { 777 KFREE(isn); 778 isn = NULL; 779 } 780 RWLOCK_EXIT(&ifs->ifs_ipf_state); 781 782 return (isn == NULL) ? ESRCH : 0; 783 } 784 785 return 0; 786 } 787 788 789 /* ------------------------------------------------------------------------ */ 790 /* Function: fr_stinsert */ 791 /* Returns: Nil */ 792 /* Parameters: is(I) - pointer to state structure */ 793 /* rev(I) - flag indicating forward/reverse direction of packet */ 794 /* */ 795 /* Inserts a state structure into the hash table (for lookups) and the list */ 796 /* of state entries (for enumeration). Resolves all of the interface names */ 797 /* to pointers and adjusts running stats for the hash table as appropriate. */ 798 /* */ 799 /* Locking: it is assumed that some kind of lock on ipf_state is held. */ 800 /* Exits with is_lock initialised and held. 
*/ 801 /* ------------------------------------------------------------------------ */ 802 void fr_stinsert(is, rev, ifs) 803 ipstate_t *is; 804 int rev; 805 ipf_stack_t *ifs; 806 { 807 frentry_t *fr; 808 u_int hv; 809 int i; 810 811 MUTEX_INIT(&is->is_lock, "ipf state entry"); 812 813 fr = is->is_rule; 814 if (fr != NULL) { 815 MUTEX_ENTER(&fr->fr_lock); 816 fr->fr_ref++; 817 fr->fr_statecnt++; 818 MUTEX_EXIT(&fr->fr_lock); 819 } 820 821 /* 822 * Look up all the interface names in the state entry. 823 */ 824 for (i = 0; i < 4; i++) { 825 if (is->is_ifp[i] != NULL) 826 continue; 827 is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], is->is_v, ifs); 828 } 829 830 /* 831 * If we could trust is_hv, then the modulous would not be needed, but 832 * when running with IPFILTER_SYNC, this stops bad values. 833 */ 834 hv = is->is_hv % ifs->ifs_fr_statesize; 835 is->is_hv = hv; 836 837 /* 838 * We need to get both of these locks...the first because it is 839 * possible that once the insert is complete another packet might 840 * come along, match the entry and want to update it. 841 */ 842 MUTEX_ENTER(&is->is_lock); 843 MUTEX_ENTER(&ifs->ifs_ipf_stinsert); 844 845 /* 846 * add into list table. 
847 */ 848 if (ifs->ifs_ips_list != NULL) 849 ifs->ifs_ips_list->is_pnext = &is->is_next; 850 is->is_pnext = &ifs->ifs_ips_list; 851 is->is_next = ifs->ifs_ips_list; 852 ifs->ifs_ips_list = is; 853 854 if (ifs->ifs_ips_table[hv] != NULL) 855 ifs->ifs_ips_table[hv]->is_phnext = &is->is_hnext; 856 else 857 ifs->ifs_ips_stats.iss_inuse++; 858 is->is_phnext = ifs->ifs_ips_table + hv; 859 is->is_hnext = ifs->ifs_ips_table[hv]; 860 ifs->ifs_ips_table[hv] = is; 861 ifs->ifs_ips_stats.iss_bucketlen[hv]++; 862 ifs->ifs_ips_num++; 863 MUTEX_EXIT(&ifs->ifs_ipf_stinsert); 864 865 fr_setstatequeue(is, rev, ifs); 866 } 867 868 /* ------------------------------------------------------------------------ */ 869 /* Function: fr_match_ipv4addrs */ 870 /* Returns: int - 2 strong match (same addresses, same direction) */ 871 /* 1 weak match (same address, opposite direction) */ 872 /* 0 no match */ 873 /* */ 874 /* Function matches IPv4 addresses. */ 875 /* ------------------------------------------------------------------------ */ 876 static int fr_match_ipv4addrs(is1, is2) 877 ipstate_t *is1; 878 ipstate_t *is2; 879 { 880 int rv; 881 882 if (is1->is_saddr == is2->is_saddr && is1->is_daddr == is2->is_daddr) 883 rv = 2; 884 else if (is1->is_saddr == is2->is_daddr && 885 is1->is_daddr == is2->is_saddr) 886 rv = 1; 887 else 888 rv = 0; 889 890 return (rv); 891 } 892 893 /* ------------------------------------------------------------------------ */ 894 /* Function: fr_match_ipv6addrs */ 895 /* Returns: int - 2 strong match (same addresses, same direction) */ 896 /* 1 weak match (same addresses, opposite direction) */ 897 /* 0 no match */ 898 /* */ 899 /* Function matches IPv6 addresses. 
*/ 900 /* ------------------------------------------------------------------------ */ 901 static int fr_match_ipv6addrs(is1, is2) 902 ipstate_t *is1; 903 ipstate_t *is2; 904 { 905 int rv; 906 907 if (IP6_EQ(&is1->is_src, &is2->is_src) && 908 IP6_EQ(&is1->is_dst, &is2->is_dst)) 909 rv = 2; 910 else if (IP6_EQ(&is1->is_src, &is2->is_dst) && 911 IP6_EQ(&is1->is_dst, &is2->is_src)) { 912 rv = 1; 913 } 914 else 915 rv = 0; 916 917 return (rv); 918 } 919 /* ------------------------------------------------------------------------ */ 920 /* Function: fr_match_addresses */ 921 /* Returns: int - 2 strong match (same addresses, same direction) */ 922 /* 1 weak match (same address, opposite directions) */ 923 /* 0 no match */ 924 /* Parameters: is1, is2 pointers to states we are checking */ 925 /* */ 926 /* Matches addresses, function uses fr_match_ipvXaddrs() to deal with IPv4 */ 927 /* and IPv6 address format. */ 928 /* ------------------------------------------------------------------------ */ 929 static int fr_match_addresses(is1, is2) 930 ipstate_t *is1; 931 ipstate_t *is2; 932 { 933 int rv; 934 935 if (is1->is_v == 4) { 936 rv = fr_match_ipv4addrs(is1, is2); 937 } else { 938 rv = fr_match_ipv6addrs(is1, is2); 939 } 940 941 return (rv); 942 } 943 944 /* ------------------------------------------------------------------------ */ 945 /* Function: fr_match_ppairs */ 946 /* Returns: int - 2 strong match (same ports, same direction) */ 947 /* 1 weak match (same ports, different direction) */ 948 /* 0 no match */ 949 /* Parameters ppairs1, ppairs - src, dst ports we want to match. */ 950 /* */ 951 /* Matches two port_pair_t types (port pairs). Each port pair contains */ 952 /* src, dst port, which belong to session (state entry). 
*/ 953 /* ------------------------------------------------------------------------ */ 954 static int fr_match_ppairs(ppairs1, ppairs2) 955 port_pair_t *ppairs1; 956 port_pair_t *ppairs2; 957 { 958 int rv; 959 960 if (ppairs1->pp_sport == ppairs2->pp_sport && 961 ppairs1->pp_dport == ppairs2->pp_dport) 962 rv = 2; 963 else if (ppairs1->pp_sport == ppairs2->pp_dport && 964 ppairs1->pp_dport == ppairs2->pp_sport) 965 rv = 1; 966 else 967 rv = 0; 968 969 return (rv); 970 } 971 972 /* ------------------------------------------------------------------------ */ 973 /* Function: fr_match_l4_hdr */ 974 /* Returns: int - 0 no match, */ 975 /* 1 weak match (same ports, different directions) */ 976 /* 2 strong match (same ports, same direction) */ 977 /* Parameters is1, is2 - states we want to match */ 978 /* */ 979 /* Function matches L4 header data (source ports for TCP, UDP, CallIds for */ 980 /* GRE protocol). */ 981 /* ------------------------------------------------------------------------ */ 982 static int fr_match_l4_hdr(is1, is2) 983 ipstate_t *is1; 984 ipstate_t *is2; 985 { 986 int rv = 0; 987 port_pair_t pp1; 988 port_pair_t pp2; 989 990 if (is1->is_p != is2->is_p) 991 return (0); 992 993 switch (is1->is_p) { 994 case IPPROTO_TCP: 995 pp1.pp_sport = is1->is_ps.is_ts.ts_sport; 996 pp1.pp_dport = is1->is_ps.is_ts.ts_dport; 997 pp2.pp_sport = is2->is_ps.is_ts.ts_sport; 998 pp2.pp_dport = is2->is_ps.is_ts.ts_dport; 999 rv = fr_match_ppairs(&pp1, &pp2); 1000 break; 1001 case IPPROTO_UDP: 1002 pp1.pp_sport = is1->is_ps.is_us.us_sport; 1003 pp1.pp_dport = is1->is_ps.is_us.us_dport; 1004 pp2.pp_sport = is2->is_ps.is_us.us_sport; 1005 pp2.pp_dport = is2->is_ps.is_us.us_dport; 1006 rv = fr_match_ppairs(&pp1, &pp2); 1007 break; 1008 case IPPROTO_GRE: 1009 /* greinfo_t can be also interprted as port pair */ 1010 pp1.pp_sport = is1->is_ps.is_ug.gs_call[0]; 1011 pp1.pp_dport = is1->is_ps.is_ug.gs_call[1]; 1012 pp2.pp_sport = is2->is_ps.is_ug.gs_call[0]; 1013 pp2.pp_dport = 
is2->is_ps.is_ug.gs_call[1]; 1014 rv = fr_match_ppairs(&pp1, &pp2); 1015 break; 1016 case IPPROTO_ICMP: 1017 case IPPROTO_ICMPV6: 1018 if (bcmp(&is1->is_ps, &is2->is_ps, sizeof (icmpinfo_t))) 1019 rv = 1; 1020 else 1021 rv = 0; 1022 break; 1023 default: 1024 rv = 0; 1025 } 1026 1027 return (rv); 1028 } 1029 1030 /* ------------------------------------------------------------------------ */ 1031 /* Function: fr_matchstates */ 1032 /* Returns: int - nonzero match, zero no match */ 1033 /* Parameters is1, is2 - states we want to match */ 1034 /* */ 1035 /* The state entries are equal (identical match) if they belong to the same */ 1036 /* session. Any time new state entry is being added the fr_addstate() */ 1037 /* function creates temporal state entry from the data it gets from IP and */ 1038 /* L4 header. The fr_matchstats() must be also aware of packet direction, */ 1039 /* which is also stored within the state entry. We should keep in mind the */ 1040 /* information about packet direction is spread accross L3 (addresses) and */ 1041 /* L4 (ports). There are three possible relationships betwee is1, is2: */ 1042 /* - no match (match(is1, is2) == 0)) */ 1043 /* - weak match same addresses (ports), but different */ 1044 /* directions (1) (fr_match_xxxx(is1, is2) == 1) */ 1045 /* - strong match same addresses (ports) and same directions */ 1046 /* (2) (fr_match_xxxx(is1, is2) == 2) */ 1047 /* */ 1048 /* There are functions, which match match addresses (L3 header) in is1, is2 */ 1049 /* and functions, which are used to compare ports (L4 header) data. We say */ 1050 /* the is1 and is2 are same (identical) if there is a match */ 1051 /* (fr_match_l4_hdr(is1, is2) != 0) and matchlevels are same for entries */ 1052 /* (fr_match_l3_hdr(is1, is2) == fr_match_l4_hdr(is1, is2)) for is1, is2. */ 1053 /* Such requirement deals with case as follows: */ 1054 /* suppose there are two connections between hosts A, B. 
Connection 1: */ 1055 /* a.a.a.a:12345 <=> b.b.b.b:54321 */ 1056 /* Connection 2: */ 1057 /* a.a.a.a:54321 <=> b.b.b.b:12345 */ 1058 /* since we've introduced match levels into our fr_matchstates(), we are */ 1059 /* able to identify, which packets belong to connection A and which belong */ 1060 /* to connection B. Assume there are two entries is1, is2. is1 has been */ 1061 /* from con. 1 packet, which travelled from A to B: */ 1062 /* a.a.a.a:12345 -> b.b.b.b:54321 */ 1063 /* while s2, has been created from packet which belongs to con. 2 and is */ 1064 /* also coming from A to B: */ 1065 /* a.a.a.a:54321 -> b.b.b.b:12345 */ 1066 /* fr_match_l3_hdr(is1, is2) == 2 -> strong match, while */ 1067 /* fr_match_l4_hdr(is1, is2) == 1 -> weak match. Since match levels are */ 1068 /* different the state entries are not identical -> no match as a final */ 1069 /* result. */ 1070 /* ------------------------------------------------------------------------ */ 1071 static int fr_matchstates(is1, is2) 1072 ipstate_t *is1; 1073 ipstate_t *is2; 1074 { 1075 int rv; 1076 int amatch; 1077 int pmatch; 1078 1079 if (bcmp(&is1->is_pass, &is2->is_pass, 1080 offsetof(struct ipstate, is_ps) - 1081 offsetof(struct ipstate, is_pass)) == 0) { 1082 1083 pmatch = fr_match_l4_hdr(is1, is2); 1084 amatch = fr_match_addresses(is1, is2); 1085 /* 1086 * If addresses match (amatch != 0), then 'match levels' 1087 * must be same for matching entries. If amatch and pmatch 1088 * have different values (different match levels), then 1089 * is1 and is2 belong to different sessions. 
1090 */ 1091 rv = (amatch != 0) && (amatch == pmatch); 1092 } 1093 else 1094 rv = 0; 1095 1096 return (rv); 1097 } 1098 1099 /* ------------------------------------------------------------------------ */ 1100 /* Function: fr_addstate */ 1101 /* Returns: ipstate_t* - NULL == failure, else pointer to new state */ 1102 /* Parameters: fin(I) - pointer to packet information */ 1103 /* stsave(O) - pointer to place to save pointer to created */ 1104 /* state structure. */ 1105 /* flags(I) - flags to use when creating the structure */ 1106 /* */ 1107 /* Creates a new IP state structure from the packet information collected. */ 1108 /* Inserts it into the state table and appends to the bottom of the active */ 1109 /* list. If the capacity of the table has reached the maximum allowed then */ 1110 /* the call will fail and a flush is scheduled for the next timeout call. */ 1111 /* ------------------------------------------------------------------------ */ 1112 ipstate_t *fr_addstate(fin, stsave, flags) 1113 fr_info_t *fin; 1114 ipstate_t **stsave; 1115 u_int flags; 1116 { 1117 ipstate_t *is, ips; 1118 struct icmp *ic; 1119 u_int pass, hv; 1120 frentry_t *fr; 1121 tcphdr_t *tcp; 1122 grehdr_t *gre; 1123 void *ifp; 1124 int out; 1125 ipf_stack_t *ifs = fin->fin_ifs; 1126 1127 if (ifs->ifs_fr_state_lock || 1128 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) 1129 return NULL; 1130 1131 if ((fin->fin_flx & FI_OOW) && !(fin->fin_tcpf & TH_SYN)) 1132 return NULL; 1133 1134 /* 1135 * Trigger automatic call to fr_state_flush() if the 1136 * table has reached capacity specified by hi watermark. 1137 */ 1138 if (ST_TAB_WATER_LEVEL(ifs) > ifs->ifs_state_flush_level_hi) 1139 ifs->ifs_fr_state_doflush = 1; 1140 1141 /* 1142 * If the max number of state entries has been reached, and there is no 1143 * limit on the state count for the rule, then do not continue. 
In the 1144 * case where a limit exists, it's ok allow the entries to be created as 1145 * long as specified limit itself has not been reached. 1146 * 1147 * Note that because the lock isn't held on fr, it is possible to exceed 1148 * the specified size of the table. However, the cost of this is being 1149 * ignored here; as the number by which it can go over is a product of 1150 * the number of simultaneous threads that could be executing in here. 1151 * So, a limit of 100 won't result in 200, but could result in 101 or 102. 1152 * 1153 * Also note that, since the automatic flush should have been triggered 1154 * well before we reach the maximum number of state table entries, the 1155 * likelihood of reaching the max (and thus exceedng it) is minimal. 1156 */ 1157 fr = fin->fin_fr; 1158 if (fr != NULL) { 1159 if ((ifs->ifs_ips_num >= ifs->ifs_fr_statemax) && 1160 (fr->fr_statemax == 0)) { 1161 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 1162 return NULL; 1163 } 1164 if ((fr->fr_statemax != 0) && 1165 (fr->fr_statecnt >= fr->fr_statemax)) { 1166 ATOMIC_INCL(ifs->ifs_ips_stats.iss_maxref); 1167 ifs->ifs_fr_state_doflush = 1; 1168 return NULL; 1169 } 1170 } 1171 1172 ic = NULL; 1173 tcp = NULL; 1174 out = fin->fin_out; 1175 is = &ips; 1176 bzero((char *)is, sizeof(*is)); 1177 1178 if (fr == NULL) { 1179 pass = ifs->ifs_fr_flags; 1180 is->is_tag = FR_NOLOGTAG; 1181 } else { 1182 pass = fr->fr_flags; 1183 } 1184 1185 is->is_die = 1 + ifs->ifs_fr_ticks; 1186 /* 1187 * We want to check everything that is a property of this packet, 1188 * but we don't (automatically) care about it's fragment status as 1189 * this may change. 
1190 */ 1191 is->is_pass = pass; 1192 is->is_v = fin->fin_v; 1193 is->is_opt[0] = fin->fin_optmsk; 1194 is->is_optmsk[0] = 0xffffffff; 1195 is->is_optmsk[1] = 0xffffffff; 1196 if (is->is_v == 6) { 1197 is->is_opt[0] &= ~0x8; 1198 is->is_optmsk[0] &= ~0x8; 1199 is->is_optmsk[1] &= ~0x8; 1200 } 1201 is->is_sec = fin->fin_secmsk; 1202 is->is_secmsk = 0xffff; 1203 is->is_auth = fin->fin_auth; 1204 is->is_authmsk = 0xffff; 1205 1206 /* 1207 * Copy and calculate... 1208 */ 1209 hv = (is->is_p = fin->fin_fi.fi_p); 1210 is->is_src = fin->fin_fi.fi_src; 1211 hv += is->is_saddr; 1212 is->is_dst = fin->fin_fi.fi_dst; 1213 hv += is->is_daddr; 1214 #ifdef USE_INET6 1215 if (fin->fin_v == 6) { 1216 /* 1217 * For ICMPv6, we check to see if the destination address is 1218 * a multicast address. If it is, do not include it in the 1219 * calculation of the hash because the correct reply will come 1220 * back from a real address, not a multicast address. 1221 */ 1222 if ((is->is_p == IPPROTO_ICMPV6) && 1223 IN6_IS_ADDR_MULTICAST(&is->is_dst.in6)) { 1224 /* 1225 * So you can do keep state with neighbour discovery. 1226 * 1227 * Here we could use the address from the neighbour 1228 * solicit message to put in the state structure and 1229 * we could use that without a wildcard flag too... 
1230 */ 1231 is->is_flags |= SI_W_DADDR; 1232 hv -= is->is_daddr; 1233 } else { 1234 hv += is->is_dst.i6[1]; 1235 hv += is->is_dst.i6[2]; 1236 hv += is->is_dst.i6[3]; 1237 } 1238 hv += is->is_src.i6[1]; 1239 hv += is->is_src.i6[2]; 1240 hv += is->is_src.i6[3]; 1241 } 1242 #endif 1243 if ((fin->fin_v == 4) && 1244 (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) { 1245 if (fin->fin_out == 0) { 1246 flags |= SI_W_DADDR|SI_CLONE; 1247 hv -= is->is_daddr; 1248 } else { 1249 flags |= SI_W_SADDR|SI_CLONE; 1250 hv -= is->is_saddr; 1251 } 1252 } 1253 1254 switch (is->is_p) 1255 { 1256 #ifdef USE_INET6 1257 case IPPROTO_ICMPV6 : 1258 ic = fin->fin_dp; 1259 1260 switch (ic->icmp_type) 1261 { 1262 case ICMP6_ECHO_REQUEST : 1263 is->is_icmp.ici_type = ic->icmp_type; 1264 hv += (is->is_icmp.ici_id = ic->icmp_id); 1265 break; 1266 case ICMP6_MEMBERSHIP_QUERY : 1267 case ND_ROUTER_SOLICIT : 1268 case ND_NEIGHBOR_SOLICIT : 1269 case ICMP6_NI_QUERY : 1270 is->is_icmp.ici_type = ic->icmp_type; 1271 break; 1272 default : 1273 return NULL; 1274 } 1275 ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp); 1276 break; 1277 #endif 1278 case IPPROTO_ICMP : 1279 ic = fin->fin_dp; 1280 1281 switch (ic->icmp_type) 1282 { 1283 case ICMP_ECHO : 1284 case ICMP_TSTAMP : 1285 case ICMP_IREQ : 1286 case ICMP_MASKREQ : 1287 is->is_icmp.ici_type = ic->icmp_type; 1288 hv += (is->is_icmp.ici_id = ic->icmp_id); 1289 break; 1290 default : 1291 return NULL; 1292 } 1293 ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp); 1294 break; 1295 1296 case IPPROTO_GRE : 1297 gre = fin->fin_dp; 1298 1299 is->is_gre.gs_flags = gre->gr_flags; 1300 is->is_gre.gs_ptype = gre->gr_ptype; 1301 if (GRE_REV(is->is_gre.gs_flags) == 1) { 1302 is->is_call[0] = fin->fin_data[0]; 1303 is->is_call[1] = fin->fin_data[1]; 1304 } 1305 break; 1306 1307 case IPPROTO_TCP : 1308 tcp = fin->fin_dp; 1309 1310 if (tcp->th_flags & TH_RST) 1311 return NULL; 1312 /* 1313 * The endian of the ports doesn't matter, but the ack and 1314 * sequence numbers do as 
we do mathematics on them later. 1315 */ 1316 is->is_sport = htons(fin->fin_data[0]); 1317 is->is_dport = htons(fin->fin_data[1]); 1318 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) { 1319 hv += is->is_sport; 1320 hv += is->is_dport; 1321 } 1322 1323 /* 1324 * If this is a real packet then initialise fields in the 1325 * state information structure from the TCP header information. 1326 */ 1327 1328 is->is_maxdwin = 1; 1329 is->is_maxswin = ntohs(tcp->th_win); 1330 if (is->is_maxswin == 0) 1331 is->is_maxswin = 1; 1332 1333 if ((fin->fin_flx & FI_IGNORE) == 0) { 1334 is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen - 1335 (TCP_OFF(tcp) << 2) + 1336 ((tcp->th_flags & TH_SYN) ? 1 : 0) + 1337 ((tcp->th_flags & TH_FIN) ? 1 : 0); 1338 is->is_maxsend = is->is_send; 1339 1340 /* 1341 * Window scale option is only present in 1342 * SYN/SYN-ACK packet. 1343 */ 1344 if ((tcp->th_flags & ~(TH_FIN|TH_ACK|TH_ECNALL)) == 1345 TH_SYN && 1346 (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) { 1347 if (fr_tcpoptions(fin, tcp, 1348 &is->is_tcp.ts_data[0]) == -1) { 1349 fin->fin_flx |= FI_BAD; 1350 } 1351 } 1352 1353 if ((fin->fin_out != 0) && (pass & FR_NEWISN) != 0) { 1354 fr_checknewisn(fin, is); 1355 fr_fixoutisn(fin, is); 1356 } 1357 1358 if ((tcp->th_flags & TH_OPENING) == TH_SYN) 1359 flags |= IS_TCPFSM; 1360 else { 1361 is->is_maxdwin = is->is_maxswin * 2; 1362 is->is_dend = ntohl(tcp->th_ack); 1363 is->is_maxdend = ntohl(tcp->th_ack); 1364 is->is_maxdwin *= 2; 1365 } 1366 } 1367 1368 /* 1369 * If we're creating state for a starting connection, start the 1370 * timer on it as we'll never see an error if it fails to 1371 * connect. 
1372 */ 1373 ATOMIC_INCL(ifs->ifs_ips_stats.iss_tcp); 1374 break; 1375 1376 case IPPROTO_UDP : 1377 tcp = fin->fin_dp; 1378 1379 is->is_sport = htons(fin->fin_data[0]); 1380 is->is_dport = htons(fin->fin_data[1]); 1381 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) { 1382 hv += tcp->th_dport; 1383 hv += tcp->th_sport; 1384 } 1385 ATOMIC_INCL(ifs->ifs_ips_stats.iss_udp); 1386 break; 1387 1388 default : 1389 break; 1390 } 1391 hv = DOUBLE_HASH(hv, ifs); 1392 is->is_hv = hv; 1393 is->is_rule = fr; 1394 is->is_flags = flags & IS_INHERITED; 1395 1396 /* 1397 * Look for identical state. 1398 */ 1399 for (is = ifs->ifs_ips_table[is->is_hv % ifs->ifs_fr_statesize]; 1400 is != NULL; 1401 is = is->is_hnext) { 1402 if (fr_matchstates(&ips, is) == 1) 1403 break; 1404 } 1405 1406 /* 1407 * we've found a matching state -> state already exists, 1408 * we are not going to add a duplicate record. 1409 */ 1410 if (is != NULL) 1411 return NULL; 1412 1413 if (ifs->ifs_ips_stats.iss_bucketlen[hv] >= ifs->ifs_fr_state_maxbucket) { 1414 ATOMIC_INCL(ifs->ifs_ips_stats.iss_bucketfull); 1415 return NULL; 1416 } 1417 KMALLOC(is, ipstate_t *); 1418 if (is == NULL) { 1419 ATOMIC_INCL(ifs->ifs_ips_stats.iss_nomem); 1420 return NULL; 1421 } 1422 bcopy((char *)&ips, (char *)is, sizeof(*is)); 1423 /* 1424 * Do not do the modulous here, it is done in fr_stinsert(). 
1425 */ 1426 if (fr != NULL) { 1427 (void) strncpy(is->is_group, fr->fr_group, FR_GROUPLEN); 1428 if (fr->fr_age[0] != 0) { 1429 is->is_tqehead[0] = 1430 fr_addtimeoutqueue(&ifs->ifs_ips_utqe, 1431 fr->fr_age[0], ifs); 1432 is->is_sti.tqe_flags |= TQE_RULEBASED; 1433 } 1434 if (fr->fr_age[1] != 0) { 1435 is->is_tqehead[1] = 1436 fr_addtimeoutqueue(&ifs->ifs_ips_utqe, 1437 fr->fr_age[1], ifs); 1438 is->is_sti.tqe_flags |= TQE_RULEBASED; 1439 } 1440 is->is_tag = fr->fr_logtag; 1441 1442 is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1]; 1443 is->is_ifp[(1 - out) << 1] = fr->fr_ifas[2]; 1444 is->is_ifp[((1 - out) << 1) + 1] = fr->fr_ifas[3]; 1445 1446 if (((ifp = fr->fr_ifas[1]) != NULL) && 1447 (ifp != (void *)-1)) { 1448 COPYIFNAME(ifp, is->is_ifname[(out << 1) + 1], fr->fr_v); 1449 } 1450 if (((ifp = fr->fr_ifas[2]) != NULL) && 1451 (ifp != (void *)-1)) { 1452 COPYIFNAME(ifp, is->is_ifname[(1 - out) << 1], fr->fr_v); 1453 } 1454 if (((ifp = fr->fr_ifas[3]) != NULL) && 1455 (ifp != (void *)-1)) { 1456 COPYIFNAME(ifp, is->is_ifname[((1 - out) << 1) + 1], fr->fr_v); 1457 } 1458 } 1459 1460 is->is_ifp[out << 1] = fin->fin_ifp; 1461 if (fin->fin_ifp != NULL) { 1462 COPYIFNAME(fin->fin_ifp, is->is_ifname[out << 1], fin->fin_v); 1463 } 1464 1465 is->is_ref = 1; 1466 is->is_pkts[0] = 0, is->is_bytes[0] = 0; 1467 is->is_pkts[1] = 0, is->is_bytes[1] = 0; 1468 is->is_pkts[2] = 0, is->is_bytes[2] = 0; 1469 is->is_pkts[3] = 0, is->is_bytes[3] = 0; 1470 if ((fin->fin_flx & FI_IGNORE) == 0) { 1471 is->is_pkts[out] = 1; 1472 is->is_bytes[out] = fin->fin_plen; 1473 is->is_flx[out][0] = fin->fin_flx & FI_CMP; 1474 is->is_flx[out][0] &= ~FI_OOW; 1475 } 1476 1477 if (pass & FR_STSTRICT) 1478 is->is_flags |= IS_STRICT; 1479 1480 if (pass & FR_STATESYNC) 1481 is->is_flags |= IS_STATESYNC; 1482 1483 if (flags & (SI_WILDP|SI_WILDA)) { 1484 ATOMIC_INCL(ifs->ifs_ips_stats.iss_wild); 1485 } 1486 is->is_rulen = fin->fin_rule; 1487 1488 1489 if (pass & FR_LOGFIRST) 1490 is->is_pass &= 
~(FR_LOGFIRST|FR_LOG); 1491 1492 READ_ENTER(&ifs->ifs_ipf_state); 1493 is->is_me = stsave; 1494 1495 fr_stinsert(is, fin->fin_rev, ifs); 1496 1497 if (fin->fin_p == IPPROTO_TCP) { 1498 /* 1499 * If we're creating state for a starting connection, start the 1500 * timer on it as we'll never see an error if it fails to 1501 * connect. 1502 */ 1503 (void) fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb, 1504 is->is_flags); 1505 MUTEX_EXIT(&is->is_lock); 1506 #ifdef IPFILTER_SCAN 1507 if ((is->is_flags & SI_CLONE) == 0) 1508 (void) ipsc_attachis(is); 1509 #endif 1510 } else { 1511 MUTEX_EXIT(&is->is_lock); 1512 } 1513 #ifdef IPFILTER_SYNC 1514 if ((is->is_flags & IS_STATESYNC) && ((is->is_flags & SI_CLONE) == 0)) 1515 is->is_sync = ipfsync_new(SMC_STATE, fin, is); 1516 #endif 1517 if (ifs->ifs_ipstate_logging) 1518 ipstate_log(is, ISL_NEW, ifs); 1519 1520 RWLOCK_EXIT(&ifs->ifs_ipf_state); 1521 fin->fin_rev = IP6_NEQ(&is->is_dst, &fin->fin_daddr); 1522 fin->fin_flx |= FI_STATE; 1523 if (fin->fin_flx & FI_FRAG) 1524 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); 1525 1526 return is; 1527 } 1528 1529 1530 /* ------------------------------------------------------------------------ */ 1531 /* Function: fr_tcpoptions */ 1532 /* Returns: int - 1 == packet matches state entry, 0 == it does not */ 1533 /* Parameters: fin(I) - pointer to packet information */ 1534 /* tcp(I) - pointer to TCP packet header */ 1535 /* td(I) - pointer to TCP data held as part of the state */ 1536 /* */ 1537 /* Look after the TCP header for any options and deal with those that are */ 1538 /* present. Record details about those that we recogise. 
*/ 1539 /* ------------------------------------------------------------------------ */ 1540 static int fr_tcpoptions(fin, tcp, td) 1541 fr_info_t *fin; 1542 tcphdr_t *tcp; 1543 tcpdata_t *td; 1544 { 1545 int off, mlen, ol, i, len, retval; 1546 char buf[64], *s, opt; 1547 mb_t *m = NULL; 1548 1549 len = (TCP_OFF(tcp) << 2); 1550 if (fin->fin_dlen < len) 1551 return 0; 1552 len -= sizeof(*tcp); 1553 1554 off = fin->fin_plen - fin->fin_dlen + sizeof(*tcp) + fin->fin_ipoff; 1555 1556 m = fin->fin_m; 1557 mlen = MSGDSIZE(m) - off; 1558 if (len > mlen) { 1559 len = mlen; 1560 retval = 0; 1561 } else { 1562 retval = 1; 1563 } 1564 1565 COPYDATA(m, off, len, buf); 1566 1567 for (s = buf; len > 0; ) { 1568 opt = *s; 1569 if (opt == TCPOPT_EOL) 1570 break; 1571 else if (opt == TCPOPT_NOP) 1572 ol = 1; 1573 else { 1574 if (len < 2) 1575 break; 1576 ol = (int)*(s + 1); 1577 if (ol < 2 || ol > len) 1578 break; 1579 1580 /* 1581 * Extract the TCP options we are interested in out of 1582 * the header and store them in the the tcpdata struct. 1583 */ 1584 switch (opt) 1585 { 1586 case TCPOPT_WINDOW : 1587 if (ol == TCPOLEN_WINDOW) { 1588 i = (int)*(s + 2); 1589 if (i > TCP_WSCALE_MAX) 1590 i = TCP_WSCALE_MAX; 1591 else if (i < 0) 1592 i = 0; 1593 td->td_winscale = i; 1594 td->td_winflags |= TCP_WSCALE_SEEN | 1595 TCP_WSCALE_FIRST; 1596 } else 1597 retval = -1; 1598 break; 1599 case TCPOPT_MAXSEG : 1600 /* 1601 * So, if we wanted to set the TCP MAXSEG, 1602 * it should be done here... 
1603 */ 1604 if (ol == TCPOLEN_MAXSEG) { 1605 i = (int)*(s + 2); 1606 i <<= 8; 1607 i += (int)*(s + 3); 1608 td->td_maxseg = i; 1609 } else 1610 retval = -1; 1611 break; 1612 case TCPOPT_SACK_PERMITTED : 1613 if (ol == TCPOLEN_SACK_PERMITTED) 1614 td->td_winflags |= TCP_SACK_PERMIT; 1615 else 1616 retval = -1; 1617 break; 1618 } 1619 } 1620 len -= ol; 1621 s += ol; 1622 } 1623 return retval; 1624 } 1625 1626 1627 /* ------------------------------------------------------------------------ */ 1628 /* Function: fr_tcpstate */ 1629 /* Returns: int - 1 == packet matches state entry, 0 == it does not */ 1630 /* Parameters: fin(I) - pointer to packet information */ 1631 /* tcp(I) - pointer to TCP packet header */ 1632 /* is(I) - pointer to master state structure */ 1633 /* */ 1634 /* Check to see if a packet with TCP headers fits within the TCP window. */ 1635 /* Change timeout depending on whether new packet is a SYN-ACK returning */ 1636 /* for a SYN or a RST or FIN which indicate time to close up shop. */ 1637 /* ------------------------------------------------------------------------ */ 1638 static int fr_tcpstate(fin, tcp, is) 1639 fr_info_t *fin; 1640 tcphdr_t *tcp; 1641 ipstate_t *is; 1642 { 1643 int source, ret = 0, flags; 1644 tcpdata_t *fdata, *tdata; 1645 ipf_stack_t *ifs = fin->fin_ifs; 1646 1647 source = !fin->fin_rev; 1648 if (((is->is_flags & IS_TCPFSM) != 0) && (source == 1) && 1649 (ntohs(is->is_sport) != fin->fin_data[0])) 1650 source = 0; 1651 fdata = &is->is_tcp.ts_data[!source]; 1652 tdata = &is->is_tcp.ts_data[source]; 1653 1654 MUTEX_ENTER(&is->is_lock); 1655 1656 /* 1657 * If a SYN packet is received for a connection that is in a half 1658 * closed state, then move its state entry to deletetq. In such case 1659 * the SYN packet will be consequently dropped. This allows new state 1660 * entry to be created with a retransmited SYN packet. 
1661 */ 1662 if ((tcp->th_flags & TH_OPENING) == TH_SYN) { 1663 if ((is->is_state[source] > IPF_TCPS_ESTABLISHED) && 1664 (is->is_state[!source] > IPF_TCPS_ESTABLISHED)) { 1665 is->is_state[source] = IPF_TCPS_CLOSED; 1666 is->is_state[!source] = IPF_TCPS_CLOSED; 1667 /* 1668 * Do not update is->is_sti.tqe_die in case state entry 1669 * is already present in deletetq. It prevents state 1670 * entry ttl update by retransmitted SYN packets, which 1671 * may arrive before timer tick kicks off. The SYN 1672 * packet will be dropped again. 1673 */ 1674 if (is->is_sti.tqe_ifq != &ifs->ifs_ips_deletetq) 1675 fr_movequeue(&is->is_sti, is->is_sti.tqe_ifq, 1676 &fin->fin_ifs->ifs_ips_deletetq, 1677 fin->fin_ifs); 1678 1679 MUTEX_EXIT(&is->is_lock); 1680 return 0; 1681 } 1682 } 1683 1684 if (fr_tcpinwindow(fin, fdata, tdata, tcp, is->is_flags)) { 1685 #ifdef IPFILTER_SCAN 1686 if (is->is_flags & (IS_SC_CLIENT|IS_SC_SERVER)) { 1687 ipsc_packet(fin, is); 1688 if (FR_ISBLOCK(is->is_pass)) { 1689 MUTEX_EXIT(&is->is_lock); 1690 return 1; 1691 } 1692 } 1693 #endif 1694 1695 /* 1696 * Nearing end of connection, start timeout. 1697 */ 1698 ret = fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb, 1699 is->is_flags); 1700 if (ret == 0) { 1701 MUTEX_EXIT(&is->is_lock); 1702 return 0; 1703 } 1704 1705 /* 1706 * set s0's as appropriate. Use syn-ack packet as it 1707 * contains both pieces of required information. 1708 */ 1709 /* 1710 * Window scale option is only present in SYN/SYN-ACK packet. 1711 * Compare with ~TH_FIN to mask out T/TCP setups. 
1712 */ 1713 flags = tcp->th_flags & ~(TH_FIN|TH_ECNALL); 1714 if (flags == (TH_SYN|TH_ACK)) { 1715 is->is_s0[source] = ntohl(tcp->th_ack); 1716 is->is_s0[!source] = ntohl(tcp->th_seq) + 1; 1717 if (TCP_OFF(tcp) > (sizeof (tcphdr_t) >> 2)) { 1718 (void) fr_tcpoptions(fin, tcp, fdata); 1719 } 1720 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) 1721 fr_checknewisn(fin, is); 1722 } else if (flags == TH_SYN) { 1723 is->is_s0[source] = ntohl(tcp->th_seq) + 1; 1724 if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) 1725 (void) fr_tcpoptions(fin, tcp, tdata); 1726 1727 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) 1728 fr_checknewisn(fin, is); 1729 1730 } 1731 ret = 1; 1732 } else 1733 fin->fin_flx |= FI_OOW; 1734 MUTEX_EXIT(&is->is_lock); 1735 return ret; 1736 } 1737 1738 1739 /* ------------------------------------------------------------------------ */ 1740 /* Function: fr_checknewisn */ 1741 /* Returns: Nil */ 1742 /* Parameters: fin(I) - pointer to packet information */ 1743 /* is(I) - pointer to master state structure */ 1744 /* */ 1745 /* Check to see if this TCP connection is expecting and needs a new */ 1746 /* sequence number for a particular direction of the connection. */ 1747 /* */ 1748 /* NOTE: This does not actually change the sequence numbers, only gets new */ 1749 /* one ready. */ 1750 /* ------------------------------------------------------------------------ */ 1751 static void fr_checknewisn(fin, is) 1752 fr_info_t *fin; 1753 ipstate_t *is; 1754 { 1755 u_32_t sumd, old, new; 1756 tcphdr_t *tcp; 1757 int i; 1758 1759 i = fin->fin_rev; 1760 tcp = fin->fin_dp; 1761 1762 if (((i == 0) && !(is->is_flags & IS_ISNSYN)) || 1763 ((i == 1) && !(is->is_flags & IS_ISNACK))) { 1764 old = ntohl(tcp->th_seq); 1765 new = fr_newisn(fin); 1766 is->is_isninc[i] = new - old; 1767 CALC_SUMD(old, new, sumd); 1768 is->is_sumd[i] = (sumd & 0xffff) + (sumd >> 16); 1769 1770 is->is_flags |= ((i == 0) ? 
/* ------------------------------------------------------------------------ */
/* Function:    fr_tcpinwindow                                              */
/* Returns:     int - 1 == packet inside TCP "window", 0 == not inside.     */
/* Parameters:  fin(I)   - pointer to packet information                    */
/*              fdata(I) - pointer to tcp state information (forward)       */
/*              tdata(I) - pointer to tcp state information (reverse)       */
/*              tcp(I)   - pointer to TCP packet header                     */
/*              flags(I) - state flags (IS_TCPFSM and IS_STRICT are tested) */
/*                                                                          */
/* Given a packet has matched addresses and ports, check to see if it is    */
/* within the TCP data window. In a show of generosity, allow packets that  */
/* are within the window space behind the current sequence # as well.       */
/* On a match, the end and window values tracked in fdata/tdata may be      */
/* advanced; on a miss FI_OOW (and possibly FI_NEG_OOW) is set in fin_flx.  */
/* ------------------------------------------------------------------------ */
int fr_tcpinwindow(fin, fdata, tdata, tcp, flags)
fr_info_t *fin;
tcpdata_t  *fdata, *tdata;
tcphdr_t *tcp;
int flags;
{
	tcp_seq seq, ack, end;
	int ackskew, tcpflags;
	u_32_t win, maxwin;
	int dsize, inseq;

	/*
	 * Find difference between last checked packet and this packet.
	 */
	tcpflags = tcp->th_flags;
	seq = ntohl(tcp->th_seq);
	ack = ntohl(tcp->th_ack);

	/* The advertised window of a SYN segment is taken unscaled. */
	if (tcpflags & TH_SYN)
		win = ntohs(tcp->th_win);
	else
		win = ntohs(tcp->th_win) << fdata->td_winscale;

	/*
	 * win 0 means the receiving endpoint has closed the window, because it
	 * has not enough memory to receive data from sender. In such case we
	 * are pretending window size to be 1 to let TCP probe data through.
	 * TCP probe data can be either 0 or 1 octet of data, the RFC does not
	 * state this accurately, so we have to allow 1 octet (win = 1) even if
	 * the window is closed (win == 0).
	 */
	if (win == 0)
		win = 1;

	/* Sequence space used by this segment: payload plus SYN and FIN. */
	dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) +
	        ((tcpflags & TH_SYN) ? 1 : 0) + ((tcpflags & TH_FIN) ? 1 : 0);

	/*
	 * if window scaling is present, the scaling is only allowed
	 * for windows not in the first SYN packet. In that packet the
	 * window is 65535 to specify the largest window possible
	 * for receivers not implementing the window scale option.
	 * Currently, we do not assume TTCP here. That means that
	 * if we see a second packet from a host (after the initial
	 * SYN), we can assume that the receiver of the SYN did
	 * already send back the SYN/ACK (and thus that we know if
	 * the receiver also does window scaling)
	 *
	 * NOTE(review): nothing in this function clears TCP_WSCALE_FIRST, so
	 * once it is set this assignment runs for every non-SYN packet.
	 * Confirm whether the flag is meant to be cleared here.
	 */
	if (!(tcpflags & TH_SYN) && (fdata->td_winflags & TCP_WSCALE_FIRST)) {
		fdata->td_maxwin = win;
	}

	end = seq + dsize;

	if ((fdata->td_end == 0) &&
	    (!(flags & IS_TCPFSM) ||
	     ((tcpflags & TH_OPENING) == TH_OPENING))) {
		/*
		 * Must be a (outgoing) SYN-ACK in reply to a SYN.
		 */
		fdata->td_end = end - 1;
		fdata->td_maxwin = 1;
		fdata->td_maxend = end + win;
	}

	if (!(tcpflags & TH_ACK)) {  /* Pretend an ack was sent */
		ack = tdata->td_end;
	} else if (((tcpflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) &&
		   (ack == 0)) {
		/* gross hack to get around certain broken tcp stacks */
		ack = tdata->td_end;
	}

	maxwin = tdata->td_maxwin;
	/* Distance between what the other side has sent and what is acked. */
	ackskew = tdata->td_end - ack;

	/*
	 * Strict sequencing only allows in-order delivery.
	 */
	if ((flags & IS_STRICT) != 0) {
		if (seq != fdata->td_end) {
			DTRACE_PROBE(strict_check);
			return 0;
		}
	}

/* Sequence number comparisons done on the signed difference. */
#define	SEQ_GE(a,b)	((int)((a) - (b)) >= 0)
#define	SEQ_GT(a,b)	((int)((a) - (b)) > 0)
	inseq = 0;
	DTRACE_PROBE4(
		dyn_params,
		int, dsize,
		int, ackskew,
		int, maxwin,
		int, win
	);
	if (
#if defined(_KERNEL)
		/*
		 * end <-> s + n
		 * maxend <-> ack + win
		 * this is upperbound check
		 */
	    (SEQ_GE(fdata->td_maxend, end)) &&
		/*
		 * this is lowerbound check
		 */
	    (SEQ_GE(seq, fdata->td_end - maxwin)) &&
#endif
/* XXX what about big packets */
#define MAXACKWINDOW 66000
	    (-ackskew <= (MAXACKWINDOW << fdata->td_winscale)) &&
	    ( ackskew <= (MAXACKWINDOW << fdata->td_winscale))) {
		inseq = 1;
	/*
	 * Microsoft Windows will send the next packet to the right of the
	 * window if SACK is in use.
	 */
	} else if ((seq == fdata->td_maxend) && (ackskew == 0) &&
	    (fdata->td_winflags & TCP_SACK_PERMIT) &&
	    (tdata->td_winflags & TCP_SACK_PERMIT)) {
		inseq = 1;
	/*
	 * RST ACK with SEQ equal to 0 is sent by some OSes (i.e. Solaris) as a
	 * response to initial SYN packet, when there is no application
	 * listening on the port the SYN packet was sent to.
	 */
	} else if ((seq == 0) && (tcpflags == (TH_RST|TH_ACK)) &&
	    (ackskew >= -1) && (ackskew <= 1)) {
		inseq = 1;
	} else if (!(flags & IS_TCPFSM)) {

		if (!(fdata->td_winflags &
		    (TCP_WSCALE_SEEN|TCP_WSCALE_FIRST))) {
			/*
			 * No TCPFSM and no window scaling, so make some
			 * extra guesses.
			 */
			if ((seq == fdata->td_maxend) && (ackskew == 0))
				inseq = 1;
			else if (SEQ_GE(seq + maxwin, fdata->td_end - maxwin))
				inseq = 1;
		}
	}

	if (inseq) {
		/* if ackskew < 0 then this should be due to fragmented
		 * packets. There is no way to know the length of the
		 * total packet in advance.
		 * We do know the total length from the fragment cache though.
		 * Note however that there might be more sessions with
		 * exactly the same source and destination parameters in the
		 * state cache (and source and destination is the only stuff
		 * that is saved in the fragment cache). Note further that
		 * some TCP connections in the state cache are hashed with
		 * sport and dport as well which makes it not worthwhile to
		 * look for them.
		 * Thus, when ackskew is negative but still seems to belong
		 * to this session, we bump up the destinations end value.
		 */
		if (ackskew < 0) {
			DTRACE_PROBE2(end_update_td,
				int, tdata->td_end,
				int, ack
			);
			tdata->td_end = ack;
		}

		/* update max window seen */
		if (fdata->td_maxwin < win) {
			DTRACE_PROBE2(win_update_fd,
				int, fdata->td_maxwin,
				int, win
			);
			fdata->td_maxwin = win;
		}

		/* advance the sender's end of data when this segment extends it */
		if (SEQ_GT(end, fdata->td_end)) {
			DTRACE_PROBE2(end_update_fd,
				int, fdata->td_end,
				int, end
			);
			fdata->td_end = end;
		}

		/* advance the upper bound recorded in tdata to ack + win */
		if (SEQ_GE(ack + win, tdata->td_maxend)) {
			DTRACE_PROBE2(max_end_update_td,
				int, tdata->td_maxend,
				int, ack + win
			);
			tdata->td_maxend = ack + win;
		}

		return 1;
	}
	fin->fin_flx |= FI_OOW;

#if defined(_KERNEL)
	/* The segment failed the lowerbound check used above. */
	if (!(SEQ_GE(seq, fdata->td_end - maxwin)))
		fin->fin_flx |= FI_NEG_OOW;
#endif

	return 0;
}
table entry from the master. */ 2002 /* ------------------------------------------------------------------------ */ 2003 static ipstate_t *fr_stclone(fin, tcp, is) 2004 fr_info_t *fin; 2005 tcphdr_t *tcp; 2006 ipstate_t *is; 2007 { 2008 ipstate_t *clone; 2009 u_32_t send; 2010 ipf_stack_t *ifs = fin->fin_ifs; 2011 2012 /* 2013 * Trigger automatic call to fr_state_flush() if the 2014 * table has reached capacity specified by hi watermark. 2015 */ 2016 if (ST_TAB_WATER_LEVEL(ifs) > ifs->ifs_state_flush_level_hi) 2017 ifs->ifs_fr_state_doflush = 1; 2018 2019 /* 2020 * If automatic flushing did not do its job, and the table 2021 * has filled up, don't try to create a new entry. A NULL 2022 * return will indicate that the cloning has failed. 2023 */ 2024 if (ifs->ifs_ips_num >= ifs->ifs_fr_statemax) { 2025 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 2026 return NULL; 2027 } 2028 2029 KMALLOC(clone, ipstate_t *); 2030 if (clone == NULL) 2031 return NULL; 2032 bcopy((char *)is, (char *)clone, sizeof(*clone)); 2033 2034 MUTEX_NUKE(&clone->is_lock); 2035 2036 clone->is_die = ONE_DAY + ifs->ifs_fr_ticks; 2037 clone->is_state[0] = 0; 2038 clone->is_state[1] = 0; 2039 send = ntohl(tcp->th_seq) + fin->fin_dlen - (TCP_OFF(tcp) << 2) + 2040 ((tcp->th_flags & TH_SYN) ? 1 : 0) + 2041 ((tcp->th_flags & TH_FIN) ? 
1 : 0); 2042 2043 if (fin->fin_rev == 1) { 2044 clone->is_dend = send; 2045 clone->is_maxdend = send; 2046 clone->is_send = 0; 2047 clone->is_maxswin = 1; 2048 clone->is_maxdwin = ntohs(tcp->th_win); 2049 if (clone->is_maxdwin == 0) 2050 clone->is_maxdwin = 1; 2051 } else { 2052 clone->is_send = send; 2053 clone->is_maxsend = send; 2054 clone->is_dend = 0; 2055 clone->is_maxdwin = 1; 2056 clone->is_maxswin = ntohs(tcp->th_win); 2057 if (clone->is_maxswin == 0) 2058 clone->is_maxswin = 1; 2059 } 2060 2061 clone->is_flags &= ~SI_CLONE; 2062 clone->is_flags |= SI_CLONED; 2063 fr_stinsert(clone, fin->fin_rev, ifs); 2064 clone->is_ref = 1; 2065 if (clone->is_p == IPPROTO_TCP) { 2066 (void) fr_tcp_age(&clone->is_sti, fin, ifs->ifs_ips_tqtqb, 2067 clone->is_flags); 2068 } 2069 MUTEX_EXIT(&clone->is_lock); 2070 #ifdef IPFILTER_SCAN 2071 (void) ipsc_attachis(is); 2072 #endif 2073 #ifdef IPFILTER_SYNC 2074 if (is->is_flags & IS_STATESYNC) 2075 clone->is_sync = ipfsync_new(SMC_STATE, fin, clone); 2076 #endif 2077 return clone; 2078 } 2079 2080 2081 /* ------------------------------------------------------------------------ */ 2082 /* Function: fr_matchsrcdst */ 2083 /* Returns: Nil */ 2084 /* Parameters: fin(I) - pointer to packet information */ 2085 /* is(I) - pointer to state structure */ 2086 /* src(I) - pointer to source address */ 2087 /* dst(I) - pointer to destination address */ 2088 /* tcp(I) - pointer to TCP/UDP header */ 2089 /* */ 2090 /* Match a state table entry against an IP packet. The logic below is that */ 2091 /* ret gets set to one if the match succeeds, else remains 0. If it is */ 2092 /* still 0 after the test. no match. 
*/ 2093 /* ------------------------------------------------------------------------ */ 2094 static ipstate_t *fr_matchsrcdst(fin, is, src, dst, tcp, cmask) 2095 fr_info_t *fin; 2096 ipstate_t *is; 2097 i6addr_t *src, *dst; 2098 tcphdr_t *tcp; 2099 u_32_t cmask; 2100 { 2101 int ret = 0, rev, out, flags, flx = 0, idx; 2102 u_short sp, dp; 2103 u_32_t cflx; 2104 void *ifp; 2105 ipf_stack_t *ifs = fin->fin_ifs; 2106 2107 rev = IP6_NEQ(&is->is_dst, dst); 2108 ifp = fin->fin_ifp; 2109 out = fin->fin_out; 2110 flags = is->is_flags; 2111 sp = 0; 2112 dp = 0; 2113 2114 if (tcp != NULL) { 2115 sp = htons(fin->fin_sport); 2116 dp = ntohs(fin->fin_dport); 2117 } 2118 if (!rev) { 2119 if (tcp != NULL) { 2120 if (!(flags & SI_W_SPORT) && (sp != is->is_sport)) 2121 rev = 1; 2122 else if (!(flags & SI_W_DPORT) && (dp != is->is_dport)) 2123 rev = 1; 2124 } 2125 } 2126 2127 idx = (out << 1) + rev; 2128 2129 /* 2130 * If the interface for this 'direction' is set, make sure it matches. 2131 * An interface name that is not set matches any, as does a name of *. 2132 */ 2133 if ((is->is_ifp[idx] == NULL && 2134 (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) || 2135 is->is_ifp[idx] == ifp) 2136 ret = 1; 2137 2138 if (ret == 0) 2139 return NULL; 2140 ret = 0; 2141 2142 /* 2143 * Match addresses and ports. 
2144 */ 2145 if (rev == 0) { 2146 if ((IP6_EQ(&is->is_dst, dst) || (flags & SI_W_DADDR)) && 2147 (IP6_EQ(&is->is_src, src) || (flags & SI_W_SADDR))) { 2148 if (tcp) { 2149 if ((sp == is->is_sport || flags & SI_W_SPORT)&& 2150 (dp == is->is_dport || flags & SI_W_DPORT)) 2151 ret = 1; 2152 } else { 2153 ret = 1; 2154 } 2155 } 2156 } else { 2157 if ((IP6_EQ(&is->is_dst, src) || (flags & SI_W_DADDR)) && 2158 (IP6_EQ(&is->is_src, dst) || (flags & SI_W_SADDR))) { 2159 if (tcp) { 2160 if ((dp == is->is_sport || flags & SI_W_SPORT)&& 2161 (sp == is->is_dport || flags & SI_W_DPORT)) 2162 ret = 1; 2163 } else { 2164 ret = 1; 2165 } 2166 } 2167 } 2168 2169 if (ret == 0) 2170 return NULL; 2171 2172 /* 2173 * Whether or not this should be here, is questionable, but the aim 2174 * is to get this out of the main line. 2175 */ 2176 if (tcp == NULL) 2177 flags = is->is_flags & ~(SI_WILDP|SI_NEWFR|SI_CLONE|SI_CLONED); 2178 2179 /* 2180 * Only one of the source or destination address can be flaged as a 2181 * wildcard. Fill in the missing address, if set. 2182 * For IPv6, if the address being copied in is multicast, then 2183 * don't reset the wild flag - multicast causes it to be set in the 2184 * first place! 
2185 */ 2186 if ((flags & (SI_W_SADDR|SI_W_DADDR))) { 2187 fr_ip_t *fi = &fin->fin_fi; 2188 2189 if ((flags & SI_W_SADDR) != 0) { 2190 if (rev == 0) { 2191 #ifdef USE_INET6 2192 if (is->is_v == 6 && 2193 IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6)) 2194 /*EMPTY*/; 2195 else 2196 #endif 2197 { 2198 is->is_src = fi->fi_src; 2199 is->is_flags &= ~SI_W_SADDR; 2200 } 2201 } else { 2202 #ifdef USE_INET6 2203 if (is->is_v == 6 && 2204 IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6)) 2205 /*EMPTY*/; 2206 else 2207 #endif 2208 { 2209 is->is_src = fi->fi_dst; 2210 is->is_flags &= ~SI_W_SADDR; 2211 } 2212 } 2213 } else if ((flags & SI_W_DADDR) != 0) { 2214 if (rev == 0) { 2215 #ifdef USE_INET6 2216 if (is->is_v == 6 && 2217 IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6)) 2218 /*EMPTY*/; 2219 else 2220 #endif 2221 { 2222 is->is_dst = fi->fi_dst; 2223 is->is_flags &= ~SI_W_DADDR; 2224 } 2225 } else { 2226 #ifdef USE_INET6 2227 if (is->is_v == 6 && 2228 IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6)) 2229 /*EMPTY*/; 2230 else 2231 #endif 2232 { 2233 is->is_dst = fi->fi_src; 2234 is->is_flags &= ~SI_W_DADDR; 2235 } 2236 } 2237 } 2238 if ((is->is_flags & (SI_WILDA|SI_WILDP)) == 0) { 2239 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); 2240 } 2241 } 2242 2243 flx = fin->fin_flx & cmask; 2244 cflx = is->is_flx[out][rev]; 2245 2246 /* 2247 * Match up any flags set from IP options. 2248 */ 2249 if ((cflx && (flx != (cflx & cmask))) || 2250 ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]) || 2251 ((fin->fin_secmsk & is->is_secmsk) != is->is_sec) || 2252 ((fin->fin_auth & is->is_authmsk) != is->is_auth)) 2253 return NULL; 2254 2255 /* 2256 * Only one of the source or destination port can be flagged as a 2257 * wildcard. When filling it in, fill in a copy of the matched entry 2258 * if it has the cloning flag set. 
2259 */ 2260 if ((fin->fin_flx & FI_IGNORE) != 0) { 2261 fin->fin_rev = rev; 2262 return is; 2263 } 2264 2265 if ((flags & (SI_W_SPORT|SI_W_DPORT))) { 2266 if ((flags & SI_CLONE) != 0) { 2267 ipstate_t *clone; 2268 2269 clone = fr_stclone(fin, tcp, is); 2270 if (clone == NULL) 2271 return NULL; 2272 is = clone; 2273 } else { 2274 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); 2275 } 2276 2277 if ((flags & SI_W_SPORT) != 0) { 2278 if (rev == 0) { 2279 is->is_sport = sp; 2280 is->is_send = ntohl(tcp->th_seq); 2281 } else { 2282 is->is_sport = dp; 2283 is->is_send = ntohl(tcp->th_ack); 2284 } 2285 is->is_maxsend = is->is_send + 1; 2286 } else if ((flags & SI_W_DPORT) != 0) { 2287 if (rev == 0) { 2288 is->is_dport = dp; 2289 is->is_dend = ntohl(tcp->th_ack); 2290 } else { 2291 is->is_dport = sp; 2292 is->is_dend = ntohl(tcp->th_seq); 2293 } 2294 is->is_maxdend = is->is_dend + 1; 2295 } 2296 is->is_flags &= ~(SI_W_SPORT|SI_W_DPORT); 2297 if ((flags & SI_CLONED) && ifs->ifs_ipstate_logging) 2298 ipstate_log(is, ISL_CLONE, ifs); 2299 } 2300 2301 ret = -1; 2302 2303 if (is->is_flx[out][rev] == 0) { 2304 is->is_flx[out][rev] = flx; 2305 is->is_opt[rev] = fin->fin_optmsk; 2306 if (is->is_v == 6) { 2307 is->is_opt[rev] &= ~0x8; 2308 is->is_optmsk[rev] &= ~0x8; 2309 } 2310 } 2311 2312 /* 2313 * Check if the interface name for this "direction" is set and if not, 2314 * fill it in. 
2315 */ 2316 if (is->is_ifp[idx] == NULL && 2317 (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) { 2318 is->is_ifp[idx] = ifp; 2319 COPYIFNAME(ifp, is->is_ifname[idx], fin->fin_v); 2320 } 2321 fin->fin_rev = rev; 2322 return is; 2323 } 2324 2325 2326 /* ------------------------------------------------------------------------ */ 2327 /* Function: fr_checkicmpmatchingstate */ 2328 /* Returns: Nil */ 2329 /* Parameters: fin(I) - pointer to packet information */ 2330 /* */ 2331 /* If we've got an ICMP error message, using the information stored in the */ 2332 /* ICMP packet, look for a matching state table entry. */ 2333 /* */ 2334 /* If we return NULL then no lock on ipf_state is held. */ 2335 /* If we return non-null then a read-lock on ipf_state is held. */ 2336 /* ------------------------------------------------------------------------ */ 2337 static ipstate_t *fr_checkicmpmatchingstate(fin) 2338 fr_info_t *fin; 2339 { 2340 ipstate_t *is, **isp; 2341 u_short sport, dport; 2342 u_char pr; 2343 int backward, i, oi; 2344 i6addr_t dst, src; 2345 struct icmp *ic; 2346 u_short savelen; 2347 icmphdr_t *icmp; 2348 fr_info_t ofin; 2349 tcphdr_t *tcp; 2350 int len; 2351 ip_t *oip; 2352 u_int hv; 2353 ipf_stack_t *ifs = fin->fin_ifs; 2354 2355 /* 2356 * Does it at least have the return (basic) IP header ? 2357 * Is it an actual recognised ICMP error type? 2358 * Only a basic IP header (no options) should be with 2359 * an ICMP error header. 2360 */ 2361 if ((fin->fin_v != 4) || (fin->fin_hlen != sizeof(ip_t)) || 2362 (fin->fin_plen < ICMPERR_MINPKTLEN) || 2363 !(fin->fin_flx & FI_ICMPERR)) 2364 return NULL; 2365 ic = fin->fin_dp; 2366 2367 oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN); 2368 /* 2369 * Check if the at least the old IP header (with options) and 2370 * 8 bytes of payload is present. 2371 */ 2372 if (fin->fin_plen < ICMPERR_MAXPKTLEN + ((IP_HL(oip) - 5) << 2)) 2373 return NULL; 2374 2375 /* 2376 * Sanity Checks. 
2377 */ 2378 len = fin->fin_dlen - ICMPERR_ICMPHLEN; 2379 if ((len <= 0) || ((IP_HL(oip) << 2) > len)) 2380 return NULL; 2381 2382 /* 2383 * Is the buffer big enough for all of it ? It's the size of the IP 2384 * header claimed in the encapsulated part which is of concern. It 2385 * may be too big to be in this buffer but not so big that it's 2386 * outside the ICMP packet, leading to TCP deref's causing problems. 2387 * This is possible because we don't know how big oip_hl is when we 2388 * do the pullup early in fr_check() and thus can't guarantee it is 2389 * all here now. 2390 */ 2391 #ifdef _KERNEL 2392 { 2393 mb_t *m; 2394 2395 m = fin->fin_m; 2396 # if defined(MENTAT) 2397 if ((char *)oip + len > (char *)m->b_wptr) 2398 return NULL; 2399 # else 2400 if ((char *)oip + len > (char *)fin->fin_ip + m->m_len) 2401 return NULL; 2402 # endif 2403 } 2404 #endif 2405 bcopy((char *)fin, (char *)&ofin, sizeof(*fin)); 2406 2407 /* 2408 * in the IPv4 case we must zero the i6addr union otherwise 2409 * the IP6_EQ and IP6_NEQ macros produce the wrong results because 2410 * of the 'junk' in the unused part of the union 2411 */ 2412 bzero((char *)&src, sizeof(src)); 2413 bzero((char *)&dst, sizeof(dst)); 2414 2415 /* 2416 * we make an fin entry to be able to feed it to 2417 * matchsrcdst note that not all fields are encessary 2418 * but this is the cleanest way. Note further we fill 2419 * in fin_mp such that if someone uses it we'll get 2420 * a kernel panic. fr_matchsrcdst does not use this. 2421 * 2422 * watch out here, as ip is in host order and oip in network 2423 * order. Any change we make must be undone afterwards, like 2424 * oip->ip_off - it is still in network byte order so fix it. 
2425 */ 2426 savelen = oip->ip_len; 2427 oip->ip_len = len; 2428 oip->ip_off = ntohs(oip->ip_off); 2429 2430 ofin.fin_flx = FI_NOCKSUM; 2431 ofin.fin_v = 4; 2432 ofin.fin_ip = oip; 2433 ofin.fin_m = NULL; /* if dereferenced, panic XXX */ 2434 ofin.fin_mp = NULL; /* if dereferenced, panic XXX */ 2435 ofin.fin_plen = fin->fin_dlen - ICMPERR_ICMPHLEN; 2436 (void) fr_makefrip(IP_HL(oip) << 2, oip, &ofin); 2437 ofin.fin_ifp = fin->fin_ifp; 2438 ofin.fin_out = !fin->fin_out; 2439 /* 2440 * Reset the short and bad flag here because in fr_matchsrcdst() 2441 * the flags for the current packet (fin_flx) are compared against 2442 * those for the existing session. 2443 */ 2444 ofin.fin_flx &= ~(FI_BAD|FI_SHORT); 2445 2446 /* 2447 * Put old values of ip_len and ip_off back as we don't know 2448 * if we have to forward the packet (or process it again. 2449 */ 2450 oip->ip_len = savelen; 2451 oip->ip_off = htons(oip->ip_off); 2452 2453 switch (oip->ip_p) 2454 { 2455 case IPPROTO_ICMP : 2456 /* 2457 * an ICMP error can only be generated as a result of an 2458 * ICMP query, not as the response on an ICMP error 2459 * 2460 * XXX theoretically ICMP_ECHOREP and the other reply's are 2461 * ICMP query's as well, but adding them here seems strange XXX 2462 */ 2463 if ((ofin.fin_flx & FI_ICMPERR) != 0) 2464 return NULL; 2465 2466 /* 2467 * perform a lookup of the ICMP packet in the state table 2468 */ 2469 icmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2470 hv = (pr = oip->ip_p); 2471 src.in4 = oip->ip_src; 2472 hv += src.in4.s_addr; 2473 dst.in4 = oip->ip_dst; 2474 hv += dst.in4.s_addr; 2475 hv += icmp->icmp_id; 2476 hv = DOUBLE_HASH(hv, ifs); 2477 2478 READ_ENTER(&ifs->ifs_ipf_state); 2479 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 2480 isp = &is->is_hnext; 2481 if ((is->is_p != pr) || (is->is_v != 4)) 2482 continue; 2483 if (is->is_pass & FR_NOICMPERR) 2484 continue; 2485 is = fr_matchsrcdst(&ofin, is, &src, &dst, 2486 NULL, FI_ICMPCMP); 2487 if (is != 
NULL) { 2488 if ((is->is_pass & FR_NOICMPERR) != 0) { 2489 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2490 return NULL; 2491 } 2492 /* 2493 * i : the index of this packet (the icmp 2494 * unreachable) 2495 * oi : the index of the original packet found 2496 * in the icmp header (i.e. the packet 2497 * causing this icmp) 2498 * backward : original packet was backward 2499 * compared to the state 2500 */ 2501 backward = IP6_NEQ(&is->is_src, &src); 2502 fin->fin_rev = !backward; 2503 i = (!backward << 1) + fin->fin_out; 2504 oi = (backward << 1) + ofin.fin_out; 2505 if (is->is_icmppkts[i] > is->is_pkts[oi]) 2506 continue; 2507 ifs->ifs_ips_stats.iss_hits++; 2508 is->is_icmppkts[i]++; 2509 return is; 2510 } 2511 } 2512 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2513 return NULL; 2514 case IPPROTO_TCP : 2515 case IPPROTO_UDP : 2516 break; 2517 default : 2518 return NULL; 2519 } 2520 2521 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2522 dport = tcp->th_dport; 2523 sport = tcp->th_sport; 2524 2525 hv = (pr = oip->ip_p); 2526 src.in4 = oip->ip_src; 2527 hv += src.in4.s_addr; 2528 dst.in4 = oip->ip_dst; 2529 hv += dst.in4.s_addr; 2530 hv += dport; 2531 hv += sport; 2532 hv = DOUBLE_HASH(hv, ifs); 2533 2534 READ_ENTER(&ifs->ifs_ipf_state); 2535 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 2536 isp = &is->is_hnext; 2537 /* 2538 * Only allow this icmp though if the 2539 * encapsulated packet was allowed through the 2540 * other way around. Note that the minimal amount 2541 * of info present does not allow for checking against 2542 * tcp internals such as seq and ack numbers. Only the 2543 * ports are known to be present and can be even if the 2544 * short flag is set. 2545 */ 2546 if ((is->is_p == pr) && (is->is_v == 4) && 2547 (is = fr_matchsrcdst(&ofin, is, &src, &dst, 2548 tcp, FI_ICMPCMP))) { 2549 /* 2550 * i : the index of this packet (the icmp unreachable) 2551 * oi : the index of the original packet found in the 2552 * icmp header (i.e. 
the packet causing this icmp) 2553 * backward : original packet was backward compared to 2554 * the state 2555 */ 2556 backward = IP6_NEQ(&is->is_src, &src); 2557 fin->fin_rev = !backward; 2558 i = (!backward << 1) + fin->fin_out; 2559 oi = (backward << 1) + ofin.fin_out; 2560 2561 if (((is->is_pass & FR_NOICMPERR) != 0) || 2562 (is->is_icmppkts[i] > is->is_pkts[oi])) 2563 break; 2564 ifs->ifs_ips_stats.iss_hits++; 2565 is->is_icmppkts[i]++; 2566 /* 2567 * we deliberately do not touch the timeouts 2568 * for the accompanying state table entry. 2569 * It remains to be seen if that is correct. XXX 2570 */ 2571 return is; 2572 } 2573 } 2574 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2575 return NULL; 2576 } 2577 2578 2579 /* ------------------------------------------------------------------------ */ 2580 /* Function: fr_ipsmove */ 2581 /* Returns: Nil */ 2582 /* Parameters: is(I) - pointer to state table entry */ 2583 /* hv(I) - new hash value for state table entry */ 2584 /* Write Locks: ipf_state */ 2585 /* */ 2586 /* Move a state entry from one position in the hash table to another. */ 2587 /* ------------------------------------------------------------------------ */ 2588 static void fr_ipsmove(is, hv, ifs) 2589 ipstate_t *is; 2590 u_int hv; 2591 ipf_stack_t *ifs; 2592 { 2593 ipstate_t **isp; 2594 u_int hvm; 2595 2596 ASSERT(rw_read_locked(&ifs->ifs_ipf_state.ipf_lk) == 0); 2597 2598 hvm = is->is_hv; 2599 /* 2600 * Remove the hash from the old location... 2601 */ 2602 isp = is->is_phnext; 2603 if (is->is_hnext) 2604 is->is_hnext->is_phnext = isp; 2605 *isp = is->is_hnext; 2606 if (ifs->ifs_ips_table[hvm] == NULL) 2607 ifs->ifs_ips_stats.iss_inuse--; 2608 ifs->ifs_ips_stats.iss_bucketlen[hvm]--; 2609 2610 /* 2611 * ...and put the hash in the new one. 
2612 */ 2613 hvm = DOUBLE_HASH(hv, ifs); 2614 is->is_hv = hvm; 2615 isp = &ifs->ifs_ips_table[hvm]; 2616 if (*isp) 2617 (*isp)->is_phnext = &is->is_hnext; 2618 else 2619 ifs->ifs_ips_stats.iss_inuse++; 2620 ifs->ifs_ips_stats.iss_bucketlen[hvm]++; 2621 is->is_phnext = isp; 2622 is->is_hnext = *isp; 2623 *isp = is; 2624 } 2625 2626 2627 /* ------------------------------------------------------------------------ */ 2628 /* Function: fr_stlookup */ 2629 /* Returns: ipstate_t* - NULL == no matching state found, */ 2630 /* else pointer to state information is returned */ 2631 /* Parameters: fin(I) - pointer to packet information */ 2632 /* tcp(I) - pointer to TCP/UDP header. */ 2633 /* */ 2634 /* Search the state table for a matching entry to the packet described by */ 2635 /* the contents of *fin. */ 2636 /* */ 2637 /* If we return NULL then no lock on ipf_state is held. */ 2638 /* If we return non-null then a read-lock on ipf_state is held. */ 2639 /* ------------------------------------------------------------------------ */ 2640 ipstate_t *fr_stlookup(fin, tcp, ifqp) 2641 fr_info_t *fin; 2642 tcphdr_t *tcp; 2643 ipftq_t **ifqp; 2644 { 2645 u_int hv, hvm, pr, v, tryagain; 2646 ipstate_t *is, **isp; 2647 u_short dport, sport; 2648 i6addr_t src, dst; 2649 struct icmp *ic; 2650 ipftq_t *ifq; 2651 int oow; 2652 ipf_stack_t *ifs = fin->fin_ifs; 2653 2654 is = NULL; 2655 ifq = NULL; 2656 tcp = fin->fin_dp; 2657 ic = (struct icmp *)tcp; 2658 hv = (pr = fin->fin_fi.fi_p); 2659 src = fin->fin_fi.fi_src; 2660 dst = fin->fin_fi.fi_dst; 2661 hv += src.in4.s_addr; 2662 hv += dst.in4.s_addr; 2663 2664 v = fin->fin_fi.fi_v; 2665 #ifdef USE_INET6 2666 if (v == 6) { 2667 hv += fin->fin_fi.fi_src.i6[1]; 2668 hv += fin->fin_fi.fi_src.i6[2]; 2669 hv += fin->fin_fi.fi_src.i6[3]; 2670 2671 if ((fin->fin_p == IPPROTO_ICMPV6) && 2672 IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_dst.in6)) { 2673 hv -= dst.in4.s_addr; 2674 } else { 2675 hv += fin->fin_fi.fi_dst.i6[1]; 2676 hv += 
fin->fin_fi.fi_dst.i6[2]; 2677 hv += fin->fin_fi.fi_dst.i6[3]; 2678 } 2679 } 2680 #endif 2681 if ((v == 4) && 2682 (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) { 2683 if (fin->fin_out == 0) { 2684 hv -= src.in4.s_addr; 2685 } else { 2686 hv -= dst.in4.s_addr; 2687 } 2688 } 2689 2690 /* 2691 * Search the hash table for matching packet header info. 2692 */ 2693 switch (pr) 2694 { 2695 #ifdef USE_INET6 2696 case IPPROTO_ICMPV6 : 2697 tryagain = 0; 2698 if (v == 6) { 2699 if ((ic->icmp_type == ICMP6_ECHO_REQUEST) || 2700 (ic->icmp_type == ICMP6_ECHO_REPLY)) { 2701 hv += ic->icmp_id; 2702 } 2703 } 2704 READ_ENTER(&ifs->ifs_ipf_state); 2705 icmp6again: 2706 hvm = DOUBLE_HASH(hv, ifs); 2707 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { 2708 isp = &is->is_hnext; 2709 if ((is->is_p != pr) || (is->is_v != v)) 2710 continue; 2711 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); 2712 if (is != NULL && 2713 fr_matchicmpqueryreply(v, &is->is_icmp, 2714 ic, fin->fin_rev)) { 2715 if (fin->fin_rev) 2716 ifq = &ifs->ifs_ips_icmpacktq; 2717 else 2718 ifq = &ifs->ifs_ips_icmptq; 2719 break; 2720 } 2721 } 2722 2723 if (is != NULL) { 2724 if ((tryagain != 0) && !(is->is_flags & SI_W_DADDR)) { 2725 hv += fin->fin_fi.fi_src.i6[0]; 2726 hv += fin->fin_fi.fi_src.i6[1]; 2727 hv += fin->fin_fi.fi_src.i6[2]; 2728 hv += fin->fin_fi.fi_src.i6[3]; 2729 fr_ipsmove(is, hv, ifs); 2730 MUTEX_DOWNGRADE(&ifs->ifs_ipf_state); 2731 } 2732 break; 2733 } 2734 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2735 2736 /* 2737 * No matching icmp state entry. Perhaps this is a 2738 * response to another state entry. 2739 * 2740 * XXX With some ICMP6 packets, the "other" address is already 2741 * in the packet, after the ICMP6 header, and this could be 2742 * used in place of the multicast address. However, taking 2743 * advantage of this requires some significant code changes 2744 * to handle the specific types where that is the case. 
2745 */ 2746 if ((ifs->ifs_ips_stats.iss_wild != 0) && (v == 6) && (tryagain == 0) && 2747 !IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_src.in6)) { 2748 hv -= fin->fin_fi.fi_src.i6[0]; 2749 hv -= fin->fin_fi.fi_src.i6[1]; 2750 hv -= fin->fin_fi.fi_src.i6[2]; 2751 hv -= fin->fin_fi.fi_src.i6[3]; 2752 tryagain = 1; 2753 WRITE_ENTER(&ifs->ifs_ipf_state); 2754 goto icmp6again; 2755 } 2756 2757 is = fr_checkicmp6matchingstate(fin); 2758 if (is != NULL) 2759 return is; 2760 break; 2761 #endif 2762 2763 case IPPROTO_ICMP : 2764 if (v == 4) { 2765 hv += ic->icmp_id; 2766 } 2767 hv = DOUBLE_HASH(hv, ifs); 2768 READ_ENTER(&ifs->ifs_ipf_state); 2769 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 2770 isp = &is->is_hnext; 2771 if ((is->is_p != pr) || (is->is_v != v)) 2772 continue; 2773 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); 2774 if (is != NULL && 2775 fr_matchicmpqueryreply(v, &is->is_icmp, 2776 ic, fin->fin_rev)) { 2777 if (fin->fin_rev) 2778 ifq = &ifs->ifs_ips_icmpacktq; 2779 else 2780 ifq = &ifs->ifs_ips_icmptq; 2781 break; 2782 } 2783 } 2784 if (is == NULL) { 2785 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2786 } 2787 break; 2788 2789 case IPPROTO_TCP : 2790 case IPPROTO_UDP : 2791 ifqp = NULL; 2792 sport = htons(fin->fin_data[0]); 2793 hv += sport; 2794 dport = htons(fin->fin_data[1]); 2795 hv += dport; 2796 oow = 0; 2797 tryagain = 0; 2798 READ_ENTER(&ifs->ifs_ipf_state); 2799 retry_tcpudp: 2800 hvm = DOUBLE_HASH(hv, ifs); 2801 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { 2802 isp = &is->is_hnext; 2803 if ((is->is_p != pr) || (is->is_v != v)) 2804 continue; 2805 fin->fin_flx &= ~FI_OOW; 2806 is = fr_matchsrcdst(fin, is, &src, &dst, tcp, FI_CMP); 2807 if (is != NULL) { 2808 if (pr == IPPROTO_TCP) { 2809 if (!fr_tcpstate(fin, tcp, is)) { 2810 oow |= fin->fin_flx & FI_OOW; 2811 continue; 2812 } 2813 } 2814 break; 2815 } 2816 } 2817 if (is != NULL) { 2818 if (tryagain && 2819 !(is->is_flags & (SI_CLONE|SI_WILDP|SI_WILDA))) { 2820 hv += 
dport; 2821 hv += sport; 2822 fr_ipsmove(is, hv, ifs); 2823 MUTEX_DOWNGRADE(&ifs->ifs_ipf_state); 2824 } 2825 break; 2826 } 2827 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2828 2829 if (ifs->ifs_ips_stats.iss_wild) { 2830 if (tryagain == 0) { 2831 hv -= dport; 2832 hv -= sport; 2833 } else if (tryagain == 1) { 2834 hv = fin->fin_fi.fi_p; 2835 /* 2836 * If we try to pretend this is a reply to a 2837 * multicast/broadcast packet then we need to 2838 * exclude part of the address from the hash 2839 * calculation. 2840 */ 2841 if (fin->fin_out == 0) { 2842 hv += src.in4.s_addr; 2843 } else { 2844 hv += dst.in4.s_addr; 2845 } 2846 hv += dport; 2847 hv += sport; 2848 } 2849 tryagain++; 2850 if (tryagain <= 2) { 2851 WRITE_ENTER(&ifs->ifs_ipf_state); 2852 goto retry_tcpudp; 2853 } 2854 } 2855 fin->fin_flx |= oow; 2856 break; 2857 2858 #if 0 2859 case IPPROTO_GRE : 2860 gre = fin->fin_dp; 2861 if (GRE_REV(gre->gr_flags) == 1) { 2862 hv += gre->gr_call; 2863 } 2864 /* FALLTHROUGH */ 2865 #endif 2866 default : 2867 ifqp = NULL; 2868 hvm = DOUBLE_HASH(hv, ifs); 2869 READ_ENTER(&ifs->ifs_ipf_state); 2870 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { 2871 isp = &is->is_hnext; 2872 if ((is->is_p != pr) || (is->is_v != v)) 2873 continue; 2874 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); 2875 if (is != NULL) { 2876 ifq = &ifs->ifs_ips_iptq; 2877 break; 2878 } 2879 } 2880 if (is == NULL) { 2881 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2882 } 2883 break; 2884 } 2885 2886 if ((is != NULL) && ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) && 2887 (is->is_tqehead[fin->fin_rev] != NULL)) 2888 ifq = is->is_tqehead[fin->fin_rev]; 2889 if (ifq != NULL && ifqp != NULL) 2890 *ifqp = ifq; 2891 return is; 2892 } 2893 2894 2895 /* ------------------------------------------------------------------------ */ 2896 /* Function: fr_updatestate */ 2897 /* Returns: Nil */ 2898 /* Parameters: fin(I) - pointer to packet information */ 2899 /* is(I) - pointer to state table entry */ 2900 /* 
Read Locks: ipf_state */ 2901 /* */ 2902 /* Updates packet and byte counters for a newly received packet. Seeds the */ 2903 /* fragment cache with a new entry as required. */ 2904 /* ------------------------------------------------------------------------ */ 2905 void fr_updatestate(fin, is, ifq) 2906 fr_info_t *fin; 2907 ipstate_t *is; 2908 ipftq_t *ifq; 2909 { 2910 ipftqent_t *tqe; 2911 int i, pass; 2912 ipf_stack_t *ifs = fin->fin_ifs; 2913 2914 i = (fin->fin_rev << 1) + fin->fin_out; 2915 2916 /* 2917 * For TCP packets, ifq == NULL. For all others, check if this new 2918 * queue is different to the last one it was on and move it if so. 2919 */ 2920 tqe = &is->is_sti; 2921 MUTEX_ENTER(&is->is_lock); 2922 if ((tqe->tqe_flags & TQE_RULEBASED) != 0) 2923 ifq = is->is_tqehead[fin->fin_rev]; 2924 2925 if (ifq != NULL) 2926 fr_movequeue(tqe, tqe->tqe_ifq, ifq, ifs); 2927 2928 is->is_pkts[i]++; 2929 fin->fin_pktnum = is->is_pkts[i] + is->is_icmppkts[i]; 2930 is->is_bytes[i] += fin->fin_plen; 2931 MUTEX_EXIT(&is->is_lock); 2932 2933 #ifdef IPFILTER_SYNC 2934 if (is->is_flags & IS_STATESYNC) 2935 ipfsync_update(SMC_STATE, fin, is->is_sync); 2936 #endif 2937 2938 ATOMIC_INCL(ifs->ifs_ips_stats.iss_hits); 2939 2940 fin->fin_fr = is->is_rule; 2941 2942 /* 2943 * If this packet is a fragment and the rule says to track fragments, 2944 * then create a new fragment cache entry. 2945 */ 2946 pass = is->is_pass; 2947 if ((fin->fin_flx & FI_FRAG) && FR_ISPASS(pass)) 2948 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); 2949 } 2950 2951 2952 /* ------------------------------------------------------------------------ */ 2953 /* Function: fr_checkstate */ 2954 /* Returns: frentry_t* - NULL == search failed, */ 2955 /* else pointer to rule for matching state */ 2956 /* Parameters: ifp(I) - pointer to interface */ 2957 /* passp(I) - pointer to filtering result flags */ 2958 /* */ 2959 /* Check if a packet is associated with an entry in the state table. 
*/ 2960 /* ------------------------------------------------------------------------ */ 2961 frentry_t *fr_checkstate(fin, passp) 2962 fr_info_t *fin; 2963 u_32_t *passp; 2964 { 2965 ipstate_t *is; 2966 frentry_t *fr; 2967 tcphdr_t *tcp; 2968 ipftq_t *ifq; 2969 u_int pass; 2970 ipf_stack_t *ifs = fin->fin_ifs; 2971 2972 if (ifs->ifs_fr_state_lock || (ifs->ifs_ips_list == NULL) || 2973 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) 2974 return NULL; 2975 2976 is = NULL; 2977 if ((fin->fin_flx & FI_TCPUDP) || 2978 (fin->fin_fi.fi_p == IPPROTO_ICMP) 2979 #ifdef USE_INET6 2980 || (fin->fin_fi.fi_p == IPPROTO_ICMPV6) 2981 #endif 2982 ) 2983 tcp = fin->fin_dp; 2984 else 2985 tcp = NULL; 2986 2987 /* 2988 * Search the hash table for matching packet header info. 2989 */ 2990 ifq = NULL; 2991 is = fr_stlookup(fin, tcp, &ifq); 2992 switch (fin->fin_p) 2993 { 2994 #ifdef USE_INET6 2995 case IPPROTO_ICMPV6 : 2996 if (is != NULL) 2997 break; 2998 if (fin->fin_v == 6) { 2999 is = fr_checkicmp6matchingstate(fin); 3000 if (is != NULL) 3001 goto matched; 3002 } 3003 break; 3004 #endif 3005 case IPPROTO_ICMP : 3006 if (is != NULL) 3007 break; 3008 /* 3009 * No matching icmp state entry. Perhaps this is a 3010 * response to another state entry. 
3011 */ 3012 is = fr_checkicmpmatchingstate(fin); 3013 if (is != NULL) 3014 goto matched; 3015 break; 3016 case IPPROTO_TCP : 3017 if (is == NULL) 3018 break; 3019 3020 if (is->is_pass & FR_NEWISN) { 3021 if (fin->fin_out == 0) 3022 fr_fixinisn(fin, is); 3023 else if (fin->fin_out == 1) 3024 fr_fixoutisn(fin, is); 3025 } 3026 break; 3027 default : 3028 if (fin->fin_rev) 3029 ifq = &ifs->ifs_ips_udpacktq; 3030 else 3031 ifq = &ifs->ifs_ips_udptq; 3032 break; 3033 } 3034 if (is == NULL) { 3035 ATOMIC_INCL(ifs->ifs_ips_stats.iss_miss); 3036 return NULL; 3037 } 3038 3039 matched: 3040 fr = is->is_rule; 3041 if (fr != NULL) { 3042 if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) { 3043 if (fin->fin_nattag == NULL) { 3044 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3045 return NULL; 3046 } 3047 if (fr_matchtag(&fr->fr_nattag, fin->fin_nattag) != 0) { 3048 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3049 return NULL; 3050 } 3051 } 3052 (void) strncpy(fin->fin_group, fr->fr_group, FR_GROUPLEN); 3053 fin->fin_icode = fr->fr_icode; 3054 } 3055 3056 fin->fin_rule = is->is_rulen; 3057 pass = is->is_pass; 3058 fr_updatestate(fin, is, ifq); 3059 3060 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3061 fin->fin_flx |= FI_STATE; 3062 if ((pass & FR_LOGFIRST) != 0) 3063 pass &= ~(FR_LOGFIRST|FR_LOG); 3064 *passp = pass; 3065 return fr; 3066 } 3067 3068 3069 /* ------------------------------------------------------------------------ */ 3070 /* Function: fr_fixoutisn */ 3071 /* Returns: Nil */ 3072 /* Parameters: fin(I) - pointer to packet information */ 3073 /* is(I) - pointer to master state structure */ 3074 /* */ 3075 /* Called only for outbound packets, adjusts the sequence number and the */ 3076 /* TCP checksum to match that change. 
*/ 3077 /* ------------------------------------------------------------------------ */ 3078 static void fr_fixoutisn(fin, is) 3079 fr_info_t *fin; 3080 ipstate_t *is; 3081 { 3082 tcphdr_t *tcp; 3083 int rev; 3084 u_32_t seq; 3085 3086 tcp = fin->fin_dp; 3087 rev = fin->fin_rev; 3088 if ((is->is_flags & IS_ISNSYN) != 0) { 3089 if (rev == 0) { 3090 seq = ntohl(tcp->th_seq); 3091 seq += is->is_isninc[0]; 3092 tcp->th_seq = htonl(seq); 3093 fix_outcksum(&tcp->th_sum, is->is_sumd[0]); 3094 } 3095 } 3096 if ((is->is_flags & IS_ISNACK) != 0) { 3097 if (rev == 1) { 3098 seq = ntohl(tcp->th_seq); 3099 seq += is->is_isninc[1]; 3100 tcp->th_seq = htonl(seq); 3101 fix_outcksum(&tcp->th_sum, is->is_sumd[1]); 3102 } 3103 } 3104 } 3105 3106 3107 /* ------------------------------------------------------------------------ */ 3108 /* Function: fr_fixinisn */ 3109 /* Returns: Nil */ 3110 /* Parameters: fin(I) - pointer to packet information */ 3111 /* is(I) - pointer to master state structure */ 3112 /* */ 3113 /* Called only for inbound packets, adjusts the acknowledge number and the */ 3114 /* TCP checksum to match that change. 
*/ 3115 /* ------------------------------------------------------------------------ */ 3116 static void fr_fixinisn(fin, is) 3117 fr_info_t *fin; 3118 ipstate_t *is; 3119 { 3120 tcphdr_t *tcp; 3121 int rev; 3122 u_32_t ack; 3123 3124 tcp = fin->fin_dp; 3125 rev = fin->fin_rev; 3126 if ((is->is_flags & IS_ISNSYN) != 0) { 3127 if (rev == 1) { 3128 ack = ntohl(tcp->th_ack); 3129 ack -= is->is_isninc[0]; 3130 tcp->th_ack = htonl(ack); 3131 fix_incksum(&tcp->th_sum, is->is_sumd[0]); 3132 } 3133 } 3134 if ((is->is_flags & IS_ISNACK) != 0) { 3135 if (rev == 0) { 3136 ack = ntohl(tcp->th_ack); 3137 ack -= is->is_isninc[1]; 3138 tcp->th_ack = htonl(ack); 3139 fix_incksum(&tcp->th_sum, is->is_sumd[1]); 3140 } 3141 } 3142 } 3143 3144 3145 /* ------------------------------------------------------------------------ */ 3146 /* Function: fr_statesync */ 3147 /* Returns: Nil */ 3148 /* Parameters: action(I) - type of synchronisation to do */ 3149 /* v(I) - IP version being sync'd (v4 or v6) */ 3150 /* ifp(I) - interface identifier associated with action */ 3151 /* name(I) - name associated with ifp parameter */ 3152 /* */ 3153 /* Walk through all state entries and if an interface pointer match is */ 3154 /* found then look it up again, based on its name in case the pointer has */ 3155 /* changed since last time. */ 3156 /* */ 3157 /* If ifp is passed in as being non-null then we are only doing updates for */ 3158 /* existing, matching, uses of it. 
*/ 3159 /* ------------------------------------------------------------------------ */ 3160 void fr_statesync(action, v, ifp, name, ifs) 3161 int action, v; 3162 void *ifp; 3163 char *name; 3164 ipf_stack_t *ifs; 3165 { 3166 ipstate_t *is; 3167 int i; 3168 3169 if (ifs->ifs_fr_running <= 0) 3170 return; 3171 3172 WRITE_ENTER(&ifs->ifs_ipf_state); 3173 3174 if (ifs->ifs_fr_running <= 0) { 3175 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3176 return; 3177 } 3178 3179 switch (action) 3180 { 3181 case IPFSYNC_RESYNC : 3182 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3183 if (v != 0 && is->is_v != v) 3184 continue; 3185 /* 3186 * Look up all the interface names in the state entry. 3187 */ 3188 for (i = 0; i < 4; i++) { 3189 is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], 3190 is->is_v, ifs); 3191 } 3192 } 3193 break; 3194 case IPFSYNC_NEWIFP : 3195 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3196 if (v != 0 && is->is_v != v) 3197 continue; 3198 /* 3199 * Look up all the interface names in the state entry. 3200 */ 3201 for (i = 0; i < 4; i++) { 3202 if (!strncmp(is->is_ifname[i], name, 3203 sizeof(is->is_ifname[i]))) 3204 is->is_ifp[i] = ifp; 3205 } 3206 } 3207 break; 3208 case IPFSYNC_OLDIFP : 3209 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3210 if (v != 0 && is->is_v != v) 3211 continue; 3212 /* 3213 * Look up all the interface names in the state entry. 
3214 */ 3215 for (i = 0; i < 4; i++) { 3216 if (is->is_ifp[i] == ifp) 3217 is->is_ifp[i] = (void *)-1; 3218 } 3219 } 3220 break; 3221 } 3222 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3223 } 3224 3225 3226 #if SOLARIS2 >= 10 3227 /* ------------------------------------------------------------------------ */ 3228 /* Function: fr_stateifindexsync */ 3229 /* Returns: void */ 3230 /* Parameters: ifp - current network interface descriptor (ifindex) */ 3231 /* newifp - new interface descriptor (new ifindex) */ 3232 /* ifs - pointer to IPF stack */ 3233 /* */ 3234 /* Write Locks: assumes ipf_mutex is locked */ 3235 /* */ 3236 /* Updates all interface indeces matching to ifp with new interface index */ 3237 /* value. */ 3238 /* ------------------------------------------------------------------------ */ 3239 void fr_stateifindexsync(ifp, newifp, ifs) 3240 void *ifp; 3241 void *newifp; 3242 ipf_stack_t *ifs; 3243 { 3244 ipstate_t *is; 3245 int i; 3246 3247 WRITE_ENTER(&ifs->ifs_ipf_state); 3248 3249 for (is = ifs->ifs_ips_list; is != NULL; is = is->is_next) { 3250 3251 for (i = 0; i < 4; i++) { 3252 if (is->is_ifp[i] == ifp) 3253 is->is_ifp[i] = newifp; 3254 } 3255 } 3256 3257 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3258 } 3259 #endif 3260 3261 /* ------------------------------------------------------------------------ */ 3262 /* Function: fr_delstate */ 3263 /* Returns: int - 0 = entry deleted, else ref count on entry */ 3264 /* Parameters: is(I) - pointer to state structure to delete */ 3265 /* why(I) - if not 0, log reason why it was deleted */ 3266 /* ifs - ipf stack instance */ 3267 /* Write Locks: ipf_state/ipf_global */ 3268 /* */ 3269 /* Deletes a state entry from the enumerated list as well as the hash table */ 3270 /* and timeout queue lists. Make adjustments to hash table statistics and */ 3271 /* global counters as required. 
*/ 3272 /* ------------------------------------------------------------------------ */ 3273 int fr_delstate(is, why, ifs) 3274 ipstate_t *is; 3275 int why; 3276 ipf_stack_t *ifs; 3277 { 3278 int removed = 0; 3279 3280 ASSERT(rw_write_held(&ifs->ifs_ipf_global.ipf_lk) == 0 || 3281 rw_write_held(&ifs->ifs_ipf_state.ipf_lk) == 0); 3282 3283 /* 3284 * Start by removing the entry from the hash table of state entries 3285 * so it will not be "used" again. 3286 * 3287 * It will remain in the "list" of state entries until all references 3288 * have been accounted for. 3289 */ 3290 if (is->is_phnext != NULL) { 3291 removed = 1; 3292 *is->is_phnext = is->is_hnext; 3293 if (is->is_hnext != NULL) 3294 is->is_hnext->is_phnext = is->is_phnext; 3295 if (ifs->ifs_ips_table[is->is_hv] == NULL) 3296 ifs->ifs_ips_stats.iss_inuse--; 3297 ifs->ifs_ips_stats.iss_bucketlen[is->is_hv]--; 3298 3299 is->is_phnext = NULL; 3300 is->is_hnext = NULL; 3301 } 3302 3303 /* 3304 * Because ifs->ifs_ips_stats.iss_wild is a count of entries in the state 3305 * table that have wildcard flags set, only decerement it once 3306 * and do it here. 3307 */ 3308 if (is->is_flags & (SI_WILDP|SI_WILDA)) { 3309 if (!(is->is_flags & SI_CLONED)) { 3310 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); 3311 } 3312 is->is_flags &= ~(SI_WILDP|SI_WILDA); 3313 } 3314 3315 /* 3316 * Next, remove it from the timeout queue it is in. 3317 */ 3318 fr_deletequeueentry(&is->is_sti); 3319 3320 is->is_me = NULL; 3321 3322 /* 3323 * If it is still in use by something else, do not go any further, 3324 * but note that at this point it is now an orphan. 3325 */ 3326 MUTEX_ENTER(&is->is_lock); 3327 if (is->is_ref > 1) { 3328 is->is_ref--; 3329 MUTEX_EXIT(&is->is_lock); 3330 if (removed) 3331 ifs->ifs_ips_stats.iss_orphans++; 3332 return (is->is_ref); 3333 } 3334 MUTEX_EXIT(&is->is_lock); 3335 3336 is->is_ref = 0; 3337 3338 /* 3339 * If entry has already been removed from table, 3340 * it means we're simply cleaning up an orphan. 
3341 */ 3342 if (!removed) 3343 ifs->ifs_ips_stats.iss_orphans--; 3344 3345 if (is->is_tqehead[0] != NULL) 3346 (void) fr_deletetimeoutqueue(is->is_tqehead[0]); 3347 3348 if (is->is_tqehead[1] != NULL) 3349 (void) fr_deletetimeoutqueue(is->is_tqehead[1]); 3350 3351 #ifdef IPFILTER_SYNC 3352 if (is->is_sync) 3353 ipfsync_del(is->is_sync); 3354 #endif 3355 #ifdef IPFILTER_SCAN 3356 (void) ipsc_detachis(is); 3357 #endif 3358 3359 /* 3360 * Now remove it from master list of state table entries. 3361 */ 3362 if (is->is_pnext != NULL) { 3363 *is->is_pnext = is->is_next; 3364 if (is->is_next != NULL) { 3365 is->is_next->is_pnext = is->is_pnext; 3366 is->is_next = NULL; 3367 } 3368 is->is_pnext = NULL; 3369 } 3370 3371 if (ifs->ifs_ipstate_logging != 0 && why != 0) 3372 ipstate_log(is, why, ifs); 3373 3374 if (is->is_rule != NULL) { 3375 is->is_rule->fr_statecnt--; 3376 (void)fr_derefrule(&is->is_rule, ifs); 3377 } 3378 3379 MUTEX_DESTROY(&is->is_lock); 3380 KFREE(is); 3381 ifs->ifs_ips_num--; 3382 3383 return (0); 3384 } 3385 3386 3387 /* ------------------------------------------------------------------------ */ 3388 /* Function: fr_timeoutstate */ 3389 /* Returns: Nil */ 3390 /* Parameters: ifs - ipf stack instance */ 3391 /* */ 3392 /* Slowly expire held state for thingslike UDP and ICMP. The algorithm */ 3393 /* used here is to keep the queue sorted with the oldest things at the top */ 3394 /* and the youngest at the bottom. So if the top one doesn't need to be */ 3395 /* expired then neither will any under it. 
*/ 3396 /* ------------------------------------------------------------------------ */ 3397 void fr_timeoutstate(ifs) 3398 ipf_stack_t *ifs; 3399 { 3400 ipftq_t *ifq, *ifqnext; 3401 ipftqent_t *tqe, *tqn; 3402 ipstate_t *is; 3403 SPL_INT(s); 3404 3405 SPL_NET(s); 3406 WRITE_ENTER(&ifs->ifs_ipf_state); 3407 for (ifq = ifs->ifs_ips_tqtqb; ifq != NULL; ifq = ifq->ifq_next) 3408 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { 3409 if (tqe->tqe_die > ifs->ifs_fr_ticks) 3410 break; 3411 tqn = tqe->tqe_next; 3412 is = tqe->tqe_parent; 3413 (void) fr_delstate(is, ISL_EXPIRE, ifs); 3414 } 3415 3416 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifq->ifq_next) { 3417 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { 3418 if (tqe->tqe_die > ifs->ifs_fr_ticks) 3419 break; 3420 tqn = tqe->tqe_next; 3421 is = tqe->tqe_parent; 3422 (void) fr_delstate(is, ISL_EXPIRE, ifs); 3423 } 3424 } 3425 3426 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) { 3427 ifqnext = ifq->ifq_next; 3428 3429 if (((ifq->ifq_flags & IFQF_DELETE) != 0) && 3430 (ifq->ifq_ref == 0)) { 3431 fr_freetimeoutqueue(ifq, ifs); 3432 } 3433 } 3434 3435 if (ifs->ifs_fr_state_doflush) { 3436 (void) fr_state_flush(FLUSH_TABLE_EXTRA, 0, ifs); 3437 ifs->ifs_fr_state_doflush = 0; 3438 } 3439 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3440 SPL_X(s); 3441 } 3442 3443 3444 /* ---------------------------------------------------------------------- */ 3445 /* Function: fr_state_flush */ 3446 /* Returns: int - 0 == success, -1 == failure */ 3447 /* Parameters: flush_option - how to flush the active State table */ 3448 /* proto - IP version to flush (4, 6, or both) */ 3449 /* ifs - ipf stack instance */ 3450 /* Write Locks: ipf_state */ 3451 /* */ 3452 /* Flush state tables. 
Three possible flush options currently defined: */ 3453 /* */ 3454 /* FLUSH_TABLE_ALL : Flush all state table entries */ 3455 /* */ 3456 /* FLUSH_TABLE_CLOSING : Flush entries with TCP connections which */ 3457 /* have started to close on both ends using */ 3458 /* ipf_flushclosing(). */ 3459 /* */ 3460 /* FLUSH_TABLE_EXTRA : First, flush entries which are "almost" closed. */ 3461 /* Then, if needed, flush entries with TCP */ 3462 /* connections which have been idle for a long */ 3463 /* time with ipf_extraflush(). */ 3464 /* ---------------------------------------------------------------------- */ 3465 static int fr_state_flush(flush_option, proto, ifs) 3466 int flush_option, proto; 3467 ipf_stack_t *ifs; 3468 { 3469 ipstate_t *is, *isn; 3470 int removed; 3471 SPL_INT(s); 3472 3473 removed = 0; 3474 3475 SPL_NET(s); 3476 switch (flush_option) 3477 { 3478 case FLUSH_TABLE_ALL: 3479 isn = ifs->ifs_ips_list; 3480 while ((is = isn) != NULL) { 3481 isn = is->is_next; 3482 if ((proto != 0) && (is->is_v != proto)) 3483 continue; 3484 if (fr_delstate(is, ISL_FLUSH, ifs) == 0) 3485 removed++; 3486 } 3487 break; 3488 3489 case FLUSH_TABLE_CLOSING: 3490 removed = ipf_flushclosing(STATE_FLUSH, 3491 IPF_TCPS_CLOSE_WAIT, 3492 ifs->ifs_ips_tqtqb, 3493 ifs->ifs_ips_utqe, 3494 ifs); 3495 break; 3496 3497 case FLUSH_TABLE_EXTRA: 3498 removed = ipf_flushclosing(STATE_FLUSH, 3499 IPF_TCPS_FIN_WAIT_2, 3500 ifs->ifs_ips_tqtqb, 3501 ifs->ifs_ips_utqe, 3502 ifs); 3503 3504 /* 3505 * Be sure we haven't done this in the last 10 seconds. 
3506 */ 3507 if (ifs->ifs_fr_ticks - ifs->ifs_ips_last_force_flush < 3508 IPF_TTLVAL(10)) 3509 break; 3510 ifs->ifs_ips_last_force_flush = ifs->ifs_fr_ticks; 3511 removed += ipf_extraflush(STATE_FLUSH, 3512 &ifs->ifs_ips_tqtqb[IPF_TCPS_ESTABLISHED], 3513 ifs->ifs_ips_utqe, 3514 ifs); 3515 break; 3516 3517 default: /* Flush Nothing */ 3518 break; 3519 } 3520 3521 SPL_X(s); 3522 return (removed); 3523 } 3524 3525 3526 /* ------------------------------------------------------------------------ */ 3527 /* Function: fr_tcp_age */ 3528 /* Returns: int - 1 == state transition made, 0 == no change (rejected) */ 3529 /* Parameters: tq(I) - pointer to timeout queue information */ 3530 /* fin(I) - pointer to packet information */ 3531 /* tqtab(I) - TCP timeout queue table this is in */ 3532 /* flags(I) - flags from state/NAT entry */ 3533 /* */ 3534 /* Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29: */ 3535 /* */ 3536 /* - (try to) base state transitions on real evidence only, */ 3537 /* i.e. packets that are sent and have been received by ipfilter; */ 3538 /* diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used. */ 3539 /* */ 3540 /* - deal with half-closed connections correctly; */ 3541 /* */ 3542 /* - store the state of the source in state[0] such that ipfstat */ 3543 /* displays the state as source/dest instead of dest/source; the calls */ 3544 /* to fr_tcp_age have been changed accordingly. */ 3545 /* */ 3546 /* Internal Parameters: */ 3547 /* */ 3548 /* state[0] = state of source (host that initiated connection) */ 3549 /* state[1] = state of dest (host that accepted the connection) */ 3550 /* */ 3551 /* dir == 0 : a packet from source to dest */ 3552 /* dir == 1 : a packet from dest to source */ 3553 /* */ 3554 /* Locking: it is assumed that the parent of the tqe structure is locked. 
*/ 3555 /* ------------------------------------------------------------------------ */ 3556 int fr_tcp_age(tqe, fin, tqtab, flags) 3557 ipftqent_t *tqe; 3558 fr_info_t *fin; 3559 ipftq_t *tqtab; 3560 int flags; 3561 { 3562 int dlen, ostate, nstate, rval, dir; 3563 u_char tcpflags; 3564 tcphdr_t *tcp; 3565 ipf_stack_t *ifs = fin->fin_ifs; 3566 3567 tcp = fin->fin_dp; 3568 3569 rval = 0; 3570 dir = fin->fin_rev; 3571 tcpflags = tcp->th_flags; 3572 dlen = fin->fin_dlen - (TCP_OFF(tcp) << 2); 3573 3574 ostate = tqe->tqe_state[1 - dir]; 3575 nstate = tqe->tqe_state[dir]; 3576 3577 DTRACE_PROBE4( 3578 indata, 3579 fr_info_t *, fin, 3580 int, ostate, 3581 int, nstate, 3582 u_char, tcpflags 3583 ); 3584 3585 if (tcpflags & TH_RST) { 3586 if (!(tcpflags & TH_PUSH) && !dlen) 3587 nstate = IPF_TCPS_CLOSED; 3588 else 3589 nstate = IPF_TCPS_CLOSE_WAIT; 3590 3591 /* 3592 * Once RST is received, we must advance peer's state to 3593 * CLOSE_WAIT. 3594 */ 3595 if (ostate <= IPF_TCPS_ESTABLISHED) { 3596 tqe->tqe_state[1 - dir] = IPF_TCPS_CLOSE_WAIT; 3597 } 3598 rval = 1; 3599 } else { 3600 3601 switch (nstate) 3602 { 3603 case IPF_TCPS_LISTEN: /* 0 */ 3604 if ((tcpflags & TH_OPENING) == TH_OPENING) { 3605 /* 3606 * 'dir' received an S and sends SA in 3607 * response, CLOSED -> SYN_RECEIVED 3608 */ 3609 nstate = IPF_TCPS_SYN_RECEIVED; 3610 rval = 1; 3611 } else if ((tcpflags & TH_OPENING) == TH_SYN) { 3612 /* 'dir' sent S, CLOSED -> SYN_SENT */ 3613 nstate = IPF_TCPS_SYN_SENT; 3614 rval = 1; 3615 } 3616 /* 3617 * the next piece of code makes it possible to get 3618 * already established connections into the state table 3619 * after a restart or reload of the filter rules; this 3620 * does not work when a strict 'flags S keep state' is 3621 * used for tcp connections of course 3622 */ 3623 if (((flags & IS_TCPFSM) == 0) && 3624 ((tcpflags & TH_ACKMASK) == TH_ACK)) { 3625 /* 3626 * we saw an A, guess 'dir' is in ESTABLISHED 3627 * mode 3628 */ 3629 switch (ostate) 3630 { 3631 case 
IPF_TCPS_LISTEN : 3632 case IPF_TCPS_SYN_RECEIVED : 3633 nstate = IPF_TCPS_HALF_ESTAB; 3634 rval = 1; 3635 break; 3636 case IPF_TCPS_HALF_ESTAB : 3637 case IPF_TCPS_ESTABLISHED : 3638 nstate = IPF_TCPS_ESTABLISHED; 3639 rval = 1; 3640 break; 3641 default : 3642 break; 3643 } 3644 } 3645 /* 3646 * TODO: besides regular ACK packets we can have other 3647 * packets as well; it is yet to be determined how we 3648 * should initialize the states in those cases 3649 */ 3650 break; 3651 3652 case IPF_TCPS_SYN_SENT: /* 1 */ 3653 if ((tcpflags & ~(TH_ECN|TH_CWR)) == TH_SYN) { 3654 /* 3655 * A retransmitted SYN packet. We do not reset 3656 * the timeout here to fr_tcptimeout because a 3657 * connection connect timeout does not renew 3658 * after every packet that is sent. We need to 3659 * set rval so as to indicate the packet has 3660 * passed the check for its flags being valid 3661 * in the TCP FSM. Setting rval to 2 has the 3662 * result of not resetting the timeout. 3663 */ 3664 rval = 2; 3665 } else if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == 3666 TH_ACK) { 3667 /* 3668 * we see an A from 'dir' which is in SYN_SENT 3669 * state: 'dir' sent an A in response to an SA 3670 * which it received, SYN_SENT -> ESTABLISHED 3671 */ 3672 nstate = IPF_TCPS_ESTABLISHED; 3673 rval = 1; 3674 } else if (tcpflags & TH_FIN) { 3675 /* 3676 * we see an F from 'dir' which is in SYN_SENT 3677 * state and wants to close its side of the 3678 * connection; SYN_SENT -> FIN_WAIT_1 3679 */ 3680 nstate = IPF_TCPS_FIN_WAIT_1; 3681 rval = 1; 3682 } else if ((tcpflags & TH_OPENING) == TH_OPENING) { 3683 /* 3684 * we see an SA from 'dir' which is already in 3685 * SYN_SENT state, this means we have a 3686 * simultaneous open; SYN_SENT -> SYN_RECEIVED 3687 */ 3688 nstate = IPF_TCPS_SYN_RECEIVED; 3689 rval = 1; 3690 } 3691 break; 3692 3693 case IPF_TCPS_SYN_RECEIVED: /* 2 */ 3694 if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) { 3695 /* 3696 * we see an A from 'dir' which was in 3697 * SYN_RECEIVED 
state so it must now be in 3698 * established state, SYN_RECEIVED -> 3699 * ESTABLISHED 3700 */ 3701 nstate = IPF_TCPS_ESTABLISHED; 3702 rval = 1; 3703 } else if ((tcpflags & ~(TH_ECN|TH_CWR)) == 3704 TH_OPENING) { 3705 /* 3706 * We see an SA from 'dir' which is already in 3707 * SYN_RECEIVED state. 3708 */ 3709 rval = 2; 3710 } else if (tcpflags & TH_FIN) { 3711 /* 3712 * we see an F from 'dir' which is in 3713 * SYN_RECEIVED state and wants to close its 3714 * side of the connection; SYN_RECEIVED -> 3715 * FIN_WAIT_1 3716 */ 3717 nstate = IPF_TCPS_FIN_WAIT_1; 3718 rval = 1; 3719 } 3720 break; 3721 3722 case IPF_TCPS_HALF_ESTAB: /* 3 */ 3723 if (tcpflags & TH_FIN) { 3724 nstate = IPF_TCPS_FIN_WAIT_1; 3725 rval = 1; 3726 } else if ((tcpflags & TH_ACKMASK) == TH_ACK) { 3727 /* 3728 * If we've picked up a connection in mid 3729 * flight, we could be looking at a follow on 3730 * packet from the same direction as the one 3731 * that created this state. Recognise it but 3732 * do not advance the entire connection's 3733 * state. 3734 */ 3735 switch (ostate) 3736 { 3737 case IPF_TCPS_LISTEN : 3738 case IPF_TCPS_SYN_SENT : 3739 case IPF_TCPS_SYN_RECEIVED : 3740 rval = 1; 3741 break; 3742 case IPF_TCPS_HALF_ESTAB : 3743 case IPF_TCPS_ESTABLISHED : 3744 nstate = IPF_TCPS_ESTABLISHED; 3745 rval = 1; 3746 break; 3747 default : 3748 break; 3749 } 3750 } 3751 break; 3752 3753 case IPF_TCPS_ESTABLISHED: /* 4 */ 3754 rval = 1; 3755 if (tcpflags & TH_FIN) { 3756 /* 3757 * 'dir' closed its side of the connection; 3758 * this gives us a half-closed connection; 3759 * ESTABLISHED -> FIN_WAIT_1 3760 */ 3761 if (ostate == IPF_TCPS_FIN_WAIT_1) { 3762 nstate = IPF_TCPS_CLOSING; 3763 } else { 3764 nstate = IPF_TCPS_FIN_WAIT_1; 3765 } 3766 } else if (tcpflags & TH_ACK) { 3767 /* 3768 * an ACK, should we exclude other flags here? 
3769 */ 3770 if (ostate == IPF_TCPS_FIN_WAIT_1) { 3771 /* 3772 * We know the other side did an active 3773 * close, so we are ACKing the recvd 3774 * FIN packet (does the window matching 3775 * code guarantee this?) and go into 3776 * CLOSE_WAIT state; this gives us a 3777 * half-closed connection 3778 */ 3779 nstate = IPF_TCPS_CLOSE_WAIT; 3780 } else if (ostate < IPF_TCPS_CLOSE_WAIT) { 3781 /* 3782 * still a fully established 3783 * connection reset timeout 3784 */ 3785 nstate = IPF_TCPS_ESTABLISHED; 3786 } 3787 } 3788 break; 3789 3790 case IPF_TCPS_CLOSE_WAIT: /* 5 */ 3791 rval = 1; 3792 if (tcpflags & TH_FIN) { 3793 /* 3794 * application closed and 'dir' sent a FIN, 3795 * we're now going into LAST_ACK state 3796 */ 3797 nstate = IPF_TCPS_LAST_ACK; 3798 } else { 3799 /* 3800 * we remain in CLOSE_WAIT because the other 3801 * side has closed already and we did not 3802 * close our side yet; reset timeout 3803 */ 3804 nstate = IPF_TCPS_CLOSE_WAIT; 3805 } 3806 break; 3807 3808 case IPF_TCPS_FIN_WAIT_1: /* 6 */ 3809 rval = 1; 3810 if ((tcpflags & TH_ACK) && 3811 ostate > IPF_TCPS_CLOSE_WAIT) { 3812 /* 3813 * if the other side is not active anymore 3814 * it has sent us a FIN packet that we are 3815 * ack'ing now with an ACK; this means both 3816 * sides have now closed the connection and 3817 * we go into LAST_ACK 3818 */ 3819 /* 3820 * XXX: how do we know we really are ACKing 3821 * the FIN packet here? does the window code 3822 * guarantee that? 
3823 */ 3824 nstate = IPF_TCPS_LAST_ACK; 3825 } else { 3826 /* 3827 * we closed our side of the connection 3828 * already but the other side is still active 3829 * (ESTABLISHED/CLOSE_WAIT); continue with 3830 * this half-closed connection 3831 */ 3832 nstate = IPF_TCPS_FIN_WAIT_1; 3833 } 3834 break; 3835 3836 case IPF_TCPS_CLOSING: /* 7 */ 3837 if ((tcpflags & (TH_FIN|TH_ACK)) == TH_ACK) { 3838 nstate = IPF_TCPS_TIME_WAIT; 3839 } 3840 rval = 1; 3841 break; 3842 3843 case IPF_TCPS_LAST_ACK: /* 8 */ 3844 /* 3845 * We want to reset timer here to keep state in table. 3846 * If we would allow the state to time out here, while 3847 * there would still be packets being retransmitted, we 3848 * would cut off line between the two peers preventing 3849 * them to close connection properly. 3850 */ 3851 rval = 1; 3852 break; 3853 3854 case IPF_TCPS_FIN_WAIT_2: /* 9 */ 3855 /* NOT USED */ 3856 break; 3857 3858 case IPF_TCPS_TIME_WAIT: /* 10 */ 3859 /* we're in 2MSL timeout now */ 3860 if (ostate == IPF_TCPS_LAST_ACK) { 3861 nstate = IPF_TCPS_CLOSED; 3862 rval = 1; 3863 } else { 3864 rval = 2; 3865 } 3866 break; 3867 3868 case IPF_TCPS_CLOSED: /* 11 */ 3869 rval = 2; 3870 break; 3871 3872 default : 3873 #if defined(_KERNEL) 3874 ASSERT(nstate >= IPF_TCPS_LISTEN && 3875 nstate <= IPF_TCPS_CLOSED); 3876 #else 3877 abort(); 3878 #endif 3879 break; 3880 } 3881 } 3882 3883 /* 3884 * If rval == 2 then do not update the queue position, but treat the 3885 * packet as being ok. 3886 */ 3887 if (rval == 2) { 3888 DTRACE_PROBE1(state_keeping_timer, int, nstate); 3889 rval = 1; 3890 } 3891 else if (rval == 1) { 3892 tqe->tqe_state[dir] = nstate; 3893 /* 3894 * The nstate can either advance to a new state, or remain 3895 * unchanged, resetting the timer by moving to the bottom of 3896 * the queue. 
3897 */ 3898 DTRACE_PROBE1(state_done, int, nstate); 3899 3900 if ((tqe->tqe_flags & TQE_RULEBASED) == 0) 3901 fr_movequeue(tqe, tqe->tqe_ifq, tqtab + nstate, ifs); 3902 } 3903 3904 return rval; 3905 } 3906 3907 3908 /* ------------------------------------------------------------------------ */ 3909 /* Function: ipstate_log */ 3910 /* Returns: Nil */ 3911 /* Parameters: is(I) - pointer to state structure */ 3912 /* type(I) - type of log entry to create */ 3913 /* */ 3914 /* Creates a state table log entry using the state structure and type info. */ 3915 /* passed in. Log packet/byte counts, source/destination address and other */ 3916 /* protocol specific information. */ 3917 /* ------------------------------------------------------------------------ */ 3918 void ipstate_log(is, type, ifs) 3919 struct ipstate *is; 3920 u_int type; 3921 ipf_stack_t *ifs; 3922 { 3923 #ifdef IPFILTER_LOG 3924 struct ipslog ipsl; 3925 size_t sizes[1]; 3926 void *items[1]; 3927 int types[1]; 3928 3929 /* 3930 * Copy information out of the ipstate_t structure and into the 3931 * structure used for logging. 
3932 */ 3933 ipsl.isl_type = type; 3934 ipsl.isl_pkts[0] = is->is_pkts[0] + is->is_icmppkts[0]; 3935 ipsl.isl_bytes[0] = is->is_bytes[0]; 3936 ipsl.isl_pkts[1] = is->is_pkts[1] + is->is_icmppkts[1]; 3937 ipsl.isl_bytes[1] = is->is_bytes[1]; 3938 ipsl.isl_pkts[2] = is->is_pkts[2] + is->is_icmppkts[2]; 3939 ipsl.isl_bytes[2] = is->is_bytes[2]; 3940 ipsl.isl_pkts[3] = is->is_pkts[3] + is->is_icmppkts[3]; 3941 ipsl.isl_bytes[3] = is->is_bytes[3]; 3942 ipsl.isl_src = is->is_src; 3943 ipsl.isl_dst = is->is_dst; 3944 ipsl.isl_p = is->is_p; 3945 ipsl.isl_v = is->is_v; 3946 ipsl.isl_flags = is->is_flags; 3947 ipsl.isl_tag = is->is_tag; 3948 ipsl.isl_rulen = is->is_rulen; 3949 (void) strncpy(ipsl.isl_group, is->is_group, FR_GROUPLEN); 3950 3951 if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) { 3952 ipsl.isl_sport = is->is_sport; 3953 ipsl.isl_dport = is->is_dport; 3954 if (ipsl.isl_p == IPPROTO_TCP) { 3955 ipsl.isl_state[0] = is->is_state[0]; 3956 ipsl.isl_state[1] = is->is_state[1]; 3957 } 3958 } else if (ipsl.isl_p == IPPROTO_ICMP) { 3959 ipsl.isl_itype = is->is_icmp.ici_type; 3960 } else if (ipsl.isl_p == IPPROTO_ICMPV6) { 3961 ipsl.isl_itype = is->is_icmp.ici_type; 3962 } else { 3963 ipsl.isl_ps.isl_filler[0] = 0; 3964 ipsl.isl_ps.isl_filler[1] = 0; 3965 } 3966 3967 items[0] = &ipsl; 3968 sizes[0] = sizeof(ipsl); 3969 types[0] = 0; 3970 3971 if (ipllog(IPL_LOGSTATE, NULL, items, sizes, types, 1, ifs)) { 3972 ATOMIC_INCL(ifs->ifs_ips_stats.iss_logged); 3973 } else { 3974 ATOMIC_INCL(ifs->ifs_ips_stats.iss_logfail); 3975 } 3976 #endif 3977 } 3978 3979 3980 #ifdef USE_INET6 3981 /* ------------------------------------------------------------------------ */ 3982 /* Function: fr_checkicmp6matchingstate */ 3983 /* Returns: ipstate_t* - NULL == no match found, */ 3984 /* else pointer to matching state entry */ 3985 /* Parameters: fin(I) - pointer to packet information */ 3986 /* Locks: NULL == no locks, else Read Lock on ipf_state */ 3987 /* */ 3988 /* If we've got 
an ICMPv6 error message, using the information stored in */ 3989 /* the ICMPv6 packet, look for a matching state table entry. */ 3990 /* ------------------------------------------------------------------------ */ 3991 static ipstate_t *fr_checkicmp6matchingstate(fin) 3992 fr_info_t *fin; 3993 { 3994 struct icmp6_hdr *ic6, *oic; 3995 int backward, i; 3996 ipstate_t *is, **isp; 3997 u_short sport, dport; 3998 i6addr_t dst, src; 3999 u_short savelen; 4000 icmpinfo_t *ic; 4001 fr_info_t ofin; 4002 tcphdr_t *tcp; 4003 ip6_t *oip6; 4004 u_char pr; 4005 u_int hv; 4006 ipf_stack_t *ifs = fin->fin_ifs; 4007 4008 /* 4009 * Does it at least have the return (basic) IP header ? 4010 * Is it an actual recognised ICMP error type? 4011 * Only a basic IP header (no options) should be with 4012 * an ICMP error header. 4013 */ 4014 if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN) || 4015 !(fin->fin_flx & FI_ICMPERR)) 4016 return NULL; 4017 4018 ic6 = fin->fin_dp; 4019 4020 oip6 = (ip6_t *)((char *)ic6 + ICMPERR_ICMPHLEN); 4021 if (fin->fin_plen < sizeof(*oip6)) 4022 return NULL; 4023 4024 bcopy((char *)fin, (char *)&ofin, sizeof(*fin)); 4025 ofin.fin_v = 6; 4026 ofin.fin_ifp = fin->fin_ifp; 4027 ofin.fin_out = !fin->fin_out; 4028 ofin.fin_m = NULL; /* if dereferenced, panic XXX */ 4029 ofin.fin_mp = NULL; /* if dereferenced, panic XXX */ 4030 4031 /* 4032 * We make a fin entry to be able to feed it to 4033 * matchsrcdst. Note that not all fields are necessary 4034 * but this is the cleanest way. Note further we fill 4035 * in fin_mp such that if someone uses it we'll get 4036 * a kernel panic. fr_matchsrcdst does not use this. 4037 * 4038 * watch out here, as ip is in host order and oip6 in network 4039 * order. Any change we make must be undone afterwards. 
4040 */ 4041 savelen = oip6->ip6_plen; 4042 oip6->ip6_plen = fin->fin_dlen - ICMPERR_ICMPHLEN; 4043 ofin.fin_flx = FI_NOCKSUM; 4044 ofin.fin_ip = (ip_t *)oip6; 4045 ofin.fin_plen = oip6->ip6_plen; 4046 (void) fr_makefrip(sizeof(*oip6), (ip_t *)oip6, &ofin); 4047 ofin.fin_flx &= ~(FI_BAD|FI_SHORT); 4048 oip6->ip6_plen = savelen; 4049 4050 if (oip6->ip6_nxt == IPPROTO_ICMPV6) { 4051 oic = (struct icmp6_hdr *)(oip6 + 1); 4052 /* 4053 * an ICMP error can only be generated as a result of an 4054 * ICMP query, not as the response on an ICMP error 4055 * 4056 * XXX theoretically ICMP_ECHOREP and the other reply's are 4057 * ICMP query's as well, but adding them here seems strange XXX 4058 */ 4059 if (!(oic->icmp6_type & ICMP6_INFOMSG_MASK)) 4060 return NULL; 4061 4062 /* 4063 * perform a lookup of the ICMP packet in the state table 4064 */ 4065 hv = (pr = oip6->ip6_nxt); 4066 src.in6 = oip6->ip6_src; 4067 hv += src.in4.s_addr; 4068 dst.in6 = oip6->ip6_dst; 4069 hv += dst.in4.s_addr; 4070 hv += oic->icmp6_id; 4071 hv += oic->icmp6_seq; 4072 hv = DOUBLE_HASH(hv, ifs); 4073 4074 READ_ENTER(&ifs->ifs_ipf_state); 4075 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 4076 ic = &is->is_icmp; 4077 isp = &is->is_hnext; 4078 if ((is->is_p == pr) && 4079 !(is->is_pass & FR_NOICMPERR) && 4080 (oic->icmp6_id == ic->ici_id) && 4081 (oic->icmp6_seq == ic->ici_seq) && 4082 (is = fr_matchsrcdst(&ofin, is, &src, 4083 &dst, NULL, FI_ICMPCMP))) { 4084 /* 4085 * in the state table ICMP query's are stored 4086 * with the type of the corresponding ICMP 4087 * response. 
Correct here 4088 */ 4089 if (((ic->ici_type == ICMP6_ECHO_REPLY) && 4090 (oic->icmp6_type == ICMP6_ECHO_REQUEST)) || 4091 (ic->ici_type - 1 == oic->icmp6_type )) { 4092 ifs->ifs_ips_stats.iss_hits++; 4093 backward = IP6_NEQ(&is->is_dst, &src); 4094 fin->fin_rev = !backward; 4095 i = (backward << 1) + fin->fin_out; 4096 is->is_icmppkts[i]++; 4097 return is; 4098 } 4099 } 4100 } 4101 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4102 return NULL; 4103 } 4104 4105 hv = (pr = oip6->ip6_nxt); 4106 src.in6 = oip6->ip6_src; 4107 hv += src.i6[0]; 4108 hv += src.i6[1]; 4109 hv += src.i6[2]; 4110 hv += src.i6[3]; 4111 dst.in6 = oip6->ip6_dst; 4112 hv += dst.i6[0]; 4113 hv += dst.i6[1]; 4114 hv += dst.i6[2]; 4115 hv += dst.i6[3]; 4116 4117 if ((oip6->ip6_nxt == IPPROTO_TCP) || (oip6->ip6_nxt == IPPROTO_UDP)) { 4118 tcp = (tcphdr_t *)(oip6 + 1); 4119 dport = tcp->th_dport; 4120 sport = tcp->th_sport; 4121 hv += dport; 4122 hv += sport; 4123 } else 4124 tcp = NULL; 4125 hv = DOUBLE_HASH(hv, ifs); 4126 4127 READ_ENTER(&ifs->ifs_ipf_state); 4128 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 4129 isp = &is->is_hnext; 4130 /* 4131 * Only allow this icmp though if the 4132 * encapsulated packet was allowed through the 4133 * other way around. Note that the minimal amount 4134 * of info present does not allow for checking against 4135 * tcp internals such as seq and ack numbers. 4136 */ 4137 if ((is->is_p != pr) || (is->is_v != 6) || 4138 (is->is_pass & FR_NOICMPERR)) 4139 continue; 4140 is = fr_matchsrcdst(&ofin, is, &src, &dst, tcp, FI_ICMPCMP); 4141 if (is != NULL) { 4142 ifs->ifs_ips_stats.iss_hits++; 4143 backward = IP6_NEQ(&is->is_dst, &src); 4144 fin->fin_rev = !backward; 4145 i = (backward << 1) + fin->fin_out; 4146 is->is_icmppkts[i]++; 4147 /* 4148 * we deliberately do not touch the timeouts 4149 * for the accompanying state table entry. 4150 * It remains to be seen if that is correct. 
XXX 4151 */ 4152 return is; 4153 } 4154 } 4155 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4156 return NULL; 4157 } 4158 #endif 4159 4160 4161 /* ------------------------------------------------------------------------ */ 4162 /* Function: fr_sttab_init */ 4163 /* Returns: Nil */ 4164 /* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */ 4165 /* */ 4166 /* Initialise the array of timeout queues for TCP. */ 4167 /* ------------------------------------------------------------------------ */ 4168 void fr_sttab_init(tqp, ifs) 4169 ipftq_t *tqp; 4170 ipf_stack_t *ifs; 4171 { 4172 int i; 4173 4174 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) { 4175 tqp[i].ifq_ttl = 0; 4176 tqp[i].ifq_ref = 1; 4177 tqp[i].ifq_head = NULL; 4178 tqp[i].ifq_tail = &tqp[i].ifq_head; 4179 tqp[i].ifq_next = tqp + i + 1; 4180 MUTEX_INIT(&tqp[i].ifq_lock, "ipftq tcp tab"); 4181 } 4182 tqp[IPF_TCP_NSTATES - 1].ifq_next = NULL; 4183 tqp[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcpclosed; 4184 tqp[IPF_TCPS_LISTEN].ifq_ttl = ifs->ifs_fr_tcptimeout; 4185 tqp[IPF_TCPS_SYN_SENT].ifq_ttl = ifs->ifs_fr_tcptimeout; 4186 tqp[IPF_TCPS_SYN_RECEIVED].ifq_ttl = ifs->ifs_fr_tcptimeout; 4187 tqp[IPF_TCPS_ESTABLISHED].ifq_ttl = ifs->ifs_fr_tcpidletimeout; 4188 tqp[IPF_TCPS_CLOSE_WAIT].ifq_ttl = ifs->ifs_fr_tcphalfclosed; 4189 tqp[IPF_TCPS_FIN_WAIT_1].ifq_ttl = ifs->ifs_fr_tcphalfclosed; 4190 tqp[IPF_TCPS_CLOSING].ifq_ttl = ifs->ifs_fr_tcptimeout; 4191 tqp[IPF_TCPS_LAST_ACK].ifq_ttl = ifs->ifs_fr_tcplastack; 4192 tqp[IPF_TCPS_FIN_WAIT_2].ifq_ttl = ifs->ifs_fr_tcpclosewait; 4193 tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = ifs->ifs_fr_tcptimeout; 4194 tqp[IPF_TCPS_HALF_ESTAB].ifq_ttl = ifs->ifs_fr_tcptimeout; 4195 } 4196 4197 4198 /* ------------------------------------------------------------------------ */ 4199 /* Function: fr_sttab_destroy */ 4200 /* Returns: Nil */ 4201 /* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */ 4202 /* */ 4203 /* Do whatever is necessary to "destroy" each of the entries 
in the array */ 4204 /* of timeout queues for TCP. */ 4205 /* ------------------------------------------------------------------------ */ 4206 void fr_sttab_destroy(tqp) 4207 ipftq_t *tqp; 4208 { 4209 int i; 4210 4211 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) 4212 MUTEX_DESTROY(&tqp[i].ifq_lock); 4213 } 4214 4215 4216 /* ------------------------------------------------------------------------ */ 4217 /* Function: fr_statederef */ 4218 /* Returns: Nil */ 4219 /* Parameters: isp(I) - pointer to pointer to state table entry */ 4220 /* ifs - ipf stack instance */ 4221 /* */ 4222 /* Decrement the reference counter for this state table entry and free it */ 4223 /* if there are no more things using it. */ 4224 /* */ 4225 /* Internal parameters: */ 4226 /* state[0] = state of source (host that initiated connection) */ 4227 /* state[1] = state of dest (host that accepted the connection) */ 4228 /* ------------------------------------------------------------------------ */ 4229 void fr_statederef(isp, ifs) 4230 ipstate_t **isp; 4231 ipf_stack_t *ifs; 4232 { 4233 ipstate_t *is; 4234 4235 is = *isp; 4236 *isp = NULL; 4237 4238 MUTEX_ENTER(&is->is_lock); 4239 if (is->is_ref > 1) { 4240 is->is_ref--; 4241 MUTEX_EXIT(&is->is_lock); 4242 #ifndef _KERNEL 4243 if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) || 4244 (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) { 4245 (void) fr_delstate(is, ISL_ORPHAN, ifs); 4246 } 4247 #endif 4248 return; 4249 } 4250 MUTEX_EXIT(&is->is_lock); 4251 4252 WRITE_ENTER(&ifs->ifs_ipf_state); 4253 (void) fr_delstate(is, ISL_EXPIRE, ifs); 4254 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4255 } 4256 4257 4258 /* ------------------------------------------------------------------------ */ 4259 /* Function: fr_setstatequeue */ 4260 /* Returns: Nil */ 4261 /* Parameters: is(I) - pointer to state structure */ 4262 /* rev(I) - forward(0) or reverse(1) direction */ 4263 /* Locks: ipf_state (read or write) */ 4264 /* */ 4265 /* Put the state entry on its default 
queue entry, using rev as a helped in */ 4266 /* determining which queue it should be placed on. */ 4267 /* ------------------------------------------------------------------------ */ 4268 void fr_setstatequeue(is, rev, ifs) 4269 ipstate_t *is; 4270 int rev; 4271 ipf_stack_t *ifs; 4272 { 4273 ipftq_t *oifq, *nifq; 4274 4275 4276 if ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) 4277 nifq = is->is_tqehead[rev]; 4278 else 4279 nifq = NULL; 4280 4281 if (nifq == NULL) { 4282 switch (is->is_p) 4283 { 4284 #ifdef USE_INET6 4285 case IPPROTO_ICMPV6 : 4286 if (rev == 1) 4287 nifq = &ifs->ifs_ips_icmpacktq; 4288 else 4289 nifq = &ifs->ifs_ips_icmptq; 4290 break; 4291 #endif 4292 case IPPROTO_ICMP : 4293 if (rev == 1) 4294 nifq = &ifs->ifs_ips_icmpacktq; 4295 else 4296 nifq = &ifs->ifs_ips_icmptq; 4297 break; 4298 case IPPROTO_TCP : 4299 nifq = ifs->ifs_ips_tqtqb + is->is_state[rev]; 4300 break; 4301 4302 case IPPROTO_UDP : 4303 if (rev == 1) 4304 nifq = &ifs->ifs_ips_udpacktq; 4305 else 4306 nifq = &ifs->ifs_ips_udptq; 4307 break; 4308 4309 default : 4310 nifq = &ifs->ifs_ips_iptq; 4311 break; 4312 } 4313 } 4314 4315 oifq = is->is_sti.tqe_ifq; 4316 /* 4317 * If it's currently on a timeout queue, move it from one queue to 4318 * another, else put it on the end of the newly determined queue. 4319 */ 4320 if (oifq != NULL) 4321 fr_movequeue(&is->is_sti, oifq, nifq, ifs); 4322 else 4323 fr_queueappend(&is->is_sti, nifq, is, ifs); 4324 return; 4325 } 4326 4327 4328 /* ------------------------------------------------------------------------ */ 4329 /* Function: fr_stateiter */ 4330 /* Returns: int - 0 == success, else error */ 4331 /* Parameters: token(I) - pointer to ipftoken structure */ 4332 /* itp(I) - pointer to ipfgeniter structure */ 4333 /* */ 4334 /* This function handles the SIOCGENITER ioctl for the state tables and */ 4335 /* walks through the list of entries in the state table list (ips_list.) 
*/ 4336 /* ------------------------------------------------------------------------ */ 4337 static int fr_stateiter(token, itp, ifs) 4338 ipftoken_t *token; 4339 ipfgeniter_t *itp; 4340 ipf_stack_t *ifs; 4341 { 4342 ipstate_t *is, *next, zero; 4343 int error, count; 4344 char *dst; 4345 4346 if (itp->igi_data == NULL) 4347 return EFAULT; 4348 4349 if (itp->igi_nitems == 0) 4350 return EINVAL; 4351 4352 if (itp->igi_type != IPFGENITER_STATE) 4353 return EINVAL; 4354 4355 error = 0; 4356 4357 READ_ENTER(&ifs->ifs_ipf_state); 4358 4359 /* 4360 * Get "previous" entry from the token and find the next entry. 4361 */ 4362 is = token->ipt_data; 4363 if (is == NULL) { 4364 next = ifs->ifs_ips_list; 4365 } else { 4366 next = is->is_next; 4367 } 4368 4369 dst = itp->igi_data; 4370 for (count = itp->igi_nitems; count > 0; count--) { 4371 /* 4372 * If we found an entry, add a reference to it and update the token. 4373 * Otherwise, zero out data to be returned and NULL out token. 4374 */ 4375 if (next != NULL) { 4376 MUTEX_ENTER(&next->is_lock); 4377 next->is_ref++; 4378 MUTEX_EXIT(&next->is_lock); 4379 token->ipt_data = next; 4380 } else { 4381 bzero(&zero, sizeof(zero)); 4382 next = &zero; 4383 token->ipt_data = NULL; 4384 } 4385 4386 /* 4387 * Safe to release lock now the we have a reference. 4388 */ 4389 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4390 4391 /* 4392 * Copy out data and clean up references and tokens. 4393 */ 4394 error = COPYOUT(next, dst, sizeof(*next)); 4395 if (error != 0) 4396 error = EFAULT; 4397 if (token->ipt_data == NULL) { 4398 ipf_freetoken(token, ifs); 4399 break; 4400 } else { 4401 if (is != NULL) 4402 fr_statederef(&is, ifs); 4403 if (next->is_next == NULL) { 4404 ipf_freetoken(token, ifs); 4405 break; 4406 } 4407 } 4408 4409 if ((count == 1) || (error != 0)) 4410 break; 4411 4412 READ_ENTER(&ifs->ifs_ipf_state); 4413 dst += sizeof(*next); 4414 is = next; 4415 next = is->is_next; 4416 } 4417 4418 return error; 4419 } 4420