1 /* 2 * Copyright (C) 1995-2003 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 7 * Use is subject to license terms. 8 */ 9 10 #if defined(KERNEL) || defined(_KERNEL) 11 # undef KERNEL 12 # undef _KERNEL 13 # define KERNEL 1 14 # define _KERNEL 1 15 #endif 16 #include <sys/errno.h> 17 #include <sys/types.h> 18 #include <sys/param.h> 19 #include <sys/file.h> 20 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ 21 defined(_KERNEL) 22 # include "opt_ipfilter_log.h" 23 #endif 24 #if defined(_KERNEL) && defined(__FreeBSD_version) && \ 25 (__FreeBSD_version >= 400000) && !defined(KLD_MODULE) 26 #include "opt_inet6.h" 27 #endif 28 #if !defined(_KERNEL) && !defined(__KERNEL__) 29 # include <stdio.h> 30 # include <stdlib.h> 31 # include <string.h> 32 # define _KERNEL 33 # ifdef __OpenBSD__ 34 struct file; 35 # endif 36 # include <sys/uio.h> 37 # undef _KERNEL 38 #endif 39 #if defined(_KERNEL) && (__FreeBSD_version >= 220000) 40 # include <sys/filio.h> 41 # include <sys/fcntl.h> 42 # if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM) 43 # include "opt_ipfilter.h" 44 # endif 45 #else 46 # include <sys/ioctl.h> 47 #endif 48 #include <sys/time.h> 49 #if !defined(linux) 50 # include <sys/protosw.h> 51 #endif 52 #include <sys/socket.h> 53 #if defined(_KERNEL) 54 # include <sys/systm.h> 55 # if !defined(__SVR4) && !defined(__svr4__) 56 # include <sys/mbuf.h> 57 # endif 58 #endif 59 #if defined(__SVR4) || defined(__svr4__) 60 # include <sys/filio.h> 61 # include <sys/byteorder.h> 62 # ifdef _KERNEL 63 # include <sys/dditypes.h> 64 # endif 65 # include <sys/stream.h> 66 # include <sys/kmem.h> 67 #endif 68 69 #include <net/if.h> 70 #ifdef sun 71 # include <net/af.h> 72 #endif 73 #include <net/route.h> 74 #include <netinet/in.h> 75 #include <netinet/in_systm.h> 76 #include <netinet/ip.h> 77 #include <netinet/tcp.h> 78 #if !defined(linux) 79 # include 
<netinet/ip_var.h> 80 #endif 81 #if !defined(__hpux) && !defined(linux) 82 # include <netinet/tcp_fsm.h> 83 #endif 84 #include <netinet/udp.h> 85 #include <netinet/ip_icmp.h> 86 #include "netinet/ip_compat.h" 87 #include <netinet/tcpip.h> 88 #include "netinet/ip_fil.h" 89 #include "netinet/ip_nat.h" 90 #include "netinet/ip_frag.h" 91 #include "netinet/ip_state.h" 92 #include "netinet/ip_proxy.h" 93 #include "netinet/ipf_stack.h" 94 #ifdef IPFILTER_SYNC 95 #include "netinet/ip_sync.h" 96 #endif 97 #ifdef IPFILTER_SCAN 98 #include "netinet/ip_scan.h" 99 #endif 100 #ifdef USE_INET6 101 #include <netinet/icmp6.h> 102 #endif 103 #if (__FreeBSD_version >= 300000) 104 # include <sys/malloc.h> 105 # if defined(_KERNEL) && !defined(IPFILTER_LKM) 106 # include <sys/libkern.h> 107 # include <sys/systm.h> 108 # endif 109 #endif 110 /* END OF INCLUDES */ 111 112 113 #if !defined(lint) 114 static const char sccsid[] = "@(#)ip_state.c 1.8 6/5/96 (C) 1993-2000 Darren Reed"; 115 static const char rcsid[] = "@(#)$Id: ip_state.c,v 2.186.2.36 2005/08/11 19:58:03 darrenr Exp $"; 116 #endif 117 118 #ifdef USE_INET6 119 static ipstate_t *fr_checkicmp6matchingstate __P((fr_info_t *)); 120 #endif 121 static ipstate_t *fr_matchsrcdst __P((fr_info_t *, ipstate_t *, i6addr_t *, 122 i6addr_t *, tcphdr_t *, u_32_t)); 123 static ipstate_t *fr_checkicmpmatchingstate __P((fr_info_t *)); 124 static int fr_state_flush __P((int, int, ipf_stack_t *)); 125 static ips_stat_t *fr_statetstats __P((ipf_stack_t *)); 126 static int fr_state_remove __P((caddr_t, ipf_stack_t *)); 127 static void fr_ipsmove __P((ipstate_t *, u_int, ipf_stack_t *)); 128 static int fr_tcpstate __P((fr_info_t *, tcphdr_t *, ipstate_t *)); 129 static int fr_tcpoptions __P((fr_info_t *, tcphdr_t *, tcpdata_t *)); 130 static ipstate_t *fr_stclone __P((fr_info_t *, tcphdr_t *, ipstate_t *)); 131 static void fr_fixinisn __P((fr_info_t *, ipstate_t *)); 132 static void fr_fixoutisn __P((fr_info_t *, ipstate_t *)); 133 static void 
fr_checknewisn __P((fr_info_t *, ipstate_t *)); 134 static int fr_stateiter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 135 136 int fr_stputent __P((caddr_t, ipf_stack_t *)); 137 int fr_stgetent __P((caddr_t, ipf_stack_t *)); 138 139 #define ONE_DAY IPF_TTLVAL(1 * 86400) /* 1 day */ 140 #define FIVE_DAYS (5 * ONE_DAY) 141 #define DOUBLE_HASH(x, ifs) \ 142 (((x) + ifs->ifs_ips_seed[(x) % ifs->ifs_fr_statesize]) % ifs->ifs_fr_statesize) 143 144 145 /* ------------------------------------------------------------------------ */ 146 /* Function: fr_stateinit */ 147 /* Returns: int - 0 == success, -1 == failure */ 148 /* Parameters: ifs - ipf stack instance */ 149 /* */ 150 /* Initialise all the global variables used within the state code. */ 151 /* This action also includes initiailising locks. */ 152 /* ------------------------------------------------------------------------ */ 153 int fr_stateinit(ifs) 154 ipf_stack_t *ifs; 155 { 156 #if defined(NEED_LOCAL_RAND) || !defined(_KERNEL) 157 struct timeval tv; 158 #endif 159 int i; 160 161 KMALLOCS(ifs->ifs_ips_table, ipstate_t **, 162 ifs->ifs_fr_statesize * sizeof(ipstate_t *)); 163 if (ifs->ifs_ips_table == NULL) 164 return -1; 165 bzero((char *)ifs->ifs_ips_table, 166 ifs->ifs_fr_statesize * sizeof(ipstate_t *)); 167 168 KMALLOCS(ifs->ifs_ips_seed, u_long *, 169 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed)); 170 if (ifs->ifs_ips_seed == NULL) 171 return -2; 172 #if defined(NEED_LOCAL_RAND) || !defined(_KERNEL) 173 tv.tv_sec = 0; 174 GETKTIME(&tv); 175 #endif 176 for (i = 0; i < ifs->ifs_fr_statesize; i++) { 177 /* 178 * XXX - ips_seed[X] should be a random number of sorts. 
179 */ 180 #if !defined(NEED_LOCAL_RAND) && defined(_KERNEL) 181 ifs->ifs_ips_seed[i] = ipf_random(); 182 #else 183 ifs->ifs_ips_seed[i] = ((u_long)ifs->ifs_ips_seed + i) * 184 ifs->ifs_fr_statesize; 185 ifs->ifs_ips_seed[i] += tv.tv_sec; 186 ifs->ifs_ips_seed[i] *= (u_long)ifs->ifs_ips_seed; 187 ifs->ifs_ips_seed[i] ^= 0x5a5aa5a5; 188 ifs->ifs_ips_seed[i] *= ifs->ifs_fr_statemax; 189 #endif 190 } 191 192 /* fill icmp reply type table */ 193 for (i = 0; i <= ICMP_MAXTYPE; i++) 194 icmpreplytype4[i] = -1; 195 icmpreplytype4[ICMP_ECHO] = ICMP_ECHOREPLY; 196 icmpreplytype4[ICMP_TSTAMP] = ICMP_TSTAMPREPLY; 197 icmpreplytype4[ICMP_IREQ] = ICMP_IREQREPLY; 198 icmpreplytype4[ICMP_MASKREQ] = ICMP_MASKREPLY; 199 #ifdef USE_INET6 200 /* fill icmp reply type table */ 201 for (i = 0; i <= ICMP6_MAXTYPE; i++) 202 icmpreplytype6[i] = -1; 203 icmpreplytype6[ICMP6_ECHO_REQUEST] = ICMP6_ECHO_REPLY; 204 icmpreplytype6[ICMP6_MEMBERSHIP_QUERY] = ICMP6_MEMBERSHIP_REPORT; 205 icmpreplytype6[ICMP6_NI_QUERY] = ICMP6_NI_REPLY; 206 icmpreplytype6[ND_ROUTER_SOLICIT] = ND_ROUTER_ADVERT; 207 icmpreplytype6[ND_NEIGHBOR_SOLICIT] = ND_NEIGHBOR_ADVERT; 208 #endif 209 210 KMALLOCS(ifs->ifs_ips_stats.iss_bucketlen, u_long *, 211 ifs->ifs_fr_statesize * sizeof(u_long)); 212 if (ifs->ifs_ips_stats.iss_bucketlen == NULL) 213 return -1; 214 bzero((char *)ifs->ifs_ips_stats.iss_bucketlen, 215 ifs->ifs_fr_statesize * sizeof(u_long)); 216 217 if (ifs->ifs_fr_state_maxbucket == 0) { 218 for (i = ifs->ifs_fr_statesize; i > 0; i >>= 1) 219 ifs->ifs_fr_state_maxbucket++; 220 ifs->ifs_fr_state_maxbucket *= 2; 221 } 222 223 fr_sttab_init(ifs->ifs_ips_tqtqb, ifs); 224 ifs->ifs_ips_tqtqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_ips_udptq; 225 ifs->ifs_ips_udptq.ifq_ttl = (u_long)ifs->ifs_fr_udptimeout; 226 ifs->ifs_ips_udptq.ifq_ref = 1; 227 ifs->ifs_ips_udptq.ifq_head = NULL; 228 ifs->ifs_ips_udptq.ifq_tail = &ifs->ifs_ips_udptq.ifq_head; 229 MUTEX_INIT(&ifs->ifs_ips_udptq.ifq_lock, "ipftq udp tab"); 230 
ifs->ifs_ips_udptq.ifq_next = &ifs->ifs_ips_udpacktq; 231 ifs->ifs_ips_udpacktq.ifq_ttl = (u_long)ifs->ifs_fr_udpacktimeout; 232 ifs->ifs_ips_udpacktq.ifq_ref = 1; 233 ifs->ifs_ips_udpacktq.ifq_head = NULL; 234 ifs->ifs_ips_udpacktq.ifq_tail = &ifs->ifs_ips_udpacktq.ifq_head; 235 MUTEX_INIT(&ifs->ifs_ips_udpacktq.ifq_lock, "ipftq udpack tab"); 236 ifs->ifs_ips_udpacktq.ifq_next = &ifs->ifs_ips_icmptq; 237 ifs->ifs_ips_icmptq.ifq_ttl = (u_long)ifs->ifs_fr_icmptimeout; 238 ifs->ifs_ips_icmptq.ifq_ref = 1; 239 ifs->ifs_ips_icmptq.ifq_head = NULL; 240 ifs->ifs_ips_icmptq.ifq_tail = &ifs->ifs_ips_icmptq.ifq_head; 241 MUTEX_INIT(&ifs->ifs_ips_icmptq.ifq_lock, "ipftq icmp tab"); 242 ifs->ifs_ips_icmptq.ifq_next = &ifs->ifs_ips_icmpacktq; 243 ifs->ifs_ips_icmpacktq.ifq_ttl = (u_long)ifs->ifs_fr_icmpacktimeout; 244 ifs->ifs_ips_icmpacktq.ifq_ref = 1; 245 ifs->ifs_ips_icmpacktq.ifq_head = NULL; 246 ifs->ifs_ips_icmpacktq.ifq_tail = &ifs->ifs_ips_icmpacktq.ifq_head; 247 MUTEX_INIT(&ifs->ifs_ips_icmpacktq.ifq_lock, "ipftq icmpack tab"); 248 ifs->ifs_ips_icmpacktq.ifq_next = &ifs->ifs_ips_iptq; 249 ifs->ifs_ips_iptq.ifq_ttl = (u_long)ifs->ifs_fr_iptimeout; 250 ifs->ifs_ips_iptq.ifq_ref = 1; 251 ifs->ifs_ips_iptq.ifq_head = NULL; 252 ifs->ifs_ips_iptq.ifq_tail = &ifs->ifs_ips_iptq.ifq_head; 253 MUTEX_INIT(&ifs->ifs_ips_iptq.ifq_lock, "ipftq ip tab"); 254 ifs->ifs_ips_iptq.ifq_next = &ifs->ifs_ips_deletetq; 255 /* entry's ttl in deletetq is just 1 tick */ 256 ifs->ifs_ips_deletetq.ifq_ttl = (u_long) 1; 257 ifs->ifs_ips_deletetq.ifq_ref = 1; 258 ifs->ifs_ips_deletetq.ifq_head = NULL; 259 ifs->ifs_ips_deletetq.ifq_tail = &ifs->ifs_ips_deletetq.ifq_head; 260 MUTEX_INIT(&ifs->ifs_ips_deletetq.ifq_lock, "state delete queue"); 261 ifs->ifs_ips_deletetq.ifq_next = NULL; 262 263 RWLOCK_INIT(&ifs->ifs_ipf_state, "ipf IP state rwlock"); 264 MUTEX_INIT(&ifs->ifs_ipf_stinsert, "ipf state insert mutex"); 265 ifs->ifs_fr_state_init = 1; 266 267 ifs->ifs_ips_last_force_flush = 
ifs->ifs_fr_ticks; 268 return 0; 269 } 270 271 272 /* ------------------------------------------------------------------------ */ 273 /* Function: fr_stateunload */ 274 /* Returns: Nil */ 275 /* Parameters: ifs - ipf stack instance */ 276 /* */ 277 /* Release and destroy any resources acquired or initialised so that */ 278 /* IPFilter can be unloaded or re-initialised. */ 279 /* ------------------------------------------------------------------------ */ 280 void fr_stateunload(ifs) 281 ipf_stack_t *ifs; 282 { 283 ipftq_t *ifq, *ifqnext; 284 ipstate_t *is; 285 286 while ((is = ifs->ifs_ips_list) != NULL) 287 (void) fr_delstate(is, 0, ifs); 288 289 /* 290 * Proxy timeout queues are not cleaned here because although they 291 * exist on the state list, appr_unload is called after fr_stateunload 292 * and the proxies actually are responsible for them being created. 293 * Should the proxy timeouts have their own list? There's no real 294 * justification as this is the only complicationA 295 */ 296 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) { 297 ifqnext = ifq->ifq_next; 298 if (((ifq->ifq_flags & IFQF_PROXY) == 0) && 299 (fr_deletetimeoutqueue(ifq) == 0)) 300 fr_freetimeoutqueue(ifq, ifs); 301 } 302 303 ifs->ifs_ips_stats.iss_inuse = 0; 304 ifs->ifs_ips_num = 0; 305 306 if (ifs->ifs_fr_state_init == 1) { 307 fr_sttab_destroy(ifs->ifs_ips_tqtqb); 308 MUTEX_DESTROY(&ifs->ifs_ips_udptq.ifq_lock); 309 MUTEX_DESTROY(&ifs->ifs_ips_icmptq.ifq_lock); 310 MUTEX_DESTROY(&ifs->ifs_ips_udpacktq.ifq_lock); 311 MUTEX_DESTROY(&ifs->ifs_ips_icmpacktq.ifq_lock); 312 MUTEX_DESTROY(&ifs->ifs_ips_iptq.ifq_lock); 313 MUTEX_DESTROY(&ifs->ifs_ips_deletetq.ifq_lock); 314 } 315 316 if (ifs->ifs_ips_table != NULL) { 317 KFREES(ifs->ifs_ips_table, 318 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_table)); 319 ifs->ifs_ips_table = NULL; 320 } 321 322 if (ifs->ifs_ips_seed != NULL) { 323 KFREES(ifs->ifs_ips_seed, 324 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed)); 325 
ifs->ifs_ips_seed = NULL; 326 } 327 328 if (ifs->ifs_ips_stats.iss_bucketlen != NULL) { 329 KFREES(ifs->ifs_ips_stats.iss_bucketlen, 330 ifs->ifs_fr_statesize * sizeof(u_long)); 331 ifs->ifs_ips_stats.iss_bucketlen = NULL; 332 } 333 334 if (ifs->ifs_fr_state_maxbucket_reset == 1) 335 ifs->ifs_fr_state_maxbucket = 0; 336 337 if (ifs->ifs_fr_state_init == 1) { 338 ifs->ifs_fr_state_init = 0; 339 RW_DESTROY(&ifs->ifs_ipf_state); 340 MUTEX_DESTROY(&ifs->ifs_ipf_stinsert); 341 } 342 } 343 344 345 /* ------------------------------------------------------------------------ */ 346 /* Function: fr_statetstats */ 347 /* Returns: ips_state_t* - pointer to state stats structure */ 348 /* Parameters: Nil */ 349 /* */ 350 /* Put all the current numbers and pointers into a single struct and return */ 351 /* a pointer to it. */ 352 /* ------------------------------------------------------------------------ */ 353 static ips_stat_t *fr_statetstats(ifs) 354 ipf_stack_t *ifs; 355 { 356 ifs->ifs_ips_stats.iss_active = ifs->ifs_ips_num; 357 ifs->ifs_ips_stats.iss_statesize = ifs->ifs_fr_statesize; 358 ifs->ifs_ips_stats.iss_statemax = ifs->ifs_fr_statemax; 359 ifs->ifs_ips_stats.iss_table = ifs->ifs_ips_table; 360 ifs->ifs_ips_stats.iss_list = ifs->ifs_ips_list; 361 ifs->ifs_ips_stats.iss_ticks = ifs->ifs_fr_ticks; 362 return &ifs->ifs_ips_stats; 363 } 364 365 /* ------------------------------------------------------------------------ */ 366 /* Function: fr_state_remove */ 367 /* Returns: int - 0 == success, != 0 == failure */ 368 /* Parameters: data(I) - pointer to state structure to delete from table */ 369 /* ifs - ipf stack instance */ 370 /* */ 371 /* Search for a state structure that matches the one passed, according to */ 372 /* the IP addresses and other protocol specific information. 
*/ 373 /* ------------------------------------------------------------------------ */ 374 static int fr_state_remove(data, ifs) 375 caddr_t data; 376 ipf_stack_t *ifs; 377 { 378 ipstate_t *sp, st; 379 int error; 380 381 sp = &st; 382 error = fr_inobj(data, &st, IPFOBJ_IPSTATE); 383 if (error) 384 return EFAULT; 385 386 WRITE_ENTER(&ifs->ifs_ipf_state); 387 for (sp = ifs->ifs_ips_list; sp; sp = sp->is_next) 388 if ((sp->is_p == st.is_p) && (sp->is_v == st.is_v) && 389 !bcmp((caddr_t)&sp->is_src, (caddr_t)&st.is_src, 390 sizeof(st.is_src)) && 391 !bcmp((caddr_t)&sp->is_dst, (caddr_t)&st.is_dst, 392 sizeof(st.is_dst)) && 393 !bcmp((caddr_t)&sp->is_ps, (caddr_t)&st.is_ps, 394 sizeof(st.is_ps))) { 395 (void) fr_delstate(sp, ISL_REMOVE, ifs); 396 RWLOCK_EXIT(&ifs->ifs_ipf_state); 397 return 0; 398 } 399 RWLOCK_EXIT(&ifs->ifs_ipf_state); 400 return ESRCH; 401 } 402 403 404 /* ------------------------------------------------------------------------ */ 405 /* Function: fr_state_ioctl */ 406 /* Returns: int - 0 == success, != 0 == failure */ 407 /* Parameters: data(I) - pointer to ioctl data */ 408 /* cmd(I) - ioctl command integer */ 409 /* mode(I) - file mode bits used with open */ 410 /* uid(I) - uid of caller */ 411 /* ctx(I) - pointer to give the uid context */ 412 /* ifs - ipf stack instance */ 413 /* */ 414 /* Processes an ioctl call made to operate on the IP Filter state device. */ 415 /* ------------------------------------------------------------------------ */ 416 int fr_state_ioctl(data, cmd, mode, uid, ctx, ifs) 417 caddr_t data; 418 ioctlcmd_t cmd; 419 int mode, uid; 420 void *ctx; 421 ipf_stack_t *ifs; 422 { 423 int arg, ret, error = 0; 424 425 switch (cmd) 426 { 427 /* 428 * Delete an entry from the state table. 
429 */ 430 case SIOCDELST : 431 error = fr_state_remove(data, ifs); 432 break; 433 /* 434 * Flush the state table 435 */ 436 case SIOCIPFFL : 437 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 438 if (error != 0) { 439 error = EFAULT; 440 } else { 441 if (VALID_TABLE_FLUSH_OPT(arg)) { 442 WRITE_ENTER(&ifs->ifs_ipf_state); 443 ret = fr_state_flush(arg, 4, ifs); 444 RWLOCK_EXIT(&ifs->ifs_ipf_state); 445 error = BCOPYOUT((char *)&ret, data, 446 sizeof(ret)); 447 if (error != 0) 448 return EFAULT; 449 } else { 450 error = EINVAL; 451 } 452 } 453 break; 454 455 #ifdef USE_INET6 456 case SIOCIPFL6 : 457 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 458 if (error != 0) { 459 error = EFAULT; 460 } else { 461 if (VALID_TABLE_FLUSH_OPT(arg)) { 462 WRITE_ENTER(&ifs->ifs_ipf_state); 463 ret = fr_state_flush(arg, 6, ifs); 464 RWLOCK_EXIT(&ifs->ifs_ipf_state); 465 error = BCOPYOUT((char *)&ret, data, 466 sizeof(ret)); 467 if (error != 0) 468 return EFAULT; 469 } else { 470 error = EINVAL; 471 } 472 } 473 break; 474 #endif 475 #ifdef IPFILTER_LOG 476 /* 477 * Flush the state log. 478 */ 479 case SIOCIPFFB : 480 if (!(mode & FWRITE)) 481 error = EPERM; 482 else { 483 int tmp; 484 485 tmp = ipflog_clear(IPL_LOGSTATE, ifs); 486 error = BCOPYOUT((char *)&tmp, data, sizeof(tmp)); 487 if (error != 0) 488 error = EFAULT; 489 } 490 break; 491 /* 492 * Turn logging of state information on/off. 493 */ 494 case SIOCSETLG : 495 if (!(mode & FWRITE)) { 496 error = EPERM; 497 } else { 498 error = BCOPYIN((char *)data, 499 (char *)&ifs->ifs_ipstate_logging, 500 sizeof(ifs->ifs_ipstate_logging)); 501 if (error != 0) 502 error = EFAULT; 503 } 504 break; 505 /* 506 * Return the current state of logging. 507 */ 508 case SIOCGETLG : 509 error = BCOPYOUT((char *)&ifs->ifs_ipstate_logging, 510 (char *)data, 511 sizeof(ifs->ifs_ipstate_logging)); 512 if (error != 0) 513 error = EFAULT; 514 break; 515 /* 516 * Return the number of bytes currently waiting to be read. 
517 */ 518 case FIONREAD : 519 arg = ifs->ifs_iplused[IPL_LOGSTATE]; /* returned in an int */ 520 error = BCOPYOUT((char *)&arg, data, sizeof(arg)); 521 if (error != 0) 522 error = EFAULT; 523 break; 524 #endif 525 /* 526 * Get the current state statistics. 527 */ 528 case SIOCGETFS : 529 error = fr_outobj(data, fr_statetstats(ifs), IPFOBJ_STATESTAT); 530 break; 531 /* 532 * Lock/Unlock the state table. (Locking prevents any changes, which 533 * means no packets match). 534 */ 535 case SIOCSTLCK : 536 if (!(mode & FWRITE)) { 537 error = EPERM; 538 } else { 539 error = fr_lock(data, &ifs->ifs_fr_state_lock); 540 } 541 break; 542 /* 543 * Add an entry to the current state table. 544 */ 545 case SIOCSTPUT : 546 if (!ifs->ifs_fr_state_lock || !(mode & FWRITE)) { 547 error = EACCES; 548 break; 549 } 550 error = fr_stputent(data, ifs); 551 break; 552 /* 553 * Get a state table entry. 554 */ 555 case SIOCSTGET : 556 if (!ifs->ifs_fr_state_lock) { 557 error = EACCES; 558 break; 559 } 560 error = fr_stgetent(data, ifs); 561 break; 562 563 case SIOCGENITER : 564 { 565 ipftoken_t *token; 566 ipfgeniter_t iter; 567 568 error = fr_inobj(data, &iter, IPFOBJ_GENITER); 569 if (error != 0) 570 break; 571 572 token = ipf_findtoken(IPFGENITER_STATE, uid, ctx, ifs); 573 if (token != NULL) 574 error = fr_stateiter(token, &iter, ifs); 575 else 576 error = ESRCH; 577 RWLOCK_EXIT(&ifs->ifs_ipf_tokens); 578 break; 579 } 580 581 case SIOCIPFDELTOK : 582 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 583 if (error != 0) { 584 error = EFAULT; 585 } else { 586 error = ipf_deltoken(arg, uid, ctx, ifs); 587 } 588 break; 589 590 default : 591 error = EINVAL; 592 break; 593 } 594 return error; 595 } 596 597 598 /* ------------------------------------------------------------------------ */ 599 /* Function: fr_stgetent */ 600 /* Returns: int - 0 == success, != 0 == failure */ 601 /* Parameters: data(I) - pointer to state structure to retrieve from table */ 602 /* */ 603 /* Copy out state 
information from the kernel to a user space process. If */ 604 /* there is a filter rule associated with the state entry, copy that out */ 605 /* as well. The entry to copy out is taken from the value of "ips_next" in */ 606 /* the struct passed in and if not null and not found in the list of current*/ 607 /* state entries, the retrieval fails. */ 608 /* ------------------------------------------------------------------------ */ 609 int fr_stgetent(data, ifs) 610 caddr_t data; 611 ipf_stack_t *ifs; 612 { 613 ipstate_t *is, *isn; 614 ipstate_save_t ips; 615 int error; 616 617 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); 618 if (error) 619 return EFAULT; 620 621 isn = ips.ips_next; 622 if (isn == NULL) { 623 isn = ifs->ifs_ips_list; 624 if (isn == NULL) { 625 if (ips.ips_next == NULL) 626 return ENOENT; 627 return 0; 628 } 629 } else { 630 /* 631 * Make sure the pointer we're copying from exists in the 632 * current list of entries. Security precaution to prevent 633 * copying of random kernel data. 634 */ 635 for (is = ifs->ifs_ips_list; is; is = is->is_next) 636 if (is == isn) 637 break; 638 if (!is) 639 return ESRCH; 640 } 641 ips.ips_next = isn->is_next; 642 bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is)); 643 ips.ips_rule = isn->is_rule; 644 if (isn->is_rule != NULL) 645 bcopy((char *)isn->is_rule, (char *)&ips.ips_fr, 646 sizeof(ips.ips_fr)); 647 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); 648 if (error) 649 return EFAULT; 650 return 0; 651 } 652 653 654 /* ------------------------------------------------------------------------ */ 655 /* Function: fr_stputent */ 656 /* Returns: int - 0 == success, != 0 == failure */ 657 /* Parameters: data(I) - pointer to state information struct */ 658 /* ifs - ipf stack instance */ 659 /* */ 660 /* This function implements the SIOCSTPUT ioctl: insert a state entry into */ 661 /* the state table. If the state info. 
includes a pointer to a filter rule */ 662 /* then also add in an orphaned rule (will not show up in any "ipfstat -io" */ 663 /* output. */ 664 /* ------------------------------------------------------------------------ */ 665 int fr_stputent(data, ifs) 666 caddr_t data; 667 ipf_stack_t *ifs; 668 { 669 ipstate_t *is, *isn; 670 ipstate_save_t ips; 671 int error, i; 672 frentry_t *fr; 673 char *name; 674 675 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); 676 if (error) 677 return EFAULT; 678 679 /* 680 * Trigger automatic call to fr_state_flush() if the 681 * table has reached capacity specified by hi watermark. 682 */ 683 if (ST_TAB_WATER_LEVEL(ifs) > ifs->ifs_state_flush_level_hi) 684 ifs->ifs_fr_state_doflush = 1; 685 686 /* 687 * If automatic flushing did not do its job, and the table 688 * has filled up, don't try to create a new entry. 689 */ 690 if (ifs->ifs_ips_num >= ifs->ifs_fr_statemax) { 691 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 692 return ENOMEM; 693 } 694 695 KMALLOC(isn, ipstate_t *); 696 if (isn == NULL) 697 return ENOMEM; 698 699 bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn)); 700 bzero((char *)isn, offsetof(struct ipstate, is_pkts)); 701 isn->is_sti.tqe_pnext = NULL; 702 isn->is_sti.tqe_next = NULL; 703 isn->is_sti.tqe_ifq = NULL; 704 isn->is_sti.tqe_parent = isn; 705 isn->is_ifp[0] = NULL; 706 isn->is_ifp[1] = NULL; 707 isn->is_ifp[2] = NULL; 708 isn->is_ifp[3] = NULL; 709 isn->is_sync = NULL; 710 fr = ips.ips_rule; 711 712 if (fr == NULL) { 713 READ_ENTER(&ifs->ifs_ipf_state); 714 fr_stinsert(isn, 0, ifs); 715 MUTEX_EXIT(&isn->is_lock); 716 RWLOCK_EXIT(&ifs->ifs_ipf_state); 717 return 0; 718 } 719 720 if (isn->is_flags & SI_NEWFR) { 721 KMALLOC(fr, frentry_t *); 722 if (fr == NULL) { 723 KFREE(isn); 724 return ENOMEM; 725 } 726 bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr)); 727 isn->is_rule = fr; 728 ips.ips_is.is_rule = fr; 729 MUTEX_NUKE(&fr->fr_lock); 730 MUTEX_INIT(&fr->fr_lock, "state filter rule lock"); 731 732 /* 733 * Look 
up all the interface names in the rule. 734 */ 735 for (i = 0; i < 4; i++) { 736 name = fr->fr_ifnames[i]; 737 fr->fr_ifas[i] = fr_resolvenic(name, fr->fr_v, ifs); 738 name = isn->is_ifname[i]; 739 isn->is_ifp[i] = fr_resolvenic(name, isn->is_v, ifs); 740 } 741 742 fr->fr_ref = 0; 743 fr->fr_dsize = 0; 744 fr->fr_data = NULL; 745 fr->fr_type = FR_T_NONE; 746 747 fr_resolvedest(&fr->fr_tif, fr->fr_v, ifs); 748 fr_resolvedest(&fr->fr_dif, fr->fr_v, ifs); 749 fr_resolvedest(&fr->fr_rif, fr->fr_v, ifs); 750 751 /* 752 * send a copy back to userland of what we ended up 753 * to allow for verification. 754 */ 755 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); 756 if (error) { 757 KFREE(isn); 758 MUTEX_DESTROY(&fr->fr_lock); 759 KFREE(fr); 760 return EFAULT; 761 } 762 READ_ENTER(&ifs->ifs_ipf_state); 763 fr_stinsert(isn, 0, ifs); 764 MUTEX_EXIT(&isn->is_lock); 765 RWLOCK_EXIT(&ifs->ifs_ipf_state); 766 767 } else { 768 READ_ENTER(&ifs->ifs_ipf_state); 769 for (is = ifs->ifs_ips_list; is; is = is->is_next) 770 if (is->is_rule == fr) { 771 fr_stinsert(isn, 0, ifs); 772 MUTEX_EXIT(&isn->is_lock); 773 break; 774 } 775 776 if (is == NULL) { 777 KFREE(isn); 778 isn = NULL; 779 } 780 RWLOCK_EXIT(&ifs->ifs_ipf_state); 781 782 return (isn == NULL) ? ESRCH : 0; 783 } 784 785 return 0; 786 } 787 788 789 /* ------------------------------------------------------------------------ */ 790 /* Function: fr_stinsert */ 791 /* Returns: Nil */ 792 /* Parameters: is(I) - pointer to state structure */ 793 /* rev(I) - flag indicating forward/reverse direction of packet */ 794 /* */ 795 /* Inserts a state structure into the hash table (for lookups) and the list */ 796 /* of state entries (for enumeration). Resolves all of the interface names */ 797 /* to pointers and adjusts running stats for the hash table as appropriate. */ 798 /* */ 799 /* Locking: it is assumed that some kind of lock on ipf_state is held. */ 800 /* Exits with is_lock initialised and held. 
*/ 801 /* ------------------------------------------------------------------------ */ 802 void fr_stinsert(is, rev, ifs) 803 ipstate_t *is; 804 int rev; 805 ipf_stack_t *ifs; 806 { 807 frentry_t *fr; 808 u_int hv; 809 int i; 810 811 MUTEX_INIT(&is->is_lock, "ipf state entry"); 812 813 fr = is->is_rule; 814 if (fr != NULL) { 815 MUTEX_ENTER(&fr->fr_lock); 816 fr->fr_ref++; 817 fr->fr_statecnt++; 818 MUTEX_EXIT(&fr->fr_lock); 819 } 820 821 /* 822 * Look up all the interface names in the state entry. 823 */ 824 for (i = 0; i < 4; i++) { 825 if (is->is_ifp[i] != NULL) 826 continue; 827 is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], is->is_v, ifs); 828 } 829 830 /* 831 * If we could trust is_hv, then the modulous would not be needed, but 832 * when running with IPFILTER_SYNC, this stops bad values. 833 */ 834 hv = is->is_hv % ifs->ifs_fr_statesize; 835 is->is_hv = hv; 836 837 /* 838 * We need to get both of these locks...the first because it is 839 * possible that once the insert is complete another packet might 840 * come along, match the entry and want to update it. 841 */ 842 MUTEX_ENTER(&is->is_lock); 843 MUTEX_ENTER(&ifs->ifs_ipf_stinsert); 844 845 /* 846 * add into list table. 
847 */ 848 if (ifs->ifs_ips_list != NULL) 849 ifs->ifs_ips_list->is_pnext = &is->is_next; 850 is->is_pnext = &ifs->ifs_ips_list; 851 is->is_next = ifs->ifs_ips_list; 852 ifs->ifs_ips_list = is; 853 854 if (ifs->ifs_ips_table[hv] != NULL) 855 ifs->ifs_ips_table[hv]->is_phnext = &is->is_hnext; 856 else 857 ifs->ifs_ips_stats.iss_inuse++; 858 is->is_phnext = ifs->ifs_ips_table + hv; 859 is->is_hnext = ifs->ifs_ips_table[hv]; 860 ifs->ifs_ips_table[hv] = is; 861 ifs->ifs_ips_stats.iss_bucketlen[hv]++; 862 ifs->ifs_ips_num++; 863 MUTEX_EXIT(&ifs->ifs_ipf_stinsert); 864 865 fr_setstatequeue(is, rev, ifs); 866 } 867 868 /* ------------------------------------------------------------------------ */ 869 /* Function: fr_match_ipv4addrs */ 870 /* Returns: int - 2 strong match (same addresses, same direction) */ 871 /* 1 weak match (same address, opposite direction) */ 872 /* 0 no match */ 873 /* */ 874 /* Function matches IPv4 addresses. */ 875 /* ------------------------------------------------------------------------ */ 876 static int fr_match_ipv4addrs(is1, is2) 877 ipstate_t *is1; 878 ipstate_t *is2; 879 { 880 int rv; 881 882 if (is1->is_saddr == is2->is_saddr && is1->is_daddr == is2->is_daddr) 883 rv = 2; 884 else if (is1->is_saddr == is2->is_daddr && 885 is1->is_daddr == is2->is_saddr) 886 rv = 1; 887 else 888 rv = 0; 889 890 return (rv); 891 } 892 893 /* ------------------------------------------------------------------------ */ 894 /* Function: fr_match_ipv6addrs */ 895 /* Returns: int - 2 strong match (same addresses, same direction) */ 896 /* 1 weak match (same addresses, opposite direction) */ 897 /* 0 no match */ 898 /* */ 899 /* Function matches IPv6 addresses. 
*/ 900 /* ------------------------------------------------------------------------ */ 901 static int fr_match_ipv6addrs(is1, is2) 902 ipstate_t *is1; 903 ipstate_t *is2; 904 { 905 int rv; 906 907 if (IP6_EQ(&is1->is_src, &is2->is_src) && 908 IP6_EQ(&is1->is_dst, &is2->is_dst)) 909 rv = 2; 910 else if (IP6_EQ(&is1->is_src, &is2->is_dst) && 911 IP6_EQ(&is1->is_dst, &is2->is_src)) { 912 rv = 1; 913 } 914 else 915 rv = 0; 916 917 return (rv); 918 } 919 /* ------------------------------------------------------------------------ */ 920 /* Function: fr_match_addresses */ 921 /* Returns: int - 2 strong match (same addresses, same direction) */ 922 /* 1 weak match (same address, opposite directions) */ 923 /* 0 no match */ 924 /* Parameters: is1, is2 pointers to states we are checking */ 925 /* */ 926 /* Matches addresses, function uses fr_match_ipvXaddrs() to deal with IPv4 */ 927 /* and IPv6 address format. */ 928 /* ------------------------------------------------------------------------ */ 929 static int fr_match_addresses(is1, is2) 930 ipstate_t *is1; 931 ipstate_t *is2; 932 { 933 int rv; 934 935 if (is1->is_v == 4) { 936 rv = fr_match_ipv4addrs(is1, is2); 937 } else { 938 rv = fr_match_ipv6addrs(is1, is2); 939 } 940 941 return (rv); 942 } 943 944 /* ------------------------------------------------------------------------ */ 945 /* Function: fr_match_ppairs */ 946 /* Returns: int - 2 strong match (same ports, same direction) */ 947 /* 1 weak match (same ports, different direction) */ 948 /* 0 no match */ 949 /* Parameters ppairs1, ppairs - src, dst ports we want to match. */ 950 /* */ 951 /* Matches two port_pair_t types (port pairs). Each port pair contains */ 952 /* src, dst port, which belong to session (state entry). 
*/ 953 /* ------------------------------------------------------------------------ */ 954 static int fr_match_ppairs(ppairs1, ppairs2) 955 port_pair_t *ppairs1; 956 port_pair_t *ppairs2; 957 { 958 int rv; 959 960 if (ppairs1->pp_sport == ppairs2->pp_sport && 961 ppairs1->pp_dport == ppairs2->pp_dport) 962 rv = 2; 963 else if (ppairs1->pp_sport == ppairs2->pp_dport && 964 ppairs1->pp_dport == ppairs2->pp_sport) 965 rv = 1; 966 else 967 rv = 0; 968 969 return (rv); 970 } 971 972 /* ------------------------------------------------------------------------ */ 973 /* Function: fr_match_l4_hdr */ 974 /* Returns: int - 0 no match, */ 975 /* 1 weak match (same ports, different directions) */ 976 /* 2 strong match (same ports, same direction) */ 977 /* Parameters is1, is2 - states we want to match */ 978 /* */ 979 /* Function matches L4 header data (source ports for TCP, UDP, CallIds for */ 980 /* GRE protocol). */ 981 /* ------------------------------------------------------------------------ */ 982 static int fr_match_l4_hdr(is1, is2) 983 ipstate_t *is1; 984 ipstate_t *is2; 985 { 986 int rv = 0; 987 port_pair_t pp1; 988 port_pair_t pp2; 989 990 if (is1->is_p != is2->is_p) 991 return (0); 992 993 switch (is1->is_p) { 994 case IPPROTO_TCP: 995 pp1.pp_sport = is1->is_ps.is_ts.ts_sport; 996 pp1.pp_dport = is1->is_ps.is_ts.ts_dport; 997 pp2.pp_sport = is2->is_ps.is_ts.ts_sport; 998 pp2.pp_dport = is2->is_ps.is_ts.ts_dport; 999 rv = fr_match_ppairs(&pp1, &pp2); 1000 break; 1001 case IPPROTO_UDP: 1002 pp1.pp_sport = is1->is_ps.is_us.us_sport; 1003 pp1.pp_dport = is1->is_ps.is_us.us_dport; 1004 pp2.pp_sport = is2->is_ps.is_us.us_sport; 1005 pp2.pp_dport = is2->is_ps.is_us.us_dport; 1006 rv = fr_match_ppairs(&pp1, &pp2); 1007 break; 1008 case IPPROTO_GRE: 1009 /* greinfo_t can be also interprted as port pair */ 1010 pp1.pp_sport = is1->is_ps.is_ug.gs_call[0]; 1011 pp1.pp_dport = is1->is_ps.is_ug.gs_call[1]; 1012 pp2.pp_sport = is2->is_ps.is_ug.gs_call[0]; 1013 pp2.pp_dport = 
is2->is_ps.is_ug.gs_call[1]; 1014 rv = fr_match_ppairs(&pp1, &pp2); 1015 break; 1016 case IPPROTO_ICMP: 1017 case IPPROTO_ICMPV6: 1018 if (bcmp(&is1->is_ps, &is2->is_ps, sizeof (icmpinfo_t))) 1019 rv = 1; 1020 else 1021 rv = 0; 1022 break; 1023 default: 1024 rv = 0; 1025 } 1026 1027 return (rv); 1028 } 1029 1030 /* ------------------------------------------------------------------------ */ 1031 /* Function: fr_matchstates */ 1032 /* Returns: int - nonzero match, zero no match */ 1033 /* Parameters is1, is2 - states we want to match */ 1034 /* */ 1035 /* The state entries are equal (identical match) if they belong to the same */ 1036 /* session. Any time new state entry is being added the fr_addstate() */ 1037 /* function creates temporal state entry from the data it gets from IP and */ 1038 /* L4 header. The fr_matchstats() must be also aware of packet direction, */ 1039 /* which is also stored within the state entry. We should keep in mind the */ 1040 /* information about packet direction is spread accross L3 (addresses) and */ 1041 /* L4 (ports). There are three possible relationships betwee is1, is2: */ 1042 /* - no match (match(is1, is2) == 0)) */ 1043 /* - weak match same addresses (ports), but different */ 1044 /* directions (1) (fr_match_xxxx(is1, is2) == 1) */ 1045 /* - strong match same addresses (ports) and same directions */ 1046 /* (2) (fr_match_xxxx(is1, is2) == 2) */ 1047 /* */ 1048 /* There are functions, which match match addresses (L3 header) in is1, is2 */ 1049 /* and functions, which are used to compare ports (L4 header) data. We say */ 1050 /* the is1 and is2 are same (identical) if there is a match */ 1051 /* (fr_match_l4_hdr(is1, is2) != 0) and matchlevels are same for entries */ 1052 /* (fr_match_l3_hdr(is1, is2) == fr_match_l4_hdr(is1, is2)) for is1, is2. */ 1053 /* Such requirement deals with case as follows: */ 1054 /* suppose there are two connections between hosts A, B. 
Connection 1: */ 1055 /* a.a.a.a:12345 <=> b.b.b.b:54321 */ 1056 /* Connection 2: */ 1057 /* a.a.a.a:54321 <=> b.b.b.b:12345 */ 1058 /* since we've introduced match levels into our fr_matchstates(), we are */ 1059 /* able to identify, which packets belong to connection A and which belong */ 1060 /* to connection B. Assume there are two entries is1, is2. is1 has been */ 1061 /* from con. 1 packet, which travelled from A to B: */ 1062 /* a.a.a.a:12345 -> b.b.b.b:54321 */ 1063 /* while s2, has been created from packet which belongs to con. 2 and is */ 1064 /* also coming from A to B: */ 1065 /* a.a.a.a:54321 -> b.b.b.b:12345 */ 1066 /* fr_match_l3_hdr(is1, is2) == 2 -> strong match, while */ 1067 /* fr_match_l4_hdr(is1, is2) == 1 -> weak match. Since match levels are */ 1068 /* different the state entries are not identical -> no match as a final */ 1069 /* result. */ 1070 /* ------------------------------------------------------------------------ */ 1071 static int fr_matchstates(is1, is2) 1072 ipstate_t *is1; 1073 ipstate_t *is2; 1074 { 1075 int rv; 1076 int amatch; 1077 int pmatch; 1078 1079 if (bcmp(&is1->is_pass, &is2->is_pass, 1080 offsetof(struct ipstate, is_ps) - 1081 offsetof(struct ipstate, is_pass)) == 0) { 1082 1083 pmatch = fr_match_l4_hdr(is1, is2); 1084 amatch = fr_match_addresses(is1, is2); 1085 /* 1086 * If addresses match (amatch != 0), then 'match levels' 1087 * must be same for matching entries. If amatch and pmatch 1088 * have different values (different match levels), then 1089 * is1 and is2 belong to different sessions. 
1090 */ 1091 rv = (amatch != 0) && (amatch == pmatch); 1092 } 1093 else 1094 rv = 0; 1095 1096 return (rv); 1097 } 1098 1099 /* ------------------------------------------------------------------------ */ 1100 /* Function: fr_addstate */ 1101 /* Returns: ipstate_t* - NULL == failure, else pointer to new state */ 1102 /* Parameters: fin(I) - pointer to packet information */ 1103 /* stsave(O) - pointer to place to save pointer to created */ 1104 /* state structure. */ 1105 /* flags(I) - flags to use when creating the structure */ 1106 /* */ 1107 /* Creates a new IP state structure from the packet information collected. */ 1108 /* Inserts it into the state table and appends to the bottom of the active */ 1109 /* list. If the capacity of the table has reached the maximum allowed then */ 1110 /* the call will fail and a flush is scheduled for the next timeout call. */ 1111 /* ------------------------------------------------------------------------ */ 1112 ipstate_t *fr_addstate(fin, stsave, flags) 1113 fr_info_t *fin; 1114 ipstate_t **stsave; 1115 u_int flags; 1116 { 1117 ipstate_t *is, ips; 1118 struct icmp *ic; 1119 u_int pass, hv; 1120 frentry_t *fr; 1121 tcphdr_t *tcp; 1122 grehdr_t *gre; 1123 void *ifp; 1124 int out; 1125 ipf_stack_t *ifs = fin->fin_ifs; 1126 1127 if (ifs->ifs_fr_state_lock || 1128 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) 1129 return NULL; 1130 1131 if ((fin->fin_flx & FI_OOW) && !(fin->fin_tcpf & TH_SYN)) 1132 return NULL; 1133 1134 /* 1135 * Trigger automatic call to fr_state_flush() if the 1136 * table has reached capacity specified by hi watermark. 1137 */ 1138 if (ST_TAB_WATER_LEVEL(ifs) > ifs->ifs_state_flush_level_hi) 1139 ifs->ifs_fr_state_doflush = 1; 1140 1141 /* 1142 * If the max number of state entries has been reached, and there is no 1143 * limit on the state count for the rule, then do not continue. 
In the 1144 * case where a limit exists, it's ok allow the entries to be created as 1145 * long as specified limit itself has not been reached. 1146 * 1147 * Note that because the lock isn't held on fr, it is possible to exceed 1148 * the specified size of the table. However, the cost of this is being 1149 * ignored here; as the number by which it can go over is a product of 1150 * the number of simultaneous threads that could be executing in here. 1151 * So, a limit of 100 won't result in 200, but could result in 101 or 102. 1152 * 1153 * Also note that, since the automatic flush should have been triggered 1154 * well before we reach the maximum number of state table entries, the 1155 * likelihood of reaching the max (and thus exceedng it) is minimal. 1156 */ 1157 fr = fin->fin_fr; 1158 if (fr != NULL) { 1159 if ((ifs->ifs_ips_num >= ifs->ifs_fr_statemax) && 1160 (fr->fr_statemax == 0)) { 1161 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 1162 return NULL; 1163 } 1164 if ((fr->fr_statemax != 0) && 1165 (fr->fr_statecnt >= fr->fr_statemax)) { 1166 ATOMIC_INCL(ifs->ifs_ips_stats.iss_maxref); 1167 ifs->ifs_fr_state_doflush = 1; 1168 return NULL; 1169 } 1170 } 1171 1172 ic = NULL; 1173 tcp = NULL; 1174 out = fin->fin_out; 1175 is = &ips; 1176 bzero((char *)is, sizeof(*is)); 1177 1178 if (fr == NULL) { 1179 pass = ifs->ifs_fr_flags; 1180 is->is_tag = FR_NOLOGTAG; 1181 } else { 1182 pass = fr->fr_flags; 1183 } 1184 1185 is->is_die = 1 + ifs->ifs_fr_ticks; 1186 /* 1187 * We want to check everything that is a property of this packet, 1188 * but we don't (automatically) care about it's fragment status as 1189 * this may change. 
1190 */ 1191 is->is_pass = pass; 1192 is->is_v = fin->fin_v; 1193 is->is_opt[0] = fin->fin_optmsk; 1194 is->is_optmsk[0] = 0xffffffff; 1195 is->is_optmsk[1] = 0xffffffff; 1196 if (is->is_v == 6) { 1197 is->is_opt[0] &= ~0x8; 1198 is->is_optmsk[0] &= ~0x8; 1199 is->is_optmsk[1] &= ~0x8; 1200 } 1201 is->is_sec = fin->fin_secmsk; 1202 is->is_secmsk = 0xffff; 1203 is->is_auth = fin->fin_auth; 1204 is->is_authmsk = 0xffff; 1205 1206 /* 1207 * Copy and calculate... 1208 */ 1209 hv = (is->is_p = fin->fin_fi.fi_p); 1210 is->is_src = fin->fin_fi.fi_src; 1211 hv += is->is_saddr; 1212 is->is_dst = fin->fin_fi.fi_dst; 1213 hv += is->is_daddr; 1214 #ifdef USE_INET6 1215 if (fin->fin_v == 6) { 1216 /* 1217 * For ICMPv6, we check to see if the destination address is 1218 * a multicast address. If it is, do not include it in the 1219 * calculation of the hash because the correct reply will come 1220 * back from a real address, not a multicast address. 1221 */ 1222 if ((is->is_p == IPPROTO_ICMPV6) && 1223 IN6_IS_ADDR_MULTICAST(&is->is_dst.in6)) { 1224 /* 1225 * So you can do keep state with neighbour discovery. 1226 * 1227 * Here we could use the address from the neighbour 1228 * solicit message to put in the state structure and 1229 * we could use that without a wildcard flag too... 
1230 */ 1231 is->is_flags |= SI_W_DADDR; 1232 hv -= is->is_daddr; 1233 } else { 1234 hv += is->is_dst.i6[1]; 1235 hv += is->is_dst.i6[2]; 1236 hv += is->is_dst.i6[3]; 1237 } 1238 hv += is->is_src.i6[1]; 1239 hv += is->is_src.i6[2]; 1240 hv += is->is_src.i6[3]; 1241 } 1242 #endif 1243 if ((fin->fin_v == 4) && 1244 (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) { 1245 if (fin->fin_out == 0) { 1246 flags |= SI_W_DADDR|SI_CLONE; 1247 hv -= is->is_daddr; 1248 } else { 1249 flags |= SI_W_SADDR|SI_CLONE; 1250 hv -= is->is_saddr; 1251 } 1252 } 1253 1254 switch (is->is_p) 1255 { 1256 #ifdef USE_INET6 1257 case IPPROTO_ICMPV6 : 1258 ic = fin->fin_dp; 1259 1260 switch (ic->icmp_type) 1261 { 1262 case ICMP6_ECHO_REQUEST : 1263 is->is_icmp.ici_type = ic->icmp_type; 1264 hv += (is->is_icmp.ici_id = ic->icmp_id); 1265 break; 1266 case ICMP6_MEMBERSHIP_QUERY : 1267 case ND_ROUTER_SOLICIT : 1268 case ND_NEIGHBOR_SOLICIT : 1269 case ICMP6_NI_QUERY : 1270 is->is_icmp.ici_type = ic->icmp_type; 1271 break; 1272 default : 1273 return NULL; 1274 } 1275 ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp); 1276 break; 1277 #endif 1278 case IPPROTO_ICMP : 1279 ic = fin->fin_dp; 1280 1281 switch (ic->icmp_type) 1282 { 1283 case ICMP_ECHO : 1284 case ICMP_TSTAMP : 1285 case ICMP_IREQ : 1286 case ICMP_MASKREQ : 1287 is->is_icmp.ici_type = ic->icmp_type; 1288 hv += (is->is_icmp.ici_id = ic->icmp_id); 1289 break; 1290 default : 1291 return NULL; 1292 } 1293 ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp); 1294 break; 1295 1296 case IPPROTO_GRE : 1297 gre = fin->fin_dp; 1298 1299 is->is_gre.gs_flags = gre->gr_flags; 1300 is->is_gre.gs_ptype = gre->gr_ptype; 1301 if (GRE_REV(is->is_gre.gs_flags) == 1) { 1302 is->is_call[0] = fin->fin_data[0]; 1303 is->is_call[1] = fin->fin_data[1]; 1304 } 1305 break; 1306 1307 case IPPROTO_TCP : 1308 tcp = fin->fin_dp; 1309 1310 if (tcp->th_flags & TH_RST) 1311 return NULL; 1312 /* 1313 * The endian of the ports doesn't matter, but the ack and 1314 * sequence numbers do as 
we do mathematics on them later. 1315 */ 1316 is->is_sport = htons(fin->fin_data[0]); 1317 is->is_dport = htons(fin->fin_data[1]); 1318 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) { 1319 hv += is->is_sport; 1320 hv += is->is_dport; 1321 } 1322 1323 /* 1324 * If this is a real packet then initialise fields in the 1325 * state information structure from the TCP header information. 1326 */ 1327 1328 is->is_maxdwin = 1; 1329 is->is_maxswin = ntohs(tcp->th_win); 1330 if (is->is_maxswin == 0) 1331 is->is_maxswin = 1; 1332 1333 if ((fin->fin_flx & FI_IGNORE) == 0) { 1334 is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen - 1335 (TCP_OFF(tcp) << 2) + 1336 ((tcp->th_flags & TH_SYN) ? 1 : 0) + 1337 ((tcp->th_flags & TH_FIN) ? 1 : 0); 1338 is->is_maxsend = is->is_send; 1339 1340 /* 1341 * Window scale option is only present in 1342 * SYN/SYN-ACK packet. 1343 */ 1344 if ((tcp->th_flags & ~(TH_FIN|TH_ACK|TH_ECNALL)) == 1345 TH_SYN && 1346 (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) { 1347 if (fr_tcpoptions(fin, tcp, 1348 &is->is_tcp.ts_data[0]) == -1) { 1349 fin->fin_flx |= FI_BAD; 1350 } 1351 } 1352 1353 if ((fin->fin_out != 0) && (pass & FR_NEWISN) != 0) { 1354 fr_checknewisn(fin, is); 1355 fr_fixoutisn(fin, is); 1356 } 1357 1358 if ((tcp->th_flags & TH_OPENING) == TH_SYN) 1359 flags |= IS_TCPFSM; 1360 else { 1361 is->is_maxdwin = is->is_maxswin * 2; 1362 is->is_dend = ntohl(tcp->th_ack); 1363 is->is_maxdend = ntohl(tcp->th_ack); 1364 is->is_maxdwin *= 2; 1365 } 1366 } 1367 1368 /* 1369 * If we're creating state for a starting connection, start the 1370 * timer on it as we'll never see an error if it fails to 1371 * connect. 
1372 */ 1373 ATOMIC_INCL(ifs->ifs_ips_stats.iss_tcp); 1374 break; 1375 1376 case IPPROTO_UDP : 1377 tcp = fin->fin_dp; 1378 1379 is->is_sport = htons(fin->fin_data[0]); 1380 is->is_dport = htons(fin->fin_data[1]); 1381 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) { 1382 hv += tcp->th_dport; 1383 hv += tcp->th_sport; 1384 } 1385 ATOMIC_INCL(ifs->ifs_ips_stats.iss_udp); 1386 break; 1387 1388 default : 1389 break; 1390 } 1391 hv = DOUBLE_HASH(hv, ifs); 1392 is->is_hv = hv; 1393 is->is_rule = fr; 1394 is->is_flags = flags & IS_INHERITED; 1395 1396 /* 1397 * Look for identical state. 1398 */ 1399 for (is = ifs->ifs_ips_table[is->is_hv % ifs->ifs_fr_statesize]; 1400 is != NULL; 1401 is = is->is_hnext) { 1402 if (fr_matchstates(&ips, is) == 1) 1403 break; 1404 } 1405 1406 /* 1407 * we've found a matching state -> state already exists, 1408 * we are not going to add a duplicate record. 1409 */ 1410 if (is != NULL) 1411 return NULL; 1412 1413 if (ifs->ifs_ips_stats.iss_bucketlen[hv] >= ifs->ifs_fr_state_maxbucket) { 1414 ATOMIC_INCL(ifs->ifs_ips_stats.iss_bucketfull); 1415 return NULL; 1416 } 1417 KMALLOC(is, ipstate_t *); 1418 if (is == NULL) { 1419 ATOMIC_INCL(ifs->ifs_ips_stats.iss_nomem); 1420 return NULL; 1421 } 1422 bcopy((char *)&ips, (char *)is, sizeof(*is)); 1423 /* 1424 * Do not do the modulous here, it is done in fr_stinsert(). 
1425 */ 1426 if (fr != NULL) { 1427 (void) strncpy(is->is_group, fr->fr_group, FR_GROUPLEN); 1428 if (fr->fr_age[0] != 0) { 1429 is->is_tqehead[0] = 1430 fr_addtimeoutqueue(&ifs->ifs_ips_utqe, 1431 fr->fr_age[0], ifs); 1432 is->is_sti.tqe_flags |= TQE_RULEBASED; 1433 } 1434 if (fr->fr_age[1] != 0) { 1435 is->is_tqehead[1] = 1436 fr_addtimeoutqueue(&ifs->ifs_ips_utqe, 1437 fr->fr_age[1], ifs); 1438 is->is_sti.tqe_flags |= TQE_RULEBASED; 1439 } 1440 is->is_tag = fr->fr_logtag; 1441 1442 is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1]; 1443 is->is_ifp[(1 - out) << 1] = fr->fr_ifas[2]; 1444 is->is_ifp[((1 - out) << 1) + 1] = fr->fr_ifas[3]; 1445 1446 if (((ifp = fr->fr_ifas[1]) != NULL) && 1447 (ifp != (void *)-1)) { 1448 COPYIFNAME(ifp, is->is_ifname[(out << 1) + 1], fr->fr_v); 1449 } 1450 if (((ifp = fr->fr_ifas[2]) != NULL) && 1451 (ifp != (void *)-1)) { 1452 COPYIFNAME(ifp, is->is_ifname[(1 - out) << 1], fr->fr_v); 1453 } 1454 if (((ifp = fr->fr_ifas[3]) != NULL) && 1455 (ifp != (void *)-1)) { 1456 COPYIFNAME(ifp, is->is_ifname[((1 - out) << 1) + 1], fr->fr_v); 1457 } 1458 } 1459 1460 is->is_ifp[out << 1] = fin->fin_ifp; 1461 if (fin->fin_ifp != NULL) { 1462 COPYIFNAME(fin->fin_ifp, is->is_ifname[out << 1], fin->fin_v); 1463 } 1464 1465 is->is_ref = 1; 1466 is->is_pkts[0] = 0, is->is_bytes[0] = 0; 1467 is->is_pkts[1] = 0, is->is_bytes[1] = 0; 1468 is->is_pkts[2] = 0, is->is_bytes[2] = 0; 1469 is->is_pkts[3] = 0, is->is_bytes[3] = 0; 1470 if ((fin->fin_flx & FI_IGNORE) == 0) { 1471 is->is_pkts[out] = 1; 1472 is->is_bytes[out] = fin->fin_plen; 1473 is->is_flx[out][0] = fin->fin_flx & FI_CMP; 1474 is->is_flx[out][0] &= ~FI_OOW; 1475 } 1476 1477 if (pass & FR_STSTRICT) 1478 is->is_flags |= IS_STRICT; 1479 1480 if (pass & FR_STATESYNC) 1481 is->is_flags |= IS_STATESYNC; 1482 1483 if (flags & (SI_WILDP|SI_WILDA)) { 1484 ATOMIC_INCL(ifs->ifs_ips_stats.iss_wild); 1485 } 1486 is->is_rulen = fin->fin_rule; 1487 1488 1489 if (pass & FR_LOGFIRST) 1490 is->is_pass &= 
~(FR_LOGFIRST|FR_LOG); 1491 1492 READ_ENTER(&ifs->ifs_ipf_state); 1493 is->is_me = stsave; 1494 1495 fr_stinsert(is, fin->fin_rev, ifs); 1496 1497 if (fin->fin_p == IPPROTO_TCP) { 1498 /* 1499 * If we're creating state for a starting connection, start the 1500 * timer on it as we'll never see an error if it fails to 1501 * connect. 1502 */ 1503 (void) fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb, 1504 is->is_flags); 1505 MUTEX_EXIT(&is->is_lock); 1506 #ifdef IPFILTER_SCAN 1507 if ((is->is_flags & SI_CLONE) == 0) 1508 (void) ipsc_attachis(is); 1509 #endif 1510 } else { 1511 MUTEX_EXIT(&is->is_lock); 1512 } 1513 #ifdef IPFILTER_SYNC 1514 if ((is->is_flags & IS_STATESYNC) && ((is->is_flags & SI_CLONE) == 0)) 1515 is->is_sync = ipfsync_new(SMC_STATE, fin, is); 1516 #endif 1517 if (ifs->ifs_ipstate_logging) 1518 ipstate_log(is, ISL_NEW, ifs); 1519 1520 RWLOCK_EXIT(&ifs->ifs_ipf_state); 1521 fin->fin_rev = IP6_NEQ(&is->is_dst, &fin->fin_daddr); 1522 fin->fin_flx |= FI_STATE; 1523 if (fin->fin_flx & FI_FRAG) 1524 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); 1525 1526 return is; 1527 } 1528 1529 1530 /* ------------------------------------------------------------------------ */ 1531 /* Function: fr_tcpoptions */ 1532 /* Returns: int - 1 == packet matches state entry, 0 == it does not */ 1533 /* Parameters: fin(I) - pointer to packet information */ 1534 /* tcp(I) - pointer to TCP packet header */ 1535 /* td(I) - pointer to TCP data held as part of the state */ 1536 /* */ 1537 /* Look after the TCP header for any options and deal with those that are */ 1538 /* present. Record details about those that we recogise. 
*/ 1539 /* ------------------------------------------------------------------------ */ 1540 static int fr_tcpoptions(fin, tcp, td) 1541 fr_info_t *fin; 1542 tcphdr_t *tcp; 1543 tcpdata_t *td; 1544 { 1545 int off, mlen, ol, i, len, retval; 1546 char buf[64], *s, opt; 1547 mb_t *m = NULL; 1548 1549 len = (TCP_OFF(tcp) << 2); 1550 if (fin->fin_dlen < len) 1551 return 0; 1552 len -= sizeof(*tcp); 1553 1554 off = fin->fin_plen - fin->fin_dlen + sizeof(*tcp) + fin->fin_ipoff; 1555 1556 m = fin->fin_m; 1557 mlen = MSGDSIZE(m) - off; 1558 if (len > mlen) { 1559 len = mlen; 1560 retval = 0; 1561 } else { 1562 retval = 1; 1563 } 1564 1565 COPYDATA(m, off, len, buf); 1566 1567 for (s = buf; len > 0; ) { 1568 opt = *s; 1569 if (opt == TCPOPT_EOL) 1570 break; 1571 else if (opt == TCPOPT_NOP) 1572 ol = 1; 1573 else { 1574 if (len < 2) 1575 break; 1576 ol = (int)*(s + 1); 1577 if (ol < 2 || ol > len) 1578 break; 1579 1580 /* 1581 * Extract the TCP options we are interested in out of 1582 * the header and store them in the the tcpdata struct. 1583 */ 1584 switch (opt) 1585 { 1586 case TCPOPT_WINDOW : 1587 if (ol == TCPOLEN_WINDOW) { 1588 i = (int)*(s + 2); 1589 if (i > TCP_WSCALE_MAX) 1590 i = TCP_WSCALE_MAX; 1591 else if (i < 0) 1592 i = 0; 1593 td->td_winscale = i; 1594 td->td_winflags |= TCP_WSCALE_SEEN | 1595 TCP_WSCALE_FIRST; 1596 } else 1597 retval = -1; 1598 break; 1599 case TCPOPT_MAXSEG : 1600 /* 1601 * So, if we wanted to set the TCP MAXSEG, 1602 * it should be done here... 
1603 */ 1604 if (ol == TCPOLEN_MAXSEG) { 1605 i = (int)*(s + 2); 1606 i <<= 8; 1607 i += (int)*(s + 3); 1608 td->td_maxseg = i; 1609 } else 1610 retval = -1; 1611 break; 1612 case TCPOPT_SACK_PERMITTED : 1613 if (ol == TCPOLEN_SACK_PERMITTED) 1614 td->td_winflags |= TCP_SACK_PERMIT; 1615 else 1616 retval = -1; 1617 break; 1618 } 1619 } 1620 len -= ol; 1621 s += ol; 1622 } 1623 return retval; 1624 } 1625 1626 1627 /* ------------------------------------------------------------------------ */ 1628 /* Function: fr_tcpstate */ 1629 /* Returns: int - 1 == packet matches state entry, 0 == it does not */ 1630 /* Parameters: fin(I) - pointer to packet information */ 1631 /* tcp(I) - pointer to TCP packet header */ 1632 /* is(I) - pointer to master state structure */ 1633 /* */ 1634 /* Check to see if a packet with TCP headers fits within the TCP window. */ 1635 /* Change timeout depending on whether new packet is a SYN-ACK returning */ 1636 /* for a SYN or a RST or FIN which indicate time to close up shop. */ 1637 /* ------------------------------------------------------------------------ */ 1638 static int fr_tcpstate(fin, tcp, is) 1639 fr_info_t *fin; 1640 tcphdr_t *tcp; 1641 ipstate_t *is; 1642 { 1643 int source, ret = 0, flags; 1644 tcpdata_t *fdata, *tdata; 1645 ipf_stack_t *ifs = fin->fin_ifs; 1646 1647 source = !fin->fin_rev; 1648 if (((is->is_flags & IS_TCPFSM) != 0) && (source == 1) && 1649 (ntohs(is->is_sport) != fin->fin_data[0])) 1650 source = 0; 1651 fdata = &is->is_tcp.ts_data[!source]; 1652 tdata = &is->is_tcp.ts_data[source]; 1653 1654 MUTEX_ENTER(&is->is_lock); 1655 1656 /* 1657 * If a SYN packet is received for a connection that is in a half 1658 * closed state, then move its state entry to deletetq. In such case 1659 * the SYN packet will be consequently dropped. This allows new state 1660 * entry to be created with a retransmited SYN packet. 
1661 */ 1662 if ((tcp->th_flags & TH_OPENING) == TH_SYN) { 1663 if ((is->is_state[source] > IPF_TCPS_ESTABLISHED) && 1664 (is->is_state[!source] > IPF_TCPS_ESTABLISHED)) { 1665 is->is_state[source] = IPF_TCPS_CLOSED; 1666 is->is_state[!source] = IPF_TCPS_CLOSED; 1667 /* 1668 * Do not update is->is_sti.tqe_die in case state entry 1669 * is already present in deletetq. It prevents state 1670 * entry ttl update by retransmitted SYN packets, which 1671 * may arrive before timer tick kicks off. The SYN 1672 * packet will be dropped again. 1673 */ 1674 if (is->is_sti.tqe_ifq != &ifs->ifs_ips_deletetq) 1675 fr_movequeue(&is->is_sti, is->is_sti.tqe_ifq, 1676 &fin->fin_ifs->ifs_ips_deletetq, 1677 fin->fin_ifs); 1678 1679 MUTEX_EXIT(&is->is_lock); 1680 return 0; 1681 } 1682 } 1683 1684 if (fr_tcpinwindow(fin, fdata, tdata, tcp, is->is_flags)) { 1685 #ifdef IPFILTER_SCAN 1686 if (is->is_flags & (IS_SC_CLIENT|IS_SC_SERVER)) { 1687 ipsc_packet(fin, is); 1688 if (FR_ISBLOCK(is->is_pass)) { 1689 MUTEX_EXIT(&is->is_lock); 1690 return 1; 1691 } 1692 } 1693 #endif 1694 1695 /* 1696 * Nearing end of connection, start timeout. 1697 */ 1698 ret = fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb, 1699 is->is_flags); 1700 if (ret == 0) { 1701 MUTEX_EXIT(&is->is_lock); 1702 return 0; 1703 } 1704 1705 /* 1706 * set s0's as appropriate. Use syn-ack packet as it 1707 * contains both pieces of required information. 1708 */ 1709 /* 1710 * Window scale option is only present in SYN/SYN-ACK packet. 1711 * Compare with ~TH_FIN to mask out T/TCP setups. 
1712 */ 1713 flags = tcp->th_flags & ~(TH_FIN|TH_ECNALL); 1714 if (flags == (TH_SYN|TH_ACK)) { 1715 is->is_s0[source] = ntohl(tcp->th_ack); 1716 is->is_s0[!source] = ntohl(tcp->th_seq) + 1; 1717 if (TCP_OFF(tcp) > (sizeof (tcphdr_t) >> 2)) { 1718 (void) fr_tcpoptions(fin, tcp, fdata); 1719 } 1720 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) 1721 fr_checknewisn(fin, is); 1722 } else if (flags == TH_SYN) { 1723 is->is_s0[source] = ntohl(tcp->th_seq) + 1; 1724 if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) 1725 (void) fr_tcpoptions(fin, tcp, tdata); 1726 1727 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) 1728 fr_checknewisn(fin, is); 1729 1730 } 1731 ret = 1; 1732 } else 1733 fin->fin_flx |= FI_OOW; 1734 MUTEX_EXIT(&is->is_lock); 1735 return ret; 1736 } 1737 1738 1739 /* ------------------------------------------------------------------------ */ 1740 /* Function: fr_checknewisn */ 1741 /* Returns: Nil */ 1742 /* Parameters: fin(I) - pointer to packet information */ 1743 /* is(I) - pointer to master state structure */ 1744 /* */ 1745 /* Check to see if this TCP connection is expecting and needs a new */ 1746 /* sequence number for a particular direction of the connection. */ 1747 /* */ 1748 /* NOTE: This does not actually change the sequence numbers, only gets new */ 1749 /* one ready. */ 1750 /* ------------------------------------------------------------------------ */ 1751 static void fr_checknewisn(fin, is) 1752 fr_info_t *fin; 1753 ipstate_t *is; 1754 { 1755 u_32_t sumd, old, new; 1756 tcphdr_t *tcp; 1757 int i; 1758 1759 i = fin->fin_rev; 1760 tcp = fin->fin_dp; 1761 1762 if (((i == 0) && !(is->is_flags & IS_ISNSYN)) || 1763 ((i == 1) && !(is->is_flags & IS_ISNACK))) { 1764 old = ntohl(tcp->th_seq); 1765 new = fr_newisn(fin); 1766 is->is_isninc[i] = new - old; 1767 CALC_SUMD(old, new, sumd); 1768 is->is_sumd[i] = (sumd & 0xffff) + (sumd >> 16); 1769 1770 is->is_flags |= ((i == 0) ? 
IS_ISNSYN : IS_ISNACK); 1771 } 1772 } 1773 1774 1775 /* ------------------------------------------------------------------------ */ 1776 /* Function: fr_tcpinwindow */ 1777 /* Returns: int - 1 == packet inside TCP "window", 0 == not inside. */ 1778 /* Parameters: fin(I) - pointer to packet information */ 1779 /* fdata(I) - pointer to tcp state informatio (forward) */ 1780 /* tdata(I) - pointer to tcp state informatio (reverse) */ 1781 /* tcp(I) - pointer to TCP packet header */ 1782 /* */ 1783 /* Given a packet has matched addresses and ports, check to see if it is */ 1784 /* within the TCP data window. In a show of generosity, allow packets that */ 1785 /* are within the window space behind the current sequence # as well. */ 1786 /* ------------------------------------------------------------------------ */ 1787 int fr_tcpinwindow(fin, fdata, tdata, tcp, flags) 1788 fr_info_t *fin; 1789 tcpdata_t *fdata, *tdata; 1790 tcphdr_t *tcp; 1791 int flags; 1792 { 1793 tcp_seq seq, ack, end; 1794 int ackskew, tcpflags; 1795 u_32_t win, maxwin; 1796 int dsize, inseq; 1797 1798 /* 1799 * Find difference between last checked packet and this packet. 1800 */ 1801 tcpflags = tcp->th_flags; 1802 seq = ntohl(tcp->th_seq); 1803 ack = ntohl(tcp->th_ack); 1804 1805 if (tcpflags & TH_SYN) 1806 win = ntohs(tcp->th_win); 1807 else 1808 win = ntohs(tcp->th_win) << fdata->td_winscale; 1809 1810 /* 1811 * win 0 means the receiving endpoint has closed the window, because it 1812 * has not enough memory to receive data from sender. In such case we 1813 * are pretending window size to be 1 to let TCP probe data through. 1814 * TCP probe data can be either 0 or 1 octet of data, the RFC does not 1815 * state this accurately, so we have to allow 1 octet (win = 1) even if 1816 * the window is closed (win == 0). 1817 */ 1818 if (win == 0) 1819 win = 1; 1820 1821 dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) + 1822 ((tcpflags & TH_SYN) ? 1 : 0) + ((tcpflags & TH_FIN) ? 
1 : 0); 1823 1824 /* 1825 * if window scaling is present, the scaling is only allowed 1826 * for windows not in the first SYN packet. In that packet the 1827 * window is 65535 to specify the largest window possible 1828 * for receivers not implementing the window scale option. 1829 * Currently, we do not assume TTCP here. That means that 1830 * if we see a second packet from a host (after the initial 1831 * SYN), we can assume that the receiver of the SYN did 1832 * already send back the SYN/ACK (and thus that we know if 1833 * the receiver also does window scaling) 1834 */ 1835 if (!(tcpflags & TH_SYN) && (fdata->td_winflags & TCP_WSCALE_FIRST)) { 1836 fdata->td_maxwin = win; 1837 } 1838 1839 end = seq + dsize; 1840 1841 if ((fdata->td_end == 0) && 1842 (!(flags & IS_TCPFSM) || 1843 ((tcpflags & TH_OPENING) == TH_OPENING))) { 1844 /* 1845 * Must be a (outgoing) SYN-ACK in reply to a SYN. 1846 */ 1847 fdata->td_end = end - 1; 1848 fdata->td_maxwin = 1; 1849 fdata->td_maxend = end + win; 1850 } 1851 1852 if (!(tcpflags & TH_ACK)) { /* Pretend an ack was sent */ 1853 ack = tdata->td_end; 1854 } else if (((tcpflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) && 1855 (ack == 0)) { 1856 /* gross hack to get around certain broken tcp stacks */ 1857 ack = tdata->td_end; 1858 } 1859 1860 maxwin = tdata->td_maxwin; 1861 ackskew = tdata->td_end - ack; 1862 1863 /* 1864 * Strict sequencing only allows in-order delivery. 
1865 */ 1866 if ((flags & IS_STRICT) != 0) { 1867 if (seq != fdata->td_end) { 1868 DTRACE_PROBE(strict_check); 1869 return 0; 1870 } 1871 } 1872 1873 #define SEQ_GE(a,b) ((int)((a) - (b)) >= 0) 1874 #define SEQ_GT(a,b) ((int)((a) - (b)) > 0) 1875 inseq = 0; 1876 DTRACE_PROBE4( 1877 dyn_params, 1878 int, dsize, 1879 int, ackskew, 1880 int, maxwin, 1881 int, win 1882 ); 1883 if ( 1884 #if defined(_KERNEL) 1885 /* 1886 * end <-> s + n 1887 * maxend <-> ack + win 1888 * this is upperbound check 1889 */ 1890 (SEQ_GE(fdata->td_maxend, end)) && 1891 /* 1892 * this is lowerbound check 1893 */ 1894 (SEQ_GE(seq, fdata->td_end - maxwin)) && 1895 #endif 1896 /* XXX what about big packets */ 1897 #define MAXACKWINDOW 66000 1898 (-ackskew <= (MAXACKWINDOW << fdata->td_winscale)) && 1899 ( ackskew <= (MAXACKWINDOW << fdata->td_winscale))) { 1900 inseq = 1; 1901 /* 1902 * Microsoft Windows will send the next packet to the right of the 1903 * window if SACK is in use. 1904 */ 1905 } else if ((seq == fdata->td_maxend) && (ackskew == 0) && 1906 (fdata->td_winflags & TCP_SACK_PERMIT) && 1907 (tdata->td_winflags & TCP_SACK_PERMIT)) { 1908 inseq = 1; 1909 /* 1910 * RST ACK with SEQ equal to 0 is sent by some OSes (i.e. Solaris) as a 1911 * response to initial SYN packet, when there is no application 1912 * listeing to on a port, where the SYN packet has came to. 1913 */ 1914 } else if ((seq == 0) && (tcpflags == (TH_RST|TH_ACK)) && 1915 (ackskew >= -1) && (ackskew <= 1)) { 1916 inseq = 1; 1917 } else if (!(flags & IS_TCPFSM)) { 1918 1919 if (!(fdata->td_winflags & 1920 (TCP_WSCALE_SEEN|TCP_WSCALE_FIRST))) { 1921 /* 1922 * No TCPFSM and no window scaling, so make some 1923 * extra guesses. 1924 */ 1925 if ((seq == fdata->td_maxend) && (ackskew == 0)) 1926 inseq = 1; 1927 else if (SEQ_GE(seq + maxwin, fdata->td_end - maxwin)) 1928 inseq = 1; 1929 } 1930 } 1931 1932 if (inseq) { 1933 /* if ackskew < 0 then this should be due to fragmented 1934 * packets. 
There is no way to know the length of the 1935 * total packet in advance. 1936 * We do know the total length from the fragment cache though. 1937 * Note however that there might be more sessions with 1938 * exactly the same source and destination parameters in the 1939 * state cache (and source and destination is the only stuff 1940 * that is saved in the fragment cache). Note further that 1941 * some TCP connections in the state cache are hashed with 1942 * sport and dport as well which makes it not worthwhile to 1943 * look for them. 1944 * Thus, when ackskew is negative but still seems to belong 1945 * to this session, we bump up the destinations end value. 1946 */ 1947 if (ackskew < 0) { 1948 DTRACE_PROBE2(end_update_td, 1949 int, tdata->td_end, 1950 int, ack 1951 ); 1952 tdata->td_end = ack; 1953 } 1954 1955 /* update max window seen */ 1956 if (fdata->td_maxwin < win) { 1957 DTRACE_PROBE2(win_update_fd, 1958 int, fdata->td_maxwin, 1959 int, win 1960 ); 1961 fdata->td_maxwin = win; 1962 } 1963 1964 if (SEQ_GT(end, fdata->td_end)) { 1965 DTRACE_PROBE2(end_update_fd, 1966 int, fdata->td_end, 1967 int, end 1968 ); 1969 fdata->td_end = end; 1970 } 1971 1972 if (SEQ_GE(ack + win, tdata->td_maxend)) { 1973 DTRACE_PROBE2(max_end_update_td, 1974 int, tdata->td_maxend, 1975 int, ack + win 1976 ); 1977 tdata->td_maxend = ack + win; 1978 } 1979 1980 return 1; 1981 } 1982 fin->fin_flx |= FI_OOW; 1983 1984 #if defined(_KERNEL) 1985 if (!(SEQ_GE(seq, fdata->td_end - maxwin))) 1986 fin->fin_flx |= FI_NEG_OOW; 1987 #endif 1988 1989 return 0; 1990 } 1991 1992 1993 /* ------------------------------------------------------------------------ */ 1994 /* Function: fr_stclone */ 1995 /* Returns: ipstate_t* - NULL == cloning failed, */ 1996 /* else pointer to new state structure */ 1997 /* Parameters: fin(I) - pointer to packet information */ 1998 /* tcp(I) - pointer to TCP/UDP header */ 1999 /* is(I) - pointer to master state structure */ 2000 /* */ 2001 /* Create a "duplcate" state 
table entry from the master. */ 2002 /* ------------------------------------------------------------------------ */ 2003 static ipstate_t *fr_stclone(fin, tcp, is) 2004 fr_info_t *fin; 2005 tcphdr_t *tcp; 2006 ipstate_t *is; 2007 { 2008 ipstate_t *clone; 2009 u_32_t send; 2010 ipf_stack_t *ifs = fin->fin_ifs; 2011 2012 /* 2013 * Trigger automatic call to fr_state_flush() if the 2014 * table has reached capacity specified by hi watermark. 2015 */ 2016 if (ST_TAB_WATER_LEVEL(ifs) > ifs->ifs_state_flush_level_hi) 2017 ifs->ifs_fr_state_doflush = 1; 2018 2019 /* 2020 * If automatic flushing did not do its job, and the table 2021 * has filled up, don't try to create a new entry. A NULL 2022 * return will indicate that the cloning has failed. 2023 */ 2024 if (ifs->ifs_ips_num >= ifs->ifs_fr_statemax) { 2025 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 2026 return NULL; 2027 } 2028 2029 KMALLOC(clone, ipstate_t *); 2030 if (clone == NULL) 2031 return NULL; 2032 bcopy((char *)is, (char *)clone, sizeof(*clone)); 2033 2034 MUTEX_NUKE(&clone->is_lock); 2035 2036 clone->is_die = ONE_DAY + ifs->ifs_fr_ticks; 2037 clone->is_state[0] = 0; 2038 clone->is_state[1] = 0; 2039 send = ntohl(tcp->th_seq) + fin->fin_dlen - (TCP_OFF(tcp) << 2) + 2040 ((tcp->th_flags & TH_SYN) ? 1 : 0) + 2041 ((tcp->th_flags & TH_FIN) ? 
1 : 0); 2042 2043 if (fin->fin_rev == 1) { 2044 clone->is_dend = send; 2045 clone->is_maxdend = send; 2046 clone->is_send = 0; 2047 clone->is_maxswin = 1; 2048 clone->is_maxdwin = ntohs(tcp->th_win); 2049 if (clone->is_maxdwin == 0) 2050 clone->is_maxdwin = 1; 2051 } else { 2052 clone->is_send = send; 2053 clone->is_maxsend = send; 2054 clone->is_dend = 0; 2055 clone->is_maxdwin = 1; 2056 clone->is_maxswin = ntohs(tcp->th_win); 2057 if (clone->is_maxswin == 0) 2058 clone->is_maxswin = 1; 2059 } 2060 2061 clone->is_flags &= ~SI_CLONE; 2062 clone->is_flags |= SI_CLONED; 2063 fr_stinsert(clone, fin->fin_rev, ifs); 2064 clone->is_ref = 1; 2065 if (clone->is_p == IPPROTO_TCP) { 2066 (void) fr_tcp_age(&clone->is_sti, fin, ifs->ifs_ips_tqtqb, 2067 clone->is_flags); 2068 } 2069 MUTEX_EXIT(&clone->is_lock); 2070 #ifdef IPFILTER_SCAN 2071 (void) ipsc_attachis(is); 2072 #endif 2073 #ifdef IPFILTER_SYNC 2074 if (is->is_flags & IS_STATESYNC) 2075 clone->is_sync = ipfsync_new(SMC_STATE, fin, clone); 2076 #endif 2077 return clone; 2078 } 2079 2080 2081 /* ------------------------------------------------------------------------ */ 2082 /* Function: fr_matchsrcdst */ 2083 /* Returns: Nil */ 2084 /* Parameters: fin(I) - pointer to packet information */ 2085 /* is(I) - pointer to state structure */ 2086 /* src(I) - pointer to source address */ 2087 /* dst(I) - pointer to destination address */ 2088 /* tcp(I) - pointer to TCP/UDP header */ 2089 /* */ 2090 /* Match a state table entry against an IP packet. The logic below is that */ 2091 /* ret gets set to one if the match succeeds, else remains 0. If it is */ 2092 /* still 0 after the test. no match. 
*/
/* ------------------------------------------------------------------------ */
static ipstate_t *fr_matchsrcdst(fin, is, src, dst, tcp, cmask)
fr_info_t *fin;
ipstate_t *is;
i6addr_t *src, *dst;
tcphdr_t *tcp;
u_32_t cmask;
{
	int ret = 0, rev, out, flags, flx = 0, idx;
	u_short sp, dp;
	u_32_t cflx;
	void *ifp;
	ipf_stack_t *ifs = fin->fin_ifs;

	/*
	 * First guess at the direction: the packet is "reverse" when its
	 * destination is not the destination recorded in the state entry.
	 */
	rev = IP6_NEQ(&is->is_dst, dst);
	ifp = fin->fin_ifp;
	out = fin->fin_out;
	flags = is->is_flags;
	sp = 0;
	dp = 0;

	if (tcp != NULL) {
		sp = htons(fin->fin_sport);
		dp = ntohs(fin->fin_dport);
	}
	/*
	 * Addresses alone say "forward"; a non-wildcard port that differs
	 * from the state entry still makes this a reverse packet.
	 */
	if (!rev) {
		if (tcp != NULL) {
			if (!(flags & SI_W_SPORT) && (sp != is->is_sport))
				rev = 1;
			else if (!(flags & SI_W_DPORT) && (dp != is->is_dport))
				rev = 1;
		}
	}

	/* Slot in is_ifp[]/is_ifname[] for this in/out + forward/reverse. */
	idx = (out << 1) + rev;

	/*
	 * If the interface for this 'direction' is set, make sure it matches.
	 * An interface name that is not set matches any, as does a name of *.
	 */
	if ((is->is_ifp[idx] == NULL &&
	    (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) ||
	    is->is_ifp[idx] == ifp)
		ret = 1;

	if (ret == 0)
		return NULL;
	ret = 0;

	/*
	 * Match addresses and ports.
	 */
	if (rev == 0) {
		if ((IP6_EQ(&is->is_dst, dst) || (flags & SI_W_DADDR)) &&
		    (IP6_EQ(&is->is_src, src) || (flags & SI_W_SADDR))) {
			if (tcp) {
				if ((sp == is->is_sport || flags & SI_W_SPORT)&&
				    (dp == is->is_dport || flags & SI_W_DPORT))
					ret = 1;
			} else {
				ret = 1;
			}
		}
	} else {
		if ((IP6_EQ(&is->is_dst, src) || (flags & SI_W_DADDR)) &&
		    (IP6_EQ(&is->is_src, dst) || (flags & SI_W_SADDR))) {
			if (tcp) {
				if ((dp == is->is_sport || flags & SI_W_SPORT)&&
				    (sp == is->is_dport || flags & SI_W_DPORT))
					ret = 1;
			} else {
				ret = 1;
			}
		}
	}

	if (ret == 0)
		return NULL;

	/*
	 * Whether or not this should be here, is questionable, but the aim
	 * is to get this out of the main line.
	 * Without a TCP/UDP header the port wildcard and clone handling
	 * further down must not run, so those bits are masked out of the
	 * local copy of the flags.
	 */
	if (tcp == NULL)
		flags = is->is_flags & ~(SI_WILDP|SI_NEWFR|SI_CLONE|SI_CLONED);

	/*
	 * Only one of the source or destination address can be flagged as a
	 * wildcard.  Fill in the missing address, if set.
	 * For IPv6, if the address being copied in is multicast, then
	 * don't reset the wild flag - multicast causes it to be set in the
	 * first place!
	 */
	if ((flags & (SI_W_SADDR|SI_W_DADDR))) {
		fr_ip_t *fi = &fin->fin_fi;

		if ((flags & SI_W_SADDR) != 0) {
			if (rev == 0) {
#ifdef USE_INET6
				if (is->is_v == 6 &&
				    IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6))
					/*EMPTY*/;
				else
#endif
				{
					is->is_src = fi->fi_src;
					is->is_flags &= ~SI_W_SADDR;
				}
			} else {
#ifdef USE_INET6
				if (is->is_v == 6 &&
				    IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6))
					/*EMPTY*/;
				else
#endif
				{
					is->is_src = fi->fi_dst;
					is->is_flags &= ~SI_W_SADDR;
				}
			}
		} else if ((flags & SI_W_DADDR) != 0) {
			if (rev == 0) {
#ifdef USE_INET6
				if (is->is_v == 6 &&
				    IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6))
					/*EMPTY*/;
				else
#endif
				{
					is->is_dst = fi->fi_dst;
					is->is_flags &= ~SI_W_DADDR;
				}
			} else {
#ifdef USE_INET6
				if (is->is_v == 6 &&
				    IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6))
					/*EMPTY*/;
				else
#endif
				{
					is->is_dst = fi->fi_src;
					is->is_flags &= ~SI_W_DADDR;
				}
			}
		}
		/* No wildcard left in this entry: one fewer wild entry. */
		if ((is->is_flags & (SI_WILDA|SI_WILDP)) == 0) {
			ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild);
		}
	}

	flx = fin->fin_flx & cmask;
	cflx = is->is_flx[out][rev];

	/*
	 * Match up any flags set from IP options.
	 */
	if ((cflx && (flx != (cflx & cmask))) ||
	    ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]) ||
	    ((fin->fin_secmsk & is->is_secmsk) != is->is_sec) ||
	    ((fin->fin_auth & is->is_authmsk) != is->is_auth))
		return NULL;

	/*
	 * Packets marked FI_IGNORE match without updating the state entry
	 * any further.
	 */
	if ((fin->fin_flx & FI_IGNORE) != 0) {
		fin->fin_rev = rev;
		return is;
	}

	/*
	 * Only one of the source or destination port can be flagged as a
	 * wildcard.  When filling it in, fill in a copy of the matched entry
	 * if it has the cloning flag set.
	 */
	if ((flags & (SI_W_SPORT|SI_W_DPORT))) {
		if ((flags & SI_CLONE) != 0) {
			ipstate_t *clone;

			clone = fr_stclone(fin, tcp, is);
			if (clone == NULL)
				return NULL;
			is = clone;
		} else {
			ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild);
		}

		if ((flags & SI_W_SPORT) != 0) {
			if (rev == 0) {
				is->is_sport = sp;
				is->is_send = ntohl(tcp->th_seq);
			} else {
				is->is_sport = dp;
				is->is_send = ntohl(tcp->th_ack);
			}
			is->is_maxsend = is->is_send + 1;
		} else if ((flags & SI_W_DPORT) != 0) {
			if (rev == 0) {
				is->is_dport = dp;
				is->is_dend = ntohl(tcp->th_ack);
			} else {
				is->is_dport = sp;
				is->is_dend = ntohl(tcp->th_seq);
			}
			is->is_maxdend = is->is_dend + 1;
		}
		is->is_flags &= ~(SI_W_SPORT|SI_W_DPORT);
		if ((flags & SI_CLONED) && ifs->ifs_ipstate_logging)
			ipstate_log(is, ISL_CLONE, ifs);
	}

	/* NOTE(review): ret is not read again after this assignment. */
	ret = -1;

	/*
	 * First packet seen for this in/out + direction slot: remember its
	 * flags and option mask so later packets are compared against them.
	 */
	if (is->is_flx[out][rev] == 0) {
		is->is_flx[out][rev] = flx;
		is->is_opt[rev] = fin->fin_optmsk;
		if (is->is_v == 6) {
			is->is_opt[rev] &= ~0x8;
			is->is_optmsk[rev] &= ~0x8;
		}
	}

	/*
	 * Check if the interface name for this "direction" is set and if not,
	 * fill it in.
	 */
	if (is->is_ifp[idx] == NULL &&
	    (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) {
		is->is_ifp[idx] = ifp;
		COPYIFNAME(ifp, is->is_ifname[idx], fin->fin_v);
	}
	fin->fin_rev = rev;
	return is;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_checkicmpmatchingstate                                   */
/* Returns:     ipstate_t* - NULL == no matching state found,               */
/*                           else pointer to the matching state entry       */
/* Parameters:  fin(I) - pointer to packet information                      */
/*                                                                          */
/* If we've got an ICMP error message, using the information stored in the  */
/* ICMP packet, look for a matching state table entry.                      */
/*                                                                          */
/* If we return NULL then no lock on ipf_state is held.                     */
/* If we return non-null then a read-lock on ipf_state is held.             */
/* ------------------------------------------------------------------------ */
static ipstate_t *fr_checkicmpmatchingstate(fin)
fr_info_t *fin;
{
	ipstate_t *is, **isp;
	u_short sport, dport;
	u_char	pr;
	int backward, i, oi;
	i6addr_t dst, src;
	struct icmp *ic;
	u_short savelen;
	icmphdr_t *icmp;
	fr_info_t ofin;
	tcphdr_t *tcp;
	int len;
	ip_t *oip;
	u_int hv;
	ipf_stack_t *ifs = fin->fin_ifs;

	/*
	 * Does it at least have the return (basic) IP header ?
	 * Is it an actual recognised ICMP error type?
	 * Only a basic IP header (no options) should be with
	 * an ICMP error header.
	 */
	if ((fin->fin_v != 4) || (fin->fin_hlen != sizeof(ip_t)) ||
	    (fin->fin_plen < ICMPERR_MINPKTLEN) ||
	    !(fin->fin_flx & FI_ICMPERR))
		return NULL;
	ic = fin->fin_dp;

	/* oip: the IP header of the packet quoted inside the ICMP error. */
	oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN);
	/*
	 * Check that at least the old IP header (with options) and
	 * 8 bytes of payload are present.
	 */
	if (fin->fin_plen < ICMPERR_MAXPKTLEN + ((IP_HL(oip) - 5) << 2))
		return NULL;

	/*
	 * Sanity Checks.
	 */
	len = fin->fin_dlen - ICMPERR_ICMPHLEN;
	if ((len <= 0) || ((IP_HL(oip) << 2) > len))
		return NULL;

	/*
	 * Is the buffer big enough for all of it ?  It's the size of the IP
	 * header claimed in the encapsulated part which is of concern.  It
	 * may be too big to be in this buffer but not so big that it's
	 * outside the ICMP packet, leading to TCP deref's causing problems.
	 * This is possible because we don't know how big oip_hl is when we
	 * do the pullup early in fr_check() and thus can't guarantee it is
	 * all here now.
	 */
#ifdef  _KERNEL
	{
	mb_t *m;

	m = fin->fin_m;
# if defined(MENTAT)
	if ((char *)oip + len > (char *)m->b_wptr)
		return NULL;
# else
	if ((char *)oip + len > (char *)fin->fin_ip + m->m_len)
		return NULL;
# endif
	}
#endif
	bcopy((char *)fin, (char *)&ofin, sizeof(*fin));

	/*
	 * in the IPv4 case we must zero the i6addr union otherwise
	 * the IP6_EQ and IP6_NEQ macros produce the wrong results because
	 * of the 'junk' in the unused part of the union
	 */
	bzero((char *)&src, sizeof(src));
	bzero((char *)&dst, sizeof(dst));

	/*
	 * we make an fin entry to be able to feed it to
	 * matchsrcdst note that not all fields are necessary
	 * but this is the cleanest way. Note further we fill
	 * in fin_mp such that if someone uses it we'll get
	 * a kernel panic. fr_matchsrcdst does not use this.
	 *
	 * watch out here, as ip is in host order and oip in network
	 * order. Any change we make must be undone afterwards, like
	 * oip->ip_off - it is still in network byte order so fix it.
	 */
	savelen = oip->ip_len;
	oip->ip_len = len;
	oip->ip_off = ntohs(oip->ip_off);

	ofin.fin_flx = FI_NOCKSUM;
	ofin.fin_v = 4;
	ofin.fin_ip = oip;
	ofin.fin_m = NULL;	/* if dereferenced, panic XXX */
	ofin.fin_mp = NULL;	/* if dereferenced, panic XXX */
	ofin.fin_plen = fin->fin_dlen - ICMPERR_ICMPHLEN;
	(void) fr_makefrip(IP_HL(oip) << 2, oip, &ofin);
	ofin.fin_ifp = fin->fin_ifp;
	/* The quoted packet travelled the opposite way to this ICMP error. */
	ofin.fin_out = !fin->fin_out;
	/*
	 * Reset the short and bad flag here because in fr_matchsrcdst()
	 * the flags for the current packet (fin_flx) are compared against
	 * those for the existing session.
	 */
	ofin.fin_flx &= ~(FI_BAD|FI_SHORT);

	/*
	 * Put old values of ip_len and ip_off back as we don't know
	 * if we have to forward the packet (or process it again).
	 */
	oip->ip_len = savelen;
	oip->ip_off = htons(oip->ip_off);

	switch (oip->ip_p)
	{
	case IPPROTO_ICMP :
		/*
		 * an ICMP error can only be generated as a result of an
		 * ICMP query, not as the response on an ICMP error
		 *
		 * XXX theoretically ICMP_ECHOREP and the other reply's are
		 * ICMP query's as well, but adding them here seems strange XXX
		 */
		if ((ofin.fin_flx & FI_ICMPERR) != 0)
			return NULL;

		/*
		 * perform a lookup of the ICMP packet in the state table
		 */
		icmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
		hv = (pr = oip->ip_p);
		src.in4 = oip->ip_src;
		hv += src.in4.s_addr;
		dst.in4 = oip->ip_dst;
		hv += dst.in4.s_addr;
		hv += icmp->icmp_id;
		hv = DOUBLE_HASH(hv, ifs);

		READ_ENTER(&ifs->ifs_ipf_state);
		for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) {
			/*
			 * Advance before matching: "is" is overwritten with
			 * the result of fr_matchsrcdst() below.
			 */
			isp = &is->is_hnext;
			if ((is->is_p != pr) || (is->is_v != 4))
				continue;
			if (is->is_pass & FR_NOICMPERR)
				continue;
			is = fr_matchsrcdst(&ofin, is, &src, &dst,
					    NULL, FI_ICMPCMP);
			if (is != NULL) {
				if ((is->is_pass & FR_NOICMPERR) != 0) {
					RWLOCK_EXIT(&ifs->ifs_ipf_state);
					return NULL;
				}
				/*
				 * i  : the index of this packet (the icmp
				 *      unreachable)
				 * oi : the index of the original packet found
				 *      in the icmp header (i.e. the packet
				 *      causing this icmp)
				 * backward : original packet was backward
				 *      compared to the state
				 */
				backward = IP6_NEQ(&is->is_src, &src);
				fin->fin_rev = !backward;
				i = (!backward << 1) + fin->fin_out;
				oi = (backward << 1) + ofin.fin_out;
				/*
				 * Never accept more ICMP errors than packets
				 * that could have caused them.
				 */
				if (is->is_icmppkts[i] > is->is_pkts[oi])
					continue;
				ifs->ifs_ips_stats.iss_hits++;
				is->is_icmppkts[i]++;
				/* Return with the read lock still held. */
				return is;
			}
		}
		RWLOCK_EXIT(&ifs->ifs_ipf_state);
		return NULL;
	case IPPROTO_TCP :
	case IPPROTO_UDP :
		break;
	default :
		return NULL;
	}

	/*
	 * Only the port fields are read through this pointer; for UDP they
	 * are read via the tcphdr_t layout as well.
	 */
	tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
	dport = tcp->th_dport;
	sport = tcp->th_sport;

	hv = (pr = oip->ip_p);
	src.in4 = oip->ip_src;
	hv += src.in4.s_addr;
	dst.in4 = oip->ip_dst;
	hv += dst.in4.s_addr;
	hv += dport;
	hv += sport;
	hv = DOUBLE_HASH(hv, ifs);

	READ_ENTER(&ifs->ifs_ipf_state);
	for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) {
		isp = &is->is_hnext;
		/*
		 * Only allow this icmp through if the
		 * encapsulated packet was allowed through the
		 * other way around. Note that the minimal amount
		 * of info present does not allow for checking against
		 * tcp internals such as seq and ack numbers.   Only the
		 * ports are known to be present and can be even if the
		 * short flag is set.
		 */
		if ((is->is_p == pr) && (is->is_v == 4) &&
		    (is = fr_matchsrcdst(&ofin, is, &src, &dst,
					 tcp, FI_ICMPCMP))) {
			/*
			 * i  : the index of this packet (the icmp unreachable)
			 * oi : the index of the original packet found in the
			 *      icmp header (i.e. the packet causing this icmp)
			 * backward : original packet was backward compared to
			 *            the state
			 */
			backward = IP6_NEQ(&is->is_src, &src);
			fin->fin_rev = !backward;
			i = (!backward << 1) + fin->fin_out;
			oi = (backward << 1) + ofin.fin_out;

			/* break leads to the unlock + NULL return below. */
			if (((is->is_pass & FR_NOICMPERR) != 0) ||
			    (is->is_icmppkts[i] > is->is_pkts[oi]))
				break;
			ifs->ifs_ips_stats.iss_hits++;
			is->is_icmppkts[i]++;
			/*
			 * we deliberately do not touch the timeouts
			 * for the accompanying state table entry.
			 * It remains to be seen if that is correct. XXX
			 */
			return is;
		}
	}
	RWLOCK_EXIT(&ifs->ifs_ipf_state);
	return NULL;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_ipsmove                                                  */
/* Returns:     Nil                                                         */
/* Parameters:  is(I)  - pointer to state table entry                       */
/*              hv(I)  - new hash value for state table entry               */
/*              ifs    - ipf stack instance                                 */
/* Write Locks: ipf_state                                                   */
/*                                                                          */
/* Move a state entry from one position in the hash table to another.       */
/* The value passed in hv is run through DOUBLE_HASH() here before it is    */
/* used as the new bucket index.                                            */
/* ------------------------------------------------------------------------ */
static void fr_ipsmove(is, hv, ifs)
ipstate_t *is;
u_int hv;
ipf_stack_t *ifs;
{
	ipstate_t **isp;
	u_int hvm;

	ASSERT(rw_read_locked(&ifs->ifs_ipf_state.ipf_lk) == 0);

	hvm = is->is_hv;
	/*
	 * Remove the hash from the old location...
	 */
	isp = is->is_phnext;
	if (is->is_hnext)
		is->is_hnext->is_phnext = isp;
	*isp = is->is_hnext;
	/* The old bucket is now empty: one fewer bucket in use. */
	if (ifs->ifs_ips_table[hvm] == NULL)
		ifs->ifs_ips_stats.iss_inuse--;
	ifs->ifs_ips_stats.iss_bucketlen[hvm]--;

	/*
	 * ...and put the hash in the new one.
	 */
	hvm = DOUBLE_HASH(hv, ifs);
	is->is_hv = hvm;
	isp = &ifs->ifs_ips_table[hvm];
	if (*isp)
		(*isp)->is_phnext = &is->is_hnext;
	else
		ifs->ifs_ips_stats.iss_inuse++;
	ifs->ifs_ips_stats.iss_bucketlen[hvm]++;
	/* Insert at the head of the new bucket's chain. */
	is->is_phnext = isp;
	is->is_hnext = *isp;
	*isp = is;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_stlookup                                                 */
/* Returns:     ipstate_t* - NULL == no matching state found,               */
/*                           else pointer to state information is returned  */
/* Parameters:  fin(I)  - pointer to packet information                     */
/*              tcp(I)  - pointer to TCP/UDP header.  The value passed in   */
/*                        is replaced with fin->fin_dp on entry.            */
/*              ifqp(O) - if not NULL, where to store the timeout queue     */
/*                        chosen for the matching entry.  Nothing is stored */
/*                        for TCP/UDP or for the default protocol case,     */
/*                        which set ifqp to NULL.                           */
/*                                                                          */
/* Search the state table for a matching entry to the packet described by   */
/* the contents of *fin.                                                    */
/*                                                                          */
/* If we return NULL then no lock on ipf_state is held.                     */
/* If we return non-null then a read-lock on ipf_state is held.             */
/* NOTE(review): on the retry passes the lock is taken with WRITE_ENTER and */
/* is only downgraded when the entry is moved with fr_ipsmove(); a match    */
/* that still has wildcard flags set returns without that downgrade.        */
/* ------------------------------------------------------------------------ */
ipstate_t *fr_stlookup(fin, tcp, ifqp)
fr_info_t *fin;
tcphdr_t *tcp;
ipftq_t **ifqp;
{
	u_int hv, hvm, pr, v, tryagain;
	ipstate_t *is, **isp;
	u_short dport, sport;
	i6addr_t src, dst;
	struct icmp *ic;
	ipftq_t *ifq;
	int oow;
	ipf_stack_t *ifs = fin->fin_ifs;

	is = NULL;
	ifq = NULL;
	tcp = fin->fin_dp;
	ic = (struct icmp *)tcp;
	/* The hash starts from the protocol number plus both addresses. */
	hv = (pr = fin->fin_fi.fi_p);
	src = fin->fin_fi.fi_src;
	dst = fin->fin_fi.fi_dst;
	hv += src.in4.s_addr;
	hv += dst.in4.s_addr;

	v = fin->fin_fi.fi_v;
#ifdef	USE_INET6
	if (v == 6) {
		/* Word 0 of each address was added above via in4.s_addr. */
		hv  += fin->fin_fi.fi_src.i6[1];
		hv  += fin->fin_fi.fi_src.i6[2];
		hv  += fin->fin_fi.fi_src.i6[3];

		if ((fin->fin_p == IPPROTO_ICMPV6) &&
		    IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_dst.in6)) {
			hv -= dst.in4.s_addr;
		} else {
			hv += fin->fin_fi.fi_dst.i6[1];
			hv += fin->fin_fi.fi_dst.i6[2];
			hv += fin->fin_fi.fi_dst.i6[3];
		}
	}
#endif
	/*
	 * For IPv4 multicast/broadcast packets one address is taken back
	 * out of the hash: the source for inbound packets, the destination
	 * for outbound ones.
	 */
	if ((v == 4) &&
	    (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) {
		if (fin->fin_out == 0) {
			hv -= src.in4.s_addr;
		} else {
			hv -= dst.in4.s_addr;
		}
	}

	/*
	 * Search the hash table for matching packet header info.
	 */
	switch (pr)
	{
#ifdef	USE_INET6
	case IPPROTO_ICMPV6 :
		tryagain = 0;
		if (v == 6) {
			if ((ic->icmp_type == ICMP6_ECHO_REQUEST) ||
			    (ic->icmp_type == ICMP6_ECHO_REPLY)) {
				hv += ic->icmp_id;
			}
		}
		READ_ENTER(&ifs->ifs_ipf_state);
icmp6again:
		hvm = DOUBLE_HASH(hv, ifs);
		for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) {
			isp = &is->is_hnext;
			if ((is->is_p != pr) || (is->is_v != v))
				continue;
			is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
			if (is != NULL &&
			    fr_matchicmpqueryreply(v, &is->is_icmp,
						   ic, fin->fin_rev)) {
				if (fin->fin_rev)
					ifq = &ifs->ifs_ips_icmpacktq;
				else
					ifq = &ifs->ifs_ips_icmptq;
				break;
			}
		}

		if (is != NULL) {
			/*
			 * Found on the retry pass and no longer wild on the
			 * destination: rehash the entry with the source
			 * address included again.
			 */
			if ((tryagain != 0) && !(is->is_flags & SI_W_DADDR)) {
				hv += fin->fin_fi.fi_src.i6[0];
				hv += fin->fin_fi.fi_src.i6[1];
				hv += fin->fin_fi.fi_src.i6[2];
				hv += fin->fin_fi.fi_src.i6[3];
				fr_ipsmove(is, hv, ifs);
				MUTEX_DOWNGRADE(&ifs->ifs_ipf_state);
			}
			break;
		}
		RWLOCK_EXIT(&ifs->ifs_ipf_state);

		/*
		 * No matching icmp state entry. Perhaps this is a
		 * response to another state entry.
		 *
		 * XXX With some ICMP6 packets, the "other" address is already
		 * in the packet, after the ICMP6 header, and this could be
		 * used in place of the multicast address.  However, taking
		 * advantage of this requires some significant code changes
		 * to handle the specific types where that is the case.
		 */
		if ((ifs->ifs_ips_stats.iss_wild != 0) && (v == 6) && (tryagain == 0) &&
		    !IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_src.in6)) {
			hv -= fin->fin_fi.fi_src.i6[0];
			hv -= fin->fin_fi.fi_src.i6[1];
			hv -= fin->fin_fi.fi_src.i6[2];
			hv -= fin->fin_fi.fi_src.i6[3];
			tryagain = 1;
			WRITE_ENTER(&ifs->ifs_ipf_state);
			goto icmp6again;
		}

		is = fr_checkicmp6matchingstate(fin);
		if (is != NULL)
			return is;
		break;
#endif

	case IPPROTO_ICMP :
		if (v == 4) {
			hv += ic->icmp_id;
		}
		hv = DOUBLE_HASH(hv, ifs);
		READ_ENTER(&ifs->ifs_ipf_state);
		for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) {
			isp = &is->is_hnext;
			if ((is->is_p != pr) || (is->is_v != v))
				continue;
			is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
			if (is != NULL &&
			    fr_matchicmpqueryreply(v, &is->is_icmp,
						   ic, fin->fin_rev)) {
				if (fin->fin_rev)
					ifq = &ifs->ifs_ips_icmpacktq;
				else
					ifq = &ifs->ifs_ips_icmptq;
				break;
			}
		}
		if (is == NULL) {
			RWLOCK_EXIT(&ifs->ifs_ipf_state);
		}
		break;

	case IPPROTO_TCP :
	case IPPROTO_UDP :
		/* No queue is handed back to the caller for TCP/UDP. */
		ifqp = NULL;
		sport = htons(fin->fin_data[0]);
		hv += sport;
		dport = htons(fin->fin_data[1]);
		hv += dport;
		oow = 0;
		tryagain = 0;
		READ_ENTER(&ifs->ifs_ipf_state);
retry_tcpudp:
		hvm = DOUBLE_HASH(hv, ifs);
		for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) {
			isp = &is->is_hnext;
			if ((is->is_p != pr) || (is->is_v != v))
				continue;
			fin->fin_flx &= ~FI_OOW;
			is = fr_matchsrcdst(fin, is, &src, &dst, tcp, FI_CMP);
			if (is != NULL) {
				if (pr == IPPROTO_TCP) {
					/*
					 * Remember an out-of-window verdict
					 * and keep searching the bucket.
					 */
					if (!fr_tcpstate(fin, tcp, is)) {
						oow |= fin->fin_flx & FI_OOW;
						continue;
					}
				}
				break;
			}
		}
		if (is != NULL) {
			/*
			 * Found on a retry pass and no longer wild or
			 * clonable: rehash the entry with the ports included.
			 */
			if (tryagain &&
			    !(is->is_flags & (SI_CLONE|SI_WILDP|SI_WILDA))) {
				hv += dport;
				hv += sport;
				fr_ipsmove(is, hv, ifs);
				MUTEX_DOWNGRADE(&ifs->ifs_ipf_state);
			}
			break;
		}
		RWLOCK_EXIT(&ifs->ifs_ipf_state);

		/*
		 * Retries only happen while wildcard entries exist.  The
		 * first retry hashes without the ports; the second rebuilds
		 * the hash from the protocol, one address and the ports.
		 */
		if (ifs->ifs_ips_stats.iss_wild) {
			if (tryagain == 0) {
				hv -= dport;
				hv -= sport;
			} else if (tryagain == 1) {
				hv = fin->fin_fi.fi_p;
				/*
				 * If we try to pretend this is a reply to a
				 * multicast/broadcast packet then we need to
				 * exclude part of the address from the hash
				 * calculation.
				 */
				if (fin->fin_out == 0) {
					hv += src.in4.s_addr;
				} else {
					hv += dst.in4.s_addr;
				}
				hv += dport;
				hv += sport;
			}
			tryagain++;
			if (tryagain <= 2) {
				WRITE_ENTER(&ifs->ifs_ipf_state);
				goto retry_tcpudp;
			}
		}
		fin->fin_flx |= oow;
		break;

#if 0
	case IPPROTO_GRE :
		gre = fin->fin_dp;
		if (GRE_REV(gre->gr_flags) == 1) {
			hv += gre->gr_call;
		}
		/* FALLTHROUGH */
#endif
	default :
		ifqp = NULL;
		hvm = DOUBLE_HASH(hv, ifs);
		READ_ENTER(&ifs->ifs_ipf_state);
		for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) {
			isp = &is->is_hnext;
			if ((is->is_p != pr) || (is->is_v != v))
				continue;
			is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
			if (is != NULL) {
				ifq = &ifs->ifs_ips_iptq;
				break;
			}
		}
		if (is == NULL) {
			RWLOCK_EXIT(&ifs->ifs_ipf_state);
		}
		break;
	}

	/* A rule-based queue for this direction overrides the default. */
	if ((is != NULL) && ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) &&
	    (is->is_tqehead[fin->fin_rev] != NULL))
		ifq = is->is_tqehead[fin->fin_rev];
	if (ifq != NULL && ifqp != NULL)
		*ifqp = ifq;
	return is;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_updatestate                                              */
/* Returns:     Nil                                                         */
/* Parameters:  fin(I) - pointer to packet information                      */
/*              is(I)  - pointer to state table entry                       */
/*
Read Locks: ipf_state */ 2901 /* */ 2902 /* Updates packet and byte counters for a newly received packet. Seeds the */ 2903 /* fragment cache with a new entry as required. */ 2904 /* ------------------------------------------------------------------------ */ 2905 void fr_updatestate(fin, is, ifq) 2906 fr_info_t *fin; 2907 ipstate_t *is; 2908 ipftq_t *ifq; 2909 { 2910 ipftqent_t *tqe; 2911 int i, pass; 2912 ipf_stack_t *ifs = fin->fin_ifs; 2913 2914 i = (fin->fin_rev << 1) + fin->fin_out; 2915 2916 /* 2917 * For TCP packets, ifq == NULL. For all others, check if this new 2918 * queue is different to the last one it was on and move it if so. 2919 */ 2920 tqe = &is->is_sti; 2921 MUTEX_ENTER(&is->is_lock); 2922 if ((tqe->tqe_flags & TQE_RULEBASED) != 0) 2923 ifq = is->is_tqehead[fin->fin_rev]; 2924 2925 if (ifq != NULL) 2926 fr_movequeue(tqe, tqe->tqe_ifq, ifq, ifs); 2927 2928 is->is_pkts[i]++; 2929 fin->fin_pktnum = is->is_pkts[i] + is->is_icmppkts[i]; 2930 is->is_bytes[i] += fin->fin_plen; 2931 MUTEX_EXIT(&is->is_lock); 2932 2933 #ifdef IPFILTER_SYNC 2934 if (is->is_flags & IS_STATESYNC) 2935 ipfsync_update(SMC_STATE, fin, is->is_sync); 2936 #endif 2937 2938 ATOMIC_INCL(ifs->ifs_ips_stats.iss_hits); 2939 2940 fin->fin_fr = is->is_rule; 2941 2942 /* 2943 * If this packet is a fragment and the rule says to track fragments, 2944 * then create a new fragment cache entry. 2945 */ 2946 pass = is->is_pass; 2947 if ((fin->fin_flx & FI_FRAG) && FR_ISPASS(pass)) 2948 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); 2949 } 2950 2951 2952 /* ------------------------------------------------------------------------ */ 2953 /* Function: fr_checkstate */ 2954 /* Returns: frentry_t* - NULL == search failed, */ 2955 /* else pointer to rule for matching state */ 2956 /* Parameters: ifp(I) - pointer to interface */ 2957 /* passp(I) - pointer to filtering result flags */ 2958 /* */ 2959 /* Check if a packet is associated with an entry in the state table. 
*/ 2960 /* ------------------------------------------------------------------------ */ 2961 frentry_t *fr_checkstate(fin, passp) 2962 fr_info_t *fin; 2963 u_32_t *passp; 2964 { 2965 ipstate_t *is; 2966 frentry_t *fr; 2967 tcphdr_t *tcp; 2968 ipftq_t *ifq; 2969 u_int pass; 2970 ipf_stack_t *ifs = fin->fin_ifs; 2971 2972 if (ifs->ifs_fr_state_lock || (ifs->ifs_ips_list == NULL) || 2973 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) 2974 return NULL; 2975 2976 is = NULL; 2977 if ((fin->fin_flx & FI_TCPUDP) || 2978 (fin->fin_fi.fi_p == IPPROTO_ICMP) 2979 #ifdef USE_INET6 2980 || (fin->fin_fi.fi_p == IPPROTO_ICMPV6) 2981 #endif 2982 ) 2983 tcp = fin->fin_dp; 2984 else 2985 tcp = NULL; 2986 2987 /* 2988 * Search the hash table for matching packet header info. 2989 */ 2990 ifq = NULL; 2991 is = fr_stlookup(fin, tcp, &ifq); 2992 switch (fin->fin_p) 2993 { 2994 #ifdef USE_INET6 2995 case IPPROTO_ICMPV6 : 2996 if (is != NULL) 2997 break; 2998 if (fin->fin_v == 6) { 2999 is = fr_checkicmp6matchingstate(fin); 3000 if (is != NULL) 3001 goto matched; 3002 } 3003 break; 3004 #endif 3005 case IPPROTO_ICMP : 3006 if (is != NULL) 3007 break; 3008 /* 3009 * No matching icmp state entry. Perhaps this is a 3010 * response to another state entry. 
3011 */ 3012 is = fr_checkicmpmatchingstate(fin); 3013 if (is != NULL) 3014 goto matched; 3015 break; 3016 case IPPROTO_TCP : 3017 if (is == NULL) 3018 break; 3019 3020 if (is->is_pass & FR_NEWISN) { 3021 if (fin->fin_out == 0) 3022 fr_fixinisn(fin, is); 3023 else if (fin->fin_out == 1) 3024 fr_fixoutisn(fin, is); 3025 } 3026 break; 3027 default : 3028 if (fin->fin_rev) 3029 ifq = &ifs->ifs_ips_udpacktq; 3030 else 3031 ifq = &ifs->ifs_ips_udptq; 3032 break; 3033 } 3034 if (is == NULL) { 3035 ATOMIC_INCL(ifs->ifs_ips_stats.iss_miss); 3036 return NULL; 3037 } 3038 3039 matched: 3040 fr = is->is_rule; 3041 if (fr != NULL) { 3042 if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) { 3043 if (fin->fin_nattag == NULL) 3044 return NULL; 3045 if (fr_matchtag(&fr->fr_nattag, fin->fin_nattag) != 0) 3046 return NULL; 3047 } 3048 (void) strncpy(fin->fin_group, fr->fr_group, FR_GROUPLEN); 3049 fin->fin_icode = fr->fr_icode; 3050 } 3051 3052 fin->fin_rule = is->is_rulen; 3053 pass = is->is_pass; 3054 fr_updatestate(fin, is, ifq); 3055 3056 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3057 fin->fin_flx |= FI_STATE; 3058 if ((pass & FR_LOGFIRST) != 0) 3059 pass &= ~(FR_LOGFIRST|FR_LOG); 3060 *passp = pass; 3061 return fr; 3062 } 3063 3064 3065 /* ------------------------------------------------------------------------ */ 3066 /* Function: fr_fixoutisn */ 3067 /* Returns: Nil */ 3068 /* Parameters: fin(I) - pointer to packet information */ 3069 /* is(I) - pointer to master state structure */ 3070 /* */ 3071 /* Called only for outbound packets, adjusts the sequence number and the */ 3072 /* TCP checksum to match that change. 
*/ 3073 /* ------------------------------------------------------------------------ */ 3074 static void fr_fixoutisn(fin, is) 3075 fr_info_t *fin; 3076 ipstate_t *is; 3077 { 3078 tcphdr_t *tcp; 3079 int rev; 3080 u_32_t seq; 3081 3082 tcp = fin->fin_dp; 3083 rev = fin->fin_rev; 3084 if ((is->is_flags & IS_ISNSYN) != 0) { 3085 if (rev == 0) { 3086 seq = ntohl(tcp->th_seq); 3087 seq += is->is_isninc[0]; 3088 tcp->th_seq = htonl(seq); 3089 fix_outcksum(&tcp->th_sum, is->is_sumd[0]); 3090 } 3091 } 3092 if ((is->is_flags & IS_ISNACK) != 0) { 3093 if (rev == 1) { 3094 seq = ntohl(tcp->th_seq); 3095 seq += is->is_isninc[1]; 3096 tcp->th_seq = htonl(seq); 3097 fix_outcksum(&tcp->th_sum, is->is_sumd[1]); 3098 } 3099 } 3100 } 3101 3102 3103 /* ------------------------------------------------------------------------ */ 3104 /* Function: fr_fixinisn */ 3105 /* Returns: Nil */ 3106 /* Parameters: fin(I) - pointer to packet information */ 3107 /* is(I) - pointer to master state structure */ 3108 /* */ 3109 /* Called only for inbound packets, adjusts the acknowledge number and the */ 3110 /* TCP checksum to match that change. 
*/ 3111 /* ------------------------------------------------------------------------ */ 3112 static void fr_fixinisn(fin, is) 3113 fr_info_t *fin; 3114 ipstate_t *is; 3115 { 3116 tcphdr_t *tcp; 3117 int rev; 3118 u_32_t ack; 3119 3120 tcp = fin->fin_dp; 3121 rev = fin->fin_rev; 3122 if ((is->is_flags & IS_ISNSYN) != 0) { 3123 if (rev == 1) { 3124 ack = ntohl(tcp->th_ack); 3125 ack -= is->is_isninc[0]; 3126 tcp->th_ack = htonl(ack); 3127 fix_incksum(&tcp->th_sum, is->is_sumd[0]); 3128 } 3129 } 3130 if ((is->is_flags & IS_ISNACK) != 0) { 3131 if (rev == 0) { 3132 ack = ntohl(tcp->th_ack); 3133 ack -= is->is_isninc[1]; 3134 tcp->th_ack = htonl(ack); 3135 fix_incksum(&tcp->th_sum, is->is_sumd[1]); 3136 } 3137 } 3138 } 3139 3140 3141 /* ------------------------------------------------------------------------ */ 3142 /* Function: fr_statesync */ 3143 /* Returns: Nil */ 3144 /* Parameters: action(I) - type of synchronisation to do */ 3145 /* v(I) - IP version being sync'd (v4 or v6) */ 3146 /* ifp(I) - interface identifier associated with action */ 3147 /* name(I) - name associated with ifp parameter */ 3148 /* */ 3149 /* Walk through all state entries and if an interface pointer match is */ 3150 /* found then look it up again, based on its name in case the pointer has */ 3151 /* changed since last time. */ 3152 /* */ 3153 /* If ifp is passed in as being non-null then we are only doing updates for */ 3154 /* existing, matching, uses of it. 
*/ 3155 /* ------------------------------------------------------------------------ */ 3156 void fr_statesync(action, v, ifp, name, ifs) 3157 int action, v; 3158 void *ifp; 3159 char *name; 3160 ipf_stack_t *ifs; 3161 { 3162 ipstate_t *is; 3163 int i; 3164 3165 if (ifs->ifs_fr_running <= 0) 3166 return; 3167 3168 WRITE_ENTER(&ifs->ifs_ipf_state); 3169 3170 if (ifs->ifs_fr_running <= 0) { 3171 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3172 return; 3173 } 3174 3175 switch (action) 3176 { 3177 case IPFSYNC_RESYNC : 3178 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3179 if (v != 0 && is->is_v != v) 3180 continue; 3181 /* 3182 * Look up all the interface names in the state entry. 3183 */ 3184 for (i = 0; i < 4; i++) { 3185 is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], 3186 is->is_v, ifs); 3187 } 3188 } 3189 break; 3190 case IPFSYNC_NEWIFP : 3191 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3192 if (v != 0 && is->is_v != v) 3193 continue; 3194 /* 3195 * Look up all the interface names in the state entry. 3196 */ 3197 for (i = 0; i < 4; i++) { 3198 if (!strncmp(is->is_ifname[i], name, 3199 sizeof(is->is_ifname[i]))) 3200 is->is_ifp[i] = ifp; 3201 } 3202 } 3203 break; 3204 case IPFSYNC_OLDIFP : 3205 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3206 if (v != 0 && is->is_v != v) 3207 continue; 3208 /* 3209 * Look up all the interface names in the state entry. 
3210 */ 3211 for (i = 0; i < 4; i++) { 3212 if (is->is_ifp[i] == ifp) 3213 is->is_ifp[i] = (void *)-1; 3214 } 3215 } 3216 break; 3217 } 3218 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3219 } 3220 3221 3222 /* ------------------------------------------------------------------------ */ 3223 /* Function: fr_delstate */ 3224 /* Returns: int - 0 = entry deleted, else ref count on entry */ 3225 /* Parameters: is(I) - pointer to state structure to delete */ 3226 /* why(I) - if not 0, log reason why it was deleted */ 3227 /* ifs - ipf stack instance */ 3228 /* Write Locks: ipf_state/ipf_global */ 3229 /* */ 3230 /* Deletes a state entry from the enumerated list as well as the hash table */ 3231 /* and timeout queue lists. Make adjustments to hash table statistics and */ 3232 /* global counters as required. */ 3233 /* ------------------------------------------------------------------------ */ 3234 int fr_delstate(is, why, ifs) 3235 ipstate_t *is; 3236 int why; 3237 ipf_stack_t *ifs; 3238 { 3239 int removed = 0; 3240 3241 ASSERT(rw_write_held(&ifs->ifs_ipf_global.ipf_lk) == 0 || 3242 rw_write_held(&ifs->ifs_ipf_state.ipf_lk) == 0); 3243 3244 /* 3245 * Start by removing the entry from the hash table of state entries 3246 * so it will not be "used" again. 3247 * 3248 * It will remain in the "list" of state entries until all references 3249 * have been accounted for. 3250 */ 3251 if (is->is_phnext != NULL) { 3252 removed = 1; 3253 *is->is_phnext = is->is_hnext; 3254 if (is->is_hnext != NULL) 3255 is->is_hnext->is_phnext = is->is_phnext; 3256 if (ifs->ifs_ips_table[is->is_hv] == NULL) 3257 ifs->ifs_ips_stats.iss_inuse--; 3258 ifs->ifs_ips_stats.iss_bucketlen[is->is_hv]--; 3259 3260 is->is_phnext = NULL; 3261 is->is_hnext = NULL; 3262 } 3263 3264 /* 3265 * Because ifs->ifs_ips_stats.iss_wild is a count of entries in the state 3266 * table that have wildcard flags set, only decerement it once 3267 * and do it here. 
3268 */ 3269 if (is->is_flags & (SI_WILDP|SI_WILDA)) { 3270 if (!(is->is_flags & SI_CLONED)) { 3271 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); 3272 } 3273 is->is_flags &= ~(SI_WILDP|SI_WILDA); 3274 } 3275 3276 /* 3277 * Next, remove it from the timeout queue it is in. 3278 */ 3279 fr_deletequeueentry(&is->is_sti); 3280 3281 is->is_me = NULL; 3282 3283 /* 3284 * If it is still in use by something else, do not go any further, 3285 * but note that at this point it is now an orphan. 3286 */ 3287 MUTEX_ENTER(&is->is_lock); 3288 if (is->is_ref > 1) { 3289 is->is_ref--; 3290 MUTEX_EXIT(&is->is_lock); 3291 if (removed) 3292 ifs->ifs_ips_stats.iss_orphans++; 3293 return (is->is_ref); 3294 } 3295 MUTEX_EXIT(&is->is_lock); 3296 3297 is->is_ref = 0; 3298 3299 /* 3300 * If entry has already been removed from table, 3301 * it means we're simply cleaning up an orphan. 3302 */ 3303 if (!removed) 3304 ifs->ifs_ips_stats.iss_orphans--; 3305 3306 if (is->is_tqehead[0] != NULL) 3307 (void) fr_deletetimeoutqueue(is->is_tqehead[0]); 3308 3309 if (is->is_tqehead[1] != NULL) 3310 (void) fr_deletetimeoutqueue(is->is_tqehead[1]); 3311 3312 #ifdef IPFILTER_SYNC 3313 if (is->is_sync) 3314 ipfsync_del(is->is_sync); 3315 #endif 3316 #ifdef IPFILTER_SCAN 3317 (void) ipsc_detachis(is); 3318 #endif 3319 3320 /* 3321 * Now remove it from master list of state table entries. 
3322 */ 3323 if (is->is_pnext != NULL) { 3324 *is->is_pnext = is->is_next; 3325 if (is->is_next != NULL) { 3326 is->is_next->is_pnext = is->is_pnext; 3327 is->is_next = NULL; 3328 } 3329 is->is_pnext = NULL; 3330 } 3331 3332 if (ifs->ifs_ipstate_logging != 0 && why != 0) 3333 ipstate_log(is, why, ifs); 3334 3335 if (is->is_rule != NULL) { 3336 is->is_rule->fr_statecnt--; 3337 (void)fr_derefrule(&is->is_rule, ifs); 3338 } 3339 3340 MUTEX_DESTROY(&is->is_lock); 3341 KFREE(is); 3342 ifs->ifs_ips_num--; 3343 3344 return (0); 3345 } 3346 3347 3348 /* ------------------------------------------------------------------------ */ 3349 /* Function: fr_timeoutstate */ 3350 /* Returns: Nil */ 3351 /* Parameters: ifs - ipf stack instance */ 3352 /* */ 3353 /* Slowly expire held state for thingslike UDP and ICMP. The algorithm */ 3354 /* used here is to keep the queue sorted with the oldest things at the top */ 3355 /* and the youngest at the bottom. So if the top one doesn't need to be */ 3356 /* expired then neither will any under it. 
*/ 3357 /* ------------------------------------------------------------------------ */ 3358 void fr_timeoutstate(ifs) 3359 ipf_stack_t *ifs; 3360 { 3361 ipftq_t *ifq, *ifqnext; 3362 ipftqent_t *tqe, *tqn; 3363 ipstate_t *is; 3364 SPL_INT(s); 3365 3366 SPL_NET(s); 3367 WRITE_ENTER(&ifs->ifs_ipf_state); 3368 for (ifq = ifs->ifs_ips_tqtqb; ifq != NULL; ifq = ifq->ifq_next) 3369 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { 3370 if (tqe->tqe_die > ifs->ifs_fr_ticks) 3371 break; 3372 tqn = tqe->tqe_next; 3373 is = tqe->tqe_parent; 3374 (void) fr_delstate(is, ISL_EXPIRE, ifs); 3375 } 3376 3377 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifq->ifq_next) { 3378 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { 3379 if (tqe->tqe_die > ifs->ifs_fr_ticks) 3380 break; 3381 tqn = tqe->tqe_next; 3382 is = tqe->tqe_parent; 3383 (void) fr_delstate(is, ISL_EXPIRE, ifs); 3384 } 3385 } 3386 3387 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) { 3388 ifqnext = ifq->ifq_next; 3389 3390 if (((ifq->ifq_flags & IFQF_DELETE) != 0) && 3391 (ifq->ifq_ref == 0)) { 3392 fr_freetimeoutqueue(ifq, ifs); 3393 } 3394 } 3395 3396 if (ifs->ifs_fr_state_doflush) { 3397 (void) fr_state_flush(FLUSH_TABLE_EXTRA, 0, ifs); 3398 ifs->ifs_fr_state_doflush = 0; 3399 } 3400 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3401 SPL_X(s); 3402 } 3403 3404 3405 /* ---------------------------------------------------------------------- */ 3406 /* Function: fr_state_flush */ 3407 /* Returns: int - 0 == success, -1 == failure */ 3408 /* Parameters: flush_option - how to flush the active State table */ 3409 /* proto - IP version to flush (4, 6, or both) */ 3410 /* ifs - ipf stack instance */ 3411 /* Write Locks: ipf_state */ 3412 /* */ 3413 /* Flush state tables. 
Three possible flush options currently defined: */ 3414 /* */ 3415 /* FLUSH_TABLE_ALL : Flush all state table entries */ 3416 /* */ 3417 /* FLUSH_TABLE_CLOSING : Flush entries with TCP connections which */ 3418 /* have started to close on both ends using */ 3419 /* ipf_flushclosing(). */ 3420 /* */ 3421 /* FLUSH_TABLE_EXTRA : First, flush entries which are "almost" closed. */ 3422 /* Then, if needed, flush entries with TCP */ 3423 /* connections which have been idle for a long */ 3424 /* time with ipf_extraflush(). */ 3425 /* ---------------------------------------------------------------------- */ 3426 static int fr_state_flush(flush_option, proto, ifs) 3427 int flush_option, proto; 3428 ipf_stack_t *ifs; 3429 { 3430 ipstate_t *is, *isn; 3431 int removed; 3432 SPL_INT(s); 3433 3434 removed = 0; 3435 3436 SPL_NET(s); 3437 switch (flush_option) 3438 { 3439 case FLUSH_TABLE_ALL: 3440 isn = ifs->ifs_ips_list; 3441 while ((is = isn) != NULL) { 3442 isn = is->is_next; 3443 if ((proto != 0) && (is->is_v != proto)) 3444 continue; 3445 if (fr_delstate(is, ISL_FLUSH, ifs) == 0) 3446 removed++; 3447 } 3448 break; 3449 3450 case FLUSH_TABLE_CLOSING: 3451 removed = ipf_flushclosing(STATE_FLUSH, 3452 IPF_TCPS_CLOSE_WAIT, 3453 ifs->ifs_ips_tqtqb, 3454 ifs->ifs_ips_utqe, 3455 ifs); 3456 break; 3457 3458 case FLUSH_TABLE_EXTRA: 3459 removed = ipf_flushclosing(STATE_FLUSH, 3460 IPF_TCPS_FIN_WAIT_2, 3461 ifs->ifs_ips_tqtqb, 3462 ifs->ifs_ips_utqe, 3463 ifs); 3464 3465 /* 3466 * Be sure we haven't done this in the last 10 seconds. 
3467 */ 3468 if (ifs->ifs_fr_ticks - ifs->ifs_ips_last_force_flush < 3469 IPF_TTLVAL(10)) 3470 break; 3471 ifs->ifs_ips_last_force_flush = ifs->ifs_fr_ticks; 3472 removed += ipf_extraflush(STATE_FLUSH, 3473 &ifs->ifs_ips_tqtqb[IPF_TCPS_ESTABLISHED], 3474 ifs->ifs_ips_utqe, 3475 ifs); 3476 break; 3477 3478 default: /* Flush Nothing */ 3479 break; 3480 } 3481 3482 SPL_X(s); 3483 return (removed); 3484 } 3485 3486 3487 /* ------------------------------------------------------------------------ */ 3488 /* Function: fr_tcp_age */ 3489 /* Returns: int - 1 == state transition made, 0 == no change (rejected) */ 3490 /* Parameters: tq(I) - pointer to timeout queue information */ 3491 /* fin(I) - pointer to packet information */ 3492 /* tqtab(I) - TCP timeout queue table this is in */ 3493 /* flags(I) - flags from state/NAT entry */ 3494 /* */ 3495 /* Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29: */ 3496 /* */ 3497 /* - (try to) base state transitions on real evidence only, */ 3498 /* i.e. packets that are sent and have been received by ipfilter; */ 3499 /* diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used. */ 3500 /* */ 3501 /* - deal with half-closed connections correctly; */ 3502 /* */ 3503 /* - store the state of the source in state[0] such that ipfstat */ 3504 /* displays the state as source/dest instead of dest/source; the calls */ 3505 /* to fr_tcp_age have been changed accordingly. */ 3506 /* */ 3507 /* Internal Parameters: */ 3508 /* */ 3509 /* state[0] = state of source (host that initiated connection) */ 3510 /* state[1] = state of dest (host that accepted the connection) */ 3511 /* */ 3512 /* dir == 0 : a packet from source to dest */ 3513 /* dir == 1 : a packet from dest to source */ 3514 /* */ 3515 /* Locking: it is assumed that the parent of the tqe structure is locked. 
*/ 3516 /* ------------------------------------------------------------------------ */ 3517 int fr_tcp_age(tqe, fin, tqtab, flags) 3518 ipftqent_t *tqe; 3519 fr_info_t *fin; 3520 ipftq_t *tqtab; 3521 int flags; 3522 { 3523 int dlen, ostate, nstate, rval, dir; 3524 u_char tcpflags; 3525 tcphdr_t *tcp; 3526 ipf_stack_t *ifs = fin->fin_ifs; 3527 3528 tcp = fin->fin_dp; 3529 3530 rval = 0; 3531 dir = fin->fin_rev; 3532 tcpflags = tcp->th_flags; 3533 dlen = fin->fin_dlen - (TCP_OFF(tcp) << 2); 3534 3535 ostate = tqe->tqe_state[1 - dir]; 3536 nstate = tqe->tqe_state[dir]; 3537 3538 DTRACE_PROBE4( 3539 indata, 3540 fr_info_t *, fin, 3541 int, ostate, 3542 int, nstate, 3543 u_char, tcpflags 3544 ); 3545 3546 if (tcpflags & TH_RST) { 3547 if (!(tcpflags & TH_PUSH) && !dlen) 3548 nstate = IPF_TCPS_CLOSED; 3549 else 3550 nstate = IPF_TCPS_CLOSE_WAIT; 3551 3552 /* 3553 * Once RST is received, we must advance peer's state to 3554 * CLOSE_WAIT. 3555 */ 3556 if (ostate <= IPF_TCPS_ESTABLISHED) { 3557 tqe->tqe_state[1 - dir] = IPF_TCPS_CLOSE_WAIT; 3558 } 3559 rval = 1; 3560 } else { 3561 3562 switch (nstate) 3563 { 3564 case IPF_TCPS_LISTEN: /* 0 */ 3565 if ((tcpflags & TH_OPENING) == TH_OPENING) { 3566 /* 3567 * 'dir' received an S and sends SA in 3568 * response, CLOSED -> SYN_RECEIVED 3569 */ 3570 nstate = IPF_TCPS_SYN_RECEIVED; 3571 rval = 1; 3572 } else if ((tcpflags & TH_OPENING) == TH_SYN) { 3573 /* 'dir' sent S, CLOSED -> SYN_SENT */ 3574 nstate = IPF_TCPS_SYN_SENT; 3575 rval = 1; 3576 } 3577 /* 3578 * the next piece of code makes it possible to get 3579 * already established connections into the state table 3580 * after a restart or reload of the filter rules; this 3581 * does not work when a strict 'flags S keep state' is 3582 * used for tcp connections of course 3583 */ 3584 if (((flags & IS_TCPFSM) == 0) && 3585 ((tcpflags & TH_ACKMASK) == TH_ACK)) { 3586 /* 3587 * we saw an A, guess 'dir' is in ESTABLISHED 3588 * mode 3589 */ 3590 switch (ostate) 3591 { 3592 case 
IPF_TCPS_LISTEN : 3593 case IPF_TCPS_SYN_RECEIVED : 3594 nstate = IPF_TCPS_HALF_ESTAB; 3595 rval = 1; 3596 break; 3597 case IPF_TCPS_HALF_ESTAB : 3598 case IPF_TCPS_ESTABLISHED : 3599 nstate = IPF_TCPS_ESTABLISHED; 3600 rval = 1; 3601 break; 3602 default : 3603 break; 3604 } 3605 } 3606 /* 3607 * TODO: besides regular ACK packets we can have other 3608 * packets as well; it is yet to be determined how we 3609 * should initialize the states in those cases 3610 */ 3611 break; 3612 3613 case IPF_TCPS_SYN_SENT: /* 1 */ 3614 if ((tcpflags & ~(TH_ECN|TH_CWR)) == TH_SYN) { 3615 /* 3616 * A retransmitted SYN packet. We do not reset 3617 * the timeout here to fr_tcptimeout because a 3618 * connection connect timeout does not renew 3619 * after every packet that is sent. We need to 3620 * set rval so as to indicate the packet has 3621 * passed the check for its flags being valid 3622 * in the TCP FSM. Setting rval to 2 has the 3623 * result of not resetting the timeout. 3624 */ 3625 rval = 2; 3626 } else if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == 3627 TH_ACK) { 3628 /* 3629 * we see an A from 'dir' which is in SYN_SENT 3630 * state: 'dir' sent an A in response to an SA 3631 * which it received, SYN_SENT -> ESTABLISHED 3632 */ 3633 nstate = IPF_TCPS_ESTABLISHED; 3634 rval = 1; 3635 } else if (tcpflags & TH_FIN) { 3636 /* 3637 * we see an F from 'dir' which is in SYN_SENT 3638 * state and wants to close its side of the 3639 * connection; SYN_SENT -> FIN_WAIT_1 3640 */ 3641 nstate = IPF_TCPS_FIN_WAIT_1; 3642 rval = 1; 3643 } else if ((tcpflags & TH_OPENING) == TH_OPENING) { 3644 /* 3645 * we see an SA from 'dir' which is already in 3646 * SYN_SENT state, this means we have a 3647 * simultaneous open; SYN_SENT -> SYN_RECEIVED 3648 */ 3649 nstate = IPF_TCPS_SYN_RECEIVED; 3650 rval = 1; 3651 } 3652 break; 3653 3654 case IPF_TCPS_SYN_RECEIVED: /* 2 */ 3655 if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) { 3656 /* 3657 * we see an A from 'dir' which was in 3658 * SYN_RECEIVED 
state so it must now be in 3659 * established state, SYN_RECEIVED -> 3660 * ESTABLISHED 3661 */ 3662 nstate = IPF_TCPS_ESTABLISHED; 3663 rval = 1; 3664 } else if ((tcpflags & ~(TH_ECN|TH_CWR)) == 3665 TH_OPENING) { 3666 /* 3667 * We see an SA from 'dir' which is already in 3668 * SYN_RECEIVED state. 3669 */ 3670 rval = 2; 3671 } else if (tcpflags & TH_FIN) { 3672 /* 3673 * we see an F from 'dir' which is in 3674 * SYN_RECEIVED state and wants to close its 3675 * side of the connection; SYN_RECEIVED -> 3676 * FIN_WAIT_1 3677 */ 3678 nstate = IPF_TCPS_FIN_WAIT_1; 3679 rval = 1; 3680 } 3681 break; 3682 3683 case IPF_TCPS_HALF_ESTAB: /* 3 */ 3684 if (tcpflags & TH_FIN) { 3685 nstate = IPF_TCPS_FIN_WAIT_1; 3686 rval = 1; 3687 } else if ((tcpflags & TH_ACKMASK) == TH_ACK) { 3688 /* 3689 * If we've picked up a connection in mid 3690 * flight, we could be looking at a follow on 3691 * packet from the same direction as the one 3692 * that created this state. Recognise it but 3693 * do not advance the entire connection's 3694 * state. 3695 */ 3696 switch (ostate) 3697 { 3698 case IPF_TCPS_LISTEN : 3699 case IPF_TCPS_SYN_SENT : 3700 case IPF_TCPS_SYN_RECEIVED : 3701 rval = 1; 3702 break; 3703 case IPF_TCPS_HALF_ESTAB : 3704 case IPF_TCPS_ESTABLISHED : 3705 nstate = IPF_TCPS_ESTABLISHED; 3706 rval = 1; 3707 break; 3708 default : 3709 break; 3710 } 3711 } 3712 break; 3713 3714 case IPF_TCPS_ESTABLISHED: /* 4 */ 3715 rval = 1; 3716 if (tcpflags & TH_FIN) { 3717 /* 3718 * 'dir' closed its side of the connection; 3719 * this gives us a half-closed connection; 3720 * ESTABLISHED -> FIN_WAIT_1 3721 */ 3722 if (ostate == IPF_TCPS_FIN_WAIT_1) { 3723 nstate = IPF_TCPS_CLOSING; 3724 } else { 3725 nstate = IPF_TCPS_FIN_WAIT_1; 3726 } 3727 } else if (tcpflags & TH_ACK) { 3728 /* 3729 * an ACK, should we exclude other flags here? 
3730 */ 3731 if (ostate == IPF_TCPS_FIN_WAIT_1) { 3732 /* 3733 * We know the other side did an active 3734 * close, so we are ACKing the recvd 3735 * FIN packet (does the window matching 3736 * code guarantee this?) and go into 3737 * CLOSE_WAIT state; this gives us a 3738 * half-closed connection 3739 */ 3740 nstate = IPF_TCPS_CLOSE_WAIT; 3741 } else if (ostate < IPF_TCPS_CLOSE_WAIT) { 3742 /* 3743 * still a fully established 3744 * connection reset timeout 3745 */ 3746 nstate = IPF_TCPS_ESTABLISHED; 3747 } 3748 } 3749 break; 3750 3751 case IPF_TCPS_CLOSE_WAIT: /* 5 */ 3752 rval = 1; 3753 if (tcpflags & TH_FIN) { 3754 /* 3755 * application closed and 'dir' sent a FIN, 3756 * we're now going into LAST_ACK state 3757 */ 3758 nstate = IPF_TCPS_LAST_ACK; 3759 } else { 3760 /* 3761 * we remain in CLOSE_WAIT because the other 3762 * side has closed already and we did not 3763 * close our side yet; reset timeout 3764 */ 3765 nstate = IPF_TCPS_CLOSE_WAIT; 3766 } 3767 break; 3768 3769 case IPF_TCPS_FIN_WAIT_1: /* 6 */ 3770 rval = 1; 3771 if ((tcpflags & TH_ACK) && 3772 ostate > IPF_TCPS_CLOSE_WAIT) { 3773 /* 3774 * if the other side is not active anymore 3775 * it has sent us a FIN packet that we are 3776 * ack'ing now with an ACK; this means both 3777 * sides have now closed the connection and 3778 * we go into LAST_ACK 3779 */ 3780 /* 3781 * XXX: how do we know we really are ACKing 3782 * the FIN packet here? does the window code 3783 * guarantee that? 
3784 */ 3785 nstate = IPF_TCPS_LAST_ACK; 3786 } else { 3787 /* 3788 * we closed our side of the connection 3789 * already but the other side is still active 3790 * (ESTABLISHED/CLOSE_WAIT); continue with 3791 * this half-closed connection 3792 */ 3793 nstate = IPF_TCPS_FIN_WAIT_1; 3794 } 3795 break; 3796 3797 case IPF_TCPS_CLOSING: /* 7 */ 3798 if ((tcpflags & (TH_FIN|TH_ACK)) == TH_ACK) { 3799 nstate = IPF_TCPS_TIME_WAIT; 3800 } 3801 rval = 1; 3802 break; 3803 3804 case IPF_TCPS_LAST_ACK: /* 8 */ 3805 /* 3806 * We want to reset timer here to keep state in table. 3807 * If we would allow the state to time out here, while 3808 * there would still be packets being retransmitted, we 3809 * would cut off line between the two peers preventing 3810 * them to close connection properly. 3811 */ 3812 rval = 1; 3813 break; 3814 3815 case IPF_TCPS_FIN_WAIT_2: /* 9 */ 3816 /* NOT USED */ 3817 break; 3818 3819 case IPF_TCPS_TIME_WAIT: /* 10 */ 3820 /* we're in 2MSL timeout now */ 3821 if (ostate == IPF_TCPS_LAST_ACK) { 3822 nstate = IPF_TCPS_CLOSED; 3823 rval = 1; 3824 } else { 3825 rval = 2; 3826 } 3827 break; 3828 3829 case IPF_TCPS_CLOSED: /* 11 */ 3830 rval = 2; 3831 break; 3832 3833 default : 3834 #if defined(_KERNEL) 3835 ASSERT(nstate >= IPF_TCPS_LISTEN && 3836 nstate <= IPF_TCPS_CLOSED); 3837 #else 3838 abort(); 3839 #endif 3840 break; 3841 } 3842 } 3843 3844 /* 3845 * If rval == 2 then do not update the queue position, but treat the 3846 * packet as being ok. 3847 */ 3848 if (rval == 2) { 3849 DTRACE_PROBE1(state_keeping_timer, int, nstate); 3850 rval = 1; 3851 } 3852 else if (rval == 1) { 3853 tqe->tqe_state[dir] = nstate; 3854 /* 3855 * The nstate can either advance to a new state, or remain 3856 * unchanged, resetting the timer by moving to the bottom of 3857 * the queue. 
3858 */ 3859 DTRACE_PROBE1(state_done, int, nstate); 3860 3861 if ((tqe->tqe_flags & TQE_RULEBASED) == 0) 3862 fr_movequeue(tqe, tqe->tqe_ifq, tqtab + nstate, ifs); 3863 } 3864 3865 return rval; 3866 } 3867 3868 3869 /* ------------------------------------------------------------------------ */ 3870 /* Function: ipstate_log */ 3871 /* Returns: Nil */ 3872 /* Parameters: is(I) - pointer to state structure */ 3873 /* type(I) - type of log entry to create */ 3874 /* */ 3875 /* Creates a state table log entry using the state structure and type info. */ 3876 /* passed in. Log packet/byte counts, source/destination address and other */ 3877 /* protocol specific information. */ 3878 /* ------------------------------------------------------------------------ */ 3879 void ipstate_log(is, type, ifs) 3880 struct ipstate *is; 3881 u_int type; 3882 ipf_stack_t *ifs; 3883 { 3884 #ifdef IPFILTER_LOG 3885 struct ipslog ipsl; 3886 size_t sizes[1]; 3887 void *items[1]; 3888 int types[1]; 3889 3890 /* 3891 * Copy information out of the ipstate_t structure and into the 3892 * structure used for logging. 
3893 */ 3894 ipsl.isl_type = type; 3895 ipsl.isl_pkts[0] = is->is_pkts[0] + is->is_icmppkts[0]; 3896 ipsl.isl_bytes[0] = is->is_bytes[0]; 3897 ipsl.isl_pkts[1] = is->is_pkts[1] + is->is_icmppkts[1]; 3898 ipsl.isl_bytes[1] = is->is_bytes[1]; 3899 ipsl.isl_pkts[2] = is->is_pkts[2] + is->is_icmppkts[2]; 3900 ipsl.isl_bytes[2] = is->is_bytes[2]; 3901 ipsl.isl_pkts[3] = is->is_pkts[3] + is->is_icmppkts[3]; 3902 ipsl.isl_bytes[3] = is->is_bytes[3]; 3903 ipsl.isl_src = is->is_src; 3904 ipsl.isl_dst = is->is_dst; 3905 ipsl.isl_p = is->is_p; 3906 ipsl.isl_v = is->is_v; 3907 ipsl.isl_flags = is->is_flags; 3908 ipsl.isl_tag = is->is_tag; 3909 ipsl.isl_rulen = is->is_rulen; 3910 (void) strncpy(ipsl.isl_group, is->is_group, FR_GROUPLEN); 3911 3912 if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) { 3913 ipsl.isl_sport = is->is_sport; 3914 ipsl.isl_dport = is->is_dport; 3915 if (ipsl.isl_p == IPPROTO_TCP) { 3916 ipsl.isl_state[0] = is->is_state[0]; 3917 ipsl.isl_state[1] = is->is_state[1]; 3918 } 3919 } else if (ipsl.isl_p == IPPROTO_ICMP) { 3920 ipsl.isl_itype = is->is_icmp.ici_type; 3921 } else if (ipsl.isl_p == IPPROTO_ICMPV6) { 3922 ipsl.isl_itype = is->is_icmp.ici_type; 3923 } else { 3924 ipsl.isl_ps.isl_filler[0] = 0; 3925 ipsl.isl_ps.isl_filler[1] = 0; 3926 } 3927 3928 items[0] = &ipsl; 3929 sizes[0] = sizeof(ipsl); 3930 types[0] = 0; 3931 3932 if (ipllog(IPL_LOGSTATE, NULL, items, sizes, types, 1, ifs)) { 3933 ATOMIC_INCL(ifs->ifs_ips_stats.iss_logged); 3934 } else { 3935 ATOMIC_INCL(ifs->ifs_ips_stats.iss_logfail); 3936 } 3937 #endif 3938 } 3939 3940 3941 #ifdef USE_INET6 3942 /* ------------------------------------------------------------------------ */ 3943 /* Function: fr_checkicmp6matchingstate */ 3944 /* Returns: ipstate_t* - NULL == no match found, */ 3945 /* else pointer to matching state entry */ 3946 /* Parameters: fin(I) - pointer to packet information */ 3947 /* Locks: NULL == no locks, else Read Lock on ipf_state */ 3948 /* */ 3949 /* If we've got 
an ICMPv6 error message, using the information stored in */ 3950 /* the ICMPv6 packet, look for a matching state table entry. */ 3951 /* ------------------------------------------------------------------------ */ 3952 static ipstate_t *fr_checkicmp6matchingstate(fin) 3953 fr_info_t *fin; 3954 { 3955 struct icmp6_hdr *ic6, *oic; 3956 int backward, i; 3957 ipstate_t *is, **isp; 3958 u_short sport, dport; 3959 i6addr_t dst, src; 3960 u_short savelen; 3961 icmpinfo_t *ic; 3962 fr_info_t ofin; 3963 tcphdr_t *tcp; 3964 ip6_t *oip6; 3965 u_char pr; 3966 u_int hv; 3967 ipf_stack_t *ifs = fin->fin_ifs; 3968 3969 /* 3970 * Does it at least have the return (basic) IP header ? 3971 * Is it an actual recognised ICMP error type? 3972 * Only a basic IP header (no options) should be with 3973 * an ICMP error header. 3974 */ 3975 if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN) || 3976 !(fin->fin_flx & FI_ICMPERR)) 3977 return NULL; 3978 3979 ic6 = fin->fin_dp; 3980 3981 oip6 = (ip6_t *)((char *)ic6 + ICMPERR_ICMPHLEN); 3982 if (fin->fin_plen < sizeof(*oip6)) 3983 return NULL; 3984 3985 bcopy((char *)fin, (char *)&ofin, sizeof(*fin)); 3986 ofin.fin_v = 6; 3987 ofin.fin_ifp = fin->fin_ifp; 3988 ofin.fin_out = !fin->fin_out; 3989 ofin.fin_m = NULL; /* if dereferenced, panic XXX */ 3990 ofin.fin_mp = NULL; /* if dereferenced, panic XXX */ 3991 3992 /* 3993 * We make a fin entry to be able to feed it to 3994 * matchsrcdst. Note that not all fields are necessary 3995 * but this is the cleanest way. Note further we fill 3996 * in fin_mp such that if someone uses it we'll get 3997 * a kernel panic. fr_matchsrcdst does not use this. 3998 * 3999 * watch out here, as ip is in host order and oip6 in network 4000 * order. Any change we make must be undone afterwards. 
4001 */ 4002 savelen = oip6->ip6_plen; 4003 oip6->ip6_plen = fin->fin_dlen - ICMPERR_ICMPHLEN; 4004 ofin.fin_flx = FI_NOCKSUM; 4005 ofin.fin_ip = (ip_t *)oip6; 4006 ofin.fin_plen = oip6->ip6_plen; 4007 (void) fr_makefrip(sizeof(*oip6), (ip_t *)oip6, &ofin); 4008 ofin.fin_flx &= ~(FI_BAD|FI_SHORT); 4009 oip6->ip6_plen = savelen; 4010 4011 if (oip6->ip6_nxt == IPPROTO_ICMPV6) { 4012 oic = (struct icmp6_hdr *)(oip6 + 1); 4013 /* 4014 * an ICMP error can only be generated as a result of an 4015 * ICMP query, not as the response on an ICMP error 4016 * 4017 * XXX theoretically ICMP_ECHOREP and the other reply's are 4018 * ICMP query's as well, but adding them here seems strange XXX 4019 */ 4020 if (!(oic->icmp6_type & ICMP6_INFOMSG_MASK)) 4021 return NULL; 4022 4023 /* 4024 * perform a lookup of the ICMP packet in the state table 4025 */ 4026 hv = (pr = oip6->ip6_nxt); 4027 src.in6 = oip6->ip6_src; 4028 hv += src.in4.s_addr; 4029 dst.in6 = oip6->ip6_dst; 4030 hv += dst.in4.s_addr; 4031 hv += oic->icmp6_id; 4032 hv += oic->icmp6_seq; 4033 hv = DOUBLE_HASH(hv, ifs); 4034 4035 READ_ENTER(&ifs->ifs_ipf_state); 4036 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 4037 ic = &is->is_icmp; 4038 isp = &is->is_hnext; 4039 if ((is->is_p == pr) && 4040 !(is->is_pass & FR_NOICMPERR) && 4041 (oic->icmp6_id == ic->ici_id) && 4042 (oic->icmp6_seq == ic->ici_seq) && 4043 (is = fr_matchsrcdst(&ofin, is, &src, 4044 &dst, NULL, FI_ICMPCMP))) { 4045 /* 4046 * in the state table ICMP query's are stored 4047 * with the type of the corresponding ICMP 4048 * response. 
Correct here 4049 */ 4050 if (((ic->ici_type == ICMP6_ECHO_REPLY) && 4051 (oic->icmp6_type == ICMP6_ECHO_REQUEST)) || 4052 (ic->ici_type - 1 == oic->icmp6_type )) { 4053 ifs->ifs_ips_stats.iss_hits++; 4054 backward = IP6_NEQ(&is->is_dst, &src); 4055 fin->fin_rev = !backward; 4056 i = (backward << 1) + fin->fin_out; 4057 is->is_icmppkts[i]++; 4058 return is; 4059 } 4060 } 4061 } 4062 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4063 return NULL; 4064 } 4065 4066 hv = (pr = oip6->ip6_nxt); 4067 src.in6 = oip6->ip6_src; 4068 hv += src.i6[0]; 4069 hv += src.i6[1]; 4070 hv += src.i6[2]; 4071 hv += src.i6[3]; 4072 dst.in6 = oip6->ip6_dst; 4073 hv += dst.i6[0]; 4074 hv += dst.i6[1]; 4075 hv += dst.i6[2]; 4076 hv += dst.i6[3]; 4077 4078 if ((oip6->ip6_nxt == IPPROTO_TCP) || (oip6->ip6_nxt == IPPROTO_UDP)) { 4079 tcp = (tcphdr_t *)(oip6 + 1); 4080 dport = tcp->th_dport; 4081 sport = tcp->th_sport; 4082 hv += dport; 4083 hv += sport; 4084 } else 4085 tcp = NULL; 4086 hv = DOUBLE_HASH(hv, ifs); 4087 4088 READ_ENTER(&ifs->ifs_ipf_state); 4089 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 4090 isp = &is->is_hnext; 4091 /* 4092 * Only allow this icmp though if the 4093 * encapsulated packet was allowed through the 4094 * other way around. Note that the minimal amount 4095 * of info present does not allow for checking against 4096 * tcp internals such as seq and ack numbers. 4097 */ 4098 if ((is->is_p != pr) || (is->is_v != 6) || 4099 (is->is_pass & FR_NOICMPERR)) 4100 continue; 4101 is = fr_matchsrcdst(&ofin, is, &src, &dst, tcp, FI_ICMPCMP); 4102 if (is != NULL) { 4103 ifs->ifs_ips_stats.iss_hits++; 4104 backward = IP6_NEQ(&is->is_dst, &src); 4105 fin->fin_rev = !backward; 4106 i = (backward << 1) + fin->fin_out; 4107 is->is_icmppkts[i]++; 4108 /* 4109 * we deliberately do not touch the timeouts 4110 * for the accompanying state table entry. 4111 * It remains to be seen if that is correct. 
XXX 4112 */ 4113 return is; 4114 } 4115 } 4116 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4117 return NULL; 4118 } 4119 #endif 4120 4121 4122 /* ------------------------------------------------------------------------ */ 4123 /* Function: fr_sttab_init */ 4124 /* Returns: Nil */ 4125 /* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */ 4126 /* */ 4127 /* Initialise the array of timeout queues for TCP. */ 4128 /* ------------------------------------------------------------------------ */ 4129 void fr_sttab_init(tqp, ifs) 4130 ipftq_t *tqp; 4131 ipf_stack_t *ifs; 4132 { 4133 int i; 4134 4135 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) { 4136 tqp[i].ifq_ttl = 0; 4137 tqp[i].ifq_ref = 1; 4138 tqp[i].ifq_head = NULL; 4139 tqp[i].ifq_tail = &tqp[i].ifq_head; 4140 tqp[i].ifq_next = tqp + i + 1; 4141 MUTEX_INIT(&tqp[i].ifq_lock, "ipftq tcp tab"); 4142 } 4143 tqp[IPF_TCP_NSTATES - 1].ifq_next = NULL; 4144 tqp[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcpclosed; 4145 tqp[IPF_TCPS_LISTEN].ifq_ttl = ifs->ifs_fr_tcptimeout; 4146 tqp[IPF_TCPS_SYN_SENT].ifq_ttl = ifs->ifs_fr_tcptimeout; 4147 tqp[IPF_TCPS_SYN_RECEIVED].ifq_ttl = ifs->ifs_fr_tcptimeout; 4148 tqp[IPF_TCPS_ESTABLISHED].ifq_ttl = ifs->ifs_fr_tcpidletimeout; 4149 tqp[IPF_TCPS_CLOSE_WAIT].ifq_ttl = ifs->ifs_fr_tcphalfclosed; 4150 tqp[IPF_TCPS_FIN_WAIT_1].ifq_ttl = ifs->ifs_fr_tcphalfclosed; 4151 tqp[IPF_TCPS_CLOSING].ifq_ttl = ifs->ifs_fr_tcptimeout; 4152 tqp[IPF_TCPS_LAST_ACK].ifq_ttl = ifs->ifs_fr_tcplastack; 4153 tqp[IPF_TCPS_FIN_WAIT_2].ifq_ttl = ifs->ifs_fr_tcpclosewait; 4154 tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = ifs->ifs_fr_tcptimeout; 4155 tqp[IPF_TCPS_HALF_ESTAB].ifq_ttl = ifs->ifs_fr_tcptimeout; 4156 } 4157 4158 4159 /* ------------------------------------------------------------------------ */ 4160 /* Function: fr_sttab_destroy */ 4161 /* Returns: Nil */ 4162 /* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */ 4163 /* */ 4164 /* Do whatever is necessary to "destroy" each of the entries 
in the array */ 4165 /* of timeout queues for TCP. */ 4166 /* ------------------------------------------------------------------------ */ 4167 void fr_sttab_destroy(tqp) 4168 ipftq_t *tqp; 4169 { 4170 int i; 4171 4172 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) 4173 MUTEX_DESTROY(&tqp[i].ifq_lock); 4174 } 4175 4176 4177 /* ------------------------------------------------------------------------ */ 4178 /* Function: fr_statederef */ 4179 /* Returns: Nil */ 4180 /* Parameters: isp(I) - pointer to pointer to state table entry */ 4181 /* ifs - ipf stack instance */ 4182 /* */ 4183 /* Decrement the reference counter for this state table entry and free it */ 4184 /* if there are no more things using it. */ 4185 /* */ 4186 /* Internal parameters: */ 4187 /* state[0] = state of source (host that initiated connection) */ 4188 /* state[1] = state of dest (host that accepted the connection) */ 4189 /* ------------------------------------------------------------------------ */ 4190 void fr_statederef(isp, ifs) 4191 ipstate_t **isp; 4192 ipf_stack_t *ifs; 4193 { 4194 ipstate_t *is; 4195 4196 is = *isp; 4197 *isp = NULL; 4198 4199 MUTEX_ENTER(&is->is_lock); 4200 if (is->is_ref > 1) { 4201 is->is_ref--; 4202 MUTEX_EXIT(&is->is_lock); 4203 #ifndef _KERNEL 4204 if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) || 4205 (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) { 4206 (void) fr_delstate(is, ISL_ORPHAN, ifs); 4207 } 4208 #endif 4209 return; 4210 } 4211 MUTEX_EXIT(&is->is_lock); 4212 4213 WRITE_ENTER(&ifs->ifs_ipf_state); 4214 (void) fr_delstate(is, ISL_EXPIRE, ifs); 4215 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4216 } 4217 4218 4219 /* ------------------------------------------------------------------------ */ 4220 /* Function: fr_setstatequeue */ 4221 /* Returns: Nil */ 4222 /* Parameters: is(I) - pointer to state structure */ 4223 /* rev(I) - forward(0) or reverse(1) direction */ 4224 /* Locks: ipf_state (read or write) */ 4225 /* */ 4226 /* Put the state entry on its default 
queue entry, using rev as a helped in */ 4227 /* determining which queue it should be placed on. */ 4228 /* ------------------------------------------------------------------------ */ 4229 void fr_setstatequeue(is, rev, ifs) 4230 ipstate_t *is; 4231 int rev; 4232 ipf_stack_t *ifs; 4233 { 4234 ipftq_t *oifq, *nifq; 4235 4236 4237 if ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) 4238 nifq = is->is_tqehead[rev]; 4239 else 4240 nifq = NULL; 4241 4242 if (nifq == NULL) { 4243 switch (is->is_p) 4244 { 4245 #ifdef USE_INET6 4246 case IPPROTO_ICMPV6 : 4247 if (rev == 1) 4248 nifq = &ifs->ifs_ips_icmpacktq; 4249 else 4250 nifq = &ifs->ifs_ips_icmptq; 4251 break; 4252 #endif 4253 case IPPROTO_ICMP : 4254 if (rev == 1) 4255 nifq = &ifs->ifs_ips_icmpacktq; 4256 else 4257 nifq = &ifs->ifs_ips_icmptq; 4258 break; 4259 case IPPROTO_TCP : 4260 nifq = ifs->ifs_ips_tqtqb + is->is_state[rev]; 4261 break; 4262 4263 case IPPROTO_UDP : 4264 if (rev == 1) 4265 nifq = &ifs->ifs_ips_udpacktq; 4266 else 4267 nifq = &ifs->ifs_ips_udptq; 4268 break; 4269 4270 default : 4271 nifq = &ifs->ifs_ips_iptq; 4272 break; 4273 } 4274 } 4275 4276 oifq = is->is_sti.tqe_ifq; 4277 /* 4278 * If it's currently on a timeout queue, move it from one queue to 4279 * another, else put it on the end of the newly determined queue. 4280 */ 4281 if (oifq != NULL) 4282 fr_movequeue(&is->is_sti, oifq, nifq, ifs); 4283 else 4284 fr_queueappend(&is->is_sti, nifq, is, ifs); 4285 return; 4286 } 4287 4288 4289 /* ------------------------------------------------------------------------ */ 4290 /* Function: fr_stateiter */ 4291 /* Returns: int - 0 == success, else error */ 4292 /* Parameters: token(I) - pointer to ipftoken structure */ 4293 /* itp(I) - pointer to ipfgeniter structure */ 4294 /* */ 4295 /* This function handles the SIOCGENITER ioctl for the state tables and */ 4296 /* walks through the list of entries in the state table list (ips_list.) 
*/ 4297 /* ------------------------------------------------------------------------ */ 4298 static int fr_stateiter(token, itp, ifs) 4299 ipftoken_t *token; 4300 ipfgeniter_t *itp; 4301 ipf_stack_t *ifs; 4302 { 4303 ipstate_t *is, *next, zero; 4304 int error, count; 4305 char *dst; 4306 4307 if (itp->igi_data == NULL) 4308 return EFAULT; 4309 4310 if (itp->igi_nitems == 0) 4311 return EINVAL; 4312 4313 if (itp->igi_type != IPFGENITER_STATE) 4314 return EINVAL; 4315 4316 error = 0; 4317 4318 READ_ENTER(&ifs->ifs_ipf_state); 4319 4320 /* 4321 * Get "previous" entry from the token and find the next entry. 4322 */ 4323 is = token->ipt_data; 4324 if (is == NULL) { 4325 next = ifs->ifs_ips_list; 4326 } else { 4327 next = is->is_next; 4328 } 4329 4330 dst = itp->igi_data; 4331 for (count = itp->igi_nitems; count > 0; count--) { 4332 /* 4333 * If we found an entry, add a reference to it and update the token. 4334 * Otherwise, zero out data to be returned and NULL out token. 4335 */ 4336 if (next != NULL) { 4337 MUTEX_ENTER(&next->is_lock); 4338 next->is_ref++; 4339 MUTEX_EXIT(&next->is_lock); 4340 token->ipt_data = next; 4341 } else { 4342 bzero(&zero, sizeof(zero)); 4343 next = &zero; 4344 token->ipt_data = NULL; 4345 } 4346 4347 /* 4348 * Safe to release lock now the we have a reference. 4349 */ 4350 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4351 4352 /* 4353 * Copy out data and clean up references and tokens. 4354 */ 4355 error = COPYOUT(next, dst, sizeof(*next)); 4356 if (error != 0) 4357 error = EFAULT; 4358 if (token->ipt_data == NULL) { 4359 ipf_freetoken(token, ifs); 4360 break; 4361 } else { 4362 if (is != NULL) 4363 fr_statederef(&is, ifs); 4364 if (next->is_next == NULL) { 4365 ipf_freetoken(token, ifs); 4366 break; 4367 } 4368 } 4369 4370 if ((count == 1) || (error != 0)) 4371 break; 4372 4373 READ_ENTER(&ifs->ifs_ipf_state); 4374 dst += sizeof(*next); 4375 is = next; 4376 next = is->is_next; 4377 } 4378 4379 return error; 4380 } 4381