1 /* 2 * Copyright (C) 1995-2003 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 7 * Use is subject to license terms. 8 */ 9 10 #if defined(KERNEL) || defined(_KERNEL) 11 # undef KERNEL 12 # undef _KERNEL 13 # define KERNEL 1 14 # define _KERNEL 1 15 #endif 16 #include <sys/errno.h> 17 #include <sys/types.h> 18 #include <sys/param.h> 19 #include <sys/file.h> 20 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ 21 defined(_KERNEL) 22 # include "opt_ipfilter_log.h" 23 #endif 24 #if defined(_KERNEL) && defined(__FreeBSD_version) && \ 25 (__FreeBSD_version >= 400000) && !defined(KLD_MODULE) 26 #include "opt_inet6.h" 27 #endif 28 #if !defined(_KERNEL) && !defined(__KERNEL__) 29 # include <stdio.h> 30 # include <stdlib.h> 31 # include <string.h> 32 # define _KERNEL 33 # ifdef __OpenBSD__ 34 struct file; 35 # endif 36 # include <sys/uio.h> 37 # undef _KERNEL 38 #endif 39 #if defined(_KERNEL) && (__FreeBSD_version >= 220000) 40 # include <sys/filio.h> 41 # include <sys/fcntl.h> 42 # if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM) 43 # include "opt_ipfilter.h" 44 # endif 45 #else 46 # include <sys/ioctl.h> 47 #endif 48 #include <sys/time.h> 49 #if !defined(linux) 50 # include <sys/protosw.h> 51 #endif 52 #include <sys/socket.h> 53 #if defined(_KERNEL) 54 # include <sys/systm.h> 55 # if !defined(__SVR4) && !defined(__svr4__) 56 # include <sys/mbuf.h> 57 # endif 58 #endif 59 #if defined(__SVR4) || defined(__svr4__) 60 # include <sys/filio.h> 61 # include <sys/byteorder.h> 62 # ifdef _KERNEL 63 # include <sys/dditypes.h> 64 # endif 65 # include <sys/stream.h> 66 # include <sys/kmem.h> 67 #endif 68 69 #include <net/if.h> 70 #ifdef sun 71 # include <net/af.h> 72 #endif 73 #include <net/route.h> 74 #include <netinet/in.h> 75 #include <netinet/in_systm.h> 76 #include <netinet/ip.h> 77 #include <netinet/tcp.h> 78 #if !defined(linux) 79 # include 
<netinet/ip_var.h> 80 #endif 81 #if !defined(__hpux) && !defined(linux) 82 # include <netinet/tcp_fsm.h> 83 #endif 84 #include <netinet/udp.h> 85 #include <netinet/ip_icmp.h> 86 #include "netinet/ip_compat.h" 87 #include <netinet/tcpip.h> 88 #include "netinet/ip_fil.h" 89 #include "netinet/ip_nat.h" 90 #include "netinet/ip_frag.h" 91 #include "netinet/ip_state.h" 92 #include "netinet/ip_proxy.h" 93 #include "netinet/ipf_stack.h" 94 #ifdef IPFILTER_SYNC 95 #include "netinet/ip_sync.h" 96 #endif 97 #ifdef IPFILTER_SCAN 98 #include "netinet/ip_scan.h" 99 #endif 100 #ifdef USE_INET6 101 #include <netinet/icmp6.h> 102 #endif 103 #if (__FreeBSD_version >= 300000) 104 # include <sys/malloc.h> 105 # if defined(_KERNEL) && !defined(IPFILTER_LKM) 106 # include <sys/libkern.h> 107 # include <sys/systm.h> 108 # endif 109 #endif 110 /* END OF INCLUDES */ 111 112 113 #if !defined(lint) 114 static const char sccsid[] = "@(#)ip_state.c 1.8 6/5/96 (C) 1993-2000 Darren Reed"; 115 static const char rcsid[] = "@(#)$Id: ip_state.c,v 2.186.2.36 2005/08/11 19:58:03 darrenr Exp $"; 116 #endif 117 118 #ifdef USE_INET6 119 static ipstate_t *fr_checkicmp6matchingstate __P((fr_info_t *)); 120 #endif 121 static ipstate_t *fr_matchsrcdst __P((fr_info_t *, ipstate_t *, i6addr_t *, 122 i6addr_t *, tcphdr_t *, u_32_t)); 123 static ipstate_t *fr_checkicmpmatchingstate __P((fr_info_t *)); 124 static int fr_state_flush __P((int, int, ipf_stack_t *)); 125 static ips_stat_t *fr_statetstats __P((ipf_stack_t *)); 126 static int fr_state_remove __P((caddr_t, ipf_stack_t *)); 127 static void fr_ipsmove __P((ipstate_t *, u_int, ipf_stack_t *)); 128 static int fr_tcpstate __P((fr_info_t *, tcphdr_t *, ipstate_t *)); 129 static int fr_tcpoptions __P((fr_info_t *, tcphdr_t *, tcpdata_t *)); 130 static ipstate_t *fr_stclone __P((fr_info_t *, tcphdr_t *, ipstate_t *)); 131 static void fr_fixinisn __P((fr_info_t *, ipstate_t *)); 132 static void fr_fixoutisn __P((fr_info_t *, ipstate_t *)); 133 static void 
fr_checknewisn __P((fr_info_t *, ipstate_t *));
static int fr_stateiter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));

int fr_stputent __P((caddr_t, ipf_stack_t *));
int fr_stgetent __P((caddr_t, ipf_stack_t *));

#define	ONE_DAY		IPF_TTLVAL(1 * 86400)	/* 1 day */
#define	FIVE_DAYS	(5 * ONE_DAY)
/* Mix hash value x with the per-stack seed table; result is < statesize. */
#define	DOUBLE_HASH(x, ifs)	\
	(((x) + ifs->ifs_ips_seed[(x) % ifs->ifs_fr_statesize]) % ifs->ifs_fr_statesize)


/* ------------------------------------------------------------------------ */
/* Function:    fr_stateinit                                                */
/* Returns:     int - 0 == success, negative (-1 or -2) == failure          */
/* Parameters:  ifs - ipf stack instance                                    */
/*                                                                          */
/* Initialise all the global variables used within the state code.          */
/* This action also includes initialising locks.                            */
/* Allocates the state hash table, the hash seed table and the per-bucket   */
/* length counters; nothing is freed here on failure.                       */
/* ------------------------------------------------------------------------ */
int fr_stateinit(ifs)
ipf_stack_t *ifs;
{
#if defined(NEED_LOCAL_RAND) || !defined(_KERNEL)
	struct timeval tv;
#endif
	int i;

	KMALLOCS(ifs->ifs_ips_table, ipstate_t **,
		 ifs->ifs_fr_statesize * sizeof(ipstate_t *));
	if (ifs->ifs_ips_table == NULL)
		return -1;
	bzero((char *)ifs->ifs_ips_table,
	      ifs->ifs_fr_statesize * sizeof(ipstate_t *));

	KMALLOCS(ifs->ifs_ips_seed, u_long *,
		 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed));
	if (ifs->ifs_ips_seed == NULL)
		/* NOTE: the only failure path that returns -2 instead of -1 */
		return -2;
#if defined(NEED_LOCAL_RAND) || !defined(_KERNEL)
	tv.tv_sec = 0;
	GETKTIME(&tv);
#endif
	for (i = 0; i < ifs->ifs_fr_statesize; i++) {
		/*
		 * XXX - ips_seed[X] should be a random number of sorts.
		 */
#if !defined(NEED_LOCAL_RAND) && defined(_KERNEL)
		ifs->ifs_ips_seed[i] = ipf_random();
#else
		/*
		 * No ipf_random() in this configuration: derive a value from
		 * the seed table's address, the index, the current time and
		 * the table limits.
		 */
		ifs->ifs_ips_seed[i] = ((u_long)ifs->ifs_ips_seed + i) *
				    ifs->ifs_fr_statesize;
		ifs->ifs_ips_seed[i] += tv.tv_sec;
		ifs->ifs_ips_seed[i] *= (u_long)ifs->ifs_ips_seed;
		ifs->ifs_ips_seed[i] ^= 0x5a5aa5a5;
		ifs->ifs_ips_seed[i] *= ifs->ifs_fr_statemax;
#endif
	}

	/* fill icmp reply type table */
	for (i = 0; i <= ICMP_MAXTYPE; i++)
		icmpreplytype4[i] = -1;
	icmpreplytype4[ICMP_ECHO] = ICMP_ECHOREPLY;
	icmpreplytype4[ICMP_TSTAMP] = ICMP_TSTAMPREPLY;
	icmpreplytype4[ICMP_IREQ] = ICMP_IREQREPLY;
	icmpreplytype4[ICMP_MASKREQ] = ICMP_MASKREPLY;
#ifdef	USE_INET6
	/* fill icmp reply type table */
	for (i = 0; i <= ICMP6_MAXTYPE; i++)
		icmpreplytype6[i] = -1;
	icmpreplytype6[ICMP6_ECHO_REQUEST] = ICMP6_ECHO_REPLY;
	icmpreplytype6[ICMP6_MEMBERSHIP_QUERY] = ICMP6_MEMBERSHIP_REPORT;
	icmpreplytype6[ICMP6_NI_QUERY] = ICMP6_NI_REPLY;
	icmpreplytype6[ND_ROUTER_SOLICIT] = ND_ROUTER_ADVERT;
	icmpreplytype6[ND_NEIGHBOR_SOLICIT] = ND_NEIGHBOR_ADVERT;
#endif

	KMALLOCS(ifs->ifs_ips_stats.iss_bucketlen, u_long *,
		 ifs->ifs_fr_statesize * sizeof(u_long));
	if (ifs->ifs_ips_stats.iss_bucketlen == NULL)
		return -1;
	bzero((char *)ifs->ifs_ips_stats.iss_bucketlen,
	      ifs->ifs_fr_statesize * sizeof(u_long));

	/*
	 * No bucket limit configured: default to twice the number of
	 * right shifts needed to reduce the table size to zero.
	 */
	if (ifs->ifs_fr_state_maxbucket == 0) {
		for (i = ifs->ifs_fr_statesize; i > 0; i >>= 1)
			ifs->ifs_fr_state_maxbucket++;
		ifs->ifs_fr_state_maxbucket *= 2;
	}

	/*
	 * Chain the fixed timeout queues together through ifq_next:
	 * TCP table -> udp -> udpack -> icmp -> icmpack -> ip -> delete.
	 */
	fr_sttab_init(ifs->ifs_ips_tqtqb, ifs);
	ifs->ifs_ips_tqtqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_ips_udptq;
	ifs->ifs_ips_udptq.ifq_ttl = (u_long)ifs->ifs_fr_udptimeout;
	ifs->ifs_ips_udptq.ifq_ref = 1;
	ifs->ifs_ips_udptq.ifq_head = NULL;
	ifs->ifs_ips_udptq.ifq_tail = &ifs->ifs_ips_udptq.ifq_head;
	MUTEX_INIT(&ifs->ifs_ips_udptq.ifq_lock, "ipftq udp tab");
	ifs->ifs_ips_udptq.ifq_next = &ifs->ifs_ips_udpacktq;
	ifs->ifs_ips_udpacktq.ifq_ttl = (u_long)ifs->ifs_fr_udpacktimeout;
	ifs->ifs_ips_udpacktq.ifq_ref = 1;
	ifs->ifs_ips_udpacktq.ifq_head = NULL;
	ifs->ifs_ips_udpacktq.ifq_tail = &ifs->ifs_ips_udpacktq.ifq_head;
	MUTEX_INIT(&ifs->ifs_ips_udpacktq.ifq_lock, "ipftq udpack tab");
	ifs->ifs_ips_udpacktq.ifq_next = &ifs->ifs_ips_icmptq;
	ifs->ifs_ips_icmptq.ifq_ttl = (u_long)ifs->ifs_fr_icmptimeout;
	ifs->ifs_ips_icmptq.ifq_ref = 1;
	ifs->ifs_ips_icmptq.ifq_head = NULL;
	ifs->ifs_ips_icmptq.ifq_tail = &ifs->ifs_ips_icmptq.ifq_head;
	MUTEX_INIT(&ifs->ifs_ips_icmptq.ifq_lock, "ipftq icmp tab");
	ifs->ifs_ips_icmptq.ifq_next = &ifs->ifs_ips_icmpacktq;
	ifs->ifs_ips_icmpacktq.ifq_ttl = (u_long)ifs->ifs_fr_icmpacktimeout;
	ifs->ifs_ips_icmpacktq.ifq_ref = 1;
	ifs->ifs_ips_icmpacktq.ifq_head = NULL;
	ifs->ifs_ips_icmpacktq.ifq_tail = &ifs->ifs_ips_icmpacktq.ifq_head;
	MUTEX_INIT(&ifs->ifs_ips_icmpacktq.ifq_lock, "ipftq icmpack tab");
	ifs->ifs_ips_icmpacktq.ifq_next = &ifs->ifs_ips_iptq;
	ifs->ifs_ips_iptq.ifq_ttl = (u_long)ifs->ifs_fr_iptimeout;
	ifs->ifs_ips_iptq.ifq_ref = 1;
	ifs->ifs_ips_iptq.ifq_head = NULL;
	ifs->ifs_ips_iptq.ifq_tail = &ifs->ifs_ips_iptq.ifq_head;
	MUTEX_INIT(&ifs->ifs_ips_iptq.ifq_lock, "ipftq ip tab");
	ifs->ifs_ips_iptq.ifq_next = &ifs->ifs_ips_deletetq;
	/* entry's ttl in deletetq is just 1 tick */
	ifs->ifs_ips_deletetq.ifq_ttl = (u_long) 1;
	ifs->ifs_ips_deletetq.ifq_ref = 1;
	ifs->ifs_ips_deletetq.ifq_head = NULL;
	ifs->ifs_ips_deletetq.ifq_tail = &ifs->ifs_ips_deletetq.ifq_head;
	MUTEX_INIT(&ifs->ifs_ips_deletetq.ifq_lock, "state delete queue");
	ifs->ifs_ips_deletetq.ifq_next = NULL;

	RWLOCK_INIT(&ifs->ifs_ipf_state, "ipf IP state rwlock");
	MUTEX_INIT(&ifs->ifs_ipf_stinsert, "ipf state insert mutex");
	/* fr_stateunload() only destroys the locks when this flag is set */
	ifs->ifs_fr_state_init = 1;

	ifs->ifs_ips_last_force_flush = ifs->ifs_fr_ticks;
	return 0;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_stateunload                                              */
/* Returns:     Nil                                                         */
/* Parameters:  ifs - ipf stack instance                                    */
/*                                                                          */
/* Release and destroy any resources acquired or initialised so that        */
/* IPFilter can be unloaded or re-initialised.                              */
/* ------------------------------------------------------------------------ */
void fr_stateunload(ifs)
ipf_stack_t *ifs;
{
	ipftq_t *ifq, *ifqnext;
	ipstate_t *is;

	/* fr_delstate() is relied upon to unlink the entry from ifs_ips_list */
	while ((is = ifs->ifs_ips_list) != NULL)
		(void) fr_delstate(is, 0, ifs);

	/*
	 * Proxy timeout queues are not cleaned here because although they
	 * exist on the state list, appr_unload is called after fr_stateunload
	 * and the proxies actually are responsible for them being created.
	 * Should the proxy timeouts have their own list?  There's no real
	 * justification as this is the only complication.
	 */
	for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) {
		/* fetch the successor first: ifq may be freed below */
		ifqnext = ifq->ifq_next;
		if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
		    (fr_deletetimeoutqueue(ifq) == 0))
			fr_freetimeoutqueue(ifq, ifs);
	}

	ifs->ifs_ips_stats.iss_inuse = 0;
	ifs->ifs_ips_num = 0;

	/* queue locks exist only if fr_stateinit() ran to completion */
	if (ifs->ifs_fr_state_init == 1) {
		fr_sttab_destroy(ifs->ifs_ips_tqtqb);
		MUTEX_DESTROY(&ifs->ifs_ips_udptq.ifq_lock);
		MUTEX_DESTROY(&ifs->ifs_ips_icmptq.ifq_lock);
		MUTEX_DESTROY(&ifs->ifs_ips_udpacktq.ifq_lock);
		MUTEX_DESTROY(&ifs->ifs_ips_icmpacktq.ifq_lock);
		MUTEX_DESTROY(&ifs->ifs_ips_iptq.ifq_lock);
		MUTEX_DESTROY(&ifs->ifs_ips_deletetq.ifq_lock);
	}

	/*
	 * Each table is freed only when allocated, so this also cleans up
	 * after a partially failed fr_stateinit().
	 */
	if (ifs->ifs_ips_table != NULL) {
		KFREES(ifs->ifs_ips_table,
		       ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_table));
		ifs->ifs_ips_table = NULL;
	}

	if (ifs->ifs_ips_seed != NULL) {
		KFREES(ifs->ifs_ips_seed,
		       ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed));
		ifs->ifs_ips_seed = NULL;
	}

	if (ifs->ifs_ips_stats.iss_bucketlen != NULL) {
		KFREES(ifs->ifs_ips_stats.iss_bucketlen,
		       ifs->ifs_fr_statesize * sizeof(u_long));
		ifs->ifs_ips_stats.iss_bucketlen = NULL;
	}

	/* zero lets the next fr_stateinit() recompute the default limit */
	if (ifs->ifs_fr_state_maxbucket_reset == 1)
		ifs->ifs_fr_state_maxbucket = 0;

	if (ifs->ifs_fr_state_init == 1) {
		ifs->ifs_fr_state_init = 0;
		RW_DESTROY(&ifs->ifs_ipf_state);
		MUTEX_DESTROY(&ifs->ifs_ipf_stinsert);
	}
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_statetstats                                              */
/* Returns:     ips_stat_t* - pointer to state stats structure              */
/* Parameters:  ifs - ipf stack instance                                    */
/*                                                                          */
/* Put all the current numbers and pointers into a single struct and return */
/* a pointer to it.  The struct is the one embedded in ifs, not a copy.     */
/* ------------------------------------------------------------------------ */
static ips_stat_t *fr_statetstats(ifs)
ipf_stack_t *ifs;
{
	ifs->ifs_ips_stats.iss_active = ifs->ifs_ips_num;
	ifs->ifs_ips_stats.iss_statesize = ifs->ifs_fr_statesize;
	ifs->ifs_ips_stats.iss_statemax = ifs->ifs_fr_statemax;
	ifs->ifs_ips_stats.iss_table = ifs->ifs_ips_table;
	ifs->ifs_ips_stats.iss_list = ifs->ifs_ips_list;
	ifs->ifs_ips_stats.iss_ticks = ifs->ifs_fr_ticks;
	return &ifs->ifs_ips_stats;
}

/* ------------------------------------------------------------------------ */
/* Function:    fr_state_remove                                             */
/* Returns:     int - 0 == success, != 0 == failure                         */
/* Parameters:  data(I) - pointer to state structure to delete from table   */
/*              ifs     - ipf stack instance                                */
/*                                                                          */
/* Search for a state structure that matches the one passed, according to   */
/* the IP addresses and other protocol specific information.
*/ 373 /* ------------------------------------------------------------------------ */ 374 static int fr_state_remove(data, ifs) 375 caddr_t data; 376 ipf_stack_t *ifs; 377 { 378 ipstate_t *sp, st; 379 int error; 380 381 sp = &st; 382 error = fr_inobj(data, &st, IPFOBJ_IPSTATE); 383 if (error) 384 return EFAULT; 385 386 WRITE_ENTER(&ifs->ifs_ipf_state); 387 for (sp = ifs->ifs_ips_list; sp; sp = sp->is_next) 388 if ((sp->is_p == st.is_p) && (sp->is_v == st.is_v) && 389 !bcmp((caddr_t)&sp->is_src, (caddr_t)&st.is_src, 390 sizeof(st.is_src)) && 391 !bcmp((caddr_t)&sp->is_dst, (caddr_t)&st.is_dst, 392 sizeof(st.is_dst)) && 393 !bcmp((caddr_t)&sp->is_ps, (caddr_t)&st.is_ps, 394 sizeof(st.is_ps))) { 395 (void) fr_delstate(sp, ISL_REMOVE, ifs); 396 RWLOCK_EXIT(&ifs->ifs_ipf_state); 397 return 0; 398 } 399 RWLOCK_EXIT(&ifs->ifs_ipf_state); 400 return ESRCH; 401 } 402 403 404 /* ------------------------------------------------------------------------ */ 405 /* Function: fr_state_ioctl */ 406 /* Returns: int - 0 == success, != 0 == failure */ 407 /* Parameters: data(I) - pointer to ioctl data */ 408 /* cmd(I) - ioctl command integer */ 409 /* mode(I) - file mode bits used with open */ 410 /* uid(I) - uid of caller */ 411 /* ctx(I) - pointer to give the uid context */ 412 /* ifs - ipf stack instance */ 413 /* */ 414 /* Processes an ioctl call made to operate on the IP Filter state device. */ 415 /* ------------------------------------------------------------------------ */ 416 int fr_state_ioctl(data, cmd, mode, uid, ctx, ifs) 417 caddr_t data; 418 ioctlcmd_t cmd; 419 int mode, uid; 420 void *ctx; 421 ipf_stack_t *ifs; 422 { 423 int arg, ret, error = 0; 424 425 switch (cmd) 426 { 427 /* 428 * Delete an entry from the state table. 
429 */ 430 case SIOCDELST : 431 error = fr_state_remove(data, ifs); 432 break; 433 /* 434 * Flush the state table 435 */ 436 case SIOCIPFFL : 437 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 438 if (error != 0) { 439 error = EFAULT; 440 } else { 441 if (VALID_TABLE_FLUSH_OPT(arg)) { 442 WRITE_ENTER(&ifs->ifs_ipf_state); 443 ret = fr_state_flush(arg, 4, ifs); 444 RWLOCK_EXIT(&ifs->ifs_ipf_state); 445 error = BCOPYOUT((char *)&ret, data, 446 sizeof(ret)); 447 if (error != 0) 448 return EFAULT; 449 } else { 450 error = EINVAL; 451 } 452 } 453 break; 454 455 #ifdef USE_INET6 456 case SIOCIPFL6 : 457 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 458 if (error != 0) { 459 error = EFAULT; 460 } else { 461 if (VALID_TABLE_FLUSH_OPT(arg)) { 462 WRITE_ENTER(&ifs->ifs_ipf_state); 463 ret = fr_state_flush(arg, 6, ifs); 464 RWLOCK_EXIT(&ifs->ifs_ipf_state); 465 error = BCOPYOUT((char *)&ret, data, 466 sizeof(ret)); 467 if (error != 0) 468 return EFAULT; 469 } else { 470 error = EINVAL; 471 } 472 } 473 break; 474 #endif 475 #ifdef IPFILTER_LOG 476 /* 477 * Flush the state log. 478 */ 479 case SIOCIPFFB : 480 if (!(mode & FWRITE)) 481 error = EPERM; 482 else { 483 int tmp; 484 485 tmp = ipflog_clear(IPL_LOGSTATE, ifs); 486 error = BCOPYOUT((char *)&tmp, data, sizeof(tmp)); 487 if (error != 0) 488 error = EFAULT; 489 } 490 break; 491 /* 492 * Turn logging of state information on/off. 493 */ 494 case SIOCSETLG : 495 if (!(mode & FWRITE)) { 496 error = EPERM; 497 } else { 498 error = BCOPYIN((char *)data, 499 (char *)&ifs->ifs_ipstate_logging, 500 sizeof(ifs->ifs_ipstate_logging)); 501 if (error != 0) 502 error = EFAULT; 503 } 504 break; 505 /* 506 * Return the current state of logging. 507 */ 508 case SIOCGETLG : 509 error = BCOPYOUT((char *)&ifs->ifs_ipstate_logging, 510 (char *)data, 511 sizeof(ifs->ifs_ipstate_logging)); 512 if (error != 0) 513 error = EFAULT; 514 break; 515 /* 516 * Return the number of bytes currently waiting to be read. 
517 */ 518 case FIONREAD : 519 arg = ifs->ifs_iplused[IPL_LOGSTATE]; /* returned in an int */ 520 error = BCOPYOUT((char *)&arg, data, sizeof(arg)); 521 if (error != 0) 522 error = EFAULT; 523 break; 524 #endif 525 /* 526 * Get the current state statistics. 527 */ 528 case SIOCGETFS : 529 error = fr_outobj(data, fr_statetstats(ifs), IPFOBJ_STATESTAT); 530 break; 531 /* 532 * Lock/Unlock the state table. (Locking prevents any changes, which 533 * means no packets match). 534 */ 535 case SIOCSTLCK : 536 if (!(mode & FWRITE)) { 537 error = EPERM; 538 } else { 539 error = fr_lock(data, &ifs->ifs_fr_state_lock); 540 } 541 break; 542 /* 543 * Add an entry to the current state table. 544 */ 545 case SIOCSTPUT : 546 if (!ifs->ifs_fr_state_lock || !(mode & FWRITE)) { 547 error = EACCES; 548 break; 549 } 550 error = fr_stputent(data, ifs); 551 break; 552 /* 553 * Get a state table entry. 554 */ 555 case SIOCSTGET : 556 if (!ifs->ifs_fr_state_lock) { 557 error = EACCES; 558 break; 559 } 560 error = fr_stgetent(data, ifs); 561 break; 562 563 case SIOCGENITER : 564 { 565 ipftoken_t *token; 566 ipfgeniter_t iter; 567 568 error = fr_inobj(data, &iter, IPFOBJ_GENITER); 569 if (error != 0) 570 break; 571 572 token = ipf_findtoken(IPFGENITER_STATE, uid, ctx, ifs); 573 if (token != NULL) 574 error = fr_stateiter(token, &iter, ifs); 575 else 576 error = ESRCH; 577 RWLOCK_EXIT(&ifs->ifs_ipf_tokens); 578 break; 579 } 580 581 case SIOCIPFDELTOK : 582 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 583 if (error != 0) { 584 error = EFAULT; 585 } else { 586 error = ipf_deltoken(arg, uid, ctx, ifs); 587 } 588 break; 589 590 default : 591 error = EINVAL; 592 break; 593 } 594 return error; 595 } 596 597 598 /* ------------------------------------------------------------------------ */ 599 /* Function: fr_stgetent */ 600 /* Returns: int - 0 == success, != 0 == failure */ 601 /* Parameters: data(I) - pointer to state structure to retrieve from table */ 602 /* */ 603 /* Copy out state 
information from the kernel to a user space process. If */ 604 /* there is a filter rule associated with the state entry, copy that out */ 605 /* as well. The entry to copy out is taken from the value of "ips_next" in */ 606 /* the struct passed in and if not null and not found in the list of current*/ 607 /* state entries, the retrieval fails. */ 608 /* ------------------------------------------------------------------------ */ 609 int fr_stgetent(data, ifs) 610 caddr_t data; 611 ipf_stack_t *ifs; 612 { 613 ipstate_t *is, *isn; 614 ipstate_save_t ips; 615 int error; 616 617 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); 618 if (error) 619 return EFAULT; 620 621 isn = ips.ips_next; 622 if (isn == NULL) { 623 isn = ifs->ifs_ips_list; 624 if (isn == NULL) { 625 if (ips.ips_next == NULL) 626 return ENOENT; 627 return 0; 628 } 629 } else { 630 /* 631 * Make sure the pointer we're copying from exists in the 632 * current list of entries. Security precaution to prevent 633 * copying of random kernel data. 634 */ 635 for (is = ifs->ifs_ips_list; is; is = is->is_next) 636 if (is == isn) 637 break; 638 if (!is) 639 return ESRCH; 640 } 641 ips.ips_next = isn->is_next; 642 bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is)); 643 ips.ips_rule = isn->is_rule; 644 if (isn->is_rule != NULL) 645 bcopy((char *)isn->is_rule, (char *)&ips.ips_fr, 646 sizeof(ips.ips_fr)); 647 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); 648 if (error) 649 return EFAULT; 650 return 0; 651 } 652 653 654 /* ------------------------------------------------------------------------ */ 655 /* Function: fr_stputent */ 656 /* Returns: int - 0 == success, != 0 == failure */ 657 /* Parameters: data(I) - pointer to state information struct */ 658 /* ifs - ipf stack instance */ 659 /* */ 660 /* This function implements the SIOCSTPUT ioctl: insert a state entry into */ 661 /* the state table. If the state info. 
includes a pointer to a filter rule */ 662 /* then also add in an orphaned rule (will not show up in any "ipfstat -io" */ 663 /* output. */ 664 /* ------------------------------------------------------------------------ */ 665 int fr_stputent(data, ifs) 666 caddr_t data; 667 ipf_stack_t *ifs; 668 { 669 ipstate_t *is, *isn; 670 ipstate_save_t ips; 671 int error, i; 672 frentry_t *fr; 673 char *name; 674 675 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); 676 if (error) 677 return EFAULT; 678 679 /* 680 * Trigger automatic call to fr_state_flush() if the 681 * table has reached capacity specified by hi watermark. 682 */ 683 if (ST_TAB_WATER_LEVEL(ifs) > ifs->ifs_state_flush_level_hi) 684 ifs->ifs_fr_state_doflush = 1; 685 686 /* 687 * If automatic flushing did not do its job, and the table 688 * has filled up, don't try to create a new entry. 689 */ 690 if (ifs->ifs_ips_num >= ifs->ifs_fr_statemax) { 691 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 692 return ENOMEM; 693 } 694 695 KMALLOC(isn, ipstate_t *); 696 if (isn == NULL) 697 return ENOMEM; 698 699 bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn)); 700 bzero((char *)isn, offsetof(struct ipstate, is_pkts)); 701 isn->is_sti.tqe_pnext = NULL; 702 isn->is_sti.tqe_next = NULL; 703 isn->is_sti.tqe_ifq = NULL; 704 isn->is_sti.tqe_parent = isn; 705 isn->is_ifp[0] = NULL; 706 isn->is_ifp[1] = NULL; 707 isn->is_ifp[2] = NULL; 708 isn->is_ifp[3] = NULL; 709 isn->is_sync = NULL; 710 fr = ips.ips_rule; 711 712 if (fr == NULL) { 713 READ_ENTER(&ifs->ifs_ipf_state); 714 fr_stinsert(isn, 0, ifs); 715 MUTEX_EXIT(&isn->is_lock); 716 RWLOCK_EXIT(&ifs->ifs_ipf_state); 717 return 0; 718 } 719 720 if (isn->is_flags & SI_NEWFR) { 721 KMALLOC(fr, frentry_t *); 722 if (fr == NULL) { 723 KFREE(isn); 724 return ENOMEM; 725 } 726 bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr)); 727 isn->is_rule = fr; 728 ips.ips_is.is_rule = fr; 729 MUTEX_NUKE(&fr->fr_lock); 730 MUTEX_INIT(&fr->fr_lock, "state filter rule lock"); 731 732 /* 733 * Look 
up all the interface names in the rule. 734 */ 735 for (i = 0; i < 4; i++) { 736 name = fr->fr_ifnames[i]; 737 fr->fr_ifas[i] = fr_resolvenic(name, fr->fr_v, ifs); 738 name = isn->is_ifname[i]; 739 isn->is_ifp[i] = fr_resolvenic(name, isn->is_v, ifs); 740 } 741 742 fr->fr_ref = 0; 743 fr->fr_dsize = 0; 744 fr->fr_data = NULL; 745 fr->fr_type = FR_T_NONE; 746 747 fr_resolvedest(&fr->fr_tif, fr->fr_v, ifs); 748 fr_resolvedest(&fr->fr_dif, fr->fr_v, ifs); 749 fr_resolvedest(&fr->fr_rif, fr->fr_v, ifs); 750 751 /* 752 * send a copy back to userland of what we ended up 753 * to allow for verification. 754 */ 755 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); 756 if (error) { 757 KFREE(isn); 758 MUTEX_DESTROY(&fr->fr_lock); 759 KFREE(fr); 760 return EFAULT; 761 } 762 READ_ENTER(&ifs->ifs_ipf_state); 763 fr_stinsert(isn, 0, ifs); 764 MUTEX_EXIT(&isn->is_lock); 765 RWLOCK_EXIT(&ifs->ifs_ipf_state); 766 767 } else { 768 READ_ENTER(&ifs->ifs_ipf_state); 769 for (is = ifs->ifs_ips_list; is; is = is->is_next) 770 if (is->is_rule == fr) { 771 fr_stinsert(isn, 0, ifs); 772 MUTEX_EXIT(&isn->is_lock); 773 break; 774 } 775 776 if (is == NULL) { 777 KFREE(isn); 778 isn = NULL; 779 } 780 RWLOCK_EXIT(&ifs->ifs_ipf_state); 781 782 return (isn == NULL) ? ESRCH : 0; 783 } 784 785 return 0; 786 } 787 788 789 /* ------------------------------------------------------------------------ */ 790 /* Function: fr_stinsert */ 791 /* Returns: Nil */ 792 /* Parameters: is(I) - pointer to state structure */ 793 /* rev(I) - flag indicating forward/reverse direction of packet */ 794 /* */ 795 /* Inserts a state structure into the hash table (for lookups) and the list */ 796 /* of state entries (for enumeration). Resolves all of the interface names */ 797 /* to pointers and adjusts running stats for the hash table as appropriate. */ 798 /* */ 799 /* Locking: it is assumed that some kind of lock on ipf_state is held. */ 800 /* Exits with is_lock initialised and held. 
*/ 801 /* ------------------------------------------------------------------------ */ 802 void fr_stinsert(is, rev, ifs) 803 ipstate_t *is; 804 int rev; 805 ipf_stack_t *ifs; 806 { 807 frentry_t *fr; 808 u_int hv; 809 int i; 810 811 MUTEX_INIT(&is->is_lock, "ipf state entry"); 812 813 fr = is->is_rule; 814 if (fr != NULL) { 815 MUTEX_ENTER(&fr->fr_lock); 816 fr->fr_ref++; 817 fr->fr_statecnt++; 818 MUTEX_EXIT(&fr->fr_lock); 819 } 820 821 /* 822 * Look up all the interface names in the state entry. 823 */ 824 for (i = 0; i < 4; i++) { 825 if (is->is_ifp[i] != NULL) 826 continue; 827 is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], is->is_v, ifs); 828 } 829 830 /* 831 * If we could trust is_hv, then the modulous would not be needed, but 832 * when running with IPFILTER_SYNC, this stops bad values. 833 */ 834 hv = is->is_hv % ifs->ifs_fr_statesize; 835 is->is_hv = hv; 836 837 /* 838 * We need to get both of these locks...the first because it is 839 * possible that once the insert is complete another packet might 840 * come along, match the entry and want to update it. 841 */ 842 MUTEX_ENTER(&is->is_lock); 843 MUTEX_ENTER(&ifs->ifs_ipf_stinsert); 844 845 /* 846 * add into list table. 
847 */ 848 if (ifs->ifs_ips_list != NULL) 849 ifs->ifs_ips_list->is_pnext = &is->is_next; 850 is->is_pnext = &ifs->ifs_ips_list; 851 is->is_next = ifs->ifs_ips_list; 852 ifs->ifs_ips_list = is; 853 854 if (ifs->ifs_ips_table[hv] != NULL) 855 ifs->ifs_ips_table[hv]->is_phnext = &is->is_hnext; 856 else 857 ifs->ifs_ips_stats.iss_inuse++; 858 is->is_phnext = ifs->ifs_ips_table + hv; 859 is->is_hnext = ifs->ifs_ips_table[hv]; 860 ifs->ifs_ips_table[hv] = is; 861 ifs->ifs_ips_stats.iss_bucketlen[hv]++; 862 ifs->ifs_ips_num++; 863 MUTEX_EXIT(&ifs->ifs_ipf_stinsert); 864 865 fr_setstatequeue(is, rev, ifs); 866 } 867 868 /* ------------------------------------------------------------------------ */ 869 /* Function: fr_match_ipv4addrs */ 870 /* Returns: int - 2 strong match (same addresses, same direction) */ 871 /* 1 weak match (same address, opposite direction) */ 872 /* 0 no match */ 873 /* */ 874 /* Function matches IPv4 addresses. */ 875 /* ------------------------------------------------------------------------ */ 876 static int fr_match_ipv4addrs(is1, is2) 877 ipstate_t *is1; 878 ipstate_t *is2; 879 { 880 int rv; 881 882 if (is1->is_saddr == is2->is_saddr && is1->is_daddr == is2->is_daddr) 883 rv = 2; 884 else if (is1->is_saddr == is2->is_daddr && 885 is1->is_daddr == is2->is_saddr) 886 rv = 1; 887 else 888 rv = 0; 889 890 return (rv); 891 } 892 893 /* ------------------------------------------------------------------------ */ 894 /* Function: fr_match_ipv6addrs */ 895 /* Returns: int - 2 strong match (same addresses, same direction) */ 896 /* 1 weak match (same addresses, opposite direction) */ 897 /* 0 no match */ 898 /* */ 899 /* Function matches IPv6 addresses. 
*/ 900 /* ------------------------------------------------------------------------ */ 901 static int fr_match_ipv6addrs(is1, is2) 902 ipstate_t *is1; 903 ipstate_t *is2; 904 { 905 int rv; 906 907 if (IP6_EQ(&is1->is_src, &is2->is_src) && 908 IP6_EQ(&is1->is_dst, &is2->is_dst)) 909 rv = 2; 910 else if (IP6_EQ(&is1->is_src, &is2->is_dst) && 911 IP6_EQ(&is1->is_dst, &is2->is_src)) { 912 rv = 1; 913 } 914 else 915 rv = 0; 916 917 return (rv); 918 } 919 /* ------------------------------------------------------------------------ */ 920 /* Function: fr_match_addresses */ 921 /* Returns: int - 2 strong match (same addresses, same direction) */ 922 /* 1 weak match (same address, opposite directions) */ 923 /* 0 no match */ 924 /* Parameters: is1, is2 pointers to states we are checking */ 925 /* */ 926 /* Matches addresses, function uses fr_match_ipvXaddrs() to deal with IPv4 */ 927 /* and IPv6 address format. */ 928 /* ------------------------------------------------------------------------ */ 929 static int fr_match_addresses(is1, is2) 930 ipstate_t *is1; 931 ipstate_t *is2; 932 { 933 int rv; 934 935 if (is1->is_v == 4) { 936 rv = fr_match_ipv4addrs(is1, is2); 937 } else { 938 rv = fr_match_ipv6addrs(is1, is2); 939 } 940 941 return (rv); 942 } 943 944 /* ------------------------------------------------------------------------ */ 945 /* Function: fr_match_ppairs */ 946 /* Returns: int - 2 strong match (same ports, same direction) */ 947 /* 1 weak match (same ports, different direction) */ 948 /* 0 no match */ 949 /* Parameters ppairs1, ppairs - src, dst ports we want to match. */ 950 /* */ 951 /* Matches two port_pair_t types (port pairs). Each port pair contains */ 952 /* src, dst port, which belong to session (state entry). 
*/ 953 /* ------------------------------------------------------------------------ */ 954 static int fr_match_ppairs(ppairs1, ppairs2) 955 port_pair_t *ppairs1; 956 port_pair_t *ppairs2; 957 { 958 int rv; 959 960 if (ppairs1->pp_sport == ppairs2->pp_sport && 961 ppairs1->pp_dport == ppairs2->pp_dport) 962 rv = 2; 963 else if (ppairs1->pp_sport == ppairs2->pp_dport && 964 ppairs1->pp_dport == ppairs2->pp_sport) 965 rv = 1; 966 else 967 rv = 0; 968 969 return (rv); 970 } 971 972 /* ------------------------------------------------------------------------ */ 973 /* Function: fr_match_l4_hdr */ 974 /* Returns: int - 0 no match, */ 975 /* 1 weak match (same ports, different directions) */ 976 /* 2 strong match (same ports, same direction) */ 977 /* Parameters is1, is2 - states we want to match */ 978 /* */ 979 /* Function matches L4 header data (source ports for TCP, UDP, CallIds for */ 980 /* GRE protocol). */ 981 /* ------------------------------------------------------------------------ */ 982 static int fr_match_l4_hdr(is1, is2) 983 ipstate_t *is1; 984 ipstate_t *is2; 985 { 986 int rv = 0; 987 port_pair_t pp1; 988 port_pair_t pp2; 989 990 if (is1->is_p != is2->is_p) 991 return (0); 992 993 switch (is1->is_p) { 994 case IPPROTO_TCP: 995 pp1.pp_sport = is1->is_ps.is_ts.ts_sport; 996 pp1.pp_dport = is1->is_ps.is_ts.ts_dport; 997 pp2.pp_sport = is2->is_ps.is_ts.ts_sport; 998 pp2.pp_dport = is2->is_ps.is_ts.ts_dport; 999 rv = fr_match_ppairs(&pp1, &pp2); 1000 break; 1001 case IPPROTO_UDP: 1002 pp1.pp_sport = is1->is_ps.is_us.us_sport; 1003 pp1.pp_dport = is1->is_ps.is_us.us_dport; 1004 pp2.pp_sport = is2->is_ps.is_us.us_sport; 1005 pp2.pp_dport = is2->is_ps.is_us.us_dport; 1006 rv = fr_match_ppairs(&pp1, &pp2); 1007 break; 1008 case IPPROTO_GRE: 1009 /* greinfo_t can be also interprted as port pair */ 1010 pp1.pp_sport = is1->is_ps.is_ug.gs_call[0]; 1011 pp1.pp_dport = is1->is_ps.is_ug.gs_call[1]; 1012 pp2.pp_sport = is2->is_ps.is_ug.gs_call[0]; 1013 pp2.pp_dport = 
is2->is_ps.is_ug.gs_call[1]; 1014 rv = fr_match_ppairs(&pp1, &pp2); 1015 break; 1016 case IPPROTO_ICMP: 1017 case IPPROTO_ICMPV6: 1018 if (bcmp(&is1->is_ps, &is2->is_ps, sizeof (icmpinfo_t))) 1019 rv = 1; 1020 else 1021 rv = 0; 1022 break; 1023 default: 1024 rv = 0; 1025 } 1026 1027 return (rv); 1028 } 1029 1030 /* ------------------------------------------------------------------------ */ 1031 /* Function: fr_matchstates */ 1032 /* Returns: int - nonzero match, zero no match */ 1033 /* Parameters is1, is2 - states we want to match */ 1034 /* */ 1035 /* The state entries are equal (identical match) if they belong to the same */ 1036 /* session. Any time new state entry is being added the fr_addstate() */ 1037 /* function creates temporal state entry from the data it gets from IP and */ 1038 /* L4 header. The fr_matchstats() must be also aware of packet direction, */ 1039 /* which is also stored within the state entry. We should keep in mind the */ 1040 /* information about packet direction is spread accross L3 (addresses) and */ 1041 /* L4 (ports). There are three possible relationships betwee is1, is2: */ 1042 /* - no match (match(is1, is2) == 0)) */ 1043 /* - weak match same addresses (ports), but different */ 1044 /* directions (1) (fr_match_xxxx(is1, is2) == 1) */ 1045 /* - strong match same addresses (ports) and same directions */ 1046 /* (2) (fr_match_xxxx(is1, is2) == 2) */ 1047 /* */ 1048 /* There are functions, which match match addresses (L3 header) in is1, is2 */ 1049 /* and functions, which are used to compare ports (L4 header) data. We say */ 1050 /* the is1 and is2 are same (identical) if there is a match */ 1051 /* (fr_match_l4_hdr(is1, is2) != 0) and matchlevels are same for entries */ 1052 /* (fr_match_l3_hdr(is1, is2) == fr_match_l4_hdr(is1, is2)) for is1, is2. */ 1053 /* Such requirement deals with case as follows: */ 1054 /* suppose there are two connections between hosts A, B. 
Connection 1: */ 1055 /* a.a.a.a:12345 <=> b.b.b.b:54321 */ 1056 /* Connection 2: */ 1057 /* a.a.a.a:54321 <=> b.b.b.b:12345 */ 1058 /* since we've introduced match levels into our fr_matchstates(), we are */ 1059 /* able to identify, which packets belong to connection A and which belong */ 1060 /* to connection B. Assume there are two entries is1, is2. is1 has been */ 1061 /* from con. 1 packet, which travelled from A to B: */ 1062 /* a.a.a.a:12345 -> b.b.b.b:54321 */ 1063 /* while s2, has been created from packet which belongs to con. 2 and is */ 1064 /* also coming from A to B: */ 1065 /* a.a.a.a:54321 -> b.b.b.b:12345 */ 1066 /* fr_match_l3_hdr(is1, is2) == 2 -> strong match, while */ 1067 /* fr_match_l4_hdr(is1, is2) == 1 -> weak match. Since match levels are */ 1068 /* different the state entries are not identical -> no match as a final */ 1069 /* result. */ 1070 /* ------------------------------------------------------------------------ */ 1071 static int fr_matchstates(is1, is2) 1072 ipstate_t *is1; 1073 ipstate_t *is2; 1074 { 1075 int rv; 1076 int amatch; 1077 int pmatch; 1078 1079 if (bcmp(&is1->is_pass, &is2->is_pass, 1080 offsetof(struct ipstate, is_ps) - 1081 offsetof(struct ipstate, is_pass)) == 0) { 1082 1083 pmatch = fr_match_l4_hdr(is1, is2); 1084 amatch = fr_match_addresses(is1, is2); 1085 /* 1086 * If addresses match (amatch != 0), then 'match levels' 1087 * must be same for matching entries. If amatch and pmatch 1088 * have different values (different match levels), then 1089 * is1 and is2 belong to different sessions. 
1090 */ 1091 rv = (amatch != 0) && (amatch == pmatch); 1092 } 1093 else 1094 rv = 0; 1095 1096 return (rv); 1097 } 1098 1099 /* ------------------------------------------------------------------------ */ 1100 /* Function: fr_addstate */ 1101 /* Returns: ipstate_t* - NULL == failure, else pointer to new state */ 1102 /* Parameters: fin(I) - pointer to packet information */ 1103 /* stsave(O) - pointer to place to save pointer to created */ 1104 /* state structure. */ 1105 /* flags(I) - flags to use when creating the structure */ 1106 /* */ 1107 /* Creates a new IP state structure from the packet information collected. */ 1108 /* Inserts it into the state table and appends to the bottom of the active */ 1109 /* list. If the capacity of the table has reached the maximum allowed then */ 1110 /* the call will fail and a flush is scheduled for the next timeout call. */ 1111 /* ------------------------------------------------------------------------ */ 1112 ipstate_t *fr_addstate(fin, stsave, flags) 1113 fr_info_t *fin; 1114 ipstate_t **stsave; 1115 u_int flags; 1116 { 1117 ipstate_t *is, ips; 1118 struct icmp *ic; 1119 u_int pass, hv; 1120 frentry_t *fr; 1121 tcphdr_t *tcp; 1122 grehdr_t *gre; 1123 void *ifp; 1124 int out; 1125 ipf_stack_t *ifs = fin->fin_ifs; 1126 1127 if (ifs->ifs_fr_state_lock || 1128 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) 1129 return NULL; 1130 1131 if ((fin->fin_flx & FI_OOW) && !(fin->fin_tcpf & TH_SYN)) 1132 return NULL; 1133 1134 /* 1135 * Trigger automatic call to fr_state_flush() if the 1136 * table has reached capacity specified by hi watermark. 1137 */ 1138 if (ST_TAB_WATER_LEVEL(ifs) > ifs->ifs_state_flush_level_hi) 1139 ifs->ifs_fr_state_doflush = 1; 1140 1141 /* 1142 * If the max number of state entries has been reached, and there is no 1143 * limit on the state count for the rule, then do not continue. 
In the 1144 * case where a limit exists, it's ok allow the entries to be created as 1145 * long as specified limit itself has not been reached. 1146 * 1147 * Note that because the lock isn't held on fr, it is possible to exceed 1148 * the specified size of the table. However, the cost of this is being 1149 * ignored here; as the number by which it can go over is a product of 1150 * the number of simultaneous threads that could be executing in here. 1151 * So, a limit of 100 won't result in 200, but could result in 101 or 102. 1152 * 1153 * Also note that, since the automatic flush should have been triggered 1154 * well before we reach the maximum number of state table entries, the 1155 * likelihood of reaching the max (and thus exceedng it) is minimal. 1156 */ 1157 fr = fin->fin_fr; 1158 if (fr != NULL) { 1159 if ((ifs->ifs_ips_num >= ifs->ifs_fr_statemax) && 1160 (fr->fr_statemax == 0)) { 1161 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 1162 return NULL; 1163 } 1164 if ((fr->fr_statemax != 0) && 1165 (fr->fr_statecnt >= fr->fr_statemax)) { 1166 ATOMIC_INCL(ifs->ifs_ips_stats.iss_maxref); 1167 ifs->ifs_fr_state_doflush = 1; 1168 return NULL; 1169 } 1170 } 1171 1172 ic = NULL; 1173 tcp = NULL; 1174 out = fin->fin_out; 1175 is = &ips; 1176 bzero((char *)is, sizeof(*is)); 1177 1178 if (fr == NULL) { 1179 pass = ifs->ifs_fr_flags; 1180 is->is_tag = FR_NOLOGTAG; 1181 } else { 1182 pass = fr->fr_flags; 1183 } 1184 1185 is->is_die = 1 + ifs->ifs_fr_ticks; 1186 /* 1187 * We want to check everything that is a property of this packet, 1188 * but we don't (automatically) care about it's fragment status as 1189 * this may change. 
1190 */ 1191 is->is_pass = pass; 1192 is->is_v = fin->fin_v; 1193 is->is_opt[0] = fin->fin_optmsk; 1194 is->is_optmsk[0] = 0xffffffff; 1195 is->is_optmsk[1] = 0xffffffff; 1196 if (is->is_v == 6) { 1197 is->is_opt[0] &= ~0x8; 1198 is->is_optmsk[0] &= ~0x8; 1199 is->is_optmsk[1] &= ~0x8; 1200 } 1201 is->is_sec = fin->fin_secmsk; 1202 is->is_secmsk = 0xffff; 1203 is->is_auth = fin->fin_auth; 1204 is->is_authmsk = 0xffff; 1205 1206 /* 1207 * Copy and calculate... 1208 */ 1209 hv = (is->is_p = fin->fin_fi.fi_p); 1210 is->is_src = fin->fin_fi.fi_src; 1211 hv += is->is_saddr; 1212 is->is_dst = fin->fin_fi.fi_dst; 1213 hv += is->is_daddr; 1214 #ifdef USE_INET6 1215 if (fin->fin_v == 6) { 1216 /* 1217 * For ICMPv6, we check to see if the destination address is 1218 * a multicast address. If it is, do not include it in the 1219 * calculation of the hash because the correct reply will come 1220 * back from a real address, not a multicast address. 1221 */ 1222 if ((is->is_p == IPPROTO_ICMPV6) && 1223 IN6_IS_ADDR_MULTICAST(&is->is_dst.in6)) { 1224 /* 1225 * So you can do keep state with neighbour discovery. 1226 * 1227 * Here we could use the address from the neighbour 1228 * solicit message to put in the state structure and 1229 * we could use that without a wildcard flag too... 
1230 */ 1231 is->is_flags |= SI_W_DADDR; 1232 hv -= is->is_daddr; 1233 } else { 1234 hv += is->is_dst.i6[1]; 1235 hv += is->is_dst.i6[2]; 1236 hv += is->is_dst.i6[3]; 1237 } 1238 hv += is->is_src.i6[1]; 1239 hv += is->is_src.i6[2]; 1240 hv += is->is_src.i6[3]; 1241 } 1242 #endif 1243 if ((fin->fin_v == 4) && 1244 (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) { 1245 if (fin->fin_out == 0) { 1246 flags |= SI_W_DADDR|SI_CLONE; 1247 hv -= is->is_daddr; 1248 } else { 1249 flags |= SI_W_SADDR|SI_CLONE; 1250 hv -= is->is_saddr; 1251 } 1252 } 1253 1254 switch (is->is_p) 1255 { 1256 #ifdef USE_INET6 1257 case IPPROTO_ICMPV6 : 1258 ic = fin->fin_dp; 1259 1260 switch (ic->icmp_type) 1261 { 1262 case ICMP6_ECHO_REQUEST : 1263 is->is_icmp.ici_type = ic->icmp_type; 1264 hv += (is->is_icmp.ici_id = ic->icmp_id); 1265 break; 1266 case ICMP6_MEMBERSHIP_QUERY : 1267 case ND_ROUTER_SOLICIT : 1268 case ND_NEIGHBOR_SOLICIT : 1269 case ICMP6_NI_QUERY : 1270 is->is_icmp.ici_type = ic->icmp_type; 1271 break; 1272 default : 1273 return NULL; 1274 } 1275 ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp); 1276 break; 1277 #endif 1278 case IPPROTO_ICMP : 1279 ic = fin->fin_dp; 1280 1281 switch (ic->icmp_type) 1282 { 1283 case ICMP_ECHO : 1284 case ICMP_TSTAMP : 1285 case ICMP_IREQ : 1286 case ICMP_MASKREQ : 1287 is->is_icmp.ici_type = ic->icmp_type; 1288 hv += (is->is_icmp.ici_id = ic->icmp_id); 1289 break; 1290 default : 1291 return NULL; 1292 } 1293 ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp); 1294 break; 1295 1296 case IPPROTO_GRE : 1297 gre = fin->fin_dp; 1298 1299 is->is_gre.gs_flags = gre->gr_flags; 1300 is->is_gre.gs_ptype = gre->gr_ptype; 1301 if (GRE_REV(is->is_gre.gs_flags) == 1) { 1302 is->is_call[0] = fin->fin_data[0]; 1303 is->is_call[1] = fin->fin_data[1]; 1304 } 1305 break; 1306 1307 case IPPROTO_TCP : 1308 tcp = fin->fin_dp; 1309 1310 if (tcp->th_flags & TH_RST) 1311 return NULL; 1312 /* 1313 * The endian of the ports doesn't matter, but the ack and 1314 * sequence numbers do as 
we do mathematics on them later. 1315 */ 1316 is->is_sport = htons(fin->fin_data[0]); 1317 is->is_dport = htons(fin->fin_data[1]); 1318 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) { 1319 hv += is->is_sport; 1320 hv += is->is_dport; 1321 } 1322 1323 /* 1324 * If this is a real packet then initialise fields in the 1325 * state information structure from the TCP header information. 1326 */ 1327 1328 is->is_maxdwin = 1; 1329 is->is_maxswin = ntohs(tcp->th_win); 1330 if (is->is_maxswin == 0) 1331 is->is_maxswin = 1; 1332 1333 if ((fin->fin_flx & FI_IGNORE) == 0) { 1334 is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen - 1335 (TCP_OFF(tcp) << 2) + 1336 ((tcp->th_flags & TH_SYN) ? 1 : 0) + 1337 ((tcp->th_flags & TH_FIN) ? 1 : 0); 1338 is->is_maxsend = is->is_send; 1339 1340 /* 1341 * Window scale option is only present in 1342 * SYN/SYN-ACK packet. 1343 */ 1344 if ((tcp->th_flags & ~(TH_FIN|TH_ACK|TH_ECNALL)) == 1345 TH_SYN && 1346 (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) { 1347 if (fr_tcpoptions(fin, tcp, 1348 &is->is_tcp.ts_data[0]) == -1) { 1349 fin->fin_flx |= FI_BAD; 1350 } 1351 } 1352 1353 if ((fin->fin_out != 0) && (pass & FR_NEWISN) != 0) { 1354 fr_checknewisn(fin, is); 1355 fr_fixoutisn(fin, is); 1356 } 1357 1358 if ((tcp->th_flags & TH_OPENING) == TH_SYN) 1359 flags |= IS_TCPFSM; 1360 else { 1361 is->is_maxdwin = is->is_maxswin * 2; 1362 is->is_dend = ntohl(tcp->th_ack); 1363 is->is_maxdend = ntohl(tcp->th_ack); 1364 is->is_maxdwin *= 2; 1365 } 1366 } 1367 1368 /* 1369 * If we're creating state for a starting connection, start the 1370 * timer on it as we'll never see an error if it fails to 1371 * connect. 
1372 */ 1373 ATOMIC_INCL(ifs->ifs_ips_stats.iss_tcp); 1374 break; 1375 1376 case IPPROTO_UDP : 1377 tcp = fin->fin_dp; 1378 1379 is->is_sport = htons(fin->fin_data[0]); 1380 is->is_dport = htons(fin->fin_data[1]); 1381 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) { 1382 hv += tcp->th_dport; 1383 hv += tcp->th_sport; 1384 } 1385 ATOMIC_INCL(ifs->ifs_ips_stats.iss_udp); 1386 break; 1387 1388 default : 1389 break; 1390 } 1391 hv = DOUBLE_HASH(hv, ifs); 1392 is->is_hv = hv; 1393 is->is_rule = fr; 1394 is->is_flags = flags & IS_INHERITED; 1395 1396 /* 1397 * Look for identical state. 1398 */ 1399 for (is = ifs->ifs_ips_table[is->is_hv % ifs->ifs_fr_statesize]; 1400 is != NULL; 1401 is = is->is_hnext) { 1402 if (fr_matchstates(&ips, is) == 1) 1403 break; 1404 } 1405 1406 /* 1407 * we've found a matching state -> state already exists, 1408 * we are not going to add a duplicate record. 1409 */ 1410 if (is != NULL) 1411 return NULL; 1412 1413 if (ifs->ifs_ips_stats.iss_bucketlen[hv] >= ifs->ifs_fr_state_maxbucket) { 1414 ATOMIC_INCL(ifs->ifs_ips_stats.iss_bucketfull); 1415 return NULL; 1416 } 1417 KMALLOC(is, ipstate_t *); 1418 if (is == NULL) { 1419 ATOMIC_INCL(ifs->ifs_ips_stats.iss_nomem); 1420 return NULL; 1421 } 1422 bcopy((char *)&ips, (char *)is, sizeof(*is)); 1423 /* 1424 * Do not do the modulous here, it is done in fr_stinsert(). 
1425 */ 1426 if (fr != NULL) { 1427 (void) strncpy(is->is_group, fr->fr_group, FR_GROUPLEN); 1428 if (fr->fr_age[0] != 0) { 1429 is->is_tqehead[0] = 1430 fr_addtimeoutqueue(&ifs->ifs_ips_utqe, 1431 fr->fr_age[0], ifs); 1432 is->is_sti.tqe_flags |= TQE_RULEBASED; 1433 } 1434 if (fr->fr_age[1] != 0) { 1435 is->is_tqehead[1] = 1436 fr_addtimeoutqueue(&ifs->ifs_ips_utqe, 1437 fr->fr_age[1], ifs); 1438 is->is_sti.tqe_flags |= TQE_RULEBASED; 1439 } 1440 is->is_tag = fr->fr_logtag; 1441 1442 is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1]; 1443 is->is_ifp[(1 - out) << 1] = fr->fr_ifas[2]; 1444 is->is_ifp[((1 - out) << 1) + 1] = fr->fr_ifas[3]; 1445 1446 if (((ifp = fr->fr_ifas[1]) != NULL) && 1447 (ifp != (void *)-1)) { 1448 COPYIFNAME(ifp, is->is_ifname[(out << 1) + 1], fr->fr_v); 1449 } 1450 if (((ifp = fr->fr_ifas[2]) != NULL) && 1451 (ifp != (void *)-1)) { 1452 COPYIFNAME(ifp, is->is_ifname[(1 - out) << 1], fr->fr_v); 1453 } 1454 if (((ifp = fr->fr_ifas[3]) != NULL) && 1455 (ifp != (void *)-1)) { 1456 COPYIFNAME(ifp, is->is_ifname[((1 - out) << 1) + 1], fr->fr_v); 1457 } 1458 } 1459 1460 is->is_ifp[out << 1] = fin->fin_ifp; 1461 if (fin->fin_ifp != NULL) { 1462 COPYIFNAME(fin->fin_ifp, is->is_ifname[out << 1], fin->fin_v); 1463 } 1464 1465 is->is_ref = 1; 1466 is->is_pkts[0] = 0, is->is_bytes[0] = 0; 1467 is->is_pkts[1] = 0, is->is_bytes[1] = 0; 1468 is->is_pkts[2] = 0, is->is_bytes[2] = 0; 1469 is->is_pkts[3] = 0, is->is_bytes[3] = 0; 1470 if ((fin->fin_flx & FI_IGNORE) == 0) { 1471 is->is_pkts[out] = 1; 1472 is->is_bytes[out] = fin->fin_plen; 1473 is->is_flx[out][0] = fin->fin_flx & FI_CMP; 1474 is->is_flx[out][0] &= ~FI_OOW; 1475 } 1476 1477 if (pass & FR_STSTRICT) 1478 is->is_flags |= IS_STRICT; 1479 1480 if (pass & FR_STATESYNC) 1481 is->is_flags |= IS_STATESYNC; 1482 1483 if (flags & (SI_WILDP|SI_WILDA)) { 1484 ATOMIC_INCL(ifs->ifs_ips_stats.iss_wild); 1485 } 1486 is->is_rulen = fin->fin_rule; 1487 1488 1489 if (pass & FR_LOGFIRST) 1490 is->is_pass &= 
~(FR_LOGFIRST|FR_LOG); 1491 1492 READ_ENTER(&ifs->ifs_ipf_state); 1493 is->is_me = stsave; 1494 1495 fr_stinsert(is, fin->fin_rev, ifs); 1496 1497 if (fin->fin_p == IPPROTO_TCP) { 1498 /* 1499 * If we're creating state for a starting connection, start the 1500 * timer on it as we'll never see an error if it fails to 1501 * connect. 1502 */ 1503 (void) fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb, 1504 is->is_flags); 1505 MUTEX_EXIT(&is->is_lock); 1506 #ifdef IPFILTER_SCAN 1507 if ((is->is_flags & SI_CLONE) == 0) 1508 (void) ipsc_attachis(is); 1509 #endif 1510 } else { 1511 MUTEX_EXIT(&is->is_lock); 1512 } 1513 #ifdef IPFILTER_SYNC 1514 if ((is->is_flags & IS_STATESYNC) && ((is->is_flags & SI_CLONE) == 0)) 1515 is->is_sync = ipfsync_new(SMC_STATE, fin, is); 1516 #endif 1517 if (ifs->ifs_ipstate_logging) 1518 ipstate_log(is, ISL_NEW, ifs); 1519 1520 RWLOCK_EXIT(&ifs->ifs_ipf_state); 1521 fin->fin_rev = IP6_NEQ(&is->is_dst, &fin->fin_daddr); 1522 fin->fin_flx |= FI_STATE; 1523 if (fin->fin_flx & FI_FRAG) 1524 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); 1525 1526 return is; 1527 } 1528 1529 1530 /* ------------------------------------------------------------------------ */ 1531 /* Function: fr_tcpoptions */ 1532 /* Returns: int - 1 == packet matches state entry, 0 == it does not */ 1533 /* Parameters: fin(I) - pointer to packet information */ 1534 /* tcp(I) - pointer to TCP packet header */ 1535 /* td(I) - pointer to TCP data held as part of the state */ 1536 /* */ 1537 /* Look after the TCP header for any options and deal with those that are */ 1538 /* present. Record details about those that we recogise. 
*/ 1539 /* ------------------------------------------------------------------------ */ 1540 static int fr_tcpoptions(fin, tcp, td) 1541 fr_info_t *fin; 1542 tcphdr_t *tcp; 1543 tcpdata_t *td; 1544 { 1545 int off, mlen, ol, i, len, retval; 1546 char buf[64], *s, opt; 1547 mb_t *m = NULL; 1548 1549 len = (TCP_OFF(tcp) << 2); 1550 if (fin->fin_dlen < len) 1551 return 0; 1552 len -= sizeof(*tcp); 1553 1554 off = fin->fin_plen - fin->fin_dlen + sizeof(*tcp) + fin->fin_ipoff; 1555 1556 m = fin->fin_m; 1557 mlen = MSGDSIZE(m) - off; 1558 if (len > mlen) { 1559 len = mlen; 1560 retval = 0; 1561 } else { 1562 retval = 1; 1563 } 1564 1565 COPYDATA(m, off, len, buf); 1566 1567 for (s = buf; len > 0; ) { 1568 opt = *s; 1569 if (opt == TCPOPT_EOL) 1570 break; 1571 else if (opt == TCPOPT_NOP) 1572 ol = 1; 1573 else { 1574 if (len < 2) 1575 break; 1576 ol = (int)*(s + 1); 1577 if (ol < 2 || ol > len) 1578 break; 1579 1580 /* 1581 * Extract the TCP options we are interested in out of 1582 * the header and store them in the the tcpdata struct. 1583 */ 1584 switch (opt) 1585 { 1586 case TCPOPT_WINDOW : 1587 if (ol == TCPOLEN_WINDOW) { 1588 i = (int)*(s + 2); 1589 if (i > TCP_WSCALE_MAX) 1590 i = TCP_WSCALE_MAX; 1591 else if (i < 0) 1592 i = 0; 1593 td->td_winscale = i; 1594 td->td_winflags |= TCP_WSCALE_SEEN | 1595 TCP_WSCALE_FIRST; 1596 } else 1597 retval = -1; 1598 break; 1599 case TCPOPT_MAXSEG : 1600 /* 1601 * So, if we wanted to set the TCP MAXSEG, 1602 * it should be done here... 
1603 */ 1604 if (ol == TCPOLEN_MAXSEG) { 1605 i = (int)*(s + 2); 1606 i <<= 8; 1607 i += (int)*(s + 3); 1608 td->td_maxseg = i; 1609 } else 1610 retval = -1; 1611 break; 1612 case TCPOPT_SACK_PERMITTED : 1613 if (ol == TCPOLEN_SACK_PERMITTED) 1614 td->td_winflags |= TCP_SACK_PERMIT; 1615 else 1616 retval = -1; 1617 break; 1618 } 1619 } 1620 len -= ol; 1621 s += ol; 1622 } 1623 return retval; 1624 } 1625 1626 1627 /* ------------------------------------------------------------------------ */ 1628 /* Function: fr_tcpstate */ 1629 /* Returns: int - 1 == packet matches state entry, 0 == it does not */ 1630 /* Parameters: fin(I) - pointer to packet information */ 1631 /* tcp(I) - pointer to TCP packet header */ 1632 /* is(I) - pointer to master state structure */ 1633 /* */ 1634 /* Check to see if a packet with TCP headers fits within the TCP window. */ 1635 /* Change timeout depending on whether new packet is a SYN-ACK returning */ 1636 /* for a SYN or a RST or FIN which indicate time to close up shop. */ 1637 /* ------------------------------------------------------------------------ */ 1638 static int fr_tcpstate(fin, tcp, is) 1639 fr_info_t *fin; 1640 tcphdr_t *tcp; 1641 ipstate_t *is; 1642 { 1643 int source, ret = 0, flags; 1644 tcpdata_t *fdata, *tdata; 1645 ipf_stack_t *ifs = fin->fin_ifs; 1646 1647 source = !fin->fin_rev; 1648 if (((is->is_flags & IS_TCPFSM) != 0) && (source == 1) && 1649 (ntohs(is->is_sport) != fin->fin_data[0])) 1650 source = 0; 1651 fdata = &is->is_tcp.ts_data[!source]; 1652 tdata = &is->is_tcp.ts_data[source]; 1653 1654 MUTEX_ENTER(&is->is_lock); 1655 1656 /* 1657 * If a SYN packet is received for a connection that is in a half 1658 * closed state, then move its state entry to deletetq. In such case 1659 * the SYN packet will be consequently dropped. This allows new state 1660 * entry to be created with a retransmited SYN packet. 
1661 */ 1662 if ((tcp->th_flags & TH_OPENING) == TH_SYN) { 1663 if ((is->is_state[source] > IPF_TCPS_ESTABLISHED) && 1664 (is->is_state[!source] > IPF_TCPS_ESTABLISHED)) { 1665 is->is_state[source] = IPF_TCPS_CLOSED; 1666 is->is_state[!source] = IPF_TCPS_CLOSED; 1667 /* 1668 * Do not update is->is_sti.tqe_die in case state entry 1669 * is already present in deletetq. It prevents state 1670 * entry ttl update by retransmitted SYN packets, which 1671 * may arrive before timer tick kicks off. The SYN 1672 * packet will be dropped again. 1673 */ 1674 if (is->is_sti.tqe_ifq != &ifs->ifs_ips_deletetq) 1675 fr_movequeue(&is->is_sti, is->is_sti.tqe_ifq, 1676 &fin->fin_ifs->ifs_ips_deletetq, 1677 fin->fin_ifs); 1678 1679 MUTEX_EXIT(&is->is_lock); 1680 return 0; 1681 } 1682 } 1683 1684 if (fr_tcpinwindow(fin, fdata, tdata, tcp, is->is_flags)) { 1685 #ifdef IPFILTER_SCAN 1686 if (is->is_flags & (IS_SC_CLIENT|IS_SC_SERVER)) { 1687 ipsc_packet(fin, is); 1688 if (FR_ISBLOCK(is->is_pass)) { 1689 MUTEX_EXIT(&is->is_lock); 1690 return 1; 1691 } 1692 } 1693 #endif 1694 1695 /* 1696 * Nearing end of connection, start timeout. 1697 */ 1698 ret = fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb, 1699 is->is_flags); 1700 if (ret == 0) { 1701 MUTEX_EXIT(&is->is_lock); 1702 return 0; 1703 } 1704 1705 /* 1706 * set s0's as appropriate. Use syn-ack packet as it 1707 * contains both pieces of required information. 1708 */ 1709 /* 1710 * Window scale option is only present in SYN/SYN-ACK packet. 1711 * Compare with ~TH_FIN to mask out T/TCP setups. 
1712 */ 1713 flags = tcp->th_flags & ~(TH_FIN|TH_ECNALL); 1714 if (flags == (TH_SYN|TH_ACK)) { 1715 is->is_s0[source] = ntohl(tcp->th_ack); 1716 is->is_s0[!source] = ntohl(tcp->th_seq) + 1; 1717 if (TCP_OFF(tcp) > (sizeof (tcphdr_t) >> 2)) { 1718 (void) fr_tcpoptions(fin, tcp, fdata); 1719 } 1720 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) 1721 fr_checknewisn(fin, is); 1722 } else if (flags == TH_SYN) { 1723 is->is_s0[source] = ntohl(tcp->th_seq) + 1; 1724 if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) 1725 (void) fr_tcpoptions(fin, tcp, tdata); 1726 1727 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) 1728 fr_checknewisn(fin, is); 1729 1730 } 1731 ret = 1; 1732 } else 1733 fin->fin_flx |= FI_OOW; 1734 MUTEX_EXIT(&is->is_lock); 1735 return ret; 1736 } 1737 1738 1739 /* ------------------------------------------------------------------------ */ 1740 /* Function: fr_checknewisn */ 1741 /* Returns: Nil */ 1742 /* Parameters: fin(I) - pointer to packet information */ 1743 /* is(I) - pointer to master state structure */ 1744 /* */ 1745 /* Check to see if this TCP connection is expecting and needs a new */ 1746 /* sequence number for a particular direction of the connection. */ 1747 /* */ 1748 /* NOTE: This does not actually change the sequence numbers, only gets new */ 1749 /* one ready. */ 1750 /* ------------------------------------------------------------------------ */ 1751 static void fr_checknewisn(fin, is) 1752 fr_info_t *fin; 1753 ipstate_t *is; 1754 { 1755 u_32_t sumd, old, new; 1756 tcphdr_t *tcp; 1757 int i; 1758 1759 i = fin->fin_rev; 1760 tcp = fin->fin_dp; 1761 1762 if (((i == 0) && !(is->is_flags & IS_ISNSYN)) || 1763 ((i == 1) && !(is->is_flags & IS_ISNACK))) { 1764 old = ntohl(tcp->th_seq); 1765 new = fr_newisn(fin); 1766 is->is_isninc[i] = new - old; 1767 CALC_SUMD(old, new, sumd); 1768 is->is_sumd[i] = (sumd & 0xffff) + (sumd >> 16); 1769 1770 is->is_flags |= ((i == 0) ? 
IS_ISNSYN : IS_ISNACK); 1771 } 1772 } 1773 1774 1775 /* ------------------------------------------------------------------------ */ 1776 /* Function: fr_tcpinwindow */ 1777 /* Returns: int - 1 == packet inside TCP "window", 0 == not inside. */ 1778 /* Parameters: fin(I) - pointer to packet information */ 1779 /* fdata(I) - pointer to tcp state informatio (forward) */ 1780 /* tdata(I) - pointer to tcp state informatio (reverse) */ 1781 /* tcp(I) - pointer to TCP packet header */ 1782 /* */ 1783 /* Given a packet has matched addresses and ports, check to see if it is */ 1784 /* within the TCP data window. In a show of generosity, allow packets that */ 1785 /* are within the window space behind the current sequence # as well. */ 1786 /* ------------------------------------------------------------------------ */ 1787 int fr_tcpinwindow(fin, fdata, tdata, tcp, flags) 1788 fr_info_t *fin; 1789 tcpdata_t *fdata, *tdata; 1790 tcphdr_t *tcp; 1791 int flags; 1792 { 1793 tcp_seq seq, ack, end; 1794 int ackskew, tcpflags; 1795 u_32_t win, maxwin; 1796 int dsize, inseq; 1797 1798 /* 1799 * Find difference between last checked packet and this packet. 1800 */ 1801 tcpflags = tcp->th_flags; 1802 seq = ntohl(tcp->th_seq); 1803 ack = ntohl(tcp->th_ack); 1804 1805 if (tcpflags & TH_SYN) 1806 win = ntohs(tcp->th_win); 1807 else 1808 win = ntohs(tcp->th_win) << fdata->td_winscale; 1809 1810 /* 1811 * win 0 means the receiving endpoint has closed the window, because it 1812 * has not enough memory to receive data from sender. In such case we 1813 * are pretending window size to be 1 to let TCP probe data through. 1814 * TCP probe data can be either 0 or 1 octet of data, the RFC does not 1815 * state this accurately, so we have to allow 1 octet (win = 1) even if 1816 * the window is closed (win == 0). 1817 */ 1818 if (win == 0) 1819 win = 1; 1820 1821 dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) + 1822 ((tcpflags & TH_SYN) ? 1 : 0) + ((tcpflags & TH_FIN) ? 
1 : 0); 1823 1824 /* 1825 * if window scaling is present, the scaling is only allowed 1826 * for windows not in the first SYN packet. In that packet the 1827 * window is 65535 to specify the largest window possible 1828 * for receivers not implementing the window scale option. 1829 * Currently, we do not assume TTCP here. That means that 1830 * if we see a second packet from a host (after the initial 1831 * SYN), we can assume that the receiver of the SYN did 1832 * already send back the SYN/ACK (and thus that we know if 1833 * the receiver also does window scaling) 1834 */ 1835 if (!(tcpflags & TH_SYN) && (fdata->td_winflags & TCP_WSCALE_FIRST)) { 1836 fdata->td_maxwin = win; 1837 } 1838 1839 end = seq + dsize; 1840 1841 if ((fdata->td_end == 0) && 1842 (!(flags & IS_TCPFSM) || 1843 ((tcpflags & TH_OPENING) == TH_OPENING))) { 1844 /* 1845 * Must be a (outgoing) SYN-ACK in reply to a SYN. 1846 */ 1847 fdata->td_end = end - 1; 1848 fdata->td_maxwin = 1; 1849 fdata->td_maxend = end + win; 1850 } 1851 1852 if (!(tcpflags & TH_ACK)) { /* Pretend an ack was sent */ 1853 ack = tdata->td_end; 1854 } else if (((tcpflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) && 1855 (ack == 0)) { 1856 /* gross hack to get around certain broken tcp stacks */ 1857 ack = tdata->td_end; 1858 } 1859 1860 maxwin = tdata->td_maxwin; 1861 ackskew = tdata->td_end - ack; 1862 1863 /* 1864 * Strict sequencing only allows in-order delivery. 
1865 */ 1866 if ((flags & IS_STRICT) != 0) { 1867 if (seq != fdata->td_end) { 1868 DTRACE_PROBE(strict_check); 1869 return 0; 1870 } 1871 } 1872 1873 #define SEQ_GE(a,b) ((int)((a) - (b)) >= 0) 1874 #define SEQ_GT(a,b) ((int)((a) - (b)) > 0) 1875 inseq = 0; 1876 DTRACE_PROBE4( 1877 dyn_params, 1878 int, dsize, 1879 int, ackskew, 1880 int, maxwin, 1881 int, win 1882 ); 1883 if ( 1884 #if defined(_KERNEL) 1885 /* 1886 * end <-> s + n 1887 * maxend <-> ack + win 1888 * this is upperbound check 1889 */ 1890 (SEQ_GE(fdata->td_maxend, end)) && 1891 /* 1892 * this is lowerbound check 1893 */ 1894 (SEQ_GE(seq, fdata->td_end - maxwin)) && 1895 #endif 1896 /* XXX what about big packets */ 1897 #define MAXACKWINDOW 66000 1898 (-ackskew <= (MAXACKWINDOW << fdata->td_winscale)) && 1899 ( ackskew <= (MAXACKWINDOW << fdata->td_winscale))) { 1900 inseq = 1; 1901 /* 1902 * Microsoft Windows will send the next packet to the right of the 1903 * window if SACK is in use. 1904 */ 1905 } else if ((seq == fdata->td_maxend) && (ackskew == 0) && 1906 (fdata->td_winflags & TCP_SACK_PERMIT) && 1907 (tdata->td_winflags & TCP_SACK_PERMIT)) { 1908 inseq = 1; 1909 /* 1910 * RST ACK with SEQ equal to 0 is sent by some OSes (i.e. Solaris) as a 1911 * response to initial SYN packet, when there is no application 1912 * listeing to on a port, where the SYN packet has came to. 1913 */ 1914 } else if ((seq == 0) && (tcpflags == (TH_RST|TH_ACK)) && 1915 (ackskew >= -1) && (ackskew <= 1)) { 1916 inseq = 1; 1917 } else if (!(flags & IS_TCPFSM)) { 1918 1919 if (!(fdata->td_winflags & 1920 (TCP_WSCALE_SEEN|TCP_WSCALE_FIRST))) { 1921 /* 1922 * No TCPFSM and no window scaling, so make some 1923 * extra guesses. 1924 */ 1925 if ((seq == fdata->td_maxend) && (ackskew == 0)) 1926 inseq = 1; 1927 else if (SEQ_GE(seq + maxwin, fdata->td_end - maxwin)) 1928 inseq = 1; 1929 } 1930 } 1931 1932 if (inseq) { 1933 /* if ackskew < 0 then this should be due to fragmented 1934 * packets. 
There is no way to know the length of the 1935 * total packet in advance. 1936 * We do know the total length from the fragment cache though. 1937 * Note however that there might be more sessions with 1938 * exactly the same source and destination parameters in the 1939 * state cache (and source and destination is the only stuff 1940 * that is saved in the fragment cache). Note further that 1941 * some TCP connections in the state cache are hashed with 1942 * sport and dport as well which makes it not worthwhile to 1943 * look for them. 1944 * Thus, when ackskew is negative but still seems to belong 1945 * to this session, we bump up the destinations end value. 1946 */ 1947 if (ackskew < 0) { 1948 DTRACE_PROBE2(end_update_td, 1949 int, tdata->td_end, 1950 int, ack 1951 ); 1952 tdata->td_end = ack; 1953 } 1954 1955 /* update max window seen */ 1956 if (fdata->td_maxwin < win) { 1957 DTRACE_PROBE2(win_update_fd, 1958 int, fdata->td_maxwin, 1959 int, win 1960 ); 1961 fdata->td_maxwin = win; 1962 } 1963 1964 if (SEQ_GT(end, fdata->td_end)) { 1965 DTRACE_PROBE2(end_update_fd, 1966 int, fdata->td_end, 1967 int, end 1968 ); 1969 fdata->td_end = end; 1970 } 1971 1972 if (SEQ_GE(ack + win, tdata->td_maxend)) { 1973 DTRACE_PROBE2(max_end_update_td, 1974 int, tdata->td_maxend, 1975 int, ack + win 1976 ); 1977 tdata->td_maxend = ack + win; 1978 } 1979 1980 return 1; 1981 } 1982 fin->fin_flx |= FI_OOW; 1983 1984 #if defined(_KERNEL) 1985 if (!(SEQ_GE(seq, fdata->td_end - maxwin))) 1986 fin->fin_flx |= FI_NEG_OOW; 1987 #endif 1988 1989 return 0; 1990 } 1991 1992 1993 /* ------------------------------------------------------------------------ */ 1994 /* Function: fr_stclone */ 1995 /* Returns: ipstate_t* - NULL == cloning failed, */ 1996 /* else pointer to new state structure */ 1997 /* Parameters: fin(I) - pointer to packet information */ 1998 /* tcp(I) - pointer to TCP/UDP header */ 1999 /* is(I) - pointer to master state structure */ 2000 /* */ 2001 /* Create a "duplcate" state 
table entry from the master. */ 2002 /* ------------------------------------------------------------------------ */ 2003 static ipstate_t *fr_stclone(fin, tcp, is) 2004 fr_info_t *fin; 2005 tcphdr_t *tcp; 2006 ipstate_t *is; 2007 { 2008 ipstate_t *clone; 2009 u_32_t send; 2010 ipf_stack_t *ifs = fin->fin_ifs; 2011 2012 /* 2013 * Trigger automatic call to fr_state_flush() if the 2014 * table has reached capacity specified by hi watermark. 2015 */ 2016 if (ST_TAB_WATER_LEVEL(ifs) > ifs->ifs_state_flush_level_hi) 2017 ifs->ifs_fr_state_doflush = 1; 2018 2019 /* 2020 * If automatic flushing did not do its job, and the table 2021 * has filled up, don't try to create a new entry. A NULL 2022 * return will indicate that the cloning has failed. 2023 */ 2024 if (ifs->ifs_ips_num >= ifs->ifs_fr_statemax) { 2025 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 2026 return NULL; 2027 } 2028 2029 KMALLOC(clone, ipstate_t *); 2030 if (clone == NULL) 2031 return NULL; 2032 bcopy((char *)is, (char *)clone, sizeof(*clone)); 2033 2034 MUTEX_NUKE(&clone->is_lock); 2035 2036 clone->is_die = ONE_DAY + ifs->ifs_fr_ticks; 2037 clone->is_state[0] = 0; 2038 clone->is_state[1] = 0; 2039 send = ntohl(tcp->th_seq) + fin->fin_dlen - (TCP_OFF(tcp) << 2) + 2040 ((tcp->th_flags & TH_SYN) ? 1 : 0) + 2041 ((tcp->th_flags & TH_FIN) ? 
1 : 0); 2042 2043 if (fin->fin_rev == 1) { 2044 clone->is_dend = send; 2045 clone->is_maxdend = send; 2046 clone->is_send = 0; 2047 clone->is_maxswin = 1; 2048 clone->is_maxdwin = ntohs(tcp->th_win); 2049 if (clone->is_maxdwin == 0) 2050 clone->is_maxdwin = 1; 2051 } else { 2052 clone->is_send = send; 2053 clone->is_maxsend = send; 2054 clone->is_dend = 0; 2055 clone->is_maxdwin = 1; 2056 clone->is_maxswin = ntohs(tcp->th_win); 2057 if (clone->is_maxswin == 0) 2058 clone->is_maxswin = 1; 2059 } 2060 2061 clone->is_flags &= ~SI_CLONE; 2062 clone->is_flags |= SI_CLONED; 2063 fr_stinsert(clone, fin->fin_rev, ifs); 2064 clone->is_ref = 1; 2065 if (clone->is_p == IPPROTO_TCP) { 2066 (void) fr_tcp_age(&clone->is_sti, fin, ifs->ifs_ips_tqtqb, 2067 clone->is_flags); 2068 } 2069 MUTEX_EXIT(&clone->is_lock); 2070 #ifdef IPFILTER_SCAN 2071 (void) ipsc_attachis(is); 2072 #endif 2073 #ifdef IPFILTER_SYNC 2074 if (is->is_flags & IS_STATESYNC) 2075 clone->is_sync = ipfsync_new(SMC_STATE, fin, clone); 2076 #endif 2077 return clone; 2078 } 2079 2080 2081 /* ------------------------------------------------------------------------ */ 2082 /* Function: fr_matchsrcdst */ 2083 /* Returns: Nil */ 2084 /* Parameters: fin(I) - pointer to packet information */ 2085 /* is(I) - pointer to state structure */ 2086 /* src(I) - pointer to source address */ 2087 /* dst(I) - pointer to destination address */ 2088 /* tcp(I) - pointer to TCP/UDP header */ 2089 /* */ 2090 /* Match a state table entry against an IP packet. The logic below is that */ 2091 /* ret gets set to one if the match succeeds, else remains 0. If it is */ 2092 /* still 0 after the test. no match. 
*/ 2093 /* ------------------------------------------------------------------------ */ 2094 static ipstate_t *fr_matchsrcdst(fin, is, src, dst, tcp, cmask) 2095 fr_info_t *fin; 2096 ipstate_t *is; 2097 i6addr_t *src, *dst; 2098 tcphdr_t *tcp; 2099 u_32_t cmask; 2100 { 2101 int ret = 0, rev, out, flags, flx = 0, idx; 2102 u_short sp, dp; 2103 u_32_t cflx; 2104 void *ifp; 2105 ipf_stack_t *ifs = fin->fin_ifs; 2106 2107 rev = IP6_NEQ(&is->is_dst, dst); 2108 ifp = fin->fin_ifp; 2109 out = fin->fin_out; 2110 flags = is->is_flags; 2111 sp = 0; 2112 dp = 0; 2113 2114 if (tcp != NULL) { 2115 sp = htons(fin->fin_sport); 2116 dp = ntohs(fin->fin_dport); 2117 } 2118 if (!rev) { 2119 if (tcp != NULL) { 2120 if (!(flags & SI_W_SPORT) && (sp != is->is_sport)) 2121 rev = 1; 2122 else if (!(flags & SI_W_DPORT) && (dp != is->is_dport)) 2123 rev = 1; 2124 } 2125 } 2126 2127 idx = (out << 1) + rev; 2128 2129 /* 2130 * If the interface for this 'direction' is set, make sure it matches. 2131 * An interface name that is not set matches any, as does a name of *. 2132 */ 2133 if ((is->is_ifp[idx] == NULL && 2134 (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) || 2135 is->is_ifp[idx] == ifp) 2136 ret = 1; 2137 2138 if (ret == 0) 2139 return NULL; 2140 ret = 0; 2141 2142 /* 2143 * Match addresses and ports. 
2144 */ 2145 if (rev == 0) { 2146 if ((IP6_EQ(&is->is_dst, dst) || (flags & SI_W_DADDR)) && 2147 (IP6_EQ(&is->is_src, src) || (flags & SI_W_SADDR))) { 2148 if (tcp) { 2149 if ((sp == is->is_sport || flags & SI_W_SPORT)&& 2150 (dp == is->is_dport || flags & SI_W_DPORT)) 2151 ret = 1; 2152 } else { 2153 ret = 1; 2154 } 2155 } 2156 } else { 2157 if ((IP6_EQ(&is->is_dst, src) || (flags & SI_W_DADDR)) && 2158 (IP6_EQ(&is->is_src, dst) || (flags & SI_W_SADDR))) { 2159 if (tcp) { 2160 if ((dp == is->is_sport || flags & SI_W_SPORT)&& 2161 (sp == is->is_dport || flags & SI_W_DPORT)) 2162 ret = 1; 2163 } else { 2164 ret = 1; 2165 } 2166 } 2167 } 2168 2169 if (ret == 0) 2170 return NULL; 2171 2172 /* 2173 * Whether or not this should be here, is questionable, but the aim 2174 * is to get this out of the main line. 2175 */ 2176 if (tcp == NULL) 2177 flags = is->is_flags & ~(SI_WILDP|SI_NEWFR|SI_CLONE|SI_CLONED); 2178 2179 /* 2180 * Only one of the source or destination address can be flaged as a 2181 * wildcard. Fill in the missing address, if set. 2182 * For IPv6, if the address being copied in is multicast, then 2183 * don't reset the wild flag - multicast causes it to be set in the 2184 * first place! 
2185 */ 2186 if ((flags & (SI_W_SADDR|SI_W_DADDR))) { 2187 fr_ip_t *fi = &fin->fin_fi; 2188 2189 if ((flags & SI_W_SADDR) != 0) { 2190 if (rev == 0) { 2191 #ifdef USE_INET6 2192 if (is->is_v == 6 && 2193 IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6)) 2194 /*EMPTY*/; 2195 else 2196 #endif 2197 { 2198 is->is_src = fi->fi_src; 2199 is->is_flags &= ~SI_W_SADDR; 2200 } 2201 } else { 2202 #ifdef USE_INET6 2203 if (is->is_v == 6 && 2204 IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6)) 2205 /*EMPTY*/; 2206 else 2207 #endif 2208 { 2209 is->is_src = fi->fi_dst; 2210 is->is_flags &= ~SI_W_SADDR; 2211 } 2212 } 2213 } else if ((flags & SI_W_DADDR) != 0) { 2214 if (rev == 0) { 2215 #ifdef USE_INET6 2216 if (is->is_v == 6 && 2217 IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6)) 2218 /*EMPTY*/; 2219 else 2220 #endif 2221 { 2222 is->is_dst = fi->fi_dst; 2223 is->is_flags &= ~SI_W_DADDR; 2224 } 2225 } else { 2226 #ifdef USE_INET6 2227 if (is->is_v == 6 && 2228 IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6)) 2229 /*EMPTY*/; 2230 else 2231 #endif 2232 { 2233 is->is_dst = fi->fi_src; 2234 is->is_flags &= ~SI_W_DADDR; 2235 } 2236 } 2237 } 2238 if ((is->is_flags & (SI_WILDA|SI_WILDP)) == 0) { 2239 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); 2240 } 2241 } 2242 2243 flx = fin->fin_flx & cmask; 2244 cflx = is->is_flx[out][rev]; 2245 2246 /* 2247 * Match up any flags set from IP options. 2248 */ 2249 if ((cflx && (flx != (cflx & cmask))) || 2250 ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]) || 2251 ((fin->fin_secmsk & is->is_secmsk) != is->is_sec) || 2252 ((fin->fin_auth & is->is_authmsk) != is->is_auth)) 2253 return NULL; 2254 2255 /* 2256 * Only one of the source or destination port can be flagged as a 2257 * wildcard. When filling it in, fill in a copy of the matched entry 2258 * if it has the cloning flag set. 
2259 */ 2260 if ((fin->fin_flx & FI_IGNORE) != 0) { 2261 fin->fin_rev = rev; 2262 return is; 2263 } 2264 2265 if ((flags & (SI_W_SPORT|SI_W_DPORT))) { 2266 if ((flags & SI_CLONE) != 0) { 2267 ipstate_t *clone; 2268 2269 clone = fr_stclone(fin, tcp, is); 2270 if (clone == NULL) 2271 return NULL; 2272 is = clone; 2273 } else { 2274 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); 2275 } 2276 2277 if ((flags & SI_W_SPORT) != 0) { 2278 if (rev == 0) { 2279 is->is_sport = sp; 2280 is->is_send = ntohl(tcp->th_seq); 2281 } else { 2282 is->is_sport = dp; 2283 is->is_send = ntohl(tcp->th_ack); 2284 } 2285 is->is_maxsend = is->is_send + 1; 2286 } else if ((flags & SI_W_DPORT) != 0) { 2287 if (rev == 0) { 2288 is->is_dport = dp; 2289 is->is_dend = ntohl(tcp->th_ack); 2290 } else { 2291 is->is_dport = sp; 2292 is->is_dend = ntohl(tcp->th_seq); 2293 } 2294 is->is_maxdend = is->is_dend + 1; 2295 } 2296 is->is_flags &= ~(SI_W_SPORT|SI_W_DPORT); 2297 if ((flags & SI_CLONED) && ifs->ifs_ipstate_logging) 2298 ipstate_log(is, ISL_CLONE, ifs); 2299 } 2300 2301 ret = -1; 2302 2303 if (is->is_flx[out][rev] == 0) { 2304 is->is_flx[out][rev] = flx; 2305 is->is_opt[rev] = fin->fin_optmsk; 2306 if (is->is_v == 6) { 2307 is->is_opt[rev] &= ~0x8; 2308 is->is_optmsk[rev] &= ~0x8; 2309 } 2310 } 2311 2312 /* 2313 * Check if the interface name for this "direction" is set and if not, 2314 * fill it in. 
2315 */ 2316 if (is->is_ifp[idx] == NULL && 2317 (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) { 2318 is->is_ifp[idx] = ifp; 2319 COPYIFNAME(ifp, is->is_ifname[idx], fin->fin_v); 2320 } 2321 fin->fin_rev = rev; 2322 return is; 2323 } 2324 2325 2326 /* ------------------------------------------------------------------------ */ 2327 /* Function: fr_checkicmpmatchingstate */ 2328 /* Returns: Nil */ 2329 /* Parameters: fin(I) - pointer to packet information */ 2330 /* */ 2331 /* If we've got an ICMP error message, using the information stored in the */ 2332 /* ICMP packet, look for a matching state table entry. */ 2333 /* */ 2334 /* If we return NULL then no lock on ipf_state is held. */ 2335 /* If we return non-null then a read-lock on ipf_state is held. */ 2336 /* ------------------------------------------------------------------------ */ 2337 static ipstate_t *fr_checkicmpmatchingstate(fin) 2338 fr_info_t *fin; 2339 { 2340 ipstate_t *is, **isp; 2341 u_short sport, dport; 2342 u_char pr; 2343 int backward, i, oi; 2344 i6addr_t dst, src; 2345 struct icmp *ic; 2346 u_short savelen; 2347 icmphdr_t *icmp; 2348 fr_info_t ofin; 2349 tcphdr_t *tcp; 2350 int len; 2351 ip_t *oip; 2352 u_int hv; 2353 ipf_stack_t *ifs = fin->fin_ifs; 2354 2355 /* 2356 * Does it at least have the return (basic) IP header ? 2357 * Is it an actual recognised ICMP error type? 2358 * Only a basic IP header (no options) should be with 2359 * an ICMP error header. 2360 */ 2361 if ((fin->fin_v != 4) || (fin->fin_hlen != sizeof(ip_t)) || 2362 (fin->fin_plen < ICMPERR_MINPKTLEN) || 2363 !(fin->fin_flx & FI_ICMPERR)) 2364 return NULL; 2365 ic = fin->fin_dp; 2366 2367 oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN); 2368 /* 2369 * Check if the at least the old IP header (with options) and 2370 * 8 bytes of payload is present. 2371 */ 2372 if (fin->fin_plen < ICMPERR_MAXPKTLEN + ((IP_HL(oip) - 5) << 2)) 2373 return NULL; 2374 2375 /* 2376 * Sanity Checks. 
2377 */ 2378 len = fin->fin_dlen - ICMPERR_ICMPHLEN; 2379 if ((len <= 0) || ((IP_HL(oip) << 2) > len)) 2380 return NULL; 2381 2382 /* 2383 * Is the buffer big enough for all of it ? It's the size of the IP 2384 * header claimed in the encapsulated part which is of concern. It 2385 * may be too big to be in this buffer but not so big that it's 2386 * outside the ICMP packet, leading to TCP deref's causing problems. 2387 * This is possible because we don't know how big oip_hl is when we 2388 * do the pullup early in fr_check() and thus can't guarantee it is 2389 * all here now. 2390 */ 2391 #ifdef _KERNEL 2392 { 2393 mb_t *m; 2394 2395 m = fin->fin_m; 2396 # if defined(MENTAT) 2397 if ((char *)oip + len > (char *)m->b_wptr) 2398 return NULL; 2399 # else 2400 if ((char *)oip + len > (char *)fin->fin_ip + m->m_len) 2401 return NULL; 2402 # endif 2403 } 2404 #endif 2405 bcopy((char *)fin, (char *)&ofin, sizeof(*fin)); 2406 2407 /* 2408 * in the IPv4 case we must zero the i6addr union otherwise 2409 * the IP6_EQ and IP6_NEQ macros produce the wrong results because 2410 * of the 'junk' in the unused part of the union 2411 */ 2412 bzero((char *)&src, sizeof(src)); 2413 bzero((char *)&dst, sizeof(dst)); 2414 2415 /* 2416 * we make an fin entry to be able to feed it to 2417 * matchsrcdst note that not all fields are encessary 2418 * but this is the cleanest way. Note further we fill 2419 * in fin_mp such that if someone uses it we'll get 2420 * a kernel panic. fr_matchsrcdst does not use this. 2421 * 2422 * watch out here, as ip is in host order and oip in network 2423 * order. Any change we make must be undone afterwards, like 2424 * oip->ip_off - it is still in network byte order so fix it. 
2425 */ 2426 savelen = oip->ip_len; 2427 oip->ip_len = len; 2428 oip->ip_off = ntohs(oip->ip_off); 2429 2430 ofin.fin_flx = FI_NOCKSUM; 2431 ofin.fin_v = 4; 2432 ofin.fin_ip = oip; 2433 ofin.fin_m = NULL; /* if dereferenced, panic XXX */ 2434 ofin.fin_mp = NULL; /* if dereferenced, panic XXX */ 2435 ofin.fin_plen = fin->fin_dlen - ICMPERR_ICMPHLEN; 2436 (void) fr_makefrip(IP_HL(oip) << 2, oip, &ofin); 2437 ofin.fin_ifp = fin->fin_ifp; 2438 ofin.fin_out = !fin->fin_out; 2439 /* 2440 * Reset the short and bad flag here because in fr_matchsrcdst() 2441 * the flags for the current packet (fin_flx) are compared against 2442 * those for the existing session. 2443 */ 2444 ofin.fin_flx &= ~(FI_BAD|FI_SHORT); 2445 2446 /* 2447 * Put old values of ip_len and ip_off back as we don't know 2448 * if we have to forward the packet (or process it again. 2449 */ 2450 oip->ip_len = savelen; 2451 oip->ip_off = htons(oip->ip_off); 2452 2453 switch (oip->ip_p) 2454 { 2455 case IPPROTO_ICMP : 2456 /* 2457 * an ICMP error can only be generated as a result of an 2458 * ICMP query, not as the response on an ICMP error 2459 * 2460 * XXX theoretically ICMP_ECHOREP and the other reply's are 2461 * ICMP query's as well, but adding them here seems strange XXX 2462 */ 2463 if ((ofin.fin_flx & FI_ICMPERR) != 0) 2464 return NULL; 2465 2466 /* 2467 * perform a lookup of the ICMP packet in the state table 2468 */ 2469 icmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2470 hv = (pr = oip->ip_p); 2471 src.in4 = oip->ip_src; 2472 hv += src.in4.s_addr; 2473 dst.in4 = oip->ip_dst; 2474 hv += dst.in4.s_addr; 2475 hv += icmp->icmp_id; 2476 hv = DOUBLE_HASH(hv, ifs); 2477 2478 READ_ENTER(&ifs->ifs_ipf_state); 2479 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 2480 isp = &is->is_hnext; 2481 if ((is->is_p != pr) || (is->is_v != 4)) 2482 continue; 2483 if (is->is_pass & FR_NOICMPERR) 2484 continue; 2485 is = fr_matchsrcdst(&ofin, is, &src, &dst, 2486 NULL, FI_ICMPCMP); 2487 if (is != 
NULL) { 2488 if ((is->is_pass & FR_NOICMPERR) != 0) { 2489 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2490 return NULL; 2491 } 2492 /* 2493 * i : the index of this packet (the icmp 2494 * unreachable) 2495 * oi : the index of the original packet found 2496 * in the icmp header (i.e. the packet 2497 * causing this icmp) 2498 * backward : original packet was backward 2499 * compared to the state 2500 */ 2501 backward = IP6_NEQ(&is->is_src, &src); 2502 fin->fin_rev = !backward; 2503 i = (!backward << 1) + fin->fin_out; 2504 oi = (backward << 1) + ofin.fin_out; 2505 if (is->is_icmppkts[i] > is->is_pkts[oi]) 2506 continue; 2507 ifs->ifs_ips_stats.iss_hits++; 2508 is->is_icmppkts[i]++; 2509 return is; 2510 } 2511 } 2512 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2513 return NULL; 2514 case IPPROTO_TCP : 2515 case IPPROTO_UDP : 2516 break; 2517 default : 2518 return NULL; 2519 } 2520 2521 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2522 dport = tcp->th_dport; 2523 sport = tcp->th_sport; 2524 2525 hv = (pr = oip->ip_p); 2526 src.in4 = oip->ip_src; 2527 hv += src.in4.s_addr; 2528 dst.in4 = oip->ip_dst; 2529 hv += dst.in4.s_addr; 2530 hv += dport; 2531 hv += sport; 2532 hv = DOUBLE_HASH(hv, ifs); 2533 2534 READ_ENTER(&ifs->ifs_ipf_state); 2535 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 2536 isp = &is->is_hnext; 2537 /* 2538 * Only allow this icmp though if the 2539 * encapsulated packet was allowed through the 2540 * other way around. Note that the minimal amount 2541 * of info present does not allow for checking against 2542 * tcp internals such as seq and ack numbers. Only the 2543 * ports are known to be present and can be even if the 2544 * short flag is set. 2545 */ 2546 if ((is->is_p == pr) && (is->is_v == 4) && 2547 (is = fr_matchsrcdst(&ofin, is, &src, &dst, 2548 tcp, FI_ICMPCMP))) { 2549 /* 2550 * i : the index of this packet (the icmp unreachable) 2551 * oi : the index of the original packet found in the 2552 * icmp header (i.e. 
the packet causing this icmp) 2553 * backward : original packet was backward compared to 2554 * the state 2555 */ 2556 backward = IP6_NEQ(&is->is_src, &src); 2557 fin->fin_rev = !backward; 2558 i = (!backward << 1) + fin->fin_out; 2559 oi = (backward << 1) + ofin.fin_out; 2560 2561 if (((is->is_pass & FR_NOICMPERR) != 0) || 2562 (is->is_icmppkts[i] > is->is_pkts[oi])) 2563 break; 2564 ifs->ifs_ips_stats.iss_hits++; 2565 is->is_icmppkts[i]++; 2566 /* 2567 * we deliberately do not touch the timeouts 2568 * for the accompanying state table entry. 2569 * It remains to be seen if that is correct. XXX 2570 */ 2571 return is; 2572 } 2573 } 2574 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2575 return NULL; 2576 } 2577 2578 2579 /* ------------------------------------------------------------------------ */ 2580 /* Function: fr_ipsmove */ 2581 /* Returns: Nil */ 2582 /* Parameters: is(I) - pointer to state table entry */ 2583 /* hv(I) - new hash value for state table entry */ 2584 /* Write Locks: ipf_state */ 2585 /* */ 2586 /* Move a state entry from one position in the hash table to another. */ 2587 /* ------------------------------------------------------------------------ */ 2588 static void fr_ipsmove(is, hv, ifs) 2589 ipstate_t *is; 2590 u_int hv; 2591 ipf_stack_t *ifs; 2592 { 2593 ipstate_t **isp; 2594 u_int hvm; 2595 2596 ASSERT(rw_read_locked(&ifs->ifs_ipf_state.ipf_lk) == 0); 2597 2598 hvm = is->is_hv; 2599 /* 2600 * Remove the hash from the old location... 2601 */ 2602 isp = is->is_phnext; 2603 if (is->is_hnext) 2604 is->is_hnext->is_phnext = isp; 2605 *isp = is->is_hnext; 2606 if (ifs->ifs_ips_table[hvm] == NULL) 2607 ifs->ifs_ips_stats.iss_inuse--; 2608 ifs->ifs_ips_stats.iss_bucketlen[hvm]--; 2609 2610 /* 2611 * ...and put the hash in the new one. 
2612 */ 2613 hvm = DOUBLE_HASH(hv, ifs); 2614 is->is_hv = hvm; 2615 isp = &ifs->ifs_ips_table[hvm]; 2616 if (*isp) 2617 (*isp)->is_phnext = &is->is_hnext; 2618 else 2619 ifs->ifs_ips_stats.iss_inuse++; 2620 ifs->ifs_ips_stats.iss_bucketlen[hvm]++; 2621 is->is_phnext = isp; 2622 is->is_hnext = *isp; 2623 *isp = is; 2624 } 2625 2626 2627 /* ------------------------------------------------------------------------ */ 2628 /* Function: fr_stlookup */ 2629 /* Returns: ipstate_t* - NULL == no matching state found, */ 2630 /* else pointer to state information is returned */ 2631 /* Parameters: fin(I) - pointer to packet information */ 2632 /* tcp(I) - pointer to TCP/UDP header. */ 2633 /* */ 2634 /* Search the state table for a matching entry to the packet described by */ 2635 /* the contents of *fin. */ 2636 /* */ 2637 /* If we return NULL then no lock on ipf_state is held. */ 2638 /* If we return non-null then a read-lock on ipf_state is held. */ 2639 /* ------------------------------------------------------------------------ */ 2640 ipstate_t *fr_stlookup(fin, tcp, ifqp) 2641 fr_info_t *fin; 2642 tcphdr_t *tcp; 2643 ipftq_t **ifqp; 2644 { 2645 u_int hv, hvm, pr, v, tryagain; 2646 ipstate_t *is, **isp; 2647 u_short dport, sport; 2648 i6addr_t src, dst; 2649 struct icmp *ic; 2650 ipftq_t *ifq; 2651 int oow; 2652 ipf_stack_t *ifs = fin->fin_ifs; 2653 2654 is = NULL; 2655 ifq = NULL; 2656 tcp = fin->fin_dp; 2657 ic = (struct icmp *)tcp; 2658 hv = (pr = fin->fin_fi.fi_p); 2659 src = fin->fin_fi.fi_src; 2660 dst = fin->fin_fi.fi_dst; 2661 hv += src.in4.s_addr; 2662 hv += dst.in4.s_addr; 2663 2664 v = fin->fin_fi.fi_v; 2665 #ifdef USE_INET6 2666 if (v == 6) { 2667 hv += fin->fin_fi.fi_src.i6[1]; 2668 hv += fin->fin_fi.fi_src.i6[2]; 2669 hv += fin->fin_fi.fi_src.i6[3]; 2670 2671 if ((fin->fin_p == IPPROTO_ICMPV6) && 2672 IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_dst.in6)) { 2673 hv -= dst.in4.s_addr; 2674 } else { 2675 hv += fin->fin_fi.fi_dst.i6[1]; 2676 hv += 
fin->fin_fi.fi_dst.i6[2]; 2677 hv += fin->fin_fi.fi_dst.i6[3]; 2678 } 2679 } 2680 #endif 2681 if ((v == 4) && 2682 (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) { 2683 if (fin->fin_out == 0) { 2684 hv -= src.in4.s_addr; 2685 } else { 2686 hv -= dst.in4.s_addr; 2687 } 2688 } 2689 2690 /* 2691 * Search the hash table for matching packet header info. 2692 */ 2693 switch (pr) 2694 { 2695 #ifdef USE_INET6 2696 case IPPROTO_ICMPV6 : 2697 tryagain = 0; 2698 if (v == 6) { 2699 if ((ic->icmp_type == ICMP6_ECHO_REQUEST) || 2700 (ic->icmp_type == ICMP6_ECHO_REPLY)) { 2701 hv += ic->icmp_id; 2702 } 2703 } 2704 READ_ENTER(&ifs->ifs_ipf_state); 2705 icmp6again: 2706 hvm = DOUBLE_HASH(hv, ifs); 2707 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { 2708 isp = &is->is_hnext; 2709 if ((is->is_p != pr) || (is->is_v != v)) 2710 continue; 2711 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); 2712 if (is != NULL && 2713 fr_matchicmpqueryreply(v, &is->is_icmp, 2714 ic, fin->fin_rev)) { 2715 if (fin->fin_rev) 2716 ifq = &ifs->ifs_ips_icmpacktq; 2717 else 2718 ifq = &ifs->ifs_ips_icmptq; 2719 break; 2720 } 2721 } 2722 2723 if (is != NULL) { 2724 if ((tryagain != 0) && !(is->is_flags & SI_W_DADDR)) { 2725 hv += fin->fin_fi.fi_src.i6[0]; 2726 hv += fin->fin_fi.fi_src.i6[1]; 2727 hv += fin->fin_fi.fi_src.i6[2]; 2728 hv += fin->fin_fi.fi_src.i6[3]; 2729 fr_ipsmove(is, hv, ifs); 2730 MUTEX_DOWNGRADE(&ifs->ifs_ipf_state); 2731 } 2732 break; 2733 } 2734 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2735 2736 /* 2737 * No matching icmp state entry. Perhaps this is a 2738 * response to another state entry. 2739 * 2740 * XXX With some ICMP6 packets, the "other" address is already 2741 * in the packet, after the ICMP6 header, and this could be 2742 * used in place of the multicast address. However, taking 2743 * advantage of this requires some significant code changes 2744 * to handle the specific types where that is the case. 
2745 */ 2746 if ((ifs->ifs_ips_stats.iss_wild != 0) && (v == 6) && (tryagain == 0) && 2747 !IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_src.in6)) { 2748 hv -= fin->fin_fi.fi_src.i6[0]; 2749 hv -= fin->fin_fi.fi_src.i6[1]; 2750 hv -= fin->fin_fi.fi_src.i6[2]; 2751 hv -= fin->fin_fi.fi_src.i6[3]; 2752 tryagain = 1; 2753 WRITE_ENTER(&ifs->ifs_ipf_state); 2754 goto icmp6again; 2755 } 2756 2757 is = fr_checkicmp6matchingstate(fin); 2758 if (is != NULL) 2759 return is; 2760 break; 2761 #endif 2762 2763 case IPPROTO_ICMP : 2764 if (v == 4) { 2765 hv += ic->icmp_id; 2766 } 2767 hv = DOUBLE_HASH(hv, ifs); 2768 READ_ENTER(&ifs->ifs_ipf_state); 2769 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 2770 isp = &is->is_hnext; 2771 if ((is->is_p != pr) || (is->is_v != v)) 2772 continue; 2773 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); 2774 if (is != NULL && 2775 fr_matchicmpqueryreply(v, &is->is_icmp, 2776 ic, fin->fin_rev)) { 2777 if (fin->fin_rev) 2778 ifq = &ifs->ifs_ips_icmpacktq; 2779 else 2780 ifq = &ifs->ifs_ips_icmptq; 2781 break; 2782 } 2783 } 2784 if (is == NULL) { 2785 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2786 } 2787 break; 2788 2789 case IPPROTO_TCP : 2790 case IPPROTO_UDP : 2791 ifqp = NULL; 2792 sport = htons(fin->fin_data[0]); 2793 hv += sport; 2794 dport = htons(fin->fin_data[1]); 2795 hv += dport; 2796 oow = 0; 2797 tryagain = 0; 2798 READ_ENTER(&ifs->ifs_ipf_state); 2799 retry_tcpudp: 2800 hvm = DOUBLE_HASH(hv, ifs); 2801 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { 2802 isp = &is->is_hnext; 2803 if ((is->is_p != pr) || (is->is_v != v)) 2804 continue; 2805 fin->fin_flx &= ~FI_OOW; 2806 is = fr_matchsrcdst(fin, is, &src, &dst, tcp, FI_CMP); 2807 if (is != NULL) { 2808 if (pr == IPPROTO_TCP) { 2809 if (!fr_tcpstate(fin, tcp, is)) { 2810 oow |= fin->fin_flx & FI_OOW; 2811 continue; 2812 } 2813 } 2814 break; 2815 } 2816 } 2817 if (is != NULL) { 2818 if (tryagain && 2819 !(is->is_flags & (SI_CLONE|SI_WILDP|SI_WILDA))) { 2820 hv += 
dport; 2821 hv += sport; 2822 fr_ipsmove(is, hv, ifs); 2823 MUTEX_DOWNGRADE(&ifs->ifs_ipf_state); 2824 } 2825 break; 2826 } 2827 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2828 2829 if (ifs->ifs_ips_stats.iss_wild) { 2830 if (tryagain == 0) { 2831 hv -= dport; 2832 hv -= sport; 2833 } else if (tryagain == 1) { 2834 hv = fin->fin_fi.fi_p; 2835 /* 2836 * If we try to pretend this is a reply to a 2837 * multicast/broadcast packet then we need to 2838 * exclude part of the address from the hash 2839 * calculation. 2840 */ 2841 if (fin->fin_out == 0) { 2842 hv += src.in4.s_addr; 2843 } else { 2844 hv += dst.in4.s_addr; 2845 } 2846 hv += dport; 2847 hv += sport; 2848 } 2849 tryagain++; 2850 if (tryagain <= 2) { 2851 WRITE_ENTER(&ifs->ifs_ipf_state); 2852 goto retry_tcpudp; 2853 } 2854 } 2855 fin->fin_flx |= oow; 2856 break; 2857 2858 #if 0 2859 case IPPROTO_GRE : 2860 gre = fin->fin_dp; 2861 if (GRE_REV(gre->gr_flags) == 1) { 2862 hv += gre->gr_call; 2863 } 2864 /* FALLTHROUGH */ 2865 #endif 2866 default : 2867 ifqp = NULL; 2868 hvm = DOUBLE_HASH(hv, ifs); 2869 READ_ENTER(&ifs->ifs_ipf_state); 2870 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { 2871 isp = &is->is_hnext; 2872 if ((is->is_p != pr) || (is->is_v != v)) 2873 continue; 2874 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); 2875 if (is != NULL) { 2876 ifq = &ifs->ifs_ips_iptq; 2877 break; 2878 } 2879 } 2880 if (is == NULL) { 2881 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2882 } 2883 break; 2884 } 2885 2886 if ((is != NULL) && ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) && 2887 (is->is_tqehead[fin->fin_rev] != NULL)) 2888 ifq = is->is_tqehead[fin->fin_rev]; 2889 if (ifq != NULL && ifqp != NULL) 2890 *ifqp = ifq; 2891 return is; 2892 } 2893 2894 2895 /* ------------------------------------------------------------------------ */ 2896 /* Function: fr_updatestate */ 2897 /* Returns: Nil */ 2898 /* Parameters: fin(I) - pointer to packet information */ 2899 /* is(I) - pointer to state table entry */ 2900 /* 
Read Locks: ipf_state */ 2901 /* */ 2902 /* Updates packet and byte counters for a newly received packet. Seeds the */ 2903 /* fragment cache with a new entry as required. */ 2904 /* ------------------------------------------------------------------------ */ 2905 void fr_updatestate(fin, is, ifq) 2906 fr_info_t *fin; 2907 ipstate_t *is; 2908 ipftq_t *ifq; 2909 { 2910 ipftqent_t *tqe; 2911 int i, pass; 2912 ipf_stack_t *ifs = fin->fin_ifs; 2913 2914 i = (fin->fin_rev << 1) + fin->fin_out; 2915 2916 /* 2917 * For TCP packets, ifq == NULL. For all others, check if this new 2918 * queue is different to the last one it was on and move it if so. 2919 */ 2920 tqe = &is->is_sti; 2921 MUTEX_ENTER(&is->is_lock); 2922 if ((tqe->tqe_flags & TQE_RULEBASED) != 0) 2923 ifq = is->is_tqehead[fin->fin_rev]; 2924 2925 if (ifq != NULL) 2926 fr_movequeue(tqe, tqe->tqe_ifq, ifq, ifs); 2927 2928 is->is_pkts[i]++; 2929 fin->fin_pktnum = is->is_pkts[i] + is->is_icmppkts[i]; 2930 is->is_bytes[i] += fin->fin_plen; 2931 MUTEX_EXIT(&is->is_lock); 2932 2933 #ifdef IPFILTER_SYNC 2934 if (is->is_flags & IS_STATESYNC) 2935 ipfsync_update(SMC_STATE, fin, is->is_sync); 2936 #endif 2937 2938 ATOMIC_INCL(ifs->ifs_ips_stats.iss_hits); 2939 2940 fin->fin_fr = is->is_rule; 2941 2942 /* 2943 * If this packet is a fragment and the rule says to track fragments, 2944 * then create a new fragment cache entry. 2945 */ 2946 pass = is->is_pass; 2947 if ((fin->fin_flx & FI_FRAG) && FR_ISPASS(pass)) 2948 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); 2949 } 2950 2951 2952 /* ------------------------------------------------------------------------ */ 2953 /* Function: fr_checkstate */ 2954 /* Returns: frentry_t* - NULL == search failed, */ 2955 /* else pointer to rule for matching state */ 2956 /* Parameters: ifp(I) - pointer to interface */ 2957 /* passp(I) - pointer to filtering result flags */ 2958 /* */ 2959 /* Check if a packet is associated with an entry in the state table. 
*/ 2960 /* ------------------------------------------------------------------------ */ 2961 frentry_t *fr_checkstate(fin, passp) 2962 fr_info_t *fin; 2963 u_32_t *passp; 2964 { 2965 ipstate_t *is; 2966 frentry_t *fr; 2967 tcphdr_t *tcp; 2968 ipftq_t *ifq; 2969 u_int pass; 2970 ipf_stack_t *ifs = fin->fin_ifs; 2971 2972 if (ifs->ifs_fr_state_lock || (ifs->ifs_ips_list == NULL) || 2973 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) 2974 return NULL; 2975 2976 is = NULL; 2977 if ((fin->fin_flx & FI_TCPUDP) || 2978 (fin->fin_fi.fi_p == IPPROTO_ICMP) 2979 #ifdef USE_INET6 2980 || (fin->fin_fi.fi_p == IPPROTO_ICMPV6) 2981 #endif 2982 ) 2983 tcp = fin->fin_dp; 2984 else 2985 tcp = NULL; 2986 2987 /* 2988 * Search the hash table for matching packet header info. 2989 */ 2990 ifq = NULL; 2991 is = fr_stlookup(fin, tcp, &ifq); 2992 switch (fin->fin_p) 2993 { 2994 #ifdef USE_INET6 2995 case IPPROTO_ICMPV6 : 2996 if (is != NULL) 2997 break; 2998 if (fin->fin_v == 6) { 2999 is = fr_checkicmp6matchingstate(fin); 3000 if (is != NULL) 3001 goto matched; 3002 } 3003 break; 3004 #endif 3005 case IPPROTO_ICMP : 3006 if (is != NULL) 3007 break; 3008 /* 3009 * No matching icmp state entry. Perhaps this is a 3010 * response to another state entry. 
3011 */ 3012 is = fr_checkicmpmatchingstate(fin); 3013 if (is != NULL) 3014 goto matched; 3015 break; 3016 case IPPROTO_TCP : 3017 if (is == NULL) 3018 break; 3019 3020 if (is->is_pass & FR_NEWISN) { 3021 if (fin->fin_out == 0) 3022 fr_fixinisn(fin, is); 3023 else if (fin->fin_out == 1) 3024 fr_fixoutisn(fin, is); 3025 } 3026 break; 3027 default : 3028 if (fin->fin_rev) 3029 ifq = &ifs->ifs_ips_udpacktq; 3030 else 3031 ifq = &ifs->ifs_ips_udptq; 3032 break; 3033 } 3034 if (is == NULL) { 3035 ATOMIC_INCL(ifs->ifs_ips_stats.iss_miss); 3036 return NULL; 3037 } 3038 3039 matched: 3040 fr = is->is_rule; 3041 if (fr != NULL) { 3042 if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) { 3043 if (fin->fin_nattag == NULL) 3044 return NULL; 3045 if (fr_matchtag(&fr->fr_nattag, fin->fin_nattag) != 0) 3046 return NULL; 3047 } 3048 (void) strncpy(fin->fin_group, fr->fr_group, FR_GROUPLEN); 3049 fin->fin_icode = fr->fr_icode; 3050 } 3051 3052 fin->fin_rule = is->is_rulen; 3053 pass = is->is_pass; 3054 fr_updatestate(fin, is, ifq); 3055 3056 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3057 fin->fin_flx |= FI_STATE; 3058 if ((pass & FR_LOGFIRST) != 0) 3059 pass &= ~(FR_LOGFIRST|FR_LOG); 3060 *passp = pass; 3061 return fr; 3062 } 3063 3064 3065 /* ------------------------------------------------------------------------ */ 3066 /* Function: fr_fixoutisn */ 3067 /* Returns: Nil */ 3068 /* Parameters: fin(I) - pointer to packet information */ 3069 /* is(I) - pointer to master state structure */ 3070 /* */ 3071 /* Called only for outbound packets, adjusts the sequence number and the */ 3072 /* TCP checksum to match that change. 
*/ 3073 /* ------------------------------------------------------------------------ */ 3074 static void fr_fixoutisn(fin, is) 3075 fr_info_t *fin; 3076 ipstate_t *is; 3077 { 3078 tcphdr_t *tcp; 3079 int rev; 3080 u_32_t seq; 3081 3082 tcp = fin->fin_dp; 3083 rev = fin->fin_rev; 3084 if ((is->is_flags & IS_ISNSYN) != 0) { 3085 if (rev == 0) { 3086 seq = ntohl(tcp->th_seq); 3087 seq += is->is_isninc[0]; 3088 tcp->th_seq = htonl(seq); 3089 fix_outcksum(&tcp->th_sum, is->is_sumd[0]); 3090 } 3091 } 3092 if ((is->is_flags & IS_ISNACK) != 0) { 3093 if (rev == 1) { 3094 seq = ntohl(tcp->th_seq); 3095 seq += is->is_isninc[1]; 3096 tcp->th_seq = htonl(seq); 3097 fix_outcksum(&tcp->th_sum, is->is_sumd[1]); 3098 } 3099 } 3100 } 3101 3102 3103 /* ------------------------------------------------------------------------ */ 3104 /* Function: fr_fixinisn */ 3105 /* Returns: Nil */ 3106 /* Parameters: fin(I) - pointer to packet information */ 3107 /* is(I) - pointer to master state structure */ 3108 /* */ 3109 /* Called only for inbound packets, adjusts the acknowledge number and the */ 3110 /* TCP checksum to match that change. 
*/ 3111 /* ------------------------------------------------------------------------ */ 3112 static void fr_fixinisn(fin, is) 3113 fr_info_t *fin; 3114 ipstate_t *is; 3115 { 3116 tcphdr_t *tcp; 3117 int rev; 3118 u_32_t ack; 3119 3120 tcp = fin->fin_dp; 3121 rev = fin->fin_rev; 3122 if ((is->is_flags & IS_ISNSYN) != 0) { 3123 if (rev == 1) { 3124 ack = ntohl(tcp->th_ack); 3125 ack -= is->is_isninc[0]; 3126 tcp->th_ack = htonl(ack); 3127 fix_incksum(&tcp->th_sum, is->is_sumd[0]); 3128 } 3129 } 3130 if ((is->is_flags & IS_ISNACK) != 0) { 3131 if (rev == 0) { 3132 ack = ntohl(tcp->th_ack); 3133 ack -= is->is_isninc[1]; 3134 tcp->th_ack = htonl(ack); 3135 fix_incksum(&tcp->th_sum, is->is_sumd[1]); 3136 } 3137 } 3138 } 3139 3140 3141 /* ------------------------------------------------------------------------ */ 3142 /* Function: fr_statesync */ 3143 /* Returns: Nil */ 3144 /* Parameters: action(I) - type of synchronisation to do */ 3145 /* v(I) - IP version being sync'd (v4 or v6) */ 3146 /* ifp(I) - interface identifier associated with action */ 3147 /* name(I) - name associated with ifp parameter */ 3148 /* */ 3149 /* Walk through all state entries and if an interface pointer match is */ 3150 /* found then look it up again, based on its name in case the pointer has */ 3151 /* changed since last time. */ 3152 /* */ 3153 /* If ifp is passed in as being non-null then we are only doing updates for */ 3154 /* existing, matching, uses of it. 
*/ 3155 /* ------------------------------------------------------------------------ */ 3156 void fr_statesync(action, v, ifp, name, ifs) 3157 int action, v; 3158 void *ifp; 3159 char *name; 3160 ipf_stack_t *ifs; 3161 { 3162 ipstate_t *is; 3163 int i; 3164 3165 if (ifs->ifs_fr_running <= 0) 3166 return; 3167 3168 WRITE_ENTER(&ifs->ifs_ipf_state); 3169 3170 if (ifs->ifs_fr_running <= 0) { 3171 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3172 return; 3173 } 3174 3175 switch (action) 3176 { 3177 case IPFSYNC_RESYNC : 3178 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3179 if (v != 0 && is->is_v != v) 3180 continue; 3181 /* 3182 * Look up all the interface names in the state entry. 3183 */ 3184 for (i = 0; i < 4; i++) { 3185 is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], 3186 is->is_v, ifs); 3187 } 3188 } 3189 break; 3190 case IPFSYNC_NEWIFP : 3191 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3192 if (v != 0 && is->is_v != v) 3193 continue; 3194 /* 3195 * Look up all the interface names in the state entry. 3196 */ 3197 for (i = 0; i < 4; i++) { 3198 if (!strncmp(is->is_ifname[i], name, 3199 sizeof(is->is_ifname[i]))) 3200 is->is_ifp[i] = ifp; 3201 } 3202 } 3203 break; 3204 case IPFSYNC_OLDIFP : 3205 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3206 if (v != 0 && is->is_v != v) 3207 continue; 3208 /* 3209 * Look up all the interface names in the state entry. 
3210 */ 3211 for (i = 0; i < 4; i++) { 3212 if (is->is_ifp[i] == ifp) 3213 is->is_ifp[i] = (void *)-1; 3214 } 3215 } 3216 break; 3217 } 3218 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3219 } 3220 3221 3222 #if SOLARIS2 >= 10 3223 /* ------------------------------------------------------------------------ */ 3224 /* Function: fr_stateifindexsync */ 3225 /* Returns: void */ 3226 /* Parameters: ifp - current network interface descriptor (ifindex) */ 3227 /* newifp - new interface descriptor (new ifindex) */ 3228 /* ifs - pointer to IPF stack */ 3229 /* */ 3230 /* Write Locks: assumes ipf_mutex is locked */ 3231 /* */ 3232 /* Updates all interface indeces matching to ifp with new interface index */ 3233 /* value. */ 3234 /* ------------------------------------------------------------------------ */ 3235 void fr_stateifindexsync(ifp, newifp, ifs) 3236 void *ifp; 3237 void *newifp; 3238 ipf_stack_t *ifs; 3239 { 3240 ipstate_t *is; 3241 int i; 3242 3243 WRITE_ENTER(&ifs->ifs_ipf_state); 3244 3245 for (is = ifs->ifs_ips_list; is != NULL; is = is->is_next) { 3246 3247 for (i = 0; i < 4; i++) { 3248 if (is->is_ifp[i] == ifp) 3249 is->is_ifp[i] = newifp; 3250 } 3251 } 3252 3253 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3254 } 3255 #endif 3256 3257 /* ------------------------------------------------------------------------ */ 3258 /* Function: fr_delstate */ 3259 /* Returns: int - 0 = entry deleted, else ref count on entry */ 3260 /* Parameters: is(I) - pointer to state structure to delete */ 3261 /* why(I) - if not 0, log reason why it was deleted */ 3262 /* ifs - ipf stack instance */ 3263 /* Write Locks: ipf_state/ipf_global */ 3264 /* */ 3265 /* Deletes a state entry from the enumerated list as well as the hash table */ 3266 /* and timeout queue lists. Make adjustments to hash table statistics and */ 3267 /* global counters as required. 
*/ 3268 /* ------------------------------------------------------------------------ */ 3269 int fr_delstate(is, why, ifs) 3270 ipstate_t *is; 3271 int why; 3272 ipf_stack_t *ifs; 3273 { 3274 int removed = 0; 3275 3276 ASSERT(rw_write_held(&ifs->ifs_ipf_global.ipf_lk) == 0 || 3277 rw_write_held(&ifs->ifs_ipf_state.ipf_lk) == 0); 3278 3279 /* 3280 * Start by removing the entry from the hash table of state entries 3281 * so it will not be "used" again. 3282 * 3283 * It will remain in the "list" of state entries until all references 3284 * have been accounted for. 3285 */ 3286 if (is->is_phnext != NULL) { 3287 removed = 1; 3288 *is->is_phnext = is->is_hnext; 3289 if (is->is_hnext != NULL) 3290 is->is_hnext->is_phnext = is->is_phnext; 3291 if (ifs->ifs_ips_table[is->is_hv] == NULL) 3292 ifs->ifs_ips_stats.iss_inuse--; 3293 ifs->ifs_ips_stats.iss_bucketlen[is->is_hv]--; 3294 3295 is->is_phnext = NULL; 3296 is->is_hnext = NULL; 3297 } 3298 3299 /* 3300 * Because ifs->ifs_ips_stats.iss_wild is a count of entries in the state 3301 * table that have wildcard flags set, only decerement it once 3302 * and do it here. 3303 */ 3304 if (is->is_flags & (SI_WILDP|SI_WILDA)) { 3305 if (!(is->is_flags & SI_CLONED)) { 3306 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); 3307 } 3308 is->is_flags &= ~(SI_WILDP|SI_WILDA); 3309 } 3310 3311 /* 3312 * Next, remove it from the timeout queue it is in. 3313 */ 3314 fr_deletequeueentry(&is->is_sti); 3315 3316 is->is_me = NULL; 3317 3318 /* 3319 * If it is still in use by something else, do not go any further, 3320 * but note that at this point it is now an orphan. 3321 */ 3322 MUTEX_ENTER(&is->is_lock); 3323 if (is->is_ref > 1) { 3324 is->is_ref--; 3325 MUTEX_EXIT(&is->is_lock); 3326 if (removed) 3327 ifs->ifs_ips_stats.iss_orphans++; 3328 return (is->is_ref); 3329 } 3330 MUTEX_EXIT(&is->is_lock); 3331 3332 is->is_ref = 0; 3333 3334 /* 3335 * If entry has already been removed from table, 3336 * it means we're simply cleaning up an orphan. 
3337 */ 3338 if (!removed) 3339 ifs->ifs_ips_stats.iss_orphans--; 3340 3341 if (is->is_tqehead[0] != NULL) 3342 (void) fr_deletetimeoutqueue(is->is_tqehead[0]); 3343 3344 if (is->is_tqehead[1] != NULL) 3345 (void) fr_deletetimeoutqueue(is->is_tqehead[1]); 3346 3347 #ifdef IPFILTER_SYNC 3348 if (is->is_sync) 3349 ipfsync_del(is->is_sync); 3350 #endif 3351 #ifdef IPFILTER_SCAN 3352 (void) ipsc_detachis(is); 3353 #endif 3354 3355 /* 3356 * Now remove it from master list of state table entries. 3357 */ 3358 if (is->is_pnext != NULL) { 3359 *is->is_pnext = is->is_next; 3360 if (is->is_next != NULL) { 3361 is->is_next->is_pnext = is->is_pnext; 3362 is->is_next = NULL; 3363 } 3364 is->is_pnext = NULL; 3365 } 3366 3367 if (ifs->ifs_ipstate_logging != 0 && why != 0) 3368 ipstate_log(is, why, ifs); 3369 3370 if (is->is_rule != NULL) { 3371 is->is_rule->fr_statecnt--; 3372 (void)fr_derefrule(&is->is_rule, ifs); 3373 } 3374 3375 MUTEX_DESTROY(&is->is_lock); 3376 KFREE(is); 3377 ifs->ifs_ips_num--; 3378 3379 return (0); 3380 } 3381 3382 3383 /* ------------------------------------------------------------------------ */ 3384 /* Function: fr_timeoutstate */ 3385 /* Returns: Nil */ 3386 /* Parameters: ifs - ipf stack instance */ 3387 /* */ 3388 /* Slowly expire held state for thingslike UDP and ICMP. The algorithm */ 3389 /* used here is to keep the queue sorted with the oldest things at the top */ 3390 /* and the youngest at the bottom. So if the top one doesn't need to be */ 3391 /* expired then neither will any under it. 
*/ 3392 /* ------------------------------------------------------------------------ */ 3393 void fr_timeoutstate(ifs) 3394 ipf_stack_t *ifs; 3395 { 3396 ipftq_t *ifq, *ifqnext; 3397 ipftqent_t *tqe, *tqn; 3398 ipstate_t *is; 3399 SPL_INT(s); 3400 3401 SPL_NET(s); 3402 WRITE_ENTER(&ifs->ifs_ipf_state); 3403 for (ifq = ifs->ifs_ips_tqtqb; ifq != NULL; ifq = ifq->ifq_next) 3404 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { 3405 if (tqe->tqe_die > ifs->ifs_fr_ticks) 3406 break; 3407 tqn = tqe->tqe_next; 3408 is = tqe->tqe_parent; 3409 (void) fr_delstate(is, ISL_EXPIRE, ifs); 3410 } 3411 3412 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifq->ifq_next) { 3413 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { 3414 if (tqe->tqe_die > ifs->ifs_fr_ticks) 3415 break; 3416 tqn = tqe->tqe_next; 3417 is = tqe->tqe_parent; 3418 (void) fr_delstate(is, ISL_EXPIRE, ifs); 3419 } 3420 } 3421 3422 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) { 3423 ifqnext = ifq->ifq_next; 3424 3425 if (((ifq->ifq_flags & IFQF_DELETE) != 0) && 3426 (ifq->ifq_ref == 0)) { 3427 fr_freetimeoutqueue(ifq, ifs); 3428 } 3429 } 3430 3431 if (ifs->ifs_fr_state_doflush) { 3432 (void) fr_state_flush(FLUSH_TABLE_EXTRA, 0, ifs); 3433 ifs->ifs_fr_state_doflush = 0; 3434 } 3435 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3436 SPL_X(s); 3437 } 3438 3439 3440 /* ---------------------------------------------------------------------- */ 3441 /* Function: fr_state_flush */ 3442 /* Returns: int - 0 == success, -1 == failure */ 3443 /* Parameters: flush_option - how to flush the active State table */ 3444 /* proto - IP version to flush (4, 6, or both) */ 3445 /* ifs - ipf stack instance */ 3446 /* Write Locks: ipf_state */ 3447 /* */ 3448 /* Flush state tables. 
Three possible flush options currently defined: */ 3449 /* */ 3450 /* FLUSH_TABLE_ALL : Flush all state table entries */ 3451 /* */ 3452 /* FLUSH_TABLE_CLOSING : Flush entries with TCP connections which */ 3453 /* have started to close on both ends using */ 3454 /* ipf_flushclosing(). */ 3455 /* */ 3456 /* FLUSH_TABLE_EXTRA : First, flush entries which are "almost" closed. */ 3457 /* Then, if needed, flush entries with TCP */ 3458 /* connections which have been idle for a long */ 3459 /* time with ipf_extraflush(). */ 3460 /* ---------------------------------------------------------------------- */ 3461 static int fr_state_flush(flush_option, proto, ifs) 3462 int flush_option, proto; 3463 ipf_stack_t *ifs; 3464 { 3465 ipstate_t *is, *isn; 3466 int removed; 3467 SPL_INT(s); 3468 3469 removed = 0; 3470 3471 SPL_NET(s); 3472 switch (flush_option) 3473 { 3474 case FLUSH_TABLE_ALL: 3475 isn = ifs->ifs_ips_list; 3476 while ((is = isn) != NULL) { 3477 isn = is->is_next; 3478 if ((proto != 0) && (is->is_v != proto)) 3479 continue; 3480 if (fr_delstate(is, ISL_FLUSH, ifs) == 0) 3481 removed++; 3482 } 3483 break; 3484 3485 case FLUSH_TABLE_CLOSING: 3486 removed = ipf_flushclosing(STATE_FLUSH, 3487 IPF_TCPS_CLOSE_WAIT, 3488 ifs->ifs_ips_tqtqb, 3489 ifs->ifs_ips_utqe, 3490 ifs); 3491 break; 3492 3493 case FLUSH_TABLE_EXTRA: 3494 removed = ipf_flushclosing(STATE_FLUSH, 3495 IPF_TCPS_FIN_WAIT_2, 3496 ifs->ifs_ips_tqtqb, 3497 ifs->ifs_ips_utqe, 3498 ifs); 3499 3500 /* 3501 * Be sure we haven't done this in the last 10 seconds. 
3502 */ 3503 if (ifs->ifs_fr_ticks - ifs->ifs_ips_last_force_flush < 3504 IPF_TTLVAL(10)) 3505 break; 3506 ifs->ifs_ips_last_force_flush = ifs->ifs_fr_ticks; 3507 removed += ipf_extraflush(STATE_FLUSH, 3508 &ifs->ifs_ips_tqtqb[IPF_TCPS_ESTABLISHED], 3509 ifs->ifs_ips_utqe, 3510 ifs); 3511 break; 3512 3513 default: /* Flush Nothing */ 3514 break; 3515 } 3516 3517 SPL_X(s); 3518 return (removed); 3519 } 3520 3521 3522 /* ------------------------------------------------------------------------ */ 3523 /* Function: fr_tcp_age */ 3524 /* Returns: int - 1 == state transition made, 0 == no change (rejected) */ 3525 /* Parameters: tq(I) - pointer to timeout queue information */ 3526 /* fin(I) - pointer to packet information */ 3527 /* tqtab(I) - TCP timeout queue table this is in */ 3528 /* flags(I) - flags from state/NAT entry */ 3529 /* */ 3530 /* Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29: */ 3531 /* */ 3532 /* - (try to) base state transitions on real evidence only, */ 3533 /* i.e. packets that are sent and have been received by ipfilter; */ 3534 /* diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used. */ 3535 /* */ 3536 /* - deal with half-closed connections correctly; */ 3537 /* */ 3538 /* - store the state of the source in state[0] such that ipfstat */ 3539 /* displays the state as source/dest instead of dest/source; the calls */ 3540 /* to fr_tcp_age have been changed accordingly. */ 3541 /* */ 3542 /* Internal Parameters: */ 3543 /* */ 3544 /* state[0] = state of source (host that initiated connection) */ 3545 /* state[1] = state of dest (host that accepted the connection) */ 3546 /* */ 3547 /* dir == 0 : a packet from source to dest */ 3548 /* dir == 1 : a packet from dest to source */ 3549 /* */ 3550 /* Locking: it is assumed that the parent of the tqe structure is locked. 
*/ 3551 /* ------------------------------------------------------------------------ */ 3552 int fr_tcp_age(tqe, fin, tqtab, flags) 3553 ipftqent_t *tqe; 3554 fr_info_t *fin; 3555 ipftq_t *tqtab; 3556 int flags; 3557 { 3558 int dlen, ostate, nstate, rval, dir; 3559 u_char tcpflags; 3560 tcphdr_t *tcp; 3561 ipf_stack_t *ifs = fin->fin_ifs; 3562 3563 tcp = fin->fin_dp; 3564 3565 rval = 0; 3566 dir = fin->fin_rev; 3567 tcpflags = tcp->th_flags; 3568 dlen = fin->fin_dlen - (TCP_OFF(tcp) << 2); 3569 3570 ostate = tqe->tqe_state[1 - dir]; 3571 nstate = tqe->tqe_state[dir]; 3572 3573 DTRACE_PROBE4( 3574 indata, 3575 fr_info_t *, fin, 3576 int, ostate, 3577 int, nstate, 3578 u_char, tcpflags 3579 ); 3580 3581 if (tcpflags & TH_RST) { 3582 if (!(tcpflags & TH_PUSH) && !dlen) 3583 nstate = IPF_TCPS_CLOSED; 3584 else 3585 nstate = IPF_TCPS_CLOSE_WAIT; 3586 3587 /* 3588 * Once RST is received, we must advance peer's state to 3589 * CLOSE_WAIT. 3590 */ 3591 if (ostate <= IPF_TCPS_ESTABLISHED) { 3592 tqe->tqe_state[1 - dir] = IPF_TCPS_CLOSE_WAIT; 3593 } 3594 rval = 1; 3595 } else { 3596 3597 switch (nstate) 3598 { 3599 case IPF_TCPS_LISTEN: /* 0 */ 3600 if ((tcpflags & TH_OPENING) == TH_OPENING) { 3601 /* 3602 * 'dir' received an S and sends SA in 3603 * response, CLOSED -> SYN_RECEIVED 3604 */ 3605 nstate = IPF_TCPS_SYN_RECEIVED; 3606 rval = 1; 3607 } else if ((tcpflags & TH_OPENING) == TH_SYN) { 3608 /* 'dir' sent S, CLOSED -> SYN_SENT */ 3609 nstate = IPF_TCPS_SYN_SENT; 3610 rval = 1; 3611 } 3612 /* 3613 * the next piece of code makes it possible to get 3614 * already established connections into the state table 3615 * after a restart or reload of the filter rules; this 3616 * does not work when a strict 'flags S keep state' is 3617 * used for tcp connections of course 3618 */ 3619 if (((flags & IS_TCPFSM) == 0) && 3620 ((tcpflags & TH_ACKMASK) == TH_ACK)) { 3621 /* 3622 * we saw an A, guess 'dir' is in ESTABLISHED 3623 * mode 3624 */ 3625 switch (ostate) 3626 { 3627 case 
IPF_TCPS_LISTEN : 3628 case IPF_TCPS_SYN_RECEIVED : 3629 nstate = IPF_TCPS_HALF_ESTAB; 3630 rval = 1; 3631 break; 3632 case IPF_TCPS_HALF_ESTAB : 3633 case IPF_TCPS_ESTABLISHED : 3634 nstate = IPF_TCPS_ESTABLISHED; 3635 rval = 1; 3636 break; 3637 default : 3638 break; 3639 } 3640 } 3641 /* 3642 * TODO: besides regular ACK packets we can have other 3643 * packets as well; it is yet to be determined how we 3644 * should initialize the states in those cases 3645 */ 3646 break; 3647 3648 case IPF_TCPS_SYN_SENT: /* 1 */ 3649 if ((tcpflags & ~(TH_ECN|TH_CWR)) == TH_SYN) { 3650 /* 3651 * A retransmitted SYN packet. We do not reset 3652 * the timeout here to fr_tcptimeout because a 3653 * connection connect timeout does not renew 3654 * after every packet that is sent. We need to 3655 * set rval so as to indicate the packet has 3656 * passed the check for its flags being valid 3657 * in the TCP FSM. Setting rval to 2 has the 3658 * result of not resetting the timeout. 3659 */ 3660 rval = 2; 3661 } else if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == 3662 TH_ACK) { 3663 /* 3664 * we see an A from 'dir' which is in SYN_SENT 3665 * state: 'dir' sent an A in response to an SA 3666 * which it received, SYN_SENT -> ESTABLISHED 3667 */ 3668 nstate = IPF_TCPS_ESTABLISHED; 3669 rval = 1; 3670 } else if (tcpflags & TH_FIN) { 3671 /* 3672 * we see an F from 'dir' which is in SYN_SENT 3673 * state and wants to close its side of the 3674 * connection; SYN_SENT -> FIN_WAIT_1 3675 */ 3676 nstate = IPF_TCPS_FIN_WAIT_1; 3677 rval = 1; 3678 } else if ((tcpflags & TH_OPENING) == TH_OPENING) { 3679 /* 3680 * we see an SA from 'dir' which is already in 3681 * SYN_SENT state, this means we have a 3682 * simultaneous open; SYN_SENT -> SYN_RECEIVED 3683 */ 3684 nstate = IPF_TCPS_SYN_RECEIVED; 3685 rval = 1; 3686 } 3687 break; 3688 3689 case IPF_TCPS_SYN_RECEIVED: /* 2 */ 3690 if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) { 3691 /* 3692 * we see an A from 'dir' which was in 3693 * SYN_RECEIVED 
state so it must now be in 3694 * established state, SYN_RECEIVED -> 3695 * ESTABLISHED 3696 */ 3697 nstate = IPF_TCPS_ESTABLISHED; 3698 rval = 1; 3699 } else if ((tcpflags & ~(TH_ECN|TH_CWR)) == 3700 TH_OPENING) { 3701 /* 3702 * We see an SA from 'dir' which is already in 3703 * SYN_RECEIVED state. 3704 */ 3705 rval = 2; 3706 } else if (tcpflags & TH_FIN) { 3707 /* 3708 * we see an F from 'dir' which is in 3709 * SYN_RECEIVED state and wants to close its 3710 * side of the connection; SYN_RECEIVED -> 3711 * FIN_WAIT_1 3712 */ 3713 nstate = IPF_TCPS_FIN_WAIT_1; 3714 rval = 1; 3715 } 3716 break; 3717 3718 case IPF_TCPS_HALF_ESTAB: /* 3 */ 3719 if (tcpflags & TH_FIN) { 3720 nstate = IPF_TCPS_FIN_WAIT_1; 3721 rval = 1; 3722 } else if ((tcpflags & TH_ACKMASK) == TH_ACK) { 3723 /* 3724 * If we've picked up a connection in mid 3725 * flight, we could be looking at a follow on 3726 * packet from the same direction as the one 3727 * that created this state. Recognise it but 3728 * do not advance the entire connection's 3729 * state. 3730 */ 3731 switch (ostate) 3732 { 3733 case IPF_TCPS_LISTEN : 3734 case IPF_TCPS_SYN_SENT : 3735 case IPF_TCPS_SYN_RECEIVED : 3736 rval = 1; 3737 break; 3738 case IPF_TCPS_HALF_ESTAB : 3739 case IPF_TCPS_ESTABLISHED : 3740 nstate = IPF_TCPS_ESTABLISHED; 3741 rval = 1; 3742 break; 3743 default : 3744 break; 3745 } 3746 } 3747 break; 3748 3749 case IPF_TCPS_ESTABLISHED: /* 4 */ 3750 rval = 1; 3751 if (tcpflags & TH_FIN) { 3752 /* 3753 * 'dir' closed its side of the connection; 3754 * this gives us a half-closed connection; 3755 * ESTABLISHED -> FIN_WAIT_1 3756 */ 3757 if (ostate == IPF_TCPS_FIN_WAIT_1) { 3758 nstate = IPF_TCPS_CLOSING; 3759 } else { 3760 nstate = IPF_TCPS_FIN_WAIT_1; 3761 } 3762 } else if (tcpflags & TH_ACK) { 3763 /* 3764 * an ACK, should we exclude other flags here? 
3765 */ 3766 if (ostate == IPF_TCPS_FIN_WAIT_1) { 3767 /* 3768 * We know the other side did an active 3769 * close, so we are ACKing the recvd 3770 * FIN packet (does the window matching 3771 * code guarantee this?) and go into 3772 * CLOSE_WAIT state; this gives us a 3773 * half-closed connection 3774 */ 3775 nstate = IPF_TCPS_CLOSE_WAIT; 3776 } else if (ostate < IPF_TCPS_CLOSE_WAIT) { 3777 /* 3778 * still a fully established 3779 * connection reset timeout 3780 */ 3781 nstate = IPF_TCPS_ESTABLISHED; 3782 } 3783 } 3784 break; 3785 3786 case IPF_TCPS_CLOSE_WAIT: /* 5 */ 3787 rval = 1; 3788 if (tcpflags & TH_FIN) { 3789 /* 3790 * application closed and 'dir' sent a FIN, 3791 * we're now going into LAST_ACK state 3792 */ 3793 nstate = IPF_TCPS_LAST_ACK; 3794 } else { 3795 /* 3796 * we remain in CLOSE_WAIT because the other 3797 * side has closed already and we did not 3798 * close our side yet; reset timeout 3799 */ 3800 nstate = IPF_TCPS_CLOSE_WAIT; 3801 } 3802 break; 3803 3804 case IPF_TCPS_FIN_WAIT_1: /* 6 */ 3805 rval = 1; 3806 if ((tcpflags & TH_ACK) && 3807 ostate > IPF_TCPS_CLOSE_WAIT) { 3808 /* 3809 * if the other side is not active anymore 3810 * it has sent us a FIN packet that we are 3811 * ack'ing now with an ACK; this means both 3812 * sides have now closed the connection and 3813 * we go into LAST_ACK 3814 */ 3815 /* 3816 * XXX: how do we know we really are ACKing 3817 * the FIN packet here? does the window code 3818 * guarantee that? 
3819 */ 3820 nstate = IPF_TCPS_LAST_ACK; 3821 } else { 3822 /* 3823 * we closed our side of the connection 3824 * already but the other side is still active 3825 * (ESTABLISHED/CLOSE_WAIT); continue with 3826 * this half-closed connection 3827 */ 3828 nstate = IPF_TCPS_FIN_WAIT_1; 3829 } 3830 break; 3831 3832 case IPF_TCPS_CLOSING: /* 7 */ 3833 if ((tcpflags & (TH_FIN|TH_ACK)) == TH_ACK) { 3834 nstate = IPF_TCPS_TIME_WAIT; 3835 } 3836 rval = 1; 3837 break; 3838 3839 case IPF_TCPS_LAST_ACK: /* 8 */ 3840 /* 3841 * We want to reset timer here to keep state in table. 3842 * If we would allow the state to time out here, while 3843 * there would still be packets being retransmitted, we 3844 * would cut off line between the two peers preventing 3845 * them to close connection properly. 3846 */ 3847 rval = 1; 3848 break; 3849 3850 case IPF_TCPS_FIN_WAIT_2: /* 9 */ 3851 /* NOT USED */ 3852 break; 3853 3854 case IPF_TCPS_TIME_WAIT: /* 10 */ 3855 /* we're in 2MSL timeout now */ 3856 if (ostate == IPF_TCPS_LAST_ACK) { 3857 nstate = IPF_TCPS_CLOSED; 3858 rval = 1; 3859 } else { 3860 rval = 2; 3861 } 3862 break; 3863 3864 case IPF_TCPS_CLOSED: /* 11 */ 3865 rval = 2; 3866 break; 3867 3868 default : 3869 #if defined(_KERNEL) 3870 ASSERT(nstate >= IPF_TCPS_LISTEN && 3871 nstate <= IPF_TCPS_CLOSED); 3872 #else 3873 abort(); 3874 #endif 3875 break; 3876 } 3877 } 3878 3879 /* 3880 * If rval == 2 then do not update the queue position, but treat the 3881 * packet as being ok. 3882 */ 3883 if (rval == 2) { 3884 DTRACE_PROBE1(state_keeping_timer, int, nstate); 3885 rval = 1; 3886 } 3887 else if (rval == 1) { 3888 tqe->tqe_state[dir] = nstate; 3889 /* 3890 * The nstate can either advance to a new state, or remain 3891 * unchanged, resetting the timer by moving to the bottom of 3892 * the queue. 
3893 */ 3894 DTRACE_PROBE1(state_done, int, nstate); 3895 3896 if ((tqe->tqe_flags & TQE_RULEBASED) == 0) 3897 fr_movequeue(tqe, tqe->tqe_ifq, tqtab + nstate, ifs); 3898 } 3899 3900 return rval; 3901 } 3902 3903 3904 /* ------------------------------------------------------------------------ */ 3905 /* Function: ipstate_log */ 3906 /* Returns: Nil */ 3907 /* Parameters: is(I) - pointer to state structure */ 3908 /* type(I) - type of log entry to create */ 3909 /* */ 3910 /* Creates a state table log entry using the state structure and type info. */ 3911 /* passed in. Log packet/byte counts, source/destination address and other */ 3912 /* protocol specific information. */ 3913 /* ------------------------------------------------------------------------ */ 3914 void ipstate_log(is, type, ifs) 3915 struct ipstate *is; 3916 u_int type; 3917 ipf_stack_t *ifs; 3918 { 3919 #ifdef IPFILTER_LOG 3920 struct ipslog ipsl; 3921 size_t sizes[1]; 3922 void *items[1]; 3923 int types[1]; 3924 3925 /* 3926 * Copy information out of the ipstate_t structure and into the 3927 * structure used for logging. 
3928 */ 3929 ipsl.isl_type = type; 3930 ipsl.isl_pkts[0] = is->is_pkts[0] + is->is_icmppkts[0]; 3931 ipsl.isl_bytes[0] = is->is_bytes[0]; 3932 ipsl.isl_pkts[1] = is->is_pkts[1] + is->is_icmppkts[1]; 3933 ipsl.isl_bytes[1] = is->is_bytes[1]; 3934 ipsl.isl_pkts[2] = is->is_pkts[2] + is->is_icmppkts[2]; 3935 ipsl.isl_bytes[2] = is->is_bytes[2]; 3936 ipsl.isl_pkts[3] = is->is_pkts[3] + is->is_icmppkts[3]; 3937 ipsl.isl_bytes[3] = is->is_bytes[3]; 3938 ipsl.isl_src = is->is_src; 3939 ipsl.isl_dst = is->is_dst; 3940 ipsl.isl_p = is->is_p; 3941 ipsl.isl_v = is->is_v; 3942 ipsl.isl_flags = is->is_flags; 3943 ipsl.isl_tag = is->is_tag; 3944 ipsl.isl_rulen = is->is_rulen; 3945 (void) strncpy(ipsl.isl_group, is->is_group, FR_GROUPLEN); 3946 3947 if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) { 3948 ipsl.isl_sport = is->is_sport; 3949 ipsl.isl_dport = is->is_dport; 3950 if (ipsl.isl_p == IPPROTO_TCP) { 3951 ipsl.isl_state[0] = is->is_state[0]; 3952 ipsl.isl_state[1] = is->is_state[1]; 3953 } 3954 } else if (ipsl.isl_p == IPPROTO_ICMP) { 3955 ipsl.isl_itype = is->is_icmp.ici_type; 3956 } else if (ipsl.isl_p == IPPROTO_ICMPV6) { 3957 ipsl.isl_itype = is->is_icmp.ici_type; 3958 } else { 3959 ipsl.isl_ps.isl_filler[0] = 0; 3960 ipsl.isl_ps.isl_filler[1] = 0; 3961 } 3962 3963 items[0] = &ipsl; 3964 sizes[0] = sizeof(ipsl); 3965 types[0] = 0; 3966 3967 if (ipllog(IPL_LOGSTATE, NULL, items, sizes, types, 1, ifs)) { 3968 ATOMIC_INCL(ifs->ifs_ips_stats.iss_logged); 3969 } else { 3970 ATOMIC_INCL(ifs->ifs_ips_stats.iss_logfail); 3971 } 3972 #endif 3973 } 3974 3975 3976 #ifdef USE_INET6 3977 /* ------------------------------------------------------------------------ */ 3978 /* Function: fr_checkicmp6matchingstate */ 3979 /* Returns: ipstate_t* - NULL == no match found, */ 3980 /* else pointer to matching state entry */ 3981 /* Parameters: fin(I) - pointer to packet information */ 3982 /* Locks: NULL == no locks, else Read Lock on ipf_state */ 3983 /* */ 3984 /* If we've got 
an ICMPv6 error message, using the information stored in */ 3985 /* the ICMPv6 packet, look for a matching state table entry. */ 3986 /* ------------------------------------------------------------------------ */ 3987 static ipstate_t *fr_checkicmp6matchingstate(fin) 3988 fr_info_t *fin; 3989 { 3990 struct icmp6_hdr *ic6, *oic; 3991 int backward, i; 3992 ipstate_t *is, **isp; 3993 u_short sport, dport; 3994 i6addr_t dst, src; 3995 u_short savelen; 3996 icmpinfo_t *ic; 3997 fr_info_t ofin; 3998 tcphdr_t *tcp; 3999 ip6_t *oip6; 4000 u_char pr; 4001 u_int hv; 4002 ipf_stack_t *ifs = fin->fin_ifs; 4003 4004 /* 4005 * Does it at least have the return (basic) IP header ? 4006 * Is it an actual recognised ICMP error type? 4007 * Only a basic IP header (no options) should be with 4008 * an ICMP error header. 4009 */ 4010 if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN) || 4011 !(fin->fin_flx & FI_ICMPERR)) 4012 return NULL; 4013 4014 ic6 = fin->fin_dp; 4015 4016 oip6 = (ip6_t *)((char *)ic6 + ICMPERR_ICMPHLEN); 4017 if (fin->fin_plen < sizeof(*oip6)) 4018 return NULL; 4019 4020 bcopy((char *)fin, (char *)&ofin, sizeof(*fin)); 4021 ofin.fin_v = 6; 4022 ofin.fin_ifp = fin->fin_ifp; 4023 ofin.fin_out = !fin->fin_out; 4024 ofin.fin_m = NULL; /* if dereferenced, panic XXX */ 4025 ofin.fin_mp = NULL; /* if dereferenced, panic XXX */ 4026 4027 /* 4028 * We make a fin entry to be able to feed it to 4029 * matchsrcdst. Note that not all fields are necessary 4030 * but this is the cleanest way. Note further we fill 4031 * in fin_mp such that if someone uses it we'll get 4032 * a kernel panic. fr_matchsrcdst does not use this. 4033 * 4034 * watch out here, as ip is in host order and oip6 in network 4035 * order. Any change we make must be undone afterwards. 
4036 */ 4037 savelen = oip6->ip6_plen; 4038 oip6->ip6_plen = fin->fin_dlen - ICMPERR_ICMPHLEN; 4039 ofin.fin_flx = FI_NOCKSUM; 4040 ofin.fin_ip = (ip_t *)oip6; 4041 ofin.fin_plen = oip6->ip6_plen; 4042 (void) fr_makefrip(sizeof(*oip6), (ip_t *)oip6, &ofin); 4043 ofin.fin_flx &= ~(FI_BAD|FI_SHORT); 4044 oip6->ip6_plen = savelen; 4045 4046 if (oip6->ip6_nxt == IPPROTO_ICMPV6) { 4047 oic = (struct icmp6_hdr *)(oip6 + 1); 4048 /* 4049 * an ICMP error can only be generated as a result of an 4050 * ICMP query, not as the response on an ICMP error 4051 * 4052 * XXX theoretically ICMP_ECHOREP and the other reply's are 4053 * ICMP query's as well, but adding them here seems strange XXX 4054 */ 4055 if (!(oic->icmp6_type & ICMP6_INFOMSG_MASK)) 4056 return NULL; 4057 4058 /* 4059 * perform a lookup of the ICMP packet in the state table 4060 */ 4061 hv = (pr = oip6->ip6_nxt); 4062 src.in6 = oip6->ip6_src; 4063 hv += src.in4.s_addr; 4064 dst.in6 = oip6->ip6_dst; 4065 hv += dst.in4.s_addr; 4066 hv += oic->icmp6_id; 4067 hv += oic->icmp6_seq; 4068 hv = DOUBLE_HASH(hv, ifs); 4069 4070 READ_ENTER(&ifs->ifs_ipf_state); 4071 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 4072 ic = &is->is_icmp; 4073 isp = &is->is_hnext; 4074 if ((is->is_p == pr) && 4075 !(is->is_pass & FR_NOICMPERR) && 4076 (oic->icmp6_id == ic->ici_id) && 4077 (oic->icmp6_seq == ic->ici_seq) && 4078 (is = fr_matchsrcdst(&ofin, is, &src, 4079 &dst, NULL, FI_ICMPCMP))) { 4080 /* 4081 * in the state table ICMP query's are stored 4082 * with the type of the corresponding ICMP 4083 * response. 
Correct here 4084 */ 4085 if (((ic->ici_type == ICMP6_ECHO_REPLY) && 4086 (oic->icmp6_type == ICMP6_ECHO_REQUEST)) || 4087 (ic->ici_type - 1 == oic->icmp6_type )) { 4088 ifs->ifs_ips_stats.iss_hits++; 4089 backward = IP6_NEQ(&is->is_dst, &src); 4090 fin->fin_rev = !backward; 4091 i = (backward << 1) + fin->fin_out; 4092 is->is_icmppkts[i]++; 4093 return is; 4094 } 4095 } 4096 } 4097 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4098 return NULL; 4099 } 4100 4101 hv = (pr = oip6->ip6_nxt); 4102 src.in6 = oip6->ip6_src; 4103 hv += src.i6[0]; 4104 hv += src.i6[1]; 4105 hv += src.i6[2]; 4106 hv += src.i6[3]; 4107 dst.in6 = oip6->ip6_dst; 4108 hv += dst.i6[0]; 4109 hv += dst.i6[1]; 4110 hv += dst.i6[2]; 4111 hv += dst.i6[3]; 4112 4113 if ((oip6->ip6_nxt == IPPROTO_TCP) || (oip6->ip6_nxt == IPPROTO_UDP)) { 4114 tcp = (tcphdr_t *)(oip6 + 1); 4115 dport = tcp->th_dport; 4116 sport = tcp->th_sport; 4117 hv += dport; 4118 hv += sport; 4119 } else 4120 tcp = NULL; 4121 hv = DOUBLE_HASH(hv, ifs); 4122 4123 READ_ENTER(&ifs->ifs_ipf_state); 4124 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 4125 isp = &is->is_hnext; 4126 /* 4127 * Only allow this icmp though if the 4128 * encapsulated packet was allowed through the 4129 * other way around. Note that the minimal amount 4130 * of info present does not allow for checking against 4131 * tcp internals such as seq and ack numbers. 4132 */ 4133 if ((is->is_p != pr) || (is->is_v != 6) || 4134 (is->is_pass & FR_NOICMPERR)) 4135 continue; 4136 is = fr_matchsrcdst(&ofin, is, &src, &dst, tcp, FI_ICMPCMP); 4137 if (is != NULL) { 4138 ifs->ifs_ips_stats.iss_hits++; 4139 backward = IP6_NEQ(&is->is_dst, &src); 4140 fin->fin_rev = !backward; 4141 i = (backward << 1) + fin->fin_out; 4142 is->is_icmppkts[i]++; 4143 /* 4144 * we deliberately do not touch the timeouts 4145 * for the accompanying state table entry. 4146 * It remains to be seen if that is correct. 
XXX 4147 */ 4148 return is; 4149 } 4150 } 4151 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4152 return NULL; 4153 } 4154 #endif 4155 4156 4157 /* ------------------------------------------------------------------------ */ 4158 /* Function: fr_sttab_init */ 4159 /* Returns: Nil */ 4160 /* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */ 4161 /* */ 4162 /* Initialise the array of timeout queues for TCP. */ 4163 /* ------------------------------------------------------------------------ */ 4164 void fr_sttab_init(tqp, ifs) 4165 ipftq_t *tqp; 4166 ipf_stack_t *ifs; 4167 { 4168 int i; 4169 4170 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) { 4171 tqp[i].ifq_ttl = 0; 4172 tqp[i].ifq_ref = 1; 4173 tqp[i].ifq_head = NULL; 4174 tqp[i].ifq_tail = &tqp[i].ifq_head; 4175 tqp[i].ifq_next = tqp + i + 1; 4176 MUTEX_INIT(&tqp[i].ifq_lock, "ipftq tcp tab"); 4177 } 4178 tqp[IPF_TCP_NSTATES - 1].ifq_next = NULL; 4179 tqp[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcpclosed; 4180 tqp[IPF_TCPS_LISTEN].ifq_ttl = ifs->ifs_fr_tcptimeout; 4181 tqp[IPF_TCPS_SYN_SENT].ifq_ttl = ifs->ifs_fr_tcptimeout; 4182 tqp[IPF_TCPS_SYN_RECEIVED].ifq_ttl = ifs->ifs_fr_tcptimeout; 4183 tqp[IPF_TCPS_ESTABLISHED].ifq_ttl = ifs->ifs_fr_tcpidletimeout; 4184 tqp[IPF_TCPS_CLOSE_WAIT].ifq_ttl = ifs->ifs_fr_tcphalfclosed; 4185 tqp[IPF_TCPS_FIN_WAIT_1].ifq_ttl = ifs->ifs_fr_tcphalfclosed; 4186 tqp[IPF_TCPS_CLOSING].ifq_ttl = ifs->ifs_fr_tcptimeout; 4187 tqp[IPF_TCPS_LAST_ACK].ifq_ttl = ifs->ifs_fr_tcplastack; 4188 tqp[IPF_TCPS_FIN_WAIT_2].ifq_ttl = ifs->ifs_fr_tcpclosewait; 4189 tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = ifs->ifs_fr_tcptimeout; 4190 tqp[IPF_TCPS_HALF_ESTAB].ifq_ttl = ifs->ifs_fr_tcptimeout; 4191 } 4192 4193 4194 /* ------------------------------------------------------------------------ */ 4195 /* Function: fr_sttab_destroy */ 4196 /* Returns: Nil */ 4197 /* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */ 4198 /* */ 4199 /* Do whatever is necessary to "destroy" each of the entries 
in the array */ 4200 /* of timeout queues for TCP. */ 4201 /* ------------------------------------------------------------------------ */ 4202 void fr_sttab_destroy(tqp) 4203 ipftq_t *tqp; 4204 { 4205 int i; 4206 4207 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) 4208 MUTEX_DESTROY(&tqp[i].ifq_lock); 4209 } 4210 4211 4212 /* ------------------------------------------------------------------------ */ 4213 /* Function: fr_statederef */ 4214 /* Returns: Nil */ 4215 /* Parameters: isp(I) - pointer to pointer to state table entry */ 4216 /* ifs - ipf stack instance */ 4217 /* */ 4218 /* Decrement the reference counter for this state table entry and free it */ 4219 /* if there are no more things using it. */ 4220 /* */ 4221 /* Internal parameters: */ 4222 /* state[0] = state of source (host that initiated connection) */ 4223 /* state[1] = state of dest (host that accepted the connection) */ 4224 /* ------------------------------------------------------------------------ */ 4225 void fr_statederef(isp, ifs) 4226 ipstate_t **isp; 4227 ipf_stack_t *ifs; 4228 { 4229 ipstate_t *is; 4230 4231 is = *isp; 4232 *isp = NULL; 4233 4234 MUTEX_ENTER(&is->is_lock); 4235 if (is->is_ref > 1) { 4236 is->is_ref--; 4237 MUTEX_EXIT(&is->is_lock); 4238 #ifndef _KERNEL 4239 if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) || 4240 (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) { 4241 (void) fr_delstate(is, ISL_ORPHAN, ifs); 4242 } 4243 #endif 4244 return; 4245 } 4246 MUTEX_EXIT(&is->is_lock); 4247 4248 WRITE_ENTER(&ifs->ifs_ipf_state); 4249 (void) fr_delstate(is, ISL_EXPIRE, ifs); 4250 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4251 } 4252 4253 4254 /* ------------------------------------------------------------------------ */ 4255 /* Function: fr_setstatequeue */ 4256 /* Returns: Nil */ 4257 /* Parameters: is(I) - pointer to state structure */ 4258 /* rev(I) - forward(0) or reverse(1) direction */ 4259 /* Locks: ipf_state (read or write) */ 4260 /* */ 4261 /* Put the state entry on its default 
queue entry, using rev as a helped in */ 4262 /* determining which queue it should be placed on. */ 4263 /* ------------------------------------------------------------------------ */ 4264 void fr_setstatequeue(is, rev, ifs) 4265 ipstate_t *is; 4266 int rev; 4267 ipf_stack_t *ifs; 4268 { 4269 ipftq_t *oifq, *nifq; 4270 4271 4272 if ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) 4273 nifq = is->is_tqehead[rev]; 4274 else 4275 nifq = NULL; 4276 4277 if (nifq == NULL) { 4278 switch (is->is_p) 4279 { 4280 #ifdef USE_INET6 4281 case IPPROTO_ICMPV6 : 4282 if (rev == 1) 4283 nifq = &ifs->ifs_ips_icmpacktq; 4284 else 4285 nifq = &ifs->ifs_ips_icmptq; 4286 break; 4287 #endif 4288 case IPPROTO_ICMP : 4289 if (rev == 1) 4290 nifq = &ifs->ifs_ips_icmpacktq; 4291 else 4292 nifq = &ifs->ifs_ips_icmptq; 4293 break; 4294 case IPPROTO_TCP : 4295 nifq = ifs->ifs_ips_tqtqb + is->is_state[rev]; 4296 break; 4297 4298 case IPPROTO_UDP : 4299 if (rev == 1) 4300 nifq = &ifs->ifs_ips_udpacktq; 4301 else 4302 nifq = &ifs->ifs_ips_udptq; 4303 break; 4304 4305 default : 4306 nifq = &ifs->ifs_ips_iptq; 4307 break; 4308 } 4309 } 4310 4311 oifq = is->is_sti.tqe_ifq; 4312 /* 4313 * If it's currently on a timeout queue, move it from one queue to 4314 * another, else put it on the end of the newly determined queue. 4315 */ 4316 if (oifq != NULL) 4317 fr_movequeue(&is->is_sti, oifq, nifq, ifs); 4318 else 4319 fr_queueappend(&is->is_sti, nifq, is, ifs); 4320 return; 4321 } 4322 4323 4324 /* ------------------------------------------------------------------------ */ 4325 /* Function: fr_stateiter */ 4326 /* Returns: int - 0 == success, else error */ 4327 /* Parameters: token(I) - pointer to ipftoken structure */ 4328 /* itp(I) - pointer to ipfgeniter structure */ 4329 /* */ 4330 /* This function handles the SIOCGENITER ioctl for the state tables and */ 4331 /* walks through the list of entries in the state table list (ips_list.) 
*/ 4332 /* ------------------------------------------------------------------------ */ 4333 static int fr_stateiter(token, itp, ifs) 4334 ipftoken_t *token; 4335 ipfgeniter_t *itp; 4336 ipf_stack_t *ifs; 4337 { 4338 ipstate_t *is, *next, zero; 4339 int error, count; 4340 char *dst; 4341 4342 if (itp->igi_data == NULL) 4343 return EFAULT; 4344 4345 if (itp->igi_nitems == 0) 4346 return EINVAL; 4347 4348 if (itp->igi_type != IPFGENITER_STATE) 4349 return EINVAL; 4350 4351 error = 0; 4352 4353 READ_ENTER(&ifs->ifs_ipf_state); 4354 4355 /* 4356 * Get "previous" entry from the token and find the next entry. 4357 */ 4358 is = token->ipt_data; 4359 if (is == NULL) { 4360 next = ifs->ifs_ips_list; 4361 } else { 4362 next = is->is_next; 4363 } 4364 4365 dst = itp->igi_data; 4366 for (count = itp->igi_nitems; count > 0; count--) { 4367 /* 4368 * If we found an entry, add a reference to it and update the token. 4369 * Otherwise, zero out data to be returned and NULL out token. 4370 */ 4371 if (next != NULL) { 4372 MUTEX_ENTER(&next->is_lock); 4373 next->is_ref++; 4374 MUTEX_EXIT(&next->is_lock); 4375 token->ipt_data = next; 4376 } else { 4377 bzero(&zero, sizeof(zero)); 4378 next = &zero; 4379 token->ipt_data = NULL; 4380 } 4381 4382 /* 4383 * Safe to release lock now the we have a reference. 4384 */ 4385 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4386 4387 /* 4388 * Copy out data and clean up references and tokens. 4389 */ 4390 error = COPYOUT(next, dst, sizeof(*next)); 4391 if (error != 0) 4392 error = EFAULT; 4393 if (token->ipt_data == NULL) { 4394 ipf_freetoken(token, ifs); 4395 break; 4396 } else { 4397 if (is != NULL) 4398 fr_statederef(&is, ifs); 4399 if (next->is_next == NULL) { 4400 ipf_freetoken(token, ifs); 4401 break; 4402 } 4403 } 4404 4405 if ((count == 1) || (error != 0)) 4406 break; 4407 4408 READ_ENTER(&ifs->ifs_ipf_state); 4409 dst += sizeof(*next); 4410 is = next; 4411 next = is->is_next; 4412 } 4413 4414 return error; 4415 } 4416