1 /* 2 * Copyright (C) 1995-2003 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 7 * Use is subject to license terms. 8 */ 9 10 #if defined(KERNEL) || defined(_KERNEL) 11 # undef KERNEL 12 # undef _KERNEL 13 # define KERNEL 1 14 # define _KERNEL 1 15 #endif 16 #include <sys/errno.h> 17 #include <sys/types.h> 18 #include <sys/param.h> 19 #include <sys/file.h> 20 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ 21 defined(_KERNEL) 22 # include "opt_ipfilter_log.h" 23 #endif 24 #if defined(_KERNEL) && defined(__FreeBSD_version) && \ 25 (__FreeBSD_version >= 400000) && !defined(KLD_MODULE) 26 #include "opt_inet6.h" 27 #endif 28 #if !defined(_KERNEL) && !defined(__KERNEL__) 29 # include <stdio.h> 30 # include <stdlib.h> 31 # include <string.h> 32 # define _KERNEL 33 # ifdef __OpenBSD__ 34 struct file; 35 # endif 36 # include <sys/uio.h> 37 # undef _KERNEL 38 #endif 39 #if defined(_KERNEL) && (__FreeBSD_version >= 220000) 40 # include <sys/filio.h> 41 # include <sys/fcntl.h> 42 # if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM) 43 # include "opt_ipfilter.h" 44 # endif 45 #else 46 # include <sys/ioctl.h> 47 #endif 48 #include <sys/time.h> 49 #if !defined(linux) 50 # include <sys/protosw.h> 51 #endif 52 #include <sys/socket.h> 53 #if defined(_KERNEL) 54 # include <sys/systm.h> 55 # if !defined(__SVR4) && !defined(__svr4__) 56 # include <sys/mbuf.h> 57 # endif 58 #endif 59 #if defined(__SVR4) || defined(__svr4__) 60 # include <sys/filio.h> 61 # include <sys/byteorder.h> 62 # ifdef _KERNEL 63 # include <sys/dditypes.h> 64 # endif 65 # include <sys/stream.h> 66 # include <sys/kmem.h> 67 #endif 68 69 #include <net/if.h> 70 #ifdef sun 71 # include <net/af.h> 72 #endif 73 #include <net/route.h> 74 #include <netinet/in.h> 75 #include <netinet/in_systm.h> 76 #include <netinet/ip.h> 77 #include <netinet/tcp.h> 78 #if !defined(linux) 79 # include 
<netinet/ip_var.h> 80 #endif 81 #if !defined(__hpux) && !defined(linux) 82 # include <netinet/tcp_fsm.h> 83 #endif 84 #include <netinet/udp.h> 85 #include <netinet/ip_icmp.h> 86 #include "netinet/ip_compat.h" 87 #include <netinet/tcpip.h> 88 #include "netinet/ip_fil.h" 89 #include "netinet/ip_nat.h" 90 #include "netinet/ip_frag.h" 91 #include "netinet/ip_state.h" 92 #include "netinet/ip_proxy.h" 93 #include "netinet/ipf_stack.h" 94 #ifdef IPFILTER_SYNC 95 #include "netinet/ip_sync.h" 96 #endif 97 #ifdef IPFILTER_SCAN 98 #include "netinet/ip_scan.h" 99 #endif 100 #ifdef USE_INET6 101 #include <netinet/icmp6.h> 102 #endif 103 #if (__FreeBSD_version >= 300000) 104 # include <sys/malloc.h> 105 # if defined(_KERNEL) && !defined(IPFILTER_LKM) 106 # include <sys/libkern.h> 107 # include <sys/systm.h> 108 # endif 109 #endif 110 /* END OF INCLUDES */ 111 112 113 #if !defined(lint) 114 static const char sccsid[] = "@(#)ip_state.c 1.8 6/5/96 (C) 1993-2000 Darren Reed"; 115 static const char rcsid[] = "@(#)$Id: ip_state.c,v 2.186.2.36 2005/08/11 19:58:03 darrenr Exp $"; 116 #endif 117 118 #ifdef USE_INET6 119 static ipstate_t *fr_checkicmp6matchingstate __P((fr_info_t *)); 120 #endif 121 static ipstate_t *fr_matchsrcdst __P((fr_info_t *, ipstate_t *, i6addr_t *, 122 i6addr_t *, tcphdr_t *, u_32_t)); 123 static ipstate_t *fr_checkicmpmatchingstate __P((fr_info_t *)); 124 static int fr_state_flush __P((int, int, ipf_stack_t *)); 125 static ips_stat_t *fr_statetstats __P((ipf_stack_t *)); 126 static void fr_delstate __P((ipstate_t *, int, ipf_stack_t *)); 127 static int fr_state_remove __P((caddr_t, ipf_stack_t *)); 128 static void fr_ipsmove __P((ipstate_t *, u_int, ipf_stack_t *)); 129 static int fr_tcpstate __P((fr_info_t *, tcphdr_t *, ipstate_t *)); 130 static int fr_tcpoptions __P((fr_info_t *, tcphdr_t *, tcpdata_t *)); 131 static ipstate_t *fr_stclone __P((fr_info_t *, tcphdr_t *, ipstate_t *)); 132 static void fr_fixinisn __P((fr_info_t *, ipstate_t *)); 133 static void 
fr_fixoutisn __P((fr_info_t *, ipstate_t *)); 134 static void fr_checknewisn __P((fr_info_t *, ipstate_t *)); 135 static int fr_stateiter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 136 137 int fr_stputent __P((caddr_t, ipf_stack_t *)); 138 int fr_stgetent __P((caddr_t, ipf_stack_t *)); 139 140 #define ONE_DAY IPF_TTLVAL(1 * 86400) /* 1 day */ 141 #define FIVE_DAYS (5 * ONE_DAY) 142 #define DOUBLE_HASH(x, ifs) \ 143 (((x) + ifs->ifs_ips_seed[(x) % ifs->ifs_fr_statesize]) % ifs->ifs_fr_statesize) 144 145 146 147 /* ------------------------------------------------------------------------ */ 148 /* Function: fr_stateinit */ 149 /* Returns: int - 0 == success, -1 == failure */ 150 /* Parameters: Nil */ 151 /* */ 152 /* Initialise all the global variables used within the state code. */ 153 /* This action also includes initiailising locks. */ 154 /* ------------------------------------------------------------------------ */ 155 int fr_stateinit(ifs) 156 ipf_stack_t *ifs; 157 { 158 #if defined(NEED_LOCAL_RAND) || !defined(_KERNEL) 159 struct timeval tv; 160 #endif 161 int i; 162 163 KMALLOCS(ifs->ifs_ips_table, ipstate_t **, 164 ifs->ifs_fr_statesize * sizeof(ipstate_t *)); 165 if (ifs->ifs_ips_table == NULL) 166 return -1; 167 bzero((char *)ifs->ifs_ips_table, 168 ifs->ifs_fr_statesize * sizeof(ipstate_t *)); 169 170 KMALLOCS(ifs->ifs_ips_seed, u_long *, 171 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed)); 172 if (ifs->ifs_ips_seed == NULL) 173 return -2; 174 #if defined(NEED_LOCAL_RAND) || !defined(_KERNEL) 175 tv.tv_sec = 0; 176 GETKTIME(&tv); 177 #endif 178 for (i = 0; i < ifs->ifs_fr_statesize; i++) { 179 /* 180 * XXX - ips_seed[X] should be a random number of sorts. 
181 */ 182 #if !defined(NEED_LOCAL_RAND) && defined(_KERNEL) 183 ifs->ifs_ips_seed[i] = ipf_random(); 184 #else 185 ifs->ifs_ips_seed[i] = ((u_long)ifs->ifs_ips_seed + i) * 186 ifs->ifs_fr_statesize; 187 ifs->ifs_ips_seed[i] += tv.tv_sec; 188 ifs->ifs_ips_seed[i] *= (u_long)ifs->ifs_ips_seed; 189 ifs->ifs_ips_seed[i] ^= 0x5a5aa5a5; 190 ifs->ifs_ips_seed[i] *= ifs->ifs_fr_statemax; 191 #endif 192 } 193 194 /* fill icmp reply type table */ 195 for (i = 0; i <= ICMP_MAXTYPE; i++) 196 icmpreplytype4[i] = -1; 197 icmpreplytype4[ICMP_ECHO] = ICMP_ECHOREPLY; 198 icmpreplytype4[ICMP_TSTAMP] = ICMP_TSTAMPREPLY; 199 icmpreplytype4[ICMP_IREQ] = ICMP_IREQREPLY; 200 icmpreplytype4[ICMP_MASKREQ] = ICMP_MASKREPLY; 201 #ifdef USE_INET6 202 /* fill icmp reply type table */ 203 for (i = 0; i <= ICMP6_MAXTYPE; i++) 204 icmpreplytype6[i] = -1; 205 icmpreplytype6[ICMP6_ECHO_REQUEST] = ICMP6_ECHO_REPLY; 206 icmpreplytype6[ICMP6_MEMBERSHIP_QUERY] = ICMP6_MEMBERSHIP_REPORT; 207 icmpreplytype6[ICMP6_NI_QUERY] = ICMP6_NI_REPLY; 208 icmpreplytype6[ND_ROUTER_SOLICIT] = ND_ROUTER_ADVERT; 209 icmpreplytype6[ND_NEIGHBOR_SOLICIT] = ND_NEIGHBOR_ADVERT; 210 #endif 211 212 KMALLOCS(ifs->ifs_ips_stats.iss_bucketlen, u_long *, 213 ifs->ifs_fr_statesize * sizeof(u_long)); 214 if (ifs->ifs_ips_stats.iss_bucketlen == NULL) 215 return -1; 216 bzero((char *)ifs->ifs_ips_stats.iss_bucketlen, 217 ifs->ifs_fr_statesize * sizeof(u_long)); 218 219 if (ifs->ifs_fr_state_maxbucket == 0) { 220 for (i = ifs->ifs_fr_statesize; i > 0; i >>= 1) 221 ifs->ifs_fr_state_maxbucket++; 222 ifs->ifs_fr_state_maxbucket *= 2; 223 } 224 225 fr_sttab_init(ifs->ifs_ips_tqtqb, ifs); 226 ifs->ifs_ips_tqtqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_ips_udptq; 227 ifs->ifs_ips_udptq.ifq_ttl = (u_long)ifs->ifs_fr_udptimeout; 228 ifs->ifs_ips_udptq.ifq_ref = 1; 229 ifs->ifs_ips_udptq.ifq_head = NULL; 230 ifs->ifs_ips_udptq.ifq_tail = &ifs->ifs_ips_udptq.ifq_head; 231 MUTEX_INIT(&ifs->ifs_ips_udptq.ifq_lock, "ipftq udp tab"); 232 
ifs->ifs_ips_udptq.ifq_next = &ifs->ifs_ips_udpacktq; 233 ifs->ifs_ips_udpacktq.ifq_ttl = (u_long)ifs->ifs_fr_udpacktimeout; 234 ifs->ifs_ips_udpacktq.ifq_ref = 1; 235 ifs->ifs_ips_udpacktq.ifq_head = NULL; 236 ifs->ifs_ips_udpacktq.ifq_tail = &ifs->ifs_ips_udpacktq.ifq_head; 237 MUTEX_INIT(&ifs->ifs_ips_udpacktq.ifq_lock, "ipftq udpack tab"); 238 ifs->ifs_ips_udpacktq.ifq_next = &ifs->ifs_ips_icmptq; 239 ifs->ifs_ips_icmptq.ifq_ttl = (u_long)ifs->ifs_fr_icmptimeout; 240 ifs->ifs_ips_icmptq.ifq_ref = 1; 241 ifs->ifs_ips_icmptq.ifq_head = NULL; 242 ifs->ifs_ips_icmptq.ifq_tail = &ifs->ifs_ips_icmptq.ifq_head; 243 MUTEX_INIT(&ifs->ifs_ips_icmptq.ifq_lock, "ipftq icmp tab"); 244 ifs->ifs_ips_icmptq.ifq_next = &ifs->ifs_ips_icmpacktq; 245 ifs->ifs_ips_icmpacktq.ifq_ttl = (u_long)ifs->ifs_fr_icmpacktimeout; 246 ifs->ifs_ips_icmpacktq.ifq_ref = 1; 247 ifs->ifs_ips_icmpacktq.ifq_head = NULL; 248 ifs->ifs_ips_icmpacktq.ifq_tail = &ifs->ifs_ips_icmpacktq.ifq_head; 249 MUTEX_INIT(&ifs->ifs_ips_icmpacktq.ifq_lock, "ipftq icmpack tab"); 250 ifs->ifs_ips_icmpacktq.ifq_next = &ifs->ifs_ips_iptq; 251 ifs->ifs_ips_iptq.ifq_ttl = (u_long)ifs->ifs_fr_iptimeout; 252 ifs->ifs_ips_iptq.ifq_ref = 1; 253 ifs->ifs_ips_iptq.ifq_head = NULL; 254 ifs->ifs_ips_iptq.ifq_tail = &ifs->ifs_ips_iptq.ifq_head; 255 MUTEX_INIT(&ifs->ifs_ips_iptq.ifq_lock, "ipftq ip tab"); 256 ifs->ifs_ips_iptq.ifq_next = &ifs->ifs_ips_deletetq; 257 /* entry's ttl in deletetq is just 1 tick */ 258 ifs->ifs_ips_deletetq.ifq_ttl = (u_long) 1; 259 ifs->ifs_ips_deletetq.ifq_ref = 1; 260 ifs->ifs_ips_deletetq.ifq_head = NULL; 261 ifs->ifs_ips_deletetq.ifq_tail = &ifs->ifs_ips_deletetq.ifq_head; 262 MUTEX_INIT(&ifs->ifs_ips_deletetq.ifq_lock, "state delete queue"); 263 ifs->ifs_ips_deletetq.ifq_next = NULL; 264 265 RWLOCK_INIT(&ifs->ifs_ipf_state, "ipf IP state rwlock"); 266 MUTEX_INIT(&ifs->ifs_ipf_stinsert, "ipf state insert mutex"); 267 ifs->ifs_fr_state_init = 1; 268 269 ifs->ifs_ips_last_force_flush = 
ifs->ifs_fr_ticks; 270 return 0; 271 } 272 273 274 /* ------------------------------------------------------------------------ */ 275 /* Function: fr_stateunload */ 276 /* Returns: Nil */ 277 /* Parameters: Nil */ 278 /* */ 279 /* Release and destroy any resources acquired or initialised so that */ 280 /* IPFilter can be unloaded or re-initialised. */ 281 /* ------------------------------------------------------------------------ */ 282 void fr_stateunload(ifs) 283 ipf_stack_t *ifs; 284 { 285 ipftq_t *ifq, *ifqnext; 286 ipstate_t *is; 287 288 while ((is = ifs->ifs_ips_list) != NULL) 289 fr_delstate(is, 0, ifs); 290 291 /* 292 * Proxy timeout queues are not cleaned here because although they 293 * exist on the state list, appr_unload is called after fr_stateunload 294 * and the proxies actually are responsible for them being created. 295 * Should the proxy timeouts have their own list? There's no real 296 * justification as this is the only complicationA 297 */ 298 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) { 299 ifqnext = ifq->ifq_next; 300 if (((ifq->ifq_flags & IFQF_PROXY) == 0) && 301 (fr_deletetimeoutqueue(ifq) == 0)) 302 fr_freetimeoutqueue(ifq, ifs); 303 } 304 305 ifs->ifs_ips_stats.iss_inuse = 0; 306 ifs->ifs_ips_num = 0; 307 308 if (ifs->ifs_fr_state_init == 1) { 309 fr_sttab_destroy(ifs->ifs_ips_tqtqb); 310 MUTEX_DESTROY(&ifs->ifs_ips_udptq.ifq_lock); 311 MUTEX_DESTROY(&ifs->ifs_ips_icmptq.ifq_lock); 312 MUTEX_DESTROY(&ifs->ifs_ips_udpacktq.ifq_lock); 313 MUTEX_DESTROY(&ifs->ifs_ips_icmpacktq.ifq_lock); 314 MUTEX_DESTROY(&ifs->ifs_ips_iptq.ifq_lock); 315 MUTEX_DESTROY(&ifs->ifs_ips_deletetq.ifq_lock); 316 } 317 318 if (ifs->ifs_ips_table != NULL) { 319 KFREES(ifs->ifs_ips_table, 320 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_table)); 321 ifs->ifs_ips_table = NULL; 322 } 323 324 if (ifs->ifs_ips_seed != NULL) { 325 KFREES(ifs->ifs_ips_seed, 326 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed)); 327 ifs->ifs_ips_seed = NULL; 328 } 329 330 
if (ifs->ifs_ips_stats.iss_bucketlen != NULL) { 331 KFREES(ifs->ifs_ips_stats.iss_bucketlen, 332 ifs->ifs_fr_statesize * sizeof(u_long)); 333 ifs->ifs_ips_stats.iss_bucketlen = NULL; 334 } 335 336 if (ifs->ifs_fr_state_maxbucket_reset == 1) 337 ifs->ifs_fr_state_maxbucket = 0; 338 339 if (ifs->ifs_fr_state_init == 1) { 340 ifs->ifs_fr_state_init = 0; 341 RW_DESTROY(&ifs->ifs_ipf_state); 342 MUTEX_DESTROY(&ifs->ifs_ipf_stinsert); 343 } 344 } 345 346 347 /* ------------------------------------------------------------------------ */ 348 /* Function: fr_statetstats */ 349 /* Returns: ips_state_t* - pointer to state stats structure */ 350 /* Parameters: Nil */ 351 /* */ 352 /* Put all the current numbers and pointers into a single struct and return */ 353 /* a pointer to it. */ 354 /* ------------------------------------------------------------------------ */ 355 static ips_stat_t *fr_statetstats(ifs) 356 ipf_stack_t *ifs; 357 { 358 ifs->ifs_ips_stats.iss_active = ifs->ifs_ips_num; 359 ifs->ifs_ips_stats.iss_statesize = ifs->ifs_fr_statesize; 360 ifs->ifs_ips_stats.iss_statemax = ifs->ifs_fr_statemax; 361 ifs->ifs_ips_stats.iss_table = ifs->ifs_ips_table; 362 ifs->ifs_ips_stats.iss_list = ifs->ifs_ips_list; 363 ifs->ifs_ips_stats.iss_ticks = ifs->ifs_fr_ticks; 364 return &ifs->ifs_ips_stats; 365 } 366 367 /* ------------------------------------------------------------------------ */ 368 /* Function: fr_state_remove */ 369 /* Returns: int - 0 == success, != 0 == failure */ 370 /* Parameters: data(I) - pointer to state structure to delete from table */ 371 /* */ 372 /* Search for a state structure that matches the one passed, according to */ 373 /* the IP addresses and other protocol specific information. 
*/ 374 /* ------------------------------------------------------------------------ */ 375 static int fr_state_remove(data, ifs) 376 caddr_t data; 377 ipf_stack_t *ifs; 378 { 379 ipstate_t *sp, st; 380 int error; 381 382 sp = &st; 383 error = fr_inobj(data, &st, IPFOBJ_IPSTATE); 384 if (error) 385 return EFAULT; 386 387 WRITE_ENTER(&ifs->ifs_ipf_state); 388 for (sp = ifs->ifs_ips_list; sp; sp = sp->is_next) 389 if ((sp->is_p == st.is_p) && (sp->is_v == st.is_v) && 390 !bcmp((caddr_t)&sp->is_src, (caddr_t)&st.is_src, 391 sizeof(st.is_src)) && 392 !bcmp((caddr_t)&sp->is_dst, (caddr_t)&st.is_dst, 393 sizeof(st.is_dst)) && 394 !bcmp((caddr_t)&sp->is_ps, (caddr_t)&st.is_ps, 395 sizeof(st.is_ps))) { 396 fr_delstate(sp, ISL_REMOVE, ifs); 397 RWLOCK_EXIT(&ifs->ifs_ipf_state); 398 return 0; 399 } 400 RWLOCK_EXIT(&ifs->ifs_ipf_state); 401 return ESRCH; 402 } 403 404 405 /* ------------------------------------------------------------------------ */ 406 /* Function: fr_state_ioctl */ 407 /* Returns: int - 0 == success, != 0 == failure */ 408 /* Parameters: data(I) - pointer to ioctl data */ 409 /* cmd(I) - ioctl command integer */ 410 /* mode(I) - file mode bits used with open */ 411 /* */ 412 /* Processes an ioctl call made to operate on the IP Filter state device. */ 413 /* ------------------------------------------------------------------------ */ 414 int fr_state_ioctl(data, cmd, mode, uid, ctx, ifs) 415 caddr_t data; 416 ioctlcmd_t cmd; 417 int mode, uid; 418 void *ctx; 419 ipf_stack_t *ifs; 420 { 421 int arg, ret, error = 0; 422 423 switch (cmd) 424 { 425 /* 426 * Delete an entry from the state table. 
427 */ 428 case SIOCDELST : 429 error = fr_state_remove(data, ifs); 430 break; 431 /* 432 * Flush the state table 433 */ 434 case SIOCIPFFL : 435 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 436 if (error != 0) { 437 error = EFAULT; 438 } else { 439 if (arg == 0 || arg == 1) { 440 WRITE_ENTER(&ifs->ifs_ipf_state); 441 ret = fr_state_flush(arg, 4, ifs); 442 RWLOCK_EXIT(&ifs->ifs_ipf_state); 443 error = BCOPYOUT((char *)&ret, data, 444 sizeof(ret)); 445 if (error != 0) 446 return EFAULT; 447 } else { 448 error = EINVAL; 449 } 450 } 451 break; 452 453 #ifdef USE_INET6 454 case SIOCIPFL6 : 455 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 456 if (error != 0) { 457 error = EFAULT; 458 } else { 459 if (arg == 0 || arg == 1) { 460 WRITE_ENTER(&ifs->ifs_ipf_state); 461 ret = fr_state_flush(arg, 6, ifs); 462 RWLOCK_EXIT(&ifs->ifs_ipf_state); 463 error = BCOPYOUT((char *)&ret, data, 464 sizeof(ret)); 465 if (error != 0) 466 return EFAULT; 467 } else { 468 error = EINVAL; 469 } 470 } 471 break; 472 #endif 473 #ifdef IPFILTER_LOG 474 /* 475 * Flush the state log. 476 */ 477 case SIOCIPFFB : 478 if (!(mode & FWRITE)) 479 error = EPERM; 480 else { 481 int tmp; 482 483 tmp = ipflog_clear(IPL_LOGSTATE, ifs); 484 error = BCOPYOUT((char *)&tmp, data, sizeof(tmp)); 485 if (error != 0) 486 error = EFAULT; 487 } 488 break; 489 /* 490 * Turn logging of state information on/off. 491 */ 492 case SIOCSETLG : 493 if (!(mode & FWRITE)) { 494 error = EPERM; 495 } else { 496 error = BCOPYIN((char *)data, 497 (char *)&ifs->ifs_ipstate_logging, 498 sizeof(ifs->ifs_ipstate_logging)); 499 if (error != 0) 500 error = EFAULT; 501 } 502 break; 503 /* 504 * Return the current state of logging. 505 */ 506 case SIOCGETLG : 507 error = BCOPYOUT((char *)&ifs->ifs_ipstate_logging, 508 (char *)data, 509 sizeof(ifs->ifs_ipstate_logging)); 510 if (error != 0) 511 error = EFAULT; 512 break; 513 /* 514 * Return the number of bytes currently waiting to be read. 
515 */ 516 case FIONREAD : 517 arg = ifs->ifs_iplused[IPL_LOGSTATE]; /* returned in an int */ 518 error = BCOPYOUT((char *)&arg, data, sizeof(arg)); 519 if (error != 0) 520 error = EFAULT; 521 break; 522 #endif 523 /* 524 * Get the current state statistics. 525 */ 526 case SIOCGETFS : 527 error = fr_outobj(data, fr_statetstats(ifs), IPFOBJ_STATESTAT); 528 break; 529 /* 530 * Lock/Unlock the state table. (Locking prevents any changes, which 531 * means no packets match). 532 */ 533 case SIOCSTLCK : 534 if (!(mode & FWRITE)) { 535 error = EPERM; 536 } else { 537 error = fr_lock(data, &ifs->ifs_fr_state_lock); 538 } 539 break; 540 /* 541 * Add an entry to the current state table. 542 */ 543 case SIOCSTPUT : 544 if (!ifs->ifs_fr_state_lock || !(mode &FWRITE)) { 545 error = EACCES; 546 break; 547 } 548 error = fr_stputent(data, ifs); 549 break; 550 /* 551 * Get a state table entry. 552 */ 553 case SIOCSTGET : 554 if (!ifs->ifs_fr_state_lock) { 555 error = EACCES; 556 break; 557 } 558 error = fr_stgetent(data, ifs); 559 break; 560 561 case SIOCGENITER : 562 { 563 ipftoken_t *token; 564 ipfgeniter_t iter; 565 566 error = fr_inobj(data, &iter, IPFOBJ_GENITER); 567 if (error != 0) 568 break; 569 570 token = ipf_findtoken(IPFGENITER_STATE, uid, ctx, ifs); 571 if (token != NULL) 572 error = fr_stateiter(token, &iter, ifs); 573 else 574 error = ESRCH; 575 RWLOCK_EXIT(&ifs->ifs_ipf_tokens); 576 break; 577 } 578 579 case SIOCIPFDELTOK : 580 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 581 if (error != 0) { 582 error = EFAULT; 583 } else { 584 error = ipf_deltoken(arg, uid, ctx, ifs); 585 } 586 break; 587 588 default : 589 error = EINVAL; 590 break; 591 } 592 return error; 593 } 594 595 596 /* ------------------------------------------------------------------------ */ 597 /* Function: fr_stgetent */ 598 /* Returns: int - 0 == success, != 0 == failure */ 599 /* Parameters: data(I) - pointer to state structure to retrieve from table */ 600 /* */ 601 /* Copy out state 
information from the kernel to a user space process. If */ 602 /* there is a filter rule associated with the state entry, copy that out */ 603 /* as well. The entry to copy out is taken from the value of "ips_next" in */ 604 /* the struct passed in and if not null and not found in the list of current*/ 605 /* state entries, the retrieval fails. */ 606 /* ------------------------------------------------------------------------ */ 607 int fr_stgetent(data, ifs) 608 caddr_t data; 609 ipf_stack_t *ifs; 610 { 611 ipstate_t *is, *isn; 612 ipstate_save_t ips; 613 int error; 614 615 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); 616 if (error) 617 return EFAULT; 618 619 isn = ips.ips_next; 620 if (isn == NULL) { 621 isn = ifs->ifs_ips_list; 622 if (isn == NULL) { 623 if (ips.ips_next == NULL) 624 return ENOENT; 625 return 0; 626 } 627 } else { 628 /* 629 * Make sure the pointer we're copying from exists in the 630 * current list of entries. Security precaution to prevent 631 * copying of random kernel data. 632 */ 633 for (is = ifs->ifs_ips_list; is; is = is->is_next) 634 if (is == isn) 635 break; 636 if (!is) 637 return ESRCH; 638 } 639 ips.ips_next = isn->is_next; 640 bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is)); 641 ips.ips_rule = isn->is_rule; 642 if (isn->is_rule != NULL) 643 bcopy((char *)isn->is_rule, (char *)&ips.ips_fr, 644 sizeof(ips.ips_fr)); 645 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); 646 if (error) 647 return EFAULT; 648 return 0; 649 } 650 651 652 /* ------------------------------------------------------------------------ */ 653 /* Function: fr_stputent */ 654 /* Returns: int - 0 == success, != 0 == failure */ 655 /* Parameters: data(I) - pointer to state information struct */ 656 /* */ 657 /* This function implements the SIOCSTPUT ioctl: insert a state entry into */ 658 /* the state table. If the state info. 
includes a pointer to a filter rule */ 659 /* then also add in an orphaned rule (will not show up in any "ipfstat -io" */ 660 /* output. */ 661 /* ------------------------------------------------------------------------ */ 662 int fr_stputent(data, ifs) 663 caddr_t data; 664 ipf_stack_t *ifs; 665 { 666 ipstate_t *is, *isn; 667 ipstate_save_t ips; 668 int error, i; 669 frentry_t *fr; 670 char *name; 671 672 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); 673 if (error) 674 return EFAULT; 675 676 KMALLOC(isn, ipstate_t *); 677 if (isn == NULL) 678 return ENOMEM; 679 680 bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn)); 681 bzero((char *)isn, offsetof(struct ipstate, is_pkts)); 682 isn->is_sti.tqe_pnext = NULL; 683 isn->is_sti.tqe_next = NULL; 684 isn->is_sti.tqe_ifq = NULL; 685 isn->is_sti.tqe_parent = isn; 686 isn->is_ifp[0] = NULL; 687 isn->is_ifp[1] = NULL; 688 isn->is_ifp[2] = NULL; 689 isn->is_ifp[3] = NULL; 690 isn->is_sync = NULL; 691 fr = ips.ips_rule; 692 693 if (fr == NULL) { 694 READ_ENTER(&ifs->ifs_ipf_state); 695 fr_stinsert(isn, 0, ifs); 696 MUTEX_EXIT(&isn->is_lock); 697 RWLOCK_EXIT(&ifs->ifs_ipf_state); 698 return 0; 699 } 700 701 if (isn->is_flags & SI_NEWFR) { 702 KMALLOC(fr, frentry_t *); 703 if (fr == NULL) { 704 KFREE(isn); 705 return ENOMEM; 706 } 707 bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr)); 708 isn->is_rule = fr; 709 ips.ips_is.is_rule = fr; 710 MUTEX_NUKE(&fr->fr_lock); 711 MUTEX_INIT(&fr->fr_lock, "state filter rule lock"); 712 713 /* 714 * Look up all the interface names in the rule. 
715 */ 716 for (i = 0; i < 4; i++) { 717 name = fr->fr_ifnames[i]; 718 fr->fr_ifas[i] = fr_resolvenic(name, fr->fr_v, ifs); 719 name = isn->is_ifname[i]; 720 isn->is_ifp[i] = fr_resolvenic(name, isn->is_v, ifs); 721 } 722 723 fr->fr_ref = 0; 724 fr->fr_dsize = 0; 725 fr->fr_data = NULL; 726 fr->fr_type = FR_T_NONE; 727 728 fr_resolvedest(&fr->fr_tif, fr->fr_v, ifs); 729 fr_resolvedest(&fr->fr_dif, fr->fr_v, ifs); 730 fr_resolvedest(&fr->fr_rif, fr->fr_v, ifs); 731 732 /* 733 * send a copy back to userland of what we ended up 734 * to allow for verification. 735 */ 736 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); 737 if (error) { 738 KFREE(isn); 739 MUTEX_DESTROY(&fr->fr_lock); 740 KFREE(fr); 741 return EFAULT; 742 } 743 READ_ENTER(&ifs->ifs_ipf_state); 744 fr_stinsert(isn, 0, ifs); 745 MUTEX_EXIT(&isn->is_lock); 746 RWLOCK_EXIT(&ifs->ifs_ipf_state); 747 748 } else { 749 READ_ENTER(&ifs->ifs_ipf_state); 750 for (is = ifs->ifs_ips_list; is; is = is->is_next) 751 if (is->is_rule == fr) { 752 fr_stinsert(isn, 0, ifs); 753 MUTEX_EXIT(&isn->is_lock); 754 break; 755 } 756 757 if (is == NULL) { 758 KFREE(isn); 759 isn = NULL; 760 } 761 RWLOCK_EXIT(&ifs->ifs_ipf_state); 762 763 return (isn == NULL) ? ESRCH : 0; 764 } 765 766 return 0; 767 } 768 769 770 /* ------------------------------------------------------------------------ */ 771 /* Function: fr_stinsert */ 772 /* Returns: Nil */ 773 /* Parameters: is(I) - pointer to state structure */ 774 /* rev(I) - flag indicating forward/reverse direction of packet */ 775 /* */ 776 /* Inserts a state structure into the hash table (for lookups) and the list */ 777 /* of state entries (for enumeration). Resolves all of the interface names */ 778 /* to pointers and adjusts running stats for the hash table as appropriate. */ 779 /* */ 780 /* Locking: it is assumed that some kind of lock on ipf_state is held. */ 781 /* Exits with is_lock initialised and held. 
*/ 782 /* ------------------------------------------------------------------------ */ 783 void fr_stinsert(is, rev, ifs) 784 ipstate_t *is; 785 int rev; 786 ipf_stack_t *ifs; 787 { 788 frentry_t *fr; 789 u_int hv; 790 int i; 791 792 MUTEX_INIT(&is->is_lock, "ipf state entry"); 793 794 fr = is->is_rule; 795 if (fr != NULL) { 796 MUTEX_ENTER(&fr->fr_lock); 797 fr->fr_ref++; 798 fr->fr_statecnt++; 799 MUTEX_EXIT(&fr->fr_lock); 800 } 801 802 /* 803 * Look up all the interface names in the state entry. 804 */ 805 for (i = 0; i < 4; i++) { 806 if (is->is_ifp[i] != NULL) 807 continue; 808 is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], is->is_v, ifs); 809 } 810 811 /* 812 * If we could trust is_hv, then the modulous would not be needed, but 813 * when running with IPFILTER_SYNC, this stops bad values. 814 */ 815 hv = is->is_hv % ifs->ifs_fr_statesize; 816 is->is_hv = hv; 817 818 /* 819 * We need to get both of these locks...the first because it is 820 * possible that once the insert is complete another packet might 821 * come along, match the entry and want to update it. 822 */ 823 MUTEX_ENTER(&is->is_lock); 824 MUTEX_ENTER(&ifs->ifs_ipf_stinsert); 825 826 /* 827 * add into list table. 
828 */ 829 if (ifs->ifs_ips_list != NULL) 830 ifs->ifs_ips_list->is_pnext = &is->is_next; 831 is->is_pnext = &ifs->ifs_ips_list; 832 is->is_next = ifs->ifs_ips_list; 833 ifs->ifs_ips_list = is; 834 835 if (ifs->ifs_ips_table[hv] != NULL) 836 ifs->ifs_ips_table[hv]->is_phnext = &is->is_hnext; 837 else 838 ifs->ifs_ips_stats.iss_inuse++; 839 is->is_phnext = ifs->ifs_ips_table + hv; 840 is->is_hnext = ifs->ifs_ips_table[hv]; 841 ifs->ifs_ips_table[hv] = is; 842 ifs->ifs_ips_stats.iss_bucketlen[hv]++; 843 ifs->ifs_ips_num++; 844 MUTEX_EXIT(&ifs->ifs_ipf_stinsert); 845 846 fr_setstatequeue(is, rev, ifs); 847 } 848 849 /* ------------------------------------------------------------------------ */ 850 /* Function: fr_match_ipv4addrs */ 851 /* Returns: int - 2 strong match (same addresses, same direction) */ 852 /* 1 weak match (same address, opposite direction) */ 853 /* 0 no match */ 854 /* */ 855 /* Function matches IPv4 addresses. */ 856 /* ------------------------------------------------------------------------ */ 857 static int fr_match_ipv4addrs(is1, is2) 858 ipstate_t *is1; 859 ipstate_t *is2; 860 { 861 int rv; 862 863 if (is1->is_saddr == is2->is_saddr && is1->is_daddr == is2->is_daddr) 864 rv = 2; 865 else if (is1->is_saddr == is2->is_daddr && 866 is1->is_daddr == is2->is_saddr) 867 rv = 1; 868 else 869 rv = 0; 870 871 return (rv); 872 } 873 874 /* ------------------------------------------------------------------------ */ 875 /* Function: fr_match_ipv6addrs */ 876 /* Returns: int - 2 strong match (same addresses, same direction) */ 877 /* 1 weak match (same addresses, opposite direction) */ 878 /* 0 no match */ 879 /* */ 880 /* Function matches IPv6 addresses. 
*/ 881 /* ------------------------------------------------------------------------ */ 882 static int fr_match_ipv6addrs(is1, is2) 883 ipstate_t *is1; 884 ipstate_t *is2; 885 { 886 int rv; 887 888 if (IP6_EQ(&is1->is_src, &is2->is_src) && 889 IP6_EQ(&is1->is_dst, &is2->is_dst)) 890 rv = 2; 891 else if (IP6_EQ(&is1->is_src, &is2->is_dst) && 892 IP6_EQ(&is1->is_dst, &is2->is_src)) { 893 rv = 1; 894 } 895 else 896 rv = 0; 897 898 return (rv); 899 } 900 /* ------------------------------------------------------------------------ */ 901 /* Function: fr_match_addresses */ 902 /* Returns: int - 2 strong match (same addresses, same direction) */ 903 /* 1 weak match (same address, opposite directions) */ 904 /* 0 no match */ 905 /* Parameters: is1, is2 pointers to states we are checking */ 906 /* */ 907 /* Matches addresses, function uses fr_match_ipvXaddrs() to deal with IPv4 */ 908 /* and IPv6 address format. */ 909 /* ------------------------------------------------------------------------ */ 910 static int fr_match_addresses(is1, is2) 911 ipstate_t *is1; 912 ipstate_t *is2; 913 { 914 int rv; 915 916 if (is1->is_v == 4) { 917 rv = fr_match_ipv4addrs(is1, is2); 918 } else { 919 rv = fr_match_ipv6addrs(is1, is2); 920 } 921 922 return (rv); 923 } 924 925 /* ------------------------------------------------------------------------ */ 926 /* Function: fr_match_ppairs */ 927 /* Returns: int - 2 strong match (same ports, same direction) */ 928 /* 1 weak match (same ports, different direction) */ 929 /* 0 no match */ 930 /* Parameters ppairs1, ppairs - src, dst ports we want to match. */ 931 /* */ 932 /* Matches two port_pair_t types (port pairs). Each port pair contains */ 933 /* src, dst port, which belong to session (state entry). 
*/ 934 /* ------------------------------------------------------------------------ */ 935 static int fr_match_ppairs(ppairs1, ppairs2) 936 port_pair_t *ppairs1; 937 port_pair_t *ppairs2; 938 { 939 int rv; 940 941 if (ppairs1->pp_sport == ppairs2->pp_sport && 942 ppairs1->pp_dport == ppairs2->pp_dport) 943 rv = 2; 944 else if (ppairs1->pp_sport == ppairs2->pp_dport && 945 ppairs1->pp_dport == ppairs2->pp_sport) 946 rv = 1; 947 else 948 rv = 0; 949 950 return (rv); 951 } 952 953 /* ------------------------------------------------------------------------ */ 954 /* Function: fr_match_l4_hdr */ 955 /* Returns: int - 0 no match, */ 956 /* 1 weak match (same ports, different directions) */ 957 /* 2 strong match (same ports, same direction) */ 958 /* Parameters is1, is2 - states we want to match */ 959 /* */ 960 /* Function matches L4 header data (source ports for TCP, UDP, CallIds for */ 961 /* GRE protocol). */ 962 /* ------------------------------------------------------------------------ */ 963 static int fr_match_l4_hdr(is1, is2) 964 ipstate_t *is1; 965 ipstate_t *is2; 966 { 967 int rv = 0; 968 port_pair_t pp1; 969 port_pair_t pp2; 970 971 if (is1->is_p != is2->is_p) 972 return (0); 973 974 switch (is1->is_p) { 975 case IPPROTO_TCP: 976 pp1.pp_sport = is1->is_ps.is_ts.ts_sport; 977 pp1.pp_dport = is1->is_ps.is_ts.ts_dport; 978 pp2.pp_sport = is2->is_ps.is_ts.ts_sport; 979 pp2.pp_dport = is2->is_ps.is_ts.ts_dport; 980 rv = fr_match_ppairs(&pp1, &pp2); 981 break; 982 case IPPROTO_UDP: 983 pp1.pp_sport = is1->is_ps.is_us.us_sport; 984 pp1.pp_dport = is1->is_ps.is_us.us_dport; 985 pp2.pp_sport = is2->is_ps.is_us.us_sport; 986 pp2.pp_dport = is2->is_ps.is_us.us_dport; 987 rv = fr_match_ppairs(&pp1, &pp2); 988 break; 989 case IPPROTO_GRE: 990 /* greinfo_t can be also interprted as port pair */ 991 pp1.pp_sport = is1->is_ps.is_ug.gs_call[0]; 992 pp1.pp_dport = is1->is_ps.is_ug.gs_call[1]; 993 pp2.pp_sport = is2->is_ps.is_ug.gs_call[0]; 994 pp2.pp_dport = 
is2->is_ps.is_ug.gs_call[1]; 995 rv = fr_match_ppairs(&pp1, &pp2); 996 break; 997 case IPPROTO_ICMP: 998 case IPPROTO_ICMPV6: 999 if (bcmp(&is1->is_ps, &is2->is_ps, sizeof (icmpinfo_t))) 1000 rv = 1; 1001 else 1002 rv = 0; 1003 break; 1004 default: 1005 rv = 0; 1006 } 1007 1008 return (rv); 1009 } 1010 1011 /* ------------------------------------------------------------------------ */ 1012 /* Function: fr_matchstates */ 1013 /* Returns: int - nonzero match, zero no match */ 1014 /* Parameters is1, is2 - states we want to match */ 1015 /* */ 1016 /* The state entries are equal (identical match) if they belong to the same */ 1017 /* session. Any time new state entry is being added the fr_addstate() */ 1018 /* function creates temporal state entry from the data it gets from IP and */ 1019 /* L4 header. The fr_matchstats() must be also aware of packet direction, */ 1020 /* which is also stored within the state entry. We should keep in mind the */ 1021 /* information about packet direction is spread accross L3 (addresses) and */ 1022 /* L4 (ports). There are three possible relationships betwee is1, is2: */ 1023 /* - no match (match(is1, is2) == 0)) */ 1024 /* - weak match same addresses (ports), but different */ 1025 /* directions (1) (fr_match_xxxx(is1, is2) == 1) */ 1026 /* - strong match same addresses (ports) and same directions */ 1027 /* (2) (fr_match_xxxx(is1, is2) == 2) */ 1028 /* */ 1029 /* There are functions, which match match addresses (L3 header) in is1, is2 */ 1030 /* and functions, which are used to compare ports (L4 header) data. We say */ 1031 /* the is1 and is2 are same (identical) if there is a match */ 1032 /* (fr_match_l4_hdr(is1, is2) != 0) and matchlevels are same for entries */ 1033 /* (fr_match_l3_hdr(is1, is2) == fr_match_l4_hdr(is1, is2)) for is1, is2. */ 1034 /* Such requirement deals with case as follows: */ 1035 /* suppose there are two connections between hosts A, B. 
Connection 1: */ 1036 /* a.a.a.a:12345 <=> b.b.b.b:54321 */ 1037 /* Connection 2: */ 1038 /* a.a.a.a:54321 <=> b.b.b.b:12345 */ 1039 /* since we've introduced match levels into our fr_matchstates(), we are */ 1040 /* able to identify, which packets belong to connection A and which belong */ 1041 /* to connection B. Assume there are two entries is1, is2. is1 has been */ 1042 /* from con. 1 packet, which travelled from A to B: */ 1043 /* a.a.a.a:12345 -> b.b.b.b:54321 */ 1044 /* while s2, has been created from packet which belongs to con. 2 and is */ 1045 /* also coming from A to B: */ 1046 /* a.a.a.a:54321 -> b.b.b.b:12345 */ 1047 /* fr_match_l3_hdr(is1, is2) == 2 -> strong match, while */ 1048 /* fr_match_l4_hdr(is1, is2) == 1 -> weak match. Since match levels are */ 1049 /* different the state entries are not identical -> no match as a final */ 1050 /* result. */ 1051 /* ------------------------------------------------------------------------ */ 1052 static int fr_matchstates(is1, is2) 1053 ipstate_t *is1; 1054 ipstate_t *is2; 1055 { 1056 int rv; 1057 int amatch; 1058 int pmatch; 1059 1060 if (bcmp(&is1->is_pass, &is2->is_pass, 1061 offsetof(struct ipstate, is_ps) - 1062 offsetof(struct ipstate, is_pass)) == 0) { 1063 1064 pmatch = fr_match_l4_hdr(is1, is2); 1065 amatch = fr_match_addresses(is1, is2); 1066 /* 1067 * If addresses match (amatch != 0), then 'match levels' 1068 * must be same for matching entries. If amatch and pmatch 1069 * have different values (different match levels), then 1070 * is1 and is2 belong to different sessions. 
1071 */ 1072 rv = (amatch != 0) && (amatch == pmatch); 1073 } 1074 else 1075 rv = 0; 1076 1077 return (rv); 1078 } 1079 1080 /* ------------------------------------------------------------------------ */ 1081 /* Function: fr_addstate */ 1082 /* Returns: ipstate_t* - NULL == failure, else pointer to new state */ 1083 /* Parameters: fin(I) - pointer to packet information */ 1084 /* stsave(O) - pointer to place to save pointer to created */ 1085 /* state structure. */ 1086 /* flags(I) - flags to use when creating the structure */ 1087 /* */ 1088 /* Creates a new IP state structure from the packet information collected. */ 1089 /* Inserts it into the state table and appends to the bottom of the active */ 1090 /* list. If the capacity of the table has reached the maximum allowed then */ 1091 /* the call will fail and a flush is scheduled for the next timeout call. */ 1092 /* ------------------------------------------------------------------------ */ 1093 ipstate_t *fr_addstate(fin, stsave, flags) 1094 fr_info_t *fin; 1095 ipstate_t **stsave; 1096 u_int flags; 1097 { 1098 ipstate_t *is, ips; 1099 struct icmp *ic; 1100 u_int pass, hv; 1101 frentry_t *fr; 1102 tcphdr_t *tcp; 1103 grehdr_t *gre; 1104 void *ifp; 1105 int out; 1106 ipf_stack_t *ifs = fin->fin_ifs; 1107 1108 if (ifs->ifs_fr_state_lock || 1109 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) 1110 return NULL; 1111 1112 if ((fin->fin_flx & FI_OOW) && !(fin->fin_tcpf & TH_SYN)) 1113 return NULL; 1114 1115 /* 1116 * If a "keep state" rule has reached the maximum number of references 1117 * to it, then schedule an automatic flush in case we can clear out 1118 * some "dead old wood". Note that because the lock isn't held on 1119 * fr it is possible that we could overflow. 
The cost of overflowing 1120 * is being ignored here as the number by which it can overflow is 1121 * a product of the number of simultaneous threads that could be 1122 * executing in here, so a limit of 100 won't result in 200, but could 1123 * result in 101 or 102. 1124 */ 1125 fr = fin->fin_fr; 1126 if (fr != NULL) { 1127 if ((ifs->ifs_ips_num == ifs->ifs_fr_statemax) && (fr->fr_statemax == 0)) { 1128 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 1129 ifs->ifs_fr_state_doflush = 1; 1130 return NULL; 1131 } 1132 if ((fr->fr_statemax != 0) && 1133 (fr->fr_statecnt >= fr->fr_statemax)) { 1134 ATOMIC_INCL(ifs->ifs_ips_stats.iss_maxref); 1135 ifs->ifs_fr_state_doflush = 1; 1136 return NULL; 1137 } 1138 } 1139 1140 ic = NULL; 1141 tcp = NULL; 1142 out = fin->fin_out; 1143 is = &ips; 1144 bzero((char *)is, sizeof(*is)); 1145 1146 if (fr == NULL) { 1147 pass = ifs->ifs_fr_flags; 1148 is->is_tag = FR_NOLOGTAG; 1149 } else { 1150 pass = fr->fr_flags; 1151 } 1152 1153 is->is_die = 1 + ifs->ifs_fr_ticks; 1154 /* 1155 * We want to check everything that is a property of this packet, 1156 * but we don't (automatically) care about it's fragment status as 1157 * this may change. 1158 */ 1159 is->is_pass = pass; 1160 is->is_v = fin->fin_v; 1161 is->is_opt[0] = fin->fin_optmsk; 1162 is->is_optmsk[0] = 0xffffffff; 1163 is->is_optmsk[1] = 0xffffffff; 1164 if (is->is_v == 6) { 1165 is->is_opt[0] &= ~0x8; 1166 is->is_optmsk[0] &= ~0x8; 1167 is->is_optmsk[1] &= ~0x8; 1168 } 1169 is->is_sec = fin->fin_secmsk; 1170 is->is_secmsk = 0xffff; 1171 is->is_auth = fin->fin_auth; 1172 is->is_authmsk = 0xffff; 1173 1174 /* 1175 * Copy and calculate... 1176 */ 1177 hv = (is->is_p = fin->fin_fi.fi_p); 1178 is->is_src = fin->fin_fi.fi_src; 1179 hv += is->is_saddr; 1180 is->is_dst = fin->fin_fi.fi_dst; 1181 hv += is->is_daddr; 1182 #ifdef USE_INET6 1183 if (fin->fin_v == 6) { 1184 /* 1185 * For ICMPv6, we check to see if the destination address is 1186 * a multicast address. 
If it is, do not include it in the 1187 * calculation of the hash because the correct reply will come 1188 * back from a real address, not a multicast address. 1189 */ 1190 if ((is->is_p == IPPROTO_ICMPV6) && 1191 IN6_IS_ADDR_MULTICAST(&is->is_dst.in6)) { 1192 /* 1193 * So you can do keep state with neighbour discovery. 1194 * 1195 * Here we could use the address from the neighbour 1196 * solicit message to put in the state structure and 1197 * we could use that without a wildcard flag too... 1198 */ 1199 is->is_flags |= SI_W_DADDR; 1200 hv -= is->is_daddr; 1201 } else { 1202 hv += is->is_dst.i6[1]; 1203 hv += is->is_dst.i6[2]; 1204 hv += is->is_dst.i6[3]; 1205 } 1206 hv += is->is_src.i6[1]; 1207 hv += is->is_src.i6[2]; 1208 hv += is->is_src.i6[3]; 1209 } 1210 #endif 1211 if ((fin->fin_v == 4) && 1212 (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) { 1213 if (fin->fin_out == 0) { 1214 flags |= SI_W_DADDR|SI_CLONE; 1215 hv -= is->is_daddr; 1216 } else { 1217 flags |= SI_W_SADDR|SI_CLONE; 1218 hv -= is->is_saddr; 1219 } 1220 } 1221 1222 switch (is->is_p) 1223 { 1224 #ifdef USE_INET6 1225 case IPPROTO_ICMPV6 : 1226 ic = fin->fin_dp; 1227 1228 switch (ic->icmp_type) 1229 { 1230 case ICMP6_ECHO_REQUEST : 1231 is->is_icmp.ici_type = ic->icmp_type; 1232 hv += (is->is_icmp.ici_id = ic->icmp_id); 1233 break; 1234 case ICMP6_MEMBERSHIP_QUERY : 1235 case ND_ROUTER_SOLICIT : 1236 case ND_NEIGHBOR_SOLICIT : 1237 case ICMP6_NI_QUERY : 1238 is->is_icmp.ici_type = ic->icmp_type; 1239 break; 1240 default : 1241 return NULL; 1242 } 1243 ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp); 1244 break; 1245 #endif 1246 case IPPROTO_ICMP : 1247 ic = fin->fin_dp; 1248 1249 switch (ic->icmp_type) 1250 { 1251 case ICMP_ECHO : 1252 case ICMP_TSTAMP : 1253 case ICMP_IREQ : 1254 case ICMP_MASKREQ : 1255 is->is_icmp.ici_type = ic->icmp_type; 1256 hv += (is->is_icmp.ici_id = ic->icmp_id); 1257 break; 1258 default : 1259 return NULL; 1260 } 1261 ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp); 1262 
break; 1263 1264 case IPPROTO_GRE : 1265 gre = fin->fin_dp; 1266 1267 is->is_gre.gs_flags = gre->gr_flags; 1268 is->is_gre.gs_ptype = gre->gr_ptype; 1269 if (GRE_REV(is->is_gre.gs_flags) == 1) { 1270 is->is_call[0] = fin->fin_data[0]; 1271 is->is_call[1] = fin->fin_data[1]; 1272 } 1273 break; 1274 1275 case IPPROTO_TCP : 1276 tcp = fin->fin_dp; 1277 1278 if (tcp->th_flags & TH_RST) 1279 return NULL; 1280 /* 1281 * The endian of the ports doesn't matter, but the ack and 1282 * sequence numbers do as we do mathematics on them later. 1283 */ 1284 is->is_sport = htons(fin->fin_data[0]); 1285 is->is_dport = htons(fin->fin_data[1]); 1286 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) { 1287 hv += is->is_sport; 1288 hv += is->is_dport; 1289 } 1290 1291 /* 1292 * If this is a real packet then initialise fields in the 1293 * state information structure from the TCP header information. 1294 */ 1295 1296 is->is_maxdwin = 1; 1297 is->is_maxswin = ntohs(tcp->th_win); 1298 if (is->is_maxswin == 0) 1299 is->is_maxswin = 1; 1300 1301 if ((fin->fin_flx & FI_IGNORE) == 0) { 1302 is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen - 1303 (TCP_OFF(tcp) << 2) + 1304 ((tcp->th_flags & TH_SYN) ? 1 : 0) + 1305 ((tcp->th_flags & TH_FIN) ? 1 : 0); 1306 is->is_maxsend = is->is_send; 1307 1308 /* 1309 * Window scale option is only present in 1310 * SYN/SYN-ACK packet. 
1311 */ 1312 if ((tcp->th_flags & ~(TH_FIN|TH_ACK|TH_ECNALL)) == 1313 TH_SYN && 1314 (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) { 1315 if (fr_tcpoptions(fin, tcp, 1316 &is->is_tcp.ts_data[0]) == -1) { 1317 fin->fin_flx |= FI_BAD; 1318 } 1319 } 1320 1321 if ((fin->fin_out != 0) && (pass & FR_NEWISN) != 0) { 1322 fr_checknewisn(fin, is); 1323 fr_fixoutisn(fin, is); 1324 } 1325 1326 if ((tcp->th_flags & TH_OPENING) == TH_SYN) 1327 flags |= IS_TCPFSM; 1328 else { 1329 is->is_maxdwin = is->is_maxswin * 2; 1330 is->is_dend = ntohl(tcp->th_ack); 1331 is->is_maxdend = ntohl(tcp->th_ack); 1332 is->is_maxdwin *= 2; 1333 } 1334 } 1335 1336 /* 1337 * If we're creating state for a starting connection, start the 1338 * timer on it as we'll never see an error if it fails to 1339 * connect. 1340 */ 1341 ATOMIC_INCL(ifs->ifs_ips_stats.iss_tcp); 1342 break; 1343 1344 case IPPROTO_UDP : 1345 tcp = fin->fin_dp; 1346 1347 is->is_sport = htons(fin->fin_data[0]); 1348 is->is_dport = htons(fin->fin_data[1]); 1349 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) { 1350 hv += tcp->th_dport; 1351 hv += tcp->th_sport; 1352 } 1353 ATOMIC_INCL(ifs->ifs_ips_stats.iss_udp); 1354 break; 1355 1356 default : 1357 break; 1358 } 1359 hv = DOUBLE_HASH(hv, ifs); 1360 is->is_hv = hv; 1361 is->is_rule = fr; 1362 is->is_flags = flags & IS_INHERITED; 1363 1364 /* 1365 * Look for identical state. 1366 */ 1367 for (is = ifs->ifs_ips_table[is->is_hv % ifs->ifs_fr_statesize]; 1368 is != NULL; 1369 is = is->is_hnext) { 1370 if (fr_matchstates(&ips, is) == 1) 1371 break; 1372 } 1373 1374 /* 1375 * we've found a matching state -> state already exists, 1376 * we are not going to add a duplicate record. 
1377 */ 1378 if (is != NULL) 1379 return NULL; 1380 1381 if (ifs->ifs_ips_stats.iss_bucketlen[hv] >= ifs->ifs_fr_state_maxbucket) { 1382 ATOMIC_INCL(ifs->ifs_ips_stats.iss_bucketfull); 1383 return NULL; 1384 } 1385 KMALLOC(is, ipstate_t *); 1386 if (is == NULL) { 1387 ATOMIC_INCL(ifs->ifs_ips_stats.iss_nomem); 1388 return NULL; 1389 } 1390 bcopy((char *)&ips, (char *)is, sizeof(*is)); 1391 /* 1392 * Do not do the modulous here, it is done in fr_stinsert(). 1393 */ 1394 if (fr != NULL) { 1395 (void) strncpy(is->is_group, fr->fr_group, FR_GROUPLEN); 1396 if (fr->fr_age[0] != 0) { 1397 is->is_tqehead[0] = 1398 fr_addtimeoutqueue(&ifs->ifs_ips_utqe, 1399 fr->fr_age[0], ifs); 1400 is->is_sti.tqe_flags |= TQE_RULEBASED; 1401 } 1402 if (fr->fr_age[1] != 0) { 1403 is->is_tqehead[1] = 1404 fr_addtimeoutqueue(&ifs->ifs_ips_utqe, 1405 fr->fr_age[1], ifs); 1406 is->is_sti.tqe_flags |= TQE_RULEBASED; 1407 } 1408 is->is_tag = fr->fr_logtag; 1409 1410 is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1]; 1411 is->is_ifp[(1 - out) << 1] = fr->fr_ifas[2]; 1412 is->is_ifp[((1 - out) << 1) + 1] = fr->fr_ifas[3]; 1413 1414 if (((ifp = fr->fr_ifas[1]) != NULL) && 1415 (ifp != (void *)-1)) { 1416 COPYIFNAME(ifp, is->is_ifname[(out << 1) + 1], fr->fr_v); 1417 } 1418 if (((ifp = fr->fr_ifas[2]) != NULL) && 1419 (ifp != (void *)-1)) { 1420 COPYIFNAME(ifp, is->is_ifname[(1 - out) << 1], fr->fr_v); 1421 } 1422 if (((ifp = fr->fr_ifas[3]) != NULL) && 1423 (ifp != (void *)-1)) { 1424 COPYIFNAME(ifp, is->is_ifname[((1 - out) << 1) + 1], fr->fr_v); 1425 } 1426 } 1427 1428 is->is_ifp[out << 1] = fin->fin_ifp; 1429 if (fin->fin_ifp != NULL) { 1430 COPYIFNAME(fin->fin_ifp, is->is_ifname[out << 1], fr->fr_v); 1431 } 1432 1433 /* 1434 * It may seem strange to set is_ref to 2, but fr_check() will call 1435 * fr_statederef() after calling fr_addstate() and the idea is to 1436 * have it exist at the end of fr_check() with is_ref == 1. 
1437 */ 1438 is->is_ref = 2; 1439 is->is_pkts[0] = 0, is->is_bytes[0] = 0; 1440 is->is_pkts[1] = 0, is->is_bytes[1] = 0; 1441 is->is_pkts[2] = 0, is->is_bytes[2] = 0; 1442 is->is_pkts[3] = 0, is->is_bytes[3] = 0; 1443 if ((fin->fin_flx & FI_IGNORE) == 0) { 1444 is->is_pkts[out] = 1; 1445 is->is_bytes[out] = fin->fin_plen; 1446 is->is_flx[out][0] = fin->fin_flx & FI_CMP; 1447 is->is_flx[out][0] &= ~FI_OOW; 1448 } 1449 1450 if (pass & FR_STSTRICT) 1451 is->is_flags |= IS_STRICT; 1452 1453 if (pass & FR_STATESYNC) 1454 is->is_flags |= IS_STATESYNC; 1455 1456 if (flags & (SI_WILDP|SI_WILDA)) { 1457 ATOMIC_INCL(ifs->ifs_ips_stats.iss_wild); 1458 } 1459 is->is_rulen = fin->fin_rule; 1460 1461 1462 if (pass & FR_LOGFIRST) 1463 is->is_pass &= ~(FR_LOGFIRST|FR_LOG); 1464 1465 READ_ENTER(&ifs->ifs_ipf_state); 1466 is->is_me = stsave; 1467 1468 fr_stinsert(is, fin->fin_rev, ifs); 1469 1470 if (fin->fin_p == IPPROTO_TCP) { 1471 /* 1472 * If we're creating state for a starting connection, start the 1473 * timer on it as we'll never see an error if it fails to 1474 * connect. 
1475 */ 1476 (void) fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb, 1477 is->is_flags); 1478 MUTEX_EXIT(&is->is_lock); 1479 #ifdef IPFILTER_SCAN 1480 if ((is->is_flags & SI_CLONE) == 0) 1481 (void) ipsc_attachis(is); 1482 #endif 1483 } else { 1484 MUTEX_EXIT(&is->is_lock); 1485 } 1486 #ifdef IPFILTER_SYNC 1487 if ((is->is_flags & IS_STATESYNC) && ((is->is_flags & SI_CLONE) == 0)) 1488 is->is_sync = ipfsync_new(SMC_STATE, fin, is); 1489 #endif 1490 if (ifs->ifs_ipstate_logging) 1491 ipstate_log(is, ISL_NEW, ifs); 1492 1493 RWLOCK_EXIT(&ifs->ifs_ipf_state); 1494 fin->fin_state = is; 1495 fin->fin_rev = IP6_NEQ(&is->is_dst, &fin->fin_daddr); 1496 fin->fin_flx |= FI_STATE; 1497 if (fin->fin_flx & FI_FRAG) 1498 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); 1499 1500 return is; 1501 } 1502 1503 1504 /* ------------------------------------------------------------------------ */ 1505 /* Function: fr_tcpoptions */ 1506 /* Returns: int - 1 == packet matches state entry, 0 == it does not */ 1507 /* Parameters: fin(I) - pointer to packet information */ 1508 /* tcp(I) - pointer to TCP packet header */ 1509 /* td(I) - pointer to TCP data held as part of the state */ 1510 /* */ 1511 /* Look after the TCP header for any options and deal with those that are */ 1512 /* present. Record details about those that we recogise. 
*/ 1513 /* ------------------------------------------------------------------------ */ 1514 static int fr_tcpoptions(fin, tcp, td) 1515 fr_info_t *fin; 1516 tcphdr_t *tcp; 1517 tcpdata_t *td; 1518 { 1519 int off, mlen, ol, i, len, retval; 1520 char buf[64], *s, opt; 1521 mb_t *m = NULL; 1522 1523 len = (TCP_OFF(tcp) << 2); 1524 if (fin->fin_dlen < len) 1525 return 0; 1526 len -= sizeof(*tcp); 1527 1528 off = fin->fin_plen - fin->fin_dlen + sizeof(*tcp) + fin->fin_ipoff; 1529 1530 m = fin->fin_m; 1531 mlen = MSGDSIZE(m) - off; 1532 if (len > mlen) { 1533 len = mlen; 1534 retval = 0; 1535 } else { 1536 retval = 1; 1537 } 1538 1539 COPYDATA(m, off, len, buf); 1540 1541 for (s = buf; len > 0; ) { 1542 opt = *s; 1543 if (opt == TCPOPT_EOL) 1544 break; 1545 else if (opt == TCPOPT_NOP) 1546 ol = 1; 1547 else { 1548 if (len < 2) 1549 break; 1550 ol = (int)*(s + 1); 1551 if (ol < 2 || ol > len) 1552 break; 1553 1554 /* 1555 * Extract the TCP options we are interested in out of 1556 * the header and store them in the the tcpdata struct. 1557 */ 1558 switch (opt) 1559 { 1560 case TCPOPT_WINDOW : 1561 if (ol == TCPOLEN_WINDOW) { 1562 i = (int)*(s + 2); 1563 if (i > TCP_WSCALE_MAX) 1564 i = TCP_WSCALE_MAX; 1565 else if (i < 0) 1566 i = 0; 1567 td->td_winscale = i; 1568 td->td_winflags |= TCP_WSCALE_SEEN | 1569 TCP_WSCALE_FIRST; 1570 } else 1571 retval = -1; 1572 break; 1573 case TCPOPT_MAXSEG : 1574 /* 1575 * So, if we wanted to set the TCP MAXSEG, 1576 * it should be done here... 
1577 */ 1578 if (ol == TCPOLEN_MAXSEG) { 1579 i = (int)*(s + 2); 1580 i <<= 8; 1581 i += (int)*(s + 3); 1582 td->td_maxseg = i; 1583 } else 1584 retval = -1; 1585 break; 1586 case TCPOPT_SACK_PERMITTED : 1587 if (ol == TCPOLEN_SACK_PERMITTED) 1588 td->td_winflags |= TCP_SACK_PERMIT; 1589 else 1590 retval = -1; 1591 break; 1592 } 1593 } 1594 len -= ol; 1595 s += ol; 1596 } 1597 return retval; 1598 } 1599 1600 1601 /* ------------------------------------------------------------------------ */ 1602 /* Function: fr_tcpstate */ 1603 /* Returns: int - 1 == packet matches state entry, 0 == it does not */ 1604 /* Parameters: fin(I) - pointer to packet information */ 1605 /* tcp(I) - pointer to TCP packet header */ 1606 /* is(I) - pointer to master state structure */ 1607 /* */ 1608 /* Check to see if a packet with TCP headers fits within the TCP window. */ 1609 /* Change timeout depending on whether new packet is a SYN-ACK returning */ 1610 /* for a SYN or a RST or FIN which indicate time to close up shop. */ 1611 /* ------------------------------------------------------------------------ */ 1612 static int fr_tcpstate(fin, tcp, is) 1613 fr_info_t *fin; 1614 tcphdr_t *tcp; 1615 ipstate_t *is; 1616 { 1617 int source, ret = 0, flags; 1618 tcpdata_t *fdata, *tdata; 1619 ipf_stack_t *ifs = fin->fin_ifs; 1620 1621 source = !fin->fin_rev; 1622 if (((is->is_flags & IS_TCPFSM) != 0) && (source == 1) && 1623 (ntohs(is->is_sport) != fin->fin_data[0])) 1624 source = 0; 1625 fdata = &is->is_tcp.ts_data[!source]; 1626 tdata = &is->is_tcp.ts_data[source]; 1627 1628 MUTEX_ENTER(&is->is_lock); 1629 1630 /* 1631 * If a SYN packet is received for a connection that is in a half 1632 * closed state, then move its state entry to deletetq. In such case 1633 * the SYN packet will be consequently dropped. This allows new state 1634 * entry to be created with a retransmited SYN packet. 
1635 */ 1636 if ((tcp->th_flags & TH_OPENING) == TH_SYN) { 1637 if (((is->is_state[source] > IPF_TCPS_ESTABLISHED) || 1638 (is->is_state[source] == IPF_TCPS_CLOSED)) && 1639 ((is->is_state[!source] > IPF_TCPS_ESTABLISHED) || 1640 (is->is_state[!source] == IPF_TCPS_CLOSED))) { 1641 /* 1642 * Do not update is->is_sti.tqe_die in case state entry 1643 * is already present in deletetq. It prevents state 1644 * entry ttl update by retransmitted SYN packets, which 1645 * may arrive before timer tick kicks off. The SYN 1646 * packet will be dropped again. 1647 */ 1648 if (is->is_sti.tqe_ifq != &ifs->ifs_ips_deletetq) 1649 fr_movequeue(&is->is_sti, is->is_sti.tqe_ifq, 1650 &fin->fin_ifs->ifs_ips_deletetq, 1651 fin->fin_ifs); 1652 1653 MUTEX_EXIT(&is->is_lock); 1654 return 0; 1655 } 1656 } 1657 1658 if (fr_tcpinwindow(fin, fdata, tdata, tcp, is->is_flags)) { 1659 #ifdef IPFILTER_SCAN 1660 if (is->is_flags & (IS_SC_CLIENT|IS_SC_SERVER)) { 1661 ipsc_packet(fin, is); 1662 if (FR_ISBLOCK(is->is_pass)) { 1663 MUTEX_EXIT(&is->is_lock); 1664 return 1; 1665 } 1666 } 1667 #endif 1668 1669 /* 1670 * Nearing end of connection, start timeout. 1671 */ 1672 ret = fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb, 1673 is->is_flags); 1674 if (ret == 0) { 1675 MUTEX_EXIT(&is->is_lock); 1676 return 0; 1677 } 1678 1679 /* 1680 * set s0's as appropriate. Use syn-ack packet as it 1681 * contains both pieces of required information. 1682 */ 1683 /* 1684 * Window scale option is only present in SYN/SYN-ACK packet. 1685 * Compare with ~TH_FIN to mask out T/TCP setups. 
1686 */ 1687 flags = tcp->th_flags & ~(TH_FIN|TH_ECNALL); 1688 if (flags == (TH_SYN|TH_ACK)) { 1689 is->is_s0[source] = ntohl(tcp->th_ack); 1690 is->is_s0[!source] = ntohl(tcp->th_seq) + 1; 1691 if (TCP_OFF(tcp) > (sizeof (tcphdr_t) >> 2)) { 1692 (void) fr_tcpoptions(fin, tcp, fdata); 1693 } 1694 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) 1695 fr_checknewisn(fin, is); 1696 } else if (flags == TH_SYN) { 1697 is->is_s0[source] = ntohl(tcp->th_seq) + 1; 1698 if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) 1699 (void) fr_tcpoptions(fin, tcp, tdata); 1700 1701 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) 1702 fr_checknewisn(fin, is); 1703 1704 } 1705 ret = 1; 1706 } else 1707 fin->fin_flx |= FI_OOW; 1708 MUTEX_EXIT(&is->is_lock); 1709 return ret; 1710 } 1711 1712 1713 /* ------------------------------------------------------------------------ */ 1714 /* Function: fr_checknewisn */ 1715 /* Returns: Nil */ 1716 /* Parameters: fin(I) - pointer to packet information */ 1717 /* is(I) - pointer to master state structure */ 1718 /* */ 1719 /* Check to see if this TCP connection is expecting and needs a new */ 1720 /* sequence number for a particular direction of the connection. */ 1721 /* */ 1722 /* NOTE: This does not actually change the sequence numbers, only gets new */ 1723 /* one ready. */ 1724 /* ------------------------------------------------------------------------ */ 1725 static void fr_checknewisn(fin, is) 1726 fr_info_t *fin; 1727 ipstate_t *is; 1728 { 1729 u_32_t sumd, old, new; 1730 tcphdr_t *tcp; 1731 int i; 1732 1733 i = fin->fin_rev; 1734 tcp = fin->fin_dp; 1735 1736 if (((i == 0) && !(is->is_flags & IS_ISNSYN)) || 1737 ((i == 1) && !(is->is_flags & IS_ISNACK))) { 1738 old = ntohl(tcp->th_seq); 1739 new = fr_newisn(fin); 1740 is->is_isninc[i] = new - old; 1741 CALC_SUMD(old, new, sumd); 1742 is->is_sumd[i] = (sumd & 0xffff) + (sumd >> 16); 1743 1744 is->is_flags |= ((i == 0) ? 
IS_ISNSYN : IS_ISNACK); 1745 } 1746 } 1747 1748 1749 /* ------------------------------------------------------------------------ */ 1750 /* Function: fr_tcpinwindow */ 1751 /* Returns: int - 1 == packet inside TCP "window", 0 == not inside. */ 1752 /* Parameters: fin(I) - pointer to packet information */ 1753 /* fdata(I) - pointer to tcp state informatio (forward) */ 1754 /* tdata(I) - pointer to tcp state informatio (reverse) */ 1755 /* tcp(I) - pointer to TCP packet header */ 1756 /* */ 1757 /* Given a packet has matched addresses and ports, check to see if it is */ 1758 /* within the TCP data window. In a show of generosity, allow packets that */ 1759 /* are within the window space behind the current sequence # as well. */ 1760 /* ------------------------------------------------------------------------ */ 1761 int fr_tcpinwindow(fin, fdata, tdata, tcp, flags) 1762 fr_info_t *fin; 1763 tcpdata_t *fdata, *tdata; 1764 tcphdr_t *tcp; 1765 int flags; 1766 { 1767 tcp_seq seq, ack, end; 1768 int ackskew, tcpflags; 1769 u_32_t win, maxwin; 1770 int dsize, inseq; 1771 1772 /* 1773 * Find difference between last checked packet and this packet. 1774 */ 1775 tcpflags = tcp->th_flags; 1776 seq = ntohl(tcp->th_seq); 1777 ack = ntohl(tcp->th_ack); 1778 1779 if (tcpflags & TH_SYN) 1780 win = ntohs(tcp->th_win); 1781 else 1782 win = ntohs(tcp->th_win) << fdata->td_winscale; 1783 1784 /* 1785 * win 0 means the receiving endpoint has closed the window, because it 1786 * has not enough memory to receive data from sender. In such case we 1787 * are pretending window size to be 1 to let TCP probe data through. 1788 * TCP probe data can be either 0 or 1 octet of data, the RFC does not 1789 * state this accurately, so we have to allow 1 octet (win = 1) even if 1790 * the window is closed (win == 0). 1791 */ 1792 if (win == 0) 1793 win = 1; 1794 1795 dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) + 1796 ((tcpflags & TH_SYN) ? 1 : 0) + ((tcpflags & TH_FIN) ? 
1 : 0); 1797 1798 /* 1799 * if window scaling is present, the scaling is only allowed 1800 * for windows not in the first SYN packet. In that packet the 1801 * window is 65535 to specify the largest window possible 1802 * for receivers not implementing the window scale option. 1803 * Currently, we do not assume TTCP here. That means that 1804 * if we see a second packet from a host (after the initial 1805 * SYN), we can assume that the receiver of the SYN did 1806 * already send back the SYN/ACK (and thus that we know if 1807 * the receiver also does window scaling) 1808 */ 1809 if (!(tcpflags & TH_SYN) && (fdata->td_winflags & TCP_WSCALE_FIRST)) { 1810 fdata->td_maxwin = win; 1811 } 1812 1813 end = seq + dsize; 1814 1815 if ((fdata->td_end == 0) && 1816 (!(flags & IS_TCPFSM) || 1817 ((tcpflags & TH_OPENING) == TH_OPENING))) { 1818 /* 1819 * Must be a (outgoing) SYN-ACK in reply to a SYN. 1820 */ 1821 fdata->td_end = end - 1; 1822 fdata->td_maxwin = 1; 1823 fdata->td_maxend = end + win; 1824 } 1825 1826 if (!(tcpflags & TH_ACK)) { /* Pretend an ack was sent */ 1827 ack = tdata->td_end; 1828 } else if (((tcpflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) && 1829 (ack == 0)) { 1830 /* gross hack to get around certain broken tcp stacks */ 1831 ack = tdata->td_end; 1832 } 1833 1834 maxwin = tdata->td_maxwin; 1835 ackskew = tdata->td_end - ack; 1836 1837 /* 1838 * Strict sequencing only allows in-order delivery. 
1839 */ 1840 if ((flags & IS_STRICT) != 0) { 1841 if (seq != fdata->td_end) { 1842 DTRACE_PROBE(strict_check); 1843 return 0; 1844 } 1845 } 1846 1847 #define SEQ_GE(a,b) ((int)((a) - (b)) >= 0) 1848 #define SEQ_GT(a,b) ((int)((a) - (b)) > 0) 1849 inseq = 0; 1850 DTRACE_PROBE4( 1851 dyn_params, 1852 int, dsize, 1853 int, ackskew, 1854 int, maxwin, 1855 int, win 1856 ); 1857 if ( 1858 #if defined(_KERNEL) 1859 /* 1860 * end <-> s + n 1861 * maxend <-> ack + win 1862 * this is upperbound check 1863 */ 1864 (SEQ_GE(fdata->td_maxend, end)) && 1865 /* 1866 * this is lowerbound check 1867 */ 1868 (SEQ_GE(seq, fdata->td_end - maxwin)) && 1869 #endif 1870 /* XXX what about big packets */ 1871 #define MAXACKWINDOW 66000 1872 (-ackskew <= (MAXACKWINDOW << fdata->td_winscale)) && 1873 ( ackskew <= (MAXACKWINDOW << fdata->td_winscale))) { 1874 inseq = 1; 1875 /* 1876 * Microsoft Windows will send the next packet to the right of the 1877 * window if SACK is in use. 1878 */ 1879 } else if ((seq == fdata->td_maxend) && (ackskew == 0) && 1880 (fdata->td_winflags & TCP_SACK_PERMIT) && 1881 (tdata->td_winflags & TCP_SACK_PERMIT)) { 1882 inseq = 1; 1883 /* 1884 * RST ACK with SEQ equal to 0 is sent by some OSes (i.e. Solaris) as a 1885 * response to initial SYN packet, when there is no application 1886 * listeing to on a port, where the SYN packet has came to. 1887 */ 1888 } else if ((seq == 0) && (tcpflags == (TH_RST|TH_ACK)) && 1889 (ackskew >= -1) && (ackskew <= 1)) { 1890 inseq = 1; 1891 } else if (!(flags & IS_TCPFSM)) { 1892 1893 if (!(fdata->td_winflags & 1894 (TCP_WSCALE_SEEN|TCP_WSCALE_FIRST))) { 1895 /* 1896 * No TCPFSM and no window scaling, so make some 1897 * extra guesses. 1898 */ 1899 if ((seq == fdata->td_maxend) && (ackskew == 0)) 1900 inseq = 1; 1901 else if (SEQ_GE(seq + maxwin, fdata->td_end - maxwin)) 1902 inseq = 1; 1903 } 1904 } 1905 1906 if (inseq) { 1907 /* if ackskew < 0 then this should be due to fragmented 1908 * packets. 
There is no way to know the length of the 1909 * total packet in advance. 1910 * We do know the total length from the fragment cache though. 1911 * Note however that there might be more sessions with 1912 * exactly the same source and destination parameters in the 1913 * state cache (and source and destination is the only stuff 1914 * that is saved in the fragment cache). Note further that 1915 * some TCP connections in the state cache are hashed with 1916 * sport and dport as well which makes it not worthwhile to 1917 * look for them. 1918 * Thus, when ackskew is negative but still seems to belong 1919 * to this session, we bump up the destinations end value. 1920 */ 1921 if (ackskew < 0) { 1922 DTRACE_PROBE2(end_update_td, 1923 int, tdata->td_end, 1924 int, ack 1925 ); 1926 tdata->td_end = ack; 1927 } 1928 1929 /* update max window seen */ 1930 if (fdata->td_maxwin < win) { 1931 DTRACE_PROBE2(win_update_fd, 1932 int, fdata->td_maxwin, 1933 int, win 1934 ); 1935 fdata->td_maxwin = win; 1936 } 1937 1938 if (SEQ_GT(end, fdata->td_end)) { 1939 DTRACE_PROBE2(end_update_fd, 1940 int, fdata->td_end, 1941 int, end 1942 ); 1943 fdata->td_end = end; 1944 } 1945 1946 if (SEQ_GE(ack + win, tdata->td_maxend)) { 1947 DTRACE_PROBE2(max_end_update_td, 1948 int, tdata->td_maxend, 1949 int, ack + win 1950 ); 1951 tdata->td_maxend = ack + win; 1952 } 1953 1954 return 1; 1955 } 1956 fin->fin_flx |= FI_OOW; 1957 1958 #if defined(_KERNEL) 1959 if (!(SEQ_GE(seq, fdata->td_end - maxwin))) 1960 fin->fin_flx |= FI_NEG_OOW; 1961 #endif 1962 1963 return 0; 1964 } 1965 1966 1967 /* ------------------------------------------------------------------------ */ 1968 /* Function: fr_stclone */ 1969 /* Returns: ipstate_t* - NULL == cloning failed, */ 1970 /* else pointer to new state structure */ 1971 /* Parameters: fin(I) - pointer to packet information */ 1972 /* tcp(I) - pointer to TCP/UDP header */ 1973 /* is(I) - pointer to master state structure */ 1974 /* */ 1975 /* Create a "duplcate" state 
table entry from the master. */ 1976 /* ------------------------------------------------------------------------ */ 1977 static ipstate_t *fr_stclone(fin, tcp, is) 1978 fr_info_t *fin; 1979 tcphdr_t *tcp; 1980 ipstate_t *is; 1981 { 1982 ipstate_t *clone; 1983 u_32_t send; 1984 ipf_stack_t *ifs = fin->fin_ifs; 1985 1986 if (ifs->ifs_ips_num == ifs->ifs_fr_statemax) { 1987 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 1988 ifs->ifs_fr_state_doflush = 1; 1989 return NULL; 1990 } 1991 KMALLOC(clone, ipstate_t *); 1992 if (clone == NULL) 1993 return NULL; 1994 bcopy((char *)is, (char *)clone, sizeof(*clone)); 1995 1996 MUTEX_NUKE(&clone->is_lock); 1997 1998 clone->is_die = ONE_DAY + ifs->ifs_fr_ticks; 1999 clone->is_state[0] = 0; 2000 clone->is_state[1] = 0; 2001 send = ntohl(tcp->th_seq) + fin->fin_dlen - (TCP_OFF(tcp) << 2) + 2002 ((tcp->th_flags & TH_SYN) ? 1 : 0) + 2003 ((tcp->th_flags & TH_FIN) ? 1 : 0); 2004 2005 if (fin->fin_rev == 1) { 2006 clone->is_dend = send; 2007 clone->is_maxdend = send; 2008 clone->is_send = 0; 2009 clone->is_maxswin = 1; 2010 clone->is_maxdwin = ntohs(tcp->th_win); 2011 if (clone->is_maxdwin == 0) 2012 clone->is_maxdwin = 1; 2013 } else { 2014 clone->is_send = send; 2015 clone->is_maxsend = send; 2016 clone->is_dend = 0; 2017 clone->is_maxdwin = 1; 2018 clone->is_maxswin = ntohs(tcp->th_win); 2019 if (clone->is_maxswin == 0) 2020 clone->is_maxswin = 1; 2021 } 2022 2023 clone->is_flags &= ~SI_CLONE; 2024 clone->is_flags |= SI_CLONED; 2025 fr_stinsert(clone, fin->fin_rev, ifs); 2026 clone->is_ref = 2; 2027 if (clone->is_p == IPPROTO_TCP) { 2028 (void) fr_tcp_age(&clone->is_sti, fin, ifs->ifs_ips_tqtqb, 2029 clone->is_flags); 2030 } 2031 MUTEX_EXIT(&clone->is_lock); 2032 #ifdef IPFILTER_SCAN 2033 (void) ipsc_attachis(is); 2034 #endif 2035 #ifdef IPFILTER_SYNC 2036 if (is->is_flags & IS_STATESYNC) 2037 clone->is_sync = ipfsync_new(SMC_STATE, fin, clone); 2038 #endif 2039 return clone; 2040 } 2041 2042 2043 /* 
------------------------------------------------------------------------ */ 2044 /* Function: fr_matchsrcdst */ 2045 /* Returns: Nil */ 2046 /* Parameters: fin(I) - pointer to packet information */ 2047 /* is(I) - pointer to state structure */ 2048 /* src(I) - pointer to source address */ 2049 /* dst(I) - pointer to destination address */ 2050 /* tcp(I) - pointer to TCP/UDP header */ 2051 /* */ 2052 /* Match a state table entry against an IP packet. The logic below is that */ 2053 /* ret gets set to one if the match succeeds, else remains 0. If it is */ 2054 /* still 0 after the test. no match. */ 2055 /* ------------------------------------------------------------------------ */ 2056 static ipstate_t *fr_matchsrcdst(fin, is, src, dst, tcp, cmask) 2057 fr_info_t *fin; 2058 ipstate_t *is; 2059 i6addr_t *src, *dst; 2060 tcphdr_t *tcp; 2061 u_32_t cmask; 2062 { 2063 int ret = 0, rev, out, flags, flx = 0, idx; 2064 u_short sp, dp; 2065 u_32_t cflx; 2066 void *ifp; 2067 ipf_stack_t *ifs = fin->fin_ifs; 2068 2069 rev = IP6_NEQ(&is->is_dst, dst); 2070 ifp = fin->fin_ifp; 2071 out = fin->fin_out; 2072 flags = is->is_flags; 2073 sp = 0; 2074 dp = 0; 2075 2076 if (tcp != NULL) { 2077 sp = htons(fin->fin_sport); 2078 dp = ntohs(fin->fin_dport); 2079 } 2080 if (!rev) { 2081 if (tcp != NULL) { 2082 if (!(flags & SI_W_SPORT) && (sp != is->is_sport)) 2083 rev = 1; 2084 else if (!(flags & SI_W_DPORT) && (dp != is->is_dport)) 2085 rev = 1; 2086 } 2087 } 2088 2089 idx = (out << 1) + rev; 2090 2091 /* 2092 * If the interface for this 'direction' is set, make sure it matches. 2093 * An interface name that is not set matches any, as does a name of *. 2094 */ 2095 if ((is->is_ifp[idx] == NULL && 2096 (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) || 2097 is->is_ifp[idx] == ifp) 2098 ret = 1; 2099 2100 if (ret == 0) 2101 return NULL; 2102 ret = 0; 2103 2104 /* 2105 * Match addresses and ports. 
2106 */ 2107 if (rev == 0) { 2108 if ((IP6_EQ(&is->is_dst, dst) || (flags & SI_W_DADDR)) && 2109 (IP6_EQ(&is->is_src, src) || (flags & SI_W_SADDR))) { 2110 if (tcp) { 2111 if ((sp == is->is_sport || flags & SI_W_SPORT)&& 2112 (dp == is->is_dport || flags & SI_W_DPORT)) 2113 ret = 1; 2114 } else { 2115 ret = 1; 2116 } 2117 } 2118 } else { 2119 if ((IP6_EQ(&is->is_dst, src) || (flags & SI_W_DADDR)) && 2120 (IP6_EQ(&is->is_src, dst) || (flags & SI_W_SADDR))) { 2121 if (tcp) { 2122 if ((dp == is->is_sport || flags & SI_W_SPORT)&& 2123 (sp == is->is_dport || flags & SI_W_DPORT)) 2124 ret = 1; 2125 } else { 2126 ret = 1; 2127 } 2128 } 2129 } 2130 2131 if (ret == 0) 2132 return NULL; 2133 2134 /* 2135 * Whether or not this should be here, is questionable, but the aim 2136 * is to get this out of the main line. 2137 */ 2138 if (tcp == NULL) 2139 flags = is->is_flags & ~(SI_WILDP|SI_NEWFR|SI_CLONE|SI_CLONED); 2140 2141 /* 2142 * Only one of the source or destination address can be flaged as a 2143 * wildcard. Fill in the missing address, if set. 2144 * For IPv6, if the address being copied in is multicast, then 2145 * don't reset the wild flag - multicast causes it to be set in the 2146 * first place! 
2147 */ 2148 if ((flags & (SI_W_SADDR|SI_W_DADDR))) { 2149 fr_ip_t *fi = &fin->fin_fi; 2150 2151 if ((flags & SI_W_SADDR) != 0) { 2152 if (rev == 0) { 2153 #ifdef USE_INET6 2154 if (is->is_v == 6 && 2155 IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6)) 2156 /*EMPTY*/; 2157 else 2158 #endif 2159 { 2160 is->is_src = fi->fi_src; 2161 is->is_flags &= ~SI_W_SADDR; 2162 } 2163 } else { 2164 #ifdef USE_INET6 2165 if (is->is_v == 6 && 2166 IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6)) 2167 /*EMPTY*/; 2168 else 2169 #endif 2170 { 2171 is->is_src = fi->fi_dst; 2172 is->is_flags &= ~SI_W_SADDR; 2173 } 2174 } 2175 } else if ((flags & SI_W_DADDR) != 0) { 2176 if (rev == 0) { 2177 #ifdef USE_INET6 2178 if (is->is_v == 6 && 2179 IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6)) 2180 /*EMPTY*/; 2181 else 2182 #endif 2183 { 2184 is->is_dst = fi->fi_dst; 2185 is->is_flags &= ~SI_W_DADDR; 2186 } 2187 } else { 2188 #ifdef USE_INET6 2189 if (is->is_v == 6 && 2190 IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6)) 2191 /*EMPTY*/; 2192 else 2193 #endif 2194 { 2195 is->is_dst = fi->fi_src; 2196 is->is_flags &= ~SI_W_DADDR; 2197 } 2198 } 2199 } 2200 if ((is->is_flags & (SI_WILDA|SI_WILDP)) == 0) { 2201 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); 2202 } 2203 } 2204 2205 flx = fin->fin_flx & cmask; 2206 cflx = is->is_flx[out][rev]; 2207 2208 /* 2209 * Match up any flags set from IP options. 2210 */ 2211 if ((cflx && (flx != (cflx & cmask))) || 2212 ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]) || 2213 ((fin->fin_secmsk & is->is_secmsk) != is->is_sec) || 2214 ((fin->fin_auth & is->is_authmsk) != is->is_auth)) 2215 return NULL; 2216 2217 /* 2218 * Only one of the source or destination port can be flagged as a 2219 * wildcard. When filling it in, fill in a copy of the matched entry 2220 * if it has the cloning flag set. 
2221 */ 2222 if ((fin->fin_flx & FI_IGNORE) != 0) { 2223 fin->fin_rev = rev; 2224 return is; 2225 } 2226 2227 if ((flags & (SI_W_SPORT|SI_W_DPORT))) { 2228 if ((flags & SI_CLONE) != 0) { 2229 ipstate_t *clone; 2230 2231 clone = fr_stclone(fin, tcp, is); 2232 if (clone == NULL) 2233 return NULL; 2234 is = clone; 2235 } else { 2236 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); 2237 } 2238 2239 if ((flags & SI_W_SPORT) != 0) { 2240 if (rev == 0) { 2241 is->is_sport = sp; 2242 is->is_send = ntohl(tcp->th_seq); 2243 } else { 2244 is->is_sport = dp; 2245 is->is_send = ntohl(tcp->th_ack); 2246 } 2247 is->is_maxsend = is->is_send + 1; 2248 } else if ((flags & SI_W_DPORT) != 0) { 2249 if (rev == 0) { 2250 is->is_dport = dp; 2251 is->is_dend = ntohl(tcp->th_ack); 2252 } else { 2253 is->is_dport = sp; 2254 is->is_dend = ntohl(tcp->th_seq); 2255 } 2256 is->is_maxdend = is->is_dend + 1; 2257 } 2258 is->is_flags &= ~(SI_W_SPORT|SI_W_DPORT); 2259 if ((flags & SI_CLONED) && ifs->ifs_ipstate_logging) 2260 ipstate_log(is, ISL_CLONE, ifs); 2261 } 2262 2263 ret = -1; 2264 2265 if (is->is_flx[out][rev] == 0) { 2266 is->is_flx[out][rev] = flx; 2267 is->is_opt[rev] = fin->fin_optmsk; 2268 if (is->is_v == 6) { 2269 is->is_opt[rev] &= ~0x8; 2270 is->is_optmsk[rev] &= ~0x8; 2271 } 2272 } 2273 2274 /* 2275 * Check if the interface name for this "direction" is set and if not, 2276 * fill it in. 
2277 */ 2278 if (is->is_ifp[idx] == NULL && 2279 (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) { 2280 is->is_ifp[idx] = ifp; 2281 COPYIFNAME(ifp, is->is_ifname[idx], fin->fin_v); 2282 } 2283 fin->fin_rev = rev; 2284 return is; 2285 } 2286 2287 2288 /* ------------------------------------------------------------------------ */ 2289 /* Function: fr_checkicmpmatchingstate */ 2290 /* Returns: Nil */ 2291 /* Parameters: fin(I) - pointer to packet information */ 2292 /* */ 2293 /* If we've got an ICMP error message, using the information stored in the */ 2294 /* ICMP packet, look for a matching state table entry. */ 2295 /* */ 2296 /* If we return NULL then no lock on ipf_state is held. */ 2297 /* If we return non-null then a read-lock on ipf_state is held. */ 2298 /* ------------------------------------------------------------------------ */ 2299 static ipstate_t *fr_checkicmpmatchingstate(fin) 2300 fr_info_t *fin; 2301 { 2302 ipstate_t *is, **isp; 2303 u_short sport, dport; 2304 u_char pr; 2305 int backward, i, oi; 2306 i6addr_t dst, src; 2307 struct icmp *ic; 2308 u_short savelen; 2309 icmphdr_t *icmp; 2310 fr_info_t ofin; 2311 tcphdr_t *tcp; 2312 int len; 2313 ip_t *oip; 2314 u_int hv; 2315 ipf_stack_t *ifs = fin->fin_ifs; 2316 2317 /* 2318 * Does it at least have the return (basic) IP header ? 2319 * Is it an actual recognised ICMP error type? 2320 * Only a basic IP header (no options) should be with 2321 * an ICMP error header. 2322 */ 2323 if ((fin->fin_v != 4) || (fin->fin_hlen != sizeof(ip_t)) || 2324 (fin->fin_plen < ICMPERR_MINPKTLEN) || 2325 !(fin->fin_flx & FI_ICMPERR)) 2326 return NULL; 2327 ic = fin->fin_dp; 2328 2329 oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN); 2330 /* 2331 * Check if the at least the old IP header (with options) and 2332 * 8 bytes of payload is present. 2333 */ 2334 if (fin->fin_plen < ICMPERR_MAXPKTLEN + ((IP_HL(oip) - 5) << 2)) 2335 return NULL; 2336 2337 /* 2338 * Sanity Checks. 
2339 */ 2340 len = fin->fin_dlen - ICMPERR_ICMPHLEN; 2341 if ((len <= 0) || ((IP_HL(oip) << 2) > len)) 2342 return NULL; 2343 2344 /* 2345 * Is the buffer big enough for all of it ? It's the size of the IP 2346 * header claimed in the encapsulated part which is of concern. It 2347 * may be too big to be in this buffer but not so big that it's 2348 * outside the ICMP packet, leading to TCP deref's causing problems. 2349 * This is possible because we don't know how big oip_hl is when we 2350 * do the pullup early in fr_check() and thus can't guarantee it is 2351 * all here now. 2352 */ 2353 #ifdef _KERNEL 2354 { 2355 mb_t *m; 2356 2357 m = fin->fin_m; 2358 # if defined(MENTAT) 2359 if ((char *)oip + len > (char *)m->b_wptr) 2360 return NULL; 2361 # else 2362 if ((char *)oip + len > (char *)fin->fin_ip + m->m_len) 2363 return NULL; 2364 # endif 2365 } 2366 #endif 2367 bcopy((char *)fin, (char *)&ofin, sizeof(*fin)); 2368 2369 /* 2370 * in the IPv4 case we must zero the i6addr union otherwise 2371 * the IP6_EQ and IP6_NEQ macros produce the wrong results because 2372 * of the 'junk' in the unused part of the union 2373 */ 2374 bzero((char *)&src, sizeof(src)); 2375 bzero((char *)&dst, sizeof(dst)); 2376 2377 /* 2378 * we make an fin entry to be able to feed it to 2379 * matchsrcdst note that not all fields are encessary 2380 * but this is the cleanest way. Note further we fill 2381 * in fin_mp such that if someone uses it we'll get 2382 * a kernel panic. fr_matchsrcdst does not use this. 2383 * 2384 * watch out here, as ip is in host order and oip in network 2385 * order. Any change we make must be undone afterwards, like 2386 * oip->ip_off - it is still in network byte order so fix it. 
2387 */ 2388 savelen = oip->ip_len; 2389 oip->ip_len = len; 2390 oip->ip_off = ntohs(oip->ip_off); 2391 2392 ofin.fin_flx = FI_NOCKSUM; 2393 ofin.fin_v = 4; 2394 ofin.fin_ip = oip; 2395 ofin.fin_m = NULL; /* if dereferenced, panic XXX */ 2396 ofin.fin_mp = NULL; /* if dereferenced, panic XXX */ 2397 ofin.fin_plen = fin->fin_dlen - ICMPERR_ICMPHLEN; 2398 (void) fr_makefrip(IP_HL(oip) << 2, oip, &ofin); 2399 ofin.fin_ifp = fin->fin_ifp; 2400 ofin.fin_out = !fin->fin_out; 2401 /* 2402 * Reset the short and bad flag here because in fr_matchsrcdst() 2403 * the flags for the current packet (fin_flx) are compared against 2404 * those for the existing session. 2405 */ 2406 ofin.fin_flx &= ~(FI_BAD|FI_SHORT); 2407 2408 /* 2409 * Put old values of ip_len and ip_off back as we don't know 2410 * if we have to forward the packet (or process it again. 2411 */ 2412 oip->ip_len = savelen; 2413 oip->ip_off = htons(oip->ip_off); 2414 2415 switch (oip->ip_p) 2416 { 2417 case IPPROTO_ICMP : 2418 /* 2419 * an ICMP error can only be generated as a result of an 2420 * ICMP query, not as the response on an ICMP error 2421 * 2422 * XXX theoretically ICMP_ECHOREP and the other reply's are 2423 * ICMP query's as well, but adding them here seems strange XXX 2424 */ 2425 if ((ofin.fin_flx & FI_ICMPERR) != 0) 2426 return NULL; 2427 2428 /* 2429 * perform a lookup of the ICMP packet in the state table 2430 */ 2431 icmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2432 hv = (pr = oip->ip_p); 2433 src.in4 = oip->ip_src; 2434 hv += src.in4.s_addr; 2435 dst.in4 = oip->ip_dst; 2436 hv += dst.in4.s_addr; 2437 hv += icmp->icmp_id; 2438 hv = DOUBLE_HASH(hv, ifs); 2439 2440 READ_ENTER(&ifs->ifs_ipf_state); 2441 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 2442 isp = &is->is_hnext; 2443 if ((is->is_p != pr) || (is->is_v != 4)) 2444 continue; 2445 if (is->is_pass & FR_NOICMPERR) 2446 continue; 2447 is = fr_matchsrcdst(&ofin, is, &src, &dst, 2448 NULL, FI_ICMPCMP); 2449 if (is != 
NULL) { 2450 if ((is->is_pass & FR_NOICMPERR) != 0) { 2451 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2452 return NULL; 2453 } 2454 /* 2455 * i : the index of this packet (the icmp 2456 * unreachable) 2457 * oi : the index of the original packet found 2458 * in the icmp header (i.e. the packet 2459 * causing this icmp) 2460 * backward : original packet was backward 2461 * compared to the state 2462 */ 2463 backward = IP6_NEQ(&is->is_src, &src); 2464 fin->fin_rev = !backward; 2465 i = (!backward << 1) + fin->fin_out; 2466 oi = (backward << 1) + ofin.fin_out; 2467 if (is->is_icmppkts[i] > is->is_pkts[oi]) 2468 continue; 2469 ifs->ifs_ips_stats.iss_hits++; 2470 is->is_icmppkts[i]++; 2471 return is; 2472 } 2473 } 2474 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2475 return NULL; 2476 case IPPROTO_TCP : 2477 case IPPROTO_UDP : 2478 break; 2479 default : 2480 return NULL; 2481 } 2482 2483 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2484 dport = tcp->th_dport; 2485 sport = tcp->th_sport; 2486 2487 hv = (pr = oip->ip_p); 2488 src.in4 = oip->ip_src; 2489 hv += src.in4.s_addr; 2490 dst.in4 = oip->ip_dst; 2491 hv += dst.in4.s_addr; 2492 hv += dport; 2493 hv += sport; 2494 hv = DOUBLE_HASH(hv, ifs); 2495 2496 READ_ENTER(&ifs->ifs_ipf_state); 2497 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 2498 isp = &is->is_hnext; 2499 /* 2500 * Only allow this icmp though if the 2501 * encapsulated packet was allowed through the 2502 * other way around. Note that the minimal amount 2503 * of info present does not allow for checking against 2504 * tcp internals such as seq and ack numbers. Only the 2505 * ports are known to be present and can be even if the 2506 * short flag is set. 2507 */ 2508 if ((is->is_p == pr) && (is->is_v == 4) && 2509 (is = fr_matchsrcdst(&ofin, is, &src, &dst, 2510 tcp, FI_ICMPCMP))) { 2511 /* 2512 * i : the index of this packet (the icmp unreachable) 2513 * oi : the index of the original packet found in the 2514 * icmp header (i.e. 
the packet causing this icmp) 2515 * backward : original packet was backward compared to 2516 * the state 2517 */ 2518 backward = IP6_NEQ(&is->is_src, &src); 2519 fin->fin_rev = !backward; 2520 i = (!backward << 1) + fin->fin_out; 2521 oi = (backward << 1) + ofin.fin_out; 2522 2523 if (((is->is_pass & FR_NOICMPERR) != 0) || 2524 (is->is_icmppkts[i] > is->is_pkts[oi])) 2525 break; 2526 ifs->ifs_ips_stats.iss_hits++; 2527 is->is_icmppkts[i]++; 2528 /* 2529 * we deliberately do not touch the timeouts 2530 * for the accompanying state table entry. 2531 * It remains to be seen if that is correct. XXX 2532 */ 2533 return is; 2534 } 2535 } 2536 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2537 return NULL; 2538 } 2539 2540 2541 /* ------------------------------------------------------------------------ */ 2542 /* Function: fr_ipsmove */ 2543 /* Returns: Nil */ 2544 /* Parameters: is(I) - pointer to state table entry */ 2545 /* hv(I) - new hash value for state table entry */ 2546 /* Write Locks: ipf_state */ 2547 /* */ 2548 /* Move a state entry from one position in the hash table to another. */ 2549 /* ------------------------------------------------------------------------ */ 2550 static void fr_ipsmove(is, hv, ifs) 2551 ipstate_t *is; 2552 u_int hv; 2553 ipf_stack_t *ifs; 2554 { 2555 ipstate_t **isp; 2556 u_int hvm; 2557 2558 ASSERT(rw_read_locked(&ifs->ifs_ipf_state.ipf_lk) == 0); 2559 2560 hvm = is->is_hv; 2561 /* 2562 * Remove the hash from the old location... 2563 */ 2564 isp = is->is_phnext; 2565 if (is->is_hnext) 2566 is->is_hnext->is_phnext = isp; 2567 *isp = is->is_hnext; 2568 if (ifs->ifs_ips_table[hvm] == NULL) 2569 ifs->ifs_ips_stats.iss_inuse--; 2570 ifs->ifs_ips_stats.iss_bucketlen[hvm]--; 2571 2572 /* 2573 * ...and put the hash in the new one. 
2574 */ 2575 hvm = DOUBLE_HASH(hv, ifs); 2576 is->is_hv = hvm; 2577 isp = &ifs->ifs_ips_table[hvm]; 2578 if (*isp) 2579 (*isp)->is_phnext = &is->is_hnext; 2580 else 2581 ifs->ifs_ips_stats.iss_inuse++; 2582 ifs->ifs_ips_stats.iss_bucketlen[hvm]++; 2583 is->is_phnext = isp; 2584 is->is_hnext = *isp; 2585 *isp = is; 2586 } 2587 2588 2589 /* ------------------------------------------------------------------------ */ 2590 /* Function: fr_stlookup */ 2591 /* Returns: ipstate_t* - NULL == no matching state found, */ 2592 /* else pointer to state information is returned */ 2593 /* Parameters: fin(I) - pointer to packet information */ 2594 /* tcp(I) - pointer to TCP/UDP header. */ 2595 /* */ 2596 /* Search the state table for a matching entry to the packet described by */ 2597 /* the contents of *fin. */ 2598 /* */ 2599 /* If we return NULL then no lock on ipf_state is held. */ 2600 /* If we return non-null then a read-lock on ipf_state is held. */ 2601 /* ------------------------------------------------------------------------ */ 2602 ipstate_t *fr_stlookup(fin, tcp, ifqp) 2603 fr_info_t *fin; 2604 tcphdr_t *tcp; 2605 ipftq_t **ifqp; 2606 { 2607 u_int hv, hvm, pr, v, tryagain; 2608 ipstate_t *is, **isp; 2609 u_short dport, sport; 2610 i6addr_t src, dst; 2611 struct icmp *ic; 2612 ipftq_t *ifq; 2613 int oow; 2614 ipf_stack_t *ifs = fin->fin_ifs; 2615 2616 is = NULL; 2617 ifq = NULL; 2618 tcp = fin->fin_dp; 2619 ic = (struct icmp *)tcp; 2620 hv = (pr = fin->fin_fi.fi_p); 2621 src = fin->fin_fi.fi_src; 2622 dst = fin->fin_fi.fi_dst; 2623 hv += src.in4.s_addr; 2624 hv += dst.in4.s_addr; 2625 2626 v = fin->fin_fi.fi_v; 2627 #ifdef USE_INET6 2628 if (v == 6) { 2629 hv += fin->fin_fi.fi_src.i6[1]; 2630 hv += fin->fin_fi.fi_src.i6[2]; 2631 hv += fin->fin_fi.fi_src.i6[3]; 2632 2633 if ((fin->fin_p == IPPROTO_ICMPV6) && 2634 IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_dst.in6)) { 2635 hv -= dst.in4.s_addr; 2636 } else { 2637 hv += fin->fin_fi.fi_dst.i6[1]; 2638 hv += 
fin->fin_fi.fi_dst.i6[2]; 2639 hv += fin->fin_fi.fi_dst.i6[3]; 2640 } 2641 } 2642 #endif 2643 if ((v == 4) && 2644 (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) { 2645 if (fin->fin_out == 0) { 2646 hv -= src.in4.s_addr; 2647 } else { 2648 hv -= dst.in4.s_addr; 2649 } 2650 } 2651 2652 /* 2653 * Search the hash table for matching packet header info. 2654 */ 2655 switch (pr) 2656 { 2657 #ifdef USE_INET6 2658 case IPPROTO_ICMPV6 : 2659 tryagain = 0; 2660 if (v == 6) { 2661 if ((ic->icmp_type == ICMP6_ECHO_REQUEST) || 2662 (ic->icmp_type == ICMP6_ECHO_REPLY)) { 2663 hv += ic->icmp_id; 2664 } 2665 } 2666 READ_ENTER(&ifs->ifs_ipf_state); 2667 icmp6again: 2668 hvm = DOUBLE_HASH(hv, ifs); 2669 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { 2670 isp = &is->is_hnext; 2671 if ((is->is_p != pr) || (is->is_v != v)) 2672 continue; 2673 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); 2674 if (is != NULL && 2675 fr_matchicmpqueryreply(v, &is->is_icmp, 2676 ic, fin->fin_rev)) { 2677 if (fin->fin_rev) 2678 ifq = &ifs->ifs_ips_icmpacktq; 2679 else 2680 ifq = &ifs->ifs_ips_icmptq; 2681 break; 2682 } 2683 } 2684 2685 if (is != NULL) { 2686 if ((tryagain != 0) && !(is->is_flags & SI_W_DADDR)) { 2687 hv += fin->fin_fi.fi_src.i6[0]; 2688 hv += fin->fin_fi.fi_src.i6[1]; 2689 hv += fin->fin_fi.fi_src.i6[2]; 2690 hv += fin->fin_fi.fi_src.i6[3]; 2691 fr_ipsmove(is, hv, ifs); 2692 MUTEX_DOWNGRADE(&ifs->ifs_ipf_state); 2693 } 2694 break; 2695 } 2696 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2697 2698 /* 2699 * No matching icmp state entry. Perhaps this is a 2700 * response to another state entry. 2701 * 2702 * XXX With some ICMP6 packets, the "other" address is already 2703 * in the packet, after the ICMP6 header, and this could be 2704 * used in place of the multicast address. However, taking 2705 * advantage of this requires some significant code changes 2706 * to handle the specific types where that is the case. 
2707 */ 2708 if ((ifs->ifs_ips_stats.iss_wild != 0) && (v == 6) && (tryagain == 0) && 2709 !IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_src.in6)) { 2710 hv -= fin->fin_fi.fi_src.i6[0]; 2711 hv -= fin->fin_fi.fi_src.i6[1]; 2712 hv -= fin->fin_fi.fi_src.i6[2]; 2713 hv -= fin->fin_fi.fi_src.i6[3]; 2714 tryagain = 1; 2715 WRITE_ENTER(&ifs->ifs_ipf_state); 2716 goto icmp6again; 2717 } 2718 2719 is = fr_checkicmp6matchingstate(fin); 2720 if (is != NULL) 2721 return is; 2722 break; 2723 #endif 2724 2725 case IPPROTO_ICMP : 2726 if (v == 4) { 2727 hv += ic->icmp_id; 2728 } 2729 hv = DOUBLE_HASH(hv, ifs); 2730 READ_ENTER(&ifs->ifs_ipf_state); 2731 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 2732 isp = &is->is_hnext; 2733 if ((is->is_p != pr) || (is->is_v != v)) 2734 continue; 2735 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); 2736 if (is != NULL && 2737 fr_matchicmpqueryreply(v, &is->is_icmp, 2738 ic, fin->fin_rev)) { 2739 if (fin->fin_rev) 2740 ifq = &ifs->ifs_ips_icmpacktq; 2741 else 2742 ifq = &ifs->ifs_ips_icmptq; 2743 break; 2744 } 2745 } 2746 if (is == NULL) { 2747 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2748 } 2749 break; 2750 2751 case IPPROTO_TCP : 2752 case IPPROTO_UDP : 2753 ifqp = NULL; 2754 sport = htons(fin->fin_data[0]); 2755 hv += sport; 2756 dport = htons(fin->fin_data[1]); 2757 hv += dport; 2758 oow = 0; 2759 tryagain = 0; 2760 READ_ENTER(&ifs->ifs_ipf_state); 2761 retry_tcpudp: 2762 hvm = DOUBLE_HASH(hv, ifs); 2763 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { 2764 isp = &is->is_hnext; 2765 if ((is->is_p != pr) || (is->is_v != v)) 2766 continue; 2767 fin->fin_flx &= ~FI_OOW; 2768 is = fr_matchsrcdst(fin, is, &src, &dst, tcp, FI_CMP); 2769 if (is != NULL) { 2770 if (pr == IPPROTO_TCP) { 2771 if (!fr_tcpstate(fin, tcp, is)) { 2772 oow |= fin->fin_flx & FI_OOW; 2773 continue; 2774 } 2775 } 2776 break; 2777 } 2778 } 2779 if (is != NULL) { 2780 if (tryagain && 2781 !(is->is_flags & (SI_CLONE|SI_WILDP|SI_WILDA))) { 2782 hv += 
dport; 2783 hv += sport; 2784 fr_ipsmove(is, hv, ifs); 2785 MUTEX_DOWNGRADE(&ifs->ifs_ipf_state); 2786 } 2787 break; 2788 } 2789 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2790 2791 if (ifs->ifs_ips_stats.iss_wild) { 2792 if (tryagain == 0) { 2793 hv -= dport; 2794 hv -= sport; 2795 } else if (tryagain == 1) { 2796 hv = fin->fin_fi.fi_p; 2797 /* 2798 * If we try to pretend this is a reply to a 2799 * multicast/broadcast packet then we need to 2800 * exclude part of the address from the hash 2801 * calculation. 2802 */ 2803 if (fin->fin_out == 0) { 2804 hv += src.in4.s_addr; 2805 } else { 2806 hv += dst.in4.s_addr; 2807 } 2808 hv += dport; 2809 hv += sport; 2810 } 2811 tryagain++; 2812 if (tryagain <= 2) { 2813 WRITE_ENTER(&ifs->ifs_ipf_state); 2814 goto retry_tcpudp; 2815 } 2816 } 2817 fin->fin_flx |= oow; 2818 break; 2819 2820 #if 0 2821 case IPPROTO_GRE : 2822 gre = fin->fin_dp; 2823 if (GRE_REV(gre->gr_flags) == 1) { 2824 hv += gre->gr_call; 2825 } 2826 /* FALLTHROUGH */ 2827 #endif 2828 default : 2829 ifqp = NULL; 2830 hvm = DOUBLE_HASH(hv, ifs); 2831 READ_ENTER(&ifs->ifs_ipf_state); 2832 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { 2833 isp = &is->is_hnext; 2834 if ((is->is_p != pr) || (is->is_v != v)) 2835 continue; 2836 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); 2837 if (is != NULL) { 2838 ifq = &ifs->ifs_ips_iptq; 2839 break; 2840 } 2841 } 2842 if (is == NULL) { 2843 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2844 } 2845 break; 2846 } 2847 2848 if ((is != NULL) && ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) && 2849 (is->is_tqehead[fin->fin_rev] != NULL)) 2850 ifq = is->is_tqehead[fin->fin_rev]; 2851 if (ifq != NULL && ifqp != NULL) 2852 *ifqp = ifq; 2853 return is; 2854 } 2855 2856 2857 /* ------------------------------------------------------------------------ */ 2858 /* Function: fr_updatestate */ 2859 /* Returns: Nil */ 2860 /* Parameters: fin(I) - pointer to packet information */ 2861 /* is(I) - pointer to state table entry */ 2862 /* 
Read Locks: ipf_state */ 2863 /* */ 2864 /* Updates packet and byte counters for a newly received packet. Seeds the */ 2865 /* fragment cache with a new entry as required. */ 2866 /* ------------------------------------------------------------------------ */ 2867 void fr_updatestate(fin, is, ifq) 2868 fr_info_t *fin; 2869 ipstate_t *is; 2870 ipftq_t *ifq; 2871 { 2872 ipftqent_t *tqe; 2873 int i, pass; 2874 ipf_stack_t *ifs = fin->fin_ifs; 2875 2876 i = (fin->fin_rev << 1) + fin->fin_out; 2877 2878 /* 2879 * For TCP packets, ifq == NULL. For all others, check if this new 2880 * queue is different to the last one it was on and move it if so. 2881 */ 2882 tqe = &is->is_sti; 2883 MUTEX_ENTER(&is->is_lock); 2884 if ((tqe->tqe_flags & TQE_RULEBASED) != 0) 2885 ifq = is->is_tqehead[fin->fin_rev]; 2886 2887 if (ifq != NULL) 2888 fr_movequeue(tqe, tqe->tqe_ifq, ifq, ifs); 2889 2890 is->is_pkts[i]++; 2891 is->is_bytes[i] += fin->fin_plen; 2892 MUTEX_EXIT(&is->is_lock); 2893 2894 #ifdef IPFILTER_SYNC 2895 if (is->is_flags & IS_STATESYNC) 2896 ipfsync_update(SMC_STATE, fin, is->is_sync); 2897 #endif 2898 2899 ATOMIC_INCL(ifs->ifs_ips_stats.iss_hits); 2900 2901 fin->fin_fr = is->is_rule; 2902 2903 /* 2904 * If this packet is a fragment and the rule says to track fragments, 2905 * then create a new fragment cache entry. 2906 */ 2907 pass = is->is_pass; 2908 if ((fin->fin_flx & FI_FRAG) && FR_ISPASS(pass)) 2909 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); 2910 } 2911 2912 2913 /* ------------------------------------------------------------------------ */ 2914 /* Function: fr_checkstate */ 2915 /* Returns: frentry_t* - NULL == search failed, */ 2916 /* else pointer to rule for matching state */ 2917 /* Parameters: ifp(I) - pointer to interface */ 2918 /* passp(I) - pointer to filtering result flags */ 2919 /* */ 2920 /* Check if a packet is associated with an entry in the state table. 
*/ 2921 /* ------------------------------------------------------------------------ */ 2922 frentry_t *fr_checkstate(fin, passp) 2923 fr_info_t *fin; 2924 u_32_t *passp; 2925 { 2926 ipstate_t *is; 2927 frentry_t *fr; 2928 tcphdr_t *tcp; 2929 ipftq_t *ifq; 2930 u_int pass; 2931 ipf_stack_t *ifs = fin->fin_ifs; 2932 2933 if (ifs->ifs_fr_state_lock || (ifs->ifs_ips_list == NULL) || 2934 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) 2935 return NULL; 2936 2937 is = NULL; 2938 if ((fin->fin_flx & FI_TCPUDP) || 2939 (fin->fin_fi.fi_p == IPPROTO_ICMP) 2940 #ifdef USE_INET6 2941 || (fin->fin_fi.fi_p == IPPROTO_ICMPV6) 2942 #endif 2943 ) 2944 tcp = fin->fin_dp; 2945 else 2946 tcp = NULL; 2947 2948 /* 2949 * Search the hash table for matching packet header info. 2950 */ 2951 ifq = NULL; 2952 is = fin->fin_state; 2953 if (is == NULL) 2954 is = fr_stlookup(fin, tcp, &ifq); 2955 switch (fin->fin_p) 2956 { 2957 #ifdef USE_INET6 2958 case IPPROTO_ICMPV6 : 2959 if (is != NULL) 2960 break; 2961 if (fin->fin_v == 6) { 2962 is = fr_checkicmp6matchingstate(fin); 2963 if (is != NULL) 2964 goto matched; 2965 } 2966 break; 2967 #endif 2968 case IPPROTO_ICMP : 2969 if (is != NULL) 2970 break; 2971 /* 2972 * No matching icmp state entry. Perhaps this is a 2973 * response to another state entry. 
2974 */ 2975 is = fr_checkicmpmatchingstate(fin); 2976 if (is != NULL) 2977 goto matched; 2978 break; 2979 case IPPROTO_TCP : 2980 if (is == NULL) 2981 break; 2982 2983 if (is->is_pass & FR_NEWISN) { 2984 if (fin->fin_out == 0) 2985 fr_fixinisn(fin, is); 2986 else if (fin->fin_out == 1) 2987 fr_fixoutisn(fin, is); 2988 } 2989 break; 2990 default : 2991 if (fin->fin_rev) 2992 ifq = &ifs->ifs_ips_udpacktq; 2993 else 2994 ifq = &ifs->ifs_ips_udptq; 2995 break; 2996 } 2997 if (is == NULL) { 2998 ATOMIC_INCL(ifs->ifs_ips_stats.iss_miss); 2999 return NULL; 3000 } 3001 3002 matched: 3003 fr = is->is_rule; 3004 if (fr != NULL) { 3005 if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) { 3006 if (fin->fin_nattag == NULL) 3007 return NULL; 3008 if (fr_matchtag(&fr->fr_nattag, fin->fin_nattag) != 0) 3009 return NULL; 3010 } 3011 (void) strncpy(fin->fin_group, fr->fr_group, FR_GROUPLEN); 3012 fin->fin_icode = fr->fr_icode; 3013 } 3014 3015 fin->fin_rule = is->is_rulen; 3016 pass = is->is_pass; 3017 fr_updatestate(fin, is, ifq); 3018 if (fin->fin_out == 1) 3019 fin->fin_nat = is->is_nat[fin->fin_rev]; 3020 3021 fin->fin_state = is; 3022 MUTEX_ENTER(&is->is_lock); 3023 is->is_ref++; 3024 MUTEX_EXIT(&is->is_lock); 3025 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3026 fin->fin_flx |= FI_STATE; 3027 if ((pass & FR_LOGFIRST) != 0) 3028 pass &= ~(FR_LOGFIRST|FR_LOG); 3029 *passp = pass; 3030 return fr; 3031 } 3032 3033 3034 /* ------------------------------------------------------------------------ */ 3035 /* Function: fr_fixoutisn */ 3036 /* Returns: Nil */ 3037 /* Parameters: fin(I) - pointer to packet information */ 3038 /* is(I) - pointer to master state structure */ 3039 /* */ 3040 /* Called only for outbound packets, adjusts the sequence number and the */ 3041 /* TCP checksum to match that change. 
*/ 3042 /* ------------------------------------------------------------------------ */ 3043 static void fr_fixoutisn(fin, is) 3044 fr_info_t *fin; 3045 ipstate_t *is; 3046 { 3047 tcphdr_t *tcp; 3048 int rev; 3049 u_32_t seq; 3050 3051 tcp = fin->fin_dp; 3052 rev = fin->fin_rev; 3053 if ((is->is_flags & IS_ISNSYN) != 0) { 3054 if (rev == 0) { 3055 seq = ntohl(tcp->th_seq); 3056 seq += is->is_isninc[0]; 3057 tcp->th_seq = htonl(seq); 3058 fix_outcksum(&tcp->th_sum, is->is_sumd[0]); 3059 } 3060 } 3061 if ((is->is_flags & IS_ISNACK) != 0) { 3062 if (rev == 1) { 3063 seq = ntohl(tcp->th_seq); 3064 seq += is->is_isninc[1]; 3065 tcp->th_seq = htonl(seq); 3066 fix_outcksum(&tcp->th_sum, is->is_sumd[1]); 3067 } 3068 } 3069 } 3070 3071 3072 /* ------------------------------------------------------------------------ */ 3073 /* Function: fr_fixinisn */ 3074 /* Returns: Nil */ 3075 /* Parameters: fin(I) - pointer to packet information */ 3076 /* is(I) - pointer to master state structure */ 3077 /* */ 3078 /* Called only for inbound packets, adjusts the acknowledge number and the */ 3079 /* TCP checksum to match that change. 
*/ 3080 /* ------------------------------------------------------------------------ */ 3081 static void fr_fixinisn(fin, is) 3082 fr_info_t *fin; 3083 ipstate_t *is; 3084 { 3085 tcphdr_t *tcp; 3086 int rev; 3087 u_32_t ack; 3088 3089 tcp = fin->fin_dp; 3090 rev = fin->fin_rev; 3091 if ((is->is_flags & IS_ISNSYN) != 0) { 3092 if (rev == 1) { 3093 ack = ntohl(tcp->th_ack); 3094 ack -= is->is_isninc[0]; 3095 tcp->th_ack = htonl(ack); 3096 fix_incksum(&tcp->th_sum, is->is_sumd[0]); 3097 } 3098 } 3099 if ((is->is_flags & IS_ISNACK) != 0) { 3100 if (rev == 0) { 3101 ack = ntohl(tcp->th_ack); 3102 ack -= is->is_isninc[1]; 3103 tcp->th_ack = htonl(ack); 3104 fix_incksum(&tcp->th_sum, is->is_sumd[1]); 3105 } 3106 } 3107 } 3108 3109 3110 /* ------------------------------------------------------------------------ */ 3111 /* Function: fr_statesync */ 3112 /* Returns: Nil */ 3113 /* Parameters: action(I) - type of synchronisation to do */ 3114 /* v(I) - IP version being sync'd (v4 or v6) */ 3115 /* ifp(I) - interface identifier associated with action */ 3116 /* name(I) - name associated with ifp parameter */ 3117 /* */ 3118 /* Walk through all state entries and if an interface pointer match is */ 3119 /* found then look it up again, based on its name in case the pointer has */ 3120 /* changed since last time. */ 3121 /* */ 3122 /* If ifp is passed in as being non-null then we are only doing updates for */ 3123 /* existing, matching, uses of it. 
*/
/* ------------------------------------------------------------------------ */
void fr_statesync(action, v, ifp, name, ifs)
int action, v;
void *ifp;
char *name;
ipf_stack_t *ifs;
{
	ipstate_t *is;
	int i;

	/* Nothing to do unless ipf is running (checked before locking). */
	if (ifs->ifs_fr_running <= 0)
		return;

	WRITE_ENTER(&ifs->ifs_ipf_state);

	/* Check again now that the write lock is held. */
	if (ifs->ifs_fr_running <= 0) {
		RWLOCK_EXIT(&ifs->ifs_ipf_state);
		return;
	}

	/*
	 * In every case below, v == 0 means "all entries"; otherwise only
	 * entries whose is_v equals v are touched.
	 */
	switch (action)
	{
	case IPFSYNC_RESYNC :
		for (is = ifs->ifs_ips_list; is; is = is->is_next) {
			if (v != 0 && is->is_v != v)
				continue;
			/*
			 * Look up all the interface names in the state entry.
			 */
			for (i = 0; i < 4; i++) {
				is->is_ifp[i] = fr_resolvenic(is->is_ifname[i],
							      is->is_v, ifs);
			}
		}
		break;
	case IPFSYNC_NEWIFP :
		for (is = ifs->ifs_ips_list; is; is = is->is_next) {
			if (v != 0 && is->is_v != v)
				continue;
			/*
			 * Store ifp in every slot of the state entry whose
			 * recorded interface name matches 'name'.
			 */
			for (i = 0; i < 4; i++) {
				if (!strncmp(is->is_ifname[i], name,
					     sizeof(is->is_ifname[i])))
					is->is_ifp[i] = ifp;
			}
		}
		break;
	case IPFSYNC_OLDIFP :
		for (is = ifs->ifs_ips_list; is; is = is->is_next) {
			if (v != 0 && is->is_v != v)
				continue;
			/*
			 * Replace every reference to ifp in the state entry
			 * with the (void *)-1 marker.
			 */
			for (i = 0; i < 4; i++) {
				if (is->is_ifp[i] == ifp)
					is->is_ifp[i] = (void *)-1;
			}
		}
		break;
	}
	RWLOCK_EXIT(&ifs->ifs_ipf_state);
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_delstate                                                 */
/* Returns:     Nil                                                         */
/* Parameters:  is(I)  - pointer to state structure to delete               */
/*              why(I) - if not 0, log reason why it was deleted            */
/*              ifs    - ipf stack instance                                 */
/* Write Locks: ipf_state/ipf_global                                        */
/*                                                                          */
/* Deletes a state entry from the enumerated list as well as the hash table */
/* and timeout queue lists.  Make adjustments to hash table statistics and  */
/* global counters as required.                                             */
/*                                                                          */
/* If other references to the entry remain (is_ref > 1), one reference is   */
/* dropped and the function returns early: the entry is then out of the     */
/* hash table and timeout queue but still on the master list and not freed. */
/* ------------------------------------------------------------------------ */
static void fr_delstate(is, why, ifs)
ipstate_t *is;
int why;
ipf_stack_t *ifs;
{
	/* Set to 1 when this call is the one that unhashes the entry. */
	int removed = 0;

	/*
	 * NOTE(review): as written this asserts that at least one of the two
	 * write locks is NOT held, which reads oddly next to the "Write
	 * Locks" line in the header - confirm the intended condition.
	 */
	ASSERT(rw_write_held(&ifs->ifs_ipf_global.ipf_lk) == 0 ||
		rw_write_held(&ifs->ifs_ipf_state.ipf_lk) == 0);

	/*
	 * Start by removing the entry from the hash table of state entries
	 * so it will not be "used" again.
	 *
	 * It will remain in the "list" of state entries until all references
	 * have been accounted for.
	 */
	if (is->is_phnext != NULL) {
		removed = 1;
		*is->is_phnext = is->is_hnext;
		if (is->is_hnext != NULL)
			is->is_hnext->is_phnext = is->is_phnext;
		/* Bucket became empty: one fewer bucket in use. */
		if (ifs->ifs_ips_table[is->is_hv] == NULL)
			ifs->ifs_ips_stats.iss_inuse--;
		ifs->ifs_ips_stats.iss_bucketlen[is->is_hv]--;

		is->is_phnext = NULL;
		is->is_hnext = NULL;
	}

	/*
	 * Because ifs->ifs_ips_stats.iss_wild is a count of entries in the
	 * state table that have wildcard flags set, only decrement it once
	 * and do it here.  Clearing the flags afterwards keeps a later call
	 * for the same entry from decrementing it again.
	 */
	if (is->is_flags & (SI_WILDP|SI_WILDA)) {
		if (!(is->is_flags & SI_CLONED)) {
			ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild);
		}
		is->is_flags &= ~(SI_WILDP|SI_WILDA);
	}

	/*
	 * Next, remove it from the timeout queue it is in.
	 */
	fr_deletequeueentry(&is->is_sti);

	is->is_me = NULL;

	/*
	 * If it is still in use by something else, do not go any further,
	 * but note that at this point it is now an orphan.
	 */
	MUTEX_ENTER(&is->is_lock);
	if (is->is_ref > 1) {
		is->is_ref--;
		MUTEX_EXIT(&is->is_lock);
		/* Count the orphan only on the call that unhashed it. */
		if (removed)
			ifs->ifs_ips_stats.iss_orphans++;
		return;
	}
	MUTEX_EXIT(&is->is_lock);

	is->is_ref = 0;

	/*
	 * If entry has already been removed from table,
	 * it means we're simply cleaning up an orphan.
	 */
	if (!removed)
		ifs->ifs_ips_stats.iss_orphans--;

	if (is->is_tqehead[0] != NULL)
		(void) fr_deletetimeoutqueue(is->is_tqehead[0]);

	if (is->is_tqehead[1] != NULL)
		(void) fr_deletetimeoutqueue(is->is_tqehead[1]);

#ifdef	IPFILTER_SYNC
	if (is->is_sync)
		ipfsync_del(is->is_sync);
#endif
#ifdef	IPFILTER_SCAN
	(void) ipsc_detachis(is);
#endif

	/*
	 * Now remove it from master list of state table entries.
	 */
	if (is->is_pnext != NULL) {
		*is->is_pnext = is->is_next;
		if (is->is_next != NULL) {
			is->is_next->is_pnext = is->is_pnext;
			is->is_next = NULL;
		}
		is->is_pnext = NULL;
	}

	/* Log before the entry is freed; ipstate_log() reads from 'is'. */
	if (ifs->ifs_ipstate_logging != 0 && why != 0)
		ipstate_log(is, why, ifs);

	if (is->is_rule != NULL) {
		is->is_rule->fr_statecnt--;
		(void)fr_derefrule(&is->is_rule, ifs);
	}

	MUTEX_DESTROY(&is->is_lock);
	KFREE(is);
	ifs->ifs_ips_num--;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_timeoutstate                                             */
/* Returns:     Nil                                                         */
/* Parameters:  ifs - ipf stack instance                                    */
/*                                                                          */
/* Slowly expire held state for things like UDP and ICMP.  The algorithm    */
/* used here is to keep the queue sorted with the oldest things at the top  */
/* and the youngest at the bottom.  So if the top one doesn't need to be    */
/* expired then neither will any under it.                                  */
/*                                                                          */
/* Takes the write lock on ipf_state for the duration of the call.          */
/* ------------------------------------------------------------------------ */
void fr_timeoutstate(ifs)
ipf_stack_t *ifs;
{
	ipftq_t *ifq, *ifqnext;
	ipftqent_t *tqe, *tqn;
	ipstate_t *is;
	SPL_INT(s);

	SPL_NET(s);
	WRITE_ENTER(&ifs->ifs_ipf_state);
	/*
	 * Expire entries on the queues reached from ifs_ips_tqtqb.  The next
	 * pointer is saved before fr_delstate() because that call takes the
	 * entry off its timeout queue.
	 */
	for (ifq = ifs->ifs_ips_tqtqb; ifq != NULL; ifq = ifq->ifq_next)
		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
			if (tqe->tqe_die > ifs->ifs_fr_ticks)
				break;
			tqn = tqe->tqe_next;
			is = tqe->tqe_parent;
			fr_delstate(is, ISL_EXPIRE, ifs);
		}

	/* Same again for the queues reached from ifs_ips_utqe. */
	for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifq->ifq_next) {
		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
			if (tqe->tqe_die > ifs->ifs_fr_ticks)
				break;
			tqn = tqe->tqe_next;
			is = tqe->tqe_parent;
			fr_delstate(is, ISL_EXPIRE, ifs);
		}
	}

	/*
	 * Free any ifs_ips_utqe queue that is flagged for deletion and has no
	 * references left.  ifq_next is read first because the queue may be
	 * freed.
	 */
	for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) {
		ifqnext = ifq->ifq_next;

		if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
		    (ifq->ifq_ref == 0)) {
			fr_freetimeoutqueue(ifq, ifs);
		}
	}

	/* A flush was requested elsewhere: run it now and clear the flag. */
	if (ifs->ifs_fr_state_doflush) {
		(void) fr_state_flush(2, 0, ifs);
		ifs->ifs_fr_state_doflush = 0;
	}
	RWLOCK_EXIT(&ifs->ifs_ipf_state);
	SPL_X(s);
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_state_flush                                              */
/* Returns:     int - number of state table entries removed                 */
/* Parameters:  which(I) - which kind of flush to perform (see below)       */
/*              proto(I) - if not 0, only flush entries whose is_v matches  */
/*              ifs      - ipf stack instance                               */
/* Write Locks: ipf_state                                                   */
/*                                                                          */
/* Flush state tables.  Three actions currently defined:                    */
/* which == 0 : flush all state table entries                               */
/* which == 1 : flush TCP connections which have started to close but are   */
/*              stuck for some reason.                                      */
/* which == 2 : flush TCP connections which have been idle for a long time, */
/*              starting at > 4 days idle and working back in successive half-*/
/*              days to at most 12 hours old.  If this fails to free enough */
/*              slots then work backwards in half hour slots to 30 minutes. */
/*              If that too fails, then work backwards in 30 second intervals */
/*              for the last 30 minutes to at worst 30 seconds idle.
*/ 3388 /* ------------------------------------------------------------------------ */ 3389 static int fr_state_flush(which, proto, ifs) 3390 int which, proto; 3391 ipf_stack_t *ifs; 3392 { 3393 ipftq_t *ifq, *ifqnext; 3394 ipftqent_t *tqe, *tqn; 3395 ipstate_t *is, **isp; 3396 int delete, removed; 3397 long try, maxtick; 3398 u_long interval; 3399 SPL_INT(s); 3400 3401 removed = 0; 3402 3403 SPL_NET(s); 3404 for (isp = &ifs->ifs_ips_list; ((is = *isp) != NULL); ) { 3405 delete = 0; 3406 3407 if ((proto != 0) && (is->is_v != proto)) { 3408 isp = &is->is_next; 3409 continue; 3410 } 3411 3412 switch (which) 3413 { 3414 case 0 : 3415 delete = 1; 3416 break; 3417 case 1 : 3418 case 2 : 3419 if (is->is_p != IPPROTO_TCP) 3420 break; 3421 if ((is->is_state[0] != IPF_TCPS_ESTABLISHED) || 3422 (is->is_state[1] != IPF_TCPS_ESTABLISHED)) 3423 delete = 1; 3424 break; 3425 } 3426 3427 if (delete) { 3428 if (is->is_p == IPPROTO_TCP) 3429 ifs->ifs_ips_stats.iss_fin++; 3430 else 3431 ifs->ifs_ips_stats.iss_expire++; 3432 fr_delstate(is, ISL_FLUSH, ifs); 3433 removed++; 3434 } else 3435 isp = &is->is_next; 3436 } 3437 3438 if (which != 2) { 3439 SPL_X(s); 3440 return removed; 3441 } 3442 3443 /* 3444 * Asked to remove inactive entries because the table is full, try 3445 * again, 3 times, if first attempt failed with a different criteria 3446 * each time. The order tried in must be in decreasing age. 3447 * Another alternative is to implement random drop and drop N entries 3448 * at random until N have been freed up. 
3449 */ 3450 if (ifs->ifs_fr_ticks - ifs->ifs_ips_last_force_flush < IPF_TTLVAL(5)) 3451 goto force_flush_skipped; 3452 ifs->ifs_ips_last_force_flush = ifs->ifs_fr_ticks; 3453 3454 if (ifs->ifs_fr_ticks > IPF_TTLVAL(43200)) 3455 interval = IPF_TTLVAL(43200); 3456 else if (ifs->ifs_fr_ticks > IPF_TTLVAL(1800)) 3457 interval = IPF_TTLVAL(1800); 3458 else if (ifs->ifs_fr_ticks > IPF_TTLVAL(30)) 3459 interval = IPF_TTLVAL(30); 3460 else 3461 interval = IPF_TTLVAL(10); 3462 try = ifs->ifs_fr_ticks - (ifs->ifs_fr_ticks - interval); 3463 if (try < 0) 3464 goto force_flush_skipped; 3465 3466 while (removed == 0) { 3467 maxtick = ifs->ifs_fr_ticks - interval; 3468 if (maxtick < 0) 3469 break; 3470 3471 while (try < maxtick) { 3472 for (ifq = ifs->ifs_ips_tqtqb; ifq != NULL; 3473 ifq = ifq->ifq_next) { 3474 for (tqn = ifq->ifq_head; 3475 ((tqe = tqn) != NULL); ) { 3476 if (tqe->tqe_die > try) 3477 break; 3478 tqn = tqe->tqe_next; 3479 is = tqe->tqe_parent; 3480 fr_delstate(is, ISL_EXPIRE, ifs); 3481 removed++; 3482 } 3483 } 3484 3485 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) { 3486 ifqnext = ifq->ifq_next; 3487 3488 for (tqn = ifq->ifq_head; 3489 ((tqe = tqn) != NULL); ) { 3490 if (tqe->tqe_die > try) 3491 break; 3492 tqn = tqe->tqe_next; 3493 is = tqe->tqe_parent; 3494 fr_delstate(is, ISL_EXPIRE, ifs); 3495 removed++; 3496 } 3497 } 3498 if (try + interval > maxtick) 3499 break; 3500 try += interval; 3501 } 3502 3503 if (removed == 0) { 3504 if (interval == IPF_TTLVAL(43200)) { 3505 interval = IPF_TTLVAL(1800); 3506 } else if (interval == IPF_TTLVAL(1800)) { 3507 interval = IPF_TTLVAL(30); 3508 } else if (interval == IPF_TTLVAL(30)) { 3509 interval = IPF_TTLVAL(10); 3510 } else { 3511 break; 3512 } 3513 } 3514 } 3515 force_flush_skipped: 3516 SPL_X(s); 3517 return removed; 3518 } 3519 3520 3521 3522 /* ------------------------------------------------------------------------ */ 3523 /* Function: fr_tcp_age */ 3524 /* Returns: int - 1 == state transition made, 
0 == no change (rejected)  */
/* Parameters:  tqe(I)   - pointer to timeout queue entry being aged        */
/*              fin(I)   - pointer to packet information                    */
/*              tqtab(I) - TCP timeout queue table this is in               */
/*              flags(I) - flags from state/NAT entry                       */
/*                                                                          */
/* Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29:          */
/*                                                                          */
/* - (try to) base state transitions on real evidence only,                 */
/*   i.e. packets that are sent and have been received by ipfilter;         */
/*   diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used.       */
/*                                                                          */
/* - deal with half-closed connections correctly;                           */
/*                                                                          */
/* - store the state of the source in state[0] such that ipfstat            */
/*   displays the state as source/dest instead of dest/source; the calls    */
/*   to fr_tcp_age have been changed accordingly.                           */
/*                                                                          */
/* Internal Parameters:                                                     */
/*                                                                          */
/*    state[0] = state of source (host that initiated connection)           */
/*    state[1] = state of dest   (host that accepted the connection)        */
/*                                                                          */
/*    dir == 0 : a packet from source to dest                               */
/*    dir == 1 : a packet from dest to source                               */
/*                                                                          */
/* Locking: it is assumed that the parent of the tqe structure is locked.   */
/* ------------------------------------------------------------------------ */
int fr_tcp_age(tqe, fin, tqtab, flags)
ipftqent_t *tqe;
fr_info_t *fin;
ipftq_t *tqtab;
int flags;
{
	int dlen, ostate, nstate, rval, dir;
	u_char tcpflags;
	tcphdr_t *tcp;
	ipf_stack_t *ifs = fin->fin_ifs;

	tcp = fin->fin_dp;

	/*
	 * rval: 0 = packet not accepted by the state machine, 1 = accepted
	 * and state/queue updated, 2 = accepted without touching the queue
	 * (turned into 1 before returning).
	 */
	rval = 0;
	dir = fin->fin_rev;
	tcpflags = tcp->th_flags;
	/* presumably TCP_OFF() is the header length in 32-bit words - confirm */
	dlen = fin->fin_dlen - (TCP_OFF(tcp) << 2);

	if (tcpflags & TH_RST) {
		/*
		 * A reset with no PUSH flag and no data closes this side
		 * outright; any other reset moves it to CLOSE_WAIT.
		 */
		if (!(tcpflags & TH_PUSH) && !dlen)
			nstate = IPF_TCPS_CLOSED;
		else
			nstate = IPF_TCPS_CLOSE_WAIT;
		rval = 1;
	} else {
		/* ostate: the peer's state; nstate: state of the sender. */
		ostate = tqe->tqe_state[1 - dir];
		nstate = tqe->tqe_state[dir];

		switch (nstate)
		{
		case IPF_TCPS_CLOSED: /* 0 */
			if ((tcpflags & TH_OPENING) == TH_OPENING) {
				/*
				 * 'dir' received an S and sends SA in
				 * response, CLOSED -> SYN_RECEIVED
				 */
				nstate = IPF_TCPS_SYN_RECEIVED;
				rval = 1;
			} else if ((tcpflags & TH_OPENING) == TH_SYN) {
				/* 'dir' sent S, CLOSED -> SYN_SENT */
				nstate = IPF_TCPS_SYN_SENT;
				rval = 1;
			}
			/*
			 * the next piece of code makes it possible to get
			 * already established connections into the state table
			 * after a restart or reload of the filter rules; this
			 * does not work when a strict 'flags S keep state' is
			 * used for tcp connections of course
			 */
			if (((flags & IS_TCPFSM) == 0) &&
			    ((tcpflags & TH_ACKMASK) == TH_ACK)) {
				/*
				 * we saw an A, guess 'dir' is in ESTABLISHED
				 * mode
				 */
				switch (ostate)
				{
				case IPF_TCPS_CLOSED :
				case IPF_TCPS_SYN_RECEIVED :
					nstate = IPF_TCPS_HALF_ESTAB;
					rval = 1;
					break;
				case IPF_TCPS_HALF_ESTAB :
				case IPF_TCPS_ESTABLISHED :
					nstate = IPF_TCPS_ESTABLISHED;
					rval = 1;
					break;
				default :
					break;
				}
			}
			/*
			 * TODO: besides regular ACK packets we can have other
			 * packets as well; it is yet to be determined how we
			 * should initialize the states in those cases
			 */
			break;

		case IPF_TCPS_LISTEN: /* 1 */
			/* NOT USED */
			break;

		case IPF_TCPS_SYN_SENT: /* 2 */
			if ((tcpflags & ~(TH_ECN|TH_CWR)) == TH_SYN) {
				/*
				 * A retransmitted SYN packet.  We do not reset
				 * the timeout here to fr_tcptimeout because a
				 * connection connect timeout does not renew
				 * after every packet that is sent.  We need to
				 * set rval so as to indicate the packet has
				 * passed the check for its flags being valid
				 * in the TCP FSM.  Setting rval to 2 has the
				 * result of not resetting the timeout.
				 */
				rval = 2;
			} else if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) ==
				   TH_ACK) {
				/*
				 * we see an A from 'dir' which is in SYN_SENT
				 * state: 'dir' sent an A in response to an SA
				 * which it received, SYN_SENT -> ESTABLISHED
				 */
				nstate = IPF_TCPS_ESTABLISHED;
				rval = 1;
			} else if (tcpflags & TH_FIN) {
				/*
				 * we see an F from 'dir' which is in SYN_SENT
				 * state and wants to close its side of the
				 * connection; SYN_SENT -> FIN_WAIT_1
				 */
				nstate = IPF_TCPS_FIN_WAIT_1;
				rval = 1;
			} else if ((tcpflags & TH_OPENING) == TH_OPENING) {
				/*
				 * we see an SA from 'dir' which is already in
				 * SYN_SENT state, this means we have a
				 * simultaneous open; SYN_SENT -> SYN_RECEIVED
				 */
				nstate = IPF_TCPS_SYN_RECEIVED;
				rval = 1;
			}
			break;

		case IPF_TCPS_SYN_RECEIVED: /* 3 */
			if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) {
				/*
				 * we see an A from 'dir' which was in
				 * SYN_RECEIVED state so it must now be in
				 * established state, SYN_RECEIVED ->
				 * ESTABLISHED
				 */
				nstate = IPF_TCPS_ESTABLISHED;
				rval = 1;
			} else if ((tcpflags & ~(TH_ECN|TH_CWR)) ==
				   TH_OPENING) {
				/*
				 * We see an SA from 'dir' which is already in
				 * SYN_RECEIVED state.  Accept it without
				 * changing state or resetting the timeout.
				 */
				rval = 2;
			} else if (tcpflags & TH_FIN) {
				/*
				 * we see an F from 'dir' which is in
				 * SYN_RECEIVED state and wants to close its
				 * side of the connection; SYN_RECEIVED ->
				 * FIN_WAIT_1
				 */
				nstate = IPF_TCPS_FIN_WAIT_1;
				rval = 1;
			}
			break;

		case IPF_TCPS_HALF_ESTAB: /* 4 */
			/*
			 * Move to ESTABLISHED only when the peer is at
			 * HALF_ESTAB or beyond and TH_ACK is the only
			 * TH_ACKMASK bit set; the packet is accepted (rval = 1)
			 * either way.
			 */
			if (ostate >= IPF_TCPS_HALF_ESTAB) {
				if ((tcpflags & TH_ACKMASK) == TH_ACK) {
					nstate = IPF_TCPS_ESTABLISHED;
				}
			}
			rval = 1;

			break;

		case IPF_TCPS_ESTABLISHED: /* 5 */
			rval = 1;
			if (tcpflags & TH_FIN) {
				/*
				 * 'dir' closed its side of the connection;
				 * this gives us a half-closed connection;
				 * ESTABLISHED -> FIN_WAIT_1
				 */
				nstate = IPF_TCPS_FIN_WAIT_1;
			} else if (tcpflags & TH_ACK) {
				/*
				 * an ACK, should we exclude other flags here?
				 */
				if (ostate == IPF_TCPS_FIN_WAIT_1) {
					/*
					 * We know the other side did an active
					 * close, so we are ACKing the recvd
					 * FIN packet (does the window matching
					 * code guarantee this?) and go into
					 * CLOSE_WAIT state; this gives us a
					 * half-closed connection
					 */
					nstate = IPF_TCPS_CLOSE_WAIT;
				} else if (ostate < IPF_TCPS_CLOSE_WAIT) {
					/*
					 * still a fully established
					 * connection reset timeout
					 */
					nstate = IPF_TCPS_ESTABLISHED;
				}
			}
			break;

		case IPF_TCPS_CLOSE_WAIT: /* 6 */
			rval = 1;
			if (tcpflags & TH_FIN) {
				/*
				 * application closed and 'dir' sent a FIN,
				 * we're now going into LAST_ACK state
				 */
				nstate = IPF_TCPS_LAST_ACK;
			} else {
				/*
				 * we remain in CLOSE_WAIT because the other
				 * side has closed already and we did not
				 * close our side yet; reset timeout
				 */
				nstate = IPF_TCPS_CLOSE_WAIT;
			}
			break;

		case IPF_TCPS_FIN_WAIT_1: /* 7 */
			rval = 1;
			if ((tcpflags & TH_ACK) &&
			    ostate > IPF_TCPS_CLOSE_WAIT) {
				/*
				 * if the other side is not active anymore
				 * it has sent us a FIN packet that we are
				 * ack'ing now with an ACK; this means both
				 * sides have now closed the connection and
				 * we go into TIME_WAIT
				 */
				/*
				 * XXX: how do we know we really are ACKing
				 * the FIN packet here? does the window code
				 * guarantee that?
				 */
				nstate = IPF_TCPS_TIME_WAIT;
			} else {
				/*
				 * we closed our side of the connection
				 * already but the other side is still active
				 * (ESTABLISHED/CLOSE_WAIT); continue with
				 * this half-closed connection
				 */
				nstate = IPF_TCPS_FIN_WAIT_1;
			}
			break;

		case IPF_TCPS_CLOSING: /* 8 */
			/* NOT USED */
			break;

		case IPF_TCPS_LAST_ACK: /* 9 */
			/*
			 * We want to reset timer here to keep state in table.
			 * If we would allow the state to time out here, while
			 * there would still be packets being retransmitted, we
			 * would cut off line between the two peers preventing
			 * them to close connection properly.
			 */
			rval = 1;
			break;

		case IPF_TCPS_FIN_WAIT_2: /* 10 */
			/*
			 * Every packet is accepted here; one carrying the
			 * opening flags moves this side back to SYN_RECEIVED
			 * or SYN_SENT.
			 */
			rval = 1;
			if ((tcpflags & TH_OPENING) == TH_OPENING)
				nstate = IPF_TCPS_SYN_RECEIVED;
			else if (tcpflags & TH_SYN)
				nstate = IPF_TCPS_SYN_SENT;
			break;

		case IPF_TCPS_TIME_WAIT: /* 11 */
			/* we're in 2MSL timeout now */
			rval = 1;
			break;

		default :
			/*
			 * Unknown state value: report it in the kernel, abort
			 * in a user-space build.  rval stays 0.
			 */
#if defined(_KERNEL)
# if SOLARIS
			cmn_err(CE_NOTE,
				"tcp %lx flags %x si %lx nstate %d ostate %d\n",
				(u_long)tcp, tcpflags, (u_long)tqe,
				nstate, ostate);
# else
			printf("tcp %lx flags %x si %lx nstate %d ostate %d\n",
				(u_long)tcp, tcpflags, (u_long)tqe,
				nstate, ostate);
# endif
#else
			abort();
#endif
			break;
		}
	}

	/*
	 * If rval == 2 then do not update the queue position, but treat the
	 * packet as being ok.
	 */
	if (rval == 2)
		rval = 1;
	else if (rval == 1) {
		tqe->tqe_state[dir] = nstate;
		/*
		 * Entries without TQE_RULEBASED are moved to the queue in
		 * tqtab that is indexed by the new state.
		 */
		if ((tqe->tqe_flags & TQE_RULEBASED) == 0)
			fr_movequeue(tqe, tqe->tqe_ifq, tqtab + nstate, ifs);
	}

	return rval;
}


/* ------------------------------------------------------------------------ */
/* Function:    ipstate_log                                                 */
/* Returns:     Nil                                                         */
/* Parameters:  is(I)   - pointer to state structure                        */
/*              type(I) - type of log entry to create                       */
/*              ifs     - ipf stack instance                                */
/*                                                                          */
/* Creates a state table log entry using the state structure and type info. */
/* passed in.  Log packet/byte counts, source/destination address and other */
/* protocol specific information.
*/ 3866 /* ------------------------------------------------------------------------ */ 3867 void ipstate_log(is, type, ifs) 3868 struct ipstate *is; 3869 u_int type; 3870 ipf_stack_t *ifs; 3871 { 3872 #ifdef IPFILTER_LOG 3873 struct ipslog ipsl; 3874 size_t sizes[1]; 3875 void *items[1]; 3876 int types[1]; 3877 3878 /* 3879 * Copy information out of the ipstate_t structure and into the 3880 * structure used for logging. 3881 */ 3882 ipsl.isl_type = type; 3883 ipsl.isl_pkts[0] = is->is_pkts[0] + is->is_icmppkts[0]; 3884 ipsl.isl_bytes[0] = is->is_bytes[0]; 3885 ipsl.isl_pkts[1] = is->is_pkts[1] + is->is_icmppkts[1]; 3886 ipsl.isl_bytes[1] = is->is_bytes[1]; 3887 ipsl.isl_pkts[2] = is->is_pkts[2] + is->is_icmppkts[2]; 3888 ipsl.isl_bytes[2] = is->is_bytes[2]; 3889 ipsl.isl_pkts[3] = is->is_pkts[3] + is->is_icmppkts[3]; 3890 ipsl.isl_bytes[3] = is->is_bytes[3]; 3891 ipsl.isl_src = is->is_src; 3892 ipsl.isl_dst = is->is_dst; 3893 ipsl.isl_p = is->is_p; 3894 ipsl.isl_v = is->is_v; 3895 ipsl.isl_flags = is->is_flags; 3896 ipsl.isl_tag = is->is_tag; 3897 ipsl.isl_rulen = is->is_rulen; 3898 (void) strncpy(ipsl.isl_group, is->is_group, FR_GROUPLEN); 3899 3900 if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) { 3901 ipsl.isl_sport = is->is_sport; 3902 ipsl.isl_dport = is->is_dport; 3903 if (ipsl.isl_p == IPPROTO_TCP) { 3904 ipsl.isl_state[0] = is->is_state[0]; 3905 ipsl.isl_state[1] = is->is_state[1]; 3906 } 3907 } else if (ipsl.isl_p == IPPROTO_ICMP) { 3908 ipsl.isl_itype = is->is_icmp.ici_type; 3909 } else if (ipsl.isl_p == IPPROTO_ICMPV6) { 3910 ipsl.isl_itype = is->is_icmp.ici_type; 3911 } else { 3912 ipsl.isl_ps.isl_filler[0] = 0; 3913 ipsl.isl_ps.isl_filler[1] = 0; 3914 } 3915 3916 items[0] = &ipsl; 3917 sizes[0] = sizeof(ipsl); 3918 types[0] = 0; 3919 3920 if (ipllog(IPL_LOGSTATE, NULL, items, sizes, types, 1, ifs)) { 3921 ATOMIC_INCL(ifs->ifs_ips_stats.iss_logged); 3922 } else { 3923 ATOMIC_INCL(ifs->ifs_ips_stats.iss_logfail); 3924 } 3925 #endif 3926 } 
#ifdef	USE_INET6
/* ------------------------------------------------------------------------ */
/* Function:    fr_checkicmp6matchingstate                                  */
/* Returns:     ipstate_t* - NULL == no match found,                        */
/*                           else pointer to matching state entry           */
/* Parameters:  fin(I) - pointer to packet information                      */
/* Locks:       NULL == no locks, else Read Lock on ipf_state               */
/*                                                                          */
/* If we've got an ICMPv6 error message, using the information stored in    */
/* the ICMPv6 packet, look for a matching state table entry.                */
/*                                                                          */
/* On a match, fin->fin_rev is set and the entry's is_icmppkts counter for  */
/* the matching direction is incremented before returning.                  */
/* ------------------------------------------------------------------------ */
static ipstate_t *fr_checkicmp6matchingstate(fin)
fr_info_t *fin;
{
	struct icmp6_hdr *ic6, *oic;
	int backward, i;
	ipstate_t *is, **isp;
	u_short sport, dport;
	i6addr_t dst, src;
	u_short savelen;
	icmpinfo_t *ic;
	fr_info_t ofin;
	tcphdr_t *tcp;
	ip6_t *oip6;
	u_char	pr;
	u_int hv;
	ipf_stack_t *ifs = fin->fin_ifs;

	/*
	 * Does it at least have the return (basic) IP header ?
	 * Is it an actual recognised ICMP error type?
	 * Only a basic IP header (no options) should be with
	 * an ICMP error header.
	 */
	if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN) ||
	    !(fin->fin_flx & FI_ICMPERR))
		return NULL;

	ic6 = fin->fin_dp;

	/* oip6: the IPv6 header of the packet quoted in the ICMPv6 error. */
	oip6 = (ip6_t *)((char *)ic6 + ICMPERR_ICMPHLEN);
	if (fin->fin_plen < sizeof(*oip6))
		return NULL;

	/*
	 * Start from a copy of fin and flip the direction, since the quoted
	 * packet travelled the opposite way to the error that carries it.
	 */
	bcopy((char *)fin, (char *)&ofin, sizeof(*fin));
	ofin.fin_v = 6;
	ofin.fin_ifp = fin->fin_ifp;
	ofin.fin_out = !fin->fin_out;
	ofin.fin_m = NULL;	/* if dereferenced, panic XXX */
	ofin.fin_mp = NULL;	/* if dereferenced, panic XXX */

	/*
	 * We make a fin entry to be able to feed it to
	 * matchsrcdst.  Note that not all fields are necessary
	 * but this is the cleanest way.  Note further we fill
	 * in fin_mp such that if someone uses it we'll get
	 * a kernel panic.  fr_matchsrcdst does not use this.
	 *
	 * watch out here, as ip is in host order and oip6 in network
	 * order.  Any change we make must be undone afterwards.
	 */
	savelen = oip6->ip6_plen;
	oip6->ip6_plen = fin->fin_dlen - ICMPERR_ICMPHLEN;
	ofin.fin_flx = FI_NOCKSUM;
	ofin.fin_ip = (ip_t *)oip6;
	ofin.fin_plen = oip6->ip6_plen;
	(void) fr_makefrip(sizeof(*oip6), (ip_t *)oip6, &ofin);
	ofin.fin_flx &= ~(FI_BAD|FI_SHORT);
	/* Put the original payload length back into the packet. */
	oip6->ip6_plen = savelen;

	if (oip6->ip6_nxt == IPPROTO_ICMPV6) {
		oic = (struct icmp6_hdr *)(oip6 + 1);
		/*
		 * an ICMP error can only be generated as a result of an
		 * ICMP query, not as the response on an ICMP error
		 *
		 * XXX theoretically ICMP_ECHOREP and the other reply's are
		 * ICMP query's as well, but adding them here seems strange XXX
		 */
		if (!(oic->icmp6_type & ICMP6_INFOMSG_MASK))
			return NULL;

		/*
		 * perform a lookup of the ICMP packet in the state table
		 */
		hv = (pr = oip6->ip6_nxt);
		src.in6 = oip6->ip6_src;
		hv += src.in4.s_addr;
		dst.in6 = oip6->ip6_dst;
		hv += dst.in4.s_addr;
		hv += oic->icmp6_id;
		hv += oic->icmp6_seq;
		hv = DOUBLE_HASH(hv, ifs);

		READ_ENTER(&ifs->ifs_ipf_state);
		for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) {
			ic = &is->is_icmp;
			/*
			 * Advance isp now: 'is' is overwritten by the
			 * fr_matchsrcdst() result in the condition below.
			 */
			isp = &is->is_hnext;
			if ((is->is_p == pr) &&
			    !(is->is_pass & FR_NOICMPERR) &&
			    (oic->icmp6_id == ic->ici_id) &&
			    (oic->icmp6_seq == ic->ici_seq) &&
			    (is = fr_matchsrcdst(&ofin, is, &src,
						 &dst, NULL, FI_ICMPCMP))) {
				/*
				 * in the state table ICMP query's are stored
				 * with the type of the corresponding ICMP
				 * response.  Correct here
				 */
				if (((ic->ici_type == ICMP6_ECHO_REPLY) &&
				     (oic->icmp6_type == ICMP6_ECHO_REQUEST)) ||
				     (ic->ici_type - 1 == oic->icmp6_type )) {
					ifs->ifs_ips_stats.iss_hits++;
					backward = IP6_NEQ(&is->is_dst, &src);
					fin->fin_rev = !backward;
					i = (backward << 1) + fin->fin_out;
					is->is_icmppkts[i]++;
					/* Returns with the read lock held. */
					return is;
				}
			}
		}
		RWLOCK_EXIT(&ifs->ifs_ipf_state);
		return NULL;
	}

	/*
	 * Quoted packet is not ICMPv6: hash on the protocol, all four words
	 * of each address and, for TCP/UDP, both ports.
	 */
	hv = (pr = oip6->ip6_nxt);
	src.in6 = oip6->ip6_src;
	hv += src.i6[0];
	hv += src.i6[1];
	hv += src.i6[2];
	hv += src.i6[3];
	dst.in6 = oip6->ip6_dst;
	hv += dst.i6[0];
	hv += dst.i6[1];
	hv += dst.i6[2];
	hv += dst.i6[3];

	if ((oip6->ip6_nxt == IPPROTO_TCP) || (oip6->ip6_nxt == IPPROTO_UDP)) {
		/* Only th_sport/th_dport are read here, for both TCP and UDP. */
		tcp = (tcphdr_t *)(oip6 + 1);
		dport = tcp->th_dport;
		sport = tcp->th_sport;
		hv += dport;
		hv += sport;
	} else
		tcp = NULL;
	hv = DOUBLE_HASH(hv, ifs);

	READ_ENTER(&ifs->ifs_ipf_state);
	for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) {
		isp = &is->is_hnext;
		/*
		 * Only allow this icmp though if the
		 * encapsulated packet was allowed through the
		 * other way around. Note that the minimal amount
		 * of info present does not allow for checking against
		 * tcp internals such as seq and ack numbers.
		 */
		if ((is->is_p != pr) || (is->is_v != 6) ||
		    (is->is_pass & FR_NOICMPERR))
			continue;
		is = fr_matchsrcdst(&ofin, is, &src, &dst, tcp, FI_ICMPCMP);
		if (is != NULL) {
			ifs->ifs_ips_stats.iss_hits++;
			backward = IP6_NEQ(&is->is_dst, &src);
			fin->fin_rev = !backward;
			i = (backward << 1) + fin->fin_out;
			is->is_icmppkts[i]++;
			/*
			 * we deliberately do not touch the timeouts
			 * for the accompanying state table entry.
			 * It remains to be seen if that is correct. XXX
			 */
			/* Returns with the read lock held. */
			return is;
		}
	}
	RWLOCK_EXIT(&ifs->ifs_ipf_state);
	return NULL;
}
#endif


/* ------------------------------------------------------------------------ */
/* Function:    fr_sttab_init                                               */
/* Returns:     Nil                                                         */
/* Parameters:  tqp(I) - pointer to an array of timeout queues for TCP      */
/*              ifs    - ipf stack instance supplying the timeout values    */
/*                                                                          */
/* Initialise the array of timeout queues for TCP.                          */
/* ------------------------------------------------------------------------ */
void fr_sttab_init(tqp, ifs)
ipftq_t *tqp;
ipf_stack_t *ifs;
{
	int i;

	/*
	 * Reset every queue to empty with one reference and chain each one
	 * to its successor in the array.
	 */
	for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) {
		tqp[i].ifq_ttl = 0;
		tqp[i].ifq_ref = 1;
		tqp[i].ifq_head = NULL;
		tqp[i].ifq_tail = &tqp[i].ifq_head;
		tqp[i].ifq_next = tqp + i + 1;
		MUTEX_INIT(&tqp[i].ifq_lock, "ipftq tcp tab");
	}
	/* The loop pointed the last queue one past the array; end the chain. */
	tqp[IPF_TCP_NSTATES - 1].ifq_next = NULL;
	/* Per-state timeouts, taken from the stack instance's settings. */
	tqp[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcpclosed;
	tqp[IPF_TCPS_LISTEN].ifq_ttl = ifs->ifs_fr_tcptimeout;
	tqp[IPF_TCPS_SYN_SENT].ifq_ttl = ifs->ifs_fr_tcptimeout;
	tqp[IPF_TCPS_SYN_RECEIVED].ifq_ttl = ifs->ifs_fr_tcptimeout;
	tqp[IPF_TCPS_ESTABLISHED].ifq_ttl = ifs->ifs_fr_tcpidletimeout;
	tqp[IPF_TCPS_CLOSE_WAIT].ifq_ttl = ifs->ifs_fr_tcphalfclosed;
	tqp[IPF_TCPS_FIN_WAIT_1].ifq_ttl = ifs->ifs_fr_tcphalfclosed;
	tqp[IPF_TCPS_CLOSING].ifq_ttl = ifs->ifs_fr_tcptimeout;
	tqp[IPF_TCPS_LAST_ACK].ifq_ttl = ifs->ifs_fr_tcplastack;
	tqp[IPF_TCPS_FIN_WAIT_2].ifq_ttl = ifs->ifs_fr_tcpclosewait;
	tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = ifs->ifs_fr_tcptimeout;
	tqp[IPF_TCPS_HALF_ESTAB].ifq_ttl = ifs->ifs_fr_tcptimeout;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_sttab_destroy                                            */
/* Returns:     Nil                                                         */
/* Parameters:  tqp(I) - pointer to an array of timeout queues for TCP      */
/*                                                                          */
/* Do whatever is necessary to "destroy" each of the entries
in the array */ 4153 /* of timeout queues for TCP. */ 4154 /* ------------------------------------------------------------------------ */ 4155 void fr_sttab_destroy(tqp) 4156 ipftq_t *tqp; 4157 { 4158 int i; 4159 4160 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) 4161 MUTEX_DESTROY(&tqp[i].ifq_lock); 4162 } 4163 4164 4165 /* ------------------------------------------------------------------------ */ 4166 /* Function: fr_statederef */ 4167 /* Returns: Nil */ 4168 /* Parameters: isp(I) - pointer to pointer to state table entry */ 4169 /* ifs - ipf stack instance */ 4170 /* */ 4171 /* Decrement the reference counter for this state table entry and free it */ 4172 /* if there are no more things using it. */ 4173 /* */ 4174 /* Internal parameters: */ 4175 /* state[0] = state of source (host that initiated connection) */ 4176 /* state[1] = state of dest (host that accepted the connection) */ 4177 /* ------------------------------------------------------------------------ */ 4178 void fr_statederef(isp, ifs) 4179 ipstate_t **isp; 4180 ipf_stack_t *ifs; 4181 { 4182 ipstate_t *is; 4183 4184 is = *isp; 4185 *isp = NULL; 4186 4187 MUTEX_ENTER(&is->is_lock); 4188 if (is->is_ref > 1) { 4189 is->is_ref--; 4190 MUTEX_EXIT(&is->is_lock); 4191 #ifndef _KERNEL 4192 if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) || 4193 (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) { 4194 fr_delstate(is, ISL_ORPHAN, ifs); 4195 } 4196 #endif 4197 return; 4198 } 4199 MUTEX_EXIT(&is->is_lock); 4200 4201 WRITE_ENTER(&ifs->ifs_ipf_state); 4202 fr_delstate(is, ISL_EXPIRE, ifs); 4203 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4204 } 4205 4206 4207 /* ------------------------------------------------------------------------ */ 4208 /* Function: fr_setstatequeue */ 4209 /* Returns: Nil */ 4210 /* Parameters: is(I) - pointer to state structure */ 4211 /* rev(I) - forward(0) or reverse(1) direction */ 4212 /* Locks: ipf_state (read or write) */ 4213 /* */ 4214 /* Put the state entry on its default queue entry, 
using rev as a helped in */ 4215 /* determining which queue it should be placed on. */ 4216 /* ------------------------------------------------------------------------ */ 4217 void fr_setstatequeue(is, rev, ifs) 4218 ipstate_t *is; 4219 int rev; 4220 ipf_stack_t *ifs; 4221 { 4222 ipftq_t *oifq, *nifq; 4223 4224 4225 if ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) 4226 nifq = is->is_tqehead[rev]; 4227 else 4228 nifq = NULL; 4229 4230 if (nifq == NULL) { 4231 switch (is->is_p) 4232 { 4233 #ifdef USE_INET6 4234 case IPPROTO_ICMPV6 : 4235 if (rev == 1) 4236 nifq = &ifs->ifs_ips_icmpacktq; 4237 else 4238 nifq = &ifs->ifs_ips_icmptq; 4239 break; 4240 #endif 4241 case IPPROTO_ICMP : 4242 if (rev == 1) 4243 nifq = &ifs->ifs_ips_icmpacktq; 4244 else 4245 nifq = &ifs->ifs_ips_icmptq; 4246 break; 4247 case IPPROTO_TCP : 4248 nifq = ifs->ifs_ips_tqtqb + is->is_state[rev]; 4249 break; 4250 4251 case IPPROTO_UDP : 4252 if (rev == 1) 4253 nifq = &ifs->ifs_ips_udpacktq; 4254 else 4255 nifq = &ifs->ifs_ips_udptq; 4256 break; 4257 4258 default : 4259 nifq = &ifs->ifs_ips_iptq; 4260 break; 4261 } 4262 } 4263 4264 oifq = is->is_sti.tqe_ifq; 4265 /* 4266 * If it's currently on a timeout queue, move it from one queue to 4267 * another, else put it on the end of the newly determined queue. 4268 */ 4269 if (oifq != NULL) 4270 fr_movequeue(&is->is_sti, oifq, nifq, ifs); 4271 else 4272 fr_queueappend(&is->is_sti, nifq, is, ifs); 4273 return; 4274 } 4275 4276 4277 /* ------------------------------------------------------------------------ */ 4278 /* Function: fr_stateiter */ 4279 /* Returns: int - 0 == success, else error */ 4280 /* Parameters: token(I) - pointer to ipftoken structure */ 4281 /* itp(I) - pointer to ipfgeniter structure */ 4282 /* */ 4283 /* This function handles the SIOCGENITER ioctl for the state tables and */ 4284 /* walks through the list of entries in the state table list (ips_list.) 
*/ 4285 /* ------------------------------------------------------------------------ */ 4286 static int fr_stateiter(token, itp, ifs) 4287 ipftoken_t *token; 4288 ipfgeniter_t *itp; 4289 ipf_stack_t *ifs; 4290 { 4291 ipstate_t *is, *next, zero; 4292 int error, count; 4293 char *dst; 4294 4295 if (itp->igi_data == NULL) 4296 return EFAULT; 4297 4298 if (itp->igi_nitems == 0) 4299 return EINVAL; 4300 4301 if (itp->igi_type != IPFGENITER_STATE) 4302 return EINVAL; 4303 4304 error = 0; 4305 4306 READ_ENTER(&ifs->ifs_ipf_state); 4307 4308 /* 4309 * Get "previous" entry from the token and find the next entry. 4310 */ 4311 is = token->ipt_data; 4312 if (is == NULL) { 4313 next = ifs->ifs_ips_list; 4314 } else { 4315 next = is->is_next; 4316 } 4317 4318 dst = itp->igi_data; 4319 for (count = itp->igi_nitems; count > 0; count--) { 4320 /* 4321 * If we found an entry, add a reference to it and update the token. 4322 * Otherwise, zero out data to be returned and NULL out token. 4323 */ 4324 if (next != NULL) { 4325 MUTEX_ENTER(&next->is_lock); 4326 next->is_ref++; 4327 MUTEX_EXIT(&next->is_lock); 4328 token->ipt_data = next; 4329 } else { 4330 bzero(&zero, sizeof(zero)); 4331 next = &zero; 4332 token->ipt_data = NULL; 4333 } 4334 4335 /* 4336 * Safe to release lock now the we have a reference. 4337 */ 4338 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4339 4340 /* 4341 * Copy out data and clean up references and tokens. 4342 */ 4343 error = COPYOUT(next, dst, sizeof(*next)); 4344 if (error != 0) 4345 error = EFAULT; 4346 if (token->ipt_data == NULL) { 4347 ipf_freetoken(token, ifs); 4348 break; 4349 } else { 4350 if (is != NULL) 4351 fr_statederef(&is, ifs); 4352 if (next->is_next == NULL) { 4353 ipf_freetoken(token, ifs); 4354 break; 4355 } 4356 } 4357 4358 if ((count == 1) || (error != 0)) 4359 break; 4360 4361 READ_ENTER(&ifs->ifs_ipf_state); 4362 dst += sizeof(*next); 4363 is = next; 4364 next = is->is_next; 4365 } 4366 4367 return error; 4368 } 4369