1 /* 2 * Copyright (C) 1995-2003 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 7 * Use is subject to license terms. 8 */ 9 10 #if defined(KERNEL) || defined(_KERNEL) 11 # undef KERNEL 12 # undef _KERNEL 13 # define KERNEL 1 14 # define _KERNEL 1 15 #endif 16 #include <sys/errno.h> 17 #include <sys/types.h> 18 #include <sys/param.h> 19 #include <sys/file.h> 20 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ 21 defined(_KERNEL) 22 # include "opt_ipfilter_log.h" 23 #endif 24 #if defined(_KERNEL) && defined(__FreeBSD_version) && \ 25 (__FreeBSD_version >= 400000) && !defined(KLD_MODULE) 26 #include "opt_inet6.h" 27 #endif 28 #if !defined(_KERNEL) && !defined(__KERNEL__) 29 # include <stdio.h> 30 # include <stdlib.h> 31 # include <string.h> 32 # define _KERNEL 33 # ifdef __OpenBSD__ 34 struct file; 35 # endif 36 # include <sys/uio.h> 37 # undef _KERNEL 38 #endif 39 #if defined(_KERNEL) && (__FreeBSD_version >= 220000) 40 # include <sys/filio.h> 41 # include <sys/fcntl.h> 42 # if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM) 43 # include "opt_ipfilter.h" 44 # endif 45 #else 46 # include <sys/ioctl.h> 47 #endif 48 #include <sys/time.h> 49 #if !defined(linux) 50 # include <sys/protosw.h> 51 #endif 52 #include <sys/socket.h> 53 #if defined(_KERNEL) 54 # include <sys/systm.h> 55 # if !defined(__SVR4) && !defined(__svr4__) 56 # include <sys/mbuf.h> 57 # endif 58 #endif 59 #if defined(__SVR4) || defined(__svr4__) 60 # include <sys/filio.h> 61 # include <sys/byteorder.h> 62 # ifdef _KERNEL 63 # include <sys/dditypes.h> 64 # endif 65 # include <sys/stream.h> 66 # include <sys/kmem.h> 67 #endif 68 69 #include <net/if.h> 70 #ifdef sun 71 # include <net/af.h> 72 #endif 73 #include <net/route.h> 74 #include <netinet/in.h> 75 #include <netinet/in_systm.h> 76 #include <netinet/ip.h> 77 #include <netinet/tcp.h> 78 #if !defined(linux) 79 # include 
<netinet/ip_var.h> 80 #endif 81 #if !defined(__hpux) && !defined(linux) 82 # include <netinet/tcp_fsm.h> 83 #endif 84 #include <netinet/udp.h> 85 #include <netinet/ip_icmp.h> 86 #include "netinet/ip_compat.h" 87 #include <netinet/tcpip.h> 88 #include "netinet/ip_fil.h" 89 #include "netinet/ip_nat.h" 90 #include "netinet/ip_frag.h" 91 #include "netinet/ip_state.h" 92 #include "netinet/ip_proxy.h" 93 #include "netinet/ipf_stack.h" 94 #ifdef IPFILTER_SYNC 95 #include "netinet/ip_sync.h" 96 #endif 97 #ifdef IPFILTER_SCAN 98 #include "netinet/ip_scan.h" 99 #endif 100 #ifdef USE_INET6 101 #include <netinet/icmp6.h> 102 #endif 103 #if (__FreeBSD_version >= 300000) 104 # include <sys/malloc.h> 105 # if defined(_KERNEL) && !defined(IPFILTER_LKM) 106 # include <sys/libkern.h> 107 # include <sys/systm.h> 108 # endif 109 #endif 110 /* END OF INCLUDES */ 111 112 113 #if !defined(lint) 114 static const char sccsid[] = "@(#)ip_state.c 1.8 6/5/96 (C) 1993-2000 Darren Reed"; 115 static const char rcsid[] = "@(#)$Id: ip_state.c,v 2.186.2.36 2005/08/11 19:58:03 darrenr Exp $"; 116 #endif 117 118 #ifdef USE_INET6 119 static ipstate_t *fr_checkicmp6matchingstate __P((fr_info_t *)); 120 #endif 121 static ipstate_t *fr_matchsrcdst __P((fr_info_t *, ipstate_t *, i6addr_t *, 122 i6addr_t *, tcphdr_t *, u_32_t)); 123 static ipstate_t *fr_checkicmpmatchingstate __P((fr_info_t *)); 124 static int fr_state_flush __P((int, int, ipf_stack_t *)); 125 static ips_stat_t *fr_statetstats __P((ipf_stack_t *)); 126 static void fr_delstate __P((ipstate_t *, int, ipf_stack_t *)); 127 static int fr_state_remove __P((caddr_t, ipf_stack_t *)); 128 static void fr_ipsmove __P((ipstate_t *, u_int, ipf_stack_t *)); 129 static int fr_tcpstate __P((fr_info_t *, tcphdr_t *, ipstate_t *)); 130 static int fr_tcpoptions __P((fr_info_t *, tcphdr_t *, tcpdata_t *)); 131 static ipstate_t *fr_stclone __P((fr_info_t *, tcphdr_t *, ipstate_t *)); 132 static void fr_fixinisn __P((fr_info_t *, ipstate_t *)); 133 static void 
fr_fixoutisn __P((fr_info_t *, ipstate_t *)); 134 static void fr_checknewisn __P((fr_info_t *, ipstate_t *)); 135 static int fr_stateiter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 136 137 int fr_stputent __P((caddr_t, ipf_stack_t *)); 138 int fr_stgetent __P((caddr_t, ipf_stack_t *)); 139 140 #define ONE_DAY IPF_TTLVAL(1 * 86400) /* 1 day */ 141 #define FIVE_DAYS (5 * ONE_DAY) 142 #define DOUBLE_HASH(x, ifs) \ 143 (((x) + ifs->ifs_ips_seed[(x) % ifs->ifs_fr_statesize]) % ifs->ifs_fr_statesize) 144 145 146 147 /* ------------------------------------------------------------------------ */ 148 /* Function: fr_stateinit */ 149 /* Returns: int - 0 == success, -1 == failure */ 150 /* Parameters: Nil */ 151 /* */ 152 /* Initialise all the global variables used within the state code. */ 153 /* This action also includes initiailising locks. */ 154 /* ------------------------------------------------------------------------ */ 155 int fr_stateinit(ifs) 156 ipf_stack_t *ifs; 157 { 158 #if defined(NEED_LOCAL_RAND) || !defined(_KERNEL) 159 struct timeval tv; 160 #endif 161 int i; 162 163 KMALLOCS(ifs->ifs_ips_table, ipstate_t **, 164 ifs->ifs_fr_statesize * sizeof(ipstate_t *)); 165 if (ifs->ifs_ips_table == NULL) 166 return -1; 167 bzero((char *)ifs->ifs_ips_table, 168 ifs->ifs_fr_statesize * sizeof(ipstate_t *)); 169 170 KMALLOCS(ifs->ifs_ips_seed, u_long *, 171 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed)); 172 if (ifs->ifs_ips_seed == NULL) 173 return -2; 174 #if defined(NEED_LOCAL_RAND) || !defined(_KERNEL) 175 tv.tv_sec = 0; 176 GETKTIME(&tv); 177 #endif 178 for (i = 0; i < ifs->ifs_fr_statesize; i++) { 179 /* 180 * XXX - ips_seed[X] should be a random number of sorts. 
181 */ 182 #if !defined(NEED_LOCAL_RAND) && defined(_KERNEL) 183 ifs->ifs_ips_seed[i] = ipf_random(); 184 #else 185 ifs->ifs_ips_seed[i] = ((u_long)ifs->ifs_ips_seed + i) * 186 ifs->ifs_fr_statesize; 187 ifs->ifs_ips_seed[i] += tv.tv_sec; 188 ifs->ifs_ips_seed[i] *= (u_long)ifs->ifs_ips_seed; 189 ifs->ifs_ips_seed[i] ^= 0x5a5aa5a5; 190 ifs->ifs_ips_seed[i] *= ifs->ifs_fr_statemax; 191 #endif 192 } 193 194 /* fill icmp reply type table */ 195 for (i = 0; i <= ICMP_MAXTYPE; i++) 196 icmpreplytype4[i] = -1; 197 icmpreplytype4[ICMP_ECHO] = ICMP_ECHOREPLY; 198 icmpreplytype4[ICMP_TSTAMP] = ICMP_TSTAMPREPLY; 199 icmpreplytype4[ICMP_IREQ] = ICMP_IREQREPLY; 200 icmpreplytype4[ICMP_MASKREQ] = ICMP_MASKREPLY; 201 #ifdef USE_INET6 202 /* fill icmp reply type table */ 203 for (i = 0; i <= ICMP6_MAXTYPE; i++) 204 icmpreplytype6[i] = -1; 205 icmpreplytype6[ICMP6_ECHO_REQUEST] = ICMP6_ECHO_REPLY; 206 icmpreplytype6[ICMP6_MEMBERSHIP_QUERY] = ICMP6_MEMBERSHIP_REPORT; 207 icmpreplytype6[ICMP6_NI_QUERY] = ICMP6_NI_REPLY; 208 icmpreplytype6[ND_ROUTER_SOLICIT] = ND_ROUTER_ADVERT; 209 icmpreplytype6[ND_NEIGHBOR_SOLICIT] = ND_NEIGHBOR_ADVERT; 210 #endif 211 212 KMALLOCS(ifs->ifs_ips_stats.iss_bucketlen, u_long *, 213 ifs->ifs_fr_statesize * sizeof(u_long)); 214 if (ifs->ifs_ips_stats.iss_bucketlen == NULL) 215 return -1; 216 bzero((char *)ifs->ifs_ips_stats.iss_bucketlen, 217 ifs->ifs_fr_statesize * sizeof(u_long)); 218 219 if (ifs->ifs_fr_state_maxbucket == 0) { 220 for (i = ifs->ifs_fr_statesize; i > 0; i >>= 1) 221 ifs->ifs_fr_state_maxbucket++; 222 ifs->ifs_fr_state_maxbucket *= 2; 223 } 224 225 fr_sttab_init(ifs->ifs_ips_tqtqb, ifs); 226 ifs->ifs_ips_tqtqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_ips_udptq; 227 ifs->ifs_ips_udptq.ifq_ttl = (u_long)ifs->ifs_fr_udptimeout; 228 ifs->ifs_ips_udptq.ifq_ref = 1; 229 ifs->ifs_ips_udptq.ifq_head = NULL; 230 ifs->ifs_ips_udptq.ifq_tail = &ifs->ifs_ips_udptq.ifq_head; 231 MUTEX_INIT(&ifs->ifs_ips_udptq.ifq_lock, "ipftq udp tab"); 232 
ifs->ifs_ips_udptq.ifq_next = &ifs->ifs_ips_udpacktq; 233 ifs->ifs_ips_udpacktq.ifq_ttl = (u_long)ifs->ifs_fr_udpacktimeout; 234 ifs->ifs_ips_udpacktq.ifq_ref = 1; 235 ifs->ifs_ips_udpacktq.ifq_head = NULL; 236 ifs->ifs_ips_udpacktq.ifq_tail = &ifs->ifs_ips_udpacktq.ifq_head; 237 MUTEX_INIT(&ifs->ifs_ips_udpacktq.ifq_lock, "ipftq udpack tab"); 238 ifs->ifs_ips_udpacktq.ifq_next = &ifs->ifs_ips_icmptq; 239 ifs->ifs_ips_icmptq.ifq_ttl = (u_long)ifs->ifs_fr_icmptimeout; 240 ifs->ifs_ips_icmptq.ifq_ref = 1; 241 ifs->ifs_ips_icmptq.ifq_head = NULL; 242 ifs->ifs_ips_icmptq.ifq_tail = &ifs->ifs_ips_icmptq.ifq_head; 243 MUTEX_INIT(&ifs->ifs_ips_icmptq.ifq_lock, "ipftq icmp tab"); 244 ifs->ifs_ips_icmptq.ifq_next = &ifs->ifs_ips_icmpacktq; 245 ifs->ifs_ips_icmpacktq.ifq_ttl = (u_long)ifs->ifs_fr_icmpacktimeout; 246 ifs->ifs_ips_icmpacktq.ifq_ref = 1; 247 ifs->ifs_ips_icmpacktq.ifq_head = NULL; 248 ifs->ifs_ips_icmpacktq.ifq_tail = &ifs->ifs_ips_icmpacktq.ifq_head; 249 MUTEX_INIT(&ifs->ifs_ips_icmpacktq.ifq_lock, "ipftq icmpack tab"); 250 ifs->ifs_ips_icmpacktq.ifq_next = &ifs->ifs_ips_iptq; 251 ifs->ifs_ips_iptq.ifq_ttl = (u_long)ifs->ifs_fr_iptimeout; 252 ifs->ifs_ips_iptq.ifq_ref = 1; 253 ifs->ifs_ips_iptq.ifq_head = NULL; 254 ifs->ifs_ips_iptq.ifq_tail = &ifs->ifs_ips_iptq.ifq_head; 255 MUTEX_INIT(&ifs->ifs_ips_iptq.ifq_lock, "ipftq ip tab"); 256 ifs->ifs_ips_iptq.ifq_next = &ifs->ifs_ips_deletetq; 257 /* entry's ttl in deletetq is just 1 tick */ 258 ifs->ifs_ips_deletetq.ifq_ttl = (u_long) 1; 259 ifs->ifs_ips_deletetq.ifq_ref = 1; 260 ifs->ifs_ips_deletetq.ifq_head = NULL; 261 ifs->ifs_ips_deletetq.ifq_tail = &ifs->ifs_ips_deletetq.ifq_head; 262 MUTEX_INIT(&ifs->ifs_ips_deletetq.ifq_lock, "state delete queue"); 263 ifs->ifs_ips_deletetq.ifq_next = NULL; 264 265 RWLOCK_INIT(&ifs->ifs_ipf_state, "ipf IP state rwlock"); 266 MUTEX_INIT(&ifs->ifs_ipf_stinsert, "ipf state insert mutex"); 267 ifs->ifs_fr_state_init = 1; 268 269 ifs->ifs_ips_last_force_flush = 
ifs->ifs_fr_ticks; 270 return 0; 271 } 272 273 274 /* ------------------------------------------------------------------------ */ 275 /* Function: fr_stateunload */ 276 /* Returns: Nil */ 277 /* Parameters: Nil */ 278 /* */ 279 /* Release and destroy any resources acquired or initialised so that */ 280 /* IPFilter can be unloaded or re-initialised. */ 281 /* ------------------------------------------------------------------------ */ 282 void fr_stateunload(ifs) 283 ipf_stack_t *ifs; 284 { 285 ipftq_t *ifq, *ifqnext; 286 ipstate_t *is; 287 288 while ((is = ifs->ifs_ips_list) != NULL) 289 fr_delstate(is, 0, ifs); 290 291 /* 292 * Proxy timeout queues are not cleaned here because although they 293 * exist on the state list, appr_unload is called after fr_stateunload 294 * and the proxies actually are responsible for them being created. 295 * Should the proxy timeouts have their own list? There's no real 296 * justification as this is the only complicationA 297 */ 298 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) { 299 ifqnext = ifq->ifq_next; 300 if (((ifq->ifq_flags & IFQF_PROXY) == 0) && 301 (fr_deletetimeoutqueue(ifq) == 0)) 302 fr_freetimeoutqueue(ifq, ifs); 303 } 304 305 ifs->ifs_ips_stats.iss_inuse = 0; 306 ifs->ifs_ips_num = 0; 307 308 if (ifs->ifs_fr_state_init == 1) { 309 fr_sttab_destroy(ifs->ifs_ips_tqtqb); 310 MUTEX_DESTROY(&ifs->ifs_ips_udptq.ifq_lock); 311 MUTEX_DESTROY(&ifs->ifs_ips_icmptq.ifq_lock); 312 MUTEX_DESTROY(&ifs->ifs_ips_udpacktq.ifq_lock); 313 MUTEX_DESTROY(&ifs->ifs_ips_icmpacktq.ifq_lock); 314 MUTEX_DESTROY(&ifs->ifs_ips_iptq.ifq_lock); 315 MUTEX_DESTROY(&ifs->ifs_ips_deletetq.ifq_lock); 316 } 317 318 if (ifs->ifs_ips_table != NULL) { 319 KFREES(ifs->ifs_ips_table, 320 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_table)); 321 ifs->ifs_ips_table = NULL; 322 } 323 324 if (ifs->ifs_ips_seed != NULL) { 325 KFREES(ifs->ifs_ips_seed, 326 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed)); 327 ifs->ifs_ips_seed = NULL; 328 } 329 330 
if (ifs->ifs_ips_stats.iss_bucketlen != NULL) { 331 KFREES(ifs->ifs_ips_stats.iss_bucketlen, 332 ifs->ifs_fr_statesize * sizeof(u_long)); 333 ifs->ifs_ips_stats.iss_bucketlen = NULL; 334 } 335 336 if (ifs->ifs_fr_state_maxbucket_reset == 1) 337 ifs->ifs_fr_state_maxbucket = 0; 338 339 if (ifs->ifs_fr_state_init == 1) { 340 ifs->ifs_fr_state_init = 0; 341 RW_DESTROY(&ifs->ifs_ipf_state); 342 MUTEX_DESTROY(&ifs->ifs_ipf_stinsert); 343 } 344 } 345 346 347 /* ------------------------------------------------------------------------ */ 348 /* Function: fr_statetstats */ 349 /* Returns: ips_state_t* - pointer to state stats structure */ 350 /* Parameters: Nil */ 351 /* */ 352 /* Put all the current numbers and pointers into a single struct and return */ 353 /* a pointer to it. */ 354 /* ------------------------------------------------------------------------ */ 355 static ips_stat_t *fr_statetstats(ifs) 356 ipf_stack_t *ifs; 357 { 358 ifs->ifs_ips_stats.iss_active = ifs->ifs_ips_num; 359 ifs->ifs_ips_stats.iss_statesize = ifs->ifs_fr_statesize; 360 ifs->ifs_ips_stats.iss_statemax = ifs->ifs_fr_statemax; 361 ifs->ifs_ips_stats.iss_table = ifs->ifs_ips_table; 362 ifs->ifs_ips_stats.iss_list = ifs->ifs_ips_list; 363 ifs->ifs_ips_stats.iss_ticks = ifs->ifs_fr_ticks; 364 return &ifs->ifs_ips_stats; 365 } 366 367 /* ------------------------------------------------------------------------ */ 368 /* Function: fr_state_remove */ 369 /* Returns: int - 0 == success, != 0 == failure */ 370 /* Parameters: data(I) - pointer to state structure to delete from table */ 371 /* */ 372 /* Search for a state structure that matches the one passed, according to */ 373 /* the IP addresses and other protocol specific information. 
*/ 374 /* ------------------------------------------------------------------------ */ 375 static int fr_state_remove(data, ifs) 376 caddr_t data; 377 ipf_stack_t *ifs; 378 { 379 ipstate_t *sp, st; 380 int error; 381 382 sp = &st; 383 error = fr_inobj(data, &st, IPFOBJ_IPSTATE); 384 if (error) 385 return EFAULT; 386 387 WRITE_ENTER(&ifs->ifs_ipf_state); 388 for (sp = ifs->ifs_ips_list; sp; sp = sp->is_next) 389 if ((sp->is_p == st.is_p) && (sp->is_v == st.is_v) && 390 !bcmp((caddr_t)&sp->is_src, (caddr_t)&st.is_src, 391 sizeof(st.is_src)) && 392 !bcmp((caddr_t)&sp->is_dst, (caddr_t)&st.is_dst, 393 sizeof(st.is_dst)) && 394 !bcmp((caddr_t)&sp->is_ps, (caddr_t)&st.is_ps, 395 sizeof(st.is_ps))) { 396 fr_delstate(sp, ISL_REMOVE, ifs); 397 RWLOCK_EXIT(&ifs->ifs_ipf_state); 398 return 0; 399 } 400 RWLOCK_EXIT(&ifs->ifs_ipf_state); 401 return ESRCH; 402 } 403 404 405 /* ------------------------------------------------------------------------ */ 406 /* Function: fr_state_ioctl */ 407 /* Returns: int - 0 == success, != 0 == failure */ 408 /* Parameters: data(I) - pointer to ioctl data */ 409 /* cmd(I) - ioctl command integer */ 410 /* mode(I) - file mode bits used with open */ 411 /* */ 412 /* Processes an ioctl call made to operate on the IP Filter state device. */ 413 /* ------------------------------------------------------------------------ */ 414 int fr_state_ioctl(data, cmd, mode, uid, ctx, ifs) 415 caddr_t data; 416 ioctlcmd_t cmd; 417 int mode, uid; 418 void *ctx; 419 ipf_stack_t *ifs; 420 { 421 int arg, ret, error = 0; 422 423 switch (cmd) 424 { 425 /* 426 * Delete an entry from the state table. 
427 */ 428 case SIOCDELST : 429 error = fr_state_remove(data, ifs); 430 break; 431 /* 432 * Flush the state table 433 */ 434 case SIOCIPFFL : 435 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 436 if (error != 0) { 437 error = EFAULT; 438 } else { 439 if (arg == 0 || arg == 1) { 440 WRITE_ENTER(&ifs->ifs_ipf_state); 441 ret = fr_state_flush(arg, 4, ifs); 442 RWLOCK_EXIT(&ifs->ifs_ipf_state); 443 error = BCOPYOUT((char *)&ret, data, 444 sizeof(ret)); 445 if (error != 0) 446 return EFAULT; 447 } else { 448 error = EINVAL; 449 } 450 } 451 break; 452 453 #ifdef USE_INET6 454 case SIOCIPFL6 : 455 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 456 if (error != 0) { 457 error = EFAULT; 458 } else { 459 if (arg == 0 || arg == 1) { 460 WRITE_ENTER(&ifs->ifs_ipf_state); 461 ret = fr_state_flush(arg, 6, ifs); 462 RWLOCK_EXIT(&ifs->ifs_ipf_state); 463 error = BCOPYOUT((char *)&ret, data, 464 sizeof(ret)); 465 if (error != 0) 466 return EFAULT; 467 } else { 468 error = EINVAL; 469 } 470 } 471 break; 472 #endif 473 #ifdef IPFILTER_LOG 474 /* 475 * Flush the state log. 476 */ 477 case SIOCIPFFB : 478 if (!(mode & FWRITE)) 479 error = EPERM; 480 else { 481 int tmp; 482 483 tmp = ipflog_clear(IPL_LOGSTATE, ifs); 484 error = BCOPYOUT((char *)&tmp, data, sizeof(tmp)); 485 if (error != 0) 486 error = EFAULT; 487 } 488 break; 489 /* 490 * Turn logging of state information on/off. 491 */ 492 case SIOCSETLG : 493 if (!(mode & FWRITE)) { 494 error = EPERM; 495 } else { 496 error = BCOPYIN((char *)data, 497 (char *)&ifs->ifs_ipstate_logging, 498 sizeof(ifs->ifs_ipstate_logging)); 499 if (error != 0) 500 error = EFAULT; 501 } 502 break; 503 /* 504 * Return the current state of logging. 505 */ 506 case SIOCGETLG : 507 error = BCOPYOUT((char *)&ifs->ifs_ipstate_logging, 508 (char *)data, 509 sizeof(ifs->ifs_ipstate_logging)); 510 if (error != 0) 511 error = EFAULT; 512 break; 513 /* 514 * Return the number of bytes currently waiting to be read. 
515 */ 516 case FIONREAD : 517 arg = ifs->ifs_iplused[IPL_LOGSTATE]; /* returned in an int */ 518 error = BCOPYOUT((char *)&arg, data, sizeof(arg)); 519 if (error != 0) 520 error = EFAULT; 521 break; 522 #endif 523 /* 524 * Get the current state statistics. 525 */ 526 case SIOCGETFS : 527 error = fr_outobj(data, fr_statetstats(ifs), IPFOBJ_STATESTAT); 528 break; 529 /* 530 * Lock/Unlock the state table. (Locking prevents any changes, which 531 * means no packets match). 532 */ 533 case SIOCSTLCK : 534 if (!(mode & FWRITE)) { 535 error = EPERM; 536 } else { 537 error = fr_lock(data, &ifs->ifs_fr_state_lock); 538 } 539 break; 540 /* 541 * Add an entry to the current state table. 542 */ 543 case SIOCSTPUT : 544 if (!ifs->ifs_fr_state_lock || !(mode &FWRITE)) { 545 error = EACCES; 546 break; 547 } 548 error = fr_stputent(data, ifs); 549 break; 550 /* 551 * Get a state table entry. 552 */ 553 case SIOCSTGET : 554 if (!ifs->ifs_fr_state_lock) { 555 error = EACCES; 556 break; 557 } 558 error = fr_stgetent(data, ifs); 559 break; 560 561 case SIOCGENITER : 562 { 563 ipftoken_t *token; 564 ipfgeniter_t iter; 565 566 error = fr_inobj(data, &iter, IPFOBJ_GENITER); 567 if (error != 0) 568 break; 569 570 token = ipf_findtoken(IPFGENITER_STATE, uid, ctx, ifs); 571 if (token != NULL) 572 error = fr_stateiter(token, &iter, ifs); 573 else 574 error = ESRCH; 575 RWLOCK_EXIT(&ifs->ifs_ipf_tokens); 576 break; 577 } 578 579 case SIOCIPFDELTOK : 580 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 581 if (error != 0) { 582 error = EFAULT; 583 } else { 584 error = ipf_deltoken(arg, uid, ctx, ifs); 585 } 586 break; 587 588 default : 589 error = EINVAL; 590 break; 591 } 592 return error; 593 } 594 595 596 /* ------------------------------------------------------------------------ */ 597 /* Function: fr_stgetent */ 598 /* Returns: int - 0 == success, != 0 == failure */ 599 /* Parameters: data(I) - pointer to state structure to retrieve from table */ 600 /* */ 601 /* Copy out state 
information from the kernel to a user space process. If */ 602 /* there is a filter rule associated with the state entry, copy that out */ 603 /* as well. The entry to copy out is taken from the value of "ips_next" in */ 604 /* the struct passed in and if not null and not found in the list of current*/ 605 /* state entries, the retrieval fails. */ 606 /* ------------------------------------------------------------------------ */ 607 int fr_stgetent(data, ifs) 608 caddr_t data; 609 ipf_stack_t *ifs; 610 { 611 ipstate_t *is, *isn; 612 ipstate_save_t ips; 613 int error; 614 615 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); 616 if (error) 617 return EFAULT; 618 619 isn = ips.ips_next; 620 if (isn == NULL) { 621 isn = ifs->ifs_ips_list; 622 if (isn == NULL) { 623 if (ips.ips_next == NULL) 624 return ENOENT; 625 return 0; 626 } 627 } else { 628 /* 629 * Make sure the pointer we're copying from exists in the 630 * current list of entries. Security precaution to prevent 631 * copying of random kernel data. 632 */ 633 for (is = ifs->ifs_ips_list; is; is = is->is_next) 634 if (is == isn) 635 break; 636 if (!is) 637 return ESRCH; 638 } 639 ips.ips_next = isn->is_next; 640 bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is)); 641 ips.ips_rule = isn->is_rule; 642 if (isn->is_rule != NULL) 643 bcopy((char *)isn->is_rule, (char *)&ips.ips_fr, 644 sizeof(ips.ips_fr)); 645 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); 646 if (error) 647 return EFAULT; 648 return 0; 649 } 650 651 652 /* ------------------------------------------------------------------------ */ 653 /* Function: fr_stputent */ 654 /* Returns: int - 0 == success, != 0 == failure */ 655 /* Parameters: data(I) - pointer to state information struct */ 656 /* */ 657 /* This function implements the SIOCSTPUT ioctl: insert a state entry into */ 658 /* the state table. If the state info. 
includes a pointer to a filter rule */ 659 /* then also add in an orphaned rule (will not show up in any "ipfstat -io" */ 660 /* output. */ 661 /* ------------------------------------------------------------------------ */ 662 int fr_stputent(data, ifs) 663 caddr_t data; 664 ipf_stack_t *ifs; 665 { 666 ipstate_t *is, *isn; 667 ipstate_save_t ips; 668 int error, i; 669 frentry_t *fr; 670 char *name; 671 672 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); 673 if (error) 674 return EFAULT; 675 676 KMALLOC(isn, ipstate_t *); 677 if (isn == NULL) 678 return ENOMEM; 679 680 bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn)); 681 bzero((char *)isn, offsetof(struct ipstate, is_pkts)); 682 isn->is_sti.tqe_pnext = NULL; 683 isn->is_sti.tqe_next = NULL; 684 isn->is_sti.tqe_ifq = NULL; 685 isn->is_sti.tqe_parent = isn; 686 isn->is_ifp[0] = NULL; 687 isn->is_ifp[1] = NULL; 688 isn->is_ifp[2] = NULL; 689 isn->is_ifp[3] = NULL; 690 isn->is_sync = NULL; 691 fr = ips.ips_rule; 692 693 if (fr == NULL) { 694 READ_ENTER(&ifs->ifs_ipf_state); 695 fr_stinsert(isn, 0, ifs); 696 MUTEX_EXIT(&isn->is_lock); 697 RWLOCK_EXIT(&ifs->ifs_ipf_state); 698 return 0; 699 } 700 701 if (isn->is_flags & SI_NEWFR) { 702 KMALLOC(fr, frentry_t *); 703 if (fr == NULL) { 704 KFREE(isn); 705 return ENOMEM; 706 } 707 bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr)); 708 isn->is_rule = fr; 709 ips.ips_is.is_rule = fr; 710 MUTEX_NUKE(&fr->fr_lock); 711 MUTEX_INIT(&fr->fr_lock, "state filter rule lock"); 712 713 /* 714 * Look up all the interface names in the rule. 
715 */ 716 for (i = 0; i < 4; i++) { 717 name = fr->fr_ifnames[i]; 718 fr->fr_ifas[i] = fr_resolvenic(name, fr->fr_v, ifs); 719 name = isn->is_ifname[i]; 720 isn->is_ifp[i] = fr_resolvenic(name, isn->is_v, ifs); 721 } 722 723 fr->fr_ref = 0; 724 fr->fr_dsize = 0; 725 fr->fr_data = NULL; 726 fr->fr_type = FR_T_NONE; 727 728 fr_resolvedest(&fr->fr_tif, fr->fr_v, ifs); 729 fr_resolvedest(&fr->fr_dif, fr->fr_v, ifs); 730 fr_resolvedest(&fr->fr_rif, fr->fr_v, ifs); 731 732 /* 733 * send a copy back to userland of what we ended up 734 * to allow for verification. 735 */ 736 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); 737 if (error) { 738 KFREE(isn); 739 MUTEX_DESTROY(&fr->fr_lock); 740 KFREE(fr); 741 return EFAULT; 742 } 743 READ_ENTER(&ifs->ifs_ipf_state); 744 fr_stinsert(isn, 0, ifs); 745 MUTEX_EXIT(&isn->is_lock); 746 RWLOCK_EXIT(&ifs->ifs_ipf_state); 747 748 } else { 749 READ_ENTER(&ifs->ifs_ipf_state); 750 for (is = ifs->ifs_ips_list; is; is = is->is_next) 751 if (is->is_rule == fr) { 752 fr_stinsert(isn, 0, ifs); 753 MUTEX_EXIT(&isn->is_lock); 754 break; 755 } 756 757 if (is == NULL) { 758 KFREE(isn); 759 isn = NULL; 760 } 761 RWLOCK_EXIT(&ifs->ifs_ipf_state); 762 763 return (isn == NULL) ? ESRCH : 0; 764 } 765 766 return 0; 767 } 768 769 770 /* ------------------------------------------------------------------------ */ 771 /* Function: fr_stinsert */ 772 /* Returns: Nil */ 773 /* Parameters: is(I) - pointer to state structure */ 774 /* rev(I) - flag indicating forward/reverse direction of packet */ 775 /* */ 776 /* Inserts a state structure into the hash table (for lookups) and the list */ 777 /* of state entries (for enumeration). Resolves all of the interface names */ 778 /* to pointers and adjusts running stats for the hash table as appropriate. */ 779 /* */ 780 /* Locking: it is assumed that some kind of lock on ipf_state is held. */ 781 /* Exits with is_lock initialised and held. 
*/ 782 /* ------------------------------------------------------------------------ */ 783 void fr_stinsert(is, rev, ifs) 784 ipstate_t *is; 785 int rev; 786 ipf_stack_t *ifs; 787 { 788 frentry_t *fr; 789 u_int hv; 790 int i; 791 792 MUTEX_INIT(&is->is_lock, "ipf state entry"); 793 794 fr = is->is_rule; 795 if (fr != NULL) { 796 MUTEX_ENTER(&fr->fr_lock); 797 fr->fr_ref++; 798 fr->fr_statecnt++; 799 MUTEX_EXIT(&fr->fr_lock); 800 } 801 802 /* 803 * Look up all the interface names in the state entry. 804 */ 805 for (i = 0; i < 4; i++) { 806 if (is->is_ifp[i] != NULL) 807 continue; 808 is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], is->is_v, ifs); 809 } 810 811 /* 812 * If we could trust is_hv, then the modulous would not be needed, but 813 * when running with IPFILTER_SYNC, this stops bad values. 814 */ 815 hv = is->is_hv % ifs->ifs_fr_statesize; 816 is->is_hv = hv; 817 818 /* 819 * We need to get both of these locks...the first because it is 820 * possible that once the insert is complete another packet might 821 * come along, match the entry and want to update it. 822 */ 823 MUTEX_ENTER(&is->is_lock); 824 MUTEX_ENTER(&ifs->ifs_ipf_stinsert); 825 826 /* 827 * add into list table. 
828 */ 829 if (ifs->ifs_ips_list != NULL) 830 ifs->ifs_ips_list->is_pnext = &is->is_next; 831 is->is_pnext = &ifs->ifs_ips_list; 832 is->is_next = ifs->ifs_ips_list; 833 ifs->ifs_ips_list = is; 834 835 if (ifs->ifs_ips_table[hv] != NULL) 836 ifs->ifs_ips_table[hv]->is_phnext = &is->is_hnext; 837 else 838 ifs->ifs_ips_stats.iss_inuse++; 839 is->is_phnext = ifs->ifs_ips_table + hv; 840 is->is_hnext = ifs->ifs_ips_table[hv]; 841 ifs->ifs_ips_table[hv] = is; 842 ifs->ifs_ips_stats.iss_bucketlen[hv]++; 843 ifs->ifs_ips_num++; 844 MUTEX_EXIT(&ifs->ifs_ipf_stinsert); 845 846 fr_setstatequeue(is, rev, ifs); 847 } 848 849 /* ------------------------------------------------------------------------ */ 850 /* Function: fr_match_ipv4addrs */ 851 /* Returns: int - 2 strong match (same addresses, same direction) */ 852 /* 1 weak match (same address, opposite direction) */ 853 /* 0 no match */ 854 /* */ 855 /* Function matches IPv4 addresses. */ 856 /* ------------------------------------------------------------------------ */ 857 static int fr_match_ipv4addrs(is1, is2) 858 ipstate_t *is1; 859 ipstate_t *is2; 860 { 861 int rv; 862 863 if (is1->is_saddr == is2->is_saddr && is1->is_daddr == is2->is_daddr) 864 rv = 2; 865 else if (is1->is_saddr == is2->is_daddr && 866 is1->is_daddr == is2->is_saddr) 867 rv = 1; 868 else 869 rv = 0; 870 871 return (rv); 872 } 873 874 /* ------------------------------------------------------------------------ */ 875 /* Function: fr_match_ipv6addrs */ 876 /* Returns: int - 2 strong match (same addresses, same direction) */ 877 /* 1 weak match (same addresses, opposite direction) */ 878 /* 0 no match */ 879 /* */ 880 /* Function matches IPv6 addresses. 
*/ 881 /* ------------------------------------------------------------------------ */ 882 static int fr_match_ipv6addrs(is1, is2) 883 ipstate_t *is1; 884 ipstate_t *is2; 885 { 886 int rv; 887 888 if (IP6_EQ(&is1->is_src, &is2->is_src) && 889 IP6_EQ(&is1->is_dst, &is2->is_dst)) 890 rv = 2; 891 else if (IP6_EQ(&is1->is_src, &is2->is_dst) && 892 IP6_EQ(&is1->is_dst, &is2->is_src)) { 893 rv = 1; 894 } 895 else 896 rv = 0; 897 898 return (rv); 899 } 900 /* ------------------------------------------------------------------------ */ 901 /* Function: fr_match_addresses */ 902 /* Returns: int - 2 strong match (same addresses, same direction) */ 903 /* 1 weak match (same address, opposite directions) */ 904 /* 0 no match */ 905 /* Parameters: is1, is2 pointers to states we are checking */ 906 /* */ 907 /* Matches addresses, function uses fr_match_ipvXaddrs() to deal with IPv4 */ 908 /* and IPv6 address format. */ 909 /* ------------------------------------------------------------------------ */ 910 static int fr_match_addresses(is1, is2) 911 ipstate_t *is1; 912 ipstate_t *is2; 913 { 914 int rv; 915 916 if (is1->is_v == 4) { 917 rv = fr_match_ipv4addrs(is1, is2); 918 } 919 else { 920 rv = fr_match_ipv6addrs(is1, is2); 921 } 922 923 return (rv); 924 } 925 926 /* ------------------------------------------------------------------------ */ 927 /* Function: fr_match_ppairs */ 928 /* Returns: int - 2 strong match (same ports, same direction) */ 929 /* 1 weak match (same ports, different direction) */ 930 /* 0 no match */ 931 /* Parameters ppairs1, ppairs - src, dst ports we want to match. */ 932 /* */ 933 /* Matches two port_pair_t types (port pairs). Each port pair contains */ 934 /* src, dst port, which belong to session (state entry). 
*/ 935 /* ------------------------------------------------------------------------ */ 936 static int fr_match_ppairs(ppairs1, ppairs2) 937 port_pair_t *ppairs1; 938 port_pair_t *ppairs2; 939 { 940 int rv; 941 942 if (ppairs1->pp_sport == ppairs2->pp_sport && 943 ppairs1->pp_dport == ppairs2->pp_dport) 944 rv = 2; 945 else if (ppairs1->pp_sport == ppairs2->pp_dport && 946 ppairs1->pp_dport == ppairs2->pp_sport) 947 rv = 1; 948 else 949 rv = 0; 950 951 return (rv); 952 } 953 954 /* ------------------------------------------------------------------------ */ 955 /* Function: fr_match_l4_hdr */ 956 /* Returns: int - 0 no match, */ 957 /* 1 weak match (same ports, different directions) */ 958 /* 2 strong match (same ports, same direction) */ 959 /* Parameters is1, is2 - states we want to match */ 960 /* */ 961 /* Function matches L4 header data (source ports for TCP, UDP, CallIds for */ 962 /* GRE protocol). */ 963 /* ------------------------------------------------------------------------ */ 964 static int fr_match_l4_hdr(is1, is2) 965 ipstate_t *is1; 966 ipstate_t *is2; 967 { 968 int rv = 0; 969 port_pair_t pp1; 970 port_pair_t pp2; 971 972 if (is1->is_p != is2->is_p) 973 return (0); 974 975 switch (is1->is_p) { 976 case IPPROTO_TCP: 977 pp1.pp_sport = is1->is_ps.is_ts.ts_sport; 978 pp1.pp_dport = is1->is_ps.is_ts.ts_dport; 979 pp2.pp_sport = is2->is_ps.is_ts.ts_sport; 980 pp2.pp_dport = is2->is_ps.is_ts.ts_dport; 981 rv = fr_match_ppairs(&pp1, &pp2); 982 break; 983 case IPPROTO_UDP: 984 pp1.pp_sport = is1->is_ps.is_us.us_sport; 985 pp1.pp_dport = is1->is_ps.is_us.us_dport; 986 pp2.pp_sport = is2->is_ps.is_us.us_sport; 987 pp2.pp_dport = is2->is_ps.is_us.us_dport; 988 rv = fr_match_ppairs(&pp1, &pp2); 989 break; 990 case IPPROTO_GRE: 991 /* greinfo_t can be also interprted as port pair */ 992 pp1.pp_sport = is1->is_ps.is_ug.gs_call[0]; 993 pp1.pp_dport = is1->is_ps.is_ug.gs_call[1]; 994 pp2.pp_sport = is2->is_ps.is_ug.gs_call[0]; 995 pp2.pp_dport = 
is2->is_ps.is_ug.gs_call[1]; 996 rv = fr_match_ppairs(&pp1, &pp2); 997 break; 998 case IPPROTO_ICMP: 999 case IPPROTO_ICMPV6: 1000 if (bcmp(&is1->is_ps, &is2->is_ps, sizeof(icmpinfo_t))) 1001 rv = 1; 1002 else 1003 rv = 0; 1004 break; 1005 default: 1006 rv = 0; 1007 } 1008 1009 return (rv); 1010 } 1011 1012 /* ------------------------------------------------------------------------ */ 1013 /* Function: fr_matchstates */ 1014 /* Returns: int - nonzero match, zero no match */ 1015 /* Parameters is1, is2 - states we want to match */ 1016 /* */ 1017 /* The state entries are equal (identical match) if they belong to the same */ 1018 /* session. Any time new state entry is being added the fr_addstate() */ 1019 /* function creates temporal state entry from the data it gets from IP and */ 1020 /* L4 header. The fr_matchstats() must be also aware of packet direction, */ 1021 /* which is also stored within the state entry. We should keep in mind the */ 1022 /* information about packet direction is spread accross L3 (addresses) and */ 1023 /* L4 (ports). There are three possible relationships betwee is1, is2: */ 1024 /* - no match (match(is1, is2) == 0)) */ 1025 /* - weak match same addresses (ports), but different */ 1026 /* directions (1) (fr_match_xxxx(is1, is2) == 1) */ 1027 /* - strong match same addresses (ports) and same directions */ 1028 /* (2) (fr_match_xxxx(is1, is2) == 2) */ 1029 /* */ 1030 /* There are functions, which match match addresses (L3 header) in is1, is2 */ 1031 /* and functions, which are used to compare ports (L4 header) data. We say */ 1032 /* the is1 and is2 are same (identical) if there is a match */ 1033 /* (fr_match_l4_hdr(is1, is2) != 0) and matchlevels are same for entries */ 1034 /* (fr_match_l3_hdr(is1, is2) == fr_match_l4_hdr(is1, is2)) for is1, is2. */ 1035 /* Such requirement deals with case as follows: */ 1036 /* suppose there are two connections between hosts A, B. 
Connection 1: */ 1037 /* a.a.a.a:12345 <=> b.b.b.b:54321 */ 1038 /* Connection 2: */ 1039 /* a.a.a.a:54321 <=> b.b.b.b:12345 */ 1040 /* since we've introduced match levels into our fr_matchstates(), we are */ 1041 /* able to identify, which packets belong to connection A and which belong */ 1042 /* to connection B. Assume there are two entries is1, is2. is1 has been */ 1043 /* from con. 1 packet, which travelled from A to B: */ 1044 /* a.a.a.a:12345 -> b.b.b.b:54321 */ 1045 /* while s2, has been created from packet which belongs to con. 2 and is */ 1046 /* also coming from A to B: */ 1047 /* a.a.a.a:54321 -> b.b.b.b:12345 */ 1048 /* fr_match_l3_hdr(is1, is2) == 2 -> strong match, while */ 1049 /* fr_match_l4_hdr(is1, is2) == 1 -> weak match. Since match levels are */ 1050 /* different the state entries are not identical -> no match as a final */ 1051 /* result. */ 1052 /* ------------------------------------------------------------------------ */ 1053 static int fr_matchstates(is1, is2) 1054 ipstate_t *is1; 1055 ipstate_t *is2; 1056 { 1057 int rv; 1058 int amatch; 1059 int pmatch; 1060 1061 if (bcmp(&is1->is_pass, &is2->is_pass, 1062 offsetof(struct ipstate, is_ps) - 1063 offsetof(struct ipstate, is_pass)) == 0) { 1064 1065 pmatch = fr_match_l4_hdr(is1, is2); 1066 amatch = fr_match_addresses(is1, is2); 1067 /* 1068 * If addresses match (amatch != 0), then 'match levels' 1069 * must be same for matching entries. If amatch and pmatch 1070 * have different values (different match levels), then 1071 * is1 and is2 belong to different sessions. 
1072 */ 1073 rv = (amatch != 0) && (amatch == pmatch); 1074 } 1075 else 1076 rv = 0; 1077 1078 return (rv); 1079 } 1080 1081 /* ------------------------------------------------------------------------ */ 1082 /* Function: fr_addstate */ 1083 /* Returns: ipstate_t* - NULL == failure, else pointer to new state */ 1084 /* Parameters: fin(I) - pointer to packet information */ 1085 /* stsave(O) - pointer to place to save pointer to created */ 1086 /* state structure. */ 1087 /* flags(I) - flags to use when creating the structure */ 1088 /* */ 1089 /* Creates a new IP state structure from the packet information collected. */ 1090 /* Inserts it into the state table and appends to the bottom of the active */ 1091 /* list. If the capacity of the table has reached the maximum allowed then */ 1092 /* the call will fail and a flush is scheduled for the next timeout call. */ 1093 /* ------------------------------------------------------------------------ */ 1094 ipstate_t *fr_addstate(fin, stsave, flags) 1095 fr_info_t *fin; 1096 ipstate_t **stsave; 1097 u_int flags; 1098 { 1099 ipstate_t *is, ips; 1100 struct icmp *ic; 1101 u_int pass, hv; 1102 frentry_t *fr; 1103 tcphdr_t *tcp; 1104 grehdr_t *gre; 1105 void *ifp; 1106 int out; 1107 ipf_stack_t *ifs = fin->fin_ifs; 1108 1109 if (ifs->ifs_fr_state_lock || 1110 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) 1111 return NULL; 1112 1113 if ((fin->fin_flx & FI_OOW) && !(fin->fin_tcpf & TH_SYN)) 1114 return NULL; 1115 1116 /* 1117 * If a "keep state" rule has reached the maximum number of references 1118 * to it, then schedule an automatic flush in case we can clear out 1119 * some "dead old wood". Note that because the lock isn't held on 1120 * fr it is possible that we could overflow. 
The cost of overflowing 1121 * is being ignored here as the number by which it can overflow is 1122 * a product of the number of simultaneous threads that could be 1123 * executing in here, so a limit of 100 won't result in 200, but could 1124 * result in 101 or 102. 1125 */ 1126 fr = fin->fin_fr; 1127 if (fr != NULL) { 1128 if ((ifs->ifs_ips_num == ifs->ifs_fr_statemax) && (fr->fr_statemax == 0)) { 1129 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 1130 ifs->ifs_fr_state_doflush = 1; 1131 return NULL; 1132 } 1133 if ((fr->fr_statemax != 0) && 1134 (fr->fr_statecnt >= fr->fr_statemax)) { 1135 ATOMIC_INCL(ifs->ifs_ips_stats.iss_maxref); 1136 ifs->ifs_fr_state_doflush = 1; 1137 return NULL; 1138 } 1139 } 1140 1141 ic = NULL; 1142 tcp = NULL; 1143 out = fin->fin_out; 1144 is = &ips; 1145 bzero((char *)is, sizeof(*is)); 1146 1147 if (fr == NULL) { 1148 pass = ifs->ifs_fr_flags; 1149 is->is_tag = FR_NOLOGTAG; 1150 } 1151 else { 1152 pass = fr->fr_flags; 1153 } 1154 1155 is->is_die = 1 + ifs->ifs_fr_ticks; 1156 /* 1157 * We want to check everything that is a property of this packet, 1158 * but we don't (automatically) care about it's fragment status as 1159 * this may change. 1160 */ 1161 is->is_pass = pass; 1162 is->is_v = fin->fin_v; 1163 is->is_opt[0] = fin->fin_optmsk; 1164 is->is_optmsk[0] = 0xffffffff; 1165 is->is_optmsk[1] = 0xffffffff; 1166 if (is->is_v == 6) { 1167 is->is_opt[0] &= ~0x8; 1168 is->is_optmsk[0] &= ~0x8; 1169 is->is_optmsk[1] &= ~0x8; 1170 } 1171 is->is_sec = fin->fin_secmsk; 1172 is->is_secmsk = 0xffff; 1173 is->is_auth = fin->fin_auth; 1174 is->is_authmsk = 0xffff; 1175 1176 /* 1177 * Copy and calculate... 1178 */ 1179 hv = (is->is_p = fin->fin_fi.fi_p); 1180 is->is_src = fin->fin_fi.fi_src; 1181 hv += is->is_saddr; 1182 is->is_dst = fin->fin_fi.fi_dst; 1183 hv += is->is_daddr; 1184 #ifdef USE_INET6 1185 if (fin->fin_v == 6) { 1186 /* 1187 * For ICMPv6, we check to see if the destination address is 1188 * a multicast address. 
If it is, do not include it in the 1189 * calculation of the hash because the correct reply will come 1190 * back from a real address, not a multicast address. 1191 */ 1192 if ((is->is_p == IPPROTO_ICMPV6) && 1193 IN6_IS_ADDR_MULTICAST(&is->is_dst.in6)) { 1194 /* 1195 * So you can do keep state with neighbour discovery. 1196 * 1197 * Here we could use the address from the neighbour 1198 * solicit message to put in the state structure and 1199 * we could use that without a wildcard flag too... 1200 */ 1201 is->is_flags |= SI_W_DADDR; 1202 hv -= is->is_daddr; 1203 } else { 1204 hv += is->is_dst.i6[1]; 1205 hv += is->is_dst.i6[2]; 1206 hv += is->is_dst.i6[3]; 1207 } 1208 hv += is->is_src.i6[1]; 1209 hv += is->is_src.i6[2]; 1210 hv += is->is_src.i6[3]; 1211 } 1212 #endif 1213 if ((fin->fin_v == 4) && 1214 (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) { 1215 if (fin->fin_out == 0) { 1216 flags |= SI_W_DADDR|SI_CLONE; 1217 hv -= is->is_daddr; 1218 } else { 1219 flags |= SI_W_SADDR|SI_CLONE; 1220 hv -= is->is_saddr; 1221 } 1222 } 1223 1224 switch (is->is_p) 1225 { 1226 #ifdef USE_INET6 1227 case IPPROTO_ICMPV6 : 1228 ic = fin->fin_dp; 1229 1230 switch (ic->icmp_type) 1231 { 1232 case ICMP6_ECHO_REQUEST : 1233 is->is_icmp.ici_type = ic->icmp_type; 1234 hv += (is->is_icmp.ici_id = ic->icmp_id); 1235 break; 1236 case ICMP6_MEMBERSHIP_QUERY : 1237 case ND_ROUTER_SOLICIT : 1238 case ND_NEIGHBOR_SOLICIT : 1239 case ICMP6_NI_QUERY : 1240 is->is_icmp.ici_type = ic->icmp_type; 1241 break; 1242 default : 1243 return NULL; 1244 } 1245 ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp); 1246 break; 1247 #endif 1248 case IPPROTO_ICMP : 1249 ic = fin->fin_dp; 1250 1251 switch (ic->icmp_type) 1252 { 1253 case ICMP_ECHO : 1254 case ICMP_TSTAMP : 1255 case ICMP_IREQ : 1256 case ICMP_MASKREQ : 1257 is->is_icmp.ici_type = ic->icmp_type; 1258 hv += (is->is_icmp.ici_id = ic->icmp_id); 1259 break; 1260 default : 1261 return NULL; 1262 } 1263 ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp); 1264 
break; 1265 1266 case IPPROTO_GRE : 1267 gre = fin->fin_dp; 1268 1269 is->is_gre.gs_flags = gre->gr_flags; 1270 is->is_gre.gs_ptype = gre->gr_ptype; 1271 if (GRE_REV(is->is_gre.gs_flags) == 1) { 1272 is->is_call[0] = fin->fin_data[0]; 1273 is->is_call[1] = fin->fin_data[1]; 1274 } 1275 break; 1276 1277 case IPPROTO_TCP : 1278 tcp = fin->fin_dp; 1279 1280 if (tcp->th_flags & TH_RST) 1281 return NULL; 1282 /* 1283 * The endian of the ports doesn't matter, but the ack and 1284 * sequence numbers do as we do mathematics on them later. 1285 */ 1286 is->is_sport = htons(fin->fin_data[0]); 1287 is->is_dport = htons(fin->fin_data[1]); 1288 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) { 1289 hv += is->is_sport; 1290 hv += is->is_dport; 1291 } 1292 1293 /* 1294 * If this is a real packet then initialise fields in the 1295 * state information structure from the TCP header information. 1296 */ 1297 1298 is->is_maxdwin = 1; 1299 is->is_maxswin = ntohs(tcp->th_win); 1300 if (is->is_maxswin == 0) 1301 is->is_maxswin = 1; 1302 1303 if ((fin->fin_flx & FI_IGNORE) == 0) { 1304 is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen - 1305 (TCP_OFF(tcp) << 2) + 1306 ((tcp->th_flags & TH_SYN) ? 1 : 0) + 1307 ((tcp->th_flags & TH_FIN) ? 1 : 0); 1308 is->is_maxsend = is->is_send; 1309 1310 /* 1311 * Window scale option is only present in 1312 * SYN/SYN-ACK packet. 
1313 */ 1314 if ((tcp->th_flags & ~(TH_FIN|TH_ACK|TH_ECNALL)) == 1315 TH_SYN && 1316 (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) { 1317 if (fr_tcpoptions(fin, tcp, 1318 &is->is_tcp.ts_data[0]) == -1) { 1319 fin->fin_flx |= FI_BAD; 1320 } 1321 } 1322 1323 if ((fin->fin_out != 0) && (pass & FR_NEWISN) != 0) { 1324 fr_checknewisn(fin, is); 1325 fr_fixoutisn(fin, is); 1326 } 1327 1328 if ((tcp->th_flags & TH_OPENING) == TH_SYN) 1329 flags |= IS_TCPFSM; 1330 else { 1331 is->is_maxdwin = is->is_maxswin * 2; 1332 is->is_dend = ntohl(tcp->th_ack); 1333 is->is_maxdend = ntohl(tcp->th_ack); 1334 is->is_maxdwin *= 2; 1335 } 1336 } 1337 1338 /* 1339 * If we're creating state for a starting connection, start the 1340 * timer on it as we'll never see an error if it fails to 1341 * connect. 1342 */ 1343 ATOMIC_INCL(ifs->ifs_ips_stats.iss_tcp); 1344 break; 1345 1346 case IPPROTO_UDP : 1347 tcp = fin->fin_dp; 1348 1349 is->is_sport = htons(fin->fin_data[0]); 1350 is->is_dport = htons(fin->fin_data[1]); 1351 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) { 1352 hv += tcp->th_dport; 1353 hv += tcp->th_sport; 1354 } 1355 ATOMIC_INCL(ifs->ifs_ips_stats.iss_udp); 1356 break; 1357 1358 default : 1359 break; 1360 } 1361 hv = DOUBLE_HASH(hv, ifs); 1362 is->is_hv = hv; 1363 is->is_rule = fr; 1364 is->is_flags = flags & IS_INHERITED; 1365 1366 /* 1367 * Look for identical state. 1368 */ 1369 for (is = ifs->ifs_ips_table[is->is_hv % ifs->ifs_fr_statesize]; 1370 is != NULL; 1371 is = is->is_hnext) { 1372 if (fr_matchstates(&ips, is) == 1) 1373 break; 1374 } 1375 1376 /* 1377 * we've found a matching state -> state already exists, 1378 * we are not going to add a duplicate record. 
1379 */ 1380 if (is != NULL) 1381 return NULL; 1382 1383 if (ifs->ifs_ips_stats.iss_bucketlen[hv] >= ifs->ifs_fr_state_maxbucket) { 1384 ATOMIC_INCL(ifs->ifs_ips_stats.iss_bucketfull); 1385 return NULL; 1386 } 1387 KMALLOC(is, ipstate_t *); 1388 if (is == NULL) { 1389 ATOMIC_INCL(ifs->ifs_ips_stats.iss_nomem); 1390 return NULL; 1391 } 1392 bcopy((char *)&ips, (char *)is, sizeof(*is)); 1393 /* 1394 * Do not do the modulous here, it is done in fr_stinsert(). 1395 */ 1396 if (fr != NULL) { 1397 (void) strncpy(is->is_group, fr->fr_group, FR_GROUPLEN); 1398 if (fr->fr_age[0] != 0) { 1399 is->is_tqehead[0] = 1400 fr_addtimeoutqueue(&ifs->ifs_ips_utqe, 1401 fr->fr_age[0], ifs); 1402 is->is_sti.tqe_flags |= TQE_RULEBASED; 1403 } 1404 if (fr->fr_age[1] != 0) { 1405 is->is_tqehead[1] = 1406 fr_addtimeoutqueue(&ifs->ifs_ips_utqe, 1407 fr->fr_age[1], ifs); 1408 is->is_sti.tqe_flags |= TQE_RULEBASED; 1409 } 1410 is->is_tag = fr->fr_logtag; 1411 1412 is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1]; 1413 is->is_ifp[(1 - out) << 1] = fr->fr_ifas[2]; 1414 is->is_ifp[((1 - out) << 1) + 1] = fr->fr_ifas[3]; 1415 1416 if (((ifp = fr->fr_ifas[1]) != NULL) && 1417 (ifp != (void *)-1)) { 1418 COPYIFNAME(ifp, is->is_ifname[(out << 1) + 1], fr->fr_v); 1419 } 1420 if (((ifp = fr->fr_ifas[2]) != NULL) && 1421 (ifp != (void *)-1)) { 1422 COPYIFNAME(ifp, is->is_ifname[(1 - out) << 1], fr->fr_v); 1423 } 1424 if (((ifp = fr->fr_ifas[3]) != NULL) && 1425 (ifp != (void *)-1)) { 1426 COPYIFNAME(ifp, is->is_ifname[((1 - out) << 1) + 1], fr->fr_v); 1427 } 1428 } 1429 1430 is->is_ifp[out << 1] = fin->fin_ifp; 1431 if (fin->fin_ifp != NULL) { 1432 COPYIFNAME(fin->fin_ifp, is->is_ifname[out << 1], fr->fr_v); 1433 } 1434 1435 /* 1436 * It may seem strange to set is_ref to 2, but fr_check() will call 1437 * fr_statederef() after calling fr_addstate() and the idea is to 1438 * have it exist at the end of fr_check() with is_ref == 1. 
1439 */ 1440 is->is_ref = 2; 1441 is->is_pkts[0] = 0, is->is_bytes[0] = 0; 1442 is->is_pkts[1] = 0, is->is_bytes[1] = 0; 1443 is->is_pkts[2] = 0, is->is_bytes[2] = 0; 1444 is->is_pkts[3] = 0, is->is_bytes[3] = 0; 1445 if ((fin->fin_flx & FI_IGNORE) == 0) { 1446 is->is_pkts[out] = 1; 1447 is->is_bytes[out] = fin->fin_plen; 1448 is->is_flx[out][0] = fin->fin_flx & FI_CMP; 1449 is->is_flx[out][0] &= ~FI_OOW; 1450 } 1451 1452 if (pass & FR_STSTRICT) 1453 is->is_flags |= IS_STRICT; 1454 1455 if (pass & FR_STATESYNC) 1456 is->is_flags |= IS_STATESYNC; 1457 1458 if (flags & (SI_WILDP|SI_WILDA)) { 1459 ATOMIC_INCL(ifs->ifs_ips_stats.iss_wild); 1460 } 1461 is->is_rulen = fin->fin_rule; 1462 1463 1464 if (pass & FR_LOGFIRST) 1465 is->is_pass &= ~(FR_LOGFIRST|FR_LOG); 1466 1467 READ_ENTER(&ifs->ifs_ipf_state); 1468 is->is_me = stsave; 1469 1470 fr_stinsert(is, fin->fin_rev, ifs); 1471 1472 if (fin->fin_p == IPPROTO_TCP) { 1473 /* 1474 * If we're creating state for a starting connection, start the 1475 * timer on it as we'll never see an error if it fails to 1476 * connect. 
1477 */ 1478 (void) fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb, 1479 is->is_flags); 1480 MUTEX_EXIT(&is->is_lock); 1481 #ifdef IPFILTER_SCAN 1482 if ((is->is_flags & SI_CLONE) == 0) 1483 (void) ipsc_attachis(is); 1484 #endif 1485 } else { 1486 MUTEX_EXIT(&is->is_lock); 1487 } 1488 #ifdef IPFILTER_SYNC 1489 if ((is->is_flags & IS_STATESYNC) && ((is->is_flags & SI_CLONE) == 0)) 1490 is->is_sync = ipfsync_new(SMC_STATE, fin, is); 1491 #endif 1492 if (ifs->ifs_ipstate_logging) 1493 ipstate_log(is, ISL_NEW, ifs); 1494 1495 RWLOCK_EXIT(&ifs->ifs_ipf_state); 1496 fin->fin_state = is; 1497 fin->fin_rev = IP6_NEQ(&is->is_dst, &fin->fin_daddr); 1498 fin->fin_flx |= FI_STATE; 1499 if (fin->fin_flx & FI_FRAG) 1500 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); 1501 1502 return is; 1503 } 1504 1505 1506 /* ------------------------------------------------------------------------ */ 1507 /* Function: fr_tcpoptions */ 1508 /* Returns: int - 1 == packet matches state entry, 0 == it does not */ 1509 /* Parameters: fin(I) - pointer to packet information */ 1510 /* tcp(I) - pointer to TCP packet header */ 1511 /* td(I) - pointer to TCP data held as part of the state */ 1512 /* */ 1513 /* Look after the TCP header for any options and deal with those that are */ 1514 /* present. Record details about those that we recogise. 
*/ 1515 /* ------------------------------------------------------------------------ */ 1516 static int fr_tcpoptions(fin, tcp, td) 1517 fr_info_t *fin; 1518 tcphdr_t *tcp; 1519 tcpdata_t *td; 1520 { 1521 int off, mlen, ol, i, len, retval; 1522 char buf[64], *s, opt; 1523 mb_t *m = NULL; 1524 1525 len = (TCP_OFF(tcp) << 2); 1526 if (fin->fin_dlen < len) 1527 return 0; 1528 len -= sizeof(*tcp); 1529 1530 off = fin->fin_plen - fin->fin_dlen + sizeof(*tcp) + fin->fin_ipoff; 1531 1532 m = fin->fin_m; 1533 mlen = MSGDSIZE(m) - off; 1534 if (len > mlen) { 1535 len = mlen; 1536 retval = 0; 1537 } else { 1538 retval = 1; 1539 } 1540 1541 COPYDATA(m, off, len, buf); 1542 1543 for (s = buf; len > 0; ) { 1544 opt = *s; 1545 if (opt == TCPOPT_EOL) 1546 break; 1547 else if (opt == TCPOPT_NOP) 1548 ol = 1; 1549 else { 1550 if (len < 2) 1551 break; 1552 ol = (int)*(s + 1); 1553 if (ol < 2 || ol > len) 1554 break; 1555 1556 /* 1557 * Extract the TCP options we are interested in out of 1558 * the header and store them in the the tcpdata struct. 1559 */ 1560 switch (opt) 1561 { 1562 case TCPOPT_WINDOW : 1563 if (ol == TCPOLEN_WINDOW) { 1564 i = (int)*(s + 2); 1565 if (i > TCP_WSCALE_MAX) 1566 i = TCP_WSCALE_MAX; 1567 else if (i < 0) 1568 i = 0; 1569 td->td_winscale = i; 1570 td->td_winflags |= TCP_WSCALE_SEEN| 1571 TCP_WSCALE_FIRST; 1572 } else 1573 retval = -1; 1574 break; 1575 case TCPOPT_MAXSEG : 1576 /* 1577 * So, if we wanted to set the TCP MAXSEG, 1578 * it should be done here... 
1579 */ 1580 if (ol == TCPOLEN_MAXSEG) { 1581 i = (int)*(s + 2); 1582 i <<= 8; 1583 i += (int)*(s + 3); 1584 td->td_maxseg = i; 1585 } else 1586 retval = -1; 1587 break; 1588 case TCPOPT_SACK_PERMITTED : 1589 if (ol == TCPOLEN_SACK_PERMITTED) 1590 td->td_winflags |= TCP_SACK_PERMIT; 1591 else 1592 retval = -1; 1593 break; 1594 } 1595 } 1596 len -= ol; 1597 s += ol; 1598 } 1599 return retval; 1600 } 1601 1602 1603 /* ------------------------------------------------------------------------ */ 1604 /* Function: fr_tcpstate */ 1605 /* Returns: int - 1 == packet matches state entry, 0 == it does not */ 1606 /* Parameters: fin(I) - pointer to packet information */ 1607 /* tcp(I) - pointer to TCP packet header */ 1608 /* is(I) - pointer to master state structure */ 1609 /* */ 1610 /* Check to see if a packet with TCP headers fits within the TCP window. */ 1611 /* Change timeout depending on whether new packet is a SYN-ACK returning */ 1612 /* for a SYN or a RST or FIN which indicate time to close up shop. */ 1613 /* ------------------------------------------------------------------------ */ 1614 static int fr_tcpstate(fin, tcp, is) 1615 fr_info_t *fin; 1616 tcphdr_t *tcp; 1617 ipstate_t *is; 1618 { 1619 int source, ret = 0, flags; 1620 tcpdata_t *fdata, *tdata; 1621 ipf_stack_t *ifs = fin->fin_ifs; 1622 1623 source = !fin->fin_rev; 1624 if (((is->is_flags & IS_TCPFSM) != 0) && (source == 1) && 1625 (ntohs(is->is_sport) != fin->fin_data[0])) 1626 source = 0; 1627 fdata = &is->is_tcp.ts_data[!source]; 1628 tdata = &is->is_tcp.ts_data[source]; 1629 1630 MUTEX_ENTER(&is->is_lock); 1631 1632 /* 1633 * If a SYN packet is received for a connection that is in a half 1634 * closed state, then move its state entry to deletetq. In such case 1635 * the SYN packet will be consequently dropped. This allows new state 1636 * entry to be created with a retransmited SYN packet. 
1637 */ 1638 if ((tcp->th_flags & TH_OPENING) == TH_SYN) { 1639 if (((is->is_state[source] > IPF_TCPS_ESTABLISHED) || 1640 (is->is_state[source] == IPF_TCPS_CLOSED)) && 1641 ((is->is_state[!source] > IPF_TCPS_ESTABLISHED) || 1642 (is->is_state[!source] == IPF_TCPS_CLOSED))) { 1643 /* 1644 * Do not update is->is_sti.tqe_die in case state entry 1645 * is already present in deletetq. It prevents state 1646 * entry ttl update by retransmitted SYN packets, which 1647 * may arrive before timer tick kicks off. The SYN 1648 * packet will be dropped again. 1649 */ 1650 if (is->is_sti.tqe_ifq != &ifs->ifs_ips_deletetq) 1651 fr_movequeue(&is->is_sti, is->is_sti.tqe_ifq, 1652 &fin->fin_ifs->ifs_ips_deletetq, 1653 fin->fin_ifs); 1654 1655 MUTEX_EXIT(&is->is_lock); 1656 return 0; 1657 } 1658 } 1659 1660 if (fr_tcpinwindow(fin, fdata, tdata, tcp, is->is_flags)) { 1661 #ifdef IPFILTER_SCAN 1662 if (is->is_flags & (IS_SC_CLIENT|IS_SC_SERVER)) { 1663 ipsc_packet(fin, is); 1664 if (FR_ISBLOCK(is->is_pass)) { 1665 MUTEX_EXIT(&is->is_lock); 1666 return 1; 1667 } 1668 } 1669 #endif 1670 1671 /* 1672 * Nearing end of connection, start timeout. 1673 */ 1674 ret = fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb, 1675 is->is_flags); 1676 if (ret == 0) { 1677 MUTEX_EXIT(&is->is_lock); 1678 return 0; 1679 } 1680 1681 /* 1682 * set s0's as appropriate. Use syn-ack packet as it 1683 * contains both pieces of required information. 1684 */ 1685 /* 1686 * Window scale option is only present in SYN/SYN-ACK packet. 1687 * Compare with ~TH_FIN to mask out T/TCP setups. 
1688 */ 1689 flags = tcp->th_flags & ~(TH_FIN|TH_ECNALL); 1690 if (flags == (TH_SYN|TH_ACK)) { 1691 is->is_s0[source] = ntohl(tcp->th_ack); 1692 is->is_s0[!source] = ntohl(tcp->th_seq) + 1; 1693 if (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2)) { 1694 (void) fr_tcpoptions(fin, tcp, fdata); 1695 } 1696 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) 1697 fr_checknewisn(fin, is); 1698 } else if (flags == TH_SYN) { 1699 is->is_s0[source] = ntohl(tcp->th_seq) + 1; 1700 if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) 1701 (void) fr_tcpoptions(fin, tcp, tdata); 1702 1703 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) 1704 fr_checknewisn(fin, is); 1705 1706 } 1707 ret = 1; 1708 } else 1709 fin->fin_flx |= FI_OOW; 1710 MUTEX_EXIT(&is->is_lock); 1711 return ret; 1712 } 1713 1714 1715 /* ------------------------------------------------------------------------ */ 1716 /* Function: fr_checknewisn */ 1717 /* Returns: Nil */ 1718 /* Parameters: fin(I) - pointer to packet information */ 1719 /* is(I) - pointer to master state structure */ 1720 /* */ 1721 /* Check to see if this TCP connection is expecting and needs a new */ 1722 /* sequence number for a particular direction of the connection. */ 1723 /* */ 1724 /* NOTE: This does not actually change the sequence numbers, only gets new */ 1725 /* one ready. */ 1726 /* ------------------------------------------------------------------------ */ 1727 static void fr_checknewisn(fin, is) 1728 fr_info_t *fin; 1729 ipstate_t *is; 1730 { 1731 u_32_t sumd, old, new; 1732 tcphdr_t *tcp; 1733 int i; 1734 1735 i = fin->fin_rev; 1736 tcp = fin->fin_dp; 1737 1738 if (((i == 0) && !(is->is_flags & IS_ISNSYN)) || 1739 ((i == 1) && !(is->is_flags & IS_ISNACK))) { 1740 old = ntohl(tcp->th_seq); 1741 new = fr_newisn(fin); 1742 is->is_isninc[i] = new - old; 1743 CALC_SUMD(old, new, sumd); 1744 is->is_sumd[i] = (sumd & 0xffff) + (sumd >> 16); 1745 1746 is->is_flags |= ((i == 0) ? 
IS_ISNSYN : IS_ISNACK); 1747 } 1748 } 1749 1750 1751 /* ------------------------------------------------------------------------ */ 1752 /* Function: fr_tcpinwindow */ 1753 /* Returns: int - 1 == packet inside TCP "window", 0 == not inside. */ 1754 /* Parameters: fin(I) - pointer to packet information */ 1755 /* fdata(I) - pointer to tcp state informatio (forward) */ 1756 /* tdata(I) - pointer to tcp state informatio (reverse) */ 1757 /* tcp(I) - pointer to TCP packet header */ 1758 /* */ 1759 /* Given a packet has matched addresses and ports, check to see if it is */ 1760 /* within the TCP data window. In a show of generosity, allow packets that */ 1761 /* are within the window space behind the current sequence # as well. */ 1762 /* ------------------------------------------------------------------------ */ 1763 int fr_tcpinwindow(fin, fdata, tdata, tcp, flags) 1764 fr_info_t *fin; 1765 tcpdata_t *fdata, *tdata; 1766 tcphdr_t *tcp; 1767 int flags; 1768 { 1769 tcp_seq seq, ack, end; 1770 int ackskew, tcpflags; 1771 u_32_t win, maxwin; 1772 int dsize, inseq; 1773 1774 /* 1775 * Find difference between last checked packet and this packet. 1776 */ 1777 tcpflags = tcp->th_flags; 1778 seq = ntohl(tcp->th_seq); 1779 ack = ntohl(tcp->th_ack); 1780 1781 if (tcpflags & TH_SYN) 1782 win = ntohs(tcp->th_win); 1783 else 1784 win = ntohs(tcp->th_win) << fdata->td_winscale; 1785 1786 /* 1787 * win 0 means the receiving endpoint has closed the window, because it 1788 * has not enough memory to receive data from sender. In such case we 1789 * are pretending window size to be 1 to let TCP probe data through. 1790 * TCP probe data can be either 0 or 1 octet of data, the RFC does not 1791 * state this accurately, so we have to allow 1 octet (win = 1) even if 1792 * the window is closed (win == 0). 1793 */ 1794 if (win == 0) 1795 win = 1; 1796 1797 dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) + 1798 ((tcpflags & TH_SYN) ? 1 : 0) + ((tcpflags & TH_FIN) ? 
1 : 0); 1799 1800 /* 1801 * if window scaling is present, the scaling is only allowed 1802 * for windows not in the first SYN packet. In that packet the 1803 * window is 65535 to specify the largest window possible 1804 * for receivers not implementing the window scale option. 1805 * Currently, we do not assume TTCP here. That means that 1806 * if we see a second packet from a host (after the initial 1807 * SYN), we can assume that the receiver of the SYN did 1808 * already send back the SYN/ACK (and thus that we know if 1809 * the receiver also does window scaling) 1810 */ 1811 if (!(tcpflags & TH_SYN) && (fdata->td_winflags & TCP_WSCALE_FIRST)) { 1812 fdata->td_maxwin = win; 1813 } 1814 1815 end = seq + dsize; 1816 1817 if ((fdata->td_end == 0) && 1818 (!(flags & IS_TCPFSM) || 1819 ((tcpflags & TH_OPENING) == TH_OPENING))) { 1820 /* 1821 * Must be a (outgoing) SYN-ACK in reply to a SYN. 1822 */ 1823 fdata->td_end = end - 1; 1824 fdata->td_maxwin = 1; 1825 fdata->td_maxend = end + win; 1826 } 1827 1828 if (!(tcpflags & TH_ACK)) { /* Pretend an ack was sent */ 1829 ack = tdata->td_end; 1830 } else if (((tcpflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) && 1831 (ack == 0)) { 1832 /* gross hack to get around certain broken tcp stacks */ 1833 ack = tdata->td_end; 1834 } 1835 1836 maxwin = tdata->td_maxwin; 1837 ackskew = tdata->td_end - ack; 1838 1839 /* 1840 * Strict sequencing only allows in-order delivery. 
1841 */ 1842 if ((flags & IS_STRICT) != 0) { 1843 if (seq != fdata->td_end) { 1844 return 0; 1845 } 1846 } 1847 1848 #define SEQ_GE(a,b) ((int)((a) - (b)) >= 0) 1849 #define SEQ_GT(a,b) ((int)((a) - (b)) > 0) 1850 inseq = 0; 1851 if ( 1852 #if defined(_KERNEL) 1853 (SEQ_GE(fdata->td_maxend, end)) && 1854 (SEQ_GE(seq, fdata->td_end - maxwin)) && 1855 #endif 1856 /* XXX what about big packets */ 1857 #define MAXACKWINDOW 66000 1858 (-ackskew <= (MAXACKWINDOW << fdata->td_winscale)) && 1859 ( ackskew <= (MAXACKWINDOW << fdata->td_winscale))) { 1860 inseq = 1; 1861 /* 1862 * Microsoft Windows will send the next packet to the right of the 1863 * window if SACK is in use. 1864 */ 1865 } else if ((seq == fdata->td_maxend) && (ackskew == 0) && 1866 (fdata->td_winflags & TCP_SACK_PERMIT) && 1867 (tdata->td_winflags & TCP_SACK_PERMIT)) { 1868 inseq = 1; 1869 /* 1870 * RST ACK with SEQ equal to 0 is sent by some OSes (i.e. Solaris) as a 1871 * response to initial SYN packet, when there is no application 1872 * listeing to on a port, where the SYN packet has came to. 1873 */ 1874 } else if ((seq == 0) && (tcpflags == (TH_RST|TH_ACK)) && 1875 (ackskew >= -1) && (ackskew <= 1)) { 1876 inseq = 1; 1877 } else if (!(flags & IS_TCPFSM)) { 1878 1879 if (!(fdata->td_winflags & 1880 (TCP_WSCALE_SEEN|TCP_WSCALE_FIRST))) { 1881 /* 1882 * No TCPFSM and no window scaling, so make some 1883 * extra guesses. 1884 */ 1885 if ((seq == fdata->td_maxend) && (ackskew == 0)) 1886 inseq = 1; 1887 else if (SEQ_GE(seq + maxwin, fdata->td_end - maxwin)) 1888 inseq = 1; 1889 } 1890 } 1891 1892 if (inseq) { 1893 /* if ackskew < 0 then this should be due to fragmented 1894 * packets. There is no way to know the length of the 1895 * total packet in advance. 1896 * We do know the total length from the fragment cache though. 
1897 * Note however that there might be more sessions with 1898 * exactly the same source and destination parameters in the 1899 * state cache (and source and destination is the only stuff 1900 * that is saved in the fragment cache). Note further that 1901 * some TCP connections in the state cache are hashed with 1902 * sport and dport as well which makes it not worthwhile to 1903 * look for them. 1904 * Thus, when ackskew is negative but still seems to belong 1905 * to this session, we bump up the destinations end value. 1906 */ 1907 if (ackskew < 0) 1908 tdata->td_end = ack; 1909 1910 /* update max window seen */ 1911 if (fdata->td_maxwin < win) 1912 fdata->td_maxwin = win; 1913 if (SEQ_GT(end, fdata->td_end)) 1914 fdata->td_end = end; 1915 if (SEQ_GE(ack + win, tdata->td_maxend)) 1916 tdata->td_maxend = ack + win; 1917 return 1; 1918 } 1919 fin->fin_flx |= FI_OOW; 1920 return 0; 1921 } 1922 1923 1924 /* ------------------------------------------------------------------------ */ 1925 /* Function: fr_stclone */ 1926 /* Returns: ipstate_t* - NULL == cloning failed, */ 1927 /* else pointer to new state structure */ 1928 /* Parameters: fin(I) - pointer to packet information */ 1929 /* tcp(I) - pointer to TCP/UDP header */ 1930 /* is(I) - pointer to master state structure */ 1931 /* */ 1932 /* Create a "duplcate" state table entry from the master. 
*/ 1933 /* ------------------------------------------------------------------------ */ 1934 static ipstate_t *fr_stclone(fin, tcp, is) 1935 fr_info_t *fin; 1936 tcphdr_t *tcp; 1937 ipstate_t *is; 1938 { 1939 ipstate_t *clone; 1940 u_32_t send; 1941 ipf_stack_t *ifs = fin->fin_ifs; 1942 1943 if (ifs->ifs_ips_num == ifs->ifs_fr_statemax) { 1944 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 1945 ifs->ifs_fr_state_doflush = 1; 1946 return NULL; 1947 } 1948 KMALLOC(clone, ipstate_t *); 1949 if (clone == NULL) 1950 return NULL; 1951 bcopy((char *)is, (char *)clone, sizeof(*clone)); 1952 1953 MUTEX_NUKE(&clone->is_lock); 1954 1955 clone->is_die = ONE_DAY + ifs->ifs_fr_ticks; 1956 clone->is_state[0] = 0; 1957 clone->is_state[1] = 0; 1958 send = ntohl(tcp->th_seq) + fin->fin_dlen - (TCP_OFF(tcp) << 2) + 1959 ((tcp->th_flags & TH_SYN) ? 1 : 0) + 1960 ((tcp->th_flags & TH_FIN) ? 1 : 0); 1961 1962 if (fin->fin_rev == 1) { 1963 clone->is_dend = send; 1964 clone->is_maxdend = send; 1965 clone->is_send = 0; 1966 clone->is_maxswin = 1; 1967 clone->is_maxdwin = ntohs(tcp->th_win); 1968 if (clone->is_maxdwin == 0) 1969 clone->is_maxdwin = 1; 1970 } else { 1971 clone->is_send = send; 1972 clone->is_maxsend = send; 1973 clone->is_dend = 0; 1974 clone->is_maxdwin = 1; 1975 clone->is_maxswin = ntohs(tcp->th_win); 1976 if (clone->is_maxswin == 0) 1977 clone->is_maxswin = 1; 1978 } 1979 1980 clone->is_flags &= ~SI_CLONE; 1981 clone->is_flags |= SI_CLONED; 1982 fr_stinsert(clone, fin->fin_rev, ifs); 1983 clone->is_ref = 2; 1984 if (clone->is_p == IPPROTO_TCP) { 1985 (void) fr_tcp_age(&clone->is_sti, fin, ifs->ifs_ips_tqtqb, 1986 clone->is_flags); 1987 } 1988 MUTEX_EXIT(&clone->is_lock); 1989 #ifdef IPFILTER_SCAN 1990 (void) ipsc_attachis(is); 1991 #endif 1992 #ifdef IPFILTER_SYNC 1993 if (is->is_flags & IS_STATESYNC) 1994 clone->is_sync = ipfsync_new(SMC_STATE, fin, clone); 1995 #endif 1996 return clone; 1997 } 1998 1999 2000 /* 
------------------------------------------------------------------------ */
/* Function:    fr_matchsrcdst                                              */
/* Returns:     ipstate_t* - NULL == packet does not match this entry (or   */
/*                           cloning the entry failed), else pointer to the */
/*                           matched entry, or to its clone when a wildcard */
/*                           port is filled in on an SI_CLONE entry         */
/* Parameters:  fin(I)   - pointer to packet information                    */
/*              is(I)    - pointer to state structure                       */
/*              src(I)   - pointer to source address                        */
/*              dst(I)   - pointer to destination address                   */
/*              tcp(I)   - pointer to TCP/UDP header                        */
/*              cmask(I) - mask applied to fin_flx before it is compared    */
/*                         with the flags stored in the state entry         */
/*                                                                          */
/* Match a state table entry against an IP packet.  The logic below is that */
/* ret gets set to one if the match succeeds, else remains 0.  If it is     */
/* still 0 after the test, there is no match.                               */
/*                                                                          */
/* On a match, wildcard addresses/ports, the per-direction flags and the    */
/* interface are filled in on the entry and fin->fin_rev is set.            */
/* ------------------------------------------------------------------------ */
static ipstate_t *fr_matchsrcdst(fin, is, src, dst, tcp, cmask)
fr_info_t *fin;
ipstate_t *is;
i6addr_t *src, *dst;
tcphdr_t *tcp;
u_32_t cmask;
{
	int ret = 0, rev, out, flags, flx = 0, idx;
	u_short sp, dp;
	u_32_t cflx;
	void *ifp;
	ipf_stack_t *ifs = fin->fin_ifs;

	/* First guess at direction: reverse if dst is not the entry's dst. */
	rev = IP6_NEQ(&is->is_dst, dst);
	ifp = fin->fin_ifp;
	out = fin->fin_out;
	flags = is->is_flags;
	sp = 0;
	dp = 0;

	/*
	 * NOTE(review): sp uses htons() and dp uses ntohs(); both perform the
	 * same 16-bit swap, so the asymmetry looks cosmetic - confirm.
	 */
	if (tcp != NULL) {
		sp = htons(fin->fin_sport);
		dp = ntohs(fin->fin_dport);
	}
	/*
	 * Addresses alone say "forward"; a non-wildcard port that differs
	 * from the entry flips the guess to "reverse".
	 */
	if (!rev) {
		if (tcp != NULL) {
			if (!(flags & SI_W_SPORT) && (sp != is->is_sport))
				rev = 1;
			else if (!(flags & SI_W_DPORT) && (dp != is->is_dport))
				rev = 1;
		}
	}

	/* Index into is_ifp[]/is_ifname[]: bit 1 = outbound, bit 0 = reverse. */
	idx = (out << 1) + rev;

	/*
	 * If the interface for this 'direction' is set, make sure it matches.
	 * An interface name that is not set matches any, as does a name of *.
	 */
	if ((is->is_ifp[idx] == NULL &&
	    (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) ||
	    is->is_ifp[idx] == ifp)
		ret = 1;

	if (ret == 0)
		return NULL;
	ret = 0;

	/*
	 * Match addresses and ports.
	 */
	if (rev == 0) {
		if ((IP6_EQ(&is->is_dst, dst) || (flags & SI_W_DADDR)) &&
		    (IP6_EQ(&is->is_src, src) || (flags & SI_W_SADDR))) {
			if (tcp) {
				if ((sp == is->is_sport || flags & SI_W_SPORT)&&
				    (dp == is->is_dport || flags & SI_W_DPORT))
					ret = 1;
			} else {
				ret = 1;
			}
		}
	} else {
		if ((IP6_EQ(&is->is_dst, src) || (flags & SI_W_DADDR)) &&
		    (IP6_EQ(&is->is_src, dst) || (flags & SI_W_SADDR))) {
			if (tcp) {
				if ((dp == is->is_sport || flags & SI_W_SPORT)&&
				    (sp == is->is_dport || flags & SI_W_DPORT))
					ret = 1;
			} else {
				ret = 1;
			}
		}
	}

	if (ret == 0)
		return NULL;

	/*
	 * Whether or not this should be here, is questionable, but the aim
	 * is to get this out of the main line.
	 * Without a TCP/UDP header the port-wildcard and clone flags are
	 * ignored for the rest of this function.
	 */
	if (tcp == NULL)
		flags = is->is_flags & ~(SI_WILDP|SI_NEWFR|SI_CLONE|SI_CLONED);

	/*
	 * Only one of the source or destination address can be flagged as a
	 * wildcard.  Fill in the missing address, if set.
	 * For IPv6, if the address being copied in is multicast, then
	 * don't reset the wild flag - multicast causes it to be set in the
	 * first place!
	 */
	if ((flags & (SI_W_SADDR|SI_W_DADDR))) {
		fr_ip_t *fi = &fin->fin_fi;

		if ((flags & SI_W_SADDR) != 0) {
			if (rev == 0) {
#ifdef USE_INET6
				if (is->is_v == 6 &&
				    IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6))
					/*EMPTY*/;
				else
#endif
				{
					is->is_src = fi->fi_src;
					is->is_flags &= ~SI_W_SADDR;
				}
			} else {
#ifdef USE_INET6
				if (is->is_v == 6 &&
				    IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6))
					/*EMPTY*/;
				else
#endif
				{
					is->is_src = fi->fi_dst;
					is->is_flags &= ~SI_W_SADDR;
				}
			}
		} else if ((flags & SI_W_DADDR) != 0) {
			if (rev == 0) {
#ifdef USE_INET6
				if (is->is_v == 6 &&
				    IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6))
					/*EMPTY*/;
				else
#endif
				{
					is->is_dst = fi->fi_dst;
					is->is_flags &= ~SI_W_DADDR;
				}
			} else {
#ifdef USE_INET6
				if (is->is_v == 6 &&
				    IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6))
					/*EMPTY*/;
				else
#endif
				{
					is->is_dst = fi->fi_src;
					is->is_flags &= ~SI_W_DADDR;
				}
			}
		}
		/* No wildcards left on the entry: one fewer wild entry. */
		if ((is->is_flags & (SI_WILDA|SI_WILDP)) == 0) {
			ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild);
		}
	}

	flx = fin->fin_flx & cmask;
	cflx = is->is_flx[out][rev];

	/*
	 * Match up any flags set from IP options.
	 */
	if ((cflx && (flx != (cflx & cmask))) ||
	    ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]) ||
	    ((fin->fin_secmsk & is->is_secmsk) != is->is_sec) ||
	    ((fin->fin_auth & is->is_authmsk) != is->is_auth))
		return NULL;

	/*
	 * Only one of the source or destination port can be flagged as a
	 * wildcard.  When filling it in, fill in a copy of the matched entry
	 * if it has the cloning flag set.
	 * With FI_IGNORE set the entry is reported as matched without any
	 * of the updates below.
	 */
	if ((fin->fin_flx & FI_IGNORE) != 0) {
		fin->fin_rev = rev;
		return is;
	}

	if ((flags & (SI_W_SPORT|SI_W_DPORT))) {
		if ((flags & SI_CLONE) != 0) {
			ipstate_t *clone;

			clone = fr_stclone(fin, tcp, is);
			if (clone == NULL)
				return NULL;
			/* From here on the clone is updated and returned. */
			is = clone;
		} else {
			ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild);
		}

		if ((flags & SI_W_SPORT) != 0) {
			if (rev == 0) {
				is->is_sport = sp;
				is->is_send = ntohl(tcp->th_seq);
			} else {
				is->is_sport = dp;
				is->is_send = ntohl(tcp->th_ack);
			}
			is->is_maxsend = is->is_send + 1;
		} else if ((flags & SI_W_DPORT) != 0) {
			if (rev == 0) {
				is->is_dport = dp;
				is->is_dend = ntohl(tcp->th_ack);
			} else {
				is->is_dport = sp;
				is->is_dend = ntohl(tcp->th_seq);
			}
			is->is_maxdend = is->is_dend + 1;
		}
		is->is_flags &= ~(SI_W_SPORT|SI_W_DPORT);
		if ((flags & SI_CLONED) && ifs->ifs_ipstate_logging)
			ipstate_log(is, ISL_CLONE, ifs);
	}

	/* ret is not read again after this assignment. */
	ret = -1;

	/* First packet seen for this out/rev slot: record its flags. */
	if (is->is_flx[out][rev] == 0) {
		is->is_flx[out][rev] = flx;
		is->is_opt[rev] = fin->fin_optmsk;
		if (is->is_v == 6) {
			is->is_opt[rev] &= ~0x8;
			is->is_optmsk[rev] &= ~0x8;
		}
	}

	/*
	 * Check if the interface name for this "direction" is set and if not,
	 * fill it in.
	 */
	if (is->is_ifp[idx] == NULL &&
	    (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) {
		is->is_ifp[idx] = ifp;
		COPYIFNAME(ifp, is->is_ifname[idx], fin->fin_v);
	}
	fin->fin_rev = rev;
	return is;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_checkicmpmatchingstate                                   */
/* Returns:     ipstate_t* - NULL == no matching state found,               */
/*                           else pointer to the matching state entry       */
/* Parameters:  fin(I) - pointer to packet information                      */
/*                                                                          */
/* If we've got an ICMP error message, using the information stored in the  */
/* ICMP packet, look for a matching state table entry.                      */
/*                                                                          */
/* If we return NULL then no lock on ipf_state is held.                     */
/* If we return non-null then a read-lock on ipf_state is held.             */
/* ------------------------------------------------------------------------ */
static ipstate_t *fr_checkicmpmatchingstate(fin)
fr_info_t *fin;
{
	ipstate_t *is, **isp;
	u_short sport, dport;
	u_char pr;
	int backward, i, oi;
	i6addr_t dst, src;
	struct icmp *ic;
	u_short savelen;
	icmphdr_t *icmp;
	fr_info_t ofin;
	tcphdr_t *tcp;
	int len;
	ip_t *oip;
	u_int hv;
	ipf_stack_t *ifs = fin->fin_ifs;

	/*
	 * Does it at least have the return (basic) IP header ?
	 * Is it an actual recognised ICMP error type?
	 * Only a basic IP header (no options) should be with
	 * an ICMP error header.
	 */
	if ((fin->fin_v != 4) || (fin->fin_hlen != sizeof(ip_t)) ||
	    (fin->fin_plen < ICMPERR_MINPKTLEN) ||
	    !(fin->fin_flx & FI_ICMPERR))
		return NULL;
	ic = fin->fin_dp;

	/* oip: the IP header of the packet quoted inside the ICMP error. */
	oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN);
	/*
	 * Check if at least the old IP header (with options) and
	 * 8 bytes of payload is present.
	 */
	if (fin->fin_plen < ICMPERR_MAXPKTLEN + ((IP_HL(oip) - 5) << 2))
		return NULL;

	/*
	 * Sanity Checks.
	 */
	len = fin->fin_dlen - ICMPERR_ICMPHLEN;
	if ((len <= 0) || ((IP_HL(oip) << 2) > len))
		return NULL;

	/*
	 * Is the buffer big enough for all of it ?  It's the size of the IP
	 * header claimed in the encapsulated part which is of concern.  It
	 * may be too big to be in this buffer but not so big that it's
	 * outside the ICMP packet, leading to TCP deref's causing problems.
	 * This is possible because we don't know how big oip_hl is when we
	 * do the pullup early in fr_check() and thus can't guarantee it is
	 * all here now.
	 */
#ifdef _KERNEL
	{
	mb_t *m;

	m = fin->fin_m;
# if defined(MENTAT)
	if ((char *)oip + len > (char *)m->b_wptr)
		return NULL;
# else
	if ((char *)oip + len > (char *)fin->fin_ip + m->m_len)
		return NULL;
# endif
	}
#endif
	bcopy((char *)fin, (char *)&ofin, sizeof(*fin));

	/*
	 * in the IPv4 case we must zero the i6addr union otherwise
	 * the IP6_EQ and IP6_NEQ macros produce the wrong results because
	 * of the 'junk' in the unused part of the union
	 */
	bzero((char *)&src, sizeof(src));
	bzero((char *)&dst, sizeof(dst));

	/*
	 * we make an fin entry to be able to feed it to
	 * matchsrcdst note that not all fields are necessary
	 * but this is the cleanest way. Note further we fill
	 * in fin_mp such that if someone uses it we'll get
	 * a kernel panic. fr_matchsrcdst does not use this.
	 *
	 * watch out here, as ip is in host order and oip in network
	 * order. Any change we make must be undone afterwards, like
	 * oip->ip_off - it is still in network byte order so fix it.
	 */
	savelen = oip->ip_len;
	oip->ip_len = len;
	oip->ip_off = ntohs(oip->ip_off);

	ofin.fin_flx = FI_NOCKSUM;
	ofin.fin_v = 4;
	ofin.fin_ip = oip;
	ofin.fin_m = NULL;	/* if dereferenced, panic XXX */
	ofin.fin_mp = NULL;	/* if dereferenced, panic XXX */
	ofin.fin_plen = fin->fin_dlen - ICMPERR_ICMPHLEN;
	(void) fr_makefrip(IP_HL(oip) << 2, oip, &ofin);
	ofin.fin_ifp = fin->fin_ifp;
	/* The quoted packet travelled the opposite way to this ICMP error. */
	ofin.fin_out = !fin->fin_out;
	/*
	 * Reset the short and bad flag here because in fr_matchsrcdst()
	 * the flags for the current packet (fin_flx) are compared against
	 * those for the existing session.
	 */
	ofin.fin_flx &= ~(FI_BAD|FI_SHORT);

	/*
	 * Put old values of ip_len and ip_off back as we don't know
	 * if we have to forward the packet (or process it again).
	 */
	oip->ip_len = savelen;
	oip->ip_off = htons(oip->ip_off);

	switch (oip->ip_p)
	{
	case IPPROTO_ICMP :
		/*
		 * an ICMP error can only be generated as a result of an
		 * ICMP query, not as the response on an ICMP error
		 *
		 * XXX theoretically ICMP_ECHOREP and the other reply's are
		 * ICMP query's as well, but adding them here seems strange XXX
		 */
		if ((ofin.fin_flx & FI_ICMPERR) != 0)
			return NULL;

		/*
		 * perform a lookup of the ICMP packet in the state table
		 */
		icmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
		hv = (pr = oip->ip_p);
		src.in4 = oip->ip_src;
		hv += src.in4.s_addr;
		dst.in4 = oip->ip_dst;
		hv += dst.in4.s_addr;
		hv += icmp->icmp_id;
		hv = DOUBLE_HASH(hv, ifs);

		READ_ENTER(&ifs->ifs_ipf_state);
		for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) {
			/*
			 * Advance the chain cursor first: "is" is overwritten
			 * by the match result below and may become NULL.
			 */
			isp = &is->is_hnext;
			if ((is->is_p != pr) || (is->is_v != 4))
				continue;
			if (is->is_pass & FR_NOICMPERR)
				continue;
			is = fr_matchsrcdst(&ofin, is, &src, &dst,
					    NULL, FI_ICMPCMP);
			if (is != NULL) {
				if ((is->is_pass & FR_NOICMPERR) != 0) {
					RWLOCK_EXIT(&ifs->ifs_ipf_state);
					return NULL;
				}
				/*
				 * i  : the index of this packet (the icmp
				 *      unreachable)
				 * oi : the index of the original packet found
				 *      in the icmp header (i.e. the packet
				 *      causing this icmp)
				 * backward : original packet was backward
				 *      compared to the state
				 */
				backward = IP6_NEQ(&is->is_src, &src);
				fin->fin_rev = !backward;
				i = (!backward << 1) + fin->fin_out;
				oi = (backward << 1) + ofin.fin_out;
				/*
				 * Skip the entry if more ICMP errors were
				 * already counted than packets seen in the
				 * direction that could have caused them.
				 */
				if (is->is_icmppkts[i] > is->is_pkts[oi])
					continue;
				ifs->ifs_ips_stats.iss_hits++;
				is->is_icmppkts[i]++;
				/* Returned with the read lock still held. */
				return is;
			}
		}
		RWLOCK_EXIT(&ifs->ifs_ipf_state);
		return NULL;
	case IPPROTO_TCP :
	case IPPROTO_UDP :
		break;
	default :
		return NULL;
	}

	tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
	dport = tcp->th_dport;
	sport = tcp->th_sport;

	hv = (pr = oip->ip_p);
	src.in4 = oip->ip_src;
	hv += src.in4.s_addr;
	dst.in4 = oip->ip_dst;
	hv += dst.in4.s_addr;
	hv += dport;
	hv += sport;
	hv = DOUBLE_HASH(hv, ifs);

	READ_ENTER(&ifs->ifs_ipf_state);
	for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) {
		isp = &is->is_hnext;
		/*
		 * Only allow this icmp though if the
		 * encapsulated packet was allowed through the
		 * other way around. Note that the minimal amount
		 * of info present does not allow for checking against
		 * tcp internals such as seq and ack numbers.   Only the
		 * ports are known to be present and can be even if the
		 * short flag is set.
		 */
		if ((is->is_p == pr) && (is->is_v == 4) &&
		    (is = fr_matchsrcdst(&ofin, is, &src, &dst,
					 tcp, FI_ICMPCMP))) {
			/*
			 * i  : the index of this packet (the icmp unreachable)
			 * oi : the index of the original packet found in the
			 *      icmp header (i.e. the packet causing this icmp)
			 * backward : original packet was backward compared to
			 *            the state
			 */
			backward = IP6_NEQ(&is->is_src, &src);
			fin->fin_rev = !backward;
			i = (!backward << 1) + fin->fin_out;
			oi = (backward << 1) + ofin.fin_out;

			/* Unlike the ICMP case above, a refusal ends the search. */
			if (((is->is_pass & FR_NOICMPERR) != 0) ||
			    (is->is_icmppkts[i] > is->is_pkts[oi]))
				break;
			ifs->ifs_ips_stats.iss_hits++;
			is->is_icmppkts[i]++;
			/*
			 * we deliberately do not touch the timeouts
			 * for the accompanying state table entry.
			 * It remains to be seen if that is correct. XXX
			 */
			return is;
		}
	}
	RWLOCK_EXIT(&ifs->ifs_ipf_state);
	return NULL;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_ipsmove                                                  */
/* Returns:     Nil                                                         */
/* Parameters:  is(I)  - pointer to state table entry                       */
/*              hv(I)  - new hash value for state table entry               */
/*              ifs(I) - ipf stack instance                                 */
/* Write Locks: ipf_state                                                   */
/*                                                                          */
/* Move a state entry from one position in the hash table to another.       */
/* hv is reduced with DOUBLE_HASH() here before it is used as a bucket      */
/* index; the "in use" and per-bucket length statistics are kept in step.   */
/* ------------------------------------------------------------------------ */
static void fr_ipsmove(is, hv, ifs)
ipstate_t *is;
u_int hv;
ipf_stack_t *ifs;
{
	ipstate_t **isp;
	u_int hvm;

	ASSERT(rw_read_locked(&ifs->ifs_ipf_state.ipf_lk) == 0);

	hvm = is->is_hv;
	/*
	 * Remove the hash from the old location...
	 */
	isp = is->is_phnext;
	if (is->is_hnext)
		is->is_hnext->is_phnext = isp;
	*isp = is->is_hnext;
	/* Old bucket became empty: one fewer bucket in use. */
	if (ifs->ifs_ips_table[hvm] == NULL)
		ifs->ifs_ips_stats.iss_inuse--;
	ifs->ifs_ips_stats.iss_bucketlen[hvm]--;

	/*
	 * ...and put the hash in the new one.
	 */
	hvm = DOUBLE_HASH(hv, ifs);
	is->is_hv = hvm;
	isp = &ifs->ifs_ips_table[hvm];
	if (*isp)
		(*isp)->is_phnext = &is->is_hnext;
	else
		ifs->ifs_ips_stats.iss_inuse++;
	ifs->ifs_ips_stats.iss_bucketlen[hvm]++;
	/* Insert at the head of the new bucket's chain. */
	is->is_phnext = isp;
	is->is_hnext = *isp;
	*isp = is;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_stlookup                                                 */
/* Returns:     ipstate_t* - NULL == no matching state found,               */
/*                           else pointer to state information is returned  */
/* Parameters:  fin(I)  - pointer to packet information                     */
/*              tcp(I)  - pointer to TCP/UDP header; the value passed in is */
/*                        replaced with fin->fin_dp on entry                */
/*              ifqp(O) - if not NULL, receives the timeout queue chosen    */
/*                        for the entry (ICMP/ICMPv6 only; the pointer is   */
/*                        cleared locally for every other protocol)         */
/*                                                                          */
/* Search the state table for a matching entry to the packet described by   */
/* the contents of *fin.                                                    */
/*                                                                          */
/* If we return NULL then no lock on ipf_state is held.                     */
/* If we return non-null then a read-lock on ipf_state is held.             */
/* NOTE(review): on the retry passes the lock is taken with WRITE_ENTER and */
/* only downgraded when the entry is moved with fr_ipsmove(); a match that  */
/* is not moved appears to be returned with the lock as acquired - confirm. */
/* ------------------------------------------------------------------------ */
ipstate_t *fr_stlookup(fin, tcp, ifqp)
fr_info_t *fin;
tcphdr_t *tcp;
ipftq_t **ifqp;
{
	u_int hv, hvm, pr, v, tryagain;
	ipstate_t *is, **isp;
	u_short dport, sport;
	i6addr_t src, dst;
	struct icmp *ic;
	ipftq_t *ifq;
	int oow;
	ipf_stack_t *ifs = fin->fin_ifs;

	is = NULL;
	ifq = NULL;
	tcp = fin->fin_dp;
	ic = (struct icmp *)tcp;
	/* The hash starts from protocol + first 32 bits of each address. */
	hv = (pr = fin->fin_fi.fi_p);
	src = fin->fin_fi.fi_src;
	dst = fin->fin_fi.fi_dst;
	hv += src.in4.s_addr;
	hv += dst.in4.s_addr;

	v = fin->fin_fi.fi_v;
#ifdef USE_INET6
	if (v == 6) {
		hv += fin->fin_fi.fi_src.i6[1];
		hv += fin->fin_fi.fi_src.i6[2];
		hv += fin->fin_fi.fi_src.i6[3];

		if ((fin->fin_p == IPPROTO_ICMPV6) &&
		    IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_dst.in6)) {
			/* Multicast destination is left out of the hash. */
			hv -= dst.in4.s_addr;
		} else {
			hv += fin->fin_fi.fi_dst.i6[1];
			hv += fin->fin_fi.fi_dst.i6[2];
			hv += fin->fin_fi.fi_dst.i6[3];
		}
	}
#endif
	/*
	 * For IPv4 multicast/broadcast packets one address is taken back
	 * out of the hash: the source when inbound, the destination when
	 * outbound.
	 */
	if ((v == 4) &&
	    (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) {
		if (fin->fin_out == 0) {
			hv -= src.in4.s_addr;
		} else {
			hv -= dst.in4.s_addr;
		}
	}

	/*
	 * Search the hash table for matching packet header info.
	 */
	switch (pr)
	{
#ifdef USE_INET6
	case IPPROTO_ICMPV6 :
		tryagain = 0;
		if (v == 6) {
			if ((ic->icmp_type == ICMP6_ECHO_REQUEST) ||
			    (ic->icmp_type == ICMP6_ECHO_REPLY)) {
				hv += ic->icmp_id;
			}
		}
		READ_ENTER(&ifs->ifs_ipf_state);
icmp6again:
		hvm = DOUBLE_HASH(hv, ifs);
		for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) {
			isp = &is->is_hnext;
			if ((is->is_p != pr) || (is->is_v != v))
				continue;
			is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
			if (is != NULL &&
			    fr_matchicmpqueryreply(v, &is->is_icmp,
						   ic, fin->fin_rev)) {
				if (fin->fin_rev)
					ifq = &ifs->ifs_ips_icmpacktq;
				else
					ifq = &ifs->ifs_ips_icmptq;
				break;
			}
		}

		if (is != NULL) {
			/*
			 * Found on the retry pass and the destination is no
			 * longer a wildcard: rehash the entry with the source
			 * address included again.
			 */
			if ((tryagain != 0) && !(is->is_flags & SI_W_DADDR)) {
				hv += fin->fin_fi.fi_src.i6[0];
				hv += fin->fin_fi.fi_src.i6[1];
				hv += fin->fin_fi.fi_src.i6[2];
				hv += fin->fin_fi.fi_src.i6[3];
				fr_ipsmove(is, hv, ifs);
				MUTEX_DOWNGRADE(&ifs->ifs_ipf_state);
			}
			break;
		}
		RWLOCK_EXIT(&ifs->ifs_ipf_state);

		/*
		 * No matching icmp state entry. Perhaps this is a
		 * response to another state entry.
		 *
		 * XXX With some ICMP6 packets, the "other" address is already
		 * in the packet, after the ICMP6 header, and this could be
		 * used in place of the multicast address.  However, taking
		 * advantage of this requires some significant code changes
		 * to handle the specific types where that is the case.
		 */
		if ((ifs->ifs_ips_stats.iss_wild != 0) && (v == 6) && (tryagain == 0) &&
		    !IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_src.in6)) {
			hv -= fin->fin_fi.fi_src.i6[0];
			hv -= fin->fin_fi.fi_src.i6[1];
			hv -= fin->fin_fi.fi_src.i6[2];
			hv -= fin->fin_fi.fi_src.i6[3];
			tryagain = 1;
			/* Write lock: the retry may move the entry. */
			WRITE_ENTER(&ifs->ifs_ipf_state);
			goto icmp6again;
		}

		is = fr_checkicmp6matchingstate(fin);
		if (is != NULL)
			return is;
		break;
#endif

	case IPPROTO_ICMP :
		if (v == 4) {
			hv += ic->icmp_id;
		}
		hv = DOUBLE_HASH(hv, ifs);
		READ_ENTER(&ifs->ifs_ipf_state);
		for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) {
			isp = &is->is_hnext;
			if ((is->is_p != pr) || (is->is_v != v))
				continue;
			is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
			if (is != NULL &&
			    fr_matchicmpqueryreply(v, &is->is_icmp,
						   ic, fin->fin_rev)) {
				if (fin->fin_rev)
					ifq = &ifs->ifs_ips_icmpacktq;
				else
					ifq = &ifs->ifs_ips_icmptq;
				break;
			}
		}
		if (is == NULL) {
			RWLOCK_EXIT(&ifs->ifs_ipf_state);
		}
		break;

	case IPPROTO_TCP :
	case IPPROTO_UDP :
		/* No queue is reported back to the caller for TCP/UDP. */
		ifqp = NULL;
		sport = htons(fin->fin_data[0]);
		hv += sport;
		dport = htons(fin->fin_data[1]);
		hv += dport;
		oow = 0;
		tryagain = 0;
		READ_ENTER(&ifs->ifs_ipf_state);
retry_tcpudp:
		hvm = DOUBLE_HASH(hv, ifs);
		for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) {
			isp = &is->is_hnext;
			if ((is->is_p != pr) || (is->is_v != v))
				continue;
			fin->fin_flx &= ~FI_OOW;
			is = fr_matchsrcdst(fin, is, &src, &dst, tcp, FI_CMP);
			if (is != NULL) {
				if (pr == IPPROTO_TCP) {
					if (!fr_tcpstate(fin, tcp, is)) {
						/* Remember FI_OOW across candidates. */
						oow |= fin->fin_flx & FI_OOW;
						continue;
					}
				}
				break;
			}
		}
		if (is != NULL) {
			/*
			 * Found on a retry pass and no wildcard/clone flags
			 * remain: rehash the entry with the ports added.
			 */
			if (tryagain &&
			    !(is->is_flags & (SI_CLONE|SI_WILDP|SI_WILDA))) {
				hv += dport;
				hv += sport;
				fr_ipsmove(is, hv, ifs);
				MUTEX_DOWNGRADE(&ifs->ifs_ipf_state);
			}
			break;
		}
		RWLOCK_EXIT(&ifs->ifs_ipf_state);

		/*
		 * Retry only while wildcard entries exist: first with the
		 * ports removed from the hash, then with a hash built from
		 * the protocol, one address and the ports.
		 */
		if (ifs->ifs_ips_stats.iss_wild) {
			if (tryagain == 0) {
				hv -= dport;
				hv -= sport;
			} else if (tryagain == 1) {
				hv = fin->fin_fi.fi_p;
				/*
				 * If we try to pretend this is a reply to a
				 * multicast/broadcast packet then we need to
				 * exclude part of the address from the hash
				 * calculation.
				 */
				if (fin->fin_out == 0) {
					hv += src.in4.s_addr;
				} else {
					hv += dst.in4.s_addr;
				}
				hv += dport;
				hv += sport;
			}
			tryagain++;
			if (tryagain <= 2) {
				WRITE_ENTER(&ifs->ifs_ipf_state);
				goto retry_tcpudp;
			}
		}
		fin->fin_flx |= oow;
		break;

#if 0
	case IPPROTO_GRE :
		gre = fin->fin_dp;
		if (GRE_REV(gre->gr_flags) == 1) {
			hv += gre->gr_call;
		}
		/* FALLTHROUGH */
#endif
	default :
		/* No queue is reported back to the caller here either. */
		ifqp = NULL;
		hvm = DOUBLE_HASH(hv, ifs);
		READ_ENTER(&ifs->ifs_ipf_state);
		for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) {
			isp = &is->is_hnext;
			if ((is->is_p != pr) || (is->is_v != v))
				continue;
			is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
			if (is != NULL) {
				ifq = &ifs->ifs_ips_iptq;
				break;
			}
		}
		if (is == NULL) {
			RWLOCK_EXIT(&ifs->ifs_ipf_state);
		}
		break;
	}

	/* A rule-specific timeout queue overrides the default choice. */
	if ((is != NULL) && ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) &&
	    (is->is_tqehead[fin->fin_rev] != NULL))
		ifq = is->is_tqehead[fin->fin_rev];
	if (ifq != NULL && ifqp != NULL)
		*ifqp = ifq;
	return is;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_updatestate                                              */
/* Returns:     Nil                                                         */
/* Parameters:  fin(I) - pointer to packet information                      */
/*              is(I)  - pointer to state table entry                       */
/*
Read Locks: ipf_state */ 2820 /* */ 2821 /* Updates packet and byte counters for a newly received packet. Seeds the */ 2822 /* fragment cache with a new entry as required. */ 2823 /* ------------------------------------------------------------------------ */ 2824 void fr_updatestate(fin, is, ifq) 2825 fr_info_t *fin; 2826 ipstate_t *is; 2827 ipftq_t *ifq; 2828 { 2829 ipftqent_t *tqe; 2830 int i, pass; 2831 ipf_stack_t *ifs = fin->fin_ifs; 2832 2833 i = (fin->fin_rev << 1) + fin->fin_out; 2834 2835 /* 2836 * For TCP packets, ifq == NULL. For all others, check if this new 2837 * queue is different to the last one it was on and move it if so. 2838 */ 2839 tqe = &is->is_sti; 2840 MUTEX_ENTER(&is->is_lock); 2841 if ((tqe->tqe_flags & TQE_RULEBASED) != 0) 2842 ifq = is->is_tqehead[fin->fin_rev]; 2843 2844 if (ifq != NULL) 2845 fr_movequeue(tqe, tqe->tqe_ifq, ifq, ifs); 2846 2847 is->is_pkts[i]++; 2848 is->is_bytes[i] += fin->fin_plen; 2849 MUTEX_EXIT(&is->is_lock); 2850 2851 #ifdef IPFILTER_SYNC 2852 if (is->is_flags & IS_STATESYNC) 2853 ipfsync_update(SMC_STATE, fin, is->is_sync); 2854 #endif 2855 2856 ATOMIC_INCL(ifs->ifs_ips_stats.iss_hits); 2857 2858 fin->fin_fr = is->is_rule; 2859 2860 /* 2861 * If this packet is a fragment and the rule says to track fragments, 2862 * then create a new fragment cache entry. 2863 */ 2864 pass = is->is_pass; 2865 if ((fin->fin_flx & FI_FRAG) && FR_ISPASS(pass)) 2866 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); 2867 } 2868 2869 2870 /* ------------------------------------------------------------------------ */ 2871 /* Function: fr_checkstate */ 2872 /* Returns: frentry_t* - NULL == search failed, */ 2873 /* else pointer to rule for matching state */ 2874 /* Parameters: ifp(I) - pointer to interface */ 2875 /* passp(I) - pointer to filtering result flags */ 2876 /* */ 2877 /* Check if a packet is associated with an entry in the state table. 
*/ 2878 /* ------------------------------------------------------------------------ */ 2879 frentry_t *fr_checkstate(fin, passp) 2880 fr_info_t *fin; 2881 u_32_t *passp; 2882 { 2883 ipstate_t *is; 2884 frentry_t *fr; 2885 tcphdr_t *tcp; 2886 ipftq_t *ifq; 2887 u_int pass; 2888 ipf_stack_t *ifs = fin->fin_ifs; 2889 2890 if (ifs->ifs_fr_state_lock || (ifs->ifs_ips_list == NULL) || 2891 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) 2892 return NULL; 2893 2894 is = NULL; 2895 if ((fin->fin_flx & FI_TCPUDP) || 2896 (fin->fin_fi.fi_p == IPPROTO_ICMP) 2897 #ifdef USE_INET6 2898 || (fin->fin_fi.fi_p == IPPROTO_ICMPV6) 2899 #endif 2900 ) 2901 tcp = fin->fin_dp; 2902 else 2903 tcp = NULL; 2904 2905 /* 2906 * Search the hash table for matching packet header info. 2907 */ 2908 ifq = NULL; 2909 is = fin->fin_state; 2910 if (is == NULL) 2911 is = fr_stlookup(fin, tcp, &ifq); 2912 switch (fin->fin_p) 2913 { 2914 #ifdef USE_INET6 2915 case IPPROTO_ICMPV6 : 2916 if (is != NULL) 2917 break; 2918 if (fin->fin_v == 6) { 2919 is = fr_checkicmp6matchingstate(fin); 2920 if (is != NULL) 2921 goto matched; 2922 } 2923 break; 2924 #endif 2925 case IPPROTO_ICMP : 2926 if (is != NULL) 2927 break; 2928 /* 2929 * No matching icmp state entry. Perhaps this is a 2930 * response to another state entry. 
2931 */ 2932 is = fr_checkicmpmatchingstate(fin); 2933 if (is != NULL) 2934 goto matched; 2935 break; 2936 case IPPROTO_TCP : 2937 if (is == NULL) 2938 break; 2939 2940 if (is->is_pass & FR_NEWISN) { 2941 if (fin->fin_out == 0) 2942 fr_fixinisn(fin, is); 2943 else if (fin->fin_out == 1) 2944 fr_fixoutisn(fin, is); 2945 } 2946 break; 2947 default : 2948 if (fin->fin_rev) 2949 ifq = &ifs->ifs_ips_udpacktq; 2950 else 2951 ifq = &ifs->ifs_ips_udptq; 2952 break; 2953 } 2954 if (is == NULL) { 2955 ATOMIC_INCL(ifs->ifs_ips_stats.iss_miss); 2956 return NULL; 2957 } 2958 2959 matched: 2960 fr = is->is_rule; 2961 if (fr != NULL) { 2962 if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) { 2963 if (fin->fin_nattag == NULL) 2964 return NULL; 2965 if (fr_matchtag(&fr->fr_nattag, fin->fin_nattag) != 0) 2966 return NULL; 2967 } 2968 (void) strncpy(fin->fin_group, fr->fr_group, FR_GROUPLEN); 2969 fin->fin_icode = fr->fr_icode; 2970 } 2971 2972 fin->fin_rule = is->is_rulen; 2973 pass = is->is_pass; 2974 fr_updatestate(fin, is, ifq); 2975 if (fin->fin_out == 1) 2976 fin->fin_nat = is->is_nat[fin->fin_rev]; 2977 2978 fin->fin_state = is; 2979 MUTEX_ENTER(&is->is_lock); 2980 is->is_ref++; 2981 MUTEX_EXIT(&is->is_lock); 2982 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2983 fin->fin_flx |= FI_STATE; 2984 if ((pass & FR_LOGFIRST) != 0) 2985 pass &= ~(FR_LOGFIRST|FR_LOG); 2986 *passp = pass; 2987 return fr; 2988 } 2989 2990 2991 /* ------------------------------------------------------------------------ */ 2992 /* Function: fr_fixoutisn */ 2993 /* Returns: Nil */ 2994 /* Parameters: fin(I) - pointer to packet information */ 2995 /* is(I) - pointer to master state structure */ 2996 /* */ 2997 /* Called only for outbound packets, adjusts the sequence number and the */ 2998 /* TCP checksum to match that change. 
*/ 2999 /* ------------------------------------------------------------------------ */ 3000 static void fr_fixoutisn(fin, is) 3001 fr_info_t *fin; 3002 ipstate_t *is; 3003 { 3004 tcphdr_t *tcp; 3005 int rev; 3006 u_32_t seq; 3007 3008 tcp = fin->fin_dp; 3009 rev = fin->fin_rev; 3010 if ((is->is_flags & IS_ISNSYN) != 0) { 3011 if (rev == 0) { 3012 seq = ntohl(tcp->th_seq); 3013 seq += is->is_isninc[0]; 3014 tcp->th_seq = htonl(seq); 3015 fix_outcksum(&tcp->th_sum, is->is_sumd[0]); 3016 } 3017 } 3018 if ((is->is_flags & IS_ISNACK) != 0) { 3019 if (rev == 1) { 3020 seq = ntohl(tcp->th_seq); 3021 seq += is->is_isninc[1]; 3022 tcp->th_seq = htonl(seq); 3023 fix_outcksum(&tcp->th_sum, is->is_sumd[1]); 3024 } 3025 } 3026 } 3027 3028 3029 /* ------------------------------------------------------------------------ */ 3030 /* Function: fr_fixinisn */ 3031 /* Returns: Nil */ 3032 /* Parameters: fin(I) - pointer to packet information */ 3033 /* is(I) - pointer to master state structure */ 3034 /* */ 3035 /* Called only for inbound packets, adjusts the acknowledge number and the */ 3036 /* TCP checksum to match that change. 
*/ 3037 /* ------------------------------------------------------------------------ */ 3038 static void fr_fixinisn(fin, is) 3039 fr_info_t *fin; 3040 ipstate_t *is; 3041 { 3042 tcphdr_t *tcp; 3043 int rev; 3044 u_32_t ack; 3045 3046 tcp = fin->fin_dp; 3047 rev = fin->fin_rev; 3048 if ((is->is_flags & IS_ISNSYN) != 0) { 3049 if (rev == 1) { 3050 ack = ntohl(tcp->th_ack); 3051 ack -= is->is_isninc[0]; 3052 tcp->th_ack = htonl(ack); 3053 fix_incksum(&tcp->th_sum, is->is_sumd[0]); 3054 } 3055 } 3056 if ((is->is_flags & IS_ISNACK) != 0) { 3057 if (rev == 0) { 3058 ack = ntohl(tcp->th_ack); 3059 ack -= is->is_isninc[1]; 3060 tcp->th_ack = htonl(ack); 3061 fix_incksum(&tcp->th_sum, is->is_sumd[1]); 3062 } 3063 } 3064 } 3065 3066 3067 /* ------------------------------------------------------------------------ */ 3068 /* Function: fr_statesync */ 3069 /* Returns: Nil */ 3070 /* Parameters: action(I) - type of synchronisation to do */ 3071 /* v(I) - IP version being sync'd (v4 or v6) */ 3072 /* ifp(I) - interface identifier associated with action */ 3073 /* name(I) - name associated with ifp parameter */ 3074 /* */ 3075 /* Walk through all state entries and if an interface pointer match is */ 3076 /* found then look it up again, based on its name in case the pointer has */ 3077 /* changed since last time. */ 3078 /* */ 3079 /* If ifp is passed in as being non-null then we are only doing updates for */ 3080 /* existing, matching, uses of it. 
*/
/* ------------------------------------------------------------------------ */
/*
 * Does nothing unless ifs_fr_running is positive; the check is repeated
 * after the ipf_state write lock has been taken.  A v of 0 selects entries
 * of every IP version.
 */
void fr_statesync(action, v, ifp, name, ifs)
int action, v;
void *ifp;
char *name;
ipf_stack_t *ifs;
{
	ipstate_t *is;
	int i;

	if (ifs->ifs_fr_running <= 0)
		return;

	WRITE_ENTER(&ifs->ifs_ipf_state);

	/* Re-check now that the lock is held. */
	if (ifs->ifs_fr_running <= 0) {
		RWLOCK_EXIT(&ifs->ifs_ipf_state);
		return;
	}

	switch (action)
	{
	case IPFSYNC_RESYNC :
		for (is = ifs->ifs_ips_list; is; is = is->is_next) {
			if (v != 0 && is->is_v != v)
				continue;
			/*
			 * Look up all the interface names in the state entry.
			 */
			for (i = 0; i < 4; i++) {
				is->is_ifp[i] = fr_resolvenic(is->is_ifname[i],
							      is->is_v, ifs);
			}
		}
		break;
	case IPFSYNC_NEWIFP :
		for (is = ifs->ifs_ips_list; is; is = is->is_next) {
			if (v != 0 && is->is_v != v)
				continue;
			/*
			 * Point every slot whose stored name equals "name"
			 * at the new interface.
			 */
			for (i = 0; i < 4; i++) {
				if (!strncmp(is->is_ifname[i], name,
					     sizeof(is->is_ifname[i])))
					is->is_ifp[i] = ifp;
			}
		}
		break;
	case IPFSYNC_OLDIFP :
		for (is = ifs->ifs_ips_list; is; is = is->is_next) {
			if (v != 0 && is->is_v != v)
				continue;
			/*
			 * Replace every use of the departing interface with
			 * the marker value (void *)-1.
			 */
			for (i = 0; i < 4; i++) {
				if (is->is_ifp[i] == ifp)
					is->is_ifp[i] = (void *)-1;
			}
		}
		break;
	}
	RWLOCK_EXIT(&ifs->ifs_ipf_state);
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_delstate                                                 */
/* Returns:     Nil                                                         */
/* Parameters:  is(I)  - pointer to state structure to delete               */
/*              why(I) - if not 0, log reason why it was deleted            */
/*              ifs    - ipf stack instance                                 */
/* Write Locks: ipf_state/ipf_global                                        */
/*                                                                          */
/* Deletes a state entry from the enumerated list as well as the hash table */
/* and timeout queue lists.  Make adjustments to hash table statistics and  */
/* global counters as required.                                             */
/*                                                                          */
/* If other references to the entry remain, it is only unhooked from the    */
/* hash table and timeout queue and one reference is dropped; the memory is */
/* released by a later call that finds no other reference.                  */
/* ------------------------------------------------------------------------ */
static void fr_delstate(is, why, ifs)
ipstate_t *is;
int why;
ipf_stack_t *ifs;
{
	int removed = 0;

	/*
	 * NOTE(review): as written this passes when either lock is NOT
	 * write-held, which reads oddly next to the "Write Locks" line in
	 * the header above - confirm the intended sense of the check.
	 */
	ASSERT(rw_write_held(&ifs->ifs_ipf_global.ipf_lk) == 0 ||
		rw_write_held(&ifs->ifs_ipf_state.ipf_lk) == 0);

	/*
	 * Start by removing the entry from the hash table of state entries
	 * so it will not be "used" again.
	 *
	 * It will remain in the "list" of state entries until all references
	 * have been accounted for.
	 */
	if (is->is_phnext != NULL) {
		removed = 1;
		*is->is_phnext = is->is_hnext;
		if (is->is_hnext != NULL)
			is->is_hnext->is_phnext = is->is_phnext;
		if (ifs->ifs_ips_table[is->is_hv] == NULL)
			ifs->ifs_ips_stats.iss_inuse--;
		ifs->ifs_ips_stats.iss_bucketlen[is->is_hv]--;

		is->is_phnext = NULL;
		is->is_hnext = NULL;
	}

	/*
	 * Because ifs->ifs_ips_stats.iss_wild is a count of entries in the state
	 * table that have wildcard flags set, only decrement it once
	 * and do it here.
	 */
	if (is->is_flags & (SI_WILDP|SI_WILDA)) {
		if (!(is->is_flags & SI_CLONED)) {
			ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild);
		}
		is->is_flags &= ~(SI_WILDP|SI_WILDA);
	}

	/*
	 * Next, remove it from the timeout queue it is in.
	 */
	fr_deletequeueentry(&is->is_sti);

	is->is_me = NULL;

	/*
	 * If it is still in use by something else, do not go any further,
	 * but note that at this point it is now an orphan.
	 */
	MUTEX_ENTER(&is->is_lock);
	if (is->is_ref > 1) {
		is->is_ref--;
		MUTEX_EXIT(&is->is_lock);
		/* Only count it as an orphan the first time through. */
		if (removed)
			ifs->ifs_ips_stats.iss_orphans++;
		return;
	}
	MUTEX_EXIT(&is->is_lock);

	is->is_ref = 0;

	/*
	 * If entry has already been removed from table,
	 * it means we're simply cleaning up an orphan.
	 */
	if (!removed)
		ifs->ifs_ips_stats.iss_orphans--;

	if (is->is_tqehead[0] != NULL)
		(void) fr_deletetimeoutqueue(is->is_tqehead[0]);

	if (is->is_tqehead[1] != NULL)
		(void) fr_deletetimeoutqueue(is->is_tqehead[1]);

#ifdef IPFILTER_SYNC
	if (is->is_sync)
		ipfsync_del(is->is_sync);
#endif
#ifdef IPFILTER_SCAN
	(void) ipsc_detachis(is);
#endif

	/*
	 * Now remove it from master list of state table entries.
	 */
	if (is->is_pnext != NULL) {
		*is->is_pnext = is->is_next;
		if (is->is_next != NULL) {
			is->is_next->is_pnext = is->is_pnext;
			is->is_next = NULL;
		}
		is->is_pnext = NULL;
	}

	if (ifs->ifs_ipstate_logging != 0 && why != 0)
		ipstate_log(is, why, ifs);

	/* Give back the reference this entry held on its rule. */
	if (is->is_rule != NULL) {
		is->is_rule->fr_statecnt--;
		(void)fr_derefrule(&is->is_rule, ifs);
	}

	MUTEX_DESTROY(&is->is_lock);
	KFREE(is);
	ifs->ifs_ips_num--;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_timeoutstate                                             */
/* Returns:     Nil                                                         */
/* Parameters:  ifs(I) - ipf stack instance                                 */
/*                                                                          */
/* Slowly expire held state for things like UDP and ICMP.  The algorithm    */
/* used here is to keep the queue sorted with the oldest things at the top  */
/* and the youngest at the bottom.  So if the top one doesn't need to be    */
/* expired then neither will any under it.                                  */
/*                                                                          */
/* Also frees user-defined timeout queues that are flagged for deletion and */
/* no longer referenced, and runs a pending state flush if one was asked    */
/* for via ifs_fr_state_doflush.                                            */
/* ------------------------------------------------------------------------ */
void fr_timeoutstate(ifs)
ipf_stack_t *ifs;
{
	ipftq_t *ifq, *ifqnext;
	ipftqent_t *tqe, *tqn;
	ipstate_t *is;
	SPL_INT(s);

	SPL_NET(s);
	WRITE_ENTER(&ifs->ifs_ipf_state);
	/* Built-in timeout queues. */
	for (ifq = ifs->ifs_ips_tqtqb; ifq != NULL; ifq = ifq->ifq_next)
		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
			if (tqe->tqe_die > ifs->ifs_fr_ticks)
				break;
			/*
			 * Fetch the next queue entry first: fr_delstate()
			 * may free the state entry.
			 */
			tqn = tqe->tqe_next;
			is = tqe->tqe_parent;
			fr_delstate(is, ISL_EXPIRE, ifs);
		}

	/* User-defined timeout queues. */
	for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifq->ifq_next) {
		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
			if (tqe->tqe_die > ifs->ifs_fr_ticks)
				break;
			tqn = tqe->tqe_next;
			is = tqe->tqe_parent;
			fr_delstate(is, ISL_EXPIRE, ifs);
		}
	}

	/* Free user-defined queues marked for deletion with no users left. */
	for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) {
		ifqnext = ifq->ifq_next;

		if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
		    (ifq->ifq_ref == 0)) {
			fr_freetimeoutqueue(ifq, ifs);
		}
	}

	if (ifs->ifs_fr_state_doflush) {
		(void) fr_state_flush(2, 0, ifs);
		ifs->ifs_fr_state_doflush = 0;
	}
	RWLOCK_EXIT(&ifs->ifs_ipf_state);
	SPL_X(s);
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_state_flush                                              */
/* Returns:     int - 0 == success, -1 == failure                           */
/* Parameters:  Nil                                                         */
/* Write Locks: ipf_state                                                   */
/*                                                                          */
/* Flush state tables.  Three actions currently defined:                    */
/* which == 0 : flush all state table entries                               */
/* which == 1 : flush TCP connections which have started to close but are   */
/*              stuck for some reason.                                      */
/* which == 2 : flush TCP connections which have been idle for a long time, */
/*              starting at > 4 days idle and working back in successive half-*/
/*              days to at most 12 hours old.  If this fails to free enough */
/*              slots then work backwards in half hour slots to 30 minutes. */
/*              If that too fails, then work backwards in 30 second intervals */
/*              for the last 30 minutes to at worst 30 seconds idle.
*/ 3345 /* ------------------------------------------------------------------------ */ 3346 static int fr_state_flush(which, proto, ifs) 3347 int which, proto; 3348 ipf_stack_t *ifs; 3349 { 3350 ipftq_t *ifq, *ifqnext; 3351 ipftqent_t *tqe, *tqn; 3352 ipstate_t *is, **isp; 3353 int delete, removed; 3354 long try, maxtick; 3355 u_long interval; 3356 SPL_INT(s); 3357 3358 removed = 0; 3359 3360 SPL_NET(s); 3361 for (isp = &ifs->ifs_ips_list; ((is = *isp) != NULL); ) { 3362 delete = 0; 3363 3364 if ((proto != 0) && (is->is_v != proto)) { 3365 isp = &is->is_next; 3366 continue; 3367 } 3368 3369 switch (which) 3370 { 3371 case 0 : 3372 delete = 1; 3373 break; 3374 case 1 : 3375 case 2 : 3376 if (is->is_p != IPPROTO_TCP) 3377 break; 3378 if ((is->is_state[0] != IPF_TCPS_ESTABLISHED) || 3379 (is->is_state[1] != IPF_TCPS_ESTABLISHED)) 3380 delete = 1; 3381 break; 3382 } 3383 3384 if (delete) { 3385 if (is->is_p == IPPROTO_TCP) 3386 ifs->ifs_ips_stats.iss_fin++; 3387 else 3388 ifs->ifs_ips_stats.iss_expire++; 3389 fr_delstate(is, ISL_FLUSH, ifs); 3390 removed++; 3391 } else 3392 isp = &is->is_next; 3393 } 3394 3395 if (which != 2) { 3396 SPL_X(s); 3397 return removed; 3398 } 3399 3400 /* 3401 * Asked to remove inactive entries because the table is full, try 3402 * again, 3 times, if first attempt failed with a different criteria 3403 * each time. The order tried in must be in decreasing age. 3404 * Another alternative is to implement random drop and drop N entries 3405 * at random until N have been freed up. 
3406 */ 3407 if (ifs->ifs_fr_ticks - ifs->ifs_ips_last_force_flush < IPF_TTLVAL(5)) 3408 goto force_flush_skipped; 3409 ifs->ifs_ips_last_force_flush = ifs->ifs_fr_ticks; 3410 3411 if (ifs->ifs_fr_ticks > IPF_TTLVAL(43200)) 3412 interval = IPF_TTLVAL(43200); 3413 else if (ifs->ifs_fr_ticks > IPF_TTLVAL(1800)) 3414 interval = IPF_TTLVAL(1800); 3415 else if (ifs->ifs_fr_ticks > IPF_TTLVAL(30)) 3416 interval = IPF_TTLVAL(30); 3417 else 3418 interval = IPF_TTLVAL(10); 3419 try = ifs->ifs_fr_ticks - (ifs->ifs_fr_ticks - interval); 3420 if (try < 0) 3421 goto force_flush_skipped; 3422 3423 while (removed == 0) { 3424 maxtick = ifs->ifs_fr_ticks - interval; 3425 if (maxtick < 0) 3426 break; 3427 3428 while (try < maxtick) { 3429 for (ifq = ifs->ifs_ips_tqtqb; ifq != NULL; 3430 ifq = ifq->ifq_next) { 3431 for (tqn = ifq->ifq_head; 3432 ((tqe = tqn) != NULL); ) { 3433 if (tqe->tqe_die > try) 3434 break; 3435 tqn = tqe->tqe_next; 3436 is = tqe->tqe_parent; 3437 fr_delstate(is, ISL_EXPIRE, ifs); 3438 removed++; 3439 } 3440 } 3441 3442 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) { 3443 ifqnext = ifq->ifq_next; 3444 3445 for (tqn = ifq->ifq_head; 3446 ((tqe = tqn) != NULL); ) { 3447 if (tqe->tqe_die > try) 3448 break; 3449 tqn = tqe->tqe_next; 3450 is = tqe->tqe_parent; 3451 fr_delstate(is, ISL_EXPIRE, ifs); 3452 removed++; 3453 } 3454 } 3455 if (try + interval > maxtick) 3456 break; 3457 try += interval; 3458 } 3459 3460 if (removed == 0) { 3461 if (interval == IPF_TTLVAL(43200)) { 3462 interval = IPF_TTLVAL(1800); 3463 } else if (interval == IPF_TTLVAL(1800)) { 3464 interval = IPF_TTLVAL(30); 3465 } else if (interval == IPF_TTLVAL(30)) { 3466 interval = IPF_TTLVAL(10); 3467 } else { 3468 break; 3469 } 3470 } 3471 } 3472 force_flush_skipped: 3473 SPL_X(s); 3474 return removed; 3475 } 3476 3477 3478 3479 /* ------------------------------------------------------------------------ */ 3480 /* Function: fr_tcp_age */ 3481 /* Returns: int - 1 == state transition made, 
0 == no change (rejected)    */
/* Parameters:  tqe(I)   - pointer to timeout queue entry                   */
/*              fin(I)   - pointer to packet information                    */
/*              tqtab(I) - TCP timeout queue table this is in               */
/*              flags(I) - flags from state/NAT entry                       */
/*                                                                          */
/* Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29:          */
/*                                                                          */
/* - (try to) base state transitions on real evidence only,                 */
/*   i.e. packets that are sent and have been received by ipfilter;         */
/*   diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used.       */
/*                                                                          */
/* - deal with half-closed connections correctly;                           */
/*                                                                          */
/* - store the state of the source in state[0] such that ipfstat            */
/*   displays the state as source/dest instead of dest/source; the calls    */
/*   to fr_tcp_age have been changed accordingly.                           */
/*                                                                          */
/* Internal Parameters:                                                     */
/*                                                                          */
/*    state[0] = state of source (host that initiated connection)           */
/*    state[1] = state of dest   (host that accepted the connection)        */
/*                                                                          */
/*    dir == 0 : a packet from source to dest                               */
/*    dir == 1 : a packet from dest to source                               */
/*                                                                          */
/* Locking: it is assumed that the parent of the tqe structure is locked.
*/ 3508 /* ------------------------------------------------------------------------ */ 3509 int fr_tcp_age(tqe, fin, tqtab, flags) 3510 ipftqent_t *tqe; 3511 fr_info_t *fin; 3512 ipftq_t *tqtab; 3513 int flags; 3514 { 3515 int dlen, ostate, nstate, rval, dir; 3516 u_char tcpflags; 3517 tcphdr_t *tcp; 3518 ipf_stack_t *ifs = fin->fin_ifs; 3519 3520 tcp = fin->fin_dp; 3521 3522 rval = 0; 3523 dir = fin->fin_rev; 3524 tcpflags = tcp->th_flags; 3525 dlen = fin->fin_dlen - (TCP_OFF(tcp) << 2); 3526 3527 if (tcpflags & TH_RST) { 3528 if (!(tcpflags & TH_PUSH) && !dlen) 3529 nstate = IPF_TCPS_CLOSED; 3530 else 3531 nstate = IPF_TCPS_CLOSE_WAIT; 3532 rval = 1; 3533 } else { 3534 ostate = tqe->tqe_state[1 - dir]; 3535 nstate = tqe->tqe_state[dir]; 3536 3537 switch (nstate) 3538 { 3539 case IPF_TCPS_CLOSED: /* 0 */ 3540 if ((tcpflags & TH_OPENING) == TH_OPENING) { 3541 /* 3542 * 'dir' received an S and sends SA in 3543 * response, CLOSED -> SYN_RECEIVED 3544 */ 3545 nstate = IPF_TCPS_SYN_RECEIVED; 3546 rval = 1; 3547 } else if ((tcpflags & TH_OPENING) == TH_SYN) { 3548 /* 'dir' sent S, CLOSED -> SYN_SENT */ 3549 nstate = IPF_TCPS_SYN_SENT; 3550 rval = 1; 3551 } 3552 /* 3553 * the next piece of code makes it possible to get 3554 * already established connections into the state table 3555 * after a restart or reload of the filter rules; this 3556 * does not work when a strict 'flags S keep state' is 3557 * used for tcp connections of course 3558 */ 3559 if (((flags & IS_TCPFSM) == 0) && 3560 ((tcpflags & TH_ACKMASK) == TH_ACK)) { 3561 /* 3562 * we saw an A, guess 'dir' is in ESTABLISHED 3563 * mode 3564 */ 3565 switch (ostate) 3566 { 3567 case IPF_TCPS_CLOSED : 3568 case IPF_TCPS_SYN_RECEIVED : 3569 nstate = IPF_TCPS_HALF_ESTAB; 3570 rval = 1; 3571 break; 3572 case IPF_TCPS_HALF_ESTAB : 3573 case IPF_TCPS_ESTABLISHED : 3574 nstate = IPF_TCPS_ESTABLISHED; 3575 rval = 1; 3576 break; 3577 default : 3578 break; 3579 } 3580 } 3581 /* 3582 * TODO: besides regular ACK packets we can 
have other 3583 * packets as well; it is yet to be determined how we 3584 * should initialize the states in those cases 3585 */ 3586 break; 3587 3588 case IPF_TCPS_LISTEN: /* 1 */ 3589 /* NOT USED */ 3590 break; 3591 3592 case IPF_TCPS_SYN_SENT: /* 2 */ 3593 if ((tcpflags & ~(TH_ECN|TH_CWR)) == TH_SYN) { 3594 /* 3595 * A retransmitted SYN packet. We do not reset 3596 * the timeout here to fr_tcptimeout because a 3597 * connection connect timeout does not renew 3598 * after every packet that is sent. We need to 3599 * set rval so as to indicate the packet has 3600 * passed the check for its flags being valid 3601 * in the TCP FSM. Setting rval to 2 has the 3602 * result of not resetting the timeout. 3603 */ 3604 rval = 2; 3605 } else if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == 3606 TH_ACK) { 3607 /* 3608 * we see an A from 'dir' which is in SYN_SENT 3609 * state: 'dir' sent an A in response to an SA 3610 * which it received, SYN_SENT -> ESTABLISHED 3611 */ 3612 nstate = IPF_TCPS_ESTABLISHED; 3613 rval = 1; 3614 } else if (tcpflags & TH_FIN) { 3615 /* 3616 * we see an F from 'dir' which is in SYN_SENT 3617 * state and wants to close its side of the 3618 * connection; SYN_SENT -> FIN_WAIT_1 3619 */ 3620 nstate = IPF_TCPS_FIN_WAIT_1; 3621 rval = 1; 3622 } else if ((tcpflags & TH_OPENING) == TH_OPENING) { 3623 /* 3624 * we see an SA from 'dir' which is already in 3625 * SYN_SENT state, this means we have a 3626 * simultaneous open; SYN_SENT -> SYN_RECEIVED 3627 */ 3628 nstate = IPF_TCPS_SYN_RECEIVED; 3629 rval = 1; 3630 } 3631 break; 3632 3633 case IPF_TCPS_SYN_RECEIVED: /* 3 */ 3634 if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) { 3635 /* 3636 * we see an A from 'dir' which was in 3637 * SYN_RECEIVED state so it must now be in 3638 * established state, SYN_RECEIVED -> 3639 * ESTABLISHED 3640 */ 3641 nstate = IPF_TCPS_ESTABLISHED; 3642 rval = 1; 3643 } else if ((tcpflags & ~(TH_ECN|TH_CWR)) == 3644 TH_OPENING) { 3645 /* 3646 * We see an SA from 'dir' which is already 
in 3647 * SYN_RECEIVED state. 3648 */ 3649 rval = 2; 3650 } else if (tcpflags & TH_FIN) { 3651 /* 3652 * we see an F from 'dir' which is in 3653 * SYN_RECEIVED state and wants to close its 3654 * side of the connection; SYN_RECEIVED -> 3655 * FIN_WAIT_1 3656 */ 3657 nstate = IPF_TCPS_FIN_WAIT_1; 3658 rval = 1; 3659 } 3660 break; 3661 3662 case IPF_TCPS_HALF_ESTAB: /* 4 */ 3663 if (ostate >= IPF_TCPS_HALF_ESTAB) { 3664 if ((tcpflags & TH_ACKMASK) == TH_ACK) { 3665 nstate = IPF_TCPS_ESTABLISHED; 3666 } 3667 } 3668 rval = 1; 3669 3670 break; 3671 3672 case IPF_TCPS_ESTABLISHED: /* 5 */ 3673 rval = 1; 3674 if (tcpflags & TH_FIN) { 3675 /* 3676 * 'dir' closed its side of the connection; 3677 * this gives us a half-closed connection; 3678 * ESTABLISHED -> FIN_WAIT_1 3679 */ 3680 nstate = IPF_TCPS_FIN_WAIT_1; 3681 } else if (tcpflags & TH_ACK) { 3682 /* 3683 * an ACK, should we exclude other flags here? 3684 */ 3685 if (ostate == IPF_TCPS_FIN_WAIT_1) { 3686 /* 3687 * We know the other side did an active 3688 * close, so we are ACKing the recvd 3689 * FIN packet (does the window matching 3690 * code guarantee this?) 
and go into 3691 * CLOSE_WAIT state; this gives us a 3692 * half-closed connection 3693 */ 3694 nstate = IPF_TCPS_CLOSE_WAIT; 3695 } else if (ostate < IPF_TCPS_CLOSE_WAIT) { 3696 /* 3697 * still a fully established 3698 * connection reset timeout 3699 */ 3700 nstate = IPF_TCPS_ESTABLISHED; 3701 } 3702 } 3703 break; 3704 3705 case IPF_TCPS_CLOSE_WAIT: /* 6 */ 3706 rval = 1; 3707 if (tcpflags & TH_FIN) { 3708 /* 3709 * application closed and 'dir' sent a FIN, 3710 * we're now going into LAST_ACK state 3711 */ 3712 nstate = IPF_TCPS_LAST_ACK; 3713 } else { 3714 /* 3715 * we remain in CLOSE_WAIT because the other 3716 * side has closed already and we did not 3717 * close our side yet; reset timeout 3718 */ 3719 nstate = IPF_TCPS_CLOSE_WAIT; 3720 } 3721 break; 3722 3723 case IPF_TCPS_FIN_WAIT_1: /* 7 */ 3724 rval = 1; 3725 if ((tcpflags & TH_ACK) && 3726 ostate > IPF_TCPS_CLOSE_WAIT) { 3727 /* 3728 * if the other side is not active anymore 3729 * it has sent us a FIN packet that we are 3730 * ack'ing now with an ACK; this means both 3731 * sides have now closed the connection and 3732 * we go into TIME_WAIT 3733 */ 3734 /* 3735 * XXX: how do we know we really are ACKing 3736 * the FIN packet here? does the window code 3737 * guarantee that? 3738 */ 3739 nstate = IPF_TCPS_TIME_WAIT; 3740 } else { 3741 /* 3742 * we closed our side of the connection 3743 * already but the other side is still active 3744 * (ESTABLISHED/CLOSE_WAIT); continue with 3745 * this half-closed connection 3746 */ 3747 nstate = IPF_TCPS_FIN_WAIT_1; 3748 } 3749 break; 3750 3751 case IPF_TCPS_CLOSING: /* 8 */ 3752 /* NOT USED */ 3753 break; 3754 3755 case IPF_TCPS_LAST_ACK: /* 9 */ 3756 /* 3757 * We want to reset timer here to keep state in table. 3758 * If we would allow the state to time out here, while 3759 * there would still be packets being retransmitted, we 3760 * would cut off line between the two peers preventing 3761 * them to close connection properly. 
3762 */ 3763 rval = 1; 3764 break; 3765 3766 case IPF_TCPS_FIN_WAIT_2: /* 10 */ 3767 rval = 1; 3768 if ((tcpflags & TH_OPENING) == TH_OPENING) 3769 nstate = IPF_TCPS_SYN_RECEIVED; 3770 else if (tcpflags & TH_SYN) 3771 nstate = IPF_TCPS_SYN_SENT; 3772 break; 3773 3774 case IPF_TCPS_TIME_WAIT: /* 11 */ 3775 /* we're in 2MSL timeout now */ 3776 rval = 1; 3777 break; 3778 3779 default : 3780 #if defined(_KERNEL) 3781 # if SOLARIS 3782 cmn_err(CE_NOTE, 3783 "tcp %lx flags %x si %lx nstate %d ostate %d\n", 3784 (u_long)tcp, tcpflags, (u_long)tqe, 3785 nstate, ostate); 3786 # else 3787 printf("tcp %lx flags %x si %lx nstate %d ostate %d\n", 3788 (u_long)tcp, tcpflags, (u_long)tqe, 3789 nstate, ostate); 3790 # endif 3791 #else 3792 abort(); 3793 #endif 3794 break; 3795 } 3796 } 3797 3798 /* 3799 * If rval == 2 then do not update the queue position, but treat the 3800 * packet as being ok. 3801 */ 3802 if (rval == 2) 3803 rval = 1; 3804 else if (rval == 1) { 3805 tqe->tqe_state[dir] = nstate; 3806 if ((tqe->tqe_flags & TQE_RULEBASED) == 0) 3807 fr_movequeue(tqe, tqe->tqe_ifq, tqtab + nstate, ifs); 3808 } 3809 3810 return rval; 3811 } 3812 3813 3814 /* ------------------------------------------------------------------------ */ 3815 /* Function: ipstate_log */ 3816 /* Returns: Nil */ 3817 /* Parameters: is(I) - pointer to state structure */ 3818 /* type(I) - type of log entry to create */ 3819 /* */ 3820 /* Creates a state table log entry using the state structure and type info. */ 3821 /* passed in. Log packet/byte counts, source/destination address and other */ 3822 /* protocol specific information. 
*/ 3823 /* ------------------------------------------------------------------------ */ 3824 void ipstate_log(is, type, ifs) 3825 struct ipstate *is; 3826 u_int type; 3827 ipf_stack_t *ifs; 3828 { 3829 #ifdef IPFILTER_LOG 3830 struct ipslog ipsl; 3831 size_t sizes[1]; 3832 void *items[1]; 3833 int types[1]; 3834 3835 /* 3836 * Copy information out of the ipstate_t structure and into the 3837 * structure used for logging. 3838 */ 3839 ipsl.isl_type = type; 3840 ipsl.isl_pkts[0] = is->is_pkts[0] + is->is_icmppkts[0]; 3841 ipsl.isl_bytes[0] = is->is_bytes[0]; 3842 ipsl.isl_pkts[1] = is->is_pkts[1] + is->is_icmppkts[1]; 3843 ipsl.isl_bytes[1] = is->is_bytes[1]; 3844 ipsl.isl_pkts[2] = is->is_pkts[2] + is->is_icmppkts[2]; 3845 ipsl.isl_bytes[2] = is->is_bytes[2]; 3846 ipsl.isl_pkts[3] = is->is_pkts[3] + is->is_icmppkts[3]; 3847 ipsl.isl_bytes[3] = is->is_bytes[3]; 3848 ipsl.isl_src = is->is_src; 3849 ipsl.isl_dst = is->is_dst; 3850 ipsl.isl_p = is->is_p; 3851 ipsl.isl_v = is->is_v; 3852 ipsl.isl_flags = is->is_flags; 3853 ipsl.isl_tag = is->is_tag; 3854 ipsl.isl_rulen = is->is_rulen; 3855 (void) strncpy(ipsl.isl_group, is->is_group, FR_GROUPLEN); 3856 3857 if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) { 3858 ipsl.isl_sport = is->is_sport; 3859 ipsl.isl_dport = is->is_dport; 3860 if (ipsl.isl_p == IPPROTO_TCP) { 3861 ipsl.isl_state[0] = is->is_state[0]; 3862 ipsl.isl_state[1] = is->is_state[1]; 3863 } 3864 } else if (ipsl.isl_p == IPPROTO_ICMP) { 3865 ipsl.isl_itype = is->is_icmp.ici_type; 3866 } else if (ipsl.isl_p == IPPROTO_ICMPV6) { 3867 ipsl.isl_itype = is->is_icmp.ici_type; 3868 } else { 3869 ipsl.isl_ps.isl_filler[0] = 0; 3870 ipsl.isl_ps.isl_filler[1] = 0; 3871 } 3872 3873 items[0] = &ipsl; 3874 sizes[0] = sizeof(ipsl); 3875 types[0] = 0; 3876 3877 if (ipllog(IPL_LOGSTATE, NULL, items, sizes, types, 1, ifs)) { 3878 ATOMIC_INCL(ifs->ifs_ips_stats.iss_logged); 3879 } else { 3880 ATOMIC_INCL(ifs->ifs_ips_stats.iss_logfail); 3881 } 3882 #endif 3883 } 
3884 3885 3886 #ifdef USE_INET6 3887 /* ------------------------------------------------------------------------ */ 3888 /* Function: fr_checkicmp6matchingstate */ 3889 /* Returns: ipstate_t* - NULL == no match found, */ 3890 /* else pointer to matching state entry */ 3891 /* Parameters: fin(I) - pointer to packet information */ 3892 /* Locks: NULL == no locks, else Read Lock on ipf_state */ 3893 /* */ 3894 /* If we've got an ICMPv6 error message, using the information stored in */ 3895 /* the ICMPv6 packet, look for a matching state table entry. */ 3896 /* ------------------------------------------------------------------------ */ 3897 static ipstate_t *fr_checkicmp6matchingstate(fin) 3898 fr_info_t *fin; 3899 { 3900 struct icmp6_hdr *ic6, *oic; 3901 int backward, i; 3902 ipstate_t *is, **isp; 3903 u_short sport, dport; 3904 i6addr_t dst, src; 3905 u_short savelen; 3906 icmpinfo_t *ic; 3907 fr_info_t ofin; 3908 tcphdr_t *tcp; 3909 ip6_t *oip6; 3910 u_char pr; 3911 u_int hv; 3912 ipf_stack_t *ifs = fin->fin_ifs; 3913 3914 /* 3915 * Does it at least have the return (basic) IP header ? 3916 * Is it an actual recognised ICMP error type? 3917 * Only a basic IP header (no options) should be with 3918 * an ICMP error header. 3919 */ 3920 if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN) || 3921 !(fin->fin_flx & FI_ICMPERR)) 3922 return NULL; 3923 3924 ic6 = fin->fin_dp; 3925 3926 oip6 = (ip6_t *)((char *)ic6 + ICMPERR_ICMPHLEN); 3927 if (fin->fin_plen < sizeof(*oip6)) 3928 return NULL; 3929 3930 bcopy((char *)fin, (char *)&ofin, sizeof(*fin)); 3931 ofin.fin_v = 6; 3932 ofin.fin_ifp = fin->fin_ifp; 3933 ofin.fin_out = !fin->fin_out; 3934 ofin.fin_m = NULL; /* if dereferenced, panic XXX */ 3935 ofin.fin_mp = NULL; /* if dereferenced, panic XXX */ 3936 3937 /* 3938 * We make a fin entry to be able to feed it to 3939 * matchsrcdst. Note that not all fields are necessary 3940 * but this is the cleanest way. 
Note further we fill 3941 * in fin_mp such that if someone uses it we'll get 3942 * a kernel panic. fr_matchsrcdst does not use this. 3943 * 3944 * watch out here, as ip is in host order and oip6 in network 3945 * order. Any change we make must be undone afterwards. 3946 */ 3947 savelen = oip6->ip6_plen; 3948 oip6->ip6_plen = fin->fin_dlen - ICMPERR_ICMPHLEN; 3949 ofin.fin_flx = FI_NOCKSUM; 3950 ofin.fin_ip = (ip_t *)oip6; 3951 ofin.fin_plen = oip6->ip6_plen; 3952 (void) fr_makefrip(sizeof(*oip6), (ip_t *)oip6, &ofin); 3953 ofin.fin_flx &= ~(FI_BAD|FI_SHORT); 3954 oip6->ip6_plen = savelen; 3955 3956 if (oip6->ip6_nxt == IPPROTO_ICMPV6) { 3957 oic = (struct icmp6_hdr *)(oip6 + 1); 3958 /* 3959 * an ICMP error can only be generated as a result of an 3960 * ICMP query, not as the response on an ICMP error 3961 * 3962 * XXX theoretically ICMP_ECHOREP and the other reply's are 3963 * ICMP query's as well, but adding them here seems strange XXX 3964 */ 3965 if (!(oic->icmp6_type & ICMP6_INFOMSG_MASK)) 3966 return NULL; 3967 3968 /* 3969 * perform a lookup of the ICMP packet in the state table 3970 */ 3971 hv = (pr = oip6->ip6_nxt); 3972 src.in6 = oip6->ip6_src; 3973 hv += src.in4.s_addr; 3974 dst.in6 = oip6->ip6_dst; 3975 hv += dst.in4.s_addr; 3976 hv += oic->icmp6_id; 3977 hv += oic->icmp6_seq; 3978 hv = DOUBLE_HASH(hv, ifs); 3979 3980 READ_ENTER(&ifs->ifs_ipf_state); 3981 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 3982 ic = &is->is_icmp; 3983 isp = &is->is_hnext; 3984 if ((is->is_p == pr) && 3985 !(is->is_pass & FR_NOICMPERR) && 3986 (oic->icmp6_id == ic->ici_id) && 3987 (oic->icmp6_seq == ic->ici_seq) && 3988 (is = fr_matchsrcdst(&ofin, is, &src, 3989 &dst, NULL, FI_ICMPCMP))) { 3990 /* 3991 * in the state table ICMP query's are stored 3992 * with the type of the corresponding ICMP 3993 * response. 
Correct here 3994 */ 3995 if (((ic->ici_type == ICMP6_ECHO_REPLY) && 3996 (oic->icmp6_type == ICMP6_ECHO_REQUEST)) || 3997 (ic->ici_type - 1 == oic->icmp6_type )) { 3998 ifs->ifs_ips_stats.iss_hits++; 3999 backward = IP6_NEQ(&is->is_dst, &src); 4000 fin->fin_rev = !backward; 4001 i = (backward << 1) + fin->fin_out; 4002 is->is_icmppkts[i]++; 4003 return is; 4004 } 4005 } 4006 } 4007 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4008 return NULL; 4009 } 4010 4011 hv = (pr = oip6->ip6_nxt); 4012 src.in6 = oip6->ip6_src; 4013 hv += src.i6[0]; 4014 hv += src.i6[1]; 4015 hv += src.i6[2]; 4016 hv += src.i6[3]; 4017 dst.in6 = oip6->ip6_dst; 4018 hv += dst.i6[0]; 4019 hv += dst.i6[1]; 4020 hv += dst.i6[2]; 4021 hv += dst.i6[3]; 4022 4023 if ((oip6->ip6_nxt == IPPROTO_TCP) || (oip6->ip6_nxt == IPPROTO_UDP)) { 4024 tcp = (tcphdr_t *)(oip6 + 1); 4025 dport = tcp->th_dport; 4026 sport = tcp->th_sport; 4027 hv += dport; 4028 hv += sport; 4029 } else 4030 tcp = NULL; 4031 hv = DOUBLE_HASH(hv, ifs); 4032 4033 READ_ENTER(&ifs->ifs_ipf_state); 4034 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 4035 isp = &is->is_hnext; 4036 /* 4037 * Only allow this icmp though if the 4038 * encapsulated packet was allowed through the 4039 * other way around. Note that the minimal amount 4040 * of info present does not allow for checking against 4041 * tcp internals such as seq and ack numbers. 4042 */ 4043 if ((is->is_p != pr) || (is->is_v != 6) || 4044 (is->is_pass & FR_NOICMPERR)) 4045 continue; 4046 is = fr_matchsrcdst(&ofin, is, &src, &dst, tcp, FI_ICMPCMP); 4047 if (is != NULL) { 4048 ifs->ifs_ips_stats.iss_hits++; 4049 backward = IP6_NEQ(&is->is_dst, &src); 4050 fin->fin_rev = !backward; 4051 i = (backward << 1) + fin->fin_out; 4052 is->is_icmppkts[i]++; 4053 /* 4054 * we deliberately do not touch the timeouts 4055 * for the accompanying state table entry. 4056 * It remains to be seen if that is correct. 
XXX 4057 */ 4058 return is; 4059 } 4060 } 4061 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4062 return NULL; 4063 } 4064 #endif 4065 4066 4067 /* ------------------------------------------------------------------------ */ 4068 /* Function: fr_sttab_init */ 4069 /* Returns: Nil */ 4070 /* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */ 4071 /* */ 4072 /* Initialise the array of timeout queues for TCP. */ 4073 /* ------------------------------------------------------------------------ */ 4074 void fr_sttab_init(tqp, ifs) 4075 ipftq_t *tqp; 4076 ipf_stack_t *ifs; 4077 { 4078 int i; 4079 4080 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) { 4081 tqp[i].ifq_ttl = 0; 4082 tqp[i].ifq_ref = 1; 4083 tqp[i].ifq_head = NULL; 4084 tqp[i].ifq_tail = &tqp[i].ifq_head; 4085 tqp[i].ifq_next = tqp + i + 1; 4086 MUTEX_INIT(&tqp[i].ifq_lock, "ipftq tcp tab"); 4087 } 4088 tqp[IPF_TCP_NSTATES - 1].ifq_next = NULL; 4089 tqp[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcpclosed; 4090 tqp[IPF_TCPS_LISTEN].ifq_ttl = ifs->ifs_fr_tcptimeout; 4091 tqp[IPF_TCPS_SYN_SENT].ifq_ttl = ifs->ifs_fr_tcptimeout; 4092 tqp[IPF_TCPS_SYN_RECEIVED].ifq_ttl = ifs->ifs_fr_tcptimeout; 4093 tqp[IPF_TCPS_ESTABLISHED].ifq_ttl = ifs->ifs_fr_tcpidletimeout; 4094 tqp[IPF_TCPS_CLOSE_WAIT].ifq_ttl = ifs->ifs_fr_tcphalfclosed; 4095 tqp[IPF_TCPS_FIN_WAIT_1].ifq_ttl = ifs->ifs_fr_tcphalfclosed; 4096 tqp[IPF_TCPS_CLOSING].ifq_ttl = ifs->ifs_fr_tcptimeout; 4097 tqp[IPF_TCPS_LAST_ACK].ifq_ttl = ifs->ifs_fr_tcplastack; 4098 tqp[IPF_TCPS_FIN_WAIT_2].ifq_ttl = ifs->ifs_fr_tcpclosewait; 4099 tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = ifs->ifs_fr_tcptimeout; 4100 tqp[IPF_TCPS_HALF_ESTAB].ifq_ttl = ifs->ifs_fr_tcptimeout; 4101 } 4102 4103 4104 /* ------------------------------------------------------------------------ */ 4105 /* Function: fr_sttab_destroy */ 4106 /* Returns: Nil */ 4107 /* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */ 4108 /* */ 4109 /* Do whatever is necessary to "destroy" each of the entries 
in the array */ 4110 /* of timeout queues for TCP. */ 4111 /* ------------------------------------------------------------------------ */ 4112 void fr_sttab_destroy(tqp) 4113 ipftq_t *tqp; 4114 { 4115 int i; 4116 4117 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) 4118 MUTEX_DESTROY(&tqp[i].ifq_lock); 4119 } 4120 4121 4122 /* ------------------------------------------------------------------------ */ 4123 /* Function: fr_statederef */ 4124 /* Returns: Nil */ 4125 /* Parameters: isp(I) - pointer to pointer to state table entry */ 4126 /* ifs - ipf stack instance */ 4127 /* */ 4128 /* Decrement the reference counter for this state table entry and free it */ 4129 /* if there are no more things using it. */ 4130 /* */ 4131 /* Internal parameters: */ 4132 /* state[0] = state of source (host that initiated connection) */ 4133 /* state[1] = state of dest (host that accepted the connection) */ 4134 /* ------------------------------------------------------------------------ */ 4135 void fr_statederef(isp, ifs) 4136 ipstate_t **isp; 4137 ipf_stack_t *ifs; 4138 { 4139 ipstate_t *is; 4140 4141 is = *isp; 4142 *isp = NULL; 4143 4144 MUTEX_ENTER(&is->is_lock); 4145 if (is->is_ref > 1) { 4146 is->is_ref--; 4147 MUTEX_EXIT(&is->is_lock); 4148 #ifndef _KERNEL 4149 if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) || 4150 (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) { 4151 fr_delstate(is, ISL_ORPHAN, ifs); 4152 } 4153 #endif 4154 return; 4155 } 4156 MUTEX_EXIT(&is->is_lock); 4157 4158 WRITE_ENTER(&ifs->ifs_ipf_state); 4159 fr_delstate(is, ISL_EXPIRE, ifs); 4160 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4161 } 4162 4163 4164 /* ------------------------------------------------------------------------ */ 4165 /* Function: fr_setstatequeue */ 4166 /* Returns: Nil */ 4167 /* Parameters: is(I) - pointer to state structure */ 4168 /* rev(I) - forward(0) or reverse(1) direction */ 4169 /* Locks: ipf_state (read or write) */ 4170 /* */ 4171 /* Put the state entry on its default queue entry, 
using rev as a helped in */ 4172 /* determining which queue it should be placed on. */ 4173 /* ------------------------------------------------------------------------ */ 4174 void fr_setstatequeue(is, rev, ifs) 4175 ipstate_t *is; 4176 int rev; 4177 ipf_stack_t *ifs; 4178 { 4179 ipftq_t *oifq, *nifq; 4180 4181 4182 if ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) 4183 nifq = is->is_tqehead[rev]; 4184 else 4185 nifq = NULL; 4186 4187 if (nifq == NULL) { 4188 switch (is->is_p) 4189 { 4190 #ifdef USE_INET6 4191 case IPPROTO_ICMPV6 : 4192 if (rev == 1) 4193 nifq = &ifs->ifs_ips_icmpacktq; 4194 else 4195 nifq = &ifs->ifs_ips_icmptq; 4196 break; 4197 #endif 4198 case IPPROTO_ICMP : 4199 if (rev == 1) 4200 nifq = &ifs->ifs_ips_icmpacktq; 4201 else 4202 nifq = &ifs->ifs_ips_icmptq; 4203 break; 4204 case IPPROTO_TCP : 4205 nifq = ifs->ifs_ips_tqtqb + is->is_state[rev]; 4206 break; 4207 4208 case IPPROTO_UDP : 4209 if (rev == 1) 4210 nifq = &ifs->ifs_ips_udpacktq; 4211 else 4212 nifq = &ifs->ifs_ips_udptq; 4213 break; 4214 4215 default : 4216 nifq = &ifs->ifs_ips_iptq; 4217 break; 4218 } 4219 } 4220 4221 oifq = is->is_sti.tqe_ifq; 4222 /* 4223 * If it's currently on a timeout queue, move it from one queue to 4224 * another, else put it on the end of the newly determined queue. 4225 */ 4226 if (oifq != NULL) 4227 fr_movequeue(&is->is_sti, oifq, nifq, ifs); 4228 else 4229 fr_queueappend(&is->is_sti, nifq, is, ifs); 4230 return; 4231 } 4232 4233 4234 /* ------------------------------------------------------------------------ */ 4235 /* Function: fr_stateiter */ 4236 /* Returns: int - 0 == success, else error */ 4237 /* Parameters: token(I) - pointer to ipftoken structure */ 4238 /* itp(I) - pointer to ipfgeniter structure */ 4239 /* */ 4240 /* This function handles the SIOCGENITER ioctl for the state tables and */ 4241 /* walks through the list of entries in the state table list (ips_list.) 
*/ 4242 /* ------------------------------------------------------------------------ */ 4243 static int fr_stateiter(token, itp, ifs) 4244 ipftoken_t *token; 4245 ipfgeniter_t *itp; 4246 ipf_stack_t *ifs; 4247 { 4248 ipstate_t *is, *next, zero; 4249 int error, count; 4250 char *dst; 4251 4252 if (itp->igi_data == NULL) 4253 return EFAULT; 4254 4255 if (itp->igi_nitems == 0) 4256 return EINVAL; 4257 4258 if (itp->igi_type != IPFGENITER_STATE) 4259 return EINVAL; 4260 4261 error = 0; 4262 4263 READ_ENTER(&ifs->ifs_ipf_state); 4264 4265 /* 4266 * Get "previous" entry from the token and find the next entry. 4267 */ 4268 is = token->ipt_data; 4269 if (is == NULL) { 4270 next = ifs->ifs_ips_list; 4271 } else { 4272 next = is->is_next; 4273 } 4274 4275 dst = itp->igi_data; 4276 for (count = itp->igi_nitems; count > 0; count--) { 4277 /* 4278 * If we found an entry, add a reference to it and update the token. 4279 * Otherwise, zero out data to be returned and NULL out token. 4280 */ 4281 if (next != NULL) { 4282 MUTEX_ENTER(&next->is_lock); 4283 next->is_ref++; 4284 MUTEX_EXIT(&next->is_lock); 4285 token->ipt_data = next; 4286 } else { 4287 bzero(&zero, sizeof(zero)); 4288 next = &zero; 4289 token->ipt_data = NULL; 4290 } 4291 4292 /* 4293 * Safe to release lock now the we have a reference. 4294 */ 4295 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4296 4297 /* 4298 * Copy out data and clean up references and tokens. 4299 */ 4300 error = COPYOUT(next, dst, sizeof(*next)); 4301 if (error != 0) 4302 error = EFAULT; 4303 if (token->ipt_data == NULL) { 4304 ipf_freetoken(token, ifs); 4305 break; 4306 } else { 4307 if (is != NULL) 4308 fr_statederef(&is, ifs); 4309 if (next->is_next == NULL) { 4310 ipf_freetoken(token, ifs); 4311 break; 4312 } 4313 } 4314 4315 if ((count == 1) || (error != 0)) 4316 break; 4317 4318 READ_ENTER(&ifs->ifs_ipf_state); 4319 dst += sizeof(*next); 4320 is = next; 4321 next = is->is_next; 4322 } 4323 4324 return error; 4325 } 4326