1 /* 2 * Copyright (C) 1995-2003 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 7 */ 8 9 #if defined(KERNEL) || defined(_KERNEL) 10 # undef KERNEL 11 # undef _KERNEL 12 # define KERNEL 1 13 # define _KERNEL 1 14 #endif 15 #include <sys/errno.h> 16 #include <sys/types.h> 17 #include <sys/param.h> 18 #include <sys/file.h> 19 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ 20 defined(_KERNEL) 21 # include "opt_ipfilter_log.h" 22 #endif 23 #if defined(_KERNEL) && defined(__FreeBSD_version) && \ 24 (__FreeBSD_version >= 400000) && !defined(KLD_MODULE) 25 #include "opt_inet6.h" 26 #endif 27 #if !defined(_KERNEL) && !defined(__KERNEL__) 28 # include <stdio.h> 29 # include <stdlib.h> 30 # include <string.h> 31 # define _KERNEL 32 # ifdef __OpenBSD__ 33 struct file; 34 # endif 35 # include <sys/uio.h> 36 # undef _KERNEL 37 #endif 38 #if defined(_KERNEL) && (__FreeBSD_version >= 220000) 39 # include <sys/filio.h> 40 # include <sys/fcntl.h> 41 # if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM) 42 # include "opt_ipfilter.h" 43 # endif 44 #else 45 # include <sys/ioctl.h> 46 #endif 47 #include <sys/time.h> 48 #if !defined(linux) 49 # include <sys/protosw.h> 50 #endif 51 #include <sys/socket.h> 52 #if defined(_KERNEL) 53 # include <sys/systm.h> 54 # if !defined(__SVR4) && !defined(__svr4__) 55 # include <sys/mbuf.h> 56 # endif 57 #endif 58 #if defined(__SVR4) || defined(__svr4__) 59 # include <sys/filio.h> 60 # include <sys/byteorder.h> 61 # ifdef _KERNEL 62 # include <sys/dditypes.h> 63 # endif 64 # include <sys/stream.h> 65 # include <sys/kmem.h> 66 #endif 67 68 #include <net/if.h> 69 #ifdef sun 70 # include <net/af.h> 71 #endif 72 #include <net/route.h> 73 #include <netinet/in.h> 74 #include <netinet/in_systm.h> 75 #include <netinet/ip.h> 76 #include <netinet/tcp.h> 77 #if !defined(linux) 78 # include <netinet/ip_var.h> 79 #endif 80 #if !defined(__hpux) && !defined(linux) 81 # include <netinet/tcp_fsm.h> 82 #endif 83 #include <netinet/udp.h> 84 #include <netinet/ip_icmp.h> 85 #include "netinet/ip_compat.h" 86 #include <netinet/tcpip.h> 87 #include "netinet/ip_fil.h" 88 #include "netinet/ip_nat.h" 89 #include "netinet/ip_frag.h" 90 #include "netinet/ip_state.h" 91 #include "netinet/ip_proxy.h" 92 #include "netinet/ipf_stack.h" 93 #ifdef IPFILTER_SYNC 94 #include "netinet/ip_sync.h" 95 #endif 96 #ifdef IPFILTER_SCAN 97 #include "netinet/ip_scan.h" 98 #endif 99 #ifdef USE_INET6 100 #include <netinet/icmp6.h> 101 #endif 102 #if (__FreeBSD_version >= 300000) 103 # include <sys/malloc.h> 104 # if defined(_KERNEL) && !defined(IPFILTER_LKM) 105 # include <sys/libkern.h> 106 # include <sys/systm.h> 107 # endif 108 #endif 109 /* END OF INCLUDES */ 110 111 112 #if !defined(lint) 113 static const char sccsid[] = "@(#)ip_state.c 1.8 6/5/96 (C) 1993-2000 Darren Reed"; 114 static const char rcsid[] = "@(#)$Id: ip_state.c,v 2.186.2.36 2005/08/11 19:58:03 darrenr Exp $"; 115 #endif 116 117 #ifdef USE_INET6 118 static ipstate_t *fr_checkicmp6matchingstate __P((fr_info_t *)); 119 #endif 120 static ipstate_t *fr_matchsrcdst __P((fr_info_t *, ipstate_t *, i6addr_t *, 121 i6addr_t *, tcphdr_t *, u_32_t)); 122 static ipstate_t *fr_checkicmpmatchingstate __P((fr_info_t *)); 123 static int fr_state_flush __P((int, int, ipf_stack_t *)); 124 static ips_stat_t *fr_statetstats __P((ipf_stack_t *)); 125 static int fr_state_remove __P((caddr_t, ipf_stack_t *)); 126 static void fr_ipsmove __P((ipstate_t *, u_int, ipf_stack_t *)); 127 static int fr_tcpstate __P((fr_info_t *, tcphdr_t *, ipstate_t *)); 128 static int fr_tcpoptions __P((fr_info_t *, tcphdr_t *, tcpdata_t *)); 129 static ipstate_t *fr_stclone __P((fr_info_t *, tcphdr_t *, ipstate_t *)); 130 static void fr_fixinisn __P((fr_info_t *, ipstate_t *)); 131 static void fr_fixoutisn __P((fr_info_t *, ipstate_t *)); 132 static void fr_checknewisn __P((fr_info_t *, ipstate_t *)); 133 static int fr_stateiter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 134 135 int fr_stputent __P((caddr_t, ipf_stack_t *)); 136 int fr_stgetent __P((caddr_t, ipf_stack_t *)); 137 138 #define ONE_DAY IPF_TTLVAL(1 * 86400) /* 1 day */ 139 #define FIVE_DAYS (5 * ONE_DAY) 140 #define DOUBLE_HASH(x, ifs) \ 141 (((x) + ifs->ifs_ips_seed[(x) % ifs->ifs_fr_statesize]) % ifs->ifs_fr_statesize) 142 143 144 /* ------------------------------------------------------------------------ */ 145 /* Function: fr_stateinit */ 146 /* Returns: int - 0 == success, -1 == failure */ 147 /* Parameters: ifs - ipf stack instance */ 148 /* */ 149 /* Initialise all the global variables used within the state code. */ 150 /* This action also includes initiailising locks. */ 151 /* ------------------------------------------------------------------------ */ 152 int fr_stateinit(ifs) 153 ipf_stack_t *ifs; 154 { 155 #if defined(NEED_LOCAL_RAND) || !defined(_KERNEL) 156 struct timeval tv; 157 #endif 158 int i; 159 160 KMALLOCS(ifs->ifs_ips_table, ipstate_t **, 161 ifs->ifs_fr_statesize * sizeof(ipstate_t *)); 162 if (ifs->ifs_ips_table == NULL) 163 return -1; 164 bzero((char *)ifs->ifs_ips_table, 165 ifs->ifs_fr_statesize * sizeof(ipstate_t *)); 166 167 KMALLOCS(ifs->ifs_ips_seed, u_long *, 168 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed)); 169 if (ifs->ifs_ips_seed == NULL) 170 return -2; 171 #if defined(NEED_LOCAL_RAND) || !defined(_KERNEL) 172 tv.tv_sec = 0; 173 GETKTIME(&tv); 174 #endif 175 for (i = 0; i < ifs->ifs_fr_statesize; i++) { 176 /* 177 * XXX - ips_seed[X] should be a random number of sorts. 178 */ 179 #if !defined(NEED_LOCAL_RAND) && defined(_KERNEL) 180 ifs->ifs_ips_seed[i] = ipf_random(); 181 #else 182 ifs->ifs_ips_seed[i] = ((u_long)ifs->ifs_ips_seed + i) * 183 ifs->ifs_fr_statesize; 184 ifs->ifs_ips_seed[i] += tv.tv_sec; 185 ifs->ifs_ips_seed[i] *= (u_long)ifs->ifs_ips_seed; 186 ifs->ifs_ips_seed[i] ^= 0x5a5aa5a5; 187 ifs->ifs_ips_seed[i] *= ifs->ifs_fr_statemax; 188 #endif 189 } 190 191 /* fill icmp reply type table */ 192 for (i = 0; i <= ICMP_MAXTYPE; i++) 193 icmpreplytype4[i] = -1; 194 icmpreplytype4[ICMP_ECHO] = ICMP_ECHOREPLY; 195 icmpreplytype4[ICMP_TSTAMP] = ICMP_TSTAMPREPLY; 196 icmpreplytype4[ICMP_IREQ] = ICMP_IREQREPLY; 197 icmpreplytype4[ICMP_MASKREQ] = ICMP_MASKREPLY; 198 #ifdef USE_INET6 199 /* fill icmp reply type table */ 200 for (i = 0; i <= ICMP6_MAXTYPE; i++) 201 icmpreplytype6[i] = -1; 202 icmpreplytype6[ICMP6_ECHO_REQUEST] = ICMP6_ECHO_REPLY; 203 icmpreplytype6[ICMP6_MEMBERSHIP_QUERY] = ICMP6_MEMBERSHIP_REPORT; 204 icmpreplytype6[ICMP6_NI_QUERY] = ICMP6_NI_REPLY; 205 icmpreplytype6[ND_ROUTER_SOLICIT] = ND_ROUTER_ADVERT; 206 icmpreplytype6[ND_NEIGHBOR_SOLICIT] = ND_NEIGHBOR_ADVERT; 207 #endif 208 209 KMALLOCS(ifs->ifs_ips_stats.iss_bucketlen, u_long *, 210 ifs->ifs_fr_statesize * sizeof(u_long)); 211 if (ifs->ifs_ips_stats.iss_bucketlen == NULL) 212 return -1; 213 bzero((char *)ifs->ifs_ips_stats.iss_bucketlen, 214 ifs->ifs_fr_statesize * sizeof(u_long)); 215 216 if (ifs->ifs_fr_state_maxbucket == 0) { 217 for (i = ifs->ifs_fr_statesize; i > 0; i >>= 1) 218 ifs->ifs_fr_state_maxbucket++; 219 ifs->ifs_fr_state_maxbucket *= 2; 220 } 221 222 fr_sttab_init(ifs->ifs_ips_tqtqb, ifs); 223 ifs->ifs_ips_tqtqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_ips_udptq; 224 ifs->ifs_ips_udptq.ifq_ttl = (u_long)ifs->ifs_fr_udptimeout; 225 ifs->ifs_ips_udptq.ifq_ref = 1; 226 ifs->ifs_ips_udptq.ifq_head = NULL; 227 ifs->ifs_ips_udptq.ifq_tail = &ifs->ifs_ips_udptq.ifq_head; 228 MUTEX_INIT(&ifs->ifs_ips_udptq.ifq_lock, "ipftq udp tab"); 229 ifs->ifs_ips_udptq.ifq_next = &ifs->ifs_ips_udpacktq; 230 ifs->ifs_ips_udpacktq.ifq_ttl = (u_long)ifs->ifs_fr_udpacktimeout; 231 ifs->ifs_ips_udpacktq.ifq_ref = 1; 232 ifs->ifs_ips_udpacktq.ifq_head = NULL; 233 ifs->ifs_ips_udpacktq.ifq_tail = &ifs->ifs_ips_udpacktq.ifq_head; 234 MUTEX_INIT(&ifs->ifs_ips_udpacktq.ifq_lock, "ipftq udpack tab"); 235 ifs->ifs_ips_udpacktq.ifq_next = &ifs->ifs_ips_icmptq; 236 ifs->ifs_ips_icmptq.ifq_ttl = (u_long)ifs->ifs_fr_icmptimeout; 237 ifs->ifs_ips_icmptq.ifq_ref = 1; 238 ifs->ifs_ips_icmptq.ifq_head = NULL; 239 ifs->ifs_ips_icmptq.ifq_tail = &ifs->ifs_ips_icmptq.ifq_head; 240 MUTEX_INIT(&ifs->ifs_ips_icmptq.ifq_lock, "ipftq icmp tab"); 241 ifs->ifs_ips_icmptq.ifq_next = &ifs->ifs_ips_icmpacktq; 242 ifs->ifs_ips_icmpacktq.ifq_ttl = (u_long)ifs->ifs_fr_icmpacktimeout; 243 ifs->ifs_ips_icmpacktq.ifq_ref = 1; 244 ifs->ifs_ips_icmpacktq.ifq_head = NULL; 245 ifs->ifs_ips_icmpacktq.ifq_tail = &ifs->ifs_ips_icmpacktq.ifq_head; 246 MUTEX_INIT(&ifs->ifs_ips_icmpacktq.ifq_lock, "ipftq icmpack tab"); 247 ifs->ifs_ips_icmpacktq.ifq_next = &ifs->ifs_ips_iptq; 248 ifs->ifs_ips_iptq.ifq_ttl = (u_long)ifs->ifs_fr_iptimeout; 249 ifs->ifs_ips_iptq.ifq_ref = 1; 250 ifs->ifs_ips_iptq.ifq_head = NULL; 251 ifs->ifs_ips_iptq.ifq_tail = &ifs->ifs_ips_iptq.ifq_head; 252 MUTEX_INIT(&ifs->ifs_ips_iptq.ifq_lock, "ipftq ip tab"); 253 ifs->ifs_ips_iptq.ifq_next = &ifs->ifs_ips_deletetq; 254 /* entry's ttl in deletetq is just 1 tick */ 255 ifs->ifs_ips_deletetq.ifq_ttl = (u_long) 1; 256 ifs->ifs_ips_deletetq.ifq_ref = 1; 257 ifs->ifs_ips_deletetq.ifq_head = NULL; 258 ifs->ifs_ips_deletetq.ifq_tail = &ifs->ifs_ips_deletetq.ifq_head; 259 MUTEX_INIT(&ifs->ifs_ips_deletetq.ifq_lock, "state delete queue"); 260 ifs->ifs_ips_deletetq.ifq_next = NULL; 261 262 RWLOCK_INIT(&ifs->ifs_ipf_state, "ipf IP state rwlock"); 263 MUTEX_INIT(&ifs->ifs_ipf_stinsert, "ipf state insert mutex"); 264 ifs->ifs_fr_state_init = 1; 265 266 ifs->ifs_ips_last_force_flush = ifs->ifs_fr_ticks; 267 return 0; 268 } 269 270 271 /* ------------------------------------------------------------------------ */ 272 /* Function: fr_stateunload */ 273 /* Returns: Nil */ 274 /* Parameters: ifs - ipf stack instance */ 275 /* */ 276 /* Release and destroy any resources acquired or initialised so that */ 277 /* IPFilter can be unloaded or re-initialised. */ 278 /* ------------------------------------------------------------------------ */ 279 void fr_stateunload(ifs) 280 ipf_stack_t *ifs; 281 { 282 ipftq_t *ifq, *ifqnext; 283 ipstate_t *is; 284 285 while ((is = ifs->ifs_ips_list) != NULL) 286 (void) fr_delstate(is, 0, ifs); 287 288 /* 289 * Proxy timeout queues are not cleaned here because although they 290 * exist on the state list, appr_unload is called after fr_stateunload 291 * and the proxies actually are responsible for them being created. 292 * Should the proxy timeouts have their own list? There's no real 293 * justification as this is the only complicationA 294 */ 295 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) { 296 ifqnext = ifq->ifq_next; 297 if (((ifq->ifq_flags & IFQF_PROXY) == 0) && 298 (fr_deletetimeoutqueue(ifq) == 0)) 299 fr_freetimeoutqueue(ifq, ifs); 300 } 301 302 ifs->ifs_ips_stats.iss_inuse = 0; 303 ifs->ifs_ips_num = 0; 304 305 if (ifs->ifs_fr_state_init == 1) { 306 fr_sttab_destroy(ifs->ifs_ips_tqtqb); 307 MUTEX_DESTROY(&ifs->ifs_ips_udptq.ifq_lock); 308 MUTEX_DESTROY(&ifs->ifs_ips_icmptq.ifq_lock); 309 MUTEX_DESTROY(&ifs->ifs_ips_udpacktq.ifq_lock); 310 MUTEX_DESTROY(&ifs->ifs_ips_icmpacktq.ifq_lock); 311 MUTEX_DESTROY(&ifs->ifs_ips_iptq.ifq_lock); 312 MUTEX_DESTROY(&ifs->ifs_ips_deletetq.ifq_lock); 313 } 314 315 if (ifs->ifs_ips_table != NULL) { 316 KFREES(ifs->ifs_ips_table, 317 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_table)); 318 ifs->ifs_ips_table = NULL; 319 } 320 321 if (ifs->ifs_ips_seed != NULL) { 322 KFREES(ifs->ifs_ips_seed, 323 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed)); 324 ifs->ifs_ips_seed = NULL; 325 } 326 327 if (ifs->ifs_ips_stats.iss_bucketlen != NULL) { 328 KFREES(ifs->ifs_ips_stats.iss_bucketlen, 329 ifs->ifs_fr_statesize * sizeof(u_long)); 330 ifs->ifs_ips_stats.iss_bucketlen = NULL; 331 } 332 333 if (ifs->ifs_fr_state_maxbucket_reset == 1) 334 ifs->ifs_fr_state_maxbucket = 0; 335 336 if (ifs->ifs_fr_state_init == 1) { 337 ifs->ifs_fr_state_init = 0; 338 RW_DESTROY(&ifs->ifs_ipf_state); 339 MUTEX_DESTROY(&ifs->ifs_ipf_stinsert); 340 } 341 } 342 343 344 /* ------------------------------------------------------------------------ */ 345 /* Function: fr_statetstats */ 346 /* Returns: ips_state_t* - pointer to state stats structure */ 347 /* Parameters: Nil */ 348 /* */ 349 /* Put all the current numbers and pointers into a single struct and return */ 350 /* a pointer to it. */ 351 /* ------------------------------------------------------------------------ */ 352 static ips_stat_t *fr_statetstats(ifs) 353 ipf_stack_t *ifs; 354 { 355 ifs->ifs_ips_stats.iss_active = ifs->ifs_ips_num; 356 ifs->ifs_ips_stats.iss_statesize = ifs->ifs_fr_statesize; 357 ifs->ifs_ips_stats.iss_statemax = ifs->ifs_fr_statemax; 358 ifs->ifs_ips_stats.iss_table = ifs->ifs_ips_table; 359 ifs->ifs_ips_stats.iss_list = ifs->ifs_ips_list; 360 ifs->ifs_ips_stats.iss_ticks = ifs->ifs_fr_ticks; 361 return &ifs->ifs_ips_stats; 362 } 363 364 /* ------------------------------------------------------------------------ */ 365 /* Function: fr_state_remove */ 366 /* Returns: int - 0 == success, != 0 == failure */ 367 /* Parameters: data(I) - pointer to state structure to delete from table */ 368 /* ifs - ipf stack instance */ 369 /* */ 370 /* Search for a state structure that matches the one passed, according to */ 371 /* the IP addresses and other protocol specific information. */ 372 /* ------------------------------------------------------------------------ */ 373 static int fr_state_remove(data, ifs) 374 caddr_t data; 375 ipf_stack_t *ifs; 376 { 377 ipstate_t *sp, st; 378 int error; 379 380 sp = &st; 381 error = fr_inobj(data, &st, IPFOBJ_IPSTATE); 382 if (error) 383 return EFAULT; 384 385 WRITE_ENTER(&ifs->ifs_ipf_state); 386 for (sp = ifs->ifs_ips_list; sp; sp = sp->is_next) 387 if ((sp->is_p == st.is_p) && (sp->is_v == st.is_v) && 388 !bcmp((caddr_t)&sp->is_src, (caddr_t)&st.is_src, 389 sizeof(st.is_src)) && 390 !bcmp((caddr_t)&sp->is_dst, (caddr_t)&st.is_dst, 391 sizeof(st.is_dst)) && 392 !bcmp((caddr_t)&sp->is_ps, (caddr_t)&st.is_ps, 393 sizeof(st.is_ps))) { 394 (void) fr_delstate(sp, ISL_REMOVE, ifs); 395 RWLOCK_EXIT(&ifs->ifs_ipf_state); 396 return 0; 397 } 398 RWLOCK_EXIT(&ifs->ifs_ipf_state); 399 return ESRCH; 400 } 401 402 403 /* ------------------------------------------------------------------------ */ 404 /* Function: fr_state_ioctl */ 405 /* Returns: int - 0 == success, != 0 == failure */ 406 /* Parameters: data(I) - pointer to ioctl data */ 407 /* cmd(I) - ioctl command integer */ 408 /* mode(I) - file mode bits used with open */ 409 /* uid(I) - uid of caller */ 410 /* ctx(I) - pointer to give the uid context */ 411 /* ifs - ipf stack instance */ 412 /* */ 413 /* Processes an ioctl call made to operate on the IP Filter state device. */ 414 /* ------------------------------------------------------------------------ */ 415 int fr_state_ioctl(data, cmd, mode, uid, ctx, ifs) 416 caddr_t data; 417 ioctlcmd_t cmd; 418 int mode, uid; 419 void *ctx; 420 ipf_stack_t *ifs; 421 { 422 int arg, ret, error = 0; 423 424 switch (cmd) 425 { 426 /* 427 * Delete an entry from the state table. 428 */ 429 case SIOCDELST : 430 error = fr_state_remove(data, ifs); 431 break; 432 /* 433 * Flush the state table 434 */ 435 case SIOCIPFFL : 436 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 437 if (error != 0) { 438 error = EFAULT; 439 } else { 440 if (VALID_TABLE_FLUSH_OPT(arg)) { 441 WRITE_ENTER(&ifs->ifs_ipf_state); 442 ret = fr_state_flush(arg, 4, ifs); 443 RWLOCK_EXIT(&ifs->ifs_ipf_state); 444 error = BCOPYOUT((char *)&ret, data, 445 sizeof(ret)); 446 if (error != 0) 447 return EFAULT; 448 } else { 449 error = EINVAL; 450 } 451 } 452 break; 453 454 #ifdef USE_INET6 455 case SIOCIPFL6 : 456 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 457 if (error != 0) { 458 error = EFAULT; 459 } else { 460 if (VALID_TABLE_FLUSH_OPT(arg)) { 461 WRITE_ENTER(&ifs->ifs_ipf_state); 462 ret = fr_state_flush(arg, 6, ifs); 463 RWLOCK_EXIT(&ifs->ifs_ipf_state); 464 error = BCOPYOUT((char *)&ret, data, 465 sizeof(ret)); 466 if (error != 0) 467 return EFAULT; 468 } else { 469 error = EINVAL; 470 } 471 } 472 break; 473 #endif 474 #ifdef IPFILTER_LOG 475 /* 476 * Flush the state log. 477 */ 478 case SIOCIPFFB : 479 if (!(mode & FWRITE)) 480 error = EPERM; 481 else { 482 int tmp; 483 484 tmp = ipflog_clear(IPL_LOGSTATE, ifs); 485 error = BCOPYOUT((char *)&tmp, data, sizeof(tmp)); 486 if (error != 0) 487 error = EFAULT; 488 } 489 break; 490 /* 491 * Turn logging of state information on/off. 492 */ 493 case SIOCSETLG : 494 if (!(mode & FWRITE)) { 495 error = EPERM; 496 } else { 497 error = BCOPYIN((char *)data, 498 (char *)&ifs->ifs_ipstate_logging, 499 sizeof(ifs->ifs_ipstate_logging)); 500 if (error != 0) 501 error = EFAULT; 502 } 503 break; 504 /* 505 * Return the current state of logging. 506 */ 507 case SIOCGETLG : 508 error = BCOPYOUT((char *)&ifs->ifs_ipstate_logging, 509 (char *)data, 510 sizeof(ifs->ifs_ipstate_logging)); 511 if (error != 0) 512 error = EFAULT; 513 break; 514 /* 515 * Return the number of bytes currently waiting to be read. 516 */ 517 case FIONREAD : 518 arg = ifs->ifs_iplused[IPL_LOGSTATE]; /* returned in an int */ 519 error = BCOPYOUT((char *)&arg, data, sizeof(arg)); 520 if (error != 0) 521 error = EFAULT; 522 break; 523 #endif 524 /* 525 * Get the current state statistics. 526 */ 527 case SIOCGETFS : 528 error = fr_outobj(data, fr_statetstats(ifs), IPFOBJ_STATESTAT); 529 break; 530 /* 531 * Lock/Unlock the state table. (Locking prevents any changes, which 532 * means no packets match). 533 */ 534 case SIOCSTLCK : 535 if (!(mode & FWRITE)) { 536 error = EPERM; 537 } else { 538 error = fr_lock(data, &ifs->ifs_fr_state_lock); 539 } 540 break; 541 /* 542 * Add an entry to the current state table. 543 */ 544 case SIOCSTPUT : 545 if (!ifs->ifs_fr_state_lock || !(mode & FWRITE)) { 546 error = EACCES; 547 break; 548 } 549 error = fr_stputent(data, ifs); 550 break; 551 /* 552 * Get a state table entry. 553 */ 554 case SIOCSTGET : 555 if (!ifs->ifs_fr_state_lock) { 556 error = EACCES; 557 break; 558 } 559 error = fr_stgetent(data, ifs); 560 break; 561 562 case SIOCGENITER : 563 { 564 ipftoken_t *token; 565 ipfgeniter_t iter; 566 567 error = fr_inobj(data, &iter, IPFOBJ_GENITER); 568 if (error != 0) 569 break; 570 571 token = ipf_findtoken(IPFGENITER_STATE, uid, ctx, ifs); 572 if (token != NULL) 573 error = fr_stateiter(token, &iter, ifs); 574 else 575 error = ESRCH; 576 RWLOCK_EXIT(&ifs->ifs_ipf_tokens); 577 break; 578 } 579 580 case SIOCIPFDELTOK : 581 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 582 if (error != 0) { 583 error = EFAULT; 584 } else { 585 error = ipf_deltoken(arg, uid, ctx, ifs); 586 } 587 break; 588 589 default : 590 error = EINVAL; 591 break; 592 } 593 return error; 594 } 595 596 597 /* ------------------------------------------------------------------------ */ 598 /* Function: fr_stgetent */ 599 /* Returns: int - 0 == success, != 0 == failure */ 600 /* Parameters: data(I) - pointer to state structure to retrieve from table */ 601 /* */ 602 /* Copy out state information from the kernel to a user space process. If */ 603 /* there is a filter rule associated with the state entry, copy that out */ 604 /* as well. The entry to copy out is taken from the value of "ips_next" in */ 605 /* the struct passed in and if not null and not found in the list of current*/ 606 /* state entries, the retrieval fails. */ 607 /* ------------------------------------------------------------------------ */ 608 int fr_stgetent(data, ifs) 609 caddr_t data; 610 ipf_stack_t *ifs; 611 { 612 ipstate_t *is, *isn; 613 ipstate_save_t ips; 614 int error; 615 616 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); 617 if (error) 618 return EFAULT; 619 620 isn = ips.ips_next; 621 if (isn == NULL) { 622 isn = ifs->ifs_ips_list; 623 if (isn == NULL) { 624 if (ips.ips_next == NULL) 625 return ENOENT; 626 return 0; 627 } 628 } else { 629 /* 630 * Make sure the pointer we're copying from exists in the 631 * current list of entries. Security precaution to prevent 632 * copying of random kernel data. 633 */ 634 for (is = ifs->ifs_ips_list; is; is = is->is_next) 635 if (is == isn) 636 break; 637 if (!is) 638 return ESRCH; 639 } 640 ips.ips_next = isn->is_next; 641 bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is)); 642 ips.ips_rule = isn->is_rule; 643 if (isn->is_rule != NULL) 644 bcopy((char *)isn->is_rule, (char *)&ips.ips_fr, 645 sizeof(ips.ips_fr)); 646 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); 647 if (error) 648 return EFAULT; 649 return 0; 650 } 651 652 653 /* ------------------------------------------------------------------------ */ 654 /* Function: fr_stputent */ 655 /* Returns: int - 0 == success, != 0 == failure */ 656 /* Parameters: data(I) - pointer to state information struct */ 657 /* ifs - ipf stack instance */ 658 /* */ 659 /* This function implements the SIOCSTPUT ioctl: insert a state entry into */ 660 /* the state table. If the state info. includes a pointer to a filter rule */ 661 /* then also add in an orphaned rule (will not show up in any "ipfstat -io" */ 662 /* output. */ 663 /* ------------------------------------------------------------------------ */ 664 int fr_stputent(data, ifs) 665 caddr_t data; 666 ipf_stack_t *ifs; 667 { 668 ipstate_t *is, *isn; 669 ipstate_save_t ips; 670 int error, i; 671 frentry_t *fr; 672 char *name; 673 674 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); 675 if (error) 676 return EFAULT; 677 678 /* 679 * Trigger automatic call to fr_state_flush() if the 680 * table has reached capacity specified by hi watermark. 681 */ 682 if (ST_TAB_WATER_LEVEL(ifs) > ifs->ifs_state_flush_level_hi) 683 ifs->ifs_fr_state_doflush = 1; 684 685 /* 686 * If automatic flushing did not do its job, and the table 687 * has filled up, don't try to create a new entry. 688 */ 689 if (ifs->ifs_ips_num >= ifs->ifs_fr_statemax) { 690 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 691 return ENOMEM; 692 } 693 694 KMALLOC(isn, ipstate_t *); 695 if (isn == NULL) 696 return ENOMEM; 697 698 bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn)); 699 bzero((char *)isn, offsetof(struct ipstate, is_pkts)); 700 isn->is_sti.tqe_pnext = NULL; 701 isn->is_sti.tqe_next = NULL; 702 isn->is_sti.tqe_ifq = NULL; 703 isn->is_sti.tqe_parent = isn; 704 isn->is_ifp[0] = NULL; 705 isn->is_ifp[1] = NULL; 706 isn->is_ifp[2] = NULL; 707 isn->is_ifp[3] = NULL; 708 isn->is_sync = NULL; 709 fr = ips.ips_rule; 710 711 if (fr == NULL) { 712 READ_ENTER(&ifs->ifs_ipf_state); 713 fr_stinsert(isn, 0, ifs); 714 MUTEX_EXIT(&isn->is_lock); 715 RWLOCK_EXIT(&ifs->ifs_ipf_state); 716 return 0; 717 } 718 719 if (isn->is_flags & SI_NEWFR) { 720 KMALLOC(fr, frentry_t *); 721 if (fr == NULL) { 722 KFREE(isn); 723 return ENOMEM; 724 } 725 bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr)); 726 isn->is_rule = fr; 727 ips.ips_is.is_rule = fr; 728 MUTEX_NUKE(&fr->fr_lock); 729 MUTEX_INIT(&fr->fr_lock, "state filter rule lock"); 730 731 /* 732 * Look up all the interface names in the rule. 733 */ 734 for (i = 0; i < 4; i++) { 735 name = fr->fr_ifnames[i]; 736 fr->fr_ifas[i] = fr_resolvenic(name, fr->fr_v, ifs); 737 name = isn->is_ifname[i]; 738 isn->is_ifp[i] = fr_resolvenic(name, isn->is_v, ifs); 739 } 740 741 fr->fr_ref = 0; 742 fr->fr_dsize = 0; 743 fr->fr_data = NULL; 744 fr->fr_type = FR_T_NONE; 745 746 fr_resolvedest(&fr->fr_tif, fr->fr_v, ifs); 747 fr_resolvedest(&fr->fr_dif, fr->fr_v, ifs); 748 fr_resolvedest(&fr->fr_rif, fr->fr_v, ifs); 749 750 /* 751 * send a copy back to userland of what we ended up 752 * to allow for verification. 753 */ 754 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); 755 if (error) { 756 KFREE(isn); 757 MUTEX_DESTROY(&fr->fr_lock); 758 KFREE(fr); 759 return EFAULT; 760 } 761 READ_ENTER(&ifs->ifs_ipf_state); 762 fr_stinsert(isn, 0, ifs); 763 MUTEX_EXIT(&isn->is_lock); 764 RWLOCK_EXIT(&ifs->ifs_ipf_state); 765 766 } else { 767 READ_ENTER(&ifs->ifs_ipf_state); 768 for (is = ifs->ifs_ips_list; is; is = is->is_next) 769 if (is->is_rule == fr) { 770 fr_stinsert(isn, 0, ifs); 771 MUTEX_EXIT(&isn->is_lock); 772 break; 773 } 774 775 if (is == NULL) { 776 KFREE(isn); 777 isn = NULL; 778 } 779 RWLOCK_EXIT(&ifs->ifs_ipf_state); 780 781 return (isn == NULL) ? ESRCH : 0; 782 } 783 784 return 0; 785 } 786 787 788 /* ------------------------------------------------------------------------ */ 789 /* Function: fr_stinsert */ 790 /* Returns: Nil */ 791 /* Parameters: is(I) - pointer to state structure */ 792 /* rev(I) - flag indicating forward/reverse direction of packet */ 793 /* */ 794 /* Inserts a state structure into the hash table (for lookups) and the list */ 795 /* of state entries (for enumeration). Resolves all of the interface names */ 796 /* to pointers and adjusts running stats for the hash table as appropriate. */ 797 /* */ 798 /* Locking: it is assumed that some kind of lock on ipf_state is held. */ 799 /* Exits with is_lock initialised and held. */ 800 /* ------------------------------------------------------------------------ */ 801 void fr_stinsert(is, rev, ifs) 802 ipstate_t *is; 803 int rev; 804 ipf_stack_t *ifs; 805 { 806 frentry_t *fr; 807 u_int hv; 808 int i; 809 810 MUTEX_INIT(&is->is_lock, "ipf state entry"); 811 812 fr = is->is_rule; 813 if (fr != NULL) { 814 MUTEX_ENTER(&fr->fr_lock); 815 fr->fr_ref++; 816 fr->fr_statecnt++; 817 MUTEX_EXIT(&fr->fr_lock); 818 } 819 820 /* 821 * Look up all the interface names in the state entry. 822 */ 823 for (i = 0; i < 4; i++) { 824 if (is->is_ifp[i] != NULL) 825 continue; 826 is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], is->is_v, ifs); 827 } 828 829 /* 830 * If we could trust is_hv, then the modulous would not be needed, but 831 * when running with IPFILTER_SYNC, this stops bad values. 832 */ 833 hv = is->is_hv % ifs->ifs_fr_statesize; 834 is->is_hv = hv; 835 836 /* 837 * We need to get both of these locks...the first because it is 838 * possible that once the insert is complete another packet might 839 * come along, match the entry and want to update it. 840 */ 841 MUTEX_ENTER(&is->is_lock); 842 MUTEX_ENTER(&ifs->ifs_ipf_stinsert); 843 844 /* 845 * add into list table. 846 */ 847 if (ifs->ifs_ips_list != NULL) 848 ifs->ifs_ips_list->is_pnext = &is->is_next; 849 is->is_pnext = &ifs->ifs_ips_list; 850 is->is_next = ifs->ifs_ips_list; 851 ifs->ifs_ips_list = is; 852 853 if (ifs->ifs_ips_table[hv] != NULL) 854 ifs->ifs_ips_table[hv]->is_phnext = &is->is_hnext; 855 else 856 ifs->ifs_ips_stats.iss_inuse++; 857 is->is_phnext = ifs->ifs_ips_table + hv; 858 is->is_hnext = ifs->ifs_ips_table[hv]; 859 ifs->ifs_ips_table[hv] = is; 860 ifs->ifs_ips_stats.iss_bucketlen[hv]++; 861 ifs->ifs_ips_num++; 862 MUTEX_EXIT(&ifs->ifs_ipf_stinsert); 863 864 fr_setstatequeue(is, rev, ifs); 865 } 866 867 /* ------------------------------------------------------------------------ */ 868 /* Function: fr_match_ipv4addrs */ 869 /* Returns: int - 2 strong match (same addresses, same direction) */ 870 /* 1 weak match (same address, opposite direction) */ 871 /* 0 no match */ 872 /* */ 873 /* Function matches IPv4 addresses. */ 874 /* ------------------------------------------------------------------------ */ 875 static int fr_match_ipv4addrs(is1, is2) 876 ipstate_t *is1; 877 ipstate_t *is2; 878 { 879 int rv; 880 881 if (is1->is_saddr == is2->is_saddr && is1->is_daddr == is2->is_daddr) 882 rv = 2; 883 else if (is1->is_saddr == is2->is_daddr && 884 is1->is_daddr == is2->is_saddr) 885 rv = 1; 886 else 887 rv = 0; 888 889 return (rv); 890 } 891 892 /* ------------------------------------------------------------------------ */ 893 /* Function: fr_match_ipv6addrs */ 894 /* Returns: int - 2 strong match (same addresses, same direction) */ 895 /* 1 weak match (same addresses, opposite direction) */ 896 /* 0 no match */ 897 /* */ 898 /* Function matches IPv6 addresses. */ 899 /* ------------------------------------------------------------------------ */ 900 static int fr_match_ipv6addrs(is1, is2) 901 ipstate_t *is1; 902 ipstate_t *is2; 903 { 904 int rv; 905 906 if (IP6_EQ(&is1->is_src, &is2->is_src) && 907 IP6_EQ(&is1->is_dst, &is2->is_dst)) 908 rv = 2; 909 else if (IP6_EQ(&is1->is_src, &is2->is_dst) && 910 IP6_EQ(&is1->is_dst, &is2->is_src)) { 911 rv = 1; 912 } 913 else 914 rv = 0; 915 916 return (rv); 917 } 918 /* ------------------------------------------------------------------------ */ 919 /* Function: fr_match_addresses */ 920 /* Returns: int - 2 strong match (same addresses, same direction) */ 921 /* 1 weak match (same address, opposite directions) */ 922 /* 0 no match */ 923 /* Parameters: is1, is2 pointers to states we are checking */ 924 /* */ 925 /* Matches addresses, function uses fr_match_ipvXaddrs() to deal with IPv4 */ 926 /* and IPv6 address format. */ 927 /* ------------------------------------------------------------------------ */ 928 static int fr_match_addresses(is1, is2) 929 ipstate_t *is1; 930 ipstate_t *is2; 931 { 932 int rv; 933 934 if (is1->is_v == 4) { 935 rv = fr_match_ipv4addrs(is1, is2); 936 } else { 937 rv = fr_match_ipv6addrs(is1, is2); 938 } 939 940 return (rv); 941 } 942 943 /* ------------------------------------------------------------------------ */ 944 /* Function: fr_match_ppairs */ 945 /* Returns: int - 2 strong match (same ports, same direction) */ 946 /* 1 weak match (same ports, different direction) */ 947 /* 0 no match */ 948 /* Parameters ppairs1, ppairs - src, dst ports we want to match. */ 949 /* */ 950 /* Matches two port_pair_t types (port pairs). Each port pair contains */ 951 /* src, dst port, which belong to session (state entry). */ 952 /* ------------------------------------------------------------------------ */ 953 static int fr_match_ppairs(ppairs1, ppairs2) 954 port_pair_t *ppairs1; 955 port_pair_t *ppairs2; 956 { 957 int rv; 958 959 if (ppairs1->pp_sport == ppairs2->pp_sport && 960 ppairs1->pp_dport == ppairs2->pp_dport) 961 rv = 2; 962 else if (ppairs1->pp_sport == ppairs2->pp_dport && 963 ppairs1->pp_dport == ppairs2->pp_sport) 964 rv = 1; 965 else 966 rv = 0; 967 968 return (rv); 969 } 970 971 /* ------------------------------------------------------------------------ */ 972 /* Function: fr_match_l4_hdr */ 973 /* Returns: int - 0 no match, */ 974 /* 1 weak match (same ports, different directions) */ 975 /* 2 strong match (same ports, same direction) */ 976 /* Parameters is1, is2 - states we want to match */ 977 /* */ 978 /* Function matches L4 header data (source ports for TCP, UDP, CallIds for */ 979 /* GRE protocol). */ 980 /* ------------------------------------------------------------------------ */ 981 static int fr_match_l4_hdr(is1, is2) 982 ipstate_t *is1; 983 ipstate_t *is2; 984 { 985 int rv = 0; 986 port_pair_t pp1; 987 port_pair_t pp2; 988 989 if (is1->is_p != is2->is_p) 990 return (0); 991 992 switch (is1->is_p) { 993 case IPPROTO_TCP: 994 pp1.pp_sport = is1->is_ps.is_ts.ts_sport; 995 pp1.pp_dport = is1->is_ps.is_ts.ts_dport; 996 pp2.pp_sport = is2->is_ps.is_ts.ts_sport; 997 pp2.pp_dport = is2->is_ps.is_ts.ts_dport; 998 rv = fr_match_ppairs(&pp1, &pp2); 999 break; 1000 case IPPROTO_UDP: 1001 pp1.pp_sport = is1->is_ps.is_us.us_sport; 1002 pp1.pp_dport = is1->is_ps.is_us.us_dport; 1003 pp2.pp_sport = is2->is_ps.is_us.us_sport; 1004 pp2.pp_dport = is2->is_ps.is_us.us_dport; 1005 rv = fr_match_ppairs(&pp1, &pp2); 1006 break; 1007 case IPPROTO_GRE: 1008 /* greinfo_t can be also interprted as port pair */ 1009 pp1.pp_sport = is1->is_ps.is_ug.gs_call[0]; 1010 pp1.pp_dport = is1->is_ps.is_ug.gs_call[1]; 1011 pp2.pp_sport = is2->is_ps.is_ug.gs_call[0]; 1012 pp2.pp_dport = is2->is_ps.is_ug.gs_call[1]; 1013 rv = fr_match_ppairs(&pp1, &pp2); 1014 break; 1015 case IPPROTO_ICMP: 1016 case IPPROTO_ICMPV6: 1017 if (bcmp(&is1->is_ps, &is2->is_ps, sizeof (icmpinfo_t))) 1018 rv = 1; 1019 else 1020 rv = 0; 1021 break; 1022 default: 1023 rv = 0; 1024 } 1025 1026 return (rv); 1027 } 1028 1029 /* ------------------------------------------------------------------------ */ 1030 /* Function: fr_matchstates */ 1031 /* Returns: int - nonzero match, zero no match */ 1032 /* Parameters is1, is2 - states we want to match */ 1033 /* */ 1034 /* The state entries are equal (identical match) if they belong to the same */ 1035 /* session. Any time new state entry is being added the fr_addstate() */ 1036 /* function creates temporal state entry from the data it gets from IP and */ 1037 /* L4 header. The fr_matchstats() must be also aware of packet direction, */ 1038 /* which is also stored within the state entry. We should keep in mind the */ 1039 /* information about packet direction is spread accross L3 (addresses) and */ 1040 /* L4 (ports). There are three possible relationships betwee is1, is2: */ 1041 /* - no match (match(is1, is2) == 0)) */ 1042 /* - weak match same addresses (ports), but different */ 1043 /* directions (1) (fr_match_xxxx(is1, is2) == 1) */ 1044 /* - strong match same addresses (ports) and same directions */ 1045 /* (2) (fr_match_xxxx(is1, is2) == 2) */ 1046 /* */ 1047 /* There are functions, which match match addresses (L3 header) in is1, is2 */ 1048 /* and functions, which are used to compare ports (L4 header) data. We say */ 1049 /* the is1 and is2 are same (identical) if there is a match */ 1050 /* (fr_match_l4_hdr(is1, is2) != 0) and matchlevels are same for entries */ 1051 /* (fr_match_l3_hdr(is1, is2) == fr_match_l4_hdr(is1, is2)) for is1, is2. */ 1052 /* Such requirement deals with case as follows: */ 1053 /* suppose there are two connections between hosts A, B. Connection 1: */ 1054 /* a.a.a.a:12345 <=> b.b.b.b:54321 */ 1055 /* Connection 2: */ 1056 /* a.a.a.a:54321 <=> b.b.b.b:12345 */ 1057 /* since we've introduced match levels into our fr_matchstates(), we are */ 1058 /* able to identify, which packets belong to connection A and which belong */ 1059 /* to connection B. Assume there are two entries is1, is2. is1 has been */ 1060 /* from con. 1 packet, which travelled from A to B: */ 1061 /* a.a.a.a:12345 -> b.b.b.b:54321 */ 1062 /* while s2, has been created from packet which belongs to con. 2 and is */ 1063 /* also coming from A to B: */ 1064 /* a.a.a.a:54321 -> b.b.b.b:12345 */ 1065 /* fr_match_l3_hdr(is1, is2) == 2 -> strong match, while */ 1066 /* fr_match_l4_hdr(is1, is2) == 1 -> weak match. Since match levels are */ 1067 /* different the state entries are not identical -> no match as a final */ 1068 /* result. */ 1069 /* ------------------------------------------------------------------------ */ 1070 static int fr_matchstates(is1, is2) 1071 ipstate_t *is1; 1072 ipstate_t *is2; 1073 { 1074 int rv; 1075 int amatch; 1076 int pmatch; 1077 1078 if (bcmp(&is1->is_pass, &is2->is_pass, 1079 offsetof(struct ipstate, is_ps) - 1080 offsetof(struct ipstate, is_pass)) == 0) { 1081 1082 pmatch = fr_match_l4_hdr(is1, is2); 1083 amatch = fr_match_addresses(is1, is2); 1084 /* 1085 * If addresses match (amatch != 0), then 'match levels' 1086 * must be same for matching entries. If amatch and pmatch 1087 * have different values (different match levels), then 1088 * is1 and is2 belong to different sessions. 1089 */ 1090 rv = (amatch != 0) && (amatch == pmatch); 1091 } 1092 else 1093 rv = 0; 1094 1095 return (rv); 1096 } 1097 1098 /* ------------------------------------------------------------------------ */ 1099 /* Function: fr_addstate */ 1100 /* Returns: ipstate_t* - NULL == failure, else pointer to new state */ 1101 /* Parameters: fin(I) - pointer to packet information */ 1102 /* stsave(O) - pointer to place to save pointer to created */ 1103 /* state structure. */ 1104 /* flags(I) - flags to use when creating the structure */ 1105 /* */ 1106 /* Creates a new IP state structure from the packet information collected. */ 1107 /* Inserts it into the state table and appends to the bottom of the active */ 1108 /* list. If the capacity of the table has reached the maximum allowed then */ 1109 /* the call will fail and a flush is scheduled for the next timeout call. */ 1110 /* ------------------------------------------------------------------------ */ 1111 ipstate_t *fr_addstate(fin, stsave, flags) 1112 fr_info_t *fin; 1113 ipstate_t **stsave; 1114 u_int flags; 1115 { 1116 ipstate_t *is, ips; 1117 struct icmp *ic; 1118 u_int pass, hv; 1119 frentry_t *fr; 1120 tcphdr_t *tcp; 1121 grehdr_t *gre; 1122 void *ifp; 1123 int out; 1124 ipf_stack_t *ifs = fin->fin_ifs; 1125 1126 if (ifs->ifs_fr_state_lock || 1127 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) 1128 return NULL; 1129 1130 if ((fin->fin_flx & FI_OOW) && !(fin->fin_tcpf & TH_SYN)) 1131 return NULL; 1132 1133 /* 1134 * Trigger automatic call to fr_state_flush() if the 1135 * table has reached capacity specified by hi watermark. 1136 */ 1137 if (ST_TAB_WATER_LEVEL(ifs) > ifs->ifs_state_flush_level_hi) 1138 ifs->ifs_fr_state_doflush = 1; 1139 1140 /* 1141 * If the max number of state entries has been reached, and there is no 1142 * limit on the state count for the rule, then do not continue. In the 1143 * case where a limit exists, it's ok allow the entries to be created as 1144 * long as specified limit itself has not been reached. 1145 * 1146 * Note that because the lock isn't held on fr, it is possible to exceed 1147 * the specified size of the table. However, the cost of this is being 1148 * ignored here; as the number by which it can go over is a product of 1149 * the number of simultaneous threads that could be executing in here. 1150 * So, a limit of 100 won't result in 200, but could result in 101 or 102. 1151 * 1152 * Also note that, since the automatic flush should have been triggered 1153 * well before we reach the maximum number of state table entries, the 1154 * likelihood of reaching the max (and thus exceedng it) is minimal. 1155 */ 1156 fr = fin->fin_fr; 1157 if (fr != NULL) { 1158 if ((ifs->ifs_ips_num >= ifs->ifs_fr_statemax) && 1159 (fr->fr_statemax == 0)) { 1160 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 1161 return NULL; 1162 } 1163 if ((fr->fr_statemax != 0) && 1164 (fr->fr_statecnt >= fr->fr_statemax)) { 1165 ATOMIC_INCL(ifs->ifs_ips_stats.iss_maxref); 1166 ifs->ifs_fr_state_doflush = 1; 1167 return NULL; 1168 } 1169 } 1170 1171 ic = NULL; 1172 tcp = NULL; 1173 out = fin->fin_out; 1174 is = &ips; 1175 bzero((char *)is, sizeof(*is)); 1176 1177 if (fr == NULL) { 1178 pass = ifs->ifs_fr_flags; 1179 is->is_tag = FR_NOLOGTAG; 1180 } else { 1181 pass = fr->fr_flags; 1182 } 1183 1184 is->is_die = 1 + ifs->ifs_fr_ticks; 1185 /* 1186 * We want to check everything that is a property of this packet, 1187 * but we don't (automatically) care about it's fragment status as 1188 * this may change. 1189 */ 1190 is->is_pass = pass; 1191 is->is_v = fin->fin_v; 1192 is->is_opt[0] = fin->fin_optmsk; 1193 is->is_optmsk[0] = 0xffffffff; 1194 /* 1195 * The reverse direction option mask will be set in fr_matchsrcdst(), 1196 * when we will see the first packet from the peer. We will leave it 1197 * as zero for now. 1198 */ 1199 is->is_optmsk[1] = 0x0; 1200 1201 if (is->is_v == 6) { 1202 is->is_opt[0] &= ~0x8; 1203 is->is_optmsk[0] &= ~0x8; 1204 } 1205 is->is_sec = fin->fin_secmsk; 1206 is->is_secmsk = 0xffff; 1207 is->is_auth = fin->fin_auth; 1208 is->is_authmsk = 0xffff; 1209 1210 /* 1211 * Copy and calculate... 1212 */ 1213 hv = (is->is_p = fin->fin_fi.fi_p); 1214 is->is_src = fin->fin_fi.fi_src; 1215 hv += is->is_saddr; 1216 is->is_dst = fin->fin_fi.fi_dst; 1217 hv += is->is_daddr; 1218 #ifdef USE_INET6 1219 if (fin->fin_v == 6) { 1220 /* 1221 * For ICMPv6, we check to see if the destination address is 1222 * a multicast address. If it is, do not include it in the 1223 * calculation of the hash because the correct reply will come 1224 * back from a real address, not a multicast address. 1225 */ 1226 if ((is->is_p == IPPROTO_ICMPV6) && 1227 IN6_IS_ADDR_MULTICAST(&is->is_dst.in6)) { 1228 /* 1229 * So you can do keep state with neighbour discovery. 1230 * 1231 * Here we could use the address from the neighbour 1232 * solicit message to put in the state structure and 1233 * we could use that without a wildcard flag too... 1234 */ 1235 is->is_flags |= SI_W_DADDR; 1236 hv -= is->is_daddr; 1237 } else { 1238 hv += is->is_dst.i6[1]; 1239 hv += is->is_dst.i6[2]; 1240 hv += is->is_dst.i6[3]; 1241 } 1242 hv += is->is_src.i6[1]; 1243 hv += is->is_src.i6[2]; 1244 hv += is->is_src.i6[3]; 1245 } 1246 #endif 1247 if ((fin->fin_v == 4) && 1248 (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) { 1249 if (fin->fin_out == 0) { 1250 flags |= SI_W_DADDR|SI_CLONE; 1251 hv -= is->is_daddr; 1252 } else { 1253 flags |= SI_W_SADDR|SI_CLONE; 1254 hv -= is->is_saddr; 1255 } 1256 } 1257 1258 switch (is->is_p) 1259 { 1260 #ifdef USE_INET6 1261 case IPPROTO_ICMPV6 : 1262 ic = fin->fin_dp; 1263 1264 switch (ic->icmp_type) 1265 { 1266 case ICMP6_ECHO_REQUEST : 1267 is->is_icmp.ici_type = ic->icmp_type; 1268 hv += (is->is_icmp.ici_id = ic->icmp_id); 1269 break; 1270 case ICMP6_MEMBERSHIP_QUERY : 1271 case ND_ROUTER_SOLICIT : 1272 case ND_NEIGHBOR_SOLICIT : 1273 case ICMP6_NI_QUERY : 1274 is->is_icmp.ici_type = ic->icmp_type; 1275 break; 1276 default : 1277 return NULL; 1278 } 1279 ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp); 1280 break; 1281 #endif 1282 case IPPROTO_ICMP : 1283 ic = fin->fin_dp; 1284 1285 switch (ic->icmp_type) 1286 { 1287 case ICMP_ECHO : 1288 case ICMP_TSTAMP : 1289 case ICMP_IREQ : 1290 case ICMP_MASKREQ : 1291 is->is_icmp.ici_type = ic->icmp_type; 1292 hv += (is->is_icmp.ici_id = ic->icmp_id); 1293 break; 1294 default : 1295 return NULL; 1296 } 1297 ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp); 1298 break; 1299 1300 case IPPROTO_GRE : 1301 gre = fin->fin_dp; 1302 1303 is->is_gre.gs_flags = gre->gr_flags; 1304 is->is_gre.gs_ptype = gre->gr_ptype; 1305 if (GRE_REV(is->is_gre.gs_flags) == 1) { 1306 is->is_call[0] = fin->fin_data[0]; 1307 is->is_call[1] = fin->fin_data[1]; 1308 } 1309 break; 1310 1311 case IPPROTO_TCP : 1312 tcp = fin->fin_dp; 1313 1314 if (tcp->th_flags & TH_RST) 1315 return NULL; 1316 /* 1317 * The endian of the ports doesn't matter, but the ack and 1318 * sequence numbers do as we do mathematics on them later. 1319 */ 1320 is->is_sport = htons(fin->fin_data[0]); 1321 is->is_dport = htons(fin->fin_data[1]); 1322 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) { 1323 hv += is->is_sport; 1324 hv += is->is_dport; 1325 } 1326 1327 /* 1328 * If this is a real packet then initialise fields in the 1329 * state information structure from the TCP header information. 1330 */ 1331 1332 is->is_maxdwin = 1; 1333 is->is_maxswin = ntohs(tcp->th_win); 1334 if (is->is_maxswin == 0) 1335 is->is_maxswin = 1; 1336 1337 if ((fin->fin_flx & FI_IGNORE) == 0) { 1338 is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen - 1339 (TCP_OFF(tcp) << 2) + 1340 ((tcp->th_flags & TH_SYN) ? 1 : 0) + 1341 ((tcp->th_flags & TH_FIN) ? 1 : 0); 1342 is->is_maxsend = is->is_send; 1343 1344 /* 1345 * Window scale option is only present in 1346 * SYN/SYN-ACK packet. 1347 */ 1348 if ((tcp->th_flags & ~(TH_FIN|TH_ACK|TH_ECNALL)) == 1349 TH_SYN && 1350 (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) { 1351 if (fr_tcpoptions(fin, tcp, 1352 &is->is_tcp.ts_data[0]) == -1) { 1353 fin->fin_flx |= FI_BAD; 1354 } 1355 } 1356 1357 if ((fin->fin_out != 0) && (pass & FR_NEWISN) != 0) { 1358 fr_checknewisn(fin, is); 1359 fr_fixoutisn(fin, is); 1360 } 1361 1362 if ((tcp->th_flags & TH_OPENING) == TH_SYN) 1363 flags |= IS_TCPFSM; 1364 else { 1365 is->is_maxdwin = is->is_maxswin * 2; 1366 is->is_dend = ntohl(tcp->th_ack); 1367 is->is_maxdend = ntohl(tcp->th_ack); 1368 is->is_maxdwin *= 2; 1369 } 1370 } 1371 1372 /* 1373 * If we're creating state for a starting connection, start the 1374 * timer on it as we'll never see an error if it fails to 1375 * connect. 1376 */ 1377 ATOMIC_INCL(ifs->ifs_ips_stats.iss_tcp); 1378 break; 1379 1380 case IPPROTO_UDP : 1381 tcp = fin->fin_dp; 1382 1383 is->is_sport = htons(fin->fin_data[0]); 1384 is->is_dport = htons(fin->fin_data[1]); 1385 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) { 1386 hv += tcp->th_dport; 1387 hv += tcp->th_sport; 1388 } 1389 ATOMIC_INCL(ifs->ifs_ips_stats.iss_udp); 1390 break; 1391 1392 default : 1393 break; 1394 } 1395 hv = DOUBLE_HASH(hv, ifs); 1396 is->is_hv = hv; 1397 is->is_rule = fr; 1398 is->is_flags = flags & IS_INHERITED; 1399 1400 /* 1401 * Look for identical state. 1402 */ 1403 for (is = ifs->ifs_ips_table[is->is_hv % ifs->ifs_fr_statesize]; 1404 is != NULL; 1405 is = is->is_hnext) { 1406 if (fr_matchstates(&ips, is) == 1) 1407 break; 1408 } 1409 1410 /* 1411 * we've found a matching state -> state already exists, 1412 * we are not going to add a duplicate record. 1413 */ 1414 if (is != NULL) 1415 return NULL; 1416 1417 if (ifs->ifs_ips_stats.iss_bucketlen[hv] >= ifs->ifs_fr_state_maxbucket) { 1418 ATOMIC_INCL(ifs->ifs_ips_stats.iss_bucketfull); 1419 return NULL; 1420 } 1421 KMALLOC(is, ipstate_t *); 1422 if (is == NULL) { 1423 ATOMIC_INCL(ifs->ifs_ips_stats.iss_nomem); 1424 return NULL; 1425 } 1426 bcopy((char *)&ips, (char *)is, sizeof(*is)); 1427 /* 1428 * Do not do the modulous here, it is done in fr_stinsert(). 1429 */ 1430 if (fr != NULL) { 1431 (void) strncpy(is->is_group, fr->fr_group, FR_GROUPLEN); 1432 if (fr->fr_age[0] != 0) { 1433 is->is_tqehead[0] = 1434 fr_addtimeoutqueue(&ifs->ifs_ips_utqe, 1435 fr->fr_age[0], ifs); 1436 is->is_sti.tqe_flags |= TQE_RULEBASED; 1437 } 1438 if (fr->fr_age[1] != 0) { 1439 is->is_tqehead[1] = 1440 fr_addtimeoutqueue(&ifs->ifs_ips_utqe, 1441 fr->fr_age[1], ifs); 1442 is->is_sti.tqe_flags |= TQE_RULEBASED; 1443 } 1444 is->is_tag = fr->fr_logtag; 1445 1446 is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1]; 1447 is->is_ifp[(1 - out) << 1] = fr->fr_ifas[2]; 1448 is->is_ifp[((1 - out) << 1) + 1] = fr->fr_ifas[3]; 1449 1450 if (((ifp = fr->fr_ifas[1]) != NULL) && 1451 (ifp != (void *)-1)) { 1452 COPYIFNAME(ifp, is->is_ifname[(out << 1) + 1], fr->fr_v); 1453 } 1454 if (((ifp = fr->fr_ifas[2]) != NULL) && 1455 (ifp != (void *)-1)) { 1456 COPYIFNAME(ifp, is->is_ifname[(1 - out) << 1], fr->fr_v); 1457 } 1458 if (((ifp = fr->fr_ifas[3]) != NULL) && 1459 (ifp != (void *)-1)) { 1460 COPYIFNAME(ifp, is->is_ifname[((1 - out) << 1) + 1], fr->fr_v); 1461 } 1462 } 1463 1464 is->is_ifp[out << 1] = fin->fin_ifp; 1465 if (fin->fin_ifp != NULL) { 1466 COPYIFNAME(fin->fin_ifp, is->is_ifname[out << 1], fin->fin_v); 1467 } 1468 1469 is->is_ref = 1; 1470 is->is_pkts[0] = 0, is->is_bytes[0] = 0; 1471 is->is_pkts[1] = 0, is->is_bytes[1] = 0; 1472 is->is_pkts[2] = 0, is->is_bytes[2] = 0; 1473 is->is_pkts[3] = 0, is->is_bytes[3] = 0; 1474 if ((fin->fin_flx & FI_IGNORE) == 0) { 1475 is->is_pkts[out] = 1; 1476 is->is_bytes[out] = fin->fin_plen; 1477 is->is_flx[out][0] = fin->fin_flx & FI_CMP; 1478 is->is_flx[out][0] &= ~FI_OOW; 1479 } 1480 1481 if (pass & FR_STSTRICT) 1482 is->is_flags |= IS_STRICT; 1483 1484 if (pass & FR_STATESYNC) 1485 is->is_flags |= IS_STATESYNC; 1486 1487 if (flags & (SI_WILDP|SI_WILDA)) { 1488 ATOMIC_INCL(ifs->ifs_ips_stats.iss_wild); 1489 } 1490 is->is_rulen = fin->fin_rule; 1491 1492 1493 if (pass & FR_LOGFIRST) 1494 is->is_pass &= ~(FR_LOGFIRST|FR_LOG); 1495 1496 READ_ENTER(&ifs->ifs_ipf_state); 1497 is->is_me = stsave; 1498 1499 fr_stinsert(is, fin->fin_rev, ifs); 1500 1501 if (fin->fin_p == IPPROTO_TCP) { 1502 /* 1503 * If we're creating state for a starting connection, start the 1504 * timer on it as we'll never see an error if it fails to 1505 * connect. 1506 */ 1507 (void) fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb, 1508 is->is_flags); 1509 MUTEX_EXIT(&is->is_lock); 1510 #ifdef IPFILTER_SCAN 1511 if ((is->is_flags & SI_CLONE) == 0) 1512 (void) ipsc_attachis(is); 1513 #endif 1514 } else { 1515 MUTEX_EXIT(&is->is_lock); 1516 } 1517 #ifdef IPFILTER_SYNC 1518 if ((is->is_flags & IS_STATESYNC) && ((is->is_flags & SI_CLONE) == 0)) 1519 is->is_sync = ipfsync_new(SMC_STATE, fin, is); 1520 #endif 1521 if (ifs->ifs_ipstate_logging) 1522 ipstate_log(is, ISL_NEW, ifs); 1523 1524 RWLOCK_EXIT(&ifs->ifs_ipf_state); 1525 fin->fin_rev = IP6_NEQ(&is->is_dst, &fin->fin_daddr); 1526 fin->fin_flx |= FI_STATE; 1527 if (fin->fin_flx & FI_FRAG) 1528 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); 1529 1530 return is; 1531 } 1532 1533 1534 /* ------------------------------------------------------------------------ */ 1535 /* Function: fr_tcpoptions */ 1536 /* Returns: int - 1 == packet matches state entry, 0 == it does not */ 1537 /* Parameters: fin(I) - pointer to packet information */ 1538 /* tcp(I) - pointer to TCP packet header */ 1539 /* td(I) - pointer to TCP data held as part of the state */ 1540 /* */ 1541 /* Look after the TCP header for any options and deal with those that are */ 1542 /* present. Record details about those that we recogise. */ 1543 /* ------------------------------------------------------------------------ */ 1544 static int fr_tcpoptions(fin, tcp, td) 1545 fr_info_t *fin; 1546 tcphdr_t *tcp; 1547 tcpdata_t *td; 1548 { 1549 int off, mlen, ol, i, len, retval; 1550 char buf[64], *s, opt; 1551 mb_t *m = NULL; 1552 1553 len = (TCP_OFF(tcp) << 2); 1554 if (fin->fin_dlen < len) 1555 return 0; 1556 len -= sizeof(*tcp); 1557 1558 off = fin->fin_plen - fin->fin_dlen + sizeof(*tcp) + fin->fin_ipoff; 1559 1560 m = fin->fin_m; 1561 mlen = MSGDSIZE(m) - off; 1562 if (len > mlen) { 1563 len = mlen; 1564 retval = 0; 1565 } else { 1566 retval = 1; 1567 } 1568 1569 COPYDATA(m, off, len, buf); 1570 1571 for (s = buf; len > 0; ) { 1572 opt = *s; 1573 if (opt == TCPOPT_EOL) 1574 break; 1575 else if (opt == TCPOPT_NOP) 1576 ol = 1; 1577 else { 1578 if (len < 2) 1579 break; 1580 ol = (int)*(s + 1); 1581 if (ol < 2 || ol > len) 1582 break; 1583 1584 /* 1585 * Extract the TCP options we are interested in out of 1586 * the header and store them in the the tcpdata struct. 1587 */ 1588 switch (opt) 1589 { 1590 case TCPOPT_WINDOW : 1591 if (ol == TCPOLEN_WINDOW) { 1592 i = (int)*(s + 2); 1593 if (i > TCP_WSCALE_MAX) 1594 i = TCP_WSCALE_MAX; 1595 else if (i < 0) 1596 i = 0; 1597 td->td_winscale = i; 1598 td->td_winflags |= TCP_WSCALE_SEEN | 1599 TCP_WSCALE_FIRST; 1600 } else 1601 retval = -1; 1602 break; 1603 case TCPOPT_MAXSEG : 1604 /* 1605 * So, if we wanted to set the TCP MAXSEG, 1606 * it should be done here... 1607 */ 1608 if (ol == TCPOLEN_MAXSEG) { 1609 i = (int)*(s + 2); 1610 i <<= 8; 1611 i += (int)*(s + 3); 1612 td->td_maxseg = i; 1613 } else 1614 retval = -1; 1615 break; 1616 case TCPOPT_SACK_PERMITTED : 1617 if (ol == TCPOLEN_SACK_PERMITTED) 1618 td->td_winflags |= TCP_SACK_PERMIT; 1619 else 1620 retval = -1; 1621 break; 1622 } 1623 } 1624 len -= ol; 1625 s += ol; 1626 } 1627 return retval; 1628 } 1629 1630 1631 /* ------------------------------------------------------------------------ */ 1632 /* Function: fr_tcpstate */ 1633 /* Returns: int - 1 == packet matches state entry, 0 == it does not */ 1634 /* Parameters: fin(I) - pointer to packet information */ 1635 /* tcp(I) - pointer to TCP packet header */ 1636 /* is(I) - pointer to master state structure */ 1637 /* */ 1638 /* Check to see if a packet with TCP headers fits within the TCP window. */ 1639 /* Change timeout depending on whether new packet is a SYN-ACK returning */ 1640 /* for a SYN or a RST or FIN which indicate time to close up shop. */ 1641 /* ------------------------------------------------------------------------ */ 1642 static int fr_tcpstate(fin, tcp, is) 1643 fr_info_t *fin; 1644 tcphdr_t *tcp; 1645 ipstate_t *is; 1646 { 1647 int source, ret = 0, flags; 1648 tcpdata_t *fdata, *tdata; 1649 ipf_stack_t *ifs = fin->fin_ifs; 1650 1651 source = !fin->fin_rev; 1652 if (((is->is_flags & IS_TCPFSM) != 0) && (source == 1) && 1653 (ntohs(is->is_sport) != fin->fin_data[0])) 1654 source = 0; 1655 fdata = &is->is_tcp.ts_data[!source]; 1656 tdata = &is->is_tcp.ts_data[source]; 1657 1658 MUTEX_ENTER(&is->is_lock); 1659 1660 /* 1661 * If a SYN packet is received for a connection that is in a half 1662 * closed state, then move its state entry to deletetq. In such case 1663 * the SYN packet will be consequently dropped. This allows new state 1664 * entry to be created with a retransmited SYN packet. 1665 */ 1666 if ((tcp->th_flags & TH_OPENING) == TH_SYN) { 1667 if ((is->is_state[source] > IPF_TCPS_ESTABLISHED) && 1668 (is->is_state[!source] > IPF_TCPS_ESTABLISHED)) { 1669 is->is_state[source] = IPF_TCPS_CLOSED; 1670 is->is_state[!source] = IPF_TCPS_CLOSED; 1671 /* 1672 * Do not update is->is_sti.tqe_die in case state entry 1673 * is already present in deletetq. It prevents state 1674 * entry ttl update by retransmitted SYN packets, which 1675 * may arrive before timer tick kicks off. The SYN 1676 * packet will be dropped again. 1677 */ 1678 if (is->is_sti.tqe_ifq != &ifs->ifs_ips_deletetq) 1679 fr_movequeue(&is->is_sti, is->is_sti.tqe_ifq, 1680 &fin->fin_ifs->ifs_ips_deletetq, 1681 fin->fin_ifs); 1682 1683 MUTEX_EXIT(&is->is_lock); 1684 return 0; 1685 } 1686 } 1687 1688 if (fr_tcpinwindow(fin, fdata, tdata, tcp, is->is_flags)) { 1689 #ifdef IPFILTER_SCAN 1690 if (is->is_flags & (IS_SC_CLIENT|IS_SC_SERVER)) { 1691 ipsc_packet(fin, is); 1692 if (FR_ISBLOCK(is->is_pass)) { 1693 MUTEX_EXIT(&is->is_lock); 1694 return 1; 1695 } 1696 } 1697 #endif 1698 1699 /* 1700 * Nearing end of connection, start timeout. 1701 */ 1702 ret = fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb, 1703 is->is_flags); 1704 if (ret == 0) { 1705 MUTEX_EXIT(&is->is_lock); 1706 return 0; 1707 } 1708 1709 /* 1710 * set s0's as appropriate. Use syn-ack packet as it 1711 * contains both pieces of required information. 1712 */ 1713 /* 1714 * Window scale option is only present in SYN/SYN-ACK packet. 1715 * Compare with ~TH_FIN to mask out T/TCP setups. 1716 */ 1717 flags = tcp->th_flags & ~(TH_FIN|TH_ECNALL); 1718 if (flags == (TH_SYN|TH_ACK)) { 1719 is->is_s0[source] = ntohl(tcp->th_ack); 1720 is->is_s0[!source] = ntohl(tcp->th_seq) + 1; 1721 if (TCP_OFF(tcp) > (sizeof (tcphdr_t) >> 2)) { 1722 (void) fr_tcpoptions(fin, tcp, fdata); 1723 } 1724 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) 1725 fr_checknewisn(fin, is); 1726 } else if (flags == TH_SYN) { 1727 is->is_s0[source] = ntohl(tcp->th_seq) + 1; 1728 if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) 1729 (void) fr_tcpoptions(fin, tcp, tdata); 1730 1731 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) 1732 fr_checknewisn(fin, is); 1733 1734 } 1735 ret = 1; 1736 } else 1737 fin->fin_flx |= FI_OOW; 1738 MUTEX_EXIT(&is->is_lock); 1739 return ret; 1740 } 1741 1742 1743 /* ------------------------------------------------------------------------ */ 1744 /* Function: fr_checknewisn */ 1745 /* Returns: Nil */ 1746 /* Parameters: fin(I) - pointer to packet information */ 1747 /* is(I) - pointer to master state structure */ 1748 /* */ 1749 /* Check to see if this TCP connection is expecting and needs a new */ 1750 /* sequence number for a particular direction of the connection. */ 1751 /* */ 1752 /* NOTE: This does not actually change the sequence numbers, only gets new */ 1753 /* one ready. */ 1754 /* ------------------------------------------------------------------------ */ 1755 static void fr_checknewisn(fin, is) 1756 fr_info_t *fin; 1757 ipstate_t *is; 1758 { 1759 u_32_t sumd, old, new; 1760 tcphdr_t *tcp; 1761 int i; 1762 1763 i = fin->fin_rev; 1764 tcp = fin->fin_dp; 1765 1766 if (((i == 0) && !(is->is_flags & IS_ISNSYN)) || 1767 ((i == 1) && !(is->is_flags & IS_ISNACK))) { 1768 old = ntohl(tcp->th_seq); 1769 new = fr_newisn(fin); 1770 is->is_isninc[i] = new - old; 1771 CALC_SUMD(old, new, sumd); 1772 is->is_sumd[i] = (sumd & 0xffff) + (sumd >> 16); 1773 1774 is->is_flags |= ((i == 0) ? IS_ISNSYN : IS_ISNACK); 1775 } 1776 } 1777 1778 1779 /* ------------------------------------------------------------------------ */ 1780 /* Function: fr_tcpinwindow */ 1781 /* Returns: int - 1 == packet inside TCP "window", 0 == not inside. */ 1782 /* Parameters: fin(I) - pointer to packet information */ 1783 /* fdata(I) - pointer to tcp state informatio (forward) */ 1784 /* tdata(I) - pointer to tcp state informatio (reverse) */ 1785 /* tcp(I) - pointer to TCP packet header */ 1786 /* */ 1787 /* Given a packet has matched addresses and ports, check to see if it is */ 1788 /* within the TCP data window. In a show of generosity, allow packets that */ 1789 /* are within the window space behind the current sequence # as well. */ 1790 /* ------------------------------------------------------------------------ */ 1791 int fr_tcpinwindow(fin, fdata, tdata, tcp, flags) 1792 fr_info_t *fin; 1793 tcpdata_t *fdata, *tdata; 1794 tcphdr_t *tcp; 1795 int flags; 1796 { 1797 tcp_seq seq, ack, end; 1798 int ackskew, tcpflags; 1799 u_32_t win, maxwin; 1800 int dsize, inseq; 1801 1802 /* 1803 * Find difference between last checked packet and this packet. 1804 */ 1805 tcpflags = tcp->th_flags; 1806 seq = ntohl(tcp->th_seq); 1807 ack = ntohl(tcp->th_ack); 1808 1809 if (tcpflags & TH_SYN) 1810 win = ntohs(tcp->th_win); 1811 else 1812 win = ntohs(tcp->th_win) << fdata->td_winscale; 1813 1814 /* 1815 * win 0 means the receiving endpoint has closed the window, because it 1816 * has not enough memory to receive data from sender. In such case we 1817 * are pretending window size to be 1 to let TCP probe data through. 1818 * TCP probe data can be either 0 or 1 octet of data, the RFC does not 1819 * state this accurately, so we have to allow 1 octet (win = 1) even if 1820 * the window is closed (win == 0). 1821 */ 1822 if (win == 0) 1823 win = 1; 1824 1825 dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) + 1826 ((tcpflags & TH_SYN) ? 1 : 0) + ((tcpflags & TH_FIN) ? 1 : 0); 1827 1828 /* 1829 * if window scaling is present, the scaling is only allowed 1830 * for windows not in the first SYN packet. In that packet the 1831 * window is 65535 to specify the largest window possible 1832 * for receivers not implementing the window scale option. 1833 * Currently, we do not assume TTCP here. That means that 1834 * if we see a second packet from a host (after the initial 1835 * SYN), we can assume that the receiver of the SYN did 1836 * already send back the SYN/ACK (and thus that we know if 1837 * the receiver also does window scaling) 1838 */ 1839 if (!(tcpflags & TH_SYN) && (fdata->td_winflags & TCP_WSCALE_FIRST)) { 1840 fdata->td_maxwin = win; 1841 } 1842 1843 end = seq + dsize; 1844 1845 if ((fdata->td_end == 0) && 1846 (!(flags & IS_TCPFSM) || 1847 ((tcpflags & TH_OPENING) == TH_OPENING))) { 1848 /* 1849 * Must be a (outgoing) SYN-ACK in reply to a SYN. 1850 */ 1851 fdata->td_end = end - 1; 1852 fdata->td_maxwin = 1; 1853 fdata->td_maxend = end + win; 1854 } 1855 1856 if (!(tcpflags & TH_ACK)) { /* Pretend an ack was sent */ 1857 ack = tdata->td_end; 1858 } else if (((tcpflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) && 1859 (ack == 0)) { 1860 /* gross hack to get around certain broken tcp stacks */ 1861 ack = tdata->td_end; 1862 } 1863 1864 maxwin = tdata->td_maxwin; 1865 ackskew = tdata->td_end - ack; 1866 1867 /* 1868 * Strict sequencing only allows in-order delivery. 1869 */ 1870 if ((flags & IS_STRICT) != 0) { 1871 if (seq != fdata->td_end) { 1872 DTRACE_PROBE(strict_check); 1873 return 0; 1874 } 1875 } 1876 1877 #define SEQ_GE(a,b) ((int)((a) - (b)) >= 0) 1878 #define SEQ_GT(a,b) ((int)((a) - (b)) > 0) 1879 inseq = 0; 1880 DTRACE_PROBE4( 1881 dyn_params, 1882 int, dsize, 1883 int, ackskew, 1884 int, maxwin, 1885 int, win 1886 ); 1887 if ( 1888 #if defined(_KERNEL) 1889 /* 1890 * end <-> s + n 1891 * maxend <-> ack + win 1892 * this is upperbound check 1893 */ 1894 (SEQ_GE(fdata->td_maxend, end)) && 1895 /* 1896 * this is lowerbound check 1897 */ 1898 (SEQ_GE(seq, fdata->td_end - maxwin)) && 1899 #endif 1900 /* XXX what about big packets */ 1901 #define MAXACKWINDOW 66000 1902 (-ackskew <= (MAXACKWINDOW << fdata->td_winscale)) && 1903 ( ackskew <= (MAXACKWINDOW << fdata->td_winscale))) { 1904 inseq = 1; 1905 /* 1906 * Microsoft Windows will send the next packet to the right of the 1907 * window if SACK is in use. 1908 */ 1909 } else if ((seq == fdata->td_maxend) && (ackskew == 0) && 1910 (fdata->td_winflags & TCP_SACK_PERMIT) && 1911 (tdata->td_winflags & TCP_SACK_PERMIT)) { 1912 inseq = 1; 1913 /* 1914 * RST ACK with SEQ equal to 0 is sent by some OSes (i.e. Solaris) as a 1915 * response to initial SYN packet, when there is no application 1916 * listeing to on a port, where the SYN packet has came to. 1917 */ 1918 } else if ((seq == 0) && (tcpflags == (TH_RST|TH_ACK)) && 1919 (ackskew >= -1) && (ackskew <= 1)) { 1920 inseq = 1; 1921 } else if (!(flags & IS_TCPFSM)) { 1922 1923 if (!(fdata->td_winflags & 1924 (TCP_WSCALE_SEEN|TCP_WSCALE_FIRST))) { 1925 /* 1926 * No TCPFSM and no window scaling, so make some 1927 * extra guesses. 1928 */ 1929 if ((seq == fdata->td_maxend) && (ackskew == 0)) 1930 inseq = 1; 1931 else if (SEQ_GE(seq + maxwin, fdata->td_end - maxwin)) 1932 inseq = 1; 1933 } 1934 } 1935 1936 if (inseq) { 1937 /* if ackskew < 0 then this should be due to fragmented 1938 * packets. There is no way to know the length of the 1939 * total packet in advance. 1940 * We do know the total length from the fragment cache though. 1941 * Note however that there might be more sessions with 1942 * exactly the same source and destination parameters in the 1943 * state cache (and source and destination is the only stuff 1944 * that is saved in the fragment cache). Note further that 1945 * some TCP connections in the state cache are hashed with 1946 * sport and dport as well which makes it not worthwhile to 1947 * look for them. 1948 * Thus, when ackskew is negative but still seems to belong 1949 * to this session, we bump up the destinations end value. 1950 */ 1951 if (ackskew < 0) { 1952 DTRACE_PROBE2(end_update_td, 1953 int, tdata->td_end, 1954 int, ack 1955 ); 1956 tdata->td_end = ack; 1957 } 1958 1959 /* update max window seen */ 1960 if (fdata->td_maxwin < win) { 1961 DTRACE_PROBE2(win_update_fd, 1962 int, fdata->td_maxwin, 1963 int, win 1964 ); 1965 fdata->td_maxwin = win; 1966 } 1967 1968 if (SEQ_GT(end, fdata->td_end)) { 1969 DTRACE_PROBE2(end_update_fd, 1970 int, fdata->td_end, 1971 int, end 1972 ); 1973 fdata->td_end = end; 1974 } 1975 1976 if (SEQ_GE(ack + win, tdata->td_maxend)) { 1977 DTRACE_PROBE2(max_end_update_td, 1978 int, tdata->td_maxend, 1979 int, ack + win 1980 ); 1981 tdata->td_maxend = ack + win; 1982 } 1983 1984 return 1; 1985 } 1986 fin->fin_flx |= FI_OOW; 1987 1988 #if defined(_KERNEL) 1989 if (!(SEQ_GE(seq, fdata->td_end - maxwin))) 1990 fin->fin_flx |= FI_NEG_OOW; 1991 #endif 1992 1993 return 0; 1994 } 1995 1996 1997 /* ------------------------------------------------------------------------ */ 1998 /* Function: fr_stclone */ 1999 /* Returns: ipstate_t* - NULL == cloning failed, */ 2000 /* else pointer to new state structure */ 2001 /* Parameters: fin(I) - pointer to packet information */ 2002 /* tcp(I) - pointer to TCP/UDP header */ 2003 /* is(I) - pointer to master state structure */ 2004 /* */ 2005 /* Create a "duplcate" state table entry from the master. */ 2006 /* ------------------------------------------------------------------------ */ 2007 static ipstate_t *fr_stclone(fin, tcp, is) 2008 fr_info_t *fin; 2009 tcphdr_t *tcp; 2010 ipstate_t *is; 2011 { 2012 ipstate_t *clone; 2013 u_32_t send; 2014 ipf_stack_t *ifs = fin->fin_ifs; 2015 2016 /* 2017 * Trigger automatic call to fr_state_flush() if the 2018 * table has reached capacity specified by hi watermark. 2019 */ 2020 if (ST_TAB_WATER_LEVEL(ifs) > ifs->ifs_state_flush_level_hi) 2021 ifs->ifs_fr_state_doflush = 1; 2022 2023 /* 2024 * If automatic flushing did not do its job, and the table 2025 * has filled up, don't try to create a new entry. A NULL 2026 * return will indicate that the cloning has failed. 2027 */ 2028 if (ifs->ifs_ips_num >= ifs->ifs_fr_statemax) { 2029 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 2030 return NULL; 2031 } 2032 2033 KMALLOC(clone, ipstate_t *); 2034 if (clone == NULL) 2035 return NULL; 2036 bcopy((char *)is, (char *)clone, sizeof(*clone)); 2037 2038 MUTEX_NUKE(&clone->is_lock); 2039 2040 clone->is_die = ONE_DAY + ifs->ifs_fr_ticks; 2041 clone->is_state[0] = 0; 2042 clone->is_state[1] = 0; 2043 send = ntohl(tcp->th_seq) + fin->fin_dlen - (TCP_OFF(tcp) << 2) + 2044 ((tcp->th_flags & TH_SYN) ? 1 : 0) + 2045 ((tcp->th_flags & TH_FIN) ? 1 : 0); 2046 2047 if (fin->fin_rev == 1) { 2048 clone->is_dend = send; 2049 clone->is_maxdend = send; 2050 clone->is_send = 0; 2051 clone->is_maxswin = 1; 2052 clone->is_maxdwin = ntohs(tcp->th_win); 2053 if (clone->is_maxdwin == 0) 2054 clone->is_maxdwin = 1; 2055 } else { 2056 clone->is_send = send; 2057 clone->is_maxsend = send; 2058 clone->is_dend = 0; 2059 clone->is_maxdwin = 1; 2060 clone->is_maxswin = ntohs(tcp->th_win); 2061 if (clone->is_maxswin == 0) 2062 clone->is_maxswin = 1; 2063 } 2064 2065 clone->is_flags &= ~SI_CLONE; 2066 clone->is_flags |= SI_CLONED; 2067 fr_stinsert(clone, fin->fin_rev, ifs); 2068 clone->is_ref = 1; 2069 if (clone->is_p == IPPROTO_TCP) { 2070 (void) fr_tcp_age(&clone->is_sti, fin, ifs->ifs_ips_tqtqb, 2071 clone->is_flags); 2072 } 2073 MUTEX_EXIT(&clone->is_lock); 2074 #ifdef IPFILTER_SCAN 2075 (void) ipsc_attachis(is); 2076 #endif 2077 #ifdef IPFILTER_SYNC 2078 if (is->is_flags & IS_STATESYNC) 2079 clone->is_sync = ipfsync_new(SMC_STATE, fin, clone); 2080 #endif 2081 return clone; 2082 } 2083 2084 2085 /* ------------------------------------------------------------------------ */ 2086 /* Function: fr_matchsrcdst */ 2087 /* Returns: Nil */ 2088 /* Parameters: fin(I) - pointer to packet information */ 2089 /* is(I) - pointer to state structure */ 2090 /* src(I) - pointer to source address */ 2091 /* dst(I) - pointer to destination address */ 2092 /* tcp(I) - pointer to TCP/UDP header */ 2093 /* */ 2094 /* Match a state table entry against an IP packet. The logic below is that */ 2095 /* ret gets set to one if the match succeeds, else remains 0. If it is */ 2096 /* still 0 after the test. no match. */ 2097 /* ------------------------------------------------------------------------ */ 2098 static ipstate_t *fr_matchsrcdst(fin, is, src, dst, tcp, cmask) 2099 fr_info_t *fin; 2100 ipstate_t *is; 2101 i6addr_t *src, *dst; 2102 tcphdr_t *tcp; 2103 u_32_t cmask; 2104 { 2105 int ret = 0, rev, out, flags, flx = 0, idx; 2106 u_short sp, dp; 2107 u_32_t cflx; 2108 void *ifp; 2109 ipf_stack_t *ifs = fin->fin_ifs; 2110 2111 rev = IP6_NEQ(&is->is_dst, dst); 2112 ifp = fin->fin_ifp; 2113 out = fin->fin_out; 2114 flags = is->is_flags; 2115 sp = 0; 2116 dp = 0; 2117 2118 if (tcp != NULL) { 2119 sp = htons(fin->fin_sport); 2120 dp = ntohs(fin->fin_dport); 2121 } 2122 if (!rev) { 2123 if (tcp != NULL) { 2124 if (!(flags & SI_W_SPORT) && (sp != is->is_sport)) 2125 rev = 1; 2126 else if (!(flags & SI_W_DPORT) && (dp != is->is_dport)) 2127 rev = 1; 2128 } 2129 } 2130 2131 idx = (out << 1) + rev; 2132 2133 /* 2134 * If the interface for this 'direction' is set, make sure it matches. 2135 * An interface name that is not set matches any, as does a name of *. 2136 */ 2137 if ((is->is_ifp[idx] == NULL && 2138 (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) || 2139 is->is_ifp[idx] == ifp) 2140 ret = 1; 2141 2142 if (ret == 0) { 2143 DTRACE_PROBE(no_match_on_iface); 2144 return NULL; 2145 } 2146 ret = 0; 2147 2148 /* 2149 * Match addresses and ports. 2150 */ 2151 if (rev == 0) { 2152 if ((IP6_EQ(&is->is_dst, dst) || (flags & SI_W_DADDR)) && 2153 (IP6_EQ(&is->is_src, src) || (flags & SI_W_SADDR))) { 2154 if (tcp) { 2155 if ((sp == is->is_sport || flags & SI_W_SPORT)&& 2156 (dp == is->is_dport || flags & SI_W_DPORT)) 2157 ret = 1; 2158 } else { 2159 ret = 1; 2160 } 2161 } 2162 } else { 2163 if ((IP6_EQ(&is->is_dst, src) || (flags & SI_W_DADDR)) && 2164 (IP6_EQ(&is->is_src, dst) || (flags & SI_W_SADDR))) { 2165 if (tcp) { 2166 if ((dp == is->is_sport || flags & SI_W_SPORT)&& 2167 (sp == is->is_dport || flags & SI_W_DPORT)) 2168 ret = 1; 2169 } else { 2170 ret = 1; 2171 } 2172 } 2173 } 2174 2175 if (ret == 0) { 2176 DTRACE_PROBE(no_match_on_addrs); 2177 return NULL; 2178 } 2179 /* 2180 * Whether or not this should be here, is questionable, but the aim 2181 * is to get this out of the main line. 2182 */ 2183 if (tcp == NULL) 2184 flags = is->is_flags & ~(SI_WILDP|SI_NEWFR|SI_CLONE|SI_CLONED); 2185 2186 /* 2187 * Only one of the source or destination address can be flaged as a 2188 * wildcard. Fill in the missing address, if set. 2189 * For IPv6, if the address being copied in is multicast, then 2190 * don't reset the wild flag - multicast causes it to be set in the 2191 * first place! 2192 */ 2193 if ((flags & (SI_W_SADDR|SI_W_DADDR))) { 2194 fr_ip_t *fi = &fin->fin_fi; 2195 2196 if ((flags & SI_W_SADDR) != 0) { 2197 if (rev == 0) { 2198 #ifdef USE_INET6 2199 if (is->is_v == 6 && 2200 IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6)) 2201 /*EMPTY*/; 2202 else 2203 #endif 2204 { 2205 is->is_src = fi->fi_src; 2206 is->is_flags &= ~SI_W_SADDR; 2207 } 2208 } else { 2209 #ifdef USE_INET6 2210 if (is->is_v == 6 && 2211 IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6)) 2212 /*EMPTY*/; 2213 else 2214 #endif 2215 { 2216 is->is_src = fi->fi_dst; 2217 is->is_flags &= ~SI_W_SADDR; 2218 } 2219 } 2220 } else if ((flags & SI_W_DADDR) != 0) { 2221 if (rev == 0) { 2222 #ifdef USE_INET6 2223 if (is->is_v == 6 && 2224 IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6)) 2225 /*EMPTY*/; 2226 else 2227 #endif 2228 { 2229 is->is_dst = fi->fi_dst; 2230 is->is_flags &= ~SI_W_DADDR; 2231 } 2232 } else { 2233 #ifdef USE_INET6 2234 if (is->is_v == 6 && 2235 IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6)) 2236 /*EMPTY*/; 2237 else 2238 #endif 2239 { 2240 is->is_dst = fi->fi_src; 2241 is->is_flags &= ~SI_W_DADDR; 2242 } 2243 } 2244 } 2245 if ((is->is_flags & (SI_WILDA|SI_WILDP)) == 0) { 2246 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); 2247 } 2248 } 2249 2250 flx = fin->fin_flx & cmask; 2251 cflx = is->is_flx[out][rev]; 2252 2253 /* 2254 * Match up any flags set from IP options. 2255 */ 2256 if ((cflx && (flx != (cflx & cmask))) || 2257 ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]) || 2258 ((fin->fin_secmsk & is->is_secmsk) != is->is_sec) || 2259 ((fin->fin_auth & is->is_authmsk) != is->is_auth)) { 2260 DTRACE_PROBE4(no_match_on_flags, 2261 int, (cflx && (flx != (cflx & cmask))), 2262 int, 2263 ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]), 2264 int, ((fin->fin_secmsk & is->is_secmsk) != is->is_sec), 2265 int, ((fin->fin_auth & is->is_authmsk) != is->is_auth) 2266 ); 2267 return NULL; 2268 } 2269 /* 2270 * Only one of the source or destination port can be flagged as a 2271 * wildcard. When filling it in, fill in a copy of the matched entry 2272 * if it has the cloning flag set. 2273 */ 2274 if ((fin->fin_flx & FI_IGNORE) != 0) { 2275 fin->fin_rev = rev; 2276 return is; 2277 } 2278 2279 if ((flags & (SI_W_SPORT|SI_W_DPORT))) { 2280 if ((flags & SI_CLONE) != 0) { 2281 ipstate_t *clone; 2282 2283 clone = fr_stclone(fin, tcp, is); 2284 if (clone == NULL) 2285 return NULL; 2286 is = clone; 2287 } else { 2288 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); 2289 } 2290 2291 if ((flags & SI_W_SPORT) != 0) { 2292 if (rev == 0) { 2293 is->is_sport = sp; 2294 is->is_send = ntohl(tcp->th_seq); 2295 } else { 2296 is->is_sport = dp; 2297 is->is_send = ntohl(tcp->th_ack); 2298 } 2299 is->is_maxsend = is->is_send + 1; 2300 } else if ((flags & SI_W_DPORT) != 0) { 2301 if (rev == 0) { 2302 is->is_dport = dp; 2303 is->is_dend = ntohl(tcp->th_ack); 2304 } else { 2305 is->is_dport = sp; 2306 is->is_dend = ntohl(tcp->th_seq); 2307 } 2308 is->is_maxdend = is->is_dend + 1; 2309 } 2310 is->is_flags &= ~(SI_W_SPORT|SI_W_DPORT); 2311 if ((flags & SI_CLONED) && ifs->ifs_ipstate_logging) 2312 ipstate_log(is, ISL_CLONE, ifs); 2313 } 2314 2315 ret = -1; 2316 2317 if (is->is_flx[out][rev] == 0) { 2318 is->is_flx[out][rev] = flx; 2319 /* 2320 * If we are dealing with the first packet coming in reverse 2321 * direction (sent by peer), then we have to set options into 2322 * state. 2323 */ 2324 if (rev == 1 && is->is_optmsk[1] == 0x0) { 2325 is->is_optmsk[1] = 0xffffffff; 2326 is->is_opt[1] = fin->fin_optmsk; 2327 DTRACE_PROBE(set_rev_opts); 2328 } 2329 if (is->is_v == 6) { 2330 is->is_opt[rev] &= ~0x8; 2331 is->is_optmsk[rev] &= ~0x8; 2332 } 2333 } 2334 2335 /* 2336 * Check if the interface name for this "direction" is set and if not, 2337 * fill it in. 2338 */ 2339 if (is->is_ifp[idx] == NULL && 2340 (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) { 2341 is->is_ifp[idx] = ifp; 2342 COPYIFNAME(ifp, is->is_ifname[idx], fin->fin_v); 2343 } 2344 fin->fin_rev = rev; 2345 return is; 2346 } 2347 2348 2349 /* ------------------------------------------------------------------------ */ 2350 /* Function: fr_checkicmpmatchingstate */ 2351 /* Returns: Nil */ 2352 /* Parameters: fin(I) - pointer to packet information */ 2353 /* */ 2354 /* If we've got an ICMP error message, using the information stored in the */ 2355 /* ICMP packet, look for a matching state table entry. */ 2356 /* */ 2357 /* If we return NULL then no lock on ipf_state is held. */ 2358 /* If we return non-null then a read-lock on ipf_state is held. */ 2359 /* ------------------------------------------------------------------------ */ 2360 static ipstate_t *fr_checkicmpmatchingstate(fin) 2361 fr_info_t *fin; 2362 { 2363 ipstate_t *is, **isp; 2364 u_short sport, dport; 2365 u_char pr; 2366 int backward, i, oi; 2367 i6addr_t dst, src; 2368 struct icmp *ic; 2369 u_short savelen; 2370 icmphdr_t *icmp; 2371 fr_info_t ofin; 2372 tcphdr_t *tcp; 2373 int len; 2374 ip_t *oip; 2375 u_int hv; 2376 ipf_stack_t *ifs = fin->fin_ifs; 2377 2378 /* 2379 * Does it at least have the return (basic) IP header ? 2380 * Is it an actual recognised ICMP error type? 2381 * Only a basic IP header (no options) should be with 2382 * an ICMP error header. 2383 */ 2384 if ((fin->fin_v != 4) || (fin->fin_hlen != sizeof(ip_t)) || 2385 (fin->fin_plen < ICMPERR_MINPKTLEN) || 2386 !(fin->fin_flx & FI_ICMPERR)) 2387 return NULL; 2388 ic = fin->fin_dp; 2389 2390 oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN); 2391 /* 2392 * Check if the at least the old IP header (with options) and 2393 * 8 bytes of payload is present. 2394 */ 2395 if (fin->fin_plen < ICMPERR_MAXPKTLEN + ((IP_HL(oip) - 5) << 2)) 2396 return NULL; 2397 2398 /* 2399 * Sanity Checks. 2400 */ 2401 len = fin->fin_dlen - ICMPERR_ICMPHLEN; 2402 if ((len <= 0) || ((IP_HL(oip) << 2) > len)) 2403 return NULL; 2404 2405 /* 2406 * Is the buffer big enough for all of it ? It's the size of the IP 2407 * header claimed in the encapsulated part which is of concern. It 2408 * may be too big to be in this buffer but not so big that it's 2409 * outside the ICMP packet, leading to TCP deref's causing problems. 2410 * This is possible because we don't know how big oip_hl is when we 2411 * do the pullup early in fr_check() and thus can't guarantee it is 2412 * all here now. 2413 */ 2414 #ifdef _KERNEL 2415 { 2416 mb_t *m; 2417 2418 m = fin->fin_m; 2419 # if defined(MENTAT) 2420 if ((char *)oip + len > (char *)m->b_wptr) 2421 return NULL; 2422 # else 2423 if ((char *)oip + len > (char *)fin->fin_ip + m->m_len) 2424 return NULL; 2425 # endif 2426 } 2427 #endif 2428 bcopy((char *)fin, (char *)&ofin, sizeof(*fin)); 2429 2430 /* 2431 * in the IPv4 case we must zero the i6addr union otherwise 2432 * the IP6_EQ and IP6_NEQ macros produce the wrong results because 2433 * of the 'junk' in the unused part of the union 2434 */ 2435 bzero((char *)&src, sizeof(src)); 2436 bzero((char *)&dst, sizeof(dst)); 2437 2438 /* 2439 * we make an fin entry to be able to feed it to 2440 * matchsrcdst note that not all fields are encessary 2441 * but this is the cleanest way. Note further we fill 2442 * in fin_mp such that if someone uses it we'll get 2443 * a kernel panic. fr_matchsrcdst does not use this. 2444 * 2445 * watch out here, as ip is in host order and oip in network 2446 * order. Any change we make must be undone afterwards, like 2447 * oip->ip_off - it is still in network byte order so fix it. 2448 */ 2449 savelen = oip->ip_len; 2450 oip->ip_len = len; 2451 oip->ip_off = ntohs(oip->ip_off); 2452 2453 ofin.fin_flx = FI_NOCKSUM; 2454 ofin.fin_v = 4; 2455 ofin.fin_ip = oip; 2456 ofin.fin_m = NULL; /* if dereferenced, panic XXX */ 2457 ofin.fin_mp = NULL; /* if dereferenced, panic XXX */ 2458 ofin.fin_plen = fin->fin_dlen - ICMPERR_ICMPHLEN; 2459 (void) fr_makefrip(IP_HL(oip) << 2, oip, &ofin); 2460 ofin.fin_ifp = fin->fin_ifp; 2461 ofin.fin_out = !fin->fin_out; 2462 /* 2463 * Reset the short and bad flag here because in fr_matchsrcdst() 2464 * the flags for the current packet (fin_flx) are compared against 2465 * those for the existing session. 2466 */ 2467 ofin.fin_flx &= ~(FI_BAD|FI_SHORT); 2468 2469 /* 2470 * Put old values of ip_len and ip_off back as we don't know 2471 * if we have to forward the packet (or process it again. 2472 */ 2473 oip->ip_len = savelen; 2474 oip->ip_off = htons(oip->ip_off); 2475 2476 switch (oip->ip_p) 2477 { 2478 case IPPROTO_ICMP : 2479 /* 2480 * an ICMP error can only be generated as a result of an 2481 * ICMP query, not as the response on an ICMP error 2482 * 2483 * XXX theoretically ICMP_ECHOREP and the other reply's are 2484 * ICMP query's as well, but adding them here seems strange XXX 2485 */ 2486 if ((ofin.fin_flx & FI_ICMPERR) != 0) 2487 return NULL; 2488 2489 /* 2490 * perform a lookup of the ICMP packet in the state table 2491 */ 2492 icmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2493 hv = (pr = oip->ip_p); 2494 src.in4 = oip->ip_src; 2495 hv += src.in4.s_addr; 2496 dst.in4 = oip->ip_dst; 2497 hv += dst.in4.s_addr; 2498 hv += icmp->icmp_id; 2499 hv = DOUBLE_HASH(hv, ifs); 2500 2501 READ_ENTER(&ifs->ifs_ipf_state); 2502 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 2503 isp = &is->is_hnext; 2504 if ((is->is_p != pr) || (is->is_v != 4)) 2505 continue; 2506 if (is->is_pass & FR_NOICMPERR) 2507 continue; 2508 is = fr_matchsrcdst(&ofin, is, &src, &dst, 2509 NULL, FI_ICMPCMP); 2510 if (is != NULL) { 2511 if ((is->is_pass & FR_NOICMPERR) != 0) { 2512 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2513 return NULL; 2514 } 2515 /* 2516 * i : the index of this packet (the icmp 2517 * unreachable) 2518 * oi : the index of the original packet found 2519 * in the icmp header (i.e. the packet 2520 * causing this icmp) 2521 * backward : original packet was backward 2522 * compared to the state 2523 */ 2524 backward = IP6_NEQ(&is->is_src, &src); 2525 fin->fin_rev = !backward; 2526 i = (!backward << 1) + fin->fin_out; 2527 oi = (backward << 1) + ofin.fin_out; 2528 if (is->is_icmppkts[i] > is->is_pkts[oi]) 2529 continue; 2530 ifs->ifs_ips_stats.iss_hits++; 2531 is->is_icmppkts[i]++; 2532 return is; 2533 } 2534 } 2535 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2536 return NULL; 2537 case IPPROTO_TCP : 2538 case IPPROTO_UDP : 2539 break; 2540 default : 2541 return NULL; 2542 } 2543 2544 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2545 dport = tcp->th_dport; 2546 sport = tcp->th_sport; 2547 2548 hv = (pr = oip->ip_p); 2549 src.in4 = oip->ip_src; 2550 hv += src.in4.s_addr; 2551 dst.in4 = oip->ip_dst; 2552 hv += dst.in4.s_addr; 2553 hv += dport; 2554 hv += sport; 2555 hv = DOUBLE_HASH(hv, ifs); 2556 2557 READ_ENTER(&ifs->ifs_ipf_state); 2558 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 2559 isp = &is->is_hnext; 2560 /* 2561 * Only allow this icmp though if the 2562 * encapsulated packet was allowed through the 2563 * other way around. Note that the minimal amount 2564 * of info present does not allow for checking against 2565 * tcp internals such as seq and ack numbers. Only the 2566 * ports are known to be present and can be even if the 2567 * short flag is set. 2568 */ 2569 if ((is->is_p == pr) && (is->is_v == 4) && 2570 (is = fr_matchsrcdst(&ofin, is, &src, &dst, 2571 tcp, FI_ICMPCMP))) { 2572 /* 2573 * i : the index of this packet (the icmp unreachable) 2574 * oi : the index of the original packet found in the 2575 * icmp header (i.e. the packet causing this icmp) 2576 * backward : original packet was backward compared to 2577 * the state 2578 */ 2579 backward = IP6_NEQ(&is->is_src, &src); 2580 fin->fin_rev = !backward; 2581 i = (!backward << 1) + fin->fin_out; 2582 oi = (backward << 1) + ofin.fin_out; 2583 2584 if (((is->is_pass & FR_NOICMPERR) != 0) || 2585 (is->is_icmppkts[i] > is->is_pkts[oi])) 2586 break; 2587 ifs->ifs_ips_stats.iss_hits++; 2588 is->is_icmppkts[i]++; 2589 /* 2590 * we deliberately do not touch the timeouts 2591 * for the accompanying state table entry. 2592 * It remains to be seen if that is correct. XXX 2593 */ 2594 return is; 2595 } 2596 } 2597 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2598 return NULL; 2599 } 2600 2601 2602 /* ------------------------------------------------------------------------ */ 2603 /* Function: fr_ipsmove */ 2604 /* Returns: Nil */ 2605 /* Parameters: is(I) - pointer to state table entry */ 2606 /* hv(I) - new hash value for state table entry */ 2607 /* Write Locks: ipf_state */ 2608 /* */ 2609 /* Move a state entry from one position in the hash table to another. */ 2610 /* ------------------------------------------------------------------------ */ 2611 static void fr_ipsmove(is, hv, ifs) 2612 ipstate_t *is; 2613 u_int hv; 2614 ipf_stack_t *ifs; 2615 { 2616 ipstate_t **isp; 2617 u_int hvm; 2618 2619 ASSERT(rw_read_locked(&ifs->ifs_ipf_state.ipf_lk) == 0); 2620 2621 hvm = is->is_hv; 2622 /* 2623 * Remove the hash from the old location... 2624 */ 2625 isp = is->is_phnext; 2626 if (is->is_hnext) 2627 is->is_hnext->is_phnext = isp; 2628 *isp = is->is_hnext; 2629 if (ifs->ifs_ips_table[hvm] == NULL) 2630 ifs->ifs_ips_stats.iss_inuse--; 2631 ifs->ifs_ips_stats.iss_bucketlen[hvm]--; 2632 2633 /* 2634 * ...and put the hash in the new one. 2635 */ 2636 hvm = DOUBLE_HASH(hv, ifs); 2637 is->is_hv = hvm; 2638 isp = &ifs->ifs_ips_table[hvm]; 2639 if (*isp) 2640 (*isp)->is_phnext = &is->is_hnext; 2641 else 2642 ifs->ifs_ips_stats.iss_inuse++; 2643 ifs->ifs_ips_stats.iss_bucketlen[hvm]++; 2644 is->is_phnext = isp; 2645 is->is_hnext = *isp; 2646 *isp = is; 2647 } 2648 2649 2650 /* ------------------------------------------------------------------------ */ 2651 /* Function: fr_stlookup */ 2652 /* Returns: ipstate_t* - NULL == no matching state found, */ 2653 /* else pointer to state information is returned */ 2654 /* Parameters: fin(I) - pointer to packet information */ 2655 /* tcp(I) - pointer to TCP/UDP header. */ 2656 /* */ 2657 /* Search the state table for a matching entry to the packet described by */ 2658 /* the contents of *fin. */ 2659 /* */ 2660 /* If we return NULL then no lock on ipf_state is held. */ 2661 /* If we return non-null then a read-lock on ipf_state is held. */ 2662 /* ------------------------------------------------------------------------ */ 2663 ipstate_t *fr_stlookup(fin, tcp, ifqp) 2664 fr_info_t *fin; 2665 tcphdr_t *tcp; 2666 ipftq_t **ifqp; 2667 { 2668 u_int hv, hvm, pr, v, tryagain; 2669 ipstate_t *is, **isp; 2670 u_short dport, sport; 2671 i6addr_t src, dst; 2672 struct icmp *ic; 2673 ipftq_t *ifq; 2674 int oow; 2675 ipf_stack_t *ifs = fin->fin_ifs; 2676 2677 is = NULL; 2678 ifq = NULL; 2679 tcp = fin->fin_dp; 2680 ic = (struct icmp *)tcp; 2681 hv = (pr = fin->fin_fi.fi_p); 2682 src = fin->fin_fi.fi_src; 2683 dst = fin->fin_fi.fi_dst; 2684 hv += src.in4.s_addr; 2685 hv += dst.in4.s_addr; 2686 2687 v = fin->fin_fi.fi_v; 2688 #ifdef USE_INET6 2689 if (v == 6) { 2690 hv += fin->fin_fi.fi_src.i6[1]; 2691 hv += fin->fin_fi.fi_src.i6[2]; 2692 hv += fin->fin_fi.fi_src.i6[3]; 2693 2694 if ((fin->fin_p == IPPROTO_ICMPV6) && 2695 IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_dst.in6)) { 2696 hv -= dst.in4.s_addr; 2697 } else { 2698 hv += fin->fin_fi.fi_dst.i6[1]; 2699 hv += fin->fin_fi.fi_dst.i6[2]; 2700 hv += fin->fin_fi.fi_dst.i6[3]; 2701 } 2702 } 2703 #endif 2704 if ((v == 4) && 2705 (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) { 2706 if (fin->fin_out == 0) { 2707 hv -= src.in4.s_addr; 2708 } else { 2709 hv -= dst.in4.s_addr; 2710 } 2711 } 2712 2713 /* 2714 * Search the hash table for matching packet header info. 2715 */ 2716 switch (pr) 2717 { 2718 #ifdef USE_INET6 2719 case IPPROTO_ICMPV6 : 2720 tryagain = 0; 2721 if (v == 6) { 2722 if ((ic->icmp_type == ICMP6_ECHO_REQUEST) || 2723 (ic->icmp_type == ICMP6_ECHO_REPLY)) { 2724 hv += ic->icmp_id; 2725 } 2726 } 2727 READ_ENTER(&ifs->ifs_ipf_state); 2728 icmp6again: 2729 hvm = DOUBLE_HASH(hv, ifs); 2730 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { 2731 isp = &is->is_hnext; 2732 if ((is->is_p != pr) || (is->is_v != v)) 2733 continue; 2734 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); 2735 if (is != NULL && 2736 fr_matchicmpqueryreply(v, &is->is_icmp, 2737 ic, fin->fin_rev)) { 2738 if (fin->fin_rev) 2739 ifq = &ifs->ifs_ips_icmpacktq; 2740 else 2741 ifq = &ifs->ifs_ips_icmptq; 2742 break; 2743 } 2744 } 2745 2746 if (is != NULL) { 2747 if ((tryagain != 0) && !(is->is_flags & SI_W_DADDR)) { 2748 hv += fin->fin_fi.fi_src.i6[0]; 2749 hv += fin->fin_fi.fi_src.i6[1]; 2750 hv += fin->fin_fi.fi_src.i6[2]; 2751 hv += fin->fin_fi.fi_src.i6[3]; 2752 fr_ipsmove(is, hv, ifs); 2753 MUTEX_DOWNGRADE(&ifs->ifs_ipf_state); 2754 } 2755 break; 2756 } 2757 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2758 2759 /* 2760 * No matching icmp state entry. Perhaps this is a 2761 * response to another state entry. 2762 * 2763 * XXX With some ICMP6 packets, the "other" address is already 2764 * in the packet, after the ICMP6 header, and this could be 2765 * used in place of the multicast address. However, taking 2766 * advantage of this requires some significant code changes 2767 * to handle the specific types where that is the case. 2768 */ 2769 if ((ifs->ifs_ips_stats.iss_wild != 0) && (v == 6) && (tryagain == 0) && 2770 !IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_src.in6)) { 2771 hv -= fin->fin_fi.fi_src.i6[0]; 2772 hv -= fin->fin_fi.fi_src.i6[1]; 2773 hv -= fin->fin_fi.fi_src.i6[2]; 2774 hv -= fin->fin_fi.fi_src.i6[3]; 2775 tryagain = 1; 2776 WRITE_ENTER(&ifs->ifs_ipf_state); 2777 goto icmp6again; 2778 } 2779 2780 is = fr_checkicmp6matchingstate(fin); 2781 if (is != NULL) 2782 return is; 2783 break; 2784 #endif 2785 2786 case IPPROTO_ICMP : 2787 if (v == 4) { 2788 hv += ic->icmp_id; 2789 } 2790 hv = DOUBLE_HASH(hv, ifs); 2791 READ_ENTER(&ifs->ifs_ipf_state); 2792 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 2793 isp = &is->is_hnext; 2794 if ((is->is_p != pr) || (is->is_v != v)) 2795 continue; 2796 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); 2797 if (is != NULL && 2798 fr_matchicmpqueryreply(v, &is->is_icmp, 2799 ic, fin->fin_rev)) { 2800 if (fin->fin_rev) 2801 ifq = &ifs->ifs_ips_icmpacktq; 2802 else 2803 ifq = &ifs->ifs_ips_icmptq; 2804 break; 2805 } 2806 } 2807 if (is == NULL) { 2808 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2809 } 2810 break; 2811 2812 case IPPROTO_TCP : 2813 case IPPROTO_UDP : 2814 ifqp = NULL; 2815 sport = htons(fin->fin_data[0]); 2816 hv += sport; 2817 dport = htons(fin->fin_data[1]); 2818 hv += dport; 2819 oow = 0; 2820 tryagain = 0; 2821 READ_ENTER(&ifs->ifs_ipf_state); 2822 retry_tcpudp: 2823 hvm = DOUBLE_HASH(hv, ifs); 2824 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { 2825 isp = &is->is_hnext; 2826 if ((is->is_p != pr) || (is->is_v != v)) 2827 continue; 2828 fin->fin_flx &= ~FI_OOW; 2829 is = fr_matchsrcdst(fin, is, &src, &dst, tcp, FI_CMP); 2830 if (is != NULL) { 2831 if (pr == IPPROTO_TCP) { 2832 if (!fr_tcpstate(fin, tcp, is)) { 2833 oow |= fin->fin_flx & FI_OOW; 2834 continue; 2835 } 2836 } 2837 break; 2838 } 2839 } 2840 if (is != NULL) { 2841 if (tryagain && 2842 !(is->is_flags & (SI_CLONE|SI_WILDP|SI_WILDA))) { 2843 hv += dport; 2844 hv += sport; 2845 fr_ipsmove(is, hv, ifs); 2846 MUTEX_DOWNGRADE(&ifs->ifs_ipf_state); 2847 } 2848 break; 2849 } 2850 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2851 2852 if (ifs->ifs_ips_stats.iss_wild) { 2853 if (tryagain == 0) { 2854 hv -= dport; 2855 hv -= sport; 2856 } else if (tryagain == 1) { 2857 hv = fin->fin_fi.fi_p; 2858 /* 2859 * If we try to pretend this is a reply to a 2860 * multicast/broadcast packet then we need to 2861 * exclude part of the address from the hash 2862 * calculation. 2863 */ 2864 if (fin->fin_out == 0) { 2865 hv += src.in4.s_addr; 2866 } else { 2867 hv += dst.in4.s_addr; 2868 } 2869 hv += dport; 2870 hv += sport; 2871 } 2872 tryagain++; 2873 if (tryagain <= 2) { 2874 WRITE_ENTER(&ifs->ifs_ipf_state); 2875 goto retry_tcpudp; 2876 } 2877 } 2878 fin->fin_flx |= oow; 2879 break; 2880 2881 #if 0 2882 case IPPROTO_GRE : 2883 gre = fin->fin_dp; 2884 if (GRE_REV(gre->gr_flags) == 1) { 2885 hv += gre->gr_call; 2886 } 2887 /* FALLTHROUGH */ 2888 #endif 2889 default : 2890 ifqp = NULL; 2891 hvm = DOUBLE_HASH(hv, ifs); 2892 READ_ENTER(&ifs->ifs_ipf_state); 2893 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { 2894 isp = &is->is_hnext; 2895 if ((is->is_p != pr) || (is->is_v != v)) 2896 continue; 2897 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); 2898 if (is != NULL) { 2899 ifq = &ifs->ifs_ips_iptq; 2900 break; 2901 } 2902 } 2903 if (is == NULL) { 2904 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2905 } 2906 break; 2907 } 2908 2909 if ((is != NULL) && ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) && 2910 (is->is_tqehead[fin->fin_rev] != NULL)) 2911 ifq = is->is_tqehead[fin->fin_rev]; 2912 if (ifq != NULL && ifqp != NULL) 2913 *ifqp = ifq; 2914 return is; 2915 } 2916 2917 2918 /* ------------------------------------------------------------------------ */ 2919 /* Function: fr_updatestate */ 2920 /* Returns: Nil */ 2921 /* Parameters: fin(I) - pointer to packet information */ 2922 /* is(I) - pointer to state table entry */ 2923 /* Read Locks: ipf_state */ 2924 /* */ 2925 /* Updates packet and byte counters for a newly received packet. Seeds the */ 2926 /* fragment cache with a new entry as required. */ 2927 /* ------------------------------------------------------------------------ */ 2928 void fr_updatestate(fin, is, ifq) 2929 fr_info_t *fin; 2930 ipstate_t *is; 2931 ipftq_t *ifq; 2932 { 2933 ipftqent_t *tqe; 2934 int i, pass; 2935 ipf_stack_t *ifs = fin->fin_ifs; 2936 2937 i = (fin->fin_rev << 1) + fin->fin_out; 2938 2939 /* 2940 * For TCP packets, ifq == NULL. For all others, check if this new 2941 * queue is different to the last one it was on and move it if so. 2942 */ 2943 tqe = &is->is_sti; 2944 MUTEX_ENTER(&is->is_lock); 2945 if ((tqe->tqe_flags & TQE_RULEBASED) != 0) 2946 ifq = is->is_tqehead[fin->fin_rev]; 2947 2948 if (ifq != NULL) 2949 fr_movequeue(tqe, tqe->tqe_ifq, ifq, ifs); 2950 2951 is->is_pkts[i]++; 2952 fin->fin_pktnum = is->is_pkts[i] + is->is_icmppkts[i]; 2953 is->is_bytes[i] += fin->fin_plen; 2954 MUTEX_EXIT(&is->is_lock); 2955 2956 #ifdef IPFILTER_SYNC 2957 if (is->is_flags & IS_STATESYNC) 2958 ipfsync_update(SMC_STATE, fin, is->is_sync); 2959 #endif 2960 2961 ATOMIC_INCL(ifs->ifs_ips_stats.iss_hits); 2962 2963 fin->fin_fr = is->is_rule; 2964 2965 /* 2966 * If this packet is a fragment and the rule says to track fragments, 2967 * then create a new fragment cache entry. 2968 */ 2969 pass = is->is_pass; 2970 if ((fin->fin_flx & FI_FRAG) && FR_ISPASS(pass)) 2971 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); 2972 } 2973 2974 2975 /* ------------------------------------------------------------------------ */ 2976 /* Function: fr_checkstate */ 2977 /* Returns: frentry_t* - NULL == search failed, */ 2978 /* else pointer to rule for matching state */ 2979 /* Parameters: ifp(I) - pointer to interface */ 2980 /* passp(I) - pointer to filtering result flags */ 2981 /* */ 2982 /* Check if a packet is associated with an entry in the state table. */ 2983 /* ------------------------------------------------------------------------ */ 2984 frentry_t *fr_checkstate(fin, passp) 2985 fr_info_t *fin; 2986 u_32_t *passp; 2987 { 2988 ipstate_t *is; 2989 frentry_t *fr; 2990 tcphdr_t *tcp; 2991 ipftq_t *ifq; 2992 u_int pass; 2993 ipf_stack_t *ifs = fin->fin_ifs; 2994 2995 if (ifs->ifs_fr_state_lock || (ifs->ifs_ips_list == NULL) || 2996 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) 2997 return NULL; 2998 2999 is = NULL; 3000 if ((fin->fin_flx & FI_TCPUDP) || 3001 (fin->fin_fi.fi_p == IPPROTO_ICMP) 3002 #ifdef USE_INET6 3003 || (fin->fin_fi.fi_p == IPPROTO_ICMPV6) 3004 #endif 3005 ) 3006 tcp = fin->fin_dp; 3007 else 3008 tcp = NULL; 3009 3010 /* 3011 * Search the hash table for matching packet header info. 3012 */ 3013 ifq = NULL; 3014 is = fr_stlookup(fin, tcp, &ifq); 3015 switch (fin->fin_p) 3016 { 3017 #ifdef USE_INET6 3018 case IPPROTO_ICMPV6 : 3019 if (is != NULL) 3020 break; 3021 if (fin->fin_v == 6) { 3022 is = fr_checkicmp6matchingstate(fin); 3023 if (is != NULL) 3024 goto matched; 3025 } 3026 break; 3027 #endif 3028 case IPPROTO_ICMP : 3029 if (is != NULL) 3030 break; 3031 /* 3032 * No matching icmp state entry. Perhaps this is a 3033 * response to another state entry. 3034 */ 3035 is = fr_checkicmpmatchingstate(fin); 3036 if (is != NULL) 3037 goto matched; 3038 break; 3039 case IPPROTO_TCP : 3040 if (is == NULL) 3041 break; 3042 3043 if (is->is_pass & FR_NEWISN) { 3044 if (fin->fin_out == 0) 3045 fr_fixinisn(fin, is); 3046 else if (fin->fin_out == 1) 3047 fr_fixoutisn(fin, is); 3048 } 3049 break; 3050 default : 3051 if (fin->fin_rev) 3052 ifq = &ifs->ifs_ips_udpacktq; 3053 else 3054 ifq = &ifs->ifs_ips_udptq; 3055 break; 3056 } 3057 if (is == NULL) { 3058 ATOMIC_INCL(ifs->ifs_ips_stats.iss_miss); 3059 return NULL; 3060 } 3061 3062 matched: 3063 fr = is->is_rule; 3064 if (fr != NULL) { 3065 if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) { 3066 if (fin->fin_nattag == NULL) { 3067 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3068 return NULL; 3069 } 3070 if (fr_matchtag(&fr->fr_nattag, fin->fin_nattag) != 0) { 3071 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3072 return NULL; 3073 } 3074 } 3075 (void) strncpy(fin->fin_group, fr->fr_group, FR_GROUPLEN); 3076 fin->fin_icode = fr->fr_icode; 3077 } 3078 3079 fin->fin_rule = is->is_rulen; 3080 pass = is->is_pass; 3081 fr_updatestate(fin, is, ifq); 3082 3083 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3084 fin->fin_flx |= FI_STATE; 3085 if ((pass & FR_LOGFIRST) != 0) 3086 pass &= ~(FR_LOGFIRST|FR_LOG); 3087 *passp = pass; 3088 return fr; 3089 } 3090 3091 3092 /* ------------------------------------------------------------------------ */ 3093 /* Function: fr_fixoutisn */ 3094 /* Returns: Nil */ 3095 /* Parameters: fin(I) - pointer to packet information */ 3096 /* is(I) - pointer to master state structure */ 3097 /* */ 3098 /* Called only for outbound packets, adjusts the sequence number and the */ 3099 /* TCP checksum to match that change. */ 3100 /* ------------------------------------------------------------------------ */ 3101 static void fr_fixoutisn(fin, is) 3102 fr_info_t *fin; 3103 ipstate_t *is; 3104 { 3105 tcphdr_t *tcp; 3106 int rev; 3107 u_32_t seq; 3108 3109 tcp = fin->fin_dp; 3110 rev = fin->fin_rev; 3111 if ((is->is_flags & IS_ISNSYN) != 0) { 3112 if (rev == 0) { 3113 seq = ntohl(tcp->th_seq); 3114 seq += is->is_isninc[0]; 3115 tcp->th_seq = htonl(seq); 3116 fix_outcksum(&tcp->th_sum, is->is_sumd[0]); 3117 } 3118 } 3119 if ((is->is_flags & IS_ISNACK) != 0) { 3120 if (rev == 1) { 3121 seq = ntohl(tcp->th_seq); 3122 seq += is->is_isninc[1]; 3123 tcp->th_seq = htonl(seq); 3124 fix_outcksum(&tcp->th_sum, is->is_sumd[1]); 3125 } 3126 } 3127 } 3128 3129 3130 /* ------------------------------------------------------------------------ */ 3131 /* Function: fr_fixinisn */ 3132 /* Returns: Nil */ 3133 /* Parameters: fin(I) - pointer to packet information */ 3134 /* is(I) - pointer to master state structure */ 3135 /* */ 3136 /* Called only for inbound packets, adjusts the acknowledge number and the */ 3137 /* TCP checksum to match that change. */ 3138 /* ------------------------------------------------------------------------ */ 3139 static void fr_fixinisn(fin, is) 3140 fr_info_t *fin; 3141 ipstate_t *is; 3142 { 3143 tcphdr_t *tcp; 3144 int rev; 3145 u_32_t ack; 3146 3147 tcp = fin->fin_dp; 3148 rev = fin->fin_rev; 3149 if ((is->is_flags & IS_ISNSYN) != 0) { 3150 if (rev == 1) { 3151 ack = ntohl(tcp->th_ack); 3152 ack -= is->is_isninc[0]; 3153 tcp->th_ack = htonl(ack); 3154 fix_incksum(&tcp->th_sum, is->is_sumd[0]); 3155 } 3156 } 3157 if ((is->is_flags & IS_ISNACK) != 0) { 3158 if (rev == 0) { 3159 ack = ntohl(tcp->th_ack); 3160 ack -= is->is_isninc[1]; 3161 tcp->th_ack = htonl(ack); 3162 fix_incksum(&tcp->th_sum, is->is_sumd[1]); 3163 } 3164 } 3165 } 3166 3167 3168 /* ------------------------------------------------------------------------ */ 3169 /* Function: fr_statesync */ 3170 /* Returns: Nil */ 3171 /* Parameters: action(I) - type of synchronisation to do */ 3172 /* v(I) - IP version being sync'd (v4 or v6) */ 3173 /* ifp(I) - interface identifier associated with action */ 3174 /* name(I) - name associated with ifp parameter */ 3175 /* */ 3176 /* Walk through all state entries and if an interface pointer match is */ 3177 /* found then look it up again, based on its name in case the pointer has */ 3178 /* changed since last time. */ 3179 /* */ 3180 /* If ifp is passed in as being non-null then we are only doing updates for */ 3181 /* existing, matching, uses of it. */ 3182 /* ------------------------------------------------------------------------ */ 3183 void fr_statesync(action, v, ifp, name, ifs) 3184 int action, v; 3185 void *ifp; 3186 char *name; 3187 ipf_stack_t *ifs; 3188 { 3189 ipstate_t *is; 3190 int i; 3191 3192 if (ifs->ifs_fr_running <= 0) 3193 return; 3194 3195 WRITE_ENTER(&ifs->ifs_ipf_state); 3196 3197 if (ifs->ifs_fr_running <= 0) { 3198 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3199 return; 3200 } 3201 3202 switch (action) 3203 { 3204 case IPFSYNC_RESYNC : 3205 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3206 if (v != 0 && is->is_v != v) 3207 continue; 3208 /* 3209 * Look up all the interface names in the state entry. 3210 */ 3211 for (i = 0; i < 4; i++) { 3212 is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], 3213 is->is_v, ifs); 3214 } 3215 } 3216 break; 3217 case IPFSYNC_NEWIFP : 3218 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3219 if (v != 0 && is->is_v != v) 3220 continue; 3221 /* 3222 * Look up all the interface names in the state entry. 3223 */ 3224 for (i = 0; i < 4; i++) { 3225 if (!strncmp(is->is_ifname[i], name, 3226 sizeof(is->is_ifname[i]))) 3227 is->is_ifp[i] = ifp; 3228 } 3229 } 3230 break; 3231 case IPFSYNC_OLDIFP : 3232 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3233 if (v != 0 && is->is_v != v) 3234 continue; 3235 /* 3236 * Look up all the interface names in the state entry. 3237 */ 3238 for (i = 0; i < 4; i++) { 3239 if (is->is_ifp[i] == ifp) 3240 is->is_ifp[i] = (void *)-1; 3241 } 3242 } 3243 break; 3244 } 3245 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3246 } 3247 3248 3249 #if SOLARIS2 >= 10 3250 /* ------------------------------------------------------------------------ */ 3251 /* Function: fr_stateifindexsync */ 3252 /* Returns: void */ 3253 /* Parameters: ifp - current network interface descriptor (ifindex) */ 3254 /* newifp - new interface descriptor (new ifindex) */ 3255 /* ifs - pointer to IPF stack */ 3256 /* */ 3257 /* Write Locks: assumes ipf_mutex is locked */ 3258 /* */ 3259 /* Updates all interface indeces matching to ifp with new interface index */ 3260 /* value. */ 3261 /* ------------------------------------------------------------------------ */ 3262 void fr_stateifindexsync(ifp, newifp, ifs) 3263 void *ifp; 3264 void *newifp; 3265 ipf_stack_t *ifs; 3266 { 3267 ipstate_t *is; 3268 int i; 3269 3270 WRITE_ENTER(&ifs->ifs_ipf_state); 3271 3272 for (is = ifs->ifs_ips_list; is != NULL; is = is->is_next) { 3273 3274 for (i = 0; i < 4; i++) { 3275 if (is->is_ifp[i] == ifp) 3276 is->is_ifp[i] = newifp; 3277 } 3278 } 3279 3280 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3281 } 3282 #endif 3283 3284 /* ------------------------------------------------------------------------ */ 3285 /* Function: fr_delstate */ 3286 /* Returns: int - 0 = entry deleted, else ref count on entry */ 3287 /* Parameters: is(I) - pointer to state structure to delete */ 3288 /* why(I) - if not 0, log reason why it was deleted */ 3289 /* ifs - ipf stack instance */ 3290 /* Write Locks: ipf_state/ipf_global */ 3291 /* */ 3292 /* Deletes a state entry from the enumerated list as well as the hash table */ 3293 /* and timeout queue lists. Make adjustments to hash table statistics and */ 3294 /* global counters as required. */ 3295 /* ------------------------------------------------------------------------ */ 3296 int fr_delstate(is, why, ifs) 3297 ipstate_t *is; 3298 int why; 3299 ipf_stack_t *ifs; 3300 { 3301 int removed = 0; 3302 3303 ASSERT(rw_write_held(&ifs->ifs_ipf_global.ipf_lk) == 0 || 3304 rw_write_held(&ifs->ifs_ipf_state.ipf_lk) == 0); 3305 3306 /* 3307 * Start by removing the entry from the hash table of state entries 3308 * so it will not be "used" again. 3309 * 3310 * It will remain in the "list" of state entries until all references 3311 * have been accounted for. 3312 */ 3313 if (is->is_phnext != NULL) { 3314 removed = 1; 3315 *is->is_phnext = is->is_hnext; 3316 if (is->is_hnext != NULL) 3317 is->is_hnext->is_phnext = is->is_phnext; 3318 if (ifs->ifs_ips_table[is->is_hv] == NULL) 3319 ifs->ifs_ips_stats.iss_inuse--; 3320 ifs->ifs_ips_stats.iss_bucketlen[is->is_hv]--; 3321 3322 is->is_phnext = NULL; 3323 is->is_hnext = NULL; 3324 } 3325 3326 /* 3327 * Because ifs->ifs_ips_stats.iss_wild is a count of entries in the state 3328 * table that have wildcard flags set, only decerement it once 3329 * and do it here. 3330 */ 3331 if (is->is_flags & (SI_WILDP|SI_WILDA)) { 3332 if (!(is->is_flags & SI_CLONED)) { 3333 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); 3334 } 3335 is->is_flags &= ~(SI_WILDP|SI_WILDA); 3336 } 3337 3338 /* 3339 * Next, remove it from the timeout queue it is in. 3340 */ 3341 fr_deletequeueentry(&is->is_sti); 3342 3343 is->is_me = NULL; 3344 3345 /* 3346 * If it is still in use by something else, do not go any further, 3347 * but note that at this point it is now an orphan. 3348 */ 3349 MUTEX_ENTER(&is->is_lock); 3350 if (is->is_ref > 1) { 3351 is->is_ref--; 3352 MUTEX_EXIT(&is->is_lock); 3353 if (removed) 3354 ifs->ifs_ips_stats.iss_orphans++; 3355 return (is->is_ref); 3356 } 3357 MUTEX_EXIT(&is->is_lock); 3358 3359 is->is_ref = 0; 3360 3361 /* 3362 * If entry has already been removed from table, 3363 * it means we're simply cleaning up an orphan. 3364 */ 3365 if (!removed) 3366 ifs->ifs_ips_stats.iss_orphans--; 3367 3368 if (is->is_tqehead[0] != NULL) 3369 (void) fr_deletetimeoutqueue(is->is_tqehead[0]); 3370 3371 if (is->is_tqehead[1] != NULL) 3372 (void) fr_deletetimeoutqueue(is->is_tqehead[1]); 3373 3374 #ifdef IPFILTER_SYNC 3375 if (is->is_sync) 3376 ipfsync_del(is->is_sync); 3377 #endif 3378 #ifdef IPFILTER_SCAN 3379 (void) ipsc_detachis(is); 3380 #endif 3381 3382 /* 3383 * Now remove it from master list of state table entries. 3384 */ 3385 if (is->is_pnext != NULL) { 3386 *is->is_pnext = is->is_next; 3387 if (is->is_next != NULL) { 3388 is->is_next->is_pnext = is->is_pnext; 3389 is->is_next = NULL; 3390 } 3391 is->is_pnext = NULL; 3392 } 3393 3394 if (ifs->ifs_ipstate_logging != 0 && why != 0) 3395 ipstate_log(is, why, ifs); 3396 3397 if (is->is_rule != NULL) { 3398 is->is_rule->fr_statecnt--; 3399 (void)fr_derefrule(&is->is_rule, ifs); 3400 } 3401 3402 MUTEX_DESTROY(&is->is_lock); 3403 KFREE(is); 3404 ifs->ifs_ips_num--; 3405 3406 return (0); 3407 } 3408 3409 3410 /* ------------------------------------------------------------------------ */ 3411 /* Function: fr_timeoutstate */ 3412 /* Returns: Nil */ 3413 /* Parameters: ifs - ipf stack instance */ 3414 /* */ 3415 /* Slowly expire held state for thingslike UDP and ICMP. The algorithm */ 3416 /* used here is to keep the queue sorted with the oldest things at the top */ 3417 /* and the youngest at the bottom. So if the top one doesn't need to be */ 3418 /* expired then neither will any under it. */ 3419 /* ------------------------------------------------------------------------ */ 3420 void fr_timeoutstate(ifs) 3421 ipf_stack_t *ifs; 3422 { 3423 ipftq_t *ifq, *ifqnext; 3424 ipftqent_t *tqe, *tqn; 3425 ipstate_t *is; 3426 SPL_INT(s); 3427 3428 SPL_NET(s); 3429 WRITE_ENTER(&ifs->ifs_ipf_state); 3430 for (ifq = ifs->ifs_ips_tqtqb; ifq != NULL; ifq = ifq->ifq_next) 3431 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { 3432 if (tqe->tqe_die > ifs->ifs_fr_ticks) 3433 break; 3434 tqn = tqe->tqe_next; 3435 is = tqe->tqe_parent; 3436 (void) fr_delstate(is, ISL_EXPIRE, ifs); 3437 } 3438 3439 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifq->ifq_next) { 3440 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { 3441 if (tqe->tqe_die > ifs->ifs_fr_ticks) 3442 break; 3443 tqn = tqe->tqe_next; 3444 is = tqe->tqe_parent; 3445 (void) fr_delstate(is, ISL_EXPIRE, ifs); 3446 } 3447 } 3448 3449 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) { 3450 ifqnext = ifq->ifq_next; 3451 3452 if (((ifq->ifq_flags & IFQF_DELETE) != 0) && 3453 (ifq->ifq_ref == 0)) { 3454 fr_freetimeoutqueue(ifq, ifs); 3455 } 3456 } 3457 3458 if (ifs->ifs_fr_state_doflush) { 3459 (void) fr_state_flush(FLUSH_TABLE_EXTRA, 0, ifs); 3460 ifs->ifs_fr_state_doflush = 0; 3461 } 3462 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3463 SPL_X(s); 3464 } 3465 3466 3467 /* ---------------------------------------------------------------------- */ 3468 /* Function: fr_state_flush */ 3469 /* Returns: int - 0 == success, -1 == failure */ 3470 /* Parameters: flush_option - how to flush the active State table */ 3471 /* proto - IP version to flush (4, 6, or both) */ 3472 /* ifs - ipf stack instance */ 3473 /* Write Locks: ipf_state */ 3474 /* */ 3475 /* Flush state tables. Three possible flush options currently defined: */ 3476 /* */ 3477 /* FLUSH_TABLE_ALL : Flush all state table entries */ 3478 /* */ 3479 /* FLUSH_TABLE_CLOSING : Flush entries with TCP connections which */ 3480 /* have started to close on both ends using */ 3481 /* ipf_flushclosing(). */ 3482 /* */ 3483 /* FLUSH_TABLE_EXTRA : First, flush entries which are "almost" closed. */ 3484 /* Then, if needed, flush entries with TCP */ 3485 /* connections which have been idle for a long */ 3486 /* time with ipf_extraflush(). */ 3487 /* ---------------------------------------------------------------------- */ 3488 static int fr_state_flush(flush_option, proto, ifs) 3489 int flush_option, proto; 3490 ipf_stack_t *ifs; 3491 { 3492 ipstate_t *is, *isn; 3493 int removed; 3494 SPL_INT(s); 3495 3496 removed = 0; 3497 3498 SPL_NET(s); 3499 switch (flush_option) 3500 { 3501 case FLUSH_TABLE_ALL: 3502 isn = ifs->ifs_ips_list; 3503 while ((is = isn) != NULL) { 3504 isn = is->is_next; 3505 if ((proto != 0) && (is->is_v != proto)) 3506 continue; 3507 if (fr_delstate(is, ISL_FLUSH, ifs) == 0) 3508 removed++; 3509 } 3510 break; 3511 3512 case FLUSH_TABLE_CLOSING: 3513 removed = ipf_flushclosing(STATE_FLUSH, 3514 IPF_TCPS_CLOSE_WAIT, 3515 ifs->ifs_ips_tqtqb, 3516 ifs->ifs_ips_utqe, 3517 ifs); 3518 break; 3519 3520 case FLUSH_TABLE_EXTRA: 3521 removed = ipf_flushclosing(STATE_FLUSH, 3522 IPF_TCPS_FIN_WAIT_2, 3523 ifs->ifs_ips_tqtqb, 3524 ifs->ifs_ips_utqe, 3525 ifs); 3526 3527 /* 3528 * Be sure we haven't done this in the last 10 seconds. 3529 */ 3530 if (ifs->ifs_fr_ticks - ifs->ifs_ips_last_force_flush < 3531 IPF_TTLVAL(10)) 3532 break; 3533 ifs->ifs_ips_last_force_flush = ifs->ifs_fr_ticks; 3534 removed += ipf_extraflush(STATE_FLUSH, 3535 &ifs->ifs_ips_tqtqb[IPF_TCPS_ESTABLISHED], 3536 ifs->ifs_ips_utqe, 3537 ifs); 3538 break; 3539 3540 default: /* Flush Nothing */ 3541 break; 3542 } 3543 3544 SPL_X(s); 3545 return (removed); 3546 } 3547 3548 3549 /* ------------------------------------------------------------------------ */ 3550 /* Function: fr_tcp_age */ 3551 /* Returns: int - 1 == state transition made, 0 == no change (rejected) */ 3552 /* Parameters: tq(I) - pointer to timeout queue information */ 3553 /* fin(I) - pointer to packet information */ 3554 /* tqtab(I) - TCP timeout queue table this is in */ 3555 /* flags(I) - flags from state/NAT entry */ 3556 /* */ 3557 /* Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29: */ 3558 /* */ 3559 /* - (try to) base state transitions on real evidence only, */ 3560 /* i.e. packets that are sent and have been received by ipfilter; */ 3561 /* diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used. */ 3562 /* */ 3563 /* - deal with half-closed connections correctly; */ 3564 /* */ 3565 /* - store the state of the source in state[0] such that ipfstat */ 3566 /* displays the state as source/dest instead of dest/source; the calls */ 3567 /* to fr_tcp_age have been changed accordingly. */ 3568 /* */ 3569 /* Internal Parameters: */ 3570 /* */ 3571 /* state[0] = state of source (host that initiated connection) */ 3572 /* state[1] = state of dest (host that accepted the connection) */ 3573 /* */ 3574 /* dir == 0 : a packet from source to dest */ 3575 /* dir == 1 : a packet from dest to source */ 3576 /* */ 3577 /* Locking: it is assumed that the parent of the tqe structure is locked. */ 3578 /* ------------------------------------------------------------------------ */ 3579 int fr_tcp_age(tqe, fin, tqtab, flags) 3580 ipftqent_t *tqe; 3581 fr_info_t *fin; 3582 ipftq_t *tqtab; 3583 int flags; 3584 { 3585 int dlen, ostate, nstate, rval, dir; 3586 u_char tcpflags; 3587 tcphdr_t *tcp; 3588 ipf_stack_t *ifs = fin->fin_ifs; 3589 3590 tcp = fin->fin_dp; 3591 3592 rval = 0; 3593 dir = fin->fin_rev; 3594 tcpflags = tcp->th_flags; 3595 dlen = fin->fin_dlen - (TCP_OFF(tcp) << 2); 3596 3597 ostate = tqe->tqe_state[1 - dir]; 3598 nstate = tqe->tqe_state[dir]; 3599 3600 DTRACE_PROBE4( 3601 indata, 3602 fr_info_t *, fin, 3603 int, ostate, 3604 int, nstate, 3605 u_char, tcpflags 3606 ); 3607 3608 if (tcpflags & TH_RST) { 3609 if (!(tcpflags & TH_PUSH) && !dlen) 3610 nstate = IPF_TCPS_CLOSED; 3611 else 3612 nstate = IPF_TCPS_CLOSE_WAIT; 3613 3614 /* 3615 * Once RST is received, we must advance peer's state to 3616 * CLOSE_WAIT. 3617 */ 3618 if (ostate <= IPF_TCPS_ESTABLISHED) { 3619 tqe->tqe_state[1 - dir] = IPF_TCPS_CLOSE_WAIT; 3620 } 3621 rval = 1; 3622 } else { 3623 3624 switch (nstate) 3625 { 3626 case IPF_TCPS_LISTEN: /* 0 */ 3627 if ((tcpflags & TH_OPENING) == TH_OPENING) { 3628 /* 3629 * 'dir' received an S and sends SA in 3630 * response, CLOSED -> SYN_RECEIVED 3631 */ 3632 nstate = IPF_TCPS_SYN_RECEIVED; 3633 rval = 1; 3634 } else if ((tcpflags & TH_OPENING) == TH_SYN) { 3635 /* 'dir' sent S, CLOSED -> SYN_SENT */ 3636 nstate = IPF_TCPS_SYN_SENT; 3637 rval = 1; 3638 } 3639 /* 3640 * the next piece of code makes it possible to get 3641 * already established connections into the state table 3642 * after a restart or reload of the filter rules; this 3643 * does not work when a strict 'flags S keep state' is 3644 * used for tcp connections of course 3645 */ 3646 if (((flags & IS_TCPFSM) == 0) && 3647 ((tcpflags & TH_ACKMASK) == TH_ACK)) { 3648 /* 3649 * we saw an A, guess 'dir' is in ESTABLISHED 3650 * mode 3651 */ 3652 switch (ostate) 3653 { 3654 case IPF_TCPS_LISTEN : 3655 case IPF_TCPS_SYN_RECEIVED : 3656 nstate = IPF_TCPS_HALF_ESTAB; 3657 rval = 1; 3658 break; 3659 case IPF_TCPS_HALF_ESTAB : 3660 case IPF_TCPS_ESTABLISHED : 3661 nstate = IPF_TCPS_ESTABLISHED; 3662 rval = 1; 3663 break; 3664 default : 3665 break; 3666 } 3667 } 3668 /* 3669 * TODO: besides regular ACK packets we can have other 3670 * packets as well; it is yet to be determined how we 3671 * should initialize the states in those cases 3672 */ 3673 break; 3674 3675 case IPF_TCPS_SYN_SENT: /* 1 */ 3676 if ((tcpflags & ~(TH_ECN|TH_CWR)) == TH_SYN) { 3677 /* 3678 * A retransmitted SYN packet. We do not reset 3679 * the timeout here to fr_tcptimeout because a 3680 * connection connect timeout does not renew 3681 * after every packet that is sent. We need to 3682 * set rval so as to indicate the packet has 3683 * passed the check for its flags being valid 3684 * in the TCP FSM. Setting rval to 2 has the 3685 * result of not resetting the timeout. 3686 */ 3687 rval = 2; 3688 } else if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == 3689 TH_ACK) { 3690 /* 3691 * we see an A from 'dir' which is in SYN_SENT 3692 * state: 'dir' sent an A in response to an SA 3693 * which it received, SYN_SENT -> ESTABLISHED 3694 */ 3695 nstate = IPF_TCPS_ESTABLISHED; 3696 rval = 1; 3697 } else if (tcpflags & TH_FIN) { 3698 /* 3699 * we see an F from 'dir' which is in SYN_SENT 3700 * state and wants to close its side of the 3701 * connection; SYN_SENT -> FIN_WAIT_1 3702 */ 3703 nstate = IPF_TCPS_FIN_WAIT_1; 3704 rval = 1; 3705 } else if ((tcpflags & TH_OPENING) == TH_OPENING) { 3706 /* 3707 * we see an SA from 'dir' which is already in 3708 * SYN_SENT state, this means we have a 3709 * simultaneous open; SYN_SENT -> SYN_RECEIVED 3710 */ 3711 nstate = IPF_TCPS_SYN_RECEIVED; 3712 rval = 1; 3713 } 3714 break; 3715 3716 case IPF_TCPS_SYN_RECEIVED: /* 2 */ 3717 if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) { 3718 /* 3719 * we see an A from 'dir' which was in 3720 * SYN_RECEIVED state so it must now be in 3721 * established state, SYN_RECEIVED -> 3722 * ESTABLISHED 3723 */ 3724 nstate = IPF_TCPS_ESTABLISHED; 3725 rval = 1; 3726 } else if ((tcpflags & ~(TH_ECN|TH_CWR)) == 3727 TH_OPENING) { 3728 /* 3729 * We see an SA from 'dir' which is already in 3730 * SYN_RECEIVED state. 3731 */ 3732 rval = 2; 3733 } else if (tcpflags & TH_FIN) { 3734 /* 3735 * we see an F from 'dir' which is in 3736 * SYN_RECEIVED state and wants to close its 3737 * side of the connection; SYN_RECEIVED -> 3738 * FIN_WAIT_1 3739 */ 3740 nstate = IPF_TCPS_FIN_WAIT_1; 3741 rval = 1; 3742 } 3743 break; 3744 3745 case IPF_TCPS_HALF_ESTAB: /* 3 */ 3746 if (tcpflags & TH_FIN) { 3747 nstate = IPF_TCPS_FIN_WAIT_1; 3748 rval = 1; 3749 } else if ((tcpflags & TH_ACKMASK) == TH_ACK) { 3750 /* 3751 * If we've picked up a connection in mid 3752 * flight, we could be looking at a follow on 3753 * packet from the same direction as the one 3754 * that created this state. Recognise it but 3755 * do not advance the entire connection's 3756 * state. 3757 */ 3758 switch (ostate) 3759 { 3760 case IPF_TCPS_LISTEN : 3761 case IPF_TCPS_SYN_SENT : 3762 case IPF_TCPS_SYN_RECEIVED : 3763 rval = 1; 3764 break; 3765 case IPF_TCPS_HALF_ESTAB : 3766 case IPF_TCPS_ESTABLISHED : 3767 nstate = IPF_TCPS_ESTABLISHED; 3768 rval = 1; 3769 break; 3770 default : 3771 break; 3772 } 3773 } 3774 break; 3775 3776 case IPF_TCPS_ESTABLISHED: /* 4 */ 3777 rval = 1; 3778 if (tcpflags & TH_FIN) { 3779 /* 3780 * 'dir' closed its side of the connection; 3781 * this gives us a half-closed connection; 3782 * ESTABLISHED -> FIN_WAIT_1 3783 */ 3784 if (ostate == IPF_TCPS_FIN_WAIT_1) { 3785 nstate = IPF_TCPS_CLOSING; 3786 } else { 3787 nstate = IPF_TCPS_FIN_WAIT_1; 3788 } 3789 } else if (tcpflags & TH_ACK) { 3790 /* 3791 * an ACK, should we exclude other flags here? 3792 */ 3793 if (ostate == IPF_TCPS_FIN_WAIT_1) { 3794 /* 3795 * We know the other side did an active 3796 * close, so we are ACKing the recvd 3797 * FIN packet (does the window matching 3798 * code guarantee this?) and go into 3799 * CLOSE_WAIT state; this gives us a 3800 * half-closed connection 3801 */ 3802 nstate = IPF_TCPS_CLOSE_WAIT; 3803 } else if (ostate < IPF_TCPS_CLOSE_WAIT) { 3804 /* 3805 * still a fully established 3806 * connection reset timeout 3807 */ 3808 nstate = IPF_TCPS_ESTABLISHED; 3809 } 3810 } 3811 break; 3812 3813 case IPF_TCPS_CLOSE_WAIT: /* 5 */ 3814 rval = 1; 3815 if (tcpflags & TH_FIN) { 3816 /* 3817 * application closed and 'dir' sent a FIN, 3818 * we're now going into LAST_ACK state 3819 */ 3820 nstate = IPF_TCPS_LAST_ACK; 3821 } else { 3822 /* 3823 * we remain in CLOSE_WAIT because the other 3824 * side has closed already and we did not 3825 * close our side yet; reset timeout 3826 */ 3827 nstate = IPF_TCPS_CLOSE_WAIT; 3828 } 3829 break; 3830 3831 case IPF_TCPS_FIN_WAIT_1: /* 6 */ 3832 rval = 1; 3833 if ((tcpflags & TH_ACK) && 3834 ostate > IPF_TCPS_CLOSE_WAIT) { 3835 /* 3836 * if the other side is not active anymore 3837 * it has sent us a FIN packet that we are 3838 * ack'ing now with an ACK; this means both 3839 * sides have now closed the connection and 3840 * we go into LAST_ACK 3841 */ 3842 /* 3843 * XXX: how do we know we really are ACKing 3844 * the FIN packet here? does the window code 3845 * guarantee that? 3846 */ 3847 nstate = IPF_TCPS_LAST_ACK; 3848 } else { 3849 /* 3850 * we closed our side of the connection 3851 * already but the other side is still active 3852 * (ESTABLISHED/CLOSE_WAIT); continue with 3853 * this half-closed connection 3854 */ 3855 nstate = IPF_TCPS_FIN_WAIT_1; 3856 } 3857 break; 3858 3859 case IPF_TCPS_CLOSING: /* 7 */ 3860 if ((tcpflags & (TH_FIN|TH_ACK)) == TH_ACK) { 3861 nstate = IPF_TCPS_TIME_WAIT; 3862 } 3863 rval = 1; 3864 break; 3865 3866 case IPF_TCPS_LAST_ACK: /* 8 */ 3867 /* 3868 * We want to reset timer here to keep state in table. 3869 * If we would allow the state to time out here, while 3870 * there would still be packets being retransmitted, we 3871 * would cut off line between the two peers preventing 3872 * them to close connection properly. 3873 */ 3874 rval = 1; 3875 break; 3876 3877 case IPF_TCPS_FIN_WAIT_2: /* 9 */ 3878 /* NOT USED */ 3879 break; 3880 3881 case IPF_TCPS_TIME_WAIT: /* 10 */ 3882 /* we're in 2MSL timeout now */ 3883 if (ostate == IPF_TCPS_LAST_ACK) { 3884 nstate = IPF_TCPS_CLOSED; 3885 rval = 1; 3886 } else { 3887 rval = 2; 3888 } 3889 break; 3890 3891 case IPF_TCPS_CLOSED: /* 11 */ 3892 rval = 2; 3893 break; 3894 3895 default : 3896 #if defined(_KERNEL) 3897 ASSERT(nstate >= IPF_TCPS_LISTEN && 3898 nstate <= IPF_TCPS_CLOSED); 3899 #else 3900 abort(); 3901 #endif 3902 break; 3903 } 3904 } 3905 3906 /* 3907 * If rval == 2 then do not update the queue position, but treat the 3908 * packet as being ok. 3909 */ 3910 if (rval == 2) { 3911 DTRACE_PROBE1(state_keeping_timer, int, nstate); 3912 rval = 1; 3913 } 3914 else if (rval == 1) { 3915 tqe->tqe_state[dir] = nstate; 3916 /* 3917 * The nstate can either advance to a new state, or remain 3918 * unchanged, resetting the timer by moving to the bottom of 3919 * the queue. 3920 */ 3921 DTRACE_PROBE1(state_done, int, nstate); 3922 3923 if ((tqe->tqe_flags & TQE_RULEBASED) == 0) 3924 fr_movequeue(tqe, tqe->tqe_ifq, tqtab + nstate, ifs); 3925 } 3926 3927 return rval; 3928 } 3929 3930 3931 /* ------------------------------------------------------------------------ */ 3932 /* Function: ipstate_log */ 3933 /* Returns: Nil */ 3934 /* Parameters: is(I) - pointer to state structure */ 3935 /* type(I) - type of log entry to create */ 3936 /* */ 3937 /* Creates a state table log entry using the state structure and type info. */ 3938 /* passed in. Log packet/byte counts, source/destination address and other */ 3939 /* protocol specific information. */ 3940 /* ------------------------------------------------------------------------ */ 3941 void ipstate_log(is, type, ifs) 3942 struct ipstate *is; 3943 u_int type; 3944 ipf_stack_t *ifs; 3945 { 3946 #ifdef IPFILTER_LOG 3947 struct ipslog ipsl; 3948 size_t sizes[1]; 3949 void *items[1]; 3950 int types[1]; 3951 3952 /* 3953 * Copy information out of the ipstate_t structure and into the 3954 * structure used for logging. 3955 */ 3956 ipsl.isl_type = type; 3957 ipsl.isl_pkts[0] = is->is_pkts[0] + is->is_icmppkts[0]; 3958 ipsl.isl_bytes[0] = is->is_bytes[0]; 3959 ipsl.isl_pkts[1] = is->is_pkts[1] + is->is_icmppkts[1]; 3960 ipsl.isl_bytes[1] = is->is_bytes[1]; 3961 ipsl.isl_pkts[2] = is->is_pkts[2] + is->is_icmppkts[2]; 3962 ipsl.isl_bytes[2] = is->is_bytes[2]; 3963 ipsl.isl_pkts[3] = is->is_pkts[3] + is->is_icmppkts[3]; 3964 ipsl.isl_bytes[3] = is->is_bytes[3]; 3965 ipsl.isl_src = is->is_src; 3966 ipsl.isl_dst = is->is_dst; 3967 ipsl.isl_p = is->is_p; 3968 ipsl.isl_v = is->is_v; 3969 ipsl.isl_flags = is->is_flags; 3970 ipsl.isl_tag = is->is_tag; 3971 ipsl.isl_rulen = is->is_rulen; 3972 (void) strncpy(ipsl.isl_group, is->is_group, FR_GROUPLEN); 3973 3974 if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) { 3975 ipsl.isl_sport = is->is_sport; 3976 ipsl.isl_dport = is->is_dport; 3977 if (ipsl.isl_p == IPPROTO_TCP) { 3978 ipsl.isl_state[0] = is->is_state[0]; 3979 ipsl.isl_state[1] = is->is_state[1]; 3980 } 3981 } else if (ipsl.isl_p == IPPROTO_ICMP) { 3982 ipsl.isl_itype = is->is_icmp.ici_type; 3983 } else if (ipsl.isl_p == IPPROTO_ICMPV6) { 3984 ipsl.isl_itype = is->is_icmp.ici_type; 3985 } else { 3986 ipsl.isl_ps.isl_filler[0] = 0; 3987 ipsl.isl_ps.isl_filler[1] = 0; 3988 } 3989 3990 items[0] = &ipsl; 3991 sizes[0] = sizeof(ipsl); 3992 types[0] = 0; 3993 3994 if (ipllog(IPL_LOGSTATE, NULL, items, sizes, types, 1, ifs)) { 3995 ATOMIC_INCL(ifs->ifs_ips_stats.iss_logged); 3996 } else { 3997 ATOMIC_INCL(ifs->ifs_ips_stats.iss_logfail); 3998 } 3999 #endif 4000 } 4001 4002 4003 #ifdef USE_INET6 4004 /* ------------------------------------------------------------------------ */ 4005 /* Function: fr_checkicmp6matchingstate */ 4006 /* Returns: ipstate_t* - NULL == no match found, */ 4007 /* else pointer to matching state entry */ 4008 /* Parameters: fin(I) - pointer to packet information */ 4009 /* Locks: NULL == no locks, else Read Lock on ipf_state */ 4010 /* */ 4011 /* If we've got an ICMPv6 error message, using the information stored in */ 4012 /* the ICMPv6 packet, look for a matching state table entry. */ 4013 /* ------------------------------------------------------------------------ */ 4014 static ipstate_t *fr_checkicmp6matchingstate(fin) 4015 fr_info_t *fin; 4016 { 4017 struct icmp6_hdr *ic6, *oic; 4018 int backward, i; 4019 ipstate_t *is, **isp; 4020 u_short sport, dport; 4021 i6addr_t dst, src; 4022 u_short savelen; 4023 icmpinfo_t *ic; 4024 fr_info_t ofin; 4025 tcphdr_t *tcp; 4026 ip6_t *oip6; 4027 u_char pr; 4028 u_int hv; 4029 ipf_stack_t *ifs = fin->fin_ifs; 4030 4031 /* 4032 * Does it at least have the return (basic) IP header ? 4033 * Is it an actual recognised ICMP error type? 4034 * Only a basic IP header (no options) should be with 4035 * an ICMP error header. 4036 */ 4037 if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN) || 4038 !(fin->fin_flx & FI_ICMPERR)) 4039 return NULL; 4040 4041 ic6 = fin->fin_dp; 4042 4043 oip6 = (ip6_t *)((char *)ic6 + ICMPERR_ICMPHLEN); 4044 if (fin->fin_plen < sizeof(*oip6)) 4045 return NULL; 4046 4047 bcopy((char *)fin, (char *)&ofin, sizeof(*fin)); 4048 ofin.fin_v = 6; 4049 ofin.fin_ifp = fin->fin_ifp; 4050 ofin.fin_out = !fin->fin_out; 4051 ofin.fin_m = NULL; /* if dereferenced, panic XXX */ 4052 ofin.fin_mp = NULL; /* if dereferenced, panic XXX */ 4053 4054 /* 4055 * We make a fin entry to be able to feed it to 4056 * matchsrcdst. Note that not all fields are necessary 4057 * but this is the cleanest way. Note further we fill 4058 * in fin_mp such that if someone uses it we'll get 4059 * a kernel panic. fr_matchsrcdst does not use this. 4060 * 4061 * watch out here, as ip is in host order and oip6 in network 4062 * order. Any change we make must be undone afterwards. 4063 */ 4064 savelen = oip6->ip6_plen; 4065 oip6->ip6_plen = fin->fin_dlen - ICMPERR_ICMPHLEN; 4066 ofin.fin_flx = FI_NOCKSUM; 4067 ofin.fin_ip = (ip_t *)oip6; 4068 ofin.fin_plen = oip6->ip6_plen; 4069 (void) fr_makefrip(sizeof(*oip6), (ip_t *)oip6, &ofin); 4070 ofin.fin_flx &= ~(FI_BAD|FI_SHORT); 4071 oip6->ip6_plen = savelen; 4072 4073 if (oip6->ip6_nxt == IPPROTO_ICMPV6) { 4074 oic = (struct icmp6_hdr *)(oip6 + 1); 4075 /* 4076 * an ICMP error can only be generated as a result of an 4077 * ICMP query, not as the response on an ICMP error 4078 * 4079 * XXX theoretically ICMP_ECHOREP and the other reply's are 4080 * ICMP query's as well, but adding them here seems strange XXX 4081 */ 4082 if (!(oic->icmp6_type & ICMP6_INFOMSG_MASK)) 4083 return NULL; 4084 4085 /* 4086 * perform a lookup of the ICMP packet in the state table 4087 */ 4088 hv = (pr = oip6->ip6_nxt); 4089 src.in6 = oip6->ip6_src; 4090 hv += src.in4.s_addr; 4091 dst.in6 = oip6->ip6_dst; 4092 hv += dst.in4.s_addr; 4093 hv += oic->icmp6_id; 4094 hv += oic->icmp6_seq; 4095 hv = DOUBLE_HASH(hv, ifs); 4096 4097 READ_ENTER(&ifs->ifs_ipf_state); 4098 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 4099 ic = &is->is_icmp; 4100 isp = &is->is_hnext; 4101 if ((is->is_p == pr) && 4102 !(is->is_pass & FR_NOICMPERR) && 4103 (oic->icmp6_id == ic->ici_id) && 4104 (oic->icmp6_seq == ic->ici_seq) && 4105 (is = fr_matchsrcdst(&ofin, is, &src, 4106 &dst, NULL, FI_ICMPCMP))) { 4107 /* 4108 * in the state table ICMP query's are stored 4109 * with the type of the corresponding ICMP 4110 * response. Correct here 4111 */ 4112 if (((ic->ici_type == ICMP6_ECHO_REPLY) && 4113 (oic->icmp6_type == ICMP6_ECHO_REQUEST)) || 4114 (ic->ici_type - 1 == oic->icmp6_type )) { 4115 ifs->ifs_ips_stats.iss_hits++; 4116 backward = IP6_NEQ(&is->is_dst, &src); 4117 fin->fin_rev = !backward; 4118 i = (backward << 1) + fin->fin_out; 4119 is->is_icmppkts[i]++; 4120 return is; 4121 } 4122 } 4123 } 4124 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4125 return NULL; 4126 } 4127 4128 hv = (pr = oip6->ip6_nxt); 4129 src.in6 = oip6->ip6_src; 4130 hv += src.i6[0]; 4131 hv += src.i6[1]; 4132 hv += src.i6[2]; 4133 hv += src.i6[3]; 4134 dst.in6 = oip6->ip6_dst; 4135 hv += dst.i6[0]; 4136 hv += dst.i6[1]; 4137 hv += dst.i6[2]; 4138 hv += dst.i6[3]; 4139 4140 if ((oip6->ip6_nxt == IPPROTO_TCP) || (oip6->ip6_nxt == IPPROTO_UDP)) { 4141 tcp = (tcphdr_t *)(oip6 + 1); 4142 dport = tcp->th_dport; 4143 sport = tcp->th_sport; 4144 hv += dport; 4145 hv += sport; 4146 } else 4147 tcp = NULL; 4148 hv = DOUBLE_HASH(hv, ifs); 4149 4150 READ_ENTER(&ifs->ifs_ipf_state); 4151 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 4152 isp = &is->is_hnext; 4153 /* 4154 * Only allow this icmp though if the 4155 * encapsulated packet was allowed through the 4156 * other way around. Note that the minimal amount 4157 * of info present does not allow for checking against 4158 * tcp internals such as seq and ack numbers. 4159 */ 4160 if ((is->is_p != pr) || (is->is_v != 6) || 4161 (is->is_pass & FR_NOICMPERR)) 4162 continue; 4163 is = fr_matchsrcdst(&ofin, is, &src, &dst, tcp, FI_ICMPCMP); 4164 if (is != NULL) { 4165 ifs->ifs_ips_stats.iss_hits++; 4166 backward = IP6_NEQ(&is->is_dst, &src); 4167 fin->fin_rev = !backward; 4168 i = (backward << 1) + fin->fin_out; 4169 is->is_icmppkts[i]++; 4170 /* 4171 * we deliberately do not touch the timeouts 4172 * for the accompanying state table entry. 4173 * It remains to be seen if that is correct. XXX 4174 */ 4175 return is; 4176 } 4177 } 4178 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4179 return NULL; 4180 } 4181 #endif 4182 4183 4184 /* ------------------------------------------------------------------------ */ 4185 /* Function: fr_sttab_init */ 4186 /* Returns: Nil */ 4187 /* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */ 4188 /* */ 4189 /* Initialise the array of timeout queues for TCP. */ 4190 /* ------------------------------------------------------------------------ */ 4191 void fr_sttab_init(tqp, ifs) 4192 ipftq_t *tqp; 4193 ipf_stack_t *ifs; 4194 { 4195 int i; 4196 4197 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) { 4198 tqp[i].ifq_ttl = 0; 4199 tqp[i].ifq_ref = 1; 4200 tqp[i].ifq_head = NULL; 4201 tqp[i].ifq_tail = &tqp[i].ifq_head; 4202 tqp[i].ifq_next = tqp + i + 1; 4203 MUTEX_INIT(&tqp[i].ifq_lock, "ipftq tcp tab"); 4204 } 4205 tqp[IPF_TCP_NSTATES - 1].ifq_next = NULL; 4206 tqp[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcpclosed; 4207 tqp[IPF_TCPS_LISTEN].ifq_ttl = ifs->ifs_fr_tcptimeout; 4208 tqp[IPF_TCPS_SYN_SENT].ifq_ttl = ifs->ifs_fr_tcptimeout; 4209 tqp[IPF_TCPS_SYN_RECEIVED].ifq_ttl = ifs->ifs_fr_tcptimeout; 4210 tqp[IPF_TCPS_ESTABLISHED].ifq_ttl = ifs->ifs_fr_tcpidletimeout; 4211 tqp[IPF_TCPS_CLOSE_WAIT].ifq_ttl = ifs->ifs_fr_tcphalfclosed; 4212 tqp[IPF_TCPS_FIN_WAIT_1].ifq_ttl = ifs->ifs_fr_tcphalfclosed; 4213 tqp[IPF_TCPS_CLOSING].ifq_ttl = ifs->ifs_fr_tcptimeout; 4214 tqp[IPF_TCPS_LAST_ACK].ifq_ttl = ifs->ifs_fr_tcplastack; 4215 tqp[IPF_TCPS_FIN_WAIT_2].ifq_ttl = ifs->ifs_fr_tcpclosewait; 4216 tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = ifs->ifs_fr_tcptimeout; 4217 tqp[IPF_TCPS_HALF_ESTAB].ifq_ttl = ifs->ifs_fr_tcptimeout; 4218 } 4219 4220 4221 /* ------------------------------------------------------------------------ */ 4222 /* Function: fr_sttab_destroy */ 4223 /* Returns: Nil */ 4224 /* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */ 4225 /* */ 4226 /* Do whatever is necessary to "destroy" each of the entries in the array */ 4227 /* of timeout queues for TCP. */ 4228 /* ------------------------------------------------------------------------ */ 4229 void fr_sttab_destroy(tqp) 4230 ipftq_t *tqp; 4231 { 4232 int i; 4233 4234 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) 4235 MUTEX_DESTROY(&tqp[i].ifq_lock); 4236 } 4237 4238 4239 /* ------------------------------------------------------------------------ */ 4240 /* Function: fr_statederef */ 4241 /* Returns: Nil */ 4242 /* Parameters: isp(I) - pointer to pointer to state table entry */ 4243 /* ifs - ipf stack instance */ 4244 /* */ 4245 /* Decrement the reference counter for this state table entry and free it */ 4246 /* if there are no more things using it. */ 4247 /* */ 4248 /* Internal parameters: */ 4249 /* state[0] = state of source (host that initiated connection) */ 4250 /* state[1] = state of dest (host that accepted the connection) */ 4251 /* ------------------------------------------------------------------------ */ 4252 void fr_statederef(isp, ifs) 4253 ipstate_t **isp; 4254 ipf_stack_t *ifs; 4255 { 4256 ipstate_t *is; 4257 4258 is = *isp; 4259 *isp = NULL; 4260 4261 MUTEX_ENTER(&is->is_lock); 4262 if (is->is_ref > 1) { 4263 is->is_ref--; 4264 MUTEX_EXIT(&is->is_lock); 4265 #ifndef _KERNEL 4266 if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) || 4267 (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) { 4268 (void) fr_delstate(is, ISL_ORPHAN, ifs); 4269 } 4270 #endif 4271 return; 4272 } 4273 MUTEX_EXIT(&is->is_lock); 4274 4275 WRITE_ENTER(&ifs->ifs_ipf_state); 4276 (void) fr_delstate(is, ISL_EXPIRE, ifs); 4277 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4278 } 4279 4280 4281 /* ------------------------------------------------------------------------ */ 4282 /* Function: fr_setstatequeue */ 4283 /* Returns: Nil */ 4284 /* Parameters: is(I) - pointer to state structure */ 4285 /* rev(I) - forward(0) or reverse(1) direction */ 4286 /* Locks: ipf_state (read or write) */ 4287 /* */ 4288 /* Put the state entry on its default queue entry, using rev as a helped in */ 4289 /* determining which queue it should be placed on. */ 4290 /* ------------------------------------------------------------------------ */ 4291 void fr_setstatequeue(is, rev, ifs) 4292 ipstate_t *is; 4293 int rev; 4294 ipf_stack_t *ifs; 4295 { 4296 ipftq_t *oifq, *nifq; 4297 4298 4299 if ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) 4300 nifq = is->is_tqehead[rev]; 4301 else 4302 nifq = NULL; 4303 4304 if (nifq == NULL) { 4305 switch (is->is_p) 4306 { 4307 #ifdef USE_INET6 4308 case IPPROTO_ICMPV6 : 4309 if (rev == 1) 4310 nifq = &ifs->ifs_ips_icmpacktq; 4311 else 4312 nifq = &ifs->ifs_ips_icmptq; 4313 break; 4314 #endif 4315 case IPPROTO_ICMP : 4316 if (rev == 1) 4317 nifq = &ifs->ifs_ips_icmpacktq; 4318 else 4319 nifq = &ifs->ifs_ips_icmptq; 4320 break; 4321 case IPPROTO_TCP : 4322 nifq = ifs->ifs_ips_tqtqb + is->is_state[rev]; 4323 break; 4324 4325 case IPPROTO_UDP : 4326 if (rev == 1) 4327 nifq = &ifs->ifs_ips_udpacktq; 4328 else 4329 nifq = &ifs->ifs_ips_udptq; 4330 break; 4331 4332 default : 4333 nifq = &ifs->ifs_ips_iptq; 4334 break; 4335 } 4336 } 4337 4338 oifq = is->is_sti.tqe_ifq; 4339 /* 4340 * If it's currently on a timeout queue, move it from one queue to 4341 * another, else put it on the end of the newly determined queue. 4342 */ 4343 if (oifq != NULL) 4344 fr_movequeue(&is->is_sti, oifq, nifq, ifs); 4345 else 4346 fr_queueappend(&is->is_sti, nifq, is, ifs); 4347 return; 4348 } 4349 4350 4351 /* ------------------------------------------------------------------------ */ 4352 /* Function: fr_stateiter */ 4353 /* Returns: int - 0 == success, else error */ 4354 /* Parameters: token(I) - pointer to ipftoken structure */ 4355 /* itp(I) - pointer to ipfgeniter structure */ 4356 /* */ 4357 /* This function handles the SIOCGENITER ioctl for the state tables and */ 4358 /* walks through the list of entries in the state table list (ips_list.) */ 4359 /* ------------------------------------------------------------------------ */ 4360 static int fr_stateiter(token, itp, ifs) 4361 ipftoken_t *token; 4362 ipfgeniter_t *itp; 4363 ipf_stack_t *ifs; 4364 { 4365 ipstate_t *is, *next, zero; 4366 int error, count; 4367 char *dst; 4368 4369 if (itp->igi_data == NULL) 4370 return EFAULT; 4371 4372 if (itp->igi_nitems == 0) 4373 return EINVAL; 4374 4375 if (itp->igi_type != IPFGENITER_STATE) 4376 return EINVAL; 4377 4378 error = 0; 4379 4380 READ_ENTER(&ifs->ifs_ipf_state); 4381 4382 /* 4383 * Get "previous" entry from the token and find the next entry. 4384 */ 4385 is = token->ipt_data; 4386 if (is == NULL) { 4387 next = ifs->ifs_ips_list; 4388 } else { 4389 next = is->is_next; 4390 } 4391 4392 dst = itp->igi_data; 4393 for (count = itp->igi_nitems; count > 0; count--) { 4394 /* 4395 * If we found an entry, add a reference to it and update the token. 4396 * Otherwise, zero out data to be returned and NULL out token. 4397 */ 4398 if (next != NULL) { 4399 MUTEX_ENTER(&next->is_lock); 4400 next->is_ref++; 4401 MUTEX_EXIT(&next->is_lock); 4402 token->ipt_data = next; 4403 } else { 4404 bzero(&zero, sizeof(zero)); 4405 next = &zero; 4406 token->ipt_data = NULL; 4407 } 4408 4409 /* 4410 * Safe to release lock now the we have a reference. 4411 */ 4412 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4413 4414 /* 4415 * Copy out data and clean up references and tokens. 4416 */ 4417 error = COPYOUT(next, dst, sizeof(*next)); 4418 if (error != 0) 4419 error = EFAULT; 4420 if (token->ipt_data == NULL) { 4421 ipf_freetoken(token, ifs); 4422 break; 4423 } else { 4424 if (is != NULL) 4425 fr_statederef(&is, ifs); 4426 if (next->is_next == NULL) { 4427 ipf_freetoken(token, ifs); 4428 break; 4429 } 4430 } 4431 4432 if ((count == 1) || (error != 0)) 4433 break; 4434 4435 READ_ENTER(&ifs->ifs_ipf_state); 4436 dst += sizeof(*next); 4437 is = next; 4438 next = is->is_next; 4439 } 4440 4441 return error; 4442 } 4443