1 /* 2 * Copyright (C) 1995-2003 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 7 * 8 * Copyright (c) 2014, Joyent, Inc. All rights reserved. 9 */ 10 11 #if defined(KERNEL) || defined(_KERNEL) 12 # undef KERNEL 13 # undef _KERNEL 14 # define KERNEL 1 15 # define _KERNEL 1 16 #endif 17 #include <sys/errno.h> 18 #include <sys/types.h> 19 #include <sys/param.h> 20 #include <sys/file.h> 21 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ 22 defined(_KERNEL) 23 # include "opt_ipfilter_log.h" 24 #endif 25 #if defined(_KERNEL) && defined(__FreeBSD_version) && \ 26 (__FreeBSD_version >= 400000) && !defined(KLD_MODULE) 27 #include "opt_inet6.h" 28 #endif 29 #if !defined(_KERNEL) && !defined(__KERNEL__) 30 # include <stdio.h> 31 # include <stdlib.h> 32 # include <string.h> 33 # define _KERNEL 34 # ifdef __OpenBSD__ 35 struct file; 36 # endif 37 # include <sys/uio.h> 38 # undef _KERNEL 39 #endif 40 #if defined(_KERNEL) && (__FreeBSD_version >= 220000) 41 # include <sys/filio.h> 42 # include <sys/fcntl.h> 43 # if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM) 44 # include "opt_ipfilter.h" 45 # endif 46 #else 47 # include <sys/ioctl.h> 48 #endif 49 #include <sys/time.h> 50 #if !defined(linux) 51 # include <sys/protosw.h> 52 #endif 53 #include <sys/socket.h> 54 #if defined(_KERNEL) 55 # include <sys/systm.h> 56 # if !defined(__SVR4) && !defined(__svr4__) 57 # include <sys/mbuf.h> 58 # endif 59 #endif 60 #if defined(__SVR4) || defined(__svr4__) 61 # include <sys/filio.h> 62 # include <sys/byteorder.h> 63 # ifdef _KERNEL 64 # include <sys/dditypes.h> 65 # endif 66 # include <sys/stream.h> 67 # include <sys/kmem.h> 68 #endif 69 70 #include <net/if.h> 71 #ifdef sun 72 # include <net/af.h> 73 #endif 74 #include <net/route.h> 75 #include <netinet/in.h> 76 #include <netinet/in_systm.h> 77 #include <netinet/ip.h> 78 #include 
<netinet/tcp.h> 79 #if !defined(linux) 80 # include <netinet/ip_var.h> 81 #endif 82 #if !defined(__hpux) && !defined(linux) 83 # include <netinet/tcp_fsm.h> 84 #endif 85 #include <netinet/udp.h> 86 #include <netinet/ip_icmp.h> 87 #include "netinet/ip_compat.h" 88 #include <netinet/tcpip.h> 89 #include "netinet/ip_fil.h" 90 #include "netinet/ip_nat.h" 91 #include "netinet/ip_frag.h" 92 #include "netinet/ip_state.h" 93 #include "netinet/ip_proxy.h" 94 #include "netinet/ipf_stack.h" 95 #ifdef IPFILTER_SYNC 96 #include "netinet/ip_sync.h" 97 #endif 98 #ifdef IPFILTER_SCAN 99 #include "netinet/ip_scan.h" 100 #endif 101 #ifdef USE_INET6 102 #include <netinet/icmp6.h> 103 #endif 104 #if (__FreeBSD_version >= 300000) 105 # include <sys/malloc.h> 106 # if defined(_KERNEL) && !defined(IPFILTER_LKM) 107 # include <sys/libkern.h> 108 # include <sys/systm.h> 109 # endif 110 #endif 111 /* END OF INCLUDES */ 112 113 114 #if !defined(lint) 115 static const char sccsid[] = "@(#)ip_state.c 1.8 6/5/96 (C) 1993-2000 Darren Reed"; 116 static const char rcsid[] = "@(#)$Id: ip_state.c,v 2.186.2.36 2005/08/11 19:58:03 darrenr Exp $"; 117 #endif 118 119 #ifdef USE_INET6 120 static ipstate_t *fr_checkicmp6matchingstate __P((fr_info_t *)); 121 #endif 122 static ipstate_t *fr_matchsrcdst __P((fr_info_t *, ipstate_t *, i6addr_t *, 123 i6addr_t *, tcphdr_t *, u_32_t)); 124 static ipstate_t *fr_checkicmpmatchingstate __P((fr_info_t *)); 125 static int fr_state_flush __P((int, int, ipf_stack_t *)); 126 static ips_stat_t *fr_statetstats __P((ipf_stack_t *)); 127 static int fr_state_remove __P((caddr_t, ipf_stack_t *)); 128 static void fr_ipsmove __P((ipstate_t *, u_int, ipf_stack_t *)); 129 static int fr_tcpstate __P((fr_info_t *, tcphdr_t *, ipstate_t *)); 130 static int fr_tcpoptions __P((fr_info_t *, tcphdr_t *, tcpdata_t *)); 131 static ipstate_t *fr_stclone __P((fr_info_t *, tcphdr_t *, ipstate_t *)); 132 static void fr_fixinisn __P((fr_info_t *, ipstate_t *)); 133 static void fr_fixoutisn 
/* ------------------------------------------------------------------------ */
/* Function:    fr_stateinit                                                */
/* Returns:     int - 0 == success, -1 or -2 == allocation failure          */
/* Parameters:  ifs - ipf stack instance                                    */
/*                                                                          */
/* Initialise all the global variables used within the state code.          */
/* This action also includes initialising locks.                            */
/*                                                                          */
/* Allocates the state hash table, the per-bucket hash seeds and the        */
/* per-bucket length counters, fills the ICMP request/reply lookup tables   */
/* and links the fixed timeout queues into one chain.  -1 is returned when  */
/* the hash table or the bucket length array cannot be allocated, -2 when   */
/* the seed array cannot be allocated.  Nothing allocated earlier is        */
/* released here on failure; fr_stateunload() frees whichever of the three  */
/* arrays are non-NULL.                                                     */
/* ------------------------------------------------------------------------ */
int fr_stateinit(ifs)
ipf_stack_t *ifs;
{
#if defined(NEED_LOCAL_RAND) || !defined(_KERNEL)
	struct timeval tv;
#endif
	int i;

	/* Hash table of state entries, zeroed so every bucket starts empty. */
	KMALLOCS(ifs->ifs_ips_table, ipstate_t **,
		 ifs->ifs_fr_statesize * sizeof(ipstate_t *));
	if (ifs->ifs_ips_table == NULL)
		return -1;
	bzero((char *)ifs->ifs_ips_table,
	      ifs->ifs_fr_statesize * sizeof(ipstate_t *));

	/* One seed per bucket; consumed by the DOUBLE_HASH() macro. */
	KMALLOCS(ifs->ifs_ips_seed, u_long *,
		 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed));
	if (ifs->ifs_ips_seed == NULL)
		return -2;
#if defined(NEED_LOCAL_RAND) || !defined(_KERNEL)
	tv.tv_sec = 0;
	GETKTIME(&tv);
#endif
	for (i = 0; i < ifs->ifs_fr_statesize; i++) {
		/*
		 * XXX - ips_seed[X] should be a random number of sorts.
		 */
#if !defined(NEED_LOCAL_RAND) && defined(_KERNEL)
		ifs->ifs_ips_seed[i] = ipf_random();
#else
		/*
		 * No ipf_random() in this configuration: mix the address of
		 * the seed array, the index, the current seconds value and
		 * the configured table sizes instead.
		 */
		ifs->ifs_ips_seed[i] = ((u_long)ifs->ifs_ips_seed + i) *
				       ifs->ifs_fr_statesize;
		ifs->ifs_ips_seed[i] += tv.tv_sec;
		ifs->ifs_ips_seed[i] *= (u_long)ifs->ifs_ips_seed;
		ifs->ifs_ips_seed[i] ^= 0x5a5aa5a5;
		ifs->ifs_ips_seed[i] *= ifs->ifs_fr_statemax;
#endif
	}

	/* fill icmp reply type table: -1 means "no reply type recorded" */
	for (i = 0; i <= ICMP_MAXTYPE; i++)
		icmpreplytype4[i] = -1;
	icmpreplytype4[ICMP_ECHO] = ICMP_ECHOREPLY;
	icmpreplytype4[ICMP_TSTAMP] = ICMP_TSTAMPREPLY;
	icmpreplytype4[ICMP_IREQ] = ICMP_IREQREPLY;
	icmpreplytype4[ICMP_MASKREQ] = ICMP_MASKREPLY;
#ifdef	USE_INET6
	/* fill icmp6 reply type table, same convention as above */
	for (i = 0; i <= ICMP6_MAXTYPE; i++)
		icmpreplytype6[i] = -1;
	icmpreplytype6[ICMP6_ECHO_REQUEST] = ICMP6_ECHO_REPLY;
	icmpreplytype6[ICMP6_MEMBERSHIP_QUERY] = ICMP6_MEMBERSHIP_REPORT;
	icmpreplytype6[ICMP6_NI_QUERY] = ICMP6_NI_REPLY;
	icmpreplytype6[ND_ROUTER_SOLICIT] = ND_ROUTER_ADVERT;
	icmpreplytype6[ND_NEIGHBOR_SOLICIT] = ND_NEIGHBOR_ADVERT;
#endif

	/* Per-bucket chain length counters, zeroed. */
	KMALLOCS(ifs->ifs_ips_stats.iss_bucketlen, u_long *,
		 ifs->ifs_fr_statesize * sizeof(u_long));
	if (ifs->ifs_ips_stats.iss_bucketlen == NULL)
		return -1;
	bzero((char *)ifs->ifs_ips_stats.iss_bucketlen,
	      ifs->ifs_fr_statesize * sizeof(u_long));

	/*
	 * When no bucket limit has been configured, default to twice the
	 * number of right shifts needed to reduce ifs_fr_statesize to zero.
	 */
	if (ifs->ifs_fr_state_maxbucket == 0) {
		for (i = ifs->ifs_fr_statesize; i > 0; i >>= 1)
			ifs->ifs_fr_state_maxbucket++;
		ifs->ifs_fr_state_maxbucket *= 2;
	}

	/*
	 * Build the chain of fixed timeout queues.  The last TCP queue links
	 * to: udp -> udpack -> icmp -> icmpack -> ip -> delete -> NULL.
	 * Each queue starts empty (ifq_tail points at its own ifq_head) with
	 * a reference count of 1.
	 */
	fr_sttab_init(ifs->ifs_ips_tqtqb, ifs);
	ifs->ifs_ips_tqtqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_ips_udptq;
	ifs->ifs_ips_udptq.ifq_ttl = (u_long)ifs->ifs_fr_udptimeout;
	ifs->ifs_ips_udptq.ifq_ref = 1;
	ifs->ifs_ips_udptq.ifq_head = NULL;
	ifs->ifs_ips_udptq.ifq_tail = &ifs->ifs_ips_udptq.ifq_head;
	MUTEX_INIT(&ifs->ifs_ips_udptq.ifq_lock, "ipftq udp tab");
	ifs->ifs_ips_udptq.ifq_next = &ifs->ifs_ips_udpacktq;
	ifs->ifs_ips_udpacktq.ifq_ttl = (u_long)ifs->ifs_fr_udpacktimeout;
	ifs->ifs_ips_udpacktq.ifq_ref = 1;
	ifs->ifs_ips_udpacktq.ifq_head = NULL;
	ifs->ifs_ips_udpacktq.ifq_tail = &ifs->ifs_ips_udpacktq.ifq_head;
	MUTEX_INIT(&ifs->ifs_ips_udpacktq.ifq_lock, "ipftq udpack tab");
	ifs->ifs_ips_udpacktq.ifq_next = &ifs->ifs_ips_icmptq;
	ifs->ifs_ips_icmptq.ifq_ttl = (u_long)ifs->ifs_fr_icmptimeout;
	ifs->ifs_ips_icmptq.ifq_ref = 1;
	ifs->ifs_ips_icmptq.ifq_head = NULL;
	ifs->ifs_ips_icmptq.ifq_tail = &ifs->ifs_ips_icmptq.ifq_head;
	MUTEX_INIT(&ifs->ifs_ips_icmptq.ifq_lock, "ipftq icmp tab");
	ifs->ifs_ips_icmptq.ifq_next = &ifs->ifs_ips_icmpacktq;
	ifs->ifs_ips_icmpacktq.ifq_ttl = (u_long)ifs->ifs_fr_icmpacktimeout;
	ifs->ifs_ips_icmpacktq.ifq_ref = 1;
	ifs->ifs_ips_icmpacktq.ifq_head = NULL;
	ifs->ifs_ips_icmpacktq.ifq_tail = &ifs->ifs_ips_icmpacktq.ifq_head;
	MUTEX_INIT(&ifs->ifs_ips_icmpacktq.ifq_lock, "ipftq icmpack tab");
	ifs->ifs_ips_icmpacktq.ifq_next = &ifs->ifs_ips_iptq;
	ifs->ifs_ips_iptq.ifq_ttl = (u_long)ifs->ifs_fr_iptimeout;
	ifs->ifs_ips_iptq.ifq_ref = 1;
	ifs->ifs_ips_iptq.ifq_head = NULL;
	ifs->ifs_ips_iptq.ifq_tail = &ifs->ifs_ips_iptq.ifq_head;
	MUTEX_INIT(&ifs->ifs_ips_iptq.ifq_lock, "ipftq ip tab");
	ifs->ifs_ips_iptq.ifq_next = &ifs->ifs_ips_deletetq;
	/* entry's ttl in deletetq is just 1 tick */
	ifs->ifs_ips_deletetq.ifq_ttl = (u_long) 1;
	ifs->ifs_ips_deletetq.ifq_ref = 1;
	ifs->ifs_ips_deletetq.ifq_head = NULL;
	ifs->ifs_ips_deletetq.ifq_tail = &ifs->ifs_ips_deletetq.ifq_head;
	MUTEX_INIT(&ifs->ifs_ips_deletetq.ifq_lock, "state delete queue");
	ifs->ifs_ips_deletetq.ifq_next = NULL;

	RWLOCK_INIT(&ifs->ifs_ipf_state, "ipf IP state rwlock");
	MUTEX_INIT(&ifs->ifs_ipf_stinsert, "ipf state insert mutex");
	/* Tells fr_stateunload() that the locks above need destroying. */
	ifs->ifs_fr_state_init = 1;

	ifs->ifs_ips_last_force_flush = ifs->ifs_fr_ticks;
	return 0;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_stateunload                                              */
/* Returns:     Nil                                                         */
/* Parameters:  ifs - ipf stack instance                                    */
/*                                                                          */
/* Release and destroy any resources acquired or initialised so that        */
/* IPFilter can be unloaded or re-initialised.  Each array is only freed    */
/* when its pointer is non-NULL and the locks are only destroyed when       */
/* ifs_fr_state_init is 1.                                                  */
/* ------------------------------------------------------------------------ */
void fr_stateunload(ifs)
ipf_stack_t *ifs;
{
	ipftq_t *ifq, *ifqnext;
	ipstate_t *is;

	/* Delete state entries from the head until the list is empty. */
	while ((is = ifs->ifs_ips_list) != NULL)
		(void) fr_delstate(is, 0, ifs);

	/*
	 * Proxy timeout queues are not cleaned here because although they
	 * exist on the state list, appr_unload is called after fr_stateunload
	 * and the proxies actually are responsible for them being created.
	 * Should the proxy timeouts have their own list?  There's no real
	 * justification as this is the only complication.
	 */
	for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) {
		/* Save the link first: the queue may be freed below. */
		ifqnext = ifq->ifq_next;
		if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
		    (fr_deletetimeoutqueue(ifq) == 0))
			fr_freetimeoutqueue(ifq, ifs);
	}

	ifs->ifs_ips_stats.iss_inuse = 0;
	ifs->ifs_ips_num = 0;

	if (ifs->ifs_fr_state_init == 1) {
		fr_sttab_destroy(ifs->ifs_ips_tqtqb);
		MUTEX_DESTROY(&ifs->ifs_ips_udptq.ifq_lock);
		MUTEX_DESTROY(&ifs->ifs_ips_icmptq.ifq_lock);
		MUTEX_DESTROY(&ifs->ifs_ips_udpacktq.ifq_lock);
		MUTEX_DESTROY(&ifs->ifs_ips_icmpacktq.ifq_lock);
		MUTEX_DESTROY(&ifs->ifs_ips_iptq.ifq_lock);
		MUTEX_DESTROY(&ifs->ifs_ips_deletetq.ifq_lock);
	}

	if (ifs->ifs_ips_table != NULL) {
		KFREES(ifs->ifs_ips_table,
		       ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_table));
		ifs->ifs_ips_table = NULL;
	}

	if (ifs->ifs_ips_seed != NULL) {
		KFREES(ifs->ifs_ips_seed,
		       ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed));
		ifs->ifs_ips_seed = NULL;
	}

	if (ifs->ifs_ips_stats.iss_bucketlen != NULL) {
		KFREES(ifs->ifs_ips_stats.iss_bucketlen,
		       ifs->ifs_fr_statesize * sizeof(u_long));
		ifs->ifs_ips_stats.iss_bucketlen = NULL;
	}

	/* Zero the bucket limit so the next fr_stateinit() recomputes it. */
	if (ifs->ifs_fr_state_maxbucket_reset == 1)
		ifs->ifs_fr_state_maxbucket = 0;

	if (ifs->ifs_fr_state_init == 1) {
		ifs->ifs_fr_state_init = 0;
		RW_DESTROY(&ifs->ifs_ipf_state);
		MUTEX_DESTROY(&ifs->ifs_ipf_stinsert);
	}
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_statetstats                                              */
/* Returns:     ips_stat_t* - pointer to state stats structure              */
/* Parameters:  ifs - ipf stack instance                                    */
/*                                                                          */
/* Put all the current numbers and pointers into a single struct and return */
/* a pointer to it.  The structure returned is the one embedded in ifs      */
/* (ifs_ips_stats), refreshed in place; it is not a copy.                   */
/* ------------------------------------------------------------------------ */
static ips_stat_t *fr_statetstats(ifs)
ipf_stack_t *ifs;
{
	ifs->ifs_ips_stats.iss_active = ifs->ifs_ips_num;
	ifs->ifs_ips_stats.iss_statesize = ifs->ifs_fr_statesize;
	ifs->ifs_ips_stats.iss_statemax = ifs->ifs_fr_statemax;
	ifs->ifs_ips_stats.iss_table = ifs->ifs_ips_table;
	ifs->ifs_ips_stats.iss_list = ifs->ifs_ips_list;
	ifs->ifs_ips_stats.iss_ticks = ifs->ifs_fr_ticks;
	return &ifs->ifs_ips_stats;
}
*/ 374 /* ------------------------------------------------------------------------ */ 375 static int fr_state_remove(data, ifs) 376 caddr_t data; 377 ipf_stack_t *ifs; 378 { 379 ipstate_t *sp, st; 380 int error; 381 382 sp = &st; 383 error = fr_inobj(data, &st, IPFOBJ_IPSTATE); 384 if (error) 385 return EFAULT; 386 387 WRITE_ENTER(&ifs->ifs_ipf_state); 388 for (sp = ifs->ifs_ips_list; sp; sp = sp->is_next) 389 if ((sp->is_p == st.is_p) && (sp->is_v == st.is_v) && 390 !bcmp((caddr_t)&sp->is_src, (caddr_t)&st.is_src, 391 sizeof(st.is_src)) && 392 !bcmp((caddr_t)&sp->is_dst, (caddr_t)&st.is_dst, 393 sizeof(st.is_dst)) && 394 !bcmp((caddr_t)&sp->is_ps, (caddr_t)&st.is_ps, 395 sizeof(st.is_ps))) { 396 (void) fr_delstate(sp, ISL_REMOVE, ifs); 397 RWLOCK_EXIT(&ifs->ifs_ipf_state); 398 return 0; 399 } 400 RWLOCK_EXIT(&ifs->ifs_ipf_state); 401 return ESRCH; 402 } 403 404 405 /* ------------------------------------------------------------------------ */ 406 /* Function: fr_state_ioctl */ 407 /* Returns: int - 0 == success, != 0 == failure */ 408 /* Parameters: data(I) - pointer to ioctl data */ 409 /* cmd(I) - ioctl command integer */ 410 /* mode(I) - file mode bits used with open */ 411 /* uid(I) - uid of caller */ 412 /* ctx(I) - pointer to give the uid context */ 413 /* ifs - ipf stack instance */ 414 /* */ 415 /* Processes an ioctl call made to operate on the IP Filter state device. */ 416 /* ------------------------------------------------------------------------ */ 417 int fr_state_ioctl(data, cmd, mode, uid, ctx, ifs) 418 caddr_t data; 419 ioctlcmd_t cmd; 420 int mode, uid; 421 void *ctx; 422 ipf_stack_t *ifs; 423 { 424 int arg, ret, error = 0; 425 426 switch (cmd) 427 { 428 /* 429 * Delete an entry from the state table. 
430 */ 431 case SIOCDELST : 432 error = fr_state_remove(data, ifs); 433 break; 434 /* 435 * Flush the state table 436 */ 437 case SIOCIPFFL : 438 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 439 if (error != 0) { 440 error = EFAULT; 441 } else { 442 if (VALID_TABLE_FLUSH_OPT(arg)) { 443 WRITE_ENTER(&ifs->ifs_ipf_state); 444 ret = fr_state_flush(arg, 4, ifs); 445 RWLOCK_EXIT(&ifs->ifs_ipf_state); 446 error = BCOPYOUT((char *)&ret, data, 447 sizeof(ret)); 448 if (error != 0) 449 return EFAULT; 450 } else { 451 error = EINVAL; 452 } 453 } 454 break; 455 456 #ifdef USE_INET6 457 case SIOCIPFL6 : 458 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 459 if (error != 0) { 460 error = EFAULT; 461 } else { 462 if (VALID_TABLE_FLUSH_OPT(arg)) { 463 WRITE_ENTER(&ifs->ifs_ipf_state); 464 ret = fr_state_flush(arg, 6, ifs); 465 RWLOCK_EXIT(&ifs->ifs_ipf_state); 466 error = BCOPYOUT((char *)&ret, data, 467 sizeof(ret)); 468 if (error != 0) 469 return EFAULT; 470 } else { 471 error = EINVAL; 472 } 473 } 474 break; 475 #endif 476 #ifdef IPFILTER_LOG 477 /* 478 * Flush the state log. 479 */ 480 case SIOCIPFFB : 481 if (!(mode & FWRITE)) 482 error = EPERM; 483 else { 484 int tmp; 485 486 tmp = ipflog_clear(IPL_LOGSTATE, ifs); 487 error = BCOPYOUT((char *)&tmp, data, sizeof(tmp)); 488 if (error != 0) 489 error = EFAULT; 490 } 491 break; 492 /* 493 * Turn logging of state information on/off. 494 */ 495 case SIOCSETLG : 496 if (!(mode & FWRITE)) { 497 error = EPERM; 498 } else { 499 error = BCOPYIN((char *)data, 500 (char *)&ifs->ifs_ipstate_logging, 501 sizeof(ifs->ifs_ipstate_logging)); 502 if (error != 0) 503 error = EFAULT; 504 } 505 break; 506 /* 507 * Return the current state of logging. 508 */ 509 case SIOCGETLG : 510 error = BCOPYOUT((char *)&ifs->ifs_ipstate_logging, 511 (char *)data, 512 sizeof(ifs->ifs_ipstate_logging)); 513 if (error != 0) 514 error = EFAULT; 515 break; 516 /* 517 * Return the number of bytes currently waiting to be read. 
518 */ 519 case FIONREAD : 520 arg = ifs->ifs_iplused[IPL_LOGSTATE]; /* returned in an int */ 521 error = BCOPYOUT((char *)&arg, data, sizeof(arg)); 522 if (error != 0) 523 error = EFAULT; 524 break; 525 #endif 526 /* 527 * Get the current state statistics. 528 */ 529 case SIOCGETFS : 530 error = fr_outobj(data, fr_statetstats(ifs), IPFOBJ_STATESTAT); 531 break; 532 /* 533 * Lock/Unlock the state table. (Locking prevents any changes, which 534 * means no packets match). 535 */ 536 case SIOCSTLCK : 537 if (!(mode & FWRITE)) { 538 error = EPERM; 539 } else { 540 error = fr_lock(data, &ifs->ifs_fr_state_lock); 541 } 542 break; 543 /* 544 * Add an entry to the current state table. 545 */ 546 case SIOCSTPUT : 547 if (!ifs->ifs_fr_state_lock || !(mode & FWRITE)) { 548 error = EACCES; 549 break; 550 } 551 error = fr_stputent(data, ifs); 552 break; 553 /* 554 * Get a state table entry. 555 */ 556 case SIOCSTGET : 557 if (!ifs->ifs_fr_state_lock) { 558 error = EACCES; 559 break; 560 } 561 error = fr_stgetent(data, ifs); 562 break; 563 564 case SIOCGENITER : 565 { 566 ipftoken_t *token; 567 ipfgeniter_t iter; 568 569 error = fr_inobj(data, &iter, IPFOBJ_GENITER); 570 if (error != 0) 571 break; 572 573 token = ipf_findtoken(IPFGENITER_STATE, uid, ctx, ifs); 574 if (token != NULL) 575 error = fr_stateiter(token, &iter, ifs); 576 else 577 error = ESRCH; 578 RWLOCK_EXIT(&ifs->ifs_ipf_tokens); 579 break; 580 } 581 582 case SIOCIPFDELTOK : 583 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 584 if (error != 0) { 585 error = EFAULT; 586 } else { 587 error = ipf_deltoken(arg, uid, ctx, ifs); 588 } 589 break; 590 591 default : 592 error = EINVAL; 593 break; 594 } 595 return error; 596 } 597 598 599 /* ------------------------------------------------------------------------ */ 600 /* Function: fr_stgetent */ 601 /* Returns: int - 0 == success, != 0 == failure */ 602 /* Parameters: data(I) - pointer to state structure to retrieve from table */ 603 /* */ 604 /* Copy out state 
information from the kernel to a user space process. If */ 605 /* there is a filter rule associated with the state entry, copy that out */ 606 /* as well. The entry to copy out is taken from the value of "ips_next" in */ 607 /* the struct passed in and if not null and not found in the list of current*/ 608 /* state entries, the retrieval fails. */ 609 /* ------------------------------------------------------------------------ */ 610 int fr_stgetent(data, ifs) 611 caddr_t data; 612 ipf_stack_t *ifs; 613 { 614 ipstate_t *is, *isn; 615 ipstate_save_t ips; 616 int error; 617 618 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); 619 if (error) 620 return EFAULT; 621 622 isn = ips.ips_next; 623 if (isn == NULL) { 624 isn = ifs->ifs_ips_list; 625 if (isn == NULL) { 626 if (ips.ips_next == NULL) 627 return ENOENT; 628 return 0; 629 } 630 } else { 631 /* 632 * Make sure the pointer we're copying from exists in the 633 * current list of entries. Security precaution to prevent 634 * copying of random kernel data. 635 */ 636 for (is = ifs->ifs_ips_list; is; is = is->is_next) 637 if (is == isn) 638 break; 639 if (!is) 640 return ESRCH; 641 } 642 ips.ips_next = isn->is_next; 643 bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is)); 644 ips.ips_rule = isn->is_rule; 645 if (isn->is_rule != NULL) 646 bcopy((char *)isn->is_rule, (char *)&ips.ips_fr, 647 sizeof(ips.ips_fr)); 648 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); 649 if (error) 650 return EFAULT; 651 return 0; 652 } 653 654 655 /* ------------------------------------------------------------------------ */ 656 /* Function: fr_stputent */ 657 /* Returns: int - 0 == success, != 0 == failure */ 658 /* Parameters: data(I) - pointer to state information struct */ 659 /* ifs - ipf stack instance */ 660 /* */ 661 /* This function implements the SIOCSTPUT ioctl: insert a state entry into */ 662 /* the state table. If the state info. 
includes a pointer to a filter rule */ 663 /* then also add in an orphaned rule (will not show up in any "ipfstat -io" */ 664 /* output. */ 665 /* ------------------------------------------------------------------------ */ 666 int fr_stputent(data, ifs) 667 caddr_t data; 668 ipf_stack_t *ifs; 669 { 670 ipstate_t *is, *isn; 671 ipstate_save_t ips; 672 int error, i; 673 frentry_t *fr; 674 char *name; 675 676 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); 677 if (error) 678 return EFAULT; 679 680 /* 681 * Trigger automatic call to fr_state_flush() if the 682 * table has reached capacity specified by hi watermark. 683 */ 684 if (ST_TAB_WATER_LEVEL(ifs) > ifs->ifs_state_flush_level_hi) 685 ifs->ifs_fr_state_doflush = 1; 686 687 /* 688 * If automatic flushing did not do its job, and the table 689 * has filled up, don't try to create a new entry. 690 */ 691 if (ifs->ifs_ips_num >= ifs->ifs_fr_statemax) { 692 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 693 return ENOMEM; 694 } 695 696 KMALLOC(isn, ipstate_t *); 697 if (isn == NULL) 698 return ENOMEM; 699 700 bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn)); 701 bzero((char *)isn, offsetof(struct ipstate, is_pkts)); 702 isn->is_sti.tqe_pnext = NULL; 703 isn->is_sti.tqe_next = NULL; 704 isn->is_sti.tqe_ifq = NULL; 705 isn->is_sti.tqe_parent = isn; 706 isn->is_ifp[0] = NULL; 707 isn->is_ifp[1] = NULL; 708 isn->is_ifp[2] = NULL; 709 isn->is_ifp[3] = NULL; 710 isn->is_sync = NULL; 711 fr = ips.ips_rule; 712 713 if (fr == NULL) { 714 READ_ENTER(&ifs->ifs_ipf_state); 715 fr_stinsert(isn, 0, ifs); 716 MUTEX_EXIT(&isn->is_lock); 717 RWLOCK_EXIT(&ifs->ifs_ipf_state); 718 return 0; 719 } 720 721 if (isn->is_flags & SI_NEWFR) { 722 KMALLOC(fr, frentry_t *); 723 if (fr == NULL) { 724 KFREE(isn); 725 return ENOMEM; 726 } 727 bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr)); 728 isn->is_rule = fr; 729 ips.ips_is.is_rule = fr; 730 MUTEX_NUKE(&fr->fr_lock); 731 MUTEX_INIT(&fr->fr_lock, "state filter rule lock"); 732 733 /* 734 * Look 
up all the interface names in the rule. 735 */ 736 for (i = 0; i < 4; i++) { 737 name = fr->fr_ifnames[i]; 738 fr->fr_ifas[i] = fr_resolvenic(name, fr->fr_v, ifs); 739 name = isn->is_ifname[i]; 740 isn->is_ifp[i] = fr_resolvenic(name, isn->is_v, ifs); 741 } 742 743 fr->fr_ref = 0; 744 fr->fr_dsize = 0; 745 fr->fr_data = NULL; 746 fr->fr_type = FR_T_NONE; 747 748 fr_resolvedest(&fr->fr_tif, fr->fr_v, ifs); 749 fr_resolvedest(&fr->fr_dif, fr->fr_v, ifs); 750 fr_resolvedest(&fr->fr_rif, fr->fr_v, ifs); 751 752 /* 753 * send a copy back to userland of what we ended up 754 * to allow for verification. 755 */ 756 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); 757 if (error) { 758 KFREE(isn); 759 MUTEX_DESTROY(&fr->fr_lock); 760 KFREE(fr); 761 return EFAULT; 762 } 763 READ_ENTER(&ifs->ifs_ipf_state); 764 fr_stinsert(isn, 0, ifs); 765 MUTEX_EXIT(&isn->is_lock); 766 RWLOCK_EXIT(&ifs->ifs_ipf_state); 767 768 } else { 769 READ_ENTER(&ifs->ifs_ipf_state); 770 for (is = ifs->ifs_ips_list; is; is = is->is_next) 771 if (is->is_rule == fr) { 772 fr_stinsert(isn, 0, ifs); 773 MUTEX_EXIT(&isn->is_lock); 774 break; 775 } 776 777 if (is == NULL) { 778 KFREE(isn); 779 isn = NULL; 780 } 781 RWLOCK_EXIT(&ifs->ifs_ipf_state); 782 783 return (isn == NULL) ? ESRCH : 0; 784 } 785 786 return 0; 787 } 788 789 790 /* ------------------------------------------------------------------------ */ 791 /* Function: fr_stinsert */ 792 /* Returns: Nil */ 793 /* Parameters: is(I) - pointer to state structure */ 794 /* rev(I) - flag indicating forward/reverse direction of packet */ 795 /* */ 796 /* Inserts a state structure into the hash table (for lookups) and the list */ 797 /* of state entries (for enumeration). Resolves all of the interface names */ 798 /* to pointers and adjusts running stats for the hash table as appropriate. */ 799 /* */ 800 /* Locking: it is assumed that some kind of lock on ipf_state is held. */ 801 /* Exits with is_lock initialised and held. 
*/ 802 /* ------------------------------------------------------------------------ */ 803 void fr_stinsert(is, rev, ifs) 804 ipstate_t *is; 805 int rev; 806 ipf_stack_t *ifs; 807 { 808 frentry_t *fr; 809 u_int hv; 810 int i; 811 812 MUTEX_INIT(&is->is_lock, "ipf state entry"); 813 814 fr = is->is_rule; 815 if (fr != NULL) { 816 MUTEX_ENTER(&fr->fr_lock); 817 fr->fr_ref++; 818 fr->fr_statecnt++; 819 MUTEX_EXIT(&fr->fr_lock); 820 } 821 822 /* 823 * Look up all the interface names in the state entry. 824 */ 825 for (i = 0; i < 4; i++) { 826 if (is->is_ifp[i] != NULL) 827 continue; 828 is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], is->is_v, ifs); 829 } 830 831 /* 832 * If we could trust is_hv, then the modulous would not be needed, but 833 * when running with IPFILTER_SYNC, this stops bad values. 834 */ 835 hv = is->is_hv % ifs->ifs_fr_statesize; 836 is->is_hv = hv; 837 838 /* 839 * We need to get both of these locks...the first because it is 840 * possible that once the insert is complete another packet might 841 * come along, match the entry and want to update it. 842 */ 843 MUTEX_ENTER(&is->is_lock); 844 MUTEX_ENTER(&ifs->ifs_ipf_stinsert); 845 846 /* 847 * add into list table. 
848 */ 849 if (ifs->ifs_ips_list != NULL) 850 ifs->ifs_ips_list->is_pnext = &is->is_next; 851 is->is_pnext = &ifs->ifs_ips_list; 852 is->is_next = ifs->ifs_ips_list; 853 ifs->ifs_ips_list = is; 854 855 if (ifs->ifs_ips_table[hv] != NULL) 856 ifs->ifs_ips_table[hv]->is_phnext = &is->is_hnext; 857 else 858 ifs->ifs_ips_stats.iss_inuse++; 859 is->is_phnext = ifs->ifs_ips_table + hv; 860 is->is_hnext = ifs->ifs_ips_table[hv]; 861 ifs->ifs_ips_table[hv] = is; 862 ifs->ifs_ips_stats.iss_bucketlen[hv]++; 863 ifs->ifs_ips_num++; 864 MUTEX_EXIT(&ifs->ifs_ipf_stinsert); 865 866 fr_setstatequeue(is, rev, ifs); 867 } 868 869 /* ------------------------------------------------------------------------ */ 870 /* Function: fr_match_ipv4addrs */ 871 /* Returns: int - 2 strong match (same addresses, same direction) */ 872 /* 1 weak match (same address, opposite direction) */ 873 /* 0 no match */ 874 /* */ 875 /* Function matches IPv4 addresses. */ 876 /* ------------------------------------------------------------------------ */ 877 static int fr_match_ipv4addrs(is1, is2) 878 ipstate_t *is1; 879 ipstate_t *is2; 880 { 881 int rv; 882 883 if (is1->is_saddr == is2->is_saddr && is1->is_daddr == is2->is_daddr) 884 rv = 2; 885 else if (is1->is_saddr == is2->is_daddr && 886 is1->is_daddr == is2->is_saddr) 887 rv = 1; 888 else 889 rv = 0; 890 891 return (rv); 892 } 893 894 /* ------------------------------------------------------------------------ */ 895 /* Function: fr_match_ipv6addrs */ 896 /* Returns: int - 2 strong match (same addresses, same direction) */ 897 /* 1 weak match (same addresses, opposite direction) */ 898 /* 0 no match */ 899 /* */ 900 /* Function matches IPv6 addresses. 
*/ 901 /* ------------------------------------------------------------------------ */ 902 static int fr_match_ipv6addrs(is1, is2) 903 ipstate_t *is1; 904 ipstate_t *is2; 905 { 906 int rv; 907 908 if (IP6_EQ(&is1->is_src, &is2->is_src) && 909 IP6_EQ(&is1->is_dst, &is2->is_dst)) 910 rv = 2; 911 else if (IP6_EQ(&is1->is_src, &is2->is_dst) && 912 IP6_EQ(&is1->is_dst, &is2->is_src)) { 913 rv = 1; 914 } 915 else 916 rv = 0; 917 918 return (rv); 919 } 920 /* ------------------------------------------------------------------------ */ 921 /* Function: fr_match_addresses */ 922 /* Returns: int - 2 strong match (same addresses, same direction) */ 923 /* 1 weak match (same address, opposite directions) */ 924 /* 0 no match */ 925 /* Parameters: is1, is2 pointers to states we are checking */ 926 /* */ 927 /* Matches addresses, function uses fr_match_ipvXaddrs() to deal with IPv4 */ 928 /* and IPv6 address format. */ 929 /* ------------------------------------------------------------------------ */ 930 static int fr_match_addresses(is1, is2) 931 ipstate_t *is1; 932 ipstate_t *is2; 933 { 934 int rv; 935 936 if (is1->is_v == 4) { 937 rv = fr_match_ipv4addrs(is1, is2); 938 } else { 939 rv = fr_match_ipv6addrs(is1, is2); 940 } 941 942 return (rv); 943 } 944 945 /* ------------------------------------------------------------------------ */ 946 /* Function: fr_match_ppairs */ 947 /* Returns: int - 2 strong match (same ports, same direction) */ 948 /* 1 weak match (same ports, different direction) */ 949 /* 0 no match */ 950 /* Parameters ppairs1, ppairs - src, dst ports we want to match. */ 951 /* */ 952 /* Matches two port_pair_t types (port pairs). Each port pair contains */ 953 /* src, dst port, which belong to session (state entry). 
*/ 954 /* ------------------------------------------------------------------------ */ 955 static int fr_match_ppairs(ppairs1, ppairs2) 956 port_pair_t *ppairs1; 957 port_pair_t *ppairs2; 958 { 959 int rv; 960 961 if (ppairs1->pp_sport == ppairs2->pp_sport && 962 ppairs1->pp_dport == ppairs2->pp_dport) 963 rv = 2; 964 else if (ppairs1->pp_sport == ppairs2->pp_dport && 965 ppairs1->pp_dport == ppairs2->pp_sport) 966 rv = 1; 967 else 968 rv = 0; 969 970 return (rv); 971 } 972 973 /* ------------------------------------------------------------------------ */ 974 /* Function: fr_match_l4_hdr */ 975 /* Returns: int - 0 no match, */ 976 /* 1 weak match (same ports, different directions) */ 977 /* 2 strong match (same ports, same direction) */ 978 /* Parameters is1, is2 - states we want to match */ 979 /* */ 980 /* Function matches L4 header data (source ports for TCP, UDP, CallIds for */ 981 /* GRE protocol). */ 982 /* ------------------------------------------------------------------------ */ 983 static int fr_match_l4_hdr(is1, is2) 984 ipstate_t *is1; 985 ipstate_t *is2; 986 { 987 int rv = 0; 988 port_pair_t pp1; 989 port_pair_t pp2; 990 991 if (is1->is_p != is2->is_p) 992 return (0); 993 994 switch (is1->is_p) { 995 case IPPROTO_TCP: 996 pp1.pp_sport = is1->is_ps.is_ts.ts_sport; 997 pp1.pp_dport = is1->is_ps.is_ts.ts_dport; 998 pp2.pp_sport = is2->is_ps.is_ts.ts_sport; 999 pp2.pp_dport = is2->is_ps.is_ts.ts_dport; 1000 rv = fr_match_ppairs(&pp1, &pp2); 1001 break; 1002 case IPPROTO_UDP: 1003 pp1.pp_sport = is1->is_ps.is_us.us_sport; 1004 pp1.pp_dport = is1->is_ps.is_us.us_dport; 1005 pp2.pp_sport = is2->is_ps.is_us.us_sport; 1006 pp2.pp_dport = is2->is_ps.is_us.us_dport; 1007 rv = fr_match_ppairs(&pp1, &pp2); 1008 break; 1009 case IPPROTO_GRE: 1010 /* greinfo_t can be also interprted as port pair */ 1011 pp1.pp_sport = is1->is_ps.is_ug.gs_call[0]; 1012 pp1.pp_dport = is1->is_ps.is_ug.gs_call[1]; 1013 pp2.pp_sport = is2->is_ps.is_ug.gs_call[0]; 1014 pp2.pp_dport = 
is2->is_ps.is_ug.gs_call[1]; 1015 rv = fr_match_ppairs(&pp1, &pp2); 1016 break; 1017 case IPPROTO_ICMP: 1018 case IPPROTO_ICMPV6: 1019 if (bcmp(&is1->is_ps, &is2->is_ps, sizeof (icmpinfo_t))) 1020 rv = 1; 1021 else 1022 rv = 0; 1023 break; 1024 default: 1025 rv = 0; 1026 } 1027 1028 return (rv); 1029 } 1030 1031 /* ------------------------------------------------------------------------ */ 1032 /* Function: fr_matchstates */ 1033 /* Returns: int - nonzero match, zero no match */ 1034 /* Parameters is1, is2 - states we want to match */ 1035 /* */ 1036 /* The state entries are equal (identical match) if they belong to the same */ 1037 /* session. Any time new state entry is being added the fr_addstate() */ 1038 /* function creates temporal state entry from the data it gets from IP and */ 1039 /* L4 header. The fr_matchstats() must be also aware of packet direction, */ 1040 /* which is also stored within the state entry. We should keep in mind the */ 1041 /* information about packet direction is spread accross L3 (addresses) and */ 1042 /* L4 (ports). There are three possible relationships betwee is1, is2: */ 1043 /* - no match (match(is1, is2) == 0)) */ 1044 /* - weak match same addresses (ports), but different */ 1045 /* directions (1) (fr_match_xxxx(is1, is2) == 1) */ 1046 /* - strong match same addresses (ports) and same directions */ 1047 /* (2) (fr_match_xxxx(is1, is2) == 2) */ 1048 /* */ 1049 /* There are functions, which match match addresses (L3 header) in is1, is2 */ 1050 /* and functions, which are used to compare ports (L4 header) data. We say */ 1051 /* the is1 and is2 are same (identical) if there is a match */ 1052 /* (fr_match_l4_hdr(is1, is2) != 0) and matchlevels are same for entries */ 1053 /* (fr_match_l3_hdr(is1, is2) == fr_match_l4_hdr(is1, is2)) for is1, is2. */ 1054 /* Such requirement deals with case as follows: */ 1055 /* suppose there are two connections between hosts A, B. 
Connection 1: */ 1056 /* a.a.a.a:12345 <=> b.b.b.b:54321 */ 1057 /* Connection 2: */ 1058 /* a.a.a.a:54321 <=> b.b.b.b:12345 */ 1059 /* since we've introduced match levels into our fr_matchstates(), we are */ 1060 /* able to identify, which packets belong to connection A and which belong */ 1061 /* to connection B. Assume there are two entries is1, is2. is1 has been */ 1062 /* from con. 1 packet, which travelled from A to B: */ 1063 /* a.a.a.a:12345 -> b.b.b.b:54321 */ 1064 /* while s2, has been created from packet which belongs to con. 2 and is */ 1065 /* also coming from A to B: */ 1066 /* a.a.a.a:54321 -> b.b.b.b:12345 */ 1067 /* fr_match_l3_hdr(is1, is2) == 2 -> strong match, while */ 1068 /* fr_match_l4_hdr(is1, is2) == 1 -> weak match. Since match levels are */ 1069 /* different the state entries are not identical -> no match as a final */ 1070 /* result. */ 1071 /* ------------------------------------------------------------------------ */ 1072 static int fr_matchstates(is1, is2) 1073 ipstate_t *is1; 1074 ipstate_t *is2; 1075 { 1076 int rv; 1077 int amatch; 1078 int pmatch; 1079 1080 if (bcmp(&is1->is_pass, &is2->is_pass, 1081 offsetof(struct ipstate, is_ps) - 1082 offsetof(struct ipstate, is_pass)) == 0) { 1083 1084 pmatch = fr_match_l4_hdr(is1, is2); 1085 amatch = fr_match_addresses(is1, is2); 1086 /* 1087 * If addresses match (amatch != 0), then 'match levels' 1088 * must be same for matching entries. If amatch and pmatch 1089 * have different values (different match levels), then 1090 * is1 and is2 belong to different sessions. 
1091 */ 1092 rv = (amatch != 0) && (amatch == pmatch); 1093 } 1094 else 1095 rv = 0; 1096 1097 return (rv); 1098 } 1099 1100 /* ------------------------------------------------------------------------ */ 1101 /* Function: fr_addstate */ 1102 /* Returns: ipstate_t* - NULL == failure, else pointer to new state */ 1103 /* Parameters: fin(I) - pointer to packet information */ 1104 /* stsave(O) - pointer to place to save pointer to created */ 1105 /* state structure. */ 1106 /* flags(I) - flags to use when creating the structure */ 1107 /* */ 1108 /* Creates a new IP state structure from the packet information collected. */ 1109 /* Inserts it into the state table and appends to the bottom of the active */ 1110 /* list. If the capacity of the table has reached the maximum allowed then */ 1111 /* the call will fail and a flush is scheduled for the next timeout call. */ 1112 /* ------------------------------------------------------------------------ */ 1113 ipstate_t *fr_addstate(fin, stsave, flags) 1114 fr_info_t *fin; 1115 ipstate_t **stsave; 1116 u_int flags; 1117 { 1118 ipstate_t *is, ips; 1119 struct icmp *ic; 1120 u_int pass, hv; 1121 frentry_t *fr; 1122 tcphdr_t *tcp; 1123 grehdr_t *gre; 1124 void *ifp; 1125 int out; 1126 ipf_stack_t *ifs = fin->fin_ifs; 1127 1128 if (ifs->ifs_fr_state_lock || 1129 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) 1130 return NULL; 1131 1132 if ((fin->fin_flx & FI_OOW) && !(fin->fin_tcpf & TH_SYN)) 1133 return NULL; 1134 1135 /* 1136 * Trigger automatic call to fr_state_flush() if the 1137 * table has reached capacity specified by hi watermark. 1138 */ 1139 if (ST_TAB_WATER_LEVEL(ifs) > ifs->ifs_state_flush_level_hi) 1140 ifs->ifs_fr_state_doflush = 1; 1141 1142 /* 1143 * If the max number of state entries has been reached, and there is no 1144 * limit on the state count for the rule, then do not continue. 
In the 1145 * case where a limit exists, it's ok allow the entries to be created as 1146 * long as specified limit itself has not been reached. 1147 * 1148 * Note that because the lock isn't held on fr, it is possible to exceed 1149 * the specified size of the table. However, the cost of this is being 1150 * ignored here; as the number by which it can go over is a product of 1151 * the number of simultaneous threads that could be executing in here. 1152 * So, a limit of 100 won't result in 200, but could result in 101 or 102. 1153 * 1154 * Also note that, since the automatic flush should have been triggered 1155 * well before we reach the maximum number of state table entries, the 1156 * likelihood of reaching the max (and thus exceedng it) is minimal. 1157 */ 1158 fr = fin->fin_fr; 1159 if (fr != NULL) { 1160 if ((ifs->ifs_ips_num >= ifs->ifs_fr_statemax) && 1161 (fr->fr_statemax == 0)) { 1162 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 1163 return NULL; 1164 } 1165 if ((fr->fr_statemax != 0) && 1166 (fr->fr_statecnt >= fr->fr_statemax)) { 1167 ATOMIC_INCL(ifs->ifs_ips_stats.iss_maxref); 1168 ifs->ifs_fr_state_doflush = 1; 1169 return NULL; 1170 } 1171 } 1172 1173 ic = NULL; 1174 tcp = NULL; 1175 out = fin->fin_out; 1176 is = &ips; 1177 bzero((char *)is, sizeof(*is)); 1178 1179 if (fr == NULL) { 1180 pass = ifs->ifs_fr_flags; 1181 is->is_tag = FR_NOLOGTAG; 1182 } else { 1183 pass = fr->fr_flags; 1184 } 1185 1186 is->is_die = 1 + ifs->ifs_fr_ticks; 1187 /* 1188 * We want to check everything that is a property of this packet, 1189 * but we don't (automatically) care about it's fragment status as 1190 * this may change. 1191 */ 1192 is->is_pass = pass; 1193 is->is_v = fin->fin_v; 1194 is->is_opt[0] = fin->fin_optmsk; 1195 is->is_optmsk[0] = 0xffffffff; 1196 /* 1197 * The reverse direction option mask will be set in fr_matchsrcdst(), 1198 * when we will see the first packet from the peer. We will leave it 1199 * as zero for now. 
1200 */ 1201 is->is_optmsk[1] = 0x0; 1202 1203 if (is->is_v == 6) { 1204 is->is_opt[0] &= ~0x8; 1205 is->is_optmsk[0] &= ~0x8; 1206 } 1207 is->is_sec = fin->fin_secmsk; 1208 is->is_secmsk = 0xffff; 1209 is->is_auth = fin->fin_auth; 1210 is->is_authmsk = 0xffff; 1211 1212 /* 1213 * Copy and calculate... 1214 */ 1215 hv = (is->is_p = fin->fin_fi.fi_p); 1216 is->is_src = fin->fin_fi.fi_src; 1217 hv += is->is_saddr; 1218 is->is_dst = fin->fin_fi.fi_dst; 1219 hv += is->is_daddr; 1220 #ifdef USE_INET6 1221 if (fin->fin_v == 6) { 1222 /* 1223 * For ICMPv6, we check to see if the destination address is 1224 * a multicast address. If it is, do not include it in the 1225 * calculation of the hash because the correct reply will come 1226 * back from a real address, not a multicast address. 1227 */ 1228 if ((is->is_p == IPPROTO_ICMPV6) && 1229 IN6_IS_ADDR_MULTICAST(&is->is_dst.in6)) { 1230 /* 1231 * So you can do keep state with neighbour discovery. 1232 * 1233 * Here we could use the address from the neighbour 1234 * solicit message to put in the state structure and 1235 * we could use that without a wildcard flag too... 
1236 */ 1237 is->is_flags |= SI_W_DADDR; 1238 hv -= is->is_daddr; 1239 } else { 1240 hv += is->is_dst.i6[1]; 1241 hv += is->is_dst.i6[2]; 1242 hv += is->is_dst.i6[3]; 1243 } 1244 hv += is->is_src.i6[1]; 1245 hv += is->is_src.i6[2]; 1246 hv += is->is_src.i6[3]; 1247 } 1248 #endif 1249 if ((fin->fin_v == 4) && 1250 (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) { 1251 if (fin->fin_out == 0) { 1252 flags |= SI_W_DADDR|SI_CLONE; 1253 hv -= is->is_daddr; 1254 } else { 1255 flags |= SI_W_SADDR|SI_CLONE; 1256 hv -= is->is_saddr; 1257 } 1258 } 1259 1260 switch (is->is_p) 1261 { 1262 #ifdef USE_INET6 1263 case IPPROTO_ICMPV6 : 1264 ic = fin->fin_dp; 1265 1266 switch (ic->icmp_type) 1267 { 1268 case ICMP6_ECHO_REQUEST : 1269 is->is_icmp.ici_type = ic->icmp_type; 1270 hv += (is->is_icmp.ici_id = ic->icmp_id); 1271 break; 1272 case ICMP6_MEMBERSHIP_QUERY : 1273 case ND_ROUTER_SOLICIT : 1274 case ND_NEIGHBOR_SOLICIT : 1275 case ICMP6_NI_QUERY : 1276 is->is_icmp.ici_type = ic->icmp_type; 1277 break; 1278 default : 1279 return NULL; 1280 } 1281 ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp); 1282 break; 1283 #endif 1284 case IPPROTO_ICMP : 1285 ic = fin->fin_dp; 1286 1287 switch (ic->icmp_type) 1288 { 1289 case ICMP_ECHO : 1290 case ICMP_ECHOREPLY : 1291 case ICMP_TSTAMP : 1292 case ICMP_IREQ : 1293 case ICMP_MASKREQ : 1294 is->is_icmp.ici_type = ic->icmp_type; 1295 hv += (is->is_icmp.ici_id = ic->icmp_id); 1296 break; 1297 default : 1298 return NULL; 1299 } 1300 ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp); 1301 break; 1302 1303 case IPPROTO_GRE : 1304 gre = fin->fin_dp; 1305 1306 is->is_gre.gs_flags = gre->gr_flags; 1307 is->is_gre.gs_ptype = gre->gr_ptype; 1308 if (GRE_REV(is->is_gre.gs_flags) == 1) { 1309 is->is_call[0] = fin->fin_data[0]; 1310 is->is_call[1] = fin->fin_data[1]; 1311 } 1312 break; 1313 1314 case IPPROTO_TCP : 1315 tcp = fin->fin_dp; 1316 1317 if (tcp->th_flags & TH_RST) 1318 return NULL; 1319 /* 1320 * The endian of the ports doesn't matter, but the ack and 
1321 * sequence numbers do as we do mathematics on them later. 1322 */ 1323 is->is_sport = htons(fin->fin_data[0]); 1324 is->is_dport = htons(fin->fin_data[1]); 1325 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) { 1326 hv += is->is_sport; 1327 hv += is->is_dport; 1328 } 1329 1330 /* 1331 * If this is a real packet then initialise fields in the 1332 * state information structure from the TCP header information. 1333 */ 1334 1335 is->is_maxdwin = 1; 1336 is->is_maxswin = ntohs(tcp->th_win); 1337 if (is->is_maxswin == 0) 1338 is->is_maxswin = 1; 1339 1340 if ((fin->fin_flx & FI_IGNORE) == 0) { 1341 is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen - 1342 (TCP_OFF(tcp) << 2) + 1343 ((tcp->th_flags & TH_SYN) ? 1 : 0) + 1344 ((tcp->th_flags & TH_FIN) ? 1 : 0); 1345 is->is_maxsend = is->is_send; 1346 1347 /* 1348 * Window scale option is only present in 1349 * SYN/SYN-ACK packet. 1350 */ 1351 if ((tcp->th_flags & ~(TH_FIN|TH_ACK|TH_ECNALL)) == 1352 TH_SYN && 1353 (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) { 1354 if (fr_tcpoptions(fin, tcp, 1355 &is->is_tcp.ts_data[0]) == -1) { 1356 fin->fin_flx |= FI_BAD; 1357 } 1358 } 1359 1360 if ((fin->fin_out != 0) && (pass & FR_NEWISN) != 0) { 1361 fr_checknewisn(fin, is); 1362 fr_fixoutisn(fin, is); 1363 } 1364 1365 if ((tcp->th_flags & TH_OPENING) == TH_SYN) 1366 flags |= IS_TCPFSM; 1367 else { 1368 is->is_maxdwin = is->is_maxswin * 2; 1369 is->is_dend = ntohl(tcp->th_ack); 1370 is->is_maxdend = ntohl(tcp->th_ack); 1371 is->is_maxdwin *= 2; 1372 } 1373 } 1374 1375 /* 1376 * If we're creating state for a starting connection, start the 1377 * timer on it as we'll never see an error if it fails to 1378 * connect. 
1379 */ 1380 ATOMIC_INCL(ifs->ifs_ips_stats.iss_tcp); 1381 break; 1382 1383 case IPPROTO_UDP : 1384 tcp = fin->fin_dp; 1385 1386 is->is_sport = htons(fin->fin_data[0]); 1387 is->is_dport = htons(fin->fin_data[1]); 1388 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) { 1389 hv += tcp->th_dport; 1390 hv += tcp->th_sport; 1391 } 1392 ATOMIC_INCL(ifs->ifs_ips_stats.iss_udp); 1393 break; 1394 1395 default : 1396 break; 1397 } 1398 hv = DOUBLE_HASH(hv, ifs); 1399 is->is_hv = hv; 1400 is->is_rule = fr; 1401 is->is_flags = flags & IS_INHERITED; 1402 1403 /* 1404 * Look for identical state. 1405 */ 1406 for (is = ifs->ifs_ips_table[is->is_hv % ifs->ifs_fr_statesize]; 1407 is != NULL; 1408 is = is->is_hnext) { 1409 if (fr_matchstates(&ips, is) == 1) 1410 break; 1411 } 1412 1413 /* 1414 * we've found a matching state -> state already exists, 1415 * we are not going to add a duplicate record. 1416 */ 1417 if (is != NULL) 1418 return NULL; 1419 1420 if (ifs->ifs_ips_stats.iss_bucketlen[hv] >= ifs->ifs_fr_state_maxbucket) { 1421 ATOMIC_INCL(ifs->ifs_ips_stats.iss_bucketfull); 1422 return NULL; 1423 } 1424 KMALLOC(is, ipstate_t *); 1425 if (is == NULL) { 1426 ATOMIC_INCL(ifs->ifs_ips_stats.iss_nomem); 1427 return NULL; 1428 } 1429 bcopy((char *)&ips, (char *)is, sizeof(*is)); 1430 /* 1431 * Do not do the modulous here, it is done in fr_stinsert(). 
1432 */ 1433 if (fr != NULL) { 1434 (void) strncpy(is->is_group, fr->fr_group, FR_GROUPLEN); 1435 if (fr->fr_age[0] != 0) { 1436 is->is_tqehead[0] = 1437 fr_addtimeoutqueue(&ifs->ifs_ips_utqe, 1438 fr->fr_age[0], ifs); 1439 is->is_sti.tqe_flags |= TQE_RULEBASED; 1440 } 1441 if (fr->fr_age[1] != 0) { 1442 is->is_tqehead[1] = 1443 fr_addtimeoutqueue(&ifs->ifs_ips_utqe, 1444 fr->fr_age[1], ifs); 1445 is->is_sti.tqe_flags |= TQE_RULEBASED; 1446 } 1447 is->is_tag = fr->fr_logtag; 1448 1449 is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1]; 1450 is->is_ifp[(1 - out) << 1] = fr->fr_ifas[2]; 1451 is->is_ifp[((1 - out) << 1) + 1] = fr->fr_ifas[3]; 1452 1453 if (((ifp = fr->fr_ifas[1]) != NULL) && 1454 (ifp != (void *)-1)) { 1455 COPYIFNAME(ifp, is->is_ifname[(out << 1) + 1], fr->fr_v); 1456 } 1457 if (((ifp = fr->fr_ifas[2]) != NULL) && 1458 (ifp != (void *)-1)) { 1459 COPYIFNAME(ifp, is->is_ifname[(1 - out) << 1], fr->fr_v); 1460 } 1461 if (((ifp = fr->fr_ifas[3]) != NULL) && 1462 (ifp != (void *)-1)) { 1463 COPYIFNAME(ifp, is->is_ifname[((1 - out) << 1) + 1], fr->fr_v); 1464 } 1465 } 1466 1467 is->is_ifp[out << 1] = fin->fin_ifp; 1468 if (fin->fin_ifp != NULL) { 1469 COPYIFNAME(fin->fin_ifp, is->is_ifname[out << 1], fin->fin_v); 1470 } 1471 1472 is->is_ref = 1; 1473 is->is_pkts[0] = 0, is->is_bytes[0] = 0; 1474 is->is_pkts[1] = 0, is->is_bytes[1] = 0; 1475 is->is_pkts[2] = 0, is->is_bytes[2] = 0; 1476 is->is_pkts[3] = 0, is->is_bytes[3] = 0; 1477 if ((fin->fin_flx & FI_IGNORE) == 0) { 1478 is->is_pkts[out] = 1; 1479 is->is_bytes[out] = fin->fin_plen; 1480 is->is_flx[out][0] = fin->fin_flx & FI_CMP; 1481 is->is_flx[out][0] &= ~FI_OOW; 1482 } 1483 1484 if (pass & FR_STSTRICT) 1485 is->is_flags |= IS_STRICT; 1486 1487 if (pass & FR_STATESYNC) 1488 is->is_flags |= IS_STATESYNC; 1489 1490 if (flags & (SI_WILDP|SI_WILDA)) { 1491 ATOMIC_INCL(ifs->ifs_ips_stats.iss_wild); 1492 } 1493 is->is_rulen = fin->fin_rule; 1494 1495 1496 if (pass & FR_LOGFIRST) 1497 is->is_pass &= 
~(FR_LOGFIRST|FR_LOG); 1498 1499 READ_ENTER(&ifs->ifs_ipf_state); 1500 is->is_me = stsave; 1501 1502 fr_stinsert(is, fin->fin_rev, ifs); 1503 1504 if (fin->fin_p == IPPROTO_TCP) { 1505 /* 1506 * If we're creating state for a starting connection, start the 1507 * timer on it as we'll never see an error if it fails to 1508 * connect. 1509 */ 1510 (void) fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb, 1511 is->is_flags); 1512 MUTEX_EXIT(&is->is_lock); 1513 #ifdef IPFILTER_SCAN 1514 if ((is->is_flags & SI_CLONE) == 0) 1515 (void) ipsc_attachis(is); 1516 #endif 1517 } else { 1518 MUTEX_EXIT(&is->is_lock); 1519 } 1520 #ifdef IPFILTER_SYNC 1521 if ((is->is_flags & IS_STATESYNC) && ((is->is_flags & SI_CLONE) == 0)) 1522 is->is_sync = ipfsync_new(SMC_STATE, fin, is); 1523 #endif 1524 if (ifs->ifs_ipstate_logging) 1525 ipstate_log(is, ISL_NEW, ifs); 1526 1527 RWLOCK_EXIT(&ifs->ifs_ipf_state); 1528 fin->fin_rev = IP6_NEQ(&is->is_dst, &fin->fin_daddr); 1529 fin->fin_flx |= FI_STATE; 1530 if (fin->fin_flx & FI_FRAG) 1531 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); 1532 1533 return is; 1534 } 1535 1536 1537 /* ------------------------------------------------------------------------ */ 1538 /* Function: fr_tcpoptions */ 1539 /* Returns: int - 1 == packet matches state entry, 0 == it does not */ 1540 /* Parameters: fin(I) - pointer to packet information */ 1541 /* tcp(I) - pointer to TCP packet header */ 1542 /* td(I) - pointer to TCP data held as part of the state */ 1543 /* */ 1544 /* Look after the TCP header for any options and deal with those that are */ 1545 /* present. Record details about those that we recogise. 
*/ 1546 /* ------------------------------------------------------------------------ */ 1547 static int fr_tcpoptions(fin, tcp, td) 1548 fr_info_t *fin; 1549 tcphdr_t *tcp; 1550 tcpdata_t *td; 1551 { 1552 int off, mlen, ol, i, len, retval; 1553 char buf[64], *s, opt; 1554 mb_t *m = NULL; 1555 1556 len = (TCP_OFF(tcp) << 2); 1557 if (fin->fin_dlen < len) 1558 return 0; 1559 len -= sizeof(*tcp); 1560 1561 off = fin->fin_plen - fin->fin_dlen + sizeof(*tcp) + fin->fin_ipoff; 1562 1563 m = fin->fin_m; 1564 mlen = MSGDSIZE(m) - off; 1565 if (len > mlen) { 1566 len = mlen; 1567 retval = 0; 1568 } else { 1569 retval = 1; 1570 } 1571 1572 COPYDATA(m, off, len, buf); 1573 1574 for (s = buf; len > 0; ) { 1575 opt = *s; 1576 if (opt == TCPOPT_EOL) 1577 break; 1578 else if (opt == TCPOPT_NOP) 1579 ol = 1; 1580 else { 1581 if (len < 2) 1582 break; 1583 ol = (int)*(s + 1); 1584 if (ol < 2 || ol > len) 1585 break; 1586 1587 /* 1588 * Extract the TCP options we are interested in out of 1589 * the header and store them in the the tcpdata struct. 1590 */ 1591 switch (opt) 1592 { 1593 case TCPOPT_WINDOW : 1594 if (ol == TCPOLEN_WINDOW) { 1595 i = (int)*(s + 2); 1596 if (i > TCP_WSCALE_MAX) 1597 i = TCP_WSCALE_MAX; 1598 else if (i < 0) 1599 i = 0; 1600 td->td_winscale = i; 1601 td->td_winflags |= TCP_WSCALE_SEEN | 1602 TCP_WSCALE_FIRST; 1603 } else 1604 retval = -1; 1605 break; 1606 case TCPOPT_MAXSEG : 1607 /* 1608 * So, if we wanted to set the TCP MAXSEG, 1609 * it should be done here... 
1610 */ 1611 if (ol == TCPOLEN_MAXSEG) { 1612 i = (int)*(s + 2); 1613 i <<= 8; 1614 i += (int)*(s + 3); 1615 td->td_maxseg = i; 1616 } else 1617 retval = -1; 1618 break; 1619 case TCPOPT_SACK_PERMITTED : 1620 if (ol == TCPOLEN_SACK_PERMITTED) 1621 td->td_winflags |= TCP_SACK_PERMIT; 1622 else 1623 retval = -1; 1624 break; 1625 } 1626 } 1627 len -= ol; 1628 s += ol; 1629 } 1630 return retval; 1631 } 1632 1633 1634 /* ------------------------------------------------------------------------ */ 1635 /* Function: fr_tcpstate */ 1636 /* Returns: int - 1 == packet matches state entry, 0 == it does not */ 1637 /* Parameters: fin(I) - pointer to packet information */ 1638 /* tcp(I) - pointer to TCP packet header */ 1639 /* is(I) - pointer to master state structure */ 1640 /* */ 1641 /* Check to see if a packet with TCP headers fits within the TCP window. */ 1642 /* Change timeout depending on whether new packet is a SYN-ACK returning */ 1643 /* for a SYN or a RST or FIN which indicate time to close up shop. */ 1644 /* ------------------------------------------------------------------------ */ 1645 static int fr_tcpstate(fin, tcp, is) 1646 fr_info_t *fin; 1647 tcphdr_t *tcp; 1648 ipstate_t *is; 1649 { 1650 int source, ret = 0, flags; 1651 tcpdata_t *fdata, *tdata; 1652 ipf_stack_t *ifs = fin->fin_ifs; 1653 1654 source = !fin->fin_rev; 1655 if (((is->is_flags & IS_TCPFSM) != 0) && (source == 1) && 1656 (ntohs(is->is_sport) != fin->fin_data[0])) 1657 source = 0; 1658 fdata = &is->is_tcp.ts_data[!source]; 1659 tdata = &is->is_tcp.ts_data[source]; 1660 1661 MUTEX_ENTER(&is->is_lock); 1662 1663 /* 1664 * If a SYN packet is received for a connection that is in a half 1665 * closed state, then move its state entry to deletetq. In such case 1666 * the SYN packet will be consequently dropped. This allows new state 1667 * entry to be created with a retransmited SYN packet. 
1668 */ 1669 if ((tcp->th_flags & TH_OPENING) == TH_SYN) { 1670 if ((is->is_state[source] > IPF_TCPS_ESTABLISHED) && 1671 (is->is_state[!source] > IPF_TCPS_ESTABLISHED)) { 1672 is->is_state[source] = IPF_TCPS_CLOSED; 1673 is->is_state[!source] = IPF_TCPS_CLOSED; 1674 /* 1675 * Do not update is->is_sti.tqe_die in case state entry 1676 * is already present in deletetq. It prevents state 1677 * entry ttl update by retransmitted SYN packets, which 1678 * may arrive before timer tick kicks off. The SYN 1679 * packet will be dropped again. 1680 */ 1681 if (is->is_sti.tqe_ifq != &ifs->ifs_ips_deletetq) 1682 fr_movequeue(&is->is_sti, is->is_sti.tqe_ifq, 1683 &fin->fin_ifs->ifs_ips_deletetq, 1684 fin->fin_ifs); 1685 1686 MUTEX_EXIT(&is->is_lock); 1687 return 0; 1688 } 1689 } 1690 1691 if (fr_tcpinwindow(fin, fdata, tdata, tcp, is->is_flags)) { 1692 #ifdef IPFILTER_SCAN 1693 if (is->is_flags & (IS_SC_CLIENT|IS_SC_SERVER)) { 1694 ipsc_packet(fin, is); 1695 if (FR_ISBLOCK(is->is_pass)) { 1696 MUTEX_EXIT(&is->is_lock); 1697 return 1; 1698 } 1699 } 1700 #endif 1701 1702 /* 1703 * Nearing end of connection, start timeout. 1704 */ 1705 ret = fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb, 1706 is->is_flags); 1707 if (ret == 0) { 1708 MUTEX_EXIT(&is->is_lock); 1709 return 0; 1710 } 1711 1712 /* 1713 * set s0's as appropriate. Use syn-ack packet as it 1714 * contains both pieces of required information. 1715 */ 1716 /* 1717 * Window scale option is only present in SYN/SYN-ACK packet. 1718 * Compare with ~TH_FIN to mask out T/TCP setups. 
1719 */ 1720 flags = tcp->th_flags & ~(TH_FIN|TH_ECNALL); 1721 if (flags == (TH_SYN|TH_ACK)) { 1722 is->is_s0[source] = ntohl(tcp->th_ack); 1723 is->is_s0[!source] = ntohl(tcp->th_seq) + 1; 1724 if (TCP_OFF(tcp) > (sizeof (tcphdr_t) >> 2)) { 1725 (void) fr_tcpoptions(fin, tcp, fdata); 1726 } 1727 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) 1728 fr_checknewisn(fin, is); 1729 } else if (flags == TH_SYN) { 1730 is->is_s0[source] = ntohl(tcp->th_seq) + 1; 1731 if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) 1732 (void) fr_tcpoptions(fin, tcp, tdata); 1733 1734 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) 1735 fr_checknewisn(fin, is); 1736 1737 } 1738 ret = 1; 1739 } else 1740 fin->fin_flx |= FI_OOW; 1741 MUTEX_EXIT(&is->is_lock); 1742 return ret; 1743 } 1744 1745 1746 /* ------------------------------------------------------------------------ */ 1747 /* Function: fr_checknewisn */ 1748 /* Returns: Nil */ 1749 /* Parameters: fin(I) - pointer to packet information */ 1750 /* is(I) - pointer to master state structure */ 1751 /* */ 1752 /* Check to see if this TCP connection is expecting and needs a new */ 1753 /* sequence number for a particular direction of the connection. */ 1754 /* */ 1755 /* NOTE: This does not actually change the sequence numbers, only gets new */ 1756 /* one ready. */ 1757 /* ------------------------------------------------------------------------ */ 1758 static void fr_checknewisn(fin, is) 1759 fr_info_t *fin; 1760 ipstate_t *is; 1761 { 1762 u_32_t sumd, old, new; 1763 tcphdr_t *tcp; 1764 int i; 1765 1766 i = fin->fin_rev; 1767 tcp = fin->fin_dp; 1768 1769 if (((i == 0) && !(is->is_flags & IS_ISNSYN)) || 1770 ((i == 1) && !(is->is_flags & IS_ISNACK))) { 1771 old = ntohl(tcp->th_seq); 1772 new = fr_newisn(fin); 1773 is->is_isninc[i] = new - old; 1774 CALC_SUMD(old, new, sumd); 1775 is->is_sumd[i] = (sumd & 0xffff) + (sumd >> 16); 1776 1777 is->is_flags |= ((i == 0) ? 
IS_ISNSYN : IS_ISNACK); 1778 } 1779 } 1780 1781 1782 /* ------------------------------------------------------------------------ */ 1783 /* Function: fr_tcpinwindow */ 1784 /* Returns: int - 1 == packet inside TCP "window", 0 == not inside. */ 1785 /* Parameters: fin(I) - pointer to packet information */ 1786 /* fdata(I) - pointer to tcp state informatio (forward) */ 1787 /* tdata(I) - pointer to tcp state informatio (reverse) */ 1788 /* tcp(I) - pointer to TCP packet header */ 1789 /* */ 1790 /* Given a packet has matched addresses and ports, check to see if it is */ 1791 /* within the TCP data window. In a show of generosity, allow packets that */ 1792 /* are within the window space behind the current sequence # as well. */ 1793 /* ------------------------------------------------------------------------ */ 1794 int fr_tcpinwindow(fin, fdata, tdata, tcp, flags) 1795 fr_info_t *fin; 1796 tcpdata_t *fdata, *tdata; 1797 tcphdr_t *tcp; 1798 int flags; 1799 { 1800 tcp_seq seq, ack, end; 1801 int ackskew, tcpflags; 1802 u_32_t win, maxwin; 1803 int dsize, inseq; 1804 1805 /* 1806 * Find difference between last checked packet and this packet. 1807 */ 1808 tcpflags = tcp->th_flags; 1809 seq = ntohl(tcp->th_seq); 1810 ack = ntohl(tcp->th_ack); 1811 1812 if (tcpflags & TH_SYN) 1813 win = ntohs(tcp->th_win); 1814 else 1815 win = ntohs(tcp->th_win) << fdata->td_winscale; 1816 1817 /* 1818 * win 0 means the receiving endpoint has closed the window, because it 1819 * has not enough memory to receive data from sender. In such case we 1820 * are pretending window size to be 1 to let TCP probe data through. 1821 * TCP probe data can be either 0 or 1 octet of data, the RFC does not 1822 * state this accurately, so we have to allow 1 octet (win = 1) even if 1823 * the window is closed (win == 0). 1824 */ 1825 if (win == 0) 1826 win = 1; 1827 1828 dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) + 1829 ((tcpflags & TH_SYN) ? 1 : 0) + ((tcpflags & TH_FIN) ? 
1 : 0); 1830 1831 /* 1832 * if window scaling is present, the scaling is only allowed 1833 * for windows not in the first SYN packet. In that packet the 1834 * window is 65535 to specify the largest window possible 1835 * for receivers not implementing the window scale option. 1836 * Currently, we do not assume TTCP here. That means that 1837 * if we see a second packet from a host (after the initial 1838 * SYN), we can assume that the receiver of the SYN did 1839 * already send back the SYN/ACK (and thus that we know if 1840 * the receiver also does window scaling) 1841 */ 1842 if (!(tcpflags & TH_SYN) && (fdata->td_winflags & TCP_WSCALE_FIRST)) { 1843 fdata->td_maxwin = win; 1844 } 1845 1846 end = seq + dsize; 1847 1848 if ((fdata->td_end == 0) && 1849 (!(flags & IS_TCPFSM) || 1850 ((tcpflags & TH_OPENING) == TH_OPENING))) { 1851 /* 1852 * Must be a (outgoing) SYN-ACK in reply to a SYN. 1853 */ 1854 fdata->td_end = end - 1; 1855 fdata->td_maxwin = 1; 1856 fdata->td_maxend = end + win; 1857 } 1858 1859 if (!(tcpflags & TH_ACK)) { /* Pretend an ack was sent */ 1860 ack = tdata->td_end; 1861 } else if (((tcpflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) && 1862 (ack == 0)) { 1863 /* gross hack to get around certain broken tcp stacks */ 1864 ack = tdata->td_end; 1865 } 1866 1867 maxwin = tdata->td_maxwin; 1868 ackskew = tdata->td_end - ack; 1869 1870 /* 1871 * Strict sequencing only allows in-order delivery. 
1872 */ 1873 if ((flags & IS_STRICT) != 0) { 1874 if (seq != fdata->td_end) { 1875 DTRACE_PROBE(strict_check); 1876 return 0; 1877 } 1878 } 1879 1880 #define SEQ_GE(a,b) ((int)((a) - (b)) >= 0) 1881 #define SEQ_GT(a,b) ((int)((a) - (b)) > 0) 1882 inseq = 0; 1883 DTRACE_PROBE4( 1884 dyn_params, 1885 int, dsize, 1886 int, ackskew, 1887 int, maxwin, 1888 int, win 1889 ); 1890 if ( 1891 #if defined(_KERNEL) 1892 /* 1893 * end <-> s + n 1894 * maxend <-> ack + win 1895 * this is upperbound check 1896 */ 1897 (SEQ_GE(fdata->td_maxend, end)) && 1898 /* 1899 * this is lowerbound check 1900 */ 1901 (SEQ_GE(seq, fdata->td_end - maxwin)) && 1902 #endif 1903 /* XXX what about big packets */ 1904 #define MAXACKWINDOW 66000 1905 (-ackskew <= (MAXACKWINDOW << fdata->td_winscale)) && 1906 ( ackskew <= (MAXACKWINDOW << fdata->td_winscale))) { 1907 inseq = 1; 1908 /* 1909 * Microsoft Windows will send the next packet to the right of the 1910 * window if SACK is in use. 1911 */ 1912 } else if ((seq == fdata->td_maxend) && (ackskew == 0) && 1913 (fdata->td_winflags & TCP_SACK_PERMIT) && 1914 (tdata->td_winflags & TCP_SACK_PERMIT)) { 1915 inseq = 1; 1916 /* 1917 * RST ACK with SEQ equal to 0 is sent by some OSes (i.e. Solaris) as a 1918 * response to initial SYN packet, when there is no application 1919 * listeing to on a port, where the SYN packet has came to. 1920 */ 1921 } else if ((seq == 0) && (tcpflags == (TH_RST|TH_ACK)) && 1922 (ackskew >= -1) && (ackskew <= 1)) { 1923 inseq = 1; 1924 } else if (!(flags & IS_TCPFSM)) { 1925 1926 if (!(fdata->td_winflags & 1927 (TCP_WSCALE_SEEN|TCP_WSCALE_FIRST))) { 1928 /* 1929 * No TCPFSM and no window scaling, so make some 1930 * extra guesses. 1931 */ 1932 if ((seq == fdata->td_maxend) && (ackskew == 0)) 1933 inseq = 1; 1934 else if (SEQ_GE(seq + maxwin, fdata->td_end - maxwin)) 1935 inseq = 1; 1936 } 1937 } 1938 1939 if (inseq) { 1940 /* if ackskew < 0 then this should be due to fragmented 1941 * packets. 
There is no way to know the length of the 1942 * total packet in advance. 1943 * We do know the total length from the fragment cache though. 1944 * Note however that there might be more sessions with 1945 * exactly the same source and destination parameters in the 1946 * state cache (and source and destination is the only stuff 1947 * that is saved in the fragment cache). Note further that 1948 * some TCP connections in the state cache are hashed with 1949 * sport and dport as well which makes it not worthwhile to 1950 * look for them. 1951 * Thus, when ackskew is negative but still seems to belong 1952 * to this session, we bump up the destinations end value. 1953 */ 1954 if (ackskew < 0) { 1955 DTRACE_PROBE2(end_update_td, 1956 int, tdata->td_end, 1957 int, ack 1958 ); 1959 tdata->td_end = ack; 1960 } 1961 1962 /* update max window seen */ 1963 if (fdata->td_maxwin < win) { 1964 DTRACE_PROBE2(win_update_fd, 1965 int, fdata->td_maxwin, 1966 int, win 1967 ); 1968 fdata->td_maxwin = win; 1969 } 1970 1971 if (SEQ_GT(end, fdata->td_end)) { 1972 DTRACE_PROBE2(end_update_fd, 1973 int, fdata->td_end, 1974 int, end 1975 ); 1976 fdata->td_end = end; 1977 } 1978 1979 if (SEQ_GE(ack + win, tdata->td_maxend)) { 1980 DTRACE_PROBE2(max_end_update_td, 1981 int, tdata->td_maxend, 1982 int, ack + win 1983 ); 1984 tdata->td_maxend = ack + win; 1985 } 1986 1987 return 1; 1988 } 1989 fin->fin_flx |= FI_OOW; 1990 1991 #if defined(_KERNEL) 1992 if (!(SEQ_GE(seq, fdata->td_end - maxwin))) 1993 fin->fin_flx |= FI_NEG_OOW; 1994 #endif 1995 1996 return 0; 1997 } 1998 1999 2000 /* ------------------------------------------------------------------------ */ 2001 /* Function: fr_stclone */ 2002 /* Returns: ipstate_t* - NULL == cloning failed, */ 2003 /* else pointer to new state structure */ 2004 /* Parameters: fin(I) - pointer to packet information */ 2005 /* tcp(I) - pointer to TCP/UDP header */ 2006 /* is(I) - pointer to master state structure */ 2007 /* */ 2008 /* Create a "duplcate" state 
table entry from the master. */ 2009 /* ------------------------------------------------------------------------ */ 2010 static ipstate_t *fr_stclone(fin, tcp, is) 2011 fr_info_t *fin; 2012 tcphdr_t *tcp; 2013 ipstate_t *is; 2014 { 2015 ipstate_t *clone; 2016 u_32_t send; 2017 ipf_stack_t *ifs = fin->fin_ifs; 2018 2019 /* 2020 * Trigger automatic call to fr_state_flush() if the 2021 * table has reached capacity specified by hi watermark. 2022 */ 2023 if (ST_TAB_WATER_LEVEL(ifs) > ifs->ifs_state_flush_level_hi) 2024 ifs->ifs_fr_state_doflush = 1; 2025 2026 /* 2027 * If automatic flushing did not do its job, and the table 2028 * has filled up, don't try to create a new entry. A NULL 2029 * return will indicate that the cloning has failed. 2030 */ 2031 if (ifs->ifs_ips_num >= ifs->ifs_fr_statemax) { 2032 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 2033 return NULL; 2034 } 2035 2036 KMALLOC(clone, ipstate_t *); 2037 if (clone == NULL) 2038 return NULL; 2039 bcopy((char *)is, (char *)clone, sizeof(*clone)); 2040 2041 MUTEX_NUKE(&clone->is_lock); 2042 2043 clone->is_die = ONE_DAY + ifs->ifs_fr_ticks; 2044 clone->is_state[0] = 0; 2045 clone->is_state[1] = 0; 2046 send = ntohl(tcp->th_seq) + fin->fin_dlen - (TCP_OFF(tcp) << 2) + 2047 ((tcp->th_flags & TH_SYN) ? 1 : 0) + 2048 ((tcp->th_flags & TH_FIN) ? 
1 : 0); 2049 2050 if (fin->fin_rev == 1) { 2051 clone->is_dend = send; 2052 clone->is_maxdend = send; 2053 clone->is_send = 0; 2054 clone->is_maxswin = 1; 2055 clone->is_maxdwin = ntohs(tcp->th_win); 2056 if (clone->is_maxdwin == 0) 2057 clone->is_maxdwin = 1; 2058 } else { 2059 clone->is_send = send; 2060 clone->is_maxsend = send; 2061 clone->is_dend = 0; 2062 clone->is_maxdwin = 1; 2063 clone->is_maxswin = ntohs(tcp->th_win); 2064 if (clone->is_maxswin == 0) 2065 clone->is_maxswin = 1; 2066 } 2067 2068 clone->is_flags &= ~SI_CLONE; 2069 clone->is_flags |= SI_CLONED; 2070 fr_stinsert(clone, fin->fin_rev, ifs); 2071 clone->is_ref = 1; 2072 if (clone->is_p == IPPROTO_TCP) { 2073 (void) fr_tcp_age(&clone->is_sti, fin, ifs->ifs_ips_tqtqb, 2074 clone->is_flags); 2075 } 2076 MUTEX_EXIT(&clone->is_lock); 2077 #ifdef IPFILTER_SCAN 2078 (void) ipsc_attachis(is); 2079 #endif 2080 #ifdef IPFILTER_SYNC 2081 if (is->is_flags & IS_STATESYNC) 2082 clone->is_sync = ipfsync_new(SMC_STATE, fin, clone); 2083 #endif 2084 return clone; 2085 } 2086 2087 2088 /* ------------------------------------------------------------------------ */ 2089 /* Function: fr_matchsrcdst */ 2090 /* Returns: Nil */ 2091 /* Parameters: fin(I) - pointer to packet information */ 2092 /* is(I) - pointer to state structure */ 2093 /* src(I) - pointer to source address */ 2094 /* dst(I) - pointer to destination address */ 2095 /* tcp(I) - pointer to TCP/UDP header */ 2096 /* */ 2097 /* Match a state table entry against an IP packet. The logic below is that */ 2098 /* ret gets set to one if the match succeeds, else remains 0. If it is */ 2099 /* still 0 after the test. no match. 
*/
/* ------------------------------------------------------------------------ */
/*
 * Additional notes on fr_matchsrcdst():
 *  - returns NULL when the interface, the addresses/ports or the option
 *    flags do not match, and when a required clone cannot be created;
 *    otherwise returns the matched entry (or the clone made from it).
 *  - cmask is ANDed with fin->fin_flx and with the flags stored in the
 *    state entry before the two are compared.
 *  - on a match the function writes to its arguments: wildcard addresses
 *    and ports in the entry are filled in from the packet, the interface
 *    for this direction is recorded if it was unset, and fin->fin_rev is
 *    set to the direction that matched.
 */
static ipstate_t *fr_matchsrcdst(fin, is, src, dst, tcp, cmask)
fr_info_t *fin;
ipstate_t *is;
i6addr_t *src, *dst;
tcphdr_t *tcp;
u_32_t cmask;
{
	int ret = 0, rev, out, flags, flx = 0, idx;
	u_short sp, dp;
	u_32_t cflx;
	void *ifp;
	ipf_stack_t *ifs = fin->fin_ifs;

	/*
	 * First guess at the direction: reverse (1) when the packet's
	 * destination is not the destination recorded in the state entry.
	 */
	rev = IP6_NEQ(&is->is_dst, dst);
	ifp = fin->fin_ifp;
	out = fin->fin_out;
	flags = is->is_flags;
	sp = 0;
	dp = 0;

	if (tcp != NULL) {
		/*
		 * NOTE(review): htons() and ntohs() perform the same 16-bit
		 * conversion, so sp and dp end up in the same byte order;
		 * the mixed spelling looks cosmetic - confirm before tidying.
		 */
		sp = htons(fin->fin_sport);
		dp = ntohs(fin->fin_dport);
	}
	if (!rev) {
		/*
		 * Same destination address, but a non-wildcard port that
		 * differs still makes this the reverse direction.
		 */
		if (tcp != NULL) {
			if (!(flags & SI_W_SPORT) && (sp != is->is_sport))
				rev = 1;
			else if (!(flags & SI_W_DPORT) && (dp != is->is_dport))
				rev = 1;
		}
	}

	/*
	 * Slot in is_ifp[]/is_ifname[] for this in/out + forward/reverse
	 * combination.
	 */
	idx = (out << 1) + rev;

	/*
	 * If the interface for this 'direction' is set, make sure it matches.
	 * An interface name that is not set matches any, as does a name of *.
	 */
	if ((is->is_ifp[idx] == NULL &&
	    (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) ||
	    is->is_ifp[idx] == ifp)
		ret = 1;

	if (ret == 0) {
		DTRACE_PROBE(no_match_on_iface);
		return NULL;
	}
	ret = 0;

	/*
	 * Match addresses and ports.
	 */
	if (rev == 0) {
		if ((IP6_EQ(&is->is_dst, dst) || (flags & SI_W_DADDR)) &&
		    (IP6_EQ(&is->is_src, src) || (flags & SI_W_SADDR))) {
			if (tcp) {
				if ((sp == is->is_sport || flags & SI_W_SPORT)&&
				    (dp == is->is_dport || flags & SI_W_DPORT))
					ret = 1;
			} else {
				ret = 1;
			}
		}
	} else {
		/*
		 * Reverse direction: the packet's source is compared with
		 * the entry's destination and vice versa.
		 */
		if ((IP6_EQ(&is->is_dst, src) || (flags & SI_W_DADDR)) &&
		    (IP6_EQ(&is->is_src, dst) || (flags & SI_W_SADDR))) {
			if (tcp) {
				if ((dp == is->is_sport || flags & SI_W_SPORT)&&
				    (sp == is->is_dport || flags & SI_W_DPORT))
					ret = 1;
			} else {
				ret = 1;
			}
		}
	}

	if (ret == 0) {
		DTRACE_PROBE(no_match_on_addrs);
		return NULL;
	}
	/*
	 * Whether or not this should be here, is questionable, but the aim
	 * is to get this out of the main line.
	 */
	if (tcp == NULL)
		flags = is->is_flags & ~(SI_WILDP|SI_NEWFR|SI_CLONE|SI_CLONED);

	/*
	 * Only one of the source or destination address can be flagged as a
	 * wildcard.  Fill in the missing address, if set.
	 * For IPv6, if the address being copied in is multicast, then
	 * don't reset the wild flag - multicast causes it to be set in the
	 * first place!
	 */
	if ((flags & (SI_W_SADDR|SI_W_DADDR))) {
		fr_ip_t *fi = &fin->fin_fi;

		if ((flags & SI_W_SADDR) != 0) {
			if (rev == 0) {
#ifdef USE_INET6
				if (is->is_v == 6 &&
				    IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6))
					/*EMPTY*/;
				else
#endif
				{
					is->is_src = fi->fi_src;
					is->is_flags &= ~SI_W_SADDR;
				}
			} else {
#ifdef USE_INET6
				if (is->is_v == 6 &&
				    IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6))
					/*EMPTY*/;
				else
#endif
				{
					is->is_src = fi->fi_dst;
					is->is_flags &= ~SI_W_SADDR;
				}
			}
		} else if ((flags & SI_W_DADDR) != 0) {
			if (rev == 0) {
#ifdef USE_INET6
				if (is->is_v == 6 &&
				    IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6))
					/*EMPTY*/;
				else
#endif
				{
					is->is_dst = fi->fi_dst;
					is->is_flags &= ~SI_W_DADDR;
				}
			} else {
#ifdef USE_INET6
				if (is->is_v == 6 &&
				    IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6))
					/*EMPTY*/;
				else
#endif
				{
					is->is_dst = fi->fi_src;
					is->is_flags &= ~SI_W_DADDR;
				}
			}
		}
		/*
		 * Once no wildcard address or port flag is left on the
		 * entry, drop it from the count of wildcard entries.
		 */
		if ((is->is_flags & (SI_WILDA|SI_WILDP)) == 0) {
			ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild);
		}
	}

	flx = fin->fin_flx & cmask;
	cflx = is->is_flx[out][rev];

	/*
	 * Match up any flags set from IP options.
	 */
	if ((cflx && (flx != (cflx & cmask))) ||
	    ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]) ||
	    ((fin->fin_secmsk & is->is_secmsk) != is->is_sec) ||
	    ((fin->fin_auth & is->is_authmsk) != is->is_auth)) {
		DTRACE_PROBE4(no_match_on_flags,
		    int, (cflx && (flx != (cflx & cmask))),
		    int,
		    ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]),
		    int, ((fin->fin_secmsk & is->is_secmsk) != is->is_sec),
		    int, ((fin->fin_auth & is->is_authmsk) != is->is_auth)
		    );
		return NULL;
	}
	/*
	 * Only one of the source or destination port can be flagged as a
	 * wildcard.  When filling it in, fill in a copy of the matched entry
	 * if it has the cloning flag set.
	 */
	if ((fin->fin_flx & FI_IGNORE) != 0) {
		/*
		 * Packets flagged FI_IGNORE match without updating the
		 * entry any further.
		 */
		fin->fin_rev = rev;
		return is;
	}

	if ((flags & (SI_W_SPORT|SI_W_DPORT))) {
		if ((flags & SI_CLONE) != 0) {
			ipstate_t *clone;

			/*
			 * From here on "is" refers to the clone; the master
			 * entry keeps its wildcard ports.
			 */
			clone = fr_stclone(fin, tcp, is);
			if (clone == NULL)
				return NULL;
			is = clone;
		} else {
			ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild);
		}

		if ((flags & SI_W_SPORT) != 0) {
			if (rev == 0) {
				is->is_sport = sp;
				is->is_send = ntohl(tcp->th_seq);
			} else {
				is->is_sport = dp;
				is->is_send = ntohl(tcp->th_ack);
			}
			is->is_maxsend = is->is_send + 1;
		} else if ((flags & SI_W_DPORT) != 0) {
			if (rev == 0) {
				is->is_dport = dp;
				is->is_dend = ntohl(tcp->th_ack);
			} else {
				is->is_dport = sp;
				is->is_dend = ntohl(tcp->th_seq);
			}
			is->is_maxdend = is->is_dend + 1;
		}
		is->is_flags &= ~(SI_W_SPORT|SI_W_DPORT);
		if ((flags & SI_CLONED) && ifs->ifs_ipstate_logging)
			ipstate_log(is, ISL_CLONE, ifs);
	}

	/* ret is not read again after this assignment. */
	ret = -1;

	if (is->is_flx[out][rev] == 0) {
		is->is_flx[out][rev] = flx;
		/*
		 * If we are dealing with the first packet coming in reverse
		 * direction (sent by peer), then we have to set options into
		 * state.
		 */
		if (rev == 1 && is->is_optmsk[1] == 0x0) {
			is->is_optmsk[1] = 0xffffffff;
			is->is_opt[1] = fin->fin_optmsk;
			DTRACE_PROBE(set_rev_opts);
		}
		if (is->is_v == 6) {
			is->is_opt[rev] &= ~0x8;
			is->is_optmsk[rev] &= ~0x8;
		}
	}

	/*
	 * Check if the interface name for this "direction" is set and if not,
	 * fill it in.
	 */
	if (is->is_ifp[idx] == NULL &&
	    (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) {
		is->is_ifp[idx] = ifp;
		COPYIFNAME(ifp, is->is_ifname[idx], fin->fin_v);
	}
	fin->fin_rev = rev;
	return is;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_checkicmpmatchingstate                                   */
/* Returns:     ipstate_t* - NULL == no matching state found,               */
/*                          else pointer to state information is returned  */
/* Parameters:  fin(I) - pointer to packet information                      */
/*                                                                          */
/* If we've got an ICMP error message, using the information stored in the  */
/* ICMP packet, look for a matching state table entry.                      */
/*                                                                          */
/* If we return NULL then no lock on ipf_state is held.                     */
/* If we return non-null then a read-lock on ipf_state is held.             */
/* ------------------------------------------------------------------------ */
static ipstate_t *fr_checkicmpmatchingstate(fin)
fr_info_t *fin;
{
	ipstate_t *is, **isp;
	u_short sport, dport;
	u_char	pr;
	int backward, i, oi;
	i6addr_t dst, src;
	struct icmp *ic;
	u_short savelen;
	icmphdr_t *icmp;
	fr_info_t ofin;
	tcphdr_t *tcp;
	int len;
	ip_t *oip;
	u_int hv;
	ipf_stack_t *ifs = fin->fin_ifs;

	/*
	 * Does it at least have the return (basic) IP header ?
	 * Is it an actual recognised ICMP error type?
	 * Only a basic IP header (no options) should be with
	 * an ICMP error header.
	 */
	if ((fin->fin_v != 4) || (fin->fin_hlen != sizeof(ip_t)) ||
	    (fin->fin_plen < ICMPERR_MINPKTLEN) ||
	    !(fin->fin_flx & FI_ICMPERR))
		return NULL;
	ic = fin->fin_dp;

	/* oip: the IP header of the packet quoted inside the ICMP error. */
	oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN);
	/*
	 * Check if at least the old IP header (with options) and
	 * 8 bytes of payload is present.
	 *
	 * NOTE(review): the quoted header length is not checked against the
	 * minimum of 5 words before it is used here, so (IP_HL(oip) - 5) can
	 * be negative - confirm whether an earlier layer rules that out.
	 */
	if (fin->fin_plen < ICMPERR_MAXPKTLEN + ((IP_HL(oip) - 5) << 2))
		return NULL;

	/*
	 * Sanity Checks.
	 */
	len = fin->fin_dlen - ICMPERR_ICMPHLEN;
	if ((len <= 0) || ((IP_HL(oip) << 2) > len))
		return NULL;

	/*
	 * Is the buffer big enough for all of it ?  It's the size of the IP
	 * header claimed in the encapsulated part which is of concern.  It
	 * may be too big to be in this buffer but not so big that it's
	 * outside the ICMP packet, leading to TCP deref's causing problems.
	 * This is possible because we don't know how big oip_hl is when we
	 * do the pullup early in fr_check() and thus can't guarantee it is
	 * all here now.
	 */
#ifdef  _KERNEL
	{
	mb_t *m;

	m = fin->fin_m;
# if defined(MENTAT)
	if ((char *)oip + len > (char *)m->b_wptr)
		return NULL;
# else
	if ((char *)oip + len > (char *)fin->fin_ip + m->m_len)
		return NULL;
# endif
	}
#endif
	bcopy((char *)fin, (char *)&ofin, sizeof(*fin));

	/*
	 * in the IPv4 case we must zero the i6addr union otherwise
	 * the IP6_EQ and IP6_NEQ macros produce the wrong results because
	 * of the 'junk' in the unused part of the union
	 */
	bzero((char *)&src, sizeof(src));
	bzero((char *)&dst, sizeof(dst));

	/*
	 * we make an fin entry to be able to feed it to
	 * matchsrcdst note that not all fields are necessary
	 * but this is the cleanest way. Note further we fill
	 * in fin_mp such that if someone uses it we'll get
	 * a kernel panic. fr_matchsrcdst does not use this.
	 *
	 * watch out here, as ip is in host order and oip in network
	 * order. Any change we make must be undone afterwards, like
	 * oip->ip_off - it is still in network byte order so fix it.
	 */
	savelen = oip->ip_len;
	oip->ip_len = len;
	oip->ip_off = ntohs(oip->ip_off);

	ofin.fin_flx = FI_NOCKSUM;
	ofin.fin_v = 4;
	ofin.fin_ip = oip;
	ofin.fin_m = NULL;	/* if dereferenced, panic XXX */
	ofin.fin_mp = NULL;	/* if dereferenced, panic XXX */
	ofin.fin_plen = fin->fin_dlen - ICMPERR_ICMPHLEN;
	(void) fr_makefrip(IP_HL(oip) << 2, oip, &ofin);
	ofin.fin_ifp = fin->fin_ifp;
	/* The quoted packet travelled the opposite way to this error. */
	ofin.fin_out = !fin->fin_out;
	/*
	 * Reset the short and bad flag here because in fr_matchsrcdst()
	 * the flags for the current packet (fin_flx) are compared against
	 * those for the existing session.
	 */
	ofin.fin_flx &= ~(FI_BAD|FI_SHORT);

	/*
	 * Put old values of ip_len and ip_off back as we don't know
	 * if we have to forward the packet (or process it again).
	 */
	oip->ip_len = savelen;
	oip->ip_off = htons(oip->ip_off);

	switch (oip->ip_p)
	{
	case IPPROTO_ICMP :
		/*
		 * an ICMP error can only be generated as a result of an
		 * ICMP query, not as the response on an ICMP error
		 *
		 * XXX theoretically ICMP_ECHOREP and the other reply's are
		 * ICMP query's as well, but adding them here seems strange XXX
		 */
		if ((ofin.fin_flx & FI_ICMPERR) != 0)
			return NULL;

		/*
		 * perform a lookup of the ICMP packet in the state table
		 */
		icmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
		hv = (pr = oip->ip_p);
		src.in4 = oip->ip_src;
		hv += src.in4.s_addr;
		dst.in4 = oip->ip_dst;
		hv += dst.in4.s_addr;
		hv += icmp->icmp_id;
		hv = DOUBLE_HASH(hv, ifs);

		READ_ENTER(&ifs->ifs_ipf_state);
		for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) {
			/*
			 * Advance isp first: "is" is overwritten by the
			 * match below and may come back NULL.
			 */
			isp = &is->is_hnext;
			if ((is->is_p != pr) || (is->is_v != 4))
				continue;
			if (is->is_pass & FR_NOICMPERR)
				continue;
			is = fr_matchsrcdst(&ofin, is, &src, &dst,
					    NULL, FI_ICMPCMP);
			if (is != NULL) {
				if ((is->is_pass & FR_NOICMPERR) != 0) {
					RWLOCK_EXIT(&ifs->ifs_ipf_state);
					return NULL;
				}
				/*
				 * i  : the index of this packet (the icmp
				 *      unreachable)
				 * oi : the index of the original packet found
				 *      in the icmp header (i.e. the packet
				 *      causing this icmp)
				 * backward : original packet was backward
				 *      compared to the state
				 */
				backward = IP6_NEQ(&is->is_src, &src);
				fin->fin_rev = !backward;
				i = (!backward << 1) + fin->fin_out;
				oi = (backward << 1) + ofin.fin_out;
				/*
				 * Do not accept more ICMP errors than packets
				 * seen in the direction that caused them.
				 */
				if (is->is_icmppkts[i] > is->is_pkts[oi])
					continue;
				ifs->ifs_ips_stats.iss_hits++;
				is->is_icmppkts[i]++;
				/* Returns with the read lock still held. */
				return is;
			}
		}
		RWLOCK_EXIT(&ifs->ifs_ipf_state);
		return NULL;
	case IPPROTO_TCP :
	case IPPROTO_UDP :
		break;
	default :
		return NULL;
	}

	/*
	 * TCP and UDP: only the port fields of the quoted transport header
	 * are read below.
	 */
	tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
	dport = tcp->th_dport;
	sport = tcp->th_sport;

	hv = (pr = oip->ip_p);
	src.in4 = oip->ip_src;
	hv += src.in4.s_addr;
	dst.in4 = oip->ip_dst;
	hv += dst.in4.s_addr;
	hv += dport;
	hv += sport;
	hv = DOUBLE_HASH(hv, ifs);

	READ_ENTER(&ifs->ifs_ipf_state);
	for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) {
		isp = &is->is_hnext;
		/*
		 * Only allow this icmp though if the
		 * encapsulated packet was allowed through the
		 * other way around. Note that the minimal amount
		 * of info present does not allow for checking against
		 * tcp internals such as seq and ack numbers.   Only the
		 * ports are known to be present and can be even if the
		 * short flag is set.
		 */
		if ((is->is_p == pr) && (is->is_v == 4) &&
		    (is = fr_matchsrcdst(&ofin, is, &src, &dst,
					 tcp, FI_ICMPCMP))) {
			/*
			 * i  : the index of this packet (the icmp unreachable)
			 * oi : the index of the original packet found in the
			 *      icmp header (i.e. the packet causing this icmp)
			 * backward : original packet was backward compared to
			 *            the state
			 */
			backward = IP6_NEQ(&is->is_src, &src);
			fin->fin_rev = !backward;
			i = (!backward << 1) + fin->fin_out;
			oi = (backward << 1) + ofin.fin_out;

			/*
			 * Unlike the ICMP case above, a rejected entry ends
			 * the search instead of moving on to the next one.
			 */
			if (((is->is_pass & FR_NOICMPERR) != 0) ||
			    (is->is_icmppkts[i] > is->is_pkts[oi]))
				break;
			ifs->ifs_ips_stats.iss_hits++;
			is->is_icmppkts[i]++;
			/*
			 * we deliberately do not touch the timeouts
			 * for the accompanying state table entry.
			 * It remains to be seen if that is correct. XXX
			 */
			return is;
		}
	}
	RWLOCK_EXIT(&ifs->ifs_ipf_state);
	return NULL;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_ipsmove                                                  */
/* Returns:     Nil                                                         */
/* Parameters:  is(I)  - pointer to state table entry                       */
/*              hv(I)  - new hash value for state table entry               */
/*              ifs(I) - pointer to IPF stack instance                      */
/* Write Locks: ipf_state                                                   */
/*                                                                          */
/* Move a state entry from one position in the hash table to another.       */
/* ------------------------------------------------------------------------ */
static void fr_ipsmove(is, hv, ifs)
ipstate_t *is;
u_int hv;
ipf_stack_t *ifs;
{
	ipstate_t **isp;
	u_int hvm;

	ASSERT(rw_read_locked(&ifs->ifs_ipf_state.ipf_lk) == 0);

	hvm = is->is_hv;
	/*
	 * Remove the hash from the old location...
	 */
	isp = is->is_phnext;
	if (is->is_hnext)
		is->is_hnext->is_phnext = isp;
	*isp = is->is_hnext;
	/* The old bucket counts as "in use" only while it is non-empty. */
	if (ifs->ifs_ips_table[hvm] == NULL)
		ifs->ifs_ips_stats.iss_inuse--;
	ifs->ifs_ips_stats.iss_bucketlen[hvm]--;

	/*
	 * ...and put the hash in the new one.
	 */
	hvm = DOUBLE_HASH(hv, ifs);
	is->is_hv = hvm;
	isp = &ifs->ifs_ips_table[hvm];
	/* Insert at the head of the new bucket. */
	if (*isp)
		(*isp)->is_phnext = &is->is_hnext;
	else
		ifs->ifs_ips_stats.iss_inuse++;
	ifs->ifs_ips_stats.iss_bucketlen[hvm]++;
	is->is_phnext = isp;
	is->is_hnext = *isp;
	*isp = is;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_stlookup                                                 */
/* Returns:     ipstate_t* - NULL == no matching state found,               */
/*                          else pointer to state information is returned  */
/* Parameters:  fin(I)  - pointer to packet information                     */
/*              tcp(I)  - pointer to TCP/UDP header.                        */
/*              ifqp(O) - where to store the timeout queue for the match    */
/*                                                                          */
/* Search the state table for a matching entry to the packet described by   */
/* the contents of *fin.                                                    */
/*                                                                          */
/* If we return NULL then no lock on ipf_state is held.                     */
/* If we return non-null then a read-lock on ipf_state is held.             */
/* ------------------------------------------------------------------------ */
/*
 * Additional notes on fr_stlookup():
 *  - the tcp argument is overwritten with fin->fin_dp before it is used.
 *  - the TCP/UDP and default cases set ifqp to NULL, so *ifqp is only
 *    written for ICMP and ICMPv6 matches.
 *  - NOTE(review): on the wildcard retry paths the lock is taken with
 *    WRITE_ENTER and only downgraded when the entry is re-hashed; when
 *    that condition is false a match returns without the downgrade -
 *    confirm this is intended given the lock contract stated above.
 */
ipstate_t *fr_stlookup(fin, tcp, ifqp)
fr_info_t *fin;
tcphdr_t *tcp;
ipftq_t **ifqp;
{
	u_int hv, hvm, pr, v, tryagain;
	ipstate_t *is, **isp;
	u_short dport, sport;
	i6addr_t src, dst;
	struct icmp *ic;
	ipftq_t *ifq;
	int oow;
	ipf_stack_t *ifs = fin->fin_ifs;

	is = NULL;
	ifq = NULL;
	tcp = fin->fin_dp;
	ic = (struct icmp *)tcp;
	/* The hash starts from the protocol plus both addresses. */
	hv = (pr = fin->fin_fi.fi_p);
	src = fin->fin_fi.fi_src;
	dst = fin->fin_fi.fi_dst;
	hv += src.in4.s_addr;
	hv += dst.in4.s_addr;

	v = fin->fin_fi.fi_v;
#ifdef	USE_INET6
	if (v == 6) {
		/* Word 0 of each address was already added via in4. */
		hv  += fin->fin_fi.fi_src.i6[1];
		hv  += fin->fin_fi.fi_src.i6[2];
		hv  += fin->fin_fi.fi_src.i6[3];

		if ((fin->fin_p == IPPROTO_ICMPV6) &&
		    IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_dst.in6)) {
			hv -= dst.in4.s_addr;
		} else {
			hv += fin->fin_fi.fi_dst.i6[1];
			hv += fin->fin_fi.fi_dst.i6[2];
			hv += fin->fin_fi.fi_dst.i6[3];
		}
	}
#endif
	if ((v == 4) &&
	    (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) {
		/*
		 * For multicast/broadcast IPv4 traffic one address is
		 * taken back out of the hash, chosen by packet direction.
		 */
		if (fin->fin_out == 0) {
			hv -= src.in4.s_addr;
		} else {
			hv -= dst.in4.s_addr;
		}
	}

	/*
	 * Search the hash table for matching packet header info.
	 */
	switch (pr)
	{
#ifdef	USE_INET6
	case IPPROTO_ICMPV6 :
		tryagain = 0;
		if (v == 6) {
			if ((ic->icmp_type == ICMP6_ECHO_REQUEST) ||
			    (ic->icmp_type == ICMP6_ECHO_REPLY)) {
				hv += ic->icmp_id;
			}
		}
		READ_ENTER(&ifs->ifs_ipf_state);
icmp6again:
		hvm = DOUBLE_HASH(hv, ifs);
		for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) {
			isp = &is->is_hnext;
			if ((is->is_p != pr) || (is->is_v != v))
				continue;
			is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
			if (is != NULL &&
			    fr_matchicmpqueryreply(v, &is->is_icmp,
						   ic, fin->fin_rev)) {
				if (fin->fin_rev)
					ifq = &ifs->ifs_ips_icmpacktq;
				else
					ifq = &ifs->ifs_ips_icmptq;
				break;
			}
		}

		if (is != NULL) {
			/*
			 * Found on the second pass and no longer wildcarded:
			 * re-hash the entry with the source address added
			 * back in, then drop from write to read lock.
			 */
			if ((tryagain != 0) && !(is->is_flags & SI_W_DADDR)) {
				hv += fin->fin_fi.fi_src.i6[0];
				hv += fin->fin_fi.fi_src.i6[1];
				hv += fin->fin_fi.fi_src.i6[2];
				hv += fin->fin_fi.fi_src.i6[3];
				fr_ipsmove(is, hv, ifs);
				MUTEX_DOWNGRADE(&ifs->ifs_ipf_state);
			}
			break;
		}
		RWLOCK_EXIT(&ifs->ifs_ipf_state);

		/*
		 * No matching icmp state entry. Perhaps this is a
		 * response to another state entry.
		 *
		 * XXX With some ICMP6 packets, the "other" address is already
		 * in the packet, after the ICMP6 header, and this could be
		 * used in place of the multicast address.  However, taking
		 * advantage of this requires some significant code changes
		 * to handle the specific types where that is the case.
		 */
		if ((ifs->ifs_ips_stats.iss_wild != 0) && (v == 6) && (tryagain == 0) &&
		    !IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_src.in6)) {
			/*
			 * Retry once with the source address removed from
			 * the hash; the write lock is needed because a hit
			 * may move the entry.
			 */
			hv -= fin->fin_fi.fi_src.i6[0];
			hv -= fin->fin_fi.fi_src.i6[1];
			hv -= fin->fin_fi.fi_src.i6[2];
			hv -= fin->fin_fi.fi_src.i6[3];
			tryagain = 1;
			WRITE_ENTER(&ifs->ifs_ipf_state);
			goto icmp6again;
		}

		is = fr_checkicmp6matchingstate(fin);
		if (is != NULL)
			return is;
		break;
#endif

	case IPPROTO_ICMP :
		if (v == 4) {
			hv += ic->icmp_id;
		}
		hv = DOUBLE_HASH(hv, ifs);
		READ_ENTER(&ifs->ifs_ipf_state);
		for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) {
			isp = &is->is_hnext;
			if ((is->is_p != pr) || (is->is_v != v))
				continue;
			is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
			if (is != NULL &&
			    fr_matchicmpqueryreply(v, &is->is_icmp,
						   ic, fin->fin_rev)) {
				if (fin->fin_rev)
					ifq = &ifs->ifs_ips_icmpacktq;
				else
					ifq = &ifs->ifs_ips_icmptq;
				break;
			}
		}
		if (is == NULL) {
			RWLOCK_EXIT(&ifs->ifs_ipf_state);
		}
		break;

	case IPPROTO_TCP :
	case IPPROTO_UDP :
		/* No timeout queue is reported back for TCP/UDP. */
		ifqp = NULL;
		sport = htons(fin->fin_data[0]);
		hv += sport;
		dport = htons(fin->fin_data[1]);
		hv += dport;
		oow = 0;
		tryagain = 0;
		READ_ENTER(&ifs->ifs_ipf_state);
retry_tcpudp:
		hvm = DOUBLE_HASH(hv, ifs);
		for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) {
			isp = &is->is_hnext;
			if ((is->is_p != pr) || (is->is_v != v))
				continue;
			fin->fin_flx &= ~FI_OOW;
			is = fr_matchsrcdst(fin, is, &src, &dst, tcp, FI_CMP);
			if (is != NULL) {
				if (pr == IPPROTO_TCP) {
					/*
					 * Remember an out-of-window result
					 * and keep looking for another
					 * entry in the bucket.
					 */
					if (!fr_tcpstate(fin, tcp, is)) {
						oow |= fin->fin_flx & FI_OOW;
						continue;
					}
				}
				break;
			}
		}
		if (is != NULL) {
			/*
			 * Found on a retry and no longer a wildcard or
			 * clone template: move the entry to its new hash
			 * bucket, then drop from write to read lock.
			 */
			if (tryagain &&
			    !(is->is_flags & (SI_CLONE|SI_WILDP|SI_WILDA))) {
				hv += dport;
				hv += sport;
				fr_ipsmove(is, hv, ifs);
				MUTEX_DOWNGRADE(&ifs->ifs_ipf_state);
			}
			break;
		}
		RWLOCK_EXIT(&ifs->ifs_ipf_state);

		/*
		 * Retries happen only while wildcard entries exist: the
		 * first retry hashes without the ports, the second rebuilds
		 * the hash from the protocol, one address and the ports.
		 */
		if (ifs->ifs_ips_stats.iss_wild) {
			if (tryagain == 0) {
				hv -= dport;
				hv -= sport;
			} else if (tryagain == 1) {
				hv = fin->fin_fi.fi_p;
				/*
				 * If we try to pretend this is a reply to a
				 * multicast/broadcast packet then we need to
				 * exclude part of the address from the hash
				 * calculation.
				 */
				if (fin->fin_out == 0) {
					hv += src.in4.s_addr;
				} else {
					hv += dst.in4.s_addr;
				}
				hv += dport;
				hv += sport;
			}
			tryagain++;
			if (tryagain <= 2) {
				WRITE_ENTER(&ifs->ifs_ipf_state);
				goto retry_tcpudp;
			}
		}
		/* Nothing matched: hand back any out-of-window indication. */
		fin->fin_flx |= oow;
		break;

#if 0
	case IPPROTO_GRE :
		gre = fin->fin_dp;
		if (GRE_REV(gre->gr_flags) == 1) {
			hv += gre->gr_call;
		}
		/* FALLTHROUGH */
#endif
	default :
		/*
		 * NOTE(review): ifqp is cleared here, so the ifq selected
		 * below never reaches the caller - confirm this is intended.
		 */
		ifqp = NULL;
		hvm = DOUBLE_HASH(hv, ifs);
		READ_ENTER(&ifs->ifs_ipf_state);
		for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) {
			isp = &is->is_hnext;
			if ((is->is_p != pr) || (is->is_v != v))
				continue;
			is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
			if (is != NULL) {
				ifq = &ifs->ifs_ips_iptq;
				break;
			}
		}
		if (is == NULL) {
			RWLOCK_EXIT(&ifs->ifs_ipf_state);
		}
		break;
	}

	/*
	 * A rule-based timeout queue, when one is set for this direction,
	 * takes precedence over the per-protocol default chosen above.
	 */
	if ((is != NULL) && ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) &&
	    (is->is_tqehead[fin->fin_rev] != NULL))
		ifq = is->is_tqehead[fin->fin_rev];
	if (ifq != NULL && ifqp != NULL)
		*ifqp = ifq;
	return is;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_updatestate                                              */
/* Returns:     Nil                                                         */
/* Parameters:  fin(I) - pointer to packet information                      */
/*              is(I)  - pointer to state table entry                       */
/*              ifq(I) - timeout queue to move the entry to, may be NULL    */
/* 
Read Locks: ipf_state */ 2927 /* */ 2928 /* Updates packet and byte counters for a newly received packet. Seeds the */ 2929 /* fragment cache with a new entry as required. */ 2930 /* ------------------------------------------------------------------------ */ 2931 void fr_updatestate(fin, is, ifq) 2932 fr_info_t *fin; 2933 ipstate_t *is; 2934 ipftq_t *ifq; 2935 { 2936 ipftqent_t *tqe; 2937 int i, pass; 2938 ipf_stack_t *ifs = fin->fin_ifs; 2939 2940 i = (fin->fin_rev << 1) + fin->fin_out; 2941 2942 /* 2943 * For TCP packets, ifq == NULL. For all others, check if this new 2944 * queue is different to the last one it was on and move it if so. 2945 */ 2946 tqe = &is->is_sti; 2947 MUTEX_ENTER(&is->is_lock); 2948 if ((tqe->tqe_flags & TQE_RULEBASED) != 0) 2949 ifq = is->is_tqehead[fin->fin_rev]; 2950 2951 if (ifq != NULL) 2952 fr_movequeue(tqe, tqe->tqe_ifq, ifq, ifs); 2953 2954 is->is_pkts[i]++; 2955 fin->fin_pktnum = is->is_pkts[i] + is->is_icmppkts[i]; 2956 is->is_bytes[i] += fin->fin_plen; 2957 MUTEX_EXIT(&is->is_lock); 2958 2959 #ifdef IPFILTER_SYNC 2960 if (is->is_flags & IS_STATESYNC) 2961 ipfsync_update(SMC_STATE, fin, is->is_sync); 2962 #endif 2963 2964 ATOMIC_INCL(ifs->ifs_ips_stats.iss_hits); 2965 2966 fin->fin_fr = is->is_rule; 2967 2968 /* 2969 * If this packet is a fragment and the rule says to track fragments, 2970 * then create a new fragment cache entry. 2971 */ 2972 pass = is->is_pass; 2973 if ((fin->fin_flx & FI_FRAG) && FR_ISPASS(pass)) 2974 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); 2975 } 2976 2977 2978 /* ------------------------------------------------------------------------ */ 2979 /* Function: fr_checkstate */ 2980 /* Returns: frentry_t* - NULL == search failed, */ 2981 /* else pointer to rule for matching state */ 2982 /* Parameters: ifp(I) - pointer to interface */ 2983 /* passp(I) - pointer to filtering result flags */ 2984 /* */ 2985 /* Check if a packet is associated with an entry in the state table. 
*/ 2986 /* ------------------------------------------------------------------------ */ 2987 frentry_t *fr_checkstate(fin, passp) 2988 fr_info_t *fin; 2989 u_32_t *passp; 2990 { 2991 ipstate_t *is; 2992 frentry_t *fr; 2993 tcphdr_t *tcp; 2994 ipftq_t *ifq; 2995 u_int pass; 2996 ipf_stack_t *ifs = fin->fin_ifs; 2997 2998 if (ifs->ifs_fr_state_lock || (ifs->ifs_ips_list == NULL) || 2999 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) 3000 return NULL; 3001 3002 is = NULL; 3003 if ((fin->fin_flx & FI_TCPUDP) || 3004 (fin->fin_fi.fi_p == IPPROTO_ICMP) 3005 #ifdef USE_INET6 3006 || (fin->fin_fi.fi_p == IPPROTO_ICMPV6) 3007 #endif 3008 ) 3009 tcp = fin->fin_dp; 3010 else 3011 tcp = NULL; 3012 3013 /* 3014 * Search the hash table for matching packet header info. 3015 */ 3016 ifq = NULL; 3017 is = fr_stlookup(fin, tcp, &ifq); 3018 switch (fin->fin_p) 3019 { 3020 #ifdef USE_INET6 3021 case IPPROTO_ICMPV6 : 3022 if (is != NULL) 3023 break; 3024 if (fin->fin_v == 6) { 3025 is = fr_checkicmp6matchingstate(fin); 3026 if (is != NULL) 3027 goto matched; 3028 } 3029 break; 3030 #endif 3031 case IPPROTO_ICMP : 3032 if (is != NULL) 3033 break; 3034 /* 3035 * No matching icmp state entry. Perhaps this is a 3036 * response to another state entry. 
3037 */ 3038 is = fr_checkicmpmatchingstate(fin); 3039 if (is != NULL) 3040 goto matched; 3041 break; 3042 case IPPROTO_TCP : 3043 if (is == NULL) 3044 break; 3045 3046 if (is->is_pass & FR_NEWISN) { 3047 if (fin->fin_out == 0) 3048 fr_fixinisn(fin, is); 3049 else if (fin->fin_out == 1) 3050 fr_fixoutisn(fin, is); 3051 } 3052 break; 3053 default : 3054 if (fin->fin_rev) 3055 ifq = &ifs->ifs_ips_udpacktq; 3056 else 3057 ifq = &ifs->ifs_ips_udptq; 3058 break; 3059 } 3060 if (is == NULL) { 3061 ATOMIC_INCL(ifs->ifs_ips_stats.iss_miss); 3062 return NULL; 3063 } 3064 3065 matched: 3066 fr = is->is_rule; 3067 if (fr != NULL) { 3068 if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) { 3069 if (fin->fin_nattag == NULL) { 3070 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3071 return NULL; 3072 } 3073 if (fr_matchtag(&fr->fr_nattag, fin->fin_nattag) != 0) { 3074 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3075 return NULL; 3076 } 3077 } 3078 (void) strncpy(fin->fin_group, fr->fr_group, FR_GROUPLEN); 3079 fin->fin_icode = fr->fr_icode; 3080 } 3081 3082 fin->fin_rule = is->is_rulen; 3083 pass = is->is_pass; 3084 fr_updatestate(fin, is, ifq); 3085 3086 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3087 fin->fin_flx |= FI_STATE; 3088 if ((pass & FR_LOGFIRST) != 0) 3089 pass &= ~(FR_LOGFIRST|FR_LOG); 3090 *passp = pass; 3091 return fr; 3092 } 3093 3094 3095 /* ------------------------------------------------------------------------ */ 3096 /* Function: fr_fixoutisn */ 3097 /* Returns: Nil */ 3098 /* Parameters: fin(I) - pointer to packet information */ 3099 /* is(I) - pointer to master state structure */ 3100 /* */ 3101 /* Called only for outbound packets, adjusts the sequence number and the */ 3102 /* TCP checksum to match that change. 
*/ 3103 /* ------------------------------------------------------------------------ */ 3104 static void fr_fixoutisn(fin, is) 3105 fr_info_t *fin; 3106 ipstate_t *is; 3107 { 3108 tcphdr_t *tcp; 3109 int rev; 3110 u_32_t seq; 3111 3112 tcp = fin->fin_dp; 3113 rev = fin->fin_rev; 3114 if ((is->is_flags & IS_ISNSYN) != 0) { 3115 if (rev == 0) { 3116 seq = ntohl(tcp->th_seq); 3117 seq += is->is_isninc[0]; 3118 tcp->th_seq = htonl(seq); 3119 fix_outcksum(&tcp->th_sum, is->is_sumd[0]); 3120 } 3121 } 3122 if ((is->is_flags & IS_ISNACK) != 0) { 3123 if (rev == 1) { 3124 seq = ntohl(tcp->th_seq); 3125 seq += is->is_isninc[1]; 3126 tcp->th_seq = htonl(seq); 3127 fix_outcksum(&tcp->th_sum, is->is_sumd[1]); 3128 } 3129 } 3130 } 3131 3132 3133 /* ------------------------------------------------------------------------ */ 3134 /* Function: fr_fixinisn */ 3135 /* Returns: Nil */ 3136 /* Parameters: fin(I) - pointer to packet information */ 3137 /* is(I) - pointer to master state structure */ 3138 /* */ 3139 /* Called only for inbound packets, adjusts the acknowledge number and the */ 3140 /* TCP checksum to match that change. 
*/ 3141 /* ------------------------------------------------------------------------ */ 3142 static void fr_fixinisn(fin, is) 3143 fr_info_t *fin; 3144 ipstate_t *is; 3145 { 3146 tcphdr_t *tcp; 3147 int rev; 3148 u_32_t ack; 3149 3150 tcp = fin->fin_dp; 3151 rev = fin->fin_rev; 3152 if ((is->is_flags & IS_ISNSYN) != 0) { 3153 if (rev == 1) { 3154 ack = ntohl(tcp->th_ack); 3155 ack -= is->is_isninc[0]; 3156 tcp->th_ack = htonl(ack); 3157 fix_incksum(&tcp->th_sum, is->is_sumd[0]); 3158 } 3159 } 3160 if ((is->is_flags & IS_ISNACK) != 0) { 3161 if (rev == 0) { 3162 ack = ntohl(tcp->th_ack); 3163 ack -= is->is_isninc[1]; 3164 tcp->th_ack = htonl(ack); 3165 fix_incksum(&tcp->th_sum, is->is_sumd[1]); 3166 } 3167 } 3168 } 3169 3170 3171 /* ------------------------------------------------------------------------ */ 3172 /* Function: fr_statesync */ 3173 /* Returns: Nil */ 3174 /* Parameters: action(I) - type of synchronisation to do */ 3175 /* v(I) - IP version being sync'd (v4 or v6) */ 3176 /* ifp(I) - interface identifier associated with action */ 3177 /* name(I) - name associated with ifp parameter */ 3178 /* */ 3179 /* Walk through all state entries and if an interface pointer match is */ 3180 /* found then look it up again, based on its name in case the pointer has */ 3181 /* changed since last time. */ 3182 /* */ 3183 /* If ifp is passed in as being non-null then we are only doing updates for */ 3184 /* existing, matching, uses of it. 
*/ 3185 /* ------------------------------------------------------------------------ */ 3186 void fr_statesync(action, v, ifp, name, ifs) 3187 int action, v; 3188 void *ifp; 3189 char *name; 3190 ipf_stack_t *ifs; 3191 { 3192 ipstate_t *is; 3193 int i; 3194 3195 if (ifs->ifs_fr_running <= 0) 3196 return; 3197 3198 WRITE_ENTER(&ifs->ifs_ipf_state); 3199 3200 if (ifs->ifs_fr_running <= 0) { 3201 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3202 return; 3203 } 3204 3205 switch (action) 3206 { 3207 case IPFSYNC_RESYNC : 3208 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3209 if (v != 0 && is->is_v != v) 3210 continue; 3211 /* 3212 * Look up all the interface names in the state entry. 3213 */ 3214 for (i = 0; i < 4; i++) { 3215 is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], 3216 is->is_v, ifs); 3217 } 3218 } 3219 break; 3220 case IPFSYNC_NEWIFP : 3221 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3222 if (v != 0 && is->is_v != v) 3223 continue; 3224 /* 3225 * Look up all the interface names in the state entry. 3226 */ 3227 for (i = 0; i < 4; i++) { 3228 if (!strncmp(is->is_ifname[i], name, 3229 sizeof(is->is_ifname[i]))) 3230 is->is_ifp[i] = ifp; 3231 } 3232 } 3233 break; 3234 case IPFSYNC_OLDIFP : 3235 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3236 if (v != 0 && is->is_v != v) 3237 continue; 3238 /* 3239 * Look up all the interface names in the state entry. 
3240 */ 3241 for (i = 0; i < 4; i++) { 3242 if (is->is_ifp[i] == ifp) 3243 is->is_ifp[i] = (void *)-1; 3244 } 3245 } 3246 break; 3247 } 3248 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3249 } 3250 3251 3252 #if SOLARIS2 >= 10 3253 /* ------------------------------------------------------------------------ */ 3254 /* Function: fr_stateifindexsync */ 3255 /* Returns: void */ 3256 /* Parameters: ifp - current network interface descriptor (ifindex) */ 3257 /* newifp - new interface descriptor (new ifindex) */ 3258 /* ifs - pointer to IPF stack */ 3259 /* */ 3260 /* Write Locks: assumes ipf_mutex is locked */ 3261 /* */ 3262 /* Updates all interface indeces matching to ifp with new interface index */ 3263 /* value. */ 3264 /* ------------------------------------------------------------------------ */ 3265 void fr_stateifindexsync(ifp, newifp, ifs) 3266 void *ifp; 3267 void *newifp; 3268 ipf_stack_t *ifs; 3269 { 3270 ipstate_t *is; 3271 int i; 3272 3273 WRITE_ENTER(&ifs->ifs_ipf_state); 3274 3275 for (is = ifs->ifs_ips_list; is != NULL; is = is->is_next) { 3276 3277 for (i = 0; i < 4; i++) { 3278 if (is->is_ifp[i] == ifp) 3279 is->is_ifp[i] = newifp; 3280 } 3281 } 3282 3283 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3284 } 3285 #endif 3286 3287 /* ------------------------------------------------------------------------ */ 3288 /* Function: fr_delstate */ 3289 /* Returns: int - 0 = entry deleted, else ref count on entry */ 3290 /* Parameters: is(I) - pointer to state structure to delete */ 3291 /* why(I) - if not 0, log reason why it was deleted */ 3292 /* ifs - ipf stack instance */ 3293 /* Write Locks: ipf_state/ipf_global */ 3294 /* */ 3295 /* Deletes a state entry from the enumerated list as well as the hash table */ 3296 /* and timeout queue lists. Make adjustments to hash table statistics and */ 3297 /* global counters as required. 
*/ 3298 /* ------------------------------------------------------------------------ */ 3299 int fr_delstate(is, why, ifs) 3300 ipstate_t *is; 3301 int why; 3302 ipf_stack_t *ifs; 3303 { 3304 int removed = 0; 3305 3306 ASSERT(rw_write_held(&ifs->ifs_ipf_global.ipf_lk) == 0 || 3307 rw_write_held(&ifs->ifs_ipf_state.ipf_lk) == 0); 3308 3309 /* 3310 * Start by removing the entry from the hash table of state entries 3311 * so it will not be "used" again. 3312 * 3313 * It will remain in the "list" of state entries until all references 3314 * have been accounted for. 3315 */ 3316 if (is->is_phnext != NULL) { 3317 removed = 1; 3318 *is->is_phnext = is->is_hnext; 3319 if (is->is_hnext != NULL) 3320 is->is_hnext->is_phnext = is->is_phnext; 3321 if (ifs->ifs_ips_table[is->is_hv] == NULL) 3322 ifs->ifs_ips_stats.iss_inuse--; 3323 ifs->ifs_ips_stats.iss_bucketlen[is->is_hv]--; 3324 3325 is->is_phnext = NULL; 3326 is->is_hnext = NULL; 3327 } 3328 3329 /* 3330 * Because ifs->ifs_ips_stats.iss_wild is a count of entries in the state 3331 * table that have wildcard flags set, only decerement it once 3332 * and do it here. 3333 */ 3334 if (is->is_flags & (SI_WILDP|SI_WILDA)) { 3335 if (!(is->is_flags & SI_CLONED)) { 3336 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); 3337 } 3338 is->is_flags &= ~(SI_WILDP|SI_WILDA); 3339 } 3340 3341 /* 3342 * Next, remove it from the timeout queue it is in. 3343 */ 3344 fr_deletequeueentry(&is->is_sti); 3345 3346 is->is_me = NULL; 3347 3348 /* 3349 * If it is still in use by something else, do not go any further, 3350 * but note that at this point it is now an orphan. 3351 */ 3352 MUTEX_ENTER(&is->is_lock); 3353 if (is->is_ref > 1) { 3354 is->is_ref--; 3355 MUTEX_EXIT(&is->is_lock); 3356 if (removed) 3357 ifs->ifs_ips_stats.iss_orphans++; 3358 return (is->is_ref); 3359 } 3360 MUTEX_EXIT(&is->is_lock); 3361 3362 is->is_ref = 0; 3363 3364 /* 3365 * If entry has already been removed from table, 3366 * it means we're simply cleaning up an orphan. 
3367 */ 3368 if (!removed) 3369 ifs->ifs_ips_stats.iss_orphans--; 3370 3371 if (is->is_tqehead[0] != NULL) 3372 (void) fr_deletetimeoutqueue(is->is_tqehead[0]); 3373 3374 if (is->is_tqehead[1] != NULL) 3375 (void) fr_deletetimeoutqueue(is->is_tqehead[1]); 3376 3377 #ifdef IPFILTER_SYNC 3378 if (is->is_sync) 3379 ipfsync_del(is->is_sync); 3380 #endif 3381 #ifdef IPFILTER_SCAN 3382 (void) ipsc_detachis(is); 3383 #endif 3384 3385 /* 3386 * Now remove it from master list of state table entries. 3387 */ 3388 if (is->is_pnext != NULL) { 3389 *is->is_pnext = is->is_next; 3390 if (is->is_next != NULL) { 3391 is->is_next->is_pnext = is->is_pnext; 3392 is->is_next = NULL; 3393 } 3394 is->is_pnext = NULL; 3395 } 3396 3397 if (ifs->ifs_ipstate_logging != 0 && why != 0) 3398 ipstate_log(is, why, ifs); 3399 3400 if (is->is_rule != NULL) { 3401 is->is_rule->fr_statecnt--; 3402 (void)fr_derefrule(&is->is_rule, ifs); 3403 } 3404 3405 MUTEX_DESTROY(&is->is_lock); 3406 KFREE(is); 3407 ifs->ifs_ips_num--; 3408 3409 return (0); 3410 } 3411 3412 3413 /* ------------------------------------------------------------------------ */ 3414 /* Function: fr_timeoutstate */ 3415 /* Returns: Nil */ 3416 /* Parameters: ifs - ipf stack instance */ 3417 /* */ 3418 /* Slowly expire held state for thingslike UDP and ICMP. The algorithm */ 3419 /* used here is to keep the queue sorted with the oldest things at the top */ 3420 /* and the youngest at the bottom. So if the top one doesn't need to be */ 3421 /* expired then neither will any under it. 
*/ 3422 /* ------------------------------------------------------------------------ */ 3423 void fr_timeoutstate(ifs) 3424 ipf_stack_t *ifs; 3425 { 3426 ipftq_t *ifq, *ifqnext; 3427 ipftqent_t *tqe, *tqn; 3428 ipstate_t *is; 3429 SPL_INT(s); 3430 3431 SPL_NET(s); 3432 WRITE_ENTER(&ifs->ifs_ipf_state); 3433 for (ifq = ifs->ifs_ips_tqtqb; ifq != NULL; ifq = ifq->ifq_next) 3434 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { 3435 if (tqe->tqe_die > ifs->ifs_fr_ticks) 3436 break; 3437 tqn = tqe->tqe_next; 3438 is = tqe->tqe_parent; 3439 (void) fr_delstate(is, ISL_EXPIRE, ifs); 3440 } 3441 3442 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifq->ifq_next) { 3443 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { 3444 if (tqe->tqe_die > ifs->ifs_fr_ticks) 3445 break; 3446 tqn = tqe->tqe_next; 3447 is = tqe->tqe_parent; 3448 (void) fr_delstate(is, ISL_EXPIRE, ifs); 3449 } 3450 } 3451 3452 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) { 3453 ifqnext = ifq->ifq_next; 3454 3455 if (((ifq->ifq_flags & IFQF_DELETE) != 0) && 3456 (ifq->ifq_ref == 0)) { 3457 fr_freetimeoutqueue(ifq, ifs); 3458 } 3459 } 3460 3461 if (ifs->ifs_fr_state_doflush) { 3462 (void) fr_state_flush(FLUSH_TABLE_EXTRA, 0, ifs); 3463 ifs->ifs_fr_state_doflush = 0; 3464 } 3465 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3466 SPL_X(s); 3467 } 3468 3469 3470 /* ---------------------------------------------------------------------- */ 3471 /* Function: fr_state_flush */ 3472 /* Returns: int - 0 == success, -1 == failure */ 3473 /* Parameters: flush_option - how to flush the active State table */ 3474 /* proto - IP version to flush (4, 6, or both) */ 3475 /* ifs - ipf stack instance */ 3476 /* Write Locks: ipf_state */ 3477 /* */ 3478 /* Flush state tables. 
Three possible flush options currently defined: */ 3479 /* */ 3480 /* FLUSH_TABLE_ALL : Flush all state table entries */ 3481 /* */ 3482 /* FLUSH_TABLE_CLOSING : Flush entries with TCP connections which */ 3483 /* have started to close on both ends using */ 3484 /* ipf_flushclosing(). */ 3485 /* */ 3486 /* FLUSH_TABLE_EXTRA : First, flush entries which are "almost" closed. */ 3487 /* Then, if needed, flush entries with TCP */ 3488 /* connections which have been idle for a long */ 3489 /* time with ipf_extraflush(). */ 3490 /* ---------------------------------------------------------------------- */ 3491 static int fr_state_flush(flush_option, proto, ifs) 3492 int flush_option, proto; 3493 ipf_stack_t *ifs; 3494 { 3495 ipstate_t *is, *isn; 3496 int removed; 3497 SPL_INT(s); 3498 3499 removed = 0; 3500 3501 SPL_NET(s); 3502 switch (flush_option) 3503 { 3504 case FLUSH_TABLE_ALL: 3505 isn = ifs->ifs_ips_list; 3506 while ((is = isn) != NULL) { 3507 isn = is->is_next; 3508 if ((proto != 0) && (is->is_v != proto)) 3509 continue; 3510 if (fr_delstate(is, ISL_FLUSH, ifs) == 0) 3511 removed++; 3512 } 3513 break; 3514 3515 case FLUSH_TABLE_CLOSING: 3516 removed = ipf_flushclosing(STATE_FLUSH, 3517 IPF_TCPS_CLOSE_WAIT, 3518 ifs->ifs_ips_tqtqb, 3519 ifs->ifs_ips_utqe, 3520 ifs); 3521 break; 3522 3523 case FLUSH_TABLE_EXTRA: 3524 removed = ipf_flushclosing(STATE_FLUSH, 3525 IPF_TCPS_FIN_WAIT_2, 3526 ifs->ifs_ips_tqtqb, 3527 ifs->ifs_ips_utqe, 3528 ifs); 3529 3530 /* 3531 * Be sure we haven't done this in the last 10 seconds. 
3532 */ 3533 if (ifs->ifs_fr_ticks - ifs->ifs_ips_last_force_flush < 3534 IPF_TTLVAL(10)) 3535 break; 3536 ifs->ifs_ips_last_force_flush = ifs->ifs_fr_ticks; 3537 removed += ipf_extraflush(STATE_FLUSH, 3538 &ifs->ifs_ips_tqtqb[IPF_TCPS_ESTABLISHED], 3539 ifs->ifs_ips_utqe, 3540 ifs); 3541 break; 3542 3543 default: /* Flush Nothing */ 3544 break; 3545 } 3546 3547 SPL_X(s); 3548 return (removed); 3549 } 3550 3551 3552 /* ------------------------------------------------------------------------ */ 3553 /* Function: fr_tcp_age */ 3554 /* Returns: int - 1 == state transition made, 0 == no change (rejected) */ 3555 /* Parameters: tq(I) - pointer to timeout queue information */ 3556 /* fin(I) - pointer to packet information */ 3557 /* tqtab(I) - TCP timeout queue table this is in */ 3558 /* flags(I) - flags from state/NAT entry */ 3559 /* */ 3560 /* Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29: */ 3561 /* */ 3562 /* - (try to) base state transitions on real evidence only, */ 3563 /* i.e. packets that are sent and have been received by ipfilter; */ 3564 /* diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used. */ 3565 /* */ 3566 /* - deal with half-closed connections correctly; */ 3567 /* */ 3568 /* - store the state of the source in state[0] such that ipfstat */ 3569 /* displays the state as source/dest instead of dest/source; the calls */ 3570 /* to fr_tcp_age have been changed accordingly. */ 3571 /* */ 3572 /* Internal Parameters: */ 3573 /* */ 3574 /* state[0] = state of source (host that initiated connection) */ 3575 /* state[1] = state of dest (host that accepted the connection) */ 3576 /* */ 3577 /* dir == 0 : a packet from source to dest */ 3578 /* dir == 1 : a packet from dest to source */ 3579 /* */ 3580 /* Locking: it is assumed that the parent of the tqe structure is locked. 
*/ 3581 /* ------------------------------------------------------------------------ */ 3582 int fr_tcp_age(tqe, fin, tqtab, flags) 3583 ipftqent_t *tqe; 3584 fr_info_t *fin; 3585 ipftq_t *tqtab; 3586 int flags; 3587 { 3588 int dlen, ostate, nstate, rval, dir; 3589 u_char tcpflags; 3590 tcphdr_t *tcp; 3591 ipf_stack_t *ifs = fin->fin_ifs; 3592 3593 tcp = fin->fin_dp; 3594 3595 rval = 0; 3596 dir = fin->fin_rev; 3597 tcpflags = tcp->th_flags; 3598 dlen = fin->fin_dlen - (TCP_OFF(tcp) << 2); 3599 3600 ostate = tqe->tqe_state[1 - dir]; 3601 nstate = tqe->tqe_state[dir]; 3602 3603 DTRACE_PROBE4( 3604 indata, 3605 fr_info_t *, fin, 3606 int, ostate, 3607 int, nstate, 3608 u_char, tcpflags 3609 ); 3610 3611 if (tcpflags & TH_RST) { 3612 if (!(tcpflags & TH_PUSH) && !dlen) 3613 nstate = IPF_TCPS_CLOSED; 3614 else 3615 nstate = IPF_TCPS_CLOSE_WAIT; 3616 3617 /* 3618 * Once RST is received, we must advance peer's state to 3619 * CLOSE_WAIT. 3620 */ 3621 if (ostate <= IPF_TCPS_ESTABLISHED) { 3622 tqe->tqe_state[1 - dir] = IPF_TCPS_CLOSE_WAIT; 3623 } 3624 rval = 1; 3625 } else { 3626 3627 switch (nstate) 3628 { 3629 case IPF_TCPS_LISTEN: /* 0 */ 3630 if ((tcpflags & TH_OPENING) == TH_OPENING) { 3631 /* 3632 * 'dir' received an S and sends SA in 3633 * response, CLOSED -> SYN_RECEIVED 3634 */ 3635 nstate = IPF_TCPS_SYN_RECEIVED; 3636 rval = 1; 3637 } else if ((tcpflags & TH_OPENING) == TH_SYN) { 3638 /* 'dir' sent S, CLOSED -> SYN_SENT */ 3639 nstate = IPF_TCPS_SYN_SENT; 3640 rval = 1; 3641 } 3642 /* 3643 * the next piece of code makes it possible to get 3644 * already established connections into the state table 3645 * after a restart or reload of the filter rules; this 3646 * does not work when a strict 'flags S keep state' is 3647 * used for tcp connections of course 3648 */ 3649 if (((flags & IS_TCPFSM) == 0) && 3650 ((tcpflags & TH_ACKMASK) == TH_ACK)) { 3651 /* 3652 * we saw an A, guess 'dir' is in ESTABLISHED 3653 * mode 3654 */ 3655 switch (ostate) 3656 { 3657 case 
IPF_TCPS_LISTEN : 3658 case IPF_TCPS_SYN_RECEIVED : 3659 nstate = IPF_TCPS_HALF_ESTAB; 3660 rval = 1; 3661 break; 3662 case IPF_TCPS_HALF_ESTAB : 3663 case IPF_TCPS_ESTABLISHED : 3664 nstate = IPF_TCPS_ESTABLISHED; 3665 rval = 1; 3666 break; 3667 default : 3668 break; 3669 } 3670 } 3671 /* 3672 * TODO: besides regular ACK packets we can have other 3673 * packets as well; it is yet to be determined how we 3674 * should initialize the states in those cases 3675 */ 3676 break; 3677 3678 case IPF_TCPS_SYN_SENT: /* 1 */ 3679 if ((tcpflags & ~(TH_ECN|TH_CWR)) == TH_SYN) { 3680 /* 3681 * A retransmitted SYN packet. We do not reset 3682 * the timeout here to fr_tcptimeout because a 3683 * connection connect timeout does not renew 3684 * after every packet that is sent. We need to 3685 * set rval so as to indicate the packet has 3686 * passed the check for its flags being valid 3687 * in the TCP FSM. Setting rval to 2 has the 3688 * result of not resetting the timeout. 3689 */ 3690 rval = 2; 3691 } else if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == 3692 TH_ACK) { 3693 /* 3694 * we see an A from 'dir' which is in SYN_SENT 3695 * state: 'dir' sent an A in response to an SA 3696 * which it received, SYN_SENT -> ESTABLISHED 3697 */ 3698 nstate = IPF_TCPS_ESTABLISHED; 3699 rval = 1; 3700 } else if (tcpflags & TH_FIN) { 3701 /* 3702 * we see an F from 'dir' which is in SYN_SENT 3703 * state and wants to close its side of the 3704 * connection; SYN_SENT -> FIN_WAIT_1 3705 */ 3706 nstate = IPF_TCPS_FIN_WAIT_1; 3707 rval = 1; 3708 } else if ((tcpflags & TH_OPENING) == TH_OPENING) { 3709 /* 3710 * we see an SA from 'dir' which is already in 3711 * SYN_SENT state, this means we have a 3712 * simultaneous open; SYN_SENT -> SYN_RECEIVED 3713 */ 3714 nstate = IPF_TCPS_SYN_RECEIVED; 3715 rval = 1; 3716 } 3717 break; 3718 3719 case IPF_TCPS_SYN_RECEIVED: /* 2 */ 3720 if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) { 3721 /* 3722 * we see an A from 'dir' which was in 3723 * SYN_RECEIVED 
state so it must now be in 3724 * established state, SYN_RECEIVED -> 3725 * ESTABLISHED 3726 */ 3727 nstate = IPF_TCPS_ESTABLISHED; 3728 rval = 1; 3729 } else if ((tcpflags & ~(TH_ECN|TH_CWR)) == 3730 TH_OPENING) { 3731 /* 3732 * We see an SA from 'dir' which is already in 3733 * SYN_RECEIVED state. 3734 */ 3735 rval = 2; 3736 } else if (tcpflags & TH_FIN) { 3737 /* 3738 * we see an F from 'dir' which is in 3739 * SYN_RECEIVED state and wants to close its 3740 * side of the connection; SYN_RECEIVED -> 3741 * FIN_WAIT_1 3742 */ 3743 nstate = IPF_TCPS_FIN_WAIT_1; 3744 rval = 1; 3745 } 3746 break; 3747 3748 case IPF_TCPS_HALF_ESTAB: /* 3 */ 3749 if (tcpflags & TH_FIN) { 3750 nstate = IPF_TCPS_FIN_WAIT_1; 3751 rval = 1; 3752 } else if ((tcpflags & TH_ACKMASK) == TH_ACK) { 3753 /* 3754 * If we've picked up a connection in mid 3755 * flight, we could be looking at a follow on 3756 * packet from the same direction as the one 3757 * that created this state. Recognise it but 3758 * do not advance the entire connection's 3759 * state. 3760 */ 3761 switch (ostate) 3762 { 3763 case IPF_TCPS_LISTEN : 3764 case IPF_TCPS_SYN_SENT : 3765 case IPF_TCPS_SYN_RECEIVED : 3766 rval = 1; 3767 break; 3768 case IPF_TCPS_HALF_ESTAB : 3769 case IPF_TCPS_ESTABLISHED : 3770 nstate = IPF_TCPS_ESTABLISHED; 3771 rval = 1; 3772 break; 3773 default : 3774 break; 3775 } 3776 } 3777 break; 3778 3779 case IPF_TCPS_ESTABLISHED: /* 4 */ 3780 rval = 1; 3781 if (tcpflags & TH_FIN) { 3782 /* 3783 * 'dir' closed its side of the connection; 3784 * this gives us a half-closed connection; 3785 * ESTABLISHED -> FIN_WAIT_1 3786 */ 3787 if (ostate == IPF_TCPS_FIN_WAIT_1) { 3788 nstate = IPF_TCPS_CLOSING; 3789 } else { 3790 nstate = IPF_TCPS_FIN_WAIT_1; 3791 } 3792 } else if (tcpflags & TH_ACK) { 3793 /* 3794 * an ACK, should we exclude other flags here? 
3795 */ 3796 if (ostate == IPF_TCPS_FIN_WAIT_1) { 3797 /* 3798 * We know the other side did an active 3799 * close, so we are ACKing the recvd 3800 * FIN packet (does the window matching 3801 * code guarantee this?) and go into 3802 * CLOSE_WAIT state; this gives us a 3803 * half-closed connection 3804 */ 3805 nstate = IPF_TCPS_CLOSE_WAIT; 3806 } else if (ostate < IPF_TCPS_CLOSE_WAIT) { 3807 /* 3808 * still a fully established 3809 * connection reset timeout 3810 */ 3811 nstate = IPF_TCPS_ESTABLISHED; 3812 } 3813 } 3814 break; 3815 3816 case IPF_TCPS_CLOSE_WAIT: /* 5 */ 3817 rval = 1; 3818 if (tcpflags & TH_FIN) { 3819 /* 3820 * application closed and 'dir' sent a FIN, 3821 * we're now going into LAST_ACK state 3822 */ 3823 nstate = IPF_TCPS_LAST_ACK; 3824 } else { 3825 /* 3826 * we remain in CLOSE_WAIT because the other 3827 * side has closed already and we did not 3828 * close our side yet; reset timeout 3829 */ 3830 nstate = IPF_TCPS_CLOSE_WAIT; 3831 } 3832 break; 3833 3834 case IPF_TCPS_FIN_WAIT_1: /* 6 */ 3835 rval = 1; 3836 if ((tcpflags & TH_ACK) && 3837 ostate > IPF_TCPS_CLOSE_WAIT) { 3838 /* 3839 * if the other side is not active anymore 3840 * it has sent us a FIN packet that we are 3841 * ack'ing now with an ACK; this means both 3842 * sides have now closed the connection and 3843 * we go into LAST_ACK 3844 */ 3845 /* 3846 * XXX: how do we know we really are ACKing 3847 * the FIN packet here? does the window code 3848 * guarantee that? 
3849 */ 3850 nstate = IPF_TCPS_LAST_ACK; 3851 } else { 3852 /* 3853 * we closed our side of the connection 3854 * already but the other side is still active 3855 * (ESTABLISHED/CLOSE_WAIT); continue with 3856 * this half-closed connection 3857 */ 3858 nstate = IPF_TCPS_FIN_WAIT_1; 3859 } 3860 break; 3861 3862 case IPF_TCPS_CLOSING: /* 7 */ 3863 if ((tcpflags & (TH_FIN|TH_ACK)) == TH_ACK) { 3864 nstate = IPF_TCPS_TIME_WAIT; 3865 } 3866 rval = 1; 3867 break; 3868 3869 case IPF_TCPS_LAST_ACK: /* 8 */ 3870 /* 3871 * We want to reset timer here to keep state in table. 3872 * If we would allow the state to time out here, while 3873 * there would still be packets being retransmitted, we 3874 * would cut off line between the two peers preventing 3875 * them to close connection properly. 3876 */ 3877 rval = 1; 3878 break; 3879 3880 case IPF_TCPS_FIN_WAIT_2: /* 9 */ 3881 /* NOT USED */ 3882 break; 3883 3884 case IPF_TCPS_TIME_WAIT: /* 10 */ 3885 /* we're in 2MSL timeout now */ 3886 if (ostate == IPF_TCPS_LAST_ACK) { 3887 nstate = IPF_TCPS_CLOSED; 3888 rval = 1; 3889 } else { 3890 rval = 2; 3891 } 3892 break; 3893 3894 case IPF_TCPS_CLOSED: /* 11 */ 3895 rval = 2; 3896 break; 3897 3898 default : 3899 #if defined(_KERNEL) 3900 ASSERT(nstate >= IPF_TCPS_LISTEN && 3901 nstate <= IPF_TCPS_CLOSED); 3902 #else 3903 abort(); 3904 #endif 3905 break; 3906 } 3907 } 3908 3909 /* 3910 * If rval == 2 then do not update the queue position, but treat the 3911 * packet as being ok. 3912 */ 3913 if (rval == 2) { 3914 DTRACE_PROBE1(state_keeping_timer, int, nstate); 3915 rval = 1; 3916 } 3917 else if (rval == 1) { 3918 tqe->tqe_state[dir] = nstate; 3919 /* 3920 * The nstate can either advance to a new state, or remain 3921 * unchanged, resetting the timer by moving to the bottom of 3922 * the queue. 
3923 */ 3924 DTRACE_PROBE1(state_done, int, nstate); 3925 3926 if ((tqe->tqe_flags & TQE_RULEBASED) == 0) 3927 fr_movequeue(tqe, tqe->tqe_ifq, tqtab + nstate, ifs); 3928 } 3929 3930 return rval; 3931 } 3932 3933 3934 /* ------------------------------------------------------------------------ */ 3935 /* Function: ipstate_log */ 3936 /* Returns: Nil */ 3937 /* Parameters: is(I) - pointer to state structure */ 3938 /* type(I) - type of log entry to create */ 3939 /* */ 3940 /* Creates a state table log entry using the state structure and type info. */ 3941 /* passed in. Log packet/byte counts, source/destination address and other */ 3942 /* protocol specific information. */ 3943 /* ------------------------------------------------------------------------ */ 3944 void ipstate_log(is, type, ifs) 3945 struct ipstate *is; 3946 u_int type; 3947 ipf_stack_t *ifs; 3948 { 3949 #ifdef IPFILTER_LOG 3950 struct ipslog ipsl; 3951 size_t sizes[1]; 3952 void *items[1]; 3953 int types[1]; 3954 3955 /* 3956 * Copy information out of the ipstate_t structure and into the 3957 * structure used for logging. 
3958 */ 3959 ipsl.isl_type = type; 3960 ipsl.isl_pkts[0] = is->is_pkts[0] + is->is_icmppkts[0]; 3961 ipsl.isl_bytes[0] = is->is_bytes[0]; 3962 ipsl.isl_pkts[1] = is->is_pkts[1] + is->is_icmppkts[1]; 3963 ipsl.isl_bytes[1] = is->is_bytes[1]; 3964 ipsl.isl_pkts[2] = is->is_pkts[2] + is->is_icmppkts[2]; 3965 ipsl.isl_bytes[2] = is->is_bytes[2]; 3966 ipsl.isl_pkts[3] = is->is_pkts[3] + is->is_icmppkts[3]; 3967 ipsl.isl_bytes[3] = is->is_bytes[3]; 3968 ipsl.isl_src = is->is_src; 3969 ipsl.isl_dst = is->is_dst; 3970 ipsl.isl_p = is->is_p; 3971 ipsl.isl_v = is->is_v; 3972 ipsl.isl_flags = is->is_flags; 3973 ipsl.isl_tag = is->is_tag; 3974 ipsl.isl_rulen = is->is_rulen; 3975 (void) strncpy(ipsl.isl_group, is->is_group, FR_GROUPLEN); 3976 3977 if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) { 3978 ipsl.isl_sport = is->is_sport; 3979 ipsl.isl_dport = is->is_dport; 3980 if (ipsl.isl_p == IPPROTO_TCP) { 3981 ipsl.isl_state[0] = is->is_state[0]; 3982 ipsl.isl_state[1] = is->is_state[1]; 3983 } 3984 } else if (ipsl.isl_p == IPPROTO_ICMP) { 3985 ipsl.isl_itype = is->is_icmp.ici_type; 3986 } else if (ipsl.isl_p == IPPROTO_ICMPV6) { 3987 ipsl.isl_itype = is->is_icmp.ici_type; 3988 } else { 3989 ipsl.isl_ps.isl_filler[0] = 0; 3990 ipsl.isl_ps.isl_filler[1] = 0; 3991 } 3992 3993 items[0] = &ipsl; 3994 sizes[0] = sizeof(ipsl); 3995 types[0] = 0; 3996 3997 if (ipllog(IPL_LOGSTATE, NULL, items, sizes, types, 1, ifs)) { 3998 ATOMIC_INCL(ifs->ifs_ips_stats.iss_logged); 3999 } else { 4000 ATOMIC_INCL(ifs->ifs_ips_stats.iss_logfail); 4001 } 4002 #endif 4003 } 4004 4005 4006 #ifdef USE_INET6 4007 /* ------------------------------------------------------------------------ */ 4008 /* Function: fr_checkicmp6matchingstate */ 4009 /* Returns: ipstate_t* - NULL == no match found, */ 4010 /* else pointer to matching state entry */ 4011 /* Parameters: fin(I) - pointer to packet information */ 4012 /* Locks: NULL == no locks, else Read Lock on ipf_state */ 4013 /* */ 4014 /* If we've got 
an ICMPv6 error message, using the information stored in */ 4015 /* the ICMPv6 packet, look for a matching state table entry. */ 4016 /* ------------------------------------------------------------------------ */ 4017 static ipstate_t *fr_checkicmp6matchingstate(fin) 4018 fr_info_t *fin; 4019 { 4020 struct icmp6_hdr *ic6, *oic; 4021 int backward, i; 4022 ipstate_t *is, **isp; 4023 u_short sport, dport; 4024 i6addr_t dst, src; 4025 u_short savelen; 4026 icmpinfo_t *ic; 4027 fr_info_t ofin; 4028 tcphdr_t *tcp; 4029 ip6_t *oip6; 4030 u_char pr; 4031 u_int hv; 4032 ipf_stack_t *ifs = fin->fin_ifs; 4033 4034 /* 4035 * Does it at least have the return (basic) IP header ? 4036 * Is it an actual recognised ICMP error type? 4037 * Only a basic IP header (no options) should be with 4038 * an ICMP error header. 4039 */ 4040 if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN) || 4041 !(fin->fin_flx & FI_ICMPERR)) 4042 return NULL; 4043 4044 ic6 = fin->fin_dp; 4045 4046 oip6 = (ip6_t *)((char *)ic6 + ICMPERR_ICMPHLEN); 4047 if (fin->fin_plen < sizeof(*oip6)) 4048 return NULL; 4049 4050 bcopy((char *)fin, (char *)&ofin, sizeof(*fin)); 4051 ofin.fin_v = 6; 4052 ofin.fin_ifp = fin->fin_ifp; 4053 ofin.fin_out = !fin->fin_out; 4054 ofin.fin_m = NULL; /* if dereferenced, panic XXX */ 4055 ofin.fin_mp = NULL; /* if dereferenced, panic XXX */ 4056 4057 /* 4058 * We make a fin entry to be able to feed it to 4059 * matchsrcdst. Note that not all fields are necessary 4060 * but this is the cleanest way. Note further we fill 4061 * in fin_mp such that if someone uses it we'll get 4062 * a kernel panic. fr_matchsrcdst does not use this. 4063 * 4064 * watch out here, as ip is in host order and oip6 in network 4065 * order. Any change we make must be undone afterwards. 
4066 */ 4067 savelen = oip6->ip6_plen; 4068 oip6->ip6_plen = fin->fin_dlen - ICMPERR_ICMPHLEN; 4069 ofin.fin_flx = FI_NOCKSUM; 4070 ofin.fin_ip = (ip_t *)oip6; 4071 ofin.fin_plen = oip6->ip6_plen; 4072 (void) fr_makefrip(sizeof(*oip6), (ip_t *)oip6, &ofin); 4073 ofin.fin_flx &= ~(FI_BAD|FI_SHORT); 4074 oip6->ip6_plen = savelen; 4075 4076 if (oip6->ip6_nxt == IPPROTO_ICMPV6) { 4077 oic = (struct icmp6_hdr *)(oip6 + 1); 4078 /* 4079 * an ICMP error can only be generated as a result of an 4080 * ICMP query, not as the response on an ICMP error 4081 * 4082 * XXX theoretically ICMP_ECHOREP and the other reply's are 4083 * ICMP query's as well, but adding them here seems strange XXX 4084 */ 4085 if (!(oic->icmp6_type & ICMP6_INFOMSG_MASK)) 4086 return NULL; 4087 4088 /* 4089 * perform a lookup of the ICMP packet in the state table 4090 */ 4091 hv = (pr = oip6->ip6_nxt); 4092 src.in6 = oip6->ip6_src; 4093 hv += src.in4.s_addr; 4094 dst.in6 = oip6->ip6_dst; 4095 hv += dst.in4.s_addr; 4096 hv += oic->icmp6_id; 4097 hv += oic->icmp6_seq; 4098 hv = DOUBLE_HASH(hv, ifs); 4099 4100 READ_ENTER(&ifs->ifs_ipf_state); 4101 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 4102 ic = &is->is_icmp; 4103 isp = &is->is_hnext; 4104 if ((is->is_p == pr) && 4105 !(is->is_pass & FR_NOICMPERR) && 4106 (oic->icmp6_id == ic->ici_id) && 4107 (oic->icmp6_seq == ic->ici_seq) && 4108 (is = fr_matchsrcdst(&ofin, is, &src, 4109 &dst, NULL, FI_ICMPCMP))) { 4110 /* 4111 * in the state table ICMP query's are stored 4112 * with the type of the corresponding ICMP 4113 * response. 
Correct here 4114 */ 4115 if (((ic->ici_type == ICMP6_ECHO_REPLY) && 4116 (oic->icmp6_type == ICMP6_ECHO_REQUEST)) || 4117 (ic->ici_type - 1 == oic->icmp6_type )) { 4118 ifs->ifs_ips_stats.iss_hits++; 4119 backward = IP6_NEQ(&is->is_dst, &src); 4120 fin->fin_rev = !backward; 4121 i = (backward << 1) + fin->fin_out; 4122 is->is_icmppkts[i]++; 4123 return is; 4124 } 4125 } 4126 } 4127 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4128 return NULL; 4129 } 4130 4131 hv = (pr = oip6->ip6_nxt); 4132 src.in6 = oip6->ip6_src; 4133 hv += src.i6[0]; 4134 hv += src.i6[1]; 4135 hv += src.i6[2]; 4136 hv += src.i6[3]; 4137 dst.in6 = oip6->ip6_dst; 4138 hv += dst.i6[0]; 4139 hv += dst.i6[1]; 4140 hv += dst.i6[2]; 4141 hv += dst.i6[3]; 4142 4143 if ((oip6->ip6_nxt == IPPROTO_TCP) || (oip6->ip6_nxt == IPPROTO_UDP)) { 4144 tcp = (tcphdr_t *)(oip6 + 1); 4145 dport = tcp->th_dport; 4146 sport = tcp->th_sport; 4147 hv += dport; 4148 hv += sport; 4149 } else 4150 tcp = NULL; 4151 hv = DOUBLE_HASH(hv, ifs); 4152 4153 READ_ENTER(&ifs->ifs_ipf_state); 4154 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 4155 isp = &is->is_hnext; 4156 /* 4157 * Only allow this icmp though if the 4158 * encapsulated packet was allowed through the 4159 * other way around. Note that the minimal amount 4160 * of info present does not allow for checking against 4161 * tcp internals such as seq and ack numbers. 4162 */ 4163 if ((is->is_p != pr) || (is->is_v != 6) || 4164 (is->is_pass & FR_NOICMPERR)) 4165 continue; 4166 is = fr_matchsrcdst(&ofin, is, &src, &dst, tcp, FI_ICMPCMP); 4167 if (is != NULL) { 4168 ifs->ifs_ips_stats.iss_hits++; 4169 backward = IP6_NEQ(&is->is_dst, &src); 4170 fin->fin_rev = !backward; 4171 i = (backward << 1) + fin->fin_out; 4172 is->is_icmppkts[i]++; 4173 /* 4174 * we deliberately do not touch the timeouts 4175 * for the accompanying state table entry. 4176 * It remains to be seen if that is correct. 
XXX 4177 */ 4178 return is; 4179 } 4180 } 4181 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4182 return NULL; 4183 } 4184 #endif 4185 4186 4187 /* ------------------------------------------------------------------------ */ 4188 /* Function: fr_sttab_init */ 4189 /* Returns: Nil */ 4190 /* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */ 4191 /* */ 4192 /* Initialise the array of timeout queues for TCP. */ 4193 /* ------------------------------------------------------------------------ */ 4194 void fr_sttab_init(tqp, ifs) 4195 ipftq_t *tqp; 4196 ipf_stack_t *ifs; 4197 { 4198 int i; 4199 4200 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) { 4201 tqp[i].ifq_ttl = 0; 4202 tqp[i].ifq_ref = 1; 4203 tqp[i].ifq_head = NULL; 4204 tqp[i].ifq_tail = &tqp[i].ifq_head; 4205 tqp[i].ifq_next = tqp + i + 1; 4206 MUTEX_INIT(&tqp[i].ifq_lock, "ipftq tcp tab"); 4207 } 4208 tqp[IPF_TCP_NSTATES - 1].ifq_next = NULL; 4209 tqp[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcpclosed; 4210 tqp[IPF_TCPS_LISTEN].ifq_ttl = ifs->ifs_fr_tcptimeout; 4211 tqp[IPF_TCPS_SYN_SENT].ifq_ttl = ifs->ifs_fr_tcptimeout; 4212 tqp[IPF_TCPS_SYN_RECEIVED].ifq_ttl = ifs->ifs_fr_tcptimeout; 4213 tqp[IPF_TCPS_ESTABLISHED].ifq_ttl = ifs->ifs_fr_tcpidletimeout; 4214 tqp[IPF_TCPS_CLOSE_WAIT].ifq_ttl = ifs->ifs_fr_tcphalfclosed; 4215 tqp[IPF_TCPS_FIN_WAIT_1].ifq_ttl = ifs->ifs_fr_tcphalfclosed; 4216 tqp[IPF_TCPS_CLOSING].ifq_ttl = ifs->ifs_fr_tcptimeout; 4217 tqp[IPF_TCPS_LAST_ACK].ifq_ttl = ifs->ifs_fr_tcplastack; 4218 tqp[IPF_TCPS_FIN_WAIT_2].ifq_ttl = ifs->ifs_fr_tcpclosewait; 4219 tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = ifs->ifs_fr_tcptimeout; 4220 tqp[IPF_TCPS_HALF_ESTAB].ifq_ttl = ifs->ifs_fr_tcptimeout; 4221 } 4222 4223 4224 /* ------------------------------------------------------------------------ */ 4225 /* Function: fr_sttab_destroy */ 4226 /* Returns: Nil */ 4227 /* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */ 4228 /* */ 4229 /* Do whatever is necessary to "destroy" each of the entries 
in the array */ 4230 /* of timeout queues for TCP. */ 4231 /* ------------------------------------------------------------------------ */ 4232 void fr_sttab_destroy(tqp) 4233 ipftq_t *tqp; 4234 { 4235 int i; 4236 4237 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) 4238 MUTEX_DESTROY(&tqp[i].ifq_lock); 4239 } 4240 4241 4242 /* ------------------------------------------------------------------------ */ 4243 /* Function: fr_statederef */ 4244 /* Returns: Nil */ 4245 /* Parameters: isp(I) - pointer to pointer to state table entry */ 4246 /* ifs - ipf stack instance */ 4247 /* */ 4248 /* Decrement the reference counter for this state table entry and free it */ 4249 /* if there are no more things using it. */ 4250 /* */ 4251 /* Internal parameters: */ 4252 /* state[0] = state of source (host that initiated connection) */ 4253 /* state[1] = state of dest (host that accepted the connection) */ 4254 /* ------------------------------------------------------------------------ */ 4255 void fr_statederef(isp, ifs) 4256 ipstate_t **isp; 4257 ipf_stack_t *ifs; 4258 { 4259 ipstate_t *is; 4260 4261 is = *isp; 4262 *isp = NULL; 4263 4264 MUTEX_ENTER(&is->is_lock); 4265 if (is->is_ref > 1) { 4266 is->is_ref--; 4267 MUTEX_EXIT(&is->is_lock); 4268 #ifndef _KERNEL 4269 if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) || 4270 (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) { 4271 (void) fr_delstate(is, ISL_ORPHAN, ifs); 4272 } 4273 #endif 4274 return; 4275 } 4276 MUTEX_EXIT(&is->is_lock); 4277 4278 WRITE_ENTER(&ifs->ifs_ipf_state); 4279 (void) fr_delstate(is, ISL_EXPIRE, ifs); 4280 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4281 } 4282 4283 4284 /* ------------------------------------------------------------------------ */ 4285 /* Function: fr_setstatequeue */ 4286 /* Returns: Nil */ 4287 /* Parameters: is(I) - pointer to state structure */ 4288 /* rev(I) - forward(0) or reverse(1) direction */ 4289 /* Locks: ipf_state (read or write) */ 4290 /* */ 4291 /* Put the state entry on its default 
queue entry, using rev as a helped in */ 4292 /* determining which queue it should be placed on. */ 4293 /* ------------------------------------------------------------------------ */ 4294 void fr_setstatequeue(is, rev, ifs) 4295 ipstate_t *is; 4296 int rev; 4297 ipf_stack_t *ifs; 4298 { 4299 ipftq_t *oifq, *nifq; 4300 4301 4302 if ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) 4303 nifq = is->is_tqehead[rev]; 4304 else 4305 nifq = NULL; 4306 4307 if (nifq == NULL) { 4308 switch (is->is_p) 4309 { 4310 #ifdef USE_INET6 4311 case IPPROTO_ICMPV6 : 4312 if (rev == 1) 4313 nifq = &ifs->ifs_ips_icmpacktq; 4314 else 4315 nifq = &ifs->ifs_ips_icmptq; 4316 break; 4317 #endif 4318 case IPPROTO_ICMP : 4319 if (rev == 1) 4320 nifq = &ifs->ifs_ips_icmpacktq; 4321 else 4322 nifq = &ifs->ifs_ips_icmptq; 4323 break; 4324 case IPPROTO_TCP : 4325 nifq = ifs->ifs_ips_tqtqb + is->is_state[rev]; 4326 break; 4327 4328 case IPPROTO_UDP : 4329 if (rev == 1) 4330 nifq = &ifs->ifs_ips_udpacktq; 4331 else 4332 nifq = &ifs->ifs_ips_udptq; 4333 break; 4334 4335 default : 4336 nifq = &ifs->ifs_ips_iptq; 4337 break; 4338 } 4339 } 4340 4341 oifq = is->is_sti.tqe_ifq; 4342 /* 4343 * If it's currently on a timeout queue, move it from one queue to 4344 * another, else put it on the end of the newly determined queue. 4345 */ 4346 if (oifq != NULL) 4347 fr_movequeue(&is->is_sti, oifq, nifq, ifs); 4348 else 4349 fr_queueappend(&is->is_sti, nifq, is, ifs); 4350 return; 4351 } 4352 4353 4354 /* ------------------------------------------------------------------------ */ 4355 /* Function: fr_stateiter */ 4356 /* Returns: int - 0 == success, else error */ 4357 /* Parameters: token(I) - pointer to ipftoken structure */ 4358 /* itp(I) - pointer to ipfgeniter structure */ 4359 /* */ 4360 /* This function handles the SIOCGENITER ioctl for the state tables and */ 4361 /* walks through the list of entries in the state table list (ips_list.) 
*/ 4362 /* ------------------------------------------------------------------------ */ 4363 static int fr_stateiter(token, itp, ifs) 4364 ipftoken_t *token; 4365 ipfgeniter_t *itp; 4366 ipf_stack_t *ifs; 4367 { 4368 ipstate_t *is, *next, zero; 4369 int error, count; 4370 char *dst; 4371 4372 if (itp->igi_data == NULL) 4373 return EFAULT; 4374 4375 if (itp->igi_nitems == 0) 4376 return EINVAL; 4377 4378 if (itp->igi_type != IPFGENITER_STATE) 4379 return EINVAL; 4380 4381 error = 0; 4382 4383 READ_ENTER(&ifs->ifs_ipf_state); 4384 4385 /* 4386 * Get "previous" entry from the token and find the next entry. 4387 */ 4388 is = token->ipt_data; 4389 if (is == NULL) { 4390 next = ifs->ifs_ips_list; 4391 } else { 4392 next = is->is_next; 4393 } 4394 4395 dst = itp->igi_data; 4396 for (count = itp->igi_nitems; count > 0; count--) { 4397 /* 4398 * If we found an entry, add a reference to it and update the token. 4399 * Otherwise, zero out data to be returned and NULL out token. 4400 */ 4401 if (next != NULL) { 4402 MUTEX_ENTER(&next->is_lock); 4403 next->is_ref++; 4404 MUTEX_EXIT(&next->is_lock); 4405 token->ipt_data = next; 4406 } else { 4407 bzero(&zero, sizeof(zero)); 4408 next = &zero; 4409 token->ipt_data = NULL; 4410 } 4411 4412 /* 4413 * Safe to release lock now the we have a reference. 4414 */ 4415 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4416 4417 /* 4418 * Copy out data and clean up references and tokens. 4419 */ 4420 error = COPYOUT(next, dst, sizeof(*next)); 4421 if (error != 0) 4422 error = EFAULT; 4423 if (token->ipt_data == NULL) { 4424 ipf_freetoken(token, ifs); 4425 break; 4426 } else { 4427 if (is != NULL) 4428 fr_statederef(&is, ifs); 4429 if (next->is_next == NULL) { 4430 ipf_freetoken(token, ifs); 4431 break; 4432 } 4433 } 4434 4435 if ((count == 1) || (error != 0)) 4436 break; 4437 4438 READ_ENTER(&ifs->ifs_ipf_state); 4439 dst += sizeof(*next); 4440 is = next; 4441 next = is->is_next; 4442 } 4443 4444 return error; 4445 } 4446