1 /* 2 * Copyright (C) 1995-2003 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 7 * Use is subject to license terms. 8 */ 9 10 #pragma ident "%Z%%M% %I% %E% SMI" 11 12 #if defined(KERNEL) || defined(_KERNEL) 13 # undef KERNEL 14 # undef _KERNEL 15 # define KERNEL 1 16 # define _KERNEL 1 17 #endif 18 #include <sys/errno.h> 19 #include <sys/types.h> 20 #include <sys/param.h> 21 #include <sys/file.h> 22 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ 23 defined(_KERNEL) 24 # include "opt_ipfilter_log.h" 25 #endif 26 #if defined(_KERNEL) && defined(__FreeBSD_version) && \ 27 (__FreeBSD_version >= 400000) && !defined(KLD_MODULE) 28 #include "opt_inet6.h" 29 #endif 30 #if !defined(_KERNEL) && !defined(__KERNEL__) 31 # include <stdio.h> 32 # include <stdlib.h> 33 # include <string.h> 34 # define _KERNEL 35 # ifdef __OpenBSD__ 36 struct file; 37 # endif 38 # include <sys/uio.h> 39 # undef _KERNEL 40 #endif 41 #if defined(_KERNEL) && (__FreeBSD_version >= 220000) 42 # include <sys/filio.h> 43 # include <sys/fcntl.h> 44 # if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM) 45 # include "opt_ipfilter.h" 46 # endif 47 #else 48 # include <sys/ioctl.h> 49 #endif 50 #include <sys/time.h> 51 #if !defined(linux) 52 # include <sys/protosw.h> 53 #endif 54 #include <sys/socket.h> 55 #if defined(_KERNEL) 56 # include <sys/systm.h> 57 # if !defined(__SVR4) && !defined(__svr4__) 58 # include <sys/mbuf.h> 59 # endif 60 #endif 61 #if defined(__SVR4) || defined(__svr4__) 62 # include <sys/filio.h> 63 # include <sys/byteorder.h> 64 # ifdef _KERNEL 65 # include <sys/dditypes.h> 66 # endif 67 # include <sys/stream.h> 68 # include <sys/kmem.h> 69 #endif 70 71 #include <net/if.h> 72 #ifdef sun 73 # include <net/af.h> 74 #endif 75 #include <net/route.h> 76 #include <netinet/in.h> 77 #include <netinet/in_systm.h> 78 #include <netinet/ip.h> 79 #include 
<netinet/tcp.h> 80 #if !defined(linux) 81 # include <netinet/ip_var.h> 82 #endif 83 #if !defined(__hpux) && !defined(linux) 84 # include <netinet/tcp_fsm.h> 85 #endif 86 #include <netinet/udp.h> 87 #include <netinet/ip_icmp.h> 88 #include "netinet/ip_compat.h" 89 #include <netinet/tcpip.h> 90 #include "netinet/ip_fil.h" 91 #include "netinet/ip_nat.h" 92 #include "netinet/ip_frag.h" 93 #include "netinet/ip_state.h" 94 #include "netinet/ip_proxy.h" 95 #include "netinet/ipf_stack.h" 96 #ifdef IPFILTER_SYNC 97 #include "netinet/ip_sync.h" 98 #endif 99 #ifdef IPFILTER_SCAN 100 #include "netinet/ip_scan.h" 101 #endif 102 #ifdef USE_INET6 103 #include <netinet/icmp6.h> 104 #endif 105 #if (__FreeBSD_version >= 300000) 106 # include <sys/malloc.h> 107 # if defined(_KERNEL) && !defined(IPFILTER_LKM) 108 # include <sys/libkern.h> 109 # include <sys/systm.h> 110 # endif 111 #endif 112 /* END OF INCLUDES */ 113 114 115 #if !defined(lint) 116 static const char sccsid[] = "@(#)ip_state.c 1.8 6/5/96 (C) 1993-2000 Darren Reed"; 117 static const char rcsid[] = "@(#)$Id: ip_state.c,v 2.186.2.36 2005/08/11 19:58:03 darrenr Exp $"; 118 #endif 119 120 #ifdef USE_INET6 121 static ipstate_t *fr_checkicmp6matchingstate __P((fr_info_t *)); 122 #endif 123 static ipstate_t *fr_matchsrcdst __P((fr_info_t *, ipstate_t *, i6addr_t *, 124 i6addr_t *, tcphdr_t *, u_32_t)); 125 static ipstate_t *fr_checkicmpmatchingstate __P((fr_info_t *)); 126 static int fr_state_flush __P((int, int, ipf_stack_t *)); 127 static ips_stat_t *fr_statetstats __P((ipf_stack_t *)); 128 static void fr_delstate __P((ipstate_t *, int, ipf_stack_t *)); 129 static int fr_state_remove __P((caddr_t, ipf_stack_t *)); 130 static void fr_ipsmove __P((ipstate_t *, u_int, ipf_stack_t *)); 131 static int fr_tcpstate __P((fr_info_t *, tcphdr_t *, ipstate_t *)); 132 static int fr_tcpoptions __P((fr_info_t *, tcphdr_t *, tcpdata_t *)); 133 static ipstate_t *fr_stclone __P((fr_info_t *, tcphdr_t *, ipstate_t *)); 134 static void 
fr_fixinisn __P((fr_info_t *, ipstate_t *)); 135 static void fr_fixoutisn __P((fr_info_t *, ipstate_t *)); 136 static void fr_checknewisn __P((fr_info_t *, ipstate_t *)); 137 static int fr_stateiter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 138 139 int fr_stputent __P((caddr_t, ipf_stack_t *)); 140 int fr_stgetent __P((caddr_t, ipf_stack_t *)); 141 142 #define ONE_DAY IPF_TTLVAL(1 * 86400) /* 1 day */ 143 #define FIVE_DAYS (5 * ONE_DAY) 144 #define DOUBLE_HASH(x, ifs) \ 145 (((x) + ifs->ifs_ips_seed[(x) % ifs->ifs_fr_statesize]) % ifs->ifs_fr_statesize) 146 147 148 149 /* ------------------------------------------------------------------------ */ 150 /* Function: fr_stateinit */ 151 /* Returns: int - 0 == success, -1 == failure */ 152 /* Parameters: Nil */ 153 /* */ 154 /* Initialise all the global variables used within the state code. */ 155 /* This action also includes initiailising locks. */ 156 /* ------------------------------------------------------------------------ */ 157 int fr_stateinit(ifs) 158 ipf_stack_t *ifs; 159 { 160 #if defined(NEED_LOCAL_RAND) || !defined(_KERNEL) 161 struct timeval tv; 162 #endif 163 int i; 164 165 KMALLOCS(ifs->ifs_ips_table, ipstate_t **, 166 ifs->ifs_fr_statesize * sizeof(ipstate_t *)); 167 if (ifs->ifs_ips_table == NULL) 168 return -1; 169 bzero((char *)ifs->ifs_ips_table, 170 ifs->ifs_fr_statesize * sizeof(ipstate_t *)); 171 172 KMALLOCS(ifs->ifs_ips_seed, u_long *, 173 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed)); 174 if (ifs->ifs_ips_seed == NULL) 175 return -2; 176 #if defined(NEED_LOCAL_RAND) || !defined(_KERNEL) 177 tv.tv_sec = 0; 178 GETKTIME(&tv); 179 #endif 180 for (i = 0; i < ifs->ifs_fr_statesize; i++) { 181 /* 182 * XXX - ips_seed[X] should be a random number of sorts. 
183 */ 184 #if !defined(NEED_LOCAL_RAND) && defined(_KERNEL) 185 ifs->ifs_ips_seed[i] = ipf_random(); 186 #else 187 ifs->ifs_ips_seed[i] = ((u_long)ifs->ifs_ips_seed + i) * 188 ifs->ifs_fr_statesize; 189 ifs->ifs_ips_seed[i] += tv.tv_sec; 190 ifs->ifs_ips_seed[i] *= (u_long)ifs->ifs_ips_seed; 191 ifs->ifs_ips_seed[i] ^= 0x5a5aa5a5; 192 ifs->ifs_ips_seed[i] *= ifs->ifs_fr_statemax; 193 #endif 194 } 195 196 /* fill icmp reply type table */ 197 for (i = 0; i <= ICMP_MAXTYPE; i++) 198 icmpreplytype4[i] = -1; 199 icmpreplytype4[ICMP_ECHO] = ICMP_ECHOREPLY; 200 icmpreplytype4[ICMP_TSTAMP] = ICMP_TSTAMPREPLY; 201 icmpreplytype4[ICMP_IREQ] = ICMP_IREQREPLY; 202 icmpreplytype4[ICMP_MASKREQ] = ICMP_MASKREPLY; 203 #ifdef USE_INET6 204 /* fill icmp reply type table */ 205 for (i = 0; i <= ICMP6_MAXTYPE; i++) 206 icmpreplytype6[i] = -1; 207 icmpreplytype6[ICMP6_ECHO_REQUEST] = ICMP6_ECHO_REPLY; 208 icmpreplytype6[ICMP6_MEMBERSHIP_QUERY] = ICMP6_MEMBERSHIP_REPORT; 209 icmpreplytype6[ICMP6_NI_QUERY] = ICMP6_NI_REPLY; 210 icmpreplytype6[ND_ROUTER_SOLICIT] = ND_ROUTER_ADVERT; 211 icmpreplytype6[ND_NEIGHBOR_SOLICIT] = ND_NEIGHBOR_ADVERT; 212 #endif 213 214 KMALLOCS(ifs->ifs_ips_stats.iss_bucketlen, u_long *, 215 ifs->ifs_fr_statesize * sizeof(u_long)); 216 if (ifs->ifs_ips_stats.iss_bucketlen == NULL) 217 return -1; 218 bzero((char *)ifs->ifs_ips_stats.iss_bucketlen, 219 ifs->ifs_fr_statesize * sizeof(u_long)); 220 221 if (ifs->ifs_fr_state_maxbucket == 0) { 222 for (i = ifs->ifs_fr_statesize; i > 0; i >>= 1) 223 ifs->ifs_fr_state_maxbucket++; 224 ifs->ifs_fr_state_maxbucket *= 2; 225 } 226 227 fr_sttab_init(ifs->ifs_ips_tqtqb, ifs); 228 ifs->ifs_ips_tqtqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_ips_udptq; 229 ifs->ifs_ips_udptq.ifq_ttl = (u_long)ifs->ifs_fr_udptimeout; 230 ifs->ifs_ips_udptq.ifq_ref = 1; 231 ifs->ifs_ips_udptq.ifq_head = NULL; 232 ifs->ifs_ips_udptq.ifq_tail = &ifs->ifs_ips_udptq.ifq_head; 233 MUTEX_INIT(&ifs->ifs_ips_udptq.ifq_lock, "ipftq udp tab"); 234 
ifs->ifs_ips_udptq.ifq_next = &ifs->ifs_ips_udpacktq; 235 ifs->ifs_ips_udpacktq.ifq_ttl = (u_long)ifs->ifs_fr_udpacktimeout; 236 ifs->ifs_ips_udpacktq.ifq_ref = 1; 237 ifs->ifs_ips_udpacktq.ifq_head = NULL; 238 ifs->ifs_ips_udpacktq.ifq_tail = &ifs->ifs_ips_udpacktq.ifq_head; 239 MUTEX_INIT(&ifs->ifs_ips_udpacktq.ifq_lock, "ipftq udpack tab"); 240 ifs->ifs_ips_udpacktq.ifq_next = &ifs->ifs_ips_icmptq; 241 ifs->ifs_ips_icmptq.ifq_ttl = (u_long)ifs->ifs_fr_icmptimeout; 242 ifs->ifs_ips_icmptq.ifq_ref = 1; 243 ifs->ifs_ips_icmptq.ifq_head = NULL; 244 ifs->ifs_ips_icmptq.ifq_tail = &ifs->ifs_ips_icmptq.ifq_head; 245 MUTEX_INIT(&ifs->ifs_ips_icmptq.ifq_lock, "ipftq icmp tab"); 246 ifs->ifs_ips_icmptq.ifq_next = &ifs->ifs_ips_icmpacktq; 247 ifs->ifs_ips_icmpacktq.ifq_ttl = (u_long)ifs->ifs_fr_icmpacktimeout; 248 ifs->ifs_ips_icmpacktq.ifq_ref = 1; 249 ifs->ifs_ips_icmpacktq.ifq_head = NULL; 250 ifs->ifs_ips_icmpacktq.ifq_tail = &ifs->ifs_ips_icmpacktq.ifq_head; 251 MUTEX_INIT(&ifs->ifs_ips_icmpacktq.ifq_lock, "ipftq icmpack tab"); 252 ifs->ifs_ips_icmpacktq.ifq_next = &ifs->ifs_ips_iptq; 253 ifs->ifs_ips_iptq.ifq_ttl = (u_long)ifs->ifs_fr_iptimeout; 254 ifs->ifs_ips_iptq.ifq_ref = 1; 255 ifs->ifs_ips_iptq.ifq_head = NULL; 256 ifs->ifs_ips_iptq.ifq_tail = &ifs->ifs_ips_iptq.ifq_head; 257 MUTEX_INIT(&ifs->ifs_ips_iptq.ifq_lock, "ipftq ip tab"); 258 ifs->ifs_ips_iptq.ifq_next = &ifs->ifs_ips_deletetq; 259 /* entry's ttl in deletetq is just 1 tick */ 260 ifs->ifs_ips_deletetq.ifq_ttl = (u_long) 1; 261 ifs->ifs_ips_deletetq.ifq_ref = 1; 262 ifs->ifs_ips_deletetq.ifq_head = NULL; 263 ifs->ifs_ips_deletetq.ifq_tail = &ifs->ifs_ips_deletetq.ifq_head; 264 MUTEX_INIT(&ifs->ifs_ips_deletetq.ifq_lock, "state delete queue"); 265 ifs->ifs_ips_deletetq.ifq_next = NULL; 266 267 RWLOCK_INIT(&ifs->ifs_ipf_state, "ipf IP state rwlock"); 268 MUTEX_INIT(&ifs->ifs_ipf_stinsert, "ipf state insert mutex"); 269 ifs->ifs_fr_state_init = 1; 270 271 ifs->ifs_ips_last_force_flush = 
ifs->ifs_fr_ticks; 272 return 0; 273 } 274 275 276 /* ------------------------------------------------------------------------ */ 277 /* Function: fr_stateunload */ 278 /* Returns: Nil */ 279 /* Parameters: Nil */ 280 /* */ 281 /* Release and destroy any resources acquired or initialised so that */ 282 /* IPFilter can be unloaded or re-initialised. */ 283 /* ------------------------------------------------------------------------ */ 284 void fr_stateunload(ifs) 285 ipf_stack_t *ifs; 286 { 287 ipftq_t *ifq, *ifqnext; 288 ipstate_t *is; 289 290 while ((is = ifs->ifs_ips_list) != NULL) 291 fr_delstate(is, 0, ifs); 292 293 /* 294 * Proxy timeout queues are not cleaned here because although they 295 * exist on the state list, appr_unload is called after fr_stateunload 296 * and the proxies actually are responsible for them being created. 297 * Should the proxy timeouts have their own list? There's no real 298 * justification as this is the only complicationA 299 */ 300 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) { 301 ifqnext = ifq->ifq_next; 302 if (((ifq->ifq_flags & IFQF_PROXY) == 0) && 303 (fr_deletetimeoutqueue(ifq) == 0)) 304 fr_freetimeoutqueue(ifq, ifs); 305 } 306 307 ifs->ifs_ips_stats.iss_inuse = 0; 308 ifs->ifs_ips_num = 0; 309 310 if (ifs->ifs_fr_state_init == 1) { 311 fr_sttab_destroy(ifs->ifs_ips_tqtqb); 312 MUTEX_DESTROY(&ifs->ifs_ips_udptq.ifq_lock); 313 MUTEX_DESTROY(&ifs->ifs_ips_icmptq.ifq_lock); 314 MUTEX_DESTROY(&ifs->ifs_ips_udpacktq.ifq_lock); 315 MUTEX_DESTROY(&ifs->ifs_ips_icmpacktq.ifq_lock); 316 MUTEX_DESTROY(&ifs->ifs_ips_iptq.ifq_lock); 317 MUTEX_DESTROY(&ifs->ifs_ips_deletetq.ifq_lock); 318 } 319 320 if (ifs->ifs_ips_table != NULL) { 321 KFREES(ifs->ifs_ips_table, 322 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_table)); 323 ifs->ifs_ips_table = NULL; 324 } 325 326 if (ifs->ifs_ips_seed != NULL) { 327 KFREES(ifs->ifs_ips_seed, 328 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed)); 329 ifs->ifs_ips_seed = NULL; 330 } 331 332 
if (ifs->ifs_ips_stats.iss_bucketlen != NULL) { 333 KFREES(ifs->ifs_ips_stats.iss_bucketlen, 334 ifs->ifs_fr_statesize * sizeof(u_long)); 335 ifs->ifs_ips_stats.iss_bucketlen = NULL; 336 } 337 338 if (ifs->ifs_fr_state_maxbucket_reset == 1) 339 ifs->ifs_fr_state_maxbucket = 0; 340 341 if (ifs->ifs_fr_state_init == 1) { 342 ifs->ifs_fr_state_init = 0; 343 RW_DESTROY(&ifs->ifs_ipf_state); 344 MUTEX_DESTROY(&ifs->ifs_ipf_stinsert); 345 } 346 } 347 348 349 /* ------------------------------------------------------------------------ */ 350 /* Function: fr_statetstats */ 351 /* Returns: ips_state_t* - pointer to state stats structure */ 352 /* Parameters: Nil */ 353 /* */ 354 /* Put all the current numbers and pointers into a single struct and return */ 355 /* a pointer to it. */ 356 /* ------------------------------------------------------------------------ */ 357 static ips_stat_t *fr_statetstats(ifs) 358 ipf_stack_t *ifs; 359 { 360 ifs->ifs_ips_stats.iss_active = ifs->ifs_ips_num; 361 ifs->ifs_ips_stats.iss_statesize = ifs->ifs_fr_statesize; 362 ifs->ifs_ips_stats.iss_statemax = ifs->ifs_fr_statemax; 363 ifs->ifs_ips_stats.iss_table = ifs->ifs_ips_table; 364 ifs->ifs_ips_stats.iss_list = ifs->ifs_ips_list; 365 ifs->ifs_ips_stats.iss_ticks = ifs->ifs_fr_ticks; 366 return &ifs->ifs_ips_stats; 367 } 368 369 /* ------------------------------------------------------------------------ */ 370 /* Function: fr_state_remove */ 371 /* Returns: int - 0 == success, != 0 == failure */ 372 /* Parameters: data(I) - pointer to state structure to delete from table */ 373 /* */ 374 /* Search for a state structure that matches the one passed, according to */ 375 /* the IP addresses and other protocol specific information. 
*/ 376 /* ------------------------------------------------------------------------ */ 377 static int fr_state_remove(data, ifs) 378 caddr_t data; 379 ipf_stack_t *ifs; 380 { 381 ipstate_t *sp, st; 382 int error; 383 384 sp = &st; 385 error = fr_inobj(data, &st, IPFOBJ_IPSTATE); 386 if (error) 387 return EFAULT; 388 389 WRITE_ENTER(&ifs->ifs_ipf_state); 390 for (sp = ifs->ifs_ips_list; sp; sp = sp->is_next) 391 if ((sp->is_p == st.is_p) && (sp->is_v == st.is_v) && 392 !bcmp((caddr_t)&sp->is_src, (caddr_t)&st.is_src, 393 sizeof(st.is_src)) && 394 !bcmp((caddr_t)&sp->is_dst, (caddr_t)&st.is_src, 395 sizeof(st.is_dst)) && 396 !bcmp((caddr_t)&sp->is_ps, (caddr_t)&st.is_ps, 397 sizeof(st.is_ps))) { 398 fr_delstate(sp, ISL_REMOVE, ifs); 399 RWLOCK_EXIT(&ifs->ifs_ipf_state); 400 return 0; 401 } 402 RWLOCK_EXIT(&ifs->ifs_ipf_state); 403 return ESRCH; 404 } 405 406 407 /* ------------------------------------------------------------------------ */ 408 /* Function: fr_state_ioctl */ 409 /* Returns: int - 0 == success, != 0 == failure */ 410 /* Parameters: data(I) - pointer to ioctl data */ 411 /* cmd(I) - ioctl command integer */ 412 /* mode(I) - file mode bits used with open */ 413 /* */ 414 /* Processes an ioctl call made to operate on the IP Filter state device. */ 415 /* ------------------------------------------------------------------------ */ 416 int fr_state_ioctl(data, cmd, mode, uid, ctx, ifs) 417 caddr_t data; 418 ioctlcmd_t cmd; 419 int mode, uid; 420 void *ctx; 421 ipf_stack_t *ifs; 422 { 423 int arg, ret, error = 0; 424 425 switch (cmd) 426 { 427 /* 428 * Delete an entry from the state table. 
429 */ 430 case SIOCDELST : 431 error = fr_state_remove(data, ifs); 432 break; 433 /* 434 * Flush the state table 435 */ 436 case SIOCIPFFL : 437 BCOPYIN(data, (char *)&arg, sizeof(arg)); 438 if (arg == 0 || arg == 1) { 439 WRITE_ENTER(&ifs->ifs_ipf_state); 440 ret = fr_state_flush(arg, 4, ifs); 441 RWLOCK_EXIT(&ifs->ifs_ipf_state); 442 BCOPYOUT((char *)&ret, data, sizeof(ret)); 443 } else 444 error = EINVAL; 445 break; 446 #ifdef USE_INET6 447 case SIOCIPFL6 : 448 BCOPYIN(data, (char *)&arg, sizeof(arg)); 449 if (arg == 0 || arg == 1) { 450 WRITE_ENTER(&ifs->ifs_ipf_state); 451 ret = fr_state_flush(arg, 6, ifs); 452 RWLOCK_EXIT(&ifs->ifs_ipf_state); 453 BCOPYOUT((char *)&ret, data, sizeof(ret)); 454 } else 455 error = EINVAL; 456 break; 457 #endif 458 #ifdef IPFILTER_LOG 459 /* 460 * Flush the state log. 461 */ 462 case SIOCIPFFB : 463 if (!(mode & FWRITE)) 464 error = EPERM; 465 else { 466 int tmp; 467 468 tmp = ipflog_clear(IPL_LOGSTATE, ifs); 469 BCOPYOUT((char *)&tmp, data, sizeof(tmp)); 470 } 471 break; 472 /* 473 * Turn logging of state information on/off. 474 */ 475 case SIOCSETLG : 476 if (!(mode & FWRITE)) 477 error = EPERM; 478 else { 479 BCOPYIN((char *)data, 480 (char *)&ifs->ifs_ipstate_logging, 481 sizeof(ifs->ifs_ipstate_logging)); 482 } 483 break; 484 /* 485 * Return the current state of logging. 486 */ 487 case SIOCGETLG : 488 BCOPYOUT((char *)&ifs->ifs_ipstate_logging, (char *)data, 489 sizeof(ifs->ifs_ipstate_logging)); 490 break; 491 /* 492 * Return the number of bytes currently waiting to be read. 493 */ 494 case FIONREAD : 495 arg = ifs->ifs_iplused[IPL_LOGSTATE]; /* returned in an int */ 496 BCOPYOUT((char *)&arg, data, sizeof(arg)); 497 break; 498 #endif 499 /* 500 * Get the current state statistics. 501 */ 502 case SIOCGETFS : 503 error = fr_outobj(data, fr_statetstats(ifs), IPFOBJ_STATESTAT); 504 break; 505 /* 506 * Lock/Unlock the state table. (Locking prevents any changes, which 507 * means no packets match). 
508 */ 509 case SIOCSTLCK : 510 if (!(mode & FWRITE)) { 511 error = EPERM; 512 } else { 513 fr_lock(data, &ifs->ifs_fr_state_lock); 514 } 515 break; 516 /* 517 * Add an entry to the current state table. 518 */ 519 case SIOCSTPUT : 520 if (!ifs->ifs_fr_state_lock || !(mode &FWRITE)) { 521 error = EACCES; 522 break; 523 } 524 error = fr_stputent(data, ifs); 525 break; 526 /* 527 * Get a state table entry. 528 */ 529 case SIOCSTGET : 530 if (!ifs->ifs_fr_state_lock) { 531 error = EACCES; 532 break; 533 } 534 error = fr_stgetent(data, ifs); 535 break; 536 537 case SIOCGENITER : 538 { 539 ipftoken_t *token; 540 ipfgeniter_t iter; 541 542 error = fr_inobj(data, &iter, IPFOBJ_GENITER); 543 if (error != 0) 544 break; 545 546 token = ipf_findtoken(IPFGENITER_STATE, uid, ctx, ifs); 547 if (token != NULL) 548 error = fr_stateiter(token, &iter, ifs); 549 else 550 error = ESRCH; 551 RWLOCK_EXIT(&ifs->ifs_ipf_tokens); 552 break; 553 } 554 555 case SIOCIPFDELTOK : 556 (void) BCOPYIN(data, (char *)&arg, sizeof(arg)); 557 error = ipf_deltoken(arg, uid, ctx, ifs); 558 break; 559 560 default : 561 error = EINVAL; 562 break; 563 } 564 return error; 565 } 566 567 568 /* ------------------------------------------------------------------------ */ 569 /* Function: fr_stgetent */ 570 /* Returns: int - 0 == success, != 0 == failure */ 571 /* Parameters: data(I) - pointer to state structure to retrieve from table */ 572 /* */ 573 /* Copy out state information from the kernel to a user space process. If */ 574 /* there is a filter rule associated with the state entry, copy that out */ 575 /* as well. The entry to copy out is taken from the value of "ips_next" in */ 576 /* the struct passed in and if not null and not found in the list of current*/ 577 /* state entries, the retrieval fails. 
*/ 578 /* ------------------------------------------------------------------------ */ 579 int fr_stgetent(data, ifs) 580 caddr_t data; 581 ipf_stack_t *ifs; 582 { 583 ipstate_t *is, *isn; 584 ipstate_save_t ips; 585 int error; 586 587 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); 588 if (error) 589 return EFAULT; 590 591 isn = ips.ips_next; 592 if (isn == NULL) { 593 isn = ifs->ifs_ips_list; 594 if (isn == NULL) { 595 if (ips.ips_next == NULL) 596 return ENOENT; 597 return 0; 598 } 599 } else { 600 /* 601 * Make sure the pointer we're copying from exists in the 602 * current list of entries. Security precaution to prevent 603 * copying of random kernel data. 604 */ 605 for (is = ifs->ifs_ips_list; is; is = is->is_next) 606 if (is == isn) 607 break; 608 if (!is) 609 return ESRCH; 610 } 611 ips.ips_next = isn->is_next; 612 bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is)); 613 ips.ips_rule = isn->is_rule; 614 if (isn->is_rule != NULL) 615 bcopy((char *)isn->is_rule, (char *)&ips.ips_fr, 616 sizeof(ips.ips_fr)); 617 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); 618 if (error) 619 return EFAULT; 620 return 0; 621 } 622 623 624 /* ------------------------------------------------------------------------ */ 625 /* Function: fr_stputent */ 626 /* Returns: int - 0 == success, != 0 == failure */ 627 /* Parameters: data(I) - pointer to state information struct */ 628 /* */ 629 /* This function implements the SIOCSTPUT ioctl: insert a state entry into */ 630 /* the state table. If the state info. includes a pointer to a filter rule */ 631 /* then also add in an orphaned rule (will not show up in any "ipfstat -io" */ 632 /* output. 
*/ 633 /* ------------------------------------------------------------------------ */ 634 int fr_stputent(data, ifs) 635 caddr_t data; 636 ipf_stack_t *ifs; 637 { 638 ipstate_t *is, *isn; 639 ipstate_save_t ips; 640 int error, i; 641 frentry_t *fr; 642 char *name; 643 644 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); 645 if (error) 646 return EFAULT; 647 648 KMALLOC(isn, ipstate_t *); 649 if (isn == NULL) 650 return ENOMEM; 651 652 bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn)); 653 bzero((char *)isn, offsetof(struct ipstate, is_pkts)); 654 isn->is_sti.tqe_pnext = NULL; 655 isn->is_sti.tqe_next = NULL; 656 isn->is_sti.tqe_ifq = NULL; 657 isn->is_sti.tqe_parent = isn; 658 isn->is_ifp[0] = NULL; 659 isn->is_ifp[1] = NULL; 660 isn->is_ifp[2] = NULL; 661 isn->is_ifp[3] = NULL; 662 isn->is_sync = NULL; 663 fr = ips.ips_rule; 664 665 if (fr == NULL) { 666 READ_ENTER(&ifs->ifs_ipf_state); 667 fr_stinsert(isn, 0, ifs); 668 MUTEX_EXIT(&isn->is_lock); 669 RWLOCK_EXIT(&ifs->ifs_ipf_state); 670 return 0; 671 } 672 673 if (isn->is_flags & SI_NEWFR) { 674 KMALLOC(fr, frentry_t *); 675 if (fr == NULL) { 676 KFREE(isn); 677 return ENOMEM; 678 } 679 bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr)); 680 isn->is_rule = fr; 681 ips.ips_is.is_rule = fr; 682 MUTEX_NUKE(&fr->fr_lock); 683 MUTEX_INIT(&fr->fr_lock, "state filter rule lock"); 684 685 /* 686 * Look up all the interface names in the rule. 687 */ 688 for (i = 0; i < 4; i++) { 689 name = fr->fr_ifnames[i]; 690 fr->fr_ifas[i] = fr_resolvenic(name, fr->fr_v, ifs); 691 name = isn->is_ifname[i]; 692 isn->is_ifp[i] = fr_resolvenic(name, isn->is_v, ifs); 693 } 694 695 fr->fr_ref = 0; 696 fr->fr_dsize = 0; 697 fr->fr_data = NULL; 698 fr->fr_type = FR_T_NONE; 699 700 fr_resolvedest(&fr->fr_tif, fr->fr_v, ifs); 701 fr_resolvedest(&fr->fr_dif, fr->fr_v, ifs); 702 fr_resolvedest(&fr->fr_rif, fr->fr_v, ifs); 703 704 /* 705 * send a copy back to userland of what we ended up 706 * to allow for verification. 
707 */ 708 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); 709 if (error) { 710 KFREE(isn); 711 MUTEX_DESTROY(&fr->fr_lock); 712 KFREE(fr); 713 return EFAULT; 714 } 715 READ_ENTER(&ifs->ifs_ipf_state); 716 fr_stinsert(isn, 0, ifs); 717 MUTEX_EXIT(&isn->is_lock); 718 RWLOCK_EXIT(&ifs->ifs_ipf_state); 719 720 } else { 721 READ_ENTER(&ifs->ifs_ipf_state); 722 for (is = ifs->ifs_ips_list; is; is = is->is_next) 723 if (is->is_rule == fr) { 724 fr_stinsert(isn, 0, ifs); 725 MUTEX_EXIT(&isn->is_lock); 726 break; 727 } 728 729 if (is == NULL) { 730 KFREE(isn); 731 isn = NULL; 732 } 733 RWLOCK_EXIT(&ifs->ifs_ipf_state); 734 735 return (isn == NULL) ? ESRCH : 0; 736 } 737 738 return 0; 739 } 740 741 742 /* ------------------------------------------------------------------------ */ 743 /* Function: fr_stinsert */ 744 /* Returns: Nil */ 745 /* Parameters: is(I) - pointer to state structure */ 746 /* rev(I) - flag indicating forward/reverse direction of packet */ 747 /* */ 748 /* Inserts a state structure into the hash table (for lookups) and the list */ 749 /* of state entries (for enumeration). Resolves all of the interface names */ 750 /* to pointers and adjusts running stats for the hash table as appropriate. */ 751 /* */ 752 /* Locking: it is assumed that some kind of lock on ipf_state is held. */ 753 /* Exits with is_lock initialised and held. */ 754 /* ------------------------------------------------------------------------ */ 755 void fr_stinsert(is, rev, ifs) 756 ipstate_t *is; 757 int rev; 758 ipf_stack_t *ifs; 759 { 760 frentry_t *fr; 761 u_int hv; 762 int i; 763 764 MUTEX_INIT(&is->is_lock, "ipf state entry"); 765 766 fr = is->is_rule; 767 if (fr != NULL) { 768 MUTEX_ENTER(&fr->fr_lock); 769 fr->fr_ref++; 770 fr->fr_statecnt++; 771 MUTEX_EXIT(&fr->fr_lock); 772 } 773 774 /* 775 * Look up all the interface names in the state entry. 
776 */ 777 for (i = 0; i < 4; i++) { 778 if (is->is_ifp[i] != NULL) 779 continue; 780 is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], is->is_v, ifs); 781 } 782 783 /* 784 * If we could trust is_hv, then the modulous would not be needed, but 785 * when running with IPFILTER_SYNC, this stops bad values. 786 */ 787 hv = is->is_hv % ifs->ifs_fr_statesize; 788 is->is_hv = hv; 789 790 /* 791 * We need to get both of these locks...the first because it is 792 * possible that once the insert is complete another packet might 793 * come along, match the entry and want to update it. 794 */ 795 MUTEX_ENTER(&is->is_lock); 796 MUTEX_ENTER(&ifs->ifs_ipf_stinsert); 797 798 /* 799 * add into list table. 800 */ 801 if (ifs->ifs_ips_list != NULL) 802 ifs->ifs_ips_list->is_pnext = &is->is_next; 803 is->is_pnext = &ifs->ifs_ips_list; 804 is->is_next = ifs->ifs_ips_list; 805 ifs->ifs_ips_list = is; 806 807 if (ifs->ifs_ips_table[hv] != NULL) 808 ifs->ifs_ips_table[hv]->is_phnext = &is->is_hnext; 809 else 810 ifs->ifs_ips_stats.iss_inuse++; 811 is->is_phnext = ifs->ifs_ips_table + hv; 812 is->is_hnext = ifs->ifs_ips_table[hv]; 813 ifs->ifs_ips_table[hv] = is; 814 ifs->ifs_ips_stats.iss_bucketlen[hv]++; 815 ifs->ifs_ips_num++; 816 MUTEX_EXIT(&ifs->ifs_ipf_stinsert); 817 818 fr_setstatequeue(is, rev, ifs); 819 } 820 821 /* ------------------------------------------------------------------------ */ 822 /* Function: fr_match_ipv4addrs */ 823 /* Returns: int - 2 strong match (same addresses, same direction) */ 824 /* 1 weak match (same address, opposite direction) */ 825 /* 0 no match */ 826 /* */ 827 /* Function matches IPv4 addresses. 
*/ 828 /* ------------------------------------------------------------------------ */ 829 static int fr_match_ipv4addrs(is1, is2) 830 ipstate_t *is1; 831 ipstate_t *is2; 832 { 833 int rv; 834 835 if (is1->is_saddr == is2->is_saddr && is1->is_daddr == is2->is_daddr) 836 rv = 2; 837 else if (is1->is_saddr == is2->is_daddr && 838 is1->is_daddr == is2->is_saddr) 839 rv = 1; 840 else 841 rv = 0; 842 843 return (rv); 844 } 845 846 /* ------------------------------------------------------------------------ */ 847 /* Function: fr_match_ipv6addrs */ 848 /* Returns: int - 2 strong match (same addresses, same direction) */ 849 /* 1 weak match (same addresses, opposite direction) */ 850 /* 0 no match */ 851 /* */ 852 /* Function matches IPv6 addresses. */ 853 /* ------------------------------------------------------------------------ */ 854 static int fr_match_ipv6addrs(is1, is2) 855 ipstate_t *is1; 856 ipstate_t *is2; 857 { 858 int rv; 859 860 if (IP6_EQ(&is1->is_src, &is2->is_src) && 861 IP6_EQ(&is1->is_dst, &is2->is_dst)) 862 rv = 2; 863 else if (IP6_EQ(&is1->is_src, &is2->is_dst) && 864 IP6_EQ(&is1->is_dst, &is2->is_src)) { 865 rv = 1; 866 } 867 else 868 rv = 0; 869 870 return (rv); 871 } 872 /* ------------------------------------------------------------------------ */ 873 /* Function: fr_match_addresses */ 874 /* Returns: int - 2 strong match (same addresses, same direction) */ 875 /* 1 weak match (same address, opposite directions) */ 876 /* 0 no match */ 877 /* Parameters: is1, is2 pointers to states we are checking */ 878 /* */ 879 /* Matches addresses, function uses fr_match_ipvXaddrs() to deal with IPv4 */ 880 /* and IPv6 address format. 
*/ 881 /* ------------------------------------------------------------------------ */ 882 static int fr_match_addresses(is1, is2) 883 ipstate_t *is1; 884 ipstate_t *is2; 885 { 886 int rv; 887 888 if (is1->is_v == 4) { 889 rv = fr_match_ipv4addrs(is1, is2); 890 } 891 else { 892 rv = fr_match_ipv6addrs(is1, is2); 893 } 894 895 return (rv); 896 } 897 898 /* ------------------------------------------------------------------------ */ 899 /* Function: fr_match_ppairs */ 900 /* Returns: int - 2 strong match (same ports, same direction) */ 901 /* 1 weak match (same ports, different direction) */ 902 /* 0 no match */ 903 /* Parameters ppairs1, ppairs - src, dst ports we want to match. */ 904 /* */ 905 /* Matches two port_pair_t types (port pairs). Each port pair contains */ 906 /* src, dst port, which belong to session (state entry). */ 907 /* ------------------------------------------------------------------------ */ 908 static int fr_match_ppairs(ppairs1, ppairs2) 909 port_pair_t *ppairs1; 910 port_pair_t *ppairs2; 911 { 912 int rv; 913 914 if (ppairs1->pp_sport == ppairs2->pp_sport && 915 ppairs1->pp_dport == ppairs2->pp_dport) 916 rv = 2; 917 else if (ppairs1->pp_sport == ppairs2->pp_dport && 918 ppairs1->pp_dport == ppairs2->pp_sport) 919 rv = 1; 920 else 921 rv = 0; 922 923 return (rv); 924 } 925 926 /* ------------------------------------------------------------------------ */ 927 /* Function: fr_match_l4_hdr */ 928 /* Returns: int - 0 no match, */ 929 /* 1 weak match (same ports, different directions) */ 930 /* 2 strong match (same ports, same direction) */ 931 /* Parameters is1, is2 - states we want to match */ 932 /* */ 933 /* Function matches L4 header data (source ports for TCP, UDP, CallIds for */ 934 /* GRE protocol). 
*/ 935 /* ------------------------------------------------------------------------ */ 936 static int fr_match_l4_hdr(is1, is2) 937 ipstate_t *is1; 938 ipstate_t *is2; 939 { 940 int rv = 0; 941 port_pair_t pp1; 942 port_pair_t pp2; 943 944 if (is1->is_p != is2->is_p) 945 return (0); 946 947 switch (is1->is_p) { 948 case IPPROTO_TCP: 949 pp1.pp_sport = is1->is_ps.is_ts.ts_sport; 950 pp1.pp_dport = is1->is_ps.is_ts.ts_dport; 951 pp2.pp_sport = is2->is_ps.is_ts.ts_sport; 952 pp2.pp_dport = is2->is_ps.is_ts.ts_dport; 953 rv = fr_match_ppairs(&pp1, &pp2); 954 break; 955 case IPPROTO_UDP: 956 pp1.pp_sport = is1->is_ps.is_us.us_sport; 957 pp1.pp_dport = is1->is_ps.is_us.us_dport; 958 pp2.pp_sport = is2->is_ps.is_us.us_sport; 959 pp2.pp_dport = is2->is_ps.is_us.us_dport; 960 rv = fr_match_ppairs(&pp1, &pp2); 961 break; 962 case IPPROTO_GRE: 963 /* greinfo_t can be also interprted as port pair */ 964 pp1.pp_sport = is1->is_ps.is_ug.gs_call[0]; 965 pp1.pp_dport = is1->is_ps.is_ug.gs_call[1]; 966 pp2.pp_sport = is2->is_ps.is_ug.gs_call[0]; 967 pp2.pp_dport = is2->is_ps.is_ug.gs_call[1]; 968 rv = fr_match_ppairs(&pp1, &pp2); 969 break; 970 case IPPROTO_ICMP: 971 case IPPROTO_ICMPV6: 972 if (bcmp(&is1->is_ps, &is2->is_ps, sizeof(icmpinfo_t))) 973 rv = 1; 974 else 975 rv = 0; 976 break; 977 default: 978 rv = 0; 979 } 980 981 return (rv); 982 } 983 984 /* ------------------------------------------------------------------------ */ 985 /* Function: fr_matchstates */ 986 /* Returns: int - nonzero match, zero no match */ 987 /* Parameters is1, is2 - states we want to match */ 988 /* */ 989 /* The state entries are equal (identical match) if they belong to the same */ 990 /* session. Any time new state entry is being added the fr_addstate() */ 991 /* function creates temporal state entry from the data it gets from IP and */ 992 /* L4 header. The fr_matchstats() must be also aware of packet direction, */ 993 /* which is also stored within the state entry. 
We should keep in mind the */ 994 /* information about packet direction is spread accross L3 (addresses) and */ 995 /* L4 (ports). There are three possible relationships betwee is1, is2: */ 996 /* - no match (match(is1, is2) == 0)) */ 997 /* - weak match same addresses (ports), but different */ 998 /* directions (1) (fr_match_xxxx(is1, is2) == 1) */ 999 /* - strong match same addresses (ports) and same directions */ 1000 /* (2) (fr_match_xxxx(is1, is2) == 2) */ 1001 /* */ 1002 /* There are functions, which match match addresses (L3 header) in is1, is2 */ 1003 /* and functions, which are used to compare ports (L4 header) data. We say */ 1004 /* the is1 and is2 are same (identical) if there is a match */ 1005 /* (fr_match_l4_hdr(is1, is2) != 0) and matchlevels are same for entries */ 1006 /* (fr_match_l3_hdr(is1, is2) == fr_match_l4_hdr(is1, is2)) for is1, is2. */ 1007 /* Such requirement deals with case as follows: */ 1008 /* suppose there are two connections between hosts A, B. Connection 1: */ 1009 /* a.a.a.a:12345 <=> b.b.b.b:54321 */ 1010 /* Connection 2: */ 1011 /* a.a.a.a:54321 <=> b.b.b.b:12345 */ 1012 /* since we've introduced match levels into our fr_matchstates(), we are */ 1013 /* able to identify, which packets belong to connection A and which belong */ 1014 /* to connection B. Assume there are two entries is1, is2. is1 has been */ 1015 /* from con. 1 packet, which travelled from A to B: */ 1016 /* a.a.a.a:12345 -> b.b.b.b:54321 */ 1017 /* while s2, has been created from packet which belongs to con. 2 and is */ 1018 /* also coming from A to B: */ 1019 /* a.a.a.a:54321 -> b.b.b.b:12345 */ 1020 /* fr_match_l3_hdr(is1, is2) == 2 -> strong match, while */ 1021 /* fr_match_l4_hdr(is1, is2) == 1 -> weak match. Since match levels are */ 1022 /* different the state entries are not identical -> no match as a final */ 1023 /* result. 
*/ 1024 /* ------------------------------------------------------------------------ */ 1025 static int fr_matchstates(is1, is2) 1026 ipstate_t *is1; 1027 ipstate_t *is2; 1028 { 1029 int rv; 1030 int amatch; 1031 int pmatch; 1032 1033 if (bcmp(&is1->is_pass, &is2->is_pass, 1034 offsetof(struct ipstate, is_ps) - 1035 offsetof(struct ipstate, is_pass)) == 0) { 1036 1037 pmatch = fr_match_l4_hdr(is1, is2); 1038 amatch = fr_match_addresses(is1, is2); 1039 /* 1040 * If addresses match (amatch != 0), then 'match levels' 1041 * must be same for matching entries. If amatch and pmatch 1042 * have different values (different match levels), then 1043 * is1 and is2 belong to different sessions. 1044 */ 1045 rv = (amatch != 0) && (amatch == pmatch); 1046 } 1047 else 1048 rv = 0; 1049 1050 return (rv); 1051 } 1052 1053 /* ------------------------------------------------------------------------ */ 1054 /* Function: fr_addstate */ 1055 /* Returns: ipstate_t* - NULL == failure, else pointer to new state */ 1056 /* Parameters: fin(I) - pointer to packet information */ 1057 /* stsave(O) - pointer to place to save pointer to created */ 1058 /* state structure. */ 1059 /* flags(I) - flags to use when creating the structure */ 1060 /* */ 1061 /* Creates a new IP state structure from the packet information collected. */ 1062 /* Inserts it into the state table and appends to the bottom of the active */ 1063 /* list. If the capacity of the table has reached the maximum allowed then */ 1064 /* the call will fail and a flush is scheduled for the next timeout call. 
*/ 1065 /* ------------------------------------------------------------------------ */ 1066 ipstate_t *fr_addstate(fin, stsave, flags) 1067 fr_info_t *fin; 1068 ipstate_t **stsave; 1069 u_int flags; 1070 { 1071 ipstate_t *is, ips; 1072 struct icmp *ic; 1073 u_int pass, hv; 1074 frentry_t *fr; 1075 tcphdr_t *tcp; 1076 grehdr_t *gre; 1077 void *ifp; 1078 int out; 1079 ipf_stack_t *ifs = fin->fin_ifs; 1080 1081 if (ifs->ifs_fr_state_lock || 1082 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) 1083 return NULL; 1084 1085 if ((fin->fin_flx & FI_OOW) && !(fin->fin_tcpf & TH_SYN)) 1086 return NULL; 1087 1088 /* 1089 * If a "keep state" rule has reached the maximum number of references 1090 * to it, then schedule an automatic flush in case we can clear out 1091 * some "dead old wood". Note that because the lock isn't held on 1092 * fr it is possible that we could overflow. The cost of overflowing 1093 * is being ignored here as the number by which it can overflow is 1094 * a product of the number of simultaneous threads that could be 1095 * executing in here, so a limit of 100 won't result in 200, but could 1096 * result in 101 or 102. 
1097 */ 1098 fr = fin->fin_fr; 1099 if (fr != NULL) { 1100 if ((ifs->ifs_ips_num == ifs->ifs_fr_statemax) && (fr->fr_statemax == 0)) { 1101 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 1102 ifs->ifs_fr_state_doflush = 1; 1103 return NULL; 1104 } 1105 if ((fr->fr_statemax != 0) && 1106 (fr->fr_statecnt >= fr->fr_statemax)) { 1107 ATOMIC_INCL(ifs->ifs_ips_stats.iss_maxref); 1108 ifs->ifs_fr_state_doflush = 1; 1109 return NULL; 1110 } 1111 } 1112 1113 ic = NULL; 1114 tcp = NULL; 1115 out = fin->fin_out; 1116 is = &ips; 1117 bzero((char *)is, sizeof(*is)); 1118 1119 if (fr == NULL) { 1120 pass = ifs->ifs_fr_flags; 1121 is->is_tag = FR_NOLOGTAG; 1122 } 1123 else { 1124 pass = fr->fr_flags; 1125 } 1126 1127 is->is_die = 1 + ifs->ifs_fr_ticks; 1128 /* 1129 * We want to check everything that is a property of this packet, 1130 * but we don't (automatically) care about it's fragment status as 1131 * this may change. 1132 */ 1133 is->is_pass = pass; 1134 is->is_v = fin->fin_v; 1135 is->is_opt[0] = fin->fin_optmsk; 1136 is->is_optmsk[0] = 0xffffffff; 1137 is->is_optmsk[1] = 0xffffffff; 1138 if (is->is_v == 6) { 1139 is->is_opt[0] &= ~0x8; 1140 is->is_optmsk[0] &= ~0x8; 1141 is->is_optmsk[1] &= ~0x8; 1142 } 1143 is->is_sec = fin->fin_secmsk; 1144 is->is_secmsk = 0xffff; 1145 is->is_auth = fin->fin_auth; 1146 is->is_authmsk = 0xffff; 1147 1148 /* 1149 * Copy and calculate... 1150 */ 1151 hv = (is->is_p = fin->fin_fi.fi_p); 1152 is->is_src = fin->fin_fi.fi_src; 1153 hv += is->is_saddr; 1154 is->is_dst = fin->fin_fi.fi_dst; 1155 hv += is->is_daddr; 1156 #ifdef USE_INET6 1157 if (fin->fin_v == 6) { 1158 /* 1159 * For ICMPv6, we check to see if the destination address is 1160 * a multicast address. If it is, do not include it in the 1161 * calculation of the hash because the correct reply will come 1162 * back from a real address, not a multicast address. 
1163 */ 1164 if ((is->is_p == IPPROTO_ICMPV6) && 1165 IN6_IS_ADDR_MULTICAST(&is->is_dst.in6)) { 1166 /* 1167 * So you can do keep state with neighbour discovery. 1168 * 1169 * Here we could use the address from the neighbour 1170 * solicit message to put in the state structure and 1171 * we could use that without a wildcard flag too... 1172 */ 1173 is->is_flags |= SI_W_DADDR; 1174 hv -= is->is_daddr; 1175 } else { 1176 hv += is->is_dst.i6[1]; 1177 hv += is->is_dst.i6[2]; 1178 hv += is->is_dst.i6[3]; 1179 } 1180 hv += is->is_src.i6[1]; 1181 hv += is->is_src.i6[2]; 1182 hv += is->is_src.i6[3]; 1183 } 1184 #endif 1185 1186 switch (is->is_p) 1187 { 1188 #ifdef USE_INET6 1189 case IPPROTO_ICMPV6 : 1190 ic = fin->fin_dp; 1191 1192 switch (ic->icmp_type) 1193 { 1194 case ICMP6_ECHO_REQUEST : 1195 is->is_icmp.ici_type = ic->icmp_type; 1196 hv += (is->is_icmp.ici_id = ic->icmp_id); 1197 break; 1198 case ICMP6_MEMBERSHIP_QUERY : 1199 case ND_ROUTER_SOLICIT : 1200 case ND_NEIGHBOR_SOLICIT : 1201 case ICMP6_NI_QUERY : 1202 is->is_icmp.ici_type = ic->icmp_type; 1203 break; 1204 default : 1205 return NULL; 1206 } 1207 ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp); 1208 break; 1209 #endif 1210 case IPPROTO_ICMP : 1211 ic = fin->fin_dp; 1212 1213 switch (ic->icmp_type) 1214 { 1215 case ICMP_ECHO : 1216 case ICMP_TSTAMP : 1217 case ICMP_IREQ : 1218 case ICMP_MASKREQ : 1219 is->is_icmp.ici_type = ic->icmp_type; 1220 hv += (is->is_icmp.ici_id = ic->icmp_id); 1221 break; 1222 default : 1223 return NULL; 1224 } 1225 ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp); 1226 break; 1227 1228 case IPPROTO_GRE : 1229 gre = fin->fin_dp; 1230 1231 is->is_gre.gs_flags = gre->gr_flags; 1232 is->is_gre.gs_ptype = gre->gr_ptype; 1233 if (GRE_REV(is->is_gre.gs_flags) == 1) { 1234 is->is_call[0] = fin->fin_data[0]; 1235 is->is_call[1] = fin->fin_data[1]; 1236 } 1237 break; 1238 1239 case IPPROTO_TCP : 1240 tcp = fin->fin_dp; 1241 1242 if (tcp->th_flags & TH_RST) 1243 return NULL; 1244 /* 1245 * The endian of the 
ports doesn't matter, but the ack and 1246 * sequence numbers do as we do mathematics on them later. 1247 */ 1248 is->is_sport = htons(fin->fin_data[0]); 1249 is->is_dport = htons(fin->fin_data[1]); 1250 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) { 1251 hv += is->is_sport; 1252 hv += is->is_dport; 1253 } 1254 1255 /* 1256 * If this is a real packet then initialise fields in the 1257 * state information structure from the TCP header information. 1258 */ 1259 1260 is->is_maxdwin = 1; 1261 is->is_maxswin = ntohs(tcp->th_win); 1262 if (is->is_maxswin == 0) 1263 is->is_maxswin = 1; 1264 1265 if ((fin->fin_flx & FI_IGNORE) == 0) { 1266 is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen - 1267 (TCP_OFF(tcp) << 2) + 1268 ((tcp->th_flags & TH_SYN) ? 1 : 0) + 1269 ((tcp->th_flags & TH_FIN) ? 1 : 0); 1270 is->is_maxsend = is->is_send; 1271 1272 /* 1273 * Window scale option is only present in 1274 * SYN/SYN-ACK packet. 1275 */ 1276 if ((tcp->th_flags & ~(TH_FIN|TH_ACK|TH_ECNALL)) == 1277 TH_SYN && 1278 (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) { 1279 if (fr_tcpoptions(fin, tcp, 1280 &is->is_tcp.ts_data[0]) == -1) { 1281 fin->fin_flx |= FI_BAD; 1282 } 1283 } 1284 1285 if ((fin->fin_out != 0) && (pass & FR_NEWISN) != 0) { 1286 fr_checknewisn(fin, is); 1287 fr_fixoutisn(fin, is); 1288 } 1289 1290 if ((tcp->th_flags & TH_OPENING) == TH_SYN) 1291 flags |= IS_TCPFSM; 1292 else { 1293 is->is_maxdwin = is->is_maxswin * 2; 1294 is->is_dend = ntohl(tcp->th_ack); 1295 is->is_maxdend = ntohl(tcp->th_ack); 1296 is->is_maxdwin *= 2; 1297 } 1298 } 1299 1300 /* 1301 * If we're creating state for a starting connection, start the 1302 * timer on it as we'll never see an error if it fails to 1303 * connect. 
1304 */ 1305 ATOMIC_INCL(ifs->ifs_ips_stats.iss_tcp); 1306 break; 1307 1308 case IPPROTO_UDP : 1309 tcp = fin->fin_dp; 1310 1311 is->is_sport = htons(fin->fin_data[0]); 1312 is->is_dport = htons(fin->fin_data[1]); 1313 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) { 1314 hv += tcp->th_dport; 1315 hv += tcp->th_sport; 1316 } 1317 ATOMIC_INCL(ifs->ifs_ips_stats.iss_udp); 1318 break; 1319 1320 default : 1321 break; 1322 } 1323 hv = DOUBLE_HASH(hv, ifs); 1324 is->is_hv = hv; 1325 is->is_rule = fr; 1326 is->is_flags = flags & IS_INHERITED; 1327 1328 /* 1329 * Look for identical state. 1330 */ 1331 for (is = ifs->ifs_ips_table[is->is_hv % ifs->ifs_fr_statesize]; 1332 is != NULL; 1333 is = is->is_hnext) { 1334 if (fr_matchstates(&ips, is) == 1) 1335 break; 1336 } 1337 1338 /* 1339 * we've found a matching state -> state already exists, 1340 * we are not going to add a duplicate record. 1341 */ 1342 if (is != NULL) 1343 return NULL; 1344 1345 if (ifs->ifs_ips_stats.iss_bucketlen[hv] >= ifs->ifs_fr_state_maxbucket) { 1346 ATOMIC_INCL(ifs->ifs_ips_stats.iss_bucketfull); 1347 return NULL; 1348 } 1349 KMALLOC(is, ipstate_t *); 1350 if (is == NULL) { 1351 ATOMIC_INCL(ifs->ifs_ips_stats.iss_nomem); 1352 return NULL; 1353 } 1354 bcopy((char *)&ips, (char *)is, sizeof(*is)); 1355 /* 1356 * Do not do the modulous here, it is done in fr_stinsert(). 
1357 */ 1358 if (fr != NULL) { 1359 (void) strncpy(is->is_group, fr->fr_group, FR_GROUPLEN); 1360 if (fr->fr_age[0] != 0) { 1361 is->is_tqehead[0] = 1362 fr_addtimeoutqueue(&ifs->ifs_ips_utqe, 1363 fr->fr_age[0], ifs); 1364 is->is_sti.tqe_flags |= TQE_RULEBASED; 1365 } 1366 if (fr->fr_age[1] != 0) { 1367 is->is_tqehead[1] = 1368 fr_addtimeoutqueue(&ifs->ifs_ips_utqe, 1369 fr->fr_age[1], ifs); 1370 is->is_sti.tqe_flags |= TQE_RULEBASED; 1371 } 1372 is->is_tag = fr->fr_logtag; 1373 1374 is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1]; 1375 is->is_ifp[(1 - out) << 1] = fr->fr_ifas[2]; 1376 is->is_ifp[((1 - out) << 1) + 1] = fr->fr_ifas[3]; 1377 1378 if (((ifp = fr->fr_ifas[1]) != NULL) && 1379 (ifp != (void *)-1)) { 1380 COPYIFNAME(ifp, is->is_ifname[(out << 1) + 1], fr->fr_v); 1381 } 1382 if (((ifp = fr->fr_ifas[2]) != NULL) && 1383 (ifp != (void *)-1)) { 1384 COPYIFNAME(ifp, is->is_ifname[(1 - out) << 1], fr->fr_v); 1385 } 1386 if (((ifp = fr->fr_ifas[3]) != NULL) && 1387 (ifp != (void *)-1)) { 1388 COPYIFNAME(ifp, is->is_ifname[((1 - out) << 1) + 1], fr->fr_v); 1389 } 1390 } 1391 1392 is->is_ifp[out << 1] = fin->fin_ifp; 1393 if (fin->fin_ifp != NULL) { 1394 COPYIFNAME(fin->fin_ifp, is->is_ifname[out << 1], fr->fr_v); 1395 } 1396 1397 /* 1398 * It may seem strange to set is_ref to 2, but fr_check() will call 1399 * fr_statederef() after calling fr_addstate() and the idea is to 1400 * have it exist at the end of fr_check() with is_ref == 1. 
1401 */ 1402 is->is_ref = 2; 1403 is->is_pkts[0] = 0, is->is_bytes[0] = 0; 1404 is->is_pkts[1] = 0, is->is_bytes[1] = 0; 1405 is->is_pkts[2] = 0, is->is_bytes[2] = 0; 1406 is->is_pkts[3] = 0, is->is_bytes[3] = 0; 1407 if ((fin->fin_flx & FI_IGNORE) == 0) { 1408 is->is_pkts[out] = 1; 1409 is->is_bytes[out] = fin->fin_plen; 1410 is->is_flx[out][0] = fin->fin_flx & FI_CMP; 1411 is->is_flx[out][0] &= ~FI_OOW; 1412 } 1413 1414 if (pass & FR_STSTRICT) 1415 is->is_flags |= IS_STRICT; 1416 1417 if (pass & FR_STATESYNC) 1418 is->is_flags |= IS_STATESYNC; 1419 1420 if (flags & (SI_WILDP|SI_WILDA)) { 1421 ATOMIC_INCL(ifs->ifs_ips_stats.iss_wild); 1422 } 1423 is->is_rulen = fin->fin_rule; 1424 1425 1426 if (pass & FR_LOGFIRST) 1427 is->is_pass &= ~(FR_LOGFIRST|FR_LOG); 1428 1429 READ_ENTER(&ifs->ifs_ipf_state); 1430 is->is_me = stsave; 1431 1432 fr_stinsert(is, fin->fin_rev, ifs); 1433 1434 if (fin->fin_p == IPPROTO_TCP) { 1435 /* 1436 * If we're creating state for a starting connection, start the 1437 * timer on it as we'll never see an error if it fails to 1438 * connect. 
1439 */ 1440 (void) fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb, 1441 is->is_flags); 1442 MUTEX_EXIT(&is->is_lock); 1443 #ifdef IPFILTER_SCAN 1444 if ((is->is_flags & SI_CLONE) == 0) 1445 (void) ipsc_attachis(is); 1446 #endif 1447 } else { 1448 MUTEX_EXIT(&is->is_lock); 1449 } 1450 #ifdef IPFILTER_SYNC 1451 if ((is->is_flags & IS_STATESYNC) && ((is->is_flags & SI_CLONE) == 0)) 1452 is->is_sync = ipfsync_new(SMC_STATE, fin, is); 1453 #endif 1454 if (ifs->ifs_ipstate_logging) 1455 ipstate_log(is, ISL_NEW, ifs); 1456 1457 RWLOCK_EXIT(&ifs->ifs_ipf_state); 1458 fin->fin_state = is; 1459 fin->fin_rev = IP6_NEQ(&is->is_dst, &fin->fin_daddr); 1460 fin->fin_flx |= FI_STATE; 1461 if (fin->fin_flx & FI_FRAG) 1462 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); 1463 1464 return is; 1465 } 1466 1467 1468 /* ------------------------------------------------------------------------ */ 1469 /* Function: fr_tcpoptions */ 1470 /* Returns: int - 1 == packet matches state entry, 0 == it does not */ 1471 /* Parameters: fin(I) - pointer to packet information */ 1472 /* tcp(I) - pointer to TCP packet header */ 1473 /* td(I) - pointer to TCP data held as part of the state */ 1474 /* */ 1475 /* Look after the TCP header for any options and deal with those that are */ 1476 /* present. Record details about those that we recogise. 
*/ 1477 /* ------------------------------------------------------------------------ */ 1478 static int fr_tcpoptions(fin, tcp, td) 1479 fr_info_t *fin; 1480 tcphdr_t *tcp; 1481 tcpdata_t *td; 1482 { 1483 int off, mlen, ol, i, len, retval; 1484 char buf[64], *s, opt; 1485 mb_t *m = NULL; 1486 1487 len = (TCP_OFF(tcp) << 2); 1488 if (fin->fin_dlen < len) 1489 return 0; 1490 len -= sizeof(*tcp); 1491 1492 off = fin->fin_plen - fin->fin_dlen + sizeof(*tcp) + fin->fin_ipoff; 1493 1494 m = fin->fin_m; 1495 mlen = MSGDSIZE(m) - off; 1496 if (len > mlen) { 1497 len = mlen; 1498 retval = 0; 1499 } else { 1500 retval = 1; 1501 } 1502 1503 COPYDATA(m, off, len, buf); 1504 1505 for (s = buf; len > 0; ) { 1506 opt = *s; 1507 if (opt == TCPOPT_EOL) 1508 break; 1509 else if (opt == TCPOPT_NOP) 1510 ol = 1; 1511 else { 1512 if (len < 2) 1513 break; 1514 ol = (int)*(s + 1); 1515 if (ol < 2 || ol > len) 1516 break; 1517 1518 /* 1519 * Extract the TCP options we are interested in out of 1520 * the header and store them in the the tcpdata struct. 1521 */ 1522 switch (opt) 1523 { 1524 case TCPOPT_WINDOW : 1525 if (ol == TCPOLEN_WINDOW) { 1526 i = (int)*(s + 2); 1527 if (i > TCP_WSCALE_MAX) 1528 i = TCP_WSCALE_MAX; 1529 else if (i < 0) 1530 i = 0; 1531 td->td_winscale = i; 1532 td->td_winflags |= TCP_WSCALE_SEEN| 1533 TCP_WSCALE_FIRST; 1534 } else 1535 retval = -1; 1536 break; 1537 case TCPOPT_MAXSEG : 1538 /* 1539 * So, if we wanted to set the TCP MAXSEG, 1540 * it should be done here... 
1541 */ 1542 if (ol == TCPOLEN_MAXSEG) { 1543 i = (int)*(s + 2); 1544 i <<= 8; 1545 i += (int)*(s + 3); 1546 td->td_maxseg = i; 1547 } else 1548 retval = -1; 1549 break; 1550 case TCPOPT_SACK_PERMITTED : 1551 if (ol == TCPOLEN_SACK_PERMITTED) 1552 td->td_winflags |= TCP_SACK_PERMIT; 1553 else 1554 retval = -1; 1555 break; 1556 } 1557 } 1558 len -= ol; 1559 s += ol; 1560 } 1561 return retval; 1562 } 1563 1564 1565 /* ------------------------------------------------------------------------ */ 1566 /* Function: fr_tcpstate */ 1567 /* Returns: int - 1 == packet matches state entry, 0 == it does not */ 1568 /* Parameters: fin(I) - pointer to packet information */ 1569 /* tcp(I) - pointer to TCP packet header */ 1570 /* is(I) - pointer to master state structure */ 1571 /* */ 1572 /* Check to see if a packet with TCP headers fits within the TCP window. */ 1573 /* Change timeout depending on whether new packet is a SYN-ACK returning */ 1574 /* for a SYN or a RST or FIN which indicate time to close up shop. */ 1575 /* ------------------------------------------------------------------------ */ 1576 static int fr_tcpstate(fin, tcp, is) 1577 fr_info_t *fin; 1578 tcphdr_t *tcp; 1579 ipstate_t *is; 1580 { 1581 int source, ret = 0, flags; 1582 tcpdata_t *fdata, *tdata; 1583 ipf_stack_t *ifs = fin->fin_ifs; 1584 1585 source = !fin->fin_rev; 1586 if (((is->is_flags & IS_TCPFSM) != 0) && (source == 1) && 1587 (ntohs(is->is_sport) != fin->fin_data[0])) 1588 source = 0; 1589 fdata = &is->is_tcp.ts_data[!source]; 1590 tdata = &is->is_tcp.ts_data[source]; 1591 1592 MUTEX_ENTER(&is->is_lock); 1593 1594 /* 1595 * If a SYN packet is received for a connection that is in a half 1596 * closed state, then move its state entry to deletetq. In such case 1597 * the SYN packet will be consequently dropped. This allows new state 1598 * entry to be created with a retransmited SYN packet. 
1599 */ 1600 if ((tcp->th_flags & TH_OPENING) == TH_SYN) { 1601 if (((is->is_state[source] > IPF_TCPS_ESTABLISHED) || 1602 (is->is_state[source] == IPF_TCPS_CLOSED)) && 1603 ((is->is_state[!source] > IPF_TCPS_ESTABLISHED) || 1604 (is->is_state[!source] == IPF_TCPS_CLOSED))) { 1605 /* 1606 * Do not update is->is_sti.tqe_die in case state entry 1607 * is already present in deletetq. It prevents state 1608 * entry ttl update by retransmitted SYN packets, which 1609 * may arrive before timer tick kicks off. The SYN 1610 * packet will be dropped again. 1611 */ 1612 if (is->is_sti.tqe_ifq != &ifs->ifs_ips_deletetq) 1613 fr_movequeue(&is->is_sti, is->is_sti.tqe_ifq, 1614 &fin->fin_ifs->ifs_ips_deletetq, 1615 fin->fin_ifs); 1616 1617 MUTEX_EXIT(&is->is_lock); 1618 return 0; 1619 } 1620 } 1621 1622 if (fr_tcpinwindow(fin, fdata, tdata, tcp, is->is_flags)) { 1623 #ifdef IPFILTER_SCAN 1624 if (is->is_flags & (IS_SC_CLIENT|IS_SC_SERVER)) { 1625 ipsc_packet(fin, is); 1626 if (FR_ISBLOCK(is->is_pass)) { 1627 MUTEX_EXIT(&is->is_lock); 1628 return 1; 1629 } 1630 } 1631 #endif 1632 1633 /* 1634 * Nearing end of connection, start timeout. 1635 */ 1636 ret = fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb, 1637 is->is_flags); 1638 if (ret == 0) { 1639 MUTEX_EXIT(&is->is_lock); 1640 return 0; 1641 } 1642 1643 /* 1644 * set s0's as appropriate. Use syn-ack packet as it 1645 * contains both pieces of required information. 1646 */ 1647 /* 1648 * Window scale option is only present in SYN/SYN-ACK packet. 1649 * Compare with ~TH_FIN to mask out T/TCP setups. 
1650 */ 1651 flags = tcp->th_flags & ~(TH_FIN|TH_ECNALL); 1652 if (flags == (TH_SYN|TH_ACK)) { 1653 is->is_s0[source] = ntohl(tcp->th_ack); 1654 is->is_s0[!source] = ntohl(tcp->th_seq) + 1; 1655 if (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2)) { 1656 (void) fr_tcpoptions(fin, tcp, fdata); 1657 } 1658 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) 1659 fr_checknewisn(fin, is); 1660 } else if (flags == TH_SYN) { 1661 is->is_s0[source] = ntohl(tcp->th_seq) + 1; 1662 if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) 1663 (void) fr_tcpoptions(fin, tcp, tdata); 1664 1665 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) 1666 fr_checknewisn(fin, is); 1667 1668 } 1669 ret = 1; 1670 } else 1671 fin->fin_flx |= FI_OOW; 1672 MUTEX_EXIT(&is->is_lock); 1673 return ret; 1674 } 1675 1676 1677 /* ------------------------------------------------------------------------ */ 1678 /* Function: fr_checknewisn */ 1679 /* Returns: Nil */ 1680 /* Parameters: fin(I) - pointer to packet information */ 1681 /* is(I) - pointer to master state structure */ 1682 /* */ 1683 /* Check to see if this TCP connection is expecting and needs a new */ 1684 /* sequence number for a particular direction of the connection. */ 1685 /* */ 1686 /* NOTE: This does not actually change the sequence numbers, only gets new */ 1687 /* one ready. */ 1688 /* ------------------------------------------------------------------------ */ 1689 static void fr_checknewisn(fin, is) 1690 fr_info_t *fin; 1691 ipstate_t *is; 1692 { 1693 u_32_t sumd, old, new; 1694 tcphdr_t *tcp; 1695 int i; 1696 1697 i = fin->fin_rev; 1698 tcp = fin->fin_dp; 1699 1700 if (((i == 0) && !(is->is_flags & IS_ISNSYN)) || 1701 ((i == 1) && !(is->is_flags & IS_ISNACK))) { 1702 old = ntohl(tcp->th_seq); 1703 new = fr_newisn(fin); 1704 is->is_isninc[i] = new - old; 1705 CALC_SUMD(old, new, sumd); 1706 is->is_sumd[i] = (sumd & 0xffff) + (sumd >> 16); 1707 1708 is->is_flags |= ((i == 0) ? 
IS_ISNSYN : IS_ISNACK); 1709 } 1710 } 1711 1712 1713 /* ------------------------------------------------------------------------ */ 1714 /* Function: fr_tcpinwindow */ 1715 /* Returns: int - 1 == packet inside TCP "window", 0 == not inside. */ 1716 /* Parameters: fin(I) - pointer to packet information */ 1717 /* fdata(I) - pointer to tcp state informatio (forward) */ 1718 /* tdata(I) - pointer to tcp state informatio (reverse) */ 1719 /* tcp(I) - pointer to TCP packet header */ 1720 /* */ 1721 /* Given a packet has matched addresses and ports, check to see if it is */ 1722 /* within the TCP data window. In a show of generosity, allow packets that */ 1723 /* are within the window space behind the current sequence # as well. */ 1724 /* ------------------------------------------------------------------------ */ 1725 int fr_tcpinwindow(fin, fdata, tdata, tcp, flags) 1726 fr_info_t *fin; 1727 tcpdata_t *fdata, *tdata; 1728 tcphdr_t *tcp; 1729 int flags; 1730 { 1731 tcp_seq seq, ack, end; 1732 int ackskew, tcpflags; 1733 u_32_t win, maxwin; 1734 int dsize, inseq; 1735 1736 /* 1737 * Find difference between last checked packet and this packet. 1738 */ 1739 tcpflags = tcp->th_flags; 1740 seq = ntohl(tcp->th_seq); 1741 ack = ntohl(tcp->th_ack); 1742 1743 if (tcpflags & TH_SYN) 1744 win = ntohs(tcp->th_win); 1745 else 1746 win = ntohs(tcp->th_win) << fdata->td_winscale; 1747 1748 /* 1749 * win 0 means the receiving endpoint has closed the window, because it 1750 * has not enough memory to receive data from sender. In such case we 1751 * are pretending window size to be 1 to let TCP probe data through. 1752 * TCP probe data can be either 0 or 1 octet of data, the RFC does not 1753 * state this accurately, so we have to allow 1 octet (win = 1) even if 1754 * the window is closed (win == 0). 1755 */ 1756 if (win == 0) 1757 win = 1; 1758 1759 dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) + 1760 ((tcpflags & TH_SYN) ? 1 : 0) + ((tcpflags & TH_FIN) ? 
1 : 0); 1761 1762 /* 1763 * if window scaling is present, the scaling is only allowed 1764 * for windows not in the first SYN packet. In that packet the 1765 * window is 65535 to specify the largest window possible 1766 * for receivers not implementing the window scale option. 1767 * Currently, we do not assume TTCP here. That means that 1768 * if we see a second packet from a host (after the initial 1769 * SYN), we can assume that the receiver of the SYN did 1770 * already send back the SYN/ACK (and thus that we know if 1771 * the receiver also does window scaling) 1772 */ 1773 if (!(tcpflags & TH_SYN) && (fdata->td_winflags & TCP_WSCALE_FIRST)) { 1774 fdata->td_maxwin = win; 1775 } 1776 1777 end = seq + dsize; 1778 1779 if ((fdata->td_end == 0) && 1780 (!(flags & IS_TCPFSM) || 1781 ((tcpflags & TH_OPENING) == TH_OPENING))) { 1782 /* 1783 * Must be a (outgoing) SYN-ACK in reply to a SYN. 1784 */ 1785 fdata->td_end = end - 1; 1786 fdata->td_maxwin = 1; 1787 fdata->td_maxend = end + win; 1788 } 1789 1790 if (!(tcpflags & TH_ACK)) { /* Pretend an ack was sent */ 1791 ack = tdata->td_end; 1792 } else if (((tcpflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) && 1793 (ack == 0)) { 1794 /* gross hack to get around certain broken tcp stacks */ 1795 ack = tdata->td_end; 1796 } 1797 1798 maxwin = tdata->td_maxwin; 1799 ackskew = tdata->td_end - ack; 1800 1801 /* 1802 * Strict sequencing only allows in-order delivery. 
1803 */ 1804 if ((flags & IS_STRICT) != 0) { 1805 if (seq != fdata->td_end) { 1806 return 0; 1807 } 1808 } 1809 1810 #define SEQ_GE(a,b) ((int)((a) - (b)) >= 0) 1811 #define SEQ_GT(a,b) ((int)((a) - (b)) > 0) 1812 inseq = 0; 1813 if ( 1814 #if defined(_KERNEL) 1815 (SEQ_GE(fdata->td_maxend, end)) && 1816 (SEQ_GE(seq, fdata->td_end - maxwin)) && 1817 #endif 1818 /* XXX what about big packets */ 1819 #define MAXACKWINDOW 66000 1820 (-ackskew <= (MAXACKWINDOW << fdata->td_winscale)) && 1821 ( ackskew <= (MAXACKWINDOW << fdata->td_winscale))) { 1822 inseq = 1; 1823 /* 1824 * Microsoft Windows will send the next packet to the right of the 1825 * window if SACK is in use. 1826 */ 1827 } else if ((seq == fdata->td_maxend) && (ackskew == 0) && 1828 (fdata->td_winflags & TCP_SACK_PERMIT) && 1829 (tdata->td_winflags & TCP_SACK_PERMIT)) { 1830 inseq = 1; 1831 /* 1832 * RST ACK with SEQ equal to 0 is sent by some OSes (i.e. Solaris) as a 1833 * response to initial SYN packet, when there is no application 1834 * listeing to on a port, where the SYN packet has came to. 1835 */ 1836 } else if ((seq == 0) && (tcpflags == (TH_RST|TH_ACK)) && 1837 (ackskew >= -1) && (ackskew <= 1)) { 1838 inseq = 1; 1839 } else if (!(flags & IS_TCPFSM)) { 1840 1841 if (!(fdata->td_winflags & 1842 (TCP_WSCALE_SEEN|TCP_WSCALE_FIRST))) { 1843 /* 1844 * No TCPFSM and no window scaling, so make some 1845 * extra guesses. 1846 */ 1847 if ((seq == fdata->td_maxend) && (ackskew == 0)) 1848 inseq = 1; 1849 else if (SEQ_GE(seq + maxwin, fdata->td_end - maxwin)) 1850 inseq = 1; 1851 } 1852 } 1853 1854 if (inseq) { 1855 /* if ackskew < 0 then this should be due to fragmented 1856 * packets. There is no way to know the length of the 1857 * total packet in advance. 1858 * We do know the total length from the fragment cache though. 
1859 * Note however that there might be more sessions with 1860 * exactly the same source and destination parameters in the 1861 * state cache (and source and destination is the only stuff 1862 * that is saved in the fragment cache). Note further that 1863 * some TCP connections in the state cache are hashed with 1864 * sport and dport as well which makes it not worthwhile to 1865 * look for them. 1866 * Thus, when ackskew is negative but still seems to belong 1867 * to this session, we bump up the destinations end value. 1868 */ 1869 if (ackskew < 0) 1870 tdata->td_end = ack; 1871 1872 /* update max window seen */ 1873 if (fdata->td_maxwin < win) 1874 fdata->td_maxwin = win; 1875 if (SEQ_GT(end, fdata->td_end)) 1876 fdata->td_end = end; 1877 if (SEQ_GE(ack + win, tdata->td_maxend)) 1878 tdata->td_maxend = ack + win; 1879 return 1; 1880 } 1881 fin->fin_flx |= FI_OOW; 1882 return 0; 1883 } 1884 1885 1886 /* ------------------------------------------------------------------------ */ 1887 /* Function: fr_stclone */ 1888 /* Returns: ipstate_t* - NULL == cloning failed, */ 1889 /* else pointer to new state structure */ 1890 /* Parameters: fin(I) - pointer to packet information */ 1891 /* tcp(I) - pointer to TCP/UDP header */ 1892 /* is(I) - pointer to master state structure */ 1893 /* */ 1894 /* Create a "duplcate" state table entry from the master. 
*/ 1895 /* ------------------------------------------------------------------------ */ 1896 static ipstate_t *fr_stclone(fin, tcp, is) 1897 fr_info_t *fin; 1898 tcphdr_t *tcp; 1899 ipstate_t *is; 1900 { 1901 ipstate_t *clone; 1902 u_32_t send; 1903 ipf_stack_t *ifs = fin->fin_ifs; 1904 1905 if (ifs->ifs_ips_num == ifs->ifs_fr_statemax) { 1906 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 1907 ifs->ifs_fr_state_doflush = 1; 1908 return NULL; 1909 } 1910 KMALLOC(clone, ipstate_t *); 1911 if (clone == NULL) 1912 return NULL; 1913 bcopy((char *)is, (char *)clone, sizeof(*clone)); 1914 1915 MUTEX_NUKE(&clone->is_lock); 1916 1917 clone->is_die = ONE_DAY + ifs->ifs_fr_ticks; 1918 clone->is_state[0] = 0; 1919 clone->is_state[1] = 0; 1920 send = ntohl(tcp->th_seq) + fin->fin_dlen - (TCP_OFF(tcp) << 2) + 1921 ((tcp->th_flags & TH_SYN) ? 1 : 0) + 1922 ((tcp->th_flags & TH_FIN) ? 1 : 0); 1923 1924 if (fin->fin_rev == 1) { 1925 clone->is_dend = send; 1926 clone->is_maxdend = send; 1927 clone->is_send = 0; 1928 clone->is_maxswin = 1; 1929 clone->is_maxdwin = ntohs(tcp->th_win); 1930 if (clone->is_maxdwin == 0) 1931 clone->is_maxdwin = 1; 1932 } else { 1933 clone->is_send = send; 1934 clone->is_maxsend = send; 1935 clone->is_dend = 0; 1936 clone->is_maxdwin = 1; 1937 clone->is_maxswin = ntohs(tcp->th_win); 1938 if (clone->is_maxswin == 0) 1939 clone->is_maxswin = 1; 1940 } 1941 1942 clone->is_flags &= ~SI_CLONE; 1943 clone->is_flags |= SI_CLONED; 1944 fr_stinsert(clone, fin->fin_rev, ifs); 1945 clone->is_ref = 2; 1946 if (clone->is_p == IPPROTO_TCP) { 1947 (void) fr_tcp_age(&clone->is_sti, fin, ifs->ifs_ips_tqtqb, 1948 clone->is_flags); 1949 } 1950 MUTEX_EXIT(&clone->is_lock); 1951 #ifdef IPFILTER_SCAN 1952 (void) ipsc_attachis(is); 1953 #endif 1954 #ifdef IPFILTER_SYNC 1955 if (is->is_flags & IS_STATESYNC) 1956 clone->is_sync = ipfsync_new(SMC_STATE, fin, clone); 1957 #endif 1958 return clone; 1959 } 1960 1961 1962 /* 
------------------------------------------------------------------------ */ 1963 /* Function: fr_matchsrcdst */ 1964 /* Returns: Nil */ 1965 /* Parameters: fin(I) - pointer to packet information */ 1966 /* is(I) - pointer to state structure */ 1967 /* src(I) - pointer to source address */ 1968 /* dst(I) - pointer to destination address */ 1969 /* tcp(I) - pointer to TCP/UDP header */ 1970 /* */ 1971 /* Match a state table entry against an IP packet. The logic below is that */ 1972 /* ret gets set to one if the match succeeds, else remains 0. If it is */ 1973 /* still 0 after the test. no match. */ 1974 /* ------------------------------------------------------------------------ */ 1975 static ipstate_t *fr_matchsrcdst(fin, is, src, dst, tcp, cmask) 1976 fr_info_t *fin; 1977 ipstate_t *is; 1978 i6addr_t *src, *dst; 1979 tcphdr_t *tcp; 1980 u_32_t cmask; 1981 { 1982 int ret = 0, rev, out, flags, flx = 0, idx; 1983 u_short sp, dp; 1984 u_32_t cflx; 1985 void *ifp; 1986 ipf_stack_t *ifs = fin->fin_ifs; 1987 1988 rev = IP6_NEQ(&is->is_dst, dst); 1989 ifp = fin->fin_ifp; 1990 out = fin->fin_out; 1991 flags = is->is_flags; 1992 sp = 0; 1993 dp = 0; 1994 1995 if (tcp != NULL) { 1996 sp = htons(fin->fin_sport); 1997 dp = ntohs(fin->fin_dport); 1998 } 1999 if (!rev) { 2000 if (tcp != NULL) { 2001 if (!(flags & SI_W_SPORT) && (sp != is->is_sport)) 2002 rev = 1; 2003 else if (!(flags & SI_W_DPORT) && (dp != is->is_dport)) 2004 rev = 1; 2005 } 2006 } 2007 2008 idx = (out << 1) + rev; 2009 2010 /* 2011 * If the interface for this 'direction' is set, make sure it matches. 2012 * An interface name that is not set matches any, as does a name of *. 2013 */ 2014 if ((is->is_ifp[idx] == NULL && 2015 (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) || 2016 is->is_ifp[idx] == ifp) 2017 ret = 1; 2018 2019 if (ret == 0) 2020 return NULL; 2021 ret = 0; 2022 2023 /* 2024 * Match addresses and ports. 
2025 */ 2026 if (rev == 0) { 2027 if ((IP6_EQ(&is->is_dst, dst) || (flags & SI_W_DADDR)) && 2028 (IP6_EQ(&is->is_src, src) || (flags & SI_W_SADDR))) { 2029 if (tcp) { 2030 if ((sp == is->is_sport || flags & SI_W_SPORT)&& 2031 (dp == is->is_dport || flags & SI_W_DPORT)) 2032 ret = 1; 2033 } else { 2034 ret = 1; 2035 } 2036 } 2037 } else { 2038 if ((IP6_EQ(&is->is_dst, src) || (flags & SI_W_DADDR)) && 2039 (IP6_EQ(&is->is_src, dst) || (flags & SI_W_SADDR))) { 2040 if (tcp) { 2041 if ((dp == is->is_sport || flags & SI_W_SPORT)&& 2042 (sp == is->is_dport || flags & SI_W_DPORT)) 2043 ret = 1; 2044 } else { 2045 ret = 1; 2046 } 2047 } 2048 } 2049 2050 if (ret == 0) 2051 return NULL; 2052 2053 /* 2054 * Whether or not this should be here, is questionable, but the aim 2055 * is to get this out of the main line. 2056 */ 2057 if (tcp == NULL) 2058 flags = is->is_flags & ~(SI_WILDP|SI_NEWFR|SI_CLONE|SI_CLONED); 2059 2060 /* 2061 * Only one of the source or destination address can be flaged as a 2062 * wildcard. Fill in the missing address, if set. 2063 * For IPv6, if the address being copied in is multicast, then 2064 * don't reset the wild flag - multicast causes it to be set in the 2065 * first place! 
2066 */ 2067 if ((flags & (SI_W_SADDR|SI_W_DADDR))) { 2068 fr_ip_t *fi = &fin->fin_fi; 2069 2070 if ((flags & SI_W_SADDR) != 0) { 2071 if (rev == 0) { 2072 #ifdef USE_INET6 2073 if (is->is_v == 6 && 2074 IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6)) 2075 /*EMPTY*/; 2076 else 2077 #endif 2078 { 2079 is->is_src = fi->fi_src; 2080 is->is_flags &= ~SI_W_SADDR; 2081 } 2082 } else { 2083 #ifdef USE_INET6 2084 if (is->is_v == 6 && 2085 IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6)) 2086 /*EMPTY*/; 2087 else 2088 #endif 2089 { 2090 is->is_src = fi->fi_dst; 2091 is->is_flags &= ~SI_W_SADDR; 2092 } 2093 } 2094 } else if ((flags & SI_W_DADDR) != 0) { 2095 if (rev == 0) { 2096 #ifdef USE_INET6 2097 if (is->is_v == 6 && 2098 IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6)) 2099 /*EMPTY*/; 2100 else 2101 #endif 2102 { 2103 is->is_dst = fi->fi_dst; 2104 is->is_flags &= ~SI_W_DADDR; 2105 } 2106 } else { 2107 #ifdef USE_INET6 2108 if (is->is_v == 6 && 2109 IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6)) 2110 /*EMPTY*/; 2111 else 2112 #endif 2113 { 2114 is->is_dst = fi->fi_src; 2115 is->is_flags &= ~SI_W_DADDR; 2116 } 2117 } 2118 } 2119 if ((is->is_flags & (SI_WILDA|SI_WILDP)) == 0) { 2120 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); 2121 } 2122 } 2123 2124 flx = fin->fin_flx & cmask; 2125 cflx = is->is_flx[out][rev]; 2126 2127 /* 2128 * Match up any flags set from IP options. 2129 */ 2130 if ((cflx && (flx != (cflx & cmask))) || 2131 ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]) || 2132 ((fin->fin_secmsk & is->is_secmsk) != is->is_sec) || 2133 ((fin->fin_auth & is->is_authmsk) != is->is_auth)) 2134 return NULL; 2135 2136 /* 2137 * Only one of the source or destination port can be flagged as a 2138 * wildcard. When filling it in, fill in a copy of the matched entry 2139 * if it has the cloning flag set. 
2140 */ 2141 if ((fin->fin_flx & FI_IGNORE) != 0) { 2142 fin->fin_rev = rev; 2143 return is; 2144 } 2145 2146 if ((flags & (SI_W_SPORT|SI_W_DPORT))) { 2147 if ((flags & SI_CLONE) != 0) { 2148 ipstate_t *clone; 2149 2150 clone = fr_stclone(fin, tcp, is); 2151 if (clone == NULL) 2152 return NULL; 2153 is = clone; 2154 } else { 2155 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); 2156 } 2157 2158 if ((flags & SI_W_SPORT) != 0) { 2159 if (rev == 0) { 2160 is->is_sport = sp; 2161 is->is_send = ntohl(tcp->th_seq); 2162 } else { 2163 is->is_sport = dp; 2164 is->is_send = ntohl(tcp->th_ack); 2165 } 2166 is->is_maxsend = is->is_send + 1; 2167 } else if ((flags & SI_W_DPORT) != 0) { 2168 if (rev == 0) { 2169 is->is_dport = dp; 2170 is->is_dend = ntohl(tcp->th_ack); 2171 } else { 2172 is->is_dport = sp; 2173 is->is_dend = ntohl(tcp->th_seq); 2174 } 2175 is->is_maxdend = is->is_dend + 1; 2176 } 2177 is->is_flags &= ~(SI_W_SPORT|SI_W_DPORT); 2178 if ((flags & SI_CLONED) && ifs->ifs_ipstate_logging) 2179 ipstate_log(is, ISL_CLONE, ifs); 2180 } 2181 2182 ret = -1; 2183 2184 if (is->is_flx[out][rev] == 0) { 2185 is->is_flx[out][rev] = flx; 2186 is->is_opt[rev] = fin->fin_optmsk; 2187 if (is->is_v == 6) { 2188 is->is_opt[rev] &= ~0x8; 2189 is->is_optmsk[rev] &= ~0x8; 2190 } 2191 } 2192 2193 /* 2194 * Check if the interface name for this "direction" is set and if not, 2195 * fill it in. 
2196 */ 2197 if (is->is_ifp[idx] == NULL && 2198 (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) { 2199 is->is_ifp[idx] = ifp; 2200 COPYIFNAME(ifp, is->is_ifname[idx], fin->fin_v); 2201 } 2202 fin->fin_rev = rev; 2203 return is; 2204 } 2205 2206 2207 /* ------------------------------------------------------------------------ */ 2208 /* Function: fr_checkicmpmatchingstate */ 2209 /* Returns: Nil */ 2210 /* Parameters: fin(I) - pointer to packet information */ 2211 /* */ 2212 /* If we've got an ICMP error message, using the information stored in the */ 2213 /* ICMP packet, look for a matching state table entry. */ 2214 /* */ 2215 /* If we return NULL then no lock on ipf_state is held. */ 2216 /* If we return non-null then a read-lock on ipf_state is held. */ 2217 /* ------------------------------------------------------------------------ */ 2218 static ipstate_t *fr_checkicmpmatchingstate(fin) 2219 fr_info_t *fin; 2220 { 2221 ipstate_t *is, **isp; 2222 u_short sport, dport; 2223 u_char pr; 2224 int backward, i, oi; 2225 i6addr_t dst, src; 2226 struct icmp *ic; 2227 u_short savelen; 2228 icmphdr_t *icmp; 2229 fr_info_t ofin; 2230 tcphdr_t *tcp; 2231 int len; 2232 ip_t *oip; 2233 u_int hv; 2234 ipf_stack_t *ifs = fin->fin_ifs; 2235 2236 /* 2237 * Does it at least have the return (basic) IP header ? 2238 * Is it an actual recognised ICMP error type? 2239 * Only a basic IP header (no options) should be with 2240 * an ICMP error header. 2241 */ 2242 if ((fin->fin_v != 4) || (fin->fin_hlen != sizeof(ip_t)) || 2243 (fin->fin_plen < ICMPERR_MINPKTLEN) || 2244 !(fin->fin_flx & FI_ICMPERR)) 2245 return NULL; 2246 ic = fin->fin_dp; 2247 2248 oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN); 2249 /* 2250 * Check if the at least the old IP header (with options) and 2251 * 8 bytes of payload is present. 2252 */ 2253 if (fin->fin_plen < ICMPERR_MAXPKTLEN + ((IP_HL(oip) - 5) << 2)) 2254 return NULL; 2255 2256 /* 2257 * Sanity Checks. 
2258 */ 2259 len = fin->fin_dlen - ICMPERR_ICMPHLEN; 2260 if ((len <= 0) || ((IP_HL(oip) << 2) > len)) 2261 return NULL; 2262 2263 /* 2264 * Is the buffer big enough for all of it ? It's the size of the IP 2265 * header claimed in the encapsulated part which is of concern. It 2266 * may be too big to be in this buffer but not so big that it's 2267 * outside the ICMP packet, leading to TCP deref's causing problems. 2268 * This is possible because we don't know how big oip_hl is when we 2269 * do the pullup early in fr_check() and thus can't guarantee it is 2270 * all here now. 2271 */ 2272 #ifdef _KERNEL 2273 { 2274 mb_t *m; 2275 2276 m = fin->fin_m; 2277 # if defined(MENTAT) 2278 if ((char *)oip + len > (char *)m->b_wptr) 2279 return NULL; 2280 # else 2281 if ((char *)oip + len > (char *)fin->fin_ip + m->m_len) 2282 return NULL; 2283 # endif 2284 } 2285 #endif 2286 bcopy((char *)fin, (char *)&ofin, sizeof(*fin)); 2287 2288 /* 2289 * in the IPv4 case we must zero the i6addr union otherwise 2290 * the IP6_EQ and IP6_NEQ macros produce the wrong results because 2291 * of the 'junk' in the unused part of the union 2292 */ 2293 bzero((char *)&src, sizeof(src)); 2294 bzero((char *)&dst, sizeof(dst)); 2295 2296 /* 2297 * we make an fin entry to be able to feed it to 2298 * matchsrcdst note that not all fields are encessary 2299 * but this is the cleanest way. Note further we fill 2300 * in fin_mp such that if someone uses it we'll get 2301 * a kernel panic. fr_matchsrcdst does not use this. 2302 * 2303 * watch out here, as ip is in host order and oip in network 2304 * order. Any change we make must be undone afterwards, like 2305 * oip->ip_off - it is still in network byte order so fix it. 
2306 */ 2307 savelen = oip->ip_len; 2308 oip->ip_len = len; 2309 oip->ip_off = ntohs(oip->ip_off); 2310 2311 ofin.fin_flx = FI_NOCKSUM; 2312 ofin.fin_v = 4; 2313 ofin.fin_ip = oip; 2314 ofin.fin_m = NULL; /* if dereferenced, panic XXX */ 2315 ofin.fin_mp = NULL; /* if dereferenced, panic XXX */ 2316 ofin.fin_plen = fin->fin_dlen - ICMPERR_ICMPHLEN; 2317 (void) fr_makefrip(IP_HL(oip) << 2, oip, &ofin); 2318 ofin.fin_ifp = fin->fin_ifp; 2319 ofin.fin_out = !fin->fin_out; 2320 /* 2321 * Reset the short and bad flag here because in fr_matchsrcdst() 2322 * the flags for the current packet (fin_flx) are compared against 2323 * those for the existing session. 2324 */ 2325 ofin.fin_flx &= ~(FI_BAD|FI_SHORT); 2326 2327 /* 2328 * Put old values of ip_len and ip_off back as we don't know 2329 * if we have to forward the packet (or process it again. 2330 */ 2331 oip->ip_len = savelen; 2332 oip->ip_off = htons(oip->ip_off); 2333 2334 switch (oip->ip_p) 2335 { 2336 case IPPROTO_ICMP : 2337 /* 2338 * an ICMP error can only be generated as a result of an 2339 * ICMP query, not as the response on an ICMP error 2340 * 2341 * XXX theoretically ICMP_ECHOREP and the other reply's are 2342 * ICMP query's as well, but adding them here seems strange XXX 2343 */ 2344 if ((ofin.fin_flx & FI_ICMPERR) != 0) 2345 return NULL; 2346 2347 /* 2348 * perform a lookup of the ICMP packet in the state table 2349 */ 2350 icmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2351 hv = (pr = oip->ip_p); 2352 src.in4 = oip->ip_src; 2353 hv += src.in4.s_addr; 2354 dst.in4 = oip->ip_dst; 2355 hv += dst.in4.s_addr; 2356 hv += icmp->icmp_id; 2357 hv = DOUBLE_HASH(hv, ifs); 2358 2359 READ_ENTER(&ifs->ifs_ipf_state); 2360 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 2361 isp = &is->is_hnext; 2362 if ((is->is_p != pr) || (is->is_v != 4)) 2363 continue; 2364 if (is->is_pass & FR_NOICMPERR) 2365 continue; 2366 is = fr_matchsrcdst(&ofin, is, &src, &dst, 2367 NULL, FI_ICMPCMP); 2368 if (is != 
NULL) { 2369 if ((is->is_pass & FR_NOICMPERR) != 0) { 2370 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2371 return NULL; 2372 } 2373 /* 2374 * i : the index of this packet (the icmp 2375 * unreachable) 2376 * oi : the index of the original packet found 2377 * in the icmp header (i.e. the packet 2378 * causing this icmp) 2379 * backward : original packet was backward 2380 * compared to the state 2381 */ 2382 backward = IP6_NEQ(&is->is_src, &src); 2383 fin->fin_rev = !backward; 2384 i = (!backward << 1) + fin->fin_out; 2385 oi = (backward << 1) + ofin.fin_out; 2386 if (is->is_icmppkts[i] > is->is_pkts[oi]) 2387 continue; 2388 ifs->ifs_ips_stats.iss_hits++; 2389 is->is_icmppkts[i]++; 2390 return is; 2391 } 2392 } 2393 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2394 return NULL; 2395 case IPPROTO_TCP : 2396 case IPPROTO_UDP : 2397 break; 2398 default : 2399 return NULL; 2400 } 2401 2402 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2403 dport = tcp->th_dport; 2404 sport = tcp->th_sport; 2405 2406 hv = (pr = oip->ip_p); 2407 src.in4 = oip->ip_src; 2408 hv += src.in4.s_addr; 2409 dst.in4 = oip->ip_dst; 2410 hv += dst.in4.s_addr; 2411 hv += dport; 2412 hv += sport; 2413 hv = DOUBLE_HASH(hv, ifs); 2414 2415 READ_ENTER(&ifs->ifs_ipf_state); 2416 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 2417 isp = &is->is_hnext; 2418 /* 2419 * Only allow this icmp though if the 2420 * encapsulated packet was allowed through the 2421 * other way around. Note that the minimal amount 2422 * of info present does not allow for checking against 2423 * tcp internals such as seq and ack numbers. Only the 2424 * ports are known to be present and can be even if the 2425 * short flag is set. 2426 */ 2427 if ((is->is_p == pr) && (is->is_v == 4) && 2428 (is = fr_matchsrcdst(&ofin, is, &src, &dst, 2429 tcp, FI_ICMPCMP))) { 2430 /* 2431 * i : the index of this packet (the icmp unreachable) 2432 * oi : the index of the original packet found in the 2433 * icmp header (i.e. 
the packet causing this icmp) 2434 * backward : original packet was backward compared to 2435 * the state 2436 */ 2437 backward = IP6_NEQ(&is->is_src, &src); 2438 fin->fin_rev = !backward; 2439 i = (!backward << 1) + fin->fin_out; 2440 oi = (backward << 1) + ofin.fin_out; 2441 2442 if (((is->is_pass & FR_NOICMPERR) != 0) || 2443 (is->is_icmppkts[i] > is->is_pkts[oi])) 2444 break; 2445 ifs->ifs_ips_stats.iss_hits++; 2446 is->is_icmppkts[i]++; 2447 /* 2448 * we deliberately do not touch the timeouts 2449 * for the accompanying state table entry. 2450 * It remains to be seen if that is correct. XXX 2451 */ 2452 return is; 2453 } 2454 } 2455 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2456 return NULL; 2457 } 2458 2459 2460 /* ------------------------------------------------------------------------ */ 2461 /* Function: fr_ipsmove */ 2462 /* Returns: Nil */ 2463 /* Parameters: is(I) - pointer to state table entry */ 2464 /* hv(I) - new hash value for state table entry */ 2465 /* Write Locks: ipf_state */ 2466 /* */ 2467 /* Move a state entry from one position in the hash table to another. */ 2468 /* ------------------------------------------------------------------------ */ 2469 static void fr_ipsmove(is, hv, ifs) 2470 ipstate_t *is; 2471 u_int hv; 2472 ipf_stack_t *ifs; 2473 { 2474 ipstate_t **isp; 2475 u_int hvm; 2476 2477 ASSERT(rw_read_locked(&ifs->ifs_ipf_state.ipf_lk) == 0); 2478 2479 hvm = is->is_hv; 2480 /* 2481 * Remove the hash from the old location... 2482 */ 2483 isp = is->is_phnext; 2484 if (is->is_hnext) 2485 is->is_hnext->is_phnext = isp; 2486 *isp = is->is_hnext; 2487 if (ifs->ifs_ips_table[hvm] == NULL) 2488 ifs->ifs_ips_stats.iss_inuse--; 2489 ifs->ifs_ips_stats.iss_bucketlen[hvm]--; 2490 2491 /* 2492 * ...and put the hash in the new one. 
2493 */ 2494 hvm = DOUBLE_HASH(hv, ifs); 2495 is->is_hv = hvm; 2496 isp = &ifs->ifs_ips_table[hvm]; 2497 if (*isp) 2498 (*isp)->is_phnext = &is->is_hnext; 2499 else 2500 ifs->ifs_ips_stats.iss_inuse++; 2501 ifs->ifs_ips_stats.iss_bucketlen[hvm]++; 2502 is->is_phnext = isp; 2503 is->is_hnext = *isp; 2504 *isp = is; 2505 } 2506 2507 2508 /* ------------------------------------------------------------------------ */ 2509 /* Function: fr_stlookup */ 2510 /* Returns: ipstate_t* - NULL == no matching state found, */ 2511 /* else pointer to state information is returned */ 2512 /* Parameters: fin(I) - pointer to packet information */ 2513 /* tcp(I) - pointer to TCP/UDP header. */ 2514 /* */ 2515 /* Search the state table for a matching entry to the packet described by */ 2516 /* the contents of *fin. */ 2517 /* */ 2518 /* If we return NULL then no lock on ipf_state is held. */ 2519 /* If we return non-null then a read-lock on ipf_state is held. */ 2520 /* ------------------------------------------------------------------------ */ 2521 ipstate_t *fr_stlookup(fin, tcp, ifqp) 2522 fr_info_t *fin; 2523 tcphdr_t *tcp; 2524 ipftq_t **ifqp; 2525 { 2526 u_int hv, hvm, pr, v, tryagain; 2527 ipstate_t *is, **isp; 2528 u_short dport, sport; 2529 i6addr_t src, dst; 2530 struct icmp *ic; 2531 ipftq_t *ifq; 2532 int oow; 2533 ipf_stack_t *ifs = fin->fin_ifs; 2534 2535 is = NULL; 2536 ifq = NULL; 2537 tcp = fin->fin_dp; 2538 ic = (struct icmp *)tcp; 2539 hv = (pr = fin->fin_fi.fi_p); 2540 src = fin->fin_fi.fi_src; 2541 dst = fin->fin_fi.fi_dst; 2542 hv += src.in4.s_addr; 2543 hv += dst.in4.s_addr; 2544 2545 v = fin->fin_fi.fi_v; 2546 #ifdef USE_INET6 2547 if (v == 6) { 2548 hv += fin->fin_fi.fi_src.i6[1]; 2549 hv += fin->fin_fi.fi_src.i6[2]; 2550 hv += fin->fin_fi.fi_src.i6[3]; 2551 2552 if ((fin->fin_p == IPPROTO_ICMPV6) && 2553 IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_dst.in6)) { 2554 hv -= dst.in4.s_addr; 2555 } else { 2556 hv += fin->fin_fi.fi_dst.i6[1]; 2557 hv += 
fin->fin_fi.fi_dst.i6[2]; 2558 hv += fin->fin_fi.fi_dst.i6[3]; 2559 } 2560 } 2561 #endif 2562 2563 /* 2564 * Search the hash table for matching packet header info. 2565 */ 2566 switch (pr) 2567 { 2568 #ifdef USE_INET6 2569 case IPPROTO_ICMPV6 : 2570 tryagain = 0; 2571 if (v == 6) { 2572 if ((ic->icmp_type == ICMP6_ECHO_REQUEST) || 2573 (ic->icmp_type == ICMP6_ECHO_REPLY)) { 2574 hv += ic->icmp_id; 2575 } 2576 } 2577 READ_ENTER(&ifs->ifs_ipf_state); 2578 icmp6again: 2579 hvm = DOUBLE_HASH(hv, ifs); 2580 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { 2581 isp = &is->is_hnext; 2582 if ((is->is_p != pr) || (is->is_v != v)) 2583 continue; 2584 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); 2585 if (is != NULL && 2586 fr_matchicmpqueryreply(v, &is->is_icmp, 2587 ic, fin->fin_rev)) { 2588 if (fin->fin_rev) 2589 ifq = &ifs->ifs_ips_icmpacktq; 2590 else 2591 ifq = &ifs->ifs_ips_icmptq; 2592 break; 2593 } 2594 } 2595 2596 if (is != NULL) { 2597 if ((tryagain != 0) && !(is->is_flags & SI_W_DADDR)) { 2598 hv += fin->fin_fi.fi_src.i6[0]; 2599 hv += fin->fin_fi.fi_src.i6[1]; 2600 hv += fin->fin_fi.fi_src.i6[2]; 2601 hv += fin->fin_fi.fi_src.i6[3]; 2602 fr_ipsmove(is, hv, ifs); 2603 MUTEX_DOWNGRADE(&ifs->ifs_ipf_state); 2604 } 2605 break; 2606 } 2607 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2608 2609 /* 2610 * No matching icmp state entry. Perhaps this is a 2611 * response to another state entry. 2612 * 2613 * XXX With some ICMP6 packets, the "other" address is already 2614 * in the packet, after the ICMP6 header, and this could be 2615 * used in place of the multicast address. However, taking 2616 * advantage of this requires some significant code changes 2617 * to handle the specific types where that is the case. 
2618 */ 2619 if ((ifs->ifs_ips_stats.iss_wild != 0) && (v == 6) && (tryagain == 0) && 2620 !IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_src.in6)) { 2621 hv -= fin->fin_fi.fi_src.i6[0]; 2622 hv -= fin->fin_fi.fi_src.i6[1]; 2623 hv -= fin->fin_fi.fi_src.i6[2]; 2624 hv -= fin->fin_fi.fi_src.i6[3]; 2625 tryagain = 1; 2626 WRITE_ENTER(&ifs->ifs_ipf_state); 2627 goto icmp6again; 2628 } 2629 2630 is = fr_checkicmp6matchingstate(fin); 2631 if (is != NULL) 2632 return is; 2633 break; 2634 #endif 2635 2636 case IPPROTO_ICMP : 2637 if (v == 4) { 2638 hv += ic->icmp_id; 2639 } 2640 hv = DOUBLE_HASH(hv, ifs); 2641 READ_ENTER(&ifs->ifs_ipf_state); 2642 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 2643 isp = &is->is_hnext; 2644 if ((is->is_p != pr) || (is->is_v != v)) 2645 continue; 2646 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); 2647 if (is != NULL && 2648 fr_matchicmpqueryreply(v, &is->is_icmp, 2649 ic, fin->fin_rev)) { 2650 if (fin->fin_rev) 2651 ifq = &ifs->ifs_ips_icmpacktq; 2652 else 2653 ifq = &ifs->ifs_ips_icmptq; 2654 break; 2655 } 2656 } 2657 if (is == NULL) { 2658 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2659 } 2660 break; 2661 2662 case IPPROTO_TCP : 2663 case IPPROTO_UDP : 2664 ifqp = NULL; 2665 sport = htons(fin->fin_data[0]); 2666 hv += sport; 2667 dport = htons(fin->fin_data[1]); 2668 hv += dport; 2669 oow = 0; 2670 tryagain = 0; 2671 READ_ENTER(&ifs->ifs_ipf_state); 2672 retry_tcpudp: 2673 hvm = DOUBLE_HASH(hv, ifs); 2674 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { 2675 isp = &is->is_hnext; 2676 if ((is->is_p != pr) || (is->is_v != v)) 2677 continue; 2678 fin->fin_flx &= ~FI_OOW; 2679 is = fr_matchsrcdst(fin, is, &src, &dst, tcp, FI_CMP); 2680 if (is != NULL) { 2681 if (pr == IPPROTO_TCP) { 2682 if (!fr_tcpstate(fin, tcp, is)) { 2683 oow |= fin->fin_flx & FI_OOW; 2684 continue; 2685 } 2686 } 2687 break; 2688 } 2689 } 2690 if (is != NULL) { 2691 if (tryagain && 2692 !(is->is_flags & (SI_CLONE|SI_WILDP|SI_WILDA))) { 2693 hv += 
dport; 2694 hv += sport; 2695 fr_ipsmove(is, hv, ifs); 2696 MUTEX_DOWNGRADE(&ifs->ifs_ipf_state); 2697 } 2698 break; 2699 } 2700 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2701 2702 if (!tryagain && ifs->ifs_ips_stats.iss_wild) { 2703 hv -= dport; 2704 hv -= sport; 2705 tryagain = 1; 2706 WRITE_ENTER(&ifs->ifs_ipf_state); 2707 goto retry_tcpudp; 2708 } 2709 fin->fin_flx |= oow; 2710 break; 2711 2712 #if 0 2713 case IPPROTO_GRE : 2714 gre = fin->fin_dp; 2715 if (GRE_REV(gre->gr_flags) == 1) { 2716 hv += gre->gr_call; 2717 } 2718 /* FALLTHROUGH */ 2719 #endif 2720 default : 2721 ifqp = NULL; 2722 hvm = DOUBLE_HASH(hv, ifs); 2723 READ_ENTER(&ifs->ifs_ipf_state); 2724 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { 2725 isp = &is->is_hnext; 2726 if ((is->is_p != pr) || (is->is_v != v)) 2727 continue; 2728 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); 2729 if (is != NULL) { 2730 ifq = &ifs->ifs_ips_iptq; 2731 break; 2732 } 2733 } 2734 if (is == NULL) { 2735 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2736 } 2737 break; 2738 } 2739 2740 if ((is != NULL) && ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) && 2741 (is->is_tqehead[fin->fin_rev] != NULL)) 2742 ifq = is->is_tqehead[fin->fin_rev]; 2743 if (ifq != NULL && ifqp != NULL) 2744 *ifqp = ifq; 2745 return is; 2746 } 2747 2748 2749 /* ------------------------------------------------------------------------ */ 2750 /* Function: fr_updatestate */ 2751 /* Returns: Nil */ 2752 /* Parameters: fin(I) - pointer to packet information */ 2753 /* is(I) - pointer to state table entry */ 2754 /* Read Locks: ipf_state */ 2755 /* */ 2756 /* Updates packet and byte counters for a newly received packet. Seeds the */ 2757 /* fragment cache with a new entry as required. 
*/ 2758 /* ------------------------------------------------------------------------ */ 2759 void fr_updatestate(fin, is, ifq) 2760 fr_info_t *fin; 2761 ipstate_t *is; 2762 ipftq_t *ifq; 2763 { 2764 ipftqent_t *tqe; 2765 int i, pass; 2766 ipf_stack_t *ifs = fin->fin_ifs; 2767 2768 i = (fin->fin_rev << 1) + fin->fin_out; 2769 2770 /* 2771 * For TCP packets, ifq == NULL. For all others, check if this new 2772 * queue is different to the last one it was on and move it if so. 2773 */ 2774 tqe = &is->is_sti; 2775 MUTEX_ENTER(&is->is_lock); 2776 if ((tqe->tqe_flags & TQE_RULEBASED) != 0) 2777 ifq = is->is_tqehead[fin->fin_rev]; 2778 2779 if (ifq != NULL) 2780 fr_movequeue(tqe, tqe->tqe_ifq, ifq, ifs); 2781 2782 is->is_pkts[i]++; 2783 is->is_bytes[i] += fin->fin_plen; 2784 MUTEX_EXIT(&is->is_lock); 2785 2786 #ifdef IPFILTER_SYNC 2787 if (is->is_flags & IS_STATESYNC) 2788 ipfsync_update(SMC_STATE, fin, is->is_sync); 2789 #endif 2790 2791 ATOMIC_INCL(ifs->ifs_ips_stats.iss_hits); 2792 2793 fin->fin_fr = is->is_rule; 2794 2795 /* 2796 * If this packet is a fragment and the rule says to track fragments, 2797 * then create a new fragment cache entry. 2798 */ 2799 pass = is->is_pass; 2800 if ((fin->fin_flx & FI_FRAG) && FR_ISPASS(pass)) 2801 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); 2802 } 2803 2804 2805 /* ------------------------------------------------------------------------ */ 2806 /* Function: fr_checkstate */ 2807 /* Returns: frentry_t* - NULL == search failed, */ 2808 /* else pointer to rule for matching state */ 2809 /* Parameters: ifp(I) - pointer to interface */ 2810 /* passp(I) - pointer to filtering result flags */ 2811 /* */ 2812 /* Check if a packet is associated with an entry in the state table. 
*/ 2813 /* ------------------------------------------------------------------------ */ 2814 frentry_t *fr_checkstate(fin, passp) 2815 fr_info_t *fin; 2816 u_32_t *passp; 2817 { 2818 ipstate_t *is; 2819 frentry_t *fr; 2820 tcphdr_t *tcp; 2821 ipftq_t *ifq; 2822 u_int pass; 2823 ipf_stack_t *ifs = fin->fin_ifs; 2824 2825 if (ifs->ifs_fr_state_lock || (ifs->ifs_ips_list == NULL) || 2826 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) 2827 return NULL; 2828 2829 is = NULL; 2830 if ((fin->fin_flx & FI_TCPUDP) || 2831 (fin->fin_fi.fi_p == IPPROTO_ICMP) 2832 #ifdef USE_INET6 2833 || (fin->fin_fi.fi_p == IPPROTO_ICMPV6) 2834 #endif 2835 ) 2836 tcp = fin->fin_dp; 2837 else 2838 tcp = NULL; 2839 2840 /* 2841 * Search the hash table for matching packet header info. 2842 */ 2843 ifq = NULL; 2844 is = fin->fin_state; 2845 if (is == NULL) 2846 is = fr_stlookup(fin, tcp, &ifq); 2847 switch (fin->fin_p) 2848 { 2849 #ifdef USE_INET6 2850 case IPPROTO_ICMPV6 : 2851 if (is != NULL) 2852 break; 2853 if (fin->fin_v == 6) { 2854 is = fr_checkicmp6matchingstate(fin); 2855 if (is != NULL) 2856 goto matched; 2857 } 2858 break; 2859 #endif 2860 case IPPROTO_ICMP : 2861 if (is != NULL) 2862 break; 2863 /* 2864 * No matching icmp state entry. Perhaps this is a 2865 * response to another state entry. 
2866 */ 2867 is = fr_checkicmpmatchingstate(fin); 2868 if (is != NULL) 2869 goto matched; 2870 break; 2871 case IPPROTO_TCP : 2872 if (is == NULL) 2873 break; 2874 2875 if (is->is_pass & FR_NEWISN) { 2876 if (fin->fin_out == 0) 2877 fr_fixinisn(fin, is); 2878 else if (fin->fin_out == 1) 2879 fr_fixoutisn(fin, is); 2880 } 2881 break; 2882 default : 2883 if (fin->fin_rev) 2884 ifq = &ifs->ifs_ips_udpacktq; 2885 else 2886 ifq = &ifs->ifs_ips_udptq; 2887 break; 2888 } 2889 if (is == NULL) { 2890 ATOMIC_INCL(ifs->ifs_ips_stats.iss_miss); 2891 return NULL; 2892 } 2893 2894 matched: 2895 fr = is->is_rule; 2896 if (fr != NULL) { 2897 if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) { 2898 if (fin->fin_nattag == NULL) 2899 return NULL; 2900 if (fr_matchtag(&fr->fr_nattag, fin->fin_nattag) != 0) 2901 return NULL; 2902 } 2903 (void) strncpy(fin->fin_group, fr->fr_group, FR_GROUPLEN); 2904 fin->fin_icode = fr->fr_icode; 2905 } 2906 2907 fin->fin_rule = is->is_rulen; 2908 pass = is->is_pass; 2909 fr_updatestate(fin, is, ifq); 2910 if (fin->fin_out == 1) 2911 fin->fin_nat = is->is_nat[fin->fin_rev]; 2912 2913 fin->fin_state = is; 2914 is->is_touched = ifs->ifs_fr_ticks; 2915 MUTEX_ENTER(&is->is_lock); 2916 is->is_ref++; 2917 MUTEX_EXIT(&is->is_lock); 2918 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2919 fin->fin_flx |= FI_STATE; 2920 if ((pass & FR_LOGFIRST) != 0) 2921 pass &= ~(FR_LOGFIRST|FR_LOG); 2922 *passp = pass; 2923 return fr; 2924 } 2925 2926 2927 /* ------------------------------------------------------------------------ */ 2928 /* Function: fr_fixoutisn */ 2929 /* Returns: Nil */ 2930 /* Parameters: fin(I) - pointer to packet information */ 2931 /* is(I) - pointer to master state structure */ 2932 /* */ 2933 /* Called only for outbound packets, adjusts the sequence number and the */ 2934 /* TCP checksum to match that change. 
*/ 2935 /* ------------------------------------------------------------------------ */ 2936 static void fr_fixoutisn(fin, is) 2937 fr_info_t *fin; 2938 ipstate_t *is; 2939 { 2940 tcphdr_t *tcp; 2941 int rev; 2942 u_32_t seq; 2943 2944 tcp = fin->fin_dp; 2945 rev = fin->fin_rev; 2946 if ((is->is_flags & IS_ISNSYN) != 0) { 2947 if (rev == 0) { 2948 seq = ntohl(tcp->th_seq); 2949 seq += is->is_isninc[0]; 2950 tcp->th_seq = htonl(seq); 2951 fix_outcksum(&tcp->th_sum, is->is_sumd[0]); 2952 } 2953 } 2954 if ((is->is_flags & IS_ISNACK) != 0) { 2955 if (rev == 1) { 2956 seq = ntohl(tcp->th_seq); 2957 seq += is->is_isninc[1]; 2958 tcp->th_seq = htonl(seq); 2959 fix_outcksum(&tcp->th_sum, is->is_sumd[1]); 2960 } 2961 } 2962 } 2963 2964 2965 /* ------------------------------------------------------------------------ */ 2966 /* Function: fr_fixinisn */ 2967 /* Returns: Nil */ 2968 /* Parameters: fin(I) - pointer to packet information */ 2969 /* is(I) - pointer to master state structure */ 2970 /* */ 2971 /* Called only for inbound packets, adjusts the acknowledge number and the */ 2972 /* TCP checksum to match that change. 
*/ 2973 /* ------------------------------------------------------------------------ */ 2974 static void fr_fixinisn(fin, is) 2975 fr_info_t *fin; 2976 ipstate_t *is; 2977 { 2978 tcphdr_t *tcp; 2979 int rev; 2980 u_32_t ack; 2981 2982 tcp = fin->fin_dp; 2983 rev = fin->fin_rev; 2984 if ((is->is_flags & IS_ISNSYN) != 0) { 2985 if (rev == 1) { 2986 ack = ntohl(tcp->th_ack); 2987 ack -= is->is_isninc[0]; 2988 tcp->th_ack = htonl(ack); 2989 fix_incksum(&tcp->th_sum, is->is_sumd[0]); 2990 } 2991 } 2992 if ((is->is_flags & IS_ISNACK) != 0) { 2993 if (rev == 0) { 2994 ack = ntohl(tcp->th_ack); 2995 ack -= is->is_isninc[1]; 2996 tcp->th_ack = htonl(ack); 2997 fix_incksum(&tcp->th_sum, is->is_sumd[1]); 2998 } 2999 } 3000 } 3001 3002 3003 /* ------------------------------------------------------------------------ */ 3004 /* Function: fr_statesync */ 3005 /* Returns: Nil */ 3006 /* Parameters: action(I) - type of synchronisation to do */ 3007 /* v(I) - IP version being sync'd (v4 or v6) */ 3008 /* ifp(I) - interface identifier associated with action */ 3009 /* name(I) - name associated with ifp parameter */ 3010 /* */ 3011 /* Walk through all state entries and if an interface pointer match is */ 3012 /* found then look it up again, based on its name in case the pointer has */ 3013 /* changed since last time. */ 3014 /* */ 3015 /* If ifp is passed in as being non-null then we are only doing updates for */ 3016 /* existing, matching, uses of it. 
*/ 3017 /* ------------------------------------------------------------------------ */ 3018 void fr_statesync(action, v, ifp, name, ifs) 3019 int action, v; 3020 void *ifp; 3021 char *name; 3022 ipf_stack_t *ifs; 3023 { 3024 ipstate_t *is; 3025 int i; 3026 3027 if (ifs->ifs_fr_running <= 0) 3028 return; 3029 3030 WRITE_ENTER(&ifs->ifs_ipf_state); 3031 3032 if (ifs->ifs_fr_running <= 0) { 3033 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3034 return; 3035 } 3036 3037 switch (action) 3038 { 3039 case IPFSYNC_RESYNC : 3040 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3041 if (v != 0 && is->is_v != v) 3042 continue; 3043 /* 3044 * Look up all the interface names in the state entry. 3045 */ 3046 for (i = 0; i < 4; i++) { 3047 is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], 3048 is->is_v, ifs); 3049 } 3050 } 3051 break; 3052 case IPFSYNC_NEWIFP : 3053 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3054 if (v != 0 && is->is_v != v) 3055 continue; 3056 /* 3057 * Look up all the interface names in the state entry. 3058 */ 3059 for (i = 0; i < 4; i++) { 3060 if (!strncmp(is->is_ifname[i], name, 3061 sizeof(is->is_ifname[i]))) 3062 is->is_ifp[i] = ifp; 3063 } 3064 } 3065 break; 3066 case IPFSYNC_OLDIFP : 3067 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3068 if (v != 0 && is->is_v != v) 3069 continue; 3070 /* 3071 * Look up all the interface names in the state entry. 
3072 */ 3073 for (i = 0; i < 4; i++) { 3074 if (is->is_ifp[i] == ifp) 3075 is->is_ifp[i] = (void *)-1; 3076 } 3077 } 3078 break; 3079 } 3080 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3081 } 3082 3083 3084 /* ------------------------------------------------------------------------ */ 3085 /* Function: fr_delstate */ 3086 /* Returns: Nil */ 3087 /* Parameters: is(I) - pointer to state structure to delete */ 3088 /* why(I) - if not 0, log reason why it was deleted */ 3089 /* Write Locks: ipf_state/ipf_global */ 3090 /* */ 3091 /* Deletes a state entry from the enumerated list as well as the hash table */ 3092 /* and timeout queue lists. Make adjustments to hash table statistics and */ 3093 /* global counters as required. */ 3094 /* ------------------------------------------------------------------------ */ 3095 static void fr_delstate(is, why, ifs) 3096 ipstate_t *is; 3097 int why; 3098 ipf_stack_t *ifs; 3099 { 3100 3101 ASSERT(rw_write_held(&ifs->ifs_ipf_global.ipf_lk) == 0 || 3102 rw_write_held(&ifs->ifs_ipf_state.ipf_lk) == 0); 3103 3104 /* 3105 * Since we want to delete this, remove it from the state table, 3106 * where it can be found & used, first. 3107 */ 3108 if (is->is_pnext != NULL) { 3109 *is->is_pnext = is->is_next; 3110 3111 if (is->is_next != NULL) 3112 is->is_next->is_pnext = is->is_pnext; 3113 3114 is->is_pnext = NULL; 3115 is->is_next = NULL; 3116 } 3117 3118 if (is->is_phnext != NULL) { 3119 *is->is_phnext = is->is_hnext; 3120 if (is->is_hnext != NULL) 3121 is->is_hnext->is_phnext = is->is_phnext; 3122 if (ifs->ifs_ips_table[is->is_hv] == NULL) 3123 ifs->ifs_ips_stats.iss_inuse--; 3124 ifs->ifs_ips_stats.iss_bucketlen[is->is_hv]--; 3125 3126 is->is_phnext = NULL; 3127 is->is_hnext = NULL; 3128 } 3129 3130 /* 3131 * Because ifs->ifs_ips_stats.iss_wild is a count of entries in the state 3132 * table that have wildcard flags set, only decerement it once 3133 * and do it here. 
3134 */ 3135 if (is->is_flags & (SI_WILDP|SI_WILDA)) { 3136 if (!(is->is_flags & SI_CLONED)) { 3137 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); 3138 } 3139 is->is_flags &= ~(SI_WILDP|SI_WILDA); 3140 } 3141 3142 /* 3143 * Next, remove it from the timeout queue it is in. 3144 */ 3145 fr_deletequeueentry(&is->is_sti); 3146 3147 is->is_me = NULL; 3148 3149 /* 3150 * If it is still in use by something else, do not go any further, 3151 * but note that at this point it is now an orphan. 3152 */ 3153 MUTEX_ENTER(&is->is_lock); 3154 if (is->is_ref > 1) { 3155 is->is_ref--; 3156 MUTEX_EXIT(&is->is_lock); 3157 return; 3158 } 3159 MUTEX_EXIT(&is->is_lock); 3160 3161 is->is_ref = 0; 3162 3163 if (is->is_tqehead[0] != NULL) 3164 (void) fr_deletetimeoutqueue(is->is_tqehead[0]); 3165 3166 if (is->is_tqehead[1] != NULL) 3167 (void) fr_deletetimeoutqueue(is->is_tqehead[1]); 3168 3169 #ifdef IPFILTER_SYNC 3170 if (is->is_sync) 3171 ipfsync_del(is->is_sync); 3172 #endif 3173 #ifdef IPFILTER_SCAN 3174 (void) ipsc_detachis(is); 3175 #endif 3176 3177 if (ifs->ifs_ipstate_logging != 0 && why != 0) 3178 ipstate_log(is, why, ifs); 3179 3180 if (is->is_rule != NULL) { 3181 is->is_rule->fr_statecnt--; 3182 (void)fr_derefrule(&is->is_rule, ifs); 3183 } 3184 3185 MUTEX_DESTROY(&is->is_lock); 3186 KFREE(is); 3187 ifs->ifs_ips_num--; 3188 } 3189 3190 3191 /* ------------------------------------------------------------------------ */ 3192 /* Function: fr_timeoutstate */ 3193 /* Returns: Nil */ 3194 /* Parameters: Nil */ 3195 /* */ 3196 /* Slowly expire held state for thingslike UDP and ICMP. The algorithm */ 3197 /* used here is to keep the queue sorted with the oldest things at the top */ 3198 /* and the youngest at the bottom. So if the top one doesn't need to be */ 3199 /* expired then neither will any under it. 
*/ 3200 /* ------------------------------------------------------------------------ */ 3201 void fr_timeoutstate(ifs) 3202 ipf_stack_t *ifs; 3203 { 3204 ipftq_t *ifq, *ifqnext; 3205 ipftqent_t *tqe, *tqn; 3206 ipstate_t *is; 3207 SPL_INT(s); 3208 3209 SPL_NET(s); 3210 WRITE_ENTER(&ifs->ifs_ipf_state); 3211 for (ifq = ifs->ifs_ips_tqtqb; ifq != NULL; ifq = ifq->ifq_next) 3212 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { 3213 if (tqe->tqe_die > ifs->ifs_fr_ticks) 3214 break; 3215 tqn = tqe->tqe_next; 3216 is = tqe->tqe_parent; 3217 fr_delstate(is, ISL_EXPIRE, ifs); 3218 } 3219 3220 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifq->ifq_next) { 3221 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { 3222 if (tqe->tqe_die > ifs->ifs_fr_ticks) 3223 break; 3224 tqn = tqe->tqe_next; 3225 is = tqe->tqe_parent; 3226 fr_delstate(is, ISL_EXPIRE, ifs); 3227 } 3228 } 3229 3230 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) { 3231 ifqnext = ifq->ifq_next; 3232 3233 if (((ifq->ifq_flags & IFQF_DELETE) != 0) && 3234 (ifq->ifq_ref == 0)) { 3235 fr_freetimeoutqueue(ifq, ifs); 3236 } 3237 } 3238 3239 if (ifs->ifs_fr_state_doflush) { 3240 (void) fr_state_flush(2, 0, ifs); 3241 ifs->ifs_fr_state_doflush = 0; 3242 } 3243 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3244 SPL_X(s); 3245 } 3246 3247 3248 /* ------------------------------------------------------------------------ */ 3249 /* Function: fr_state_flush */ 3250 /* Returns: int - 0 == success, -1 == failure */ 3251 /* Parameters: Nil */ 3252 /* Write Locks: ipf_state */ 3253 /* */ 3254 /* Flush state tables. Three actions currently defined: */ 3255 /* which == 0 : flush all state table entries */ 3256 /* which == 1 : flush TCP connections which have started to close but are */ 3257 /* stuck for some reason. */ 3258 /* which == 2 : flush TCP connections which have been idle for a long time, */ 3259 /* starting at > 4 days idle and working back in successive half-*/ 3260 /* days to at most 12 hours old. 
If this fails to free enough     */
/*              slots then work backwards in half hour slots to 30 minutes. */
/*              If that too fails, then work backwards in 30 second intervals */
/*              for the last 30 minutes to at worst 30 seconds idle.        */
/* ------------------------------------------------------------------------ */
/*
 * The value handed back is the running count of entries deleted by this
 * call (the 'removed' counter), on every return path.
 */
static int fr_state_flush(which, proto, ifs)
int which, proto;
ipf_stack_t *ifs;
{
	ipftq_t *ifq, *ifqnext;
	ipftqent_t *tqe, *tqn;
	ipstate_t *is, **isp;
	int delete, removed;
	long try, maxtick;
	u_long interval;
	SPL_INT(s);

	removed = 0;

	SPL_NET(s);
	/*
	 * Pass 1: walk the enumerated list.  isp is only advanced when the
	 * current entry is kept; after a delete the loop simply re-reads
	 * *isp, relying on fr_delstate() having unlinked the entry through
	 * its is_pnext back pointer.
	 */
	for (isp = &ifs->ifs_ips_list; ((is = *isp) != NULL); ) {
		delete = 0;

		/* 'proto' is matched against the is_v field of the entry. */
		if ((proto != 0) && (is->is_v != proto)) {
			isp = &is->is_next;
			continue;
		}

		switch (which)
		{
		case 0 :
			delete = 1;
			break;
		case 1 :
		case 2 :
			/*
			 * Both actions use the same test here: a TCP entry
			 * goes if either side is not in ESTABLISHED.
			 */
			if (is->is_p != IPPROTO_TCP)
				break;
			if ((is->is_state[0] != IPF_TCPS_ESTABLISHED) ||
			    (is->is_state[1] != IPF_TCPS_ESTABLISHED))
				delete = 1;
			break;
		}

		if (delete) {
			if (is->is_p == IPPROTO_TCP)
				ifs->ifs_ips_stats.iss_fin++;
			else
				ifs->ifs_ips_stats.iss_expire++;
			fr_delstate(is, ISL_FLUSH, ifs);
			removed++;
		} else
			isp = &is->is_next;
	}

	/* Only action 2 goes on to the age-based passes below. */
	if (which != 2) {
		SPL_X(s);
		return removed;
	}

	/*
	 * Asked to remove inactive entries because the table is full, try
	 * again, 3 times, if first attempt failed with a different criteria
	 * each time.  The order tried in must be in decreasing age.
	 * Another alternative is to implement random drop and drop N entries
	 * at random until N have been freed up.
	 */
	/* Rate limit: skip if the last forced flush was under 5 units ago. */
	if (ifs->ifs_fr_ticks - ifs->ifs_ips_last_force_flush < IPF_TTLVAL(5))
		goto force_flush_skipped;
	ifs->ifs_ips_last_force_flush = ifs->ifs_fr_ticks;

	/* Start with the coarsest interval the current tick count exceeds. */
	if (ifs->ifs_fr_ticks > IPF_TTLVAL(43200))
		interval = IPF_TTLVAL(43200);
	else if (ifs->ifs_fr_ticks > IPF_TTLVAL(1800))
		interval = IPF_TTLVAL(1800);
	else if (ifs->ifs_fr_ticks > IPF_TTLVAL(30))
		interval = IPF_TTLVAL(30);
	else
		interval = IPF_TTLVAL(10);
	/*
	 * NOTE(review): this expression reduces to 'interval'.  'try' is
	 * then compared against tqe_die below; confirm that starting the
	 * cut-off at 'interval' rather than at a value relative to the
	 * current tick count is what was intended.
	 */
	try = ifs->ifs_fr_ticks - (ifs->ifs_fr_ticks - interval);
	if (try < 0)
		goto force_flush_skipped;

	/*
	 * Keep going until something has been removed or the finest
	 * interval has been tried without success.
	 */
	while (removed == 0) {
		maxtick = ifs->ifs_fr_ticks - interval;
		if (maxtick < 0)
			break;

		/* Raise the cut-off 'try' in steps of 'interval'. */
		while (try < maxtick) {
			for (ifq = ifs->ifs_ips_tqtqb; ifq != NULL;
			     ifq = ifq->ifq_next) {
				for (tqn = ifq->ifq_head;
				     ((tqe = tqn) != NULL); ) {
					/* First entry past the cut-off ends the walk. */
					if (tqe->tqe_die > try)
						break;
					tqn = tqe->tqe_next;
					is = tqe->tqe_parent;
					fr_delstate(is, ISL_EXPIRE, ifs);
					removed++;
				}
			}

			for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) {
				ifqnext = ifq->ifq_next;

				for (tqn = ifq->ifq_head;
				     ((tqe = tqn) != NULL); ) {
					if (tqe->tqe_die > try)
						break;
					tqn = tqe->tqe_next;
					is = tqe->tqe_parent;
					fr_delstate(is, ISL_EXPIRE, ifs);
					removed++;
				}
			}
			if (try + interval > maxtick)
				break;
			try += interval;
		}

		/* Nothing freed at this granularity: step down to the next. */
		if (removed == 0) {
			if (interval == IPF_TTLVAL(43200)) {
				interval = IPF_TTLVAL(1800);
			} else if (interval == IPF_TTLVAL(1800)) {
				interval = IPF_TTLVAL(30);
			} else if (interval == IPF_TTLVAL(30)) {
				interval = IPF_TTLVAL(10);
			} else {
				break;
			}
		}
	}
force_flush_skipped:
	SPL_X(s);
	return removed;
}



/* ------------------------------------------------------------------------ */
/* Function:    fr_tcp_age                                                  */
/* Returns:     int - 1 == state transition made,
0 == no change (rejected)    */
/* Parameters:  tqe(I)   - pointer to timeout queue entry holding the       */
/*                         per-direction TCP states (tqe_state[])           */
/*              fin(I)   - pointer to packet information                    */
/*              tqtab(I) - TCP timeout queue table this is in               */
/*              flags(I) - flags from state/NAT entry                       */
/*                                                                          */
/* Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29:          */
/*                                                                          */
/* - (try to) base state transitions on real evidence only,                 */
/*   i.e. packets that are sent and have been received by ipfilter;         */
/*   diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used.       */
/*                                                                          */
/* - deal with half-closed connections correctly;                           */
/*                                                                          */
/* - store the state of the source in state[0] such that ipfstat            */
/*   displays the state as source/dest instead of dest/source; the calls    */
/*   to fr_tcp_age have been changed accordingly.                           */
/*                                                                          */
/* Internal Parameters:                                                     */
/*                                                                          */
/*    state[0] = state of source (host that initiated connection)           */
/*    state[1] = state of dest   (host that accepted the connection)        */
/*                                                                          */
/*    dir == 0 : a packet from source to dest                               */
/*    dir == 1 : a packet from dest to source                               */
/*                                                                          */
/* Internally rval may also be 2, meaning "packet accepted but leave the    */
/* stored state and the timeout queue position alone"; callers still see 1. */
/*                                                                          */
/* Locking: it is assumed that the parent of the tqe structure is locked.   */
/* ------------------------------------------------------------------------ */
int fr_tcp_age(tqe, fin, tqtab, flags)
ipftqent_t *tqe;
fr_info_t *fin;
ipftq_t *tqtab;
int flags;
{
	int dlen, ostate, nstate, rval, dir;
	u_char tcpflags;
	tcphdr_t *tcp;
	ipf_stack_t *ifs = fin->fin_ifs;

	tcp = fin->fin_dp;

	rval = 0;
	dir = fin->fin_rev;
	tcpflags = tcp->th_flags;
	/* presumably the TCP payload length: fin_dlen less the header size */
	dlen = fin->fin_dlen - (TCP_OFF(tcp) << 2);

	if (tcpflags & TH_RST) {
		/*
		 * A reset is always accepted.  Without PUSH and without
		 * data the sender goes to CLOSED, otherwise CLOSE_WAIT.
		 */
		if (!(tcpflags & TH_PUSH) && !dlen)
			nstate = IPF_TCPS_CLOSED;
		else
			nstate = IPF_TCPS_CLOSE_WAIT;
		rval = 1;
	} else {
		/* ostate: the peer's state; nstate: the sender's state. */
		ostate = tqe->tqe_state[1 - dir];
		nstate = tqe->tqe_state[dir];

		switch (nstate)
		{
		case IPF_TCPS_CLOSED: /* 0 */
			if ((tcpflags & TH_OPENING) == TH_OPENING) {
				/*
				 * 'dir' received an S and sends SA in
				 * response, CLOSED -> SYN_RECEIVED
				 */
				nstate = IPF_TCPS_SYN_RECEIVED;
				rval = 1;
			} else if ((tcpflags & TH_OPENING) == TH_SYN) {
				/* 'dir' sent S, CLOSED -> SYN_SENT */
				nstate = IPF_TCPS_SYN_SENT;
				rval = 1;
			}
			/*
			 * the next piece of code makes it possible to get
			 * already established connections into the state table
			 * after a restart or reload of the filter rules; this
			 * does not work when a strict 'flags S keep state' is
			 * used for tcp connections of course
			 */
			if (((flags & IS_TCPFSM) == 0) &&
			    ((tcpflags & TH_ACKMASK) == TH_ACK)) {
				/*
				 * we saw an A, guess 'dir' is in ESTABLISHED
				 * mode
				 */
				switch (ostate)
				{
				case IPF_TCPS_CLOSED :
				case IPF_TCPS_SYN_RECEIVED :
					nstate = IPF_TCPS_HALF_ESTAB;
					rval = 1;
					break;
				case IPF_TCPS_HALF_ESTAB :
				case IPF_TCPS_ESTABLISHED :
					nstate = IPF_TCPS_ESTABLISHED;
					rval = 1;
					break;
				default :
					break;
				}
			}
			/*
			 * TODO: besides regular ACK packets we can have other
			 * packets as well; it is yet to be determined how we
			 * should initialize the states in those cases
			 */
			break;

		case IPF_TCPS_LISTEN: /* 1 */
			/* NOT USED */
			break;

		case IPF_TCPS_SYN_SENT: /* 2 */
			if ((tcpflags & ~(TH_ECN|TH_CWR)) == TH_SYN) {
				/*
				 * A retransmitted SYN packet.  We do not reset
				 * the timeout here to fr_tcptimeout because a
				 * connection connect timeout does not renew
				 * after every packet that is sent.  We need to
				 * set rval so as to indicate the packet has
				 * passed the check for its flags being valid
				 * in the TCP FSM.  Setting rval to 2 has the
				 * result of not resetting the timeout.
				 */
				rval = 2;
			} else if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) ==
				   TH_ACK) {
				/*
				 * we see an A from 'dir' which is in SYN_SENT
				 * state: 'dir' sent an A in response to an SA
				 * which it received, SYN_SENT -> ESTABLISHED
				 */
				nstate = IPF_TCPS_ESTABLISHED;
				rval = 1;
			} else if (tcpflags & TH_FIN) {
				/*
				 * we see an F from 'dir' which is in SYN_SENT
				 * state and wants to close its side of the
				 * connection; SYN_SENT -> FIN_WAIT_1
				 */
				nstate = IPF_TCPS_FIN_WAIT_1;
				rval = 1;
			} else if ((tcpflags & TH_OPENING) == TH_OPENING) {
				/*
				 * we see an SA from 'dir' which is already in
				 * SYN_SENT state, this means we have a
				 * simultaneous open; SYN_SENT -> SYN_RECEIVED
				 */
				nstate = IPF_TCPS_SYN_RECEIVED;
				rval = 1;
			}
			break;

		case IPF_TCPS_SYN_RECEIVED: /* 3 */
			if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) {
				/*
				 * we see an A from 'dir' which was in
				 * SYN_RECEIVED state so it must now be in
				 * established state, SYN_RECEIVED ->
				 * ESTABLISHED
				 */
				nstate = IPF_TCPS_ESTABLISHED;
				rval = 1;
			} else if ((tcpflags & ~(TH_ECN|TH_CWR)) ==
				   TH_OPENING) {
				/*
				 * We see an SA from 'dir' which is already in
				 * SYN_RECEIVED state.  Accept it without
				 * changing state or timeout (rval 2).
				 */
				rval = 2;
			} else if (tcpflags & TH_FIN) {
				/*
				 * we see an F from 'dir' which is in
				 * SYN_RECEIVED state and wants to close its
				 * side of the connection; SYN_RECEIVED ->
				 * FIN_WAIT_1
				 */
				nstate = IPF_TCPS_FIN_WAIT_1;
				rval = 1;
			}
			break;

		case IPF_TCPS_HALF_ESTAB: /* 4 */
			/*
			 * Promote to ESTABLISHED only when the peer is at
			 * least HALF_ESTAB and this is a plain ACK; the
			 * packet is accepted either way.
			 */
			if (ostate >= IPF_TCPS_HALF_ESTAB) {
				if ((tcpflags & TH_ACKMASK) == TH_ACK) {
					nstate = IPF_TCPS_ESTABLISHED;
				}
			}
			rval = 1;

			break;

		case IPF_TCPS_ESTABLISHED: /* 5 */
			rval = 1;
			if (tcpflags & TH_FIN) {
				/*
				 * 'dir' closed its side of the connection;
				 * this gives us a half-closed connection;
				 * ESTABLISHED -> FIN_WAIT_1
				 */
				nstate = IPF_TCPS_FIN_WAIT_1;
			} else if (tcpflags & TH_ACK) {
				/*
				 * an ACK, should we exclude other flags here?
				 */
				if (ostate == IPF_TCPS_FIN_WAIT_1) {
					/*
					 * We know the other side did an active
					 * close, so we are ACKing the recvd
					 * FIN packet (does the window matching
					 * code guarantee this?) and go into
					 * CLOSE_WAIT state; this gives us a
					 * half-closed connection
					 */
					nstate = IPF_TCPS_CLOSE_WAIT;
				} else if (ostate < IPF_TCPS_CLOSE_WAIT) {
					/*
					 * still a fully established
					 * connection reset timeout
					 */
					nstate = IPF_TCPS_ESTABLISHED;
				}
			}
			break;

		case IPF_TCPS_CLOSE_WAIT: /* 6 */
			rval = 1;
			if (tcpflags & TH_FIN) {
				/*
				 * application closed and 'dir' sent a FIN,
				 * we're now going into LAST_ACK state
				 */
				nstate = IPF_TCPS_LAST_ACK;
			} else {
				/*
				 * we remain in CLOSE_WAIT because the other
				 * side has closed already and we did not
				 * close our side yet; reset timeout
				 */
				nstate = IPF_TCPS_CLOSE_WAIT;
			}
			break;

		case IPF_TCPS_FIN_WAIT_1: /* 7 */
			rval = 1;
			if ((tcpflags & TH_ACK) &&
			    ostate > IPF_TCPS_CLOSE_WAIT) {
				/*
				 * if the other side is not active anymore
				 * it has sent us a FIN packet that we are
				 * ack'ing now with an ACK; this means both
				 * sides have now closed the connection and
				 * we go into TIME_WAIT
				 */
				/*
				 * XXX: how do we know we really are ACKing
				 * the FIN packet here? does the window code
				 * guarantee that?
				 */
				nstate = IPF_TCPS_TIME_WAIT;
			} else {
				/*
				 * we closed our side of the connection
				 * already but the other side is still active
				 * (ESTABLISHED/CLOSE_WAIT); continue with
				 * this half-closed connection
				 */
				nstate = IPF_TCPS_FIN_WAIT_1;
			}
			break;

		case IPF_TCPS_CLOSING: /* 8 */
			/* NOT USED */
			break;

		case IPF_TCPS_LAST_ACK: /* 9 */
			/*
			 * We want to reset timer here to keep state in table.
			 * If we would allow the state to time out here, while
			 * there would still be packets being retransmitted, we
			 * would cut off line between the two peers preventing
			 * them to close connection properly.
			 */
			rval = 1;
			break;

		case IPF_TCPS_FIN_WAIT_2: /* 10 */
			/*
			 * A SYN seen in this state restarts the handshake
			 * for 'dir'; anything else just refreshes the entry.
			 */
			rval = 1;
			if ((tcpflags & TH_OPENING) == TH_OPENING)
				nstate = IPF_TCPS_SYN_RECEIVED;
			else if (tcpflags & TH_SYN)
				nstate = IPF_TCPS_SYN_SENT;
			break;

		case IPF_TCPS_TIME_WAIT: /* 11 */
			/* we're in 2MSL timeout now */
			rval = 1;
			break;

		default :
			/*
			 * Unknown stored state: report it in the kernel,
			 * abort in a userland build.  rval stays 0.
			 */
#if defined(_KERNEL)
# if SOLARIS
			cmn_err(CE_NOTE,
				"tcp %lx flags %x si %lx nstate %d ostate %d\n",
				(u_long)tcp, tcpflags, (u_long)tqe,
				nstate, ostate);
# else
			printf("tcp %lx flags %x si %lx nstate %d ostate %d\n",
				(u_long)tcp, tcpflags, (u_long)tqe,
				nstate, ostate);
# endif
#else
			abort();
#endif
			break;
		}
	}

	/*
	 * If rval == 2 then do not update the queue position, but treat the
	 * packet as being ok.  Otherwise, on acceptance, record the new
	 * state for 'dir' and, unless the entry uses a rule-based queue,
	 * move it to the queue for that state (tqtab is indexed by state).
	 */
	if (rval == 2)
		rval = 1;
	else if (rval == 1) {
		tqe->tqe_state[dir] = nstate;
		if ((tqe->tqe_flags & TQE_RULEBASED) == 0)
			fr_movequeue(tqe, tqe->tqe_ifq, tqtab + nstate, ifs);
	}

	return rval;
}


/* ------------------------------------------------------------------------ */
/* Function:    ipstate_log                                                 */
/* Returns:     Nil                                                         */
/* Parameters:  is(I)   - pointer to state structure                        */
/*              type(I) - type of log entry to create                       */
/*              ifs(I)  - ipf stack instance                                */
/*                                                                          */
/* Creates a state table log entry using the state structure and type info. */
/* passed in.  Log packet/byte counts, source/destination address and other */
/* protocol specific information.
*/ 3742 /* ------------------------------------------------------------------------ */ 3743 void ipstate_log(is, type, ifs) 3744 struct ipstate *is; 3745 u_int type; 3746 ipf_stack_t *ifs; 3747 { 3748 #ifdef IPFILTER_LOG 3749 struct ipslog ipsl; 3750 size_t sizes[1]; 3751 void *items[1]; 3752 int types[1]; 3753 3754 /* 3755 * Copy information out of the ipstate_t structure and into the 3756 * structure used for logging. 3757 */ 3758 ipsl.isl_type = type; 3759 ipsl.isl_pkts[0] = is->is_pkts[0] + is->is_icmppkts[0]; 3760 ipsl.isl_bytes[0] = is->is_bytes[0]; 3761 ipsl.isl_pkts[1] = is->is_pkts[1] + is->is_icmppkts[1]; 3762 ipsl.isl_bytes[1] = is->is_bytes[1]; 3763 ipsl.isl_pkts[2] = is->is_pkts[2] + is->is_icmppkts[2]; 3764 ipsl.isl_bytes[2] = is->is_bytes[2]; 3765 ipsl.isl_pkts[3] = is->is_pkts[3] + is->is_icmppkts[3]; 3766 ipsl.isl_bytes[3] = is->is_bytes[3]; 3767 ipsl.isl_src = is->is_src; 3768 ipsl.isl_dst = is->is_dst; 3769 ipsl.isl_p = is->is_p; 3770 ipsl.isl_v = is->is_v; 3771 ipsl.isl_flags = is->is_flags; 3772 ipsl.isl_tag = is->is_tag; 3773 ipsl.isl_rulen = is->is_rulen; 3774 (void) strncpy(ipsl.isl_group, is->is_group, FR_GROUPLEN); 3775 3776 if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) { 3777 ipsl.isl_sport = is->is_sport; 3778 ipsl.isl_dport = is->is_dport; 3779 if (ipsl.isl_p == IPPROTO_TCP) { 3780 ipsl.isl_state[0] = is->is_state[0]; 3781 ipsl.isl_state[1] = is->is_state[1]; 3782 } 3783 } else if (ipsl.isl_p == IPPROTO_ICMP) { 3784 ipsl.isl_itype = is->is_icmp.ici_type; 3785 } else if (ipsl.isl_p == IPPROTO_ICMPV6) { 3786 ipsl.isl_itype = is->is_icmp.ici_type; 3787 } else { 3788 ipsl.isl_ps.isl_filler[0] = 0; 3789 ipsl.isl_ps.isl_filler[1] = 0; 3790 } 3791 3792 items[0] = &ipsl; 3793 sizes[0] = sizeof(ipsl); 3794 types[0] = 0; 3795 3796 if (ipllog(IPL_LOGSTATE, NULL, items, sizes, types, 1, ifs)) { 3797 ATOMIC_INCL(ifs->ifs_ips_stats.iss_logged); 3798 } else { 3799 ATOMIC_INCL(ifs->ifs_ips_stats.iss_logfail); 3800 } 3801 #endif 3802 } 


#ifdef	USE_INET6
/* ------------------------------------------------------------------------ */
/* Function:    fr_checkicmp6matchingstate                                  */
/* Returns:     ipstate_t* - NULL == no match found,                        */
/*                           else pointer to matching state entry           */
/* Parameters:  fin(I) - pointer to packet information                      */
/* Locks:       NULL == no locks, else Read Lock on ipf_state               */
/*                                                                          */
/* If we've got an ICMPv6 error message, using the information stored in    */
/* the ICMPv6 packet, look for a matching state table entry.                */
/*                                                                          */
/* On a match the function returns with the ipf_state read lock still held  */
/* (both "return is" paths leave without RWLOCK_EXIT), fin_rev is set on    */
/* the caller's fin and the entry's is_icmppkts[] counter is bumped.        */
/* ------------------------------------------------------------------------ */
static ipstate_t *fr_checkicmp6matchingstate(fin)
fr_info_t *fin;
{
	struct icmp6_hdr *ic6, *oic;
	int backward, i;
	ipstate_t *is, **isp;
	u_short sport, dport;
	i6addr_t dst, src;
	u_short savelen;
	icmpinfo_t *ic;
	fr_info_t ofin;
	tcphdr_t *tcp;
	ip6_t *oip6;
	u_char	pr;
	u_int hv;
	ipf_stack_t *ifs = fin->fin_ifs;

	/*
	 * Does it at least have the return (basic) IP header ?
	 * Is it an actual recognised ICMP error type?
	 * Only a basic IP header (no options) should be with
	 * an ICMP error header.
	 */
	if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN) ||
	    !(fin->fin_flx & FI_ICMPERR))
		return NULL;

	ic6 = fin->fin_dp;

	/* The offending packet's IPv6 header follows the ICMP error header. */
	oip6 = (ip6_t *)((char *)ic6 + ICMPERR_ICMPHLEN);
	if (fin->fin_plen < sizeof(*oip6))
		return NULL;

	/* Start from a copy of fin and flip the direction. */
	bcopy((char *)fin, (char *)&ofin, sizeof(*fin));
	ofin.fin_v = 6;
	ofin.fin_ifp = fin->fin_ifp;
	ofin.fin_out = !fin->fin_out;
	ofin.fin_m = NULL;	/* if dereferenced, panic XXX */
	ofin.fin_mp = NULL;	/* if dereferenced, panic XXX */

	/*
	 * We make a fin entry to be able to feed it to
	 * matchsrcdst.  Note that not all fields are necessary
	 * but this is the cleanest way.  Note further we fill
	 * in fin_mp such that if someone uses it we'll get
	 * a kernel panic.  fr_matchsrcdst does not use this.
	 *
	 * watch out here, as ip is in host order and oip6 in network
	 * order.  Any change we make must be undone afterwards.
	 */
	/* ip6_plen is overwritten for fr_makefrip() and restored after. */
	savelen = oip6->ip6_plen;
	oip6->ip6_plen = fin->fin_dlen - ICMPERR_ICMPHLEN;
	ofin.fin_flx = FI_NOCKSUM;
	ofin.fin_ip = (ip_t *)oip6;
	ofin.fin_plen = oip6->ip6_plen;
	(void) fr_makefrip(sizeof(*oip6), (ip_t *)oip6, &ofin);
	ofin.fin_flx &= ~(FI_BAD|FI_SHORT);
	oip6->ip6_plen = savelen;

	if (oip6->ip6_nxt == IPPROTO_ICMPV6) {
		oic = (struct icmp6_hdr *)(oip6 + 1);
		/*
		 * an ICMP error can only be generated as a result of an
		 * ICMP query, not as the response on an ICMP error
		 *
		 * XXX theoretically ICMP_ECHOREP and the other reply's are
		 * ICMP query's as well, but adding them here seems strange XXX
		 */
		if (!(oic->icmp6_type & ICMP6_INFOMSG_MASK))
			return NULL;

		/*
		 * perform a lookup of the ICMP packet in the state table
		 *
		 * NOTE(review): only the in4 view of each address feeds the
		 * hash here, unlike the four i6[] words used for the other
		 * protocols below - presumably this mirrors how ICMPv6
		 * entries are hashed on insertion; confirm against the
		 * code that adds state.
		 */
		hv = (pr = oip6->ip6_nxt);
		src.in6 = oip6->ip6_src;
		hv += src.in4.s_addr;
		dst.in6 = oip6->ip6_dst;
		hv += dst.in4.s_addr;
		hv += oic->icmp6_id;
		hv += oic->icmp6_seq;
		hv = DOUBLE_HASH(hv, ifs);

		READ_ENTER(&ifs->ifs_ipf_state);
		for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) {
			ic = &is->is_icmp;
			/* Advance first: 'is' may be replaced by the match call. */
			isp = &is->is_hnext;
			if ((is->is_p == pr) &&
			    !(is->is_pass & FR_NOICMPERR) &&
			    (oic->icmp6_id == ic->ici_id) &&
			    (oic->icmp6_seq == ic->ici_seq) &&
			    (is = fr_matchsrcdst(&ofin, is, &src,
						 &dst, NULL, FI_ICMPCMP))) {
				/*
				 * in the state table ICMP query's are stored
				 * with the type of the corresponding ICMP
				 * response.  Correct here
				 */
				if (((ic->ici_type == ICMP6_ECHO_REPLY) &&
				     (oic->icmp6_type == ICMP6_ECHO_REQUEST)) ||
				     (ic->ici_type - 1 == oic->icmp6_type )) {
					ifs->ifs_ips_stats.iss_hits++;
					backward = IP6_NEQ(&is->is_dst, &src);
					fin->fin_rev = !backward;
					/* counter index: direction pair plus in/out */
					i = (backward << 1) + fin->fin_out;
					is->is_icmppkts[i]++;
					/* returns with the read lock held */
					return is;
				}
			}
		}
		RWLOCK_EXIT(&ifs->ifs_ipf_state);
		return NULL;
	}

	/* Non-ICMPv6 payload: hash on protocol, full addresses and ports. */
	hv = (pr = oip6->ip6_nxt);
	src.in6 = oip6->ip6_src;
	hv += src.i6[0];
	hv += src.i6[1];
	hv += src.i6[2];
	hv += src.i6[3];
	dst.in6 = oip6->ip6_dst;
	hv += dst.i6[0];
	hv += dst.i6[1];
	hv += dst.i6[2];
	hv += dst.i6[3];

	if ((oip6->ip6_nxt == IPPROTO_TCP) || (oip6->ip6_nxt == IPPROTO_UDP)) {
		/* Only the port fields are read, so a tcphdr_t view is used for both. */
		tcp = (tcphdr_t *)(oip6 + 1);
		dport = tcp->th_dport;
		sport = tcp->th_sport;
		hv += dport;
		hv += sport;
	} else
		tcp = NULL;
	hv = DOUBLE_HASH(hv, ifs);

	READ_ENTER(&ifs->ifs_ipf_state);
	for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) {
		isp = &is->is_hnext;
		/*
		 * Only allow this icmp though if the
		 * encapsulated packet was allowed through the
		 * other way around. Note that the minimal amount
		 * of info present does not allow for checking against
		 * tcp internals such as seq and ack numbers.
		 */
		if ((is->is_p != pr) || (is->is_v != 6) ||
		    (is->is_pass & FR_NOICMPERR))
			continue;
		is = fr_matchsrcdst(&ofin, is, &src, &dst, tcp, FI_ICMPCMP);
		if (is != NULL) {
			ifs->ifs_ips_stats.iss_hits++;
			backward = IP6_NEQ(&is->is_dst, &src);
			fin->fin_rev = !backward;
			i = (backward << 1) + fin->fin_out;
			is->is_icmppkts[i]++;
			/*
			 * we deliberately do not touch the timeouts
			 * for the accompanying state table entry.
			 * It remains to be seen if that is correct. XXX
			 */
			/* returns with the read lock held */
			return is;
		}
	}
	RWLOCK_EXIT(&ifs->ifs_ipf_state);
	return NULL;
}
#endif


/* ------------------------------------------------------------------------ */
/* Function:    fr_sttab_init                                               */
/* Returns:     Nil                                                         */
/* Parameters:  tqp(I) - pointer to an array of timeout queues for TCP      */
/*              ifs(I) - ipf stack instance supplying the timeout values    */
/*                                                                          */
/* Initialise the array of timeout queues for TCP.  The array is indexed by */
/* TCP state and has IPF_TCP_NSTATES elements, chained through ifq_next.    */
/* ------------------------------------------------------------------------ */
void fr_sttab_init(tqp, ifs)
ipftq_t *tqp;
ipf_stack_t *ifs;
{
	int i;

	for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) {
		tqp[i].ifq_ttl = 0;
		tqp[i].ifq_ref = 1;
		/* Empty queue: the tail pointer addresses the head slot. */
		tqp[i].ifq_head = NULL;
		tqp[i].ifq_tail = &tqp[i].ifq_head;
		tqp[i].ifq_next = tqp + i + 1;
		MUTEX_INIT(&tqp[i].ifq_lock, "ipftq tcp tab");
	}
	/* The loop pointed the last element one past the array; fix it. */
	tqp[IPF_TCP_NSTATES - 1].ifq_next = NULL;
	/* Per-state timeouts, taken from the stack's settings. */
	tqp[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcpclosed;
	tqp[IPF_TCPS_LISTEN].ifq_ttl = ifs->ifs_fr_tcptimeout;
	tqp[IPF_TCPS_SYN_SENT].ifq_ttl = ifs->ifs_fr_tcptimeout;
	tqp[IPF_TCPS_SYN_RECEIVED].ifq_ttl = ifs->ifs_fr_tcptimeout;
	tqp[IPF_TCPS_ESTABLISHED].ifq_ttl = ifs->ifs_fr_tcpidletimeout;
	tqp[IPF_TCPS_CLOSE_WAIT].ifq_ttl = ifs->ifs_fr_tcphalfclosed;
	tqp[IPF_TCPS_FIN_WAIT_1].ifq_ttl = ifs->ifs_fr_tcphalfclosed;
	tqp[IPF_TCPS_CLOSING].ifq_ttl = ifs->ifs_fr_tcptimeout;
	tqp[IPF_TCPS_LAST_ACK].ifq_ttl = ifs->ifs_fr_tcplastack;
	tqp[IPF_TCPS_FIN_WAIT_2].ifq_ttl = ifs->ifs_fr_tcpclosewait;
	tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = ifs->ifs_fr_tcptimeout;
	tqp[IPF_TCPS_HALF_ESTAB].ifq_ttl = ifs->ifs_fr_tcptimeout;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_sttab_destroy                                            */
/* Returns:     Nil                                                         */
/* Parameters:  tqp(I) - pointer to an array of timeout queues for TCP      */
/*                                                                          */
/* Do whatever is necessary to "destroy" each of the entries
in the array */ 4029 /* of timeout queues for TCP. */ 4030 /* ------------------------------------------------------------------------ */ 4031 void fr_sttab_destroy(tqp) 4032 ipftq_t *tqp; 4033 { 4034 int i; 4035 4036 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) 4037 MUTEX_DESTROY(&tqp[i].ifq_lock); 4038 } 4039 4040 4041 /* ------------------------------------------------------------------------ */ 4042 /* Function: fr_statederef */ 4043 /* Returns: Nil */ 4044 /* Parameters: isp(I) - pointer to pointer to state table entry */ 4045 /* ifs - ipf stack instance */ 4046 /* */ 4047 /* Decrement the reference counter for this state table entry and free it */ 4048 /* if there are no more things using it. */ 4049 /* */ 4050 /* Internal parameters: */ 4051 /* state[0] = state of source (host that initiated connection) */ 4052 /* state[1] = state of dest (host that accepted the connection) */ 4053 /* ------------------------------------------------------------------------ */ 4054 void fr_statederef(isp, ifs) 4055 ipstate_t **isp; 4056 ipf_stack_t *ifs; 4057 { 4058 ipstate_t *is; 4059 4060 is = *isp; 4061 *isp = NULL; 4062 4063 MUTEX_ENTER(&is->is_lock); 4064 if (is->is_ref > 1) { 4065 is->is_ref--; 4066 MUTEX_EXIT(&is->is_lock); 4067 #ifndef _KERNEL 4068 if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) || 4069 (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) { 4070 fr_delstate(is, ISL_ORPHAN, ifs); 4071 } 4072 #endif 4073 return; 4074 } 4075 MUTEX_EXIT(&is->is_lock); 4076 4077 WRITE_ENTER(&ifs->ifs_ipf_state); 4078 fr_delstate(is, ISL_EXPIRE, ifs); 4079 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4080 } 4081 4082 4083 /* ------------------------------------------------------------------------ */ 4084 /* Function: fr_setstatequeue */ 4085 /* Returns: Nil */ 4086 /* Parameters: is(I) - pointer to state structure */ 4087 /* rev(I) - forward(0) or reverse(1) direction */ 4088 /* Locks: ipf_state (read or write) */ 4089 /* */ 4090 /* Put the state entry on its default queue entry, 
using rev as a helped in */ 4091 /* determining which queue it should be placed on. */ 4092 /* ------------------------------------------------------------------------ */ 4093 void fr_setstatequeue(is, rev, ifs) 4094 ipstate_t *is; 4095 int rev; 4096 ipf_stack_t *ifs; 4097 { 4098 ipftq_t *oifq, *nifq; 4099 4100 4101 if ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) 4102 nifq = is->is_tqehead[rev]; 4103 else 4104 nifq = NULL; 4105 4106 if (nifq == NULL) { 4107 switch (is->is_p) 4108 { 4109 #ifdef USE_INET6 4110 case IPPROTO_ICMPV6 : 4111 if (rev == 1) 4112 nifq = &ifs->ifs_ips_icmpacktq; 4113 else 4114 nifq = &ifs->ifs_ips_icmptq; 4115 break; 4116 #endif 4117 case IPPROTO_ICMP : 4118 if (rev == 1) 4119 nifq = &ifs->ifs_ips_icmpacktq; 4120 else 4121 nifq = &ifs->ifs_ips_icmptq; 4122 break; 4123 case IPPROTO_TCP : 4124 nifq = ifs->ifs_ips_tqtqb + is->is_state[rev]; 4125 break; 4126 4127 case IPPROTO_UDP : 4128 if (rev == 1) 4129 nifq = &ifs->ifs_ips_udpacktq; 4130 else 4131 nifq = &ifs->ifs_ips_udptq; 4132 break; 4133 4134 default : 4135 nifq = &ifs->ifs_ips_iptq; 4136 break; 4137 } 4138 } 4139 4140 oifq = is->is_sti.tqe_ifq; 4141 /* 4142 * If it's currently on a timeout queue, move it from one queue to 4143 * another, else put it on the end of the newly determined queue. 4144 */ 4145 if (oifq != NULL) 4146 fr_movequeue(&is->is_sti, oifq, nifq, ifs); 4147 else 4148 fr_queueappend(&is->is_sti, nifq, is, ifs); 4149 return; 4150 } 4151 4152 4153 /* ------------------------------------------------------------------------ */ 4154 /* Function: fr_stateiter */ 4155 /* Returns: int - 0 == success, else error */ 4156 /* Parameters: token(I) - pointer to ipftoken structure */ 4157 /* itp(I) - pointer to ipfgeniter structure */ 4158 /* */ 4159 /* This function handles the SIOCGENITER ioctl for the state tables and */ 4160 /* walks through the list of entries in the state table list (ips_list.) 
*/
/* ------------------------------------------------------------------------ */
/*
 * Up to itp->igi_nitems entries are copied to itp->igi_data, one full
 * ipstate_t per item.  The token remembers the last entry handed out
 * (with a reference held on it) so that a later call can continue from
 * there.  The end of the list is signalled either by an all-zero
 * structure or by a copied entry whose is_next is NULL, and in both cases
 * the token is freed.
 */
static int fr_stateiter(token, itp, ifs)
ipftoken_t *token;
ipfgeniter_t *itp;
ipf_stack_t *ifs;
{
	ipstate_t *is, *next, zero;
	int error, count;
	char *dst;

	/* Validate the request before taking any lock. */
	if (itp->igi_data == NULL)
		return EFAULT;

	if (itp->igi_nitems == 0)
		return EINVAL;

	if (itp->igi_type != IPFGENITER_STATE)
		return EINVAL;

	error = 0;

	READ_ENTER(&ifs->ifs_ipf_state);

	/*
	 * Get "previous" entry from the token and find the next entry.
	 */
	is = token->ipt_data;
	if (is == NULL) {
		next = ifs->ifs_ips_list;
	} else {
		next = is->is_next;
	}

	dst = itp->igi_data;
	/* The read lock is held at the top of every iteration. */
	for (count = itp->igi_nitems; count > 0; count--) {
		/*
		 * If we found an entry, add a reference to it and update the
		 * token.  Otherwise, zero out data to be returned and NULL
		 * out token.
		 */
		if (next != NULL) {
			MUTEX_ENTER(&next->is_lock);
			next->is_ref++;
			MUTEX_EXIT(&next->is_lock);
			token->ipt_data = next;
		} else {
			bzero(&zero, sizeof(zero));
			next = &zero;
			token->ipt_data = NULL;
		}

		/*
		 * Safe to release lock now that we have a reference.
		 */
		RWLOCK_EXIT(&ifs->ifs_ipf_state);

		/*
		 * Copy out data and clean up references and tokens.
		 *
		 * NOTE(review): the whole structure is copied, so any
		 * pointer members it holds are exposed to the caller as
		 * well - confirm that consumers expect this.
		 */
		error = COPYOUT(next, dst, sizeof(*next));
		if (error != 0)
			error = EFAULT;
		if (token->ipt_data == NULL) {
			/* End marker was sent: iteration is finished. */
			ipf_freetoken(token, ifs);
			break;
		} else {
			/* Release the reference held on the previous entry. */
			if (is != NULL)
				fr_statederef(&is, ifs);
			/* Last entry in the list: nothing left to resume from. */
			if (next->is_next == NULL) {
				ipf_freetoken(token, ifs);
				break;
			}
		}

		/* Stop once the request is filled or a copy failed. */
		if ((count == 1) || (error != 0))
			break;

		/* Re-take the lock before following is_next again. */
		READ_ENTER(&ifs->ifs_ipf_state);
		dst += sizeof(*next);
		is = next;
		next = is->is_next;
	}

	return error;
}