1 /* 2 * Copyright (C) 1995-2003 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 7 * Use is subject to license terms. 8 */ 9 10 #if defined(KERNEL) || defined(_KERNEL) 11 # undef KERNEL 12 # undef _KERNEL 13 # define KERNEL 1 14 # define _KERNEL 1 15 #endif 16 #include <sys/errno.h> 17 #include <sys/types.h> 18 #include <sys/param.h> 19 #include <sys/file.h> 20 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ 21 defined(_KERNEL) 22 # include "opt_ipfilter_log.h" 23 #endif 24 #if defined(_KERNEL) && defined(__FreeBSD_version) && \ 25 (__FreeBSD_version >= 400000) && !defined(KLD_MODULE) 26 #include "opt_inet6.h" 27 #endif 28 #if !defined(_KERNEL) && !defined(__KERNEL__) 29 # include <stdio.h> 30 # include <stdlib.h> 31 # include <string.h> 32 # define _KERNEL 33 # ifdef __OpenBSD__ 34 struct file; 35 # endif 36 # include <sys/uio.h> 37 # undef _KERNEL 38 #endif 39 #if defined(_KERNEL) && (__FreeBSD_version >= 220000) 40 # include <sys/filio.h> 41 # include <sys/fcntl.h> 42 # if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM) 43 # include "opt_ipfilter.h" 44 # endif 45 #else 46 # include <sys/ioctl.h> 47 #endif 48 #include <sys/time.h> 49 #if !defined(linux) 50 # include <sys/protosw.h> 51 #endif 52 #include <sys/socket.h> 53 #if defined(_KERNEL) 54 # include <sys/systm.h> 55 # if !defined(__SVR4) && !defined(__svr4__) 56 # include <sys/mbuf.h> 57 # endif 58 #endif 59 #if defined(__SVR4) || defined(__svr4__) 60 # include <sys/filio.h> 61 # include <sys/byteorder.h> 62 # ifdef _KERNEL 63 # include <sys/dditypes.h> 64 # endif 65 # include <sys/stream.h> 66 # include <sys/kmem.h> 67 #endif 68 69 #include <net/if.h> 70 #ifdef sun 71 # include <net/af.h> 72 #endif 73 #include <net/route.h> 74 #include <netinet/in.h> 75 #include <netinet/in_systm.h> 76 #include <netinet/ip.h> 77 #include <netinet/tcp.h> 78 #if !defined(linux) 79 # include 
<netinet/ip_var.h> 80 #endif 81 #if !defined(__hpux) && !defined(linux) 82 # include <netinet/tcp_fsm.h> 83 #endif 84 #include <netinet/udp.h> 85 #include <netinet/ip_icmp.h> 86 #include "netinet/ip_compat.h" 87 #include <netinet/tcpip.h> 88 #include "netinet/ip_fil.h" 89 #include "netinet/ip_nat.h" 90 #include "netinet/ip_frag.h" 91 #include "netinet/ip_state.h" 92 #include "netinet/ip_proxy.h" 93 #include "netinet/ipf_stack.h" 94 #ifdef IPFILTER_SYNC 95 #include "netinet/ip_sync.h" 96 #endif 97 #ifdef IPFILTER_SCAN 98 #include "netinet/ip_scan.h" 99 #endif 100 #ifdef USE_INET6 101 #include <netinet/icmp6.h> 102 #endif 103 #if (__FreeBSD_version >= 300000) 104 # include <sys/malloc.h> 105 # if defined(_KERNEL) && !defined(IPFILTER_LKM) 106 # include <sys/libkern.h> 107 # include <sys/systm.h> 108 # endif 109 #endif 110 /* END OF INCLUDES */ 111 112 113 #if !defined(lint) 114 static const char sccsid[] = "@(#)ip_state.c 1.8 6/5/96 (C) 1993-2000 Darren Reed"; 115 static const char rcsid[] = "@(#)$Id: ip_state.c,v 2.186.2.36 2005/08/11 19:58:03 darrenr Exp $"; 116 #endif 117 118 #ifdef USE_INET6 119 static ipstate_t *fr_checkicmp6matchingstate __P((fr_info_t *)); 120 #endif 121 static ipstate_t *fr_matchsrcdst __P((fr_info_t *, ipstate_t *, i6addr_t *, 122 i6addr_t *, tcphdr_t *, u_32_t)); 123 static ipstate_t *fr_checkicmpmatchingstate __P((fr_info_t *)); 124 static int fr_state_flush __P((int, int, ipf_stack_t *)); 125 static ips_stat_t *fr_statetstats __P((ipf_stack_t *)); 126 static void fr_delstate __P((ipstate_t *, int, ipf_stack_t *)); 127 static int fr_state_remove __P((caddr_t, ipf_stack_t *)); 128 static void fr_ipsmove __P((ipstate_t *, u_int, ipf_stack_t *)); 129 static int fr_tcpstate __P((fr_info_t *, tcphdr_t *, ipstate_t *)); 130 static int fr_tcpoptions __P((fr_info_t *, tcphdr_t *, tcpdata_t *)); 131 static ipstate_t *fr_stclone __P((fr_info_t *, tcphdr_t *, ipstate_t *)); 132 static void fr_fixinisn __P((fr_info_t *, ipstate_t *)); 133 static void 
fr_fixoutisn __P((fr_info_t *, ipstate_t *)); 134 static void fr_checknewisn __P((fr_info_t *, ipstate_t *)); 135 static int fr_stateiter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 136 137 int fr_stputent __P((caddr_t, ipf_stack_t *)); 138 int fr_stgetent __P((caddr_t, ipf_stack_t *)); 139 140 #define ONE_DAY IPF_TTLVAL(1 * 86400) /* 1 day */ 141 #define FIVE_DAYS (5 * ONE_DAY) 142 #define DOUBLE_HASH(x, ifs) \ 143 (((x) + ifs->ifs_ips_seed[(x) % ifs->ifs_fr_statesize]) % ifs->ifs_fr_statesize) 144 145 146 147 /* ------------------------------------------------------------------------ */ 148 /* Function: fr_stateinit */ 149 /* Returns: int - 0 == success, -1 == failure */ 150 /* Parameters: Nil */ 151 /* */ 152 /* Initialise all the global variables used within the state code. */ 153 /* This action also includes initiailising locks. */ 154 /* ------------------------------------------------------------------------ */ 155 int fr_stateinit(ifs) 156 ipf_stack_t *ifs; 157 { 158 #if defined(NEED_LOCAL_RAND) || !defined(_KERNEL) 159 struct timeval tv; 160 #endif 161 int i; 162 163 KMALLOCS(ifs->ifs_ips_table, ipstate_t **, 164 ifs->ifs_fr_statesize * sizeof(ipstate_t *)); 165 if (ifs->ifs_ips_table == NULL) 166 return -1; 167 bzero((char *)ifs->ifs_ips_table, 168 ifs->ifs_fr_statesize * sizeof(ipstate_t *)); 169 170 KMALLOCS(ifs->ifs_ips_seed, u_long *, 171 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed)); 172 if (ifs->ifs_ips_seed == NULL) 173 return -2; 174 #if defined(NEED_LOCAL_RAND) || !defined(_KERNEL) 175 tv.tv_sec = 0; 176 GETKTIME(&tv); 177 #endif 178 for (i = 0; i < ifs->ifs_fr_statesize; i++) { 179 /* 180 * XXX - ips_seed[X] should be a random number of sorts. 
181 */ 182 #if !defined(NEED_LOCAL_RAND) && defined(_KERNEL) 183 ifs->ifs_ips_seed[i] = ipf_random(); 184 #else 185 ifs->ifs_ips_seed[i] = ((u_long)ifs->ifs_ips_seed + i) * 186 ifs->ifs_fr_statesize; 187 ifs->ifs_ips_seed[i] += tv.tv_sec; 188 ifs->ifs_ips_seed[i] *= (u_long)ifs->ifs_ips_seed; 189 ifs->ifs_ips_seed[i] ^= 0x5a5aa5a5; 190 ifs->ifs_ips_seed[i] *= ifs->ifs_fr_statemax; 191 #endif 192 } 193 194 /* fill icmp reply type table */ 195 for (i = 0; i <= ICMP_MAXTYPE; i++) 196 icmpreplytype4[i] = -1; 197 icmpreplytype4[ICMP_ECHO] = ICMP_ECHOREPLY; 198 icmpreplytype4[ICMP_TSTAMP] = ICMP_TSTAMPREPLY; 199 icmpreplytype4[ICMP_IREQ] = ICMP_IREQREPLY; 200 icmpreplytype4[ICMP_MASKREQ] = ICMP_MASKREPLY; 201 #ifdef USE_INET6 202 /* fill icmp reply type table */ 203 for (i = 0; i <= ICMP6_MAXTYPE; i++) 204 icmpreplytype6[i] = -1; 205 icmpreplytype6[ICMP6_ECHO_REQUEST] = ICMP6_ECHO_REPLY; 206 icmpreplytype6[ICMP6_MEMBERSHIP_QUERY] = ICMP6_MEMBERSHIP_REPORT; 207 icmpreplytype6[ICMP6_NI_QUERY] = ICMP6_NI_REPLY; 208 icmpreplytype6[ND_ROUTER_SOLICIT] = ND_ROUTER_ADVERT; 209 icmpreplytype6[ND_NEIGHBOR_SOLICIT] = ND_NEIGHBOR_ADVERT; 210 #endif 211 212 KMALLOCS(ifs->ifs_ips_stats.iss_bucketlen, u_long *, 213 ifs->ifs_fr_statesize * sizeof(u_long)); 214 if (ifs->ifs_ips_stats.iss_bucketlen == NULL) 215 return -1; 216 bzero((char *)ifs->ifs_ips_stats.iss_bucketlen, 217 ifs->ifs_fr_statesize * sizeof(u_long)); 218 219 if (ifs->ifs_fr_state_maxbucket == 0) { 220 for (i = ifs->ifs_fr_statesize; i > 0; i >>= 1) 221 ifs->ifs_fr_state_maxbucket++; 222 ifs->ifs_fr_state_maxbucket *= 2; 223 } 224 225 fr_sttab_init(ifs->ifs_ips_tqtqb, ifs); 226 ifs->ifs_ips_tqtqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_ips_udptq; 227 ifs->ifs_ips_udptq.ifq_ttl = (u_long)ifs->ifs_fr_udptimeout; 228 ifs->ifs_ips_udptq.ifq_ref = 1; 229 ifs->ifs_ips_udptq.ifq_head = NULL; 230 ifs->ifs_ips_udptq.ifq_tail = &ifs->ifs_ips_udptq.ifq_head; 231 MUTEX_INIT(&ifs->ifs_ips_udptq.ifq_lock, "ipftq udp tab"); 232 
ifs->ifs_ips_udptq.ifq_next = &ifs->ifs_ips_udpacktq; 233 ifs->ifs_ips_udpacktq.ifq_ttl = (u_long)ifs->ifs_fr_udpacktimeout; 234 ifs->ifs_ips_udpacktq.ifq_ref = 1; 235 ifs->ifs_ips_udpacktq.ifq_head = NULL; 236 ifs->ifs_ips_udpacktq.ifq_tail = &ifs->ifs_ips_udpacktq.ifq_head; 237 MUTEX_INIT(&ifs->ifs_ips_udpacktq.ifq_lock, "ipftq udpack tab"); 238 ifs->ifs_ips_udpacktq.ifq_next = &ifs->ifs_ips_icmptq; 239 ifs->ifs_ips_icmptq.ifq_ttl = (u_long)ifs->ifs_fr_icmptimeout; 240 ifs->ifs_ips_icmptq.ifq_ref = 1; 241 ifs->ifs_ips_icmptq.ifq_head = NULL; 242 ifs->ifs_ips_icmptq.ifq_tail = &ifs->ifs_ips_icmptq.ifq_head; 243 MUTEX_INIT(&ifs->ifs_ips_icmptq.ifq_lock, "ipftq icmp tab"); 244 ifs->ifs_ips_icmptq.ifq_next = &ifs->ifs_ips_icmpacktq; 245 ifs->ifs_ips_icmpacktq.ifq_ttl = (u_long)ifs->ifs_fr_icmpacktimeout; 246 ifs->ifs_ips_icmpacktq.ifq_ref = 1; 247 ifs->ifs_ips_icmpacktq.ifq_head = NULL; 248 ifs->ifs_ips_icmpacktq.ifq_tail = &ifs->ifs_ips_icmpacktq.ifq_head; 249 MUTEX_INIT(&ifs->ifs_ips_icmpacktq.ifq_lock, "ipftq icmpack tab"); 250 ifs->ifs_ips_icmpacktq.ifq_next = &ifs->ifs_ips_iptq; 251 ifs->ifs_ips_iptq.ifq_ttl = (u_long)ifs->ifs_fr_iptimeout; 252 ifs->ifs_ips_iptq.ifq_ref = 1; 253 ifs->ifs_ips_iptq.ifq_head = NULL; 254 ifs->ifs_ips_iptq.ifq_tail = &ifs->ifs_ips_iptq.ifq_head; 255 MUTEX_INIT(&ifs->ifs_ips_iptq.ifq_lock, "ipftq ip tab"); 256 ifs->ifs_ips_iptq.ifq_next = &ifs->ifs_ips_deletetq; 257 /* entry's ttl in deletetq is just 1 tick */ 258 ifs->ifs_ips_deletetq.ifq_ttl = (u_long) 1; 259 ifs->ifs_ips_deletetq.ifq_ref = 1; 260 ifs->ifs_ips_deletetq.ifq_head = NULL; 261 ifs->ifs_ips_deletetq.ifq_tail = &ifs->ifs_ips_deletetq.ifq_head; 262 MUTEX_INIT(&ifs->ifs_ips_deletetq.ifq_lock, "state delete queue"); 263 ifs->ifs_ips_deletetq.ifq_next = NULL; 264 265 RWLOCK_INIT(&ifs->ifs_ipf_state, "ipf IP state rwlock"); 266 MUTEX_INIT(&ifs->ifs_ipf_stinsert, "ipf state insert mutex"); 267 ifs->ifs_fr_state_init = 1; 268 269 ifs->ifs_ips_last_force_flush = 
ifs->ifs_fr_ticks; 270 return 0; 271 } 272 273 274 /* ------------------------------------------------------------------------ */ 275 /* Function: fr_stateunload */ 276 /* Returns: Nil */ 277 /* Parameters: Nil */ 278 /* */ 279 /* Release and destroy any resources acquired or initialised so that */ 280 /* IPFilter can be unloaded or re-initialised. */ 281 /* ------------------------------------------------------------------------ */ 282 void fr_stateunload(ifs) 283 ipf_stack_t *ifs; 284 { 285 ipftq_t *ifq, *ifqnext; 286 ipstate_t *is; 287 288 while ((is = ifs->ifs_ips_list) != NULL) 289 fr_delstate(is, 0, ifs); 290 291 /* 292 * Proxy timeout queues are not cleaned here because although they 293 * exist on the state list, appr_unload is called after fr_stateunload 294 * and the proxies actually are responsible for them being created. 295 * Should the proxy timeouts have their own list? There's no real 296 * justification as this is the only complicationA 297 */ 298 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) { 299 ifqnext = ifq->ifq_next; 300 if (((ifq->ifq_flags & IFQF_PROXY) == 0) && 301 (fr_deletetimeoutqueue(ifq) == 0)) 302 fr_freetimeoutqueue(ifq, ifs); 303 } 304 305 ifs->ifs_ips_stats.iss_inuse = 0; 306 ifs->ifs_ips_num = 0; 307 308 if (ifs->ifs_fr_state_init == 1) { 309 fr_sttab_destroy(ifs->ifs_ips_tqtqb); 310 MUTEX_DESTROY(&ifs->ifs_ips_udptq.ifq_lock); 311 MUTEX_DESTROY(&ifs->ifs_ips_icmptq.ifq_lock); 312 MUTEX_DESTROY(&ifs->ifs_ips_udpacktq.ifq_lock); 313 MUTEX_DESTROY(&ifs->ifs_ips_icmpacktq.ifq_lock); 314 MUTEX_DESTROY(&ifs->ifs_ips_iptq.ifq_lock); 315 MUTEX_DESTROY(&ifs->ifs_ips_deletetq.ifq_lock); 316 } 317 318 if (ifs->ifs_ips_table != NULL) { 319 KFREES(ifs->ifs_ips_table, 320 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_table)); 321 ifs->ifs_ips_table = NULL; 322 } 323 324 if (ifs->ifs_ips_seed != NULL) { 325 KFREES(ifs->ifs_ips_seed, 326 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed)); 327 ifs->ifs_ips_seed = NULL; 328 } 329 330 
if (ifs->ifs_ips_stats.iss_bucketlen != NULL) { 331 KFREES(ifs->ifs_ips_stats.iss_bucketlen, 332 ifs->ifs_fr_statesize * sizeof(u_long)); 333 ifs->ifs_ips_stats.iss_bucketlen = NULL; 334 } 335 336 if (ifs->ifs_fr_state_maxbucket_reset == 1) 337 ifs->ifs_fr_state_maxbucket = 0; 338 339 if (ifs->ifs_fr_state_init == 1) { 340 ifs->ifs_fr_state_init = 0; 341 RW_DESTROY(&ifs->ifs_ipf_state); 342 MUTEX_DESTROY(&ifs->ifs_ipf_stinsert); 343 } 344 } 345 346 347 /* ------------------------------------------------------------------------ */ 348 /* Function: fr_statetstats */ 349 /* Returns: ips_state_t* - pointer to state stats structure */ 350 /* Parameters: Nil */ 351 /* */ 352 /* Put all the current numbers and pointers into a single struct and return */ 353 /* a pointer to it. */ 354 /* ------------------------------------------------------------------------ */ 355 static ips_stat_t *fr_statetstats(ifs) 356 ipf_stack_t *ifs; 357 { 358 ifs->ifs_ips_stats.iss_active = ifs->ifs_ips_num; 359 ifs->ifs_ips_stats.iss_statesize = ifs->ifs_fr_statesize; 360 ifs->ifs_ips_stats.iss_statemax = ifs->ifs_fr_statemax; 361 ifs->ifs_ips_stats.iss_table = ifs->ifs_ips_table; 362 ifs->ifs_ips_stats.iss_list = ifs->ifs_ips_list; 363 ifs->ifs_ips_stats.iss_ticks = ifs->ifs_fr_ticks; 364 return &ifs->ifs_ips_stats; 365 } 366 367 /* ------------------------------------------------------------------------ */ 368 /* Function: fr_state_remove */ 369 /* Returns: int - 0 == success, != 0 == failure */ 370 /* Parameters: data(I) - pointer to state structure to delete from table */ 371 /* */ 372 /* Search for a state structure that matches the one passed, according to */ 373 /* the IP addresses and other protocol specific information. 
*/ 374 /* ------------------------------------------------------------------------ */ 375 static int fr_state_remove(data, ifs) 376 caddr_t data; 377 ipf_stack_t *ifs; 378 { 379 ipstate_t *sp, st; 380 int error; 381 382 sp = &st; 383 error = fr_inobj(data, &st, IPFOBJ_IPSTATE); 384 if (error) 385 return EFAULT; 386 387 WRITE_ENTER(&ifs->ifs_ipf_state); 388 for (sp = ifs->ifs_ips_list; sp; sp = sp->is_next) 389 if ((sp->is_p == st.is_p) && (sp->is_v == st.is_v) && 390 !bcmp((caddr_t)&sp->is_src, (caddr_t)&st.is_src, 391 sizeof(st.is_src)) && 392 !bcmp((caddr_t)&sp->is_dst, (caddr_t)&st.is_dst, 393 sizeof(st.is_dst)) && 394 !bcmp((caddr_t)&sp->is_ps, (caddr_t)&st.is_ps, 395 sizeof(st.is_ps))) { 396 fr_delstate(sp, ISL_REMOVE, ifs); 397 RWLOCK_EXIT(&ifs->ifs_ipf_state); 398 return 0; 399 } 400 RWLOCK_EXIT(&ifs->ifs_ipf_state); 401 return ESRCH; 402 } 403 404 405 /* ------------------------------------------------------------------------ */ 406 /* Function: fr_state_ioctl */ 407 /* Returns: int - 0 == success, != 0 == failure */ 408 /* Parameters: data(I) - pointer to ioctl data */ 409 /* cmd(I) - ioctl command integer */ 410 /* mode(I) - file mode bits used with open */ 411 /* */ 412 /* Processes an ioctl call made to operate on the IP Filter state device. */ 413 /* ------------------------------------------------------------------------ */ 414 int fr_state_ioctl(data, cmd, mode, uid, ctx, ifs) 415 caddr_t data; 416 ioctlcmd_t cmd; 417 int mode, uid; 418 void *ctx; 419 ipf_stack_t *ifs; 420 { 421 int arg, ret, error = 0; 422 423 switch (cmd) 424 { 425 /* 426 * Delete an entry from the state table. 
427 */ 428 case SIOCDELST : 429 error = fr_state_remove(data, ifs); 430 break; 431 /* 432 * Flush the state table 433 */ 434 case SIOCIPFFL : 435 BCOPYIN(data, (char *)&arg, sizeof(arg)); 436 if (arg == 0 || arg == 1) { 437 WRITE_ENTER(&ifs->ifs_ipf_state); 438 ret = fr_state_flush(arg, 4, ifs); 439 RWLOCK_EXIT(&ifs->ifs_ipf_state); 440 BCOPYOUT((char *)&ret, data, sizeof(ret)); 441 } else 442 error = EINVAL; 443 break; 444 #ifdef USE_INET6 445 case SIOCIPFL6 : 446 BCOPYIN(data, (char *)&arg, sizeof(arg)); 447 if (arg == 0 || arg == 1) { 448 WRITE_ENTER(&ifs->ifs_ipf_state); 449 ret = fr_state_flush(arg, 6, ifs); 450 RWLOCK_EXIT(&ifs->ifs_ipf_state); 451 BCOPYOUT((char *)&ret, data, sizeof(ret)); 452 } else 453 error = EINVAL; 454 break; 455 #endif 456 #ifdef IPFILTER_LOG 457 /* 458 * Flush the state log. 459 */ 460 case SIOCIPFFB : 461 if (!(mode & FWRITE)) 462 error = EPERM; 463 else { 464 int tmp; 465 466 tmp = ipflog_clear(IPL_LOGSTATE, ifs); 467 BCOPYOUT((char *)&tmp, data, sizeof(tmp)); 468 } 469 break; 470 /* 471 * Turn logging of state information on/off. 472 */ 473 case SIOCSETLG : 474 if (!(mode & FWRITE)) 475 error = EPERM; 476 else { 477 BCOPYIN((char *)data, 478 (char *)&ifs->ifs_ipstate_logging, 479 sizeof(ifs->ifs_ipstate_logging)); 480 } 481 break; 482 /* 483 * Return the current state of logging. 484 */ 485 case SIOCGETLG : 486 BCOPYOUT((char *)&ifs->ifs_ipstate_logging, (char *)data, 487 sizeof(ifs->ifs_ipstate_logging)); 488 break; 489 /* 490 * Return the number of bytes currently waiting to be read. 491 */ 492 case FIONREAD : 493 arg = ifs->ifs_iplused[IPL_LOGSTATE]; /* returned in an int */ 494 BCOPYOUT((char *)&arg, data, sizeof(arg)); 495 break; 496 #endif 497 /* 498 * Get the current state statistics. 499 */ 500 case SIOCGETFS : 501 error = fr_outobj(data, fr_statetstats(ifs), IPFOBJ_STATESTAT); 502 break; 503 /* 504 * Lock/Unlock the state table. (Locking prevents any changes, which 505 * means no packets match). 
506 */ 507 case SIOCSTLCK : 508 if (!(mode & FWRITE)) { 509 error = EPERM; 510 } else { 511 fr_lock(data, &ifs->ifs_fr_state_lock); 512 } 513 break; 514 /* 515 * Add an entry to the current state table. 516 */ 517 case SIOCSTPUT : 518 if (!ifs->ifs_fr_state_lock || !(mode &FWRITE)) { 519 error = EACCES; 520 break; 521 } 522 error = fr_stputent(data, ifs); 523 break; 524 /* 525 * Get a state table entry. 526 */ 527 case SIOCSTGET : 528 if (!ifs->ifs_fr_state_lock) { 529 error = EACCES; 530 break; 531 } 532 error = fr_stgetent(data, ifs); 533 break; 534 535 case SIOCGENITER : 536 { 537 ipftoken_t *token; 538 ipfgeniter_t iter; 539 540 error = fr_inobj(data, &iter, IPFOBJ_GENITER); 541 if (error != 0) 542 break; 543 544 token = ipf_findtoken(IPFGENITER_STATE, uid, ctx, ifs); 545 if (token != NULL) 546 error = fr_stateiter(token, &iter, ifs); 547 else 548 error = ESRCH; 549 RWLOCK_EXIT(&ifs->ifs_ipf_tokens); 550 break; 551 } 552 553 case SIOCIPFDELTOK : 554 (void) BCOPYIN(data, (char *)&arg, sizeof(arg)); 555 error = ipf_deltoken(arg, uid, ctx, ifs); 556 break; 557 558 default : 559 error = EINVAL; 560 break; 561 } 562 return error; 563 } 564 565 566 /* ------------------------------------------------------------------------ */ 567 /* Function: fr_stgetent */ 568 /* Returns: int - 0 == success, != 0 == failure */ 569 /* Parameters: data(I) - pointer to state structure to retrieve from table */ 570 /* */ 571 /* Copy out state information from the kernel to a user space process. If */ 572 /* there is a filter rule associated with the state entry, copy that out */ 573 /* as well. The entry to copy out is taken from the value of "ips_next" in */ 574 /* the struct passed in and if not null and not found in the list of current*/ 575 /* state entries, the retrieval fails. 
*/ 576 /* ------------------------------------------------------------------------ */ 577 int fr_stgetent(data, ifs) 578 caddr_t data; 579 ipf_stack_t *ifs; 580 { 581 ipstate_t *is, *isn; 582 ipstate_save_t ips; 583 int error; 584 585 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); 586 if (error) 587 return EFAULT; 588 589 isn = ips.ips_next; 590 if (isn == NULL) { 591 isn = ifs->ifs_ips_list; 592 if (isn == NULL) { 593 if (ips.ips_next == NULL) 594 return ENOENT; 595 return 0; 596 } 597 } else { 598 /* 599 * Make sure the pointer we're copying from exists in the 600 * current list of entries. Security precaution to prevent 601 * copying of random kernel data. 602 */ 603 for (is = ifs->ifs_ips_list; is; is = is->is_next) 604 if (is == isn) 605 break; 606 if (!is) 607 return ESRCH; 608 } 609 ips.ips_next = isn->is_next; 610 bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is)); 611 ips.ips_rule = isn->is_rule; 612 if (isn->is_rule != NULL) 613 bcopy((char *)isn->is_rule, (char *)&ips.ips_fr, 614 sizeof(ips.ips_fr)); 615 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); 616 if (error) 617 return EFAULT; 618 return 0; 619 } 620 621 622 /* ------------------------------------------------------------------------ */ 623 /* Function: fr_stputent */ 624 /* Returns: int - 0 == success, != 0 == failure */ 625 /* Parameters: data(I) - pointer to state information struct */ 626 /* */ 627 /* This function implements the SIOCSTPUT ioctl: insert a state entry into */ 628 /* the state table. If the state info. includes a pointer to a filter rule */ 629 /* then also add in an orphaned rule (will not show up in any "ipfstat -io" */ 630 /* output. 
*/ 631 /* ------------------------------------------------------------------------ */ 632 int fr_stputent(data, ifs) 633 caddr_t data; 634 ipf_stack_t *ifs; 635 { 636 ipstate_t *is, *isn; 637 ipstate_save_t ips; 638 int error, i; 639 frentry_t *fr; 640 char *name; 641 642 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); 643 if (error) 644 return EFAULT; 645 646 KMALLOC(isn, ipstate_t *); 647 if (isn == NULL) 648 return ENOMEM; 649 650 bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn)); 651 bzero((char *)isn, offsetof(struct ipstate, is_pkts)); 652 isn->is_sti.tqe_pnext = NULL; 653 isn->is_sti.tqe_next = NULL; 654 isn->is_sti.tqe_ifq = NULL; 655 isn->is_sti.tqe_parent = isn; 656 isn->is_ifp[0] = NULL; 657 isn->is_ifp[1] = NULL; 658 isn->is_ifp[2] = NULL; 659 isn->is_ifp[3] = NULL; 660 isn->is_sync = NULL; 661 fr = ips.ips_rule; 662 663 if (fr == NULL) { 664 READ_ENTER(&ifs->ifs_ipf_state); 665 fr_stinsert(isn, 0, ifs); 666 MUTEX_EXIT(&isn->is_lock); 667 RWLOCK_EXIT(&ifs->ifs_ipf_state); 668 return 0; 669 } 670 671 if (isn->is_flags & SI_NEWFR) { 672 KMALLOC(fr, frentry_t *); 673 if (fr == NULL) { 674 KFREE(isn); 675 return ENOMEM; 676 } 677 bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr)); 678 isn->is_rule = fr; 679 ips.ips_is.is_rule = fr; 680 MUTEX_NUKE(&fr->fr_lock); 681 MUTEX_INIT(&fr->fr_lock, "state filter rule lock"); 682 683 /* 684 * Look up all the interface names in the rule. 685 */ 686 for (i = 0; i < 4; i++) { 687 name = fr->fr_ifnames[i]; 688 fr->fr_ifas[i] = fr_resolvenic(name, fr->fr_v, ifs); 689 name = isn->is_ifname[i]; 690 isn->is_ifp[i] = fr_resolvenic(name, isn->is_v, ifs); 691 } 692 693 fr->fr_ref = 0; 694 fr->fr_dsize = 0; 695 fr->fr_data = NULL; 696 fr->fr_type = FR_T_NONE; 697 698 fr_resolvedest(&fr->fr_tif, fr->fr_v, ifs); 699 fr_resolvedest(&fr->fr_dif, fr->fr_v, ifs); 700 fr_resolvedest(&fr->fr_rif, fr->fr_v, ifs); 701 702 /* 703 * send a copy back to userland of what we ended up 704 * to allow for verification. 
705 */ 706 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); 707 if (error) { 708 KFREE(isn); 709 MUTEX_DESTROY(&fr->fr_lock); 710 KFREE(fr); 711 return EFAULT; 712 } 713 READ_ENTER(&ifs->ifs_ipf_state); 714 fr_stinsert(isn, 0, ifs); 715 MUTEX_EXIT(&isn->is_lock); 716 RWLOCK_EXIT(&ifs->ifs_ipf_state); 717 718 } else { 719 READ_ENTER(&ifs->ifs_ipf_state); 720 for (is = ifs->ifs_ips_list; is; is = is->is_next) 721 if (is->is_rule == fr) { 722 fr_stinsert(isn, 0, ifs); 723 MUTEX_EXIT(&isn->is_lock); 724 break; 725 } 726 727 if (is == NULL) { 728 KFREE(isn); 729 isn = NULL; 730 } 731 RWLOCK_EXIT(&ifs->ifs_ipf_state); 732 733 return (isn == NULL) ? ESRCH : 0; 734 } 735 736 return 0; 737 } 738 739 740 /* ------------------------------------------------------------------------ */ 741 /* Function: fr_stinsert */ 742 /* Returns: Nil */ 743 /* Parameters: is(I) - pointer to state structure */ 744 /* rev(I) - flag indicating forward/reverse direction of packet */ 745 /* */ 746 /* Inserts a state structure into the hash table (for lookups) and the list */ 747 /* of state entries (for enumeration). Resolves all of the interface names */ 748 /* to pointers and adjusts running stats for the hash table as appropriate. */ 749 /* */ 750 /* Locking: it is assumed that some kind of lock on ipf_state is held. */ 751 /* Exits with is_lock initialised and held. */ 752 /* ------------------------------------------------------------------------ */ 753 void fr_stinsert(is, rev, ifs) 754 ipstate_t *is; 755 int rev; 756 ipf_stack_t *ifs; 757 { 758 frentry_t *fr; 759 u_int hv; 760 int i; 761 762 MUTEX_INIT(&is->is_lock, "ipf state entry"); 763 764 fr = is->is_rule; 765 if (fr != NULL) { 766 MUTEX_ENTER(&fr->fr_lock); 767 fr->fr_ref++; 768 fr->fr_statecnt++; 769 MUTEX_EXIT(&fr->fr_lock); 770 } 771 772 /* 773 * Look up all the interface names in the state entry. 
774 */ 775 for (i = 0; i < 4; i++) { 776 if (is->is_ifp[i] != NULL) 777 continue; 778 is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], is->is_v, ifs); 779 } 780 781 /* 782 * If we could trust is_hv, then the modulous would not be needed, but 783 * when running with IPFILTER_SYNC, this stops bad values. 784 */ 785 hv = is->is_hv % ifs->ifs_fr_statesize; 786 is->is_hv = hv; 787 788 /* 789 * We need to get both of these locks...the first because it is 790 * possible that once the insert is complete another packet might 791 * come along, match the entry and want to update it. 792 */ 793 MUTEX_ENTER(&is->is_lock); 794 MUTEX_ENTER(&ifs->ifs_ipf_stinsert); 795 796 /* 797 * add into list table. 798 */ 799 if (ifs->ifs_ips_list != NULL) 800 ifs->ifs_ips_list->is_pnext = &is->is_next; 801 is->is_pnext = &ifs->ifs_ips_list; 802 is->is_next = ifs->ifs_ips_list; 803 ifs->ifs_ips_list = is; 804 805 if (ifs->ifs_ips_table[hv] != NULL) 806 ifs->ifs_ips_table[hv]->is_phnext = &is->is_hnext; 807 else 808 ifs->ifs_ips_stats.iss_inuse++; 809 is->is_phnext = ifs->ifs_ips_table + hv; 810 is->is_hnext = ifs->ifs_ips_table[hv]; 811 ifs->ifs_ips_table[hv] = is; 812 ifs->ifs_ips_stats.iss_bucketlen[hv]++; 813 ifs->ifs_ips_num++; 814 MUTEX_EXIT(&ifs->ifs_ipf_stinsert); 815 816 fr_setstatequeue(is, rev, ifs); 817 } 818 819 /* ------------------------------------------------------------------------ */ 820 /* Function: fr_match_ipv4addrs */ 821 /* Returns: int - 2 strong match (same addresses, same direction) */ 822 /* 1 weak match (same address, opposite direction) */ 823 /* 0 no match */ 824 /* */ 825 /* Function matches IPv4 addresses. 
*/ 826 /* ------------------------------------------------------------------------ */ 827 static int fr_match_ipv4addrs(is1, is2) 828 ipstate_t *is1; 829 ipstate_t *is2; 830 { 831 int rv; 832 833 if (is1->is_saddr == is2->is_saddr && is1->is_daddr == is2->is_daddr) 834 rv = 2; 835 else if (is1->is_saddr == is2->is_daddr && 836 is1->is_daddr == is2->is_saddr) 837 rv = 1; 838 else 839 rv = 0; 840 841 return (rv); 842 } 843 844 /* ------------------------------------------------------------------------ */ 845 /* Function: fr_match_ipv6addrs */ 846 /* Returns: int - 2 strong match (same addresses, same direction) */ 847 /* 1 weak match (same addresses, opposite direction) */ 848 /* 0 no match */ 849 /* */ 850 /* Function matches IPv6 addresses. */ 851 /* ------------------------------------------------------------------------ */ 852 static int fr_match_ipv6addrs(is1, is2) 853 ipstate_t *is1; 854 ipstate_t *is2; 855 { 856 int rv; 857 858 if (IP6_EQ(&is1->is_src, &is2->is_src) && 859 IP6_EQ(&is1->is_dst, &is2->is_dst)) 860 rv = 2; 861 else if (IP6_EQ(&is1->is_src, &is2->is_dst) && 862 IP6_EQ(&is1->is_dst, &is2->is_src)) { 863 rv = 1; 864 } 865 else 866 rv = 0; 867 868 return (rv); 869 } 870 /* ------------------------------------------------------------------------ */ 871 /* Function: fr_match_addresses */ 872 /* Returns: int - 2 strong match (same addresses, same direction) */ 873 /* 1 weak match (same address, opposite directions) */ 874 /* 0 no match */ 875 /* Parameters: is1, is2 pointers to states we are checking */ 876 /* */ 877 /* Matches addresses, function uses fr_match_ipvXaddrs() to deal with IPv4 */ 878 /* and IPv6 address format. 
*/ 879 /* ------------------------------------------------------------------------ */ 880 static int fr_match_addresses(is1, is2) 881 ipstate_t *is1; 882 ipstate_t *is2; 883 { 884 int rv; 885 886 if (is1->is_v == 4) { 887 rv = fr_match_ipv4addrs(is1, is2); 888 } 889 else { 890 rv = fr_match_ipv6addrs(is1, is2); 891 } 892 893 return (rv); 894 } 895 896 /* ------------------------------------------------------------------------ */ 897 /* Function: fr_match_ppairs */ 898 /* Returns: int - 2 strong match (same ports, same direction) */ 899 /* 1 weak match (same ports, different direction) */ 900 /* 0 no match */ 901 /* Parameters ppairs1, ppairs - src, dst ports we want to match. */ 902 /* */ 903 /* Matches two port_pair_t types (port pairs). Each port pair contains */ 904 /* src, dst port, which belong to session (state entry). */ 905 /* ------------------------------------------------------------------------ */ 906 static int fr_match_ppairs(ppairs1, ppairs2) 907 port_pair_t *ppairs1; 908 port_pair_t *ppairs2; 909 { 910 int rv; 911 912 if (ppairs1->pp_sport == ppairs2->pp_sport && 913 ppairs1->pp_dport == ppairs2->pp_dport) 914 rv = 2; 915 else if (ppairs1->pp_sport == ppairs2->pp_dport && 916 ppairs1->pp_dport == ppairs2->pp_sport) 917 rv = 1; 918 else 919 rv = 0; 920 921 return (rv); 922 } 923 924 /* ------------------------------------------------------------------------ */ 925 /* Function: fr_match_l4_hdr */ 926 /* Returns: int - 0 no match, */ 927 /* 1 weak match (same ports, different directions) */ 928 /* 2 strong match (same ports, same direction) */ 929 /* Parameters is1, is2 - states we want to match */ 930 /* */ 931 /* Function matches L4 header data (source ports for TCP, UDP, CallIds for */ 932 /* GRE protocol). 
*/ 933 /* ------------------------------------------------------------------------ */ 934 static int fr_match_l4_hdr(is1, is2) 935 ipstate_t *is1; 936 ipstate_t *is2; 937 { 938 int rv = 0; 939 port_pair_t pp1; 940 port_pair_t pp2; 941 942 if (is1->is_p != is2->is_p) 943 return (0); 944 945 switch (is1->is_p) { 946 case IPPROTO_TCP: 947 pp1.pp_sport = is1->is_ps.is_ts.ts_sport; 948 pp1.pp_dport = is1->is_ps.is_ts.ts_dport; 949 pp2.pp_sport = is2->is_ps.is_ts.ts_sport; 950 pp2.pp_dport = is2->is_ps.is_ts.ts_dport; 951 rv = fr_match_ppairs(&pp1, &pp2); 952 break; 953 case IPPROTO_UDP: 954 pp1.pp_sport = is1->is_ps.is_us.us_sport; 955 pp1.pp_dport = is1->is_ps.is_us.us_dport; 956 pp2.pp_sport = is2->is_ps.is_us.us_sport; 957 pp2.pp_dport = is2->is_ps.is_us.us_dport; 958 rv = fr_match_ppairs(&pp1, &pp2); 959 break; 960 case IPPROTO_GRE: 961 /* greinfo_t can be also interprted as port pair */ 962 pp1.pp_sport = is1->is_ps.is_ug.gs_call[0]; 963 pp1.pp_dport = is1->is_ps.is_ug.gs_call[1]; 964 pp2.pp_sport = is2->is_ps.is_ug.gs_call[0]; 965 pp2.pp_dport = is2->is_ps.is_ug.gs_call[1]; 966 rv = fr_match_ppairs(&pp1, &pp2); 967 break; 968 case IPPROTO_ICMP: 969 case IPPROTO_ICMPV6: 970 if (bcmp(&is1->is_ps, &is2->is_ps, sizeof(icmpinfo_t))) 971 rv = 1; 972 else 973 rv = 0; 974 break; 975 default: 976 rv = 0; 977 } 978 979 return (rv); 980 } 981 982 /* ------------------------------------------------------------------------ */ 983 /* Function: fr_matchstates */ 984 /* Returns: int - nonzero match, zero no match */ 985 /* Parameters is1, is2 - states we want to match */ 986 /* */ 987 /* The state entries are equal (identical match) if they belong to the same */ 988 /* session. Any time new state entry is being added the fr_addstate() */ 989 /* function creates temporal state entry from the data it gets from IP and */ 990 /* L4 header. The fr_matchstats() must be also aware of packet direction, */ 991 /* which is also stored within the state entry. 
We should keep in mind the */
/* information about packet direction is spread across L3 (addresses) and   */
/* L4 (ports). There are three possible relationships between is1, is2:     */
/*   - no match (fr_match_xxxx(is1, is2) == 0)                              */
/*   - weak match same addresses (ports), but different directions          */
/*     (fr_match_xxxx(is1, is2) == 1)                                       */
/*   - strong match same addresses (ports) and same directions              */
/*     (fr_match_xxxx(is1, is2) == 2)                                       */
/*                                                                          */
/* There are functions, which match addresses (L3 header) in is1, is2       */
/* and functions, which are used to compare ports (L4 header) data. We say  */
/* the is1 and is2 are same (identical) if there is a match                 */
/* (fr_match_l4_hdr(is1, is2) != 0) and match levels are same for entries   */
/* (fr_match_addresses(is1, is2) == fr_match_l4_hdr(is1, is2)).             */
/* Such requirement deals with case as follows:                             */
/* suppose there are two connections between hosts A, B. Connection 1:      */
/*      a.a.a.a:12345 <=> b.b.b.b:54321                                     */
/* Connection 2:                                                            */
/*      a.a.a.a:54321 <=> b.b.b.b:12345                                     */
/* since we've introduced match levels into our fr_matchstates(), we are    */
/* able to identify, which packets belong to connection 1 and which belong  */
/* to connection 2. Assume there are two entries is1, is2. is1 has been     */
/* created from a con. 1 packet, which travelled from A to B:               */
/*      a.a.a.a:12345 -> b.b.b.b:54321                                      */
/* while is2 has been created from a packet which belongs to con. 2 and is  */
/* also coming from A to B:                                                 */
/*      a.a.a.a:54321 -> b.b.b.b:12345                                      */
/* fr_match_addresses(is1, is2) == 2 -> strong match, while                 */
/* fr_match_l4_hdr(is1, is2) == 1 -> weak match. Since match levels are     */
/* different the state entries are not identical -> no match as a final     */
/* result. 
*/ 1022 /* ------------------------------------------------------------------------ */ 1023 static int fr_matchstates(is1, is2) 1024 ipstate_t *is1; 1025 ipstate_t *is2; 1026 { 1027 int rv; 1028 int amatch; 1029 int pmatch; 1030 1031 if (bcmp(&is1->is_pass, &is2->is_pass, 1032 offsetof(struct ipstate, is_ps) - 1033 offsetof(struct ipstate, is_pass)) == 0) { 1034 1035 pmatch = fr_match_l4_hdr(is1, is2); 1036 amatch = fr_match_addresses(is1, is2); 1037 /* 1038 * If addresses match (amatch != 0), then 'match levels' 1039 * must be same for matching entries. If amatch and pmatch 1040 * have different values (different match levels), then 1041 * is1 and is2 belong to different sessions. 1042 */ 1043 rv = (amatch != 0) && (amatch == pmatch); 1044 } 1045 else 1046 rv = 0; 1047 1048 return (rv); 1049 } 1050 1051 /* ------------------------------------------------------------------------ */ 1052 /* Function: fr_addstate */ 1053 /* Returns: ipstate_t* - NULL == failure, else pointer to new state */ 1054 /* Parameters: fin(I) - pointer to packet information */ 1055 /* stsave(O) - pointer to place to save pointer to created */ 1056 /* state structure. */ 1057 /* flags(I) - flags to use when creating the structure */ 1058 /* */ 1059 /* Creates a new IP state structure from the packet information collected. */ 1060 /* Inserts it into the state table and appends to the bottom of the active */ 1061 /* list. If the capacity of the table has reached the maximum allowed then */ 1062 /* the call will fail and a flush is scheduled for the next timeout call. 
*/ 1063 /* ------------------------------------------------------------------------ */ 1064 ipstate_t *fr_addstate(fin, stsave, flags) 1065 fr_info_t *fin; 1066 ipstate_t **stsave; 1067 u_int flags; 1068 { 1069 ipstate_t *is, ips; 1070 struct icmp *ic; 1071 u_int pass, hv; 1072 frentry_t *fr; 1073 tcphdr_t *tcp; 1074 grehdr_t *gre; 1075 void *ifp; 1076 int out; 1077 ipf_stack_t *ifs = fin->fin_ifs; 1078 1079 if (ifs->ifs_fr_state_lock || 1080 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) 1081 return NULL; 1082 1083 if ((fin->fin_flx & FI_OOW) && !(fin->fin_tcpf & TH_SYN)) 1084 return NULL; 1085 1086 /* 1087 * If a "keep state" rule has reached the maximum number of references 1088 * to it, then schedule an automatic flush in case we can clear out 1089 * some "dead old wood". Note that because the lock isn't held on 1090 * fr it is possible that we could overflow. The cost of overflowing 1091 * is being ignored here as the number by which it can overflow is 1092 * a product of the number of simultaneous threads that could be 1093 * executing in here, so a limit of 100 won't result in 200, but could 1094 * result in 101 or 102. 
1095 */ 1096 fr = fin->fin_fr; 1097 if (fr != NULL) { 1098 if ((ifs->ifs_ips_num == ifs->ifs_fr_statemax) && (fr->fr_statemax == 0)) { 1099 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 1100 ifs->ifs_fr_state_doflush = 1; 1101 return NULL; 1102 } 1103 if ((fr->fr_statemax != 0) && 1104 (fr->fr_statecnt >= fr->fr_statemax)) { 1105 ATOMIC_INCL(ifs->ifs_ips_stats.iss_maxref); 1106 ifs->ifs_fr_state_doflush = 1; 1107 return NULL; 1108 } 1109 } 1110 1111 ic = NULL; 1112 tcp = NULL; 1113 out = fin->fin_out; 1114 is = &ips; 1115 bzero((char *)is, sizeof(*is)); 1116 1117 if (fr == NULL) { 1118 pass = ifs->ifs_fr_flags; 1119 is->is_tag = FR_NOLOGTAG; 1120 } 1121 else { 1122 pass = fr->fr_flags; 1123 } 1124 1125 is->is_die = 1 + ifs->ifs_fr_ticks; 1126 /* 1127 * We want to check everything that is a property of this packet, 1128 * but we don't (automatically) care about it's fragment status as 1129 * this may change. 1130 */ 1131 is->is_pass = pass; 1132 is->is_v = fin->fin_v; 1133 is->is_opt[0] = fin->fin_optmsk; 1134 is->is_optmsk[0] = 0xffffffff; 1135 is->is_optmsk[1] = 0xffffffff; 1136 if (is->is_v == 6) { 1137 is->is_opt[0] &= ~0x8; 1138 is->is_optmsk[0] &= ~0x8; 1139 is->is_optmsk[1] &= ~0x8; 1140 } 1141 is->is_sec = fin->fin_secmsk; 1142 is->is_secmsk = 0xffff; 1143 is->is_auth = fin->fin_auth; 1144 is->is_authmsk = 0xffff; 1145 1146 /* 1147 * Copy and calculate... 1148 */ 1149 hv = (is->is_p = fin->fin_fi.fi_p); 1150 is->is_src = fin->fin_fi.fi_src; 1151 hv += is->is_saddr; 1152 is->is_dst = fin->fin_fi.fi_dst; 1153 hv += is->is_daddr; 1154 #ifdef USE_INET6 1155 if (fin->fin_v == 6) { 1156 /* 1157 * For ICMPv6, we check to see if the destination address is 1158 * a multicast address. If it is, do not include it in the 1159 * calculation of the hash because the correct reply will come 1160 * back from a real address, not a multicast address. 
1161 */ 1162 if ((is->is_p == IPPROTO_ICMPV6) && 1163 IN6_IS_ADDR_MULTICAST(&is->is_dst.in6)) { 1164 /* 1165 * So you can do keep state with neighbour discovery. 1166 * 1167 * Here we could use the address from the neighbour 1168 * solicit message to put in the state structure and 1169 * we could use that without a wildcard flag too... 1170 */ 1171 is->is_flags |= SI_W_DADDR; 1172 hv -= is->is_daddr; 1173 } else { 1174 hv += is->is_dst.i6[1]; 1175 hv += is->is_dst.i6[2]; 1176 hv += is->is_dst.i6[3]; 1177 } 1178 hv += is->is_src.i6[1]; 1179 hv += is->is_src.i6[2]; 1180 hv += is->is_src.i6[3]; 1181 } 1182 #endif 1183 1184 switch (is->is_p) 1185 { 1186 #ifdef USE_INET6 1187 case IPPROTO_ICMPV6 : 1188 ic = fin->fin_dp; 1189 1190 switch (ic->icmp_type) 1191 { 1192 case ICMP6_ECHO_REQUEST : 1193 is->is_icmp.ici_type = ic->icmp_type; 1194 hv += (is->is_icmp.ici_id = ic->icmp_id); 1195 break; 1196 case ICMP6_MEMBERSHIP_QUERY : 1197 case ND_ROUTER_SOLICIT : 1198 case ND_NEIGHBOR_SOLICIT : 1199 case ICMP6_NI_QUERY : 1200 is->is_icmp.ici_type = ic->icmp_type; 1201 break; 1202 default : 1203 return NULL; 1204 } 1205 ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp); 1206 break; 1207 #endif 1208 case IPPROTO_ICMP : 1209 ic = fin->fin_dp; 1210 1211 switch (ic->icmp_type) 1212 { 1213 case ICMP_ECHO : 1214 case ICMP_TSTAMP : 1215 case ICMP_IREQ : 1216 case ICMP_MASKREQ : 1217 is->is_icmp.ici_type = ic->icmp_type; 1218 hv += (is->is_icmp.ici_id = ic->icmp_id); 1219 break; 1220 default : 1221 return NULL; 1222 } 1223 ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp); 1224 break; 1225 1226 case IPPROTO_GRE : 1227 gre = fin->fin_dp; 1228 1229 is->is_gre.gs_flags = gre->gr_flags; 1230 is->is_gre.gs_ptype = gre->gr_ptype; 1231 if (GRE_REV(is->is_gre.gs_flags) == 1) { 1232 is->is_call[0] = fin->fin_data[0]; 1233 is->is_call[1] = fin->fin_data[1]; 1234 } 1235 break; 1236 1237 case IPPROTO_TCP : 1238 tcp = fin->fin_dp; 1239 1240 if (tcp->th_flags & TH_RST) 1241 return NULL; 1242 /* 1243 * The endian of the 
ports doesn't matter, but the ack and 1244 * sequence numbers do as we do mathematics on them later. 1245 */ 1246 is->is_sport = htons(fin->fin_data[0]); 1247 is->is_dport = htons(fin->fin_data[1]); 1248 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) { 1249 hv += is->is_sport; 1250 hv += is->is_dport; 1251 } 1252 1253 /* 1254 * If this is a real packet then initialise fields in the 1255 * state information structure from the TCP header information. 1256 */ 1257 1258 is->is_maxdwin = 1; 1259 is->is_maxswin = ntohs(tcp->th_win); 1260 if (is->is_maxswin == 0) 1261 is->is_maxswin = 1; 1262 1263 if ((fin->fin_flx & FI_IGNORE) == 0) { 1264 is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen - 1265 (TCP_OFF(tcp) << 2) + 1266 ((tcp->th_flags & TH_SYN) ? 1 : 0) + 1267 ((tcp->th_flags & TH_FIN) ? 1 : 0); 1268 is->is_maxsend = is->is_send; 1269 1270 /* 1271 * Window scale option is only present in 1272 * SYN/SYN-ACK packet. 1273 */ 1274 if ((tcp->th_flags & ~(TH_FIN|TH_ACK|TH_ECNALL)) == 1275 TH_SYN && 1276 (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) { 1277 if (fr_tcpoptions(fin, tcp, 1278 &is->is_tcp.ts_data[0]) == -1) { 1279 fin->fin_flx |= FI_BAD; 1280 } 1281 } 1282 1283 if ((fin->fin_out != 0) && (pass & FR_NEWISN) != 0) { 1284 fr_checknewisn(fin, is); 1285 fr_fixoutisn(fin, is); 1286 } 1287 1288 if ((tcp->th_flags & TH_OPENING) == TH_SYN) 1289 flags |= IS_TCPFSM; 1290 else { 1291 is->is_maxdwin = is->is_maxswin * 2; 1292 is->is_dend = ntohl(tcp->th_ack); 1293 is->is_maxdend = ntohl(tcp->th_ack); 1294 is->is_maxdwin *= 2; 1295 } 1296 } 1297 1298 /* 1299 * If we're creating state for a starting connection, start the 1300 * timer on it as we'll never see an error if it fails to 1301 * connect. 
1302 */ 1303 ATOMIC_INCL(ifs->ifs_ips_stats.iss_tcp); 1304 break; 1305 1306 case IPPROTO_UDP : 1307 tcp = fin->fin_dp; 1308 1309 is->is_sport = htons(fin->fin_data[0]); 1310 is->is_dport = htons(fin->fin_data[1]); 1311 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) { 1312 hv += tcp->th_dport; 1313 hv += tcp->th_sport; 1314 } 1315 ATOMIC_INCL(ifs->ifs_ips_stats.iss_udp); 1316 break; 1317 1318 default : 1319 break; 1320 } 1321 hv = DOUBLE_HASH(hv, ifs); 1322 is->is_hv = hv; 1323 is->is_rule = fr; 1324 is->is_flags = flags & IS_INHERITED; 1325 1326 /* 1327 * Look for identical state. 1328 */ 1329 for (is = ifs->ifs_ips_table[is->is_hv % ifs->ifs_fr_statesize]; 1330 is != NULL; 1331 is = is->is_hnext) { 1332 if (fr_matchstates(&ips, is) == 1) 1333 break; 1334 } 1335 1336 /* 1337 * we've found a matching state -> state already exists, 1338 * we are not going to add a duplicate record. 1339 */ 1340 if (is != NULL) 1341 return NULL; 1342 1343 if (ifs->ifs_ips_stats.iss_bucketlen[hv] >= ifs->ifs_fr_state_maxbucket) { 1344 ATOMIC_INCL(ifs->ifs_ips_stats.iss_bucketfull); 1345 return NULL; 1346 } 1347 KMALLOC(is, ipstate_t *); 1348 if (is == NULL) { 1349 ATOMIC_INCL(ifs->ifs_ips_stats.iss_nomem); 1350 return NULL; 1351 } 1352 bcopy((char *)&ips, (char *)is, sizeof(*is)); 1353 /* 1354 * Do not do the modulous here, it is done in fr_stinsert(). 
1355 */ 1356 if (fr != NULL) { 1357 (void) strncpy(is->is_group, fr->fr_group, FR_GROUPLEN); 1358 if (fr->fr_age[0] != 0) { 1359 is->is_tqehead[0] = 1360 fr_addtimeoutqueue(&ifs->ifs_ips_utqe, 1361 fr->fr_age[0], ifs); 1362 is->is_sti.tqe_flags |= TQE_RULEBASED; 1363 } 1364 if (fr->fr_age[1] != 0) { 1365 is->is_tqehead[1] = 1366 fr_addtimeoutqueue(&ifs->ifs_ips_utqe, 1367 fr->fr_age[1], ifs); 1368 is->is_sti.tqe_flags |= TQE_RULEBASED; 1369 } 1370 is->is_tag = fr->fr_logtag; 1371 1372 is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1]; 1373 is->is_ifp[(1 - out) << 1] = fr->fr_ifas[2]; 1374 is->is_ifp[((1 - out) << 1) + 1] = fr->fr_ifas[3]; 1375 1376 if (((ifp = fr->fr_ifas[1]) != NULL) && 1377 (ifp != (void *)-1)) { 1378 COPYIFNAME(ifp, is->is_ifname[(out << 1) + 1], fr->fr_v); 1379 } 1380 if (((ifp = fr->fr_ifas[2]) != NULL) && 1381 (ifp != (void *)-1)) { 1382 COPYIFNAME(ifp, is->is_ifname[(1 - out) << 1], fr->fr_v); 1383 } 1384 if (((ifp = fr->fr_ifas[3]) != NULL) && 1385 (ifp != (void *)-1)) { 1386 COPYIFNAME(ifp, is->is_ifname[((1 - out) << 1) + 1], fr->fr_v); 1387 } 1388 } 1389 1390 is->is_ifp[out << 1] = fin->fin_ifp; 1391 if (fin->fin_ifp != NULL) { 1392 COPYIFNAME(fin->fin_ifp, is->is_ifname[out << 1], fr->fr_v); 1393 } 1394 1395 /* 1396 * It may seem strange to set is_ref to 2, but fr_check() will call 1397 * fr_statederef() after calling fr_addstate() and the idea is to 1398 * have it exist at the end of fr_check() with is_ref == 1. 
1399 */ 1400 is->is_ref = 2; 1401 is->is_pkts[0] = 0, is->is_bytes[0] = 0; 1402 is->is_pkts[1] = 0, is->is_bytes[1] = 0; 1403 is->is_pkts[2] = 0, is->is_bytes[2] = 0; 1404 is->is_pkts[3] = 0, is->is_bytes[3] = 0; 1405 if ((fin->fin_flx & FI_IGNORE) == 0) { 1406 is->is_pkts[out] = 1; 1407 is->is_bytes[out] = fin->fin_plen; 1408 is->is_flx[out][0] = fin->fin_flx & FI_CMP; 1409 is->is_flx[out][0] &= ~FI_OOW; 1410 } 1411 1412 if (pass & FR_STSTRICT) 1413 is->is_flags |= IS_STRICT; 1414 1415 if (pass & FR_STATESYNC) 1416 is->is_flags |= IS_STATESYNC; 1417 1418 if (flags & (SI_WILDP|SI_WILDA)) { 1419 ATOMIC_INCL(ifs->ifs_ips_stats.iss_wild); 1420 } 1421 is->is_rulen = fin->fin_rule; 1422 1423 1424 if (pass & FR_LOGFIRST) 1425 is->is_pass &= ~(FR_LOGFIRST|FR_LOG); 1426 1427 READ_ENTER(&ifs->ifs_ipf_state); 1428 is->is_me = stsave; 1429 1430 fr_stinsert(is, fin->fin_rev, ifs); 1431 1432 if (fin->fin_p == IPPROTO_TCP) { 1433 /* 1434 * If we're creating state for a starting connection, start the 1435 * timer on it as we'll never see an error if it fails to 1436 * connect. 
1437 */ 1438 (void) fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb, 1439 is->is_flags); 1440 MUTEX_EXIT(&is->is_lock); 1441 #ifdef IPFILTER_SCAN 1442 if ((is->is_flags & SI_CLONE) == 0) 1443 (void) ipsc_attachis(is); 1444 #endif 1445 } else { 1446 MUTEX_EXIT(&is->is_lock); 1447 } 1448 #ifdef IPFILTER_SYNC 1449 if ((is->is_flags & IS_STATESYNC) && ((is->is_flags & SI_CLONE) == 0)) 1450 is->is_sync = ipfsync_new(SMC_STATE, fin, is); 1451 #endif 1452 if (ifs->ifs_ipstate_logging) 1453 ipstate_log(is, ISL_NEW, ifs); 1454 1455 RWLOCK_EXIT(&ifs->ifs_ipf_state); 1456 fin->fin_state = is; 1457 fin->fin_rev = IP6_NEQ(&is->is_dst, &fin->fin_daddr); 1458 fin->fin_flx |= FI_STATE; 1459 if (fin->fin_flx & FI_FRAG) 1460 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); 1461 1462 return is; 1463 } 1464 1465 1466 /* ------------------------------------------------------------------------ */ 1467 /* Function: fr_tcpoptions */ 1468 /* Returns: int - 1 == packet matches state entry, 0 == it does not */ 1469 /* Parameters: fin(I) - pointer to packet information */ 1470 /* tcp(I) - pointer to TCP packet header */ 1471 /* td(I) - pointer to TCP data held as part of the state */ 1472 /* */ 1473 /* Look after the TCP header for any options and deal with those that are */ 1474 /* present. Record details about those that we recogise. 
*/ 1475 /* ------------------------------------------------------------------------ */ 1476 static int fr_tcpoptions(fin, tcp, td) 1477 fr_info_t *fin; 1478 tcphdr_t *tcp; 1479 tcpdata_t *td; 1480 { 1481 int off, mlen, ol, i, len, retval; 1482 char buf[64], *s, opt; 1483 mb_t *m = NULL; 1484 1485 len = (TCP_OFF(tcp) << 2); 1486 if (fin->fin_dlen < len) 1487 return 0; 1488 len -= sizeof(*tcp); 1489 1490 off = fin->fin_plen - fin->fin_dlen + sizeof(*tcp) + fin->fin_ipoff; 1491 1492 m = fin->fin_m; 1493 mlen = MSGDSIZE(m) - off; 1494 if (len > mlen) { 1495 len = mlen; 1496 retval = 0; 1497 } else { 1498 retval = 1; 1499 } 1500 1501 COPYDATA(m, off, len, buf); 1502 1503 for (s = buf; len > 0; ) { 1504 opt = *s; 1505 if (opt == TCPOPT_EOL) 1506 break; 1507 else if (opt == TCPOPT_NOP) 1508 ol = 1; 1509 else { 1510 if (len < 2) 1511 break; 1512 ol = (int)*(s + 1); 1513 if (ol < 2 || ol > len) 1514 break; 1515 1516 /* 1517 * Extract the TCP options we are interested in out of 1518 * the header and store them in the the tcpdata struct. 1519 */ 1520 switch (opt) 1521 { 1522 case TCPOPT_WINDOW : 1523 if (ol == TCPOLEN_WINDOW) { 1524 i = (int)*(s + 2); 1525 if (i > TCP_WSCALE_MAX) 1526 i = TCP_WSCALE_MAX; 1527 else if (i < 0) 1528 i = 0; 1529 td->td_winscale = i; 1530 td->td_winflags |= TCP_WSCALE_SEEN| 1531 TCP_WSCALE_FIRST; 1532 } else 1533 retval = -1; 1534 break; 1535 case TCPOPT_MAXSEG : 1536 /* 1537 * So, if we wanted to set the TCP MAXSEG, 1538 * it should be done here... 
1539 */ 1540 if (ol == TCPOLEN_MAXSEG) { 1541 i = (int)*(s + 2); 1542 i <<= 8; 1543 i += (int)*(s + 3); 1544 td->td_maxseg = i; 1545 } else 1546 retval = -1; 1547 break; 1548 case TCPOPT_SACK_PERMITTED : 1549 if (ol == TCPOLEN_SACK_PERMITTED) 1550 td->td_winflags |= TCP_SACK_PERMIT; 1551 else 1552 retval = -1; 1553 break; 1554 } 1555 } 1556 len -= ol; 1557 s += ol; 1558 } 1559 return retval; 1560 } 1561 1562 1563 /* ------------------------------------------------------------------------ */ 1564 /* Function: fr_tcpstate */ 1565 /* Returns: int - 1 == packet matches state entry, 0 == it does not */ 1566 /* Parameters: fin(I) - pointer to packet information */ 1567 /* tcp(I) - pointer to TCP packet header */ 1568 /* is(I) - pointer to master state structure */ 1569 /* */ 1570 /* Check to see if a packet with TCP headers fits within the TCP window. */ 1571 /* Change timeout depending on whether new packet is a SYN-ACK returning */ 1572 /* for a SYN or a RST or FIN which indicate time to close up shop. */ 1573 /* ------------------------------------------------------------------------ */ 1574 static int fr_tcpstate(fin, tcp, is) 1575 fr_info_t *fin; 1576 tcphdr_t *tcp; 1577 ipstate_t *is; 1578 { 1579 int source, ret = 0, flags; 1580 tcpdata_t *fdata, *tdata; 1581 ipf_stack_t *ifs = fin->fin_ifs; 1582 1583 source = !fin->fin_rev; 1584 if (((is->is_flags & IS_TCPFSM) != 0) && (source == 1) && 1585 (ntohs(is->is_sport) != fin->fin_data[0])) 1586 source = 0; 1587 fdata = &is->is_tcp.ts_data[!source]; 1588 tdata = &is->is_tcp.ts_data[source]; 1589 1590 MUTEX_ENTER(&is->is_lock); 1591 1592 /* 1593 * If a SYN packet is received for a connection that is in a half 1594 * closed state, then move its state entry to deletetq. In such case 1595 * the SYN packet will be consequently dropped. This allows new state 1596 * entry to be created with a retransmited SYN packet. 
1597 */ 1598 if ((tcp->th_flags & TH_OPENING) == TH_SYN) { 1599 if (((is->is_state[source] > IPF_TCPS_ESTABLISHED) || 1600 (is->is_state[source] == IPF_TCPS_CLOSED)) && 1601 ((is->is_state[!source] > IPF_TCPS_ESTABLISHED) || 1602 (is->is_state[!source] == IPF_TCPS_CLOSED))) { 1603 /* 1604 * Do not update is->is_sti.tqe_die in case state entry 1605 * is already present in deletetq. It prevents state 1606 * entry ttl update by retransmitted SYN packets, which 1607 * may arrive before timer tick kicks off. The SYN 1608 * packet will be dropped again. 1609 */ 1610 if (is->is_sti.tqe_ifq != &ifs->ifs_ips_deletetq) 1611 fr_movequeue(&is->is_sti, is->is_sti.tqe_ifq, 1612 &fin->fin_ifs->ifs_ips_deletetq, 1613 fin->fin_ifs); 1614 1615 MUTEX_EXIT(&is->is_lock); 1616 return 0; 1617 } 1618 } 1619 1620 if (fr_tcpinwindow(fin, fdata, tdata, tcp, is->is_flags)) { 1621 #ifdef IPFILTER_SCAN 1622 if (is->is_flags & (IS_SC_CLIENT|IS_SC_SERVER)) { 1623 ipsc_packet(fin, is); 1624 if (FR_ISBLOCK(is->is_pass)) { 1625 MUTEX_EXIT(&is->is_lock); 1626 return 1; 1627 } 1628 } 1629 #endif 1630 1631 /* 1632 * Nearing end of connection, start timeout. 1633 */ 1634 ret = fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb, 1635 is->is_flags); 1636 if (ret == 0) { 1637 MUTEX_EXIT(&is->is_lock); 1638 return 0; 1639 } 1640 1641 /* 1642 * set s0's as appropriate. Use syn-ack packet as it 1643 * contains both pieces of required information. 1644 */ 1645 /* 1646 * Window scale option is only present in SYN/SYN-ACK packet. 1647 * Compare with ~TH_FIN to mask out T/TCP setups. 
1648 */ 1649 flags = tcp->th_flags & ~(TH_FIN|TH_ECNALL); 1650 if (flags == (TH_SYN|TH_ACK)) { 1651 is->is_s0[source] = ntohl(tcp->th_ack); 1652 is->is_s0[!source] = ntohl(tcp->th_seq) + 1; 1653 if (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2)) { 1654 (void) fr_tcpoptions(fin, tcp, fdata); 1655 } 1656 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) 1657 fr_checknewisn(fin, is); 1658 } else if (flags == TH_SYN) { 1659 is->is_s0[source] = ntohl(tcp->th_seq) + 1; 1660 if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) 1661 (void) fr_tcpoptions(fin, tcp, tdata); 1662 1663 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) 1664 fr_checknewisn(fin, is); 1665 1666 } 1667 ret = 1; 1668 } else 1669 fin->fin_flx |= FI_OOW; 1670 MUTEX_EXIT(&is->is_lock); 1671 return ret; 1672 } 1673 1674 1675 /* ------------------------------------------------------------------------ */ 1676 /* Function: fr_checknewisn */ 1677 /* Returns: Nil */ 1678 /* Parameters: fin(I) - pointer to packet information */ 1679 /* is(I) - pointer to master state structure */ 1680 /* */ 1681 /* Check to see if this TCP connection is expecting and needs a new */ 1682 /* sequence number for a particular direction of the connection. */ 1683 /* */ 1684 /* NOTE: This does not actually change the sequence numbers, only gets new */ 1685 /* one ready. */ 1686 /* ------------------------------------------------------------------------ */ 1687 static void fr_checknewisn(fin, is) 1688 fr_info_t *fin; 1689 ipstate_t *is; 1690 { 1691 u_32_t sumd, old, new; 1692 tcphdr_t *tcp; 1693 int i; 1694 1695 i = fin->fin_rev; 1696 tcp = fin->fin_dp; 1697 1698 if (((i == 0) && !(is->is_flags & IS_ISNSYN)) || 1699 ((i == 1) && !(is->is_flags & IS_ISNACK))) { 1700 old = ntohl(tcp->th_seq); 1701 new = fr_newisn(fin); 1702 is->is_isninc[i] = new - old; 1703 CALC_SUMD(old, new, sumd); 1704 is->is_sumd[i] = (sumd & 0xffff) + (sumd >> 16); 1705 1706 is->is_flags |= ((i == 0) ? 
IS_ISNSYN : IS_ISNACK); 1707 } 1708 } 1709 1710 1711 /* ------------------------------------------------------------------------ */ 1712 /* Function: fr_tcpinwindow */ 1713 /* Returns: int - 1 == packet inside TCP "window", 0 == not inside. */ 1714 /* Parameters: fin(I) - pointer to packet information */ 1715 /* fdata(I) - pointer to tcp state informatio (forward) */ 1716 /* tdata(I) - pointer to tcp state informatio (reverse) */ 1717 /* tcp(I) - pointer to TCP packet header */ 1718 /* */ 1719 /* Given a packet has matched addresses and ports, check to see if it is */ 1720 /* within the TCP data window. In a show of generosity, allow packets that */ 1721 /* are within the window space behind the current sequence # as well. */ 1722 /* ------------------------------------------------------------------------ */ 1723 int fr_tcpinwindow(fin, fdata, tdata, tcp, flags) 1724 fr_info_t *fin; 1725 tcpdata_t *fdata, *tdata; 1726 tcphdr_t *tcp; 1727 int flags; 1728 { 1729 tcp_seq seq, ack, end; 1730 int ackskew, tcpflags; 1731 u_32_t win, maxwin; 1732 int dsize, inseq; 1733 1734 /* 1735 * Find difference between last checked packet and this packet. 1736 */ 1737 tcpflags = tcp->th_flags; 1738 seq = ntohl(tcp->th_seq); 1739 ack = ntohl(tcp->th_ack); 1740 1741 if (tcpflags & TH_SYN) 1742 win = ntohs(tcp->th_win); 1743 else 1744 win = ntohs(tcp->th_win) << fdata->td_winscale; 1745 1746 /* 1747 * win 0 means the receiving endpoint has closed the window, because it 1748 * has not enough memory to receive data from sender. In such case we 1749 * are pretending window size to be 1 to let TCP probe data through. 1750 * TCP probe data can be either 0 or 1 octet of data, the RFC does not 1751 * state this accurately, so we have to allow 1 octet (win = 1) even if 1752 * the window is closed (win == 0). 1753 */ 1754 if (win == 0) 1755 win = 1; 1756 1757 dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) + 1758 ((tcpflags & TH_SYN) ? 1 : 0) + ((tcpflags & TH_FIN) ? 
1 : 0); 1759 1760 /* 1761 * if window scaling is present, the scaling is only allowed 1762 * for windows not in the first SYN packet. In that packet the 1763 * window is 65535 to specify the largest window possible 1764 * for receivers not implementing the window scale option. 1765 * Currently, we do not assume TTCP here. That means that 1766 * if we see a second packet from a host (after the initial 1767 * SYN), we can assume that the receiver of the SYN did 1768 * already send back the SYN/ACK (and thus that we know if 1769 * the receiver also does window scaling) 1770 */ 1771 if (!(tcpflags & TH_SYN) && (fdata->td_winflags & TCP_WSCALE_FIRST)) { 1772 fdata->td_maxwin = win; 1773 } 1774 1775 end = seq + dsize; 1776 1777 if ((fdata->td_end == 0) && 1778 (!(flags & IS_TCPFSM) || 1779 ((tcpflags & TH_OPENING) == TH_OPENING))) { 1780 /* 1781 * Must be a (outgoing) SYN-ACK in reply to a SYN. 1782 */ 1783 fdata->td_end = end - 1; 1784 fdata->td_maxwin = 1; 1785 fdata->td_maxend = end + win; 1786 } 1787 1788 if (!(tcpflags & TH_ACK)) { /* Pretend an ack was sent */ 1789 ack = tdata->td_end; 1790 } else if (((tcpflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) && 1791 (ack == 0)) { 1792 /* gross hack to get around certain broken tcp stacks */ 1793 ack = tdata->td_end; 1794 } 1795 1796 maxwin = tdata->td_maxwin; 1797 ackskew = tdata->td_end - ack; 1798 1799 /* 1800 * Strict sequencing only allows in-order delivery. 
1801 */ 1802 if ((flags & IS_STRICT) != 0) { 1803 if (seq != fdata->td_end) { 1804 return 0; 1805 } 1806 } 1807 1808 #define SEQ_GE(a,b) ((int)((a) - (b)) >= 0) 1809 #define SEQ_GT(a,b) ((int)((a) - (b)) > 0) 1810 inseq = 0; 1811 if ( 1812 #if defined(_KERNEL) 1813 (SEQ_GE(fdata->td_maxend, end)) && 1814 (SEQ_GE(seq, fdata->td_end - maxwin)) && 1815 #endif 1816 /* XXX what about big packets */ 1817 #define MAXACKWINDOW 66000 1818 (-ackskew <= (MAXACKWINDOW << fdata->td_winscale)) && 1819 ( ackskew <= (MAXACKWINDOW << fdata->td_winscale))) { 1820 inseq = 1; 1821 /* 1822 * Microsoft Windows will send the next packet to the right of the 1823 * window if SACK is in use. 1824 */ 1825 } else if ((seq == fdata->td_maxend) && (ackskew == 0) && 1826 (fdata->td_winflags & TCP_SACK_PERMIT) && 1827 (tdata->td_winflags & TCP_SACK_PERMIT)) { 1828 inseq = 1; 1829 /* 1830 * RST ACK with SEQ equal to 0 is sent by some OSes (i.e. Solaris) as a 1831 * response to initial SYN packet, when there is no application 1832 * listeing to on a port, where the SYN packet has came to. 1833 */ 1834 } else if ((seq == 0) && (tcpflags == (TH_RST|TH_ACK)) && 1835 (ackskew >= -1) && (ackskew <= 1)) { 1836 inseq = 1; 1837 } else if (!(flags & IS_TCPFSM)) { 1838 1839 if (!(fdata->td_winflags & 1840 (TCP_WSCALE_SEEN|TCP_WSCALE_FIRST))) { 1841 /* 1842 * No TCPFSM and no window scaling, so make some 1843 * extra guesses. 1844 */ 1845 if ((seq == fdata->td_maxend) && (ackskew == 0)) 1846 inseq = 1; 1847 else if (SEQ_GE(seq + maxwin, fdata->td_end - maxwin)) 1848 inseq = 1; 1849 } 1850 } 1851 1852 if (inseq) { 1853 /* if ackskew < 0 then this should be due to fragmented 1854 * packets. There is no way to know the length of the 1855 * total packet in advance. 1856 * We do know the total length from the fragment cache though. 
1857 * Note however that there might be more sessions with 1858 * exactly the same source and destination parameters in the 1859 * state cache (and source and destination is the only stuff 1860 * that is saved in the fragment cache). Note further that 1861 * some TCP connections in the state cache are hashed with 1862 * sport and dport as well which makes it not worthwhile to 1863 * look for them. 1864 * Thus, when ackskew is negative but still seems to belong 1865 * to this session, we bump up the destinations end value. 1866 */ 1867 if (ackskew < 0) 1868 tdata->td_end = ack; 1869 1870 /* update max window seen */ 1871 if (fdata->td_maxwin < win) 1872 fdata->td_maxwin = win; 1873 if (SEQ_GT(end, fdata->td_end)) 1874 fdata->td_end = end; 1875 if (SEQ_GE(ack + win, tdata->td_maxend)) 1876 tdata->td_maxend = ack + win; 1877 return 1; 1878 } 1879 fin->fin_flx |= FI_OOW; 1880 return 0; 1881 } 1882 1883 1884 /* ------------------------------------------------------------------------ */ 1885 /* Function: fr_stclone */ 1886 /* Returns: ipstate_t* - NULL == cloning failed, */ 1887 /* else pointer to new state structure */ 1888 /* Parameters: fin(I) - pointer to packet information */ 1889 /* tcp(I) - pointer to TCP/UDP header */ 1890 /* is(I) - pointer to master state structure */ 1891 /* */ 1892 /* Create a "duplcate" state table entry from the master. 
*/ 1893 /* ------------------------------------------------------------------------ */ 1894 static ipstate_t *fr_stclone(fin, tcp, is) 1895 fr_info_t *fin; 1896 tcphdr_t *tcp; 1897 ipstate_t *is; 1898 { 1899 ipstate_t *clone; 1900 u_32_t send; 1901 ipf_stack_t *ifs = fin->fin_ifs; 1902 1903 if (ifs->ifs_ips_num == ifs->ifs_fr_statemax) { 1904 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 1905 ifs->ifs_fr_state_doflush = 1; 1906 return NULL; 1907 } 1908 KMALLOC(clone, ipstate_t *); 1909 if (clone == NULL) 1910 return NULL; 1911 bcopy((char *)is, (char *)clone, sizeof(*clone)); 1912 1913 MUTEX_NUKE(&clone->is_lock); 1914 1915 clone->is_die = ONE_DAY + ifs->ifs_fr_ticks; 1916 clone->is_state[0] = 0; 1917 clone->is_state[1] = 0; 1918 send = ntohl(tcp->th_seq) + fin->fin_dlen - (TCP_OFF(tcp) << 2) + 1919 ((tcp->th_flags & TH_SYN) ? 1 : 0) + 1920 ((tcp->th_flags & TH_FIN) ? 1 : 0); 1921 1922 if (fin->fin_rev == 1) { 1923 clone->is_dend = send; 1924 clone->is_maxdend = send; 1925 clone->is_send = 0; 1926 clone->is_maxswin = 1; 1927 clone->is_maxdwin = ntohs(tcp->th_win); 1928 if (clone->is_maxdwin == 0) 1929 clone->is_maxdwin = 1; 1930 } else { 1931 clone->is_send = send; 1932 clone->is_maxsend = send; 1933 clone->is_dend = 0; 1934 clone->is_maxdwin = 1; 1935 clone->is_maxswin = ntohs(tcp->th_win); 1936 if (clone->is_maxswin == 0) 1937 clone->is_maxswin = 1; 1938 } 1939 1940 clone->is_flags &= ~SI_CLONE; 1941 clone->is_flags |= SI_CLONED; 1942 fr_stinsert(clone, fin->fin_rev, ifs); 1943 clone->is_ref = 2; 1944 if (clone->is_p == IPPROTO_TCP) { 1945 (void) fr_tcp_age(&clone->is_sti, fin, ifs->ifs_ips_tqtqb, 1946 clone->is_flags); 1947 } 1948 MUTEX_EXIT(&clone->is_lock); 1949 #ifdef IPFILTER_SCAN 1950 (void) ipsc_attachis(is); 1951 #endif 1952 #ifdef IPFILTER_SYNC 1953 if (is->is_flags & IS_STATESYNC) 1954 clone->is_sync = ipfsync_new(SMC_STATE, fin, clone); 1955 #endif 1956 return clone; 1957 } 1958 1959 1960 /* 
------------------------------------------------------------------------ */ 1961 /* Function: fr_matchsrcdst */ 1962 /* Returns: Nil */ 1963 /* Parameters: fin(I) - pointer to packet information */ 1964 /* is(I) - pointer to state structure */ 1965 /* src(I) - pointer to source address */ 1966 /* dst(I) - pointer to destination address */ 1967 /* tcp(I) - pointer to TCP/UDP header */ 1968 /* */ 1969 /* Match a state table entry against an IP packet. The logic below is that */ 1970 /* ret gets set to one if the match succeeds, else remains 0. If it is */ 1971 /* still 0 after the test. no match. */ 1972 /* ------------------------------------------------------------------------ */ 1973 static ipstate_t *fr_matchsrcdst(fin, is, src, dst, tcp, cmask) 1974 fr_info_t *fin; 1975 ipstate_t *is; 1976 i6addr_t *src, *dst; 1977 tcphdr_t *tcp; 1978 u_32_t cmask; 1979 { 1980 int ret = 0, rev, out, flags, flx = 0, idx; 1981 u_short sp, dp; 1982 u_32_t cflx; 1983 void *ifp; 1984 ipf_stack_t *ifs = fin->fin_ifs; 1985 1986 rev = IP6_NEQ(&is->is_dst, dst); 1987 ifp = fin->fin_ifp; 1988 out = fin->fin_out; 1989 flags = is->is_flags; 1990 sp = 0; 1991 dp = 0; 1992 1993 if (tcp != NULL) { 1994 sp = htons(fin->fin_sport); 1995 dp = ntohs(fin->fin_dport); 1996 } 1997 if (!rev) { 1998 if (tcp != NULL) { 1999 if (!(flags & SI_W_SPORT) && (sp != is->is_sport)) 2000 rev = 1; 2001 else if (!(flags & SI_W_DPORT) && (dp != is->is_dport)) 2002 rev = 1; 2003 } 2004 } 2005 2006 idx = (out << 1) + rev; 2007 2008 /* 2009 * If the interface for this 'direction' is set, make sure it matches. 2010 * An interface name that is not set matches any, as does a name of *. 2011 */ 2012 if ((is->is_ifp[idx] == NULL && 2013 (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) || 2014 is->is_ifp[idx] == ifp) 2015 ret = 1; 2016 2017 if (ret == 0) 2018 return NULL; 2019 ret = 0; 2020 2021 /* 2022 * Match addresses and ports. 
2023 */ 2024 if (rev == 0) { 2025 if ((IP6_EQ(&is->is_dst, dst) || (flags & SI_W_DADDR)) && 2026 (IP6_EQ(&is->is_src, src) || (flags & SI_W_SADDR))) { 2027 if (tcp) { 2028 if ((sp == is->is_sport || flags & SI_W_SPORT)&& 2029 (dp == is->is_dport || flags & SI_W_DPORT)) 2030 ret = 1; 2031 } else { 2032 ret = 1; 2033 } 2034 } 2035 } else { 2036 if ((IP6_EQ(&is->is_dst, src) || (flags & SI_W_DADDR)) && 2037 (IP6_EQ(&is->is_src, dst) || (flags & SI_W_SADDR))) { 2038 if (tcp) { 2039 if ((dp == is->is_sport || flags & SI_W_SPORT)&& 2040 (sp == is->is_dport || flags & SI_W_DPORT)) 2041 ret = 1; 2042 } else { 2043 ret = 1; 2044 } 2045 } 2046 } 2047 2048 if (ret == 0) 2049 return NULL; 2050 2051 /* 2052 * Whether or not this should be here, is questionable, but the aim 2053 * is to get this out of the main line. 2054 */ 2055 if (tcp == NULL) 2056 flags = is->is_flags & ~(SI_WILDP|SI_NEWFR|SI_CLONE|SI_CLONED); 2057 2058 /* 2059 * Only one of the source or destination address can be flaged as a 2060 * wildcard. Fill in the missing address, if set. 2061 * For IPv6, if the address being copied in is multicast, then 2062 * don't reset the wild flag - multicast causes it to be set in the 2063 * first place! 
2064 */ 2065 if ((flags & (SI_W_SADDR|SI_W_DADDR))) { 2066 fr_ip_t *fi = &fin->fin_fi; 2067 2068 if ((flags & SI_W_SADDR) != 0) { 2069 if (rev == 0) { 2070 #ifdef USE_INET6 2071 if (is->is_v == 6 && 2072 IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6)) 2073 /*EMPTY*/; 2074 else 2075 #endif 2076 { 2077 is->is_src = fi->fi_src; 2078 is->is_flags &= ~SI_W_SADDR; 2079 } 2080 } else { 2081 #ifdef USE_INET6 2082 if (is->is_v == 6 && 2083 IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6)) 2084 /*EMPTY*/; 2085 else 2086 #endif 2087 { 2088 is->is_src = fi->fi_dst; 2089 is->is_flags &= ~SI_W_SADDR; 2090 } 2091 } 2092 } else if ((flags & SI_W_DADDR) != 0) { 2093 if (rev == 0) { 2094 #ifdef USE_INET6 2095 if (is->is_v == 6 && 2096 IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6)) 2097 /*EMPTY*/; 2098 else 2099 #endif 2100 { 2101 is->is_dst = fi->fi_dst; 2102 is->is_flags &= ~SI_W_DADDR; 2103 } 2104 } else { 2105 #ifdef USE_INET6 2106 if (is->is_v == 6 && 2107 IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6)) 2108 /*EMPTY*/; 2109 else 2110 #endif 2111 { 2112 is->is_dst = fi->fi_src; 2113 is->is_flags &= ~SI_W_DADDR; 2114 } 2115 } 2116 } 2117 if ((is->is_flags & (SI_WILDA|SI_WILDP)) == 0) { 2118 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); 2119 } 2120 } 2121 2122 flx = fin->fin_flx & cmask; 2123 cflx = is->is_flx[out][rev]; 2124 2125 /* 2126 * Match up any flags set from IP options. 2127 */ 2128 if ((cflx && (flx != (cflx & cmask))) || 2129 ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]) || 2130 ((fin->fin_secmsk & is->is_secmsk) != is->is_sec) || 2131 ((fin->fin_auth & is->is_authmsk) != is->is_auth)) 2132 return NULL; 2133 2134 /* 2135 * Only one of the source or destination port can be flagged as a 2136 * wildcard. When filling it in, fill in a copy of the matched entry 2137 * if it has the cloning flag set. 
2138 */ 2139 if ((fin->fin_flx & FI_IGNORE) != 0) { 2140 fin->fin_rev = rev; 2141 return is; 2142 } 2143 2144 if ((flags & (SI_W_SPORT|SI_W_DPORT))) { 2145 if ((flags & SI_CLONE) != 0) { 2146 ipstate_t *clone; 2147 2148 clone = fr_stclone(fin, tcp, is); 2149 if (clone == NULL) 2150 return NULL; 2151 is = clone; 2152 } else { 2153 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); 2154 } 2155 2156 if ((flags & SI_W_SPORT) != 0) { 2157 if (rev == 0) { 2158 is->is_sport = sp; 2159 is->is_send = ntohl(tcp->th_seq); 2160 } else { 2161 is->is_sport = dp; 2162 is->is_send = ntohl(tcp->th_ack); 2163 } 2164 is->is_maxsend = is->is_send + 1; 2165 } else if ((flags & SI_W_DPORT) != 0) { 2166 if (rev == 0) { 2167 is->is_dport = dp; 2168 is->is_dend = ntohl(tcp->th_ack); 2169 } else { 2170 is->is_dport = sp; 2171 is->is_dend = ntohl(tcp->th_seq); 2172 } 2173 is->is_maxdend = is->is_dend + 1; 2174 } 2175 is->is_flags &= ~(SI_W_SPORT|SI_W_DPORT); 2176 if ((flags & SI_CLONED) && ifs->ifs_ipstate_logging) 2177 ipstate_log(is, ISL_CLONE, ifs); 2178 } 2179 2180 ret = -1; 2181 2182 if (is->is_flx[out][rev] == 0) { 2183 is->is_flx[out][rev] = flx; 2184 is->is_opt[rev] = fin->fin_optmsk; 2185 if (is->is_v == 6) { 2186 is->is_opt[rev] &= ~0x8; 2187 is->is_optmsk[rev] &= ~0x8; 2188 } 2189 } 2190 2191 /* 2192 * Check if the interface name for this "direction" is set and if not, 2193 * fill it in. 
2194 */ 2195 if (is->is_ifp[idx] == NULL && 2196 (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) { 2197 is->is_ifp[idx] = ifp; 2198 COPYIFNAME(ifp, is->is_ifname[idx], fin->fin_v); 2199 } 2200 fin->fin_rev = rev; 2201 return is; 2202 } 2203 2204 2205 /* ------------------------------------------------------------------------ */ 2206 /* Function: fr_checkicmpmatchingstate */ 2207 /* Returns: Nil */ 2208 /* Parameters: fin(I) - pointer to packet information */ 2209 /* */ 2210 /* If we've got an ICMP error message, using the information stored in the */ 2211 /* ICMP packet, look for a matching state table entry. */ 2212 /* */ 2213 /* If we return NULL then no lock on ipf_state is held. */ 2214 /* If we return non-null then a read-lock on ipf_state is held. */ 2215 /* ------------------------------------------------------------------------ */ 2216 static ipstate_t *fr_checkicmpmatchingstate(fin) 2217 fr_info_t *fin; 2218 { 2219 ipstate_t *is, **isp; 2220 u_short sport, dport; 2221 u_char pr; 2222 int backward, i, oi; 2223 i6addr_t dst, src; 2224 struct icmp *ic; 2225 u_short savelen; 2226 icmphdr_t *icmp; 2227 fr_info_t ofin; 2228 tcphdr_t *tcp; 2229 int len; 2230 ip_t *oip; 2231 u_int hv; 2232 ipf_stack_t *ifs = fin->fin_ifs; 2233 2234 /* 2235 * Does it at least have the return (basic) IP header ? 2236 * Is it an actual recognised ICMP error type? 2237 * Only a basic IP header (no options) should be with 2238 * an ICMP error header. 2239 */ 2240 if ((fin->fin_v != 4) || (fin->fin_hlen != sizeof(ip_t)) || 2241 (fin->fin_plen < ICMPERR_MINPKTLEN) || 2242 !(fin->fin_flx & FI_ICMPERR)) 2243 return NULL; 2244 ic = fin->fin_dp; 2245 2246 oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN); 2247 /* 2248 * Check if the at least the old IP header (with options) and 2249 * 8 bytes of payload is present. 2250 */ 2251 if (fin->fin_plen < ICMPERR_MAXPKTLEN + ((IP_HL(oip) - 5) << 2)) 2252 return NULL; 2253 2254 /* 2255 * Sanity Checks. 
2256 */ 2257 len = fin->fin_dlen - ICMPERR_ICMPHLEN; 2258 if ((len <= 0) || ((IP_HL(oip) << 2) > len)) 2259 return NULL; 2260 2261 /* 2262 * Is the buffer big enough for all of it ? It's the size of the IP 2263 * header claimed in the encapsulated part which is of concern. It 2264 * may be too big to be in this buffer but not so big that it's 2265 * outside the ICMP packet, leading to TCP deref's causing problems. 2266 * This is possible because we don't know how big oip_hl is when we 2267 * do the pullup early in fr_check() and thus can't guarantee it is 2268 * all here now. 2269 */ 2270 #ifdef _KERNEL 2271 { 2272 mb_t *m; 2273 2274 m = fin->fin_m; 2275 # if defined(MENTAT) 2276 if ((char *)oip + len > (char *)m->b_wptr) 2277 return NULL; 2278 # else 2279 if ((char *)oip + len > (char *)fin->fin_ip + m->m_len) 2280 return NULL; 2281 # endif 2282 } 2283 #endif 2284 bcopy((char *)fin, (char *)&ofin, sizeof(*fin)); 2285 2286 /* 2287 * in the IPv4 case we must zero the i6addr union otherwise 2288 * the IP6_EQ and IP6_NEQ macros produce the wrong results because 2289 * of the 'junk' in the unused part of the union 2290 */ 2291 bzero((char *)&src, sizeof(src)); 2292 bzero((char *)&dst, sizeof(dst)); 2293 2294 /* 2295 * we make an fin entry to be able to feed it to 2296 * matchsrcdst note that not all fields are encessary 2297 * but this is the cleanest way. Note further we fill 2298 * in fin_mp such that if someone uses it we'll get 2299 * a kernel panic. fr_matchsrcdst does not use this. 2300 * 2301 * watch out here, as ip is in host order and oip in network 2302 * order. Any change we make must be undone afterwards, like 2303 * oip->ip_off - it is still in network byte order so fix it. 
2304 */ 2305 savelen = oip->ip_len; 2306 oip->ip_len = len; 2307 oip->ip_off = ntohs(oip->ip_off); 2308 2309 ofin.fin_flx = FI_NOCKSUM; 2310 ofin.fin_v = 4; 2311 ofin.fin_ip = oip; 2312 ofin.fin_m = NULL; /* if dereferenced, panic XXX */ 2313 ofin.fin_mp = NULL; /* if dereferenced, panic XXX */ 2314 ofin.fin_plen = fin->fin_dlen - ICMPERR_ICMPHLEN; 2315 (void) fr_makefrip(IP_HL(oip) << 2, oip, &ofin); 2316 ofin.fin_ifp = fin->fin_ifp; 2317 ofin.fin_out = !fin->fin_out; 2318 /* 2319 * Reset the short and bad flag here because in fr_matchsrcdst() 2320 * the flags for the current packet (fin_flx) are compared against 2321 * those for the existing session. 2322 */ 2323 ofin.fin_flx &= ~(FI_BAD|FI_SHORT); 2324 2325 /* 2326 * Put old values of ip_len and ip_off back as we don't know 2327 * if we have to forward the packet (or process it again. 2328 */ 2329 oip->ip_len = savelen; 2330 oip->ip_off = htons(oip->ip_off); 2331 2332 switch (oip->ip_p) 2333 { 2334 case IPPROTO_ICMP : 2335 /* 2336 * an ICMP error can only be generated as a result of an 2337 * ICMP query, not as the response on an ICMP error 2338 * 2339 * XXX theoretically ICMP_ECHOREP and the other reply's are 2340 * ICMP query's as well, but adding them here seems strange XXX 2341 */ 2342 if ((ofin.fin_flx & FI_ICMPERR) != 0) 2343 return NULL; 2344 2345 /* 2346 * perform a lookup of the ICMP packet in the state table 2347 */ 2348 icmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2349 hv = (pr = oip->ip_p); 2350 src.in4 = oip->ip_src; 2351 hv += src.in4.s_addr; 2352 dst.in4 = oip->ip_dst; 2353 hv += dst.in4.s_addr; 2354 hv += icmp->icmp_id; 2355 hv = DOUBLE_HASH(hv, ifs); 2356 2357 READ_ENTER(&ifs->ifs_ipf_state); 2358 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 2359 isp = &is->is_hnext; 2360 if ((is->is_p != pr) || (is->is_v != 4)) 2361 continue; 2362 if (is->is_pass & FR_NOICMPERR) 2363 continue; 2364 is = fr_matchsrcdst(&ofin, is, &src, &dst, 2365 NULL, FI_ICMPCMP); 2366 if (is != 
NULL) { 2367 if ((is->is_pass & FR_NOICMPERR) != 0) { 2368 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2369 return NULL; 2370 } 2371 /* 2372 * i : the index of this packet (the icmp 2373 * unreachable) 2374 * oi : the index of the original packet found 2375 * in the icmp header (i.e. the packet 2376 * causing this icmp) 2377 * backward : original packet was backward 2378 * compared to the state 2379 */ 2380 backward = IP6_NEQ(&is->is_src, &src); 2381 fin->fin_rev = !backward; 2382 i = (!backward << 1) + fin->fin_out; 2383 oi = (backward << 1) + ofin.fin_out; 2384 if (is->is_icmppkts[i] > is->is_pkts[oi]) 2385 continue; 2386 ifs->ifs_ips_stats.iss_hits++; 2387 is->is_icmppkts[i]++; 2388 return is; 2389 } 2390 } 2391 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2392 return NULL; 2393 case IPPROTO_TCP : 2394 case IPPROTO_UDP : 2395 break; 2396 default : 2397 return NULL; 2398 } 2399 2400 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2401 dport = tcp->th_dport; 2402 sport = tcp->th_sport; 2403 2404 hv = (pr = oip->ip_p); 2405 src.in4 = oip->ip_src; 2406 hv += src.in4.s_addr; 2407 dst.in4 = oip->ip_dst; 2408 hv += dst.in4.s_addr; 2409 hv += dport; 2410 hv += sport; 2411 hv = DOUBLE_HASH(hv, ifs); 2412 2413 READ_ENTER(&ifs->ifs_ipf_state); 2414 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 2415 isp = &is->is_hnext; 2416 /* 2417 * Only allow this icmp though if the 2418 * encapsulated packet was allowed through the 2419 * other way around. Note that the minimal amount 2420 * of info present does not allow for checking against 2421 * tcp internals such as seq and ack numbers. Only the 2422 * ports are known to be present and can be even if the 2423 * short flag is set. 2424 */ 2425 if ((is->is_p == pr) && (is->is_v == 4) && 2426 (is = fr_matchsrcdst(&ofin, is, &src, &dst, 2427 tcp, FI_ICMPCMP))) { 2428 /* 2429 * i : the index of this packet (the icmp unreachable) 2430 * oi : the index of the original packet found in the 2431 * icmp header (i.e. 
the packet causing this icmp) 2432 * backward : original packet was backward compared to 2433 * the state 2434 */ 2435 backward = IP6_NEQ(&is->is_src, &src); 2436 fin->fin_rev = !backward; 2437 i = (!backward << 1) + fin->fin_out; 2438 oi = (backward << 1) + ofin.fin_out; 2439 2440 if (((is->is_pass & FR_NOICMPERR) != 0) || 2441 (is->is_icmppkts[i] > is->is_pkts[oi])) 2442 break; 2443 ifs->ifs_ips_stats.iss_hits++; 2444 is->is_icmppkts[i]++; 2445 /* 2446 * we deliberately do not touch the timeouts 2447 * for the accompanying state table entry. 2448 * It remains to be seen if that is correct. XXX 2449 */ 2450 return is; 2451 } 2452 } 2453 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2454 return NULL; 2455 } 2456 2457 2458 /* ------------------------------------------------------------------------ */ 2459 /* Function: fr_ipsmove */ 2460 /* Returns: Nil */ 2461 /* Parameters: is(I) - pointer to state table entry */ 2462 /* hv(I) - new hash value for state table entry */ 2463 /* Write Locks: ipf_state */ 2464 /* */ 2465 /* Move a state entry from one position in the hash table to another. */ 2466 /* ------------------------------------------------------------------------ */ 2467 static void fr_ipsmove(is, hv, ifs) 2468 ipstate_t *is; 2469 u_int hv; 2470 ipf_stack_t *ifs; 2471 { 2472 ipstate_t **isp; 2473 u_int hvm; 2474 2475 ASSERT(rw_read_locked(&ifs->ifs_ipf_state.ipf_lk) == 0); 2476 2477 hvm = is->is_hv; 2478 /* 2479 * Remove the hash from the old location... 2480 */ 2481 isp = is->is_phnext; 2482 if (is->is_hnext) 2483 is->is_hnext->is_phnext = isp; 2484 *isp = is->is_hnext; 2485 if (ifs->ifs_ips_table[hvm] == NULL) 2486 ifs->ifs_ips_stats.iss_inuse--; 2487 ifs->ifs_ips_stats.iss_bucketlen[hvm]--; 2488 2489 /* 2490 * ...and put the hash in the new one. 
2491 */ 2492 hvm = DOUBLE_HASH(hv, ifs); 2493 is->is_hv = hvm; 2494 isp = &ifs->ifs_ips_table[hvm]; 2495 if (*isp) 2496 (*isp)->is_phnext = &is->is_hnext; 2497 else 2498 ifs->ifs_ips_stats.iss_inuse++; 2499 ifs->ifs_ips_stats.iss_bucketlen[hvm]++; 2500 is->is_phnext = isp; 2501 is->is_hnext = *isp; 2502 *isp = is; 2503 } 2504 2505 2506 /* ------------------------------------------------------------------------ */ 2507 /* Function: fr_stlookup */ 2508 /* Returns: ipstate_t* - NULL == no matching state found, */ 2509 /* else pointer to state information is returned */ 2510 /* Parameters: fin(I) - pointer to packet information */ 2511 /* tcp(I) - pointer to TCP/UDP header. */ 2512 /* */ 2513 /* Search the state table for a matching entry to the packet described by */ 2514 /* the contents of *fin. */ 2515 /* */ 2516 /* If we return NULL then no lock on ipf_state is held. */ 2517 /* If we return non-null then a read-lock on ipf_state is held. */ 2518 /* ------------------------------------------------------------------------ */ 2519 ipstate_t *fr_stlookup(fin, tcp, ifqp) 2520 fr_info_t *fin; 2521 tcphdr_t *tcp; 2522 ipftq_t **ifqp; 2523 { 2524 u_int hv, hvm, pr, v, tryagain; 2525 ipstate_t *is, **isp; 2526 u_short dport, sport; 2527 i6addr_t src, dst; 2528 struct icmp *ic; 2529 ipftq_t *ifq; 2530 int oow; 2531 ipf_stack_t *ifs = fin->fin_ifs; 2532 2533 is = NULL; 2534 ifq = NULL; 2535 tcp = fin->fin_dp; 2536 ic = (struct icmp *)tcp; 2537 hv = (pr = fin->fin_fi.fi_p); 2538 src = fin->fin_fi.fi_src; 2539 dst = fin->fin_fi.fi_dst; 2540 hv += src.in4.s_addr; 2541 hv += dst.in4.s_addr; 2542 2543 v = fin->fin_fi.fi_v; 2544 #ifdef USE_INET6 2545 if (v == 6) { 2546 hv += fin->fin_fi.fi_src.i6[1]; 2547 hv += fin->fin_fi.fi_src.i6[2]; 2548 hv += fin->fin_fi.fi_src.i6[3]; 2549 2550 if ((fin->fin_p == IPPROTO_ICMPV6) && 2551 IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_dst.in6)) { 2552 hv -= dst.in4.s_addr; 2553 } else { 2554 hv += fin->fin_fi.fi_dst.i6[1]; 2555 hv += 
fin->fin_fi.fi_dst.i6[2]; 2556 hv += fin->fin_fi.fi_dst.i6[3]; 2557 } 2558 } 2559 #endif 2560 2561 /* 2562 * Search the hash table for matching packet header info. 2563 */ 2564 switch (pr) 2565 { 2566 #ifdef USE_INET6 2567 case IPPROTO_ICMPV6 : 2568 tryagain = 0; 2569 if (v == 6) { 2570 if ((ic->icmp_type == ICMP6_ECHO_REQUEST) || 2571 (ic->icmp_type == ICMP6_ECHO_REPLY)) { 2572 hv += ic->icmp_id; 2573 } 2574 } 2575 READ_ENTER(&ifs->ifs_ipf_state); 2576 icmp6again: 2577 hvm = DOUBLE_HASH(hv, ifs); 2578 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { 2579 isp = &is->is_hnext; 2580 if ((is->is_p != pr) || (is->is_v != v)) 2581 continue; 2582 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); 2583 if (is != NULL && 2584 fr_matchicmpqueryreply(v, &is->is_icmp, 2585 ic, fin->fin_rev)) { 2586 if (fin->fin_rev) 2587 ifq = &ifs->ifs_ips_icmpacktq; 2588 else 2589 ifq = &ifs->ifs_ips_icmptq; 2590 break; 2591 } 2592 } 2593 2594 if (is != NULL) { 2595 if ((tryagain != 0) && !(is->is_flags & SI_W_DADDR)) { 2596 hv += fin->fin_fi.fi_src.i6[0]; 2597 hv += fin->fin_fi.fi_src.i6[1]; 2598 hv += fin->fin_fi.fi_src.i6[2]; 2599 hv += fin->fin_fi.fi_src.i6[3]; 2600 fr_ipsmove(is, hv, ifs); 2601 MUTEX_DOWNGRADE(&ifs->ifs_ipf_state); 2602 } 2603 break; 2604 } 2605 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2606 2607 /* 2608 * No matching icmp state entry. Perhaps this is a 2609 * response to another state entry. 2610 * 2611 * XXX With some ICMP6 packets, the "other" address is already 2612 * in the packet, after the ICMP6 header, and this could be 2613 * used in place of the multicast address. However, taking 2614 * advantage of this requires some significant code changes 2615 * to handle the specific types where that is the case. 
2616 */ 2617 if ((ifs->ifs_ips_stats.iss_wild != 0) && (v == 6) && (tryagain == 0) && 2618 !IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_src.in6)) { 2619 hv -= fin->fin_fi.fi_src.i6[0]; 2620 hv -= fin->fin_fi.fi_src.i6[1]; 2621 hv -= fin->fin_fi.fi_src.i6[2]; 2622 hv -= fin->fin_fi.fi_src.i6[3]; 2623 tryagain = 1; 2624 WRITE_ENTER(&ifs->ifs_ipf_state); 2625 goto icmp6again; 2626 } 2627 2628 is = fr_checkicmp6matchingstate(fin); 2629 if (is != NULL) 2630 return is; 2631 break; 2632 #endif 2633 2634 case IPPROTO_ICMP : 2635 if (v == 4) { 2636 hv += ic->icmp_id; 2637 } 2638 hv = DOUBLE_HASH(hv, ifs); 2639 READ_ENTER(&ifs->ifs_ipf_state); 2640 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 2641 isp = &is->is_hnext; 2642 if ((is->is_p != pr) || (is->is_v != v)) 2643 continue; 2644 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); 2645 if (is != NULL && 2646 fr_matchicmpqueryreply(v, &is->is_icmp, 2647 ic, fin->fin_rev)) { 2648 if (fin->fin_rev) 2649 ifq = &ifs->ifs_ips_icmpacktq; 2650 else 2651 ifq = &ifs->ifs_ips_icmptq; 2652 break; 2653 } 2654 } 2655 if (is == NULL) { 2656 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2657 } 2658 break; 2659 2660 case IPPROTO_TCP : 2661 case IPPROTO_UDP : 2662 ifqp = NULL; 2663 sport = htons(fin->fin_data[0]); 2664 hv += sport; 2665 dport = htons(fin->fin_data[1]); 2666 hv += dport; 2667 oow = 0; 2668 tryagain = 0; 2669 READ_ENTER(&ifs->ifs_ipf_state); 2670 retry_tcpudp: 2671 hvm = DOUBLE_HASH(hv, ifs); 2672 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { 2673 isp = &is->is_hnext; 2674 if ((is->is_p != pr) || (is->is_v != v)) 2675 continue; 2676 fin->fin_flx &= ~FI_OOW; 2677 is = fr_matchsrcdst(fin, is, &src, &dst, tcp, FI_CMP); 2678 if (is != NULL) { 2679 if (pr == IPPROTO_TCP) { 2680 if (!fr_tcpstate(fin, tcp, is)) { 2681 oow |= fin->fin_flx & FI_OOW; 2682 continue; 2683 } 2684 } 2685 break; 2686 } 2687 } 2688 if (is != NULL) { 2689 if (tryagain && 2690 !(is->is_flags & (SI_CLONE|SI_WILDP|SI_WILDA))) { 2691 hv += 
dport; 2692 hv += sport; 2693 fr_ipsmove(is, hv, ifs); 2694 MUTEX_DOWNGRADE(&ifs->ifs_ipf_state); 2695 } 2696 break; 2697 } 2698 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2699 2700 if (!tryagain && ifs->ifs_ips_stats.iss_wild) { 2701 hv -= dport; 2702 hv -= sport; 2703 tryagain = 1; 2704 WRITE_ENTER(&ifs->ifs_ipf_state); 2705 goto retry_tcpudp; 2706 } 2707 fin->fin_flx |= oow; 2708 break; 2709 2710 #if 0 2711 case IPPROTO_GRE : 2712 gre = fin->fin_dp; 2713 if (GRE_REV(gre->gr_flags) == 1) { 2714 hv += gre->gr_call; 2715 } 2716 /* FALLTHROUGH */ 2717 #endif 2718 default : 2719 ifqp = NULL; 2720 hvm = DOUBLE_HASH(hv, ifs); 2721 READ_ENTER(&ifs->ifs_ipf_state); 2722 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { 2723 isp = &is->is_hnext; 2724 if ((is->is_p != pr) || (is->is_v != v)) 2725 continue; 2726 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); 2727 if (is != NULL) { 2728 ifq = &ifs->ifs_ips_iptq; 2729 break; 2730 } 2731 } 2732 if (is == NULL) { 2733 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2734 } 2735 break; 2736 } 2737 2738 if ((is != NULL) && ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) && 2739 (is->is_tqehead[fin->fin_rev] != NULL)) 2740 ifq = is->is_tqehead[fin->fin_rev]; 2741 if (ifq != NULL && ifqp != NULL) 2742 *ifqp = ifq; 2743 return is; 2744 } 2745 2746 2747 /* ------------------------------------------------------------------------ */ 2748 /* Function: fr_updatestate */ 2749 /* Returns: Nil */ 2750 /* Parameters: fin(I) - pointer to packet information */ 2751 /* is(I) - pointer to state table entry */ 2752 /* Read Locks: ipf_state */ 2753 /* */ 2754 /* Updates packet and byte counters for a newly received packet. Seeds the */ 2755 /* fragment cache with a new entry as required. 
*/ 2756 /* ------------------------------------------------------------------------ */ 2757 void fr_updatestate(fin, is, ifq) 2758 fr_info_t *fin; 2759 ipstate_t *is; 2760 ipftq_t *ifq; 2761 { 2762 ipftqent_t *tqe; 2763 int i, pass; 2764 ipf_stack_t *ifs = fin->fin_ifs; 2765 2766 i = (fin->fin_rev << 1) + fin->fin_out; 2767 2768 /* 2769 * For TCP packets, ifq == NULL. For all others, check if this new 2770 * queue is different to the last one it was on and move it if so. 2771 */ 2772 tqe = &is->is_sti; 2773 MUTEX_ENTER(&is->is_lock); 2774 if ((tqe->tqe_flags & TQE_RULEBASED) != 0) 2775 ifq = is->is_tqehead[fin->fin_rev]; 2776 2777 if (ifq != NULL) 2778 fr_movequeue(tqe, tqe->tqe_ifq, ifq, ifs); 2779 2780 is->is_pkts[i]++; 2781 is->is_bytes[i] += fin->fin_plen; 2782 MUTEX_EXIT(&is->is_lock); 2783 2784 #ifdef IPFILTER_SYNC 2785 if (is->is_flags & IS_STATESYNC) 2786 ipfsync_update(SMC_STATE, fin, is->is_sync); 2787 #endif 2788 2789 ATOMIC_INCL(ifs->ifs_ips_stats.iss_hits); 2790 2791 fin->fin_fr = is->is_rule; 2792 2793 /* 2794 * If this packet is a fragment and the rule says to track fragments, 2795 * then create a new fragment cache entry. 2796 */ 2797 pass = is->is_pass; 2798 if ((fin->fin_flx & FI_FRAG) && FR_ISPASS(pass)) 2799 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); 2800 } 2801 2802 2803 /* ------------------------------------------------------------------------ */ 2804 /* Function: fr_checkstate */ 2805 /* Returns: frentry_t* - NULL == search failed, */ 2806 /* else pointer to rule for matching state */ 2807 /* Parameters: ifp(I) - pointer to interface */ 2808 /* passp(I) - pointer to filtering result flags */ 2809 /* */ 2810 /* Check if a packet is associated with an entry in the state table. 
*/ 2811 /* ------------------------------------------------------------------------ */ 2812 frentry_t *fr_checkstate(fin, passp) 2813 fr_info_t *fin; 2814 u_32_t *passp; 2815 { 2816 ipstate_t *is; 2817 frentry_t *fr; 2818 tcphdr_t *tcp; 2819 ipftq_t *ifq; 2820 u_int pass; 2821 ipf_stack_t *ifs = fin->fin_ifs; 2822 2823 if (ifs->ifs_fr_state_lock || (ifs->ifs_ips_list == NULL) || 2824 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) 2825 return NULL; 2826 2827 is = NULL; 2828 if ((fin->fin_flx & FI_TCPUDP) || 2829 (fin->fin_fi.fi_p == IPPROTO_ICMP) 2830 #ifdef USE_INET6 2831 || (fin->fin_fi.fi_p == IPPROTO_ICMPV6) 2832 #endif 2833 ) 2834 tcp = fin->fin_dp; 2835 else 2836 tcp = NULL; 2837 2838 /* 2839 * Search the hash table for matching packet header info. 2840 */ 2841 ifq = NULL; 2842 is = fin->fin_state; 2843 if (is == NULL) 2844 is = fr_stlookup(fin, tcp, &ifq); 2845 switch (fin->fin_p) 2846 { 2847 #ifdef USE_INET6 2848 case IPPROTO_ICMPV6 : 2849 if (is != NULL) 2850 break; 2851 if (fin->fin_v == 6) { 2852 is = fr_checkicmp6matchingstate(fin); 2853 if (is != NULL) 2854 goto matched; 2855 } 2856 break; 2857 #endif 2858 case IPPROTO_ICMP : 2859 if (is != NULL) 2860 break; 2861 /* 2862 * No matching icmp state entry. Perhaps this is a 2863 * response to another state entry. 
2864 */ 2865 is = fr_checkicmpmatchingstate(fin); 2866 if (is != NULL) 2867 goto matched; 2868 break; 2869 case IPPROTO_TCP : 2870 if (is == NULL) 2871 break; 2872 2873 if (is->is_pass & FR_NEWISN) { 2874 if (fin->fin_out == 0) 2875 fr_fixinisn(fin, is); 2876 else if (fin->fin_out == 1) 2877 fr_fixoutisn(fin, is); 2878 } 2879 break; 2880 default : 2881 if (fin->fin_rev) 2882 ifq = &ifs->ifs_ips_udpacktq; 2883 else 2884 ifq = &ifs->ifs_ips_udptq; 2885 break; 2886 } 2887 if (is == NULL) { 2888 ATOMIC_INCL(ifs->ifs_ips_stats.iss_miss); 2889 return NULL; 2890 } 2891 2892 matched: 2893 fr = is->is_rule; 2894 if (fr != NULL) { 2895 if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) { 2896 if (fin->fin_nattag == NULL) 2897 return NULL; 2898 if (fr_matchtag(&fr->fr_nattag, fin->fin_nattag) != 0) 2899 return NULL; 2900 } 2901 (void) strncpy(fin->fin_group, fr->fr_group, FR_GROUPLEN); 2902 fin->fin_icode = fr->fr_icode; 2903 } 2904 2905 fin->fin_rule = is->is_rulen; 2906 pass = is->is_pass; 2907 fr_updatestate(fin, is, ifq); 2908 if (fin->fin_out == 1) 2909 fin->fin_nat = is->is_nat[fin->fin_rev]; 2910 2911 fin->fin_state = is; 2912 is->is_touched = ifs->ifs_fr_ticks; 2913 MUTEX_ENTER(&is->is_lock); 2914 is->is_ref++; 2915 MUTEX_EXIT(&is->is_lock); 2916 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2917 fin->fin_flx |= FI_STATE; 2918 if ((pass & FR_LOGFIRST) != 0) 2919 pass &= ~(FR_LOGFIRST|FR_LOG); 2920 *passp = pass; 2921 return fr; 2922 } 2923 2924 2925 /* ------------------------------------------------------------------------ */ 2926 /* Function: fr_fixoutisn */ 2927 /* Returns: Nil */ 2928 /* Parameters: fin(I) - pointer to packet information */ 2929 /* is(I) - pointer to master state structure */ 2930 /* */ 2931 /* Called only for outbound packets, adjusts the sequence number and the */ 2932 /* TCP checksum to match that change. 
*/ 2933 /* ------------------------------------------------------------------------ */ 2934 static void fr_fixoutisn(fin, is) 2935 fr_info_t *fin; 2936 ipstate_t *is; 2937 { 2938 tcphdr_t *tcp; 2939 int rev; 2940 u_32_t seq; 2941 2942 tcp = fin->fin_dp; 2943 rev = fin->fin_rev; 2944 if ((is->is_flags & IS_ISNSYN) != 0) { 2945 if (rev == 0) { 2946 seq = ntohl(tcp->th_seq); 2947 seq += is->is_isninc[0]; 2948 tcp->th_seq = htonl(seq); 2949 fix_outcksum(&tcp->th_sum, is->is_sumd[0]); 2950 } 2951 } 2952 if ((is->is_flags & IS_ISNACK) != 0) { 2953 if (rev == 1) { 2954 seq = ntohl(tcp->th_seq); 2955 seq += is->is_isninc[1]; 2956 tcp->th_seq = htonl(seq); 2957 fix_outcksum(&tcp->th_sum, is->is_sumd[1]); 2958 } 2959 } 2960 } 2961 2962 2963 /* ------------------------------------------------------------------------ */ 2964 /* Function: fr_fixinisn */ 2965 /* Returns: Nil */ 2966 /* Parameters: fin(I) - pointer to packet information */ 2967 /* is(I) - pointer to master state structure */ 2968 /* */ 2969 /* Called only for inbound packets, adjusts the acknowledge number and the */ 2970 /* TCP checksum to match that change. 
*/ 2971 /* ------------------------------------------------------------------------ */ 2972 static void fr_fixinisn(fin, is) 2973 fr_info_t *fin; 2974 ipstate_t *is; 2975 { 2976 tcphdr_t *tcp; 2977 int rev; 2978 u_32_t ack; 2979 2980 tcp = fin->fin_dp; 2981 rev = fin->fin_rev; 2982 if ((is->is_flags & IS_ISNSYN) != 0) { 2983 if (rev == 1) { 2984 ack = ntohl(tcp->th_ack); 2985 ack -= is->is_isninc[0]; 2986 tcp->th_ack = htonl(ack); 2987 fix_incksum(&tcp->th_sum, is->is_sumd[0]); 2988 } 2989 } 2990 if ((is->is_flags & IS_ISNACK) != 0) { 2991 if (rev == 0) { 2992 ack = ntohl(tcp->th_ack); 2993 ack -= is->is_isninc[1]; 2994 tcp->th_ack = htonl(ack); 2995 fix_incksum(&tcp->th_sum, is->is_sumd[1]); 2996 } 2997 } 2998 } 2999 3000 3001 /* ------------------------------------------------------------------------ */ 3002 /* Function: fr_statesync */ 3003 /* Returns: Nil */ 3004 /* Parameters: action(I) - type of synchronisation to do */ 3005 /* v(I) - IP version being sync'd (v4 or v6) */ 3006 /* ifp(I) - interface identifier associated with action */ 3007 /* name(I) - name associated with ifp parameter */ 3008 /* */ 3009 /* Walk through all state entries and if an interface pointer match is */ 3010 /* found then look it up again, based on its name in case the pointer has */ 3011 /* changed since last time. */ 3012 /* */ 3013 /* If ifp is passed in as being non-null then we are only doing updates for */ 3014 /* existing, matching, uses of it. 
*/ 3015 /* ------------------------------------------------------------------------ */ 3016 void fr_statesync(action, v, ifp, name, ifs) 3017 int action, v; 3018 void *ifp; 3019 char *name; 3020 ipf_stack_t *ifs; 3021 { 3022 ipstate_t *is; 3023 int i; 3024 3025 if (ifs->ifs_fr_running <= 0) 3026 return; 3027 3028 WRITE_ENTER(&ifs->ifs_ipf_state); 3029 3030 if (ifs->ifs_fr_running <= 0) { 3031 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3032 return; 3033 } 3034 3035 switch (action) 3036 { 3037 case IPFSYNC_RESYNC : 3038 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3039 if (v != 0 && is->is_v != v) 3040 continue; 3041 /* 3042 * Look up all the interface names in the state entry. 3043 */ 3044 for (i = 0; i < 4; i++) { 3045 is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], 3046 is->is_v, ifs); 3047 } 3048 } 3049 break; 3050 case IPFSYNC_NEWIFP : 3051 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3052 if (v != 0 && is->is_v != v) 3053 continue; 3054 /* 3055 * Look up all the interface names in the state entry. 3056 */ 3057 for (i = 0; i < 4; i++) { 3058 if (!strncmp(is->is_ifname[i], name, 3059 sizeof(is->is_ifname[i]))) 3060 is->is_ifp[i] = ifp; 3061 } 3062 } 3063 break; 3064 case IPFSYNC_OLDIFP : 3065 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3066 if (v != 0 && is->is_v != v) 3067 continue; 3068 /* 3069 * Look up all the interface names in the state entry. 
3070 */ 3071 for (i = 0; i < 4; i++) { 3072 if (is->is_ifp[i] == ifp) 3073 is->is_ifp[i] = (void *)-1; 3074 } 3075 } 3076 break; 3077 } 3078 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3079 } 3080 3081 3082 /* ------------------------------------------------------------------------ */ 3083 /* Function: fr_delstate */ 3084 /* Returns: Nil */ 3085 /* Parameters: is(I) - pointer to state structure to delete */ 3086 /* why(I) - if not 0, log reason why it was deleted */ 3087 /* Write Locks: ipf_state/ipf_global */ 3088 /* */ 3089 /* Deletes a state entry from the enumerated list as well as the hash table */ 3090 /* and timeout queue lists. Make adjustments to hash table statistics and */ 3091 /* global counters as required. */ 3092 /* ------------------------------------------------------------------------ */ 3093 static void fr_delstate(is, why, ifs) 3094 ipstate_t *is; 3095 int why; 3096 ipf_stack_t *ifs; 3097 { 3098 3099 ASSERT(rw_write_held(&ifs->ifs_ipf_global.ipf_lk) == 0 || 3100 rw_write_held(&ifs->ifs_ipf_state.ipf_lk) == 0); 3101 3102 /* 3103 * Since we want to delete this, remove it from the state table, 3104 * where it can be found & used, first. 3105 */ 3106 if (is->is_pnext != NULL) { 3107 *is->is_pnext = is->is_next; 3108 3109 if (is->is_next != NULL) 3110 is->is_next->is_pnext = is->is_pnext; 3111 3112 is->is_pnext = NULL; 3113 is->is_next = NULL; 3114 } 3115 3116 if (is->is_phnext != NULL) { 3117 *is->is_phnext = is->is_hnext; 3118 if (is->is_hnext != NULL) 3119 is->is_hnext->is_phnext = is->is_phnext; 3120 if (ifs->ifs_ips_table[is->is_hv] == NULL) 3121 ifs->ifs_ips_stats.iss_inuse--; 3122 ifs->ifs_ips_stats.iss_bucketlen[is->is_hv]--; 3123 3124 is->is_phnext = NULL; 3125 is->is_hnext = NULL; 3126 } 3127 3128 /* 3129 * Because ifs->ifs_ips_stats.iss_wild is a count of entries in the state 3130 * table that have wildcard flags set, only decerement it once 3131 * and do it here. 
3132 */ 3133 if (is->is_flags & (SI_WILDP|SI_WILDA)) { 3134 if (!(is->is_flags & SI_CLONED)) { 3135 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); 3136 } 3137 is->is_flags &= ~(SI_WILDP|SI_WILDA); 3138 } 3139 3140 /* 3141 * Next, remove it from the timeout queue it is in. 3142 */ 3143 fr_deletequeueentry(&is->is_sti); 3144 3145 is->is_me = NULL; 3146 3147 /* 3148 * If it is still in use by something else, do not go any further, 3149 * but note that at this point it is now an orphan. 3150 */ 3151 MUTEX_ENTER(&is->is_lock); 3152 if (is->is_ref > 1) { 3153 is->is_ref--; 3154 MUTEX_EXIT(&is->is_lock); 3155 return; 3156 } 3157 MUTEX_EXIT(&is->is_lock); 3158 3159 is->is_ref = 0; 3160 3161 if (is->is_tqehead[0] != NULL) 3162 (void) fr_deletetimeoutqueue(is->is_tqehead[0]); 3163 3164 if (is->is_tqehead[1] != NULL) 3165 (void) fr_deletetimeoutqueue(is->is_tqehead[1]); 3166 3167 #ifdef IPFILTER_SYNC 3168 if (is->is_sync) 3169 ipfsync_del(is->is_sync); 3170 #endif 3171 #ifdef IPFILTER_SCAN 3172 (void) ipsc_detachis(is); 3173 #endif 3174 3175 if (ifs->ifs_ipstate_logging != 0 && why != 0) 3176 ipstate_log(is, why, ifs); 3177 3178 if (is->is_rule != NULL) { 3179 is->is_rule->fr_statecnt--; 3180 (void)fr_derefrule(&is->is_rule, ifs); 3181 } 3182 3183 MUTEX_DESTROY(&is->is_lock); 3184 KFREE(is); 3185 ifs->ifs_ips_num--; 3186 } 3187 3188 3189 /* ------------------------------------------------------------------------ */ 3190 /* Function: fr_timeoutstate */ 3191 /* Returns: Nil */ 3192 /* Parameters: Nil */ 3193 /* */ 3194 /* Slowly expire held state for thingslike UDP and ICMP. The algorithm */ 3195 /* used here is to keep the queue sorted with the oldest things at the top */ 3196 /* and the youngest at the bottom. So if the top one doesn't need to be */ 3197 /* expired then neither will any under it. 
*/ 3198 /* ------------------------------------------------------------------------ */ 3199 void fr_timeoutstate(ifs) 3200 ipf_stack_t *ifs; 3201 { 3202 ipftq_t *ifq, *ifqnext; 3203 ipftqent_t *tqe, *tqn; 3204 ipstate_t *is; 3205 SPL_INT(s); 3206 3207 SPL_NET(s); 3208 WRITE_ENTER(&ifs->ifs_ipf_state); 3209 for (ifq = ifs->ifs_ips_tqtqb; ifq != NULL; ifq = ifq->ifq_next) 3210 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { 3211 if (tqe->tqe_die > ifs->ifs_fr_ticks) 3212 break; 3213 tqn = tqe->tqe_next; 3214 is = tqe->tqe_parent; 3215 fr_delstate(is, ISL_EXPIRE, ifs); 3216 } 3217 3218 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifq->ifq_next) { 3219 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { 3220 if (tqe->tqe_die > ifs->ifs_fr_ticks) 3221 break; 3222 tqn = tqe->tqe_next; 3223 is = tqe->tqe_parent; 3224 fr_delstate(is, ISL_EXPIRE, ifs); 3225 } 3226 } 3227 3228 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) { 3229 ifqnext = ifq->ifq_next; 3230 3231 if (((ifq->ifq_flags & IFQF_DELETE) != 0) && 3232 (ifq->ifq_ref == 0)) { 3233 fr_freetimeoutqueue(ifq, ifs); 3234 } 3235 } 3236 3237 if (ifs->ifs_fr_state_doflush) { 3238 (void) fr_state_flush(2, 0, ifs); 3239 ifs->ifs_fr_state_doflush = 0; 3240 } 3241 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3242 SPL_X(s); 3243 } 3244 3245 3246 /* ------------------------------------------------------------------------ */ 3247 /* Function: fr_state_flush */ 3248 /* Returns: int - 0 == success, -1 == failure */ 3249 /* Parameters: Nil */ 3250 /* Write Locks: ipf_state */ 3251 /* */ 3252 /* Flush state tables. Three actions currently defined: */ 3253 /* which == 0 : flush all state table entries */ 3254 /* which == 1 : flush TCP connections which have started to close but are */ 3255 /* stuck for some reason. */ 3256 /* which == 2 : flush TCP connections which have been idle for a long time, */ 3257 /* starting at > 4 days idle and working back in successive half-*/ 3258 /* days to at most 12 hours old. 
If this fails to free enough */ 3259 /* slots then work backwards in half hour slots to 30 minutes. */ 3260 /* If that too fails, then work backwards in 30 second intervals */ 3261 /* for the last 30 minutes to at worst 30 seconds idle. */ 3262 /* ------------------------------------------------------------------------ */ 3263 static int fr_state_flush(which, proto, ifs) 3264 int which, proto; 3265 ipf_stack_t *ifs; 3266 { 3267 ipftq_t *ifq, *ifqnext; 3268 ipftqent_t *tqe, *tqn; 3269 ipstate_t *is, **isp; 3270 int delete, removed; 3271 long try, maxtick; 3272 u_long interval; 3273 SPL_INT(s); 3274 3275 removed = 0; 3276 3277 SPL_NET(s); 3278 for (isp = &ifs->ifs_ips_list; ((is = *isp) != NULL); ) { 3279 delete = 0; 3280 3281 if ((proto != 0) && (is->is_v != proto)) { 3282 isp = &is->is_next; 3283 continue; 3284 } 3285 3286 switch (which) 3287 { 3288 case 0 : 3289 delete = 1; 3290 break; 3291 case 1 : 3292 case 2 : 3293 if (is->is_p != IPPROTO_TCP) 3294 break; 3295 if ((is->is_state[0] != IPF_TCPS_ESTABLISHED) || 3296 (is->is_state[1] != IPF_TCPS_ESTABLISHED)) 3297 delete = 1; 3298 break; 3299 } 3300 3301 if (delete) { 3302 if (is->is_p == IPPROTO_TCP) 3303 ifs->ifs_ips_stats.iss_fin++; 3304 else 3305 ifs->ifs_ips_stats.iss_expire++; 3306 fr_delstate(is, ISL_FLUSH, ifs); 3307 removed++; 3308 } else 3309 isp = &is->is_next; 3310 } 3311 3312 if (which != 2) { 3313 SPL_X(s); 3314 return removed; 3315 } 3316 3317 /* 3318 * Asked to remove inactive entries because the table is full, try 3319 * again, 3 times, if first attempt failed with a different criteria 3320 * each time. The order tried in must be in decreasing age. 3321 * Another alternative is to implement random drop and drop N entries 3322 * at random until N have been freed up. 
3323 */ 3324 if (ifs->ifs_fr_ticks - ifs->ifs_ips_last_force_flush < IPF_TTLVAL(5)) 3325 goto force_flush_skipped; 3326 ifs->ifs_ips_last_force_flush = ifs->ifs_fr_ticks; 3327 3328 if (ifs->ifs_fr_ticks > IPF_TTLVAL(43200)) 3329 interval = IPF_TTLVAL(43200); 3330 else if (ifs->ifs_fr_ticks > IPF_TTLVAL(1800)) 3331 interval = IPF_TTLVAL(1800); 3332 else if (ifs->ifs_fr_ticks > IPF_TTLVAL(30)) 3333 interval = IPF_TTLVAL(30); 3334 else 3335 interval = IPF_TTLVAL(10); 3336 try = ifs->ifs_fr_ticks - (ifs->ifs_fr_ticks - interval); 3337 if (try < 0) 3338 goto force_flush_skipped; 3339 3340 while (removed == 0) { 3341 maxtick = ifs->ifs_fr_ticks - interval; 3342 if (maxtick < 0) 3343 break; 3344 3345 while (try < maxtick) { 3346 for (ifq = ifs->ifs_ips_tqtqb; ifq != NULL; 3347 ifq = ifq->ifq_next) { 3348 for (tqn = ifq->ifq_head; 3349 ((tqe = tqn) != NULL); ) { 3350 if (tqe->tqe_die > try) 3351 break; 3352 tqn = tqe->tqe_next; 3353 is = tqe->tqe_parent; 3354 fr_delstate(is, ISL_EXPIRE, ifs); 3355 removed++; 3356 } 3357 } 3358 3359 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) { 3360 ifqnext = ifq->ifq_next; 3361 3362 for (tqn = ifq->ifq_head; 3363 ((tqe = tqn) != NULL); ) { 3364 if (tqe->tqe_die > try) 3365 break; 3366 tqn = tqe->tqe_next; 3367 is = tqe->tqe_parent; 3368 fr_delstate(is, ISL_EXPIRE, ifs); 3369 removed++; 3370 } 3371 } 3372 if (try + interval > maxtick) 3373 break; 3374 try += interval; 3375 } 3376 3377 if (removed == 0) { 3378 if (interval == IPF_TTLVAL(43200)) { 3379 interval = IPF_TTLVAL(1800); 3380 } else if (interval == IPF_TTLVAL(1800)) { 3381 interval = IPF_TTLVAL(30); 3382 } else if (interval == IPF_TTLVAL(30)) { 3383 interval = IPF_TTLVAL(10); 3384 } else { 3385 break; 3386 } 3387 } 3388 } 3389 force_flush_skipped: 3390 SPL_X(s); 3391 return removed; 3392 } 3393 3394 3395 3396 /* ------------------------------------------------------------------------ */ 3397 /* Function: fr_tcp_age */ 3398 /* Returns: int - 1 == state transition made, 
0 == no change (rejected)   */
/*                    (1 is also returned for packets that are accepted     */
/*                    without the state or queue position being updated)    */
/* Parameters:  tqe(I)   - pointer to timeout queue entry being aged        */
/*              fin(I)   - pointer to packet information                    */
/*              tqtab(I) - TCP timeout queue table this is in               */
/*              flags(I) - flags from state/NAT entry                       */
/*                                                                          */
/* Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29:          */
/*                                                                          */
/* - (try to) base state transitions on real evidence only,                 */
/*   i.e. packets that are sent and have been received by ipfilter;         */
/*   diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used.       */
/*                                                                          */
/* - deal with half-closed connections correctly;                           */
/*                                                                          */
/* - store the state of the source in state[0] such that ipfstat            */
/*   displays the state as source/dest instead of dest/source; the calls    */
/*   to fr_tcp_age have been changed accordingly.                           */
/*                                                                          */
/* Internal Parameters:                                                     */
/*                                                                          */
/*    state[0] = state of source (host that initiated connection)           */
/*    state[1] = state of dest   (host that accepted the connection)        */
/*                                                                          */
/*    dir == 0 : a packet from source to dest                               */
/*    dir == 1 : a packet from dest to source                               */
/*                                                                          */
/* Locking: it is assumed that the parent of the tqe structure is locked.   */
/* ------------------------------------------------------------------------ */
int fr_tcp_age(tqe, fin, tqtab, flags)
ipftqent_t *tqe;
fr_info_t *fin;
ipftq_t *tqtab;
int flags;
{
	int dlen, ostate, nstate, rval, dir;
	u_char tcpflags;
	tcphdr_t *tcp;
	ipf_stack_t *ifs = fin->fin_ifs;

	tcp = fin->fin_dp;

	rval = 0;
	dir = fin->fin_rev;
	tcpflags = tcp->th_flags;
	/* fin_dlen less the TCP header length given by the data offset */
	dlen = fin->fin_dlen - (TCP_OFF(tcp) << 2);

	if (tcpflags & TH_RST) {
		/*
		 * A reset always counts as a transition: to CLOSED when it
		 * has neither PUSH set nor any data, else to CLOSE_WAIT.
		 */
		if (!(tcpflags & TH_PUSH) && !dlen)
			nstate = IPF_TCPS_CLOSED;
		else
			nstate = IPF_TCPS_CLOSE_WAIT;
		rval = 1;
	} else {
		/*
		 * nstate starts out as the current state of the side that
		 * sent this packet, ostate is the state of the other side.
		 */
		ostate = tqe->tqe_state[1 - dir];
		nstate = tqe->tqe_state[dir];

		switch (nstate)
		{
		case IPF_TCPS_CLOSED: /* 0 */
			if ((tcpflags & TH_OPENING) == TH_OPENING) {
				/*
				 * 'dir' received an S and sends SA in
				 * response, CLOSED -> SYN_RECEIVED
				 */
				nstate = IPF_TCPS_SYN_RECEIVED;
				rval = 1;
			} else if ((tcpflags & TH_OPENING) == TH_SYN) {
				/* 'dir' sent S, CLOSED -> SYN_SENT */
				nstate = IPF_TCPS_SYN_SENT;
				rval = 1;
			}
			/*
			 * the next piece of code makes it possible to get
			 * already established connections into the state table
			 * after a restart or reload of the filter rules; this
			 * does not work when a strict 'flags S keep state' is
			 * used for tcp connections of course
			 */
			if (((flags & IS_TCPFSM) == 0) &&
			    ((tcpflags & TH_ACKMASK) == TH_ACK)) {
				/*
				 * we saw an A, guess 'dir' is in ESTABLISHED
				 * mode
				 */
				switch (ostate)
				{
				case IPF_TCPS_CLOSED :
				case IPF_TCPS_SYN_RECEIVED :
					nstate = IPF_TCPS_HALF_ESTAB;
					rval = 1;
					break;
				case IPF_TCPS_HALF_ESTAB :
				case IPF_TCPS_ESTABLISHED :
					nstate = IPF_TCPS_ESTABLISHED;
					rval = 1;
					break;
				default :
					break;
				}
			}
			/*
			 * TODO: besides regular ACK packets we can have other
			 * packets as well; it is yet to be determined how we
			 * should initialize the states in those cases
			 */
			break;

		case IPF_TCPS_LISTEN: /* 1 */
			/* NOT USED */
			break;

		case IPF_TCPS_SYN_SENT: /* 2 */
			if ((tcpflags & ~(TH_ECN|TH_CWR)) == TH_SYN) {
				/*
				 * A retransmitted SYN packet.  We do not reset
				 * the timeout here to fr_tcptimeout because a
				 * connection connect timeout does not renew
				 * after every packet that is sent.  We need to
				 * set rval so as to indicate the packet has
				 * passed the check for its flags being valid
				 * in the TCP FSM.  Setting rval to 2 has the
				 * result of not resetting the timeout.
				 */
				rval = 2;
			} else if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) ==
				   TH_ACK) {
				/*
				 * we see an A from 'dir' which is in SYN_SENT
				 * state: 'dir' sent an A in response to an SA
				 * which it received, SYN_SENT -> ESTABLISHED
				 */
				nstate = IPF_TCPS_ESTABLISHED;
				rval = 1;
			} else if (tcpflags & TH_FIN) {
				/*
				 * we see an F from 'dir' which is in SYN_SENT
				 * state and wants to close its side of the
				 * connection; SYN_SENT -> FIN_WAIT_1
				 */
				nstate = IPF_TCPS_FIN_WAIT_1;
				rval = 1;
			} else if ((tcpflags & TH_OPENING) == TH_OPENING) {
				/*
				 * we see an SA from 'dir' which is already in
				 * SYN_SENT state, this means we have a
				 * simultaneous open; SYN_SENT -> SYN_RECEIVED
				 */
				nstate = IPF_TCPS_SYN_RECEIVED;
				rval = 1;
			}
			break;

		case IPF_TCPS_SYN_RECEIVED: /* 3 */
			if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) {
				/*
				 * we see an A from 'dir' which was in
				 * SYN_RECEIVED state so it must now be in
				 * established state, SYN_RECEIVED ->
				 * ESTABLISHED
				 */
				nstate = IPF_TCPS_ESTABLISHED;
				rval = 1;
			} else if ((tcpflags & ~(TH_ECN|TH_CWR)) ==
				   TH_OPENING) {
				/*
				 * We see an SA from 'dir' which is already in
				 * SYN_RECEIVED state.  Accept it (rval 2)
				 * without changing state or queue position.
				 */
				rval = 2;
			} else if (tcpflags & TH_FIN) {
				/*
				 * we see an F from 'dir' which is in
				 * SYN_RECEIVED state and wants to close its
				 * side of the connection; SYN_RECEIVED ->
				 * FIN_WAIT_1
				 */
				nstate = IPF_TCPS_FIN_WAIT_1;
				rval = 1;
			}
			break;

		case IPF_TCPS_HALF_ESTAB: /* 4 */
			/*
			 * Always accepted; only promoted to ESTABLISHED when
			 * the other side is at least HALF_ESTAB and this
			 * packet is a plain ACK (per TH_ACKMASK).
			 */
			if (ostate >= IPF_TCPS_HALF_ESTAB) {
				if ((tcpflags & TH_ACKMASK) == TH_ACK) {
					nstate = IPF_TCPS_ESTABLISHED;
				}
			}
			rval = 1;

			break;

		case IPF_TCPS_ESTABLISHED: /* 5 */
			rval = 1;
			if (tcpflags & TH_FIN) {
				/*
				 * 'dir' closed its side of the connection;
				 * this gives us a half-closed connection;
				 * ESTABLISHED -> FIN_WAIT_1
				 */
				nstate = IPF_TCPS_FIN_WAIT_1;
			} else if (tcpflags & TH_ACK) {
				/*
				 * an ACK, should we exclude other flags here?
				 */
				if (ostate == IPF_TCPS_FIN_WAIT_1) {
					/*
					 * We know the other side did an active
					 * close, so we are ACKing the recvd
					 * FIN packet (does the window matching
					 * code guarantee this?) and go into
					 * CLOSE_WAIT state; this gives us a
					 * half-closed connection
					 */
					nstate = IPF_TCPS_CLOSE_WAIT;
				} else if (ostate < IPF_TCPS_CLOSE_WAIT) {
					/*
					 * still a fully established
					 * connection reset timeout
					 */
					nstate = IPF_TCPS_ESTABLISHED;
				}
			}
			break;

		case IPF_TCPS_CLOSE_WAIT: /* 6 */
			rval = 1;
			if (tcpflags & TH_FIN) {
				/*
				 * application closed and 'dir' sent a FIN,
				 * we're now going into LAST_ACK state
				 */
				nstate = IPF_TCPS_LAST_ACK;
			} else {
				/*
				 * we remain in CLOSE_WAIT because the other
				 * side has closed already and we did not
				 * close our side yet; reset timeout
				 */
				nstate = IPF_TCPS_CLOSE_WAIT;
			}
			break;

		case IPF_TCPS_FIN_WAIT_1: /* 7 */
			rval = 1;
			if ((tcpflags & TH_ACK) &&
			    ostate > IPF_TCPS_CLOSE_WAIT) {
				/*
				 * if the other side is not active anymore
				 * it has sent us a FIN packet that we are
				 * ack'ing now with an ACK; this means both
				 * sides have now closed the connection and
				 * we go into TIME_WAIT
				 */
				/*
				 * XXX: how do we know we really are ACKing
				 * the FIN packet here? does the window code
				 * guarantee that?
				 */
				nstate = IPF_TCPS_TIME_WAIT;
			} else {
				/*
				 * we closed our side of the connection
				 * already but the other side is still active
				 * (ESTABLISHED/CLOSE_WAIT); continue with
				 * this half-closed connection
				 */
				nstate = IPF_TCPS_FIN_WAIT_1;
			}
			break;

		case IPF_TCPS_CLOSING: /* 8 */
			/* NOT USED */
			break;

		case IPF_TCPS_LAST_ACK: /* 9 */
			/*
			 * We want to reset timer here to keep state in table.
			 * If we would allow the state to time out here, while
			 * there would still be packets being retransmitted, we
			 * would cut off line between the two peers preventing
			 * them to close connection properly.
			 */
			rval = 1;
			break;

		case IPF_TCPS_FIN_WAIT_2: /* 10 */
			/*
			 * Accepted as is, unless the packet carries a SYN,
			 * in which case the state restarts at SYN_RECEIVED
			 * (SYN+ACK) or SYN_SENT (SYN).
			 */
			rval = 1;
			if ((tcpflags & TH_OPENING) == TH_OPENING)
				nstate = IPF_TCPS_SYN_RECEIVED;
			else if (tcpflags & TH_SYN)
				nstate = IPF_TCPS_SYN_SENT;
			break;

		case IPF_TCPS_TIME_WAIT: /* 11 */
			/* we're in 2MSL timeout now */
			rval = 1;
			break;

		default :
			/*
			 * Unknown state value: report it when built for the
			 * kernel, abort otherwise.
			 */
#if defined(_KERNEL)
# if SOLARIS
			cmn_err(CE_NOTE,
				"tcp %lx flags %x si %lx nstate %d ostate %d\n",
				(u_long)tcp, tcpflags, (u_long)tqe,
				nstate, ostate);
# else
			printf("tcp %lx flags %x si %lx nstate %d ostate %d\n",
				(u_long)tcp, tcpflags, (u_long)tqe,
				nstate, ostate);
# endif
#else
			abort();
#endif
			break;
		}
	}

	/*
	 * If rval == 2 then do not update the queue position, but treat the
	 * packet as being ok.  Otherwise record the new state for 'dir' and,
	 * unless the entry uses a rule based queue, move it to the queue
	 * in tqtab that belongs to that state.
	 */
	if (rval == 2)
		rval = 1;
	else if (rval == 1) {
		tqe->tqe_state[dir] = nstate;
		if ((tqe->tqe_flags & TQE_RULEBASED) == 0)
			fr_movequeue(tqe, tqe->tqe_ifq, tqtab + nstate, ifs);
	}

	return rval;
}


/* ------------------------------------------------------------------------ */
/* Function:    ipstate_log                                                 */
/* Returns:     Nil                                                         */
/* Parameters:  is(I)   - pointer to state structure                        */
/*              type(I) - type of log entry to create                       */
/*              ifs(I)  - ipf stack instance                                */
/*                                                                          */
/* Creates a state table log entry using the state structure and type info. */
/* passed in.  Log packet/byte counts, source/destination address and other */
/* protocol specific information.
*/ 3740 /* ------------------------------------------------------------------------ */ 3741 void ipstate_log(is, type, ifs) 3742 struct ipstate *is; 3743 u_int type; 3744 ipf_stack_t *ifs; 3745 { 3746 #ifdef IPFILTER_LOG 3747 struct ipslog ipsl; 3748 size_t sizes[1]; 3749 void *items[1]; 3750 int types[1]; 3751 3752 /* 3753 * Copy information out of the ipstate_t structure and into the 3754 * structure used for logging. 3755 */ 3756 ipsl.isl_type = type; 3757 ipsl.isl_pkts[0] = is->is_pkts[0] + is->is_icmppkts[0]; 3758 ipsl.isl_bytes[0] = is->is_bytes[0]; 3759 ipsl.isl_pkts[1] = is->is_pkts[1] + is->is_icmppkts[1]; 3760 ipsl.isl_bytes[1] = is->is_bytes[1]; 3761 ipsl.isl_pkts[2] = is->is_pkts[2] + is->is_icmppkts[2]; 3762 ipsl.isl_bytes[2] = is->is_bytes[2]; 3763 ipsl.isl_pkts[3] = is->is_pkts[3] + is->is_icmppkts[3]; 3764 ipsl.isl_bytes[3] = is->is_bytes[3]; 3765 ipsl.isl_src = is->is_src; 3766 ipsl.isl_dst = is->is_dst; 3767 ipsl.isl_p = is->is_p; 3768 ipsl.isl_v = is->is_v; 3769 ipsl.isl_flags = is->is_flags; 3770 ipsl.isl_tag = is->is_tag; 3771 ipsl.isl_rulen = is->is_rulen; 3772 (void) strncpy(ipsl.isl_group, is->is_group, FR_GROUPLEN); 3773 3774 if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) { 3775 ipsl.isl_sport = is->is_sport; 3776 ipsl.isl_dport = is->is_dport; 3777 if (ipsl.isl_p == IPPROTO_TCP) { 3778 ipsl.isl_state[0] = is->is_state[0]; 3779 ipsl.isl_state[1] = is->is_state[1]; 3780 } 3781 } else if (ipsl.isl_p == IPPROTO_ICMP) { 3782 ipsl.isl_itype = is->is_icmp.ici_type; 3783 } else if (ipsl.isl_p == IPPROTO_ICMPV6) { 3784 ipsl.isl_itype = is->is_icmp.ici_type; 3785 } else { 3786 ipsl.isl_ps.isl_filler[0] = 0; 3787 ipsl.isl_ps.isl_filler[1] = 0; 3788 } 3789 3790 items[0] = &ipsl; 3791 sizes[0] = sizeof(ipsl); 3792 types[0] = 0; 3793 3794 if (ipllog(IPL_LOGSTATE, NULL, items, sizes, types, 1, ifs)) { 3795 ATOMIC_INCL(ifs->ifs_ips_stats.iss_logged); 3796 } else { 3797 ATOMIC_INCL(ifs->ifs_ips_stats.iss_logfail); 3798 } 3799 #endif 3800 } 
#ifdef	USE_INET6
/* ------------------------------------------------------------------------ */
/* Function:    fr_checkicmp6matchingstate                                  */
/* Returns:     ipstate_t* - NULL == no match found,                        */
/*                           else pointer to matching state entry           */
/* Parameters:  fin(I) - pointer to packet information                      */
/* Locks:       NULL == no locks, else Read Lock on ipf_state               */
/*                                                                          */
/* If we've got an ICMPv6 error message, using the information stored in    */
/* the ICMPv6 packet, look for a matching state table entry.  On a match    */
/* fin_rev is set, the entry's ICMP packet counter is bumped and the entry  */
/* is returned with the ipf_state read lock still held.                     */
/* ------------------------------------------------------------------------ */
static ipstate_t *fr_checkicmp6matchingstate(fin)
fr_info_t *fin;
{
	struct icmp6_hdr *ic6, *oic;
	int backward, i;
	ipstate_t *is, **isp;
	u_short sport, dport;
	i6addr_t dst, src;
	u_short savelen;
	icmpinfo_t *ic;
	fr_info_t ofin;
	tcphdr_t *tcp;
	ip6_t *oip6;
	u_char pr;
	u_int hv;
	ipf_stack_t *ifs = fin->fin_ifs;

	/*
	 * Does it at least have the return (basic) IP header ?
	 * Is it an actual recognised ICMP error type?
	 * Only a basic IP header (no options) should be with
	 * an ICMP error header.
	 */
	if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN) ||
	    !(fin->fin_flx & FI_ICMPERR))
		return NULL;

	ic6 = fin->fin_dp;

	/* the offending (original) IPv6 header follows the ICMP header */
	oip6 = (ip6_t *)((char *)ic6 + ICMPERR_ICMPHLEN);
	if (fin->fin_plen < sizeof(*oip6))
		return NULL;

	/*
	 * Start from a copy of the outer packet's information, then flip
	 * the direction, as the embedded packet went the other way.
	 */
	bcopy((char *)fin, (char *)&ofin, sizeof(*fin));
	ofin.fin_v = 6;
	ofin.fin_ifp = fin->fin_ifp;
	ofin.fin_out = !fin->fin_out;
	ofin.fin_m = NULL;	/* if dereferenced, panic XXX */
	ofin.fin_mp = NULL;	/* if dereferenced, panic XXX */

	/*
	 * We make a fin entry to be able to feed it to
	 * matchsrcdst.  Note that not all fields are necessary
	 * but this is the cleanest way.  Note further we fill
	 * in fin_mp such that if someone uses it we'll get
	 * a kernel panic.  fr_matchsrcdst does not use this.
	 *
	 * watch out here, as ip is in host order and oip6 in network
	 * order.  Any change we make must be undone afterwards.
	 */
	savelen = oip6->ip6_plen;
	oip6->ip6_plen = fin->fin_dlen - ICMPERR_ICMPHLEN;
	ofin.fin_flx = FI_NOCKSUM;
	ofin.fin_ip = (ip_t *)oip6;
	ofin.fin_plen = oip6->ip6_plen;
	(void) fr_makefrip(sizeof(*oip6), (ip_t *)oip6, &ofin);
	ofin.fin_flx &= ~(FI_BAD|FI_SHORT);
	/* put the original payload length back */
	oip6->ip6_plen = savelen;

	if (oip6->ip6_nxt == IPPROTO_ICMPV6) {
		oic = (struct icmp6_hdr *)(oip6 + 1);
		/*
		 * an ICMP error can only be generated as a result of an
		 * ICMP query, not as the response on an ICMP error
		 *
		 * XXX theoretically ICMP_ECHOREP and the other reply's are
		 * ICMP query's as well, but adding them here seems strange XXX
		 */
		 if (!(oic->icmp6_type & ICMP6_INFOMSG_MASK))
			return NULL;

		/*
		 * perform a lookup of the ICMP packet in the state table
		 */
		hv = (pr = oip6->ip6_nxt);
		src.in6 = oip6->ip6_src;
		hv += src.in4.s_addr;
		dst.in6 = oip6->ip6_dst;
		hv += dst.in4.s_addr;
		hv += oic->icmp6_id;
		hv += oic->icmp6_seq;
		hv = DOUBLE_HASH(hv, ifs);

		READ_ENTER(&ifs->ifs_ipf_state);
		for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) {
			ic = &is->is_icmp;
			/*
			 * advance isp now: "is" is overwritten by the result
			 * of fr_matchsrcdst() in the test below
			 */
			isp = &is->is_hnext;
			if ((is->is_p == pr) &&
			    !(is->is_pass & FR_NOICMPERR) &&
			    (oic->icmp6_id == ic->ici_id) &&
			    (oic->icmp6_seq == ic->ici_seq) &&
			    (is = fr_matchsrcdst(&ofin, is, &src,
						 &dst, NULL, FI_ICMPCMP))) {
			    	/*
			    	 * in the state table ICMP query's are stored
			    	 * with the type of the corresponding ICMP
			    	 * response. Correct here
			    	 */
				if (((ic->ici_type == ICMP6_ECHO_REPLY) &&
				     (oic->icmp6_type == ICMP6_ECHO_REQUEST)) ||
				     (ic->ici_type - 1 == oic->icmp6_type )) {
				    	ifs->ifs_ips_stats.iss_hits++;
					backward = IP6_NEQ(&is->is_dst, &src);
					fin->fin_rev = !backward;
					i = (backward << 1) + fin->fin_out;
    					is->is_icmppkts[i]++;
					/* read lock is still held here */
					return is;
				}
			}
		}
		RWLOCK_EXIT(&ifs->ifs_ipf_state);
		return NULL;
	}

	/*
	 * Not ICMPv6 inside: hash on protocol, all four words of both
	 * addresses and, for TCP/UDP, the port numbers.
	 */
	hv = (pr = oip6->ip6_nxt);
	src.in6 = oip6->ip6_src;
	hv += src.i6[0];
	hv += src.i6[1];
	hv += src.i6[2];
	hv += src.i6[3];
	dst.in6 = oip6->ip6_dst;
	hv += dst.i6[0];
	hv += dst.i6[1];
	hv += dst.i6[2];
	hv += dst.i6[3];

	if ((oip6->ip6_nxt == IPPROTO_TCP) || (oip6->ip6_nxt == IPPROTO_UDP)) {
		/* only the port fields of the header are read here */
		tcp = (tcphdr_t *)(oip6 + 1);
		dport = tcp->th_dport;
		sport = tcp->th_sport;
		hv += dport;
		hv += sport;
	} else
		tcp = NULL;
	hv = DOUBLE_HASH(hv, ifs);

	READ_ENTER(&ifs->ifs_ipf_state);
	for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) {
		isp = &is->is_hnext;
		/*
		 * Only allow this icmp though if the
		 * encapsulated packet was allowed through the
		 * other way around. Note that the minimal amount
		 * of info present does not allow for checking against
		 * tcp internals such as seq and ack numbers.
		 */
		if ((is->is_p != pr) || (is->is_v != 6) ||
		    (is->is_pass & FR_NOICMPERR))
			continue;
		is = fr_matchsrcdst(&ofin, is, &src, &dst, tcp, FI_ICMPCMP);
		if (is != NULL) {
			ifs->ifs_ips_stats.iss_hits++;
			backward = IP6_NEQ(&is->is_dst, &src);
			fin->fin_rev = !backward;
			i = (backward << 1) + fin->fin_out;
			is->is_icmppkts[i]++;
			/*
			 * we deliberately do not touch the timeouts
			 * for the accompanying state table entry.
			 * It remains to be seen if that is correct. XXX
			 */
			/* read lock is still held here */
			return is;
		}
	}
	RWLOCK_EXIT(&ifs->ifs_ipf_state);
	return NULL;
}
#endif


/* ------------------------------------------------------------------------ */
/* Function:    fr_sttab_init                                               */
/* Returns:     Nil                                                         */
/* Parameters:  tqp(I) - pointer to an array of timeout queues for TCP      */
/*              ifs(I) - ipf stack instance supplying the timeout values    */
/*                                                                          */
/* Initialise the array of timeout queues for TCP: every queue starts out   */
/* empty with one reference and is chained to the next array element, then  */
/* each TCP state's queue is given its timeout from the stack instance.     */
/* ------------------------------------------------------------------------ */
void fr_sttab_init(tqp, ifs)
ipftq_t *tqp;
ipf_stack_t *ifs;
{
	int i;

	for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) {
		tqp[i].ifq_ttl = 0;
		tqp[i].ifq_ref = 1;
		tqp[i].ifq_head = NULL;
		tqp[i].ifq_tail = &tqp[i].ifq_head;
		tqp[i].ifq_next = tqp + i + 1;
		MUTEX_INIT(&tqp[i].ifq_lock, "ipftq tcp tab");
	}
	/* the last element's next pointer was set past the array above */
	tqp[IPF_TCP_NSTATES - 1].ifq_next = NULL;
	tqp[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcpclosed;
	tqp[IPF_TCPS_LISTEN].ifq_ttl = ifs->ifs_fr_tcptimeout;
	tqp[IPF_TCPS_SYN_SENT].ifq_ttl = ifs->ifs_fr_tcptimeout;
	tqp[IPF_TCPS_SYN_RECEIVED].ifq_ttl = ifs->ifs_fr_tcptimeout;
	tqp[IPF_TCPS_ESTABLISHED].ifq_ttl = ifs->ifs_fr_tcpidletimeout;
	tqp[IPF_TCPS_CLOSE_WAIT].ifq_ttl = ifs->ifs_fr_tcphalfclosed;
	tqp[IPF_TCPS_FIN_WAIT_1].ifq_ttl = ifs->ifs_fr_tcphalfclosed;
	tqp[IPF_TCPS_CLOSING].ifq_ttl = ifs->ifs_fr_tcptimeout;
	tqp[IPF_TCPS_LAST_ACK].ifq_ttl = ifs->ifs_fr_tcplastack;
	tqp[IPF_TCPS_FIN_WAIT_2].ifq_ttl = ifs->ifs_fr_tcpclosewait;
	tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = ifs->ifs_fr_tcptimeout;
	tqp[IPF_TCPS_HALF_ESTAB].ifq_ttl = ifs->ifs_fr_tcptimeout;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_sttab_destroy                                            */
/* Returns:     Nil                                                         */
/* Parameters:  tqp(I) - pointer to an array of timeout queues for TCP      */
/*                                                                          */
/* Do whatever is necessary to "destroy" each of the entries
in the array */ 4027 /* of timeout queues for TCP. */ 4028 /* ------------------------------------------------------------------------ */ 4029 void fr_sttab_destroy(tqp) 4030 ipftq_t *tqp; 4031 { 4032 int i; 4033 4034 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) 4035 MUTEX_DESTROY(&tqp[i].ifq_lock); 4036 } 4037 4038 4039 /* ------------------------------------------------------------------------ */ 4040 /* Function: fr_statederef */ 4041 /* Returns: Nil */ 4042 /* Parameters: isp(I) - pointer to pointer to state table entry */ 4043 /* ifs - ipf stack instance */ 4044 /* */ 4045 /* Decrement the reference counter for this state table entry and free it */ 4046 /* if there are no more things using it. */ 4047 /* */ 4048 /* Internal parameters: */ 4049 /* state[0] = state of source (host that initiated connection) */ 4050 /* state[1] = state of dest (host that accepted the connection) */ 4051 /* ------------------------------------------------------------------------ */ 4052 void fr_statederef(isp, ifs) 4053 ipstate_t **isp; 4054 ipf_stack_t *ifs; 4055 { 4056 ipstate_t *is; 4057 4058 is = *isp; 4059 *isp = NULL; 4060 4061 MUTEX_ENTER(&is->is_lock); 4062 if (is->is_ref > 1) { 4063 is->is_ref--; 4064 MUTEX_EXIT(&is->is_lock); 4065 #ifndef _KERNEL 4066 if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) || 4067 (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) { 4068 fr_delstate(is, ISL_ORPHAN, ifs); 4069 } 4070 #endif 4071 return; 4072 } 4073 MUTEX_EXIT(&is->is_lock); 4074 4075 WRITE_ENTER(&ifs->ifs_ipf_state); 4076 fr_delstate(is, ISL_EXPIRE, ifs); 4077 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4078 } 4079 4080 4081 /* ------------------------------------------------------------------------ */ 4082 /* Function: fr_setstatequeue */ 4083 /* Returns: Nil */ 4084 /* Parameters: is(I) - pointer to state structure */ 4085 /* rev(I) - forward(0) or reverse(1) direction */ 4086 /* Locks: ipf_state (read or write) */ 4087 /* */ 4088 /* Put the state entry on its default queue entry, 
using rev as a helped in */ 4089 /* determining which queue it should be placed on. */ 4090 /* ------------------------------------------------------------------------ */ 4091 void fr_setstatequeue(is, rev, ifs) 4092 ipstate_t *is; 4093 int rev; 4094 ipf_stack_t *ifs; 4095 { 4096 ipftq_t *oifq, *nifq; 4097 4098 4099 if ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) 4100 nifq = is->is_tqehead[rev]; 4101 else 4102 nifq = NULL; 4103 4104 if (nifq == NULL) { 4105 switch (is->is_p) 4106 { 4107 #ifdef USE_INET6 4108 case IPPROTO_ICMPV6 : 4109 if (rev == 1) 4110 nifq = &ifs->ifs_ips_icmpacktq; 4111 else 4112 nifq = &ifs->ifs_ips_icmptq; 4113 break; 4114 #endif 4115 case IPPROTO_ICMP : 4116 if (rev == 1) 4117 nifq = &ifs->ifs_ips_icmpacktq; 4118 else 4119 nifq = &ifs->ifs_ips_icmptq; 4120 break; 4121 case IPPROTO_TCP : 4122 nifq = ifs->ifs_ips_tqtqb + is->is_state[rev]; 4123 break; 4124 4125 case IPPROTO_UDP : 4126 if (rev == 1) 4127 nifq = &ifs->ifs_ips_udpacktq; 4128 else 4129 nifq = &ifs->ifs_ips_udptq; 4130 break; 4131 4132 default : 4133 nifq = &ifs->ifs_ips_iptq; 4134 break; 4135 } 4136 } 4137 4138 oifq = is->is_sti.tqe_ifq; 4139 /* 4140 * If it's currently on a timeout queue, move it from one queue to 4141 * another, else put it on the end of the newly determined queue. 4142 */ 4143 if (oifq != NULL) 4144 fr_movequeue(&is->is_sti, oifq, nifq, ifs); 4145 else 4146 fr_queueappend(&is->is_sti, nifq, is, ifs); 4147 return; 4148 } 4149 4150 4151 /* ------------------------------------------------------------------------ */ 4152 /* Function: fr_stateiter */ 4153 /* Returns: int - 0 == success, else error */ 4154 /* Parameters: token(I) - pointer to ipftoken structure */ 4155 /* itp(I) - pointer to ipfgeniter structure */ 4156 /* */ 4157 /* This function handles the SIOCGENITER ioctl for the state tables and */ 4158 /* walks through the list of entries in the state table list (ips_list.) 
*/ 4159 /* ------------------------------------------------------------------------ */ 4160 static int fr_stateiter(token, itp, ifs) 4161 ipftoken_t *token; 4162 ipfgeniter_t *itp; 4163 ipf_stack_t *ifs; 4164 { 4165 ipstate_t *is, *next, zero; 4166 int error, count; 4167 char *dst; 4168 4169 if (itp->igi_data == NULL) 4170 return EFAULT; 4171 4172 if (itp->igi_nitems == 0) 4173 return EINVAL; 4174 4175 if (itp->igi_type != IPFGENITER_STATE) 4176 return EINVAL; 4177 4178 error = 0; 4179 4180 READ_ENTER(&ifs->ifs_ipf_state); 4181 4182 /* 4183 * Get "previous" entry from the token and find the next entry. 4184 */ 4185 is = token->ipt_data; 4186 if (is == NULL) { 4187 next = ifs->ifs_ips_list; 4188 } else { 4189 next = is->is_next; 4190 } 4191 4192 dst = itp->igi_data; 4193 for (count = itp->igi_nitems; count > 0; count--) { 4194 /* 4195 * If we found an entry, add a reference to it and update the token. 4196 * Otherwise, zero out data to be returned and NULL out token. 4197 */ 4198 if (next != NULL) { 4199 MUTEX_ENTER(&next->is_lock); 4200 next->is_ref++; 4201 MUTEX_EXIT(&next->is_lock); 4202 token->ipt_data = next; 4203 } else { 4204 bzero(&zero, sizeof(zero)); 4205 next = &zero; 4206 token->ipt_data = NULL; 4207 } 4208 4209 /* 4210 * Safe to release lock now the we have a reference. 4211 */ 4212 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4213 4214 /* 4215 * Copy out data and clean up references and tokens. 4216 */ 4217 error = COPYOUT(next, dst, sizeof(*next)); 4218 if (error != 0) 4219 error = EFAULT; 4220 if (token->ipt_data == NULL) { 4221 ipf_freetoken(token, ifs); 4222 break; 4223 } else { 4224 if (is != NULL) 4225 fr_statederef(&is, ifs); 4226 if (next->is_next == NULL) { 4227 ipf_freetoken(token, ifs); 4228 break; 4229 } 4230 } 4231 4232 if ((count == 1) || (error != 0)) 4233 break; 4234 4235 READ_ENTER(&ifs->ifs_ipf_state); 4236 dst += sizeof(*next); 4237 is = next; 4238 next = is->is_next; 4239 } 4240 4241 return error; 4242 } 4243