1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2017-2018 Yandex LLC 5 * Copyright (c) 2017-2018 Andrey V. Elsukov <ae@FreeBSD.org> 6 * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_inet.h" 34 #include "opt_inet6.h" 35 #include "opt_ipfw.h" 36 #ifndef INET 37 #error IPFIREWALL requires INET. 38 #endif /* INET */ 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/hash.h> 43 #include <sys/mbuf.h> 44 #include <sys/kernel.h> 45 #include <sys/lock.h> 46 #include <sys/pcpu.h> 47 #include <sys/queue.h> 48 #include <sys/rmlock.h> 49 #include <sys/smp.h> 50 #include <sys/socket.h> 51 #include <sys/sysctl.h> 52 #include <sys/syslog.h> 53 #include <net/ethernet.h> 54 #include <net/if.h> 55 #include <net/if_var.h> 56 #include <net/pfil.h> 57 #include <net/vnet.h> 58 59 #include <netinet/in.h> 60 #include <netinet/ip.h> 61 #include <netinet/ip_var.h> 62 #include <netinet/ip_fw.h> 63 #include <netinet/tcp_var.h> 64 #include <netinet/udp.h> 65 66 #include <netinet/ip6.h> /* IN6_ARE_ADDR_EQUAL */ 67 #ifdef INET6 68 #include <netinet6/in6_var.h> 69 #include <netinet6/ip6_var.h> 70 #include <netinet6/scope6_var.h> 71 #endif 72 73 #include <netpfil/ipfw/ip_fw_private.h> 74 75 #include <machine/in_cksum.h> /* XXX for in_cksum */ 76 77 #ifdef MAC 78 #include <security/mac/mac_framework.h> 79 #endif 80 #include <ck_queue.h> 81 82 /* 83 * Description of dynamic states. 84 * 85 * Dynamic states are stored in lists accessed through a hash tables 86 * whose size is curr_dyn_buckets. This value can be modified through 87 * the sysctl variable dyn_buckets. 88 * 89 * Currently there are four tables: dyn_ipv4, dyn_ipv6, dyn_ipv4_parent, 90 * and dyn_ipv6_parent. 91 * 92 * When a packet is received, its address fields hashed, then matched 93 * against the entries in the corresponding list by addr_type. 94 * Dynamic states can be used for different purposes: 95 * + stateful rules; 96 * + enforcing limits on the number of sessions; 97 * + in-kernel NAT (not implemented yet) 98 * 99 * The lifetime of dynamic states is regulated by dyn_*_lifetime, 100 * measured in seconds and depending on the flags. 101 * 102 * The total number of dynamic states is equal to UMA zone items count. 103 * The max number of dynamic states is dyn_max. When we reach 104 * the maximum number of rules we do not create anymore. This is 105 * done to avoid consuming too much memory, but also too much 106 * time when searching on each packet (ideally, we should try instead 107 * to put a limit on the length of the list on each bucket...). 108 * 109 * Each state holds a pointer to the parent ipfw rule so we know what 110 * action to perform. Dynamic rules are removed when the parent rule is 111 * deleted. 112 * 113 * There are some limitations with dynamic rules -- we do not 114 * obey the 'randomized match', and we do not do multiple 115 * passes through the firewall. XXX check the latter!!! 116 */ 117 118 /* By default use jenkins hash function */ 119 #define IPFIREWALL_JENKINSHASH 120 121 #define DYN_COUNTER_INC(d, dir, pktlen) do { \ 122 (d)->pcnt_ ## dir++; \ 123 (d)->bcnt_ ## dir += pktlen; \ 124 } while (0) 125 126 struct dyn_data { 127 void *parent; /* pointer to parent rule */ 128 uint32_t chain_id; /* cached ruleset id */ 129 uint32_t f_pos; /* cached rule index */ 130 131 uint32_t hashval; /* hash value used for hash resize */ 132 uint16_t fibnum; /* fib used to send keepalives */ 133 uint8_t _pad[3]; 134 uint8_t set; /* parent rule set number */ 135 uint16_t rulenum; /* parent rule number */ 136 uint32_t ruleid; /* parent rule id */ 137 138 uint32_t state; /* TCP session state and flags */ 139 uint32_t ack_fwd; /* most recent ACKs in forward */ 140 uint32_t ack_rev; /* and reverse direction (used */ 141 /* to generate keepalives) */ 142 uint32_t sync; /* synchronization time */ 143 uint32_t expire; /* expire time */ 144 145 uint64_t pcnt_fwd; /* bytes counter in forward */ 146 uint64_t bcnt_fwd; /* packets counter in forward */ 147 uint64_t pcnt_rev; /* bytes counter in reverse */ 148 uint64_t bcnt_rev; /* packets counter in reverse */ 149 }; 150 151 #define DPARENT_COUNT_DEC(p) do { \ 152 MPASS(p->count > 0); \ 153 ck_pr_dec_32(&(p)->count); \ 154 } while (0) 155 #define DPARENT_COUNT_INC(p) ck_pr_inc_32(&(p)->count) 156 #define DPARENT_COUNT(p) ck_pr_load_32(&(p)->count) 157 struct dyn_parent { 158 void *parent; /* pointer to parent rule */ 159 uint32_t count; /* number of linked states */ 160 uint8_t _pad; 161 uint8_t set; /* parent rule set number */ 162 uint16_t rulenum; /* parent rule number */ 163 uint32_t ruleid; /* parent rule id */ 164 uint32_t hashval; /* hash value used for hash resize */ 165 uint32_t expire; /* expire time */ 166 }; 167 168 struct dyn_ipv4_state { 169 uint8_t type; /* State type */ 170 uint8_t proto; /* UL Protocol */ 171 uint16_t kidx; /* named object index */ 172 uint16_t sport, dport; /* ULP source and destination ports */ 173 in_addr_t src, dst; /* IPv4 source and destination */ 174 175 union { 176 struct dyn_data *data; 177 struct dyn_parent *limit; 178 }; 179 CK_SLIST_ENTRY(dyn_ipv4_state) entry; 180 SLIST_ENTRY(dyn_ipv4_state) expired; 181 }; 182 CK_SLIST_HEAD(dyn_ipv4ck_slist, dyn_ipv4_state); 183 static VNET_DEFINE(struct dyn_ipv4ck_slist *, dyn_ipv4); 184 static VNET_DEFINE(struct dyn_ipv4ck_slist *, dyn_ipv4_parent); 185 186 SLIST_HEAD(dyn_ipv4_slist, dyn_ipv4_state); 187 static VNET_DEFINE(struct dyn_ipv4_slist, dyn_expired_ipv4); 188 #define V_dyn_ipv4 VNET(dyn_ipv4) 189 #define V_dyn_ipv4_parent VNET(dyn_ipv4_parent) 190 #define V_dyn_expired_ipv4 VNET(dyn_expired_ipv4) 191 192 #ifdef INET6 193 struct dyn_ipv6_state { 194 uint8_t type; /* State type */ 195 uint8_t proto; /* UL Protocol */ 196 uint16_t kidx; /* named object index */ 197 uint16_t sport, dport; /* ULP source and destination ports */ 198 struct in6_addr src, dst; /* IPv6 source and destination */ 199 uint32_t zoneid; /* IPv6 scope zone id */ 200 union { 201 struct dyn_data *data; 202 struct dyn_parent *limit; 203 }; 204 CK_SLIST_ENTRY(dyn_ipv6_state) entry; 205 SLIST_ENTRY(dyn_ipv6_state) expired; 206 }; 207 CK_SLIST_HEAD(dyn_ipv6ck_slist, dyn_ipv6_state); 208 static VNET_DEFINE(struct dyn_ipv6ck_slist *, dyn_ipv6); 209 static VNET_DEFINE(struct dyn_ipv6ck_slist *, dyn_ipv6_parent); 210 211 SLIST_HEAD(dyn_ipv6_slist, dyn_ipv6_state); 212 static VNET_DEFINE(struct dyn_ipv6_slist, dyn_expired_ipv6); 213 #define V_dyn_ipv6 VNET(dyn_ipv6) 214 #define V_dyn_ipv6_parent VNET(dyn_ipv6_parent) 215 #define V_dyn_expired_ipv6 VNET(dyn_expired_ipv6) 216 #endif /* INET6 */ 217 218 /* 219 * Per-CPU pointer indicates that specified state is currently in use 220 * and must not be reclaimed by expiration callout. 221 */ 222 static void **dyn_hp_cache; 223 static DPCPU_DEFINE(void *, dyn_hp); 224 #define DYNSTATE_GET(cpu) ck_pr_load_ptr(DPCPU_ID_PTR((cpu), dyn_hp)) 225 #define DYNSTATE_PROTECT(v) ck_pr_store_ptr(DPCPU_PTR(dyn_hp), (v)) 226 #define DYNSTATE_RELEASE() DYNSTATE_PROTECT(NULL) 227 #define DYNSTATE_CRITICAL_ENTER() critical_enter() 228 #define DYNSTATE_CRITICAL_EXIT() do { \ 229 DYNSTATE_RELEASE(); \ 230 critical_exit(); \ 231 } while (0); 232 233 /* 234 * We keep two version numbers, one is updated when new entry added to 235 * the list. Second is updated when an entry deleted from the list. 236 * Versions are updated under bucket lock. 237 * 238 * Bucket "add" version number is used to know, that in the time between 239 * state lookup (i.e. ipfw_dyn_lookup_state()) and the followed state 240 * creation (i.e. ipfw_dyn_install_state()) another concurrent thread did 241 * not install some state in this bucket. Using this info we can avoid 242 * additional state lookup, because we are sure that we will not install 243 * the state twice. 244 * 245 * Also doing the tracking of bucket "del" version during lookup we can 246 * be sure, that state entry was not unlinked and freed in time between 247 * we read the state pointer and protect it with hazard pointer. 248 * 249 * An entry unlinked from CK list keeps unchanged until it is freed. 250 * Unlinked entries are linked into expired lists using "expired" field. 251 */ 252 253 /* 254 * dyn_expire_lock is used to protect access to dyn_expired_xxx lists. 255 * dyn_bucket_lock is used to get write access to lists in specific bucket. 256 * Currently one dyn_bucket_lock is used for all ipv4, ipv4_parent, ipv6, 257 * and ipv6_parent lists. 258 */ 259 static VNET_DEFINE(struct mtx, dyn_expire_lock); 260 static VNET_DEFINE(struct mtx *, dyn_bucket_lock); 261 #define V_dyn_expire_lock VNET(dyn_expire_lock) 262 #define V_dyn_bucket_lock VNET(dyn_bucket_lock) 263 264 /* 265 * Bucket's add/delete generation versions. 266 */ 267 static VNET_DEFINE(uint32_t *, dyn_ipv4_add); 268 static VNET_DEFINE(uint32_t *, dyn_ipv4_del); 269 static VNET_DEFINE(uint32_t *, dyn_ipv4_parent_add); 270 static VNET_DEFINE(uint32_t *, dyn_ipv4_parent_del); 271 #define V_dyn_ipv4_add VNET(dyn_ipv4_add) 272 #define V_dyn_ipv4_del VNET(dyn_ipv4_del) 273 #define V_dyn_ipv4_parent_add VNET(dyn_ipv4_parent_add) 274 #define V_dyn_ipv4_parent_del VNET(dyn_ipv4_parent_del) 275 276 #ifdef INET6 277 static VNET_DEFINE(uint32_t *, dyn_ipv6_add); 278 static VNET_DEFINE(uint32_t *, dyn_ipv6_del); 279 static VNET_DEFINE(uint32_t *, dyn_ipv6_parent_add); 280 static VNET_DEFINE(uint32_t *, dyn_ipv6_parent_del); 281 #define V_dyn_ipv6_add VNET(dyn_ipv6_add) 282 #define V_dyn_ipv6_del VNET(dyn_ipv6_del) 283 #define V_dyn_ipv6_parent_add VNET(dyn_ipv6_parent_add) 284 #define V_dyn_ipv6_parent_del VNET(dyn_ipv6_parent_del) 285 #endif /* INET6 */ 286 287 #define DYN_BUCKET(h, b) ((h) & (b - 1)) 288 #define DYN_BUCKET_VERSION(b, v) ck_pr_load_32(&V_dyn_ ## v[(b)]) 289 #define DYN_BUCKET_VERSION_BUMP(b, v) ck_pr_inc_32(&V_dyn_ ## v[(b)]) 290 291 #define DYN_BUCKET_LOCK_INIT(lock, b) \ 292 mtx_init(&lock[(b)], "IPFW dynamic bucket", NULL, MTX_DEF) 293 #define DYN_BUCKET_LOCK_DESTROY(lock, b) mtx_destroy(&lock[(b)]) 294 #define DYN_BUCKET_LOCK(b) mtx_lock(&V_dyn_bucket_lock[(b)]) 295 #define DYN_BUCKET_UNLOCK(b) mtx_unlock(&V_dyn_bucket_lock[(b)]) 296 #define DYN_BUCKET_ASSERT(b) mtx_assert(&V_dyn_bucket_lock[(b)], MA_OWNED) 297 298 #define DYN_EXPIRED_LOCK_INIT() \ 299 mtx_init(&V_dyn_expire_lock, "IPFW expired states list", NULL, MTX_DEF) 300 #define DYN_EXPIRED_LOCK_DESTROY() mtx_destroy(&V_dyn_expire_lock) 301 #define DYN_EXPIRED_LOCK() mtx_lock(&V_dyn_expire_lock) 302 #define DYN_EXPIRED_UNLOCK() mtx_unlock(&V_dyn_expire_lock) 303 304 static VNET_DEFINE(uint32_t, dyn_buckets_max); 305 static VNET_DEFINE(uint32_t, curr_dyn_buckets); 306 static VNET_DEFINE(struct callout, dyn_timeout); 307 #define V_dyn_buckets_max VNET(dyn_buckets_max) 308 #define V_curr_dyn_buckets VNET(curr_dyn_buckets) 309 #define V_dyn_timeout VNET(dyn_timeout) 310 311 /* Maximum length of states chain in a bucket */ 312 static VNET_DEFINE(uint32_t, curr_max_length); 313 #define V_curr_max_length VNET(curr_max_length) 314 315 static VNET_DEFINE(uma_zone_t, dyn_data_zone); 316 static VNET_DEFINE(uma_zone_t, dyn_parent_zone); 317 static VNET_DEFINE(uma_zone_t, dyn_ipv4_zone); 318 #ifdef INET6 319 static VNET_DEFINE(uma_zone_t, dyn_ipv6_zone); 320 #define V_dyn_ipv6_zone VNET(dyn_ipv6_zone) 321 #endif /* INET6 */ 322 #define V_dyn_data_zone VNET(dyn_data_zone) 323 #define V_dyn_parent_zone VNET(dyn_parent_zone) 324 #define V_dyn_ipv4_zone VNET(dyn_ipv4_zone) 325 326 /* 327 * Timeouts for various events in handing dynamic rules. 328 */ 329 static VNET_DEFINE(uint32_t, dyn_ack_lifetime); 330 static VNET_DEFINE(uint32_t, dyn_syn_lifetime); 331 static VNET_DEFINE(uint32_t, dyn_fin_lifetime); 332 static VNET_DEFINE(uint32_t, dyn_rst_lifetime); 333 static VNET_DEFINE(uint32_t, dyn_udp_lifetime); 334 static VNET_DEFINE(uint32_t, dyn_short_lifetime); 335 336 #define V_dyn_ack_lifetime VNET(dyn_ack_lifetime) 337 #define V_dyn_syn_lifetime VNET(dyn_syn_lifetime) 338 #define V_dyn_fin_lifetime VNET(dyn_fin_lifetime) 339 #define V_dyn_rst_lifetime VNET(dyn_rst_lifetime) 340 #define V_dyn_udp_lifetime VNET(dyn_udp_lifetime) 341 #define V_dyn_short_lifetime VNET(dyn_short_lifetime) 342 343 /* 344 * Keepalives are sent if dyn_keepalive is set. They are sent every 345 * dyn_keepalive_period seconds, in the last dyn_keepalive_interval 346 * seconds of lifetime of a rule. 347 * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower 348 * than dyn_keepalive_period. 349 */ 350 #define DYN_KEEPALIVE_MAXQ 512 351 static VNET_DEFINE(uint32_t, dyn_keepalive_interval); 352 static VNET_DEFINE(uint32_t, dyn_keepalive_period); 353 static VNET_DEFINE(uint32_t, dyn_keepalive); 354 static VNET_DEFINE(time_t, dyn_keepalive_last); 355 356 #define V_dyn_keepalive_interval VNET(dyn_keepalive_interval) 357 #define V_dyn_keepalive_period VNET(dyn_keepalive_period) 358 #define V_dyn_keepalive VNET(dyn_keepalive) 359 #define V_dyn_keepalive_last VNET(dyn_keepalive_last) 360 361 static VNET_DEFINE(uint32_t, dyn_max); /* max # of dynamic states */ 362 static VNET_DEFINE(uint32_t, dyn_count); /* number of states */ 363 static VNET_DEFINE(uint32_t, dyn_parent_max); /* max # of parent states */ 364 static VNET_DEFINE(uint32_t, dyn_parent_count); /* number of parent states */ 365 #define V_dyn_max VNET(dyn_max) 366 #define V_dyn_count VNET(dyn_count) 367 #define V_dyn_parent_max VNET(dyn_parent_max) 368 #define V_dyn_parent_count VNET(dyn_parent_count) 369 370 #define DYN_COUNT_DEC(name) do { \ 371 MPASS((V_ ## name) > 0); \ 372 ck_pr_dec_32(&(V_ ## name)); \ 373 } while (0) 374 #define DYN_COUNT_INC(name) ck_pr_inc_32(&(V_ ## name)) 375 #define DYN_COUNT(name) ck_pr_load_32(&(V_ ## name)) 376 377 static time_t last_log; /* Log ratelimiting */ 378 379 /* 380 * Get/set maximum number of dynamic states in given VNET instance. 381 */ 382 static int 383 sysctl_dyn_max(SYSCTL_HANDLER_ARGS) 384 { 385 uint32_t nstates; 386 int error; 387 388 nstates = V_dyn_max; 389 error = sysctl_handle_32(oidp, &nstates, 0, req); 390 /* Read operation or some error */ 391 if ((error != 0) || (req->newptr == NULL)) 392 return (error); 393 394 V_dyn_max = nstates; 395 uma_zone_set_max(V_dyn_data_zone, V_dyn_max); 396 return (0); 397 } 398 399 static int 400 sysctl_dyn_parent_max(SYSCTL_HANDLER_ARGS) 401 { 402 uint32_t nstates; 403 int error; 404 405 nstates = V_dyn_parent_max; 406 error = sysctl_handle_32(oidp, &nstates, 0, req); 407 /* Read operation or some error */ 408 if ((error != 0) || (req->newptr == NULL)) 409 return (error); 410 411 V_dyn_parent_max = nstates; 412 uma_zone_set_max(V_dyn_parent_zone, V_dyn_parent_max); 413 return (0); 414 } 415 416 static int 417 sysctl_dyn_buckets(SYSCTL_HANDLER_ARGS) 418 { 419 uint32_t nbuckets; 420 int error; 421 422 nbuckets = V_dyn_buckets_max; 423 error = sysctl_handle_32(oidp, &nbuckets, 0, req); 424 /* Read operation or some error */ 425 if ((error != 0) || (req->newptr == NULL)) 426 return (error); 427 428 if (nbuckets > 256) 429 V_dyn_buckets_max = 1 << fls(nbuckets - 1); 430 else 431 return (EINVAL); 432 return (0); 433 } 434 435 SYSCTL_DECL(_net_inet_ip_fw); 436 437 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_count, 438 CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(dyn_count), 0, 439 "Current number of dynamic states."); 440 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_parent_count, 441 CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(dyn_parent_count), 0, 442 "Current number of parent states. "); 443 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, 444 CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(curr_dyn_buckets), 0, 445 "Current number of buckets for states hash table."); 446 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, curr_max_length, 447 CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(curr_max_length), 0, 448 "Current maximum length of states chains in hash buckets."); 449 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_buckets, 450 CTLFLAG_VNET | CTLTYPE_U32 | CTLFLAG_RW, 0, 0, sysctl_dyn_buckets, 451 "IU", "Max number of buckets for dynamic states hash table."); 452 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_max, 453 CTLFLAG_VNET | CTLTYPE_U32 | CTLFLAG_RW, 0, 0, sysctl_dyn_max, 454 "IU", "Max number of dynamic states."); 455 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_parent_max, 456 CTLFLAG_VNET | CTLTYPE_U32 | CTLFLAG_RW, 0, 0, sysctl_dyn_parent_max, 457 "IU", "Max number of parent dynamic states."); 458 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, 459 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_ack_lifetime), 0, 460 "Lifetime of dynamic states for TCP ACK."); 461 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, 462 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_syn_lifetime), 0, 463 "Lifetime of dynamic states for TCP SYN."); 464 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, 465 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_fin_lifetime), 0, 466 "Lifetime of dynamic states for TCP FIN."); 467 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, 468 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_rst_lifetime), 0, 469 "Lifetime of dynamic states for TCP RST."); 470 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, 471 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_udp_lifetime), 0, 472 "Lifetime of dynamic states for UDP."); 473 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, 474 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_short_lifetime), 0, 475 "Lifetime of dynamic states for other situations."); 476 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, 477 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_keepalive), 0, 478 "Enable keepalives for dynamic states."); 479 480 #ifdef IPFIREWALL_DYNDEBUG 481 #define DYN_DEBUG(fmt, ...) do { \ 482 printf("%s: " fmt "\n", __func__, __VA_ARGS__); \ 483 } while (0) 484 #else 485 #define DYN_DEBUG(fmt, ...) 486 #endif /* !IPFIREWALL_DYNDEBUG */ 487 488 #ifdef INET6 489 /* Functions to work with IPv6 states */ 490 static struct dyn_ipv6_state *dyn_lookup_ipv6_state( 491 const struct ipfw_flow_id *, uint32_t, const void *, 492 struct ipfw_dyn_info *, int); 493 static int dyn_lookup_ipv6_state_locked(const struct ipfw_flow_id *, 494 uint32_t, const void *, int, const void *, uint32_t, uint16_t, uint32_t, 495 uint16_t); 496 static struct dyn_ipv6_state *dyn_alloc_ipv6_state( 497 const struct ipfw_flow_id *, uint32_t, uint16_t, uint8_t); 498 static int dyn_add_ipv6_state(void *, uint32_t, uint16_t, uint8_t, 499 const struct ipfw_flow_id *, uint32_t, const void *, int, uint32_t, 500 struct ipfw_dyn_info *, uint16_t, uint16_t, uint8_t); 501 static void dyn_export_ipv6_state(const struct dyn_ipv6_state *, 502 ipfw_dyn_rule *); 503 504 static uint32_t dyn_getscopeid(const struct ip_fw_args *); 505 static void dyn_make_keepalive_ipv6(struct mbuf *, const struct in6_addr *, 506 const struct in6_addr *, uint32_t, uint32_t, uint32_t, uint16_t, 507 uint16_t); 508 static void dyn_enqueue_keepalive_ipv6(struct mbufq *, 509 const struct dyn_ipv6_state *); 510 static void dyn_send_keepalive_ipv6(struct ip_fw_chain *); 511 512 static struct dyn_ipv6_state *dyn_lookup_ipv6_parent( 513 const struct ipfw_flow_id *, uint32_t, const void *, uint32_t, uint16_t, 514 uint32_t); 515 static struct dyn_ipv6_state *dyn_lookup_ipv6_parent_locked( 516 const struct ipfw_flow_id *, uint32_t, const void *, uint32_t, uint16_t, 517 uint32_t); 518 static struct dyn_ipv6_state *dyn_add_ipv6_parent(void *, uint32_t, uint16_t, 519 uint8_t, const struct ipfw_flow_id *, uint32_t, uint32_t, uint32_t, 520 uint16_t); 521 #endif /* INET6 */ 522 523 /* Functions to work with limit states */ 524 static void *dyn_get_parent_state(const struct ipfw_flow_id *, uint32_t, 525 struct ip_fw *, uint32_t, uint32_t, uint16_t); 526 static struct dyn_ipv4_state *dyn_lookup_ipv4_parent( 527 const struct ipfw_flow_id *, const void *, uint32_t, uint16_t, uint32_t); 528 static struct dyn_ipv4_state *dyn_lookup_ipv4_parent_locked( 529 const struct ipfw_flow_id *, const void *, uint32_t, uint16_t, uint32_t); 530 static struct dyn_parent *dyn_alloc_parent(void *, uint32_t, uint16_t, 531 uint8_t, uint32_t); 532 static struct dyn_ipv4_state *dyn_add_ipv4_parent(void *, uint32_t, uint16_t, 533 uint8_t, const struct ipfw_flow_id *, uint32_t, uint32_t, uint16_t); 534 535 static void dyn_tick(void *); 536 static void dyn_expire_states(struct ip_fw_chain *, ipfw_range_tlv *); 537 static void dyn_free_states(struct ip_fw_chain *); 538 static void dyn_export_parent(const struct dyn_parent *, uint16_t, 539 ipfw_dyn_rule *); 540 static void dyn_export_data(const struct dyn_data *, uint16_t, uint8_t, 541 ipfw_dyn_rule *); 542 static uint32_t dyn_update_tcp_state(struct dyn_data *, 543 const struct ipfw_flow_id *, const struct tcphdr *, int); 544 static void dyn_update_proto_state(struct dyn_data *, 545 const struct ipfw_flow_id *, const void *, int, int); 546 547 /* Functions to work with IPv4 states */ 548 struct dyn_ipv4_state *dyn_lookup_ipv4_state(const struct ipfw_flow_id *, 549 const void *, struct ipfw_dyn_info *, int); 550 static int dyn_lookup_ipv4_state_locked(const struct ipfw_flow_id *, 551 const void *, int, const void *, uint32_t, uint16_t, uint32_t, uint16_t); 552 static struct dyn_ipv4_state *dyn_alloc_ipv4_state( 553 const struct ipfw_flow_id *, uint16_t, uint8_t); 554 static int dyn_add_ipv4_state(void *, uint32_t, uint16_t, uint8_t, 555 const struct ipfw_flow_id *, const void *, int, uint32_t, 556 struct ipfw_dyn_info *, uint16_t, uint16_t, uint8_t); 557 static void dyn_export_ipv4_state(const struct dyn_ipv4_state *, 558 ipfw_dyn_rule *); 559 560 /* 561 * Named states support. 562 */ 563 static char *default_state_name = "default"; 564 struct dyn_state_obj { 565 struct named_object no; 566 char name[64]; 567 }; 568 569 #define DYN_STATE_OBJ(ch, cmd) \ 570 ((struct dyn_state_obj *)SRV_OBJECT(ch, (cmd)->arg1)) 571 /* 572 * Classifier callback. 573 * Return 0 if opcode contains object that should be referenced 574 * or rewritten. 575 */ 576 static int 577 dyn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) 578 { 579 580 DYN_DEBUG("opcode %d, arg1 %d", cmd->opcode, cmd->arg1); 581 /* Don't rewrite "check-state any" */ 582 if (cmd->arg1 == 0 && 583 cmd->opcode == O_CHECK_STATE) 584 return (1); 585 586 *puidx = cmd->arg1; 587 *ptype = 0; 588 return (0); 589 } 590 591 static void 592 dyn_update(ipfw_insn *cmd, uint16_t idx) 593 { 594 595 cmd->arg1 = idx; 596 DYN_DEBUG("opcode %d, arg1 %d", cmd->opcode, cmd->arg1); 597 } 598 599 static int 600 dyn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, 601 struct named_object **pno) 602 { 603 ipfw_obj_ntlv *ntlv; 604 const char *name; 605 606 DYN_DEBUG("uidx %d", ti->uidx); 607 if (ti->uidx != 0) { 608 if (ti->tlvs == NULL) 609 return (EINVAL); 610 /* Search ntlv in the buffer provided by user */ 611 ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, 612 IPFW_TLV_STATE_NAME); 613 if (ntlv == NULL) 614 return (EINVAL); 615 name = ntlv->name; 616 } else 617 name = default_state_name; 618 /* 619 * Search named object with corresponding name. 620 * Since states objects are global - ignore the set value 621 * and use zero instead. 622 */ 623 *pno = ipfw_objhash_lookup_name_type(CHAIN_TO_SRV(ch), 0, 624 IPFW_TLV_STATE_NAME, name); 625 /* 626 * We always return success here. 627 * The caller will check *pno and mark object as unresolved, 628 * then it will automatically create "default" object. 629 */ 630 return (0); 631 } 632 633 static struct named_object * 634 dyn_findbykidx(struct ip_fw_chain *ch, uint16_t idx) 635 { 636 637 DYN_DEBUG("kidx %d", idx); 638 return (ipfw_objhash_lookup_kidx(CHAIN_TO_SRV(ch), idx)); 639 } 640 641 static int 642 dyn_create(struct ip_fw_chain *ch, struct tid_info *ti, 643 uint16_t *pkidx) 644 { 645 struct namedobj_instance *ni; 646 struct dyn_state_obj *obj; 647 struct named_object *no; 648 ipfw_obj_ntlv *ntlv; 649 char *name; 650 651 DYN_DEBUG("uidx %d", ti->uidx); 652 if (ti->uidx != 0) { 653 if (ti->tlvs == NULL) 654 return (EINVAL); 655 ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, 656 IPFW_TLV_STATE_NAME); 657 if (ntlv == NULL) 658 return (EINVAL); 659 name = ntlv->name; 660 } else 661 name = default_state_name; 662 663 ni = CHAIN_TO_SRV(ch); 664 obj = malloc(sizeof(*obj), M_IPFW, M_WAITOK | M_ZERO); 665 obj->no.name = obj->name; 666 obj->no.etlv = IPFW_TLV_STATE_NAME; 667 strlcpy(obj->name, name, sizeof(obj->name)); 668 669 IPFW_UH_WLOCK(ch); 670 no = ipfw_objhash_lookup_name_type(ni, 0, 671 IPFW_TLV_STATE_NAME, name); 672 if (no != NULL) { 673 /* 674 * Object is already created. 675 * Just return its kidx and bump refcount. 676 */ 677 *pkidx = no->kidx; 678 no->refcnt++; 679 IPFW_UH_WUNLOCK(ch); 680 free(obj, M_IPFW); 681 DYN_DEBUG("\tfound kidx %d", *pkidx); 682 return (0); 683 } 684 if (ipfw_objhash_alloc_idx(ni, &obj->no.kidx) != 0) { 685 DYN_DEBUG("\talloc_idx failed for %s", name); 686 IPFW_UH_WUNLOCK(ch); 687 free(obj, M_IPFW); 688 return (ENOSPC); 689 } 690 ipfw_objhash_add(ni, &obj->no); 691 SRV_OBJECT(ch, obj->no.kidx) = obj; 692 obj->no.refcnt++; 693 *pkidx = obj->no.kidx; 694 IPFW_UH_WUNLOCK(ch); 695 DYN_DEBUG("\tcreated kidx %d", *pkidx); 696 return (0); 697 } 698 699 static void 700 dyn_destroy(struct ip_fw_chain *ch, struct named_object *no) 701 { 702 struct dyn_state_obj *obj; 703 704 IPFW_UH_WLOCK_ASSERT(ch); 705 706 KASSERT(no->refcnt == 1, 707 ("Destroying object '%s' (type %u, idx %u) with refcnt %u", 708 no->name, no->etlv, no->kidx, no->refcnt)); 709 DYN_DEBUG("kidx %d", no->kidx); 710 obj = SRV_OBJECT(ch, no->kidx); 711 SRV_OBJECT(ch, no->kidx) = NULL; 712 ipfw_objhash_del(CHAIN_TO_SRV(ch), no); 713 ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), no->kidx); 714 715 free(obj, M_IPFW); 716 } 717 718 static struct opcode_obj_rewrite dyn_opcodes[] = { 719 { 720 O_KEEP_STATE, IPFW_TLV_STATE_NAME, 721 dyn_classify, dyn_update, 722 dyn_findbyname, dyn_findbykidx, 723 dyn_create, dyn_destroy 724 }, 725 { 726 O_CHECK_STATE, IPFW_TLV_STATE_NAME, 727 dyn_classify, dyn_update, 728 dyn_findbyname, dyn_findbykidx, 729 dyn_create, dyn_destroy 730 }, 731 { 732 O_PROBE_STATE, IPFW_TLV_STATE_NAME, 733 dyn_classify, dyn_update, 734 dyn_findbyname, dyn_findbykidx, 735 dyn_create, dyn_destroy 736 }, 737 { 738 O_LIMIT, IPFW_TLV_STATE_NAME, 739 dyn_classify, dyn_update, 740 dyn_findbyname, dyn_findbykidx, 741 dyn_create, dyn_destroy 742 }, 743 }; 744 745 /* 746 * IMPORTANT: the hash function for dynamic rules must be commutative 747 * in source and destination (ip,port), because rules are bidirectional 748 * and we want to find both in the same bucket. 749 */ 750 #ifndef IPFIREWALL_JENKINSHASH 751 static __inline uint32_t 752 hash_packet(const struct ipfw_flow_id *id) 753 { 754 uint32_t i; 755 756 #ifdef INET6 757 if (IS_IP6_FLOW_ID(id)) 758 i = ntohl((id->dst_ip6.__u6_addr.__u6_addr32[2]) ^ 759 (id->dst_ip6.__u6_addr.__u6_addr32[3]) ^ 760 (id->src_ip6.__u6_addr.__u6_addr32[2]) ^ 761 (id->src_ip6.__u6_addr.__u6_addr32[3])); 762 else 763 #endif /* INET6 */ 764 i = (id->dst_ip) ^ (id->src_ip); 765 i ^= (id->dst_port) ^ (id->src_port); 766 return (i); 767 } 768 769 static __inline uint32_t 770 hash_parent(const struct ipfw_flow_id *id, const void *rule) 771 { 772 773 return (hash_packet(id) ^ ((uintptr_t)rule)); 774 } 775 776 #else /* IPFIREWALL_JENKINSHASH */ 777 778 static VNET_DEFINE(uint32_t, dyn_hashseed); 779 #define V_dyn_hashseed VNET(dyn_hashseed) 780 781 static __inline int 782 addrcmp4(const struct ipfw_flow_id *id) 783 { 784 785 if (id->src_ip < id->dst_ip) 786 return (0); 787 if (id->src_ip > id->dst_ip) 788 return (1); 789 if (id->src_port <= id->dst_port) 790 return (0); 791 return (1); 792 } 793 794 #ifdef INET6 795 static __inline int 796 addrcmp6(const struct ipfw_flow_id *id) 797 { 798 int ret; 799 800 ret = memcmp(&id->src_ip6, &id->dst_ip6, sizeof(struct in6_addr)); 801 if (ret < 0) 802 return (0); 803 if (ret > 0) 804 return (1); 805 if (id->src_port <= id->dst_port) 806 return (0); 807 return (1); 808 } 809 810 static __inline uint32_t 811 hash_packet6(const struct ipfw_flow_id *id) 812 { 813 struct tuple6 { 814 struct in6_addr addr[2]; 815 uint16_t port[2]; 816 } t6; 817 818 if (addrcmp6(id) == 0) { 819 t6.addr[0] = id->src_ip6; 820 t6.addr[1] = id->dst_ip6; 821 t6.port[0] = id->src_port; 822 t6.port[1] = id->dst_port; 823 } else { 824 t6.addr[0] = id->dst_ip6; 825 t6.addr[1] = id->src_ip6; 826 t6.port[0] = id->dst_port; 827 t6.port[1] = id->src_port; 828 } 829 return (jenkins_hash32((const uint32_t *)&t6, 830 sizeof(t6) / sizeof(uint32_t), V_dyn_hashseed)); 831 } 832 #endif 833 834 static __inline uint32_t 835 hash_packet(const struct ipfw_flow_id *id) 836 { 837 struct tuple4 { 838 in_addr_t addr[2]; 839 uint16_t port[2]; 840 } t4; 841 842 if (IS_IP4_FLOW_ID(id)) { 843 /* All fields are in host byte order */ 844 if (addrcmp4(id) == 0) { 845 t4.addr[0] = id->src_ip; 846 t4.addr[1] = id->dst_ip; 847 t4.port[0] = id->src_port; 848 t4.port[1] = id->dst_port; 849 } else { 850 t4.addr[0] = id->dst_ip; 851 t4.addr[1] = id->src_ip; 852 t4.port[0] = id->dst_port; 853 t4.port[1] = id->src_port; 854 } 855 return (jenkins_hash32((const uint32_t *)&t4, 856 sizeof(t4) / sizeof(uint32_t), V_dyn_hashseed)); 857 } else 858 #ifdef INET6 859 if (IS_IP6_FLOW_ID(id)) 860 return (hash_packet6(id)); 861 #endif 862 return (0); 863 } 864 865 static __inline uint32_t 866 hash_parent(const struct ipfw_flow_id *id, const void *rule) 867 { 868 869 return (jenkins_hash32((const uint32_t *)&rule, 870 sizeof(rule) / sizeof(uint32_t), hash_packet(id))); 871 } 872 #endif /* IPFIREWALL_JENKINSHASH */ 873 874 /* 875 * Print customizable flow id description via log(9) facility. 876 */ 877 static void 878 print_dyn_rule_flags(const struct ipfw_flow_id *id, int dyn_type, 879 int log_flags, char *prefix, char *postfix) 880 { 881 struct in_addr da; 882 #ifdef INET6 883 char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN]; 884 #else 885 char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; 886 #endif 887 888 #ifdef INET6 889 if (IS_IP6_FLOW_ID(id)) { 890 ip6_sprintf(src, &id->src_ip6); 891 ip6_sprintf(dst, &id->dst_ip6); 892 } else 893 #endif 894 { 895 da.s_addr = htonl(id->src_ip); 896 inet_ntop(AF_INET, &da, src, sizeof(src)); 897 da.s_addr = htonl(id->dst_ip); 898 inet_ntop(AF_INET, &da, dst, sizeof(dst)); 899 } 900 log(log_flags, "ipfw: %s type %d %s %d -> %s %d, %d %s\n", 901 prefix, dyn_type, src, id->src_port, dst, 902 id->dst_port, V_dyn_count, postfix); 903 } 904 905 #define print_dyn_rule(id, dtype, prefix, postfix) \ 906 print_dyn_rule_flags(id, dtype, LOG_DEBUG, prefix, postfix) 907 908 #define TIME_LEQ(a,b) ((int)((a)-(b)) <= 0) 909 #define TIME_LE(a,b) ((int)((a)-(b)) < 0) 910 #define _SEQ_GE(a,b) ((int)((a)-(b)) >= 0) 911 #define BOTH_SYN (TH_SYN | (TH_SYN << 8)) 912 #define BOTH_FIN (TH_FIN | (TH_FIN << 8)) 913 #define TCP_FLAGS (TH_FLAGS | (TH_FLAGS << 8)) 914 #define ACK_FWD 0x00010000 /* fwd ack seen */ 915 #define ACK_REV 0x00020000 /* rev ack seen */ 916 #define ACK_BOTH (ACK_FWD | ACK_REV) 917 918 static uint32_t 919 dyn_update_tcp_state(struct dyn_data *data, const struct ipfw_flow_id *pkt, 920 const struct tcphdr *tcp, int dir) 921 { 922 uint32_t ack, expire; 923 uint32_t state, old; 924 uint8_t th_flags; 925 926 expire = data->expire; 927 old = state = data->state; 928 th_flags = pkt->_flags & (TH_FIN | TH_SYN | TH_RST); 929 state |= (dir == MATCH_FORWARD) ? th_flags: (th_flags << 8); 930 switch (state & TCP_FLAGS) { 931 case TH_SYN: /* opening */ 932 expire = time_uptime + V_dyn_syn_lifetime; 933 break; 934 935 case BOTH_SYN: /* move to established */ 936 case BOTH_SYN | TH_FIN: /* one side tries to close */ 937 case BOTH_SYN | (TH_FIN << 8): 938 if (tcp == NULL) 939 break; 940 ack = ntohl(tcp->th_ack); 941 if (dir == MATCH_FORWARD) { 942 if (data->ack_fwd == 0 || 943 _SEQ_GE(ack, data->ack_fwd)) { 944 state |= ACK_FWD; 945 if (data->ack_fwd != ack) 946 ck_pr_store_32(&data->ack_fwd, ack); 947 } 948 } else { 949 if (data->ack_rev == 0 || 950 _SEQ_GE(ack, data->ack_rev)) { 951 state |= ACK_REV; 952 if (data->ack_rev != ack) 953 ck_pr_store_32(&data->ack_rev, ack); 954 } 955 } 956 if ((state & ACK_BOTH) == ACK_BOTH) { 957 /* 958 * Set expire time to V_dyn_ack_lifetime only if 959 * we got ACKs for both directions. 960 * We use XOR here to avoid possible state 961 * overwriting in concurrent thread. 962 */ 963 expire = time_uptime + V_dyn_ack_lifetime; 964 ck_pr_xor_32(&data->state, ACK_BOTH); 965 } else if ((data->state & ACK_BOTH) != (state & ACK_BOTH)) 966 ck_pr_or_32(&data->state, state & ACK_BOTH); 967 break; 968 969 case BOTH_SYN | BOTH_FIN: /* both sides closed */ 970 if (V_dyn_fin_lifetime >= V_dyn_keepalive_period) 971 V_dyn_fin_lifetime = V_dyn_keepalive_period - 1; 972 expire = time_uptime + V_dyn_fin_lifetime; 973 break; 974 975 default: 976 if (V_dyn_rst_lifetime >= V_dyn_keepalive_period) 977 V_dyn_rst_lifetime = V_dyn_keepalive_period - 1; 978 expire = time_uptime + V_dyn_rst_lifetime; 979 } 980 /* Save TCP state if it was changed */ 981 if ((state & TCP_FLAGS) != (old & TCP_FLAGS)) 982 ck_pr_or_32(&data->state, state & TCP_FLAGS); 983 return (expire); 984 } 985 986 /* 987 * Update ULP specific state. 988 * For TCP we keep sequence numbers and flags. For other protocols 989 * currently we update only expire time. Packets and bytes counters 990 * are also updated here. 991 */ 992 static void 993 dyn_update_proto_state(struct dyn_data *data, const struct ipfw_flow_id *pkt, 994 const void *ulp, int pktlen, int dir) 995 { 996 uint32_t expire; 997 998 /* NOTE: we are in critical section here. */ 999 switch (pkt->proto) { 1000 case IPPROTO_UDP: 1001 case IPPROTO_UDPLITE: 1002 expire = time_uptime + V_dyn_udp_lifetime; 1003 break; 1004 case IPPROTO_TCP: 1005 expire = dyn_update_tcp_state(data, pkt, ulp, dir); 1006 break; 1007 default: 1008 expire = time_uptime + V_dyn_short_lifetime; 1009 } 1010 /* 1011 * Expiration timer has the per-second granularity, no need to update 1012 * it every time when state is matched. 1013 */ 1014 if (data->expire != expire) 1015 ck_pr_store_32(&data->expire, expire); 1016 1017 if (dir == MATCH_FORWARD) 1018 DYN_COUNTER_INC(data, fwd, pktlen); 1019 else 1020 DYN_COUNTER_INC(data, rev, pktlen); 1021 } 1022 1023 /* 1024 * Lookup IPv4 state. 1025 * Must be called in critical section. 1026 */ 1027 struct dyn_ipv4_state * 1028 dyn_lookup_ipv4_state(const struct ipfw_flow_id *pkt, const void *ulp, 1029 struct ipfw_dyn_info *info, int pktlen) 1030 { 1031 struct dyn_ipv4_state *s; 1032 uint32_t version, bucket; 1033 1034 bucket = DYN_BUCKET(info->hashval, V_curr_dyn_buckets); 1035 info->version = DYN_BUCKET_VERSION(bucket, ipv4_add); 1036 restart: 1037 version = DYN_BUCKET_VERSION(bucket, ipv4_del); 1038 CK_SLIST_FOREACH(s, &V_dyn_ipv4[bucket], entry) { 1039 DYNSTATE_PROTECT(s); 1040 if (version != DYN_BUCKET_VERSION(bucket, ipv4_del)) 1041 goto restart; 1042 if (s->proto != pkt->proto) 1043 continue; 1044 if (info->kidx != 0 && s->kidx != info->kidx) 1045 continue; 1046 if (s->sport == pkt->src_port && s->dport == pkt->dst_port && 1047 s->src == pkt->src_ip && s->dst == pkt->dst_ip) { 1048 info->direction = MATCH_FORWARD; 1049 break; 1050 } 1051 if (s->sport == pkt->dst_port && s->dport == pkt->src_port && 1052 s->src == pkt->dst_ip && s->dst == pkt->src_ip) { 1053 info->direction = MATCH_REVERSE; 1054 break; 1055 } 1056 } 1057 1058 if (s != NULL) 1059 dyn_update_proto_state(s->data, pkt, ulp, pktlen, 1060 info->direction); 1061 return (s); 1062 } 1063 1064 /* 1065 * Lookup IPv4 state. 1066 * Simplifed version is used to check that matching state doesn't exist. 1067 */ 1068 static int 1069 dyn_lookup_ipv4_state_locked(const struct ipfw_flow_id *pkt, 1070 const void *ulp, int pktlen, const void *parent, uint32_t ruleid, 1071 uint16_t rulenum, uint32_t bucket, uint16_t kidx) 1072 { 1073 struct dyn_ipv4_state *s; 1074 int dir; 1075 1076 dir = MATCH_NONE; 1077 DYN_BUCKET_ASSERT(bucket); 1078 CK_SLIST_FOREACH(s, &V_dyn_ipv4[bucket], entry) { 1079 if (s->proto != pkt->proto || 1080 s->kidx != kidx) 1081 continue; 1082 /* 1083 * XXXAE: Install synchronized state only when there are 1084 * no matching states. 1085 */ 1086 if (pktlen != 0 && ( 1087 s->data->parent != parent || 1088 s->data->ruleid != ruleid || 1089 s->data->rulenum != rulenum)) 1090 continue; 1091 if (s->sport == pkt->src_port && 1092 s->dport == pkt->dst_port && 1093 s->src == pkt->src_ip && s->dst == pkt->dst_ip) { 1094 dir = MATCH_FORWARD; 1095 break; 1096 } 1097 if (s->sport == pkt->dst_port && s->dport == pkt->src_port && 1098 s->src == pkt->dst_ip && s->dst == pkt->src_ip) { 1099 dir = MATCH_REVERSE; 1100 break; 1101 } 1102 } 1103 if (s != NULL) 1104 dyn_update_proto_state(s->data, pkt, ulp, pktlen, dir); 1105 return (s != NULL); 1106 } 1107 1108 struct dyn_ipv4_state * 1109 dyn_lookup_ipv4_parent(const struct ipfw_flow_id *pkt, const void *rule, 1110 uint32_t ruleid, uint16_t rulenum, uint32_t hashval) 1111 { 1112 struct dyn_ipv4_state *s; 1113 uint32_t version, bucket; 1114 1115 bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets); 1116 restart: 1117 version = DYN_BUCKET_VERSION(bucket, ipv4_parent_del); 1118 CK_SLIST_FOREACH(s, &V_dyn_ipv4_parent[bucket], entry) { 1119 DYNSTATE_PROTECT(s); 1120 if (version != DYN_BUCKET_VERSION(bucket, ipv4_parent_del)) 1121 goto restart; 1122 /* 1123 * NOTE: we do not need to check kidx, because parent rule 1124 * can not create states with different kidx. 1125 * And parent rule always created for forward direction. 1126 */ 1127 if (s->limit->parent == rule && 1128 s->limit->ruleid == ruleid && 1129 s->limit->rulenum == rulenum && 1130 s->proto == pkt->proto && 1131 s->sport == pkt->src_port && 1132 s->dport == pkt->dst_port && 1133 s->src == pkt->src_ip && s->dst == pkt->dst_ip) { 1134 if (s->limit->expire != time_uptime + 1135 V_dyn_short_lifetime) 1136 ck_pr_store_32(&s->limit->expire, 1137 time_uptime + V_dyn_short_lifetime); 1138 break; 1139 } 1140 } 1141 return (s); 1142 } 1143 1144 static struct dyn_ipv4_state * 1145 dyn_lookup_ipv4_parent_locked(const struct ipfw_flow_id *pkt, 1146 const void *rule, uint32_t ruleid, uint16_t rulenum, uint32_t bucket) 1147 { 1148 struct dyn_ipv4_state *s; 1149 1150 DYN_BUCKET_ASSERT(bucket); 1151 CK_SLIST_FOREACH(s, &V_dyn_ipv4_parent[bucket], entry) { 1152 if (s->limit->parent == rule && 1153 s->limit->ruleid == ruleid && 1154 s->limit->rulenum == rulenum && 1155 s->proto == pkt->proto && 1156 s->sport == pkt->src_port && 1157 s->dport == pkt->dst_port && 1158 s->src == pkt->src_ip && s->dst == pkt->dst_ip) 1159 break; 1160 } 1161 return (s); 1162 } 1163 1164 1165 #ifdef INET6 1166 static uint32_t 1167 dyn_getscopeid(const struct ip_fw_args *args) 1168 { 1169 1170 /* 1171 * If source or destination address is an scopeid address, we need 1172 * determine the scope zone id to resolve address scope ambiguity. 1173 */ 1174 if (IN6_IS_ADDR_LINKLOCAL(&args->f_id.src_ip6) || 1175 IN6_IS_ADDR_LINKLOCAL(&args->f_id.dst_ip6)) { 1176 MPASS(args->oif != NULL || 1177 args->m->m_pkthdr.rcvif != NULL); 1178 return (in6_getscopezone(args->oif != NULL ? args->oif: 1179 args->m->m_pkthdr.rcvif, IPV6_ADDR_SCOPE_LINKLOCAL)); 1180 } 1181 return (0); 1182 } 1183 1184 /* 1185 * Lookup IPv6 state. 1186 * Must be called in critical section. 1187 */ 1188 static struct dyn_ipv6_state * 1189 dyn_lookup_ipv6_state(const struct ipfw_flow_id *pkt, uint32_t zoneid, 1190 const void *ulp, struct ipfw_dyn_info *info, int pktlen) 1191 { 1192 struct dyn_ipv6_state *s; 1193 uint32_t version, bucket; 1194 1195 bucket = DYN_BUCKET(info->hashval, V_curr_dyn_buckets); 1196 info->version = DYN_BUCKET_VERSION(bucket, ipv6_add); 1197 restart: 1198 version = DYN_BUCKET_VERSION(bucket, ipv6_del); 1199 CK_SLIST_FOREACH(s, &V_dyn_ipv6[bucket], entry) { 1200 DYNSTATE_PROTECT(s); 1201 if (version != DYN_BUCKET_VERSION(bucket, ipv6_del)) 1202 goto restart; 1203 if (s->proto != pkt->proto || s->zoneid != zoneid) 1204 continue; 1205 if (info->kidx != 0 && s->kidx != info->kidx) 1206 continue; 1207 if (s->sport == pkt->src_port && s->dport == pkt->dst_port && 1208 IN6_ARE_ADDR_EQUAL(&s->src, &pkt->src_ip6) && 1209 IN6_ARE_ADDR_EQUAL(&s->dst, &pkt->dst_ip6)) { 1210 info->direction = MATCH_FORWARD; 1211 break; 1212 } 1213 if (s->sport == pkt->dst_port && s->dport == pkt->src_port && 1214 IN6_ARE_ADDR_EQUAL(&s->src, &pkt->dst_ip6) && 1215 IN6_ARE_ADDR_EQUAL(&s->dst, &pkt->src_ip6)) { 1216 info->direction = MATCH_REVERSE; 1217 break; 1218 } 1219 } 1220 if (s != NULL) 1221 dyn_update_proto_state(s->data, pkt, ulp, pktlen, 1222 info->direction); 1223 return (s); 1224 } 1225 1226 /* 1227 * Lookup IPv6 state. 1228 * Simplifed version is used to check that matching state doesn't exist. 1229 */ 1230 static int 1231 dyn_lookup_ipv6_state_locked(const struct ipfw_flow_id *pkt, uint32_t zoneid, 1232 const void *ulp, int pktlen, const void *parent, uint32_t ruleid, 1233 uint16_t rulenum, uint32_t bucket, uint16_t kidx) 1234 { 1235 struct dyn_ipv6_state *s; 1236 int dir; 1237 1238 dir = MATCH_NONE; 1239 DYN_BUCKET_ASSERT(bucket); 1240 CK_SLIST_FOREACH(s, &V_dyn_ipv6[bucket], entry) { 1241 if (s->proto != pkt->proto || s->kidx != kidx || 1242 s->zoneid != zoneid) 1243 continue; 1244 /* 1245 * XXXAE: Install synchronized state only when there are 1246 * no matching states. 1247 */ 1248 if (pktlen != 0 && ( 1249 s->data->parent != parent || 1250 s->data->ruleid != ruleid || 1251 s->data->rulenum != rulenum)) 1252 continue; 1253 if (s->sport == pkt->src_port && s->dport == pkt->dst_port && 1254 IN6_ARE_ADDR_EQUAL(&s->src, &pkt->src_ip6) && 1255 IN6_ARE_ADDR_EQUAL(&s->dst, &pkt->dst_ip6)) { 1256 dir = MATCH_FORWARD; 1257 break; 1258 } 1259 if (s->sport == pkt->dst_port && s->dport == pkt->src_port && 1260 IN6_ARE_ADDR_EQUAL(&s->src, &pkt->dst_ip6) && 1261 IN6_ARE_ADDR_EQUAL(&s->dst, &pkt->src_ip6)) { 1262 dir = MATCH_REVERSE; 1263 break; 1264 } 1265 } 1266 if (s != NULL) 1267 dyn_update_proto_state(s->data, pkt, ulp, pktlen, dir); 1268 return (s != NULL); 1269 } 1270 1271 static struct dyn_ipv6_state * 1272 dyn_lookup_ipv6_parent(const struct ipfw_flow_id *pkt, uint32_t zoneid, 1273 const void *rule, uint32_t ruleid, uint16_t rulenum, uint32_t hashval) 1274 { 1275 struct dyn_ipv6_state *s; 1276 uint32_t version, bucket; 1277 1278 bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets); 1279 restart: 1280 version = DYN_BUCKET_VERSION(bucket, ipv6_parent_del); 1281 CK_SLIST_FOREACH(s, &V_dyn_ipv6_parent[bucket], entry) { 1282 DYNSTATE_PROTECT(s); 1283 if (version != DYN_BUCKET_VERSION(bucket, ipv6_parent_del)) 1284 goto restart; 1285 /* 1286 * NOTE: we do not need to check kidx, because parent rule 1287 * can not create states with different kidx. 1288 * Also parent rule always created for forward direction. 1289 */ 1290 if (s->limit->parent == rule && 1291 s->limit->ruleid == ruleid && 1292 s->limit->rulenum == rulenum && 1293 s->proto == pkt->proto && 1294 s->sport == pkt->src_port && 1295 s->dport == pkt->dst_port && s->zoneid == zoneid && 1296 IN6_ARE_ADDR_EQUAL(&s->src, &pkt->src_ip6) && 1297 IN6_ARE_ADDR_EQUAL(&s->dst, &pkt->dst_ip6)) { 1298 if (s->limit->expire != time_uptime + 1299 V_dyn_short_lifetime) 1300 ck_pr_store_32(&s->limit->expire, 1301 time_uptime + V_dyn_short_lifetime); 1302 break; 1303 } 1304 } 1305 return (s); 1306 } 1307 1308 static struct dyn_ipv6_state * 1309 dyn_lookup_ipv6_parent_locked(const struct ipfw_flow_id *pkt, uint32_t zoneid, 1310 const void *rule, uint32_t ruleid, uint16_t rulenum, uint32_t bucket) 1311 { 1312 struct dyn_ipv6_state *s; 1313 1314 DYN_BUCKET_ASSERT(bucket); 1315 CK_SLIST_FOREACH(s, &V_dyn_ipv6_parent[bucket], entry) { 1316 if (s->limit->parent == rule && 1317 s->limit->ruleid == ruleid && 1318 s->limit->rulenum == rulenum && 1319 s->proto == pkt->proto && 1320 s->sport == pkt->src_port && 1321 s->dport == pkt->dst_port && s->zoneid == zoneid && 1322 IN6_ARE_ADDR_EQUAL(&s->src, &pkt->src_ip6) && 1323 IN6_ARE_ADDR_EQUAL(&s->dst, &pkt->dst_ip6)) 1324 break; 1325 } 1326 return (s); 1327 } 1328 1329 #endif /* INET6 */ 1330 1331 /* 1332 * Lookup dynamic state. 1333 * pkt - filled by ipfw_chk() ipfw_flow_id; 1334 * ulp - determined by ipfw_chk() upper level protocol header; 1335 * dyn_info - info about matched state to return back; 1336 * Returns pointer to state's parent rule and dyn_info. If there is 1337 * no state, NULL is returned. 1338 * On match ipfw_dyn_lookup() updates state's counters. 1339 */ 1340 struct ip_fw * 1341 ipfw_dyn_lookup_state(const struct ip_fw_args *args, const void *ulp, 1342 int pktlen, const ipfw_insn *cmd, struct ipfw_dyn_info *info) 1343 { 1344 struct dyn_data *data; 1345 struct ip_fw *rule; 1346 1347 IPFW_RLOCK_ASSERT(&V_layer3_chain); 1348 1349 data = NULL; 1350 rule = NULL; 1351 info->kidx = cmd->arg1; 1352 info->direction = MATCH_NONE; 1353 info->hashval = hash_packet(&args->f_id); 1354 1355 DYNSTATE_CRITICAL_ENTER(); 1356 if (IS_IP4_FLOW_ID(&args->f_id)) { 1357 struct dyn_ipv4_state *s; 1358 1359 s = dyn_lookup_ipv4_state(&args->f_id, ulp, info, pktlen); 1360 if (s != NULL) { 1361 /* 1362 * Dynamic states are created using the same 5-tuple, 1363 * so it is assumed, that parent rule for O_LIMIT 1364 * state has the same address family. 1365 */ 1366 data = s->data; 1367 if (s->type == O_LIMIT) { 1368 s = data->parent; 1369 rule = s->limit->parent; 1370 } else 1371 rule = data->parent; 1372 } 1373 } 1374 #ifdef INET6 1375 else if (IS_IP6_FLOW_ID(&args->f_id)) { 1376 struct dyn_ipv6_state *s; 1377 1378 s = dyn_lookup_ipv6_state(&args->f_id, dyn_getscopeid(args), 1379 ulp, info, pktlen); 1380 if (s != NULL) { 1381 data = s->data; 1382 if (s->type == O_LIMIT) { 1383 s = data->parent; 1384 rule = s->limit->parent; 1385 } else 1386 rule = data->parent; 1387 } 1388 } 1389 #endif 1390 if (data != NULL) { 1391 /* 1392 * If cached chain id is the same, we can avoid rule index 1393 * lookup. Otherwise do lookup and update chain_id and f_pos. 1394 * It is safe even if there is concurrent thread that want 1395 * update the same state, because chain->id can be changed 1396 * only under IPFW_WLOCK(). 1397 */ 1398 if (data->chain_id != V_layer3_chain.id) { 1399 data->f_pos = ipfw_find_rule(&V_layer3_chain, 1400 data->rulenum, data->ruleid); 1401 /* 1402 * Check that found state has not orphaned. 1403 * When chain->id being changed the parent 1404 * rule can be deleted. If found rule doesn't 1405 * match the parent pointer, consider this 1406 * result as MATCH_NONE and return NULL. 1407 * 1408 * This will lead to creation of new similar state 1409 * that will be added into head of this bucket. 1410 * And the state that we currently have matched 1411 * should be deleted by dyn_expire_states(). 1412 */ 1413 if (V_layer3_chain.map[data->f_pos] == rule) 1414 data->chain_id = V_layer3_chain.id; 1415 else { 1416 rule = NULL; 1417 info->direction = MATCH_NONE; 1418 DYN_DEBUG("rule %p [%u, %u] is considered " 1419 "invalid in data %p", rule, data->ruleid, 1420 data->rulenum, data); 1421 } 1422 } 1423 info->f_pos = data->f_pos; 1424 } 1425 DYNSTATE_CRITICAL_EXIT(); 1426 #if 0 1427 /* 1428 * Return MATCH_NONE if parent rule is in disabled set. 1429 * This will lead to creation of new similar state that 1430 * will be added into head of this bucket. 1431 * 1432 * XXXAE: we need to be able update state's set when parent 1433 * rule set is changed. 1434 */ 1435 if (rule != NULL && (V_set_disable & (1 << rule->set))) { 1436 rule = NULL; 1437 info->direction = MATCH_NONE; 1438 } 1439 #endif 1440 return (rule); 1441 } 1442 1443 static struct dyn_parent * 1444 dyn_alloc_parent(void *parent, uint32_t ruleid, uint16_t rulenum, 1445 uint8_t set, uint32_t hashval) 1446 { 1447 struct dyn_parent *limit; 1448 1449 limit = uma_zalloc(V_dyn_parent_zone, M_NOWAIT | M_ZERO); 1450 if (limit == NULL) { 1451 if (last_log != time_uptime) { 1452 last_log = time_uptime; 1453 log(LOG_DEBUG, 1454 "ipfw: Cannot allocate parent dynamic state, " 1455 "consider increasing " 1456 "net.inet.ip.fw.dyn_parent_max\n"); 1457 } 1458 return (NULL); 1459 } 1460 1461 limit->parent = parent; 1462 limit->ruleid = ruleid; 1463 limit->rulenum = rulenum; 1464 limit->set = set; 1465 limit->hashval = hashval; 1466 limit->expire = time_uptime + V_dyn_short_lifetime; 1467 return (limit); 1468 } 1469 1470 static struct dyn_data * 1471 dyn_alloc_dyndata(void *parent, uint32_t ruleid, uint16_t rulenum, 1472 uint8_t set, const struct ipfw_flow_id *pkt, const void *ulp, int pktlen, 1473 uint32_t hashval, uint16_t fibnum) 1474 { 1475 struct dyn_data *data; 1476 1477 data = uma_zalloc(V_dyn_data_zone, M_NOWAIT | M_ZERO); 1478 if (data == NULL) { 1479 if (last_log != time_uptime) { 1480 last_log = time_uptime; 1481 log(LOG_DEBUG, 1482 "ipfw: Cannot allocate dynamic state, " 1483 "consider increasing net.inet.ip.fw.dyn_max\n"); 1484 } 1485 return (NULL); 1486 } 1487 1488 data->parent = parent; 1489 data->ruleid = ruleid; 1490 data->rulenum = rulenum; 1491 data->set = set; 1492 data->fibnum = fibnum; 1493 data->hashval = hashval; 1494 data->expire = time_uptime + V_dyn_syn_lifetime; 1495 dyn_update_proto_state(data, pkt, ulp, pktlen, MATCH_FORWARD); 1496 return (data); 1497 } 1498 1499 static struct dyn_ipv4_state * 1500 dyn_alloc_ipv4_state(const struct ipfw_flow_id *pkt, uint16_t kidx, 1501 uint8_t type) 1502 { 1503 struct dyn_ipv4_state *s; 1504 1505 s = uma_zalloc(V_dyn_ipv4_zone, M_NOWAIT | M_ZERO); 1506 if (s == NULL) 1507 return (NULL); 1508 1509 s->type = type; 1510 s->kidx = kidx; 1511 s->proto = pkt->proto; 1512 s->sport = pkt->src_port; 1513 s->dport = pkt->dst_port; 1514 s->src = pkt->src_ip; 1515 s->dst = pkt->dst_ip; 1516 return (s); 1517 } 1518 1519 /* 1520 * Add IPv4 parent state. 1521 * Returns pointer to parent state. When it is not NULL we are in 1522 * critical section and pointer protected by hazard pointer. 1523 * When some error occurs, it returns NULL and exit from critical section 1524 * is not needed. 1525 */ 1526 static struct dyn_ipv4_state * 1527 dyn_add_ipv4_parent(void *rule, uint32_t ruleid, uint16_t rulenum, 1528 uint8_t set, const struct ipfw_flow_id *pkt, uint32_t hashval, 1529 uint32_t version, uint16_t kidx) 1530 { 1531 struct dyn_ipv4_state *s; 1532 struct dyn_parent *limit; 1533 uint32_t bucket; 1534 1535 bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets); 1536 DYN_BUCKET_LOCK(bucket); 1537 if (version != DYN_BUCKET_VERSION(bucket, ipv4_parent_add)) { 1538 /* 1539 * Bucket version has been changed since last lookup, 1540 * do lookup again to be sure that state does not exist. 1541 */ 1542 s = dyn_lookup_ipv4_parent_locked(pkt, rule, ruleid, 1543 rulenum, bucket); 1544 if (s != NULL) { 1545 /* 1546 * Simultaneous thread has already created this 1547 * state. Just return it. 1548 */ 1549 DYNSTATE_CRITICAL_ENTER(); 1550 DYNSTATE_PROTECT(s); 1551 DYN_BUCKET_UNLOCK(bucket); 1552 return (s); 1553 } 1554 } 1555 1556 limit = dyn_alloc_parent(rule, ruleid, rulenum, set, hashval); 1557 if (limit == NULL) { 1558 DYN_BUCKET_UNLOCK(bucket); 1559 return (NULL); 1560 } 1561 1562 s = dyn_alloc_ipv4_state(pkt, kidx, O_LIMIT_PARENT); 1563 if (s == NULL) { 1564 DYN_BUCKET_UNLOCK(bucket); 1565 uma_zfree(V_dyn_parent_zone, limit); 1566 return (NULL); 1567 } 1568 1569 s->limit = limit; 1570 CK_SLIST_INSERT_HEAD(&V_dyn_ipv4_parent[bucket], s, entry); 1571 DYN_COUNT_INC(dyn_parent_count); 1572 DYN_BUCKET_VERSION_BUMP(bucket, ipv4_parent_add); 1573 DYNSTATE_CRITICAL_ENTER(); 1574 DYNSTATE_PROTECT(s); 1575 DYN_BUCKET_UNLOCK(bucket); 1576 return (s); 1577 } 1578 1579 static int 1580 dyn_add_ipv4_state(void *parent, uint32_t ruleid, uint16_t rulenum, 1581 uint8_t set, const struct ipfw_flow_id *pkt, const void *ulp, int pktlen, 1582 uint32_t hashval, struct ipfw_dyn_info *info, uint16_t fibnum, 1583 uint16_t kidx, uint8_t type) 1584 { 1585 struct dyn_ipv4_state *s; 1586 void *data; 1587 uint32_t bucket; 1588 1589 bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets); 1590 DYN_BUCKET_LOCK(bucket); 1591 if (info->direction == MATCH_UNKNOWN || 1592 info->kidx != kidx || 1593 info->hashval != hashval || 1594 info->version != DYN_BUCKET_VERSION(bucket, ipv4_add)) { 1595 /* 1596 * Bucket version has been changed since last lookup, 1597 * do lookup again to be sure that state does not exist. 1598 */ 1599 if (dyn_lookup_ipv4_state_locked(pkt, ulp, pktlen, parent, 1600 ruleid, rulenum, bucket, kidx) != 0) { 1601 DYN_BUCKET_UNLOCK(bucket); 1602 return (EEXIST); 1603 } 1604 } 1605 1606 data = dyn_alloc_dyndata(parent, ruleid, rulenum, set, pkt, ulp, 1607 pktlen, hashval, fibnum); 1608 if (data == NULL) { 1609 DYN_BUCKET_UNLOCK(bucket); 1610 return (ENOMEM); 1611 } 1612 1613 s = dyn_alloc_ipv4_state(pkt, kidx, type); 1614 if (s == NULL) { 1615 DYN_BUCKET_UNLOCK(bucket); 1616 uma_zfree(V_dyn_data_zone, data); 1617 return (ENOMEM); 1618 } 1619 1620 s->data = data; 1621 CK_SLIST_INSERT_HEAD(&V_dyn_ipv4[bucket], s, entry); 1622 DYN_COUNT_INC(dyn_count); 1623 DYN_BUCKET_VERSION_BUMP(bucket, ipv4_add); 1624 DYN_BUCKET_UNLOCK(bucket); 1625 return (0); 1626 } 1627 1628 #ifdef INET6 1629 static struct dyn_ipv6_state * 1630 dyn_alloc_ipv6_state(const struct ipfw_flow_id *pkt, uint32_t zoneid, 1631 uint16_t kidx, uint8_t type) 1632 { 1633 struct dyn_ipv6_state *s; 1634 1635 s = uma_zalloc(V_dyn_ipv6_zone, M_NOWAIT | M_ZERO); 1636 if (s == NULL) 1637 return (NULL); 1638 1639 s->type = type; 1640 s->kidx = kidx; 1641 s->zoneid = zoneid; 1642 s->proto = pkt->proto; 1643 s->sport = pkt->src_port; 1644 s->dport = pkt->dst_port; 1645 s->src = pkt->src_ip6; 1646 s->dst = pkt->dst_ip6; 1647 return (s); 1648 } 1649 1650 /* 1651 * Add IPv6 parent state. 1652 * Returns pointer to parent state. When it is not NULL we are in 1653 * critical section and pointer protected by hazard pointer. 1654 * When some error occurs, it return NULL and exit from critical section 1655 * is not needed. 1656 */ 1657 static struct dyn_ipv6_state * 1658 dyn_add_ipv6_parent(void *rule, uint32_t ruleid, uint16_t rulenum, 1659 uint8_t set, const struct ipfw_flow_id *pkt, uint32_t zoneid, 1660 uint32_t hashval, uint32_t version, uint16_t kidx) 1661 { 1662 struct dyn_ipv6_state *s; 1663 struct dyn_parent *limit; 1664 uint32_t bucket; 1665 1666 bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets); 1667 DYN_BUCKET_LOCK(bucket); 1668 if (version != DYN_BUCKET_VERSION(bucket, ipv6_parent_add)) { 1669 /* 1670 * Bucket version has been changed since last lookup, 1671 * do lookup again to be sure that state does not exist. 1672 */ 1673 s = dyn_lookup_ipv6_parent_locked(pkt, zoneid, rule, ruleid, 1674 rulenum, bucket); 1675 if (s != NULL) { 1676 /* 1677 * Simultaneous thread has already created this 1678 * state. Just return it. 1679 */ 1680 DYNSTATE_CRITICAL_ENTER(); 1681 DYNSTATE_PROTECT(s); 1682 DYN_BUCKET_UNLOCK(bucket); 1683 return (s); 1684 } 1685 } 1686 1687 limit = dyn_alloc_parent(rule, ruleid, rulenum, set, hashval); 1688 if (limit == NULL) { 1689 DYN_BUCKET_UNLOCK(bucket); 1690 return (NULL); 1691 } 1692 1693 s = dyn_alloc_ipv6_state(pkt, zoneid, kidx, O_LIMIT_PARENT); 1694 if (s == NULL) { 1695 DYN_BUCKET_UNLOCK(bucket); 1696 uma_zfree(V_dyn_parent_zone, limit); 1697 return (NULL); 1698 } 1699 1700 s->limit = limit; 1701 CK_SLIST_INSERT_HEAD(&V_dyn_ipv6_parent[bucket], s, entry); 1702 DYN_COUNT_INC(dyn_parent_count); 1703 DYN_BUCKET_VERSION_BUMP(bucket, ipv6_parent_add); 1704 DYNSTATE_CRITICAL_ENTER(); 1705 DYNSTATE_PROTECT(s); 1706 DYN_BUCKET_UNLOCK(bucket); 1707 return (s); 1708 } 1709 1710 static int 1711 dyn_add_ipv6_state(void *parent, uint32_t ruleid, uint16_t rulenum, 1712 uint8_t set, const struct ipfw_flow_id *pkt, uint32_t zoneid, 1713 const void *ulp, int pktlen, uint32_t hashval, struct ipfw_dyn_info *info, 1714 uint16_t fibnum, uint16_t kidx, uint8_t type) 1715 { 1716 struct dyn_ipv6_state *s; 1717 struct dyn_data *data; 1718 uint32_t bucket; 1719 1720 bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets); 1721 DYN_BUCKET_LOCK(bucket); 1722 if (info->direction == MATCH_UNKNOWN || 1723 info->kidx != kidx || 1724 info->hashval != hashval || 1725 info->version != DYN_BUCKET_VERSION(bucket, ipv6_add)) { 1726 /* 1727 * Bucket version has been changed since last lookup, 1728 * do lookup again to be sure that state does not exist. 1729 */ 1730 if (dyn_lookup_ipv6_state_locked(pkt, zoneid, ulp, pktlen, 1731 parent, ruleid, rulenum, bucket, kidx) != 0) { 1732 DYN_BUCKET_UNLOCK(bucket); 1733 return (EEXIST); 1734 } 1735 } 1736 1737 data = dyn_alloc_dyndata(parent, ruleid, rulenum, set, pkt, ulp, 1738 pktlen, hashval, fibnum); 1739 if (data == NULL) { 1740 DYN_BUCKET_UNLOCK(bucket); 1741 return (ENOMEM); 1742 } 1743 1744 s = dyn_alloc_ipv6_state(pkt, zoneid, kidx, type); 1745 if (s == NULL) { 1746 DYN_BUCKET_UNLOCK(bucket); 1747 uma_zfree(V_dyn_data_zone, data); 1748 return (ENOMEM); 1749 } 1750 1751 s->data = data; 1752 CK_SLIST_INSERT_HEAD(&V_dyn_ipv6[bucket], s, entry); 1753 DYN_COUNT_INC(dyn_count); 1754 DYN_BUCKET_VERSION_BUMP(bucket, ipv6_add); 1755 DYN_BUCKET_UNLOCK(bucket); 1756 return (0); 1757 } 1758 #endif /* INET6 */ 1759 1760 static void * 1761 dyn_get_parent_state(const struct ipfw_flow_id *pkt, uint32_t zoneid, 1762 struct ip_fw *rule, uint32_t hashval, uint32_t limit, uint16_t kidx) 1763 { 1764 char sbuf[24]; 1765 struct dyn_parent *p; 1766 void *ret; 1767 uint32_t bucket, version; 1768 1769 p = NULL; 1770 ret = NULL; 1771 bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets); 1772 DYNSTATE_CRITICAL_ENTER(); 1773 if (IS_IP4_FLOW_ID(pkt)) { 1774 struct dyn_ipv4_state *s; 1775 1776 version = DYN_BUCKET_VERSION(bucket, ipv4_parent_add); 1777 s = dyn_lookup_ipv4_parent(pkt, rule, rule->id, 1778 rule->rulenum, bucket); 1779 if (s == NULL) { 1780 /* 1781 * Exit from critical section because dyn_add_parent() 1782 * will acquire bucket lock. 1783 */ 1784 DYNSTATE_CRITICAL_EXIT(); 1785 1786 s = dyn_add_ipv4_parent(rule, rule->id, 1787 rule->rulenum, rule->set, pkt, hashval, 1788 version, kidx); 1789 if (s == NULL) 1790 return (NULL); 1791 /* Now we are in critical section again. */ 1792 } 1793 ret = s; 1794 p = s->limit; 1795 } 1796 #ifdef INET6 1797 else if (IS_IP6_FLOW_ID(pkt)) { 1798 struct dyn_ipv6_state *s; 1799 1800 version = DYN_BUCKET_VERSION(bucket, ipv6_parent_add); 1801 s = dyn_lookup_ipv6_parent(pkt, zoneid, rule, rule->id, 1802 rule->rulenum, bucket); 1803 if (s == NULL) { 1804 /* 1805 * Exit from critical section because dyn_add_parent() 1806 * can acquire bucket mutex. 1807 */ 1808 DYNSTATE_CRITICAL_EXIT(); 1809 1810 s = dyn_add_ipv6_parent(rule, rule->id, 1811 rule->rulenum, rule->set, pkt, zoneid, hashval, 1812 version, kidx); 1813 if (s == NULL) 1814 return (NULL); 1815 /* Now we are in critical section again. */ 1816 } 1817 ret = s; 1818 p = s->limit; 1819 } 1820 #endif 1821 else { 1822 DYNSTATE_CRITICAL_EXIT(); 1823 return (NULL); 1824 } 1825 1826 /* Check the limit */ 1827 if (DPARENT_COUNT(p) >= limit) { 1828 DYNSTATE_CRITICAL_EXIT(); 1829 if (V_fw_verbose && last_log != time_uptime) { 1830 last_log = time_uptime; 1831 snprintf(sbuf, sizeof(sbuf), "%u drop session", 1832 rule->rulenum); 1833 print_dyn_rule_flags(pkt, O_LIMIT, 1834 LOG_SECURITY | LOG_DEBUG, sbuf, 1835 "too many entries"); 1836 } 1837 return (NULL); 1838 } 1839 1840 /* Take new session into account. */ 1841 DPARENT_COUNT_INC(p); 1842 /* 1843 * We must exit from critical section because the following code 1844 * can acquire bucket mutex. 1845 * We rely on the the 'count' field. The state will not expire 1846 * until it has some child states, i.e. 'count' field is not zero. 1847 * Return state pointer, it will be used by child states as parent. 1848 */ 1849 DYNSTATE_CRITICAL_EXIT(); 1850 return (ret); 1851 } 1852 1853 static int 1854 dyn_install_state(const struct ipfw_flow_id *pkt, uint32_t zoneid, 1855 uint16_t fibnum, const void *ulp, int pktlen, void *rule, 1856 uint32_t ruleid, uint16_t rulenum, uint8_t set, 1857 struct ipfw_dyn_info *info, uint32_t limit, uint16_t limit_mask, 1858 uint16_t kidx, uint8_t type) 1859 { 1860 struct ipfw_flow_id id; 1861 uint32_t hashval, parent_hashval; 1862 int ret; 1863 1864 MPASS(type == O_LIMIT || type == O_KEEP_STATE); 1865 1866 if (type == O_LIMIT) { 1867 /* Create masked flow id and calculate bucket */ 1868 id.addr_type = pkt->addr_type; 1869 id.proto = pkt->proto; 1870 id.fib = fibnum; /* unused */ 1871 id.src_port = (limit_mask & DYN_SRC_PORT) ? 1872 pkt->src_port: 0; 1873 id.dst_port = (limit_mask & DYN_DST_PORT) ? 1874 pkt->dst_port: 0; 1875 if (IS_IP4_FLOW_ID(pkt)) { 1876 id.src_ip = (limit_mask & DYN_SRC_ADDR) ? 1877 pkt->src_ip: 0; 1878 id.dst_ip = (limit_mask & DYN_DST_ADDR) ? 1879 pkt->dst_ip: 0; 1880 } 1881 #ifdef INET6 1882 else if (IS_IP6_FLOW_ID(pkt)) { 1883 if (limit_mask & DYN_SRC_ADDR) 1884 id.src_ip6 = pkt->src_ip6; 1885 else 1886 memset(&id.src_ip6, 0, sizeof(id.src_ip6)); 1887 if (limit_mask & DYN_DST_ADDR) 1888 id.dst_ip6 = pkt->dst_ip6; 1889 else 1890 memset(&id.dst_ip6, 0, sizeof(id.dst_ip6)); 1891 } 1892 #endif 1893 else 1894 return (EAFNOSUPPORT); 1895 1896 parent_hashval = hash_parent(&id, rule); 1897 rule = dyn_get_parent_state(&id, zoneid, rule, parent_hashval, 1898 limit, kidx); 1899 if (rule == NULL) { 1900 #if 0 1901 if (V_fw_verbose && last_log != time_uptime) { 1902 last_log = time_uptime; 1903 snprintf(sbuf, sizeof(sbuf), 1904 "%u drop session", rule->rulenum); 1905 print_dyn_rule_flags(pkt, O_LIMIT, 1906 LOG_SECURITY | LOG_DEBUG, sbuf, 1907 "too many entries"); 1908 } 1909 #endif 1910 return (EACCES); 1911 } 1912 /* 1913 * Limit is not reached, create new state. 1914 * Now rule points to parent state. 1915 */ 1916 } 1917 1918 hashval = hash_packet(pkt); 1919 if (IS_IP4_FLOW_ID(pkt)) 1920 ret = dyn_add_ipv4_state(rule, ruleid, rulenum, set, pkt, 1921 ulp, pktlen, hashval, info, fibnum, kidx, type); 1922 #ifdef INET6 1923 else if (IS_IP6_FLOW_ID(pkt)) 1924 ret = dyn_add_ipv6_state(rule, ruleid, rulenum, set, pkt, 1925 zoneid, ulp, pktlen, hashval, info, fibnum, kidx, type); 1926 #endif /* INET6 */ 1927 else 1928 ret = EAFNOSUPPORT; 1929 1930 if (type == O_LIMIT) { 1931 if (ret != 0) { 1932 /* 1933 * We failed to create child state for O_LIMIT 1934 * opcode. Since we already counted it in the parent, 1935 * we must revert counter back. The 'rule' points to 1936 * parent state, use it to get dyn_parent. 1937 * 1938 * XXXAE: it should be safe to use 'rule' pointer 1939 * without extra lookup, parent state is referenced 1940 * and should not be freed. 1941 */ 1942 if (IS_IP4_FLOW_ID(&id)) 1943 DPARENT_COUNT_DEC( 1944 ((struct dyn_ipv4_state *)rule)->limit); 1945 #ifdef INET6 1946 else if (IS_IP6_FLOW_ID(&id)) 1947 DPARENT_COUNT_DEC( 1948 ((struct dyn_ipv6_state *)rule)->limit); 1949 #endif 1950 } 1951 } 1952 /* 1953 * EEXIST means that simultaneous thread has created this 1954 * state. Consider this as success. 1955 * 1956 * XXXAE: should we invalidate 'info' content here? 1957 */ 1958 if (ret == EEXIST) 1959 return (0); 1960 return (ret); 1961 } 1962 1963 /* 1964 * Install dynamic state. 1965 * chain - ipfw's instance; 1966 * rule - the parent rule that installs the state; 1967 * cmd - opcode that installs the state; 1968 * args - ipfw arguments; 1969 * ulp - upper level protocol header; 1970 * pktlen - packet length; 1971 * info - dynamic state lookup info; 1972 * tablearg - tablearg id. 1973 * 1974 * Returns non-zero value (failure) if state is not installed because 1975 * of errors or because session limitations are enforced. 1976 */ 1977 int 1978 ipfw_dyn_install_state(struct ip_fw_chain *chain, struct ip_fw *rule, 1979 const ipfw_insn_limit *cmd, const struct ip_fw_args *args, 1980 const void *ulp, int pktlen, struct ipfw_dyn_info *info, 1981 uint32_t tablearg) 1982 { 1983 uint32_t limit; 1984 uint16_t limit_mask; 1985 1986 if (cmd->o.opcode == O_LIMIT) { 1987 limit = IP_FW_ARG_TABLEARG(chain, cmd->conn_limit, limit); 1988 limit_mask = cmd->limit_mask; 1989 } else { 1990 limit = 0; 1991 limit_mask = 0; 1992 } 1993 return (dyn_install_state(&args->f_id, 1994 #ifdef INET6 1995 IS_IP6_FLOW_ID(&args->f_id) ? dyn_getscopeid(args): 1996 #endif 1997 0, M_GETFIB(args->m), ulp, pktlen, rule, rule->id, rule->rulenum, 1998 rule->set, info, limit, limit_mask, cmd->o.arg1, cmd->o.opcode)); 1999 } 2000 2001 /* 2002 * Free safe to remove state entries from expired lists. 2003 */ 2004 static void 2005 dyn_free_states(struct ip_fw_chain *chain) 2006 { 2007 struct dyn_ipv4_state *s4, *s4n; 2008 #ifdef INET6 2009 struct dyn_ipv6_state *s6, *s6n; 2010 #endif 2011 int cached_count, i; 2012 2013 /* 2014 * We keep pointers to objects that are in use on each CPU 2015 * in the per-cpu dyn_hp pointer. When object is going to be 2016 * removed, first of it is unlinked from the corresponding 2017 * list. This leads to changing of dyn_bucket_xxx_delver version. 2018 * Unlinked objects is placed into corresponding dyn_expired_xxx 2019 * list. Reader that is going to dereference object pointer checks 2020 * dyn_bucket_xxx_delver version before and after storing pointer 2021 * into dyn_hp. If version is the same, the object is protected 2022 * from freeing and it is safe to dereference. Othervise reader 2023 * tries to iterate list again from the beginning, but this object 2024 * now unlinked and thus will not be accessible. 2025 * 2026 * Copy dyn_hp pointers for each CPU into dyn_hp_cache array. 2027 * It does not matter that some pointer can be changed in 2028 * time while we are copying. We need to check, that objects 2029 * removed in the previous pass are not in use. And if dyn_hp 2030 * pointer does not contain it in the time when we are copying, 2031 * it will not appear there, because it is already unlinked. 2032 * And for new pointers we will not free objects that will be 2033 * unlinked in this pass. 2034 */ 2035 cached_count = 0; 2036 CPU_FOREACH(i) { 2037 dyn_hp_cache[cached_count] = DYNSTATE_GET(i); 2038 if (dyn_hp_cache[cached_count] != NULL) 2039 cached_count++; 2040 } 2041 2042 /* 2043 * Free expired states that are safe to free. 2044 * Check each entry from previous pass in the dyn_expired_xxx 2045 * list, if pointer to the object is in the dyn_hp_cache array, 2046 * keep it until next pass. Otherwise it is safe to free the 2047 * object. 2048 * 2049 * XXXAE: optimize this to use SLIST_REMOVE_AFTER. 2050 */ 2051 #define DYN_FREE_STATES(s, next, name) do { \ 2052 s = SLIST_FIRST(&V_dyn_expired_ ## name); \ 2053 while (s != NULL) { \ 2054 next = SLIST_NEXT(s, expired); \ 2055 for (i = 0; i < cached_count; i++) \ 2056 if (dyn_hp_cache[i] == s) \ 2057 break; \ 2058 if (i == cached_count) { \ 2059 if (s->type == O_LIMIT_PARENT && \ 2060 s->limit->count != 0) { \ 2061 s = next; \ 2062 continue; \ 2063 } \ 2064 SLIST_REMOVE(&V_dyn_expired_ ## name, \ 2065 s, dyn_ ## name ## _state, expired); \ 2066 if (s->type == O_LIMIT_PARENT) \ 2067 uma_zfree(V_dyn_parent_zone, s->limit); \ 2068 else \ 2069 uma_zfree(V_dyn_data_zone, s->data); \ 2070 uma_zfree(V_dyn_ ## name ## _zone, s); \ 2071 } \ 2072 s = next; \ 2073 } \ 2074 } while (0) 2075 2076 /* 2077 * Protect access to expired lists with DYN_EXPIRED_LOCK. 2078 * Userland can invoke ipfw_expire_dyn_states() to delete 2079 * specific states, this will lead to modification of expired 2080 * lists. 2081 * 2082 * XXXAE: do we need DYN_EXPIRED_LOCK? We can just use 2083 * IPFW_UH_WLOCK to protect access to these lists. 2084 */ 2085 DYN_EXPIRED_LOCK(); 2086 DYN_FREE_STATES(s4, s4n, ipv4); 2087 #ifdef INET6 2088 DYN_FREE_STATES(s6, s6n, ipv6); 2089 #endif 2090 DYN_EXPIRED_UNLOCK(); 2091 #undef DYN_FREE_STATES 2092 } 2093 2094 /* 2095 * Returns 1 when state is matched by specified range, otherwise returns 0. 2096 */ 2097 static int 2098 dyn_match_range(uint16_t rulenum, uint8_t set, const ipfw_range_tlv *rt) 2099 { 2100 2101 MPASS(rt != NULL); 2102 /* flush all states */ 2103 if (rt->flags & IPFW_RCFLAG_ALL) 2104 return (1); 2105 if ((rt->flags & IPFW_RCFLAG_SET) != 0 && set != rt->set) 2106 return (0); 2107 if ((rt->flags & IPFW_RCFLAG_RANGE) != 0 && 2108 (rulenum < rt->start_rule || rulenum > rt->end_rule)) 2109 return (0); 2110 return (1); 2111 } 2112 2113 static int 2114 dyn_match_ipv4_state(struct dyn_ipv4_state *s, const ipfw_range_tlv *rt) 2115 { 2116 2117 if (s->type == O_LIMIT_PARENT) 2118 return (dyn_match_range(s->limit->rulenum, 2119 s->limit->set, rt)); 2120 2121 if (s->type == O_LIMIT) 2122 return (dyn_match_range(s->data->rulenum, s->data->set, rt)); 2123 2124 if (dyn_match_range(s->data->rulenum, s->data->set, rt)) 2125 return (1); 2126 2127 return (0); 2128 } 2129 2130 #ifdef INET6 2131 static int 2132 dyn_match_ipv6_state(struct dyn_ipv6_state *s, const ipfw_range_tlv *rt) 2133 { 2134 2135 if (s->type == O_LIMIT_PARENT) 2136 return (dyn_match_range(s->limit->rulenum, 2137 s->limit->set, rt)); 2138 2139 if (s->type == O_LIMIT) 2140 return (dyn_match_range(s->data->rulenum, s->data->set, rt)); 2141 2142 if (dyn_match_range(s->data->rulenum, s->data->set, rt)) 2143 return (1); 2144 2145 return (0); 2146 } 2147 #endif 2148 2149 /* 2150 * Unlink expired entries from states lists. 2151 * @rt can be used to specify the range of states for deletion. 2152 */ 2153 static void 2154 dyn_expire_states(struct ip_fw_chain *chain, ipfw_range_tlv *rt) 2155 { 2156 struct dyn_ipv4_slist expired_ipv4; 2157 #ifdef INET6 2158 struct dyn_ipv6_slist expired_ipv6; 2159 struct dyn_ipv6_state *s6, *s6n, *s6p; 2160 #endif 2161 struct dyn_ipv4_state *s4, *s4n, *s4p; 2162 int bucket, removed, length, max_length; 2163 2164 /* 2165 * Unlink expired states from each bucket. 2166 * With acquired bucket lock iterate entries of each lists: 2167 * ipv4, ipv4_parent, ipv6, and ipv6_parent. Check expired time 2168 * and unlink entry from the list, link entry into temporary 2169 * expired_xxx lists then bump "del" bucket version. 2170 * 2171 * When an entry is removed, corresponding states counter is 2172 * decremented. If entry has O_LIMIT type, parent's reference 2173 * counter is decremented. 2174 * 2175 * NOTE: this function can be called from userspace context 2176 * when user deletes rules. In this case all matched states 2177 * will be forcedly unlinked. O_LIMIT_PARENT states will be kept 2178 * in the expired lists until reference counter become zero. 2179 */ 2180 #define DYN_UNLINK_STATES(s, prev, next, exp, af, name, extra) do { \ 2181 length = 0; \ 2182 removed = 0; \ 2183 prev = NULL; \ 2184 s = CK_SLIST_FIRST(&V_dyn_ ## name [bucket]); \ 2185 while (s != NULL) { \ 2186 next = CK_SLIST_NEXT(s, entry); \ 2187 if ((TIME_LEQ((s)->exp, time_uptime) && extra) || \ 2188 (rt != NULL && dyn_match_ ## af ## _state(s, rt))) {\ 2189 if (prev != NULL) \ 2190 CK_SLIST_REMOVE_AFTER(prev, entry); \ 2191 else \ 2192 CK_SLIST_REMOVE_HEAD( \ 2193 &V_dyn_ ## name [bucket], entry); \ 2194 removed++; \ 2195 SLIST_INSERT_HEAD(&expired_ ## af, s, expired); \ 2196 if (s->type == O_LIMIT_PARENT) \ 2197 DYN_COUNT_DEC(dyn_parent_count); \ 2198 else { \ 2199 DYN_COUNT_DEC(dyn_count); \ 2200 if (s->type == O_LIMIT) { \ 2201 s = s->data->parent; \ 2202 DPARENT_COUNT_DEC(s->limit); \ 2203 } \ 2204 } \ 2205 } else { \ 2206 prev = s; \ 2207 length++; \ 2208 } \ 2209 s = next; \ 2210 } \ 2211 if (removed != 0) \ 2212 DYN_BUCKET_VERSION_BUMP(bucket, name ## _del); \ 2213 if (length > max_length) \ 2214 max_length = length; \ 2215 } while (0) 2216 2217 SLIST_INIT(&expired_ipv4); 2218 #ifdef INET6 2219 SLIST_INIT(&expired_ipv6); 2220 #endif 2221 max_length = 0; 2222 for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) { 2223 DYN_BUCKET_LOCK(bucket); 2224 DYN_UNLINK_STATES(s4, s4p, s4n, data->expire, ipv4, ipv4, 1); 2225 DYN_UNLINK_STATES(s4, s4p, s4n, limit->expire, ipv4, 2226 ipv4_parent, (s4->limit->count == 0)); 2227 #ifdef INET6 2228 DYN_UNLINK_STATES(s6, s6p, s6n, data->expire, ipv6, ipv6, 1); 2229 DYN_UNLINK_STATES(s6, s6p, s6n, limit->expire, ipv6, 2230 ipv6_parent, (s6->limit->count == 0)); 2231 #endif 2232 DYN_BUCKET_UNLOCK(bucket); 2233 } 2234 /* Update curr_max_length for statistics. */ 2235 V_curr_max_length = max_length; 2236 /* 2237 * Concatenate temporary lists with global expired lists. 2238 */ 2239 DYN_EXPIRED_LOCK(); 2240 SLIST_CONCAT(&V_dyn_expired_ipv4, &expired_ipv4, 2241 dyn_ipv4_state, expired); 2242 #ifdef INET6 2243 SLIST_CONCAT(&V_dyn_expired_ipv6, &expired_ipv6, 2244 dyn_ipv6_state, expired); 2245 #endif 2246 DYN_EXPIRED_UNLOCK(); 2247 #undef DYN_UNLINK_STATES 2248 #undef DYN_UNREF_STATES 2249 } 2250 2251 static struct mbuf * 2252 dyn_mgethdr(int len, uint16_t fibnum) 2253 { 2254 struct mbuf *m; 2255 2256 m = m_gethdr(M_NOWAIT, MT_DATA); 2257 if (m == NULL) 2258 return (NULL); 2259 #ifdef MAC 2260 mac_netinet_firewall_send(m); 2261 #endif 2262 M_SETFIB(m, fibnum); 2263 m->m_data += max_linkhdr; 2264 m->m_flags |= M_SKIP_FIREWALL; 2265 m->m_len = m->m_pkthdr.len = len; 2266 bzero(m->m_data, len); 2267 return (m); 2268 } 2269 2270 static void 2271 dyn_make_keepalive_ipv4(struct mbuf *m, in_addr_t src, in_addr_t dst, 2272 uint32_t seq, uint32_t ack, uint16_t sport, uint16_t dport) 2273 { 2274 struct tcphdr *tcp; 2275 struct ip *ip; 2276 2277 ip = mtod(m, struct ip *); 2278 ip->ip_v = 4; 2279 ip->ip_hl = sizeof(*ip) >> 2; 2280 ip->ip_tos = IPTOS_LOWDELAY; 2281 ip->ip_len = htons(m->m_len); 2282 ip->ip_off |= htons(IP_DF); 2283 ip->ip_ttl = V_ip_defttl; 2284 ip->ip_p = IPPROTO_TCP; 2285 ip->ip_src.s_addr = htonl(src); 2286 ip->ip_dst.s_addr = htonl(dst); 2287 2288 tcp = mtodo(m, sizeof(struct ip)); 2289 tcp->th_sport = htons(sport); 2290 tcp->th_dport = htons(dport); 2291 tcp->th_off = sizeof(struct tcphdr) >> 2; 2292 tcp->th_seq = htonl(seq); 2293 tcp->th_ack = htonl(ack); 2294 tcp->th_flags = TH_ACK; 2295 tcp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2296 htons(sizeof(struct tcphdr) + IPPROTO_TCP)); 2297 2298 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 2299 m->m_pkthdr.csum_flags = CSUM_TCP; 2300 } 2301 2302 static void 2303 dyn_enqueue_keepalive_ipv4(struct mbufq *q, const struct dyn_ipv4_state *s) 2304 { 2305 struct mbuf *m; 2306 2307 if ((s->data->state & ACK_FWD) == 0 && s->data->ack_fwd > 0) { 2308 m = dyn_mgethdr(sizeof(struct ip) + sizeof(struct tcphdr), 2309 s->data->fibnum); 2310 if (m != NULL) { 2311 dyn_make_keepalive_ipv4(m, s->dst, s->src, 2312 s->data->ack_fwd - 1, s->data->ack_rev, 2313 s->dport, s->sport); 2314 if (mbufq_enqueue(q, m)) { 2315 m_freem(m); 2316 log(LOG_DEBUG, "ipfw: limit for IPv4 " 2317 "keepalive queue is reached.\n"); 2318 return; 2319 } 2320 } 2321 } 2322 2323 if ((s->data->state & ACK_REV) == 0 && s->data->ack_rev > 0) { 2324 m = dyn_mgethdr(sizeof(struct ip) + sizeof(struct tcphdr), 2325 s->data->fibnum); 2326 if (m != NULL) { 2327 dyn_make_keepalive_ipv4(m, s->src, s->dst, 2328 s->data->ack_rev - 1, s->data->ack_fwd, 2329 s->sport, s->dport); 2330 if (mbufq_enqueue(q, m)) { 2331 m_freem(m); 2332 log(LOG_DEBUG, "ipfw: limit for IPv4 " 2333 "keepalive queue is reached.\n"); 2334 return; 2335 } 2336 } 2337 } 2338 } 2339 2340 /* 2341 * Prepare and send keep-alive packets. 2342 */ 2343 static void 2344 dyn_send_keepalive_ipv4(struct ip_fw_chain *chain) 2345 { 2346 struct mbufq q; 2347 struct mbuf *m; 2348 struct dyn_ipv4_state *s; 2349 uint32_t bucket; 2350 2351 mbufq_init(&q, DYN_KEEPALIVE_MAXQ); 2352 IPFW_UH_RLOCK(chain); 2353 /* 2354 * It is safe to not use hazard pointer and just do lockless 2355 * access to the lists, because states entries can not be deleted 2356 * while we hold IPFW_UH_RLOCK. 2357 */ 2358 for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) { 2359 CK_SLIST_FOREACH(s, &V_dyn_ipv4[bucket], entry) { 2360 /* 2361 * Only established TCP connections that will 2362 * become expired withing dyn_keepalive_interval. 2363 */ 2364 if (s->proto != IPPROTO_TCP || 2365 (s->data->state & BOTH_SYN) != BOTH_SYN || 2366 TIME_LEQ(time_uptime + V_dyn_keepalive_interval, 2367 s->data->expire)) 2368 continue; 2369 dyn_enqueue_keepalive_ipv4(&q, s); 2370 } 2371 } 2372 IPFW_UH_RUNLOCK(chain); 2373 while ((m = mbufq_dequeue(&q)) != NULL) 2374 ip_output(m, NULL, NULL, 0, NULL, NULL); 2375 } 2376 2377 #ifdef INET6 2378 static void 2379 dyn_make_keepalive_ipv6(struct mbuf *m, const struct in6_addr *src, 2380 const struct in6_addr *dst, uint32_t zoneid, uint32_t seq, uint32_t ack, 2381 uint16_t sport, uint16_t dport) 2382 { 2383 struct tcphdr *tcp; 2384 struct ip6_hdr *ip6; 2385 2386 ip6 = mtod(m, struct ip6_hdr *); 2387 ip6->ip6_vfc |= IPV6_VERSION; 2388 ip6->ip6_plen = htons(sizeof(struct tcphdr)); 2389 ip6->ip6_nxt = IPPROTO_TCP; 2390 ip6->ip6_hlim = IPV6_DEFHLIM; 2391 ip6->ip6_src = *src; 2392 if (IN6_IS_ADDR_LINKLOCAL(src)) 2393 ip6->ip6_src.s6_addr16[1] = htons(zoneid & 0xffff); 2394 ip6->ip6_dst = *dst; 2395 if (IN6_IS_ADDR_LINKLOCAL(dst)) 2396 ip6->ip6_dst.s6_addr16[1] = htons(zoneid & 0xffff); 2397 2398 tcp = mtodo(m, sizeof(struct ip6_hdr)); 2399 tcp->th_sport = htons(sport); 2400 tcp->th_dport = htons(dport); 2401 tcp->th_off = sizeof(struct tcphdr) >> 2; 2402 tcp->th_seq = htonl(seq); 2403 tcp->th_ack = htonl(ack); 2404 tcp->th_flags = TH_ACK; 2405 tcp->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr), 2406 IPPROTO_TCP, 0); 2407 2408 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 2409 m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; 2410 } 2411 2412 static void 2413 dyn_enqueue_keepalive_ipv6(struct mbufq *q, const struct dyn_ipv6_state *s) 2414 { 2415 struct mbuf *m; 2416 2417 if ((s->data->state & ACK_FWD) == 0 && s->data->ack_fwd > 0) { 2418 m = dyn_mgethdr(sizeof(struct ip6_hdr) + 2419 sizeof(struct tcphdr), s->data->fibnum); 2420 if (m != NULL) { 2421 dyn_make_keepalive_ipv6(m, &s->dst, &s->src, 2422 s->zoneid, s->data->ack_fwd - 1, s->data->ack_rev, 2423 s->dport, s->sport); 2424 if (mbufq_enqueue(q, m)) { 2425 m_freem(m); 2426 log(LOG_DEBUG, "ipfw: limit for IPv6 " 2427 "keepalive queue is reached.\n"); 2428 return; 2429 } 2430 } 2431 } 2432 2433 if ((s->data->state & ACK_REV) == 0 && s->data->ack_rev > 0) { 2434 m = dyn_mgethdr(sizeof(struct ip6_hdr) + 2435 sizeof(struct tcphdr), s->data->fibnum); 2436 if (m != NULL) { 2437 dyn_make_keepalive_ipv6(m, &s->src, &s->dst, 2438 s->zoneid, s->data->ack_rev - 1, s->data->ack_fwd, 2439 s->sport, s->dport); 2440 if (mbufq_enqueue(q, m)) { 2441 m_freem(m); 2442 log(LOG_DEBUG, "ipfw: limit for IPv6 " 2443 "keepalive queue is reached.\n"); 2444 return; 2445 } 2446 } 2447 } 2448 } 2449 2450 static void 2451 dyn_send_keepalive_ipv6(struct ip_fw_chain *chain) 2452 { 2453 struct mbufq q; 2454 struct mbuf *m; 2455 struct dyn_ipv6_state *s; 2456 uint32_t bucket; 2457 2458 mbufq_init(&q, DYN_KEEPALIVE_MAXQ); 2459 IPFW_UH_RLOCK(chain); 2460 /* 2461 * It is safe to not use hazard pointer and just do lockless 2462 * access to the lists, because states entries can not be deleted 2463 * while we hold IPFW_UH_RLOCK. 2464 */ 2465 for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) { 2466 CK_SLIST_FOREACH(s, &V_dyn_ipv6[bucket], entry) { 2467 /* 2468 * Only established TCP connections that will 2469 * become expired withing dyn_keepalive_interval. 2470 */ 2471 if (s->proto != IPPROTO_TCP || 2472 (s->data->state & BOTH_SYN) != BOTH_SYN || 2473 TIME_LEQ(time_uptime + V_dyn_keepalive_interval, 2474 s->data->expire)) 2475 continue; 2476 dyn_enqueue_keepalive_ipv6(&q, s); 2477 } 2478 } 2479 IPFW_UH_RUNLOCK(chain); 2480 while ((m = mbufq_dequeue(&q)) != NULL) 2481 ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); 2482 } 2483 #endif /* INET6 */ 2484 2485 static void 2486 dyn_grow_hashtable(struct ip_fw_chain *chain, uint32_t new) 2487 { 2488 #ifdef INET6 2489 struct dyn_ipv6ck_slist *ipv6, *ipv6_parent; 2490 uint32_t *ipv6_add, *ipv6_del, *ipv6_parent_add, *ipv6_parent_del; 2491 struct dyn_ipv6_state *s6; 2492 #endif 2493 struct dyn_ipv4ck_slist *ipv4, *ipv4_parent; 2494 uint32_t *ipv4_add, *ipv4_del, *ipv4_parent_add, *ipv4_parent_del; 2495 struct dyn_ipv4_state *s4; 2496 struct mtx *bucket_lock; 2497 void *tmp; 2498 uint32_t bucket; 2499 2500 MPASS(powerof2(new)); 2501 DYN_DEBUG("grow hash size %u -> %u", V_curr_dyn_buckets, new); 2502 /* 2503 * Allocate and initialize new lists. 2504 * XXXAE: on memory pressure this can disable callout timer. 2505 */ 2506 bucket_lock = malloc(new * sizeof(struct mtx), M_IPFW, 2507 M_WAITOK | M_ZERO); 2508 ipv4 = malloc(new * sizeof(struct dyn_ipv4ck_slist), M_IPFW, 2509 M_WAITOK | M_ZERO); 2510 ipv4_parent = malloc(new * sizeof(struct dyn_ipv4ck_slist), M_IPFW, 2511 M_WAITOK | M_ZERO); 2512 ipv4_add = malloc(new * sizeof(uint32_t), M_IPFW, M_WAITOK | M_ZERO); 2513 ipv4_del = malloc(new * sizeof(uint32_t), M_IPFW, M_WAITOK | M_ZERO); 2514 ipv4_parent_add = malloc(new * sizeof(uint32_t), M_IPFW, 2515 M_WAITOK | M_ZERO); 2516 ipv4_parent_del = malloc(new * sizeof(uint32_t), M_IPFW, 2517 M_WAITOK | M_ZERO); 2518 #ifdef INET6 2519 ipv6 = malloc(new * sizeof(struct dyn_ipv6ck_slist), M_IPFW, 2520 M_WAITOK | M_ZERO); 2521 ipv6_parent = malloc(new * sizeof(struct dyn_ipv6ck_slist), M_IPFW, 2522 M_WAITOK | M_ZERO); 2523 ipv6_add = malloc(new * sizeof(uint32_t), M_IPFW, M_WAITOK | M_ZERO); 2524 ipv6_del = malloc(new * sizeof(uint32_t), M_IPFW, M_WAITOK | M_ZERO); 2525 ipv6_parent_add = malloc(new * sizeof(uint32_t), M_IPFW, 2526 M_WAITOK | M_ZERO); 2527 ipv6_parent_del = malloc(new * sizeof(uint32_t), M_IPFW, 2528 M_WAITOK | M_ZERO); 2529 #endif 2530 for (bucket = 0; bucket < new; bucket++) { 2531 DYN_BUCKET_LOCK_INIT(bucket_lock, bucket); 2532 CK_SLIST_INIT(&ipv4[bucket]); 2533 CK_SLIST_INIT(&ipv4_parent[bucket]); 2534 #ifdef INET6 2535 CK_SLIST_INIT(&ipv6[bucket]); 2536 CK_SLIST_INIT(&ipv6_parent[bucket]); 2537 #endif 2538 } 2539 2540 #define DYN_RELINK_STATES(s, hval, i, head, ohead) do { \ 2541 while ((s = CK_SLIST_FIRST(&V_dyn_ ## ohead[i])) != NULL) { \ 2542 CK_SLIST_REMOVE_HEAD(&V_dyn_ ## ohead[i], entry); \ 2543 CK_SLIST_INSERT_HEAD(&head[DYN_BUCKET(s->hval, new)], \ 2544 s, entry); \ 2545 } \ 2546 } while (0) 2547 /* 2548 * Prevent rules changing from userland. 2549 */ 2550 IPFW_UH_WLOCK(chain); 2551 /* 2552 * Hold traffic processing until we finish resize to 2553 * prevent access to states lists. 2554 */ 2555 IPFW_WLOCK(chain); 2556 /* Re-link all dynamic states */ 2557 for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) { 2558 DYN_RELINK_STATES(s4, data->hashval, bucket, ipv4, ipv4); 2559 DYN_RELINK_STATES(s4, limit->hashval, bucket, ipv4_parent, 2560 ipv4_parent); 2561 #ifdef INET6 2562 DYN_RELINK_STATES(s6, data->hashval, bucket, ipv6, ipv6); 2563 DYN_RELINK_STATES(s6, limit->hashval, bucket, ipv6_parent, 2564 ipv6_parent); 2565 #endif 2566 } 2567 2568 #define DYN_SWAP_PTR(old, new, tmp) do { \ 2569 tmp = old; \ 2570 old = new; \ 2571 new = tmp; \ 2572 } while (0) 2573 /* Swap pointers */ 2574 DYN_SWAP_PTR(V_dyn_bucket_lock, bucket_lock, tmp); 2575 DYN_SWAP_PTR(V_dyn_ipv4, ipv4, tmp); 2576 DYN_SWAP_PTR(V_dyn_ipv4_parent, ipv4_parent, tmp); 2577 DYN_SWAP_PTR(V_dyn_ipv4_add, ipv4_add, tmp); 2578 DYN_SWAP_PTR(V_dyn_ipv4_parent_add, ipv4_parent_add, tmp); 2579 DYN_SWAP_PTR(V_dyn_ipv4_del, ipv4_del, tmp); 2580 DYN_SWAP_PTR(V_dyn_ipv4_parent_del, ipv4_parent_del, tmp); 2581 2582 #ifdef INET6 2583 DYN_SWAP_PTR(V_dyn_ipv6, ipv6, tmp); 2584 DYN_SWAP_PTR(V_dyn_ipv6_parent, ipv6_parent, tmp); 2585 DYN_SWAP_PTR(V_dyn_ipv6_add, ipv6_add, tmp); 2586 DYN_SWAP_PTR(V_dyn_ipv6_parent_add, ipv6_parent_add, tmp); 2587 DYN_SWAP_PTR(V_dyn_ipv6_del, ipv6_del, tmp); 2588 DYN_SWAP_PTR(V_dyn_ipv6_parent_del, ipv6_parent_del, tmp); 2589 #endif 2590 bucket = V_curr_dyn_buckets; 2591 V_curr_dyn_buckets = new; 2592 2593 IPFW_WUNLOCK(chain); 2594 IPFW_UH_WUNLOCK(chain); 2595 2596 /* Release old resources */ 2597 while (bucket-- != 0) 2598 DYN_BUCKET_LOCK_DESTROY(bucket_lock, bucket); 2599 free(bucket_lock, M_IPFW); 2600 free(ipv4, M_IPFW); 2601 free(ipv4_parent, M_IPFW); 2602 free(ipv4_add, M_IPFW); 2603 free(ipv4_parent_add, M_IPFW); 2604 free(ipv4_del, M_IPFW); 2605 free(ipv4_parent_del, M_IPFW); 2606 #ifdef INET6 2607 free(ipv6, M_IPFW); 2608 free(ipv6_parent, M_IPFW); 2609 free(ipv6_add, M_IPFW); 2610 free(ipv6_parent_add, M_IPFW); 2611 free(ipv6_del, M_IPFW); 2612 free(ipv6_parent_del, M_IPFW); 2613 #endif 2614 } 2615 2616 /* 2617 * This function is used to perform various maintenance 2618 * on dynamic hash lists. Currently it is called every second. 2619 */ 2620 static void 2621 dyn_tick(void *vnetx) 2622 { 2623 uint32_t buckets; 2624 2625 CURVNET_SET((struct vnet *)vnetx); 2626 /* 2627 * First free states unlinked in previous passes. 2628 */ 2629 dyn_free_states(&V_layer3_chain); 2630 /* 2631 * Now unlink others expired states. 2632 * We use IPFW_UH_WLOCK to avoid concurrent call of 2633 * dyn_expire_states(). It is the only function that does 2634 * deletion of state entries from states lists. 2635 */ 2636 IPFW_UH_WLOCK(&V_layer3_chain); 2637 dyn_expire_states(&V_layer3_chain, NULL); 2638 IPFW_UH_WUNLOCK(&V_layer3_chain); 2639 /* 2640 * Send keepalives if they are enabled and the time has come. 2641 */ 2642 if (V_dyn_keepalive != 0 && 2643 V_dyn_keepalive_last + V_dyn_keepalive_period <= time_uptime) { 2644 V_dyn_keepalive_last = time_uptime; 2645 dyn_send_keepalive_ipv4(&V_layer3_chain); 2646 #ifdef INET6 2647 dyn_send_keepalive_ipv6(&V_layer3_chain); 2648 #endif 2649 } 2650 /* 2651 * Check if we need to resize the hash: 2652 * if current number of states exceeds number of buckets in hash, 2653 * and dyn_buckets_max permits to grow the number of buckets, then 2654 * do it. Grow hash size to the minimum power of 2 which is bigger 2655 * than current states count. 2656 */ 2657 if (V_curr_dyn_buckets < V_dyn_buckets_max && 2658 (V_curr_dyn_buckets < V_dyn_count / 2 || ( 2659 V_curr_dyn_buckets < V_dyn_count && V_curr_max_length > 8))) { 2660 buckets = 1 << fls(V_dyn_count); 2661 if (buckets > V_dyn_buckets_max) 2662 buckets = V_dyn_buckets_max; 2663 dyn_grow_hashtable(&V_layer3_chain, buckets); 2664 } 2665 2666 callout_reset_on(&V_dyn_timeout, hz, dyn_tick, vnetx, 0); 2667 CURVNET_RESTORE(); 2668 } 2669 2670 void 2671 ipfw_expire_dyn_states(struct ip_fw_chain *chain, ipfw_range_tlv *rt) 2672 { 2673 /* 2674 * Do not perform any checks if we currently have no dynamic states 2675 */ 2676 if (V_dyn_count == 0) 2677 return; 2678 2679 IPFW_UH_WLOCK_ASSERT(chain); 2680 dyn_expire_states(chain, rt); 2681 } 2682 2683 /* 2684 * Returns size of dynamic states in legacy format 2685 */ 2686 int 2687 ipfw_dyn_len(void) 2688 { 2689 2690 return ((V_dyn_count + V_dyn_parent_count) * sizeof(ipfw_dyn_rule)); 2691 } 2692 2693 /* 2694 * Returns number of dynamic states. 2695 * Used by dump format v1 (current). 2696 */ 2697 uint32_t 2698 ipfw_dyn_get_count(void) 2699 { 2700 2701 return (V_dyn_count + V_dyn_parent_count); 2702 } 2703 2704 /* 2705 * Check if rule contains at least one dynamic opcode. 2706 * 2707 * Returns 1 if such opcode is found, 0 otherwise. 2708 */ 2709 int 2710 ipfw_is_dyn_rule(struct ip_fw *rule) 2711 { 2712 int cmdlen, l; 2713 ipfw_insn *cmd; 2714 2715 l = rule->cmd_len; 2716 cmd = rule->cmd; 2717 cmdlen = 0; 2718 for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { 2719 cmdlen = F_LEN(cmd); 2720 2721 switch (cmd->opcode) { 2722 case O_LIMIT: 2723 case O_KEEP_STATE: 2724 case O_PROBE_STATE: 2725 case O_CHECK_STATE: 2726 return (1); 2727 } 2728 } 2729 2730 return (0); 2731 } 2732 2733 static void 2734 dyn_export_parent(const struct dyn_parent *p, uint16_t kidx, 2735 ipfw_dyn_rule *dst) 2736 { 2737 2738 dst->dyn_type = O_LIMIT_PARENT; 2739 dst->kidx = kidx; 2740 dst->count = (uint16_t)DPARENT_COUNT(p); 2741 dst->expire = TIME_LEQ(p->expire, time_uptime) ? 0: 2742 p->expire - time_uptime; 2743 2744 /* 'rule' is used to pass up the rule number and set */ 2745 memcpy(&dst->rule, &p->rulenum, sizeof(p->rulenum)); 2746 /* store set number into high word of dst->rule pointer. */ 2747 memcpy((char *)&dst->rule + sizeof(p->rulenum), &p->set, 2748 sizeof(p->set)); 2749 2750 /* unused fields */ 2751 dst->pcnt = 0; 2752 dst->bcnt = 0; 2753 dst->parent = NULL; 2754 dst->state = 0; 2755 dst->ack_fwd = 0; 2756 dst->ack_rev = 0; 2757 dst->bucket = p->hashval; 2758 /* 2759 * The legacy userland code will interpret a NULL here as a marker 2760 * for the last dynamic rule. 2761 */ 2762 dst->next = (ipfw_dyn_rule *)1; 2763 } 2764 2765 static void 2766 dyn_export_data(const struct dyn_data *data, uint16_t kidx, uint8_t type, 2767 ipfw_dyn_rule *dst) 2768 { 2769 2770 dst->dyn_type = type; 2771 dst->kidx = kidx; 2772 dst->pcnt = data->pcnt_fwd + data->pcnt_rev; 2773 dst->bcnt = data->bcnt_fwd + data->bcnt_rev; 2774 dst->expire = TIME_LEQ(data->expire, time_uptime) ? 0: 2775 data->expire - time_uptime; 2776 2777 /* 'rule' is used to pass up the rule number and set */ 2778 memcpy(&dst->rule, &data->rulenum, sizeof(data->rulenum)); 2779 /* store set number into high word of dst->rule pointer. */ 2780 memcpy((char *)&dst->rule + sizeof(data->rulenum), &data->set, 2781 sizeof(data->set)); 2782 2783 /* unused fields */ 2784 dst->parent = NULL; 2785 dst->state = data->state; 2786 dst->ack_fwd = data->ack_fwd; 2787 dst->ack_rev = data->ack_rev; 2788 dst->count = 0; 2789 dst->bucket = data->hashval; 2790 /* 2791 * The legacy userland code will interpret a NULL here as a marker 2792 * for the last dynamic rule. 2793 */ 2794 dst->next = (ipfw_dyn_rule *)1; 2795 } 2796 2797 static void 2798 dyn_export_ipv4_state(const struct dyn_ipv4_state *s, ipfw_dyn_rule *dst) 2799 { 2800 2801 switch (s->type) { 2802 case O_LIMIT_PARENT: 2803 dyn_export_parent(s->limit, s->kidx, dst); 2804 break; 2805 default: 2806 dyn_export_data(s->data, s->kidx, s->type, dst); 2807 } 2808 2809 dst->id.dst_ip = s->dst; 2810 dst->id.src_ip = s->src; 2811 dst->id.dst_port = s->dport; 2812 dst->id.src_port = s->sport; 2813 dst->id.fib = s->data->fibnum; 2814 dst->id.proto = s->proto; 2815 dst->id._flags = 0; 2816 dst->id.addr_type = 4; 2817 2818 memset(&dst->id.dst_ip6, 0, sizeof(dst->id.dst_ip6)); 2819 memset(&dst->id.src_ip6, 0, sizeof(dst->id.src_ip6)); 2820 dst->id.flow_id6 = dst->id.extra = 0; 2821 } 2822 2823 #ifdef INET6 2824 static void 2825 dyn_export_ipv6_state(const struct dyn_ipv6_state *s, ipfw_dyn_rule *dst) 2826 { 2827 2828 switch (s->type) { 2829 case O_LIMIT_PARENT: 2830 dyn_export_parent(s->limit, s->kidx, dst); 2831 break; 2832 default: 2833 dyn_export_data(s->data, s->kidx, s->type, dst); 2834 } 2835 2836 dst->id.src_ip6 = s->src; 2837 dst->id.dst_ip6 = s->dst; 2838 dst->id.dst_port = s->dport; 2839 dst->id.src_port = s->sport; 2840 dst->id.fib = s->data->fibnum; 2841 dst->id.proto = s->proto; 2842 dst->id._flags = 0; 2843 dst->id.addr_type = 6; 2844 2845 dst->id.dst_ip = dst->id.src_ip = 0; 2846 dst->id.flow_id6 = dst->id.extra = 0; 2847 } 2848 #endif /* INET6 */ 2849 2850 /* 2851 * Fills the buffer given by @sd with dynamic states. 2852 * Used by dump format v1 (current). 2853 * 2854 * Returns 0 on success. 2855 */ 2856 int 2857 ipfw_dump_states(struct ip_fw_chain *chain, struct sockopt_data *sd) 2858 { 2859 #ifdef INET6 2860 struct dyn_ipv6_state *s6; 2861 #endif 2862 struct dyn_ipv4_state *s4; 2863 ipfw_obj_dyntlv *dst, *last; 2864 ipfw_obj_ctlv *ctlv; 2865 uint32_t bucket; 2866 2867 if (V_dyn_count == 0) 2868 return (0); 2869 2870 /* 2871 * IPFW_UH_RLOCK garantees that another userland request 2872 * and callout thread will not delete entries from states 2873 * lists. 2874 */ 2875 IPFW_UH_RLOCK_ASSERT(chain); 2876 2877 ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv)); 2878 if (ctlv == NULL) 2879 return (ENOMEM); 2880 ctlv->head.type = IPFW_TLV_DYNSTATE_LIST; 2881 ctlv->objsize = sizeof(ipfw_obj_dyntlv); 2882 last = NULL; 2883 2884 #define DYN_EXPORT_STATES(s, af, h, b) \ 2885 CK_SLIST_FOREACH(s, &V_dyn_ ## h[b], entry) { \ 2886 dst = (ipfw_obj_dyntlv *)ipfw_get_sopt_space(sd, \ 2887 sizeof(ipfw_obj_dyntlv)); \ 2888 if (dst == NULL) \ 2889 return (ENOMEM); \ 2890 dyn_export_ ## af ## _state(s, &dst->state); \ 2891 dst->head.length = sizeof(ipfw_obj_dyntlv); \ 2892 dst->head.type = IPFW_TLV_DYN_ENT; \ 2893 last = dst; \ 2894 } 2895 2896 for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) { 2897 DYN_EXPORT_STATES(s4, ipv4, ipv4_parent, bucket); 2898 DYN_EXPORT_STATES(s4, ipv4, ipv4, bucket); 2899 #ifdef INET6 2900 DYN_EXPORT_STATES(s6, ipv6, ipv6_parent, bucket); 2901 DYN_EXPORT_STATES(s6, ipv6, ipv6, bucket); 2902 #endif /* INET6 */ 2903 } 2904 2905 /* mark last dynamic rule */ 2906 if (last != NULL) 2907 last->head.flags = IPFW_DF_LAST; /* XXX: unused */ 2908 return (0); 2909 #undef DYN_EXPORT_STATES 2910 } 2911 2912 /* 2913 * Fill given buffer with dynamic states (legacy format). 2914 * IPFW_UH_RLOCK has to be held while calling. 2915 */ 2916 void 2917 ipfw_get_dynamic(struct ip_fw_chain *chain, char **pbp, const char *ep) 2918 { 2919 #ifdef INET6 2920 struct dyn_ipv6_state *s6; 2921 #endif 2922 struct dyn_ipv4_state *s4; 2923 ipfw_dyn_rule *p, *last = NULL; 2924 char *bp; 2925 uint32_t bucket; 2926 2927 if (V_dyn_count == 0) 2928 return; 2929 bp = *pbp; 2930 2931 IPFW_UH_RLOCK_ASSERT(chain); 2932 2933 #define DYN_EXPORT_STATES(s, af, head, b) \ 2934 CK_SLIST_FOREACH(s, &V_dyn_ ## head[b], entry) { \ 2935 if (bp + sizeof(*p) > ep) \ 2936 break; \ 2937 p = (ipfw_dyn_rule *)bp; \ 2938 dyn_export_ ## af ## _state(s, p); \ 2939 last = p; \ 2940 bp += sizeof(*p); \ 2941 } 2942 2943 for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) { 2944 DYN_EXPORT_STATES(s4, ipv4, ipv4_parent, bucket); 2945 DYN_EXPORT_STATES(s4, ipv4, ipv4, bucket); 2946 #ifdef INET6 2947 DYN_EXPORT_STATES(s6, ipv6, ipv6_parent, bucket); 2948 DYN_EXPORT_STATES(s6, ipv6, ipv6, bucket); 2949 #endif /* INET6 */ 2950 } 2951 2952 if (last != NULL) /* mark last dynamic rule */ 2953 last->next = NULL; 2954 *pbp = bp; 2955 #undef DYN_EXPORT_STATES 2956 } 2957 2958 void 2959 ipfw_dyn_init(struct ip_fw_chain *chain) 2960 { 2961 2962 #ifdef IPFIREWALL_JENKINSHASH 2963 V_dyn_hashseed = arc4random(); 2964 #endif 2965 V_dyn_max = 16384; /* max # of states */ 2966 V_dyn_parent_max = 4096; /* max # of parent states */ 2967 V_dyn_buckets_max = 8192; /* must be power of 2 */ 2968 2969 V_dyn_ack_lifetime = 300; 2970 V_dyn_syn_lifetime = 20; 2971 V_dyn_fin_lifetime = 1; 2972 V_dyn_rst_lifetime = 1; 2973 V_dyn_udp_lifetime = 10; 2974 V_dyn_short_lifetime = 5; 2975 2976 V_dyn_keepalive_interval = 20; 2977 V_dyn_keepalive_period = 5; 2978 V_dyn_keepalive = 1; /* send keepalives */ 2979 V_dyn_keepalive_last = time_uptime; 2980 2981 V_dyn_data_zone = uma_zcreate("IPFW dynamic states data", 2982 sizeof(struct dyn_data), NULL, NULL, NULL, NULL, 2983 UMA_ALIGN_PTR, 0); 2984 uma_zone_set_max(V_dyn_data_zone, V_dyn_max); 2985 2986 V_dyn_parent_zone = uma_zcreate("IPFW parent dynamic states", 2987 sizeof(struct dyn_parent), NULL, NULL, NULL, NULL, 2988 UMA_ALIGN_PTR, 0); 2989 uma_zone_set_max(V_dyn_parent_zone, V_dyn_parent_max); 2990 2991 SLIST_INIT(&V_dyn_expired_ipv4); 2992 V_dyn_ipv4 = NULL; 2993 V_dyn_ipv4_parent = NULL; 2994 V_dyn_ipv4_zone = uma_zcreate("IPFW IPv4 dynamic states", 2995 sizeof(struct dyn_ipv4_state), NULL, NULL, NULL, NULL, 2996 UMA_ALIGN_PTR, 0); 2997 2998 #ifdef INET6 2999 SLIST_INIT(&V_dyn_expired_ipv6); 3000 V_dyn_ipv6 = NULL; 3001 V_dyn_ipv6_parent = NULL; 3002 V_dyn_ipv6_zone = uma_zcreate("IPFW IPv6 dynamic states", 3003 sizeof(struct dyn_ipv6_state), NULL, NULL, NULL, NULL, 3004 UMA_ALIGN_PTR, 0); 3005 #endif 3006 3007 /* Initialize buckets. */ 3008 V_curr_dyn_buckets = 0; 3009 V_dyn_bucket_lock = NULL; 3010 dyn_grow_hashtable(chain, 256); 3011 3012 if (IS_DEFAULT_VNET(curvnet)) 3013 dyn_hp_cache = malloc(mp_ncpus * sizeof(void *), M_IPFW, 3014 M_WAITOK | M_ZERO); 3015 3016 DYN_EXPIRED_LOCK_INIT(); 3017 callout_init(&V_dyn_timeout, 1); 3018 callout_reset(&V_dyn_timeout, hz, dyn_tick, curvnet); 3019 IPFW_ADD_OBJ_REWRITER(IS_DEFAULT_VNET(curvnet), dyn_opcodes); 3020 } 3021 3022 void 3023 ipfw_dyn_uninit(int pass) 3024 { 3025 #ifdef INET6 3026 struct dyn_ipv6_state *s6; 3027 #endif 3028 struct dyn_ipv4_state *s4; 3029 int bucket; 3030 3031 if (pass == 0) { 3032 callout_drain(&V_dyn_timeout); 3033 return; 3034 } 3035 IPFW_DEL_OBJ_REWRITER(IS_DEFAULT_VNET(curvnet), dyn_opcodes); 3036 DYN_EXPIRED_LOCK_DESTROY(); 3037 3038 #define DYN_FREE_STATES_FORCED(CK, s, af, name, en) do { \ 3039 while ((s = CK ## SLIST_FIRST(&V_dyn_ ## name)) != NULL) { \ 3040 CK ## SLIST_REMOVE_HEAD(&V_dyn_ ## name, en); \ 3041 if (s->type == O_LIMIT_PARENT) \ 3042 uma_zfree(V_dyn_parent_zone, s->limit); \ 3043 else \ 3044 uma_zfree(V_dyn_data_zone, s->data); \ 3045 uma_zfree(V_dyn_ ## af ## _zone, s); \ 3046 } \ 3047 } while (0) 3048 for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) { 3049 DYN_BUCKET_LOCK_DESTROY(V_dyn_bucket_lock, bucket); 3050 3051 DYN_FREE_STATES_FORCED(CK_, s4, ipv4, ipv4[bucket], entry); 3052 DYN_FREE_STATES_FORCED(CK_, s4, ipv4, ipv4_parent[bucket], 3053 entry); 3054 #ifdef INET6 3055 DYN_FREE_STATES_FORCED(CK_, s6, ipv6, ipv6[bucket], entry); 3056 DYN_FREE_STATES_FORCED(CK_, s6, ipv6, ipv6_parent[bucket], 3057 entry); 3058 #endif /* INET6 */ 3059 } 3060 DYN_FREE_STATES_FORCED(, s4, ipv4, expired_ipv4, expired); 3061 #ifdef INET6 3062 DYN_FREE_STATES_FORCED(, s6, ipv6, expired_ipv6, expired); 3063 #endif 3064 #undef DYN_FREE_STATES_FORCED 3065 3066 uma_zdestroy(V_dyn_ipv4_zone); 3067 uma_zdestroy(V_dyn_data_zone); 3068 uma_zdestroy(V_dyn_parent_zone); 3069 #ifdef INET6 3070 uma_zdestroy(V_dyn_ipv6_zone); 3071 free(V_dyn_ipv6, M_IPFW); 3072 free(V_dyn_ipv6_parent, M_IPFW); 3073 free(V_dyn_ipv6_add, M_IPFW); 3074 free(V_dyn_ipv6_parent_add, M_IPFW); 3075 free(V_dyn_ipv6_del, M_IPFW); 3076 free(V_dyn_ipv6_parent_del, M_IPFW); 3077 #endif 3078 free(V_dyn_bucket_lock, M_IPFW); 3079 free(V_dyn_ipv4, M_IPFW); 3080 free(V_dyn_ipv4_parent, M_IPFW); 3081 free(V_dyn_ipv4_add, M_IPFW); 3082 free(V_dyn_ipv4_parent_add, M_IPFW); 3083 free(V_dyn_ipv4_del, M_IPFW); 3084 free(V_dyn_ipv4_parent_del, M_IPFW); 3085 if (IS_DEFAULT_VNET(curvnet)) 3086 free(dyn_hp_cache, M_IPFW); 3087 } 3088 3089 3090