1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2017-2018 Yandex LLC 5 * Copyright (c) 2017-2018 Andrey V. Elsukov <ae@FreeBSD.org> 6 * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_inet.h" 34 #include "opt_inet6.h" 35 #include "opt_ipfw.h" 36 #ifndef INET 37 #error IPFIREWALL requires INET. 38 #endif /* INET */ 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/hash.h> 43 #include <sys/mbuf.h> 44 #include <sys/kernel.h> 45 #include <sys/lock.h> 46 #include <sys/pcpu.h> 47 #include <sys/queue.h> 48 #include <sys/rmlock.h> 49 #include <sys/smp.h> 50 #include <sys/socket.h> 51 #include <sys/sysctl.h> 52 #include <sys/syslog.h> 53 #include <net/ethernet.h> 54 #include <net/if.h> 55 #include <net/if_var.h> 56 #include <net/pfil.h> 57 #include <net/vnet.h> 58 59 #include <netinet/in.h> 60 #include <netinet/ip.h> 61 #include <netinet/ip_var.h> 62 #include <netinet/ip_fw.h> 63 #include <netinet/tcp_var.h> 64 #include <netinet/udp.h> 65 66 #include <netinet/ip6.h> /* IN6_ARE_ADDR_EQUAL */ 67 #ifdef INET6 68 #include <netinet6/in6_var.h> 69 #include <netinet6/ip6_var.h> 70 #include <netinet6/scope6_var.h> 71 #endif 72 73 #include <netpfil/ipfw/ip_fw_private.h> 74 75 #include <machine/in_cksum.h> /* XXX for in_cksum */ 76 77 #ifdef MAC 78 #include <security/mac/mac_framework.h> 79 #endif 80 81 /* 82 * Description of dynamic states. 83 * 84 * Dynamic states are stored in lists accessed through a hash tables 85 * whose size is curr_dyn_buckets. This value can be modified through 86 * the sysctl variable dyn_buckets. 87 * 88 * Currently there are four tables: dyn_ipv4, dyn_ipv6, dyn_ipv4_parent, 89 * and dyn_ipv6_parent. 90 * 91 * When a packet is received, its address fields hashed, then matched 92 * against the entries in the corresponding list by addr_type. 93 * Dynamic states can be used for different purposes: 94 * + stateful rules; 95 * + enforcing limits on the number of sessions; 96 * + in-kernel NAT (not implemented yet) 97 * 98 * The lifetime of dynamic states is regulated by dyn_*_lifetime, 99 * measured in seconds and depending on the flags. 100 * 101 * The total number of dynamic states is equal to UMA zone items count. 102 * The max number of dynamic states is dyn_max. When we reach 103 * the maximum number of rules we do not create anymore. This is 104 * done to avoid consuming too much memory, but also too much 105 * time when searching on each packet (ideally, we should try instead 106 * to put a limit on the length of the list on each bucket...). 107 * 108 * Each state holds a pointer to the parent ipfw rule so we know what 109 * action to perform. Dynamic rules are removed when the parent rule is 110 * deleted. 111 * 112 * There are some limitations with dynamic rules -- we do not 113 * obey the 'randomized match', and we do not do multiple 114 * passes through the firewall. XXX check the latter!!! 115 */ 116 117 /* By default use jenkins hash function */ 118 #define IPFIREWALL_JENKINSHASH 119 120 #define DYN_COUNTER_INC(d, dir, pktlen) do { \ 121 (d)->pcnt_ ## dir++; \ 122 (d)->bcnt_ ## dir += pktlen; \ 123 } while (0) 124 125 struct dyn_data { 126 void *parent; /* pointer to parent rule */ 127 uint32_t chain_id; /* cached ruleset id */ 128 uint32_t f_pos; /* cached rule index */ 129 130 uint32_t hashval; /* hash value used for hash resize */ 131 uint16_t fibnum; /* fib used to send keepalives */ 132 uint8_t _pad[3]; 133 uint8_t set; /* parent rule set number */ 134 uint16_t rulenum; /* parent rule number */ 135 uint32_t ruleid; /* parent rule id */ 136 137 uint32_t state; /* TCP session state and flags */ 138 uint32_t ack_fwd; /* most recent ACKs in forward */ 139 uint32_t ack_rev; /* and reverse direction (used */ 140 /* to generate keepalives) */ 141 uint32_t sync; /* synchronization time */ 142 uint32_t expire; /* expire time */ 143 144 uint64_t pcnt_fwd; /* bytes counter in forward */ 145 uint64_t bcnt_fwd; /* packets counter in forward */ 146 uint64_t pcnt_rev; /* bytes counter in reverse */ 147 uint64_t bcnt_rev; /* packets counter in reverse */ 148 }; 149 150 #define DPARENT_COUNT_DEC(p) do { \ 151 MPASS(p->count > 0); \ 152 ck_pr_dec_32(&(p)->count); \ 153 } while (0) 154 #define DPARENT_COUNT_INC(p) ck_pr_inc_32(&(p)->count) 155 #define DPARENT_COUNT(p) ck_pr_load_32(&(p)->count) 156 struct dyn_parent { 157 void *parent; /* pointer to parent rule */ 158 uint32_t count; /* number of linked states */ 159 uint8_t _pad; 160 uint8_t set; /* parent rule set number */ 161 uint16_t rulenum; /* parent rule number */ 162 uint32_t ruleid; /* parent rule id */ 163 uint32_t hashval; /* hash value used for hash resize */ 164 uint32_t expire; /* expire time */ 165 }; 166 167 struct dyn_ipv4_state { 168 uint8_t type; /* State type */ 169 uint8_t proto; /* UL Protocol */ 170 uint16_t kidx; /* named object index */ 171 uint16_t sport, dport; /* ULP source and destination ports */ 172 in_addr_t src, dst; /* IPv4 source and destination */ 173 174 union { 175 struct dyn_data *data; 176 struct dyn_parent *limit; 177 }; 178 CK_SLIST_ENTRY(dyn_ipv4_state) entry; 179 SLIST_ENTRY(dyn_ipv4_state) expired; 180 }; 181 CK_SLIST_HEAD(dyn_ipv4ck_slist, dyn_ipv4_state); 182 VNET_DEFINE_STATIC(struct dyn_ipv4ck_slist *, dyn_ipv4); 183 VNET_DEFINE_STATIC(struct dyn_ipv4ck_slist *, dyn_ipv4_parent); 184 185 SLIST_HEAD(dyn_ipv4_slist, dyn_ipv4_state); 186 VNET_DEFINE_STATIC(struct dyn_ipv4_slist, dyn_expired_ipv4); 187 #define V_dyn_ipv4 VNET(dyn_ipv4) 188 #define V_dyn_ipv4_parent VNET(dyn_ipv4_parent) 189 #define V_dyn_expired_ipv4 VNET(dyn_expired_ipv4) 190 191 #ifdef INET6 192 struct dyn_ipv6_state { 193 uint8_t type; /* State type */ 194 uint8_t proto; /* UL Protocol */ 195 uint16_t kidx; /* named object index */ 196 uint16_t sport, dport; /* ULP source and destination ports */ 197 struct in6_addr src, dst; /* IPv6 source and destination */ 198 uint32_t zoneid; /* IPv6 scope zone id */ 199 union { 200 struct dyn_data *data; 201 struct dyn_parent *limit; 202 }; 203 CK_SLIST_ENTRY(dyn_ipv6_state) entry; 204 SLIST_ENTRY(dyn_ipv6_state) expired; 205 }; 206 CK_SLIST_HEAD(dyn_ipv6ck_slist, dyn_ipv6_state); 207 VNET_DEFINE_STATIC(struct dyn_ipv6ck_slist *, dyn_ipv6); 208 VNET_DEFINE_STATIC(struct dyn_ipv6ck_slist *, dyn_ipv6_parent); 209 210 SLIST_HEAD(dyn_ipv6_slist, dyn_ipv6_state); 211 VNET_DEFINE_STATIC(struct dyn_ipv6_slist, dyn_expired_ipv6); 212 #define V_dyn_ipv6 VNET(dyn_ipv6) 213 #define V_dyn_ipv6_parent VNET(dyn_ipv6_parent) 214 #define V_dyn_expired_ipv6 VNET(dyn_expired_ipv6) 215 #endif /* INET6 */ 216 217 /* 218 * Per-CPU pointer indicates that specified state is currently in use 219 * and must not be reclaimed by expiration callout. 220 */ 221 static void **dyn_hp_cache; 222 DPCPU_DEFINE_STATIC(void *, dyn_hp); 223 #define DYNSTATE_GET(cpu) ck_pr_load_ptr(DPCPU_ID_PTR((cpu), dyn_hp)) 224 #define DYNSTATE_PROTECT(v) ck_pr_store_ptr(DPCPU_PTR(dyn_hp), (v)) 225 #define DYNSTATE_RELEASE() DYNSTATE_PROTECT(NULL) 226 #define DYNSTATE_CRITICAL_ENTER() critical_enter() 227 #define DYNSTATE_CRITICAL_EXIT() do { \ 228 DYNSTATE_RELEASE(); \ 229 critical_exit(); \ 230 } while (0); 231 232 /* 233 * We keep two version numbers, one is updated when new entry added to 234 * the list. Second is updated when an entry deleted from the list. 235 * Versions are updated under bucket lock. 236 * 237 * Bucket "add" version number is used to know, that in the time between 238 * state lookup (i.e. ipfw_dyn_lookup_state()) and the followed state 239 * creation (i.e. ipfw_dyn_install_state()) another concurrent thread did 240 * not install some state in this bucket. Using this info we can avoid 241 * additional state lookup, because we are sure that we will not install 242 * the state twice. 243 * 244 * Also doing the tracking of bucket "del" version during lookup we can 245 * be sure, that state entry was not unlinked and freed in time between 246 * we read the state pointer and protect it with hazard pointer. 247 * 248 * An entry unlinked from CK list keeps unchanged until it is freed. 249 * Unlinked entries are linked into expired lists using "expired" field. 250 */ 251 252 /* 253 * dyn_expire_lock is used to protect access to dyn_expired_xxx lists. 254 * dyn_bucket_lock is used to get write access to lists in specific bucket. 255 * Currently one dyn_bucket_lock is used for all ipv4, ipv4_parent, ipv6, 256 * and ipv6_parent lists. 257 */ 258 VNET_DEFINE_STATIC(struct mtx, dyn_expire_lock); 259 VNET_DEFINE_STATIC(struct mtx *, dyn_bucket_lock); 260 #define V_dyn_expire_lock VNET(dyn_expire_lock) 261 #define V_dyn_bucket_lock VNET(dyn_bucket_lock) 262 263 /* 264 * Bucket's add/delete generation versions. 265 */ 266 VNET_DEFINE_STATIC(uint32_t *, dyn_ipv4_add); 267 VNET_DEFINE_STATIC(uint32_t *, dyn_ipv4_del); 268 VNET_DEFINE_STATIC(uint32_t *, dyn_ipv4_parent_add); 269 VNET_DEFINE_STATIC(uint32_t *, dyn_ipv4_parent_del); 270 #define V_dyn_ipv4_add VNET(dyn_ipv4_add) 271 #define V_dyn_ipv4_del VNET(dyn_ipv4_del) 272 #define V_dyn_ipv4_parent_add VNET(dyn_ipv4_parent_add) 273 #define V_dyn_ipv4_parent_del VNET(dyn_ipv4_parent_del) 274 275 #ifdef INET6 276 VNET_DEFINE_STATIC(uint32_t *, dyn_ipv6_add); 277 VNET_DEFINE_STATIC(uint32_t *, dyn_ipv6_del); 278 VNET_DEFINE_STATIC(uint32_t *, dyn_ipv6_parent_add); 279 VNET_DEFINE_STATIC(uint32_t *, dyn_ipv6_parent_del); 280 #define V_dyn_ipv6_add VNET(dyn_ipv6_add) 281 #define V_dyn_ipv6_del VNET(dyn_ipv6_del) 282 #define V_dyn_ipv6_parent_add VNET(dyn_ipv6_parent_add) 283 #define V_dyn_ipv6_parent_del VNET(dyn_ipv6_parent_del) 284 #endif /* INET6 */ 285 286 #define DYN_BUCKET(h, b) ((h) & (b - 1)) 287 #define DYN_BUCKET_VERSION(b, v) ck_pr_load_32(&V_dyn_ ## v[(b)]) 288 #define DYN_BUCKET_VERSION_BUMP(b, v) ck_pr_inc_32(&V_dyn_ ## v[(b)]) 289 290 #define DYN_BUCKET_LOCK_INIT(lock, b) \ 291 mtx_init(&lock[(b)], "IPFW dynamic bucket", NULL, MTX_DEF) 292 #define DYN_BUCKET_LOCK_DESTROY(lock, b) mtx_destroy(&lock[(b)]) 293 #define DYN_BUCKET_LOCK(b) mtx_lock(&V_dyn_bucket_lock[(b)]) 294 #define DYN_BUCKET_UNLOCK(b) mtx_unlock(&V_dyn_bucket_lock[(b)]) 295 #define DYN_BUCKET_ASSERT(b) mtx_assert(&V_dyn_bucket_lock[(b)], MA_OWNED) 296 297 #define DYN_EXPIRED_LOCK_INIT() \ 298 mtx_init(&V_dyn_expire_lock, "IPFW expired states list", NULL, MTX_DEF) 299 #define DYN_EXPIRED_LOCK_DESTROY() mtx_destroy(&V_dyn_expire_lock) 300 #define DYN_EXPIRED_LOCK() mtx_lock(&V_dyn_expire_lock) 301 #define DYN_EXPIRED_UNLOCK() mtx_unlock(&V_dyn_expire_lock) 302 303 VNET_DEFINE_STATIC(uint32_t, dyn_buckets_max); 304 VNET_DEFINE_STATIC(uint32_t, curr_dyn_buckets); 305 VNET_DEFINE_STATIC(struct callout, dyn_timeout); 306 #define V_dyn_buckets_max VNET(dyn_buckets_max) 307 #define V_curr_dyn_buckets VNET(curr_dyn_buckets) 308 #define V_dyn_timeout VNET(dyn_timeout) 309 310 /* Maximum length of states chain in a bucket */ 311 VNET_DEFINE_STATIC(uint32_t, curr_max_length); 312 #define V_curr_max_length VNET(curr_max_length) 313 314 VNET_DEFINE_STATIC(uint32_t, dyn_keep_states); 315 #define V_dyn_keep_states VNET(dyn_keep_states) 316 317 VNET_DEFINE_STATIC(uma_zone_t, dyn_data_zone); 318 VNET_DEFINE_STATIC(uma_zone_t, dyn_parent_zone); 319 VNET_DEFINE_STATIC(uma_zone_t, dyn_ipv4_zone); 320 #ifdef INET6 321 VNET_DEFINE_STATIC(uma_zone_t, dyn_ipv6_zone); 322 #define V_dyn_ipv6_zone VNET(dyn_ipv6_zone) 323 #endif /* INET6 */ 324 #define V_dyn_data_zone VNET(dyn_data_zone) 325 #define V_dyn_parent_zone VNET(dyn_parent_zone) 326 #define V_dyn_ipv4_zone VNET(dyn_ipv4_zone) 327 328 /* 329 * Timeouts for various events in handing dynamic rules. 330 */ 331 VNET_DEFINE_STATIC(uint32_t, dyn_ack_lifetime); 332 VNET_DEFINE_STATIC(uint32_t, dyn_syn_lifetime); 333 VNET_DEFINE_STATIC(uint32_t, dyn_fin_lifetime); 334 VNET_DEFINE_STATIC(uint32_t, dyn_rst_lifetime); 335 VNET_DEFINE_STATIC(uint32_t, dyn_udp_lifetime); 336 VNET_DEFINE_STATIC(uint32_t, dyn_short_lifetime); 337 338 #define V_dyn_ack_lifetime VNET(dyn_ack_lifetime) 339 #define V_dyn_syn_lifetime VNET(dyn_syn_lifetime) 340 #define V_dyn_fin_lifetime VNET(dyn_fin_lifetime) 341 #define V_dyn_rst_lifetime VNET(dyn_rst_lifetime) 342 #define V_dyn_udp_lifetime VNET(dyn_udp_lifetime) 343 #define V_dyn_short_lifetime VNET(dyn_short_lifetime) 344 345 /* 346 * Keepalives are sent if dyn_keepalive is set. They are sent every 347 * dyn_keepalive_period seconds, in the last dyn_keepalive_interval 348 * seconds of lifetime of a rule. 349 * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower 350 * than dyn_keepalive_period. 351 */ 352 VNET_DEFINE_STATIC(uint32_t, dyn_keepalive_interval); 353 VNET_DEFINE_STATIC(uint32_t, dyn_keepalive_period); 354 VNET_DEFINE_STATIC(uint32_t, dyn_keepalive); 355 VNET_DEFINE_STATIC(time_t, dyn_keepalive_last); 356 357 #define V_dyn_keepalive_interval VNET(dyn_keepalive_interval) 358 #define V_dyn_keepalive_period VNET(dyn_keepalive_period) 359 #define V_dyn_keepalive VNET(dyn_keepalive) 360 #define V_dyn_keepalive_last VNET(dyn_keepalive_last) 361 362 VNET_DEFINE_STATIC(uint32_t, dyn_max); /* max # of dynamic states */ 363 VNET_DEFINE_STATIC(uint32_t, dyn_count); /* number of states */ 364 VNET_DEFINE_STATIC(uint32_t, dyn_parent_max); /* max # of parent states */ 365 VNET_DEFINE_STATIC(uint32_t, dyn_parent_count); /* number of parent states */ 366 367 #define V_dyn_max VNET(dyn_max) 368 #define V_dyn_count VNET(dyn_count) 369 #define V_dyn_parent_max VNET(dyn_parent_max) 370 #define V_dyn_parent_count VNET(dyn_parent_count) 371 372 #define DYN_COUNT_DEC(name) do { \ 373 MPASS((V_ ## name) > 0); \ 374 ck_pr_dec_32(&(V_ ## name)); \ 375 } while (0) 376 #define DYN_COUNT_INC(name) ck_pr_inc_32(&(V_ ## name)) 377 #define DYN_COUNT(name) ck_pr_load_32(&(V_ ## name)) 378 379 static time_t last_log; /* Log ratelimiting */ 380 381 /* 382 * Get/set maximum number of dynamic states in given VNET instance. 383 */ 384 static int 385 sysctl_dyn_max(SYSCTL_HANDLER_ARGS) 386 { 387 uint32_t nstates; 388 int error; 389 390 nstates = V_dyn_max; 391 error = sysctl_handle_32(oidp, &nstates, 0, req); 392 /* Read operation or some error */ 393 if ((error != 0) || (req->newptr == NULL)) 394 return (error); 395 396 V_dyn_max = nstates; 397 uma_zone_set_max(V_dyn_data_zone, V_dyn_max); 398 return (0); 399 } 400 401 static int 402 sysctl_dyn_parent_max(SYSCTL_HANDLER_ARGS) 403 { 404 uint32_t nstates; 405 int error; 406 407 nstates = V_dyn_parent_max; 408 error = sysctl_handle_32(oidp, &nstates, 0, req); 409 /* Read operation or some error */ 410 if ((error != 0) || (req->newptr == NULL)) 411 return (error); 412 413 V_dyn_parent_max = nstates; 414 uma_zone_set_max(V_dyn_parent_zone, V_dyn_parent_max); 415 return (0); 416 } 417 418 static int 419 sysctl_dyn_buckets(SYSCTL_HANDLER_ARGS) 420 { 421 uint32_t nbuckets; 422 int error; 423 424 nbuckets = V_dyn_buckets_max; 425 error = sysctl_handle_32(oidp, &nbuckets, 0, req); 426 /* Read operation or some error */ 427 if ((error != 0) || (req->newptr == NULL)) 428 return (error); 429 430 if (nbuckets > 256) 431 V_dyn_buckets_max = 1 << fls(nbuckets - 1); 432 else 433 return (EINVAL); 434 return (0); 435 } 436 437 SYSCTL_DECL(_net_inet_ip_fw); 438 439 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_count, 440 CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(dyn_count), 0, 441 "Current number of dynamic states."); 442 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_parent_count, 443 CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(dyn_parent_count), 0, 444 "Current number of parent states. "); 445 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, 446 CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(curr_dyn_buckets), 0, 447 "Current number of buckets for states hash table."); 448 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, curr_max_length, 449 CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(curr_max_length), 0, 450 "Current maximum length of states chains in hash buckets."); 451 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_buckets, 452 CTLFLAG_VNET | CTLTYPE_U32 | CTLFLAG_RW, 0, 0, sysctl_dyn_buckets, 453 "IU", "Max number of buckets for dynamic states hash table."); 454 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_max, 455 CTLFLAG_VNET | CTLTYPE_U32 | CTLFLAG_RW, 0, 0, sysctl_dyn_max, 456 "IU", "Max number of dynamic states."); 457 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_parent_max, 458 CTLFLAG_VNET | CTLTYPE_U32 | CTLFLAG_RW, 0, 0, sysctl_dyn_parent_max, 459 "IU", "Max number of parent dynamic states."); 460 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, 461 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_ack_lifetime), 0, 462 "Lifetime of dynamic states for TCP ACK."); 463 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, 464 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_syn_lifetime), 0, 465 "Lifetime of dynamic states for TCP SYN."); 466 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, 467 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_fin_lifetime), 0, 468 "Lifetime of dynamic states for TCP FIN."); 469 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, 470 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_rst_lifetime), 0, 471 "Lifetime of dynamic states for TCP RST."); 472 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, 473 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_udp_lifetime), 0, 474 "Lifetime of dynamic states for UDP."); 475 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, 476 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_short_lifetime), 0, 477 "Lifetime of dynamic states for other situations."); 478 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, 479 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_keepalive), 0, 480 "Enable keepalives for dynamic states."); 481 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_keep_states, 482 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_keep_states), 0, 483 "Do not flush dynamic states on rule deletion"); 484 485 486 #ifdef IPFIREWALL_DYNDEBUG 487 #define DYN_DEBUG(fmt, ...) do { \ 488 printf("%s: " fmt "\n", __func__, __VA_ARGS__); \ 489 } while (0) 490 #else 491 #define DYN_DEBUG(fmt, ...) 492 #endif /* !IPFIREWALL_DYNDEBUG */ 493 494 #ifdef INET6 495 /* Functions to work with IPv6 states */ 496 static struct dyn_ipv6_state *dyn_lookup_ipv6_state( 497 const struct ipfw_flow_id *, uint32_t, const void *, 498 struct ipfw_dyn_info *, int); 499 static int dyn_lookup_ipv6_state_locked(const struct ipfw_flow_id *, 500 uint32_t, const void *, int, uint32_t, uint16_t); 501 static struct dyn_ipv6_state *dyn_alloc_ipv6_state( 502 const struct ipfw_flow_id *, uint32_t, uint16_t, uint8_t); 503 static int dyn_add_ipv6_state(void *, uint32_t, uint16_t, uint8_t, 504 const struct ipfw_flow_id *, uint32_t, const void *, int, uint32_t, 505 struct ipfw_dyn_info *, uint16_t, uint16_t, uint8_t); 506 static void dyn_export_ipv6_state(const struct dyn_ipv6_state *, 507 ipfw_dyn_rule *); 508 509 static uint32_t dyn_getscopeid(const struct ip_fw_args *); 510 static void dyn_make_keepalive_ipv6(struct mbuf *, const struct in6_addr *, 511 const struct in6_addr *, uint32_t, uint32_t, uint32_t, uint16_t, 512 uint16_t); 513 static void dyn_enqueue_keepalive_ipv6(struct mbufq *, 514 const struct dyn_ipv6_state *); 515 static void dyn_send_keepalive_ipv6(struct ip_fw_chain *); 516 517 static struct dyn_ipv6_state *dyn_lookup_ipv6_parent( 518 const struct ipfw_flow_id *, uint32_t, const void *, uint32_t, uint16_t, 519 uint32_t); 520 static struct dyn_ipv6_state *dyn_lookup_ipv6_parent_locked( 521 const struct ipfw_flow_id *, uint32_t, const void *, uint32_t, uint16_t, 522 uint32_t); 523 static struct dyn_ipv6_state *dyn_add_ipv6_parent(void *, uint32_t, uint16_t, 524 uint8_t, const struct ipfw_flow_id *, uint32_t, uint32_t, uint32_t, 525 uint16_t); 526 #endif /* INET6 */ 527 528 /* Functions to work with limit states */ 529 static void *dyn_get_parent_state(const struct ipfw_flow_id *, uint32_t, 530 struct ip_fw *, uint32_t, uint32_t, uint16_t); 531 static struct dyn_ipv4_state *dyn_lookup_ipv4_parent( 532 const struct ipfw_flow_id *, const void *, uint32_t, uint16_t, uint32_t); 533 static struct dyn_ipv4_state *dyn_lookup_ipv4_parent_locked( 534 const struct ipfw_flow_id *, const void *, uint32_t, uint16_t, uint32_t); 535 static struct dyn_parent *dyn_alloc_parent(void *, uint32_t, uint16_t, 536 uint8_t, uint32_t); 537 static struct dyn_ipv4_state *dyn_add_ipv4_parent(void *, uint32_t, uint16_t, 538 uint8_t, const struct ipfw_flow_id *, uint32_t, uint32_t, uint16_t); 539 540 static void dyn_tick(void *); 541 static void dyn_expire_states(struct ip_fw_chain *, ipfw_range_tlv *); 542 static void dyn_free_states(struct ip_fw_chain *); 543 static void dyn_export_parent(const struct dyn_parent *, uint16_t, 544 ipfw_dyn_rule *); 545 static void dyn_export_data(const struct dyn_data *, uint16_t, uint8_t, 546 ipfw_dyn_rule *); 547 static uint32_t dyn_update_tcp_state(struct dyn_data *, 548 const struct ipfw_flow_id *, const struct tcphdr *, int); 549 static void dyn_update_proto_state(struct dyn_data *, 550 const struct ipfw_flow_id *, const void *, int, int); 551 552 /* Functions to work with IPv4 states */ 553 struct dyn_ipv4_state *dyn_lookup_ipv4_state(const struct ipfw_flow_id *, 554 const void *, struct ipfw_dyn_info *, int); 555 static int dyn_lookup_ipv4_state_locked(const struct ipfw_flow_id *, 556 const void *, int, uint32_t, uint16_t); 557 static struct dyn_ipv4_state *dyn_alloc_ipv4_state( 558 const struct ipfw_flow_id *, uint16_t, uint8_t); 559 static int dyn_add_ipv4_state(void *, uint32_t, uint16_t, uint8_t, 560 const struct ipfw_flow_id *, const void *, int, uint32_t, 561 struct ipfw_dyn_info *, uint16_t, uint16_t, uint8_t); 562 static void dyn_export_ipv4_state(const struct dyn_ipv4_state *, 563 ipfw_dyn_rule *); 564 565 /* 566 * Named states support. 567 */ 568 static char *default_state_name = "default"; 569 struct dyn_state_obj { 570 struct named_object no; 571 char name[64]; 572 }; 573 574 #define DYN_STATE_OBJ(ch, cmd) \ 575 ((struct dyn_state_obj *)SRV_OBJECT(ch, (cmd)->arg1)) 576 /* 577 * Classifier callback. 578 * Return 0 if opcode contains object that should be referenced 579 * or rewritten. 580 */ 581 static int 582 dyn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) 583 { 584 585 DYN_DEBUG("opcode %d, arg1 %d", cmd->opcode, cmd->arg1); 586 /* Don't rewrite "check-state any" */ 587 if (cmd->arg1 == 0 && 588 cmd->opcode == O_CHECK_STATE) 589 return (1); 590 591 *puidx = cmd->arg1; 592 *ptype = 0; 593 return (0); 594 } 595 596 static void 597 dyn_update(ipfw_insn *cmd, uint16_t idx) 598 { 599 600 cmd->arg1 = idx; 601 DYN_DEBUG("opcode %d, arg1 %d", cmd->opcode, cmd->arg1); 602 } 603 604 static int 605 dyn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, 606 struct named_object **pno) 607 { 608 ipfw_obj_ntlv *ntlv; 609 const char *name; 610 611 DYN_DEBUG("uidx %d", ti->uidx); 612 if (ti->uidx != 0) { 613 if (ti->tlvs == NULL) 614 return (EINVAL); 615 /* Search ntlv in the buffer provided by user */ 616 ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, 617 IPFW_TLV_STATE_NAME); 618 if (ntlv == NULL) 619 return (EINVAL); 620 name = ntlv->name; 621 } else 622 name = default_state_name; 623 /* 624 * Search named object with corresponding name. 625 * Since states objects are global - ignore the set value 626 * and use zero instead. 627 */ 628 *pno = ipfw_objhash_lookup_name_type(CHAIN_TO_SRV(ch), 0, 629 IPFW_TLV_STATE_NAME, name); 630 /* 631 * We always return success here. 632 * The caller will check *pno and mark object as unresolved, 633 * then it will automatically create "default" object. 634 */ 635 return (0); 636 } 637 638 static struct named_object * 639 dyn_findbykidx(struct ip_fw_chain *ch, uint16_t idx) 640 { 641 642 DYN_DEBUG("kidx %d", idx); 643 return (ipfw_objhash_lookup_kidx(CHAIN_TO_SRV(ch), idx)); 644 } 645 646 static int 647 dyn_create(struct ip_fw_chain *ch, struct tid_info *ti, 648 uint16_t *pkidx) 649 { 650 struct namedobj_instance *ni; 651 struct dyn_state_obj *obj; 652 struct named_object *no; 653 ipfw_obj_ntlv *ntlv; 654 char *name; 655 656 DYN_DEBUG("uidx %d", ti->uidx); 657 if (ti->uidx != 0) { 658 if (ti->tlvs == NULL) 659 return (EINVAL); 660 ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, 661 IPFW_TLV_STATE_NAME); 662 if (ntlv == NULL) 663 return (EINVAL); 664 name = ntlv->name; 665 } else 666 name = default_state_name; 667 668 ni = CHAIN_TO_SRV(ch); 669 obj = malloc(sizeof(*obj), M_IPFW, M_WAITOK | M_ZERO); 670 obj->no.name = obj->name; 671 obj->no.etlv = IPFW_TLV_STATE_NAME; 672 strlcpy(obj->name, name, sizeof(obj->name)); 673 674 IPFW_UH_WLOCK(ch); 675 no = ipfw_objhash_lookup_name_type(ni, 0, 676 IPFW_TLV_STATE_NAME, name); 677 if (no != NULL) { 678 /* 679 * Object is already created. 680 * Just return its kidx and bump refcount. 681 */ 682 *pkidx = no->kidx; 683 no->refcnt++; 684 IPFW_UH_WUNLOCK(ch); 685 free(obj, M_IPFW); 686 DYN_DEBUG("\tfound kidx %d", *pkidx); 687 return (0); 688 } 689 if (ipfw_objhash_alloc_idx(ni, &obj->no.kidx) != 0) { 690 DYN_DEBUG("\talloc_idx failed for %s", name); 691 IPFW_UH_WUNLOCK(ch); 692 free(obj, M_IPFW); 693 return (ENOSPC); 694 } 695 ipfw_objhash_add(ni, &obj->no); 696 SRV_OBJECT(ch, obj->no.kidx) = obj; 697 obj->no.refcnt++; 698 *pkidx = obj->no.kidx; 699 IPFW_UH_WUNLOCK(ch); 700 DYN_DEBUG("\tcreated kidx %d", *pkidx); 701 return (0); 702 } 703 704 static void 705 dyn_destroy(struct ip_fw_chain *ch, struct named_object *no) 706 { 707 struct dyn_state_obj *obj; 708 709 IPFW_UH_WLOCK_ASSERT(ch); 710 711 KASSERT(no->refcnt == 1, 712 ("Destroying object '%s' (type %u, idx %u) with refcnt %u", 713 no->name, no->etlv, no->kidx, no->refcnt)); 714 DYN_DEBUG("kidx %d", no->kidx); 715 obj = SRV_OBJECT(ch, no->kidx); 716 SRV_OBJECT(ch, no->kidx) = NULL; 717 ipfw_objhash_del(CHAIN_TO_SRV(ch), no); 718 ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), no->kidx); 719 720 free(obj, M_IPFW); 721 } 722 723 static struct opcode_obj_rewrite dyn_opcodes[] = { 724 { 725 O_KEEP_STATE, IPFW_TLV_STATE_NAME, 726 dyn_classify, dyn_update, 727 dyn_findbyname, dyn_findbykidx, 728 dyn_create, dyn_destroy 729 }, 730 { 731 O_CHECK_STATE, IPFW_TLV_STATE_NAME, 732 dyn_classify, dyn_update, 733 dyn_findbyname, dyn_findbykidx, 734 dyn_create, dyn_destroy 735 }, 736 { 737 O_PROBE_STATE, IPFW_TLV_STATE_NAME, 738 dyn_classify, dyn_update, 739 dyn_findbyname, dyn_findbykidx, 740 dyn_create, dyn_destroy 741 }, 742 { 743 O_LIMIT, IPFW_TLV_STATE_NAME, 744 dyn_classify, dyn_update, 745 dyn_findbyname, dyn_findbykidx, 746 dyn_create, dyn_destroy 747 }, 748 }; 749 750 /* 751 * IMPORTANT: the hash function for dynamic rules must be commutative 752 * in source and destination (ip,port), because rules are bidirectional 753 * and we want to find both in the same bucket. 754 */ 755 #ifndef IPFIREWALL_JENKINSHASH 756 static __inline uint32_t 757 hash_packet(const struct ipfw_flow_id *id) 758 { 759 uint32_t i; 760 761 #ifdef INET6 762 if (IS_IP6_FLOW_ID(id)) 763 i = ntohl((id->dst_ip6.__u6_addr.__u6_addr32[2]) ^ 764 (id->dst_ip6.__u6_addr.__u6_addr32[3]) ^ 765 (id->src_ip6.__u6_addr.__u6_addr32[2]) ^ 766 (id->src_ip6.__u6_addr.__u6_addr32[3])); 767 else 768 #endif /* INET6 */ 769 i = (id->dst_ip) ^ (id->src_ip); 770 i ^= (id->dst_port) ^ (id->src_port); 771 return (i); 772 } 773 774 static __inline uint32_t 775 hash_parent(const struct ipfw_flow_id *id, const void *rule) 776 { 777 778 return (hash_packet(id) ^ ((uintptr_t)rule)); 779 } 780 781 #else /* IPFIREWALL_JENKINSHASH */ 782 783 VNET_DEFINE_STATIC(uint32_t, dyn_hashseed); 784 #define V_dyn_hashseed VNET(dyn_hashseed) 785 786 static __inline int 787 addrcmp4(const struct ipfw_flow_id *id) 788 { 789 790 if (id->src_ip < id->dst_ip) 791 return (0); 792 if (id->src_ip > id->dst_ip) 793 return (1); 794 if (id->src_port <= id->dst_port) 795 return (0); 796 return (1); 797 } 798 799 #ifdef INET6 800 static __inline int 801 addrcmp6(const struct ipfw_flow_id *id) 802 { 803 int ret; 804 805 ret = memcmp(&id->src_ip6, &id->dst_ip6, sizeof(struct in6_addr)); 806 if (ret < 0) 807 return (0); 808 if (ret > 0) 809 return (1); 810 if (id->src_port <= id->dst_port) 811 return (0); 812 return (1); 813 } 814 815 static __inline uint32_t 816 hash_packet6(const struct ipfw_flow_id *id) 817 { 818 struct tuple6 { 819 struct in6_addr addr[2]; 820 uint16_t port[2]; 821 } t6; 822 823 if (addrcmp6(id) == 0) { 824 t6.addr[0] = id->src_ip6; 825 t6.addr[1] = id->dst_ip6; 826 t6.port[0] = id->src_port; 827 t6.port[1] = id->dst_port; 828 } else { 829 t6.addr[0] = id->dst_ip6; 830 t6.addr[1] = id->src_ip6; 831 t6.port[0] = id->dst_port; 832 t6.port[1] = id->src_port; 833 } 834 return (jenkins_hash32((const uint32_t *)&t6, 835 sizeof(t6) / sizeof(uint32_t), V_dyn_hashseed)); 836 } 837 #endif 838 839 static __inline uint32_t 840 hash_packet(const struct ipfw_flow_id *id) 841 { 842 struct tuple4 { 843 in_addr_t addr[2]; 844 uint16_t port[2]; 845 } t4; 846 847 if (IS_IP4_FLOW_ID(id)) { 848 /* All fields are in host byte order */ 849 if (addrcmp4(id) == 0) { 850 t4.addr[0] = id->src_ip; 851 t4.addr[1] = id->dst_ip; 852 t4.port[0] = id->src_port; 853 t4.port[1] = id->dst_port; 854 } else { 855 t4.addr[0] = id->dst_ip; 856 t4.addr[1] = id->src_ip; 857 t4.port[0] = id->dst_port; 858 t4.port[1] = id->src_port; 859 } 860 return (jenkins_hash32((const uint32_t *)&t4, 861 sizeof(t4) / sizeof(uint32_t), V_dyn_hashseed)); 862 } else 863 #ifdef INET6 864 if (IS_IP6_FLOW_ID(id)) 865 return (hash_packet6(id)); 866 #endif 867 return (0); 868 } 869 870 static __inline uint32_t 871 hash_parent(const struct ipfw_flow_id *id, const void *rule) 872 { 873 874 return (jenkins_hash32((const uint32_t *)&rule, 875 sizeof(rule) / sizeof(uint32_t), hash_packet(id))); 876 } 877 #endif /* IPFIREWALL_JENKINSHASH */ 878 879 /* 880 * Print customizable flow id description via log(9) facility. 881 */ 882 static void 883 print_dyn_rule_flags(const struct ipfw_flow_id *id, int dyn_type, 884 int log_flags, char *prefix, char *postfix) 885 { 886 struct in_addr da; 887 #ifdef INET6 888 char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN]; 889 #else 890 char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; 891 #endif 892 893 #ifdef INET6 894 if (IS_IP6_FLOW_ID(id)) { 895 ip6_sprintf(src, &id->src_ip6); 896 ip6_sprintf(dst, &id->dst_ip6); 897 } else 898 #endif 899 { 900 da.s_addr = htonl(id->src_ip); 901 inet_ntop(AF_INET, &da, src, sizeof(src)); 902 da.s_addr = htonl(id->dst_ip); 903 inet_ntop(AF_INET, &da, dst, sizeof(dst)); 904 } 905 log(log_flags, "ipfw: %s type %d %s %d -> %s %d, %d %s\n", 906 prefix, dyn_type, src, id->src_port, dst, 907 id->dst_port, V_dyn_count, postfix); 908 } 909 910 #define print_dyn_rule(id, dtype, prefix, postfix) \ 911 print_dyn_rule_flags(id, dtype, LOG_DEBUG, prefix, postfix) 912 913 #define TIME_LEQ(a,b) ((int)((a)-(b)) <= 0) 914 #define TIME_LE(a,b) ((int)((a)-(b)) < 0) 915 #define _SEQ_GE(a,b) ((int)((a)-(b)) >= 0) 916 #define BOTH_SYN (TH_SYN | (TH_SYN << 8)) 917 #define BOTH_FIN (TH_FIN | (TH_FIN << 8)) 918 #define TCP_FLAGS (TH_FLAGS | (TH_FLAGS << 8)) 919 #define ACK_FWD 0x00010000 /* fwd ack seen */ 920 #define ACK_REV 0x00020000 /* rev ack seen */ 921 #define ACK_BOTH (ACK_FWD | ACK_REV) 922 923 static uint32_t 924 dyn_update_tcp_state(struct dyn_data *data, const struct ipfw_flow_id *pkt, 925 const struct tcphdr *tcp, int dir) 926 { 927 uint32_t ack, expire; 928 uint32_t state, old; 929 uint8_t th_flags; 930 931 expire = data->expire; 932 old = state = data->state; 933 th_flags = pkt->_flags & (TH_FIN | TH_SYN | TH_RST); 934 state |= (dir == MATCH_FORWARD) ? th_flags: (th_flags << 8); 935 switch (state & TCP_FLAGS) { 936 case TH_SYN: /* opening */ 937 expire = time_uptime + V_dyn_syn_lifetime; 938 break; 939 940 case BOTH_SYN: /* move to established */ 941 case BOTH_SYN | TH_FIN: /* one side tries to close */ 942 case BOTH_SYN | (TH_FIN << 8): 943 if (tcp == NULL) 944 break; 945 ack = ntohl(tcp->th_ack); 946 if (dir == MATCH_FORWARD) { 947 if (data->ack_fwd == 0 || 948 _SEQ_GE(ack, data->ack_fwd)) { 949 state |= ACK_FWD; 950 if (data->ack_fwd != ack) 951 ck_pr_store_32(&data->ack_fwd, ack); 952 } 953 } else { 954 if (data->ack_rev == 0 || 955 _SEQ_GE(ack, data->ack_rev)) { 956 state |= ACK_REV; 957 if (data->ack_rev != ack) 958 ck_pr_store_32(&data->ack_rev, ack); 959 } 960 } 961 if ((state & ACK_BOTH) == ACK_BOTH) { 962 /* 963 * Set expire time to V_dyn_ack_lifetime only if 964 * we got ACKs for both directions. 965 * We use XOR here to avoid possible state 966 * overwriting in concurrent thread. 967 */ 968 expire = time_uptime + V_dyn_ack_lifetime; 969 ck_pr_xor_32(&data->state, ACK_BOTH); 970 } else if ((data->state & ACK_BOTH) != (state & ACK_BOTH)) 971 ck_pr_or_32(&data->state, state & ACK_BOTH); 972 break; 973 974 case BOTH_SYN | BOTH_FIN: /* both sides closed */ 975 if (V_dyn_fin_lifetime >= V_dyn_keepalive_period) 976 V_dyn_fin_lifetime = V_dyn_keepalive_period - 1; 977 expire = time_uptime + V_dyn_fin_lifetime; 978 break; 979 980 default: 981 if (V_dyn_keepalive != 0 && 982 V_dyn_rst_lifetime >= V_dyn_keepalive_period) 983 V_dyn_rst_lifetime = V_dyn_keepalive_period - 1; 984 expire = time_uptime + V_dyn_rst_lifetime; 985 } 986 /* Save TCP state if it was changed */ 987 if ((state & TCP_FLAGS) != (old & TCP_FLAGS)) 988 ck_pr_or_32(&data->state, state & TCP_FLAGS); 989 return (expire); 990 } 991 992 /* 993 * Update ULP specific state. 994 * For TCP we keep sequence numbers and flags. For other protocols 995 * currently we update only expire time. Packets and bytes counters 996 * are also updated here. 997 */ 998 static void 999 dyn_update_proto_state(struct dyn_data *data, const struct ipfw_flow_id *pkt, 1000 const void *ulp, int pktlen, int dir) 1001 { 1002 uint32_t expire; 1003 1004 /* NOTE: we are in critical section here. */ 1005 switch (pkt->proto) { 1006 case IPPROTO_UDP: 1007 case IPPROTO_UDPLITE: 1008 expire = time_uptime + V_dyn_udp_lifetime; 1009 break; 1010 case IPPROTO_TCP: 1011 expire = dyn_update_tcp_state(data, pkt, ulp, dir); 1012 break; 1013 default: 1014 expire = time_uptime + V_dyn_short_lifetime; 1015 } 1016 /* 1017 * Expiration timer has the per-second granularity, no need to update 1018 * it every time when state is matched. 1019 */ 1020 if (data->expire != expire) 1021 ck_pr_store_32(&data->expire, expire); 1022 1023 if (dir == MATCH_FORWARD) 1024 DYN_COUNTER_INC(data, fwd, pktlen); 1025 else 1026 DYN_COUNTER_INC(data, rev, pktlen); 1027 } 1028 1029 /* 1030 * Lookup IPv4 state. 1031 * Must be called in critical section. 1032 */ 1033 struct dyn_ipv4_state * 1034 dyn_lookup_ipv4_state(const struct ipfw_flow_id *pkt, const void *ulp, 1035 struct ipfw_dyn_info *info, int pktlen) 1036 { 1037 struct dyn_ipv4_state *s; 1038 uint32_t version, bucket; 1039 1040 bucket = DYN_BUCKET(info->hashval, V_curr_dyn_buckets); 1041 info->version = DYN_BUCKET_VERSION(bucket, ipv4_add); 1042 restart: 1043 version = DYN_BUCKET_VERSION(bucket, ipv4_del); 1044 CK_SLIST_FOREACH(s, &V_dyn_ipv4[bucket], entry) { 1045 DYNSTATE_PROTECT(s); 1046 if (version != DYN_BUCKET_VERSION(bucket, ipv4_del)) 1047 goto restart; 1048 if (s->proto != pkt->proto) 1049 continue; 1050 if (info->kidx != 0 && s->kidx != info->kidx) 1051 continue; 1052 if (s->sport == pkt->src_port && s->dport == pkt->dst_port && 1053 s->src == pkt->src_ip && s->dst == pkt->dst_ip) { 1054 info->direction = MATCH_FORWARD; 1055 break; 1056 } 1057 if (s->sport == pkt->dst_port && s->dport == pkt->src_port && 1058 s->src == pkt->dst_ip && s->dst == pkt->src_ip) { 1059 info->direction = MATCH_REVERSE; 1060 break; 1061 } 1062 } 1063 1064 if (s != NULL) 1065 dyn_update_proto_state(s->data, pkt, ulp, pktlen, 1066 info->direction); 1067 return (s); 1068 } 1069 1070 /* 1071 * Lookup IPv4 state. 1072 * Simplifed version is used to check that matching state doesn't exist. 1073 */ 1074 static int 1075 dyn_lookup_ipv4_state_locked(const struct ipfw_flow_id *pkt, 1076 const void *ulp, int pktlen, uint32_t bucket, uint16_t kidx) 1077 { 1078 struct dyn_ipv4_state *s; 1079 int dir; 1080 1081 dir = MATCH_NONE; 1082 DYN_BUCKET_ASSERT(bucket); 1083 CK_SLIST_FOREACH(s, &V_dyn_ipv4[bucket], entry) { 1084 if (s->proto != pkt->proto || 1085 s->kidx != kidx) 1086 continue; 1087 if (s->sport == pkt->src_port && 1088 s->dport == pkt->dst_port && 1089 s->src == pkt->src_ip && s->dst == pkt->dst_ip) { 1090 dir = MATCH_FORWARD; 1091 break; 1092 } 1093 if (s->sport == pkt->dst_port && s->dport == pkt->src_port && 1094 s->src == pkt->dst_ip && s->dst == pkt->src_ip) { 1095 dir = MATCH_REVERSE; 1096 break; 1097 } 1098 } 1099 if (s != NULL) 1100 dyn_update_proto_state(s->data, pkt, ulp, pktlen, dir); 1101 return (s != NULL); 1102 } 1103 1104 struct dyn_ipv4_state * 1105 dyn_lookup_ipv4_parent(const struct ipfw_flow_id *pkt, const void *rule, 1106 uint32_t ruleid, uint16_t rulenum, uint32_t hashval) 1107 { 1108 struct dyn_ipv4_state *s; 1109 uint32_t version, bucket; 1110 1111 bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets); 1112 restart: 1113 version = DYN_BUCKET_VERSION(bucket, ipv4_parent_del); 1114 CK_SLIST_FOREACH(s, &V_dyn_ipv4_parent[bucket], entry) { 1115 DYNSTATE_PROTECT(s); 1116 if (version != DYN_BUCKET_VERSION(bucket, ipv4_parent_del)) 1117 goto restart; 1118 /* 1119 * NOTE: we do not need to check kidx, because parent rule 1120 * can not create states with different kidx. 1121 * And parent rule always created for forward direction. 1122 */ 1123 if (s->limit->parent == rule && 1124 s->limit->ruleid == ruleid && 1125 s->limit->rulenum == rulenum && 1126 s->proto == pkt->proto && 1127 s->sport == pkt->src_port && 1128 s->dport == pkt->dst_port && 1129 s->src == pkt->src_ip && s->dst == pkt->dst_ip) { 1130 if (s->limit->expire != time_uptime + 1131 V_dyn_short_lifetime) 1132 ck_pr_store_32(&s->limit->expire, 1133 time_uptime + V_dyn_short_lifetime); 1134 break; 1135 } 1136 } 1137 return (s); 1138 } 1139 1140 static struct dyn_ipv4_state * 1141 dyn_lookup_ipv4_parent_locked(const struct ipfw_flow_id *pkt, 1142 const void *rule, uint32_t ruleid, uint16_t rulenum, uint32_t bucket) 1143 { 1144 struct dyn_ipv4_state *s; 1145 1146 DYN_BUCKET_ASSERT(bucket); 1147 CK_SLIST_FOREACH(s, &V_dyn_ipv4_parent[bucket], entry) { 1148 if (s->limit->parent == rule && 1149 s->limit->ruleid == ruleid && 1150 s->limit->rulenum == rulenum && 1151 s->proto == pkt->proto && 1152 s->sport == pkt->src_port && 1153 s->dport == pkt->dst_port && 1154 s->src == pkt->src_ip && s->dst == pkt->dst_ip) 1155 break; 1156 } 1157 return (s); 1158 } 1159 1160 1161 #ifdef INET6 1162 static uint32_t 1163 dyn_getscopeid(const struct ip_fw_args *args) 1164 { 1165 1166 /* 1167 * If source or destination address is an scopeid address, we need 1168 * determine the scope zone id to resolve address scope ambiguity. 1169 */ 1170 if (IN6_IS_ADDR_LINKLOCAL(&args->f_id.src_ip6) || 1171 IN6_IS_ADDR_LINKLOCAL(&args->f_id.dst_ip6)) { 1172 MPASS(args->oif != NULL || 1173 args->m->m_pkthdr.rcvif != NULL); 1174 return (in6_getscopezone(args->oif != NULL ? args->oif: 1175 args->m->m_pkthdr.rcvif, IPV6_ADDR_SCOPE_LINKLOCAL)); 1176 } 1177 return (0); 1178 } 1179 1180 /* 1181 * Lookup IPv6 state. 1182 * Must be called in critical section. 1183 */ 1184 static struct dyn_ipv6_state * 1185 dyn_lookup_ipv6_state(const struct ipfw_flow_id *pkt, uint32_t zoneid, 1186 const void *ulp, struct ipfw_dyn_info *info, int pktlen) 1187 { 1188 struct dyn_ipv6_state *s; 1189 uint32_t version, bucket; 1190 1191 bucket = DYN_BUCKET(info->hashval, V_curr_dyn_buckets); 1192 info->version = DYN_BUCKET_VERSION(bucket, ipv6_add); 1193 restart: 1194 version = DYN_BUCKET_VERSION(bucket, ipv6_del); 1195 CK_SLIST_FOREACH(s, &V_dyn_ipv6[bucket], entry) { 1196 DYNSTATE_PROTECT(s); 1197 if (version != DYN_BUCKET_VERSION(bucket, ipv6_del)) 1198 goto restart; 1199 if (s->proto != pkt->proto || s->zoneid != zoneid) 1200 continue; 1201 if (info->kidx != 0 && s->kidx != info->kidx) 1202 continue; 1203 if (s->sport == pkt->src_port && s->dport == pkt->dst_port && 1204 IN6_ARE_ADDR_EQUAL(&s->src, &pkt->src_ip6) && 1205 IN6_ARE_ADDR_EQUAL(&s->dst, &pkt->dst_ip6)) { 1206 info->direction = MATCH_FORWARD; 1207 break; 1208 } 1209 if (s->sport == pkt->dst_port && s->dport == pkt->src_port && 1210 IN6_ARE_ADDR_EQUAL(&s->src, &pkt->dst_ip6) && 1211 IN6_ARE_ADDR_EQUAL(&s->dst, &pkt->src_ip6)) { 1212 info->direction = MATCH_REVERSE; 1213 break; 1214 } 1215 } 1216 if (s != NULL) 1217 dyn_update_proto_state(s->data, pkt, ulp, pktlen, 1218 info->direction); 1219 return (s); 1220 } 1221 1222 /* 1223 * Lookup IPv6 state. 1224 * Simplifed version is used to check that matching state doesn't exist. 1225 */ 1226 static int 1227 dyn_lookup_ipv6_state_locked(const struct ipfw_flow_id *pkt, uint32_t zoneid, 1228 const void *ulp, int pktlen, uint32_t bucket, uint16_t kidx) 1229 { 1230 struct dyn_ipv6_state *s; 1231 int dir; 1232 1233 dir = MATCH_NONE; 1234 DYN_BUCKET_ASSERT(bucket); 1235 CK_SLIST_FOREACH(s, &V_dyn_ipv6[bucket], entry) { 1236 if (s->proto != pkt->proto || s->kidx != kidx || 1237 s->zoneid != zoneid) 1238 continue; 1239 if (s->sport == pkt->src_port && s->dport == pkt->dst_port && 1240 IN6_ARE_ADDR_EQUAL(&s->src, &pkt->src_ip6) && 1241 IN6_ARE_ADDR_EQUAL(&s->dst, &pkt->dst_ip6)) { 1242 dir = MATCH_FORWARD; 1243 break; 1244 } 1245 if (s->sport == pkt->dst_port && s->dport == pkt->src_port && 1246 IN6_ARE_ADDR_EQUAL(&s->src, &pkt->dst_ip6) && 1247 IN6_ARE_ADDR_EQUAL(&s->dst, &pkt->src_ip6)) { 1248 dir = MATCH_REVERSE; 1249 break; 1250 } 1251 } 1252 if (s != NULL) 1253 dyn_update_proto_state(s->data, pkt, ulp, pktlen, dir); 1254 return (s != NULL); 1255 } 1256 1257 static struct dyn_ipv6_state * 1258 dyn_lookup_ipv6_parent(const struct ipfw_flow_id *pkt, uint32_t zoneid, 1259 const void *rule, uint32_t ruleid, uint16_t rulenum, uint32_t hashval) 1260 { 1261 struct dyn_ipv6_state *s; 1262 uint32_t version, bucket; 1263 1264 bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets); 1265 restart: 1266 version = DYN_BUCKET_VERSION(bucket, ipv6_parent_del); 1267 CK_SLIST_FOREACH(s, &V_dyn_ipv6_parent[bucket], entry) { 1268 DYNSTATE_PROTECT(s); 1269 if (version != DYN_BUCKET_VERSION(bucket, ipv6_parent_del)) 1270 goto restart; 1271 /* 1272 * NOTE: we do not need to check kidx, because parent rule 1273 * can not create states with different kidx. 1274 * Also parent rule always created for forward direction. 1275 */ 1276 if (s->limit->parent == rule && 1277 s->limit->ruleid == ruleid && 1278 s->limit->rulenum == rulenum && 1279 s->proto == pkt->proto && 1280 s->sport == pkt->src_port && 1281 s->dport == pkt->dst_port && s->zoneid == zoneid && 1282 IN6_ARE_ADDR_EQUAL(&s->src, &pkt->src_ip6) && 1283 IN6_ARE_ADDR_EQUAL(&s->dst, &pkt->dst_ip6)) { 1284 if (s->limit->expire != time_uptime + 1285 V_dyn_short_lifetime) 1286 ck_pr_store_32(&s->limit->expire, 1287 time_uptime + V_dyn_short_lifetime); 1288 break; 1289 } 1290 } 1291 return (s); 1292 } 1293 1294 static struct dyn_ipv6_state * 1295 dyn_lookup_ipv6_parent_locked(const struct ipfw_flow_id *pkt, uint32_t zoneid, 1296 const void *rule, uint32_t ruleid, uint16_t rulenum, uint32_t bucket) 1297 { 1298 struct dyn_ipv6_state *s; 1299 1300 DYN_BUCKET_ASSERT(bucket); 1301 CK_SLIST_FOREACH(s, &V_dyn_ipv6_parent[bucket], entry) { 1302 if (s->limit->parent == rule && 1303 s->limit->ruleid == ruleid && 1304 s->limit->rulenum == rulenum && 1305 s->proto == pkt->proto && 1306 s->sport == pkt->src_port && 1307 s->dport == pkt->dst_port && s->zoneid == zoneid && 1308 IN6_ARE_ADDR_EQUAL(&s->src, &pkt->src_ip6) && 1309 IN6_ARE_ADDR_EQUAL(&s->dst, &pkt->dst_ip6)) 1310 break; 1311 } 1312 return (s); 1313 } 1314 1315 #endif /* INET6 */ 1316 1317 /* 1318 * Lookup dynamic state. 1319 * pkt - filled by ipfw_chk() ipfw_flow_id; 1320 * ulp - determined by ipfw_chk() upper level protocol header; 1321 * dyn_info - info about matched state to return back; 1322 * Returns pointer to state's parent rule and dyn_info. If there is 1323 * no state, NULL is returned. 1324 * On match ipfw_dyn_lookup() updates state's counters. 1325 */ 1326 struct ip_fw * 1327 ipfw_dyn_lookup_state(const struct ip_fw_args *args, const void *ulp, 1328 int pktlen, const ipfw_insn *cmd, struct ipfw_dyn_info *info) 1329 { 1330 struct dyn_data *data; 1331 struct ip_fw *rule; 1332 1333 IPFW_RLOCK_ASSERT(&V_layer3_chain); 1334 1335 data = NULL; 1336 rule = NULL; 1337 info->kidx = cmd->arg1; 1338 info->direction = MATCH_NONE; 1339 info->hashval = hash_packet(&args->f_id); 1340 1341 DYNSTATE_CRITICAL_ENTER(); 1342 if (IS_IP4_FLOW_ID(&args->f_id)) { 1343 struct dyn_ipv4_state *s; 1344 1345 s = dyn_lookup_ipv4_state(&args->f_id, ulp, info, pktlen); 1346 if (s != NULL) { 1347 /* 1348 * Dynamic states are created using the same 5-tuple, 1349 * so it is assumed, that parent rule for O_LIMIT 1350 * state has the same address family. 1351 */ 1352 data = s->data; 1353 if (s->type == O_LIMIT) { 1354 s = data->parent; 1355 rule = s->limit->parent; 1356 } else 1357 rule = data->parent; 1358 } 1359 } 1360 #ifdef INET6 1361 else if (IS_IP6_FLOW_ID(&args->f_id)) { 1362 struct dyn_ipv6_state *s; 1363 1364 s = dyn_lookup_ipv6_state(&args->f_id, dyn_getscopeid(args), 1365 ulp, info, pktlen); 1366 if (s != NULL) { 1367 data = s->data; 1368 if (s->type == O_LIMIT) { 1369 s = data->parent; 1370 rule = s->limit->parent; 1371 } else 1372 rule = data->parent; 1373 } 1374 } 1375 #endif 1376 if (data != NULL) { 1377 /* 1378 * If cached chain id is the same, we can avoid rule index 1379 * lookup. Otherwise do lookup and update chain_id and f_pos. 1380 * It is safe even if there is concurrent thread that want 1381 * update the same state, because chain->id can be changed 1382 * only under IPFW_WLOCK(). 1383 */ 1384 if (data->chain_id != V_layer3_chain.id) { 1385 data->f_pos = ipfw_find_rule(&V_layer3_chain, 1386 data->rulenum, data->ruleid); 1387 /* 1388 * Check that found state has not orphaned. 1389 * When chain->id being changed the parent 1390 * rule can be deleted. If found rule doesn't 1391 * match the parent pointer, consider this 1392 * result as MATCH_NONE and return NULL. 1393 * 1394 * This will lead to creation of new similar state 1395 * that will be added into head of this bucket. 1396 * And the state that we currently have matched 1397 * should be deleted by dyn_expire_states(). 1398 * 1399 * In case when dyn_keep_states is enabled, return 1400 * pointer to default rule and corresponding f_pos 1401 * value. 1402 * XXX: In this case we lose the cache efficiency, 1403 * since f_pos is not cached, because it seems 1404 * there is no easy way to atomically switch 1405 * all fields related to parent rule of given 1406 * state. 1407 */ 1408 if (V_layer3_chain.map[data->f_pos] == rule) { 1409 data->chain_id = V_layer3_chain.id; 1410 info->f_pos = data->f_pos; 1411 } else if (V_dyn_keep_states != 0) { 1412 rule = V_layer3_chain.default_rule; 1413 info->f_pos = V_layer3_chain.n_rules - 1; 1414 } else { 1415 rule = NULL; 1416 info->direction = MATCH_NONE; 1417 DYN_DEBUG("rule %p [%u, %u] is considered " 1418 "invalid in data %p", rule, data->ruleid, 1419 data->rulenum, data); 1420 /* info->f_pos doesn't matter here. */ 1421 } 1422 } else 1423 info->f_pos = data->f_pos; 1424 } 1425 DYNSTATE_CRITICAL_EXIT(); 1426 #if 0 1427 /* 1428 * Return MATCH_NONE if parent rule is in disabled set. 1429 * This will lead to creation of new similar state that 1430 * will be added into head of this bucket. 1431 * 1432 * XXXAE: we need to be able update state's set when parent 1433 * rule set is changed. 1434 */ 1435 if (rule != NULL && (V_set_disable & (1 << rule->set))) { 1436 rule = NULL; 1437 info->direction = MATCH_NONE; 1438 } 1439 #endif 1440 return (rule); 1441 } 1442 1443 static struct dyn_parent * 1444 dyn_alloc_parent(void *parent, uint32_t ruleid, uint16_t rulenum, 1445 uint8_t set, uint32_t hashval) 1446 { 1447 struct dyn_parent *limit; 1448 1449 limit = uma_zalloc(V_dyn_parent_zone, M_NOWAIT | M_ZERO); 1450 if (limit == NULL) { 1451 if (last_log != time_uptime) { 1452 last_log = time_uptime; 1453 log(LOG_DEBUG, 1454 "ipfw: Cannot allocate parent dynamic state, " 1455 "consider increasing " 1456 "net.inet.ip.fw.dyn_parent_max\n"); 1457 } 1458 return (NULL); 1459 } 1460 1461 limit->parent = parent; 1462 limit->ruleid = ruleid; 1463 limit->rulenum = rulenum; 1464 limit->set = set; 1465 limit->hashval = hashval; 1466 limit->expire = time_uptime + V_dyn_short_lifetime; 1467 return (limit); 1468 } 1469 1470 static struct dyn_data * 1471 dyn_alloc_dyndata(void *parent, uint32_t ruleid, uint16_t rulenum, 1472 uint8_t set, const struct ipfw_flow_id *pkt, const void *ulp, int pktlen, 1473 uint32_t hashval, uint16_t fibnum) 1474 { 1475 struct dyn_data *data; 1476 1477 data = uma_zalloc(V_dyn_data_zone, M_NOWAIT | M_ZERO); 1478 if (data == NULL) { 1479 if (last_log != time_uptime) { 1480 last_log = time_uptime; 1481 log(LOG_DEBUG, 1482 "ipfw: Cannot allocate dynamic state, " 1483 "consider increasing net.inet.ip.fw.dyn_max\n"); 1484 } 1485 return (NULL); 1486 } 1487 1488 data->parent = parent; 1489 data->ruleid = ruleid; 1490 data->rulenum = rulenum; 1491 data->set = set; 1492 data->fibnum = fibnum; 1493 data->hashval = hashval; 1494 data->expire = time_uptime + V_dyn_syn_lifetime; 1495 dyn_update_proto_state(data, pkt, ulp, pktlen, MATCH_FORWARD); 1496 return (data); 1497 } 1498 1499 static struct dyn_ipv4_state * 1500 dyn_alloc_ipv4_state(const struct ipfw_flow_id *pkt, uint16_t kidx, 1501 uint8_t type) 1502 { 1503 struct dyn_ipv4_state *s; 1504 1505 s = uma_zalloc(V_dyn_ipv4_zone, M_NOWAIT | M_ZERO); 1506 if (s == NULL) 1507 return (NULL); 1508 1509 s->type = type; 1510 s->kidx = kidx; 1511 s->proto = pkt->proto; 1512 s->sport = pkt->src_port; 1513 s->dport = pkt->dst_port; 1514 s->src = pkt->src_ip; 1515 s->dst = pkt->dst_ip; 1516 return (s); 1517 } 1518 1519 /* 1520 * Add IPv4 parent state. 1521 * Returns pointer to parent state. When it is not NULL we are in 1522 * critical section and pointer protected by hazard pointer. 1523 * When some error occurs, it returns NULL and exit from critical section 1524 * is not needed. 1525 */ 1526 static struct dyn_ipv4_state * 1527 dyn_add_ipv4_parent(void *rule, uint32_t ruleid, uint16_t rulenum, 1528 uint8_t set, const struct ipfw_flow_id *pkt, uint32_t hashval, 1529 uint32_t version, uint16_t kidx) 1530 { 1531 struct dyn_ipv4_state *s; 1532 struct dyn_parent *limit; 1533 uint32_t bucket; 1534 1535 bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets); 1536 DYN_BUCKET_LOCK(bucket); 1537 if (version != DYN_BUCKET_VERSION(bucket, ipv4_parent_add)) { 1538 /* 1539 * Bucket version has been changed since last lookup, 1540 * do lookup again to be sure that state does not exist. 1541 */ 1542 s = dyn_lookup_ipv4_parent_locked(pkt, rule, ruleid, 1543 rulenum, bucket); 1544 if (s != NULL) { 1545 /* 1546 * Simultaneous thread has already created this 1547 * state. Just return it. 1548 */ 1549 DYNSTATE_CRITICAL_ENTER(); 1550 DYNSTATE_PROTECT(s); 1551 DYN_BUCKET_UNLOCK(bucket); 1552 return (s); 1553 } 1554 } 1555 1556 limit = dyn_alloc_parent(rule, ruleid, rulenum, set, hashval); 1557 if (limit == NULL) { 1558 DYN_BUCKET_UNLOCK(bucket); 1559 return (NULL); 1560 } 1561 1562 s = dyn_alloc_ipv4_state(pkt, kidx, O_LIMIT_PARENT); 1563 if (s == NULL) { 1564 DYN_BUCKET_UNLOCK(bucket); 1565 uma_zfree(V_dyn_parent_zone, limit); 1566 return (NULL); 1567 } 1568 1569 s->limit = limit; 1570 CK_SLIST_INSERT_HEAD(&V_dyn_ipv4_parent[bucket], s, entry); 1571 DYN_COUNT_INC(dyn_parent_count); 1572 DYN_BUCKET_VERSION_BUMP(bucket, ipv4_parent_add); 1573 DYNSTATE_CRITICAL_ENTER(); 1574 DYNSTATE_PROTECT(s); 1575 DYN_BUCKET_UNLOCK(bucket); 1576 return (s); 1577 } 1578 1579 static int 1580 dyn_add_ipv4_state(void *parent, uint32_t ruleid, uint16_t rulenum, 1581 uint8_t set, const struct ipfw_flow_id *pkt, const void *ulp, int pktlen, 1582 uint32_t hashval, struct ipfw_dyn_info *info, uint16_t fibnum, 1583 uint16_t kidx, uint8_t type) 1584 { 1585 struct dyn_ipv4_state *s; 1586 void *data; 1587 uint32_t bucket; 1588 1589 bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets); 1590 DYN_BUCKET_LOCK(bucket); 1591 if (info->direction == MATCH_UNKNOWN || 1592 info->kidx != kidx || 1593 info->hashval != hashval || 1594 info->version != DYN_BUCKET_VERSION(bucket, ipv4_add)) { 1595 /* 1596 * Bucket version has been changed since last lookup, 1597 * do lookup again to be sure that state does not exist. 1598 */ 1599 if (dyn_lookup_ipv4_state_locked(pkt, ulp, pktlen, 1600 bucket, kidx) != 0) { 1601 DYN_BUCKET_UNLOCK(bucket); 1602 return (EEXIST); 1603 } 1604 } 1605 1606 data = dyn_alloc_dyndata(parent, ruleid, rulenum, set, pkt, ulp, 1607 pktlen, hashval, fibnum); 1608 if (data == NULL) { 1609 DYN_BUCKET_UNLOCK(bucket); 1610 return (ENOMEM); 1611 } 1612 1613 s = dyn_alloc_ipv4_state(pkt, kidx, type); 1614 if (s == NULL) { 1615 DYN_BUCKET_UNLOCK(bucket); 1616 uma_zfree(V_dyn_data_zone, data); 1617 return (ENOMEM); 1618 } 1619 1620 s->data = data; 1621 CK_SLIST_INSERT_HEAD(&V_dyn_ipv4[bucket], s, entry); 1622 DYN_COUNT_INC(dyn_count); 1623 DYN_BUCKET_VERSION_BUMP(bucket, ipv4_add); 1624 DYN_BUCKET_UNLOCK(bucket); 1625 return (0); 1626 } 1627 1628 #ifdef INET6 1629 static struct dyn_ipv6_state * 1630 dyn_alloc_ipv6_state(const struct ipfw_flow_id *pkt, uint32_t zoneid, 1631 uint16_t kidx, uint8_t type) 1632 { 1633 struct dyn_ipv6_state *s; 1634 1635 s = uma_zalloc(V_dyn_ipv6_zone, M_NOWAIT | M_ZERO); 1636 if (s == NULL) 1637 return (NULL); 1638 1639 s->type = type; 1640 s->kidx = kidx; 1641 s->zoneid = zoneid; 1642 s->proto = pkt->proto; 1643 s->sport = pkt->src_port; 1644 s->dport = pkt->dst_port; 1645 s->src = pkt->src_ip6; 1646 s->dst = pkt->dst_ip6; 1647 return (s); 1648 } 1649 1650 /* 1651 * Add IPv6 parent state. 1652 * Returns pointer to parent state. When it is not NULL we are in 1653 * critical section and pointer protected by hazard pointer. 1654 * When some error occurs, it return NULL and exit from critical section 1655 * is not needed. 1656 */ 1657 static struct dyn_ipv6_state * 1658 dyn_add_ipv6_parent(void *rule, uint32_t ruleid, uint16_t rulenum, 1659 uint8_t set, const struct ipfw_flow_id *pkt, uint32_t zoneid, 1660 uint32_t hashval, uint32_t version, uint16_t kidx) 1661 { 1662 struct dyn_ipv6_state *s; 1663 struct dyn_parent *limit; 1664 uint32_t bucket; 1665 1666 bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets); 1667 DYN_BUCKET_LOCK(bucket); 1668 if (version != DYN_BUCKET_VERSION(bucket, ipv6_parent_add)) { 1669 /* 1670 * Bucket version has been changed since last lookup, 1671 * do lookup again to be sure that state does not exist. 1672 */ 1673 s = dyn_lookup_ipv6_parent_locked(pkt, zoneid, rule, ruleid, 1674 rulenum, bucket); 1675 if (s != NULL) { 1676 /* 1677 * Simultaneous thread has already created this 1678 * state. Just return it. 1679 */ 1680 DYNSTATE_CRITICAL_ENTER(); 1681 DYNSTATE_PROTECT(s); 1682 DYN_BUCKET_UNLOCK(bucket); 1683 return (s); 1684 } 1685 } 1686 1687 limit = dyn_alloc_parent(rule, ruleid, rulenum, set, hashval); 1688 if (limit == NULL) { 1689 DYN_BUCKET_UNLOCK(bucket); 1690 return (NULL); 1691 } 1692 1693 s = dyn_alloc_ipv6_state(pkt, zoneid, kidx, O_LIMIT_PARENT); 1694 if (s == NULL) { 1695 DYN_BUCKET_UNLOCK(bucket); 1696 uma_zfree(V_dyn_parent_zone, limit); 1697 return (NULL); 1698 } 1699 1700 s->limit = limit; 1701 CK_SLIST_INSERT_HEAD(&V_dyn_ipv6_parent[bucket], s, entry); 1702 DYN_COUNT_INC(dyn_parent_count); 1703 DYN_BUCKET_VERSION_BUMP(bucket, ipv6_parent_add); 1704 DYNSTATE_CRITICAL_ENTER(); 1705 DYNSTATE_PROTECT(s); 1706 DYN_BUCKET_UNLOCK(bucket); 1707 return (s); 1708 } 1709 1710 static int 1711 dyn_add_ipv6_state(void *parent, uint32_t ruleid, uint16_t rulenum, 1712 uint8_t set, const struct ipfw_flow_id *pkt, uint32_t zoneid, 1713 const void *ulp, int pktlen, uint32_t hashval, struct ipfw_dyn_info *info, 1714 uint16_t fibnum, uint16_t kidx, uint8_t type) 1715 { 1716 struct dyn_ipv6_state *s; 1717 struct dyn_data *data; 1718 uint32_t bucket; 1719 1720 bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets); 1721 DYN_BUCKET_LOCK(bucket); 1722 if (info->direction == MATCH_UNKNOWN || 1723 info->kidx != kidx || 1724 info->hashval != hashval || 1725 info->version != DYN_BUCKET_VERSION(bucket, ipv6_add)) { 1726 /* 1727 * Bucket version has been changed since last lookup, 1728 * do lookup again to be sure that state does not exist. 1729 */ 1730 if (dyn_lookup_ipv6_state_locked(pkt, zoneid, ulp, pktlen, 1731 bucket, kidx) != 0) { 1732 DYN_BUCKET_UNLOCK(bucket); 1733 return (EEXIST); 1734 } 1735 } 1736 1737 data = dyn_alloc_dyndata(parent, ruleid, rulenum, set, pkt, ulp, 1738 pktlen, hashval, fibnum); 1739 if (data == NULL) { 1740 DYN_BUCKET_UNLOCK(bucket); 1741 return (ENOMEM); 1742 } 1743 1744 s = dyn_alloc_ipv6_state(pkt, zoneid, kidx, type); 1745 if (s == NULL) { 1746 DYN_BUCKET_UNLOCK(bucket); 1747 uma_zfree(V_dyn_data_zone, data); 1748 return (ENOMEM); 1749 } 1750 1751 s->data = data; 1752 CK_SLIST_INSERT_HEAD(&V_dyn_ipv6[bucket], s, entry); 1753 DYN_COUNT_INC(dyn_count); 1754 DYN_BUCKET_VERSION_BUMP(bucket, ipv6_add); 1755 DYN_BUCKET_UNLOCK(bucket); 1756 return (0); 1757 } 1758 #endif /* INET6 */ 1759 1760 static void * 1761 dyn_get_parent_state(const struct ipfw_flow_id *pkt, uint32_t zoneid, 1762 struct ip_fw *rule, uint32_t hashval, uint32_t limit, uint16_t kidx) 1763 { 1764 char sbuf[24]; 1765 struct dyn_parent *p; 1766 void *ret; 1767 uint32_t bucket, version; 1768 1769 p = NULL; 1770 ret = NULL; 1771 bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets); 1772 DYNSTATE_CRITICAL_ENTER(); 1773 if (IS_IP4_FLOW_ID(pkt)) { 1774 struct dyn_ipv4_state *s; 1775 1776 version = DYN_BUCKET_VERSION(bucket, ipv4_parent_add); 1777 s = dyn_lookup_ipv4_parent(pkt, rule, rule->id, 1778 rule->rulenum, bucket); 1779 if (s == NULL) { 1780 /* 1781 * Exit from critical section because dyn_add_parent() 1782 * will acquire bucket lock. 1783 */ 1784 DYNSTATE_CRITICAL_EXIT(); 1785 1786 s = dyn_add_ipv4_parent(rule, rule->id, 1787 rule->rulenum, rule->set, pkt, hashval, 1788 version, kidx); 1789 if (s == NULL) 1790 return (NULL); 1791 /* Now we are in critical section again. */ 1792 } 1793 ret = s; 1794 p = s->limit; 1795 } 1796 #ifdef INET6 1797 else if (IS_IP6_FLOW_ID(pkt)) { 1798 struct dyn_ipv6_state *s; 1799 1800 version = DYN_BUCKET_VERSION(bucket, ipv6_parent_add); 1801 s = dyn_lookup_ipv6_parent(pkt, zoneid, rule, rule->id, 1802 rule->rulenum, bucket); 1803 if (s == NULL) { 1804 /* 1805 * Exit from critical section because dyn_add_parent() 1806 * can acquire bucket mutex. 1807 */ 1808 DYNSTATE_CRITICAL_EXIT(); 1809 1810 s = dyn_add_ipv6_parent(rule, rule->id, 1811 rule->rulenum, rule->set, pkt, zoneid, hashval, 1812 version, kidx); 1813 if (s == NULL) 1814 return (NULL); 1815 /* Now we are in critical section again. */ 1816 } 1817 ret = s; 1818 p = s->limit; 1819 } 1820 #endif 1821 else { 1822 DYNSTATE_CRITICAL_EXIT(); 1823 return (NULL); 1824 } 1825 1826 /* Check the limit */ 1827 if (DPARENT_COUNT(p) >= limit) { 1828 DYNSTATE_CRITICAL_EXIT(); 1829 if (V_fw_verbose && last_log != time_uptime) { 1830 last_log = time_uptime; 1831 snprintf(sbuf, sizeof(sbuf), "%u drop session", 1832 rule->rulenum); 1833 print_dyn_rule_flags(pkt, O_LIMIT, 1834 LOG_SECURITY | LOG_DEBUG, sbuf, 1835 "too many entries"); 1836 } 1837 return (NULL); 1838 } 1839 1840 /* Take new session into account. */ 1841 DPARENT_COUNT_INC(p); 1842 /* 1843 * We must exit from critical section because the following code 1844 * can acquire bucket mutex. 1845 * We rely on the the 'count' field. The state will not expire 1846 * until it has some child states, i.e. 'count' field is not zero. 1847 * Return state pointer, it will be used by child states as parent. 1848 */ 1849 DYNSTATE_CRITICAL_EXIT(); 1850 return (ret); 1851 } 1852 1853 static int 1854 dyn_install_state(const struct ipfw_flow_id *pkt, uint32_t zoneid, 1855 uint16_t fibnum, const void *ulp, int pktlen, void *rule, 1856 uint32_t ruleid, uint16_t rulenum, uint8_t set, 1857 struct ipfw_dyn_info *info, uint32_t limit, uint16_t limit_mask, 1858 uint16_t kidx, uint8_t type) 1859 { 1860 struct ipfw_flow_id id; 1861 uint32_t hashval, parent_hashval; 1862 int ret; 1863 1864 MPASS(type == O_LIMIT || type == O_KEEP_STATE); 1865 1866 if (type == O_LIMIT) { 1867 /* Create masked flow id and calculate bucket */ 1868 id.addr_type = pkt->addr_type; 1869 id.proto = pkt->proto; 1870 id.fib = fibnum; /* unused */ 1871 id.src_port = (limit_mask & DYN_SRC_PORT) ? 1872 pkt->src_port: 0; 1873 id.dst_port = (limit_mask & DYN_DST_PORT) ? 1874 pkt->dst_port: 0; 1875 if (IS_IP4_FLOW_ID(pkt)) { 1876 id.src_ip = (limit_mask & DYN_SRC_ADDR) ? 1877 pkt->src_ip: 0; 1878 id.dst_ip = (limit_mask & DYN_DST_ADDR) ? 1879 pkt->dst_ip: 0; 1880 } 1881 #ifdef INET6 1882 else if (IS_IP6_FLOW_ID(pkt)) { 1883 if (limit_mask & DYN_SRC_ADDR) 1884 id.src_ip6 = pkt->src_ip6; 1885 else 1886 memset(&id.src_ip6, 0, sizeof(id.src_ip6)); 1887 if (limit_mask & DYN_DST_ADDR) 1888 id.dst_ip6 = pkt->dst_ip6; 1889 else 1890 memset(&id.dst_ip6, 0, sizeof(id.dst_ip6)); 1891 } 1892 #endif 1893 else 1894 return (EAFNOSUPPORT); 1895 1896 parent_hashval = hash_parent(&id, rule); 1897 rule = dyn_get_parent_state(&id, zoneid, rule, parent_hashval, 1898 limit, kidx); 1899 if (rule == NULL) { 1900 #if 0 1901 if (V_fw_verbose && last_log != time_uptime) { 1902 last_log = time_uptime; 1903 snprintf(sbuf, sizeof(sbuf), 1904 "%u drop session", rule->rulenum); 1905 print_dyn_rule_flags(pkt, O_LIMIT, 1906 LOG_SECURITY | LOG_DEBUG, sbuf, 1907 "too many entries"); 1908 } 1909 #endif 1910 return (EACCES); 1911 } 1912 /* 1913 * Limit is not reached, create new state. 1914 * Now rule points to parent state. 1915 */ 1916 } 1917 1918 hashval = hash_packet(pkt); 1919 if (IS_IP4_FLOW_ID(pkt)) 1920 ret = dyn_add_ipv4_state(rule, ruleid, rulenum, set, pkt, 1921 ulp, pktlen, hashval, info, fibnum, kidx, type); 1922 #ifdef INET6 1923 else if (IS_IP6_FLOW_ID(pkt)) 1924 ret = dyn_add_ipv6_state(rule, ruleid, rulenum, set, pkt, 1925 zoneid, ulp, pktlen, hashval, info, fibnum, kidx, type); 1926 #endif /* INET6 */ 1927 else 1928 ret = EAFNOSUPPORT; 1929 1930 if (type == O_LIMIT) { 1931 if (ret != 0) { 1932 /* 1933 * We failed to create child state for O_LIMIT 1934 * opcode. Since we already counted it in the parent, 1935 * we must revert counter back. The 'rule' points to 1936 * parent state, use it to get dyn_parent. 1937 * 1938 * XXXAE: it should be safe to use 'rule' pointer 1939 * without extra lookup, parent state is referenced 1940 * and should not be freed. 1941 */ 1942 if (IS_IP4_FLOW_ID(&id)) 1943 DPARENT_COUNT_DEC( 1944 ((struct dyn_ipv4_state *)rule)->limit); 1945 #ifdef INET6 1946 else if (IS_IP6_FLOW_ID(&id)) 1947 DPARENT_COUNT_DEC( 1948 ((struct dyn_ipv6_state *)rule)->limit); 1949 #endif 1950 } 1951 } 1952 /* 1953 * EEXIST means that simultaneous thread has created this 1954 * state. Consider this as success. 1955 * 1956 * XXXAE: should we invalidate 'info' content here? 1957 */ 1958 if (ret == EEXIST) 1959 return (0); 1960 return (ret); 1961 } 1962 1963 /* 1964 * Install dynamic state. 1965 * chain - ipfw's instance; 1966 * rule - the parent rule that installs the state; 1967 * cmd - opcode that installs the state; 1968 * args - ipfw arguments; 1969 * ulp - upper level protocol header; 1970 * pktlen - packet length; 1971 * info - dynamic state lookup info; 1972 * tablearg - tablearg id. 1973 * 1974 * Returns non-zero value (failure) if state is not installed because 1975 * of errors or because session limitations are enforced. 1976 */ 1977 int 1978 ipfw_dyn_install_state(struct ip_fw_chain *chain, struct ip_fw *rule, 1979 const ipfw_insn_limit *cmd, const struct ip_fw_args *args, 1980 const void *ulp, int pktlen, struct ipfw_dyn_info *info, 1981 uint32_t tablearg) 1982 { 1983 uint32_t limit; 1984 uint16_t limit_mask; 1985 1986 if (cmd->o.opcode == O_LIMIT) { 1987 limit = IP_FW_ARG_TABLEARG(chain, cmd->conn_limit, limit); 1988 limit_mask = cmd->limit_mask; 1989 } else { 1990 limit = 0; 1991 limit_mask = 0; 1992 } 1993 return (dyn_install_state(&args->f_id, 1994 #ifdef INET6 1995 IS_IP6_FLOW_ID(&args->f_id) ? dyn_getscopeid(args): 1996 #endif 1997 0, M_GETFIB(args->m), ulp, pktlen, rule, rule->id, rule->rulenum, 1998 rule->set, info, limit, limit_mask, cmd->o.arg1, cmd->o.opcode)); 1999 } 2000 2001 /* 2002 * Free safe to remove state entries from expired lists. 2003 */ 2004 static void 2005 dyn_free_states(struct ip_fw_chain *chain) 2006 { 2007 struct dyn_ipv4_state *s4, *s4n; 2008 #ifdef INET6 2009 struct dyn_ipv6_state *s6, *s6n; 2010 #endif 2011 int cached_count, i; 2012 2013 /* 2014 * We keep pointers to objects that are in use on each CPU 2015 * in the per-cpu dyn_hp pointer. When object is going to be 2016 * removed, first of it is unlinked from the corresponding 2017 * list. This leads to changing of dyn_bucket_xxx_delver version. 2018 * Unlinked objects is placed into corresponding dyn_expired_xxx 2019 * list. Reader that is going to dereference object pointer checks 2020 * dyn_bucket_xxx_delver version before and after storing pointer 2021 * into dyn_hp. If version is the same, the object is protected 2022 * from freeing and it is safe to dereference. Othervise reader 2023 * tries to iterate list again from the beginning, but this object 2024 * now unlinked and thus will not be accessible. 2025 * 2026 * Copy dyn_hp pointers for each CPU into dyn_hp_cache array. 2027 * It does not matter that some pointer can be changed in 2028 * time while we are copying. We need to check, that objects 2029 * removed in the previous pass are not in use. And if dyn_hp 2030 * pointer does not contain it in the time when we are copying, 2031 * it will not appear there, because it is already unlinked. 2032 * And for new pointers we will not free objects that will be 2033 * unlinked in this pass. 2034 */ 2035 cached_count = 0; 2036 CPU_FOREACH(i) { 2037 dyn_hp_cache[cached_count] = DYNSTATE_GET(i); 2038 if (dyn_hp_cache[cached_count] != NULL) 2039 cached_count++; 2040 } 2041 2042 /* 2043 * Free expired states that are safe to free. 2044 * Check each entry from previous pass in the dyn_expired_xxx 2045 * list, if pointer to the object is in the dyn_hp_cache array, 2046 * keep it until next pass. Otherwise it is safe to free the 2047 * object. 2048 * 2049 * XXXAE: optimize this to use SLIST_REMOVE_AFTER. 2050 */ 2051 #define DYN_FREE_STATES(s, next, name) do { \ 2052 s = SLIST_FIRST(&V_dyn_expired_ ## name); \ 2053 while (s != NULL) { \ 2054 next = SLIST_NEXT(s, expired); \ 2055 for (i = 0; i < cached_count; i++) \ 2056 if (dyn_hp_cache[i] == s) \ 2057 break; \ 2058 if (i == cached_count) { \ 2059 if (s->type == O_LIMIT_PARENT && \ 2060 s->limit->count != 0) { \ 2061 s = next; \ 2062 continue; \ 2063 } \ 2064 SLIST_REMOVE(&V_dyn_expired_ ## name, \ 2065 s, dyn_ ## name ## _state, expired); \ 2066 if (s->type == O_LIMIT_PARENT) \ 2067 uma_zfree(V_dyn_parent_zone, s->limit); \ 2068 else \ 2069 uma_zfree(V_dyn_data_zone, s->data); \ 2070 uma_zfree(V_dyn_ ## name ## _zone, s); \ 2071 } \ 2072 s = next; \ 2073 } \ 2074 } while (0) 2075 2076 /* 2077 * Protect access to expired lists with DYN_EXPIRED_LOCK. 2078 * Userland can invoke ipfw_expire_dyn_states() to delete 2079 * specific states, this will lead to modification of expired 2080 * lists. 2081 * 2082 * XXXAE: do we need DYN_EXPIRED_LOCK? We can just use 2083 * IPFW_UH_WLOCK to protect access to these lists. 2084 */ 2085 DYN_EXPIRED_LOCK(); 2086 DYN_FREE_STATES(s4, s4n, ipv4); 2087 #ifdef INET6 2088 DYN_FREE_STATES(s6, s6n, ipv6); 2089 #endif 2090 DYN_EXPIRED_UNLOCK(); 2091 #undef DYN_FREE_STATES 2092 } 2093 2094 /* 2095 * Returns 1 when state is matched by specified range, otherwise returns 0. 2096 */ 2097 static int 2098 dyn_match_range(uint16_t rulenum, uint8_t set, const ipfw_range_tlv *rt) 2099 { 2100 2101 MPASS(rt != NULL); 2102 /* flush all states */ 2103 if (rt->flags & IPFW_RCFLAG_ALL) 2104 return (1); 2105 if ((rt->flags & IPFW_RCFLAG_SET) != 0 && set != rt->set) 2106 return (0); 2107 if ((rt->flags & IPFW_RCFLAG_RANGE) != 0 && 2108 (rulenum < rt->start_rule || rulenum > rt->end_rule)) 2109 return (0); 2110 return (1); 2111 } 2112 2113 static int 2114 dyn_match_ipv4_state(struct dyn_ipv4_state *s, const ipfw_range_tlv *rt) 2115 { 2116 2117 if (s->type == O_LIMIT_PARENT) 2118 return (dyn_match_range(s->limit->rulenum, 2119 s->limit->set, rt)); 2120 2121 if (s->type == O_LIMIT) 2122 return (dyn_match_range(s->data->rulenum, s->data->set, rt)); 2123 2124 if (V_dyn_keep_states == 0 && 2125 dyn_match_range(s->data->rulenum, s->data->set, rt)) 2126 return (1); 2127 2128 return (0); 2129 } 2130 2131 #ifdef INET6 2132 static int 2133 dyn_match_ipv6_state(struct dyn_ipv6_state *s, const ipfw_range_tlv *rt) 2134 { 2135 2136 if (s->type == O_LIMIT_PARENT) 2137 return (dyn_match_range(s->limit->rulenum, 2138 s->limit->set, rt)); 2139 2140 if (s->type == O_LIMIT) 2141 return (dyn_match_range(s->data->rulenum, s->data->set, rt)); 2142 2143 if (V_dyn_keep_states == 0 && 2144 dyn_match_range(s->data->rulenum, s->data->set, rt)) 2145 return (1); 2146 2147 return (0); 2148 } 2149 #endif 2150 2151 /* 2152 * Unlink expired entries from states lists. 2153 * @rt can be used to specify the range of states for deletion. 2154 */ 2155 static void 2156 dyn_expire_states(struct ip_fw_chain *chain, ipfw_range_tlv *rt) 2157 { 2158 struct dyn_ipv4_slist expired_ipv4; 2159 #ifdef INET6 2160 struct dyn_ipv6_slist expired_ipv6; 2161 struct dyn_ipv6_state *s6, *s6n, *s6p; 2162 #endif 2163 struct dyn_ipv4_state *s4, *s4n, *s4p; 2164 int bucket, removed, length, max_length; 2165 2166 /* 2167 * Unlink expired states from each bucket. 2168 * With acquired bucket lock iterate entries of each lists: 2169 * ipv4, ipv4_parent, ipv6, and ipv6_parent. Check expired time 2170 * and unlink entry from the list, link entry into temporary 2171 * expired_xxx lists then bump "del" bucket version. 2172 * 2173 * When an entry is removed, corresponding states counter is 2174 * decremented. If entry has O_LIMIT type, parent's reference 2175 * counter is decremented. 2176 * 2177 * NOTE: this function can be called from userspace context 2178 * when user deletes rules. In this case all matched states 2179 * will be forcedly unlinked. O_LIMIT_PARENT states will be kept 2180 * in the expired lists until reference counter become zero. 2181 */ 2182 #define DYN_UNLINK_STATES(s, prev, next, exp, af, name, extra) do { \ 2183 length = 0; \ 2184 removed = 0; \ 2185 prev = NULL; \ 2186 s = CK_SLIST_FIRST(&V_dyn_ ## name [bucket]); \ 2187 while (s != NULL) { \ 2188 next = CK_SLIST_NEXT(s, entry); \ 2189 if ((TIME_LEQ((s)->exp, time_uptime) && extra) || \ 2190 (rt != NULL && dyn_match_ ## af ## _state(s, rt))) {\ 2191 if (prev != NULL) \ 2192 CK_SLIST_REMOVE_AFTER(prev, entry); \ 2193 else \ 2194 CK_SLIST_REMOVE_HEAD( \ 2195 &V_dyn_ ## name [bucket], entry); \ 2196 removed++; \ 2197 SLIST_INSERT_HEAD(&expired_ ## af, s, expired); \ 2198 if (s->type == O_LIMIT_PARENT) \ 2199 DYN_COUNT_DEC(dyn_parent_count); \ 2200 else { \ 2201 DYN_COUNT_DEC(dyn_count); \ 2202 if (s->type == O_LIMIT) { \ 2203 s = s->data->parent; \ 2204 DPARENT_COUNT_DEC(s->limit); \ 2205 } \ 2206 } \ 2207 } else { \ 2208 prev = s; \ 2209 length++; \ 2210 } \ 2211 s = next; \ 2212 } \ 2213 if (removed != 0) \ 2214 DYN_BUCKET_VERSION_BUMP(bucket, name ## _del); \ 2215 if (length > max_length) \ 2216 max_length = length; \ 2217 } while (0) 2218 2219 SLIST_INIT(&expired_ipv4); 2220 #ifdef INET6 2221 SLIST_INIT(&expired_ipv6); 2222 #endif 2223 max_length = 0; 2224 for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) { 2225 DYN_BUCKET_LOCK(bucket); 2226 DYN_UNLINK_STATES(s4, s4p, s4n, data->expire, ipv4, ipv4, 1); 2227 DYN_UNLINK_STATES(s4, s4p, s4n, limit->expire, ipv4, 2228 ipv4_parent, (s4->limit->count == 0)); 2229 #ifdef INET6 2230 DYN_UNLINK_STATES(s6, s6p, s6n, data->expire, ipv6, ipv6, 1); 2231 DYN_UNLINK_STATES(s6, s6p, s6n, limit->expire, ipv6, 2232 ipv6_parent, (s6->limit->count == 0)); 2233 #endif 2234 DYN_BUCKET_UNLOCK(bucket); 2235 } 2236 /* Update curr_max_length for statistics. */ 2237 V_curr_max_length = max_length; 2238 /* 2239 * Concatenate temporary lists with global expired lists. 2240 */ 2241 DYN_EXPIRED_LOCK(); 2242 SLIST_CONCAT(&V_dyn_expired_ipv4, &expired_ipv4, 2243 dyn_ipv4_state, expired); 2244 #ifdef INET6 2245 SLIST_CONCAT(&V_dyn_expired_ipv6, &expired_ipv6, 2246 dyn_ipv6_state, expired); 2247 #endif 2248 DYN_EXPIRED_UNLOCK(); 2249 #undef DYN_UNLINK_STATES 2250 #undef DYN_UNREF_STATES 2251 } 2252 2253 static struct mbuf * 2254 dyn_mgethdr(int len, uint16_t fibnum) 2255 { 2256 struct mbuf *m; 2257 2258 m = m_gethdr(M_NOWAIT, MT_DATA); 2259 if (m == NULL) 2260 return (NULL); 2261 #ifdef MAC 2262 mac_netinet_firewall_send(m); 2263 #endif 2264 M_SETFIB(m, fibnum); 2265 m->m_data += max_linkhdr; 2266 m->m_flags |= M_SKIP_FIREWALL; 2267 m->m_len = m->m_pkthdr.len = len; 2268 bzero(m->m_data, len); 2269 return (m); 2270 } 2271 2272 static void 2273 dyn_make_keepalive_ipv4(struct mbuf *m, in_addr_t src, in_addr_t dst, 2274 uint32_t seq, uint32_t ack, uint16_t sport, uint16_t dport) 2275 { 2276 struct tcphdr *tcp; 2277 struct ip *ip; 2278 2279 ip = mtod(m, struct ip *); 2280 ip->ip_v = 4; 2281 ip->ip_hl = sizeof(*ip) >> 2; 2282 ip->ip_tos = IPTOS_LOWDELAY; 2283 ip->ip_len = htons(m->m_len); 2284 ip->ip_off |= htons(IP_DF); 2285 ip->ip_ttl = V_ip_defttl; 2286 ip->ip_p = IPPROTO_TCP; 2287 ip->ip_src.s_addr = htonl(src); 2288 ip->ip_dst.s_addr = htonl(dst); 2289 2290 tcp = mtodo(m, sizeof(struct ip)); 2291 tcp->th_sport = htons(sport); 2292 tcp->th_dport = htons(dport); 2293 tcp->th_off = sizeof(struct tcphdr) >> 2; 2294 tcp->th_seq = htonl(seq); 2295 tcp->th_ack = htonl(ack); 2296 tcp->th_flags = TH_ACK; 2297 tcp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2298 htons(sizeof(struct tcphdr) + IPPROTO_TCP)); 2299 2300 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 2301 m->m_pkthdr.csum_flags = CSUM_TCP; 2302 } 2303 2304 static void 2305 dyn_enqueue_keepalive_ipv4(struct mbufq *q, const struct dyn_ipv4_state *s) 2306 { 2307 struct mbuf *m; 2308 2309 if ((s->data->state & ACK_FWD) == 0 && s->data->ack_fwd > 0) { 2310 m = dyn_mgethdr(sizeof(struct ip) + sizeof(struct tcphdr), 2311 s->data->fibnum); 2312 if (m != NULL) { 2313 dyn_make_keepalive_ipv4(m, s->dst, s->src, 2314 s->data->ack_fwd - 1, s->data->ack_rev, 2315 s->dport, s->sport); 2316 if (mbufq_enqueue(q, m)) { 2317 m_freem(m); 2318 log(LOG_DEBUG, "ipfw: limit for IPv4 " 2319 "keepalive queue is reached.\n"); 2320 return; 2321 } 2322 } 2323 } 2324 2325 if ((s->data->state & ACK_REV) == 0 && s->data->ack_rev > 0) { 2326 m = dyn_mgethdr(sizeof(struct ip) + sizeof(struct tcphdr), 2327 s->data->fibnum); 2328 if (m != NULL) { 2329 dyn_make_keepalive_ipv4(m, s->src, s->dst, 2330 s->data->ack_rev - 1, s->data->ack_fwd, 2331 s->sport, s->dport); 2332 if (mbufq_enqueue(q, m)) { 2333 m_freem(m); 2334 log(LOG_DEBUG, "ipfw: limit for IPv4 " 2335 "keepalive queue is reached.\n"); 2336 return; 2337 } 2338 } 2339 } 2340 } 2341 2342 /* 2343 * Prepare and send keep-alive packets. 2344 */ 2345 static void 2346 dyn_send_keepalive_ipv4(struct ip_fw_chain *chain) 2347 { 2348 struct mbufq q; 2349 struct mbuf *m; 2350 struct dyn_ipv4_state *s; 2351 uint32_t bucket; 2352 2353 mbufq_init(&q, INT_MAX); 2354 IPFW_UH_RLOCK(chain); 2355 /* 2356 * It is safe to not use hazard pointer and just do lockless 2357 * access to the lists, because states entries can not be deleted 2358 * while we hold IPFW_UH_RLOCK. 2359 */ 2360 for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) { 2361 CK_SLIST_FOREACH(s, &V_dyn_ipv4[bucket], entry) { 2362 /* 2363 * Only established TCP connections that will 2364 * become expired withing dyn_keepalive_interval. 2365 */ 2366 if (s->proto != IPPROTO_TCP || 2367 (s->data->state & BOTH_SYN) != BOTH_SYN || 2368 TIME_LEQ(time_uptime + V_dyn_keepalive_interval, 2369 s->data->expire)) 2370 continue; 2371 dyn_enqueue_keepalive_ipv4(&q, s); 2372 } 2373 } 2374 IPFW_UH_RUNLOCK(chain); 2375 while ((m = mbufq_dequeue(&q)) != NULL) 2376 ip_output(m, NULL, NULL, 0, NULL, NULL); 2377 } 2378 2379 #ifdef INET6 2380 static void 2381 dyn_make_keepalive_ipv6(struct mbuf *m, const struct in6_addr *src, 2382 const struct in6_addr *dst, uint32_t zoneid, uint32_t seq, uint32_t ack, 2383 uint16_t sport, uint16_t dport) 2384 { 2385 struct tcphdr *tcp; 2386 struct ip6_hdr *ip6; 2387 2388 ip6 = mtod(m, struct ip6_hdr *); 2389 ip6->ip6_vfc |= IPV6_VERSION; 2390 ip6->ip6_plen = htons(sizeof(struct tcphdr)); 2391 ip6->ip6_nxt = IPPROTO_TCP; 2392 ip6->ip6_hlim = IPV6_DEFHLIM; 2393 ip6->ip6_src = *src; 2394 if (IN6_IS_ADDR_LINKLOCAL(src)) 2395 ip6->ip6_src.s6_addr16[1] = htons(zoneid & 0xffff); 2396 ip6->ip6_dst = *dst; 2397 if (IN6_IS_ADDR_LINKLOCAL(dst)) 2398 ip6->ip6_dst.s6_addr16[1] = htons(zoneid & 0xffff); 2399 2400 tcp = mtodo(m, sizeof(struct ip6_hdr)); 2401 tcp->th_sport = htons(sport); 2402 tcp->th_dport = htons(dport); 2403 tcp->th_off = sizeof(struct tcphdr) >> 2; 2404 tcp->th_seq = htonl(seq); 2405 tcp->th_ack = htonl(ack); 2406 tcp->th_flags = TH_ACK; 2407 tcp->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr), 2408 IPPROTO_TCP, 0); 2409 2410 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 2411 m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; 2412 } 2413 2414 static void 2415 dyn_enqueue_keepalive_ipv6(struct mbufq *q, const struct dyn_ipv6_state *s) 2416 { 2417 struct mbuf *m; 2418 2419 if ((s->data->state & ACK_FWD) == 0 && s->data->ack_fwd > 0) { 2420 m = dyn_mgethdr(sizeof(struct ip6_hdr) + 2421 sizeof(struct tcphdr), s->data->fibnum); 2422 if (m != NULL) { 2423 dyn_make_keepalive_ipv6(m, &s->dst, &s->src, 2424 s->zoneid, s->data->ack_fwd - 1, s->data->ack_rev, 2425 s->dport, s->sport); 2426 if (mbufq_enqueue(q, m)) { 2427 m_freem(m); 2428 log(LOG_DEBUG, "ipfw: limit for IPv6 " 2429 "keepalive queue is reached.\n"); 2430 return; 2431 } 2432 } 2433 } 2434 2435 if ((s->data->state & ACK_REV) == 0 && s->data->ack_rev > 0) { 2436 m = dyn_mgethdr(sizeof(struct ip6_hdr) + 2437 sizeof(struct tcphdr), s->data->fibnum); 2438 if (m != NULL) { 2439 dyn_make_keepalive_ipv6(m, &s->src, &s->dst, 2440 s->zoneid, s->data->ack_rev - 1, s->data->ack_fwd, 2441 s->sport, s->dport); 2442 if (mbufq_enqueue(q, m)) { 2443 m_freem(m); 2444 log(LOG_DEBUG, "ipfw: limit for IPv6 " 2445 "keepalive queue is reached.\n"); 2446 return; 2447 } 2448 } 2449 } 2450 } 2451 2452 static void 2453 dyn_send_keepalive_ipv6(struct ip_fw_chain *chain) 2454 { 2455 struct mbufq q; 2456 struct mbuf *m; 2457 struct dyn_ipv6_state *s; 2458 uint32_t bucket; 2459 2460 mbufq_init(&q, INT_MAX); 2461 IPFW_UH_RLOCK(chain); 2462 /* 2463 * It is safe to not use hazard pointer and just do lockless 2464 * access to the lists, because states entries can not be deleted 2465 * while we hold IPFW_UH_RLOCK. 2466 */ 2467 for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) { 2468 CK_SLIST_FOREACH(s, &V_dyn_ipv6[bucket], entry) { 2469 /* 2470 * Only established TCP connections that will 2471 * become expired withing dyn_keepalive_interval. 2472 */ 2473 if (s->proto != IPPROTO_TCP || 2474 (s->data->state & BOTH_SYN) != BOTH_SYN || 2475 TIME_LEQ(time_uptime + V_dyn_keepalive_interval, 2476 s->data->expire)) 2477 continue; 2478 dyn_enqueue_keepalive_ipv6(&q, s); 2479 } 2480 } 2481 IPFW_UH_RUNLOCK(chain); 2482 while ((m = mbufq_dequeue(&q)) != NULL) 2483 ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); 2484 } 2485 #endif /* INET6 */ 2486 2487 static void 2488 dyn_grow_hashtable(struct ip_fw_chain *chain, uint32_t new) 2489 { 2490 #ifdef INET6 2491 struct dyn_ipv6ck_slist *ipv6, *ipv6_parent; 2492 uint32_t *ipv6_add, *ipv6_del, *ipv6_parent_add, *ipv6_parent_del; 2493 struct dyn_ipv6_state *s6; 2494 #endif 2495 struct dyn_ipv4ck_slist *ipv4, *ipv4_parent; 2496 uint32_t *ipv4_add, *ipv4_del, *ipv4_parent_add, *ipv4_parent_del; 2497 struct dyn_ipv4_state *s4; 2498 struct mtx *bucket_lock; 2499 void *tmp; 2500 uint32_t bucket; 2501 2502 MPASS(powerof2(new)); 2503 DYN_DEBUG("grow hash size %u -> %u", V_curr_dyn_buckets, new); 2504 /* 2505 * Allocate and initialize new lists. 2506 * XXXAE: on memory pressure this can disable callout timer. 2507 */ 2508 bucket_lock = malloc(new * sizeof(struct mtx), M_IPFW, 2509 M_WAITOK | M_ZERO); 2510 ipv4 = malloc(new * sizeof(struct dyn_ipv4ck_slist), M_IPFW, 2511 M_WAITOK | M_ZERO); 2512 ipv4_parent = malloc(new * sizeof(struct dyn_ipv4ck_slist), M_IPFW, 2513 M_WAITOK | M_ZERO); 2514 ipv4_add = malloc(new * sizeof(uint32_t), M_IPFW, M_WAITOK | M_ZERO); 2515 ipv4_del = malloc(new * sizeof(uint32_t), M_IPFW, M_WAITOK | M_ZERO); 2516 ipv4_parent_add = malloc(new * sizeof(uint32_t), M_IPFW, 2517 M_WAITOK | M_ZERO); 2518 ipv4_parent_del = malloc(new * sizeof(uint32_t), M_IPFW, 2519 M_WAITOK | M_ZERO); 2520 #ifdef INET6 2521 ipv6 = malloc(new * sizeof(struct dyn_ipv6ck_slist), M_IPFW, 2522 M_WAITOK | M_ZERO); 2523 ipv6_parent = malloc(new * sizeof(struct dyn_ipv6ck_slist), M_IPFW, 2524 M_WAITOK | M_ZERO); 2525 ipv6_add = malloc(new * sizeof(uint32_t), M_IPFW, M_WAITOK | M_ZERO); 2526 ipv6_del = malloc(new * sizeof(uint32_t), M_IPFW, M_WAITOK | M_ZERO); 2527 ipv6_parent_add = malloc(new * sizeof(uint32_t), M_IPFW, 2528 M_WAITOK | M_ZERO); 2529 ipv6_parent_del = malloc(new * sizeof(uint32_t), M_IPFW, 2530 M_WAITOK | M_ZERO); 2531 #endif 2532 for (bucket = 0; bucket < new; bucket++) { 2533 DYN_BUCKET_LOCK_INIT(bucket_lock, bucket); 2534 CK_SLIST_INIT(&ipv4[bucket]); 2535 CK_SLIST_INIT(&ipv4_parent[bucket]); 2536 #ifdef INET6 2537 CK_SLIST_INIT(&ipv6[bucket]); 2538 CK_SLIST_INIT(&ipv6_parent[bucket]); 2539 #endif 2540 } 2541 2542 #define DYN_RELINK_STATES(s, hval, i, head, ohead) do { \ 2543 while ((s = CK_SLIST_FIRST(&V_dyn_ ## ohead[i])) != NULL) { \ 2544 CK_SLIST_REMOVE_HEAD(&V_dyn_ ## ohead[i], entry); \ 2545 CK_SLIST_INSERT_HEAD(&head[DYN_BUCKET(s->hval, new)], \ 2546 s, entry); \ 2547 } \ 2548 } while (0) 2549 /* 2550 * Prevent rules changing from userland. 2551 */ 2552 IPFW_UH_WLOCK(chain); 2553 /* 2554 * Hold traffic processing until we finish resize to 2555 * prevent access to states lists. 2556 */ 2557 IPFW_WLOCK(chain); 2558 /* Re-link all dynamic states */ 2559 for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) { 2560 DYN_RELINK_STATES(s4, data->hashval, bucket, ipv4, ipv4); 2561 DYN_RELINK_STATES(s4, limit->hashval, bucket, ipv4_parent, 2562 ipv4_parent); 2563 #ifdef INET6 2564 DYN_RELINK_STATES(s6, data->hashval, bucket, ipv6, ipv6); 2565 DYN_RELINK_STATES(s6, limit->hashval, bucket, ipv6_parent, 2566 ipv6_parent); 2567 #endif 2568 } 2569 2570 #define DYN_SWAP_PTR(old, new, tmp) do { \ 2571 tmp = old; \ 2572 old = new; \ 2573 new = tmp; \ 2574 } while (0) 2575 /* Swap pointers */ 2576 DYN_SWAP_PTR(V_dyn_bucket_lock, bucket_lock, tmp); 2577 DYN_SWAP_PTR(V_dyn_ipv4, ipv4, tmp); 2578 DYN_SWAP_PTR(V_dyn_ipv4_parent, ipv4_parent, tmp); 2579 DYN_SWAP_PTR(V_dyn_ipv4_add, ipv4_add, tmp); 2580 DYN_SWAP_PTR(V_dyn_ipv4_parent_add, ipv4_parent_add, tmp); 2581 DYN_SWAP_PTR(V_dyn_ipv4_del, ipv4_del, tmp); 2582 DYN_SWAP_PTR(V_dyn_ipv4_parent_del, ipv4_parent_del, tmp); 2583 2584 #ifdef INET6 2585 DYN_SWAP_PTR(V_dyn_ipv6, ipv6, tmp); 2586 DYN_SWAP_PTR(V_dyn_ipv6_parent, ipv6_parent, tmp); 2587 DYN_SWAP_PTR(V_dyn_ipv6_add, ipv6_add, tmp); 2588 DYN_SWAP_PTR(V_dyn_ipv6_parent_add, ipv6_parent_add, tmp); 2589 DYN_SWAP_PTR(V_dyn_ipv6_del, ipv6_del, tmp); 2590 DYN_SWAP_PTR(V_dyn_ipv6_parent_del, ipv6_parent_del, tmp); 2591 #endif 2592 bucket = V_curr_dyn_buckets; 2593 V_curr_dyn_buckets = new; 2594 2595 IPFW_WUNLOCK(chain); 2596 IPFW_UH_WUNLOCK(chain); 2597 2598 /* Release old resources */ 2599 while (bucket-- != 0) 2600 DYN_BUCKET_LOCK_DESTROY(bucket_lock, bucket); 2601 free(bucket_lock, M_IPFW); 2602 free(ipv4, M_IPFW); 2603 free(ipv4_parent, M_IPFW); 2604 free(ipv4_add, M_IPFW); 2605 free(ipv4_parent_add, M_IPFW); 2606 free(ipv4_del, M_IPFW); 2607 free(ipv4_parent_del, M_IPFW); 2608 #ifdef INET6 2609 free(ipv6, M_IPFW); 2610 free(ipv6_parent, M_IPFW); 2611 free(ipv6_add, M_IPFW); 2612 free(ipv6_parent_add, M_IPFW); 2613 free(ipv6_del, M_IPFW); 2614 free(ipv6_parent_del, M_IPFW); 2615 #endif 2616 } 2617 2618 /* 2619 * This function is used to perform various maintenance 2620 * on dynamic hash lists. Currently it is called every second. 2621 */ 2622 static void 2623 dyn_tick(void *vnetx) 2624 { 2625 uint32_t buckets; 2626 2627 CURVNET_SET((struct vnet *)vnetx); 2628 /* 2629 * First free states unlinked in previous passes. 2630 */ 2631 dyn_free_states(&V_layer3_chain); 2632 /* 2633 * Now unlink others expired states. 2634 * We use IPFW_UH_WLOCK to avoid concurrent call of 2635 * dyn_expire_states(). It is the only function that does 2636 * deletion of state entries from states lists. 2637 */ 2638 IPFW_UH_WLOCK(&V_layer3_chain); 2639 dyn_expire_states(&V_layer3_chain, NULL); 2640 IPFW_UH_WUNLOCK(&V_layer3_chain); 2641 /* 2642 * Send keepalives if they are enabled and the time has come. 2643 */ 2644 if (V_dyn_keepalive != 0 && 2645 V_dyn_keepalive_last + V_dyn_keepalive_period <= time_uptime) { 2646 V_dyn_keepalive_last = time_uptime; 2647 dyn_send_keepalive_ipv4(&V_layer3_chain); 2648 #ifdef INET6 2649 dyn_send_keepalive_ipv6(&V_layer3_chain); 2650 #endif 2651 } 2652 /* 2653 * Check if we need to resize the hash: 2654 * if current number of states exceeds number of buckets in hash, 2655 * and dyn_buckets_max permits to grow the number of buckets, then 2656 * do it. Grow hash size to the minimum power of 2 which is bigger 2657 * than current states count. 2658 */ 2659 if (V_curr_dyn_buckets < V_dyn_buckets_max && 2660 (V_curr_dyn_buckets < V_dyn_count / 2 || ( 2661 V_curr_dyn_buckets < V_dyn_count && V_curr_max_length > 8))) { 2662 buckets = 1 << fls(V_dyn_count); 2663 if (buckets > V_dyn_buckets_max) 2664 buckets = V_dyn_buckets_max; 2665 dyn_grow_hashtable(&V_layer3_chain, buckets); 2666 } 2667 2668 callout_reset_on(&V_dyn_timeout, hz, dyn_tick, vnetx, 0); 2669 CURVNET_RESTORE(); 2670 } 2671 2672 void 2673 ipfw_expire_dyn_states(struct ip_fw_chain *chain, ipfw_range_tlv *rt) 2674 { 2675 /* 2676 * Do not perform any checks if we currently have no dynamic states 2677 */ 2678 if (V_dyn_count == 0) 2679 return; 2680 2681 IPFW_UH_WLOCK_ASSERT(chain); 2682 dyn_expire_states(chain, rt); 2683 } 2684 2685 /* 2686 * Returns size of dynamic states in legacy format 2687 */ 2688 int 2689 ipfw_dyn_len(void) 2690 { 2691 2692 return ((V_dyn_count + V_dyn_parent_count) * sizeof(ipfw_dyn_rule)); 2693 } 2694 2695 /* 2696 * Returns number of dynamic states. 2697 * Used by dump format v1 (current). 2698 */ 2699 uint32_t 2700 ipfw_dyn_get_count(void) 2701 { 2702 2703 return (V_dyn_count + V_dyn_parent_count); 2704 } 2705 2706 /* 2707 * Check if rule contains at least one dynamic opcode. 2708 * 2709 * Returns 1 if such opcode is found, 0 otherwise. 2710 */ 2711 int 2712 ipfw_is_dyn_rule(struct ip_fw *rule) 2713 { 2714 int cmdlen, l; 2715 ipfw_insn *cmd; 2716 2717 l = rule->cmd_len; 2718 cmd = rule->cmd; 2719 cmdlen = 0; 2720 for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { 2721 cmdlen = F_LEN(cmd); 2722 2723 switch (cmd->opcode) { 2724 case O_LIMIT: 2725 case O_KEEP_STATE: 2726 case O_PROBE_STATE: 2727 case O_CHECK_STATE: 2728 return (1); 2729 } 2730 } 2731 2732 return (0); 2733 } 2734 2735 static void 2736 dyn_export_parent(const struct dyn_parent *p, uint16_t kidx, 2737 ipfw_dyn_rule *dst) 2738 { 2739 2740 dst->dyn_type = O_LIMIT_PARENT; 2741 dst->kidx = kidx; 2742 dst->count = (uint16_t)DPARENT_COUNT(p); 2743 dst->expire = TIME_LEQ(p->expire, time_uptime) ? 0: 2744 p->expire - time_uptime; 2745 2746 /* 'rule' is used to pass up the rule number and set */ 2747 memcpy(&dst->rule, &p->rulenum, sizeof(p->rulenum)); 2748 /* store set number into high word of dst->rule pointer. */ 2749 memcpy((char *)&dst->rule + sizeof(p->rulenum), &p->set, 2750 sizeof(p->set)); 2751 2752 /* unused fields */ 2753 dst->pcnt = 0; 2754 dst->bcnt = 0; 2755 dst->parent = NULL; 2756 dst->state = 0; 2757 dst->ack_fwd = 0; 2758 dst->ack_rev = 0; 2759 dst->bucket = p->hashval; 2760 /* 2761 * The legacy userland code will interpret a NULL here as a marker 2762 * for the last dynamic rule. 2763 */ 2764 dst->next = (ipfw_dyn_rule *)1; 2765 } 2766 2767 static void 2768 dyn_export_data(const struct dyn_data *data, uint16_t kidx, uint8_t type, 2769 ipfw_dyn_rule *dst) 2770 { 2771 2772 dst->dyn_type = type; 2773 dst->kidx = kidx; 2774 dst->pcnt = data->pcnt_fwd + data->pcnt_rev; 2775 dst->bcnt = data->bcnt_fwd + data->bcnt_rev; 2776 dst->expire = TIME_LEQ(data->expire, time_uptime) ? 0: 2777 data->expire - time_uptime; 2778 2779 /* 'rule' is used to pass up the rule number and set */ 2780 memcpy(&dst->rule, &data->rulenum, sizeof(data->rulenum)); 2781 /* store set number into high word of dst->rule pointer. */ 2782 memcpy((char *)&dst->rule + sizeof(data->rulenum), &data->set, 2783 sizeof(data->set)); 2784 2785 /* unused fields */ 2786 dst->parent = NULL; 2787 dst->state = data->state; 2788 dst->ack_fwd = data->ack_fwd; 2789 dst->ack_rev = data->ack_rev; 2790 dst->count = 0; 2791 dst->bucket = data->hashval; 2792 /* 2793 * The legacy userland code will interpret a NULL here as a marker 2794 * for the last dynamic rule. 2795 */ 2796 dst->next = (ipfw_dyn_rule *)1; 2797 } 2798 2799 static void 2800 dyn_export_ipv4_state(const struct dyn_ipv4_state *s, ipfw_dyn_rule *dst) 2801 { 2802 2803 switch (s->type) { 2804 case O_LIMIT_PARENT: 2805 dyn_export_parent(s->limit, s->kidx, dst); 2806 break; 2807 default: 2808 dyn_export_data(s->data, s->kidx, s->type, dst); 2809 } 2810 2811 dst->id.dst_ip = s->dst; 2812 dst->id.src_ip = s->src; 2813 dst->id.dst_port = s->dport; 2814 dst->id.src_port = s->sport; 2815 dst->id.fib = s->data->fibnum; 2816 dst->id.proto = s->proto; 2817 dst->id._flags = 0; 2818 dst->id.addr_type = 4; 2819 2820 memset(&dst->id.dst_ip6, 0, sizeof(dst->id.dst_ip6)); 2821 memset(&dst->id.src_ip6, 0, sizeof(dst->id.src_ip6)); 2822 dst->id.flow_id6 = dst->id.extra = 0; 2823 } 2824 2825 #ifdef INET6 2826 static void 2827 dyn_export_ipv6_state(const struct dyn_ipv6_state *s, ipfw_dyn_rule *dst) 2828 { 2829 2830 switch (s->type) { 2831 case O_LIMIT_PARENT: 2832 dyn_export_parent(s->limit, s->kidx, dst); 2833 break; 2834 default: 2835 dyn_export_data(s->data, s->kidx, s->type, dst); 2836 } 2837 2838 dst->id.src_ip6 = s->src; 2839 dst->id.dst_ip6 = s->dst; 2840 dst->id.dst_port = s->dport; 2841 dst->id.src_port = s->sport; 2842 dst->id.fib = s->data->fibnum; 2843 dst->id.proto = s->proto; 2844 dst->id._flags = 0; 2845 dst->id.addr_type = 6; 2846 2847 dst->id.dst_ip = dst->id.src_ip = 0; 2848 dst->id.flow_id6 = dst->id.extra = 0; 2849 } 2850 #endif /* INET6 */ 2851 2852 /* 2853 * Fills the buffer given by @sd with dynamic states. 2854 * Used by dump format v1 (current). 2855 * 2856 * Returns 0 on success. 2857 */ 2858 int 2859 ipfw_dump_states(struct ip_fw_chain *chain, struct sockopt_data *sd) 2860 { 2861 #ifdef INET6 2862 struct dyn_ipv6_state *s6; 2863 #endif 2864 struct dyn_ipv4_state *s4; 2865 ipfw_obj_dyntlv *dst, *last; 2866 ipfw_obj_ctlv *ctlv; 2867 uint32_t bucket; 2868 2869 if (V_dyn_count == 0) 2870 return (0); 2871 2872 /* 2873 * IPFW_UH_RLOCK garantees that another userland request 2874 * and callout thread will not delete entries from states 2875 * lists. 2876 */ 2877 IPFW_UH_RLOCK_ASSERT(chain); 2878 2879 ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv)); 2880 if (ctlv == NULL) 2881 return (ENOMEM); 2882 ctlv->head.type = IPFW_TLV_DYNSTATE_LIST; 2883 ctlv->objsize = sizeof(ipfw_obj_dyntlv); 2884 last = NULL; 2885 2886 #define DYN_EXPORT_STATES(s, af, h, b) \ 2887 CK_SLIST_FOREACH(s, &V_dyn_ ## h[b], entry) { \ 2888 dst = (ipfw_obj_dyntlv *)ipfw_get_sopt_space(sd, \ 2889 sizeof(ipfw_obj_dyntlv)); \ 2890 if (dst == NULL) \ 2891 return (ENOMEM); \ 2892 dyn_export_ ## af ## _state(s, &dst->state); \ 2893 dst->head.length = sizeof(ipfw_obj_dyntlv); \ 2894 dst->head.type = IPFW_TLV_DYN_ENT; \ 2895 last = dst; \ 2896 } 2897 2898 for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) { 2899 DYN_EXPORT_STATES(s4, ipv4, ipv4_parent, bucket); 2900 DYN_EXPORT_STATES(s4, ipv4, ipv4, bucket); 2901 #ifdef INET6 2902 DYN_EXPORT_STATES(s6, ipv6, ipv6_parent, bucket); 2903 DYN_EXPORT_STATES(s6, ipv6, ipv6, bucket); 2904 #endif /* INET6 */ 2905 } 2906 2907 /* mark last dynamic rule */ 2908 if (last != NULL) 2909 last->head.flags = IPFW_DF_LAST; /* XXX: unused */ 2910 return (0); 2911 #undef DYN_EXPORT_STATES 2912 } 2913 2914 /* 2915 * Fill given buffer with dynamic states (legacy format). 2916 * IPFW_UH_RLOCK has to be held while calling. 2917 */ 2918 void 2919 ipfw_get_dynamic(struct ip_fw_chain *chain, char **pbp, const char *ep) 2920 { 2921 #ifdef INET6 2922 struct dyn_ipv6_state *s6; 2923 #endif 2924 struct dyn_ipv4_state *s4; 2925 ipfw_dyn_rule *p, *last = NULL; 2926 char *bp; 2927 uint32_t bucket; 2928 2929 if (V_dyn_count == 0) 2930 return; 2931 bp = *pbp; 2932 2933 IPFW_UH_RLOCK_ASSERT(chain); 2934 2935 #define DYN_EXPORT_STATES(s, af, head, b) \ 2936 CK_SLIST_FOREACH(s, &V_dyn_ ## head[b], entry) { \ 2937 if (bp + sizeof(*p) > ep) \ 2938 break; \ 2939 p = (ipfw_dyn_rule *)bp; \ 2940 dyn_export_ ## af ## _state(s, p); \ 2941 last = p; \ 2942 bp += sizeof(*p); \ 2943 } 2944 2945 for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) { 2946 DYN_EXPORT_STATES(s4, ipv4, ipv4_parent, bucket); 2947 DYN_EXPORT_STATES(s4, ipv4, ipv4, bucket); 2948 #ifdef INET6 2949 DYN_EXPORT_STATES(s6, ipv6, ipv6_parent, bucket); 2950 DYN_EXPORT_STATES(s6, ipv6, ipv6, bucket); 2951 #endif /* INET6 */ 2952 } 2953 2954 if (last != NULL) /* mark last dynamic rule */ 2955 last->next = NULL; 2956 *pbp = bp; 2957 #undef DYN_EXPORT_STATES 2958 } 2959 2960 void 2961 ipfw_dyn_init(struct ip_fw_chain *chain) 2962 { 2963 2964 #ifdef IPFIREWALL_JENKINSHASH 2965 V_dyn_hashseed = arc4random(); 2966 #endif 2967 V_dyn_max = 16384; /* max # of states */ 2968 V_dyn_parent_max = 4096; /* max # of parent states */ 2969 V_dyn_buckets_max = 8192; /* must be power of 2 */ 2970 2971 V_dyn_ack_lifetime = 300; 2972 V_dyn_syn_lifetime = 20; 2973 V_dyn_fin_lifetime = 1; 2974 V_dyn_rst_lifetime = 1; 2975 V_dyn_udp_lifetime = 10; 2976 V_dyn_short_lifetime = 5; 2977 2978 V_dyn_keepalive_interval = 20; 2979 V_dyn_keepalive_period = 5; 2980 V_dyn_keepalive = 1; /* send keepalives */ 2981 V_dyn_keepalive_last = time_uptime; 2982 2983 V_dyn_data_zone = uma_zcreate("IPFW dynamic states data", 2984 sizeof(struct dyn_data), NULL, NULL, NULL, NULL, 2985 UMA_ALIGN_PTR, 0); 2986 uma_zone_set_max(V_dyn_data_zone, V_dyn_max); 2987 2988 V_dyn_parent_zone = uma_zcreate("IPFW parent dynamic states", 2989 sizeof(struct dyn_parent), NULL, NULL, NULL, NULL, 2990 UMA_ALIGN_PTR, 0); 2991 uma_zone_set_max(V_dyn_parent_zone, V_dyn_parent_max); 2992 2993 SLIST_INIT(&V_dyn_expired_ipv4); 2994 V_dyn_ipv4 = NULL; 2995 V_dyn_ipv4_parent = NULL; 2996 V_dyn_ipv4_zone = uma_zcreate("IPFW IPv4 dynamic states", 2997 sizeof(struct dyn_ipv4_state), NULL, NULL, NULL, NULL, 2998 UMA_ALIGN_PTR, 0); 2999 3000 #ifdef INET6 3001 SLIST_INIT(&V_dyn_expired_ipv6); 3002 V_dyn_ipv6 = NULL; 3003 V_dyn_ipv6_parent = NULL; 3004 V_dyn_ipv6_zone = uma_zcreate("IPFW IPv6 dynamic states", 3005 sizeof(struct dyn_ipv6_state), NULL, NULL, NULL, NULL, 3006 UMA_ALIGN_PTR, 0); 3007 #endif 3008 3009 /* Initialize buckets. */ 3010 V_curr_dyn_buckets = 0; 3011 V_dyn_bucket_lock = NULL; 3012 dyn_grow_hashtable(chain, 256); 3013 3014 if (IS_DEFAULT_VNET(curvnet)) 3015 dyn_hp_cache = malloc(mp_ncpus * sizeof(void *), M_IPFW, 3016 M_WAITOK | M_ZERO); 3017 3018 DYN_EXPIRED_LOCK_INIT(); 3019 callout_init(&V_dyn_timeout, 1); 3020 callout_reset(&V_dyn_timeout, hz, dyn_tick, curvnet); 3021 IPFW_ADD_OBJ_REWRITER(IS_DEFAULT_VNET(curvnet), dyn_opcodes); 3022 } 3023 3024 void 3025 ipfw_dyn_uninit(int pass) 3026 { 3027 #ifdef INET6 3028 struct dyn_ipv6_state *s6; 3029 #endif 3030 struct dyn_ipv4_state *s4; 3031 int bucket; 3032 3033 if (pass == 0) { 3034 callout_drain(&V_dyn_timeout); 3035 return; 3036 } 3037 IPFW_DEL_OBJ_REWRITER(IS_DEFAULT_VNET(curvnet), dyn_opcodes); 3038 DYN_EXPIRED_LOCK_DESTROY(); 3039 3040 #define DYN_FREE_STATES_FORCED(CK, s, af, name, en) do { \ 3041 while ((s = CK ## SLIST_FIRST(&V_dyn_ ## name)) != NULL) { \ 3042 CK ## SLIST_REMOVE_HEAD(&V_dyn_ ## name, en); \ 3043 if (s->type == O_LIMIT_PARENT) \ 3044 uma_zfree(V_dyn_parent_zone, s->limit); \ 3045 else \ 3046 uma_zfree(V_dyn_data_zone, s->data); \ 3047 uma_zfree(V_dyn_ ## af ## _zone, s); \ 3048 } \ 3049 } while (0) 3050 for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) { 3051 DYN_BUCKET_LOCK_DESTROY(V_dyn_bucket_lock, bucket); 3052 3053 DYN_FREE_STATES_FORCED(CK_, s4, ipv4, ipv4[bucket], entry); 3054 DYN_FREE_STATES_FORCED(CK_, s4, ipv4, ipv4_parent[bucket], 3055 entry); 3056 #ifdef INET6 3057 DYN_FREE_STATES_FORCED(CK_, s6, ipv6, ipv6[bucket], entry); 3058 DYN_FREE_STATES_FORCED(CK_, s6, ipv6, ipv6_parent[bucket], 3059 entry); 3060 #endif /* INET6 */ 3061 } 3062 DYN_FREE_STATES_FORCED(, s4, ipv4, expired_ipv4, expired); 3063 #ifdef INET6 3064 DYN_FREE_STATES_FORCED(, s6, ipv6, expired_ipv6, expired); 3065 #endif 3066 #undef DYN_FREE_STATES_FORCED 3067 3068 uma_zdestroy(V_dyn_ipv4_zone); 3069 uma_zdestroy(V_dyn_data_zone); 3070 uma_zdestroy(V_dyn_parent_zone); 3071 #ifdef INET6 3072 uma_zdestroy(V_dyn_ipv6_zone); 3073 free(V_dyn_ipv6, M_IPFW); 3074 free(V_dyn_ipv6_parent, M_IPFW); 3075 free(V_dyn_ipv6_add, M_IPFW); 3076 free(V_dyn_ipv6_parent_add, M_IPFW); 3077 free(V_dyn_ipv6_del, M_IPFW); 3078 free(V_dyn_ipv6_parent_del, M_IPFW); 3079 #endif 3080 free(V_dyn_bucket_lock, M_IPFW); 3081 free(V_dyn_ipv4, M_IPFW); 3082 free(V_dyn_ipv4_parent, M_IPFW); 3083 free(V_dyn_ipv4_add, M_IPFW); 3084 free(V_dyn_ipv4_parent_add, M_IPFW); 3085 free(V_dyn_ipv4_del, M_IPFW); 3086 free(V_dyn_ipv4_parent_del, M_IPFW); 3087 if (IS_DEFAULT_VNET(curvnet)) 3088 free(dyn_hp_cache, M_IPFW); 3089 } 3090 3091 3092