1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2015-2019 Yandex LLC 5 * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org> 6 * Copyright (c) 2015-2019 Andrey V. Elsukov <ae@FreeBSD.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/counter.h> 33 #include <sys/ck.h> 34 #include <sys/epoch.h> 35 #include <sys/errno.h> 36 #include <sys/kernel.h> 37 #include <sys/lock.h> 38 #include <sys/malloc.h> 39 #include <sys/mbuf.h> 40 #include <sys/module.h> 41 #include <sys/rmlock.h> 42 #include <sys/rwlock.h> 43 #include <sys/socket.h> 44 #include <sys/sockopt.h> 45 46 #include <net/if.h> 47 48 #include <netinet/in.h> 49 #include <netinet/ip.h> 50 #include <netinet/ip_var.h> 51 #include <netinet/ip_fw.h> 52 #include <netinet6/ip_fw_nat64.h> 53 54 #include <netpfil/ipfw/ip_fw_private.h> 55 56 #include "nat64lsn.h" 57 58 VNET_DEFINE(uint32_t, nat64lsn_eid) = 0; 59 60 static struct nat64lsn_instance * 61 nat64lsn_find(struct namedobj_instance *ni, const char *name, uint8_t set) 62 { 63 struct named_object *no; 64 65 no = ipfw_objhash_lookup_name_type(ni, set, 66 IPFW_TLV_NAT64LSN_NAME, name); 67 if (no == NULL) 68 return (NULL); 69 return (__containerof(no, struct nat64lsn_instance, no)); 70 } 71 72 static void 73 nat64lsn_default_config(ipfw_nat64lsn_cfg *uc) 74 { 75 76 if (uc->jmaxlen == 0) 77 uc->jmaxlen = NAT64LSN_JMAXLEN; 78 if (uc->jmaxlen > 65536) 79 uc->jmaxlen = 65536; 80 if (uc->nh_delete_delay == 0) 81 uc->nh_delete_delay = NAT64LSN_HOST_AGE; 82 if (uc->pg_delete_delay == 0) 83 uc->pg_delete_delay = NAT64LSN_PG_AGE; 84 if (uc->st_syn_ttl == 0) 85 uc->st_syn_ttl = NAT64LSN_TCP_SYN_AGE; 86 if (uc->st_close_ttl == 0) 87 uc->st_close_ttl = NAT64LSN_TCP_FIN_AGE; 88 if (uc->st_estab_ttl == 0) 89 uc->st_estab_ttl = NAT64LSN_TCP_EST_AGE; 90 if (uc->st_udp_ttl == 0) 91 uc->st_udp_ttl = NAT64LSN_UDP_AGE; 92 if (uc->st_icmp_ttl == 0) 93 uc->st_icmp_ttl = NAT64LSN_ICMP_AGE; 94 95 if (uc->states_chunks == 0) 96 uc->states_chunks = 1; 97 else if (uc->states_chunks >= 128) 98 uc->states_chunks = 128; 99 else if (!powerof2(uc->states_chunks)) 100 uc->states_chunks = 1 << fls(uc->states_chunks); 101 } 102 103 /* 104 * Creates new nat64lsn instance. 105 * Data layout (v0)(current): 106 * Request: [ ipfw_obj_lheader ipfw_nat64lsn_cfg ] 107 * 108 * Returns 0 on success 109 */ 110 static int 111 nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3, 112 struct sockopt_data *sd) 113 { 114 ipfw_obj_lheader *olh; 115 ipfw_nat64lsn_cfg *uc; 116 struct nat64lsn_instance *i; 117 struct nat64lsn_cfg *cfg; 118 struct namedobj_instance *ni; 119 uint32_t addr4, mask4; 120 121 if (sd->valsize != sizeof(*olh) + sizeof(*uc)) 122 return (EINVAL); 123 124 olh = (ipfw_obj_lheader *)sd->kbuf; 125 uc = (ipfw_nat64lsn_cfg *)(olh + 1); 126 127 if (ipfw_check_object_name_generic(uc->name) != 0) 128 return (EINVAL); 129 130 if (uc->set >= IPFW_MAX_SETS) 131 return (EINVAL); 132 133 if (uc->plen4 > 32) 134 return (EINVAL); 135 136 /* 137 * Unspecified address has special meaning. But it must 138 * have valid prefix length. This length will be used to 139 * correctly extract and embedd IPv4 address into IPv6. 140 */ 141 if (nat64_check_prefix6(&uc->prefix6, uc->plen6) != 0 && 142 IN6_IS_ADDR_UNSPECIFIED(&uc->prefix6) && 143 nat64_check_prefixlen(uc->plen6) != 0) 144 return (EINVAL); 145 146 /* XXX: Check prefix4 to be global */ 147 addr4 = ntohl(uc->prefix4.s_addr); 148 mask4 = ~((1 << (32 - uc->plen4)) - 1); 149 if ((addr4 & mask4) != addr4) 150 return (EINVAL); 151 152 nat64lsn_default_config(uc); 153 154 ni = CHAIN_TO_SRV(ch); 155 IPFW_UH_RLOCK(ch); 156 if (nat64lsn_find(ni, uc->name, uc->set) != NULL) { 157 IPFW_UH_RUNLOCK(ch); 158 return (EEXIST); 159 } 160 IPFW_UH_RUNLOCK(ch); 161 162 i = malloc(sizeof(struct nat64lsn_instance), M_NAT64LSN, 163 M_WAITOK | M_ZERO); 164 strlcpy(i->name, uc->name, sizeof(i->name)); 165 i->no.name = i->name; 166 i->no.etlv = IPFW_TLV_NAT64LSN_NAME; 167 i->no.set = uc->set; 168 169 cfg = nat64lsn_init_config(ch, addr4, uc->plen4); 170 cfg->base.plat_prefix = uc->prefix6; 171 cfg->base.plat_plen = uc->plen6; 172 cfg->base.flags = (uc->flags & NAT64LSN_FLAGSMASK) | NAT64_PLATPFX; 173 if (IN6_IS_ADDR_WKPFX(&cfg->base.plat_prefix)) 174 cfg->base.flags |= NAT64_WKPFX; 175 else if (IN6_IS_ADDR_UNSPECIFIED(&cfg->base.plat_prefix)) 176 cfg->base.flags |= NAT64LSN_ANYPREFIX; 177 178 cfg->states_chunks = uc->states_chunks; 179 cfg->jmaxlen = uc->jmaxlen; 180 cfg->host_delete_delay = uc->nh_delete_delay; 181 cfg->pg_delete_delay = uc->pg_delete_delay; 182 cfg->st_syn_ttl = uc->st_syn_ttl; 183 cfg->st_close_ttl = uc->st_close_ttl; 184 cfg->st_estab_ttl = uc->st_estab_ttl; 185 cfg->st_udp_ttl = uc->st_udp_ttl; 186 cfg->st_icmp_ttl = uc->st_icmp_ttl; 187 cfg->nomatch_verdict = IP_FW_DENY; 188 189 IPFW_UH_WLOCK(ch); 190 191 if (nat64lsn_find(ni, uc->name, uc->set) != NULL) { 192 IPFW_UH_WUNLOCK(ch); 193 nat64lsn_destroy_config(cfg); 194 free(i, M_NAT64LSN); 195 return (EEXIST); 196 } 197 198 if (ipfw_objhash_alloc_idx(ni, &i->no.kidx) != 0) { 199 IPFW_UH_WUNLOCK(ch); 200 nat64lsn_destroy_config(cfg); 201 free(i, M_NAT64LSN); 202 return (ENOSPC); 203 } 204 ipfw_objhash_add(ni, &i->no); 205 206 /* Okay, let's link data */ 207 i->cfg = cfg; 208 SRV_OBJECT(ch, i->no.kidx) = i; 209 nat64lsn_start_instance(cfg); 210 211 IPFW_UH_WUNLOCK(ch); 212 return (0); 213 } 214 215 static void 216 nat64lsn_detach_instance(struct ip_fw_chain *ch, 217 struct nat64lsn_instance *i) 218 { 219 220 IPFW_UH_WLOCK_ASSERT(ch); 221 SRV_OBJECT(ch, i->no.kidx) = NULL; 222 ipfw_objhash_del(CHAIN_TO_SRV(ch), &i->no); 223 ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), i->no.kidx); 224 } 225 226 /* 227 * Destroys nat64 instance. 228 * Data layout (v0)(current): 229 * Request: [ ipfw_obj_header ] 230 * 231 * Returns 0 on success 232 */ 233 static int 234 nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3, 235 struct sockopt_data *sd) 236 { 237 struct nat64lsn_instance *i; 238 ipfw_obj_header *oh; 239 240 if (sd->valsize != sizeof(*oh)) 241 return (EINVAL); 242 243 oh = (ipfw_obj_header *)op3; 244 245 IPFW_UH_WLOCK(ch); 246 i = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); 247 if (i == NULL) { 248 IPFW_UH_WUNLOCK(ch); 249 return (ENOENT); 250 } 251 252 if (i->no.refcnt > 0) { 253 IPFW_UH_WUNLOCK(ch); 254 return (EBUSY); 255 } 256 257 ipfw_reset_eaction_instance(ch, V_nat64lsn_eid, i->no.kidx); 258 nat64lsn_detach_instance(ch, i); 259 IPFW_UH_WUNLOCK(ch); 260 261 nat64lsn_destroy_config(i->cfg); 262 free(i, M_NAT64LSN); 263 return (0); 264 } 265 266 #define __COPY_STAT_FIELD(_cfg, _stats, _field) \ 267 (_stats)->_field = NAT64STAT_FETCH(&(_cfg)->base.stats, _field) 268 static void 269 export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg, 270 struct ipfw_nat64lsn_stats *stats) 271 { 272 struct nat64lsn_alias *alias; 273 int i; 274 275 __COPY_STAT_FIELD(cfg, stats, opcnt64); 276 __COPY_STAT_FIELD(cfg, stats, opcnt46); 277 __COPY_STAT_FIELD(cfg, stats, ofrags); 278 __COPY_STAT_FIELD(cfg, stats, ifrags); 279 __COPY_STAT_FIELD(cfg, stats, oerrors); 280 __COPY_STAT_FIELD(cfg, stats, noroute4); 281 __COPY_STAT_FIELD(cfg, stats, noroute6); 282 __COPY_STAT_FIELD(cfg, stats, nomatch4); 283 __COPY_STAT_FIELD(cfg, stats, noproto); 284 __COPY_STAT_FIELD(cfg, stats, nomem); 285 __COPY_STAT_FIELD(cfg, stats, dropped); 286 287 __COPY_STAT_FIELD(cfg, stats, jcalls); 288 __COPY_STAT_FIELD(cfg, stats, jrequests); 289 __COPY_STAT_FIELD(cfg, stats, jhostsreq); 290 __COPY_STAT_FIELD(cfg, stats, jportreq); 291 __COPY_STAT_FIELD(cfg, stats, jhostfails); 292 __COPY_STAT_FIELD(cfg, stats, jportfails); 293 __COPY_STAT_FIELD(cfg, stats, jmaxlen); 294 __COPY_STAT_FIELD(cfg, stats, jnomem); 295 __COPY_STAT_FIELD(cfg, stats, jreinjected); 296 __COPY_STAT_FIELD(cfg, stats, screated); 297 __COPY_STAT_FIELD(cfg, stats, sdeleted); 298 __COPY_STAT_FIELD(cfg, stats, spgcreated); 299 __COPY_STAT_FIELD(cfg, stats, spgdeleted); 300 301 stats->hostcount = cfg->hosts_count; 302 for (i = 0; i < (1 << (32 - cfg->plen4)); i++) { 303 alias = &cfg->aliases[i]; 304 stats->tcpchunks += alias->tcp_pgcount; 305 stats->udpchunks += alias->udp_pgcount; 306 stats->icmpchunks += alias->icmp_pgcount; 307 } 308 } 309 #undef __COPY_STAT_FIELD 310 311 static void 312 nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_instance *i, 313 ipfw_nat64lsn_cfg *uc) 314 { 315 struct nat64lsn_cfg *cfg; 316 317 strlcpy(uc->name, i->no.name, sizeof(uc->name)); 318 uc->set = i->no.set; 319 cfg = i->cfg; 320 321 uc->flags = cfg->base.flags & NAT64LSN_FLAGSMASK; 322 uc->states_chunks = cfg->states_chunks; 323 uc->jmaxlen = cfg->jmaxlen; 324 uc->nh_delete_delay = cfg->host_delete_delay; 325 uc->pg_delete_delay = cfg->pg_delete_delay; 326 uc->st_syn_ttl = cfg->st_syn_ttl; 327 uc->st_close_ttl = cfg->st_close_ttl; 328 uc->st_estab_ttl = cfg->st_estab_ttl; 329 uc->st_udp_ttl = cfg->st_udp_ttl; 330 uc->st_icmp_ttl = cfg->st_icmp_ttl; 331 uc->prefix4.s_addr = htonl(cfg->prefix4); 332 uc->prefix6 = cfg->base.plat_prefix; 333 uc->plen4 = cfg->plen4; 334 uc->plen6 = cfg->base.plat_plen; 335 } 336 337 struct nat64_dump_arg { 338 struct ip_fw_chain *ch; 339 struct sockopt_data *sd; 340 }; 341 342 static int 343 export_config_cb(struct namedobj_instance *ni, struct named_object *no, 344 void *arg) 345 { 346 struct nat64_dump_arg *da; 347 ipfw_nat64lsn_cfg *uc; 348 349 da = (struct nat64_dump_arg *)arg; 350 uc = (struct _ipfw_nat64lsn_cfg *)ipfw_get_sopt_space(da->sd, 351 sizeof(*uc)); 352 nat64lsn_export_config(da->ch, 353 __containerof(no, struct nat64lsn_instance, no), uc); 354 return (0); 355 } 356 357 /* 358 * Lists all nat64 lsn instances currently available in kernel. 359 * Data layout (v0)(current): 360 * Request: [ ipfw_obj_lheader ] 361 * Reply: [ ipfw_obj_lheader ipfw_nat64lsn_cfg x N ] 362 * 363 * Returns 0 on success 364 */ 365 static int 366 nat64lsn_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3, 367 struct sockopt_data *sd) 368 { 369 ipfw_obj_lheader *olh; 370 struct nat64_dump_arg da; 371 372 /* Check minimum header size */ 373 if (sd->valsize < sizeof(ipfw_obj_lheader)) 374 return (EINVAL); 375 376 olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh)); 377 378 IPFW_UH_RLOCK(ch); 379 olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch), 380 IPFW_TLV_NAT64LSN_NAME); 381 olh->objsize = sizeof(ipfw_nat64lsn_cfg); 382 olh->size = sizeof(*olh) + olh->count * olh->objsize; 383 384 if (sd->valsize < olh->size) { 385 IPFW_UH_RUNLOCK(ch); 386 return (ENOMEM); 387 } 388 memset(&da, 0, sizeof(da)); 389 da.ch = ch; 390 da.sd = sd; 391 ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb, &da, 392 IPFW_TLV_NAT64LSN_NAME); 393 IPFW_UH_RUNLOCK(ch); 394 395 return (0); 396 } 397 398 /* 399 * Change existing nat64lsn instance configuration. 400 * Data layout (v0)(current): 401 * Request: [ ipfw_obj_header ipfw_nat64lsn_cfg ] 402 * Reply: [ ipfw_obj_header ipfw_nat64lsn_cfg ] 403 * 404 * Returns 0 on success 405 */ 406 static int 407 nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op, 408 struct sockopt_data *sd) 409 { 410 ipfw_obj_header *oh; 411 ipfw_nat64lsn_cfg *uc; 412 struct nat64lsn_instance *i; 413 struct nat64lsn_cfg *cfg; 414 struct namedobj_instance *ni; 415 416 if (sd->valsize != sizeof(*oh) + sizeof(*uc)) 417 return (EINVAL); 418 419 oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, 420 sizeof(*oh) + sizeof(*uc)); 421 uc = (ipfw_nat64lsn_cfg *)(oh + 1); 422 423 if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 || 424 oh->ntlv.set >= IPFW_MAX_SETS) 425 return (EINVAL); 426 427 ni = CHAIN_TO_SRV(ch); 428 if (sd->sopt->sopt_dir == SOPT_GET) { 429 IPFW_UH_RLOCK(ch); 430 i = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set); 431 if (i == NULL) { 432 IPFW_UH_RUNLOCK(ch); 433 return (ENOENT); 434 } 435 nat64lsn_export_config(ch, i, uc); 436 IPFW_UH_RUNLOCK(ch); 437 return (0); 438 } 439 440 nat64lsn_default_config(uc); 441 442 IPFW_UH_WLOCK(ch); 443 i = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set); 444 if (i == NULL) { 445 IPFW_UH_WUNLOCK(ch); 446 return (ENOENT); 447 } 448 449 /* 450 * For now allow to change only following values: 451 * jmaxlen, nh_del_age, pg_del_age, tcp_syn_age, tcp_close_age, 452 * tcp_est_age, udp_age, icmp_age, flags, states_chunks. 453 */ 454 cfg = i->cfg; 455 cfg->states_chunks = uc->states_chunks; 456 cfg->jmaxlen = uc->jmaxlen; 457 cfg->host_delete_delay = uc->nh_delete_delay; 458 cfg->pg_delete_delay = uc->pg_delete_delay; 459 cfg->st_syn_ttl = uc->st_syn_ttl; 460 cfg->st_close_ttl = uc->st_close_ttl; 461 cfg->st_estab_ttl = uc->st_estab_ttl; 462 cfg->st_udp_ttl = uc->st_udp_ttl; 463 cfg->st_icmp_ttl = uc->st_icmp_ttl; 464 cfg->base.flags &= ~NAT64LSN_FLAGSMASK; 465 cfg->base.flags |= uc->flags & NAT64LSN_FLAGSMASK; 466 467 IPFW_UH_WUNLOCK(ch); 468 469 return (0); 470 } 471 472 /* 473 * Get nat64lsn statistics. 474 * Data layout (v0)(current): 475 * Request: [ ipfw_obj_header ] 476 * Reply: [ ipfw_obj_header ipfw_counter_tlv ] 477 * 478 * Returns 0 on success 479 */ 480 static int 481 nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, 482 struct sockopt_data *sd) 483 { 484 struct ipfw_nat64lsn_stats stats; 485 struct nat64lsn_instance *i; 486 ipfw_obj_header *oh; 487 ipfw_obj_ctlv *ctlv; 488 size_t sz; 489 490 sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats); 491 if (sd->valsize % sizeof(uint64_t)) 492 return (EINVAL); 493 if (sd->valsize < sz) 494 return (ENOMEM); 495 oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); 496 if (oh == NULL) 497 return (EINVAL); 498 memset(&stats, 0, sizeof(stats)); 499 500 IPFW_UH_RLOCK(ch); 501 i = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); 502 if (i == NULL) { 503 IPFW_UH_RUNLOCK(ch); 504 return (ENOENT); 505 } 506 507 export_stats(ch, i->cfg, &stats); 508 IPFW_UH_RUNLOCK(ch); 509 510 ctlv = (ipfw_obj_ctlv *)(oh + 1); 511 memset(ctlv, 0, sizeof(*ctlv)); 512 ctlv->head.type = IPFW_TLV_COUNTERS; 513 ctlv->head.length = sz - sizeof(ipfw_obj_header); 514 ctlv->count = sizeof(stats) / sizeof(uint64_t); 515 ctlv->objsize = sizeof(uint64_t); 516 ctlv->version = IPFW_NAT64_VERSION; 517 memcpy(ctlv + 1, &stats, sizeof(stats)); 518 return (0); 519 } 520 521 /* 522 * Reset nat64lsn statistics. 523 * Data layout (v0)(current): 524 * Request: [ ipfw_obj_header ] 525 * 526 * Returns 0 on success 527 */ 528 static int 529 nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, 530 struct sockopt_data *sd) 531 { 532 struct nat64lsn_instance *i; 533 ipfw_obj_header *oh; 534 535 if (sd->valsize != sizeof(*oh)) 536 return (EINVAL); 537 oh = (ipfw_obj_header *)sd->kbuf; 538 if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 || 539 oh->ntlv.set >= IPFW_MAX_SETS) 540 return (EINVAL); 541 542 IPFW_UH_WLOCK(ch); 543 i = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); 544 if (i == NULL) { 545 IPFW_UH_WUNLOCK(ch); 546 return (ENOENT); 547 } 548 COUNTER_ARRAY_ZERO(i->cfg->base.stats.cnt, NAT64STATS); 549 IPFW_UH_WUNLOCK(ch); 550 return (0); 551 } 552 553 #ifdef __LP64__ 554 #define FREEMASK_COPY(pg, n, out) (out) = *FREEMASK_CHUNK((pg), (n)) 555 #else 556 #define FREEMASK_COPY(pg, n, out) (out) = *FREEMASK_CHUNK((pg), (n)) | \ 557 ((uint64_t)*(FREEMASK_CHUNK((pg), (n)) + 1) << 32) 558 #endif 559 /* 560 * Reply: [ ipfw_obj_header ipfw_obj_data [ ipfw_nat64lsn_stg 561 * ipfw_nat64lsn_state x count, ... ] ] 562 */ 563 static int 564 nat64lsn_export_states(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx, 565 struct nat64lsn_pg *pg, struct sockopt_data *sd, uint32_t *ret_count) 566 { 567 ipfw_nat64lsn_state_v1 *s; 568 struct nat64lsn_state *state; 569 uint64_t freemask; 570 uint32_t i, count; 571 572 /* validate user input */ 573 if (idx->chunk > pg->chunks_count - 1) 574 return (EINVAL); 575 576 FREEMASK_COPY(pg, idx->chunk, freemask); 577 count = 64 - bitcount64(freemask); 578 if (count == 0) 579 return (0); /* Try next PG/chunk */ 580 581 DPRINTF(DP_STATE, "EXPORT PG 0x%16jx, count %d", 582 (uintmax_t)idx->index, count); 583 584 s = (ipfw_nat64lsn_state_v1 *)ipfw_get_sopt_space(sd, 585 count * sizeof(ipfw_nat64lsn_state_v1)); 586 if (s == NULL) 587 return (ENOMEM); 588 589 for (i = 0; i < 64; i++) { 590 if (ISSET64(freemask, i)) 591 continue; 592 state = pg->chunks_count == 1 ? &pg->states->state[i] : 593 &pg->states_chunk[idx->chunk]->state[i]; 594 595 s->host6 = state->host->addr; 596 s->daddr.s_addr = htonl(state->ip_dst); 597 s->dport = state->dport; 598 s->sport = state->sport; 599 s->aport = state->aport; 600 s->flags = (uint8_t)(state->flags & 7); 601 s->proto = state->proto; 602 s->idle = GET_AGE(state->timestamp); 603 s++; 604 } 605 *ret_count = count; 606 return (0); 607 } 608 609 #define LAST_IDX 0xFF 610 static int 611 nat64lsn_next_pgidx(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg, 612 union nat64lsn_pgidx *idx) 613 { 614 615 /* First iterate over chunks */ 616 if (pg != NULL) { 617 if (idx->chunk < pg->chunks_count - 1) { 618 idx->chunk++; 619 return (0); 620 } 621 } 622 idx->chunk = 0; 623 /* Then over PGs */ 624 if (idx->port < UINT16_MAX - 64) { 625 idx->port += 64; 626 return (0); 627 } 628 idx->port = NAT64_MIN_PORT; 629 /* Then over supported protocols */ 630 switch (idx->proto) { 631 case IPPROTO_ICMP: 632 idx->proto = IPPROTO_TCP; 633 return (0); 634 case IPPROTO_TCP: 635 idx->proto = IPPROTO_UDP; 636 return (0); 637 default: 638 idx->proto = IPPROTO_ICMP; 639 } 640 /* And then over IPv4 alias addresses */ 641 if (idx->addr < cfg->pmask4) { 642 idx->addr++; 643 return (1); /* New states group is needed */ 644 } 645 idx->index = LAST_IDX; 646 return (-1); /* No more states */ 647 } 648 649 static struct nat64lsn_pg* 650 nat64lsn_get_pg_byidx(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx) 651 { 652 struct nat64lsn_alias *alias; 653 int pg_idx; 654 655 alias = &cfg->aliases[idx->addr & ((1 << (32 - cfg->plen4)) - 1)]; 656 MPASS(alias->addr == idx->addr); 657 658 pg_idx = (idx->port - NAT64_MIN_PORT) / 64; 659 switch (idx->proto) { 660 case IPPROTO_ICMP: 661 if (ISSET32(alias->icmp_pgmask[pg_idx / 32], pg_idx % 32)) 662 return (alias->icmp[pg_idx / 32]->pgptr[pg_idx % 32]); 663 break; 664 case IPPROTO_TCP: 665 if (ISSET32(alias->tcp_pgmask[pg_idx / 32], pg_idx % 32)) 666 return (alias->tcp[pg_idx / 32]->pgptr[pg_idx % 32]); 667 break; 668 case IPPROTO_UDP: 669 if (ISSET32(alias->udp_pgmask[pg_idx / 32], pg_idx % 32)) 670 return (alias->udp[pg_idx / 32]->pgptr[pg_idx % 32]); 671 break; 672 } 673 return (NULL); 674 } 675 676 /* 677 * Lists nat64lsn states. 678 * Data layout (v1)(current): 679 * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]] 680 * Reply: [ ipfw_obj_header ipfw_obj_data [ 681 * ipfw_nat64lsn_stg_v1 ipfw_nat64lsn_state_v1 x N] ] 682 * 683 * Returns 0 on success 684 */ 685 static int 686 nat64lsn_states(struct ip_fw_chain *ch, ip_fw3_opheader *op3, 687 struct sockopt_data *sd) 688 { 689 ipfw_obj_header *oh; 690 ipfw_obj_data *od; 691 ipfw_nat64lsn_stg_v1 *stg; 692 struct nat64lsn_instance *i; 693 struct nat64lsn_cfg *cfg; 694 struct nat64lsn_pg *pg; 695 union nat64lsn_pgidx idx; 696 size_t sz; 697 uint32_t count, total; 698 int ret; 699 700 sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) + 701 sizeof(uint64_t); 702 /* Check minimum header size */ 703 if (sd->valsize < sz) 704 return (EINVAL); 705 706 oh = (ipfw_obj_header *)sd->kbuf; 707 od = (ipfw_obj_data *)(oh + 1); 708 if (od->head.type != IPFW_TLV_OBJDATA || 709 od->head.length != sz - sizeof(ipfw_obj_header)) 710 return (EINVAL); 711 712 idx.index = *(uint64_t *)(od + 1); 713 if (idx.index != 0 && idx.proto != IPPROTO_ICMP && 714 idx.proto != IPPROTO_TCP && idx.proto != IPPROTO_UDP) 715 return (EINVAL); 716 if (idx.index == LAST_IDX) 717 return (EINVAL); 718 719 IPFW_UH_RLOCK(ch); 720 i = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); 721 if (i == NULL) { 722 IPFW_UH_RUNLOCK(ch); 723 return (ENOENT); 724 } 725 cfg = i->cfg; 726 if (idx.index == 0) { /* Fill in starting point */ 727 idx.addr = cfg->prefix4; 728 idx.proto = IPPROTO_ICMP; 729 idx.port = NAT64_MIN_PORT; 730 } 731 if (idx.addr < cfg->prefix4 || idx.addr > cfg->pmask4 || 732 idx.port < NAT64_MIN_PORT) { 733 IPFW_UH_RUNLOCK(ch); 734 return (EINVAL); 735 } 736 sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) + 737 sizeof(ipfw_nat64lsn_stg_v1); 738 if (sd->valsize < sz) { 739 IPFW_UH_RUNLOCK(ch); 740 return (ENOMEM); 741 } 742 oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sz); 743 od = (ipfw_obj_data *)(oh + 1); 744 od->head.type = IPFW_TLV_OBJDATA; 745 od->head.length = sz - sizeof(ipfw_obj_header); 746 stg = (ipfw_nat64lsn_stg_v1 *)(od + 1); 747 stg->count = total = 0; 748 stg->next.index = idx.index; 749 /* 750 * Acquire CALLOUT_LOCK to avoid races with expiration code. 751 * Thus states, hosts and PGs will not expire while we hold it. 752 */ 753 CALLOUT_LOCK(cfg); 754 ret = 0; 755 do { 756 pg = nat64lsn_get_pg_byidx(cfg, &idx); 757 if (pg != NULL) { 758 count = 0; 759 ret = nat64lsn_export_states(cfg, &idx, pg, 760 sd, &count); 761 if (ret != 0) 762 break; 763 if (count > 0) { 764 stg->count += count; 765 total += count; 766 /* Update total size of reply */ 767 od->head.length += 768 count * sizeof(ipfw_nat64lsn_state_v1); 769 sz += count * sizeof(ipfw_nat64lsn_state_v1); 770 } 771 stg->alias4.s_addr = htonl(idx.addr); 772 } 773 /* Determine new index */ 774 switch (nat64lsn_next_pgidx(cfg, pg, &idx)) { 775 case -1: 776 ret = ENOENT; /* End of search */ 777 break; 778 case 1: /* 779 * Next alias address, new group may be needed. 780 * If states count is zero, use this group. 781 */ 782 if (stg->count == 0) 783 continue; 784 /* Otherwise try to create new group */ 785 sz += sizeof(ipfw_nat64lsn_stg_v1); 786 if (sd->valsize < sz) { 787 ret = ENOMEM; 788 break; 789 } 790 /* Save next index in current group */ 791 stg->next.index = idx.index; 792 stg = (ipfw_nat64lsn_stg_v1 *)ipfw_get_sopt_space(sd, 793 sizeof(ipfw_nat64lsn_stg_v1)); 794 od->head.length += sizeof(ipfw_nat64lsn_stg_v1); 795 stg->count = 0; 796 break; 797 } 798 stg->next.index = idx.index; 799 } while (ret == 0); 800 CALLOUT_UNLOCK(cfg); 801 IPFW_UH_RUNLOCK(ch); 802 return ((total > 0 || idx.index == LAST_IDX) ? 0: ret); 803 } 804 805 static struct ipfw_sopt_handler scodes[] = { 806 { IP_FW_NAT64LSN_CREATE, IP_FW3_OPVER, HDIR_BOTH, nat64lsn_create }, 807 { IP_FW_NAT64LSN_DESTROY, IP_FW3_OPVER, HDIR_SET, nat64lsn_destroy }, 808 { IP_FW_NAT64LSN_CONFIG, IP_FW3_OPVER, HDIR_BOTH, nat64lsn_config }, 809 { IP_FW_NAT64LSN_LIST, IP_FW3_OPVER, HDIR_GET, nat64lsn_list }, 810 { IP_FW_NAT64LSN_STATS, IP_FW3_OPVER, HDIR_GET, nat64lsn_stats }, 811 { IP_FW_NAT64LSN_RESET_STATS, IP_FW3_OPVER, HDIR_SET, nat64lsn_reset_stats }, 812 { IP_FW_NAT64LSN_LIST_STATES, IP_FW3_OPVER, HDIR_GET, nat64lsn_states }, 813 }; 814 815 #define NAT64LSN_ARE_EQUAL(v) (cfg0->v == cfg1->v) 816 static int 817 nat64lsn_cmp_configs(struct nat64lsn_cfg *cfg0, struct nat64lsn_cfg *cfg1) 818 { 819 820 if ((cfg0->base.flags & cfg1->base.flags & NAT64LSN_ALLOW_SWAPCONF) && 821 NAT64LSN_ARE_EQUAL(prefix4) && 822 NAT64LSN_ARE_EQUAL(pmask4) && 823 NAT64LSN_ARE_EQUAL(plen4) && 824 NAT64LSN_ARE_EQUAL(base.plat_plen) && 825 IN6_ARE_ADDR_EQUAL(&cfg0->base.plat_prefix, 826 &cfg1->base.plat_prefix)) 827 return (0); 828 return (1); 829 } 830 #undef NAT64LSN_ARE_EQUAL 831 832 static void 833 nat64lsn_swap_configs(struct nat64lsn_instance *i0, 834 struct nat64lsn_instance *i1) 835 { 836 struct nat64lsn_cfg *cfg; 837 838 cfg = i0->cfg; 839 i0->cfg = i1->cfg; 840 i1->cfg = cfg; 841 } 842 843 /* 844 * NAT64LSN sets swap handler. 845 * 846 * When two sets have NAT64LSN instance with the same name, we check 847 * most important configuration parameters, and if there are no difference, 848 * and both instances have NAT64LSN_ALLOW_SWAPCONF flag, we will exchange 849 * configs between instances. This allows to keep NAT64 states when ipfw's 850 * rules are reloaded using new set. 851 * 852 * XXX: since manage_sets caller doesn't hold IPFW_WLOCK(), it is possible 853 * that some states will be created during switching, because set of rules 854 * is changed a bit earley than named objects. 855 */ 856 static int 857 nat64lsn_swap_sets_cb(struct namedobj_instance *ni, struct named_object *no, 858 void *arg) 859 { 860 struct nat64lsn_instance *i0, *i1; 861 uint8_t *sets; 862 863 sets = arg; 864 if (no->set == sets[0]) { 865 /* 866 * Check if we have instance in new set with the same 867 * config that is sets aware and ready to swap configs. 868 */ 869 i0 = __containerof(no, struct nat64lsn_instance, no); 870 if ((i0->cfg->base.flags & NAT64LSN_ALLOW_SWAPCONF) && 871 (i1 = nat64lsn_find(ni, no->name, sets[1])) != NULL) { 872 /* Compare configs */ 873 if (nat64lsn_cmp_configs(i0->cfg, i1->cfg) == 0) { 874 IPFW_UH_WLOCK_ASSERT(&V_layer3_chain); 875 IPFW_WLOCK(&V_layer3_chain); 876 nat64lsn_swap_configs(i0, i1); 877 IPFW_WUNLOCK(&V_layer3_chain); 878 } 879 } 880 } 881 return (0); 882 } 883 884 static int 885 nat64lsn_manage_sets(struct ip_fw_chain *ch, uint32_t set, uint8_t new_set, 886 enum ipfw_sets_cmd cmd) 887 { 888 uint8_t sets[2]; 889 890 if (cmd == SWAP_ALL) { 891 sets[0] = (uint8_t)set; 892 sets[1] = new_set; 893 ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), 894 nat64lsn_swap_sets_cb, &sets, IPFW_TLV_NAT64LSN_NAME); 895 } 896 return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64LSN_NAME, 897 set, new_set, cmd)); 898 } 899 NAT64_DEFINE_OPCODE_REWRITER(nat64lsn, NAT64LSN, opcodes); 900 901 static int 902 destroy_config_cb(struct namedobj_instance *ni, struct named_object *no, 903 void *arg) 904 { 905 struct nat64lsn_instance *i; 906 struct ip_fw_chain *ch; 907 908 ch = (struct ip_fw_chain *)arg; 909 i = (struct nat64lsn_instance *)SRV_OBJECT(ch, no->kidx); 910 nat64lsn_detach_instance(ch, i); 911 nat64lsn_destroy_config(i->cfg); 912 free(i, M_NAT64LSN); 913 return (0); 914 } 915 916 int 917 nat64lsn_init(struct ip_fw_chain *ch, int first) 918 { 919 920 if (first != 0) 921 nat64lsn_init_internal(); 922 V_nat64lsn_eid = ipfw_add_eaction(ch, ipfw_nat64lsn, "nat64lsn"); 923 if (V_nat64lsn_eid == 0) 924 return (ENXIO); 925 IPFW_ADD_SOPT_HANDLER(first, scodes); 926 IPFW_ADD_OBJ_REWRITER(first, opcodes); 927 return (0); 928 } 929 930 void 931 nat64lsn_uninit(struct ip_fw_chain *ch, int last) 932 { 933 934 IPFW_DEL_OBJ_REWRITER(last, opcodes); 935 IPFW_DEL_SOPT_HANDLER(last, scodes); 936 ipfw_del_eaction(ch, V_nat64lsn_eid); 937 /* 938 * Since we already have deregistered external action, 939 * our named objects become unaccessible via rules, because 940 * all rules were truncated by ipfw_del_eaction(). 941 * So, we can unlink and destroy our named objects without holding 942 * IPFW_WLOCK(). 943 */ 944 IPFW_UH_WLOCK(ch); 945 ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch, 946 IPFW_TLV_NAT64LSN_NAME); 947 V_nat64lsn_eid = 0; 948 IPFW_UH_WUNLOCK(ch); 949 if (last != 0) 950 nat64lsn_uninit_internal(); 951 } 952