1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko. 5 * Copyright (c) 2014 Yandex LLC 6 * Copyright (c) 2014 Alexander V. Chernikov 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 /* 34 * Lookup table support for ipfw. 35 * 36 * This file contains handlers for all generic tables' operations: 37 * add/del/flush entries, list/dump tables etc.. 38 * 39 * Table data modification is protected by both UH and runtime lock 40 * while reading configuration/data is protected by UH lock. 41 * 42 * Lookup algorithms for all table types are located in ip_fw_table_algo.c 43 */ 44 45 #include "opt_ipfw.h" 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/malloc.h> 50 #include <sys/kernel.h> 51 #include <sys/lock.h> 52 #include <sys/rwlock.h> 53 #include <sys/rmlock.h> 54 #include <sys/socket.h> 55 #include <sys/socketvar.h> 56 #include <sys/queue.h> 57 #include <net/if.h> /* ip_fw.h requires IFNAMSIZ */ 58 59 #include <netinet/in.h> 60 #include <netinet/ip_var.h> /* struct ipfw_rule_ref */ 61 #include <netinet/ip_fw.h> 62 63 #include <netpfil/ipfw/ip_fw_private.h> 64 #include <netpfil/ipfw/ip_fw_table.h> 65 66 /* 67 * Table has the following `type` concepts: 68 * 69 * `no.type` represents lookup key type (addr, ifp, uid, etc..) 70 * vmask represents bitmask of table values which are present at the moment. 71 * Special IPFW_VTYPE_LEGACY ( (uint32_t)-1 ) represents old 72 * single-value-for-all approach. 73 */ 74 struct table_config { 75 struct named_object no; 76 uint8_t tflags; /* type flags */ 77 uint8_t locked; /* 1 if locked from changes */ 78 uint8_t linked; /* 1 if already linked */ 79 uint8_t ochanged; /* used by set swapping */ 80 uint8_t vshared; /* 1 if using shared value array */ 81 uint8_t spare[3]; 82 uint32_t count; /* Number of records */ 83 uint32_t limit; /* Max number of records */ 84 uint32_t vmask; /* bitmask with supported values */ 85 uint32_t ocount; /* used by set swapping */ 86 uint64_t gencnt; /* generation count */ 87 char tablename[64]; /* table name */ 88 struct table_algo *ta; /* Callbacks for given algo */ 89 void *astate; /* algorithm state */ 90 struct table_info ti_copy; /* data to put to table_info */ 91 struct namedobj_instance *vi; 92 }; 93 94 static int find_table_err(struct namedobj_instance *ni, struct tid_info *ti, 95 struct table_config **tc); 96 static struct table_config *find_table(struct namedobj_instance *ni, 97 struct tid_info *ti); 98 static struct table_config *alloc_table_config(struct ip_fw_chain *ch, 99 struct tid_info *ti, struct table_algo *ta, char *adata, uint8_t tflags); 100 static void free_table_config(struct namedobj_instance *ni, 101 struct table_config *tc); 102 static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti, 103 char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int ref); 104 static void link_table(struct ip_fw_chain *ch, struct table_config *tc); 105 static void unlink_table(struct ip_fw_chain *ch, struct table_config *tc); 106 static int find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti, 107 struct tentry_info *tei, uint32_t count, int op, struct table_config **ptc); 108 #define OP_ADD 1 109 #define OP_DEL 0 110 static int export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh, 111 struct sockopt_data *sd); 112 static void export_table_info(struct ip_fw_chain *ch, struct table_config *tc, 113 ipfw_xtable_info *i); 114 static int dump_table_tentry(void *e, void *arg); 115 static int dump_table_xentry(void *e, void *arg); 116 117 static int swap_tables(struct ip_fw_chain *ch, struct tid_info *a, 118 struct tid_info *b); 119 120 static int check_table_name(const char *name); 121 static int check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts, 122 struct table_config *tc, struct table_info *ti, uint32_t count); 123 static int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti); 124 125 static struct table_algo *find_table_algo(struct tables_config *tableconf, 126 struct tid_info *ti, char *name); 127 128 static void objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti); 129 static void ntlv_to_ti(struct _ipfw_obj_ntlv *ntlv, struct tid_info *ti); 130 131 #define CHAIN_TO_NI(chain) (CHAIN_TO_TCFG(chain)->namehash) 132 #define KIDX_TO_TI(ch, k) (&(((struct table_info *)(ch)->tablestate)[k])) 133 134 #define TA_BUF_SZ 128 /* On-stack buffer for add/delete state */ 135 136 void 137 rollback_toperation_state(struct ip_fw_chain *ch, void *object) 138 { 139 struct tables_config *tcfg; 140 struct op_state *os; 141 142 tcfg = CHAIN_TO_TCFG(ch); 143 TAILQ_FOREACH(os, &tcfg->state_list, next) 144 os->func(object, os); 145 } 146 147 void 148 add_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts) 149 { 150 struct tables_config *tcfg; 151 152 tcfg = CHAIN_TO_TCFG(ch); 153 TAILQ_INSERT_HEAD(&tcfg->state_list, &ts->opstate, next); 154 } 155 156 void 157 del_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts) 158 { 159 struct tables_config *tcfg; 160 161 tcfg = CHAIN_TO_TCFG(ch); 162 TAILQ_REMOVE(&tcfg->state_list, &ts->opstate, next); 163 } 164 165 void 166 tc_ref(struct table_config *tc) 167 { 168 169 tc->no.refcnt++; 170 } 171 172 void 173 tc_unref(struct table_config *tc) 174 { 175 176 tc->no.refcnt--; 177 } 178 179 static struct table_value * 180 get_table_value(struct ip_fw_chain *ch, struct table_config *tc, uint32_t kidx) 181 { 182 struct table_value *pval; 183 184 pval = (struct table_value *)ch->valuestate; 185 186 return (&pval[kidx]); 187 } 188 189 190 /* 191 * Checks if we're able to insert/update entry @tei into table 192 * w.r.t @tc limits. 193 * May alter @tei to indicate insertion error / insert 194 * options. 195 * 196 * Returns 0 if operation can be performed/ 197 */ 198 static int 199 check_table_limit(struct table_config *tc, struct tentry_info *tei) 200 { 201 202 if (tc->limit == 0 || tc->count < tc->limit) 203 return (0); 204 205 if ((tei->flags & TEI_FLAGS_UPDATE) == 0) { 206 /* Notify userland on error cause */ 207 tei->flags |= TEI_FLAGS_LIMIT; 208 return (EFBIG); 209 } 210 211 /* 212 * We have UPDATE flag set. 213 * Permit updating record (if found), 214 * but restrict adding new one since we've 215 * already hit the limit. 216 */ 217 tei->flags |= TEI_FLAGS_DONTADD; 218 219 return (0); 220 } 221 222 /* 223 * Convert algorithm callback return code into 224 * one of pre-defined states known by userland. 225 */ 226 static void 227 store_tei_result(struct tentry_info *tei, int op, int error, uint32_t num) 228 { 229 int flag; 230 231 flag = 0; 232 233 switch (error) { 234 case 0: 235 if (op == OP_ADD && num != 0) 236 flag = TEI_FLAGS_ADDED; 237 if (op == OP_DEL) 238 flag = TEI_FLAGS_DELETED; 239 break; 240 case ENOENT: 241 flag = TEI_FLAGS_NOTFOUND; 242 break; 243 case EEXIST: 244 flag = TEI_FLAGS_EXISTS; 245 break; 246 default: 247 flag = TEI_FLAGS_ERROR; 248 } 249 250 tei->flags |= flag; 251 } 252 253 /* 254 * Creates and references table with default parameters. 255 * Saves table config, algo and allocated kidx info @ptc, @pta and 256 * @pkidx if non-zero. 257 * Used for table auto-creation to support old binaries. 258 * 259 * Returns 0 on success. 260 */ 261 static int 262 create_table_compat(struct ip_fw_chain *ch, struct tid_info *ti, 263 uint16_t *pkidx) 264 { 265 ipfw_xtable_info xi; 266 int error; 267 268 memset(&xi, 0, sizeof(xi)); 269 /* Set default value mask for legacy clients */ 270 xi.vmask = IPFW_VTYPE_LEGACY; 271 272 error = create_table_internal(ch, ti, NULL, &xi, pkidx, 1); 273 if (error != 0) 274 return (error); 275 276 return (0); 277 } 278 279 /* 280 * Find and reference existing table optionally 281 * creating new one. 282 * 283 * Saves found table config into @ptc. 284 * Note function may drop/acquire UH_WLOCK. 285 * Returns 0 if table was found/created and referenced 286 * or non-zero return code. 287 */ 288 static int 289 find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti, 290 struct tentry_info *tei, uint32_t count, int op, 291 struct table_config **ptc) 292 { 293 struct namedobj_instance *ni; 294 struct table_config *tc; 295 uint16_t kidx; 296 int error; 297 298 IPFW_UH_WLOCK_ASSERT(ch); 299 300 ni = CHAIN_TO_NI(ch); 301 tc = NULL; 302 if ((tc = find_table(ni, ti)) != NULL) { 303 /* check table type */ 304 if (tc->no.subtype != ti->type) 305 return (EINVAL); 306 307 if (tc->locked != 0) 308 return (EACCES); 309 310 /* Try to exit early on limit hit */ 311 if (op == OP_ADD && count == 1 && 312 check_table_limit(tc, tei) != 0) 313 return (EFBIG); 314 315 /* Reference and return */ 316 tc->no.refcnt++; 317 *ptc = tc; 318 return (0); 319 } 320 321 if (op == OP_DEL) 322 return (ESRCH); 323 324 /* Compatibility mode: create new table for old clients */ 325 if ((tei->flags & TEI_FLAGS_COMPAT) == 0) 326 return (ESRCH); 327 328 IPFW_UH_WUNLOCK(ch); 329 error = create_table_compat(ch, ti, &kidx); 330 IPFW_UH_WLOCK(ch); 331 332 if (error != 0) 333 return (error); 334 335 tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx); 336 KASSERT(tc != NULL, ("create_table_compat returned bad idx %d", kidx)); 337 338 /* OK, now we've got referenced table. */ 339 *ptc = tc; 340 return (0); 341 } 342 343 /* 344 * Rolls back already @added to @tc entries using state array @ta_buf_m. 345 * Assume the following layout: 346 * 1) ADD state (ta_buf_m[0] ... t_buf_m[added - 1]) for handling update cases 347 * 2) DEL state (ta_buf_m[count[ ... t_buf_m[count + added - 1]) 348 * for storing deleted state 349 */ 350 static void 351 rollback_added_entries(struct ip_fw_chain *ch, struct table_config *tc, 352 struct table_info *tinfo, struct tentry_info *tei, caddr_t ta_buf_m, 353 uint32_t count, uint32_t added) 354 { 355 struct table_algo *ta; 356 struct tentry_info *ptei; 357 caddr_t v, vv; 358 size_t ta_buf_sz; 359 int error, i; 360 uint32_t num; 361 362 IPFW_UH_WLOCK_ASSERT(ch); 363 364 ta = tc->ta; 365 ta_buf_sz = ta->ta_buf_size; 366 v = ta_buf_m; 367 vv = v + count * ta_buf_sz; 368 for (i = 0; i < added; i++, v += ta_buf_sz, vv += ta_buf_sz) { 369 ptei = &tei[i]; 370 if ((ptei->flags & TEI_FLAGS_UPDATED) != 0) { 371 372 /* 373 * We have old value stored by previous 374 * call in @ptei->value. Do add once again 375 * to restore it. 376 */ 377 error = ta->add(tc->astate, tinfo, ptei, v, &num); 378 KASSERT(error == 0, ("rollback UPDATE fail")); 379 KASSERT(num == 0, ("rollback UPDATE fail2")); 380 continue; 381 } 382 383 error = ta->prepare_del(ch, ptei, vv); 384 KASSERT(error == 0, ("pre-rollback INSERT failed")); 385 error = ta->del(tc->astate, tinfo, ptei, vv, &num); 386 KASSERT(error == 0, ("rollback INSERT failed")); 387 tc->count -= num; 388 } 389 } 390 391 /* 392 * Prepares add/del state for all @count entries in @tei. 393 * Uses either stack buffer (@ta_buf) or allocates a new one. 394 * Stores pointer to allocated buffer back to @ta_buf. 395 * 396 * Returns 0 on success. 397 */ 398 static int 399 prepare_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta, 400 struct tentry_info *tei, uint32_t count, int op, caddr_t *ta_buf) 401 { 402 caddr_t ta_buf_m, v; 403 size_t ta_buf_sz, sz; 404 struct tentry_info *ptei; 405 int error, i; 406 407 error = 0; 408 ta_buf_sz = ta->ta_buf_size; 409 if (count == 1) { 410 /* Single add/delete, use on-stack buffer */ 411 memset(*ta_buf, 0, TA_BUF_SZ); 412 ta_buf_m = *ta_buf; 413 } else { 414 415 /* 416 * Multiple adds/deletes, allocate larger buffer 417 * 418 * Note we need 2xcount buffer for add case: 419 * we have hold both ADD state 420 * and DELETE state (this may be needed 421 * if we need to rollback all changes) 422 */ 423 sz = count * ta_buf_sz; 424 ta_buf_m = malloc((op == OP_ADD) ? sz * 2 : sz, M_TEMP, 425 M_WAITOK | M_ZERO); 426 } 427 428 v = ta_buf_m; 429 for (i = 0; i < count; i++, v += ta_buf_sz) { 430 ptei = &tei[i]; 431 error = (op == OP_ADD) ? 432 ta->prepare_add(ch, ptei, v) : ta->prepare_del(ch, ptei, v); 433 434 /* 435 * Some syntax error (incorrect mask, or address, or 436 * anything). Return error regardless of atomicity 437 * settings. 438 */ 439 if (error != 0) 440 break; 441 } 442 443 *ta_buf = ta_buf_m; 444 return (error); 445 } 446 447 /* 448 * Flushes allocated state for each @count entries in @tei. 449 * Frees @ta_buf_m if differs from stack buffer @ta_buf. 450 */ 451 static void 452 flush_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta, 453 struct tentry_info *tei, uint32_t count, int rollback, 454 caddr_t ta_buf_m, caddr_t ta_buf) 455 { 456 caddr_t v; 457 struct tentry_info *ptei; 458 size_t ta_buf_sz; 459 int i; 460 461 ta_buf_sz = ta->ta_buf_size; 462 463 /* Run cleaning callback anyway */ 464 v = ta_buf_m; 465 for (i = 0; i < count; i++, v += ta_buf_sz) { 466 ptei = &tei[i]; 467 ta->flush_entry(ch, ptei, v); 468 if (ptei->ptv != NULL) { 469 free(ptei->ptv, M_IPFW); 470 ptei->ptv = NULL; 471 } 472 } 473 474 /* Clean up "deleted" state in case of rollback */ 475 if (rollback != 0) { 476 v = ta_buf_m + count * ta_buf_sz; 477 for (i = 0; i < count; i++, v += ta_buf_sz) 478 ta->flush_entry(ch, &tei[i], v); 479 } 480 481 if (ta_buf_m != ta_buf) 482 free(ta_buf_m, M_TEMP); 483 } 484 485 486 static void 487 rollback_add_entry(void *object, struct op_state *_state) 488 { 489 struct ip_fw_chain *ch; 490 struct tableop_state *ts; 491 492 ts = (struct tableop_state *)_state; 493 494 if (ts->tc != object && ts->ch != object) 495 return; 496 497 ch = ts->ch; 498 499 IPFW_UH_WLOCK_ASSERT(ch); 500 501 /* Call specifid unlockers */ 502 rollback_table_values(ts); 503 504 /* Indicate we've called */ 505 ts->modified = 1; 506 } 507 508 /* 509 * Adds/updates one or more entries in table @ti. 510 * 511 * Function may drop/reacquire UH wlock multiple times due to 512 * items alloc, algorithm callbacks (check_space), value linkage 513 * (new values, value storage realloc), etc.. 514 * Other processes like other adds (which may involve storage resize), 515 * table swaps (which changes table data and may change algo type), 516 * table modify (which may change value mask) may be executed 517 * simultaneously so we need to deal with it. 518 * 519 * The following approach was implemented: 520 * we have per-chain linked list, protected with UH lock. 521 * add_table_entry prepares special on-stack structure wthich is passed 522 * to its descendants. Users add this structure to this list before unlock. 523 * After performing needed operations and acquiring UH lock back, each user 524 * checks if structure has changed. If true, it rolls local state back and 525 * returns without error to the caller. 526 * add_table_entry() on its own checks if structure has changed and restarts 527 * its operation from the beginning (goto restart). 528 * 529 * Functions which are modifying fields of interest (currently 530 * resize_shared_value_storage() and swap_tables() ) 531 * traverses given list while holding UH lock immediately before 532 * performing their operations calling function provided be list entry 533 * ( currently rollback_add_entry ) which performs rollback for all necessary 534 * state and sets appropriate values in structure indicating rollback 535 * has happened. 536 * 537 * Algo interaction: 538 * Function references @ti first to ensure table won't 539 * disappear or change its type. 540 * After that, prepare_add callback is called for each @tei entry. 541 * Next, we try to add each entry under UH+WHLOCK 542 * using add() callback. 543 * Finally, we free all state by calling flush_entry callback 544 * for each @tei. 545 * 546 * Returns 0 on success. 547 */ 548 int 549 add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, 550 struct tentry_info *tei, uint8_t flags, uint32_t count) 551 { 552 struct table_config *tc; 553 struct table_algo *ta; 554 uint16_t kidx; 555 int error, first_error, i, rollback; 556 uint32_t num, numadd; 557 struct tentry_info *ptei; 558 struct tableop_state ts; 559 char ta_buf[TA_BUF_SZ]; 560 caddr_t ta_buf_m, v; 561 562 memset(&ts, 0, sizeof(ts)); 563 ta = NULL; 564 IPFW_UH_WLOCK(ch); 565 566 /* 567 * Find and reference existing table. 568 */ 569 restart: 570 if (ts.modified != 0) { 571 IPFW_UH_WUNLOCK(ch); 572 flush_batch_buffer(ch, ta, tei, count, rollback, 573 ta_buf_m, ta_buf); 574 memset(&ts, 0, sizeof(ts)); 575 ta = NULL; 576 IPFW_UH_WLOCK(ch); 577 } 578 579 error = find_ref_table(ch, ti, tei, count, OP_ADD, &tc); 580 if (error != 0) { 581 IPFW_UH_WUNLOCK(ch); 582 return (error); 583 } 584 ta = tc->ta; 585 586 /* Fill in tablestate */ 587 ts.ch = ch; 588 ts.opstate.func = rollback_add_entry; 589 ts.tc = tc; 590 ts.vshared = tc->vshared; 591 ts.vmask = tc->vmask; 592 ts.ta = ta; 593 ts.tei = tei; 594 ts.count = count; 595 rollback = 0; 596 add_toperation_state(ch, &ts); 597 IPFW_UH_WUNLOCK(ch); 598 599 /* Allocate memory and prepare record(s) */ 600 /* Pass stack buffer by default */ 601 ta_buf_m = ta_buf; 602 error = prepare_batch_buffer(ch, ta, tei, count, OP_ADD, &ta_buf_m); 603 604 IPFW_UH_WLOCK(ch); 605 del_toperation_state(ch, &ts); 606 /* Drop reference we've used in first search */ 607 tc->no.refcnt--; 608 609 /* Check prepare_batch_buffer() error */ 610 if (error != 0) 611 goto cleanup; 612 613 /* 614 * Check if table swap has happened. 615 * (so table algo might be changed). 616 * Restart operation to achieve consistent behavior. 617 */ 618 if (ts.modified != 0) 619 goto restart; 620 621 /* 622 * Link all values values to shared/per-table value array. 623 * 624 * May release/reacquire UH_WLOCK. 625 */ 626 error = ipfw_link_table_values(ch, &ts, flags); 627 if (error != 0) 628 goto cleanup; 629 if (ts.modified != 0) 630 goto restart; 631 632 /* 633 * Ensure we are able to add all entries without additional 634 * memory allocations. May release/reacquire UH_WLOCK. 635 */ 636 kidx = tc->no.kidx; 637 error = check_table_space(ch, &ts, tc, KIDX_TO_TI(ch, kidx), count); 638 if (error != 0) 639 goto cleanup; 640 if (ts.modified != 0) 641 goto restart; 642 643 /* We've got valid table in @tc. Let's try to add data */ 644 kidx = tc->no.kidx; 645 ta = tc->ta; 646 numadd = 0; 647 first_error = 0; 648 649 IPFW_WLOCK(ch); 650 651 v = ta_buf_m; 652 for (i = 0; i < count; i++, v += ta->ta_buf_size) { 653 ptei = &tei[i]; 654 num = 0; 655 /* check limit before adding */ 656 if ((error = check_table_limit(tc, ptei)) == 0) { 657 /* 658 * It should be safe to insert a record w/o 659 * a properly-linked value if atomicity is 660 * not required. 661 * 662 * If the added item does not have a valid value 663 * index, it would get rejected by ta->add(). 664 * */ 665 error = ta->add(tc->astate, KIDX_TO_TI(ch, kidx), 666 ptei, v, &num); 667 /* Set status flag to inform userland */ 668 store_tei_result(ptei, OP_ADD, error, num); 669 } 670 if (error == 0) { 671 /* Update number of records to ease limit checking */ 672 tc->count += num; 673 numadd += num; 674 continue; 675 } 676 677 if (first_error == 0) 678 first_error = error; 679 680 /* 681 * Some error have happened. Check our atomicity 682 * settings: continue if atomicity is not required, 683 * rollback changes otherwise. 684 */ 685 if ((flags & IPFW_CTF_ATOMIC) == 0) 686 continue; 687 688 rollback_added_entries(ch, tc, KIDX_TO_TI(ch, kidx), 689 tei, ta_buf_m, count, i); 690 691 rollback = 1; 692 break; 693 } 694 695 IPFW_WUNLOCK(ch); 696 697 ipfw_garbage_table_values(ch, tc, tei, count, rollback); 698 699 /* Permit post-add algorithm grow/rehash. */ 700 if (numadd != 0) 701 check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0); 702 703 /* Return first error to user, if any */ 704 error = first_error; 705 706 cleanup: 707 IPFW_UH_WUNLOCK(ch); 708 709 flush_batch_buffer(ch, ta, tei, count, rollback, ta_buf_m, ta_buf); 710 711 return (error); 712 } 713 714 /* 715 * Deletes one or more entries in table @ti. 716 * 717 * Returns 0 on success. 718 */ 719 int 720 del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, 721 struct tentry_info *tei, uint8_t flags, uint32_t count) 722 { 723 struct table_config *tc; 724 struct table_algo *ta; 725 struct tentry_info *ptei; 726 uint16_t kidx; 727 int error, first_error, i; 728 uint32_t num, numdel; 729 char ta_buf[TA_BUF_SZ]; 730 caddr_t ta_buf_m, v; 731 732 /* 733 * Find and reference existing table. 734 */ 735 IPFW_UH_WLOCK(ch); 736 error = find_ref_table(ch, ti, tei, count, OP_DEL, &tc); 737 if (error != 0) { 738 IPFW_UH_WUNLOCK(ch); 739 return (error); 740 } 741 ta = tc->ta; 742 IPFW_UH_WUNLOCK(ch); 743 744 /* Allocate memory and prepare record(s) */ 745 /* Pass stack buffer by default */ 746 ta_buf_m = ta_buf; 747 error = prepare_batch_buffer(ch, ta, tei, count, OP_DEL, &ta_buf_m); 748 if (error != 0) 749 goto cleanup; 750 751 IPFW_UH_WLOCK(ch); 752 753 /* Drop reference we've used in first search */ 754 tc->no.refcnt--; 755 756 /* 757 * Check if table algo is still the same. 758 * (changed ta may be the result of table swap). 759 */ 760 if (ta != tc->ta) { 761 IPFW_UH_WUNLOCK(ch); 762 error = EINVAL; 763 goto cleanup; 764 } 765 766 kidx = tc->no.kidx; 767 numdel = 0; 768 first_error = 0; 769 770 IPFW_WLOCK(ch); 771 v = ta_buf_m; 772 for (i = 0; i < count; i++, v += ta->ta_buf_size) { 773 ptei = &tei[i]; 774 num = 0; 775 error = ta->del(tc->astate, KIDX_TO_TI(ch, kidx), ptei, v, 776 &num); 777 /* Save state for userland */ 778 store_tei_result(ptei, OP_DEL, error, num); 779 if (error != 0 && first_error == 0) 780 first_error = error; 781 tc->count -= num; 782 numdel += num; 783 } 784 IPFW_WUNLOCK(ch); 785 786 /* Unlink non-used values */ 787 ipfw_garbage_table_values(ch, tc, tei, count, 0); 788 789 if (numdel != 0) { 790 /* Run post-del hook to permit shrinking */ 791 check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0); 792 } 793 794 IPFW_UH_WUNLOCK(ch); 795 796 /* Return first error to user, if any */ 797 error = first_error; 798 799 cleanup: 800 flush_batch_buffer(ch, ta, tei, count, 0, ta_buf_m, ta_buf); 801 802 return (error); 803 } 804 805 /* 806 * Ensure that table @tc has enough space to add @count entries without 807 * need for reallocation. 808 * 809 * Callbacks order: 810 * 0) need_modify() (UH_WLOCK) - checks if @count items can be added w/o resize. 811 * 812 * 1) alloc_modify (no locks, M_WAITOK) - alloc new state based on @pflags. 813 * 2) prepare_modifyt (UH_WLOCK) - copy old data into new storage 814 * 3) modify (UH_WLOCK + WLOCK) - switch pointers 815 * 4) flush_modify (UH_WLOCK) - free state, if needed 816 * 817 * Returns 0 on success. 818 */ 819 static int 820 check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts, 821 struct table_config *tc, struct table_info *ti, uint32_t count) 822 { 823 struct table_algo *ta; 824 uint64_t pflags; 825 char ta_buf[TA_BUF_SZ]; 826 int error; 827 828 IPFW_UH_WLOCK_ASSERT(ch); 829 830 error = 0; 831 ta = tc->ta; 832 if (ta->need_modify == NULL) 833 return (0); 834 835 /* Acquire reference not to loose @tc between locks/unlocks */ 836 tc->no.refcnt++; 837 838 /* 839 * TODO: think about avoiding race between large add/large delete 840 * operation on algorithm which implements shrinking along with 841 * growing. 842 */ 843 while (true) { 844 pflags = 0; 845 if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) { 846 error = 0; 847 break; 848 } 849 850 /* We have to shrink/grow table */ 851 if (ts != NULL) 852 add_toperation_state(ch, ts); 853 IPFW_UH_WUNLOCK(ch); 854 855 memset(&ta_buf, 0, sizeof(ta_buf)); 856 error = ta->prepare_mod(ta_buf, &pflags); 857 858 IPFW_UH_WLOCK(ch); 859 if (ts != NULL) 860 del_toperation_state(ch, ts); 861 862 if (error != 0) 863 break; 864 865 if (ts != NULL && ts->modified != 0) { 866 867 /* 868 * Swap operation has happened 869 * so we're currently operating on other 870 * table data. Stop doing this. 871 */ 872 ta->flush_mod(ta_buf); 873 break; 874 } 875 876 /* Check if we still need to alter table */ 877 ti = KIDX_TO_TI(ch, tc->no.kidx); 878 if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) { 879 IPFW_UH_WUNLOCK(ch); 880 881 /* 882 * Other thread has already performed resize. 883 * Flush our state and return. 884 */ 885 ta->flush_mod(ta_buf); 886 break; 887 } 888 889 error = ta->fill_mod(tc->astate, ti, ta_buf, &pflags); 890 if (error == 0) { 891 /* Do actual modification */ 892 IPFW_WLOCK(ch); 893 ta->modify(tc->astate, ti, ta_buf, pflags); 894 IPFW_WUNLOCK(ch); 895 } 896 897 /* Anyway, flush data and retry */ 898 ta->flush_mod(ta_buf); 899 } 900 901 tc->no.refcnt--; 902 return (error); 903 } 904 905 /* 906 * Adds or deletes record in table. 907 * Data layout (v0): 908 * Request: [ ip_fw3_opheader ipfw_table_xentry ] 909 * 910 * Returns 0 on success 911 */ 912 static int 913 manage_table_ent_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3, 914 struct sockopt_data *sd) 915 { 916 ipfw_table_xentry *xent; 917 struct tentry_info tei; 918 struct tid_info ti; 919 struct table_value v; 920 int error, hdrlen, read; 921 922 hdrlen = offsetof(ipfw_table_xentry, k); 923 924 /* Check minimum header size */ 925 if (sd->valsize < (sizeof(*op3) + hdrlen)) 926 return (EINVAL); 927 928 read = sizeof(ip_fw3_opheader); 929 930 /* Check if xentry len field is valid */ 931 xent = (ipfw_table_xentry *)(op3 + 1); 932 if (xent->len < hdrlen || xent->len + read > sd->valsize) 933 return (EINVAL); 934 935 memset(&tei, 0, sizeof(tei)); 936 tei.paddr = &xent->k; 937 tei.masklen = xent->masklen; 938 ipfw_import_table_value_legacy(xent->value, &v); 939 tei.pvalue = &v; 940 /* Old requests compatibility */ 941 tei.flags = TEI_FLAGS_COMPAT; 942 if (xent->type == IPFW_TABLE_ADDR) { 943 if (xent->len - hdrlen == sizeof(in_addr_t)) 944 tei.subtype = AF_INET; 945 else 946 tei.subtype = AF_INET6; 947 } 948 949 memset(&ti, 0, sizeof(ti)); 950 ti.uidx = xent->tbl; 951 ti.type = xent->type; 952 953 error = (op3->opcode == IP_FW_TABLE_XADD) ? 954 add_table_entry(ch, &ti, &tei, 0, 1) : 955 del_table_entry(ch, &ti, &tei, 0, 1); 956 957 return (error); 958 } 959 960 /* 961 * Adds or deletes record in table. 962 * Data layout (v1)(current): 963 * Request: [ ipfw_obj_header 964 * ipfw_obj_ctlv(IPFW_TLV_TBLENT_LIST) [ ipfw_obj_tentry x N ] 965 * ] 966 * 967 * Returns 0 on success 968 */ 969 static int 970 manage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3, 971 struct sockopt_data *sd) 972 { 973 ipfw_obj_tentry *tent, *ptent; 974 ipfw_obj_ctlv *ctlv; 975 ipfw_obj_header *oh; 976 struct tentry_info *ptei, tei, *tei_buf; 977 struct tid_info ti; 978 int error, i, kidx, read; 979 980 /* Check minimum header size */ 981 if (sd->valsize < (sizeof(*oh) + sizeof(*ctlv))) 982 return (EINVAL); 983 984 /* Check if passed data is too long */ 985 if (sd->valsize != sd->kavail) 986 return (EINVAL); 987 988 oh = (ipfw_obj_header *)sd->kbuf; 989 990 /* Basic length checks for TLVs */ 991 if (oh->ntlv.head.length != sizeof(oh->ntlv)) 992 return (EINVAL); 993 994 read = sizeof(*oh); 995 996 ctlv = (ipfw_obj_ctlv *)(oh + 1); 997 if (ctlv->head.length + read != sd->valsize) 998 return (EINVAL); 999 1000 read += sizeof(*ctlv); 1001 tent = (ipfw_obj_tentry *)(ctlv + 1); 1002 if (ctlv->count * sizeof(*tent) + read != sd->valsize) 1003 return (EINVAL); 1004 1005 if (ctlv->count == 0) 1006 return (0); 1007 1008 /* 1009 * Mark entire buffer as "read". 1010 * This instructs sopt api write it back 1011 * after function return. 1012 */ 1013 ipfw_get_sopt_header(sd, sd->valsize); 1014 1015 /* Perform basic checks for each entry */ 1016 ptent = tent; 1017 kidx = tent->idx; 1018 for (i = 0; i < ctlv->count; i++, ptent++) { 1019 if (ptent->head.length != sizeof(*ptent)) 1020 return (EINVAL); 1021 if (ptent->idx != kidx) 1022 return (ENOTSUP); 1023 } 1024 1025 /* Convert data into kernel request objects */ 1026 objheader_to_ti(oh, &ti); 1027 ti.type = oh->ntlv.type; 1028 ti.uidx = kidx; 1029 1030 /* Use on-stack buffer for single add/del */ 1031 if (ctlv->count == 1) { 1032 memset(&tei, 0, sizeof(tei)); 1033 tei_buf = &tei; 1034 } else 1035 tei_buf = malloc(ctlv->count * sizeof(tei), M_TEMP, 1036 M_WAITOK | M_ZERO); 1037 1038 ptei = tei_buf; 1039 ptent = tent; 1040 for (i = 0; i < ctlv->count; i++, ptent++, ptei++) { 1041 ptei->paddr = &ptent->k; 1042 ptei->subtype = ptent->subtype; 1043 ptei->masklen = ptent->masklen; 1044 if (ptent->head.flags & IPFW_TF_UPDATE) 1045 ptei->flags |= TEI_FLAGS_UPDATE; 1046 1047 ipfw_import_table_value_v1(&ptent->v.value); 1048 ptei->pvalue = (struct table_value *)&ptent->v.value; 1049 } 1050 1051 error = (oh->opheader.opcode == IP_FW_TABLE_XADD) ? 1052 add_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count) : 1053 del_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count); 1054 1055 /* Translate result back to userland */ 1056 ptei = tei_buf; 1057 ptent = tent; 1058 for (i = 0; i < ctlv->count; i++, ptent++, ptei++) { 1059 if (ptei->flags & TEI_FLAGS_ADDED) 1060 ptent->result = IPFW_TR_ADDED; 1061 else if (ptei->flags & TEI_FLAGS_DELETED) 1062 ptent->result = IPFW_TR_DELETED; 1063 else if (ptei->flags & TEI_FLAGS_UPDATED) 1064 ptent->result = IPFW_TR_UPDATED; 1065 else if (ptei->flags & TEI_FLAGS_LIMIT) 1066 ptent->result = IPFW_TR_LIMIT; 1067 else if (ptei->flags & TEI_FLAGS_ERROR) 1068 ptent->result = IPFW_TR_ERROR; 1069 else if (ptei->flags & TEI_FLAGS_NOTFOUND) 1070 ptent->result = IPFW_TR_NOTFOUND; 1071 else if (ptei->flags & TEI_FLAGS_EXISTS) 1072 ptent->result = IPFW_TR_EXISTS; 1073 ipfw_export_table_value_v1(ptei->pvalue, &ptent->v.value); 1074 } 1075 1076 if (tei_buf != &tei) 1077 free(tei_buf, M_TEMP); 1078 1079 return (error); 1080 } 1081 1082 /* 1083 * Looks up an entry in given table. 1084 * Data layout (v0)(current): 1085 * Request: [ ipfw_obj_header ipfw_obj_tentry ] 1086 * Reply: [ ipfw_obj_header ipfw_obj_tentry ] 1087 * 1088 * Returns 0 on success 1089 */ 1090 static int 1091 find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3, 1092 struct sockopt_data *sd) 1093 { 1094 ipfw_obj_tentry *tent; 1095 ipfw_obj_header *oh; 1096 struct tid_info ti; 1097 struct table_config *tc; 1098 struct table_algo *ta; 1099 struct table_info *kti; 1100 struct table_value *pval; 1101 struct namedobj_instance *ni; 1102 int error; 1103 size_t sz; 1104 1105 /* Check minimum header size */ 1106 sz = sizeof(*oh) + sizeof(*tent); 1107 if (sd->valsize != sz) 1108 return (EINVAL); 1109 1110 oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); 1111 tent = (ipfw_obj_tentry *)(oh + 1); 1112 1113 /* Basic length checks for TLVs */ 1114 if (oh->ntlv.head.length != sizeof(oh->ntlv)) 1115 return (EINVAL); 1116 1117 objheader_to_ti(oh, &ti); 1118 ti.type = oh->ntlv.type; 1119 ti.uidx = tent->idx; 1120 1121 IPFW_UH_RLOCK(ch); 1122 ni = CHAIN_TO_NI(ch); 1123 1124 /* 1125 * Find existing table and check its type . 1126 */ 1127 ta = NULL; 1128 if ((tc = find_table(ni, &ti)) == NULL) { 1129 IPFW_UH_RUNLOCK(ch); 1130 return (ESRCH); 1131 } 1132 1133 /* check table type */ 1134 if (tc->no.subtype != ti.type) { 1135 IPFW_UH_RUNLOCK(ch); 1136 return (EINVAL); 1137 } 1138 1139 kti = KIDX_TO_TI(ch, tc->no.kidx); 1140 ta = tc->ta; 1141 1142 if (ta->find_tentry == NULL) 1143 return (ENOTSUP); 1144 1145 error = ta->find_tentry(tc->astate, kti, tent); 1146 if (error == 0) { 1147 pval = get_table_value(ch, tc, tent->v.kidx); 1148 ipfw_export_table_value_v1(pval, &tent->v.value); 1149 } 1150 IPFW_UH_RUNLOCK(ch); 1151 1152 return (error); 1153 } 1154 1155 /* 1156 * Flushes all entries or destroys given table. 1157 * Data layout (v0)(current): 1158 * Request: [ ipfw_obj_header ] 1159 * 1160 * Returns 0 on success 1161 */ 1162 static int 1163 flush_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3, 1164 struct sockopt_data *sd) 1165 { 1166 int error; 1167 struct _ipfw_obj_header *oh; 1168 struct tid_info ti; 1169 1170 if (sd->valsize != sizeof(*oh)) 1171 return (EINVAL); 1172 1173 oh = (struct _ipfw_obj_header *)op3; 1174 objheader_to_ti(oh, &ti); 1175 1176 if (op3->opcode == IP_FW_TABLE_XDESTROY) 1177 error = destroy_table(ch, &ti); 1178 else if (op3->opcode == IP_FW_TABLE_XFLUSH) 1179 error = flush_table(ch, &ti); 1180 else 1181 return (ENOTSUP); 1182 1183 return (error); 1184 } 1185 1186 static void 1187 restart_flush(void *object, struct op_state *_state) 1188 { 1189 struct tableop_state *ts; 1190 1191 ts = (struct tableop_state *)_state; 1192 1193 if (ts->tc != object) 1194 return; 1195 1196 /* Indicate we've called */ 1197 ts->modified = 1; 1198 } 1199 1200 /* 1201 * Flushes given table. 1202 * 1203 * Function create new table instance with the same 1204 * parameters, swaps it with old one and 1205 * flushes state without holding runtime WLOCK. 1206 * 1207 * Returns 0 on success. 1208 */ 1209 int 1210 flush_table(struct ip_fw_chain *ch, struct tid_info *ti) 1211 { 1212 struct namedobj_instance *ni; 1213 struct table_config *tc; 1214 struct table_algo *ta; 1215 struct table_info ti_old, ti_new, *tablestate; 1216 void *astate_old, *astate_new; 1217 char algostate[64], *pstate; 1218 struct tableop_state ts; 1219 int error, need_gc; 1220 uint16_t kidx; 1221 uint8_t tflags; 1222 1223 /* 1224 * Stage 1: save table algorithm. 1225 * Reference found table to ensure it won't disappear. 1226 */ 1227 IPFW_UH_WLOCK(ch); 1228 ni = CHAIN_TO_NI(ch); 1229 if ((tc = find_table(ni, ti)) == NULL) { 1230 IPFW_UH_WUNLOCK(ch); 1231 return (ESRCH); 1232 } 1233 need_gc = 0; 1234 astate_new = NULL; 1235 memset(&ti_new, 0, sizeof(ti_new)); 1236 restart: 1237 /* Set up swap handler */ 1238 memset(&ts, 0, sizeof(ts)); 1239 ts.opstate.func = restart_flush; 1240 ts.tc = tc; 1241 1242 ta = tc->ta; 1243 /* Do not flush readonly tables */ 1244 if ((ta->flags & TA_FLAG_READONLY) != 0) { 1245 IPFW_UH_WUNLOCK(ch); 1246 return (EACCES); 1247 } 1248 /* Save startup algo parameters */ 1249 if (ta->print_config != NULL) { 1250 ta->print_config(tc->astate, KIDX_TO_TI(ch, tc->no.kidx), 1251 algostate, sizeof(algostate)); 1252 pstate = algostate; 1253 } else 1254 pstate = NULL; 1255 tflags = tc->tflags; 1256 tc->no.refcnt++; 1257 add_toperation_state(ch, &ts); 1258 IPFW_UH_WUNLOCK(ch); 1259 1260 /* 1261 * Stage 1.5: if this is not the first attempt, destroy previous state 1262 */ 1263 if (need_gc != 0) { 1264 ta->destroy(astate_new, &ti_new); 1265 need_gc = 0; 1266 } 1267 1268 /* 1269 * Stage 2: allocate new table instance using same algo. 1270 */ 1271 memset(&ti_new, 0, sizeof(struct table_info)); 1272 error = ta->init(ch, &astate_new, &ti_new, pstate, tflags); 1273 1274 /* 1275 * Stage 3: swap old state pointers with newly-allocated ones. 1276 * Decrease refcount. 1277 */ 1278 IPFW_UH_WLOCK(ch); 1279 tc->no.refcnt--; 1280 del_toperation_state(ch, &ts); 1281 1282 if (error != 0) { 1283 IPFW_UH_WUNLOCK(ch); 1284 return (error); 1285 } 1286 1287 /* 1288 * Restart operation if table swap has happened: 1289 * even if algo may be the same, algo init parameters 1290 * may change. Restart operation instead of doing 1291 * complex checks. 1292 */ 1293 if (ts.modified != 0) { 1294 /* Delay destroying data since we're holding UH lock */ 1295 need_gc = 1; 1296 goto restart; 1297 } 1298 1299 ni = CHAIN_TO_NI(ch); 1300 kidx = tc->no.kidx; 1301 tablestate = (struct table_info *)ch->tablestate; 1302 1303 IPFW_WLOCK(ch); 1304 ti_old = tablestate[kidx]; 1305 tablestate[kidx] = ti_new; 1306 IPFW_WUNLOCK(ch); 1307 1308 astate_old = tc->astate; 1309 tc->astate = astate_new; 1310 tc->ti_copy = ti_new; 1311 tc->count = 0; 1312 1313 /* Notify algo on real @ti address */ 1314 if (ta->change_ti != NULL) 1315 ta->change_ti(tc->astate, &tablestate[kidx]); 1316 1317 /* 1318 * Stage 4: unref values. 1319 */ 1320 ipfw_unref_table_values(ch, tc, ta, astate_old, &ti_old); 1321 IPFW_UH_WUNLOCK(ch); 1322 1323 /* 1324 * Stage 5: perform real flush/destroy. 1325 */ 1326 ta->destroy(astate_old, &ti_old); 1327 1328 return (0); 1329 } 1330 1331 /* 1332 * Swaps two tables. 1333 * Data layout (v0)(current): 1334 * Request: [ ipfw_obj_header ipfw_obj_ntlv ] 1335 * 1336 * Returns 0 on success 1337 */ 1338 static int 1339 swap_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, 1340 struct sockopt_data *sd) 1341 { 1342 int error; 1343 struct _ipfw_obj_header *oh; 1344 struct tid_info ti_a, ti_b; 1345 1346 if (sd->valsize != sizeof(*oh) + sizeof(ipfw_obj_ntlv)) 1347 return (EINVAL); 1348 1349 oh = (struct _ipfw_obj_header *)op3; 1350 ntlv_to_ti(&oh->ntlv, &ti_a); 1351 ntlv_to_ti((ipfw_obj_ntlv *)(oh + 1), &ti_b); 1352 1353 error = swap_tables(ch, &ti_a, &ti_b); 1354 1355 return (error); 1356 } 1357 1358 /* 1359 * Swaps two tables of the same type/valtype. 1360 * 1361 * Checks if tables are compatible and limits 1362 * permits swap, than actually perform swap. 1363 * 1364 * Each table consists of 2 different parts: 1365 * config: 1366 * @tc (with name, set, kidx) and rule bindings, which is "stable". 1367 * number of items 1368 * table algo 1369 * runtime: 1370 * runtime data @ti (ch->tablestate) 1371 * runtime cache in @tc 1372 * algo-specific data (@tc->astate) 1373 * 1374 * So we switch: 1375 * all runtime data 1376 * number of items 1377 * table algo 1378 * 1379 * After that we call @ti change handler for each table. 1380 * 1381 * Note that referencing @tc won't protect tc->ta from change. 1382 * XXX: Do we need to restrict swap between locked tables? 1383 * XXX: Do we need to exchange ftype? 1384 * 1385 * Returns 0 on success. 1386 */ 1387 static int 1388 swap_tables(struct ip_fw_chain *ch, struct tid_info *a, 1389 struct tid_info *b) 1390 { 1391 struct namedobj_instance *ni; 1392 struct table_config *tc_a, *tc_b; 1393 struct table_algo *ta; 1394 struct table_info ti, *tablestate; 1395 void *astate; 1396 uint32_t count; 1397 1398 /* 1399 * Stage 1: find both tables and ensure they are of 1400 * the same type. 1401 */ 1402 IPFW_UH_WLOCK(ch); 1403 ni = CHAIN_TO_NI(ch); 1404 if ((tc_a = find_table(ni, a)) == NULL) { 1405 IPFW_UH_WUNLOCK(ch); 1406 return (ESRCH); 1407 } 1408 if ((tc_b = find_table(ni, b)) == NULL) { 1409 IPFW_UH_WUNLOCK(ch); 1410 return (ESRCH); 1411 } 1412 1413 /* It is very easy to swap between the same table */ 1414 if (tc_a == tc_b) { 1415 IPFW_UH_WUNLOCK(ch); 1416 return (0); 1417 } 1418 1419 /* Check type and value are the same */ 1420 if (tc_a->no.subtype!=tc_b->no.subtype || tc_a->tflags!=tc_b->tflags) { 1421 IPFW_UH_WUNLOCK(ch); 1422 return (EINVAL); 1423 } 1424 1425 /* Check limits before swap */ 1426 if ((tc_a->limit != 0 && tc_b->count > tc_a->limit) || 1427 (tc_b->limit != 0 && tc_a->count > tc_b->limit)) { 1428 IPFW_UH_WUNLOCK(ch); 1429 return (EFBIG); 1430 } 1431 1432 /* Check if one of the tables is readonly */ 1433 if (((tc_a->ta->flags | tc_b->ta->flags) & TA_FLAG_READONLY) != 0) { 1434 IPFW_UH_WUNLOCK(ch); 1435 return (EACCES); 1436 } 1437 1438 /* Notify we're going to swap */ 1439 rollback_toperation_state(ch, tc_a); 1440 rollback_toperation_state(ch, tc_b); 1441 1442 /* Everything is fine, prepare to swap */ 1443 tablestate = (struct table_info *)ch->tablestate; 1444 ti = tablestate[tc_a->no.kidx]; 1445 ta = tc_a->ta; 1446 astate = tc_a->astate; 1447 count = tc_a->count; 1448 1449 IPFW_WLOCK(ch); 1450 /* a <- b */ 1451 tablestate[tc_a->no.kidx] = tablestate[tc_b->no.kidx]; 1452 tc_a->ta = tc_b->ta; 1453 tc_a->astate = tc_b->astate; 1454 tc_a->count = tc_b->count; 1455 /* b <- a */ 1456 tablestate[tc_b->no.kidx] = ti; 1457 tc_b->ta = ta; 1458 tc_b->astate = astate; 1459 tc_b->count = count; 1460 IPFW_WUNLOCK(ch); 1461 1462 /* Ensure tc.ti copies are in sync */ 1463 tc_a->ti_copy = tablestate[tc_a->no.kidx]; 1464 tc_b->ti_copy = tablestate[tc_b->no.kidx]; 1465 1466 /* Notify both tables on @ti change */ 1467 if (tc_a->ta->change_ti != NULL) 1468 tc_a->ta->change_ti(tc_a->astate, &tablestate[tc_a->no.kidx]); 1469 if (tc_b->ta->change_ti != NULL) 1470 tc_b->ta->change_ti(tc_b->astate, &tablestate[tc_b->no.kidx]); 1471 1472 IPFW_UH_WUNLOCK(ch); 1473 1474 return (0); 1475 } 1476 1477 /* 1478 * Destroys table specified by @ti. 1479 * Data layout (v0)(current): 1480 * Request: [ ip_fw3_opheader ] 1481 * 1482 * Returns 0 on success 1483 */ 1484 static int 1485 destroy_table(struct ip_fw_chain *ch, struct tid_info *ti) 1486 { 1487 struct namedobj_instance *ni; 1488 struct table_config *tc; 1489 1490 IPFW_UH_WLOCK(ch); 1491 1492 ni = CHAIN_TO_NI(ch); 1493 if ((tc = find_table(ni, ti)) == NULL) { 1494 IPFW_UH_WUNLOCK(ch); 1495 return (ESRCH); 1496 } 1497 1498 /* Do not permit destroying referenced tables */ 1499 if (tc->no.refcnt > 0) { 1500 IPFW_UH_WUNLOCK(ch); 1501 return (EBUSY); 1502 } 1503 1504 IPFW_WLOCK(ch); 1505 unlink_table(ch, tc); 1506 IPFW_WUNLOCK(ch); 1507 1508 /* Free obj index */ 1509 if (ipfw_objhash_free_idx(ni, tc->no.kidx) != 0) 1510 printf("Error unlinking kidx %d from table %s\n", 1511 tc->no.kidx, tc->tablename); 1512 1513 /* Unref values used in tables while holding UH lock */ 1514 ipfw_unref_table_values(ch, tc, tc->ta, tc->astate, &tc->ti_copy); 1515 IPFW_UH_WUNLOCK(ch); 1516 1517 free_table_config(ni, tc); 1518 1519 return (0); 1520 } 1521 1522 static uint32_t 1523 roundup2p(uint32_t v) 1524 { 1525 1526 v--; 1527 v |= v >> 1; 1528 v |= v >> 2; 1529 v |= v >> 4; 1530 v |= v >> 8; 1531 v |= v >> 16; 1532 v++; 1533 1534 return (v); 1535 } 1536 1537 /* 1538 * Grow tables index. 1539 * 1540 * Returns 0 on success. 1541 */ 1542 int 1543 ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables) 1544 { 1545 unsigned int ntables_old, tbl; 1546 struct namedobj_instance *ni; 1547 void *new_idx, *old_tablestate, *tablestate; 1548 struct table_info *ti; 1549 struct table_config *tc; 1550 int i, new_blocks; 1551 1552 /* Check new value for validity */ 1553 if (ntables == 0) 1554 return (EINVAL); 1555 if (ntables > IPFW_TABLES_MAX) 1556 ntables = IPFW_TABLES_MAX; 1557 /* Alight to nearest power of 2 */ 1558 ntables = (unsigned int)roundup2p(ntables); 1559 1560 /* Allocate new pointers */ 1561 tablestate = malloc(ntables * sizeof(struct table_info), 1562 M_IPFW, M_WAITOK | M_ZERO); 1563 1564 ipfw_objhash_bitmap_alloc(ntables, (void *)&new_idx, &new_blocks); 1565 1566 IPFW_UH_WLOCK(ch); 1567 1568 tbl = (ntables >= V_fw_tables_max) ? V_fw_tables_max : ntables; 1569 ni = CHAIN_TO_NI(ch); 1570 1571 /* Temporary restrict decreasing max_tables */ 1572 if (ntables < V_fw_tables_max) { 1573 1574 /* 1575 * FIXME: Check if we really can shrink 1576 */ 1577 IPFW_UH_WUNLOCK(ch); 1578 return (EINVAL); 1579 } 1580 1581 /* Copy table info/indices */ 1582 memcpy(tablestate, ch->tablestate, sizeof(struct table_info) * tbl); 1583 ipfw_objhash_bitmap_merge(ni, &new_idx, &new_blocks); 1584 1585 IPFW_WLOCK(ch); 1586 1587 /* Change pointers */ 1588 old_tablestate = ch->tablestate; 1589 ch->tablestate = tablestate; 1590 ipfw_objhash_bitmap_swap(ni, &new_idx, &new_blocks); 1591 1592 ntables_old = V_fw_tables_max; 1593 V_fw_tables_max = ntables; 1594 1595 IPFW_WUNLOCK(ch); 1596 1597 /* Notify all consumers that their @ti pointer has changed */ 1598 ti = (struct table_info *)ch->tablestate; 1599 for (i = 0; i < tbl; i++, ti++) { 1600 if (ti->lookup == NULL) 1601 continue; 1602 tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, i); 1603 if (tc == NULL || tc->ta->change_ti == NULL) 1604 continue; 1605 1606 tc->ta->change_ti(tc->astate, ti); 1607 } 1608 1609 IPFW_UH_WUNLOCK(ch); 1610 1611 /* Free old pointers */ 1612 free(old_tablestate, M_IPFW); 1613 ipfw_objhash_bitmap_free(new_idx, new_blocks); 1614 1615 return (0); 1616 } 1617 1618 /* 1619 * Lookup table's named object by its @kidx. 1620 */ 1621 struct named_object * 1622 ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch, uint16_t kidx) 1623 { 1624 1625 return (ipfw_objhash_lookup_kidx(CHAIN_TO_NI(ch), kidx)); 1626 } 1627 1628 /* 1629 * Take reference to table specified in @ntlv. 1630 * On success return its @kidx. 1631 */ 1632 int 1633 ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint16_t *kidx) 1634 { 1635 struct tid_info ti; 1636 struct table_config *tc; 1637 int error; 1638 1639 IPFW_UH_WLOCK_ASSERT(ch); 1640 1641 ntlv_to_ti(ntlv, &ti); 1642 error = find_table_err(CHAIN_TO_NI(ch), &ti, &tc); 1643 if (error != 0) 1644 return (error); 1645 1646 if (tc == NULL) 1647 return (ESRCH); 1648 1649 tc_ref(tc); 1650 *kidx = tc->no.kidx; 1651 1652 return (0); 1653 } 1654 1655 void 1656 ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx) 1657 { 1658 1659 struct namedobj_instance *ni; 1660 struct named_object *no; 1661 1662 IPFW_UH_WLOCK_ASSERT(ch); 1663 ni = CHAIN_TO_NI(ch); 1664 no = ipfw_objhash_lookup_kidx(ni, kidx); 1665 KASSERT(no != NULL, ("Table with index %d not found", kidx)); 1666 no->refcnt--; 1667 } 1668 1669 /* 1670 * Lookup an arbitrary key @paddr of length @plen in table @tbl. 1671 * Stores found value in @val. 1672 * 1673 * Returns 1 if key was found. 1674 */ 1675 int 1676 ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen, 1677 void *paddr, uint32_t *val) 1678 { 1679 struct table_info *ti; 1680 1681 ti = KIDX_TO_TI(ch, tbl); 1682 1683 return (ti->lookup(ti, paddr, plen, val)); 1684 } 1685 1686 /* 1687 * Info/List/dump support for tables. 1688 * 1689 */ 1690 1691 /* 1692 * High-level 'get' cmds sysctl handlers 1693 */ 1694 1695 /* 1696 * Lists all tables currently available in kernel. 1697 * Data layout (v0)(current): 1698 * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size 1699 * Reply: [ ipfw_obj_lheader ipfw_xtable_info x N ] 1700 * 1701 * Returns 0 on success 1702 */ 1703 static int 1704 list_tables(struct ip_fw_chain *ch, ip_fw3_opheader *op3, 1705 struct sockopt_data *sd) 1706 { 1707 struct _ipfw_obj_lheader *olh; 1708 int error; 1709 1710 olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); 1711 if (olh == NULL) 1712 return (EINVAL); 1713 if (sd->valsize < olh->size) 1714 return (EINVAL); 1715 1716 IPFW_UH_RLOCK(ch); 1717 error = export_tables(ch, olh, sd); 1718 IPFW_UH_RUNLOCK(ch); 1719 1720 return (error); 1721 } 1722 1723 /* 1724 * Store table info to buffer provided by @sd. 1725 * Data layout (v0)(current): 1726 * Request: [ ipfw_obj_header ipfw_xtable_info(empty)] 1727 * Reply: [ ipfw_obj_header ipfw_xtable_info ] 1728 * 1729 * Returns 0 on success. 1730 */ 1731 static int 1732 describe_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, 1733 struct sockopt_data *sd) 1734 { 1735 struct _ipfw_obj_header *oh; 1736 struct table_config *tc; 1737 struct tid_info ti; 1738 size_t sz; 1739 1740 sz = sizeof(*oh) + sizeof(ipfw_xtable_info); 1741 oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); 1742 if (oh == NULL) 1743 return (EINVAL); 1744 1745 objheader_to_ti(oh, &ti); 1746 1747 IPFW_UH_RLOCK(ch); 1748 if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) { 1749 IPFW_UH_RUNLOCK(ch); 1750 return (ESRCH); 1751 } 1752 1753 export_table_info(ch, tc, (ipfw_xtable_info *)(oh + 1)); 1754 IPFW_UH_RUNLOCK(ch); 1755 1756 return (0); 1757 } 1758 1759 /* 1760 * Modifies existing table. 1761 * Data layout (v0)(current): 1762 * Request: [ ipfw_obj_header ipfw_xtable_info ] 1763 * 1764 * Returns 0 on success 1765 */ 1766 static int 1767 modify_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, 1768 struct sockopt_data *sd) 1769 { 1770 struct _ipfw_obj_header *oh; 1771 ipfw_xtable_info *i; 1772 char *tname; 1773 struct tid_info ti; 1774 struct namedobj_instance *ni; 1775 struct table_config *tc; 1776 1777 if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info)) 1778 return (EINVAL); 1779 1780 oh = (struct _ipfw_obj_header *)sd->kbuf; 1781 i = (ipfw_xtable_info *)(oh + 1); 1782 1783 /* 1784 * Verify user-supplied strings. 1785 * Check for null-terminated/zero-length strings/ 1786 */ 1787 tname = oh->ntlv.name; 1788 if (check_table_name(tname) != 0) 1789 return (EINVAL); 1790 1791 objheader_to_ti(oh, &ti); 1792 ti.type = i->type; 1793 1794 IPFW_UH_WLOCK(ch); 1795 ni = CHAIN_TO_NI(ch); 1796 if ((tc = find_table(ni, &ti)) == NULL) { 1797 IPFW_UH_WUNLOCK(ch); 1798 return (ESRCH); 1799 } 1800 1801 /* Do not support any modifications for readonly tables */ 1802 if ((tc->ta->flags & TA_FLAG_READONLY) != 0) { 1803 IPFW_UH_WUNLOCK(ch); 1804 return (EACCES); 1805 } 1806 1807 if ((i->mflags & IPFW_TMFLAGS_LIMIT) != 0) 1808 tc->limit = i->limit; 1809 if ((i->mflags & IPFW_TMFLAGS_LOCK) != 0) 1810 tc->locked = ((i->flags & IPFW_TGFLAGS_LOCKED) != 0); 1811 IPFW_UH_WUNLOCK(ch); 1812 1813 return (0); 1814 } 1815 1816 /* 1817 * Creates new table. 1818 * Data layout (v0)(current): 1819 * Request: [ ipfw_obj_header ipfw_xtable_info ] 1820 * 1821 * Returns 0 on success 1822 */ 1823 static int 1824 create_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, 1825 struct sockopt_data *sd) 1826 { 1827 struct _ipfw_obj_header *oh; 1828 ipfw_xtable_info *i; 1829 char *tname, *aname; 1830 struct tid_info ti; 1831 struct namedobj_instance *ni; 1832 1833 if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info)) 1834 return (EINVAL); 1835 1836 oh = (struct _ipfw_obj_header *)sd->kbuf; 1837 i = (ipfw_xtable_info *)(oh + 1); 1838 1839 /* 1840 * Verify user-supplied strings. 1841 * Check for null-terminated/zero-length strings/ 1842 */ 1843 tname = oh->ntlv.name; 1844 aname = i->algoname; 1845 if (check_table_name(tname) != 0 || 1846 strnlen(aname, sizeof(i->algoname)) == sizeof(i->algoname)) 1847 return (EINVAL); 1848 1849 if (aname[0] == '\0') { 1850 /* Use default algorithm */ 1851 aname = NULL; 1852 } 1853 1854 objheader_to_ti(oh, &ti); 1855 ti.type = i->type; 1856 1857 ni = CHAIN_TO_NI(ch); 1858 1859 IPFW_UH_RLOCK(ch); 1860 if (find_table(ni, &ti) != NULL) { 1861 IPFW_UH_RUNLOCK(ch); 1862 return (EEXIST); 1863 } 1864 IPFW_UH_RUNLOCK(ch); 1865 1866 return (create_table_internal(ch, &ti, aname, i, NULL, 0)); 1867 } 1868 1869 /* 1870 * Creates new table based on @ti and @aname. 1871 * 1872 * Assume @aname to be checked and valid. 1873 * Stores allocated table kidx inside @pkidx (if non-NULL). 1874 * Reference created table if @compat is non-zero. 1875 * 1876 * Returns 0 on success. 1877 */ 1878 static int 1879 create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti, 1880 char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int compat) 1881 { 1882 struct namedobj_instance *ni; 1883 struct table_config *tc, *tc_new, *tmp; 1884 struct table_algo *ta; 1885 uint16_t kidx; 1886 1887 ni = CHAIN_TO_NI(ch); 1888 1889 ta = find_table_algo(CHAIN_TO_TCFG(ch), ti, aname); 1890 if (ta == NULL) 1891 return (ENOTSUP); 1892 1893 tc = alloc_table_config(ch, ti, ta, aname, i->tflags); 1894 if (tc == NULL) 1895 return (ENOMEM); 1896 1897 tc->vmask = i->vmask; 1898 tc->limit = i->limit; 1899 if (ta->flags & TA_FLAG_READONLY) 1900 tc->locked = 1; 1901 else 1902 tc->locked = (i->flags & IPFW_TGFLAGS_LOCKED) != 0; 1903 1904 IPFW_UH_WLOCK(ch); 1905 1906 /* Check if table has been already created */ 1907 tc_new = find_table(ni, ti); 1908 if (tc_new != NULL) { 1909 1910 /* 1911 * Compat: do not fail if we're 1912 * requesting to create existing table 1913 * which has the same type 1914 */ 1915 if (compat == 0 || tc_new->no.subtype != tc->no.subtype) { 1916 IPFW_UH_WUNLOCK(ch); 1917 free_table_config(ni, tc); 1918 return (EEXIST); 1919 } 1920 1921 /* Exchange tc and tc_new for proper refcounting & freeing */ 1922 tmp = tc; 1923 tc = tc_new; 1924 tc_new = tmp; 1925 } else { 1926 /* New table */ 1927 if (ipfw_objhash_alloc_idx(ni, &kidx) != 0) { 1928 IPFW_UH_WUNLOCK(ch); 1929 printf("Unable to allocate table index." 1930 " Consider increasing net.inet.ip.fw.tables_max"); 1931 free_table_config(ni, tc); 1932 return (EBUSY); 1933 } 1934 tc->no.kidx = kidx; 1935 tc->no.etlv = IPFW_TLV_TBL_NAME; 1936 1937 link_table(ch, tc); 1938 } 1939 1940 if (compat != 0) 1941 tc->no.refcnt++; 1942 if (pkidx != NULL) 1943 *pkidx = tc->no.kidx; 1944 1945 IPFW_UH_WUNLOCK(ch); 1946 1947 if (tc_new != NULL) 1948 free_table_config(ni, tc_new); 1949 1950 return (0); 1951 } 1952 1953 static void 1954 ntlv_to_ti(ipfw_obj_ntlv *ntlv, struct tid_info *ti) 1955 { 1956 1957 memset(ti, 0, sizeof(struct tid_info)); 1958 ti->set = ntlv->set; 1959 ti->uidx = ntlv->idx; 1960 ti->tlvs = ntlv; 1961 ti->tlen = ntlv->head.length; 1962 } 1963 1964 static void 1965 objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti) 1966 { 1967 1968 ntlv_to_ti(&oh->ntlv, ti); 1969 } 1970 1971 struct namedobj_instance * 1972 ipfw_get_table_objhash(struct ip_fw_chain *ch) 1973 { 1974 1975 return (CHAIN_TO_NI(ch)); 1976 } 1977 1978 /* 1979 * Exports basic table info as name TLV. 1980 * Used inside dump_static_rules() to provide info 1981 * about all tables referenced by current ruleset. 1982 * 1983 * Returns 0 on success. 1984 */ 1985 int 1986 ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx, 1987 struct sockopt_data *sd) 1988 { 1989 struct namedobj_instance *ni; 1990 struct named_object *no; 1991 ipfw_obj_ntlv *ntlv; 1992 1993 ni = CHAIN_TO_NI(ch); 1994 1995 no = ipfw_objhash_lookup_kidx(ni, kidx); 1996 KASSERT(no != NULL, ("invalid table kidx passed")); 1997 1998 ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv)); 1999 if (ntlv == NULL) 2000 return (ENOMEM); 2001 2002 ntlv->head.type = IPFW_TLV_TBL_NAME; 2003 ntlv->head.length = sizeof(*ntlv); 2004 ntlv->idx = no->kidx; 2005 strlcpy(ntlv->name, no->name, sizeof(ntlv->name)); 2006 2007 return (0); 2008 } 2009 2010 struct dump_args { 2011 struct ip_fw_chain *ch; 2012 struct table_info *ti; 2013 struct table_config *tc; 2014 struct sockopt_data *sd; 2015 uint32_t cnt; 2016 uint16_t uidx; 2017 int error; 2018 uint32_t size; 2019 ipfw_table_entry *ent; 2020 ta_foreach_f *f; 2021 void *farg; 2022 ipfw_obj_tentry tent; 2023 }; 2024 2025 static int 2026 count_ext_entries(void *e, void *arg) 2027 { 2028 struct dump_args *da; 2029 2030 da = (struct dump_args *)arg; 2031 da->cnt++; 2032 2033 return (0); 2034 } 2035 2036 /* 2037 * Gets number of items from table either using 2038 * internal counter or calling algo callback for 2039 * externally-managed tables. 2040 * 2041 * Returns number of records. 2042 */ 2043 static uint32_t 2044 table_get_count(struct ip_fw_chain *ch, struct table_config *tc) 2045 { 2046 struct table_info *ti; 2047 struct table_algo *ta; 2048 struct dump_args da; 2049 2050 ti = KIDX_TO_TI(ch, tc->no.kidx); 2051 ta = tc->ta; 2052 2053 /* Use internal counter for self-managed tables */ 2054 if ((ta->flags & TA_FLAG_READONLY) == 0) 2055 return (tc->count); 2056 2057 /* Use callback to quickly get number of items */ 2058 if ((ta->flags & TA_FLAG_EXTCOUNTER) != 0) 2059 return (ta->get_count(tc->astate, ti)); 2060 2061 /* Count number of iterms ourselves */ 2062 memset(&da, 0, sizeof(da)); 2063 ta->foreach(tc->astate, ti, count_ext_entries, &da); 2064 2065 return (da.cnt); 2066 } 2067 2068 /* 2069 * Exports table @tc info into standard ipfw_xtable_info format. 2070 */ 2071 static void 2072 export_table_info(struct ip_fw_chain *ch, struct table_config *tc, 2073 ipfw_xtable_info *i) 2074 { 2075 struct table_info *ti; 2076 struct table_algo *ta; 2077 2078 i->type = tc->no.subtype; 2079 i->tflags = tc->tflags; 2080 i->vmask = tc->vmask; 2081 i->set = tc->no.set; 2082 i->kidx = tc->no.kidx; 2083 i->refcnt = tc->no.refcnt; 2084 i->count = table_get_count(ch, tc); 2085 i->limit = tc->limit; 2086 i->flags |= (tc->locked != 0) ? IPFW_TGFLAGS_LOCKED : 0; 2087 i->size = i->count * sizeof(ipfw_obj_tentry); 2088 i->size += sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info); 2089 strlcpy(i->tablename, tc->tablename, sizeof(i->tablename)); 2090 ti = KIDX_TO_TI(ch, tc->no.kidx); 2091 ta = tc->ta; 2092 if (ta->print_config != NULL) { 2093 /* Use algo function to print table config to string */ 2094 ta->print_config(tc->astate, ti, i->algoname, 2095 sizeof(i->algoname)); 2096 } else 2097 strlcpy(i->algoname, ta->name, sizeof(i->algoname)); 2098 /* Dump algo-specific data, if possible */ 2099 if (ta->dump_tinfo != NULL) { 2100 ta->dump_tinfo(tc->astate, ti, &i->ta_info); 2101 i->ta_info.flags |= IPFW_TATFLAGS_DATA; 2102 } 2103 } 2104 2105 struct dump_table_args { 2106 struct ip_fw_chain *ch; 2107 struct sockopt_data *sd; 2108 }; 2109 2110 static int 2111 export_table_internal(struct namedobj_instance *ni, struct named_object *no, 2112 void *arg) 2113 { 2114 ipfw_xtable_info *i; 2115 struct dump_table_args *dta; 2116 2117 dta = (struct dump_table_args *)arg; 2118 2119 i = (ipfw_xtable_info *)ipfw_get_sopt_space(dta->sd, sizeof(*i)); 2120 KASSERT(i != NULL, ("previously checked buffer is not enough")); 2121 2122 export_table_info(dta->ch, (struct table_config *)no, i); 2123 return (0); 2124 } 2125 2126 /* 2127 * Export all tables as ipfw_xtable_info structures to 2128 * storage provided by @sd. 2129 * 2130 * If supplied buffer is too small, fills in required size 2131 * and returns ENOMEM. 2132 * Returns 0 on success. 2133 */ 2134 static int 2135 export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh, 2136 struct sockopt_data *sd) 2137 { 2138 uint32_t size; 2139 uint32_t count; 2140 struct dump_table_args dta; 2141 2142 count = ipfw_objhash_count(CHAIN_TO_NI(ch)); 2143 size = count * sizeof(ipfw_xtable_info) + sizeof(ipfw_obj_lheader); 2144 2145 /* Fill in header regadless of buffer size */ 2146 olh->count = count; 2147 olh->objsize = sizeof(ipfw_xtable_info); 2148 2149 if (size > olh->size) { 2150 olh->size = size; 2151 return (ENOMEM); 2152 } 2153 2154 olh->size = size; 2155 2156 dta.ch = ch; 2157 dta.sd = sd; 2158 2159 ipfw_objhash_foreach(CHAIN_TO_NI(ch), export_table_internal, &dta); 2160 2161 return (0); 2162 } 2163 2164 /* 2165 * Dumps all table data 2166 * Data layout (v1)(current): 2167 * Request: [ ipfw_obj_header ], size = ipfw_xtable_info.size 2168 * Reply: [ ipfw_obj_header ipfw_xtable_info ipfw_obj_tentry x N ] 2169 * 2170 * Returns 0 on success 2171 */ 2172 static int 2173 dump_table_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3, 2174 struct sockopt_data *sd) 2175 { 2176 struct _ipfw_obj_header *oh; 2177 ipfw_xtable_info *i; 2178 struct tid_info ti; 2179 struct table_config *tc; 2180 struct table_algo *ta; 2181 struct dump_args da; 2182 uint32_t sz; 2183 2184 sz = sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info); 2185 oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); 2186 if (oh == NULL) 2187 return (EINVAL); 2188 2189 i = (ipfw_xtable_info *)(oh + 1); 2190 objheader_to_ti(oh, &ti); 2191 2192 IPFW_UH_RLOCK(ch); 2193 if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) { 2194 IPFW_UH_RUNLOCK(ch); 2195 return (ESRCH); 2196 } 2197 export_table_info(ch, tc, i); 2198 2199 if (sd->valsize < i->size) { 2200 2201 /* 2202 * Submitted buffer size is not enough. 2203 * WE've already filled in @i structure with 2204 * relevant table info including size, so we 2205 * can return. Buffer will be flushed automatically. 2206 */ 2207 IPFW_UH_RUNLOCK(ch); 2208 return (ENOMEM); 2209 } 2210 2211 /* 2212 * Do the actual dump in eXtended format 2213 */ 2214 memset(&da, 0, sizeof(da)); 2215 da.ch = ch; 2216 da.ti = KIDX_TO_TI(ch, tc->no.kidx); 2217 da.tc = tc; 2218 da.sd = sd; 2219 2220 ta = tc->ta; 2221 2222 ta->foreach(tc->astate, da.ti, dump_table_tentry, &da); 2223 IPFW_UH_RUNLOCK(ch); 2224 2225 return (da.error); 2226 } 2227 2228 /* 2229 * Dumps all table data 2230 * Data layout (version 0)(legacy): 2231 * Request: [ ipfw_xtable ], size = IP_FW_TABLE_XGETSIZE() 2232 * Reply: [ ipfw_xtable ipfw_table_xentry x N ] 2233 * 2234 * Returns 0 on success 2235 */ 2236 static int 2237 dump_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3, 2238 struct sockopt_data *sd) 2239 { 2240 ipfw_xtable *xtbl; 2241 struct tid_info ti; 2242 struct table_config *tc; 2243 struct table_algo *ta; 2244 struct dump_args da; 2245 size_t sz, count; 2246 2247 xtbl = (ipfw_xtable *)ipfw_get_sopt_header(sd, sizeof(ipfw_xtable)); 2248 if (xtbl == NULL) 2249 return (EINVAL); 2250 2251 memset(&ti, 0, sizeof(ti)); 2252 ti.uidx = xtbl->tbl; 2253 2254 IPFW_UH_RLOCK(ch); 2255 if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) { 2256 IPFW_UH_RUNLOCK(ch); 2257 return (0); 2258 } 2259 count = table_get_count(ch, tc); 2260 sz = count * sizeof(ipfw_table_xentry) + sizeof(ipfw_xtable); 2261 2262 xtbl->cnt = count; 2263 xtbl->size = sz; 2264 xtbl->type = tc->no.subtype; 2265 xtbl->tbl = ti.uidx; 2266 2267 if (sd->valsize < sz) { 2268 2269 /* 2270 * Submitted buffer size is not enough. 2271 * WE've already filled in @i structure with 2272 * relevant table info including size, so we 2273 * can return. Buffer will be flushed automatically. 2274 */ 2275 IPFW_UH_RUNLOCK(ch); 2276 return (ENOMEM); 2277 } 2278 2279 /* Do the actual dump in eXtended format */ 2280 memset(&da, 0, sizeof(da)); 2281 da.ch = ch; 2282 da.ti = KIDX_TO_TI(ch, tc->no.kidx); 2283 da.tc = tc; 2284 da.sd = sd; 2285 2286 ta = tc->ta; 2287 2288 ta->foreach(tc->astate, da.ti, dump_table_xentry, &da); 2289 IPFW_UH_RUNLOCK(ch); 2290 2291 return (0); 2292 } 2293 2294 /* 2295 * Legacy function to retrieve number of items in table. 2296 */ 2297 static int 2298 get_table_size(struct ip_fw_chain *ch, ip_fw3_opheader *op3, 2299 struct sockopt_data *sd) 2300 { 2301 uint32_t *tbl; 2302 struct tid_info ti; 2303 size_t sz; 2304 int error; 2305 2306 sz = sizeof(*op3) + sizeof(uint32_t); 2307 op3 = (ip_fw3_opheader *)ipfw_get_sopt_header(sd, sz); 2308 if (op3 == NULL) 2309 return (EINVAL); 2310 2311 tbl = (uint32_t *)(op3 + 1); 2312 memset(&ti, 0, sizeof(ti)); 2313 ti.uidx = *tbl; 2314 IPFW_UH_RLOCK(ch); 2315 error = ipfw_count_xtable(ch, &ti, tbl); 2316 IPFW_UH_RUNLOCK(ch); 2317 return (error); 2318 } 2319 2320 /* 2321 * Legacy IP_FW_TABLE_GETSIZE handler 2322 */ 2323 int 2324 ipfw_count_table(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt) 2325 { 2326 struct table_config *tc; 2327 2328 if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) 2329 return (ESRCH); 2330 *cnt = table_get_count(ch, tc); 2331 return (0); 2332 } 2333 2334 /* 2335 * Legacy IP_FW_TABLE_XGETSIZE handler 2336 */ 2337 int 2338 ipfw_count_xtable(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt) 2339 { 2340 struct table_config *tc; 2341 uint32_t count; 2342 2343 if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) { 2344 *cnt = 0; 2345 return (0); /* 'table all list' requires success */ 2346 } 2347 2348 count = table_get_count(ch, tc); 2349 *cnt = count * sizeof(ipfw_table_xentry); 2350 if (count > 0) 2351 *cnt += sizeof(ipfw_xtable); 2352 return (0); 2353 } 2354 2355 static int 2356 dump_table_entry(void *e, void *arg) 2357 { 2358 struct dump_args *da; 2359 struct table_config *tc; 2360 struct table_algo *ta; 2361 ipfw_table_entry *ent; 2362 struct table_value *pval; 2363 int error; 2364 2365 da = (struct dump_args *)arg; 2366 2367 tc = da->tc; 2368 ta = tc->ta; 2369 2370 /* Out of memory, returning */ 2371 if (da->cnt == da->size) 2372 return (1); 2373 ent = da->ent++; 2374 ent->tbl = da->uidx; 2375 da->cnt++; 2376 2377 error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent); 2378 if (error != 0) 2379 return (error); 2380 2381 ent->addr = da->tent.k.addr.s_addr; 2382 ent->masklen = da->tent.masklen; 2383 pval = get_table_value(da->ch, da->tc, da->tent.v.kidx); 2384 ent->value = ipfw_export_table_value_legacy(pval); 2385 2386 return (0); 2387 } 2388 2389 /* 2390 * Dumps table in pre-8.1 legacy format. 2391 */ 2392 int 2393 ipfw_dump_table_legacy(struct ip_fw_chain *ch, struct tid_info *ti, 2394 ipfw_table *tbl) 2395 { 2396 struct table_config *tc; 2397 struct table_algo *ta; 2398 struct dump_args da; 2399 2400 tbl->cnt = 0; 2401 2402 if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) 2403 return (0); /* XXX: We should return ESRCH */ 2404 2405 ta = tc->ta; 2406 2407 /* This dump format supports IPv4 only */ 2408 if (tc->no.subtype != IPFW_TABLE_ADDR) 2409 return (0); 2410 2411 memset(&da, 0, sizeof(da)); 2412 da.ch = ch; 2413 da.ti = KIDX_TO_TI(ch, tc->no.kidx); 2414 da.tc = tc; 2415 da.ent = &tbl->ent[0]; 2416 da.size = tbl->size; 2417 2418 tbl->cnt = 0; 2419 ta->foreach(tc->astate, da.ti, dump_table_entry, &da); 2420 tbl->cnt = da.cnt; 2421 2422 return (0); 2423 } 2424 2425 /* 2426 * Dumps table entry in eXtended format (v1)(current). 2427 */ 2428 static int 2429 dump_table_tentry(void *e, void *arg) 2430 { 2431 struct dump_args *da; 2432 struct table_config *tc; 2433 struct table_algo *ta; 2434 struct table_value *pval; 2435 ipfw_obj_tentry *tent; 2436 int error; 2437 2438 da = (struct dump_args *)arg; 2439 2440 tc = da->tc; 2441 ta = tc->ta; 2442 2443 tent = (ipfw_obj_tentry *)ipfw_get_sopt_space(da->sd, sizeof(*tent)); 2444 /* Out of memory, returning */ 2445 if (tent == NULL) { 2446 da->error = ENOMEM; 2447 return (1); 2448 } 2449 tent->head.length = sizeof(ipfw_obj_tentry); 2450 tent->idx = da->uidx; 2451 2452 error = ta->dump_tentry(tc->astate, da->ti, e, tent); 2453 if (error != 0) 2454 return (error); 2455 2456 pval = get_table_value(da->ch, da->tc, tent->v.kidx); 2457 ipfw_export_table_value_v1(pval, &tent->v.value); 2458 2459 return (0); 2460 } 2461 2462 /* 2463 * Dumps table entry in eXtended format (v0). 2464 */ 2465 static int 2466 dump_table_xentry(void *e, void *arg) 2467 { 2468 struct dump_args *da; 2469 struct table_config *tc; 2470 struct table_algo *ta; 2471 ipfw_table_xentry *xent; 2472 ipfw_obj_tentry *tent; 2473 struct table_value *pval; 2474 int error; 2475 2476 da = (struct dump_args *)arg; 2477 2478 tc = da->tc; 2479 ta = tc->ta; 2480 2481 xent = (ipfw_table_xentry *)ipfw_get_sopt_space(da->sd, sizeof(*xent)); 2482 /* Out of memory, returning */ 2483 if (xent == NULL) 2484 return (1); 2485 xent->len = sizeof(ipfw_table_xentry); 2486 xent->tbl = da->uidx; 2487 2488 memset(&da->tent, 0, sizeof(da->tent)); 2489 tent = &da->tent; 2490 error = ta->dump_tentry(tc->astate, da->ti, e, tent); 2491 if (error != 0) 2492 return (error); 2493 2494 /* Convert current format to previous one */ 2495 xent->masklen = tent->masklen; 2496 pval = get_table_value(da->ch, da->tc, da->tent.v.kidx); 2497 xent->value = ipfw_export_table_value_legacy(pval); 2498 /* Apply some hacks */ 2499 if (tc->no.subtype == IPFW_TABLE_ADDR && tent->subtype == AF_INET) { 2500 xent->k.addr6.s6_addr32[3] = tent->k.addr.s_addr; 2501 xent->flags = IPFW_TCF_INET; 2502 } else 2503 memcpy(&xent->k, &tent->k, sizeof(xent->k)); 2504 2505 return (0); 2506 } 2507 2508 /* 2509 * Helper function to export table algo data 2510 * to tentry format before calling user function. 2511 * 2512 * Returns 0 on success. 2513 */ 2514 static int 2515 prepare_table_tentry(void *e, void *arg) 2516 { 2517 struct dump_args *da; 2518 struct table_config *tc; 2519 struct table_algo *ta; 2520 int error; 2521 2522 da = (struct dump_args *)arg; 2523 2524 tc = da->tc; 2525 ta = tc->ta; 2526 2527 error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent); 2528 if (error != 0) 2529 return (error); 2530 2531 da->f(&da->tent, da->farg); 2532 2533 return (0); 2534 } 2535 2536 /* 2537 * Allow external consumers to read table entries in standard format. 2538 */ 2539 int 2540 ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint16_t kidx, 2541 ta_foreach_f *f, void *arg) 2542 { 2543 struct namedobj_instance *ni; 2544 struct table_config *tc; 2545 struct table_algo *ta; 2546 struct dump_args da; 2547 2548 ni = CHAIN_TO_NI(ch); 2549 2550 tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx); 2551 if (tc == NULL) 2552 return (ESRCH); 2553 2554 ta = tc->ta; 2555 2556 memset(&da, 0, sizeof(da)); 2557 da.ch = ch; 2558 da.ti = KIDX_TO_TI(ch, tc->no.kidx); 2559 da.tc = tc; 2560 da.f = f; 2561 da.farg = arg; 2562 2563 ta->foreach(tc->astate, da.ti, prepare_table_tentry, &da); 2564 2565 return (0); 2566 } 2567 2568 /* 2569 * Table algorithms 2570 */ 2571 2572 /* 2573 * Finds algorithm by index, table type or supplied name. 2574 * 2575 * Returns pointer to algo or NULL. 2576 */ 2577 static struct table_algo * 2578 find_table_algo(struct tables_config *tcfg, struct tid_info *ti, char *name) 2579 { 2580 int i, l; 2581 struct table_algo *ta; 2582 2583 if (ti->type > IPFW_TABLE_MAXTYPE) 2584 return (NULL); 2585 2586 /* Search by index */ 2587 if (ti->atype != 0) { 2588 if (ti->atype > tcfg->algo_count) 2589 return (NULL); 2590 return (tcfg->algo[ti->atype]); 2591 } 2592 2593 if (name == NULL) { 2594 /* Return default algorithm for given type if set */ 2595 return (tcfg->def_algo[ti->type]); 2596 } 2597 2598 /* Search by name */ 2599 /* TODO: better search */ 2600 for (i = 1; i <= tcfg->algo_count; i++) { 2601 ta = tcfg->algo[i]; 2602 2603 /* 2604 * One can supply additional algorithm 2605 * parameters so we compare only the first word 2606 * of supplied name: 2607 * 'addr:chash hsize=32' 2608 * '^^^^^^^^^' 2609 * 2610 */ 2611 l = strlen(ta->name); 2612 if (strncmp(name, ta->name, l) != 0) 2613 continue; 2614 if (name[l] != '\0' && name[l] != ' ') 2615 continue; 2616 /* Check if we're requesting proper table type */ 2617 if (ti->type != 0 && ti->type != ta->type) 2618 return (NULL); 2619 return (ta); 2620 } 2621 2622 return (NULL); 2623 } 2624 2625 /* 2626 * Register new table algo @ta. 2627 * Stores algo id inside @idx. 2628 * 2629 * Returns 0 on success. 2630 */ 2631 int 2632 ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, size_t size, 2633 int *idx) 2634 { 2635 struct tables_config *tcfg; 2636 struct table_algo *ta_new; 2637 size_t sz; 2638 2639 if (size > sizeof(struct table_algo)) 2640 return (EINVAL); 2641 2642 /* Check for the required on-stack size for add/del */ 2643 sz = roundup2(ta->ta_buf_size, sizeof(void *)); 2644 if (sz > TA_BUF_SZ) 2645 return (EINVAL); 2646 2647 KASSERT(ta->type <= IPFW_TABLE_MAXTYPE,("Increase IPFW_TABLE_MAXTYPE")); 2648 2649 /* Copy algorithm data to stable storage. */ 2650 ta_new = malloc(sizeof(struct table_algo), M_IPFW, M_WAITOK | M_ZERO); 2651 memcpy(ta_new, ta, size); 2652 2653 tcfg = CHAIN_TO_TCFG(ch); 2654 2655 KASSERT(tcfg->algo_count < 255, ("Increase algo array size")); 2656 2657 tcfg->algo[++tcfg->algo_count] = ta_new; 2658 ta_new->idx = tcfg->algo_count; 2659 2660 /* Set algorithm as default one for given type */ 2661 if ((ta_new->flags & TA_FLAG_DEFAULT) != 0 && 2662 tcfg->def_algo[ta_new->type] == NULL) 2663 tcfg->def_algo[ta_new->type] = ta_new; 2664 2665 *idx = ta_new->idx; 2666 2667 return (0); 2668 } 2669 2670 /* 2671 * Unregisters table algo using @idx as id. 2672 * XXX: It is NOT safe to call this function in any place 2673 * other than ipfw instance destroy handler. 2674 */ 2675 void 2676 ipfw_del_table_algo(struct ip_fw_chain *ch, int idx) 2677 { 2678 struct tables_config *tcfg; 2679 struct table_algo *ta; 2680 2681 tcfg = CHAIN_TO_TCFG(ch); 2682 2683 KASSERT(idx <= tcfg->algo_count, ("algo idx %d out of range 1..%d", 2684 idx, tcfg->algo_count)); 2685 2686 ta = tcfg->algo[idx]; 2687 KASSERT(ta != NULL, ("algo idx %d is NULL", idx)); 2688 2689 if (tcfg->def_algo[ta->type] == ta) 2690 tcfg->def_algo[ta->type] = NULL; 2691 2692 free(ta, M_IPFW); 2693 } 2694 2695 /* 2696 * Lists all table algorithms currently available. 2697 * Data layout (v0)(current): 2698 * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size 2699 * Reply: [ ipfw_obj_lheader ipfw_ta_info x N ] 2700 * 2701 * Returns 0 on success 2702 */ 2703 static int 2704 list_table_algo(struct ip_fw_chain *ch, ip_fw3_opheader *op3, 2705 struct sockopt_data *sd) 2706 { 2707 struct _ipfw_obj_lheader *olh; 2708 struct tables_config *tcfg; 2709 ipfw_ta_info *i; 2710 struct table_algo *ta; 2711 uint32_t count, n, size; 2712 2713 olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); 2714 if (olh == NULL) 2715 return (EINVAL); 2716 if (sd->valsize < olh->size) 2717 return (EINVAL); 2718 2719 IPFW_UH_RLOCK(ch); 2720 tcfg = CHAIN_TO_TCFG(ch); 2721 count = tcfg->algo_count; 2722 size = count * sizeof(ipfw_ta_info) + sizeof(ipfw_obj_lheader); 2723 2724 /* Fill in header regadless of buffer size */ 2725 olh->count = count; 2726 olh->objsize = sizeof(ipfw_ta_info); 2727 2728 if (size > olh->size) { 2729 olh->size = size; 2730 IPFW_UH_RUNLOCK(ch); 2731 return (ENOMEM); 2732 } 2733 olh->size = size; 2734 2735 for (n = 1; n <= count; n++) { 2736 i = (ipfw_ta_info *)ipfw_get_sopt_space(sd, sizeof(*i)); 2737 KASSERT(i != NULL, ("previously checked buffer is not enough")); 2738 ta = tcfg->algo[n]; 2739 strlcpy(i->algoname, ta->name, sizeof(i->algoname)); 2740 i->type = ta->type; 2741 i->refcnt = ta->refcnt; 2742 } 2743 2744 IPFW_UH_RUNLOCK(ch); 2745 2746 return (0); 2747 } 2748 2749 static int 2750 classify_srcdst(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) 2751 { 2752 /* Basic IPv4/IPv6 or u32 lookups */ 2753 *puidx = cmd->arg1; 2754 /* Assume ADDR by default */ 2755 *ptype = IPFW_TABLE_ADDR; 2756 int v; 2757 2758 if (F_LEN(cmd) > F_INSN_SIZE(ipfw_insn_u32)) { 2759 /* 2760 * generic lookup. The key must be 2761 * in 32bit big-endian format. 2762 */ 2763 v = ((ipfw_insn_u32 *)cmd)->d[1]; 2764 switch (v) { 2765 case 0: 2766 case 1: 2767 /* IPv4 src/dst */ 2768 break; 2769 case 2: 2770 case 3: 2771 /* src/dst port */ 2772 *ptype = IPFW_TABLE_NUMBER; 2773 break; 2774 case 4: 2775 /* uid/gid */ 2776 *ptype = IPFW_TABLE_NUMBER; 2777 break; 2778 case 5: 2779 /* jid */ 2780 *ptype = IPFW_TABLE_NUMBER; 2781 break; 2782 case 6: 2783 /* dscp */ 2784 *ptype = IPFW_TABLE_NUMBER; 2785 break; 2786 } 2787 } 2788 2789 return (0); 2790 } 2791 2792 static int 2793 classify_via(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) 2794 { 2795 ipfw_insn_if *cmdif; 2796 2797 /* Interface table, possibly */ 2798 cmdif = (ipfw_insn_if *)cmd; 2799 if (cmdif->name[0] != '\1') 2800 return (1); 2801 2802 *ptype = IPFW_TABLE_INTERFACE; 2803 *puidx = cmdif->p.kidx; 2804 2805 return (0); 2806 } 2807 2808 static int 2809 classify_flow(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) 2810 { 2811 2812 *puidx = cmd->arg1; 2813 *ptype = IPFW_TABLE_FLOW; 2814 2815 return (0); 2816 } 2817 2818 static void 2819 update_arg1(ipfw_insn *cmd, uint16_t idx) 2820 { 2821 2822 cmd->arg1 = idx; 2823 } 2824 2825 static void 2826 update_via(ipfw_insn *cmd, uint16_t idx) 2827 { 2828 ipfw_insn_if *cmdif; 2829 2830 cmdif = (ipfw_insn_if *)cmd; 2831 cmdif->p.kidx = idx; 2832 } 2833 2834 static int 2835 table_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, 2836 struct named_object **pno) 2837 { 2838 struct table_config *tc; 2839 int error; 2840 2841 IPFW_UH_WLOCK_ASSERT(ch); 2842 2843 error = find_table_err(CHAIN_TO_NI(ch), ti, &tc); 2844 if (error != 0) 2845 return (error); 2846 2847 *pno = &tc->no; 2848 return (0); 2849 } 2850 2851 /* XXX: sets-sets! */ 2852 static struct named_object * 2853 table_findbykidx(struct ip_fw_chain *ch, uint16_t idx) 2854 { 2855 struct namedobj_instance *ni; 2856 struct table_config *tc; 2857 2858 IPFW_UH_WLOCK_ASSERT(ch); 2859 ni = CHAIN_TO_NI(ch); 2860 tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, idx); 2861 KASSERT(tc != NULL, ("Table with index %d not found", idx)); 2862 2863 return (&tc->no); 2864 } 2865 2866 static int 2867 table_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set, 2868 enum ipfw_sets_cmd cmd) 2869 { 2870 2871 switch (cmd) { 2872 case SWAP_ALL: 2873 case TEST_ALL: 2874 case MOVE_ALL: 2875 /* 2876 * Always return success, the real action and decision 2877 * should make table_manage_sets_all(). 2878 */ 2879 return (0); 2880 case TEST_ONE: 2881 case MOVE_ONE: 2882 /* 2883 * NOTE: we need to use ipfw_objhash_del/ipfw_objhash_add 2884 * if set number will be used in hash function. Currently 2885 * we can just use generic handler that replaces set value. 2886 */ 2887 if (V_fw_tables_sets == 0) 2888 return (0); 2889 break; 2890 case COUNT_ONE: 2891 /* 2892 * Return EOPNOTSUPP for COUNT_ONE when per-set sysctl is 2893 * disabled. This allow skip table's opcodes from additional 2894 * checks when specific rules moved to another set. 2895 */ 2896 if (V_fw_tables_sets == 0) 2897 return (EOPNOTSUPP); 2898 } 2899 /* Use generic sets handler when per-set sysctl is enabled. */ 2900 return (ipfw_obj_manage_sets(CHAIN_TO_NI(ch), IPFW_TLV_TBL_NAME, 2901 set, new_set, cmd)); 2902 } 2903 2904 /* 2905 * We register several opcode rewriters for lookup tables. 2906 * All tables opcodes have the same ETLV type, but different subtype. 2907 * To avoid invoking sets handler several times for XXX_ALL commands, 2908 * we use separate manage_sets handler. O_RECV has the lowest value, 2909 * so it should be called first. 2910 */ 2911 static int 2912 table_manage_sets_all(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set, 2913 enum ipfw_sets_cmd cmd) 2914 { 2915 2916 switch (cmd) { 2917 case SWAP_ALL: 2918 case TEST_ALL: 2919 /* 2920 * Return success for TEST_ALL, since nothing prevents 2921 * move rules from one set to another. All tables are 2922 * accessible from all sets when per-set tables sysctl 2923 * is disabled. 2924 */ 2925 case MOVE_ALL: 2926 if (V_fw_tables_sets == 0) 2927 return (0); 2928 break; 2929 default: 2930 return (table_manage_sets(ch, set, new_set, cmd)); 2931 } 2932 /* Use generic sets handler when per-set sysctl is enabled. */ 2933 return (ipfw_obj_manage_sets(CHAIN_TO_NI(ch), IPFW_TLV_TBL_NAME, 2934 set, new_set, cmd)); 2935 } 2936 2937 static struct opcode_obj_rewrite opcodes[] = { 2938 { 2939 .opcode = O_IP_SRC_LOOKUP, 2940 .etlv = IPFW_TLV_TBL_NAME, 2941 .classifier = classify_srcdst, 2942 .update = update_arg1, 2943 .find_byname = table_findbyname, 2944 .find_bykidx = table_findbykidx, 2945 .create_object = create_table_compat, 2946 .manage_sets = table_manage_sets, 2947 }, 2948 { 2949 .opcode = O_IP_DST_LOOKUP, 2950 .etlv = IPFW_TLV_TBL_NAME, 2951 .classifier = classify_srcdst, 2952 .update = update_arg1, 2953 .find_byname = table_findbyname, 2954 .find_bykidx = table_findbykidx, 2955 .create_object = create_table_compat, 2956 .manage_sets = table_manage_sets, 2957 }, 2958 { 2959 .opcode = O_IP_FLOW_LOOKUP, 2960 .etlv = IPFW_TLV_TBL_NAME, 2961 .classifier = classify_flow, 2962 .update = update_arg1, 2963 .find_byname = table_findbyname, 2964 .find_bykidx = table_findbykidx, 2965 .create_object = create_table_compat, 2966 .manage_sets = table_manage_sets, 2967 }, 2968 { 2969 .opcode = O_XMIT, 2970 .etlv = IPFW_TLV_TBL_NAME, 2971 .classifier = classify_via, 2972 .update = update_via, 2973 .find_byname = table_findbyname, 2974 .find_bykidx = table_findbykidx, 2975 .create_object = create_table_compat, 2976 .manage_sets = table_manage_sets, 2977 }, 2978 { 2979 .opcode = O_RECV, 2980 .etlv = IPFW_TLV_TBL_NAME, 2981 .classifier = classify_via, 2982 .update = update_via, 2983 .find_byname = table_findbyname, 2984 .find_bykidx = table_findbykidx, 2985 .create_object = create_table_compat, 2986 .manage_sets = table_manage_sets_all, 2987 }, 2988 { 2989 .opcode = O_VIA, 2990 .etlv = IPFW_TLV_TBL_NAME, 2991 .classifier = classify_via, 2992 .update = update_via, 2993 .find_byname = table_findbyname, 2994 .find_bykidx = table_findbykidx, 2995 .create_object = create_table_compat, 2996 .manage_sets = table_manage_sets, 2997 }, 2998 }; 2999 3000 static int 3001 test_sets_cb(struct namedobj_instance *ni __unused, struct named_object *no, 3002 void *arg __unused) 3003 { 3004 3005 /* Check that there aren't any tables in not default set */ 3006 if (no->set != 0) 3007 return (EBUSY); 3008 return (0); 3009 } 3010 3011 /* 3012 * Switch between "set 0" and "rule's set" table binding, 3013 * Check all ruleset bindings and permits changing 3014 * IFF each binding has both rule AND table in default set (set 0). 3015 * 3016 * Returns 0 on success. 3017 */ 3018 int 3019 ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int sets) 3020 { 3021 struct opcode_obj_rewrite *rw; 3022 struct namedobj_instance *ni; 3023 struct named_object *no; 3024 struct ip_fw *rule; 3025 ipfw_insn *cmd; 3026 int cmdlen, i, l; 3027 uint16_t kidx; 3028 uint8_t subtype; 3029 3030 IPFW_UH_WLOCK(ch); 3031 3032 if (V_fw_tables_sets == sets) { 3033 IPFW_UH_WUNLOCK(ch); 3034 return (0); 3035 } 3036 ni = CHAIN_TO_NI(ch); 3037 if (sets == 0) { 3038 /* 3039 * Prevent disabling sets support if we have some tables 3040 * in not default sets. 3041 */ 3042 if (ipfw_objhash_foreach_type(ni, test_sets_cb, 3043 NULL, IPFW_TLV_TBL_NAME) != 0) { 3044 IPFW_UH_WUNLOCK(ch); 3045 return (EBUSY); 3046 } 3047 } 3048 /* 3049 * Scan all rules and examine tables opcodes. 3050 */ 3051 for (i = 0; i < ch->n_rules; i++) { 3052 rule = ch->map[i]; 3053 3054 l = rule->cmd_len; 3055 cmd = rule->cmd; 3056 cmdlen = 0; 3057 for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { 3058 cmdlen = F_LEN(cmd); 3059 /* Check only tables opcodes */ 3060 for (kidx = 0, rw = opcodes; 3061 rw < opcodes + nitems(opcodes); rw++) { 3062 if (rw->opcode != cmd->opcode) 3063 continue; 3064 if (rw->classifier(cmd, &kidx, &subtype) == 0) 3065 break; 3066 } 3067 if (kidx == 0) 3068 continue; 3069 no = ipfw_objhash_lookup_kidx(ni, kidx); 3070 /* Check if both table object and rule has the set 0 */ 3071 if (no->set != 0 || rule->set != 0) { 3072 IPFW_UH_WUNLOCK(ch); 3073 return (EBUSY); 3074 } 3075 3076 } 3077 } 3078 V_fw_tables_sets = sets; 3079 IPFW_UH_WUNLOCK(ch); 3080 return (0); 3081 } 3082 3083 /* 3084 * Checks table name for validity. 3085 * Enforce basic length checks, the rest 3086 * should be done in userland. 3087 * 3088 * Returns 0 if name is considered valid. 3089 */ 3090 static int 3091 check_table_name(const char *name) 3092 { 3093 3094 /* 3095 * TODO: do some more complicated checks 3096 */ 3097 return (ipfw_check_object_name_generic(name)); 3098 } 3099 3100 /* 3101 * Finds table config based on either legacy index 3102 * or name in ntlv. 3103 * Note @ti structure contains unchecked data from userland. 3104 * 3105 * Returns 0 in success and fills in @tc with found config 3106 */ 3107 static int 3108 find_table_err(struct namedobj_instance *ni, struct tid_info *ti, 3109 struct table_config **tc) 3110 { 3111 char *name, bname[16]; 3112 struct named_object *no; 3113 ipfw_obj_ntlv *ntlv; 3114 uint32_t set; 3115 3116 if (ti->tlvs != NULL) { 3117 ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, 3118 IPFW_TLV_TBL_NAME); 3119 if (ntlv == NULL) 3120 return (EINVAL); 3121 name = ntlv->name; 3122 3123 /* 3124 * Use set provided by @ti instead of @ntlv one. 3125 * This is needed due to different sets behavior 3126 * controlled by V_fw_tables_sets. 3127 */ 3128 set = (V_fw_tables_sets != 0) ? ti->set : 0; 3129 } else { 3130 snprintf(bname, sizeof(bname), "%d", ti->uidx); 3131 name = bname; 3132 set = 0; 3133 } 3134 3135 no = ipfw_objhash_lookup_name(ni, set, name); 3136 *tc = (struct table_config *)no; 3137 3138 return (0); 3139 } 3140 3141 /* 3142 * Finds table config based on either legacy index 3143 * or name in ntlv. 3144 * Note @ti structure contains unchecked data from userland. 3145 * 3146 * Returns pointer to table_config or NULL. 3147 */ 3148 static struct table_config * 3149 find_table(struct namedobj_instance *ni, struct tid_info *ti) 3150 { 3151 struct table_config *tc; 3152 3153 if (find_table_err(ni, ti, &tc) != 0) 3154 return (NULL); 3155 3156 return (tc); 3157 } 3158 3159 /* 3160 * Allocate new table config structure using 3161 * specified @algo and @aname. 3162 * 3163 * Returns pointer to config or NULL. 3164 */ 3165 static struct table_config * 3166 alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti, 3167 struct table_algo *ta, char *aname, uint8_t tflags) 3168 { 3169 char *name, bname[16]; 3170 struct table_config *tc; 3171 int error; 3172 ipfw_obj_ntlv *ntlv; 3173 uint32_t set; 3174 3175 if (ti->tlvs != NULL) { 3176 ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, 3177 IPFW_TLV_TBL_NAME); 3178 if (ntlv == NULL) 3179 return (NULL); 3180 name = ntlv->name; 3181 set = (V_fw_tables_sets == 0) ? 0 : ntlv->set; 3182 } else { 3183 /* Compat part: convert number to string representation */ 3184 snprintf(bname, sizeof(bname), "%d", ti->uidx); 3185 name = bname; 3186 set = 0; 3187 } 3188 3189 tc = malloc(sizeof(struct table_config), M_IPFW, M_WAITOK | M_ZERO); 3190 tc->no.name = tc->tablename; 3191 tc->no.subtype = ta->type; 3192 tc->no.set = set; 3193 tc->tflags = tflags; 3194 tc->ta = ta; 3195 strlcpy(tc->tablename, name, sizeof(tc->tablename)); 3196 /* Set "shared" value type by default */ 3197 tc->vshared = 1; 3198 3199 /* Preallocate data structures for new tables */ 3200 error = ta->init(ch, &tc->astate, &tc->ti_copy, aname, tflags); 3201 if (error != 0) { 3202 free(tc, M_IPFW); 3203 return (NULL); 3204 } 3205 3206 return (tc); 3207 } 3208 3209 /* 3210 * Destroys table state and config. 3211 */ 3212 static void 3213 free_table_config(struct namedobj_instance *ni, struct table_config *tc) 3214 { 3215 3216 KASSERT(tc->linked == 0, ("free() on linked config")); 3217 /* UH lock MUST NOT be held */ 3218 3219 /* 3220 * We're using ta without any locking/referencing. 3221 * TODO: fix this if we're going to use unloadable algos. 3222 */ 3223 tc->ta->destroy(tc->astate, &tc->ti_copy); 3224 free(tc, M_IPFW); 3225 } 3226 3227 /* 3228 * Links @tc to @chain table named instance. 3229 * Sets appropriate type/states in @chain table info. 3230 */ 3231 static void 3232 link_table(struct ip_fw_chain *ch, struct table_config *tc) 3233 { 3234 struct namedobj_instance *ni; 3235 struct table_info *ti; 3236 uint16_t kidx; 3237 3238 IPFW_UH_WLOCK_ASSERT(ch); 3239 3240 ni = CHAIN_TO_NI(ch); 3241 kidx = tc->no.kidx; 3242 3243 ipfw_objhash_add(ni, &tc->no); 3244 3245 ti = KIDX_TO_TI(ch, kidx); 3246 *ti = tc->ti_copy; 3247 3248 /* Notify algo on real @ti address */ 3249 if (tc->ta->change_ti != NULL) 3250 tc->ta->change_ti(tc->astate, ti); 3251 3252 tc->linked = 1; 3253 tc->ta->refcnt++; 3254 } 3255 3256 /* 3257 * Unlinks @tc from @chain table named instance. 3258 * Zeroes states in @chain and stores them in @tc. 3259 */ 3260 static void 3261 unlink_table(struct ip_fw_chain *ch, struct table_config *tc) 3262 { 3263 struct namedobj_instance *ni; 3264 struct table_info *ti; 3265 uint16_t kidx; 3266 3267 IPFW_UH_WLOCK_ASSERT(ch); 3268 IPFW_WLOCK_ASSERT(ch); 3269 3270 ni = CHAIN_TO_NI(ch); 3271 kidx = tc->no.kidx; 3272 3273 /* Clear state. @ti copy is already saved inside @tc */ 3274 ipfw_objhash_del(ni, &tc->no); 3275 ti = KIDX_TO_TI(ch, kidx); 3276 memset(ti, 0, sizeof(struct table_info)); 3277 tc->linked = 0; 3278 tc->ta->refcnt--; 3279 3280 /* Notify algo on real @ti address */ 3281 if (tc->ta->change_ti != NULL) 3282 tc->ta->change_ti(tc->astate, NULL); 3283 } 3284 3285 static struct ipfw_sopt_handler scodes[] = { 3286 { IP_FW_TABLE_XCREATE, 0, HDIR_SET, create_table }, 3287 { IP_FW_TABLE_XDESTROY, 0, HDIR_SET, flush_table_v0 }, 3288 { IP_FW_TABLE_XFLUSH, 0, HDIR_SET, flush_table_v0 }, 3289 { IP_FW_TABLE_XMODIFY, 0, HDIR_BOTH, modify_table }, 3290 { IP_FW_TABLE_XINFO, 0, HDIR_GET, describe_table }, 3291 { IP_FW_TABLES_XLIST, 0, HDIR_GET, list_tables }, 3292 { IP_FW_TABLE_XLIST, 0, HDIR_GET, dump_table_v0 }, 3293 { IP_FW_TABLE_XLIST, 1, HDIR_GET, dump_table_v1 }, 3294 { IP_FW_TABLE_XADD, 0, HDIR_BOTH, manage_table_ent_v0 }, 3295 { IP_FW_TABLE_XADD, 1, HDIR_BOTH, manage_table_ent_v1 }, 3296 { IP_FW_TABLE_XDEL, 0, HDIR_BOTH, manage_table_ent_v0 }, 3297 { IP_FW_TABLE_XDEL, 1, HDIR_BOTH, manage_table_ent_v1 }, 3298 { IP_FW_TABLE_XFIND, 0, HDIR_GET, find_table_entry }, 3299 { IP_FW_TABLE_XSWAP, 0, HDIR_SET, swap_table }, 3300 { IP_FW_TABLES_ALIST, 0, HDIR_GET, list_table_algo }, 3301 { IP_FW_TABLE_XGETSIZE, 0, HDIR_GET, get_table_size }, 3302 }; 3303 3304 static int 3305 destroy_table_locked(struct namedobj_instance *ni, struct named_object *no, 3306 void *arg) 3307 { 3308 3309 unlink_table((struct ip_fw_chain *)arg, (struct table_config *)no); 3310 if (ipfw_objhash_free_idx(ni, no->kidx) != 0) 3311 printf("Error unlinking kidx %d from table %s\n", 3312 no->kidx, no->name); 3313 free_table_config(ni, (struct table_config *)no); 3314 return (0); 3315 } 3316 3317 /* 3318 * Shuts tables module down. 3319 */ 3320 void 3321 ipfw_destroy_tables(struct ip_fw_chain *ch, int last) 3322 { 3323 3324 IPFW_DEL_SOPT_HANDLER(last, scodes); 3325 IPFW_DEL_OBJ_REWRITER(last, opcodes); 3326 3327 /* Remove all tables from working set */ 3328 IPFW_UH_WLOCK(ch); 3329 IPFW_WLOCK(ch); 3330 ipfw_objhash_foreach(CHAIN_TO_NI(ch), destroy_table_locked, ch); 3331 IPFW_WUNLOCK(ch); 3332 IPFW_UH_WUNLOCK(ch); 3333 3334 /* Free pointers itself */ 3335 free(ch->tablestate, M_IPFW); 3336 3337 ipfw_table_value_destroy(ch, last); 3338 ipfw_table_algo_destroy(ch); 3339 3340 ipfw_objhash_destroy(CHAIN_TO_NI(ch)); 3341 free(CHAIN_TO_TCFG(ch), M_IPFW); 3342 } 3343 3344 /* 3345 * Starts tables module. 3346 */ 3347 int 3348 ipfw_init_tables(struct ip_fw_chain *ch, int first) 3349 { 3350 struct tables_config *tcfg; 3351 3352 /* Allocate pointers */ 3353 ch->tablestate = malloc(V_fw_tables_max * sizeof(struct table_info), 3354 M_IPFW, M_WAITOK | M_ZERO); 3355 3356 tcfg = malloc(sizeof(struct tables_config), M_IPFW, M_WAITOK | M_ZERO); 3357 tcfg->namehash = ipfw_objhash_create(V_fw_tables_max); 3358 ch->tblcfg = tcfg; 3359 3360 ipfw_table_value_init(ch, first); 3361 ipfw_table_algo_init(ch); 3362 3363 IPFW_ADD_OBJ_REWRITER(first, opcodes); 3364 IPFW_ADD_SOPT_HANDLER(first, scodes); 3365 return (0); 3366 } 3367 3368 3369 3370