/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko.
 * Copyright (c) 2014 Yandex LLC
 * Copyright (c) 2014 Alexander V. Chernikov
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * Lookup table support for ipfw.
 *
 * This file contains handlers for all generic tables' operations:
 * add/del/flush entries, list/dump tables etc..
 *
 * Table data modification is protected by both UH and runtime lock
 * while reading configuration/data is protected by UH lock.
 *
 * Lookup algorithms for all table types are located in ip_fw_table_algo.c
 */

#include "opt_ipfw.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/queue.h>
#include <net/if.h>	/* ip_fw.h requires IFNAMSIZ */

#include <netinet/in.h>
#include <netinet/ip_var.h>	/* struct ipfw_rule_ref */
#include <netinet/ip_fw.h>

#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/ipfw/ip_fw_table.h>

/*
 * Table has the following `type` concepts:
 *
 * `no.type` represents lookup key type (addr, ifp, uid, etc..)
 * vmask represents bitmask of table values which are present at the moment.
 * Special IPFW_VTYPE_LEGACY ( (uint32_t)-1 ) represents old
 * single-value-for-all approach.
 */
struct table_config {
	struct named_object	no;
	uint8_t		tflags;		/* type flags */
	uint8_t		locked;		/* 1 if locked from changes */
	uint8_t		linked;		/* 1 if already linked */
	uint8_t		ochanged;	/* used by set swapping */
	uint8_t		vshared;	/* 1 if using shared value array */
	uint8_t		spare[3];
	uint32_t	count;		/* Number of records */
	uint32_t	limit;		/* Max number of records */
	uint32_t	vmask;		/* bitmask with supported values */
	uint32_t	ocount;		/* used by set swapping */
	uint64_t	gencnt;		/* generation count */
	char		tablename[64];	/* table name */
	struct table_algo	*ta;	/* Callbacks for given algo */
	void		*astate;	/* algorithm state */
	struct table_info	ti_copy;	/* data to put to table_info */
	struct namedobj_instance	*vi;
};

static int find_table_err(struct namedobj_instance *ni, struct tid_info *ti,
    struct table_config **tc);
static struct table_config *find_table(struct namedobj_instance *ni,
    struct tid_info *ti);
static struct table_config *alloc_table_config(struct ip_fw_chain *ch,
    struct tid_info *ti, struct table_algo *ta, char *adata, uint8_t tflags);
static void free_table_config(struct namedobj_instance *ni,
    struct table_config *tc);
static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
    char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int ref);
static void link_table(struct ip_fw_chain *ch, struct table_config *tc);
static void unlink_table(struct ip_fw_chain *ch, struct table_config *tc);
static int find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
    struct tentry_info *tei, uint32_t count, int op, struct table_config **ptc);
#define	OP_ADD	1
#define	OP_DEL	0
static int export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
    struct sockopt_data *sd);
static void export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
    ipfw_xtable_info *i);
static int dump_table_tentry(void *e, void *arg);
static int dump_table_xentry(void *e, void *arg);

static int swap_tables(struct ip_fw_chain *ch, struct tid_info *a,
    struct tid_info *b);

static int check_table_name(const char *name);
static int check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts,
    struct table_config *tc, struct table_info *ti, uint32_t count);
static int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti);

static struct table_algo *find_table_algo(struct tables_config *tableconf,
    struct tid_info *ti, char *name);

static void objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti);
static void ntlv_to_ti(struct _ipfw_obj_ntlv *ntlv, struct tid_info *ti);

#define	CHAIN_TO_NI(chain)	(CHAIN_TO_TCFG(chain)->namehash)
#define	KIDX_TO_TI(ch, k)	(&(((struct table_info *)(ch)->tablestate)[k]))

#define	TA_BUF_SZ	128	/* On-stack buffer for add/delete state */

void
rollback_toperation_state(struct ip_fw_chain *ch, void *object)
{
	struct tables_config *tcfg;
	struct op_state *os;

	tcfg = CHAIN_TO_TCFG(ch);
	TAILQ_FOREACH(os, &tcfg->state_list, next)
		os->func(object, os);
}

void
add_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts)
{
	struct tables_config *tcfg;

	tcfg = CHAIN_TO_TCFG(ch);
	TAILQ_INSERT_HEAD(&tcfg->state_list, &ts->opstate, next);
}
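
/*
 * Illustrative sketch (not part of the original code): callers that have
 * to drop the UH lock typically bracket the blocking part of their work
 * with the helpers above and below, roughly as follows:
 *
 *	add_toperation_state(ch, &ts);
 *	IPFW_UH_WUNLOCK(ch);
 *	... allocate memory / run algo callbacks that may sleep ...
 *	IPFW_UH_WLOCK(ch);
 *	del_toperation_state(ch, &ts);
 *	if (ts.modified != 0)
 *		... roll local state back and restart the operation ...
 */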

void
del_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts)
{
	struct tables_config *tcfg;

	tcfg = CHAIN_TO_TCFG(ch);
	TAILQ_REMOVE(&tcfg->state_list, &ts->opstate, next);
}

void
tc_ref(struct table_config *tc)
{

	tc->no.refcnt++;
}

void
tc_unref(struct table_config *tc)
{

	tc->no.refcnt--;
}

static struct table_value *
get_table_value(struct ip_fw_chain *ch, struct table_config *tc, uint32_t kidx)
{
	struct table_value *pval;

	pval = (struct table_value *)ch->valuestate;

	return (&pval[kidx]);
}

/*
 * Checks if we're able to insert/update entry @tei into table
 * w.r.t @tc limits.
 * May alter @tei to indicate insertion error / insert
 * options.
 *
 * Returns 0 if operation can be performed.
 */
static int
check_table_limit(struct table_config *tc, struct tentry_info *tei)
{

	if (tc->limit == 0 || tc->count < tc->limit)
		return (0);

	if ((tei->flags & TEI_FLAGS_UPDATE) == 0) {
		/* Notify userland on error cause */
		tei->flags |= TEI_FLAGS_LIMIT;
		return (EFBIG);
	}

	/*
	 * We have UPDATE flag set.
	 * Permit updating record (if found),
	 * but restrict adding new one since we've
	 * already hit the limit.
	 */
	tei->flags |= TEI_FLAGS_DONTADD;

	return (0);
}

/*
 * Convert algorithm callback return code into
 * one of pre-defined states known by userland.
 */
static void
store_tei_result(struct tentry_info *tei, int op, int error, uint32_t num)
{
	int flag;

	flag = 0;

	switch (error) {
	case 0:
		if (op == OP_ADD && num != 0)
			flag = TEI_FLAGS_ADDED;
		if (op == OP_DEL)
			flag = TEI_FLAGS_DELETED;
		break;
	case ENOENT:
		flag = TEI_FLAGS_NOTFOUND;
		break;
	case EEXIST:
		flag = TEI_FLAGS_EXISTS;
		break;
	default:
		flag = TEI_FLAGS_ERROR;
	}

	tei->flags |= flag;
}

/*
 * Creates and references table with default parameters.
 * Saves table config, algo and allocated kidx into @ptc, @pta and
 * @pkidx if non-zero.
 * Used for table auto-creation to support old binaries.
 *
 * Returns 0 on success.
 */
static int
create_table_compat(struct ip_fw_chain *ch, struct tid_info *ti,
    uint16_t *pkidx)
{
	ipfw_xtable_info xi;
	int error;

	memset(&xi, 0, sizeof(xi));
	/* Set default value mask for legacy clients */
	xi.vmask = IPFW_VTYPE_LEGACY;

	error = create_table_internal(ch, ti, NULL, &xi, pkidx, 1);
	if (error != 0)
		return (error);

	return (0);
}

/*
 * Find and reference existing table, optionally
 * creating a new one.
 *
 * Saves found table config into @ptc.
 * Note function may drop/acquire UH_WLOCK.
 * Returns 0 if table was found/created and referenced
 * or non-zero return code.
 */
static int
find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
    struct tentry_info *tei, uint32_t count, int op,
    struct table_config **ptc)
{
	struct namedobj_instance *ni;
	struct table_config *tc;
	uint16_t kidx;
	int error;

	IPFW_UH_WLOCK_ASSERT(ch);

	ni = CHAIN_TO_NI(ch);
	tc = NULL;
	if ((tc = find_table(ni, ti)) != NULL) {
		/* check table type */
		if (tc->no.subtype != ti->type)
			return (EINVAL);

		if (tc->locked != 0)
			return (EACCES);

		/* Try to exit early on limit hit */
		if (op == OP_ADD && count == 1 &&
		    check_table_limit(tc, tei) != 0)
			return (EFBIG);

		/* Reference and return */
		tc->no.refcnt++;
		*ptc = tc;
		return (0);
	}

	if (op == OP_DEL)
		return (ESRCH);

	/* Compatibility mode: create new table for old clients */
	if ((tei->flags & TEI_FLAGS_COMPAT) == 0)
		return (ESRCH);

	IPFW_UH_WUNLOCK(ch);
	error = create_table_compat(ch, ti, &kidx);
	IPFW_UH_WLOCK(ch);

	if (error != 0)
		return (error);

	tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx);
	KASSERT(tc != NULL, ("create_table_compat returned bad idx %d", kidx));

	/* OK, now we've got referenced table. */
	*ptc = tc;
	return (0);
}

/*
 * Rolls back the @added entries already inserted into @tc using state
 * array @ta_buf_m.
 * Assume the following layout:
 * 1) ADD state (ta_buf_m[0] ... ta_buf_m[added - 1]) for handling update cases
 * 2) DEL state (ta_buf_m[count] ... ta_buf_m[count + added - 1])
 *    for storing deleted state
 */
static void
rollback_added_entries(struct ip_fw_chain *ch, struct table_config *tc,
    struct table_info *tinfo, struct tentry_info *tei, caddr_t ta_buf_m,
    uint32_t count, uint32_t added)
{
	struct table_algo *ta;
	struct tentry_info *ptei;
	caddr_t v, vv;
	size_t ta_buf_sz;
	int error, i;
	uint32_t num;

	IPFW_UH_WLOCK_ASSERT(ch);

	ta = tc->ta;
	ta_buf_sz = ta->ta_buf_size;
	v = ta_buf_m;
	vv = v + count * ta_buf_sz;
	for (i = 0; i < added; i++, v += ta_buf_sz, vv += ta_buf_sz) {
		ptei = &tei[i];
		if ((ptei->flags & TEI_FLAGS_UPDATED) != 0) {

			/*
			 * We have old value stored by previous
			 * call in @ptei->value. Do add once again
			 * to restore it.
			 */
			error = ta->add(tc->astate, tinfo, ptei, v, &num);
			KASSERT(error == 0, ("rollback UPDATE fail"));
			KASSERT(num == 0, ("rollback UPDATE fail2"));
			continue;
		}

		error = ta->prepare_del(ch, ptei, vv);
		KASSERT(error == 0, ("pre-rollback INSERT failed"));
		error = ta->del(tc->astate, tinfo, ptei, vv, &num);
		KASSERT(error == 0, ("rollback INSERT failed"));
		tc->count -= num;
	}
}
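
/*
 * Illustrative layout sketch (for exposition only): for a batched add of
 * @count entries the state buffer allocated by prepare_batch_buffer()
 * below is twice the size, so rollback_added_entries() can find both
 * halves:
 *
 *	ta_buf_m: [ ADD state 0 | ... | ADD state count-1 |
 *	            DEL state 0 | ... | DEL state count-1 ]
 *
 * Each slot is ta->ta_buf_size bytes long.
 */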

/*
 * Prepares add/del state for all @count entries in @tei.
 * Uses either stack buffer (@ta_buf) or allocates a new one.
 * Stores pointer to allocated buffer back to @ta_buf.
 *
 * Returns 0 on success.
 */
static int
prepare_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta,
    struct tentry_info *tei, uint32_t count, int op, caddr_t *ta_buf)
{
	caddr_t ta_buf_m, v;
	size_t ta_buf_sz, sz;
	struct tentry_info *ptei;
	int error, i;

	error = 0;
	ta_buf_sz = ta->ta_buf_size;
	if (count == 1) {
		/* Single add/delete, use on-stack buffer */
		memset(*ta_buf, 0, TA_BUF_SZ);
		ta_buf_m = *ta_buf;
	} else {

		/*
		 * Multiple adds/deletes, allocate larger buffer
		 *
		 * Note we need 2xcount buffer for add case:
		 * we have to hold both ADD state
		 * and DELETE state (this may be needed
		 * if we need to rollback all changes)
		 */
		sz = count * ta_buf_sz;
		ta_buf_m = malloc((op == OP_ADD) ? sz * 2 : sz, M_TEMP,
		    M_WAITOK | M_ZERO);
	}

	v = ta_buf_m;
	for (i = 0; i < count; i++, v += ta_buf_sz) {
		ptei = &tei[i];
		error = (op == OP_ADD) ?
		    ta->prepare_add(ch, ptei, v) : ta->prepare_del(ch, ptei, v);

		/*
		 * Some syntax error (incorrect mask, or address, or
		 * anything). Return error regardless of atomicity
		 * settings.
		 */
		if (error != 0)
			break;
	}

	*ta_buf = ta_buf_m;
	return (error);
}

/*
 * Flushes allocated state for each of the @count entries in @tei.
 * Frees @ta_buf_m if it differs from stack buffer @ta_buf.
 */
static void
flush_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta,
    struct tentry_info *tei, uint32_t count, int rollback,
    caddr_t ta_buf_m, caddr_t ta_buf)
{
	caddr_t v;
	struct tentry_info *ptei;
	size_t ta_buf_sz;
	int i;

	ta_buf_sz = ta->ta_buf_size;

	/* Run cleaning callback anyway */
	v = ta_buf_m;
	for (i = 0; i < count; i++, v += ta_buf_sz) {
		ptei = &tei[i];
		ta->flush_entry(ch, ptei, v);
		if (ptei->ptv != NULL) {
			free(ptei->ptv, M_IPFW);
			ptei->ptv = NULL;
		}
	}

	/* Clean up "deleted" state in case of rollback */
	if (rollback != 0) {
		v = ta_buf_m + count * ta_buf_sz;
		for (i = 0; i < count; i++, v += ta_buf_sz)
			ta->flush_entry(ch, &tei[i], v);
	}

	if (ta_buf_m != ta_buf)
		free(ta_buf_m, M_TEMP);
}

static void
rollback_add_entry(void *object, struct op_state *_state)
{
	struct ip_fw_chain *ch;
	struct tableop_state *ts;

	ts = (struct tableop_state *)_state;

	if (ts->tc != object && ts->ch != object)
		return;

	ch = ts->ch;

	IPFW_UH_WLOCK_ASSERT(ch);

	/* Call specified unlockers */
	rollback_table_values(ts);

	/* Indicate we've called */
	ts->modified = 1;
}

/*
 * Adds/updates one or more entries in table @ti.
 *
 * Function may drop/reacquire UH wlock multiple times due to
 * items alloc, algorithm callbacks (check_space), value linkage
 * (new values, value storage realloc), etc..
 * Other processes like other adds (which may involve storage resize),
 * table swaps (which changes table data and may change algo type),
 * table modify (which may change value mask) may be executed
 * simultaneously so we need to deal with it.
 *
 * The following approach was implemented:
 * we have a per-chain linked list, protected with UH lock.
 * add_table_entry prepares a special on-stack structure which is passed
 * to its descendants. Users add this structure to this list before unlock.
 * After performing needed operations and acquiring UH lock back, each user
 * checks if structure has changed. If true, it rolls local state back and
 * returns without error to the caller.
 * add_table_entry() on its own checks if structure has changed and restarts
 * its operation from the beginning (goto restart).
 *
 * Functions which modify fields of interest (currently
 * resize_shared_value_storage() and swap_tables())
 * traverse the given list while holding UH lock immediately before
 * performing their operations, calling the function provided by the list
 * entry (currently rollback_add_entry) which performs rollback for all
 * necessary state and sets appropriate values in the structure indicating
 * rollback has happened.
 *
 * Algo interaction:
 * Function references @ti first to ensure table won't
 * disappear or change its type.
 * After that, prepare_add callback is called for each @tei entry.
 * Next, we try to add each entry under UH+WLOCK
 * using add() callback.
 * Finally, we free all state by calling flush_entry callback
 * for each @tei.
 *
 * Returns 0 on success.
 */
int
add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
    struct tentry_info *tei, uint8_t flags, uint32_t count)
{
	struct table_config *tc;
	struct table_algo *ta;
	uint16_t kidx;
	int error, first_error, i, rollback;
	uint32_t num, numadd;
	struct tentry_info *ptei;
	struct tableop_state ts;
	char ta_buf[TA_BUF_SZ];
	caddr_t ta_buf_m, v;

	memset(&ts, 0, sizeof(ts));
	ta = NULL;
	IPFW_UH_WLOCK(ch);

	/*
	 * Find and reference existing table.
	 */
restart:
	if (ts.modified != 0) {
		IPFW_UH_WUNLOCK(ch);
		flush_batch_buffer(ch, ta, tei, count, rollback,
		    ta_buf_m, ta_buf);
		memset(&ts, 0, sizeof(ts));
		ta = NULL;
		IPFW_UH_WLOCK(ch);
	}

	error = find_ref_table(ch, ti, tei, count, OP_ADD, &tc);
	if (error != 0) {
		IPFW_UH_WUNLOCK(ch);
		return (error);
	}
	ta = tc->ta;

	/* Fill in tablestate */
	ts.ch = ch;
	ts.opstate.func = rollback_add_entry;
	ts.tc = tc;
	ts.vshared = tc->vshared;
	ts.vmask = tc->vmask;
	ts.ta = ta;
	ts.tei = tei;
	ts.count = count;
	rollback = 0;
	add_toperation_state(ch, &ts);
	IPFW_UH_WUNLOCK(ch);

	/* Allocate memory and prepare record(s) */
	/* Pass stack buffer by default */
	ta_buf_m = ta_buf;
	error = prepare_batch_buffer(ch, ta, tei, count, OP_ADD, &ta_buf_m);

	IPFW_UH_WLOCK(ch);
	del_toperation_state(ch, &ts);
	/* Drop reference we've used in first search */
	tc->no.refcnt--;

	/* Check prepare_batch_buffer() error */
	if (error != 0)
		goto cleanup;

	/*
	 * Check if table swap has happened.
	 * (so table algo might be changed).
	 * Restart operation to achieve consistent behavior.
	 */
	if (ts.modified != 0)
		goto restart;

	/*
	 * Link all values to shared/per-table value array.
	 *
	 * May release/reacquire UH_WLOCK.
	 */
	error = ipfw_link_table_values(ch, &ts);
	if (error != 0)
		goto cleanup;
	if (ts.modified != 0)
		goto restart;

	/*
	 * Ensure we are able to add all entries without additional
	 * memory allocations. May release/reacquire UH_WLOCK.
	 */
	kidx = tc->no.kidx;
	error = check_table_space(ch, &ts, tc, KIDX_TO_TI(ch, kidx), count);
	if (error != 0)
		goto cleanup;
	if (ts.modified != 0)
		goto restart;

	/* We've got valid table in @tc. Let's try to add data */
	kidx = tc->no.kidx;
	ta = tc->ta;
	numadd = 0;
	first_error = 0;

	IPFW_WLOCK(ch);

	v = ta_buf_m;
	for (i = 0; i < count; i++, v += ta->ta_buf_size) {
		ptei = &tei[i];
		num = 0;
		/* check limit before adding */
		if ((error = check_table_limit(tc, ptei)) == 0) {
			error = ta->add(tc->astate, KIDX_TO_TI(ch, kidx),
			    ptei, v, &num);
			/* Set status flag to inform userland */
			store_tei_result(ptei, OP_ADD, error, num);
		}
		if (error == 0) {
			/* Update number of records to ease limit checking */
			tc->count += num;
			numadd += num;
			continue;
		}

		if (first_error == 0)
			first_error = error;

		/*
		 * Some error has happened. Check our atomicity
		 * settings: continue if atomicity is not required,
		 * rollback changes otherwise.
		 */
		if ((flags & IPFW_CTF_ATOMIC) == 0)
			continue;

		rollback_added_entries(ch, tc, KIDX_TO_TI(ch, kidx),
		    tei, ta_buf_m, count, i);

		rollback = 1;
		break;
	}

	IPFW_WUNLOCK(ch);

	ipfw_garbage_table_values(ch, tc, tei, count, rollback);

	/* Permit post-add algorithm grow/rehash. */
	if (numadd != 0)
		check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0);

	/* Return first error to user, if any */
	error = first_error;

cleanup:
	IPFW_UH_WUNLOCK(ch);

	flush_batch_buffer(ch, ta, tei, count, rollback, ta_buf_m, ta_buf);

	return (error);
}
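
/*
 * Illustrative sketch (not part of the original code): a kernel-side
 * caller adding a single IPv4 prefix to an existing address table with
 * user index 1 would fill the request structures roughly as below; value
 * setup and error handling are omitted for brevity.
 *
 *	struct tid_info ti;
 *	struct tentry_info tei;
 *	struct in_addr a;
 *
 *	memset(&ti, 0, sizeof(ti));
 *	ti.uidx = 1;
 *	ti.type = IPFW_TABLE_ADDR;
 *
 *	memset(&tei, 0, sizeof(tei));
 *	a.s_addr = htonl(0xc0a80000);	(192.168.0.0/16)
 *	tei.paddr = &a;
 *	tei.masklen = 16;
 *	tei.subtype = AF_INET;
 *
 *	error = add_table_entry(ch, &ti, &tei, 0, 1);
 */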

/*
 * Deletes one or more entries in table @ti.
 *
 * Returns 0 on success.
 */
int
del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
    struct tentry_info *tei, uint8_t flags, uint32_t count)
{
	struct table_config *tc;
	struct table_algo *ta;
	struct tentry_info *ptei;
	uint16_t kidx;
	int error, first_error, i;
	uint32_t num, numdel;
	char ta_buf[TA_BUF_SZ];
	caddr_t ta_buf_m, v;

	/*
	 * Find and reference existing table.
	 */
	IPFW_UH_WLOCK(ch);
	error = find_ref_table(ch, ti, tei, count, OP_DEL, &tc);
	if (error != 0) {
		IPFW_UH_WUNLOCK(ch);
		return (error);
	}
	ta = tc->ta;
	IPFW_UH_WUNLOCK(ch);

	/* Allocate memory and prepare record(s) */
	/* Pass stack buffer by default */
	ta_buf_m = ta_buf;
	error = prepare_batch_buffer(ch, ta, tei, count, OP_DEL, &ta_buf_m);
	if (error != 0)
		goto cleanup;

	IPFW_UH_WLOCK(ch);

	/* Drop reference we've used in first search */
	tc->no.refcnt--;

	/*
	 * Check if table algo is still the same.
	 * (changed ta may be the result of table swap).
	 */
	if (ta != tc->ta) {
		IPFW_UH_WUNLOCK(ch);
		error = EINVAL;
		goto cleanup;
	}

	kidx = tc->no.kidx;
	numdel = 0;
	first_error = 0;

	IPFW_WLOCK(ch);
	v = ta_buf_m;
	for (i = 0; i < count; i++, v += ta->ta_buf_size) {
		ptei = &tei[i];
		num = 0;
		error = ta->del(tc->astate, KIDX_TO_TI(ch, kidx), ptei, v,
		    &num);
		/* Save state for userland */
		store_tei_result(ptei, OP_DEL, error, num);
		if (error != 0 && first_error == 0)
			first_error = error;
		tc->count -= num;
		numdel += num;
	}
	IPFW_WUNLOCK(ch);

	/* Unlink non-used values */
	ipfw_garbage_table_values(ch, tc, tei, count, 0);

	if (numdel != 0) {
		/* Run post-del hook to permit shrinking */
		check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0);
	}

	IPFW_UH_WUNLOCK(ch);

	/* Return first error to user, if any */
	error = first_error;

cleanup:
	flush_batch_buffer(ch, ta, tei, count, 0, ta_buf_m, ta_buf);

	return (error);
}

/*
 * Ensure that table @tc has enough space to add @count entries without
 * need for reallocation.
 *
 * Callbacks order:
 * 0) need_modify() (UH_WLOCK) - checks if @count items can be added w/o resize.
 *
 * 1) alloc_modify (no locks, M_WAITOK) - alloc new state based on @pflags.
 * 2) prepare_modify (UH_WLOCK) - copy old data into new storage
 * 3) modify (UH_WLOCK + WLOCK) - switch pointers
 * 4) flush_modify (UH_WLOCK) - free state, if needed
 *
 * Returns 0 on success.
 */
static int
check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts,
    struct table_config *tc, struct table_info *ti, uint32_t count)
{
	struct table_algo *ta;
	uint64_t pflags;
	char ta_buf[TA_BUF_SZ];
	int error;

	IPFW_UH_WLOCK_ASSERT(ch);

	error = 0;
	ta = tc->ta;
	if (ta->need_modify == NULL)
		return (0);

	/* Acquire reference not to lose @tc between locks/unlocks */
	tc->no.refcnt++;

	/*
	 * TODO: think about avoiding race between large add/large delete
	 * operation on algorithm which implements shrinking along with
	 * growing.
	 */
	while (true) {
		pflags = 0;
		if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) {
			error = 0;
			break;
		}

		/* We have to shrink/grow table */
		if (ts != NULL)
			add_toperation_state(ch, ts);
		IPFW_UH_WUNLOCK(ch);

		memset(&ta_buf, 0, sizeof(ta_buf));
		error = ta->prepare_mod(ta_buf, &pflags);

		IPFW_UH_WLOCK(ch);
		if (ts != NULL)
			del_toperation_state(ch, ts);

		if (error != 0)
			break;

		if (ts != NULL && ts->modified != 0) {

			/*
			 * Swap operation has happened
			 * so we're currently operating on other
			 * table data. Stop doing this.
			 */
			ta->flush_mod(ta_buf);
			break;
		}

		/* Check if we still need to alter table */
		ti = KIDX_TO_TI(ch, tc->no.kidx);
		if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) {
			IPFW_UH_WUNLOCK(ch);

			/*
			 * Other thread has already performed resize.
			 * Flush our state and return.
			 */
			ta->flush_mod(ta_buf);
			break;
		}

		error = ta->fill_mod(tc->astate, ti, ta_buf, &pflags);
		if (error == 0) {
			/* Do actual modification */
			IPFW_WLOCK(ch);
			ta->modify(tc->astate, ti, ta_buf, pflags);
			IPFW_WUNLOCK(ch);
		}

		/* Anyway, flush data and retry */
		ta->flush_mod(ta_buf);
	}

	tc->no.refcnt--;
	return (error);
}

/*
 * Adds or deletes record in table.
 * Data layout (v0):
 * Request: [ ip_fw3_opheader ipfw_table_xentry ]
 *
 * Returns 0 on success
 */
static int
manage_table_ent_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
    struct sockopt_data *sd)
{
	ipfw_table_xentry *xent;
	struct tentry_info tei;
	struct tid_info ti;
	struct table_value v;
	int error, hdrlen, read;

	hdrlen = offsetof(ipfw_table_xentry, k);

	/* Check minimum header size */
	if (sd->valsize < (sizeof(*op3) + hdrlen))
		return (EINVAL);

	read = sizeof(ip_fw3_opheader);

	/* Check if xentry len field is valid */
	xent = (ipfw_table_xentry *)(op3 + 1);
	if (xent->len < hdrlen || xent->len + read > sd->valsize)
		return (EINVAL);

	memset(&tei, 0, sizeof(tei));
	tei.paddr = &xent->k;
	tei.masklen = xent->masklen;
	ipfw_import_table_value_legacy(xent->value, &v);
	tei.pvalue = &v;
	/* Old requests compatibility */
	tei.flags = TEI_FLAGS_COMPAT;
	if (xent->type == IPFW_TABLE_ADDR) {
		if (xent->len - hdrlen == sizeof(in_addr_t))
			tei.subtype = AF_INET;
		else
			tei.subtype = AF_INET6;
	}

	memset(&ti, 0, sizeof(ti));
	ti.uidx = xent->tbl;
	ti.type = xent->type;

	error = (op3->opcode == IP_FW_TABLE_XADD) ?
	    add_table_entry(ch, &ti, &tei, 0, 1) :
	    del_table_entry(ch, &ti, &tei, 0, 1);

	return (error);
}

/*
 * Adds or deletes record in table.
 * Data layout (v1)(current):
 * Request: [ ipfw_obj_header
 *   ipfw_obj_ctlv(IPFW_TLV_TBLENT_LIST) [ ipfw_obj_tentry x N ]
 * ]
 *
 * Returns 0 on success
 */
static int
manage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
    struct sockopt_data *sd)
{
	ipfw_obj_tentry *tent, *ptent;
	ipfw_obj_ctlv *ctlv;
	ipfw_obj_header *oh;
	struct tentry_info *ptei, tei, *tei_buf;
	struct tid_info ti;
	int error, i, kidx, read;

	/* Check minimum header size */
	if (sd->valsize < (sizeof(*oh) + sizeof(*ctlv)))
		return (EINVAL);

	/* Check if passed data is too long */
	if (sd->valsize != sd->kavail)
		return (EINVAL);

	oh = (ipfw_obj_header *)sd->kbuf;

	/* Basic length checks for TLVs */
	if (oh->ntlv.head.length != sizeof(oh->ntlv))
		return (EINVAL);

	read = sizeof(*oh);

	ctlv = (ipfw_obj_ctlv *)(oh + 1);
	if (ctlv->head.length + read != sd->valsize)
		return (EINVAL);

	read += sizeof(*ctlv);
	tent = (ipfw_obj_tentry *)(ctlv + 1);
	if (ctlv->count * sizeof(*tent) + read != sd->valsize)
		return (EINVAL);

	if (ctlv->count == 0)
		return (0);

	/*
	 * Mark entire buffer as "read".
	 * This instructs the sopt API to write it back
	 * after function return.
	 */
	ipfw_get_sopt_header(sd, sd->valsize);

	/* Perform basic checks for each entry */
	ptent = tent;
	kidx = tent->idx;
	for (i = 0; i < ctlv->count; i++, ptent++) {
		if (ptent->head.length != sizeof(*ptent))
			return (EINVAL);
		if (ptent->idx != kidx)
			return (ENOTSUP);
	}

	/* Convert data into kernel request objects */
	objheader_to_ti(oh, &ti);
	ti.type = oh->ntlv.type;
	ti.uidx = kidx;

	/* Use on-stack buffer for single add/del */
	if (ctlv->count == 1) {
		memset(&tei, 0, sizeof(tei));
		tei_buf = &tei;
	} else
		tei_buf = malloc(ctlv->count * sizeof(tei), M_TEMP,
		    M_WAITOK | M_ZERO);

	ptei = tei_buf;
	ptent = tent;
	for (i = 0; i < ctlv->count; i++, ptent++, ptei++) {
		ptei->paddr = &ptent->k;
		ptei->subtype = ptent->subtype;
		ptei->masklen = ptent->masklen;
		if (ptent->head.flags & IPFW_TF_UPDATE)
			ptei->flags |= TEI_FLAGS_UPDATE;

		ipfw_import_table_value_v1(&ptent->v.value);
		ptei->pvalue = (struct table_value *)&ptent->v.value;
	}

	error = (oh->opheader.opcode == IP_FW_TABLE_XADD) ?
	    add_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count) :
	    del_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count);

	/* Translate result back to userland */
	ptei = tei_buf;
	ptent = tent;
	for (i = 0; i < ctlv->count; i++, ptent++, ptei++) {
		if (ptei->flags & TEI_FLAGS_ADDED)
			ptent->result = IPFW_TR_ADDED;
		else if (ptei->flags & TEI_FLAGS_DELETED)
			ptent->result = IPFW_TR_DELETED;
		else if (ptei->flags & TEI_FLAGS_UPDATED)
			ptent->result = IPFW_TR_UPDATED;
		else if (ptei->flags & TEI_FLAGS_LIMIT)
			ptent->result = IPFW_TR_LIMIT;
		else if (ptei->flags & TEI_FLAGS_ERROR)
			ptent->result = IPFW_TR_ERROR;
		else if (ptei->flags & TEI_FLAGS_NOTFOUND)
			ptent->result = IPFW_TR_NOTFOUND;
		else if (ptei->flags & TEI_FLAGS_EXISTS)
			ptent->result = IPFW_TR_EXISTS;
		ipfw_export_table_value_v1(ptei->pvalue, &ptent->v.value);
	}

	if (tei_buf != &tei)
		free(tei_buf, M_TEMP);

	return (error);
}
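
/*
 * Illustrative size check (for exposition only): a v1 request carrying N
 * entries is expected to be laid out back-to-back, so its total sockopt
 * size is
 *
 *	sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) +
 *	    N * sizeof(ipfw_obj_tentry)
 *
 * which is exactly what the valsize checks above verify.
 */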

/*
 * Looks up an entry in given table.
 * Data layout (v0)(current):
 * Request: [ ipfw_obj_header ipfw_obj_tentry ]
 * Reply: [ ipfw_obj_header ipfw_obj_tentry ]
 *
 * Returns 0 on success
 */
static int
find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
    struct sockopt_data *sd)
{
	ipfw_obj_tentry *tent;
	ipfw_obj_header *oh;
	struct tid_info ti;
	struct table_config *tc;
	struct table_algo *ta;
	struct table_info *kti;
	struct table_value *pval;
	struct namedobj_instance *ni;
	int error;
	size_t sz;

	/* Check minimum header size */
	sz = sizeof(*oh) + sizeof(*tent);
	if (sd->valsize != sz)
		return (EINVAL);

	oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
	tent = (ipfw_obj_tentry *)(oh + 1);

	/* Basic length checks for TLVs */
	if (oh->ntlv.head.length != sizeof(oh->ntlv))
		return (EINVAL);

	objheader_to_ti(oh, &ti);
	ti.type = oh->ntlv.type;
	ti.uidx = tent->idx;

	IPFW_UH_RLOCK(ch);
	ni = CHAIN_TO_NI(ch);

	/*
	 * Find existing table and check its type.
	 */
	ta = NULL;
	if ((tc = find_table(ni, &ti)) == NULL) {
		IPFW_UH_RUNLOCK(ch);
		return (ESRCH);
	}

	/* check table type */
	if (tc->no.subtype != ti.type) {
		IPFW_UH_RUNLOCK(ch);
		return (EINVAL);
	}

	kti = KIDX_TO_TI(ch, tc->no.kidx);
	ta = tc->ta;

	if (ta->find_tentry == NULL)
		return (ENOTSUP);

	error = ta->find_tentry(tc->astate, kti, tent);
	if (error == 0) {
		pval = get_table_value(ch, tc, tent->v.kidx);
		ipfw_export_table_value_v1(pval, &tent->v.value);
	}
	IPFW_UH_RUNLOCK(ch);

	return (error);
}

/*
 * Flushes all entries or destroys given table.
 * Data layout (v0)(current):
 * Request: [ ipfw_obj_header ]
 *
 * Returns 0 on success
 */
static int
flush_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
    struct sockopt_data *sd)
{
	int error;
	struct _ipfw_obj_header *oh;
	struct tid_info ti;

	if (sd->valsize != sizeof(*oh))
		return (EINVAL);

	oh = (struct _ipfw_obj_header *)op3;
	objheader_to_ti(oh, &ti);

	if (op3->opcode == IP_FW_TABLE_XDESTROY)
		error = destroy_table(ch, &ti);
	else if (op3->opcode == IP_FW_TABLE_XFLUSH)
		error = flush_table(ch, &ti);
	else
		return (ENOTSUP);

	return (error);
}

static void
restart_flush(void *object, struct op_state *_state)
{
	struct tableop_state *ts;

	ts = (struct tableop_state *)_state;

	if (ts->tc != object)
		return;

	/* Indicate we've called */
	ts->modified = 1;
}

/*
 * Flushes given table.
 *
 * Function creates new table instance with the same
 * parameters, swaps it with old one and
 * flushes state without holding runtime WLOCK.
 *
 * Returns 0 on success.
 */
int
flush_table(struct ip_fw_chain *ch, struct tid_info *ti)
{
	struct namedobj_instance *ni;
	struct table_config *tc;
	struct table_algo *ta;
	struct table_info ti_old, ti_new, *tablestate;
	void *astate_old, *astate_new;
	char algostate[64], *pstate;
	struct tableop_state ts;
	int error, need_gc;
	uint16_t kidx;
	uint8_t tflags;

	/*
	 * Stage 1: save table algorithm.
	 * Reference found table to ensure it won't disappear.
	 */
	IPFW_UH_WLOCK(ch);
	ni = CHAIN_TO_NI(ch);
	if ((tc = find_table(ni, ti)) == NULL) {
		IPFW_UH_WUNLOCK(ch);
		return (ESRCH);
	}
	need_gc = 0;
	astate_new = NULL;
	memset(&ti_new, 0, sizeof(ti_new));
restart:
	/* Set up swap handler */
	memset(&ts, 0, sizeof(ts));
	ts.opstate.func = restart_flush;
	ts.tc = tc;

	ta = tc->ta;
	/* Do not flush readonly tables */
	if ((ta->flags & TA_FLAG_READONLY) != 0) {
		IPFW_UH_WUNLOCK(ch);
		return (EACCES);
	}
	/* Save startup algo parameters */
	if (ta->print_config != NULL) {
		ta->print_config(tc->astate, KIDX_TO_TI(ch, tc->no.kidx),
		    algostate, sizeof(algostate));
		pstate = algostate;
	} else
		pstate = NULL;
	tflags = tc->tflags;
	tc->no.refcnt++;
	add_toperation_state(ch, &ts);
	IPFW_UH_WUNLOCK(ch);

	/*
	 * Stage 1.5: if this is not the first attempt, destroy previous state
	 */
	if (need_gc != 0) {
		ta->destroy(astate_new, &ti_new);
		need_gc = 0;
	}

	/*
	 * Stage 2: allocate new table instance using same algo.
	 */
	memset(&ti_new, 0, sizeof(struct table_info));
	error = ta->init(ch, &astate_new, &ti_new, pstate, tflags);

	/*
	 * Stage 3: swap old state pointers with newly-allocated ones.
	 * Decrease refcount.
	 */
	IPFW_UH_WLOCK(ch);
	tc->no.refcnt--;
	del_toperation_state(ch, &ts);

	if (error != 0) {
		IPFW_UH_WUNLOCK(ch);
		return (error);
	}

	/*
	 * Restart operation if table swap has happened:
	 * even if algo may be the same, algo init parameters
	 * may change. Restart operation instead of doing
	 * complex checks.
	 */
	if (ts.modified != 0) {
		/* Delay destroying data since we're holding UH lock */
		need_gc = 1;
		goto restart;
	}

	ni = CHAIN_TO_NI(ch);
	kidx = tc->no.kidx;
	tablestate = (struct table_info *)ch->tablestate;

	IPFW_WLOCK(ch);
	ti_old = tablestate[kidx];
	tablestate[kidx] = ti_new;
	IPFW_WUNLOCK(ch);

	astate_old = tc->astate;
	tc->astate = astate_new;
	tc->ti_copy = ti_new;
	tc->count = 0;

	/* Notify algo on real @ti address */
	if (ta->change_ti != NULL)
		ta->change_ti(tc->astate, &tablestate[kidx]);

	/*
	 * Stage 4: unref values.
	 */
	ipfw_unref_table_values(ch, tc, ta, astate_old, &ti_old);
	IPFW_UH_WUNLOCK(ch);

	/*
	 * Stage 5: perform real flush/destroy.
	 */
	ta->destroy(astate_old, &ti_old);

	return (0);
}

/*
 * Swaps two tables.
 * Data layout (v0)(current):
 * Request: [ ipfw_obj_header ipfw_obj_ntlv ]
 *
 * Returns 0 on success
 */
static int
swap_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
    struct sockopt_data *sd)
{
	int error;
	struct _ipfw_obj_header *oh;
	struct tid_info ti_a, ti_b;

	if (sd->valsize != sizeof(*oh) + sizeof(ipfw_obj_ntlv))
		return (EINVAL);

	oh = (struct _ipfw_obj_header *)op3;
	ntlv_to_ti(&oh->ntlv, &ti_a);
	ntlv_to_ti((ipfw_obj_ntlv *)(oh + 1), &ti_b);

	error = swap_tables(ch, &ti_a, &ti_b);

	return (error);
}

/*
 * Swaps two tables of the same type/valtype.
 *
 * Checks if tables are compatible and limits
 * permit swap, then actually performs the swap.
 *
 * Each table consists of 2 different parts:
 * config:
 *   @tc (with name, set, kidx) and rule bindings, which is "stable".
 *   number of items
 *   table algo
 * runtime:
 *   runtime data @ti (ch->tablestate)
 *   runtime cache in @tc
 *   algo-specific data (@tc->astate)
 *
 * So we switch:
 *   all runtime data
 *   number of items
 *   table algo
 *
 * After that we call @ti change handler for each table.
 *
 * Note that referencing @tc won't protect tc->ta from change.
 * XXX: Do we need to restrict swap between locked tables?
 * XXX: Do we need to exchange ftype?
 *
 * Returns 0 on success.
 */
static int
swap_tables(struct ip_fw_chain *ch, struct tid_info *a,
    struct tid_info *b)
{
	struct namedobj_instance *ni;
	struct table_config *tc_a, *tc_b;
	struct table_algo *ta;
	struct table_info ti, *tablestate;
	void *astate;
	uint32_t count;

	/*
	 * Stage 1: find both tables and ensure they are of
	 * the same type.
	 */
	IPFW_UH_WLOCK(ch);
	ni = CHAIN_TO_NI(ch);
	if ((tc_a = find_table(ni, a)) == NULL) {
		IPFW_UH_WUNLOCK(ch);
		return (ESRCH);
	}
	if ((tc_b = find_table(ni, b)) == NULL) {
		IPFW_UH_WUNLOCK(ch);
		return (ESRCH);
	}

	/* It is very easy to swap between the same table */
	if (tc_a == tc_b) {
		IPFW_UH_WUNLOCK(ch);
		return (0);
	}

	/* Check type and value are the same */
	if (tc_a->no.subtype != tc_b->no.subtype ||
	    tc_a->tflags != tc_b->tflags) {
		IPFW_UH_WUNLOCK(ch);
		return (EINVAL);
	}

	/* Check limits before swap */
	if ((tc_a->limit != 0 && tc_b->count > tc_a->limit) ||
	    (tc_b->limit != 0 && tc_a->count > tc_b->limit)) {
		IPFW_UH_WUNLOCK(ch);
		return (EFBIG);
	}

	/* Check if one of the tables is readonly */
	if (((tc_a->ta->flags | tc_b->ta->flags) & TA_FLAG_READONLY) != 0) {
		IPFW_UH_WUNLOCK(ch);
		return (EACCES);
	}

	/* Notify we're going to swap */
	rollback_toperation_state(ch, tc_a);
	rollback_toperation_state(ch, tc_b);

	/* Everything is fine, prepare to swap */
	tablestate = (struct table_info *)ch->tablestate;
	ti = tablestate[tc_a->no.kidx];
	ta = tc_a->ta;
	astate = tc_a->astate;
	count = tc_a->count;

	IPFW_WLOCK(ch);
	/* a <- b */
	tablestate[tc_a->no.kidx] = tablestate[tc_b->no.kidx];
	tc_a->ta = tc_b->ta;
	tc_a->astate = tc_b->astate;
	tc_a->count = tc_b->count;
	/* b <- a */
	tablestate[tc_b->no.kidx] = ti;
	tc_b->ta = ta;
	tc_b->astate = astate;
	tc_b->count = count;
	IPFW_WUNLOCK(ch);

	/* Ensure tc.ti copies are in sync */
	tc_a->ti_copy = tablestate[tc_a->no.kidx];
	tc_b->ti_copy = tablestate[tc_b->no.kidx];

	/* Notify both tables on @ti change */
	if (tc_a->ta->change_ti != NULL)
		tc_a->ta->change_ti(tc_a->astate, &tablestate[tc_a->no.kidx]);
	if (tc_b->ta->change_ti != NULL)
		tc_b->ta->change_ti(tc_b->astate, &tablestate[tc_b->no.kidx]);

	IPFW_UH_WUNLOCK(ch);

	return (0);
}

/*
 * Destroys table specified by @ti.
 * Data layout (v0)(current):
 * Request: [ ip_fw3_opheader ]
 *
 * Returns 0 on success
 */
static int
destroy_table(struct ip_fw_chain *ch, struct tid_info *ti)
{
	struct namedobj_instance *ni;
	struct table_config *tc;

	IPFW_UH_WLOCK(ch);

	ni = CHAIN_TO_NI(ch);
	if ((tc = find_table(ni, ti)) == NULL) {
		IPFW_UH_WUNLOCK(ch);
		return (ESRCH);
	}

	/* Do not permit destroying referenced tables */
	if (tc->no.refcnt > 0) {
		IPFW_UH_WUNLOCK(ch);
		return (EBUSY);
	}

	IPFW_WLOCK(ch);
	unlink_table(ch, tc);
	IPFW_WUNLOCK(ch);

	/* Free obj index */
	if (ipfw_objhash_free_idx(ni, tc->no.kidx) != 0)
		printf("Error unlinking kidx %d from table %s\n",
		    tc->no.kidx, tc->tablename);

	/* Unref values used in tables while holding UH lock */
	ipfw_unref_table_values(ch, tc, tc->ta, tc->astate, &tc->ti_copy);
	IPFW_UH_WUNLOCK(ch);

	free_table_config(ni, tc);

	return (0);
}

static uint32_t
roundup2p(uint32_t v)
{

	v--;
	v |= v >> 1;
	v |= v >> 2;
	v |= v >> 4;
	v |= v >> 8;
	v |= v >> 16;
	v++;

	return (v);
}
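
/*
 * Example (for illustration): roundup2p() returns its argument unchanged
 * when it is already a power of two and rounds up otherwise, e.g.
 * roundup2p(64) == 64, roundup2p(1000) == 1024.
 */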

/*
 * Grow tables index.
 *
 * Returns 0 on success.
 */
int
ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables)
{
	unsigned int ntables_old, tbl;
	struct namedobj_instance *ni;
	void *new_idx, *old_tablestate, *tablestate;
	struct table_info *ti;
	struct table_config *tc;
	int i, new_blocks;

	/* Check new value for validity */
	if (ntables == 0)
		return (EINVAL);
	if (ntables > IPFW_TABLES_MAX)
		ntables = IPFW_TABLES_MAX;
	/* Align to nearest power of 2 */
	ntables = (unsigned int)roundup2p(ntables);

	/* Allocate new pointers */
	tablestate = malloc(ntables * sizeof(struct table_info),
	    M_IPFW, M_WAITOK | M_ZERO);

	ipfw_objhash_bitmap_alloc(ntables, (void *)&new_idx, &new_blocks);

	IPFW_UH_WLOCK(ch);

	tbl = (ntables >= V_fw_tables_max) ? V_fw_tables_max : ntables;
	ni = CHAIN_TO_NI(ch);

	/* Temporarily restrict decreasing max_tables */
	if (ntables < V_fw_tables_max) {

		/*
		 * FIXME: Check if we really can shrink
		 */
		IPFW_UH_WUNLOCK(ch);
		return (EINVAL);
	}

	/* Copy table info/indices */
	memcpy(tablestate, ch->tablestate, sizeof(struct table_info) * tbl);
	ipfw_objhash_bitmap_merge(ni, &new_idx, &new_blocks);

	IPFW_WLOCK(ch);

	/* Change pointers */
	old_tablestate = ch->tablestate;
	ch->tablestate = tablestate;
	ipfw_objhash_bitmap_swap(ni, &new_idx, &new_blocks);

	ntables_old = V_fw_tables_max;
	V_fw_tables_max = ntables;

	IPFW_WUNLOCK(ch);

	/* Notify all consumers that their @ti pointer has changed */
	ti = (struct table_info *)ch->tablestate;
	for (i = 0; i < tbl; i++, ti++) {
		if (ti->lookup == NULL)
			continue;
		tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, i);
		if (tc == NULL || tc->ta->change_ti == NULL)
			continue;

		tc->ta->change_ti(tc->astate, ti);
	}

	IPFW_UH_WUNLOCK(ch);

	/* Free old pointers */
	free(old_tablestate, M_IPFW);
	ipfw_objhash_bitmap_free(new_idx, new_blocks);

	return (0);
}

/*
 * Lookup table's named object by its @kidx.
 */
struct named_object *
ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch, uint16_t kidx)
{

	return (ipfw_objhash_lookup_kidx(CHAIN_TO_NI(ch), kidx));
}

/*
 * Take reference to table specified in @ntlv.
 * On success return its @kidx.
 */
int
ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint16_t *kidx)
{
	struct tid_info ti;
	struct table_config *tc;
	int error;

	IPFW_UH_WLOCK_ASSERT(ch);

	ntlv_to_ti(ntlv, &ti);
	error = find_table_err(CHAIN_TO_NI(ch), &ti, &tc);
	if (error != 0)
		return (error);

	if (tc == NULL)
		return (ESRCH);

	tc_ref(tc);
	*kidx = tc->no.kidx;

	return (0);
}

void
ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx)
{

	struct namedobj_instance *ni;
	struct named_object *no;

	IPFW_UH_WLOCK_ASSERT(ch);
	ni = CHAIN_TO_NI(ch);
	no = ipfw_objhash_lookup_kidx(ni, kidx);
	KASSERT(no != NULL, ("Table with index %d not found", kidx));
	no->refcnt--;
}
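
/*
 * Illustrative sketch (not part of the original code): code that resolves
 * a table name TLV into a kernel index typically pairs the two helpers
 * above while holding the UH write lock:
 *
 *	uint16_t kidx;
 *
 *	IPFW_UH_WLOCK_ASSERT(ch);
 *	error = ipfw_ref_table(ch, ntlv, &kidx);
 *	... use kidx ...
 *	ipfw_unref_table(ch, kidx);	(when the reference is no longer needed)
 */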

/*
 * Lookup an arbitrary key @paddr of length @plen in table @tbl.
 * Stores found value in @val.
 *
 * Returns 1 if key was found.
 */
int
ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen,
    void *paddr, uint32_t *val)
{
	struct table_info *ti;

	ti = KIDX_TO_TI(ch, tbl);

	return (ti->lookup(ti, paddr, plen, val));
}

/*
 * Info/List/dump support for tables.
 *
 */

/*
 * High-level 'get' cmds sysctl handlers
 */

/*
 * Lists all tables currently available in kernel.
 * Data layout (v0)(current):
 * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
 * Reply: [ ipfw_obj_lheader ipfw_xtable_info x N ]
 *
 * Returns 0 on success
 */
static int
list_tables(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
    struct sockopt_data *sd)
{
	struct _ipfw_obj_lheader *olh;
	int error;

	olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh));
	if (olh == NULL)
		return (EINVAL);
	if (sd->valsize < olh->size)
		return (EINVAL);

	IPFW_UH_RLOCK(ch);
	error = export_tables(ch, olh, sd);
	IPFW_UH_RUNLOCK(ch);

	return (error);
}

/*
 * Store table info to buffer provided by @sd.
 * Data layout (v0)(current):
 * Request: [ ipfw_obj_header ipfw_xtable_info(empty)]
 * Reply: [ ipfw_obj_header ipfw_xtable_info ]
 *
 * Returns 0 on success.
 */
static int
describe_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
    struct sockopt_data *sd)
{
	struct _ipfw_obj_header *oh;
	struct table_config *tc;
	struct tid_info ti;
	size_t sz;

	sz = sizeof(*oh) + sizeof(ipfw_xtable_info);
	oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
	if (oh == NULL)
		return (EINVAL);

	objheader_to_ti(oh, &ti);

	IPFW_UH_RLOCK(ch);
	if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
		IPFW_UH_RUNLOCK(ch);
		return (ESRCH);
	}

	export_table_info(ch, tc, (ipfw_xtable_info *)(oh + 1));
	IPFW_UH_RUNLOCK(ch);

	return (0);
}

/*
 * Modifies existing table.
 * Data layout (v0)(current):
 * Request: [ ipfw_obj_header ipfw_xtable_info ]
 *
 * Returns 0 on success
 */
static int
modify_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
    struct sockopt_data *sd)
{
	struct _ipfw_obj_header *oh;
	ipfw_xtable_info *i;
	char *tname;
	struct tid_info ti;
	struct namedobj_instance *ni;
	struct table_config *tc;

	if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info))
		return (EINVAL);

	oh = (struct _ipfw_obj_header *)sd->kbuf;
	i = (ipfw_xtable_info *)(oh + 1);

	/*
	 * Verify user-supplied strings.
	 * Check for null-terminated/zero-length strings.
	 */
	tname = oh->ntlv.name;
	if (check_table_name(tname) != 0)
		return (EINVAL);

	objheader_to_ti(oh, &ti);
	ti.type = i->type;

	IPFW_UH_WLOCK(ch);
	ni = CHAIN_TO_NI(ch);
	if ((tc = find_table(ni, &ti)) == NULL) {
		IPFW_UH_WUNLOCK(ch);
		return (ESRCH);
	}

	/* Do not support any modifications for readonly tables */
	if ((tc->ta->flags & TA_FLAG_READONLY) != 0) {
		IPFW_UH_WUNLOCK(ch);
		return (EACCES);
	}

	if ((i->mflags & IPFW_TMFLAGS_LIMIT) != 0)
		tc->limit = i->limit;
	if ((i->mflags & IPFW_TMFLAGS_LOCK) != 0)
		tc->locked = ((i->flags & IPFW_TGFLAGS_LOCKED) != 0);
	IPFW_UH_WUNLOCK(ch);

	return (0);
}

/*
 * Creates new table.
 * Data layout (v0)(current):
 * Request: [ ipfw_obj_header ipfw_xtable_info ]
 *
 * Returns 0 on success
 */
static int
create_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
    struct sockopt_data *sd)
{
	struct _ipfw_obj_header *oh;
	ipfw_xtable_info *i;
	char *tname, *aname;
	struct tid_info ti;
	struct namedobj_instance *ni;

	if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info))
		return (EINVAL);

	oh = (struct _ipfw_obj_header *)sd->kbuf;
	i = (ipfw_xtable_info *)(oh + 1);

	/*
	 * Verify user-supplied strings.
	 * Check for null-terminated/zero-length strings.
	 */
	tname = oh->ntlv.name;
	aname = i->algoname;
	if (check_table_name(tname) != 0 ||
	    strnlen(aname, sizeof(i->algoname)) == sizeof(i->algoname))
		return (EINVAL);

	if (aname[0] == '\0') {
		/* Use default algorithm */
		aname = NULL;
	}

	objheader_to_ti(oh, &ti);
	ti.type = i->type;

	ni = CHAIN_TO_NI(ch);

	IPFW_UH_RLOCK(ch);
	if (find_table(ni, &ti) != NULL) {
		IPFW_UH_RUNLOCK(ch);
		return (EEXIST);
	}
	IPFW_UH_RUNLOCK(ch);

	return (create_table_internal(ch, &ti, aname, i, NULL, 0));
}
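
/*
 * Illustrative request sketch (for exposition only, not a definitive
 * recipe): to create a plain address-keyed table, userland sends an
 * ipfw_obj_header whose ntlv carries the table name, followed by an
 * ipfw_xtable_info filled roughly like this (remaining fields zeroed):
 *
 *	i->type = IPFW_TABLE_ADDR;	(keyed by address/prefix)
 *	i->vmask = IPFW_VTYPE_LEGACY;	(single 32-bit value per entry)
 *	i->algoname[0] = '\0';		(use the default algorithm)
 */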

/*
 * Creates new table based on @ti and @aname.
 *
 * Assume @aname to be checked and valid.
 * Stores allocated table kidx inside @pkidx (if non-NULL).
 * Reference created table if @compat is non-zero.
 *
 * Returns 0 on success.
 */
static int
create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
    char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int compat)
{
	struct namedobj_instance *ni;
	struct table_config *tc, *tc_new, *tmp;
	struct table_algo *ta;
	uint16_t kidx;

	ni = CHAIN_TO_NI(ch);

	ta = find_table_algo(CHAIN_TO_TCFG(ch), ti, aname);
	if (ta == NULL)
		return (ENOTSUP);

	tc = alloc_table_config(ch, ti, ta, aname, i->tflags);
	if (tc == NULL)
		return (ENOMEM);

	tc->vmask = i->vmask;
	tc->limit = i->limit;
	if (ta->flags & TA_FLAG_READONLY)
		tc->locked = 1;
	else
		tc->locked = (i->flags & IPFW_TGFLAGS_LOCKED) != 0;

	IPFW_UH_WLOCK(ch);

	/* Check if table has been already created */
	tc_new = find_table(ni, ti);
	if (tc_new != NULL) {

		/*
		 * Compat: do not fail if we're
		 * requesting to create existing table
		 * which has the same type
		 */
		if (compat == 0 || tc_new->no.subtype != tc->no.subtype) {
			IPFW_UH_WUNLOCK(ch);
			free_table_config(ni, tc);
			return (EEXIST);
		}

		/* Exchange tc and tc_new for proper refcounting & freeing */
		tmp = tc;
		tc = tc_new;
		tc_new = tmp;
	} else {
		/* New table */
		if (ipfw_objhash_alloc_idx(ni, &kidx) != 0) {
			IPFW_UH_WUNLOCK(ch);
			printf("Unable to allocate table index."
			    " Consider increasing net.inet.ip.fw.tables_max");
			free_table_config(ni, tc);
			return (EBUSY);
		}
		tc->no.kidx = kidx;
		tc->no.etlv = IPFW_TLV_TBL_NAME;

		link_table(ch, tc);
	}

	if (compat != 0)
		tc->no.refcnt++;
	if (pkidx != NULL)
		*pkidx = tc->no.kidx;

	IPFW_UH_WUNLOCK(ch);

	if (tc_new != NULL)
		free_table_config(ni, tc_new);

	return (0);
}

static void
ntlv_to_ti(ipfw_obj_ntlv *ntlv, struct tid_info *ti)
{

	memset(ti, 0, sizeof(struct tid_info));
	ti->set = ntlv->set;
	ti->uidx = ntlv->idx;
	ti->tlvs = ntlv;
	ti->tlen = ntlv->head.length;
}

static void
objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti)
{

	ntlv_to_ti(&oh->ntlv, ti);
}

struct namedobj_instance *
ipfw_get_table_objhash(struct ip_fw_chain *ch)
{

	return (CHAIN_TO_NI(ch));
}

/*
 * Exports basic table info as name TLV.
 * Used inside dump_static_rules() to provide info
 * about all tables referenced by current ruleset.
 *
 * Returns 0 on success.
 */
int
ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx,
    struct sockopt_data *sd)
{
	struct namedobj_instance *ni;
	struct named_object *no;
	ipfw_obj_ntlv *ntlv;

	ni = CHAIN_TO_NI(ch);

	no = ipfw_objhash_lookup_kidx(ni, kidx);
	KASSERT(no != NULL, ("invalid table kidx passed"));

	ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv));
	if (ntlv == NULL)
		return (ENOMEM);

	ntlv->head.type = IPFW_TLV_TBL_NAME;
	ntlv->head.length = sizeof(*ntlv);
	ntlv->idx = no->kidx;
	strlcpy(ntlv->name, no->name, sizeof(ntlv->name));

	return (0);
}

struct dump_args {
	struct ip_fw_chain *ch;
	struct table_info *ti;
	struct table_config *tc;
	struct sockopt_data *sd;
	uint32_t cnt;
	uint16_t uidx;
	int error;
	uint32_t size;
	ipfw_table_entry *ent;
	ta_foreach_f *f;
	void *farg;
	ipfw_obj_tentry tent;
};

static int
count_ext_entries(void *e, void *arg)
{
	struct dump_args *da;

	da = (struct dump_args *)arg;
	da->cnt++;

	return (0);
}

/*
 * Gets number of items from table either using
 * internal counter or calling algo callback for
 * externally-managed tables.
 *
 * Returns number of records.
 */
static uint32_t
table_get_count(struct ip_fw_chain *ch, struct table_config *tc)
{
	struct table_info *ti;
	struct table_algo *ta;
	struct dump_args da;

	ti = KIDX_TO_TI(ch, tc->no.kidx);
	ta = tc->ta;

	/* Use internal counter for self-managed tables */
	if ((ta->flags & TA_FLAG_READONLY) == 0)
		return (tc->count);

	/* Use callback to quickly get number of items */
	if ((ta->flags & TA_FLAG_EXTCOUNTER) != 0)
		return (ta->get_count(tc->astate, ti));

	/* Count number of items ourselves */
	memset(&da, 0, sizeof(da));
	ta->foreach(tc->astate, ti, count_ext_entries, &da);

	return (da.cnt);
}

/*
 * Exports table @tc info into standard ipfw_xtable_info format.
 */
static void
export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
    ipfw_xtable_info *i)
{
	struct table_info *ti;
	struct table_algo *ta;

	i->type = tc->no.subtype;
	i->tflags = tc->tflags;
	i->vmask = tc->vmask;
	i->set = tc->no.set;
	i->kidx = tc->no.kidx;
	i->refcnt = tc->no.refcnt;
	i->count = table_get_count(ch, tc);
	i->limit = tc->limit;
	i->flags |= (tc->locked != 0) ?
	    IPFW_TGFLAGS_LOCKED : 0;
	i->size = i->count * sizeof(ipfw_obj_tentry);
	i->size += sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info);
	strlcpy(i->tablename, tc->tablename, sizeof(i->tablename));
	ti = KIDX_TO_TI(ch, tc->no.kidx);
	ta = tc->ta;
	if (ta->print_config != NULL) {
		/* Use algo function to print table config to string */
		ta->print_config(tc->astate, ti, i->algoname,
		    sizeof(i->algoname));
	} else
		strlcpy(i->algoname, ta->name, sizeof(i->algoname));
	/* Dump algo-specific data, if possible */
	if (ta->dump_tinfo != NULL) {
		ta->dump_tinfo(tc->astate, ti, &i->ta_info);
		i->ta_info.flags |= IPFW_TATFLAGS_DATA;
	}
}

struct dump_table_args {
	struct ip_fw_chain *ch;
	struct sockopt_data *sd;
};

static int
export_table_internal(struct namedobj_instance *ni, struct named_object *no,
    void *arg)
{
	ipfw_xtable_info *i;
	struct dump_table_args *dta;

	dta = (struct dump_table_args *)arg;

	i = (ipfw_xtable_info *)ipfw_get_sopt_space(dta->sd, sizeof(*i));
	KASSERT(i != NULL, ("previously checked buffer is not enough"));

	export_table_info(dta->ch, (struct table_config *)no, i);
	return (0);
}

/*
 * Export all tables as ipfw_xtable_info structures to
 * storage provided by @sd.
 *
 * If supplied buffer is too small, fills in required size
 * and returns ENOMEM.
 * Returns 0 on success.
 */
static int
export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
    struct sockopt_data *sd)
{
	uint32_t size;
	uint32_t count;
	struct dump_table_args dta;

	count = ipfw_objhash_count(CHAIN_TO_NI(ch));
	size = count * sizeof(ipfw_xtable_info) + sizeof(ipfw_obj_lheader);

	/* Fill in header regardless of buffer size */
	olh->count = count;
	olh->objsize = sizeof(ipfw_xtable_info);

	if (size > olh->size) {
		olh->size = size;
		return (ENOMEM);
	}

	olh->size = size;

	dta.ch = ch;
	dta.sd = sd;

	ipfw_objhash_foreach(CHAIN_TO_NI(ch), export_table_internal, &dta);

	return (0);
}
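
/*
 * Illustrative flow (for exposition only): a userland lister typically
 * issues the list request twice. On the first pass olh->size is too
 * small, so export_tables() fills in the required size and returns
 * ENOMEM; the caller then re-issues the request with a buffer of at
 * least that size and receives the ipfw_xtable_info array on the
 * second pass.
 */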
2198 */ 2199 IPFW_UH_RUNLOCK(ch); 2200 return (ENOMEM); 2201 } 2202 2203 /* 2204 * Do the actual dump in eXtended format 2205 */ 2206 memset(&da, 0, sizeof(da)); 2207 da.ch = ch; 2208 da.ti = KIDX_TO_TI(ch, tc->no.kidx); 2209 da.tc = tc; 2210 da.sd = sd; 2211 2212 ta = tc->ta; 2213 2214 ta->foreach(tc->astate, da.ti, dump_table_tentry, &da); 2215 IPFW_UH_RUNLOCK(ch); 2216 2217 return (da.error); 2218 } 2219 2220 /* 2221 * Dumps all table data 2222 * Data layout (version 0)(legacy): 2223 * Request: [ ipfw_xtable ], size = IP_FW_TABLE_XGETSIZE() 2224 * Reply: [ ipfw_xtable ipfw_table_xentry x N ] 2225 * 2226 * Returns 0 on success 2227 */ 2228 static int 2229 dump_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3, 2230 struct sockopt_data *sd) 2231 { 2232 ipfw_xtable *xtbl; 2233 struct tid_info ti; 2234 struct table_config *tc; 2235 struct table_algo *ta; 2236 struct dump_args da; 2237 size_t sz, count; 2238 2239 xtbl = (ipfw_xtable *)ipfw_get_sopt_header(sd, sizeof(ipfw_xtable)); 2240 if (xtbl == NULL) 2241 return (EINVAL); 2242 2243 memset(&ti, 0, sizeof(ti)); 2244 ti.uidx = xtbl->tbl; 2245 2246 IPFW_UH_RLOCK(ch); 2247 if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) { 2248 IPFW_UH_RUNLOCK(ch); 2249 return (0); 2250 } 2251 count = table_get_count(ch, tc); 2252 sz = count * sizeof(ipfw_table_xentry) + sizeof(ipfw_xtable); 2253 2254 xtbl->cnt = count; 2255 xtbl->size = sz; 2256 xtbl->type = tc->no.subtype; 2257 xtbl->tbl = ti.uidx; 2258 2259 if (sd->valsize < sz) { 2260 2261 /* 2262 * The submitted buffer size is not enough. 2263 * We've already filled in the @xtbl structure with 2264 * the relevant table info, including its size, so we 2265 * can return. The buffer will be flushed automatically. 2266 */ 2267 IPFW_UH_RUNLOCK(ch); 2268 return (ENOMEM); 2269 } 2270 2271 /* Do the actual dump in eXtended format */ 2272 memset(&da, 0, sizeof(da)); 2273 da.ch = ch; 2274 da.ti = KIDX_TO_TI(ch, tc->no.kidx); 2275 da.tc = tc; 2276 da.sd = sd; 2277 2278 ta = tc->ta; 2279 2280 ta->foreach(tc->astate, da.ti, dump_table_xentry, &da); 2281 IPFW_UH_RUNLOCK(ch); 2282 2283 return (0); 2284 } 2285 2286 /* 2287 * Legacy function to retrieve the number of items in a table.
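 *
 * A rough, hedged sketch of the userland side of IP_FW_TABLE_XGETSIZE
 * (close to what ipfw(8)'s do_get3() path does; hypothetical variable
 * names, suitably aligned buffer assumed, error handling omitted):
 *
 *	char buf[sizeof(ip_fw3_opheader) + sizeof(uint32_t)];
 *	ip_fw3_opheader *op3 = (ip_fw3_opheader *)buf;
 *	uint32_t *tblp = (uint32_t *)(op3 + 1);
 *	socklen_t len = sizeof(buf);
 *
 *	memset(buf, 0, sizeof(buf));
 *	op3->opcode = IP_FW_TABLE_XGETSIZE;
 *	*tblp = tblnum;
 *	getsockopt(s, IPPROTO_IP, IP_FW3, buf, &len);
 *
 * On return, *tblp holds the number of bytes needed for a subsequent
 * IP_FW_TABLE_XLIST v0 dump (see ipfw_count_xtable() below).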
2288 */ 2289 static int 2290 get_table_size(struct ip_fw_chain *ch, ip_fw3_opheader *op3, 2291 struct sockopt_data *sd) 2292 { 2293 uint32_t *tbl; 2294 struct tid_info ti; 2295 size_t sz; 2296 int error; 2297 2298 sz = sizeof(*op3) + sizeof(uint32_t); 2299 op3 = (ip_fw3_opheader *)ipfw_get_sopt_header(sd, sz); 2300 if (op3 == NULL) 2301 return (EINVAL); 2302 2303 tbl = (uint32_t *)(op3 + 1); 2304 memset(&ti, 0, sizeof(ti)); 2305 ti.uidx = *tbl; 2306 IPFW_UH_RLOCK(ch); 2307 error = ipfw_count_xtable(ch, &ti, tbl); 2308 IPFW_UH_RUNLOCK(ch); 2309 return (error); 2310 } 2311 2312 /* 2313 * Legacy IP_FW_TABLE_GETSIZE handler 2314 */ 2315 int 2316 ipfw_count_table(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt) 2317 { 2318 struct table_config *tc; 2319 2320 if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) 2321 return (ESRCH); 2322 *cnt = table_get_count(ch, tc); 2323 return (0); 2324 } 2325 2326 /* 2327 * Legacy IP_FW_TABLE_XGETSIZE handler 2328 */ 2329 int 2330 ipfw_count_xtable(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt) 2331 { 2332 struct table_config *tc; 2333 uint32_t count; 2334 2335 if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) { 2336 *cnt = 0; 2337 return (0); /* 'table all list' requires success */ 2338 } 2339 2340 count = table_get_count(ch, tc); 2341 *cnt = count * sizeof(ipfw_table_xentry); 2342 if (count > 0) 2343 *cnt += sizeof(ipfw_xtable); 2344 return (0); 2345 } 2346 2347 static int 2348 dump_table_entry(void *e, void *arg) 2349 { 2350 struct dump_args *da; 2351 struct table_config *tc; 2352 struct table_algo *ta; 2353 ipfw_table_entry *ent; 2354 struct table_value *pval; 2355 int error; 2356 2357 da = (struct dump_args *)arg; 2358 2359 tc = da->tc; 2360 ta = tc->ta; 2361 2362 /* Out of memory, returning */ 2363 if (da->cnt == da->size) 2364 return (1); 2365 ent = da->ent++; 2366 ent->tbl = da->uidx; 2367 da->cnt++; 2368 2369 error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent); 2370 if (error != 0) 2371 return (error); 2372 2373 ent->addr = da->tent.k.addr.s_addr; 2374 ent->masklen = da->tent.masklen; 2375 pval = get_table_value(da->ch, da->tc, da->tent.v.kidx); 2376 ent->value = ipfw_export_table_value_legacy(pval); 2377 2378 return (0); 2379 } 2380 2381 /* 2382 * Dumps table in pre-8.1 legacy format. 2383 */ 2384 int 2385 ipfw_dump_table_legacy(struct ip_fw_chain *ch, struct tid_info *ti, 2386 ipfw_table *tbl) 2387 { 2388 struct table_config *tc; 2389 struct table_algo *ta; 2390 struct dump_args da; 2391 2392 tbl->cnt = 0; 2393 2394 if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) 2395 return (0); /* XXX: We should return ESRCH */ 2396 2397 ta = tc->ta; 2398 2399 /* This dump format supports IPv4 only */ 2400 if (tc->no.subtype != IPFW_TABLE_ADDR) 2401 return (0); 2402 2403 memset(&da, 0, sizeof(da)); 2404 da.ch = ch; 2405 da.ti = KIDX_TO_TI(ch, tc->no.kidx); 2406 da.tc = tc; 2407 da.ent = &tbl->ent[0]; 2408 da.size = tbl->size; 2409 2410 tbl->cnt = 0; 2411 ta->foreach(tc->astate, da.ti, dump_table_entry, &da); 2412 tbl->cnt = da.cnt; 2413 2414 return (0); 2415 } 2416 2417 /* 2418 * Dumps table entry in eXtended format (v1)(current). 
2419 */ 2420 static int 2421 dump_table_tentry(void *e, void *arg) 2422 { 2423 struct dump_args *da; 2424 struct table_config *tc; 2425 struct table_algo *ta; 2426 struct table_value *pval; 2427 ipfw_obj_tentry *tent; 2428 int error; 2429 2430 da = (struct dump_args *)arg; 2431 2432 tc = da->tc; 2433 ta = tc->ta; 2434 2435 tent = (ipfw_obj_tentry *)ipfw_get_sopt_space(da->sd, sizeof(*tent)); 2436 /* Out of memory, returning */ 2437 if (tent == NULL) { 2438 da->error = ENOMEM; 2439 return (1); 2440 } 2441 tent->head.length = sizeof(ipfw_obj_tentry); 2442 tent->idx = da->uidx; 2443 2444 error = ta->dump_tentry(tc->astate, da->ti, e, tent); 2445 if (error != 0) 2446 return (error); 2447 2448 pval = get_table_value(da->ch, da->tc, tent->v.kidx); 2449 ipfw_export_table_value_v1(pval, &tent->v.value); 2450 2451 return (0); 2452 } 2453 2454 /* 2455 * Dumps table entry in eXtended format (v0). 2456 */ 2457 static int 2458 dump_table_xentry(void *e, void *arg) 2459 { 2460 struct dump_args *da; 2461 struct table_config *tc; 2462 struct table_algo *ta; 2463 ipfw_table_xentry *xent; 2464 ipfw_obj_tentry *tent; 2465 struct table_value *pval; 2466 int error; 2467 2468 da = (struct dump_args *)arg; 2469 2470 tc = da->tc; 2471 ta = tc->ta; 2472 2473 xent = (ipfw_table_xentry *)ipfw_get_sopt_space(da->sd, sizeof(*xent)); 2474 /* Out of memory, returning */ 2475 if (xent == NULL) 2476 return (1); 2477 xent->len = sizeof(ipfw_table_xentry); 2478 xent->tbl = da->uidx; 2479 2480 memset(&da->tent, 0, sizeof(da->tent)); 2481 tent = &da->tent; 2482 error = ta->dump_tentry(tc->astate, da->ti, e, tent); 2483 if (error != 0) 2484 return (error); 2485 2486 /* Convert current format to previous one */ 2487 xent->masklen = tent->masklen; 2488 pval = get_table_value(da->ch, da->tc, da->tent.v.kidx); 2489 xent->value = ipfw_export_table_value_legacy(pval); 2490 /* Apply some hacks */ 2491 if (tc->no.subtype == IPFW_TABLE_ADDR && tent->subtype == AF_INET) { 2492 xent->k.addr6.s6_addr32[3] = tent->k.addr.s_addr; 2493 xent->flags = IPFW_TCF_INET; 2494 } else 2495 memcpy(&xent->k, &tent->k, sizeof(xent->k)); 2496 2497 return (0); 2498 } 2499 2500 /* 2501 * Helper function to export table algo data 2502 * to tentry format before calling user function. 2503 * 2504 * Returns 0 on success. 2505 */ 2506 static int 2507 prepare_table_tentry(void *e, void *arg) 2508 { 2509 struct dump_args *da; 2510 struct table_config *tc; 2511 struct table_algo *ta; 2512 int error; 2513 2514 da = (struct dump_args *)arg; 2515 2516 tc = da->tc; 2517 ta = tc->ta; 2518 2519 error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent); 2520 if (error != 0) 2521 return (error); 2522 2523 da->f(&da->tent, da->farg); 2524 2525 return (0); 2526 } 2527 2528 /* 2529 * Allow external consumers to read table entries in standard format. 
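 *
 * A minimal consumer sketch (hypothetical callback name; the callback
 * receives a pointer to the prepared ipfw_obj_tentry as its first
 * argument, see prepare_table_tentry() above):
 *
 *	static int
 *	print_tentry(void *tentry, void *arg)
 *	{
 *		ipfw_obj_tentry *tent = tentry;
 *
 *		printf("subtype %u masklen %u\n", tent->subtype,
 *		    tent->masklen);
 *		return (0);
 *	}
 *
 *	error = ipfw_foreach_table_tentry(ch, kidx, print_tentry, NULL);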
2530 */ 2531 int 2532 ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint16_t kidx, 2533 ta_foreach_f *f, void *arg) 2534 { 2535 struct namedobj_instance *ni; 2536 struct table_config *tc; 2537 struct table_algo *ta; 2538 struct dump_args da; 2539 2540 ni = CHAIN_TO_NI(ch); 2541 2542 tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx); 2543 if (tc == NULL) 2544 return (ESRCH); 2545 2546 ta = tc->ta; 2547 2548 memset(&da, 0, sizeof(da)); 2549 da.ch = ch; 2550 da.ti = KIDX_TO_TI(ch, tc->no.kidx); 2551 da.tc = tc; 2552 da.f = f; 2553 da.farg = arg; 2554 2555 ta->foreach(tc->astate, da.ti, prepare_table_tentry, &da); 2556 2557 return (0); 2558 } 2559 2560 /* 2561 * Table algorithms 2562 */ 2563 2564 /* 2565 * Finds algorithm by index, table type or supplied name. 2566 * 2567 * Returns pointer to algo or NULL. 2568 */ 2569 static struct table_algo * 2570 find_table_algo(struct tables_config *tcfg, struct tid_info *ti, char *name) 2571 { 2572 int i, l; 2573 struct table_algo *ta; 2574 2575 if (ti->type > IPFW_TABLE_MAXTYPE) 2576 return (NULL); 2577 2578 /* Search by index */ 2579 if (ti->atype != 0) { 2580 if (ti->atype > tcfg->algo_count) 2581 return (NULL); 2582 return (tcfg->algo[ti->atype]); 2583 } 2584 2585 if (name == NULL) { 2586 /* Return default algorithm for given type if set */ 2587 return (tcfg->def_algo[ti->type]); 2588 } 2589 2590 /* Search by name */ 2591 /* TODO: better search */ 2592 for (i = 1; i <= tcfg->algo_count; i++) { 2593 ta = tcfg->algo[i]; 2594 2595 /* 2596 * One can supply additional algorithm 2597 * parameters so we compare only the first word 2598 * of supplied name: 2599 * 'addr:chash hsize=32' 2600 * '^^^^^^^^^' 2601 * 2602 */ 2603 l = strlen(ta->name); 2604 if (strncmp(name, ta->name, l) != 0) 2605 continue; 2606 if (name[l] != '\0' && name[l] != ' ') 2607 continue; 2608 /* Check if we're requesting proper table type */ 2609 if (ti->type != 0 && ti->type != ta->type) 2610 return (NULL); 2611 return (ta); 2612 } 2613 2614 return (NULL); 2615 } 2616 2617 /* 2618 * Register new table algo @ta. 2619 * Stores algo id inside @idx. 2620 * 2621 * Returns 0 on success. 2622 */ 2623 int 2624 ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, size_t size, 2625 int *idx) 2626 { 2627 struct tables_config *tcfg; 2628 struct table_algo *ta_new; 2629 size_t sz; 2630 2631 if (size > sizeof(struct table_algo)) 2632 return (EINVAL); 2633 2634 /* Check for the required on-stack size for add/del */ 2635 sz = roundup2(ta->ta_buf_size, sizeof(void *)); 2636 if (sz > TA_BUF_SZ) 2637 return (EINVAL); 2638 2639 KASSERT(ta->type <= IPFW_TABLE_MAXTYPE,("Increase IPFW_TABLE_MAXTYPE")); 2640 2641 /* Copy algorithm data to stable storage. */ 2642 ta_new = malloc(sizeof(struct table_algo), M_IPFW, M_WAITOK | M_ZERO); 2643 memcpy(ta_new, ta, size); 2644 2645 tcfg = CHAIN_TO_TCFG(ch); 2646 2647 KASSERT(tcfg->algo_count < 255, ("Increase algo array size")); 2648 2649 tcfg->algo[++tcfg->algo_count] = ta_new; 2650 ta_new->idx = tcfg->algo_count; 2651 2652 /* Set algorithm as default one for given type */ 2653 if ((ta_new->flags & TA_FLAG_DEFAULT) != 0 && 2654 tcfg->def_algo[ta_new->type] == NULL) 2655 tcfg->def_algo[ta_new->type] = ta_new; 2656 2657 *idx = ta_new->idx; 2658 2659 return (0); 2660 } 2661 2662 /* 2663 * Unregisters table algo using @idx as id. 2664 * XXX: It is NOT safe to call this function in any place 2665 * other than ipfw instance destroy handler. 
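 *
 * For reference, a hedged sketch of the registration counterpart that
 * produces the @idx used here (hypothetical "addr:example" algorithm;
 * only table_algo fields actually referenced in this file are shown):
 *
 *	static struct table_algo addr_example = {
 *		.name		= "addr:example",
 *		.type		= IPFW_TABLE_ADDR,
 *		.ta_buf_size	= sizeof(struct ta_buf_example),
 *		.init		= ta_init_example,
 *		.destroy	= ta_destroy_example,
 *		.foreach	= ta_foreach_example,
 *		.dump_tentry	= ta_dump_tentry_example,
 *	};
 *	static int addr_example_idx;
 *
 *	ipfw_add_table_algo(ch, &addr_example, sizeof(addr_example),
 *	    &addr_example_idx);
 *	...
 *	ipfw_del_table_algo(ch, addr_example_idx);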
2666 */ 2667 void 2668 ipfw_del_table_algo(struct ip_fw_chain *ch, int idx) 2669 { 2670 struct tables_config *tcfg; 2671 struct table_algo *ta; 2672 2673 tcfg = CHAIN_TO_TCFG(ch); 2674 2675 KASSERT(idx <= tcfg->algo_count, ("algo idx %d out of range 1..%d", 2676 idx, tcfg->algo_count)); 2677 2678 ta = tcfg->algo[idx]; 2679 KASSERT(ta != NULL, ("algo idx %d is NULL", idx)); 2680 2681 if (tcfg->def_algo[ta->type] == ta) 2682 tcfg->def_algo[ta->type] = NULL; 2683 2684 free(ta, M_IPFW); 2685 } 2686 2687 /* 2688 * Lists all table algorithms currently available. 2689 * Data layout (v0)(current): 2690 * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size 2691 * Reply: [ ipfw_obj_lheader ipfw_ta_info x N ] 2692 * 2693 * Returns 0 on success 2694 */ 2695 static int 2696 list_table_algo(struct ip_fw_chain *ch, ip_fw3_opheader *op3, 2697 struct sockopt_data *sd) 2698 { 2699 struct _ipfw_obj_lheader *olh; 2700 struct tables_config *tcfg; 2701 ipfw_ta_info *i; 2702 struct table_algo *ta; 2703 uint32_t count, n, size; 2704 2705 olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); 2706 if (olh == NULL) 2707 return (EINVAL); 2708 if (sd->valsize < olh->size) 2709 return (EINVAL); 2710 2711 IPFW_UH_RLOCK(ch); 2712 tcfg = CHAIN_TO_TCFG(ch); 2713 count = tcfg->algo_count; 2714 size = count * sizeof(ipfw_ta_info) + sizeof(ipfw_obj_lheader); 2715 2716 /* Fill in header regardless of buffer size */ 2717 olh->count = count; 2718 olh->objsize = sizeof(ipfw_ta_info); 2719 2720 if (size > olh->size) { 2721 olh->size = size; 2722 IPFW_UH_RUNLOCK(ch); 2723 return (ENOMEM); 2724 } 2725 olh->size = size; 2726 2727 for (n = 1; n <= count; n++) { 2728 i = (ipfw_ta_info *)ipfw_get_sopt_space(sd, sizeof(*i)); 2729 KASSERT(i != NULL, ("previously checked buffer is not enough")); 2730 ta = tcfg->algo[n]; 2731 strlcpy(i->algoname, ta->name, sizeof(i->algoname)); 2732 i->type = ta->type; 2733 i->refcnt = ta->refcnt; 2734 } 2735 2736 IPFW_UH_RUNLOCK(ch); 2737 2738 return (0); 2739 } 2740 2741 static int 2742 classify_srcdst(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) 2743 { 2744 /* Basic IPv4/IPv6 or u32 lookups */ 2745 *puidx = cmd->arg1; 2746 /* Assume ADDR by default */ 2747 *ptype = IPFW_TABLE_ADDR; 2748 int v; 2749 2750 if (F_LEN(cmd) > F_INSN_SIZE(ipfw_insn_u32)) { 2751 /* 2752 * Generic lookup. The key must be 2753 * in 32-bit big-endian format.
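 * For example, an ipfw(8) rule using the 'lookup' form, such as
 * 'ipfw add count ip from any to any lookup dst-port 10', stores the
 * selected key kind in d[1] and is classified by the switch below
 * (the exact numeric mapping is defined by ipfw(8), not here).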
2754 */ 2755 v = ((ipfw_insn_u32 *)cmd)->d[1]; 2756 switch (v) { 2757 case 0: 2758 case 1: 2759 /* IPv4 src/dst */ 2760 break; 2761 case 2: 2762 case 3: 2763 /* src/dst port */ 2764 *ptype = IPFW_TABLE_NUMBER; 2765 break; 2766 case 4: 2767 /* uid/gid */ 2768 *ptype = IPFW_TABLE_NUMBER; 2769 break; 2770 case 5: 2771 /* jid */ 2772 *ptype = IPFW_TABLE_NUMBER; 2773 break; 2774 case 6: 2775 /* dscp */ 2776 *ptype = IPFW_TABLE_NUMBER; 2777 break; 2778 } 2779 } 2780 2781 return (0); 2782 } 2783 2784 static int 2785 classify_via(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) 2786 { 2787 ipfw_insn_if *cmdif; 2788 2789 /* Interface table, possibly */ 2790 cmdif = (ipfw_insn_if *)cmd; 2791 if (cmdif->name[0] != '\1') 2792 return (1); 2793 2794 *ptype = IPFW_TABLE_INTERFACE; 2795 *puidx = cmdif->p.kidx; 2796 2797 return (0); 2798 } 2799 2800 static int 2801 classify_flow(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) 2802 { 2803 2804 *puidx = cmd->arg1; 2805 *ptype = IPFW_TABLE_FLOW; 2806 2807 return (0); 2808 } 2809 2810 static void 2811 update_arg1(ipfw_insn *cmd, uint16_t idx) 2812 { 2813 2814 cmd->arg1 = idx; 2815 } 2816 2817 static void 2818 update_via(ipfw_insn *cmd, uint16_t idx) 2819 { 2820 ipfw_insn_if *cmdif; 2821 2822 cmdif = (ipfw_insn_if *)cmd; 2823 cmdif->p.kidx = idx; 2824 } 2825 2826 static int 2827 table_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, 2828 struct named_object **pno) 2829 { 2830 struct table_config *tc; 2831 int error; 2832 2833 IPFW_UH_WLOCK_ASSERT(ch); 2834 2835 error = find_table_err(CHAIN_TO_NI(ch), ti, &tc); 2836 if (error != 0) 2837 return (error); 2838 2839 *pno = &tc->no; 2840 return (0); 2841 } 2842 2843 /* XXX: sets-sets! */ 2844 static struct named_object * 2845 table_findbykidx(struct ip_fw_chain *ch, uint16_t idx) 2846 { 2847 struct namedobj_instance *ni; 2848 struct table_config *tc; 2849 2850 IPFW_UH_WLOCK_ASSERT(ch); 2851 ni = CHAIN_TO_NI(ch); 2852 tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, idx); 2853 KASSERT(tc != NULL, ("Table with index %d not found", idx)); 2854 2855 return (&tc->no); 2856 } 2857 2858 static int 2859 table_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set, 2860 enum ipfw_sets_cmd cmd) 2861 { 2862 2863 switch (cmd) { 2864 case SWAP_ALL: 2865 case TEST_ALL: 2866 case MOVE_ALL: 2867 /* 2868 * Always return success; the real action and decision 2869 * are made by table_manage_sets_all(). 2870 */ 2871 return (0); 2872 case TEST_ONE: 2873 case MOVE_ONE: 2874 /* 2875 * NOTE: we would need to use ipfw_objhash_del/ipfw_objhash_add 2876 * if the set number were used in the hash function. Currently 2877 * we can just use the generic handler that replaces the set value. 2878 */ 2879 if (V_fw_tables_sets == 0) 2880 return (0); 2881 break; 2882 case COUNT_ONE: 2883 /* 2884 * Return EOPNOTSUPP for COUNT_ONE when the per-set sysctl is 2885 * disabled. This allows table opcodes to be skipped in the 2886 * additional checks done when specific rules are moved to another set. 2887 */ 2888 if (V_fw_tables_sets == 0) 2889 return (EOPNOTSUPP); 2890 } 2891 /* Use generic sets handler when per-set sysctl is enabled. */ 2892 return (ipfw_obj_manage_sets(CHAIN_TO_NI(ch), IPFW_TLV_TBL_NAME, 2893 set, new_set, cmd)); 2894 } 2895 2896 /* 2897 * We register several opcode rewriters for lookup tables. 2898 * All table opcodes have the same ETLV type, but different subtypes. 2899 * To avoid invoking the sets handler several times for XXX_ALL commands, 2900 * we use a separate manage_sets handler. O_RECV has the lowest opcode value, 2901 * so it is called first.
2902 */ 2903 static int 2904 table_manage_sets_all(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set, 2905 enum ipfw_sets_cmd cmd) 2906 { 2907 2908 switch (cmd) { 2909 case SWAP_ALL: 2910 case TEST_ALL: 2911 /* 2912 * Return success for TEST_ALL, since nothing prevents 2913 * moving rules from one set to another: all tables are 2914 * accessible from all sets when the per-set tables sysctl 2915 * is disabled. 2916 */ 2917 case MOVE_ALL: 2918 if (V_fw_tables_sets == 0) 2919 return (0); 2920 break; 2921 default: 2922 return (table_manage_sets(ch, set, new_set, cmd)); 2923 } 2924 /* Use generic sets handler when per-set sysctl is enabled. */ 2925 return (ipfw_obj_manage_sets(CHAIN_TO_NI(ch), IPFW_TLV_TBL_NAME, 2926 set, new_set, cmd)); 2927 } 2928 2929 static struct opcode_obj_rewrite opcodes[] = { 2930 { 2931 .opcode = O_IP_SRC_LOOKUP, 2932 .etlv = IPFW_TLV_TBL_NAME, 2933 .classifier = classify_srcdst, 2934 .update = update_arg1, 2935 .find_byname = table_findbyname, 2936 .find_bykidx = table_findbykidx, 2937 .create_object = create_table_compat, 2938 .manage_sets = table_manage_sets, 2939 }, 2940 { 2941 .opcode = O_IP_DST_LOOKUP, 2942 .etlv = IPFW_TLV_TBL_NAME, 2943 .classifier = classify_srcdst, 2944 .update = update_arg1, 2945 .find_byname = table_findbyname, 2946 .find_bykidx = table_findbykidx, 2947 .create_object = create_table_compat, 2948 .manage_sets = table_manage_sets, 2949 }, 2950 { 2951 .opcode = O_IP_FLOW_LOOKUP, 2952 .etlv = IPFW_TLV_TBL_NAME, 2953 .classifier = classify_flow, 2954 .update = update_arg1, 2955 .find_byname = table_findbyname, 2956 .find_bykidx = table_findbykidx, 2957 .create_object = create_table_compat, 2958 .manage_sets = table_manage_sets, 2959 }, 2960 { 2961 .opcode = O_XMIT, 2962 .etlv = IPFW_TLV_TBL_NAME, 2963 .classifier = classify_via, 2964 .update = update_via, 2965 .find_byname = table_findbyname, 2966 .find_bykidx = table_findbykidx, 2967 .create_object = create_table_compat, 2968 .manage_sets = table_manage_sets, 2969 }, 2970 { 2971 .opcode = O_RECV, 2972 .etlv = IPFW_TLV_TBL_NAME, 2973 .classifier = classify_via, 2974 .update = update_via, 2975 .find_byname = table_findbyname, 2976 .find_bykidx = table_findbykidx, 2977 .create_object = create_table_compat, 2978 .manage_sets = table_manage_sets_all, 2979 }, 2980 { 2981 .opcode = O_VIA, 2982 .etlv = IPFW_TLV_TBL_NAME, 2983 .classifier = classify_via, 2984 .update = update_via, 2985 .find_byname = table_findbyname, 2986 .find_bykidx = table_findbykidx, 2987 .create_object = create_table_compat, 2988 .manage_sets = table_manage_sets, 2989 }, 2990 }; 2991 2992 static int 2993 test_sets_cb(struct namedobj_instance *ni __unused, struct named_object *no, 2994 void *arg __unused) 2995 { 2996 2997 /* Check that there are no tables in a non-default set */ 2998 if (no->set != 0) 2999 return (EBUSY); 3000 return (0); 3001 } 3002 3003 /* 3004 * Switches between "set 0" and "rule's set" table binding. 3005 * Checks all ruleset bindings and permits the change 3006 * IFF each binding has both its rule AND its table in the default set (set 0). 3007 * 3008 * Returns 0 on success.
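 *
 * This is normally reached via the net.inet.ip.fw.tables_sets sysctl
 * (an assumption based on how V_fw_tables_sets is exposed), e.g.:
 *
 *	sysctl net.inet.ip.fw.tables_sets=1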
3009 */ 3010 int 3011 ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int sets) 3012 { 3013 struct opcode_obj_rewrite *rw; 3014 struct namedobj_instance *ni; 3015 struct named_object *no; 3016 struct ip_fw *rule; 3017 ipfw_insn *cmd; 3018 int cmdlen, i, l; 3019 uint16_t kidx; 3020 uint8_t subtype; 3021 3022 IPFW_UH_WLOCK(ch); 3023 3024 if (V_fw_tables_sets == sets) { 3025 IPFW_UH_WUNLOCK(ch); 3026 return (0); 3027 } 3028 ni = CHAIN_TO_NI(ch); 3029 if (sets == 0) { 3030 /* 3031 * Prevent disabling sets support if there are tables 3032 * in non-default sets. 3033 */ 3034 if (ipfw_objhash_foreach_type(ni, test_sets_cb, 3035 NULL, IPFW_TLV_TBL_NAME) != 0) { 3036 IPFW_UH_WUNLOCK(ch); 3037 return (EBUSY); 3038 } 3039 } 3040 /* 3041 * Scan all rules and examine table opcodes. 3042 */ 3043 for (i = 0; i < ch->n_rules; i++) { 3044 rule = ch->map[i]; 3045 3046 l = rule->cmd_len; 3047 cmd = rule->cmd; 3048 cmdlen = 0; 3049 for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { 3050 cmdlen = F_LEN(cmd); 3051 /* Check only table opcodes */ 3052 for (kidx = 0, rw = opcodes; 3053 rw < opcodes + nitems(opcodes); rw++) { 3054 if (rw->opcode != cmd->opcode) 3055 continue; 3056 if (rw->classifier(cmd, &kidx, &subtype) == 0) 3057 break; 3058 } 3059 if (kidx == 0) 3060 continue; 3061 no = ipfw_objhash_lookup_kidx(ni, kidx); 3062 /* Check that both the table object and the rule are in set 0 */ 3063 if (no->set != 0 || rule->set != 0) { 3064 IPFW_UH_WUNLOCK(ch); 3065 return (EBUSY); 3066 } 3067 3068 } 3069 } 3070 V_fw_tables_sets = sets; 3071 IPFW_UH_WUNLOCK(ch); 3072 return (0); 3073 } 3074 3075 /* 3076 * Checks a table name for validity. 3077 * Enforces basic length checks; the rest 3078 * should be done in userland. 3079 * 3080 * Returns 0 if the name is considered valid. 3081 */ 3082 static int 3083 check_table_name(const char *name) 3084 { 3085 3086 /* 3087 * TODO: do some more complicated checks 3088 */ 3089 return (ipfw_check_object_name_generic(name)); 3090 } 3091 3092 /* 3093 * Finds a table config based on either the legacy index 3094 * or the name in the ntlv. 3095 * Note that the @ti structure contains unchecked data from userland. 3096 * 3097 * Returns 0 on success and fills in @tc with the found config. 3098 */ 3099 static int 3100 find_table_err(struct namedobj_instance *ni, struct tid_info *ti, 3101 struct table_config **tc) 3102 { 3103 char *name, bname[16]; 3104 struct named_object *no; 3105 ipfw_obj_ntlv *ntlv; 3106 uint32_t set; 3107 3108 if (ti->tlvs != NULL) { 3109 ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, 3110 IPFW_TLV_TBL_NAME); 3111 if (ntlv == NULL) 3112 return (EINVAL); 3113 name = ntlv->name; 3114 3115 /* 3116 * Use the set provided by @ti instead of the @ntlv one. 3117 * This is needed due to the different sets behavior 3118 * controlled by V_fw_tables_sets. 3119 */ 3120 set = (V_fw_tables_sets != 0) ? ti->set : 0; 3121 } else { 3122 snprintf(bname, sizeof(bname), "%d", ti->uidx); 3123 name = bname; 3124 set = 0; 3125 } 3126 3127 no = ipfw_objhash_lookup_name(ni, set, name); 3128 *tc = (struct table_config *)no; 3129 3130 return (0); 3131 } 3132 3133 /* 3134 * Finds a table config based on either the legacy index 3135 * or the name in the ntlv. 3136 * Note that the @ti structure contains unchecked data from userland. 3137 * 3138 * Returns a pointer to the table_config or NULL.
3139 */ 3140 static struct table_config * 3141 find_table(struct namedobj_instance *ni, struct tid_info *ti) 3142 { 3143 struct table_config *tc; 3144 3145 if (find_table_err(ni, ti, &tc) != 0) 3146 return (NULL); 3147 3148 return (tc); 3149 } 3150 3151 /* 3152 * Allocate new table config structure using 3153 * specified @algo and @aname. 3154 * 3155 * Returns pointer to config or NULL. 3156 */ 3157 static struct table_config * 3158 alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti, 3159 struct table_algo *ta, char *aname, uint8_t tflags) 3160 { 3161 char *name, bname[16]; 3162 struct table_config *tc; 3163 int error; 3164 ipfw_obj_ntlv *ntlv; 3165 uint32_t set; 3166 3167 if (ti->tlvs != NULL) { 3168 ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, 3169 IPFW_TLV_TBL_NAME); 3170 if (ntlv == NULL) 3171 return (NULL); 3172 name = ntlv->name; 3173 set = (V_fw_tables_sets == 0) ? 0 : ntlv->set; 3174 } else { 3175 /* Compat part: convert number to string representation */ 3176 snprintf(bname, sizeof(bname), "%d", ti->uidx); 3177 name = bname; 3178 set = 0; 3179 } 3180 3181 tc = malloc(sizeof(struct table_config), M_IPFW, M_WAITOK | M_ZERO); 3182 tc->no.name = tc->tablename; 3183 tc->no.subtype = ta->type; 3184 tc->no.set = set; 3185 tc->tflags = tflags; 3186 tc->ta = ta; 3187 strlcpy(tc->tablename, name, sizeof(tc->tablename)); 3188 /* Set "shared" value type by default */ 3189 tc->vshared = 1; 3190 3191 /* Preallocate data structures for new tables */ 3192 error = ta->init(ch, &tc->astate, &tc->ti_copy, aname, tflags); 3193 if (error != 0) { 3194 free(tc, M_IPFW); 3195 return (NULL); 3196 } 3197 3198 return (tc); 3199 } 3200 3201 /* 3202 * Destroys table state and config. 3203 */ 3204 static void 3205 free_table_config(struct namedobj_instance *ni, struct table_config *tc) 3206 { 3207 3208 KASSERT(tc->linked == 0, ("free() on linked config")); 3209 /* UH lock MUST NOT be held */ 3210 3211 /* 3212 * We're using ta without any locking/referencing. 3213 * TODO: fix this if we're going to use unloadable algos. 3214 */ 3215 tc->ta->destroy(tc->astate, &tc->ti_copy); 3216 free(tc, M_IPFW); 3217 } 3218 3219 /* 3220 * Links @tc to @chain table named instance. 3221 * Sets appropriate type/states in @chain table info. 3222 */ 3223 static void 3224 link_table(struct ip_fw_chain *ch, struct table_config *tc) 3225 { 3226 struct namedobj_instance *ni; 3227 struct table_info *ti; 3228 uint16_t kidx; 3229 3230 IPFW_UH_WLOCK_ASSERT(ch); 3231 3232 ni = CHAIN_TO_NI(ch); 3233 kidx = tc->no.kidx; 3234 3235 ipfw_objhash_add(ni, &tc->no); 3236 3237 ti = KIDX_TO_TI(ch, kidx); 3238 *ti = tc->ti_copy; 3239 3240 /* Notify algo on real @ti address */ 3241 if (tc->ta->change_ti != NULL) 3242 tc->ta->change_ti(tc->astate, ti); 3243 3244 tc->linked = 1; 3245 tc->ta->refcnt++; 3246 } 3247 3248 /* 3249 * Unlinks @tc from @chain table named instance. 3250 * Zeroes states in @chain and stores them in @tc. 3251 */ 3252 static void 3253 unlink_table(struct ip_fw_chain *ch, struct table_config *tc) 3254 { 3255 struct namedobj_instance *ni; 3256 struct table_info *ti; 3257 uint16_t kidx; 3258 3259 IPFW_UH_WLOCK_ASSERT(ch); 3260 IPFW_WLOCK_ASSERT(ch); 3261 3262 ni = CHAIN_TO_NI(ch); 3263 kidx = tc->no.kidx; 3264 3265 /* Clear state. 
@ti copy is already saved inside @tc */ 3266 ipfw_objhash_del(ni, &tc->no); 3267 ti = KIDX_TO_TI(ch, kidx); 3268 memset(ti, 0, sizeof(struct table_info)); 3269 tc->linked = 0; 3270 tc->ta->refcnt--; 3271 3272 /* Notify algo on real @ti address */ 3273 if (tc->ta->change_ti != NULL) 3274 tc->ta->change_ti(tc->astate, NULL); 3275 } 3276 3277 static struct ipfw_sopt_handler scodes[] = { 3278 { IP_FW_TABLE_XCREATE, 0, HDIR_SET, create_table }, 3279 { IP_FW_TABLE_XDESTROY, 0, HDIR_SET, flush_table_v0 }, 3280 { IP_FW_TABLE_XFLUSH, 0, HDIR_SET, flush_table_v0 }, 3281 { IP_FW_TABLE_XMODIFY, 0, HDIR_BOTH, modify_table }, 3282 { IP_FW_TABLE_XINFO, 0, HDIR_GET, describe_table }, 3283 { IP_FW_TABLES_XLIST, 0, HDIR_GET, list_tables }, 3284 { IP_FW_TABLE_XLIST, 0, HDIR_GET, dump_table_v0 }, 3285 { IP_FW_TABLE_XLIST, 1, HDIR_GET, dump_table_v1 }, 3286 { IP_FW_TABLE_XADD, 0, HDIR_BOTH, manage_table_ent_v0 }, 3287 { IP_FW_TABLE_XADD, 1, HDIR_BOTH, manage_table_ent_v1 }, 3288 { IP_FW_TABLE_XDEL, 0, HDIR_BOTH, manage_table_ent_v0 }, 3289 { IP_FW_TABLE_XDEL, 1, HDIR_BOTH, manage_table_ent_v1 }, 3290 { IP_FW_TABLE_XFIND, 0, HDIR_GET, find_table_entry }, 3291 { IP_FW_TABLE_XSWAP, 0, HDIR_SET, swap_table }, 3292 { IP_FW_TABLES_ALIST, 0, HDIR_GET, list_table_algo }, 3293 { IP_FW_TABLE_XGETSIZE, 0, HDIR_GET, get_table_size }, 3294 }; 3295 3296 static int 3297 destroy_table_locked(struct namedobj_instance *ni, struct named_object *no, 3298 void *arg) 3299 { 3300 3301 unlink_table((struct ip_fw_chain *)arg, (struct table_config *)no); 3302 if (ipfw_objhash_free_idx(ni, no->kidx) != 0) 3303 printf("Error unlinking kidx %d from table %s\n", 3304 no->kidx, no->name); 3305 free_table_config(ni, (struct table_config *)no); 3306 return (0); 3307 } 3308 3309 /* 3310 * Shuts tables module down. 3311 */ 3312 void 3313 ipfw_destroy_tables(struct ip_fw_chain *ch, int last) 3314 { 3315 3316 IPFW_DEL_SOPT_HANDLER(last, scodes); 3317 IPFW_DEL_OBJ_REWRITER(last, opcodes); 3318 3319 /* Remove all tables from working set */ 3320 IPFW_UH_WLOCK(ch); 3321 IPFW_WLOCK(ch); 3322 ipfw_objhash_foreach(CHAIN_TO_NI(ch), destroy_table_locked, ch); 3323 IPFW_WUNLOCK(ch); 3324 IPFW_UH_WUNLOCK(ch); 3325 3326 /* Free the pointer array itself */ 3327 free(ch->tablestate, M_IPFW); 3328 3329 ipfw_table_value_destroy(ch, last); 3330 ipfw_table_algo_destroy(ch); 3331 3332 ipfw_objhash_destroy(CHAIN_TO_NI(ch)); 3333 free(CHAIN_TO_TCFG(ch), M_IPFW); 3334 } 3335 3336 /* 3337 * Starts tables module. 3338 */ 3339 int 3340 ipfw_init_tables(struct ip_fw_chain *ch, int first) 3341 { 3342 struct tables_config *tcfg; 3343 3344 /* Allocate pointers */ 3345 ch->tablestate = malloc(V_fw_tables_max * sizeof(struct table_info), 3346 M_IPFW, M_WAITOK | M_ZERO); 3347 3348 tcfg = malloc(sizeof(struct tables_config), M_IPFW, M_WAITOK | M_ZERO); 3349 tcfg->namehash = ipfw_objhash_create(V_fw_tables_max); 3350 ch->tblcfg = tcfg; 3351 3352 ipfw_table_value_init(ch, first); 3353 ipfw_table_algo_init(ch); 3354 3355 IPFW_ADD_OBJ_REWRITER(first, opcodes); 3356 IPFW_ADD_SOPT_HANDLER(first, scodes); 3357 return (0); 3358 } 3359 3360 3361 3362
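/*
 * A rough sketch of the userland side of IP_FW_TABLES_XLIST handled
 * above (close to what ipfw(8) does via do_get3(); hypothetical
 * variable names, error handling abbreviated):
 *
 *	ipfw_obj_lheader *olh;
 *	socklen_t sz;
 *
 *	sz = sizeof(*olh);
 *	olh = calloc(1, sz);
 *	olh->size = sz;
 *	olh->opheader.opcode = IP_FW_TABLES_XLIST;
 *	error = getsockopt(s, IPPROTO_IP, IP_FW3, olh, &sz);
 *
 * If the kernel returns ENOMEM, olh->size has been updated with the
 * required length (see export_tables() above), so the caller can
 * reallocate and retry. On success the reply carries olh->count
 * records of olh->objsize bytes each (ipfw_xtable_info).
 */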