xref: /freebsd/sys/netpfil/ipfw/ip_fw_table.c (revision b0c13e7e2446fde7c559d2b15cc3c2f8d8b959f8)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko.
5  * Copyright (c) 2014-2024 Yandex LLC
6  * Copyright (c) 2014 Alexander V. Chernikov
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 /*
32  * Lookup table support for ipfw.
33  *
34  * This file contains handlers for all generic tables' operations:
35  * add/del/flush entries, list/dump tables etc..
36  *
37  * Table data modification is protected by both UH and runtime lock
38  * while reading configuration/data is protected by UH lock.
39  *
40  * Lookup algorithms for all table types are located in ip_fw_table_algo.c
41  */
42 
43 #include "opt_ipfw.h"
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/malloc.h>
48 #include <sys/kernel.h>
49 #include <sys/lock.h>
50 #include <sys/rwlock.h>
51 #include <sys/rmlock.h>
52 #include <sys/socket.h>
53 #include <sys/socketvar.h>
54 #include <sys/queue.h>
55 #include <net/if.h>	/* ip_fw.h requires IFNAMSIZ */
56 
57 #include <netinet/in.h>
58 #include <netinet/ip_var.h>	/* struct ipfw_rule_ref */
59 #include <netinet/ip_fw.h>
60 
61 #include <netpfil/ipfw/ip_fw_private.h>
62 #include <netpfil/ipfw/ip_fw_table.h>
63 
64 static int find_table_err(struct namedobj_instance *ni, struct tid_info *ti,
65     struct table_config **tc);
66 static struct table_config *find_table(struct namedobj_instance *ni,
67     struct tid_info *ti);
68 static struct table_config *alloc_table_config(struct ip_fw_chain *ch,
69     struct tid_info *ti, struct table_algo *ta, char *adata, uint8_t tflags);
70 static void free_table_config(struct namedobj_instance *ni,
71     struct table_config *tc);
72 static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
73     char *aname, ipfw_xtable_info *i, uint32_t *pkidx, int ref);
74 static void link_table(struct ip_fw_chain *ch, struct table_config *tc);
75 static void unlink_table(struct ip_fw_chain *ch, struct table_config *tc);
76 static int find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
77     struct tentry_info *tei, uint32_t count, int op, struct table_config **ptc);
78 #define	OP_ADD	1
79 #define	OP_DEL	0
80 static int export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
81     struct sockopt_data *sd);
82 static void export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
83     ipfw_xtable_info *i);
84 static int dump_table_tentry(void *e, void *arg);
85 
86 static int swap_tables(struct ip_fw_chain *ch, struct tid_info *a,
87     struct tid_info *b);
88 
89 static int check_table_name(const char *name);
90 static int check_table_space(struct ip_fw_chain *ch, struct table_config *tc,
91     struct table_info *ti, uint32_t count);
92 static int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti);
93 
94 static struct table_algo *find_table_algo(struct tables_config *tableconf,
95     struct tid_info *ti, char *name);
96 
97 static void objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti);
98 static void ntlv_to_ti(struct _ipfw_obj_ntlv *ntlv, struct tid_info *ti);
99 
100 #define	CHAIN_TO_NI(chain)	(CHAIN_TO_TCFG(chain)->namehash)
101 #define	KIDX_TO_TI(ch, k)	(&(((struct table_info *)(ch)->tablestate)[k]))
102 
103 #define	TA_BUF_SZ	128	/* On-stack buffer for add/delete state */
104 
105 static struct table_value *
get_table_value(struct ip_fw_chain * ch,struct table_config * tc,uint32_t kidx)106 get_table_value(struct ip_fw_chain *ch, struct table_config *tc, uint32_t kidx)
107 {
108 	struct table_value *pval;
109 
110 	pval = (struct table_value *)ch->valuestate;
111 
112 	return (&pval[kidx]);
113 }
114 
115 /*
116  * Checks if we're able to insert/update entry @tei into table
117  * w.r.t @tc limits.
118  * May alter @tei to indicate insertion error / insert
119  * options.
120  *
121  * Returns 0 if operation can be performed/
122  */
123 static int
check_table_limit(struct table_config * tc,struct tentry_info * tei)124 check_table_limit(struct table_config *tc, struct tentry_info *tei)
125 {
126 
127 	if (tc->limit == 0 || tc->count < tc->limit)
128 		return (0);
129 
130 	if ((tei->flags & TEI_FLAGS_UPDATE) == 0) {
131 		/* Notify userland on error cause */
132 		tei->flags |= TEI_FLAGS_LIMIT;
133 		return (EFBIG);
134 	}
135 
136 	/*
137 	 * We have UPDATE flag set.
138 	 * Permit updating record (if found),
139 	 * but restrict adding new one since we've
140 	 * already hit the limit.
141 	 */
142 	tei->flags |= TEI_FLAGS_DONTADD;
143 
144 	return (0);
145 }
146 
147 /*
148  * Convert algorithm callback return code into
149  * one of pre-defined states known by userland.
150  */
151 static void
store_tei_result(struct tentry_info * tei,int op,int error,uint32_t num)152 store_tei_result(struct tentry_info *tei, int op, int error, uint32_t num)
153 {
154 	int flag;
155 
156 	flag = 0;
157 
158 	switch (error) {
159 	case 0:
160 		if (op == OP_ADD && num != 0)
161 			flag = TEI_FLAGS_ADDED;
162 		if (op == OP_DEL)
163 			flag = TEI_FLAGS_DELETED;
164 		break;
165 	case ENOENT:
166 		flag = TEI_FLAGS_NOTFOUND;
167 		break;
168 	case EEXIST:
169 		flag = TEI_FLAGS_EXISTS;
170 		break;
171 	default:
172 		flag = TEI_FLAGS_ERROR;
173 	}
174 
175 	tei->flags |= flag;
176 }
177 
178 /*
179  * Creates and references table with default parameters.
180  * Saves table config, algo and allocated kidx info @ptc, @pta and
181  * @pkidx if non-zero.
182  * Used for table auto-creation to support old binaries.
183  *
184  * Returns 0 on success.
185  */
186 static int
create_table_compat(struct ip_fw_chain * ch,struct tid_info * ti,uint32_t * pkidx)187 create_table_compat(struct ip_fw_chain *ch, struct tid_info *ti,
188     uint32_t *pkidx)
189 {
190 	ipfw_xtable_info xi;
191 	int error;
192 
193 	memset(&xi, 0, sizeof(xi));
194 	/* Set default value mask for legacy clients */
195 	xi.vmask = IPFW_VTYPE_LEGACY;
196 
197 	error = create_table_internal(ch, ti, NULL, &xi, pkidx, 1);
198 	if (error != 0)
199 		return (error);
200 
201 	return (0);
202 }
203 
204 /*
205  * Find and reference existing table optionally
206  * creating new one.
207  *
208  * Saves found table config into @ptc.
209  * Returns 0 if table was found/created and referenced
210  * or non-zero return code.
211  */
212 static int
find_ref_table(struct ip_fw_chain * ch,struct tid_info * ti,struct tentry_info * tei,uint32_t count,int op,struct table_config ** ptc)213 find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
214     struct tentry_info *tei, uint32_t count, int op,
215     struct table_config **ptc)
216 {
217 	struct namedobj_instance *ni;
218 	struct table_config *tc;
219 	uint32_t kidx;
220 	int error;
221 
222 	IPFW_UH_WLOCK_ASSERT(ch);
223 
224 	ni = CHAIN_TO_NI(ch);
225 	tc = NULL;
226 	if ((tc = find_table(ni, ti)) != NULL) {
227 		/* check table type */
228 		if (tc->no.subtype != ti->type)
229 			return (EINVAL);
230 
231 		if (tc->locked != 0)
232 			return (EACCES);
233 
234 		/* Try to exit early on limit hit */
235 		if (op == OP_ADD && count == 1 &&
236 		    check_table_limit(tc, tei) != 0)
237 			return (EFBIG);
238 
239 		/* Reference and return */
240 		tc->no.refcnt++;
241 		*ptc = tc;
242 		return (0);
243 	}
244 
245 	if (op == OP_DEL)
246 		return (ESRCH);
247 
248 	/* Compatibility mode: create new table for old clients */
249 	if ((tei->flags & TEI_FLAGS_COMPAT) == 0)
250 		return (ESRCH);
251 
252 	error = create_table_compat(ch, ti, &kidx);
253 
254 	if (error != 0)
255 		return (error);
256 
257 	tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx);
258 	KASSERT(tc != NULL, ("create_table_compat returned bad idx %u", kidx));
259 
260 	/* OK, now we've got referenced table. */
261 	*ptc = tc;
262 	return (0);
263 }
264 
265 /*
266  * Rolls back already @added to @tc entries using state array @ta_buf_m.
267  * Assume the following layout:
268  * 1) ADD state (ta_buf_m[0] ... t_buf_m[added - 1]) for handling update cases
269  * 2) DEL state (ta_buf_m[count[ ... t_buf_m[count + added - 1])
270  *   for storing deleted state
271  */
272 static void
rollback_added_entries(struct ip_fw_chain * ch,struct table_config * tc,struct table_info * tinfo,struct tentry_info * tei,caddr_t ta_buf_m,uint32_t count,uint32_t added)273 rollback_added_entries(struct ip_fw_chain *ch, struct table_config *tc,
274     struct table_info *tinfo, struct tentry_info *tei, caddr_t ta_buf_m,
275     uint32_t count, uint32_t added)
276 {
277 	struct table_algo *ta;
278 	struct tentry_info *ptei;
279 	caddr_t v, vv;
280 	size_t ta_buf_sz;
281 	int error __diagused, i;
282 	uint32_t num;
283 
284 	IPFW_UH_WLOCK_ASSERT(ch);
285 
286 	ta = tc->ta;
287 	ta_buf_sz = ta->ta_buf_size;
288 	v = ta_buf_m;
289 	vv = v + count * ta_buf_sz;
290 	for (i = 0; i < added; i++, v += ta_buf_sz, vv += ta_buf_sz) {
291 		ptei = &tei[i];
292 		if ((ptei->flags & TEI_FLAGS_UPDATED) != 0) {
293 			/*
294 			 * We have old value stored by previous
295 			 * call in @ptei->value. Do add once again
296 			 * to restore it.
297 			 */
298 			error = ta->add(tc->astate, tinfo, ptei, v, &num);
299 			KASSERT(error == 0, ("rollback UPDATE fail"));
300 			KASSERT(num == 0, ("rollback UPDATE fail2"));
301 			continue;
302 		}
303 
304 		error = ta->prepare_del(ch, ptei, vv);
305 		KASSERT(error == 0, ("pre-rollback INSERT failed"));
306 		error = ta->del(tc->astate, tinfo, ptei, vv, &num);
307 		KASSERT(error == 0, ("rollback INSERT failed"));
308 		tc->count -= num;
309 	}
310 }
311 
312 /*
313  * Prepares add/del state for all @count entries in @tei.
314  * Uses either stack buffer (@ta_buf) or allocates a new one.
315  * Stores pointer to allocated buffer back to @ta_buf.
316  *
317  * Returns 0 on success.
318  */
319 static int
prepare_batch_buffer(struct ip_fw_chain * ch,struct table_algo * ta,struct tentry_info * tei,uint32_t count,int op,caddr_t * ta_buf)320 prepare_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta,
321     struct tentry_info *tei, uint32_t count, int op, caddr_t *ta_buf)
322 {
323 	caddr_t ta_buf_m, v;
324 	size_t ta_buf_sz, sz;
325 	struct tentry_info *ptei;
326 	int error, i;
327 
328 	error = 0;
329 	ta_buf_sz = ta->ta_buf_size;
330 	if (count == 1) {
331 		/* Single add/delete, use on-stack buffer */
332 		memset(*ta_buf, 0, TA_BUF_SZ);
333 		ta_buf_m = *ta_buf;
334 	} else {
335 		/*
336 		 * Multiple adds/deletes, allocate larger buffer
337 		 *
338 		 * Note we need 2xcount buffer for add case:
339 		 * we have hold both ADD state
340 		 * and DELETE state (this may be needed
341 		 * if we need to rollback all changes)
342 		 */
343 		sz = count * ta_buf_sz;
344 		ta_buf_m = malloc((op == OP_ADD) ? sz * 2 : sz, M_TEMP,
345 		    M_WAITOK | M_ZERO);
346 	}
347 
348 	v = ta_buf_m;
349 	for (i = 0; i < count; i++, v += ta_buf_sz) {
350 		ptei = &tei[i];
351 		error = (op == OP_ADD) ?
352 		    ta->prepare_add(ch, ptei, v) : ta->prepare_del(ch, ptei, v);
353 
354 		/*
355 		 * Some syntax error (incorrect mask, or address, or
356 		 * anything). Return error regardless of atomicity
357 		 * settings.
358 		 */
359 		if (error != 0)
360 			break;
361 	}
362 
363 	*ta_buf = ta_buf_m;
364 	return (error);
365 }
366 
367 /*
368  * Flushes allocated state for each @count entries in @tei.
369  * Frees @ta_buf_m if differs from stack buffer @ta_buf.
370  */
371 static void
flush_batch_buffer(struct ip_fw_chain * ch,struct table_algo * ta,struct tentry_info * tei,uint32_t count,int rollback,caddr_t ta_buf_m,caddr_t ta_buf)372 flush_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta,
373     struct tentry_info *tei, uint32_t count, int rollback,
374     caddr_t ta_buf_m, caddr_t ta_buf)
375 {
376 	caddr_t v;
377 	struct tentry_info *ptei;
378 	size_t ta_buf_sz;
379 	int i;
380 
381 	ta_buf_sz = ta->ta_buf_size;
382 
383 	/* Run cleaning callback anyway */
384 	v = ta_buf_m;
385 	for (i = 0; i < count; i++, v += ta_buf_sz) {
386 		ptei = &tei[i];
387 		ta->flush_entry(ch, ptei, v);
388 		if (ptei->ptv != NULL) {
389 			free(ptei->ptv, M_IPFW);
390 			ptei->ptv = NULL;
391 		}
392 	}
393 
394 	/* Clean up "deleted" state in case of rollback */
395 	if (rollback != 0) {
396 		v = ta_buf_m + count * ta_buf_sz;
397 		for (i = 0; i < count; i++, v += ta_buf_sz)
398 			ta->flush_entry(ch, &tei[i], v);
399 	}
400 
401 	if (ta_buf_m != ta_buf)
402 		free(ta_buf_m, M_TEMP);
403 }
404 
405 /*
406  * Adds/updates one or more entries in table @ti.
407  *
408  * Algo interaction:
409  * Function references @ti first to ensure table won't
410  * disappear or change its type.
411  * After that, prepare_add callback is called for each @tei entry.
412  * Next, we try to add each entry under UH+WHLOCK
413  * using add() callback.
414  * Finally, we free all state by calling flush_entry callback
415  * for each @tei.
416  *
417  * Returns 0 on success.
418  */
419 int
add_table_entry(struct ip_fw_chain * ch,struct tid_info * ti,struct tentry_info * tei,uint8_t flags,uint32_t count)420 add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
421     struct tentry_info *tei, uint8_t flags, uint32_t count)
422 {
423 	struct table_config *tc;
424 	struct table_algo *ta;
425 	struct tentry_info *ptei;
426 	char ta_buf[TA_BUF_SZ];
427 	caddr_t ta_buf_m, v;
428 	uint32_t kidx, num, numadd;
429 	int error, first_error, i, rollback = 0;
430 
431 	IPFW_UH_WLOCK(ch);
432 
433 	/*
434 	 * Find and reference existing table.
435 	 */
436 	error = find_ref_table(ch, ti, tei, count, OP_ADD, &tc);
437 	if (error != 0) {
438 		IPFW_UH_WUNLOCK(ch);
439 		return (error);
440 	}
441 	/* Drop reference we've used in first search */
442 	tc->no.refcnt--;
443 	ta = tc->ta;
444 
445 	/* Allocate memory and prepare record(s) */
446 	/* Pass stack buffer by default */
447 	ta_buf_m = ta_buf;
448 	error = prepare_batch_buffer(ch, ta, tei, count, OP_ADD, &ta_buf_m);
449 	if (error != 0)
450 		goto cleanup;
451 
452 	/*
453 	 * Link all values values to shared/per-table value array.
454 	 */
455 	error = ipfw_link_table_values(ch, tc, tei, count, flags);
456 	if (error != 0)
457 		goto cleanup;
458 
459 	/*
460 	 * Ensure we are able to add all entries without additional
461 	 * memory allocations.
462 	 */
463 	kidx = tc->no.kidx;
464 	error = check_table_space(ch, tc, KIDX_TO_TI(ch, kidx), count);
465 	if (error != 0)
466 		goto cleanup;
467 
468 	/* We've got valid table in @tc. Let's try to add data */
469 	kidx = tc->no.kidx;
470 	ta = tc->ta;
471 	numadd = 0;
472 	first_error = 0;
473 
474 	IPFW_WLOCK(ch);
475 
476 	v = ta_buf_m;
477 	for (i = 0; i < count; i++, v += ta->ta_buf_size) {
478 		ptei = &tei[i];
479 		num = 0;
480 		/* check limit before adding */
481 		if ((error = check_table_limit(tc, ptei)) == 0) {
482 			/*
483 			 * It should be safe to insert a record w/o
484 			 * a properly-linked value if atomicity is
485 			 * not required.
486 			 *
487 			 * If the added item does not have a valid value
488 			 * index, it would get rejected by ta->add().
489 			 * */
490 			error = ta->add(tc->astate, KIDX_TO_TI(ch, kidx),
491 			    ptei, v, &num);
492 			/* Set status flag to inform userland */
493 			store_tei_result(ptei, OP_ADD, error, num);
494 		}
495 		if (error == 0) {
496 			/* Update number of records to ease limit checking */
497 			tc->count += num;
498 			numadd += num;
499 			continue;
500 		}
501 
502 		if (first_error == 0)
503 			first_error = error;
504 
505 		/*
506 		 * Some error have happened. Check our atomicity
507 		 * settings: continue if atomicity is not required,
508 		 * rollback changes otherwise.
509 		 */
510 		if ((flags & IPFW_CTF_ATOMIC) == 0)
511 			continue;
512 
513 		rollback_added_entries(ch, tc, KIDX_TO_TI(ch, kidx),
514 		    tei, ta_buf_m, count, i);
515 
516 		rollback = 1;
517 		break;
518 	}
519 
520 	IPFW_WUNLOCK(ch);
521 
522 	ipfw_garbage_table_values(ch, tc, tei, count, rollback);
523 
524 	/* Permit post-add algorithm grow/rehash. */
525 	if (numadd != 0)
526 		check_table_space(ch, tc, KIDX_TO_TI(ch, kidx), 0);
527 
528 	/* Return first error to user, if any */
529 	error = first_error;
530 
531 cleanup:
532 	IPFW_UH_WUNLOCK(ch);
533 
534 	flush_batch_buffer(ch, ta, tei, count, rollback, ta_buf_m, ta_buf);
535 
536 	return (error);
537 }
538 
539 /*
540  * Deletes one or more entries in table @ti.
541  *
542  * Returns 0 on success.
543  */
544 int
del_table_entry(struct ip_fw_chain * ch,struct tid_info * ti,struct tentry_info * tei,uint8_t flags,uint32_t count)545 del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
546     struct tentry_info *tei, uint8_t flags, uint32_t count)
547 {
548 	struct table_config *tc;
549 	struct table_algo *ta;
550 	struct tentry_info *ptei;
551 	char ta_buf[TA_BUF_SZ];
552 	caddr_t ta_buf_m, v;
553 	uint32_t kidx, num, numdel;
554 	int error, first_error, i;
555 
556 	/*
557 	 * Find and reference existing table.
558 	 */
559 	IPFW_UH_WLOCK(ch);
560 	error = find_ref_table(ch, ti, tei, count, OP_DEL, &tc);
561 	if (error != 0) {
562 		IPFW_UH_WUNLOCK(ch);
563 		return (error);
564 	}
565 	ta = tc->ta;
566 
567 	/* Allocate memory and prepare record(s) */
568 	/* Pass stack buffer by default */
569 	ta_buf_m = ta_buf;
570 	error = prepare_batch_buffer(ch, ta, tei, count, OP_DEL, &ta_buf_m);
571 	if (error != 0)
572 		goto cleanup;
573 
574 	/* Drop reference we've used in first search */
575 	tc->no.refcnt--;
576 
577 	/*
578 	 * Check if table algo is still the same.
579 	 * (changed ta may be the result of table swap).
580 	 */
581 	if (ta != tc->ta) {
582 		IPFW_UH_WUNLOCK(ch);
583 		error = EINVAL;
584 		goto cleanup;
585 	}
586 
587 	kidx = tc->no.kidx;
588 	numdel = 0;
589 	first_error = 0;
590 
591 	IPFW_WLOCK(ch);
592 	v = ta_buf_m;
593 	for (i = 0; i < count; i++, v += ta->ta_buf_size) {
594 		ptei = &tei[i];
595 		num = 0;
596 		error = ta->del(tc->astate, KIDX_TO_TI(ch, kidx), ptei, v,
597 		    &num);
598 		/* Save state for userland */
599 		store_tei_result(ptei, OP_DEL, error, num);
600 		if (error != 0 && first_error == 0)
601 			first_error = error;
602 		tc->count -= num;
603 		numdel += num;
604 	}
605 	IPFW_WUNLOCK(ch);
606 
607 	/* Unlink non-used values */
608 	ipfw_garbage_table_values(ch, tc, tei, count, 0);
609 
610 	if (numdel != 0) {
611 		/* Run post-del hook to permit shrinking */
612 		check_table_space(ch, tc, KIDX_TO_TI(ch, kidx), 0);
613 	}
614 
615 	IPFW_UH_WUNLOCK(ch);
616 
617 	/* Return first error to user, if any */
618 	error = first_error;
619 
620 cleanup:
621 	flush_batch_buffer(ch, ta, tei, count, 0, ta_buf_m, ta_buf);
622 
623 	return (error);
624 }
625 
626 /*
627  * Ensure that table @tc has enough space to add @count entries without
628  * need for reallocation.
629  *
630  * Callbacks order:
631  * 0) need_modify() (UH_WLOCK) - checks if @count items can be added w/o resize.
632  *
633  * 1) alloc_modify (no locks, M_WAITOK) - alloc new state based on @pflags.
634  * 2) prepare_modifyt (UH_WLOCK) - copy old data into new storage
635  * 3) modify (UH_WLOCK + WLOCK) - switch pointers
636  * 4) flush_modify (UH_WLOCK) - free state, if needed
637  *
638  * Returns 0 on success.
639  */
640 static int
check_table_space(struct ip_fw_chain * ch,struct table_config * tc,struct table_info * ti,uint32_t count)641 check_table_space(struct ip_fw_chain *ch, struct table_config *tc,
642     struct table_info *ti, uint32_t count)
643 {
644 	struct table_algo *ta;
645 	uint64_t pflags;
646 	char ta_buf[TA_BUF_SZ];
647 	int error;
648 
649 	IPFW_UH_WLOCK_ASSERT(ch);
650 
651 	error = 0;
652 	ta = tc->ta;
653 	if (ta->need_modify == NULL)
654 		return (0);
655 
656 	/* Acquire reference not to loose @tc between locks/unlocks */
657 	tc->no.refcnt++;
658 
659 	/*
660 	 * TODO: think about avoiding race between large add/large delete
661 	 * operation on algorithm which implements shrinking along with
662 	 * growing.
663 	 */
664 	while (true) {
665 		pflags = 0;
666 		if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) {
667 			error = 0;
668 			break;
669 		}
670 
671 		memset(&ta_buf, 0, sizeof(ta_buf));
672 		error = ta->prepare_mod(ta_buf, &pflags);
673 		if (error != 0)
674 			break;
675 
676 		/* Check if we still need to alter table */
677 		ti = KIDX_TO_TI(ch, tc->no.kidx);
678 		if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) {
679 			/*
680 			 * Other thread has already performed resize.
681 			 * Flush our state and return.
682 			 */
683 			ta->flush_mod(ta_buf);
684 			break;
685 		}
686 
687 		error = ta->fill_mod(tc->astate, ti, ta_buf, &pflags);
688 		if (error == 0) {
689 			/* Do actual modification */
690 			IPFW_WLOCK(ch);
691 			ta->modify(tc->astate, ti, ta_buf, pflags);
692 			IPFW_WUNLOCK(ch);
693 		}
694 
695 		/* Anyway, flush data and retry */
696 		ta->flush_mod(ta_buf);
697 	}
698 
699 	tc->no.refcnt--;
700 	return (error);
701 }
702 
703 /*
704  * Adds or deletes record in table.
705  * Data layout (v1)(current):
706  * Request: [ ipfw_obj_header
707  *   ipfw_obj_ctlv(IPFW_TLV_TBLENT_LIST) [ ipfw_obj_tentry x N ]
708  * ]
709  *
710  * Returns 0 on success
711  */
712 static int
manage_table_ent_v1(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)713 manage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
714     struct sockopt_data *sd)
715 {
716 	ipfw_obj_tentry *tent, *ptent;
717 	ipfw_obj_ctlv *ctlv;
718 	ipfw_obj_header *oh;
719 	struct tentry_info *ptei, tei, *tei_buf;
720 	struct tid_info ti;
721 	uint32_t kidx;
722 	int error, i, read;
723 
724 	/* Check minimum header size */
725 	if (sd->valsize < (sizeof(*oh) + sizeof(*ctlv)))
726 		return (EINVAL);
727 
728 	/* Check if passed data is too long */
729 	if (sd->valsize != sd->kavail)
730 		return (EINVAL);
731 
732 	oh = (ipfw_obj_header *)sd->kbuf;
733 
734 	/* Basic length checks for TLVs */
735 	if (oh->ntlv.head.length != sizeof(oh->ntlv))
736 		return (EINVAL);
737 
738 	read = sizeof(*oh);
739 
740 	ctlv = (ipfw_obj_ctlv *)(oh + 1);
741 	if (ctlv->head.length + read != sd->valsize)
742 		return (EINVAL);
743 
744 	read += sizeof(*ctlv);
745 	tent = (ipfw_obj_tentry *)(ctlv + 1);
746 	if (ctlv->count * sizeof(*tent) + read != sd->valsize)
747 		return (EINVAL);
748 
749 	if (ctlv->count == 0)
750 		return (0);
751 
752 	/*
753 	 * Mark entire buffer as "read".
754 	 * This instructs sopt api write it back
755 	 * after function return.
756 	 */
757 	ipfw_get_sopt_header(sd, sd->valsize);
758 
759 	/* Perform basic checks for each entry */
760 	ptent = tent;
761 	kidx = tent->idx;
762 	for (i = 0; i < ctlv->count; i++, ptent++) {
763 		if (ptent->head.length != sizeof(*ptent))
764 			return (EINVAL);
765 		if (ptent->idx != kidx)
766 			return (ENOTSUP);
767 	}
768 
769 	/* Convert data into kernel request objects */
770 	objheader_to_ti(oh, &ti);
771 	ti.type = oh->ntlv.type;
772 	ti.uidx = kidx;
773 
774 	/* Use on-stack buffer for single add/del */
775 	if (ctlv->count == 1) {
776 		memset(&tei, 0, sizeof(tei));
777 		tei_buf = &tei;
778 	} else
779 		tei_buf = malloc(ctlv->count * sizeof(tei), M_TEMP,
780 		    M_WAITOK | M_ZERO);
781 
782 	ptei = tei_buf;
783 	ptent = tent;
784 	for (i = 0; i < ctlv->count; i++, ptent++, ptei++) {
785 		ptei->paddr = &ptent->k;
786 		ptei->subtype = ptent->subtype;
787 		ptei->masklen = ptent->masklen;
788 		if (ptent->head.flags & IPFW_TF_UPDATE)
789 			ptei->flags |= TEI_FLAGS_UPDATE;
790 
791 		ipfw_import_table_value_v1(&ptent->v.value);
792 		ptei->pvalue = (struct table_value *)&ptent->v.value;
793 	}
794 
795 	error = (oh->opheader.opcode == IP_FW_TABLE_XADD) ?
796 	    add_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count) :
797 	    del_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count);
798 
799 	/* Translate result back to userland */
800 	ptei = tei_buf;
801 	ptent = tent;
802 	for (i = 0; i < ctlv->count; i++, ptent++, ptei++) {
803 		if (ptei->flags & TEI_FLAGS_ADDED)
804 			ptent->result = IPFW_TR_ADDED;
805 		else if (ptei->flags & TEI_FLAGS_DELETED)
806 			ptent->result = IPFW_TR_DELETED;
807 		else if (ptei->flags & TEI_FLAGS_UPDATED)
808 			ptent->result = IPFW_TR_UPDATED;
809 		else if (ptei->flags & TEI_FLAGS_LIMIT)
810 			ptent->result = IPFW_TR_LIMIT;
811 		else if (ptei->flags & TEI_FLAGS_ERROR)
812 			ptent->result = IPFW_TR_ERROR;
813 		else if (ptei->flags & TEI_FLAGS_NOTFOUND)
814 			ptent->result = IPFW_TR_NOTFOUND;
815 		else if (ptei->flags & TEI_FLAGS_EXISTS)
816 			ptent->result = IPFW_TR_EXISTS;
817 		ipfw_export_table_value_v1(ptei->pvalue, &ptent->v.value);
818 	}
819 
820 	if (tei_buf != &tei)
821 		free(tei_buf, M_TEMP);
822 
823 	return (error);
824 }
825 
826 /*
827  * Looks up an entry in given table.
828  * Data layout (v0)(current):
829  * Request: [ ipfw_obj_header ipfw_obj_tentry ]
830  * Reply: [ ipfw_obj_header ipfw_obj_tentry ]
831  *
832  * Returns 0 on success
833  */
834 static int
find_table_entry(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)835 find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
836     struct sockopt_data *sd)
837 {
838 	ipfw_obj_tentry *tent;
839 	ipfw_obj_header *oh;
840 	struct tid_info ti;
841 	struct table_config *tc;
842 	struct table_info *kti;
843 	struct table_value *pval;
844 	struct namedobj_instance *ni;
845 	int error;
846 	size_t sz;
847 
848 	/* Check minimum header size */
849 	sz = sizeof(*oh) + sizeof(*tent);
850 	if (sd->valsize != sz)
851 		return (EINVAL);
852 
853 	oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
854 	tent = (ipfw_obj_tentry *)(oh + 1);
855 
856 	/* Basic length checks for TLVs */
857 	if (oh->ntlv.head.length != sizeof(oh->ntlv))
858 		return (EINVAL);
859 
860 	objheader_to_ti(oh, &ti);
861 	ti.type = oh->ntlv.type;
862 	ti.uidx = tent->idx;
863 
864 	IPFW_UH_RLOCK(ch);
865 	ni = CHAIN_TO_NI(ch);
866 
867 	/*
868 	 * Find existing table and check its type .
869 	 */
870 	if ((tc = find_table(ni, &ti)) == NULL) {
871 		IPFW_UH_RUNLOCK(ch);
872 		return (ESRCH);
873 	}
874 
875 	/* check table type */
876 	if (tc->no.subtype != ti.type) {
877 		IPFW_UH_RUNLOCK(ch);
878 		return (EINVAL);
879 	}
880 
881 	kti = KIDX_TO_TI(ch, tc->no.kidx);
882 
883 	error = tc->ta->find_tentry(tc->astate, kti, tent);
884 	if (error == 0) {
885 		pval = get_table_value(ch, tc, tent->v.kidx);
886 		ipfw_export_table_value_v1(pval, &tent->v.value);
887 	}
888 	IPFW_UH_RUNLOCK(ch);
889 
890 	return (error);
891 }
892 
893 /*
894  * Flushes all entries or destroys given table.
895  * Data layout (v0)(current):
896  * Request: [ ipfw_obj_header ]
897  *
898  * Returns 0 on success
899  */
900 static int
flush_table_v0(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)901 flush_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
902     struct sockopt_data *sd)
903 {
904 	int error;
905 	struct _ipfw_obj_header *oh;
906 	struct tid_info ti;
907 
908 	if (sd->valsize != sizeof(*oh))
909 		return (EINVAL);
910 
911 	oh = (struct _ipfw_obj_header *)op3;
912 	objheader_to_ti(oh, &ti);
913 
914 	if (op3->opcode == IP_FW_TABLE_XDESTROY)
915 		error = destroy_table(ch, &ti);
916 	else if (op3->opcode == IP_FW_TABLE_XFLUSH)
917 		error = flush_table(ch, &ti);
918 	else
919 		return (ENOTSUP);
920 
921 	return (error);
922 }
923 
924 /*
925  * Flushes given table.
926  *
927  * Function create new table instance with the same
928  * parameters, swaps it with old one and
929  * flushes state without holding runtime WLOCK.
930  *
931  * Returns 0 on success.
932  */
933 int
flush_table(struct ip_fw_chain * ch,struct tid_info * ti)934 flush_table(struct ip_fw_chain *ch, struct tid_info *ti)
935 {
936 	struct namedobj_instance *ni;
937 	struct table_config *tc;
938 	struct table_algo *ta;
939 	struct table_info ti_old, ti_new, *tablestate;
940 	void *astate_old, *astate_new;
941 	char algostate[64], *pstate;
942 	int error;
943 	uint32_t kidx;
944 	uint8_t tflags;
945 
946 	/*
947 	 * Stage 1: save table algorithm.
948 	 * Reference found table to ensure it won't disappear.
949 	 */
950 	IPFW_UH_WLOCK(ch);
951 	ni = CHAIN_TO_NI(ch);
952 	if ((tc = find_table(ni, ti)) == NULL) {
953 		IPFW_UH_WUNLOCK(ch);
954 		return (ESRCH);
955 	}
956 	astate_new = NULL;
957 	memset(&ti_new, 0, sizeof(ti_new));
958 	ta = tc->ta;
959 	/* Do not flush readonly tables */
960 	if ((ta->flags & TA_FLAG_READONLY) != 0) {
961 		IPFW_UH_WUNLOCK(ch);
962 		return (EACCES);
963 	}
964 	/* Save startup algo parameters */
965 	if (ta->print_config != NULL) {
966 		ta->print_config(tc->astate, KIDX_TO_TI(ch, tc->no.kidx),
967 		    algostate, sizeof(algostate));
968 		pstate = algostate;
969 	} else
970 		pstate = NULL;
971 	tflags = tc->tflags;
972 
973 	/*
974 	 * Stage 2: allocate new table instance using same algo.
975 	 */
976 	memset(&ti_new, 0, sizeof(struct table_info));
977 	error = ta->init(ch, &astate_new, &ti_new, pstate, tflags);
978 
979 	/*
980 	 * Stage 3: swap old state pointers with newly-allocated ones.
981 	 * Decrease refcount.
982 	 */
983 	if (error != 0) {
984 		IPFW_UH_WUNLOCK(ch);
985 		return (error);
986 	}
987 
988 	ni = CHAIN_TO_NI(ch);
989 	kidx = tc->no.kidx;
990 	tablestate = (struct table_info *)ch->tablestate;
991 
992 	IPFW_WLOCK(ch);
993 	ti_old = tablestate[kidx];
994 	tablestate[kidx] = ti_new;
995 	IPFW_WUNLOCK(ch);
996 
997 	astate_old = tc->astate;
998 	tc->astate = astate_new;
999 	tc->ti_copy = ti_new;
1000 	tc->count = 0;
1001 
1002 	/* Notify algo on real @ti address */
1003 	if (ta->change_ti != NULL)
1004 		ta->change_ti(tc->astate, &tablestate[kidx]);
1005 
1006 	/*
1007 	 * Stage 4: unref values.
1008 	 */
1009 	ipfw_unref_table_values(ch, tc, ta, astate_old, &ti_old);
1010 	IPFW_UH_WUNLOCK(ch);
1011 
1012 	/*
1013 	 * Stage 5: perform real flush/destroy.
1014 	 */
1015 	ta->destroy(astate_old, &ti_old);
1016 
1017 	return (0);
1018 }
1019 
1020 /*
1021  * Swaps two tables.
1022  * Data layout (v0)(current):
1023  * Request: [ ipfw_obj_header ipfw_obj_ntlv ]
1024  *
1025  * Returns 0 on success
1026  */
1027 static int
swap_table(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)1028 swap_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1029     struct sockopt_data *sd)
1030 {
1031 	int error;
1032 	struct _ipfw_obj_header *oh;
1033 	struct tid_info ti_a, ti_b;
1034 
1035 	if (sd->valsize != sizeof(*oh) + sizeof(ipfw_obj_ntlv))
1036 		return (EINVAL);
1037 
1038 	oh = (struct _ipfw_obj_header *)op3;
1039 	ntlv_to_ti(&oh->ntlv, &ti_a);
1040 	ntlv_to_ti((ipfw_obj_ntlv *)(oh + 1), &ti_b);
1041 
1042 	error = swap_tables(ch, &ti_a, &ti_b);
1043 
1044 	return (error);
1045 }
1046 
1047 /*
1048  * Swaps two tables of the same type/valtype.
1049  *
1050  * Checks if tables are compatible and limits
1051  * permits swap, than actually perform swap.
1052  *
1053  * Each table consists of 2 different parts:
1054  * config:
1055  *   @tc (with name, set, kidx) and rule bindings, which is "stable".
1056  *   number of items
1057  *   table algo
1058  * runtime:
1059  *   runtime data @ti (ch->tablestate)
1060  *   runtime cache in @tc
1061  *   algo-specific data (@tc->astate)
1062  *
1063  * So we switch:
1064  *  all runtime data
1065  *   number of items
1066  *   table algo
1067  *
1068  * After that we call @ti change handler for each table.
1069  *
1070  * Note that referencing @tc won't protect tc->ta from change.
1071  * XXX: Do we need to restrict swap between locked tables?
1072  * XXX: Do we need to exchange ftype?
1073  *
1074  * Returns 0 on success.
1075  */
1076 static int
swap_tables(struct ip_fw_chain * ch,struct tid_info * a,struct tid_info * b)1077 swap_tables(struct ip_fw_chain *ch, struct tid_info *a,
1078     struct tid_info *b)
1079 {
1080 	struct namedobj_instance *ni;
1081 	struct table_config *tc_a, *tc_b;
1082 	struct table_algo *ta;
1083 	struct table_info ti, *tablestate;
1084 	void *astate;
1085 	uint32_t count;
1086 
1087 	/*
1088 	 * Stage 1: find both tables and ensure they are of
1089 	 * the same type.
1090 	 */
1091 	IPFW_UH_WLOCK(ch);
1092 	ni = CHAIN_TO_NI(ch);
1093 	if ((tc_a = find_table(ni, a)) == NULL) {
1094 		IPFW_UH_WUNLOCK(ch);
1095 		return (ESRCH);
1096 	}
1097 	if ((tc_b = find_table(ni, b)) == NULL) {
1098 		IPFW_UH_WUNLOCK(ch);
1099 		return (ESRCH);
1100 	}
1101 
1102 	/* It is very easy to swap between the same table */
1103 	if (tc_a == tc_b) {
1104 		IPFW_UH_WUNLOCK(ch);
1105 		return (0);
1106 	}
1107 
1108 	/* Check type and value are the same */
1109 	if (tc_a->no.subtype!=tc_b->no.subtype || tc_a->tflags!=tc_b->tflags) {
1110 		IPFW_UH_WUNLOCK(ch);
1111 		return (EINVAL);
1112 	}
1113 
1114 	/* Check limits before swap */
1115 	if ((tc_a->limit != 0 && tc_b->count > tc_a->limit) ||
1116 	    (tc_b->limit != 0 && tc_a->count > tc_b->limit)) {
1117 		IPFW_UH_WUNLOCK(ch);
1118 		return (EFBIG);
1119 	}
1120 
1121 	/* Check if one of the tables is readonly */
1122 	if (((tc_a->ta->flags | tc_b->ta->flags) & TA_FLAG_READONLY) != 0) {
1123 		IPFW_UH_WUNLOCK(ch);
1124 		return (EACCES);
1125 	}
1126 
1127 	/* Everything is fine, prepare to swap */
1128 	tablestate = (struct table_info *)ch->tablestate;
1129 	ti = tablestate[tc_a->no.kidx];
1130 	ta = tc_a->ta;
1131 	astate = tc_a->astate;
1132 	count = tc_a->count;
1133 
1134 	IPFW_WLOCK(ch);
1135 	/* a <- b */
1136 	tablestate[tc_a->no.kidx] = tablestate[tc_b->no.kidx];
1137 	tc_a->ta = tc_b->ta;
1138 	tc_a->astate = tc_b->astate;
1139 	tc_a->count = tc_b->count;
1140 	/* b <- a */
1141 	tablestate[tc_b->no.kidx] = ti;
1142 	tc_b->ta = ta;
1143 	tc_b->astate = astate;
1144 	tc_b->count = count;
1145 	IPFW_WUNLOCK(ch);
1146 
1147 	/* Ensure tc.ti copies are in sync */
1148 	tc_a->ti_copy = tablestate[tc_a->no.kidx];
1149 	tc_b->ti_copy = tablestate[tc_b->no.kidx];
1150 
1151 	/* Notify both tables on @ti change */
1152 	if (tc_a->ta->change_ti != NULL)
1153 		tc_a->ta->change_ti(tc_a->astate, &tablestate[tc_a->no.kidx]);
1154 	if (tc_b->ta->change_ti != NULL)
1155 		tc_b->ta->change_ti(tc_b->astate, &tablestate[tc_b->no.kidx]);
1156 
1157 	IPFW_UH_WUNLOCK(ch);
1158 
1159 	return (0);
1160 }
1161 
1162 /*
1163  * Destroys table specified by @ti.
1164  * Data layout (v0)(current):
1165  * Request: [ ip_fw3_opheader ]
1166  *
1167  * Returns 0 on success
1168  */
1169 static int
destroy_table(struct ip_fw_chain * ch,struct tid_info * ti)1170 destroy_table(struct ip_fw_chain *ch, struct tid_info *ti)
1171 {
1172 	struct namedobj_instance *ni;
1173 	struct table_config *tc;
1174 
1175 	IPFW_UH_WLOCK(ch);
1176 
1177 	ni = CHAIN_TO_NI(ch);
1178 	if ((tc = find_table(ni, ti)) == NULL) {
1179 		IPFW_UH_WUNLOCK(ch);
1180 		return (ESRCH);
1181 	}
1182 
1183 	/* Do not permit destroying referenced tables */
1184 	if (tc->no.refcnt > 0) {
1185 		IPFW_UH_WUNLOCK(ch);
1186 		return (EBUSY);
1187 	}
1188 
1189 	IPFW_WLOCK(ch);
1190 	unlink_table(ch, tc);
1191 	IPFW_WUNLOCK(ch);
1192 
1193 	/* Free obj index */
1194 	if (ipfw_objhash_free_idx(ni, tc->no.kidx) != 0)
1195 		printf("Error unlinking kidx %u from table %s\n",
1196 		    tc->no.kidx, tc->tablename);
1197 
1198 	/* Unref values used in tables while holding UH lock */
1199 	ipfw_unref_table_values(ch, tc, tc->ta, tc->astate, &tc->ti_copy);
1200 	IPFW_UH_WUNLOCK(ch);
1201 
1202 	free_table_config(ni, tc);
1203 
1204 	return (0);
1205 }
1206 
1207 /*
1208  * Grow tables index.
1209  *
1210  * Returns 0 on success.
1211  */
1212 int
ipfw_resize_tables(struct ip_fw_chain * ch,unsigned int ntables)1213 ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables)
1214 {
1215 	unsigned int tbl;
1216 	struct namedobj_instance *ni;
1217 	void *new_idx, *old_tablestate, *tablestate;
1218 	struct table_info *ti;
1219 	struct table_config *tc;
1220 	int i, new_blocks;
1221 
1222 	/* Check new value for validity */
1223 	if (ntables == 0)
1224 		return (EINVAL);
1225 	if (ntables > IPFW_TABLES_MAX)
1226 		ntables = IPFW_TABLES_MAX;
1227 	/* Alight to nearest power of 2 */
1228 	ntables = roundup_pow_of_two(ntables);
1229 
1230 	/* Allocate new pointers */
1231 	tablestate = malloc(ntables * sizeof(struct table_info),
1232 	    M_IPFW, M_WAITOK | M_ZERO);
1233 
1234 	ipfw_objhash_bitmap_alloc(ntables, (void *)&new_idx, &new_blocks);
1235 
1236 	IPFW_UH_WLOCK(ch);
1237 
1238 	tbl = (ntables >= V_fw_tables_max) ? V_fw_tables_max : ntables;
1239 	ni = CHAIN_TO_NI(ch);
1240 
1241 	/* Temporary restrict decreasing max_tables */
1242 	if (ntables < V_fw_tables_max) {
1243 		/*
1244 		 * FIXME: Check if we really can shrink
1245 		 */
1246 		IPFW_UH_WUNLOCK(ch);
1247 		return (EINVAL);
1248 	}
1249 
1250 	/* Copy table info/indices */
1251 	memcpy(tablestate, ch->tablestate, sizeof(struct table_info) * tbl);
1252 	ipfw_objhash_bitmap_merge(ni, &new_idx, &new_blocks);
1253 
1254 	IPFW_WLOCK(ch);
1255 
1256 	/* Change pointers */
1257 	old_tablestate = ch->tablestate;
1258 	ch->tablestate = tablestate;
1259 	ipfw_objhash_bitmap_swap(ni, &new_idx, &new_blocks);
1260 
1261 	V_fw_tables_max = ntables;
1262 
1263 	IPFW_WUNLOCK(ch);
1264 
1265 	/* Notify all consumers that their @ti pointer has changed */
1266 	ti = (struct table_info *)ch->tablestate;
1267 	for (i = 0; i < tbl; i++, ti++) {
1268 		if (ti->lookup == NULL)
1269 			continue;
1270 		tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, i);
1271 		if (tc == NULL || tc->ta->change_ti == NULL)
1272 			continue;
1273 
1274 		tc->ta->change_ti(tc->astate, ti);
1275 	}
1276 
1277 	IPFW_UH_WUNLOCK(ch);
1278 
1279 	/* Free old pointers */
1280 	free(old_tablestate, M_IPFW);
1281 	ipfw_objhash_bitmap_free(new_idx, new_blocks);
1282 
1283 	return (0);
1284 }
1285 
/*
 * Returns the named object registered under @kidx in the tables
 * namespace of @ch, as reported by ipfw_objhash_lookup_kidx().
 */
struct named_object *
ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch, uint32_t kidx)
{
	struct namedobj_instance *ni;

	ni = CHAIN_TO_NI(ch);

	return (ipfw_objhash_lookup_kidx(ni, kidx));
}
1295 
1296 /*
1297  * Take reference to table specified in @ntlv.
1298  * On success return its @kidx.
1299  */
1300 int
ipfw_ref_table(struct ip_fw_chain * ch,ipfw_obj_ntlv * ntlv,uint32_t * kidx)1301 ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint32_t *kidx)
1302 {
1303 	struct tid_info ti;
1304 	struct table_config *tc;
1305 	int error;
1306 
1307 	IPFW_UH_WLOCK_ASSERT(ch);
1308 
1309 	ntlv_to_ti(ntlv, &ti);
1310 	error = find_table_err(CHAIN_TO_NI(ch), &ti, &tc);
1311 	if (error != 0)
1312 		return (error);
1313 
1314 	if (tc == NULL)
1315 		return (ESRCH);
1316 
1317 	tc->no.refcnt++;
1318 	*kidx = tc->no.kidx;
1319 
1320 	return (0);
1321 }
1322 
1323 void
ipfw_unref_table(struct ip_fw_chain * ch,uint32_t kidx)1324 ipfw_unref_table(struct ip_fw_chain *ch, uint32_t kidx)
1325 {
1326 
1327 	struct namedobj_instance *ni;
1328 	struct named_object *no;
1329 
1330 	IPFW_UH_WLOCK_ASSERT(ch);
1331 	ni = CHAIN_TO_NI(ch);
1332 	no = ipfw_objhash_lookup_kidx(ni, kidx);
1333 	KASSERT(no != NULL, ("Table with index %u not found", kidx));
1334 	no->refcnt--;
1335 }
1336 
1337 /*
1338  * Lookup an arbitrary key @paddr of length @plen in table @tbl.
1339  * Stores found value in @val.
1340  *
1341  * Returns 1 if key was found.
1342  */
1343 int
ipfw_lookup_table(struct ip_fw_chain * ch,uint32_t tbl,uint16_t plen,void * paddr,uint32_t * val)1344 ipfw_lookup_table(struct ip_fw_chain *ch, uint32_t tbl, uint16_t plen,
1345     void *paddr, uint32_t *val)
1346 {
1347 	struct table_info *ti;
1348 
1349 	ti = KIDX_TO_TI(ch, tbl);
1350 
1351 	return (ti->lookup(ti, paddr, plen, val));
1352 }
1353 
1354 /*
1355  * Info/List/dump support for tables.
1356  *
1357  */
1358 
1359 /*
1360  * High-level 'get' cmds sysctl handlers
1361  */
1362 
1363 /*
1364  * Lists all tables currently available in kernel.
1365  * Data layout (v0)(current):
1366  * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
1367  * Reply: [ ipfw_obj_lheader ipfw_xtable_info x N ]
1368  *
1369  * Returns 0 on success
1370  */
1371 static int
list_tables(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)1372 list_tables(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1373     struct sockopt_data *sd)
1374 {
1375 	struct _ipfw_obj_lheader *olh;
1376 	int error;
1377 
1378 	olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
1379 	if (olh == NULL)
1380 		return (EINVAL);
1381 	if (sd->valsize < olh->size)
1382 		return (EINVAL);
1383 
1384 	IPFW_UH_RLOCK(ch);
1385 	error = export_tables(ch, olh, sd);
1386 	IPFW_UH_RUNLOCK(ch);
1387 
1388 	return (error);
1389 }
1390 
1391 /*
1392  * Store table info to buffer provided by @sd.
1393  * Data layout (v0)(current):
1394  * Request: [ ipfw_obj_header ipfw_xtable_info(empty)]
1395  * Reply: [ ipfw_obj_header ipfw_xtable_info ]
1396  *
1397  * Returns 0 on success.
1398  */
1399 static int
describe_table(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)1400 describe_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1401     struct sockopt_data *sd)
1402 {
1403 	struct _ipfw_obj_header *oh;
1404 	struct table_config *tc;
1405 	struct tid_info ti;
1406 	size_t sz;
1407 
1408 	sz = sizeof(*oh) + sizeof(ipfw_xtable_info);
1409 	oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
1410 	if (oh == NULL)
1411 		return (EINVAL);
1412 
1413 	objheader_to_ti(oh, &ti);
1414 
1415 	IPFW_UH_RLOCK(ch);
1416 	if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
1417 		IPFW_UH_RUNLOCK(ch);
1418 		return (ESRCH);
1419 	}
1420 
1421 	export_table_info(ch, tc, (ipfw_xtable_info *)(oh + 1));
1422 	IPFW_UH_RUNLOCK(ch);
1423 
1424 	return (0);
1425 }
1426 
1427 /*
1428  * Modifies existing table.
1429  * Data layout (v0)(current):
1430  * Request: [ ipfw_obj_header ipfw_xtable_info ]
1431  *
1432  * Returns 0 on success
1433  */
1434 static int
modify_table(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)1435 modify_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1436     struct sockopt_data *sd)
1437 {
1438 	struct _ipfw_obj_header *oh;
1439 	ipfw_xtable_info *i;
1440 	char *tname;
1441 	struct tid_info ti;
1442 	struct namedobj_instance *ni;
1443 	struct table_config *tc;
1444 
1445 	if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info))
1446 		return (EINVAL);
1447 
1448 	oh = (struct _ipfw_obj_header *)sd->kbuf;
1449 	i = (ipfw_xtable_info *)(oh + 1);
1450 
1451 	/*
1452 	 * Verify user-supplied strings.
1453 	 * Check for null-terminated/zero-length strings/
1454 	 */
1455 	tname = oh->ntlv.name;
1456 	if (check_table_name(tname) != 0)
1457 		return (EINVAL);
1458 
1459 	objheader_to_ti(oh, &ti);
1460 	ti.type = i->type;
1461 
1462 	IPFW_UH_WLOCK(ch);
1463 	ni = CHAIN_TO_NI(ch);
1464 	if ((tc = find_table(ni, &ti)) == NULL) {
1465 		IPFW_UH_WUNLOCK(ch);
1466 		return (ESRCH);
1467 	}
1468 
1469 	/* Do not support any modifications for readonly tables */
1470 	if ((tc->ta->flags & TA_FLAG_READONLY) != 0) {
1471 		IPFW_UH_WUNLOCK(ch);
1472 		return (EACCES);
1473 	}
1474 
1475 	if ((i->mflags & IPFW_TMFLAGS_LIMIT) != 0)
1476 		tc->limit = i->limit;
1477 	if ((i->mflags & IPFW_TMFLAGS_LOCK) != 0)
1478 		tc->locked = ((i->flags & IPFW_TGFLAGS_LOCKED) != 0);
1479 	IPFW_UH_WUNLOCK(ch);
1480 
1481 	return (0);
1482 }
1483 
1484 /*
1485  * Creates new table.
1486  * Data layout (v0)(current):
1487  * Request: [ ipfw_obj_header ipfw_xtable_info ]
1488  *
1489  * Returns 0 on success
1490  */
1491 static int
create_table(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)1492 create_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1493     struct sockopt_data *sd)
1494 {
1495 	struct _ipfw_obj_header *oh;
1496 	ipfw_xtable_info *i;
1497 	char *tname, *aname;
1498 	struct tid_info ti;
1499 	struct namedobj_instance *ni;
1500 	int rv;
1501 
1502 	if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info))
1503 		return (EINVAL);
1504 
1505 	oh = (struct _ipfw_obj_header *)sd->kbuf;
1506 	i = (ipfw_xtable_info *)(oh + 1);
1507 
1508 	/*
1509 	 * Verify user-supplied strings.
1510 	 * Check for null-terminated/zero-length strings/
1511 	 */
1512 	tname = oh->ntlv.name;
1513 	aname = i->algoname;
1514 	if (check_table_name(tname) != 0 ||
1515 	    strnlen(aname, sizeof(i->algoname)) == sizeof(i->algoname))
1516 		return (EINVAL);
1517 
1518 	if (aname[0] == '\0') {
1519 		/* Use default algorithm */
1520 		aname = NULL;
1521 	}
1522 
1523 	objheader_to_ti(oh, &ti);
1524 	ti.type = i->type;
1525 
1526 	ni = CHAIN_TO_NI(ch);
1527 
1528 	IPFW_UH_WLOCK(ch);
1529 	if (find_table(ni, &ti) != NULL) {
1530 		IPFW_UH_WUNLOCK(ch);
1531 		return (EEXIST);
1532 	}
1533 	rv = create_table_internal(ch, &ti, aname, i, NULL, 0);
1534 	IPFW_UH_WUNLOCK(ch);
1535 
1536 	return (rv);
1537 }
1538 
1539 /*
1540  * Creates new table based on @ti and @aname.
1541  *
1542  * Assume @aname to be checked and valid.
1543  * Stores allocated table kidx inside @pkidx (if non-NULL).
1544  * Reference created table if @compat is non-zero.
1545  *
1546  * Returns 0 on success.
1547  */
1548 static int
create_table_internal(struct ip_fw_chain * ch,struct tid_info * ti,char * aname,ipfw_xtable_info * i,uint32_t * pkidx,int compat)1549 create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
1550     char *aname, ipfw_xtable_info *i, uint32_t *pkidx, int compat)
1551 {
1552 	struct namedobj_instance *ni;
1553 	struct table_config *tc, *tc_new, *tmp;
1554 	struct table_algo *ta;
1555 	uint32_t kidx;
1556 
1557 	IPFW_UH_WLOCK_ASSERT(ch);
1558 
1559 	ni = CHAIN_TO_NI(ch);
1560 
1561 	ta = find_table_algo(CHAIN_TO_TCFG(ch), ti, aname);
1562 	if (ta == NULL)
1563 		return (ENOTSUP);
1564 
1565 	tc = alloc_table_config(ch, ti, ta, aname, i->tflags);
1566 	if (tc == NULL)
1567 		return (ENOMEM);
1568 
1569 	tc->vmask = i->vmask;
1570 	tc->limit = i->limit;
1571 	if (ta->flags & TA_FLAG_READONLY)
1572 		tc->locked = 1;
1573 	else
1574 		tc->locked = (i->flags & IPFW_TGFLAGS_LOCKED) != 0;
1575 
1576 	/* Check if table has been already created */
1577 	tc_new = find_table(ni, ti);
1578 	if (tc_new != NULL) {
1579 		/*
1580 		 * Compat: do not fail if we're
1581 		 * requesting to create existing table
1582 		 * which has the same type
1583 		 */
1584 		if (compat == 0 || tc_new->no.subtype != tc->no.subtype) {
1585 			free_table_config(ni, tc);
1586 			return (EEXIST);
1587 		}
1588 
1589 		/* Exchange tc and tc_new for proper refcounting & freeing */
1590 		tmp = tc;
1591 		tc = tc_new;
1592 		tc_new = tmp;
1593 	} else {
1594 		/* New table */
1595 		if (ipfw_objhash_alloc_idx(ni, &kidx) != 0) {
1596 			printf("Unable to allocate table index."
1597 			    " Consider increasing net.inet.ip.fw.tables_max");
1598 			free_table_config(ni, tc);
1599 			return (EBUSY);
1600 		}
1601 		tc->no.kidx = kidx;
1602 		tc->no.etlv = IPFW_TLV_TBL_NAME;
1603 
1604 		link_table(ch, tc);
1605 	}
1606 
1607 	if (compat != 0)
1608 		tc->no.refcnt++;
1609 	if (pkidx != NULL)
1610 		*pkidx = tc->no.kidx;
1611 
1612 	if (tc_new != NULL)
1613 		free_table_config(ni, tc_new);
1614 
1615 	return (0);
1616 }
1617 
1618 static void
ntlv_to_ti(ipfw_obj_ntlv * ntlv,struct tid_info * ti)1619 ntlv_to_ti(ipfw_obj_ntlv *ntlv, struct tid_info *ti)
1620 {
1621 
1622 	memset(ti, 0, sizeof(struct tid_info));
1623 	ti->set = ntlv->set;
1624 	ti->uidx = ntlv->idx;
1625 	ti->tlvs = ntlv;
1626 	ti->tlen = ntlv->head.length;
1627 }
1628 
1629 static void
objheader_to_ti(struct _ipfw_obj_header * oh,struct tid_info * ti)1630 objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti)
1631 {
1632 
1633 	ntlv_to_ti(&oh->ntlv, ti);
1634 }
1635 
/*
 * Returns the named-object instance that CHAIN_TO_NI() yields
 * for @ch.
 */
struct namedobj_instance *
ipfw_get_table_objhash(struct ip_fw_chain *ch)
{
	struct namedobj_instance *ni;

	ni = CHAIN_TO_NI(ch);

	return (ni);
}
1642 
1643 /*
1644  * Exports basic table info as name TLV.
1645  * Used inside dump_static_rules() to provide info
1646  * about all tables referenced by current ruleset.
1647  *
1648  * Returns 0 on success.
1649  */
1650 int
ipfw_export_table_ntlv(struct ip_fw_chain * ch,uint32_t kidx,struct sockopt_data * sd)1651 ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint32_t kidx,
1652     struct sockopt_data *sd)
1653 {
1654 	struct namedobj_instance *ni;
1655 	struct named_object *no;
1656 	ipfw_obj_ntlv *ntlv;
1657 
1658 	ni = CHAIN_TO_NI(ch);
1659 
1660 	no = ipfw_objhash_lookup_kidx(ni, kidx);
1661 	KASSERT(no != NULL, ("invalid table kidx passed"));
1662 
1663 	ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv));
1664 	if (ntlv == NULL)
1665 		return (ENOMEM);
1666 
1667 	ntlv->head.type = IPFW_TLV_TBL_NAME;
1668 	ntlv->head.length = sizeof(*ntlv);
1669 	ntlv->idx = no->kidx;
1670 	strlcpy(ntlv->name, no->name, sizeof(ntlv->name));
1671 
1672 	return (0);
1673 }
1674 
1675 struct dump_args {
1676 	struct ip_fw_chain *ch;
1677 	struct table_info *ti;
1678 	struct table_config *tc;
1679 	struct sockopt_data *sd;
1680 	uint32_t cnt;
1681 	uint16_t uidx;
1682 	int error;
1683 	uint32_t size;
1684 	ta_foreach_f *f;
1685 	void *farg;
1686 	ipfw_obj_tentry tent;
1687 };
1688 
1689 static int
count_ext_entries(void * e,void * arg)1690 count_ext_entries(void *e, void *arg)
1691 {
1692 	struct dump_args *da;
1693 
1694 	da = (struct dump_args *)arg;
1695 	da->cnt++;
1696 
1697 	return (0);
1698 }
1699 
1700 /*
1701  * Gets number of items from table either using
1702  * internal counter or calling algo callback for
1703  * externally-managed tables.
1704  *
1705  * Returns number of records.
1706  */
1707 static uint32_t
table_get_count(struct ip_fw_chain * ch,struct table_config * tc)1708 table_get_count(struct ip_fw_chain *ch, struct table_config *tc)
1709 {
1710 	struct table_info *ti;
1711 	struct table_algo *ta;
1712 	struct dump_args da;
1713 
1714 	ti = KIDX_TO_TI(ch, tc->no.kidx);
1715 	ta = tc->ta;
1716 
1717 	/* Use internal counter for self-managed tables */
1718 	if ((ta->flags & TA_FLAG_READONLY) == 0)
1719 		return (tc->count);
1720 
1721 	/* Use callback to quickly get number of items */
1722 	if ((ta->flags & TA_FLAG_EXTCOUNTER) != 0)
1723 		return (ta->get_count(tc->astate, ti));
1724 
1725 	/* Count number of iterms ourselves */
1726 	memset(&da, 0, sizeof(da));
1727 	ta->foreach(tc->astate, ti, count_ext_entries, &da);
1728 
1729 	return (da.cnt);
1730 }
1731 
1732 /*
1733  * Exports table @tc info into standard ipfw_xtable_info format.
1734  */
1735 static void
export_table_info(struct ip_fw_chain * ch,struct table_config * tc,ipfw_xtable_info * i)1736 export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
1737     ipfw_xtable_info *i)
1738 {
1739 	struct table_info *ti;
1740 	struct table_algo *ta;
1741 
1742 	i->type = tc->no.subtype;
1743 	i->tflags = tc->tflags;
1744 	i->vmask = tc->vmask;
1745 	i->set = tc->no.set;
1746 	i->kidx = tc->no.kidx;
1747 	i->refcnt = tc->no.refcnt;
1748 	i->count = table_get_count(ch, tc);
1749 	i->limit = tc->limit;
1750 	i->flags |= (tc->locked != 0) ? IPFW_TGFLAGS_LOCKED : 0;
1751 	i->size = i->count * sizeof(ipfw_obj_tentry);
1752 	i->size += sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info);
1753 	strlcpy(i->tablename, tc->tablename, sizeof(i->tablename));
1754 	ti = KIDX_TO_TI(ch, tc->no.kidx);
1755 	ta = tc->ta;
1756 	if (ta->print_config != NULL) {
1757 		/* Use algo function to print table config to string */
1758 		ta->print_config(tc->astate, ti, i->algoname,
1759 		    sizeof(i->algoname));
1760 	} else
1761 		strlcpy(i->algoname, ta->name, sizeof(i->algoname));
1762 	/* Dump algo-specific data, if possible */
1763 	if (ta->dump_tinfo != NULL) {
1764 		ta->dump_tinfo(tc->astate, ti, &i->ta_info);
1765 		i->ta_info.flags |= IPFW_TATFLAGS_DATA;
1766 	}
1767 }
1768 
/*
 * Argument bundle passed to export_table_internal() while
 * iterating over all tables.
 */
struct dump_table_args {
	struct ip_fw_chain *ch;		/* chain owning the tables */
	struct sockopt_data *sd;	/* reply buffer to append to */
};
1773 
1774 static int
export_table_internal(struct namedobj_instance * ni,struct named_object * no,void * arg)1775 export_table_internal(struct namedobj_instance *ni, struct named_object *no,
1776     void *arg)
1777 {
1778 	ipfw_xtable_info *i;
1779 	struct dump_table_args *dta;
1780 
1781 	dta = (struct dump_table_args *)arg;
1782 
1783 	i = (ipfw_xtable_info *)ipfw_get_sopt_space(dta->sd, sizeof(*i));
1784 	KASSERT(i != NULL, ("previously checked buffer is not enough"));
1785 
1786 	export_table_info(dta->ch, (struct table_config *)no, i);
1787 	return (0);
1788 }
1789 
1790 /*
1791  * Export all tables as ipfw_xtable_info structures to
1792  * storage provided by @sd.
1793  *
1794  * If supplied buffer is too small, fills in required size
1795  * and returns ENOMEM.
1796  * Returns 0 on success.
1797  */
1798 static int
export_tables(struct ip_fw_chain * ch,ipfw_obj_lheader * olh,struct sockopt_data * sd)1799 export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
1800     struct sockopt_data *sd)
1801 {
1802 	uint32_t size;
1803 	uint32_t count;
1804 	struct dump_table_args dta;
1805 
1806 	count = ipfw_objhash_count(CHAIN_TO_NI(ch));
1807 	size = count * sizeof(ipfw_xtable_info) + sizeof(ipfw_obj_lheader);
1808 
1809 	/* Fill in header regadless of buffer size */
1810 	olh->count = count;
1811 	olh->objsize = sizeof(ipfw_xtable_info);
1812 
1813 	if (size > olh->size) {
1814 		olh->size = size;
1815 		return (ENOMEM);
1816 	}
1817 
1818 	olh->size = size;
1819 
1820 	dta.ch = ch;
1821 	dta.sd = sd;
1822 
1823 	ipfw_objhash_foreach(CHAIN_TO_NI(ch), export_table_internal, &dta);
1824 
1825 	return (0);
1826 }
1827 
1828 /*
1829  * Dumps all table data
1830  * Data layout (v1)(current):
1831  * Request: [ ipfw_obj_header ], size = ipfw_xtable_info.size
1832  * Reply: [ ipfw_obj_header ipfw_xtable_info ipfw_obj_tentry x N ]
1833  *
1834  * Returns 0 on success
1835  */
1836 static int
dump_table_v1(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)1837 dump_table_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1838     struct sockopt_data *sd)
1839 {
1840 	struct _ipfw_obj_header *oh;
1841 	ipfw_xtable_info *i;
1842 	struct tid_info ti;
1843 	struct table_config *tc;
1844 	struct table_algo *ta;
1845 	struct dump_args da;
1846 	uint32_t sz;
1847 
1848 	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info);
1849 	oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
1850 	if (oh == NULL)
1851 		return (EINVAL);
1852 
1853 	i = (ipfw_xtable_info *)(oh + 1);
1854 	objheader_to_ti(oh, &ti);
1855 
1856 	IPFW_UH_RLOCK(ch);
1857 	if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
1858 		IPFW_UH_RUNLOCK(ch);
1859 		return (ESRCH);
1860 	}
1861 	export_table_info(ch, tc, i);
1862 
1863 	if (sd->valsize < i->size) {
1864 		/*
1865 		 * Submitted buffer size is not enough.
1866 		 * WE've already filled in @i structure with
1867 		 * relevant table info including size, so we
1868 		 * can return. Buffer will be flushed automatically.
1869 		 */
1870 		IPFW_UH_RUNLOCK(ch);
1871 		return (ENOMEM);
1872 	}
1873 
1874 	/*
1875 	 * Do the actual dump in eXtended format
1876 	 */
1877 	memset(&da, 0, sizeof(da));
1878 	da.ch = ch;
1879 	da.ti = KIDX_TO_TI(ch, tc->no.kidx);
1880 	da.tc = tc;
1881 	da.sd = sd;
1882 
1883 	ta = tc->ta;
1884 
1885 	ta->foreach(tc->astate, da.ti, dump_table_tentry, &da);
1886 	IPFW_UH_RUNLOCK(ch);
1887 
1888 	return (da.error);
1889 }
1890 
1891 /*
1892  * Dumps table entry in eXtended format (v1)(current).
1893  */
1894 static int
dump_table_tentry(void * e,void * arg)1895 dump_table_tentry(void *e, void *arg)
1896 {
1897 	struct dump_args *da;
1898 	struct table_config *tc;
1899 	struct table_algo *ta;
1900 	struct table_value *pval;
1901 	ipfw_obj_tentry *tent;
1902 	int error;
1903 
1904 	da = (struct dump_args *)arg;
1905 
1906 	tc = da->tc;
1907 	ta = tc->ta;
1908 
1909 	tent = (ipfw_obj_tentry *)ipfw_get_sopt_space(da->sd, sizeof(*tent));
1910 	/* Out of memory, returning */
1911 	if (tent == NULL) {
1912 		da->error = ENOMEM;
1913 		return (1);
1914 	}
1915 	tent->head.length = sizeof(ipfw_obj_tentry);
1916 	tent->idx = da->uidx;
1917 
1918 	error = ta->dump_tentry(tc->astate, da->ti, e, tent);
1919 	if (error != 0)
1920 		return (error);
1921 
1922 	pval = get_table_value(da->ch, da->tc, tent->v.kidx);
1923 	ipfw_export_table_value_v1(pval, &tent->v.value);
1924 
1925 	return (0);
1926 }
1927 
1928 /*
1929  * Helper function to export table algo data
1930  * to tentry format before calling user function.
1931  *
1932  * Returns 0 on success.
1933  */
1934 static int
prepare_table_tentry(void * e,void * arg)1935 prepare_table_tentry(void *e, void *arg)
1936 {
1937 	struct dump_args *da;
1938 	struct table_config *tc;
1939 	struct table_algo *ta;
1940 	int error;
1941 
1942 	da = (struct dump_args *)arg;
1943 
1944 	tc = da->tc;
1945 	ta = tc->ta;
1946 
1947 	error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent);
1948 	if (error != 0)
1949 		return (error);
1950 
1951 	da->f(&da->tent, da->farg);
1952 
1953 	return (0);
1954 }
1955 
1956 /*
1957  * Allow external consumers to read table entries in standard format.
1958  */
1959 int
ipfw_foreach_table_tentry(struct ip_fw_chain * ch,uint32_t kidx,ta_foreach_f * f,void * arg)1960 ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint32_t kidx,
1961     ta_foreach_f *f, void *arg)
1962 {
1963 	struct namedobj_instance *ni;
1964 	struct table_config *tc;
1965 	struct table_algo *ta;
1966 	struct dump_args da;
1967 
1968 	ni = CHAIN_TO_NI(ch);
1969 
1970 	tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx);
1971 	if (tc == NULL)
1972 		return (ESRCH);
1973 
1974 	ta = tc->ta;
1975 
1976 	memset(&da, 0, sizeof(da));
1977 	da.ch = ch;
1978 	da.ti = KIDX_TO_TI(ch, tc->no.kidx);
1979 	da.tc = tc;
1980 	da.f = f;
1981 	da.farg = arg;
1982 
1983 	ta->foreach(tc->astate, da.ti, prepare_table_tentry, &da);
1984 
1985 	return (0);
1986 }
1987 
1988 /*
1989  * Table algorithms
1990  */
1991 
1992 /*
1993  * Finds algorithm by index, table type or supplied name.
1994  *
1995  * Returns pointer to algo or NULL.
1996  */
1997 static struct table_algo *
find_table_algo(struct tables_config * tcfg,struct tid_info * ti,char * name)1998 find_table_algo(struct tables_config *tcfg, struct tid_info *ti, char *name)
1999 {
2000 	int i, l;
2001 	struct table_algo *ta;
2002 
2003 	if (ti->type > IPFW_TABLE_MAXTYPE)
2004 		return (NULL);
2005 
2006 	/* Search by index */
2007 	if (ti->atype != 0) {
2008 		if (ti->atype > tcfg->algo_count)
2009 			return (NULL);
2010 		return (tcfg->algo[ti->atype]);
2011 	}
2012 
2013 	if (name == NULL) {
2014 		/* Return default algorithm for given type if set */
2015 		return (tcfg->def_algo[ti->type]);
2016 	}
2017 
2018 	/* Search by name */
2019 	/* TODO: better search */
2020 	for (i = 1; i <= tcfg->algo_count; i++) {
2021 		ta = tcfg->algo[i];
2022 
2023 		/*
2024 		 * One can supply additional algorithm
2025 		 * parameters so we compare only the first word
2026 		 * of supplied name:
2027 		 * 'addr:chash hsize=32'
2028 		 * '^^^^^^^^^'
2029 		 *
2030 		 */
2031 		l = strlen(ta->name);
2032 		if (strncmp(name, ta->name, l) != 0)
2033 			continue;
2034 		if (name[l] != '\0' && name[l] != ' ')
2035 			continue;
2036 		/* Check if we're requesting proper table type */
2037 		if (ti->type != 0 && ti->type != ta->type)
2038 			return (NULL);
2039 		return (ta);
2040 	}
2041 
2042 	return (NULL);
2043 }
2044 
2045 /*
2046  * Register new table algo @ta.
2047  * Stores algo id inside @idx.
2048  *
2049  * Returns 0 on success.
2050  */
2051 int
ipfw_add_table_algo(struct ip_fw_chain * ch,struct table_algo * ta,size_t size,int * idx)2052 ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, size_t size,
2053     int *idx)
2054 {
2055 	struct tables_config *tcfg;
2056 	struct table_algo *ta_new;
2057 	size_t sz;
2058 
2059 	if (size > sizeof(struct table_algo))
2060 		return (EINVAL);
2061 
2062 	/* Check for the required on-stack size for add/del */
2063 	sz = roundup2(ta->ta_buf_size, sizeof(void *));
2064 	if (sz > TA_BUF_SZ)
2065 		return (EINVAL);
2066 
2067 	KASSERT(ta->type <= IPFW_TABLE_MAXTYPE,("Increase IPFW_TABLE_MAXTYPE"));
2068 
2069 	/* Copy algorithm data to stable storage. */
2070 	ta_new = malloc(sizeof(struct table_algo), M_IPFW, M_WAITOK | M_ZERO);
2071 	memcpy(ta_new, ta, size);
2072 
2073 	tcfg = CHAIN_TO_TCFG(ch);
2074 
2075 	KASSERT(tcfg->algo_count < 255, ("Increase algo array size"));
2076 
2077 	tcfg->algo[++tcfg->algo_count] = ta_new;
2078 	ta_new->idx = tcfg->algo_count;
2079 
2080 	/* Set algorithm as default one for given type */
2081 	if ((ta_new->flags & TA_FLAG_DEFAULT) != 0 &&
2082 	    tcfg->def_algo[ta_new->type] == NULL)
2083 		tcfg->def_algo[ta_new->type] = ta_new;
2084 
2085 	*idx = ta_new->idx;
2086 
2087 	return (0);
2088 }
2089 
2090 /*
2091  * Unregisters table algo using @idx as id.
2092  * XXX: It is NOT safe to call this function in any place
2093  * other than ipfw instance destroy handler.
2094  */
2095 void
ipfw_del_table_algo(struct ip_fw_chain * ch,int idx)2096 ipfw_del_table_algo(struct ip_fw_chain *ch, int idx)
2097 {
2098 	struct tables_config *tcfg;
2099 	struct table_algo *ta;
2100 
2101 	tcfg = CHAIN_TO_TCFG(ch);
2102 
2103 	KASSERT(idx <= tcfg->algo_count, ("algo idx %d out of range 1..%d",
2104 	    idx, tcfg->algo_count));
2105 
2106 	ta = tcfg->algo[idx];
2107 	KASSERT(ta != NULL, ("algo idx %d is NULL", idx));
2108 
2109 	if (tcfg->def_algo[ta->type] == ta)
2110 		tcfg->def_algo[ta->type] = NULL;
2111 
2112 	free(ta, M_IPFW);
2113 }
2114 
2115 /*
2116  * Lists all table algorithms currently available.
2117  * Data layout (v0)(current):
2118  * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
2119  * Reply: [ ipfw_obj_lheader ipfw_ta_info x N ]
2120  *
2121  * Returns 0 on success
2122  */
2123 static int
list_table_algo(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)2124 list_table_algo(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
2125     struct sockopt_data *sd)
2126 {
2127 	struct _ipfw_obj_lheader *olh;
2128 	struct tables_config *tcfg;
2129 	ipfw_ta_info *i;
2130 	struct table_algo *ta;
2131 	uint32_t count, n, size;
2132 
2133 	olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
2134 	if (olh == NULL)
2135 		return (EINVAL);
2136 	if (sd->valsize < olh->size)
2137 		return (EINVAL);
2138 
2139 	IPFW_UH_RLOCK(ch);
2140 	tcfg = CHAIN_TO_TCFG(ch);
2141 	count = tcfg->algo_count;
2142 	size = count * sizeof(ipfw_ta_info) + sizeof(ipfw_obj_lheader);
2143 
2144 	/* Fill in header regadless of buffer size */
2145 	olh->count = count;
2146 	olh->objsize = sizeof(ipfw_ta_info);
2147 
2148 	if (size > olh->size) {
2149 		olh->size = size;
2150 		IPFW_UH_RUNLOCK(ch);
2151 		return (ENOMEM);
2152 	}
2153 	olh->size = size;
2154 
2155 	for (n = 1; n <= count; n++) {
2156 		i = (ipfw_ta_info *)ipfw_get_sopt_space(sd, sizeof(*i));
2157 		KASSERT(i != NULL, ("previously checked buffer is not enough"));
2158 		ta = tcfg->algo[n];
2159 		strlcpy(i->algoname, ta->name, sizeof(i->algoname));
2160 		i->type = ta->type;
2161 		i->refcnt = ta->refcnt;
2162 	}
2163 
2164 	IPFW_UH_RUNLOCK(ch);
2165 
2166 	return (0);
2167 }
2168 
2169 static int
classify_srcdst(ipfw_insn * cmd0,uint32_t * puidx,uint8_t * ptype)2170 classify_srcdst(ipfw_insn *cmd0, uint32_t *puidx, uint8_t *ptype)
2171 {
2172 	ipfw_insn_table *cmd;
2173 
2174 	/* Basic IPv4/IPv6 or u32 lookups */
2175 	cmd = insntod(cmd0, table);
2176 	*puidx = cmd->kidx;
2177 	switch(cmd0->arg1) {
2178 	case LOOKUP_DST_IP:
2179 	case LOOKUP_SRC_IP:
2180 	default:
2181 		/* IPv4 src/dst */
2182 		*ptype = IPFW_TABLE_ADDR;
2183 		break;
2184 	case LOOKUP_DST_PORT:
2185 	case LOOKUP_SRC_PORT:
2186 	case LOOKUP_UID:
2187 	case LOOKUP_JAIL:
2188 	case LOOKUP_DSCP:
2189 	case LOOKUP_MARK:
2190 	case LOOKUP_RULENUM:
2191 		*ptype = IPFW_TABLE_NUMBER;
2192 		break;
2193 	case LOOKUP_DST_MAC:
2194 	case LOOKUP_SRC_MAC:
2195 		*ptype = IPFW_TABLE_MAC;
2196 		break;
2197 	}
2198 	return (0);
2199 }
2200 
2201 static int
classify_via(ipfw_insn * cmd0,uint32_t * puidx,uint8_t * ptype)2202 classify_via(ipfw_insn *cmd0, uint32_t *puidx, uint8_t *ptype)
2203 {
2204 	ipfw_insn_if *cmdif;
2205 
2206 	/* Interface table, possibly */
2207 	cmdif = insntod(cmd0, if);
2208 	if (cmdif->name[0] != '\1')
2209 		return (1);
2210 
2211 	*ptype = IPFW_TABLE_INTERFACE;
2212 	*puidx = cmdif->p.kidx; /* XXXAE */
2213 	return (0);
2214 }
2215 
2216 static int
classify_flow(ipfw_insn * cmd0,uint32_t * puidx,uint8_t * ptype)2217 classify_flow(ipfw_insn *cmd0, uint32_t *puidx, uint8_t *ptype)
2218 {
2219 	*puidx = insntod(cmd0, table)->kidx;
2220 	*ptype = IPFW_TABLE_FLOW;
2221 	return (0);
2222 }
2223 
2224 static int
classify_mac_lookup(ipfw_insn * cmd0,uint32_t * puidx,uint8_t * ptype)2225 classify_mac_lookup(ipfw_insn *cmd0, uint32_t *puidx, uint8_t *ptype)
2226 {
2227 	*puidx = insntod(cmd0, table)->kidx;
2228 	*ptype = IPFW_TABLE_MAC;
2229 	return (0);
2230 }
2231 
2232 static void
update_kidx(ipfw_insn * cmd0,uint32_t idx)2233 update_kidx(ipfw_insn *cmd0, uint32_t idx)
2234 {
2235 	insntod(cmd0, table)->kidx = idx;
2236 }
2237 
2238 static void
update_via(ipfw_insn * cmd0,uint32_t idx)2239 update_via(ipfw_insn *cmd0, uint32_t idx)
2240 {
2241 	insntod(cmd0, if)->p.kidx = idx;
2242 }
2243 
2244 static int
table_findbyname(struct ip_fw_chain * ch,struct tid_info * ti,struct named_object ** pno)2245 table_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
2246     struct named_object **pno)
2247 {
2248 	struct table_config *tc;
2249 	int error;
2250 
2251 	IPFW_UH_WLOCK_ASSERT(ch);
2252 
2253 	error = find_table_err(CHAIN_TO_NI(ch), ti, &tc);
2254 	if (error != 0)
2255 		return (error);
2256 
2257 	*pno = &tc->no;
2258 	return (0);
2259 }
2260 
2261 /* XXX: sets-sets! */
2262 static struct named_object *
table_findbykidx(struct ip_fw_chain * ch,uint32_t idx)2263 table_findbykidx(struct ip_fw_chain *ch, uint32_t idx)
2264 {
2265 	struct namedobj_instance *ni;
2266 	struct table_config *tc;
2267 
2268 	IPFW_UH_WLOCK_ASSERT(ch);
2269 	ni = CHAIN_TO_NI(ch);
2270 	tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, idx);
2271 	KASSERT(tc != NULL, ("Table with index %u not found", idx));
2272 
2273 	return (&tc->no);
2274 }
2275 
2276 static int
table_manage_sets(struct ip_fw_chain * ch,uint32_t set,uint8_t new_set,enum ipfw_sets_cmd cmd)2277 table_manage_sets(struct ip_fw_chain *ch, uint32_t set, uint8_t new_set,
2278     enum ipfw_sets_cmd cmd)
2279 {
2280 
2281 	switch (cmd) {
2282 	case SWAP_ALL:
2283 	case TEST_ALL:
2284 	case MOVE_ALL:
2285 		/*
2286 		 * Always return success, the real action and decision
2287 		 * should make table_manage_sets_all().
2288 		 */
2289 		return (0);
2290 	case TEST_ONE:
2291 	case MOVE_ONE:
2292 		/*
2293 		 * NOTE: we need to use ipfw_objhash_del/ipfw_objhash_add
2294 		 * if set number will be used in hash function. Currently
2295 		 * we can just use generic handler that replaces set value.
2296 		 */
2297 		if (V_fw_tables_sets == 0)
2298 			return (0);
2299 		break;
2300 	case COUNT_ONE:
2301 		/*
2302 		 * Return EOPNOTSUPP for COUNT_ONE when per-set sysctl is
2303 		 * disabled. This allow skip table's opcodes from additional
2304 		 * checks when specific rules moved to another set.
2305 		 */
2306 		if (V_fw_tables_sets == 0)
2307 			return (EOPNOTSUPP);
2308 	}
2309 	/* Use generic sets handler when per-set sysctl is enabled. */
2310 	return (ipfw_obj_manage_sets(CHAIN_TO_NI(ch), IPFW_TLV_TBL_NAME,
2311 	    set, new_set, cmd));
2312 }
2313 
2314 /*
2315  * We register several opcode rewriters for lookup tables.
2316  * All tables opcodes have the same ETLV type, but different subtype.
2317  * To avoid invoking sets handler several times for XXX_ALL commands,
2318  * we use separate manage_sets handler. O_RECV has the lowest value,
2319  * so it should be called first.
2320  */
2321 static int
table_manage_sets_all(struct ip_fw_chain * ch,uint32_t set,uint8_t new_set,enum ipfw_sets_cmd cmd)2322 table_manage_sets_all(struct ip_fw_chain *ch, uint32_t set, uint8_t new_set,
2323     enum ipfw_sets_cmd cmd)
2324 {
2325 
2326 	switch (cmd) {
2327 	case SWAP_ALL:
2328 	case TEST_ALL:
2329 		/*
2330 		 * Return success for TEST_ALL, since nothing prevents
2331 		 * move rules from one set to another. All tables are
2332 		 * accessible from all sets when per-set tables sysctl
2333 		 * is disabled.
2334 		 */
2335 	case MOVE_ALL:
2336 		if (V_fw_tables_sets == 0)
2337 			return (0);
2338 		break;
2339 	default:
2340 		return (table_manage_sets(ch, set, new_set, cmd));
2341 	}
2342 	/* Use generic sets handler when per-set sysctl is enabled. */
2343 	return (ipfw_obj_manage_sets(CHAIN_TO_NI(ch), IPFW_TLV_TBL_NAME,
2344 	    set, new_set, cmd));
2345 }
2346 
2347 static struct opcode_obj_rewrite opcodes[] = {
2348 	{
2349 		.opcode = O_IP_SRC_LOOKUP,
2350 		.etlv = IPFW_TLV_TBL_NAME,
2351 		.classifier = classify_srcdst,
2352 		.update = update_kidx,
2353 		.find_byname = table_findbyname,
2354 		.find_bykidx = table_findbykidx,
2355 		.create_object = create_table_compat,
2356 		.manage_sets = table_manage_sets,
2357 	},
2358 	{
2359 		.opcode = O_IP_DST_LOOKUP,
2360 		.etlv = IPFW_TLV_TBL_NAME,
2361 		.classifier = classify_srcdst,
2362 		.update = update_kidx,
2363 		.find_byname = table_findbyname,
2364 		.find_bykidx = table_findbykidx,
2365 		.create_object = create_table_compat,
2366 		.manage_sets = table_manage_sets,
2367 	},
2368 	{
2369 		.opcode = O_IP_FLOW_LOOKUP,
2370 		.etlv = IPFW_TLV_TBL_NAME,
2371 		.classifier = classify_flow,
2372 		.update = update_kidx,
2373 		.find_byname = table_findbyname,
2374 		.find_bykidx = table_findbykidx,
2375 		.create_object = create_table_compat,
2376 		.manage_sets = table_manage_sets,
2377 	},
2378 	{
2379 		.opcode = O_MAC_SRC_LOOKUP,
2380 		.etlv = IPFW_TLV_TBL_NAME,
2381 		.classifier = classify_mac_lookup,
2382 		.update = update_kidx,
2383 		.find_byname = table_findbyname,
2384 		.find_bykidx = table_findbykidx,
2385 		.create_object = create_table_compat,
2386 		.manage_sets = table_manage_sets,
2387 	},
2388 	{
2389 		.opcode = O_MAC_DST_LOOKUP,
2390 		.etlv = IPFW_TLV_TBL_NAME,
2391 		.classifier = classify_mac_lookup,
2392 		.update = update_kidx,
2393 		.find_byname = table_findbyname,
2394 		.find_bykidx = table_findbykidx,
2395 		.create_object = create_table_compat,
2396 		.manage_sets = table_manage_sets,
2397 	},
2398 	{
2399 		.opcode = O_XMIT,
2400 		.etlv = IPFW_TLV_TBL_NAME,
2401 		.classifier = classify_via,
2402 		.update = update_via,
2403 		.find_byname = table_findbyname,
2404 		.find_bykidx = table_findbykidx,
2405 		.create_object = create_table_compat,
2406 		.manage_sets = table_manage_sets,
2407 	},
2408 	{
2409 		.opcode = O_RECV,
2410 		.etlv = IPFW_TLV_TBL_NAME,
2411 		.classifier = classify_via,
2412 		.update = update_via,
2413 		.find_byname = table_findbyname,
2414 		.find_bykidx = table_findbykidx,
2415 		.create_object = create_table_compat,
2416 		.manage_sets = table_manage_sets_all,
2417 	},
2418 	{
2419 		.opcode = O_VIA,
2420 		.etlv = IPFW_TLV_TBL_NAME,
2421 		.classifier = classify_via,
2422 		.update = update_via,
2423 		.find_byname = table_findbyname,
2424 		.find_bykidx = table_findbykidx,
2425 		.create_object = create_table_compat,
2426 		.manage_sets = table_manage_sets,
2427 	},
2428 };
2429 
2430 static int
test_sets_cb(struct namedobj_instance * ni __unused,struct named_object * no,void * arg __unused)2431 test_sets_cb(struct namedobj_instance *ni __unused, struct named_object *no,
2432     void *arg __unused)
2433 {
2434 
2435 	/* Check that there aren't any tables in not default set */
2436 	if (no->set != 0)
2437 		return (EBUSY);
2438 	return (0);
2439 }
2440 
2441 /*
2442  * Switch between "set 0" and "rule's set" table binding,
2443  * Check all ruleset bindings and permits changing
2444  * IFF each binding has both rule AND table in default set (set 0).
2445  *
2446  * Returns 0 on success.
2447  */
2448 int
ipfw_switch_tables_namespace(struct ip_fw_chain * ch,unsigned int sets)2449 ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int sets)
2450 {
2451 	struct opcode_obj_rewrite *rw;
2452 	struct namedobj_instance *ni;
2453 	struct named_object *no;
2454 	struct ip_fw *rule;
2455 	ipfw_insn *cmd;
2456 	int cmdlen, i, l;
2457 	uint32_t kidx;
2458 	uint8_t subtype;
2459 
2460 	IPFW_UH_WLOCK(ch);
2461 
2462 	if (V_fw_tables_sets == sets) {
2463 		IPFW_UH_WUNLOCK(ch);
2464 		return (0);
2465 	}
2466 	ni = CHAIN_TO_NI(ch);
2467 	if (sets == 0) {
2468 		/*
2469 		 * Prevent disabling sets support if we have some tables
2470 		 * in not default sets.
2471 		 */
2472 		if (ipfw_objhash_foreach_type(ni, test_sets_cb,
2473 		    NULL, IPFW_TLV_TBL_NAME) != 0) {
2474 			IPFW_UH_WUNLOCK(ch);
2475 			return (EBUSY);
2476 		}
2477 	}
2478 	/*
2479 	 * Scan all rules and examine tables opcodes.
2480 	 */
2481 	for (i = 0; i < ch->n_rules; i++) {
2482 		rule = ch->map[i];
2483 
2484 		l = rule->cmd_len;
2485 		cmd = rule->cmd;
2486 		cmdlen = 0;
2487 		for ( ;	l > 0 ; l -= cmdlen, cmd += cmdlen) {
2488 			cmdlen = F_LEN(cmd);
2489 			/* Check only tables opcodes */
2490 			for (kidx = 0, rw = opcodes;
2491 			    rw < opcodes + nitems(opcodes); rw++) {
2492 				if (rw->opcode != cmd->opcode)
2493 					continue;
2494 				if (rw->classifier(cmd, &kidx, &subtype) == 0)
2495 					break;
2496 			}
2497 			if (kidx == 0)
2498 				continue;
2499 			no = ipfw_objhash_lookup_kidx(ni, kidx);
2500 			/* Check if both table object and rule has the set 0 */
2501 			if (no->set != 0 || rule->set != 0) {
2502 				IPFW_UH_WUNLOCK(ch);
2503 				return (EBUSY);
2504 			}
2505 		}
2506 	}
2507 	V_fw_tables_sets = sets;
2508 	IPFW_UH_WUNLOCK(ch);
2509 	return (0);
2510 }
2511 
/*
 * Checks table name for validity.
 * The check is delegated to the generic object name checker; anything
 * more elaborate should be done in userland.
 *
 * Returns 0 if name is considered valid.
 */
static int
check_table_name(const char *name)
{
	int error;

	/*
	 * TODO: do some more complicated checks
	 */
	error = ipfw_check_object_name_generic(name);
	return (error);
}
2528 
2529 /*
2530  * Finds table config based on either legacy index
2531  * or name in ntlv.
2532  * Note @ti structure contains unchecked data from userland.
2533  *
2534  * Returns 0 in success and fills in @tc with found config
2535  */
2536 static int
find_table_err(struct namedobj_instance * ni,struct tid_info * ti,struct table_config ** tc)2537 find_table_err(struct namedobj_instance *ni, struct tid_info *ti,
2538     struct table_config **tc)
2539 {
2540 	char *name, bname[16];
2541 	struct named_object *no;
2542 	ipfw_obj_ntlv *ntlv;
2543 	uint32_t set;
2544 
2545 	if (ti->tlvs != NULL) {
2546 		ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx,
2547 		    IPFW_TLV_TBL_NAME);
2548 		if (ntlv == NULL)
2549 			return (EINVAL);
2550 		name = ntlv->name;
2551 
2552 		/*
2553 		 * Use set provided by @ti instead of @ntlv one.
2554 		 * This is needed due to different sets behavior
2555 		 * controlled by V_fw_tables_sets.
2556 		 */
2557 		set = (V_fw_tables_sets != 0) ? ti->set : 0;
2558 	} else {
2559 		snprintf(bname, sizeof(bname), "%d", ti->uidx);
2560 		name = bname;
2561 		set = 0;
2562 	}
2563 
2564 	no = ipfw_objhash_lookup_name(ni, set, name);
2565 	*tc = (struct table_config *)no;
2566 
2567 	return (0);
2568 }
2569 
/*
 * Convenience wrapper around find_table_err() for callers that do not
 * need the error code.
 * Note @ti structure contains unchecked data from userland.
 *
 * Returns pointer to table_config or NULL.
 */
static struct table_config *
find_table(struct namedobj_instance *ni, struct tid_info *ti)
{
	struct table_config *found;

	return ((find_table_err(ni, ti, &found) == 0) ? found : NULL);
}
2587 
2588 /*
2589  * Allocate new table config structure using
2590  * specified @algo and @aname.
2591  *
2592  * Returns pointer to config or NULL.
2593  */
2594 static struct table_config *
alloc_table_config(struct ip_fw_chain * ch,struct tid_info * ti,struct table_algo * ta,char * aname,uint8_t tflags)2595 alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti,
2596     struct table_algo *ta, char *aname, uint8_t tflags)
2597 {
2598 	char *name, bname[16];
2599 	struct table_config *tc;
2600 	int error;
2601 	ipfw_obj_ntlv *ntlv;
2602 	uint32_t set;
2603 
2604 	if (ti->tlvs != NULL) {
2605 		ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx,
2606 		    IPFW_TLV_TBL_NAME);
2607 		if (ntlv == NULL)
2608 			return (NULL);
2609 		name = ntlv->name;
2610 		set = (V_fw_tables_sets == 0) ? 0 : ntlv->set;
2611 	} else {
2612 		/* Compat part: convert number to string representation */
2613 		snprintf(bname, sizeof(bname), "%d", ti->uidx);
2614 		name = bname;
2615 		set = 0;
2616 	}
2617 
2618 	tc = malloc(sizeof(struct table_config), M_IPFW, M_WAITOK | M_ZERO);
2619 	tc->no.name = tc->tablename;
2620 	tc->no.subtype = ta->type;
2621 	tc->no.set = set;
2622 	tc->tflags = tflags;
2623 	tc->ta = ta;
2624 	strlcpy(tc->tablename, name, sizeof(tc->tablename));
2625 	/* Set "shared" value type by default */
2626 	tc->vshared = 1;
2627 
2628 	/* Preallocate data structures for new tables */
2629 	error = ta->init(ch, &tc->astate, &tc->ti_copy, aname, tflags);
2630 	if (error != 0) {
2631 		free(tc, M_IPFW);
2632 		return (NULL);
2633 	}
2634 
2635 	return (tc);
2636 }
2637 
2638 /*
2639  * Destroys table state and config.
2640  */
2641 static void
free_table_config(struct namedobj_instance * ni,struct table_config * tc)2642 free_table_config(struct namedobj_instance *ni, struct table_config *tc)
2643 {
2644 
2645 	KASSERT(tc->linked == 0, ("free() on linked config"));
2646 	/* UH lock MUST NOT be held */
2647 
2648 	/*
2649 	 * We're using ta without any locking/referencing.
2650 	 * TODO: fix this if we're going to use unloadable algos.
2651 	 */
2652 	tc->ta->destroy(tc->astate, &tc->ti_copy);
2653 	free(tc, M_IPFW);
2654 }
2655 
2656 /*
2657  * Links @tc to @chain table named instance.
2658  * Sets appropriate type/states in @chain table info.
2659  */
2660 static void
link_table(struct ip_fw_chain * ch,struct table_config * tc)2661 link_table(struct ip_fw_chain *ch, struct table_config *tc)
2662 {
2663 	struct namedobj_instance *ni;
2664 	struct table_info *ti;
2665 	uint16_t kidx;
2666 
2667 	IPFW_UH_WLOCK_ASSERT(ch);
2668 
2669 	ni = CHAIN_TO_NI(ch);
2670 	kidx = tc->no.kidx;
2671 
2672 	ipfw_objhash_add(ni, &tc->no);
2673 
2674 	ti = KIDX_TO_TI(ch, kidx);
2675 	*ti = tc->ti_copy;
2676 
2677 	/* Notify algo on real @ti address */
2678 	if (tc->ta->change_ti != NULL)
2679 		tc->ta->change_ti(tc->astate, ti);
2680 
2681 	tc->linked = 1;
2682 	tc->ta->refcnt++;
2683 }
2684 
2685 /*
2686  * Unlinks @tc from @chain table named instance.
2687  * Zeroes states in @chain and stores them in @tc.
2688  */
2689 static void
unlink_table(struct ip_fw_chain * ch,struct table_config * tc)2690 unlink_table(struct ip_fw_chain *ch, struct table_config *tc)
2691 {
2692 	struct namedobj_instance *ni;
2693 	struct table_info *ti;
2694 	uint16_t kidx;
2695 
2696 	IPFW_UH_WLOCK_ASSERT(ch);
2697 	IPFW_WLOCK_ASSERT(ch);
2698 
2699 	ni = CHAIN_TO_NI(ch);
2700 	kidx = tc->no.kidx;
2701 
2702 	/* Clear state. @ti copy is already saved inside @tc */
2703 	ipfw_objhash_del(ni, &tc->no);
2704 	ti = KIDX_TO_TI(ch, kidx);
2705 	memset(ti, 0, sizeof(struct table_info));
2706 	tc->linked = 0;
2707 	tc->ta->refcnt--;
2708 
2709 	/* Notify algo on real @ti address */
2710 	if (tc->ta->change_ti != NULL)
2711 		tc->ta->change_ti(tc->astate, NULL);
2712 }
2713 
2714 static struct ipfw_sopt_handler	scodes[] = {
2715     { IP_FW_TABLE_XCREATE,	IP_FW3_OPVER, HDIR_SET,	create_table },
2716     { IP_FW_TABLE_XDESTROY,	IP_FW3_OPVER, HDIR_SET,	flush_table_v0 },
2717     { IP_FW_TABLE_XFLUSH,	IP_FW3_OPVER, HDIR_SET,	flush_table_v0 },
2718     { IP_FW_TABLE_XMODIFY,	IP_FW3_OPVER, HDIR_BOTH, modify_table },
2719     { IP_FW_TABLE_XINFO,	IP_FW3_OPVER, HDIR_GET,	describe_table },
2720     { IP_FW_TABLES_XLIST,	IP_FW3_OPVER, HDIR_GET,	list_tables },
2721     { IP_FW_TABLE_XLIST,	IP_FW3_OPVER, HDIR_GET,	dump_table_v1 },
2722     { IP_FW_TABLE_XADD,		IP_FW3_OPVER, HDIR_BOTH, manage_table_ent_v1 },
2723     { IP_FW_TABLE_XDEL,		IP_FW3_OPVER, HDIR_BOTH, manage_table_ent_v1 },
2724     { IP_FW_TABLE_XFIND,	IP_FW3_OPVER, HDIR_GET,	find_table_entry },
2725     { IP_FW_TABLE_XSWAP,	IP_FW3_OPVER, HDIR_SET,	swap_table },
2726     { IP_FW_TABLES_ALIST,	IP_FW3_OPVER, HDIR_GET,	list_table_algo },
2727 };
2728 
2729 static int
destroy_table_locked(struct namedobj_instance * ni,struct named_object * no,void * arg)2730 destroy_table_locked(struct namedobj_instance *ni, struct named_object *no,
2731     void *arg)
2732 {
2733 
2734 	unlink_table((struct ip_fw_chain *)arg, (struct table_config *)no);
2735 	if (ipfw_objhash_free_idx(ni, no->kidx) != 0)
2736 		printf("Error unlinking kidx %d from table %s\n",
2737 		    no->kidx, no->name);
2738 	free_table_config(ni, (struct table_config *)no);
2739 	return (0);
2740 }
2741 
2742 /*
2743  * Shuts tables module down.
2744  */
2745 void
ipfw_destroy_tables(struct ip_fw_chain * ch,int last)2746 ipfw_destroy_tables(struct ip_fw_chain *ch, int last)
2747 {
2748 
2749 	IPFW_DEL_SOPT_HANDLER(last, scodes);
2750 	IPFW_DEL_OBJ_REWRITER(last, opcodes);
2751 
2752 	/* Remove all tables from working set */
2753 	IPFW_UH_WLOCK(ch);
2754 	IPFW_WLOCK(ch);
2755 	ipfw_objhash_foreach(CHAIN_TO_NI(ch), destroy_table_locked, ch);
2756 	IPFW_WUNLOCK(ch);
2757 	IPFW_UH_WUNLOCK(ch);
2758 
2759 	/* Free pointers itself */
2760 	free(ch->tablestate, M_IPFW);
2761 
2762 	ipfw_table_value_destroy(ch, last);
2763 	ipfw_table_algo_destroy(ch);
2764 
2765 	ipfw_objhash_destroy(CHAIN_TO_NI(ch));
2766 	free(CHAIN_TO_TCFG(ch), M_IPFW);
2767 }
2768 
2769 /*
2770  * Starts tables module.
2771  */
2772 int
ipfw_init_tables(struct ip_fw_chain * ch,int first)2773 ipfw_init_tables(struct ip_fw_chain *ch, int first)
2774 {
2775 	struct tables_config *tcfg;
2776 
2777 	/* Allocate pointers */
2778 	ch->tablestate = malloc(V_fw_tables_max * sizeof(struct table_info),
2779 	    M_IPFW, M_WAITOK | M_ZERO);
2780 
2781 	tcfg = malloc(sizeof(struct tables_config), M_IPFW, M_WAITOK | M_ZERO);
2782 	tcfg->namehash = ipfw_objhash_create(V_fw_tables_max,
2783 	    DEFAULT_OBJHASH_SIZE);
2784 	ch->tblcfg = tcfg;
2785 
2786 	ipfw_table_value_init(ch, first);
2787 	ipfw_table_algo_init(ch);
2788 
2789 	IPFW_ADD_OBJ_REWRITER(first, opcodes);
2790 	IPFW_ADD_SOPT_HANDLER(first, scodes);
2791 	return (0);
2792 }
2793