xref: /freebsd/sys/netpfil/ipfw/ip_fw_table.c (revision 4a77657cbc011ea657ccb079fff6b58b295eccb0)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko.
5  * Copyright (c) 2014-2024 Yandex LLC
6  * Copyright (c) 2014 Alexander V. Chernikov
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 /*
32  * Lookup table support for ipfw.
33  *
34  * This file contains handlers for all generic tables' operations:
35  * add/del/flush entries, list/dump tables etc..
36  *
37  * Table data modification is protected by both UH and runtime lock
38  * while reading configuration/data is protected by UH lock.
39  *
40  * Lookup algorithms for all table types are located in ip_fw_table_algo.c
41  */
42 
43 #include "opt_ipfw.h"
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/malloc.h>
48 #include <sys/kernel.h>
49 #include <sys/lock.h>
50 #include <sys/rwlock.h>
51 #include <sys/rmlock.h>
52 #include <sys/socket.h>
53 #include <sys/socketvar.h>
54 #include <sys/queue.h>
55 #include <net/if.h>	/* ip_fw.h requires IFNAMSIZ */
56 
57 #include <netinet/in.h>
58 #include <netinet/ip_var.h>	/* struct ipfw_rule_ref */
59 #include <netinet/ip_fw.h>
60 
61 #include <netpfil/ipfw/ip_fw_private.h>
62 #include <netpfil/ipfw/ip_fw_table.h>
63 
64  /*
65  * Table has the following `type` concepts:
66  *
67  * `no.type` represents lookup key type (addr, ifp, uid, etc..)
68  * vmask represents bitmask of table values which are present at the moment.
69  * Special IPFW_VTYPE_LEGACY ( (uint32_t)-1 ) represents old
70  * single-value-for-all approach.
71  */
72 struct table_config {
73 	struct named_object	no;
74 	uint8_t		tflags;		/* type flags */
75 	uint8_t		locked;		/* 1 if locked from changes */
76 	uint8_t		linked;		/* 1 if already linked */
77 	uint8_t		ochanged;	/* used by set swapping */
78 	uint8_t		vshared;	/* 1 if using shared value array */
79 	uint8_t		spare[3];
80 	uint32_t	count;		/* Number of records */
81 	uint32_t	limit;		/* Max number of records */
82 	uint32_t	vmask;		/* bitmask with supported values */
83 	uint32_t	ocount;		/* used by set swapping */
84 	uint64_t	gencnt;		/* generation count */
85 	char		tablename[64];	/* table name */
86 	struct table_algo	*ta;	/* Callbacks for given algo */
87 	void		*astate;	/* algorithm state */
88 	struct table_info	ti_copy;	/* data to put to table_info */
89 	struct namedobj_instance	*vi;
90 };
91 
92 static int find_table_err(struct namedobj_instance *ni, struct tid_info *ti,
93     struct table_config **tc);
94 static struct table_config *find_table(struct namedobj_instance *ni,
95     struct tid_info *ti);
96 static struct table_config *alloc_table_config(struct ip_fw_chain *ch,
97     struct tid_info *ti, struct table_algo *ta, char *adata, uint8_t tflags);
98 static void free_table_config(struct namedobj_instance *ni,
99     struct table_config *tc);
100 static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
101     char *aname, ipfw_xtable_info *i, uint32_t *pkidx, int ref);
102 static void link_table(struct ip_fw_chain *ch, struct table_config *tc);
103 static void unlink_table(struct ip_fw_chain *ch, struct table_config *tc);
104 static int find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
105     struct tentry_info *tei, uint32_t count, int op, struct table_config **ptc);
106 #define	OP_ADD	1
107 #define	OP_DEL	0
108 static int export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
109     struct sockopt_data *sd);
110 static void export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
111     ipfw_xtable_info *i);
112 static int dump_table_tentry(void *e, void *arg);
113 
114 static int swap_tables(struct ip_fw_chain *ch, struct tid_info *a,
115     struct tid_info *b);
116 
117 static int check_table_name(const char *name);
118 static int check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts,
119     struct table_config *tc, struct table_info *ti, uint32_t count);
120 static int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti);
121 
122 static struct table_algo *find_table_algo(struct tables_config *tableconf,
123     struct tid_info *ti, char *name);
124 
125 static void objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti);
126 static void ntlv_to_ti(struct _ipfw_obj_ntlv *ntlv, struct tid_info *ti);
127 
128 #define	CHAIN_TO_NI(chain)	(CHAIN_TO_TCFG(chain)->namehash)
129 #define	KIDX_TO_TI(ch, k)	(&(((struct table_info *)(ch)->tablestate)[k]))
130 
131 #define	TA_BUF_SZ	128	/* On-stack buffer for add/delete state */
132 
133 void
rollback_toperation_state(struct ip_fw_chain * ch,void * object)134 rollback_toperation_state(struct ip_fw_chain *ch, void *object)
135 {
136 	struct tables_config *tcfg;
137 	struct op_state *os;
138 
139 	tcfg = CHAIN_TO_TCFG(ch);
140 	TAILQ_FOREACH(os, &tcfg->state_list, next)
141 		os->func(object, os);
142 }
143 
144 void
add_toperation_state(struct ip_fw_chain * ch,struct tableop_state * ts)145 add_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts)
146 {
147 	struct tables_config *tcfg;
148 
149 	tcfg = CHAIN_TO_TCFG(ch);
150 	TAILQ_INSERT_HEAD(&tcfg->state_list, &ts->opstate, next);
151 }
152 
153 void
del_toperation_state(struct ip_fw_chain * ch,struct tableop_state * ts)154 del_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts)
155 {
156 	struct tables_config *tcfg;
157 
158 	tcfg = CHAIN_TO_TCFG(ch);
159 	TAILQ_REMOVE(&tcfg->state_list, &ts->opstate, next);
160 }
161 
162 void
tc_ref(struct table_config * tc)163 tc_ref(struct table_config *tc)
164 {
165 
166 	tc->no.refcnt++;
167 }
168 
169 void
tc_unref(struct table_config * tc)170 tc_unref(struct table_config *tc)
171 {
172 
173 	tc->no.refcnt--;
174 }
175 
176 static struct table_value *
get_table_value(struct ip_fw_chain * ch,struct table_config * tc,uint32_t kidx)177 get_table_value(struct ip_fw_chain *ch, struct table_config *tc, uint32_t kidx)
178 {
179 	struct table_value *pval;
180 
181 	pval = (struct table_value *)ch->valuestate;
182 
183 	return (&pval[kidx]);
184 }
185 
186 /*
187  * Checks if we're able to insert/update entry @tei into table
188  * w.r.t @tc limits.
189  * May alter @tei to indicate insertion error / insert
190  * options.
191  *
192  * Returns 0 if operation can be performed/
193  */
194 static int
check_table_limit(struct table_config * tc,struct tentry_info * tei)195 check_table_limit(struct table_config *tc, struct tentry_info *tei)
196 {
197 
198 	if (tc->limit == 0 || tc->count < tc->limit)
199 		return (0);
200 
201 	if ((tei->flags & TEI_FLAGS_UPDATE) == 0) {
202 		/* Notify userland on error cause */
203 		tei->flags |= TEI_FLAGS_LIMIT;
204 		return (EFBIG);
205 	}
206 
207 	/*
208 	 * We have UPDATE flag set.
209 	 * Permit updating record (if found),
210 	 * but restrict adding new one since we've
211 	 * already hit the limit.
212 	 */
213 	tei->flags |= TEI_FLAGS_DONTADD;
214 
215 	return (0);
216 }
217 
218 /*
219  * Convert algorithm callback return code into
220  * one of pre-defined states known by userland.
221  */
222 static void
store_tei_result(struct tentry_info * tei,int op,int error,uint32_t num)223 store_tei_result(struct tentry_info *tei, int op, int error, uint32_t num)
224 {
225 	int flag;
226 
227 	flag = 0;
228 
229 	switch (error) {
230 	case 0:
231 		if (op == OP_ADD && num != 0)
232 			flag = TEI_FLAGS_ADDED;
233 		if (op == OP_DEL)
234 			flag = TEI_FLAGS_DELETED;
235 		break;
236 	case ENOENT:
237 		flag = TEI_FLAGS_NOTFOUND;
238 		break;
239 	case EEXIST:
240 		flag = TEI_FLAGS_EXISTS;
241 		break;
242 	default:
243 		flag = TEI_FLAGS_ERROR;
244 	}
245 
246 	tei->flags |= flag;
247 }
248 
249 /*
250  * Creates and references table with default parameters.
251  * Saves table config, algo and allocated kidx info @ptc, @pta and
252  * @pkidx if non-zero.
253  * Used for table auto-creation to support old binaries.
254  *
255  * Returns 0 on success.
256  */
257 static int
create_table_compat(struct ip_fw_chain * ch,struct tid_info * ti,uint32_t * pkidx)258 create_table_compat(struct ip_fw_chain *ch, struct tid_info *ti,
259     uint32_t *pkidx)
260 {
261 	ipfw_xtable_info xi;
262 	int error;
263 
264 	memset(&xi, 0, sizeof(xi));
265 	/* Set default value mask for legacy clients */
266 	xi.vmask = IPFW_VTYPE_LEGACY;
267 
268 	error = create_table_internal(ch, ti, NULL, &xi, pkidx, 1);
269 	if (error != 0)
270 		return (error);
271 
272 	return (0);
273 }
274 
275 /*
276  * Find and reference existing table optionally
277  * creating new one.
278  *
279  * Saves found table config into @ptc.
280  * Note function may drop/acquire UH_WLOCK.
281  * Returns 0 if table was found/created and referenced
282  * or non-zero return code.
283  */
284 static int
find_ref_table(struct ip_fw_chain * ch,struct tid_info * ti,struct tentry_info * tei,uint32_t count,int op,struct table_config ** ptc)285 find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
286     struct tentry_info *tei, uint32_t count, int op,
287     struct table_config **ptc)
288 {
289 	struct namedobj_instance *ni;
290 	struct table_config *tc;
291 	uint32_t kidx;
292 	int error;
293 
294 	IPFW_UH_WLOCK_ASSERT(ch);
295 
296 	ni = CHAIN_TO_NI(ch);
297 	tc = NULL;
298 	if ((tc = find_table(ni, ti)) != NULL) {
299 		/* check table type */
300 		if (tc->no.subtype != ti->type)
301 			return (EINVAL);
302 
303 		if (tc->locked != 0)
304 			return (EACCES);
305 
306 		/* Try to exit early on limit hit */
307 		if (op == OP_ADD && count == 1 &&
308 		    check_table_limit(tc, tei) != 0)
309 			return (EFBIG);
310 
311 		/* Reference and return */
312 		tc->no.refcnt++;
313 		*ptc = tc;
314 		return (0);
315 	}
316 
317 	if (op == OP_DEL)
318 		return (ESRCH);
319 
320 	/* Compatibility mode: create new table for old clients */
321 	if ((tei->flags & TEI_FLAGS_COMPAT) == 0)
322 		return (ESRCH);
323 
324 	IPFW_UH_WUNLOCK(ch);
325 	error = create_table_compat(ch, ti, &kidx);
326 	IPFW_UH_WLOCK(ch);
327 
328 	if (error != 0)
329 		return (error);
330 
331 	tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx);
332 	KASSERT(tc != NULL, ("create_table_compat returned bad idx %u", kidx));
333 
334 	/* OK, now we've got referenced table. */
335 	*ptc = tc;
336 	return (0);
337 }
338 
339 /*
340  * Rolls back already @added to @tc entries using state array @ta_buf_m.
341  * Assume the following layout:
342  * 1) ADD state (ta_buf_m[0] ... t_buf_m[added - 1]) for handling update cases
343  * 2) DEL state (ta_buf_m[count[ ... t_buf_m[count + added - 1])
344  *   for storing deleted state
345  */
346 static void
rollback_added_entries(struct ip_fw_chain * ch,struct table_config * tc,struct table_info * tinfo,struct tentry_info * tei,caddr_t ta_buf_m,uint32_t count,uint32_t added)347 rollback_added_entries(struct ip_fw_chain *ch, struct table_config *tc,
348     struct table_info *tinfo, struct tentry_info *tei, caddr_t ta_buf_m,
349     uint32_t count, uint32_t added)
350 {
351 	struct table_algo *ta;
352 	struct tentry_info *ptei;
353 	caddr_t v, vv;
354 	size_t ta_buf_sz;
355 	int error __diagused, i;
356 	uint32_t num;
357 
358 	IPFW_UH_WLOCK_ASSERT(ch);
359 
360 	ta = tc->ta;
361 	ta_buf_sz = ta->ta_buf_size;
362 	v = ta_buf_m;
363 	vv = v + count * ta_buf_sz;
364 	for (i = 0; i < added; i++, v += ta_buf_sz, vv += ta_buf_sz) {
365 		ptei = &tei[i];
366 		if ((ptei->flags & TEI_FLAGS_UPDATED) != 0) {
367 			/*
368 			 * We have old value stored by previous
369 			 * call in @ptei->value. Do add once again
370 			 * to restore it.
371 			 */
372 			error = ta->add(tc->astate, tinfo, ptei, v, &num);
373 			KASSERT(error == 0, ("rollback UPDATE fail"));
374 			KASSERT(num == 0, ("rollback UPDATE fail2"));
375 			continue;
376 		}
377 
378 		error = ta->prepare_del(ch, ptei, vv);
379 		KASSERT(error == 0, ("pre-rollback INSERT failed"));
380 		error = ta->del(tc->astate, tinfo, ptei, vv, &num);
381 		KASSERT(error == 0, ("rollback INSERT failed"));
382 		tc->count -= num;
383 	}
384 }
385 
386 /*
387  * Prepares add/del state for all @count entries in @tei.
388  * Uses either stack buffer (@ta_buf) or allocates a new one.
389  * Stores pointer to allocated buffer back to @ta_buf.
390  *
391  * Returns 0 on success.
392  */
393 static int
prepare_batch_buffer(struct ip_fw_chain * ch,struct table_algo * ta,struct tentry_info * tei,uint32_t count,int op,caddr_t * ta_buf)394 prepare_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta,
395     struct tentry_info *tei, uint32_t count, int op, caddr_t *ta_buf)
396 {
397 	caddr_t ta_buf_m, v;
398 	size_t ta_buf_sz, sz;
399 	struct tentry_info *ptei;
400 	int error, i;
401 
402 	error = 0;
403 	ta_buf_sz = ta->ta_buf_size;
404 	if (count == 1) {
405 		/* Single add/delete, use on-stack buffer */
406 		memset(*ta_buf, 0, TA_BUF_SZ);
407 		ta_buf_m = *ta_buf;
408 	} else {
409 		/*
410 		 * Multiple adds/deletes, allocate larger buffer
411 		 *
412 		 * Note we need 2xcount buffer for add case:
413 		 * we have hold both ADD state
414 		 * and DELETE state (this may be needed
415 		 * if we need to rollback all changes)
416 		 */
417 		sz = count * ta_buf_sz;
418 		ta_buf_m = malloc((op == OP_ADD) ? sz * 2 : sz, M_TEMP,
419 		    M_WAITOK | M_ZERO);
420 	}
421 
422 	v = ta_buf_m;
423 	for (i = 0; i < count; i++, v += ta_buf_sz) {
424 		ptei = &tei[i];
425 		error = (op == OP_ADD) ?
426 		    ta->prepare_add(ch, ptei, v) : ta->prepare_del(ch, ptei, v);
427 
428 		/*
429 		 * Some syntax error (incorrect mask, or address, or
430 		 * anything). Return error regardless of atomicity
431 		 * settings.
432 		 */
433 		if (error != 0)
434 			break;
435 	}
436 
437 	*ta_buf = ta_buf_m;
438 	return (error);
439 }
440 
441 /*
442  * Flushes allocated state for each @count entries in @tei.
443  * Frees @ta_buf_m if differs from stack buffer @ta_buf.
444  */
445 static void
flush_batch_buffer(struct ip_fw_chain * ch,struct table_algo * ta,struct tentry_info * tei,uint32_t count,int rollback,caddr_t ta_buf_m,caddr_t ta_buf)446 flush_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta,
447     struct tentry_info *tei, uint32_t count, int rollback,
448     caddr_t ta_buf_m, caddr_t ta_buf)
449 {
450 	caddr_t v;
451 	struct tentry_info *ptei;
452 	size_t ta_buf_sz;
453 	int i;
454 
455 	ta_buf_sz = ta->ta_buf_size;
456 
457 	/* Run cleaning callback anyway */
458 	v = ta_buf_m;
459 	for (i = 0; i < count; i++, v += ta_buf_sz) {
460 		ptei = &tei[i];
461 		ta->flush_entry(ch, ptei, v);
462 		if (ptei->ptv != NULL) {
463 			free(ptei->ptv, M_IPFW);
464 			ptei->ptv = NULL;
465 		}
466 	}
467 
468 	/* Clean up "deleted" state in case of rollback */
469 	if (rollback != 0) {
470 		v = ta_buf_m + count * ta_buf_sz;
471 		for (i = 0; i < count; i++, v += ta_buf_sz)
472 			ta->flush_entry(ch, &tei[i], v);
473 	}
474 
475 	if (ta_buf_m != ta_buf)
476 		free(ta_buf_m, M_TEMP);
477 }
478 
479 static void
rollback_add_entry(void * object,struct op_state * _state)480 rollback_add_entry(void *object, struct op_state *_state)
481 {
482 	struct ip_fw_chain *ch __diagused;
483 	struct tableop_state *ts;
484 
485 	ts = (struct tableop_state *)_state;
486 
487 	if (ts->tc != object && ts->ch != object)
488 		return;
489 
490 	ch = ts->ch;
491 
492 	IPFW_UH_WLOCK_ASSERT(ch);
493 
494 	/* Call specifid unlockers */
495 	rollback_table_values(ts);
496 
497 	/* Indicate we've called */
498 	ts->modified = 1;
499 }
500 
501 /*
502  * Adds/updates one or more entries in table @ti.
503  *
504  * Function may drop/reacquire UH wlock multiple times due to
505  * items alloc, algorithm callbacks (check_space), value linkage
506  * (new values, value storage realloc), etc..
507  * Other processes like other adds (which may involve storage resize),
508  * table swaps (which changes table data and may change algo type),
509  * table modify (which may change value mask) may be executed
510  * simultaneously so we need to deal with it.
511  *
512  * The following approach was implemented:
513  * we have per-chain linked list, protected with UH lock.
514  * add_table_entry prepares special on-stack structure wthich is passed
515  * to its descendants. Users add this structure to this list before unlock.
516  * After performing needed operations and acquiring UH lock back, each user
517  * checks if structure has changed. If true, it rolls local state back and
518  * returns without error to the caller.
519  * add_table_entry() on its own checks if structure has changed and restarts
520  * its operation from the beginning (goto restart).
521  *
522  * Functions which are modifying fields of interest (currently
523  *   resize_shared_value_storage() and swap_tables() )
524  * traverses given list while holding UH lock immediately before
525  * performing their operations calling function provided be list entry
526  * ( currently rollback_add_entry  ) which performs rollback for all necessary
527  * state and sets appropriate values in structure indicating rollback
528  * has happened.
529  *
530  * Algo interaction:
531  * Function references @ti first to ensure table won't
532  * disappear or change its type.
533  * After that, prepare_add callback is called for each @tei entry.
534  * Next, we try to add each entry under UH+WHLOCK
535  * using add() callback.
536  * Finally, we free all state by calling flush_entry callback
537  * for each @tei.
538  *
539  * Returns 0 on success.
540  */
541 int
add_table_entry(struct ip_fw_chain * ch,struct tid_info * ti,struct tentry_info * tei,uint8_t flags,uint32_t count)542 add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
543     struct tentry_info *tei, uint8_t flags, uint32_t count)
544 {
545 	struct table_config *tc;
546 	struct table_algo *ta;
547 	struct tentry_info *ptei;
548 	struct tableop_state ts;
549 	char ta_buf[TA_BUF_SZ];
550 	caddr_t ta_buf_m, v;
551 	uint32_t kidx, num, numadd;
552 	int error, first_error, i, rollback;
553 
554 	memset(&ts, 0, sizeof(ts));
555 	ta = NULL;
556 	IPFW_UH_WLOCK(ch);
557 
558 	/*
559 	 * Find and reference existing table.
560 	 */
561 restart:
562 	if (ts.modified != 0) {
563 		IPFW_UH_WUNLOCK(ch);
564 		flush_batch_buffer(ch, ta, tei, count, rollback,
565 		    ta_buf_m, ta_buf);
566 		memset(&ts, 0, sizeof(ts));
567 		ta = NULL;
568 		IPFW_UH_WLOCK(ch);
569 	}
570 
571 	error = find_ref_table(ch, ti, tei, count, OP_ADD, &tc);
572 	if (error != 0) {
573 		IPFW_UH_WUNLOCK(ch);
574 		return (error);
575 	}
576 	ta = tc->ta;
577 
578 	/* Fill in tablestate */
579 	ts.ch = ch;
580 	ts.opstate.func = rollback_add_entry;
581 	ts.tc = tc;
582 	ts.vshared = tc->vshared;
583 	ts.vmask = tc->vmask;
584 	ts.ta = ta;
585 	ts.tei = tei;
586 	ts.count = count;
587 	rollback = 0;
588 	add_toperation_state(ch, &ts);
589 	IPFW_UH_WUNLOCK(ch);
590 
591 	/* Allocate memory and prepare record(s) */
592 	/* Pass stack buffer by default */
593 	ta_buf_m = ta_buf;
594 	error = prepare_batch_buffer(ch, ta, tei, count, OP_ADD, &ta_buf_m);
595 
596 	IPFW_UH_WLOCK(ch);
597 	del_toperation_state(ch, &ts);
598 	/* Drop reference we've used in first search */
599 	tc->no.refcnt--;
600 
601 	/* Check prepare_batch_buffer() error */
602 	if (error != 0)
603 		goto cleanup;
604 
605 	/*
606 	 * Check if table swap has happened.
607 	 * (so table algo might be changed).
608 	 * Restart operation to achieve consistent behavior.
609 	 */
610 	if (ts.modified != 0)
611 		goto restart;
612 
613 	/*
614 	 * Link all values values to shared/per-table value array.
615 	 *
616 	 * May release/reacquire UH_WLOCK.
617 	 */
618 	error = ipfw_link_table_values(ch, &ts, flags);
619 	if (error != 0)
620 		goto cleanup;
621 	if (ts.modified != 0)
622 		goto restart;
623 
624 	/*
625 	 * Ensure we are able to add all entries without additional
626 	 * memory allocations. May release/reacquire UH_WLOCK.
627 	 */
628 	kidx = tc->no.kidx;
629 	error = check_table_space(ch, &ts, tc, KIDX_TO_TI(ch, kidx), count);
630 	if (error != 0)
631 		goto cleanup;
632 	if (ts.modified != 0)
633 		goto restart;
634 
635 	/* We've got valid table in @tc. Let's try to add data */
636 	kidx = tc->no.kidx;
637 	ta = tc->ta;
638 	numadd = 0;
639 	first_error = 0;
640 
641 	IPFW_WLOCK(ch);
642 
643 	v = ta_buf_m;
644 	for (i = 0; i < count; i++, v += ta->ta_buf_size) {
645 		ptei = &tei[i];
646 		num = 0;
647 		/* check limit before adding */
648 		if ((error = check_table_limit(tc, ptei)) == 0) {
649 			/*
650 			 * It should be safe to insert a record w/o
651 			 * a properly-linked value if atomicity is
652 			 * not required.
653 			 *
654 			 * If the added item does not have a valid value
655 			 * index, it would get rejected by ta->add().
656 			 * */
657 			error = ta->add(tc->astate, KIDX_TO_TI(ch, kidx),
658 			    ptei, v, &num);
659 			/* Set status flag to inform userland */
660 			store_tei_result(ptei, OP_ADD, error, num);
661 		}
662 		if (error == 0) {
663 			/* Update number of records to ease limit checking */
664 			tc->count += num;
665 			numadd += num;
666 			continue;
667 		}
668 
669 		if (first_error == 0)
670 			first_error = error;
671 
672 		/*
673 		 * Some error have happened. Check our atomicity
674 		 * settings: continue if atomicity is not required,
675 		 * rollback changes otherwise.
676 		 */
677 		if ((flags & IPFW_CTF_ATOMIC) == 0)
678 			continue;
679 
680 		rollback_added_entries(ch, tc, KIDX_TO_TI(ch, kidx),
681 		    tei, ta_buf_m, count, i);
682 
683 		rollback = 1;
684 		break;
685 	}
686 
687 	IPFW_WUNLOCK(ch);
688 
689 	ipfw_garbage_table_values(ch, tc, tei, count, rollback);
690 
691 	/* Permit post-add algorithm grow/rehash. */
692 	if (numadd != 0)
693 		check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0);
694 
695 	/* Return first error to user, if any */
696 	error = first_error;
697 
698 cleanup:
699 	IPFW_UH_WUNLOCK(ch);
700 
701 	flush_batch_buffer(ch, ta, tei, count, rollback, ta_buf_m, ta_buf);
702 
703 	return (error);
704 }
705 
706 /*
707  * Deletes one or more entries in table @ti.
708  *
709  * Returns 0 on success.
710  */
711 int
del_table_entry(struct ip_fw_chain * ch,struct tid_info * ti,struct tentry_info * tei,uint8_t flags,uint32_t count)712 del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
713     struct tentry_info *tei, uint8_t flags, uint32_t count)
714 {
715 	struct table_config *tc;
716 	struct table_algo *ta;
717 	struct tentry_info *ptei;
718 	char ta_buf[TA_BUF_SZ];
719 	caddr_t ta_buf_m, v;
720 	uint32_t kidx, num, numdel;
721 	int error, first_error, i;
722 
723 	/*
724 	 * Find and reference existing table.
725 	 */
726 	IPFW_UH_WLOCK(ch);
727 	error = find_ref_table(ch, ti, tei, count, OP_DEL, &tc);
728 	if (error != 0) {
729 		IPFW_UH_WUNLOCK(ch);
730 		return (error);
731 	}
732 	ta = tc->ta;
733 	IPFW_UH_WUNLOCK(ch);
734 
735 	/* Allocate memory and prepare record(s) */
736 	/* Pass stack buffer by default */
737 	ta_buf_m = ta_buf;
738 	error = prepare_batch_buffer(ch, ta, tei, count, OP_DEL, &ta_buf_m);
739 	if (error != 0)
740 		goto cleanup;
741 
742 	IPFW_UH_WLOCK(ch);
743 
744 	/* Drop reference we've used in first search */
745 	tc->no.refcnt--;
746 
747 	/*
748 	 * Check if table algo is still the same.
749 	 * (changed ta may be the result of table swap).
750 	 */
751 	if (ta != tc->ta) {
752 		IPFW_UH_WUNLOCK(ch);
753 		error = EINVAL;
754 		goto cleanup;
755 	}
756 
757 	kidx = tc->no.kidx;
758 	numdel = 0;
759 	first_error = 0;
760 
761 	IPFW_WLOCK(ch);
762 	v = ta_buf_m;
763 	for (i = 0; i < count; i++, v += ta->ta_buf_size) {
764 		ptei = &tei[i];
765 		num = 0;
766 		error = ta->del(tc->astate, KIDX_TO_TI(ch, kidx), ptei, v,
767 		    &num);
768 		/* Save state for userland */
769 		store_tei_result(ptei, OP_DEL, error, num);
770 		if (error != 0 && first_error == 0)
771 			first_error = error;
772 		tc->count -= num;
773 		numdel += num;
774 	}
775 	IPFW_WUNLOCK(ch);
776 
777 	/* Unlink non-used values */
778 	ipfw_garbage_table_values(ch, tc, tei, count, 0);
779 
780 	if (numdel != 0) {
781 		/* Run post-del hook to permit shrinking */
782 		check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0);
783 	}
784 
785 	IPFW_UH_WUNLOCK(ch);
786 
787 	/* Return first error to user, if any */
788 	error = first_error;
789 
790 cleanup:
791 	flush_batch_buffer(ch, ta, tei, count, 0, ta_buf_m, ta_buf);
792 
793 	return (error);
794 }
795 
796 /*
797  * Ensure that table @tc has enough space to add @count entries without
798  * need for reallocation.
799  *
800  * Callbacks order:
801  * 0) need_modify() (UH_WLOCK) - checks if @count items can be added w/o resize.
802  *
803  * 1) alloc_modify (no locks, M_WAITOK) - alloc new state based on @pflags.
804  * 2) prepare_modifyt (UH_WLOCK) - copy old data into new storage
805  * 3) modify (UH_WLOCK + WLOCK) - switch pointers
806  * 4) flush_modify (UH_WLOCK) - free state, if needed
807  *
808  * Returns 0 on success.
809  */
810 static int
check_table_space(struct ip_fw_chain * ch,struct tableop_state * ts,struct table_config * tc,struct table_info * ti,uint32_t count)811 check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts,
812     struct table_config *tc, struct table_info *ti, uint32_t count)
813 {
814 	struct table_algo *ta;
815 	uint64_t pflags;
816 	char ta_buf[TA_BUF_SZ];
817 	int error;
818 
819 	IPFW_UH_WLOCK_ASSERT(ch);
820 
821 	error = 0;
822 	ta = tc->ta;
823 	if (ta->need_modify == NULL)
824 		return (0);
825 
826 	/* Acquire reference not to loose @tc between locks/unlocks */
827 	tc->no.refcnt++;
828 
829 	/*
830 	 * TODO: think about avoiding race between large add/large delete
831 	 * operation on algorithm which implements shrinking along with
832 	 * growing.
833 	 */
834 	while (true) {
835 		pflags = 0;
836 		if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) {
837 			error = 0;
838 			break;
839 		}
840 
841 		/* We have to shrink/grow table */
842 		if (ts != NULL)
843 			add_toperation_state(ch, ts);
844 		IPFW_UH_WUNLOCK(ch);
845 
846 		memset(&ta_buf, 0, sizeof(ta_buf));
847 		error = ta->prepare_mod(ta_buf, &pflags);
848 
849 		IPFW_UH_WLOCK(ch);
850 		if (ts != NULL)
851 			del_toperation_state(ch, ts);
852 
853 		if (error != 0)
854 			break;
855 
856 		if (ts != NULL && ts->modified != 0) {
857 			/*
858 			 * Swap operation has happened
859 			 * so we're currently operating on other
860 			 * table data. Stop doing this.
861 			 */
862 			ta->flush_mod(ta_buf);
863 			break;
864 		}
865 
866 		/* Check if we still need to alter table */
867 		ti = KIDX_TO_TI(ch, tc->no.kidx);
868 		if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) {
869 			IPFW_UH_WUNLOCK(ch);
870 
871 			/*
872 			 * Other thread has already performed resize.
873 			 * Flush our state and return.
874 			 */
875 			ta->flush_mod(ta_buf);
876 			break;
877 		}
878 
879 		error = ta->fill_mod(tc->astate, ti, ta_buf, &pflags);
880 		if (error == 0) {
881 			/* Do actual modification */
882 			IPFW_WLOCK(ch);
883 			ta->modify(tc->astate, ti, ta_buf, pflags);
884 			IPFW_WUNLOCK(ch);
885 		}
886 
887 		/* Anyway, flush data and retry */
888 		ta->flush_mod(ta_buf);
889 	}
890 
891 	tc->no.refcnt--;
892 	return (error);
893 }
894 
895 /*
896  * Adds or deletes record in table.
897  * Data layout (v1)(current):
898  * Request: [ ipfw_obj_header
899  *   ipfw_obj_ctlv(IPFW_TLV_TBLENT_LIST) [ ipfw_obj_tentry x N ]
900  * ]
901  *
902  * Returns 0 on success
903  */
904 static int
manage_table_ent_v1(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)905 manage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
906     struct sockopt_data *sd)
907 {
908 	ipfw_obj_tentry *tent, *ptent;
909 	ipfw_obj_ctlv *ctlv;
910 	ipfw_obj_header *oh;
911 	struct tentry_info *ptei, tei, *tei_buf;
912 	struct tid_info ti;
913 	uint32_t kidx;
914 	int error, i, read;
915 
916 	/* Check minimum header size */
917 	if (sd->valsize < (sizeof(*oh) + sizeof(*ctlv)))
918 		return (EINVAL);
919 
920 	/* Check if passed data is too long */
921 	if (sd->valsize != sd->kavail)
922 		return (EINVAL);
923 
924 	oh = (ipfw_obj_header *)sd->kbuf;
925 
926 	/* Basic length checks for TLVs */
927 	if (oh->ntlv.head.length != sizeof(oh->ntlv))
928 		return (EINVAL);
929 
930 	read = sizeof(*oh);
931 
932 	ctlv = (ipfw_obj_ctlv *)(oh + 1);
933 	if (ctlv->head.length + read != sd->valsize)
934 		return (EINVAL);
935 
936 	read += sizeof(*ctlv);
937 	tent = (ipfw_obj_tentry *)(ctlv + 1);
938 	if (ctlv->count * sizeof(*tent) + read != sd->valsize)
939 		return (EINVAL);
940 
941 	if (ctlv->count == 0)
942 		return (0);
943 
944 	/*
945 	 * Mark entire buffer as "read".
946 	 * This instructs sopt api write it back
947 	 * after function return.
948 	 */
949 	ipfw_get_sopt_header(sd, sd->valsize);
950 
951 	/* Perform basic checks for each entry */
952 	ptent = tent;
953 	kidx = tent->idx;
954 	for (i = 0; i < ctlv->count; i++, ptent++) {
955 		if (ptent->head.length != sizeof(*ptent))
956 			return (EINVAL);
957 		if (ptent->idx != kidx)
958 			return (ENOTSUP);
959 	}
960 
961 	/* Convert data into kernel request objects */
962 	objheader_to_ti(oh, &ti);
963 	ti.type = oh->ntlv.type;
964 	ti.uidx = kidx;
965 
966 	/* Use on-stack buffer for single add/del */
967 	if (ctlv->count == 1) {
968 		memset(&tei, 0, sizeof(tei));
969 		tei_buf = &tei;
970 	} else
971 		tei_buf = malloc(ctlv->count * sizeof(tei), M_TEMP,
972 		    M_WAITOK | M_ZERO);
973 
974 	ptei = tei_buf;
975 	ptent = tent;
976 	for (i = 0; i < ctlv->count; i++, ptent++, ptei++) {
977 		ptei->paddr = &ptent->k;
978 		ptei->subtype = ptent->subtype;
979 		ptei->masklen = ptent->masklen;
980 		if (ptent->head.flags & IPFW_TF_UPDATE)
981 			ptei->flags |= TEI_FLAGS_UPDATE;
982 
983 		ipfw_import_table_value_v1(&ptent->v.value);
984 		ptei->pvalue = (struct table_value *)&ptent->v.value;
985 	}
986 
987 	error = (oh->opheader.opcode == IP_FW_TABLE_XADD) ?
988 	    add_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count) :
989 	    del_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count);
990 
991 	/* Translate result back to userland */
992 	ptei = tei_buf;
993 	ptent = tent;
994 	for (i = 0; i < ctlv->count; i++, ptent++, ptei++) {
995 		if (ptei->flags & TEI_FLAGS_ADDED)
996 			ptent->result = IPFW_TR_ADDED;
997 		else if (ptei->flags & TEI_FLAGS_DELETED)
998 			ptent->result = IPFW_TR_DELETED;
999 		else if (ptei->flags & TEI_FLAGS_UPDATED)
1000 			ptent->result = IPFW_TR_UPDATED;
1001 		else if (ptei->flags & TEI_FLAGS_LIMIT)
1002 			ptent->result = IPFW_TR_LIMIT;
1003 		else if (ptei->flags & TEI_FLAGS_ERROR)
1004 			ptent->result = IPFW_TR_ERROR;
1005 		else if (ptei->flags & TEI_FLAGS_NOTFOUND)
1006 			ptent->result = IPFW_TR_NOTFOUND;
1007 		else if (ptei->flags & TEI_FLAGS_EXISTS)
1008 			ptent->result = IPFW_TR_EXISTS;
1009 		ipfw_export_table_value_v1(ptei->pvalue, &ptent->v.value);
1010 	}
1011 
1012 	if (tei_buf != &tei)
1013 		free(tei_buf, M_TEMP);
1014 
1015 	return (error);
1016 }
1017 
1018 /*
1019  * Looks up an entry in given table.
1020  * Data layout (v0)(current):
1021  * Request: [ ipfw_obj_header ipfw_obj_tentry ]
1022  * Reply: [ ipfw_obj_header ipfw_obj_tentry ]
1023  *
1024  * Returns 0 on success
1025  */
1026 static int
find_table_entry(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)1027 find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1028     struct sockopt_data *sd)
1029 {
1030 	ipfw_obj_tentry *tent;
1031 	ipfw_obj_header *oh;
1032 	struct tid_info ti;
1033 	struct table_config *tc;
1034 	struct table_algo *ta;
1035 	struct table_info *kti;
1036 	struct table_value *pval;
1037 	struct namedobj_instance *ni;
1038 	int error;
1039 	size_t sz;
1040 
1041 	/* Check minimum header size */
1042 	sz = sizeof(*oh) + sizeof(*tent);
1043 	if (sd->valsize != sz)
1044 		return (EINVAL);
1045 
1046 	oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
1047 	tent = (ipfw_obj_tentry *)(oh + 1);
1048 
1049 	/* Basic length checks for TLVs */
1050 	if (oh->ntlv.head.length != sizeof(oh->ntlv))
1051 		return (EINVAL);
1052 
1053 	objheader_to_ti(oh, &ti);
1054 	ti.type = oh->ntlv.type;
1055 	ti.uidx = tent->idx;
1056 
1057 	IPFW_UH_RLOCK(ch);
1058 	ni = CHAIN_TO_NI(ch);
1059 
1060 	/*
1061 	 * Find existing table and check its type .
1062 	 */
1063 	ta = NULL;
1064 	if ((tc = find_table(ni, &ti)) == NULL) {
1065 		IPFW_UH_RUNLOCK(ch);
1066 		return (ESRCH);
1067 	}
1068 
1069 	/* check table type */
1070 	if (tc->no.subtype != ti.type) {
1071 		IPFW_UH_RUNLOCK(ch);
1072 		return (EINVAL);
1073 	}
1074 
1075 	kti = KIDX_TO_TI(ch, tc->no.kidx);
1076 	ta = tc->ta;
1077 
1078 	if (ta->find_tentry == NULL)
1079 		return (ENOTSUP);
1080 
1081 	error = ta->find_tentry(tc->astate, kti, tent);
1082 	if (error == 0) {
1083 		pval = get_table_value(ch, tc, tent->v.kidx);
1084 		ipfw_export_table_value_v1(pval, &tent->v.value);
1085 	}
1086 	IPFW_UH_RUNLOCK(ch);
1087 
1088 	return (error);
1089 }
1090 
1091 /*
1092  * Flushes all entries or destroys given table.
1093  * Data layout (v0)(current):
1094  * Request: [ ipfw_obj_header ]
1095  *
1096  * Returns 0 on success
1097  */
1098 static int
flush_table_v0(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)1099 flush_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1100     struct sockopt_data *sd)
1101 {
1102 	int error;
1103 	struct _ipfw_obj_header *oh;
1104 	struct tid_info ti;
1105 
1106 	if (sd->valsize != sizeof(*oh))
1107 		return (EINVAL);
1108 
1109 	oh = (struct _ipfw_obj_header *)op3;
1110 	objheader_to_ti(oh, &ti);
1111 
1112 	if (op3->opcode == IP_FW_TABLE_XDESTROY)
1113 		error = destroy_table(ch, &ti);
1114 	else if (op3->opcode == IP_FW_TABLE_XFLUSH)
1115 		error = flush_table(ch, &ti);
1116 	else
1117 		return (ENOTSUP);
1118 
1119 	return (error);
1120 }
1121 
1122 static void
restart_flush(void * object,struct op_state * _state)1123 restart_flush(void *object, struct op_state *_state)
1124 {
1125 	struct tableop_state *ts;
1126 
1127 	ts = (struct tableop_state *)_state;
1128 
1129 	if (ts->tc != object)
1130 		return;
1131 
1132 	/* Indicate we've called */
1133 	ts->modified = 1;
1134 }
1135 
1136 /*
1137  * Flushes given table.
1138  *
1139  * Function create new table instance with the same
1140  * parameters, swaps it with old one and
1141  * flushes state without holding runtime WLOCK.
1142  *
1143  * Returns 0 on success.
1144  */
1145 int
flush_table(struct ip_fw_chain * ch,struct tid_info * ti)1146 flush_table(struct ip_fw_chain *ch, struct tid_info *ti)
1147 {
1148 	struct namedobj_instance *ni;
1149 	struct table_config *tc;
1150 	struct table_algo *ta;
1151 	struct table_info ti_old, ti_new, *tablestate;
1152 	void *astate_old, *astate_new;
1153 	char algostate[64], *pstate;
1154 	struct tableop_state ts;
1155 	int error, need_gc;
1156 	uint32_t kidx;
1157 	uint8_t tflags;
1158 
1159 	/*
1160 	 * Stage 1: save table algorithm.
1161 	 * Reference found table to ensure it won't disappear.
1162 	 */
1163 	IPFW_UH_WLOCK(ch);
1164 	ni = CHAIN_TO_NI(ch);
1165 	if ((tc = find_table(ni, ti)) == NULL) {
1166 		IPFW_UH_WUNLOCK(ch);
1167 		return (ESRCH);
1168 	}
1169 	need_gc = 0;
1170 	astate_new = NULL;
1171 	memset(&ti_new, 0, sizeof(ti_new));
1172 restart:
1173 	/* Set up swap handler */
1174 	memset(&ts, 0, sizeof(ts));
1175 	ts.opstate.func = restart_flush;
1176 	ts.tc = tc;
1177 
1178 	ta = tc->ta;
1179 	/* Do not flush readonly tables */
1180 	if ((ta->flags & TA_FLAG_READONLY) != 0) {
1181 		IPFW_UH_WUNLOCK(ch);
1182 		return (EACCES);
1183 	}
1184 	/* Save startup algo parameters */
1185 	if (ta->print_config != NULL) {
1186 		ta->print_config(tc->astate, KIDX_TO_TI(ch, tc->no.kidx),
1187 		    algostate, sizeof(algostate));
1188 		pstate = algostate;
1189 	} else
1190 		pstate = NULL;
1191 	tflags = tc->tflags;
1192 	tc->no.refcnt++;
1193 	add_toperation_state(ch, &ts);
1194 	IPFW_UH_WUNLOCK(ch);
1195 
1196 	/*
1197 	 * Stage 1.5: if this is not the first attempt, destroy previous state
1198 	 */
1199 	if (need_gc != 0) {
1200 		ta->destroy(astate_new, &ti_new);
1201 		need_gc = 0;
1202 	}
1203 
1204 	/*
1205 	 * Stage 2: allocate new table instance using same algo.
1206 	 */
1207 	memset(&ti_new, 0, sizeof(struct table_info));
1208 	error = ta->init(ch, &astate_new, &ti_new, pstate, tflags);
1209 
1210 	/*
1211 	 * Stage 3: swap old state pointers with newly-allocated ones.
1212 	 * Decrease refcount.
1213 	 */
1214 	IPFW_UH_WLOCK(ch);
1215 	tc->no.refcnt--;
1216 	del_toperation_state(ch, &ts);
1217 
1218 	if (error != 0) {
1219 		IPFW_UH_WUNLOCK(ch);
1220 		return (error);
1221 	}
1222 
1223 	/*
1224 	 * Restart operation if table swap has happened:
1225 	 * even if algo may be the same, algo init parameters
1226 	 * may change. Restart operation instead of doing
1227 	 * complex checks.
1228 	 */
1229 	if (ts.modified != 0) {
1230 		/* Delay destroying data since we're holding UH lock */
1231 		need_gc = 1;
1232 		goto restart;
1233 	}
1234 
1235 	ni = CHAIN_TO_NI(ch);
1236 	kidx = tc->no.kidx;
1237 	tablestate = (struct table_info *)ch->tablestate;
1238 
1239 	IPFW_WLOCK(ch);
1240 	ti_old = tablestate[kidx];
1241 	tablestate[kidx] = ti_new;
1242 	IPFW_WUNLOCK(ch);
1243 
1244 	astate_old = tc->astate;
1245 	tc->astate = astate_new;
1246 	tc->ti_copy = ti_new;
1247 	tc->count = 0;
1248 
1249 	/* Notify algo on real @ti address */
1250 	if (ta->change_ti != NULL)
1251 		ta->change_ti(tc->astate, &tablestate[kidx]);
1252 
1253 	/*
1254 	 * Stage 4: unref values.
1255 	 */
1256 	ipfw_unref_table_values(ch, tc, ta, astate_old, &ti_old);
1257 	IPFW_UH_WUNLOCK(ch);
1258 
1259 	/*
1260 	 * Stage 5: perform real flush/destroy.
1261 	 */
1262 	ta->destroy(astate_old, &ti_old);
1263 
1264 	return (0);
1265 }
1266 
1267 /*
1268  * Swaps two tables.
1269  * Data layout (v0)(current):
1270  * Request: [ ipfw_obj_header ipfw_obj_ntlv ]
1271  *
1272  * Returns 0 on success
1273  */
1274 static int
swap_table(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)1275 swap_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1276     struct sockopt_data *sd)
1277 {
1278 	int error;
1279 	struct _ipfw_obj_header *oh;
1280 	struct tid_info ti_a, ti_b;
1281 
1282 	if (sd->valsize != sizeof(*oh) + sizeof(ipfw_obj_ntlv))
1283 		return (EINVAL);
1284 
1285 	oh = (struct _ipfw_obj_header *)op3;
1286 	ntlv_to_ti(&oh->ntlv, &ti_a);
1287 	ntlv_to_ti((ipfw_obj_ntlv *)(oh + 1), &ti_b);
1288 
1289 	error = swap_tables(ch, &ti_a, &ti_b);
1290 
1291 	return (error);
1292 }
1293 
1294 /*
1295  * Swaps two tables of the same type/valtype.
1296  *
1297  * Checks if tables are compatible and limits
1298  * permits swap, than actually perform swap.
1299  *
1300  * Each table consists of 2 different parts:
1301  * config:
1302  *   @tc (with name, set, kidx) and rule bindings, which is "stable".
1303  *   number of items
1304  *   table algo
1305  * runtime:
1306  *   runtime data @ti (ch->tablestate)
1307  *   runtime cache in @tc
1308  *   algo-specific data (@tc->astate)
1309  *
1310  * So we switch:
1311  *  all runtime data
1312  *   number of items
1313  *   table algo
1314  *
1315  * After that we call @ti change handler for each table.
1316  *
1317  * Note that referencing @tc won't protect tc->ta from change.
1318  * XXX: Do we need to restrict swap between locked tables?
1319  * XXX: Do we need to exchange ftype?
1320  *
1321  * Returns 0 on success.
1322  */
1323 static int
swap_tables(struct ip_fw_chain * ch,struct tid_info * a,struct tid_info * b)1324 swap_tables(struct ip_fw_chain *ch, struct tid_info *a,
1325     struct tid_info *b)
1326 {
1327 	struct namedobj_instance *ni;
1328 	struct table_config *tc_a, *tc_b;
1329 	struct table_algo *ta;
1330 	struct table_info ti, *tablestate;
1331 	void *astate;
1332 	uint32_t count;
1333 
1334 	/*
1335 	 * Stage 1: find both tables and ensure they are of
1336 	 * the same type.
1337 	 */
1338 	IPFW_UH_WLOCK(ch);
1339 	ni = CHAIN_TO_NI(ch);
1340 	if ((tc_a = find_table(ni, a)) == NULL) {
1341 		IPFW_UH_WUNLOCK(ch);
1342 		return (ESRCH);
1343 	}
1344 	if ((tc_b = find_table(ni, b)) == NULL) {
1345 		IPFW_UH_WUNLOCK(ch);
1346 		return (ESRCH);
1347 	}
1348 
1349 	/* It is very easy to swap between the same table */
1350 	if (tc_a == tc_b) {
1351 		IPFW_UH_WUNLOCK(ch);
1352 		return (0);
1353 	}
1354 
1355 	/* Check type and value are the same */
1356 	if (tc_a->no.subtype!=tc_b->no.subtype || tc_a->tflags!=tc_b->tflags) {
1357 		IPFW_UH_WUNLOCK(ch);
1358 		return (EINVAL);
1359 	}
1360 
1361 	/* Check limits before swap */
1362 	if ((tc_a->limit != 0 && tc_b->count > tc_a->limit) ||
1363 	    (tc_b->limit != 0 && tc_a->count > tc_b->limit)) {
1364 		IPFW_UH_WUNLOCK(ch);
1365 		return (EFBIG);
1366 	}
1367 
1368 	/* Check if one of the tables is readonly */
1369 	if (((tc_a->ta->flags | tc_b->ta->flags) & TA_FLAG_READONLY) != 0) {
1370 		IPFW_UH_WUNLOCK(ch);
1371 		return (EACCES);
1372 	}
1373 
1374 	/* Notify we're going to swap */
1375 	rollback_toperation_state(ch, tc_a);
1376 	rollback_toperation_state(ch, tc_b);
1377 
1378 	/* Everything is fine, prepare to swap */
1379 	tablestate = (struct table_info *)ch->tablestate;
1380 	ti = tablestate[tc_a->no.kidx];
1381 	ta = tc_a->ta;
1382 	astate = tc_a->astate;
1383 	count = tc_a->count;
1384 
1385 	IPFW_WLOCK(ch);
1386 	/* a <- b */
1387 	tablestate[tc_a->no.kidx] = tablestate[tc_b->no.kidx];
1388 	tc_a->ta = tc_b->ta;
1389 	tc_a->astate = tc_b->astate;
1390 	tc_a->count = tc_b->count;
1391 	/* b <- a */
1392 	tablestate[tc_b->no.kidx] = ti;
1393 	tc_b->ta = ta;
1394 	tc_b->astate = astate;
1395 	tc_b->count = count;
1396 	IPFW_WUNLOCK(ch);
1397 
1398 	/* Ensure tc.ti copies are in sync */
1399 	tc_a->ti_copy = tablestate[tc_a->no.kidx];
1400 	tc_b->ti_copy = tablestate[tc_b->no.kidx];
1401 
1402 	/* Notify both tables on @ti change */
1403 	if (tc_a->ta->change_ti != NULL)
1404 		tc_a->ta->change_ti(tc_a->astate, &tablestate[tc_a->no.kidx]);
1405 	if (tc_b->ta->change_ti != NULL)
1406 		tc_b->ta->change_ti(tc_b->astate, &tablestate[tc_b->no.kidx]);
1407 
1408 	IPFW_UH_WUNLOCK(ch);
1409 
1410 	return (0);
1411 }
1412 
1413 /*
1414  * Destroys table specified by @ti.
1415  * Data layout (v0)(current):
1416  * Request: [ ip_fw3_opheader ]
1417  *
1418  * Returns 0 on success
1419  */
1420 static int
destroy_table(struct ip_fw_chain * ch,struct tid_info * ti)1421 destroy_table(struct ip_fw_chain *ch, struct tid_info *ti)
1422 {
1423 	struct namedobj_instance *ni;
1424 	struct table_config *tc;
1425 
1426 	IPFW_UH_WLOCK(ch);
1427 
1428 	ni = CHAIN_TO_NI(ch);
1429 	if ((tc = find_table(ni, ti)) == NULL) {
1430 		IPFW_UH_WUNLOCK(ch);
1431 		return (ESRCH);
1432 	}
1433 
1434 	/* Do not permit destroying referenced tables */
1435 	if (tc->no.refcnt > 0) {
1436 		IPFW_UH_WUNLOCK(ch);
1437 		return (EBUSY);
1438 	}
1439 
1440 	IPFW_WLOCK(ch);
1441 	unlink_table(ch, tc);
1442 	IPFW_WUNLOCK(ch);
1443 
1444 	/* Free obj index */
1445 	if (ipfw_objhash_free_idx(ni, tc->no.kidx) != 0)
1446 		printf("Error unlinking kidx %u from table %s\n",
1447 		    tc->no.kidx, tc->tablename);
1448 
1449 	/* Unref values used in tables while holding UH lock */
1450 	ipfw_unref_table_values(ch, tc, tc->ta, tc->astate, &tc->ti_copy);
1451 	IPFW_UH_WUNLOCK(ch);
1452 
1453 	free_table_config(ni, tc);
1454 
1455 	return (0);
1456 }
1457 
1458 /*
1459  * Grow tables index.
1460  *
1461  * Returns 0 on success.
1462  */
1463 int
ipfw_resize_tables(struct ip_fw_chain * ch,unsigned int ntables)1464 ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables)
1465 {
1466 	unsigned int tbl;
1467 	struct namedobj_instance *ni;
1468 	void *new_idx, *old_tablestate, *tablestate;
1469 	struct table_info *ti;
1470 	struct table_config *tc;
1471 	int i, new_blocks;
1472 
1473 	/* Check new value for validity */
1474 	if (ntables == 0)
1475 		return (EINVAL);
1476 	if (ntables > IPFW_TABLES_MAX)
1477 		ntables = IPFW_TABLES_MAX;
1478 	/* Alight to nearest power of 2 */
1479 	ntables = roundup_pow_of_two(ntables);
1480 
1481 	/* Allocate new pointers */
1482 	tablestate = malloc(ntables * sizeof(struct table_info),
1483 	    M_IPFW, M_WAITOK | M_ZERO);
1484 
1485 	ipfw_objhash_bitmap_alloc(ntables, (void *)&new_idx, &new_blocks);
1486 
1487 	IPFW_UH_WLOCK(ch);
1488 
1489 	tbl = (ntables >= V_fw_tables_max) ? V_fw_tables_max : ntables;
1490 	ni = CHAIN_TO_NI(ch);
1491 
1492 	/* Temporary restrict decreasing max_tables */
1493 	if (ntables < V_fw_tables_max) {
1494 		/*
1495 		 * FIXME: Check if we really can shrink
1496 		 */
1497 		IPFW_UH_WUNLOCK(ch);
1498 		return (EINVAL);
1499 	}
1500 
1501 	/* Copy table info/indices */
1502 	memcpy(tablestate, ch->tablestate, sizeof(struct table_info) * tbl);
1503 	ipfw_objhash_bitmap_merge(ni, &new_idx, &new_blocks);
1504 
1505 	IPFW_WLOCK(ch);
1506 
1507 	/* Change pointers */
1508 	old_tablestate = ch->tablestate;
1509 	ch->tablestate = tablestate;
1510 	ipfw_objhash_bitmap_swap(ni, &new_idx, &new_blocks);
1511 
1512 	V_fw_tables_max = ntables;
1513 
1514 	IPFW_WUNLOCK(ch);
1515 
1516 	/* Notify all consumers that their @ti pointer has changed */
1517 	ti = (struct table_info *)ch->tablestate;
1518 	for (i = 0; i < tbl; i++, ti++) {
1519 		if (ti->lookup == NULL)
1520 			continue;
1521 		tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, i);
1522 		if (tc == NULL || tc->ta->change_ti == NULL)
1523 			continue;
1524 
1525 		tc->ta->change_ti(tc->astate, ti);
1526 	}
1527 
1528 	IPFW_UH_WUNLOCK(ch);
1529 
1530 	/* Free old pointers */
1531 	free(old_tablestate, M_IPFW);
1532 	ipfw_objhash_bitmap_free(new_idx, new_blocks);
1533 
1534 	return (0);
1535 }
1536 
/*
 * Returns the named object registered under @kidx in the table
 * object hash of @ch (result of ipfw_objhash_lookup_kidx()).
 */
struct named_object *
ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch, uint32_t kidx)
{
	struct namedobj_instance *tables;

	tables = CHAIN_TO_NI(ch);

	return (ipfw_objhash_lookup_kidx(tables, kidx));
}
1546 
1547 /*
1548  * Take reference to table specified in @ntlv.
1549  * On success return its @kidx.
1550  */
1551 int
ipfw_ref_table(struct ip_fw_chain * ch,ipfw_obj_ntlv * ntlv,uint32_t * kidx)1552 ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint32_t *kidx)
1553 {
1554 	struct tid_info ti;
1555 	struct table_config *tc;
1556 	int error;
1557 
1558 	IPFW_UH_WLOCK_ASSERT(ch);
1559 
1560 	ntlv_to_ti(ntlv, &ti);
1561 	error = find_table_err(CHAIN_TO_NI(ch), &ti, &tc);
1562 	if (error != 0)
1563 		return (error);
1564 
1565 	if (tc == NULL)
1566 		return (ESRCH);
1567 
1568 	tc_ref(tc);
1569 	*kidx = tc->no.kidx;
1570 
1571 	return (0);
1572 }
1573 
1574 void
ipfw_unref_table(struct ip_fw_chain * ch,uint32_t kidx)1575 ipfw_unref_table(struct ip_fw_chain *ch, uint32_t kidx)
1576 {
1577 
1578 	struct namedobj_instance *ni;
1579 	struct named_object *no;
1580 
1581 	IPFW_UH_WLOCK_ASSERT(ch);
1582 	ni = CHAIN_TO_NI(ch);
1583 	no = ipfw_objhash_lookup_kidx(ni, kidx);
1584 	KASSERT(no != NULL, ("Table with index %u not found", kidx));
1585 	no->refcnt--;
1586 }
1587 
1588 /*
1589  * Lookup an arbitrary key @paddr of length @plen in table @tbl.
1590  * Stores found value in @val.
1591  *
1592  * Returns 1 if key was found.
1593  */
1594 int
ipfw_lookup_table(struct ip_fw_chain * ch,uint32_t tbl,uint16_t plen,void * paddr,uint32_t * val)1595 ipfw_lookup_table(struct ip_fw_chain *ch, uint32_t tbl, uint16_t plen,
1596     void *paddr, uint32_t *val)
1597 {
1598 	struct table_info *ti;
1599 
1600 	ti = KIDX_TO_TI(ch, tbl);
1601 
1602 	return (ti->lookup(ti, paddr, plen, val));
1603 }
1604 
1605 /*
1606  * Info/List/dump support for tables.
1607  *
1608  */
1609 
1610 /*
1611  * High-level 'get' cmds sysctl handlers
1612  */
1613 
1614 /*
1615  * Lists all tables currently available in kernel.
1616  * Data layout (v0)(current):
1617  * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
1618  * Reply: [ ipfw_obj_lheader ipfw_xtable_info x N ]
1619  *
1620  * Returns 0 on success
1621  */
1622 static int
list_tables(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)1623 list_tables(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1624     struct sockopt_data *sd)
1625 {
1626 	struct _ipfw_obj_lheader *olh;
1627 	int error;
1628 
1629 	olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
1630 	if (olh == NULL)
1631 		return (EINVAL);
1632 	if (sd->valsize < olh->size)
1633 		return (EINVAL);
1634 
1635 	IPFW_UH_RLOCK(ch);
1636 	error = export_tables(ch, olh, sd);
1637 	IPFW_UH_RUNLOCK(ch);
1638 
1639 	return (error);
1640 }
1641 
1642 /*
1643  * Store table info to buffer provided by @sd.
1644  * Data layout (v0)(current):
1645  * Request: [ ipfw_obj_header ipfw_xtable_info(empty)]
1646  * Reply: [ ipfw_obj_header ipfw_xtable_info ]
1647  *
1648  * Returns 0 on success.
1649  */
1650 static int
describe_table(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)1651 describe_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1652     struct sockopt_data *sd)
1653 {
1654 	struct _ipfw_obj_header *oh;
1655 	struct table_config *tc;
1656 	struct tid_info ti;
1657 	size_t sz;
1658 
1659 	sz = sizeof(*oh) + sizeof(ipfw_xtable_info);
1660 	oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
1661 	if (oh == NULL)
1662 		return (EINVAL);
1663 
1664 	objheader_to_ti(oh, &ti);
1665 
1666 	IPFW_UH_RLOCK(ch);
1667 	if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
1668 		IPFW_UH_RUNLOCK(ch);
1669 		return (ESRCH);
1670 	}
1671 
1672 	export_table_info(ch, tc, (ipfw_xtable_info *)(oh + 1));
1673 	IPFW_UH_RUNLOCK(ch);
1674 
1675 	return (0);
1676 }
1677 
1678 /*
1679  * Modifies existing table.
1680  * Data layout (v0)(current):
1681  * Request: [ ipfw_obj_header ipfw_xtable_info ]
1682  *
1683  * Returns 0 on success
1684  */
1685 static int
modify_table(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)1686 modify_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1687     struct sockopt_data *sd)
1688 {
1689 	struct _ipfw_obj_header *oh;
1690 	ipfw_xtable_info *i;
1691 	char *tname;
1692 	struct tid_info ti;
1693 	struct namedobj_instance *ni;
1694 	struct table_config *tc;
1695 
1696 	if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info))
1697 		return (EINVAL);
1698 
1699 	oh = (struct _ipfw_obj_header *)sd->kbuf;
1700 	i = (ipfw_xtable_info *)(oh + 1);
1701 
1702 	/*
1703 	 * Verify user-supplied strings.
1704 	 * Check for null-terminated/zero-length strings/
1705 	 */
1706 	tname = oh->ntlv.name;
1707 	if (check_table_name(tname) != 0)
1708 		return (EINVAL);
1709 
1710 	objheader_to_ti(oh, &ti);
1711 	ti.type = i->type;
1712 
1713 	IPFW_UH_WLOCK(ch);
1714 	ni = CHAIN_TO_NI(ch);
1715 	if ((tc = find_table(ni, &ti)) == NULL) {
1716 		IPFW_UH_WUNLOCK(ch);
1717 		return (ESRCH);
1718 	}
1719 
1720 	/* Do not support any modifications for readonly tables */
1721 	if ((tc->ta->flags & TA_FLAG_READONLY) != 0) {
1722 		IPFW_UH_WUNLOCK(ch);
1723 		return (EACCES);
1724 	}
1725 
1726 	if ((i->mflags & IPFW_TMFLAGS_LIMIT) != 0)
1727 		tc->limit = i->limit;
1728 	if ((i->mflags & IPFW_TMFLAGS_LOCK) != 0)
1729 		tc->locked = ((i->flags & IPFW_TGFLAGS_LOCKED) != 0);
1730 	IPFW_UH_WUNLOCK(ch);
1731 
1732 	return (0);
1733 }
1734 
1735 /*
1736  * Creates new table.
1737  * Data layout (v0)(current):
1738  * Request: [ ipfw_obj_header ipfw_xtable_info ]
1739  *
1740  * Returns 0 on success
1741  */
1742 static int
create_table(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)1743 create_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1744     struct sockopt_data *sd)
1745 {
1746 	struct _ipfw_obj_header *oh;
1747 	ipfw_xtable_info *i;
1748 	char *tname, *aname;
1749 	struct tid_info ti;
1750 	struct namedobj_instance *ni;
1751 
1752 	if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info))
1753 		return (EINVAL);
1754 
1755 	oh = (struct _ipfw_obj_header *)sd->kbuf;
1756 	i = (ipfw_xtable_info *)(oh + 1);
1757 
1758 	/*
1759 	 * Verify user-supplied strings.
1760 	 * Check for null-terminated/zero-length strings/
1761 	 */
1762 	tname = oh->ntlv.name;
1763 	aname = i->algoname;
1764 	if (check_table_name(tname) != 0 ||
1765 	    strnlen(aname, sizeof(i->algoname)) == sizeof(i->algoname))
1766 		return (EINVAL);
1767 
1768 	if (aname[0] == '\0') {
1769 		/* Use default algorithm */
1770 		aname = NULL;
1771 	}
1772 
1773 	objheader_to_ti(oh, &ti);
1774 	ti.type = i->type;
1775 
1776 	ni = CHAIN_TO_NI(ch);
1777 
1778 	IPFW_UH_RLOCK(ch);
1779 	if (find_table(ni, &ti) != NULL) {
1780 		IPFW_UH_RUNLOCK(ch);
1781 		return (EEXIST);
1782 	}
1783 	IPFW_UH_RUNLOCK(ch);
1784 
1785 	return (create_table_internal(ch, &ti, aname, i, NULL, 0));
1786 }
1787 
1788 /*
1789  * Creates new table based on @ti and @aname.
1790  *
1791  * Assume @aname to be checked and valid.
1792  * Stores allocated table kidx inside @pkidx (if non-NULL).
1793  * Reference created table if @compat is non-zero.
1794  *
1795  * Returns 0 on success.
1796  */
1797 static int
create_table_internal(struct ip_fw_chain * ch,struct tid_info * ti,char * aname,ipfw_xtable_info * i,uint32_t * pkidx,int compat)1798 create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
1799     char *aname, ipfw_xtable_info *i, uint32_t *pkidx, int compat)
1800 {
1801 	struct namedobj_instance *ni;
1802 	struct table_config *tc, *tc_new, *tmp;
1803 	struct table_algo *ta;
1804 	uint32_t kidx;
1805 
1806 	ni = CHAIN_TO_NI(ch);
1807 
1808 	ta = find_table_algo(CHAIN_TO_TCFG(ch), ti, aname);
1809 	if (ta == NULL)
1810 		return (ENOTSUP);
1811 
1812 	tc = alloc_table_config(ch, ti, ta, aname, i->tflags);
1813 	if (tc == NULL)
1814 		return (ENOMEM);
1815 
1816 	tc->vmask = i->vmask;
1817 	tc->limit = i->limit;
1818 	if (ta->flags & TA_FLAG_READONLY)
1819 		tc->locked = 1;
1820 	else
1821 		tc->locked = (i->flags & IPFW_TGFLAGS_LOCKED) != 0;
1822 
1823 	IPFW_UH_WLOCK(ch);
1824 
1825 	/* Check if table has been already created */
1826 	tc_new = find_table(ni, ti);
1827 	if (tc_new != NULL) {
1828 		/*
1829 		 * Compat: do not fail if we're
1830 		 * requesting to create existing table
1831 		 * which has the same type
1832 		 */
1833 		if (compat == 0 || tc_new->no.subtype != tc->no.subtype) {
1834 			IPFW_UH_WUNLOCK(ch);
1835 			free_table_config(ni, tc);
1836 			return (EEXIST);
1837 		}
1838 
1839 		/* Exchange tc and tc_new for proper refcounting & freeing */
1840 		tmp = tc;
1841 		tc = tc_new;
1842 		tc_new = tmp;
1843 	} else {
1844 		/* New table */
1845 		if (ipfw_objhash_alloc_idx(ni, &kidx) != 0) {
1846 			IPFW_UH_WUNLOCK(ch);
1847 			printf("Unable to allocate table index."
1848 			    " Consider increasing net.inet.ip.fw.tables_max");
1849 			free_table_config(ni, tc);
1850 			return (EBUSY);
1851 		}
1852 		tc->no.kidx = kidx;
1853 		tc->no.etlv = IPFW_TLV_TBL_NAME;
1854 
1855 		link_table(ch, tc);
1856 	}
1857 
1858 	if (compat != 0)
1859 		tc->no.refcnt++;
1860 	if (pkidx != NULL)
1861 		*pkidx = tc->no.kidx;
1862 
1863 	IPFW_UH_WUNLOCK(ch);
1864 
1865 	if (tc_new != NULL)
1866 		free_table_config(ni, tc_new);
1867 
1868 	return (0);
1869 }
1870 
1871 static void
ntlv_to_ti(ipfw_obj_ntlv * ntlv,struct tid_info * ti)1872 ntlv_to_ti(ipfw_obj_ntlv *ntlv, struct tid_info *ti)
1873 {
1874 
1875 	memset(ti, 0, sizeof(struct tid_info));
1876 	ti->set = ntlv->set;
1877 	ti->uidx = ntlv->idx;
1878 	ti->tlvs = ntlv;
1879 	ti->tlen = ntlv->head.length;
1880 }
1881 
1882 static void
objheader_to_ti(struct _ipfw_obj_header * oh,struct tid_info * ti)1883 objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti)
1884 {
1885 
1886 	ntlv_to_ti(&oh->ntlv, ti);
1887 }
1888 
/*
 * Returns the named object instance CHAIN_TO_NI() yields for @ch.
 */
struct namedobj_instance *
ipfw_get_table_objhash(struct ip_fw_chain *ch)
{
	struct namedobj_instance *tables;

	tables = CHAIN_TO_NI(ch);

	return (tables);
}
1895 
1896 /*
1897  * Exports basic table info as name TLV.
1898  * Used inside dump_static_rules() to provide info
1899  * about all tables referenced by current ruleset.
1900  *
1901  * Returns 0 on success.
1902  */
1903 int
ipfw_export_table_ntlv(struct ip_fw_chain * ch,uint32_t kidx,struct sockopt_data * sd)1904 ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint32_t kidx,
1905     struct sockopt_data *sd)
1906 {
1907 	struct namedobj_instance *ni;
1908 	struct named_object *no;
1909 	ipfw_obj_ntlv *ntlv;
1910 
1911 	ni = CHAIN_TO_NI(ch);
1912 
1913 	no = ipfw_objhash_lookup_kidx(ni, kidx);
1914 	KASSERT(no != NULL, ("invalid table kidx passed"));
1915 
1916 	ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv));
1917 	if (ntlv == NULL)
1918 		return (ENOMEM);
1919 
1920 	ntlv->head.type = IPFW_TLV_TBL_NAME;
1921 	ntlv->head.length = sizeof(*ntlv);
1922 	ntlv->idx = no->kidx;
1923 	strlcpy(ntlv->name, no->name, sizeof(ntlv->name));
1924 
1925 	return (0);
1926 }
1927 
1928 struct dump_args {
1929 	struct ip_fw_chain *ch;
1930 	struct table_info *ti;
1931 	struct table_config *tc;
1932 	struct sockopt_data *sd;
1933 	uint32_t cnt;
1934 	uint16_t uidx;
1935 	int error;
1936 	uint32_t size;
1937 	ta_foreach_f *f;
1938 	void *farg;
1939 	ipfw_obj_tentry tent;
1940 };
1941 
1942 static int
count_ext_entries(void * e,void * arg)1943 count_ext_entries(void *e, void *arg)
1944 {
1945 	struct dump_args *da;
1946 
1947 	da = (struct dump_args *)arg;
1948 	da->cnt++;
1949 
1950 	return (0);
1951 }
1952 
1953 /*
1954  * Gets number of items from table either using
1955  * internal counter or calling algo callback for
1956  * externally-managed tables.
1957  *
1958  * Returns number of records.
1959  */
1960 static uint32_t
table_get_count(struct ip_fw_chain * ch,struct table_config * tc)1961 table_get_count(struct ip_fw_chain *ch, struct table_config *tc)
1962 {
1963 	struct table_info *ti;
1964 	struct table_algo *ta;
1965 	struct dump_args da;
1966 
1967 	ti = KIDX_TO_TI(ch, tc->no.kidx);
1968 	ta = tc->ta;
1969 
1970 	/* Use internal counter for self-managed tables */
1971 	if ((ta->flags & TA_FLAG_READONLY) == 0)
1972 		return (tc->count);
1973 
1974 	/* Use callback to quickly get number of items */
1975 	if ((ta->flags & TA_FLAG_EXTCOUNTER) != 0)
1976 		return (ta->get_count(tc->astate, ti));
1977 
1978 	/* Count number of iterms ourselves */
1979 	memset(&da, 0, sizeof(da));
1980 	ta->foreach(tc->astate, ti, count_ext_entries, &da);
1981 
1982 	return (da.cnt);
1983 }
1984 
1985 /*
1986  * Exports table @tc info into standard ipfw_xtable_info format.
1987  */
1988 static void
export_table_info(struct ip_fw_chain * ch,struct table_config * tc,ipfw_xtable_info * i)1989 export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
1990     ipfw_xtable_info *i)
1991 {
1992 	struct table_info *ti;
1993 	struct table_algo *ta;
1994 
1995 	i->type = tc->no.subtype;
1996 	i->tflags = tc->tflags;
1997 	i->vmask = tc->vmask;
1998 	i->set = tc->no.set;
1999 	i->kidx = tc->no.kidx;
2000 	i->refcnt = tc->no.refcnt;
2001 	i->count = table_get_count(ch, tc);
2002 	i->limit = tc->limit;
2003 	i->flags |= (tc->locked != 0) ? IPFW_TGFLAGS_LOCKED : 0;
2004 	i->size = i->count * sizeof(ipfw_obj_tentry);
2005 	i->size += sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info);
2006 	strlcpy(i->tablename, tc->tablename, sizeof(i->tablename));
2007 	ti = KIDX_TO_TI(ch, tc->no.kidx);
2008 	ta = tc->ta;
2009 	if (ta->print_config != NULL) {
2010 		/* Use algo function to print table config to string */
2011 		ta->print_config(tc->astate, ti, i->algoname,
2012 		    sizeof(i->algoname));
2013 	} else
2014 		strlcpy(i->algoname, ta->name, sizeof(i->algoname));
2015 	/* Dump algo-specific data, if possible */
2016 	if (ta->dump_tinfo != NULL) {
2017 		ta->dump_tinfo(tc->astate, ti, &i->ta_info);
2018 		i->ta_info.flags |= IPFW_TATFLAGS_DATA;
2019 	}
2020 }
2021 
/*
 * Argument bundle passed to export_table_internal().
 */
struct dump_table_args {
	struct ip_fw_chain *ch;		/* chain whose tables are exported */
	struct sockopt_data *sd;	/* buffer the table info goes to */
};
2026 
2027 static int
export_table_internal(struct namedobj_instance * ni,struct named_object * no,void * arg)2028 export_table_internal(struct namedobj_instance *ni, struct named_object *no,
2029     void *arg)
2030 {
2031 	ipfw_xtable_info *i;
2032 	struct dump_table_args *dta;
2033 
2034 	dta = (struct dump_table_args *)arg;
2035 
2036 	i = (ipfw_xtable_info *)ipfw_get_sopt_space(dta->sd, sizeof(*i));
2037 	KASSERT(i != NULL, ("previously checked buffer is not enough"));
2038 
2039 	export_table_info(dta->ch, (struct table_config *)no, i);
2040 	return (0);
2041 }
2042 
2043 /*
2044  * Export all tables as ipfw_xtable_info structures to
2045  * storage provided by @sd.
2046  *
2047  * If supplied buffer is too small, fills in required size
2048  * and returns ENOMEM.
2049  * Returns 0 on success.
2050  */
2051 static int
export_tables(struct ip_fw_chain * ch,ipfw_obj_lheader * olh,struct sockopt_data * sd)2052 export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
2053     struct sockopt_data *sd)
2054 {
2055 	uint32_t size;
2056 	uint32_t count;
2057 	struct dump_table_args dta;
2058 
2059 	count = ipfw_objhash_count(CHAIN_TO_NI(ch));
2060 	size = count * sizeof(ipfw_xtable_info) + sizeof(ipfw_obj_lheader);
2061 
2062 	/* Fill in header regadless of buffer size */
2063 	olh->count = count;
2064 	olh->objsize = sizeof(ipfw_xtable_info);
2065 
2066 	if (size > olh->size) {
2067 		olh->size = size;
2068 		return (ENOMEM);
2069 	}
2070 
2071 	olh->size = size;
2072 
2073 	dta.ch = ch;
2074 	dta.sd = sd;
2075 
2076 	ipfw_objhash_foreach(CHAIN_TO_NI(ch), export_table_internal, &dta);
2077 
2078 	return (0);
2079 }
2080 
2081 /*
2082  * Dumps all table data
2083  * Data layout (v1)(current):
2084  * Request: [ ipfw_obj_header ], size = ipfw_xtable_info.size
2085  * Reply: [ ipfw_obj_header ipfw_xtable_info ipfw_obj_tentry x N ]
2086  *
2087  * Returns 0 on success
2088  */
2089 static int
dump_table_v1(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)2090 dump_table_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
2091     struct sockopt_data *sd)
2092 {
2093 	struct _ipfw_obj_header *oh;
2094 	ipfw_xtable_info *i;
2095 	struct tid_info ti;
2096 	struct table_config *tc;
2097 	struct table_algo *ta;
2098 	struct dump_args da;
2099 	uint32_t sz;
2100 
2101 	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info);
2102 	oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
2103 	if (oh == NULL)
2104 		return (EINVAL);
2105 
2106 	i = (ipfw_xtable_info *)(oh + 1);
2107 	objheader_to_ti(oh, &ti);
2108 
2109 	IPFW_UH_RLOCK(ch);
2110 	if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
2111 		IPFW_UH_RUNLOCK(ch);
2112 		return (ESRCH);
2113 	}
2114 	export_table_info(ch, tc, i);
2115 
2116 	if (sd->valsize < i->size) {
2117 		/*
2118 		 * Submitted buffer size is not enough.
2119 		 * WE've already filled in @i structure with
2120 		 * relevant table info including size, so we
2121 		 * can return. Buffer will be flushed automatically.
2122 		 */
2123 		IPFW_UH_RUNLOCK(ch);
2124 		return (ENOMEM);
2125 	}
2126 
2127 	/*
2128 	 * Do the actual dump in eXtended format
2129 	 */
2130 	memset(&da, 0, sizeof(da));
2131 	da.ch = ch;
2132 	da.ti = KIDX_TO_TI(ch, tc->no.kidx);
2133 	da.tc = tc;
2134 	da.sd = sd;
2135 
2136 	ta = tc->ta;
2137 
2138 	ta->foreach(tc->astate, da.ti, dump_table_tentry, &da);
2139 	IPFW_UH_RUNLOCK(ch);
2140 
2141 	return (da.error);
2142 }
2143 
2144 /*
2145  * Dumps table entry in eXtended format (v1)(current).
2146  */
2147 static int
dump_table_tentry(void * e,void * arg)2148 dump_table_tentry(void *e, void *arg)
2149 {
2150 	struct dump_args *da;
2151 	struct table_config *tc;
2152 	struct table_algo *ta;
2153 	struct table_value *pval;
2154 	ipfw_obj_tentry *tent;
2155 	int error;
2156 
2157 	da = (struct dump_args *)arg;
2158 
2159 	tc = da->tc;
2160 	ta = tc->ta;
2161 
2162 	tent = (ipfw_obj_tentry *)ipfw_get_sopt_space(da->sd, sizeof(*tent));
2163 	/* Out of memory, returning */
2164 	if (tent == NULL) {
2165 		da->error = ENOMEM;
2166 		return (1);
2167 	}
2168 	tent->head.length = sizeof(ipfw_obj_tentry);
2169 	tent->idx = da->uidx;
2170 
2171 	error = ta->dump_tentry(tc->astate, da->ti, e, tent);
2172 	if (error != 0)
2173 		return (error);
2174 
2175 	pval = get_table_value(da->ch, da->tc, tent->v.kidx);
2176 	ipfw_export_table_value_v1(pval, &tent->v.value);
2177 
2178 	return (0);
2179 }
2180 
2181 /*
2182  * Helper function to export table algo data
2183  * to tentry format before calling user function.
2184  *
2185  * Returns 0 on success.
2186  */
2187 static int
prepare_table_tentry(void * e,void * arg)2188 prepare_table_tentry(void *e, void *arg)
2189 {
2190 	struct dump_args *da;
2191 	struct table_config *tc;
2192 	struct table_algo *ta;
2193 	int error;
2194 
2195 	da = (struct dump_args *)arg;
2196 
2197 	tc = da->tc;
2198 	ta = tc->ta;
2199 
2200 	error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent);
2201 	if (error != 0)
2202 		return (error);
2203 
2204 	da->f(&da->tent, da->farg);
2205 
2206 	return (0);
2207 }
2208 
2209 /*
2210  * Allow external consumers to read table entries in standard format.
2211  */
2212 int
ipfw_foreach_table_tentry(struct ip_fw_chain * ch,uint32_t kidx,ta_foreach_f * f,void * arg)2213 ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint32_t kidx,
2214     ta_foreach_f *f, void *arg)
2215 {
2216 	struct namedobj_instance *ni;
2217 	struct table_config *tc;
2218 	struct table_algo *ta;
2219 	struct dump_args da;
2220 
2221 	ni = CHAIN_TO_NI(ch);
2222 
2223 	tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx);
2224 	if (tc == NULL)
2225 		return (ESRCH);
2226 
2227 	ta = tc->ta;
2228 
2229 	memset(&da, 0, sizeof(da));
2230 	da.ch = ch;
2231 	da.ti = KIDX_TO_TI(ch, tc->no.kidx);
2232 	da.tc = tc;
2233 	da.f = f;
2234 	da.farg = arg;
2235 
2236 	ta->foreach(tc->astate, da.ti, prepare_table_tentry, &da);
2237 
2238 	return (0);
2239 }
2240 
2241 /*
2242  * Table algorithms
2243  */
2244 
2245 /*
2246  * Finds algorithm by index, table type or supplied name.
2247  *
2248  * Returns pointer to algo or NULL.
2249  */
2250 static struct table_algo *
find_table_algo(struct tables_config * tcfg,struct tid_info * ti,char * name)2251 find_table_algo(struct tables_config *tcfg, struct tid_info *ti, char *name)
2252 {
2253 	int i, l;
2254 	struct table_algo *ta;
2255 
2256 	if (ti->type > IPFW_TABLE_MAXTYPE)
2257 		return (NULL);
2258 
2259 	/* Search by index */
2260 	if (ti->atype != 0) {
2261 		if (ti->atype > tcfg->algo_count)
2262 			return (NULL);
2263 		return (tcfg->algo[ti->atype]);
2264 	}
2265 
2266 	if (name == NULL) {
2267 		/* Return default algorithm for given type if set */
2268 		return (tcfg->def_algo[ti->type]);
2269 	}
2270 
2271 	/* Search by name */
2272 	/* TODO: better search */
2273 	for (i = 1; i <= tcfg->algo_count; i++) {
2274 		ta = tcfg->algo[i];
2275 
2276 		/*
2277 		 * One can supply additional algorithm
2278 		 * parameters so we compare only the first word
2279 		 * of supplied name:
2280 		 * 'addr:chash hsize=32'
2281 		 * '^^^^^^^^^'
2282 		 *
2283 		 */
2284 		l = strlen(ta->name);
2285 		if (strncmp(name, ta->name, l) != 0)
2286 			continue;
2287 		if (name[l] != '\0' && name[l] != ' ')
2288 			continue;
2289 		/* Check if we're requesting proper table type */
2290 		if (ti->type != 0 && ti->type != ta->type)
2291 			return (NULL);
2292 		return (ta);
2293 	}
2294 
2295 	return (NULL);
2296 }
2297 
2298 /*
2299  * Register new table algo @ta.
2300  * Stores algo id inside @idx.
2301  *
2302  * Returns 0 on success.
2303  */
2304 int
ipfw_add_table_algo(struct ip_fw_chain * ch,struct table_algo * ta,size_t size,int * idx)2305 ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, size_t size,
2306     int *idx)
2307 {
2308 	struct tables_config *tcfg;
2309 	struct table_algo *ta_new;
2310 	size_t sz;
2311 
2312 	if (size > sizeof(struct table_algo))
2313 		return (EINVAL);
2314 
2315 	/* Check for the required on-stack size for add/del */
2316 	sz = roundup2(ta->ta_buf_size, sizeof(void *));
2317 	if (sz > TA_BUF_SZ)
2318 		return (EINVAL);
2319 
2320 	KASSERT(ta->type <= IPFW_TABLE_MAXTYPE,("Increase IPFW_TABLE_MAXTYPE"));
2321 
2322 	/* Copy algorithm data to stable storage. */
2323 	ta_new = malloc(sizeof(struct table_algo), M_IPFW, M_WAITOK | M_ZERO);
2324 	memcpy(ta_new, ta, size);
2325 
2326 	tcfg = CHAIN_TO_TCFG(ch);
2327 
2328 	KASSERT(tcfg->algo_count < 255, ("Increase algo array size"));
2329 
2330 	tcfg->algo[++tcfg->algo_count] = ta_new;
2331 	ta_new->idx = tcfg->algo_count;
2332 
2333 	/* Set algorithm as default one for given type */
2334 	if ((ta_new->flags & TA_FLAG_DEFAULT) != 0 &&
2335 	    tcfg->def_algo[ta_new->type] == NULL)
2336 		tcfg->def_algo[ta_new->type] = ta_new;
2337 
2338 	*idx = ta_new->idx;
2339 
2340 	return (0);
2341 }
2342 
2343 /*
2344  * Unregisters table algo using @idx as id.
2345  * XXX: It is NOT safe to call this function in any place
2346  * other than ipfw instance destroy handler.
2347  */
2348 void
ipfw_del_table_algo(struct ip_fw_chain * ch,int idx)2349 ipfw_del_table_algo(struct ip_fw_chain *ch, int idx)
2350 {
2351 	struct tables_config *tcfg;
2352 	struct table_algo *ta;
2353 
2354 	tcfg = CHAIN_TO_TCFG(ch);
2355 
2356 	KASSERT(idx <= tcfg->algo_count, ("algo idx %d out of range 1..%d",
2357 	    idx, tcfg->algo_count));
2358 
2359 	ta = tcfg->algo[idx];
2360 	KASSERT(ta != NULL, ("algo idx %d is NULL", idx));
2361 
2362 	if (tcfg->def_algo[ta->type] == ta)
2363 		tcfg->def_algo[ta->type] = NULL;
2364 
2365 	free(ta, M_IPFW);
2366 }
2367 
2368 /*
2369  * Lists all table algorithms currently available.
2370  * Data layout (v0)(current):
2371  * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
2372  * Reply: [ ipfw_obj_lheader ipfw_ta_info x N ]
2373  *
2374  * Returns 0 on success
2375  */
2376 static int
list_table_algo(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)2377 list_table_algo(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
2378     struct sockopt_data *sd)
2379 {
2380 	struct _ipfw_obj_lheader *olh;
2381 	struct tables_config *tcfg;
2382 	ipfw_ta_info *i;
2383 	struct table_algo *ta;
2384 	uint32_t count, n, size;
2385 
2386 	olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
2387 	if (olh == NULL)
2388 		return (EINVAL);
2389 	if (sd->valsize < olh->size)
2390 		return (EINVAL);
2391 
2392 	IPFW_UH_RLOCK(ch);
2393 	tcfg = CHAIN_TO_TCFG(ch);
2394 	count = tcfg->algo_count;
2395 	size = count * sizeof(ipfw_ta_info) + sizeof(ipfw_obj_lheader);
2396 
2397 	/* Fill in header regadless of buffer size */
2398 	olh->count = count;
2399 	olh->objsize = sizeof(ipfw_ta_info);
2400 
2401 	if (size > olh->size) {
2402 		olh->size = size;
2403 		IPFW_UH_RUNLOCK(ch);
2404 		return (ENOMEM);
2405 	}
2406 	olh->size = size;
2407 
2408 	for (n = 1; n <= count; n++) {
2409 		i = (ipfw_ta_info *)ipfw_get_sopt_space(sd, sizeof(*i));
2410 		KASSERT(i != NULL, ("previously checked buffer is not enough"));
2411 		ta = tcfg->algo[n];
2412 		strlcpy(i->algoname, ta->name, sizeof(i->algoname));
2413 		i->type = ta->type;
2414 		i->refcnt = ta->refcnt;
2415 	}
2416 
2417 	IPFW_UH_RUNLOCK(ch);
2418 
2419 	return (0);
2420 }
2421 
2422 static int
classify_srcdst(ipfw_insn * cmd0,uint32_t * puidx,uint8_t * ptype)2423 classify_srcdst(ipfw_insn *cmd0, uint32_t *puidx, uint8_t *ptype)
2424 {
2425 	ipfw_insn_table *cmd;
2426 
2427 	/* Basic IPv4/IPv6 or u32 lookups */
2428 	cmd = insntod(cmd0, table);
2429 	*puidx = cmd->kidx;
2430 	switch(cmd0->arg1) {
2431 	case LOOKUP_DST_IP:
2432 	case LOOKUP_SRC_IP:
2433 	default:
2434 		/* IPv4 src/dst */
2435 		*ptype = IPFW_TABLE_ADDR;
2436 		break;
2437 	case LOOKUP_DST_PORT:
2438 	case LOOKUP_SRC_PORT:
2439 	case LOOKUP_UID:
2440 	case LOOKUP_JAIL:
2441 	case LOOKUP_DSCP:
2442 	case LOOKUP_MARK:
2443 	case LOOKUP_RULENUM:
2444 		*ptype = IPFW_TABLE_NUMBER;
2445 		break;
2446 	case LOOKUP_DST_MAC:
2447 	case LOOKUP_SRC_MAC:
2448 		*ptype = IPFW_TABLE_MAC;
2449 		break;
2450 	}
2451 	return (0);
2452 }
2453 
2454 static int
classify_via(ipfw_insn * cmd0,uint32_t * puidx,uint8_t * ptype)2455 classify_via(ipfw_insn *cmd0, uint32_t *puidx, uint8_t *ptype)
2456 {
2457 	ipfw_insn_if *cmdif;
2458 
2459 	/* Interface table, possibly */
2460 	cmdif = insntod(cmd0, if);
2461 	if (cmdif->name[0] != '\1')
2462 		return (1);
2463 
2464 	*ptype = IPFW_TABLE_INTERFACE;
2465 	*puidx = cmdif->p.kidx; /* XXXAE */
2466 	return (0);
2467 }
2468 
2469 static int
classify_flow(ipfw_insn * cmd0,uint32_t * puidx,uint8_t * ptype)2470 classify_flow(ipfw_insn *cmd0, uint32_t *puidx, uint8_t *ptype)
2471 {
2472 	*puidx = insntod(cmd0, table)->kidx;
2473 	*ptype = IPFW_TABLE_FLOW;
2474 	return (0);
2475 }
2476 
2477 static int
classify_mac_lookup(ipfw_insn * cmd0,uint32_t * puidx,uint8_t * ptype)2478 classify_mac_lookup(ipfw_insn *cmd0, uint32_t *puidx, uint8_t *ptype)
2479 {
2480 	*puidx = insntod(cmd0, table)->kidx;
2481 	*ptype = IPFW_TABLE_MAC;
2482 	return (0);
2483 }
2484 
2485 static void
update_kidx(ipfw_insn * cmd0,uint32_t idx)2486 update_kidx(ipfw_insn *cmd0, uint32_t idx)
2487 {
2488 	insntod(cmd0, table)->kidx = idx;
2489 }
2490 
2491 static void
update_via(ipfw_insn * cmd0,uint32_t idx)2492 update_via(ipfw_insn *cmd0, uint32_t idx)
2493 {
2494 	insntod(cmd0, if)->p.kidx = idx;
2495 }
2496 
2497 static int
table_findbyname(struct ip_fw_chain * ch,struct tid_info * ti,struct named_object ** pno)2498 table_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
2499     struct named_object **pno)
2500 {
2501 	struct table_config *tc;
2502 	int error;
2503 
2504 	IPFW_UH_WLOCK_ASSERT(ch);
2505 
2506 	error = find_table_err(CHAIN_TO_NI(ch), ti, &tc);
2507 	if (error != 0)
2508 		return (error);
2509 
2510 	*pno = &tc->no;
2511 	return (0);
2512 }
2513 
2514 /* XXX: sets-sets! */
2515 static struct named_object *
table_findbykidx(struct ip_fw_chain * ch,uint32_t idx)2516 table_findbykidx(struct ip_fw_chain *ch, uint32_t idx)
2517 {
2518 	struct namedobj_instance *ni;
2519 	struct table_config *tc;
2520 
2521 	IPFW_UH_WLOCK_ASSERT(ch);
2522 	ni = CHAIN_TO_NI(ch);
2523 	tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, idx);
2524 	KASSERT(tc != NULL, ("Table with index %u not found", idx));
2525 
2526 	return (&tc->no);
2527 }
2528 
2529 static int
table_manage_sets(struct ip_fw_chain * ch,uint32_t set,uint8_t new_set,enum ipfw_sets_cmd cmd)2530 table_manage_sets(struct ip_fw_chain *ch, uint32_t set, uint8_t new_set,
2531     enum ipfw_sets_cmd cmd)
2532 {
2533 
2534 	switch (cmd) {
2535 	case SWAP_ALL:
2536 	case TEST_ALL:
2537 	case MOVE_ALL:
2538 		/*
2539 		 * Always return success, the real action and decision
2540 		 * should make table_manage_sets_all().
2541 		 */
2542 		return (0);
2543 	case TEST_ONE:
2544 	case MOVE_ONE:
2545 		/*
2546 		 * NOTE: we need to use ipfw_objhash_del/ipfw_objhash_add
2547 		 * if set number will be used in hash function. Currently
2548 		 * we can just use generic handler that replaces set value.
2549 		 */
2550 		if (V_fw_tables_sets == 0)
2551 			return (0);
2552 		break;
2553 	case COUNT_ONE:
2554 		/*
2555 		 * Return EOPNOTSUPP for COUNT_ONE when per-set sysctl is
2556 		 * disabled. This allow skip table's opcodes from additional
2557 		 * checks when specific rules moved to another set.
2558 		 */
2559 		if (V_fw_tables_sets == 0)
2560 			return (EOPNOTSUPP);
2561 	}
2562 	/* Use generic sets handler when per-set sysctl is enabled. */
2563 	return (ipfw_obj_manage_sets(CHAIN_TO_NI(ch), IPFW_TLV_TBL_NAME,
2564 	    set, new_set, cmd));
2565 }
2566 
2567 /*
2568  * We register several opcode rewriters for lookup tables.
2569  * All tables opcodes have the same ETLV type, but different subtype.
2570  * To avoid invoking sets handler several times for XXX_ALL commands,
2571  * we use separate manage_sets handler. O_RECV has the lowest value,
2572  * so it should be called first.
2573  */
2574 static int
table_manage_sets_all(struct ip_fw_chain * ch,uint32_t set,uint8_t new_set,enum ipfw_sets_cmd cmd)2575 table_manage_sets_all(struct ip_fw_chain *ch, uint32_t set, uint8_t new_set,
2576     enum ipfw_sets_cmd cmd)
2577 {
2578 
2579 	switch (cmd) {
2580 	case SWAP_ALL:
2581 	case TEST_ALL:
2582 		/*
2583 		 * Return success for TEST_ALL, since nothing prevents
2584 		 * move rules from one set to another. All tables are
2585 		 * accessible from all sets when per-set tables sysctl
2586 		 * is disabled.
2587 		 */
2588 	case MOVE_ALL:
2589 		if (V_fw_tables_sets == 0)
2590 			return (0);
2591 		break;
2592 	default:
2593 		return (table_manage_sets(ch, set, new_set, cmd));
2594 	}
2595 	/* Use generic sets handler when per-set sysctl is enabled. */
2596 	return (ipfw_obj_manage_sets(CHAIN_TO_NI(ch), IPFW_TLV_TBL_NAME,
2597 	    set, new_set, cmd));
2598 }
2599 
2600 static struct opcode_obj_rewrite opcodes[] = {
2601 	{
2602 		.opcode = O_IP_SRC_LOOKUP,
2603 		.etlv = IPFW_TLV_TBL_NAME,
2604 		.classifier = classify_srcdst,
2605 		.update = update_kidx,
2606 		.find_byname = table_findbyname,
2607 		.find_bykidx = table_findbykidx,
2608 		.create_object = create_table_compat,
2609 		.manage_sets = table_manage_sets,
2610 	},
2611 	{
2612 		.opcode = O_IP_DST_LOOKUP,
2613 		.etlv = IPFW_TLV_TBL_NAME,
2614 		.classifier = classify_srcdst,
2615 		.update = update_kidx,
2616 		.find_byname = table_findbyname,
2617 		.find_bykidx = table_findbykidx,
2618 		.create_object = create_table_compat,
2619 		.manage_sets = table_manage_sets,
2620 	},
2621 	{
2622 		.opcode = O_IP_FLOW_LOOKUP,
2623 		.etlv = IPFW_TLV_TBL_NAME,
2624 		.classifier = classify_flow,
2625 		.update = update_kidx,
2626 		.find_byname = table_findbyname,
2627 		.find_bykidx = table_findbykidx,
2628 		.create_object = create_table_compat,
2629 		.manage_sets = table_manage_sets,
2630 	},
2631 	{
2632 		.opcode = O_MAC_SRC_LOOKUP,
2633 		.etlv = IPFW_TLV_TBL_NAME,
2634 		.classifier = classify_mac_lookup,
2635 		.update = update_kidx,
2636 		.find_byname = table_findbyname,
2637 		.find_bykidx = table_findbykidx,
2638 		.create_object = create_table_compat,
2639 		.manage_sets = table_manage_sets,
2640 	},
2641 	{
2642 		.opcode = O_MAC_DST_LOOKUP,
2643 		.etlv = IPFW_TLV_TBL_NAME,
2644 		.classifier = classify_mac_lookup,
2645 		.update = update_kidx,
2646 		.find_byname = table_findbyname,
2647 		.find_bykidx = table_findbykidx,
2648 		.create_object = create_table_compat,
2649 		.manage_sets = table_manage_sets,
2650 	},
2651 	{
2652 		.opcode = O_XMIT,
2653 		.etlv = IPFW_TLV_TBL_NAME,
2654 		.classifier = classify_via,
2655 		.update = update_via,
2656 		.find_byname = table_findbyname,
2657 		.find_bykidx = table_findbykidx,
2658 		.create_object = create_table_compat,
2659 		.manage_sets = table_manage_sets,
2660 	},
2661 	{
2662 		.opcode = O_RECV,
2663 		.etlv = IPFW_TLV_TBL_NAME,
2664 		.classifier = classify_via,
2665 		.update = update_via,
2666 		.find_byname = table_findbyname,
2667 		.find_bykidx = table_findbykidx,
2668 		.create_object = create_table_compat,
2669 		.manage_sets = table_manage_sets_all,
2670 	},
2671 	{
2672 		.opcode = O_VIA,
2673 		.etlv = IPFW_TLV_TBL_NAME,
2674 		.classifier = classify_via,
2675 		.update = update_via,
2676 		.find_byname = table_findbyname,
2677 		.find_bykidx = table_findbykidx,
2678 		.create_object = create_table_compat,
2679 		.manage_sets = table_manage_sets,
2680 	},
2681 };
2682 
2683 static int
test_sets_cb(struct namedobj_instance * ni __unused,struct named_object * no,void * arg __unused)2684 test_sets_cb(struct namedobj_instance *ni __unused, struct named_object *no,
2685     void *arg __unused)
2686 {
2687 
2688 	/* Check that there aren't any tables in not default set */
2689 	if (no->set != 0)
2690 		return (EBUSY);
2691 	return (0);
2692 }
2693 
2694 /*
2695  * Switch between "set 0" and "rule's set" table binding,
2696  * Check all ruleset bindings and permits changing
2697  * IFF each binding has both rule AND table in default set (set 0).
2698  *
2699  * Returns 0 on success.
2700  */
2701 int
ipfw_switch_tables_namespace(struct ip_fw_chain * ch,unsigned int sets)2702 ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int sets)
2703 {
2704 	struct opcode_obj_rewrite *rw;
2705 	struct namedobj_instance *ni;
2706 	struct named_object *no;
2707 	struct ip_fw *rule;
2708 	ipfw_insn *cmd;
2709 	int cmdlen, i, l;
2710 	uint32_t kidx;
2711 	uint8_t subtype;
2712 
2713 	IPFW_UH_WLOCK(ch);
2714 
2715 	if (V_fw_tables_sets == sets) {
2716 		IPFW_UH_WUNLOCK(ch);
2717 		return (0);
2718 	}
2719 	ni = CHAIN_TO_NI(ch);
2720 	if (sets == 0) {
2721 		/*
2722 		 * Prevent disabling sets support if we have some tables
2723 		 * in not default sets.
2724 		 */
2725 		if (ipfw_objhash_foreach_type(ni, test_sets_cb,
2726 		    NULL, IPFW_TLV_TBL_NAME) != 0) {
2727 			IPFW_UH_WUNLOCK(ch);
2728 			return (EBUSY);
2729 		}
2730 	}
2731 	/*
2732 	 * Scan all rules and examine tables opcodes.
2733 	 */
2734 	for (i = 0; i < ch->n_rules; i++) {
2735 		rule = ch->map[i];
2736 
2737 		l = rule->cmd_len;
2738 		cmd = rule->cmd;
2739 		cmdlen = 0;
2740 		for ( ;	l > 0 ; l -= cmdlen, cmd += cmdlen) {
2741 			cmdlen = F_LEN(cmd);
2742 			/* Check only tables opcodes */
2743 			for (kidx = 0, rw = opcodes;
2744 			    rw < opcodes + nitems(opcodes); rw++) {
2745 				if (rw->opcode != cmd->opcode)
2746 					continue;
2747 				if (rw->classifier(cmd, &kidx, &subtype) == 0)
2748 					break;
2749 			}
2750 			if (kidx == 0)
2751 				continue;
2752 			no = ipfw_objhash_lookup_kidx(ni, kidx);
2753 			/* Check if both table object and rule has the set 0 */
2754 			if (no->set != 0 || rule->set != 0) {
2755 				IPFW_UH_WUNLOCK(ch);
2756 				return (EBUSY);
2757 			}
2758 		}
2759 	}
2760 	V_fw_tables_sets = sets;
2761 	IPFW_UH_WUNLOCK(ch);
2762 	return (0);
2763 }
2764 
/*
 * Validates a table name.
 * Only the generic object name checks are applied here; anything more
 * elaborate is left to userland.
 *
 * Returns 0 if name is considered valid.
 */
static int
check_table_name(const char *name)
{
	int error;

	/* TODO: do some more complicated checks */
	error = ipfw_check_object_name_generic(name);

	return (error);
}
2781 
2782 /*
2783  * Finds table config based on either legacy index
2784  * or name in ntlv.
2785  * Note @ti structure contains unchecked data from userland.
2786  *
2787  * Returns 0 in success and fills in @tc with found config
2788  */
2789 static int
find_table_err(struct namedobj_instance * ni,struct tid_info * ti,struct table_config ** tc)2790 find_table_err(struct namedobj_instance *ni, struct tid_info *ti,
2791     struct table_config **tc)
2792 {
2793 	char *name, bname[16];
2794 	struct named_object *no;
2795 	ipfw_obj_ntlv *ntlv;
2796 	uint32_t set;
2797 
2798 	if (ti->tlvs != NULL) {
2799 		ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx,
2800 		    IPFW_TLV_TBL_NAME);
2801 		if (ntlv == NULL)
2802 			return (EINVAL);
2803 		name = ntlv->name;
2804 
2805 		/*
2806 		 * Use set provided by @ti instead of @ntlv one.
2807 		 * This is needed due to different sets behavior
2808 		 * controlled by V_fw_tables_sets.
2809 		 */
2810 		set = (V_fw_tables_sets != 0) ? ti->set : 0;
2811 	} else {
2812 		snprintf(bname, sizeof(bname), "%d", ti->uidx);
2813 		name = bname;
2814 		set = 0;
2815 	}
2816 
2817 	no = ipfw_objhash_lookup_name(ni, set, name);
2818 	*tc = (struct table_config *)no;
2819 
2820 	return (0);
2821 }
2822 
2823 /*
2824  * Finds table config based on either legacy index
2825  * or name in ntlv.
2826  * Note @ti structure contains unchecked data from userland.
2827  *
2828  * Returns pointer to table_config or NULL.
2829  */
2830 static struct table_config *
find_table(struct namedobj_instance * ni,struct tid_info * ti)2831 find_table(struct namedobj_instance *ni, struct tid_info *ti)
2832 {
2833 	struct table_config *tc;
2834 
2835 	if (find_table_err(ni, ti, &tc) != 0)
2836 		return (NULL);
2837 
2838 	return (tc);
2839 }
2840 
2841 /*
2842  * Allocate new table config structure using
2843  * specified @algo and @aname.
2844  *
2845  * Returns pointer to config or NULL.
2846  */
2847 static struct table_config *
alloc_table_config(struct ip_fw_chain * ch,struct tid_info * ti,struct table_algo * ta,char * aname,uint8_t tflags)2848 alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti,
2849     struct table_algo *ta, char *aname, uint8_t tflags)
2850 {
2851 	char *name, bname[16];
2852 	struct table_config *tc;
2853 	int error;
2854 	ipfw_obj_ntlv *ntlv;
2855 	uint32_t set;
2856 
2857 	if (ti->tlvs != NULL) {
2858 		ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx,
2859 		    IPFW_TLV_TBL_NAME);
2860 		if (ntlv == NULL)
2861 			return (NULL);
2862 		name = ntlv->name;
2863 		set = (V_fw_tables_sets == 0) ? 0 : ntlv->set;
2864 	} else {
2865 		/* Compat part: convert number to string representation */
2866 		snprintf(bname, sizeof(bname), "%d", ti->uidx);
2867 		name = bname;
2868 		set = 0;
2869 	}
2870 
2871 	tc = malloc(sizeof(struct table_config), M_IPFW, M_WAITOK | M_ZERO);
2872 	tc->no.name = tc->tablename;
2873 	tc->no.subtype = ta->type;
2874 	tc->no.set = set;
2875 	tc->tflags = tflags;
2876 	tc->ta = ta;
2877 	strlcpy(tc->tablename, name, sizeof(tc->tablename));
2878 	/* Set "shared" value type by default */
2879 	tc->vshared = 1;
2880 
2881 	/* Preallocate data structures for new tables */
2882 	error = ta->init(ch, &tc->astate, &tc->ti_copy, aname, tflags);
2883 	if (error != 0) {
2884 		free(tc, M_IPFW);
2885 		return (NULL);
2886 	}
2887 
2888 	return (tc);
2889 }
2890 
2891 /*
2892  * Destroys table state and config.
2893  */
2894 static void
free_table_config(struct namedobj_instance * ni,struct table_config * tc)2895 free_table_config(struct namedobj_instance *ni, struct table_config *tc)
2896 {
2897 
2898 	KASSERT(tc->linked == 0, ("free() on linked config"));
2899 	/* UH lock MUST NOT be held */
2900 
2901 	/*
2902 	 * We're using ta without any locking/referencing.
2903 	 * TODO: fix this if we're going to use unloadable algos.
2904 	 */
2905 	tc->ta->destroy(tc->astate, &tc->ti_copy);
2906 	free(tc, M_IPFW);
2907 }
2908 
2909 /*
2910  * Links @tc to @chain table named instance.
2911  * Sets appropriate type/states in @chain table info.
2912  */
2913 static void
link_table(struct ip_fw_chain * ch,struct table_config * tc)2914 link_table(struct ip_fw_chain *ch, struct table_config *tc)
2915 {
2916 	struct namedobj_instance *ni;
2917 	struct table_info *ti;
2918 	uint16_t kidx;
2919 
2920 	IPFW_UH_WLOCK_ASSERT(ch);
2921 
2922 	ni = CHAIN_TO_NI(ch);
2923 	kidx = tc->no.kidx;
2924 
2925 	ipfw_objhash_add(ni, &tc->no);
2926 
2927 	ti = KIDX_TO_TI(ch, kidx);
2928 	*ti = tc->ti_copy;
2929 
2930 	/* Notify algo on real @ti address */
2931 	if (tc->ta->change_ti != NULL)
2932 		tc->ta->change_ti(tc->astate, ti);
2933 
2934 	tc->linked = 1;
2935 	tc->ta->refcnt++;
2936 }
2937 
2938 /*
2939  * Unlinks @tc from @chain table named instance.
2940  * Zeroes states in @chain and stores them in @tc.
2941  */
2942 static void
unlink_table(struct ip_fw_chain * ch,struct table_config * tc)2943 unlink_table(struct ip_fw_chain *ch, struct table_config *tc)
2944 {
2945 	struct namedobj_instance *ni;
2946 	struct table_info *ti;
2947 	uint16_t kidx;
2948 
2949 	IPFW_UH_WLOCK_ASSERT(ch);
2950 	IPFW_WLOCK_ASSERT(ch);
2951 
2952 	ni = CHAIN_TO_NI(ch);
2953 	kidx = tc->no.kidx;
2954 
2955 	/* Clear state. @ti copy is already saved inside @tc */
2956 	ipfw_objhash_del(ni, &tc->no);
2957 	ti = KIDX_TO_TI(ch, kidx);
2958 	memset(ti, 0, sizeof(struct table_info));
2959 	tc->linked = 0;
2960 	tc->ta->refcnt--;
2961 
2962 	/* Notify algo on real @ti address */
2963 	if (tc->ta->change_ti != NULL)
2964 		tc->ta->change_ti(tc->astate, NULL);
2965 }
2966 
2967 static struct ipfw_sopt_handler	scodes[] = {
2968     { IP_FW_TABLE_XCREATE,	IP_FW3_OPVER, HDIR_SET,	create_table },
2969     { IP_FW_TABLE_XDESTROY,	IP_FW3_OPVER, HDIR_SET,	flush_table_v0 },
2970     { IP_FW_TABLE_XFLUSH,	IP_FW3_OPVER, HDIR_SET,	flush_table_v0 },
2971     { IP_FW_TABLE_XMODIFY,	IP_FW3_OPVER, HDIR_BOTH, modify_table },
2972     { IP_FW_TABLE_XINFO,	IP_FW3_OPVER, HDIR_GET,	describe_table },
2973     { IP_FW_TABLES_XLIST,	IP_FW3_OPVER, HDIR_GET,	list_tables },
2974     { IP_FW_TABLE_XLIST,	IP_FW3_OPVER, HDIR_GET,	dump_table_v1 },
2975     { IP_FW_TABLE_XADD,		IP_FW3_OPVER, HDIR_BOTH, manage_table_ent_v1 },
2976     { IP_FW_TABLE_XDEL,		IP_FW3_OPVER, HDIR_BOTH, manage_table_ent_v1 },
2977     { IP_FW_TABLE_XFIND,	IP_FW3_OPVER, HDIR_GET,	find_table_entry },
2978     { IP_FW_TABLE_XSWAP,	IP_FW3_OPVER, HDIR_SET,	swap_table },
2979     { IP_FW_TABLES_ALIST,	IP_FW3_OPVER, HDIR_GET,	list_table_algo },
2980 };
2981 
2982 static int
destroy_table_locked(struct namedobj_instance * ni,struct named_object * no,void * arg)2983 destroy_table_locked(struct namedobj_instance *ni, struct named_object *no,
2984     void *arg)
2985 {
2986 
2987 	unlink_table((struct ip_fw_chain *)arg, (struct table_config *)no);
2988 	if (ipfw_objhash_free_idx(ni, no->kidx) != 0)
2989 		printf("Error unlinking kidx %d from table %s\n",
2990 		    no->kidx, no->name);
2991 	free_table_config(ni, (struct table_config *)no);
2992 	return (0);
2993 }
2994 
2995 /*
2996  * Shuts tables module down.
2997  */
2998 void
ipfw_destroy_tables(struct ip_fw_chain * ch,int last)2999 ipfw_destroy_tables(struct ip_fw_chain *ch, int last)
3000 {
3001 
3002 	IPFW_DEL_SOPT_HANDLER(last, scodes);
3003 	IPFW_DEL_OBJ_REWRITER(last, opcodes);
3004 
3005 	/* Remove all tables from working set */
3006 	IPFW_UH_WLOCK(ch);
3007 	IPFW_WLOCK(ch);
3008 	ipfw_objhash_foreach(CHAIN_TO_NI(ch), destroy_table_locked, ch);
3009 	IPFW_WUNLOCK(ch);
3010 	IPFW_UH_WUNLOCK(ch);
3011 
3012 	/* Free pointers itself */
3013 	free(ch->tablestate, M_IPFW);
3014 
3015 	ipfw_table_value_destroy(ch, last);
3016 	ipfw_table_algo_destroy(ch);
3017 
3018 	ipfw_objhash_destroy(CHAIN_TO_NI(ch));
3019 	free(CHAIN_TO_TCFG(ch), M_IPFW);
3020 }
3021 
3022 /*
3023  * Starts tables module.
3024  */
3025 int
ipfw_init_tables(struct ip_fw_chain * ch,int first)3026 ipfw_init_tables(struct ip_fw_chain *ch, int first)
3027 {
3028 	struct tables_config *tcfg;
3029 
3030 	/* Allocate pointers */
3031 	ch->tablestate = malloc(V_fw_tables_max * sizeof(struct table_info),
3032 	    M_IPFW, M_WAITOK | M_ZERO);
3033 
3034 	tcfg = malloc(sizeof(struct tables_config), M_IPFW, M_WAITOK | M_ZERO);
3035 	tcfg->namehash = ipfw_objhash_create(V_fw_tables_max,
3036 	    DEFAULT_OBJHASH_SIZE);
3037 	ch->tblcfg = tcfg;
3038 
3039 	ipfw_table_value_init(ch, first);
3040 	ipfw_table_algo_init(ch);
3041 
3042 	IPFW_ADD_OBJ_REWRITER(first, opcodes);
3043 	IPFW_ADD_SOPT_HANDLER(first, scodes);
3044 	return (0);
3045 }
3046