xref: /freebsd/sys/netpfil/ipfw/nat64/nat64lsn_control.c (revision 4a77657cbc011ea657ccb079fff6b58b295eccb0)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2015-2019 Yandex LLC
5  * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
6  * Copyright (c) 2015-2019 Andrey V. Elsukov <ae@FreeBSD.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/counter.h>
33 #include <sys/ck.h>
34 #include <sys/epoch.h>
35 #include <sys/errno.h>
36 #include <sys/kernel.h>
37 #include <sys/lock.h>
38 #include <sys/malloc.h>
39 #include <sys/mbuf.h>
40 #include <sys/module.h>
41 #include <sys/rmlock.h>
42 #include <sys/rwlock.h>
43 #include <sys/socket.h>
44 #include <sys/sockopt.h>
45 
46 #include <net/if.h>
47 
48 #include <netinet/in.h>
49 #include <netinet/ip.h>
50 #include <netinet/ip_var.h>
51 #include <netinet/ip_fw.h>
52 #include <netinet6/ip_fw_nat64.h>
53 
54 #include <netpfil/ipfw/ip_fw_private.h>
55 
56 #include "nat64lsn.h"
57 
/*
 * Per-VNET identifier of the nat64lsn external action.  It is assigned from
 * the return value of ipfw_add_eaction() in nat64lsn_init() and reset to 0
 * in nat64lsn_uninit().
 */
58 VNET_DEFINE(uint32_t, nat64lsn_eid) = 0;
59 
60 static struct nat64lsn_instance *
nat64lsn_find(struct namedobj_instance * ni,const char * name,uint8_t set)61 nat64lsn_find(struct namedobj_instance *ni, const char *name, uint8_t set)
62 {
63 	struct named_object *no;
64 
65 	no = ipfw_objhash_lookup_name_type(ni, set,
66 	    IPFW_TLV_NAT64LSN_NAME, name);
67 	if (no == NULL)
68 		return (NULL);
69 	return (__containerof(no, struct nat64lsn_instance, no));
70 }
71 
72 static void
nat64lsn_default_config(ipfw_nat64lsn_cfg * uc)73 nat64lsn_default_config(ipfw_nat64lsn_cfg *uc)
74 {
75 
76 	if (uc->jmaxlen == 0)
77 		uc->jmaxlen = NAT64LSN_JMAXLEN;
78 	if (uc->jmaxlen > 65536)
79 		uc->jmaxlen = 65536;
80 	if (uc->nh_delete_delay == 0)
81 		uc->nh_delete_delay = NAT64LSN_HOST_AGE;
82 	if (uc->pg_delete_delay == 0)
83 		uc->pg_delete_delay = NAT64LSN_PG_AGE;
84 	if (uc->st_syn_ttl == 0)
85 		uc->st_syn_ttl = NAT64LSN_TCP_SYN_AGE;
86 	if (uc->st_close_ttl == 0)
87 		uc->st_close_ttl = NAT64LSN_TCP_FIN_AGE;
88 	if (uc->st_estab_ttl == 0)
89 		uc->st_estab_ttl = NAT64LSN_TCP_EST_AGE;
90 	if (uc->st_udp_ttl == 0)
91 		uc->st_udp_ttl = NAT64LSN_UDP_AGE;
92 	if (uc->st_icmp_ttl == 0)
93 		uc->st_icmp_ttl = NAT64LSN_ICMP_AGE;
94 
95 	if (uc->states_chunks == 0)
96 		uc->states_chunks = 1;
97 	else if (uc->states_chunks >= 128)
98 		uc->states_chunks = 128;
99 	else if (!powerof2(uc->states_chunks))
100 		uc->states_chunks = 1 << fls(uc->states_chunks);
101 }
102 
103 /*
104  * Creates new nat64lsn instance.
105  * Data layout (v0)(current):
106  * Request: [ ipfw_obj_lheader ipfw_nat64lsn_cfg ]
107  *
108  * Returns 0 on success
109  */
110 static int
nat64lsn_create(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)111 nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
112     struct sockopt_data *sd)
113 {
114 	ipfw_obj_lheader *olh;
115 	ipfw_nat64lsn_cfg *uc;
116 	struct nat64lsn_instance *i;
117 	struct nat64lsn_cfg *cfg;
118 	struct namedobj_instance *ni;
119 	uint32_t addr4, mask4;
120 
121 	if (sd->valsize != sizeof(*olh) + sizeof(*uc))
122 		return (EINVAL);
123 
124 	olh = (ipfw_obj_lheader *)sd->kbuf;
125 	uc = (ipfw_nat64lsn_cfg *)(olh + 1);
126 
127 	if (ipfw_check_object_name_generic(uc->name) != 0)
128 		return (EINVAL);
129 
130 	if (uc->set >= IPFW_MAX_SETS)
131 		return (EINVAL);
132 
133 	if (uc->plen4 > 32)
134 		return (EINVAL);
135 
136 	/*
137 	 * Unspecified address has special meaning. But it must
138 	 * have valid prefix length. This length will be used to
139 	 * correctly extract and embedd IPv4 address into IPv6.
140 	 */
141 	if (nat64_check_prefix6(&uc->prefix6, uc->plen6) != 0 &&
142 	    IN6_IS_ADDR_UNSPECIFIED(&uc->prefix6) &&
143 	    nat64_check_prefixlen(uc->plen6) != 0)
144 		return (EINVAL);
145 
146 	/* XXX: Check prefix4 to be global */
147 	addr4 = ntohl(uc->prefix4.s_addr);
148 	mask4 = ~((1 << (32 - uc->plen4)) - 1);
149 	if ((addr4 & mask4) != addr4)
150 		return (EINVAL);
151 
152 	nat64lsn_default_config(uc);
153 
154 	ni = CHAIN_TO_SRV(ch);
155 	IPFW_UH_RLOCK(ch);
156 	if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
157 		IPFW_UH_RUNLOCK(ch);
158 		return (EEXIST);
159 	}
160 	IPFW_UH_RUNLOCK(ch);
161 
162 	i = malloc(sizeof(struct nat64lsn_instance), M_NAT64LSN,
163 	    M_WAITOK | M_ZERO);
164 	strlcpy(i->name, uc->name, sizeof(i->name));
165 	i->no.name = i->name;
166 	i->no.etlv = IPFW_TLV_NAT64LSN_NAME;
167 	i->no.set = uc->set;
168 
169 	cfg = nat64lsn_init_config(ch, addr4, uc->plen4);
170 	cfg->base.plat_prefix = uc->prefix6;
171 	cfg->base.plat_plen = uc->plen6;
172 	cfg->base.flags = (uc->flags & NAT64LSN_FLAGSMASK) | NAT64_PLATPFX;
173 	if (IN6_IS_ADDR_WKPFX(&cfg->base.plat_prefix))
174 		cfg->base.flags |= NAT64_WKPFX;
175 	else if (IN6_IS_ADDR_UNSPECIFIED(&cfg->base.plat_prefix))
176 		cfg->base.flags |= NAT64LSN_ANYPREFIX;
177 
178 	cfg->states_chunks = uc->states_chunks;
179 	cfg->jmaxlen = uc->jmaxlen;
180 	cfg->host_delete_delay = uc->nh_delete_delay;
181 	cfg->pg_delete_delay = uc->pg_delete_delay;
182 	cfg->st_syn_ttl = uc->st_syn_ttl;
183 	cfg->st_close_ttl = uc->st_close_ttl;
184 	cfg->st_estab_ttl = uc->st_estab_ttl;
185 	cfg->st_udp_ttl = uc->st_udp_ttl;
186 	cfg->st_icmp_ttl = uc->st_icmp_ttl;
187 	cfg->nomatch_verdict = IP_FW_DENY;
188 
189 	IPFW_UH_WLOCK(ch);
190 
191 	if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
192 		IPFW_UH_WUNLOCK(ch);
193 		nat64lsn_destroy_config(cfg);
194 		free(i, M_NAT64LSN);
195 		return (EEXIST);
196 	}
197 
198 	if (ipfw_objhash_alloc_idx(ni, &i->no.kidx) != 0) {
199 		IPFW_UH_WUNLOCK(ch);
200 		nat64lsn_destroy_config(cfg);
201 		free(i, M_NAT64LSN);
202 		return (ENOSPC);
203 	}
204 	ipfw_objhash_add(ni, &i->no);
205 
206 	/* Okay, let's link data */
207 	i->cfg = cfg;
208 	SRV_OBJECT(ch, i->no.kidx) = i;
209 	nat64lsn_start_instance(cfg);
210 
211 	IPFW_UH_WUNLOCK(ch);
212 	return (0);
213 }
214 
215 static void
nat64lsn_detach_instance(struct ip_fw_chain * ch,struct nat64lsn_instance * i)216 nat64lsn_detach_instance(struct ip_fw_chain *ch,
217     struct nat64lsn_instance *i)
218 {
219 
220 	IPFW_UH_WLOCK_ASSERT(ch);
221 	SRV_OBJECT(ch, i->no.kidx) = NULL;
222 	ipfw_objhash_del(CHAIN_TO_SRV(ch), &i->no);
223 	ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), i->no.kidx);
224 }
225 
226 /*
227  * Destroys nat64 instance.
228  * Data layout (v0)(current):
229  * Request: [ ipfw_obj_header ]
230  *
231  * Returns 0 on success
232  */
233 static int
nat64lsn_destroy(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)234 nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
235     struct sockopt_data *sd)
236 {
237 	struct nat64lsn_instance *i;
238 	ipfw_obj_header *oh;
239 
240 	if (sd->valsize != sizeof(*oh))
241 		return (EINVAL);
242 
243 	oh = (ipfw_obj_header *)op3;
244 
245 	IPFW_UH_WLOCK(ch);
246 	i = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
247 	if (i == NULL) {
248 		IPFW_UH_WUNLOCK(ch);
249 		return (ENOENT);
250 	}
251 
252 	if (i->no.refcnt > 0) {
253 		IPFW_UH_WUNLOCK(ch);
254 		return (EBUSY);
255 	}
256 
257 	ipfw_reset_eaction_instance(ch, V_nat64lsn_eid, i->no.kidx);
258 	nat64lsn_detach_instance(ch, i);
259 	IPFW_UH_WUNLOCK(ch);
260 
261 	nat64lsn_destroy_config(i->cfg);
262 	free(i, M_NAT64LSN);
263 	return (0);
264 }
265 
266 #define	__COPY_STAT_FIELD(_cfg, _stats, _field)	\
267 	(_stats)->_field = NAT64STAT_FETCH(&(_cfg)->base.stats, _field)
268 static void
export_stats(struct ip_fw_chain * ch,struct nat64lsn_cfg * cfg,struct ipfw_nat64lsn_stats * stats)269 export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
270     struct ipfw_nat64lsn_stats *stats)
271 {
272 	struct nat64lsn_alias *alias;
273 	int i;
274 
275 	__COPY_STAT_FIELD(cfg, stats, opcnt64);
276 	__COPY_STAT_FIELD(cfg, stats, opcnt46);
277 	__COPY_STAT_FIELD(cfg, stats, ofrags);
278 	__COPY_STAT_FIELD(cfg, stats, ifrags);
279 	__COPY_STAT_FIELD(cfg, stats, oerrors);
280 	__COPY_STAT_FIELD(cfg, stats, noroute4);
281 	__COPY_STAT_FIELD(cfg, stats, noroute6);
282 	__COPY_STAT_FIELD(cfg, stats, nomatch4);
283 	__COPY_STAT_FIELD(cfg, stats, noproto);
284 	__COPY_STAT_FIELD(cfg, stats, nomem);
285 	__COPY_STAT_FIELD(cfg, stats, dropped);
286 
287 	__COPY_STAT_FIELD(cfg, stats, jcalls);
288 	__COPY_STAT_FIELD(cfg, stats, jrequests);
289 	__COPY_STAT_FIELD(cfg, stats, jhostsreq);
290 	__COPY_STAT_FIELD(cfg, stats, jportreq);
291 	__COPY_STAT_FIELD(cfg, stats, jhostfails);
292 	__COPY_STAT_FIELD(cfg, stats, jportfails);
293 	__COPY_STAT_FIELD(cfg, stats, jmaxlen);
294 	__COPY_STAT_FIELD(cfg, stats, jnomem);
295 	__COPY_STAT_FIELD(cfg, stats, jreinjected);
296 	__COPY_STAT_FIELD(cfg, stats, screated);
297 	__COPY_STAT_FIELD(cfg, stats, sdeleted);
298 	__COPY_STAT_FIELD(cfg, stats, spgcreated);
299 	__COPY_STAT_FIELD(cfg, stats, spgdeleted);
300 
301 	stats->hostcount = cfg->hosts_count;
302 	for (i = 0; i < (1 << (32 - cfg->plen4)); i++) {
303 		alias = &cfg->aliases[i];
304 		stats->tcpchunks += alias->tcp_pgcount;
305 		stats->udpchunks += alias->udp_pgcount;
306 		stats->icmpchunks += alias->icmp_pgcount;
307 	}
308 }
309 #undef	__COPY_STAT_FIELD
310 
311 static void
nat64lsn_export_config(struct ip_fw_chain * ch,struct nat64lsn_instance * i,ipfw_nat64lsn_cfg * uc)312 nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_instance *i,
313     ipfw_nat64lsn_cfg *uc)
314 {
315 	struct nat64lsn_cfg *cfg;
316 
317 	strlcpy(uc->name, i->no.name, sizeof(uc->name));
318 	uc->set = i->no.set;
319 	cfg = i->cfg;
320 
321 	uc->flags = cfg->base.flags & NAT64LSN_FLAGSMASK;
322 	uc->states_chunks = cfg->states_chunks;
323 	uc->jmaxlen = cfg->jmaxlen;
324 	uc->nh_delete_delay = cfg->host_delete_delay;
325 	uc->pg_delete_delay = cfg->pg_delete_delay;
326 	uc->st_syn_ttl = cfg->st_syn_ttl;
327 	uc->st_close_ttl = cfg->st_close_ttl;
328 	uc->st_estab_ttl = cfg->st_estab_ttl;
329 	uc->st_udp_ttl = cfg->st_udp_ttl;
330 	uc->st_icmp_ttl = cfg->st_icmp_ttl;
331 	uc->prefix4.s_addr = htonl(cfg->prefix4);
332 	uc->prefix6 = cfg->base.plat_prefix;
333 	uc->plen4 = cfg->plen4;
334 	uc->plen6 = cfg->base.plat_plen;
335 }
336 
/*
 * Context that nat64lsn_list() hands to export_config_cb() through the
 * opaque callback argument.
 */
337 struct nat64_dump_arg {
	/* Chain that is passed on to nat64lsn_export_config(). */
338 	struct ip_fw_chain *ch;
	/* Sockopt buffer the exported records are carved from. */
339 	struct sockopt_data *sd;
340 };
341 
342 static int
export_config_cb(struct namedobj_instance * ni,struct named_object * no,void * arg)343 export_config_cb(struct namedobj_instance *ni, struct named_object *no,
344     void *arg)
345 {
346 	struct nat64_dump_arg *da;
347 	ipfw_nat64lsn_cfg *uc;
348 
349 	da = (struct nat64_dump_arg *)arg;
350 	uc = (struct _ipfw_nat64lsn_cfg *)ipfw_get_sopt_space(da->sd,
351 	    sizeof(*uc));
352 	nat64lsn_export_config(da->ch,
353 	    __containerof(no, struct nat64lsn_instance, no), uc);
354 	return (0);
355 }
356 
357 /*
358  * Lists all nat64 lsn instances currently available in kernel.
359  * Data layout (v0)(current):
360  * Request: [ ipfw_obj_lheader ]
361  * Reply: [ ipfw_obj_lheader ipfw_nat64lsn_cfg x N ]
362  *
363  * Returns 0 on success
364  */
365 static int
nat64lsn_list(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)366 nat64lsn_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
367     struct sockopt_data *sd)
368 {
369 	ipfw_obj_lheader *olh;
370 	struct nat64_dump_arg da;
371 
372 	/* Check minimum header size */
373 	if (sd->valsize < sizeof(ipfw_obj_lheader))
374 		return (EINVAL);
375 
376 	olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh));
377 
378 	IPFW_UH_RLOCK(ch);
379 	olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch),
380 	    IPFW_TLV_NAT64LSN_NAME);
381 	olh->objsize = sizeof(ipfw_nat64lsn_cfg);
382 	olh->size = sizeof(*olh) + olh->count * olh->objsize;
383 
384 	if (sd->valsize < olh->size) {
385 		IPFW_UH_RUNLOCK(ch);
386 		return (ENOMEM);
387 	}
388 	memset(&da, 0, sizeof(da));
389 	da.ch = ch;
390 	da.sd = sd;
391 	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb, &da,
392 	    IPFW_TLV_NAT64LSN_NAME);
393 	IPFW_UH_RUNLOCK(ch);
394 
395 	return (0);
396 }
397 
398 /*
399  * Change existing nat64lsn instance configuration.
400  * Data layout (v0)(current):
401  * Request: [ ipfw_obj_header ipfw_nat64lsn_cfg ]
402  * Reply: [ ipfw_obj_header ipfw_nat64lsn_cfg ]
403  *
404  * Returns 0 on success
405  */
406 static int
nat64lsn_config(struct ip_fw_chain * ch,ip_fw3_opheader * op,struct sockopt_data * sd)407 nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
408     struct sockopt_data *sd)
409 {
410 	ipfw_obj_header *oh;
411 	ipfw_nat64lsn_cfg *uc;
412 	struct nat64lsn_instance *i;
413 	struct nat64lsn_cfg *cfg;
414 	struct namedobj_instance *ni;
415 
416 	if (sd->valsize != sizeof(*oh) + sizeof(*uc))
417 		return (EINVAL);
418 
419 	oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd,
420 	    sizeof(*oh) + sizeof(*uc));
421 	uc = (ipfw_nat64lsn_cfg *)(oh + 1);
422 
423 	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
424 	    oh->ntlv.set >= IPFW_MAX_SETS)
425 		return (EINVAL);
426 
427 	ni = CHAIN_TO_SRV(ch);
428 	if (sd->sopt->sopt_dir == SOPT_GET) {
429 		IPFW_UH_RLOCK(ch);
430 		i = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
431 		if (i == NULL) {
432 			IPFW_UH_RUNLOCK(ch);
433 			return (ENOENT);
434 		}
435 		nat64lsn_export_config(ch, i, uc);
436 		IPFW_UH_RUNLOCK(ch);
437 		return (0);
438 	}
439 
440 	nat64lsn_default_config(uc);
441 
442 	IPFW_UH_WLOCK(ch);
443 	i = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
444 	if (i == NULL) {
445 		IPFW_UH_WUNLOCK(ch);
446 		return (ENOENT);
447 	}
448 
449 	/*
450 	 * For now allow to change only following values:
451 	 *  jmaxlen, nh_del_age, pg_del_age, tcp_syn_age, tcp_close_age,
452 	 *  tcp_est_age, udp_age, icmp_age, flags, states_chunks.
453 	 */
454 	cfg = i->cfg;
455 	cfg->states_chunks = uc->states_chunks;
456 	cfg->jmaxlen = uc->jmaxlen;
457 	cfg->host_delete_delay = uc->nh_delete_delay;
458 	cfg->pg_delete_delay = uc->pg_delete_delay;
459 	cfg->st_syn_ttl = uc->st_syn_ttl;
460 	cfg->st_close_ttl = uc->st_close_ttl;
461 	cfg->st_estab_ttl = uc->st_estab_ttl;
462 	cfg->st_udp_ttl = uc->st_udp_ttl;
463 	cfg->st_icmp_ttl = uc->st_icmp_ttl;
464 	cfg->base.flags &= ~NAT64LSN_FLAGSMASK;
465 	cfg->base.flags |= uc->flags & NAT64LSN_FLAGSMASK;
466 
467 	IPFW_UH_WUNLOCK(ch);
468 
469 	return (0);
470 }
471 
472 /*
473  * Get nat64lsn statistics.
474  * Data layout (v0)(current):
475  * Request: [ ipfw_obj_header ]
476  * Reply: [ ipfw_obj_header ipfw_counter_tlv ]
477  *
478  * Returns 0 on success
479  */
480 static int
nat64lsn_stats(struct ip_fw_chain * ch,ip_fw3_opheader * op,struct sockopt_data * sd)481 nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
482     struct sockopt_data *sd)
483 {
484 	struct ipfw_nat64lsn_stats stats;
485 	struct nat64lsn_instance *i;
486 	ipfw_obj_header *oh;
487 	ipfw_obj_ctlv *ctlv;
488 	size_t sz;
489 
490 	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats);
491 	if (sd->valsize % sizeof(uint64_t))
492 		return (EINVAL);
493 	if (sd->valsize < sz)
494 		return (ENOMEM);
495 	oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
496 	if (oh == NULL)
497 		return (EINVAL);
498 	memset(&stats, 0, sizeof(stats));
499 
500 	IPFW_UH_RLOCK(ch);
501 	i = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
502 	if (i == NULL) {
503 		IPFW_UH_RUNLOCK(ch);
504 		return (ENOENT);
505 	}
506 
507 	export_stats(ch, i->cfg, &stats);
508 	IPFW_UH_RUNLOCK(ch);
509 
510 	ctlv = (ipfw_obj_ctlv *)(oh + 1);
511 	memset(ctlv, 0, sizeof(*ctlv));
512 	ctlv->head.type = IPFW_TLV_COUNTERS;
513 	ctlv->head.length = sz - sizeof(ipfw_obj_header);
514 	ctlv->count = sizeof(stats) / sizeof(uint64_t);
515 	ctlv->objsize = sizeof(uint64_t);
516 	ctlv->version = IPFW_NAT64_VERSION;
517 	memcpy(ctlv + 1, &stats, sizeof(stats));
518 	return (0);
519 }
520 
521 /*
522  * Reset nat64lsn statistics.
523  * Data layout (v0)(current):
524  * Request: [ ipfw_obj_header ]
525  *
526  * Returns 0 on success
527  */
528 static int
nat64lsn_reset_stats(struct ip_fw_chain * ch,ip_fw3_opheader * op,struct sockopt_data * sd)529 nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
530     struct sockopt_data *sd)
531 {
532 	struct nat64lsn_instance *i;
533 	ipfw_obj_header *oh;
534 
535 	if (sd->valsize != sizeof(*oh))
536 		return (EINVAL);
537 	oh = (ipfw_obj_header *)sd->kbuf;
538 	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
539 	    oh->ntlv.set >= IPFW_MAX_SETS)
540 		return (EINVAL);
541 
542 	IPFW_UH_WLOCK(ch);
543 	i = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
544 	if (i == NULL) {
545 		IPFW_UH_WUNLOCK(ch);
546 		return (ENOENT);
547 	}
548 	COUNTER_ARRAY_ZERO(i->cfg->base.stats.cnt, NAT64STATS);
549 	IPFW_UH_WUNLOCK(ch);
550 	return (0);
551 }
552 
/*
 * FREEMASK_COPY(pg, n, out) stores into `out' a 64-bit mask obtained through
 * FREEMASK_CHUNK(pg, n).  On LP64 the pointed-to value is copied as is;
 * otherwise the mask is assembled from two consecutive elements, the second
 * one supplying the upper 32 bits.
 * NOTE(review): FREEMASK_CHUNK() is defined outside this file; the code that
 * consumes the mask here treats a set bit as "state slot is free".
 */
553 #ifdef __LP64__
554 #define	FREEMASK_COPY(pg, n, out)	(out) = *FREEMASK_CHUNK((pg), (n))
555 #else
556 #define	FREEMASK_COPY(pg, n, out)	(out) = *FREEMASK_CHUNK((pg), (n)) | \
557     ((uint64_t)*(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
558 #endif
559 /*
560  * Reply: [ ipfw_obj_header ipfw_obj_data [ ipfw_nat64lsn_stg
561  *	ipfw_nat64lsn_state x count, ... ] ]
562  */
563 static int
nat64lsn_export_states(struct nat64lsn_cfg * cfg,union nat64lsn_pgidx * idx,struct nat64lsn_pg * pg,struct sockopt_data * sd,uint32_t * ret_count)564 nat64lsn_export_states(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx,
565     struct nat64lsn_pg *pg, struct sockopt_data *sd, uint32_t *ret_count)
566 {
567 	ipfw_nat64lsn_state_v1 *s;
568 	struct nat64lsn_state *state;
569 	uint64_t freemask;
570 	uint32_t i, count;
571 
572 	/* validate user input */
573 	if (idx->chunk > pg->chunks_count - 1)
574 		return (EINVAL);
575 
576 	FREEMASK_COPY(pg, idx->chunk, freemask);
577 	count = 64 - bitcount64(freemask);
578 	if (count == 0)
579 		return (0);	/* Try next PG/chunk */
580 
581 	DPRINTF(DP_STATE, "EXPORT PG 0x%16jx, count %d",
582 	    (uintmax_t)idx->index, count);
583 
584 	s = (ipfw_nat64lsn_state_v1 *)ipfw_get_sopt_space(sd,
585 	    count * sizeof(ipfw_nat64lsn_state_v1));
586 	if (s == NULL)
587 		return (ENOMEM);
588 
589 	for (i = 0; i < 64; i++) {
590 		if (ISSET64(freemask, i))
591 			continue;
592 		state = pg->chunks_count == 1 ? &pg->states->state[i] :
593 		    &pg->states_chunk[idx->chunk]->state[i];
594 
595 		s->host6 = state->host->addr;
596 		s->daddr.s_addr = htonl(state->ip_dst);
597 		s->dport = state->dport;
598 		s->sport = state->sport;
599 		s->aport = state->aport;
600 		s->flags = (uint8_t)(state->flags & 7);
601 		s->proto = state->proto;
602 		s->idle = GET_AGE(state->timestamp);
603 		s++;
604 	}
605 	*ret_count = count;
606 	return (0);
607 }
608 
609 #define	LAST_IDX	0xFF
610 static int
nat64lsn_next_pgidx(struct nat64lsn_cfg * cfg,struct nat64lsn_pg * pg,union nat64lsn_pgidx * idx)611 nat64lsn_next_pgidx(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg,
612     union nat64lsn_pgidx *idx)
613 {
614 
615 	/* First iterate over chunks */
616 	if (pg != NULL) {
617 		if (idx->chunk < pg->chunks_count - 1) {
618 			idx->chunk++;
619 			return (0);
620 		}
621 	}
622 	idx->chunk = 0;
623 	/* Then over PGs */
624 	if (idx->port < UINT16_MAX - 64) {
625 		idx->port += 64;
626 		return (0);
627 	}
628 	idx->port = NAT64_MIN_PORT;
629 	/* Then over supported protocols */
630 	switch (idx->proto) {
631 	case IPPROTO_ICMP:
632 		idx->proto = IPPROTO_TCP;
633 		return (0);
634 	case IPPROTO_TCP:
635 		idx->proto = IPPROTO_UDP;
636 		return (0);
637 	default:
638 		idx->proto = IPPROTO_ICMP;
639 	}
640 	/* And then over IPv4 alias addresses */
641 	if (idx->addr < cfg->pmask4) {
642 		idx->addr++;
643 		return (1);	/* New states group is needed */
644 	}
645 	idx->index = LAST_IDX;
646 	return (-1);		/* No more states */
647 }
648 
649 static struct nat64lsn_pg*
nat64lsn_get_pg_byidx(struct nat64lsn_cfg * cfg,union nat64lsn_pgidx * idx)650 nat64lsn_get_pg_byidx(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx)
651 {
652 	struct nat64lsn_alias *alias;
653 	int pg_idx;
654 
655 	alias = &cfg->aliases[idx->addr & ((1 << (32 - cfg->plen4)) - 1)];
656 	MPASS(alias->addr == idx->addr);
657 
658 	pg_idx = (idx->port - NAT64_MIN_PORT) / 64;
659 	switch (idx->proto) {
660 	case IPPROTO_ICMP:
661 		if (ISSET32(alias->icmp_pgmask[pg_idx / 32], pg_idx % 32))
662 			return (alias->icmp[pg_idx / 32]->pgptr[pg_idx % 32]);
663 		break;
664 	case IPPROTO_TCP:
665 		if (ISSET32(alias->tcp_pgmask[pg_idx / 32], pg_idx % 32))
666 			return (alias->tcp[pg_idx / 32]->pgptr[pg_idx % 32]);
667 		break;
668 	case IPPROTO_UDP:
669 		if (ISSET32(alias->udp_pgmask[pg_idx / 32], pg_idx % 32))
670 			return (alias->udp[pg_idx / 32]->pgptr[pg_idx % 32]);
671 		break;
672 	}
673 	return (NULL);
674 }
675 
676 /*
677  * Lists nat64lsn states.
678  * Data layout (v1)(current):
679  * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
680  * Reply: [ ipfw_obj_header ipfw_obj_data [
681  *		ipfw_nat64lsn_stg_v1 ipfw_nat64lsn_state_v1 x N] ]
682  *
683  * Returns 0 on success
684  */
685 static int
nat64lsn_states(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)686 nat64lsn_states(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
687     struct sockopt_data *sd)
688 {
689 	ipfw_obj_header *oh;
690 	ipfw_obj_data *od;
691 	ipfw_nat64lsn_stg_v1 *stg;
692 	struct nat64lsn_instance *i;
693 	struct nat64lsn_cfg *cfg;
694 	struct nat64lsn_pg *pg;
695 	union nat64lsn_pgidx idx;
696 	size_t sz;
697 	uint32_t count, total;
698 	int ret;
699 
700 	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
701 	    sizeof(uint64_t);
702 	/* Check minimum header size */
703 	if (sd->valsize < sz)
704 		return (EINVAL);
705 
706 	oh = (ipfw_obj_header *)sd->kbuf;
707 	od = (ipfw_obj_data *)(oh + 1);
708 	if (od->head.type != IPFW_TLV_OBJDATA ||
709 	    od->head.length != sz - sizeof(ipfw_obj_header))
710 		return (EINVAL);
711 
712 	idx.index = *(uint64_t *)(od + 1);
713 	if (idx.index != 0 && idx.proto != IPPROTO_ICMP &&
714 	    idx.proto != IPPROTO_TCP && idx.proto != IPPROTO_UDP)
715 		return (EINVAL);
716 	if (idx.index == LAST_IDX)
717 		return (EINVAL);
718 
719 	IPFW_UH_RLOCK(ch);
720 	i = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
721 	if (i == NULL) {
722 		IPFW_UH_RUNLOCK(ch);
723 		return (ENOENT);
724 	}
725 	cfg = i->cfg;
726 	if (idx.index == 0) {	/* Fill in starting point */
727 		idx.addr = cfg->prefix4;
728 		idx.proto = IPPROTO_ICMP;
729 		idx.port = NAT64_MIN_PORT;
730 	}
731 	if (idx.addr < cfg->prefix4 || idx.addr > cfg->pmask4 ||
732 	    idx.port < NAT64_MIN_PORT) {
733 		IPFW_UH_RUNLOCK(ch);
734 		return (EINVAL);
735 	}
736 	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
737 	    sizeof(ipfw_nat64lsn_stg_v1);
738 	if (sd->valsize < sz) {
739 		IPFW_UH_RUNLOCK(ch);
740 		return (ENOMEM);
741 	}
742 	oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sz);
743 	od = (ipfw_obj_data *)(oh + 1);
744 	od->head.type = IPFW_TLV_OBJDATA;
745 	od->head.length = sz - sizeof(ipfw_obj_header);
746 	stg = (ipfw_nat64lsn_stg_v1 *)(od + 1);
747 	stg->count = total = 0;
748 	stg->next.index = idx.index;
749 	/*
750 	 * Acquire CALLOUT_LOCK to avoid races with expiration code.
751 	 * Thus states, hosts and PGs will not expire while we hold it.
752 	 */
753 	CALLOUT_LOCK(cfg);
754 	ret = 0;
755 	do {
756 		pg = nat64lsn_get_pg_byidx(cfg, &idx);
757 		if (pg != NULL) {
758 			count = 0;
759 			ret = nat64lsn_export_states(cfg, &idx, pg,
760 			    sd, &count);
761 			if (ret != 0)
762 				break;
763 			if (count > 0) {
764 				stg->count += count;
765 				total += count;
766 				/* Update total size of reply */
767 				od->head.length +=
768 				    count * sizeof(ipfw_nat64lsn_state_v1);
769 				sz += count * sizeof(ipfw_nat64lsn_state_v1);
770 			}
771 			stg->alias4.s_addr = htonl(idx.addr);
772 		}
773 		/* Determine new index */
774 		switch (nat64lsn_next_pgidx(cfg, pg, &idx)) {
775 		case -1:
776 			ret = ENOENT; /* End of search */
777 			break;
778 		case 1: /*
779 			 * Next alias address, new group may be needed.
780 			 * If states count is zero, use this group.
781 			 */
782 			if (stg->count == 0)
783 				continue;
784 			/* Otherwise try to create new group */
785 			sz += sizeof(ipfw_nat64lsn_stg_v1);
786 			if (sd->valsize < sz) {
787 				ret = ENOMEM;
788 				break;
789 			}
790 			/* Save next index in current group */
791 			stg->next.index = idx.index;
792 			stg = (ipfw_nat64lsn_stg_v1 *)ipfw_get_sopt_space(sd,
793 			    sizeof(ipfw_nat64lsn_stg_v1));
794 			od->head.length += sizeof(ipfw_nat64lsn_stg_v1);
795 			stg->count = 0;
796 			break;
797 		}
798 		stg->next.index = idx.index;
799 	} while (ret == 0);
800 	CALLOUT_UNLOCK(cfg);
801 	IPFW_UH_RUNLOCK(ch);
802 	return ((total > 0 || idx.index == LAST_IDX) ? 0: ret);
803 }
804 
/*
 * Socket option handlers of this module.  The table is registered with
 * IPFW_ADD_SOPT_HANDLER() in nat64lsn_init() and removed again with
 * IPFW_DEL_SOPT_HANDLER() in nat64lsn_uninit().
 */
805 static struct ipfw_sopt_handler	scodes[] = {
806     { IP_FW_NAT64LSN_CREATE,	IP_FW3_OPVER,	HDIR_BOTH, nat64lsn_create },
807     { IP_FW_NAT64LSN_DESTROY,	IP_FW3_OPVER,	HDIR_SET,  nat64lsn_destroy },
808     { IP_FW_NAT64LSN_CONFIG,	IP_FW3_OPVER,	HDIR_BOTH, nat64lsn_config },
809     { IP_FW_NAT64LSN_LIST,	IP_FW3_OPVER,	HDIR_GET,  nat64lsn_list },
810     { IP_FW_NAT64LSN_STATS,	IP_FW3_OPVER,	HDIR_GET,  nat64lsn_stats },
811     { IP_FW_NAT64LSN_RESET_STATS, IP_FW3_OPVER,	HDIR_SET,  nat64lsn_reset_stats },
812     { IP_FW_NAT64LSN_LIST_STATES, IP_FW3_OPVER,	HDIR_GET,  nat64lsn_states },
813 };
814 
815 #define	NAT64LSN_ARE_EQUAL(v)	(cfg0->v == cfg1->v)
816 static int
nat64lsn_cmp_configs(struct nat64lsn_cfg * cfg0,struct nat64lsn_cfg * cfg1)817 nat64lsn_cmp_configs(struct nat64lsn_cfg *cfg0, struct nat64lsn_cfg *cfg1)
818 {
819 
820 	if ((cfg0->base.flags & cfg1->base.flags & NAT64LSN_ALLOW_SWAPCONF) &&
821 	    NAT64LSN_ARE_EQUAL(prefix4) &&
822 	    NAT64LSN_ARE_EQUAL(pmask4) &&
823 	    NAT64LSN_ARE_EQUAL(plen4) &&
824 	    NAT64LSN_ARE_EQUAL(base.plat_plen) &&
825 	    IN6_ARE_ADDR_EQUAL(&cfg0->base.plat_prefix,
826 		&cfg1->base.plat_prefix))
827 		return (0);
828 	return (1);
829 }
830 #undef NAT64LSN_ARE_EQUAL
831 
832 static void
nat64lsn_swap_configs(struct nat64lsn_instance * i0,struct nat64lsn_instance * i1)833 nat64lsn_swap_configs(struct nat64lsn_instance *i0,
834     struct nat64lsn_instance *i1)
835 {
836 	struct nat64lsn_cfg *cfg;
837 
838 	cfg = i0->cfg;
839 	i0->cfg = i1->cfg;
840 	i1->cfg = cfg;
841 }
842 
843 /*
844  * NAT64LSN sets swap handler.
845  *
846  * When two sets have NAT64LSN instance with the same name, we check
847  * most important configuration parameters, and if there are no difference,
848  * and both instances have NAT64LSN_ALLOW_SWAPCONF flag, we will exchange
849  * configs between instances. This allows to keep NAT64 states when ipfw's
850  * rules are reloaded using new set.
851  *
852  * XXX: since manage_sets caller doesn't hold IPFW_WLOCK(), it is possible
853  * that some states will be created during switching, because set of rules
854  * is changed a bit earley than named objects.
855  */
856 static int
nat64lsn_swap_sets_cb(struct namedobj_instance * ni,struct named_object * no,void * arg)857 nat64lsn_swap_sets_cb(struct namedobj_instance *ni, struct named_object *no,
858     void *arg)
859 {
860 	struct nat64lsn_instance *i0, *i1;
861 	uint8_t *sets;
862 
863 	sets = arg;
864 	if (no->set == sets[0]) {
865 		/*
866 		 * Check if we have instance in new set with the same
867 		 * config that is sets aware and ready to swap configs.
868 		 */
869 		i0 = __containerof(no, struct nat64lsn_instance, no);
870 		if ((i0->cfg->base.flags & NAT64LSN_ALLOW_SWAPCONF) &&
871 		    (i1 = nat64lsn_find(ni, no->name, sets[1])) != NULL) {
872 			/* Compare configs */
873 			if (nat64lsn_cmp_configs(i0->cfg, i1->cfg) == 0) {
874 				IPFW_UH_WLOCK_ASSERT(&V_layer3_chain);
875 				IPFW_WLOCK(&V_layer3_chain);
876 				nat64lsn_swap_configs(i0, i1);
877 				IPFW_WUNLOCK(&V_layer3_chain);
878 			}
879 		}
880 	}
881 	return (0);
882 }
883 
884 static int
nat64lsn_manage_sets(struct ip_fw_chain * ch,uint32_t set,uint8_t new_set,enum ipfw_sets_cmd cmd)885 nat64lsn_manage_sets(struct ip_fw_chain *ch, uint32_t set, uint8_t new_set,
886     enum ipfw_sets_cmd cmd)
887 {
888 	uint8_t sets[2];
889 
890 	if (cmd == SWAP_ALL) {
891 		sets[0] = (uint8_t)set;
892 		sets[1] = new_set;
893 		ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch),
894 		    nat64lsn_swap_sets_cb, &sets, IPFW_TLV_NAT64LSN_NAME);
895 	}
896 	return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64LSN_NAME,
897 	    set, new_set, cmd));
898 }
/*
 * NAT64_DEFINE_OPCODE_REWRITER() is defined outside this file.
 * NOTE(review): presumably it emits the `opcodes' object rewriter table that
 * nat64lsn_init() and nat64lsn_uninit() pass to IPFW_ADD_OBJ_REWRITER() and
 * IPFW_DEL_OBJ_REWRITER() -- confirm against the macro definition.
 */
899 NAT64_DEFINE_OPCODE_REWRITER(nat64lsn, NAT64LSN, opcodes);
900 
901 static int
destroy_config_cb(struct namedobj_instance * ni,struct named_object * no,void * arg)902 destroy_config_cb(struct namedobj_instance *ni, struct named_object *no,
903     void *arg)
904 {
905 	struct nat64lsn_instance *i;
906 	struct ip_fw_chain *ch;
907 
908 	ch = (struct ip_fw_chain *)arg;
909 	i = (struct nat64lsn_instance *)SRV_OBJECT(ch, no->kidx);
910 	nat64lsn_detach_instance(ch, i);
911 	nat64lsn_destroy_config(i->cfg);
912 	free(i, M_NAT64LSN);
913 	return (0);
914 }
915 
916 int
nat64lsn_init(struct ip_fw_chain * ch,int first)917 nat64lsn_init(struct ip_fw_chain *ch, int first)
918 {
919 
920 	if (first != 0)
921 		nat64lsn_init_internal();
922 	V_nat64lsn_eid = ipfw_add_eaction(ch, ipfw_nat64lsn, "nat64lsn");
923 	if (V_nat64lsn_eid == 0)
924 		return (ENXIO);
925 	IPFW_ADD_SOPT_HANDLER(first, scodes);
926 	IPFW_ADD_OBJ_REWRITER(first, opcodes);
927 	return (0);
928 }
929 
930 void
nat64lsn_uninit(struct ip_fw_chain * ch,int last)931 nat64lsn_uninit(struct ip_fw_chain *ch, int last)
932 {
933 
934 	IPFW_DEL_OBJ_REWRITER(last, opcodes);
935 	IPFW_DEL_SOPT_HANDLER(last, scodes);
936 	ipfw_del_eaction(ch, V_nat64lsn_eid);
937 	/*
938 	 * Since we already have deregistered external action,
939 	 * our named objects become unaccessible via rules, because
940 	 * all rules were truncated by ipfw_del_eaction().
941 	 * So, we can unlink and destroy our named objects without holding
942 	 * IPFW_WLOCK().
943 	 */
944 	IPFW_UH_WLOCK(ch);
945 	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch,
946 	    IPFW_TLV_NAT64LSN_NAME);
947 	V_nat64lsn_eid = 0;
948 	IPFW_UH_WUNLOCK(ch);
949 	if (last != 0)
950 		nat64lsn_uninit_internal();
951 }
952