1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2015-2019 Yandex LLC
5 * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
6 * Copyright (c) 2015-2019 Andrey V. Elsukov <ae@FreeBSD.org>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/counter.h>
33 #include <sys/ck.h>
34 #include <sys/epoch.h>
35 #include <sys/errno.h>
36 #include <sys/kernel.h>
37 #include <sys/lock.h>
38 #include <sys/malloc.h>
39 #include <sys/mbuf.h>
40 #include <sys/module.h>
41 #include <sys/rmlock.h>
42 #include <sys/rwlock.h>
43 #include <sys/socket.h>
44 #include <sys/sockopt.h>
45
46 #include <net/if.h>
47
48 #include <netinet/in.h>
49 #include <netinet/ip.h>
50 #include <netinet/ip_var.h>
51 #include <netinet/ip_fw.h>
52 #include <netinet6/ip_fw_nat64.h>
53
54 #include <netpfil/ipfw/ip_fw_private.h>
55
56 #include "nat64lsn.h"
57
58 VNET_DEFINE(uint32_t, nat64lsn_eid) = 0;
59
60 static struct nat64lsn_instance *
nat64lsn_find(struct namedobj_instance * ni,const char * name,uint8_t set)61 nat64lsn_find(struct namedobj_instance *ni, const char *name, uint8_t set)
62 {
63 struct named_object *no;
64
65 no = ipfw_objhash_lookup_name_type(ni, set,
66 IPFW_TLV_NAT64LSN_NAME, name);
67 if (no == NULL)
68 return (NULL);
69 return (__containerof(no, struct nat64lsn_instance, no));
70 }
71
72 static void
nat64lsn_default_config(ipfw_nat64lsn_cfg * uc)73 nat64lsn_default_config(ipfw_nat64lsn_cfg *uc)
74 {
75
76 if (uc->jmaxlen == 0)
77 uc->jmaxlen = NAT64LSN_JMAXLEN;
78 if (uc->jmaxlen > 65536)
79 uc->jmaxlen = 65536;
80 if (uc->nh_delete_delay == 0)
81 uc->nh_delete_delay = NAT64LSN_HOST_AGE;
82 if (uc->pg_delete_delay == 0)
83 uc->pg_delete_delay = NAT64LSN_PG_AGE;
84 if (uc->st_syn_ttl == 0)
85 uc->st_syn_ttl = NAT64LSN_TCP_SYN_AGE;
86 if (uc->st_close_ttl == 0)
87 uc->st_close_ttl = NAT64LSN_TCP_FIN_AGE;
88 if (uc->st_estab_ttl == 0)
89 uc->st_estab_ttl = NAT64LSN_TCP_EST_AGE;
90 if (uc->st_udp_ttl == 0)
91 uc->st_udp_ttl = NAT64LSN_UDP_AGE;
92 if (uc->st_icmp_ttl == 0)
93 uc->st_icmp_ttl = NAT64LSN_ICMP_AGE;
94
95 if (uc->states_chunks == 0)
96 uc->states_chunks = 1;
97 else if (uc->states_chunks >= 128)
98 uc->states_chunks = 128;
99 else if (!powerof2(uc->states_chunks))
100 uc->states_chunks = 1 << fls(uc->states_chunks);
101 }
102
103 /*
104 * Creates new nat64lsn instance.
105 * Data layout (v0)(current):
106 * Request: [ ipfw_obj_lheader ipfw_nat64lsn_cfg ]
107 *
108 * Returns 0 on success
109 */
110 static int
nat64lsn_create(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)111 nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
112 struct sockopt_data *sd)
113 {
114 ipfw_obj_lheader *olh;
115 ipfw_nat64lsn_cfg *uc;
116 struct nat64lsn_instance *i;
117 struct nat64lsn_cfg *cfg;
118 struct namedobj_instance *ni;
119 uint32_t addr4, mask4;
120
121 if (sd->valsize != sizeof(*olh) + sizeof(*uc))
122 return (EINVAL);
123
124 olh = (ipfw_obj_lheader *)sd->kbuf;
125 uc = (ipfw_nat64lsn_cfg *)(olh + 1);
126
127 if (ipfw_check_object_name_generic(uc->name) != 0)
128 return (EINVAL);
129
130 if (uc->set >= IPFW_MAX_SETS)
131 return (EINVAL);
132
133 if (uc->plen4 > 32)
134 return (EINVAL);
135
136 /*
137 * Unspecified address has special meaning. But it must
138 * have valid prefix length. This length will be used to
139 * correctly extract and embedd IPv4 address into IPv6.
140 */
141 if (nat64_check_prefix6(&uc->prefix6, uc->plen6) != 0 &&
142 IN6_IS_ADDR_UNSPECIFIED(&uc->prefix6) &&
143 nat64_check_prefixlen(uc->plen6) != 0)
144 return (EINVAL);
145
146 /* XXX: Check prefix4 to be global */
147 addr4 = ntohl(uc->prefix4.s_addr);
148 mask4 = ~((1 << (32 - uc->plen4)) - 1);
149 if ((addr4 & mask4) != addr4)
150 return (EINVAL);
151
152 nat64lsn_default_config(uc);
153
154 ni = CHAIN_TO_SRV(ch);
155 IPFW_UH_RLOCK(ch);
156 if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
157 IPFW_UH_RUNLOCK(ch);
158 return (EEXIST);
159 }
160 IPFW_UH_RUNLOCK(ch);
161
162 i = malloc(sizeof(struct nat64lsn_instance), M_NAT64LSN,
163 M_WAITOK | M_ZERO);
164 strlcpy(i->name, uc->name, sizeof(i->name));
165 i->no.name = i->name;
166 i->no.etlv = IPFW_TLV_NAT64LSN_NAME;
167 i->no.set = uc->set;
168
169 cfg = nat64lsn_init_config(ch, addr4, uc->plen4);
170 cfg->base.plat_prefix = uc->prefix6;
171 cfg->base.plat_plen = uc->plen6;
172 cfg->base.flags = (uc->flags & NAT64LSN_FLAGSMASK) | NAT64_PLATPFX;
173 if (IN6_IS_ADDR_WKPFX(&cfg->base.plat_prefix))
174 cfg->base.flags |= NAT64_WKPFX;
175 else if (IN6_IS_ADDR_UNSPECIFIED(&cfg->base.plat_prefix))
176 cfg->base.flags |= NAT64LSN_ANYPREFIX;
177
178 cfg->states_chunks = uc->states_chunks;
179 cfg->jmaxlen = uc->jmaxlen;
180 cfg->host_delete_delay = uc->nh_delete_delay;
181 cfg->pg_delete_delay = uc->pg_delete_delay;
182 cfg->st_syn_ttl = uc->st_syn_ttl;
183 cfg->st_close_ttl = uc->st_close_ttl;
184 cfg->st_estab_ttl = uc->st_estab_ttl;
185 cfg->st_udp_ttl = uc->st_udp_ttl;
186 cfg->st_icmp_ttl = uc->st_icmp_ttl;
187 cfg->nomatch_verdict = IP_FW_DENY;
188
189 IPFW_UH_WLOCK(ch);
190
191 if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
192 IPFW_UH_WUNLOCK(ch);
193 nat64lsn_destroy_config(cfg);
194 free(i, M_NAT64LSN);
195 return (EEXIST);
196 }
197
198 if (ipfw_objhash_alloc_idx(ni, &i->no.kidx) != 0) {
199 IPFW_UH_WUNLOCK(ch);
200 nat64lsn_destroy_config(cfg);
201 free(i, M_NAT64LSN);
202 return (ENOSPC);
203 }
204 ipfw_objhash_add(ni, &i->no);
205
206 /* Okay, let's link data */
207 i->cfg = cfg;
208 SRV_OBJECT(ch, i->no.kidx) = i;
209 nat64lsn_start_instance(cfg);
210
211 IPFW_UH_WUNLOCK(ch);
212 return (0);
213 }
214
215 static void
nat64lsn_detach_instance(struct ip_fw_chain * ch,struct nat64lsn_instance * i)216 nat64lsn_detach_instance(struct ip_fw_chain *ch,
217 struct nat64lsn_instance *i)
218 {
219
220 IPFW_UH_WLOCK_ASSERT(ch);
221 SRV_OBJECT(ch, i->no.kidx) = NULL;
222 ipfw_objhash_del(CHAIN_TO_SRV(ch), &i->no);
223 ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), i->no.kidx);
224 }
225
226 /*
227 * Destroys nat64 instance.
228 * Data layout (v0)(current):
229 * Request: [ ipfw_obj_header ]
230 *
231 * Returns 0 on success
232 */
233 static int
nat64lsn_destroy(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)234 nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
235 struct sockopt_data *sd)
236 {
237 struct nat64lsn_instance *i;
238 ipfw_obj_header *oh;
239
240 if (sd->valsize != sizeof(*oh))
241 return (EINVAL);
242
243 oh = (ipfw_obj_header *)op3;
244
245 IPFW_UH_WLOCK(ch);
246 i = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
247 if (i == NULL) {
248 IPFW_UH_WUNLOCK(ch);
249 return (ENOENT);
250 }
251
252 if (i->no.refcnt > 0) {
253 IPFW_UH_WUNLOCK(ch);
254 return (EBUSY);
255 }
256
257 ipfw_reset_eaction_instance(ch, V_nat64lsn_eid, i->no.kidx);
258 nat64lsn_detach_instance(ch, i);
259 IPFW_UH_WUNLOCK(ch);
260
261 nat64lsn_destroy_config(i->cfg);
262 free(i, M_NAT64LSN);
263 return (0);
264 }
265
266 #define __COPY_STAT_FIELD(_cfg, _stats, _field) \
267 (_stats)->_field = NAT64STAT_FETCH(&(_cfg)->base.stats, _field)
268 static void
export_stats(struct ip_fw_chain * ch,struct nat64lsn_cfg * cfg,struct ipfw_nat64lsn_stats * stats)269 export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
270 struct ipfw_nat64lsn_stats *stats)
271 {
272 struct nat64lsn_alias *alias;
273 int i;
274
275 __COPY_STAT_FIELD(cfg, stats, opcnt64);
276 __COPY_STAT_FIELD(cfg, stats, opcnt46);
277 __COPY_STAT_FIELD(cfg, stats, ofrags);
278 __COPY_STAT_FIELD(cfg, stats, ifrags);
279 __COPY_STAT_FIELD(cfg, stats, oerrors);
280 __COPY_STAT_FIELD(cfg, stats, noroute4);
281 __COPY_STAT_FIELD(cfg, stats, noroute6);
282 __COPY_STAT_FIELD(cfg, stats, nomatch4);
283 __COPY_STAT_FIELD(cfg, stats, noproto);
284 __COPY_STAT_FIELD(cfg, stats, nomem);
285 __COPY_STAT_FIELD(cfg, stats, dropped);
286
287 __COPY_STAT_FIELD(cfg, stats, jcalls);
288 __COPY_STAT_FIELD(cfg, stats, jrequests);
289 __COPY_STAT_FIELD(cfg, stats, jhostsreq);
290 __COPY_STAT_FIELD(cfg, stats, jportreq);
291 __COPY_STAT_FIELD(cfg, stats, jhostfails);
292 __COPY_STAT_FIELD(cfg, stats, jportfails);
293 __COPY_STAT_FIELD(cfg, stats, jmaxlen);
294 __COPY_STAT_FIELD(cfg, stats, jnomem);
295 __COPY_STAT_FIELD(cfg, stats, jreinjected);
296 __COPY_STAT_FIELD(cfg, stats, screated);
297 __COPY_STAT_FIELD(cfg, stats, sdeleted);
298 __COPY_STAT_FIELD(cfg, stats, spgcreated);
299 __COPY_STAT_FIELD(cfg, stats, spgdeleted);
300
301 stats->hostcount = cfg->hosts_count;
302 for (i = 0; i < (1 << (32 - cfg->plen4)); i++) {
303 alias = &cfg->aliases[i];
304 stats->tcpchunks += alias->tcp_pgcount;
305 stats->udpchunks += alias->udp_pgcount;
306 stats->icmpchunks += alias->icmp_pgcount;
307 }
308 }
309 #undef __COPY_STAT_FIELD
310
311 static void
nat64lsn_export_config(struct ip_fw_chain * ch,struct nat64lsn_instance * i,ipfw_nat64lsn_cfg * uc)312 nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_instance *i,
313 ipfw_nat64lsn_cfg *uc)
314 {
315 struct nat64lsn_cfg *cfg;
316
317 strlcpy(uc->name, i->no.name, sizeof(uc->name));
318 uc->set = i->no.set;
319 cfg = i->cfg;
320
321 uc->flags = cfg->base.flags & NAT64LSN_FLAGSMASK;
322 uc->states_chunks = cfg->states_chunks;
323 uc->jmaxlen = cfg->jmaxlen;
324 uc->nh_delete_delay = cfg->host_delete_delay;
325 uc->pg_delete_delay = cfg->pg_delete_delay;
326 uc->st_syn_ttl = cfg->st_syn_ttl;
327 uc->st_close_ttl = cfg->st_close_ttl;
328 uc->st_estab_ttl = cfg->st_estab_ttl;
329 uc->st_udp_ttl = cfg->st_udp_ttl;
330 uc->st_icmp_ttl = cfg->st_icmp_ttl;
331 uc->prefix4.s_addr = htonl(cfg->prefix4);
332 uc->prefix6 = cfg->base.plat_prefix;
333 uc->plen4 = cfg->plen4;
334 uc->plen6 = cfg->base.plat_plen;
335 }
336
/* Context handed to export_config_cb() by nat64lsn_list(). */
struct nat64_dump_arg {
	struct ip_fw_chain	*ch;	/* chain whose instances are listed */
	struct sockopt_data	*sd;	/* reply buffer being filled */
};
341
342 static int
export_config_cb(struct namedobj_instance * ni,struct named_object * no,void * arg)343 export_config_cb(struct namedobj_instance *ni, struct named_object *no,
344 void *arg)
345 {
346 struct nat64_dump_arg *da;
347 ipfw_nat64lsn_cfg *uc;
348
349 da = (struct nat64_dump_arg *)arg;
350 uc = (struct _ipfw_nat64lsn_cfg *)ipfw_get_sopt_space(da->sd,
351 sizeof(*uc));
352 nat64lsn_export_config(da->ch,
353 __containerof(no, struct nat64lsn_instance, no), uc);
354 return (0);
355 }
356
357 /*
358 * Lists all nat64 lsn instances currently available in kernel.
359 * Data layout (v0)(current):
360 * Request: [ ipfw_obj_lheader ]
361 * Reply: [ ipfw_obj_lheader ipfw_nat64lsn_cfg x N ]
362 *
363 * Returns 0 on success
364 */
365 static int
nat64lsn_list(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)366 nat64lsn_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
367 struct sockopt_data *sd)
368 {
369 ipfw_obj_lheader *olh;
370 struct nat64_dump_arg da;
371
372 /* Check minimum header size */
373 if (sd->valsize < sizeof(ipfw_obj_lheader))
374 return (EINVAL);
375
376 olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh));
377
378 IPFW_UH_RLOCK(ch);
379 olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch),
380 IPFW_TLV_NAT64LSN_NAME);
381 olh->objsize = sizeof(ipfw_nat64lsn_cfg);
382 olh->size = sizeof(*olh) + olh->count * olh->objsize;
383
384 if (sd->valsize < olh->size) {
385 IPFW_UH_RUNLOCK(ch);
386 return (ENOMEM);
387 }
388 memset(&da, 0, sizeof(da));
389 da.ch = ch;
390 da.sd = sd;
391 ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb, &da,
392 IPFW_TLV_NAT64LSN_NAME);
393 IPFW_UH_RUNLOCK(ch);
394
395 return (0);
396 }
397
398 /*
399 * Change existing nat64lsn instance configuration.
400 * Data layout (v0)(current):
401 * Request: [ ipfw_obj_header ipfw_nat64lsn_cfg ]
402 * Reply: [ ipfw_obj_header ipfw_nat64lsn_cfg ]
403 *
404 * Returns 0 on success
405 */
406 static int
nat64lsn_config(struct ip_fw_chain * ch,ip_fw3_opheader * op,struct sockopt_data * sd)407 nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
408 struct sockopt_data *sd)
409 {
410 ipfw_obj_header *oh;
411 ipfw_nat64lsn_cfg *uc;
412 struct nat64lsn_instance *i;
413 struct nat64lsn_cfg *cfg;
414 struct namedobj_instance *ni;
415
416 if (sd->valsize != sizeof(*oh) + sizeof(*uc))
417 return (EINVAL);
418
419 oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd,
420 sizeof(*oh) + sizeof(*uc));
421 uc = (ipfw_nat64lsn_cfg *)(oh + 1);
422
423 if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
424 oh->ntlv.set >= IPFW_MAX_SETS)
425 return (EINVAL);
426
427 ni = CHAIN_TO_SRV(ch);
428 if (sd->sopt->sopt_dir == SOPT_GET) {
429 IPFW_UH_RLOCK(ch);
430 i = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
431 if (i == NULL) {
432 IPFW_UH_RUNLOCK(ch);
433 return (ENOENT);
434 }
435 nat64lsn_export_config(ch, i, uc);
436 IPFW_UH_RUNLOCK(ch);
437 return (0);
438 }
439
440 nat64lsn_default_config(uc);
441
442 IPFW_UH_WLOCK(ch);
443 i = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
444 if (i == NULL) {
445 IPFW_UH_WUNLOCK(ch);
446 return (ENOENT);
447 }
448
449 /*
450 * For now allow to change only following values:
451 * jmaxlen, nh_del_age, pg_del_age, tcp_syn_age, tcp_close_age,
452 * tcp_est_age, udp_age, icmp_age, flags, states_chunks.
453 */
454 cfg = i->cfg;
455 cfg->states_chunks = uc->states_chunks;
456 cfg->jmaxlen = uc->jmaxlen;
457 cfg->host_delete_delay = uc->nh_delete_delay;
458 cfg->pg_delete_delay = uc->pg_delete_delay;
459 cfg->st_syn_ttl = uc->st_syn_ttl;
460 cfg->st_close_ttl = uc->st_close_ttl;
461 cfg->st_estab_ttl = uc->st_estab_ttl;
462 cfg->st_udp_ttl = uc->st_udp_ttl;
463 cfg->st_icmp_ttl = uc->st_icmp_ttl;
464 cfg->base.flags &= ~NAT64LSN_FLAGSMASK;
465 cfg->base.flags |= uc->flags & NAT64LSN_FLAGSMASK;
466
467 IPFW_UH_WUNLOCK(ch);
468
469 return (0);
470 }
471
472 /*
473 * Get nat64lsn statistics.
474 * Data layout (v0)(current):
475 * Request: [ ipfw_obj_header ]
476 * Reply: [ ipfw_obj_header ipfw_counter_tlv ]
477 *
478 * Returns 0 on success
479 */
480 static int
nat64lsn_stats(struct ip_fw_chain * ch,ip_fw3_opheader * op,struct sockopt_data * sd)481 nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
482 struct sockopt_data *sd)
483 {
484 struct ipfw_nat64lsn_stats stats;
485 struct nat64lsn_instance *i;
486 ipfw_obj_header *oh;
487 ipfw_obj_ctlv *ctlv;
488 size_t sz;
489
490 sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats);
491 if (sd->valsize % sizeof(uint64_t))
492 return (EINVAL);
493 if (sd->valsize < sz)
494 return (ENOMEM);
495 oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
496 if (oh == NULL)
497 return (EINVAL);
498 memset(&stats, 0, sizeof(stats));
499
500 IPFW_UH_RLOCK(ch);
501 i = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
502 if (i == NULL) {
503 IPFW_UH_RUNLOCK(ch);
504 return (ENOENT);
505 }
506
507 export_stats(ch, i->cfg, &stats);
508 IPFW_UH_RUNLOCK(ch);
509
510 ctlv = (ipfw_obj_ctlv *)(oh + 1);
511 memset(ctlv, 0, sizeof(*ctlv));
512 ctlv->head.type = IPFW_TLV_COUNTERS;
513 ctlv->head.length = sz - sizeof(ipfw_obj_header);
514 ctlv->count = sizeof(stats) / sizeof(uint64_t);
515 ctlv->objsize = sizeof(uint64_t);
516 ctlv->version = IPFW_NAT64_VERSION;
517 memcpy(ctlv + 1, &stats, sizeof(stats));
518 return (0);
519 }
520
521 /*
522 * Reset nat64lsn statistics.
523 * Data layout (v0)(current):
524 * Request: [ ipfw_obj_header ]
525 *
526 * Returns 0 on success
527 */
528 static int
nat64lsn_reset_stats(struct ip_fw_chain * ch,ip_fw3_opheader * op,struct sockopt_data * sd)529 nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
530 struct sockopt_data *sd)
531 {
532 struct nat64lsn_instance *i;
533 ipfw_obj_header *oh;
534
535 if (sd->valsize != sizeof(*oh))
536 return (EINVAL);
537 oh = (ipfw_obj_header *)sd->kbuf;
538 if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
539 oh->ntlv.set >= IPFW_MAX_SETS)
540 return (EINVAL);
541
542 IPFW_UH_WLOCK(ch);
543 i = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
544 if (i == NULL) {
545 IPFW_UH_WUNLOCK(ch);
546 return (ENOENT);
547 }
548 COUNTER_ARRAY_ZERO(i->cfg->base.stats.cnt, NAT64STATS);
549 IPFW_UH_WUNLOCK(ch);
550 return (0);
551 }
552
553 #ifdef __LP64__
554 #define FREEMASK_COPY(pg, n, out) (out) = *FREEMASK_CHUNK((pg), (n))
555 #else
556 #define FREEMASK_COPY(pg, n, out) (out) = *FREEMASK_CHUNK((pg), (n)) | \
557 ((uint64_t)*(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
558 #endif
559 /*
560 * Reply: [ ipfw_obj_header ipfw_obj_data [ ipfw_nat64lsn_stg
561 * ipfw_nat64lsn_state x count, ... ] ]
562 */
563 static int
nat64lsn_export_states(struct nat64lsn_cfg * cfg,union nat64lsn_pgidx * idx,struct nat64lsn_pg * pg,struct sockopt_data * sd,uint32_t * ret_count)564 nat64lsn_export_states(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx,
565 struct nat64lsn_pg *pg, struct sockopt_data *sd, uint32_t *ret_count)
566 {
567 ipfw_nat64lsn_state_v1 *s;
568 struct nat64lsn_state *state;
569 uint64_t freemask;
570 uint32_t i, count;
571
572 /* validate user input */
573 if (idx->chunk > pg->chunks_count - 1)
574 return (EINVAL);
575
576 FREEMASK_COPY(pg, idx->chunk, freemask);
577 count = 64 - bitcount64(freemask);
578 if (count == 0)
579 return (0); /* Try next PG/chunk */
580
581 DPRINTF(DP_STATE, "EXPORT PG 0x%16jx, count %d",
582 (uintmax_t)idx->index, count);
583
584 s = (ipfw_nat64lsn_state_v1 *)ipfw_get_sopt_space(sd,
585 count * sizeof(ipfw_nat64lsn_state_v1));
586 if (s == NULL)
587 return (ENOMEM);
588
589 for (i = 0; i < 64; i++) {
590 if (ISSET64(freemask, i))
591 continue;
592 state = pg->chunks_count == 1 ? &pg->states->state[i] :
593 &pg->states_chunk[idx->chunk]->state[i];
594
595 s->host6 = state->host->addr;
596 s->daddr.s_addr = htonl(state->ip_dst);
597 s->dport = state->dport;
598 s->sport = state->sport;
599 s->aport = state->aport;
600 s->flags = (uint8_t)(state->flags & 7);
601 s->proto = state->proto;
602 s->idle = GET_AGE(state->timestamp);
603 s++;
604 }
605 *ret_count = count;
606 return (0);
607 }
608
609 #define LAST_IDX 0xFF
610 static int
nat64lsn_next_pgidx(struct nat64lsn_cfg * cfg,struct nat64lsn_pg * pg,union nat64lsn_pgidx * idx)611 nat64lsn_next_pgidx(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg,
612 union nat64lsn_pgidx *idx)
613 {
614
615 /* First iterate over chunks */
616 if (pg != NULL) {
617 if (idx->chunk < pg->chunks_count - 1) {
618 idx->chunk++;
619 return (0);
620 }
621 }
622 idx->chunk = 0;
623 /* Then over PGs */
624 if (idx->port < UINT16_MAX - 64) {
625 idx->port += 64;
626 return (0);
627 }
628 idx->port = NAT64_MIN_PORT;
629 /* Then over supported protocols */
630 switch (idx->proto) {
631 case IPPROTO_ICMP:
632 idx->proto = IPPROTO_TCP;
633 return (0);
634 case IPPROTO_TCP:
635 idx->proto = IPPROTO_UDP;
636 return (0);
637 default:
638 idx->proto = IPPROTO_ICMP;
639 }
640 /* And then over IPv4 alias addresses */
641 if (idx->addr < cfg->pmask4) {
642 idx->addr++;
643 return (1); /* New states group is needed */
644 }
645 idx->index = LAST_IDX;
646 return (-1); /* No more states */
647 }
648
649 static struct nat64lsn_pg*
nat64lsn_get_pg_byidx(struct nat64lsn_cfg * cfg,union nat64lsn_pgidx * idx)650 nat64lsn_get_pg_byidx(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx)
651 {
652 struct nat64lsn_alias *alias;
653 int pg_idx;
654
655 alias = &cfg->aliases[idx->addr & ((1 << (32 - cfg->plen4)) - 1)];
656 MPASS(alias->addr == idx->addr);
657
658 pg_idx = (idx->port - NAT64_MIN_PORT) / 64;
659 switch (idx->proto) {
660 case IPPROTO_ICMP:
661 if (ISSET32(alias->icmp_pgmask[pg_idx / 32], pg_idx % 32))
662 return (alias->icmp[pg_idx / 32]->pgptr[pg_idx % 32]);
663 break;
664 case IPPROTO_TCP:
665 if (ISSET32(alias->tcp_pgmask[pg_idx / 32], pg_idx % 32))
666 return (alias->tcp[pg_idx / 32]->pgptr[pg_idx % 32]);
667 break;
668 case IPPROTO_UDP:
669 if (ISSET32(alias->udp_pgmask[pg_idx / 32], pg_idx % 32))
670 return (alias->udp[pg_idx / 32]->pgptr[pg_idx % 32]);
671 break;
672 }
673 return (NULL);
674 }
675
676 /*
677 * Lists nat64lsn states.
678 * Data layout (v1)(current):
679 * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
680 * Reply: [ ipfw_obj_header ipfw_obj_data [
681 * ipfw_nat64lsn_stg_v1 ipfw_nat64lsn_state_v1 x N] ]
682 *
683 * Returns 0 on success
684 */
685 static int
nat64lsn_states(struct ip_fw_chain * ch,ip_fw3_opheader * op3,struct sockopt_data * sd)686 nat64lsn_states(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
687 struct sockopt_data *sd)
688 {
689 ipfw_obj_header *oh;
690 ipfw_obj_data *od;
691 ipfw_nat64lsn_stg_v1 *stg;
692 struct nat64lsn_instance *i;
693 struct nat64lsn_cfg *cfg;
694 struct nat64lsn_pg *pg;
695 union nat64lsn_pgidx idx;
696 size_t sz;
697 uint32_t count, total;
698 int ret;
699
700 sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
701 sizeof(uint64_t);
702 /* Check minimum header size */
703 if (sd->valsize < sz)
704 return (EINVAL);
705
706 oh = (ipfw_obj_header *)sd->kbuf;
707 od = (ipfw_obj_data *)(oh + 1);
708 if (od->head.type != IPFW_TLV_OBJDATA ||
709 od->head.length != sz - sizeof(ipfw_obj_header))
710 return (EINVAL);
711
712 idx.index = *(uint64_t *)(od + 1);
713 if (idx.index != 0 && idx.proto != IPPROTO_ICMP &&
714 idx.proto != IPPROTO_TCP && idx.proto != IPPROTO_UDP)
715 return (EINVAL);
716 if (idx.index == LAST_IDX)
717 return (EINVAL);
718
719 IPFW_UH_RLOCK(ch);
720 i = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
721 if (i == NULL) {
722 IPFW_UH_RUNLOCK(ch);
723 return (ENOENT);
724 }
725 cfg = i->cfg;
726 if (idx.index == 0) { /* Fill in starting point */
727 idx.addr = cfg->prefix4;
728 idx.proto = IPPROTO_ICMP;
729 idx.port = NAT64_MIN_PORT;
730 }
731 if (idx.addr < cfg->prefix4 || idx.addr > cfg->pmask4 ||
732 idx.port < NAT64_MIN_PORT) {
733 IPFW_UH_RUNLOCK(ch);
734 return (EINVAL);
735 }
736 sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
737 sizeof(ipfw_nat64lsn_stg_v1);
738 if (sd->valsize < sz) {
739 IPFW_UH_RUNLOCK(ch);
740 return (ENOMEM);
741 }
742 oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sz);
743 od = (ipfw_obj_data *)(oh + 1);
744 od->head.type = IPFW_TLV_OBJDATA;
745 od->head.length = sz - sizeof(ipfw_obj_header);
746 stg = (ipfw_nat64lsn_stg_v1 *)(od + 1);
747 stg->count = total = 0;
748 stg->next.index = idx.index;
749 /*
750 * Acquire CALLOUT_LOCK to avoid races with expiration code.
751 * Thus states, hosts and PGs will not expire while we hold it.
752 */
753 CALLOUT_LOCK(cfg);
754 ret = 0;
755 do {
756 pg = nat64lsn_get_pg_byidx(cfg, &idx);
757 if (pg != NULL) {
758 count = 0;
759 ret = nat64lsn_export_states(cfg, &idx, pg,
760 sd, &count);
761 if (ret != 0)
762 break;
763 if (count > 0) {
764 stg->count += count;
765 total += count;
766 /* Update total size of reply */
767 od->head.length +=
768 count * sizeof(ipfw_nat64lsn_state_v1);
769 sz += count * sizeof(ipfw_nat64lsn_state_v1);
770 }
771 stg->alias4.s_addr = htonl(idx.addr);
772 }
773 /* Determine new index */
774 switch (nat64lsn_next_pgidx(cfg, pg, &idx)) {
775 case -1:
776 ret = ENOENT; /* End of search */
777 break;
778 case 1: /*
779 * Next alias address, new group may be needed.
780 * If states count is zero, use this group.
781 */
782 if (stg->count == 0)
783 continue;
784 /* Otherwise try to create new group */
785 sz += sizeof(ipfw_nat64lsn_stg_v1);
786 if (sd->valsize < sz) {
787 ret = ENOMEM;
788 break;
789 }
790 /* Save next index in current group */
791 stg->next.index = idx.index;
792 stg = (ipfw_nat64lsn_stg_v1 *)ipfw_get_sopt_space(sd,
793 sizeof(ipfw_nat64lsn_stg_v1));
794 od->head.length += sizeof(ipfw_nat64lsn_stg_v1);
795 stg->count = 0;
796 break;
797 }
798 stg->next.index = idx.index;
799 } while (ret == 0);
800 CALLOUT_UNLOCK(cfg);
801 IPFW_UH_RUNLOCK(ch);
802 return ((total > 0 || idx.index == LAST_IDX) ? 0: ret);
803 }
804
805 static struct ipfw_sopt_handler scodes[] = {
806 { IP_FW_NAT64LSN_CREATE, IP_FW3_OPVER, HDIR_BOTH, nat64lsn_create },
807 { IP_FW_NAT64LSN_DESTROY, IP_FW3_OPVER, HDIR_SET, nat64lsn_destroy },
808 { IP_FW_NAT64LSN_CONFIG, IP_FW3_OPVER, HDIR_BOTH, nat64lsn_config },
809 { IP_FW_NAT64LSN_LIST, IP_FW3_OPVER, HDIR_GET, nat64lsn_list },
810 { IP_FW_NAT64LSN_STATS, IP_FW3_OPVER, HDIR_GET, nat64lsn_stats },
811 { IP_FW_NAT64LSN_RESET_STATS, IP_FW3_OPVER, HDIR_SET, nat64lsn_reset_stats },
812 { IP_FW_NAT64LSN_LIST_STATES, IP_FW3_OPVER, HDIR_GET, nat64lsn_states },
813 };
814
815 #define NAT64LSN_ARE_EQUAL(v) (cfg0->v == cfg1->v)
816 static int
nat64lsn_cmp_configs(struct nat64lsn_cfg * cfg0,struct nat64lsn_cfg * cfg1)817 nat64lsn_cmp_configs(struct nat64lsn_cfg *cfg0, struct nat64lsn_cfg *cfg1)
818 {
819
820 if ((cfg0->base.flags & cfg1->base.flags & NAT64LSN_ALLOW_SWAPCONF) &&
821 NAT64LSN_ARE_EQUAL(prefix4) &&
822 NAT64LSN_ARE_EQUAL(pmask4) &&
823 NAT64LSN_ARE_EQUAL(plen4) &&
824 NAT64LSN_ARE_EQUAL(base.plat_plen) &&
825 IN6_ARE_ADDR_EQUAL(&cfg0->base.plat_prefix,
826 &cfg1->base.plat_prefix))
827 return (0);
828 return (1);
829 }
830 #undef NAT64LSN_ARE_EQUAL
831
832 static void
nat64lsn_swap_configs(struct nat64lsn_instance * i0,struct nat64lsn_instance * i1)833 nat64lsn_swap_configs(struct nat64lsn_instance *i0,
834 struct nat64lsn_instance *i1)
835 {
836 struct nat64lsn_cfg *cfg;
837
838 cfg = i0->cfg;
839 i0->cfg = i1->cfg;
840 i1->cfg = cfg;
841 }
842
843 /*
844 * NAT64LSN sets swap handler.
845 *
846 * When two sets have NAT64LSN instance with the same name, we check
847 * most important configuration parameters, and if there are no difference,
848 * and both instances have NAT64LSN_ALLOW_SWAPCONF flag, we will exchange
849 * configs between instances. This allows to keep NAT64 states when ipfw's
850 * rules are reloaded using new set.
851 *
852 * XXX: since manage_sets caller doesn't hold IPFW_WLOCK(), it is possible
853 * that some states will be created during switching, because set of rules
854 * is changed a bit earley than named objects.
855 */
856 static int
nat64lsn_swap_sets_cb(struct namedobj_instance * ni,struct named_object * no,void * arg)857 nat64lsn_swap_sets_cb(struct namedobj_instance *ni, struct named_object *no,
858 void *arg)
859 {
860 struct nat64lsn_instance *i0, *i1;
861 uint8_t *sets;
862
863 sets = arg;
864 if (no->set == sets[0]) {
865 /*
866 * Check if we have instance in new set with the same
867 * config that is sets aware and ready to swap configs.
868 */
869 i0 = __containerof(no, struct nat64lsn_instance, no);
870 if ((i0->cfg->base.flags & NAT64LSN_ALLOW_SWAPCONF) &&
871 (i1 = nat64lsn_find(ni, no->name, sets[1])) != NULL) {
872 /* Compare configs */
873 if (nat64lsn_cmp_configs(i0->cfg, i1->cfg) == 0) {
874 IPFW_UH_WLOCK_ASSERT(&V_layer3_chain);
875 IPFW_WLOCK(&V_layer3_chain);
876 nat64lsn_swap_configs(i0, i1);
877 IPFW_WUNLOCK(&V_layer3_chain);
878 }
879 }
880 }
881 return (0);
882 }
883
884 static int
nat64lsn_manage_sets(struct ip_fw_chain * ch,uint32_t set,uint8_t new_set,enum ipfw_sets_cmd cmd)885 nat64lsn_manage_sets(struct ip_fw_chain *ch, uint32_t set, uint8_t new_set,
886 enum ipfw_sets_cmd cmd)
887 {
888 uint8_t sets[2];
889
890 if (cmd == SWAP_ALL) {
891 sets[0] = (uint8_t)set;
892 sets[1] = new_set;
893 ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch),
894 nat64lsn_swap_sets_cb, &sets, IPFW_TLV_NAT64LSN_NAME);
895 }
896 return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64LSN_NAME,
897 set, new_set, cmd));
898 }
899 NAT64_DEFINE_OPCODE_REWRITER(nat64lsn, NAT64LSN, opcodes);
900
901 static int
destroy_config_cb(struct namedobj_instance * ni,struct named_object * no,void * arg)902 destroy_config_cb(struct namedobj_instance *ni, struct named_object *no,
903 void *arg)
904 {
905 struct nat64lsn_instance *i;
906 struct ip_fw_chain *ch;
907
908 ch = (struct ip_fw_chain *)arg;
909 i = (struct nat64lsn_instance *)SRV_OBJECT(ch, no->kidx);
910 nat64lsn_detach_instance(ch, i);
911 nat64lsn_destroy_config(i->cfg);
912 free(i, M_NAT64LSN);
913 return (0);
914 }
915
916 int
nat64lsn_init(struct ip_fw_chain * ch,int first)917 nat64lsn_init(struct ip_fw_chain *ch, int first)
918 {
919
920 if (first != 0)
921 nat64lsn_init_internal();
922 V_nat64lsn_eid = ipfw_add_eaction(ch, ipfw_nat64lsn, "nat64lsn");
923 if (V_nat64lsn_eid == 0)
924 return (ENXIO);
925 IPFW_ADD_SOPT_HANDLER(first, scodes);
926 IPFW_ADD_OBJ_REWRITER(first, opcodes);
927 return (0);
928 }
929
930 void
nat64lsn_uninit(struct ip_fw_chain * ch,int last)931 nat64lsn_uninit(struct ip_fw_chain *ch, int last)
932 {
933
934 IPFW_DEL_OBJ_REWRITER(last, opcodes);
935 IPFW_DEL_SOPT_HANDLER(last, scodes);
936 ipfw_del_eaction(ch, V_nat64lsn_eid);
937 /*
938 * Since we already have deregistered external action,
939 * our named objects become unaccessible via rules, because
940 * all rules were truncated by ipfw_del_eaction().
941 * So, we can unlink and destroy our named objects without holding
942 * IPFW_WLOCK().
943 */
944 IPFW_UH_WLOCK(ch);
945 ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch,
946 IPFW_TLV_NAT64LSN_NAME);
947 V_nat64lsn_eid = 0;
948 IPFW_UH_WUNLOCK(ch);
949 if (last != 0)
950 nat64lsn_uninit_internal();
951 }
952