xref: /freebsd/sys/netpfil/ipfw/nptv6/nptv6.c (revision 685dc743dc3b5645e34836464128e1c0558b404b)
1 /*-
2  * Copyright (c) 2016 Yandex LLC
3  * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/counter.h>
32 #include <sys/eventhandler.h>
33 #include <sys/errno.h>
34 #include <sys/kernel.h>
35 #include <sys/lock.h>
36 #include <sys/malloc.h>
37 #include <sys/mbuf.h>
38 #include <sys/module.h>
39 #include <sys/rmlock.h>
40 #include <sys/rwlock.h>
41 #include <sys/socket.h>
42 #include <sys/queue.h>
43 #include <sys/syslog.h>
44 #include <sys/sysctl.h>
45 
46 #include <net/if.h>
47 #include <net/if_var.h>
48 #include <net/if_private.h>
49 #include <net/netisr.h>
50 #include <net/pfil.h>
51 #include <net/vnet.h>
52 
53 #include <netinet/in.h>
54 #include <netinet/ip_var.h>
55 #include <netinet/ip_fw.h>
56 #include <netinet/ip6.h>
57 #include <netinet/icmp6.h>
58 #include <netinet6/in6_var.h>
59 #include <netinet6/ip6_var.h>
60 
61 #include <netpfil/ipfw/ip_fw_private.h>
62 #include <netpfil/ipfw/nptv6/nptv6.h>
63 
/*
 * Per-VNET identifier of the "nptv6" external action.  It is assigned from
 * ipfw_add_eaction() in nptv6_init() and reset to 0 in nptv6_uninit().
 */
VNET_DEFINE_STATIC(uint16_t, nptv6_eid) = 0;
#define	V_nptv6_eid	VNET(nptv6_eid)
/* Named-object type under which NPTv6 instances are stored and looked up. */
#define	IPFW_TLV_NPTV6_NAME	IPFW_TLV_EACTION_NAME(V_nptv6_eid)

/*
 * Tag of the ifaddr_event_ext handler.  It is registered by nptv6_create()
 * for the first instance that uses a dynamic prefix and deregistered in
 * nptv6_uninit().
 */
static eventhandler_tag nptv6_ifaddr_event;

/* Forward declarations of helpers used before their definition. */
static struct nptv6_cfg *nptv6_alloc_config(const char *name, uint8_t set);
static void nptv6_free_config(struct nptv6_cfg *cfg);
static struct nptv6_cfg *nptv6_find(struct namedobj_instance *ni,
    const char *name, uint8_t set);
static int nptv6_rewrite_internal(struct nptv6_cfg *cfg, struct mbuf **mp,
    int offset);
static int nptv6_rewrite_external(struct nptv6_cfg *cfg, struct mbuf **mp,
    int offset);

/* Fetch the instance stored in the chain's service slot number cmd->arg1. */
#define	NPTV6_LOOKUP(chain, cmd)	\
    (struct nptv6_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
81 
/*
 * Fallback definitions, used only when the included headers do not already
 * provide these macros.
 */
#ifndef IN6_MASK_ADDR
/* Clear, in place, every bit of address `a' that is not set in mask `m'. */
#define IN6_MASK_ADDR(a, m)	do { \
	(a)->s6_addr32[0] &= (m)->s6_addr32[0]; \
	(a)->s6_addr32[1] &= (m)->s6_addr32[1]; \
	(a)->s6_addr32[2] &= (m)->s6_addr32[2]; \
	(a)->s6_addr32[3] &= (m)->s6_addr32[3]; \
} while (0)
#endif
#ifndef IN6_ARE_MASKED_ADDR_EQUAL
/*
 * Evaluates to non-zero when addresses `d' and `a' agree on every bit that
 * is set in mask `m', and to zero otherwise.
 */
#define IN6_ARE_MASKED_ADDR_EQUAL(d, a, m)	(	\
	(((d)->s6_addr32[0] ^ (a)->s6_addr32[0]) & (m)->s6_addr32[0]) == 0 && \
	(((d)->s6_addr32[1] ^ (a)->s6_addr32[1]) & (m)->s6_addr32[1]) == 0 && \
	(((d)->s6_addr32[2] ^ (a)->s6_addr32[2]) & (m)->s6_addr32[2]) == 0 && \
	(((d)->s6_addr32[3] ^ (a)->s6_addr32[3]) & (m)->s6_addr32[3]) == 0 )
#endif
97 
/*
 * Debug tracing.  Change "#if 0" to "#if 1" to enable; otherwise both
 * macros expand to nothing and their arguments are not evaluated.
 */
#if 0
/* Print a message prefixed with the name of the calling function. */
#define	NPTV6_DEBUG(fmt, ...)	do {			\
	printf("%s: " fmt "\n", __func__, ## __VA_ARGS__);	\
} while (0)
/*
 * Same as NPTV6_DEBUG, but also provides the scratch buffers _s and _d
 * that callers hand to inet_ntop() in the argument list.
 */
#define	NPTV6_IPDEBUG(fmt, ...)	do {			\
	char _s[INET6_ADDRSTRLEN], _d[INET6_ADDRSTRLEN];	\
	printf("%s: " fmt "\n", __func__, ## __VA_ARGS__);	\
} while (0)
#else
#define	NPTV6_DEBUG(fmt, ...)
#define	NPTV6_IPDEBUG(fmt, ...)
#endif
110 
111 static int
112 nptv6_getlasthdr(struct nptv6_cfg *cfg, struct mbuf *m, int *offset)
113 {
114 	struct ip6_hdr *ip6;
115 	struct ip6_hbh *hbh;
116 	int proto, hlen;
117 
118 	hlen = (offset == NULL) ? 0: *offset;
119 	if (m->m_len < hlen)
120 		return (-1);
121 	ip6 = mtodo(m, hlen);
122 	hlen += sizeof(*ip6);
123 	proto = ip6->ip6_nxt;
124 	while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING ||
125 	    proto == IPPROTO_DSTOPTS) {
126 		hbh = mtodo(m, hlen);
127 		if (m->m_len < hlen)
128 			return (-1);
129 		proto = hbh->ip6h_nxt;
130 		hlen += (hbh->ip6h_len + 1) << 3;
131 	}
132 	if (offset != NULL)
133 		*offset = hlen;
134 	return (proto);
135 }
136 
137 static int
138 nptv6_translate_icmpv6(struct nptv6_cfg *cfg, struct mbuf **mp, int offset)
139 {
140 	struct icmp6_hdr *icmp6;
141 	struct ip6_hdr *ip6;
142 	struct mbuf *m;
143 
144 	m = *mp;
145 	if (offset > m->m_len)
146 		return (-1);
147 	icmp6 = mtodo(m, offset);
148 	NPTV6_DEBUG("ICMPv6 type %d", icmp6->icmp6_type);
149 	switch (icmp6->icmp6_type) {
150 	case ICMP6_DST_UNREACH:
151 	case ICMP6_PACKET_TOO_BIG:
152 	case ICMP6_TIME_EXCEEDED:
153 	case ICMP6_PARAM_PROB:
154 		break;
155 	case ICMP6_ECHO_REQUEST:
156 	case ICMP6_ECHO_REPLY:
157 		/* nothing to translate */
158 		return (0);
159 	default:
160 		/*
161 		 * XXX: We can add some checks to not translate NDP and MLD
162 		 * messages. Currently user must explicitly allow these message
163 		 * types, otherwise packets will be dropped.
164 		 */
165 		return (-1);
166 	}
167 	offset += sizeof(*icmp6);
168 	if (offset + sizeof(*ip6) > m->m_pkthdr.len)
169 		return (-1);
170 	if (offset + sizeof(*ip6) > m->m_len)
171 		*mp = m = m_pullup(m, offset + sizeof(*ip6));
172 	if (m == NULL)
173 		return (-1);
174 	ip6 = mtodo(m, offset);
175 	NPTV6_IPDEBUG("offset %d, %s -> %s %d", offset,
176 	    inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)),
177 	    inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)),
178 	    ip6->ip6_nxt);
179 	if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_src,
180 	    &cfg->external, &cfg->mask))
181 		return (nptv6_rewrite_external(cfg, mp, offset));
182 	else if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_dst,
183 	    &cfg->internal, &cfg->mask))
184 		return (nptv6_rewrite_internal(cfg, mp, offset));
185 	/*
186 	 * Addresses in the inner IPv6 header doesn't matched to
187 	 * our prefixes.
188 	 */
189 	return (-1);
190 }
191 
192 static int
193 nptv6_search_index(struct nptv6_cfg *cfg, struct in6_addr *a)
194 {
195 	int idx;
196 
197 	if (cfg->flags & NPTV6_48PLEN)
198 		return (3);
199 
200 	/* Search suitable word index for adjustment */
201 	for (idx = 4; idx < 8; idx++)
202 		if (a->s6_addr16[idx] != 0xffff)
203 			break;
204 	/*
205 	 * RFC 6296 p3.7: If an NPTv6 Translator discovers a datagram with
206 	 * an IID of all-zeros while performing address mapping, that
207 	 * datagram MUST be dropped, and an ICMPv6 Parameter Problem error
208 	 * SHOULD be generated.
209 	 */
210 	if (idx == 8 ||
211 	    (a->s6_addr32[2] == 0 && a->s6_addr32[3] == 0))
212 		return (-1);
213 	return (idx);
214 }
215 
/*
 * Replace the masked bits of `dst' with the corresponding bits of `src'.
 *
 * Only the first 8 bytes are considered, and processing stops at the first
 * mask byte that is zero.  Bits of `dst' not covered by the mask are kept.
 */
static void
nptv6_copy_addr(struct in6_addr *src, struct in6_addr *dst,
    struct in6_addr *mask)
{
	uint8_t bits;
	int pos;

	for (pos = 0; pos < 8; pos++) {
		bits = mask->s6_addr[pos];
		if (bits == 0)
			break;
		dst->s6_addr[pos] = (dst->s6_addr[pos] & ~bits) |
		    (src->s6_addr[pos] & bits);
	}
}
227 
228 static int
229 nptv6_rewrite_internal(struct nptv6_cfg *cfg, struct mbuf **mp, int offset)
230 {
231 	struct in6_addr *addr;
232 	struct ip6_hdr *ip6;
233 	int idx, proto;
234 	uint16_t adj;
235 
236 	ip6 = mtodo(*mp, offset);
237 	NPTV6_IPDEBUG("offset %d, %s -> %s %d", offset,
238 	    inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)),
239 	    inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)),
240 	    ip6->ip6_nxt);
241 	if (offset == 0)
242 		addr = &ip6->ip6_src;
243 	else {
244 		/*
245 		 * When we rewriting inner IPv6 header, we need to rewrite
246 		 * destination address back to external prefix. The datagram in
247 		 * the ICMPv6 payload should looks like it was send from
248 		 * external prefix.
249 		 */
250 		addr = &ip6->ip6_dst;
251 	}
252 	idx = nptv6_search_index(cfg, addr);
253 	if (idx < 0) {
254 		/*
255 		 * Do not send ICMPv6 error when offset isn't zero.
256 		 * This means we are rewriting inner IPv6 header in the
257 		 * ICMPv6 error message.
258 		 */
259 		if (offset == 0) {
260 			icmp6_error2(*mp, ICMP6_DST_UNREACH,
261 			    ICMP6_DST_UNREACH_ADDR, 0, (*mp)->m_pkthdr.rcvif);
262 			*mp = NULL;
263 		}
264 		return (IP_FW_DENY);
265 	}
266 	adj = addr->s6_addr16[idx];
267 	nptv6_copy_addr(&cfg->external, addr, &cfg->mask);
268 	adj = cksum_add(adj, cfg->adjustment);
269 	if (adj == 0xffff)
270 		adj = 0;
271 	addr->s6_addr16[idx] = adj;
272 	if (offset == 0) {
273 		/*
274 		 * We may need to translate addresses in the inner IPv6
275 		 * header for ICMPv6 error messages.
276 		 */
277 		proto = nptv6_getlasthdr(cfg, *mp, &offset);
278 		if (proto < 0 || (proto == IPPROTO_ICMPV6 &&
279 		    nptv6_translate_icmpv6(cfg, mp, offset) != 0))
280 			return (IP_FW_DENY);
281 		NPTV6STAT_INC(cfg, in2ex);
282 	}
283 	return (0);
284 }
285 
286 static int
287 nptv6_rewrite_external(struct nptv6_cfg *cfg, struct mbuf **mp, int offset)
288 {
289 	struct in6_addr *addr;
290 	struct ip6_hdr *ip6;
291 	int idx, proto;
292 	uint16_t adj;
293 
294 	ip6 = mtodo(*mp, offset);
295 	NPTV6_IPDEBUG("offset %d, %s -> %s %d", offset,
296 	    inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)),
297 	    inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)),
298 	    ip6->ip6_nxt);
299 	if (offset == 0)
300 		addr = &ip6->ip6_dst;
301 	else {
302 		/*
303 		 * When we rewriting inner IPv6 header, we need to rewrite
304 		 * source address back to internal prefix. The datagram in
305 		 * the ICMPv6 payload should looks like it was send from
306 		 * internal prefix.
307 		 */
308 		addr = &ip6->ip6_src;
309 	}
310 	idx = nptv6_search_index(cfg, addr);
311 	if (idx < 0) {
312 		/*
313 		 * Do not send ICMPv6 error when offset isn't zero.
314 		 * This means we are rewriting inner IPv6 header in the
315 		 * ICMPv6 error message.
316 		 */
317 		if (offset == 0) {
318 			icmp6_error2(*mp, ICMP6_DST_UNREACH,
319 			    ICMP6_DST_UNREACH_ADDR, 0, (*mp)->m_pkthdr.rcvif);
320 			*mp = NULL;
321 		}
322 		return (IP_FW_DENY);
323 	}
324 	adj = addr->s6_addr16[idx];
325 	nptv6_copy_addr(&cfg->internal, addr, &cfg->mask);
326 	adj = cksum_add(adj, ~cfg->adjustment);
327 	if (adj == 0xffff)
328 		adj = 0;
329 	addr->s6_addr16[idx] = adj;
330 	if (offset == 0) {
331 		/*
332 		 * We may need to translate addresses in the inner IPv6
333 		 * header for ICMPv6 error messages.
334 		 */
335 		proto = nptv6_getlasthdr(cfg, *mp, &offset);
336 		if (proto < 0 || (proto == IPPROTO_ICMPV6 &&
337 		    nptv6_translate_icmpv6(cfg, mp, offset) != 0))
338 			return (IP_FW_DENY);
339 		NPTV6STAT_INC(cfg, ex2in);
340 	}
341 	return (0);
342 }
343 
344 /*
345  * ipfw external action handler.
346  */
347 static int
348 ipfw_nptv6(struct ip_fw_chain *chain, struct ip_fw_args *args,
349     ipfw_insn *cmd, int *done)
350 {
351 	struct ip6_hdr *ip6;
352 	struct nptv6_cfg *cfg;
353 	ipfw_insn *icmd;
354 	int ret;
355 
356 	*done = 0; /* try next rule if not matched */
357 	ret = IP_FW_DENY;
358 	icmd = cmd + 1;
359 	if (cmd->opcode != O_EXTERNAL_ACTION ||
360 	    cmd->arg1 != V_nptv6_eid ||
361 	    icmd->opcode != O_EXTERNAL_INSTANCE ||
362 	    (cfg = NPTV6_LOOKUP(chain, icmd)) == NULL ||
363 	    (cfg->flags & NPTV6_READY) == 0)
364 		return (ret);
365 	/*
366 	 * We need act as router, so when forwarding is disabled -
367 	 * do nothing.
368 	 */
369 	if (V_ip6_forwarding == 0 || args->f_id.addr_type != 6)
370 		return (ret);
371 	/*
372 	 * NOTE: we expect ipfw_chk() did m_pullup() up to upper level
373 	 * protocol's headers. Also we skip some checks, that ip6_input(),
374 	 * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did.
375 	 */
376 	ip6 = mtod(args->m, struct ip6_hdr *);
377 	NPTV6_IPDEBUG("eid %u, oid %u, %s -> %s %d",
378 	    cmd->arg1, icmd->arg1,
379 	    inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)),
380 	    inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)),
381 	    ip6->ip6_nxt);
382 	if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_src,
383 	    &cfg->internal, &cfg->mask)) {
384 		/*
385 		 * XXX: Do not translate packets when both src and dst
386 		 * are from internal prefix.
387 		 */
388 		if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_dst,
389 		    &cfg->internal, &cfg->mask))
390 			return (ret);
391 		ret = nptv6_rewrite_internal(cfg, &args->m, 0);
392 	} else if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_dst,
393 	    &cfg->external, &cfg->mask))
394 		ret = nptv6_rewrite_external(cfg, &args->m, 0);
395 	else
396 		return (ret);
397 	/*
398 	 * If address wasn't rewrited - free mbuf and terminate the search.
399 	 */
400 	if (ret != 0) {
401 		if (args->m != NULL) {
402 			m_freem(args->m);
403 			args->m = NULL; /* mark mbuf as consumed */
404 		}
405 		NPTV6STAT_INC(cfg, dropped);
406 		*done = 1;
407 	} else {
408 		/* Terminate the search if one_pass is set */
409 		*done = V_fw_one_pass;
410 		/* Update args->f_id when one_pass is off */
411 		if (*done == 0) {
412 			ip6 = mtod(args->m, struct ip6_hdr *);
413 			args->f_id.src_ip6 = ip6->ip6_src;
414 			args->f_id.dst_ip6 = ip6->ip6_dst;
415 		}
416 	}
417 	return (ret);
418 }
419 
420 static struct nptv6_cfg *
421 nptv6_alloc_config(const char *name, uint8_t set)
422 {
423 	struct nptv6_cfg *cfg;
424 
425 	cfg = malloc(sizeof(struct nptv6_cfg), M_IPFW, M_WAITOK | M_ZERO);
426 	COUNTER_ARRAY_ALLOC(cfg->stats, NPTV6STATS, M_WAITOK);
427 	cfg->no.name = cfg->name;
428 	cfg->no.etlv = IPFW_TLV_NPTV6_NAME;
429 	cfg->no.set = set;
430 	strlcpy(cfg->name, name, sizeof(cfg->name));
431 	return (cfg);
432 }
433 
/*
 * Release an instance created by nptv6_alloc_config(): first its
 * statistics counters, then the structure itself.
 */
static void
nptv6_free_config(struct nptv6_cfg *cfg)
{

	COUNTER_ARRAY_FREE(cfg->stats, NPTV6STATS);
	free(cfg, M_IPFW);
}
441 
442 static void
443 nptv6_export_config(struct ip_fw_chain *ch, struct nptv6_cfg *cfg,
444     ipfw_nptv6_cfg *uc)
445 {
446 
447 	uc->internal = cfg->internal;
448 	if (cfg->flags & NPTV6_DYNAMIC_PREFIX)
449 		memcpy(uc->if_name, cfg->if_name, IF_NAMESIZE);
450 	else
451 		uc->external = cfg->external;
452 	uc->plen = cfg->plen;
453 	uc->flags = cfg->flags & NPTV6_FLAGSMASK;
454 	uc->set = cfg->no.set;
455 	strlcpy(uc->name, cfg->no.name, sizeof(uc->name));
456 }
457 
/* Context handed to export_config_cb() while listing instances. */
struct nptv6_dump_arg {
	struct ip_fw_chain *ch;		/* chain the instances belong to */
	struct sockopt_data *sd;	/* reply buffer being filled */
};
462 
463 static int
464 export_config_cb(struct namedobj_instance *ni, struct named_object *no,
465     void *arg)
466 {
467 	struct nptv6_dump_arg *da = (struct nptv6_dump_arg *)arg;
468 	ipfw_nptv6_cfg *uc;
469 
470 	uc = (ipfw_nptv6_cfg *)ipfw_get_sopt_space(da->sd, sizeof(*uc));
471 	nptv6_export_config(da->ch, (struct nptv6_cfg *)no, uc);
472 	return (0);
473 }
474 
475 static struct nptv6_cfg *
476 nptv6_find(struct namedobj_instance *ni, const char *name, uint8_t set)
477 {
478 	struct nptv6_cfg *cfg;
479 
480 	cfg = (struct nptv6_cfg *)ipfw_objhash_lookup_name_type(ni, set,
481 	    IPFW_TLV_NPTV6_NAME, name);
482 
483 	return (cfg);
484 }
485 
486 static void
487 nptv6_calculate_adjustment(struct nptv6_cfg *cfg)
488 {
489 	uint16_t i, e;
490 	uint16_t *p;
491 
492 	/* Calculate checksum of internal prefix */
493 	for (i = 0, p = (uint16_t *)&cfg->internal;
494 	    p < (uint16_t *)(&cfg->internal + 1); p++)
495 		i = cksum_add(i, *p);
496 
497 	/* Calculate checksum of external prefix */
498 	for (e = 0, p = (uint16_t *)&cfg->external;
499 	    p < (uint16_t *)(&cfg->external + 1); p++)
500 		e = cksum_add(e, *p);
501 
502 	/* Adjustment value for Int->Ext direction */
503 	cfg->adjustment = cksum_add(~e, i);
504 }
505 
/*
 * Check whether `addr' may be used as a translation prefix.
 * Returns EINVAL for multicast, link-local, loopback and unspecified
 * addresses, and 0 for everything else.
 */
static int
nptv6_check_prefix(const struct in6_addr *addr)
{
	int unusable;

	unusable = IN6_IS_ADDR_UNSPECIFIED(addr) ||
	    IN6_IS_ADDR_LOOPBACK(addr) ||
	    IN6_IS_ADDR_LINKLOCAL(addr) ||
	    IN6_IS_ADDR_MULTICAST(addr);
	return (unusable ? EINVAL : 0);
}
517 
/*
 * Install `addr' as the external prefix of the instance: keep only the
 * bits covered by cfg->mask, recompute the checksum adjustment and mark
 * the instance NPTV6_READY.  cfg->mask and cfg->internal are read here, so
 * they have to be set before this is called.
 */
static void
nptv6_set_external(struct nptv6_cfg *cfg, struct in6_addr *addr)
{

	cfg->external = *addr;
	IN6_MASK_ADDR(&cfg->external, &cfg->mask);
	/* The adjustment depends on the masked external prefix. */
	nptv6_calculate_adjustment(cfg);
	cfg->flags |= NPTV6_READY;
}
527 
528 /*
529  * Try to determine what prefix to use as external for
530  * configured interface name.
531  */
532 static void
533 nptv6_find_prefix(struct ip_fw_chain *ch, struct nptv6_cfg *cfg,
534     struct ifnet *ifp)
535 {
536 	struct epoch_tracker et;
537 	struct ifaddr *ifa;
538 	struct in6_ifaddr *ia;
539 
540 	MPASS(cfg->flags & NPTV6_DYNAMIC_PREFIX);
541 	IPFW_UH_WLOCK_ASSERT(ch);
542 
543 	if (ifp == NULL) {
544 		ifp = ifunit_ref(cfg->if_name);
545 		if (ifp == NULL)
546 			return;
547 	}
548 	NET_EPOCH_ENTER(et);
549 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
550 		if (ifa->ifa_addr->sa_family != AF_INET6)
551 			continue;
552 		ia = (struct in6_ifaddr *)ifa;
553 		if (nptv6_check_prefix(&ia->ia_addr.sin6_addr) ||
554 		    IN6_ARE_MASKED_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
555 		    &cfg->internal, &cfg->mask))
556 			continue;
557 		/* Suitable address is found. */
558 		nptv6_set_external(cfg, &ia->ia_addr.sin6_addr);
559 		break;
560 	}
561 	NET_EPOCH_EXIT(et);
562 	if_rele(ifp);
563 }
564 
/* Description of one address event, passed to ifaddr_cb(). */
struct ifaddr_event_args {
	struct ifnet *ifp;		/* interface the event happened on */
	const struct in6_addr *addr;	/* address that was added/removed */
	int event;			/* event code, e.g. IFADDR_EVENT_DEL */
};
570 
571 static int
572 ifaddr_cb(struct namedobj_instance *ni, struct named_object *no,
573     void *arg)
574 {
575 	struct ifaddr_event_args *args;
576 	struct ip_fw_chain *ch;
577 	struct nptv6_cfg *cfg;
578 
579 	ch = &V_layer3_chain;
580 	cfg = (struct nptv6_cfg *)SRV_OBJECT(ch, no->kidx);
581 	if ((cfg->flags & NPTV6_DYNAMIC_PREFIX) == 0)
582 		return (0);
583 
584 	args = arg;
585 	/* If interface name doesn't match, ignore */
586 	if (strncmp(args->ifp->if_xname, cfg->if_name, IF_NAMESIZE))
587 		return (0);
588 	if (args->ifp->if_flags & IFF_DYING) { /* XXX: is it possible? */
589 		cfg->flags &= ~NPTV6_READY;
590 		return (0);
591 	}
592 	if (args->event == IFADDR_EVENT_DEL) {
593 		/* If instance is not ready, ignore */
594 		if ((cfg->flags & NPTV6_READY) == 0)
595 			return (0);
596 		/* If address does not match the external prefix, ignore */
597 		if (IN6_ARE_MASKED_ADDR_EQUAL(&cfg->external, args->addr,
598 		    &cfg->mask) != 0)
599 			return (0);
600 		/* Otherwise clear READY flag */
601 		cfg->flags &= ~NPTV6_READY;
602 	} else {/* IFADDR_EVENT_ADD */
603 		/* If instance is already ready, ignore */
604 		if (cfg->flags & NPTV6_READY)
605 			return (0);
606 		/* If address is not suitable for prefix, ignore */
607 		if (nptv6_check_prefix(args->addr) ||
608 		    IN6_ARE_MASKED_ADDR_EQUAL(args->addr, &cfg->internal,
609 		    &cfg->mask))
610 			return (0);
611 		/* FALLTHROUGH */
612 	}
613 	MPASS(!(cfg->flags & NPTV6_READY));
614 	/* Try to determine the prefix */
615 	if_ref(args->ifp);
616 	nptv6_find_prefix(ch, cfg, args->ifp);
617 	return (0);
618 }
619 
620 static void
621 nptv6_ifaddrevent_handler(void *arg __unused, struct ifnet *ifp,
622     struct ifaddr *ifa, int event)
623 {
624 	struct ifaddr_event_args args;
625 	struct ip_fw_chain *ch;
626 
627 	if (ifa->ifa_addr->sa_family != AF_INET6)
628 		return;
629 
630 	args.ifp = ifp;
631 	args.addr = &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr;
632 	args.event = event;
633 
634 	ch = &V_layer3_chain;
635 	IPFW_UH_WLOCK(ch);
636 	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), ifaddr_cb, &args,
637 	    IPFW_TLV_NPTV6_NAME);
638 	IPFW_UH_WUNLOCK(ch);
639 }
640 
641 /*
642  * Creates new NPTv6 instance.
643  * Data layout (v0)(current):
644  * Request: [ ipfw_obj_lheader ipfw_nptv6_cfg ]
645  *
646  * Returns 0 on success
647  */
648 static int
649 nptv6_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
650     struct sockopt_data *sd)
651 {
652 	struct in6_addr mask;
653 	ipfw_obj_lheader *olh;
654 	ipfw_nptv6_cfg *uc;
655 	struct namedobj_instance *ni;
656 	struct nptv6_cfg *cfg;
657 
658 	if (sd->valsize != sizeof(*olh) + sizeof(*uc))
659 		return (EINVAL);
660 
661 	olh = (ipfw_obj_lheader *)sd->kbuf;
662 	uc = (ipfw_nptv6_cfg *)(olh + 1);
663 	if (ipfw_check_object_name_generic(uc->name) != 0)
664 		return (EINVAL);
665 	if (uc->plen < 8 || uc->plen > 64 || uc->set >= IPFW_MAX_SETS)
666 		return (EINVAL);
667 	if (nptv6_check_prefix(&uc->internal))
668 		return (EINVAL);
669 	in6_prefixlen2mask(&mask, uc->plen);
670 	if ((uc->flags & NPTV6_DYNAMIC_PREFIX) == 0 && (
671 	    nptv6_check_prefix(&uc->external) ||
672 	    IN6_ARE_MASKED_ADDR_EQUAL(&uc->external, &uc->internal, &mask)))
673 		return (EINVAL);
674 
675 	ni = CHAIN_TO_SRV(ch);
676 	IPFW_UH_RLOCK(ch);
677 	if (nptv6_find(ni, uc->name, uc->set) != NULL) {
678 		IPFW_UH_RUNLOCK(ch);
679 		return (EEXIST);
680 	}
681 	IPFW_UH_RUNLOCK(ch);
682 
683 	cfg = nptv6_alloc_config(uc->name, uc->set);
684 	cfg->plen = uc->plen;
685 	cfg->flags = uc->flags & NPTV6_FLAGSMASK;
686 	if (cfg->plen <= 48)
687 		cfg->flags |= NPTV6_48PLEN;
688 	cfg->mask = mask;
689 	cfg->internal = uc->internal;
690 	IN6_MASK_ADDR(&cfg->internal, &mask);
691 	if (cfg->flags & NPTV6_DYNAMIC_PREFIX)
692 		memcpy(cfg->if_name, uc->if_name, IF_NAMESIZE);
693 	else
694 		nptv6_set_external(cfg, &uc->external);
695 
696 	if ((uc->flags & NPTV6_DYNAMIC_PREFIX) != 0 &&
697 	    nptv6_ifaddr_event == NULL)
698 		nptv6_ifaddr_event = EVENTHANDLER_REGISTER(
699 		    ifaddr_event_ext, nptv6_ifaddrevent_handler, NULL,
700 		    EVENTHANDLER_PRI_ANY);
701 
702 	IPFW_UH_WLOCK(ch);
703 	if (ipfw_objhash_alloc_idx(ni, &cfg->no.kidx) != 0) {
704 		IPFW_UH_WUNLOCK(ch);
705 		nptv6_free_config(cfg);
706 		return (ENOSPC);
707 	}
708 	ipfw_objhash_add(ni, &cfg->no);
709 	SRV_OBJECT(ch, cfg->no.kidx) = cfg;
710 	if (cfg->flags & NPTV6_DYNAMIC_PREFIX)
711 		nptv6_find_prefix(ch, cfg, NULL);
712 	IPFW_UH_WUNLOCK(ch);
713 
714 	return (0);
715 }
716 
717 /*
718  * Destroys NPTv6 instance.
719  * Data layout (v0)(current):
720  * Request: [ ipfw_obj_header ]
721  *
722  * Returns 0 on success
723  */
724 static int
725 nptv6_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
726     struct sockopt_data *sd)
727 {
728 	ipfw_obj_header *oh;
729 	struct nptv6_cfg *cfg;
730 
731 	if (sd->valsize != sizeof(*oh))
732 		return (EINVAL);
733 
734 	oh = (ipfw_obj_header *)sd->kbuf;
735 	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0)
736 		return (EINVAL);
737 
738 	IPFW_UH_WLOCK(ch);
739 	cfg = nptv6_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
740 	if (cfg == NULL) {
741 		IPFW_UH_WUNLOCK(ch);
742 		return (ESRCH);
743 	}
744 	if (cfg->no.refcnt > 0) {
745 		IPFW_UH_WUNLOCK(ch);
746 		return (EBUSY);
747 	}
748 
749 	ipfw_reset_eaction_instance(ch, V_nptv6_eid, cfg->no.kidx);
750 	SRV_OBJECT(ch, cfg->no.kidx) = NULL;
751 	ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no);
752 	ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx);
753 	IPFW_UH_WUNLOCK(ch);
754 
755 	nptv6_free_config(cfg);
756 	return (0);
757 }
758 
/*
 * Get or change nptv6 instance config.
 * Request: [ ipfw_obj_header [ ipfw_nptv6_cfg ] ]
 *
 * Not implemented: the request is not examined and EOPNOTSUPP is always
 * returned.
 */
static int
nptv6_config(struct ip_fw_chain *chain, ip_fw3_opheader *op,
    struct sockopt_data *sd)
{

	return (EOPNOTSUPP);
}
770 
771 /*
772  * Lists all NPTv6 instances currently available in kernel.
773  * Data layout (v0)(current):
774  * Request: [ ipfw_obj_lheader ]
775  * Reply: [ ipfw_obj_lheader ipfw_nptv6_cfg x N ]
776  *
777  * Returns 0 on success
778  */
779 static int
780 nptv6_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
781     struct sockopt_data *sd)
782 {
783 	ipfw_obj_lheader *olh;
784 	struct nptv6_dump_arg da;
785 
786 	/* Check minimum header size */
787 	if (sd->valsize < sizeof(ipfw_obj_lheader))
788 		return (EINVAL);
789 
790 	olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh));
791 
792 	IPFW_UH_RLOCK(ch);
793 	olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch),
794 	    IPFW_TLV_NPTV6_NAME);
795 	olh->objsize = sizeof(ipfw_nptv6_cfg);
796 	olh->size = sizeof(*olh) + olh->count * olh->objsize;
797 
798 	if (sd->valsize < olh->size) {
799 		IPFW_UH_RUNLOCK(ch);
800 		return (ENOMEM);
801 	}
802 	memset(&da, 0, sizeof(da));
803 	da.ch = ch;
804 	da.sd = sd;
805 	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb,
806 	    &da, IPFW_TLV_NPTV6_NAME);
807 	IPFW_UH_RUNLOCK(ch);
808 
809 	return (0);
810 }
811 
/* Copy one counter of instance _cfg into the same-named field of _stats. */
#define	__COPY_STAT_FIELD(_cfg, _stats, _field)	\
	(_stats)->_field = NPTV6STAT_FETCH(_cfg, _field)
/*
 * Copy the in2ex, ex2in and dropped counters of `cfg' into `stats'.
 * Other fields of `stats' are not written.
 */
static void
export_stats(struct ip_fw_chain *ch, struct nptv6_cfg *cfg,
    struct ipfw_nptv6_stats *stats)
{

	__COPY_STAT_FIELD(cfg, stats, in2ex);
	__COPY_STAT_FIELD(cfg, stats, ex2in);
	__COPY_STAT_FIELD(cfg, stats, dropped);
}
823 
824 /*
825  * Get NPTv6 statistics.
826  * Data layout (v0)(current):
827  * Request: [ ipfw_obj_header ]
828  * Reply: [ ipfw_obj_header ipfw_obj_ctlv [ uint64_t x N ]]
829  *
830  * Returns 0 on success
831  */
832 static int
833 nptv6_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
834     struct sockopt_data *sd)
835 {
836 	struct ipfw_nptv6_stats stats;
837 	struct nptv6_cfg *cfg;
838 	ipfw_obj_header *oh;
839 	ipfw_obj_ctlv *ctlv;
840 	size_t sz;
841 
842 	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats);
843 	if (sd->valsize % sizeof(uint64_t))
844 		return (EINVAL);
845 	if (sd->valsize < sz)
846 		return (ENOMEM);
847 	oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
848 	if (oh == NULL)
849 		return (EINVAL);
850 	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
851 	    oh->ntlv.set >= IPFW_MAX_SETS)
852 		return (EINVAL);
853 	memset(&stats, 0, sizeof(stats));
854 
855 	IPFW_UH_RLOCK(ch);
856 	cfg = nptv6_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
857 	if (cfg == NULL) {
858 		IPFW_UH_RUNLOCK(ch);
859 		return (ESRCH);
860 	}
861 	export_stats(ch, cfg, &stats);
862 	IPFW_UH_RUNLOCK(ch);
863 
864 	ctlv = (ipfw_obj_ctlv *)(oh + 1);
865 	memset(ctlv, 0, sizeof(*ctlv));
866 	ctlv->head.type = IPFW_TLV_COUNTERS;
867 	ctlv->head.length = sz - sizeof(ipfw_obj_header);
868 	ctlv->count = sizeof(stats) / sizeof(uint64_t);
869 	ctlv->objsize = sizeof(uint64_t);
870 	ctlv->version = 1;
871 	memcpy(ctlv + 1, &stats, sizeof(stats));
872 	return (0);
873 }
874 
875 /*
876  * Reset NPTv6 statistics.
877  * Data layout (v0)(current):
878  * Request: [ ipfw_obj_header ]
879  *
880  * Returns 0 on success
881  */
882 static int
883 nptv6_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
884     struct sockopt_data *sd)
885 {
886 	struct nptv6_cfg *cfg;
887 	ipfw_obj_header *oh;
888 
889 	if (sd->valsize != sizeof(*oh))
890 		return (EINVAL);
891 	oh = (ipfw_obj_header *)sd->kbuf;
892 	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
893 	    oh->ntlv.set >= IPFW_MAX_SETS)
894 		return (EINVAL);
895 
896 	IPFW_UH_WLOCK(ch);
897 	cfg = nptv6_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
898 	if (cfg == NULL) {
899 		IPFW_UH_WUNLOCK(ch);
900 		return (ESRCH);
901 	}
902 	COUNTER_ARRAY_ZERO(cfg->stats, NPTV6STATS);
903 	IPFW_UH_WUNLOCK(ch);
904 	return (0);
905 }
906 
/*
 * Socket option handlers of this module; registered in nptv6_init() and
 * removed in nptv6_uninit().
 */
static struct ipfw_sopt_handler	scodes[] = {
	{ IP_FW_NPTV6_CREATE, 0,	HDIR_SET,	nptv6_create },
	{ IP_FW_NPTV6_DESTROY,0,	HDIR_SET,	nptv6_destroy },
	{ IP_FW_NPTV6_CONFIG, 0,	HDIR_BOTH,	nptv6_config },
	{ IP_FW_NPTV6_LIST,   0,	HDIR_GET,	nptv6_list },
	{ IP_FW_NPTV6_STATS,  0,	HDIR_GET,	nptv6_stats },
	{ IP_FW_NPTV6_RESET_STATS,0,	HDIR_SET,	nptv6_reset_stats },
};
915 
916 static int
917 nptv6_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
918 {
919 	ipfw_insn *icmd;
920 
921 	icmd = cmd - 1;
922 	NPTV6_DEBUG("opcode %d, arg1 %d, opcode0 %d, arg1 %d",
923 	    cmd->opcode, cmd->arg1, icmd->opcode, icmd->arg1);
924 	if (icmd->opcode != O_EXTERNAL_ACTION ||
925 	    icmd->arg1 != V_nptv6_eid)
926 		return (1);
927 
928 	*puidx = cmd->arg1;
929 	*ptype = 0;
930 	return (0);
931 }
932 
/* Object rewriter hook: store object index `idx' in the opcode's arg1. */
static void
nptv6_update_arg1(ipfw_insn *cmd, uint16_t idx)
{

	cmd->arg1 = idx;
	NPTV6_DEBUG("opcode %d, arg1 -> %d", cmd->opcode, cmd->arg1);
}
940 
/*
 * Object rewriter hook: look up the NPTv6 named object described by `ti'.
 * The result of ipfw_objhash_find_type() is returned unchanged and the
 * object, if any, is stored through `pno'.
 */
static int
nptv6_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
    struct named_object **pno)
{
	int err;

	err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti,
	    IPFW_TLV_NPTV6_NAME, pno);
	NPTV6_DEBUG("uidx %u, type %u, err %d", ti->uidx, ti->type, err);
	return (err);
}
952 
953 static struct named_object *
954 nptv6_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
955 {
956 	struct namedobj_instance *ni;
957 	struct named_object *no;
958 
959 	IPFW_UH_WLOCK_ASSERT(ch);
960 	ni = CHAIN_TO_SRV(ch);
961 	no = ipfw_objhash_lookup_kidx(ni, idx);
962 	KASSERT(no != NULL, ("NPT with index %d not found", idx));
963 
964 	NPTV6_DEBUG("kidx %u -> %s", idx, no->name);
965 	return (no);
966 }
967 
/*
 * Object rewriter hook: forward a sets command to ipfw_obj_manage_sets()
 * restricted to objects of the NPTv6 type, and return its result.
 */
static int
nptv6_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
    enum ipfw_sets_cmd cmd)
{

	return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NPTV6_NAME,
	    set, new_set, cmd));
}
976 
/*
 * Object rewriter for the O_EXTERNAL_INSTANCE opcode; registered in
 * nptv6_init() and removed in nptv6_uninit().
 */
static struct opcode_obj_rewrite opcodes[] = {
	{
		.opcode	= O_EXTERNAL_INSTANCE,
		.etlv = IPFW_TLV_EACTION /* just show it isn't table */,
		.classifier = nptv6_classify,
		.update = nptv6_update_arg1,
		.find_byname = nptv6_findbyname,
		.find_bykidx = nptv6_findbykidx,
		.manage_sets = nptv6_manage_sets,
	},
};
988 
989 static int
990 destroy_config_cb(struct namedobj_instance *ni, struct named_object *no,
991     void *arg)
992 {
993 	struct nptv6_cfg *cfg;
994 	struct ip_fw_chain *ch;
995 
996 	ch = (struct ip_fw_chain *)arg;
997 	IPFW_UH_WLOCK_ASSERT(ch);
998 
999 	cfg = (struct nptv6_cfg *)SRV_OBJECT(ch, no->kidx);
1000 	SRV_OBJECT(ch, no->kidx) = NULL;
1001 	ipfw_objhash_del(ni, &cfg->no);
1002 	ipfw_objhash_free_idx(ni, cfg->no.kidx);
1003 	nptv6_free_config(cfg);
1004 	return (0);
1005 }
1006 
1007 int
1008 nptv6_init(struct ip_fw_chain *ch, int first)
1009 {
1010 
1011 	V_nptv6_eid = ipfw_add_eaction(ch, ipfw_nptv6, "nptv6");
1012 	if (V_nptv6_eid == 0)
1013 		return (ENXIO);
1014 	IPFW_ADD_SOPT_HANDLER(first, scodes);
1015 	IPFW_ADD_OBJ_REWRITER(first, opcodes);
1016 	return (0);
1017 }
1018 
/*
 * Undo nptv6_init() for chain `ch': remove the handlers and the external
 * action, then destroy every NPTv6 instance of the chain.  The address
 * event handler is deregistered only when `last' is set.
 */
void
nptv6_uninit(struct ip_fw_chain *ch, int last)
{

	if (last && nptv6_ifaddr_event != NULL)
		EVENTHANDLER_DEREGISTER(ifaddr_event_ext, nptv6_ifaddr_event);
	IPFW_DEL_OBJ_REWRITER(last, opcodes);
	IPFW_DEL_SOPT_HANDLER(last, scodes);
	ipfw_del_eaction(ch, V_nptv6_eid);
	/*
	 * Since we have already deregistered the external action,
	 * our named objects became inaccessible via rules, because
	 * all rules were truncated by ipfw_del_eaction().
	 * So, we can unlink and destroy our named objects without holding
	 * IPFW_WLOCK().
	 */
	IPFW_UH_WLOCK(ch);
	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch,
	    IPFW_TLV_NPTV6_NAME);
	/* The id is cleared only after the lookups by type are done. */
	V_nptv6_eid = 0;
	IPFW_UH_WUNLOCK(ch);
}
1041