xref: /freebsd/sys/netpfil/ipfw/nat64/nat64_translate.c (revision 47dd1d1b619cc035b82b49a91a25544309ff95ae)
1 /*-
2  * Copyright (c) 2015-2018 Yandex LLC
3  * Copyright (c) 2015-2018 Andrey V. Elsukov <ae@FreeBSD.org>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "opt_ipfw.h"
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/counter.h>
36 #include <sys/errno.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/mbuf.h>
40 #include <sys/module.h>
41 #include <sys/rmlock.h>
42 #include <sys/rwlock.h>
43 #include <sys/socket.h>
44 #include <sys/queue.h>
45 
46 #include <net/if.h>
47 #include <net/if_var.h>
48 #include <net/if_pflog.h>
49 #include <net/pfil.h>
50 #include <net/netisr.h>
51 #include <net/route.h>
52 
53 #include <netinet/in.h>
54 #include <netinet/in_fib.h>
55 #include <netinet/ip.h>
56 #include <netinet/ip_var.h>
57 #include <netinet/ip_fw.h>
58 #include <netinet/ip6.h>
59 #include <netinet/icmp6.h>
60 #include <netinet/ip_icmp.h>
61 #include <netinet/tcp.h>
62 #include <netinet/udp.h>
63 #include <netinet6/in6_var.h>
64 #include <netinet6/in6_fib.h>
65 #include <netinet6/ip6_var.h>
66 
67 #include <netpfil/pf/pf.h>
68 #include <netpfil/ipfw/ip_fw_private.h>
69 #include <machine/in_cksum.h>
70 
71 #include "ip_fw_nat64.h"
72 #include "nat64_translate.h"
73 
74 static void
75 nat64_log(struct pfloghdr *logdata, struct mbuf *m, sa_family_t family)
76 {
77 
78 	logdata->dir = PF_OUT;
79 	logdata->af = family;
80 	ipfw_bpf_mtap2(logdata, PFLOG_HDRLEN, m);
81 }
82 
83 #ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
84 static NAT64NOINLINE int nat64_find_route4(struct nhop4_basic *,
85     struct sockaddr_in *, struct mbuf *);
86 static NAT64NOINLINE int nat64_find_route6(struct nhop6_basic *,
87     struct sockaddr_in6 *, struct mbuf *);
88 
89 static NAT64NOINLINE int
90 nat64_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
91     struct nat64_counters *stats, void *logdata)
92 {
93 	int error;
94 
95 	if (logdata != NULL)
96 		nat64_log(logdata, m, dst->sa_family);
97 	error = (*ifp->if_output)(ifp, m, dst, NULL);
98 	if (error != 0)
99 		NAT64STAT_INC(stats, oerrors);
100 	return (error);
101 }
102 
103 static NAT64NOINLINE int
104 nat64_output_one(struct mbuf *m, struct nat64_counters *stats, void *logdata)
105 {
106 	struct nhop6_basic nh6;
107 	struct nhop4_basic nh4;
108 	struct sockaddr_in6 dst6;
109 	struct sockaddr_in dst4;
110 	struct sockaddr *dst;
111 	struct ip6_hdr *ip6;
112 	struct ip *ip4;
113 	struct ifnet *ifp;
114 	int error;
115 
116 	ip4 = mtod(m, struct ip *);
117 	switch (ip4->ip_v) {
118 	case IPVERSION:
119 		dst4.sin_addr = ip4->ip_dst;
120 		error = nat64_find_route4(&nh4, &dst4, m);
121 		if (error != 0)
122 			NAT64STAT_INC(stats, noroute4);
123 		else {
124 			ifp = nh4.nh_ifp;
125 			dst = (struct sockaddr *)&dst4;
126 		}
127 		break;
128 	case (IPV6_VERSION >> 4):
129 		ip6 = mtod(m, struct ip6_hdr *);
130 		dst6.sin6_addr = ip6->ip6_dst;
131 		error = nat64_find_route6(&nh6, &dst6, m);
132 		if (error != 0)
133 			NAT64STAT_INC(stats, noroute6);
134 		else {
135 			ifp = nh6.nh_ifp;
136 			dst = (struct sockaddr *)&dst6;
137 		}
138 		break;
139 	default:
140 		m_freem(m);
141 		NAT64STAT_INC(stats, dropped);
142 		DPRINTF(DP_DROPS, "dropped due to unknown IP version");
143 		return (EAFNOSUPPORT);
144 	}
145 	if (error != 0) {
146 		m_freem(m);
147 		return (EHOSTUNREACH);
148 	}
149 	if (logdata != NULL)
150 		nat64_log(logdata, m, dst->sa_family);
151 	error = (*ifp->if_output)(ifp, m, dst, NULL);
152 	if (error != 0)
153 		NAT64STAT_INC(stats, oerrors);
154 	return (error);
155 }
156 #else /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */
157 static NAT64NOINLINE int
158 nat64_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
159     struct nat64_counters *stats, void *logdata)
160 {
161 	struct ip *ip4;
162 	int ret, af;
163 
164 	ip4 = mtod(m, struct ip *);
165 	switch (ip4->ip_v) {
166 	case IPVERSION:
167 		af = AF_INET;
168 		ret = NETISR_IP;
169 		break;
170 	case (IPV6_VERSION >> 4):
171 		af = AF_INET6;
172 		ret = NETISR_IPV6;
173 		break;
174 	default:
175 		m_freem(m);
176 		NAT64STAT_INC(stats, dropped);
177 		DPRINTF(DP_DROPS, "unknown IP version");
178 		return (EAFNOSUPPORT);
179 	}
180 	if (logdata != NULL)
181 		nat64_log(logdata, m, af);
182 	ret = netisr_queue(ret, m);
183 	if (ret != 0)
184 		NAT64STAT_INC(stats, oerrors);
185 	return (ret);
186 }
187 
188 static NAT64NOINLINE int
189 nat64_output_one(struct mbuf *m, struct nat64_counters *stats, void *logdata)
190 {
191 
192 	return (nat64_output(NULL, m, NULL, stats, logdata));
193 }
194 #endif /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */
195 
196 /*
197  * Check the given IPv6 prefix and length according to RFC6052:
198  *   The prefixes can only have one of the following lengths:
199  *   32, 40, 48, 56, 64, or 96 (The Well-Known Prefix is 96 bits long).
200  * Returns zero on success, otherwise EINVAL.
201  */
202 int
203 nat64_check_prefix6(const struct in6_addr *prefix, int length)
204 {
205 
206 	switch (length) {
207 	case 32:
208 	case 40:
209 	case 48:
210 	case 56:
211 	case 64:
212 		/* Well-known prefix has 96 prefix length */
213 		if (IN6_IS_ADDR_WKPFX(prefix))
214 			return (EINVAL);
215 		/* FALLTHROUGH */
216 	case 96:
217 		/* Bits 64 to 71 must be set to zero */
218 		if (prefix->__u6_addr.__u6_addr8[8] != 0)
219 			return (EINVAL);
220 		/* Some extra checks */
221 		if (IN6_IS_ADDR_MULTICAST(prefix) ||
222 		    IN6_IS_ADDR_UNSPECIFIED(prefix) ||
223 		    IN6_IS_ADDR_LOOPBACK(prefix))
224 			return (EINVAL);
225 		return (0);
226 	}
227 	return (EINVAL);
228 }
229 
230 int
231 nat64_check_private_ip4(const struct nat64_config *cfg, in_addr_t ia)
232 {
233 
234 	if (V_nat64_allow_private)
235 		return (0);
236 
237 	/* WKPFX must not be used to represent non-global IPv4 addresses */
238 	if (cfg->flags & NAT64_WKPFX) {
239 		/* IN_PRIVATE */
240 		if ((ia & htonl(0xff000000)) == htonl(0x0a000000) ||
241 		    (ia & htonl(0xfff00000)) == htonl(0xac100000) ||
242 		    (ia & htonl(0xffff0000)) == htonl(0xc0a80000))
243 			return (1);
244 		/*
245 		 * RFC 5735:
246 		 *  192.0.0.0/24 - reserved for IETF protocol assignments
247 		 *  192.88.99.0/24 - for use as 6to4 relay anycast addresses
248 		 *  198.18.0.0/15 - for use in benchmark tests
249 		 *  192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24 - for use
250 		 *   in documentation and example code
251 		 */
252 		if ((ia & htonl(0xffffff00)) == htonl(0xc0000000) ||
253 		    (ia & htonl(0xffffff00)) == htonl(0xc0586300) ||
254 		    (ia & htonl(0xfffffe00)) == htonl(0xc6120000) ||
255 		    (ia & htonl(0xffffff00)) == htonl(0xc0000200) ||
256 		    (ia & htonl(0xfffffe00)) == htonl(0xc6336400) ||
257 		    (ia & htonl(0xffffff00)) == htonl(0xcb007100))
258 			return (1);
259 	}
260 	return (0);
261 }
262 
263 void
264 nat64_embed_ip4(const struct nat64_config *cfg, in_addr_t ia,
265     struct in6_addr *ip6)
266 {
267 
268 	/* assume the prefix6 is properly filled with zeros */
269 	bcopy(&cfg->prefix6, ip6, sizeof(*ip6));
270 	switch (cfg->plen6) {
271 	case 32:
272 	case 96:
273 		ip6->s6_addr32[cfg->plen6 / 32] = ia;
274 		break;
275 	case 40:
276 	case 48:
277 	case 56:
278 #if BYTE_ORDER == BIG_ENDIAN
279 		ip6->s6_addr32[1] = cfg->prefix6.s6_addr32[1] |
280 		    (ia >> (cfg->plen6 % 32));
281 		ip6->s6_addr32[2] = ia << (24 - cfg->plen6 % 32);
282 #elif BYTE_ORDER == LITTLE_ENDIAN
283 		ip6->s6_addr32[1] = cfg->prefix6.s6_addr32[1] |
284 		    (ia << (cfg->plen6 % 32));
285 		ip6->s6_addr32[2] = ia >> (24 - cfg->plen6 % 32);
286 #endif
287 		break;
288 	case 64:
289 #if BYTE_ORDER == BIG_ENDIAN
290 		ip6->s6_addr32[2] = ia >> 8;
291 		ip6->s6_addr32[3] = ia << 24;
292 #elif BYTE_ORDER == LITTLE_ENDIAN
293 		ip6->s6_addr32[2] = ia << 8;
294 		ip6->s6_addr32[3] = ia >> 24;
295 #endif
296 		break;
297 	default:
298 		panic("Wrong plen6");
299 	};
300 	ip6->s6_addr8[8] = 0;
301 }
302 
303 in_addr_t
304 nat64_extract_ip4(const struct nat64_config *cfg, const struct in6_addr *ip6)
305 {
306 	in_addr_t ia;
307 
308 	/*
309 	 * According to RFC 6052 p2.2:
310 	 * IPv4-embedded IPv6 addresses are composed of a variable-length
311 	 * prefix, the embedded IPv4 address, and a variable length suffix.
312 	 * The suffix bits are reserved for future extensions and SHOULD
313 	 * be set to zero.
314 	 */
315 	switch (cfg->plen6) {
316 	case 32:
317 		if (ip6->s6_addr32[3] != 0 || ip6->s6_addr32[2] != 0)
318 			goto badip6;
319 		break;
320 	case 40:
321 		if (ip6->s6_addr32[3] != 0 ||
322 		    (ip6->s6_addr32[2] & htonl(0xff00ffff)) != 0)
323 			goto badip6;
324 		break;
325 	case 48:
326 		if (ip6->s6_addr32[3] != 0 ||
327 		    (ip6->s6_addr32[2] & htonl(0xff0000ff)) != 0)
328 			goto badip6;
329 		break;
330 	case 56:
331 		if (ip6->s6_addr32[3] != 0 || ip6->s6_addr8[8] != 0)
332 			goto badip6;
333 		break;
334 	case 64:
335 		if (ip6->s6_addr8[8] != 0 ||
336 		    (ip6->s6_addr32[3] & htonl(0x00ffffff)) != 0)
337 			goto badip6;
338 	};
339 	switch (cfg->plen6) {
340 	case 32:
341 	case 96:
342 		ia = ip6->s6_addr32[cfg->plen6 / 32];
343 		break;
344 	case 40:
345 	case 48:
346 	case 56:
347 #if BYTE_ORDER == BIG_ENDIAN
348 		ia = (ip6->s6_addr32[1] << (cfg->plen6 % 32)) |
349 		    (ip6->s6_addr32[2] >> (24 - cfg->plen6 % 32));
350 #elif BYTE_ORDER == LITTLE_ENDIAN
351 		ia = (ip6->s6_addr32[1] >> (cfg->plen6 % 32)) |
352 		    (ip6->s6_addr32[2] << (24 - cfg->plen6 % 32));
353 #endif
354 		break;
355 	case 64:
356 #if BYTE_ORDER == BIG_ENDIAN
357 		ia = (ip6->s6_addr32[2] << 8) | (ip6->s6_addr32[3] >> 24);
358 #elif BYTE_ORDER == LITTLE_ENDIAN
359 		ia = (ip6->s6_addr32[2] >> 8) | (ip6->s6_addr32[3] << 24);
360 #endif
361 		break;
362 	default:
363 		return (0);
364 	};
365 	if (nat64_check_ip4(ia) != 0 ||
366 	    nat64_check_private_ip4(cfg, ia) != 0)
367 		goto badip4;
368 
369 	return (ia);
370 badip4:
371 	DPRINTF(DP_GENERIC | DP_DROPS,
372 	    "invalid destination address: %08x", ia);
373 	return (0);
374 badip6:
375 	DPRINTF(DP_GENERIC | DP_DROPS, "invalid IPv4-embedded IPv6 address");
376 	return (0);
377 }
378 
379 /*
380  * According to RFC 1624 the equation for incremental checksum update is:
381  *	HC' = ~(~HC + ~m + m')	--	[Eqn. 3]
382  *	HC' = HC - ~m - m'	--	[Eqn. 4]
383  * So, when we are replacing IPv4 addresses to IPv6, we
384  * can assume, that new bytes previously were zeros, and vise versa -
385  * when we replacing IPv6 addresses to IPv4, now unused bytes become
386  * zeros. The payload length in pseudo header has bigger size, but one
387  * half of it should be zero. Using the equation 4 we get:
388  *	HC' = HC - (~m0 + m0')	-- m0 is first changed word
389  *	HC' = (HC - (~m0 + m0')) - (~m1 + m1')	-- m1 is second changed word
390  *	HC' = HC - ~m0 - m0' - ~m1 - m1' - ... =
391  *	  = HC - sum(~m[i] + m'[i])
392  *
393  * The function result should be used as follows:
394  *	IPv6 to IPv4:	HC' = cksum_add(HC, result)
395  *	IPv4 to IPv6:	HC' = cksum_add(HC, ~result)
396  */
397 static NAT64NOINLINE uint16_t
398 nat64_cksum_convert(struct ip6_hdr *ip6, struct ip *ip)
399 {
400 	uint32_t sum;
401 	uint16_t *p;
402 
403 	sum = ~ip->ip_src.s_addr >> 16;
404 	sum += ~ip->ip_src.s_addr & 0xffff;
405 	sum += ~ip->ip_dst.s_addr >> 16;
406 	sum += ~ip->ip_dst.s_addr & 0xffff;
407 
408 	for (p = (uint16_t *)&ip6->ip6_src;
409 	    p < (uint16_t *)(&ip6->ip6_src + 2); p++)
410 		sum += *p;
411 
412 	while (sum >> 16)
413 		sum = (sum & 0xffff) + (sum >> 16);
414 	return (sum);
415 }
416 
417 static NAT64NOINLINE void
418 nat64_init_ip4hdr(const struct ip6_hdr *ip6, const struct ip6_frag *frag,
419     uint16_t plen, uint8_t proto, struct ip *ip)
420 {
421 
422 	/* assume addresses are already initialized */
423 	ip->ip_v = IPVERSION;
424 	ip->ip_hl = sizeof(*ip) >> 2;
425 	ip->ip_tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
426 	ip->ip_len = htons(sizeof(*ip) + plen);
427 #ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
428 	ip->ip_ttl = ip6->ip6_hlim - IPV6_HLIMDEC;
429 #else
430 	/* Forwarding code will decrement TTL. */
431 	ip->ip_ttl = ip6->ip6_hlim;
432 #endif
433 	ip->ip_sum = 0;
434 	ip->ip_p = (proto == IPPROTO_ICMPV6) ? IPPROTO_ICMP: proto;
435 	ip_fillid(ip);
436 	if (frag != NULL) {
437 		ip->ip_off = htons(ntohs(frag->ip6f_offlg) >> 3);
438 		if (frag->ip6f_offlg & IP6F_MORE_FRAG)
439 			ip->ip_off |= htons(IP_MF);
440 	} else {
441 		ip->ip_off = htons(IP_DF);
442 	}
443 	ip->ip_sum = in_cksum_hdr(ip);
444 }
445 
446 #define	FRAGSZ(mtu) ((mtu) - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag))
447 static NAT64NOINLINE int
448 nat64_fragment6(struct nat64_counters *stats, struct ip6_hdr *ip6,
449     struct mbufq *mq, struct mbuf *m, uint32_t mtu, uint16_t ip_id,
450     uint16_t ip_off)
451 {
452 	struct ip6_frag ip6f;
453 	struct mbuf *n;
454 	uint16_t hlen, len, offset;
455 	int plen;
456 
457 	plen = ntohs(ip6->ip6_plen);
458 	hlen = sizeof(struct ip6_hdr);
459 
460 	/* Fragmentation isn't needed */
461 	if (ip_off == 0 && plen <= mtu - hlen) {
462 		M_PREPEND(m, hlen, M_NOWAIT);
463 		if (m == NULL) {
464 			NAT64STAT_INC(stats, nomem);
465 			return (ENOMEM);
466 		}
467 		bcopy(ip6, mtod(m, void *), hlen);
468 		if (mbufq_enqueue(mq, m) != 0) {
469 			m_freem(m);
470 			NAT64STAT_INC(stats, dropped);
471 			DPRINTF(DP_DROPS, "dropped due to mbufq overflow");
472 			return (ENOBUFS);
473 		}
474 		return (0);
475 	}
476 
477 	hlen += sizeof(struct ip6_frag);
478 	ip6f.ip6f_reserved = 0;
479 	ip6f.ip6f_nxt = ip6->ip6_nxt;
480 	ip6->ip6_nxt = IPPROTO_FRAGMENT;
481 	if (ip_off != 0) {
482 		/*
483 		 * We have got an IPv4 fragment.
484 		 * Use offset value and ip_id from original fragment.
485 		 */
486 		ip6f.ip6f_ident = htonl(ntohs(ip_id));
487 		offset = (ntohs(ip_off) & IP_OFFMASK) << 3;
488 		NAT64STAT_INC(stats, ifrags);
489 	} else {
490 		/* The packet size exceeds interface MTU */
491 		ip6f.ip6f_ident = htonl(ip6_randomid());
492 		offset = 0; /* First fragment*/
493 	}
494 	while (plen > 0 && m != NULL) {
495 		n = NULL;
496 		len = FRAGSZ(mtu) & ~7;
497 		if (len > plen)
498 			len = plen;
499 		ip6->ip6_plen = htons(len + sizeof(ip6f));
500 		ip6f.ip6f_offlg = ntohs(offset);
501 		if (len < plen || (ip_off & htons(IP_MF)) != 0)
502 			ip6f.ip6f_offlg |= IP6F_MORE_FRAG;
503 		offset += len;
504 		plen -= len;
505 		if (plen > 0) {
506 			n = m_split(m, len, M_NOWAIT);
507 			if (n == NULL)
508 				goto fail;
509 		}
510 		M_PREPEND(m, hlen, M_NOWAIT);
511 		if (m == NULL)
512 			goto fail;
513 		bcopy(ip6, mtod(m, void *), sizeof(struct ip6_hdr));
514 		bcopy(&ip6f, mtodo(m, sizeof(struct ip6_hdr)),
515 		    sizeof(struct ip6_frag));
516 		if (mbufq_enqueue(mq, m) != 0)
517 			goto fail;
518 		m = n;
519 	}
520 	NAT64STAT_ADD(stats, ofrags, mbufq_len(mq));
521 	return (0);
522 fail:
523 	if (m != NULL)
524 		m_freem(m);
525 	if (n != NULL)
526 		m_freem(n);
527 	mbufq_drain(mq);
528 	NAT64STAT_INC(stats, nomem);
529 	return (ENOMEM);
530 }
531 
532 static NAT64NOINLINE int
533 nat64_find_route6(struct nhop6_basic *pnh, struct sockaddr_in6 *dst,
534     struct mbuf *m)
535 {
536 
537 	if (fib6_lookup_nh_basic(M_GETFIB(m), &dst->sin6_addr, 0, 0, 0,
538 	    pnh) != 0)
539 		return (EHOSTUNREACH);
540 	if (pnh->nh_flags & (NHF_BLACKHOLE | NHF_REJECT))
541 		return (EHOSTUNREACH);
542 	/*
543 	 * XXX: we need to use destination address with embedded scope
544 	 * zone id, because LLTABLE uses such form of addresses for lookup.
545 	 */
546 	dst->sin6_family = AF_INET6;
547 	dst->sin6_len = sizeof(*dst);
548 	dst->sin6_addr = pnh->nh_addr;
549 	if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr))
550 		dst->sin6_addr.s6_addr16[1] =
551 		    htons(pnh->nh_ifp->if_index & 0xffff);
552 	dst->sin6_port = 0;
553 	dst->sin6_scope_id = 0;
554 	dst->sin6_flowinfo = 0;
555 
556 	return (0);
557 }
558 
559 #define	NAT64_ICMP6_PLEN	64
560 static NAT64NOINLINE void
561 nat64_icmp6_reflect(struct mbuf *m, uint8_t type, uint8_t code, uint32_t mtu,
562     struct nat64_counters *stats, void *logdata)
563 {
564 	struct icmp6_hdr *icmp6;
565 	struct ip6_hdr *ip6, *oip6;
566 	struct mbuf *n;
567 	int len, plen;
568 
569 	len = 0;
570 	plen = nat64_getlasthdr(m, &len);
571 	if (plen < 0) {
572 		DPRINTF(DP_DROPS, "mbuf isn't contigious");
573 		goto freeit;
574 	}
575 	/*
576 	 * Do not send ICMPv6 in reply to ICMPv6 errors.
577 	 */
578 	if (plen == IPPROTO_ICMPV6) {
579 		if (m->m_len < len + sizeof(*icmp6)) {
580 			DPRINTF(DP_DROPS, "mbuf isn't contigious");
581 			goto freeit;
582 		}
583 		icmp6 = mtodo(m, len);
584 		if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST ||
585 		    icmp6->icmp6_type == ND_REDIRECT) {
586 			DPRINTF(DP_DROPS, "do not send ICMPv6 in reply to "
587 			    "ICMPv6 errors");
588 			goto freeit;
589 		}
590 	}
591 	/*
592 	if (icmp6_ratelimit(&ip6->ip6_src, type, code))
593 		goto freeit;
594 		*/
595 	ip6 = mtod(m, struct ip6_hdr *);
596 	switch (type) {
597 	case ICMP6_DST_UNREACH:
598 	case ICMP6_PACKET_TOO_BIG:
599 	case ICMP6_TIME_EXCEEDED:
600 	case ICMP6_PARAM_PROB:
601 		break;
602 	default:
603 		goto freeit;
604 	}
605 	/* Calculate length of ICMPv6 payload */
606 	len = (m->m_pkthdr.len > NAT64_ICMP6_PLEN) ? NAT64_ICMP6_PLEN:
607 	    m->m_pkthdr.len;
608 
609 	/* Create new ICMPv6 datagram */
610 	plen = len + sizeof(struct icmp6_hdr);
611 	n = m_get2(sizeof(struct ip6_hdr) + plen + max_hdr, M_NOWAIT,
612 	    MT_HEADER, M_PKTHDR);
613 	if (n == NULL) {
614 		NAT64STAT_INC(stats, nomem);
615 		m_freem(m);
616 		return;
617 	}
618 	/*
619 	 * Move pkthdr from original mbuf. We should have initialized some
620 	 * fields, because we can reinject this mbuf to netisr and it will
621 	 * go trough input path (it requires at least rcvif should be set).
622 	 * Also do M_ALIGN() to reduce chances of need to allocate new mbuf
623 	 * in the chain, when we will do M_PREPEND() or make some type of
624 	 * tunneling.
625 	 */
626 	m_move_pkthdr(n, m);
627 	M_ALIGN(n, sizeof(struct ip6_hdr) + plen + max_hdr);
628 
629 	n->m_len = n->m_pkthdr.len = sizeof(struct ip6_hdr) + plen;
630 	oip6 = mtod(n, struct ip6_hdr *);
631 	oip6->ip6_src = ip6->ip6_dst;
632 	oip6->ip6_dst = ip6->ip6_src;
633 	oip6->ip6_nxt = IPPROTO_ICMPV6;
634 	oip6->ip6_flow = 0;
635 	oip6->ip6_vfc |= IPV6_VERSION;
636 	oip6->ip6_hlim = V_ip6_defhlim;
637 	oip6->ip6_plen = htons(plen);
638 
639 	icmp6 = mtodo(n, sizeof(struct ip6_hdr));
640 	icmp6->icmp6_cksum = 0;
641 	icmp6->icmp6_type = type;
642 	icmp6->icmp6_code = code;
643 	icmp6->icmp6_mtu = htonl(mtu);
644 
645 	m_copydata(m, 0, len, mtodo(n, sizeof(struct ip6_hdr) +
646 	    sizeof(struct icmp6_hdr)));
647 	icmp6->icmp6_cksum = in6_cksum(n, IPPROTO_ICMPV6,
648 	    sizeof(struct ip6_hdr), plen);
649 	m_freem(m);
650 	nat64_output_one(n, stats, logdata);
651 	return;
652 freeit:
653 	NAT64STAT_INC(stats, dropped);
654 	m_freem(m);
655 }
656 
657 static NAT64NOINLINE int
658 nat64_find_route4(struct nhop4_basic *pnh, struct sockaddr_in *dst,
659     struct mbuf *m)
660 {
661 
662 	if (fib4_lookup_nh_basic(M_GETFIB(m), dst->sin_addr, 0, 0, pnh) != 0)
663 		return (EHOSTUNREACH);
664 	if (pnh->nh_flags & (NHF_BLACKHOLE | NHF_BROADCAST | NHF_REJECT))
665 		return (EHOSTUNREACH);
666 
667 	dst->sin_family = AF_INET;
668 	dst->sin_len = sizeof(*dst);
669 	dst->sin_addr = pnh->nh_addr;
670 	dst->sin_port = 0;
671 	return (0);
672 }
673 
674 #define	NAT64_ICMP_PLEN	64
675 static NAT64NOINLINE void
676 nat64_icmp_reflect(struct mbuf *m, uint8_t type,
677     uint8_t code, uint16_t mtu, struct nat64_counters *stats, void *logdata)
678 {
679 	struct icmp *icmp;
680 	struct ip *ip, *oip;
681 	struct mbuf *n;
682 	int len, plen;
683 
684 	ip = mtod(m, struct ip *);
685 	/* Do not send ICMP error if packet is not the first fragment */
686 	if (ip->ip_off & ~ntohs(IP_MF|IP_DF)) {
687 		DPRINTF(DP_DROPS, "not first fragment");
688 		goto freeit;
689 	}
690 	/* Do not send ICMP in reply to ICMP errors */
691 	if (ip->ip_p == IPPROTO_ICMP) {
692 		if (m->m_len < (ip->ip_hl << 2)) {
693 			DPRINTF(DP_DROPS, "mbuf isn't contigious");
694 			goto freeit;
695 		}
696 		icmp = mtodo(m, ip->ip_hl << 2);
697 		if (!ICMP_INFOTYPE(icmp->icmp_type)) {
698 			DPRINTF(DP_DROPS, "do not send ICMP in reply to "
699 			    "ICMP errors");
700 			goto freeit;
701 		}
702 	}
703 	switch (type) {
704 	case ICMP_UNREACH:
705 	case ICMP_TIMXCEED:
706 	case ICMP_PARAMPROB:
707 		break;
708 	default:
709 		goto freeit;
710 	}
711 	/* Calculate length of ICMP payload */
712 	len = (m->m_pkthdr.len > NAT64_ICMP_PLEN) ? (ip->ip_hl << 2) + 8:
713 	    m->m_pkthdr.len;
714 
715 	/* Create new ICMPv4 datagram */
716 	plen = len + sizeof(struct icmphdr) + sizeof(uint32_t);
717 	n = m_get2(sizeof(struct ip) + plen + max_hdr, M_NOWAIT,
718 	    MT_HEADER, M_PKTHDR);
719 	if (n == NULL) {
720 		NAT64STAT_INC(stats, nomem);
721 		m_freem(m);
722 		return;
723 	}
724 	m_move_pkthdr(n, m);
725 	M_ALIGN(n, sizeof(struct ip) + plen + max_hdr);
726 
727 	n->m_len = n->m_pkthdr.len = sizeof(struct ip) + plen;
728 	oip = mtod(n, struct ip *);
729 	oip->ip_v = IPVERSION;
730 	oip->ip_hl = sizeof(struct ip) >> 2;
731 	oip->ip_tos = 0;
732 	oip->ip_len = htons(n->m_pkthdr.len);
733 	oip->ip_ttl = V_ip_defttl;
734 	oip->ip_p = IPPROTO_ICMP;
735 	ip_fillid(oip);
736 	oip->ip_off = htons(IP_DF);
737 	oip->ip_src = ip->ip_dst;
738 	oip->ip_dst = ip->ip_src;
739 	oip->ip_sum = 0;
740 	oip->ip_sum = in_cksum_hdr(oip);
741 
742 	icmp = mtodo(n, sizeof(struct ip));
743 	icmp->icmp_type = type;
744 	icmp->icmp_code = code;
745 	icmp->icmp_cksum = 0;
746 	icmp->icmp_pmvoid = 0;
747 	icmp->icmp_nextmtu = htons(mtu);
748 	m_copydata(m, 0, len, mtodo(n, sizeof(struct ip) +
749 	    sizeof(struct icmphdr) + sizeof(uint32_t)));
750 	icmp->icmp_cksum = in_cksum_skip(n, sizeof(struct ip) + plen,
751 	    sizeof(struct ip));
752 	m_freem(m);
753 	nat64_output_one(n, stats, logdata);
754 	return;
755 freeit:
756 	NAT64STAT_INC(stats, dropped);
757 	m_freem(m);
758 }
759 
760 /* Translate ICMP echo request/reply into ICMPv6 */
761 static void
762 nat64_icmp_handle_echo(struct ip6_hdr *ip6, struct icmp6_hdr *icmp6,
763     uint16_t id, uint8_t type)
764 {
765 	uint16_t old;
766 
767 	old = *(uint16_t *)icmp6;	/* save type+code in one word */
768 	icmp6->icmp6_type = type;
769 	/* Reflect ICMPv6 -> ICMPv4 type translation in the cksum */
770 	icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum,
771 	    old, *(uint16_t *)icmp6);
772 	if (id != 0) {
773 		old = icmp6->icmp6_id;
774 		icmp6->icmp6_id = id;
775 		/* Reflect ICMP id translation in the cksum */
776 		icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum,
777 		    old, id);
778 	}
779 	/* Reflect IPv6 pseudo header in the cksum */
780 	icmp6->icmp6_cksum = ~in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen),
781 	    IPPROTO_ICMPV6, ~icmp6->icmp6_cksum);
782 }
783 
784 static NAT64NOINLINE struct mbuf *
785 nat64_icmp_translate(struct mbuf *m, struct ip6_hdr *ip6, uint16_t icmpid,
786     int offset, struct nat64_config *cfg)
787 {
788 	struct ip ip;
789 	struct icmp *icmp;
790 	struct tcphdr *tcp;
791 	struct udphdr *udp;
792 	struct ip6_hdr *eip6;
793 	struct mbuf *n;
794 	uint32_t mtu;
795 	int len, hlen, plen;
796 	uint8_t type, code;
797 
798 	if (m->m_len < offset + ICMP_MINLEN)
799 		m = m_pullup(m, offset + ICMP_MINLEN);
800 	if (m == NULL) {
801 		NAT64STAT_INC(&cfg->stats, nomem);
802 		return (m);
803 	}
804 	mtu = 0;
805 	icmp = mtodo(m, offset);
806 	/* RFC 7915 p4.2 */
807 	switch (icmp->icmp_type) {
808 	case ICMP_ECHOREPLY:
809 		type = ICMP6_ECHO_REPLY;
810 		code = 0;
811 		break;
812 	case ICMP_UNREACH:
813 		type = ICMP6_DST_UNREACH;
814 		switch (icmp->icmp_code) {
815 		case ICMP_UNREACH_NET:
816 		case ICMP_UNREACH_HOST:
817 		case ICMP_UNREACH_SRCFAIL:
818 		case ICMP_UNREACH_NET_UNKNOWN:
819 		case ICMP_UNREACH_HOST_UNKNOWN:
820 		case ICMP_UNREACH_TOSNET:
821 		case ICMP_UNREACH_TOSHOST:
822 			code = ICMP6_DST_UNREACH_NOROUTE;
823 			break;
824 		case ICMP_UNREACH_PROTOCOL:
825 			type = ICMP6_PARAM_PROB;
826 			code = ICMP6_PARAMPROB_NEXTHEADER;
827 			break;
828 		case ICMP_UNREACH_PORT:
829 			code = ICMP6_DST_UNREACH_NOPORT;
830 			break;
831 		case ICMP_UNREACH_NEEDFRAG:
832 			type = ICMP6_PACKET_TOO_BIG;
833 			code = 0;
834 			/* XXX: needs an additional look */
835 			mtu = max(IPV6_MMTU, ntohs(icmp->icmp_nextmtu) + 20);
836 			break;
837 		case ICMP_UNREACH_NET_PROHIB:
838 		case ICMP_UNREACH_HOST_PROHIB:
839 		case ICMP_UNREACH_FILTER_PROHIB:
840 		case ICMP_UNREACH_PRECEDENCE_CUTOFF:
841 			code = ICMP6_DST_UNREACH_ADMIN;
842 			break;
843 		default:
844 			DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d",
845 			    icmp->icmp_type, icmp->icmp_code);
846 			goto freeit;
847 		}
848 		break;
849 	case ICMP_TIMXCEED:
850 		type = ICMP6_TIME_EXCEEDED;
851 		code = icmp->icmp_code;
852 		break;
853 	case ICMP_ECHO:
854 		type = ICMP6_ECHO_REQUEST;
855 		code = 0;
856 		break;
857 	case ICMP_PARAMPROB:
858 		type = ICMP6_PARAM_PROB;
859 		switch (icmp->icmp_code) {
860 		case ICMP_PARAMPROB_ERRATPTR:
861 		case ICMP_PARAMPROB_LENGTH:
862 			code = ICMP6_PARAMPROB_HEADER;
863 			switch (icmp->icmp_pptr) {
864 			case 0: /* Version/IHL */
865 			case 1: /* Type Of Service */
866 				mtu = icmp->icmp_pptr;
867 				break;
868 			case 2: /* Total Length */
869 			case 3: mtu = 4; /* Payload Length */
870 				break;
871 			case 8: /* Time to Live */
872 				mtu = 7; /* Hop Limit */
873 				break;
874 			case 9: /* Protocol */
875 				mtu = 6; /* Next Header */
876 				break;
877 			case 12: /* Source address */
878 			case 13:
879 			case 14:
880 			case 15:
881 				mtu = 8;
882 				break;
883 			case 16: /* Destination address */
884 			case 17:
885 			case 18:
886 			case 19:
887 				mtu = 24;
888 				break;
889 			default: /* Silently drop */
890 				DPRINTF(DP_DROPS, "Unsupported ICMP type %d,"
891 				    " code %d, pptr %d", icmp->icmp_type,
892 				    icmp->icmp_code, icmp->icmp_pptr);
893 				goto freeit;
894 			}
895 			break;
896 		default:
897 			DPRINTF(DP_DROPS, "Unsupported ICMP type %d,"
898 			    " code %d, pptr %d", icmp->icmp_type,
899 			    icmp->icmp_code, icmp->icmp_pptr);
900 			goto freeit;
901 		}
902 		break;
903 	default:
904 		DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d",
905 		    icmp->icmp_type, icmp->icmp_code);
906 		goto freeit;
907 	}
908 	/*
909 	 * For echo request/reply we can use original payload,
910 	 * but we need adjust icmp_cksum, because ICMPv6 cksum covers
911 	 * IPv6 pseudo header and ICMPv6 types differs from ICMPv4.
912 	 */
913 	if (type == ICMP6_ECHO_REQUEST || type == ICMP6_ECHO_REPLY) {
914 		nat64_icmp_handle_echo(ip6, ICMP6(icmp), icmpid, type);
915 		return (m);
916 	}
917 	/*
918 	 * For other types of ICMP messages we need to translate inner
919 	 * IPv4 header to IPv6 header.
920 	 * Assume ICMP src is the same as payload dst
921 	 * E.g. we have ( GWsrc1 , NATIP1 ) in outer header
922 	 * and          ( NATIP1, Hostdst1 ) in ICMP copy header.
923 	 * In that case, we already have map for NATIP1 and GWsrc1.
924 	 * The only thing we need is to copy IPv6 map prefix to
925 	 * Hostdst1.
926 	 */
927 	hlen = offset + ICMP_MINLEN;
928 	if (m->m_pkthdr.len < hlen + sizeof(struct ip) + ICMP_MINLEN) {
929 		DPRINTF(DP_DROPS, "Message is too short %d",
930 		    m->m_pkthdr.len);
931 		goto freeit;
932 	}
933 	m_copydata(m, hlen, sizeof(struct ip), (char *)&ip);
934 	if (ip.ip_v != IPVERSION) {
935 		DPRINTF(DP_DROPS, "Wrong IP version %d", ip.ip_v);
936 		goto freeit;
937 	}
938 	hlen += ip.ip_hl << 2; /* Skip inner IP header */
939 	if (nat64_check_ip4(ip.ip_src.s_addr) != 0 ||
940 	    nat64_check_ip4(ip.ip_dst.s_addr) != 0 ||
941 	    nat64_check_private_ip4(cfg, ip.ip_src.s_addr) != 0 ||
942 	    nat64_check_private_ip4(cfg, ip.ip_dst.s_addr) != 0) {
943 		DPRINTF(DP_DROPS, "IP addresses checks failed %04x -> %04x",
944 		    ntohl(ip.ip_src.s_addr), ntohl(ip.ip_dst.s_addr));
945 		goto freeit;
946 	}
947 	if (m->m_pkthdr.len < hlen + ICMP_MINLEN) {
948 		DPRINTF(DP_DROPS, "Message is too short %d",
949 		    m->m_pkthdr.len);
950 		goto freeit;
951 	}
952 #if 0
953 	/*
954 	 * Check that inner source matches the outer destination.
955 	 * XXX: We need some method to convert IPv4 into IPv6 address here,
956 	 *	and compare IPv6 addresses.
957 	 */
958 	if (ip.ip_src.s_addr != nat64_get_ip4(&ip6->ip6_dst)) {
959 		DPRINTF(DP_GENERIC, "Inner source doesn't match destination ",
960 		    "%04x vs %04x", ip.ip_src.s_addr,
961 		    nat64_get_ip4(&ip6->ip6_dst));
962 		goto freeit;
963 	}
964 #endif
965 	/*
966 	 * Create new mbuf for ICMPv6 datagram.
967 	 * NOTE: len is data length just after inner IP header.
968 	 */
969 	len = m->m_pkthdr.len - hlen;
970 	if (sizeof(struct ip6_hdr) +
971 	    sizeof(struct icmp6_hdr) + len > NAT64_ICMP6_PLEN)
972 		len = NAT64_ICMP6_PLEN - sizeof(struct icmp6_hdr) -
973 		    sizeof(struct ip6_hdr);
974 	plen = sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr) + len;
975 	n = m_get2(offset + plen + max_hdr, M_NOWAIT, MT_HEADER, M_PKTHDR);
976 	if (n == NULL) {
977 		NAT64STAT_INC(&cfg->stats, nomem);
978 		m_freem(m);
979 		return (NULL);
980 	}
981 	m_move_pkthdr(n, m);
982 	M_ALIGN(n, offset + plen + max_hdr);
983 	n->m_len = n->m_pkthdr.len = offset + plen;
984 	/* Adjust ip6_plen in outer header */
985 	ip6->ip6_plen = htons(plen);
986 	/* Construct new inner IPv6 header */
987 	eip6 = mtodo(n, offset + sizeof(struct icmp6_hdr));
988 	eip6->ip6_src = ip6->ip6_dst;
989 	/* Use the fact that we have single /96 prefix for IPv4 map */
990 	eip6->ip6_dst = ip6->ip6_src;
991 	nat64_embed_ip4(cfg, ip.ip_dst.s_addr, &eip6->ip6_dst);
992 
993 	eip6->ip6_flow = htonl(ip.ip_tos << 20);
994 	eip6->ip6_vfc |= IPV6_VERSION;
995 	eip6->ip6_hlim = ip.ip_ttl;
996 	eip6->ip6_plen = htons(ntohs(ip.ip_len) - (ip.ip_hl << 2));
997 	eip6->ip6_nxt = (ip.ip_p == IPPROTO_ICMP) ? IPPROTO_ICMPV6: ip.ip_p;
998 	m_copydata(m, hlen, len, (char *)(eip6 + 1));
999 	/*
1000 	 * We need to translate source port in the inner ULP header,
1001 	 * and adjust ULP checksum.
1002 	 */
1003 	switch (ip.ip_p) {
1004 	case IPPROTO_TCP:
1005 		if (len < offsetof(struct tcphdr, th_sum))
1006 			break;
1007 		tcp = TCP(eip6 + 1);
1008 		if (icmpid != 0) {
1009 			tcp->th_sum = cksum_adjust(tcp->th_sum,
1010 			    tcp->th_sport, icmpid);
1011 			tcp->th_sport = icmpid;
1012 		}
1013 		tcp->th_sum = cksum_add(tcp->th_sum,
1014 		    ~nat64_cksum_convert(eip6, &ip));
1015 		break;
1016 	case IPPROTO_UDP:
1017 		if (len < offsetof(struct udphdr, uh_sum))
1018 			break;
1019 		udp = UDP(eip6 + 1);
1020 		if (icmpid != 0) {
1021 			udp->uh_sum = cksum_adjust(udp->uh_sum,
1022 			    udp->uh_sport, icmpid);
1023 			udp->uh_sport = icmpid;
1024 		}
1025 		udp->uh_sum = cksum_add(udp->uh_sum,
1026 		    ~nat64_cksum_convert(eip6, &ip));
1027 		break;
1028 	case IPPROTO_ICMP:
1029 		/*
1030 		 * Check if this is an ICMP error message for echo request
1031 		 * that we sent. I.e. ULP in the data containing invoking
1032 		 * packet is IPPROTO_ICMP and its type is ICMP_ECHO.
1033 		 */
1034 		icmp = (struct icmp *)(eip6 + 1);
1035 		if (icmp->icmp_type != ICMP_ECHO) {
1036 			m_freem(n);
1037 			goto freeit;
1038 		}
1039 		/*
1040 		 * For our client this original datagram should looks
1041 		 * like it was ICMPv6 datagram with type ICMP6_ECHO_REQUEST.
1042 		 * Thus we need adjust icmp_cksum and convert type from
1043 		 * ICMP_ECHO to ICMP6_ECHO_REQUEST.
1044 		 */
1045 		nat64_icmp_handle_echo(eip6, ICMP6(icmp), icmpid,
1046 		    ICMP6_ECHO_REQUEST);
1047 	}
1048 	m_freem(m);
1049 	/* Convert ICMPv4 into ICMPv6 header */
1050 	icmp = mtodo(n, offset);
1051 	ICMP6(icmp)->icmp6_type = type;
1052 	ICMP6(icmp)->icmp6_code = code;
1053 	ICMP6(icmp)->icmp6_mtu = htonl(mtu);
1054 	ICMP6(icmp)->icmp6_cksum = 0;
1055 	ICMP6(icmp)->icmp6_cksum = cksum_add(
1056 	    ~in6_cksum_pseudo(ip6, plen, IPPROTO_ICMPV6, 0),
1057 	    in_cksum_skip(n, n->m_pkthdr.len, offset));
1058 	return (n);
1059 freeit:
1060 	m_freem(m);
1061 	NAT64STAT_INC(&cfg->stats, dropped);
1062 	return (NULL);
1063 }
1064 
1065 int
1066 nat64_getlasthdr(struct mbuf *m, int *offset)
1067 {
1068 	struct ip6_hdr *ip6;
1069 	struct ip6_hbh *hbh;
1070 	int proto, hlen;
1071 
1072 	if (offset != NULL)
1073 		hlen = *offset;
1074 	else
1075 		hlen = 0;
1076 
1077 	if (m->m_len < hlen + sizeof(*ip6))
1078 		return (-1);
1079 
1080 	ip6 = mtodo(m, hlen);
1081 	hlen += sizeof(*ip6);
1082 	proto = ip6->ip6_nxt;
1083 	/* Skip extension headers */
1084 	while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING ||
1085 	    proto == IPPROTO_DSTOPTS) {
1086 		hbh = mtodo(m, hlen);
1087 		/*
1088 		 * We expect mbuf has contigious data up to
1089 		 * upper level header.
1090 		 */
1091 		if (m->m_len < hlen)
1092 			return (-1);
1093 		/*
1094 		 * We doesn't support Jumbo payload option,
1095 		 * so return error.
1096 		 */
1097 		if (proto == IPPROTO_HOPOPTS && ip6->ip6_plen == 0)
1098 			return (-1);
1099 		proto = hbh->ip6h_nxt;
1100 		hlen += (hbh->ip6h_len + 1) << 3;
1101 	}
1102 	if (offset != NULL)
1103 		*offset = hlen;
1104 	return (proto);
1105 }
1106 
1107 int
1108 nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr,
1109     struct in6_addr *daddr, uint16_t lport, struct nat64_config *cfg,
1110     void *logdata)
1111 {
1112 	struct nhop6_basic nh;
1113 	struct ip6_hdr ip6;
1114 	struct sockaddr_in6 dst;
1115 	struct ip *ip;
1116 	struct mbufq mq;
1117 	uint16_t ip_id, ip_off;
1118 	uint16_t *csum;
1119 	int plen, hlen;
1120 	uint8_t proto;
1121 
1122 	ip = mtod(m, struct ip*);
1123 
1124 	if (ip->ip_ttl <= IPTTLDEC) {
1125 		nat64_icmp_reflect(m, ICMP_TIMXCEED,
1126 		    ICMP_TIMXCEED_INTRANS, 0, &cfg->stats, logdata);
1127 		return (NAT64RETURN);
1128 	}
1129 
1130 	ip6.ip6_dst = *daddr;
1131 	ip6.ip6_src = *saddr;
1132 
1133 	hlen = ip->ip_hl << 2;
1134 	plen = ntohs(ip->ip_len) - hlen;
1135 	proto = ip->ip_p;
1136 
1137 	/* Save ip_id and ip_off, both are in network byte order */
1138 	ip_id = ip->ip_id;
1139 	ip_off = ip->ip_off & htons(IP_OFFMASK | IP_MF);
1140 
1141 	/* Fragment length must be multiple of 8 octets */
1142 	if ((ip->ip_off & htons(IP_MF)) != 0 && (plen & 0x7) != 0) {
1143 		nat64_icmp_reflect(m, ICMP_PARAMPROB,
1144 		    ICMP_PARAMPROB_LENGTH, 0, &cfg->stats, logdata);
1145 		return (NAT64RETURN);
1146 	}
1147 	/* Fragmented ICMP is unsupported */
1148 	if (proto == IPPROTO_ICMP && ip_off != 0) {
1149 		DPRINTF(DP_DROPS, "dropped due to fragmented ICMP");
1150 		NAT64STAT_INC(&cfg->stats, dropped);
1151 		return (NAT64MFREE);
1152 	}
1153 
1154 	dst.sin6_addr = ip6.ip6_dst;
1155 	if (nat64_find_route6(&nh, &dst, m) != 0) {
1156 		NAT64STAT_INC(&cfg->stats, noroute6);
1157 		nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0,
1158 		    &cfg->stats, logdata);
1159 		return (NAT64RETURN);
1160 	}
1161 	if (nh.nh_mtu < plen + sizeof(ip6) &&
1162 	    (ip->ip_off & htons(IP_DF)) != 0) {
1163 		nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
1164 		    FRAGSZ(nh.nh_mtu) + sizeof(struct ip), &cfg->stats, logdata);
1165 		return (NAT64RETURN);
1166 	}
1167 
1168 	ip6.ip6_flow = htonl(ip->ip_tos << 20);
1169 	ip6.ip6_vfc |= IPV6_VERSION;
1170 #ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
1171 	ip6.ip6_hlim = ip->ip_ttl - IPTTLDEC;
1172 #else
1173 	/* Forwarding code will decrement HLIM. */
1174 	ip6.ip6_hlim = ip->ip_ttl;
1175 #endif
1176 	ip6.ip6_plen = htons(plen);
1177 	ip6.ip6_nxt = (proto == IPPROTO_ICMP) ? IPPROTO_ICMPV6: proto;
1178 	/* Convert checksums. */
1179 	switch (proto) {
1180 	case IPPROTO_TCP:
1181 		csum = &TCP(mtodo(m, hlen))->th_sum;
1182 		if (lport != 0) {
1183 			struct tcphdr *tcp = TCP(mtodo(m, hlen));
1184 			*csum = cksum_adjust(*csum, tcp->th_dport, lport);
1185 			tcp->th_dport = lport;
1186 		}
1187 		*csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));
1188 		break;
1189 	case IPPROTO_UDP:
1190 		csum = &UDP(mtodo(m, hlen))->uh_sum;
1191 		if (lport != 0) {
1192 			struct udphdr *udp = UDP(mtodo(m, hlen));
1193 			*csum = cksum_adjust(*csum, udp->uh_dport, lport);
1194 			udp->uh_dport = lport;
1195 		}
1196 		*csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));
1197 		break;
1198 	case IPPROTO_ICMP:
1199 		m = nat64_icmp_translate(m, &ip6, lport, hlen, cfg);
1200 		if (m == NULL)	/* stats already accounted */
1201 			return (NAT64RETURN);
1202 	}
1203 
1204 	m_adj(m, hlen);
1205 	mbufq_init(&mq, 255);
1206 	nat64_fragment6(&cfg->stats, &ip6, &mq, m, nh.nh_mtu, ip_id, ip_off);
1207 	while ((m = mbufq_dequeue(&mq)) != NULL) {
1208 		if (nat64_output(nh.nh_ifp, m, (struct sockaddr *)&dst,
1209 		    &cfg->stats, logdata) != 0)
1210 			break;
1211 		NAT64STAT_INC(&cfg->stats, opcnt46);
1212 	}
1213 	mbufq_drain(&mq);
1214 	return (NAT64RETURN);
1215 }
1216 
1217 int
1218 nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport,
1219     struct nat64_config *cfg, void *logdata)
1220 {
1221 	struct ip ip;
1222 	struct icmp6_hdr *icmp6;
1223 	struct ip6_frag *ip6f;
1224 	struct ip6_hdr *ip6, *ip6i;
1225 	uint32_t mtu;
1226 	int plen, proto;
1227 	uint8_t type, code;
1228 
1229 	if (hlen == 0) {
1230 		ip6 = mtod(m, struct ip6_hdr *);
1231 		if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
1232 		    nat64_check_ip6(&ip6->ip6_dst) != 0)
1233 			return (NAT64SKIP);
1234 
1235 		proto = nat64_getlasthdr(m, &hlen);
1236 		if (proto != IPPROTO_ICMPV6) {
1237 			DPRINTF(DP_DROPS,
1238 			    "dropped due to mbuf isn't contigious");
1239 			NAT64STAT_INC(&cfg->stats, dropped);
1240 			return (NAT64MFREE);
1241 		}
1242 	}
1243 
1244 	/*
1245 	 * Translate ICMPv6 type and code to ICMPv4 (RFC7915).
1246 	 * NOTE: ICMPv6 echo handled by nat64_do_handle_ip6().
1247 	 */
1248 	icmp6 = mtodo(m, hlen);
1249 	mtu = 0;
1250 	switch (icmp6->icmp6_type) {
1251 	case ICMP6_DST_UNREACH:
1252 		type = ICMP_UNREACH;
1253 		switch (icmp6->icmp6_code) {
1254 		case ICMP6_DST_UNREACH_NOROUTE:
1255 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
1256 		case ICMP6_DST_UNREACH_ADDR:
1257 			code = ICMP_UNREACH_HOST;
1258 			break;
1259 		case ICMP6_DST_UNREACH_ADMIN:
1260 			code = ICMP_UNREACH_HOST_PROHIB;
1261 			break;
1262 		case ICMP6_DST_UNREACH_NOPORT:
1263 			code = ICMP_UNREACH_PORT;
1264 			break;
1265 		default:
1266 			DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
1267 			    " code %d", icmp6->icmp6_type,
1268 			    icmp6->icmp6_code);
1269 			NAT64STAT_INC(&cfg->stats, dropped);
1270 			return (NAT64MFREE);
1271 		}
1272 		break;
1273 	case ICMP6_PACKET_TOO_BIG:
1274 		type = ICMP_UNREACH;
1275 		code = ICMP_UNREACH_NEEDFRAG;
1276 		mtu = ntohl(icmp6->icmp6_mtu);
1277 		if (mtu < IPV6_MMTU) {
1278 			DPRINTF(DP_DROPS, "Wrong MTU %d in ICMPv6 type %d,"
1279 			    " code %d", mtu, icmp6->icmp6_type,
1280 			    icmp6->icmp6_code);
1281 			NAT64STAT_INC(&cfg->stats, dropped);
1282 			return (NAT64MFREE);
1283 		}
1284 		/*
1285 		 * Adjust MTU to reflect difference between
1286 		 * IPv6 an IPv4 headers.
1287 		 */
1288 		mtu -= sizeof(struct ip6_hdr) - sizeof(struct ip);
1289 		break;
1290 	case ICMP6_TIME_EXCEEDED:
1291 		type = ICMP_TIMXCEED;
1292 		code = icmp6->icmp6_code;
1293 		break;
1294 	case ICMP6_PARAM_PROB:
1295 		switch (icmp6->icmp6_code) {
1296 		case ICMP6_PARAMPROB_HEADER:
1297 			type = ICMP_PARAMPROB;
1298 			code = ICMP_PARAMPROB_ERRATPTR;
1299 			mtu = ntohl(icmp6->icmp6_pptr);
1300 			switch (mtu) {
1301 			case 0: /* Version/Traffic Class */
1302 			case 1: /* Traffic Class/Flow Label */
1303 				break;
1304 			case 4: /* Payload Length */
1305 			case 5:
1306 				mtu = 2;
1307 				break;
1308 			case 6: /* Next Header */
1309 				mtu = 9;
1310 				break;
1311 			case 7: /* Hop Limit */
1312 				mtu = 8;
1313 				break;
1314 			default:
1315 				if (mtu >= 8 && mtu <= 23) {
1316 					mtu = 12; /* Source address */
1317 					break;
1318 				}
1319 				if (mtu >= 24 && mtu <= 39) {
1320 					mtu = 16; /* Destination address */
1321 					break;
1322 				}
1323 				DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
1324 				    " code %d, pptr %d", icmp6->icmp6_type,
1325 				    icmp6->icmp6_code, mtu);
1326 				NAT64STAT_INC(&cfg->stats, dropped);
1327 				return (NAT64MFREE);
1328 			}
1329 		case ICMP6_PARAMPROB_NEXTHEADER:
1330 			type = ICMP_UNREACH;
1331 			code = ICMP_UNREACH_PROTOCOL;
1332 			break;
1333 		default:
1334 			DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
1335 			    " code %d, pptr %d", icmp6->icmp6_type,
1336 			    icmp6->icmp6_code, ntohl(icmp6->icmp6_pptr));
1337 			NAT64STAT_INC(&cfg->stats, dropped);
1338 			return (NAT64MFREE);
1339 		}
1340 		break;
1341 	default:
1342 		DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d, code %d",
1343 		    icmp6->icmp6_type, icmp6->icmp6_code);
1344 		NAT64STAT_INC(&cfg->stats, dropped);
1345 		return (NAT64MFREE);
1346 	}
1347 
1348 	hlen += sizeof(struct icmp6_hdr);
1349 	if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) {
1350 		NAT64STAT_INC(&cfg->stats, dropped);
1351 		DPRINTF(DP_DROPS, "Message is too short %d",
1352 		    m->m_pkthdr.len);
1353 		return (NAT64MFREE);
1354 	}
1355 	/*
1356 	 * We need at least ICMP_MINLEN bytes of original datagram payload
1357 	 * to generate ICMP message. It is nice that ICMP_MINLEN is equal
1358 	 * to sizeof(struct ip6_frag). So, if embedded datagram had a fragment
1359 	 * header we will not have to do m_pullup() again.
1360 	 *
1361 	 * What we have here:
1362 	 * Outer header: (IPv6iGW, v4mapPRefix+v4exthost)
1363 	 * Inner header: (v4mapPRefix+v4host, IPv6iHost) [sport, dport]
1364 	 * We need to translate it to:
1365 	 *
1366 	 * Outer header: (alias_host, v4exthost)
1367 	 * Inner header: (v4exthost, alias_host) [sport, alias_port]
1368 	 *
1369 	 * Assume caller function has checked if v4mapPRefix+v4host
1370 	 * matches configured prefix.
1371 	 * The only two things we should be provided with are mapping between
1372 	 * IPv6iHost <> alias_host and between dport and alias_port.
1373 	 */
1374 	if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN)
1375 		m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN);
1376 	if (m == NULL) {
1377 		NAT64STAT_INC(&cfg->stats, nomem);
1378 		return (NAT64RETURN);
1379 	}
1380 	ip6 = mtod(m, struct ip6_hdr *);
1381 	ip6i = mtodo(m, hlen);
1382 	ip6f = NULL;
1383 	proto = ip6i->ip6_nxt;
1384 	plen = ntohs(ip6i->ip6_plen);
1385 	hlen += sizeof(struct ip6_hdr);
1386 	if (proto == IPPROTO_FRAGMENT) {
1387 		if (m->m_pkthdr.len < hlen + sizeof(struct ip6_frag) +
1388 		    ICMP_MINLEN)
1389 			goto fail;
1390 		ip6f = mtodo(m, hlen);
1391 		proto = ip6f->ip6f_nxt;
1392 		plen -= sizeof(struct ip6_frag);
1393 		hlen += sizeof(struct ip6_frag);
1394 		/* Ajust MTU to reflect frag header size */
1395 		if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG)
1396 			mtu -= sizeof(struct ip6_frag);
1397 	}
1398 	if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
1399 		DPRINTF(DP_DROPS, "Unsupported proto %d in the inner header",
1400 		    proto);
1401 		goto fail;
1402 	}
1403 	if (nat64_check_ip6(&ip6i->ip6_src) != 0 ||
1404 	    nat64_check_ip6(&ip6i->ip6_dst) != 0) {
1405 		DPRINTF(DP_DROPS, "Inner addresses do not passes the check");
1406 		goto fail;
1407 	}
1408 	/* Check if outer dst is the same as inner src */
1409 	if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6i->ip6_src)) {
1410 		DPRINTF(DP_DROPS, "Inner src doesn't match outer dst");
1411 		goto fail;
1412 	}
1413 
1414 	/* Now we need to make a fake IPv4 packet to generate ICMP message */
1415 	ip.ip_dst.s_addr = aaddr;
1416 	ip.ip_src.s_addr = nat64_extract_ip4(cfg, &ip6i->ip6_src);
1417 	/* XXX: Make fake ulp header */
1418 #ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
1419 	ip6i->ip6_hlim += IPV6_HLIMDEC; /* init_ip4hdr will decrement it */
1420 #endif
1421 	nat64_init_ip4hdr(ip6i, ip6f, plen, proto, &ip);
1422 	m_adj(m, hlen - sizeof(struct ip));
1423 	bcopy(&ip, mtod(m, void *), sizeof(ip));
1424 	nat64_icmp_reflect(m, type, code, (uint16_t)mtu, &cfg->stats,
1425 	    logdata);
1426 	return (NAT64RETURN);
1427 fail:
1428 	/*
1429 	 * We must call m_freem() because mbuf pointer could be
1430 	 * changed with m_pullup().
1431 	 */
1432 	m_freem(m);
1433 	NAT64STAT_INC(&cfg->stats, dropped);
1434 	return (NAT64RETURN);
1435 }
1436 
1437 int
1438 nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport,
1439     struct nat64_config *cfg, void *logdata)
1440 {
1441 	struct ip ip;
1442 	struct nhop4_basic nh;
1443 	struct sockaddr_in dst;
1444 	struct ip6_frag *frag;
1445 	struct ip6_hdr *ip6;
1446 	struct icmp6_hdr *icmp6;
1447 	uint16_t *csum;
1448 	int plen, hlen, proto;
1449 
1450 	/*
1451 	 * XXX: we expect ipfw_chk() did m_pullup() up to upper level
1452 	 * protocol's headers. Also we skip some checks, that ip6_input(),
1453 	 * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did.
1454 	 */
1455 	ip6 = mtod(m, struct ip6_hdr *);
1456 	if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
1457 	    nat64_check_ip6(&ip6->ip6_dst) != 0) {
1458 		return (NAT64SKIP);
1459 	}
1460 
1461 	/* Starting from this point we must not return zero */
1462 	ip.ip_src.s_addr = aaddr;
1463 	if (nat64_check_ip4(ip.ip_src.s_addr) != 0) {
1464 		DPRINTF(DP_GENERIC | DP_DROPS, "invalid source address: %08x",
1465 		    ip.ip_src.s_addr);
1466 		NAT64STAT_INC(&cfg->stats, dropped);
1467 		return (NAT64MFREE);
1468 	}
1469 
1470 	ip.ip_dst.s_addr = nat64_extract_ip4(cfg, &ip6->ip6_dst);
1471 	if (ip.ip_dst.s_addr == 0) {
1472 		NAT64STAT_INC(&cfg->stats, dropped);
1473 		return (NAT64MFREE);
1474 	}
1475 
1476 	if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
1477 		nat64_icmp6_reflect(m, ICMP6_TIME_EXCEEDED,
1478 		    ICMP6_TIME_EXCEED_TRANSIT, 0, &cfg->stats, logdata);
1479 		return (NAT64RETURN);
1480 	}
1481 
1482 	hlen = 0;
1483 	plen = ntohs(ip6->ip6_plen);
1484 	proto = nat64_getlasthdr(m, &hlen);
1485 	if (proto < 0) {
1486 		DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious");
1487 		NAT64STAT_INC(&cfg->stats, dropped);
1488 		return (NAT64MFREE);
1489 	}
1490 	frag = NULL;
1491 	if (proto == IPPROTO_FRAGMENT) {
1492 		/* ipfw_chk should m_pullup up to frag header */
1493 		if (m->m_len < hlen + sizeof(*frag)) {
1494 			DPRINTF(DP_DROPS,
1495 			    "dropped due to mbuf isn't contigious");
1496 			NAT64STAT_INC(&cfg->stats, dropped);
1497 			return (NAT64MFREE);
1498 		}
1499 		frag = mtodo(m, hlen);
1500 		proto = frag->ip6f_nxt;
1501 		hlen += sizeof(*frag);
1502 		/* Fragmented ICMPv6 is unsupported */
1503 		if (proto == IPPROTO_ICMPV6) {
1504 			DPRINTF(DP_DROPS, "dropped due to fragmented ICMPv6");
1505 			NAT64STAT_INC(&cfg->stats, dropped);
1506 			return (NAT64MFREE);
1507 		}
1508 		/* Fragment length must be multiple of 8 octets */
1509 		if ((frag->ip6f_offlg & IP6F_MORE_FRAG) != 0 &&
1510 		    ((plen + sizeof(struct ip6_hdr) - hlen) & 0x7) != 0) {
1511 			nat64_icmp6_reflect(m, ICMP6_PARAM_PROB,
1512 			    ICMP6_PARAMPROB_HEADER,
1513 			    offsetof(struct ip6_hdr, ip6_plen), &cfg->stats,
1514 			    logdata);
1515 			return (NAT64RETURN);
1516 		}
1517 	}
1518 	plen -= hlen - sizeof(struct ip6_hdr);
1519 	if (plen < 0 || m->m_pkthdr.len < plen + hlen) {
1520 		DPRINTF(DP_DROPS, "plen %d, pkthdr.len %d, hlen %d",
1521 		    plen, m->m_pkthdr.len, hlen);
1522 		NAT64STAT_INC(&cfg->stats, dropped);
1523 		return (NAT64MFREE);
1524 	}
1525 
1526 	icmp6 = NULL;	/* Make gcc happy */
1527 	if (proto == IPPROTO_ICMPV6) {
1528 		icmp6 = mtodo(m, hlen);
1529 		if (icmp6->icmp6_type != ICMP6_ECHO_REQUEST &&
1530 		    icmp6->icmp6_type != ICMP6_ECHO_REPLY)
1531 			return (nat64_handle_icmp6(m, hlen, aaddr, aport,
1532 			    cfg, logdata));
1533 	}
1534 	dst.sin_addr.s_addr = ip.ip_dst.s_addr;
1535 	if (nat64_find_route4(&nh, &dst, m) != 0) {
1536 		NAT64STAT_INC(&cfg->stats, noroute4);
1537 		nat64_icmp6_reflect(m, ICMP6_DST_UNREACH,
1538 		    ICMP6_DST_UNREACH_NOROUTE, 0, &cfg->stats, logdata);
1539 		return (NAT64RETURN);
1540 	}
1541 	if (nh.nh_mtu < plen + sizeof(ip)) {
1542 		nat64_icmp6_reflect(m, ICMP6_PACKET_TOO_BIG, 0, nh.nh_mtu,
1543 		    &cfg->stats, logdata);
1544 		return (NAT64RETURN);
1545 	}
1546 	nat64_init_ip4hdr(ip6, frag, plen, proto, &ip);
1547 	/* Convert checksums. */
1548 	switch (proto) {
1549 	case IPPROTO_TCP:
1550 		csum = &TCP(mtodo(m, hlen))->th_sum;
1551 		if (aport != 0) {
1552 			struct tcphdr *tcp = TCP(mtodo(m, hlen));
1553 			*csum = cksum_adjust(*csum, tcp->th_sport, aport);
1554 			tcp->th_sport = aport;
1555 		}
1556 		*csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));
1557 		break;
1558 	case IPPROTO_UDP:
1559 		csum = &UDP(mtodo(m, hlen))->uh_sum;
1560 		if (aport != 0) {
1561 			struct udphdr *udp = UDP(mtodo(m, hlen));
1562 			*csum = cksum_adjust(*csum, udp->uh_sport, aport);
1563 			udp->uh_sport = aport;
1564 		}
1565 		*csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));
1566 		break;
1567 	case IPPROTO_ICMPV6:
1568 		/* Checksum in ICMPv6 covers pseudo header */
1569 		csum = &icmp6->icmp6_cksum;
1570 		*csum = cksum_add(*csum, in6_cksum_pseudo(ip6, plen,
1571 		    IPPROTO_ICMPV6, 0));
1572 		/* Convert ICMPv6 types to ICMP */
1573 		proto = *(uint16_t *)icmp6; /* save old word for cksum_adjust */
1574 		if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST)
1575 			icmp6->icmp6_type = ICMP_ECHO;
1576 		else /* ICMP6_ECHO_REPLY */
1577 			icmp6->icmp6_type = ICMP_ECHOREPLY;
1578 		*csum = cksum_adjust(*csum, (uint16_t)proto,
1579 		    *(uint16_t *)icmp6);
1580 		if (aport != 0) {
1581 			uint16_t old_id = icmp6->icmp6_id;
1582 			icmp6->icmp6_id = aport;
1583 			*csum = cksum_adjust(*csum, old_id, aport);
1584 		}
1585 		break;
1586 	};
1587 
1588 	m_adj(m, hlen - sizeof(ip));
1589 	bcopy(&ip, mtod(m, void *), sizeof(ip));
1590 	if (nat64_output(nh.nh_ifp, m, (struct sockaddr *)&dst,
1591 	    &cfg->stats, logdata) == 0)
1592 		NAT64STAT_INC(&cfg->stats, opcnt64);
1593 	return (NAT64RETURN);
1594 }
1595 
1596