xref: /freebsd/sys/netpfil/ipfw/nat64/nat64_translate.c (revision bcce9a2b33a8e9187a63f435726a7a801e89f326)
1 /*-
2  * Copyright (c) 2015-2016 Yandex LLC
3  * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "opt_ipfw.h"
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/counter.h>
36 #include <sys/errno.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/mbuf.h>
40 #include <sys/module.h>
41 #include <sys/rmlock.h>
42 #include <sys/rwlock.h>
43 #include <sys/socket.h>
44 #include <sys/queue.h>
45 
46 #include <net/if.h>
47 #include <net/if_var.h>
48 #include <net/if_pflog.h>
49 #include <net/pfil.h>
50 #include <net/netisr.h>
51 #include <net/route.h>
52 
53 #include <netinet/in.h>
54 #include <netinet/in_fib.h>
55 #include <netinet/ip.h>
56 #include <netinet/ip_var.h>
57 #include <netinet/ip_fw.h>
58 #include <netinet/ip6.h>
59 #include <netinet/icmp6.h>
60 #include <netinet/ip_icmp.h>
61 #include <netinet/tcp.h>
62 #include <netinet/udp.h>
63 #include <netinet6/in6_var.h>
64 #include <netinet6/in6_fib.h>
65 #include <netinet6/ip6_var.h>
66 
67 #include <netpfil/pf/pf.h>
68 #include <netpfil/ipfw/ip_fw_private.h>
69 #include <netpfil/ipfw/nat64/ip_fw_nat64.h>
70 #include <netpfil/ipfw/nat64/nat64_translate.h>
71 #include <machine/in_cksum.h>
72 
73 static void
74 nat64_log(struct pfloghdr *logdata, struct mbuf *m, sa_family_t family)
75 {
76 
77 	logdata->dir = PF_OUT;
78 	logdata->af = family;
79 	ipfw_bpf_mtap2(logdata, PFLOG_HDRLEN, m);
80 }
81 
82 #ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
/*
 * Forward declarations: the route lookup helpers are defined further down
 * in this file but are needed by the direct-output nat64_output_one() below.
 */
static NAT64NOINLINE int nat64_find_route4(struct nhop4_basic *,
    struct sockaddr_in *, struct mbuf *);
static NAT64NOINLINE int nat64_find_route6(struct nhop6_basic *,
    struct sockaddr_in6 *, struct mbuf *);
87 
88 static NAT64NOINLINE int
89 nat64_output(struct ifnet *ifp, struct mbuf *m,
90     struct sockaddr *dst, struct route *ro, nat64_stats_block *stats,
91     void *logdata)
92 {
93 	int error;
94 
95 	if (logdata != NULL)
96 		nat64_log(logdata, m, dst->sa_family);
97 	error = (*ifp->if_output)(ifp, m, dst, ro);
98 	if (error != 0)
99 		NAT64STAT_INC(stats, oerrors);
100 	return (error);
101 }
102 
103 static NAT64NOINLINE int
104 nat64_output_one(struct mbuf *m, nat64_stats_block *stats, void *logdata)
105 {
106 	struct nhop6_basic nh6;
107 	struct nhop4_basic nh4;
108 	struct sockaddr_in6 dst6;
109 	struct sockaddr_in dst4;
110 	struct sockaddr *dst;
111 	struct ip6_hdr *ip6;
112 	struct ip *ip4;
113 	struct ifnet *ifp;
114 	int error;
115 
116 	ip4 = mtod(m, struct ip *);
117 	switch (ip4->ip_v) {
118 	case IPVERSION:
119 		dst4.sin_addr = ip4->ip_dst;
120 		error = nat64_find_route4(&nh4, &dst4, m);
121 		if (error != 0)
122 			NAT64STAT_INC(stats, noroute4);
123 		else {
124 			ifp = nh4.nh_ifp;
125 			dst = (struct sockaddr *)&dst4;
126 		}
127 		break;
128 	case (IPV6_VERSION >> 4):
129 		ip6 = mtod(m, struct ip6_hdr *);
130 		dst6.sin6_addr = ip6->ip6_dst;
131 		error = nat64_find_route6(&nh6, &dst6, m);
132 		if (error != 0)
133 			NAT64STAT_INC(stats, noroute6);
134 		else {
135 			ifp = nh6.nh_ifp;
136 			dst = (struct sockaddr *)&dst6;
137 		}
138 		break;
139 	default:
140 		m_freem(m);
141 		NAT64STAT_INC(stats, dropped);
142 		DPRINTF(DP_DROPS, "dropped due to unknown IP version");
143 		return (EAFNOSUPPORT);
144 	}
145 	if (error != 0) {
146 		m_freem(m);
147 		return (EHOSTUNREACH);
148 	}
149 	if (logdata != NULL)
150 		nat64_log(logdata, m, dst->sa_family);
151 	error = (*ifp->if_output)(ifp, m, dst, NULL);
152 	if (error != 0)
153 		NAT64STAT_INC(stats, oerrors);
154 	return (error);
155 }
156 #else /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */
157 static NAT64NOINLINE int
158 nat64_output(struct ifnet *ifp, struct mbuf *m,
159     struct sockaddr *dst, struct route *ro, nat64_stats_block *stats,
160     void *logdata)
161 {
162 	struct ip *ip4;
163 	int ret, af;
164 
165 	ip4 = mtod(m, struct ip *);
166 	switch (ip4->ip_v) {
167 	case IPVERSION:
168 		af = AF_INET;
169 		ret = NETISR_IP;
170 		break;
171 	case (IPV6_VERSION >> 4):
172 		af = AF_INET6;
173 		ret = NETISR_IPV6;
174 		break;
175 	default:
176 		m_freem(m);
177 		NAT64STAT_INC(stats, dropped);
178 		DPRINTF(DP_DROPS, "unknown IP version");
179 		return (EAFNOSUPPORT);
180 	}
181 	if (logdata != NULL)
182 		nat64_log(logdata, m, af);
183 	ret = netisr_queue(ret, m);
184 	if (ret != 0)
185 		NAT64STAT_INC(stats, oerrors);
186 	return (ret);
187 }
188 
/*
 * Netisr variant of the single-packet output helper.  No route lookup is
 * performed here: the packet is simply handed to nat64_output(), whose
 * interface, destination and route arguments are unused in this
 * configuration and are therefore passed as NULL.
 */
static NAT64NOINLINE int
nat64_output_one(struct mbuf *m, nat64_stats_block *stats, void *logdata)
{

	return (nat64_output(NULL, m, NULL, NULL, stats, logdata));
}
195 #endif /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */
196 
197 
198 #if 0
199 void print_ipv6_header(struct ip6_hdr *ip6, char *buf, size_t bufsize);
200 
201 void
202 print_ipv6_header(struct ip6_hdr *ip6, char *buf, size_t bufsize)
203 {
204 	char sbuf[INET6_ADDRSTRLEN], dbuf[INET6_ADDRSTRLEN];
205 
206 	inet_ntop(AF_INET6, &ip6->ip6_src, sbuf, sizeof(sbuf));
207 	inet_ntop(AF_INET6, &ip6->ip6_dst, dbuf, sizeof(dbuf));
208 	snprintf(buf, bufsize, "%s -> %s %d", sbuf, dbuf, ip6->ip6_nxt);
209 }
210 
211 
/*
 * Build an IPv4-embedded IPv6 address: copy the configured prefix into
 * *ip6 and place the IPv4 address ia after the first cfg->plen bits.
 * ia is handled as a raw 32-bit word; the shifts below are selected per
 * host byte order so that its bytes land at the intended positions of
 * the in-memory address.
 *
 * Returns 1 on success and 0 if cfg->plen is not one of
 * 32, 40, 48, 56, 64 or 96.
 */
static NAT64NOINLINE int
nat64_embed_ip4(struct nat64_cfg *cfg, in_addr_t ia, struct in6_addr *ip6)
{

	/* assume the prefix is properly filled with zeros */
	bcopy(&cfg->prefix, ip6, sizeof(*ip6));
	switch (cfg->plen) {
	case 32:
	case 96:
		/* The address occupies exactly one aligned 32-bit word. */
		ip6->s6_addr32[cfg->plen / 32] = ia;
		break;
	case 40:
	case 48:
	case 56:
		/*
		 * The address straddles words 1 and 2.  Word 2 may receive
		 * a stray address byte in octet 8; it is cleared below.
		 */
#if BYTE_ORDER == BIG_ENDIAN
		ip6->s6_addr32[1] = cfg->prefix.s6_addr32[1] |
		    (ia >> (cfg->plen % 32));
		ip6->s6_addr32[2] = ia << (24 - cfg->plen % 32);
#elif BYTE_ORDER == LITTLE_ENDIAN
		ip6->s6_addr32[1] = cfg->prefix.s6_addr32[1] |
		    (ia << (cfg->plen % 32));
		ip6->s6_addr32[2] = ia >> (24 - cfg->plen % 32);
#endif
		break;
	case 64:
		/* Octet 8 is skipped; the address fills octets 9..12. */
#if BYTE_ORDER == BIG_ENDIAN
		ip6->s6_addr32[2] = ia >> 8;
		ip6->s6_addr32[3] = ia << 24;
#elif BYTE_ORDER == LITTLE_ENDIAN
		ip6->s6_addr32[2] = ia << 8;
		ip6->s6_addr32[3] = ia >> 24;
#endif
		break;
	default:
		return (0);
	};
	/* Octet 8 never carries address bits; force it to zero. */
	ip6->s6_addr8[8] = 0;
	return (1);
}
251 
/*
 * Extract the IPv4 address embedded in *ip6 after a prefix of plen bits.
 *
 * The first switch rejects addresses whose suffix bits or octet 8 are
 * non-zero; the second one reassembles the 32-bit address using shifts
 * selected per host byte order.
 *
 * Returns the extracted address, or 0 when plen is unsupported, the
 * IPv6 address is malformed, or the IPv4 address fails
 * nat64_check_ip4() / nat64_check_private_ip4().
 */
static NAT64NOINLINE in_addr_t
nat64_extract_ip4(struct in6_addr *ip6, int plen)
{
	in_addr_t ia;

	/*
	 * According to RFC 6052 p2.2:
	 * IPv4-embedded IPv6 addresses are composed of a variable-length
	 * prefix, the embedded IPv4 address, and a variable length suffix.
	 * The suffix bits are reserved for future extensions and SHOULD
	 * be set to zero.
	 */
	switch (plen) {
	case 32:
		if (ip6->s6_addr32[3] != 0 || ip6->s6_addr32[2] != 0)
			goto badip6;
		break;
	case 40:
		if (ip6->s6_addr32[3] != 0 ||
		    (ip6->s6_addr32[2] & htonl(0xff00ffff)) != 0)
			goto badip6;
		break;
	case 48:
		if (ip6->s6_addr32[3] != 0 ||
		    (ip6->s6_addr32[2] & htonl(0xff0000ff)) != 0)
			goto badip6;
		break;
	case 56:
		if (ip6->s6_addr32[3] != 0 || ip6->s6_addr8[8] != 0)
			goto badip6;
		break;
	case 64:
		if (ip6->s6_addr8[8] != 0 ||
		    (ip6->s6_addr32[3] & htonl(0x00ffffff)) != 0)
			goto badip6;
	};
	/* Any other plen (including 96) has no suffix check above. */
	switch (plen) {
	case 32:
	case 96:
		/* The address is one aligned 32-bit word. */
		ia = ip6->s6_addr32[plen / 32];
		break;
	case 40:
	case 48:
	case 56:
		/* The address straddles words 1 and 2. */
#if BYTE_ORDER == BIG_ENDIAN
		ia = (ip6->s6_addr32[1] << (plen % 32)) |
		    (ip6->s6_addr32[2] >> (24 - plen % 32));
#elif BYTE_ORDER == LITTLE_ENDIAN
		ia = (ip6->s6_addr32[1] >> (plen % 32)) |
		    (ip6->s6_addr32[2] << (24 - plen % 32));
#endif
		break;
	case 64:
		/* The address occupies octets 9..12 (words 2 and 3). */
#if BYTE_ORDER == BIG_ENDIAN
		ia = (ip6->s6_addr32[2] << 8) | (ip6->s6_addr32[3] >> 24);
#elif BYTE_ORDER == LITTLE_ENDIAN
		ia = (ip6->s6_addr32[2] >> 8) | (ip6->s6_addr32[3] << 24);
#endif
		break;
	default:
		return (0);
	};
	if (nat64_check_ip4(ia) != 0 ||
	    nat64_check_private_ip4(ia) != 0)
		goto badip4;

	return (ia);
badip4:
	DPRINTF(DP_GENERIC, "invalid destination address: %08x", ia);
	return (0);
badip6:
	DPRINTF(DP_GENERIC, "invalid IPv4-embedded IPv6 address");
	return (0);
}
326 #endif
327 
328 /*
329  * According to RFC 1624 the equation for incremental checksum update is:
330  *	HC' = ~(~HC + ~m + m')	--	[Eqn. 3]
331  *	HC' = HC - ~m - m'	--	[Eqn. 4]
 * So, when we replace IPv4 addresses with IPv6 ones, we can assume
 * that the new bytes were previously zeros, and vice versa: when we
 * replace IPv6 addresses with IPv4 ones, the now unused bytes become
 * zeros. The payload length in the pseudo header is wider, but one
 * half of it should be zero. Using equation 4 we get:
337  *	HC' = HC - (~m0 + m0')	-- m0 is first changed word
338  *	HC' = (HC - (~m0 + m0')) - (~m1 + m1')	-- m1 is second changed word
339  *	HC' = HC - ~m0 - m0' - ~m1 - m1' - ... =
340  *	  = HC - sum(~m[i] + m'[i])
341  *
342  * The function result should be used as follows:
343  *	IPv6 to IPv4:	HC' = cksum_add(HC, result)
344  *	IPv4 to IPv6:	HC' = cksum_add(HC, ~result)
345  */
346 static NAT64NOINLINE uint16_t
347 nat64_cksum_convert(struct ip6_hdr *ip6, struct ip *ip)
348 {
349 	uint32_t sum;
350 	uint16_t *p;
351 
352 	sum = ~ip->ip_src.s_addr >> 16;
353 	sum += ~ip->ip_src.s_addr & 0xffff;
354 	sum += ~ip->ip_dst.s_addr >> 16;
355 	sum += ~ip->ip_dst.s_addr & 0xffff;
356 
357 	for (p = (uint16_t *)&ip6->ip6_src;
358 	    p < (uint16_t *)(&ip6->ip6_src + 2); p++)
359 		sum += *p;
360 
361 	while (sum >> 16)
362 		sum = (sum & 0xffff) + (sum >> 16);
363 	return (sum);
364 }
365 
366 #if __FreeBSD_version < 1100000
367 #define	ip_fillid(ip)		(ip)->ip_id = ip_newid()
368 #endif
369 static NAT64NOINLINE void
370 nat64_init_ip4hdr(const struct ip6_hdr *ip6, const struct ip6_frag *frag,
371     uint16_t plen, uint8_t proto, struct ip *ip)
372 {
373 
374 	/* assume addresses are already initialized */
375 	ip->ip_v = IPVERSION;
376 	ip->ip_hl = sizeof(*ip) >> 2;
377 	ip->ip_tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
378 	ip->ip_len = htons(sizeof(*ip) + plen);
379 #ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
380 	ip->ip_ttl = ip6->ip6_hlim - IPV6_HLIMDEC;
381 #else
382 	/* Forwarding code will decrement TTL. */
383 	ip->ip_ttl = ip6->ip6_hlim;
384 #endif
385 	ip->ip_sum = 0;
386 	ip->ip_p = (proto == IPPROTO_ICMPV6) ? IPPROTO_ICMP: proto;
387 	ip_fillid(ip);
388 	if (frag != NULL) {
389 		ip->ip_off = htons(ntohs(frag->ip6f_offlg) >> 3);
390 		if (frag->ip6f_offlg & IP6F_MORE_FRAG)
391 			ip->ip_off |= htons(IP_MF);
392 	} else {
393 		ip->ip_off = htons(IP_DF);
394 	}
395 	ip->ip_sum = in_cksum_hdr(ip);
396 }
397 
398 #define	FRAGSZ(mtu) ((mtu) - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag))
399 static NAT64NOINLINE int
400 nat64_fragment6(nat64_stats_block *stats, struct ip6_hdr *ip6, struct mbufq *mq,
401     struct mbuf *m, uint32_t mtu, uint16_t ip_id, uint16_t ip_off)
402 {
403 	struct ip6_frag ip6f;
404 	struct mbuf *n;
405 	uint16_t hlen, len, offset;
406 	int plen;
407 
408 	plen = ntohs(ip6->ip6_plen);
409 	hlen = sizeof(struct ip6_hdr);
410 
411 	/* Fragmentation isn't needed */
412 	if (ip_off == 0 && plen <= mtu - hlen) {
413 		M_PREPEND(m, hlen, M_NOWAIT);
414 		if (m == NULL) {
415 			NAT64STAT_INC(stats, nomem);
416 			return (ENOMEM);
417 		}
418 		bcopy(ip6, mtod(m, void *), hlen);
419 		if (mbufq_enqueue(mq, m) != 0) {
420 			m_freem(m);
421 			NAT64STAT_INC(stats, dropped);
422 			DPRINTF(DP_DROPS, "dropped due to mbufq overflow");
423 			return (ENOBUFS);
424 		}
425 		return (0);
426 	}
427 
428 	hlen += sizeof(struct ip6_frag);
429 	ip6f.ip6f_reserved = 0;
430 	ip6f.ip6f_nxt = ip6->ip6_nxt;
431 	ip6->ip6_nxt = IPPROTO_FRAGMENT;
432 	if (ip_off != 0) {
433 		/*
434 		 * We have got an IPv4 fragment.
435 		 * Use offset value and ip_id from original fragment.
436 		 */
437 		ip6f.ip6f_ident = htonl(ntohs(ip_id));
438 		offset = (ntohs(ip_off) & IP_OFFMASK) << 3;
439 		NAT64STAT_INC(stats, ifrags);
440 	} else {
441 		/* The packet size exceeds interface MTU */
442 		ip6f.ip6f_ident = htonl(ip6_randomid());
443 		offset = 0; /* First fragment*/
444 	}
445 	while (plen > 0 && m != NULL) {
446 		n = NULL;
447 		len = FRAGSZ(mtu) & ~7;
448 		if (len > plen)
449 			len = plen;
450 		ip6->ip6_plen = htons(len + sizeof(ip6f));
451 		ip6f.ip6f_offlg = ntohs(offset);
452 		if (len < plen || (ip_off & htons(IP_MF)) != 0)
453 			ip6f.ip6f_offlg |= IP6F_MORE_FRAG;
454 		offset += len;
455 		plen -= len;
456 		if (plen > 0) {
457 			n = m_split(m, len, M_NOWAIT);
458 			if (n == NULL)
459 				goto fail;
460 		}
461 		M_PREPEND(m, hlen, M_NOWAIT);
462 		if (m == NULL)
463 			goto fail;
464 		bcopy(ip6, mtod(m, void *), sizeof(struct ip6_hdr));
465 		bcopy(&ip6f, mtodo(m, sizeof(struct ip6_hdr)),
466 		    sizeof(struct ip6_frag));
467 		if (mbufq_enqueue(mq, m) != 0)
468 			goto fail;
469 		m = n;
470 	}
471 	NAT64STAT_ADD(stats, ofrags, mbufq_len(mq));
472 	return (0);
473 fail:
474 	if (m != NULL)
475 		m_freem(m);
476 	if (n != NULL)
477 		m_freem(n);
478 	mbufq_drain(mq);
479 	NAT64STAT_INC(stats, nomem);
480 	return (ENOMEM);
481 }
482 
483 static NAT64NOINLINE int
484 nat64_find_route6(struct nhop6_basic *pnh, struct sockaddr_in6 *dst,
485     struct mbuf *m)
486 {
487 
488 	if (fib6_lookup_nh_basic(M_GETFIB(m), &dst->sin6_addr, 0, 0, 0,
489 	    pnh) != 0)
490 		return (EHOSTUNREACH);
491 	if (pnh->nh_flags & (NHF_BLACKHOLE | NHF_REJECT))
492 		return (EHOSTUNREACH);
493 	/*
494 	 * XXX: we need to use destination address with embedded scope
495 	 * zone id, because LLTABLE uses such form of addresses for lookup.
496 	 */
497 	dst->sin6_family = AF_INET6;
498 	dst->sin6_len = sizeof(*dst);
499 	dst->sin6_addr = pnh->nh_addr;
500 	if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr))
501 		dst->sin6_addr.s6_addr16[1] =
502 		    htons(pnh->nh_ifp->if_index & 0xffff);
503 	dst->sin6_port = 0;
504 	dst->sin6_scope_id = 0;
505 	dst->sin6_flowinfo = 0;
506 
507 	return (0);
508 }
509 
510 #define	NAT64_ICMP6_PLEN	64
511 static NAT64NOINLINE void
512 nat64_icmp6_reflect(struct mbuf *m, uint8_t type, uint8_t code, uint32_t mtu,
513     nat64_stats_block *stats, void *logdata)
514 {
515 	struct icmp6_hdr *icmp6;
516 	struct ip6_hdr *ip6, *oip6;
517 	struct mbuf *n;
518 	int len, plen;
519 
520 	len = 0;
521 	plen = nat64_getlasthdr(m, &len);
522 	if (plen < 0) {
523 		DPRINTF(DP_DROPS, "mbuf isn't contigious");
524 		goto freeit;
525 	}
526 	/*
527 	 * Do not send ICMPv6 in reply to ICMPv6 errors.
528 	 */
529 	if (plen == IPPROTO_ICMPV6) {
530 		if (m->m_len < len + sizeof(*icmp6)) {
531 			DPRINTF(DP_DROPS, "mbuf isn't contigious");
532 			goto freeit;
533 		}
534 		icmp6 = mtodo(m, len);
535 		if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST ||
536 		    icmp6->icmp6_type == ND_REDIRECT) {
537 			DPRINTF(DP_DROPS, "do not send ICMPv6 in reply to "
538 			    "ICMPv6 errors");
539 			goto freeit;
540 		}
541 	}
542 	/*
543 	if (icmp6_ratelimit(&ip6->ip6_src, type, code))
544 		goto freeit;
545 		*/
546 	ip6 = mtod(m, struct ip6_hdr *);
547 	switch (type) {
548 	case ICMP6_DST_UNREACH:
549 	case ICMP6_PACKET_TOO_BIG:
550 	case ICMP6_TIME_EXCEEDED:
551 	case ICMP6_PARAM_PROB:
552 		break;
553 	default:
554 		goto freeit;
555 	}
556 	/* Calculate length of ICMPv6 payload */
557 	len = (m->m_pkthdr.len > NAT64_ICMP6_PLEN) ? NAT64_ICMP6_PLEN:
558 	    m->m_pkthdr.len;
559 
560 	/* Create new ICMPv6 datagram */
561 	plen = len + sizeof(struct icmp6_hdr);
562 	n = m_get2(sizeof(struct ip6_hdr) + plen + max_hdr, M_NOWAIT,
563 	    MT_HEADER, M_PKTHDR);
564 	if (n == NULL) {
565 		NAT64STAT_INC(stats, nomem);
566 		m_freem(m);
567 		return;
568 	}
569 	/*
570 	 * Move pkthdr from original mbuf. We should have initialized some
571 	 * fields, because we can reinject this mbuf to netisr and it will
572 	 * go trough input path (it requires at least rcvif should be set).
573 	 * Also do M_ALIGN() to reduce chances of need to allocate new mbuf
574 	 * in the chain, when we will do M_PREPEND() or make some type of
575 	 * tunneling.
576 	 */
577 	m_move_pkthdr(n, m);
578 	M_ALIGN(n, sizeof(struct ip6_hdr) + plen + max_hdr);
579 
580 	n->m_len = n->m_pkthdr.len = sizeof(struct ip6_hdr) + plen;
581 	oip6 = mtod(n, struct ip6_hdr *);
582 	oip6->ip6_src = ip6->ip6_dst;
583 	oip6->ip6_dst = ip6->ip6_src;
584 	oip6->ip6_nxt = IPPROTO_ICMPV6;
585 	oip6->ip6_flow = 0;
586 	oip6->ip6_vfc |= IPV6_VERSION;
587 	oip6->ip6_hlim = V_ip6_defhlim;
588 	oip6->ip6_plen = htons(plen);
589 
590 	icmp6 = mtodo(n, sizeof(struct ip6_hdr));
591 	icmp6->icmp6_cksum = 0;
592 	icmp6->icmp6_type = type;
593 	icmp6->icmp6_code = code;
594 	icmp6->icmp6_mtu = htonl(mtu);
595 
596 	m_copydata(m, 0, len, mtodo(n, sizeof(struct ip6_hdr) +
597 	    sizeof(struct icmp6_hdr)));
598 	icmp6->icmp6_cksum = in6_cksum(n, IPPROTO_ICMPV6,
599 	    sizeof(struct ip6_hdr), plen);
600 	m_freem(m);
601 	nat64_output_one(n, stats, logdata);
602 	return;
603 freeit:
604 	NAT64STAT_INC(stats, dropped);
605 	m_freem(m);
606 }
607 
608 static NAT64NOINLINE int
609 nat64_find_route4(struct nhop4_basic *pnh, struct sockaddr_in *dst,
610     struct mbuf *m)
611 {
612 
613 	if (fib4_lookup_nh_basic(M_GETFIB(m), dst->sin_addr, 0, 0, pnh) != 0)
614 		return (EHOSTUNREACH);
615 	if (pnh->nh_flags & (NHF_BLACKHOLE | NHF_BROADCAST | NHF_REJECT))
616 		return (EHOSTUNREACH);
617 
618 	dst->sin_family = AF_INET;
619 	dst->sin_len = sizeof(*dst);
620 	dst->sin_addr = pnh->nh_addr;
621 	dst->sin_port = 0;
622 	return (0);
623 }
624 
625 #define	NAT64_ICMP_PLEN	64
626 static NAT64NOINLINE void
627 nat64_icmp_reflect(struct mbuf *m, uint8_t type,
628     uint8_t code, uint16_t mtu, nat64_stats_block *stats, void *logdata)
629 {
630 	struct icmp *icmp;
631 	struct ip *ip, *oip;
632 	struct mbuf *n;
633 	int len, plen;
634 
635 	ip = mtod(m, struct ip *);
636 	/* Do not send ICMP error if packet is not the first fragment */
637 	if (ip->ip_off & ~ntohs(IP_MF|IP_DF)) {
638 		DPRINTF(DP_DROPS, "not first fragment");
639 		goto freeit;
640 	}
641 	/* Do not send ICMP in reply to ICMP errors */
642 	if (ip->ip_p == IPPROTO_ICMP) {
643 		if (m->m_len < (ip->ip_hl << 2)) {
644 			DPRINTF(DP_DROPS, "mbuf isn't contigious");
645 			goto freeit;
646 		}
647 		icmp = mtodo(m, ip->ip_hl << 2);
648 		if (!ICMP_INFOTYPE(icmp->icmp_type)) {
649 			DPRINTF(DP_DROPS, "do not send ICMP in reply to "
650 			    "ICMP errors");
651 			goto freeit;
652 		}
653 	}
654 	switch (type) {
655 	case ICMP_UNREACH:
656 	case ICMP_TIMXCEED:
657 	case ICMP_PARAMPROB:
658 		break;
659 	default:
660 		goto freeit;
661 	}
662 	/* Calculate length of ICMP payload */
663 	len = (m->m_pkthdr.len > NAT64_ICMP_PLEN) ? (ip->ip_hl << 2) + 8:
664 	    m->m_pkthdr.len;
665 
666 	/* Create new ICMPv4 datagram */
667 	plen = len + sizeof(struct icmphdr) + sizeof(uint32_t);
668 	n = m_get2(sizeof(struct ip) + plen + max_hdr, M_NOWAIT,
669 	    MT_HEADER, M_PKTHDR);
670 	if (n == NULL) {
671 		NAT64STAT_INC(stats, nomem);
672 		m_freem(m);
673 		return;
674 	}
675 	m_move_pkthdr(n, m);
676 	M_ALIGN(n, sizeof(struct ip) + plen + max_hdr);
677 
678 	n->m_len = n->m_pkthdr.len = sizeof(struct ip) + plen;
679 	oip = mtod(n, struct ip *);
680 	oip->ip_v = IPVERSION;
681 	oip->ip_hl = sizeof(struct ip) >> 2;
682 	oip->ip_tos = 0;
683 	oip->ip_len = htons(n->m_pkthdr.len);
684 	oip->ip_ttl = V_ip_defttl;
685 	oip->ip_p = IPPROTO_ICMP;
686 	ip_fillid(oip);
687 	oip->ip_off = htons(IP_DF);
688 	oip->ip_src = ip->ip_dst;
689 	oip->ip_dst = ip->ip_src;
690 	oip->ip_sum = 0;
691 	oip->ip_sum = in_cksum_hdr(oip);
692 
693 	icmp = mtodo(n, sizeof(struct ip));
694 	icmp->icmp_type = type;
695 	icmp->icmp_code = code;
696 	icmp->icmp_cksum = 0;
697 	icmp->icmp_pmvoid = 0;
698 	icmp->icmp_nextmtu = htons(mtu);
699 	m_copydata(m, 0, len, mtodo(n, sizeof(struct ip) +
700 	    sizeof(struct icmphdr) + sizeof(uint32_t)));
701 	icmp->icmp_cksum = in_cksum_skip(n, sizeof(struct ip) + plen,
702 	    sizeof(struct ip));
703 	m_freem(m);
704 	nat64_output_one(n, stats, logdata);
705 	return;
706 freeit:
707 	NAT64STAT_INC(stats, dropped);
708 	m_freem(m);
709 }
710 
711 /* Translate ICMP echo request/reply into ICMPv6 */
712 static void
713 nat64_icmp_handle_echo(struct ip6_hdr *ip6, struct icmp6_hdr *icmp6,
714     uint16_t id, uint8_t type)
715 {
716 	uint16_t old;
717 
718 	old = *(uint16_t *)icmp6;	/* save type+code in one word */
719 	icmp6->icmp6_type = type;
720 	/* Reflect ICMPv6 -> ICMPv4 type translation in the cksum */
721 	icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum,
722 	    old, *(uint16_t *)icmp6);
723 	if (id != 0) {
724 		old = icmp6->icmp6_id;
725 		icmp6->icmp6_id = id;
726 		/* Reflect ICMP id translation in the cksum */
727 		icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum,
728 		    old, id);
729 	}
730 	/* Reflect IPv6 pseudo header in the cksum */
731 	icmp6->icmp6_cksum = ~in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen),
732 	    IPPROTO_ICMPV6, ~icmp6->icmp6_cksum);
733 }
734 
735 static NAT64NOINLINE struct mbuf *
736 nat64_icmp_translate(struct mbuf *m, struct ip6_hdr *ip6, uint16_t icmpid,
737     int offset, nat64_stats_block *stats)
738 {
739 	struct ip ip;
740 	struct icmp *icmp;
741 	struct tcphdr *tcp;
742 	struct udphdr *udp;
743 	struct ip6_hdr *eip6;
744 	struct mbuf *n;
745 	uint32_t mtu;
746 	int len, hlen, plen;
747 	uint8_t type, code;
748 
749 	if (m->m_len < offset + ICMP_MINLEN)
750 		m = m_pullup(m, offset + ICMP_MINLEN);
751 	if (m == NULL) {
752 		NAT64STAT_INC(stats, nomem);
753 		return (m);
754 	}
755 	mtu = 0;
756 	icmp = mtodo(m, offset);
757 	/* RFC 7915 p4.2 */
758 	switch (icmp->icmp_type) {
759 	case ICMP_ECHOREPLY:
760 		type = ICMP6_ECHO_REPLY;
761 		code = 0;
762 		break;
763 	case ICMP_UNREACH:
764 		type = ICMP6_DST_UNREACH;
765 		switch (icmp->icmp_code) {
766 		case ICMP_UNREACH_NET:
767 		case ICMP_UNREACH_HOST:
768 		case ICMP_UNREACH_SRCFAIL:
769 		case ICMP_UNREACH_NET_UNKNOWN:
770 		case ICMP_UNREACH_HOST_UNKNOWN:
771 		case ICMP_UNREACH_TOSNET:
772 		case ICMP_UNREACH_TOSHOST:
773 			code = ICMP6_DST_UNREACH_NOROUTE;
774 			break;
775 		case ICMP_UNREACH_PROTOCOL:
776 			type = ICMP6_PARAM_PROB;
777 			code = ICMP6_PARAMPROB_NEXTHEADER;
778 			break;
779 		case ICMP_UNREACH_PORT:
780 			code = ICMP6_DST_UNREACH_NOPORT;
781 			break;
782 		case ICMP_UNREACH_NEEDFRAG:
783 			type = ICMP6_PACKET_TOO_BIG;
784 			code = 0;
785 			/* XXX: needs an additional look */
786 			mtu = max(IPV6_MMTU, ntohs(icmp->icmp_nextmtu) + 20);
787 			break;
788 		case ICMP_UNREACH_NET_PROHIB:
789 		case ICMP_UNREACH_HOST_PROHIB:
790 		case ICMP_UNREACH_FILTER_PROHIB:
791 		case ICMP_UNREACH_PRECEDENCE_CUTOFF:
792 			code = ICMP6_DST_UNREACH_ADMIN;
793 			break;
794 		default:
795 			DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d",
796 			    icmp->icmp_type, icmp->icmp_code);
797 			goto freeit;
798 		}
799 		break;
800 	case ICMP_TIMXCEED:
801 		type = ICMP6_TIME_EXCEEDED;
802 		code = icmp->icmp_code;
803 		break;
804 	case ICMP_ECHO:
805 		type = ICMP6_ECHO_REQUEST;
806 		code = 0;
807 		break;
808 	case ICMP_PARAMPROB:
809 		type = ICMP6_PARAM_PROB;
810 		switch (icmp->icmp_code) {
811 		case ICMP_PARAMPROB_ERRATPTR:
812 		case ICMP_PARAMPROB_LENGTH:
813 			code = ICMP6_PARAMPROB_HEADER;
814 			switch (icmp->icmp_pptr) {
815 			case 0: /* Version/IHL */
816 			case 1: /* Type Of Service */
817 				mtu = icmp->icmp_pptr;
818 				break;
819 			case 2: /* Total Length */
820 			case 3: mtu = 4; /* Payload Length */
821 				break;
822 			case 8: /* Time to Live */
823 				mtu = 7; /* Hop Limit */
824 				break;
825 			case 9: /* Protocol */
826 				mtu = 6; /* Next Header */
827 				break;
828 			case 12: /* Source address */
829 			case 13:
830 			case 14:
831 			case 15:
832 				mtu = 8;
833 				break;
834 			case 16: /* Destination address */
835 			case 17:
836 			case 18:
837 			case 19:
838 				mtu = 24;
839 				break;
840 			default: /* Silently drop */
841 				DPRINTF(DP_DROPS, "Unsupported ICMP type %d,"
842 				    " code %d, pptr %d", icmp->icmp_type,
843 				    icmp->icmp_code, icmp->icmp_pptr);
844 				goto freeit;
845 			}
846 			break;
847 		default:
848 			DPRINTF(DP_DROPS, "Unsupported ICMP type %d,"
849 			    " code %d, pptr %d", icmp->icmp_type,
850 			    icmp->icmp_code, icmp->icmp_pptr);
851 			goto freeit;
852 		}
853 		break;
854 	default:
855 		DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d",
856 		    icmp->icmp_type, icmp->icmp_code);
857 		goto freeit;
858 	}
859 	/*
860 	 * For echo request/reply we can use original payload,
861 	 * but we need adjust icmp_cksum, because ICMPv6 cksum covers
862 	 * IPv6 pseudo header and ICMPv6 types differs from ICMPv4.
863 	 */
864 	if (type == ICMP6_ECHO_REQUEST || type == ICMP6_ECHO_REPLY) {
865 		nat64_icmp_handle_echo(ip6, ICMP6(icmp), icmpid, type);
866 		return (m);
867 	}
868 	/*
869 	 * For other types of ICMP messages we need to translate inner
870 	 * IPv4 header to IPv6 header.
871 	 * Assume ICMP src is the same as payload dst
872 	 * E.g. we have ( GWsrc1 , NATIP1 ) in outer header
873 	 * and          ( NATIP1, Hostdst1 ) in ICMP copy header.
874 	 * In that case, we already have map for NATIP1 and GWsrc1.
875 	 * The only thing we need is to copy IPv6 map prefix to
876 	 * Hostdst1.
877 	 */
878 	hlen = offset + ICMP_MINLEN;
879 	if (m->m_pkthdr.len < hlen + sizeof(struct ip) + ICMP_MINLEN) {
880 		DPRINTF(DP_DROPS, "Message is too short %d",
881 		    m->m_pkthdr.len);
882 		goto freeit;
883 	}
884 	m_copydata(m, hlen, sizeof(struct ip), (char *)&ip);
885 	if (ip.ip_v != IPVERSION) {
886 		DPRINTF(DP_DROPS, "Wrong IP version %d", ip.ip_v);
887 		goto freeit;
888 	}
889 	hlen += ip.ip_hl << 2; /* Skip inner IP header */
890 	if (nat64_check_ip4(ip.ip_src.s_addr) != 0 ||
891 	    nat64_check_ip4(ip.ip_dst.s_addr) != 0 ||
892 	    nat64_check_private_ip4(ip.ip_src.s_addr) != 0 ||
893 	    nat64_check_private_ip4(ip.ip_dst.s_addr) != 0) {
894 		DPRINTF(DP_DROPS, "IP addresses checks failed %04x -> %04x",
895 		    ntohl(ip.ip_src.s_addr), ntohl(ip.ip_dst.s_addr));
896 		goto freeit;
897 	}
898 	if (m->m_pkthdr.len < hlen + ICMP_MINLEN) {
899 		DPRINTF(DP_DROPS, "Message is too short %d",
900 		    m->m_pkthdr.len);
901 		goto freeit;
902 	}
903 #if 0
904 	/*
905 	 * Check that inner source matches the outer destination.
906 	 * XXX: We need some method to convert IPv4 into IPv6 address here,
907 	 *	and compare IPv6 addresses.
908 	 */
909 	if (ip.ip_src.s_addr != nat64_get_ip4(&ip6->ip6_dst)) {
910 		DPRINTF(DP_GENERIC, "Inner source doesn't match destination ",
911 		    "%04x vs %04x", ip.ip_src.s_addr,
912 		    nat64_get_ip4(&ip6->ip6_dst));
913 		goto freeit;
914 	}
915 #endif
916 	/*
917 	 * Create new mbuf for ICMPv6 datagram.
918 	 * NOTE: len is data length just after inner IP header.
919 	 */
920 	len = m->m_pkthdr.len - hlen;
921 	if (sizeof(struct ip6_hdr) +
922 	    sizeof(struct icmp6_hdr) + len > NAT64_ICMP6_PLEN)
923 		len = NAT64_ICMP6_PLEN - sizeof(struct icmp6_hdr) -
924 		    sizeof(struct ip6_hdr);
925 	plen = sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr) + len;
926 	n = m_get2(offset + plen + max_hdr, M_NOWAIT, MT_HEADER, M_PKTHDR);
927 	if (n == NULL) {
928 		NAT64STAT_INC(stats, nomem);
929 		m_freem(m);
930 		return (NULL);
931 	}
932 	m_move_pkthdr(n, m);
933 	M_ALIGN(n, offset + plen + max_hdr);
934 	n->m_len = n->m_pkthdr.len = offset + plen;
935 	/* Adjust ip6_plen in outer header */
936 	ip6->ip6_plen = htons(plen);
937 	/* Construct new inner IPv6 header */
938 	eip6 = mtodo(n, offset + sizeof(struct icmp6_hdr));
939 	eip6->ip6_src = ip6->ip6_dst;
940 	/* Use the fact that we have single /96 prefix for IPv4 map */
941 	eip6->ip6_dst = ip6->ip6_src;
942 	nat64_set_ip4(&eip6->ip6_dst, ip.ip_dst.s_addr);
943 
944 	eip6->ip6_flow = htonl(ip.ip_tos << 20);
945 	eip6->ip6_vfc |= IPV6_VERSION;
946 	eip6->ip6_hlim = ip.ip_ttl;
947 	eip6->ip6_plen = htons(ntohs(ip.ip_len) - (ip.ip_hl << 2));
948 	eip6->ip6_nxt = (ip.ip_p == IPPROTO_ICMP) ? IPPROTO_ICMPV6: ip.ip_p;
949 	m_copydata(m, hlen, len, (char *)(eip6 + 1));
950 	/*
951 	 * We need to translate source port in the inner ULP header,
952 	 * and adjust ULP checksum.
953 	 */
954 	switch (ip.ip_p) {
955 	case IPPROTO_TCP:
956 		if (len < offsetof(struct tcphdr, th_sum))
957 			break;
958 		tcp = TCP(eip6 + 1);
959 		if (icmpid != 0) {
960 			tcp->th_sum = cksum_adjust(tcp->th_sum,
961 			    tcp->th_sport, icmpid);
962 			tcp->th_sport = icmpid;
963 		}
964 		tcp->th_sum = cksum_add(tcp->th_sum,
965 		    ~nat64_cksum_convert(eip6, &ip));
966 		break;
967 	case IPPROTO_UDP:
968 		if (len < offsetof(struct udphdr, uh_sum))
969 			break;
970 		udp = UDP(eip6 + 1);
971 		if (icmpid != 0) {
972 			udp->uh_sum = cksum_adjust(udp->uh_sum,
973 			    udp->uh_sport, icmpid);
974 			udp->uh_sport = icmpid;
975 		}
976 		udp->uh_sum = cksum_add(udp->uh_sum,
977 		    ~nat64_cksum_convert(eip6, &ip));
978 		break;
979 	case IPPROTO_ICMP:
980 		/*
981 		 * Check if this is an ICMP error message for echo request
982 		 * that we sent. I.e. ULP in the data containing invoking
983 		 * packet is IPPROTO_ICMP and its type is ICMP_ECHO.
984 		 */
985 		icmp = (struct icmp *)(eip6 + 1);
986 		if (icmp->icmp_type != ICMP_ECHO) {
987 			m_freem(n);
988 			goto freeit;
989 		}
990 		/*
991 		 * For our client this original datagram should looks
992 		 * like it was ICMPv6 datagram with type ICMP6_ECHO_REQUEST.
993 		 * Thus we need adjust icmp_cksum and convert type from
994 		 * ICMP_ECHO to ICMP6_ECHO_REQUEST.
995 		 */
996 		nat64_icmp_handle_echo(eip6, ICMP6(icmp), icmpid,
997 		    ICMP6_ECHO_REQUEST);
998 	}
999 	m_freem(m);
1000 	/* Convert ICMPv4 into ICMPv6 header */
1001 	icmp = mtodo(n, offset);
1002 	ICMP6(icmp)->icmp6_type = type;
1003 	ICMP6(icmp)->icmp6_code = code;
1004 	ICMP6(icmp)->icmp6_mtu = htonl(mtu);
1005 	ICMP6(icmp)->icmp6_cksum = 0;
1006 	ICMP6(icmp)->icmp6_cksum = cksum_add(
1007 	    ~in6_cksum_pseudo(ip6, plen, IPPROTO_ICMPV6, 0),
1008 	    in_cksum_skip(n, n->m_pkthdr.len, offset));
1009 	return (n);
1010 freeit:
1011 	m_freem(m);
1012 	NAT64STAT_INC(stats, dropped);
1013 	return (NULL);
1014 }
1015 
1016 int
1017 nat64_getlasthdr(struct mbuf *m, int *offset)
1018 {
1019 	struct ip6_hdr *ip6;
1020 	struct ip6_hbh *hbh;
1021 	int proto, hlen;
1022 
1023 	if (offset != NULL)
1024 		hlen = *offset;
1025 	else
1026 		hlen = 0;
1027 
1028 	if (m->m_len < hlen + sizeof(*ip6))
1029 		return (-1);
1030 
1031 	ip6 = mtodo(m, hlen);
1032 	hlen += sizeof(*ip6);
1033 	proto = ip6->ip6_nxt;
1034 	/* Skip extension headers */
1035 	while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING ||
1036 	    proto == IPPROTO_DSTOPTS) {
1037 		hbh = mtodo(m, hlen);
1038 		/*
1039 		 * We expect mbuf has contigious data up to
1040 		 * upper level header.
1041 		 */
1042 		if (m->m_len < hlen)
1043 			return (-1);
1044 		/*
1045 		 * We doesn't support Jumbo payload option,
1046 		 * so return error.
1047 		 */
1048 		if (proto == IPPROTO_HOPOPTS && ip6->ip6_plen == 0)
1049 			return (-1);
1050 		proto = hbh->ip6h_nxt;
1051 		hlen += (hbh->ip6h_len + 1) << 3;
1052 	}
1053 	if (offset != NULL)
1054 		*offset = hlen;
1055 	return (proto);
1056 }
1057 
1058 int
1059 nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr,
1060     struct in6_addr *daddr, uint16_t lport, nat64_stats_block *stats,
1061     void *logdata)
1062 {
1063 	struct nhop6_basic nh;
1064 	struct ip6_hdr ip6;
1065 	struct sockaddr_in6 dst;
1066 	struct ip *ip;
1067 	struct mbufq mq;
1068 	uint16_t ip_id, ip_off;
1069 	uint16_t *csum;
1070 	int plen, hlen;
1071 	uint8_t proto;
1072 
1073 	ip = mtod(m, struct ip*);
1074 
1075 	if (ip->ip_ttl <= IPTTLDEC) {
1076 		nat64_icmp_reflect(m, ICMP_TIMXCEED,
1077 		    ICMP_TIMXCEED_INTRANS, 0, stats, logdata);
1078 		return (NAT64RETURN);
1079 	}
1080 
1081 	ip6.ip6_dst = *daddr;
1082 	ip6.ip6_src = *saddr;
1083 
1084 	hlen = ip->ip_hl << 2;
1085 	plen = ntohs(ip->ip_len) - hlen;
1086 	proto = ip->ip_p;
1087 
1088 	/* Save ip_id and ip_off, both are in network byte order */
1089 	ip_id = ip->ip_id;
1090 	ip_off = ip->ip_off & htons(IP_OFFMASK | IP_MF);
1091 
1092 	/* Fragment length must be multiple of 8 octets */
1093 	if ((ip->ip_off & htons(IP_MF)) != 0 && (plen & 0x7) != 0) {
1094 		nat64_icmp_reflect(m, ICMP_PARAMPROB,
1095 		    ICMP_PARAMPROB_LENGTH, 0, stats, logdata);
1096 		return (NAT64RETURN);
1097 	}
1098 	/* Fragmented ICMP is unsupported */
1099 	if (proto == IPPROTO_ICMP && ip_off != 0) {
1100 		DPRINTF(DP_DROPS, "dropped due to fragmented ICMP");
1101 		NAT64STAT_INC(stats, dropped);
1102 		return (NAT64MFREE);
1103 	}
1104 
1105 	dst.sin6_addr = ip6.ip6_dst;
1106 	if (nat64_find_route6(&nh, &dst, m) != 0) {
1107 		NAT64STAT_INC(stats, noroute6);
1108 		nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0,
1109 		    stats, logdata);
1110 		return (NAT64RETURN);
1111 	}
1112 	if (nh.nh_mtu < plen + sizeof(ip6) &&
1113 	    (ip->ip_off & htons(IP_DF)) != 0) {
1114 		nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
1115 		    FRAGSZ(nh.nh_mtu) + sizeof(struct ip), stats, logdata);
1116 		return (NAT64RETURN);
1117 	}
1118 
1119 	ip6.ip6_flow = htonl(ip->ip_tos << 20);
1120 	ip6.ip6_vfc |= IPV6_VERSION;
1121 #ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
1122 	ip6.ip6_hlim = ip->ip_ttl - IPTTLDEC;
1123 #else
1124 	/* Forwarding code will decrement HLIM. */
1125 	ip6.ip6_hlim = ip->ip_ttl;
1126 #endif
1127 	ip6.ip6_plen = htons(plen);
1128 	ip6.ip6_nxt = (proto == IPPROTO_ICMP) ? IPPROTO_ICMPV6: proto;
1129 	/* Convert checksums. */
1130 	switch (proto) {
1131 	case IPPROTO_TCP:
1132 		csum = &TCP(mtodo(m, hlen))->th_sum;
1133 		if (lport != 0) {
1134 			struct tcphdr *tcp = TCP(mtodo(m, hlen));
1135 			*csum = cksum_adjust(*csum, tcp->th_dport, lport);
1136 			tcp->th_dport = lport;
1137 		}
1138 		*csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));
1139 		break;
1140 	case IPPROTO_UDP:
1141 		csum = &UDP(mtodo(m, hlen))->uh_sum;
1142 		if (lport != 0) {
1143 			struct udphdr *udp = UDP(mtodo(m, hlen));
1144 			*csum = cksum_adjust(*csum, udp->uh_dport, lport);
1145 			udp->uh_dport = lport;
1146 		}
1147 		*csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));
1148 		break;
1149 	case IPPROTO_ICMP:
1150 		m = nat64_icmp_translate(m, &ip6, lport, hlen, stats);
1151 		if (m == NULL)	/* stats already accounted */
1152 			return (NAT64RETURN);
1153 	}
1154 
1155 	m_adj(m, hlen);
1156 	mbufq_init(&mq, 255);
1157 	nat64_fragment6(stats, &ip6, &mq, m, nh.nh_mtu, ip_id, ip_off);
1158 	while ((m = mbufq_dequeue(&mq)) != NULL) {
1159 		if (nat64_output(nh.nh_ifp, m, (struct sockaddr *)&dst,
1160 		    NULL, stats, logdata) != 0)
1161 			break;
1162 		NAT64STAT_INC(stats, opcnt46);
1163 	}
1164 	mbufq_drain(&mq);
1165 	return (NAT64RETURN);
1166 }
1167 
1168 int
1169 nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport,
1170     nat64_stats_block *stats, void *logdata)
1171 {
1172 	struct ip ip;
1173 	struct icmp6_hdr *icmp6;
1174 	struct ip6_frag *ip6f;
1175 	struct ip6_hdr *ip6, *ip6i;
1176 	uint32_t mtu;
1177 	int plen, proto;
1178 	uint8_t type, code;
1179 
1180 	if (hlen == 0) {
1181 		ip6 = mtod(m, struct ip6_hdr *);
1182 		if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
1183 		    nat64_check_ip6(&ip6->ip6_dst) != 0)
1184 			return (NAT64SKIP);
1185 
1186 		proto = nat64_getlasthdr(m, &hlen);
1187 		if (proto != IPPROTO_ICMPV6) {
1188 			DPRINTF(DP_DROPS,
1189 			    "dropped due to mbuf isn't contigious");
1190 			NAT64STAT_INC(stats, dropped);
1191 			return (NAT64MFREE);
1192 		}
1193 	}
1194 
1195 	/*
1196 	 * Translate ICMPv6 type and code to ICMPv4 (RFC7915).
1197 	 * NOTE: ICMPv6 echo handled by nat64_do_handle_ip6().
1198 	 */
1199 	icmp6 = mtodo(m, hlen);
1200 	mtu = 0;
1201 	switch (icmp6->icmp6_type) {
1202 	case ICMP6_DST_UNREACH:
1203 		type = ICMP_UNREACH;
1204 		switch (icmp6->icmp6_code) {
1205 		case ICMP6_DST_UNREACH_NOROUTE:
1206 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
1207 		case ICMP6_DST_UNREACH_ADDR:
1208 			code = ICMP_UNREACH_HOST;
1209 			break;
1210 		case ICMP6_DST_UNREACH_ADMIN:
1211 			code = ICMP_UNREACH_HOST_PROHIB;
1212 			break;
1213 		case ICMP6_DST_UNREACH_NOPORT:
1214 			code = ICMP_UNREACH_PORT;
1215 			break;
1216 		default:
1217 			DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
1218 			    " code %d", icmp6->icmp6_type,
1219 			    icmp6->icmp6_code);
1220 			NAT64STAT_INC(stats, dropped);
1221 			return (NAT64MFREE);
1222 		}
1223 		break;
1224 	case ICMP6_PACKET_TOO_BIG:
1225 		type = ICMP_UNREACH;
1226 		code = ICMP_UNREACH_NEEDFRAG;
1227 		mtu = ntohl(icmp6->icmp6_mtu);
1228 		if (mtu < IPV6_MMTU) {
1229 			DPRINTF(DP_DROPS, "Wrong MTU %d in ICMPv6 type %d,"
1230 			    " code %d", mtu, icmp6->icmp6_type,
1231 			    icmp6->icmp6_code);
1232 			NAT64STAT_INC(stats, dropped);
1233 			return (NAT64MFREE);
1234 		}
1235 		/*
1236 		 * Adjust MTU to reflect difference between
1237 		 * IPv6 an IPv4 headers.
1238 		 */
1239 		mtu -= sizeof(struct ip6_hdr) - sizeof(struct ip);
1240 		break;
1241 	case ICMP6_TIME_EXCEEDED:
1242 		type = ICMP_TIMXCEED;
1243 		code = icmp6->icmp6_code;
1244 		break;
1245 	case ICMP6_PARAM_PROB:
1246 		switch (icmp6->icmp6_code) {
1247 		case ICMP6_PARAMPROB_HEADER:
1248 			type = ICMP_PARAMPROB;
1249 			code = ICMP_PARAMPROB_ERRATPTR;
1250 			mtu = ntohl(icmp6->icmp6_pptr);
1251 			switch (mtu) {
1252 			case 0: /* Version/Traffic Class */
1253 			case 1: /* Traffic Class/Flow Label */
1254 				break;
1255 			case 4: /* Payload Length */
1256 			case 5:
1257 				mtu = 2;
1258 				break;
1259 			case 6: /* Next Header */
1260 				mtu = 9;
1261 				break;
1262 			case 7: /* Hop Limit */
1263 				mtu = 8;
1264 				break;
1265 			default:
1266 				if (mtu >= 8 && mtu <= 23) {
1267 					mtu = 12; /* Source address */
1268 					break;
1269 				}
1270 				if (mtu >= 24 && mtu <= 39) {
1271 					mtu = 16; /* Destination address */
1272 					break;
1273 				}
1274 				DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
1275 				    " code %d, pptr %d", icmp6->icmp6_type,
1276 				    icmp6->icmp6_code, mtu);
1277 				NAT64STAT_INC(stats, dropped);
1278 				return (NAT64MFREE);
1279 			}
1280 		case ICMP6_PARAMPROB_NEXTHEADER:
1281 			type = ICMP_UNREACH;
1282 			code = ICMP_UNREACH_PROTOCOL;
1283 			break;
1284 		default:
1285 			DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
1286 			    " code %d, pptr %d", icmp6->icmp6_type,
1287 			    icmp6->icmp6_code, ntohl(icmp6->icmp6_pptr));
1288 			NAT64STAT_INC(stats, dropped);
1289 			return (NAT64MFREE);
1290 		}
1291 		break;
1292 	default:
1293 		DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d, code %d",
1294 		    icmp6->icmp6_type, icmp6->icmp6_code);
1295 		NAT64STAT_INC(stats, dropped);
1296 		return (NAT64MFREE);
1297 	}
1298 
1299 	hlen += sizeof(struct icmp6_hdr);
1300 	if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) {
1301 		NAT64STAT_INC(stats, dropped);
1302 		DPRINTF(DP_DROPS, "Message is too short %d",
1303 		    m->m_pkthdr.len);
1304 		return (NAT64MFREE);
1305 	}
1306 	/*
1307 	 * We need at least ICMP_MINLEN bytes of original datagram payload
1308 	 * to generate ICMP message. It is nice that ICMP_MINLEN is equal
1309 	 * to sizeof(struct ip6_frag). So, if embedded datagram had a fragment
1310 	 * header we will not have to do m_pullup() again.
1311 	 *
1312 	 * What we have here:
1313 	 * Outer header: (IPv6iGW, v4mapPRefix+v4exthost)
1314 	 * Inner header: (v4mapPRefix+v4host, IPv6iHost) [sport, dport]
1315 	 * We need to translate it to:
1316 	 *
1317 	 * Outer header: (alias_host, v4exthost)
1318 	 * Inner header: (v4exthost, alias_host) [sport, alias_port]
1319 	 *
1320 	 * Assume caller function has checked if v4mapPRefix+v4host
1321 	 * matches configured prefix.
1322 	 * The only two things we should be provided with are mapping between
1323 	 * IPv6iHost <> alias_host and between dport and alias_port.
1324 	 */
1325 	if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN)
1326 		m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN);
1327 	if (m == NULL) {
1328 		NAT64STAT_INC(stats, nomem);
1329 		return (NAT64RETURN);
1330 	}
1331 	ip6 = mtod(m, struct ip6_hdr *);
1332 	ip6i = mtodo(m, hlen);
1333 	ip6f = NULL;
1334 	proto = ip6i->ip6_nxt;
1335 	plen = ntohs(ip6i->ip6_plen);
1336 	hlen += sizeof(struct ip6_hdr);
1337 	if (proto == IPPROTO_FRAGMENT) {
1338 		if (m->m_pkthdr.len < hlen + sizeof(struct ip6_frag) +
1339 		    ICMP_MINLEN)
1340 			goto fail;
1341 		ip6f = mtodo(m, hlen);
1342 		proto = ip6f->ip6f_nxt;
1343 		plen -= sizeof(struct ip6_frag);
1344 		hlen += sizeof(struct ip6_frag);
1345 		/* Ajust MTU to reflect frag header size */
1346 		if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG)
1347 			mtu -= sizeof(struct ip6_frag);
1348 	}
1349 	if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
1350 		DPRINTF(DP_DROPS, "Unsupported proto %d in the inner header",
1351 		    proto);
1352 		goto fail;
1353 	}
1354 	if (nat64_check_ip6(&ip6i->ip6_src) != 0 ||
1355 	    nat64_check_ip6(&ip6i->ip6_dst) != 0) {
1356 		DPRINTF(DP_DROPS, "Inner addresses do not passes the check");
1357 		goto fail;
1358 	}
1359 	/* Check if outer dst is the same as inner src */
1360 	if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6i->ip6_src)) {
1361 		DPRINTF(DP_DROPS, "Inner src doesn't match outer dst");
1362 		goto fail;
1363 	}
1364 
1365 	/* Now we need to make a fake IPv4 packet to generate ICMP message */
1366 	ip.ip_dst.s_addr = aaddr;
1367 	ip.ip_src.s_addr = nat64_get_ip4(&ip6i->ip6_src);
1368 	/* XXX: Make fake ulp header */
1369 #ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
1370 	ip6i->ip6_hlim += IPV6_HLIMDEC; /* init_ip4hdr will decrement it */
1371 #endif
1372 	nat64_init_ip4hdr(ip6i, ip6f, plen, proto, &ip);
1373 	m_adj(m, hlen - sizeof(struct ip));
1374 	bcopy(&ip, mtod(m, void *), sizeof(ip));
1375 	nat64_icmp_reflect(m, type, code, (uint16_t)mtu, stats, logdata);
1376 	return (NAT64RETURN);
1377 fail:
1378 	/*
1379 	 * We must call m_freem() because mbuf pointer could be
1380 	 * changed with m_pullup().
1381 	 */
1382 	m_freem(m);
1383 	NAT64STAT_INC(stats, dropped);
1384 	return (NAT64RETURN);
1385 }
1386 
1387 int
1388 nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport,
1389     nat64_stats_block *stats, void *logdata)
1390 {
1391 	struct ip ip;
1392 	struct nhop4_basic nh;
1393 	struct sockaddr_in dst;
1394 	struct ip6_frag *frag;
1395 	struct ip6_hdr *ip6;
1396 	struct icmp6_hdr *icmp6;
1397 	uint16_t *csum;
1398 	int plen, hlen, proto;
1399 
1400 	/*
1401 	 * XXX: we expect ipfw_chk() did m_pullup() up to upper level
1402 	 * protocol's headers. Also we skip some checks, that ip6_input(),
1403 	 * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did.
1404 	 */
1405 	ip6 = mtod(m, struct ip6_hdr *);
1406 	if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
1407 	    nat64_check_ip6(&ip6->ip6_dst) != 0) {
1408 		return (NAT64SKIP);
1409 	}
1410 
1411 	/* Starting from this point we must not return zero */
1412 	ip.ip_src.s_addr = aaddr;
1413 	if (nat64_check_ip4(ip.ip_src.s_addr) != 0) {
1414 		DPRINTF(DP_GENERIC, "invalid source address: %08x",
1415 		    ip.ip_src.s_addr);
1416 		/* XXX: stats? */
1417 		return (NAT64MFREE);
1418 	}
1419 
1420 	ip.ip_dst.s_addr = nat64_get_ip4(&ip6->ip6_dst);
1421 	if (ip.ip_dst.s_addr == 0) {
1422 		/* XXX: stats? */
1423 		return (NAT64MFREE);
1424 	}
1425 
1426 	if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
1427 		nat64_icmp6_reflect(m, ICMP6_TIME_EXCEEDED,
1428 		    ICMP6_TIME_EXCEED_TRANSIT, 0, stats, logdata);
1429 		return (NAT64RETURN);
1430 	}
1431 
1432 	hlen = 0;
1433 	plen = ntohs(ip6->ip6_plen);
1434 	proto = nat64_getlasthdr(m, &hlen);
1435 	if (proto < 0) {
1436 		DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious");
1437 		NAT64STAT_INC(stats, dropped);
1438 		return (NAT64MFREE);
1439 	}
1440 	frag = NULL;
1441 	if (proto == IPPROTO_FRAGMENT) {
1442 		/* ipfw_chk should m_pullup up to frag header */
1443 		if (m->m_len < hlen + sizeof(*frag)) {
1444 			DPRINTF(DP_DROPS,
1445 			    "dropped due to mbuf isn't contigious");
1446 			NAT64STAT_INC(stats, dropped);
1447 			return (NAT64MFREE);
1448 		}
1449 		frag = mtodo(m, hlen);
1450 		proto = frag->ip6f_nxt;
1451 		hlen += sizeof(*frag);
1452 		/* Fragmented ICMPv6 is unsupported */
1453 		if (proto == IPPROTO_ICMPV6) {
1454 			DPRINTF(DP_DROPS, "dropped due to fragmented ICMPv6");
1455 			NAT64STAT_INC(stats, dropped);
1456 			return (NAT64MFREE);
1457 		}
1458 		/* Fragment length must be multiple of 8 octets */
1459 		if ((frag->ip6f_offlg & IP6F_MORE_FRAG) != 0 &&
1460 		    ((plen + sizeof(struct ip6_hdr) - hlen) & 0x7) != 0) {
1461 			nat64_icmp6_reflect(m, ICMP6_PARAM_PROB,
1462 			    ICMP6_PARAMPROB_HEADER,
1463 			    offsetof(struct ip6_hdr, ip6_plen), stats,
1464 			    logdata);
1465 			return (NAT64RETURN);
1466 		}
1467 	}
1468 	plen -= hlen - sizeof(struct ip6_hdr);
1469 	if (plen < 0 || m->m_pkthdr.len < plen + hlen) {
1470 		DPRINTF(DP_DROPS, "plen %d, pkthdr.len %d, hlen %d",
1471 		    plen, m->m_pkthdr.len, hlen);
1472 		NAT64STAT_INC(stats, dropped);
1473 		return (NAT64MFREE);
1474 	}
1475 
1476 	icmp6 = NULL;	/* Make gcc happy */
1477 	if (proto == IPPROTO_ICMPV6) {
1478 		icmp6 = mtodo(m, hlen);
1479 		if (icmp6->icmp6_type != ICMP6_ECHO_REQUEST &&
1480 		    icmp6->icmp6_type != ICMP6_ECHO_REPLY)
1481 			return (nat64_handle_icmp6(m, hlen, aaddr, aport,
1482 			    stats, logdata));
1483 	}
1484 	dst.sin_addr.s_addr = ip.ip_dst.s_addr;
1485 	if (nat64_find_route4(&nh, &dst, m) != 0) {
1486 		NAT64STAT_INC(stats, noroute4);
1487 		nat64_icmp6_reflect(m, ICMP6_DST_UNREACH,
1488 		    ICMP6_DST_UNREACH_NOROUTE, 0, stats, logdata);
1489 		return (NAT64RETURN);
1490 	}
1491 	if (nh.nh_mtu < plen + sizeof(ip)) {
1492 		nat64_icmp6_reflect(m, ICMP6_PACKET_TOO_BIG, 0, nh.nh_mtu,
1493 		    stats, logdata);
1494 		return (NAT64RETURN);
1495 	}
1496 	nat64_init_ip4hdr(ip6, frag, plen, proto, &ip);
1497 	/* Convert checksums. */
1498 	switch (proto) {
1499 	case IPPROTO_TCP:
1500 		csum = &TCP(mtodo(m, hlen))->th_sum;
1501 		if (aport != 0) {
1502 			struct tcphdr *tcp = TCP(mtodo(m, hlen));
1503 			*csum = cksum_adjust(*csum, tcp->th_sport, aport);
1504 			tcp->th_sport = aport;
1505 		}
1506 		*csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));
1507 		break;
1508 	case IPPROTO_UDP:
1509 		csum = &UDP(mtodo(m, hlen))->uh_sum;
1510 		if (aport != 0) {
1511 			struct udphdr *udp = UDP(mtodo(m, hlen));
1512 			*csum = cksum_adjust(*csum, udp->uh_sport, aport);
1513 			udp->uh_sport = aport;
1514 		}
1515 		*csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));
1516 		break;
1517 	case IPPROTO_ICMPV6:
1518 		/* Checksum in ICMPv6 covers pseudo header */
1519 		csum = &icmp6->icmp6_cksum;
1520 		*csum = cksum_add(*csum, in6_cksum_pseudo(ip6, plen,
1521 		    IPPROTO_ICMPV6, 0));
1522 		/* Convert ICMPv6 types to ICMP */
1523 		proto = *(uint16_t *)icmp6; /* save old word for cksum_adjust */
1524 		if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST)
1525 			icmp6->icmp6_type = ICMP_ECHO;
1526 		else /* ICMP6_ECHO_REPLY */
1527 			icmp6->icmp6_type = ICMP_ECHOREPLY;
1528 		*csum = cksum_adjust(*csum, (uint16_t)proto,
1529 		    *(uint16_t *)icmp6);
1530 		if (aport != 0) {
1531 			uint16_t old_id = icmp6->icmp6_id;
1532 			icmp6->icmp6_id = aport;
1533 			*csum = cksum_adjust(*csum, old_id, aport);
1534 		}
1535 		break;
1536 	};
1537 
1538 	m_adj(m, hlen - sizeof(ip));
1539 	bcopy(&ip, mtod(m, void *), sizeof(ip));
1540 	if (nat64_output(nh.nh_ifp, m, (struct sockaddr *)&dst, NULL,
1541 	    stats, logdata) == 0)
1542 		NAT64STAT_INC(stats, opcnt64);
1543 	return (NAT64RETURN);
1544 }
1545 
1546