xref: /freebsd/sys/netpfil/ipfw/nat64/nat64_translate.c (revision 62ff619dcc3540659a319be71c9a489f1659e14a)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2015-2019 Yandex LLC
5  * Copyright (c) 2015-2019 Andrey V. Elsukov <ae@FreeBSD.org>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_ipstealth.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/counter.h>
37 #include <sys/errno.h>
38 #include <sys/kernel.h>
39 #include <sys/lock.h>
40 #include <sys/mbuf.h>
41 #include <sys/module.h>
42 #include <sys/rmlock.h>
43 #include <sys/rwlock.h>
44 #include <sys/socket.h>
45 #include <sys/queue.h>
46 
47 #include <net/if.h>
48 #include <net/if_var.h>
49 #include <net/if_pflog.h>
50 #include <net/pfil.h>
51 #include <net/netisr.h>
52 #include <net/route.h>
53 #include <net/route/nhop.h>
54 
55 #include <netinet/in.h>
56 #include <netinet/in_fib.h>
57 #include <netinet/in_var.h>
58 #include <netinet/ip.h>
59 #include <netinet/ip_var.h>
60 #include <netinet/ip_fw.h>
61 #include <netinet/ip6.h>
62 #include <netinet/icmp6.h>
63 #include <netinet/ip_icmp.h>
64 #include <netinet/tcp.h>
65 #include <netinet/udp.h>
66 #include <netinet6/in6_var.h>
67 #include <netinet6/in6_fib.h>
68 #include <netinet6/ip6_var.h>
69 #include <netinet6/ip_fw_nat64.h>
70 
71 #include <netpfil/pf/pf.h>
72 #include <netpfil/ipfw/ip_fw_private.h>
73 #include <machine/in_cksum.h>
74 
75 #include "ip_fw_nat64.h"
76 #include "nat64_translate.h"
77 
/*
 * Output callback signatures.
 *
 * nat64_output_t receives the outgoing interface and destination sockaddr
 * from the caller; nat64_output_one_t receives only the packet.  Both take
 * the statistics block to update and an optional log record.
 */
typedef int (*nat64_output_t)(struct ifnet *ifp, struct mbuf *m,
    struct sockaddr *dst, struct nat64_counters *stats, void *logdata);
typedef int (*nat64_output_one_t)(struct mbuf *m,
    struct nat64_counters *stats, void *logdata);

/* Route lookup helpers. */
static struct nhop_object *nat64_find_route4(struct sockaddr_in *dst,
    struct mbuf *m);
static struct nhop_object *nat64_find_route6(struct sockaddr_in6 *dst,
    struct mbuf *m);

/* netisr based output. */
static int nat64_output(struct ifnet *ifp, struct mbuf *m,
    struct sockaddr *dst, struct nat64_counters *stats, void *logdata);
static int nat64_output_one(struct mbuf *m, struct nat64_counters *stats,
    void *logdata);

/* Direct (if_output) based output. */
static int nat64_direct_output(struct ifnet *ifp, struct mbuf *m,
    struct sockaddr *dst, struct nat64_counters *stats, void *logdata);
static int nat64_direct_output_one(struct mbuf *m,
    struct nat64_counters *stats, void *logdata);
94 
95 struct nat64_methods {
96 	nat64_output_t		output;
97 	nat64_output_one_t	output_one;
98 };
99 static const struct nat64_methods nat64_netisr = {
100 	.output = nat64_output,
101 	.output_one = nat64_output_one
102 };
103 static const struct nat64_methods nat64_direct = {
104 	.output = nat64_direct_output,
105 	.output_one = nat64_direct_output_one
106 };
107 
108 /* These variables should be initialized explicitly on module loading */
109 VNET_DEFINE_STATIC(const struct nat64_methods *, nat64out);
110 VNET_DEFINE_STATIC(const int *, nat64ipstealth);
111 VNET_DEFINE_STATIC(const int *, nat64ip6stealth);
112 #define	V_nat64out		VNET(nat64out)
113 #define	V_nat64ipstealth	VNET(nat64ipstealth)
114 #define	V_nat64ip6stealth	VNET(nat64ip6stealth)
115 
116 static const int stealth_on = 1;
117 #ifndef IPSTEALTH
118 static const int stealth_off = 0;
119 #endif
120 
121 void
122 nat64_set_output_method(int direct)
123 {
124 
125 	if (direct != 0) {
126 		V_nat64out = &nat64_direct;
127 #ifdef IPSTEALTH
128 		/* Honor corresponding variables, if IPSTEALTH is defined */
129 		V_nat64ipstealth = &V_ipstealth;
130 		V_nat64ip6stealth = &V_ip6stealth;
131 #else
132 		/* otherwise we need to decrement HLIM/TTL for direct case */
133 		V_nat64ipstealth = V_nat64ip6stealth = &stealth_off;
134 #endif
135 	} else {
136 		V_nat64out = &nat64_netisr;
137 		/* Leave TTL/HLIM decrementing to forwarding code */
138 		V_nat64ipstealth = V_nat64ip6stealth = &stealth_on;
139 	}
140 }
141 
142 int
143 nat64_get_output_method(void)
144 {
145 
146 	return (V_nat64out == &nat64_direct ? 1: 0);
147 }
148 
149 static void
150 nat64_log(struct pfloghdr *logdata, struct mbuf *m, sa_family_t family)
151 {
152 
153 	logdata->dir = PF_OUT;
154 	logdata->af = family;
155 	ipfw_bpf_mtap2(logdata, PFLOG_HDRLEN, m);
156 }
157 
158 static int
159 nat64_direct_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
160     struct nat64_counters *stats, void *logdata)
161 {
162 	int error;
163 
164 	if (logdata != NULL)
165 		nat64_log(logdata, m, dst->sa_family);
166 	error = (*ifp->if_output)(ifp, m, dst, NULL);
167 	if (error != 0)
168 		NAT64STAT_INC(stats, oerrors);
169 	return (error);
170 }
171 
172 static int
173 nat64_direct_output_one(struct mbuf *m, struct nat64_counters *stats,
174     void *logdata)
175 {
176 	struct nhop_object *nh4 = NULL;
177 	struct nhop_object *nh6 = NULL;
178 	struct sockaddr_in6 dst6;
179 	struct sockaddr_in dst4;
180 	struct sockaddr *dst;
181 	struct ip6_hdr *ip6;
182 	struct ip *ip4;
183 	struct ifnet *ifp;
184 	int error;
185 
186 	ip4 = mtod(m, struct ip *);
187 	error = 0;
188 	switch (ip4->ip_v) {
189 	case IPVERSION:
190 		dst4.sin_addr = ip4->ip_dst;
191 		nh4 = nat64_find_route4(&dst4, m);
192 		if (nh4 == NULL) {
193 			NAT64STAT_INC(stats, noroute4);
194 			error = EHOSTUNREACH;
195 		} else {
196 			ifp = nh4->nh_ifp;
197 			dst = (struct sockaddr *)&dst4;
198 		}
199 		break;
200 	case (IPV6_VERSION >> 4):
201 		ip6 = mtod(m, struct ip6_hdr *);
202 		dst6.sin6_addr = ip6->ip6_dst;
203 		nh6 = nat64_find_route6(&dst6, m);
204 		if (nh6 == NULL) {
205 			NAT64STAT_INC(stats, noroute6);
206 			error = EHOSTUNREACH;
207 		} else {
208 			ifp = nh6->nh_ifp;
209 			dst = (struct sockaddr *)&dst6;
210 		}
211 		break;
212 	default:
213 		m_freem(m);
214 		NAT64STAT_INC(stats, dropped);
215 		DPRINTF(DP_DROPS, "dropped due to unknown IP version");
216 		return (EAFNOSUPPORT);
217 	}
218 	if (error != 0) {
219 		m_freem(m);
220 		return (EHOSTUNREACH);
221 	}
222 	if (logdata != NULL)
223 		nat64_log(logdata, m, dst->sa_family);
224 	error = (*ifp->if_output)(ifp, m, dst, NULL);
225 	if (error != 0)
226 		NAT64STAT_INC(stats, oerrors);
227 	return (error);
228 }
229 
230 static int
231 nat64_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
232     struct nat64_counters *stats, void *logdata)
233 {
234 	struct ip *ip4;
235 	int ret, af;
236 
237 	ip4 = mtod(m, struct ip *);
238 	switch (ip4->ip_v) {
239 	case IPVERSION:
240 		af = AF_INET;
241 		ret = NETISR_IP;
242 		break;
243 	case (IPV6_VERSION >> 4):
244 		af = AF_INET6;
245 		ret = NETISR_IPV6;
246 		break;
247 	default:
248 		m_freem(m);
249 		NAT64STAT_INC(stats, dropped);
250 		DPRINTF(DP_DROPS, "unknown IP version");
251 		return (EAFNOSUPPORT);
252 	}
253 	if (logdata != NULL)
254 		nat64_log(logdata, m, af);
255 	if (m->m_pkthdr.rcvif == NULL)
256 		m->m_pkthdr.rcvif = V_loif;
257 	ret = netisr_queue(ret, m);
258 	if (ret != 0)
259 		NAT64STAT_INC(stats, oerrors);
260 	return (ret);
261 }
262 
263 static int
264 nat64_output_one(struct mbuf *m, struct nat64_counters *stats, void *logdata)
265 {
266 
267 	return (nat64_output(NULL, m, NULL, stats, logdata));
268 }
269 
/*
 * Validate an IPv6 prefix length against RFC 6052, which permits only
 * 32, 40, 48, 56, 64 and 96 (the Well-Known Prefix is 96 bits long).
 * Returns zero for a permitted length, otherwise EINVAL.
 */
int
nat64_check_prefixlen(int length)
{

	if (length == 96 || length == 64 || length == 56 ||
	    length == 48 || length == 40 || length == 32)
		return (0);
	return (EINVAL);
}
291 
292 int
293 nat64_check_prefix6(const struct in6_addr *prefix, int length)
294 {
295 
296 	if (nat64_check_prefixlen(length) != 0)
297 		return (EINVAL);
298 
299 	/* Well-known prefix has 96 prefix length */
300 	if (IN6_IS_ADDR_WKPFX(prefix) && length != 96)
301 		return (EINVAL);
302 
303 	/* Bits 64 to 71 must be set to zero */
304 	if (prefix->__u6_addr.__u6_addr8[8] != 0)
305 		return (EINVAL);
306 
307 	/* Some extra checks */
308 	if (IN6_IS_ADDR_MULTICAST(prefix) ||
309 	    IN6_IS_ADDR_UNSPECIFIED(prefix) ||
310 	    IN6_IS_ADDR_LOOPBACK(prefix))
311 		return (EINVAL);
312 	return (0);
313 }
314 
315 int
316 nat64_check_private_ip4(const struct nat64_config *cfg, in_addr_t ia)
317 {
318 
319 	if (cfg->flags & NAT64_ALLOW_PRIVATE)
320 		return (0);
321 
322 	/* WKPFX must not be used to represent non-global IPv4 addresses */
323 	if (cfg->flags & NAT64_WKPFX) {
324 		/* IN_PRIVATE */
325 		if ((ia & htonl(0xff000000)) == htonl(0x0a000000) ||
326 		    (ia & htonl(0xfff00000)) == htonl(0xac100000) ||
327 		    (ia & htonl(0xffff0000)) == htonl(0xc0a80000))
328 			return (1);
329 		/*
330 		 * RFC 5735:
331 		 *  192.0.0.0/24 - reserved for IETF protocol assignments
332 		 *  192.88.99.0/24 - for use as 6to4 relay anycast addresses
333 		 *  198.18.0.0/15 - for use in benchmark tests
334 		 *  192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24 - for use
335 		 *   in documentation and example code
336 		 */
337 		if ((ia & htonl(0xffffff00)) == htonl(0xc0000000) ||
338 		    (ia & htonl(0xffffff00)) == htonl(0xc0586300) ||
339 		    (ia & htonl(0xfffffe00)) == htonl(0xc6120000) ||
340 		    (ia & htonl(0xffffff00)) == htonl(0xc0000200) ||
341 		    (ia & htonl(0xfffffe00)) == htonl(0xc6336400) ||
342 		    (ia & htonl(0xffffff00)) == htonl(0xcb007100))
343 			return (1);
344 	}
345 	return (0);
346 }
347 
348 /*
349  * Embed @ia IPv4 address into @ip6 IPv6 address.
350  * Place to embedding determined from prefix length @plen.
351  */
352 void
353 nat64_embed_ip4(struct in6_addr *ip6, int plen, in_addr_t ia)
354 {
355 
356 	switch (plen) {
357 	case 32:
358 	case 96:
359 		ip6->s6_addr32[plen / 32] = ia;
360 		break;
361 	case 40:
362 	case 48:
363 	case 56:
364 		/*
365 		 * Preserve prefix bits.
366 		 * Since suffix bits should be zero and reserved for future
367 		 * use, we just overwrite the whole word, where they are.
368 		 */
369 		ip6->s6_addr32[1] &= 0xffffffff << (32 - plen % 32);
370 #if BYTE_ORDER == BIG_ENDIAN
371 		ip6->s6_addr32[1] |= ia >> (plen % 32);
372 		ip6->s6_addr32[2] = ia << (24 - plen % 32);
373 #elif BYTE_ORDER == LITTLE_ENDIAN
374 		ip6->s6_addr32[1] |= ia << (plen % 32);
375 		ip6->s6_addr32[2] = ia >> (24 - plen % 32);
376 #endif
377 		break;
378 	case 64:
379 #if BYTE_ORDER == BIG_ENDIAN
380 		ip6->s6_addr32[2] = ia >> 8;
381 		ip6->s6_addr32[3] = ia << 24;
382 #elif BYTE_ORDER == LITTLE_ENDIAN
383 		ip6->s6_addr32[2] = ia << 8;
384 		ip6->s6_addr32[3] = ia >> 24;
385 #endif
386 		break;
387 	default:
388 		panic("Wrong plen: %d", plen);
389 	};
390 	/*
391 	 * Bits 64 to 71 of the address are reserved for compatibility
392 	 * with the host identifier format defined in the IPv6 addressing
393 	 * architecture [RFC4291]. These bits MUST be set to zero.
394 	 */
395 	ip6->s6_addr8[8] = 0;
396 }
397 
398 in_addr_t
399 nat64_extract_ip4(const struct in6_addr *ip6, int plen)
400 {
401 	in_addr_t ia;
402 
403 	/*
404 	 * According to RFC 6052 p2.2:
405 	 * IPv4-embedded IPv6 addresses are composed of a variable-length
406 	 * prefix, the embedded IPv4 address, and a variable length suffix.
407 	 * The suffix bits are reserved for future extensions and SHOULD
408 	 * be set to zero.
409 	 */
410 	switch (plen) {
411 	case 32:
412 		if (ip6->s6_addr32[3] != 0 || ip6->s6_addr32[2] != 0)
413 			goto badip6;
414 		break;
415 	case 40:
416 		if (ip6->s6_addr32[3] != 0 ||
417 		    (ip6->s6_addr32[2] & htonl(0xff00ffff)) != 0)
418 			goto badip6;
419 		break;
420 	case 48:
421 		if (ip6->s6_addr32[3] != 0 ||
422 		    (ip6->s6_addr32[2] & htonl(0xff0000ff)) != 0)
423 			goto badip6;
424 		break;
425 	case 56:
426 		if (ip6->s6_addr32[3] != 0 || ip6->s6_addr8[8] != 0)
427 			goto badip6;
428 		break;
429 	case 64:
430 		if (ip6->s6_addr8[8] != 0 ||
431 		    (ip6->s6_addr32[3] & htonl(0x00ffffff)) != 0)
432 			goto badip6;
433 	};
434 	switch (plen) {
435 	case 32:
436 	case 96:
437 		ia = ip6->s6_addr32[plen / 32];
438 		break;
439 	case 40:
440 	case 48:
441 	case 56:
442 #if BYTE_ORDER == BIG_ENDIAN
443 		ia = (ip6->s6_addr32[1] << (plen % 32)) |
444 		    (ip6->s6_addr32[2] >> (24 - plen % 32));
445 #elif BYTE_ORDER == LITTLE_ENDIAN
446 		ia = (ip6->s6_addr32[1] >> (plen % 32)) |
447 		    (ip6->s6_addr32[2] << (24 - plen % 32));
448 #endif
449 		break;
450 	case 64:
451 #if BYTE_ORDER == BIG_ENDIAN
452 		ia = (ip6->s6_addr32[2] << 8) | (ip6->s6_addr32[3] >> 24);
453 #elif BYTE_ORDER == LITTLE_ENDIAN
454 		ia = (ip6->s6_addr32[2] >> 8) | (ip6->s6_addr32[3] << 24);
455 #endif
456 		break;
457 	default:
458 		return (0);
459 	};
460 	if (nat64_check_ip4(ia) == 0)
461 		return (ia);
462 
463 	DPRINTF(DP_GENERIC | DP_DROPS,
464 	    "invalid destination address: %08x", ia);
465 	return (0);
466 badip6:
467 	DPRINTF(DP_GENERIC | DP_DROPS, "invalid IPv4-embedded IPv6 address");
468 	return (0);
469 }
470 
471 /*
472  * According to RFC 1624 the equation for incremental checksum update is:
473  *	HC' = ~(~HC + ~m + m')	--	[Eqn. 3]
474  *	HC' = HC - ~m - m'	--	[Eqn. 4]
475  * So, when we are replacing IPv4 addresses to IPv6, we
476  * can assume, that new bytes previously were zeros, and vise versa -
477  * when we replacing IPv6 addresses to IPv4, now unused bytes become
478  * zeros. The payload length in pseudo header has bigger size, but one
479  * half of it should be zero. Using the equation 4 we get:
480  *	HC' = HC - (~m0 + m0')	-- m0 is first changed word
481  *	HC' = (HC - (~m0 + m0')) - (~m1 + m1')	-- m1 is second changed word
482  *	HC' = HC - ~m0 - m0' - ~m1 - m1' - ... =
483  *	  = HC - sum(~m[i] + m'[i])
484  *
485  * The function result should be used as follows:
486  *	IPv6 to IPv4:	HC' = cksum_add(HC, result)
487  *	IPv4 to IPv6:	HC' = cksum_add(HC, ~result)
488  */
489 static uint16_t
490 nat64_cksum_convert(struct ip6_hdr *ip6, struct ip *ip)
491 {
492 	uint32_t sum;
493 	uint16_t *p;
494 
495 	sum = ~ip->ip_src.s_addr >> 16;
496 	sum += ~ip->ip_src.s_addr & 0xffff;
497 	sum += ~ip->ip_dst.s_addr >> 16;
498 	sum += ~ip->ip_dst.s_addr & 0xffff;
499 
500 	for (p = (uint16_t *)&ip6->ip6_src;
501 	    p < (uint16_t *)(&ip6->ip6_src + 2); p++)
502 		sum += *p;
503 
504 	while (sum >> 16)
505 		sum = (sum & 0xffff) + (sum >> 16);
506 	return (sum);
507 }
508 
509 static void
510 nat64_init_ip4hdr(const struct ip6_hdr *ip6, const struct ip6_frag *frag,
511     uint16_t plen, uint8_t proto, struct ip *ip)
512 {
513 
514 	/* assume addresses are already initialized */
515 	ip->ip_v = IPVERSION;
516 	ip->ip_hl = sizeof(*ip) >> 2;
517 	ip->ip_tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
518 	ip->ip_len = htons(sizeof(*ip) + plen);
519 	ip->ip_ttl = ip6->ip6_hlim;
520 	if (*V_nat64ip6stealth == 0)
521 		ip->ip_ttl -= IPV6_HLIMDEC;
522 	ip->ip_sum = 0;
523 	ip->ip_p = (proto == IPPROTO_ICMPV6) ? IPPROTO_ICMP: proto;
524 	ip_fillid(ip);
525 	if (frag != NULL) {
526 		ip->ip_off = htons(ntohs(frag->ip6f_offlg) >> 3);
527 		if (frag->ip6f_offlg & IP6F_MORE_FRAG)
528 			ip->ip_off |= htons(IP_MF);
529 	} else {
530 		ip->ip_off = htons(IP_DF);
531 	}
532 	ip->ip_sum = in_cksum_hdr(ip);
533 }
534 
535 #define	FRAGSZ(mtu) ((mtu) - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag))
536 static NAT64NOINLINE int
537 nat64_fragment6(struct nat64_counters *stats, struct ip6_hdr *ip6,
538     struct mbufq *mq, struct mbuf *m, uint32_t mtu, uint16_t ip_id,
539     uint16_t ip_off)
540 {
541 	struct ip6_frag ip6f;
542 	struct mbuf *n;
543 	uint16_t hlen, len, offset;
544 	int plen;
545 
546 	plen = ntohs(ip6->ip6_plen);
547 	hlen = sizeof(struct ip6_hdr);
548 
549 	/* Fragmentation isn't needed */
550 	if (ip_off == 0 && plen <= mtu - hlen) {
551 		M_PREPEND(m, hlen, M_NOWAIT);
552 		if (m == NULL) {
553 			NAT64STAT_INC(stats, nomem);
554 			return (ENOMEM);
555 		}
556 		bcopy(ip6, mtod(m, void *), hlen);
557 		if (mbufq_enqueue(mq, m) != 0) {
558 			m_freem(m);
559 			NAT64STAT_INC(stats, dropped);
560 			DPRINTF(DP_DROPS, "dropped due to mbufq overflow");
561 			return (ENOBUFS);
562 		}
563 		return (0);
564 	}
565 
566 	hlen += sizeof(struct ip6_frag);
567 	ip6f.ip6f_reserved = 0;
568 	ip6f.ip6f_nxt = ip6->ip6_nxt;
569 	ip6->ip6_nxt = IPPROTO_FRAGMENT;
570 	if (ip_off != 0) {
571 		/*
572 		 * We have got an IPv4 fragment.
573 		 * Use offset value and ip_id from original fragment.
574 		 */
575 		ip6f.ip6f_ident = htonl(ntohs(ip_id));
576 		offset = (ntohs(ip_off) & IP_OFFMASK) << 3;
577 		NAT64STAT_INC(stats, ifrags);
578 	} else {
579 		/* The packet size exceeds interface MTU */
580 		ip6f.ip6f_ident = htonl(ip6_randomid());
581 		offset = 0; /* First fragment*/
582 	}
583 	while (plen > 0 && m != NULL) {
584 		n = NULL;
585 		len = FRAGSZ(mtu) & ~7;
586 		if (len > plen)
587 			len = plen;
588 		ip6->ip6_plen = htons(len + sizeof(ip6f));
589 		ip6f.ip6f_offlg = ntohs(offset);
590 		if (len < plen || (ip_off & htons(IP_MF)) != 0)
591 			ip6f.ip6f_offlg |= IP6F_MORE_FRAG;
592 		offset += len;
593 		plen -= len;
594 		if (plen > 0) {
595 			n = m_split(m, len, M_NOWAIT);
596 			if (n == NULL)
597 				goto fail;
598 		}
599 		M_PREPEND(m, hlen, M_NOWAIT);
600 		if (m == NULL)
601 			goto fail;
602 		bcopy(ip6, mtod(m, void *), sizeof(struct ip6_hdr));
603 		bcopy(&ip6f, mtodo(m, sizeof(struct ip6_hdr)),
604 		    sizeof(struct ip6_frag));
605 		if (mbufq_enqueue(mq, m) != 0)
606 			goto fail;
607 		m = n;
608 	}
609 	NAT64STAT_ADD(stats, ofrags, mbufq_len(mq));
610 	return (0);
611 fail:
612 	if (m != NULL)
613 		m_freem(m);
614 	if (n != NULL)
615 		m_freem(n);
616 	mbufq_drain(mq);
617 	NAT64STAT_INC(stats, nomem);
618 	return (ENOMEM);
619 }
620 
621 static struct nhop_object *
622 nat64_find_route6(struct sockaddr_in6 *dst, struct mbuf *m)
623 {
624 	struct nhop_object *nh;
625 
626 	NET_EPOCH_ASSERT();
627 	nh = fib6_lookup(M_GETFIB(m), &dst->sin6_addr, 0, NHR_NONE, 0);
628 	if (nh == NULL)
629 		return (NULL);
630 	if (nh->nh_flags & (NHF_BLACKHOLE | NHF_REJECT))
631 		return (NULL);
632 
633 	dst->sin6_family = AF_INET6;
634 	dst->sin6_len = sizeof(*dst);
635 	if (nh->nh_flags & NHF_GATEWAY)
636 		dst->sin6_addr = nh->gw6_sa.sin6_addr;
637 	dst->sin6_port = 0;
638 	dst->sin6_scope_id = 0;
639 	dst->sin6_flowinfo = 0;
640 	return (nh);
641 }
642 
643 #define	NAT64_ICMP6_PLEN	64
644 static NAT64NOINLINE void
645 nat64_icmp6_reflect(struct mbuf *m, uint8_t type, uint8_t code, uint32_t mtu,
646     struct nat64_counters *stats, void *logdata)
647 {
648 	struct icmp6_hdr *icmp6;
649 	struct ip6_hdr *ip6, *oip6;
650 	struct mbuf *n;
651 	int len, plen, proto;
652 
653 	len = 0;
654 	proto = nat64_getlasthdr(m, &len);
655 	if (proto < 0) {
656 		DPRINTF(DP_DROPS, "mbuf isn't contigious");
657 		goto freeit;
658 	}
659 	/*
660 	 * Do not send ICMPv6 in reply to ICMPv6 errors.
661 	 */
662 	if (proto == IPPROTO_ICMPV6) {
663 		if (m->m_len < len + sizeof(*icmp6)) {
664 			DPRINTF(DP_DROPS, "mbuf isn't contigious");
665 			goto freeit;
666 		}
667 		icmp6 = mtodo(m, len);
668 		if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST ||
669 		    icmp6->icmp6_type == ND_REDIRECT) {
670 			DPRINTF(DP_DROPS, "do not send ICMPv6 in reply to "
671 			    "ICMPv6 errors");
672 			goto freeit;
673 		}
674 		/*
675 		 * If there are extra headers between IPv6 and ICMPv6,
676 		 * strip off them.
677 		 */
678 		if (len > sizeof(struct ip6_hdr)) {
679 			/*
680 			 * NOTE: ipfw_chk already did m_pullup() and it is
681 			 * expected that data is contigious from the start
682 			 * of IPv6 header up to the end of ICMPv6 header.
683 			 */
684 			bcopy(mtod(m, caddr_t),
685 			    mtodo(m, len - sizeof(struct ip6_hdr)),
686 			    sizeof(struct ip6_hdr));
687 			m_adj(m, len - sizeof(struct ip6_hdr));
688 		}
689 	}
690 	/*
691 	if (icmp6_ratelimit(&ip6->ip6_src, type, code))
692 		goto freeit;
693 		*/
694 	ip6 = mtod(m, struct ip6_hdr *);
695 	switch (type) {
696 	case ICMP6_DST_UNREACH:
697 	case ICMP6_PACKET_TOO_BIG:
698 	case ICMP6_TIME_EXCEEDED:
699 	case ICMP6_PARAM_PROB:
700 		break;
701 	default:
702 		goto freeit;
703 	}
704 	/* Calculate length of ICMPv6 payload */
705 	len = (m->m_pkthdr.len > NAT64_ICMP6_PLEN) ? NAT64_ICMP6_PLEN:
706 	    m->m_pkthdr.len;
707 
708 	/* Create new ICMPv6 datagram */
709 	plen = len + sizeof(struct icmp6_hdr);
710 	n = m_get2(sizeof(struct ip6_hdr) + plen + max_hdr, M_NOWAIT,
711 	    MT_HEADER, M_PKTHDR);
712 	if (n == NULL) {
713 		NAT64STAT_INC(stats, nomem);
714 		m_freem(m);
715 		return;
716 	}
717 	/*
718 	 * Move pkthdr from original mbuf. We should have initialized some
719 	 * fields, because we can reinject this mbuf to netisr and it will
720 	 * go through input path (it requires at least rcvif should be set).
721 	 * Also do M_ALIGN() to reduce chances of need to allocate new mbuf
722 	 * in the chain, when we will do M_PREPEND() or make some type of
723 	 * tunneling.
724 	 */
725 	m_move_pkthdr(n, m);
726 	M_ALIGN(n, sizeof(struct ip6_hdr) + plen + max_hdr);
727 
728 	n->m_len = n->m_pkthdr.len = sizeof(struct ip6_hdr) + plen;
729 	oip6 = mtod(n, struct ip6_hdr *);
730 	/*
731 	 * Make IPv6 source address selection for reflected datagram.
732 	 * nat64_check_ip6() doesn't allow scoped addresses, therefore
733 	 * we use zero scopeid.
734 	 */
735 	if (in6_selectsrc_addr(M_GETFIB(n), &ip6->ip6_src, 0,
736 	    n->m_pkthdr.rcvif, &oip6->ip6_src, NULL) != 0) {
737 		/*
738 		 * Failed to find proper source address, drop the packet.
739 		 */
740 		m_freem(n);
741 		goto freeit;
742 	}
743 	oip6->ip6_dst = ip6->ip6_src;
744 	oip6->ip6_nxt = IPPROTO_ICMPV6;
745 	oip6->ip6_flow = 0;
746 	oip6->ip6_vfc |= IPV6_VERSION;
747 	oip6->ip6_hlim = V_ip6_defhlim;
748 	oip6->ip6_plen = htons(plen);
749 
750 	icmp6 = mtodo(n, sizeof(struct ip6_hdr));
751 	icmp6->icmp6_cksum = 0;
752 	icmp6->icmp6_type = type;
753 	icmp6->icmp6_code = code;
754 	icmp6->icmp6_mtu = htonl(mtu);
755 
756 	m_copydata(m, 0, len, mtodo(n, sizeof(struct ip6_hdr) +
757 	    sizeof(struct icmp6_hdr)));
758 	icmp6->icmp6_cksum = in6_cksum(n, IPPROTO_ICMPV6,
759 	    sizeof(struct ip6_hdr), plen);
760 	m_freem(m);
761 	V_nat64out->output_one(n, stats, logdata);
762 	return;
763 freeit:
764 	NAT64STAT_INC(stats, dropped);
765 	m_freem(m);
766 }
767 
768 static struct nhop_object *
769 nat64_find_route4(struct sockaddr_in *dst, struct mbuf *m)
770 {
771 	struct nhop_object *nh;
772 
773 	NET_EPOCH_ASSERT();
774 	nh = fib4_lookup(M_GETFIB(m), dst->sin_addr, 0, NHR_NONE, 0);
775 	if (nh == NULL)
776 		return (NULL);
777 	if (nh->nh_flags & (NHF_BLACKHOLE | NHF_BROADCAST | NHF_REJECT))
778 		return (NULL);
779 
780 	dst->sin_family = AF_INET;
781 	dst->sin_len = sizeof(*dst);
782 	if (nh->nh_flags & NHF_GATEWAY)
783 		dst->sin_addr = nh->gw4_sa.sin_addr;
784 	dst->sin_port = 0;
785 	return (nh);
786 }
787 
788 #define	NAT64_ICMP_PLEN	64
789 static NAT64NOINLINE void
790 nat64_icmp_reflect(struct mbuf *m, uint8_t type,
791     uint8_t code, uint16_t mtu, struct nat64_counters *stats, void *logdata)
792 {
793 	struct icmp *icmp;
794 	struct ip *ip, *oip;
795 	struct mbuf *n;
796 	int len, plen;
797 
798 	ip = mtod(m, struct ip *);
799 	/* Do not send ICMP error if packet is not the first fragment */
800 	if (ip->ip_off & ~ntohs(IP_MF|IP_DF)) {
801 		DPRINTF(DP_DROPS, "not first fragment");
802 		goto freeit;
803 	}
804 	/* Do not send ICMP in reply to ICMP errors */
805 	if (ip->ip_p == IPPROTO_ICMP) {
806 		if (m->m_len < (ip->ip_hl << 2)) {
807 			DPRINTF(DP_DROPS, "mbuf isn't contigious");
808 			goto freeit;
809 		}
810 		icmp = mtodo(m, ip->ip_hl << 2);
811 		if (!ICMP_INFOTYPE(icmp->icmp_type)) {
812 			DPRINTF(DP_DROPS, "do not send ICMP in reply to "
813 			    "ICMP errors");
814 			goto freeit;
815 		}
816 	}
817 	switch (type) {
818 	case ICMP_UNREACH:
819 	case ICMP_TIMXCEED:
820 	case ICMP_PARAMPROB:
821 		break;
822 	default:
823 		goto freeit;
824 	}
825 	/* Calculate length of ICMP payload */
826 	len = (m->m_pkthdr.len > NAT64_ICMP_PLEN) ? (ip->ip_hl << 2) + 8:
827 	    m->m_pkthdr.len;
828 
829 	/* Create new ICMPv4 datagram */
830 	plen = len + sizeof(struct icmphdr) + sizeof(uint32_t);
831 	n = m_get2(sizeof(struct ip) + plen + max_hdr, M_NOWAIT,
832 	    MT_HEADER, M_PKTHDR);
833 	if (n == NULL) {
834 		NAT64STAT_INC(stats, nomem);
835 		m_freem(m);
836 		return;
837 	}
838 	m_move_pkthdr(n, m);
839 	M_ALIGN(n, sizeof(struct ip) + plen + max_hdr);
840 
841 	n->m_len = n->m_pkthdr.len = sizeof(struct ip) + plen;
842 	oip = mtod(n, struct ip *);
843 	oip->ip_v = IPVERSION;
844 	oip->ip_hl = sizeof(struct ip) >> 2;
845 	oip->ip_tos = 0;
846 	oip->ip_len = htons(n->m_pkthdr.len);
847 	oip->ip_ttl = V_ip_defttl;
848 	oip->ip_p = IPPROTO_ICMP;
849 	ip_fillid(oip);
850 	oip->ip_off = htons(IP_DF);
851 	oip->ip_src = ip->ip_dst;
852 	oip->ip_dst = ip->ip_src;
853 	oip->ip_sum = 0;
854 	oip->ip_sum = in_cksum_hdr(oip);
855 
856 	icmp = mtodo(n, sizeof(struct ip));
857 	icmp->icmp_type = type;
858 	icmp->icmp_code = code;
859 	icmp->icmp_cksum = 0;
860 	icmp->icmp_pmvoid = 0;
861 	icmp->icmp_nextmtu = htons(mtu);
862 	m_copydata(m, 0, len, mtodo(n, sizeof(struct ip) +
863 	    sizeof(struct icmphdr) + sizeof(uint32_t)));
864 	icmp->icmp_cksum = in_cksum_skip(n, sizeof(struct ip) + plen,
865 	    sizeof(struct ip));
866 	m_freem(m);
867 	V_nat64out->output_one(n, stats, logdata);
868 	return;
869 freeit:
870 	NAT64STAT_INC(stats, dropped);
871 	m_freem(m);
872 }
873 
874 /* Translate ICMP echo request/reply into ICMPv6 */
875 static void
876 nat64_icmp_handle_echo(struct ip6_hdr *ip6, struct icmp6_hdr *icmp6,
877     uint16_t id, uint8_t type)
878 {
879 	uint16_t old;
880 
881 	old = *(uint16_t *)icmp6;	/* save type+code in one word */
882 	icmp6->icmp6_type = type;
883 	/* Reflect ICMPv6 -> ICMPv4 type translation in the cksum */
884 	icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum,
885 	    old, *(uint16_t *)icmp6);
886 	if (id != 0) {
887 		old = icmp6->icmp6_id;
888 		icmp6->icmp6_id = id;
889 		/* Reflect ICMP id translation in the cksum */
890 		icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum,
891 		    old, id);
892 	}
893 	/* Reflect IPv6 pseudo header in the cksum */
894 	icmp6->icmp6_cksum = ~in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen),
895 	    IPPROTO_ICMPV6, ~icmp6->icmp6_cksum);
896 }
897 
898 static NAT64NOINLINE struct mbuf *
899 nat64_icmp_translate(struct mbuf *m, struct ip6_hdr *ip6, uint16_t icmpid,
900     int offset, struct nat64_config *cfg)
901 {
902 	struct ip ip;
903 	struct icmp *icmp;
904 	struct tcphdr *tcp;
905 	struct udphdr *udp;
906 	struct ip6_hdr *eip6;
907 	struct mbuf *n;
908 	uint32_t mtu;
909 	int len, hlen, plen;
910 	uint8_t type, code;
911 
912 	if (m->m_len < offset + ICMP_MINLEN)
913 		m = m_pullup(m, offset + ICMP_MINLEN);
914 	if (m == NULL) {
915 		NAT64STAT_INC(&cfg->stats, nomem);
916 		return (m);
917 	}
918 	mtu = 0;
919 	icmp = mtodo(m, offset);
920 	/* RFC 7915 p4.2 */
921 	switch (icmp->icmp_type) {
922 	case ICMP_ECHOREPLY:
923 		type = ICMP6_ECHO_REPLY;
924 		code = 0;
925 		break;
926 	case ICMP_UNREACH:
927 		type = ICMP6_DST_UNREACH;
928 		switch (icmp->icmp_code) {
929 		case ICMP_UNREACH_NET:
930 		case ICMP_UNREACH_HOST:
931 		case ICMP_UNREACH_SRCFAIL:
932 		case ICMP_UNREACH_NET_UNKNOWN:
933 		case ICMP_UNREACH_HOST_UNKNOWN:
934 		case ICMP_UNREACH_TOSNET:
935 		case ICMP_UNREACH_TOSHOST:
936 			code = ICMP6_DST_UNREACH_NOROUTE;
937 			break;
938 		case ICMP_UNREACH_PROTOCOL:
939 			type = ICMP6_PARAM_PROB;
940 			code = ICMP6_PARAMPROB_NEXTHEADER;
941 			break;
942 		case ICMP_UNREACH_PORT:
943 			code = ICMP6_DST_UNREACH_NOPORT;
944 			break;
945 		case ICMP_UNREACH_NEEDFRAG:
946 			type = ICMP6_PACKET_TOO_BIG;
947 			code = 0;
948 			/* XXX: needs an additional look */
949 			mtu = max(IPV6_MMTU, ntohs(icmp->icmp_nextmtu) + 20);
950 			break;
951 		case ICMP_UNREACH_NET_PROHIB:
952 		case ICMP_UNREACH_HOST_PROHIB:
953 		case ICMP_UNREACH_FILTER_PROHIB:
954 		case ICMP_UNREACH_PRECEDENCE_CUTOFF:
955 			code = ICMP6_DST_UNREACH_ADMIN;
956 			break;
957 		default:
958 			DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d",
959 			    icmp->icmp_type, icmp->icmp_code);
960 			goto freeit;
961 		}
962 		break;
963 	case ICMP_TIMXCEED:
964 		type = ICMP6_TIME_EXCEEDED;
965 		code = icmp->icmp_code;
966 		break;
967 	case ICMP_ECHO:
968 		type = ICMP6_ECHO_REQUEST;
969 		code = 0;
970 		break;
971 	case ICMP_PARAMPROB:
972 		type = ICMP6_PARAM_PROB;
973 		switch (icmp->icmp_code) {
974 		case ICMP_PARAMPROB_ERRATPTR:
975 		case ICMP_PARAMPROB_LENGTH:
976 			code = ICMP6_PARAMPROB_HEADER;
977 			switch (icmp->icmp_pptr) {
978 			case 0: /* Version/IHL */
979 			case 1: /* Type Of Service */
980 				mtu = icmp->icmp_pptr;
981 				break;
982 			case 2: /* Total Length */
983 			case 3: mtu = 4; /* Payload Length */
984 				break;
985 			case 8: /* Time to Live */
986 				mtu = 7; /* Hop Limit */
987 				break;
988 			case 9: /* Protocol */
989 				mtu = 6; /* Next Header */
990 				break;
991 			case 12: /* Source address */
992 			case 13:
993 			case 14:
994 			case 15:
995 				mtu = 8;
996 				break;
997 			case 16: /* Destination address */
998 			case 17:
999 			case 18:
1000 			case 19:
1001 				mtu = 24;
1002 				break;
1003 			default: /* Silently drop */
1004 				DPRINTF(DP_DROPS, "Unsupported ICMP type %d,"
1005 				    " code %d, pptr %d", icmp->icmp_type,
1006 				    icmp->icmp_code, icmp->icmp_pptr);
1007 				goto freeit;
1008 			}
1009 			break;
1010 		default:
1011 			DPRINTF(DP_DROPS, "Unsupported ICMP type %d,"
1012 			    " code %d, pptr %d", icmp->icmp_type,
1013 			    icmp->icmp_code, icmp->icmp_pptr);
1014 			goto freeit;
1015 		}
1016 		break;
1017 	default:
1018 		DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d",
1019 		    icmp->icmp_type, icmp->icmp_code);
1020 		goto freeit;
1021 	}
1022 	/*
1023 	 * For echo request/reply we can use original payload,
1024 	 * but we need adjust icmp_cksum, because ICMPv6 cksum covers
1025 	 * IPv6 pseudo header and ICMPv6 types differs from ICMPv4.
1026 	 */
1027 	if (type == ICMP6_ECHO_REQUEST || type == ICMP6_ECHO_REPLY) {
1028 		nat64_icmp_handle_echo(ip6, ICMP6(icmp), icmpid, type);
1029 		return (m);
1030 	}
1031 	/*
1032 	 * For other types of ICMP messages we need to translate inner
1033 	 * IPv4 header to IPv6 header.
1034 	 * Assume ICMP src is the same as payload dst
1035 	 * E.g. we have ( GWsrc1 , NATIP1 ) in outer header
1036 	 * and          ( NATIP1, Hostdst1 ) in ICMP copy header.
1037 	 * In that case, we already have map for NATIP1 and GWsrc1.
1038 	 * The only thing we need is to copy IPv6 map prefix to
1039 	 * Hostdst1.
1040 	 */
1041 	hlen = offset + ICMP_MINLEN;
1042 	if (m->m_pkthdr.len < hlen + sizeof(struct ip) + ICMP_MINLEN) {
1043 		DPRINTF(DP_DROPS, "Message is too short %d",
1044 		    m->m_pkthdr.len);
1045 		goto freeit;
1046 	}
1047 	m_copydata(m, hlen, sizeof(struct ip), (char *)&ip);
1048 	if (ip.ip_v != IPVERSION) {
1049 		DPRINTF(DP_DROPS, "Wrong IP version %d", ip.ip_v);
1050 		goto freeit;
1051 	}
1052 	hlen += ip.ip_hl << 2; /* Skip inner IP header */
1053 	if (nat64_check_ip4(ip.ip_src.s_addr) != 0 ||
1054 	    nat64_check_ip4(ip.ip_dst.s_addr) != 0 ||
1055 	    nat64_check_private_ip4(cfg, ip.ip_src.s_addr) != 0 ||
1056 	    nat64_check_private_ip4(cfg, ip.ip_dst.s_addr) != 0) {
1057 		DPRINTF(DP_DROPS, "IP addresses checks failed %04x -> %04x",
1058 		    ntohl(ip.ip_src.s_addr), ntohl(ip.ip_dst.s_addr));
1059 		goto freeit;
1060 	}
1061 	if (m->m_pkthdr.len < hlen + ICMP_MINLEN) {
1062 		DPRINTF(DP_DROPS, "Message is too short %d",
1063 		    m->m_pkthdr.len);
1064 		goto freeit;
1065 	}
1066 #if 0
1067 	/*
1068 	 * Check that inner source matches the outer destination.
1069 	 * XXX: We need some method to convert IPv4 into IPv6 address here,
1070 	 *	and compare IPv6 addresses.
1071 	 */
1072 	if (ip.ip_src.s_addr != nat64_get_ip4(&ip6->ip6_dst)) {
1073 		DPRINTF(DP_GENERIC, "Inner source doesn't match destination ",
1074 		    "%04x vs %04x", ip.ip_src.s_addr,
1075 		    nat64_get_ip4(&ip6->ip6_dst));
1076 		goto freeit;
1077 	}
1078 #endif
1079 	/*
1080 	 * Create new mbuf for ICMPv6 datagram.
1081 	 * NOTE: len is data length just after inner IP header.
1082 	 */
1083 	len = m->m_pkthdr.len - hlen;
1084 	if (sizeof(struct ip6_hdr) +
1085 	    sizeof(struct icmp6_hdr) + len > NAT64_ICMP6_PLEN)
1086 		len = NAT64_ICMP6_PLEN - sizeof(struct icmp6_hdr) -
1087 		    sizeof(struct ip6_hdr);
1088 	plen = sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr) + len;
1089 	n = m_get2(offset + plen + max_hdr, M_NOWAIT, MT_HEADER, M_PKTHDR);
1090 	if (n == NULL) {
1091 		NAT64STAT_INC(&cfg->stats, nomem);
1092 		m_freem(m);
1093 		return (NULL);
1094 	}
1095 	m_move_pkthdr(n, m);
1096 	M_ALIGN(n, offset + plen + max_hdr);
1097 	n->m_len = n->m_pkthdr.len = offset + plen;
1098 	/* Adjust ip6_plen in outer header */
1099 	ip6->ip6_plen = htons(plen);
1100 	/* Construct new inner IPv6 header */
1101 	eip6 = mtodo(n, offset + sizeof(struct icmp6_hdr));
1102 	eip6->ip6_src = ip6->ip6_dst;
1103 
1104 	/* Use the same prefix that we have in outer header */
1105 	eip6->ip6_dst = ip6->ip6_src;
1106 	MPASS(cfg->flags & NAT64_PLATPFX);
1107 	nat64_embed_ip4(&eip6->ip6_dst, cfg->plat_plen, ip.ip_dst.s_addr);
1108 
1109 	eip6->ip6_flow = htonl(ip.ip_tos << 20);
1110 	eip6->ip6_vfc |= IPV6_VERSION;
1111 	eip6->ip6_hlim = ip.ip_ttl;
1112 	eip6->ip6_plen = htons(ntohs(ip.ip_len) - (ip.ip_hl << 2));
1113 	eip6->ip6_nxt = (ip.ip_p == IPPROTO_ICMP) ? IPPROTO_ICMPV6: ip.ip_p;
1114 	m_copydata(m, hlen, len, (char *)(eip6 + 1));
1115 	/*
1116 	 * We need to translate source port in the inner ULP header,
1117 	 * and adjust ULP checksum.
1118 	 */
1119 	switch (ip.ip_p) {
1120 	case IPPROTO_TCP:
1121 		if (len < offsetof(struct tcphdr, th_sum))
1122 			break;
1123 		tcp = TCP(eip6 + 1);
1124 		if (icmpid != 0) {
1125 			tcp->th_sum = cksum_adjust(tcp->th_sum,
1126 			    tcp->th_sport, icmpid);
1127 			tcp->th_sport = icmpid;
1128 		}
1129 		tcp->th_sum = cksum_add(tcp->th_sum,
1130 		    ~nat64_cksum_convert(eip6, &ip));
1131 		break;
1132 	case IPPROTO_UDP:
1133 		if (len < offsetof(struct udphdr, uh_sum))
1134 			break;
1135 		udp = UDP(eip6 + 1);
1136 		if (icmpid != 0) {
1137 			udp->uh_sum = cksum_adjust(udp->uh_sum,
1138 			    udp->uh_sport, icmpid);
1139 			udp->uh_sport = icmpid;
1140 		}
1141 		udp->uh_sum = cksum_add(udp->uh_sum,
1142 		    ~nat64_cksum_convert(eip6, &ip));
1143 		break;
1144 	case IPPROTO_ICMP:
1145 		/*
1146 		 * Check if this is an ICMP error message for echo request
1147 		 * that we sent. I.e. ULP in the data containing invoking
1148 		 * packet is IPPROTO_ICMP and its type is ICMP_ECHO.
1149 		 */
1150 		icmp = (struct icmp *)(eip6 + 1);
1151 		if (icmp->icmp_type != ICMP_ECHO) {
1152 			m_freem(n);
1153 			goto freeit;
1154 		}
1155 		/*
1156 		 * For our client this original datagram should looks
1157 		 * like it was ICMPv6 datagram with type ICMP6_ECHO_REQUEST.
1158 		 * Thus we need adjust icmp_cksum and convert type from
1159 		 * ICMP_ECHO to ICMP6_ECHO_REQUEST.
1160 		 */
1161 		nat64_icmp_handle_echo(eip6, ICMP6(icmp), icmpid,
1162 		    ICMP6_ECHO_REQUEST);
1163 	}
1164 	m_freem(m);
1165 	/* Convert ICMPv4 into ICMPv6 header */
1166 	icmp = mtodo(n, offset);
1167 	ICMP6(icmp)->icmp6_type = type;
1168 	ICMP6(icmp)->icmp6_code = code;
1169 	ICMP6(icmp)->icmp6_mtu = htonl(mtu);
1170 	ICMP6(icmp)->icmp6_cksum = 0;
1171 	ICMP6(icmp)->icmp6_cksum = cksum_add(
1172 	    ~in6_cksum_pseudo(ip6, plen, IPPROTO_ICMPV6, 0),
1173 	    in_cksum_skip(n, n->m_pkthdr.len, offset));
1174 	return (n);
1175 freeit:
1176 	m_freem(m);
1177 	NAT64STAT_INC(&cfg->stats, dropped);
1178 	return (NULL);
1179 }
1180 
1181 int
1182 nat64_getlasthdr(struct mbuf *m, int *offset)
1183 {
1184 	struct ip6_hdr *ip6;
1185 	struct ip6_hbh *hbh;
1186 	int proto, hlen;
1187 
1188 	if (offset != NULL)
1189 		hlen = *offset;
1190 	else
1191 		hlen = 0;
1192 
1193 	if (m->m_len < hlen + sizeof(*ip6))
1194 		return (-1);
1195 
1196 	ip6 = mtodo(m, hlen);
1197 	hlen += sizeof(*ip6);
1198 	proto = ip6->ip6_nxt;
1199 	/* Skip extension headers */
1200 	while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING ||
1201 	    proto == IPPROTO_DSTOPTS) {
1202 		hbh = mtodo(m, hlen);
1203 		/*
1204 		 * We expect mbuf has contigious data up to
1205 		 * upper level header.
1206 		 */
1207 		if (m->m_len < hlen)
1208 			return (-1);
1209 		/*
1210 		 * We doesn't support Jumbo payload option,
1211 		 * so return error.
1212 		 */
1213 		if (proto == IPPROTO_HOPOPTS && ip6->ip6_plen == 0)
1214 			return (-1);
1215 		proto = hbh->ip6h_nxt;
1216 		hlen += (hbh->ip6h_len + 1) << 3;
1217 	}
1218 	if (offset != NULL)
1219 		*offset = hlen;
1220 	return (proto);
1221 }
1222 
1223 int
1224 nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr,
1225     struct in6_addr *daddr, uint16_t lport, struct nat64_config *cfg,
1226     void *logdata)
1227 {
1228 	struct nhop_object *nh;
1229 	struct ip6_hdr ip6;
1230 	struct sockaddr_in6 dst;
1231 	struct ip *ip;
1232 	struct mbufq mq;
1233 	uint16_t ip_id, ip_off;
1234 	uint16_t *csum;
1235 	int plen, hlen;
1236 	uint8_t proto;
1237 
1238 	ip = mtod(m, struct ip*);
1239 
1240 	if (*V_nat64ipstealth == 0 && ip->ip_ttl <= IPTTLDEC) {
1241 		nat64_icmp_reflect(m, ICMP_TIMXCEED,
1242 		    ICMP_TIMXCEED_INTRANS, 0, &cfg->stats, logdata);
1243 		return (NAT64RETURN);
1244 	}
1245 
1246 	ip6.ip6_dst = *daddr;
1247 	ip6.ip6_src = *saddr;
1248 
1249 	hlen = ip->ip_hl << 2;
1250 	plen = ntohs(ip->ip_len) - hlen;
1251 	proto = ip->ip_p;
1252 
1253 	/* Save ip_id and ip_off, both are in network byte order */
1254 	ip_id = ip->ip_id;
1255 	ip_off = ip->ip_off & htons(IP_OFFMASK | IP_MF);
1256 
1257 	/* Fragment length must be multiple of 8 octets */
1258 	if ((ip->ip_off & htons(IP_MF)) != 0 && (plen & 0x7) != 0) {
1259 		nat64_icmp_reflect(m, ICMP_PARAMPROB,
1260 		    ICMP_PARAMPROB_LENGTH, 0, &cfg->stats, logdata);
1261 		return (NAT64RETURN);
1262 	}
1263 	/* Fragmented ICMP is unsupported */
1264 	if (proto == IPPROTO_ICMP && ip_off != 0) {
1265 		DPRINTF(DP_DROPS, "dropped due to fragmented ICMP");
1266 		NAT64STAT_INC(&cfg->stats, dropped);
1267 		return (NAT64MFREE);
1268 	}
1269 
1270 	dst.sin6_addr = ip6.ip6_dst;
1271 	nh = nat64_find_route6(&dst, m);
1272 	if (nh == NULL) {
1273 		NAT64STAT_INC(&cfg->stats, noroute6);
1274 		nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0,
1275 		    &cfg->stats, logdata);
1276 		return (NAT64RETURN);
1277 	}
1278 	if (nh->nh_mtu < plen + sizeof(ip6) &&
1279 	    (ip->ip_off & htons(IP_DF)) != 0) {
1280 		nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
1281 		    FRAGSZ(nh->nh_mtu) + sizeof(struct ip), &cfg->stats, logdata);
1282 		return (NAT64RETURN);
1283 	}
1284 
1285 	ip6.ip6_flow = htonl(ip->ip_tos << 20);
1286 	ip6.ip6_vfc |= IPV6_VERSION;
1287 	ip6.ip6_hlim = ip->ip_ttl;
1288 	if (*V_nat64ipstealth == 0)
1289 		ip6.ip6_hlim -= IPTTLDEC;
1290 	ip6.ip6_plen = htons(plen);
1291 	ip6.ip6_nxt = (proto == IPPROTO_ICMP) ? IPPROTO_ICMPV6: proto;
1292 
1293 	/* Handle delayed checksums if needed. */
1294 	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1295 		in_delayed_cksum(m);
1296 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1297 	}
1298 	/* Convert checksums. */
1299 	switch (proto) {
1300 	case IPPROTO_TCP:
1301 		csum = &TCP(mtodo(m, hlen))->th_sum;
1302 		if (lport != 0) {
1303 			struct tcphdr *tcp = TCP(mtodo(m, hlen));
1304 			*csum = cksum_adjust(*csum, tcp->th_dport, lport);
1305 			tcp->th_dport = lport;
1306 		}
1307 		*csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));
1308 		break;
1309 	case IPPROTO_UDP:
1310 		csum = &UDP(mtodo(m, hlen))->uh_sum;
1311 		if (lport != 0) {
1312 			struct udphdr *udp = UDP(mtodo(m, hlen));
1313 			*csum = cksum_adjust(*csum, udp->uh_dport, lport);
1314 			udp->uh_dport = lport;
1315 		}
1316 		*csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));
1317 		break;
1318 	case IPPROTO_ICMP:
1319 		m = nat64_icmp_translate(m, &ip6, lport, hlen, cfg);
1320 		if (m == NULL)	/* stats already accounted */
1321 			return (NAT64RETURN);
1322 	}
1323 
1324 	m_adj(m, hlen);
1325 	mbufq_init(&mq, 255);
1326 	nat64_fragment6(&cfg->stats, &ip6, &mq, m, nh->nh_mtu, ip_id, ip_off);
1327 	while ((m = mbufq_dequeue(&mq)) != NULL) {
1328 		if (V_nat64out->output(nh->nh_ifp, m, (struct sockaddr *)&dst,
1329 		    &cfg->stats, logdata) != 0)
1330 			break;
1331 		NAT64STAT_INC(&cfg->stats, opcnt46);
1332 	}
1333 	mbufq_drain(&mq);
1334 	return (NAT64RETURN);
1335 }
1336 
1337 int
1338 nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport,
1339     struct nat64_config *cfg, void *logdata)
1340 {
1341 	struct ip ip;
1342 	struct icmp6_hdr *icmp6;
1343 	struct ip6_frag *ip6f;
1344 	struct ip6_hdr *ip6, *ip6i;
1345 	uint32_t mtu;
1346 	int plen, proto;
1347 	uint8_t type, code;
1348 
1349 	if (hlen == 0) {
1350 		ip6 = mtod(m, struct ip6_hdr *);
1351 		if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
1352 		    nat64_check_ip6(&ip6->ip6_dst) != 0)
1353 			return (NAT64SKIP);
1354 
1355 		proto = nat64_getlasthdr(m, &hlen);
1356 		if (proto != IPPROTO_ICMPV6) {
1357 			DPRINTF(DP_DROPS,
1358 			    "dropped due to mbuf isn't contigious");
1359 			NAT64STAT_INC(&cfg->stats, dropped);
1360 			return (NAT64MFREE);
1361 		}
1362 	}
1363 
1364 	/*
1365 	 * Translate ICMPv6 type and code to ICMPv4 (RFC7915).
1366 	 * NOTE: ICMPv6 echo handled by nat64_do_handle_ip6().
1367 	 */
1368 	icmp6 = mtodo(m, hlen);
1369 	mtu = 0;
1370 	switch (icmp6->icmp6_type) {
1371 	case ICMP6_DST_UNREACH:
1372 		type = ICMP_UNREACH;
1373 		switch (icmp6->icmp6_code) {
1374 		case ICMP6_DST_UNREACH_NOROUTE:
1375 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
1376 		case ICMP6_DST_UNREACH_ADDR:
1377 			code = ICMP_UNREACH_HOST;
1378 			break;
1379 		case ICMP6_DST_UNREACH_ADMIN:
1380 			code = ICMP_UNREACH_HOST_PROHIB;
1381 			break;
1382 		case ICMP6_DST_UNREACH_NOPORT:
1383 			code = ICMP_UNREACH_PORT;
1384 			break;
1385 		default:
1386 			DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
1387 			    " code %d", icmp6->icmp6_type,
1388 			    icmp6->icmp6_code);
1389 			NAT64STAT_INC(&cfg->stats, dropped);
1390 			return (NAT64MFREE);
1391 		}
1392 		break;
1393 	case ICMP6_PACKET_TOO_BIG:
1394 		type = ICMP_UNREACH;
1395 		code = ICMP_UNREACH_NEEDFRAG;
1396 		mtu = ntohl(icmp6->icmp6_mtu);
1397 		if (mtu < IPV6_MMTU) {
1398 			DPRINTF(DP_DROPS, "Wrong MTU %d in ICMPv6 type %d,"
1399 			    " code %d", mtu, icmp6->icmp6_type,
1400 			    icmp6->icmp6_code);
1401 			NAT64STAT_INC(&cfg->stats, dropped);
1402 			return (NAT64MFREE);
1403 		}
1404 		/*
1405 		 * Adjust MTU to reflect difference between
1406 		 * IPv6 an IPv4 headers.
1407 		 */
1408 		mtu -= sizeof(struct ip6_hdr) - sizeof(struct ip);
1409 		break;
1410 	case ICMP6_TIME_EXCEEDED:
1411 		type = ICMP_TIMXCEED;
1412 		code = icmp6->icmp6_code;
1413 		break;
1414 	case ICMP6_PARAM_PROB:
1415 		switch (icmp6->icmp6_code) {
1416 		case ICMP6_PARAMPROB_HEADER:
1417 			type = ICMP_PARAMPROB;
1418 			code = ICMP_PARAMPROB_ERRATPTR;
1419 			mtu = ntohl(icmp6->icmp6_pptr);
1420 			switch (mtu) {
1421 			case 0: /* Version/Traffic Class */
1422 			case 1: /* Traffic Class/Flow Label */
1423 				break;
1424 			case 4: /* Payload Length */
1425 			case 5:
1426 				mtu = 2;
1427 				break;
1428 			case 6: /* Next Header */
1429 				mtu = 9;
1430 				break;
1431 			case 7: /* Hop Limit */
1432 				mtu = 8;
1433 				break;
1434 			default:
1435 				if (mtu >= 8 && mtu <= 23) {
1436 					mtu = 12; /* Source address */
1437 					break;
1438 				}
1439 				if (mtu >= 24 && mtu <= 39) {
1440 					mtu = 16; /* Destination address */
1441 					break;
1442 				}
1443 				DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
1444 				    " code %d, pptr %d", icmp6->icmp6_type,
1445 				    icmp6->icmp6_code, mtu);
1446 				NAT64STAT_INC(&cfg->stats, dropped);
1447 				return (NAT64MFREE);
1448 			}
1449 		case ICMP6_PARAMPROB_NEXTHEADER:
1450 			type = ICMP_UNREACH;
1451 			code = ICMP_UNREACH_PROTOCOL;
1452 			break;
1453 		default:
1454 			DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
1455 			    " code %d, pptr %d", icmp6->icmp6_type,
1456 			    icmp6->icmp6_code, ntohl(icmp6->icmp6_pptr));
1457 			NAT64STAT_INC(&cfg->stats, dropped);
1458 			return (NAT64MFREE);
1459 		}
1460 		break;
1461 	default:
1462 		DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d, code %d",
1463 		    icmp6->icmp6_type, icmp6->icmp6_code);
1464 		NAT64STAT_INC(&cfg->stats, dropped);
1465 		return (NAT64MFREE);
1466 	}
1467 
1468 	hlen += sizeof(struct icmp6_hdr);
1469 	if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) {
1470 		NAT64STAT_INC(&cfg->stats, dropped);
1471 		DPRINTF(DP_DROPS, "Message is too short %d",
1472 		    m->m_pkthdr.len);
1473 		return (NAT64MFREE);
1474 	}
1475 	/*
1476 	 * We need at least ICMP_MINLEN bytes of original datagram payload
1477 	 * to generate ICMP message. It is nice that ICMP_MINLEN is equal
1478 	 * to sizeof(struct ip6_frag). So, if embedded datagram had a fragment
1479 	 * header we will not have to do m_pullup() again.
1480 	 *
1481 	 * What we have here:
1482 	 * Outer header: (IPv6iGW, v4mapPRefix+v4exthost)
1483 	 * Inner header: (v4mapPRefix+v4host, IPv6iHost) [sport, dport]
1484 	 * We need to translate it to:
1485 	 *
1486 	 * Outer header: (alias_host, v4exthost)
1487 	 * Inner header: (v4exthost, alias_host) [sport, alias_port]
1488 	 *
1489 	 * Assume caller function has checked if v4mapPRefix+v4host
1490 	 * matches configured prefix.
1491 	 * The only two things we should be provided with are mapping between
1492 	 * IPv6iHost <> alias_host and between dport and alias_port.
1493 	 */
1494 	if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN)
1495 		m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN);
1496 	if (m == NULL) {
1497 		NAT64STAT_INC(&cfg->stats, nomem);
1498 		return (NAT64RETURN);
1499 	}
1500 	ip6 = mtod(m, struct ip6_hdr *);
1501 	ip6i = mtodo(m, hlen);
1502 	ip6f = NULL;
1503 	proto = ip6i->ip6_nxt;
1504 	plen = ntohs(ip6i->ip6_plen);
1505 	hlen += sizeof(struct ip6_hdr);
1506 	if (proto == IPPROTO_FRAGMENT) {
1507 		if (m->m_pkthdr.len < hlen + sizeof(struct ip6_frag) +
1508 		    ICMP_MINLEN)
1509 			goto fail;
1510 		ip6f = mtodo(m, hlen);
1511 		proto = ip6f->ip6f_nxt;
1512 		plen -= sizeof(struct ip6_frag);
1513 		hlen += sizeof(struct ip6_frag);
1514 		/* Ajust MTU to reflect frag header size */
1515 		if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG)
1516 			mtu -= sizeof(struct ip6_frag);
1517 	}
1518 	if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
1519 		DPRINTF(DP_DROPS, "Unsupported proto %d in the inner header",
1520 		    proto);
1521 		goto fail;
1522 	}
1523 	if (nat64_check_ip6(&ip6i->ip6_src) != 0 ||
1524 	    nat64_check_ip6(&ip6i->ip6_dst) != 0) {
1525 		DPRINTF(DP_DROPS, "Inner addresses do not passes the check");
1526 		goto fail;
1527 	}
1528 	/* Check if outer dst is the same as inner src */
1529 	if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6i->ip6_src)) {
1530 		DPRINTF(DP_DROPS, "Inner src doesn't match outer dst");
1531 		goto fail;
1532 	}
1533 
1534 	/* Now we need to make a fake IPv4 packet to generate ICMP message */
1535 	ip.ip_dst.s_addr = aaddr;
1536 	ip.ip_src.s_addr = nat64_extract_ip4(&ip6i->ip6_src, cfg->plat_plen);
1537 	if (ip.ip_src.s_addr == 0)
1538 		goto fail;
1539 	/* XXX: Make fake ulp header */
1540 	if (V_nat64out == &nat64_direct) /* init_ip4hdr will decrement it */
1541 		ip6i->ip6_hlim += IPV6_HLIMDEC;
1542 	nat64_init_ip4hdr(ip6i, ip6f, plen, proto, &ip);
1543 	m_adj(m, hlen - sizeof(struct ip));
1544 	bcopy(&ip, mtod(m, void *), sizeof(ip));
1545 	nat64_icmp_reflect(m, type, code, (uint16_t)mtu, &cfg->stats,
1546 	    logdata);
1547 	return (NAT64RETURN);
1548 fail:
1549 	/*
1550 	 * We must call m_freem() because mbuf pointer could be
1551 	 * changed with m_pullup().
1552 	 */
1553 	m_freem(m);
1554 	NAT64STAT_INC(&cfg->stats, dropped);
1555 	return (NAT64RETURN);
1556 }
1557 
1558 int
1559 nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport,
1560     struct nat64_config *cfg, void *logdata)
1561 {
1562 	struct ip ip;
1563 	struct nhop_object *nh;
1564 	struct sockaddr_in dst;
1565 	struct ip6_frag *frag;
1566 	struct ip6_hdr *ip6;
1567 	struct icmp6_hdr *icmp6;
1568 	uint16_t *csum;
1569 	int plen, hlen, proto;
1570 
1571 	/*
1572 	 * XXX: we expect ipfw_chk() did m_pullup() up to upper level
1573 	 * protocol's headers. Also we skip some checks, that ip6_input(),
1574 	 * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did.
1575 	 */
1576 	ip6 = mtod(m, struct ip6_hdr *);
1577 	if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
1578 	    nat64_check_ip6(&ip6->ip6_dst) != 0) {
1579 		return (NAT64SKIP);
1580 	}
1581 
1582 	/* Starting from this point we must not return zero */
1583 	ip.ip_src.s_addr = aaddr;
1584 	if (nat64_check_ip4(ip.ip_src.s_addr) != 0) {
1585 		DPRINTF(DP_GENERIC | DP_DROPS, "invalid source address: %08x",
1586 		    ip.ip_src.s_addr);
1587 		NAT64STAT_INC(&cfg->stats, dropped);
1588 		return (NAT64MFREE);
1589 	}
1590 
1591 	ip.ip_dst.s_addr = nat64_extract_ip4(&ip6->ip6_dst, cfg->plat_plen);
1592 	if (ip.ip_dst.s_addr == 0) {
1593 		NAT64STAT_INC(&cfg->stats, dropped);
1594 		return (NAT64MFREE);
1595 	}
1596 
1597 	if (*V_nat64ip6stealth == 0 && ip6->ip6_hlim <= IPV6_HLIMDEC) {
1598 		nat64_icmp6_reflect(m, ICMP6_TIME_EXCEEDED,
1599 		    ICMP6_TIME_EXCEED_TRANSIT, 0, &cfg->stats, logdata);
1600 		return (NAT64RETURN);
1601 	}
1602 
1603 	hlen = 0;
1604 	plen = ntohs(ip6->ip6_plen);
1605 	proto = nat64_getlasthdr(m, &hlen);
1606 	if (proto < 0) {
1607 		DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious");
1608 		NAT64STAT_INC(&cfg->stats, dropped);
1609 		return (NAT64MFREE);
1610 	}
1611 	frag = NULL;
1612 	if (proto == IPPROTO_FRAGMENT) {
1613 		/* ipfw_chk should m_pullup up to frag header */
1614 		if (m->m_len < hlen + sizeof(*frag)) {
1615 			DPRINTF(DP_DROPS,
1616 			    "dropped due to mbuf isn't contigious");
1617 			NAT64STAT_INC(&cfg->stats, dropped);
1618 			return (NAT64MFREE);
1619 		}
1620 		frag = mtodo(m, hlen);
1621 		proto = frag->ip6f_nxt;
1622 		hlen += sizeof(*frag);
1623 		/* Fragmented ICMPv6 is unsupported */
1624 		if (proto == IPPROTO_ICMPV6) {
1625 			DPRINTF(DP_DROPS, "dropped due to fragmented ICMPv6");
1626 			NAT64STAT_INC(&cfg->stats, dropped);
1627 			return (NAT64MFREE);
1628 		}
1629 		/* Fragment length must be multiple of 8 octets */
1630 		if ((frag->ip6f_offlg & IP6F_MORE_FRAG) != 0 &&
1631 		    ((plen + sizeof(struct ip6_hdr) - hlen) & 0x7) != 0) {
1632 			nat64_icmp6_reflect(m, ICMP6_PARAM_PROB,
1633 			    ICMP6_PARAMPROB_HEADER,
1634 			    offsetof(struct ip6_hdr, ip6_plen), &cfg->stats,
1635 			    logdata);
1636 			return (NAT64RETURN);
1637 		}
1638 	}
1639 	plen -= hlen - sizeof(struct ip6_hdr);
1640 	if (plen < 0 || m->m_pkthdr.len < plen + hlen) {
1641 		DPRINTF(DP_DROPS, "plen %d, pkthdr.len %d, hlen %d",
1642 		    plen, m->m_pkthdr.len, hlen);
1643 		NAT64STAT_INC(&cfg->stats, dropped);
1644 		return (NAT64MFREE);
1645 	}
1646 
1647 	icmp6 = NULL;	/* Make gcc happy */
1648 	if (proto == IPPROTO_ICMPV6) {
1649 		icmp6 = mtodo(m, hlen);
1650 		if (icmp6->icmp6_type != ICMP6_ECHO_REQUEST &&
1651 		    icmp6->icmp6_type != ICMP6_ECHO_REPLY)
1652 			return (nat64_handle_icmp6(m, hlen, aaddr, aport,
1653 			    cfg, logdata));
1654 	}
1655 	dst.sin_addr.s_addr = ip.ip_dst.s_addr;
1656 	nh = nat64_find_route4(&dst, m);
1657 	if (nh == NULL) {
1658 		NAT64STAT_INC(&cfg->stats, noroute4);
1659 		nat64_icmp6_reflect(m, ICMP6_DST_UNREACH,
1660 		    ICMP6_DST_UNREACH_NOROUTE, 0, &cfg->stats, logdata);
1661 		return (NAT64RETURN);
1662 	}
1663 	if (nh->nh_mtu < plen + sizeof(ip)) {
1664 		nat64_icmp6_reflect(m, ICMP6_PACKET_TOO_BIG, 0, nh->nh_mtu,
1665 		    &cfg->stats, logdata);
1666 		return (NAT64RETURN);
1667 	}
1668 	nat64_init_ip4hdr(ip6, frag, plen, proto, &ip);
1669 
1670 	/* Handle delayed checksums if needed. */
1671 	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
1672 		in6_delayed_cksum(m, plen, hlen);
1673 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
1674 	}
1675 	/* Convert checksums. */
1676 	switch (proto) {
1677 	case IPPROTO_TCP:
1678 		csum = &TCP(mtodo(m, hlen))->th_sum;
1679 		if (aport != 0) {
1680 			struct tcphdr *tcp = TCP(mtodo(m, hlen));
1681 			*csum = cksum_adjust(*csum, tcp->th_sport, aport);
1682 			tcp->th_sport = aport;
1683 		}
1684 		*csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));
1685 		break;
1686 	case IPPROTO_UDP:
1687 		csum = &UDP(mtodo(m, hlen))->uh_sum;
1688 		if (aport != 0) {
1689 			struct udphdr *udp = UDP(mtodo(m, hlen));
1690 			*csum = cksum_adjust(*csum, udp->uh_sport, aport);
1691 			udp->uh_sport = aport;
1692 		}
1693 		*csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));
1694 		break;
1695 	case IPPROTO_ICMPV6:
1696 		/* Checksum in ICMPv6 covers pseudo header */
1697 		csum = &icmp6->icmp6_cksum;
1698 		*csum = cksum_add(*csum, in6_cksum_pseudo(ip6, plen,
1699 		    IPPROTO_ICMPV6, 0));
1700 		/* Convert ICMPv6 types to ICMP */
1701 		proto = *(uint16_t *)icmp6; /* save old word for cksum_adjust */
1702 		if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST)
1703 			icmp6->icmp6_type = ICMP_ECHO;
1704 		else /* ICMP6_ECHO_REPLY */
1705 			icmp6->icmp6_type = ICMP_ECHOREPLY;
1706 		*csum = cksum_adjust(*csum, (uint16_t)proto,
1707 		    *(uint16_t *)icmp6);
1708 		if (aport != 0) {
1709 			uint16_t old_id = icmp6->icmp6_id;
1710 			icmp6->icmp6_id = aport;
1711 			*csum = cksum_adjust(*csum, old_id, aport);
1712 		}
1713 		break;
1714 	};
1715 
1716 	m_adj(m, hlen - sizeof(ip));
1717 	bcopy(&ip, mtod(m, void *), sizeof(ip));
1718 	if (V_nat64out->output(nh->nh_ifp, m, (struct sockaddr *)&dst,
1719 	    &cfg->stats, logdata) == 0)
1720 		NAT64STAT_INC(&cfg->stats, opcnt64);
1721 	return (NAT64RETURN);
1722 }
1723