xref: /freebsd/sys/netpfil/ipfw/nat64/nat64_translate.c (revision 23f6875a43f7ce365f2d52cf857da010c47fb03b)
1 /*-
2  * Copyright (c) 2015-2016 Yandex LLC
3  * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "opt_ipfw.h"
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/counter.h>
36 #include <sys/errno.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/mbuf.h>
40 #include <sys/module.h>
41 #include <sys/rmlock.h>
42 #include <sys/rwlock.h>
43 #include <sys/socket.h>
44 #include <sys/queue.h>
45 
46 #include <net/if.h>
47 #include <net/if_var.h>
48 #include <net/if_pflog.h>
49 #include <net/pfil.h>
50 #include <net/netisr.h>
51 #include <net/route.h>
52 
53 #include <netinet/in.h>
54 #include <netinet/ip.h>
55 #include <netinet/ip_var.h>
56 #include <netinet/ip_fw.h>
57 #include <netinet/ip6.h>
58 #include <netinet/icmp6.h>
59 #include <netinet/ip_icmp.h>
60 #include <netinet/tcp.h>
61 #include <netinet/udp.h>
62 #include <netinet6/in6_var.h>
63 #include <netinet6/ip6_var.h>
64 
65 #include <netpfil/pf/pf.h>
66 #include <netpfil/ipfw/ip_fw_private.h>
67 #include <netpfil/ipfw/nat64/ip_fw_nat64.h>
68 #include <netpfil/ipfw/nat64/nat64_translate.h>
69 #include <machine/in_cksum.h>
70 
71 static void
72 nat64_log(struct pfloghdr *logdata, struct mbuf *m, sa_family_t family)
73 {
74 
75 	logdata->dir = PF_OUT;
76 	logdata->af = family;
77 	ipfw_bpf_mtap2(logdata, PFLOG_HDRLEN, m);
78 }
79 #ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
80 static NAT64NOINLINE struct sockaddr* nat64_find_route4(struct route *ro,
81     in_addr_t dest, struct mbuf *m);
82 static NAT64NOINLINE struct sockaddr* nat64_find_route6(struct route_in6 *ro,
83     struct in6_addr *dest, struct mbuf *m);
84 
85 static NAT64NOINLINE int
86 nat64_output(struct ifnet *ifp, struct mbuf *m,
87     struct sockaddr *dst, struct route *ro, nat64_stats_block *stats,
88     void *logdata)
89 {
90 	int error;
91 
92 	if (logdata != NULL)
93 		nat64_log(logdata, m, dst->sa_family);
94 	error = (*ifp->if_output)(ifp, m, dst, ro);
95 	if (error != 0)
96 		NAT64STAT_INC(stats, oerrors);
97 	return (error);
98 }
99 
100 static NAT64NOINLINE int
101 nat64_output_one(struct mbuf *m, nat64_stats_block *stats, void *logdata)
102 {
103 	struct route_in6 ro6;
104 	struct route ro4, *ro;
105 	struct sockaddr *dst;
106 	struct ifnet *ifp;
107 	struct ip6_hdr *ip6;
108 	struct ip *ip4;
109 	int error;
110 
111 	ip4 = mtod(m, struct ip *);
112 	switch (ip4->ip_v) {
113 	case IPVERSION:
114 		ro = &ro4;
115 		dst = nat64_find_route4(&ro4, ip4->ip_dst.s_addr, m);
116 		if (dst == NULL)
117 			NAT64STAT_INC(stats, noroute4);
118 		break;
119 	case (IPV6_VERSION >> 4):
120 		ip6 = (struct ip6_hdr *)ip4;
121 		ro = (struct route *)&ro6;
122 		dst = nat64_find_route6(&ro6, &ip6->ip6_dst, m);
123 		if (dst == NULL)
124 			NAT64STAT_INC(stats, noroute6);
125 		break;
126 	default:
127 		m_freem(m);
128 		NAT64STAT_INC(stats, dropped);
129 		DPRINTF(DP_DROPS, "dropped due to unknown IP version");
130 		return (EAFNOSUPPORT);
131 	}
132 	if (dst == NULL) {
133 		FREE_ROUTE(ro);
134 		m_freem(m);
135 		return (EHOSTUNREACH);
136 	}
137 	if (logdata != NULL)
138 		nat64_log(logdata, m, dst->sa_family);
139 	ifp = ro->ro_rt->rt_ifp;
140 	error = (*ifp->if_output)(ifp, m, dst, ro);
141 	if (error != 0)
142 		NAT64STAT_INC(stats, oerrors);
143 	FREE_ROUTE(ro);
144 	return (error);
145 }
146 #else /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */
147 static NAT64NOINLINE int
148 nat64_output(struct ifnet *ifp, struct mbuf *m,
149     struct sockaddr *dst, struct route *ro, nat64_stats_block *stats,
150     void *logdata)
151 {
152 	struct ip *ip4;
153 	int ret, af;
154 
155 	ip4 = mtod(m, struct ip *);
156 	switch (ip4->ip_v) {
157 	case IPVERSION:
158 		af = AF_INET;
159 		ret = NETISR_IP;
160 		break;
161 	case (IPV6_VERSION >> 4):
162 		af = AF_INET6;
163 		ret = NETISR_IPV6;
164 		break;
165 	default:
166 		m_freem(m);
167 		NAT64STAT_INC(stats, dropped);
168 		DPRINTF(DP_DROPS, "unknown IP version");
169 		return (EAFNOSUPPORT);
170 	}
171 	if (logdata != NULL)
172 		nat64_log(logdata, m, af);
173 	ret = netisr_queue(ret, m);
174 	if (ret != 0)
175 		NAT64STAT_INC(stats, oerrors);
176 	return (ret);
177 }
178 
179 static NAT64NOINLINE int
180 nat64_output_one(struct mbuf *m, nat64_stats_block *stats, void *logdata)
181 {
182 
183 	return (nat64_output(NULL, m, NULL, NULL, stats, logdata));
184 }
185 #endif /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */
186 
187 
188 #if 0
189 void print_ipv6_header(struct ip6_hdr *ip6, char *buf, size_t bufsize);
190 
191 void
192 print_ipv6_header(struct ip6_hdr *ip6, char *buf, size_t bufsize)
193 {
194 	char sbuf[INET6_ADDRSTRLEN], dbuf[INET6_ADDRSTRLEN];
195 
196 	inet_ntop(AF_INET6, &ip6->ip6_src, sbuf, sizeof(sbuf));
197 	inet_ntop(AF_INET6, &ip6->ip6_dst, dbuf, sizeof(dbuf));
198 	snprintf(buf, bufsize, "%s -> %s %d", sbuf, dbuf, ip6->ip6_nxt);
199 }
200 
201 
202 static NAT64NOINLINE int
203 nat64_embed_ip4(struct nat64_cfg *cfg, in_addr_t ia, struct in6_addr *ip6)
204 {
205 
206 	/* assume the prefix is properly filled with zeros */
207 	bcopy(&cfg->prefix, ip6, sizeof(*ip6));
208 	switch (cfg->plen) {
209 	case 32:
210 	case 96:
211 		ip6->s6_addr32[cfg->plen / 32] = ia;
212 		break;
213 	case 40:
214 	case 48:
215 	case 56:
216 #if BYTE_ORDER == BIG_ENDIAN
217 		ip6->s6_addr32[1] = cfg->prefix.s6_addr32[1] |
218 		    (ia >> (cfg->plen % 32));
219 		ip6->s6_addr32[2] = ia << (24 - cfg->plen % 32);
220 #elif BYTE_ORDER == LITTLE_ENDIAN
221 		ip6->s6_addr32[1] = cfg->prefix.s6_addr32[1] |
222 		    (ia << (cfg->plen % 32));
223 		ip6->s6_addr32[2] = ia >> (24 - cfg->plen % 32);
224 #endif
225 		break;
226 	case 64:
227 #if BYTE_ORDER == BIG_ENDIAN
228 		ip6->s6_addr32[2] = ia >> 8;
229 		ip6->s6_addr32[3] = ia << 24;
230 #elif BYTE_ORDER == LITTLE_ENDIAN
231 		ip6->s6_addr32[2] = ia << 8;
232 		ip6->s6_addr32[3] = ia >> 24;
233 #endif
234 		break;
235 	default:
236 		return (0);
237 	};
238 	ip6->s6_addr8[8] = 0;
239 	return (1);
240 }
241 
242 static NAT64NOINLINE in_addr_t
243 nat64_extract_ip4(struct in6_addr *ip6, int plen)
244 {
245 	in_addr_t ia;
246 
247 	/*
248 	 * According to RFC 6052 p2.2:
249 	 * IPv4-embedded IPv6 addresses are composed of a variable-length
250 	 * prefix, the embedded IPv4 address, and a variable length suffix.
251 	 * The suffix bits are reserved for future extensions and SHOULD
252 	 * be set to zero.
253 	 */
254 	switch (plen) {
255 	case 32:
256 		if (ip6->s6_addr32[3] != 0 || ip6->s6_addr32[2] != 0)
257 			goto badip6;
258 		break;
259 	case 40:
260 		if (ip6->s6_addr32[3] != 0 ||
261 		    (ip6->s6_addr32[2] & htonl(0xff00ffff)) != 0)
262 			goto badip6;
263 		break;
264 	case 48:
265 		if (ip6->s6_addr32[3] != 0 ||
266 		    (ip6->s6_addr32[2] & htonl(0xff0000ff)) != 0)
267 			goto badip6;
268 		break;
269 	case 56:
270 		if (ip6->s6_addr32[3] != 0 || ip6->s6_addr8[8] != 0)
271 			goto badip6;
272 		break;
273 	case 64:
274 		if (ip6->s6_addr8[8] != 0 ||
275 		    (ip6->s6_addr32[3] & htonl(0x00ffffff)) != 0)
276 			goto badip6;
277 	};
278 	switch (plen) {
279 	case 32:
280 	case 96:
281 		ia = ip6->s6_addr32[plen / 32];
282 		break;
283 	case 40:
284 	case 48:
285 	case 56:
286 #if BYTE_ORDER == BIG_ENDIAN
287 		ia = (ip6->s6_addr32[1] << (plen % 32)) |
288 		    (ip6->s6_addr32[2] >> (24 - plen % 32));
289 #elif BYTE_ORDER == LITTLE_ENDIAN
290 		ia = (ip6->s6_addr32[1] >> (plen % 32)) |
291 		    (ip6->s6_addr32[2] << (24 - plen % 32));
292 #endif
293 		break;
294 	case 64:
295 #if BYTE_ORDER == BIG_ENDIAN
296 		ia = (ip6->s6_addr32[2] << 8) | (ip6->s6_addr32[3] >> 24);
297 #elif BYTE_ORDER == LITTLE_ENDIAN
298 		ia = (ip6->s6_addr32[2] >> 8) | (ip6->s6_addr32[3] << 24);
299 #endif
300 		break;
301 	default:
302 		return (0);
303 	};
304 	if (nat64_check_ip4(ia) != 0 ||
305 	    nat64_check_private_ip4(ia) != 0)
306 		goto badip4;
307 
308 	return (ia);
309 badip4:
310 	DPRINTF(DP_GENERIC, "invalid destination address: %08x", ia);
311 	return (0);
312 badip6:
313 	DPRINTF(DP_GENERIC, "invalid IPv4-embedded IPv6 address");
314 	return (0);
315 }
316 #endif
317 
318 /*
319  * According to RFC 1624 the equation for incremental checksum update is:
320  *	HC' = ~(~HC + ~m + m')	--	[Eqn. 3]
321  *	HC' = HC - ~m - m'	--	[Eqn. 4]
322  * So, when we are replacing IPv4 addresses to IPv6, we
323  * can assume, that new bytes previously were zeros, and vise versa -
324  * when we replacing IPv6 addresses to IPv4, now unused bytes become
325  * zeros. The payload length in pseudo header has bigger size, but one
326  * half of it should be zero. Using the equation 4 we get:
327  *	HC' = HC - (~m0 + m0')	-- m0 is first changed word
328  *	HC' = (HC - (~m0 + m0')) - (~m1 + m1')	-- m1 is second changed word
329  *	HC' = HC - ~m0 - m0' - ~m1 - m1' - ... =
330  *	  = HC - sum(~m[i] + m'[i])
331  *
332  * The function result should be used as follows:
333  *	IPv6 to IPv4:	HC' = cksum_add(HC, result)
334  *	IPv4 to IPv6:	HC' = cksum_add(HC, ~result)
335  */
336 static NAT64NOINLINE uint16_t
337 nat64_cksum_convert(struct ip6_hdr *ip6, struct ip *ip)
338 {
339 	uint32_t sum;
340 	uint16_t *p;
341 
342 	sum = ~ip->ip_src.s_addr >> 16;
343 	sum += ~ip->ip_src.s_addr & 0xffff;
344 	sum += ~ip->ip_dst.s_addr >> 16;
345 	sum += ~ip->ip_dst.s_addr & 0xffff;
346 
347 	for (p = (uint16_t *)&ip6->ip6_src;
348 	    p < (uint16_t *)(&ip6->ip6_src + 2); p++)
349 		sum += *p;
350 
351 	while (sum >> 16)
352 		sum = (sum & 0xffff) + (sum >> 16);
353 	return (sum);
354 }
355 
356 #if __FreeBSD_version < 1100000
357 #define	ip_fillid(ip)		(ip)->ip_id = ip_newid()
358 #endif
359 static NAT64NOINLINE void
360 nat64_init_ip4hdr(const struct ip6_hdr *ip6, const struct ip6_frag *frag,
361     uint16_t plen, uint8_t proto, struct ip *ip)
362 {
363 
364 	/* assume addresses are already initialized */
365 	ip->ip_v = IPVERSION;
366 	ip->ip_hl = sizeof(*ip) >> 2;
367 	ip->ip_tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
368 	ip->ip_len = htons(sizeof(*ip) + plen);
369 #ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
370 	ip->ip_ttl = ip6->ip6_hlim - IPV6_HLIMDEC;
371 #else
372 	/* Forwarding code will decrement TTL. */
373 	ip->ip_ttl = ip6->ip6_hlim;
374 #endif
375 	ip->ip_sum = 0;
376 	ip->ip_p = (proto == IPPROTO_ICMPV6) ? IPPROTO_ICMP: proto;
377 	ip_fillid(ip);
378 	if (frag != NULL) {
379 		ip->ip_off = htons(ntohs(frag->ip6f_offlg) >> 3);
380 		if (frag->ip6f_offlg & IP6F_MORE_FRAG)
381 			ip->ip_off |= htons(IP_MF);
382 	} else {
383 		ip->ip_off = htons(IP_DF);
384 	}
385 	ip->ip_sum = in_cksum_hdr(ip);
386 }
387 
388 #define	FRAGSZ(mtu) ((mtu) - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag))
389 static NAT64NOINLINE int
390 nat64_fragment6(nat64_stats_block *stats, struct ip6_hdr *ip6, struct mbufq *mq,
391     struct mbuf *m, uint32_t mtu, uint16_t ip_id, uint16_t ip_off)
392 {
393 	struct ip6_frag ip6f;
394 	struct mbuf *n;
395 	uint16_t hlen, len, offset;
396 	int plen;
397 
398 	plen = ntohs(ip6->ip6_plen);
399 	hlen = sizeof(struct ip6_hdr);
400 
401 	/* Fragmentation isn't needed */
402 	if (ip_off == 0 && plen <= mtu - hlen) {
403 		M_PREPEND(m, hlen, M_NOWAIT);
404 		if (m == NULL) {
405 			NAT64STAT_INC(stats, nomem);
406 			return (ENOMEM);
407 		}
408 		bcopy(ip6, mtod(m, void *), hlen);
409 		if (mbufq_enqueue(mq, m) != 0) {
410 			m_freem(m);
411 			NAT64STAT_INC(stats, dropped);
412 			DPRINTF(DP_DROPS, "dropped due to mbufq overflow");
413 			return (ENOBUFS);
414 		}
415 		return (0);
416 	}
417 
418 	hlen += sizeof(struct ip6_frag);
419 	ip6f.ip6f_reserved = 0;
420 	ip6f.ip6f_nxt = ip6->ip6_nxt;
421 	ip6->ip6_nxt = IPPROTO_FRAGMENT;
422 	if (ip_off != 0) {
423 		/*
424 		 * We have got an IPv4 fragment.
425 		 * Use offset value and ip_id from original fragment.
426 		 */
427 		ip6f.ip6f_ident = htonl(ntohs(ip_id));
428 		offset = (ntohs(ip_off) & IP_OFFMASK) << 3;
429 		NAT64STAT_INC(stats, ifrags);
430 	} else {
431 		/* The packet size exceeds interface MTU */
432 		ip6f.ip6f_ident = htonl(ip6_randomid());
433 		offset = 0; /* First fragment*/
434 	}
435 	while (plen > 0 && m != NULL) {
436 		n = NULL;
437 		len = FRAGSZ(mtu) & ~7;
438 		if (len > plen)
439 			len = plen;
440 		ip6->ip6_plen = htons(len + sizeof(ip6f));
441 		ip6f.ip6f_offlg = ntohs(offset);
442 		if (len < plen || (ip_off & htons(IP_MF)) != 0)
443 			ip6f.ip6f_offlg |= IP6F_MORE_FRAG;
444 		offset += len;
445 		plen -= len;
446 		if (plen > 0) {
447 			n = m_split(m, len, M_NOWAIT);
448 			if (n == NULL)
449 				goto fail;
450 		}
451 		M_PREPEND(m, hlen, M_NOWAIT);
452 		if (m == NULL)
453 			goto fail;
454 		bcopy(ip6, mtod(m, void *), sizeof(struct ip6_hdr));
455 		bcopy(&ip6f, mtodo(m, sizeof(struct ip6_hdr)),
456 		    sizeof(struct ip6_frag));
457 		if (mbufq_enqueue(mq, m) != 0)
458 			goto fail;
459 		m = n;
460 	}
461 	NAT64STAT_ADD(stats, ofrags, mbufq_len(mq));
462 	return (0);
463 fail:
464 	if (m != NULL)
465 		m_freem(m);
466 	if (n != NULL)
467 		m_freem(n);
468 	mbufq_drain(mq);
469 	NAT64STAT_INC(stats, nomem);
470 	return (ENOMEM);
471 }
472 
473 #if __FreeBSD_version < 1100000
474 #define	rt_expire	rt_rmx.rmx_expire
475 #define	rt_mtu		rt_rmx.rmx_mtu
476 #endif
477 static NAT64NOINLINE struct sockaddr*
478 nat64_find_route6(struct route_in6 *ro, struct in6_addr *dest, struct mbuf *m)
479 {
480 	struct sockaddr_in6 *dst;
481 	struct rtentry *rt;
482 
483 	bzero(ro, sizeof(*ro));
484 	dst = (struct sockaddr_in6 *)&ro->ro_dst;
485 	dst->sin6_family = AF_INET6;
486 	dst->sin6_len = sizeof(*dst);
487 	dst->sin6_addr = *dest;
488 	IN6_LOOKUP_ROUTE(ro, M_GETFIB(m));
489 	rt = ro->ro_rt;
490 	if (rt && (rt->rt_flags & RTF_UP) &&
491 	    (rt->rt_ifp->if_flags & IFF_UP) &&
492 	    (rt->rt_ifp->if_drv_flags & IFF_DRV_RUNNING)) {
493 		if (rt->rt_flags & RTF_GATEWAY)
494 			dst = (struct sockaddr_in6 *)rt->rt_gateway;
495 	} else
496 		return (NULL);
497 	if (((rt->rt_flags & RTF_REJECT) &&
498 	    (rt->rt_expire == 0 ||
499 	    time_uptime < rt->rt_expire)) ||
500 	    rt->rt_ifp->if_link_state == LINK_STATE_DOWN)
501 		return (NULL);
502 	return ((struct sockaddr *)dst);
503 }
504 
505 #define	NAT64_ICMP6_PLEN	64
506 static NAT64NOINLINE void
507 nat64_icmp6_reflect(struct mbuf *m, uint8_t type, uint8_t code, uint32_t mtu,
508     nat64_stats_block *stats, void *logdata)
509 {
510 	struct icmp6_hdr *icmp6;
511 	struct ip6_hdr *ip6, *oip6;
512 	struct mbuf *n;
513 	int len, plen;
514 
515 	len = 0;
516 	plen = nat64_getlasthdr(m, &len);
517 	if (plen < 0) {
518 		DPRINTF(DP_DROPS, "mbuf isn't contigious");
519 		goto freeit;
520 	}
521 	/*
522 	 * Do not send ICMPv6 in reply to ICMPv6 errors.
523 	 */
524 	if (plen == IPPROTO_ICMPV6) {
525 		if (m->m_len < len + sizeof(*icmp6)) {
526 			DPRINTF(DP_DROPS, "mbuf isn't contigious");
527 			goto freeit;
528 		}
529 		icmp6 = mtodo(m, len);
530 		if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST ||
531 		    icmp6->icmp6_type == ND_REDIRECT) {
532 			DPRINTF(DP_DROPS, "do not send ICMPv6 in reply to "
533 			    "ICMPv6 errors");
534 			goto freeit;
535 		}
536 	}
537 	/*
538 	if (icmp6_ratelimit(&ip6->ip6_src, type, code))
539 		goto freeit;
540 		*/
541 	ip6 = mtod(m, struct ip6_hdr *);
542 	switch (type) {
543 	case ICMP6_DST_UNREACH:
544 	case ICMP6_PACKET_TOO_BIG:
545 	case ICMP6_TIME_EXCEEDED:
546 	case ICMP6_PARAM_PROB:
547 		break;
548 	default:
549 		goto freeit;
550 	}
551 	/* Calculate length of ICMPv6 payload */
552 	len = (m->m_pkthdr.len > NAT64_ICMP6_PLEN) ? NAT64_ICMP6_PLEN:
553 	    m->m_pkthdr.len;
554 
555 	/* Create new ICMPv6 datagram */
556 	plen = len + sizeof(struct icmp6_hdr);
557 	n = m_get2(sizeof(struct ip6_hdr) + plen + max_hdr, M_NOWAIT,
558 	    MT_HEADER, M_PKTHDR);
559 	if (n == NULL) {
560 		NAT64STAT_INC(stats, nomem);
561 		m_freem(m);
562 		return;
563 	}
564 	/*
565 	 * Move pkthdr from original mbuf. We should have initialized some
566 	 * fields, because we can reinject this mbuf to netisr and it will
567 	 * go trough input path (it requires at least rcvif should be set).
568 	 * Also do M_ALIGN() to reduce chances of need to allocate new mbuf
569 	 * in the chain, when we will do M_PREPEND() or make some type of
570 	 * tunneling.
571 	 */
572 	m_move_pkthdr(n, m);
573 	M_ALIGN(n, sizeof(struct ip6_hdr) + plen + max_hdr);
574 
575 	n->m_len = n->m_pkthdr.len = sizeof(struct ip6_hdr) + plen;
576 	oip6 = mtod(n, struct ip6_hdr *);
577 	oip6->ip6_src = ip6->ip6_dst;
578 	oip6->ip6_dst = ip6->ip6_src;
579 	oip6->ip6_nxt = IPPROTO_ICMPV6;
580 	oip6->ip6_flow = 0;
581 	oip6->ip6_vfc |= IPV6_VERSION;
582 	oip6->ip6_hlim = V_ip6_defhlim;
583 	oip6->ip6_plen = htons(plen);
584 
585 	icmp6 = mtodo(n, sizeof(struct ip6_hdr));
586 	icmp6->icmp6_cksum = 0;
587 	icmp6->icmp6_type = type;
588 	icmp6->icmp6_code = code;
589 	icmp6->icmp6_mtu = htonl(mtu);
590 
591 	m_copydata(m, 0, len, mtodo(n, sizeof(struct ip6_hdr) +
592 	    sizeof(struct icmp6_hdr)));
593 	icmp6->icmp6_cksum = in6_cksum(n, IPPROTO_ICMPV6,
594 	    sizeof(struct ip6_hdr), plen);
595 	m_freem(m);
596 	nat64_output_one(n, stats, logdata);
597 	return;
598 freeit:
599 	NAT64STAT_INC(stats, dropped);
600 	m_freem(m);
601 }
602 
603 static NAT64NOINLINE struct sockaddr*
604 nat64_find_route4(struct route *ro, in_addr_t dest, struct mbuf *m)
605 {
606 	struct sockaddr_in *dst;
607 	struct rtentry *rt;
608 
609 	bzero(ro, sizeof(*ro));
610 	dst = (struct sockaddr_in *)&ro->ro_dst;
611 	dst->sin_family = AF_INET;
612 	dst->sin_len = sizeof(*dst);
613 	dst->sin_addr.s_addr = dest;
614 	IN_LOOKUP_ROUTE(ro, M_GETFIB(m));
615 	rt = ro->ro_rt;
616 	if (rt && (rt->rt_flags & RTF_UP) &&
617 	    (rt->rt_ifp->if_flags & IFF_UP) &&
618 	    (rt->rt_ifp->if_drv_flags & IFF_DRV_RUNNING)) {
619 		if (rt->rt_flags & RTF_GATEWAY)
620 			dst = (struct sockaddr_in *)rt->rt_gateway;
621 	} else
622 		return (NULL);
623 	if (((rt->rt_flags & RTF_REJECT) &&
624 	    (rt->rt_expire == 0 ||
625 	    time_uptime < rt->rt_expire)) ||
626 	    rt->rt_ifp->if_link_state == LINK_STATE_DOWN)
627 		return (NULL);
628 	return ((struct sockaddr *)dst);
629 }
630 
631 #define	NAT64_ICMP_PLEN	64
632 static NAT64NOINLINE void
633 nat64_icmp_reflect(struct mbuf *m, uint8_t type,
634     uint8_t code, uint16_t mtu, nat64_stats_block *stats, void *logdata)
635 {
636 	struct icmp *icmp;
637 	struct ip *ip, *oip;
638 	struct mbuf *n;
639 	int len, plen;
640 
641 	ip = mtod(m, struct ip *);
642 	/* Do not send ICMP error if packet is not the first fragment */
643 	if (ip->ip_off & ~ntohs(IP_MF|IP_DF)) {
644 		DPRINTF(DP_DROPS, "not first fragment");
645 		goto freeit;
646 	}
647 	/* Do not send ICMP in reply to ICMP errors */
648 	if (ip->ip_p == IPPROTO_ICMP) {
649 		if (m->m_len < (ip->ip_hl << 2)) {
650 			DPRINTF(DP_DROPS, "mbuf isn't contigious");
651 			goto freeit;
652 		}
653 		icmp = mtodo(m, ip->ip_hl << 2);
654 		if (!ICMP_INFOTYPE(icmp->icmp_type)) {
655 			DPRINTF(DP_DROPS, "do not send ICMP in reply to "
656 			    "ICMP errors");
657 			goto freeit;
658 		}
659 	}
660 	switch (type) {
661 	case ICMP_UNREACH:
662 	case ICMP_TIMXCEED:
663 	case ICMP_PARAMPROB:
664 		break;
665 	default:
666 		goto freeit;
667 	}
668 	/* Calculate length of ICMP payload */
669 	len = (m->m_pkthdr.len > NAT64_ICMP_PLEN) ? (ip->ip_hl << 2) + 8:
670 	    m->m_pkthdr.len;
671 
672 	/* Create new ICMPv4 datagram */
673 	plen = len + sizeof(struct icmphdr) + sizeof(uint32_t);
674 	n = m_get2(sizeof(struct ip) + plen + max_hdr, M_NOWAIT,
675 	    MT_HEADER, M_PKTHDR);
676 	if (n == NULL) {
677 		NAT64STAT_INC(stats, nomem);
678 		m_freem(m);
679 		return;
680 	}
681 	m_move_pkthdr(n, m);
682 	M_ALIGN(n, sizeof(struct ip) + plen + max_hdr);
683 
684 	n->m_len = n->m_pkthdr.len = sizeof(struct ip) + plen;
685 	oip = mtod(n, struct ip *);
686 	oip->ip_v = IPVERSION;
687 	oip->ip_hl = sizeof(struct ip) >> 2;
688 	oip->ip_tos = 0;
689 	oip->ip_len = htons(n->m_pkthdr.len);
690 	oip->ip_ttl = V_ip_defttl;
691 	oip->ip_p = IPPROTO_ICMP;
692 	ip_fillid(oip);
693 	oip->ip_off = htons(IP_DF);
694 	oip->ip_src = ip->ip_dst;
695 	oip->ip_dst = ip->ip_src;
696 	oip->ip_sum = 0;
697 	oip->ip_sum = in_cksum_hdr(oip);
698 
699 	icmp = mtodo(n, sizeof(struct ip));
700 	icmp->icmp_type = type;
701 	icmp->icmp_code = code;
702 	icmp->icmp_cksum = 0;
703 	icmp->icmp_pmvoid = 0;
704 	icmp->icmp_nextmtu = htons(mtu);
705 	m_copydata(m, 0, len, mtodo(n, sizeof(struct ip) +
706 	    sizeof(struct icmphdr) + sizeof(uint32_t)));
707 	icmp->icmp_cksum = in_cksum_skip(n, sizeof(struct ip) + plen,
708 	    sizeof(struct ip));
709 	m_freem(m);
710 	nat64_output_one(n, stats, logdata);
711 	return;
712 freeit:
713 	NAT64STAT_INC(stats, dropped);
714 	m_freem(m);
715 }
716 
717 /* Translate ICMP echo request/reply into ICMPv6 */
718 static void
719 nat64_icmp_handle_echo(struct ip6_hdr *ip6, struct icmp6_hdr *icmp6,
720     uint16_t id, uint8_t type)
721 {
722 	uint16_t old;
723 
724 	old = *(uint16_t *)icmp6;	/* save type+code in one word */
725 	icmp6->icmp6_type = type;
726 	/* Reflect ICMPv6 -> ICMPv4 type translation in the cksum */
727 	icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum,
728 	    old, *(uint16_t *)icmp6);
729 	if (id != 0) {
730 		old = icmp6->icmp6_id;
731 		icmp6->icmp6_id = id;
732 		/* Reflect ICMP id translation in the cksum */
733 		icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum,
734 		    old, id);
735 	}
736 	/* Reflect IPv6 pseudo header in the cksum */
737 	icmp6->icmp6_cksum = ~in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen),
738 	    IPPROTO_ICMPV6, ~icmp6->icmp6_cksum);
739 }
740 
741 static NAT64NOINLINE struct mbuf *
742 nat64_icmp_translate(struct mbuf *m, struct ip6_hdr *ip6, uint16_t icmpid,
743     int offset, nat64_stats_block *stats)
744 {
745 	struct ip ip;
746 	struct icmp *icmp;
747 	struct tcphdr *tcp;
748 	struct udphdr *udp;
749 	struct ip6_hdr *eip6;
750 	struct mbuf *n;
751 	uint32_t mtu;
752 	int len, hlen, plen;
753 	uint8_t type, code;
754 
755 	if (m->m_len < offset + ICMP_MINLEN)
756 		m = m_pullup(m, offset + ICMP_MINLEN);
757 	if (m == NULL) {
758 		NAT64STAT_INC(stats, nomem);
759 		return (m);
760 	}
761 	mtu = 0;
762 	icmp = mtodo(m, offset);
763 	/* RFC 7915 p4.2 */
764 	switch (icmp->icmp_type) {
765 	case ICMP_ECHOREPLY:
766 		type = ICMP6_ECHO_REPLY;
767 		code = 0;
768 		break;
769 	case ICMP_UNREACH:
770 		type = ICMP6_DST_UNREACH;
771 		switch (icmp->icmp_code) {
772 		case ICMP_UNREACH_NET:
773 		case ICMP_UNREACH_HOST:
774 		case ICMP_UNREACH_SRCFAIL:
775 		case ICMP_UNREACH_NET_UNKNOWN:
776 		case ICMP_UNREACH_HOST_UNKNOWN:
777 		case ICMP_UNREACH_TOSNET:
778 		case ICMP_UNREACH_TOSHOST:
779 			code = ICMP6_DST_UNREACH_NOROUTE;
780 			break;
781 		case ICMP_UNREACH_PROTOCOL:
782 			type = ICMP6_PARAM_PROB;
783 			code = ICMP6_PARAMPROB_NEXTHEADER;
784 			break;
785 		case ICMP_UNREACH_PORT:
786 			code = ICMP6_DST_UNREACH_NOPORT;
787 			break;
788 		case ICMP_UNREACH_NEEDFRAG:
789 			type = ICMP6_PACKET_TOO_BIG;
790 			code = 0;
791 			/* XXX: needs an additional look */
792 			mtu = max(IPV6_MMTU, ntohs(icmp->icmp_nextmtu) + 20);
793 			break;
794 		case ICMP_UNREACH_NET_PROHIB:
795 		case ICMP_UNREACH_HOST_PROHIB:
796 		case ICMP_UNREACH_FILTER_PROHIB:
797 		case ICMP_UNREACH_PRECEDENCE_CUTOFF:
798 			code = ICMP6_DST_UNREACH_ADMIN;
799 			break;
800 		default:
801 			DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d",
802 			    icmp->icmp_type, icmp->icmp_code);
803 			goto freeit;
804 		}
805 		break;
806 	case ICMP_TIMXCEED:
807 		type = ICMP6_TIME_EXCEEDED;
808 		code = icmp->icmp_code;
809 		break;
810 	case ICMP_ECHO:
811 		type = ICMP6_ECHO_REQUEST;
812 		code = 0;
813 		break;
814 	case ICMP_PARAMPROB:
815 		type = ICMP6_PARAM_PROB;
816 		switch (icmp->icmp_code) {
817 		case ICMP_PARAMPROB_ERRATPTR:
818 		case ICMP_PARAMPROB_LENGTH:
819 			code = ICMP6_PARAMPROB_HEADER;
820 			switch (icmp->icmp_pptr) {
821 			case 0: /* Version/IHL */
822 			case 1: /* Type Of Service */
823 				mtu = icmp->icmp_pptr;
824 				break;
825 			case 2: /* Total Length */
826 			case 3: mtu = 4; /* Payload Length */
827 				break;
828 			case 8: /* Time to Live */
829 				mtu = 7; /* Hop Limit */
830 				break;
831 			case 9: /* Protocol */
832 				mtu = 6; /* Next Header */
833 				break;
834 			case 12: /* Source address */
835 			case 13:
836 			case 14:
837 			case 15:
838 				mtu = 8;
839 				break;
840 			case 16: /* Destination address */
841 			case 17:
842 			case 18:
843 			case 19:
844 				mtu = 24;
845 				break;
846 			default: /* Silently drop */
847 				DPRINTF(DP_DROPS, "Unsupported ICMP type %d,"
848 				    " code %d, pptr %d", icmp->icmp_type,
849 				    icmp->icmp_code, icmp->icmp_pptr);
850 				goto freeit;
851 			}
852 			break;
853 		default:
854 			DPRINTF(DP_DROPS, "Unsupported ICMP type %d,"
855 			    " code %d, pptr %d", icmp->icmp_type,
856 			    icmp->icmp_code, icmp->icmp_pptr);
857 			goto freeit;
858 		}
859 		break;
860 	default:
861 		DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d",
862 		    icmp->icmp_type, icmp->icmp_code);
863 		goto freeit;
864 	}
865 	/*
866 	 * For echo request/reply we can use original payload,
867 	 * but we need adjust icmp_cksum, because ICMPv6 cksum covers
868 	 * IPv6 pseudo header and ICMPv6 types differs from ICMPv4.
869 	 */
870 	if (type == ICMP6_ECHO_REQUEST || type == ICMP6_ECHO_REPLY) {
871 		nat64_icmp_handle_echo(ip6, ICMP6(icmp), icmpid, type);
872 		return (m);
873 	}
874 	/*
875 	 * For other types of ICMP messages we need to translate inner
876 	 * IPv4 header to IPv6 header.
877 	 * Assume ICMP src is the same as payload dst
878 	 * E.g. we have ( GWsrc1 , NATIP1 ) in outer header
879 	 * and          ( NATIP1, Hostdst1 ) in ICMP copy header.
880 	 * In that case, we already have map for NATIP1 and GWsrc1.
881 	 * The only thing we need is to copy IPv6 map prefix to
882 	 * Hostdst1.
883 	 */
884 	hlen = offset + ICMP_MINLEN;
885 	if (m->m_pkthdr.len < hlen + sizeof(struct ip) + ICMP_MINLEN) {
886 		DPRINTF(DP_DROPS, "Message is too short %d",
887 		    m->m_pkthdr.len);
888 		goto freeit;
889 	}
890 	m_copydata(m, hlen, sizeof(struct ip), (char *)&ip);
891 	if (ip.ip_v != IPVERSION) {
892 		DPRINTF(DP_DROPS, "Wrong IP version %d", ip.ip_v);
893 		goto freeit;
894 	}
895 	hlen += ip.ip_hl << 2; /* Skip inner IP header */
896 	if (nat64_check_ip4(ip.ip_src.s_addr) != 0 ||
897 	    nat64_check_ip4(ip.ip_dst.s_addr) != 0 ||
898 	    nat64_check_private_ip4(ip.ip_src.s_addr) != 0 ||
899 	    nat64_check_private_ip4(ip.ip_dst.s_addr) != 0) {
900 		DPRINTF(DP_DROPS, "IP addresses checks failed %04x -> %04x",
901 		    ntohl(ip.ip_src.s_addr), ntohl(ip.ip_dst.s_addr));
902 		goto freeit;
903 	}
904 	if (m->m_pkthdr.len < hlen + ICMP_MINLEN) {
905 		DPRINTF(DP_DROPS, "Message is too short %d",
906 		    m->m_pkthdr.len);
907 		goto freeit;
908 	}
909 #if 0
910 	/*
911 	 * Check that inner source matches the outer destination.
912 	 * XXX: We need some method to convert IPv4 into IPv6 address here,
913 	 *	and compare IPv6 addresses.
914 	 */
915 	if (ip.ip_src.s_addr != nat64_get_ip4(&ip6->ip6_dst)) {
916 		DPRINTF(DP_GENERIC, "Inner source doesn't match destination ",
917 		    "%04x vs %04x", ip.ip_src.s_addr,
918 		    nat64_get_ip4(&ip6->ip6_dst));
919 		goto freeit;
920 	}
921 #endif
922 	/*
923 	 * Create new mbuf for ICMPv6 datagram.
924 	 * NOTE: len is data length just after inner IP header.
925 	 */
926 	len = m->m_pkthdr.len - hlen;
927 	if (sizeof(struct ip6_hdr) +
928 	    sizeof(struct icmp6_hdr) + len > NAT64_ICMP6_PLEN)
929 		len = NAT64_ICMP6_PLEN - sizeof(struct icmp6_hdr) -
930 		    sizeof(struct ip6_hdr);
931 	plen = sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr) + len;
932 	n = m_get2(offset + plen + max_hdr, M_NOWAIT, MT_HEADER, M_PKTHDR);
933 	if (n == NULL) {
934 		NAT64STAT_INC(stats, nomem);
935 		m_freem(m);
936 		return (NULL);
937 	}
938 	m_move_pkthdr(n, m);
939 	M_ALIGN(n, offset + plen + max_hdr);
940 	n->m_len = n->m_pkthdr.len = offset + plen;
941 	/* Adjust ip6_plen in outer header */
942 	ip6->ip6_plen = htons(plen);
943 	/* Construct new inner IPv6 header */
944 	eip6 = mtodo(n, offset + sizeof(struct icmp6_hdr));
945 	eip6->ip6_src = ip6->ip6_dst;
946 	/* Use the fact that we have single /96 prefix for IPv4 map */
947 	eip6->ip6_dst = ip6->ip6_src;
948 	nat64_set_ip4(&eip6->ip6_dst, ip.ip_dst.s_addr);
949 
950 	eip6->ip6_flow = htonl(ip.ip_tos << 20);
951 	eip6->ip6_vfc |= IPV6_VERSION;
952 	eip6->ip6_hlim = ip.ip_ttl;
953 	eip6->ip6_plen = htons(ntohs(ip.ip_len) - (ip.ip_hl << 2));
954 	eip6->ip6_nxt = (ip.ip_p == IPPROTO_ICMP) ? IPPROTO_ICMPV6: ip.ip_p;
955 	m_copydata(m, hlen, len, (char *)(eip6 + 1));
956 	/*
957 	 * We need to translate source port in the inner ULP header,
958 	 * and adjust ULP checksum.
959 	 */
960 	switch (ip.ip_p) {
961 	case IPPROTO_TCP:
962 		if (len < offsetof(struct tcphdr, th_sum))
963 			break;
964 		tcp = TCP(eip6 + 1);
965 		if (icmpid != 0) {
966 			tcp->th_sum = cksum_adjust(tcp->th_sum,
967 			    tcp->th_sport, icmpid);
968 			tcp->th_sport = icmpid;
969 		}
970 		tcp->th_sum = cksum_add(tcp->th_sum,
971 		    ~nat64_cksum_convert(eip6, &ip));
972 		break;
973 	case IPPROTO_UDP:
974 		if (len < offsetof(struct udphdr, uh_sum))
975 			break;
976 		udp = UDP(eip6 + 1);
977 		if (icmpid != 0) {
978 			udp->uh_sum = cksum_adjust(udp->uh_sum,
979 			    udp->uh_sport, icmpid);
980 			udp->uh_sport = icmpid;
981 		}
982 		udp->uh_sum = cksum_add(udp->uh_sum,
983 		    ~nat64_cksum_convert(eip6, &ip));
984 		break;
985 	case IPPROTO_ICMP:
986 		/*
987 		 * Check if this is an ICMP error message for echo request
988 		 * that we sent. I.e. ULP in the data containing invoking
989 		 * packet is IPPROTO_ICMP and its type is ICMP_ECHO.
990 		 */
991 		icmp = (struct icmp *)(eip6 + 1);
992 		if (icmp->icmp_type != ICMP_ECHO) {
993 			m_freem(n);
994 			goto freeit;
995 		}
996 		/*
997 		 * For our client this original datagram should looks
998 		 * like it was ICMPv6 datagram with type ICMP6_ECHO_REQUEST.
999 		 * Thus we need adjust icmp_cksum and convert type from
1000 		 * ICMP_ECHO to ICMP6_ECHO_REQUEST.
1001 		 */
1002 		nat64_icmp_handle_echo(eip6, ICMP6(icmp), icmpid,
1003 		    ICMP6_ECHO_REQUEST);
1004 	}
1005 	m_freem(m);
1006 	/* Convert ICMPv4 into ICMPv6 header */
1007 	icmp = mtodo(n, offset);
1008 	ICMP6(icmp)->icmp6_type = type;
1009 	ICMP6(icmp)->icmp6_code = code;
1010 	ICMP6(icmp)->icmp6_mtu = htonl(mtu);
1011 	ICMP6(icmp)->icmp6_cksum = 0;
1012 	ICMP6(icmp)->icmp6_cksum = cksum_add(
1013 	    ~in6_cksum_pseudo(ip6, plen, IPPROTO_ICMPV6, 0),
1014 	    in_cksum_skip(n, n->m_pkthdr.len, offset));
1015 	return (n);
1016 freeit:
1017 	m_freem(m);
1018 	NAT64STAT_INC(stats, dropped);
1019 	return (NULL);
1020 }
1021 
1022 int
1023 nat64_getlasthdr(struct mbuf *m, int *offset)
1024 {
1025 	struct ip6_hdr *ip6;
1026 	struct ip6_hbh *hbh;
1027 	int proto, hlen;
1028 
1029 	if (offset != NULL)
1030 		hlen = *offset;
1031 	else
1032 		hlen = 0;
1033 
1034 	if (m->m_len < hlen + sizeof(*ip6))
1035 		return (-1);
1036 
1037 	ip6 = mtodo(m, hlen);
1038 	hlen += sizeof(*ip6);
1039 	proto = ip6->ip6_nxt;
1040 	/* Skip extension headers */
1041 	while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING ||
1042 	    proto == IPPROTO_DSTOPTS) {
1043 		hbh = mtodo(m, hlen);
1044 		/*
1045 		 * We expect mbuf has contigious data up to
1046 		 * upper level header.
1047 		 */
1048 		if (m->m_len < hlen)
1049 			return (-1);
1050 		/*
1051 		 * We doesn't support Jumbo payload option,
1052 		 * so return error.
1053 		 */
1054 		if (proto == IPPROTO_HOPOPTS && ip6->ip6_plen == 0)
1055 			return (-1);
1056 		proto = hbh->ip6h_nxt;
1057 		hlen += hbh->ip6h_len << 3;
1058 	}
1059 	if (offset != NULL)
1060 		*offset = hlen;
1061 	return (proto);
1062 }
1063 
1064 int
1065 nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr,
1066     struct in6_addr *daddr, uint16_t lport, nat64_stats_block *stats,
1067     void *logdata)
1068 {
1069 	struct route_in6 ro;
1070 	struct ip6_hdr ip6;
1071 	struct ifnet *ifp;
1072 	struct ip *ip;
1073 	struct mbufq mq;
1074 	struct sockaddr *dst;
1075 	uint32_t mtu;
1076 	uint16_t ip_id, ip_off;
1077 	uint16_t *csum;
1078 	int plen, hlen;
1079 	uint8_t proto;
1080 
1081 	ip = mtod(m, struct ip*);
1082 
1083 	if (ip->ip_ttl <= IPTTLDEC) {
1084 		nat64_icmp_reflect(m, ICMP_TIMXCEED,
1085 		    ICMP_TIMXCEED_INTRANS, 0, stats, logdata);
1086 		return (NAT64RETURN);
1087 	}
1088 
1089 	ip6.ip6_dst = *daddr;
1090 	ip6.ip6_src = *saddr;
1091 
1092 	hlen = ip->ip_hl << 2;
1093 	plen = ntohs(ip->ip_len) - hlen;
1094 	proto = ip->ip_p;
1095 
1096 	/* Save ip_id and ip_off, both are in network byte order */
1097 	ip_id = ip->ip_id;
1098 	ip_off = ip->ip_off & htons(IP_OFFMASK | IP_MF);
1099 
1100 	/* Fragment length must be multiple of 8 octets */
1101 	if ((ip->ip_off & htons(IP_MF)) != 0 && (plen & 0x7) != 0) {
1102 		nat64_icmp_reflect(m, ICMP_PARAMPROB,
1103 		    ICMP_PARAMPROB_LENGTH, 0, stats, logdata);
1104 		return (NAT64RETURN);
1105 	}
1106 	/* Fragmented ICMP is unsupported */
1107 	if (proto == IPPROTO_ICMP && ip_off != 0) {
1108 		DPRINTF(DP_DROPS, "dropped due to fragmented ICMP");
1109 		NAT64STAT_INC(stats, dropped);
1110 		return (NAT64MFREE);
1111 	}
1112 
1113 	dst = nat64_find_route6(&ro, &ip6.ip6_dst, m);
1114 	if (dst == NULL) {
1115 		FREE_ROUTE(&ro);
1116 		NAT64STAT_INC(stats, noroute6);
1117 		nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0,
1118 		    stats, logdata);
1119 		return (NAT64RETURN);
1120 	}
1121 	ifp = ro.ro_rt->rt_ifp;
1122 	if (ro.ro_rt->rt_mtu != 0)
1123 		mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu);
1124 	else
1125 		mtu = ifp->if_mtu;
1126 	if (mtu < plen + sizeof(ip6) && (ip->ip_off & htons(IP_DF)) != 0) {
1127 		FREE_ROUTE(&ro);
1128 		nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
1129 		    FRAGSZ(mtu) + sizeof(struct ip), stats, logdata);
1130 		return (NAT64RETURN);
1131 	}
1132 
1133 	ip6.ip6_flow = htonl(ip->ip_tos << 20);
1134 	ip6.ip6_vfc |= IPV6_VERSION;
1135 #ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
1136 	ip6.ip6_hlim = ip->ip_ttl - IPTTLDEC;
1137 #else
1138 	/* Forwarding code will decrement HLIM. */
1139 	ip6.ip6_hlim = ip->ip_ttl;
1140 #endif
1141 	ip6.ip6_plen = htons(plen);
1142 	ip6.ip6_nxt = (proto == IPPROTO_ICMP) ? IPPROTO_ICMPV6: proto;
1143 	/* Convert checksums. */
1144 	switch (proto) {
1145 	case IPPROTO_TCP:
1146 		csum = &TCP(mtodo(m, hlen))->th_sum;
1147 		if (lport != 0) {
1148 			struct tcphdr *tcp = TCP(mtodo(m, hlen));
1149 			*csum = cksum_adjust(*csum, tcp->th_dport, lport);
1150 			tcp->th_dport = lport;
1151 		}
1152 		*csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));
1153 		break;
1154 	case IPPROTO_UDP:
1155 		csum = &UDP(mtodo(m, hlen))->uh_sum;
1156 		if (lport != 0) {
1157 			struct udphdr *udp = UDP(mtodo(m, hlen));
1158 			*csum = cksum_adjust(*csum, udp->uh_dport, lport);
1159 			udp->uh_dport = lport;
1160 		}
1161 		*csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));
1162 		break;
1163 	case IPPROTO_ICMP:
1164 		m = nat64_icmp_translate(m, &ip6, lport, hlen, stats);
1165 		if (m == NULL) {
1166 			FREE_ROUTE(&ro);
1167 			/* stats already accounted */
1168 			return (NAT64RETURN);
1169 		}
1170 	}
1171 
1172 	m_adj(m, hlen);
1173 	mbufq_init(&mq, 255);
1174 	nat64_fragment6(stats, &ip6, &mq, m, mtu, ip_id, ip_off);
1175 	while ((m = mbufq_dequeue(&mq)) != NULL) {
1176 		if (nat64_output(ifp, m, dst, (struct route *)&ro, stats,
1177 		    logdata) != 0)
1178 			break;
1179 		NAT64STAT_INC(stats, opcnt46);
1180 	}
1181 	mbufq_drain(&mq);
1182 	FREE_ROUTE(&ro);
1183 	return (NAT64RETURN);
1184 }
1185 
1186 int
1187 nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport,
1188     nat64_stats_block *stats, void *logdata)
1189 {
1190 	struct ip ip;
1191 	struct icmp6_hdr *icmp6;
1192 	struct ip6_frag *ip6f;
1193 	struct ip6_hdr *ip6, *ip6i;
1194 	uint32_t mtu;
1195 	int plen, proto;
1196 	uint8_t type, code;
1197 
1198 	if (hlen == 0) {
1199 		ip6 = mtod(m, struct ip6_hdr *);
1200 		if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
1201 		    nat64_check_ip6(&ip6->ip6_dst) != 0)
1202 			return (NAT64SKIP);
1203 
1204 		proto = nat64_getlasthdr(m, &hlen);
1205 		if (proto != IPPROTO_ICMPV6) {
1206 			DPRINTF(DP_DROPS,
1207 			    "dropped due to mbuf isn't contigious");
1208 			NAT64STAT_INC(stats, dropped);
1209 			return (NAT64MFREE);
1210 		}
1211 	}
1212 
1213 	/*
1214 	 * Translate ICMPv6 type and code to ICMPv4 (RFC7915).
1215 	 * NOTE: ICMPv6 echo handled by nat64_do_handle_ip6().
1216 	 */
1217 	icmp6 = mtodo(m, hlen);
1218 	mtu = 0;
1219 	switch (icmp6->icmp6_type) {
1220 	case ICMP6_DST_UNREACH:
1221 		type = ICMP_UNREACH;
1222 		switch (icmp6->icmp6_code) {
1223 		case ICMP6_DST_UNREACH_NOROUTE:
1224 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
1225 		case ICMP6_DST_UNREACH_ADDR:
1226 			code = ICMP_UNREACH_HOST;
1227 			break;
1228 		case ICMP6_DST_UNREACH_ADMIN:
1229 			code = ICMP_UNREACH_HOST_PROHIB;
1230 			break;
1231 		case ICMP6_DST_UNREACH_NOPORT:
1232 			code = ICMP_UNREACH_PORT;
1233 			break;
1234 		default:
1235 			DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
1236 			    " code %d", icmp6->icmp6_type,
1237 			    icmp6->icmp6_code);
1238 			NAT64STAT_INC(stats, dropped);
1239 			return (NAT64MFREE);
1240 		}
1241 		break;
1242 	case ICMP6_PACKET_TOO_BIG:
1243 		type = ICMP_UNREACH;
1244 		code = ICMP_UNREACH_NEEDFRAG;
1245 		mtu = ntohl(icmp6->icmp6_mtu);
1246 		if (mtu < IPV6_MMTU) {
1247 			DPRINTF(DP_DROPS, "Wrong MTU %d in ICMPv6 type %d,"
1248 			    " code %d", mtu, icmp6->icmp6_type,
1249 			    icmp6->icmp6_code);
1250 			NAT64STAT_INC(stats, dropped);
1251 			return (NAT64MFREE);
1252 		}
1253 		/*
1254 		 * Adjust MTU to reflect difference between
1255 		 * IPv6 an IPv4 headers.
1256 		 */
1257 		mtu -= sizeof(struct ip6_hdr) - sizeof(struct ip);
1258 		break;
1259 	case ICMP6_TIME_EXCEEDED:
1260 		type = ICMP_TIMXCEED;
1261 		code = icmp6->icmp6_code;
1262 		break;
1263 	case ICMP6_PARAM_PROB:
1264 		switch (icmp6->icmp6_code) {
1265 		case ICMP6_PARAMPROB_HEADER:
1266 			type = ICMP_PARAMPROB;
1267 			code = ICMP_PARAMPROB_ERRATPTR;
1268 			mtu = ntohl(icmp6->icmp6_pptr);
1269 			switch (mtu) {
1270 			case 0: /* Version/Traffic Class */
1271 			case 1: /* Traffic Class/Flow Label */
1272 				break;
1273 			case 4: /* Payload Length */
1274 			case 5:
1275 				mtu = 2;
1276 				break;
1277 			case 6: /* Next Header */
1278 				mtu = 9;
1279 				break;
1280 			case 7: /* Hop Limit */
1281 				mtu = 8;
1282 				break;
1283 			default:
1284 				if (mtu >= 8 && mtu <= 23) {
1285 					mtu = 12; /* Source address */
1286 					break;
1287 				}
1288 				if (mtu >= 24 && mtu <= 39) {
1289 					mtu = 16; /* Destination address */
1290 					break;
1291 				}
1292 				DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
1293 				    " code %d, pptr %d", icmp6->icmp6_type,
1294 				    icmp6->icmp6_code, mtu);
1295 				NAT64STAT_INC(stats, dropped);
1296 				return (NAT64MFREE);
1297 			}
1298 		case ICMP6_PARAMPROB_NEXTHEADER:
1299 			type = ICMP_UNREACH;
1300 			code = ICMP_UNREACH_PROTOCOL;
1301 			break;
1302 		default:
1303 			DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
1304 			    " code %d, pptr %d", icmp6->icmp6_type,
1305 			    icmp6->icmp6_code, ntohl(icmp6->icmp6_pptr));
1306 			NAT64STAT_INC(stats, dropped);
1307 			return (NAT64MFREE);
1308 		}
1309 		break;
1310 	default:
1311 		DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d, code %d",
1312 		    icmp6->icmp6_type, icmp6->icmp6_code);
1313 		NAT64STAT_INC(stats, dropped);
1314 		return (NAT64MFREE);
1315 	}
1316 
1317 	hlen += sizeof(struct icmp6_hdr);
1318 	if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) {
1319 		NAT64STAT_INC(stats, dropped);
1320 		DPRINTF(DP_DROPS, "Message is too short %d",
1321 		    m->m_pkthdr.len);
1322 		return (NAT64MFREE);
1323 	}
1324 	/*
1325 	 * We need at least ICMP_MINLEN bytes of original datagram payload
1326 	 * to generate ICMP message. It is nice that ICMP_MINLEN is equal
1327 	 * to sizeof(struct ip6_frag). So, if embedded datagram had a fragment
1328 	 * header we will not have to do m_pullup() again.
1329 	 *
1330 	 * What we have here:
1331 	 * Outer header: (IPv6iGW, v4mapPRefix+v4exthost)
1332 	 * Inner header: (v4mapPRefix+v4host, IPv6iHost) [sport, dport]
1333 	 * We need to translate it to:
1334 	 *
1335 	 * Outer header: (alias_host, v4exthost)
1336 	 * Inner header: (v4exthost, alias_host) [sport, alias_port]
1337 	 *
1338 	 * Assume caller function has checked if v4mapPRefix+v4host
1339 	 * matches configured prefix.
1340 	 * The only two things we should be provided with are mapping between
1341 	 * IPv6iHost <> alias_host and between dport and alias_port.
1342 	 */
1343 	if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN)
1344 		m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN);
1345 	if (m == NULL) {
1346 		NAT64STAT_INC(stats, nomem);
1347 		return (NAT64RETURN);
1348 	}
1349 	ip6 = mtod(m, struct ip6_hdr *);
1350 	ip6i = mtodo(m, hlen);
1351 	ip6f = NULL;
1352 	proto = ip6i->ip6_nxt;
1353 	plen = ntohs(ip6i->ip6_plen);
1354 	hlen += sizeof(struct ip6_hdr);
1355 	if (proto == IPPROTO_FRAGMENT) {
1356 		if (m->m_pkthdr.len < hlen + sizeof(struct ip6_frag) +
1357 		    ICMP_MINLEN)
1358 			goto fail;
1359 		ip6f = mtodo(m, hlen);
1360 		proto = ip6f->ip6f_nxt;
1361 		plen -= sizeof(struct ip6_frag);
1362 		hlen += sizeof(struct ip6_frag);
1363 		/* Ajust MTU to reflect frag header size */
1364 		if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG)
1365 			mtu -= sizeof(struct ip6_frag);
1366 	}
1367 	if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
1368 		DPRINTF(DP_DROPS, "Unsupported proto %d in the inner header",
1369 		    proto);
1370 		goto fail;
1371 	}
1372 	if (nat64_check_ip6(&ip6i->ip6_src) != 0 ||
1373 	    nat64_check_ip6(&ip6i->ip6_dst) != 0) {
1374 		DPRINTF(DP_DROPS, "Inner addresses do not passes the check");
1375 		goto fail;
1376 	}
1377 	/* Check if outer dst is the same as inner src */
1378 	if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6i->ip6_src)) {
1379 		DPRINTF(DP_DROPS, "Inner src doesn't match outer dst");
1380 		goto fail;
1381 	}
1382 
1383 	/* Now we need to make a fake IPv4 packet to generate ICMP message */
1384 	ip.ip_dst.s_addr = aaddr;
1385 	ip.ip_src.s_addr = nat64_get_ip4(&ip6i->ip6_src);
1386 	/* XXX: Make fake ulp header */
1387 #ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
1388 	ip6i->ip6_hlim += IPV6_HLIMDEC; /* init_ip4hdr will decrement it */
1389 #endif
1390 	nat64_init_ip4hdr(ip6i, ip6f, plen, proto, &ip);
1391 	m_adj(m, hlen - sizeof(struct ip));
1392 	bcopy(&ip, mtod(m, void *), sizeof(ip));
1393 	nat64_icmp_reflect(m, type, code, (uint16_t)mtu, stats, logdata);
1394 	return (NAT64RETURN);
1395 fail:
1396 	/*
1397 	 * We must call m_freem() because mbuf pointer could be
1398 	 * changed with m_pullup().
1399 	 */
1400 	m_freem(m);
1401 	NAT64STAT_INC(stats, dropped);
1402 	return (NAT64RETURN);
1403 }
1404 
1405 int
1406 nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport,
1407     nat64_stats_block *stats, void *logdata)
1408 {
1409 	struct route ro;
1410 	struct ip ip;
1411 	struct ifnet *ifp;
1412 	struct ip6_frag *frag;
1413 	struct ip6_hdr *ip6;
1414 	struct icmp6_hdr *icmp6;
1415 	struct sockaddr *dst;
1416 	uint16_t *csum;
1417 	uint32_t mtu;
1418 	int plen, hlen, proto;
1419 
1420 	/*
1421 	 * XXX: we expect ipfw_chk() did m_pullup() up to upper level
1422 	 * protocol's headers. Also we skip some checks, that ip6_input(),
1423 	 * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did.
1424 	 */
1425 	ip6 = mtod(m, struct ip6_hdr *);
1426 	if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
1427 	    nat64_check_ip6(&ip6->ip6_dst) != 0) {
1428 		return (NAT64SKIP);
1429 	}
1430 
1431 	/* Starting from this point we must not return zero */
1432 	ip.ip_src.s_addr = aaddr;
1433 	if (nat64_check_ip4(ip.ip_src.s_addr) != 0) {
1434 		DPRINTF(DP_GENERIC, "invalid source address: %08x",
1435 		    ip.ip_src.s_addr);
1436 		/* XXX: stats? */
1437 		return (NAT64MFREE);
1438 	}
1439 
1440 	ip.ip_dst.s_addr = nat64_get_ip4(&ip6->ip6_dst);
1441 	if (ip.ip_dst.s_addr == 0) {
1442 		/* XXX: stats? */
1443 		return (NAT64MFREE);
1444 	}
1445 
1446 	if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
1447 		nat64_icmp6_reflect(m, ICMP6_TIME_EXCEEDED,
1448 		    ICMP6_TIME_EXCEED_TRANSIT, 0, stats, logdata);
1449 		return (NAT64RETURN);
1450 	}
1451 
1452 	hlen = 0;
1453 	plen = ntohs(ip6->ip6_plen);
1454 	proto = nat64_getlasthdr(m, &hlen);
1455 	if (proto < 0) {
1456 		DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious");
1457 		NAT64STAT_INC(stats, dropped);
1458 		return (NAT64MFREE);
1459 	}
1460 	frag = NULL;
1461 	if (proto == IPPROTO_FRAGMENT) {
1462 		/* ipfw_chk should m_pullup up to frag header */
1463 		if (m->m_len < hlen + sizeof(*frag)) {
1464 			DPRINTF(DP_DROPS,
1465 			    "dropped due to mbuf isn't contigious");
1466 			NAT64STAT_INC(stats, dropped);
1467 			return (NAT64MFREE);
1468 		}
1469 		frag = mtodo(m, hlen);
1470 		proto = frag->ip6f_nxt;
1471 		hlen += sizeof(*frag);
1472 		/* Fragmented ICMPv6 is unsupported */
1473 		if (proto == IPPROTO_ICMPV6) {
1474 			DPRINTF(DP_DROPS, "dropped due to fragmented ICMPv6");
1475 			NAT64STAT_INC(stats, dropped);
1476 			return (NAT64MFREE);
1477 		}
1478 		/* Fragment length must be multiple of 8 octets */
1479 		if ((frag->ip6f_offlg & IP6F_MORE_FRAG) != 0 &&
1480 		    ((plen + sizeof(struct ip6_hdr) - hlen) & 0x7) != 0) {
1481 			nat64_icmp6_reflect(m, ICMP6_PARAM_PROB,
1482 			    ICMP6_PARAMPROB_HEADER,
1483 			    offsetof(struct ip6_hdr, ip6_plen), stats,
1484 			    logdata);
1485 			return (NAT64RETURN);
1486 		}
1487 	}
1488 	plen -= hlen - sizeof(struct ip6_hdr);
1489 	if (plen < 0 || m->m_pkthdr.len < plen + hlen) {
1490 		DPRINTF(DP_DROPS, "plen %d, pkthdr.len %d, hlen %d",
1491 		    plen, m->m_pkthdr.len, hlen);
1492 		NAT64STAT_INC(stats, dropped);
1493 		return (NAT64MFREE);
1494 	}
1495 
1496 	icmp6 = NULL;	/* Make gcc happy */
1497 	if (proto == IPPROTO_ICMPV6) {
1498 		icmp6 = mtodo(m, hlen);
1499 		if (icmp6->icmp6_type != ICMP6_ECHO_REQUEST &&
1500 		    icmp6->icmp6_type != ICMP6_ECHO_REPLY)
1501 			return (nat64_handle_icmp6(m, hlen, aaddr, aport,
1502 			    stats, logdata));
1503 	}
1504 	dst = nat64_find_route4(&ro, ip.ip_dst.s_addr, m);
1505 	if (dst == NULL) {
1506 		FREE_ROUTE(&ro);
1507 		NAT64STAT_INC(stats, noroute4);
1508 		nat64_icmp6_reflect(m, ICMP6_DST_UNREACH,
1509 		    ICMP6_DST_UNREACH_NOROUTE, 0, stats, logdata);
1510 		return (NAT64RETURN);
1511 	}
1512 
1513 	ifp = ro.ro_rt->rt_ifp;
1514 	if (ro.ro_rt->rt_mtu != 0)
1515 		mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu);
1516 	else
1517 		mtu = ifp->if_mtu;
1518 	if (mtu < plen + sizeof(ip)) {
1519 		FREE_ROUTE(&ro);
1520 		nat64_icmp6_reflect(m, ICMP6_PACKET_TOO_BIG, 0, mtu, stats,
1521 		    logdata);
1522 		return (NAT64RETURN);
1523 	}
1524 	nat64_init_ip4hdr(ip6, frag, plen, proto, &ip);
1525 	/* Convert checksums. */
1526 	switch (proto) {
1527 	case IPPROTO_TCP:
1528 		csum = &TCP(mtodo(m, hlen))->th_sum;
1529 		if (aport != 0) {
1530 			struct tcphdr *tcp = TCP(mtodo(m, hlen));
1531 			*csum = cksum_adjust(*csum, tcp->th_sport, aport);
1532 			tcp->th_sport = aport;
1533 		}
1534 		*csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));
1535 		break;
1536 	case IPPROTO_UDP:
1537 		csum = &UDP(mtodo(m, hlen))->uh_sum;
1538 		if (aport != 0) {
1539 			struct udphdr *udp = UDP(mtodo(m, hlen));
1540 			*csum = cksum_adjust(*csum, udp->uh_sport, aport);
1541 			udp->uh_sport = aport;
1542 		}
1543 		*csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));
1544 		break;
1545 	case IPPROTO_ICMPV6:
1546 		/* Checksum in ICMPv6 covers pseudo header */
1547 		csum = &icmp6->icmp6_cksum;
1548 		*csum = cksum_add(*csum, in6_cksum_pseudo(ip6, plen,
1549 		    IPPROTO_ICMPV6, 0));
1550 		/* Convert ICMPv6 types to ICMP */
1551 		mtu = *(uint16_t *)icmp6; /* save old word for cksum_adjust */
1552 		if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST)
1553 			icmp6->icmp6_type = ICMP_ECHO;
1554 		else /* ICMP6_ECHO_REPLY */
1555 			icmp6->icmp6_type = ICMP_ECHOREPLY;
1556 		*csum = cksum_adjust(*csum, (uint16_t)mtu, *(uint16_t *)icmp6);
1557 		if (aport != 0) {
1558 			uint16_t old_id = icmp6->icmp6_id;
1559 			icmp6->icmp6_id = aport;
1560 			*csum = cksum_adjust(*csum, old_id, aport);
1561 		}
1562 		break;
1563 	};
1564 
1565 	m_adj(m, hlen - sizeof(ip));
1566 	bcopy(&ip, mtod(m, void *), sizeof(ip));
1567 	if (nat64_output(ifp, m, dst, &ro, stats, logdata) == 0)
1568 		NAT64STAT_INC(stats, opcnt64);
1569 	FREE_ROUTE(&ro);
1570 	return (NAT64RETURN);
1571 }
1572 
1573