1 /*- 2 * Copyright (c) 2015-2018 Yandex LLC 3 * Copyright (c) 2015-2018 Andrey V. Elsukov <ae@FreeBSD.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/counter.h> 34 #include <sys/errno.h> 35 #include <sys/kernel.h> 36 #include <sys/lock.h> 37 #include <sys/mbuf.h> 38 #include <sys/module.h> 39 #include <sys/rmlock.h> 40 #include <sys/rwlock.h> 41 #include <sys/socket.h> 42 #include <sys/queue.h> 43 44 #include <net/if.h> 45 #include <net/if_var.h> 46 #include <net/if_pflog.h> 47 #include <net/pfil.h> 48 #include <net/netisr.h> 49 #include <net/route.h> 50 51 #include <netinet/in.h> 52 #include <netinet/in_fib.h> 53 #include <netinet/ip.h> 54 #include <netinet/ip_var.h> 55 #include <netinet/ip_fw.h> 56 #include <netinet/ip6.h> 57 #include <netinet/icmp6.h> 58 #include <netinet/ip_icmp.h> 59 #include <netinet/tcp.h> 60 #include <netinet/udp.h> 61 #include <netinet6/in6_var.h> 62 #include <netinet6/in6_fib.h> 63 #include <netinet6/ip6_var.h> 64 65 #include <netpfil/pf/pf.h> 66 #include <netpfil/ipfw/ip_fw_private.h> 67 #include <machine/in_cksum.h> 68 69 #include "ip_fw_nat64.h" 70 #include "nat64_translate.h" 71 72 73 typedef int (*nat64_output_t)(struct ifnet *, struct mbuf *, 74 struct sockaddr *, struct nat64_counters *, void *); 75 typedef int (*nat64_output_one_t)(struct mbuf *, struct nat64_counters *, 76 void *); 77 78 static int nat64_find_route4(struct nhop4_basic *, struct sockaddr_in *, 79 struct mbuf *); 80 static int nat64_find_route6(struct nhop6_basic *, struct sockaddr_in6 *, 81 struct mbuf *); 82 static int nat64_output_one(struct mbuf *, struct nat64_counters *, void *); 83 static int nat64_output(struct ifnet *, struct mbuf *, struct sockaddr *, 84 struct nat64_counters *, void *); 85 static int nat64_direct_output_one(struct mbuf *, struct nat64_counters *, 86 void *); 87 static int nat64_direct_output(struct ifnet *, struct mbuf *, 88 struct sockaddr *, struct nat64_counters *, void *); 89 90 struct nat64_methods { 91 nat64_output_t output; 92 nat64_output_one_t output_one; 93 }; 94 static const struct nat64_methods nat64_netisr = { 95 .output = nat64_output, 96 .output_one = nat64_output_one 97 }; 98 static const struct nat64_methods nat64_direct = { 99 .output = nat64_direct_output, 100 .output_one = nat64_direct_output_one 101 }; 102 VNET_DEFINE_STATIC(const struct nat64_methods *, nat64out) = &nat64_netisr; 103 #define V_nat64out VNET(nat64out) 104 105 void 106 nat64_set_output_method(int direct) 107 { 108 109 V_nat64out = direct != 0 ? &nat64_direct: &nat64_netisr; 110 } 111 112 int 113 nat64_get_output_method(void) 114 { 115 116 return (V_nat64out == &nat64_direct ? 1: 0); 117 } 118 119 static void 120 nat64_log(struct pfloghdr *logdata, struct mbuf *m, sa_family_t family) 121 { 122 123 logdata->dir = PF_OUT; 124 logdata->af = family; 125 ipfw_bpf_mtap2(logdata, PFLOG_HDRLEN, m); 126 } 127 128 static int 129 nat64_direct_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 130 struct nat64_counters *stats, void *logdata) 131 { 132 int error; 133 134 if (logdata != NULL) 135 nat64_log(logdata, m, dst->sa_family); 136 error = (*ifp->if_output)(ifp, m, dst, NULL); 137 if (error != 0) 138 NAT64STAT_INC(stats, oerrors); 139 return (error); 140 } 141 142 static int 143 nat64_direct_output_one(struct mbuf *m, struct nat64_counters *stats, 144 void *logdata) 145 { 146 struct nhop6_basic nh6; 147 struct nhop4_basic nh4; 148 struct sockaddr_in6 dst6; 149 struct sockaddr_in dst4; 150 struct sockaddr *dst; 151 struct ip6_hdr *ip6; 152 struct ip *ip4; 153 struct ifnet *ifp; 154 int error; 155 156 ip4 = mtod(m, struct ip *); 157 switch (ip4->ip_v) { 158 case IPVERSION: 159 dst4.sin_addr = ip4->ip_dst; 160 error = nat64_find_route4(&nh4, &dst4, m); 161 if (error != 0) 162 NAT64STAT_INC(stats, noroute4); 163 else { 164 ifp = nh4.nh_ifp; 165 dst = (struct sockaddr *)&dst4; 166 } 167 break; 168 case (IPV6_VERSION >> 4): 169 ip6 = mtod(m, struct ip6_hdr *); 170 dst6.sin6_addr = ip6->ip6_dst; 171 error = nat64_find_route6(&nh6, &dst6, m); 172 if (error != 0) 173 NAT64STAT_INC(stats, noroute6); 174 else { 175 ifp = nh6.nh_ifp; 176 dst = (struct sockaddr *)&dst6; 177 } 178 break; 179 default: 180 m_freem(m); 181 NAT64STAT_INC(stats, dropped); 182 DPRINTF(DP_DROPS, "dropped due to unknown IP version"); 183 return (EAFNOSUPPORT); 184 } 185 if (error != 0) { 186 m_freem(m); 187 return (EHOSTUNREACH); 188 } 189 if (logdata != NULL) 190 nat64_log(logdata, m, dst->sa_family); 191 error = (*ifp->if_output)(ifp, m, dst, NULL); 192 if (error != 0) 193 NAT64STAT_INC(stats, oerrors); 194 return (error); 195 } 196 197 static int 198 nat64_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 199 struct nat64_counters *stats, void *logdata) 200 { 201 struct ip *ip4; 202 int ret, af; 203 204 ip4 = mtod(m, struct ip *); 205 switch (ip4->ip_v) { 206 case IPVERSION: 207 af = AF_INET; 208 ret = NETISR_IP; 209 break; 210 case (IPV6_VERSION >> 4): 211 af = AF_INET6; 212 ret = NETISR_IPV6; 213 break; 214 default: 215 m_freem(m); 216 NAT64STAT_INC(stats, dropped); 217 DPRINTF(DP_DROPS, "unknown IP version"); 218 return (EAFNOSUPPORT); 219 } 220 if (logdata != NULL) 221 nat64_log(logdata, m, af); 222 ret = netisr_queue(ret, m); 223 if (ret != 0) 224 NAT64STAT_INC(stats, oerrors); 225 return (ret); 226 } 227 228 static int 229 nat64_output_one(struct mbuf *m, struct nat64_counters *stats, void *logdata) 230 { 231 232 return (nat64_output(NULL, m, NULL, stats, logdata)); 233 } 234 235 /* 236 * Check the given IPv6 prefix and length according to RFC6052: 237 * The prefixes can only have one of the following lengths: 238 * 32, 40, 48, 56, 64, or 96 (The Well-Known Prefix is 96 bits long). 239 * Returns zero on success, otherwise EINVAL. 240 */ 241 int 242 nat64_check_prefix6(const struct in6_addr *prefix, int length) 243 { 244 245 switch (length) { 246 case 32: 247 case 40: 248 case 48: 249 case 56: 250 case 64: 251 /* Well-known prefix has 96 prefix length */ 252 if (IN6_IS_ADDR_WKPFX(prefix)) 253 return (EINVAL); 254 /* FALLTHROUGH */ 255 case 96: 256 /* Bits 64 to 71 must be set to zero */ 257 if (prefix->__u6_addr.__u6_addr8[8] != 0) 258 return (EINVAL); 259 /* Some extra checks */ 260 if (IN6_IS_ADDR_MULTICAST(prefix) || 261 IN6_IS_ADDR_UNSPECIFIED(prefix) || 262 IN6_IS_ADDR_LOOPBACK(prefix)) 263 return (EINVAL); 264 return (0); 265 } 266 return (EINVAL); 267 } 268 269 int 270 nat64_check_private_ip4(const struct nat64_config *cfg, in_addr_t ia) 271 { 272 273 if (V_nat64_allow_private) 274 return (0); 275 276 /* WKPFX must not be used to represent non-global IPv4 addresses */ 277 if (cfg->flags & NAT64_WKPFX) { 278 /* IN_PRIVATE */ 279 if ((ia & htonl(0xff000000)) == htonl(0x0a000000) || 280 (ia & htonl(0xfff00000)) == htonl(0xac100000) || 281 (ia & htonl(0xffff0000)) == htonl(0xc0a80000)) 282 return (1); 283 /* 284 * RFC 5735: 285 * 192.0.0.0/24 - reserved for IETF protocol assignments 286 * 192.88.99.0/24 - for use as 6to4 relay anycast addresses 287 * 198.18.0.0/15 - for use in benchmark tests 288 * 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24 - for use 289 * in documentation and example code 290 */ 291 if ((ia & htonl(0xffffff00)) == htonl(0xc0000000) || 292 (ia & htonl(0xffffff00)) == htonl(0xc0586300) || 293 (ia & htonl(0xfffffe00)) == htonl(0xc6120000) || 294 (ia & htonl(0xffffff00)) == htonl(0xc0000200) || 295 (ia & htonl(0xfffffe00)) == htonl(0xc6336400) || 296 (ia & htonl(0xffffff00)) == htonl(0xcb007100)) 297 return (1); 298 } 299 return (0); 300 } 301 302 void 303 nat64_embed_ip4(const struct nat64_config *cfg, in_addr_t ia, 304 struct in6_addr *ip6) 305 { 306 307 /* assume the prefix6 is properly filled with zeros */ 308 bcopy(&cfg->prefix6, ip6, sizeof(*ip6)); 309 switch (cfg->plen6) { 310 case 32: 311 case 96: 312 ip6->s6_addr32[cfg->plen6 / 32] = ia; 313 break; 314 case 40: 315 case 48: 316 case 56: 317 #if BYTE_ORDER == BIG_ENDIAN 318 ip6->s6_addr32[1] = cfg->prefix6.s6_addr32[1] | 319 (ia >> (cfg->plen6 % 32)); 320 ip6->s6_addr32[2] = ia << (24 - cfg->plen6 % 32); 321 #elif BYTE_ORDER == LITTLE_ENDIAN 322 ip6->s6_addr32[1] = cfg->prefix6.s6_addr32[1] | 323 (ia << (cfg->plen6 % 32)); 324 ip6->s6_addr32[2] = ia >> (24 - cfg->plen6 % 32); 325 #endif 326 break; 327 case 64: 328 #if BYTE_ORDER == BIG_ENDIAN 329 ip6->s6_addr32[2] = ia >> 8; 330 ip6->s6_addr32[3] = ia << 24; 331 #elif BYTE_ORDER == LITTLE_ENDIAN 332 ip6->s6_addr32[2] = ia << 8; 333 ip6->s6_addr32[3] = ia >> 24; 334 #endif 335 break; 336 default: 337 panic("Wrong plen6"); 338 }; 339 ip6->s6_addr8[8] = 0; 340 } 341 342 in_addr_t 343 nat64_extract_ip4(const struct nat64_config *cfg, const struct in6_addr *ip6) 344 { 345 in_addr_t ia; 346 347 /* 348 * According to RFC 6052 p2.2: 349 * IPv4-embedded IPv6 addresses are composed of a variable-length 350 * prefix, the embedded IPv4 address, and a variable length suffix. 351 * The suffix bits are reserved for future extensions and SHOULD 352 * be set to zero. 353 */ 354 switch (cfg->plen6) { 355 case 32: 356 if (ip6->s6_addr32[3] != 0 || ip6->s6_addr32[2] != 0) 357 goto badip6; 358 break; 359 case 40: 360 if (ip6->s6_addr32[3] != 0 || 361 (ip6->s6_addr32[2] & htonl(0xff00ffff)) != 0) 362 goto badip6; 363 break; 364 case 48: 365 if (ip6->s6_addr32[3] != 0 || 366 (ip6->s6_addr32[2] & htonl(0xff0000ff)) != 0) 367 goto badip6; 368 break; 369 case 56: 370 if (ip6->s6_addr32[3] != 0 || ip6->s6_addr8[8] != 0) 371 goto badip6; 372 break; 373 case 64: 374 if (ip6->s6_addr8[8] != 0 || 375 (ip6->s6_addr32[3] & htonl(0x00ffffff)) != 0) 376 goto badip6; 377 }; 378 switch (cfg->plen6) { 379 case 32: 380 case 96: 381 ia = ip6->s6_addr32[cfg->plen6 / 32]; 382 break; 383 case 40: 384 case 48: 385 case 56: 386 #if BYTE_ORDER == BIG_ENDIAN 387 ia = (ip6->s6_addr32[1] << (cfg->plen6 % 32)) | 388 (ip6->s6_addr32[2] >> (24 - cfg->plen6 % 32)); 389 #elif BYTE_ORDER == LITTLE_ENDIAN 390 ia = (ip6->s6_addr32[1] >> (cfg->plen6 % 32)) | 391 (ip6->s6_addr32[2] << (24 - cfg->plen6 % 32)); 392 #endif 393 break; 394 case 64: 395 #if BYTE_ORDER == BIG_ENDIAN 396 ia = (ip6->s6_addr32[2] << 8) | (ip6->s6_addr32[3] >> 24); 397 #elif BYTE_ORDER == LITTLE_ENDIAN 398 ia = (ip6->s6_addr32[2] >> 8) | (ip6->s6_addr32[3] << 24); 399 #endif 400 break; 401 default: 402 return (0); 403 }; 404 if (nat64_check_ip4(ia) != 0 || 405 nat64_check_private_ip4(cfg, ia) != 0) 406 goto badip4; 407 408 return (ia); 409 badip4: 410 DPRINTF(DP_GENERIC | DP_DROPS, 411 "invalid destination address: %08x", ia); 412 return (0); 413 badip6: 414 DPRINTF(DP_GENERIC | DP_DROPS, "invalid IPv4-embedded IPv6 address"); 415 return (0); 416 } 417 418 /* 419 * According to RFC 1624 the equation for incremental checksum update is: 420 * HC' = ~(~HC + ~m + m') -- [Eqn. 3] 421 * HC' = HC - ~m - m' -- [Eqn. 4] 422 * So, when we are replacing IPv4 addresses to IPv6, we 423 * can assume, that new bytes previously were zeros, and vise versa - 424 * when we replacing IPv6 addresses to IPv4, now unused bytes become 425 * zeros. The payload length in pseudo header has bigger size, but one 426 * half of it should be zero. Using the equation 4 we get: 427 * HC' = HC - (~m0 + m0') -- m0 is first changed word 428 * HC' = (HC - (~m0 + m0')) - (~m1 + m1') -- m1 is second changed word 429 * HC' = HC - ~m0 - m0' - ~m1 - m1' - ... = 430 * = HC - sum(~m[i] + m'[i]) 431 * 432 * The function result should be used as follows: 433 * IPv6 to IPv4: HC' = cksum_add(HC, result) 434 * IPv4 to IPv6: HC' = cksum_add(HC, ~result) 435 */ 436 static NAT64NOINLINE uint16_t 437 nat64_cksum_convert(struct ip6_hdr *ip6, struct ip *ip) 438 { 439 uint32_t sum; 440 uint16_t *p; 441 442 sum = ~ip->ip_src.s_addr >> 16; 443 sum += ~ip->ip_src.s_addr & 0xffff; 444 sum += ~ip->ip_dst.s_addr >> 16; 445 sum += ~ip->ip_dst.s_addr & 0xffff; 446 447 for (p = (uint16_t *)&ip6->ip6_src; 448 p < (uint16_t *)(&ip6->ip6_src + 2); p++) 449 sum += *p; 450 451 while (sum >> 16) 452 sum = (sum & 0xffff) + (sum >> 16); 453 return (sum); 454 } 455 456 static NAT64NOINLINE void 457 nat64_init_ip4hdr(const struct ip6_hdr *ip6, const struct ip6_frag *frag, 458 uint16_t plen, uint8_t proto, struct ip *ip) 459 { 460 461 /* assume addresses are already initialized */ 462 ip->ip_v = IPVERSION; 463 ip->ip_hl = sizeof(*ip) >> 2; 464 ip->ip_tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; 465 ip->ip_len = htons(sizeof(*ip) + plen); 466 ip->ip_ttl = ip6->ip6_hlim; 467 /* Forwarding code will decrement TTL for netisr based output. */ 468 if (V_nat64out == &nat64_direct) 469 ip->ip_ttl -= IPV6_HLIMDEC; 470 ip->ip_sum = 0; 471 ip->ip_p = (proto == IPPROTO_ICMPV6) ? IPPROTO_ICMP: proto; 472 ip_fillid(ip); 473 if (frag != NULL) { 474 ip->ip_off = htons(ntohs(frag->ip6f_offlg) >> 3); 475 if (frag->ip6f_offlg & IP6F_MORE_FRAG) 476 ip->ip_off |= htons(IP_MF); 477 } else { 478 ip->ip_off = htons(IP_DF); 479 } 480 ip->ip_sum = in_cksum_hdr(ip); 481 } 482 483 #define FRAGSZ(mtu) ((mtu) - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag)) 484 static NAT64NOINLINE int 485 nat64_fragment6(struct nat64_counters *stats, struct ip6_hdr *ip6, 486 struct mbufq *mq, struct mbuf *m, uint32_t mtu, uint16_t ip_id, 487 uint16_t ip_off) 488 { 489 struct ip6_frag ip6f; 490 struct mbuf *n; 491 uint16_t hlen, len, offset; 492 int plen; 493 494 plen = ntohs(ip6->ip6_plen); 495 hlen = sizeof(struct ip6_hdr); 496 497 /* Fragmentation isn't needed */ 498 if (ip_off == 0 && plen <= mtu - hlen) { 499 M_PREPEND(m, hlen, M_NOWAIT); 500 if (m == NULL) { 501 NAT64STAT_INC(stats, nomem); 502 return (ENOMEM); 503 } 504 bcopy(ip6, mtod(m, void *), hlen); 505 if (mbufq_enqueue(mq, m) != 0) { 506 m_freem(m); 507 NAT64STAT_INC(stats, dropped); 508 DPRINTF(DP_DROPS, "dropped due to mbufq overflow"); 509 return (ENOBUFS); 510 } 511 return (0); 512 } 513 514 hlen += sizeof(struct ip6_frag); 515 ip6f.ip6f_reserved = 0; 516 ip6f.ip6f_nxt = ip6->ip6_nxt; 517 ip6->ip6_nxt = IPPROTO_FRAGMENT; 518 if (ip_off != 0) { 519 /* 520 * We have got an IPv4 fragment. 521 * Use offset value and ip_id from original fragment. 522 */ 523 ip6f.ip6f_ident = htonl(ntohs(ip_id)); 524 offset = (ntohs(ip_off) & IP_OFFMASK) << 3; 525 NAT64STAT_INC(stats, ifrags); 526 } else { 527 /* The packet size exceeds interface MTU */ 528 ip6f.ip6f_ident = htonl(ip6_randomid()); 529 offset = 0; /* First fragment*/ 530 } 531 while (plen > 0 && m != NULL) { 532 n = NULL; 533 len = FRAGSZ(mtu) & ~7; 534 if (len > plen) 535 len = plen; 536 ip6->ip6_plen = htons(len + sizeof(ip6f)); 537 ip6f.ip6f_offlg = ntohs(offset); 538 if (len < plen || (ip_off & htons(IP_MF)) != 0) 539 ip6f.ip6f_offlg |= IP6F_MORE_FRAG; 540 offset += len; 541 plen -= len; 542 if (plen > 0) { 543 n = m_split(m, len, M_NOWAIT); 544 if (n == NULL) 545 goto fail; 546 } 547 M_PREPEND(m, hlen, M_NOWAIT); 548 if (m == NULL) 549 goto fail; 550 bcopy(ip6, mtod(m, void *), sizeof(struct ip6_hdr)); 551 bcopy(&ip6f, mtodo(m, sizeof(struct ip6_hdr)), 552 sizeof(struct ip6_frag)); 553 if (mbufq_enqueue(mq, m) != 0) 554 goto fail; 555 m = n; 556 } 557 NAT64STAT_ADD(stats, ofrags, mbufq_len(mq)); 558 return (0); 559 fail: 560 if (m != NULL) 561 m_freem(m); 562 if (n != NULL) 563 m_freem(n); 564 mbufq_drain(mq); 565 NAT64STAT_INC(stats, nomem); 566 return (ENOMEM); 567 } 568 569 static NAT64NOINLINE int 570 nat64_find_route6(struct nhop6_basic *pnh, struct sockaddr_in6 *dst, 571 struct mbuf *m) 572 { 573 574 if (fib6_lookup_nh_basic(M_GETFIB(m), &dst->sin6_addr, 0, 0, 0, 575 pnh) != 0) 576 return (EHOSTUNREACH); 577 if (pnh->nh_flags & (NHF_BLACKHOLE | NHF_REJECT)) 578 return (EHOSTUNREACH); 579 /* 580 * XXX: we need to use destination address with embedded scope 581 * zone id, because LLTABLE uses such form of addresses for lookup. 582 */ 583 dst->sin6_family = AF_INET6; 584 dst->sin6_len = sizeof(*dst); 585 dst->sin6_addr = pnh->nh_addr; 586 if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr)) 587 dst->sin6_addr.s6_addr16[1] = 588 htons(pnh->nh_ifp->if_index & 0xffff); 589 dst->sin6_port = 0; 590 dst->sin6_scope_id = 0; 591 dst->sin6_flowinfo = 0; 592 593 return (0); 594 } 595 596 #define NAT64_ICMP6_PLEN 64 597 static NAT64NOINLINE void 598 nat64_icmp6_reflect(struct mbuf *m, uint8_t type, uint8_t code, uint32_t mtu, 599 struct nat64_counters *stats, void *logdata) 600 { 601 struct icmp6_hdr *icmp6; 602 struct ip6_hdr *ip6, *oip6; 603 struct mbuf *n; 604 int len, plen; 605 606 len = 0; 607 plen = nat64_getlasthdr(m, &len); 608 if (plen < 0) { 609 DPRINTF(DP_DROPS, "mbuf isn't contigious"); 610 goto freeit; 611 } 612 /* 613 * Do not send ICMPv6 in reply to ICMPv6 errors. 614 */ 615 if (plen == IPPROTO_ICMPV6) { 616 if (m->m_len < len + sizeof(*icmp6)) { 617 DPRINTF(DP_DROPS, "mbuf isn't contigious"); 618 goto freeit; 619 } 620 icmp6 = mtodo(m, len); 621 if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST || 622 icmp6->icmp6_type == ND_REDIRECT) { 623 DPRINTF(DP_DROPS, "do not send ICMPv6 in reply to " 624 "ICMPv6 errors"); 625 goto freeit; 626 } 627 } 628 /* 629 if (icmp6_ratelimit(&ip6->ip6_src, type, code)) 630 goto freeit; 631 */ 632 ip6 = mtod(m, struct ip6_hdr *); 633 switch (type) { 634 case ICMP6_DST_UNREACH: 635 case ICMP6_PACKET_TOO_BIG: 636 case ICMP6_TIME_EXCEEDED: 637 case ICMP6_PARAM_PROB: 638 break; 639 default: 640 goto freeit; 641 } 642 /* Calculate length of ICMPv6 payload */ 643 len = (m->m_pkthdr.len > NAT64_ICMP6_PLEN) ? NAT64_ICMP6_PLEN: 644 m->m_pkthdr.len; 645 646 /* Create new ICMPv6 datagram */ 647 plen = len + sizeof(struct icmp6_hdr); 648 n = m_get2(sizeof(struct ip6_hdr) + plen + max_hdr, M_NOWAIT, 649 MT_HEADER, M_PKTHDR); 650 if (n == NULL) { 651 NAT64STAT_INC(stats, nomem); 652 m_freem(m); 653 return; 654 } 655 /* 656 * Move pkthdr from original mbuf. We should have initialized some 657 * fields, because we can reinject this mbuf to netisr and it will 658 * go trough input path (it requires at least rcvif should be set). 659 * Also do M_ALIGN() to reduce chances of need to allocate new mbuf 660 * in the chain, when we will do M_PREPEND() or make some type of 661 * tunneling. 662 */ 663 m_move_pkthdr(n, m); 664 M_ALIGN(n, sizeof(struct ip6_hdr) + plen + max_hdr); 665 666 n->m_len = n->m_pkthdr.len = sizeof(struct ip6_hdr) + plen; 667 oip6 = mtod(n, struct ip6_hdr *); 668 oip6->ip6_src = ip6->ip6_dst; 669 oip6->ip6_dst = ip6->ip6_src; 670 oip6->ip6_nxt = IPPROTO_ICMPV6; 671 oip6->ip6_flow = 0; 672 oip6->ip6_vfc |= IPV6_VERSION; 673 oip6->ip6_hlim = V_ip6_defhlim; 674 oip6->ip6_plen = htons(plen); 675 676 icmp6 = mtodo(n, sizeof(struct ip6_hdr)); 677 icmp6->icmp6_cksum = 0; 678 icmp6->icmp6_type = type; 679 icmp6->icmp6_code = code; 680 icmp6->icmp6_mtu = htonl(mtu); 681 682 m_copydata(m, 0, len, mtodo(n, sizeof(struct ip6_hdr) + 683 sizeof(struct icmp6_hdr))); 684 icmp6->icmp6_cksum = in6_cksum(n, IPPROTO_ICMPV6, 685 sizeof(struct ip6_hdr), plen); 686 m_freem(m); 687 V_nat64out->output_one(n, stats, logdata); 688 return; 689 freeit: 690 NAT64STAT_INC(stats, dropped); 691 m_freem(m); 692 } 693 694 static NAT64NOINLINE int 695 nat64_find_route4(struct nhop4_basic *pnh, struct sockaddr_in *dst, 696 struct mbuf *m) 697 { 698 699 if (fib4_lookup_nh_basic(M_GETFIB(m), dst->sin_addr, 0, 0, pnh) != 0) 700 return (EHOSTUNREACH); 701 if (pnh->nh_flags & (NHF_BLACKHOLE | NHF_BROADCAST | NHF_REJECT)) 702 return (EHOSTUNREACH); 703 704 dst->sin_family = AF_INET; 705 dst->sin_len = sizeof(*dst); 706 dst->sin_addr = pnh->nh_addr; 707 dst->sin_port = 0; 708 return (0); 709 } 710 711 #define NAT64_ICMP_PLEN 64 712 static NAT64NOINLINE void 713 nat64_icmp_reflect(struct mbuf *m, uint8_t type, 714 uint8_t code, uint16_t mtu, struct nat64_counters *stats, void *logdata) 715 { 716 struct icmp *icmp; 717 struct ip *ip, *oip; 718 struct mbuf *n; 719 int len, plen; 720 721 ip = mtod(m, struct ip *); 722 /* Do not send ICMP error if packet is not the first fragment */ 723 if (ip->ip_off & ~ntohs(IP_MF|IP_DF)) { 724 DPRINTF(DP_DROPS, "not first fragment"); 725 goto freeit; 726 } 727 /* Do not send ICMP in reply to ICMP errors */ 728 if (ip->ip_p == IPPROTO_ICMP) { 729 if (m->m_len < (ip->ip_hl << 2)) { 730 DPRINTF(DP_DROPS, "mbuf isn't contigious"); 731 goto freeit; 732 } 733 icmp = mtodo(m, ip->ip_hl << 2); 734 if (!ICMP_INFOTYPE(icmp->icmp_type)) { 735 DPRINTF(DP_DROPS, "do not send ICMP in reply to " 736 "ICMP errors"); 737 goto freeit; 738 } 739 } 740 switch (type) { 741 case ICMP_UNREACH: 742 case ICMP_TIMXCEED: 743 case ICMP_PARAMPROB: 744 break; 745 default: 746 goto freeit; 747 } 748 /* Calculate length of ICMP payload */ 749 len = (m->m_pkthdr.len > NAT64_ICMP_PLEN) ? (ip->ip_hl << 2) + 8: 750 m->m_pkthdr.len; 751 752 /* Create new ICMPv4 datagram */ 753 plen = len + sizeof(struct icmphdr) + sizeof(uint32_t); 754 n = m_get2(sizeof(struct ip) + plen + max_hdr, M_NOWAIT, 755 MT_HEADER, M_PKTHDR); 756 if (n == NULL) { 757 NAT64STAT_INC(stats, nomem); 758 m_freem(m); 759 return; 760 } 761 m_move_pkthdr(n, m); 762 M_ALIGN(n, sizeof(struct ip) + plen + max_hdr); 763 764 n->m_len = n->m_pkthdr.len = sizeof(struct ip) + plen; 765 oip = mtod(n, struct ip *); 766 oip->ip_v = IPVERSION; 767 oip->ip_hl = sizeof(struct ip) >> 2; 768 oip->ip_tos = 0; 769 oip->ip_len = htons(n->m_pkthdr.len); 770 oip->ip_ttl = V_ip_defttl; 771 oip->ip_p = IPPROTO_ICMP; 772 ip_fillid(oip); 773 oip->ip_off = htons(IP_DF); 774 oip->ip_src = ip->ip_dst; 775 oip->ip_dst = ip->ip_src; 776 oip->ip_sum = 0; 777 oip->ip_sum = in_cksum_hdr(oip); 778 779 icmp = mtodo(n, sizeof(struct ip)); 780 icmp->icmp_type = type; 781 icmp->icmp_code = code; 782 icmp->icmp_cksum = 0; 783 icmp->icmp_pmvoid = 0; 784 icmp->icmp_nextmtu = htons(mtu); 785 m_copydata(m, 0, len, mtodo(n, sizeof(struct ip) + 786 sizeof(struct icmphdr) + sizeof(uint32_t))); 787 icmp->icmp_cksum = in_cksum_skip(n, sizeof(struct ip) + plen, 788 sizeof(struct ip)); 789 m_freem(m); 790 V_nat64out->output_one(n, stats, logdata); 791 return; 792 freeit: 793 NAT64STAT_INC(stats, dropped); 794 m_freem(m); 795 } 796 797 /* Translate ICMP echo request/reply into ICMPv6 */ 798 static void 799 nat64_icmp_handle_echo(struct ip6_hdr *ip6, struct icmp6_hdr *icmp6, 800 uint16_t id, uint8_t type) 801 { 802 uint16_t old; 803 804 old = *(uint16_t *)icmp6; /* save type+code in one word */ 805 icmp6->icmp6_type = type; 806 /* Reflect ICMPv6 -> ICMPv4 type translation in the cksum */ 807 icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum, 808 old, *(uint16_t *)icmp6); 809 if (id != 0) { 810 old = icmp6->icmp6_id; 811 icmp6->icmp6_id = id; 812 /* Reflect ICMP id translation in the cksum */ 813 icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum, 814 old, id); 815 } 816 /* Reflect IPv6 pseudo header in the cksum */ 817 icmp6->icmp6_cksum = ~in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen), 818 IPPROTO_ICMPV6, ~icmp6->icmp6_cksum); 819 } 820 821 static NAT64NOINLINE struct mbuf * 822 nat64_icmp_translate(struct mbuf *m, struct ip6_hdr *ip6, uint16_t icmpid, 823 int offset, struct nat64_config *cfg) 824 { 825 struct ip ip; 826 struct icmp *icmp; 827 struct tcphdr *tcp; 828 struct udphdr *udp; 829 struct ip6_hdr *eip6; 830 struct mbuf *n; 831 uint32_t mtu; 832 int len, hlen, plen; 833 uint8_t type, code; 834 835 if (m->m_len < offset + ICMP_MINLEN) 836 m = m_pullup(m, offset + ICMP_MINLEN); 837 if (m == NULL) { 838 NAT64STAT_INC(&cfg->stats, nomem); 839 return (m); 840 } 841 mtu = 0; 842 icmp = mtodo(m, offset); 843 /* RFC 7915 p4.2 */ 844 switch (icmp->icmp_type) { 845 case ICMP_ECHOREPLY: 846 type = ICMP6_ECHO_REPLY; 847 code = 0; 848 break; 849 case ICMP_UNREACH: 850 type = ICMP6_DST_UNREACH; 851 switch (icmp->icmp_code) { 852 case ICMP_UNREACH_NET: 853 case ICMP_UNREACH_HOST: 854 case ICMP_UNREACH_SRCFAIL: 855 case ICMP_UNREACH_NET_UNKNOWN: 856 case ICMP_UNREACH_HOST_UNKNOWN: 857 case ICMP_UNREACH_TOSNET: 858 case ICMP_UNREACH_TOSHOST: 859 code = ICMP6_DST_UNREACH_NOROUTE; 860 break; 861 case ICMP_UNREACH_PROTOCOL: 862 type = ICMP6_PARAM_PROB; 863 code = ICMP6_PARAMPROB_NEXTHEADER; 864 break; 865 case ICMP_UNREACH_PORT: 866 code = ICMP6_DST_UNREACH_NOPORT; 867 break; 868 case ICMP_UNREACH_NEEDFRAG: 869 type = ICMP6_PACKET_TOO_BIG; 870 code = 0; 871 /* XXX: needs an additional look */ 872 mtu = max(IPV6_MMTU, ntohs(icmp->icmp_nextmtu) + 20); 873 break; 874 case ICMP_UNREACH_NET_PROHIB: 875 case ICMP_UNREACH_HOST_PROHIB: 876 case ICMP_UNREACH_FILTER_PROHIB: 877 case ICMP_UNREACH_PRECEDENCE_CUTOFF: 878 code = ICMP6_DST_UNREACH_ADMIN; 879 break; 880 default: 881 DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d", 882 icmp->icmp_type, icmp->icmp_code); 883 goto freeit; 884 } 885 break; 886 case ICMP_TIMXCEED: 887 type = ICMP6_TIME_EXCEEDED; 888 code = icmp->icmp_code; 889 break; 890 case ICMP_ECHO: 891 type = ICMP6_ECHO_REQUEST; 892 code = 0; 893 break; 894 case ICMP_PARAMPROB: 895 type = ICMP6_PARAM_PROB; 896 switch (icmp->icmp_code) { 897 case ICMP_PARAMPROB_ERRATPTR: 898 case ICMP_PARAMPROB_LENGTH: 899 code = ICMP6_PARAMPROB_HEADER; 900 switch (icmp->icmp_pptr) { 901 case 0: /* Version/IHL */ 902 case 1: /* Type Of Service */ 903 mtu = icmp->icmp_pptr; 904 break; 905 case 2: /* Total Length */ 906 case 3: mtu = 4; /* Payload Length */ 907 break; 908 case 8: /* Time to Live */ 909 mtu = 7; /* Hop Limit */ 910 break; 911 case 9: /* Protocol */ 912 mtu = 6; /* Next Header */ 913 break; 914 case 12: /* Source address */ 915 case 13: 916 case 14: 917 case 15: 918 mtu = 8; 919 break; 920 case 16: /* Destination address */ 921 case 17: 922 case 18: 923 case 19: 924 mtu = 24; 925 break; 926 default: /* Silently drop */ 927 DPRINTF(DP_DROPS, "Unsupported ICMP type %d," 928 " code %d, pptr %d", icmp->icmp_type, 929 icmp->icmp_code, icmp->icmp_pptr); 930 goto freeit; 931 } 932 break; 933 default: 934 DPRINTF(DP_DROPS, "Unsupported ICMP type %d," 935 " code %d, pptr %d", icmp->icmp_type, 936 icmp->icmp_code, icmp->icmp_pptr); 937 goto freeit; 938 } 939 break; 940 default: 941 DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d", 942 icmp->icmp_type, icmp->icmp_code); 943 goto freeit; 944 } 945 /* 946 * For echo request/reply we can use original payload, 947 * but we need adjust icmp_cksum, because ICMPv6 cksum covers 948 * IPv6 pseudo header and ICMPv6 types differs from ICMPv4. 949 */ 950 if (type == ICMP6_ECHO_REQUEST || type == ICMP6_ECHO_REPLY) { 951 nat64_icmp_handle_echo(ip6, ICMP6(icmp), icmpid, type); 952 return (m); 953 } 954 /* 955 * For other types of ICMP messages we need to translate inner 956 * IPv4 header to IPv6 header. 957 * Assume ICMP src is the same as payload dst 958 * E.g. we have ( GWsrc1 , NATIP1 ) in outer header 959 * and ( NATIP1, Hostdst1 ) in ICMP copy header. 960 * In that case, we already have map for NATIP1 and GWsrc1. 961 * The only thing we need is to copy IPv6 map prefix to 962 * Hostdst1. 963 */ 964 hlen = offset + ICMP_MINLEN; 965 if (m->m_pkthdr.len < hlen + sizeof(struct ip) + ICMP_MINLEN) { 966 DPRINTF(DP_DROPS, "Message is too short %d", 967 m->m_pkthdr.len); 968 goto freeit; 969 } 970 m_copydata(m, hlen, sizeof(struct ip), (char *)&ip); 971 if (ip.ip_v != IPVERSION) { 972 DPRINTF(DP_DROPS, "Wrong IP version %d", ip.ip_v); 973 goto freeit; 974 } 975 hlen += ip.ip_hl << 2; /* Skip inner IP header */ 976 if (nat64_check_ip4(ip.ip_src.s_addr) != 0 || 977 nat64_check_ip4(ip.ip_dst.s_addr) != 0 || 978 nat64_check_private_ip4(cfg, ip.ip_src.s_addr) != 0 || 979 nat64_check_private_ip4(cfg, ip.ip_dst.s_addr) != 0) { 980 DPRINTF(DP_DROPS, "IP addresses checks failed %04x -> %04x", 981 ntohl(ip.ip_src.s_addr), ntohl(ip.ip_dst.s_addr)); 982 goto freeit; 983 } 984 if (m->m_pkthdr.len < hlen + ICMP_MINLEN) { 985 DPRINTF(DP_DROPS, "Message is too short %d", 986 m->m_pkthdr.len); 987 goto freeit; 988 } 989 #if 0 990 /* 991 * Check that inner source matches the outer destination. 992 * XXX: We need some method to convert IPv4 into IPv6 address here, 993 * and compare IPv6 addresses. 994 */ 995 if (ip.ip_src.s_addr != nat64_get_ip4(&ip6->ip6_dst)) { 996 DPRINTF(DP_GENERIC, "Inner source doesn't match destination ", 997 "%04x vs %04x", ip.ip_src.s_addr, 998 nat64_get_ip4(&ip6->ip6_dst)); 999 goto freeit; 1000 } 1001 #endif 1002 /* 1003 * Create new mbuf for ICMPv6 datagram. 1004 * NOTE: len is data length just after inner IP header. 1005 */ 1006 len = m->m_pkthdr.len - hlen; 1007 if (sizeof(struct ip6_hdr) + 1008 sizeof(struct icmp6_hdr) + len > NAT64_ICMP6_PLEN) 1009 len = NAT64_ICMP6_PLEN - sizeof(struct icmp6_hdr) - 1010 sizeof(struct ip6_hdr); 1011 plen = sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr) + len; 1012 n = m_get2(offset + plen + max_hdr, M_NOWAIT, MT_HEADER, M_PKTHDR); 1013 if (n == NULL) { 1014 NAT64STAT_INC(&cfg->stats, nomem); 1015 m_freem(m); 1016 return (NULL); 1017 } 1018 m_move_pkthdr(n, m); 1019 M_ALIGN(n, offset + plen + max_hdr); 1020 n->m_len = n->m_pkthdr.len = offset + plen; 1021 /* Adjust ip6_plen in outer header */ 1022 ip6->ip6_plen = htons(plen); 1023 /* Construct new inner IPv6 header */ 1024 eip6 = mtodo(n, offset + sizeof(struct icmp6_hdr)); 1025 eip6->ip6_src = ip6->ip6_dst; 1026 /* Use the fact that we have single /96 prefix for IPv4 map */ 1027 eip6->ip6_dst = ip6->ip6_src; 1028 nat64_embed_ip4(cfg, ip.ip_dst.s_addr, &eip6->ip6_dst); 1029 1030 eip6->ip6_flow = htonl(ip.ip_tos << 20); 1031 eip6->ip6_vfc |= IPV6_VERSION; 1032 eip6->ip6_hlim = ip.ip_ttl; 1033 eip6->ip6_plen = htons(ntohs(ip.ip_len) - (ip.ip_hl << 2)); 1034 eip6->ip6_nxt = (ip.ip_p == IPPROTO_ICMP) ? IPPROTO_ICMPV6: ip.ip_p; 1035 m_copydata(m, hlen, len, (char *)(eip6 + 1)); 1036 /* 1037 * We need to translate source port in the inner ULP header, 1038 * and adjust ULP checksum. 1039 */ 1040 switch (ip.ip_p) { 1041 case IPPROTO_TCP: 1042 if (len < offsetof(struct tcphdr, th_sum)) 1043 break; 1044 tcp = TCP(eip6 + 1); 1045 if (icmpid != 0) { 1046 tcp->th_sum = cksum_adjust(tcp->th_sum, 1047 tcp->th_sport, icmpid); 1048 tcp->th_sport = icmpid; 1049 } 1050 tcp->th_sum = cksum_add(tcp->th_sum, 1051 ~nat64_cksum_convert(eip6, &ip)); 1052 break; 1053 case IPPROTO_UDP: 1054 if (len < offsetof(struct udphdr, uh_sum)) 1055 break; 1056 udp = UDP(eip6 + 1); 1057 if (icmpid != 0) { 1058 udp->uh_sum = cksum_adjust(udp->uh_sum, 1059 udp->uh_sport, icmpid); 1060 udp->uh_sport = icmpid; 1061 } 1062 udp->uh_sum = cksum_add(udp->uh_sum, 1063 ~nat64_cksum_convert(eip6, &ip)); 1064 break; 1065 case IPPROTO_ICMP: 1066 /* 1067 * Check if this is an ICMP error message for echo request 1068 * that we sent. I.e. ULP in the data containing invoking 1069 * packet is IPPROTO_ICMP and its type is ICMP_ECHO. 1070 */ 1071 icmp = (struct icmp *)(eip6 + 1); 1072 if (icmp->icmp_type != ICMP_ECHO) { 1073 m_freem(n); 1074 goto freeit; 1075 } 1076 /* 1077 * For our client this original datagram should looks 1078 * like it was ICMPv6 datagram with type ICMP6_ECHO_REQUEST. 1079 * Thus we need adjust icmp_cksum and convert type from 1080 * ICMP_ECHO to ICMP6_ECHO_REQUEST. 1081 */ 1082 nat64_icmp_handle_echo(eip6, ICMP6(icmp), icmpid, 1083 ICMP6_ECHO_REQUEST); 1084 } 1085 m_freem(m); 1086 /* Convert ICMPv4 into ICMPv6 header */ 1087 icmp = mtodo(n, offset); 1088 ICMP6(icmp)->icmp6_type = type; 1089 ICMP6(icmp)->icmp6_code = code; 1090 ICMP6(icmp)->icmp6_mtu = htonl(mtu); 1091 ICMP6(icmp)->icmp6_cksum = 0; 1092 ICMP6(icmp)->icmp6_cksum = cksum_add( 1093 ~in6_cksum_pseudo(ip6, plen, IPPROTO_ICMPV6, 0), 1094 in_cksum_skip(n, n->m_pkthdr.len, offset)); 1095 return (n); 1096 freeit: 1097 m_freem(m); 1098 NAT64STAT_INC(&cfg->stats, dropped); 1099 return (NULL); 1100 } 1101 1102 int 1103 nat64_getlasthdr(struct mbuf *m, int *offset) 1104 { 1105 struct ip6_hdr *ip6; 1106 struct ip6_hbh *hbh; 1107 int proto, hlen; 1108 1109 if (offset != NULL) 1110 hlen = *offset; 1111 else 1112 hlen = 0; 1113 1114 if (m->m_len < hlen + sizeof(*ip6)) 1115 return (-1); 1116 1117 ip6 = mtodo(m, hlen); 1118 hlen += sizeof(*ip6); 1119 proto = ip6->ip6_nxt; 1120 /* Skip extension headers */ 1121 while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING || 1122 proto == IPPROTO_DSTOPTS) { 1123 hbh = mtodo(m, hlen); 1124 /* 1125 * We expect mbuf has contigious data up to 1126 * upper level header. 1127 */ 1128 if (m->m_len < hlen) 1129 return (-1); 1130 /* 1131 * We doesn't support Jumbo payload option, 1132 * so return error. 1133 */ 1134 if (proto == IPPROTO_HOPOPTS && ip6->ip6_plen == 0) 1135 return (-1); 1136 proto = hbh->ip6h_nxt; 1137 hlen += (hbh->ip6h_len + 1) << 3; 1138 } 1139 if (offset != NULL) 1140 *offset = hlen; 1141 return (proto); 1142 } 1143 1144 int 1145 nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr, 1146 struct in6_addr *daddr, uint16_t lport, struct nat64_config *cfg, 1147 void *logdata) 1148 { 1149 struct nhop6_basic nh; 1150 struct ip6_hdr ip6; 1151 struct sockaddr_in6 dst; 1152 struct ip *ip; 1153 struct mbufq mq; 1154 uint16_t ip_id, ip_off; 1155 uint16_t *csum; 1156 int plen, hlen; 1157 uint8_t proto; 1158 1159 ip = mtod(m, struct ip*); 1160 1161 if (ip->ip_ttl <= IPTTLDEC) { 1162 nat64_icmp_reflect(m, ICMP_TIMXCEED, 1163 ICMP_TIMXCEED_INTRANS, 0, &cfg->stats, logdata); 1164 return (NAT64RETURN); 1165 } 1166 1167 ip6.ip6_dst = *daddr; 1168 ip6.ip6_src = *saddr; 1169 1170 hlen = ip->ip_hl << 2; 1171 plen = ntohs(ip->ip_len) - hlen; 1172 proto = ip->ip_p; 1173 1174 /* Save ip_id and ip_off, both are in network byte order */ 1175 ip_id = ip->ip_id; 1176 ip_off = ip->ip_off & htons(IP_OFFMASK | IP_MF); 1177 1178 /* Fragment length must be multiple of 8 octets */ 1179 if ((ip->ip_off & htons(IP_MF)) != 0 && (plen & 0x7) != 0) { 1180 nat64_icmp_reflect(m, ICMP_PARAMPROB, 1181 ICMP_PARAMPROB_LENGTH, 0, &cfg->stats, logdata); 1182 return (NAT64RETURN); 1183 } 1184 /* Fragmented ICMP is unsupported */ 1185 if (proto == IPPROTO_ICMP && ip_off != 0) { 1186 DPRINTF(DP_DROPS, "dropped due to fragmented ICMP"); 1187 NAT64STAT_INC(&cfg->stats, dropped); 1188 return (NAT64MFREE); 1189 } 1190 1191 dst.sin6_addr = ip6.ip6_dst; 1192 if (nat64_find_route6(&nh, &dst, m) != 0) { 1193 NAT64STAT_INC(&cfg->stats, noroute6); 1194 nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 1195 &cfg->stats, logdata); 1196 return (NAT64RETURN); 1197 } 1198 if (nh.nh_mtu < plen + sizeof(ip6) && 1199 (ip->ip_off & htons(IP_DF)) != 0) { 1200 nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 1201 FRAGSZ(nh.nh_mtu) + sizeof(struct ip), &cfg->stats, logdata); 1202 return (NAT64RETURN); 1203 } 1204 1205 ip6.ip6_flow = htonl(ip->ip_tos << 20); 1206 ip6.ip6_vfc |= IPV6_VERSION; 1207 ip6.ip6_hlim = ip->ip_ttl; 1208 /* Forwarding code will decrement TTL for netisr based output. */ 1209 if (V_nat64out == &nat64_direct) 1210 ip6.ip6_hlim -= IPTTLDEC; 1211 ip6.ip6_plen = htons(plen); 1212 ip6.ip6_nxt = (proto == IPPROTO_ICMP) ? IPPROTO_ICMPV6: proto; 1213 /* Convert checksums. */ 1214 switch (proto) { 1215 case IPPROTO_TCP: 1216 csum = &TCP(mtodo(m, hlen))->th_sum; 1217 if (lport != 0) { 1218 struct tcphdr *tcp = TCP(mtodo(m, hlen)); 1219 *csum = cksum_adjust(*csum, tcp->th_dport, lport); 1220 tcp->th_dport = lport; 1221 } 1222 *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip)); 1223 break; 1224 case IPPROTO_UDP: 1225 csum = &UDP(mtodo(m, hlen))->uh_sum; 1226 if (lport != 0) { 1227 struct udphdr *udp = UDP(mtodo(m, hlen)); 1228 *csum = cksum_adjust(*csum, udp->uh_dport, lport); 1229 udp->uh_dport = lport; 1230 } 1231 *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip)); 1232 break; 1233 case IPPROTO_ICMP: 1234 m = nat64_icmp_translate(m, &ip6, lport, hlen, cfg); 1235 if (m == NULL) /* stats already accounted */ 1236 return (NAT64RETURN); 1237 } 1238 1239 m_adj(m, hlen); 1240 mbufq_init(&mq, 255); 1241 nat64_fragment6(&cfg->stats, &ip6, &mq, m, nh.nh_mtu, ip_id, ip_off); 1242 while ((m = mbufq_dequeue(&mq)) != NULL) { 1243 if (V_nat64out->output(nh.nh_ifp, m, (struct sockaddr *)&dst, 1244 &cfg->stats, logdata) != 0) 1245 break; 1246 NAT64STAT_INC(&cfg->stats, opcnt46); 1247 } 1248 mbufq_drain(&mq); 1249 return (NAT64RETURN); 1250 } 1251 1252 int 1253 nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport, 1254 struct nat64_config *cfg, void *logdata) 1255 { 1256 struct ip ip; 1257 struct icmp6_hdr *icmp6; 1258 struct ip6_frag *ip6f; 1259 struct ip6_hdr *ip6, *ip6i; 1260 uint32_t mtu; 1261 int plen, proto; 1262 uint8_t type, code; 1263 1264 if (hlen == 0) { 1265 ip6 = mtod(m, struct ip6_hdr *); 1266 if (nat64_check_ip6(&ip6->ip6_src) != 0 || 1267 nat64_check_ip6(&ip6->ip6_dst) != 0) 1268 return (NAT64SKIP); 1269 1270 proto = nat64_getlasthdr(m, &hlen); 1271 if (proto != IPPROTO_ICMPV6) { 1272 DPRINTF(DP_DROPS, 1273 "dropped due to mbuf isn't contigious"); 1274 NAT64STAT_INC(&cfg->stats, dropped); 1275 return (NAT64MFREE); 1276 } 1277 } 1278 1279 /* 1280 * Translate ICMPv6 type and code to ICMPv4 (RFC7915). 1281 * NOTE: ICMPv6 echo handled by nat64_do_handle_ip6(). 1282 */ 1283 icmp6 = mtodo(m, hlen); 1284 mtu = 0; 1285 switch (icmp6->icmp6_type) { 1286 case ICMP6_DST_UNREACH: 1287 type = ICMP_UNREACH; 1288 switch (icmp6->icmp6_code) { 1289 case ICMP6_DST_UNREACH_NOROUTE: 1290 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1291 case ICMP6_DST_UNREACH_ADDR: 1292 code = ICMP_UNREACH_HOST; 1293 break; 1294 case ICMP6_DST_UNREACH_ADMIN: 1295 code = ICMP_UNREACH_HOST_PROHIB; 1296 break; 1297 case ICMP6_DST_UNREACH_NOPORT: 1298 code = ICMP_UNREACH_PORT; 1299 break; 1300 default: 1301 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," 1302 " code %d", icmp6->icmp6_type, 1303 icmp6->icmp6_code); 1304 NAT64STAT_INC(&cfg->stats, dropped); 1305 return (NAT64MFREE); 1306 } 1307 break; 1308 case ICMP6_PACKET_TOO_BIG: 1309 type = ICMP_UNREACH; 1310 code = ICMP_UNREACH_NEEDFRAG; 1311 mtu = ntohl(icmp6->icmp6_mtu); 1312 if (mtu < IPV6_MMTU) { 1313 DPRINTF(DP_DROPS, "Wrong MTU %d in ICMPv6 type %d," 1314 " code %d", mtu, icmp6->icmp6_type, 1315 icmp6->icmp6_code); 1316 NAT64STAT_INC(&cfg->stats, dropped); 1317 return (NAT64MFREE); 1318 } 1319 /* 1320 * Adjust MTU to reflect difference between 1321 * IPv6 an IPv4 headers. 1322 */ 1323 mtu -= sizeof(struct ip6_hdr) - sizeof(struct ip); 1324 break; 1325 case ICMP6_TIME_EXCEEDED: 1326 type = ICMP_TIMXCEED; 1327 code = icmp6->icmp6_code; 1328 break; 1329 case ICMP6_PARAM_PROB: 1330 switch (icmp6->icmp6_code) { 1331 case ICMP6_PARAMPROB_HEADER: 1332 type = ICMP_PARAMPROB; 1333 code = ICMP_PARAMPROB_ERRATPTR; 1334 mtu = ntohl(icmp6->icmp6_pptr); 1335 switch (mtu) { 1336 case 0: /* Version/Traffic Class */ 1337 case 1: /* Traffic Class/Flow Label */ 1338 break; 1339 case 4: /* Payload Length */ 1340 case 5: 1341 mtu = 2; 1342 break; 1343 case 6: /* Next Header */ 1344 mtu = 9; 1345 break; 1346 case 7: /* Hop Limit */ 1347 mtu = 8; 1348 break; 1349 default: 1350 if (mtu >= 8 && mtu <= 23) { 1351 mtu = 12; /* Source address */ 1352 break; 1353 } 1354 if (mtu >= 24 && mtu <= 39) { 1355 mtu = 16; /* Destination address */ 1356 break; 1357 } 1358 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," 1359 " code %d, pptr %d", icmp6->icmp6_type, 1360 icmp6->icmp6_code, mtu); 1361 NAT64STAT_INC(&cfg->stats, dropped); 1362 return (NAT64MFREE); 1363 } 1364 case ICMP6_PARAMPROB_NEXTHEADER: 1365 type = ICMP_UNREACH; 1366 code = ICMP_UNREACH_PROTOCOL; 1367 break; 1368 default: 1369 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," 1370 " code %d, pptr %d", icmp6->icmp6_type, 1371 icmp6->icmp6_code, ntohl(icmp6->icmp6_pptr)); 1372 NAT64STAT_INC(&cfg->stats, dropped); 1373 return (NAT64MFREE); 1374 } 1375 break; 1376 default: 1377 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d, code %d", 1378 icmp6->icmp6_type, icmp6->icmp6_code); 1379 NAT64STAT_INC(&cfg->stats, dropped); 1380 return (NAT64MFREE); 1381 } 1382 1383 hlen += sizeof(struct icmp6_hdr); 1384 if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) { 1385 NAT64STAT_INC(&cfg->stats, dropped); 1386 DPRINTF(DP_DROPS, "Message is too short %d", 1387 m->m_pkthdr.len); 1388 return (NAT64MFREE); 1389 } 1390 /* 1391 * We need at least ICMP_MINLEN bytes of original datagram payload 1392 * to generate ICMP message. It is nice that ICMP_MINLEN is equal 1393 * to sizeof(struct ip6_frag). So, if embedded datagram had a fragment 1394 * header we will not have to do m_pullup() again. 1395 * 1396 * What we have here: 1397 * Outer header: (IPv6iGW, v4mapPRefix+v4exthost) 1398 * Inner header: (v4mapPRefix+v4host, IPv6iHost) [sport, dport] 1399 * We need to translate it to: 1400 * 1401 * Outer header: (alias_host, v4exthost) 1402 * Inner header: (v4exthost, alias_host) [sport, alias_port] 1403 * 1404 * Assume caller function has checked if v4mapPRefix+v4host 1405 * matches configured prefix. 1406 * The only two things we should be provided with are mapping between 1407 * IPv6iHost <> alias_host and between dport and alias_port. 1408 */ 1409 if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) 1410 m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN); 1411 if (m == NULL) { 1412 NAT64STAT_INC(&cfg->stats, nomem); 1413 return (NAT64RETURN); 1414 } 1415 ip6 = mtod(m, struct ip6_hdr *); 1416 ip6i = mtodo(m, hlen); 1417 ip6f = NULL; 1418 proto = ip6i->ip6_nxt; 1419 plen = ntohs(ip6i->ip6_plen); 1420 hlen += sizeof(struct ip6_hdr); 1421 if (proto == IPPROTO_FRAGMENT) { 1422 if (m->m_pkthdr.len < hlen + sizeof(struct ip6_frag) + 1423 ICMP_MINLEN) 1424 goto fail; 1425 ip6f = mtodo(m, hlen); 1426 proto = ip6f->ip6f_nxt; 1427 plen -= sizeof(struct ip6_frag); 1428 hlen += sizeof(struct ip6_frag); 1429 /* Ajust MTU to reflect frag header size */ 1430 if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG) 1431 mtu -= sizeof(struct ip6_frag); 1432 } 1433 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) { 1434 DPRINTF(DP_DROPS, "Unsupported proto %d in the inner header", 1435 proto); 1436 goto fail; 1437 } 1438 if (nat64_check_ip6(&ip6i->ip6_src) != 0 || 1439 nat64_check_ip6(&ip6i->ip6_dst) != 0) { 1440 DPRINTF(DP_DROPS, "Inner addresses do not passes the check"); 1441 goto fail; 1442 } 1443 /* Check if outer dst is the same as inner src */ 1444 if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6i->ip6_src)) { 1445 DPRINTF(DP_DROPS, "Inner src doesn't match outer dst"); 1446 goto fail; 1447 } 1448 1449 /* Now we need to make a fake IPv4 packet to generate ICMP message */ 1450 ip.ip_dst.s_addr = aaddr; 1451 ip.ip_src.s_addr = nat64_extract_ip4(cfg, &ip6i->ip6_src); 1452 /* XXX: Make fake ulp header */ 1453 if (V_nat64out == &nat64_direct) /* init_ip4hdr will decrement it */ 1454 ip6i->ip6_hlim += IPV6_HLIMDEC; 1455 nat64_init_ip4hdr(ip6i, ip6f, plen, proto, &ip); 1456 m_adj(m, hlen - sizeof(struct ip)); 1457 bcopy(&ip, mtod(m, void *), sizeof(ip)); 1458 nat64_icmp_reflect(m, type, code, (uint16_t)mtu, &cfg->stats, 1459 logdata); 1460 return (NAT64RETURN); 1461 fail: 1462 /* 1463 * We must call m_freem() because mbuf pointer could be 1464 * changed with m_pullup(). 1465 */ 1466 m_freem(m); 1467 NAT64STAT_INC(&cfg->stats, dropped); 1468 return (NAT64RETURN); 1469 } 1470 1471 int 1472 nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport, 1473 struct nat64_config *cfg, void *logdata) 1474 { 1475 struct ip ip; 1476 struct nhop4_basic nh; 1477 struct sockaddr_in dst; 1478 struct ip6_frag *frag; 1479 struct ip6_hdr *ip6; 1480 struct icmp6_hdr *icmp6; 1481 uint16_t *csum; 1482 int plen, hlen, proto; 1483 1484 /* 1485 * XXX: we expect ipfw_chk() did m_pullup() up to upper level 1486 * protocol's headers. Also we skip some checks, that ip6_input(), 1487 * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did. 1488 */ 1489 ip6 = mtod(m, struct ip6_hdr *); 1490 if (nat64_check_ip6(&ip6->ip6_src) != 0 || 1491 nat64_check_ip6(&ip6->ip6_dst) != 0) { 1492 return (NAT64SKIP); 1493 } 1494 1495 /* Starting from this point we must not return zero */ 1496 ip.ip_src.s_addr = aaddr; 1497 if (nat64_check_ip4(ip.ip_src.s_addr) != 0) { 1498 DPRINTF(DP_GENERIC | DP_DROPS, "invalid source address: %08x", 1499 ip.ip_src.s_addr); 1500 NAT64STAT_INC(&cfg->stats, dropped); 1501 return (NAT64MFREE); 1502 } 1503 1504 ip.ip_dst.s_addr = nat64_extract_ip4(cfg, &ip6->ip6_dst); 1505 if (ip.ip_dst.s_addr == 0) { 1506 NAT64STAT_INC(&cfg->stats, dropped); 1507 return (NAT64MFREE); 1508 } 1509 1510 if (ip6->ip6_hlim <= IPV6_HLIMDEC) { 1511 nat64_icmp6_reflect(m, ICMP6_TIME_EXCEEDED, 1512 ICMP6_TIME_EXCEED_TRANSIT, 0, &cfg->stats, logdata); 1513 return (NAT64RETURN); 1514 } 1515 1516 hlen = 0; 1517 plen = ntohs(ip6->ip6_plen); 1518 proto = nat64_getlasthdr(m, &hlen); 1519 if (proto < 0) { 1520 DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious"); 1521 NAT64STAT_INC(&cfg->stats, dropped); 1522 return (NAT64MFREE); 1523 } 1524 frag = NULL; 1525 if (proto == IPPROTO_FRAGMENT) { 1526 /* ipfw_chk should m_pullup up to frag header */ 1527 if (m->m_len < hlen + sizeof(*frag)) { 1528 DPRINTF(DP_DROPS, 1529 "dropped due to mbuf isn't contigious"); 1530 NAT64STAT_INC(&cfg->stats, dropped); 1531 return (NAT64MFREE); 1532 } 1533 frag = mtodo(m, hlen); 1534 proto = frag->ip6f_nxt; 1535 hlen += sizeof(*frag); 1536 /* Fragmented ICMPv6 is unsupported */ 1537 if (proto == IPPROTO_ICMPV6) { 1538 DPRINTF(DP_DROPS, "dropped due to fragmented ICMPv6"); 1539 NAT64STAT_INC(&cfg->stats, dropped); 1540 return (NAT64MFREE); 1541 } 1542 /* Fragment length must be multiple of 8 octets */ 1543 if ((frag->ip6f_offlg & IP6F_MORE_FRAG) != 0 && 1544 ((plen + sizeof(struct ip6_hdr) - hlen) & 0x7) != 0) { 1545 nat64_icmp6_reflect(m, ICMP6_PARAM_PROB, 1546 ICMP6_PARAMPROB_HEADER, 1547 offsetof(struct ip6_hdr, ip6_plen), &cfg->stats, 1548 logdata); 1549 return (NAT64RETURN); 1550 } 1551 } 1552 plen -= hlen - sizeof(struct ip6_hdr); 1553 if (plen < 0 || m->m_pkthdr.len < plen + hlen) { 1554 DPRINTF(DP_DROPS, "plen %d, pkthdr.len %d, hlen %d", 1555 plen, m->m_pkthdr.len, hlen); 1556 NAT64STAT_INC(&cfg->stats, dropped); 1557 return (NAT64MFREE); 1558 } 1559 1560 icmp6 = NULL; /* Make gcc happy */ 1561 if (proto == IPPROTO_ICMPV6) { 1562 icmp6 = mtodo(m, hlen); 1563 if (icmp6->icmp6_type != ICMP6_ECHO_REQUEST && 1564 icmp6->icmp6_type != ICMP6_ECHO_REPLY) 1565 return (nat64_handle_icmp6(m, hlen, aaddr, aport, 1566 cfg, logdata)); 1567 } 1568 dst.sin_addr.s_addr = ip.ip_dst.s_addr; 1569 if (nat64_find_route4(&nh, &dst, m) != 0) { 1570 NAT64STAT_INC(&cfg->stats, noroute4); 1571 nat64_icmp6_reflect(m, ICMP6_DST_UNREACH, 1572 ICMP6_DST_UNREACH_NOROUTE, 0, &cfg->stats, logdata); 1573 return (NAT64RETURN); 1574 } 1575 if (nh.nh_mtu < plen + sizeof(ip)) { 1576 nat64_icmp6_reflect(m, ICMP6_PACKET_TOO_BIG, 0, nh.nh_mtu, 1577 &cfg->stats, logdata); 1578 return (NAT64RETURN); 1579 } 1580 nat64_init_ip4hdr(ip6, frag, plen, proto, &ip); 1581 /* Convert checksums. */ 1582 switch (proto) { 1583 case IPPROTO_TCP: 1584 csum = &TCP(mtodo(m, hlen))->th_sum; 1585 if (aport != 0) { 1586 struct tcphdr *tcp = TCP(mtodo(m, hlen)); 1587 *csum = cksum_adjust(*csum, tcp->th_sport, aport); 1588 tcp->th_sport = aport; 1589 } 1590 *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip)); 1591 break; 1592 case IPPROTO_UDP: 1593 csum = &UDP(mtodo(m, hlen))->uh_sum; 1594 if (aport != 0) { 1595 struct udphdr *udp = UDP(mtodo(m, hlen)); 1596 *csum = cksum_adjust(*csum, udp->uh_sport, aport); 1597 udp->uh_sport = aport; 1598 } 1599 *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip)); 1600 break; 1601 case IPPROTO_ICMPV6: 1602 /* Checksum in ICMPv6 covers pseudo header */ 1603 csum = &icmp6->icmp6_cksum; 1604 *csum = cksum_add(*csum, in6_cksum_pseudo(ip6, plen, 1605 IPPROTO_ICMPV6, 0)); 1606 /* Convert ICMPv6 types to ICMP */ 1607 proto = *(uint16_t *)icmp6; /* save old word for cksum_adjust */ 1608 if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST) 1609 icmp6->icmp6_type = ICMP_ECHO; 1610 else /* ICMP6_ECHO_REPLY */ 1611 icmp6->icmp6_type = ICMP_ECHOREPLY; 1612 *csum = cksum_adjust(*csum, (uint16_t)proto, 1613 *(uint16_t *)icmp6); 1614 if (aport != 0) { 1615 uint16_t old_id = icmp6->icmp6_id; 1616 icmp6->icmp6_id = aport; 1617 *csum = cksum_adjust(*csum, old_id, aport); 1618 } 1619 break; 1620 }; 1621 1622 m_adj(m, hlen - sizeof(ip)); 1623 bcopy(&ip, mtod(m, void *), sizeof(ip)); 1624 if (V_nat64out->output(nh.nh_ifp, m, (struct sockaddr *)&dst, 1625 &cfg->stats, logdata) == 0) 1626 NAT64STAT_INC(&cfg->stats, opcnt64); 1627 return (NAT64RETURN); 1628 } 1629 1630