1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2015-2019 Yandex LLC 5 * Copyright (c) 2015-2019 Andrey V. Elsukov <ae@FreeBSD.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/counter.h> 35 #include <sys/errno.h> 36 #include <sys/kernel.h> 37 #include <sys/lock.h> 38 #include <sys/mbuf.h> 39 #include <sys/module.h> 40 #include <sys/rmlock.h> 41 #include <sys/rwlock.h> 42 #include <sys/socket.h> 43 #include <sys/queue.h> 44 45 #include <net/if.h> 46 #include <net/if_var.h> 47 #include <net/if_pflog.h> 48 #include <net/pfil.h> 49 #include <net/netisr.h> 50 #include <net/route.h> 51 52 #include <netinet/in.h> 53 #include <netinet/in_fib.h> 54 #include <netinet/ip.h> 55 #include <netinet/ip_var.h> 56 #include <netinet/ip_fw.h> 57 #include <netinet/ip6.h> 58 #include <netinet/icmp6.h> 59 #include <netinet/ip_icmp.h> 60 #include <netinet/tcp.h> 61 #include <netinet/udp.h> 62 #include <netinet6/in6_var.h> 63 #include <netinet6/in6_fib.h> 64 #include <netinet6/ip6_var.h> 65 #include <netinet6/ip_fw_nat64.h> 66 67 #include <netpfil/pf/pf.h> 68 #include <netpfil/ipfw/ip_fw_private.h> 69 #include <machine/in_cksum.h> 70 71 #include "ip_fw_nat64.h" 72 #include "nat64_translate.h" 73 74 75 typedef int (*nat64_output_t)(struct ifnet *, struct mbuf *, 76 struct sockaddr *, struct nat64_counters *, void *); 77 typedef int (*nat64_output_one_t)(struct mbuf *, struct nat64_counters *, 78 void *); 79 80 static int nat64_find_route4(struct nhop4_basic *, struct sockaddr_in *, 81 struct mbuf *); 82 static int nat64_find_route6(struct nhop6_basic *, struct sockaddr_in6 *, 83 struct mbuf *); 84 static int nat64_output_one(struct mbuf *, struct nat64_counters *, void *); 85 static int nat64_output(struct ifnet *, struct mbuf *, struct sockaddr *, 86 struct nat64_counters *, void *); 87 static int nat64_direct_output_one(struct mbuf *, struct nat64_counters *, 88 void *); 89 static int nat64_direct_output(struct ifnet *, struct mbuf *, 90 struct sockaddr *, struct nat64_counters *, void *); 91 92 struct nat64_methods { 93 nat64_output_t output; 94 nat64_output_one_t output_one; 95 }; 96 static const struct nat64_methods nat64_netisr = { 97 .output = nat64_output, 98 .output_one = nat64_output_one 99 }; 100 static const struct nat64_methods nat64_direct = { 101 .output = nat64_direct_output, 102 .output_one = nat64_direct_output_one 103 }; 104 VNET_DEFINE_STATIC(const struct nat64_methods *, nat64out) = &nat64_netisr; 105 #define V_nat64out VNET(nat64out) 106 107 void 108 nat64_set_output_method(int direct) 109 { 110 111 V_nat64out = direct != 0 ? &nat64_direct: &nat64_netisr; 112 } 113 114 int 115 nat64_get_output_method(void) 116 { 117 118 return (V_nat64out == &nat64_direct ? 1: 0); 119 } 120 121 static void 122 nat64_log(struct pfloghdr *logdata, struct mbuf *m, sa_family_t family) 123 { 124 125 logdata->dir = PF_OUT; 126 logdata->af = family; 127 ipfw_bpf_mtap2(logdata, PFLOG_HDRLEN, m); 128 } 129 130 static int 131 nat64_direct_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 132 struct nat64_counters *stats, void *logdata) 133 { 134 int error; 135 136 if (logdata != NULL) 137 nat64_log(logdata, m, dst->sa_family); 138 error = (*ifp->if_output)(ifp, m, dst, NULL); 139 if (error != 0) 140 NAT64STAT_INC(stats, oerrors); 141 return (error); 142 } 143 144 static int 145 nat64_direct_output_one(struct mbuf *m, struct nat64_counters *stats, 146 void *logdata) 147 { 148 struct nhop6_basic nh6; 149 struct nhop4_basic nh4; 150 struct sockaddr_in6 dst6; 151 struct sockaddr_in dst4; 152 struct sockaddr *dst; 153 struct ip6_hdr *ip6; 154 struct ip *ip4; 155 struct ifnet *ifp; 156 int error; 157 158 ip4 = mtod(m, struct ip *); 159 switch (ip4->ip_v) { 160 case IPVERSION: 161 dst4.sin_addr = ip4->ip_dst; 162 error = nat64_find_route4(&nh4, &dst4, m); 163 if (error != 0) 164 NAT64STAT_INC(stats, noroute4); 165 else { 166 ifp = nh4.nh_ifp; 167 dst = (struct sockaddr *)&dst4; 168 } 169 break; 170 case (IPV6_VERSION >> 4): 171 ip6 = mtod(m, struct ip6_hdr *); 172 dst6.sin6_addr = ip6->ip6_dst; 173 error = nat64_find_route6(&nh6, &dst6, m); 174 if (error != 0) 175 NAT64STAT_INC(stats, noroute6); 176 else { 177 ifp = nh6.nh_ifp; 178 dst = (struct sockaddr *)&dst6; 179 } 180 break; 181 default: 182 m_freem(m); 183 NAT64STAT_INC(stats, dropped); 184 DPRINTF(DP_DROPS, "dropped due to unknown IP version"); 185 return (EAFNOSUPPORT); 186 } 187 if (error != 0) { 188 m_freem(m); 189 return (EHOSTUNREACH); 190 } 191 if (logdata != NULL) 192 nat64_log(logdata, m, dst->sa_family); 193 error = (*ifp->if_output)(ifp, m, dst, NULL); 194 if (error != 0) 195 NAT64STAT_INC(stats, oerrors); 196 return (error); 197 } 198 199 static int 200 nat64_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 201 struct nat64_counters *stats, void *logdata) 202 { 203 struct ip *ip4; 204 int ret, af; 205 206 ip4 = mtod(m, struct ip *); 207 switch (ip4->ip_v) { 208 case IPVERSION: 209 af = AF_INET; 210 ret = NETISR_IP; 211 break; 212 case (IPV6_VERSION >> 4): 213 af = AF_INET6; 214 ret = NETISR_IPV6; 215 break; 216 default: 217 m_freem(m); 218 NAT64STAT_INC(stats, dropped); 219 DPRINTF(DP_DROPS, "unknown IP version"); 220 return (EAFNOSUPPORT); 221 } 222 if (logdata != NULL) 223 nat64_log(logdata, m, af); 224 if (m->m_pkthdr.rcvif == NULL) 225 m->m_pkthdr.rcvif = V_loif; 226 ret = netisr_queue(ret, m); 227 if (ret != 0) 228 NAT64STAT_INC(stats, oerrors); 229 return (ret); 230 } 231 232 static int 233 nat64_output_one(struct mbuf *m, struct nat64_counters *stats, void *logdata) 234 { 235 236 return (nat64_output(NULL, m, NULL, stats, logdata)); 237 } 238 239 /* 240 * Check the given IPv6 prefix and length according to RFC6052: 241 * The prefixes can only have one of the following lengths: 242 * 32, 40, 48, 56, 64, or 96 (The Well-Known Prefix is 96 bits long). 243 * Returns zero on success, otherwise EINVAL. 244 */ 245 int 246 nat64_check_prefixlen(int length) 247 { 248 249 switch (length) { 250 case 32: 251 case 40: 252 case 48: 253 case 56: 254 case 64: 255 case 96: 256 return (0); 257 } 258 return (EINVAL); 259 } 260 261 int 262 nat64_check_prefix6(const struct in6_addr *prefix, int length) 263 { 264 265 if (nat64_check_prefixlen(length) != 0) 266 return (EINVAL); 267 268 /* Well-known prefix has 96 prefix length */ 269 if (IN6_IS_ADDR_WKPFX(prefix) && length != 96) 270 return (EINVAL); 271 272 /* Bits 64 to 71 must be set to zero */ 273 if (prefix->__u6_addr.__u6_addr8[8] != 0) 274 return (EINVAL); 275 276 /* Some extra checks */ 277 if (IN6_IS_ADDR_MULTICAST(prefix) || 278 IN6_IS_ADDR_UNSPECIFIED(prefix) || 279 IN6_IS_ADDR_LOOPBACK(prefix)) 280 return (EINVAL); 281 return (0); 282 } 283 284 int 285 nat64_check_private_ip4(const struct nat64_config *cfg, in_addr_t ia) 286 { 287 288 if (cfg->flags & NAT64_ALLOW_PRIVATE) 289 return (0); 290 291 /* WKPFX must not be used to represent non-global IPv4 addresses */ 292 if (cfg->flags & NAT64_WKPFX) { 293 /* IN_PRIVATE */ 294 if ((ia & htonl(0xff000000)) == htonl(0x0a000000) || 295 (ia & htonl(0xfff00000)) == htonl(0xac100000) || 296 (ia & htonl(0xffff0000)) == htonl(0xc0a80000)) 297 return (1); 298 /* 299 * RFC 5735: 300 * 192.0.0.0/24 - reserved for IETF protocol assignments 301 * 192.88.99.0/24 - for use as 6to4 relay anycast addresses 302 * 198.18.0.0/15 - for use in benchmark tests 303 * 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24 - for use 304 * in documentation and example code 305 */ 306 if ((ia & htonl(0xffffff00)) == htonl(0xc0000000) || 307 (ia & htonl(0xffffff00)) == htonl(0xc0586300) || 308 (ia & htonl(0xfffffe00)) == htonl(0xc6120000) || 309 (ia & htonl(0xffffff00)) == htonl(0xc0000200) || 310 (ia & htonl(0xfffffe00)) == htonl(0xc6336400) || 311 (ia & htonl(0xffffff00)) == htonl(0xcb007100)) 312 return (1); 313 } 314 return (0); 315 } 316 317 /* 318 * Embed @ia IPv4 address into @ip6 IPv6 address. 319 * Place to embedding determined from prefix length @plen. 320 */ 321 void 322 nat64_embed_ip4(struct in6_addr *ip6, int plen, in_addr_t ia) 323 { 324 325 switch (plen) { 326 case 32: 327 case 96: 328 ip6->s6_addr32[plen / 32] = ia; 329 break; 330 case 40: 331 case 48: 332 case 56: 333 /* 334 * Preserve prefix bits. 335 * Since suffix bits should be zero and reserved for future 336 * use, we just overwrite the whole word, where they are. 337 */ 338 ip6->s6_addr32[1] &= 0xffffffff << (32 - plen % 32); 339 #if BYTE_ORDER == BIG_ENDIAN 340 ip6->s6_addr32[1] |= ia >> (plen % 32); 341 ip6->s6_addr32[2] = ia << (24 - plen % 32); 342 #elif BYTE_ORDER == LITTLE_ENDIAN 343 ip6->s6_addr32[1] |= ia << (plen % 32); 344 ip6->s6_addr32[2] = ia >> (24 - plen % 32); 345 #endif 346 break; 347 case 64: 348 #if BYTE_ORDER == BIG_ENDIAN 349 ip6->s6_addr32[2] = ia >> 8; 350 ip6->s6_addr32[3] = ia << 24; 351 #elif BYTE_ORDER == LITTLE_ENDIAN 352 ip6->s6_addr32[2] = ia << 8; 353 ip6->s6_addr32[3] = ia >> 24; 354 #endif 355 break; 356 default: 357 panic("Wrong plen: %d", plen); 358 }; 359 /* 360 * Bits 64 to 71 of the address are reserved for compatibility 361 * with the host identifier format defined in the IPv6 addressing 362 * architecture [RFC4291]. These bits MUST be set to zero. 363 */ 364 ip6->s6_addr8[8] = 0; 365 } 366 367 in_addr_t 368 nat64_extract_ip4(const struct in6_addr *ip6, int plen) 369 { 370 in_addr_t ia; 371 372 /* 373 * According to RFC 6052 p2.2: 374 * IPv4-embedded IPv6 addresses are composed of a variable-length 375 * prefix, the embedded IPv4 address, and a variable length suffix. 376 * The suffix bits are reserved for future extensions and SHOULD 377 * be set to zero. 378 */ 379 switch (plen) { 380 case 32: 381 if (ip6->s6_addr32[3] != 0 || ip6->s6_addr32[2] != 0) 382 goto badip6; 383 break; 384 case 40: 385 if (ip6->s6_addr32[3] != 0 || 386 (ip6->s6_addr32[2] & htonl(0xff00ffff)) != 0) 387 goto badip6; 388 break; 389 case 48: 390 if (ip6->s6_addr32[3] != 0 || 391 (ip6->s6_addr32[2] & htonl(0xff0000ff)) != 0) 392 goto badip6; 393 break; 394 case 56: 395 if (ip6->s6_addr32[3] != 0 || ip6->s6_addr8[8] != 0) 396 goto badip6; 397 break; 398 case 64: 399 if (ip6->s6_addr8[8] != 0 || 400 (ip6->s6_addr32[3] & htonl(0x00ffffff)) != 0) 401 goto badip6; 402 }; 403 switch (plen) { 404 case 32: 405 case 96: 406 ia = ip6->s6_addr32[plen / 32]; 407 break; 408 case 40: 409 case 48: 410 case 56: 411 #if BYTE_ORDER == BIG_ENDIAN 412 ia = (ip6->s6_addr32[1] << (plen % 32)) | 413 (ip6->s6_addr32[2] >> (24 - plen % 32)); 414 #elif BYTE_ORDER == LITTLE_ENDIAN 415 ia = (ip6->s6_addr32[1] >> (plen % 32)) | 416 (ip6->s6_addr32[2] << (24 - plen % 32)); 417 #endif 418 break; 419 case 64: 420 #if BYTE_ORDER == BIG_ENDIAN 421 ia = (ip6->s6_addr32[2] << 8) | (ip6->s6_addr32[3] >> 24); 422 #elif BYTE_ORDER == LITTLE_ENDIAN 423 ia = (ip6->s6_addr32[2] >> 8) | (ip6->s6_addr32[3] << 24); 424 #endif 425 break; 426 default: 427 return (0); 428 }; 429 if (nat64_check_ip4(ia) == 0) 430 return (ia); 431 432 DPRINTF(DP_GENERIC | DP_DROPS, 433 "invalid destination address: %08x", ia); 434 return (0); 435 badip6: 436 DPRINTF(DP_GENERIC | DP_DROPS, "invalid IPv4-embedded IPv6 address"); 437 return (0); 438 } 439 440 /* 441 * According to RFC 1624 the equation for incremental checksum update is: 442 * HC' = ~(~HC + ~m + m') -- [Eqn. 3] 443 * HC' = HC - ~m - m' -- [Eqn. 4] 444 * So, when we are replacing IPv4 addresses to IPv6, we 445 * can assume, that new bytes previously were zeros, and vise versa - 446 * when we replacing IPv6 addresses to IPv4, now unused bytes become 447 * zeros. The payload length in pseudo header has bigger size, but one 448 * half of it should be zero. Using the equation 4 we get: 449 * HC' = HC - (~m0 + m0') -- m0 is first changed word 450 * HC' = (HC - (~m0 + m0')) - (~m1 + m1') -- m1 is second changed word 451 * HC' = HC - ~m0 - m0' - ~m1 - m1' - ... = 452 * = HC - sum(~m[i] + m'[i]) 453 * 454 * The function result should be used as follows: 455 * IPv6 to IPv4: HC' = cksum_add(HC, result) 456 * IPv4 to IPv6: HC' = cksum_add(HC, ~result) 457 */ 458 static uint16_t 459 nat64_cksum_convert(struct ip6_hdr *ip6, struct ip *ip) 460 { 461 uint32_t sum; 462 uint16_t *p; 463 464 sum = ~ip->ip_src.s_addr >> 16; 465 sum += ~ip->ip_src.s_addr & 0xffff; 466 sum += ~ip->ip_dst.s_addr >> 16; 467 sum += ~ip->ip_dst.s_addr & 0xffff; 468 469 for (p = (uint16_t *)&ip6->ip6_src; 470 p < (uint16_t *)(&ip6->ip6_src + 2); p++) 471 sum += *p; 472 473 while (sum >> 16) 474 sum = (sum & 0xffff) + (sum >> 16); 475 return (sum); 476 } 477 478 static void 479 nat64_init_ip4hdr(const struct ip6_hdr *ip6, const struct ip6_frag *frag, 480 uint16_t plen, uint8_t proto, struct ip *ip) 481 { 482 483 /* assume addresses are already initialized */ 484 ip->ip_v = IPVERSION; 485 ip->ip_hl = sizeof(*ip) >> 2; 486 ip->ip_tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; 487 ip->ip_len = htons(sizeof(*ip) + plen); 488 ip->ip_ttl = ip6->ip6_hlim; 489 /* Forwarding code will decrement TTL for netisr based output. */ 490 if (V_nat64out == &nat64_direct) 491 ip->ip_ttl -= IPV6_HLIMDEC; 492 ip->ip_sum = 0; 493 ip->ip_p = (proto == IPPROTO_ICMPV6) ? IPPROTO_ICMP: proto; 494 ip_fillid(ip); 495 if (frag != NULL) { 496 ip->ip_off = htons(ntohs(frag->ip6f_offlg) >> 3); 497 if (frag->ip6f_offlg & IP6F_MORE_FRAG) 498 ip->ip_off |= htons(IP_MF); 499 } else { 500 ip->ip_off = htons(IP_DF); 501 } 502 ip->ip_sum = in_cksum_hdr(ip); 503 } 504 505 #define FRAGSZ(mtu) ((mtu) - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag)) 506 static NAT64NOINLINE int 507 nat64_fragment6(struct nat64_counters *stats, struct ip6_hdr *ip6, 508 struct mbufq *mq, struct mbuf *m, uint32_t mtu, uint16_t ip_id, 509 uint16_t ip_off) 510 { 511 struct ip6_frag ip6f; 512 struct mbuf *n; 513 uint16_t hlen, len, offset; 514 int plen; 515 516 plen = ntohs(ip6->ip6_plen); 517 hlen = sizeof(struct ip6_hdr); 518 519 /* Fragmentation isn't needed */ 520 if (ip_off == 0 && plen <= mtu - hlen) { 521 M_PREPEND(m, hlen, M_NOWAIT); 522 if (m == NULL) { 523 NAT64STAT_INC(stats, nomem); 524 return (ENOMEM); 525 } 526 bcopy(ip6, mtod(m, void *), hlen); 527 if (mbufq_enqueue(mq, m) != 0) { 528 m_freem(m); 529 NAT64STAT_INC(stats, dropped); 530 DPRINTF(DP_DROPS, "dropped due to mbufq overflow"); 531 return (ENOBUFS); 532 } 533 return (0); 534 } 535 536 hlen += sizeof(struct ip6_frag); 537 ip6f.ip6f_reserved = 0; 538 ip6f.ip6f_nxt = ip6->ip6_nxt; 539 ip6->ip6_nxt = IPPROTO_FRAGMENT; 540 if (ip_off != 0) { 541 /* 542 * We have got an IPv4 fragment. 543 * Use offset value and ip_id from original fragment. 544 */ 545 ip6f.ip6f_ident = htonl(ntohs(ip_id)); 546 offset = (ntohs(ip_off) & IP_OFFMASK) << 3; 547 NAT64STAT_INC(stats, ifrags); 548 } else { 549 /* The packet size exceeds interface MTU */ 550 ip6f.ip6f_ident = htonl(ip6_randomid()); 551 offset = 0; /* First fragment*/ 552 } 553 while (plen > 0 && m != NULL) { 554 n = NULL; 555 len = FRAGSZ(mtu) & ~7; 556 if (len > plen) 557 len = plen; 558 ip6->ip6_plen = htons(len + sizeof(ip6f)); 559 ip6f.ip6f_offlg = ntohs(offset); 560 if (len < plen || (ip_off & htons(IP_MF)) != 0) 561 ip6f.ip6f_offlg |= IP6F_MORE_FRAG; 562 offset += len; 563 plen -= len; 564 if (plen > 0) { 565 n = m_split(m, len, M_NOWAIT); 566 if (n == NULL) 567 goto fail; 568 } 569 M_PREPEND(m, hlen, M_NOWAIT); 570 if (m == NULL) 571 goto fail; 572 bcopy(ip6, mtod(m, void *), sizeof(struct ip6_hdr)); 573 bcopy(&ip6f, mtodo(m, sizeof(struct ip6_hdr)), 574 sizeof(struct ip6_frag)); 575 if (mbufq_enqueue(mq, m) != 0) 576 goto fail; 577 m = n; 578 } 579 NAT64STAT_ADD(stats, ofrags, mbufq_len(mq)); 580 return (0); 581 fail: 582 if (m != NULL) 583 m_freem(m); 584 if (n != NULL) 585 m_freem(n); 586 mbufq_drain(mq); 587 NAT64STAT_INC(stats, nomem); 588 return (ENOMEM); 589 } 590 591 static NAT64NOINLINE int 592 nat64_find_route6(struct nhop6_basic *pnh, struct sockaddr_in6 *dst, 593 struct mbuf *m) 594 { 595 596 if (fib6_lookup_nh_basic(M_GETFIB(m), &dst->sin6_addr, 0, 0, 0, 597 pnh) != 0) 598 return (EHOSTUNREACH); 599 if (pnh->nh_flags & (NHF_BLACKHOLE | NHF_REJECT)) 600 return (EHOSTUNREACH); 601 /* 602 * XXX: we need to use destination address with embedded scope 603 * zone id, because LLTABLE uses such form of addresses for lookup. 604 */ 605 dst->sin6_family = AF_INET6; 606 dst->sin6_len = sizeof(*dst); 607 dst->sin6_addr = pnh->nh_addr; 608 if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr)) 609 dst->sin6_addr.s6_addr16[1] = 610 htons(pnh->nh_ifp->if_index & 0xffff); 611 dst->sin6_port = 0; 612 dst->sin6_scope_id = 0; 613 dst->sin6_flowinfo = 0; 614 615 return (0); 616 } 617 618 #define NAT64_ICMP6_PLEN 64 619 static NAT64NOINLINE void 620 nat64_icmp6_reflect(struct mbuf *m, uint8_t type, uint8_t code, uint32_t mtu, 621 struct nat64_counters *stats, void *logdata) 622 { 623 struct icmp6_hdr *icmp6; 624 struct ip6_hdr *ip6, *oip6; 625 struct mbuf *n; 626 int len, plen; 627 628 len = 0; 629 plen = nat64_getlasthdr(m, &len); 630 if (plen < 0) { 631 DPRINTF(DP_DROPS, "mbuf isn't contigious"); 632 goto freeit; 633 } 634 /* 635 * Do not send ICMPv6 in reply to ICMPv6 errors. 636 */ 637 if (plen == IPPROTO_ICMPV6) { 638 if (m->m_len < len + sizeof(*icmp6)) { 639 DPRINTF(DP_DROPS, "mbuf isn't contigious"); 640 goto freeit; 641 } 642 icmp6 = mtodo(m, len); 643 if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST || 644 icmp6->icmp6_type == ND_REDIRECT) { 645 DPRINTF(DP_DROPS, "do not send ICMPv6 in reply to " 646 "ICMPv6 errors"); 647 goto freeit; 648 } 649 } 650 /* 651 if (icmp6_ratelimit(&ip6->ip6_src, type, code)) 652 goto freeit; 653 */ 654 ip6 = mtod(m, struct ip6_hdr *); 655 switch (type) { 656 case ICMP6_DST_UNREACH: 657 case ICMP6_PACKET_TOO_BIG: 658 case ICMP6_TIME_EXCEEDED: 659 case ICMP6_PARAM_PROB: 660 break; 661 default: 662 goto freeit; 663 } 664 /* Calculate length of ICMPv6 payload */ 665 len = (m->m_pkthdr.len > NAT64_ICMP6_PLEN) ? NAT64_ICMP6_PLEN: 666 m->m_pkthdr.len; 667 668 /* Create new ICMPv6 datagram */ 669 plen = len + sizeof(struct icmp6_hdr); 670 n = m_get2(sizeof(struct ip6_hdr) + plen + max_hdr, M_NOWAIT, 671 MT_HEADER, M_PKTHDR); 672 if (n == NULL) { 673 NAT64STAT_INC(stats, nomem); 674 m_freem(m); 675 return; 676 } 677 /* 678 * Move pkthdr from original mbuf. We should have initialized some 679 * fields, because we can reinject this mbuf to netisr and it will 680 * go trough input path (it requires at least rcvif should be set). 681 * Also do M_ALIGN() to reduce chances of need to allocate new mbuf 682 * in the chain, when we will do M_PREPEND() or make some type of 683 * tunneling. 684 */ 685 m_move_pkthdr(n, m); 686 M_ALIGN(n, sizeof(struct ip6_hdr) + plen + max_hdr); 687 688 n->m_len = n->m_pkthdr.len = sizeof(struct ip6_hdr) + plen; 689 oip6 = mtod(n, struct ip6_hdr *); 690 oip6->ip6_src = ip6->ip6_dst; 691 oip6->ip6_dst = ip6->ip6_src; 692 oip6->ip6_nxt = IPPROTO_ICMPV6; 693 oip6->ip6_flow = 0; 694 oip6->ip6_vfc |= IPV6_VERSION; 695 oip6->ip6_hlim = V_ip6_defhlim; 696 oip6->ip6_plen = htons(plen); 697 698 icmp6 = mtodo(n, sizeof(struct ip6_hdr)); 699 icmp6->icmp6_cksum = 0; 700 icmp6->icmp6_type = type; 701 icmp6->icmp6_code = code; 702 icmp6->icmp6_mtu = htonl(mtu); 703 704 m_copydata(m, 0, len, mtodo(n, sizeof(struct ip6_hdr) + 705 sizeof(struct icmp6_hdr))); 706 icmp6->icmp6_cksum = in6_cksum(n, IPPROTO_ICMPV6, 707 sizeof(struct ip6_hdr), plen); 708 m_freem(m); 709 V_nat64out->output_one(n, stats, logdata); 710 return; 711 freeit: 712 NAT64STAT_INC(stats, dropped); 713 m_freem(m); 714 } 715 716 static NAT64NOINLINE int 717 nat64_find_route4(struct nhop4_basic *pnh, struct sockaddr_in *dst, 718 struct mbuf *m) 719 { 720 721 if (fib4_lookup_nh_basic(M_GETFIB(m), dst->sin_addr, 0, 0, pnh) != 0) 722 return (EHOSTUNREACH); 723 if (pnh->nh_flags & (NHF_BLACKHOLE | NHF_BROADCAST | NHF_REJECT)) 724 return (EHOSTUNREACH); 725 726 dst->sin_family = AF_INET; 727 dst->sin_len = sizeof(*dst); 728 dst->sin_addr = pnh->nh_addr; 729 dst->sin_port = 0; 730 return (0); 731 } 732 733 #define NAT64_ICMP_PLEN 64 734 static NAT64NOINLINE void 735 nat64_icmp_reflect(struct mbuf *m, uint8_t type, 736 uint8_t code, uint16_t mtu, struct nat64_counters *stats, void *logdata) 737 { 738 struct icmp *icmp; 739 struct ip *ip, *oip; 740 struct mbuf *n; 741 int len, plen; 742 743 ip = mtod(m, struct ip *); 744 /* Do not send ICMP error if packet is not the first fragment */ 745 if (ip->ip_off & ~ntohs(IP_MF|IP_DF)) { 746 DPRINTF(DP_DROPS, "not first fragment"); 747 goto freeit; 748 } 749 /* Do not send ICMP in reply to ICMP errors */ 750 if (ip->ip_p == IPPROTO_ICMP) { 751 if (m->m_len < (ip->ip_hl << 2)) { 752 DPRINTF(DP_DROPS, "mbuf isn't contigious"); 753 goto freeit; 754 } 755 icmp = mtodo(m, ip->ip_hl << 2); 756 if (!ICMP_INFOTYPE(icmp->icmp_type)) { 757 DPRINTF(DP_DROPS, "do not send ICMP in reply to " 758 "ICMP errors"); 759 goto freeit; 760 } 761 } 762 switch (type) { 763 case ICMP_UNREACH: 764 case ICMP_TIMXCEED: 765 case ICMP_PARAMPROB: 766 break; 767 default: 768 goto freeit; 769 } 770 /* Calculate length of ICMP payload */ 771 len = (m->m_pkthdr.len > NAT64_ICMP_PLEN) ? (ip->ip_hl << 2) + 8: 772 m->m_pkthdr.len; 773 774 /* Create new ICMPv4 datagram */ 775 plen = len + sizeof(struct icmphdr) + sizeof(uint32_t); 776 n = m_get2(sizeof(struct ip) + plen + max_hdr, M_NOWAIT, 777 MT_HEADER, M_PKTHDR); 778 if (n == NULL) { 779 NAT64STAT_INC(stats, nomem); 780 m_freem(m); 781 return; 782 } 783 m_move_pkthdr(n, m); 784 M_ALIGN(n, sizeof(struct ip) + plen + max_hdr); 785 786 n->m_len = n->m_pkthdr.len = sizeof(struct ip) + plen; 787 oip = mtod(n, struct ip *); 788 oip->ip_v = IPVERSION; 789 oip->ip_hl = sizeof(struct ip) >> 2; 790 oip->ip_tos = 0; 791 oip->ip_len = htons(n->m_pkthdr.len); 792 oip->ip_ttl = V_ip_defttl; 793 oip->ip_p = IPPROTO_ICMP; 794 ip_fillid(oip); 795 oip->ip_off = htons(IP_DF); 796 oip->ip_src = ip->ip_dst; 797 oip->ip_dst = ip->ip_src; 798 oip->ip_sum = 0; 799 oip->ip_sum = in_cksum_hdr(oip); 800 801 icmp = mtodo(n, sizeof(struct ip)); 802 icmp->icmp_type = type; 803 icmp->icmp_code = code; 804 icmp->icmp_cksum = 0; 805 icmp->icmp_pmvoid = 0; 806 icmp->icmp_nextmtu = htons(mtu); 807 m_copydata(m, 0, len, mtodo(n, sizeof(struct ip) + 808 sizeof(struct icmphdr) + sizeof(uint32_t))); 809 icmp->icmp_cksum = in_cksum_skip(n, sizeof(struct ip) + plen, 810 sizeof(struct ip)); 811 m_freem(m); 812 V_nat64out->output_one(n, stats, logdata); 813 return; 814 freeit: 815 NAT64STAT_INC(stats, dropped); 816 m_freem(m); 817 } 818 819 /* Translate ICMP echo request/reply into ICMPv6 */ 820 static void 821 nat64_icmp_handle_echo(struct ip6_hdr *ip6, struct icmp6_hdr *icmp6, 822 uint16_t id, uint8_t type) 823 { 824 uint16_t old; 825 826 old = *(uint16_t *)icmp6; /* save type+code in one word */ 827 icmp6->icmp6_type = type; 828 /* Reflect ICMPv6 -> ICMPv4 type translation in the cksum */ 829 icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum, 830 old, *(uint16_t *)icmp6); 831 if (id != 0) { 832 old = icmp6->icmp6_id; 833 icmp6->icmp6_id = id; 834 /* Reflect ICMP id translation in the cksum */ 835 icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum, 836 old, id); 837 } 838 /* Reflect IPv6 pseudo header in the cksum */ 839 icmp6->icmp6_cksum = ~in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen), 840 IPPROTO_ICMPV6, ~icmp6->icmp6_cksum); 841 } 842 843 static NAT64NOINLINE struct mbuf * 844 nat64_icmp_translate(struct mbuf *m, struct ip6_hdr *ip6, uint16_t icmpid, 845 int offset, struct nat64_config *cfg) 846 { 847 struct ip ip; 848 struct icmp *icmp; 849 struct tcphdr *tcp; 850 struct udphdr *udp; 851 struct ip6_hdr *eip6; 852 struct mbuf *n; 853 uint32_t mtu; 854 int len, hlen, plen; 855 uint8_t type, code; 856 857 if (m->m_len < offset + ICMP_MINLEN) 858 m = m_pullup(m, offset + ICMP_MINLEN); 859 if (m == NULL) { 860 NAT64STAT_INC(&cfg->stats, nomem); 861 return (m); 862 } 863 mtu = 0; 864 icmp = mtodo(m, offset); 865 /* RFC 7915 p4.2 */ 866 switch (icmp->icmp_type) { 867 case ICMP_ECHOREPLY: 868 type = ICMP6_ECHO_REPLY; 869 code = 0; 870 break; 871 case ICMP_UNREACH: 872 type = ICMP6_DST_UNREACH; 873 switch (icmp->icmp_code) { 874 case ICMP_UNREACH_NET: 875 case ICMP_UNREACH_HOST: 876 case ICMP_UNREACH_SRCFAIL: 877 case ICMP_UNREACH_NET_UNKNOWN: 878 case ICMP_UNREACH_HOST_UNKNOWN: 879 case ICMP_UNREACH_TOSNET: 880 case ICMP_UNREACH_TOSHOST: 881 code = ICMP6_DST_UNREACH_NOROUTE; 882 break; 883 case ICMP_UNREACH_PROTOCOL: 884 type = ICMP6_PARAM_PROB; 885 code = ICMP6_PARAMPROB_NEXTHEADER; 886 break; 887 case ICMP_UNREACH_PORT: 888 code = ICMP6_DST_UNREACH_NOPORT; 889 break; 890 case ICMP_UNREACH_NEEDFRAG: 891 type = ICMP6_PACKET_TOO_BIG; 892 code = 0; 893 /* XXX: needs an additional look */ 894 mtu = max(IPV6_MMTU, ntohs(icmp->icmp_nextmtu) + 20); 895 break; 896 case ICMP_UNREACH_NET_PROHIB: 897 case ICMP_UNREACH_HOST_PROHIB: 898 case ICMP_UNREACH_FILTER_PROHIB: 899 case ICMP_UNREACH_PRECEDENCE_CUTOFF: 900 code = ICMP6_DST_UNREACH_ADMIN; 901 break; 902 default: 903 DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d", 904 icmp->icmp_type, icmp->icmp_code); 905 goto freeit; 906 } 907 break; 908 case ICMP_TIMXCEED: 909 type = ICMP6_TIME_EXCEEDED; 910 code = icmp->icmp_code; 911 break; 912 case ICMP_ECHO: 913 type = ICMP6_ECHO_REQUEST; 914 code = 0; 915 break; 916 case ICMP_PARAMPROB: 917 type = ICMP6_PARAM_PROB; 918 switch (icmp->icmp_code) { 919 case ICMP_PARAMPROB_ERRATPTR: 920 case ICMP_PARAMPROB_LENGTH: 921 code = ICMP6_PARAMPROB_HEADER; 922 switch (icmp->icmp_pptr) { 923 case 0: /* Version/IHL */ 924 case 1: /* Type Of Service */ 925 mtu = icmp->icmp_pptr; 926 break; 927 case 2: /* Total Length */ 928 case 3: mtu = 4; /* Payload Length */ 929 break; 930 case 8: /* Time to Live */ 931 mtu = 7; /* Hop Limit */ 932 break; 933 case 9: /* Protocol */ 934 mtu = 6; /* Next Header */ 935 break; 936 case 12: /* Source address */ 937 case 13: 938 case 14: 939 case 15: 940 mtu = 8; 941 break; 942 case 16: /* Destination address */ 943 case 17: 944 case 18: 945 case 19: 946 mtu = 24; 947 break; 948 default: /* Silently drop */ 949 DPRINTF(DP_DROPS, "Unsupported ICMP type %d," 950 " code %d, pptr %d", icmp->icmp_type, 951 icmp->icmp_code, icmp->icmp_pptr); 952 goto freeit; 953 } 954 break; 955 default: 956 DPRINTF(DP_DROPS, "Unsupported ICMP type %d," 957 " code %d, pptr %d", icmp->icmp_type, 958 icmp->icmp_code, icmp->icmp_pptr); 959 goto freeit; 960 } 961 break; 962 default: 963 DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d", 964 icmp->icmp_type, icmp->icmp_code); 965 goto freeit; 966 } 967 /* 968 * For echo request/reply we can use original payload, 969 * but we need adjust icmp_cksum, because ICMPv6 cksum covers 970 * IPv6 pseudo header and ICMPv6 types differs from ICMPv4. 971 */ 972 if (type == ICMP6_ECHO_REQUEST || type == ICMP6_ECHO_REPLY) { 973 nat64_icmp_handle_echo(ip6, ICMP6(icmp), icmpid, type); 974 return (m); 975 } 976 /* 977 * For other types of ICMP messages we need to translate inner 978 * IPv4 header to IPv6 header. 979 * Assume ICMP src is the same as payload dst 980 * E.g. we have ( GWsrc1 , NATIP1 ) in outer header 981 * and ( NATIP1, Hostdst1 ) in ICMP copy header. 982 * In that case, we already have map for NATIP1 and GWsrc1. 983 * The only thing we need is to copy IPv6 map prefix to 984 * Hostdst1. 985 */ 986 hlen = offset + ICMP_MINLEN; 987 if (m->m_pkthdr.len < hlen + sizeof(struct ip) + ICMP_MINLEN) { 988 DPRINTF(DP_DROPS, "Message is too short %d", 989 m->m_pkthdr.len); 990 goto freeit; 991 } 992 m_copydata(m, hlen, sizeof(struct ip), (char *)&ip); 993 if (ip.ip_v != IPVERSION) { 994 DPRINTF(DP_DROPS, "Wrong IP version %d", ip.ip_v); 995 goto freeit; 996 } 997 hlen += ip.ip_hl << 2; /* Skip inner IP header */ 998 if (nat64_check_ip4(ip.ip_src.s_addr) != 0 || 999 nat64_check_ip4(ip.ip_dst.s_addr) != 0 || 1000 nat64_check_private_ip4(cfg, ip.ip_src.s_addr) != 0 || 1001 nat64_check_private_ip4(cfg, ip.ip_dst.s_addr) != 0) { 1002 DPRINTF(DP_DROPS, "IP addresses checks failed %04x -> %04x", 1003 ntohl(ip.ip_src.s_addr), ntohl(ip.ip_dst.s_addr)); 1004 goto freeit; 1005 } 1006 if (m->m_pkthdr.len < hlen + ICMP_MINLEN) { 1007 DPRINTF(DP_DROPS, "Message is too short %d", 1008 m->m_pkthdr.len); 1009 goto freeit; 1010 } 1011 #if 0 1012 /* 1013 * Check that inner source matches the outer destination. 1014 * XXX: We need some method to convert IPv4 into IPv6 address here, 1015 * and compare IPv6 addresses. 1016 */ 1017 if (ip.ip_src.s_addr != nat64_get_ip4(&ip6->ip6_dst)) { 1018 DPRINTF(DP_GENERIC, "Inner source doesn't match destination ", 1019 "%04x vs %04x", ip.ip_src.s_addr, 1020 nat64_get_ip4(&ip6->ip6_dst)); 1021 goto freeit; 1022 } 1023 #endif 1024 /* 1025 * Create new mbuf for ICMPv6 datagram. 1026 * NOTE: len is data length just after inner IP header. 1027 */ 1028 len = m->m_pkthdr.len - hlen; 1029 if (sizeof(struct ip6_hdr) + 1030 sizeof(struct icmp6_hdr) + len > NAT64_ICMP6_PLEN) 1031 len = NAT64_ICMP6_PLEN - sizeof(struct icmp6_hdr) - 1032 sizeof(struct ip6_hdr); 1033 plen = sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr) + len; 1034 n = m_get2(offset + plen + max_hdr, M_NOWAIT, MT_HEADER, M_PKTHDR); 1035 if (n == NULL) { 1036 NAT64STAT_INC(&cfg->stats, nomem); 1037 m_freem(m); 1038 return (NULL); 1039 } 1040 m_move_pkthdr(n, m); 1041 M_ALIGN(n, offset + plen + max_hdr); 1042 n->m_len = n->m_pkthdr.len = offset + plen; 1043 /* Adjust ip6_plen in outer header */ 1044 ip6->ip6_plen = htons(plen); 1045 /* Construct new inner IPv6 header */ 1046 eip6 = mtodo(n, offset + sizeof(struct icmp6_hdr)); 1047 eip6->ip6_src = ip6->ip6_dst; 1048 1049 /* Use the same prefix that we have in outer header */ 1050 eip6->ip6_dst = ip6->ip6_src; 1051 MPASS(cfg->flags & NAT64_PLATPFX); 1052 nat64_embed_ip4(&eip6->ip6_dst, cfg->plat_plen, ip.ip_dst.s_addr); 1053 1054 eip6->ip6_flow = htonl(ip.ip_tos << 20); 1055 eip6->ip6_vfc |= IPV6_VERSION; 1056 eip6->ip6_hlim = ip.ip_ttl; 1057 eip6->ip6_plen = htons(ntohs(ip.ip_len) - (ip.ip_hl << 2)); 1058 eip6->ip6_nxt = (ip.ip_p == IPPROTO_ICMP) ? IPPROTO_ICMPV6: ip.ip_p; 1059 m_copydata(m, hlen, len, (char *)(eip6 + 1)); 1060 /* 1061 * We need to translate source port in the inner ULP header, 1062 * and adjust ULP checksum. 1063 */ 1064 switch (ip.ip_p) { 1065 case IPPROTO_TCP: 1066 if (len < offsetof(struct tcphdr, th_sum)) 1067 break; 1068 tcp = TCP(eip6 + 1); 1069 if (icmpid != 0) { 1070 tcp->th_sum = cksum_adjust(tcp->th_sum, 1071 tcp->th_sport, icmpid); 1072 tcp->th_sport = icmpid; 1073 } 1074 tcp->th_sum = cksum_add(tcp->th_sum, 1075 ~nat64_cksum_convert(eip6, &ip)); 1076 break; 1077 case IPPROTO_UDP: 1078 if (len < offsetof(struct udphdr, uh_sum)) 1079 break; 1080 udp = UDP(eip6 + 1); 1081 if (icmpid != 0) { 1082 udp->uh_sum = cksum_adjust(udp->uh_sum, 1083 udp->uh_sport, icmpid); 1084 udp->uh_sport = icmpid; 1085 } 1086 udp->uh_sum = cksum_add(udp->uh_sum, 1087 ~nat64_cksum_convert(eip6, &ip)); 1088 break; 1089 case IPPROTO_ICMP: 1090 /* 1091 * Check if this is an ICMP error message for echo request 1092 * that we sent. I.e. ULP in the data containing invoking 1093 * packet is IPPROTO_ICMP and its type is ICMP_ECHO. 1094 */ 1095 icmp = (struct icmp *)(eip6 + 1); 1096 if (icmp->icmp_type != ICMP_ECHO) { 1097 m_freem(n); 1098 goto freeit; 1099 } 1100 /* 1101 * For our client this original datagram should looks 1102 * like it was ICMPv6 datagram with type ICMP6_ECHO_REQUEST. 1103 * Thus we need adjust icmp_cksum and convert type from 1104 * ICMP_ECHO to ICMP6_ECHO_REQUEST. 1105 */ 1106 nat64_icmp_handle_echo(eip6, ICMP6(icmp), icmpid, 1107 ICMP6_ECHO_REQUEST); 1108 } 1109 m_freem(m); 1110 /* Convert ICMPv4 into ICMPv6 header */ 1111 icmp = mtodo(n, offset); 1112 ICMP6(icmp)->icmp6_type = type; 1113 ICMP6(icmp)->icmp6_code = code; 1114 ICMP6(icmp)->icmp6_mtu = htonl(mtu); 1115 ICMP6(icmp)->icmp6_cksum = 0; 1116 ICMP6(icmp)->icmp6_cksum = cksum_add( 1117 ~in6_cksum_pseudo(ip6, plen, IPPROTO_ICMPV6, 0), 1118 in_cksum_skip(n, n->m_pkthdr.len, offset)); 1119 return (n); 1120 freeit: 1121 m_freem(m); 1122 NAT64STAT_INC(&cfg->stats, dropped); 1123 return (NULL); 1124 } 1125 1126 int 1127 nat64_getlasthdr(struct mbuf *m, int *offset) 1128 { 1129 struct ip6_hdr *ip6; 1130 struct ip6_hbh *hbh; 1131 int proto, hlen; 1132 1133 if (offset != NULL) 1134 hlen = *offset; 1135 else 1136 hlen = 0; 1137 1138 if (m->m_len < hlen + sizeof(*ip6)) 1139 return (-1); 1140 1141 ip6 = mtodo(m, hlen); 1142 hlen += sizeof(*ip6); 1143 proto = ip6->ip6_nxt; 1144 /* Skip extension headers */ 1145 while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING || 1146 proto == IPPROTO_DSTOPTS) { 1147 hbh = mtodo(m, hlen); 1148 /* 1149 * We expect mbuf has contigious data up to 1150 * upper level header. 1151 */ 1152 if (m->m_len < hlen) 1153 return (-1); 1154 /* 1155 * We doesn't support Jumbo payload option, 1156 * so return error. 1157 */ 1158 if (proto == IPPROTO_HOPOPTS && ip6->ip6_plen == 0) 1159 return (-1); 1160 proto = hbh->ip6h_nxt; 1161 hlen += (hbh->ip6h_len + 1) << 3; 1162 } 1163 if (offset != NULL) 1164 *offset = hlen; 1165 return (proto); 1166 } 1167 1168 int 1169 nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr, 1170 struct in6_addr *daddr, uint16_t lport, struct nat64_config *cfg, 1171 void *logdata) 1172 { 1173 struct nhop6_basic nh; 1174 struct ip6_hdr ip6; 1175 struct sockaddr_in6 dst; 1176 struct ip *ip; 1177 struct mbufq mq; 1178 uint16_t ip_id, ip_off; 1179 uint16_t *csum; 1180 int plen, hlen; 1181 uint8_t proto; 1182 1183 ip = mtod(m, struct ip*); 1184 1185 if (ip->ip_ttl <= IPTTLDEC) { 1186 nat64_icmp_reflect(m, ICMP_TIMXCEED, 1187 ICMP_TIMXCEED_INTRANS, 0, &cfg->stats, logdata); 1188 return (NAT64RETURN); 1189 } 1190 1191 ip6.ip6_dst = *daddr; 1192 ip6.ip6_src = *saddr; 1193 1194 hlen = ip->ip_hl << 2; 1195 plen = ntohs(ip->ip_len) - hlen; 1196 proto = ip->ip_p; 1197 1198 /* Save ip_id and ip_off, both are in network byte order */ 1199 ip_id = ip->ip_id; 1200 ip_off = ip->ip_off & htons(IP_OFFMASK | IP_MF); 1201 1202 /* Fragment length must be multiple of 8 octets */ 1203 if ((ip->ip_off & htons(IP_MF)) != 0 && (plen & 0x7) != 0) { 1204 nat64_icmp_reflect(m, ICMP_PARAMPROB, 1205 ICMP_PARAMPROB_LENGTH, 0, &cfg->stats, logdata); 1206 return (NAT64RETURN); 1207 } 1208 /* Fragmented ICMP is unsupported */ 1209 if (proto == IPPROTO_ICMP && ip_off != 0) { 1210 DPRINTF(DP_DROPS, "dropped due to fragmented ICMP"); 1211 NAT64STAT_INC(&cfg->stats, dropped); 1212 return (NAT64MFREE); 1213 } 1214 1215 dst.sin6_addr = ip6.ip6_dst; 1216 if (nat64_find_route6(&nh, &dst, m) != 0) { 1217 NAT64STAT_INC(&cfg->stats, noroute6); 1218 nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 1219 &cfg->stats, logdata); 1220 return (NAT64RETURN); 1221 } 1222 if (nh.nh_mtu < plen + sizeof(ip6) && 1223 (ip->ip_off & htons(IP_DF)) != 0) { 1224 nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 1225 FRAGSZ(nh.nh_mtu) + sizeof(struct ip), &cfg->stats, logdata); 1226 return (NAT64RETURN); 1227 } 1228 1229 ip6.ip6_flow = htonl(ip->ip_tos << 20); 1230 ip6.ip6_vfc |= IPV6_VERSION; 1231 ip6.ip6_hlim = ip->ip_ttl; 1232 /* Forwarding code will decrement TTL for netisr based output. */ 1233 if (V_nat64out == &nat64_direct) 1234 ip6.ip6_hlim -= IPTTLDEC; 1235 ip6.ip6_plen = htons(plen); 1236 ip6.ip6_nxt = (proto == IPPROTO_ICMP) ? IPPROTO_ICMPV6: proto; 1237 /* Convert checksums. */ 1238 switch (proto) { 1239 case IPPROTO_TCP: 1240 csum = &TCP(mtodo(m, hlen))->th_sum; 1241 if (lport != 0) { 1242 struct tcphdr *tcp = TCP(mtodo(m, hlen)); 1243 *csum = cksum_adjust(*csum, tcp->th_dport, lport); 1244 tcp->th_dport = lport; 1245 } 1246 *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip)); 1247 break; 1248 case IPPROTO_UDP: 1249 csum = &UDP(mtodo(m, hlen))->uh_sum; 1250 if (lport != 0) { 1251 struct udphdr *udp = UDP(mtodo(m, hlen)); 1252 *csum = cksum_adjust(*csum, udp->uh_dport, lport); 1253 udp->uh_dport = lport; 1254 } 1255 *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip)); 1256 break; 1257 case IPPROTO_ICMP: 1258 m = nat64_icmp_translate(m, &ip6, lport, hlen, cfg); 1259 if (m == NULL) /* stats already accounted */ 1260 return (NAT64RETURN); 1261 } 1262 1263 m_adj(m, hlen); 1264 mbufq_init(&mq, 255); 1265 nat64_fragment6(&cfg->stats, &ip6, &mq, m, nh.nh_mtu, ip_id, ip_off); 1266 while ((m = mbufq_dequeue(&mq)) != NULL) { 1267 if (V_nat64out->output(nh.nh_ifp, m, (struct sockaddr *)&dst, 1268 &cfg->stats, logdata) != 0) 1269 break; 1270 NAT64STAT_INC(&cfg->stats, opcnt46); 1271 } 1272 mbufq_drain(&mq); 1273 return (NAT64RETURN); 1274 } 1275 1276 int 1277 nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport, 1278 struct nat64_config *cfg, void *logdata) 1279 { 1280 struct ip ip; 1281 struct icmp6_hdr *icmp6; 1282 struct ip6_frag *ip6f; 1283 struct ip6_hdr *ip6, *ip6i; 1284 uint32_t mtu; 1285 int plen, proto; 1286 uint8_t type, code; 1287 1288 if (hlen == 0) { 1289 ip6 = mtod(m, struct ip6_hdr *); 1290 if (nat64_check_ip6(&ip6->ip6_src) != 0 || 1291 nat64_check_ip6(&ip6->ip6_dst) != 0) 1292 return (NAT64SKIP); 1293 1294 proto = nat64_getlasthdr(m, &hlen); 1295 if (proto != IPPROTO_ICMPV6) { 1296 DPRINTF(DP_DROPS, 1297 "dropped due to mbuf isn't contigious"); 1298 NAT64STAT_INC(&cfg->stats, dropped); 1299 return (NAT64MFREE); 1300 } 1301 } 1302 1303 /* 1304 * Translate ICMPv6 type and code to ICMPv4 (RFC7915). 1305 * NOTE: ICMPv6 echo handled by nat64_do_handle_ip6(). 1306 */ 1307 icmp6 = mtodo(m, hlen); 1308 mtu = 0; 1309 switch (icmp6->icmp6_type) { 1310 case ICMP6_DST_UNREACH: 1311 type = ICMP_UNREACH; 1312 switch (icmp6->icmp6_code) { 1313 case ICMP6_DST_UNREACH_NOROUTE: 1314 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1315 case ICMP6_DST_UNREACH_ADDR: 1316 code = ICMP_UNREACH_HOST; 1317 break; 1318 case ICMP6_DST_UNREACH_ADMIN: 1319 code = ICMP_UNREACH_HOST_PROHIB; 1320 break; 1321 case ICMP6_DST_UNREACH_NOPORT: 1322 code = ICMP_UNREACH_PORT; 1323 break; 1324 default: 1325 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," 1326 " code %d", icmp6->icmp6_type, 1327 icmp6->icmp6_code); 1328 NAT64STAT_INC(&cfg->stats, dropped); 1329 return (NAT64MFREE); 1330 } 1331 break; 1332 case ICMP6_PACKET_TOO_BIG: 1333 type = ICMP_UNREACH; 1334 code = ICMP_UNREACH_NEEDFRAG; 1335 mtu = ntohl(icmp6->icmp6_mtu); 1336 if (mtu < IPV6_MMTU) { 1337 DPRINTF(DP_DROPS, "Wrong MTU %d in ICMPv6 type %d," 1338 " code %d", mtu, icmp6->icmp6_type, 1339 icmp6->icmp6_code); 1340 NAT64STAT_INC(&cfg->stats, dropped); 1341 return (NAT64MFREE); 1342 } 1343 /* 1344 * Adjust MTU to reflect difference between 1345 * IPv6 an IPv4 headers. 1346 */ 1347 mtu -= sizeof(struct ip6_hdr) - sizeof(struct ip); 1348 break; 1349 case ICMP6_TIME_EXCEEDED: 1350 type = ICMP_TIMXCEED; 1351 code = icmp6->icmp6_code; 1352 break; 1353 case ICMP6_PARAM_PROB: 1354 switch (icmp6->icmp6_code) { 1355 case ICMP6_PARAMPROB_HEADER: 1356 type = ICMP_PARAMPROB; 1357 code = ICMP_PARAMPROB_ERRATPTR; 1358 mtu = ntohl(icmp6->icmp6_pptr); 1359 switch (mtu) { 1360 case 0: /* Version/Traffic Class */ 1361 case 1: /* Traffic Class/Flow Label */ 1362 break; 1363 case 4: /* Payload Length */ 1364 case 5: 1365 mtu = 2; 1366 break; 1367 case 6: /* Next Header */ 1368 mtu = 9; 1369 break; 1370 case 7: /* Hop Limit */ 1371 mtu = 8; 1372 break; 1373 default: 1374 if (mtu >= 8 && mtu <= 23) { 1375 mtu = 12; /* Source address */ 1376 break; 1377 } 1378 if (mtu >= 24 && mtu <= 39) { 1379 mtu = 16; /* Destination address */ 1380 break; 1381 } 1382 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," 1383 " code %d, pptr %d", icmp6->icmp6_type, 1384 icmp6->icmp6_code, mtu); 1385 NAT64STAT_INC(&cfg->stats, dropped); 1386 return (NAT64MFREE); 1387 } 1388 case ICMP6_PARAMPROB_NEXTHEADER: 1389 type = ICMP_UNREACH; 1390 code = ICMP_UNREACH_PROTOCOL; 1391 break; 1392 default: 1393 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," 1394 " code %d, pptr %d", icmp6->icmp6_type, 1395 icmp6->icmp6_code, ntohl(icmp6->icmp6_pptr)); 1396 NAT64STAT_INC(&cfg->stats, dropped); 1397 return (NAT64MFREE); 1398 } 1399 break; 1400 default: 1401 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d, code %d", 1402 icmp6->icmp6_type, icmp6->icmp6_code); 1403 NAT64STAT_INC(&cfg->stats, dropped); 1404 return (NAT64MFREE); 1405 } 1406 1407 hlen += sizeof(struct icmp6_hdr); 1408 if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) { 1409 NAT64STAT_INC(&cfg->stats, dropped); 1410 DPRINTF(DP_DROPS, "Message is too short %d", 1411 m->m_pkthdr.len); 1412 return (NAT64MFREE); 1413 } 1414 /* 1415 * We need at least ICMP_MINLEN bytes of original datagram payload 1416 * to generate ICMP message. It is nice that ICMP_MINLEN is equal 1417 * to sizeof(struct ip6_frag). So, if embedded datagram had a fragment 1418 * header we will not have to do m_pullup() again. 1419 * 1420 * What we have here: 1421 * Outer header: (IPv6iGW, v4mapPRefix+v4exthost) 1422 * Inner header: (v4mapPRefix+v4host, IPv6iHost) [sport, dport] 1423 * We need to translate it to: 1424 * 1425 * Outer header: (alias_host, v4exthost) 1426 * Inner header: (v4exthost, alias_host) [sport, alias_port] 1427 * 1428 * Assume caller function has checked if v4mapPRefix+v4host 1429 * matches configured prefix. 1430 * The only two things we should be provided with are mapping between 1431 * IPv6iHost <> alias_host and between dport and alias_port. 1432 */ 1433 if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) 1434 m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN); 1435 if (m == NULL) { 1436 NAT64STAT_INC(&cfg->stats, nomem); 1437 return (NAT64RETURN); 1438 } 1439 ip6 = mtod(m, struct ip6_hdr *); 1440 ip6i = mtodo(m, hlen); 1441 ip6f = NULL; 1442 proto = ip6i->ip6_nxt; 1443 plen = ntohs(ip6i->ip6_plen); 1444 hlen += sizeof(struct ip6_hdr); 1445 if (proto == IPPROTO_FRAGMENT) { 1446 if (m->m_pkthdr.len < hlen + sizeof(struct ip6_frag) + 1447 ICMP_MINLEN) 1448 goto fail; 1449 ip6f = mtodo(m, hlen); 1450 proto = ip6f->ip6f_nxt; 1451 plen -= sizeof(struct ip6_frag); 1452 hlen += sizeof(struct ip6_frag); 1453 /* Ajust MTU to reflect frag header size */ 1454 if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG) 1455 mtu -= sizeof(struct ip6_frag); 1456 } 1457 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) { 1458 DPRINTF(DP_DROPS, "Unsupported proto %d in the inner header", 1459 proto); 1460 goto fail; 1461 } 1462 if (nat64_check_ip6(&ip6i->ip6_src) != 0 || 1463 nat64_check_ip6(&ip6i->ip6_dst) != 0) { 1464 DPRINTF(DP_DROPS, "Inner addresses do not passes the check"); 1465 goto fail; 1466 } 1467 /* Check if outer dst is the same as inner src */ 1468 if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6i->ip6_src)) { 1469 DPRINTF(DP_DROPS, "Inner src doesn't match outer dst"); 1470 goto fail; 1471 } 1472 1473 /* Now we need to make a fake IPv4 packet to generate ICMP message */ 1474 ip.ip_dst.s_addr = aaddr; 1475 ip.ip_src.s_addr = nat64_extract_ip4(&ip6i->ip6_src, cfg->plat_plen); 1476 if (ip.ip_src.s_addr == 0) 1477 goto fail; 1478 /* XXX: Make fake ulp header */ 1479 if (V_nat64out == &nat64_direct) /* init_ip4hdr will decrement it */ 1480 ip6i->ip6_hlim += IPV6_HLIMDEC; 1481 nat64_init_ip4hdr(ip6i, ip6f, plen, proto, &ip); 1482 m_adj(m, hlen - sizeof(struct ip)); 1483 bcopy(&ip, mtod(m, void *), sizeof(ip)); 1484 nat64_icmp_reflect(m, type, code, (uint16_t)mtu, &cfg->stats, 1485 logdata); 1486 return (NAT64RETURN); 1487 fail: 1488 /* 1489 * We must call m_freem() because mbuf pointer could be 1490 * changed with m_pullup(). 1491 */ 1492 m_freem(m); 1493 NAT64STAT_INC(&cfg->stats, dropped); 1494 return (NAT64RETURN); 1495 } 1496 1497 int 1498 nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport, 1499 struct nat64_config *cfg, void *logdata) 1500 { 1501 struct ip ip; 1502 struct nhop4_basic nh; 1503 struct sockaddr_in dst; 1504 struct ip6_frag *frag; 1505 struct ip6_hdr *ip6; 1506 struct icmp6_hdr *icmp6; 1507 uint16_t *csum; 1508 int plen, hlen, proto; 1509 1510 /* 1511 * XXX: we expect ipfw_chk() did m_pullup() up to upper level 1512 * protocol's headers. Also we skip some checks, that ip6_input(), 1513 * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did. 1514 */ 1515 ip6 = mtod(m, struct ip6_hdr *); 1516 if (nat64_check_ip6(&ip6->ip6_src) != 0 || 1517 nat64_check_ip6(&ip6->ip6_dst) != 0) { 1518 return (NAT64SKIP); 1519 } 1520 1521 /* Starting from this point we must not return zero */ 1522 ip.ip_src.s_addr = aaddr; 1523 if (nat64_check_ip4(ip.ip_src.s_addr) != 0) { 1524 DPRINTF(DP_GENERIC | DP_DROPS, "invalid source address: %08x", 1525 ip.ip_src.s_addr); 1526 NAT64STAT_INC(&cfg->stats, dropped); 1527 return (NAT64MFREE); 1528 } 1529 1530 ip.ip_dst.s_addr = nat64_extract_ip4(&ip6->ip6_dst, cfg->plat_plen); 1531 if (ip.ip_dst.s_addr == 0) { 1532 NAT64STAT_INC(&cfg->stats, dropped); 1533 return (NAT64MFREE); 1534 } 1535 1536 if (ip6->ip6_hlim <= IPV6_HLIMDEC) { 1537 nat64_icmp6_reflect(m, ICMP6_TIME_EXCEEDED, 1538 ICMP6_TIME_EXCEED_TRANSIT, 0, &cfg->stats, logdata); 1539 return (NAT64RETURN); 1540 } 1541 1542 hlen = 0; 1543 plen = ntohs(ip6->ip6_plen); 1544 proto = nat64_getlasthdr(m, &hlen); 1545 if (proto < 0) { 1546 DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious"); 1547 NAT64STAT_INC(&cfg->stats, dropped); 1548 return (NAT64MFREE); 1549 } 1550 frag = NULL; 1551 if (proto == IPPROTO_FRAGMENT) { 1552 /* ipfw_chk should m_pullup up to frag header */ 1553 if (m->m_len < hlen + sizeof(*frag)) { 1554 DPRINTF(DP_DROPS, 1555 "dropped due to mbuf isn't contigious"); 1556 NAT64STAT_INC(&cfg->stats, dropped); 1557 return (NAT64MFREE); 1558 } 1559 frag = mtodo(m, hlen); 1560 proto = frag->ip6f_nxt; 1561 hlen += sizeof(*frag); 1562 /* Fragmented ICMPv6 is unsupported */ 1563 if (proto == IPPROTO_ICMPV6) { 1564 DPRINTF(DP_DROPS, "dropped due to fragmented ICMPv6"); 1565 NAT64STAT_INC(&cfg->stats, dropped); 1566 return (NAT64MFREE); 1567 } 1568 /* Fragment length must be multiple of 8 octets */ 1569 if ((frag->ip6f_offlg & IP6F_MORE_FRAG) != 0 && 1570 ((plen + sizeof(struct ip6_hdr) - hlen) & 0x7) != 0) { 1571 nat64_icmp6_reflect(m, ICMP6_PARAM_PROB, 1572 ICMP6_PARAMPROB_HEADER, 1573 offsetof(struct ip6_hdr, ip6_plen), &cfg->stats, 1574 logdata); 1575 return (NAT64RETURN); 1576 } 1577 } 1578 plen -= hlen - sizeof(struct ip6_hdr); 1579 if (plen < 0 || m->m_pkthdr.len < plen + hlen) { 1580 DPRINTF(DP_DROPS, "plen %d, pkthdr.len %d, hlen %d", 1581 plen, m->m_pkthdr.len, hlen); 1582 NAT64STAT_INC(&cfg->stats, dropped); 1583 return (NAT64MFREE); 1584 } 1585 1586 icmp6 = NULL; /* Make gcc happy */ 1587 if (proto == IPPROTO_ICMPV6) { 1588 icmp6 = mtodo(m, hlen); 1589 if (icmp6->icmp6_type != ICMP6_ECHO_REQUEST && 1590 icmp6->icmp6_type != ICMP6_ECHO_REPLY) 1591 return (nat64_handle_icmp6(m, hlen, aaddr, aport, 1592 cfg, logdata)); 1593 } 1594 dst.sin_addr.s_addr = ip.ip_dst.s_addr; 1595 if (nat64_find_route4(&nh, &dst, m) != 0) { 1596 NAT64STAT_INC(&cfg->stats, noroute4); 1597 nat64_icmp6_reflect(m, ICMP6_DST_UNREACH, 1598 ICMP6_DST_UNREACH_NOROUTE, 0, &cfg->stats, logdata); 1599 return (NAT64RETURN); 1600 } 1601 if (nh.nh_mtu < plen + sizeof(ip)) { 1602 nat64_icmp6_reflect(m, ICMP6_PACKET_TOO_BIG, 0, nh.nh_mtu, 1603 &cfg->stats, logdata); 1604 return (NAT64RETURN); 1605 } 1606 nat64_init_ip4hdr(ip6, frag, plen, proto, &ip); 1607 /* Convert checksums. */ 1608 switch (proto) { 1609 case IPPROTO_TCP: 1610 csum = &TCP(mtodo(m, hlen))->th_sum; 1611 if (aport != 0) { 1612 struct tcphdr *tcp = TCP(mtodo(m, hlen)); 1613 *csum = cksum_adjust(*csum, tcp->th_sport, aport); 1614 tcp->th_sport = aport; 1615 } 1616 *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip)); 1617 break; 1618 case IPPROTO_UDP: 1619 csum = &UDP(mtodo(m, hlen))->uh_sum; 1620 if (aport != 0) { 1621 struct udphdr *udp = UDP(mtodo(m, hlen)); 1622 *csum = cksum_adjust(*csum, udp->uh_sport, aport); 1623 udp->uh_sport = aport; 1624 } 1625 *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip)); 1626 break; 1627 case IPPROTO_ICMPV6: 1628 /* Checksum in ICMPv6 covers pseudo header */ 1629 csum = &icmp6->icmp6_cksum; 1630 *csum = cksum_add(*csum, in6_cksum_pseudo(ip6, plen, 1631 IPPROTO_ICMPV6, 0)); 1632 /* Convert ICMPv6 types to ICMP */ 1633 proto = *(uint16_t *)icmp6; /* save old word for cksum_adjust */ 1634 if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST) 1635 icmp6->icmp6_type = ICMP_ECHO; 1636 else /* ICMP6_ECHO_REPLY */ 1637 icmp6->icmp6_type = ICMP_ECHOREPLY; 1638 *csum = cksum_adjust(*csum, (uint16_t)proto, 1639 *(uint16_t *)icmp6); 1640 if (aport != 0) { 1641 uint16_t old_id = icmp6->icmp6_id; 1642 icmp6->icmp6_id = aport; 1643 *csum = cksum_adjust(*csum, old_id, aport); 1644 } 1645 break; 1646 }; 1647 1648 m_adj(m, hlen - sizeof(ip)); 1649 bcopy(&ip, mtod(m, void *), sizeof(ip)); 1650 if (V_nat64out->output(nh.nh_ifp, m, (struct sockaddr *)&dst, 1651 &cfg->stats, logdata) == 0) 1652 NAT64STAT_INC(&cfg->stats, opcnt64); 1653 return (NAT64RETURN); 1654 } 1655 1656