1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2015-2019 Yandex LLC 5 * Copyright (c) 2015-2019 Andrey V. Elsukov <ae@FreeBSD.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include "opt_ipstealth.h" 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/counter.h> 37 #include <sys/errno.h> 38 #include <sys/kernel.h> 39 #include <sys/lock.h> 40 #include <sys/mbuf.h> 41 #include <sys/module.h> 42 #include <sys/rmlock.h> 43 #include <sys/rwlock.h> 44 #include <sys/socket.h> 45 #include <sys/queue.h> 46 47 #include <net/if.h> 48 #include <net/if_var.h> 49 #include <net/if_pflog.h> 50 #include <net/pfil.h> 51 #include <net/netisr.h> 52 #include <net/route.h> 53 54 #include <netinet/in.h> 55 #include <netinet/in_fib.h> 56 #include <netinet/ip.h> 57 #include <netinet/ip_var.h> 58 #include <netinet/ip_fw.h> 59 #include <netinet/ip6.h> 60 #include <netinet/icmp6.h> 61 #include <netinet/ip_icmp.h> 62 #include <netinet/tcp.h> 63 #include <netinet/udp.h> 64 #include <netinet6/in6_var.h> 65 #include <netinet6/in6_fib.h> 66 #include <netinet6/ip6_var.h> 67 #include <netinet6/ip_fw_nat64.h> 68 69 #include <netpfil/pf/pf.h> 70 #include <netpfil/ipfw/ip_fw_private.h> 71 #include <machine/in_cksum.h> 72 73 #include "ip_fw_nat64.h" 74 #include "nat64_translate.h" 75 76 77 typedef int (*nat64_output_t)(struct ifnet *, struct mbuf *, 78 struct sockaddr *, struct nat64_counters *, void *); 79 typedef int (*nat64_output_one_t)(struct mbuf *, struct nat64_counters *, 80 void *); 81 82 static int nat64_find_route4(struct nhop4_basic *, struct sockaddr_in *, 83 struct mbuf *); 84 static int nat64_find_route6(struct nhop6_basic *, struct sockaddr_in6 *, 85 struct mbuf *); 86 static int nat64_output_one(struct mbuf *, struct nat64_counters *, void *); 87 static int nat64_output(struct ifnet *, struct mbuf *, struct sockaddr *, 88 struct nat64_counters *, void *); 89 static int nat64_direct_output_one(struct mbuf *, struct nat64_counters *, 90 void *); 91 static int nat64_direct_output(struct ifnet *, struct mbuf *, 92 struct sockaddr *, struct nat64_counters *, void *); 93 94 struct nat64_methods { 95 nat64_output_t output; 96 nat64_output_one_t output_one; 97 }; 98 static const struct nat64_methods nat64_netisr = { 99 .output = nat64_output, 100 .output_one = nat64_output_one 101 }; 102 static const struct nat64_methods nat64_direct = { 103 .output = nat64_direct_output, 104 .output_one = nat64_direct_output_one 105 }; 106 107 /* These variables should be initialized explicitly on module loading */ 108 VNET_DEFINE_STATIC(const struct nat64_methods *, nat64out); 109 VNET_DEFINE_STATIC(const int *, nat64ipstealth); 110 VNET_DEFINE_STATIC(const int *, nat64ip6stealth); 111 #define V_nat64out VNET(nat64out) 112 #define V_nat64ipstealth VNET(nat64ipstealth) 113 #define V_nat64ip6stealth VNET(nat64ip6stealth) 114 115 static const int stealth_on = 1; 116 #ifndef IPSTEALTH 117 static const int stealth_off = 0; 118 #endif 119 120 void 121 nat64_set_output_method(int direct) 122 { 123 124 if (direct != 0) { 125 V_nat64out = &nat64_direct; 126 #ifdef IPSTEALTH 127 /* Honor corresponding variables, if IPSTEALTH is defined */ 128 V_nat64ipstealth = &V_ipstealth; 129 V_nat64ip6stealth = &V_ip6stealth; 130 #else 131 /* otherwise we need to decrement HLIM/TTL for direct case */ 132 V_nat64ipstealth = V_nat64ip6stealth = &stealth_off; 133 #endif 134 } else { 135 V_nat64out = &nat64_netisr; 136 /* Leave TTL/HLIM decrementing to forwarding code */ 137 V_nat64ipstealth = V_nat64ip6stealth = &stealth_on; 138 } 139 } 140 141 int 142 nat64_get_output_method(void) 143 { 144 145 return (V_nat64out == &nat64_direct ? 1: 0); 146 } 147 148 static void 149 nat64_log(struct pfloghdr *logdata, struct mbuf *m, sa_family_t family) 150 { 151 152 logdata->dir = PF_OUT; 153 logdata->af = family; 154 ipfw_bpf_mtap2(logdata, PFLOG_HDRLEN, m); 155 } 156 157 static int 158 nat64_direct_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 159 struct nat64_counters *stats, void *logdata) 160 { 161 int error; 162 163 if (logdata != NULL) 164 nat64_log(logdata, m, dst->sa_family); 165 error = (*ifp->if_output)(ifp, m, dst, NULL); 166 if (error != 0) 167 NAT64STAT_INC(stats, oerrors); 168 return (error); 169 } 170 171 static int 172 nat64_direct_output_one(struct mbuf *m, struct nat64_counters *stats, 173 void *logdata) 174 { 175 struct nhop6_basic nh6; 176 struct nhop4_basic nh4; 177 struct sockaddr_in6 dst6; 178 struct sockaddr_in dst4; 179 struct sockaddr *dst; 180 struct ip6_hdr *ip6; 181 struct ip *ip4; 182 struct ifnet *ifp; 183 int error; 184 185 ip4 = mtod(m, struct ip *); 186 switch (ip4->ip_v) { 187 case IPVERSION: 188 dst4.sin_addr = ip4->ip_dst; 189 error = nat64_find_route4(&nh4, &dst4, m); 190 if (error != 0) 191 NAT64STAT_INC(stats, noroute4); 192 else { 193 ifp = nh4.nh_ifp; 194 dst = (struct sockaddr *)&dst4; 195 } 196 break; 197 case (IPV6_VERSION >> 4): 198 ip6 = mtod(m, struct ip6_hdr *); 199 dst6.sin6_addr = ip6->ip6_dst; 200 error = nat64_find_route6(&nh6, &dst6, m); 201 if (error != 0) 202 NAT64STAT_INC(stats, noroute6); 203 else { 204 ifp = nh6.nh_ifp; 205 dst = (struct sockaddr *)&dst6; 206 } 207 break; 208 default: 209 m_freem(m); 210 NAT64STAT_INC(stats, dropped); 211 DPRINTF(DP_DROPS, "dropped due to unknown IP version"); 212 return (EAFNOSUPPORT); 213 } 214 if (error != 0) { 215 m_freem(m); 216 return (EHOSTUNREACH); 217 } 218 if (logdata != NULL) 219 nat64_log(logdata, m, dst->sa_family); 220 error = (*ifp->if_output)(ifp, m, dst, NULL); 221 if (error != 0) 222 NAT64STAT_INC(stats, oerrors); 223 return (error); 224 } 225 226 static int 227 nat64_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 228 struct nat64_counters *stats, void *logdata) 229 { 230 struct ip *ip4; 231 int ret, af; 232 233 ip4 = mtod(m, struct ip *); 234 switch (ip4->ip_v) { 235 case IPVERSION: 236 af = AF_INET; 237 ret = NETISR_IP; 238 break; 239 case (IPV6_VERSION >> 4): 240 af = AF_INET6; 241 ret = NETISR_IPV6; 242 break; 243 default: 244 m_freem(m); 245 NAT64STAT_INC(stats, dropped); 246 DPRINTF(DP_DROPS, "unknown IP version"); 247 return (EAFNOSUPPORT); 248 } 249 if (logdata != NULL) 250 nat64_log(logdata, m, af); 251 if (m->m_pkthdr.rcvif == NULL) 252 m->m_pkthdr.rcvif = V_loif; 253 ret = netisr_queue(ret, m); 254 if (ret != 0) 255 NAT64STAT_INC(stats, oerrors); 256 return (ret); 257 } 258 259 static int 260 nat64_output_one(struct mbuf *m, struct nat64_counters *stats, void *logdata) 261 { 262 263 return (nat64_output(NULL, m, NULL, stats, logdata)); 264 } 265 266 /* 267 * Check the given IPv6 prefix and length according to RFC6052: 268 * The prefixes can only have one of the following lengths: 269 * 32, 40, 48, 56, 64, or 96 (The Well-Known Prefix is 96 bits long). 270 * Returns zero on success, otherwise EINVAL. 271 */ 272 int 273 nat64_check_prefixlen(int length) 274 { 275 276 switch (length) { 277 case 32: 278 case 40: 279 case 48: 280 case 56: 281 case 64: 282 case 96: 283 return (0); 284 } 285 return (EINVAL); 286 } 287 288 int 289 nat64_check_prefix6(const struct in6_addr *prefix, int length) 290 { 291 292 if (nat64_check_prefixlen(length) != 0) 293 return (EINVAL); 294 295 /* Well-known prefix has 96 prefix length */ 296 if (IN6_IS_ADDR_WKPFX(prefix) && length != 96) 297 return (EINVAL); 298 299 /* Bits 64 to 71 must be set to zero */ 300 if (prefix->__u6_addr.__u6_addr8[8] != 0) 301 return (EINVAL); 302 303 /* Some extra checks */ 304 if (IN6_IS_ADDR_MULTICAST(prefix) || 305 IN6_IS_ADDR_UNSPECIFIED(prefix) || 306 IN6_IS_ADDR_LOOPBACK(prefix)) 307 return (EINVAL); 308 return (0); 309 } 310 311 int 312 nat64_check_private_ip4(const struct nat64_config *cfg, in_addr_t ia) 313 { 314 315 if (cfg->flags & NAT64_ALLOW_PRIVATE) 316 return (0); 317 318 /* WKPFX must not be used to represent non-global IPv4 addresses */ 319 if (cfg->flags & NAT64_WKPFX) { 320 /* IN_PRIVATE */ 321 if ((ia & htonl(0xff000000)) == htonl(0x0a000000) || 322 (ia & htonl(0xfff00000)) == htonl(0xac100000) || 323 (ia & htonl(0xffff0000)) == htonl(0xc0a80000)) 324 return (1); 325 /* 326 * RFC 5735: 327 * 192.0.0.0/24 - reserved for IETF protocol assignments 328 * 192.88.99.0/24 - for use as 6to4 relay anycast addresses 329 * 198.18.0.0/15 - for use in benchmark tests 330 * 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24 - for use 331 * in documentation and example code 332 */ 333 if ((ia & htonl(0xffffff00)) == htonl(0xc0000000) || 334 (ia & htonl(0xffffff00)) == htonl(0xc0586300) || 335 (ia & htonl(0xfffffe00)) == htonl(0xc6120000) || 336 (ia & htonl(0xffffff00)) == htonl(0xc0000200) || 337 (ia & htonl(0xfffffe00)) == htonl(0xc6336400) || 338 (ia & htonl(0xffffff00)) == htonl(0xcb007100)) 339 return (1); 340 } 341 return (0); 342 } 343 344 /* 345 * Embed @ia IPv4 address into @ip6 IPv6 address. 346 * Place to embedding determined from prefix length @plen. 347 */ 348 void 349 nat64_embed_ip4(struct in6_addr *ip6, int plen, in_addr_t ia) 350 { 351 352 switch (plen) { 353 case 32: 354 case 96: 355 ip6->s6_addr32[plen / 32] = ia; 356 break; 357 case 40: 358 case 48: 359 case 56: 360 /* 361 * Preserve prefix bits. 362 * Since suffix bits should be zero and reserved for future 363 * use, we just overwrite the whole word, where they are. 364 */ 365 ip6->s6_addr32[1] &= 0xffffffff << (32 - plen % 32); 366 #if BYTE_ORDER == BIG_ENDIAN 367 ip6->s6_addr32[1] |= ia >> (plen % 32); 368 ip6->s6_addr32[2] = ia << (24 - plen % 32); 369 #elif BYTE_ORDER == LITTLE_ENDIAN 370 ip6->s6_addr32[1] |= ia << (plen % 32); 371 ip6->s6_addr32[2] = ia >> (24 - plen % 32); 372 #endif 373 break; 374 case 64: 375 #if BYTE_ORDER == BIG_ENDIAN 376 ip6->s6_addr32[2] = ia >> 8; 377 ip6->s6_addr32[3] = ia << 24; 378 #elif BYTE_ORDER == LITTLE_ENDIAN 379 ip6->s6_addr32[2] = ia << 8; 380 ip6->s6_addr32[3] = ia >> 24; 381 #endif 382 break; 383 default: 384 panic("Wrong plen: %d", plen); 385 }; 386 /* 387 * Bits 64 to 71 of the address are reserved for compatibility 388 * with the host identifier format defined in the IPv6 addressing 389 * architecture [RFC4291]. These bits MUST be set to zero. 390 */ 391 ip6->s6_addr8[8] = 0; 392 } 393 394 in_addr_t 395 nat64_extract_ip4(const struct in6_addr *ip6, int plen) 396 { 397 in_addr_t ia; 398 399 /* 400 * According to RFC 6052 p2.2: 401 * IPv4-embedded IPv6 addresses are composed of a variable-length 402 * prefix, the embedded IPv4 address, and a variable length suffix. 403 * The suffix bits are reserved for future extensions and SHOULD 404 * be set to zero. 405 */ 406 switch (plen) { 407 case 32: 408 if (ip6->s6_addr32[3] != 0 || ip6->s6_addr32[2] != 0) 409 goto badip6; 410 break; 411 case 40: 412 if (ip6->s6_addr32[3] != 0 || 413 (ip6->s6_addr32[2] & htonl(0xff00ffff)) != 0) 414 goto badip6; 415 break; 416 case 48: 417 if (ip6->s6_addr32[3] != 0 || 418 (ip6->s6_addr32[2] & htonl(0xff0000ff)) != 0) 419 goto badip6; 420 break; 421 case 56: 422 if (ip6->s6_addr32[3] != 0 || ip6->s6_addr8[8] != 0) 423 goto badip6; 424 break; 425 case 64: 426 if (ip6->s6_addr8[8] != 0 || 427 (ip6->s6_addr32[3] & htonl(0x00ffffff)) != 0) 428 goto badip6; 429 }; 430 switch (plen) { 431 case 32: 432 case 96: 433 ia = ip6->s6_addr32[plen / 32]; 434 break; 435 case 40: 436 case 48: 437 case 56: 438 #if BYTE_ORDER == BIG_ENDIAN 439 ia = (ip6->s6_addr32[1] << (plen % 32)) | 440 (ip6->s6_addr32[2] >> (24 - plen % 32)); 441 #elif BYTE_ORDER == LITTLE_ENDIAN 442 ia = (ip6->s6_addr32[1] >> (plen % 32)) | 443 (ip6->s6_addr32[2] << (24 - plen % 32)); 444 #endif 445 break; 446 case 64: 447 #if BYTE_ORDER == BIG_ENDIAN 448 ia = (ip6->s6_addr32[2] << 8) | (ip6->s6_addr32[3] >> 24); 449 #elif BYTE_ORDER == LITTLE_ENDIAN 450 ia = (ip6->s6_addr32[2] >> 8) | (ip6->s6_addr32[3] << 24); 451 #endif 452 break; 453 default: 454 return (0); 455 }; 456 if (nat64_check_ip4(ia) == 0) 457 return (ia); 458 459 DPRINTF(DP_GENERIC | DP_DROPS, 460 "invalid destination address: %08x", ia); 461 return (0); 462 badip6: 463 DPRINTF(DP_GENERIC | DP_DROPS, "invalid IPv4-embedded IPv6 address"); 464 return (0); 465 } 466 467 /* 468 * According to RFC 1624 the equation for incremental checksum update is: 469 * HC' = ~(~HC + ~m + m') -- [Eqn. 3] 470 * HC' = HC - ~m - m' -- [Eqn. 4] 471 * So, when we are replacing IPv4 addresses to IPv6, we 472 * can assume, that new bytes previously were zeros, and vise versa - 473 * when we replacing IPv6 addresses to IPv4, now unused bytes become 474 * zeros. The payload length in pseudo header has bigger size, but one 475 * half of it should be zero. Using the equation 4 we get: 476 * HC' = HC - (~m0 + m0') -- m0 is first changed word 477 * HC' = (HC - (~m0 + m0')) - (~m1 + m1') -- m1 is second changed word 478 * HC' = HC - ~m0 - m0' - ~m1 - m1' - ... = 479 * = HC - sum(~m[i] + m'[i]) 480 * 481 * The function result should be used as follows: 482 * IPv6 to IPv4: HC' = cksum_add(HC, result) 483 * IPv4 to IPv6: HC' = cksum_add(HC, ~result) 484 */ 485 static uint16_t 486 nat64_cksum_convert(struct ip6_hdr *ip6, struct ip *ip) 487 { 488 uint32_t sum; 489 uint16_t *p; 490 491 sum = ~ip->ip_src.s_addr >> 16; 492 sum += ~ip->ip_src.s_addr & 0xffff; 493 sum += ~ip->ip_dst.s_addr >> 16; 494 sum += ~ip->ip_dst.s_addr & 0xffff; 495 496 for (p = (uint16_t *)&ip6->ip6_src; 497 p < (uint16_t *)(&ip6->ip6_src + 2); p++) 498 sum += *p; 499 500 while (sum >> 16) 501 sum = (sum & 0xffff) + (sum >> 16); 502 return (sum); 503 } 504 505 static void 506 nat64_init_ip4hdr(const struct ip6_hdr *ip6, const struct ip6_frag *frag, 507 uint16_t plen, uint8_t proto, struct ip *ip) 508 { 509 510 /* assume addresses are already initialized */ 511 ip->ip_v = IPVERSION; 512 ip->ip_hl = sizeof(*ip) >> 2; 513 ip->ip_tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; 514 ip->ip_len = htons(sizeof(*ip) + plen); 515 ip->ip_ttl = ip6->ip6_hlim; 516 if (*V_nat64ip6stealth == 0) 517 ip->ip_ttl -= IPV6_HLIMDEC; 518 ip->ip_sum = 0; 519 ip->ip_p = (proto == IPPROTO_ICMPV6) ? IPPROTO_ICMP: proto; 520 ip_fillid(ip); 521 if (frag != NULL) { 522 ip->ip_off = htons(ntohs(frag->ip6f_offlg) >> 3); 523 if (frag->ip6f_offlg & IP6F_MORE_FRAG) 524 ip->ip_off |= htons(IP_MF); 525 } else { 526 ip->ip_off = htons(IP_DF); 527 } 528 ip->ip_sum = in_cksum_hdr(ip); 529 } 530 531 #define FRAGSZ(mtu) ((mtu) - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag)) 532 static NAT64NOINLINE int 533 nat64_fragment6(struct nat64_counters *stats, struct ip6_hdr *ip6, 534 struct mbufq *mq, struct mbuf *m, uint32_t mtu, uint16_t ip_id, 535 uint16_t ip_off) 536 { 537 struct ip6_frag ip6f; 538 struct mbuf *n; 539 uint16_t hlen, len, offset; 540 int plen; 541 542 plen = ntohs(ip6->ip6_plen); 543 hlen = sizeof(struct ip6_hdr); 544 545 /* Fragmentation isn't needed */ 546 if (ip_off == 0 && plen <= mtu - hlen) { 547 M_PREPEND(m, hlen, M_NOWAIT); 548 if (m == NULL) { 549 NAT64STAT_INC(stats, nomem); 550 return (ENOMEM); 551 } 552 bcopy(ip6, mtod(m, void *), hlen); 553 if (mbufq_enqueue(mq, m) != 0) { 554 m_freem(m); 555 NAT64STAT_INC(stats, dropped); 556 DPRINTF(DP_DROPS, "dropped due to mbufq overflow"); 557 return (ENOBUFS); 558 } 559 return (0); 560 } 561 562 hlen += sizeof(struct ip6_frag); 563 ip6f.ip6f_reserved = 0; 564 ip6f.ip6f_nxt = ip6->ip6_nxt; 565 ip6->ip6_nxt = IPPROTO_FRAGMENT; 566 if (ip_off != 0) { 567 /* 568 * We have got an IPv4 fragment. 569 * Use offset value and ip_id from original fragment. 570 */ 571 ip6f.ip6f_ident = htonl(ntohs(ip_id)); 572 offset = (ntohs(ip_off) & IP_OFFMASK) << 3; 573 NAT64STAT_INC(stats, ifrags); 574 } else { 575 /* The packet size exceeds interface MTU */ 576 ip6f.ip6f_ident = htonl(ip6_randomid()); 577 offset = 0; /* First fragment*/ 578 } 579 while (plen > 0 && m != NULL) { 580 n = NULL; 581 len = FRAGSZ(mtu) & ~7; 582 if (len > plen) 583 len = plen; 584 ip6->ip6_plen = htons(len + sizeof(ip6f)); 585 ip6f.ip6f_offlg = ntohs(offset); 586 if (len < plen || (ip_off & htons(IP_MF)) != 0) 587 ip6f.ip6f_offlg |= IP6F_MORE_FRAG; 588 offset += len; 589 plen -= len; 590 if (plen > 0) { 591 n = m_split(m, len, M_NOWAIT); 592 if (n == NULL) 593 goto fail; 594 } 595 M_PREPEND(m, hlen, M_NOWAIT); 596 if (m == NULL) 597 goto fail; 598 bcopy(ip6, mtod(m, void *), sizeof(struct ip6_hdr)); 599 bcopy(&ip6f, mtodo(m, sizeof(struct ip6_hdr)), 600 sizeof(struct ip6_frag)); 601 if (mbufq_enqueue(mq, m) != 0) 602 goto fail; 603 m = n; 604 } 605 NAT64STAT_ADD(stats, ofrags, mbufq_len(mq)); 606 return (0); 607 fail: 608 if (m != NULL) 609 m_freem(m); 610 if (n != NULL) 611 m_freem(n); 612 mbufq_drain(mq); 613 NAT64STAT_INC(stats, nomem); 614 return (ENOMEM); 615 } 616 617 static NAT64NOINLINE int 618 nat64_find_route6(struct nhop6_basic *pnh, struct sockaddr_in6 *dst, 619 struct mbuf *m) 620 { 621 622 if (fib6_lookup_nh_basic(M_GETFIB(m), &dst->sin6_addr, 0, 0, 0, 623 pnh) != 0) 624 return (EHOSTUNREACH); 625 if (pnh->nh_flags & (NHF_BLACKHOLE | NHF_REJECT)) 626 return (EHOSTUNREACH); 627 /* 628 * XXX: we need to use destination address with embedded scope 629 * zone id, because LLTABLE uses such form of addresses for lookup. 630 */ 631 dst->sin6_family = AF_INET6; 632 dst->sin6_len = sizeof(*dst); 633 dst->sin6_addr = pnh->nh_addr; 634 if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr)) 635 dst->sin6_addr.s6_addr16[1] = 636 htons(pnh->nh_ifp->if_index & 0xffff); 637 dst->sin6_port = 0; 638 dst->sin6_scope_id = 0; 639 dst->sin6_flowinfo = 0; 640 641 return (0); 642 } 643 644 #define NAT64_ICMP6_PLEN 64 645 static NAT64NOINLINE void 646 nat64_icmp6_reflect(struct mbuf *m, uint8_t type, uint8_t code, uint32_t mtu, 647 struct nat64_counters *stats, void *logdata) 648 { 649 struct icmp6_hdr *icmp6; 650 struct ip6_hdr *ip6, *oip6; 651 struct mbuf *n; 652 int len, plen, proto; 653 654 len = 0; 655 proto = nat64_getlasthdr(m, &len); 656 if (proto < 0) { 657 DPRINTF(DP_DROPS, "mbuf isn't contigious"); 658 goto freeit; 659 } 660 /* 661 * Do not send ICMPv6 in reply to ICMPv6 errors. 662 */ 663 if (proto == IPPROTO_ICMPV6) { 664 if (m->m_len < len + sizeof(*icmp6)) { 665 DPRINTF(DP_DROPS, "mbuf isn't contigious"); 666 goto freeit; 667 } 668 icmp6 = mtodo(m, len); 669 if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST || 670 icmp6->icmp6_type == ND_REDIRECT) { 671 DPRINTF(DP_DROPS, "do not send ICMPv6 in reply to " 672 "ICMPv6 errors"); 673 goto freeit; 674 } 675 /* 676 * If there are extra headers between IPv6 and ICMPv6, 677 * strip off them. 678 */ 679 if (len > sizeof(struct ip6_hdr)) { 680 /* 681 * NOTE: ipfw_chk already did m_pullup() and it is 682 * expected that data is contigious from the start 683 * of IPv6 header up to the end of ICMPv6 header. 684 */ 685 bcopy(mtod(m, caddr_t), 686 mtodo(m, len - sizeof(struct ip6_hdr)), 687 sizeof(struct ip6_hdr)); 688 m_adj(m, len - sizeof(struct ip6_hdr)); 689 } 690 } 691 /* 692 if (icmp6_ratelimit(&ip6->ip6_src, type, code)) 693 goto freeit; 694 */ 695 ip6 = mtod(m, struct ip6_hdr *); 696 switch (type) { 697 case ICMP6_DST_UNREACH: 698 case ICMP6_PACKET_TOO_BIG: 699 case ICMP6_TIME_EXCEEDED: 700 case ICMP6_PARAM_PROB: 701 break; 702 default: 703 goto freeit; 704 } 705 /* Calculate length of ICMPv6 payload */ 706 len = (m->m_pkthdr.len > NAT64_ICMP6_PLEN) ? NAT64_ICMP6_PLEN: 707 m->m_pkthdr.len; 708 709 /* Create new ICMPv6 datagram */ 710 plen = len + sizeof(struct icmp6_hdr); 711 n = m_get2(sizeof(struct ip6_hdr) + plen + max_hdr, M_NOWAIT, 712 MT_HEADER, M_PKTHDR); 713 if (n == NULL) { 714 NAT64STAT_INC(stats, nomem); 715 m_freem(m); 716 return; 717 } 718 /* 719 * Move pkthdr from original mbuf. We should have initialized some 720 * fields, because we can reinject this mbuf to netisr and it will 721 * go trough input path (it requires at least rcvif should be set). 722 * Also do M_ALIGN() to reduce chances of need to allocate new mbuf 723 * in the chain, when we will do M_PREPEND() or make some type of 724 * tunneling. 725 */ 726 m_move_pkthdr(n, m); 727 M_ALIGN(n, sizeof(struct ip6_hdr) + plen + max_hdr); 728 729 n->m_len = n->m_pkthdr.len = sizeof(struct ip6_hdr) + plen; 730 oip6 = mtod(n, struct ip6_hdr *); 731 /* 732 * Make IPv6 source address selection for reflected datagram. 733 * nat64_check_ip6() doesn't allow scoped addresses, therefore 734 * we use zero scopeid. 735 */ 736 if (in6_selectsrc_addr(M_GETFIB(n), &ip6->ip6_src, 0, 737 n->m_pkthdr.rcvif, &oip6->ip6_src, NULL) != 0) { 738 /* 739 * Failed to find proper source address, drop the packet. 740 */ 741 m_freem(n); 742 goto freeit; 743 } 744 oip6->ip6_dst = ip6->ip6_src; 745 oip6->ip6_nxt = IPPROTO_ICMPV6; 746 oip6->ip6_flow = 0; 747 oip6->ip6_vfc |= IPV6_VERSION; 748 oip6->ip6_hlim = V_ip6_defhlim; 749 oip6->ip6_plen = htons(plen); 750 751 icmp6 = mtodo(n, sizeof(struct ip6_hdr)); 752 icmp6->icmp6_cksum = 0; 753 icmp6->icmp6_type = type; 754 icmp6->icmp6_code = code; 755 icmp6->icmp6_mtu = htonl(mtu); 756 757 m_copydata(m, 0, len, mtodo(n, sizeof(struct ip6_hdr) + 758 sizeof(struct icmp6_hdr))); 759 icmp6->icmp6_cksum = in6_cksum(n, IPPROTO_ICMPV6, 760 sizeof(struct ip6_hdr), plen); 761 m_freem(m); 762 V_nat64out->output_one(n, stats, logdata); 763 return; 764 freeit: 765 NAT64STAT_INC(stats, dropped); 766 m_freem(m); 767 } 768 769 static NAT64NOINLINE int 770 nat64_find_route4(struct nhop4_basic *pnh, struct sockaddr_in *dst, 771 struct mbuf *m) 772 { 773 774 if (fib4_lookup_nh_basic(M_GETFIB(m), dst->sin_addr, 0, 0, pnh) != 0) 775 return (EHOSTUNREACH); 776 if (pnh->nh_flags & (NHF_BLACKHOLE | NHF_BROADCAST | NHF_REJECT)) 777 return (EHOSTUNREACH); 778 779 dst->sin_family = AF_INET; 780 dst->sin_len = sizeof(*dst); 781 dst->sin_addr = pnh->nh_addr; 782 dst->sin_port = 0; 783 return (0); 784 } 785 786 #define NAT64_ICMP_PLEN 64 787 static NAT64NOINLINE void 788 nat64_icmp_reflect(struct mbuf *m, uint8_t type, 789 uint8_t code, uint16_t mtu, struct nat64_counters *stats, void *logdata) 790 { 791 struct icmp *icmp; 792 struct ip *ip, *oip; 793 struct mbuf *n; 794 int len, plen; 795 796 ip = mtod(m, struct ip *); 797 /* Do not send ICMP error if packet is not the first fragment */ 798 if (ip->ip_off & ~ntohs(IP_MF|IP_DF)) { 799 DPRINTF(DP_DROPS, "not first fragment"); 800 goto freeit; 801 } 802 /* Do not send ICMP in reply to ICMP errors */ 803 if (ip->ip_p == IPPROTO_ICMP) { 804 if (m->m_len < (ip->ip_hl << 2)) { 805 DPRINTF(DP_DROPS, "mbuf isn't contigious"); 806 goto freeit; 807 } 808 icmp = mtodo(m, ip->ip_hl << 2); 809 if (!ICMP_INFOTYPE(icmp->icmp_type)) { 810 DPRINTF(DP_DROPS, "do not send ICMP in reply to " 811 "ICMP errors"); 812 goto freeit; 813 } 814 } 815 switch (type) { 816 case ICMP_UNREACH: 817 case ICMP_TIMXCEED: 818 case ICMP_PARAMPROB: 819 break; 820 default: 821 goto freeit; 822 } 823 /* Calculate length of ICMP payload */ 824 len = (m->m_pkthdr.len > NAT64_ICMP_PLEN) ? (ip->ip_hl << 2) + 8: 825 m->m_pkthdr.len; 826 827 /* Create new ICMPv4 datagram */ 828 plen = len + sizeof(struct icmphdr) + sizeof(uint32_t); 829 n = m_get2(sizeof(struct ip) + plen + max_hdr, M_NOWAIT, 830 MT_HEADER, M_PKTHDR); 831 if (n == NULL) { 832 NAT64STAT_INC(stats, nomem); 833 m_freem(m); 834 return; 835 } 836 m_move_pkthdr(n, m); 837 M_ALIGN(n, sizeof(struct ip) + plen + max_hdr); 838 839 n->m_len = n->m_pkthdr.len = sizeof(struct ip) + plen; 840 oip = mtod(n, struct ip *); 841 oip->ip_v = IPVERSION; 842 oip->ip_hl = sizeof(struct ip) >> 2; 843 oip->ip_tos = 0; 844 oip->ip_len = htons(n->m_pkthdr.len); 845 oip->ip_ttl = V_ip_defttl; 846 oip->ip_p = IPPROTO_ICMP; 847 ip_fillid(oip); 848 oip->ip_off = htons(IP_DF); 849 oip->ip_src = ip->ip_dst; 850 oip->ip_dst = ip->ip_src; 851 oip->ip_sum = 0; 852 oip->ip_sum = in_cksum_hdr(oip); 853 854 icmp = mtodo(n, sizeof(struct ip)); 855 icmp->icmp_type = type; 856 icmp->icmp_code = code; 857 icmp->icmp_cksum = 0; 858 icmp->icmp_pmvoid = 0; 859 icmp->icmp_nextmtu = htons(mtu); 860 m_copydata(m, 0, len, mtodo(n, sizeof(struct ip) + 861 sizeof(struct icmphdr) + sizeof(uint32_t))); 862 icmp->icmp_cksum = in_cksum_skip(n, sizeof(struct ip) + plen, 863 sizeof(struct ip)); 864 m_freem(m); 865 V_nat64out->output_one(n, stats, logdata); 866 return; 867 freeit: 868 NAT64STAT_INC(stats, dropped); 869 m_freem(m); 870 } 871 872 /* Translate ICMP echo request/reply into ICMPv6 */ 873 static void 874 nat64_icmp_handle_echo(struct ip6_hdr *ip6, struct icmp6_hdr *icmp6, 875 uint16_t id, uint8_t type) 876 { 877 uint16_t old; 878 879 old = *(uint16_t *)icmp6; /* save type+code in one word */ 880 icmp6->icmp6_type = type; 881 /* Reflect ICMPv6 -> ICMPv4 type translation in the cksum */ 882 icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum, 883 old, *(uint16_t *)icmp6); 884 if (id != 0) { 885 old = icmp6->icmp6_id; 886 icmp6->icmp6_id = id; 887 /* Reflect ICMP id translation in the cksum */ 888 icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum, 889 old, id); 890 } 891 /* Reflect IPv6 pseudo header in the cksum */ 892 icmp6->icmp6_cksum = ~in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen), 893 IPPROTO_ICMPV6, ~icmp6->icmp6_cksum); 894 } 895 896 static NAT64NOINLINE struct mbuf * 897 nat64_icmp_translate(struct mbuf *m, struct ip6_hdr *ip6, uint16_t icmpid, 898 int offset, struct nat64_config *cfg) 899 { 900 struct ip ip; 901 struct icmp *icmp; 902 struct tcphdr *tcp; 903 struct udphdr *udp; 904 struct ip6_hdr *eip6; 905 struct mbuf *n; 906 uint32_t mtu; 907 int len, hlen, plen; 908 uint8_t type, code; 909 910 if (m->m_len < offset + ICMP_MINLEN) 911 m = m_pullup(m, offset + ICMP_MINLEN); 912 if (m == NULL) { 913 NAT64STAT_INC(&cfg->stats, nomem); 914 return (m); 915 } 916 mtu = 0; 917 icmp = mtodo(m, offset); 918 /* RFC 7915 p4.2 */ 919 switch (icmp->icmp_type) { 920 case ICMP_ECHOREPLY: 921 type = ICMP6_ECHO_REPLY; 922 code = 0; 923 break; 924 case ICMP_UNREACH: 925 type = ICMP6_DST_UNREACH; 926 switch (icmp->icmp_code) { 927 case ICMP_UNREACH_NET: 928 case ICMP_UNREACH_HOST: 929 case ICMP_UNREACH_SRCFAIL: 930 case ICMP_UNREACH_NET_UNKNOWN: 931 case ICMP_UNREACH_HOST_UNKNOWN: 932 case ICMP_UNREACH_TOSNET: 933 case ICMP_UNREACH_TOSHOST: 934 code = ICMP6_DST_UNREACH_NOROUTE; 935 break; 936 case ICMP_UNREACH_PROTOCOL: 937 type = ICMP6_PARAM_PROB; 938 code = ICMP6_PARAMPROB_NEXTHEADER; 939 break; 940 case ICMP_UNREACH_PORT: 941 code = ICMP6_DST_UNREACH_NOPORT; 942 break; 943 case ICMP_UNREACH_NEEDFRAG: 944 type = ICMP6_PACKET_TOO_BIG; 945 code = 0; 946 /* XXX: needs an additional look */ 947 mtu = max(IPV6_MMTU, ntohs(icmp->icmp_nextmtu) + 20); 948 break; 949 case ICMP_UNREACH_NET_PROHIB: 950 case ICMP_UNREACH_HOST_PROHIB: 951 case ICMP_UNREACH_FILTER_PROHIB: 952 case ICMP_UNREACH_PRECEDENCE_CUTOFF: 953 code = ICMP6_DST_UNREACH_ADMIN; 954 break; 955 default: 956 DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d", 957 icmp->icmp_type, icmp->icmp_code); 958 goto freeit; 959 } 960 break; 961 case ICMP_TIMXCEED: 962 type = ICMP6_TIME_EXCEEDED; 963 code = icmp->icmp_code; 964 break; 965 case ICMP_ECHO: 966 type = ICMP6_ECHO_REQUEST; 967 code = 0; 968 break; 969 case ICMP_PARAMPROB: 970 type = ICMP6_PARAM_PROB; 971 switch (icmp->icmp_code) { 972 case ICMP_PARAMPROB_ERRATPTR: 973 case ICMP_PARAMPROB_LENGTH: 974 code = ICMP6_PARAMPROB_HEADER; 975 switch (icmp->icmp_pptr) { 976 case 0: /* Version/IHL */ 977 case 1: /* Type Of Service */ 978 mtu = icmp->icmp_pptr; 979 break; 980 case 2: /* Total Length */ 981 case 3: mtu = 4; /* Payload Length */ 982 break; 983 case 8: /* Time to Live */ 984 mtu = 7; /* Hop Limit */ 985 break; 986 case 9: /* Protocol */ 987 mtu = 6; /* Next Header */ 988 break; 989 case 12: /* Source address */ 990 case 13: 991 case 14: 992 case 15: 993 mtu = 8; 994 break; 995 case 16: /* Destination address */ 996 case 17: 997 case 18: 998 case 19: 999 mtu = 24; 1000 break; 1001 default: /* Silently drop */ 1002 DPRINTF(DP_DROPS, "Unsupported ICMP type %d," 1003 " code %d, pptr %d", icmp->icmp_type, 1004 icmp->icmp_code, icmp->icmp_pptr); 1005 goto freeit; 1006 } 1007 break; 1008 default: 1009 DPRINTF(DP_DROPS, "Unsupported ICMP type %d," 1010 " code %d, pptr %d", icmp->icmp_type, 1011 icmp->icmp_code, icmp->icmp_pptr); 1012 goto freeit; 1013 } 1014 break; 1015 default: 1016 DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d", 1017 icmp->icmp_type, icmp->icmp_code); 1018 goto freeit; 1019 } 1020 /* 1021 * For echo request/reply we can use original payload, 1022 * but we need adjust icmp_cksum, because ICMPv6 cksum covers 1023 * IPv6 pseudo header and ICMPv6 types differs from ICMPv4. 1024 */ 1025 if (type == ICMP6_ECHO_REQUEST || type == ICMP6_ECHO_REPLY) { 1026 nat64_icmp_handle_echo(ip6, ICMP6(icmp), icmpid, type); 1027 return (m); 1028 } 1029 /* 1030 * For other types of ICMP messages we need to translate inner 1031 * IPv4 header to IPv6 header. 1032 * Assume ICMP src is the same as payload dst 1033 * E.g. we have ( GWsrc1 , NATIP1 ) in outer header 1034 * and ( NATIP1, Hostdst1 ) in ICMP copy header. 1035 * In that case, we already have map for NATIP1 and GWsrc1. 1036 * The only thing we need is to copy IPv6 map prefix to 1037 * Hostdst1. 1038 */ 1039 hlen = offset + ICMP_MINLEN; 1040 if (m->m_pkthdr.len < hlen + sizeof(struct ip) + ICMP_MINLEN) { 1041 DPRINTF(DP_DROPS, "Message is too short %d", 1042 m->m_pkthdr.len); 1043 goto freeit; 1044 } 1045 m_copydata(m, hlen, sizeof(struct ip), (char *)&ip); 1046 if (ip.ip_v != IPVERSION) { 1047 DPRINTF(DP_DROPS, "Wrong IP version %d", ip.ip_v); 1048 goto freeit; 1049 } 1050 hlen += ip.ip_hl << 2; /* Skip inner IP header */ 1051 if (nat64_check_ip4(ip.ip_src.s_addr) != 0 || 1052 nat64_check_ip4(ip.ip_dst.s_addr) != 0 || 1053 nat64_check_private_ip4(cfg, ip.ip_src.s_addr) != 0 || 1054 nat64_check_private_ip4(cfg, ip.ip_dst.s_addr) != 0) { 1055 DPRINTF(DP_DROPS, "IP addresses checks failed %04x -> %04x", 1056 ntohl(ip.ip_src.s_addr), ntohl(ip.ip_dst.s_addr)); 1057 goto freeit; 1058 } 1059 if (m->m_pkthdr.len < hlen + ICMP_MINLEN) { 1060 DPRINTF(DP_DROPS, "Message is too short %d", 1061 m->m_pkthdr.len); 1062 goto freeit; 1063 } 1064 #if 0 1065 /* 1066 * Check that inner source matches the outer destination. 1067 * XXX: We need some method to convert IPv4 into IPv6 address here, 1068 * and compare IPv6 addresses. 1069 */ 1070 if (ip.ip_src.s_addr != nat64_get_ip4(&ip6->ip6_dst)) { 1071 DPRINTF(DP_GENERIC, "Inner source doesn't match destination ", 1072 "%04x vs %04x", ip.ip_src.s_addr, 1073 nat64_get_ip4(&ip6->ip6_dst)); 1074 goto freeit; 1075 } 1076 #endif 1077 /* 1078 * Create new mbuf for ICMPv6 datagram. 1079 * NOTE: len is data length just after inner IP header. 1080 */ 1081 len = m->m_pkthdr.len - hlen; 1082 if (sizeof(struct ip6_hdr) + 1083 sizeof(struct icmp6_hdr) + len > NAT64_ICMP6_PLEN) 1084 len = NAT64_ICMP6_PLEN - sizeof(struct icmp6_hdr) - 1085 sizeof(struct ip6_hdr); 1086 plen = sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr) + len; 1087 n = m_get2(offset + plen + max_hdr, M_NOWAIT, MT_HEADER, M_PKTHDR); 1088 if (n == NULL) { 1089 NAT64STAT_INC(&cfg->stats, nomem); 1090 m_freem(m); 1091 return (NULL); 1092 } 1093 m_move_pkthdr(n, m); 1094 M_ALIGN(n, offset + plen + max_hdr); 1095 n->m_len = n->m_pkthdr.len = offset + plen; 1096 /* Adjust ip6_plen in outer header */ 1097 ip6->ip6_plen = htons(plen); 1098 /* Construct new inner IPv6 header */ 1099 eip6 = mtodo(n, offset + sizeof(struct icmp6_hdr)); 1100 eip6->ip6_src = ip6->ip6_dst; 1101 1102 /* Use the same prefix that we have in outer header */ 1103 eip6->ip6_dst = ip6->ip6_src; 1104 MPASS(cfg->flags & NAT64_PLATPFX); 1105 nat64_embed_ip4(&eip6->ip6_dst, cfg->plat_plen, ip.ip_dst.s_addr); 1106 1107 eip6->ip6_flow = htonl(ip.ip_tos << 20); 1108 eip6->ip6_vfc |= IPV6_VERSION; 1109 eip6->ip6_hlim = ip.ip_ttl; 1110 eip6->ip6_plen = htons(ntohs(ip.ip_len) - (ip.ip_hl << 2)); 1111 eip6->ip6_nxt = (ip.ip_p == IPPROTO_ICMP) ? IPPROTO_ICMPV6: ip.ip_p; 1112 m_copydata(m, hlen, len, (char *)(eip6 + 1)); 1113 /* 1114 * We need to translate source port in the inner ULP header, 1115 * and adjust ULP checksum. 1116 */ 1117 switch (ip.ip_p) { 1118 case IPPROTO_TCP: 1119 if (len < offsetof(struct tcphdr, th_sum)) 1120 break; 1121 tcp = TCP(eip6 + 1); 1122 if (icmpid != 0) { 1123 tcp->th_sum = cksum_adjust(tcp->th_sum, 1124 tcp->th_sport, icmpid); 1125 tcp->th_sport = icmpid; 1126 } 1127 tcp->th_sum = cksum_add(tcp->th_sum, 1128 ~nat64_cksum_convert(eip6, &ip)); 1129 break; 1130 case IPPROTO_UDP: 1131 if (len < offsetof(struct udphdr, uh_sum)) 1132 break; 1133 udp = UDP(eip6 + 1); 1134 if (icmpid != 0) { 1135 udp->uh_sum = cksum_adjust(udp->uh_sum, 1136 udp->uh_sport, icmpid); 1137 udp->uh_sport = icmpid; 1138 } 1139 udp->uh_sum = cksum_add(udp->uh_sum, 1140 ~nat64_cksum_convert(eip6, &ip)); 1141 break; 1142 case IPPROTO_ICMP: 1143 /* 1144 * Check if this is an ICMP error message for echo request 1145 * that we sent. I.e. ULP in the data containing invoking 1146 * packet is IPPROTO_ICMP and its type is ICMP_ECHO. 1147 */ 1148 icmp = (struct icmp *)(eip6 + 1); 1149 if (icmp->icmp_type != ICMP_ECHO) { 1150 m_freem(n); 1151 goto freeit; 1152 } 1153 /* 1154 * For our client this original datagram should looks 1155 * like it was ICMPv6 datagram with type ICMP6_ECHO_REQUEST. 1156 * Thus we need adjust icmp_cksum and convert type from 1157 * ICMP_ECHO to ICMP6_ECHO_REQUEST. 1158 */ 1159 nat64_icmp_handle_echo(eip6, ICMP6(icmp), icmpid, 1160 ICMP6_ECHO_REQUEST); 1161 } 1162 m_freem(m); 1163 /* Convert ICMPv4 into ICMPv6 header */ 1164 icmp = mtodo(n, offset); 1165 ICMP6(icmp)->icmp6_type = type; 1166 ICMP6(icmp)->icmp6_code = code; 1167 ICMP6(icmp)->icmp6_mtu = htonl(mtu); 1168 ICMP6(icmp)->icmp6_cksum = 0; 1169 ICMP6(icmp)->icmp6_cksum = cksum_add( 1170 ~in6_cksum_pseudo(ip6, plen, IPPROTO_ICMPV6, 0), 1171 in_cksum_skip(n, n->m_pkthdr.len, offset)); 1172 return (n); 1173 freeit: 1174 m_freem(m); 1175 NAT64STAT_INC(&cfg->stats, dropped); 1176 return (NULL); 1177 } 1178 1179 int 1180 nat64_getlasthdr(struct mbuf *m, int *offset) 1181 { 1182 struct ip6_hdr *ip6; 1183 struct ip6_hbh *hbh; 1184 int proto, hlen; 1185 1186 if (offset != NULL) 1187 hlen = *offset; 1188 else 1189 hlen = 0; 1190 1191 if (m->m_len < hlen + sizeof(*ip6)) 1192 return (-1); 1193 1194 ip6 = mtodo(m, hlen); 1195 hlen += sizeof(*ip6); 1196 proto = ip6->ip6_nxt; 1197 /* Skip extension headers */ 1198 while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING || 1199 proto == IPPROTO_DSTOPTS) { 1200 hbh = mtodo(m, hlen); 1201 /* 1202 * We expect mbuf has contigious data up to 1203 * upper level header. 1204 */ 1205 if (m->m_len < hlen) 1206 return (-1); 1207 /* 1208 * We doesn't support Jumbo payload option, 1209 * so return error. 1210 */ 1211 if (proto == IPPROTO_HOPOPTS && ip6->ip6_plen == 0) 1212 return (-1); 1213 proto = hbh->ip6h_nxt; 1214 hlen += (hbh->ip6h_len + 1) << 3; 1215 } 1216 if (offset != NULL) 1217 *offset = hlen; 1218 return (proto); 1219 } 1220 1221 int 1222 nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr, 1223 struct in6_addr *daddr, uint16_t lport, struct nat64_config *cfg, 1224 void *logdata) 1225 { 1226 struct nhop6_basic nh; 1227 struct ip6_hdr ip6; 1228 struct sockaddr_in6 dst; 1229 struct ip *ip; 1230 struct mbufq mq; 1231 uint16_t ip_id, ip_off; 1232 uint16_t *csum; 1233 int plen, hlen; 1234 uint8_t proto; 1235 1236 ip = mtod(m, struct ip*); 1237 1238 if (*V_nat64ipstealth == 0 && ip->ip_ttl <= IPTTLDEC) { 1239 nat64_icmp_reflect(m, ICMP_TIMXCEED, 1240 ICMP_TIMXCEED_INTRANS, 0, &cfg->stats, logdata); 1241 return (NAT64RETURN); 1242 } 1243 1244 ip6.ip6_dst = *daddr; 1245 ip6.ip6_src = *saddr; 1246 1247 hlen = ip->ip_hl << 2; 1248 plen = ntohs(ip->ip_len) - hlen; 1249 proto = ip->ip_p; 1250 1251 /* Save ip_id and ip_off, both are in network byte order */ 1252 ip_id = ip->ip_id; 1253 ip_off = ip->ip_off & htons(IP_OFFMASK | IP_MF); 1254 1255 /* Fragment length must be multiple of 8 octets */ 1256 if ((ip->ip_off & htons(IP_MF)) != 0 && (plen & 0x7) != 0) { 1257 nat64_icmp_reflect(m, ICMP_PARAMPROB, 1258 ICMP_PARAMPROB_LENGTH, 0, &cfg->stats, logdata); 1259 return (NAT64RETURN); 1260 } 1261 /* Fragmented ICMP is unsupported */ 1262 if (proto == IPPROTO_ICMP && ip_off != 0) { 1263 DPRINTF(DP_DROPS, "dropped due to fragmented ICMP"); 1264 NAT64STAT_INC(&cfg->stats, dropped); 1265 return (NAT64MFREE); 1266 } 1267 1268 dst.sin6_addr = ip6.ip6_dst; 1269 if (nat64_find_route6(&nh, &dst, m) != 0) { 1270 NAT64STAT_INC(&cfg->stats, noroute6); 1271 nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 1272 &cfg->stats, logdata); 1273 return (NAT64RETURN); 1274 } 1275 if (nh.nh_mtu < plen + sizeof(ip6) && 1276 (ip->ip_off & htons(IP_DF)) != 0) { 1277 nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 1278 FRAGSZ(nh.nh_mtu) + sizeof(struct ip), &cfg->stats, logdata); 1279 return (NAT64RETURN); 1280 } 1281 1282 ip6.ip6_flow = htonl(ip->ip_tos << 20); 1283 ip6.ip6_vfc |= IPV6_VERSION; 1284 ip6.ip6_hlim = ip->ip_ttl; 1285 if (*V_nat64ipstealth == 0) 1286 ip6.ip6_hlim -= IPTTLDEC; 1287 ip6.ip6_plen = htons(plen); 1288 ip6.ip6_nxt = (proto == IPPROTO_ICMP) ? IPPROTO_ICMPV6: proto; 1289 /* Convert checksums. */ 1290 switch (proto) { 1291 case IPPROTO_TCP: 1292 csum = &TCP(mtodo(m, hlen))->th_sum; 1293 if (lport != 0) { 1294 struct tcphdr *tcp = TCP(mtodo(m, hlen)); 1295 *csum = cksum_adjust(*csum, tcp->th_dport, lport); 1296 tcp->th_dport = lport; 1297 } 1298 *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip)); 1299 break; 1300 case IPPROTO_UDP: 1301 csum = &UDP(mtodo(m, hlen))->uh_sum; 1302 if (lport != 0) { 1303 struct udphdr *udp = UDP(mtodo(m, hlen)); 1304 *csum = cksum_adjust(*csum, udp->uh_dport, lport); 1305 udp->uh_dport = lport; 1306 } 1307 *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip)); 1308 break; 1309 case IPPROTO_ICMP: 1310 m = nat64_icmp_translate(m, &ip6, lport, hlen, cfg); 1311 if (m == NULL) /* stats already accounted */ 1312 return (NAT64RETURN); 1313 } 1314 1315 m_adj(m, hlen); 1316 mbufq_init(&mq, 255); 1317 nat64_fragment6(&cfg->stats, &ip6, &mq, m, nh.nh_mtu, ip_id, ip_off); 1318 while ((m = mbufq_dequeue(&mq)) != NULL) { 1319 if (V_nat64out->output(nh.nh_ifp, m, (struct sockaddr *)&dst, 1320 &cfg->stats, logdata) != 0) 1321 break; 1322 NAT64STAT_INC(&cfg->stats, opcnt46); 1323 } 1324 mbufq_drain(&mq); 1325 return (NAT64RETURN); 1326 } 1327 1328 int 1329 nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport, 1330 struct nat64_config *cfg, void *logdata) 1331 { 1332 struct ip ip; 1333 struct icmp6_hdr *icmp6; 1334 struct ip6_frag *ip6f; 1335 struct ip6_hdr *ip6, *ip6i; 1336 uint32_t mtu; 1337 int plen, proto; 1338 uint8_t type, code; 1339 1340 if (hlen == 0) { 1341 ip6 = mtod(m, struct ip6_hdr *); 1342 if (nat64_check_ip6(&ip6->ip6_src) != 0 || 1343 nat64_check_ip6(&ip6->ip6_dst) != 0) 1344 return (NAT64SKIP); 1345 1346 proto = nat64_getlasthdr(m, &hlen); 1347 if (proto != IPPROTO_ICMPV6) { 1348 DPRINTF(DP_DROPS, 1349 "dropped due to mbuf isn't contigious"); 1350 NAT64STAT_INC(&cfg->stats, dropped); 1351 return (NAT64MFREE); 1352 } 1353 } 1354 1355 /* 1356 * Translate ICMPv6 type and code to ICMPv4 (RFC7915). 1357 * NOTE: ICMPv6 echo handled by nat64_do_handle_ip6(). 1358 */ 1359 icmp6 = mtodo(m, hlen); 1360 mtu = 0; 1361 switch (icmp6->icmp6_type) { 1362 case ICMP6_DST_UNREACH: 1363 type = ICMP_UNREACH; 1364 switch (icmp6->icmp6_code) { 1365 case ICMP6_DST_UNREACH_NOROUTE: 1366 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1367 case ICMP6_DST_UNREACH_ADDR: 1368 code = ICMP_UNREACH_HOST; 1369 break; 1370 case ICMP6_DST_UNREACH_ADMIN: 1371 code = ICMP_UNREACH_HOST_PROHIB; 1372 break; 1373 case ICMP6_DST_UNREACH_NOPORT: 1374 code = ICMP_UNREACH_PORT; 1375 break; 1376 default: 1377 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," 1378 " code %d", icmp6->icmp6_type, 1379 icmp6->icmp6_code); 1380 NAT64STAT_INC(&cfg->stats, dropped); 1381 return (NAT64MFREE); 1382 } 1383 break; 1384 case ICMP6_PACKET_TOO_BIG: 1385 type = ICMP_UNREACH; 1386 code = ICMP_UNREACH_NEEDFRAG; 1387 mtu = ntohl(icmp6->icmp6_mtu); 1388 if (mtu < IPV6_MMTU) { 1389 DPRINTF(DP_DROPS, "Wrong MTU %d in ICMPv6 type %d," 1390 " code %d", mtu, icmp6->icmp6_type, 1391 icmp6->icmp6_code); 1392 NAT64STAT_INC(&cfg->stats, dropped); 1393 return (NAT64MFREE); 1394 } 1395 /* 1396 * Adjust MTU to reflect difference between 1397 * IPv6 an IPv4 headers. 1398 */ 1399 mtu -= sizeof(struct ip6_hdr) - sizeof(struct ip); 1400 break; 1401 case ICMP6_TIME_EXCEEDED: 1402 type = ICMP_TIMXCEED; 1403 code = icmp6->icmp6_code; 1404 break; 1405 case ICMP6_PARAM_PROB: 1406 switch (icmp6->icmp6_code) { 1407 case ICMP6_PARAMPROB_HEADER: 1408 type = ICMP_PARAMPROB; 1409 code = ICMP_PARAMPROB_ERRATPTR; 1410 mtu = ntohl(icmp6->icmp6_pptr); 1411 switch (mtu) { 1412 case 0: /* Version/Traffic Class */ 1413 case 1: /* Traffic Class/Flow Label */ 1414 break; 1415 case 4: /* Payload Length */ 1416 case 5: 1417 mtu = 2; 1418 break; 1419 case 6: /* Next Header */ 1420 mtu = 9; 1421 break; 1422 case 7: /* Hop Limit */ 1423 mtu = 8; 1424 break; 1425 default: 1426 if (mtu >= 8 && mtu <= 23) { 1427 mtu = 12; /* Source address */ 1428 break; 1429 } 1430 if (mtu >= 24 && mtu <= 39) { 1431 mtu = 16; /* Destination address */ 1432 break; 1433 } 1434 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," 1435 " code %d, pptr %d", icmp6->icmp6_type, 1436 icmp6->icmp6_code, mtu); 1437 NAT64STAT_INC(&cfg->stats, dropped); 1438 return (NAT64MFREE); 1439 } 1440 case ICMP6_PARAMPROB_NEXTHEADER: 1441 type = ICMP_UNREACH; 1442 code = ICMP_UNREACH_PROTOCOL; 1443 break; 1444 default: 1445 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," 1446 " code %d, pptr %d", icmp6->icmp6_type, 1447 icmp6->icmp6_code, ntohl(icmp6->icmp6_pptr)); 1448 NAT64STAT_INC(&cfg->stats, dropped); 1449 return (NAT64MFREE); 1450 } 1451 break; 1452 default: 1453 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d, code %d", 1454 icmp6->icmp6_type, icmp6->icmp6_code); 1455 NAT64STAT_INC(&cfg->stats, dropped); 1456 return (NAT64MFREE); 1457 } 1458 1459 hlen += sizeof(struct icmp6_hdr); 1460 if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) { 1461 NAT64STAT_INC(&cfg->stats, dropped); 1462 DPRINTF(DP_DROPS, "Message is too short %d", 1463 m->m_pkthdr.len); 1464 return (NAT64MFREE); 1465 } 1466 /* 1467 * We need at least ICMP_MINLEN bytes of original datagram payload 1468 * to generate ICMP message. It is nice that ICMP_MINLEN is equal 1469 * to sizeof(struct ip6_frag). So, if embedded datagram had a fragment 1470 * header we will not have to do m_pullup() again. 1471 * 1472 * What we have here: 1473 * Outer header: (IPv6iGW, v4mapPRefix+v4exthost) 1474 * Inner header: (v4mapPRefix+v4host, IPv6iHost) [sport, dport] 1475 * We need to translate it to: 1476 * 1477 * Outer header: (alias_host, v4exthost) 1478 * Inner header: (v4exthost, alias_host) [sport, alias_port] 1479 * 1480 * Assume caller function has checked if v4mapPRefix+v4host 1481 * matches configured prefix. 1482 * The only two things we should be provided with are mapping between 1483 * IPv6iHost <> alias_host and between dport and alias_port. 1484 */ 1485 if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) 1486 m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN); 1487 if (m == NULL) { 1488 NAT64STAT_INC(&cfg->stats, nomem); 1489 return (NAT64RETURN); 1490 } 1491 ip6 = mtod(m, struct ip6_hdr *); 1492 ip6i = mtodo(m, hlen); 1493 ip6f = NULL; 1494 proto = ip6i->ip6_nxt; 1495 plen = ntohs(ip6i->ip6_plen); 1496 hlen += sizeof(struct ip6_hdr); 1497 if (proto == IPPROTO_FRAGMENT) { 1498 if (m->m_pkthdr.len < hlen + sizeof(struct ip6_frag) + 1499 ICMP_MINLEN) 1500 goto fail; 1501 ip6f = mtodo(m, hlen); 1502 proto = ip6f->ip6f_nxt; 1503 plen -= sizeof(struct ip6_frag); 1504 hlen += sizeof(struct ip6_frag); 1505 /* Ajust MTU to reflect frag header size */ 1506 if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG) 1507 mtu -= sizeof(struct ip6_frag); 1508 } 1509 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) { 1510 DPRINTF(DP_DROPS, "Unsupported proto %d in the inner header", 1511 proto); 1512 goto fail; 1513 } 1514 if (nat64_check_ip6(&ip6i->ip6_src) != 0 || 1515 nat64_check_ip6(&ip6i->ip6_dst) != 0) { 1516 DPRINTF(DP_DROPS, "Inner addresses do not passes the check"); 1517 goto fail; 1518 } 1519 /* Check if outer dst is the same as inner src */ 1520 if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6i->ip6_src)) { 1521 DPRINTF(DP_DROPS, "Inner src doesn't match outer dst"); 1522 goto fail; 1523 } 1524 1525 /* Now we need to make a fake IPv4 packet to generate ICMP message */ 1526 ip.ip_dst.s_addr = aaddr; 1527 ip.ip_src.s_addr = nat64_extract_ip4(&ip6i->ip6_src, cfg->plat_plen); 1528 if (ip.ip_src.s_addr == 0) 1529 goto fail; 1530 /* XXX: Make fake ulp header */ 1531 if (V_nat64out == &nat64_direct) /* init_ip4hdr will decrement it */ 1532 ip6i->ip6_hlim += IPV6_HLIMDEC; 1533 nat64_init_ip4hdr(ip6i, ip6f, plen, proto, &ip); 1534 m_adj(m, hlen - sizeof(struct ip)); 1535 bcopy(&ip, mtod(m, void *), sizeof(ip)); 1536 nat64_icmp_reflect(m, type, code, (uint16_t)mtu, &cfg->stats, 1537 logdata); 1538 return (NAT64RETURN); 1539 fail: 1540 /* 1541 * We must call m_freem() because mbuf pointer could be 1542 * changed with m_pullup(). 1543 */ 1544 m_freem(m); 1545 NAT64STAT_INC(&cfg->stats, dropped); 1546 return (NAT64RETURN); 1547 } 1548 1549 int 1550 nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport, 1551 struct nat64_config *cfg, void *logdata) 1552 { 1553 struct ip ip; 1554 struct nhop4_basic nh; 1555 struct sockaddr_in dst; 1556 struct ip6_frag *frag; 1557 struct ip6_hdr *ip6; 1558 struct icmp6_hdr *icmp6; 1559 uint16_t *csum; 1560 int plen, hlen, proto; 1561 1562 /* 1563 * XXX: we expect ipfw_chk() did m_pullup() up to upper level 1564 * protocol's headers. Also we skip some checks, that ip6_input(), 1565 * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did. 1566 */ 1567 ip6 = mtod(m, struct ip6_hdr *); 1568 if (nat64_check_ip6(&ip6->ip6_src) != 0 || 1569 nat64_check_ip6(&ip6->ip6_dst) != 0) { 1570 return (NAT64SKIP); 1571 } 1572 1573 /* Starting from this point we must not return zero */ 1574 ip.ip_src.s_addr = aaddr; 1575 if (nat64_check_ip4(ip.ip_src.s_addr) != 0) { 1576 DPRINTF(DP_GENERIC | DP_DROPS, "invalid source address: %08x", 1577 ip.ip_src.s_addr); 1578 NAT64STAT_INC(&cfg->stats, dropped); 1579 return (NAT64MFREE); 1580 } 1581 1582 ip.ip_dst.s_addr = nat64_extract_ip4(&ip6->ip6_dst, cfg->plat_plen); 1583 if (ip.ip_dst.s_addr == 0) { 1584 NAT64STAT_INC(&cfg->stats, dropped); 1585 return (NAT64MFREE); 1586 } 1587 1588 if (*V_nat64ip6stealth == 0 && ip6->ip6_hlim <= IPV6_HLIMDEC) { 1589 nat64_icmp6_reflect(m, ICMP6_TIME_EXCEEDED, 1590 ICMP6_TIME_EXCEED_TRANSIT, 0, &cfg->stats, logdata); 1591 return (NAT64RETURN); 1592 } 1593 1594 hlen = 0; 1595 plen = ntohs(ip6->ip6_plen); 1596 proto = nat64_getlasthdr(m, &hlen); 1597 if (proto < 0) { 1598 DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious"); 1599 NAT64STAT_INC(&cfg->stats, dropped); 1600 return (NAT64MFREE); 1601 } 1602 frag = NULL; 1603 if (proto == IPPROTO_FRAGMENT) { 1604 /* ipfw_chk should m_pullup up to frag header */ 1605 if (m->m_len < hlen + sizeof(*frag)) { 1606 DPRINTF(DP_DROPS, 1607 "dropped due to mbuf isn't contigious"); 1608 NAT64STAT_INC(&cfg->stats, dropped); 1609 return (NAT64MFREE); 1610 } 1611 frag = mtodo(m, hlen); 1612 proto = frag->ip6f_nxt; 1613 hlen += sizeof(*frag); 1614 /* Fragmented ICMPv6 is unsupported */ 1615 if (proto == IPPROTO_ICMPV6) { 1616 DPRINTF(DP_DROPS, "dropped due to fragmented ICMPv6"); 1617 NAT64STAT_INC(&cfg->stats, dropped); 1618 return (NAT64MFREE); 1619 } 1620 /* Fragment length must be multiple of 8 octets */ 1621 if ((frag->ip6f_offlg & IP6F_MORE_FRAG) != 0 && 1622 ((plen + sizeof(struct ip6_hdr) - hlen) & 0x7) != 0) { 1623 nat64_icmp6_reflect(m, ICMP6_PARAM_PROB, 1624 ICMP6_PARAMPROB_HEADER, 1625 offsetof(struct ip6_hdr, ip6_plen), &cfg->stats, 1626 logdata); 1627 return (NAT64RETURN); 1628 } 1629 } 1630 plen -= hlen - sizeof(struct ip6_hdr); 1631 if (plen < 0 || m->m_pkthdr.len < plen + hlen) { 1632 DPRINTF(DP_DROPS, "plen %d, pkthdr.len %d, hlen %d", 1633 plen, m->m_pkthdr.len, hlen); 1634 NAT64STAT_INC(&cfg->stats, dropped); 1635 return (NAT64MFREE); 1636 } 1637 1638 icmp6 = NULL; /* Make gcc happy */ 1639 if (proto == IPPROTO_ICMPV6) { 1640 icmp6 = mtodo(m, hlen); 1641 if (icmp6->icmp6_type != ICMP6_ECHO_REQUEST && 1642 icmp6->icmp6_type != ICMP6_ECHO_REPLY) 1643 return (nat64_handle_icmp6(m, hlen, aaddr, aport, 1644 cfg, logdata)); 1645 } 1646 dst.sin_addr.s_addr = ip.ip_dst.s_addr; 1647 if (nat64_find_route4(&nh, &dst, m) != 0) { 1648 NAT64STAT_INC(&cfg->stats, noroute4); 1649 nat64_icmp6_reflect(m, ICMP6_DST_UNREACH, 1650 ICMP6_DST_UNREACH_NOROUTE, 0, &cfg->stats, logdata); 1651 return (NAT64RETURN); 1652 } 1653 if (nh.nh_mtu < plen + sizeof(ip)) { 1654 nat64_icmp6_reflect(m, ICMP6_PACKET_TOO_BIG, 0, nh.nh_mtu, 1655 &cfg->stats, logdata); 1656 return (NAT64RETURN); 1657 } 1658 nat64_init_ip4hdr(ip6, frag, plen, proto, &ip); 1659 /* Convert checksums. */ 1660 switch (proto) { 1661 case IPPROTO_TCP: 1662 csum = &TCP(mtodo(m, hlen))->th_sum; 1663 if (aport != 0) { 1664 struct tcphdr *tcp = TCP(mtodo(m, hlen)); 1665 *csum = cksum_adjust(*csum, tcp->th_sport, aport); 1666 tcp->th_sport = aport; 1667 } 1668 *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip)); 1669 break; 1670 case IPPROTO_UDP: 1671 csum = &UDP(mtodo(m, hlen))->uh_sum; 1672 if (aport != 0) { 1673 struct udphdr *udp = UDP(mtodo(m, hlen)); 1674 *csum = cksum_adjust(*csum, udp->uh_sport, aport); 1675 udp->uh_sport = aport; 1676 } 1677 *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip)); 1678 break; 1679 case IPPROTO_ICMPV6: 1680 /* Checksum in ICMPv6 covers pseudo header */ 1681 csum = &icmp6->icmp6_cksum; 1682 *csum = cksum_add(*csum, in6_cksum_pseudo(ip6, plen, 1683 IPPROTO_ICMPV6, 0)); 1684 /* Convert ICMPv6 types to ICMP */ 1685 proto = *(uint16_t *)icmp6; /* save old word for cksum_adjust */ 1686 if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST) 1687 icmp6->icmp6_type = ICMP_ECHO; 1688 else /* ICMP6_ECHO_REPLY */ 1689 icmp6->icmp6_type = ICMP_ECHOREPLY; 1690 *csum = cksum_adjust(*csum, (uint16_t)proto, 1691 *(uint16_t *)icmp6); 1692 if (aport != 0) { 1693 uint16_t old_id = icmp6->icmp6_id; 1694 icmp6->icmp6_id = aport; 1695 *csum = cksum_adjust(*csum, old_id, aport); 1696 } 1697 break; 1698 }; 1699 1700 m_adj(m, hlen - sizeof(ip)); 1701 bcopy(&ip, mtod(m, void *), sizeof(ip)); 1702 if (V_nat64out->output(nh.nh_ifp, m, (struct sockaddr *)&dst, 1703 &cfg->stats, logdata) == 0) 1704 NAT64STAT_INC(&cfg->stats, opcnt64); 1705 return (NAT64RETURN); 1706 } 1707 1708