1 /*- 2 * Copyright (c) 2015-2018 Yandex LLC 3 * Copyright (c) 2015-2018 Andrey V. Elsukov <ae@FreeBSD.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include "opt_ipfw.h" 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/counter.h> 36 #include <sys/errno.h> 37 #include <sys/kernel.h> 38 #include <sys/lock.h> 39 #include <sys/mbuf.h> 40 #include <sys/module.h> 41 #include <sys/rmlock.h> 42 #include <sys/rwlock.h> 43 #include <sys/socket.h> 44 #include <sys/queue.h> 45 46 #include <net/if.h> 47 #include <net/if_var.h> 48 #include <net/if_pflog.h> 49 #include <net/pfil.h> 50 #include <net/netisr.h> 51 #include <net/route.h> 52 53 #include <netinet/in.h> 54 #include <netinet/in_fib.h> 55 #include <netinet/ip.h> 56 #include <netinet/ip_var.h> 57 #include <netinet/ip_fw.h> 58 #include <netinet/ip6.h> 59 #include <netinet/icmp6.h> 60 #include <netinet/ip_icmp.h> 61 #include <netinet/tcp.h> 62 #include <netinet/udp.h> 63 #include <netinet6/in6_var.h> 64 #include <netinet6/in6_fib.h> 65 #include <netinet6/ip6_var.h> 66 67 #include <netpfil/pf/pf.h> 68 #include <netpfil/ipfw/ip_fw_private.h> 69 #include <machine/in_cksum.h> 70 71 #include "ip_fw_nat64.h" 72 #include "nat64_translate.h" 73 74 static void 75 nat64_log(struct pfloghdr *logdata, struct mbuf *m, sa_family_t family) 76 { 77 78 logdata->dir = PF_OUT; 79 logdata->af = family; 80 ipfw_bpf_mtap2(logdata, PFLOG_HDRLEN, m); 81 } 82 83 #ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT 84 static NAT64NOINLINE int nat64_find_route4(struct nhop4_basic *, 85 struct sockaddr_in *, struct mbuf *); 86 static NAT64NOINLINE int nat64_find_route6(struct nhop6_basic *, 87 struct sockaddr_in6 *, struct mbuf *); 88 89 static NAT64NOINLINE int 90 nat64_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 91 struct nat64_counters *stats, void *logdata) 92 { 93 int error; 94 95 if (logdata != NULL) 96 nat64_log(logdata, m, dst->sa_family); 97 error = (*ifp->if_output)(ifp, m, dst, NULL); 98 if (error != 0) 99 NAT64STAT_INC(stats, oerrors); 100 return (error); 101 } 102 103 static NAT64NOINLINE int 104 nat64_output_one(struct mbuf *m, struct nat64_counters *stats, void *logdata) 105 { 106 struct nhop6_basic nh6; 107 struct nhop4_basic nh4; 108 struct sockaddr_in6 dst6; 109 struct sockaddr_in dst4; 110 struct sockaddr *dst; 111 struct ip6_hdr *ip6; 112 struct ip *ip4; 113 struct ifnet *ifp; 114 int error; 115 116 ip4 = mtod(m, struct ip *); 117 switch (ip4->ip_v) { 118 case IPVERSION: 119 dst4.sin_addr = ip4->ip_dst; 120 error = nat64_find_route4(&nh4, &dst4, m); 121 if (error != 0) 122 NAT64STAT_INC(stats, noroute4); 123 else { 124 ifp = nh4.nh_ifp; 125 dst = (struct sockaddr *)&dst4; 126 } 127 break; 128 case (IPV6_VERSION >> 4): 129 ip6 = mtod(m, struct ip6_hdr *); 130 dst6.sin6_addr = ip6->ip6_dst; 131 error = nat64_find_route6(&nh6, &dst6, m); 132 if (error != 0) 133 NAT64STAT_INC(stats, noroute6); 134 else { 135 ifp = nh6.nh_ifp; 136 dst = (struct sockaddr *)&dst6; 137 } 138 break; 139 default: 140 m_freem(m); 141 NAT64STAT_INC(stats, dropped); 142 DPRINTF(DP_DROPS, "dropped due to unknown IP version"); 143 return (EAFNOSUPPORT); 144 } 145 if (error != 0) { 146 m_freem(m); 147 return (EHOSTUNREACH); 148 } 149 if (logdata != NULL) 150 nat64_log(logdata, m, dst->sa_family); 151 error = (*ifp->if_output)(ifp, m, dst, NULL); 152 if (error != 0) 153 NAT64STAT_INC(stats, oerrors); 154 return (error); 155 } 156 #else /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */ 157 static NAT64NOINLINE int 158 nat64_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 159 struct nat64_counters *stats, void *logdata) 160 { 161 struct ip *ip4; 162 int ret, af; 163 164 ip4 = mtod(m, struct ip *); 165 switch (ip4->ip_v) { 166 case IPVERSION: 167 af = AF_INET; 168 ret = NETISR_IP; 169 break; 170 case (IPV6_VERSION >> 4): 171 af = AF_INET6; 172 ret = NETISR_IPV6; 173 break; 174 default: 175 m_freem(m); 176 NAT64STAT_INC(stats, dropped); 177 DPRINTF(DP_DROPS, "unknown IP version"); 178 return (EAFNOSUPPORT); 179 } 180 if (logdata != NULL) 181 nat64_log(logdata, m, af); 182 ret = netisr_queue(ret, m); 183 if (ret != 0) 184 NAT64STAT_INC(stats, oerrors); 185 return (ret); 186 } 187 188 static NAT64NOINLINE int 189 nat64_output_one(struct mbuf *m, struct nat64_counters *stats, void *logdata) 190 { 191 192 return (nat64_output(NULL, m, NULL, stats, logdata)); 193 } 194 #endif /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */ 195 196 /* 197 * Check the given IPv6 prefix and length according to RFC6052: 198 * The prefixes can only have one of the following lengths: 199 * 32, 40, 48, 56, 64, or 96 (The Well-Known Prefix is 96 bits long). 200 * Returns zero on success, otherwise EINVAL. 201 */ 202 int 203 nat64_check_prefix6(const struct in6_addr *prefix, int length) 204 { 205 206 switch (length) { 207 case 32: 208 case 40: 209 case 48: 210 case 56: 211 case 64: 212 /* Well-known prefix has 96 prefix length */ 213 if (IN6_IS_ADDR_WKPFX(prefix)) 214 return (EINVAL); 215 /* FALLTHROUGH */ 216 case 96: 217 /* Bits 64 to 71 must be set to zero */ 218 if (prefix->__u6_addr.__u6_addr8[8] != 0) 219 return (EINVAL); 220 /* Some extra checks */ 221 if (IN6_IS_ADDR_MULTICAST(prefix) || 222 IN6_IS_ADDR_UNSPECIFIED(prefix) || 223 IN6_IS_ADDR_LOOPBACK(prefix)) 224 return (EINVAL); 225 return (0); 226 } 227 return (EINVAL); 228 } 229 230 int 231 nat64_check_private_ip4(const struct nat64_config *cfg, in_addr_t ia) 232 { 233 234 if (V_nat64_allow_private) 235 return (0); 236 237 /* WKPFX must not be used to represent non-global IPv4 addresses */ 238 if (cfg->flags & NAT64_WKPFX) { 239 /* IN_PRIVATE */ 240 if ((ia & htonl(0xff000000)) == htonl(0x0a000000) || 241 (ia & htonl(0xfff00000)) == htonl(0xac100000) || 242 (ia & htonl(0xffff0000)) == htonl(0xc0a80000)) 243 return (1); 244 /* 245 * RFC 5735: 246 * 192.0.0.0/24 - reserved for IETF protocol assignments 247 * 192.88.99.0/24 - for use as 6to4 relay anycast addresses 248 * 198.18.0.0/15 - for use in benchmark tests 249 * 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24 - for use 250 * in documentation and example code 251 */ 252 if ((ia & htonl(0xffffff00)) == htonl(0xc0000000) || 253 (ia & htonl(0xffffff00)) == htonl(0xc0586300) || 254 (ia & htonl(0xfffffe00)) == htonl(0xc6120000) || 255 (ia & htonl(0xffffff00)) == htonl(0xc0000200) || 256 (ia & htonl(0xfffffe00)) == htonl(0xc6336400) || 257 (ia & htonl(0xffffff00)) == htonl(0xcb007100)) 258 return (1); 259 } 260 return (0); 261 } 262 263 void 264 nat64_embed_ip4(const struct nat64_config *cfg, in_addr_t ia, 265 struct in6_addr *ip6) 266 { 267 268 /* assume the prefix6 is properly filled with zeros */ 269 bcopy(&cfg->prefix6, ip6, sizeof(*ip6)); 270 switch (cfg->plen6) { 271 case 32: 272 case 96: 273 ip6->s6_addr32[cfg->plen6 / 32] = ia; 274 break; 275 case 40: 276 case 48: 277 case 56: 278 #if BYTE_ORDER == BIG_ENDIAN 279 ip6->s6_addr32[1] = cfg->prefix6.s6_addr32[1] | 280 (ia >> (cfg->plen6 % 32)); 281 ip6->s6_addr32[2] = ia << (24 - cfg->plen6 % 32); 282 #elif BYTE_ORDER == LITTLE_ENDIAN 283 ip6->s6_addr32[1] = cfg->prefix6.s6_addr32[1] | 284 (ia << (cfg->plen6 % 32)); 285 ip6->s6_addr32[2] = ia >> (24 - cfg->plen6 % 32); 286 #endif 287 break; 288 case 64: 289 #if BYTE_ORDER == BIG_ENDIAN 290 ip6->s6_addr32[2] = ia >> 8; 291 ip6->s6_addr32[3] = ia << 24; 292 #elif BYTE_ORDER == LITTLE_ENDIAN 293 ip6->s6_addr32[2] = ia << 8; 294 ip6->s6_addr32[3] = ia >> 24; 295 #endif 296 break; 297 default: 298 panic("Wrong plen6"); 299 }; 300 ip6->s6_addr8[8] = 0; 301 } 302 303 in_addr_t 304 nat64_extract_ip4(const struct nat64_config *cfg, const struct in6_addr *ip6) 305 { 306 in_addr_t ia; 307 308 /* 309 * According to RFC 6052 p2.2: 310 * IPv4-embedded IPv6 addresses are composed of a variable-length 311 * prefix, the embedded IPv4 address, and a variable length suffix. 312 * The suffix bits are reserved for future extensions and SHOULD 313 * be set to zero. 314 */ 315 switch (cfg->plen6) { 316 case 32: 317 if (ip6->s6_addr32[3] != 0 || ip6->s6_addr32[2] != 0) 318 goto badip6; 319 break; 320 case 40: 321 if (ip6->s6_addr32[3] != 0 || 322 (ip6->s6_addr32[2] & htonl(0xff00ffff)) != 0) 323 goto badip6; 324 break; 325 case 48: 326 if (ip6->s6_addr32[3] != 0 || 327 (ip6->s6_addr32[2] & htonl(0xff0000ff)) != 0) 328 goto badip6; 329 break; 330 case 56: 331 if (ip6->s6_addr32[3] != 0 || ip6->s6_addr8[8] != 0) 332 goto badip6; 333 break; 334 case 64: 335 if (ip6->s6_addr8[8] != 0 || 336 (ip6->s6_addr32[3] & htonl(0x00ffffff)) != 0) 337 goto badip6; 338 }; 339 switch (cfg->plen6) { 340 case 32: 341 case 96: 342 ia = ip6->s6_addr32[cfg->plen6 / 32]; 343 break; 344 case 40: 345 case 48: 346 case 56: 347 #if BYTE_ORDER == BIG_ENDIAN 348 ia = (ip6->s6_addr32[1] << (cfg->plen6 % 32)) | 349 (ip6->s6_addr32[2] >> (24 - cfg->plen6 % 32)); 350 #elif BYTE_ORDER == LITTLE_ENDIAN 351 ia = (ip6->s6_addr32[1] >> (cfg->plen6 % 32)) | 352 (ip6->s6_addr32[2] << (24 - cfg->plen6 % 32)); 353 #endif 354 break; 355 case 64: 356 #if BYTE_ORDER == BIG_ENDIAN 357 ia = (ip6->s6_addr32[2] << 8) | (ip6->s6_addr32[3] >> 24); 358 #elif BYTE_ORDER == LITTLE_ENDIAN 359 ia = (ip6->s6_addr32[2] >> 8) | (ip6->s6_addr32[3] << 24); 360 #endif 361 break; 362 default: 363 return (0); 364 }; 365 if (nat64_check_ip4(ia) != 0 || 366 nat64_check_private_ip4(cfg, ia) != 0) 367 goto badip4; 368 369 return (ia); 370 badip4: 371 DPRINTF(DP_GENERIC | DP_DROPS, 372 "invalid destination address: %08x", ia); 373 return (0); 374 badip6: 375 DPRINTF(DP_GENERIC | DP_DROPS, "invalid IPv4-embedded IPv6 address"); 376 return (0); 377 } 378 379 /* 380 * According to RFC 1624 the equation for incremental checksum update is: 381 * HC' = ~(~HC + ~m + m') -- [Eqn. 3] 382 * HC' = HC - ~m - m' -- [Eqn. 4] 383 * So, when we are replacing IPv4 addresses to IPv6, we 384 * can assume, that new bytes previously were zeros, and vise versa - 385 * when we replacing IPv6 addresses to IPv4, now unused bytes become 386 * zeros. The payload length in pseudo header has bigger size, but one 387 * half of it should be zero. Using the equation 4 we get: 388 * HC' = HC - (~m0 + m0') -- m0 is first changed word 389 * HC' = (HC - (~m0 + m0')) - (~m1 + m1') -- m1 is second changed word 390 * HC' = HC - ~m0 - m0' - ~m1 - m1' - ... = 391 * = HC - sum(~m[i] + m'[i]) 392 * 393 * The function result should be used as follows: 394 * IPv6 to IPv4: HC' = cksum_add(HC, result) 395 * IPv4 to IPv6: HC' = cksum_add(HC, ~result) 396 */ 397 static NAT64NOINLINE uint16_t 398 nat64_cksum_convert(struct ip6_hdr *ip6, struct ip *ip) 399 { 400 uint32_t sum; 401 uint16_t *p; 402 403 sum = ~ip->ip_src.s_addr >> 16; 404 sum += ~ip->ip_src.s_addr & 0xffff; 405 sum += ~ip->ip_dst.s_addr >> 16; 406 sum += ~ip->ip_dst.s_addr & 0xffff; 407 408 for (p = (uint16_t *)&ip6->ip6_src; 409 p < (uint16_t *)(&ip6->ip6_src + 2); p++) 410 sum += *p; 411 412 while (sum >> 16) 413 sum = (sum & 0xffff) + (sum >> 16); 414 return (sum); 415 } 416 417 static NAT64NOINLINE void 418 nat64_init_ip4hdr(const struct ip6_hdr *ip6, const struct ip6_frag *frag, 419 uint16_t plen, uint8_t proto, struct ip *ip) 420 { 421 422 /* assume addresses are already initialized */ 423 ip->ip_v = IPVERSION; 424 ip->ip_hl = sizeof(*ip) >> 2; 425 ip->ip_tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; 426 ip->ip_len = htons(sizeof(*ip) + plen); 427 #ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT 428 ip->ip_ttl = ip6->ip6_hlim - IPV6_HLIMDEC; 429 #else 430 /* Forwarding code will decrement TTL. */ 431 ip->ip_ttl = ip6->ip6_hlim; 432 #endif 433 ip->ip_sum = 0; 434 ip->ip_p = (proto == IPPROTO_ICMPV6) ? IPPROTO_ICMP: proto; 435 ip_fillid(ip); 436 if (frag != NULL) { 437 ip->ip_off = htons(ntohs(frag->ip6f_offlg) >> 3); 438 if (frag->ip6f_offlg & IP6F_MORE_FRAG) 439 ip->ip_off |= htons(IP_MF); 440 } else { 441 ip->ip_off = htons(IP_DF); 442 } 443 ip->ip_sum = in_cksum_hdr(ip); 444 } 445 446 #define FRAGSZ(mtu) ((mtu) - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag)) 447 static NAT64NOINLINE int 448 nat64_fragment6(struct nat64_counters *stats, struct ip6_hdr *ip6, 449 struct mbufq *mq, struct mbuf *m, uint32_t mtu, uint16_t ip_id, 450 uint16_t ip_off) 451 { 452 struct ip6_frag ip6f; 453 struct mbuf *n; 454 uint16_t hlen, len, offset; 455 int plen; 456 457 plen = ntohs(ip6->ip6_plen); 458 hlen = sizeof(struct ip6_hdr); 459 460 /* Fragmentation isn't needed */ 461 if (ip_off == 0 && plen <= mtu - hlen) { 462 M_PREPEND(m, hlen, M_NOWAIT); 463 if (m == NULL) { 464 NAT64STAT_INC(stats, nomem); 465 return (ENOMEM); 466 } 467 bcopy(ip6, mtod(m, void *), hlen); 468 if (mbufq_enqueue(mq, m) != 0) { 469 m_freem(m); 470 NAT64STAT_INC(stats, dropped); 471 DPRINTF(DP_DROPS, "dropped due to mbufq overflow"); 472 return (ENOBUFS); 473 } 474 return (0); 475 } 476 477 hlen += sizeof(struct ip6_frag); 478 ip6f.ip6f_reserved = 0; 479 ip6f.ip6f_nxt = ip6->ip6_nxt; 480 ip6->ip6_nxt = IPPROTO_FRAGMENT; 481 if (ip_off != 0) { 482 /* 483 * We have got an IPv4 fragment. 484 * Use offset value and ip_id from original fragment. 485 */ 486 ip6f.ip6f_ident = htonl(ntohs(ip_id)); 487 offset = (ntohs(ip_off) & IP_OFFMASK) << 3; 488 NAT64STAT_INC(stats, ifrags); 489 } else { 490 /* The packet size exceeds interface MTU */ 491 ip6f.ip6f_ident = htonl(ip6_randomid()); 492 offset = 0; /* First fragment*/ 493 } 494 while (plen > 0 && m != NULL) { 495 n = NULL; 496 len = FRAGSZ(mtu) & ~7; 497 if (len > plen) 498 len = plen; 499 ip6->ip6_plen = htons(len + sizeof(ip6f)); 500 ip6f.ip6f_offlg = ntohs(offset); 501 if (len < plen || (ip_off & htons(IP_MF)) != 0) 502 ip6f.ip6f_offlg |= IP6F_MORE_FRAG; 503 offset += len; 504 plen -= len; 505 if (plen > 0) { 506 n = m_split(m, len, M_NOWAIT); 507 if (n == NULL) 508 goto fail; 509 } 510 M_PREPEND(m, hlen, M_NOWAIT); 511 if (m == NULL) 512 goto fail; 513 bcopy(ip6, mtod(m, void *), sizeof(struct ip6_hdr)); 514 bcopy(&ip6f, mtodo(m, sizeof(struct ip6_hdr)), 515 sizeof(struct ip6_frag)); 516 if (mbufq_enqueue(mq, m) != 0) 517 goto fail; 518 m = n; 519 } 520 NAT64STAT_ADD(stats, ofrags, mbufq_len(mq)); 521 return (0); 522 fail: 523 if (m != NULL) 524 m_freem(m); 525 if (n != NULL) 526 m_freem(n); 527 mbufq_drain(mq); 528 NAT64STAT_INC(stats, nomem); 529 return (ENOMEM); 530 } 531 532 static NAT64NOINLINE int 533 nat64_find_route6(struct nhop6_basic *pnh, struct sockaddr_in6 *dst, 534 struct mbuf *m) 535 { 536 537 if (fib6_lookup_nh_basic(M_GETFIB(m), &dst->sin6_addr, 0, 0, 0, 538 pnh) != 0) 539 return (EHOSTUNREACH); 540 if (pnh->nh_flags & (NHF_BLACKHOLE | NHF_REJECT)) 541 return (EHOSTUNREACH); 542 /* 543 * XXX: we need to use destination address with embedded scope 544 * zone id, because LLTABLE uses such form of addresses for lookup. 545 */ 546 dst->sin6_family = AF_INET6; 547 dst->sin6_len = sizeof(*dst); 548 dst->sin6_addr = pnh->nh_addr; 549 if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr)) 550 dst->sin6_addr.s6_addr16[1] = 551 htons(pnh->nh_ifp->if_index & 0xffff); 552 dst->sin6_port = 0; 553 dst->sin6_scope_id = 0; 554 dst->sin6_flowinfo = 0; 555 556 return (0); 557 } 558 559 #define NAT64_ICMP6_PLEN 64 560 static NAT64NOINLINE void 561 nat64_icmp6_reflect(struct mbuf *m, uint8_t type, uint8_t code, uint32_t mtu, 562 struct nat64_counters *stats, void *logdata) 563 { 564 struct icmp6_hdr *icmp6; 565 struct ip6_hdr *ip6, *oip6; 566 struct mbuf *n; 567 int len, plen; 568 569 len = 0; 570 plen = nat64_getlasthdr(m, &len); 571 if (plen < 0) { 572 DPRINTF(DP_DROPS, "mbuf isn't contigious"); 573 goto freeit; 574 } 575 /* 576 * Do not send ICMPv6 in reply to ICMPv6 errors. 577 */ 578 if (plen == IPPROTO_ICMPV6) { 579 if (m->m_len < len + sizeof(*icmp6)) { 580 DPRINTF(DP_DROPS, "mbuf isn't contigious"); 581 goto freeit; 582 } 583 icmp6 = mtodo(m, len); 584 if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST || 585 icmp6->icmp6_type == ND_REDIRECT) { 586 DPRINTF(DP_DROPS, "do not send ICMPv6 in reply to " 587 "ICMPv6 errors"); 588 goto freeit; 589 } 590 } 591 /* 592 if (icmp6_ratelimit(&ip6->ip6_src, type, code)) 593 goto freeit; 594 */ 595 ip6 = mtod(m, struct ip6_hdr *); 596 switch (type) { 597 case ICMP6_DST_UNREACH: 598 case ICMP6_PACKET_TOO_BIG: 599 case ICMP6_TIME_EXCEEDED: 600 case ICMP6_PARAM_PROB: 601 break; 602 default: 603 goto freeit; 604 } 605 /* Calculate length of ICMPv6 payload */ 606 len = (m->m_pkthdr.len > NAT64_ICMP6_PLEN) ? NAT64_ICMP6_PLEN: 607 m->m_pkthdr.len; 608 609 /* Create new ICMPv6 datagram */ 610 plen = len + sizeof(struct icmp6_hdr); 611 n = m_get2(sizeof(struct ip6_hdr) + plen + max_hdr, M_NOWAIT, 612 MT_HEADER, M_PKTHDR); 613 if (n == NULL) { 614 NAT64STAT_INC(stats, nomem); 615 m_freem(m); 616 return; 617 } 618 /* 619 * Move pkthdr from original mbuf. We should have initialized some 620 * fields, because we can reinject this mbuf to netisr and it will 621 * go trough input path (it requires at least rcvif should be set). 622 * Also do M_ALIGN() to reduce chances of need to allocate new mbuf 623 * in the chain, when we will do M_PREPEND() or make some type of 624 * tunneling. 625 */ 626 m_move_pkthdr(n, m); 627 M_ALIGN(n, sizeof(struct ip6_hdr) + plen + max_hdr); 628 629 n->m_len = n->m_pkthdr.len = sizeof(struct ip6_hdr) + plen; 630 oip6 = mtod(n, struct ip6_hdr *); 631 oip6->ip6_src = ip6->ip6_dst; 632 oip6->ip6_dst = ip6->ip6_src; 633 oip6->ip6_nxt = IPPROTO_ICMPV6; 634 oip6->ip6_flow = 0; 635 oip6->ip6_vfc |= IPV6_VERSION; 636 oip6->ip6_hlim = V_ip6_defhlim; 637 oip6->ip6_plen = htons(plen); 638 639 icmp6 = mtodo(n, sizeof(struct ip6_hdr)); 640 icmp6->icmp6_cksum = 0; 641 icmp6->icmp6_type = type; 642 icmp6->icmp6_code = code; 643 icmp6->icmp6_mtu = htonl(mtu); 644 645 m_copydata(m, 0, len, mtodo(n, sizeof(struct ip6_hdr) + 646 sizeof(struct icmp6_hdr))); 647 icmp6->icmp6_cksum = in6_cksum(n, IPPROTO_ICMPV6, 648 sizeof(struct ip6_hdr), plen); 649 m_freem(m); 650 nat64_output_one(n, stats, logdata); 651 return; 652 freeit: 653 NAT64STAT_INC(stats, dropped); 654 m_freem(m); 655 } 656 657 static NAT64NOINLINE int 658 nat64_find_route4(struct nhop4_basic *pnh, struct sockaddr_in *dst, 659 struct mbuf *m) 660 { 661 662 if (fib4_lookup_nh_basic(M_GETFIB(m), dst->sin_addr, 0, 0, pnh) != 0) 663 return (EHOSTUNREACH); 664 if (pnh->nh_flags & (NHF_BLACKHOLE | NHF_BROADCAST | NHF_REJECT)) 665 return (EHOSTUNREACH); 666 667 dst->sin_family = AF_INET; 668 dst->sin_len = sizeof(*dst); 669 dst->sin_addr = pnh->nh_addr; 670 dst->sin_port = 0; 671 return (0); 672 } 673 674 #define NAT64_ICMP_PLEN 64 675 static NAT64NOINLINE void 676 nat64_icmp_reflect(struct mbuf *m, uint8_t type, 677 uint8_t code, uint16_t mtu, struct nat64_counters *stats, void *logdata) 678 { 679 struct icmp *icmp; 680 struct ip *ip, *oip; 681 struct mbuf *n; 682 int len, plen; 683 684 ip = mtod(m, struct ip *); 685 /* Do not send ICMP error if packet is not the first fragment */ 686 if (ip->ip_off & ~ntohs(IP_MF|IP_DF)) { 687 DPRINTF(DP_DROPS, "not first fragment"); 688 goto freeit; 689 } 690 /* Do not send ICMP in reply to ICMP errors */ 691 if (ip->ip_p == IPPROTO_ICMP) { 692 if (m->m_len < (ip->ip_hl << 2)) { 693 DPRINTF(DP_DROPS, "mbuf isn't contigious"); 694 goto freeit; 695 } 696 icmp = mtodo(m, ip->ip_hl << 2); 697 if (!ICMP_INFOTYPE(icmp->icmp_type)) { 698 DPRINTF(DP_DROPS, "do not send ICMP in reply to " 699 "ICMP errors"); 700 goto freeit; 701 } 702 } 703 switch (type) { 704 case ICMP_UNREACH: 705 case ICMP_TIMXCEED: 706 case ICMP_PARAMPROB: 707 break; 708 default: 709 goto freeit; 710 } 711 /* Calculate length of ICMP payload */ 712 len = (m->m_pkthdr.len > NAT64_ICMP_PLEN) ? (ip->ip_hl << 2) + 8: 713 m->m_pkthdr.len; 714 715 /* Create new ICMPv4 datagram */ 716 plen = len + sizeof(struct icmphdr) + sizeof(uint32_t); 717 n = m_get2(sizeof(struct ip) + plen + max_hdr, M_NOWAIT, 718 MT_HEADER, M_PKTHDR); 719 if (n == NULL) { 720 NAT64STAT_INC(stats, nomem); 721 m_freem(m); 722 return; 723 } 724 m_move_pkthdr(n, m); 725 M_ALIGN(n, sizeof(struct ip) + plen + max_hdr); 726 727 n->m_len = n->m_pkthdr.len = sizeof(struct ip) + plen; 728 oip = mtod(n, struct ip *); 729 oip->ip_v = IPVERSION; 730 oip->ip_hl = sizeof(struct ip) >> 2; 731 oip->ip_tos = 0; 732 oip->ip_len = htons(n->m_pkthdr.len); 733 oip->ip_ttl = V_ip_defttl; 734 oip->ip_p = IPPROTO_ICMP; 735 ip_fillid(oip); 736 oip->ip_off = htons(IP_DF); 737 oip->ip_src = ip->ip_dst; 738 oip->ip_dst = ip->ip_src; 739 oip->ip_sum = 0; 740 oip->ip_sum = in_cksum_hdr(oip); 741 742 icmp = mtodo(n, sizeof(struct ip)); 743 icmp->icmp_type = type; 744 icmp->icmp_code = code; 745 icmp->icmp_cksum = 0; 746 icmp->icmp_pmvoid = 0; 747 icmp->icmp_nextmtu = htons(mtu); 748 m_copydata(m, 0, len, mtodo(n, sizeof(struct ip) + 749 sizeof(struct icmphdr) + sizeof(uint32_t))); 750 icmp->icmp_cksum = in_cksum_skip(n, sizeof(struct ip) + plen, 751 sizeof(struct ip)); 752 m_freem(m); 753 nat64_output_one(n, stats, logdata); 754 return; 755 freeit: 756 NAT64STAT_INC(stats, dropped); 757 m_freem(m); 758 } 759 760 /* Translate ICMP echo request/reply into ICMPv6 */ 761 static void 762 nat64_icmp_handle_echo(struct ip6_hdr *ip6, struct icmp6_hdr *icmp6, 763 uint16_t id, uint8_t type) 764 { 765 uint16_t old; 766 767 old = *(uint16_t *)icmp6; /* save type+code in one word */ 768 icmp6->icmp6_type = type; 769 /* Reflect ICMPv6 -> ICMPv4 type translation in the cksum */ 770 icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum, 771 old, *(uint16_t *)icmp6); 772 if (id != 0) { 773 old = icmp6->icmp6_id; 774 icmp6->icmp6_id = id; 775 /* Reflect ICMP id translation in the cksum */ 776 icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum, 777 old, id); 778 } 779 /* Reflect IPv6 pseudo header in the cksum */ 780 icmp6->icmp6_cksum = ~in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen), 781 IPPROTO_ICMPV6, ~icmp6->icmp6_cksum); 782 } 783 784 static NAT64NOINLINE struct mbuf * 785 nat64_icmp_translate(struct mbuf *m, struct ip6_hdr *ip6, uint16_t icmpid, 786 int offset, struct nat64_config *cfg) 787 { 788 struct ip ip; 789 struct icmp *icmp; 790 struct tcphdr *tcp; 791 struct udphdr *udp; 792 struct ip6_hdr *eip6; 793 struct mbuf *n; 794 uint32_t mtu; 795 int len, hlen, plen; 796 uint8_t type, code; 797 798 if (m->m_len < offset + ICMP_MINLEN) 799 m = m_pullup(m, offset + ICMP_MINLEN); 800 if (m == NULL) { 801 NAT64STAT_INC(&cfg->stats, nomem); 802 return (m); 803 } 804 mtu = 0; 805 icmp = mtodo(m, offset); 806 /* RFC 7915 p4.2 */ 807 switch (icmp->icmp_type) { 808 case ICMP_ECHOREPLY: 809 type = ICMP6_ECHO_REPLY; 810 code = 0; 811 break; 812 case ICMP_UNREACH: 813 type = ICMP6_DST_UNREACH; 814 switch (icmp->icmp_code) { 815 case ICMP_UNREACH_NET: 816 case ICMP_UNREACH_HOST: 817 case ICMP_UNREACH_SRCFAIL: 818 case ICMP_UNREACH_NET_UNKNOWN: 819 case ICMP_UNREACH_HOST_UNKNOWN: 820 case ICMP_UNREACH_TOSNET: 821 case ICMP_UNREACH_TOSHOST: 822 code = ICMP6_DST_UNREACH_NOROUTE; 823 break; 824 case ICMP_UNREACH_PROTOCOL: 825 type = ICMP6_PARAM_PROB; 826 code = ICMP6_PARAMPROB_NEXTHEADER; 827 break; 828 case ICMP_UNREACH_PORT: 829 code = ICMP6_DST_UNREACH_NOPORT; 830 break; 831 case ICMP_UNREACH_NEEDFRAG: 832 type = ICMP6_PACKET_TOO_BIG; 833 code = 0; 834 /* XXX: needs an additional look */ 835 mtu = max(IPV6_MMTU, ntohs(icmp->icmp_nextmtu) + 20); 836 break; 837 case ICMP_UNREACH_NET_PROHIB: 838 case ICMP_UNREACH_HOST_PROHIB: 839 case ICMP_UNREACH_FILTER_PROHIB: 840 case ICMP_UNREACH_PRECEDENCE_CUTOFF: 841 code = ICMP6_DST_UNREACH_ADMIN; 842 break; 843 default: 844 DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d", 845 icmp->icmp_type, icmp->icmp_code); 846 goto freeit; 847 } 848 break; 849 case ICMP_TIMXCEED: 850 type = ICMP6_TIME_EXCEEDED; 851 code = icmp->icmp_code; 852 break; 853 case ICMP_ECHO: 854 type = ICMP6_ECHO_REQUEST; 855 code = 0; 856 break; 857 case ICMP_PARAMPROB: 858 type = ICMP6_PARAM_PROB; 859 switch (icmp->icmp_code) { 860 case ICMP_PARAMPROB_ERRATPTR: 861 case ICMP_PARAMPROB_LENGTH: 862 code = ICMP6_PARAMPROB_HEADER; 863 switch (icmp->icmp_pptr) { 864 case 0: /* Version/IHL */ 865 case 1: /* Type Of Service */ 866 mtu = icmp->icmp_pptr; 867 break; 868 case 2: /* Total Length */ 869 case 3: mtu = 4; /* Payload Length */ 870 break; 871 case 8: /* Time to Live */ 872 mtu = 7; /* Hop Limit */ 873 break; 874 case 9: /* Protocol */ 875 mtu = 6; /* Next Header */ 876 break; 877 case 12: /* Source address */ 878 case 13: 879 case 14: 880 case 15: 881 mtu = 8; 882 break; 883 case 16: /* Destination address */ 884 case 17: 885 case 18: 886 case 19: 887 mtu = 24; 888 break; 889 default: /* Silently drop */ 890 DPRINTF(DP_DROPS, "Unsupported ICMP type %d," 891 " code %d, pptr %d", icmp->icmp_type, 892 icmp->icmp_code, icmp->icmp_pptr); 893 goto freeit; 894 } 895 break; 896 default: 897 DPRINTF(DP_DROPS, "Unsupported ICMP type %d," 898 " code %d, pptr %d", icmp->icmp_type, 899 icmp->icmp_code, icmp->icmp_pptr); 900 goto freeit; 901 } 902 break; 903 default: 904 DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d", 905 icmp->icmp_type, icmp->icmp_code); 906 goto freeit; 907 } 908 /* 909 * For echo request/reply we can use original payload, 910 * but we need adjust icmp_cksum, because ICMPv6 cksum covers 911 * IPv6 pseudo header and ICMPv6 types differs from ICMPv4. 912 */ 913 if (type == ICMP6_ECHO_REQUEST || type == ICMP6_ECHO_REPLY) { 914 nat64_icmp_handle_echo(ip6, ICMP6(icmp), icmpid, type); 915 return (m); 916 } 917 /* 918 * For other types of ICMP messages we need to translate inner 919 * IPv4 header to IPv6 header. 920 * Assume ICMP src is the same as payload dst 921 * E.g. we have ( GWsrc1 , NATIP1 ) in outer header 922 * and ( NATIP1, Hostdst1 ) in ICMP copy header. 923 * In that case, we already have map for NATIP1 and GWsrc1. 924 * The only thing we need is to copy IPv6 map prefix to 925 * Hostdst1. 926 */ 927 hlen = offset + ICMP_MINLEN; 928 if (m->m_pkthdr.len < hlen + sizeof(struct ip) + ICMP_MINLEN) { 929 DPRINTF(DP_DROPS, "Message is too short %d", 930 m->m_pkthdr.len); 931 goto freeit; 932 } 933 m_copydata(m, hlen, sizeof(struct ip), (char *)&ip); 934 if (ip.ip_v != IPVERSION) { 935 DPRINTF(DP_DROPS, "Wrong IP version %d", ip.ip_v); 936 goto freeit; 937 } 938 hlen += ip.ip_hl << 2; /* Skip inner IP header */ 939 if (nat64_check_ip4(ip.ip_src.s_addr) != 0 || 940 nat64_check_ip4(ip.ip_dst.s_addr) != 0 || 941 nat64_check_private_ip4(cfg, ip.ip_src.s_addr) != 0 || 942 nat64_check_private_ip4(cfg, ip.ip_dst.s_addr) != 0) { 943 DPRINTF(DP_DROPS, "IP addresses checks failed %04x -> %04x", 944 ntohl(ip.ip_src.s_addr), ntohl(ip.ip_dst.s_addr)); 945 goto freeit; 946 } 947 if (m->m_pkthdr.len < hlen + ICMP_MINLEN) { 948 DPRINTF(DP_DROPS, "Message is too short %d", 949 m->m_pkthdr.len); 950 goto freeit; 951 } 952 #if 0 953 /* 954 * Check that inner source matches the outer destination. 955 * XXX: We need some method to convert IPv4 into IPv6 address here, 956 * and compare IPv6 addresses. 957 */ 958 if (ip.ip_src.s_addr != nat64_get_ip4(&ip6->ip6_dst)) { 959 DPRINTF(DP_GENERIC, "Inner source doesn't match destination ", 960 "%04x vs %04x", ip.ip_src.s_addr, 961 nat64_get_ip4(&ip6->ip6_dst)); 962 goto freeit; 963 } 964 #endif 965 /* 966 * Create new mbuf for ICMPv6 datagram. 967 * NOTE: len is data length just after inner IP header. 968 */ 969 len = m->m_pkthdr.len - hlen; 970 if (sizeof(struct ip6_hdr) + 971 sizeof(struct icmp6_hdr) + len > NAT64_ICMP6_PLEN) 972 len = NAT64_ICMP6_PLEN - sizeof(struct icmp6_hdr) - 973 sizeof(struct ip6_hdr); 974 plen = sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr) + len; 975 n = m_get2(offset + plen + max_hdr, M_NOWAIT, MT_HEADER, M_PKTHDR); 976 if (n == NULL) { 977 NAT64STAT_INC(&cfg->stats, nomem); 978 m_freem(m); 979 return (NULL); 980 } 981 m_move_pkthdr(n, m); 982 M_ALIGN(n, offset + plen + max_hdr); 983 n->m_len = n->m_pkthdr.len = offset + plen; 984 /* Adjust ip6_plen in outer header */ 985 ip6->ip6_plen = htons(plen); 986 /* Construct new inner IPv6 header */ 987 eip6 = mtodo(n, offset + sizeof(struct icmp6_hdr)); 988 eip6->ip6_src = ip6->ip6_dst; 989 /* Use the fact that we have single /96 prefix for IPv4 map */ 990 eip6->ip6_dst = ip6->ip6_src; 991 nat64_embed_ip4(cfg, ip.ip_dst.s_addr, &eip6->ip6_dst); 992 993 eip6->ip6_flow = htonl(ip.ip_tos << 20); 994 eip6->ip6_vfc |= IPV6_VERSION; 995 eip6->ip6_hlim = ip.ip_ttl; 996 eip6->ip6_plen = htons(ntohs(ip.ip_len) - (ip.ip_hl << 2)); 997 eip6->ip6_nxt = (ip.ip_p == IPPROTO_ICMP) ? IPPROTO_ICMPV6: ip.ip_p; 998 m_copydata(m, hlen, len, (char *)(eip6 + 1)); 999 /* 1000 * We need to translate source port in the inner ULP header, 1001 * and adjust ULP checksum. 1002 */ 1003 switch (ip.ip_p) { 1004 case IPPROTO_TCP: 1005 if (len < offsetof(struct tcphdr, th_sum)) 1006 break; 1007 tcp = TCP(eip6 + 1); 1008 if (icmpid != 0) { 1009 tcp->th_sum = cksum_adjust(tcp->th_sum, 1010 tcp->th_sport, icmpid); 1011 tcp->th_sport = icmpid; 1012 } 1013 tcp->th_sum = cksum_add(tcp->th_sum, 1014 ~nat64_cksum_convert(eip6, &ip)); 1015 break; 1016 case IPPROTO_UDP: 1017 if (len < offsetof(struct udphdr, uh_sum)) 1018 break; 1019 udp = UDP(eip6 + 1); 1020 if (icmpid != 0) { 1021 udp->uh_sum = cksum_adjust(udp->uh_sum, 1022 udp->uh_sport, icmpid); 1023 udp->uh_sport = icmpid; 1024 } 1025 udp->uh_sum = cksum_add(udp->uh_sum, 1026 ~nat64_cksum_convert(eip6, &ip)); 1027 break; 1028 case IPPROTO_ICMP: 1029 /* 1030 * Check if this is an ICMP error message for echo request 1031 * that we sent. I.e. ULP in the data containing invoking 1032 * packet is IPPROTO_ICMP and its type is ICMP_ECHO. 1033 */ 1034 icmp = (struct icmp *)(eip6 + 1); 1035 if (icmp->icmp_type != ICMP_ECHO) { 1036 m_freem(n); 1037 goto freeit; 1038 } 1039 /* 1040 * For our client this original datagram should looks 1041 * like it was ICMPv6 datagram with type ICMP6_ECHO_REQUEST. 1042 * Thus we need adjust icmp_cksum and convert type from 1043 * ICMP_ECHO to ICMP6_ECHO_REQUEST. 1044 */ 1045 nat64_icmp_handle_echo(eip6, ICMP6(icmp), icmpid, 1046 ICMP6_ECHO_REQUEST); 1047 } 1048 m_freem(m); 1049 /* Convert ICMPv4 into ICMPv6 header */ 1050 icmp = mtodo(n, offset); 1051 ICMP6(icmp)->icmp6_type = type; 1052 ICMP6(icmp)->icmp6_code = code; 1053 ICMP6(icmp)->icmp6_mtu = htonl(mtu); 1054 ICMP6(icmp)->icmp6_cksum = 0; 1055 ICMP6(icmp)->icmp6_cksum = cksum_add( 1056 ~in6_cksum_pseudo(ip6, plen, IPPROTO_ICMPV6, 0), 1057 in_cksum_skip(n, n->m_pkthdr.len, offset)); 1058 return (n); 1059 freeit: 1060 m_freem(m); 1061 NAT64STAT_INC(&cfg->stats, dropped); 1062 return (NULL); 1063 } 1064 1065 int 1066 nat64_getlasthdr(struct mbuf *m, int *offset) 1067 { 1068 struct ip6_hdr *ip6; 1069 struct ip6_hbh *hbh; 1070 int proto, hlen; 1071 1072 if (offset != NULL) 1073 hlen = *offset; 1074 else 1075 hlen = 0; 1076 1077 if (m->m_len < hlen + sizeof(*ip6)) 1078 return (-1); 1079 1080 ip6 = mtodo(m, hlen); 1081 hlen += sizeof(*ip6); 1082 proto = ip6->ip6_nxt; 1083 /* Skip extension headers */ 1084 while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING || 1085 proto == IPPROTO_DSTOPTS) { 1086 hbh = mtodo(m, hlen); 1087 /* 1088 * We expect mbuf has contigious data up to 1089 * upper level header. 1090 */ 1091 if (m->m_len < hlen) 1092 return (-1); 1093 /* 1094 * We doesn't support Jumbo payload option, 1095 * so return error. 1096 */ 1097 if (proto == IPPROTO_HOPOPTS && ip6->ip6_plen == 0) 1098 return (-1); 1099 proto = hbh->ip6h_nxt; 1100 hlen += (hbh->ip6h_len + 1) << 3; 1101 } 1102 if (offset != NULL) 1103 *offset = hlen; 1104 return (proto); 1105 } 1106 1107 int 1108 nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr, 1109 struct in6_addr *daddr, uint16_t lport, struct nat64_config *cfg, 1110 void *logdata) 1111 { 1112 struct nhop6_basic nh; 1113 struct ip6_hdr ip6; 1114 struct sockaddr_in6 dst; 1115 struct ip *ip; 1116 struct mbufq mq; 1117 uint16_t ip_id, ip_off; 1118 uint16_t *csum; 1119 int plen, hlen; 1120 uint8_t proto; 1121 1122 ip = mtod(m, struct ip*); 1123 1124 if (ip->ip_ttl <= IPTTLDEC) { 1125 nat64_icmp_reflect(m, ICMP_TIMXCEED, 1126 ICMP_TIMXCEED_INTRANS, 0, &cfg->stats, logdata); 1127 return (NAT64RETURN); 1128 } 1129 1130 ip6.ip6_dst = *daddr; 1131 ip6.ip6_src = *saddr; 1132 1133 hlen = ip->ip_hl << 2; 1134 plen = ntohs(ip->ip_len) - hlen; 1135 proto = ip->ip_p; 1136 1137 /* Save ip_id and ip_off, both are in network byte order */ 1138 ip_id = ip->ip_id; 1139 ip_off = ip->ip_off & htons(IP_OFFMASK | IP_MF); 1140 1141 /* Fragment length must be multiple of 8 octets */ 1142 if ((ip->ip_off & htons(IP_MF)) != 0 && (plen & 0x7) != 0) { 1143 nat64_icmp_reflect(m, ICMP_PARAMPROB, 1144 ICMP_PARAMPROB_LENGTH, 0, &cfg->stats, logdata); 1145 return (NAT64RETURN); 1146 } 1147 /* Fragmented ICMP is unsupported */ 1148 if (proto == IPPROTO_ICMP && ip_off != 0) { 1149 DPRINTF(DP_DROPS, "dropped due to fragmented ICMP"); 1150 NAT64STAT_INC(&cfg->stats, dropped); 1151 return (NAT64MFREE); 1152 } 1153 1154 dst.sin6_addr = ip6.ip6_dst; 1155 if (nat64_find_route6(&nh, &dst, m) != 0) { 1156 NAT64STAT_INC(&cfg->stats, noroute6); 1157 nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 1158 &cfg->stats, logdata); 1159 return (NAT64RETURN); 1160 } 1161 if (nh.nh_mtu < plen + sizeof(ip6) && 1162 (ip->ip_off & htons(IP_DF)) != 0) { 1163 nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 1164 FRAGSZ(nh.nh_mtu) + sizeof(struct ip), &cfg->stats, logdata); 1165 return (NAT64RETURN); 1166 } 1167 1168 ip6.ip6_flow = htonl(ip->ip_tos << 20); 1169 ip6.ip6_vfc |= IPV6_VERSION; 1170 #ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT 1171 ip6.ip6_hlim = ip->ip_ttl - IPTTLDEC; 1172 #else 1173 /* Forwarding code will decrement HLIM. */ 1174 ip6.ip6_hlim = ip->ip_ttl; 1175 #endif 1176 ip6.ip6_plen = htons(plen); 1177 ip6.ip6_nxt = (proto == IPPROTO_ICMP) ? IPPROTO_ICMPV6: proto; 1178 /* Convert checksums. */ 1179 switch (proto) { 1180 case IPPROTO_TCP: 1181 csum = &TCP(mtodo(m, hlen))->th_sum; 1182 if (lport != 0) { 1183 struct tcphdr *tcp = TCP(mtodo(m, hlen)); 1184 *csum = cksum_adjust(*csum, tcp->th_dport, lport); 1185 tcp->th_dport = lport; 1186 } 1187 *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip)); 1188 break; 1189 case IPPROTO_UDP: 1190 csum = &UDP(mtodo(m, hlen))->uh_sum; 1191 if (lport != 0) { 1192 struct udphdr *udp = UDP(mtodo(m, hlen)); 1193 *csum = cksum_adjust(*csum, udp->uh_dport, lport); 1194 udp->uh_dport = lport; 1195 } 1196 *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip)); 1197 break; 1198 case IPPROTO_ICMP: 1199 m = nat64_icmp_translate(m, &ip6, lport, hlen, cfg); 1200 if (m == NULL) /* stats already accounted */ 1201 return (NAT64RETURN); 1202 } 1203 1204 m_adj(m, hlen); 1205 mbufq_init(&mq, 255); 1206 nat64_fragment6(&cfg->stats, &ip6, &mq, m, nh.nh_mtu, ip_id, ip_off); 1207 while ((m = mbufq_dequeue(&mq)) != NULL) { 1208 if (nat64_output(nh.nh_ifp, m, (struct sockaddr *)&dst, 1209 &cfg->stats, logdata) != 0) 1210 break; 1211 NAT64STAT_INC(&cfg->stats, opcnt46); 1212 } 1213 mbufq_drain(&mq); 1214 return (NAT64RETURN); 1215 } 1216 1217 int 1218 nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport, 1219 struct nat64_config *cfg, void *logdata) 1220 { 1221 struct ip ip; 1222 struct icmp6_hdr *icmp6; 1223 struct ip6_frag *ip6f; 1224 struct ip6_hdr *ip6, *ip6i; 1225 uint32_t mtu; 1226 int plen, proto; 1227 uint8_t type, code; 1228 1229 if (hlen == 0) { 1230 ip6 = mtod(m, struct ip6_hdr *); 1231 if (nat64_check_ip6(&ip6->ip6_src) != 0 || 1232 nat64_check_ip6(&ip6->ip6_dst) != 0) 1233 return (NAT64SKIP); 1234 1235 proto = nat64_getlasthdr(m, &hlen); 1236 if (proto != IPPROTO_ICMPV6) { 1237 DPRINTF(DP_DROPS, 1238 "dropped due to mbuf isn't contigious"); 1239 NAT64STAT_INC(&cfg->stats, dropped); 1240 return (NAT64MFREE); 1241 } 1242 } 1243 1244 /* 1245 * Translate ICMPv6 type and code to ICMPv4 (RFC7915). 1246 * NOTE: ICMPv6 echo handled by nat64_do_handle_ip6(). 1247 */ 1248 icmp6 = mtodo(m, hlen); 1249 mtu = 0; 1250 switch (icmp6->icmp6_type) { 1251 case ICMP6_DST_UNREACH: 1252 type = ICMP_UNREACH; 1253 switch (icmp6->icmp6_code) { 1254 case ICMP6_DST_UNREACH_NOROUTE: 1255 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1256 case ICMP6_DST_UNREACH_ADDR: 1257 code = ICMP_UNREACH_HOST; 1258 break; 1259 case ICMP6_DST_UNREACH_ADMIN: 1260 code = ICMP_UNREACH_HOST_PROHIB; 1261 break; 1262 case ICMP6_DST_UNREACH_NOPORT: 1263 code = ICMP_UNREACH_PORT; 1264 break; 1265 default: 1266 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," 1267 " code %d", icmp6->icmp6_type, 1268 icmp6->icmp6_code); 1269 NAT64STAT_INC(&cfg->stats, dropped); 1270 return (NAT64MFREE); 1271 } 1272 break; 1273 case ICMP6_PACKET_TOO_BIG: 1274 type = ICMP_UNREACH; 1275 code = ICMP_UNREACH_NEEDFRAG; 1276 mtu = ntohl(icmp6->icmp6_mtu); 1277 if (mtu < IPV6_MMTU) { 1278 DPRINTF(DP_DROPS, "Wrong MTU %d in ICMPv6 type %d," 1279 " code %d", mtu, icmp6->icmp6_type, 1280 icmp6->icmp6_code); 1281 NAT64STAT_INC(&cfg->stats, dropped); 1282 return (NAT64MFREE); 1283 } 1284 /* 1285 * Adjust MTU to reflect difference between 1286 * IPv6 an IPv4 headers. 1287 */ 1288 mtu -= sizeof(struct ip6_hdr) - sizeof(struct ip); 1289 break; 1290 case ICMP6_TIME_EXCEEDED: 1291 type = ICMP_TIMXCEED; 1292 code = icmp6->icmp6_code; 1293 break; 1294 case ICMP6_PARAM_PROB: 1295 switch (icmp6->icmp6_code) { 1296 case ICMP6_PARAMPROB_HEADER: 1297 type = ICMP_PARAMPROB; 1298 code = ICMP_PARAMPROB_ERRATPTR; 1299 mtu = ntohl(icmp6->icmp6_pptr); 1300 switch (mtu) { 1301 case 0: /* Version/Traffic Class */ 1302 case 1: /* Traffic Class/Flow Label */ 1303 break; 1304 case 4: /* Payload Length */ 1305 case 5: 1306 mtu = 2; 1307 break; 1308 case 6: /* Next Header */ 1309 mtu = 9; 1310 break; 1311 case 7: /* Hop Limit */ 1312 mtu = 8; 1313 break; 1314 default: 1315 if (mtu >= 8 && mtu <= 23) { 1316 mtu = 12; /* Source address */ 1317 break; 1318 } 1319 if (mtu >= 24 && mtu <= 39) { 1320 mtu = 16; /* Destination address */ 1321 break; 1322 } 1323 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," 1324 " code %d, pptr %d", icmp6->icmp6_type, 1325 icmp6->icmp6_code, mtu); 1326 NAT64STAT_INC(&cfg->stats, dropped); 1327 return (NAT64MFREE); 1328 } 1329 case ICMP6_PARAMPROB_NEXTHEADER: 1330 type = ICMP_UNREACH; 1331 code = ICMP_UNREACH_PROTOCOL; 1332 break; 1333 default: 1334 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," 1335 " code %d, pptr %d", icmp6->icmp6_type, 1336 icmp6->icmp6_code, ntohl(icmp6->icmp6_pptr)); 1337 NAT64STAT_INC(&cfg->stats, dropped); 1338 return (NAT64MFREE); 1339 } 1340 break; 1341 default: 1342 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d, code %d", 1343 icmp6->icmp6_type, icmp6->icmp6_code); 1344 NAT64STAT_INC(&cfg->stats, dropped); 1345 return (NAT64MFREE); 1346 } 1347 1348 hlen += sizeof(struct icmp6_hdr); 1349 if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) { 1350 NAT64STAT_INC(&cfg->stats, dropped); 1351 DPRINTF(DP_DROPS, "Message is too short %d", 1352 m->m_pkthdr.len); 1353 return (NAT64MFREE); 1354 } 1355 /* 1356 * We need at least ICMP_MINLEN bytes of original datagram payload 1357 * to generate ICMP message. It is nice that ICMP_MINLEN is equal 1358 * to sizeof(struct ip6_frag). So, if embedded datagram had a fragment 1359 * header we will not have to do m_pullup() again. 1360 * 1361 * What we have here: 1362 * Outer header: (IPv6iGW, v4mapPRefix+v4exthost) 1363 * Inner header: (v4mapPRefix+v4host, IPv6iHost) [sport, dport] 1364 * We need to translate it to: 1365 * 1366 * Outer header: (alias_host, v4exthost) 1367 * Inner header: (v4exthost, alias_host) [sport, alias_port] 1368 * 1369 * Assume caller function has checked if v4mapPRefix+v4host 1370 * matches configured prefix. 1371 * The only two things we should be provided with are mapping between 1372 * IPv6iHost <> alias_host and between dport and alias_port. 1373 */ 1374 if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) 1375 m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN); 1376 if (m == NULL) { 1377 NAT64STAT_INC(&cfg->stats, nomem); 1378 return (NAT64RETURN); 1379 } 1380 ip6 = mtod(m, struct ip6_hdr *); 1381 ip6i = mtodo(m, hlen); 1382 ip6f = NULL; 1383 proto = ip6i->ip6_nxt; 1384 plen = ntohs(ip6i->ip6_plen); 1385 hlen += sizeof(struct ip6_hdr); 1386 if (proto == IPPROTO_FRAGMENT) { 1387 if (m->m_pkthdr.len < hlen + sizeof(struct ip6_frag) + 1388 ICMP_MINLEN) 1389 goto fail; 1390 ip6f = mtodo(m, hlen); 1391 proto = ip6f->ip6f_nxt; 1392 plen -= sizeof(struct ip6_frag); 1393 hlen += sizeof(struct ip6_frag); 1394 /* Ajust MTU to reflect frag header size */ 1395 if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG) 1396 mtu -= sizeof(struct ip6_frag); 1397 } 1398 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) { 1399 DPRINTF(DP_DROPS, "Unsupported proto %d in the inner header", 1400 proto); 1401 goto fail; 1402 } 1403 if (nat64_check_ip6(&ip6i->ip6_src) != 0 || 1404 nat64_check_ip6(&ip6i->ip6_dst) != 0) { 1405 DPRINTF(DP_DROPS, "Inner addresses do not passes the check"); 1406 goto fail; 1407 } 1408 /* Check if outer dst is the same as inner src */ 1409 if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6i->ip6_src)) { 1410 DPRINTF(DP_DROPS, "Inner src doesn't match outer dst"); 1411 goto fail; 1412 } 1413 1414 /* Now we need to make a fake IPv4 packet to generate ICMP message */ 1415 ip.ip_dst.s_addr = aaddr; 1416 ip.ip_src.s_addr = nat64_extract_ip4(cfg, &ip6i->ip6_src); 1417 /* XXX: Make fake ulp header */ 1418 #ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT 1419 ip6i->ip6_hlim += IPV6_HLIMDEC; /* init_ip4hdr will decrement it */ 1420 #endif 1421 nat64_init_ip4hdr(ip6i, ip6f, plen, proto, &ip); 1422 m_adj(m, hlen - sizeof(struct ip)); 1423 bcopy(&ip, mtod(m, void *), sizeof(ip)); 1424 nat64_icmp_reflect(m, type, code, (uint16_t)mtu, &cfg->stats, 1425 logdata); 1426 return (NAT64RETURN); 1427 fail: 1428 /* 1429 * We must call m_freem() because mbuf pointer could be 1430 * changed with m_pullup(). 1431 */ 1432 m_freem(m); 1433 NAT64STAT_INC(&cfg->stats, dropped); 1434 return (NAT64RETURN); 1435 } 1436 1437 int 1438 nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport, 1439 struct nat64_config *cfg, void *logdata) 1440 { 1441 struct ip ip; 1442 struct nhop4_basic nh; 1443 struct sockaddr_in dst; 1444 struct ip6_frag *frag; 1445 struct ip6_hdr *ip6; 1446 struct icmp6_hdr *icmp6; 1447 uint16_t *csum; 1448 int plen, hlen, proto; 1449 1450 /* 1451 * XXX: we expect ipfw_chk() did m_pullup() up to upper level 1452 * protocol's headers. Also we skip some checks, that ip6_input(), 1453 * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did. 1454 */ 1455 ip6 = mtod(m, struct ip6_hdr *); 1456 if (nat64_check_ip6(&ip6->ip6_src) != 0 || 1457 nat64_check_ip6(&ip6->ip6_dst) != 0) { 1458 return (NAT64SKIP); 1459 } 1460 1461 /* Starting from this point we must not return zero */ 1462 ip.ip_src.s_addr = aaddr; 1463 if (nat64_check_ip4(ip.ip_src.s_addr) != 0) { 1464 DPRINTF(DP_GENERIC | DP_DROPS, "invalid source address: %08x", 1465 ip.ip_src.s_addr); 1466 NAT64STAT_INC(&cfg->stats, dropped); 1467 return (NAT64MFREE); 1468 } 1469 1470 ip.ip_dst.s_addr = nat64_extract_ip4(cfg, &ip6->ip6_dst); 1471 if (ip.ip_dst.s_addr == 0) { 1472 NAT64STAT_INC(&cfg->stats, dropped); 1473 return (NAT64MFREE); 1474 } 1475 1476 if (ip6->ip6_hlim <= IPV6_HLIMDEC) { 1477 nat64_icmp6_reflect(m, ICMP6_TIME_EXCEEDED, 1478 ICMP6_TIME_EXCEED_TRANSIT, 0, &cfg->stats, logdata); 1479 return (NAT64RETURN); 1480 } 1481 1482 hlen = 0; 1483 plen = ntohs(ip6->ip6_plen); 1484 proto = nat64_getlasthdr(m, &hlen); 1485 if (proto < 0) { 1486 DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious"); 1487 NAT64STAT_INC(&cfg->stats, dropped); 1488 return (NAT64MFREE); 1489 } 1490 frag = NULL; 1491 if (proto == IPPROTO_FRAGMENT) { 1492 /* ipfw_chk should m_pullup up to frag header */ 1493 if (m->m_len < hlen + sizeof(*frag)) { 1494 DPRINTF(DP_DROPS, 1495 "dropped due to mbuf isn't contigious"); 1496 NAT64STAT_INC(&cfg->stats, dropped); 1497 return (NAT64MFREE); 1498 } 1499 frag = mtodo(m, hlen); 1500 proto = frag->ip6f_nxt; 1501 hlen += sizeof(*frag); 1502 /* Fragmented ICMPv6 is unsupported */ 1503 if (proto == IPPROTO_ICMPV6) { 1504 DPRINTF(DP_DROPS, "dropped due to fragmented ICMPv6"); 1505 NAT64STAT_INC(&cfg->stats, dropped); 1506 return (NAT64MFREE); 1507 } 1508 /* Fragment length must be multiple of 8 octets */ 1509 if ((frag->ip6f_offlg & IP6F_MORE_FRAG) != 0 && 1510 ((plen + sizeof(struct ip6_hdr) - hlen) & 0x7) != 0) { 1511 nat64_icmp6_reflect(m, ICMP6_PARAM_PROB, 1512 ICMP6_PARAMPROB_HEADER, 1513 offsetof(struct ip6_hdr, ip6_plen), &cfg->stats, 1514 logdata); 1515 return (NAT64RETURN); 1516 } 1517 } 1518 plen -= hlen - sizeof(struct ip6_hdr); 1519 if (plen < 0 || m->m_pkthdr.len < plen + hlen) { 1520 DPRINTF(DP_DROPS, "plen %d, pkthdr.len %d, hlen %d", 1521 plen, m->m_pkthdr.len, hlen); 1522 NAT64STAT_INC(&cfg->stats, dropped); 1523 return (NAT64MFREE); 1524 } 1525 1526 icmp6 = NULL; /* Make gcc happy */ 1527 if (proto == IPPROTO_ICMPV6) { 1528 icmp6 = mtodo(m, hlen); 1529 if (icmp6->icmp6_type != ICMP6_ECHO_REQUEST && 1530 icmp6->icmp6_type != ICMP6_ECHO_REPLY) 1531 return (nat64_handle_icmp6(m, hlen, aaddr, aport, 1532 cfg, logdata)); 1533 } 1534 dst.sin_addr.s_addr = ip.ip_dst.s_addr; 1535 if (nat64_find_route4(&nh, &dst, m) != 0) { 1536 NAT64STAT_INC(&cfg->stats, noroute4); 1537 nat64_icmp6_reflect(m, ICMP6_DST_UNREACH, 1538 ICMP6_DST_UNREACH_NOROUTE, 0, &cfg->stats, logdata); 1539 return (NAT64RETURN); 1540 } 1541 if (nh.nh_mtu < plen + sizeof(ip)) { 1542 nat64_icmp6_reflect(m, ICMP6_PACKET_TOO_BIG, 0, nh.nh_mtu, 1543 &cfg->stats, logdata); 1544 return (NAT64RETURN); 1545 } 1546 nat64_init_ip4hdr(ip6, frag, plen, proto, &ip); 1547 /* Convert checksums. */ 1548 switch (proto) { 1549 case IPPROTO_TCP: 1550 csum = &TCP(mtodo(m, hlen))->th_sum; 1551 if (aport != 0) { 1552 struct tcphdr *tcp = TCP(mtodo(m, hlen)); 1553 *csum = cksum_adjust(*csum, tcp->th_sport, aport); 1554 tcp->th_sport = aport; 1555 } 1556 *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip)); 1557 break; 1558 case IPPROTO_UDP: 1559 csum = &UDP(mtodo(m, hlen))->uh_sum; 1560 if (aport != 0) { 1561 struct udphdr *udp = UDP(mtodo(m, hlen)); 1562 *csum = cksum_adjust(*csum, udp->uh_sport, aport); 1563 udp->uh_sport = aport; 1564 } 1565 *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip)); 1566 break; 1567 case IPPROTO_ICMPV6: 1568 /* Checksum in ICMPv6 covers pseudo header */ 1569 csum = &icmp6->icmp6_cksum; 1570 *csum = cksum_add(*csum, in6_cksum_pseudo(ip6, plen, 1571 IPPROTO_ICMPV6, 0)); 1572 /* Convert ICMPv6 types to ICMP */ 1573 proto = *(uint16_t *)icmp6; /* save old word for cksum_adjust */ 1574 if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST) 1575 icmp6->icmp6_type = ICMP_ECHO; 1576 else /* ICMP6_ECHO_REPLY */ 1577 icmp6->icmp6_type = ICMP_ECHOREPLY; 1578 *csum = cksum_adjust(*csum, (uint16_t)proto, 1579 *(uint16_t *)icmp6); 1580 if (aport != 0) { 1581 uint16_t old_id = icmp6->icmp6_id; 1582 icmp6->icmp6_id = aport; 1583 *csum = cksum_adjust(*csum, old_id, aport); 1584 } 1585 break; 1586 }; 1587 1588 m_adj(m, hlen - sizeof(ip)); 1589 bcopy(&ip, mtod(m, void *), sizeof(ip)); 1590 if (nat64_output(nh.nh_ifp, m, (struct sockaddr *)&dst, 1591 &cfg->stats, logdata) == 0) 1592 NAT64STAT_INC(&cfg->stats, opcnt64); 1593 return (NAT64RETURN); 1594 } 1595 1596