1 /*- 2 * Copyright (c) 2015-2016 Yandex LLC 3 * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include "opt_ipfw.h" 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/counter.h> 36 #include <sys/errno.h> 37 #include <sys/kernel.h> 38 #include <sys/lock.h> 39 #include <sys/mbuf.h> 40 #include <sys/module.h> 41 #include <sys/rmlock.h> 42 #include <sys/rwlock.h> 43 #include <sys/socket.h> 44 #include <sys/queue.h> 45 46 #include <net/if.h> 47 #include <net/if_var.h> 48 #include <net/if_pflog.h> 49 #include <net/pfil.h> 50 #include <net/netisr.h> 51 #include <net/route.h> 52 53 #include <netinet/in.h> 54 #include <netinet/in_fib.h> 55 #include <netinet/ip.h> 56 #include <netinet/ip_var.h> 57 #include <netinet/ip_fw.h> 58 #include <netinet/ip6.h> 59 #include <netinet/icmp6.h> 60 #include <netinet/ip_icmp.h> 61 #include <netinet/tcp.h> 62 #include <netinet/udp.h> 63 #include <netinet6/in6_var.h> 64 #include <netinet6/in6_fib.h> 65 #include <netinet6/ip6_var.h> 66 67 #include <netpfil/pf/pf.h> 68 #include <netpfil/ipfw/ip_fw_private.h> 69 #include <netpfil/ipfw/nat64/ip_fw_nat64.h> 70 #include <netpfil/ipfw/nat64/nat64_translate.h> 71 #include <machine/in_cksum.h> 72 73 static void 74 nat64_log(struct pfloghdr *logdata, struct mbuf *m, sa_family_t family) 75 { 76 77 logdata->dir = PF_OUT; 78 logdata->af = family; 79 ipfw_bpf_mtap2(logdata, PFLOG_HDRLEN, m); 80 } 81 82 #ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT 83 static NAT64NOINLINE int nat64_find_route4(struct nhop4_basic *, 84 struct sockaddr_in *, struct mbuf *); 85 static NAT64NOINLINE int nat64_find_route6(struct nhop6_basic *, 86 struct sockaddr_in6 *, struct mbuf *); 87 88 static NAT64NOINLINE int 89 nat64_output(struct ifnet *ifp, struct mbuf *m, 90 struct sockaddr *dst, struct route *ro, nat64_stats_block *stats, 91 void *logdata) 92 { 93 int error; 94 95 if (logdata != NULL) 96 nat64_log(logdata, m, dst->sa_family); 97 error = (*ifp->if_output)(ifp, m, dst, ro); 98 if (error != 0) 99 NAT64STAT_INC(stats, oerrors); 100 return (error); 101 } 102 103 static NAT64NOINLINE int 104 nat64_output_one(struct mbuf *m, nat64_stats_block *stats, void *logdata) 105 { 106 struct nhop6_basic nh6; 107 struct nhop4_basic nh4; 108 struct sockaddr_in6 dst6; 109 struct sockaddr_in dst4; 110 struct sockaddr *dst; 111 struct ip6_hdr *ip6; 112 struct ip *ip4; 113 struct ifnet *ifp; 114 int error; 115 116 ip4 = mtod(m, struct ip *); 117 switch (ip4->ip_v) { 118 case IPVERSION: 119 dst4.sin_addr = ip4->ip_dst; 120 error = nat64_find_route4(&nh4, &dst4, m); 121 if (error != 0) 122 NAT64STAT_INC(stats, noroute4); 123 else { 124 ifp = nh4.nh_ifp; 125 dst = (struct sockaddr *)&dst4; 126 } 127 break; 128 case (IPV6_VERSION >> 4): 129 ip6 = mtod(m, struct ip6_hdr *); 130 dst6.sin6_addr = ip6->ip6_dst; 131 error = nat64_find_route6(&nh6, &dst6, m); 132 if (error != 0) 133 NAT64STAT_INC(stats, noroute6); 134 else { 135 ifp = nh6.nh_ifp; 136 dst = (struct sockaddr *)&dst6; 137 } 138 break; 139 default: 140 m_freem(m); 141 NAT64STAT_INC(stats, dropped); 142 DPRINTF(DP_DROPS, "dropped due to unknown IP version"); 143 return (EAFNOSUPPORT); 144 } 145 if (error != 0) { 146 m_freem(m); 147 return (EHOSTUNREACH); 148 } 149 if (logdata != NULL) 150 nat64_log(logdata, m, dst->sa_family); 151 error = (*ifp->if_output)(ifp, m, dst, NULL); 152 if (error != 0) 153 NAT64STAT_INC(stats, oerrors); 154 return (error); 155 } 156 #else /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */ 157 static NAT64NOINLINE int 158 nat64_output(struct ifnet *ifp, struct mbuf *m, 159 struct sockaddr *dst, struct route *ro, nat64_stats_block *stats, 160 void *logdata) 161 { 162 struct ip *ip4; 163 int ret, af; 164 165 ip4 = mtod(m, struct ip *); 166 switch (ip4->ip_v) { 167 case IPVERSION: 168 af = AF_INET; 169 ret = NETISR_IP; 170 break; 171 case (IPV6_VERSION >> 4): 172 af = AF_INET6; 173 ret = NETISR_IPV6; 174 break; 175 default: 176 m_freem(m); 177 NAT64STAT_INC(stats, dropped); 178 DPRINTF(DP_DROPS, "unknown IP version"); 179 return (EAFNOSUPPORT); 180 } 181 if (logdata != NULL) 182 nat64_log(logdata, m, af); 183 ret = netisr_queue(ret, m); 184 if (ret != 0) 185 NAT64STAT_INC(stats, oerrors); 186 return (ret); 187 } 188 189 static NAT64NOINLINE int 190 nat64_output_one(struct mbuf *m, nat64_stats_block *stats, void *logdata) 191 { 192 193 return (nat64_output(NULL, m, NULL, NULL, stats, logdata)); 194 } 195 #endif /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */ 196 197 198 #if 0 199 void print_ipv6_header(struct ip6_hdr *ip6, char *buf, size_t bufsize); 200 201 void 202 print_ipv6_header(struct ip6_hdr *ip6, char *buf, size_t bufsize) 203 { 204 char sbuf[INET6_ADDRSTRLEN], dbuf[INET6_ADDRSTRLEN]; 205 206 inet_ntop(AF_INET6, &ip6->ip6_src, sbuf, sizeof(sbuf)); 207 inet_ntop(AF_INET6, &ip6->ip6_dst, dbuf, sizeof(dbuf)); 208 snprintf(buf, bufsize, "%s -> %s %d", sbuf, dbuf, ip6->ip6_nxt); 209 } 210 211 212 static NAT64NOINLINE int 213 nat64_embed_ip4(struct nat64_cfg *cfg, in_addr_t ia, struct in6_addr *ip6) 214 { 215 216 /* assume the prefix is properly filled with zeros */ 217 bcopy(&cfg->prefix, ip6, sizeof(*ip6)); 218 switch (cfg->plen) { 219 case 32: 220 case 96: 221 ip6->s6_addr32[cfg->plen / 32] = ia; 222 break; 223 case 40: 224 case 48: 225 case 56: 226 #if BYTE_ORDER == BIG_ENDIAN 227 ip6->s6_addr32[1] = cfg->prefix.s6_addr32[1] | 228 (ia >> (cfg->plen % 32)); 229 ip6->s6_addr32[2] = ia << (24 - cfg->plen % 32); 230 #elif BYTE_ORDER == LITTLE_ENDIAN 231 ip6->s6_addr32[1] = cfg->prefix.s6_addr32[1] | 232 (ia << (cfg->plen % 32)); 233 ip6->s6_addr32[2] = ia >> (24 - cfg->plen % 32); 234 #endif 235 break; 236 case 64: 237 #if BYTE_ORDER == BIG_ENDIAN 238 ip6->s6_addr32[2] = ia >> 8; 239 ip6->s6_addr32[3] = ia << 24; 240 #elif BYTE_ORDER == LITTLE_ENDIAN 241 ip6->s6_addr32[2] = ia << 8; 242 ip6->s6_addr32[3] = ia >> 24; 243 #endif 244 break; 245 default: 246 return (0); 247 }; 248 ip6->s6_addr8[8] = 0; 249 return (1); 250 } 251 252 static NAT64NOINLINE in_addr_t 253 nat64_extract_ip4(struct in6_addr *ip6, int plen) 254 { 255 in_addr_t ia; 256 257 /* 258 * According to RFC 6052 p2.2: 259 * IPv4-embedded IPv6 addresses are composed of a variable-length 260 * prefix, the embedded IPv4 address, and a variable length suffix. 261 * The suffix bits are reserved for future extensions and SHOULD 262 * be set to zero. 263 */ 264 switch (plen) { 265 case 32: 266 if (ip6->s6_addr32[3] != 0 || ip6->s6_addr32[2] != 0) 267 goto badip6; 268 break; 269 case 40: 270 if (ip6->s6_addr32[3] != 0 || 271 (ip6->s6_addr32[2] & htonl(0xff00ffff)) != 0) 272 goto badip6; 273 break; 274 case 48: 275 if (ip6->s6_addr32[3] != 0 || 276 (ip6->s6_addr32[2] & htonl(0xff0000ff)) != 0) 277 goto badip6; 278 break; 279 case 56: 280 if (ip6->s6_addr32[3] != 0 || ip6->s6_addr8[8] != 0) 281 goto badip6; 282 break; 283 case 64: 284 if (ip6->s6_addr8[8] != 0 || 285 (ip6->s6_addr32[3] & htonl(0x00ffffff)) != 0) 286 goto badip6; 287 }; 288 switch (plen) { 289 case 32: 290 case 96: 291 ia = ip6->s6_addr32[plen / 32]; 292 break; 293 case 40: 294 case 48: 295 case 56: 296 #if BYTE_ORDER == BIG_ENDIAN 297 ia = (ip6->s6_addr32[1] << (plen % 32)) | 298 (ip6->s6_addr32[2] >> (24 - plen % 32)); 299 #elif BYTE_ORDER == LITTLE_ENDIAN 300 ia = (ip6->s6_addr32[1] >> (plen % 32)) | 301 (ip6->s6_addr32[2] << (24 - plen % 32)); 302 #endif 303 break; 304 case 64: 305 #if BYTE_ORDER == BIG_ENDIAN 306 ia = (ip6->s6_addr32[2] << 8) | (ip6->s6_addr32[3] >> 24); 307 #elif BYTE_ORDER == LITTLE_ENDIAN 308 ia = (ip6->s6_addr32[2] >> 8) | (ip6->s6_addr32[3] << 24); 309 #endif 310 break; 311 default: 312 return (0); 313 }; 314 if (nat64_check_ip4(ia) != 0 || 315 nat64_check_private_ip4(ia) != 0) 316 goto badip4; 317 318 return (ia); 319 badip4: 320 DPRINTF(DP_GENERIC, "invalid destination address: %08x", ia); 321 return (0); 322 badip6: 323 DPRINTF(DP_GENERIC, "invalid IPv4-embedded IPv6 address"); 324 return (0); 325 } 326 #endif 327 328 /* 329 * According to RFC 1624 the equation for incremental checksum update is: 330 * HC' = ~(~HC + ~m + m') -- [Eqn. 3] 331 * HC' = HC - ~m - m' -- [Eqn. 4] 332 * So, when we are replacing IPv4 addresses to IPv6, we 333 * can assume, that new bytes previously were zeros, and vise versa - 334 * when we replacing IPv6 addresses to IPv4, now unused bytes become 335 * zeros. The payload length in pseudo header has bigger size, but one 336 * half of it should be zero. Using the equation 4 we get: 337 * HC' = HC - (~m0 + m0') -- m0 is first changed word 338 * HC' = (HC - (~m0 + m0')) - (~m1 + m1') -- m1 is second changed word 339 * HC' = HC - ~m0 - m0' - ~m1 - m1' - ... = 340 * = HC - sum(~m[i] + m'[i]) 341 * 342 * The function result should be used as follows: 343 * IPv6 to IPv4: HC' = cksum_add(HC, result) 344 * IPv4 to IPv6: HC' = cksum_add(HC, ~result) 345 */ 346 static NAT64NOINLINE uint16_t 347 nat64_cksum_convert(struct ip6_hdr *ip6, struct ip *ip) 348 { 349 uint32_t sum; 350 uint16_t *p; 351 352 sum = ~ip->ip_src.s_addr >> 16; 353 sum += ~ip->ip_src.s_addr & 0xffff; 354 sum += ~ip->ip_dst.s_addr >> 16; 355 sum += ~ip->ip_dst.s_addr & 0xffff; 356 357 for (p = (uint16_t *)&ip6->ip6_src; 358 p < (uint16_t *)(&ip6->ip6_src + 2); p++) 359 sum += *p; 360 361 while (sum >> 16) 362 sum = (sum & 0xffff) + (sum >> 16); 363 return (sum); 364 } 365 366 #if __FreeBSD_version < 1100000 367 #define ip_fillid(ip) (ip)->ip_id = ip_newid() 368 #endif 369 static NAT64NOINLINE void 370 nat64_init_ip4hdr(const struct ip6_hdr *ip6, const struct ip6_frag *frag, 371 uint16_t plen, uint8_t proto, struct ip *ip) 372 { 373 374 /* assume addresses are already initialized */ 375 ip->ip_v = IPVERSION; 376 ip->ip_hl = sizeof(*ip) >> 2; 377 ip->ip_tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; 378 ip->ip_len = htons(sizeof(*ip) + plen); 379 #ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT 380 ip->ip_ttl = ip6->ip6_hlim - IPV6_HLIMDEC; 381 #else 382 /* Forwarding code will decrement TTL. */ 383 ip->ip_ttl = ip6->ip6_hlim; 384 #endif 385 ip->ip_sum = 0; 386 ip->ip_p = (proto == IPPROTO_ICMPV6) ? IPPROTO_ICMP: proto; 387 ip_fillid(ip); 388 if (frag != NULL) { 389 ip->ip_off = htons(ntohs(frag->ip6f_offlg) >> 3); 390 if (frag->ip6f_offlg & IP6F_MORE_FRAG) 391 ip->ip_off |= htons(IP_MF); 392 } else { 393 ip->ip_off = htons(IP_DF); 394 } 395 ip->ip_sum = in_cksum_hdr(ip); 396 } 397 398 #define FRAGSZ(mtu) ((mtu) - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag)) 399 static NAT64NOINLINE int 400 nat64_fragment6(nat64_stats_block *stats, struct ip6_hdr *ip6, struct mbufq *mq, 401 struct mbuf *m, uint32_t mtu, uint16_t ip_id, uint16_t ip_off) 402 { 403 struct ip6_frag ip6f; 404 struct mbuf *n; 405 uint16_t hlen, len, offset; 406 int plen; 407 408 plen = ntohs(ip6->ip6_plen); 409 hlen = sizeof(struct ip6_hdr); 410 411 /* Fragmentation isn't needed */ 412 if (ip_off == 0 && plen <= mtu - hlen) { 413 M_PREPEND(m, hlen, M_NOWAIT); 414 if (m == NULL) { 415 NAT64STAT_INC(stats, nomem); 416 return (ENOMEM); 417 } 418 bcopy(ip6, mtod(m, void *), hlen); 419 if (mbufq_enqueue(mq, m) != 0) { 420 m_freem(m); 421 NAT64STAT_INC(stats, dropped); 422 DPRINTF(DP_DROPS, "dropped due to mbufq overflow"); 423 return (ENOBUFS); 424 } 425 return (0); 426 } 427 428 hlen += sizeof(struct ip6_frag); 429 ip6f.ip6f_reserved = 0; 430 ip6f.ip6f_nxt = ip6->ip6_nxt; 431 ip6->ip6_nxt = IPPROTO_FRAGMENT; 432 if (ip_off != 0) { 433 /* 434 * We have got an IPv4 fragment. 435 * Use offset value and ip_id from original fragment. 436 */ 437 ip6f.ip6f_ident = htonl(ntohs(ip_id)); 438 offset = (ntohs(ip_off) & IP_OFFMASK) << 3; 439 NAT64STAT_INC(stats, ifrags); 440 } else { 441 /* The packet size exceeds interface MTU */ 442 ip6f.ip6f_ident = htonl(ip6_randomid()); 443 offset = 0; /* First fragment*/ 444 } 445 while (plen > 0 && m != NULL) { 446 n = NULL; 447 len = FRAGSZ(mtu) & ~7; 448 if (len > plen) 449 len = plen; 450 ip6->ip6_plen = htons(len + sizeof(ip6f)); 451 ip6f.ip6f_offlg = ntohs(offset); 452 if (len < plen || (ip_off & htons(IP_MF)) != 0) 453 ip6f.ip6f_offlg |= IP6F_MORE_FRAG; 454 offset += len; 455 plen -= len; 456 if (plen > 0) { 457 n = m_split(m, len, M_NOWAIT); 458 if (n == NULL) 459 goto fail; 460 } 461 M_PREPEND(m, hlen, M_NOWAIT); 462 if (m == NULL) 463 goto fail; 464 bcopy(ip6, mtod(m, void *), sizeof(struct ip6_hdr)); 465 bcopy(&ip6f, mtodo(m, sizeof(struct ip6_hdr)), 466 sizeof(struct ip6_frag)); 467 if (mbufq_enqueue(mq, m) != 0) 468 goto fail; 469 m = n; 470 } 471 NAT64STAT_ADD(stats, ofrags, mbufq_len(mq)); 472 return (0); 473 fail: 474 if (m != NULL) 475 m_freem(m); 476 if (n != NULL) 477 m_freem(n); 478 mbufq_drain(mq); 479 NAT64STAT_INC(stats, nomem); 480 return (ENOMEM); 481 } 482 483 static NAT64NOINLINE int 484 nat64_find_route6(struct nhop6_basic *pnh, struct sockaddr_in6 *dst, 485 struct mbuf *m) 486 { 487 488 if (fib6_lookup_nh_basic(M_GETFIB(m), &dst->sin6_addr, 0, 0, 0, 489 pnh) != 0) 490 return (EHOSTUNREACH); 491 if (pnh->nh_flags & (NHF_BLACKHOLE | NHF_REJECT)) 492 return (EHOSTUNREACH); 493 /* 494 * XXX: we need to use destination address with embedded scope 495 * zone id, because LLTABLE uses such form of addresses for lookup. 496 */ 497 dst->sin6_family = AF_INET6; 498 dst->sin6_len = sizeof(*dst); 499 dst->sin6_addr = pnh->nh_addr; 500 if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr)) 501 dst->sin6_addr.s6_addr16[1] = 502 htons(pnh->nh_ifp->if_index & 0xffff); 503 dst->sin6_port = 0; 504 dst->sin6_scope_id = 0; 505 dst->sin6_flowinfo = 0; 506 507 return (0); 508 } 509 510 #define NAT64_ICMP6_PLEN 64 511 static NAT64NOINLINE void 512 nat64_icmp6_reflect(struct mbuf *m, uint8_t type, uint8_t code, uint32_t mtu, 513 nat64_stats_block *stats, void *logdata) 514 { 515 struct icmp6_hdr *icmp6; 516 struct ip6_hdr *ip6, *oip6; 517 struct mbuf *n; 518 int len, plen; 519 520 len = 0; 521 plen = nat64_getlasthdr(m, &len); 522 if (plen < 0) { 523 DPRINTF(DP_DROPS, "mbuf isn't contigious"); 524 goto freeit; 525 } 526 /* 527 * Do not send ICMPv6 in reply to ICMPv6 errors. 528 */ 529 if (plen == IPPROTO_ICMPV6) { 530 if (m->m_len < len + sizeof(*icmp6)) { 531 DPRINTF(DP_DROPS, "mbuf isn't contigious"); 532 goto freeit; 533 } 534 icmp6 = mtodo(m, len); 535 if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST || 536 icmp6->icmp6_type == ND_REDIRECT) { 537 DPRINTF(DP_DROPS, "do not send ICMPv6 in reply to " 538 "ICMPv6 errors"); 539 goto freeit; 540 } 541 } 542 /* 543 if (icmp6_ratelimit(&ip6->ip6_src, type, code)) 544 goto freeit; 545 */ 546 ip6 = mtod(m, struct ip6_hdr *); 547 switch (type) { 548 case ICMP6_DST_UNREACH: 549 case ICMP6_PACKET_TOO_BIG: 550 case ICMP6_TIME_EXCEEDED: 551 case ICMP6_PARAM_PROB: 552 break; 553 default: 554 goto freeit; 555 } 556 /* Calculate length of ICMPv6 payload */ 557 len = (m->m_pkthdr.len > NAT64_ICMP6_PLEN) ? NAT64_ICMP6_PLEN: 558 m->m_pkthdr.len; 559 560 /* Create new ICMPv6 datagram */ 561 plen = len + sizeof(struct icmp6_hdr); 562 n = m_get2(sizeof(struct ip6_hdr) + plen + max_hdr, M_NOWAIT, 563 MT_HEADER, M_PKTHDR); 564 if (n == NULL) { 565 NAT64STAT_INC(stats, nomem); 566 m_freem(m); 567 return; 568 } 569 /* 570 * Move pkthdr from original mbuf. We should have initialized some 571 * fields, because we can reinject this mbuf to netisr and it will 572 * go trough input path (it requires at least rcvif should be set). 573 * Also do M_ALIGN() to reduce chances of need to allocate new mbuf 574 * in the chain, when we will do M_PREPEND() or make some type of 575 * tunneling. 576 */ 577 m_move_pkthdr(n, m); 578 M_ALIGN(n, sizeof(struct ip6_hdr) + plen + max_hdr); 579 580 n->m_len = n->m_pkthdr.len = sizeof(struct ip6_hdr) + plen; 581 oip6 = mtod(n, struct ip6_hdr *); 582 oip6->ip6_src = ip6->ip6_dst; 583 oip6->ip6_dst = ip6->ip6_src; 584 oip6->ip6_nxt = IPPROTO_ICMPV6; 585 oip6->ip6_flow = 0; 586 oip6->ip6_vfc |= IPV6_VERSION; 587 oip6->ip6_hlim = V_ip6_defhlim; 588 oip6->ip6_plen = htons(plen); 589 590 icmp6 = mtodo(n, sizeof(struct ip6_hdr)); 591 icmp6->icmp6_cksum = 0; 592 icmp6->icmp6_type = type; 593 icmp6->icmp6_code = code; 594 icmp6->icmp6_mtu = htonl(mtu); 595 596 m_copydata(m, 0, len, mtodo(n, sizeof(struct ip6_hdr) + 597 sizeof(struct icmp6_hdr))); 598 icmp6->icmp6_cksum = in6_cksum(n, IPPROTO_ICMPV6, 599 sizeof(struct ip6_hdr), plen); 600 m_freem(m); 601 nat64_output_one(n, stats, logdata); 602 return; 603 freeit: 604 NAT64STAT_INC(stats, dropped); 605 m_freem(m); 606 } 607 608 static NAT64NOINLINE int 609 nat64_find_route4(struct nhop4_basic *pnh, struct sockaddr_in *dst, 610 struct mbuf *m) 611 { 612 613 if (fib4_lookup_nh_basic(M_GETFIB(m), dst->sin_addr, 0, 0, pnh) != 0) 614 return (EHOSTUNREACH); 615 if (pnh->nh_flags & (NHF_BLACKHOLE | NHF_BROADCAST | NHF_REJECT)) 616 return (EHOSTUNREACH); 617 618 dst->sin_family = AF_INET; 619 dst->sin_len = sizeof(*dst); 620 dst->sin_addr = pnh->nh_addr; 621 dst->sin_port = 0; 622 return (0); 623 } 624 625 #define NAT64_ICMP_PLEN 64 626 static NAT64NOINLINE void 627 nat64_icmp_reflect(struct mbuf *m, uint8_t type, 628 uint8_t code, uint16_t mtu, nat64_stats_block *stats, void *logdata) 629 { 630 struct icmp *icmp; 631 struct ip *ip, *oip; 632 struct mbuf *n; 633 int len, plen; 634 635 ip = mtod(m, struct ip *); 636 /* Do not send ICMP error if packet is not the first fragment */ 637 if (ip->ip_off & ~ntohs(IP_MF|IP_DF)) { 638 DPRINTF(DP_DROPS, "not first fragment"); 639 goto freeit; 640 } 641 /* Do not send ICMP in reply to ICMP errors */ 642 if (ip->ip_p == IPPROTO_ICMP) { 643 if (m->m_len < (ip->ip_hl << 2)) { 644 DPRINTF(DP_DROPS, "mbuf isn't contigious"); 645 goto freeit; 646 } 647 icmp = mtodo(m, ip->ip_hl << 2); 648 if (!ICMP_INFOTYPE(icmp->icmp_type)) { 649 DPRINTF(DP_DROPS, "do not send ICMP in reply to " 650 "ICMP errors"); 651 goto freeit; 652 } 653 } 654 switch (type) { 655 case ICMP_UNREACH: 656 case ICMP_TIMXCEED: 657 case ICMP_PARAMPROB: 658 break; 659 default: 660 goto freeit; 661 } 662 /* Calculate length of ICMP payload */ 663 len = (m->m_pkthdr.len > NAT64_ICMP_PLEN) ? (ip->ip_hl << 2) + 8: 664 m->m_pkthdr.len; 665 666 /* Create new ICMPv4 datagram */ 667 plen = len + sizeof(struct icmphdr) + sizeof(uint32_t); 668 n = m_get2(sizeof(struct ip) + plen + max_hdr, M_NOWAIT, 669 MT_HEADER, M_PKTHDR); 670 if (n == NULL) { 671 NAT64STAT_INC(stats, nomem); 672 m_freem(m); 673 return; 674 } 675 m_move_pkthdr(n, m); 676 M_ALIGN(n, sizeof(struct ip) + plen + max_hdr); 677 678 n->m_len = n->m_pkthdr.len = sizeof(struct ip) + plen; 679 oip = mtod(n, struct ip *); 680 oip->ip_v = IPVERSION; 681 oip->ip_hl = sizeof(struct ip) >> 2; 682 oip->ip_tos = 0; 683 oip->ip_len = htons(n->m_pkthdr.len); 684 oip->ip_ttl = V_ip_defttl; 685 oip->ip_p = IPPROTO_ICMP; 686 ip_fillid(oip); 687 oip->ip_off = htons(IP_DF); 688 oip->ip_src = ip->ip_dst; 689 oip->ip_dst = ip->ip_src; 690 oip->ip_sum = 0; 691 oip->ip_sum = in_cksum_hdr(oip); 692 693 icmp = mtodo(n, sizeof(struct ip)); 694 icmp->icmp_type = type; 695 icmp->icmp_code = code; 696 icmp->icmp_cksum = 0; 697 icmp->icmp_pmvoid = 0; 698 icmp->icmp_nextmtu = htons(mtu); 699 m_copydata(m, 0, len, mtodo(n, sizeof(struct ip) + 700 sizeof(struct icmphdr) + sizeof(uint32_t))); 701 icmp->icmp_cksum = in_cksum_skip(n, sizeof(struct ip) + plen, 702 sizeof(struct ip)); 703 m_freem(m); 704 nat64_output_one(n, stats, logdata); 705 return; 706 freeit: 707 NAT64STAT_INC(stats, dropped); 708 m_freem(m); 709 } 710 711 /* Translate ICMP echo request/reply into ICMPv6 */ 712 static void 713 nat64_icmp_handle_echo(struct ip6_hdr *ip6, struct icmp6_hdr *icmp6, 714 uint16_t id, uint8_t type) 715 { 716 uint16_t old; 717 718 old = *(uint16_t *)icmp6; /* save type+code in one word */ 719 icmp6->icmp6_type = type; 720 /* Reflect ICMPv6 -> ICMPv4 type translation in the cksum */ 721 icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum, 722 old, *(uint16_t *)icmp6); 723 if (id != 0) { 724 old = icmp6->icmp6_id; 725 icmp6->icmp6_id = id; 726 /* Reflect ICMP id translation in the cksum */ 727 icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum, 728 old, id); 729 } 730 /* Reflect IPv6 pseudo header in the cksum */ 731 icmp6->icmp6_cksum = ~in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen), 732 IPPROTO_ICMPV6, ~icmp6->icmp6_cksum); 733 } 734 735 static NAT64NOINLINE struct mbuf * 736 nat64_icmp_translate(struct mbuf *m, struct ip6_hdr *ip6, uint16_t icmpid, 737 int offset, nat64_stats_block *stats) 738 { 739 struct ip ip; 740 struct icmp *icmp; 741 struct tcphdr *tcp; 742 struct udphdr *udp; 743 struct ip6_hdr *eip6; 744 struct mbuf *n; 745 uint32_t mtu; 746 int len, hlen, plen; 747 uint8_t type, code; 748 749 if (m->m_len < offset + ICMP_MINLEN) 750 m = m_pullup(m, offset + ICMP_MINLEN); 751 if (m == NULL) { 752 NAT64STAT_INC(stats, nomem); 753 return (m); 754 } 755 mtu = 0; 756 icmp = mtodo(m, offset); 757 /* RFC 7915 p4.2 */ 758 switch (icmp->icmp_type) { 759 case ICMP_ECHOREPLY: 760 type = ICMP6_ECHO_REPLY; 761 code = 0; 762 break; 763 case ICMP_UNREACH: 764 type = ICMP6_DST_UNREACH; 765 switch (icmp->icmp_code) { 766 case ICMP_UNREACH_NET: 767 case ICMP_UNREACH_HOST: 768 case ICMP_UNREACH_SRCFAIL: 769 case ICMP_UNREACH_NET_UNKNOWN: 770 case ICMP_UNREACH_HOST_UNKNOWN: 771 case ICMP_UNREACH_TOSNET: 772 case ICMP_UNREACH_TOSHOST: 773 code = ICMP6_DST_UNREACH_NOROUTE; 774 break; 775 case ICMP_UNREACH_PROTOCOL: 776 type = ICMP6_PARAM_PROB; 777 code = ICMP6_PARAMPROB_NEXTHEADER; 778 break; 779 case ICMP_UNREACH_PORT: 780 code = ICMP6_DST_UNREACH_NOPORT; 781 break; 782 case ICMP_UNREACH_NEEDFRAG: 783 type = ICMP6_PACKET_TOO_BIG; 784 code = 0; 785 /* XXX: needs an additional look */ 786 mtu = max(IPV6_MMTU, ntohs(icmp->icmp_nextmtu) + 20); 787 break; 788 case ICMP_UNREACH_NET_PROHIB: 789 case ICMP_UNREACH_HOST_PROHIB: 790 case ICMP_UNREACH_FILTER_PROHIB: 791 case ICMP_UNREACH_PRECEDENCE_CUTOFF: 792 code = ICMP6_DST_UNREACH_ADMIN; 793 break; 794 default: 795 DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d", 796 icmp->icmp_type, icmp->icmp_code); 797 goto freeit; 798 } 799 break; 800 case ICMP_TIMXCEED: 801 type = ICMP6_TIME_EXCEEDED; 802 code = icmp->icmp_code; 803 break; 804 case ICMP_ECHO: 805 type = ICMP6_ECHO_REQUEST; 806 code = 0; 807 break; 808 case ICMP_PARAMPROB: 809 type = ICMP6_PARAM_PROB; 810 switch (icmp->icmp_code) { 811 case ICMP_PARAMPROB_ERRATPTR: 812 case ICMP_PARAMPROB_LENGTH: 813 code = ICMP6_PARAMPROB_HEADER; 814 switch (icmp->icmp_pptr) { 815 case 0: /* Version/IHL */ 816 case 1: /* Type Of Service */ 817 mtu = icmp->icmp_pptr; 818 break; 819 case 2: /* Total Length */ 820 case 3: mtu = 4; /* Payload Length */ 821 break; 822 case 8: /* Time to Live */ 823 mtu = 7; /* Hop Limit */ 824 break; 825 case 9: /* Protocol */ 826 mtu = 6; /* Next Header */ 827 break; 828 case 12: /* Source address */ 829 case 13: 830 case 14: 831 case 15: 832 mtu = 8; 833 break; 834 case 16: /* Destination address */ 835 case 17: 836 case 18: 837 case 19: 838 mtu = 24; 839 break; 840 default: /* Silently drop */ 841 DPRINTF(DP_DROPS, "Unsupported ICMP type %d," 842 " code %d, pptr %d", icmp->icmp_type, 843 icmp->icmp_code, icmp->icmp_pptr); 844 goto freeit; 845 } 846 break; 847 default: 848 DPRINTF(DP_DROPS, "Unsupported ICMP type %d," 849 " code %d, pptr %d", icmp->icmp_type, 850 icmp->icmp_code, icmp->icmp_pptr); 851 goto freeit; 852 } 853 break; 854 default: 855 DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d", 856 icmp->icmp_type, icmp->icmp_code); 857 goto freeit; 858 } 859 /* 860 * For echo request/reply we can use original payload, 861 * but we need adjust icmp_cksum, because ICMPv6 cksum covers 862 * IPv6 pseudo header and ICMPv6 types differs from ICMPv4. 863 */ 864 if (type == ICMP6_ECHO_REQUEST || type == ICMP6_ECHO_REPLY) { 865 nat64_icmp_handle_echo(ip6, ICMP6(icmp), icmpid, type); 866 return (m); 867 } 868 /* 869 * For other types of ICMP messages we need to translate inner 870 * IPv4 header to IPv6 header. 871 * Assume ICMP src is the same as payload dst 872 * E.g. we have ( GWsrc1 , NATIP1 ) in outer header 873 * and ( NATIP1, Hostdst1 ) in ICMP copy header. 874 * In that case, we already have map for NATIP1 and GWsrc1. 875 * The only thing we need is to copy IPv6 map prefix to 876 * Hostdst1. 877 */ 878 hlen = offset + ICMP_MINLEN; 879 if (m->m_pkthdr.len < hlen + sizeof(struct ip) + ICMP_MINLEN) { 880 DPRINTF(DP_DROPS, "Message is too short %d", 881 m->m_pkthdr.len); 882 goto freeit; 883 } 884 m_copydata(m, hlen, sizeof(struct ip), (char *)&ip); 885 if (ip.ip_v != IPVERSION) { 886 DPRINTF(DP_DROPS, "Wrong IP version %d", ip.ip_v); 887 goto freeit; 888 } 889 hlen += ip.ip_hl << 2; /* Skip inner IP header */ 890 if (nat64_check_ip4(ip.ip_src.s_addr) != 0 || 891 nat64_check_ip4(ip.ip_dst.s_addr) != 0 || 892 nat64_check_private_ip4(ip.ip_src.s_addr) != 0 || 893 nat64_check_private_ip4(ip.ip_dst.s_addr) != 0) { 894 DPRINTF(DP_DROPS, "IP addresses checks failed %04x -> %04x", 895 ntohl(ip.ip_src.s_addr), ntohl(ip.ip_dst.s_addr)); 896 goto freeit; 897 } 898 if (m->m_pkthdr.len < hlen + ICMP_MINLEN) { 899 DPRINTF(DP_DROPS, "Message is too short %d", 900 m->m_pkthdr.len); 901 goto freeit; 902 } 903 #if 0 904 /* 905 * Check that inner source matches the outer destination. 906 * XXX: We need some method to convert IPv4 into IPv6 address here, 907 * and compare IPv6 addresses. 908 */ 909 if (ip.ip_src.s_addr != nat64_get_ip4(&ip6->ip6_dst)) { 910 DPRINTF(DP_GENERIC, "Inner source doesn't match destination ", 911 "%04x vs %04x", ip.ip_src.s_addr, 912 nat64_get_ip4(&ip6->ip6_dst)); 913 goto freeit; 914 } 915 #endif 916 /* 917 * Create new mbuf for ICMPv6 datagram. 918 * NOTE: len is data length just after inner IP header. 919 */ 920 len = m->m_pkthdr.len - hlen; 921 if (sizeof(struct ip6_hdr) + 922 sizeof(struct icmp6_hdr) + len > NAT64_ICMP6_PLEN) 923 len = NAT64_ICMP6_PLEN - sizeof(struct icmp6_hdr) - 924 sizeof(struct ip6_hdr); 925 plen = sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr) + len; 926 n = m_get2(offset + plen + max_hdr, M_NOWAIT, MT_HEADER, M_PKTHDR); 927 if (n == NULL) { 928 NAT64STAT_INC(stats, nomem); 929 m_freem(m); 930 return (NULL); 931 } 932 m_move_pkthdr(n, m); 933 M_ALIGN(n, offset + plen + max_hdr); 934 n->m_len = n->m_pkthdr.len = offset + plen; 935 /* Adjust ip6_plen in outer header */ 936 ip6->ip6_plen = htons(plen); 937 /* Construct new inner IPv6 header */ 938 eip6 = mtodo(n, offset + sizeof(struct icmp6_hdr)); 939 eip6->ip6_src = ip6->ip6_dst; 940 /* Use the fact that we have single /96 prefix for IPv4 map */ 941 eip6->ip6_dst = ip6->ip6_src; 942 nat64_set_ip4(&eip6->ip6_dst, ip.ip_dst.s_addr); 943 944 eip6->ip6_flow = htonl(ip.ip_tos << 20); 945 eip6->ip6_vfc |= IPV6_VERSION; 946 eip6->ip6_hlim = ip.ip_ttl; 947 eip6->ip6_plen = htons(ntohs(ip.ip_len) - (ip.ip_hl << 2)); 948 eip6->ip6_nxt = (ip.ip_p == IPPROTO_ICMP) ? IPPROTO_ICMPV6: ip.ip_p; 949 m_copydata(m, hlen, len, (char *)(eip6 + 1)); 950 /* 951 * We need to translate source port in the inner ULP header, 952 * and adjust ULP checksum. 953 */ 954 switch (ip.ip_p) { 955 case IPPROTO_TCP: 956 if (len < offsetof(struct tcphdr, th_sum)) 957 break; 958 tcp = TCP(eip6 + 1); 959 if (icmpid != 0) { 960 tcp->th_sum = cksum_adjust(tcp->th_sum, 961 tcp->th_sport, icmpid); 962 tcp->th_sport = icmpid; 963 } 964 tcp->th_sum = cksum_add(tcp->th_sum, 965 ~nat64_cksum_convert(eip6, &ip)); 966 break; 967 case IPPROTO_UDP: 968 if (len < offsetof(struct udphdr, uh_sum)) 969 break; 970 udp = UDP(eip6 + 1); 971 if (icmpid != 0) { 972 udp->uh_sum = cksum_adjust(udp->uh_sum, 973 udp->uh_sport, icmpid); 974 udp->uh_sport = icmpid; 975 } 976 udp->uh_sum = cksum_add(udp->uh_sum, 977 ~nat64_cksum_convert(eip6, &ip)); 978 break; 979 case IPPROTO_ICMP: 980 /* 981 * Check if this is an ICMP error message for echo request 982 * that we sent. I.e. ULP in the data containing invoking 983 * packet is IPPROTO_ICMP and its type is ICMP_ECHO. 984 */ 985 icmp = (struct icmp *)(eip6 + 1); 986 if (icmp->icmp_type != ICMP_ECHO) { 987 m_freem(n); 988 goto freeit; 989 } 990 /* 991 * For our client this original datagram should looks 992 * like it was ICMPv6 datagram with type ICMP6_ECHO_REQUEST. 993 * Thus we need adjust icmp_cksum and convert type from 994 * ICMP_ECHO to ICMP6_ECHO_REQUEST. 995 */ 996 nat64_icmp_handle_echo(eip6, ICMP6(icmp), icmpid, 997 ICMP6_ECHO_REQUEST); 998 } 999 m_freem(m); 1000 /* Convert ICMPv4 into ICMPv6 header */ 1001 icmp = mtodo(n, offset); 1002 ICMP6(icmp)->icmp6_type = type; 1003 ICMP6(icmp)->icmp6_code = code; 1004 ICMP6(icmp)->icmp6_mtu = htonl(mtu); 1005 ICMP6(icmp)->icmp6_cksum = 0; 1006 ICMP6(icmp)->icmp6_cksum = cksum_add( 1007 ~in6_cksum_pseudo(ip6, plen, IPPROTO_ICMPV6, 0), 1008 in_cksum_skip(n, n->m_pkthdr.len, offset)); 1009 return (n); 1010 freeit: 1011 m_freem(m); 1012 NAT64STAT_INC(stats, dropped); 1013 return (NULL); 1014 } 1015 1016 int 1017 nat64_getlasthdr(struct mbuf *m, int *offset) 1018 { 1019 struct ip6_hdr *ip6; 1020 struct ip6_hbh *hbh; 1021 int proto, hlen; 1022 1023 if (offset != NULL) 1024 hlen = *offset; 1025 else 1026 hlen = 0; 1027 1028 if (m->m_len < hlen + sizeof(*ip6)) 1029 return (-1); 1030 1031 ip6 = mtodo(m, hlen); 1032 hlen += sizeof(*ip6); 1033 proto = ip6->ip6_nxt; 1034 /* Skip extension headers */ 1035 while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING || 1036 proto == IPPROTO_DSTOPTS) { 1037 hbh = mtodo(m, hlen); 1038 /* 1039 * We expect mbuf has contigious data up to 1040 * upper level header. 1041 */ 1042 if (m->m_len < hlen) 1043 return (-1); 1044 /* 1045 * We doesn't support Jumbo payload option, 1046 * so return error. 1047 */ 1048 if (proto == IPPROTO_HOPOPTS && ip6->ip6_plen == 0) 1049 return (-1); 1050 proto = hbh->ip6h_nxt; 1051 hlen += (hbh->ip6h_len + 1) << 3; 1052 } 1053 if (offset != NULL) 1054 *offset = hlen; 1055 return (proto); 1056 } 1057 1058 int 1059 nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr, 1060 struct in6_addr *daddr, uint16_t lport, nat64_stats_block *stats, 1061 void *logdata) 1062 { 1063 struct nhop6_basic nh; 1064 struct ip6_hdr ip6; 1065 struct sockaddr_in6 dst; 1066 struct ip *ip; 1067 struct mbufq mq; 1068 uint16_t ip_id, ip_off; 1069 uint16_t *csum; 1070 int plen, hlen; 1071 uint8_t proto; 1072 1073 ip = mtod(m, struct ip*); 1074 1075 if (ip->ip_ttl <= IPTTLDEC) { 1076 nat64_icmp_reflect(m, ICMP_TIMXCEED, 1077 ICMP_TIMXCEED_INTRANS, 0, stats, logdata); 1078 return (NAT64RETURN); 1079 } 1080 1081 ip6.ip6_dst = *daddr; 1082 ip6.ip6_src = *saddr; 1083 1084 hlen = ip->ip_hl << 2; 1085 plen = ntohs(ip->ip_len) - hlen; 1086 proto = ip->ip_p; 1087 1088 /* Save ip_id and ip_off, both are in network byte order */ 1089 ip_id = ip->ip_id; 1090 ip_off = ip->ip_off & htons(IP_OFFMASK | IP_MF); 1091 1092 /* Fragment length must be multiple of 8 octets */ 1093 if ((ip->ip_off & htons(IP_MF)) != 0 && (plen & 0x7) != 0) { 1094 nat64_icmp_reflect(m, ICMP_PARAMPROB, 1095 ICMP_PARAMPROB_LENGTH, 0, stats, logdata); 1096 return (NAT64RETURN); 1097 } 1098 /* Fragmented ICMP is unsupported */ 1099 if (proto == IPPROTO_ICMP && ip_off != 0) { 1100 DPRINTF(DP_DROPS, "dropped due to fragmented ICMP"); 1101 NAT64STAT_INC(stats, dropped); 1102 return (NAT64MFREE); 1103 } 1104 1105 dst.sin6_addr = ip6.ip6_dst; 1106 if (nat64_find_route6(&nh, &dst, m) != 0) { 1107 NAT64STAT_INC(stats, noroute6); 1108 nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 1109 stats, logdata); 1110 return (NAT64RETURN); 1111 } 1112 if (nh.nh_mtu < plen + sizeof(ip6) && 1113 (ip->ip_off & htons(IP_DF)) != 0) { 1114 nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 1115 FRAGSZ(nh.nh_mtu) + sizeof(struct ip), stats, logdata); 1116 return (NAT64RETURN); 1117 } 1118 1119 ip6.ip6_flow = htonl(ip->ip_tos << 20); 1120 ip6.ip6_vfc |= IPV6_VERSION; 1121 #ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT 1122 ip6.ip6_hlim = ip->ip_ttl - IPTTLDEC; 1123 #else 1124 /* Forwarding code will decrement HLIM. */ 1125 ip6.ip6_hlim = ip->ip_ttl; 1126 #endif 1127 ip6.ip6_plen = htons(plen); 1128 ip6.ip6_nxt = (proto == IPPROTO_ICMP) ? IPPROTO_ICMPV6: proto; 1129 /* Convert checksums. */ 1130 switch (proto) { 1131 case IPPROTO_TCP: 1132 csum = &TCP(mtodo(m, hlen))->th_sum; 1133 if (lport != 0) { 1134 struct tcphdr *tcp = TCP(mtodo(m, hlen)); 1135 *csum = cksum_adjust(*csum, tcp->th_dport, lport); 1136 tcp->th_dport = lport; 1137 } 1138 *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip)); 1139 break; 1140 case IPPROTO_UDP: 1141 csum = &UDP(mtodo(m, hlen))->uh_sum; 1142 if (lport != 0) { 1143 struct udphdr *udp = UDP(mtodo(m, hlen)); 1144 *csum = cksum_adjust(*csum, udp->uh_dport, lport); 1145 udp->uh_dport = lport; 1146 } 1147 *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip)); 1148 break; 1149 case IPPROTO_ICMP: 1150 m = nat64_icmp_translate(m, &ip6, lport, hlen, stats); 1151 if (m == NULL) /* stats already accounted */ 1152 return (NAT64RETURN); 1153 } 1154 1155 m_adj(m, hlen); 1156 mbufq_init(&mq, 255); 1157 nat64_fragment6(stats, &ip6, &mq, m, nh.nh_mtu, ip_id, ip_off); 1158 while ((m = mbufq_dequeue(&mq)) != NULL) { 1159 if (nat64_output(nh.nh_ifp, m, (struct sockaddr *)&dst, 1160 NULL, stats, logdata) != 0) 1161 break; 1162 NAT64STAT_INC(stats, opcnt46); 1163 } 1164 mbufq_drain(&mq); 1165 return (NAT64RETURN); 1166 } 1167 1168 int 1169 nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport, 1170 nat64_stats_block *stats, void *logdata) 1171 { 1172 struct ip ip; 1173 struct icmp6_hdr *icmp6; 1174 struct ip6_frag *ip6f; 1175 struct ip6_hdr *ip6, *ip6i; 1176 uint32_t mtu; 1177 int plen, proto; 1178 uint8_t type, code; 1179 1180 if (hlen == 0) { 1181 ip6 = mtod(m, struct ip6_hdr *); 1182 if (nat64_check_ip6(&ip6->ip6_src) != 0 || 1183 nat64_check_ip6(&ip6->ip6_dst) != 0) 1184 return (NAT64SKIP); 1185 1186 proto = nat64_getlasthdr(m, &hlen); 1187 if (proto != IPPROTO_ICMPV6) { 1188 DPRINTF(DP_DROPS, 1189 "dropped due to mbuf isn't contigious"); 1190 NAT64STAT_INC(stats, dropped); 1191 return (NAT64MFREE); 1192 } 1193 } 1194 1195 /* 1196 * Translate ICMPv6 type and code to ICMPv4 (RFC7915). 1197 * NOTE: ICMPv6 echo handled by nat64_do_handle_ip6(). 1198 */ 1199 icmp6 = mtodo(m, hlen); 1200 mtu = 0; 1201 switch (icmp6->icmp6_type) { 1202 case ICMP6_DST_UNREACH: 1203 type = ICMP_UNREACH; 1204 switch (icmp6->icmp6_code) { 1205 case ICMP6_DST_UNREACH_NOROUTE: 1206 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1207 case ICMP6_DST_UNREACH_ADDR: 1208 code = ICMP_UNREACH_HOST; 1209 break; 1210 case ICMP6_DST_UNREACH_ADMIN: 1211 code = ICMP_UNREACH_HOST_PROHIB; 1212 break; 1213 case ICMP6_DST_UNREACH_NOPORT: 1214 code = ICMP_UNREACH_PORT; 1215 break; 1216 default: 1217 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," 1218 " code %d", icmp6->icmp6_type, 1219 icmp6->icmp6_code); 1220 NAT64STAT_INC(stats, dropped); 1221 return (NAT64MFREE); 1222 } 1223 break; 1224 case ICMP6_PACKET_TOO_BIG: 1225 type = ICMP_UNREACH; 1226 code = ICMP_UNREACH_NEEDFRAG; 1227 mtu = ntohl(icmp6->icmp6_mtu); 1228 if (mtu < IPV6_MMTU) { 1229 DPRINTF(DP_DROPS, "Wrong MTU %d in ICMPv6 type %d," 1230 " code %d", mtu, icmp6->icmp6_type, 1231 icmp6->icmp6_code); 1232 NAT64STAT_INC(stats, dropped); 1233 return (NAT64MFREE); 1234 } 1235 /* 1236 * Adjust MTU to reflect difference between 1237 * IPv6 an IPv4 headers. 1238 */ 1239 mtu -= sizeof(struct ip6_hdr) - sizeof(struct ip); 1240 break; 1241 case ICMP6_TIME_EXCEEDED: 1242 type = ICMP_TIMXCEED; 1243 code = icmp6->icmp6_code; 1244 break; 1245 case ICMP6_PARAM_PROB: 1246 switch (icmp6->icmp6_code) { 1247 case ICMP6_PARAMPROB_HEADER: 1248 type = ICMP_PARAMPROB; 1249 code = ICMP_PARAMPROB_ERRATPTR; 1250 mtu = ntohl(icmp6->icmp6_pptr); 1251 switch (mtu) { 1252 case 0: /* Version/Traffic Class */ 1253 case 1: /* Traffic Class/Flow Label */ 1254 break; 1255 case 4: /* Payload Length */ 1256 case 5: 1257 mtu = 2; 1258 break; 1259 case 6: /* Next Header */ 1260 mtu = 9; 1261 break; 1262 case 7: /* Hop Limit */ 1263 mtu = 8; 1264 break; 1265 default: 1266 if (mtu >= 8 && mtu <= 23) { 1267 mtu = 12; /* Source address */ 1268 break; 1269 } 1270 if (mtu >= 24 && mtu <= 39) { 1271 mtu = 16; /* Destination address */ 1272 break; 1273 } 1274 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," 1275 " code %d, pptr %d", icmp6->icmp6_type, 1276 icmp6->icmp6_code, mtu); 1277 NAT64STAT_INC(stats, dropped); 1278 return (NAT64MFREE); 1279 } 1280 case ICMP6_PARAMPROB_NEXTHEADER: 1281 type = ICMP_UNREACH; 1282 code = ICMP_UNREACH_PROTOCOL; 1283 break; 1284 default: 1285 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," 1286 " code %d, pptr %d", icmp6->icmp6_type, 1287 icmp6->icmp6_code, ntohl(icmp6->icmp6_pptr)); 1288 NAT64STAT_INC(stats, dropped); 1289 return (NAT64MFREE); 1290 } 1291 break; 1292 default: 1293 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d, code %d", 1294 icmp6->icmp6_type, icmp6->icmp6_code); 1295 NAT64STAT_INC(stats, dropped); 1296 return (NAT64MFREE); 1297 } 1298 1299 hlen += sizeof(struct icmp6_hdr); 1300 if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) { 1301 NAT64STAT_INC(stats, dropped); 1302 DPRINTF(DP_DROPS, "Message is too short %d", 1303 m->m_pkthdr.len); 1304 return (NAT64MFREE); 1305 } 1306 /* 1307 * We need at least ICMP_MINLEN bytes of original datagram payload 1308 * to generate ICMP message. It is nice that ICMP_MINLEN is equal 1309 * to sizeof(struct ip6_frag). So, if embedded datagram had a fragment 1310 * header we will not have to do m_pullup() again. 1311 * 1312 * What we have here: 1313 * Outer header: (IPv6iGW, v4mapPRefix+v4exthost) 1314 * Inner header: (v4mapPRefix+v4host, IPv6iHost) [sport, dport] 1315 * We need to translate it to: 1316 * 1317 * Outer header: (alias_host, v4exthost) 1318 * Inner header: (v4exthost, alias_host) [sport, alias_port] 1319 * 1320 * Assume caller function has checked if v4mapPRefix+v4host 1321 * matches configured prefix. 1322 * The only two things we should be provided with are mapping between 1323 * IPv6iHost <> alias_host and between dport and alias_port. 1324 */ 1325 if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) 1326 m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN); 1327 if (m == NULL) { 1328 NAT64STAT_INC(stats, nomem); 1329 return (NAT64RETURN); 1330 } 1331 ip6 = mtod(m, struct ip6_hdr *); 1332 ip6i = mtodo(m, hlen); 1333 ip6f = NULL; 1334 proto = ip6i->ip6_nxt; 1335 plen = ntohs(ip6i->ip6_plen); 1336 hlen += sizeof(struct ip6_hdr); 1337 if (proto == IPPROTO_FRAGMENT) { 1338 if (m->m_pkthdr.len < hlen + sizeof(struct ip6_frag) + 1339 ICMP_MINLEN) 1340 goto fail; 1341 ip6f = mtodo(m, hlen); 1342 proto = ip6f->ip6f_nxt; 1343 plen -= sizeof(struct ip6_frag); 1344 hlen += sizeof(struct ip6_frag); 1345 /* Ajust MTU to reflect frag header size */ 1346 if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG) 1347 mtu -= sizeof(struct ip6_frag); 1348 } 1349 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) { 1350 DPRINTF(DP_DROPS, "Unsupported proto %d in the inner header", 1351 proto); 1352 goto fail; 1353 } 1354 if (nat64_check_ip6(&ip6i->ip6_src) != 0 || 1355 nat64_check_ip6(&ip6i->ip6_dst) != 0) { 1356 DPRINTF(DP_DROPS, "Inner addresses do not passes the check"); 1357 goto fail; 1358 } 1359 /* Check if outer dst is the same as inner src */ 1360 if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6i->ip6_src)) { 1361 DPRINTF(DP_DROPS, "Inner src doesn't match outer dst"); 1362 goto fail; 1363 } 1364 1365 /* Now we need to make a fake IPv4 packet to generate ICMP message */ 1366 ip.ip_dst.s_addr = aaddr; 1367 ip.ip_src.s_addr = nat64_get_ip4(&ip6i->ip6_src); 1368 /* XXX: Make fake ulp header */ 1369 #ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT 1370 ip6i->ip6_hlim += IPV6_HLIMDEC; /* init_ip4hdr will decrement it */ 1371 #endif 1372 nat64_init_ip4hdr(ip6i, ip6f, plen, proto, &ip); 1373 m_adj(m, hlen - sizeof(struct ip)); 1374 bcopy(&ip, mtod(m, void *), sizeof(ip)); 1375 nat64_icmp_reflect(m, type, code, (uint16_t)mtu, stats, logdata); 1376 return (NAT64RETURN); 1377 fail: 1378 /* 1379 * We must call m_freem() because mbuf pointer could be 1380 * changed with m_pullup(). 1381 */ 1382 m_freem(m); 1383 NAT64STAT_INC(stats, dropped); 1384 return (NAT64RETURN); 1385 } 1386 1387 int 1388 nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport, 1389 nat64_stats_block *stats, void *logdata) 1390 { 1391 struct ip ip; 1392 struct nhop4_basic nh; 1393 struct sockaddr_in dst; 1394 struct ip6_frag *frag; 1395 struct ip6_hdr *ip6; 1396 struct icmp6_hdr *icmp6; 1397 uint16_t *csum; 1398 int plen, hlen, proto; 1399 1400 /* 1401 * XXX: we expect ipfw_chk() did m_pullup() up to upper level 1402 * protocol's headers. Also we skip some checks, that ip6_input(), 1403 * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did. 1404 */ 1405 ip6 = mtod(m, struct ip6_hdr *); 1406 if (nat64_check_ip6(&ip6->ip6_src) != 0 || 1407 nat64_check_ip6(&ip6->ip6_dst) != 0) { 1408 return (NAT64SKIP); 1409 } 1410 1411 /* Starting from this point we must not return zero */ 1412 ip.ip_src.s_addr = aaddr; 1413 if (nat64_check_ip4(ip.ip_src.s_addr) != 0) { 1414 DPRINTF(DP_GENERIC, "invalid source address: %08x", 1415 ip.ip_src.s_addr); 1416 /* XXX: stats? */ 1417 return (NAT64MFREE); 1418 } 1419 1420 ip.ip_dst.s_addr = nat64_get_ip4(&ip6->ip6_dst); 1421 if (ip.ip_dst.s_addr == 0) { 1422 /* XXX: stats? */ 1423 return (NAT64MFREE); 1424 } 1425 1426 if (ip6->ip6_hlim <= IPV6_HLIMDEC) { 1427 nat64_icmp6_reflect(m, ICMP6_TIME_EXCEEDED, 1428 ICMP6_TIME_EXCEED_TRANSIT, 0, stats, logdata); 1429 return (NAT64RETURN); 1430 } 1431 1432 hlen = 0; 1433 plen = ntohs(ip6->ip6_plen); 1434 proto = nat64_getlasthdr(m, &hlen); 1435 if (proto < 0) { 1436 DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious"); 1437 NAT64STAT_INC(stats, dropped); 1438 return (NAT64MFREE); 1439 } 1440 frag = NULL; 1441 if (proto == IPPROTO_FRAGMENT) { 1442 /* ipfw_chk should m_pullup up to frag header */ 1443 if (m->m_len < hlen + sizeof(*frag)) { 1444 DPRINTF(DP_DROPS, 1445 "dropped due to mbuf isn't contigious"); 1446 NAT64STAT_INC(stats, dropped); 1447 return (NAT64MFREE); 1448 } 1449 frag = mtodo(m, hlen); 1450 proto = frag->ip6f_nxt; 1451 hlen += sizeof(*frag); 1452 /* Fragmented ICMPv6 is unsupported */ 1453 if (proto == IPPROTO_ICMPV6) { 1454 DPRINTF(DP_DROPS, "dropped due to fragmented ICMPv6"); 1455 NAT64STAT_INC(stats, dropped); 1456 return (NAT64MFREE); 1457 } 1458 /* Fragment length must be multiple of 8 octets */ 1459 if ((frag->ip6f_offlg & IP6F_MORE_FRAG) != 0 && 1460 ((plen + sizeof(struct ip6_hdr) - hlen) & 0x7) != 0) { 1461 nat64_icmp6_reflect(m, ICMP6_PARAM_PROB, 1462 ICMP6_PARAMPROB_HEADER, 1463 offsetof(struct ip6_hdr, ip6_plen), stats, 1464 logdata); 1465 return (NAT64RETURN); 1466 } 1467 } 1468 plen -= hlen - sizeof(struct ip6_hdr); 1469 if (plen < 0 || m->m_pkthdr.len < plen + hlen) { 1470 DPRINTF(DP_DROPS, "plen %d, pkthdr.len %d, hlen %d", 1471 plen, m->m_pkthdr.len, hlen); 1472 NAT64STAT_INC(stats, dropped); 1473 return (NAT64MFREE); 1474 } 1475 1476 icmp6 = NULL; /* Make gcc happy */ 1477 if (proto == IPPROTO_ICMPV6) { 1478 icmp6 = mtodo(m, hlen); 1479 if (icmp6->icmp6_type != ICMP6_ECHO_REQUEST && 1480 icmp6->icmp6_type != ICMP6_ECHO_REPLY) 1481 return (nat64_handle_icmp6(m, hlen, aaddr, aport, 1482 stats, logdata)); 1483 } 1484 dst.sin_addr.s_addr = ip.ip_dst.s_addr; 1485 if (nat64_find_route4(&nh, &dst, m) != 0) { 1486 NAT64STAT_INC(stats, noroute4); 1487 nat64_icmp6_reflect(m, ICMP6_DST_UNREACH, 1488 ICMP6_DST_UNREACH_NOROUTE, 0, stats, logdata); 1489 return (NAT64RETURN); 1490 } 1491 if (nh.nh_mtu < plen + sizeof(ip)) { 1492 nat64_icmp6_reflect(m, ICMP6_PACKET_TOO_BIG, 0, nh.nh_mtu, 1493 stats, logdata); 1494 return (NAT64RETURN); 1495 } 1496 nat64_init_ip4hdr(ip6, frag, plen, proto, &ip); 1497 /* Convert checksums. */ 1498 switch (proto) { 1499 case IPPROTO_TCP: 1500 csum = &TCP(mtodo(m, hlen))->th_sum; 1501 if (aport != 0) { 1502 struct tcphdr *tcp = TCP(mtodo(m, hlen)); 1503 *csum = cksum_adjust(*csum, tcp->th_sport, aport); 1504 tcp->th_sport = aport; 1505 } 1506 *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip)); 1507 break; 1508 case IPPROTO_UDP: 1509 csum = &UDP(mtodo(m, hlen))->uh_sum; 1510 if (aport != 0) { 1511 struct udphdr *udp = UDP(mtodo(m, hlen)); 1512 *csum = cksum_adjust(*csum, udp->uh_sport, aport); 1513 udp->uh_sport = aport; 1514 } 1515 *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip)); 1516 break; 1517 case IPPROTO_ICMPV6: 1518 /* Checksum in ICMPv6 covers pseudo header */ 1519 csum = &icmp6->icmp6_cksum; 1520 *csum = cksum_add(*csum, in6_cksum_pseudo(ip6, plen, 1521 IPPROTO_ICMPV6, 0)); 1522 /* Convert ICMPv6 types to ICMP */ 1523 proto = *(uint16_t *)icmp6; /* save old word for cksum_adjust */ 1524 if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST) 1525 icmp6->icmp6_type = ICMP_ECHO; 1526 else /* ICMP6_ECHO_REPLY */ 1527 icmp6->icmp6_type = ICMP_ECHOREPLY; 1528 *csum = cksum_adjust(*csum, (uint16_t)proto, 1529 *(uint16_t *)icmp6); 1530 if (aport != 0) { 1531 uint16_t old_id = icmp6->icmp6_id; 1532 icmp6->icmp6_id = aport; 1533 *csum = cksum_adjust(*csum, old_id, aport); 1534 } 1535 break; 1536 }; 1537 1538 m_adj(m, hlen - sizeof(ip)); 1539 bcopy(&ip, mtod(m, void *), sizeof(ip)); 1540 if (nat64_output(nh.nh_ifp, m, (struct sockaddr *)&dst, NULL, 1541 stats, logdata) == 0) 1542 NAT64STAT_INC(stats, opcnt64); 1543 return (NAT64RETURN); 1544 } 1545 1546