1 /*- 2 * Copyright (c) 2015-2016 Yandex LLC 3 * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include "opt_ipfw.h" 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/counter.h> 36 #include <sys/errno.h> 37 #include <sys/kernel.h> 38 #include <sys/lock.h> 39 #include <sys/mbuf.h> 40 #include <sys/module.h> 41 #include <sys/rmlock.h> 42 #include <sys/rwlock.h> 43 #include <sys/socket.h> 44 #include <sys/queue.h> 45 46 #include <net/if.h> 47 #include <net/if_var.h> 48 #include <net/if_pflog.h> 49 #include <net/pfil.h> 50 #include <net/netisr.h> 51 #include <net/route.h> 52 53 #include <netinet/in.h> 54 #include <netinet/ip.h> 55 #include <netinet/ip_var.h> 56 #include <netinet/ip_fw.h> 57 #include <netinet/ip6.h> 58 #include <netinet/icmp6.h> 59 #include <netinet/ip_icmp.h> 60 #include <netinet/tcp.h> 61 #include <netinet/udp.h> 62 #include <netinet6/in6_var.h> 63 #include <netinet6/ip6_var.h> 64 65 #include <netpfil/pf/pf.h> 66 #include <netpfil/ipfw/ip_fw_private.h> 67 #include <netpfil/ipfw/nat64/ip_fw_nat64.h> 68 #include <netpfil/ipfw/nat64/nat64_translate.h> 69 #include <machine/in_cksum.h> 70 71 static void 72 nat64_log(struct pfloghdr *logdata, struct mbuf *m, sa_family_t family) 73 { 74 75 logdata->dir = PF_OUT; 76 logdata->af = family; 77 ipfw_bpf_mtap2(logdata, PFLOG_HDRLEN, m); 78 } 79 #ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT 80 static NAT64NOINLINE struct sockaddr* nat64_find_route4(struct route *ro, 81 in_addr_t dest, struct mbuf *m); 82 static NAT64NOINLINE struct sockaddr* nat64_find_route6(struct route_in6 *ro, 83 struct in6_addr *dest, struct mbuf *m); 84 85 static NAT64NOINLINE int 86 nat64_output(struct ifnet *ifp, struct mbuf *m, 87 struct sockaddr *dst, struct route *ro, nat64_stats_block *stats, 88 void *logdata) 89 { 90 int error; 91 92 if (logdata != NULL) 93 nat64_log(logdata, m, dst->sa_family); 94 error = (*ifp->if_output)(ifp, m, dst, ro); 95 if (error != 0) 96 NAT64STAT_INC(stats, oerrors); 97 return (error); 98 } 99 100 static NAT64NOINLINE int 101 nat64_output_one(struct mbuf *m, nat64_stats_block *stats, void *logdata) 102 { 103 struct route_in6 ro6; 104 struct route ro4, *ro; 105 struct sockaddr *dst; 106 struct ifnet *ifp; 107 struct ip6_hdr *ip6; 108 struct ip *ip4; 109 int error; 110 111 ip4 = mtod(m, struct ip *); 112 switch (ip4->ip_v) { 113 case IPVERSION: 114 ro = &ro4; 115 dst = nat64_find_route4(&ro4, ip4->ip_dst.s_addr, m); 116 if (dst == NULL) 117 NAT64STAT_INC(stats, noroute4); 118 break; 119 case (IPV6_VERSION >> 4): 120 ip6 = (struct ip6_hdr *)ip4; 121 ro = (struct route *)&ro6; 122 dst = nat64_find_route6(&ro6, &ip6->ip6_dst, m); 123 if (dst == NULL) 124 NAT64STAT_INC(stats, noroute6); 125 break; 126 default: 127 m_freem(m); 128 NAT64STAT_INC(stats, dropped); 129 DPRINTF(DP_DROPS, "dropped due to unknown IP version"); 130 return (EAFNOSUPPORT); 131 } 132 if (dst == NULL) { 133 FREE_ROUTE(ro); 134 m_freem(m); 135 return (EHOSTUNREACH); 136 } 137 if (logdata != NULL) 138 nat64_log(logdata, m, dst->sa_family); 139 ifp = ro->ro_rt->rt_ifp; 140 error = (*ifp->if_output)(ifp, m, dst, ro); 141 if (error != 0) 142 NAT64STAT_INC(stats, oerrors); 143 FREE_ROUTE(ro); 144 return (error); 145 } 146 #else /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */ 147 static NAT64NOINLINE int 148 nat64_output(struct ifnet *ifp, struct mbuf *m, 149 struct sockaddr *dst, struct route *ro, nat64_stats_block *stats, 150 void *logdata) 151 { 152 struct ip *ip4; 153 int ret, af; 154 155 ip4 = mtod(m, struct ip *); 156 switch (ip4->ip_v) { 157 case IPVERSION: 158 af = AF_INET; 159 ret = NETISR_IP; 160 break; 161 case (IPV6_VERSION >> 4): 162 af = AF_INET6; 163 ret = NETISR_IPV6; 164 break; 165 default: 166 m_freem(m); 167 NAT64STAT_INC(stats, dropped); 168 DPRINTF(DP_DROPS, "unknown IP version"); 169 return (EAFNOSUPPORT); 170 } 171 if (logdata != NULL) 172 nat64_log(logdata, m, af); 173 ret = netisr_queue(ret, m); 174 if (ret != 0) 175 NAT64STAT_INC(stats, oerrors); 176 return (ret); 177 } 178 179 static NAT64NOINLINE int 180 nat64_output_one(struct mbuf *m, nat64_stats_block *stats, void *logdata) 181 { 182 183 return (nat64_output(NULL, m, NULL, NULL, stats, logdata)); 184 } 185 #endif /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */ 186 187 188 #if 0 189 void print_ipv6_header(struct ip6_hdr *ip6, char *buf, size_t bufsize); 190 191 void 192 print_ipv6_header(struct ip6_hdr *ip6, char *buf, size_t bufsize) 193 { 194 char sbuf[INET6_ADDRSTRLEN], dbuf[INET6_ADDRSTRLEN]; 195 196 inet_ntop(AF_INET6, &ip6->ip6_src, sbuf, sizeof(sbuf)); 197 inet_ntop(AF_INET6, &ip6->ip6_dst, dbuf, sizeof(dbuf)); 198 snprintf(buf, bufsize, "%s -> %s %d", sbuf, dbuf, ip6->ip6_nxt); 199 } 200 201 202 static NAT64NOINLINE int 203 nat64_embed_ip4(struct nat64_cfg *cfg, in_addr_t ia, struct in6_addr *ip6) 204 { 205 206 /* assume the prefix is properly filled with zeros */ 207 bcopy(&cfg->prefix, ip6, sizeof(*ip6)); 208 switch (cfg->plen) { 209 case 32: 210 case 96: 211 ip6->s6_addr32[cfg->plen / 32] = ia; 212 break; 213 case 40: 214 case 48: 215 case 56: 216 #if BYTE_ORDER == BIG_ENDIAN 217 ip6->s6_addr32[1] = cfg->prefix.s6_addr32[1] | 218 (ia >> (cfg->plen % 32)); 219 ip6->s6_addr32[2] = ia << (24 - cfg->plen % 32); 220 #elif BYTE_ORDER == LITTLE_ENDIAN 221 ip6->s6_addr32[1] = cfg->prefix.s6_addr32[1] | 222 (ia << (cfg->plen % 32)); 223 ip6->s6_addr32[2] = ia >> (24 - cfg->plen % 32); 224 #endif 225 break; 226 case 64: 227 #if BYTE_ORDER == BIG_ENDIAN 228 ip6->s6_addr32[2] = ia >> 8; 229 ip6->s6_addr32[3] = ia << 24; 230 #elif BYTE_ORDER == LITTLE_ENDIAN 231 ip6->s6_addr32[2] = ia << 8; 232 ip6->s6_addr32[3] = ia >> 24; 233 #endif 234 break; 235 default: 236 return (0); 237 }; 238 ip6->s6_addr8[8] = 0; 239 return (1); 240 } 241 242 static NAT64NOINLINE in_addr_t 243 nat64_extract_ip4(struct in6_addr *ip6, int plen) 244 { 245 in_addr_t ia; 246 247 /* 248 * According to RFC 6052 p2.2: 249 * IPv4-embedded IPv6 addresses are composed of a variable-length 250 * prefix, the embedded IPv4 address, and a variable length suffix. 251 * The suffix bits are reserved for future extensions and SHOULD 252 * be set to zero. 253 */ 254 switch (plen) { 255 case 32: 256 if (ip6->s6_addr32[3] != 0 || ip6->s6_addr32[2] != 0) 257 goto badip6; 258 break; 259 case 40: 260 if (ip6->s6_addr32[3] != 0 || 261 (ip6->s6_addr32[2] & htonl(0xff00ffff)) != 0) 262 goto badip6; 263 break; 264 case 48: 265 if (ip6->s6_addr32[3] != 0 || 266 (ip6->s6_addr32[2] & htonl(0xff0000ff)) != 0) 267 goto badip6; 268 break; 269 case 56: 270 if (ip6->s6_addr32[3] != 0 || ip6->s6_addr8[8] != 0) 271 goto badip6; 272 break; 273 case 64: 274 if (ip6->s6_addr8[8] != 0 || 275 (ip6->s6_addr32[3] & htonl(0x00ffffff)) != 0) 276 goto badip6; 277 }; 278 switch (plen) { 279 case 32: 280 case 96: 281 ia = ip6->s6_addr32[plen / 32]; 282 break; 283 case 40: 284 case 48: 285 case 56: 286 #if BYTE_ORDER == BIG_ENDIAN 287 ia = (ip6->s6_addr32[1] << (plen % 32)) | 288 (ip6->s6_addr32[2] >> (24 - plen % 32)); 289 #elif BYTE_ORDER == LITTLE_ENDIAN 290 ia = (ip6->s6_addr32[1] >> (plen % 32)) | 291 (ip6->s6_addr32[2] << (24 - plen % 32)); 292 #endif 293 break; 294 case 64: 295 #if BYTE_ORDER == BIG_ENDIAN 296 ia = (ip6->s6_addr32[2] << 8) | (ip6->s6_addr32[3] >> 24); 297 #elif BYTE_ORDER == LITTLE_ENDIAN 298 ia = (ip6->s6_addr32[2] >> 8) | (ip6->s6_addr32[3] << 24); 299 #endif 300 break; 301 default: 302 return (0); 303 }; 304 if (nat64_check_ip4(ia) != 0 || 305 nat64_check_private_ip4(ia) != 0) 306 goto badip4; 307 308 return (ia); 309 badip4: 310 DPRINTF(DP_GENERIC, "invalid destination address: %08x", ia); 311 return (0); 312 badip6: 313 DPRINTF(DP_GENERIC, "invalid IPv4-embedded IPv6 address"); 314 return (0); 315 } 316 #endif 317 318 /* 319 * According to RFC 1624 the equation for incremental checksum update is: 320 * HC' = ~(~HC + ~m + m') -- [Eqn. 3] 321 * HC' = HC - ~m - m' -- [Eqn. 4] 322 * So, when we are replacing IPv4 addresses to IPv6, we 323 * can assume, that new bytes previously were zeros, and vise versa - 324 * when we replacing IPv6 addresses to IPv4, now unused bytes become 325 * zeros. The payload length in pseudo header has bigger size, but one 326 * half of it should be zero. Using the equation 4 we get: 327 * HC' = HC - (~m0 + m0') -- m0 is first changed word 328 * HC' = (HC - (~m0 + m0')) - (~m1 + m1') -- m1 is second changed word 329 * HC' = HC - ~m0 - m0' - ~m1 - m1' - ... = 330 * = HC - sum(~m[i] + m'[i]) 331 * 332 * The function result should be used as follows: 333 * IPv6 to IPv4: HC' = cksum_add(HC, result) 334 * IPv4 to IPv6: HC' = cksum_add(HC, ~result) 335 */ 336 static NAT64NOINLINE uint16_t 337 nat64_cksum_convert(struct ip6_hdr *ip6, struct ip *ip) 338 { 339 uint32_t sum; 340 uint16_t *p; 341 342 sum = ~ip->ip_src.s_addr >> 16; 343 sum += ~ip->ip_src.s_addr & 0xffff; 344 sum += ~ip->ip_dst.s_addr >> 16; 345 sum += ~ip->ip_dst.s_addr & 0xffff; 346 347 for (p = (uint16_t *)&ip6->ip6_src; 348 p < (uint16_t *)(&ip6->ip6_src + 2); p++) 349 sum += *p; 350 351 while (sum >> 16) 352 sum = (sum & 0xffff) + (sum >> 16); 353 return (sum); 354 } 355 356 #if __FreeBSD_version < 1100000 357 #define ip_fillid(ip) (ip)->ip_id = ip_newid() 358 #endif 359 static NAT64NOINLINE void 360 nat64_init_ip4hdr(const struct ip6_hdr *ip6, const struct ip6_frag *frag, 361 uint16_t plen, uint8_t proto, struct ip *ip) 362 { 363 364 /* assume addresses are already initialized */ 365 ip->ip_v = IPVERSION; 366 ip->ip_hl = sizeof(*ip) >> 2; 367 ip->ip_tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; 368 ip->ip_len = htons(sizeof(*ip) + plen); 369 #ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT 370 ip->ip_ttl = ip6->ip6_hlim - IPV6_HLIMDEC; 371 #else 372 /* Forwarding code will decrement TTL. */ 373 ip->ip_ttl = ip6->ip6_hlim; 374 #endif 375 ip->ip_sum = 0; 376 ip->ip_p = (proto == IPPROTO_ICMPV6) ? IPPROTO_ICMP: proto; 377 ip_fillid(ip); 378 if (frag != NULL) { 379 ip->ip_off = htons(ntohs(frag->ip6f_offlg) >> 3); 380 if (frag->ip6f_offlg & IP6F_MORE_FRAG) 381 ip->ip_off |= htons(IP_MF); 382 } else { 383 ip->ip_off = htons(IP_DF); 384 } 385 ip->ip_sum = in_cksum_hdr(ip); 386 } 387 388 #define FRAGSZ(mtu) ((mtu) - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag)) 389 static NAT64NOINLINE int 390 nat64_fragment6(nat64_stats_block *stats, struct ip6_hdr *ip6, struct mbufq *mq, 391 struct mbuf *m, uint32_t mtu, uint16_t ip_id, uint16_t ip_off) 392 { 393 struct ip6_frag ip6f; 394 struct mbuf *n; 395 uint16_t hlen, len, offset; 396 int plen; 397 398 plen = ntohs(ip6->ip6_plen); 399 hlen = sizeof(struct ip6_hdr); 400 401 /* Fragmentation isn't needed */ 402 if (ip_off == 0 && plen <= mtu - hlen) { 403 M_PREPEND(m, hlen, M_NOWAIT); 404 if (m == NULL) { 405 NAT64STAT_INC(stats, nomem); 406 return (ENOMEM); 407 } 408 bcopy(ip6, mtod(m, void *), hlen); 409 if (mbufq_enqueue(mq, m) != 0) { 410 m_freem(m); 411 NAT64STAT_INC(stats, dropped); 412 DPRINTF(DP_DROPS, "dropped due to mbufq overflow"); 413 return (ENOBUFS); 414 } 415 return (0); 416 } 417 418 hlen += sizeof(struct ip6_frag); 419 ip6f.ip6f_reserved = 0; 420 ip6f.ip6f_nxt = ip6->ip6_nxt; 421 ip6->ip6_nxt = IPPROTO_FRAGMENT; 422 if (ip_off != 0) { 423 /* 424 * We have got an IPv4 fragment. 425 * Use offset value and ip_id from original fragment. 426 */ 427 ip6f.ip6f_ident = htonl(ntohs(ip_id)); 428 offset = (ntohs(ip_off) & IP_OFFMASK) << 3; 429 NAT64STAT_INC(stats, ifrags); 430 } else { 431 /* The packet size exceeds interface MTU */ 432 ip6f.ip6f_ident = htonl(ip6_randomid()); 433 offset = 0; /* First fragment*/ 434 } 435 while (plen > 0 && m != NULL) { 436 n = NULL; 437 len = FRAGSZ(mtu) & ~7; 438 if (len > plen) 439 len = plen; 440 ip6->ip6_plen = htons(len + sizeof(ip6f)); 441 ip6f.ip6f_offlg = ntohs(offset); 442 if (len < plen || (ip_off & htons(IP_MF)) != 0) 443 ip6f.ip6f_offlg |= IP6F_MORE_FRAG; 444 offset += len; 445 plen -= len; 446 if (plen > 0) { 447 n = m_split(m, len, M_NOWAIT); 448 if (n == NULL) 449 goto fail; 450 } 451 M_PREPEND(m, hlen, M_NOWAIT); 452 if (m == NULL) 453 goto fail; 454 bcopy(ip6, mtod(m, void *), sizeof(struct ip6_hdr)); 455 bcopy(&ip6f, mtodo(m, sizeof(struct ip6_hdr)), 456 sizeof(struct ip6_frag)); 457 if (mbufq_enqueue(mq, m) != 0) 458 goto fail; 459 m = n; 460 } 461 NAT64STAT_ADD(stats, ofrags, mbufq_len(mq)); 462 return (0); 463 fail: 464 if (m != NULL) 465 m_freem(m); 466 if (n != NULL) 467 m_freem(n); 468 mbufq_drain(mq); 469 NAT64STAT_INC(stats, nomem); 470 return (ENOMEM); 471 } 472 473 #if __FreeBSD_version < 1100000 474 #define rt_expire rt_rmx.rmx_expire 475 #define rt_mtu rt_rmx.rmx_mtu 476 #endif 477 static NAT64NOINLINE struct sockaddr* 478 nat64_find_route6(struct route_in6 *ro, struct in6_addr *dest, struct mbuf *m) 479 { 480 struct sockaddr_in6 *dst; 481 struct rtentry *rt; 482 483 bzero(ro, sizeof(*ro)); 484 dst = (struct sockaddr_in6 *)&ro->ro_dst; 485 dst->sin6_family = AF_INET6; 486 dst->sin6_len = sizeof(*dst); 487 dst->sin6_addr = *dest; 488 IN6_LOOKUP_ROUTE(ro, M_GETFIB(m)); 489 rt = ro->ro_rt; 490 if (rt && (rt->rt_flags & RTF_UP) && 491 (rt->rt_ifp->if_flags & IFF_UP) && 492 (rt->rt_ifp->if_drv_flags & IFF_DRV_RUNNING)) { 493 if (rt->rt_flags & RTF_GATEWAY) 494 dst = (struct sockaddr_in6 *)rt->rt_gateway; 495 } else 496 return (NULL); 497 if (((rt->rt_flags & RTF_REJECT) && 498 (rt->rt_expire == 0 || 499 time_uptime < rt->rt_expire)) || 500 rt->rt_ifp->if_link_state == LINK_STATE_DOWN) 501 return (NULL); 502 return ((struct sockaddr *)dst); 503 } 504 505 #define NAT64_ICMP6_PLEN 64 506 static NAT64NOINLINE void 507 nat64_icmp6_reflect(struct mbuf *m, uint8_t type, uint8_t code, uint32_t mtu, 508 nat64_stats_block *stats, void *logdata) 509 { 510 struct icmp6_hdr *icmp6; 511 struct ip6_hdr *ip6, *oip6; 512 struct mbuf *n; 513 int len, plen; 514 515 len = 0; 516 plen = nat64_getlasthdr(m, &len); 517 if (plen < 0) { 518 DPRINTF(DP_DROPS, "mbuf isn't contigious"); 519 goto freeit; 520 } 521 /* 522 * Do not send ICMPv6 in reply to ICMPv6 errors. 523 */ 524 if (plen == IPPROTO_ICMPV6) { 525 if (m->m_len < len + sizeof(*icmp6)) { 526 DPRINTF(DP_DROPS, "mbuf isn't contigious"); 527 goto freeit; 528 } 529 icmp6 = mtodo(m, len); 530 if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST || 531 icmp6->icmp6_type == ND_REDIRECT) { 532 DPRINTF(DP_DROPS, "do not send ICMPv6 in reply to " 533 "ICMPv6 errors"); 534 goto freeit; 535 } 536 } 537 /* 538 if (icmp6_ratelimit(&ip6->ip6_src, type, code)) 539 goto freeit; 540 */ 541 ip6 = mtod(m, struct ip6_hdr *); 542 switch (type) { 543 case ICMP6_DST_UNREACH: 544 case ICMP6_PACKET_TOO_BIG: 545 case ICMP6_TIME_EXCEEDED: 546 case ICMP6_PARAM_PROB: 547 break; 548 default: 549 goto freeit; 550 } 551 /* Calculate length of ICMPv6 payload */ 552 len = (m->m_pkthdr.len > NAT64_ICMP6_PLEN) ? NAT64_ICMP6_PLEN: 553 m->m_pkthdr.len; 554 555 /* Create new ICMPv6 datagram */ 556 plen = len + sizeof(struct icmp6_hdr); 557 n = m_get2(sizeof(struct ip6_hdr) + plen + max_hdr, M_NOWAIT, 558 MT_HEADER, M_PKTHDR); 559 if (n == NULL) { 560 NAT64STAT_INC(stats, nomem); 561 m_freem(m); 562 return; 563 } 564 /* 565 * Move pkthdr from original mbuf. We should have initialized some 566 * fields, because we can reinject this mbuf to netisr and it will 567 * go trough input path (it requires at least rcvif should be set). 568 * Also do M_ALIGN() to reduce chances of need to allocate new mbuf 569 * in the chain, when we will do M_PREPEND() or make some type of 570 * tunneling. 571 */ 572 m_move_pkthdr(n, m); 573 M_ALIGN(n, sizeof(struct ip6_hdr) + plen + max_hdr); 574 575 n->m_len = n->m_pkthdr.len = sizeof(struct ip6_hdr) + plen; 576 oip6 = mtod(n, struct ip6_hdr *); 577 oip6->ip6_src = ip6->ip6_dst; 578 oip6->ip6_dst = ip6->ip6_src; 579 oip6->ip6_nxt = IPPROTO_ICMPV6; 580 oip6->ip6_flow = 0; 581 oip6->ip6_vfc |= IPV6_VERSION; 582 oip6->ip6_hlim = V_ip6_defhlim; 583 oip6->ip6_plen = htons(plen); 584 585 icmp6 = mtodo(n, sizeof(struct ip6_hdr)); 586 icmp6->icmp6_cksum = 0; 587 icmp6->icmp6_type = type; 588 icmp6->icmp6_code = code; 589 icmp6->icmp6_mtu = htonl(mtu); 590 591 m_copydata(m, 0, len, mtodo(n, sizeof(struct ip6_hdr) + 592 sizeof(struct icmp6_hdr))); 593 icmp6->icmp6_cksum = in6_cksum(n, IPPROTO_ICMPV6, 594 sizeof(struct ip6_hdr), plen); 595 m_freem(m); 596 nat64_output_one(n, stats, logdata); 597 return; 598 freeit: 599 NAT64STAT_INC(stats, dropped); 600 m_freem(m); 601 } 602 603 static NAT64NOINLINE struct sockaddr* 604 nat64_find_route4(struct route *ro, in_addr_t dest, struct mbuf *m) 605 { 606 struct sockaddr_in *dst; 607 struct rtentry *rt; 608 609 bzero(ro, sizeof(*ro)); 610 dst = (struct sockaddr_in *)&ro->ro_dst; 611 dst->sin_family = AF_INET; 612 dst->sin_len = sizeof(*dst); 613 dst->sin_addr.s_addr = dest; 614 IN_LOOKUP_ROUTE(ro, M_GETFIB(m)); 615 rt = ro->ro_rt; 616 if (rt && (rt->rt_flags & RTF_UP) && 617 (rt->rt_ifp->if_flags & IFF_UP) && 618 (rt->rt_ifp->if_drv_flags & IFF_DRV_RUNNING)) { 619 if (rt->rt_flags & RTF_GATEWAY) 620 dst = (struct sockaddr_in *)rt->rt_gateway; 621 } else 622 return (NULL); 623 if (((rt->rt_flags & RTF_REJECT) && 624 (rt->rt_expire == 0 || 625 time_uptime < rt->rt_expire)) || 626 rt->rt_ifp->if_link_state == LINK_STATE_DOWN) 627 return (NULL); 628 return ((struct sockaddr *)dst); 629 } 630 631 #define NAT64_ICMP_PLEN 64 632 static NAT64NOINLINE void 633 nat64_icmp_reflect(struct mbuf *m, uint8_t type, 634 uint8_t code, uint16_t mtu, nat64_stats_block *stats, void *logdata) 635 { 636 struct icmp *icmp; 637 struct ip *ip, *oip; 638 struct mbuf *n; 639 int len, plen; 640 641 ip = mtod(m, struct ip *); 642 /* Do not send ICMP error if packet is not the first fragment */ 643 if (ip->ip_off & ~ntohs(IP_MF|IP_DF)) { 644 DPRINTF(DP_DROPS, "not first fragment"); 645 goto freeit; 646 } 647 /* Do not send ICMP in reply to ICMP errors */ 648 if (ip->ip_p == IPPROTO_ICMP) { 649 if (m->m_len < (ip->ip_hl << 2)) { 650 DPRINTF(DP_DROPS, "mbuf isn't contigious"); 651 goto freeit; 652 } 653 icmp = mtodo(m, ip->ip_hl << 2); 654 if (!ICMP_INFOTYPE(icmp->icmp_type)) { 655 DPRINTF(DP_DROPS, "do not send ICMP in reply to " 656 "ICMP errors"); 657 goto freeit; 658 } 659 } 660 switch (type) { 661 case ICMP_UNREACH: 662 case ICMP_TIMXCEED: 663 case ICMP_PARAMPROB: 664 break; 665 default: 666 goto freeit; 667 } 668 /* Calculate length of ICMP payload */ 669 len = (m->m_pkthdr.len > NAT64_ICMP_PLEN) ? (ip->ip_hl << 2) + 8: 670 m->m_pkthdr.len; 671 672 /* Create new ICMPv4 datagram */ 673 plen = len + sizeof(struct icmphdr) + sizeof(uint32_t); 674 n = m_get2(sizeof(struct ip) + plen + max_hdr, M_NOWAIT, 675 MT_HEADER, M_PKTHDR); 676 if (n == NULL) { 677 NAT64STAT_INC(stats, nomem); 678 m_freem(m); 679 return; 680 } 681 m_move_pkthdr(n, m); 682 M_ALIGN(n, sizeof(struct ip) + plen + max_hdr); 683 684 n->m_len = n->m_pkthdr.len = sizeof(struct ip) + plen; 685 oip = mtod(n, struct ip *); 686 oip->ip_v = IPVERSION; 687 oip->ip_hl = sizeof(struct ip) >> 2; 688 oip->ip_tos = 0; 689 oip->ip_len = htons(n->m_pkthdr.len); 690 oip->ip_ttl = V_ip_defttl; 691 oip->ip_p = IPPROTO_ICMP; 692 ip_fillid(oip); 693 oip->ip_off = htons(IP_DF); 694 oip->ip_src = ip->ip_dst; 695 oip->ip_dst = ip->ip_src; 696 oip->ip_sum = 0; 697 oip->ip_sum = in_cksum_hdr(oip); 698 699 icmp = mtodo(n, sizeof(struct ip)); 700 icmp->icmp_type = type; 701 icmp->icmp_code = code; 702 icmp->icmp_cksum = 0; 703 icmp->icmp_pmvoid = 0; 704 icmp->icmp_nextmtu = htons(mtu); 705 m_copydata(m, 0, len, mtodo(n, sizeof(struct ip) + 706 sizeof(struct icmphdr) + sizeof(uint32_t))); 707 icmp->icmp_cksum = in_cksum_skip(n, sizeof(struct ip) + plen, 708 sizeof(struct ip)); 709 m_freem(m); 710 nat64_output_one(n, stats, logdata); 711 return; 712 freeit: 713 NAT64STAT_INC(stats, dropped); 714 m_freem(m); 715 } 716 717 /* Translate ICMP echo request/reply into ICMPv6 */ 718 static void 719 nat64_icmp_handle_echo(struct ip6_hdr *ip6, struct icmp6_hdr *icmp6, 720 uint16_t id, uint8_t type) 721 { 722 uint16_t old; 723 724 old = *(uint16_t *)icmp6; /* save type+code in one word */ 725 icmp6->icmp6_type = type; 726 /* Reflect ICMPv6 -> ICMPv4 type translation in the cksum */ 727 icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum, 728 old, *(uint16_t *)icmp6); 729 if (id != 0) { 730 old = icmp6->icmp6_id; 731 icmp6->icmp6_id = id; 732 /* Reflect ICMP id translation in the cksum */ 733 icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum, 734 old, id); 735 } 736 /* Reflect IPv6 pseudo header in the cksum */ 737 icmp6->icmp6_cksum = ~in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen), 738 IPPROTO_ICMPV6, ~icmp6->icmp6_cksum); 739 } 740 741 static NAT64NOINLINE struct mbuf * 742 nat64_icmp_translate(struct mbuf *m, struct ip6_hdr *ip6, uint16_t icmpid, 743 int offset, nat64_stats_block *stats) 744 { 745 struct ip ip; 746 struct icmp *icmp; 747 struct tcphdr *tcp; 748 struct udphdr *udp; 749 struct ip6_hdr *eip6; 750 struct mbuf *n; 751 uint32_t mtu; 752 int len, hlen, plen; 753 uint8_t type, code; 754 755 if (m->m_len < offset + ICMP_MINLEN) 756 m = m_pullup(m, offset + ICMP_MINLEN); 757 if (m == NULL) { 758 NAT64STAT_INC(stats, nomem); 759 return (m); 760 } 761 mtu = 0; 762 icmp = mtodo(m, offset); 763 /* RFC 7915 p4.2 */ 764 switch (icmp->icmp_type) { 765 case ICMP_ECHOREPLY: 766 type = ICMP6_ECHO_REPLY; 767 code = 0; 768 break; 769 case ICMP_UNREACH: 770 type = ICMP6_DST_UNREACH; 771 switch (icmp->icmp_code) { 772 case ICMP_UNREACH_NET: 773 case ICMP_UNREACH_HOST: 774 case ICMP_UNREACH_SRCFAIL: 775 case ICMP_UNREACH_NET_UNKNOWN: 776 case ICMP_UNREACH_HOST_UNKNOWN: 777 case ICMP_UNREACH_TOSNET: 778 case ICMP_UNREACH_TOSHOST: 779 code = ICMP6_DST_UNREACH_NOROUTE; 780 break; 781 case ICMP_UNREACH_PROTOCOL: 782 type = ICMP6_PARAM_PROB; 783 code = ICMP6_PARAMPROB_NEXTHEADER; 784 break; 785 case ICMP_UNREACH_PORT: 786 code = ICMP6_DST_UNREACH_NOPORT; 787 break; 788 case ICMP_UNREACH_NEEDFRAG: 789 type = ICMP6_PACKET_TOO_BIG; 790 code = 0; 791 /* XXX: needs an additional look */ 792 mtu = max(IPV6_MMTU, ntohs(icmp->icmp_nextmtu) + 20); 793 break; 794 case ICMP_UNREACH_NET_PROHIB: 795 case ICMP_UNREACH_HOST_PROHIB: 796 case ICMP_UNREACH_FILTER_PROHIB: 797 case ICMP_UNREACH_PRECEDENCE_CUTOFF: 798 code = ICMP6_DST_UNREACH_ADMIN; 799 break; 800 default: 801 DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d", 802 icmp->icmp_type, icmp->icmp_code); 803 goto freeit; 804 } 805 break; 806 case ICMP_TIMXCEED: 807 type = ICMP6_TIME_EXCEEDED; 808 code = icmp->icmp_code; 809 break; 810 case ICMP_ECHO: 811 type = ICMP6_ECHO_REQUEST; 812 code = 0; 813 break; 814 case ICMP_PARAMPROB: 815 type = ICMP6_PARAM_PROB; 816 switch (icmp->icmp_code) { 817 case ICMP_PARAMPROB_ERRATPTR: 818 case ICMP_PARAMPROB_LENGTH: 819 code = ICMP6_PARAMPROB_HEADER; 820 switch (icmp->icmp_pptr) { 821 case 0: /* Version/IHL */ 822 case 1: /* Type Of Service */ 823 mtu = icmp->icmp_pptr; 824 break; 825 case 2: /* Total Length */ 826 case 3: mtu = 4; /* Payload Length */ 827 break; 828 case 8: /* Time to Live */ 829 mtu = 7; /* Hop Limit */ 830 break; 831 case 9: /* Protocol */ 832 mtu = 6; /* Next Header */ 833 break; 834 case 12: /* Source address */ 835 case 13: 836 case 14: 837 case 15: 838 mtu = 8; 839 break; 840 case 16: /* Destination address */ 841 case 17: 842 case 18: 843 case 19: 844 mtu = 24; 845 break; 846 default: /* Silently drop */ 847 DPRINTF(DP_DROPS, "Unsupported ICMP type %d," 848 " code %d, pptr %d", icmp->icmp_type, 849 icmp->icmp_code, icmp->icmp_pptr); 850 goto freeit; 851 } 852 break; 853 default: 854 DPRINTF(DP_DROPS, "Unsupported ICMP type %d," 855 " code %d, pptr %d", icmp->icmp_type, 856 icmp->icmp_code, icmp->icmp_pptr); 857 goto freeit; 858 } 859 break; 860 default: 861 DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d", 862 icmp->icmp_type, icmp->icmp_code); 863 goto freeit; 864 } 865 /* 866 * For echo request/reply we can use original payload, 867 * but we need adjust icmp_cksum, because ICMPv6 cksum covers 868 * IPv6 pseudo header and ICMPv6 types differs from ICMPv4. 869 */ 870 if (type == ICMP6_ECHO_REQUEST || type == ICMP6_ECHO_REPLY) { 871 nat64_icmp_handle_echo(ip6, ICMP6(icmp), icmpid, type); 872 return (m); 873 } 874 /* 875 * For other types of ICMP messages we need to translate inner 876 * IPv4 header to IPv6 header. 877 * Assume ICMP src is the same as payload dst 878 * E.g. we have ( GWsrc1 , NATIP1 ) in outer header 879 * and ( NATIP1, Hostdst1 ) in ICMP copy header. 880 * In that case, we already have map for NATIP1 and GWsrc1. 881 * The only thing we need is to copy IPv6 map prefix to 882 * Hostdst1. 883 */ 884 hlen = offset + ICMP_MINLEN; 885 if (m->m_pkthdr.len < hlen + sizeof(struct ip) + ICMP_MINLEN) { 886 DPRINTF(DP_DROPS, "Message is too short %d", 887 m->m_pkthdr.len); 888 goto freeit; 889 } 890 m_copydata(m, hlen, sizeof(struct ip), (char *)&ip); 891 if (ip.ip_v != IPVERSION) { 892 DPRINTF(DP_DROPS, "Wrong IP version %d", ip.ip_v); 893 goto freeit; 894 } 895 hlen += ip.ip_hl << 2; /* Skip inner IP header */ 896 if (nat64_check_ip4(ip.ip_src.s_addr) != 0 || 897 nat64_check_ip4(ip.ip_dst.s_addr) != 0 || 898 nat64_check_private_ip4(ip.ip_src.s_addr) != 0 || 899 nat64_check_private_ip4(ip.ip_dst.s_addr) != 0) { 900 DPRINTF(DP_DROPS, "IP addresses checks failed %04x -> %04x", 901 ntohl(ip.ip_src.s_addr), ntohl(ip.ip_dst.s_addr)); 902 goto freeit; 903 } 904 if (m->m_pkthdr.len < hlen + ICMP_MINLEN) { 905 DPRINTF(DP_DROPS, "Message is too short %d", 906 m->m_pkthdr.len); 907 goto freeit; 908 } 909 #if 0 910 /* 911 * Check that inner source matches the outer destination. 912 * XXX: We need some method to convert IPv4 into IPv6 address here, 913 * and compare IPv6 addresses. 914 */ 915 if (ip.ip_src.s_addr != nat64_get_ip4(&ip6->ip6_dst)) { 916 DPRINTF(DP_GENERIC, "Inner source doesn't match destination ", 917 "%04x vs %04x", ip.ip_src.s_addr, 918 nat64_get_ip4(&ip6->ip6_dst)); 919 goto freeit; 920 } 921 #endif 922 /* 923 * Create new mbuf for ICMPv6 datagram. 924 * NOTE: len is data length just after inner IP header. 925 */ 926 len = m->m_pkthdr.len - hlen; 927 if (sizeof(struct ip6_hdr) + 928 sizeof(struct icmp6_hdr) + len > NAT64_ICMP6_PLEN) 929 len = NAT64_ICMP6_PLEN - sizeof(struct icmp6_hdr) - 930 sizeof(struct ip6_hdr); 931 plen = sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr) + len; 932 n = m_get2(offset + plen + max_hdr, M_NOWAIT, MT_HEADER, M_PKTHDR); 933 if (n == NULL) { 934 NAT64STAT_INC(stats, nomem); 935 m_freem(m); 936 return (NULL); 937 } 938 m_move_pkthdr(n, m); 939 M_ALIGN(n, offset + plen + max_hdr); 940 n->m_len = n->m_pkthdr.len = offset + plen; 941 /* Adjust ip6_plen in outer header */ 942 ip6->ip6_plen = htons(plen); 943 /* Construct new inner IPv6 header */ 944 eip6 = mtodo(n, offset + sizeof(struct icmp6_hdr)); 945 eip6->ip6_src = ip6->ip6_dst; 946 /* Use the fact that we have single /96 prefix for IPv4 map */ 947 eip6->ip6_dst = ip6->ip6_src; 948 nat64_set_ip4(&eip6->ip6_dst, ip.ip_dst.s_addr); 949 950 eip6->ip6_flow = htonl(ip.ip_tos << 20); 951 eip6->ip6_vfc |= IPV6_VERSION; 952 eip6->ip6_hlim = ip.ip_ttl; 953 eip6->ip6_plen = htons(ntohs(ip.ip_len) - (ip.ip_hl << 2)); 954 eip6->ip6_nxt = (ip.ip_p == IPPROTO_ICMP) ? IPPROTO_ICMPV6: ip.ip_p; 955 m_copydata(m, hlen, len, (char *)(eip6 + 1)); 956 /* 957 * We need to translate source port in the inner ULP header, 958 * and adjust ULP checksum. 959 */ 960 switch (ip.ip_p) { 961 case IPPROTO_TCP: 962 if (len < offsetof(struct tcphdr, th_sum)) 963 break; 964 tcp = TCP(eip6 + 1); 965 if (icmpid != 0) { 966 tcp->th_sum = cksum_adjust(tcp->th_sum, 967 tcp->th_sport, icmpid); 968 tcp->th_sport = icmpid; 969 } 970 tcp->th_sum = cksum_add(tcp->th_sum, 971 ~nat64_cksum_convert(eip6, &ip)); 972 break; 973 case IPPROTO_UDP: 974 if (len < offsetof(struct udphdr, uh_sum)) 975 break; 976 udp = UDP(eip6 + 1); 977 if (icmpid != 0) { 978 udp->uh_sum = cksum_adjust(udp->uh_sum, 979 udp->uh_sport, icmpid); 980 udp->uh_sport = icmpid; 981 } 982 udp->uh_sum = cksum_add(udp->uh_sum, 983 ~nat64_cksum_convert(eip6, &ip)); 984 break; 985 case IPPROTO_ICMP: 986 /* 987 * Check if this is an ICMP error message for echo request 988 * that we sent. I.e. ULP in the data containing invoking 989 * packet is IPPROTO_ICMP and its type is ICMP_ECHO. 990 */ 991 icmp = (struct icmp *)(eip6 + 1); 992 if (icmp->icmp_type != ICMP_ECHO) { 993 m_freem(n); 994 goto freeit; 995 } 996 /* 997 * For our client this original datagram should looks 998 * like it was ICMPv6 datagram with type ICMP6_ECHO_REQUEST. 999 * Thus we need adjust icmp_cksum and convert type from 1000 * ICMP_ECHO to ICMP6_ECHO_REQUEST. 1001 */ 1002 nat64_icmp_handle_echo(eip6, ICMP6(icmp), icmpid, 1003 ICMP6_ECHO_REQUEST); 1004 } 1005 m_freem(m); 1006 /* Convert ICMPv4 into ICMPv6 header */ 1007 icmp = mtodo(n, offset); 1008 ICMP6(icmp)->icmp6_type = type; 1009 ICMP6(icmp)->icmp6_code = code; 1010 ICMP6(icmp)->icmp6_mtu = htonl(mtu); 1011 ICMP6(icmp)->icmp6_cksum = 0; 1012 ICMP6(icmp)->icmp6_cksum = cksum_add( 1013 ~in6_cksum_pseudo(ip6, plen, IPPROTO_ICMPV6, 0), 1014 in_cksum_skip(n, n->m_pkthdr.len, offset)); 1015 return (n); 1016 freeit: 1017 m_freem(m); 1018 NAT64STAT_INC(stats, dropped); 1019 return (NULL); 1020 } 1021 1022 int 1023 nat64_getlasthdr(struct mbuf *m, int *offset) 1024 { 1025 struct ip6_hdr *ip6; 1026 struct ip6_hbh *hbh; 1027 int proto, hlen; 1028 1029 if (offset != NULL) 1030 hlen = *offset; 1031 else 1032 hlen = 0; 1033 1034 if (m->m_len < hlen + sizeof(*ip6)) 1035 return (-1); 1036 1037 ip6 = mtodo(m, hlen); 1038 hlen += sizeof(*ip6); 1039 proto = ip6->ip6_nxt; 1040 /* Skip extension headers */ 1041 while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING || 1042 proto == IPPROTO_DSTOPTS) { 1043 hbh = mtodo(m, hlen); 1044 /* 1045 * We expect mbuf has contigious data up to 1046 * upper level header. 1047 */ 1048 if (m->m_len < hlen) 1049 return (-1); 1050 /* 1051 * We doesn't support Jumbo payload option, 1052 * so return error. 1053 */ 1054 if (proto == IPPROTO_HOPOPTS && ip6->ip6_plen == 0) 1055 return (-1); 1056 proto = hbh->ip6h_nxt; 1057 hlen += hbh->ip6h_len << 3; 1058 } 1059 if (offset != NULL) 1060 *offset = hlen; 1061 return (proto); 1062 } 1063 1064 int 1065 nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr, 1066 struct in6_addr *daddr, uint16_t lport, nat64_stats_block *stats, 1067 void *logdata) 1068 { 1069 struct route_in6 ro; 1070 struct ip6_hdr ip6; 1071 struct ifnet *ifp; 1072 struct ip *ip; 1073 struct mbufq mq; 1074 struct sockaddr *dst; 1075 uint32_t mtu; 1076 uint16_t ip_id, ip_off; 1077 uint16_t *csum; 1078 int plen, hlen; 1079 uint8_t proto; 1080 1081 ip = mtod(m, struct ip*); 1082 1083 if (ip->ip_ttl <= IPTTLDEC) { 1084 nat64_icmp_reflect(m, ICMP_TIMXCEED, 1085 ICMP_TIMXCEED_INTRANS, 0, stats, logdata); 1086 return (NAT64RETURN); 1087 } 1088 1089 ip6.ip6_dst = *daddr; 1090 ip6.ip6_src = *saddr; 1091 1092 hlen = ip->ip_hl << 2; 1093 plen = ntohs(ip->ip_len) - hlen; 1094 proto = ip->ip_p; 1095 1096 /* Save ip_id and ip_off, both are in network byte order */ 1097 ip_id = ip->ip_id; 1098 ip_off = ip->ip_off & htons(IP_OFFMASK | IP_MF); 1099 1100 /* Fragment length must be multiple of 8 octets */ 1101 if ((ip->ip_off & htons(IP_MF)) != 0 && (plen & 0x7) != 0) { 1102 nat64_icmp_reflect(m, ICMP_PARAMPROB, 1103 ICMP_PARAMPROB_LENGTH, 0, stats, logdata); 1104 return (NAT64RETURN); 1105 } 1106 /* Fragmented ICMP is unsupported */ 1107 if (proto == IPPROTO_ICMP && ip_off != 0) { 1108 DPRINTF(DP_DROPS, "dropped due to fragmented ICMP"); 1109 NAT64STAT_INC(stats, dropped); 1110 return (NAT64MFREE); 1111 } 1112 1113 dst = nat64_find_route6(&ro, &ip6.ip6_dst, m); 1114 if (dst == NULL) { 1115 FREE_ROUTE(&ro); 1116 NAT64STAT_INC(stats, noroute6); 1117 nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 1118 stats, logdata); 1119 return (NAT64RETURN); 1120 } 1121 ifp = ro.ro_rt->rt_ifp; 1122 if (ro.ro_rt->rt_mtu != 0) 1123 mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu); 1124 else 1125 mtu = ifp->if_mtu; 1126 if (mtu < plen + sizeof(ip6) && (ip->ip_off & htons(IP_DF)) != 0) { 1127 FREE_ROUTE(&ro); 1128 nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 1129 FRAGSZ(mtu) + sizeof(struct ip), stats, logdata); 1130 return (NAT64RETURN); 1131 } 1132 1133 ip6.ip6_flow = htonl(ip->ip_tos << 20); 1134 ip6.ip6_vfc |= IPV6_VERSION; 1135 #ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT 1136 ip6.ip6_hlim = ip->ip_ttl - IPTTLDEC; 1137 #else 1138 /* Forwarding code will decrement HLIM. */ 1139 ip6.ip6_hlim = ip->ip_ttl; 1140 #endif 1141 ip6.ip6_plen = htons(plen); 1142 ip6.ip6_nxt = (proto == IPPROTO_ICMP) ? IPPROTO_ICMPV6: proto; 1143 /* Convert checksums. */ 1144 switch (proto) { 1145 case IPPROTO_TCP: 1146 csum = &TCP(mtodo(m, hlen))->th_sum; 1147 if (lport != 0) { 1148 struct tcphdr *tcp = TCP(mtodo(m, hlen)); 1149 *csum = cksum_adjust(*csum, tcp->th_dport, lport); 1150 tcp->th_dport = lport; 1151 } 1152 *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip)); 1153 break; 1154 case IPPROTO_UDP: 1155 csum = &UDP(mtodo(m, hlen))->uh_sum; 1156 if (lport != 0) { 1157 struct udphdr *udp = UDP(mtodo(m, hlen)); 1158 *csum = cksum_adjust(*csum, udp->uh_dport, lport); 1159 udp->uh_dport = lport; 1160 } 1161 *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip)); 1162 break; 1163 case IPPROTO_ICMP: 1164 m = nat64_icmp_translate(m, &ip6, lport, hlen, stats); 1165 if (m == NULL) { 1166 FREE_ROUTE(&ro); 1167 /* stats already accounted */ 1168 return (NAT64RETURN); 1169 } 1170 } 1171 1172 m_adj(m, hlen); 1173 mbufq_init(&mq, 255); 1174 nat64_fragment6(stats, &ip6, &mq, m, mtu, ip_id, ip_off); 1175 while ((m = mbufq_dequeue(&mq)) != NULL) { 1176 if (nat64_output(ifp, m, dst, (struct route *)&ro, stats, 1177 logdata) != 0) 1178 break; 1179 NAT64STAT_INC(stats, opcnt46); 1180 } 1181 mbufq_drain(&mq); 1182 FREE_ROUTE(&ro); 1183 return (NAT64RETURN); 1184 } 1185 1186 int 1187 nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport, 1188 nat64_stats_block *stats, void *logdata) 1189 { 1190 struct ip ip; 1191 struct icmp6_hdr *icmp6; 1192 struct ip6_frag *ip6f; 1193 struct ip6_hdr *ip6, *ip6i; 1194 uint32_t mtu; 1195 int plen, proto; 1196 uint8_t type, code; 1197 1198 if (hlen == 0) { 1199 ip6 = mtod(m, struct ip6_hdr *); 1200 if (nat64_check_ip6(&ip6->ip6_src) != 0 || 1201 nat64_check_ip6(&ip6->ip6_dst) != 0) 1202 return (NAT64SKIP); 1203 1204 proto = nat64_getlasthdr(m, &hlen); 1205 if (proto != IPPROTO_ICMPV6) { 1206 DPRINTF(DP_DROPS, 1207 "dropped due to mbuf isn't contigious"); 1208 NAT64STAT_INC(stats, dropped); 1209 return (NAT64MFREE); 1210 } 1211 } 1212 1213 /* 1214 * Translate ICMPv6 type and code to ICMPv4 (RFC7915). 1215 * NOTE: ICMPv6 echo handled by nat64_do_handle_ip6(). 1216 */ 1217 icmp6 = mtodo(m, hlen); 1218 mtu = 0; 1219 switch (icmp6->icmp6_type) { 1220 case ICMP6_DST_UNREACH: 1221 type = ICMP_UNREACH; 1222 switch (icmp6->icmp6_code) { 1223 case ICMP6_DST_UNREACH_NOROUTE: 1224 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1225 case ICMP6_DST_UNREACH_ADDR: 1226 code = ICMP_UNREACH_HOST; 1227 break; 1228 case ICMP6_DST_UNREACH_ADMIN: 1229 code = ICMP_UNREACH_HOST_PROHIB; 1230 break; 1231 case ICMP6_DST_UNREACH_NOPORT: 1232 code = ICMP_UNREACH_PORT; 1233 break; 1234 default: 1235 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," 1236 " code %d", icmp6->icmp6_type, 1237 icmp6->icmp6_code); 1238 NAT64STAT_INC(stats, dropped); 1239 return (NAT64MFREE); 1240 } 1241 break; 1242 case ICMP6_PACKET_TOO_BIG: 1243 type = ICMP_UNREACH; 1244 code = ICMP_UNREACH_NEEDFRAG; 1245 mtu = ntohl(icmp6->icmp6_mtu); 1246 if (mtu < IPV6_MMTU) { 1247 DPRINTF(DP_DROPS, "Wrong MTU %d in ICMPv6 type %d," 1248 " code %d", mtu, icmp6->icmp6_type, 1249 icmp6->icmp6_code); 1250 NAT64STAT_INC(stats, dropped); 1251 return (NAT64MFREE); 1252 } 1253 /* 1254 * Adjust MTU to reflect difference between 1255 * IPv6 an IPv4 headers. 1256 */ 1257 mtu -= sizeof(struct ip6_hdr) - sizeof(struct ip); 1258 break; 1259 case ICMP6_TIME_EXCEEDED: 1260 type = ICMP_TIMXCEED; 1261 code = icmp6->icmp6_code; 1262 break; 1263 case ICMP6_PARAM_PROB: 1264 switch (icmp6->icmp6_code) { 1265 case ICMP6_PARAMPROB_HEADER: 1266 type = ICMP_PARAMPROB; 1267 code = ICMP_PARAMPROB_ERRATPTR; 1268 mtu = ntohl(icmp6->icmp6_pptr); 1269 switch (mtu) { 1270 case 0: /* Version/Traffic Class */ 1271 case 1: /* Traffic Class/Flow Label */ 1272 break; 1273 case 4: /* Payload Length */ 1274 case 5: 1275 mtu = 2; 1276 break; 1277 case 6: /* Next Header */ 1278 mtu = 9; 1279 break; 1280 case 7: /* Hop Limit */ 1281 mtu = 8; 1282 break; 1283 default: 1284 if (mtu >= 8 && mtu <= 23) { 1285 mtu = 12; /* Source address */ 1286 break; 1287 } 1288 if (mtu >= 24 && mtu <= 39) { 1289 mtu = 16; /* Destination address */ 1290 break; 1291 } 1292 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," 1293 " code %d, pptr %d", icmp6->icmp6_type, 1294 icmp6->icmp6_code, mtu); 1295 NAT64STAT_INC(stats, dropped); 1296 return (NAT64MFREE); 1297 } 1298 case ICMP6_PARAMPROB_NEXTHEADER: 1299 type = ICMP_UNREACH; 1300 code = ICMP_UNREACH_PROTOCOL; 1301 break; 1302 default: 1303 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," 1304 " code %d, pptr %d", icmp6->icmp6_type, 1305 icmp6->icmp6_code, ntohl(icmp6->icmp6_pptr)); 1306 NAT64STAT_INC(stats, dropped); 1307 return (NAT64MFREE); 1308 } 1309 break; 1310 default: 1311 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d, code %d", 1312 icmp6->icmp6_type, icmp6->icmp6_code); 1313 NAT64STAT_INC(stats, dropped); 1314 return (NAT64MFREE); 1315 } 1316 1317 hlen += sizeof(struct icmp6_hdr); 1318 if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) { 1319 NAT64STAT_INC(stats, dropped); 1320 DPRINTF(DP_DROPS, "Message is too short %d", 1321 m->m_pkthdr.len); 1322 return (NAT64MFREE); 1323 } 1324 /* 1325 * We need at least ICMP_MINLEN bytes of original datagram payload 1326 * to generate ICMP message. It is nice that ICMP_MINLEN is equal 1327 * to sizeof(struct ip6_frag). So, if embedded datagram had a fragment 1328 * header we will not have to do m_pullup() again. 1329 * 1330 * What we have here: 1331 * Outer header: (IPv6iGW, v4mapPRefix+v4exthost) 1332 * Inner header: (v4mapPRefix+v4host, IPv6iHost) [sport, dport] 1333 * We need to translate it to: 1334 * 1335 * Outer header: (alias_host, v4exthost) 1336 * Inner header: (v4exthost, alias_host) [sport, alias_port] 1337 * 1338 * Assume caller function has checked if v4mapPRefix+v4host 1339 * matches configured prefix. 1340 * The only two things we should be provided with are mapping between 1341 * IPv6iHost <> alias_host and between dport and alias_port. 1342 */ 1343 if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) 1344 m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN); 1345 if (m == NULL) { 1346 NAT64STAT_INC(stats, nomem); 1347 return (NAT64RETURN); 1348 } 1349 ip6 = mtod(m, struct ip6_hdr *); 1350 ip6i = mtodo(m, hlen); 1351 ip6f = NULL; 1352 proto = ip6i->ip6_nxt; 1353 plen = ntohs(ip6i->ip6_plen); 1354 hlen += sizeof(struct ip6_hdr); 1355 if (proto == IPPROTO_FRAGMENT) { 1356 if (m->m_pkthdr.len < hlen + sizeof(struct ip6_frag) + 1357 ICMP_MINLEN) 1358 goto fail; 1359 ip6f = mtodo(m, hlen); 1360 proto = ip6f->ip6f_nxt; 1361 plen -= sizeof(struct ip6_frag); 1362 hlen += sizeof(struct ip6_frag); 1363 /* Ajust MTU to reflect frag header size */ 1364 if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG) 1365 mtu -= sizeof(struct ip6_frag); 1366 } 1367 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) { 1368 DPRINTF(DP_DROPS, "Unsupported proto %d in the inner header", 1369 proto); 1370 goto fail; 1371 } 1372 if (nat64_check_ip6(&ip6i->ip6_src) != 0 || 1373 nat64_check_ip6(&ip6i->ip6_dst) != 0) { 1374 DPRINTF(DP_DROPS, "Inner addresses do not passes the check"); 1375 goto fail; 1376 } 1377 /* Check if outer dst is the same as inner src */ 1378 if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6i->ip6_src)) { 1379 DPRINTF(DP_DROPS, "Inner src doesn't match outer dst"); 1380 goto fail; 1381 } 1382 1383 /* Now we need to make a fake IPv4 packet to generate ICMP message */ 1384 ip.ip_dst.s_addr = aaddr; 1385 ip.ip_src.s_addr = nat64_get_ip4(&ip6i->ip6_src); 1386 /* XXX: Make fake ulp header */ 1387 #ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT 1388 ip6i->ip6_hlim += IPV6_HLIMDEC; /* init_ip4hdr will decrement it */ 1389 #endif 1390 nat64_init_ip4hdr(ip6i, ip6f, plen, proto, &ip); 1391 m_adj(m, hlen - sizeof(struct ip)); 1392 bcopy(&ip, mtod(m, void *), sizeof(ip)); 1393 nat64_icmp_reflect(m, type, code, (uint16_t)mtu, stats, logdata); 1394 return (NAT64RETURN); 1395 fail: 1396 /* 1397 * We must call m_freem() because mbuf pointer could be 1398 * changed with m_pullup(). 1399 */ 1400 m_freem(m); 1401 NAT64STAT_INC(stats, dropped); 1402 return (NAT64RETURN); 1403 } 1404 1405 int 1406 nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport, 1407 nat64_stats_block *stats, void *logdata) 1408 { 1409 struct route ro; 1410 struct ip ip; 1411 struct ifnet *ifp; 1412 struct ip6_frag *frag; 1413 struct ip6_hdr *ip6; 1414 struct icmp6_hdr *icmp6; 1415 struct sockaddr *dst; 1416 uint16_t *csum; 1417 uint32_t mtu; 1418 int plen, hlen, proto; 1419 1420 /* 1421 * XXX: we expect ipfw_chk() did m_pullup() up to upper level 1422 * protocol's headers. Also we skip some checks, that ip6_input(), 1423 * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did. 1424 */ 1425 ip6 = mtod(m, struct ip6_hdr *); 1426 if (nat64_check_ip6(&ip6->ip6_src) != 0 || 1427 nat64_check_ip6(&ip6->ip6_dst) != 0) { 1428 return (NAT64SKIP); 1429 } 1430 1431 /* Starting from this point we must not return zero */ 1432 ip.ip_src.s_addr = aaddr; 1433 if (nat64_check_ip4(ip.ip_src.s_addr) != 0) { 1434 DPRINTF(DP_GENERIC, "invalid source address: %08x", 1435 ip.ip_src.s_addr); 1436 /* XXX: stats? */ 1437 return (NAT64MFREE); 1438 } 1439 1440 ip.ip_dst.s_addr = nat64_get_ip4(&ip6->ip6_dst); 1441 if (ip.ip_dst.s_addr == 0) { 1442 /* XXX: stats? */ 1443 return (NAT64MFREE); 1444 } 1445 1446 if (ip6->ip6_hlim <= IPV6_HLIMDEC) { 1447 nat64_icmp6_reflect(m, ICMP6_TIME_EXCEEDED, 1448 ICMP6_TIME_EXCEED_TRANSIT, 0, stats, logdata); 1449 return (NAT64RETURN); 1450 } 1451 1452 hlen = 0; 1453 plen = ntohs(ip6->ip6_plen); 1454 proto = nat64_getlasthdr(m, &hlen); 1455 if (proto < 0) { 1456 DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious"); 1457 NAT64STAT_INC(stats, dropped); 1458 return (NAT64MFREE); 1459 } 1460 frag = NULL; 1461 if (proto == IPPROTO_FRAGMENT) { 1462 /* ipfw_chk should m_pullup up to frag header */ 1463 if (m->m_len < hlen + sizeof(*frag)) { 1464 DPRINTF(DP_DROPS, 1465 "dropped due to mbuf isn't contigious"); 1466 NAT64STAT_INC(stats, dropped); 1467 return (NAT64MFREE); 1468 } 1469 frag = mtodo(m, hlen); 1470 proto = frag->ip6f_nxt; 1471 hlen += sizeof(*frag); 1472 /* Fragmented ICMPv6 is unsupported */ 1473 if (proto == IPPROTO_ICMPV6) { 1474 DPRINTF(DP_DROPS, "dropped due to fragmented ICMPv6"); 1475 NAT64STAT_INC(stats, dropped); 1476 return (NAT64MFREE); 1477 } 1478 /* Fragment length must be multiple of 8 octets */ 1479 if ((frag->ip6f_offlg & IP6F_MORE_FRAG) != 0 && 1480 ((plen + sizeof(struct ip6_hdr) - hlen) & 0x7) != 0) { 1481 nat64_icmp6_reflect(m, ICMP6_PARAM_PROB, 1482 ICMP6_PARAMPROB_HEADER, 1483 offsetof(struct ip6_hdr, ip6_plen), stats, 1484 logdata); 1485 return (NAT64RETURN); 1486 } 1487 } 1488 plen -= hlen - sizeof(struct ip6_hdr); 1489 if (plen < 0 || m->m_pkthdr.len < plen + hlen) { 1490 DPRINTF(DP_DROPS, "plen %d, pkthdr.len %d, hlen %d", 1491 plen, m->m_pkthdr.len, hlen); 1492 NAT64STAT_INC(stats, dropped); 1493 return (NAT64MFREE); 1494 } 1495 1496 icmp6 = NULL; /* Make gcc happy */ 1497 if (proto == IPPROTO_ICMPV6) { 1498 icmp6 = mtodo(m, hlen); 1499 if (icmp6->icmp6_type != ICMP6_ECHO_REQUEST && 1500 icmp6->icmp6_type != ICMP6_ECHO_REPLY) 1501 return (nat64_handle_icmp6(m, hlen, aaddr, aport, 1502 stats, logdata)); 1503 } 1504 dst = nat64_find_route4(&ro, ip.ip_dst.s_addr, m); 1505 if (dst == NULL) { 1506 FREE_ROUTE(&ro); 1507 NAT64STAT_INC(stats, noroute4); 1508 nat64_icmp6_reflect(m, ICMP6_DST_UNREACH, 1509 ICMP6_DST_UNREACH_NOROUTE, 0, stats, logdata); 1510 return (NAT64RETURN); 1511 } 1512 1513 ifp = ro.ro_rt->rt_ifp; 1514 if (ro.ro_rt->rt_mtu != 0) 1515 mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu); 1516 else 1517 mtu = ifp->if_mtu; 1518 if (mtu < plen + sizeof(ip)) { 1519 FREE_ROUTE(&ro); 1520 nat64_icmp6_reflect(m, ICMP6_PACKET_TOO_BIG, 0, mtu, stats, 1521 logdata); 1522 return (NAT64RETURN); 1523 } 1524 nat64_init_ip4hdr(ip6, frag, plen, proto, &ip); 1525 /* Convert checksums. */ 1526 switch (proto) { 1527 case IPPROTO_TCP: 1528 csum = &TCP(mtodo(m, hlen))->th_sum; 1529 if (aport != 0) { 1530 struct tcphdr *tcp = TCP(mtodo(m, hlen)); 1531 *csum = cksum_adjust(*csum, tcp->th_sport, aport); 1532 tcp->th_sport = aport; 1533 } 1534 *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip)); 1535 break; 1536 case IPPROTO_UDP: 1537 csum = &UDP(mtodo(m, hlen))->uh_sum; 1538 if (aport != 0) { 1539 struct udphdr *udp = UDP(mtodo(m, hlen)); 1540 *csum = cksum_adjust(*csum, udp->uh_sport, aport); 1541 udp->uh_sport = aport; 1542 } 1543 *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip)); 1544 break; 1545 case IPPROTO_ICMPV6: 1546 /* Checksum in ICMPv6 covers pseudo header */ 1547 csum = &icmp6->icmp6_cksum; 1548 *csum = cksum_add(*csum, in6_cksum_pseudo(ip6, plen, 1549 IPPROTO_ICMPV6, 0)); 1550 /* Convert ICMPv6 types to ICMP */ 1551 mtu = *(uint16_t *)icmp6; /* save old word for cksum_adjust */ 1552 if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST) 1553 icmp6->icmp6_type = ICMP_ECHO; 1554 else /* ICMP6_ECHO_REPLY */ 1555 icmp6->icmp6_type = ICMP_ECHOREPLY; 1556 *csum = cksum_adjust(*csum, (uint16_t)mtu, *(uint16_t *)icmp6); 1557 if (aport != 0) { 1558 uint16_t old_id = icmp6->icmp6_id; 1559 icmp6->icmp6_id = aport; 1560 *csum = cksum_adjust(*csum, old_id, aport); 1561 } 1562 break; 1563 }; 1564 1565 m_adj(m, hlen - sizeof(ip)); 1566 bcopy(&ip, mtod(m, void *), sizeof(ip)); 1567 if (nat64_output(ifp, m, dst, &ro, stats, logdata) == 0) 1568 NAT64STAT_INC(stats, opcnt64); 1569 FREE_ROUTE(&ro); 1570 return (NAT64RETURN); 1571 } 1572 1573