/*-
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
 *      The Regents of the University of California.
 * Copyright (c) 2008 Robert N. M. Watson
 * Copyright (c) 2010-2011 Juniper Networks, Inc.
 * All rights reserved.
 *
 * Portions of this software were developed by Robert N. M. Watson under
 * contract to Juniper Networks, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)udp_usrreq.c        8.6 (Berkeley) 5/23/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ipfw.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"

#include <sys/param.h>
#include <sys/domain.h>
#include <sys/eventhandler.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>

#include <vm/uma.h>

#include <net/if.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>
#include <netinet/ip_var.h>
#include <netinet/ip_options.h>
#ifdef INET6
#include <netinet6/ip6_var.h>
#endif
#include <netinet/udp.h>
#include <netinet/udp_var.h>

#ifdef IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/esp.h>
#endif

#include <machine/in_cksum.h>

#include <security/mac/mac_framework.h>

/*
 * UDP protocol implementation.
 * Per RFC 768, August, 1980.
 */

/*
 * BSD 4.2 defaulted the udp checksum to be off.
 * Turning off udp checksums removes the only data integrity mechanism for
 * these packets, and malformed packets that would otherwise be discarded
 * due to bad checksums may cause problems (especially for NFS data blocks).
 */
static int udp_cksum = 1;
SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW, &udp_cksum,
    0, "compute udp checksum");

int udp_log_in_vain = 0;
SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
    &udp_log_in_vain, 0, "Log all incoming UDP packets");

VNET_DEFINE(int, udp_blackhole) = 0;
SYSCTL_VNET_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW,
    &VNET_NAME(udp_blackhole), 0,
    "Do not send port unreachables for refused connects");

u_long udp_sendspace = 9216;            /* really max datagram size */
                                        /* 40 1K datagrams */
SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
    &udp_sendspace, 0, "Maximum outgoing UDP datagram size");

u_long udp_recvspace = 40 * (1024 +
#ifdef INET6
                                      sizeof(struct sockaddr_in6)
#else
                                      sizeof(struct sockaddr_in)
#endif
                                      );

SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
    &udp_recvspace, 0, "Maximum space for incoming UDP datagrams");

VNET_DEFINE(struct inpcbhead, udb);             /* from udp_var.h */
VNET_DEFINE(struct inpcbinfo, udbinfo);
static VNET_DEFINE(uma_zone_t, udpcb_zone);
#define V_udpcb_zone            VNET(udpcb_zone)

#ifndef UDBHASHSIZE
#define UDBHASHSIZE     128
#endif

VNET_DEFINE(struct udpstat, udpstat);           /* from udp_var.h */
SYSCTL_VNET_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RW,
    &VNET_NAME(udpstat), udpstat,
    "UDP statistics (struct udpstat, netinet/udp_var.h)");

#ifdef INET
static void     udp_detach(struct socket *so);
static int      udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
                    struct mbuf *, struct thread *);
#endif

#ifdef IPSEC
#ifdef IPSEC_NAT_T
#define UF_ESPINUDP_ALL (UF_ESPINUDP_NON_IKE|UF_ESPINUDP)
#ifdef INET
static struct mbuf *udp4_espdecap(struct inpcb *, struct mbuf *, int);
#endif
#endif /* IPSEC_NAT_T */
#endif /* IPSEC */

static void
udp_zone_change(void *tag)
{

        uma_zone_set_max(V_udbinfo.ipi_zone, maxsockets);
        uma_zone_set_max(V_udpcb_zone, maxsockets);
}

static int
udp_inpcb_init(void *mem, int size, int flags)
{
        struct inpcb *inp;

        inp = mem;
        INP_LOCK_INIT(inp, "inp", "udpinp");
        return (0);
}

void
udp_init(void)
{

        in_pcbinfo_init(&V_udbinfo, "udp", &V_udb, UDBHASHSIZE, UDBHASHSIZE,
            "udp_inpcb", udp_inpcb_init, NULL, UMA_ZONE_NOFREE);
        V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb),
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
        uma_zone_set_max(V_udpcb_zone, maxsockets);
        EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL,
            EVENTHANDLER_PRI_ANY);
}

/*
 * Kernel module interface for updating udpstat.  The argument is an index
 * into udpstat treated as an array of u_long.  While this encodes the
 * general layout of udpstat into the caller, it doesn't encode its location,
 * so that future changes to add, for example, per-CPU stats support won't
 * cause binary compatibility problems for kernel modules.
 */
void
kmod_udpstat_inc(int statnum)
{

        (*((u_long *)&V_udpstat + statnum))++;
}

int
udp_newudpcb(struct inpcb *inp)
{
        struct udpcb *up;

        up = uma_zalloc(V_udpcb_zone, M_NOWAIT | M_ZERO);
        if (up == NULL)
                return (ENOBUFS);
        inp->inp_ppcb = up;
        return (0);
}

void
udp_discardcb(struct udpcb *up)
{

        uma_zfree(V_udpcb_zone, up);
}

#ifdef VIMAGE
void
udp_destroy(void)
{

        in_pcbinfo_destroy(&V_udbinfo);
        uma_zdestroy(V_udpcb_zone);
}
#endif

#ifdef INET
/*
 * Subroutine of udp_input(), which appends the provided mbuf chain to the
 * passed pcb/socket.  The caller must provide a sockaddr_in via udp_in that
 * contains the source address.  If the socket ends up being an IPv6 socket,
 * udp_append() will convert to a sockaddr_in6 before passing the address
 * into the socket code.
 */
static void
udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
    struct sockaddr_in *udp_in)
{
        struct sockaddr *append_sa;
        struct socket *so;
        struct mbuf *opts = 0;
#ifdef INET6
        struct sockaddr_in6 udp_in6;
#endif
        struct udpcb *up;

        INP_LOCK_ASSERT(inp);

        /*
         * Engage the tunneling protocol.
         */
        up = intoudpcb(inp);
        if (up->u_tun_func != NULL) {
                (*up->u_tun_func)(n, off, inp);
                return;
        }

        if (n == NULL)
                return;

        off += sizeof(struct udphdr);

#ifdef IPSEC
        /* Check AH/ESP integrity. */
        if (ipsec4_in_reject(n, inp)) {
                m_freem(n);
                V_ipsec4stat.in_polvio++;
                return;
        }
#ifdef IPSEC_NAT_T
        up = intoudpcb(inp);
        KASSERT(up != NULL, ("%s: udpcb NULL", __func__));
        if (up->u_flags & UF_ESPINUDP_ALL) {    /* IPSec UDP encaps. */
                n = udp4_espdecap(inp, n, off);
                if (n == NULL)                  /* Consumed. */
                        return;
        }
#endif /* IPSEC_NAT_T */
#endif /* IPSEC */
#ifdef MAC
        if (mac_inpcb_check_deliver(inp, n) != 0) {
                m_freem(n);
                return;
        }
#endif /* MAC */
        if (inp->inp_flags & INP_CONTROLOPTS ||
            inp->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) {
#ifdef INET6
                if (inp->inp_vflag & INP_IPV6)
                        (void)ip6_savecontrol_v4(inp, n, &opts, NULL);
                else
#endif /* INET6 */
                        ip_savecontrol(inp, &opts, ip, n);
        }
#ifdef INET6
        if (inp->inp_vflag & INP_IPV6) {
                bzero(&udp_in6, sizeof(udp_in6));
                udp_in6.sin6_len = sizeof(udp_in6);
                udp_in6.sin6_family = AF_INET6;
                in6_sin_2_v4mapsin6(udp_in, &udp_in6);
                append_sa = (struct sockaddr *)&udp_in6;
        } else
#endif /* INET6 */
                append_sa = (struct sockaddr *)udp_in;
        m_adj(n, off);

        so = inp->inp_socket;
        SOCKBUF_LOCK(&so->so_rcv);
        if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) {
                SOCKBUF_UNLOCK(&so->so_rcv);
                m_freem(n);
                if (opts)
                        m_freem(opts);
                UDPSTAT_INC(udps_fullsock);
        } else
                sorwakeup_locked(so);
}

void
udp_input(struct mbuf *m, int off)
{
        int iphlen = off;
        struct ip *ip;
        struct udphdr *uh;
        struct ifnet *ifp;
        struct inpcb *inp;
        int len;
        struct ip save_ip;
        struct sockaddr_in udp_in;
#ifdef IPFIREWALL_FORWARD
        struct m_tag *fwd_tag;
#endif

        ifp = m->m_pkthdr.rcvif;
        UDPSTAT_INC(udps_ipackets);

        /*
         * Strip IP options, if any; should skip this, make available to
         * user, and use on returned packets, but we don't yet have a way to
         * check the checksum with options still present.
         */
        if (iphlen > sizeof (struct ip)) {
                ip_stripoptions(m, (struct mbuf *)0);
                iphlen = sizeof(struct ip);
        }

        /*
         * Get IP and UDP header together in first mbuf.
         */
        ip = mtod(m, struct ip *);
        if (m->m_len < iphlen + sizeof(struct udphdr)) {
                if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == 0) {
                        UDPSTAT_INC(udps_hdrops);
                        return;
                }
                ip = mtod(m, struct ip *);
        }
        uh = (struct udphdr *)((caddr_t)ip + iphlen);

        /*
         * Destination port of 0 is illegal, based on RFC768.
         */
        if (uh->uh_dport == 0)
                goto badunlocked;

        /*
         * Construct sockaddr format source address.  Stuff source address
         * and datagram in user buffer.
         */
        bzero(&udp_in, sizeof(udp_in));
        udp_in.sin_len = sizeof(udp_in);
        udp_in.sin_family = AF_INET;
        udp_in.sin_port = uh->uh_sport;
        udp_in.sin_addr = ip->ip_src;

        /*
         * Make mbuf data length reflect UDP length.  If not enough data to
         * reflect UDP length, drop.
         */
        len = ntohs((u_short)uh->uh_ulen);
        if (ip->ip_len != len) {
                if (len > ip->ip_len || len < sizeof(struct udphdr)) {
                        UDPSTAT_INC(udps_badlen);
                        goto badunlocked;
                }
                m_adj(m, len - ip->ip_len);
                /* ip->ip_len = len; */
        }

        /*
         * Save a copy of the IP header in case we want to restore it for
         * sending an ICMP error message in response.
         */
        if (!V_udp_blackhole)
                save_ip = *ip;
        else
                memset(&save_ip, 0, sizeof(save_ip));

        /*
         * Checksum extended UDP header and data.
         */
        if (uh->uh_sum) {
                u_short uh_sum;

                if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
                        if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
                                uh_sum = m->m_pkthdr.csum_data;
                        else
                                uh_sum = in_pseudo(ip->ip_src.s_addr,
                                    ip->ip_dst.s_addr, htonl((u_short)len +
                                    m->m_pkthdr.csum_data + IPPROTO_UDP));
                        uh_sum ^= 0xffff;
                } else {
                        char b[9];

                        bcopy(((struct ipovly *)ip)->ih_x1, b, 9);
                        bzero(((struct ipovly *)ip)->ih_x1, 9);
                        ((struct ipovly *)ip)->ih_len = uh->uh_ulen;
                        uh_sum = in_cksum(m, len + sizeof (struct ip));
                        bcopy(b, ((struct ipovly *)ip)->ih_x1, 9);
                }
                if (uh_sum) {
                        UDPSTAT_INC(udps_badsum);
                        m_freem(m);
                        return;
                }
        } else
                UDPSTAT_INC(udps_nosum);

#ifdef IPFIREWALL_FORWARD
        /*
         * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
         */
        fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
        if (fwd_tag != NULL) {
                struct sockaddr_in *next_hop;

                /*
                 * Do the hack.
                 */
                next_hop = (struct sockaddr_in *)(fwd_tag + 1);
                ip->ip_dst = next_hop->sin_addr;
                uh->uh_dport = ntohs(next_hop->sin_port);

                /*
                 * Remove the tag from the packet.  We don't need it anymore.
                 */
                m_tag_delete(m, fwd_tag);
        }
#endif

        if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
            in_broadcast(ip->ip_dst, ifp)) {
                struct inpcb *last;
                struct ip_moptions *imo;

                INP_INFO_RLOCK(&V_udbinfo);
                last = NULL;
                LIST_FOREACH(inp, &V_udb, inp_list) {
                        if (inp->inp_lport != uh->uh_dport)
                                continue;
#ifdef INET6
                        if ((inp->inp_vflag & INP_IPV4) == 0)
                                continue;
#endif
                        if (inp->inp_laddr.s_addr != INADDR_ANY &&
                            inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
                                continue;
                        if (inp->inp_faddr.s_addr != INADDR_ANY &&
                            inp->inp_faddr.s_addr != ip->ip_src.s_addr)
                                continue;
                        if (inp->inp_fport != 0 &&
                            inp->inp_fport != uh->uh_sport)
                                continue;

                        INP_RLOCK(inp);

                        /*
                         * XXXRW: Because we weren't holding either the inpcb
                         * or the hash lock when we checked for a match
                         * before, we should probably recheck now that the
                         * inpcb lock is held.
                         */

                        /*
                         * Handle socket delivery policy for any-source
                         * and source-specific multicast. [RFC3678]
                         */
                        imo = inp->inp_moptions;
                        if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
                                struct sockaddr_in group;
                                int blocked;

                                if (imo == NULL) {
                                        INP_RUNLOCK(inp);
                                        continue;
                                }
                                bzero(&group, sizeof(struct sockaddr_in));
                                group.sin_len = sizeof(struct sockaddr_in);
                                group.sin_family = AF_INET;
                                group.sin_addr = ip->ip_dst;

                                blocked = imo_multi_filter(imo, ifp,
                                    (struct sockaddr *)&group,
                                    (struct sockaddr *)&udp_in);
                                if (blocked != MCAST_PASS) {
                                        if (blocked == MCAST_NOTGMEMBER)
                                                IPSTAT_INC(ips_notmember);
                                        if (blocked == MCAST_NOTSMEMBER ||
                                            blocked == MCAST_MUTED)
                                                UDPSTAT_INC(udps_filtermcast);
                                        INP_RUNLOCK(inp);
                                        continue;
                                }
                        }
                        if (last != NULL) {
                                struct mbuf *n;

                                n = m_copy(m, 0, M_COPYALL);
                                udp_append(last, ip, n, iphlen, &udp_in);
                                INP_RUNLOCK(last);
                        }
                        last = inp;
                        /*
                         * Don't look for additional matches if this one does
                         * not have either the SO_REUSEPORT or SO_REUSEADDR
                         * socket options set.  This heuristic avoids
                         * searching through all pcbs in the common case of a
                         * non-shared port.  It assumes that an application
                         * will never clear these options after setting them.
                         */
                        if ((last->inp_socket->so_options &
                            (SO_REUSEPORT|SO_REUSEADDR)) == 0)
                                break;
                }

                if (last == NULL) {
                        /*
                         * No matching pcb found; discard datagram.  (No need
                         * to send an ICMP Port Unreachable for a broadcast
                         * or multicast datagram.)
                         */
                        UDPSTAT_INC(udps_noportbcast);
                        if (inp)
                                INP_RUNLOCK(inp);
                        INP_INFO_RUNLOCK(&V_udbinfo);
                        goto badunlocked;
                }
                udp_append(last, ip, m, iphlen, &udp_in);
                INP_RUNLOCK(last);
                INP_INFO_RUNLOCK(&V_udbinfo);
                return;
        }

        /*
         * Locate pcb for datagram.
         */
        inp = in_pcblookup_mbuf(&V_udbinfo, ip->ip_src, uh->uh_sport,
            ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB,
            ifp, m);
        if (inp == NULL) {
                if (udp_log_in_vain) {
                        char buf[4*sizeof "123"];

                        strcpy(buf, inet_ntoa(ip->ip_dst));
                        log(LOG_INFO,
                            "Connection attempt to UDP %s:%d from %s:%d\n",
                            buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src),
                            ntohs(uh->uh_sport));
                }
                UDPSTAT_INC(udps_noport);
                if (m->m_flags & (M_BCAST | M_MCAST)) {
                        UDPSTAT_INC(udps_noportbcast);
                        goto badunlocked;
                }
                if (V_udp_blackhole)
                        goto badunlocked;
                if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
                        goto badunlocked;
                *ip = save_ip;
                ip->ip_len += iphlen;
                icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
                return;
        }

        /*
         * Check the minimum TTL for socket.
         */
        INP_RLOCK_ASSERT(inp);
        if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) {
                INP_RUNLOCK(inp);
                m_freem(m);
                return;
        }
        udp_append(inp, ip, m, iphlen, &udp_in);
        INP_RUNLOCK(inp);
        return;

badunlocked:
        m_freem(m);
}
#endif /* INET */

/*
 * Notify a udp user of an asynchronous error; just wake up so that they can
 * collect error status.
 */
struct inpcb *
udp_notify(struct inpcb *inp, int errno)
{

        /*
         * While udp_ctlinput() always calls udp_notify() with a read lock
         * when invoking it directly, in_pcbnotifyall() currently uses write
         * locks due to sharing code with TCP.  For now, accept either a read
         * or a write lock, but a read lock is sufficient.
         */
        INP_LOCK_ASSERT(inp);

        inp->inp_socket->so_error = errno;
        sorwakeup(inp->inp_socket);
        sowwakeup(inp->inp_socket);
        return (inp);
}

#ifdef INET
void
udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
{
        struct ip *ip = vip;
        struct udphdr *uh;
        struct in_addr faddr;
        struct inpcb *inp;

        faddr = ((struct sockaddr_in *)sa)->sin_addr;
        if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
                return;

        /*
         * Redirects don't need to be handled up here.
         */
        if (PRC_IS_REDIRECT(cmd))
                return;

        /*
         * Hostdead is ugly because it goes linearly through all PCBs.
         *
         * XXX: We never get this from ICMP, otherwise it makes an excellent
         * DoS attack on machines with many connections.
         */
        if (cmd == PRC_HOSTDEAD)
                ip = NULL;
        else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
                return;
        if (ip != NULL) {
                uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
                inp = in_pcblookup(&V_udbinfo, faddr, uh->uh_dport,
                    ip->ip_src, uh->uh_sport, INPLOOKUP_RLOCKPCB, NULL);
                if (inp != NULL) {
                        INP_RLOCK_ASSERT(inp);
                        if (inp->inp_socket != NULL) {
                                udp_notify(inp, inetctlerrmap[cmd]);
                        }
                        INP_RUNLOCK(inp);
                }
        } else
                in_pcbnotifyall(&V_udbinfo, faddr, inetctlerrmap[cmd],
                    udp_notify);
}
#endif /* INET */

static int
udp_pcblist(SYSCTL_HANDLER_ARGS)
{
        int error, i, n;
        struct inpcb *inp, **inp_list;
        inp_gen_t gencnt;
        struct xinpgen xig;

        /*
         * The process of preparing the PCB list is too time-consuming and
         * resource-intensive to repeat twice on every request.
         */
        if (req->oldptr == 0) {
                n = V_udbinfo.ipi_count;
                n += imax(n / 8, 10);
                req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
                return (0);
        }

        if (req->newptr != 0)
                return (EPERM);

        /*
         * OK, now we're committed to doing something.
         */
        INP_INFO_RLOCK(&V_udbinfo);
        gencnt = V_udbinfo.ipi_gencnt;
        n = V_udbinfo.ipi_count;
        INP_INFO_RUNLOCK(&V_udbinfo);

        error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
            + n * sizeof(struct xinpcb));
        if (error != 0)
                return (error);

        xig.xig_len = sizeof xig;
        xig.xig_count = n;
        xig.xig_gen = gencnt;
        xig.xig_sogen = so_gencnt;
        error = SYSCTL_OUT(req, &xig, sizeof xig);
        if (error)
                return (error);

        inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
        if (inp_list == 0)
                return (ENOMEM);

        INP_INFO_RLOCK(&V_udbinfo);
        for (inp = LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n;
             inp = LIST_NEXT(inp, inp_list)) {
                INP_WLOCK(inp);
                if (inp->inp_gencnt <= gencnt &&
                    cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
                        in_pcbref(inp);
                        inp_list[i++] = inp;
                }
                INP_WUNLOCK(inp);
        }
        INP_INFO_RUNLOCK(&V_udbinfo);
        n = i;

        error = 0;
        for (i = 0; i < n; i++) {
                inp = inp_list[i];
                INP_RLOCK(inp);
                if (inp->inp_gencnt <= gencnt) {
                        struct xinpcb xi;

                        bzero(&xi, sizeof(xi));
                        xi.xi_len = sizeof xi;
                        /* XXX should avoid extra copy */
                        bcopy(inp, &xi.xi_inp, sizeof *inp);
                        if (inp->inp_socket)
                                sotoxsocket(inp->inp_socket, &xi.xi_socket);
                        xi.xi_inp.inp_gencnt = inp->inp_gencnt;
                        INP_RUNLOCK(inp);
                        error = SYSCTL_OUT(req, &xi, sizeof xi);
                } else
                        INP_RUNLOCK(inp);
        }
        INP_INFO_WLOCK(&V_udbinfo);
        for (i = 0; i < n; i++) {
                inp = inp_list[i];
                INP_RLOCK(inp);
                if (!in_pcbrele_rlocked(inp))
                        INP_RUNLOCK(inp);
        }
        INP_INFO_WUNLOCK(&V_udbinfo);

        if (!error) {
                /*
                 * Give the user an updated idea of our state.  If the
                 * generation differs from what we told her before, she knows
                 * that something happened while we were processing this
                 * request, and it might be necessary to retry.
                 */
                INP_INFO_RLOCK(&V_udbinfo);
                xig.xig_gen = V_udbinfo.ipi_gencnt;
                xig.xig_sogen = so_gencnt;
                xig.xig_count = V_udbinfo.ipi_count;
                INP_INFO_RUNLOCK(&V_udbinfo);
                error = SYSCTL_OUT(req, &xig, sizeof xig);
        }
        free(inp_list, M_TEMP);
        return (error);
}

SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist,
    CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0,
    udp_pcblist, "S,xinpcb", "List of active UDP sockets");

#ifdef INET
static int
udp_getcred(SYSCTL_HANDLER_ARGS)
{
        struct xucred xuc;
        struct sockaddr_in addrs[2];
        struct inpcb *inp;
        int error;

        error = priv_check(req->td, PRIV_NETINET_GETCRED);
        if (error)
                return (error);
        error = SYSCTL_IN(req, addrs, sizeof(addrs));
        if (error)
                return (error);
        inp = in_pcblookup(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
            addrs[0].sin_addr, addrs[0].sin_port,
            INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
        if (inp != NULL) {
                INP_RLOCK_ASSERT(inp);
                if (inp->inp_socket == NULL)
                        error = ENOENT;
                if (error == 0)
                        error = cr_canseeinpcb(req->td->td_ucred, inp);
                if (error == 0)
                        cru2x(inp->inp_cred, &xuc);
                INP_RUNLOCK(inp);
        } else
                error = ENOENT;
        if (error == 0)
                error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
        return (error);
}

SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred,
    CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
    udp_getcred, "S,xucred", "Get the xucred of a UDP connection");
#endif /* INET */

int
udp_ctloutput(struct socket *so, struct sockopt *sopt)
{
        int error = 0, optval;
        struct inpcb *inp;
#ifdef IPSEC_NAT_T
        struct udpcb *up;
#endif

        inp = sotoinpcb(so);
        KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
        INP_WLOCK(inp);
        if (sopt->sopt_level != IPPROTO_UDP) {
#ifdef INET6
                if (INP_CHECK_SOCKAF(so, AF_INET6)) {
                        INP_WUNLOCK(inp);
                        error = ip6_ctloutput(so, sopt);
                }
#endif
#if defined(INET) && defined(INET6)
                else
#endif
#ifdef INET
                {
                        INP_WUNLOCK(inp);
                        error = ip_ctloutput(so, sopt);
                }
#endif
                return (error);
        }

        switch (sopt->sopt_dir) {
        case SOPT_SET:
                switch (sopt->sopt_name) {
                case UDP_ENCAP:
                        INP_WUNLOCK(inp);
                        error = sooptcopyin(sopt, &optval, sizeof optval,
                            sizeof optval);
                        if (error)
                                break;
                        inp = sotoinpcb(so);
                        KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
                        INP_WLOCK(inp);
#ifdef IPSEC_NAT_T
                        up = intoudpcb(inp);
                        KASSERT(up != NULL, ("%s: up == NULL", __func__));
#endif
                        switch (optval) {
                        case 0:
                                /* Clear all UDP encap. */
#ifdef IPSEC_NAT_T
                                up->u_flags &= ~UF_ESPINUDP_ALL;
#endif
                                break;
#ifdef IPSEC_NAT_T
                        case UDP_ENCAP_ESPINUDP:
                        case UDP_ENCAP_ESPINUDP_NON_IKE:
                                up->u_flags &= ~UF_ESPINUDP_ALL;
                                if (optval == UDP_ENCAP_ESPINUDP)
                                        up->u_flags |= UF_ESPINUDP;
                                else if (optval == UDP_ENCAP_ESPINUDP_NON_IKE)
                                        up->u_flags |= UF_ESPINUDP_NON_IKE;
                                break;
#endif
                        default:
                                error = EINVAL;
                                break;
                        }
                        INP_WUNLOCK(inp);
                        break;
                default:
                        INP_WUNLOCK(inp);
                        error = ENOPROTOOPT;
                        break;
                }
                break;
        case SOPT_GET:
                switch (sopt->sopt_name) {
#ifdef IPSEC_NAT_T
                case UDP_ENCAP:
                        up = intoudpcb(inp);
                        KASSERT(up != NULL, ("%s: up == NULL", __func__));
                        optval = up->u_flags & UF_ESPINUDP_ALL;
                        INP_WUNLOCK(inp);
                        error = sooptcopyout(sopt, &optval, sizeof optval);
                        break;
#endif
                default:
                        INP_WUNLOCK(inp);
                        error = ENOPROTOOPT;
                        break;
                }
                break;
        }
        return (error);
}

#ifdef INET
#define UH_WLOCKED      2
#define UH_RLOCKED      1
#define UH_UNLOCKED     0
static int
udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
    struct mbuf *control, struct thread *td)
{
        struct udpiphdr *ui;
        int len = m->m_pkthdr.len;
        struct in_addr faddr, laddr;
        struct cmsghdr *cm;
        struct sockaddr_in *sin, src;
        int error = 0;
        int ipflags;
        u_short fport, lport;
        int unlock_udbinfo;

        /*
         * udp_output() may need to temporarily bind or connect the current
         * inpcb.  As such, we don't know up front whether we will need the
         * pcbinfo lock or not.  Do any work to decide what is needed up
         * front before acquiring any locks.
         */
        if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
                if (control)
                        m_freem(control);
                m_freem(m);
                return (EMSGSIZE);
        }

        src.sin_family = 0;
        if (control != NULL) {
                /*
                 * XXX: Currently, we assume all the optional information is
                 * stored in a single mbuf.
                 */
                if (control->m_next) {
                        m_freem(control);
                        m_freem(m);
                        return (EINVAL);
                }
                for (; control->m_len > 0;
                    control->m_data += CMSG_ALIGN(cm->cmsg_len),
                    control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
                        cm = mtod(control, struct cmsghdr *);
                        if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0
                            || cm->cmsg_len > control->m_len) {
                                error = EINVAL;
                                break;
                        }
                        if (cm->cmsg_level != IPPROTO_IP)
                                continue;

                        switch (cm->cmsg_type) {
                        case IP_SENDSRCADDR:
                                if (cm->cmsg_len !=
                                    CMSG_LEN(sizeof(struct in_addr))) {
                                        error = EINVAL;
                                        break;
                                }
                                bzero(&src, sizeof(src));
                                src.sin_family = AF_INET;
                                src.sin_len = sizeof(src);
                                src.sin_port = inp->inp_lport;
                                src.sin_addr =
                                    *(struct in_addr *)CMSG_DATA(cm);
                                break;

                        default:
                                error = ENOPROTOOPT;
                                break;
                        }
                        if (error)
                                break;
                }
                m_freem(control);
        }
        if (error) {
                m_freem(m);
                return (error);
        }

        /*
         * Depending on whether or not the application has bound or connected
         * the socket, we may have to do varying levels of work.  The optimal
         * case is for a connected UDP socket, as a global lock isn't
         * required at all.
         *
         * In order to decide which we need, we require stability of the
         * inpcb binding, which we ensure by acquiring a read lock on the
         * inpcb.  This doesn't strictly follow the lock order, so we play
         * the trylock and retry game; note that we may end up with more
         * conservative locks than required the second time around, so later
         * assertions have to accept that.  Further analysis of the number of
         * misses under contention is required.
         *
         * XXXRW: Check that hash locking update here is correct.
         */
        sin = (struct sockaddr_in *)addr;
        INP_RLOCK(inp);
        if (sin != NULL &&
            (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
                INP_RUNLOCK(inp);
                INP_WLOCK(inp);
                INP_HASH_WLOCK(&V_udbinfo);
                unlock_udbinfo = UH_WLOCKED;
        } else if ((sin != NULL && (
            (sin->sin_addr.s_addr == INADDR_ANY) ||
            (sin->sin_addr.s_addr == INADDR_BROADCAST) ||
            (inp->inp_laddr.s_addr == INADDR_ANY) ||
            (inp->inp_lport == 0))) ||
            (src.sin_family == AF_INET)) {
                INP_HASH_RLOCK(&V_udbinfo);
                unlock_udbinfo = UH_RLOCKED;
        } else
                unlock_udbinfo = UH_UNLOCKED;

        /*
         * If the IP_SENDSRCADDR control message was specified, override the
         * source address for this datagram.  Its use is invalidated if the
         * address thus specified is incomplete or clobbers other inpcbs.
         */
        laddr = inp->inp_laddr;
        lport = inp->inp_lport;
        if (src.sin_family == AF_INET) {
                INP_HASH_LOCK_ASSERT(&V_udbinfo);
                if ((lport == 0) ||
                    (laddr.s_addr == INADDR_ANY &&
                     src.sin_addr.s_addr == INADDR_ANY)) {
                        error = EINVAL;
                        goto release;
                }
                error = in_pcbbind_setup(inp, (struct sockaddr *)&src,
                    &laddr.s_addr, &lport, td->td_ucred);
                if (error)
                        goto release;
        }

        /*
         * If a UDP socket has been connected, then a local address/port will
         * have been selected and bound.
         *
         * If a UDP socket has not been connected to, then an explicit
         * destination address must be used, in which case a local
         * address/port may not have been selected and bound.
         */
        if (sin != NULL) {
                INP_LOCK_ASSERT(inp);
                if (inp->inp_faddr.s_addr != INADDR_ANY) {
                        error = EISCONN;
                        goto release;
                }

                /*
                 * Jail may rewrite the destination address, so let it do
                 * that before we use it.
                 */
                error = prison_remote_ip4(td->td_ucred, &sin->sin_addr);
                if (error)
                        goto release;

                /*
                 * If a local address or port hasn't yet been selected, or if
                 * the destination address needs to be rewritten due to using
                 * a special INADDR_ constant, invoke in_pcbconnect_setup()
                 * to do the heavy lifting.  Once a port is selected, we
                 * commit the binding back to the socket; we also commit the
                 * binding of the address if in jail.
                 *
                 * If we already have a valid binding and we're not
                 * requesting a destination address rewrite, use a fast path.
                 */
                if (inp->inp_laddr.s_addr == INADDR_ANY ||
                    inp->inp_lport == 0 ||
                    sin->sin_addr.s_addr == INADDR_ANY ||
                    sin->sin_addr.s_addr == INADDR_BROADCAST) {
                        INP_HASH_LOCK_ASSERT(&V_udbinfo);
                        error = in_pcbconnect_setup(inp, addr, &laddr.s_addr,
                            &lport, &faddr.s_addr, &fport, NULL,
                            td->td_ucred);
                        if (error)
                                goto release;

                        /*
                         * XXXRW: Why not commit the port if the address is
                         * !INADDR_ANY?
                         */
                        /* Commit the local port if newly assigned. */
                        if (inp->inp_laddr.s_addr == INADDR_ANY &&
                            inp->inp_lport == 0) {
                                INP_WLOCK_ASSERT(inp);
                                INP_HASH_WLOCK_ASSERT(&V_udbinfo);
                                /*
                                 * Remember addr if jailed, to prevent
                                 * rebinding.
                                 */
                                if (prison_flag(td->td_ucred, PR_IP4))
                                        inp->inp_laddr = laddr;
                                inp->inp_lport = lport;
                                if (in_pcbinshash(inp) != 0) {
                                        inp->inp_lport = 0;
                                        error = EAGAIN;
                                        goto release;
                                }
                                inp->inp_flags |= INP_ANONPORT;
                        }
                } else {
                        faddr = sin->sin_addr;
                        fport = sin->sin_port;
                }
        } else {
                INP_LOCK_ASSERT(inp);
                faddr = inp->inp_faddr;
                fport = inp->inp_fport;
                if (faddr.s_addr == INADDR_ANY) {
                        error = ENOTCONN;
                        goto release;
                }
        }

        /*
         * Calculate data length and get a mbuf for UDP, IP, and possible
         * link-layer headers.  Immediately slide the data pointer back
         * forward since we won't use that space at this layer.
         */
        M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_DONTWAIT);
        if (m == NULL) {
                error = ENOBUFS;
                goto release;
        }
        m->m_data += max_linkhdr;
        m->m_len -= max_linkhdr;
        m->m_pkthdr.len -= max_linkhdr;

        /*
         * Fill in mbuf with extended UDP header and addresses and length put
         * into network format.
         */
        ui = mtod(m, struct udpiphdr *);
        bzero(ui->ui_x1, sizeof(ui->ui_x1));    /* XXX still needed? */
        ui->ui_pr = IPPROTO_UDP;
        ui->ui_src = laddr;
        ui->ui_dst = faddr;
        ui->ui_sport = lport;
        ui->ui_dport = fport;
        ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr));

        /*
         * Set the Don't Fragment bit in the IP header.
         */
        if (inp->inp_flags & INP_DONTFRAG) {
                struct ip *ip;

                ip = (struct ip *)&ui->ui_i;
                ip->ip_off |= IP_DF;
        }

        ipflags = 0;
        if (inp->inp_socket->so_options & SO_DONTROUTE)
                ipflags |= IP_ROUTETOIF;
        if (inp->inp_socket->so_options & SO_BROADCAST)
                ipflags |= IP_ALLOWBROADCAST;
        if (inp->inp_flags & INP_ONESBCAST)
                ipflags |= IP_SENDONES;

#ifdef MAC
        mac_inpcb_create_mbuf(inp, m);
#endif

        /*
         * Set up checksum and output datagram.
         */
        if (udp_cksum) {
                if (inp->inp_flags & INP_ONESBCAST)
                        faddr.s_addr = INADDR_BROADCAST;
                ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr,
                    htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP));
                m->m_pkthdr.csum_flags = CSUM_UDP;
                m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
        } else
                ui->ui_sum = 0;
        ((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len;
        ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl;    /* XXX */
        ((struct ip *)ui)->ip_tos = inp->inp_ip_tos;    /* XXX */
        UDPSTAT_INC(udps_opackets);

        if (unlock_udbinfo == UH_WLOCKED)
                INP_HASH_WUNLOCK(&V_udbinfo);
        else if (unlock_udbinfo == UH_RLOCKED)
                INP_HASH_RUNLOCK(&V_udbinfo);
        error = ip_output(m, inp->inp_options, NULL, ipflags,
            inp->inp_moptions, inp);
        if (unlock_udbinfo == UH_WLOCKED)
                INP_WUNLOCK(inp);
        else
                INP_RUNLOCK(inp);
        return (error);

release:
        if (unlock_udbinfo == UH_WLOCKED) {
                INP_HASH_WUNLOCK(&V_udbinfo);
                INP_WUNLOCK(inp);
        } else if (unlock_udbinfo == UH_RLOCKED) {
                INP_HASH_RUNLOCK(&V_udbinfo);
                INP_RUNLOCK(inp);
        } else
                INP_RUNLOCK(inp);
        m_freem(m);
        return (error);
}

#if defined(IPSEC) && defined(IPSEC_NAT_T)
/*
 * Potentially decap ESP in UDP frame.  Check for an ESP header
 * and optional marker; if present, strip the UDP header and
 * push the result through IPSec.
 *
 * Returns mbuf to be processed (potentially re-allocated) or
 * NULL if consumed and/or processed.
 */
static struct mbuf *
udp4_espdecap(struct inpcb *inp, struct mbuf *m, int off)
{
        size_t minlen, payload, skip, iphlen;
        caddr_t data;
        struct udpcb *up;
        struct m_tag *tag;
        struct udphdr *udphdr;
        struct ip *ip;

        INP_RLOCK_ASSERT(inp);

        /*
         * Pull up data so the longest case is contiguous:
         *      IP/UDP hdr + non ESP marker + ESP hdr.
         */
        minlen = off + sizeof(uint64_t) + sizeof(struct esp);
        if (minlen > m->m_pkthdr.len)
                minlen = m->m_pkthdr.len;
        if ((m = m_pullup(m, minlen)) == NULL) {
                V_ipsec4stat.in_inval++;
                return (NULL);          /* Bypass caller processing. */
        }
        data = mtod(m, caddr_t);        /* Points to ip header. */
        payload = m->m_len - off;       /* Size of payload. */

        if (payload == 1 && data[off] == '\xff')
                return (m);             /* NB: keepalive packet, no decap. */

        up = intoudpcb(inp);
        KASSERT(up != NULL, ("%s: udpcb NULL", __func__));
        KASSERT((up->u_flags & UF_ESPINUDP_ALL) != 0,
            ("u_flags 0x%x", up->u_flags));

        /*
         * Check that the payload is large enough to hold an
         * ESP header and compute the amount of data to remove.
         *
         * NB: the caller has already done a pullup for us.
         * XXX can we assume alignment and eliminate bcopys?
         */
        if (up->u_flags & UF_ESPINUDP_NON_IKE) {
                /*
                 * draft-ietf-ipsec-nat-t-ike-0[01].txt and
                 * draft-ietf-ipsec-udp-encaps-(00/)01.txt, ignoring
                 * possible AH mode non-IKE marker+non-ESP marker
                 * from draft-ietf-ipsec-udp-encaps-00.txt.
                 */
                uint64_t marker;

                if (payload <= sizeof(uint64_t) + sizeof(struct esp))
                        return (m);     /* NB: no decap. */
                bcopy(data + off, &marker, sizeof(uint64_t));
                if (marker != 0)        /* Non-IKE marker. */
                        return (m);     /* NB: no decap. */
                skip = sizeof(uint64_t) + sizeof(struct udphdr);
        } else {
                uint32_t spi;

                if (payload <= sizeof(struct esp)) {
                        V_ipsec4stat.in_inval++;
                        m_freem(m);
                        return (NULL);  /* Discard. */
                }
                bcopy(data + off, &spi, sizeof(uint32_t));
                if (spi == 0)           /* Non-ESP marker. */
                        return (m);     /* NB: no decap. */
                skip = sizeof(struct udphdr);
        }

        /*
         * Setup a PACKET_TAG_IPSEC_NAT_T_PORT tag to remember
         * the UDP ports.  This is required if we want to select
         * the right SPD for multiple hosts behind same NAT.
         *
         * NB: ports are maintained in network byte order everywhere
         *     in the NAT-T code.
         */
        tag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS,
            2 * sizeof(uint16_t), M_NOWAIT);
        if (tag == NULL) {
                V_ipsec4stat.in_nomem++;
                m_freem(m);
                return (NULL);          /* Discard. */
        }
        iphlen = off - sizeof(struct udphdr);
        udphdr = (struct udphdr *)(data + iphlen);
        ((uint16_t *)(tag + 1))[0] = udphdr->uh_sport;
        ((uint16_t *)(tag + 1))[1] = udphdr->uh_dport;
        m_tag_prepend(m, tag);

        /*
         * Remove the UDP header (and possibly the non ESP marker).
         * IP header length is iphlen.
         * Before:
         *   <--- off --->
         *   +----+------+-----+
         *   | IP |  UDP | ESP |
         *   +----+------+-----+
         *        <-skip->
         * After:
         *   +----+-----+
         *   | IP | ESP |
         *   +----+-----+
         *   <-skip->
         */
        ovbcopy(data, data + skip, iphlen);
        m_adj(m, skip);

        ip = mtod(m, struct ip *);
        ip->ip_len -= skip;
        ip->ip_p = IPPROTO_ESP;

        /*
         * We cannot yet update the cksums so clear any
         * h/w cksum flags as they are no longer valid.
         */
        if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID)
                m->m_pkthdr.csum_flags &= ~(CSUM_DATA_VALID|CSUM_PSEUDO_HDR);

        (void) ipsec4_common_input(m, iphlen, ip->ip_p);
        return (NULL);                  /* NB: consumed, bypass processing. */
}
#endif /* defined(IPSEC) && defined(IPSEC_NAT_T) */

static void
udp_abort(struct socket *so)
{
        struct inpcb *inp;

        inp = sotoinpcb(so);
        KASSERT(inp != NULL, ("udp_abort: inp == NULL"));
        INP_WLOCK(inp);
        if (inp->inp_faddr.s_addr != INADDR_ANY) {
                INP_HASH_WLOCK(&V_udbinfo);
                in_pcbdisconnect(inp);
                inp->inp_laddr.s_addr = INADDR_ANY;
                INP_HASH_WUNLOCK(&V_udbinfo);
                soisdisconnected(so);
        }
        INP_WUNLOCK(inp);
}

static int
udp_attach(struct socket *so, int proto, struct thread *td)
{
        struct inpcb *inp;
        int error;

        inp = sotoinpcb(so);
        KASSERT(inp == NULL, ("udp_attach: inp != NULL"));
        error = soreserve(so, udp_sendspace, udp_recvspace);
        if (error)
                return (error);
        INP_INFO_WLOCK(&V_udbinfo);
        error = in_pcballoc(so, &V_udbinfo);
        if (error) {
                INP_INFO_WUNLOCK(&V_udbinfo);
                return (error);
        }

        inp = sotoinpcb(so);
        inp->inp_vflag |= INP_IPV4;
        inp->inp_ip_ttl = V_ip_defttl;

        error = udp_newudpcb(inp);
        if (error) {
                in_pcbdetach(inp);
                in_pcbfree(inp);
                INP_INFO_WUNLOCK(&V_udbinfo);
                return (error);
        }

        INP_WUNLOCK(inp);
        INP_INFO_WUNLOCK(&V_udbinfo);
        return (0);
}
#endif /* INET */

int
udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f)
{
        struct inpcb *inp;
        struct udpcb *up;

        KASSERT(so->so_type == SOCK_DGRAM,
            ("udp_set_kernel_tunneling: !dgram"));
        inp = sotoinpcb(so);
        KASSERT(inp != NULL, ("udp_set_kernel_tunneling: inp == NULL"));
        INP_WLOCK(inp);
        up = intoudpcb(inp);
        if (up->u_tun_func != NULL) {
                INP_WUNLOCK(inp);
                return (EBUSY);
        }
        up->u_tun_func = f;
        INP_WUNLOCK(inp);
        return (0);
}

#ifdef INET
static int
udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
        struct inpcb *inp;
        int error;

        inp = sotoinpcb(so);
        KASSERT(inp != NULL, ("udp_bind: inp == NULL"));
        INP_WLOCK(inp);
        INP_HASH_WLOCK(&V_udbinfo);
        error = in_pcbbind(inp, nam, td->td_ucred);
        INP_HASH_WUNLOCK(&V_udbinfo);
        INP_WUNLOCK(inp);
        return (error);
}

static void
udp_close(struct socket *so)
{
        struct inpcb *inp;

        inp = sotoinpcb(so);
        KASSERT(inp != NULL, ("udp_close: inp == NULL"));
        INP_WLOCK(inp);
        if (inp->inp_faddr.s_addr != INADDR_ANY) {
                INP_HASH_WLOCK(&V_udbinfo);
                in_pcbdisconnect(inp);
                inp->inp_laddr.s_addr = INADDR_ANY;
                INP_HASH_WUNLOCK(&V_udbinfo);
                soisdisconnected(so);
        }
        INP_WUNLOCK(inp);
}

static int
udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
        struct inpcb *inp;
        int error;
        struct sockaddr_in *sin;

        inp = sotoinpcb(so);
        KASSERT(inp != NULL, ("udp_connect: inp == NULL"));
        INP_WLOCK(inp);
        if (inp->inp_faddr.s_addr != INADDR_ANY) {
                INP_WUNLOCK(inp);
                return (EISCONN);
        }
        sin = (struct sockaddr_in *)nam;
        error = prison_remote_ip4(td->td_ucred, &sin->sin_addr);
        if (error != 0) {
                INP_WUNLOCK(inp);
                return (error);
        }
        INP_HASH_WLOCK(&V_udbinfo);
        error = in_pcbconnect(inp, nam, td->td_ucred);
        INP_HASH_WUNLOCK(&V_udbinfo);
        if (error == 0)
                soisconnected(so);
        INP_WUNLOCK(inp);
        return (error);
}

static void
udp_detach(struct socket *so)
{
        struct inpcb *inp;
        struct udpcb *up;

        inp = sotoinpcb(so);
        KASSERT(inp != NULL, ("udp_detach: inp == NULL"));
        KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
            ("udp_detach: not disconnected"));
        INP_INFO_WLOCK(&V_udbinfo);
        INP_WLOCK(inp);
        up = intoudpcb(inp);
        KASSERT(up != NULL, ("%s: up == NULL", __func__));
        inp->inp_ppcb = NULL;
        in_pcbdetach(inp);
        in_pcbfree(inp);
        INP_INFO_WUNLOCK(&V_udbinfo);
        udp_discardcb(up);
}

static int
udp_disconnect(struct socket *so)
{
        struct inpcb *inp;

        inp = sotoinpcb(so);
        KASSERT(inp != NULL, ("udp_disconnect: inp == NULL"));
        INP_WLOCK(inp);
        if (inp->inp_faddr.s_addr == INADDR_ANY) {
                INP_WUNLOCK(inp);
                return (ENOTCONN);
        }
        INP_HASH_WLOCK(&V_udbinfo);
        in_pcbdisconnect(inp);
        inp->inp_laddr.s_addr = INADDR_ANY;
        INP_HASH_WUNLOCK(&V_udbinfo);
        SOCK_LOCK(so);
        so->so_state &= ~SS_ISCONNECTED;        /* XXX */
        SOCK_UNLOCK(so);
        INP_WUNLOCK(inp);
        return (0);
}

static int
udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
    struct mbuf *control, struct thread *td)
{
        struct inpcb *inp;

        inp = sotoinpcb(so);
        KASSERT(inp != NULL, ("udp_send: inp == NULL"));
        return (udp_output(inp, m, addr, control, td));
}
#endif /* INET */

int
udp_shutdown(struct socket *so)
{
        struct inpcb *inp;

        inp = sotoinpcb(so);
        KASSERT(inp != NULL, ("udp_shutdown: inp == NULL"));
        INP_WLOCK(inp);
        socantsendmore(so);
        INP_WUNLOCK(inp);
        return (0);
}

#ifdef INET
struct pr_usrreqs udp_usrreqs = {
        .pru_abort =            udp_abort,
        .pru_attach =           udp_attach,
        .pru_bind =             udp_bind,
        .pru_connect =          udp_connect,
        .pru_control =          in_control,
        .pru_detach =           udp_detach,
        .pru_disconnect =       udp_disconnect,
        .pru_peeraddr =         in_getpeeraddr,
        .pru_send =             udp_send,
        .pru_soreceive =        soreceive_dgram,
        .pru_sosend =           sosend_dgram,
        .pru_shutdown =         udp_shutdown,
        .pru_sockaddr =         in_getsockaddr,
        .pru_sosetlabel =       in_pcbsosetlabel,
        .pru_close =            udp_close,
};
#endif /* INET */