1 /* 2 * Copyright (c) 1982, 1986, 1988, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95 30 * $FreeBSD$ 31 */ 32 33 #include "opt_inet6.h" 34 #include "opt_ipsec.h" 35 #include "opt_mac.h" 36 #include "opt_random_ip_id.h" 37 38 #include <sys/param.h> 39 #include <sys/jail.h> 40 #include <sys/kernel.h> 41 #include <sys/lock.h> 42 #include <sys/mac.h> 43 #include <sys/malloc.h> 44 #include <sys/mbuf.h> 45 #include <sys/proc.h> 46 #include <sys/protosw.h> 47 #include <sys/signalvar.h> 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/sx.h> 51 #include <sys/sysctl.h> 52 #include <sys/systm.h> 53 54 #include <vm/uma.h> 55 56 #include <net/if.h> 57 #include <net/route.h> 58 59 #include <netinet/in.h> 60 #include <netinet/in_systm.h> 61 #include <netinet/in_pcb.h> 62 #include <netinet/in_var.h> 63 #include <netinet/ip.h> 64 #include <netinet/ip_var.h> 65 #include <netinet/ip_mroute.h> 66 67 #include <netinet/ip_fw.h> 68 #include <netinet/ip_dummynet.h> 69 70 #ifdef FAST_IPSEC 71 #include <netipsec/ipsec.h> 72 #endif /*FAST_IPSEC*/ 73 74 #ifdef IPSEC 75 #include <netinet6/ipsec.h> 76 #endif /*IPSEC*/ 77 78 struct inpcbhead ripcb; 79 struct inpcbinfo ripcbinfo; 80 81 /* control hooks for ipfw and dummynet */ 82 ip_fw_ctl_t *ip_fw_ctl_ptr; 83 ip_dn_ctl_t *ip_dn_ctl_ptr; 84 85 /* 86 * hooks for multicast routing. They all default to NULL, 87 * so leave them not initialized and rely on BSS being set to 0. 88 */ 89 90 /* The socket used to communicate with the multicast routing daemon. */ 91 struct socket *ip_mrouter; 92 93 /* The various mrouter and rsvp functions */ 94 int (*ip_mrouter_set)(struct socket *, struct sockopt *); 95 int (*ip_mrouter_get)(struct socket *, struct sockopt *); 96 int (*ip_mrouter_done)(void); 97 int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, 98 struct ip_moptions *); 99 int (*mrt_ioctl)(int, caddr_t); 100 int (*legal_vif_num)(int); 101 u_long (*ip_mcast_src)(int); 102 103 void (*rsvp_input_p)(struct mbuf *m, int off); 104 int (*ip_rsvp_vif)(struct socket *, struct sockopt *); 105 void (*ip_rsvp_force_done)(struct socket *); 106 107 /* 108 * Nominal space allocated to a raw ip socket. 109 */ 110 #define RIPSNDQ 8192 111 #define RIPRCVQ 8192 112 113 /* 114 * Raw interface to IP protocol. 115 */ 116 117 /* 118 * Initialize raw connection block q. 119 */ 120 void 121 rip_init() 122 { 123 INP_INFO_LOCK_INIT(&ripcbinfo, "rip"); 124 LIST_INIT(&ripcb); 125 ripcbinfo.listhead = &ripcb; 126 /* 127 * XXX We don't use the hash list for raw IP, but it's easier 128 * to allocate a one entry hash list than it is to check all 129 * over the place for hashbase == NULL. 130 */ 131 ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask); 132 ripcbinfo.porthashbase = hashinit(1, M_PCB, &ripcbinfo.porthashmask); 133 ripcbinfo.ipi_zone = uma_zcreate("ripcb", sizeof(struct inpcb), 134 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 135 uma_zone_set_max(ripcbinfo.ipi_zone, maxsockets); 136 } 137 138 static struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET }; 139 140 static int 141 raw_append(struct inpcb *last, struct ip *ip, struct mbuf *n) 142 { 143 int policyfail = 0; 144 145 INP_LOCK_ASSERT(last); 146 147 #if defined(IPSEC) || defined(FAST_IPSEC) 148 /* check AH/ESP integrity. */ 149 if (ipsec4_in_reject(n, last)) { 150 policyfail = 1; 151 #ifdef IPSEC 152 ipsecstat.in_polvio++; 153 #endif /*IPSEC*/ 154 /* do not inject data to pcb */ 155 } 156 #endif /*IPSEC || FAST_IPSEC*/ 157 #ifdef MAC 158 if (!policyfail && mac_check_inpcb_deliver(last, n) != 0) 159 policyfail = 1; 160 #endif 161 if (!policyfail) { 162 struct mbuf *opts = NULL; 163 164 if ((last->inp_flags & INP_CONTROLOPTS) || 165 (last->inp_socket->so_options & SO_TIMESTAMP)) 166 ip_savecontrol(last, &opts, ip, n); 167 if (sbappendaddr(&last->inp_socket->so_rcv, 168 (struct sockaddr *)&ripsrc, n, opts) == 0) { 169 /* should notify about lost packet */ 170 m_freem(n); 171 if (opts) 172 m_freem(opts); 173 } else 174 sorwakeup(last->inp_socket); 175 } else 176 m_freem(n); 177 return policyfail; 178 } 179 180 /* 181 * Setup generic address and protocol structures 182 * for raw_input routine, then pass them along with 183 * mbuf chain. 184 */ 185 void 186 rip_input(struct mbuf *m, int off) 187 { 188 struct ip *ip = mtod(m, struct ip *); 189 int proto = ip->ip_p; 190 struct inpcb *inp, *last; 191 192 INP_INFO_RLOCK(&ripcbinfo); 193 ripsrc.sin_addr = ip->ip_src; 194 last = NULL; 195 LIST_FOREACH(inp, &ripcb, inp_list) { 196 INP_LOCK(inp); 197 if (inp->inp_ip_p && inp->inp_ip_p != proto) { 198 docontinue: 199 INP_UNLOCK(inp); 200 continue; 201 } 202 #ifdef INET6 203 if ((inp->inp_vflag & INP_IPV4) == 0) 204 goto docontinue; 205 #endif 206 if (inp->inp_laddr.s_addr && 207 inp->inp_laddr.s_addr != ip->ip_dst.s_addr) 208 goto docontinue; 209 if (inp->inp_faddr.s_addr && 210 inp->inp_faddr.s_addr != ip->ip_src.s_addr) 211 goto docontinue; 212 if (jailed(inp->inp_socket->so_cred)) 213 if (htonl(prison_getip(inp->inp_socket->so_cred)) != 214 ip->ip_dst.s_addr) 215 goto docontinue; 216 if (last) { 217 struct mbuf *n; 218 219 n = m_copy(m, 0, (int)M_COPYALL); 220 if (n != NULL) 221 (void) raw_append(last, ip, n); 222 /* XXX count dropped packet */ 223 INP_UNLOCK(last); 224 } 225 last = inp; 226 } 227 if (last != NULL) { 228 if (raw_append(last, ip, m) != 0) 229 ipstat.ips_delivered--; 230 INP_UNLOCK(last); 231 } else { 232 m_freem(m); 233 ipstat.ips_noproto++; 234 ipstat.ips_delivered--; 235 } 236 INP_INFO_RUNLOCK(&ripcbinfo); 237 } 238 239 /* 240 * Generate IP header and pass packet to ip_output. 241 * Tack on options user may have setup with control call. 242 */ 243 int 244 rip_output(struct mbuf *m, struct socket *so, u_long dst) 245 { 246 struct ip *ip; 247 struct inpcb *inp = sotoinpcb(so); 248 int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST; 249 250 #ifdef MAC 251 INP_LOCK(inp); 252 mac_create_mbuf_from_inpcb(inp, m); 253 INP_UNLOCK(inp); 254 #endif 255 256 /* 257 * If the user handed us a complete IP packet, use it. 258 * Otherwise, allocate an mbuf for a header and fill it in. 259 */ 260 if ((inp->inp_flags & INP_HDRINCL) == 0) { 261 if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) { 262 m_freem(m); 263 return(EMSGSIZE); 264 } 265 M_PREPEND(m, sizeof(struct ip), M_TRYWAIT); 266 if (m == NULL) 267 return(ENOBUFS); 268 ip = mtod(m, struct ip *); 269 ip->ip_tos = inp->inp_ip_tos; 270 ip->ip_off = 0; 271 ip->ip_p = inp->inp_ip_p; 272 ip->ip_len = m->m_pkthdr.len; 273 if (jailed(inp->inp_socket->so_cred)) 274 ip->ip_src.s_addr = 275 htonl(prison_getip(inp->inp_socket->so_cred)); 276 else 277 ip->ip_src = inp->inp_laddr; 278 ip->ip_dst.s_addr = dst; 279 ip->ip_ttl = inp->inp_ip_ttl; 280 } else { 281 if (m->m_pkthdr.len > IP_MAXPACKET) { 282 m_freem(m); 283 return(EMSGSIZE); 284 } 285 ip = mtod(m, struct ip *); 286 if (jailed(inp->inp_socket->so_cred)) { 287 if (ip->ip_src.s_addr != 288 htonl(prison_getip(inp->inp_socket->so_cred))) { 289 m_freem(m); 290 return (EPERM); 291 } 292 } 293 /* don't allow both user specified and setsockopt options, 294 and don't allow packet length sizes that will crash */ 295 if (((ip->ip_hl != (sizeof (*ip) >> 2)) 296 && inp->inp_options) 297 || (ip->ip_len > m->m_pkthdr.len) 298 || (ip->ip_len < (ip->ip_hl << 2))) { 299 m_freem(m); 300 return EINVAL; 301 } 302 if (ip->ip_id == 0) 303 #ifdef RANDOM_IP_ID 304 ip->ip_id = ip_randomid(); 305 #else 306 ip->ip_id = htons(ip_id++); 307 #endif 308 /* XXX prevent ip_output from overwriting header fields */ 309 flags |= IP_RAWOUTPUT; 310 ipstat.ips_rawout++; 311 } 312 313 if (inp->inp_flags & INP_ONESBCAST) 314 flags |= IP_SENDONES; 315 316 return (ip_output(m, inp->inp_options, NULL, flags, 317 inp->inp_moptions, inp)); 318 } 319 320 /* 321 * Raw IP socket option processing. 322 * 323 * Note that access to all of the IP administrative functions here is 324 * implicitly protected by suser() as gaining access to a raw socket 325 * requires either that the thread pass a suser() check, or that it be 326 * passed a raw socket by another thread that has passed a suser() check. 327 * If FreeBSD moves to a more fine-grained access control mechanism, 328 * additional checks will need to be placed here if the raw IP attachment 329 * check is not equivilent the the check required for these 330 * administrative operations; in some cases, these checks are already 331 * present. 332 */ 333 int 334 rip_ctloutput(struct socket *so, struct sockopt *sopt) 335 { 336 struct inpcb *inp = sotoinpcb(so); 337 int error, optval; 338 339 if (sopt->sopt_level != IPPROTO_IP) 340 return (EINVAL); 341 342 error = 0; 343 344 switch (sopt->sopt_dir) { 345 case SOPT_GET: 346 switch (sopt->sopt_name) { 347 case IP_HDRINCL: 348 optval = inp->inp_flags & INP_HDRINCL; 349 error = sooptcopyout(sopt, &optval, sizeof optval); 350 break; 351 352 case IP_FW_ADD: /* ADD actually returns the body... */ 353 case IP_FW_GET: 354 if (IPFW_LOADED) 355 error = ip_fw_ctl_ptr(sopt); 356 else 357 error = ENOPROTOOPT; 358 break; 359 360 case IP_DUMMYNET_GET: 361 if (DUMMYNET_LOADED) 362 error = ip_dn_ctl_ptr(sopt); 363 else 364 error = ENOPROTOOPT; 365 break ; 366 367 case MRT_INIT: 368 case MRT_DONE: 369 case MRT_ADD_VIF: 370 case MRT_DEL_VIF: 371 case MRT_ADD_MFC: 372 case MRT_DEL_MFC: 373 case MRT_VERSION: 374 case MRT_ASSERT: 375 case MRT_API_SUPPORT: 376 case MRT_API_CONFIG: 377 case MRT_ADD_BW_UPCALL: 378 case MRT_DEL_BW_UPCALL: 379 error = ip_mrouter_get ? ip_mrouter_get(so, sopt) : 380 EOPNOTSUPP; 381 break; 382 383 default: 384 error = ip_ctloutput(so, sopt); 385 break; 386 } 387 break; 388 389 case SOPT_SET: 390 switch (sopt->sopt_name) { 391 case IP_HDRINCL: 392 error = sooptcopyin(sopt, &optval, sizeof optval, 393 sizeof optval); 394 if (error) 395 break; 396 if (optval) 397 inp->inp_flags |= INP_HDRINCL; 398 else 399 inp->inp_flags &= ~INP_HDRINCL; 400 break; 401 402 case IP_FW_ADD: 403 case IP_FW_DEL: 404 case IP_FW_FLUSH: 405 case IP_FW_ZERO: 406 case IP_FW_RESETLOG: 407 if (IPFW_LOADED) 408 error = ip_fw_ctl_ptr(sopt); 409 else 410 error = ENOPROTOOPT; 411 break; 412 413 case IP_DUMMYNET_CONFIGURE: 414 case IP_DUMMYNET_DEL: 415 case IP_DUMMYNET_FLUSH: 416 if (DUMMYNET_LOADED) 417 error = ip_dn_ctl_ptr(sopt); 418 else 419 error = ENOPROTOOPT ; 420 break ; 421 422 case IP_RSVP_ON: 423 error = ip_rsvp_init(so); 424 break; 425 426 case IP_RSVP_OFF: 427 error = ip_rsvp_done(); 428 break; 429 430 case IP_RSVP_VIF_ON: 431 case IP_RSVP_VIF_OFF: 432 error = ip_rsvp_vif ? 433 ip_rsvp_vif(so, sopt) : EINVAL; 434 break; 435 436 case MRT_INIT: 437 case MRT_DONE: 438 case MRT_ADD_VIF: 439 case MRT_DEL_VIF: 440 case MRT_ADD_MFC: 441 case MRT_DEL_MFC: 442 case MRT_VERSION: 443 case MRT_ASSERT: 444 case MRT_API_SUPPORT: 445 case MRT_API_CONFIG: 446 case MRT_ADD_BW_UPCALL: 447 case MRT_DEL_BW_UPCALL: 448 error = ip_mrouter_set ? ip_mrouter_set(so, sopt) : 449 EOPNOTSUPP; 450 break; 451 452 default: 453 error = ip_ctloutput(so, sopt); 454 break; 455 } 456 break; 457 } 458 459 return (error); 460 } 461 462 /* 463 * This function exists solely to receive the PRC_IFDOWN messages which 464 * are sent by if_down(). It looks for an ifaddr whose ifa_addr is sa, 465 * and calls in_ifadown() to remove all routes corresponding to that address. 466 * It also receives the PRC_IFUP messages from if_up() and reinstalls the 467 * interface routes. 468 */ 469 void 470 rip_ctlinput(int cmd, struct sockaddr *sa, void *vip) 471 { 472 struct in_ifaddr *ia; 473 struct ifnet *ifp; 474 int err; 475 int flags; 476 477 switch (cmd) { 478 case PRC_IFDOWN: 479 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { 480 if (ia->ia_ifa.ifa_addr == sa 481 && (ia->ia_flags & IFA_ROUTE)) { 482 /* 483 * in_ifscrub kills the interface route. 484 */ 485 in_ifscrub(ia->ia_ifp, ia); 486 /* 487 * in_ifadown gets rid of all the rest of 488 * the routes. This is not quite the right 489 * thing to do, but at least if we are running 490 * a routing process they will come back. 491 */ 492 in_ifadown(&ia->ia_ifa, 0); 493 break; 494 } 495 } 496 break; 497 498 case PRC_IFUP: 499 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { 500 if (ia->ia_ifa.ifa_addr == sa) 501 break; 502 } 503 if (ia == 0 || (ia->ia_flags & IFA_ROUTE)) 504 return; 505 flags = RTF_UP; 506 ifp = ia->ia_ifa.ifa_ifp; 507 508 if ((ifp->if_flags & IFF_LOOPBACK) 509 || (ifp->if_flags & IFF_POINTOPOINT)) 510 flags |= RTF_HOST; 511 512 err = rtinit(&ia->ia_ifa, RTM_ADD, flags); 513 if (err == 0) 514 ia->ia_flags |= IFA_ROUTE; 515 break; 516 } 517 } 518 519 u_long rip_sendspace = RIPSNDQ; 520 u_long rip_recvspace = RIPRCVQ; 521 522 SYSCTL_INT(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW, 523 &rip_sendspace, 0, "Maximum outgoing raw IP datagram size"); 524 SYSCTL_INT(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW, 525 &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams"); 526 527 static int 528 rip_attach(struct socket *so, int proto, struct thread *td) 529 { 530 struct inpcb *inp; 531 int error; 532 533 /* XXX why not lower? */ 534 INP_INFO_WLOCK(&ripcbinfo); 535 inp = sotoinpcb(so); 536 if (inp) { 537 /* XXX counter, printf */ 538 INP_INFO_WUNLOCK(&ripcbinfo); 539 return EINVAL; 540 } 541 if (td && jailed(td->td_ucred) && !jail_allow_raw_sockets) { 542 INP_INFO_WUNLOCK(&ripcbinfo); 543 return (EPERM); 544 } 545 if (td && (error = suser_cred(td->td_ucred, PRISON_ROOT)) != 0) { 546 INP_INFO_WUNLOCK(&ripcbinfo); 547 return error; 548 } 549 if (proto >= IPPROTO_MAX || proto < 0) { 550 INP_INFO_WUNLOCK(&ripcbinfo); 551 return EPROTONOSUPPORT; 552 } 553 554 error = soreserve(so, rip_sendspace, rip_recvspace); 555 if (error) { 556 INP_INFO_WUNLOCK(&ripcbinfo); 557 return error; 558 } 559 error = in_pcballoc(so, &ripcbinfo, "rawinp"); 560 if (error) { 561 INP_INFO_WUNLOCK(&ripcbinfo); 562 return error; 563 } 564 inp = (struct inpcb *)so->so_pcb; 565 INP_LOCK(inp); 566 INP_INFO_WUNLOCK(&ripcbinfo); 567 inp->inp_vflag |= INP_IPV4; 568 inp->inp_ip_p = proto; 569 inp->inp_ip_ttl = ip_defttl; 570 INP_UNLOCK(inp); 571 return 0; 572 } 573 574 static void 575 rip_pcbdetach(struct socket *so, struct inpcb *inp) 576 { 577 INP_INFO_WLOCK_ASSERT(&ripcbinfo); 578 INP_LOCK_ASSERT(inp); 579 580 if (so == ip_mrouter && ip_mrouter_done) 581 ip_mrouter_done(); 582 if (ip_rsvp_force_done) 583 ip_rsvp_force_done(so); 584 if (so == ip_rsvpd) 585 ip_rsvp_done(); 586 in_pcbdetach(inp); 587 } 588 589 static int 590 rip_detach(struct socket *so) 591 { 592 struct inpcb *inp; 593 594 INP_INFO_WLOCK(&ripcbinfo); 595 inp = sotoinpcb(so); 596 if (inp == 0) { 597 /* XXX counter, printf */ 598 INP_INFO_WUNLOCK(&ripcbinfo); 599 return EINVAL; 600 } 601 INP_LOCK(inp); 602 rip_pcbdetach(so, inp); 603 INP_INFO_WUNLOCK(&ripcbinfo); 604 return 0; 605 } 606 607 static int 608 rip_abort(struct socket *so) 609 { 610 struct inpcb *inp; 611 612 INP_INFO_WLOCK(&ripcbinfo); 613 inp = sotoinpcb(so); 614 if (inp == 0) { 615 INP_INFO_WUNLOCK(&ripcbinfo); 616 return EINVAL; /* ??? possible? panic instead? */ 617 } 618 INP_LOCK(inp); 619 soisdisconnected(so); 620 if (so->so_state & SS_NOFDREF) 621 rip_pcbdetach(so, inp); 622 else 623 INP_UNLOCK(inp); 624 INP_INFO_WUNLOCK(&ripcbinfo); 625 return 0; 626 } 627 628 static int 629 rip_disconnect(struct socket *so) 630 { 631 if ((so->so_state & SS_ISCONNECTED) == 0) 632 return ENOTCONN; 633 return rip_abort(so); 634 } 635 636 static int 637 rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 638 { 639 struct sockaddr_in *addr = (struct sockaddr_in *)nam; 640 struct inpcb *inp; 641 642 if (nam->sa_len != sizeof(*addr)) 643 return EINVAL; 644 645 if (jailed(td->td_ucred)) { 646 if (addr->sin_addr.s_addr == INADDR_ANY) 647 addr->sin_addr.s_addr = 648 htonl(prison_getip(td->td_ucred)); 649 if (htonl(prison_getip(td->td_ucred)) != addr->sin_addr.s_addr) 650 return (EADDRNOTAVAIL); 651 } 652 653 if (TAILQ_EMPTY(&ifnet) || 654 (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) || 655 (addr->sin_addr.s_addr && 656 ifa_ifwithaddr((struct sockaddr *)addr) == 0)) 657 return EADDRNOTAVAIL; 658 659 INP_INFO_WLOCK(&ripcbinfo); 660 inp = sotoinpcb(so); 661 if (inp == 0) { 662 INP_INFO_WUNLOCK(&ripcbinfo); 663 return EINVAL; 664 } 665 INP_LOCK(inp); 666 inp->inp_laddr = addr->sin_addr; 667 INP_UNLOCK(inp); 668 INP_INFO_WUNLOCK(&ripcbinfo); 669 return 0; 670 } 671 672 static int 673 rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 674 { 675 struct sockaddr_in *addr = (struct sockaddr_in *)nam; 676 struct inpcb *inp; 677 678 if (nam->sa_len != sizeof(*addr)) 679 return EINVAL; 680 if (TAILQ_EMPTY(&ifnet)) 681 return EADDRNOTAVAIL; 682 if (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) 683 return EAFNOSUPPORT; 684 685 INP_INFO_WLOCK(&ripcbinfo); 686 inp = sotoinpcb(so); 687 if (inp == 0) { 688 INP_INFO_WUNLOCK(&ripcbinfo); 689 return EINVAL; 690 } 691 INP_LOCK(inp); 692 inp->inp_faddr = addr->sin_addr; 693 soisconnected(so); 694 INP_UNLOCK(inp); 695 INP_INFO_WUNLOCK(&ripcbinfo); 696 return 0; 697 } 698 699 static int 700 rip_shutdown(struct socket *so) 701 { 702 struct inpcb *inp; 703 704 INP_INFO_RLOCK(&ripcbinfo); 705 inp = sotoinpcb(so); 706 if (inp == 0) { 707 INP_INFO_RUNLOCK(&ripcbinfo); 708 return EINVAL; 709 } 710 INP_LOCK(inp); 711 INP_INFO_RUNLOCK(&ripcbinfo); 712 socantsendmore(so); 713 INP_UNLOCK(inp); 714 return 0; 715 } 716 717 static int 718 rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, 719 struct mbuf *control, struct thread *td) 720 { 721 struct inpcb *inp; 722 u_long dst; 723 int ret; 724 725 INP_INFO_WLOCK(&ripcbinfo); 726 inp = sotoinpcb(so); 727 if (so->so_state & SS_ISCONNECTED) { 728 if (nam) { 729 INP_INFO_WUNLOCK(&ripcbinfo); 730 m_freem(m); 731 return EISCONN; 732 } 733 dst = inp->inp_faddr.s_addr; 734 } else { 735 if (nam == NULL) { 736 INP_INFO_WUNLOCK(&ripcbinfo); 737 m_freem(m); 738 return ENOTCONN; 739 } 740 dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr; 741 } 742 INP_LOCK(inp); 743 ret = rip_output(m, so, dst); 744 INP_UNLOCK(inp); 745 INP_INFO_WUNLOCK(&ripcbinfo); 746 return ret; 747 } 748 749 static int 750 rip_pcblist(SYSCTL_HANDLER_ARGS) 751 { 752 int error, i, n; 753 struct inpcb *inp, **inp_list; 754 inp_gen_t gencnt; 755 struct xinpgen xig; 756 757 /* 758 * The process of preparing the TCB list is too time-consuming and 759 * resource-intensive to repeat twice on every request. 760 */ 761 if (req->oldptr == 0) { 762 n = ripcbinfo.ipi_count; 763 req->oldidx = 2 * (sizeof xig) 764 + (n + n/8) * sizeof(struct xinpcb); 765 return 0; 766 } 767 768 if (req->newptr != 0) 769 return EPERM; 770 771 /* 772 * OK, now we're committed to doing something. 773 */ 774 INP_INFO_RLOCK(&ripcbinfo); 775 gencnt = ripcbinfo.ipi_gencnt; 776 n = ripcbinfo.ipi_count; 777 INP_INFO_RUNLOCK(&ripcbinfo); 778 779 xig.xig_len = sizeof xig; 780 xig.xig_count = n; 781 xig.xig_gen = gencnt; 782 xig.xig_sogen = so_gencnt; 783 error = SYSCTL_OUT(req, &xig, sizeof xig); 784 if (error) 785 return error; 786 787 inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); 788 if (inp_list == 0) 789 return ENOMEM; 790 791 INP_INFO_RLOCK(&ripcbinfo); 792 for (inp = LIST_FIRST(ripcbinfo.listhead), i = 0; inp && i < n; 793 inp = LIST_NEXT(inp, inp_list)) { 794 INP_LOCK(inp); 795 if (inp->inp_gencnt <= gencnt && 796 cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0) { 797 /* XXX held references? */ 798 inp_list[i++] = inp; 799 } 800 INP_UNLOCK(inp); 801 } 802 INP_INFO_RUNLOCK(&ripcbinfo); 803 n = i; 804 805 error = 0; 806 for (i = 0; i < n; i++) { 807 inp = inp_list[i]; 808 if (inp->inp_gencnt <= gencnt) { 809 struct xinpcb xi; 810 xi.xi_len = sizeof xi; 811 /* XXX should avoid extra copy */ 812 bcopy(inp, &xi.xi_inp, sizeof *inp); 813 if (inp->inp_socket) 814 sotoxsocket(inp->inp_socket, &xi.xi_socket); 815 error = SYSCTL_OUT(req, &xi, sizeof xi); 816 } 817 } 818 if (!error) { 819 /* 820 * Give the user an updated idea of our state. 821 * If the generation differs from what we told 822 * her before, she knows that something happened 823 * while we were processing this request, and it 824 * might be necessary to retry. 825 */ 826 INP_INFO_RLOCK(&ripcbinfo); 827 xig.xig_gen = ripcbinfo.ipi_gencnt; 828 xig.xig_sogen = so_gencnt; 829 xig.xig_count = ripcbinfo.ipi_count; 830 INP_INFO_RUNLOCK(&ripcbinfo); 831 error = SYSCTL_OUT(req, &xig, sizeof xig); 832 } 833 free(inp_list, M_TEMP); 834 return error; 835 } 836 837 /* 838 * This is the wrapper function for in_setsockaddr. We just pass down 839 * the pcbinfo for in_setpeeraddr to lock. 840 */ 841 static int 842 rip_sockaddr(struct socket *so, struct sockaddr **nam) 843 { 844 return (in_setsockaddr(so, nam, &ripcbinfo)); 845 } 846 847 /* 848 * This is the wrapper function for in_setpeeraddr. We just pass down 849 * the pcbinfo for in_setpeeraddr to lock. 850 */ 851 static int 852 rip_peeraddr(struct socket *so, struct sockaddr **nam) 853 { 854 return (in_setpeeraddr(so, nam, &ripcbinfo)); 855 } 856 857 858 SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD, 0, 0, 859 rip_pcblist, "S,xinpcb", "List of active raw IP sockets"); 860 861 struct pr_usrreqs rip_usrreqs = { 862 rip_abort, pru_accept_notsupp, rip_attach, rip_bind, rip_connect, 863 pru_connect2_notsupp, in_control, rip_detach, rip_disconnect, 864 pru_listen_notsupp, rip_peeraddr, pru_rcvd_notsupp, 865 pru_rcvoob_notsupp, rip_send, pru_sense_null, rip_shutdown, 866 rip_sockaddr, sosend, soreceive, sopoll, in_pcbsosetlabel 867 }; 868