1 /* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 34 * $Id: uipc_socket.c,v 1.47 1998/12/07 21:58:29 archie Exp $ 35 */ 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/proc.h> 40 #include <sys/fcntl.h> 41 #include <sys/malloc.h> 42 #include <sys/mbuf.h> 43 #include <sys/domain.h> 44 #include <sys/kernel.h> 45 #include <sys/poll.h> 46 #include <sys/protosw.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/resourcevar.h> 50 #include <sys/signalvar.h> 51 #include <sys/sysctl.h> 52 #include <sys/uio.h> 53 #include <vm/vm_zone.h> 54 55 #include <machine/limits.h> 56 57 struct vm_zone *socket_zone; 58 so_gen_t so_gencnt; /* generation count for sockets */ 59 60 MALLOC_DEFINE(M_SONAME, "soname", "socket name"); 61 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); 62 63 static int somaxconn = SOMAXCONN; 64 SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 65 0, ""); 66 67 /* 68 * Socket operation routines. 69 * These routines are called by the routines in 70 * sys_socket.c or from a system process, and 71 * implement the semantics of socket operations by 72 * switching out to the protocol specific routines. 73 */ 74 75 /* 76 * Get a socket structure from our zone, and initialize it. 77 * We don't implement `waitok' yet (see comments in uipc_domain.c). 78 * Note that it would probably be better to allocate socket 79 * and PCB at the same time, but I'm not convinced that all 80 * the protocols can be easily modified to do this. 81 */ 82 struct socket * 83 soalloc(waitok) 84 int waitok; 85 { 86 struct socket *so; 87 88 so = zalloci(socket_zone); 89 if (so) { 90 /* XXX race condition for reentrant kernel */ 91 bzero(so, sizeof *so); 92 so->so_gencnt = ++so_gencnt; 93 so->so_zone = socket_zone; 94 } 95 return so; 96 } 97 98 int 99 socreate(dom, aso, type, proto, p) 100 int dom; 101 struct socket **aso; 102 register int type; 103 int proto; 104 struct proc *p; 105 { 106 register struct protosw *prp; 107 register struct socket *so; 108 register int error; 109 110 if (proto) 111 prp = pffindproto(dom, proto, type); 112 else 113 prp = pffindtype(dom, type); 114 if (prp == 0 || prp->pr_usrreqs->pru_attach == 0) 115 return (EPROTONOSUPPORT); 116 if (prp->pr_type != type) 117 return (EPROTOTYPE); 118 so = soalloc(p != 0); 119 if (so == 0) 120 return (ENOBUFS); 121 122 TAILQ_INIT(&so->so_incomp); 123 TAILQ_INIT(&so->so_comp); 124 so->so_type = type; 125 if (p != 0) 126 so->so_uid = p->p_ucred->cr_uid; 127 so->so_proto = prp; 128 error = (*prp->pr_usrreqs->pru_attach)(so, proto, p); 129 if (error) { 130 so->so_state |= SS_NOFDREF; 131 sofree(so); 132 return (error); 133 } 134 *aso = so; 135 return (0); 136 } 137 138 int 139 sobind(so, nam, p) 140 struct socket *so; 141 struct sockaddr *nam; 142 struct proc *p; 143 { 144 int s = splnet(); 145 int error; 146 147 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p); 148 splx(s); 149 return (error); 150 } 151 152 void 153 sodealloc(so) 154 struct socket *so; 155 { 156 so->so_gencnt = ++so_gencnt; 157 zfreei(so->so_zone, so); 158 } 159 160 int 161 solisten(so, backlog, p) 162 register struct socket *so; 163 int backlog; 164 struct proc *p; 165 { 166 int s, error; 167 168 s = splnet(); 169 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p); 170 if (error) { 171 splx(s); 172 return (error); 173 } 174 if (so->so_comp.tqh_first == NULL) 175 so->so_options |= SO_ACCEPTCONN; 176 if (backlog < 0 || backlog > somaxconn) 177 backlog = somaxconn; 178 so->so_qlimit = backlog; 179 splx(s); 180 return (0); 181 } 182 183 void 184 sofree(so) 185 register struct socket *so; 186 { 187 struct socket *head = so->so_head; 188 189 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) 190 return; 191 if (head != NULL) { 192 if (so->so_state & SS_INCOMP) { 193 TAILQ_REMOVE(&head->so_incomp, so, so_list); 194 head->so_incqlen--; 195 } else if (so->so_state & SS_COMP) { 196 TAILQ_REMOVE(&head->so_comp, so, so_list); 197 } else { 198 panic("sofree: not queued"); 199 } 200 head->so_qlen--; 201 so->so_state &= ~(SS_INCOMP|SS_COMP); 202 so->so_head = NULL; 203 } 204 sbrelease(&so->so_snd); 205 sorflush(so); 206 sodealloc(so); 207 } 208 209 /* 210 * Close a socket on last file table reference removal. 211 * Initiate disconnect if connected. 212 * Free socket when disconnect complete. 213 */ 214 int 215 soclose(so) 216 register struct socket *so; 217 { 218 int s = splnet(); /* conservative */ 219 int error = 0; 220 221 funsetown(so->so_sigio); 222 if (so->so_options & SO_ACCEPTCONN) { 223 struct socket *sp, *sonext; 224 225 for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) { 226 sonext = sp->so_list.tqe_next; 227 (void) soabort(sp); 228 } 229 for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) { 230 sonext = sp->so_list.tqe_next; 231 (void) soabort(sp); 232 } 233 } 234 if (so->so_pcb == 0) 235 goto discard; 236 if (so->so_state & SS_ISCONNECTED) { 237 if ((so->so_state & SS_ISDISCONNECTING) == 0) { 238 error = sodisconnect(so); 239 if (error) 240 goto drop; 241 } 242 if (so->so_options & SO_LINGER) { 243 if ((so->so_state & SS_ISDISCONNECTING) && 244 (so->so_state & SS_NBIO)) 245 goto drop; 246 while (so->so_state & SS_ISCONNECTED) { 247 error = tsleep((caddr_t)&so->so_timeo, 248 PSOCK | PCATCH, "soclos", so->so_linger); 249 if (error) 250 break; 251 } 252 } 253 } 254 drop: 255 if (so->so_pcb) { 256 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so); 257 if (error == 0) 258 error = error2; 259 } 260 discard: 261 if (so->so_state & SS_NOFDREF) 262 panic("soclose: NOFDREF"); 263 so->so_state |= SS_NOFDREF; 264 sofree(so); 265 splx(s); 266 return (error); 267 } 268 269 /* 270 * Must be called at splnet... 271 */ 272 int 273 soabort(so) 274 struct socket *so; 275 { 276 277 return (*so->so_proto->pr_usrreqs->pru_abort)(so); 278 } 279 280 int 281 soaccept(so, nam) 282 register struct socket *so; 283 struct sockaddr **nam; 284 { 285 int s = splnet(); 286 int error; 287 288 if ((so->so_state & SS_NOFDREF) == 0) 289 panic("soaccept: !NOFDREF"); 290 so->so_state &= ~SS_NOFDREF; 291 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); 292 splx(s); 293 return (error); 294 } 295 296 int 297 soconnect(so, nam, p) 298 register struct socket *so; 299 struct sockaddr *nam; 300 struct proc *p; 301 { 302 int s; 303 int error; 304 305 if (so->so_options & SO_ACCEPTCONN) 306 return (EOPNOTSUPP); 307 s = splnet(); 308 /* 309 * If protocol is connection-based, can only connect once. 310 * Otherwise, if connected, try to disconnect first. 311 * This allows user to disconnect by connecting to, e.g., 312 * a null address. 313 */ 314 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 315 ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 316 (error = sodisconnect(so)))) 317 error = EISCONN; 318 else 319 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p); 320 splx(s); 321 return (error); 322 } 323 324 int 325 soconnect2(so1, so2) 326 register struct socket *so1; 327 struct socket *so2; 328 { 329 int s = splnet(); 330 int error; 331 332 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); 333 splx(s); 334 return (error); 335 } 336 337 int 338 sodisconnect(so) 339 register struct socket *so; 340 { 341 int s = splnet(); 342 int error; 343 344 if ((so->so_state & SS_ISCONNECTED) == 0) { 345 error = ENOTCONN; 346 goto bad; 347 } 348 if (so->so_state & SS_ISDISCONNECTING) { 349 error = EALREADY; 350 goto bad; 351 } 352 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); 353 bad: 354 splx(s); 355 return (error); 356 } 357 358 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 359 /* 360 * Send on a socket. 361 * If send must go all at once and message is larger than 362 * send buffering, then hard error. 363 * Lock against other senders. 364 * If must go all at once and not enough room now, then 365 * inform user that this would block and do nothing. 366 * Otherwise, if nonblocking, send as much as possible. 367 * The data to be sent is described by "uio" if nonzero, 368 * otherwise by the mbuf chain "top" (which must be null 369 * if uio is not). Data provided in mbuf chain must be small 370 * enough to send all at once. 371 * 372 * Returns nonzero on error, timeout or signal; callers 373 * must check for short counts if EINTR/ERESTART are returned. 374 * Data and control buffers are freed on return. 375 */ 376 int 377 sosend(so, addr, uio, top, control, flags, p) 378 register struct socket *so; 379 struct sockaddr *addr; 380 struct uio *uio; 381 struct mbuf *top; 382 struct mbuf *control; 383 int flags; 384 struct proc *p; 385 { 386 struct mbuf **mp; 387 register struct mbuf *m; 388 register long space, len, resid; 389 int clen = 0, error, s, dontroute, mlen; 390 int atomic = sosendallatonce(so) || top; 391 392 if (uio) 393 resid = uio->uio_resid; 394 else 395 resid = top->m_pkthdr.len; 396 /* 397 * In theory resid should be unsigned. 398 * However, space must be signed, as it might be less than 0 399 * if we over-committed, and we must use a signed comparison 400 * of space and resid. On the other hand, a negative resid 401 * causes us to loop sending 0-length segments to the protocol. 402 * 403 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM 404 * type sockets since that's an error. 405 */ 406 if (resid < 0 || so->so_type == SOCK_STREAM && (flags & MSG_EOR)) { 407 error = EINVAL; 408 goto out; 409 } 410 411 dontroute = 412 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 413 (so->so_proto->pr_flags & PR_ATOMIC); 414 if (p) 415 p->p_stats->p_ru.ru_msgsnd++; 416 if (control) 417 clen = control->m_len; 418 #define snderr(errno) { error = errno; splx(s); goto release; } 419 420 restart: 421 error = sblock(&so->so_snd, SBLOCKWAIT(flags)); 422 if (error) 423 goto out; 424 do { 425 s = splnet(); 426 if (so->so_state & SS_CANTSENDMORE) 427 snderr(EPIPE); 428 if (so->so_error) { 429 error = so->so_error; 430 so->so_error = 0; 431 splx(s); 432 goto release; 433 } 434 if ((so->so_state & SS_ISCONNECTED) == 0) { 435 /* 436 * `sendto' and `sendmsg' is allowed on a connection- 437 * based socket if it supports implied connect. 438 * Return ENOTCONN if not connected and no address is 439 * supplied. 440 */ 441 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && 442 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { 443 if ((so->so_state & SS_ISCONFIRMING) == 0 && 444 !(resid == 0 && clen != 0)) 445 snderr(ENOTCONN); 446 } else if (addr == 0) 447 snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ? 448 ENOTCONN : EDESTADDRREQ); 449 } 450 space = sbspace(&so->so_snd); 451 if (flags & MSG_OOB) 452 space += 1024; 453 if ((atomic && resid > so->so_snd.sb_hiwat) || 454 clen > so->so_snd.sb_hiwat) 455 snderr(EMSGSIZE); 456 if (space < resid + clen && uio && 457 (atomic || space < so->so_snd.sb_lowat || space < clen)) { 458 if (so->so_state & SS_NBIO) 459 snderr(EWOULDBLOCK); 460 sbunlock(&so->so_snd); 461 error = sbwait(&so->so_snd); 462 splx(s); 463 if (error) 464 goto out; 465 goto restart; 466 } 467 splx(s); 468 mp = ⊤ 469 space -= clen; 470 do { 471 if (uio == NULL) { 472 /* 473 * Data is prepackaged in "top". 474 */ 475 resid = 0; 476 if (flags & MSG_EOR) 477 top->m_flags |= M_EOR; 478 } else do { 479 if (top == 0) { 480 MGETHDR(m, M_WAIT, MT_DATA); 481 mlen = MHLEN; 482 m->m_pkthdr.len = 0; 483 m->m_pkthdr.rcvif = (struct ifnet *)0; 484 } else { 485 MGET(m, M_WAIT, MT_DATA); 486 mlen = MLEN; 487 } 488 if (resid >= MINCLSIZE) { 489 MCLGET(m, M_WAIT); 490 if ((m->m_flags & M_EXT) == 0) 491 goto nopages; 492 mlen = MCLBYTES; 493 len = min(min(mlen, resid), space); 494 } else { 495 nopages: 496 len = min(min(mlen, resid), space); 497 /* 498 * For datagram protocols, leave room 499 * for protocol headers in first mbuf. 500 */ 501 if (atomic && top == 0 && len < mlen) 502 MH_ALIGN(m, len); 503 } 504 space -= len; 505 error = uiomove(mtod(m, caddr_t), (int)len, uio); 506 resid = uio->uio_resid; 507 m->m_len = len; 508 *mp = m; 509 top->m_pkthdr.len += len; 510 if (error) 511 goto release; 512 mp = &m->m_next; 513 if (resid <= 0) { 514 if (flags & MSG_EOR) 515 top->m_flags |= M_EOR; 516 break; 517 } 518 } while (space > 0 && atomic); 519 if (dontroute) 520 so->so_options |= SO_DONTROUTE; 521 s = splnet(); /* XXX */ 522 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 523 (flags & MSG_OOB) ? PRUS_OOB : 524 /* 525 * If the user set MSG_EOF, the protocol 526 * understands this flag and nothing left to 527 * send then use PRU_SEND_EOF instead of PRU_SEND. 528 */ 529 ((flags & MSG_EOF) && 530 (so->so_proto->pr_flags & PR_IMPLOPCL) && 531 (resid <= 0)) ? 532 PRUS_EOF : 0, 533 top, addr, control, p); 534 splx(s); 535 if (dontroute) 536 so->so_options &= ~SO_DONTROUTE; 537 clen = 0; 538 control = 0; 539 top = 0; 540 mp = ⊤ 541 if (error) 542 goto release; 543 } while (resid && space > 0); 544 } while (resid); 545 546 release: 547 sbunlock(&so->so_snd); 548 out: 549 if (top) 550 m_freem(top); 551 if (control) 552 m_freem(control); 553 return (error); 554 } 555 556 /* 557 * Implement receive operations on a socket. 558 * We depend on the way that records are added to the sockbuf 559 * by sbappend*. In particular, each record (mbufs linked through m_next) 560 * must begin with an address if the protocol so specifies, 561 * followed by an optional mbuf or mbufs containing ancillary data, 562 * and then zero or more mbufs of data. 563 * In order to avoid blocking network interrupts for the entire time here, 564 * we splx() while doing the actual copy to user space. 565 * Although the sockbuf is locked, new data may still be appended, 566 * and thus we must maintain consistency of the sockbuf during that time. 567 * 568 * The caller may receive the data as a single mbuf chain by supplying 569 * an mbuf **mp0 for use in returning the chain. The uio is then used 570 * only for the count in uio_resid. 571 */ 572 int 573 soreceive(so, psa, uio, mp0, controlp, flagsp) 574 register struct socket *so; 575 struct sockaddr **psa; 576 struct uio *uio; 577 struct mbuf **mp0; 578 struct mbuf **controlp; 579 int *flagsp; 580 { 581 register struct mbuf *m, **mp; 582 register int flags, len, error, s, offset; 583 struct protosw *pr = so->so_proto; 584 struct mbuf *nextrecord; 585 int moff, type = 0; 586 int orig_resid = uio->uio_resid; 587 588 mp = mp0; 589 if (psa) 590 *psa = 0; 591 if (controlp) 592 *controlp = 0; 593 if (flagsp) 594 flags = *flagsp &~ MSG_EOR; 595 else 596 flags = 0; 597 if (flags & MSG_OOB) { 598 m = m_get(M_WAIT, MT_DATA); 599 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); 600 if (error) 601 goto bad; 602 do { 603 error = uiomove(mtod(m, caddr_t), 604 (int) min(uio->uio_resid, m->m_len), uio); 605 m = m_free(m); 606 } while (uio->uio_resid && error == 0 && m); 607 bad: 608 if (m) 609 m_freem(m); 610 return (error); 611 } 612 if (mp) 613 *mp = (struct mbuf *)0; 614 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) 615 (*pr->pr_usrreqs->pru_rcvd)(so, 0); 616 617 restart: 618 error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); 619 if (error) 620 return (error); 621 s = splnet(); 622 623 m = so->so_rcv.sb_mb; 624 /* 625 * If we have less data than requested, block awaiting more 626 * (subject to any timeout) if: 627 * 1. the current count is less than the low water mark, or 628 * 2. MSG_WAITALL is set, and it is possible to do the entire 629 * receive operation at once if we block (resid <= hiwat). 630 * 3. MSG_DONTWAIT is not set 631 * If MSG_WAITALL is set but resid is larger than the receive buffer, 632 * we have to do the receive in sections, and thus risk returning 633 * a short count if a timeout or signal occurs after we start. 634 */ 635 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && 636 so->so_rcv.sb_cc < uio->uio_resid) && 637 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || 638 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && 639 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { 640 KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1")); 641 if (so->so_error) { 642 if (m) 643 goto dontblock; 644 error = so->so_error; 645 if ((flags & MSG_PEEK) == 0) 646 so->so_error = 0; 647 goto release; 648 } 649 if (so->so_state & SS_CANTRCVMORE) { 650 if (m) 651 goto dontblock; 652 else 653 goto release; 654 } 655 for (; m; m = m->m_next) 656 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { 657 m = so->so_rcv.sb_mb; 658 goto dontblock; 659 } 660 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && 661 (so->so_proto->pr_flags & PR_CONNREQUIRED)) { 662 error = ENOTCONN; 663 goto release; 664 } 665 if (uio->uio_resid == 0) 666 goto release; 667 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { 668 error = EWOULDBLOCK; 669 goto release; 670 } 671 sbunlock(&so->so_rcv); 672 error = sbwait(&so->so_rcv); 673 splx(s); 674 if (error) 675 return (error); 676 goto restart; 677 } 678 dontblock: 679 if (uio->uio_procp) 680 uio->uio_procp->p_stats->p_ru.ru_msgrcv++; 681 nextrecord = m->m_nextpkt; 682 if (pr->pr_flags & PR_ADDR) { 683 KASSERT(m->m_type == MT_SONAME, ("receive 1a")); 684 orig_resid = 0; 685 if (psa) 686 *psa = dup_sockaddr(mtod(m, struct sockaddr *), 687 mp0 == 0); 688 if (flags & MSG_PEEK) { 689 m = m->m_next; 690 } else { 691 sbfree(&so->so_rcv, m); 692 MFREE(m, so->so_rcv.sb_mb); 693 m = so->so_rcv.sb_mb; 694 } 695 } 696 while (m && m->m_type == MT_CONTROL && error == 0) { 697 if (flags & MSG_PEEK) { 698 if (controlp) 699 *controlp = m_copy(m, 0, m->m_len); 700 m = m->m_next; 701 } else { 702 sbfree(&so->so_rcv, m); 703 if (controlp) { 704 if (pr->pr_domain->dom_externalize && 705 mtod(m, struct cmsghdr *)->cmsg_type == 706 SCM_RIGHTS) 707 error = (*pr->pr_domain->dom_externalize)(m); 708 *controlp = m; 709 so->so_rcv.sb_mb = m->m_next; 710 m->m_next = 0; 711 m = so->so_rcv.sb_mb; 712 } else { 713 MFREE(m, so->so_rcv.sb_mb); 714 m = so->so_rcv.sb_mb; 715 } 716 } 717 if (controlp) { 718 orig_resid = 0; 719 controlp = &(*controlp)->m_next; 720 } 721 } 722 if (m) { 723 if ((flags & MSG_PEEK) == 0) 724 m->m_nextpkt = nextrecord; 725 type = m->m_type; 726 if (type == MT_OOBDATA) 727 flags |= MSG_OOB; 728 } 729 moff = 0; 730 offset = 0; 731 while (m && uio->uio_resid > 0 && error == 0) { 732 if (m->m_type == MT_OOBDATA) { 733 if (type != MT_OOBDATA) 734 break; 735 } else if (type == MT_OOBDATA) 736 break; 737 else 738 KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER, 739 ("receive 3")); 740 so->so_state &= ~SS_RCVATMARK; 741 len = uio->uio_resid; 742 if (so->so_oobmark && len > so->so_oobmark - offset) 743 len = so->so_oobmark - offset; 744 if (len > m->m_len - moff) 745 len = m->m_len - moff; 746 /* 747 * If mp is set, just pass back the mbufs. 748 * Otherwise copy them out via the uio, then free. 749 * Sockbuf must be consistent here (points to current mbuf, 750 * it points to next record) when we drop priority; 751 * we must note any additions to the sockbuf when we 752 * block interrupts again. 753 */ 754 if (mp == 0) { 755 splx(s); 756 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); 757 s = splnet(); 758 if (error) 759 goto release; 760 } else 761 uio->uio_resid -= len; 762 if (len == m->m_len - moff) { 763 if (m->m_flags & M_EOR) 764 flags |= MSG_EOR; 765 if (flags & MSG_PEEK) { 766 m = m->m_next; 767 moff = 0; 768 } else { 769 nextrecord = m->m_nextpkt; 770 sbfree(&so->so_rcv, m); 771 if (mp) { 772 *mp = m; 773 mp = &m->m_next; 774 so->so_rcv.sb_mb = m = m->m_next; 775 *mp = (struct mbuf *)0; 776 } else { 777 MFREE(m, so->so_rcv.sb_mb); 778 m = so->so_rcv.sb_mb; 779 } 780 if (m) 781 m->m_nextpkt = nextrecord; 782 } 783 } else { 784 if (flags & MSG_PEEK) 785 moff += len; 786 else { 787 if (mp) 788 *mp = m_copym(m, 0, len, M_WAIT); 789 m->m_data += len; 790 m->m_len -= len; 791 so->so_rcv.sb_cc -= len; 792 } 793 } 794 if (so->so_oobmark) { 795 if ((flags & MSG_PEEK) == 0) { 796 so->so_oobmark -= len; 797 if (so->so_oobmark == 0) { 798 so->so_state |= SS_RCVATMARK; 799 break; 800 } 801 } else { 802 offset += len; 803 if (offset == so->so_oobmark) 804 break; 805 } 806 } 807 if (flags & MSG_EOR) 808 break; 809 /* 810 * If the MSG_WAITALL flag is set (for non-atomic socket), 811 * we must not quit until "uio->uio_resid == 0" or an error 812 * termination. If a signal/timeout occurs, return 813 * with a short count but without error. 814 * Keep sockbuf locked against other readers. 815 */ 816 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 && 817 !sosendallatonce(so) && !nextrecord) { 818 if (so->so_error || so->so_state & SS_CANTRCVMORE) 819 break; 820 error = sbwait(&so->so_rcv); 821 if (error) { 822 sbunlock(&so->so_rcv); 823 splx(s); 824 return (0); 825 } 826 m = so->so_rcv.sb_mb; 827 if (m) 828 nextrecord = m->m_nextpkt; 829 } 830 } 831 832 if (m && pr->pr_flags & PR_ATOMIC) { 833 flags |= MSG_TRUNC; 834 if ((flags & MSG_PEEK) == 0) 835 (void) sbdroprecord(&so->so_rcv); 836 } 837 if ((flags & MSG_PEEK) == 0) { 838 if (m == 0) 839 so->so_rcv.sb_mb = nextrecord; 840 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) 841 (*pr->pr_usrreqs->pru_rcvd)(so, flags); 842 } 843 if (orig_resid == uio->uio_resid && orig_resid && 844 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { 845 sbunlock(&so->so_rcv); 846 splx(s); 847 goto restart; 848 } 849 850 if (flagsp) 851 *flagsp |= flags; 852 release: 853 sbunlock(&so->so_rcv); 854 splx(s); 855 return (error); 856 } 857 858 int 859 soshutdown(so, how) 860 register struct socket *so; 861 register int how; 862 { 863 register struct protosw *pr = so->so_proto; 864 865 how++; 866 if (how & FREAD) 867 sorflush(so); 868 if (how & FWRITE) 869 return ((*pr->pr_usrreqs->pru_shutdown)(so)); 870 return (0); 871 } 872 873 void 874 sorflush(so) 875 register struct socket *so; 876 { 877 register struct sockbuf *sb = &so->so_rcv; 878 register struct protosw *pr = so->so_proto; 879 register int s; 880 struct sockbuf asb; 881 882 sb->sb_flags |= SB_NOINTR; 883 (void) sblock(sb, M_WAITOK); 884 s = splimp(); 885 socantrcvmore(so); 886 sbunlock(sb); 887 asb = *sb; 888 bzero((caddr_t)sb, sizeof (*sb)); 889 splx(s); 890 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) 891 (*pr->pr_domain->dom_dispose)(asb.sb_mb); 892 sbrelease(&asb); 893 } 894 895 /* 896 * Perhaps this routine, and sooptcopyout(), below, ought to come in 897 * an additional variant to handle the case where the option value needs 898 * to be some kind of integer, but not a specific size. 899 * In addition to their use here, these functions are also called by the 900 * protocol-level pr_ctloutput() routines. 901 */ 902 int 903 sooptcopyin(sopt, buf, len, minlen) 904 struct sockopt *sopt; 905 void *buf; 906 size_t len; 907 size_t minlen; 908 { 909 size_t valsize; 910 911 /* 912 * If the user gives us more than we wanted, we ignore it, 913 * but if we don't get the minimum length the caller 914 * wants, we return EINVAL. On success, sopt->sopt_valsize 915 * is set to however much we actually retrieved. 916 */ 917 if ((valsize = sopt->sopt_valsize) < minlen) 918 return EINVAL; 919 if (valsize > len) 920 sopt->sopt_valsize = valsize = len; 921 922 if (sopt->sopt_p != 0) 923 return (copyin(sopt->sopt_val, buf, valsize)); 924 925 bcopy(sopt->sopt_val, buf, valsize); 926 return 0; 927 } 928 929 int 930 sosetopt(so, sopt) 931 struct socket *so; 932 struct sockopt *sopt; 933 { 934 int error, optval; 935 struct linger l; 936 struct timeval tv; 937 short val; 938 939 error = 0; 940 if (sopt->sopt_level != SOL_SOCKET) { 941 if (so->so_proto && so->so_proto->pr_ctloutput) 942 return ((*so->so_proto->pr_ctloutput) 943 (so, sopt)); 944 error = ENOPROTOOPT; 945 } else { 946 switch (sopt->sopt_name) { 947 case SO_LINGER: 948 error = sooptcopyin(sopt, &l, sizeof l, sizeof l); 949 if (error) 950 goto bad; 951 952 so->so_linger = l.l_linger; 953 if (l.l_onoff) 954 so->so_options |= SO_LINGER; 955 else 956 so->so_options &= ~SO_LINGER; 957 break; 958 959 case SO_DEBUG: 960 case SO_KEEPALIVE: 961 case SO_DONTROUTE: 962 case SO_USELOOPBACK: 963 case SO_BROADCAST: 964 case SO_REUSEADDR: 965 case SO_REUSEPORT: 966 case SO_OOBINLINE: 967 case SO_TIMESTAMP: 968 error = sooptcopyin(sopt, &optval, sizeof optval, 969 sizeof optval); 970 if (error) 971 goto bad; 972 if (optval) 973 so->so_options |= sopt->sopt_name; 974 else 975 so->so_options &= ~sopt->sopt_name; 976 break; 977 978 case SO_SNDBUF: 979 case SO_RCVBUF: 980 case SO_SNDLOWAT: 981 case SO_RCVLOWAT: 982 error = sooptcopyin(sopt, &optval, sizeof optval, 983 sizeof optval); 984 if (error) 985 goto bad; 986 987 /* 988 * Values < 1 make no sense for any of these 989 * options, so disallow them. 990 */ 991 if (optval < 1) { 992 error = EINVAL; 993 goto bad; 994 } 995 996 switch (sopt->sopt_name) { 997 case SO_SNDBUF: 998 case SO_RCVBUF: 999 if (sbreserve(sopt->sopt_name == SO_SNDBUF ? 1000 &so->so_snd : &so->so_rcv, 1001 (u_long) optval) == 0) { 1002 error = ENOBUFS; 1003 goto bad; 1004 } 1005 break; 1006 1007 /* 1008 * Make sure the low-water is never greater than 1009 * the high-water. 1010 */ 1011 case SO_SNDLOWAT: 1012 so->so_snd.sb_lowat = 1013 (optval > so->so_snd.sb_hiwat) ? 1014 so->so_snd.sb_hiwat : optval; 1015 break; 1016 case SO_RCVLOWAT: 1017 so->so_rcv.sb_lowat = 1018 (optval > so->so_rcv.sb_hiwat) ? 1019 so->so_rcv.sb_hiwat : optval; 1020 break; 1021 } 1022 break; 1023 1024 case SO_SNDTIMEO: 1025 case SO_RCVTIMEO: 1026 error = sooptcopyin(sopt, &tv, sizeof tv, 1027 sizeof tv); 1028 if (error) 1029 goto bad; 1030 1031 if (tv.tv_sec > SHRT_MAX / hz - hz) { 1032 error = EDOM; 1033 goto bad; 1034 } 1035 val = tv.tv_sec * hz + tv.tv_usec / tick; 1036 1037 switch (sopt->sopt_name) { 1038 case SO_SNDTIMEO: 1039 so->so_snd.sb_timeo = val; 1040 break; 1041 case SO_RCVTIMEO: 1042 so->so_rcv.sb_timeo = val; 1043 break; 1044 } 1045 break; 1046 1047 default: 1048 error = ENOPROTOOPT; 1049 break; 1050 } 1051 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { 1052 (void) ((*so->so_proto->pr_ctloutput) 1053 (so, sopt)); 1054 } 1055 } 1056 bad: 1057 return (error); 1058 } 1059 1060 /* Helper routine for getsockopt */ 1061 int 1062 sooptcopyout(sopt, buf, len) 1063 struct sockopt *sopt; 1064 void *buf; 1065 size_t len; 1066 { 1067 int error; 1068 size_t valsize; 1069 1070 error = 0; 1071 1072 /* 1073 * Documented get behavior is that we always return a value, 1074 * possibly truncated to fit in the user's buffer. 1075 * Traditional behavior is that we always tell the user 1076 * precisely how much we copied, rather than something useful 1077 * like the total amount we had available for her. 1078 * Note that this interface is not idempotent; the entire answer must 1079 * generated ahead of time. 1080 */ 1081 valsize = min(len, sopt->sopt_valsize); 1082 sopt->sopt_valsize = valsize; 1083 if (sopt->sopt_val != 0) { 1084 if (sopt->sopt_p != 0) 1085 error = copyout(buf, sopt->sopt_val, valsize); 1086 else 1087 bcopy(buf, sopt->sopt_val, valsize); 1088 } 1089 return error; 1090 } 1091 1092 int 1093 sogetopt(so, sopt) 1094 struct socket *so; 1095 struct sockopt *sopt; 1096 { 1097 int error, optval; 1098 struct linger l; 1099 struct timeval tv; 1100 1101 error = 0; 1102 if (sopt->sopt_level != SOL_SOCKET) { 1103 if (so->so_proto && so->so_proto->pr_ctloutput) { 1104 return ((*so->so_proto->pr_ctloutput) 1105 (so, sopt)); 1106 } else 1107 return (ENOPROTOOPT); 1108 } else { 1109 switch (sopt->sopt_name) { 1110 case SO_LINGER: 1111 l.l_onoff = so->so_options & SO_LINGER; 1112 l.l_linger = so->so_linger; 1113 error = sooptcopyout(sopt, &l, sizeof l); 1114 break; 1115 1116 case SO_USELOOPBACK: 1117 case SO_DONTROUTE: 1118 case SO_DEBUG: 1119 case SO_KEEPALIVE: 1120 case SO_REUSEADDR: 1121 case SO_REUSEPORT: 1122 case SO_BROADCAST: 1123 case SO_OOBINLINE: 1124 case SO_TIMESTAMP: 1125 optval = so->so_options & sopt->sopt_name; 1126 integer: 1127 error = sooptcopyout(sopt, &optval, sizeof optval); 1128 break; 1129 1130 case SO_TYPE: 1131 optval = so->so_type; 1132 goto integer; 1133 1134 case SO_ERROR: 1135 optval = so->so_error; 1136 so->so_error = 0; 1137 goto integer; 1138 1139 case SO_SNDBUF: 1140 optval = so->so_snd.sb_hiwat; 1141 goto integer; 1142 1143 case SO_RCVBUF: 1144 optval = so->so_rcv.sb_hiwat; 1145 goto integer; 1146 1147 case SO_SNDLOWAT: 1148 optval = so->so_snd.sb_lowat; 1149 goto integer; 1150 1151 case SO_RCVLOWAT: 1152 optval = so->so_rcv.sb_lowat; 1153 goto integer; 1154 1155 case SO_SNDTIMEO: 1156 case SO_RCVTIMEO: 1157 optval = (sopt->sopt_name == SO_SNDTIMEO ? 1158 so->so_snd.sb_timeo : so->so_rcv.sb_timeo); 1159 1160 tv.tv_sec = optval / hz; 1161 tv.tv_usec = (optval % hz) * tick; 1162 error = sooptcopyout(sopt, &tv, sizeof tv); 1163 break; 1164 1165 default: 1166 error = ENOPROTOOPT; 1167 break; 1168 } 1169 return (error); 1170 } 1171 } 1172 1173 void 1174 sohasoutofband(so) 1175 register struct socket *so; 1176 { 1177 if (so->so_sigio != NULL) 1178 pgsigio(so->so_sigio, SIGURG, 0); 1179 selwakeup(&so->so_rcv.sb_sel); 1180 } 1181 1182 int 1183 sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p) 1184 { 1185 int revents = 0; 1186 int s = splnet(); 1187 1188 if (events & (POLLIN | POLLRDNORM)) 1189 if (soreadable(so)) 1190 revents |= events & (POLLIN | POLLRDNORM); 1191 1192 if (events & (POLLOUT | POLLWRNORM)) 1193 if (sowriteable(so)) 1194 revents |= events & (POLLOUT | POLLWRNORM); 1195 1196 if (events & (POLLPRI | POLLRDBAND)) 1197 if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) 1198 revents |= events & (POLLPRI | POLLRDBAND); 1199 1200 if (revents == 0) { 1201 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) { 1202 selrecord(p, &so->so_rcv.sb_sel); 1203 so->so_rcv.sb_flags |= SB_SEL; 1204 } 1205 1206 if (events & (POLLOUT | POLLWRNORM)) { 1207 selrecord(p, &so->so_snd.sb_sel); 1208 so->so_snd.sb_flags |= SB_SEL; 1209 } 1210 } 1211 1212 splx(s); 1213 return (revents); 1214 } 1215