/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 * $Id: uipc_socket.c,v 1.30 1997/09/02 20:05:57 bde Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/poll.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>

#include <machine/limits.h>

static int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
	   0, "");

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
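/*
 * A minimal usage sketch (not part of the original file, error handling
 * abbreviated): a kernel caller creates a TCP socket through the routines
 * below much as the socket(2)/listen(2)/close(2) system calls do, with
 * "p" assumed to be the calling process.
 */
#if 0
	struct socket *so = NULL;
	int error;

	error = socreate(AF_INET, &so, SOCK_STREAM, IPPROTO_TCP, p);
	if (error == 0) {
		error = solisten(so, 128, p);	/* backlog clamped to somaxconn */
		(void) soclose(so);
	}
#endif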
/*ARGSUSED*/
int
socreate(dom, aso, type, proto, p)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
	struct proc *p;
{
	register struct protosw *prp;
	register struct socket *so;
	register int error;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
	bzero((caddr_t)so, sizeof(*so));
	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;
	so->so_proto = prp;
	error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
	if (error) {
		so->so_state |= SS_NOFDREF;
		sofree(so);
		return (error);
	}
	*aso = so;
	return (0);
}

int
sobind(so, nam, p)
	struct socket *so;
	struct sockaddr *nam;
	struct proc *p;
{
	int s = splnet();
	int error;

	error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
	splx(s);
	return (error);
}

int
solisten(so, backlog, p)
	register struct socket *so;
	int backlog;
	struct proc *p;
{
	int s = splnet(), error;

	error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
	if (error) {
		splx(s);
		return (error);
	}
	if (so->so_comp.tqh_first == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;
	splx(s);
	return (0);
}
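/*
 * Illustration (not part of the original file) of the clamp at the end of
 * solisten(): a negative or oversized backlog silently becomes the current
 * kern.ipc.somaxconn value, so both calls below leave so_qlimit equal to
 * somaxconn.
 */
#if 0
	(void) solisten(so, -1, p);		/* so_qlimit = somaxconn */
	(void) solisten(so, somaxconn + 1, p);	/* so_qlimit = somaxconn */
#endif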
void
sofree(so)
	register struct socket *so;
{
	struct socket *head = so->so_head;

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (head != NULL) {
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
		} else if (so->so_state & SS_COMP) {
			TAILQ_REMOVE(&head->so_comp, so, so_list);
		} else {
			panic("sofree: not queued");
		}
		head->so_qlen--;
		so->so_state &= ~(SS_INCOMP|SS_COMP);
		so->so_head = NULL;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	FREE(so, M_SOCKET);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(so)
	register struct socket *so;
{
	int s = splnet();		/* conservative */
	int error = 0;

	if (so->so_options & SO_ACCEPTCONN) {
		struct socket *sp, *sonext;

		for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) {
			sonext = sp->so_list.tqe_next;
			(void) soabort(sp);
		}
		for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) {
			sonext = sp->so_list.tqe_next;
			(void) soabort(sp);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, "soclos", so->so_linger);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splnet...
 */
int
soabort(so)
	struct socket *so;
{

	return (*so->so_proto->pr_usrreqs->pru_abort)(so);
}

int
soaccept(so, nam)
	register struct socket *so;
	struct sockaddr **nam;
{
	int s = splnet();
	int error;

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
	splx(s);
	return (error);
}

int
soconnect(so, nam, p)
	register struct socket *so;
	struct sockaddr *nam;
	struct proc *p;
{
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
	splx(s);
	return (error);
}

int
soconnect2(so1, so2)
	register struct socket *so1;
	struct socket *so2;
{
	int s = splnet();
	int error;

	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
	splx(s);
	return (error);
}

int
sodisconnect(so)
	register struct socket *so;
{
	int s = splnet();
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
bad:
	splx(s);
	return (error);
}

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
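/*
 * Small illustration (not part of the original file): SBLOCKWAIT turns the
 * caller's MSG_DONTWAIT into the sockbuf-lock wait flag, so a nonblocking
 * sender fails with EWOULDBLOCK instead of sleeping on a locked buffer.
 */
#if 0
	error = sblock(&so->so_snd, SBLOCKWAIT(MSG_DONTWAIT));	/* M_NOWAIT */
	error = sblock(&so->so_snd, SBLOCKWAIT(0));		/* M_WAITOK */
#endif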
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(so, addr, uio, top, control, flags, p)
	register struct socket *so;
	struct sockaddr *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
	struct proc *p;
{
	struct mbuf **mp;
	register struct mbuf *m;
	register long space, len, resid;
	int clen = 0, error, s, dontroute, mlen;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 */
	if (resid < 0)
		return (EINVAL);
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
	if (error)
		goto out;
	do {
		s = splnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error)
			snderr(so->so_error);
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			/*
			 * `sendto' and `sendmsg' are allowed on a connection-
			 * based socket if it supports implied connect.
			 * Return ENOTCONN if not connected and no address is
			 * supplied.
			 */
			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
				    ENOTCONN : EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
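		/*
		 * The loop below carves the user data into an mbuf chain:
		 * the first mbuf of a chain gets a packet header (MGETHDR),
		 * and while at least MINCLSIZE bytes remain an external
		 * cluster (MCLGET) is attached so that large writes do not
		 * fragment into many small mbufs.
		 */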
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else do {
				if (top == 0) {
					MGETHDR(m, M_WAIT, MT_DATA);
					mlen = MHLEN;
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = (struct ifnet *)0;
				} else {
					MGET(m, M_WAIT, MT_DATA);
					mlen = MLEN;
				}
				if (resid >= MINCLSIZE) {
					MCLGET(m, M_WAIT);
					if ((m->m_flags & M_EXT) == 0)
						goto nopages;
					mlen = MCLBYTES;
					len = min(min(mlen, resid), space);
				} else {
nopages:
					len = min(min(mlen, resid), space);
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				space -= len;
				error = uiomove(mtod(m, caddr_t), (int)len, uio);
				resid = uio->uio_resid;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid <= 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			s = splnet();				/* XXX */
			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
			    (flags & MSG_OOB) ? PRUS_OOB :
			    /*
			     * If the user set MSG_EOF, the protocol
			     * understands this flag, and there is nothing
			     * left to send, then use PRU_SEND_EOF instead
			     * of PRU_SEND.
			     */
			    ((flags & MSG_EOF) &&
			     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			     (resid <= 0)) ?
				PRUS_EOF : 0,
			    top, addr, control, p);
			splx(s);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}
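/*
 * A usage sketch (not part of the original file): the sendto(2) path in
 * uipc_syscalls.c reduces to a single sosend() call of roughly this shape,
 * where "to" and "auio" are assumed to hold the destination address and
 * the user's iovecs.
 */
#if 0
	error = sosend(so, to, &auio, (struct mbuf *)0,
	    (struct mbuf *)0, flags, p);
#endif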
/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(so, psa, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct sockaddr **psa;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (psa)
		*psa = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
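	/*
	 * Illustration (not part of the original file): the MSG_OOB block
	 * above is the path reached by a user-level
	 * recv(s, &c, sizeof(c), MSG_OOB); it bypasses the receive buffer
	 * and fetches the out-of-band byte from the protocol via pru_rcvoob.
	 */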
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);

restart:
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error)
		return (error);
	s = splnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), and
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (psa)
			*psa = dup_sockaddr(mtod(m, struct sockaddr *),
			    mp0 == 0);
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			MFREE(m, so->so_rcv.sb_mb);
			m = so->so_rcv.sb_mb;
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * The sockbuf must be consistent here (sb_mb points to
		 * the current mbuf, m_nextpkt to the next record) when
		 * we drop priority; we must note any additions to the
		 * sockbuf when we block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splnet();
			if (error)
				goto release;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic sockets),
		 * we must not quit until "uio->uio_resid == 0" or the
		 * transfer terminates with an error.  If a signal/timeout
		 * occurs, return with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			m = so->so_rcv.sb_mb;
			if (m)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

int
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;

	how++;
	if (how & FREAD)
		sorflush(so);
	if (how & FWRITE)
		return ((*pr->pr_usrreqs->pru_shutdown)(so));
	return (0);
}
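/*
 * Worked illustration (not part of the original file) of the "how++" trick
 * in soshutdown() above: the user's 0/1/2 argument becomes a FREAD/FWRITE
 * bitmask (FREAD is 1, FWRITE is 2).
 *
 *	how = 0  ->  1 = FREAD		discard pending receive data
 *	how = 1  ->  2 = FWRITE		send EOF to the peer
 *	how = 2  ->  3 = FREAD|FWRITE	both
 */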
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

int
sosetopt(so, level, optname, m0, p)
	register struct socket *so;
	int level, optname;
	struct mbuf *m0;
	struct proc *p;
{
	int error = 0;
	register struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				(PRCO_SETOPT, so, level, optname, &m0, p));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* fall thru... */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			int optval;

			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			optval = *mtod(m, int *);
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof (*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			if (tv->tv_sec > SHRT_MAX / hz - hz) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				(PRCO_SETOPT, so, level, optname, &m0, p));
			m = NULL;	/* freed by protocol */
		}
	}
bad:
	if (m)
		(void) m_free(m);
	return (error);
}
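/*
 * Worked example (not part of the original file) of the SO_SNDTIMEO /
 * SO_RCVTIMEO conversion in sosetopt() above, assuming hz = 100 (so one
 * tick is 10000 microseconds):
 *
 *	tv  = { 2, 500000 }			request: 2.5 seconds
 *	val = 2 * 100 + 500000 / 10000 = 250	ticks
 *
 * The SHRT_MAX guard rejects intervals whose tick count would not fit in
 * the short sb_timeo field.
 */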
int
sogetopt(so, level, optname, mp, p)
	register struct socket *so;
	int level, optname;
	struct mbuf **mp;
	struct proc *p;
{
	register struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				(PRCO_GETOPT, so, level, optname, mp, p));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof (int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			mtod(m, struct linger *)->l_onoff =
			    so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			int val = (optname == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}

void
sohasoutofband(so)
	register struct socket *so;
{
	struct proc *p;

	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
		psignal(p, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
}

int
sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p)
{
	int revents = 0;
	int s = splnet();

	if (events & (POLLIN | POLLRDNORM))
		if (soreadable(so))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (sowriteable(so))
			revents |= events & (POLLOUT | POLLWRNORM);

	if (events & (POLLPRI | POLLRDBAND))
		if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
			revents |= events & (POLLPRI | POLLRDBAND);

	if (revents == 0) {
		if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
			selrecord(p, &so->so_rcv.sb_sel);
			so->so_rcv.sb_flags |= SB_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(p, &so->so_snd.sb_sel);
			so->so_snd.sb_flags |= SB_SEL;
		}
	}

	splx(s);
	return (revents);
}
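/*
 * Sketch (not part of the original file): a user-level
 *
 *	struct pollfd pfd = { s, POLLIN | POLLPRI, 0 };
 *	n = poll(&pfd, 1, timo);
 *
 * ends up in sopoll() above; POLLIN is answered by soreadable(), POLLPRI
 * by the out-of-band mark, and if nothing is ready the selector is
 * recorded with selrecord() for a later wakeup.
 */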