/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 * $Id: uipc_socket.c,v 1.20 1996/10/07 04:32:26 pst Exp $
 */

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>

static int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern, KERN_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 0, "");
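
/*
 * The listen-queue ceiling above is exported as the read-write sysctl
 * "kern.somaxconn".  A minimal userland sketch of querying it via
 * sysctl(3) follows; it is illustrative only and not compiled here.
 */
#ifdef notdef
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int mib[2], maxconn;
	size_t len;

	mib[0] = CTL_KERN;
	mib[1] = KERN_SOMAXCONN;	/* same MIB id as the SYSCTL_INT above */
	len = sizeof(maxconn);
	if (sysctl(mib, 2, &maxconn, &len, NULL, 0) == -1)
		return (1);
	printf("kern.somaxconn = %d\n", maxconn);
	return (0);
}
#endif /* notdef */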

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
/*ARGSUSED*/
int
socreate(dom, aso, type, proto, p)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
	struct proc *p;
{
	register struct protosw *prp;
	register struct socket *so;
	register int error;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreqs == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
	bzero((caddr_t)so, sizeof(*so));
	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;
	if (p->p_ucred->cr_uid == 0)
		so->so_state = SS_PRIV;
	so->so_proto = prp;
	error = (*prp->pr_usrreqs->pru_attach)(so, proto);
	if (error) {
		so->so_state |= SS_NOFDREF;
		sofree(so);
		return (error);
	}
	*aso = so;
	return (0);
}

int
sobind(so, nam)
	struct socket *so;
	struct mbuf *nam;
{
	int s = splnet();
	int error;

	error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam);
	splx(s);
	return (error);
}

int
solisten(so, backlog)
	register struct socket *so;
	int backlog;
{
	int s = splnet(), error;

	error = (*so->so_proto->pr_usrreqs->pru_listen)(so);
	if (error) {
		splx(s);
		return (error);
	}
	if (so->so_comp.tqh_first == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;
	splx(s);
	return (0);
}
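
/*
 * A hedged sketch of how a kernel consumer might drive the three entry
 * points above, loosely modeled on the socket(2)/bind(2)/listen(2)
 * syscall path in uipc_syscalls.c.  The sockaddr travels in an mbuf
 * (nam); the helper name and error handling are illustrative, not part
 * of this file.
 */
#ifdef notdef
static int
example_listen_setup(struct proc *p, struct mbuf *nam, struct socket **sop)
{
	struct socket *so;
	int error;

	error = socreate(PF_INET, &so, SOCK_STREAM, 0, p);
	if (error)
		return (error);
	error = sobind(so, nam);	/* nam holds a struct sockaddr */
	if (error == 0)
		error = solisten(so, 5);	/* clipped to somaxconn */
	if (error) {
		(void) soclose(so);
		return (error);
	}
	*sop = so;
	return (0);
}
#endif /* notdef */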

void
sofree(so)
	register struct socket *so;
{
	struct socket *head = so->so_head;

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (head != NULL) {
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
		} else if (so->so_state & SS_COMP) {
			TAILQ_REMOVE(&head->so_comp, so, so_list);
		} else {
			panic("sofree: not queued");
		}
		head->so_qlen--;
		so->so_state &= ~(SS_INCOMP|SS_COMP);
		so->so_head = NULL;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	FREE(so, M_SOCKET);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(so)
	register struct socket *so;
{
	int s = splnet();		/* conservative */
	int error = 0;

	if (so->so_options & SO_ACCEPTCONN) {
		struct socket *sp, *sonext;

		for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) {
			sonext = sp->so_list.tqe_next;
			(void) soabort(sp);
		}
		for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) {
			sonext = sp->so_list.tqe_next;
			(void) soabort(sp);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, "soclos", so->so_linger);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splnet...
 */
int
soabort(so)
	struct socket *so;
{

	return (*so->so_proto->pr_usrreqs->pru_abort)(so);
}

int
soaccept(so, nam)
	register struct socket *so;
	struct mbuf *nam;
{
	int s = splnet();
	int error;

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
	splx(s);
	return (error);
}

int
soconnect(so, nam)
	register struct socket *so;
	struct mbuf *nam;
{
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam);
	splx(s);
	return (error);
}

int
soconnect2(so1, so2)
	register struct socket *so1;
	struct socket *so2;
{
	int s = splnet();
	int error;

	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
	splx(s);
	return (error);
}

int
sodisconnect(so)
	register struct socket *so;
{
	int s = splnet();
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
bad:
	splx(s);
	return (error);
}

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(so, addr, uio, top, control, flags)
	register struct socket *so;
	struct mbuf *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
{
	struct proc *p = curproc;		/* XXX */
	struct mbuf **mp;
	register struct mbuf *m;
	register long space, len, resid;
	int clen = 0, error, s, dontroute, mlen;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 */
	if (resid < 0)
		return (EINVAL);
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
	if (error)
		goto out;
	do {
		s = splnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error)
			snderr(so->so_error);
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			/*
			 * `sendto' and `sendmsg' are allowed on a connection-
			 * based socket if it supports implied connect.
			 * Return ENOTCONN if not connected and no address is
			 * supplied.
			 */
			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
				    ENOTCONN : EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else do {
				if (top == 0) {
					MGETHDR(m, M_WAIT, MT_DATA);
					mlen = MHLEN;
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = (struct ifnet *)0;
				} else {
					MGET(m, M_WAIT, MT_DATA);
					mlen = MLEN;
				}
				if (resid >= MINCLSIZE) {
					MCLGET(m, M_WAIT);
					if ((m->m_flags & M_EXT) == 0)
						goto nopages;
					mlen = MCLBYTES;
					len = min(min(mlen, resid), space);
				} else {
nopages:
					len = min(min(mlen, resid), space);
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				space -= len;
				error = uiomove(mtod(m, caddr_t), (int)len, uio);
				resid = uio->uio_resid;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid <= 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			s = splnet();				/* XXX */
			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
			    (flags & MSG_OOB) ? PRUS_OOB :
			    /*
			     * If the user set MSG_EOF, the protocol
			     * understands this flag, and there is nothing
			     * left to send, then use PRU_SEND_EOF instead
			     * of PRU_SEND.
			     */
			    ((flags & MSG_EOF) &&
			     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			     (resid <= 0)) ?
				PRUS_EOF : 0,
			    top, addr, control);
			splx(s);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(so, paddr, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct mbuf **paddr;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);

restart:
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error)
		return (error);
	s = splnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 * 1. the current count is less than the low water mark, or
	 * 2. MSG_WAITALL is set, and it is possible to do the entire
	 *    receive operation at once if we block (resid <= hiwat), or
	 * 3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splnet();
			if (error)
				goto release;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			m = so->so_rcv.sb_mb;
			if (m)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

int
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;

	how++;
	if (how & FREAD)
		sorflush(so);
	if (how & FWRITE)
		return ((*pr->pr_usrreqs->pru_shutdown)(so));
	return (0);
}

void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

int
sosetopt(so, level, optname, m0)
	register struct socket *so;
	int level, optname;
	struct mbuf *m0;
{
	int error = 0;
	register struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* fall thru... */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    (u_long) *mtod(m, int *)) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			case SO_SNDLOWAT:
				so->so_snd.sb_lowat = *mtod(m, int *);
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat = *mtod(m, int *);
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof (*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			if (tv->tv_sec > SHRT_MAX / hz - hz) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		case SO_PRIVSTATE:
			/* we don't care what the parameter is... */
			so->so_state &= ~SS_PRIV;
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
bad:
	if (m)
		(void) m_free(m);
	return (error);
}
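
/*
 * A sketch of driving sosetopt() from kernel code, as the setsockopt(2)
 * path does: the option value travels in an mbuf, which sosetopt()
 * consumes.  The example arms SO_LINGER; the helper name is
 * illustrative.  Note also the timeout conversion above,
 * val = tv_sec * hz + tv_usec / tick: with hz = 100 (so tick = 10000
 * microseconds), a 2.5 second timeout becomes 250 clock ticks.
 */
#ifdef notdef
static int
example_set_linger(struct socket *so, int seconds)
{
	struct mbuf *m;
	struct linger *l;

	m = m_get(M_WAIT, MT_SOOPTS);
	m->m_len = sizeof(struct linger);
	l = mtod(m, struct linger *);
	l->l_onoff = 1;
	l->l_linger = seconds;
	return (sosetopt(so, SOL_SOCKET, SO_LINGER, m));
}
#endif /* notdef */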

int
sogetopt(so, level, optname, mp)
	register struct socket *so;
	int level, optname;
	struct mbuf **mp;
{
	register struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof (int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			mtod(m, struct linger *)->l_onoff =
			    so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_PRIVSTATE:
			*mtod(m, int *) = so->so_state & SS_PRIV;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			int val = (optname == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}

void
sohasoutofband(so)
	register struct socket *so;
{
	struct proc *p;

	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
		psignal(p, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
}
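
/*
 * sohasoutofband() above is called by protocols (e.g. TCP urgent-data
 * processing) to notify the socket's owner: a negative so_pgid names a
 * process group, a positive one a single process.  A userland sketch of
 * arranging for SIGURG delivery follows; illustrative only, not
 * compiled here.
 */
#ifdef notdef
#include <sys/types.h>
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>

static void
urg_handler(int sig)
{
	/* read the out-of-band byte with recv(..., MSG_OOB) here */
}

static int
example_arm_sigurg(int s)
{
	(void) signal(SIGURG, urg_handler);
	/* direct SIGURG at this process; this sets so_pgid via F_SETOWN */
	return (fcntl(s, F_SETOWN, getpid()));
}
#endif /* notdef */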