/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 * $Id: uipc_socket.c,v 1.18 1996/05/09 20:14:57 wollman Exp $
 */

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>

static int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern, KERN_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 0, "");

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
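/*
 * Illustrative note (not in the original source): every routine below
 * reduces a generic socket operation to an indirect call through the
 * protocol switch, so a hypothetical operation "foo" would follow the
 * same pattern:
 *
 *	error = (*so->so_proto->pr_usrreqs->pru_foo)(so, ...);
 *
 * "pru_foo" is a made-up slot used only for illustration; the real
 * slots (pru_attach, pru_bind, pru_send, ...) appear in the code below.
 */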
/*ARGSUSED*/
int
socreate(dom, aso, type, proto, p)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
	struct proc *p;
{
	register struct protosw *prp;
	register struct socket *so;
	register int error;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreqs == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
	bzero((caddr_t)so, sizeof(*so));
	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;
	if (p->p_ucred->cr_uid == 0)
		so->so_state = SS_PRIV;
	so->so_proto = prp;
	error = (*prp->pr_usrreqs->pru_attach)(so, proto);
	if (error) {
		so->so_state |= SS_NOFDREF;
		sofree(so);
		return (error);
	}
	*aso = so;
	return (0);
}

int
sobind(so, nam)
	struct socket *so;
	struct mbuf *nam;
{
	int s = splnet();
	int error;

	error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam);
	splx(s);
	return (error);
}

int
solisten(so, backlog)
	register struct socket *so;
	int backlog;
{
	int s = splnet(), error;

	error = (*so->so_proto->pr_usrreqs->pru_listen)(so);
	if (error) {
		splx(s);
		return (error);
	}
	if (so->so_comp.tqh_first == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;
	splx(s);
	return (0);
}

void
sofree(so)
	register struct socket *so;
{
	struct socket *head = so->so_head;

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (head != NULL) {
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
		} else if (so->so_state & SS_COMP) {
			TAILQ_REMOVE(&head->so_comp, so, so_list);
		} else {
			panic("sofree: not queued");
		}
		head->so_qlen--;
		so->so_state &= ~(SS_INCOMP|SS_COMP);
		so->so_head = NULL;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	FREE(so, M_SOCKET);
}
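/*
 * Illustrative sketch (not in the original source): a passive open from
 * an in-kernel caller strings the routines above together roughly as
 * follows; error handling and the construction of "nam" (an mbuf
 * holding a struct sockaddr) are omitted:
 *
 *	struct socket *so;
 *
 *	error = socreate(AF_INET, &so, SOCK_STREAM, 0, p);
 *	error = sobind(so, nam);
 *	error = solisten(so, 5);
 *
 * Completed connections then accumulate on so->so_comp until they are
 * taken off by soaccept().
 */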
/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(so)
	register struct socket *so;
{
	int s = splnet();		/* conservative */
	int error = 0;

	if (so->so_options & SO_ACCEPTCONN) {
		struct socket *sp, *sonext;

		for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) {
			sonext = sp->so_list.tqe_next;
			(void) soabort(sp);
		}
		for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) {
			sonext = sp->so_list.tqe_next;
			(void) soabort(sp);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, "soclos", so->so_linger);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splnet...
 */
int
soabort(so)
	struct socket *so;
{

	return (*so->so_proto->pr_usrreqs->pru_abort)(so);
}

int
soaccept(so, nam)
	register struct socket *so;
	struct mbuf *nam;
{
	int s = splnet();
	int error;

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
	splx(s);
	return (error);
}

int
soconnect(so, nam)
	register struct socket *so;
	struct mbuf *nam;
{
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam);
	splx(s);
	return (error);
}

int
soconnect2(so1, so2)
	register struct socket *so1;
	struct socket *so2;
{
	int s = splnet();
	int error;

	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
	splx(s);
	return (error);
}

int
sodisconnect(so)
	register struct socket *so;
{
	int s = splnet();
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
bad:
	splx(s);
	return (error);
}
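/*
 * Illustrative note (not in the original source): for a datagram-style
 * protocol (PR_CONNREQUIRED clear), soconnect() on an already-connected
 * socket first calls sodisconnect(), so a user can dissolve the current
 * association simply by connecting again -- e.g. to a null address, as
 * the comment above notes.  Whether a particular "null" sockaddr is
 * accepted is up to the protocol's pru_connect routine.
 */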
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(so, addr, uio, top, control, flags)
	register struct socket *so;
	struct mbuf *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
{
	struct proc *p = curproc;		/* XXX */
	struct mbuf **mp;
	register struct mbuf *m;
	register long space, len, resid;
	int clen = 0, error, s, dontroute, mlen;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 */
	if (resid < 0)
		return (EINVAL);
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
	if (error)
		goto out;
	do {
		s = splnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error)
			snderr(so->so_error);
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			/*
			 * `sendto' and `sendmsg' are allowed on a connection-
			 * based socket if it supports implied connect.
			 * Return ENOTCONN if not connected and no address is
			 * supplied.
			 */
			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
				    ENOTCONN : EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
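		/*
		 * Illustrative note (not in the original source): the inner
		 * loop below gathers user data into an mbuf chain, one mbuf
		 * per iteration -- a header mbuf first (MGETHDR), a cluster
		 * (MCLGET) while at least MINCLSIZE bytes remain, and a
		 * plain mbuf (MGET) otherwise -- then hands the finished
		 * chain to the protocol's pru_send routine.
		 */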
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else do {
				if (top == 0) {
					MGETHDR(m, M_WAIT, MT_DATA);
					mlen = MHLEN;
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = (struct ifnet *)0;
				} else {
					MGET(m, M_WAIT, MT_DATA);
					mlen = MLEN;
				}
				if (resid >= MINCLSIZE) {
					MCLGET(m, M_WAIT);
					if ((m->m_flags & M_EXT) == 0)
						goto nopages;
					mlen = MCLBYTES;
					len = min(min(mlen, resid), space);
				} else {
nopages:
					len = min(min(mlen, resid), space);
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				space -= len;
				error = uiomove(mtod(m, caddr_t), (int)len, uio);
				resid = uio->uio_resid;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid <= 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			s = splnet();				/* XXX */
			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
			    (flags & MSG_OOB) ? PRUS_OOB :
			    /*
			     * If the user set MSG_EOF, the protocol
			     * understands this flag, and there is nothing
			     * left to send, then use PRUS_EOF instead of
			     * a plain send.
			     */
			    ((flags & MSG_EOF) &&
			     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			     (resid <= 0)) ?
				PRUS_EOF : 0,
			    top, addr, control);
			splx(s);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}
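/*
 * Illustrative note (not in the original source): for a protocol marked
 * PR_ATOMIC (e.g. UDP), sosendallatonce() is true and the whole
 * datagram must fit in the send buffer, so a single oversized send
 * fails up front rather than blocking:
 *
 *	error = sosend(so, addr, uio, NULL, NULL, 0);
 *	-- EMSGSIZE when uio->uio_resid > so->so_snd.sb_hiwat
 *
 * A stream socket instead loops above, sending as space frees up.
 */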
/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(so, paddr, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct mbuf **paddr;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);
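	/*
	 * Illustrative note (not in the original source): readers are
	 * serialized by sblock().  In the blocking path below the lock is
	 * dropped around sbwait(), and a wakeup jumps back to "restart"
	 * to reacquire the lock and re-evaluate the blocking conditions
	 * from scratch.
	 */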
restart:
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error)
		return (error);
	s = splnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), and
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
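	/*
	 * Illustrative note (not in the original source): the loop below
	 * consumes the data mbufs of the current record.  With mp unset
	 * (the usual case) each mbuf is copied out through the uio and
	 * freed; with mp set (soreceive called with a non-null mp0) the
	 * mbufs themselves are linked onto *mp and handed to the caller.
	 */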
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (it must point to the
		 * current mbuf and to the next record) when we drop
		 * priority; we must note any additions to the sockbuf
		 * when we block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splnet();
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			m = so->so_rcv.sb_mb;
			if (m)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

int
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;

	how++;
	if (how & FREAD)
		sorflush(so);
	if (how & FWRITE)
		return ((*pr->pr_usrreqs->pru_shutdown)(so));
	return (0);
}
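/*
 * Illustrative note (not in the original source): the "how++" above
 * maps the user-level shutdown(2) argument onto the FREAD/FWRITE bits:
 *
 *	how = 0 (no more reads)   -> how+1 = 1 = FREAD
 *	how = 1 (no more writes)  -> how+1 = 2 = FWRITE
 *	how = 2 (both)            -> how+1 = 3 = FREAD|FWRITE
 */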
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

int
sosetopt(so, level, optname, m0)
	register struct socket *so;
	int level, optname;
	struct mbuf *m0;
{
	int error = 0;
	register struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* fall thru... */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    (u_long) *mtod(m, int *)) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			case SO_SNDLOWAT:
				so->so_snd.sb_lowat = *mtod(m, int *);
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat = *mtod(m, int *);
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof (*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			if (tv->tv_sec > SHRT_MAX / hz - hz) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }
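		/*
		 * Illustrative note (not in the original source): the
		 * timeout above is stored in clock ticks.  Assuming the
		 * common hz = 100 (so tick = 10000 us), a timeval of 2.5
		 * seconds converts to val = 2 * 100 + 500000 / 10000 = 250.
		 */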
		case SO_PRIVSTATE:
			/* we don't care what the parameter is... */
			so->so_state &= ~SS_PRIV;
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
bad:
	if (m)
		(void) m_free(m);
	return (error);
}

int
sogetopt(so, level, optname, mp)
	register struct socket *so;
	int level, optname;
	struct mbuf **mp;
{
	register struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof (int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			mtod(m, struct linger *)->l_onoff =
			    so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_PRIVSTATE:
			*mtod(m, int *) = so->so_state & SS_PRIV;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			int val = (optname == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}

void
sohasoutofband(so)
	register struct socket *so;
{
	struct proc *p;

	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
		psignal(p, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
}
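/*
 * Illustrative sketch (not in the original source): an in-kernel caller
 * sets a boolean socket option by packing the value into an mbuf, e.g.
 * to enable SO_REUSEADDR; "m" here is hypothetical scaffolding:
 *
 *	struct mbuf *m = m_get(M_WAIT, MT_SOOPTS);
 *	m->m_len = sizeof (int);
 *	*mtod(m, int *) = 1;
 *	error = sosetopt(so, SOL_SOCKET, SO_REUSEADDR, m);
 *
 * sosetopt() consumes the mbuf on all paths, so the caller must not
 * free it.
 */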