1 /* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 * $Id: uipc_socket.c,v 1.23 1997/02/22 09:39:28 peter Exp $
 */

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>

/*
 * System-wide cap on listen(2) backlogs, tunable at run time
 * through the kern.ipc.somaxconn sysctl (read-write).
 */
static int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
	   0, "");

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */

/*
 * Create a new socket of the given type in the given domain.
 * The protocol switch entry is looked up by explicit protocol number
 * when "proto" is nonzero, otherwise by socket type.  The socket
 * structure is allocated, zeroed, and handed to the protocol's
 * attach routine; on success the new socket is returned via *aso.
 * Returns 0 or an errno (EPROTONOSUPPORT, EPROTOTYPE, or whatever
 * pru_attach reports).
 */
/*ARGSUSED*/
int
socreate(dom, aso, type, proto, p)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
	struct proc *p;
{
	register struct protosw *prp;
	register struct socket *so;
	register int error;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreqs == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
	bzero((caddr_t)so, sizeof(*so));
	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;
	/* Sockets created by the superuser are marked privileged. */
	if (p->p_ucred->cr_uid == 0)
		so->so_state = SS_PRIV;
	so->so_proto = prp;
	error = (*prp->pr_usrreqs->pru_attach)(so, proto);
	if (error) {
		/*
		 * Attach failed; fake up the no-file-descriptor state
		 * so that sofree() will actually release the socket.
		 */
		so->so_state |= SS_NOFDREF;
		sofree(so);
		return (error);
	}
	*aso = so;
	return (0);
}

/*
 * Bind the socket to the local address held in the mbuf "nam",
 * by calling down to the protocol's bind routine at splnet.
 */
int
sobind(so, nam)
	struct socket *so;
	struct mbuf *nam;
{
	int s = splnet();
	int error;

	error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam);
	splx(s);
	return (error);
}

/*
 * Mark the socket as willing to accept connections and record its
 * backlog limit.  A negative or over-limit backlog is clamped to
 * the somaxconn sysctl value.
 */
int
solisten(so, backlog)
	register struct socket *so;
	int backlog;
{
	int s = splnet(), error;

	error = (*so->so_proto->pr_usrreqs->pru_listen)(so);
	if (error) {
		splx(s);
		return (error);
	}
	/* Only turn on SO_ACCEPTCONN if nothing is queued yet. */
	if (so->so_comp.tqh_first == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;
	splx(s);
	return (0);
}

/*
 * Release a socket.  Does nothing unless the socket has neither a
 * protocol control block nor a file descriptor reference.  If the
 * socket is still on a listening socket's incomplete or completed
 * connection queue, it is unlinked from that queue first; then the
 * send buffer is released, the receive side is flushed, and the
 * socket structure itself is freed.
 */
void
sofree(so)
	register struct socket *so;
{
	struct socket *head = so->so_head;

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (head != NULL) {
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
		} else if (so->so_state & SS_COMP) {
			TAILQ_REMOVE(&head->so_comp, so, so_list);
		} else {
			panic("sofree: not queued");
		}
		head->so_qlen--;
		so->so_state &= ~(SS_INCOMP|SS_COMP);
		so->so_head = NULL;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	FREE(so, M_SOCKET);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(so)
	register struct socket *so;
{
	int s = splnet();		/* conservative */
	int error = 0;

	/*
	 * A listening socket first aborts every connection still
	 * sitting on its incomplete and completed queues.  The next
	 * pointer is captured before soabort() since that call may
	 * free the entry.
	 */
	if (so->so_options & SO_ACCEPTCONN) {
		struct socket *sp, *sonext;

		for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) {
			sonext = sp->so_list.tqe_next;
			(void) soabort(sp);
		}
		for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) {
			sonext = sp->so_list.tqe_next;
			(void) soabort(sp);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/* A non-blocking lingering close just drops. */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			/*
			 * Otherwise sleep (up to so_linger ticks, and
			 * interruptibly) until the disconnect completes.
			 */
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, "soclos", so->so_linger);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		/* Preserve the first error seen. */
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splnet...
 */
int
soabort(so)
	struct socket *so;
{

	return (*so->so_proto->pr_usrreqs->pru_abort)(so);
}

/*
 * Accept a connection: clear SS_NOFDREF (the caller is about to
 * install the socket in a file descriptor) and let the protocol
 * fill in the peer's address via "nam".
 */
int
soaccept(so, nam)
	register struct socket *so;
	struct mbuf *nam;
{
	int s = splnet();
	int error;

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
	splx(s);
	return (error);
}

/*
 * Initiate a connection to the address held in the mbuf "nam".
 * Not permitted on a listening socket.
 */
int
soconnect(so, nam)
	register struct socket *so;
	struct mbuf *nam;
{
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam);
	splx(s);
	return (error);
}

/*
 * Connect two sockets to each other (socketpair(2) support).
 */
int
soconnect2(so1, so2)
	register struct socket *so1;
	struct socket *so2;
{
	int s = splnet();
	int error;

	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
	splx(s);
	return (error);
}

/*
 * Start a disconnect on a connected socket.  Fails with ENOTCONN
 * if not connected, EALREADY if a disconnect is already in progress.
 */
int
sodisconnect(so)
	register struct socket *so;
{
	int s = splnet();
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
bad:
	splx(s);
	return (error);
}

/* Map MSG_DONTWAIT onto the mbuf-allocator wait flag for sblock(). */
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(so, addr, uio, top, control, flags)
	register struct socket *so;
	struct mbuf *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
{
	struct proc *p = curproc;		/* XXX */
	struct mbuf **mp;
	register struct mbuf *m;
	register long space, len, resid;
	int clen = 0, error, s, dontroute, mlen;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 */
	if (resid < 0)
		return (EINVAL);
	/*
	 * MSG_DONTROUTE is implemented here (by temporarily setting
	 * SO_DONTROUTE around the pru_send call) only for atomic
	 * protocols where one request maps to one protocol send.
	 */
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
/* Error exit used while at splnet with the send buffer locked. */
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
	if (error)
		goto out;
	do {
		s = splnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error)
			snderr(so->so_error);
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			/*
			 * `sendto' and `sendmsg' is allowed on a connection-
			 * based socket if it supports implied connect.
			 * Return ENOTCONN if not connected and no address is
			 * supplied.
			 */
			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
				    ENOTCONN : EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		/* Out-of-band data gets a little slack beyond the limit. */
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			/* Wait for space, then start over from the top. */
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
		    if (uio == NULL) {
			/*
			 * Data is prepackaged in "top".
			 */
			resid = 0;
			if (flags & MSG_EOR)
				top->m_flags |= M_EOR;
		    } else do {
			/* Fill mbufs (clusters when worthwhile) from the uio. */
			if (top == 0) {
				MGETHDR(m, M_WAIT, MT_DATA);
				mlen = MHLEN;
				m->m_pkthdr.len = 0;
				m->m_pkthdr.rcvif = (struct ifnet *)0;
			} else {
				MGET(m, M_WAIT, MT_DATA);
				mlen = MLEN;
			}
			if (resid >= MINCLSIZE) {
				MCLGET(m, M_WAIT);
				if ((m->m_flags & M_EXT) == 0)
					goto nopages;
				mlen = MCLBYTES;
				len = min(min(mlen, resid), space);
			} else {
nopages:
				len = min(min(mlen, resid), space);
				/*
				 * For datagram protocols, leave room
				 * for protocol headers in first mbuf.
				 */
				if (atomic && top == 0 && len < mlen)
					MH_ALIGN(m, len);
			}
			space -= len;
			error = uiomove(mtod(m, caddr_t), (int)len, uio);
			resid = uio->uio_resid;
			m->m_len = len;
			*mp = m;
			top->m_pkthdr.len += len;
			if (error)
				goto release;
			mp = &m->m_next;
			if (resid <= 0) {
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
				break;
			}
		    } while (space > 0 && atomic);
		    if (dontroute)
			    so->so_options |= SO_DONTROUTE;
		    s = splnet();				/* XXX */
		    error = (*so->so_proto->pr_usrreqs->pru_send)(so,
			(flags & MSG_OOB) ? PRUS_OOB :
			/*
			 * If the user set MSG_EOF, the protocol
			 * understands this flag and nothing left to
			 * send then use PRU_SEND_EOF instead of PRU_SEND.
			 */
			((flags & MSG_EOF) &&
			 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			 (resid <= 0)) ?
				PRUS_EOF : 0,
			top, addr, control);
		    splx(s);
		    if (dontroute)
			    so->so_options &= ~SO_DONTROUTE;
		    /* Ownership of top/control passed to the protocol. */
		    clen = 0;
		    control = 0;
		    top = 0;
		    mp = &top;
		    if (error)
			goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(so, paddr, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct mbuf **paddr;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	/* Out-of-band data is fetched synchronously from the protocol. */
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);

restart:
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error)
		return (error);
	s = splnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* A record boundary or OOB data already queued? Don't block. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
	nextrecord = m->m_nextpkt;
	/* The record's leading MT_SONAME mbuf carries the sender's address. */
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	/* Next come any MT_CONTROL mbufs holding ancillary data. */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				/* Passed descriptors must be externalized. */
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
	/* Copy out (or hand over, if mp is set) the record's data mbufs. */
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		/* Never read past the out-of-band mark in one gulp. */
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splnet();
			if (error)
				goto release;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* Consumed the whole mbuf: unlink or advance. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* Partial mbuf: trim the consumed part in place. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				/* Short count, no error — see comment above. */
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			m = so->so_rcv.sb_mb;
			if (m)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		/* Datagram not fully consumed: report and drop the rest. */
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		/* Nothing was transferred at all; start over. */
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

/*
 * Shut down part of a full-duplex connection.  "how"+1 is
 * interpreted as FREAD/FWRITE bits: flush the receive side,
 * and/or tell the protocol to shut down the send side.
 */
int
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;

	how++;
	if (how & FREAD)
		sorflush(so);
	if (how & FWRITE)
		return ((*pr->pr_usrreqs->pru_shutdown)(so));
	return (0);
}

/*
 * Throw away the contents of a receive buffer: mark the socket as
 * unable to receive more, detach the buffer contents under splimp
 * into a local copy, then dispose of any rights (passed descriptors)
 * before releasing the mbufs.
 */
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	/* Lock the buffer without allowing signal interruption. */
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	/* Snapshot and zero the sockbuf atomically w.r.t. interrupts. */
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

/*
 * Set a socket option.  Socket-level options are handled here;
 * anything else is passed through to the protocol's pr_ctloutput.
 * The option mbuf "m0" is consumed (freed) on all paths.
 */
int
sosetopt(so, level, optname, m0)
	register struct socket *so;
	int level, optname;
	struct mbuf *m0;
{
	int error = 0;
	register struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* fall thru... (l_onoff toggles the option bit below) */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			/* Boolean options: set or clear the so_options bit. */
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				/* Resize the buffer via sbreserve(). */
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    (u_long) *mtod(m, int *)) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			case SO_SNDLOWAT:
				so->so_snd.sb_lowat = *mtod(m, int *);
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat = *mtod(m, int *);
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof (*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			/* Reject timeouts that won't fit in a short (ticks). */
			if (tv->tv_sec > SHRT_MAX / hz - hz) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		case SO_PRIVSTATE:
			/* we don't care what the parameter is... */
			so->so_state &= ~SS_PRIV;
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		/* Give the protocol a look at socket-level options too. */
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
bad:
	if (m)
		(void) m_free(m);
	return (error);
}

/*
 * Get a socket option.  Socket-level options are answered from the
 * socket structure; anything else is passed to the protocol via
 * pr_ctloutput.  On success the result is returned in a freshly
 * allocated mbuf through *mp, which the caller must free.
 */
int
sogetopt(so, level, optname, mp)
	register struct socket *so;
	int level, optname;
	struct mbuf **mp;
{
	register struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		/* Default result size; overridden below where needed. */
		m->m_len = sizeof (int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			mtod(m, struct linger *)->l_onoff =
				so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_PRIVSTATE:
			*mtod(m, int *) = so->so_state & SS_PRIV;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			/* Reading SO_ERROR clears the pending error. */
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			int val = (optname == SO_SNDTIMEO ?
			     so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			/* Convert stored clock ticks back to a timeval. */
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}

/*
 * Notify the owner that out-of-band data has arrived: deliver SIGURG
 * to the owning process or process group (so_pgid < 0 names a group),
 * and wake up any select(2)ers on the receive buffer.
 */
void
sohasoutofband(so)
	register struct socket *so;
{
	struct proc *p;

	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
		psignal(p, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
}