1 /* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 * $Id: uipc_socket.c,v 1.11 1995/08/25 20:27:46 bde Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>

/* Upper bound on the listen(2) backlog; tunable via sysctl kern.somaxconn. */
static int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern, KERN_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 0, "");

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
/*ARGSUSED*/
int
socreate(dom, aso, type, proto)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
{
	struct proc *p = curproc;		/* XXX */
	register struct protosw *prp;
	register struct socket *so;
	register int error;

	/*
	 * Find the protocol switch entry: an explicit protocol number
	 * takes precedence over a lookup by socket type alone.
	 */
	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
	bzero((caddr_t)so, sizeof(*so));
	so->so_type = type;
	/* Remember superuser status so protocols can permit privileged ops. */
	if (p->p_ucred->cr_uid == 0)
		so->so_state = SS_PRIV;
	so->so_proto = prp;
	/*
	 * Let the protocol attach; the protocol number is smuggled
	 * through the "nam" mbuf argument (historical usrreq interface).
	 */
	error =
	    (*prp->pr_usrreq)(so, PRU_ATTACH,
		(struct mbuf *)0, (struct mbuf *)proto, (struct mbuf *)0);
	if (error) {
		/* Mark no-file-reference so sofree() will release the socket. */
		so->so_state |= SS_NOFDREF;
		sofree(so);
		return (error);
	}
	*aso = so;
	return (0);
}

/*
 * Bind a local address to a socket; "nam" carries a struct sockaddr.
 */
int
sobind(so, nam)
	struct socket *so;
	struct mbuf *nam;
{
	int s = splnet();
	int error;

	error =
	    (*so->so_proto->pr_usrreq)(so, PRU_BIND,
		(struct mbuf *)0, nam, (struct mbuf *)0);
	splx(s);
	return (error);
}

/*
 * Prepare a socket to accept connections; clamp the requested
 * backlog to [0, somaxconn].
 */
int
solisten(so, backlog)
	register struct socket *so;
	int backlog;
{
	int s = splnet(), error;

	error =
	    (*so->so_proto->pr_usrreq)(so, PRU_LISTEN,
		(struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
	if (error) {
		splx(s);
		return (error);
	}
	/* Only flip to accepting if no connection is already queued. */
	if (so->so_q == 0)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;
	splx(s);
	return (0);
}

/*
 * Release a socket once it has neither a protocol control block
 * nor a file descriptor reference; otherwise do nothing.
 */
void
sofree(so)
	register struct socket *so;
{

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/* Remove from the listen queue (partial or complete). */
		if (!soqremque(so, 0) && !soqremque(so, 1))
			panic("sofree dq");
		so->so_head = 0;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	FREE(so, M_SOCKET);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
160 */ 161 int 162 soclose(so) 163 register struct socket *so; 164 { 165 int s = splnet(); /* conservative */ 166 int error = 0; 167 168 if (so->so_options & SO_ACCEPTCONN) { 169 while (so->so_q0) 170 (void) soabort(so->so_q0); 171 while (so->so_q) 172 (void) soabort(so->so_q); 173 } 174 if (so->so_pcb == 0) 175 goto discard; 176 if (so->so_state & SS_ISCONNECTED) { 177 if ((so->so_state & SS_ISDISCONNECTING) == 0) { 178 error = sodisconnect(so); 179 if (error) 180 goto drop; 181 } 182 if (so->so_options & SO_LINGER) { 183 if ((so->so_state & SS_ISDISCONNECTING) && 184 (so->so_state & SS_NBIO)) 185 goto drop; 186 while (so->so_state & SS_ISCONNECTED) { 187 error = tsleep((caddr_t)&so->so_timeo, 188 PSOCK | PCATCH, netcls, so->so_linger); 189 if (error) 190 break; 191 } 192 } 193 } 194 drop: 195 if (so->so_pcb) { 196 int error2 = 197 (*so->so_proto->pr_usrreq)(so, PRU_DETACH, 198 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0); 199 if (error == 0) 200 error = error2; 201 } 202 discard: 203 if (so->so_state & SS_NOFDREF) 204 panic("soclose: NOFDREF"); 205 so->so_state |= SS_NOFDREF; 206 sofree(so); 207 splx(s); 208 return (error); 209 } 210 211 /* 212 * Must be called at splnet... 
213 */ 214 int 215 soabort(so) 216 struct socket *so; 217 { 218 219 return ( 220 (*so->so_proto->pr_usrreq)(so, PRU_ABORT, 221 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)); 222 } 223 224 int 225 soaccept(so, nam) 226 register struct socket *so; 227 struct mbuf *nam; 228 { 229 int s = splnet(); 230 int error; 231 232 if ((so->so_state & SS_NOFDREF) == 0) 233 panic("soaccept: !NOFDREF"); 234 so->so_state &= ~SS_NOFDREF; 235 error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, 236 (struct mbuf *)0, nam, (struct mbuf *)0); 237 splx(s); 238 return (error); 239 } 240 241 int 242 soconnect(so, nam) 243 register struct socket *so; 244 struct mbuf *nam; 245 { 246 int s; 247 int error; 248 249 if (so->so_options & SO_ACCEPTCONN) 250 return (EOPNOTSUPP); 251 s = splnet(); 252 /* 253 * If protocol is connection-based, can only connect once. 254 * Otherwise, if connected, try to disconnect first. 255 * This allows user to disconnect by connecting to, e.g., 256 * a null address. 257 */ 258 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 259 ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 260 (error = sodisconnect(so)))) 261 error = EISCONN; 262 else 263 error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT, 264 (struct mbuf *)0, nam, (struct mbuf *)0); 265 splx(s); 266 return (error); 267 } 268 269 int 270 soconnect2(so1, so2) 271 register struct socket *so1; 272 struct socket *so2; 273 { 274 int s = splnet(); 275 int error; 276 277 error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, 278 (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0); 279 splx(s); 280 return (error); 281 } 282 283 int 284 sodisconnect(so) 285 register struct socket *so; 286 { 287 int s = splnet(); 288 int error; 289 290 if ((so->so_state & SS_ISCONNECTED) == 0) { 291 error = ENOTCONN; 292 goto bad; 293 } 294 if (so->so_state & SS_ISDISCONNECTING) { 295 error = EALREADY; 296 goto bad; 297 } 298 error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, 299 (struct mbuf *)0, (struct mbuf *)0, 
(struct mbuf *)0); 300 bad: 301 splx(s); 302 return (error); 303 } 304 305 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 306 /* 307 * Send on a socket. 308 * If send must go all at once and message is larger than 309 * send buffering, then hard error. 310 * Lock against other senders. 311 * If must go all at once and not enough room now, then 312 * inform user that this would block and do nothing. 313 * Otherwise, if nonblocking, send as much as possible. 314 * The data to be sent is described by "uio" if nonzero, 315 * otherwise by the mbuf chain "top" (which must be null 316 * if uio is not). Data provided in mbuf chain must be small 317 * enough to send all at once. 318 * 319 * Returns nonzero on error, timeout or signal; callers 320 * must check for short counts if EINTR/ERESTART are returned. 321 * Data and control buffers are freed on return. 322 */ 323 int 324 sosend(so, addr, uio, top, control, flags) 325 register struct socket *so; 326 struct mbuf *addr; 327 struct uio *uio; 328 struct mbuf *top; 329 struct mbuf *control; 330 int flags; 331 { 332 struct proc *p = curproc; /* XXX */ 333 struct mbuf **mp; 334 register struct mbuf *m; 335 register long space, len, resid; 336 int clen = 0, error, s, dontroute, mlen; 337 int atomic = sosendallatonce(so) || top; 338 339 if (uio) 340 resid = uio->uio_resid; 341 else 342 resid = top->m_pkthdr.len; 343 /* 344 * In theory resid should be unsigned. 345 * However, space must be signed, as it might be less than 0 346 * if we over-committed, and we must use a signed comparison 347 * of space and resid. On the other hand, a negative resid 348 * causes us to loop sending 0-length segments to the protocol. 
349 */ 350 if (resid < 0) 351 return (EINVAL); 352 dontroute = 353 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 354 (so->so_proto->pr_flags & PR_ATOMIC); 355 p->p_stats->p_ru.ru_msgsnd++; 356 if (control) 357 clen = control->m_len; 358 #define snderr(errno) { error = errno; splx(s); goto release; } 359 360 restart: 361 error = sblock(&so->so_snd, SBLOCKWAIT(flags)); 362 if (error) 363 goto out; 364 do { 365 s = splnet(); 366 if (so->so_state & SS_CANTSENDMORE) 367 snderr(EPIPE); 368 if (so->so_error) 369 snderr(so->so_error); 370 if ((so->so_state & SS_ISCONNECTED) == 0) { 371 /* 372 * `sendto' and `sendmsg' is allowed on a connection- 373 * based socket if it supports implied connect. 374 * Return ENOTCONN if not connected and no address is 375 * supplied. 376 */ 377 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && 378 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { 379 if ((so->so_state & SS_ISCONFIRMING) == 0 && 380 !(resid == 0 && clen != 0)) 381 snderr(ENOTCONN); 382 } else if (addr == 0) 383 snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ? 384 ENOTCONN : EDESTADDRREQ); 385 } 386 space = sbspace(&so->so_snd); 387 if (flags & MSG_OOB) 388 space += 1024; 389 if ((atomic && resid > so->so_snd.sb_hiwat) || 390 clen > so->so_snd.sb_hiwat) 391 snderr(EMSGSIZE); 392 if (space < resid + clen && uio && 393 (atomic || space < so->so_snd.sb_lowat || space < clen)) { 394 if (so->so_state & SS_NBIO) 395 snderr(EWOULDBLOCK); 396 sbunlock(&so->so_snd); 397 error = sbwait(&so->so_snd); 398 splx(s); 399 if (error) 400 goto out; 401 goto restart; 402 } 403 splx(s); 404 mp = ⊤ 405 space -= clen; 406 do { 407 if (uio == NULL) { 408 /* 409 * Data is prepackaged in "top". 
410 */ 411 resid = 0; 412 if (flags & MSG_EOR) 413 top->m_flags |= M_EOR; 414 } else do { 415 if (top == 0) { 416 MGETHDR(m, M_WAIT, MT_DATA); 417 mlen = MHLEN; 418 m->m_pkthdr.len = 0; 419 m->m_pkthdr.rcvif = (struct ifnet *)0; 420 } else { 421 MGET(m, M_WAIT, MT_DATA); 422 mlen = MLEN; 423 } 424 if (resid >= MINCLSIZE) { 425 MCLGET(m, M_WAIT); 426 if ((m->m_flags & M_EXT) == 0) 427 goto nopages; 428 mlen = MCLBYTES; 429 len = min(min(mlen, resid), space); 430 } else { 431 nopages: 432 len = min(min(mlen, resid), space); 433 /* 434 * For datagram protocols, leave room 435 * for protocol headers in first mbuf. 436 */ 437 if (atomic && top == 0 && len < mlen) 438 MH_ALIGN(m, len); 439 } 440 space -= len; 441 error = uiomove(mtod(m, caddr_t), (int)len, uio); 442 resid = uio->uio_resid; 443 m->m_len = len; 444 *mp = m; 445 top->m_pkthdr.len += len; 446 if (error) 447 goto release; 448 mp = &m->m_next; 449 if (resid <= 0) { 450 if (flags & MSG_EOR) 451 top->m_flags |= M_EOR; 452 break; 453 } 454 } while (space > 0 && atomic); 455 if (dontroute) 456 so->so_options |= SO_DONTROUTE; 457 s = splnet(); /* XXX */ 458 error = (*so->so_proto->pr_usrreq)(so, 459 (flags & MSG_OOB) ? PRU_SENDOOB : 460 /* 461 * If the user set MSG_EOF, the protocol 462 * understands this flag and nothing left to 463 * send then use PRU_SEND_EOF instead of PRU_SEND. 464 */ 465 ((flags & MSG_EOF) && 466 (so->so_proto->pr_flags & PR_IMPLOPCL) && 467 (resid <= 0)) ? 468 PRU_SEND_EOF : PRU_SEND, 469 top, addr, control); 470 splx(s); 471 if (dontroute) 472 so->so_options &= ~SO_DONTROUTE; 473 clen = 0; 474 control = 0; 475 top = 0; 476 mp = ⊤ 477 if (error) 478 goto release; 479 } while (resid && space > 0); 480 } while (resid); 481 482 release: 483 sbunlock(&so->so_snd); 484 out: 485 if (top) 486 m_freem(top); 487 if (control) 488 m_freem(control); 489 return (error); 490 } 491 492 /* 493 * Implement receive operations on a socket. 
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(so, paddr, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct mbuf **paddr;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	/* Out-of-band data is fetched from the protocol, not the sockbuf. */
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB,
		    m, (struct mbuf *)(flags & MSG_PEEK), (struct mbuf *)0);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0);

restart:
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error)
		return (error);
	s = splnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* OOB data or a record boundary makes the data deliverable. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
	nextrecord = m->m_nextpkt;
	/* First mbuf of a record is the sender's address, if the proto says so. */
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	/* Then any ancillary-data (control) mbufs. */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				/* Convert in-kernel rights to descriptors. */
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
	/* Main copy loop over the data mbufs of the record. */
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		/* Don't read past the out-of-band mark. */
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splnet();
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* Consumed the whole mbuf. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* Partial mbuf: advance within it. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				/* Short count, no error: see comment above. */
				return (0);
			}
			m = so->so_rcv.sb_mb;
			if (m)
				nextrecord = m->m_nextpkt;
		}
	}

	/* Atomic protocols discard the unread remainder of the record. */
	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
			    (struct mbuf *)flags, (struct mbuf *)0);
	}
	/* Got nothing but wanted something: go around again. */
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

/*
 * Shut down part of a full-duplex connection.
 * "how" is 0 (no more receives), 1 (no more sends) or 2 (both).
 */
int
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;

	/* Map 0/1/2 onto the FREAD/FWRITE bit masks. */
	how++;
	if (how & FREAD)
		sorflush(so);
	if (how & FWRITE)
		return ((*pr->pr_usrreq)(so, PRU_SHUTDOWN,
		    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
	return (0);
}

/*
 * Flush the receive buffer, disposing of any in-transit
 * access rights (e.g. unix-domain descriptors) it carries.
 */
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	/* SB_NOINTR: the lock acquisition below must not fail. */
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	/* Snapshot the buffer and clear it while interrupts are blocked. */
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

/*
 * Set a socket option; "m0" carries the option value and is
 * consumed here (or by the protocol).
 */
int
sosetopt(so, level, optname, m0)
	register struct socket *so;
	int level, optname;
	struct mbuf *m0;
{
	int error = 0;
	register struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		/* Non-socket-level options go straight to the protocol. */
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/*
			 * fall thru...
			 * (the int read below picks up l_onoff, the first
			 * member of struct linger, to set/clear the option)
			 */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    (u_long) *mtod(m, int *)) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			case SO_SNDLOWAT:
				so->so_snd.sb_lowat = *mtod(m, int *);
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat = *mtod(m, int *);
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof (*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			/* Reject values whose tick count overflows a short. */
			if (tv->tv_sec > SHRT_MAX / hz - hz) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		/* Give the protocol a look at socket-level options too. */
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
bad:
	if (m)
		(void) m_free(m);
	return (error);
}

/*
 * Get a socket option; the value is returned in a freshly
 * allocated mbuf via "*mp".
 */
int
sogetopt(so, level, optname, mp)
	register struct socket *so;
	int level, optname;
	struct mbuf **mp;
{
	register struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		/* Most options are a single int; larger ones override below. */
		m->m_len = sizeof (int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			mtod(m, struct linger *)->l_onoff =
				so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			/* Reading the error clears it. */
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			int val = (optname == SO_SNDTIMEO ?
			     so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			/* Convert the stored tick count back to a timeval. */
			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}

/*
 * Notify the owning process or process group that urgent
 * (out-of-band) data has arrived, and wake up any selectors.
 */
void
sohasoutofband(so)
	register struct socket *so;
{
	struct proc *p;

	/* Negative so_pgid means a process group, positive a single pid. */
	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
		psignal(p, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
}