1 /* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 34 * $Id: uipc_socket.c,v 1.26 1997/04/27 20:00:44 wollman Exp $ 35 */ 36 37 #include <sys/param.h> 38 #include <sys/queue.h> 39 #include <sys/systm.h> 40 #include <sys/proc.h> 41 #include <sys/fcntl.h> 42 #include <sys/malloc.h> 43 #include <sys/mbuf.h> 44 #include <sys/domain.h> 45 #include <sys/kernel.h> 46 #include <sys/protosw.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/resourcevar.h> 50 #include <sys/signalvar.h> 51 #include <sys/sysctl.h> 52 53 static int somaxconn = SOMAXCONN; 54 SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 55 0, ""); 56 57 /* 58 * Socket operation routines. 59 * These routines are called by the routines in 60 * sys_socket.c or from a system process, and 61 * implement the semantics of socket operations by 62 * switching out to the protocol specific routines. 63 */ 64 /*ARGSUSED*/ 65 int 66 socreate(dom, aso, type, proto, p) 67 int dom; 68 struct socket **aso; 69 register int type; 70 int proto; 71 struct proc *p; 72 { 73 register struct protosw *prp; 74 register struct socket *so; 75 register int error; 76 77 if (proto) 78 prp = pffindproto(dom, proto, type); 79 else 80 prp = pffindtype(dom, type); 81 if (prp == 0 || prp->pr_usrreqs->pru_attach == 0) 82 return (EPROTONOSUPPORT); 83 if (prp->pr_type != type) 84 return (EPROTOTYPE); 85 MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT); 86 bzero((caddr_t)so, sizeof(*so)); 87 TAILQ_INIT(&so->so_incomp); 88 TAILQ_INIT(&so->so_comp); 89 so->so_type = type; 90 so->so_proto = prp; 91 error = (*prp->pr_usrreqs->pru_attach)(so, proto, p); 92 if (error) { 93 so->so_state |= SS_NOFDREF; 94 sofree(so); 95 return (error); 96 } 97 *aso = so; 98 return (0); 99 } 100 101 int 102 sobind(so, nam, p) 103 struct socket *so; 104 struct mbuf *nam; 105 struct proc *p; 106 { 107 int s = splnet(); 108 int error; 109 110 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p); 111 splx(s); 112 return (error); 113 } 114 115 int 116 solisten(so, backlog, p) 117 register struct socket *so; 118 int backlog; 119 struct proc *p; 120 { 121 int s = splnet(), error; 122 123 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p); 124 if (error) { 125 splx(s); 126 return (error); 127 } 128 if (so->so_comp.tqh_first == NULL) 129 so->so_options |= SO_ACCEPTCONN; 130 if (backlog < 0 || backlog > somaxconn) 131 backlog = somaxconn; 132 so->so_qlimit = backlog; 133 splx(s); 134 return (0); 135 } 136 137 void 138 sofree(so) 139 register struct socket *so; 140 { 141 struct socket *head = so->so_head; 142 143 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) 144 return; 145 if (head != NULL) { 146 if (so->so_state & SS_INCOMP) { 147 TAILQ_REMOVE(&head->so_incomp, so, so_list); 148 head->so_incqlen--; 149 } else if (so->so_state & SS_COMP) { 150 TAILQ_REMOVE(&head->so_comp, so, so_list); 151 } else { 152 panic("sofree: not queued"); 153 } 154 head->so_qlen--; 155 so->so_state &= ~(SS_INCOMP|SS_COMP); 156 so->so_head = NULL; 157 } 158 sbrelease(&so->so_snd); 159 sorflush(so); 160 FREE(so, M_SOCKET); 161 } 162 163 /* 164 * Close a socket on last file table reference removal. 165 * Initiate disconnect if connected. 166 * Free socket when disconnect complete. 167 */ 168 int 169 soclose(so) 170 register struct socket *so; 171 { 172 int s = splnet(); /* conservative */ 173 int error = 0; 174 175 if (so->so_options & SO_ACCEPTCONN) { 176 struct socket *sp, *sonext; 177 178 for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) { 179 sonext = sp->so_list.tqe_next; 180 (void) soabort(sp); 181 } 182 for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) { 183 sonext = sp->so_list.tqe_next; 184 (void) soabort(sp); 185 } 186 } 187 if (so->so_pcb == 0) 188 goto discard; 189 if (so->so_state & SS_ISCONNECTED) { 190 if ((so->so_state & SS_ISDISCONNECTING) == 0) { 191 error = sodisconnect(so); 192 if (error) 193 goto drop; 194 } 195 if (so->so_options & SO_LINGER) { 196 if ((so->so_state & SS_ISDISCONNECTING) && 197 (so->so_state & SS_NBIO)) 198 goto drop; 199 while (so->so_state & SS_ISCONNECTED) { 200 error = tsleep((caddr_t)&so->so_timeo, 201 PSOCK | PCATCH, "soclos", so->so_linger); 202 if (error) 203 break; 204 } 205 } 206 } 207 drop: 208 if (so->so_pcb) { 209 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so); 210 if (error == 0) 211 error = error2; 212 } 213 discard: 214 if (so->so_state & SS_NOFDREF) 215 panic("soclose: NOFDREF"); 216 so->so_state |= SS_NOFDREF; 217 sofree(so); 218 splx(s); 219 return (error); 220 } 221 222 /* 223 * Must be called at splnet... 224 */ 225 int 226 soabort(so) 227 struct socket *so; 228 { 229 230 return (*so->so_proto->pr_usrreqs->pru_abort)(so); 231 } 232 233 int 234 soaccept(so, nam) 235 register struct socket *so; 236 struct mbuf *nam; 237 { 238 int s = splnet(); 239 int error; 240 241 if ((so->so_state & SS_NOFDREF) == 0) 242 panic("soaccept: !NOFDREF"); 243 so->so_state &= ~SS_NOFDREF; 244 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); 245 splx(s); 246 return (error); 247 } 248 249 int 250 soconnect(so, nam, p) 251 register struct socket *so; 252 struct mbuf *nam; 253 struct proc *p; 254 { 255 int s; 256 int error; 257 258 if (so->so_options & SO_ACCEPTCONN) 259 return (EOPNOTSUPP); 260 s = splnet(); 261 /* 262 * If protocol is connection-based, can only connect once. 263 * Otherwise, if connected, try to disconnect first. 264 * This allows user to disconnect by connecting to, e.g., 265 * a null address. 266 */ 267 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 268 ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 269 (error = sodisconnect(so)))) 270 error = EISCONN; 271 else 272 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p); 273 splx(s); 274 return (error); 275 } 276 277 int 278 soconnect2(so1, so2) 279 register struct socket *so1; 280 struct socket *so2; 281 { 282 int s = splnet(); 283 int error; 284 285 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); 286 splx(s); 287 return (error); 288 } 289 290 int 291 sodisconnect(so) 292 register struct socket *so; 293 { 294 int s = splnet(); 295 int error; 296 297 if ((so->so_state & SS_ISCONNECTED) == 0) { 298 error = ENOTCONN; 299 goto bad; 300 } 301 if (so->so_state & SS_ISDISCONNECTING) { 302 error = EALREADY; 303 goto bad; 304 } 305 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); 306 bad: 307 splx(s); 308 return (error); 309 } 310 311 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 312 /* 313 * Send on a socket. 314 * If send must go all at once and message is larger than 315 * send buffering, then hard error. 316 * Lock against other senders. 317 * If must go all at once and not enough room now, then 318 * inform user that this would block and do nothing. 319 * Otherwise, if nonblocking, send as much as possible. 320 * The data to be sent is described by "uio" if nonzero, 321 * otherwise by the mbuf chain "top" (which must be null 322 * if uio is not). Data provided in mbuf chain must be small 323 * enough to send all at once. 324 * 325 * Returns nonzero on error, timeout or signal; callers 326 * must check for short counts if EINTR/ERESTART are returned. 327 * Data and control buffers are freed on return. 328 */ 329 int 330 sosend(so, addr, uio, top, control, flags) 331 register struct socket *so; 332 struct mbuf *addr; 333 struct uio *uio; 334 struct mbuf *top; 335 struct mbuf *control; 336 int flags; 337 { 338 struct proc *p = curproc; /* XXX */ 339 struct mbuf **mp; 340 register struct mbuf *m; 341 register long space, len, resid; 342 int clen = 0, error, s, dontroute, mlen; 343 int atomic = sosendallatonce(so) || top; 344 345 if (uio) 346 resid = uio->uio_resid; 347 else 348 resid = top->m_pkthdr.len; 349 /* 350 * In theory resid should be unsigned. 351 * However, space must be signed, as it might be less than 0 352 * if we over-committed, and we must use a signed comparison 353 * of space and resid. On the other hand, a negative resid 354 * causes us to loop sending 0-length segments to the protocol. 355 */ 356 if (resid < 0) 357 return (EINVAL); 358 dontroute = 359 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 360 (so->so_proto->pr_flags & PR_ATOMIC); 361 p->p_stats->p_ru.ru_msgsnd++; 362 if (control) 363 clen = control->m_len; 364 #define snderr(errno) { error = errno; splx(s); goto release; } 365 366 restart: 367 error = sblock(&so->so_snd, SBLOCKWAIT(flags)); 368 if (error) 369 goto out; 370 do { 371 s = splnet(); 372 if (so->so_state & SS_CANTSENDMORE) 373 snderr(EPIPE); 374 if (so->so_error) 375 snderr(so->so_error); 376 if ((so->so_state & SS_ISCONNECTED) == 0) { 377 /* 378 * `sendto' and `sendmsg' is allowed on a connection- 379 * based socket if it supports implied connect. 380 * Return ENOTCONN if not connected and no address is 381 * supplied. 382 */ 383 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && 384 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { 385 if ((so->so_state & SS_ISCONFIRMING) == 0 && 386 !(resid == 0 && clen != 0)) 387 snderr(ENOTCONN); 388 } else if (addr == 0) 389 snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ? 390 ENOTCONN : EDESTADDRREQ); 391 } 392 space = sbspace(&so->so_snd); 393 if (flags & MSG_OOB) 394 space += 1024; 395 if ((atomic && resid > so->so_snd.sb_hiwat) || 396 clen > so->so_snd.sb_hiwat) 397 snderr(EMSGSIZE); 398 if (space < resid + clen && uio && 399 (atomic || space < so->so_snd.sb_lowat || space < clen)) { 400 if (so->so_state & SS_NBIO) 401 snderr(EWOULDBLOCK); 402 sbunlock(&so->so_snd); 403 error = sbwait(&so->so_snd); 404 splx(s); 405 if (error) 406 goto out; 407 goto restart; 408 } 409 splx(s); 410 mp = ⊤ 411 space -= clen; 412 do { 413 if (uio == NULL) { 414 /* 415 * Data is prepackaged in "top". 416 */ 417 resid = 0; 418 if (flags & MSG_EOR) 419 top->m_flags |= M_EOR; 420 } else do { 421 if (top == 0) { 422 MGETHDR(m, M_WAIT, MT_DATA); 423 mlen = MHLEN; 424 m->m_pkthdr.len = 0; 425 m->m_pkthdr.rcvif = (struct ifnet *)0; 426 } else { 427 MGET(m, M_WAIT, MT_DATA); 428 mlen = MLEN; 429 } 430 if (resid >= MINCLSIZE) { 431 MCLGET(m, M_WAIT); 432 if ((m->m_flags & M_EXT) == 0) 433 goto nopages; 434 mlen = MCLBYTES; 435 len = min(min(mlen, resid), space); 436 } else { 437 nopages: 438 len = min(min(mlen, resid), space); 439 /* 440 * For datagram protocols, leave room 441 * for protocol headers in first mbuf. 442 */ 443 if (atomic && top == 0 && len < mlen) 444 MH_ALIGN(m, len); 445 } 446 space -= len; 447 error = uiomove(mtod(m, caddr_t), (int)len, uio); 448 resid = uio->uio_resid; 449 m->m_len = len; 450 *mp = m; 451 top->m_pkthdr.len += len; 452 if (error) 453 goto release; 454 mp = &m->m_next; 455 if (resid <= 0) { 456 if (flags & MSG_EOR) 457 top->m_flags |= M_EOR; 458 break; 459 } 460 } while (space > 0 && atomic); 461 if (dontroute) 462 so->so_options |= SO_DONTROUTE; 463 s = splnet(); /* XXX */ 464 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 465 (flags & MSG_OOB) ? PRUS_OOB : 466 /* 467 * If the user set MSG_EOF, the protocol 468 * understands this flag and nothing left to 469 * send then use PRU_SEND_EOF instead of PRU_SEND. 470 */ 471 ((flags & MSG_EOF) && 472 (so->so_proto->pr_flags & PR_IMPLOPCL) && 473 (resid <= 0)) ? 474 PRUS_EOF : 0, 475 top, addr, control, p); 476 splx(s); 477 if (dontroute) 478 so->so_options &= ~SO_DONTROUTE; 479 clen = 0; 480 control = 0; 481 top = 0; 482 mp = ⊤ 483 if (error) 484 goto release; 485 } while (resid && space > 0); 486 } while (resid); 487 488 release: 489 sbunlock(&so->so_snd); 490 out: 491 if (top) 492 m_freem(top); 493 if (control) 494 m_freem(control); 495 return (error); 496 } 497 498 /* 499 * Implement receive operations on a socket. 500 * We depend on the way that records are added to the sockbuf 501 * by sbappend*. In particular, each record (mbufs linked through m_next) 502 * must begin with an address if the protocol so specifies, 503 * followed by an optional mbuf or mbufs containing ancillary data, 504 * and then zero or more mbufs of data. 505 * In order to avoid blocking network interrupts for the entire time here, 506 * we splx() while doing the actual copy to user space. 507 * Although the sockbuf is locked, new data may still be appended, 508 * and thus we must maintain consistency of the sockbuf during that time. 509 * 510 * The caller may receive the data as a single mbuf chain by supplying 511 * an mbuf **mp0 for use in returning the chain. The uio is then used 512 * only for the count in uio_resid. 513 */ 514 int 515 soreceive(so, paddr, uio, mp0, controlp, flagsp) 516 register struct socket *so; 517 struct mbuf **paddr; 518 struct uio *uio; 519 struct mbuf **mp0; 520 struct mbuf **controlp; 521 int *flagsp; 522 { 523 register struct mbuf *m, **mp; 524 register int flags, len, error, s, offset; 525 struct protosw *pr = so->so_proto; 526 struct mbuf *nextrecord; 527 int moff, type = 0; 528 int orig_resid = uio->uio_resid; 529 530 mp = mp0; 531 if (paddr) 532 *paddr = 0; 533 if (controlp) 534 *controlp = 0; 535 if (flagsp) 536 flags = *flagsp &~ MSG_EOR; 537 else 538 flags = 0; 539 if (flags & MSG_OOB) { 540 m = m_get(M_WAIT, MT_DATA); 541 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); 542 if (error) 543 goto bad; 544 do { 545 error = uiomove(mtod(m, caddr_t), 546 (int) min(uio->uio_resid, m->m_len), uio); 547 m = m_free(m); 548 } while (uio->uio_resid && error == 0 && m); 549 bad: 550 if (m) 551 m_freem(m); 552 return (error); 553 } 554 if (mp) 555 *mp = (struct mbuf *)0; 556 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) 557 (*pr->pr_usrreqs->pru_rcvd)(so, 0); 558 559 restart: 560 error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); 561 if (error) 562 return (error); 563 s = splnet(); 564 565 m = so->so_rcv.sb_mb; 566 /* 567 * If we have less data than requested, block awaiting more 568 * (subject to any timeout) if: 569 * 1. the current count is less than the low water mark, or 570 * 2. MSG_WAITALL is set, and it is possible to do the entire 571 * receive operation at once if we block (resid <= hiwat). 572 * 3. MSG_DONTWAIT is not set 573 * If MSG_WAITALL is set but resid is larger than the receive buffer, 574 * we have to do the receive in sections, and thus risk returning 575 * a short count if a timeout or signal occurs after we start. 576 */ 577 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && 578 so->so_rcv.sb_cc < uio->uio_resid) && 579 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || 580 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && 581 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { 582 #ifdef DIAGNOSTIC 583 if (m == 0 && so->so_rcv.sb_cc) 584 panic("receive 1"); 585 #endif 586 if (so->so_error) { 587 if (m) 588 goto dontblock; 589 error = so->so_error; 590 if ((flags & MSG_PEEK) == 0) 591 so->so_error = 0; 592 goto release; 593 } 594 if (so->so_state & SS_CANTRCVMORE) { 595 if (m) 596 goto dontblock; 597 else 598 goto release; 599 } 600 for (; m; m = m->m_next) 601 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { 602 m = so->so_rcv.sb_mb; 603 goto dontblock; 604 } 605 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && 606 (so->so_proto->pr_flags & PR_CONNREQUIRED)) { 607 error = ENOTCONN; 608 goto release; 609 } 610 if (uio->uio_resid == 0) 611 goto release; 612 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { 613 error = EWOULDBLOCK; 614 goto release; 615 } 616 sbunlock(&so->so_rcv); 617 error = sbwait(&so->so_rcv); 618 splx(s); 619 if (error) 620 return (error); 621 goto restart; 622 } 623 dontblock: 624 if (uio->uio_procp) 625 uio->uio_procp->p_stats->p_ru.ru_msgrcv++; 626 nextrecord = m->m_nextpkt; 627 if (pr->pr_flags & PR_ADDR) { 628 #ifdef DIAGNOSTIC 629 if (m->m_type != MT_SONAME) 630 panic("receive 1a"); 631 #endif 632 orig_resid = 0; 633 if (flags & MSG_PEEK) { 634 if (paddr) 635 *paddr = m_copy(m, 0, m->m_len); 636 m = m->m_next; 637 } else { 638 sbfree(&so->so_rcv, m); 639 if (paddr) { 640 *paddr = m; 641 so->so_rcv.sb_mb = m->m_next; 642 m->m_next = 0; 643 m = so->so_rcv.sb_mb; 644 } else { 645 MFREE(m, so->so_rcv.sb_mb); 646 m = so->so_rcv.sb_mb; 647 } 648 } 649 } 650 while (m && m->m_type == MT_CONTROL && error == 0) { 651 if (flags & MSG_PEEK) { 652 if (controlp) 653 *controlp = m_copy(m, 0, m->m_len); 654 m = m->m_next; 655 } else { 656 sbfree(&so->so_rcv, m); 657 if (controlp) { 658 if (pr->pr_domain->dom_externalize && 659 mtod(m, struct cmsghdr *)->cmsg_type == 660 SCM_RIGHTS) 661 error = (*pr->pr_domain->dom_externalize)(m); 662 *controlp = m; 663 so->so_rcv.sb_mb = m->m_next; 664 m->m_next = 0; 665 m = so->so_rcv.sb_mb; 666 } else { 667 MFREE(m, so->so_rcv.sb_mb); 668 m = so->so_rcv.sb_mb; 669 } 670 } 671 if (controlp) { 672 orig_resid = 0; 673 controlp = &(*controlp)->m_next; 674 } 675 } 676 if (m) { 677 if ((flags & MSG_PEEK) == 0) 678 m->m_nextpkt = nextrecord; 679 type = m->m_type; 680 if (type == MT_OOBDATA) 681 flags |= MSG_OOB; 682 } 683 moff = 0; 684 offset = 0; 685 while (m && uio->uio_resid > 0 && error == 0) { 686 if (m->m_type == MT_OOBDATA) { 687 if (type != MT_OOBDATA) 688 break; 689 } else if (type == MT_OOBDATA) 690 break; 691 #ifdef DIAGNOSTIC 692 else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) 693 panic("receive 3"); 694 #endif 695 so->so_state &= ~SS_RCVATMARK; 696 len = uio->uio_resid; 697 if (so->so_oobmark && len > so->so_oobmark - offset) 698 len = so->so_oobmark - offset; 699 if (len > m->m_len - moff) 700 len = m->m_len - moff; 701 /* 702 * If mp is set, just pass back the mbufs. 703 * Otherwise copy them out via the uio, then free. 704 * Sockbuf must be consistent here (points to current mbuf, 705 * it points to next record) when we drop priority; 706 * we must note any additions to the sockbuf when we 707 * block interrupts again. 708 */ 709 if (mp == 0) { 710 splx(s); 711 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); 712 s = splnet(); 713 if (error) 714 goto release; 715 } else 716 uio->uio_resid -= len; 717 if (len == m->m_len - moff) { 718 if (m->m_flags & M_EOR) 719 flags |= MSG_EOR; 720 if (flags & MSG_PEEK) { 721 m = m->m_next; 722 moff = 0; 723 } else { 724 nextrecord = m->m_nextpkt; 725 sbfree(&so->so_rcv, m); 726 if (mp) { 727 *mp = m; 728 mp = &m->m_next; 729 so->so_rcv.sb_mb = m = m->m_next; 730 *mp = (struct mbuf *)0; 731 } else { 732 MFREE(m, so->so_rcv.sb_mb); 733 m = so->so_rcv.sb_mb; 734 } 735 if (m) 736 m->m_nextpkt = nextrecord; 737 } 738 } else { 739 if (flags & MSG_PEEK) 740 moff += len; 741 else { 742 if (mp) 743 *mp = m_copym(m, 0, len, M_WAIT); 744 m->m_data += len; 745 m->m_len -= len; 746 so->so_rcv.sb_cc -= len; 747 } 748 } 749 if (so->so_oobmark) { 750 if ((flags & MSG_PEEK) == 0) { 751 so->so_oobmark -= len; 752 if (so->so_oobmark == 0) { 753 so->so_state |= SS_RCVATMARK; 754 break; 755 } 756 } else { 757 offset += len; 758 if (offset == so->so_oobmark) 759 break; 760 } 761 } 762 if (flags & MSG_EOR) 763 break; 764 /* 765 * If the MSG_WAITALL flag is set (for non-atomic socket), 766 * we must not quit until "uio->uio_resid == 0" or an error 767 * termination. If a signal/timeout occurs, return 768 * with a short count but without error. 769 * Keep sockbuf locked against other readers. 770 */ 771 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 && 772 !sosendallatonce(so) && !nextrecord) { 773 if (so->so_error || so->so_state & SS_CANTRCVMORE) 774 break; 775 error = sbwait(&so->so_rcv); 776 if (error) { 777 sbunlock(&so->so_rcv); 778 splx(s); 779 return (0); 780 } 781 m = so->so_rcv.sb_mb; 782 if (m) 783 nextrecord = m->m_nextpkt; 784 } 785 } 786 787 if (m && pr->pr_flags & PR_ATOMIC) { 788 flags |= MSG_TRUNC; 789 if ((flags & MSG_PEEK) == 0) 790 (void) sbdroprecord(&so->so_rcv); 791 } 792 if ((flags & MSG_PEEK) == 0) { 793 if (m == 0) 794 so->so_rcv.sb_mb = nextrecord; 795 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) 796 (*pr->pr_usrreqs->pru_rcvd)(so, flags); 797 } 798 if (orig_resid == uio->uio_resid && orig_resid && 799 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { 800 sbunlock(&so->so_rcv); 801 splx(s); 802 goto restart; 803 } 804 805 if (flagsp) 806 *flagsp |= flags; 807 release: 808 sbunlock(&so->so_rcv); 809 splx(s); 810 return (error); 811 } 812 813 int 814 soshutdown(so, how) 815 register struct socket *so; 816 register int how; 817 { 818 register struct protosw *pr = so->so_proto; 819 820 how++; 821 if (how & FREAD) 822 sorflush(so); 823 if (how & FWRITE) 824 return ((*pr->pr_usrreqs->pru_shutdown)(so)); 825 return (0); 826 } 827 828 void 829 sorflush(so) 830 register struct socket *so; 831 { 832 register struct sockbuf *sb = &so->so_rcv; 833 register struct protosw *pr = so->so_proto; 834 register int s; 835 struct sockbuf asb; 836 837 sb->sb_flags |= SB_NOINTR; 838 (void) sblock(sb, M_WAITOK); 839 s = splimp(); 840 socantrcvmore(so); 841 sbunlock(sb); 842 asb = *sb; 843 bzero((caddr_t)sb, sizeof (*sb)); 844 splx(s); 845 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) 846 (*pr->pr_domain->dom_dispose)(asb.sb_mb); 847 sbrelease(&asb); 848 } 849 850 int 851 sosetopt(so, level, optname, m0, p) 852 register struct socket *so; 853 int level, optname; 854 struct mbuf *m0; 855 struct proc *p; 856 { 857 int error = 0; 858 register struct mbuf *m = m0; 859 860 if (level != SOL_SOCKET) { 861 if (so->so_proto && so->so_proto->pr_ctloutput) 862 return ((*so->so_proto->pr_ctloutput) 863 (PRCO_SETOPT, so, level, optname, &m0, p)); 864 error = ENOPROTOOPT; 865 } else { 866 switch (optname) { 867 868 case SO_LINGER: 869 if (m == NULL || m->m_len != sizeof (struct linger)) { 870 error = EINVAL; 871 goto bad; 872 } 873 so->so_linger = mtod(m, struct linger *)->l_linger; 874 /* fall thru... */ 875 876 case SO_DEBUG: 877 case SO_KEEPALIVE: 878 case SO_DONTROUTE: 879 case SO_USELOOPBACK: 880 case SO_BROADCAST: 881 case SO_REUSEADDR: 882 case SO_REUSEPORT: 883 case SO_OOBINLINE: 884 case SO_TIMESTAMP: 885 if (m == NULL || m->m_len < sizeof (int)) { 886 error = EINVAL; 887 goto bad; 888 } 889 if (*mtod(m, int *)) 890 so->so_options |= optname; 891 else 892 so->so_options &= ~optname; 893 break; 894 895 case SO_SNDBUF: 896 case SO_RCVBUF: 897 case SO_SNDLOWAT: 898 case SO_RCVLOWAT: 899 { 900 int optval; 901 902 if (m == NULL || m->m_len < sizeof (int)) { 903 error = EINVAL; 904 goto bad; 905 } 906 907 /* 908 * Values < 1 make no sense for any of these 909 * options, so disallow them. 910 */ 911 optval = *mtod(m, int *); 912 if (optval < 1) { 913 error = EINVAL; 914 goto bad; 915 } 916 917 switch (optname) { 918 919 case SO_SNDBUF: 920 case SO_RCVBUF: 921 if (sbreserve(optname == SO_SNDBUF ? 922 &so->so_snd : &so->so_rcv, 923 (u_long) optval) == 0) { 924 error = ENOBUFS; 925 goto bad; 926 } 927 break; 928 929 /* 930 * Make sure the low-water is never greater than 931 * the high-water. 932 */ 933 case SO_SNDLOWAT: 934 so->so_snd.sb_lowat = 935 (optval > so->so_snd.sb_hiwat) ? 936 so->so_snd.sb_hiwat : optval; 937 break; 938 case SO_RCVLOWAT: 939 so->so_rcv.sb_lowat = 940 (optval > so->so_rcv.sb_hiwat) ? 941 so->so_rcv.sb_hiwat : optval; 942 break; 943 } 944 break; 945 } 946 947 case SO_SNDTIMEO: 948 case SO_RCVTIMEO: 949 { 950 struct timeval *tv; 951 short val; 952 953 if (m == NULL || m->m_len < sizeof (*tv)) { 954 error = EINVAL; 955 goto bad; 956 } 957 tv = mtod(m, struct timeval *); 958 if (tv->tv_sec > SHRT_MAX / hz - hz) { 959 error = EDOM; 960 goto bad; 961 } 962 val = tv->tv_sec * hz + tv->tv_usec / tick; 963 964 switch (optname) { 965 966 case SO_SNDTIMEO: 967 so->so_snd.sb_timeo = val; 968 break; 969 case SO_RCVTIMEO: 970 so->so_rcv.sb_timeo = val; 971 break; 972 } 973 break; 974 } 975 976 default: 977 error = ENOPROTOOPT; 978 break; 979 } 980 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { 981 (void) ((*so->so_proto->pr_ctloutput) 982 (PRCO_SETOPT, so, level, optname, &m0, p)); 983 m = NULL; /* freed by protocol */ 984 } 985 } 986 bad: 987 if (m) 988 (void) m_free(m); 989 return (error); 990 } 991 992 int 993 sogetopt(so, level, optname, mp, p) 994 register struct socket *so; 995 int level, optname; 996 struct mbuf **mp; 997 struct proc *p; 998 { 999 register struct mbuf *m; 1000 1001 if (level != SOL_SOCKET) { 1002 if (so->so_proto && so->so_proto->pr_ctloutput) { 1003 return ((*so->so_proto->pr_ctloutput) 1004 (PRCO_GETOPT, so, level, optname, mp, p)); 1005 } else 1006 return (ENOPROTOOPT); 1007 } else { 1008 m = m_get(M_WAIT, MT_SOOPTS); 1009 m->m_len = sizeof (int); 1010 1011 switch (optname) { 1012 1013 case SO_LINGER: 1014 m->m_len = sizeof (struct linger); 1015 mtod(m, struct linger *)->l_onoff = 1016 so->so_options & SO_LINGER; 1017 mtod(m, struct linger *)->l_linger = so->so_linger; 1018 break; 1019 1020 case SO_USELOOPBACK: 1021 case SO_DONTROUTE: 1022 case SO_DEBUG: 1023 case SO_KEEPALIVE: 1024 case SO_REUSEADDR: 1025 case SO_REUSEPORT: 1026 case SO_BROADCAST: 1027 case SO_OOBINLINE: 1028 case SO_TIMESTAMP: 1029 *mtod(m, int *) = so->so_options & optname; 1030 break; 1031 1032 case SO_TYPE: 1033 *mtod(m, int *) = so->so_type; 1034 break; 1035 1036 case SO_ERROR: 1037 *mtod(m, int *) = so->so_error; 1038 so->so_error = 0; 1039 break; 1040 1041 case SO_SNDBUF: 1042 *mtod(m, int *) = so->so_snd.sb_hiwat; 1043 break; 1044 1045 case SO_RCVBUF: 1046 *mtod(m, int *) = so->so_rcv.sb_hiwat; 1047 break; 1048 1049 case SO_SNDLOWAT: 1050 *mtod(m, int *) = so->so_snd.sb_lowat; 1051 break; 1052 1053 case SO_RCVLOWAT: 1054 *mtod(m, int *) = so->so_rcv.sb_lowat; 1055 break; 1056 1057 case SO_SNDTIMEO: 1058 case SO_RCVTIMEO: 1059 { 1060 int val = (optname == SO_SNDTIMEO ? 1061 so->so_snd.sb_timeo : so->so_rcv.sb_timeo); 1062 1063 m->m_len = sizeof(struct timeval); 1064 mtod(m, struct timeval *)->tv_sec = val / hz; 1065 mtod(m, struct timeval *)->tv_usec = 1066 (val % hz) * tick; 1067 break; 1068 } 1069 1070 default: 1071 (void)m_free(m); 1072 return (ENOPROTOOPT); 1073 } 1074 *mp = m; 1075 return (0); 1076 } 1077 } 1078 1079 void 1080 sohasoutofband(so) 1081 register struct socket *so; 1082 { 1083 struct proc *p; 1084 1085 if (so->so_pgid < 0) 1086 gsignal(-so->so_pgid, SIGURG); 1087 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) 1088 psignal(p, SIGURG); 1089 selwakeup(&so->so_rcv.sb_sel); 1090 } 1091 1092 int 1093 soselect(struct socket *so, int which, struct proc *p) 1094 { 1095 int s = splnet(); 1096 switch (which) { 1097 1098 case FREAD: 1099 if (soreadable(so)) { 1100 splx(s); 1101 return (1); 1102 } 1103 selrecord(p, &so->so_rcv.sb_sel); 1104 so->so_rcv.sb_flags |= SB_SEL; 1105 break; 1106 1107 case FWRITE: 1108 if (sowriteable(so)) { 1109 splx(s); 1110 return (1); 1111 } 1112 selrecord(p, &so->so_snd.sb_sel); 1113 so->so_snd.sb_flags |= SB_SEL; 1114 break; 1115 1116 case 0: 1117 if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) { 1118 splx(s); 1119 return (1); 1120 } 1121 selrecord(p, &so->so_rcv.sb_sel); 1122 so->so_rcv.sb_flags |= SB_SEL; 1123 break; 1124 } 1125 splx(s); 1126 return (0); 1127 } 1128