1 /* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
32 * 33 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 34 * $Id: uipc_socket.c,v 1.41 1998/07/06 19:27:14 fenner Exp $ 35 */ 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/proc.h> 40 #include <sys/fcntl.h> 41 #include <sys/malloc.h> 42 #include <sys/mbuf.h> 43 #include <sys/domain.h> 44 #include <sys/kernel.h> 45 #include <sys/poll.h> 46 #include <sys/protosw.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/resourcevar.h> 50 #include <sys/signalvar.h> 51 #include <sys/sysctl.h> 52 #include <sys/uio.h> 53 #include <vm/vm_zone.h> 54 55 #include <machine/limits.h> 56 57 struct vm_zone *socket_zone; 58 so_gen_t so_gencnt; /* generation count for sockets */ 59 60 MALLOC_DEFINE(M_SONAME, "soname", "socket name"); 61 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); 62 63 static int somaxconn = SOMAXCONN; 64 SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 65 0, ""); 66 67 /* 68 * Socket operation routines. 69 * These routines are called by the routines in 70 * sys_socket.c or from a system process, and 71 * implement the semantics of socket operations by 72 * switching out to the protocol specific routines. 73 */ 74 75 /* 76 * Get a socket structure from our zone, and initialize it. 77 * We don't implement `waitok' yet (see comments in uipc_domain.c). 78 * Note that it would probably be better to allocate socket 79 * and PCB at the same time, but I'm not convinced that all 80 * the protocols can be easily modified to do this. 
 */
struct socket *
soalloc(waitok)
	int waitok;
{
	struct socket *so;

	/*
	 * NOTE(review): `waitok' is accepted but not honored yet (see the
	 * comment above); zalloci() is called unconditionally.
	 */
	so = zalloci(socket_zone);
	if (so) {
		/* XXX race condition for reentrant kernel */
		bzero(so, sizeof *so);
		/* Stamp a fresh generation number so stale references
		 * to a recycled slot can be detected. */
		so->so_gencnt = ++so_gencnt;
		so->so_zone = socket_zone;
	}
	return so;
}

/*
 * Create a socket of the given type in domain `dom'.  `proto' selects a
 * specific protocol; 0 picks the domain's default for `type'.  On success
 * the attached socket is returned via *aso.
 *
 * Returns 0, or EPROTONOSUPPORT / EPROTOTYPE / ENOBUFS, or whatever the
 * protocol's pru_attach reports.
 */
int
socreate(dom, aso, type, proto, p)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
	struct proc *p;
{
	register struct protosw *prp;
	register struct socket *so;
	register int error;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc(p != 0);
	if (so == 0)
		return (ENOBUFS);

	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;
	if (p != 0)
		so->so_uid = p->p_ucred->cr_uid;
	so->so_proto = prp;
	error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
	if (error) {
		/*
		 * Attach failed: mark the socket as having no file
		 * reference so sofree() will actually release it.
		 */
		so->so_state |= SS_NOFDREF;
		sofree(so);
		return (error);
	}
	*aso = so;
	return (0);
}

/*
 * Bind `nam' as the local address of `so' via the protocol's pru_bind,
 * at splnet.  Returns 0 or a protocol errno.
 */
int
sobind(so, nam, p)
	struct socket *so;
	struct sockaddr *nam;
	struct proc *p;
{
	int s = splnet();
	int error;

	error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
	splx(s);
	return (error);
}

/*
 * Return a socket structure to its zone.  The generation count is bumped
 * so dangling pointers to this slot can be detected.
 */
void
sodealloc(so)
	struct socket *so;
{
	so->so_gencnt = ++so_gencnt;
	zfreei(so->so_zone, so);
}

/*
 * Turn `so' into a listening socket.  The protocol is asked first via
 * pru_listen; only on success is SO_ACCEPTCONN set and the backlog
 * recorded (clamped to the kern.ipc.somaxconn sysctl).
 */
int
solisten(so, backlog, p)
	register struct socket *so;
	int backlog;
	struct proc *p;
{
	int s, error;

	s = splnet();
	error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
	if (error) {
		splx(s);
		return (error);
	}
	/* Only flip to accepting state if nothing is already queued. */
	if (so->so_comp.tqh_first == NULL)
		so->so_options |= SO_ACCEPTCONN;
	/* Negative or oversized backlogs are clamped to the system limit. */
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;
	splx(s);
	return (0);
}

/*
 * Release a socket once it is both detached from its protocol (no PCB)
 * and dropped from the file table (SS_NOFDREF).  If it is still sitting
 * on a listening socket's accept queue, unlink it first.
 */
void
sofree(so)
	register struct socket *so;
{
	struct socket *head = so->so_head;

	/* Not yet fully dead: still has a PCB or a file reference. */
	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (head != NULL) {
		/* Unlink from the incomplete or completed accept queue. */
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
		} else if (so->so_state & SS_COMP) {
			TAILQ_REMOVE(&head->so_comp, so, so_list);
		} else {
			panic("sofree: not queued");
		}
		head->so_qlen--;
		so->so_state &= ~(SS_INCOMP|SS_COMP);
		so->so_head = NULL;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	sodealloc(so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(so)
	register struct socket *so;
{
	int s = splnet();		/* conservative */
	int error = 0;

	if (so->so_options & SO_ACCEPTCONN) {
		struct socket *sp, *sonext;

		/* Abort every pending connection, incomplete and completed. */
		for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) {
			sonext = sp->so_list.tqe_next;
			(void) soabort(sp);
		}
		for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) {
			sonext = sp->so_list.tqe_next;
			(void) soabort(sp);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/* Non-blocking socket already disconnecting: don't wait. */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			/*
			 * Sleep until the disconnect completes.
			 * NOTE(review): so_linger is passed directly as the
			 * tsleep timeout (ticks) — confirm units at callers.
			 */
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, "soclos", so->so_linger);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		/* Preserve the first error seen. */
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}
/*
 * Must be called at splnet...
 */
int
soabort(so)
	struct socket *so;
{

	return (*so->so_proto->pr_usrreqs->pru_abort)(so);
}

/*
 * Hand a queued connection to an accepting process: the socket gains a
 * file reference (clear SS_NOFDREF) and the protocol fills in the peer
 * address via pru_accept.
 */
int
soaccept(so, nam)
	register struct socket *so;
	struct sockaddr **nam;
{
	int s = splnet();
	int error;

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
	splx(s);
	return (error);
}

/*
 * Initiate a connection to `nam'.  Listening sockets cannot connect.
 */
int
soconnect(so, nam, p)
	register struct socket *so;
	struct sockaddr *nam;
	struct proc *p;
{
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
	splx(s);
	return (error);
}

/*
 * Join two sockets (e.g. socketpair) via the protocol's pru_connect2.
 */
int
soconnect2(so1, so2)
	register struct socket *so1;
	struct socket *so2;
{
	int s = splnet();
	int error;

	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
	splx(s);
	return (error);
}

/*
 * Begin disconnecting a connected socket.  Fails with ENOTCONN if not
 * connected, EALREADY if a disconnect is already in progress.
 */
int
sodisconnect(so)
	register struct socket *so;
{
	int s = splnet();
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
bad:
	splx(s);
	return (error);
}

/* Map MSG_DONTWAIT onto the sockbuf-lock wait flag. */
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(so, addr, uio, top, control, flags, p)
	register struct socket *so;
	struct sockaddr *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
	struct proc *p;
{
	struct mbuf **mp;
	register struct mbuf *m;
	register long space, len, resid;
	int clen = 0, error, s, dontroute, mlen;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || so->so_type == SOCK_STREAM && (flags & MSG_EOR)) {
		error = EINVAL;
		goto out;
	}

	/* MSG_DONTROUTE applies only per-send for atomic protocols. */
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (p)
		p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
/* Common error exit: record errno, drop spl, release the sockbuf lock. */
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
	if (error)
		goto out;
	do {
		s = splnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			/*
			 * `sendto' and `sendmsg' is allowed on a connection-
			 * based socket if it supports implied connect.
			 * Return ENOTCONN if not connected and no address is
			 * supplied.
			 */
			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
				    ENOTCONN : EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		/* OOB data gets a little extra headroom. */
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		/* Not enough room now: block (or fail if non-blocking). */
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
		    if (uio == NULL) {
			/*
			 * Data is prepackaged in "top".
			 */
			resid = 0;
			if (flags & MSG_EOR)
				top->m_flags |= M_EOR;
		    } else do {
			/* Build the chain: packet header first, then plain mbufs. */
			if (top == 0) {
				MGETHDR(m, M_WAIT, MT_DATA);
				mlen = MHLEN;
				m->m_pkthdr.len = 0;
				m->m_pkthdr.rcvif = (struct ifnet *)0;
			} else {
				MGET(m, M_WAIT, MT_DATA);
				mlen = MLEN;
			}
			if (resid >= MINCLSIZE) {
				MCLGET(m, M_WAIT);
				if ((m->m_flags & M_EXT) == 0)
					goto nopages;
				mlen = MCLBYTES;
				len = min(min(mlen, resid), space);
			} else {
nopages:
				len = min(min(mlen, resid), space);
				/*
				 * For datagram protocols, leave room
				 * for protocol headers in first mbuf.
				 */
				if (atomic && top == 0 && len < mlen)
					MH_ALIGN(m, len);
			}
			space -= len;
			error = uiomove(mtod(m, caddr_t), (int)len, uio);
			resid = uio->uio_resid;
			m->m_len = len;
			*mp = m;
			top->m_pkthdr.len += len;
			if (error)
				goto release;
			mp = &m->m_next;
			if (resid <= 0) {
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
				break;
			}
		    } while (space > 0 && atomic);
		    if (dontroute)
			    so->so_options |= SO_DONTROUTE;
		    s = splnet();				/* XXX */
		    error = (*so->so_proto->pr_usrreqs->pru_send)(so,
			(flags & MSG_OOB) ? PRUS_OOB :
			/*
			 * If the user set MSG_EOF, the protocol
			 * understands this flag and nothing left to
			 * send then use PRU_SEND_EOF instead of PRU_SEND.
			 */
			((flags & MSG_EOF) &&
			 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			 (resid <= 0)) ?
				PRUS_EOF : 0,
			top, addr, control, p);
		    splx(s);
		    if (dontroute)
			    so->so_options &= ~SO_DONTROUTE;
		    /* Ownership of top/control passed to the protocol. */
		    clen = 0;
		    control = 0;
		    top = 0;
		    mp = &top;
		    if (error)
			goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}
/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(so, psa, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct sockaddr **psa;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (psa)
		*psa = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	/* Out-of-band data takes a completely separate path. */
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);

restart:
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error)
		return (error);
	s = splnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			/* Peeking must not consume the pending error. */
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* OOB or end-of-record data must be delivered even if short. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
	nextrecord = m->m_nextpkt;
	/* Record starts with a sender address for PR_ADDR protocols. */
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (psa)
			*psa = dup_sockaddr(mtod(m, struct sockaddr *),
			    mp0 == 0);
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			MFREE(m, so->so_rcv.sb_mb);
			m = so->so_rcv.sb_mb;
		}
	}
	/* Next come any ancillary-data (control) mbufs. */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				/* Passed file descriptors must be converted
				 * into this process's descriptor table. */
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
	/* Main copy loop: walk the data mbufs of this record. */
	while (m && uio->uio_resid > 0 && error == 0) {
		/* Don't mix OOB and normal data in one receive. */
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		/* Don't read past the out-of-band mark. */
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splnet();
			if (error)
				goto release;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* Consumed the whole mbuf. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* Partial mbuf: advance within it. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				/* Short count, no error (see comment above). */
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			m = so->so_rcv.sb_mb;
			if (m)
				nextrecord = m->m_nextpkt;
		}
	}

	/* For atomic protocols, any unread remainder of the record is lost. */
	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		/* Tell the protocol we consumed data (e.g. to open a window). */
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
	}
	/* Nothing was transferred and nothing terminal happened: retry. */
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

/*
 * Shut down part of a full-duplex connection.  `how' is the classic
 * 0/1/2 (recv/send/both); the how++ maps it onto the FREAD/FWRITE bits.
 */
int
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;

	how++;
	if (how & FREAD)
		sorflush(so);
	if (how & FWRITE)
		return ((*pr->pr_usrreqs->pru_shutdown)(so));
	return (0);
}

/*
 * Flush the receive buffer: mark the socket unable to receive more,
 * detach the sockbuf contents under splimp, then dispose of any
 * in-transit rights (file descriptors) and release the mbufs.
 */
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	/* The lock acquisition must not be interruptible here. */
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	/* Snapshot the sockbuf, then clear the live one atomically. */
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}
/*
 * Set a socket option.  Non-SOL_SOCKET levels are passed straight to the
 * protocol's ctloutput.  SOL_SOCKET options are handled here, and on
 * success are additionally offered to the protocol (which then owns and
 * frees m0).  The option mbuf `m0' is consumed in all cases.
 */
int
sosetopt(so, level, optname, m0, p)
	register struct socket *so;
	int level, optname;
	struct mbuf *m0;
	struct proc *p;
{
	int error = 0;
	register struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0, p));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* fall thru... */
			/* (l_onoff is the first int of struct linger, so the
			 * shared code below reads it to set/clear SO_LINGER.) */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			int optval;

			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			optval = *mtod(m, int *);
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof (*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			/* Timeout is stored in ticks in a short; reject
			 * values that would overflow it. */
			if (tv->tv_sec > SHRT_MAX / hz - hz) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0, p));
			m = NULL;	/* freed by protocol */
		}
	}
bad:
	if (m)
		(void) m_free(m);
	return (error);
}

/*
 * Get a socket option.  Non-SOL_SOCKET levels go to the protocol's
 * ctloutput; SOL_SOCKET options are answered here in a freshly
 * allocated mbuf returned via *mp (caller frees).
 */
int
sogetopt(so, level, optname, mp, p)
	register struct socket *so;
	int level, optname;
	struct mbuf **mp;
	struct proc *p;
{
	register struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_GETOPT, so, level, optname, mp, p));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof (int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			/* NOTE(review): l_onoff is returned as the raw
			 * masked bit, not normalized to 0/1. */
			mtod(m, struct linger *)->l_onoff =
				so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			/* Reading SO_ERROR clears the pending error. */
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			int val = (optname == SO_SNDTIMEO ?
			     so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			/* Convert ticks back to a timeval. */
			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}
case SO_TYPE: 1084 *mtod(m, int *) = so->so_type; 1085 break; 1086 1087 case SO_ERROR: 1088 *mtod(m, int *) = so->so_error; 1089 so->so_error = 0; 1090 break; 1091 1092 case SO_SNDBUF: 1093 *mtod(m, int *) = so->so_snd.sb_hiwat; 1094 break; 1095 1096 case SO_RCVBUF: 1097 *mtod(m, int *) = so->so_rcv.sb_hiwat; 1098 break; 1099 1100 case SO_SNDLOWAT: 1101 *mtod(m, int *) = so->so_snd.sb_lowat; 1102 break; 1103 1104 case SO_RCVLOWAT: 1105 *mtod(m, int *) = so->so_rcv.sb_lowat; 1106 break; 1107 1108 case SO_SNDTIMEO: 1109 case SO_RCVTIMEO: 1110 { 1111 int val = (optname == SO_SNDTIMEO ? 1112 so->so_snd.sb_timeo : so->so_rcv.sb_timeo); 1113 1114 m->m_len = sizeof(struct timeval); 1115 mtod(m, struct timeval *)->tv_sec = val / hz; 1116 mtod(m, struct timeval *)->tv_usec = 1117 (val % hz) * tick; 1118 break; 1119 } 1120 1121 default: 1122 (void)m_free(m); 1123 return (ENOPROTOOPT); 1124 } 1125 *mp = m; 1126 return (0); 1127 } 1128 } 1129 1130 void 1131 sohasoutofband(so) 1132 register struct socket *so; 1133 { 1134 struct proc *p; 1135 1136 if (so->so_pgid < 0) 1137 gsignal(-so->so_pgid, SIGURG); 1138 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) 1139 psignal(p, SIGURG); 1140 selwakeup(&so->so_rcv.sb_sel); 1141 } 1142 1143 int 1144 sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p) 1145 { 1146 int revents = 0; 1147 int s = splnet(); 1148 1149 if (events & (POLLIN | POLLRDNORM)) 1150 if (soreadable(so)) 1151 revents |= events & (POLLIN | POLLRDNORM); 1152 1153 if (events & (POLLOUT | POLLWRNORM)) 1154 if (sowriteable(so)) 1155 revents |= events & (POLLOUT | POLLWRNORM); 1156 1157 if (events & (POLLPRI | POLLRDBAND)) 1158 if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) 1159 revents |= events & (POLLPRI | POLLRDBAND); 1160 1161 if (revents == 0) { 1162 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) { 1163 selrecord(p, &so->so_rcv.sb_sel); 1164 so->so_rcv.sb_flags |= SB_SEL; 1165 } 1166 1167 if (events & (POLLOUT | 
POLLWRNORM)) { 1168 selrecord(p, &so->so_snd.sb_sel); 1169 so->so_snd.sb_flags |= SB_SEL; 1170 } 1171 } 1172 1173 splx(s); 1174 return (revents); 1175 } 1176