1 /* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 * $FreeBSD$
 */

#include "opt_inet.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/domain.h>
#include <sys/file.h>			/* for struct knote */
#include <sys/kernel.h>
#include <sys/malloc.h>			/* XXX duplicate of sys/malloc.h above */
#include <sys/event.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/jail.h>

#include <vm/vm_zone.h>

#include <machine/limits.h>

#ifdef INET
static int do_setopt_accept_filter(struct socket *so, struct sockopt *sopt);
#endif

/* kqueue filter entry points for sockets (defined later in this file) */
static void	filt_sordetach(struct knote *kn);
static int	filt_soread(struct knote *kn, long hint);
static void	filt_sowdetach(struct knote *kn);
static int	filt_sowrite(struct knote *kn, long hint);
static int	filt_solisten(struct knote *kn, long hint);

/* filterops: { isfd, attach, detach, event } */
static struct filterops solisten_filtops =
	{ 1, NULL, filt_sordetach, filt_solisten };
static struct filterops soread_filtops =
	{ 1, NULL, filt_sordetach, filt_soread };
static struct filterops sowrite_filtops =
	{ 1, NULL, filt_sowdetach, filt_sowrite };

struct	vm_zone *socket_zone;		/* zone all sockets are allocated from */
so_gen_t	so_gencnt;		/* generation count for sockets */

MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");

SYSCTL_DECL(_kern_ipc);

static int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW,
    &somaxconn, 0, "Maximum pending socket connection queue size");
static int numopensockets;		/* bumped in soalloc(), dropped in sodealloc() */
SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD,
    &numopensockets, 0, "Number of open sockets");


/*
* Socket operation routines. 101 * These routines are called by the routines in 102 * sys_socket.c or from a system process, and 103 * implement the semantics of socket operations by 104 * switching out to the protocol specific routines. 105 */ 106 107 /* 108 * Get a socket structure from our zone, and initialize it. 109 * We don't implement `waitok' yet (see comments in uipc_domain.c). 110 * Note that it would probably be better to allocate socket 111 * and PCB at the same time, but I'm not convinced that all 112 * the protocols can be easily modified to do this. 113 * 114 * soalloc() returns a socket with a ref count of 0. 115 */ 116 struct socket * 117 soalloc(waitok) 118 int waitok; 119 { 120 struct socket *so; 121 122 so = zalloc(socket_zone); 123 if (so) { 124 /* XXX race condition for reentrant kernel */ 125 bzero(so, sizeof *so); 126 so->so_gencnt = ++so_gencnt; 127 so->so_zone = socket_zone; 128 /* sx_init(&so->so_sxlock, "socket sxlock"); */ 129 TAILQ_INIT(&so->so_aiojobq); 130 ++numopensockets; 131 } 132 return so; 133 } 134 135 /* 136 * socreate returns a socket with a ref count of 1. The socket should be 137 * closed with soclose(). 
 */
int
socreate(dom, aso, type, proto, td)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
	struct thread *td;
{
	register struct protosw *prp;
	register struct socket *so;
	register int error;

	/* An explicit protocol number narrows the search within the domain. */
	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);

	if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
		return (EPROTONOSUPPORT);

	/*
	 * Jailed processes may optionally be restricted to local,
	 * IPv4, and routing sockets only.
	 */
	if (jailed(td->td_proc->p_ucred) && jail_socket_unixiproute_only &&
	    prp->pr_domain->dom_family != PF_LOCAL &&
	    prp->pr_domain->dom_family != PF_INET &&
	    prp->pr_domain->dom_family != PF_ROUTE) {
		return (EPROTONOSUPPORT);
	}

	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc(td != 0);
	if (so == 0)
		return (ENOBUFS);

	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;
	so->so_cred = crhold(td->td_proc->p_ucred);
	so->so_proto = prp;
	soref(so);
	error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
	if (error) {
		/*
		 * Attach failed: mark the socket as having no file
		 * descriptor reference so sorele() can free it.
		 */
		so->so_state |= SS_NOFDREF;
		sorele(so);
		return (error);
	}
	*aso = so;
	return (0);
}

/*
 * Bind a name to a socket; the work is done by the protocol's
 * pru_bind entry, called at splnet.
 */
int
sobind(so, nam, td)
	struct socket *so;
	struct sockaddr *nam;
	struct thread *td;
{
	int s = splnet();
	int error;

	error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, td);
	splx(s);
	return (error);
}

/*
 * Release the storage associated with a socket: return sockbuf space
 * to the owning uid's accounting, tear down any accept filter, drop
 * the credential reference, and hand the memory back to the zone.
 * Must only be called once the reference count has reached zero.
 */
static void
sodealloc(struct socket *so)
{

	KASSERT(so->so_count == 0, ("sodealloc(): so_count %d", so->so_count));
	so->so_gencnt = ++so_gencnt;
	if (so->so_rcv.sb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uidinfo,
		    &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
	if (so->so_snd.sb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uidinfo,
		    &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
#ifdef INET
	if (so->so_accf != NULL) {
		if (so->so_accf->so_accept_filter != NULL &&
		    so->so_accf->so_accept_filter->accf_destroy != NULL) {
			so->so_accf->so_accept_filter->accf_destroy(so);
		}
		if (so->so_accf->so_accept_filter_str != NULL)
			FREE(so->so_accf->so_accept_filter_str, M_ACCF);
		FREE(so->so_accf, M_ACCF);
	}
#endif
	crfree(so->so_cred);
	/* sx_destroy(&so->so_sxlock); */
	zfree(so->so_zone, so);
	--numopensockets;
}

/*
 * Put a socket into the listening state, letting the protocol do its
 * own setup first via pru_listen.  A negative or over-large backlog
 * is clamped to somaxconn.
 */
int
solisten(so, backlog, td)
	register struct socket *so;
	int backlog;
	struct thread *td;
{
	int s, error;

	s = splnet();
	error = (*so->so_proto->pr_usrreqs->pru_listen)(so, td);
	if (error) {
		splx(s);
		return (error);
	}
	if (TAILQ_EMPTY(&so->so_comp))
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;
	splx(s);
	return (0);
}

/*
 * Free a socket if it is fully detached: no PCB and no file descriptor
 * reference.  A socket still queued on a listening socket is removed
 * from the incomplete queue, but never from the completed (accept)
 * queue -- see the comment below.
 */
void
sofree(so)
	register struct socket *so;
{
	struct socket *head = so->so_head;

	KASSERT(so->so_count == 0, ("socket %p so_count not 0", so));

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (head != NULL) {
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
		} else if (so->so_state & SS_COMP) {
			/*
			 * We must not decommission a socket that's
			 * on the accept(2) queue.  If we do, then
			 * accept(2) may hang after select(2) indicated
			 * that the listening socket was ready.
			 */
			return;
		} else {
			panic("sofree: not queued");
		}
		head->so_qlen--;
		so->so_state &= ~SS_INCOMP;
		so->so_head = NULL;
	}
	sbrelease(&so->so_snd, so);
	sorflush(so);
	sodealloc(so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 *
 * This function will sorele() the socket.  Note that soclose() may be
 * called prior to the ref count reaching zero.
 * The actual socket
 * structure will not be freed until the ref count reaches zero.
 */
int
soclose(so)
	register struct socket *so;
{
	int s = splnet();		/* conservative */
	int error = 0;

	funsetown(so->so_sigio);
	if (so->so_options & SO_ACCEPTCONN) {
		struct socket *sp, *sonext;

		/*
		 * A listening socket: abort every connection still
		 * sitting on the incomplete and completed queues.
		 */
		sp = TAILQ_FIRST(&so->so_incomp);
		for (; sp != NULL; sp = sonext) {
			sonext = TAILQ_NEXT(sp, so_list);
			(void) soabort(sp);
		}
		for (sp = TAILQ_FIRST(&so->so_comp); sp != NULL; sp = sonext) {
			sonext = TAILQ_NEXT(sp, so_list);
			/* Dequeue from so_comp since sofree() won't do it */
			TAILQ_REMOVE(&so->so_comp, sp, so_list);
			so->so_qlen--;
			sp->so_state &= ~SS_COMP;
			sp->so_head = NULL;
			(void) soabort(sp);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/* A non-blocking socket never waits for the linger. */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			/*
			 * Sleep until the disconnect completes, the linger
			 * timer expires, or a signal arrives.
			 */
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, "soclos", so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		/* Detach errors must not hide an earlier disconnect error. */
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sorele(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splnet...
 */
int
soabort(so)
	struct socket *so;
{
	int error;

	error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
	if (error) {
		sotryfree(so);	/* note: does not decrement the ref count */
		return error;
	}
	return (0);
}

/*
 * Accept a connection: clear SS_NOFDREF (the caller is about to
 * associate a file descriptor) and let the protocol fill in the
 * peer's address.
 */
int
soaccept(so, nam)
	register struct socket *so;
	struct sockaddr **nam;
{
	int s = splnet();
	int error;

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
	splx(s);
	return (error);
}

int
soconnect(so, nam, td)
	register struct socket *so;
	struct sockaddr *nam;
	struct thread *td;
{
	int s;
	int error;

	/* Listening sockets cannot initiate connections. */
	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, td);
	splx(s);
	return (error);
}

/*
 * Connect a pair of sockets to each other (socketpair(2) support);
 * the protocol's pru_connect2 does all the work.
 */
int
soconnect2(so1, so2)
	register struct socket *so1;
	struct socket *so2;
{
	int s = splnet();
	int error;

	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
	splx(s);
	return (error);
}

/*
 * Initiate a disconnect.  Fails if the socket is not connected or a
 * disconnect is already in progress.
 */
int
sodisconnect(so)
	register struct socket *so;
{
	int s = splnet();
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
bad:
	splx(s);
	return (error);
}

/* Map MSG_DONTWAIT onto the sockbuf-lock wait flag. */
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(so, addr, uio, top, control, flags, td)
	register struct socket *so;
	struct sockaddr *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
	struct thread *td;
{
	struct mbuf **mp;
	register struct mbuf *m;
	register long space, len, resid;
	int clen = 0, error, s, dontroute, mlen;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		goto out;
	}

	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (td)
		td->td_proc->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
/* Release the spl and bail out through `release' with the given errno. */
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
	if (error)
		goto out;
	do {
		s = splnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			/*
			 * `sendto' and `sendmsg' is allowed on a connection-
			 * based socket if it supports implied connect.
			 * Return ENOTCONN if not connected and no address is
			 * supplied.
			 */
			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
				    ENOTCONN : EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			/* Not enough room: block or fail per SS_NBIO. */
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
		    if (uio == NULL) {
			/*
			 * Data is prepackaged in "top".
			 */
			resid = 0;
			if (flags & MSG_EOR)
				top->m_flags |= M_EOR;
		    } else do {
			/* Build the outgoing chain one mbuf at a time. */
			if (top == 0) {
				MGETHDR(m, M_TRYWAIT, MT_DATA);
				if (m == NULL) {
					error = ENOBUFS;
					goto release;
				}
				mlen = MHLEN;
				m->m_pkthdr.len = 0;
				m->m_pkthdr.rcvif = (struct ifnet *)0;
			} else {
				MGET(m, M_TRYWAIT, MT_DATA);
				if (m == NULL) {
					error = ENOBUFS;
					goto release;
				}
				mlen = MLEN;
			}
			if (resid >= MINCLSIZE) {
				/* Worth a cluster; fall back to nopages if none. */
				MCLGET(m, M_TRYWAIT);
				if ((m->m_flags & M_EXT) == 0)
					goto nopages;
				mlen = MCLBYTES;
				len = min(min(mlen, resid), space);
			} else {
nopages:
				len = min(min(mlen, resid), space);
				/*
				 * For datagram protocols, leave room
				 * for protocol headers in first mbuf.
				 */
				if (atomic && top == 0 && len < mlen)
					MH_ALIGN(m, len);
			}
			space -= len;
			error = uiomove(mtod(m, caddr_t), (int)len, uio);
			resid = uio->uio_resid;
			m->m_len = len;
			*mp = m;
			top->m_pkthdr.len += len;
			if (error)
				goto release;
			mp = &m->m_next;
			if (resid <= 0) {
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
				break;
			}
		    } while (space > 0 && atomic);
		    if (dontroute)
			    so->so_options |= SO_DONTROUTE;
		    s = splnet();				/* XXX */
		    /*
		     * XXX all the SS_CANTSENDMORE checks previously
		     * done could be out of date.  We could have received
		     * a reset packet in an interrupt or maybe we slept
		     * while doing page faults in uiomove() etc.  We could
		     * probably recheck again inside the splnet() protection
		     * here, but there are probably other places that this
		     * also happens.  We must rethink this.
		     */
		    error = (*so->so_proto->pr_usrreqs->pru_send)(so,
			(flags & MSG_OOB) ? PRUS_OOB :
			/*
			 * If the user set MSG_EOF, the protocol
			 * understands this flag and nothing left to
			 * send then use PRU_SEND_EOF instead of PRU_SEND.
			 */
			((flags & MSG_EOF) &&
			 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			 (resid <= 0)) ?
				PRUS_EOF :
			/* If there is more to send set PRUS_MORETOCOME */
			(resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
			top, addr, control, td);
		    splx(s);
		    if (dontroute)
			    so->so_options &= ~SO_DONTROUTE;
		    /* The protocol now owns top and control. */
		    clen = 0;
		    control = 0;
		    top = 0;
		    mp = &top;
		    if (error)
			goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.
 * In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(so, psa, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct sockaddr **psa;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (psa)
		*psa = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		/* Out-of-band data bypasses the receive buffer entirely. */
		m = m_get(M_TRYWAIT, MT_DATA);
		if (m == NULL)
			return (ENOBUFS);
		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);

restart:
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error)
		return (error);
	s = splnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
		KASSERT(m != 0 || !so->so_rcv.sb_cc,
		    ("receive: m == %p so->so_rcv.sb_cc == %lu",
		    m, so->so_rcv.sb_cc));
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* OOB data or a record boundary lets us proceed with what we have. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	if (uio->uio_td)
		uio->uio_td->td_proc->p_stats->p_ru.ru_msgrcv++;
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
		/* First mbuf of the record carries the sender's address. */
		KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type));
		orig_resid = 0;
		if (psa)
			*psa = dup_sockaddr(mtod(m, struct sockaddr *),
			    mp0 == 0);
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			MFREE(m, so->so_rcv.sb_mb);
			m = so->so_rcv.sb_mb;
		}
	}
	/* Next come any control (ancillary data) mbufs. */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			so->so_rcv.sb_mb = m->m_next;
			m->m_next = NULL;
			if (pr->pr_domain->dom_externalize)
				error =
				    (*pr->pr_domain->dom_externalize)(m, controlp);
			else if (controlp)
				*controlp = m;
			else
				m_freem(m);
			m = so->so_rcv.sb_mb;
		}
		if (controlp) {
			orig_resid = 0;
			do
				controlp = &(*controlp)->m_next;
			while (*controlp != NULL);
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
	/* Main copy loop: walk the data mbufs of the record. */
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
		else
			KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
			    ("m->m_type == %d", m->m_type));
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splnet();
			if (error)
				goto release;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* Consumed the whole mbuf. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* Partial mbuf: adjust in place (or copy if peeking). */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_TRYWAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			/*
			 * Notify the protocol that some data has been
			 * drained before blocking.
			 */
			if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
				(*pr->pr_usrreqs->pru_rcvd)(so, flags);
			error = sbwait(&so->so_rcv);
			if (error) {
				/* Per the comment above: short count, no error. */
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			m = so->so_rcv.sb_mb;
			if (m)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		/* Unread remainder of an atomic record is discarded. */
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		/* Nothing was transferred; try the whole thing again. */
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

/*
 * shutdown(2) support: flush the receive side and/or ask the protocol
 * to stop sending, depending on `how'.
 */
int
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;

	if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
		return (EINVAL);

	if (how != SHUT_WR)
		sorflush(so);
	if (how != SHUT_RD)
		return ((*pr->pr_usrreqs->pru_shutdown)(so));
	return (0);
}

/*
 * Throw away everything in a socket's receive buffer.  The buffer is
 * copied aside and zeroed under splimp so the protocol's dom_dispose
 * (which may free passed file descriptors) and sbrelease can run on
 * the copy without racing network interrupts.
 */
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb, so);
}

#ifdef INET
/*
 * Attach (sopt != NULL) or detach (sopt == NULL) an accept filter on a
 * listening socket.  Used by sosetopt() for SO_ACCEPTFILTER.
 */
static int
do_setopt_accept_filter(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	struct accept_filter_arg *afap = NULL;
	struct accept_filter *afp;
	struct so_accf *af = so->so_accf;
	int error = 0;

	/* do not set/remove accept filters on non listen sockets */
	if ((so->so_options & SO_ACCEPTCONN) == 0) {
		error = EINVAL;
		goto out;
	}

	/* removing the filter */
	if (sopt == NULL) {
		if (af != NULL) {
			if (af->so_accept_filter != NULL &&
			    af->so_accept_filter->accf_destroy != NULL) {
				af->so_accept_filter->accf_destroy(so);
			}
			if (af->so_accept_filter_str != NULL) {
				FREE(af->so_accept_filter_str, M_ACCF);
			}
			FREE(af, M_ACCF);
			so->so_accf = NULL;
		}
		so->so_options &= ~SO_ACCEPTFILTER;
		return (0);
	}
	/* adding a filter */
	/* must remove previous filter first */
	if (af != NULL) {
		error = EINVAL;
		goto out;
	}
	/* don't put large objects on the kernel stack */
	MALLOC(afap, struct accept_filter_arg *, sizeof(*afap), M_TEMP, M_WAITOK);
	error = sooptcopyin(sopt, afap, sizeof *afap, sizeof *afap);
	/* Terminate both strings even if the copyin failed. */
	afap->af_name[sizeof(afap->af_name)-1] = '\0';
	afap->af_arg[sizeof(afap->af_arg)-1] = '\0';
	if (error)
		goto out;
	afp = accept_filt_get(afap->af_name);
	if (afp == NULL) {
		error = ENOENT;
		goto out;
	}
	MALLOC(af, struct so_accf *, sizeof(*af), M_ACCF, M_WAITOK | M_ZERO);
	if (afp->accf_create != NULL) {
		if (afap->af_name[0] != '\0') {
			int len = strlen(afap->af_name) + 1;

			MALLOC(af->so_accept_filter_str, char *, len, M_ACCF, M_WAITOK);
			strcpy(af->so_accept_filter_str, afap->af_name);
		}
		af->so_accept_filter_arg = afp->accf_create(so, afap->af_arg);
		if (af->so_accept_filter_arg == NULL) {
			FREE(af->so_accept_filter_str, M_ACCF);
			FREE(af, M_ACCF);
			so->so_accf = NULL;
			error = EINVAL;
			goto out;
		}
	}
	af->so_accept_filter = afp;
	so->so_accf = af;
	so->so_options |= SO_ACCEPTFILTER;
out:
	if (afap != NULL)
		FREE(afap, M_TEMP);
	return (error);
}
#endif /* INET */

/*
 * Perhaps this routine, and sooptcopyout(), below, ought to come in
 * an additional variant to handle the case where the option value needs
 * to be some kind of integer, but not a specific size.
 * In addition to their use here, these functions are also called by the
 * protocol-level pr_ctloutput() routines.
 */
int
sooptcopyin(sopt, buf, len, minlen)
	struct sockopt *sopt;
	void *buf;
	size_t len;
	size_t minlen;
{
	size_t valsize;

	/*
	 * If the user gives us more than we wanted, we ignore it,
	 * but if we don't get the minimum length the caller
	 * wants, we return EINVAL.  On success, sopt->sopt_valsize
	 * is set to however much we actually retrieved.
	 */
	if ((valsize = sopt->sopt_valsize) < minlen)
		return EINVAL;
	if (valsize > len)
		sopt->sopt_valsize = valsize = len;

	/* A non-null sopt_td means the value lives in user space. */
	if (sopt->sopt_td != 0)
		return (copyin(sopt->sopt_val, buf, valsize));

	bcopy(sopt->sopt_val, buf, valsize);
	return 0;
}

/*
 * setsockopt(2) support.  Socket-level options are handled here;
 * everything else (and, for most socket-level options, a courtesy
 * notification afterwards) goes to the protocol's pr_ctloutput.
 */
int
sosetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int error, optval;
	struct linger l;
	struct timeval tv;
	u_long val;

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		error = ENOPROTOOPT;
	} else {
		switch (sopt->sopt_name) {
#ifdef INET
		case SO_ACCEPTFILTER:
			error = do_setopt_accept_filter(so, sopt);
			if (error)
				goto bad;
			break;
#endif
		case SO_LINGER:
			error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
			if (error)
				goto bad;

			so->so_linger = l.l_linger;
			if (l.l_onoff)
				so->so_options |= SO_LINGER;
			else
				so->so_options &= ~SO_LINGER;
			break;

		/* Boolean options share their option bit with so_options. */
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;
			if (optval)
				so->so_options |= sopt->sopt_name;
			else
				so->so_options &= ~sopt->sopt_name;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv, (u_long)optval,
				    so, curthread) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			error = sooptcopyin(sopt, &tv, sizeof tv,
					    sizeof tv);
			if (error)
				goto bad;

			/* assert(hz > 0); */
			if (tv.tv_sec < 0 || tv.tv_sec > SHRT_MAX / hz ||
			    tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
				error = EDOM;
				goto bad;
			}
			/* assert(tick > 0); */
			/* assert(ULONG_MAX - SHRT_MAX >= 1000000); */
			/* Convert the timeval to ticks; sb_timeo is a short. */
			val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick;
			if (val > SHRT_MAX) {
				error = EDOM;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		}
	}
bad:
	return (error);
}

/* Helper routine for getsockopt */
int
sooptcopyout(sopt, buf, len)
	struct sockopt *sopt;
	void *buf;
	size_t len;
{
	int error;
	size_t valsize;

	error = 0;

	/*
	 * Documented get behavior is that we always return a value,
	 * possibly truncated to fit in the user's buffer.
	 * Traditional behavior is that we always tell the user
	 * precisely how much we copied, rather than something useful
	 * like the total amount we had available for her.
	 * Note that this interface is not idempotent; the entire answer must
	 * be generated ahead of time.
1298 */ 1299 valsize = min(len, sopt->sopt_valsize); 1300 sopt->sopt_valsize = valsize; 1301 if (sopt->sopt_val != 0) { 1302 if (sopt->sopt_td != 0) 1303 error = copyout(buf, sopt->sopt_val, valsize); 1304 else 1305 bcopy(buf, sopt->sopt_val, valsize); 1306 } 1307 return error; 1308 } 1309 1310 int 1311 sogetopt(so, sopt) 1312 struct socket *so; 1313 struct sockopt *sopt; 1314 { 1315 int error, optval; 1316 struct linger l; 1317 struct timeval tv; 1318 #ifdef INET 1319 struct accept_filter_arg *afap; 1320 #endif 1321 1322 error = 0; 1323 if (sopt->sopt_level != SOL_SOCKET) { 1324 if (so->so_proto && so->so_proto->pr_ctloutput) { 1325 return ((*so->so_proto->pr_ctloutput) 1326 (so, sopt)); 1327 } else 1328 return (ENOPROTOOPT); 1329 } else { 1330 switch (sopt->sopt_name) { 1331 #ifdef INET 1332 case SO_ACCEPTFILTER: 1333 if ((so->so_options & SO_ACCEPTCONN) == 0) 1334 return (EINVAL); 1335 MALLOC(afap, struct accept_filter_arg *, sizeof(*afap), 1336 M_TEMP, M_WAITOK | M_ZERO); 1337 if ((so->so_options & SO_ACCEPTFILTER) != 0) { 1338 strcpy(afap->af_name, so->so_accf->so_accept_filter->accf_name); 1339 if (so->so_accf->so_accept_filter_str != NULL) 1340 strcpy(afap->af_arg, so->so_accf->so_accept_filter_str); 1341 } 1342 error = sooptcopyout(sopt, afap, sizeof(*afap)); 1343 FREE(afap, M_TEMP); 1344 break; 1345 #endif 1346 1347 case SO_LINGER: 1348 l.l_onoff = so->so_options & SO_LINGER; 1349 l.l_linger = so->so_linger; 1350 error = sooptcopyout(sopt, &l, sizeof l); 1351 break; 1352 1353 case SO_USELOOPBACK: 1354 case SO_DONTROUTE: 1355 case SO_DEBUG: 1356 case SO_KEEPALIVE: 1357 case SO_REUSEADDR: 1358 case SO_REUSEPORT: 1359 case SO_BROADCAST: 1360 case SO_OOBINLINE: 1361 case SO_TIMESTAMP: 1362 optval = so->so_options & sopt->sopt_name; 1363 integer: 1364 error = sooptcopyout(sopt, &optval, sizeof optval); 1365 break; 1366 1367 case SO_TYPE: 1368 optval = so->so_type; 1369 goto integer; 1370 1371 case SO_ERROR: 1372 optval = so->so_error; 1373 so->so_error = 0; 
1374 goto integer; 1375 1376 case SO_SNDBUF: 1377 optval = so->so_snd.sb_hiwat; 1378 goto integer; 1379 1380 case SO_RCVBUF: 1381 optval = so->so_rcv.sb_hiwat; 1382 goto integer; 1383 1384 case SO_SNDLOWAT: 1385 optval = so->so_snd.sb_lowat; 1386 goto integer; 1387 1388 case SO_RCVLOWAT: 1389 optval = so->so_rcv.sb_lowat; 1390 goto integer; 1391 1392 case SO_SNDTIMEO: 1393 case SO_RCVTIMEO: 1394 optval = (sopt->sopt_name == SO_SNDTIMEO ? 1395 so->so_snd.sb_timeo : so->so_rcv.sb_timeo); 1396 1397 tv.tv_sec = optval / hz; 1398 tv.tv_usec = (optval % hz) * tick; 1399 error = sooptcopyout(sopt, &tv, sizeof tv); 1400 break; 1401 1402 default: 1403 error = ENOPROTOOPT; 1404 break; 1405 } 1406 return (error); 1407 } 1408 } 1409 1410 /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */ 1411 int 1412 soopt_getm(struct sockopt *sopt, struct mbuf **mp) 1413 { 1414 struct mbuf *m, *m_prev; 1415 int sopt_size = sopt->sopt_valsize; 1416 1417 MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_DATA); 1418 if (m == 0) 1419 return ENOBUFS; 1420 if (sopt_size > MLEN) { 1421 MCLGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT); 1422 if ((m->m_flags & M_EXT) == 0) { 1423 m_free(m); 1424 return ENOBUFS; 1425 } 1426 m->m_len = min(MCLBYTES, sopt_size); 1427 } else { 1428 m->m_len = min(MLEN, sopt_size); 1429 } 1430 sopt_size -= m->m_len; 1431 *mp = m; 1432 m_prev = m; 1433 1434 while (sopt_size) { 1435 MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_DATA); 1436 if (m == 0) { 1437 m_freem(*mp); 1438 return ENOBUFS; 1439 } 1440 if (sopt_size > MLEN) { 1441 MCLGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT); 1442 if ((m->m_flags & M_EXT) == 0) { 1443 m_freem(*mp); 1444 return ENOBUFS; 1445 } 1446 m->m_len = min(MCLBYTES, sopt_size); 1447 } else { 1448 m->m_len = min(MLEN, sopt_size); 1449 } 1450 sopt_size -= m->m_len; 1451 m_prev->m_next = m; 1452 m_prev = m; 1453 } 1454 return 0; 1455 } 1456 1457 /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. 
*/ 1458 int 1459 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m) 1460 { 1461 struct mbuf *m0 = m; 1462 1463 if (sopt->sopt_val == NULL) 1464 return 0; 1465 while (m != NULL && sopt->sopt_valsize >= m->m_len) { 1466 if (sopt->sopt_td != NULL) { 1467 int error; 1468 1469 error = copyin(sopt->sopt_val, mtod(m, char *), 1470 m->m_len); 1471 if (error != 0) { 1472 m_freem(m0); 1473 return(error); 1474 } 1475 } else 1476 bcopy(sopt->sopt_val, mtod(m, char *), m->m_len); 1477 sopt->sopt_valsize -= m->m_len; 1478 (caddr_t)sopt->sopt_val += m->m_len; 1479 m = m->m_next; 1480 } 1481 if (m != NULL) /* should be allocated enoughly at ip6_sooptmcopyin() */ 1482 panic("ip6_sooptmcopyin"); 1483 return 0; 1484 } 1485 1486 /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */ 1487 int 1488 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m) 1489 { 1490 struct mbuf *m0 = m; 1491 size_t valsize = 0; 1492 1493 if (sopt->sopt_val == NULL) 1494 return 0; 1495 while (m != NULL && sopt->sopt_valsize >= m->m_len) { 1496 if (sopt->sopt_td != NULL) { 1497 int error; 1498 1499 error = copyout(mtod(m, char *), sopt->sopt_val, 1500 m->m_len); 1501 if (error != 0) { 1502 m_freem(m0); 1503 return(error); 1504 } 1505 } else 1506 bcopy(mtod(m, char *), sopt->sopt_val, m->m_len); 1507 sopt->sopt_valsize -= m->m_len; 1508 (caddr_t)sopt->sopt_val += m->m_len; 1509 valsize += m->m_len; 1510 m = m->m_next; 1511 } 1512 if (m != NULL) { 1513 /* enough soopt buffer should be given from user-land */ 1514 m_freem(m0); 1515 return(EINVAL); 1516 } 1517 sopt->sopt_valsize = valsize; 1518 return 0; 1519 } 1520 1521 void 1522 sohasoutofband(so) 1523 register struct socket *so; 1524 { 1525 if (so->so_sigio != NULL) 1526 pgsigio(so->so_sigio, SIGURG, 0); 1527 selwakeup(&so->so_rcv.sb_sel); 1528 } 1529 1530 int 1531 sopoll(struct socket *so, int events, struct ucred *cred, struct thread *td) 1532 { 1533 int revents = 0; 1534 int s = splnet(); 1535 1536 if (events & (POLLIN | POLLRDNORM)) 
1537 if (soreadable(so)) 1538 revents |= events & (POLLIN | POLLRDNORM); 1539 1540 if (events & (POLLOUT | POLLWRNORM)) 1541 if (sowriteable(so)) 1542 revents |= events & (POLLOUT | POLLWRNORM); 1543 1544 if (events & (POLLPRI | POLLRDBAND)) 1545 if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) 1546 revents |= events & (POLLPRI | POLLRDBAND); 1547 1548 if (revents == 0) { 1549 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) { 1550 selrecord(td, &so->so_rcv.sb_sel); 1551 so->so_rcv.sb_flags |= SB_SEL; 1552 } 1553 1554 if (events & (POLLOUT | POLLWRNORM)) { 1555 selrecord(td, &so->so_snd.sb_sel); 1556 so->so_snd.sb_flags |= SB_SEL; 1557 } 1558 } 1559 1560 splx(s); 1561 return (revents); 1562 } 1563 1564 int 1565 sokqfilter(struct file *fp, struct knote *kn) 1566 { 1567 struct socket *so = (struct socket *)kn->kn_fp->f_data; 1568 struct sockbuf *sb; 1569 int s; 1570 1571 switch (kn->kn_filter) { 1572 case EVFILT_READ: 1573 if (so->so_options & SO_ACCEPTCONN) 1574 kn->kn_fop = &solisten_filtops; 1575 else 1576 kn->kn_fop = &soread_filtops; 1577 sb = &so->so_rcv; 1578 break; 1579 case EVFILT_WRITE: 1580 kn->kn_fop = &sowrite_filtops; 1581 sb = &so->so_snd; 1582 break; 1583 default: 1584 return (1); 1585 } 1586 1587 s = splnet(); 1588 SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext); 1589 sb->sb_flags |= SB_KNOTE; 1590 splx(s); 1591 return (0); 1592 } 1593 1594 static void 1595 filt_sordetach(struct knote *kn) 1596 { 1597 struct socket *so = (struct socket *)kn->kn_fp->f_data; 1598 int s = splnet(); 1599 1600 SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext); 1601 if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note)) 1602 so->so_rcv.sb_flags &= ~SB_KNOTE; 1603 splx(s); 1604 } 1605 1606 /*ARGSUSED*/ 1607 static int 1608 filt_soread(struct knote *kn, long hint) 1609 { 1610 struct socket *so = (struct socket *)kn->kn_fp->f_data; 1611 1612 kn->kn_data = so->so_rcv.sb_cc; 1613 if (so->so_state & SS_CANTRCVMORE) { 1614 kn->kn_flags |= EV_EOF; 1615 
kn->kn_fflags = so->so_error; 1616 return (1); 1617 } 1618 if (so->so_error) /* temporary udp error */ 1619 return (1); 1620 if (kn->kn_sfflags & NOTE_LOWAT) 1621 return (kn->kn_data >= kn->kn_sdata); 1622 return (kn->kn_data >= so->so_rcv.sb_lowat); 1623 } 1624 1625 static void 1626 filt_sowdetach(struct knote *kn) 1627 { 1628 struct socket *so = (struct socket *)kn->kn_fp->f_data; 1629 int s = splnet(); 1630 1631 SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext); 1632 if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note)) 1633 so->so_snd.sb_flags &= ~SB_KNOTE; 1634 splx(s); 1635 } 1636 1637 /*ARGSUSED*/ 1638 static int 1639 filt_sowrite(struct knote *kn, long hint) 1640 { 1641 struct socket *so = (struct socket *)kn->kn_fp->f_data; 1642 1643 kn->kn_data = sbspace(&so->so_snd); 1644 if (so->so_state & SS_CANTSENDMORE) { 1645 kn->kn_flags |= EV_EOF; 1646 kn->kn_fflags = so->so_error; 1647 return (1); 1648 } 1649 if (so->so_error) /* temporary udp error */ 1650 return (1); 1651 if (((so->so_state & SS_ISCONNECTED) == 0) && 1652 (so->so_proto->pr_flags & PR_CONNREQUIRED)) 1653 return (0); 1654 if (kn->kn_sfflags & NOTE_LOWAT) 1655 return (kn->kn_data >= kn->kn_sdata); 1656 return (kn->kn_data >= so->so_snd.sb_lowat); 1657 } 1658 1659 /*ARGSUSED*/ 1660 static int 1661 filt_solisten(struct knote *kn, long hint) 1662 { 1663 struct socket *so = (struct socket *)kn->kn_fp->f_data; 1664 1665 kn->kn_data = so->so_qlen - so->so_incqlen; 1666 return (! TAILQ_EMPTY(&so->so_comp)); 1667 } 1668 1669 int 1670 socheckuid(struct socket *so, uid_t uid) 1671 { 1672 1673 if (so == NULL) 1674 return (EPERM); 1675 if (so->so_cred->cr_uid == uid) 1676 return (0); 1677 return (EPERM); 1678 } 1679