1 /* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
32 * 33 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 34 * $FreeBSD$ 35 */ 36 37 #include "opt_inet.h" 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/fcntl.h> 42 #include <sys/lock.h> 43 #include <sys/malloc.h> 44 #include <sys/mbuf.h> 45 #include <sys/mutex.h> 46 #include <sys/domain.h> 47 #include <sys/file.h> /* for struct knote */ 48 #include <sys/kernel.h> 49 #include <sys/malloc.h> 50 #include <sys/event.h> 51 #include <sys/poll.h> 52 #include <sys/proc.h> 53 #include <sys/protosw.h> 54 #include <sys/socket.h> 55 #include <sys/socketvar.h> 56 #include <sys/resourcevar.h> 57 #include <sys/signalvar.h> 58 #include <sys/sysctl.h> 59 #include <sys/uio.h> 60 #include <sys/jail.h> 61 62 #include <vm/vm_zone.h> 63 64 #include <machine/limits.h> 65 66 #ifdef INET 67 static int do_setopt_accept_filter(struct socket *so, struct sockopt *sopt); 68 #endif 69 70 static void filt_sordetach(struct knote *kn); 71 static int filt_soread(struct knote *kn, long hint); 72 static void filt_sowdetach(struct knote *kn); 73 static int filt_sowrite(struct knote *kn, long hint); 74 static int filt_solisten(struct knote *kn, long hint); 75 76 static struct filterops solisten_filtops = 77 { 1, NULL, filt_sordetach, filt_solisten }; 78 static struct filterops soread_filtops = 79 { 1, NULL, filt_sordetach, filt_soread }; 80 static struct filterops sowrite_filtops = 81 { 1, NULL, filt_sowdetach, filt_sowrite }; 82 83 struct vm_zone *socket_zone; 84 so_gen_t so_gencnt; /* generation count for sockets */ 85 86 MALLOC_DEFINE(M_SONAME, "soname", "socket name"); 87 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); 88 89 SYSCTL_DECL(_kern_ipc); 90 91 static int somaxconn = SOMAXCONN; 92 SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, 93 &somaxconn, 0, "Maximum pending socket connection queue size"); 94 static int numopensockets; 95 SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD, 96 &numopensockets, 0, "Number of open sockets"); 97 98 99 /* 100 
* Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */

/*
 * Allocate a socket from socket_zone and give it its initial state:
 * the structure is zeroed, stamped with a fresh generation count,
 * records the zone it came from and gets an empty AIO job queue.
 * The global count of open sockets is bumped on every success.
 *
 * `waitok' is accepted but not acted upon yet (see comments in
 * uipc_domain.c).  Note that it would probably be better to allocate
 * socket and PCB at the same time, but it is not clear that all the
 * protocols can be easily modified to do this.
 *
 * Returns the new socket with a ref count of 0, or NULL when the zone
 * allocator returns nothing.
 */
struct socket *
soalloc(int waitok)
{
	struct socket *newso = zalloc(socket_zone);

	if (newso == NULL)
		return (NULL);

	/* XXX race condition for reentrant kernel */
	bzero(newso, sizeof(*newso));
	newso->so_zone = socket_zone;
	newso->so_gencnt = ++so_gencnt;
	/* sx_init(&so->so_sxlock, "socket sxlock"); */
	TAILQ_INIT(&newso->so_aiojobq);
	numopensockets++;
	return (newso);
}

/*
 * socreate returns a socket with a ref count of 1.  The socket should be
 * closed with soclose().
138 */ 139 int 140 socreate(dom, aso, type, proto, cred, td) 141 int dom; 142 struct socket **aso; 143 register int type; 144 int proto; 145 struct ucred *cred; 146 struct thread *td; 147 { 148 register struct protosw *prp; 149 register struct socket *so; 150 register int error; 151 152 if (proto) 153 prp = pffindproto(dom, proto, type); 154 else 155 prp = pffindtype(dom, type); 156 157 if (prp == 0 || prp->pr_usrreqs->pru_attach == 0) 158 return (EPROTONOSUPPORT); 159 160 if (jailed(td->td_proc->p_ucred) && jail_socket_unixiproute_only && 161 prp->pr_domain->dom_family != PF_LOCAL && 162 prp->pr_domain->dom_family != PF_INET && 163 prp->pr_domain->dom_family != PF_ROUTE) { 164 return (EPROTONOSUPPORT); 165 } 166 167 if (prp->pr_type != type) 168 return (EPROTOTYPE); 169 so = soalloc(td != 0); 170 if (so == 0) 171 return (ENOBUFS); 172 173 TAILQ_INIT(&so->so_incomp); 174 TAILQ_INIT(&so->so_comp); 175 so->so_type = type; 176 so->so_cred = crhold(cred); 177 so->so_proto = prp; 178 soref(so); 179 error = (*prp->pr_usrreqs->pru_attach)(so, proto, td); 180 if (error) { 181 so->so_state |= SS_NOFDREF; 182 sorele(so); 183 return (error); 184 } 185 *aso = so; 186 return (0); 187 } 188 189 int 190 sobind(so, nam, td) 191 struct socket *so; 192 struct sockaddr *nam; 193 struct thread *td; 194 { 195 int s = splnet(); 196 int error; 197 198 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, td); 199 splx(s); 200 return (error); 201 } 202 203 static void 204 sodealloc(struct socket *so) 205 { 206 207 KASSERT(so->so_count == 0, ("sodealloc(): so_count %d", so->so_count)); 208 so->so_gencnt = ++so_gencnt; 209 if (so->so_rcv.sb_hiwat) 210 (void)chgsbsize(so->so_cred->cr_uidinfo, 211 &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY); 212 if (so->so_snd.sb_hiwat) 213 (void)chgsbsize(so->so_cred->cr_uidinfo, 214 &so->so_snd.sb_hiwat, 0, RLIM_INFINITY); 215 #ifdef INET 216 if (so->so_accf != NULL) { 217 if (so->so_accf->so_accept_filter != NULL && 218 
so->so_accf->so_accept_filter->accf_destroy != NULL) { 219 so->so_accf->so_accept_filter->accf_destroy(so); 220 } 221 if (so->so_accf->so_accept_filter_str != NULL) 222 FREE(so->so_accf->so_accept_filter_str, M_ACCF); 223 FREE(so->so_accf, M_ACCF); 224 } 225 #endif 226 crfree(so->so_cred); 227 /* sx_destroy(&so->so_sxlock); */ 228 zfree(so->so_zone, so); 229 --numopensockets; 230 } 231 232 int 233 solisten(so, backlog, td) 234 register struct socket *so; 235 int backlog; 236 struct thread *td; 237 { 238 int s, error; 239 240 s = splnet(); 241 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, td); 242 if (error) { 243 splx(s); 244 return (error); 245 } 246 if (TAILQ_EMPTY(&so->so_comp)) 247 so->so_options |= SO_ACCEPTCONN; 248 if (backlog < 0 || backlog > somaxconn) 249 backlog = somaxconn; 250 so->so_qlimit = backlog; 251 splx(s); 252 return (0); 253 } 254 255 void 256 sofree(so) 257 register struct socket *so; 258 { 259 struct socket *head = so->so_head; 260 261 KASSERT(so->so_count == 0, ("socket %p so_count not 0", so)); 262 263 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) 264 return; 265 if (head != NULL) { 266 if (so->so_state & SS_INCOMP) { 267 TAILQ_REMOVE(&head->so_incomp, so, so_list); 268 head->so_incqlen--; 269 } else if (so->so_state & SS_COMP) { 270 /* 271 * We must not decommission a socket that's 272 * on the accept(2) queue. If we do, then 273 * accept(2) may hang after select(2) indicated 274 * that the listening socket was ready. 275 */ 276 return; 277 } else { 278 panic("sofree: not queued"); 279 } 280 head->so_qlen--; 281 so->so_state &= ~SS_INCOMP; 282 so->so_head = NULL; 283 } 284 sbrelease(&so->so_snd, so); 285 sorflush(so); 286 sodealloc(so); 287 } 288 289 /* 290 * Close a socket on last file table reference removal. 291 * Initiate disconnect if connected. 292 * Free socket when disconnect complete. 293 * 294 * This function will sorele() the socket. Note that soclose() may be 295 * called prior to the ref count reaching zero. 
The actual socket 296 * structure will not be freed until the ref count reaches zero. 297 */ 298 int 299 soclose(so) 300 register struct socket *so; 301 { 302 int s = splnet(); /* conservative */ 303 int error = 0; 304 305 funsetown(so->so_sigio); 306 if (so->so_options & SO_ACCEPTCONN) { 307 struct socket *sp, *sonext; 308 309 sp = TAILQ_FIRST(&so->so_incomp); 310 for (; sp != NULL; sp = sonext) { 311 sonext = TAILQ_NEXT(sp, so_list); 312 (void) soabort(sp); 313 } 314 for (sp = TAILQ_FIRST(&so->so_comp); sp != NULL; sp = sonext) { 315 sonext = TAILQ_NEXT(sp, so_list); 316 /* Dequeue from so_comp since sofree() won't do it */ 317 TAILQ_REMOVE(&so->so_comp, sp, so_list); 318 so->so_qlen--; 319 sp->so_state &= ~SS_COMP; 320 sp->so_head = NULL; 321 (void) soabort(sp); 322 } 323 } 324 if (so->so_pcb == 0) 325 goto discard; 326 if (so->so_state & SS_ISCONNECTED) { 327 if ((so->so_state & SS_ISDISCONNECTING) == 0) { 328 error = sodisconnect(so); 329 if (error) 330 goto drop; 331 } 332 if (so->so_options & SO_LINGER) { 333 if ((so->so_state & SS_ISDISCONNECTING) && 334 (so->so_state & SS_NBIO)) 335 goto drop; 336 while (so->so_state & SS_ISCONNECTED) { 337 error = tsleep((caddr_t)&so->so_timeo, 338 PSOCK | PCATCH, "soclos", so->so_linger * hz); 339 if (error) 340 break; 341 } 342 } 343 } 344 drop: 345 if (so->so_pcb) { 346 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so); 347 if (error == 0) 348 error = error2; 349 } 350 discard: 351 if (so->so_state & SS_NOFDREF) 352 panic("soclose: NOFDREF"); 353 so->so_state |= SS_NOFDREF; 354 sorele(so); 355 splx(s); 356 return (error); 357 } 358 359 /* 360 * Must be called at splnet... 
361 */ 362 int 363 soabort(so) 364 struct socket *so; 365 { 366 int error; 367 368 error = (*so->so_proto->pr_usrreqs->pru_abort)(so); 369 if (error) { 370 sotryfree(so); /* note: does not decrement the ref count */ 371 return error; 372 } 373 return (0); 374 } 375 376 int 377 soaccept(so, nam) 378 register struct socket *so; 379 struct sockaddr **nam; 380 { 381 int s = splnet(); 382 int error; 383 384 if ((so->so_state & SS_NOFDREF) == 0) 385 panic("soaccept: !NOFDREF"); 386 so->so_state &= ~SS_NOFDREF; 387 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); 388 splx(s); 389 return (error); 390 } 391 392 int 393 soconnect(so, nam, td) 394 register struct socket *so; 395 struct sockaddr *nam; 396 struct thread *td; 397 { 398 int s; 399 int error; 400 401 if (so->so_options & SO_ACCEPTCONN) 402 return (EOPNOTSUPP); 403 s = splnet(); 404 /* 405 * If protocol is connection-based, can only connect once. 406 * Otherwise, if connected, try to disconnect first. 407 * This allows user to disconnect by connecting to, e.g., 408 * a null address. 
409 */ 410 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 411 ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 412 (error = sodisconnect(so)))) 413 error = EISCONN; 414 else 415 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, td); 416 splx(s); 417 return (error); 418 } 419 420 int 421 soconnect2(so1, so2) 422 register struct socket *so1; 423 struct socket *so2; 424 { 425 int s = splnet(); 426 int error; 427 428 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); 429 splx(s); 430 return (error); 431 } 432 433 int 434 sodisconnect(so) 435 register struct socket *so; 436 { 437 int s = splnet(); 438 int error; 439 440 if ((so->so_state & SS_ISCONNECTED) == 0) { 441 error = ENOTCONN; 442 goto bad; 443 } 444 if (so->so_state & SS_ISDISCONNECTING) { 445 error = EALREADY; 446 goto bad; 447 } 448 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); 449 bad: 450 splx(s); 451 return (error); 452 } 453 454 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 455 /* 456 * Send on a socket. 457 * If send must go all at once and message is larger than 458 * send buffering, then hard error. 459 * Lock against other senders. 460 * If must go all at once and not enough room now, then 461 * inform user that this would block and do nothing. 462 * Otherwise, if nonblocking, send as much as possible. 463 * The data to be sent is described by "uio" if nonzero, 464 * otherwise by the mbuf chain "top" (which must be null 465 * if uio is not). Data provided in mbuf chain must be small 466 * enough to send all at once. 467 * 468 * Returns nonzero on error, timeout or signal; callers 469 * must check for short counts if EINTR/ERESTART are returned. 470 * Data and control buffers are freed on return. 
471 */ 472 int 473 sosend(so, addr, uio, top, control, flags, td) 474 register struct socket *so; 475 struct sockaddr *addr; 476 struct uio *uio; 477 struct mbuf *top; 478 struct mbuf *control; 479 int flags; 480 struct thread *td; 481 { 482 struct mbuf **mp; 483 register struct mbuf *m; 484 register long space, len, resid; 485 int clen = 0, error, s, dontroute, mlen; 486 int atomic = sosendallatonce(so) || top; 487 488 if (uio) 489 resid = uio->uio_resid; 490 else 491 resid = top->m_pkthdr.len; 492 /* 493 * In theory resid should be unsigned. 494 * However, space must be signed, as it might be less than 0 495 * if we over-committed, and we must use a signed comparison 496 * of space and resid. On the other hand, a negative resid 497 * causes us to loop sending 0-length segments to the protocol. 498 * 499 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM 500 * type sockets since that's an error. 501 */ 502 if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) { 503 error = EINVAL; 504 goto out; 505 } 506 507 dontroute = 508 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 509 (so->so_proto->pr_flags & PR_ATOMIC); 510 if (td) 511 td->td_proc->p_stats->p_ru.ru_msgsnd++; 512 if (control) 513 clen = control->m_len; 514 #define snderr(errno) { error = errno; splx(s); goto release; } 515 516 restart: 517 error = sblock(&so->so_snd, SBLOCKWAIT(flags)); 518 if (error) 519 goto out; 520 do { 521 s = splnet(); 522 if (so->so_state & SS_CANTSENDMORE) 523 snderr(EPIPE); 524 if (so->so_error) { 525 error = so->so_error; 526 so->so_error = 0; 527 splx(s); 528 goto release; 529 } 530 if ((so->so_state & SS_ISCONNECTED) == 0) { 531 /* 532 * `sendto' and `sendmsg' is allowed on a connection- 533 * based socket if it supports implied connect. 534 * Return ENOTCONN if not connected and no address is 535 * supplied. 
536 */ 537 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && 538 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { 539 if ((so->so_state & SS_ISCONFIRMING) == 0 && 540 !(resid == 0 && clen != 0)) 541 snderr(ENOTCONN); 542 } else if (addr == 0) 543 snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ? 544 ENOTCONN : EDESTADDRREQ); 545 } 546 space = sbspace(&so->so_snd); 547 if (flags & MSG_OOB) 548 space += 1024; 549 if ((atomic && resid > so->so_snd.sb_hiwat) || 550 clen > so->so_snd.sb_hiwat) 551 snderr(EMSGSIZE); 552 if (space < resid + clen && uio && 553 (atomic || space < so->so_snd.sb_lowat || space < clen)) { 554 if (so->so_state & SS_NBIO) 555 snderr(EWOULDBLOCK); 556 sbunlock(&so->so_snd); 557 error = sbwait(&so->so_snd); 558 splx(s); 559 if (error) 560 goto out; 561 goto restart; 562 } 563 splx(s); 564 mp = ⊤ 565 space -= clen; 566 do { 567 if (uio == NULL) { 568 /* 569 * Data is prepackaged in "top". 570 */ 571 resid = 0; 572 if (flags & MSG_EOR) 573 top->m_flags |= M_EOR; 574 } else do { 575 if (top == 0) { 576 MGETHDR(m, M_TRYWAIT, MT_DATA); 577 if (m == NULL) { 578 error = ENOBUFS; 579 goto release; 580 } 581 mlen = MHLEN; 582 m->m_pkthdr.len = 0; 583 m->m_pkthdr.rcvif = (struct ifnet *)0; 584 } else { 585 MGET(m, M_TRYWAIT, MT_DATA); 586 if (m == NULL) { 587 error = ENOBUFS; 588 goto release; 589 } 590 mlen = MLEN; 591 } 592 if (resid >= MINCLSIZE) { 593 MCLGET(m, M_TRYWAIT); 594 if ((m->m_flags & M_EXT) == 0) 595 goto nopages; 596 mlen = MCLBYTES; 597 len = min(min(mlen, resid), space); 598 } else { 599 nopages: 600 len = min(min(mlen, resid), space); 601 /* 602 * For datagram protocols, leave room 603 * for protocol headers in first mbuf. 
604 */ 605 if (atomic && top == 0 && len < mlen) 606 MH_ALIGN(m, len); 607 } 608 space -= len; 609 error = uiomove(mtod(m, caddr_t), (int)len, uio); 610 resid = uio->uio_resid; 611 m->m_len = len; 612 *mp = m; 613 top->m_pkthdr.len += len; 614 if (error) 615 goto release; 616 mp = &m->m_next; 617 if (resid <= 0) { 618 if (flags & MSG_EOR) 619 top->m_flags |= M_EOR; 620 break; 621 } 622 } while (space > 0 && atomic); 623 if (dontroute) 624 so->so_options |= SO_DONTROUTE; 625 s = splnet(); /* XXX */ 626 /* 627 * XXX all the SS_CANTSENDMORE checks previously 628 * done could be out of date. We could have recieved 629 * a reset packet in an interrupt or maybe we slept 630 * while doing page faults in uiomove() etc. We could 631 * probably recheck again inside the splnet() protection 632 * here, but there are probably other places that this 633 * also happens. We must rethink this. 634 */ 635 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 636 (flags & MSG_OOB) ? PRUS_OOB : 637 /* 638 * If the user set MSG_EOF, the protocol 639 * understands this flag and nothing left to 640 * send then use PRU_SEND_EOF instead of PRU_SEND. 641 */ 642 ((flags & MSG_EOF) && 643 (so->so_proto->pr_flags & PR_IMPLOPCL) && 644 (resid <= 0)) ? 645 PRUS_EOF : 646 /* If there is more to send set PRUS_MORETOCOME */ 647 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0, 648 top, addr, control, td); 649 splx(s); 650 if (dontroute) 651 so->so_options &= ~SO_DONTROUTE; 652 clen = 0; 653 control = 0; 654 top = 0; 655 mp = ⊤ 656 if (error) 657 goto release; 658 } while (resid && space > 0); 659 } while (resid); 660 661 release: 662 sbunlock(&so->so_snd); 663 out: 664 if (top) 665 m_freem(top); 666 if (control) 667 m_freem(control); 668 return (error); 669 } 670 671 /* 672 * Implement receive operations on a socket. 673 * We depend on the way that records are added to the sockbuf 674 * by sbappend*. 
In particular, each record (mbufs linked through m_next) 675 * must begin with an address if the protocol so specifies, 676 * followed by an optional mbuf or mbufs containing ancillary data, 677 * and then zero or more mbufs of data. 678 * In order to avoid blocking network interrupts for the entire time here, 679 * we splx() while doing the actual copy to user space. 680 * Although the sockbuf is locked, new data may still be appended, 681 * and thus we must maintain consistency of the sockbuf during that time. 682 * 683 * The caller may receive the data as a single mbuf chain by supplying 684 * an mbuf **mp0 for use in returning the chain. The uio is then used 685 * only for the count in uio_resid. 686 */ 687 int 688 soreceive(so, psa, uio, mp0, controlp, flagsp) 689 register struct socket *so; 690 struct sockaddr **psa; 691 struct uio *uio; 692 struct mbuf **mp0; 693 struct mbuf **controlp; 694 int *flagsp; 695 { 696 struct mbuf *m, **mp; 697 register int flags, len, error, s, offset; 698 struct protosw *pr = so->so_proto; 699 struct mbuf *nextrecord; 700 int moff, type = 0; 701 int orig_resid = uio->uio_resid; 702 703 mp = mp0; 704 if (psa) 705 *psa = 0; 706 if (controlp) 707 *controlp = 0; 708 if (flagsp) 709 flags = *flagsp &~ MSG_EOR; 710 else 711 flags = 0; 712 if (flags & MSG_OOB) { 713 m = m_get(M_TRYWAIT, MT_DATA); 714 if (m == NULL) 715 return (ENOBUFS); 716 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); 717 if (error) 718 goto bad; 719 do { 720 error = uiomove(mtod(m, caddr_t), 721 (int) min(uio->uio_resid, m->m_len), uio); 722 m = m_free(m); 723 } while (uio->uio_resid && error == 0 && m); 724 bad: 725 if (m) 726 m_freem(m); 727 return (error); 728 } 729 if (mp) 730 *mp = (struct mbuf *)0; 731 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) 732 (*pr->pr_usrreqs->pru_rcvd)(so, 0); 733 734 restart: 735 error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); 736 if (error) 737 return (error); 738 s = splnet(); 739 740 m = so->so_rcv.sb_mb; 
741 /* 742 * If we have less data than requested, block awaiting more 743 * (subject to any timeout) if: 744 * 1. the current count is less than the low water mark, or 745 * 2. MSG_WAITALL is set, and it is possible to do the entire 746 * receive operation at once if we block (resid <= hiwat). 747 * 3. MSG_DONTWAIT is not set 748 * If MSG_WAITALL is set but resid is larger than the receive buffer, 749 * we have to do the receive in sections, and thus risk returning 750 * a short count if a timeout or signal occurs after we start. 751 */ 752 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && 753 so->so_rcv.sb_cc < uio->uio_resid) && 754 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || 755 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && 756 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { 757 KASSERT(m != 0 || !so->so_rcv.sb_cc, 758 ("receive: m == %p so->so_rcv.sb_cc == %lu", 759 m, so->so_rcv.sb_cc)); 760 if (so->so_error) { 761 if (m) 762 goto dontblock; 763 error = so->so_error; 764 if ((flags & MSG_PEEK) == 0) 765 so->so_error = 0; 766 goto release; 767 } 768 if (so->so_state & SS_CANTRCVMORE) { 769 if (m) 770 goto dontblock; 771 else 772 goto release; 773 } 774 for (; m; m = m->m_next) 775 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { 776 m = so->so_rcv.sb_mb; 777 goto dontblock; 778 } 779 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && 780 (so->so_proto->pr_flags & PR_CONNREQUIRED)) { 781 error = ENOTCONN; 782 goto release; 783 } 784 if (uio->uio_resid == 0) 785 goto release; 786 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { 787 error = EWOULDBLOCK; 788 goto release; 789 } 790 sbunlock(&so->so_rcv); 791 error = sbwait(&so->so_rcv); 792 splx(s); 793 if (error) 794 return (error); 795 goto restart; 796 } 797 dontblock: 798 if (uio->uio_td) 799 uio->uio_td->td_proc->p_stats->p_ru.ru_msgrcv++; 800 nextrecord = m->m_nextpkt; 801 if (pr->pr_flags & PR_ADDR) { 802 KASSERT(m->m_type == MT_SONAME, 803 ("m->m_type == 
%d", m->m_type)); 804 orig_resid = 0; 805 if (psa) 806 *psa = dup_sockaddr(mtod(m, struct sockaddr *), 807 mp0 == 0); 808 if (flags & MSG_PEEK) { 809 m = m->m_next; 810 } else { 811 sbfree(&so->so_rcv, m); 812 so->so_rcv.sb_mb = m_free(m); 813 m = so->so_rcv.sb_mb; 814 } 815 } 816 while (m && m->m_type == MT_CONTROL && error == 0) { 817 if (flags & MSG_PEEK) { 818 if (controlp) 819 *controlp = m_copy(m, 0, m->m_len); 820 m = m->m_next; 821 } else { 822 sbfree(&so->so_rcv, m); 823 so->so_rcv.sb_mb = m->m_next; 824 m->m_next = NULL; 825 if (pr->pr_domain->dom_externalize) 826 error = 827 (*pr->pr_domain->dom_externalize)(m, controlp); 828 else if (controlp) 829 *controlp = m; 830 else 831 m_freem(m); 832 m = so->so_rcv.sb_mb; 833 } 834 if (controlp) { 835 orig_resid = 0; 836 do 837 controlp = &(*controlp)->m_next; 838 while (*controlp != NULL); 839 } 840 } 841 if (m) { 842 if ((flags & MSG_PEEK) == 0) 843 m->m_nextpkt = nextrecord; 844 type = m->m_type; 845 if (type == MT_OOBDATA) 846 flags |= MSG_OOB; 847 } 848 moff = 0; 849 offset = 0; 850 while (m && uio->uio_resid > 0 && error == 0) { 851 if (m->m_type == MT_OOBDATA) { 852 if (type != MT_OOBDATA) 853 break; 854 } else if (type == MT_OOBDATA) 855 break; 856 else 857 KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER, 858 ("m->m_type == %d", m->m_type)); 859 so->so_state &= ~SS_RCVATMARK; 860 len = uio->uio_resid; 861 if (so->so_oobmark && len > so->so_oobmark - offset) 862 len = so->so_oobmark - offset; 863 if (len > m->m_len - moff) 864 len = m->m_len - moff; 865 /* 866 * If mp is set, just pass back the mbufs. 867 * Otherwise copy them out via the uio, then free. 868 * Sockbuf must be consistent here (points to current mbuf, 869 * it points to next record) when we drop priority; 870 * we must note any additions to the sockbuf when we 871 * block interrupts again. 
872 */ 873 if (mp == 0) { 874 splx(s); 875 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); 876 s = splnet(); 877 if (error) 878 goto release; 879 } else 880 uio->uio_resid -= len; 881 if (len == m->m_len - moff) { 882 if (m->m_flags & M_EOR) 883 flags |= MSG_EOR; 884 if (flags & MSG_PEEK) { 885 m = m->m_next; 886 moff = 0; 887 } else { 888 nextrecord = m->m_nextpkt; 889 sbfree(&so->so_rcv, m); 890 if (mp) { 891 *mp = m; 892 mp = &m->m_next; 893 so->so_rcv.sb_mb = m = m->m_next; 894 *mp = (struct mbuf *)0; 895 } else { 896 so->so_rcv.sb_mb = m_free(m); 897 m = so->so_rcv.sb_mb; 898 } 899 if (m) 900 m->m_nextpkt = nextrecord; 901 } 902 } else { 903 if (flags & MSG_PEEK) 904 moff += len; 905 else { 906 if (mp) 907 *mp = m_copym(m, 0, len, M_TRYWAIT); 908 m->m_data += len; 909 m->m_len -= len; 910 so->so_rcv.sb_cc -= len; 911 } 912 } 913 if (so->so_oobmark) { 914 if ((flags & MSG_PEEK) == 0) { 915 so->so_oobmark -= len; 916 if (so->so_oobmark == 0) { 917 so->so_state |= SS_RCVATMARK; 918 break; 919 } 920 } else { 921 offset += len; 922 if (offset == so->so_oobmark) 923 break; 924 } 925 } 926 if (flags & MSG_EOR) 927 break; 928 /* 929 * If the MSG_WAITALL flag is set (for non-atomic socket), 930 * we must not quit until "uio->uio_resid == 0" or an error 931 * termination. If a signal/timeout occurs, return 932 * with a short count but without error. 933 * Keep sockbuf locked against other readers. 934 */ 935 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 && 936 !sosendallatonce(so) && !nextrecord) { 937 if (so->so_error || so->so_state & SS_CANTRCVMORE) 938 break; 939 /* 940 * Notify the protocol that some data has been 941 * drained before blocking. 
942 */ 943 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) 944 (*pr->pr_usrreqs->pru_rcvd)(so, flags); 945 error = sbwait(&so->so_rcv); 946 if (error) { 947 sbunlock(&so->so_rcv); 948 splx(s); 949 return (0); 950 } 951 m = so->so_rcv.sb_mb; 952 if (m) 953 nextrecord = m->m_nextpkt; 954 } 955 } 956 957 if (m && pr->pr_flags & PR_ATOMIC) { 958 flags |= MSG_TRUNC; 959 if ((flags & MSG_PEEK) == 0) 960 (void) sbdroprecord(&so->so_rcv); 961 } 962 if ((flags & MSG_PEEK) == 0) { 963 if (m == 0) 964 so->so_rcv.sb_mb = nextrecord; 965 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) 966 (*pr->pr_usrreqs->pru_rcvd)(so, flags); 967 } 968 if (orig_resid == uio->uio_resid && orig_resid && 969 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { 970 sbunlock(&so->so_rcv); 971 splx(s); 972 goto restart; 973 } 974 975 if (flagsp) 976 *flagsp |= flags; 977 release: 978 sbunlock(&so->so_rcv); 979 splx(s); 980 return (error); 981 } 982 983 int 984 soshutdown(so, how) 985 register struct socket *so; 986 register int how; 987 { 988 register struct protosw *pr = so->so_proto; 989 990 if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) 991 return (EINVAL); 992 993 if (how != SHUT_WR) 994 sorflush(so); 995 if (how != SHUT_RD) 996 return ((*pr->pr_usrreqs->pru_shutdown)(so)); 997 return (0); 998 } 999 1000 void 1001 sorflush(so) 1002 register struct socket *so; 1003 { 1004 register struct sockbuf *sb = &so->so_rcv; 1005 register struct protosw *pr = so->so_proto; 1006 register int s; 1007 struct sockbuf asb; 1008 1009 sb->sb_flags |= SB_NOINTR; 1010 (void) sblock(sb, M_WAITOK); 1011 s = splimp(); 1012 socantrcvmore(so); 1013 sbunlock(sb); 1014 asb = *sb; 1015 bzero((caddr_t)sb, sizeof (*sb)); 1016 splx(s); 1017 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) 1018 (*pr->pr_domain->dom_dispose)(asb.sb_mb); 1019 sbrelease(&asb, so); 1020 } 1021 1022 #ifdef INET 1023 static int 1024 do_setopt_accept_filter(so, sopt) 1025 struct socket *so; 1026 struct sockopt *sopt; 
1027 { 1028 struct accept_filter_arg *afap = NULL; 1029 struct accept_filter *afp; 1030 struct so_accf *af = so->so_accf; 1031 int error = 0; 1032 1033 /* do not set/remove accept filters on non listen sockets */ 1034 if ((so->so_options & SO_ACCEPTCONN) == 0) { 1035 error = EINVAL; 1036 goto out; 1037 } 1038 1039 /* removing the filter */ 1040 if (sopt == NULL) { 1041 if (af != NULL) { 1042 if (af->so_accept_filter != NULL && 1043 af->so_accept_filter->accf_destroy != NULL) { 1044 af->so_accept_filter->accf_destroy(so); 1045 } 1046 if (af->so_accept_filter_str != NULL) { 1047 FREE(af->so_accept_filter_str, M_ACCF); 1048 } 1049 FREE(af, M_ACCF); 1050 so->so_accf = NULL; 1051 } 1052 so->so_options &= ~SO_ACCEPTFILTER; 1053 return (0); 1054 } 1055 /* adding a filter */ 1056 /* must remove previous filter first */ 1057 if (af != NULL) { 1058 error = EINVAL; 1059 goto out; 1060 } 1061 /* don't put large objects on the kernel stack */ 1062 MALLOC(afap, struct accept_filter_arg *, sizeof(*afap), M_TEMP, M_WAITOK); 1063 error = sooptcopyin(sopt, afap, sizeof *afap, sizeof *afap); 1064 afap->af_name[sizeof(afap->af_name)-1] = '\0'; 1065 afap->af_arg[sizeof(afap->af_arg)-1] = '\0'; 1066 if (error) 1067 goto out; 1068 afp = accept_filt_get(afap->af_name); 1069 if (afp == NULL) { 1070 error = ENOENT; 1071 goto out; 1072 } 1073 MALLOC(af, struct so_accf *, sizeof(*af), M_ACCF, M_WAITOK | M_ZERO); 1074 if (afp->accf_create != NULL) { 1075 if (afap->af_name[0] != '\0') { 1076 int len = strlen(afap->af_name) + 1; 1077 1078 MALLOC(af->so_accept_filter_str, char *, len, M_ACCF, M_WAITOK); 1079 strcpy(af->so_accept_filter_str, afap->af_name); 1080 } 1081 af->so_accept_filter_arg = afp->accf_create(so, afap->af_arg); 1082 if (af->so_accept_filter_arg == NULL) { 1083 FREE(af->so_accept_filter_str, M_ACCF); 1084 FREE(af, M_ACCF); 1085 so->so_accf = NULL; 1086 error = EINVAL; 1087 goto out; 1088 } 1089 } 1090 af->so_accept_filter = afp; 1091 so->so_accf = af; 1092 so->so_options |= 
SO_ACCEPTFILTER; 1093 out: 1094 if (afap != NULL) 1095 FREE(afap, M_TEMP); 1096 return (error); 1097 } 1098 #endif /* INET */ 1099 1100 /* 1101 * Perhaps this routine, and sooptcopyout(), below, ought to come in 1102 * an additional variant to handle the case where the option value needs 1103 * to be some kind of integer, but not a specific size. 1104 * In addition to their use here, these functions are also called by the 1105 * protocol-level pr_ctloutput() routines. 1106 */ 1107 int 1108 sooptcopyin(sopt, buf, len, minlen) 1109 struct sockopt *sopt; 1110 void *buf; 1111 size_t len; 1112 size_t minlen; 1113 { 1114 size_t valsize; 1115 1116 /* 1117 * If the user gives us more than we wanted, we ignore it, 1118 * but if we don't get the minimum length the caller 1119 * wants, we return EINVAL. On success, sopt->sopt_valsize 1120 * is set to however much we actually retrieved. 1121 */ 1122 if ((valsize = sopt->sopt_valsize) < minlen) 1123 return EINVAL; 1124 if (valsize > len) 1125 sopt->sopt_valsize = valsize = len; 1126 1127 if (sopt->sopt_td != 0) 1128 return (copyin(sopt->sopt_val, buf, valsize)); 1129 1130 bcopy(sopt->sopt_val, buf, valsize); 1131 return 0; 1132 } 1133 1134 int 1135 sosetopt(so, sopt) 1136 struct socket *so; 1137 struct sockopt *sopt; 1138 { 1139 int error, optval; 1140 struct linger l; 1141 struct timeval tv; 1142 u_long val; 1143 1144 error = 0; 1145 if (sopt->sopt_level != SOL_SOCKET) { 1146 if (so->so_proto && so->so_proto->pr_ctloutput) 1147 return ((*so->so_proto->pr_ctloutput) 1148 (so, sopt)); 1149 error = ENOPROTOOPT; 1150 } else { 1151 switch (sopt->sopt_name) { 1152 #ifdef INET 1153 case SO_ACCEPTFILTER: 1154 error = do_setopt_accept_filter(so, sopt); 1155 if (error) 1156 goto bad; 1157 break; 1158 #endif 1159 case SO_LINGER: 1160 error = sooptcopyin(sopt, &l, sizeof l, sizeof l); 1161 if (error) 1162 goto bad; 1163 1164 so->so_linger = l.l_linger; 1165 if (l.l_onoff) 1166 so->so_options |= SO_LINGER; 1167 else 1168 so->so_options &= 
~SO_LINGER; 1169 break; 1170 1171 case SO_DEBUG: 1172 case SO_KEEPALIVE: 1173 case SO_DONTROUTE: 1174 case SO_USELOOPBACK: 1175 case SO_BROADCAST: 1176 case SO_REUSEADDR: 1177 case SO_REUSEPORT: 1178 case SO_OOBINLINE: 1179 case SO_TIMESTAMP: 1180 error = sooptcopyin(sopt, &optval, sizeof optval, 1181 sizeof optval); 1182 if (error) 1183 goto bad; 1184 if (optval) 1185 so->so_options |= sopt->sopt_name; 1186 else 1187 so->so_options &= ~sopt->sopt_name; 1188 break; 1189 1190 case SO_SNDBUF: 1191 case SO_RCVBUF: 1192 case SO_SNDLOWAT: 1193 case SO_RCVLOWAT: 1194 error = sooptcopyin(sopt, &optval, sizeof optval, 1195 sizeof optval); 1196 if (error) 1197 goto bad; 1198 1199 /* 1200 * Values < 1 make no sense for any of these 1201 * options, so disallow them. 1202 */ 1203 if (optval < 1) { 1204 error = EINVAL; 1205 goto bad; 1206 } 1207 1208 switch (sopt->sopt_name) { 1209 case SO_SNDBUF: 1210 case SO_RCVBUF: 1211 if (sbreserve(sopt->sopt_name == SO_SNDBUF ? 1212 &so->so_snd : &so->so_rcv, (u_long)optval, 1213 so, curthread) == 0) { 1214 error = ENOBUFS; 1215 goto bad; 1216 } 1217 break; 1218 1219 /* 1220 * Make sure the low-water is never greater than 1221 * the high-water. 1222 */ 1223 case SO_SNDLOWAT: 1224 so->so_snd.sb_lowat = 1225 (optval > so->so_snd.sb_hiwat) ? 1226 so->so_snd.sb_hiwat : optval; 1227 break; 1228 case SO_RCVLOWAT: 1229 so->so_rcv.sb_lowat = 1230 (optval > so->so_rcv.sb_hiwat) ? 
1231 so->so_rcv.sb_hiwat : optval; 1232 break; 1233 } 1234 break; 1235 1236 case SO_SNDTIMEO: 1237 case SO_RCVTIMEO: 1238 error = sooptcopyin(sopt, &tv, sizeof tv, 1239 sizeof tv); 1240 if (error) 1241 goto bad; 1242 1243 /* assert(hz > 0); */ 1244 if (tv.tv_sec < 0 || tv.tv_sec > SHRT_MAX / hz || 1245 tv.tv_usec < 0 || tv.tv_usec >= 1000000) { 1246 error = EDOM; 1247 goto bad; 1248 } 1249 /* assert(tick > 0); */ 1250 /* assert(ULONG_MAX - SHRT_MAX >= 1000000); */ 1251 val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick; 1252 if (val > SHRT_MAX) { 1253 error = EDOM; 1254 goto bad; 1255 } 1256 1257 switch (sopt->sopt_name) { 1258 case SO_SNDTIMEO: 1259 so->so_snd.sb_timeo = val; 1260 break; 1261 case SO_RCVTIMEO: 1262 so->so_rcv.sb_timeo = val; 1263 break; 1264 } 1265 break; 1266 default: 1267 error = ENOPROTOOPT; 1268 break; 1269 } 1270 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { 1271 (void) ((*so->so_proto->pr_ctloutput) 1272 (so, sopt)); 1273 } 1274 } 1275 bad: 1276 return (error); 1277 } 1278 1279 /* Helper routine for getsockopt */ 1280 int 1281 sooptcopyout(sopt, buf, len) 1282 struct sockopt *sopt; 1283 void *buf; 1284 size_t len; 1285 { 1286 int error; 1287 size_t valsize; 1288 1289 error = 0; 1290 1291 /* 1292 * Documented get behavior is that we always return a value, 1293 * possibly truncated to fit in the user's buffer. 1294 * Traditional behavior is that we always tell the user 1295 * precisely how much we copied, rather than something useful 1296 * like the total amount we had available for her. 1297 * Note that this interface is not idempotent; the entire answer must 1298 * generated ahead of time. 
1299 */ 1300 valsize = min(len, sopt->sopt_valsize); 1301 sopt->sopt_valsize = valsize; 1302 if (sopt->sopt_val != 0) { 1303 if (sopt->sopt_td != 0) 1304 error = copyout(buf, sopt->sopt_val, valsize); 1305 else 1306 bcopy(buf, sopt->sopt_val, valsize); 1307 } 1308 return error; 1309 } 1310 1311 int 1312 sogetopt(so, sopt) 1313 struct socket *so; 1314 struct sockopt *sopt; 1315 { 1316 int error, optval; 1317 struct linger l; 1318 struct timeval tv; 1319 #ifdef INET 1320 struct accept_filter_arg *afap; 1321 #endif 1322 1323 error = 0; 1324 if (sopt->sopt_level != SOL_SOCKET) { 1325 if (so->so_proto && so->so_proto->pr_ctloutput) { 1326 return ((*so->so_proto->pr_ctloutput) 1327 (so, sopt)); 1328 } else 1329 return (ENOPROTOOPT); 1330 } else { 1331 switch (sopt->sopt_name) { 1332 #ifdef INET 1333 case SO_ACCEPTFILTER: 1334 if ((so->so_options & SO_ACCEPTCONN) == 0) 1335 return (EINVAL); 1336 MALLOC(afap, struct accept_filter_arg *, sizeof(*afap), 1337 M_TEMP, M_WAITOK | M_ZERO); 1338 if ((so->so_options & SO_ACCEPTFILTER) != 0) { 1339 strcpy(afap->af_name, so->so_accf->so_accept_filter->accf_name); 1340 if (so->so_accf->so_accept_filter_str != NULL) 1341 strcpy(afap->af_arg, so->so_accf->so_accept_filter_str); 1342 } 1343 error = sooptcopyout(sopt, afap, sizeof(*afap)); 1344 FREE(afap, M_TEMP); 1345 break; 1346 #endif 1347 1348 case SO_LINGER: 1349 l.l_onoff = so->so_options & SO_LINGER; 1350 l.l_linger = so->so_linger; 1351 error = sooptcopyout(sopt, &l, sizeof l); 1352 break; 1353 1354 case SO_USELOOPBACK: 1355 case SO_DONTROUTE: 1356 case SO_DEBUG: 1357 case SO_KEEPALIVE: 1358 case SO_REUSEADDR: 1359 case SO_REUSEPORT: 1360 case SO_BROADCAST: 1361 case SO_OOBINLINE: 1362 case SO_TIMESTAMP: 1363 optval = so->so_options & sopt->sopt_name; 1364 integer: 1365 error = sooptcopyout(sopt, &optval, sizeof optval); 1366 break; 1367 1368 case SO_TYPE: 1369 optval = so->so_type; 1370 goto integer; 1371 1372 case SO_ERROR: 1373 optval = so->so_error; 1374 so->so_error = 0; 
1375 goto integer; 1376 1377 case SO_SNDBUF: 1378 optval = so->so_snd.sb_hiwat; 1379 goto integer; 1380 1381 case SO_RCVBUF: 1382 optval = so->so_rcv.sb_hiwat; 1383 goto integer; 1384 1385 case SO_SNDLOWAT: 1386 optval = so->so_snd.sb_lowat; 1387 goto integer; 1388 1389 case SO_RCVLOWAT: 1390 optval = so->so_rcv.sb_lowat; 1391 goto integer; 1392 1393 case SO_SNDTIMEO: 1394 case SO_RCVTIMEO: 1395 optval = (sopt->sopt_name == SO_SNDTIMEO ? 1396 so->so_snd.sb_timeo : so->so_rcv.sb_timeo); 1397 1398 tv.tv_sec = optval / hz; 1399 tv.tv_usec = (optval % hz) * tick; 1400 error = sooptcopyout(sopt, &tv, sizeof tv); 1401 break; 1402 1403 default: 1404 error = ENOPROTOOPT; 1405 break; 1406 } 1407 return (error); 1408 } 1409 } 1410 1411 /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */ 1412 int 1413 soopt_getm(struct sockopt *sopt, struct mbuf **mp) 1414 { 1415 struct mbuf *m, *m_prev; 1416 int sopt_size = sopt->sopt_valsize; 1417 1418 MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_DATA); 1419 if (m == 0) 1420 return ENOBUFS; 1421 if (sopt_size > MLEN) { 1422 MCLGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT); 1423 if ((m->m_flags & M_EXT) == 0) { 1424 m_free(m); 1425 return ENOBUFS; 1426 } 1427 m->m_len = min(MCLBYTES, sopt_size); 1428 } else { 1429 m->m_len = min(MLEN, sopt_size); 1430 } 1431 sopt_size -= m->m_len; 1432 *mp = m; 1433 m_prev = m; 1434 1435 while (sopt_size) { 1436 MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_DATA); 1437 if (m == 0) { 1438 m_freem(*mp); 1439 return ENOBUFS; 1440 } 1441 if (sopt_size > MLEN) { 1442 MCLGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT); 1443 if ((m->m_flags & M_EXT) == 0) { 1444 m_freem(*mp); 1445 return ENOBUFS; 1446 } 1447 m->m_len = min(MCLBYTES, sopt_size); 1448 } else { 1449 m->m_len = min(MLEN, sopt_size); 1450 } 1451 sopt_size -= m->m_len; 1452 m_prev->m_next = m; 1453 m_prev = m; 1454 } 1455 return 0; 1456 } 1457 1458 /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. 
*/ 1459 int 1460 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m) 1461 { 1462 struct mbuf *m0 = m; 1463 1464 if (sopt->sopt_val == NULL) 1465 return 0; 1466 while (m != NULL && sopt->sopt_valsize >= m->m_len) { 1467 if (sopt->sopt_td != NULL) { 1468 int error; 1469 1470 error = copyin(sopt->sopt_val, mtod(m, char *), 1471 m->m_len); 1472 if (error != 0) { 1473 m_freem(m0); 1474 return(error); 1475 } 1476 } else 1477 bcopy(sopt->sopt_val, mtod(m, char *), m->m_len); 1478 sopt->sopt_valsize -= m->m_len; 1479 (caddr_t)sopt->sopt_val += m->m_len; 1480 m = m->m_next; 1481 } 1482 if (m != NULL) /* should be allocated enoughly at ip6_sooptmcopyin() */ 1483 panic("ip6_sooptmcopyin"); 1484 return 0; 1485 } 1486 1487 /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */ 1488 int 1489 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m) 1490 { 1491 struct mbuf *m0 = m; 1492 size_t valsize = 0; 1493 1494 if (sopt->sopt_val == NULL) 1495 return 0; 1496 while (m != NULL && sopt->sopt_valsize >= m->m_len) { 1497 if (sopt->sopt_td != NULL) { 1498 int error; 1499 1500 error = copyout(mtod(m, char *), sopt->sopt_val, 1501 m->m_len); 1502 if (error != 0) { 1503 m_freem(m0); 1504 return(error); 1505 } 1506 } else 1507 bcopy(mtod(m, char *), sopt->sopt_val, m->m_len); 1508 sopt->sopt_valsize -= m->m_len; 1509 (caddr_t)sopt->sopt_val += m->m_len; 1510 valsize += m->m_len; 1511 m = m->m_next; 1512 } 1513 if (m != NULL) { 1514 /* enough soopt buffer should be given from user-land */ 1515 m_freem(m0); 1516 return(EINVAL); 1517 } 1518 sopt->sopt_valsize = valsize; 1519 return 0; 1520 } 1521 1522 void 1523 sohasoutofband(so) 1524 register struct socket *so; 1525 { 1526 if (so->so_sigio != NULL) 1527 pgsigio(so->so_sigio, SIGURG, 0); 1528 selwakeup(&so->so_rcv.sb_sel); 1529 } 1530 1531 int 1532 sopoll(struct socket *so, int events, struct ucred *cred, struct thread *td) 1533 { 1534 int revents = 0; 1535 int s = splnet(); 1536 1537 if (events & (POLLIN | POLLRDNORM)) 
1538 if (soreadable(so)) 1539 revents |= events & (POLLIN | POLLRDNORM); 1540 1541 if (events & POLLINIGNEOF) 1542 if (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat || 1543 !TAILQ_EMPTY(&so->so_comp) || so->so_error) 1544 revents |= POLLINIGNEOF; 1545 1546 if (events & (POLLOUT | POLLWRNORM)) 1547 if (sowriteable(so)) 1548 revents |= events & (POLLOUT | POLLWRNORM); 1549 1550 if (events & (POLLPRI | POLLRDBAND)) 1551 if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) 1552 revents |= events & (POLLPRI | POLLRDBAND); 1553 1554 if (revents == 0) { 1555 if (events & 1556 (POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM | 1557 POLLRDBAND)) { 1558 selrecord(td, &so->so_rcv.sb_sel); 1559 so->so_rcv.sb_flags |= SB_SEL; 1560 } 1561 1562 if (events & (POLLOUT | POLLWRNORM)) { 1563 selrecord(td, &so->so_snd.sb_sel); 1564 so->so_snd.sb_flags |= SB_SEL; 1565 } 1566 } 1567 1568 splx(s); 1569 return (revents); 1570 } 1571 1572 int 1573 sokqfilter(struct file *fp, struct knote *kn) 1574 { 1575 struct socket *so = (struct socket *)kn->kn_fp->f_data; 1576 struct sockbuf *sb; 1577 int s; 1578 1579 switch (kn->kn_filter) { 1580 case EVFILT_READ: 1581 if (so->so_options & SO_ACCEPTCONN) 1582 kn->kn_fop = &solisten_filtops; 1583 else 1584 kn->kn_fop = &soread_filtops; 1585 sb = &so->so_rcv; 1586 break; 1587 case EVFILT_WRITE: 1588 kn->kn_fop = &sowrite_filtops; 1589 sb = &so->so_snd; 1590 break; 1591 default: 1592 return (1); 1593 } 1594 1595 s = splnet(); 1596 SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext); 1597 sb->sb_flags |= SB_KNOTE; 1598 splx(s); 1599 return (0); 1600 } 1601 1602 static void 1603 filt_sordetach(struct knote *kn) 1604 { 1605 struct socket *so = (struct socket *)kn->kn_fp->f_data; 1606 int s = splnet(); 1607 1608 SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext); 1609 if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note)) 1610 so->so_rcv.sb_flags &= ~SB_KNOTE; 1611 splx(s); 1612 } 1613 1614 /*ARGSUSED*/ 1615 static int 1616 filt_soread(struct knote *kn, long 
hint) 1617 { 1618 struct socket *so = (struct socket *)kn->kn_fp->f_data; 1619 1620 kn->kn_data = so->so_rcv.sb_cc; 1621 if (so->so_state & SS_CANTRCVMORE) { 1622 kn->kn_flags |= EV_EOF; 1623 kn->kn_fflags = so->so_error; 1624 return (1); 1625 } 1626 if (so->so_error) /* temporary udp error */ 1627 return (1); 1628 if (kn->kn_sfflags & NOTE_LOWAT) 1629 return (kn->kn_data >= kn->kn_sdata); 1630 return (kn->kn_data >= so->so_rcv.sb_lowat); 1631 } 1632 1633 static void 1634 filt_sowdetach(struct knote *kn) 1635 { 1636 struct socket *so = (struct socket *)kn->kn_fp->f_data; 1637 int s = splnet(); 1638 1639 SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext); 1640 if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note)) 1641 so->so_snd.sb_flags &= ~SB_KNOTE; 1642 splx(s); 1643 } 1644 1645 /*ARGSUSED*/ 1646 static int 1647 filt_sowrite(struct knote *kn, long hint) 1648 { 1649 struct socket *so = (struct socket *)kn->kn_fp->f_data; 1650 1651 kn->kn_data = sbspace(&so->so_snd); 1652 if (so->so_state & SS_CANTSENDMORE) { 1653 kn->kn_flags |= EV_EOF; 1654 kn->kn_fflags = so->so_error; 1655 return (1); 1656 } 1657 if (so->so_error) /* temporary udp error */ 1658 return (1); 1659 if (((so->so_state & SS_ISCONNECTED) == 0) && 1660 (so->so_proto->pr_flags & PR_CONNREQUIRED)) 1661 return (0); 1662 if (kn->kn_sfflags & NOTE_LOWAT) 1663 return (kn->kn_data >= kn->kn_sdata); 1664 return (kn->kn_data >= so->so_snd.sb_lowat); 1665 } 1666 1667 /*ARGSUSED*/ 1668 static int 1669 filt_solisten(struct knote *kn, long hint) 1670 { 1671 struct socket *so = (struct socket *)kn->kn_fp->f_data; 1672 1673 kn->kn_data = so->so_qlen - so->so_incqlen; 1674 return (! TAILQ_EMPTY(&so->so_comp)); 1675 } 1676 1677 int 1678 socheckuid(struct socket *so, uid_t uid) 1679 { 1680 1681 if (so == NULL) 1682 return (EPERM); 1683 if (so->so_cred->cr_uid == uid) 1684 return (0); 1685 return (EPERM); 1686 } 1687