1 /* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
32 * 33 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 34 * $FreeBSD$ 35 */ 36 37 #include "opt_inet.h" 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/fcntl.h> 42 #include <sys/lock.h> 43 #include <sys/malloc.h> 44 #include <sys/mbuf.h> 45 #include <sys/mutex.h> 46 #include <sys/domain.h> 47 #include <sys/file.h> /* for struct knote */ 48 #include <sys/kernel.h> 49 #include <sys/malloc.h> 50 #include <sys/event.h> 51 #include <sys/poll.h> 52 #include <sys/proc.h> 53 #include <sys/protosw.h> 54 #include <sys/socket.h> 55 #include <sys/socketvar.h> 56 #include <sys/resourcevar.h> 57 #include <sys/signalvar.h> 58 #include <sys/sysctl.h> 59 #include <sys/uio.h> 60 #include <sys/jail.h> 61 62 #include <vm/uma.h> 63 64 #include <machine/limits.h> 65 66 #ifdef INET 67 static int do_setopt_accept_filter(struct socket *so, struct sockopt *sopt); 68 #endif 69 70 static void filt_sordetach(struct knote *kn); 71 static int filt_soread(struct knote *kn, long hint); 72 static void filt_sowdetach(struct knote *kn); 73 static int filt_sowrite(struct knote *kn, long hint); 74 static int filt_solisten(struct knote *kn, long hint); 75 76 static struct filterops solisten_filtops = 77 { 1, NULL, filt_sordetach, filt_solisten }; 78 static struct filterops soread_filtops = 79 { 1, NULL, filt_sordetach, filt_soread }; 80 static struct filterops sowrite_filtops = 81 { 1, NULL, filt_sowdetach, filt_sowrite }; 82 83 uma_zone_t socket_zone; 84 so_gen_t so_gencnt; /* generation count for sockets */ 85 86 MALLOC_DEFINE(M_SONAME, "soname", "socket name"); 87 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); 88 89 SYSCTL_DECL(_kern_ipc); 90 91 static int somaxconn = SOMAXCONN; 92 SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, 93 &somaxconn, 0, "Maximum pending socket connection queue size"); 94 static int numopensockets; 95 SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD, 96 &numopensockets, 0, "Number of open sockets"); 97 98 99 /* 100 * Socket 
operation routines. 101 * These routines are called by the routines in 102 * sys_socket.c or from a system process, and 103 * implement the semantics of socket operations by 104 * switching out to the protocol specific routines. 105 */ 106 107 /* 108 * Get a socket structure from our zone, and initialize it. 109 * Note that it would probably be better to allocate socket 110 * and PCB at the same time, but I'm not convinced that all 111 * the protocols can be easily modified to do this. 112 * 113 * soalloc() returns a socket with a ref count of 0. 114 */ 115 struct socket * 116 soalloc(waitok) 117 int waitok; 118 { 119 struct socket *so; 120 int flag; 121 122 if (waitok == 1) 123 flag = M_WAITOK; 124 else 125 flag = M_NOWAIT; 126 flag |= M_ZERO; 127 so = uma_zalloc(socket_zone, flag); 128 if (so) { 129 /* XXX race condition for reentrant kernel */ 130 so->so_gencnt = ++so_gencnt; 131 /* sx_init(&so->so_sxlock, "socket sxlock"); */ 132 TAILQ_INIT(&so->so_aiojobq); 133 ++numopensockets; 134 } 135 return so; 136 } 137 138 /* 139 * socreate returns a socket with a ref count of 1. The socket should be 140 * closed with soclose(). 
141 */ 142 int 143 socreate(dom, aso, type, proto, cred, td) 144 int dom; 145 struct socket **aso; 146 register int type; 147 int proto; 148 struct ucred *cred; 149 struct thread *td; 150 { 151 register struct protosw *prp; 152 register struct socket *so; 153 register int error; 154 155 if (proto) 156 prp = pffindproto(dom, proto, type); 157 else 158 prp = pffindtype(dom, type); 159 160 if (prp == 0 || prp->pr_usrreqs->pru_attach == 0) 161 return (EPROTONOSUPPORT); 162 163 if (jailed(td->td_ucred) && jail_socket_unixiproute_only && 164 prp->pr_domain->dom_family != PF_LOCAL && 165 prp->pr_domain->dom_family != PF_INET && 166 prp->pr_domain->dom_family != PF_ROUTE) { 167 return (EPROTONOSUPPORT); 168 } 169 170 if (prp->pr_type != type) 171 return (EPROTOTYPE); 172 so = soalloc(M_NOWAIT); 173 if (so == NULL) 174 return (ENOBUFS); 175 176 TAILQ_INIT(&so->so_incomp); 177 TAILQ_INIT(&so->so_comp); 178 so->so_type = type; 179 so->so_cred = crhold(cred); 180 so->so_proto = prp; 181 soref(so); 182 error = (*prp->pr_usrreqs->pru_attach)(so, proto, td); 183 if (error) { 184 so->so_state |= SS_NOFDREF; 185 sorele(so); 186 return (error); 187 } 188 *aso = so; 189 return (0); 190 } 191 192 int 193 sobind(so, nam, td) 194 struct socket *so; 195 struct sockaddr *nam; 196 struct thread *td; 197 { 198 int s = splnet(); 199 int error; 200 201 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, td); 202 splx(s); 203 return (error); 204 } 205 206 static void 207 sodealloc(struct socket *so) 208 { 209 210 KASSERT(so->so_count == 0, ("sodealloc(): so_count %d", so->so_count)); 211 so->so_gencnt = ++so_gencnt; 212 if (so->so_rcv.sb_hiwat) 213 (void)chgsbsize(so->so_cred->cr_uidinfo, 214 &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY); 215 if (so->so_snd.sb_hiwat) 216 (void)chgsbsize(so->so_cred->cr_uidinfo, 217 &so->so_snd.sb_hiwat, 0, RLIM_INFINITY); 218 #ifdef INET 219 if (so->so_accf != NULL) { 220 if (so->so_accf->so_accept_filter != NULL && 221 so->so_accf->so_accept_filter->accf_destroy 
!= NULL) { 222 so->so_accf->so_accept_filter->accf_destroy(so); 223 } 224 if (so->so_accf->so_accept_filter_str != NULL) 225 FREE(so->so_accf->so_accept_filter_str, M_ACCF); 226 FREE(so->so_accf, M_ACCF); 227 } 228 #endif 229 crfree(so->so_cred); 230 /* sx_destroy(&so->so_sxlock); */ 231 uma_zfree(socket_zone, so); 232 --numopensockets; 233 } 234 235 int 236 solisten(so, backlog, td) 237 register struct socket *so; 238 int backlog; 239 struct thread *td; 240 { 241 int s, error; 242 243 s = splnet(); 244 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, td); 245 if (error) { 246 splx(s); 247 return (error); 248 } 249 if (TAILQ_EMPTY(&so->so_comp)) 250 so->so_options |= SO_ACCEPTCONN; 251 if (backlog < 0 || backlog > somaxconn) 252 backlog = somaxconn; 253 so->so_qlimit = backlog; 254 splx(s); 255 return (0); 256 } 257 258 void 259 sofree(so) 260 register struct socket *so; 261 { 262 struct socket *head = so->so_head; 263 264 KASSERT(so->so_count == 0, ("socket %p so_count not 0", so)); 265 266 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) 267 return; 268 if (head != NULL) { 269 if (so->so_state & SS_INCOMP) { 270 TAILQ_REMOVE(&head->so_incomp, so, so_list); 271 head->so_incqlen--; 272 } else if (so->so_state & SS_COMP) { 273 /* 274 * We must not decommission a socket that's 275 * on the accept(2) queue. If we do, then 276 * accept(2) may hang after select(2) indicated 277 * that the listening socket was ready. 278 */ 279 return; 280 } else { 281 panic("sofree: not queued"); 282 } 283 so->so_state &= ~SS_INCOMP; 284 so->so_head = NULL; 285 } 286 sbrelease(&so->so_snd, so); 287 sorflush(so); 288 sodealloc(so); 289 } 290 291 /* 292 * Close a socket on last file table reference removal. 293 * Initiate disconnect if connected. 294 * Free socket when disconnect complete. 295 * 296 * This function will sorele() the socket. Note that soclose() may be 297 * called prior to the ref count reaching zero. 
The actual socket 298 * structure will not be freed until the ref count reaches zero. 299 */ 300 int 301 soclose(so) 302 register struct socket *so; 303 { 304 int s = splnet(); /* conservative */ 305 int error = 0; 306 307 funsetown(&so->so_sigio); 308 if (so->so_options & SO_ACCEPTCONN) { 309 struct socket *sp, *sonext; 310 311 sp = TAILQ_FIRST(&so->so_incomp); 312 for (; sp != NULL; sp = sonext) { 313 sonext = TAILQ_NEXT(sp, so_list); 314 (void) soabort(sp); 315 } 316 for (sp = TAILQ_FIRST(&so->so_comp); sp != NULL; sp = sonext) { 317 sonext = TAILQ_NEXT(sp, so_list); 318 /* Dequeue from so_comp since sofree() won't do it */ 319 TAILQ_REMOVE(&so->so_comp, sp, so_list); 320 so->so_qlen--; 321 sp->so_state &= ~SS_COMP; 322 sp->so_head = NULL; 323 (void) soabort(sp); 324 } 325 } 326 if (so->so_pcb == 0) 327 goto discard; 328 if (so->so_state & SS_ISCONNECTED) { 329 if ((so->so_state & SS_ISDISCONNECTING) == 0) { 330 error = sodisconnect(so); 331 if (error) 332 goto drop; 333 } 334 if (so->so_options & SO_LINGER) { 335 if ((so->so_state & SS_ISDISCONNECTING) && 336 (so->so_state & SS_NBIO)) 337 goto drop; 338 while (so->so_state & SS_ISCONNECTED) { 339 error = tsleep((caddr_t)&so->so_timeo, 340 PSOCK | PCATCH, "soclos", so->so_linger * hz); 341 if (error) 342 break; 343 } 344 } 345 } 346 drop: 347 if (so->so_pcb) { 348 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so); 349 if (error == 0) 350 error = error2; 351 } 352 discard: 353 if (so->so_state & SS_NOFDREF) 354 panic("soclose: NOFDREF"); 355 so->so_state |= SS_NOFDREF; 356 sorele(so); 357 splx(s); 358 return (error); 359 } 360 361 /* 362 * Must be called at splnet... 
363 */ 364 int 365 soabort(so) 366 struct socket *so; 367 { 368 int error; 369 370 error = (*so->so_proto->pr_usrreqs->pru_abort)(so); 371 if (error) { 372 sotryfree(so); /* note: does not decrement the ref count */ 373 return error; 374 } 375 return (0); 376 } 377 378 int 379 soaccept(so, nam) 380 register struct socket *so; 381 struct sockaddr **nam; 382 { 383 int s = splnet(); 384 int error; 385 386 if ((so->so_state & SS_NOFDREF) == 0) 387 panic("soaccept: !NOFDREF"); 388 so->so_state &= ~SS_NOFDREF; 389 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); 390 splx(s); 391 return (error); 392 } 393 394 int 395 soconnect(so, nam, td) 396 register struct socket *so; 397 struct sockaddr *nam; 398 struct thread *td; 399 { 400 int s; 401 int error; 402 403 if (so->so_options & SO_ACCEPTCONN) 404 return (EOPNOTSUPP); 405 s = splnet(); 406 /* 407 * If protocol is connection-based, can only connect once. 408 * Otherwise, if connected, try to disconnect first. 409 * This allows user to disconnect by connecting to, e.g., 410 * a null address. 
411 */ 412 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 413 ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 414 (error = sodisconnect(so)))) 415 error = EISCONN; 416 else 417 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, td); 418 splx(s); 419 return (error); 420 } 421 422 int 423 soconnect2(so1, so2) 424 register struct socket *so1; 425 struct socket *so2; 426 { 427 int s = splnet(); 428 int error; 429 430 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); 431 splx(s); 432 return (error); 433 } 434 435 int 436 sodisconnect(so) 437 register struct socket *so; 438 { 439 int s = splnet(); 440 int error; 441 442 if ((so->so_state & SS_ISCONNECTED) == 0) { 443 error = ENOTCONN; 444 goto bad; 445 } 446 if (so->so_state & SS_ISDISCONNECTING) { 447 error = EALREADY; 448 goto bad; 449 } 450 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); 451 bad: 452 splx(s); 453 return (error); 454 } 455 456 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 457 /* 458 * Send on a socket. 459 * If send must go all at once and message is larger than 460 * send buffering, then hard error. 461 * Lock against other senders. 462 * If must go all at once and not enough room now, then 463 * inform user that this would block and do nothing. 464 * Otherwise, if nonblocking, send as much as possible. 465 * The data to be sent is described by "uio" if nonzero, 466 * otherwise by the mbuf chain "top" (which must be null 467 * if uio is not). Data provided in mbuf chain must be small 468 * enough to send all at once. 469 * 470 * Returns nonzero on error, timeout or signal; callers 471 * must check for short counts if EINTR/ERESTART are returned. 472 * Data and control buffers are freed on return. 
473 */ 474 int 475 sosend(so, addr, uio, top, control, flags, td) 476 register struct socket *so; 477 struct sockaddr *addr; 478 struct uio *uio; 479 struct mbuf *top; 480 struct mbuf *control; 481 int flags; 482 struct thread *td; 483 { 484 struct mbuf **mp; 485 register struct mbuf *m; 486 register long space, len, resid; 487 int clen = 0, error, s, dontroute, mlen; 488 int atomic = sosendallatonce(so) || top; 489 490 if (uio) 491 resid = uio->uio_resid; 492 else 493 resid = top->m_pkthdr.len; 494 /* 495 * In theory resid should be unsigned. 496 * However, space must be signed, as it might be less than 0 497 * if we over-committed, and we must use a signed comparison 498 * of space and resid. On the other hand, a negative resid 499 * causes us to loop sending 0-length segments to the protocol. 500 * 501 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM 502 * type sockets since that's an error. 503 */ 504 if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) { 505 error = EINVAL; 506 goto out; 507 } 508 509 dontroute = 510 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 511 (so->so_proto->pr_flags & PR_ATOMIC); 512 if (td) 513 td->td_proc->p_stats->p_ru.ru_msgsnd++; 514 if (control) 515 clen = control->m_len; 516 #define snderr(errno) { error = errno; splx(s); goto release; } 517 518 restart: 519 error = sblock(&so->so_snd, SBLOCKWAIT(flags)); 520 if (error) 521 goto out; 522 do { 523 s = splnet(); 524 if (so->so_state & SS_CANTSENDMORE) 525 snderr(EPIPE); 526 if (so->so_error) { 527 error = so->so_error; 528 so->so_error = 0; 529 splx(s); 530 goto release; 531 } 532 if ((so->so_state & SS_ISCONNECTED) == 0) { 533 /* 534 * `sendto' and `sendmsg' is allowed on a connection- 535 * based socket if it supports implied connect. 536 * Return ENOTCONN if not connected and no address is 537 * supplied. 
538 */ 539 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && 540 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { 541 if ((so->so_state & SS_ISCONFIRMING) == 0 && 542 !(resid == 0 && clen != 0)) 543 snderr(ENOTCONN); 544 } else if (addr == 0) 545 snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ? 546 ENOTCONN : EDESTADDRREQ); 547 } 548 space = sbspace(&so->so_snd); 549 if (flags & MSG_OOB) 550 space += 1024; 551 if ((atomic && resid > so->so_snd.sb_hiwat) || 552 clen > so->so_snd.sb_hiwat) 553 snderr(EMSGSIZE); 554 if (space < resid + clen && 555 (atomic || space < so->so_snd.sb_lowat || space < clen)) { 556 if (so->so_state & SS_NBIO) 557 snderr(EWOULDBLOCK); 558 sbunlock(&so->so_snd); 559 error = sbwait(&so->so_snd); 560 splx(s); 561 if (error) 562 goto out; 563 goto restart; 564 } 565 splx(s); 566 mp = ⊤ 567 space -= clen; 568 do { 569 if (uio == NULL) { 570 /* 571 * Data is prepackaged in "top". 572 */ 573 resid = 0; 574 if (flags & MSG_EOR) 575 top->m_flags |= M_EOR; 576 } else do { 577 if (top == 0) { 578 MGETHDR(m, M_TRYWAIT, MT_DATA); 579 if (m == NULL) { 580 error = ENOBUFS; 581 goto release; 582 } 583 mlen = MHLEN; 584 m->m_pkthdr.len = 0; 585 m->m_pkthdr.rcvif = (struct ifnet *)0; 586 } else { 587 MGET(m, M_TRYWAIT, MT_DATA); 588 if (m == NULL) { 589 error = ENOBUFS; 590 goto release; 591 } 592 mlen = MLEN; 593 } 594 if (resid >= MINCLSIZE) { 595 MCLGET(m, M_TRYWAIT); 596 if ((m->m_flags & M_EXT) == 0) 597 goto nopages; 598 mlen = MCLBYTES; 599 len = min(min(mlen, resid), space); 600 } else { 601 nopages: 602 len = min(min(mlen, resid), space); 603 /* 604 * For datagram protocols, leave room 605 * for protocol headers in first mbuf. 
606 */ 607 if (atomic && top == 0 && len < mlen) 608 MH_ALIGN(m, len); 609 } 610 space -= len; 611 error = uiomove(mtod(m, caddr_t), (int)len, uio); 612 resid = uio->uio_resid; 613 m->m_len = len; 614 *mp = m; 615 top->m_pkthdr.len += len; 616 if (error) 617 goto release; 618 mp = &m->m_next; 619 if (resid <= 0) { 620 if (flags & MSG_EOR) 621 top->m_flags |= M_EOR; 622 break; 623 } 624 } while (space > 0 && atomic); 625 if (dontroute) 626 so->so_options |= SO_DONTROUTE; 627 s = splnet(); /* XXX */ 628 /* 629 * XXX all the SS_CANTSENDMORE checks previously 630 * done could be out of date. We could have recieved 631 * a reset packet in an interrupt or maybe we slept 632 * while doing page faults in uiomove() etc. We could 633 * probably recheck again inside the splnet() protection 634 * here, but there are probably other places that this 635 * also happens. We must rethink this. 636 */ 637 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 638 (flags & MSG_OOB) ? PRUS_OOB : 639 /* 640 * If the user set MSG_EOF, the protocol 641 * understands this flag and nothing left to 642 * send then use PRU_SEND_EOF instead of PRU_SEND. 643 */ 644 ((flags & MSG_EOF) && 645 (so->so_proto->pr_flags & PR_IMPLOPCL) && 646 (resid <= 0)) ? 647 PRUS_EOF : 648 /* If there is more to send set PRUS_MORETOCOME */ 649 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0, 650 top, addr, control, td); 651 splx(s); 652 if (dontroute) 653 so->so_options &= ~SO_DONTROUTE; 654 clen = 0; 655 control = 0; 656 top = 0; 657 mp = ⊤ 658 if (error) 659 goto release; 660 } while (resid && space > 0); 661 } while (resid); 662 663 release: 664 sbunlock(&so->so_snd); 665 out: 666 if (top) 667 m_freem(top); 668 if (control) 669 m_freem(control); 670 return (error); 671 } 672 673 /* 674 * Implement receive operations on a socket. 675 * We depend on the way that records are added to the sockbuf 676 * by sbappend*. 
In particular, each record (mbufs linked through m_next) 677 * must begin with an address if the protocol so specifies, 678 * followed by an optional mbuf or mbufs containing ancillary data, 679 * and then zero or more mbufs of data. 680 * In order to avoid blocking network interrupts for the entire time here, 681 * we splx() while doing the actual copy to user space. 682 * Although the sockbuf is locked, new data may still be appended, 683 * and thus we must maintain consistency of the sockbuf during that time. 684 * 685 * The caller may receive the data as a single mbuf chain by supplying 686 * an mbuf **mp0 for use in returning the chain. The uio is then used 687 * only for the count in uio_resid. 688 */ 689 int 690 soreceive(so, psa, uio, mp0, controlp, flagsp) 691 register struct socket *so; 692 struct sockaddr **psa; 693 struct uio *uio; 694 struct mbuf **mp0; 695 struct mbuf **controlp; 696 int *flagsp; 697 { 698 struct mbuf *m, **mp; 699 register int flags, len, error, s, offset; 700 struct protosw *pr = so->so_proto; 701 struct mbuf *nextrecord; 702 int moff, type = 0; 703 int orig_resid = uio->uio_resid; 704 705 mp = mp0; 706 if (psa) 707 *psa = 0; 708 if (controlp) 709 *controlp = 0; 710 if (flagsp) 711 flags = *flagsp &~ MSG_EOR; 712 else 713 flags = 0; 714 if (flags & MSG_OOB) { 715 m = m_get(M_TRYWAIT, MT_DATA); 716 if (m == NULL) 717 return (ENOBUFS); 718 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); 719 if (error) 720 goto bad; 721 do { 722 error = uiomove(mtod(m, caddr_t), 723 (int) min(uio->uio_resid, m->m_len), uio); 724 m = m_free(m); 725 } while (uio->uio_resid && error == 0 && m); 726 bad: 727 if (m) 728 m_freem(m); 729 return (error); 730 } 731 if (mp) 732 *mp = (struct mbuf *)0; 733 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) 734 (*pr->pr_usrreqs->pru_rcvd)(so, 0); 735 736 restart: 737 error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); 738 if (error) 739 return (error); 740 s = splnet(); 741 742 m = so->so_rcv.sb_mb; 
743 /* 744 * If we have less data than requested, block awaiting more 745 * (subject to any timeout) if: 746 * 1. the current count is less than the low water mark, or 747 * 2. MSG_WAITALL is set, and it is possible to do the entire 748 * receive operation at once if we block (resid <= hiwat). 749 * 3. MSG_DONTWAIT is not set 750 * If MSG_WAITALL is set but resid is larger than the receive buffer, 751 * we have to do the receive in sections, and thus risk returning 752 * a short count if a timeout or signal occurs after we start. 753 */ 754 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && 755 so->so_rcv.sb_cc < uio->uio_resid) && 756 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || 757 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && 758 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { 759 KASSERT(m != 0 || !so->so_rcv.sb_cc, 760 ("receive: m == %p so->so_rcv.sb_cc == %lu", 761 m, so->so_rcv.sb_cc)); 762 if (so->so_error) { 763 if (m) 764 goto dontblock; 765 error = so->so_error; 766 if ((flags & MSG_PEEK) == 0) 767 so->so_error = 0; 768 goto release; 769 } 770 if (so->so_state & SS_CANTRCVMORE) { 771 if (m) 772 goto dontblock; 773 else 774 goto release; 775 } 776 for (; m; m = m->m_next) 777 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { 778 m = so->so_rcv.sb_mb; 779 goto dontblock; 780 } 781 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && 782 (so->so_proto->pr_flags & PR_CONNREQUIRED)) { 783 error = ENOTCONN; 784 goto release; 785 } 786 if (uio->uio_resid == 0) 787 goto release; 788 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { 789 error = EWOULDBLOCK; 790 goto release; 791 } 792 sbunlock(&so->so_rcv); 793 error = sbwait(&so->so_rcv); 794 splx(s); 795 if (error) 796 return (error); 797 goto restart; 798 } 799 dontblock: 800 if (uio->uio_td) 801 uio->uio_td->td_proc->p_stats->p_ru.ru_msgrcv++; 802 nextrecord = m->m_nextpkt; 803 if (pr->pr_flags & PR_ADDR) { 804 KASSERT(m->m_type == MT_SONAME, 805 ("m->m_type == 
%d", m->m_type)); 806 orig_resid = 0; 807 if (psa) 808 *psa = dup_sockaddr(mtod(m, struct sockaddr *), 809 mp0 == 0); 810 if (flags & MSG_PEEK) { 811 m = m->m_next; 812 } else { 813 sbfree(&so->so_rcv, m); 814 so->so_rcv.sb_mb = m_free(m); 815 m = so->so_rcv.sb_mb; 816 } 817 } 818 while (m && m->m_type == MT_CONTROL && error == 0) { 819 if (flags & MSG_PEEK) { 820 if (controlp) 821 *controlp = m_copy(m, 0, m->m_len); 822 m = m->m_next; 823 } else { 824 sbfree(&so->so_rcv, m); 825 so->so_rcv.sb_mb = m->m_next; 826 m->m_next = NULL; 827 if (pr->pr_domain->dom_externalize) 828 error = 829 (*pr->pr_domain->dom_externalize)(m, controlp); 830 else if (controlp) 831 *controlp = m; 832 else 833 m_freem(m); 834 m = so->so_rcv.sb_mb; 835 } 836 if (controlp) { 837 orig_resid = 0; 838 do 839 controlp = &(*controlp)->m_next; 840 while (*controlp != NULL); 841 } 842 } 843 if (m) { 844 if ((flags & MSG_PEEK) == 0) 845 m->m_nextpkt = nextrecord; 846 type = m->m_type; 847 if (type == MT_OOBDATA) 848 flags |= MSG_OOB; 849 } 850 moff = 0; 851 offset = 0; 852 while (m && uio->uio_resid > 0 && error == 0) { 853 if (m->m_type == MT_OOBDATA) { 854 if (type != MT_OOBDATA) 855 break; 856 } else if (type == MT_OOBDATA) 857 break; 858 else 859 KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER, 860 ("m->m_type == %d", m->m_type)); 861 so->so_state &= ~SS_RCVATMARK; 862 len = uio->uio_resid; 863 if (so->so_oobmark && len > so->so_oobmark - offset) 864 len = so->so_oobmark - offset; 865 if (len > m->m_len - moff) 866 len = m->m_len - moff; 867 /* 868 * If mp is set, just pass back the mbufs. 869 * Otherwise copy them out via the uio, then free. 870 * Sockbuf must be consistent here (points to current mbuf, 871 * it points to next record) when we drop priority; 872 * we must note any additions to the sockbuf when we 873 * block interrupts again. 
874 */ 875 if (mp == 0) { 876 splx(s); 877 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); 878 s = splnet(); 879 if (error) 880 goto release; 881 } else 882 uio->uio_resid -= len; 883 if (len == m->m_len - moff) { 884 if (m->m_flags & M_EOR) 885 flags |= MSG_EOR; 886 if (flags & MSG_PEEK) { 887 m = m->m_next; 888 moff = 0; 889 } else { 890 nextrecord = m->m_nextpkt; 891 sbfree(&so->so_rcv, m); 892 if (mp) { 893 *mp = m; 894 mp = &m->m_next; 895 so->so_rcv.sb_mb = m = m->m_next; 896 *mp = (struct mbuf *)0; 897 } else { 898 so->so_rcv.sb_mb = m_free(m); 899 m = so->so_rcv.sb_mb; 900 } 901 if (m) 902 m->m_nextpkt = nextrecord; 903 } 904 } else { 905 if (flags & MSG_PEEK) 906 moff += len; 907 else { 908 if (mp) 909 *mp = m_copym(m, 0, len, M_TRYWAIT); 910 m->m_data += len; 911 m->m_len -= len; 912 so->so_rcv.sb_cc -= len; 913 } 914 } 915 if (so->so_oobmark) { 916 if ((flags & MSG_PEEK) == 0) { 917 so->so_oobmark -= len; 918 if (so->so_oobmark == 0) { 919 so->so_state |= SS_RCVATMARK; 920 break; 921 } 922 } else { 923 offset += len; 924 if (offset == so->so_oobmark) 925 break; 926 } 927 } 928 if (flags & MSG_EOR) 929 break; 930 /* 931 * If the MSG_WAITALL flag is set (for non-atomic socket), 932 * we must not quit until "uio->uio_resid == 0" or an error 933 * termination. If a signal/timeout occurs, return 934 * with a short count but without error. 935 * Keep sockbuf locked against other readers. 936 */ 937 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 && 938 !sosendallatonce(so) && !nextrecord) { 939 if (so->so_error || so->so_state & SS_CANTRCVMORE) 940 break; 941 /* 942 * Notify the protocol that some data has been 943 * drained before blocking. 
944 */ 945 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) 946 (*pr->pr_usrreqs->pru_rcvd)(so, flags); 947 error = sbwait(&so->so_rcv); 948 if (error) { 949 sbunlock(&so->so_rcv); 950 splx(s); 951 return (0); 952 } 953 m = so->so_rcv.sb_mb; 954 if (m) 955 nextrecord = m->m_nextpkt; 956 } 957 } 958 959 if (m && pr->pr_flags & PR_ATOMIC) { 960 flags |= MSG_TRUNC; 961 if ((flags & MSG_PEEK) == 0) 962 (void) sbdroprecord(&so->so_rcv); 963 } 964 if ((flags & MSG_PEEK) == 0) { 965 if (m == 0) 966 so->so_rcv.sb_mb = nextrecord; 967 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) 968 (*pr->pr_usrreqs->pru_rcvd)(so, flags); 969 } 970 if (orig_resid == uio->uio_resid && orig_resid && 971 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { 972 sbunlock(&so->so_rcv); 973 splx(s); 974 goto restart; 975 } 976 977 if (flagsp) 978 *flagsp |= flags; 979 release: 980 sbunlock(&so->so_rcv); 981 splx(s); 982 return (error); 983 } 984 985 int 986 soshutdown(so, how) 987 register struct socket *so; 988 register int how; 989 { 990 register struct protosw *pr = so->so_proto; 991 992 if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) 993 return (EINVAL); 994 995 if (how != SHUT_WR) 996 sorflush(so); 997 if (how != SHUT_RD) 998 return ((*pr->pr_usrreqs->pru_shutdown)(so)); 999 return (0); 1000 } 1001 1002 void 1003 sorflush(so) 1004 register struct socket *so; 1005 { 1006 register struct sockbuf *sb = &so->so_rcv; 1007 register struct protosw *pr = so->so_proto; 1008 register int s; 1009 struct sockbuf asb; 1010 1011 sb->sb_flags |= SB_NOINTR; 1012 (void) sblock(sb, M_WAITOK); 1013 s = splimp(); 1014 socantrcvmore(so); 1015 sbunlock(sb); 1016 asb = *sb; 1017 bzero((caddr_t)sb, sizeof (*sb)); 1018 splx(s); 1019 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) 1020 (*pr->pr_domain->dom_dispose)(asb.sb_mb); 1021 sbrelease(&asb, so); 1022 } 1023 1024 #ifdef INET 1025 static int 1026 do_setopt_accept_filter(so, sopt) 1027 struct socket *so; 1028 struct sockopt 
*sopt; 1029 { 1030 struct accept_filter_arg *afap = NULL; 1031 struct accept_filter *afp; 1032 struct so_accf *af = so->so_accf; 1033 int error = 0; 1034 1035 /* do not set/remove accept filters on non listen sockets */ 1036 if ((so->so_options & SO_ACCEPTCONN) == 0) { 1037 error = EINVAL; 1038 goto out; 1039 } 1040 1041 /* removing the filter */ 1042 if (sopt == NULL) { 1043 if (af != NULL) { 1044 if (af->so_accept_filter != NULL && 1045 af->so_accept_filter->accf_destroy != NULL) { 1046 af->so_accept_filter->accf_destroy(so); 1047 } 1048 if (af->so_accept_filter_str != NULL) { 1049 FREE(af->so_accept_filter_str, M_ACCF); 1050 } 1051 FREE(af, M_ACCF); 1052 so->so_accf = NULL; 1053 } 1054 so->so_options &= ~SO_ACCEPTFILTER; 1055 return (0); 1056 } 1057 /* adding a filter */ 1058 /* must remove previous filter first */ 1059 if (af != NULL) { 1060 error = EINVAL; 1061 goto out; 1062 } 1063 /* don't put large objects on the kernel stack */ 1064 MALLOC(afap, struct accept_filter_arg *, sizeof(*afap), M_TEMP, M_WAITOK); 1065 error = sooptcopyin(sopt, afap, sizeof *afap, sizeof *afap); 1066 afap->af_name[sizeof(afap->af_name)-1] = '\0'; 1067 afap->af_arg[sizeof(afap->af_arg)-1] = '\0'; 1068 if (error) 1069 goto out; 1070 afp = accept_filt_get(afap->af_name); 1071 if (afp == NULL) { 1072 error = ENOENT; 1073 goto out; 1074 } 1075 MALLOC(af, struct so_accf *, sizeof(*af), M_ACCF, M_WAITOK | M_ZERO); 1076 if (afp->accf_create != NULL) { 1077 if (afap->af_name[0] != '\0') { 1078 int len = strlen(afap->af_name) + 1; 1079 1080 MALLOC(af->so_accept_filter_str, char *, len, M_ACCF, M_WAITOK); 1081 strcpy(af->so_accept_filter_str, afap->af_name); 1082 } 1083 af->so_accept_filter_arg = afp->accf_create(so, afap->af_arg); 1084 if (af->so_accept_filter_arg == NULL) { 1085 FREE(af->so_accept_filter_str, M_ACCF); 1086 FREE(af, M_ACCF); 1087 so->so_accf = NULL; 1088 error = EINVAL; 1089 goto out; 1090 } 1091 } 1092 af->so_accept_filter = afp; 1093 so->so_accf = af; 1094 so->so_options 
|= SO_ACCEPTFILTER; 1095 out: 1096 if (afap != NULL) 1097 FREE(afap, M_TEMP); 1098 return (error); 1099 } 1100 #endif /* INET */ 1101 1102 /* 1103 * Perhaps this routine, and sooptcopyout(), below, ought to come in 1104 * an additional variant to handle the case where the option value needs 1105 * to be some kind of integer, but not a specific size. 1106 * In addition to their use here, these functions are also called by the 1107 * protocol-level pr_ctloutput() routines. 1108 */ 1109 int 1110 sooptcopyin(sopt, buf, len, minlen) 1111 struct sockopt *sopt; 1112 void *buf; 1113 size_t len; 1114 size_t minlen; 1115 { 1116 size_t valsize; 1117 1118 /* 1119 * If the user gives us more than we wanted, we ignore it, 1120 * but if we don't get the minimum length the caller 1121 * wants, we return EINVAL. On success, sopt->sopt_valsize 1122 * is set to however much we actually retrieved. 1123 */ 1124 if ((valsize = sopt->sopt_valsize) < minlen) 1125 return EINVAL; 1126 if (valsize > len) 1127 sopt->sopt_valsize = valsize = len; 1128 1129 if (sopt->sopt_td != 0) 1130 return (copyin(sopt->sopt_val, buf, valsize)); 1131 1132 bcopy(sopt->sopt_val, buf, valsize); 1133 return 0; 1134 } 1135 1136 int 1137 sosetopt(so, sopt) 1138 struct socket *so; 1139 struct sockopt *sopt; 1140 { 1141 int error, optval; 1142 struct linger l; 1143 struct timeval tv; 1144 u_long val; 1145 1146 error = 0; 1147 if (sopt->sopt_level != SOL_SOCKET) { 1148 if (so->so_proto && so->so_proto->pr_ctloutput) 1149 return ((*so->so_proto->pr_ctloutput) 1150 (so, sopt)); 1151 error = ENOPROTOOPT; 1152 } else { 1153 switch (sopt->sopt_name) { 1154 #ifdef INET 1155 case SO_ACCEPTFILTER: 1156 error = do_setopt_accept_filter(so, sopt); 1157 if (error) 1158 goto bad; 1159 break; 1160 #endif 1161 case SO_LINGER: 1162 error = sooptcopyin(sopt, &l, sizeof l, sizeof l); 1163 if (error) 1164 goto bad; 1165 1166 so->so_linger = l.l_linger; 1167 if (l.l_onoff) 1168 so->so_options |= SO_LINGER; 1169 else 1170 so->so_options 
&= ~SO_LINGER;
			break;

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			/*
			 * Simple boolean flags: the option name value
			 * doubles as the bit to set or clear in so_options.
			 */
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;
			if (optval)
				so->so_options |= sopt->sopt_name;
			else
				so->so_options &= ~sopt->sopt_name;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDBUF:
			case SO_RCVBUF:
				/* sbreserve() returns 0 when the request
				 * exceeds what may be reserved. */
				if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv, (u_long)optval,
				    so, curthread) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			error = sooptcopyin(sopt, &tv, sizeof tv,
					    sizeof tv);
			if (error)
				goto bad;

			/*
			 * Reject timeouts that are negative, have an invalid
			 * microsecond field, or would not fit in a short
			 * worth of clock ticks (sb_timeo's range).
			 */
			/* assert(hz > 0); */
			if (tv.tv_sec < 0 || tv.tv_sec > SHRT_MAX / hz ||
			    tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
				error = EDOM;
				goto bad;
			}
			/* assert(tick > 0); */
			/* assert(ULONG_MAX - SHRT_MAX >= 1000000); */
			val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick;
			if (val > SHRT_MAX) {
				error = EDOM;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		default:
			error = ENOPROTOOPT;
			break;
		}
		/*
		 * Also give the protocol a chance to act on a successful
		 * socket-level option; its return status is ignored.
		 */
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		}
	}
bad:
	return (error);
}

/* Helper routine for getsockopt */
/*
 * Copy up to 'len' bytes from the kernel buffer 'buf' back to the
 * getsockopt() caller, truncating to the caller-supplied buffer size
 * and recording the amount copied in sopt_valsize.
 */
int
sooptcopyout(sopt, buf, len)
	struct sockopt *sopt;
	void *buf;
	size_t len;
{
	int	error;
	size_t	valsize;

	error = 0;

	/*
	 * Documented get behavior is that we always return a value,
	 * possibly truncated to fit in the user's buffer.
	 * Traditional behavior is that we always tell the user
	 * precisely how much we copied, rather than something useful
	 * like the total amount we had available for her.
	 * Note that this interface is not idempotent; the entire answer must
	 * be generated ahead of time.
	 */
	valsize = min(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != 0) {
		/* Non-NULL sopt_td: destination is user space, use copyout(). */
		if (sopt->sopt_td != 0)
			error = copyout(buf, sopt->sopt_val, valsize);
		else
			bcopy(buf, sopt->sopt_val, valsize);
	}
	return error;
}

/*
 * Get a socket option.  As in sosetopt(), levels other than SOL_SOCKET
 * are passed straight to the protocol's pr_ctloutput(); SOL_SOCKET
 * options are answered here from the socket's own state.
 */
int
sogetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int	error, optval;
	struct	linger l;
	struct	timeval tv;
#ifdef INET
	struct accept_filter_arg *afap;
#endif

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		} else
			return (ENOPROTOOPT);
	} else {
		switch (sopt->sopt_name) {
#ifdef INET
		case SO_ACCEPTFILTER:
			/* Accept filters only make sense on listening sockets. */
			if ((so->so_options & SO_ACCEPTCONN) == 0)
				return (EINVAL);
			MALLOC(afap, struct accept_filter_arg *, sizeof(*afap),
			       M_TEMP, M_WAITOK | M_ZERO);
			if ((so->so_options & SO_ACCEPTFILTER) != 0) {
				strcpy(afap->af_name, so->so_accf->so_accept_filter->accf_name);
				if (so->so_accf->so_accept_filter_str != NULL)
					strcpy(afap->af_arg, so->so_accf->so_accept_filter_str);
			}
			error = sooptcopyout(sopt, afap, sizeof(*afap));
			FREE(afap, M_TEMP);
			break;
#endif

		case SO_LINGER:
			l.l_onoff = so->so_options & SO_LINGER;
			l.l_linger = so->so_linger;
			error = sooptcopyout(sopt, &l, sizeof l);
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			/* Boolean options: report the bit from so_options. */
			optval = so->so_options & sopt->sopt_name;
integer:
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

		case SO_TYPE:
			optval = so->so_type;
			goto integer;

		case SO_ERROR:
			/* Reading SO_ERROR clears the pending error. */
			optval = so->so_error;
			so->so_error = 0;
			goto integer;

		case SO_SNDBUF:
			optval = so->so_snd.sb_hiwat;
			goto integer;

		case SO_RCVBUF:
			optval = so->so_rcv.sb_hiwat;
			goto integer;

		case SO_SNDLOWAT:
			optval = so->so_snd.sb_lowat;
			goto integer;

		case SO_RCVLOWAT:
			optval = so->so_rcv.sb_lowat;
			goto integer;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			optval = (sopt->sopt_name == SO_SNDTIMEO ?
				  so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			/* Convert the tick count back into a timeval. */
			tv.tv_sec = optval / hz;
			tv.tv_usec = (optval % hz) * tick;
			error = sooptcopyout(sopt, &tv, sizeof tv);
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		return (error);
	}
}

/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
/*
 * Allocate an mbuf chain big enough to hold sopt_valsize bytes of
 * option data, using clusters for segments larger than MLEN.  The
 * allocation may sleep (M_TRYWAIT) only when sopt_td is non-NULL.
 * Returns 0 with *mp pointing at the chain, or ENOBUFS on failure,
 * in which case any partially-built chain has been freed.
 */
int
soopt_getm(struct sockopt *sopt, struct mbuf **mp)
{
	struct mbuf *m, *m_prev;
	int sopt_size = sopt->sopt_valsize;

	MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_DATA);
	if (m == 0)
		return ENOBUFS;
	if (sopt_size > MLEN) {
		MCLGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return ENOBUFS;
		}
		m->m_len = min(MCLBYTES, sopt_size);
	} else {
		m->m_len = min(MLEN, sopt_size);
	}
	sopt_size -= m->m_len;
	*mp = m;
	m_prev = m;

	/* Keep appending mbufs until the whole request is covered. */
	while (sopt_size) {
		MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_DATA);
		if (m == 0) {
			m_freem(*mp);
			return ENOBUFS;
		}
		if (sopt_size > MLEN) {
			MCLGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_freem(*mp);
				return ENOBUFS;
			}
			m->m_len = min(MCLBYTES, sopt_size);
		} else {
			m->m_len = min(MLEN, sopt_size);
		}
		sopt_size -= m->m_len;
		m_prev->m_next = m;
		m_prev = m;
	}
	return 0;
}

/* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines.
*/ 1461 int 1462 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m) 1463 { 1464 struct mbuf *m0 = m; 1465 1466 if (sopt->sopt_val == NULL) 1467 return 0; 1468 while (m != NULL && sopt->sopt_valsize >= m->m_len) { 1469 if (sopt->sopt_td != NULL) { 1470 int error; 1471 1472 error = copyin(sopt->sopt_val, mtod(m, char *), 1473 m->m_len); 1474 if (error != 0) { 1475 m_freem(m0); 1476 return(error); 1477 } 1478 } else 1479 bcopy(sopt->sopt_val, mtod(m, char *), m->m_len); 1480 sopt->sopt_valsize -= m->m_len; 1481 (caddr_t)sopt->sopt_val += m->m_len; 1482 m = m->m_next; 1483 } 1484 if (m != NULL) /* should be allocated enoughly at ip6_sooptmcopyin() */ 1485 panic("ip6_sooptmcopyin"); 1486 return 0; 1487 } 1488 1489 /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */ 1490 int 1491 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m) 1492 { 1493 struct mbuf *m0 = m; 1494 size_t valsize = 0; 1495 1496 if (sopt->sopt_val == NULL) 1497 return 0; 1498 while (m != NULL && sopt->sopt_valsize >= m->m_len) { 1499 if (sopt->sopt_td != NULL) { 1500 int error; 1501 1502 error = copyout(mtod(m, char *), sopt->sopt_val, 1503 m->m_len); 1504 if (error != 0) { 1505 m_freem(m0); 1506 return(error); 1507 } 1508 } else 1509 bcopy(mtod(m, char *), sopt->sopt_val, m->m_len); 1510 sopt->sopt_valsize -= m->m_len; 1511 (caddr_t)sopt->sopt_val += m->m_len; 1512 valsize += m->m_len; 1513 m = m->m_next; 1514 } 1515 if (m != NULL) { 1516 /* enough soopt buffer should be given from user-land */ 1517 m_freem(m0); 1518 return(EINVAL); 1519 } 1520 sopt->sopt_valsize = valsize; 1521 return 0; 1522 } 1523 1524 void 1525 sohasoutofband(so) 1526 register struct socket *so; 1527 { 1528 if (so->so_sigio != NULL) 1529 pgsigio(&so->so_sigio, SIGURG, 0); 1530 selwakeup(&so->so_rcv.sb_sel); 1531 } 1532 1533 int 1534 sopoll(struct socket *so, int events, struct ucred *cred, struct thread *td) 1535 { 1536 int revents = 0; 1537 int s = splnet(); 1538 1539 if (events & (POLLIN | 
POLLRDNORM))
		if (soreadable(so))
			revents |= events & (POLLIN | POLLRDNORM);

	/* POLLINIGNEOF: readable data/connections without the EOF check. */
	if (events & POLLINIGNEOF)
		if (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
		    !TAILQ_EMPTY(&so->so_comp) || so->so_error)
			revents |= POLLINIGNEOF;

	if (events & (POLLOUT | POLLWRNORM))
		if (sowriteable(so))
			revents |= events & (POLLOUT | POLLWRNORM);

	if (events & (POLLPRI | POLLRDBAND))
		if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
			revents |= events & (POLLPRI | POLLRDBAND);

	if (revents == 0) {
		/*
		 * Nothing ready yet: record this thread against the
		 * relevant socket buffer(s) so selwakeup() can find it.
		 */
		if (events &
		    (POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM |
		     POLLRDBAND)) {
			selrecord(td, &so->so_rcv.sb_sel);
			so->so_rcv.sb_flags |= SB_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(td, &so->so_snd.sb_sel);
			so->so_snd.sb_flags |= SB_SEL;
		}
	}

	splx(s);
	return (revents);
}

/*
 * Attach a knote to a socket.  EVFILT_READ uses the listen filter on
 * listening sockets and the plain read filter otherwise; EVFILT_WRITE
 * watches the send buffer.  Returns 1 for unsupported filter types.
 */
int
sokqfilter(struct file *fp, struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	struct sockbuf *sb;
	int s;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		sb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		sb = &so->so_snd;
		break;
	default:
		return (1);
	}

	s = splnet();
	SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
	sb->sb_flags |= SB_KNOTE;
	splx(s);
	return (0);
}

/*
 * Detach a read knote from the socket's receive buffer, clearing
 * SB_KNOTE once the last knote is gone.
 */
static void
filt_sordetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	int s = splnet();

	SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
		so->so_rcv.sb_flags &= ~SB_KNOTE;
	splx(s);
}

/*ARGSUSED*/
/*
 * kqueue read filter: kn_data is the receive-buffer byte count;
 * fires on EOF (SS_CANTRCVMORE), pending error, or data reaching the
 * low-water mark (kn_sdata when NOTE_LOWAT is set).
 */
static int
filt_soread(struct knote
*kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = so->so_rcv.sb_cc;
	if (so->so_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_rcv.sb_lowat);
}

/*
 * Detach a write knote from the socket's send buffer, clearing
 * SB_KNOTE once the last knote is gone.
 */
static void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	int s = splnet();

	SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
		so->so_snd.sb_flags &= ~SB_KNOTE;
	splx(s);
}

/*ARGSUSED*/
/*
 * kqueue write filter: kn_data is the free space in the send buffer;
 * fires on EOF (SS_CANTSENDMORE), pending error, or space reaching the
 * low-water mark (kn_sdata when NOTE_LOWAT is set).  Not ready while a
 * connection-oriented socket is unconnected.
 */
static int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = sbspace(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (0);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_snd.sb_lowat);
}

/*ARGSUSED*/
/*
 * kqueue filter for listening sockets: ready when a completed
 * connection is queued; kn_data reports the current queue length.
 */
static int
filt_solisten(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = so->so_qlen;
	return (! TAILQ_EMPTY(&so->so_comp));
}

/*
 * Check whether the socket's credential uid matches 'uid'.
 * Returns 0 on a match, EPERM otherwise (including so == NULL).
 */
int
socheckuid(struct socket *so, uid_t uid)
{

	if (so == NULL)
		return (EPERM);
	if (so->so_cred->cr_uid == uid)
		return (0);
	return (EPERM);
}