xref: /freebsd/sys/kern/uipc_syscalls.c (revision e0c27215058b5786c78fcfb3963eebe61a989511)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * sendfile(2) and related extensions:
6  * Copyright (c) 1998, David Greenman. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
37  */
38 
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD$");
41 
42 #include "opt_compat.h"
43 #include "opt_ktrace.h"
44 #include "opt_mac.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/kernel.h>
49 #include <sys/lock.h>
50 #include <sys/mac.h>
51 #include <sys/mutex.h>
52 #include <sys/sysproto.h>
53 #include <sys/malloc.h>
54 #include <sys/filedesc.h>
55 #include <sys/event.h>
56 #include <sys/proc.h>
57 #include <sys/fcntl.h>
58 #include <sys/file.h>
59 #include <sys/filio.h>
60 #include <sys/mount.h>
61 #include <sys/mbuf.h>
62 #include <sys/protosw.h>
63 #include <sys/socket.h>
64 #include <sys/socketvar.h>
65 #include <sys/signalvar.h>
66 #include <sys/syscallsubr.h>
67 #include <sys/uio.h>
68 #include <sys/vnode.h>
69 #ifdef KTRACE
70 #include <sys/ktrace.h>
71 #endif
72 
73 #include <vm/vm.h>
74 #include <vm/vm_object.h>
75 #include <vm/vm_page.h>
76 #include <vm/vm_pageout.h>
77 #include <vm/vm_kern.h>
78 #include <vm/vm_extern.h>
79 
80 static void sf_buf_init(void *arg);
81 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
82 
83 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
84 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
85 
86 static int accept1(struct thread *td, struct accept_args *uap, int compat);
87 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
88 static int getsockname1(struct thread *td, struct getsockname_args *uap,
89 			int compat);
90 static int getpeername1(struct thread *td, struct getpeername_args *uap,
91 			int compat);
92 
93 /*
94  * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the
95  * sf_freelist head with the sf_lock mutex.
96  */
97 static struct {
98 	SLIST_HEAD(, sf_buf) sf_head;
99 	struct mtx sf_lock;
100 } sf_freelist;
101 
102 static u_int sf_buf_alloc_want;
103 
104 /*
105  * System call interface to the socket abstraction.
106  */
107 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
108 #define COMPAT_OLDSOCK
109 #endif
110 
111 /*
112  * MPSAFE
113  */
114 int
115 socket(td, uap)
116 	struct thread *td;
117 	register struct socket_args /* {
118 		int	domain;
119 		int	type;
120 		int	protocol;
121 	} */ *uap;
122 {
123 	struct filedesc *fdp;
124 	struct socket *so;
125 	struct file *fp;
126 	int fd, error;
127 
128 	mtx_lock(&Giant);
129 	fdp = td->td_proc->p_fd;
130 	error = falloc(td, &fp, &fd);
131 	if (error)
132 		goto done2;
133 	fhold(fp);
134 	error = socreate(uap->domain, &so, uap->type, uap->protocol,
135 	    td->td_ucred, td);
136 	FILEDESC_LOCK(fdp);
137 	if (error) {
138 		if (fdp->fd_ofiles[fd] == fp) {
139 			fdp->fd_ofiles[fd] = NULL;
140 			FILEDESC_UNLOCK(fdp);
141 			fdrop(fp, td);
142 		} else
143 			FILEDESC_UNLOCK(fdp);
144 	} else {
145 		fp->f_data = so;	/* already has ref count */
146 		fp->f_flag = FREAD|FWRITE;
147 		fp->f_ops = &socketops;
148 		fp->f_type = DTYPE_SOCKET;
149 		FILEDESC_UNLOCK(fdp);
150 		td->td_retval[0] = fd;
151 	}
152 	fdrop(fp, td);
153 done2:
154 	mtx_unlock(&Giant);
155 	return (error);
156 }
157 
158 /*
159  * MPSAFE
160  */
161 /* ARGSUSED */
162 int
163 bind(td, uap)
164 	struct thread *td;
165 	register struct bind_args /* {
166 		int	s;
167 		caddr_t	name;
168 		int	namelen;
169 	} */ *uap;
170 {
171 	struct sockaddr *sa;
172 	int error;
173 
174 	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
175 		return (error);
176 
177 	return (kern_bind(td, uap->s, sa));
178 }
179 
180 int
181 kern_bind(td, fd, sa)
182 	struct thread *td;
183 	int fd;
184 	struct sockaddr *sa;
185 {
186 	struct socket *so;
187 	int error;
188 
189 	mtx_lock(&Giant);
190 	if ((error = fgetsock(td, fd, &so, NULL)) != 0)
191 		goto done2;
192 #ifdef MAC
193 	error = mac_check_socket_bind(td->td_ucred, so, sa);
194 	if (error)
195 		goto done1;
196 #endif
197 	error = sobind(so, sa, td);
198 #ifdef MAC
199 done1:
200 #endif
201 	fputsock(so);
202 done2:
203 	mtx_unlock(&Giant);
204 	FREE(sa, M_SONAME);
205 	return (error);
206 }
207 
208 /*
209  * MPSAFE
210  */
211 /* ARGSUSED */
212 int
213 listen(td, uap)
214 	struct thread *td;
215 	register struct listen_args /* {
216 		int	s;
217 		int	backlog;
218 	} */ *uap;
219 {
220 	struct socket *so;
221 	int error;
222 
223 	mtx_lock(&Giant);
224 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
225 #ifdef MAC
226 		error = mac_check_socket_listen(td->td_ucred, so);
227 		if (error)
228 			goto done;
229 #endif
230 		error = solisten(so, uap->backlog, td);
231 #ifdef MAC
232 done:
233 #endif
234 		fputsock(so);
235 	}
236 	mtx_unlock(&Giant);
237 	return(error);
238 }
239 
240 /*
241  * accept1()
242  * MPSAFE
243  */
244 static int
245 accept1(td, uap, compat)
246 	struct thread *td;
247 	register struct accept_args /* {
248 		int	s;
249 		caddr_t	name;
250 		int	*anamelen;
251 	} */ *uap;
252 	int compat;
253 {
254 	struct filedesc *fdp;
255 	struct file *nfp = NULL;
256 	struct sockaddr *sa;
257 	int namelen, error, s;
258 	struct socket *head, *so;
259 	int fd;
260 	u_int fflag;
261 	pid_t pgid;
262 	int tmp;
263 
264 	mtx_lock(&Giant);
265 	fdp = td->td_proc->p_fd;
266 	if (uap->name) {
267 		error = copyin(uap->anamelen, &namelen, sizeof (namelen));
268 		if(error)
269 			goto done2;
270 		if (namelen < 0) {
271 			error = EINVAL;
272 			goto done2;
273 		}
274 	}
275 	error = fgetsock(td, uap->s, &head, &fflag);
276 	if (error)
277 		goto done2;
278 	s = splnet();
279 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
280 		splx(s);
281 		error = EINVAL;
282 		goto done;
283 	}
284 	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
285 		if (head->so_state & SS_CANTRCVMORE) {
286 			head->so_error = ECONNABORTED;
287 			break;
288 		}
289 		if ((head->so_state & SS_NBIO) != 0) {
290 			head->so_error = EWOULDBLOCK;
291 			break;
292 		}
293 		error = tsleep(&head->so_timeo, PSOCK | PCATCH,
294 		    "accept", 0);
295 		if (error) {
296 			splx(s);
297 			goto done;
298 		}
299 	}
300 	if (head->so_error) {
301 		error = head->so_error;
302 		head->so_error = 0;
303 		splx(s);
304 		goto done;
305 	}
306 
307 	/*
308 	 * At this point we know that there is at least one connection
309 	 * ready to be accepted. Remove it from the queue prior to
310 	 * allocating the file descriptor for it since falloc() may
311 	 * block allowing another process to accept the connection
312 	 * instead.
313 	 */
314 	so = TAILQ_FIRST(&head->so_comp);
315 	TAILQ_REMOVE(&head->so_comp, so, so_list);
316 	head->so_qlen--;
317 
318 	error = falloc(td, &nfp, &fd);
319 	if (error) {
320 		/*
321 		 * Probably ran out of file descriptors. Put the
322 		 * unaccepted connection back onto the queue and
323 		 * do another wakeup so some other process might
324 		 * have a chance at it.
325 		 */
326 		TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
327 		head->so_qlen++;
328 		wakeup_one(&head->so_timeo);
329 		splx(s);
330 		goto done;
331 	}
332 	fhold(nfp);
333 	td->td_retval[0] = fd;
334 
335 	/* connection has been removed from the listen queue */
336 	KNOTE(&head->so_rcv.sb_sel.si_note, 0);
337 
338 	so->so_state &= ~SS_COMP;
339 	so->so_head = NULL;
340 	pgid = fgetown(&head->so_sigio);
341 	if (pgid != 0)
342 		fsetown(pgid, &so->so_sigio);
343 
344 	FILE_LOCK(nfp);
345 	soref(so);			/* file descriptor reference */
346 	nfp->f_data = so;	/* nfp has ref count from falloc */
347 	nfp->f_flag = fflag;
348 	nfp->f_ops = &socketops;
349 	nfp->f_type = DTYPE_SOCKET;
350 	FILE_UNLOCK(nfp);
351 	/* Sync socket nonblocking/async state with file flags */
352 	tmp = fflag & FNONBLOCK;
353 	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
354 	tmp = fflag & FASYNC;
355 	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
356 	sa = 0;
357 	error = soaccept(so, &sa);
358 	if (error) {
359 		/*
360 		 * return a namelen of zero for older code which might
361 	 	 * ignore the return value from accept.
362 		 */
363 		if (uap->name != NULL) {
364 			namelen = 0;
365 			(void) copyout(&namelen,
366 			    uap->anamelen, sizeof(*uap->anamelen));
367 		}
368 		goto noconnection;
369 	}
370 	if (sa == NULL) {
371 		namelen = 0;
372 		if (uap->name)
373 			goto gotnoname;
374 		splx(s);
375 		error = 0;
376 		goto done;
377 	}
378 	if (uap->name) {
379 		/* check sa_len before it is destroyed */
380 		if (namelen > sa->sa_len)
381 			namelen = sa->sa_len;
382 #ifdef COMPAT_OLDSOCK
383 		if (compat)
384 			((struct osockaddr *)sa)->sa_family =
385 			    sa->sa_family;
386 #endif
387 		error = copyout(sa, uap->name, (u_int)namelen);
388 		if (!error)
389 gotnoname:
390 			error = copyout(&namelen,
391 			    uap->anamelen, sizeof (*uap->anamelen));
392 	}
393 noconnection:
394 	if (sa)
395 		FREE(sa, M_SONAME);
396 
397 	/*
398 	 * close the new descriptor, assuming someone hasn't ripped it
399 	 * out from under us.
400 	 */
401 	if (error) {
402 		FILEDESC_LOCK(fdp);
403 		if (fdp->fd_ofiles[fd] == nfp) {
404 			fdp->fd_ofiles[fd] = NULL;
405 			FILEDESC_UNLOCK(fdp);
406 			fdrop(nfp, td);
407 		} else {
408 			FILEDESC_UNLOCK(fdp);
409 		}
410 	}
411 	splx(s);
412 
413 	/*
414 	 * Release explicitly held references before returning.
415 	 */
416 done:
417 	if (nfp != NULL)
418 		fdrop(nfp, td);
419 	fputsock(head);
420 done2:
421 	mtx_unlock(&Giant);
422 	return (error);
423 }
424 
425 /*
426  * MPSAFE (accept1() is MPSAFE)
427  */
428 int
429 accept(td, uap)
430 	struct thread *td;
431 	struct accept_args *uap;
432 {
433 
434 	return (accept1(td, uap, 0));
435 }
436 
437 #ifdef COMPAT_OLDSOCK
438 /*
439  * MPSAFE (accept1() is MPSAFE)
440  */
441 int
442 oaccept(td, uap)
443 	struct thread *td;
444 	struct accept_args *uap;
445 {
446 
447 	return (accept1(td, uap, 1));
448 }
449 #endif /* COMPAT_OLDSOCK */
450 
451 /*
452  * MPSAFE
453  */
454 /* ARGSUSED */
455 int
456 connect(td, uap)
457 	struct thread *td;
458 	register struct connect_args /* {
459 		int	s;
460 		caddr_t	name;
461 		int	namelen;
462 	} */ *uap;
463 {
464 	struct sockaddr *sa;
465 	int error;
466 
467 	error = getsockaddr(&sa, uap->name, uap->namelen);
468 	if (error)
469 		return error;
470 
471 	return (kern_connect(td, uap->s, sa));
472 }
473 
474 
475 int
476 kern_connect(td, fd, sa)
477 	struct thread *td;
478 	int fd;
479 	struct sockaddr *sa;
480 {
481 	struct socket *so;
482 	int error, s;
483 
484 	mtx_lock(&Giant);
485 	if ((error = fgetsock(td, fd, &so, NULL)) != 0)
486 		goto done2;
487 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
488 		error = EALREADY;
489 		goto done1;
490 	}
491 #ifdef MAC
492 	error = mac_check_socket_connect(td->td_ucred, so, sa);
493 	if (error)
494 		goto bad;
495 #endif
496 	error = soconnect(so, sa, td);
497 	if (error)
498 		goto bad;
499 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
500 		error = EINPROGRESS;
501 		goto done1;
502 	}
503 	s = splnet();
504 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
505 		error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0);
506 		if (error)
507 			break;
508 	}
509 	if (error == 0) {
510 		error = so->so_error;
511 		so->so_error = 0;
512 	}
513 	splx(s);
514 bad:
515 	so->so_state &= ~SS_ISCONNECTING;
516 	if (error == ERESTART)
517 		error = EINTR;
518 done1:
519 	fputsock(so);
520 done2:
521 	mtx_unlock(&Giant);
522 	FREE(sa, M_SONAME);
523 	return (error);
524 }
525 
526 /*
527  * MPSAFE
528  */
529 int
530 socketpair(td, uap)
531 	struct thread *td;
532 	register struct socketpair_args /* {
533 		int	domain;
534 		int	type;
535 		int	protocol;
536 		int	*rsv;
537 	} */ *uap;
538 {
539 	register struct filedesc *fdp = td->td_proc->p_fd;
540 	struct file *fp1, *fp2;
541 	struct socket *so1, *so2;
542 	int fd, error, sv[2];
543 
544 	mtx_lock(&Giant);
545 	error = socreate(uap->domain, &so1, uap->type, uap->protocol,
546 	    td->td_ucred, td);
547 	if (error)
548 		goto done2;
549 	error = socreate(uap->domain, &so2, uap->type, uap->protocol,
550 	    td->td_ucred, td);
551 	if (error)
552 		goto free1;
553 	error = falloc(td, &fp1, &fd);
554 	if (error)
555 		goto free2;
556 	fhold(fp1);
557 	sv[0] = fd;
558 	fp1->f_data = so1;	/* so1 already has ref count */
559 	error = falloc(td, &fp2, &fd);
560 	if (error)
561 		goto free3;
562 	fhold(fp2);
563 	fp2->f_data = so2;	/* so2 already has ref count */
564 	sv[1] = fd;
565 	error = soconnect2(so1, so2);
566 	if (error)
567 		goto free4;
568 	if (uap->type == SOCK_DGRAM) {
569 		/*
570 		 * Datagram socket connection is asymmetric.
571 		 */
572 		 error = soconnect2(so2, so1);
573 		 if (error)
574 			goto free4;
575 	}
576 	FILE_LOCK(fp1);
577 	fp1->f_flag = FREAD|FWRITE;
578 	fp1->f_ops = &socketops;
579 	fp1->f_type = DTYPE_SOCKET;
580 	FILE_UNLOCK(fp1);
581 	FILE_LOCK(fp2);
582 	fp2->f_flag = FREAD|FWRITE;
583 	fp2->f_ops = &socketops;
584 	fp2->f_type = DTYPE_SOCKET;
585 	FILE_UNLOCK(fp2);
586 	error = copyout(sv, uap->rsv, 2 * sizeof (int));
587 	fdrop(fp1, td);
588 	fdrop(fp2, td);
589 	goto done2;
590 free4:
591 	FILEDESC_LOCK(fdp);
592 	if (fdp->fd_ofiles[sv[1]] == fp2) {
593 		fdp->fd_ofiles[sv[1]] = NULL;
594 		FILEDESC_UNLOCK(fdp);
595 		fdrop(fp2, td);
596 	} else
597 		FILEDESC_UNLOCK(fdp);
598 	fdrop(fp2, td);
599 free3:
600 	FILEDESC_LOCK(fdp);
601 	if (fdp->fd_ofiles[sv[0]] == fp1) {
602 		fdp->fd_ofiles[sv[0]] = NULL;
603 		FILEDESC_UNLOCK(fdp);
604 		fdrop(fp1, td);
605 	} else
606 		FILEDESC_UNLOCK(fdp);
607 	fdrop(fp1, td);
608 free2:
609 	(void)soclose(so2);
610 free1:
611 	(void)soclose(so1);
612 done2:
613 	mtx_unlock(&Giant);
614 	return (error);
615 }
616 
617 static int
618 sendit(td, s, mp, flags)
619 	register struct thread *td;
620 	int s;
621 	register struct msghdr *mp;
622 	int flags;
623 {
624 	struct mbuf *control;
625 	struct sockaddr *to;
626 	int error;
627 
628 	mtx_lock(&Giant);
629 	if (mp->msg_name != NULL) {
630 		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
631 		if (error) {
632 			to = NULL;
633 			goto bad;
634 		}
635 		mp->msg_name = to;
636 	} else
637 		to = NULL;
638 
639 	if (mp->msg_control) {
640 		if (mp->msg_controllen < sizeof(struct cmsghdr)
641 #ifdef COMPAT_OLDSOCK
642 		    && mp->msg_flags != MSG_COMPAT
643 #endif
644 		) {
645 			error = EINVAL;
646 			goto bad;
647 		}
648 		error = sockargs(&control, mp->msg_control,
649 		    mp->msg_controllen, MT_CONTROL);
650 		if (error)
651 			goto bad;
652 #ifdef COMPAT_OLDSOCK
653 		if (mp->msg_flags == MSG_COMPAT) {
654 			register struct cmsghdr *cm;
655 
656 			M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
657 			if (control == 0) {
658 				error = ENOBUFS;
659 				goto bad;
660 			} else {
661 				cm = mtod(control, struct cmsghdr *);
662 				cm->cmsg_len = control->m_len;
663 				cm->cmsg_level = SOL_SOCKET;
664 				cm->cmsg_type = SCM_RIGHTS;
665 			}
666 		}
667 #endif
668 	} else {
669 		control = NULL;
670 	}
671 
672 	error = kern_sendit(td, s, mp, flags, control);
673 
674 bad:
675 	if (to)
676 		FREE(to, M_SONAME);
677 	mtx_unlock(&Giant);
678 	return (error);
679 }
680 
681 int
682 kern_sendit(td, s, mp, flags, control)
683 	struct thread *td;
684 	int s;
685 	struct msghdr *mp;
686 	int flags;
687 	struct mbuf *control;
688 {
689 	struct uio auio;
690 	struct iovec *iov;
691 	struct socket *so;
692 	int i;
693 	int len, error;
694 #ifdef KTRACE
695 	struct iovec *ktriov = NULL;
696 	struct uio ktruio;
697 	int iovlen;
698 #endif
699 
700 	if ((error = fgetsock(td, s, &so, NULL)) != 0)
701 		goto bad2;
702 
703 #ifdef MAC
704 	error = mac_check_socket_send(td->td_ucred, so);
705 	if (error)
706 		goto bad;
707 #endif
708 
709 	auio.uio_iov = mp->msg_iov;
710 	auio.uio_iovcnt = mp->msg_iovlen;
711 	auio.uio_segflg = UIO_USERSPACE;
712 	auio.uio_rw = UIO_WRITE;
713 	auio.uio_td = td;
714 	auio.uio_offset = 0;			/* XXX */
715 	auio.uio_resid = 0;
716 	iov = mp->msg_iov;
717 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
718 		if ((auio.uio_resid += iov->iov_len) < 0) {
719 			error = EINVAL;
720 			goto bad;
721 		}
722 	}
723 #ifdef KTRACE
724 	if (KTRPOINT(td, KTR_GENIO)) {
725 		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
726 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
727 		bcopy(auio.uio_iov, ktriov, iovlen);
728 		ktruio = auio;
729 	}
730 #endif
731 	len = auio.uio_resid;
732 	error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio,
733 	    0, control, flags, td);
734 	if (error) {
735 		if (auio.uio_resid != len && (error == ERESTART ||
736 		    error == EINTR || error == EWOULDBLOCK))
737 			error = 0;
738 		/* Generation of SIGPIPE can be controlled per socket */
739 		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) {
740 			PROC_LOCK(td->td_proc);
741 			psignal(td->td_proc, SIGPIPE);
742 			PROC_UNLOCK(td->td_proc);
743 		}
744 	}
745 	if (error == 0)
746 		td->td_retval[0] = len - auio.uio_resid;
747 #ifdef KTRACE
748 	if (ktriov != NULL) {
749 		if (error == 0) {
750 			ktruio.uio_iov = ktriov;
751 			ktruio.uio_resid = td->td_retval[0];
752 			ktrgenio(s, UIO_WRITE, &ktruio, error);
753 		}
754 		FREE(ktriov, M_TEMP);
755 	}
756 #endif
757 bad:
758 	fputsock(so);
759 bad2:
760 	return (error);
761 }
762 
763 /*
764  * MPSAFE
765  */
766 int
767 sendto(td, uap)
768 	struct thread *td;
769 	register struct sendto_args /* {
770 		int	s;
771 		caddr_t	buf;
772 		size_t	len;
773 		int	flags;
774 		caddr_t	to;
775 		int	tolen;
776 	} */ *uap;
777 {
778 	struct msghdr msg;
779 	struct iovec aiov;
780 	int error;
781 
782 	msg.msg_name = uap->to;
783 	msg.msg_namelen = uap->tolen;
784 	msg.msg_iov = &aiov;
785 	msg.msg_iovlen = 1;
786 	msg.msg_control = 0;
787 #ifdef COMPAT_OLDSOCK
788 	msg.msg_flags = 0;
789 #endif
790 	aiov.iov_base = uap->buf;
791 	aiov.iov_len = uap->len;
792 	error = sendit(td, uap->s, &msg, uap->flags);
793 	return (error);
794 }
795 
796 #ifdef COMPAT_OLDSOCK
797 /*
798  * MPSAFE
799  */
800 int
801 osend(td, uap)
802 	struct thread *td;
803 	register struct osend_args /* {
804 		int	s;
805 		caddr_t	buf;
806 		int	len;
807 		int	flags;
808 	} */ *uap;
809 {
810 	struct msghdr msg;
811 	struct iovec aiov;
812 	int error;
813 
814 	msg.msg_name = 0;
815 	msg.msg_namelen = 0;
816 	msg.msg_iov = &aiov;
817 	msg.msg_iovlen = 1;
818 	aiov.iov_base = uap->buf;
819 	aiov.iov_len = uap->len;
820 	msg.msg_control = 0;
821 	msg.msg_flags = 0;
822 	error = sendit(td, uap->s, &msg, uap->flags);
823 	return (error);
824 }
825 
826 /*
827  * MPSAFE
828  */
829 int
830 osendmsg(td, uap)
831 	struct thread *td;
832 	register struct osendmsg_args /* {
833 		int	s;
834 		caddr_t	msg;
835 		int	flags;
836 	} */ *uap;
837 {
838 	struct msghdr msg;
839 	struct iovec aiov[UIO_SMALLIOV], *iov;
840 	int error;
841 
842 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
843 	if (error)
844 		goto done2;
845 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
846 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
847 			error = EMSGSIZE;
848 			goto done2;
849 		}
850 		MALLOC(iov, struct iovec *,
851 		      sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
852 		      M_WAITOK);
853 	} else {
854 		iov = aiov;
855 	}
856 	error = copyin(msg.msg_iov, iov,
857 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
858 	if (error)
859 		goto done;
860 	msg.msg_flags = MSG_COMPAT;
861 	msg.msg_iov = iov;
862 	error = sendit(td, uap->s, &msg, uap->flags);
863 done:
864 	if (iov != aiov)
865 		FREE(iov, M_IOV);
866 done2:
867 	return (error);
868 }
869 #endif
870 
871 /*
872  * MPSAFE
873  */
874 int
875 sendmsg(td, uap)
876 	struct thread *td;
877 	register struct sendmsg_args /* {
878 		int	s;
879 		caddr_t	msg;
880 		int	flags;
881 	} */ *uap;
882 {
883 	struct msghdr msg;
884 	struct iovec aiov[UIO_SMALLIOV], *iov;
885 	int error;
886 
887 	error = copyin(uap->msg, &msg, sizeof (msg));
888 	if (error)
889 		goto done2;
890 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
891 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
892 			error = EMSGSIZE;
893 			goto done2;
894 		}
895 		MALLOC(iov, struct iovec *,
896 		       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
897 		       M_WAITOK);
898 	} else {
899 		iov = aiov;
900 	}
901 	if (msg.msg_iovlen &&
902 	    (error = copyin(msg.msg_iov, iov,
903 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
904 		goto done;
905 	msg.msg_iov = iov;
906 #ifdef COMPAT_OLDSOCK
907 	msg.msg_flags = 0;
908 #endif
909 	error = sendit(td, uap->s, &msg, uap->flags);
910 done:
911 	if (iov != aiov)
912 		FREE(iov, M_IOV);
913 done2:
914 	return (error);
915 }
916 
917 static int
918 recvit(td, s, mp, namelenp)
919 	register struct thread *td;
920 	int s;
921 	register struct msghdr *mp;
922 	void *namelenp;
923 {
924 	struct uio auio;
925 	register struct iovec *iov;
926 	register int i;
927 	int len, error;
928 	struct mbuf *m, *control = 0;
929 	caddr_t ctlbuf;
930 	struct socket *so;
931 	struct sockaddr *fromsa = 0;
932 #ifdef KTRACE
933 	struct iovec *ktriov = NULL;
934 	struct uio ktruio;
935 	int iovlen;
936 #endif
937 
938 	if ((error = fgetsock(td, s, &so, NULL)) != 0)
939 		return (error);
940 
941 #ifdef MAC
942 	error = mac_check_socket_receive(td->td_ucred, so);
943 	if (error) {
944 		fputsock(so);
945 		return (error);
946 	}
947 #endif
948 
949 	auio.uio_iov = mp->msg_iov;
950 	auio.uio_iovcnt = mp->msg_iovlen;
951 	auio.uio_segflg = UIO_USERSPACE;
952 	auio.uio_rw = UIO_READ;
953 	auio.uio_td = td;
954 	auio.uio_offset = 0;			/* XXX */
955 	auio.uio_resid = 0;
956 	iov = mp->msg_iov;
957 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
958 		if ((auio.uio_resid += iov->iov_len) < 0) {
959 			fputsock(so);
960 			return (EINVAL);
961 		}
962 	}
963 #ifdef KTRACE
964 	if (KTRPOINT(td, KTR_GENIO)) {
965 		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
966 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
967 		bcopy(auio.uio_iov, ktriov, iovlen);
968 		ktruio = auio;
969 	}
970 #endif
971 	len = auio.uio_resid;
972 	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
973 	    (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
974 	    &mp->msg_flags);
975 	if (error) {
976 		if (auio.uio_resid != len && (error == ERESTART ||
977 		    error == EINTR || error == EWOULDBLOCK))
978 			error = 0;
979 	}
980 #ifdef KTRACE
981 	if (ktriov != NULL) {
982 		if (error == 0) {
983 			ktruio.uio_iov = ktriov;
984 			ktruio.uio_resid = len - auio.uio_resid;
985 			ktrgenio(s, UIO_READ, &ktruio, error);
986 		}
987 		FREE(ktriov, M_TEMP);
988 	}
989 #endif
990 	if (error)
991 		goto out;
992 	td->td_retval[0] = len - auio.uio_resid;
993 	if (mp->msg_name) {
994 		len = mp->msg_namelen;
995 		if (len <= 0 || fromsa == 0)
996 			len = 0;
997 		else {
998 			/* save sa_len before it is destroyed by MSG_COMPAT */
999 			len = MIN(len, fromsa->sa_len);
1000 #ifdef COMPAT_OLDSOCK
1001 			if (mp->msg_flags & MSG_COMPAT)
1002 				((struct osockaddr *)fromsa)->sa_family =
1003 				    fromsa->sa_family;
1004 #endif
1005 			error = copyout(fromsa, mp->msg_name, (unsigned)len);
1006 			if (error)
1007 				goto out;
1008 		}
1009 		mp->msg_namelen = len;
1010 		if (namelenp &&
1011 		    (error = copyout(&len, namelenp, sizeof (int)))) {
1012 #ifdef COMPAT_OLDSOCK
1013 			if (mp->msg_flags & MSG_COMPAT)
1014 				error = 0;	/* old recvfrom didn't check */
1015 			else
1016 #endif
1017 			goto out;
1018 		}
1019 	}
1020 	if (mp->msg_control) {
1021 #ifdef COMPAT_OLDSOCK
1022 		/*
1023 		 * We assume that old recvmsg calls won't receive access
1024 		 * rights and other control info, esp. as control info
1025 		 * is always optional and those options didn't exist in 4.3.
1026 		 * If we receive rights, trim the cmsghdr; anything else
1027 		 * is tossed.
1028 		 */
1029 		if (control && mp->msg_flags & MSG_COMPAT) {
1030 			if (mtod(control, struct cmsghdr *)->cmsg_level !=
1031 			    SOL_SOCKET ||
1032 			    mtod(control, struct cmsghdr *)->cmsg_type !=
1033 			    SCM_RIGHTS) {
1034 				mp->msg_controllen = 0;
1035 				goto out;
1036 			}
1037 			control->m_len -= sizeof (struct cmsghdr);
1038 			control->m_data += sizeof (struct cmsghdr);
1039 		}
1040 #endif
1041 		len = mp->msg_controllen;
1042 		m = control;
1043 		mp->msg_controllen = 0;
1044 		ctlbuf = mp->msg_control;
1045 
1046 		while (m && len > 0) {
1047 			unsigned int tocopy;
1048 
1049 			if (len >= m->m_len)
1050 				tocopy = m->m_len;
1051 			else {
1052 				mp->msg_flags |= MSG_CTRUNC;
1053 				tocopy = len;
1054 			}
1055 
1056 			if ((error = copyout(mtod(m, caddr_t),
1057 					ctlbuf, tocopy)) != 0)
1058 				goto out;
1059 
1060 			ctlbuf += tocopy;
1061 			len -= tocopy;
1062 			m = m->m_next;
1063 		}
1064 		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
1065 	}
1066 out:
1067 	fputsock(so);
1068 	if (fromsa)
1069 		FREE(fromsa, M_SONAME);
1070 	if (control)
1071 		m_freem(control);
1072 	return (error);
1073 }
1074 
1075 /*
1076  * MPSAFE
1077  */
1078 int
1079 recvfrom(td, uap)
1080 	struct thread *td;
1081 	register struct recvfrom_args /* {
1082 		int	s;
1083 		caddr_t	buf;
1084 		size_t	len;
1085 		int	flags;
1086 		caddr_t	from;
1087 		int	*fromlenaddr;
1088 	} */ *uap;
1089 {
1090 	struct msghdr msg;
1091 	struct iovec aiov;
1092 	int error;
1093 
1094 	mtx_lock(&Giant);
1095 	if (uap->fromlenaddr) {
1096 		error = copyin(uap->fromlenaddr,
1097 		    &msg.msg_namelen, sizeof (msg.msg_namelen));
1098 		if (error)
1099 			goto done2;
1100 	} else {
1101 		msg.msg_namelen = 0;
1102 	}
1103 	msg.msg_name = uap->from;
1104 	msg.msg_iov = &aiov;
1105 	msg.msg_iovlen = 1;
1106 	aiov.iov_base = uap->buf;
1107 	aiov.iov_len = uap->len;
1108 	msg.msg_control = 0;
1109 	msg.msg_flags = uap->flags;
1110 	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1111 done2:
1112 	mtx_unlock(&Giant);
1113 	return(error);
1114 }
1115 
1116 #ifdef COMPAT_OLDSOCK
1117 /*
1118  * MPSAFE
1119  */
1120 int
1121 orecvfrom(td, uap)
1122 	struct thread *td;
1123 	struct recvfrom_args *uap;
1124 {
1125 
1126 	uap->flags |= MSG_COMPAT;
1127 	return (recvfrom(td, uap));
1128 }
1129 #endif
1130 
1131 
1132 #ifdef COMPAT_OLDSOCK
1133 /*
1134  * MPSAFE
1135  */
1136 int
1137 orecv(td, uap)
1138 	struct thread *td;
1139 	register struct orecv_args /* {
1140 		int	s;
1141 		caddr_t	buf;
1142 		int	len;
1143 		int	flags;
1144 	} */ *uap;
1145 {
1146 	struct msghdr msg;
1147 	struct iovec aiov;
1148 	int error;
1149 
1150 	mtx_lock(&Giant);
1151 	msg.msg_name = 0;
1152 	msg.msg_namelen = 0;
1153 	msg.msg_iov = &aiov;
1154 	msg.msg_iovlen = 1;
1155 	aiov.iov_base = uap->buf;
1156 	aiov.iov_len = uap->len;
1157 	msg.msg_control = 0;
1158 	msg.msg_flags = uap->flags;
1159 	error = recvit(td, uap->s, &msg, NULL);
1160 	mtx_unlock(&Giant);
1161 	return (error);
1162 }
1163 
1164 /*
1165  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1166  * overlays the new one, missing only the flags, and with the (old) access
1167  * rights where the control fields are now.
1168  *
1169  * MPSAFE
1170  */
1171 int
1172 orecvmsg(td, uap)
1173 	struct thread *td;
1174 	register struct orecvmsg_args /* {
1175 		int	s;
1176 		struct	omsghdr *msg;
1177 		int	flags;
1178 	} */ *uap;
1179 {
1180 	struct msghdr msg;
1181 	struct iovec aiov[UIO_SMALLIOV], *iov;
1182 	int error;
1183 
1184 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1185 	if (error)
1186 		return (error);
1187 
1188 	mtx_lock(&Giant);
1189 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1190 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1191 			error = EMSGSIZE;
1192 			goto done2;
1193 		}
1194 		MALLOC(iov, struct iovec *,
1195 		      sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1196 		      M_WAITOK);
1197 	} else {
1198 		iov = aiov;
1199 	}
1200 	msg.msg_flags = uap->flags | MSG_COMPAT;
1201 	error = copyin(msg.msg_iov, iov,
1202 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1203 	if (error)
1204 		goto done;
1205 	msg.msg_iov = iov;
1206 	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1207 
1208 	if (msg.msg_controllen && error == 0)
1209 		error = copyout(&msg.msg_controllen,
1210 		    &uap->msg->msg_accrightslen, sizeof (int));
1211 done:
1212 	if (iov != aiov)
1213 		FREE(iov, M_IOV);
1214 done2:
1215 	mtx_unlock(&Giant);
1216 	return (error);
1217 }
1218 #endif
1219 
1220 /*
1221  * MPSAFE
1222  */
1223 int
1224 recvmsg(td, uap)
1225 	struct thread *td;
1226 	register struct recvmsg_args /* {
1227 		int	s;
1228 		struct	msghdr *msg;
1229 		int	flags;
1230 	} */ *uap;
1231 {
1232 	struct msghdr msg;
1233 	struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
1234 	register int error;
1235 
1236 	mtx_lock(&Giant);
1237 	error = copyin(uap->msg, &msg, sizeof (msg));
1238 	if (error)
1239 		goto done2;
1240 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1241 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1242 			error = EMSGSIZE;
1243 			goto done2;
1244 		}
1245 		MALLOC(iov, struct iovec *,
1246 		       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1247 		       M_WAITOK);
1248 	} else {
1249 		iov = aiov;
1250 	}
1251 #ifdef COMPAT_OLDSOCK
1252 	msg.msg_flags = uap->flags &~ MSG_COMPAT;
1253 #else
1254 	msg.msg_flags = uap->flags;
1255 #endif
1256 	uiov = msg.msg_iov;
1257 	msg.msg_iov = iov;
1258 	error = copyin(uiov, iov,
1259 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1260 	if (error)
1261 		goto done;
1262 	error = recvit(td, uap->s, &msg, NULL);
1263 	if (!error) {
1264 		msg.msg_iov = uiov;
1265 		error = copyout(&msg, uap->msg, sizeof(msg));
1266 	}
1267 done:
1268 	if (iov != aiov)
1269 		FREE(iov, M_IOV);
1270 done2:
1271 	mtx_unlock(&Giant);
1272 	return (error);
1273 }
1274 
1275 /*
1276  * MPSAFE
1277  */
1278 /* ARGSUSED */
1279 int
1280 shutdown(td, uap)
1281 	struct thread *td;
1282 	register struct shutdown_args /* {
1283 		int	s;
1284 		int	how;
1285 	} */ *uap;
1286 {
1287 	struct socket *so;
1288 	int error;
1289 
1290 	mtx_lock(&Giant);
1291 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1292 		error = soshutdown(so, uap->how);
1293 		fputsock(so);
1294 	}
1295 	mtx_unlock(&Giant);
1296 	return(error);
1297 }
1298 
1299 /*
1300  * MPSAFE
1301  */
1302 /* ARGSUSED */
1303 int
1304 setsockopt(td, uap)
1305 	struct thread *td;
1306 	register struct setsockopt_args /* {
1307 		int	s;
1308 		int	level;
1309 		int	name;
1310 		caddr_t	val;
1311 		int	valsize;
1312 	} */ *uap;
1313 {
1314 	struct socket *so;
1315 	struct sockopt sopt;
1316 	int error;
1317 
1318 	if (uap->val == 0 && uap->valsize != 0)
1319 		return (EFAULT);
1320 	if (uap->valsize < 0)
1321 		return (EINVAL);
1322 
1323 	mtx_lock(&Giant);
1324 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1325 		sopt.sopt_dir = SOPT_SET;
1326 		sopt.sopt_level = uap->level;
1327 		sopt.sopt_name = uap->name;
1328 		sopt.sopt_val = uap->val;
1329 		sopt.sopt_valsize = uap->valsize;
1330 		sopt.sopt_td = td;
1331 		error = sosetopt(so, &sopt);
1332 		fputsock(so);
1333 	}
1334 	mtx_unlock(&Giant);
1335 	return(error);
1336 }
1337 
1338 /*
1339  * MPSAFE
1340  */
1341 /* ARGSUSED */
1342 int
1343 getsockopt(td, uap)
1344 	struct thread *td;
1345 	register struct getsockopt_args /* {
1346 		int	s;
1347 		int	level;
1348 		int	name;
1349 		caddr_t	val;
1350 		int	*avalsize;
1351 	} */ *uap;
1352 {
1353 	int	valsize, error;
1354 	struct  socket *so;
1355 	struct	sockopt sopt;
1356 
1357 	mtx_lock(&Giant);
1358 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1359 		goto done2;
1360 	if (uap->val) {
1361 		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1362 		if (error)
1363 			goto done1;
1364 		if (valsize < 0) {
1365 			error = EINVAL;
1366 			goto done1;
1367 		}
1368 	} else {
1369 		valsize = 0;
1370 	}
1371 
1372 	sopt.sopt_dir = SOPT_GET;
1373 	sopt.sopt_level = uap->level;
1374 	sopt.sopt_name = uap->name;
1375 	sopt.sopt_val = uap->val;
1376 	sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1377 	sopt.sopt_td = td;
1378 
1379 	error = sogetopt(so, &sopt);
1380 	if (error == 0) {
1381 		valsize = sopt.sopt_valsize;
1382 		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1383 	}
1384 done1:
1385 	fputsock(so);
1386 done2:
1387 	mtx_unlock(&Giant);
1388 	return (error);
1389 }
1390 
1391 /*
1392  * getsockname1() - Get socket name.
1393  *
1394  * MPSAFE
1395  */
1396 /* ARGSUSED */
1397 static int
1398 getsockname1(td, uap, compat)
1399 	struct thread *td;
1400 	register struct getsockname_args /* {
1401 		int	fdes;
1402 		caddr_t	asa;
1403 		int	*alen;
1404 	} */ *uap;
1405 	int compat;
1406 {
1407 	struct socket *so;
1408 	struct sockaddr *sa;
1409 	int len, error;
1410 
1411 	mtx_lock(&Giant);
1412 	if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1413 		goto done2;
1414 	error = copyin(uap->alen, &len, sizeof (len));
1415 	if (error)
1416 		goto done1;
1417 	if (len < 0) {
1418 		error = EINVAL;
1419 		goto done1;
1420 	}
1421 	sa = 0;
1422 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1423 	if (error)
1424 		goto bad;
1425 	if (sa == 0) {
1426 		len = 0;
1427 		goto gotnothing;
1428 	}
1429 
1430 	len = MIN(len, sa->sa_len);
1431 #ifdef COMPAT_OLDSOCK
1432 	if (compat)
1433 		((struct osockaddr *)sa)->sa_family = sa->sa_family;
1434 #endif
1435 	error = copyout(sa, uap->asa, (u_int)len);
1436 	if (error == 0)
1437 gotnothing:
1438 		error = copyout(&len, uap->alen, sizeof (len));
1439 bad:
1440 	if (sa)
1441 		FREE(sa, M_SONAME);
1442 done1:
1443 	fputsock(so);
1444 done2:
1445 	mtx_unlock(&Giant);
1446 	return (error);
1447 }
1448 
1449 /*
1450  * MPSAFE
1451  */
1452 int
1453 getsockname(td, uap)
1454 	struct thread *td;
1455 	struct getsockname_args *uap;
1456 {
1457 
1458 	return (getsockname1(td, uap, 0));
1459 }
1460 
1461 #ifdef COMPAT_OLDSOCK
1462 /*
1463  * MPSAFE
1464  */
1465 int
1466 ogetsockname(td, uap)
1467 	struct thread *td;
1468 	struct getsockname_args *uap;
1469 {
1470 
1471 	return (getsockname1(td, uap, 1));
1472 }
1473 #endif /* COMPAT_OLDSOCK */
1474 
1475 /*
1476  * getpeername1() - Get name of peer for connected socket.
1477  *
1478  * MPSAFE
1479  */
1480 /* ARGSUSED */
1481 static int
1482 getpeername1(td, uap, compat)
1483 	struct thread *td;
1484 	register struct getpeername_args /* {
1485 		int	fdes;
1486 		caddr_t	asa;
1487 		int	*alen;
1488 	} */ *uap;
1489 	int compat;
1490 {
1491 	struct socket *so;
1492 	struct sockaddr *sa;
1493 	int len, error;
1494 
1495 	mtx_lock(&Giant);
1496 	if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1497 		goto done2;
1498 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1499 		error = ENOTCONN;
1500 		goto done1;
1501 	}
1502 	error = copyin(uap->alen, &len, sizeof (len));
1503 	if (error)
1504 		goto done1;
1505 	if (len < 0) {
1506 		error = EINVAL;
1507 		goto done1;
1508 	}
1509 	sa = 0;
1510 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1511 	if (error)
1512 		goto bad;
1513 	if (sa == 0) {
1514 		len = 0;
1515 		goto gotnothing;
1516 	}
1517 	len = MIN(len, sa->sa_len);
1518 #ifdef COMPAT_OLDSOCK
1519 	if (compat)
1520 		((struct osockaddr *)sa)->sa_family =
1521 		    sa->sa_family;
1522 #endif
1523 	error = copyout(sa, uap->asa, (u_int)len);
1524 	if (error)
1525 		goto bad;
1526 gotnothing:
1527 	error = copyout(&len, uap->alen, sizeof (len));
1528 bad:
1529 	if (sa)
1530 		FREE(sa, M_SONAME);
1531 done1:
1532 	fputsock(so);
1533 done2:
1534 	mtx_unlock(&Giant);
1535 	return (error);
1536 }
1537 
1538 /*
1539  * MPSAFE
1540  */
1541 int
1542 getpeername(td, uap)
1543 	struct thread *td;
1544 	struct getpeername_args *uap;
1545 {
1546 
1547 	return (getpeername1(td, uap, 0));
1548 }
1549 
1550 #ifdef COMPAT_OLDSOCK
1551 /*
1552  * MPSAFE
1553  */
1554 int
1555 ogetpeername(td, uap)
1556 	struct thread *td;
1557 	struct ogetpeername_args *uap;
1558 {
1559 
1560 	/* XXX uap should have type `getpeername_args *' to begin with. */
1561 	return (getpeername1(td, (struct getpeername_args *)uap, 1));
1562 }
1563 #endif /* COMPAT_OLDSOCK */
1564 
1565 int
1566 sockargs(mp, buf, buflen, type)
1567 	struct mbuf **mp;
1568 	caddr_t buf;
1569 	int buflen, type;
1570 {
1571 	register struct sockaddr *sa;
1572 	register struct mbuf *m;
1573 	int error;
1574 
1575 	if ((u_int)buflen > MLEN) {
1576 #ifdef COMPAT_OLDSOCK
1577 		if (type == MT_SONAME && (u_int)buflen <= 112)
1578 			buflen = MLEN;		/* unix domain compat. hack */
1579 		else
1580 #endif
1581 		return (EINVAL);
1582 	}
1583 	m = m_get(M_TRYWAIT, type);
1584 	if (m == NULL)
1585 		return (ENOBUFS);
1586 	m->m_len = buflen;
1587 	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1588 	if (error)
1589 		(void) m_free(m);
1590 	else {
1591 		*mp = m;
1592 		if (type == MT_SONAME) {
1593 			sa = mtod(m, struct sockaddr *);
1594 
1595 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1596 			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1597 				sa->sa_family = sa->sa_len;
1598 #endif
1599 			sa->sa_len = buflen;
1600 		}
1601 	}
1602 	return (error);
1603 }
1604 
1605 int
1606 getsockaddr(namp, uaddr, len)
1607 	struct sockaddr **namp;
1608 	caddr_t uaddr;
1609 	size_t len;
1610 {
1611 	struct sockaddr *sa;
1612 	int error;
1613 
1614 	if (len > SOCK_MAXADDRLEN)
1615 		return ENAMETOOLONG;
1616 	MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1617 	error = copyin(uaddr, sa, len);
1618 	if (error) {
1619 		FREE(sa, M_SONAME);
1620 	} else {
1621 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1622 		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1623 			sa->sa_family = sa->sa_len;
1624 #endif
1625 		sa->sa_len = len;
1626 		*namp = sa;
1627 	}
1628 	return error;
1629 }
1630 
1631 /*
1632  * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
1633  */
1634 static void
1635 sf_buf_init(void *arg)
1636 {
1637 	struct sf_buf *sf_bufs;
1638 	vm_offset_t sf_base;
1639 	int i;
1640 
1641 	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
1642 	mtx_lock(&sf_freelist.sf_lock);
1643 	SLIST_INIT(&sf_freelist.sf_head);
1644 	sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE);
1645 	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
1646 	    M_NOWAIT | M_ZERO);
1647 	for (i = 0; i < nsfbufs; i++) {
1648 		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
1649 		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
1650 	}
1651 	sf_buf_alloc_want = 0;
1652 	mtx_unlock(&sf_freelist.sf_lock);
1653 }
1654 
1655 /*
1656  * Get an sf_buf from the freelist. Will block if none are available.
1657  */
1658 struct sf_buf *
1659 sf_buf_alloc(struct vm_page *m)
1660 {
1661 	struct sf_buf *sf;
1662 	int error;
1663 
1664 	mtx_lock(&sf_freelist.sf_lock);
1665 	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
1666 		sf_buf_alloc_want++;
1667 		error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH,
1668 		    "sfbufa", 0);
1669 		sf_buf_alloc_want--;
1670 
1671 		/*
1672 		 * If we got a signal, don't risk going back to sleep.
1673 		 */
1674 		if (error)
1675 			break;
1676 	}
1677 	if (sf != NULL) {
1678 		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
1679 		sf->m = m;
1680 		pmap_qenter(sf->kva, &sf->m, 1);
1681 	}
1682 	mtx_unlock(&sf_freelist.sf_lock);
1683 	return (sf);
1684 }
1685 
1686 /*
1687  * Detatch mapped page and release resources back to the system.
1688  */
1689 void
1690 sf_buf_free(void *addr, void *args)
1691 {
1692 	struct sf_buf *sf;
1693 	struct vm_page *m;
1694 
1695 	sf = args;
1696 	pmap_qremove((vm_offset_t)addr, 1);
1697 	m = sf->m;
1698 	vm_page_lock_queues();
1699 	vm_page_unwire(m, 0);
1700 	/*
1701 	 * Check for the object going away on us. This can
1702 	 * happen since we don't hold a reference to it.
1703 	 * If so, we're responsible for freeing the page.
1704 	 */
1705 	if (m->wire_count == 0 && m->object == NULL)
1706 		vm_page_free(m);
1707 	vm_page_unlock_queues();
1708 	sf->m = NULL;
1709 	mtx_lock(&sf_freelist.sf_lock);
1710 	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
1711 	if (sf_buf_alloc_want > 0)
1712 		wakeup_one(&sf_freelist);
1713 	mtx_unlock(&sf_freelist.sf_lock);
1714 }
1715 
1716 /*
1717  * sendfile(2)
1718  *
1719  * MPSAFE
1720  *
1721  * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1722  *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1723  *
1724  * Send a file specified by 'fd' and starting at 'offset' to a socket
1725  * specified by 's'. Send only 'nbytes' of the file or until EOF if
1726  * nbytes == 0. Optionally add a header and/or trailer to the socket
1727  * output. If specified, write the total number of bytes sent into *sbytes.
1728  *
1729  */
1730 int
1731 sendfile(struct thread *td, struct sendfile_args *uap)
1732 {
1733 
1734 	return (do_sendfile(td, uap, 0));
1735 }
1736 
1737 #ifdef COMPAT_FREEBSD4
1738 int
1739 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
1740 {
1741 	struct sendfile_args args;
1742 
1743 	args.fd = uap->fd;
1744 	args.s = uap->s;
1745 	args.offset = uap->offset;
1746 	args.nbytes = uap->nbytes;
1747 	args.hdtr = uap->hdtr;
1748 	args.sbytes = uap->sbytes;
1749 	args.flags = uap->flags;
1750 
1751 	return (do_sendfile(td, &args, 1));
1752 }
1753 #endif /* COMPAT_FREEBSD4 */
1754 
1755 static int
1756 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1757 {
1758 	struct vnode *vp;
1759 	struct vm_object *obj;
1760 	struct socket *so = NULL;
1761 	struct mbuf *m;
1762 	struct sf_buf *sf;
1763 	struct vm_page *pg;
1764 	struct writev_args nuap;
1765 	struct sf_hdtr hdtr;
1766 	off_t off, xfsize, hdtr_size, sbytes = 0;
1767 	int error, s;
1768 
1769 	mtx_lock(&Giant);
1770 
1771 	hdtr_size = 0;
1772 
1773 	/*
1774 	 * The descriptor must be a regular file and have a backing VM object.
1775 	 */
1776 	if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1777 		goto done;
1778 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1779 	if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
1780 		error = EINVAL;
1781 		VOP_UNLOCK(vp, 0, td);
1782 		goto done;
1783 	}
1784 	VOP_UNLOCK(vp, 0, td);
1785 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1786 		goto done;
1787 	if (so->so_type != SOCK_STREAM) {
1788 		error = EINVAL;
1789 		goto done;
1790 	}
1791 	if ((so->so_state & SS_ISCONNECTED) == 0) {
1792 		error = ENOTCONN;
1793 		goto done;
1794 	}
1795 	if (uap->offset < 0) {
1796 		error = EINVAL;
1797 		goto done;
1798 	}
1799 
1800 #ifdef MAC
1801 	error = mac_check_socket_send(td->td_ucred, so);
1802 	if (error)
1803 		goto done;
1804 #endif
1805 
1806 	/*
1807 	 * If specified, get the pointer to the sf_hdtr struct for
1808 	 * any headers/trailers.
1809 	 */
1810 	if (uap->hdtr != NULL) {
1811 		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1812 		if (error)
1813 			goto done;
1814 		/*
1815 		 * Send any headers. Wimp out and use writev(2).
1816 		 */
1817 		if (hdtr.headers != NULL) {
1818 			nuap.fd = uap->s;
1819 			nuap.iovp = hdtr.headers;
1820 			nuap.iovcnt = hdtr.hdr_cnt;
1821 			error = writev(td, &nuap);
1822 			if (error)
1823 				goto done;
1824 			if (compat)
1825 				sbytes += td->td_retval[0];
1826 			else
1827 				hdtr_size += td->td_retval[0];
1828 		}
1829 	}
1830 
1831 	/*
1832 	 * Protect against multiple writers to the socket.
1833 	 */
1834 	(void) sblock(&so->so_snd, M_WAITOK);
1835 
1836 	/*
1837 	 * Loop through the pages in the file, starting with the requested
1838 	 * offset. Get a file page (do I/O if necessary), map the file page
1839 	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1840 	 * it on the socket.
1841 	 */
1842 	for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1843 		vm_pindex_t pindex;
1844 		vm_offset_t pgoff;
1845 
1846 		pindex = OFF_TO_IDX(off);
1847 		VM_OBJECT_LOCK(obj);
1848 retry_lookup:
1849 		/*
1850 		 * Calculate the amount to transfer. Not to exceed a page,
1851 		 * the EOF, or the passed in nbytes.
1852 		 */
1853 		xfsize = obj->un_pager.vnp.vnp_size - off;
1854 		VM_OBJECT_UNLOCK(obj);
1855 		if (xfsize > PAGE_SIZE)
1856 			xfsize = PAGE_SIZE;
1857 		pgoff = (vm_offset_t)(off & PAGE_MASK);
1858 		if (PAGE_SIZE - pgoff < xfsize)
1859 			xfsize = PAGE_SIZE - pgoff;
1860 		if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1861 			xfsize = uap->nbytes - sbytes;
1862 		if (xfsize <= 0)
1863 			break;
1864 		/*
1865 		 * Optimize the non-blocking case by looking at the socket space
1866 		 * before going to the extra work of constituting the sf_buf.
1867 		 */
1868 		if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1869 			if (so->so_state & SS_CANTSENDMORE)
1870 				error = EPIPE;
1871 			else
1872 				error = EAGAIN;
1873 			sbunlock(&so->so_snd);
1874 			goto done;
1875 		}
1876 		VM_OBJECT_LOCK(obj);
1877 		/*
1878 		 * Attempt to look up the page.
1879 		 *
1880 		 *	Allocate if not found
1881 		 *
1882 		 *	Wait and loop if busy.
1883 		 */
1884 		pg = vm_page_lookup(obj, pindex);
1885 
1886 		if (pg == NULL) {
1887 			pg = vm_page_alloc(obj, pindex,
1888 			    VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
1889 			if (pg == NULL) {
1890 				VM_OBJECT_UNLOCK(obj);
1891 				VM_WAIT;
1892 				VM_OBJECT_LOCK(obj);
1893 				goto retry_lookup;
1894 			}
1895 			vm_page_lock_queues();
1896 			vm_page_wakeup(pg);
1897 		} else {
1898 			vm_page_lock_queues();
1899 			if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
1900 				goto retry_lookup;
1901 			/*
1902 		 	 * Wire the page so it does not get ripped out from
1903 			 * under us.
1904 			 */
1905 			vm_page_wire(pg);
1906 		}
1907 
1908 		/*
1909 		 * If page is not valid for what we need, initiate I/O
1910 		 */
1911 
1912 		if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) {
1913 			int bsize, resid;
1914 
1915 			/*
1916 			 * Ensure that our page is still around when the I/O
1917 			 * completes.
1918 			 */
1919 			vm_page_io_start(pg);
1920 			vm_page_unlock_queues();
1921 			VM_OBJECT_UNLOCK(obj);
1922 
1923 			/*
1924 			 * Get the page from backing store.
1925 			 */
1926 			bsize = vp->v_mount->mnt_stat.f_iosize;
1927 			vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
1928 			/*
1929 			 * XXXMAC: Because we don't have fp->f_cred here,
1930 			 * we pass in NOCRED.  This is probably wrong, but
1931 			 * is consistent with our original implementation.
1932 			 */
1933 			error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
1934 			    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
1935 			    IO_VMIO | ((MAXBSIZE / bsize) << 16),
1936 			    td->td_ucred, NOCRED, &resid, td);
1937 			VOP_UNLOCK(vp, 0, td);
1938 			if (error)
1939 				VM_OBJECT_LOCK(obj);
1940 			vm_page_lock_queues();
1941 			vm_page_flag_clear(pg, PG_ZERO);
1942 			vm_page_io_finish(pg);
1943 			if (error) {
1944 				vm_page_unwire(pg, 0);
1945 				/*
1946 				 * See if anyone else might know about this page.
1947 				 * If not and it is not valid, then free it.
1948 				 */
1949 				if (pg->wire_count == 0 && pg->valid == 0 &&
1950 				    pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1951 				    pg->hold_count == 0) {
1952 					vm_page_busy(pg);
1953 					vm_page_free(pg);
1954 				}
1955 				vm_page_unlock_queues();
1956 				VM_OBJECT_UNLOCK(obj);
1957 				sbunlock(&so->so_snd);
1958 				goto done;
1959 			}
1960 		} else
1961 			VM_OBJECT_UNLOCK(obj);
1962 		vm_page_unlock_queues();
1963 
1964 		/*
1965 		 * Get a sendfile buf. We usually wait as long as necessary,
1966 		 * but this wait can be interrupted.
1967 		 */
1968 		if ((sf = sf_buf_alloc(pg)) == NULL) {
1969 			vm_page_lock_queues();
1970 			vm_page_unwire(pg, 0);
1971 			if (pg->wire_count == 0 && pg->object == NULL)
1972 				vm_page_free(pg);
1973 			vm_page_unlock_queues();
1974 			sbunlock(&so->so_snd);
1975 			error = EINTR;
1976 			goto done;
1977 		}
1978 
1979 		/*
1980 		 * Get an mbuf header and set it up as having external storage.
1981 		 */
1982 		MGETHDR(m, M_TRYWAIT, MT_DATA);
1983 		if (m == NULL) {
1984 			error = ENOBUFS;
1985 			sf_buf_free((void *)sf->kva, sf);
1986 			sbunlock(&so->so_snd);
1987 			goto done;
1988 		}
1989 		/*
1990 		 * Setup external storage for mbuf.
1991 		 */
1992 		MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, sf, M_RDONLY,
1993 		    EXT_SFBUF);
1994 		m->m_data = (char *) sf->kva + pgoff;
1995 		m->m_pkthdr.len = m->m_len = xfsize;
1996 		/*
1997 		 * Add the buffer to the socket buffer chain.
1998 		 */
1999 		s = splnet();
2000 retry_space:
2001 		/*
2002 		 * Make sure that the socket is still able to take more data.
2003 		 * CANTSENDMORE being true usually means that the connection
2004 		 * was closed. so_error is true when an error was sensed after
2005 		 * a previous send.
2006 		 * The state is checked after the page mapping and buffer
2007 		 * allocation above since those operations may block and make
2008 		 * any socket checks stale. From this point forward, nothing
2009 		 * blocks before the pru_send (or more accurately, any blocking
2010 		 * results in a loop back to here to re-check).
2011 		 */
2012 		if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
2013 			if (so->so_state & SS_CANTSENDMORE) {
2014 				error = EPIPE;
2015 			} else {
2016 				error = so->so_error;
2017 				so->so_error = 0;
2018 			}
2019 			m_freem(m);
2020 			sbunlock(&so->so_snd);
2021 			splx(s);
2022 			goto done;
2023 		}
2024 		/*
2025 		 * Wait for socket space to become available. We do this just
2026 		 * after checking the connection state above in order to avoid
2027 		 * a race condition with sbwait().
2028 		 */
2029 		if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
2030 			if (so->so_state & SS_NBIO) {
2031 				m_freem(m);
2032 				sbunlock(&so->so_snd);
2033 				splx(s);
2034 				error = EAGAIN;
2035 				goto done;
2036 			}
2037 			error = sbwait(&so->so_snd);
2038 			/*
2039 			 * An error from sbwait usually indicates that we've
2040 			 * been interrupted by a signal. If we've sent anything
2041 			 * then return bytes sent, otherwise return the error.
2042 			 */
2043 			if (error) {
2044 				m_freem(m);
2045 				sbunlock(&so->so_snd);
2046 				splx(s);
2047 				goto done;
2048 			}
2049 			goto retry_space;
2050 		}
2051 		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
2052 		splx(s);
2053 		if (error) {
2054 			sbunlock(&so->so_snd);
2055 			goto done;
2056 		}
2057 	}
2058 	sbunlock(&so->so_snd);
2059 
2060 	/*
2061 	 * Send trailers. Wimp out and use writev(2).
2062 	 */
2063 	if (uap->hdtr != NULL && hdtr.trailers != NULL) {
2064 			nuap.fd = uap->s;
2065 			nuap.iovp = hdtr.trailers;
2066 			nuap.iovcnt = hdtr.trl_cnt;
2067 			error = writev(td, &nuap);
2068 			if (error)
2069 				goto done;
2070 			if (compat)
2071 				sbytes += td->td_retval[0];
2072 			else
2073 				hdtr_size += td->td_retval[0];
2074 	}
2075 
2076 done:
2077 	/*
2078 	 * If there was no error we have to clear td->td_retval[0]
2079 	 * because it may have been set by writev.
2080 	 */
2081 	if (error == 0) {
2082 		td->td_retval[0] = 0;
2083 	}
2084 	if (uap->sbytes != NULL) {
2085 		if (!compat)
2086 			sbytes += hdtr_size;
2087 		copyout(&sbytes, uap->sbytes, sizeof(off_t));
2088 	}
2089 	if (vp)
2090 		vrele(vp);
2091 	if (so)
2092 		fputsock(so);
2093 	mtx_unlock(&Giant);
2094 	return (error);
2095 }
2096