xref: /freebsd/sys/kern/uipc_syscalls.c (revision 6780ab54325a71e7e70112b11657973edde8655e)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * sendfile(2) and related extensions:
6  * Copyright (c) 1998, David Greenman. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
37  * $FreeBSD$
38  */
39 
40 #include "opt_compat.h"
41 #include "opt_ktrace.h"
42 #include "opt_mac.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/kernel.h>
47 #include <sys/lock.h>
48 #include <sys/mac.h>
49 #include <sys/mutex.h>
50 #include <sys/sysproto.h>
51 #include <sys/malloc.h>
52 #include <sys/filedesc.h>
53 #include <sys/event.h>
54 #include <sys/proc.h>
55 #include <sys/fcntl.h>
56 #include <sys/file.h>
57 #include <sys/lock.h>
58 #include <sys/mount.h>
59 #include <sys/mbuf.h>
60 #include <sys/protosw.h>
61 #include <sys/socket.h>
62 #include <sys/socketvar.h>
63 #include <sys/signalvar.h>
64 #include <sys/syscallsubr.h>
65 #include <sys/uio.h>
66 #include <sys/vnode.h>
67 #ifdef KTRACE
68 #include <sys/ktrace.h>
69 #endif
70 
71 #include <vm/vm.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_page.h>
74 #include <vm/vm_pageout.h>
75 #include <vm/vm_kern.h>
76 #include <vm/vm_extern.h>
77 
/*
 * Register sf_buf_init() with SYSINIT at SI_SUB_MBUF / SI_ORDER_ANY.
 * Its definition is outside this part of the file.
 */
78 static void sf_buf_init(void *arg);
79 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
80 
/* Shared back ends for the send*() and recv*() system calls below. */
81 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
82 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
83 
/*
 * Helpers taking a "compat" flag; accept1() is shared by accept() and
 * oaccept() below, the others are defined later in the file.
 */
84 static int accept1(struct thread *td, struct accept_args *uap, int compat);
85 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
86 static int getsockname1(struct thread *td, struct getsockname_args *uap,
87 			int compat);
88 static int getpeername1(struct thread *td, struct getpeername_args *uap,
89 			int compat);
90 
91 /*
92  * Expanded sf_freelist head: an SLIST_HEAD() of struct sf_buf paired with
93  * the sf_lock mutex (presumably the lock guarding the list -- confirm
     * against the sf_buf code later in the file).
94  */
95 static struct {
96 	SLIST_HEAD(, sf_buf) sf_head;
97 	struct mtx sf_lock;
98 } sf_freelist;
99 
/*
 * sf_buf bookkeeping globals.  NOTE(review): presumably initialized by
 * sf_buf_init(); not visible from this part of the file.
 */
100 vm_offset_t sf_base;
101 struct sf_buf *sf_bufs;
102 u_int sf_buf_alloc_want;
103 
104 /*
105  * System call interface to the socket abstraction.
     *
     * COMPAT_OLDSOCK is defined when either COMPAT_43 or COMPAT_SUNOS is
     * configured; it enables the old-style entry points (oaccept(), osend(),
     * orecv(), ...) and the osockaddr/omsghdr handling below.
106  */
107 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
108 #define COMPAT_OLDSOCK
109 #endif
110 
111 /*
112  * MPSAFE
113  */
114 int
115 socket(td, uap)
116 	struct thread *td;
117 	register struct socket_args /* {
118 		int	domain;
119 		int	type;
120 		int	protocol;
121 	} */ *uap;
122 {
123 	struct filedesc *fdp;
124 	struct socket *so;
125 	struct file *fp;
126 	int fd, error;
127 
128 	mtx_lock(&Giant);
129 	fdp = td->td_proc->p_fd;
130 	error = falloc(td, &fp, &fd);
131 	if (error)
132 		goto done2;
133 	fhold(fp);
134 	error = socreate(uap->domain, &so, uap->type, uap->protocol,
135 	    td->td_ucred, td);
136 	FILEDESC_LOCK(fdp);
137 	if (error) {
138 		if (fdp->fd_ofiles[fd] == fp) {
139 			fdp->fd_ofiles[fd] = NULL;
140 			FILEDESC_UNLOCK(fdp);
141 			fdrop(fp, td);
142 		} else
143 			FILEDESC_UNLOCK(fdp);
144 	} else {
145 		fp->f_data = so;	/* already has ref count */
146 		fp->f_flag = FREAD|FWRITE;
147 		fp->f_ops = &socketops;
148 		fp->f_type = DTYPE_SOCKET;
149 		FILEDESC_UNLOCK(fdp);
150 		td->td_retval[0] = fd;
151 	}
152 	fdrop(fp, td);
153 done2:
154 	mtx_unlock(&Giant);
155 	return (error);
156 }
157 
158 /*
159  * MPSAFE
160  */
161 /* ARGSUSED */
162 int
163 bind(td, uap)
164 	struct thread *td;
165 	register struct bind_args /* {
166 		int	s;
167 		caddr_t	name;
168 		int	namelen;
169 	} */ *uap;
170 {
171 	struct sockaddr *sa;
172 	int error;
173 
174 	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
175 		return (error);
176 
177 	return (kern_bind(td, uap->s, sa));
178 }
179 
180 int
181 kern_bind(td, fd, sa)
182 	struct thread *td;
183 	int fd;
184 	struct sockaddr *sa;
185 {
186 	struct socket *so;
187 	int error;
188 
189 	mtx_lock(&Giant);
190 	if ((error = fgetsock(td, fd, &so, NULL)) != 0)
191 		goto done2;
192 #ifdef MAC
193 	error = mac_check_socket_bind(td->td_ucred, so, sa);
194 	if (error)
195 		goto done1;
196 #endif
197 	error = sobind(so, sa, td);
198 #ifdef MAC
199 done1:
200 #endif
201 	fputsock(so);
202 done2:
203 	mtx_unlock(&Giant);
204 	FREE(sa, M_SONAME);
205 	return (error);
206 }
207 
208 /*
209  * MPSAFE
210  */
211 /* ARGSUSED */
212 int
213 listen(td, uap)
214 	struct thread *td;
215 	register struct listen_args /* {
216 		int	s;
217 		int	backlog;
218 	} */ *uap;
219 {
220 	struct socket *so;
221 	int error;
222 
223 	mtx_lock(&Giant);
224 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
225 #ifdef MAC
226 		error = mac_check_socket_listen(td->td_ucred, so);
227 		if (error)
228 			goto done;
229 #endif
230 		error = solisten(so, uap->backlog, td);
231 #ifdef MAC
232 done:
233 #endif
234 		fputsock(so);
235 	}
236 	mtx_unlock(&Giant);
237 	return(error);
238 }
239 
240 /*
241  * accept1()
     *
     * Common implementation behind accept() and oaccept().  Takes the first
     * socket off the completed-connection queue (so_comp) of the listening
     * socket uap->s, sleeping until one is available unless the listener has
     * SS_NBIO set, allocates a file descriptor for it, and, when uap->name is
     * non-NULL, copies the address returned by soaccept() and its length out
     * to uap->name / uap->anamelen.
     *
     * compat: when non-zero (and COMPAT_OLDSOCK is defined) the address is
     *	rewritten in place as a struct osockaddr before the copyout.
     *
     * Returns 0 with the new descriptor in td->td_retval[0], or an errno
     * value.  Giant is held for the whole call.
     *
242  * MPSAFE
243  */
244 static int
245 accept1(td, uap, compat)
246 	struct thread *td;
247 	register struct accept_args /* {
248 		int	s;
249 		caddr_t	name;
250 		int	*anamelen;
251 	} */ *uap;
252 	int compat;
253 {
254 	struct filedesc *fdp;
255 	struct file *nfp = NULL;
256 	struct sockaddr *sa;
257 	int namelen, error, s;
258 	struct socket *head, *so;
259 	int fd;
260 	u_int fflag;
261 	pid_t pgid;
262 
263 	mtx_lock(&Giant);
264 	fdp = td->td_proc->p_fd;
	/* The caller's buffer length is only fetched when an address is wanted. */
265 	if (uap->name) {
266 		error = copyin(uap->anamelen, &namelen, sizeof (namelen));
267 		if(error)
268 			goto done2;
269 		if (namelen < 0) {
270 			error = EINVAL;
271 			goto done2;
272 		}
273 	}
274 	error = fgetsock(td, uap->s, &head, &fflag);
275 	if (error)
276 		goto done2;
277 	s = splnet();
278 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
279 		splx(s);
280 		error = EINVAL;
281 		goto done;
282 	}
	/*
	 * Wait for a completed connection.  The early exits record their
	 * reason in head->so_error, which is picked up (and cleared) by the
	 * check that follows the loop.
	 */
283 	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
284 		if (head->so_state & SS_CANTRCVMORE) {
285 			head->so_error = ECONNABORTED;
286 			break;
287 		}
288 		if ((head->so_state & SS_NBIO) != 0) {
289 			head->so_error = EWOULDBLOCK;
290 			break;
291 		}
292 		error = tsleep(&head->so_timeo, PSOCK | PCATCH,
293 		    "accept", 0);
294 		if (error) {
295 			splx(s);
296 			goto done;
297 		}
298 	}
299 	if (head->so_error) {
300 		error = head->so_error;
301 		head->so_error = 0;
302 		splx(s);
303 		goto done;
304 	}
305 
306 	/*
307 	 * At this point we know that there is at least one connection
308 	 * ready to be accepted. Remove it from the queue prior to
309 	 * allocating the file descriptor for it since falloc() may
310 	 * block allowing another process to accept the connection
311 	 * instead.
312 	 */
313 	so = TAILQ_FIRST(&head->so_comp);
314 	TAILQ_REMOVE(&head->so_comp, so, so_list);
315 	head->so_qlen--;
316 
317 	error = falloc(td, &nfp, &fd);
318 	if (error) {
319 		/*
320 		 * Probably ran out of file descriptors. Put the
321 		 * unaccepted connection back onto the queue and
322 		 * do another wakeup so some other process might
323 		 * have a chance at it.
324 		 */
325 		TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
326 		head->so_qlen++;
327 		wakeup_one(&head->so_timeo);
328 		splx(s);
329 		goto done;
330 	}
	/* Extra hold on nfp; released by the fdrop() at the done: label. */
331 	fhold(nfp);
332 	td->td_retval[0] = fd;
333 
334 	/* connection has been removed from the listen queue */
335 	KNOTE(&head->so_rcv.sb_sel.si_note, 0);
336 
337 	so->so_state &= ~SS_COMP;
338 	so->so_head = NULL;
	/* Propagate the listener's SIGIO owner, if any, to the new socket. */
339 	pgid = fgetown(&head->so_sigio);
340 	if (pgid != 0)
341 		fsetown(pgid, &so->so_sigio);
342 
343 	FILE_LOCK(nfp);
344 	soref(so);			/* file descriptor reference */
345 	nfp->f_data = so;	/* nfp has ref count from falloc */
346 	nfp->f_flag = fflag;
347 	nfp->f_ops = &socketops;
348 	nfp->f_type = DTYPE_SOCKET;
349 	FILE_UNLOCK(nfp);
350 	sa = 0;
351 	error = soaccept(so, &sa);
352 	if (error) {
353 		/*
354 		 * return a namelen of zero for older code which might
355 	 	 * ignore the return value from accept.
356 		 */
357 		if (uap->name != NULL) {
358 			namelen = 0;
359 			(void) copyout(&namelen,
360 			    uap->anamelen, sizeof(*uap->anamelen));
361 		}
362 		goto noconnection;
363 	}
	/*
	 * No address came back: report a zero length if the caller asked for
	 * a name (jumping into the copyout below), otherwise we are done.
	 */
364 	if (sa == NULL) {
365 		namelen = 0;
366 		if (uap->name)
367 			goto gotnoname;
368 		splx(s);
369 		error = 0;
370 		goto done;
371 	}
372 	if (uap->name) {
373 		/* check sa_len before it is destroyed */
374 		if (namelen > sa->sa_len)
375 			namelen = sa->sa_len;
376 #ifdef COMPAT_OLDSOCK
377 		if (compat)
378 			((struct osockaddr *)sa)->sa_family =
379 			    sa->sa_family;
380 #endif
381 		error = copyout(sa, uap->name, (u_int)namelen);
382 		if (!error)
			/* Also entered from the sa == NULL case with namelen == 0. */
383 gotnoname:
384 			error = copyout(&namelen,
385 			    uap->anamelen, sizeof (*uap->anamelen));
386 	}
387 noconnection:
388 	if (sa)
389 		FREE(sa, M_SONAME);
390 
391 	/*
392 	 * close the new descriptor, assuming someone hasn't ripped it
393 	 * out from under us.
394 	 */
395 	if (error) {
396 		FILEDESC_LOCK(fdp);
397 		if (fdp->fd_ofiles[fd] == nfp) {
398 			fdp->fd_ofiles[fd] = NULL;
399 			FILEDESC_UNLOCK(fdp);
400 			fdrop(nfp, td);
401 		} else {
402 			FILEDESC_UNLOCK(fdp);
403 		}
404 	}
405 	splx(s);
406 
407 	/*
408 	 * Release explicitly held references before returning.
409 	 */
410 done:
411 	if (nfp != NULL)
412 		fdrop(nfp, td);
413 	fputsock(head);
414 done2:
415 	mtx_unlock(&Giant);
416 	return (error);
417 }
418 
/*
 * accept() system call: accept1() with the compat flag clear.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
accept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{
	int error;

	error = accept1(td, uap, 0);
	return (error);
}
430 
431 #ifdef COMPAT_OLDSOCK
/*
 * Old-style accept() system call: accept1() with the compat flag set, so
 * the returned address is converted to the struct osockaddr layout.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
oaccept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{
	int error;

	error = accept1(td, uap, 1);
	return (error);
}
443 #endif /* COMPAT_OLDSOCK */
444 
445 /*
446  * MPSAFE
447  */
448 /* ARGSUSED */
449 int
450 connect(td, uap)
451 	struct thread *td;
452 	register struct connect_args /* {
453 		int	s;
454 		caddr_t	name;
455 		int	namelen;
456 	} */ *uap;
457 {
458 	struct sockaddr *sa;
459 	int error;
460 
461 	error = getsockaddr(&sa, uap->name, uap->namelen);
462 	if (error)
463 		return error;
464 
465 	return (kern_connect(td, uap->s, sa));
466 }
467 
468 
469 int
470 kern_connect(td, fd, sa)
471 	struct thread *td;
472 	int fd;
473 	struct sockaddr *sa;
474 {
475 	struct socket *so;
476 	int error, s;
477 
478 	mtx_lock(&Giant);
479 	if ((error = fgetsock(td, fd, &so, NULL)) != 0)
480 		goto done2;
481 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
482 		error = EALREADY;
483 		goto done1;
484 	}
485 #ifdef MAC
486 	error = mac_check_socket_connect(td->td_ucred, so, sa);
487 	if (error)
488 		goto bad;
489 #endif
490 	error = soconnect(so, sa, td);
491 	if (error)
492 		goto bad;
493 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
494 		error = EINPROGRESS;
495 		goto done1;
496 	}
497 	s = splnet();
498 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
499 		error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0);
500 		if (error)
501 			break;
502 	}
503 	if (error == 0) {
504 		error = so->so_error;
505 		so->so_error = 0;
506 	}
507 	splx(s);
508 bad:
509 	so->so_state &= ~SS_ISCONNECTING;
510 	if (error == ERESTART)
511 		error = EINTR;
512 done1:
513 	fputsock(so);
514 done2:
515 	mtx_unlock(&Giant);
516 	FREE(sa, M_SONAME);
517 	return (error);
518 }
519 
520 /*
     * socketpair() system call.
     *
     * Creates two sockets of the requested domain/type/protocol, allocates a
     * file descriptor for each, connects them with soconnect2() (in both
     * directions for SOCK_DGRAM) and copies the two descriptor numbers out
     * to uap->rsv.
     *
     * Returns 0 or an errno value.  Setup failures unwind through the
     * free4/free3/free2/free1 labels, each of which falls through to the
     * next.  Giant is held for the whole call.
     *
521  * MPSAFE
522  */
523 int
524 socketpair(td, uap)
525 	struct thread *td;
526 	register struct socketpair_args /* {
527 		int	domain;
528 		int	type;
529 		int	protocol;
530 		int	*rsv;
531 	} */ *uap;
532 {
533 	register struct filedesc *fdp = td->td_proc->p_fd;
534 	struct file *fp1, *fp2;
535 	struct socket *so1, *so2;
536 	int fd, error, sv[2];
537 
538 	mtx_lock(&Giant);
539 	error = socreate(uap->domain, &so1, uap->type, uap->protocol,
540 	    td->td_ucred, td);
541 	if (error)
542 		goto done2;
543 	error = socreate(uap->domain, &so2, uap->type, uap->protocol,
544 	    td->td_ucred, td);
545 	if (error)
546 		goto free1;
547 	error = falloc(td, &fp1, &fd);
548 	if (error)
549 		goto free2;
	/* Extra hold on fp1; dropped on both the success and error paths. */
550 	fhold(fp1);
551 	sv[0] = fd;
552 	fp1->f_data = so1;	/* so1 already has ref count */
553 	error = falloc(td, &fp2, &fd);
554 	if (error)
555 		goto free3;
556 	fhold(fp2);
557 	fp2->f_data = so2;	/* so2 already has ref count */
558 	sv[1] = fd;
559 	error = soconnect2(so1, so2);
560 	if (error)
561 		goto free4;
562 	if (uap->type == SOCK_DGRAM) {
563 		/*
564 		 * Datagram socket connection is asymmetric.
565 		 */
566 		 error = soconnect2(so2, so1);
567 		 if (error)
568 			goto free4;
569 	}
	/*
	 * f_ops/f_type are only set to the socket values once both sockets
	 * are connected; the error paths above run before this point and
	 * close the sockets explicitly with soclose().
	 */
570 	FILE_LOCK(fp1);
571 	fp1->f_flag = FREAD|FWRITE;
572 	fp1->f_ops = &socketops;
573 	fp1->f_type = DTYPE_SOCKET;
574 	FILE_UNLOCK(fp1);
575 	FILE_LOCK(fp2);
576 	fp2->f_flag = FREAD|FWRITE;
577 	fp2->f_ops = &socketops;
578 	fp2->f_type = DTYPE_SOCKET;
579 	FILE_UNLOCK(fp2);
	/*
	 * NOTE(review): if this copyout() fails the error is returned, but
	 * neither descriptor is removed from the descriptor table here, so
	 * both stay open in the process without the caller learning their
	 * numbers.  Confirm whether that is intended.
	 */
580 	error = copyout(sv, uap->rsv, 2 * sizeof (int));
581 	fdrop(fp1, td);
582 	fdrop(fp2, td);
583 	goto done2;
	/*
	 * Error unwinding.  For each file: clear its descriptor slot if it
	 * is still ours (dropping one reference), then drop the hold taken
	 * with fhold() above.
	 */
584 free4:
585 	FILEDESC_LOCK(fdp);
586 	if (fdp->fd_ofiles[sv[1]] == fp2) {
587 		fdp->fd_ofiles[sv[1]] = NULL;
588 		FILEDESC_UNLOCK(fdp);
589 		fdrop(fp2, td);
590 	} else
591 		FILEDESC_UNLOCK(fdp);
592 	fdrop(fp2, td);
593 free3:
594 	FILEDESC_LOCK(fdp);
595 	if (fdp->fd_ofiles[sv[0]] == fp1) {
596 		fdp->fd_ofiles[sv[0]] = NULL;
597 		FILEDESC_UNLOCK(fdp);
598 		fdrop(fp1, td);
599 	} else
600 		FILEDESC_UNLOCK(fdp);
601 	fdrop(fp1, td);
602 free2:
603 	(void)soclose(so2);
604 free1:
605 	(void)soclose(so1);
606 done2:
607 	mtx_unlock(&Giant);
608 	return (error);
609 }
610 
/*
 * sendit()
 *
 * Common back end for the send-side system calls in this file (sendto(),
 * sendmsg(), osend(), osendmsg()).  Builds a uio over the iovec array in
 * *mp, copies in the optional destination address (msg_name) and control
 * data (msg_control), and hands everything to the protocol's pru_sosend().
 *
 * td:    calling thread.
 * s:     socket descriptor.
 * mp:    message header.  msg_iov is read directly here, so callers pass a
 *        kernel copy of the iovec array; msg_name and msg_control are
 *        copied in by getsockaddr() and sockargs().
 * flags: passed through to pru_sosend().
 *
 * On success the number of bytes sent is stored in td->td_retval[0] and 0
 * is returned.  Giant is not taken here; the callers in this file hold it
 * around the call.
 */
611 static int
612 sendit(td, s, mp, flags)
613 	register struct thread *td;
614 	int s;
615 	register struct msghdr *mp;
616 	int flags;
617 {
618 	struct uio auio;
619 	register struct iovec *iov;
620 	register int i;
621 	struct mbuf *control;
622 	struct sockaddr *to = NULL;
623 	int len, error;
624 	struct socket *so;
625 #ifdef KTRACE
626 	struct iovec *ktriov = NULL;
627 	struct uio ktruio;
628 	int iovlen;
629 #endif
630 
631 	if ((error = fgetsock(td, s, &so, NULL)) != 0)
632 		return (error);
633 
634 #ifdef MAC
635 	error = mac_check_socket_send(td->td_ucred, so);
636 	if (error)
637 		goto bad;
638 #endif
639 
640 	auio.uio_iov = mp->msg_iov;
641 	auio.uio_iovcnt = mp->msg_iovlen;
642 	auio.uio_segflg = UIO_USERSPACE;
643 	auio.uio_rw = UIO_WRITE;
644 	auio.uio_td = td;
645 	auio.uio_offset = 0;			/* XXX */
646 	auio.uio_resid = 0;
647 	iov = mp->msg_iov;
	/* Sum the segment lengths; a negative running total is rejected. */
648 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
649 		if ((auio.uio_resid += iov->iov_len) < 0) {
650 			error = EINVAL;
651 			goto bad;
652 		}
653 	}
654 	if (mp->msg_name) {
655 		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
656 		if (error)
657 			goto bad;
658 	}
659 	if (mp->msg_control) {
		/*
		 * Control data shorter than a cmsghdr is invalid, except for
		 * old-style (MSG_COMPAT) callers, whose data gets a cmsghdr
		 * prepended below.
		 */
660 		if (mp->msg_controllen < sizeof(struct cmsghdr)
661 #ifdef COMPAT_OLDSOCK
662 		    && mp->msg_flags != MSG_COMPAT
663 #endif
664 		) {
665 			error = EINVAL;
666 			goto bad;
667 		}
668 		error = sockargs(&control, mp->msg_control,
669 		    mp->msg_controllen, MT_CONTROL);
670 		if (error)
671 			goto bad;
672 #ifdef COMPAT_OLDSOCK
673 		if (mp->msg_flags == MSG_COMPAT) {
674 			register struct cmsghdr *cm;
675 
676 			M_PREPEND(control, sizeof(*cm), 0);
677 			if (control == 0) {
678 				error = ENOBUFS;
679 				goto bad;
680 			} else {
681 				cm = mtod(control, struct cmsghdr *);
682 				cm->cmsg_len = control->m_len;
683 				cm->cmsg_level = SOL_SOCKET;
684 				cm->cmsg_type = SCM_RIGHTS;
685 			}
686 		}
687 #endif
688 	} else {
689 		control = 0;
690 	}
691 #ifdef KTRACE
	/* Snapshot the iovec array and uio for the ktrace record below. */
692 	if (KTRPOINT(td, KTR_GENIO)) {
693 		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
694 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, 0);
695 		bcopy(auio.uio_iov, ktriov, iovlen);
696 		ktruio = auio;
697 	}
698 #endif
699 	len = auio.uio_resid;
700 	error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control,
701 						     flags, td);
702 	if (error) {
		/*
		 * An interrupted or would-block send that still moved some
		 * data is reported as a short write rather than an error.
		 */
703 		if (auio.uio_resid != len && (error == ERESTART ||
704 		    error == EINTR || error == EWOULDBLOCK))
705 			error = 0;
706 		/* Generation of SIGPIPE can be controlled per socket */
707 		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) {
708 			PROC_LOCK(td->td_proc);
709 			psignal(td->td_proc, SIGPIPE);
710 			PROC_UNLOCK(td->td_proc);
711 		}
712 	}
713 	if (error == 0)
714 		td->td_retval[0] = len - auio.uio_resid;
715 #ifdef KTRACE
716 	if (ktriov != NULL) {
717 		if (error == 0) {
718 			ktruio.uio_iov = ktriov;
719 			ktruio.uio_resid = td->td_retval[0];
720 			ktrgenio(s, UIO_WRITE, &ktruio, error);
721 		}
722 		FREE(ktriov, M_TEMP);
723 	}
724 #endif
725 bad:
726 	fputsock(so);
727 	if (to)
728 		FREE(to, M_SONAME);
729 	return (error);
730 }
731 
732 /*
733  * MPSAFE
734  */
735 int
736 sendto(td, uap)
737 	struct thread *td;
738 	register struct sendto_args /* {
739 		int	s;
740 		caddr_t	buf;
741 		size_t	len;
742 		int	flags;
743 		caddr_t	to;
744 		int	tolen;
745 	} */ *uap;
746 {
747 	struct msghdr msg;
748 	struct iovec aiov;
749 	int error;
750 
751 	msg.msg_name = uap->to;
752 	msg.msg_namelen = uap->tolen;
753 	msg.msg_iov = &aiov;
754 	msg.msg_iovlen = 1;
755 	msg.msg_control = 0;
756 #ifdef COMPAT_OLDSOCK
757 	msg.msg_flags = 0;
758 #endif
759 	aiov.iov_base = uap->buf;
760 	aiov.iov_len = uap->len;
761 	mtx_lock(&Giant);
762 	error = sendit(td, uap->s, &msg, uap->flags);
763 	mtx_unlock(&Giant);
764 	return (error);
765 }
766 
767 #ifdef COMPAT_OLDSOCK
768 /*
769  * MPSAFE
770  */
771 int
772 osend(td, uap)
773 	struct thread *td;
774 	register struct osend_args /* {
775 		int	s;
776 		caddr_t	buf;
777 		int	len;
778 		int	flags;
779 	} */ *uap;
780 {
781 	struct msghdr msg;
782 	struct iovec aiov;
783 	int error;
784 
785 	msg.msg_name = 0;
786 	msg.msg_namelen = 0;
787 	msg.msg_iov = &aiov;
788 	msg.msg_iovlen = 1;
789 	aiov.iov_base = uap->buf;
790 	aiov.iov_len = uap->len;
791 	msg.msg_control = 0;
792 	msg.msg_flags = 0;
793 	mtx_lock(&Giant);
794 	error = sendit(td, uap->s, &msg, uap->flags);
795 	mtx_unlock(&Giant);
796 	return (error);
797 }
798 
799 /*
800  * MPSAFE
801  */
802 int
803 osendmsg(td, uap)
804 	struct thread *td;
805 	register struct osendmsg_args /* {
806 		int	s;
807 		caddr_t	msg;
808 		int	flags;
809 	} */ *uap;
810 {
811 	struct msghdr msg;
812 	struct iovec aiov[UIO_SMALLIOV], *iov;
813 	int error;
814 
815 	mtx_lock(&Giant);
816 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
817 	if (error)
818 		goto done2;
819 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
820 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
821 			error = EMSGSIZE;
822 			goto done2;
823 		}
824 		MALLOC(iov, struct iovec *,
825 		      sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
826 		      0);
827 	} else {
828 		iov = aiov;
829 	}
830 	error = copyin(msg.msg_iov, iov,
831 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
832 	if (error)
833 		goto done;
834 	msg.msg_flags = MSG_COMPAT;
835 	msg.msg_iov = iov;
836 	error = sendit(td, uap->s, &msg, uap->flags);
837 done:
838 	if (iov != aiov)
839 		FREE(iov, M_IOV);
840 done2:
841 	mtx_unlock(&Giant);
842 	return (error);
843 }
844 #endif
845 
846 /*
847  * MPSAFE
848  */
849 int
850 sendmsg(td, uap)
851 	struct thread *td;
852 	register struct sendmsg_args /* {
853 		int	s;
854 		caddr_t	msg;
855 		int	flags;
856 	} */ *uap;
857 {
858 	struct msghdr msg;
859 	struct iovec aiov[UIO_SMALLIOV], *iov;
860 	int error;
861 
862 	mtx_lock(&Giant);
863 	error = copyin(uap->msg, &msg, sizeof (msg));
864 	if (error)
865 		goto done2;
866 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
867 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
868 			error = EMSGSIZE;
869 			goto done2;
870 		}
871 		MALLOC(iov, struct iovec *,
872 		       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
873 		       0);
874 	} else {
875 		iov = aiov;
876 	}
877 	if (msg.msg_iovlen &&
878 	    (error = copyin(msg.msg_iov, iov,
879 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
880 		goto done;
881 	msg.msg_iov = iov;
882 #ifdef COMPAT_OLDSOCK
883 	msg.msg_flags = 0;
884 #endif
885 	error = sendit(td, uap->s, &msg, uap->flags);
886 done:
887 	if (iov != aiov)
888 		FREE(iov, M_IOV);
889 done2:
890 	mtx_unlock(&Giant);
891 	return (error);
892 }
893 
/*
 * recvit()
 *
 * Common back end for the receive-side system calls in this file
 * (recvfrom(), recvmsg(), orecv(), orecvmsg()).  Builds a uio over the
 * iovec array in *mp, calls the protocol's pru_soreceive(), then copies the
 * source address and any control data out to the user buffers named by
 * msg_name and msg_control.
 *
 * td:       calling thread.
 * s:        socket descriptor.
 * mp:       message header.  msg_iov is read directly here, so callers pass
 *           a kernel copy of the iovec array.  msg_flags supplies the
 *           receive flags and is updated by pru_soreceive(); msg_namelen
 *           and msg_controllen are updated with the lengths copied out.
 * namelenp: if non-NULL, user address that receives the resulting address
 *           length via copyout().
 *
 * On success the number of bytes received is stored in td->td_retval[0]
 * and 0 is returned.  Giant is not taken here; the callers in this file
 * hold it around the call.
 */
894 static int
895 recvit(td, s, mp, namelenp)
896 	register struct thread *td;
897 	int s;
898 	register struct msghdr *mp;
899 	void *namelenp;
900 {
901 	struct uio auio;
902 	register struct iovec *iov;
903 	register int i;
904 	int len, error;
905 	struct mbuf *m, *control = 0;
906 	caddr_t ctlbuf;
907 	struct socket *so;
908 	struct sockaddr *fromsa = 0;
909 #ifdef KTRACE
910 	struct iovec *ktriov = NULL;
911 	struct uio ktruio;
912 	int iovlen;
913 #endif
914 
915 	if ((error = fgetsock(td, s, &so, NULL)) != 0)
916 		return (error);
917 
918 #ifdef MAC
919 	error = mac_check_socket_receive(td->td_ucred, so);
920 	if (error) {
921 		fputsock(so);
922 		return (error);
923 	}
924 #endif
925 
926 	auio.uio_iov = mp->msg_iov;
927 	auio.uio_iovcnt = mp->msg_iovlen;
928 	auio.uio_segflg = UIO_USERSPACE;
929 	auio.uio_rw = UIO_READ;
930 	auio.uio_td = td;
931 	auio.uio_offset = 0;			/* XXX */
932 	auio.uio_resid = 0;
933 	iov = mp->msg_iov;
	/* Sum the segment lengths; a negative running total is rejected. */
934 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
935 		if ((auio.uio_resid += iov->iov_len) < 0) {
936 			fputsock(so);
937 			return (EINVAL);
938 		}
939 	}
940 #ifdef KTRACE
	/* Snapshot the iovec array and uio for the ktrace record below. */
941 	if (KTRPOINT(td, KTR_GENIO)) {
942 		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
943 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, 0);
944 		bcopy(auio.uio_iov, ktriov, iovlen);
945 		ktruio = auio;
946 	}
947 #endif
948 	len = auio.uio_resid;
	/* Control data is only requested when the caller gave a buffer. */
949 	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
950 	    (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
951 	    &mp->msg_flags);
952 	if (error) {
		/*
		 * An interrupted or would-block receive that still moved
		 * some data is reported as a short read, not an error.
		 */
953 		if (auio.uio_resid != len && (error == ERESTART ||
954 		    error == EINTR || error == EWOULDBLOCK))
955 			error = 0;
956 	}
957 #ifdef KTRACE
958 	if (ktriov != NULL) {
959 		if (error == 0) {
960 			ktruio.uio_iov = ktriov;
961 			ktruio.uio_resid = len - auio.uio_resid;
962 			ktrgenio(s, UIO_READ, &ktruio, error);
963 		}
964 		FREE(ktriov, M_TEMP);
965 	}
966 #endif
967 	if (error)
968 		goto out;
969 	td->td_retval[0] = len - auio.uio_resid;
970 	if (mp->msg_name) {
		/* len is reused from here on as the address length. */
971 		len = mp->msg_namelen;
972 		if (len <= 0 || fromsa == 0)
973 			len = 0;
974 		else {
975 			/* save sa_len before it is destroyed by MSG_COMPAT */
976 			len = MIN(len, fromsa->sa_len);
977 #ifdef COMPAT_OLDSOCK
978 			if (mp->msg_flags & MSG_COMPAT)
979 				((struct osockaddr *)fromsa)->sa_family =
980 				    fromsa->sa_family;
981 #endif
982 			error = copyout(fromsa, mp->msg_name, (unsigned)len);
983 			if (error)
984 				goto out;
985 		}
986 		mp->msg_namelen = len;
987 		if (namelenp &&
988 		    (error = copyout(&len, namelenp, sizeof (int)))) {
			/*
			 * With COMPAT_OLDSOCK the "else" below binds to the
			 * goto, so MSG_COMPAT callers ignore this failure.
			 */
989 #ifdef COMPAT_OLDSOCK
990 			if (mp->msg_flags & MSG_COMPAT)
991 				error = 0;	/* old recvfrom didn't check */
992 			else
993 #endif
994 			goto out;
995 		}
996 	}
997 	if (mp->msg_control) {
998 #ifdef COMPAT_OLDSOCK
999 		/*
1000 		 * We assume that old recvmsg calls won't receive access
1001 		 * rights and other control info, esp. as control info
1002 		 * is always optional and those options didn't exist in 4.3.
1003 		 * If we receive rights, trim the cmsghdr; anything else
1004 		 * is tossed.
1005 		 */
1006 		if (control && mp->msg_flags & MSG_COMPAT) {
1007 			if (mtod(control, struct cmsghdr *)->cmsg_level !=
1008 			    SOL_SOCKET ||
1009 			    mtod(control, struct cmsghdr *)->cmsg_type !=
1010 			    SCM_RIGHTS) {
1011 				mp->msg_controllen = 0;
1012 				goto out;
1013 			}
1014 			control->m_len -= sizeof (struct cmsghdr);
1015 			control->m_data += sizeof (struct cmsghdr);
1016 		}
1017 #endif
		/* len is reused again, now as the space left in msg_control. */
1018 		len = mp->msg_controllen;
1019 		m = control;
1020 		mp->msg_controllen = 0;
1021 		ctlbuf = mp->msg_control;
1022 
		/*
		 * Walk the control mbuf chain, copying out as much as the
		 * user buffer holds; an mbuf that does not fit entirely sets
		 * MSG_CTRUNC and is copied partially.
		 */
1023 		while (m && len > 0) {
1024 			unsigned int tocopy;
1025 
1026 			if (len >= m->m_len)
1027 				tocopy = m->m_len;
1028 			else {
1029 				mp->msg_flags |= MSG_CTRUNC;
1030 				tocopy = len;
1031 			}
1032 
1033 			if ((error = copyout(mtod(m, caddr_t),
1034 					ctlbuf, tocopy)) != 0)
1035 				goto out;
1036 
1037 			ctlbuf += tocopy;
1038 			len -= tocopy;
1039 			m = m->m_next;
1040 		}
1041 		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
1042 	}
1043 out:
1044 	fputsock(so);
1045 	if (fromsa)
1046 		FREE(fromsa, M_SONAME);
1047 	if (control)
1048 		m_freem(control);
1049 	return (error);
1050 }
1051 
1052 /*
1053  * MPSAFE
1054  */
1055 int
1056 recvfrom(td, uap)
1057 	struct thread *td;
1058 	register struct recvfrom_args /* {
1059 		int	s;
1060 		caddr_t	buf;
1061 		size_t	len;
1062 		int	flags;
1063 		caddr_t	from;
1064 		int	*fromlenaddr;
1065 	} */ *uap;
1066 {
1067 	struct msghdr msg;
1068 	struct iovec aiov;
1069 	int error;
1070 
1071 	mtx_lock(&Giant);
1072 	if (uap->fromlenaddr) {
1073 		error = copyin(uap->fromlenaddr,
1074 		    &msg.msg_namelen, sizeof (msg.msg_namelen));
1075 		if (error)
1076 			goto done2;
1077 	} else {
1078 		msg.msg_namelen = 0;
1079 	}
1080 	msg.msg_name = uap->from;
1081 	msg.msg_iov = &aiov;
1082 	msg.msg_iovlen = 1;
1083 	aiov.iov_base = uap->buf;
1084 	aiov.iov_len = uap->len;
1085 	msg.msg_control = 0;
1086 	msg.msg_flags = uap->flags;
1087 	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1088 done2:
1089 	mtx_unlock(&Giant);
1090 	return(error);
1091 }
1092 
1093 #ifdef COMPAT_OLDSOCK
1094 /*
1095  * MPSAFE
1096  */
1097 int
1098 orecvfrom(td, uap)
1099 	struct thread *td;
1100 	struct recvfrom_args *uap;
1101 {
1102 
1103 	uap->flags |= MSG_COMPAT;
1104 	return (recvfrom(td, uap));
1105 }
1106 #endif
1107 
1108 
1109 #ifdef COMPAT_OLDSOCK
1110 /*
1111  * MPSAFE
1112  */
1113 int
1114 orecv(td, uap)
1115 	struct thread *td;
1116 	register struct orecv_args /* {
1117 		int	s;
1118 		caddr_t	buf;
1119 		int	len;
1120 		int	flags;
1121 	} */ *uap;
1122 {
1123 	struct msghdr msg;
1124 	struct iovec aiov;
1125 	int error;
1126 
1127 	mtx_lock(&Giant);
1128 	msg.msg_name = 0;
1129 	msg.msg_namelen = 0;
1130 	msg.msg_iov = &aiov;
1131 	msg.msg_iovlen = 1;
1132 	aiov.iov_base = uap->buf;
1133 	aiov.iov_len = uap->len;
1134 	msg.msg_control = 0;
1135 	msg.msg_flags = uap->flags;
1136 	error = recvit(td, uap->s, &msg, NULL);
1137 	mtx_unlock(&Giant);
1138 	return (error);
1139 }
1140 
1141 /*
1142  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1143  * overlays the new one, missing only the flags, and with the (old) access
1144  * rights where the control fields are now.
1145  *
1146  * MPSAFE
1147  */
1148 int
1149 orecvmsg(td, uap)
1150 	struct thread *td;
1151 	register struct orecvmsg_args /* {
1152 		int	s;
1153 		struct	omsghdr *msg;
1154 		int	flags;
1155 	} */ *uap;
1156 {
1157 	struct msghdr msg;
1158 	struct iovec aiov[UIO_SMALLIOV], *iov;
1159 	int error;
1160 
1161 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1162 	if (error)
1163 		return (error);
1164 
1165 	mtx_lock(&Giant);
1166 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1167 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1168 			error = EMSGSIZE;
1169 			goto done2;
1170 		}
1171 		MALLOC(iov, struct iovec *,
1172 		      sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1173 		      0);
1174 	} else {
1175 		iov = aiov;
1176 	}
1177 	msg.msg_flags = uap->flags | MSG_COMPAT;
1178 	error = copyin(msg.msg_iov, iov,
1179 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1180 	if (error)
1181 		goto done;
1182 	msg.msg_iov = iov;
1183 	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1184 
1185 	if (msg.msg_controllen && error == 0)
1186 		error = copyout(&msg.msg_controllen,
1187 		    &uap->msg->msg_accrightslen, sizeof (int));
1188 done:
1189 	if (iov != aiov)
1190 		FREE(iov, M_IOV);
1191 done2:
1192 	mtx_unlock(&Giant);
1193 	return (error);
1194 }
1195 #endif
1196 
1197 /*
1198  * MPSAFE
1199  */
1200 int
1201 recvmsg(td, uap)
1202 	struct thread *td;
1203 	register struct recvmsg_args /* {
1204 		int	s;
1205 		struct	msghdr *msg;
1206 		int	flags;
1207 	} */ *uap;
1208 {
1209 	struct msghdr msg;
1210 	struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
1211 	register int error;
1212 
1213 	mtx_lock(&Giant);
1214 	error = copyin(uap->msg, &msg, sizeof (msg));
1215 	if (error)
1216 		goto done2;
1217 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1218 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1219 			error = EMSGSIZE;
1220 			goto done2;
1221 		}
1222 		MALLOC(iov, struct iovec *,
1223 		       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1224 		       0);
1225 	} else {
1226 		iov = aiov;
1227 	}
1228 #ifdef COMPAT_OLDSOCK
1229 	msg.msg_flags = uap->flags &~ MSG_COMPAT;
1230 #else
1231 	msg.msg_flags = uap->flags;
1232 #endif
1233 	uiov = msg.msg_iov;
1234 	msg.msg_iov = iov;
1235 	error = copyin(uiov, iov,
1236 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1237 	if (error)
1238 		goto done;
1239 	error = recvit(td, uap->s, &msg, NULL);
1240 	if (!error) {
1241 		msg.msg_iov = uiov;
1242 		error = copyout(&msg, uap->msg, sizeof(msg));
1243 	}
1244 done:
1245 	if (iov != aiov)
1246 		FREE(iov, M_IOV);
1247 done2:
1248 	mtx_unlock(&Giant);
1249 	return (error);
1250 }
1251 
1252 /*
1253  * MPSAFE
1254  */
1255 /* ARGSUSED */
1256 int
1257 shutdown(td, uap)
1258 	struct thread *td;
1259 	register struct shutdown_args /* {
1260 		int	s;
1261 		int	how;
1262 	} */ *uap;
1263 {
1264 	struct socket *so;
1265 	int error;
1266 
1267 	mtx_lock(&Giant);
1268 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1269 		error = soshutdown(so, uap->how);
1270 		fputsock(so);
1271 	}
1272 	mtx_unlock(&Giant);
1273 	return(error);
1274 }
1275 
1276 /*
1277  * MPSAFE
1278  */
1279 /* ARGSUSED */
1280 int
1281 setsockopt(td, uap)
1282 	struct thread *td;
1283 	register struct setsockopt_args /* {
1284 		int	s;
1285 		int	level;
1286 		int	name;
1287 		caddr_t	val;
1288 		int	valsize;
1289 	} */ *uap;
1290 {
1291 	struct socket *so;
1292 	struct sockopt sopt;
1293 	int error;
1294 
1295 	if (uap->val == 0 && uap->valsize != 0)
1296 		return (EFAULT);
1297 	if (uap->valsize < 0)
1298 		return (EINVAL);
1299 
1300 	mtx_lock(&Giant);
1301 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1302 		sopt.sopt_dir = SOPT_SET;
1303 		sopt.sopt_level = uap->level;
1304 		sopt.sopt_name = uap->name;
1305 		sopt.sopt_val = uap->val;
1306 		sopt.sopt_valsize = uap->valsize;
1307 		sopt.sopt_td = td;
1308 		error = sosetopt(so, &sopt);
1309 		fputsock(so);
1310 	}
1311 	mtx_unlock(&Giant);
1312 	return(error);
1313 }
1314 
1315 /*
1316  * MPSAFE
1317  */
1318 /* ARGSUSED */
1319 int
1320 getsockopt(td, uap)
1321 	struct thread *td;
1322 	register struct getsockopt_args /* {
1323 		int	s;
1324 		int	level;
1325 		int	name;
1326 		caddr_t	val;
1327 		int	*avalsize;
1328 	} */ *uap;
1329 {
1330 	int	valsize, error;
1331 	struct  socket *so;
1332 	struct	sockopt sopt;
1333 
1334 	mtx_lock(&Giant);
1335 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1336 		goto done2;
1337 	if (uap->val) {
1338 		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1339 		if (error)
1340 			goto done1;
1341 		if (valsize < 0) {
1342 			error = EINVAL;
1343 			goto done1;
1344 		}
1345 	} else {
1346 		valsize = 0;
1347 	}
1348 
1349 	sopt.sopt_dir = SOPT_GET;
1350 	sopt.sopt_level = uap->level;
1351 	sopt.sopt_name = uap->name;
1352 	sopt.sopt_val = uap->val;
1353 	sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1354 	sopt.sopt_td = td;
1355 
1356 	error = sogetopt(so, &sopt);
1357 	if (error == 0) {
1358 		valsize = sopt.sopt_valsize;
1359 		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1360 	}
1361 done1:
1362 	fputsock(so);
1363 done2:
1364 	mtx_unlock(&Giant);
1365 	return (error);
1366 }
1367 
1368 /*
1369  * getsockname1() - Get socket name.
1370  *
1371  * MPSAFE
1372  */
1373 /* ARGSUSED */
1374 static int
1375 getsockname1(td, uap, compat)
1376 	struct thread *td;
1377 	register struct getsockname_args /* {
1378 		int	fdes;
1379 		caddr_t	asa;
1380 		int	*alen;
1381 	} */ *uap;
1382 	int compat;
1383 {
1384 	struct socket *so;
1385 	struct sockaddr *sa;
1386 	int len, error;
1387 
1388 	mtx_lock(&Giant);
1389 	if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1390 		goto done2;
1391 	error = copyin(uap->alen, &len, sizeof (len));
1392 	if (error)
1393 		goto done1;
1394 	if (len < 0) {
1395 		error = EINVAL;
1396 		goto done1;
1397 	}
1398 	sa = 0;
1399 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1400 	if (error)
1401 		goto bad;
1402 	if (sa == 0) {
1403 		len = 0;
1404 		goto gotnothing;
1405 	}
1406 
1407 	len = MIN(len, sa->sa_len);
1408 #ifdef COMPAT_OLDSOCK
1409 	if (compat)
1410 		((struct osockaddr *)sa)->sa_family = sa->sa_family;
1411 #endif
1412 	error = copyout(sa, uap->asa, (u_int)len);
1413 	if (error == 0)
1414 gotnothing:
1415 		error = copyout(&len, uap->alen, sizeof (len));
1416 bad:
1417 	if (sa)
1418 		FREE(sa, M_SONAME);
1419 done1:
1420 	fputsock(so);
1421 done2:
1422 	mtx_unlock(&Giant);
1423 	return (error);
1424 }
1425 
/*
 * getsockname() - System call entry point; defers to getsockname1()
 * with the compat flag cleared.
 *
 * MPSAFE
 */
int
getsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(td, uap, 0);
	return (error);
}
1437 
#ifdef COMPAT_OLDSOCK
/*
 * ogetsockname() - Old-socket-ABI entry point; defers to getsockname1()
 * with the compat flag set.
 *
 * MPSAFE
 */
int
ogetsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(td, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
1451 
1452 /*
1453  * getpeername1() - Get name of peer for connected socket.
1454  *
1455  * MPSAFE
1456  */
1457 /* ARGSUSED */
1458 static int
1459 getpeername1(td, uap, compat)
1460 	struct thread *td;
1461 	register struct getpeername_args /* {
1462 		int	fdes;
1463 		caddr_t	asa;
1464 		int	*alen;
1465 	} */ *uap;
1466 	int compat;
1467 {
1468 	struct socket *so;
1469 	struct sockaddr *sa;
1470 	int len, error;
1471 
1472 	mtx_lock(&Giant);
1473 	if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1474 		goto done2;
1475 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1476 		error = ENOTCONN;
1477 		goto done1;
1478 	}
1479 	error = copyin(uap->alen, &len, sizeof (len));
1480 	if (error)
1481 		goto done1;
1482 	if (len < 0) {
1483 		error = EINVAL;
1484 		goto done1;
1485 	}
1486 	sa = 0;
1487 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1488 	if (error)
1489 		goto bad;
1490 	if (sa == 0) {
1491 		len = 0;
1492 		goto gotnothing;
1493 	}
1494 	len = MIN(len, sa->sa_len);
1495 #ifdef COMPAT_OLDSOCK
1496 	if (compat)
1497 		((struct osockaddr *)sa)->sa_family =
1498 		    sa->sa_family;
1499 #endif
1500 	error = copyout(sa, uap->asa, (u_int)len);
1501 	if (error)
1502 		goto bad;
1503 gotnothing:
1504 	error = copyout(&len, uap->alen, sizeof (len));
1505 bad:
1506 	if (sa)
1507 		FREE(sa, M_SONAME);
1508 done1:
1509 	fputsock(so);
1510 done2:
1511 	mtx_unlock(&Giant);
1512 	return (error);
1513 }
1514 
/*
 * getpeername() - System call entry point; defers to getpeername1()
 * with the compat flag cleared.
 *
 * MPSAFE
 */
int
getpeername(td, uap)
	struct thread *td;
	struct getpeername_args *uap;
{
	int error;

	error = getpeername1(td, uap, 0);
	return (error);
}
1526 
#ifdef COMPAT_OLDSOCK
/*
 * ogetpeername() - Old-socket-ABI entry point; defers to getpeername1()
 * with the compat flag set.
 *
 * MPSAFE
 */
int
ogetpeername(td, uap)
	struct thread *td;
	struct ogetpeername_args *uap;
{
	struct getpeername_args *gpa;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	gpa = (struct getpeername_args *)uap;
	return (getpeername1(td, gpa, 1));
}
#endif /* COMPAT_OLDSOCK */
1541 
1542 int
1543 sockargs(mp, buf, buflen, type)
1544 	struct mbuf **mp;
1545 	caddr_t buf;
1546 	int buflen, type;
1547 {
1548 	register struct sockaddr *sa;
1549 	register struct mbuf *m;
1550 	int error;
1551 
1552 	if ((u_int)buflen > MLEN) {
1553 #ifdef COMPAT_OLDSOCK
1554 		if (type == MT_SONAME && (u_int)buflen <= 112)
1555 			buflen = MLEN;		/* unix domain compat. hack */
1556 		else
1557 #endif
1558 		return (EINVAL);
1559 	}
1560 	m = m_get(0, type);
1561 	if (m == NULL)
1562 		return (ENOBUFS);
1563 	m->m_len = buflen;
1564 	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1565 	if (error)
1566 		(void) m_free(m);
1567 	else {
1568 		*mp = m;
1569 		if (type == MT_SONAME) {
1570 			sa = mtod(m, struct sockaddr *);
1571 
1572 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1573 			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1574 				sa->sa_family = sa->sa_len;
1575 #endif
1576 			sa->sa_len = buflen;
1577 		}
1578 	}
1579 	return (error);
1580 }
1581 
1582 int
1583 getsockaddr(namp, uaddr, len)
1584 	struct sockaddr **namp;
1585 	caddr_t uaddr;
1586 	size_t len;
1587 {
1588 	struct sockaddr *sa;
1589 	int error;
1590 
1591 	if (len > SOCK_MAXADDRLEN)
1592 		return ENAMETOOLONG;
1593 	MALLOC(sa, struct sockaddr *, len, M_SONAME, 0);
1594 	error = copyin(uaddr, sa, len);
1595 	if (error) {
1596 		FREE(sa, M_SONAME);
1597 	} else {
1598 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1599 		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1600 			sa->sa_family = sa->sa_len;
1601 #endif
1602 		sa->sa_len = len;
1603 		*namp = sa;
1604 	}
1605 	return error;
1606 }
1607 
1608 /*
1609  * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
1610  */
1611 static void
1612 sf_buf_init(void *arg)
1613 {
1614 	int i;
1615 
1616 	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
1617 	mtx_lock(&sf_freelist.sf_lock);
1618 	SLIST_INIT(&sf_freelist.sf_head);
1619 	sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE);
1620 	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
1621 	    M_NOWAIT | M_ZERO);
1622 	for (i = 0; i < nsfbufs; i++) {
1623 		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
1624 		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
1625 	}
1626 	sf_buf_alloc_want = 0;
1627 	mtx_unlock(&sf_freelist.sf_lock);
1628 }
1629 
1630 /*
1631  * Get an sf_buf from the freelist. Will block if none are available.
1632  */
1633 struct sf_buf *
1634 sf_buf_alloc()
1635 {
1636 	struct sf_buf *sf;
1637 	int error;
1638 
1639 	mtx_lock(&sf_freelist.sf_lock);
1640 	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
1641 		sf_buf_alloc_want++;
1642 		error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH,
1643 		    "sfbufa", 0);
1644 		sf_buf_alloc_want--;
1645 
1646 		/*
1647 		 * If we got a signal, don't risk going back to sleep.
1648 		 */
1649 		if (error)
1650 			break;
1651 	}
1652 	if (sf != NULL)
1653 		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
1654 	mtx_unlock(&sf_freelist.sf_lock);
1655 	return (sf);
1656 }
1657 
/*
 * Map an address inside the sf_buf KVA range to its sf_buf header: the
 * page index of (x) relative to sf_base selects the entry in sf_bufs[].
 */
#define dtosf(x)	(sf_bufs + (((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT))
1659 
1660 /*
1661  * Detatch mapped page and release resources back to the system.
1662  */
1663 void
1664 sf_buf_free(void *addr, void *args)
1665 {
1666 	struct sf_buf *sf;
1667 	struct vm_page *m;
1668 
1669 	GIANT_REQUIRED;
1670 
1671 	sf = dtosf(addr);
1672 	pmap_qremove((vm_offset_t)addr, 1);
1673 	m = sf->m;
1674 	vm_page_lock_queues();
1675 	vm_page_unwire(m, 0);
1676 	/*
1677 	 * Check for the object going away on us. This can
1678 	 * happen since we don't hold a reference to it.
1679 	 * If so, we're responsible for freeing the page.
1680 	 */
1681 	if (m->wire_count == 0 && m->object == NULL)
1682 		vm_page_free(m);
1683 	vm_page_unlock_queues();
1684 	sf->m = NULL;
1685 	mtx_lock(&sf_freelist.sf_lock);
1686 	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
1687 	if (sf_buf_alloc_want > 0)
1688 		wakeup_one(&sf_freelist);
1689 	mtx_unlock(&sf_freelist.sf_lock);
1690 }
1691 
/*
 * sendfile(2)
 *
 * MPSAFE
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Send the file referred to by 'fd', beginning at 'offset', to the
 * socket 's'.  At most 'nbytes' are sent, or everything up to EOF when
 * nbytes == 0.  A header and/or trailer may optionally be added to the
 * socket output.  If 'sbytes' is given, the total number of bytes sent
 * is written to *sbytes.
 *
 * This entry point runs do_sendfile() with the compat flag cleared.
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
	int error;

	error = do_sendfile(td, uap, 0);
	return (error);
}
1712 
#ifdef COMPAT_FREEBSD4
/*
 * FreeBSD 4 compatible sendfile(2) entry point: repackages the old
 * argument structure field by field into a struct sendfile_args and
 * runs do_sendfile() with the compat flag set.
 */
int
freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
{
	struct sendfile_args args;
	int error;

	/* Descriptors. */
	args.fd = uap->fd;
	args.s = uap->s;
	/* Range of the file to send. */
	args.offset = uap->offset;
	args.nbytes = uap->nbytes;
	/* Optional header/trailer, result pointer and flags. */
	args.hdtr = uap->hdtr;
	args.sbytes = uap->sbytes;
	args.flags = uap->flags;

	error = do_sendfile(td, &args, 1);
	return (error);
}
#endif /* COMPAT_FREEBSD4 */
1730 
1731 static int
1732 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1733 {
1734 	struct vnode *vp;
1735 	struct vm_object *obj;
1736 	struct socket *so = NULL;
1737 	struct mbuf *m;
1738 	struct sf_buf *sf;
1739 	struct vm_page *pg;
1740 	struct writev_args nuap;
1741 	struct sf_hdtr hdtr;
1742 	off_t off, xfsize, hdtr_size, sbytes = 0;
1743 	int error, s;
1744 
1745 	mtx_lock(&Giant);
1746 
1747 	hdtr_size = 0;
1748 
1749 	/*
1750 	 * The descriptor must be a regular file and have a backing VM object.
1751 	 */
1752 	if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1753 		goto done;
1754 	if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
1755 		error = EINVAL;
1756 		goto done;
1757 	}
1758 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1759 		goto done;
1760 	if (so->so_type != SOCK_STREAM) {
1761 		error = EINVAL;
1762 		goto done;
1763 	}
1764 	if ((so->so_state & SS_ISCONNECTED) == 0) {
1765 		error = ENOTCONN;
1766 		goto done;
1767 	}
1768 	if (uap->offset < 0) {
1769 		error = EINVAL;
1770 		goto done;
1771 	}
1772 
1773 #ifdef MAC
1774 	error = mac_check_socket_send(td->td_ucred, so);
1775 	if (error)
1776 		goto done;
1777 #endif
1778 
1779 	/*
1780 	 * If specified, get the pointer to the sf_hdtr struct for
1781 	 * any headers/trailers.
1782 	 */
1783 	if (uap->hdtr != NULL) {
1784 		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1785 		if (error)
1786 			goto done;
1787 		/*
1788 		 * Send any headers. Wimp out and use writev(2).
1789 		 */
1790 		if (hdtr.headers != NULL) {
1791 			nuap.fd = uap->s;
1792 			nuap.iovp = hdtr.headers;
1793 			nuap.iovcnt = hdtr.hdr_cnt;
1794 			error = writev(td, &nuap);
1795 			if (error)
1796 				goto done;
1797 			if (compat)
1798 				sbytes += td->td_retval[0];
1799 			else
1800 				hdtr_size += td->td_retval[0];
1801 		}
1802 	}
1803 
1804 	/*
1805 	 * Protect against multiple writers to the socket.
1806 	 */
1807 	(void) sblock(&so->so_snd, 0);
1808 
1809 	/*
1810 	 * Loop through the pages in the file, starting with the requested
1811 	 * offset. Get a file page (do I/O if necessary), map the file page
1812 	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1813 	 * it on the socket.
1814 	 */
1815 	for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1816 		vm_pindex_t pindex;
1817 		vm_offset_t pgoff;
1818 
1819 		pindex = OFF_TO_IDX(off);
1820 retry_lookup:
1821 		/*
1822 		 * Calculate the amount to transfer. Not to exceed a page,
1823 		 * the EOF, or the passed in nbytes.
1824 		 */
1825 		xfsize = obj->un_pager.vnp.vnp_size - off;
1826 		if (xfsize > PAGE_SIZE)
1827 			xfsize = PAGE_SIZE;
1828 		pgoff = (vm_offset_t)(off & PAGE_MASK);
1829 		if (PAGE_SIZE - pgoff < xfsize)
1830 			xfsize = PAGE_SIZE - pgoff;
1831 		if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1832 			xfsize = uap->nbytes - sbytes;
1833 		if (xfsize <= 0)
1834 			break;
1835 		/*
1836 		 * Optimize the non-blocking case by looking at the socket space
1837 		 * before going to the extra work of constituting the sf_buf.
1838 		 */
1839 		if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1840 			if (so->so_state & SS_CANTSENDMORE)
1841 				error = EPIPE;
1842 			else
1843 				error = EAGAIN;
1844 			sbunlock(&so->so_snd);
1845 			goto done;
1846 		}
1847 		/*
1848 		 * Attempt to look up the page.
1849 		 *
1850 		 *	Allocate if not found
1851 		 *
1852 		 *	Wait and loop if busy.
1853 		 */
1854 		pg = vm_page_lookup(obj, pindex);
1855 
1856 		if (pg == NULL) {
1857 			pg = vm_page_alloc(obj, pindex,
1858 			    VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
1859 			if (pg == NULL) {
1860 				VM_WAIT;
1861 				goto retry_lookup;
1862 			}
1863 			vm_page_lock_queues();
1864 			vm_page_wakeup(pg);
1865 		} else {
1866 			vm_page_lock_queues();
1867 			if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
1868 				goto retry_lookup;
1869 			/*
1870 		 	 * Wire the page so it does not get ripped out from
1871 			 * under us.
1872 			 */
1873 			vm_page_wire(pg);
1874 		}
1875 
1876 		/*
1877 		 * If page is not valid for what we need, initiate I/O
1878 		 */
1879 
1880 		if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) {
1881 			int bsize, resid;
1882 
1883 			/*
1884 			 * Ensure that our page is still around when the I/O
1885 			 * completes.
1886 			 */
1887 			vm_page_io_start(pg);
1888 			vm_page_unlock_queues();
1889 
1890 			/*
1891 			 * Get the page from backing store.
1892 			 */
1893 			bsize = vp->v_mount->mnt_stat.f_iosize;
1894 			vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
1895 			/*
1896 			 * XXXMAC: Because we don't have fp->f_cred here,
1897 			 * we pass in NOCRED.  This is probably wrong, but
1898 			 * is consistent with our original implementation.
1899 			 */
1900 			error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
1901 			    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
1902 			    IO_VMIO | ((MAXBSIZE / bsize) << 16),
1903 			    td->td_ucred, NOCRED, &resid, td);
1904 			VOP_UNLOCK(vp, 0, td);
1905 			vm_page_lock_queues();
1906 			vm_page_flag_clear(pg, PG_ZERO);
1907 			vm_page_io_finish(pg);
1908 			if (error) {
1909 				vm_page_unwire(pg, 0);
1910 				/*
1911 				 * See if anyone else might know about this page.
1912 				 * If not and it is not valid, then free it.
1913 				 */
1914 				if (pg->wire_count == 0 && pg->valid == 0 &&
1915 				    pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1916 				    pg->hold_count == 0) {
1917 					vm_page_busy(pg);
1918 					vm_page_free(pg);
1919 				}
1920 				vm_page_unlock_queues();
1921 				sbunlock(&so->so_snd);
1922 				goto done;
1923 			}
1924 		}
1925 		vm_page_unlock_queues();
1926 
1927 		/*
1928 		 * Get a sendfile buf. We usually wait as long as necessary,
1929 		 * but this wait can be interrupted.
1930 		 */
1931 		if ((sf = sf_buf_alloc()) == NULL) {
1932 			vm_page_lock_queues();
1933 			vm_page_unwire(pg, 0);
1934 			if (pg->wire_count == 0 && pg->object == NULL)
1935 				vm_page_free(pg);
1936 			vm_page_unlock_queues();
1937 			sbunlock(&so->so_snd);
1938 			error = EINTR;
1939 			goto done;
1940 		}
1941 
1942 		/*
1943 		 * Allocate a kernel virtual page and insert the physical page
1944 		 * into it.
1945 		 */
1946 		sf->m = pg;
1947 		pmap_qenter(sf->kva, &pg, 1);
1948 		/*
1949 		 * Get an mbuf header and set it up as having external storage.
1950 		 */
1951 		MGETHDR(m, 0, MT_DATA);
1952 		if (m == NULL) {
1953 			error = ENOBUFS;
1954 			sf_buf_free((void *)sf->kva, NULL);
1955 			sbunlock(&so->so_snd);
1956 			goto done;
1957 		}
1958 		/*
1959 		 * Setup external storage for mbuf.
1960 		 */
1961 		MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, NULL, M_RDONLY,
1962 		    EXT_SFBUF);
1963 		m->m_data = (char *) sf->kva + pgoff;
1964 		m->m_pkthdr.len = m->m_len = xfsize;
1965 		/*
1966 		 * Add the buffer to the socket buffer chain.
1967 		 */
1968 		s = splnet();
1969 retry_space:
1970 		/*
1971 		 * Make sure that the socket is still able to take more data.
1972 		 * CANTSENDMORE being true usually means that the connection
1973 		 * was closed. so_error is true when an error was sensed after
1974 		 * a previous send.
1975 		 * The state is checked after the page mapping and buffer
1976 		 * allocation above since those operations may block and make
1977 		 * any socket checks stale. From this point forward, nothing
1978 		 * blocks before the pru_send (or more accurately, any blocking
1979 		 * results in a loop back to here to re-check).
1980 		 */
1981 		if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1982 			if (so->so_state & SS_CANTSENDMORE) {
1983 				error = EPIPE;
1984 			} else {
1985 				error = so->so_error;
1986 				so->so_error = 0;
1987 			}
1988 			m_freem(m);
1989 			sbunlock(&so->so_snd);
1990 			splx(s);
1991 			goto done;
1992 		}
1993 		/*
1994 		 * Wait for socket space to become available. We do this just
1995 		 * after checking the connection state above in order to avoid
1996 		 * a race condition with sbwait().
1997 		 */
1998 		if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
1999 			if (so->so_state & SS_NBIO) {
2000 				m_freem(m);
2001 				sbunlock(&so->so_snd);
2002 				splx(s);
2003 				error = EAGAIN;
2004 				goto done;
2005 			}
2006 			error = sbwait(&so->so_snd);
2007 			/*
2008 			 * An error from sbwait usually indicates that we've
2009 			 * been interrupted by a signal. If we've sent anything
2010 			 * then return bytes sent, otherwise return the error.
2011 			 */
2012 			if (error) {
2013 				m_freem(m);
2014 				sbunlock(&so->so_snd);
2015 				splx(s);
2016 				goto done;
2017 			}
2018 			goto retry_space;
2019 		}
2020 		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
2021 		splx(s);
2022 		if (error) {
2023 			sbunlock(&so->so_snd);
2024 			goto done;
2025 		}
2026 	}
2027 	sbunlock(&so->so_snd);
2028 
2029 	/*
2030 	 * Send trailers. Wimp out and use writev(2).
2031 	 */
2032 	if (uap->hdtr != NULL && hdtr.trailers != NULL) {
2033 			nuap.fd = uap->s;
2034 			nuap.iovp = hdtr.trailers;
2035 			nuap.iovcnt = hdtr.trl_cnt;
2036 			error = writev(td, &nuap);
2037 			if (error)
2038 				goto done;
2039 			if (compat)
2040 				sbytes += td->td_retval[0];
2041 			else
2042 				hdtr_size += td->td_retval[0];
2043 	}
2044 
2045 done:
2046 	/*
2047 	 * If there was no error we have to clear td->td_retval[0]
2048 	 * because it may have been set by writev.
2049 	 */
2050 	if (error == 0) {
2051 		td->td_retval[0] = 0;
2052 	}
2053 	if (uap->sbytes != NULL) {
2054 		if (!compat)
2055 			sbytes += hdtr_size;
2056 		copyout(&sbytes, uap->sbytes, sizeof(off_t));
2057 	}
2058 	if (vp)
2059 		vrele(vp);
2060 	if (so)
2061 		fputsock(so);
2062 	mtx_unlock(&Giant);
2063 	return (error);
2064 }
2065