xref: /freebsd/sys/kern/uipc_syscalls.c (revision 4b2eaea43fec8e8792be611dea204071a10b655a)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * sendfile(2) and related extensions:
6  * Copyright (c) 1998, David Greenman. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
37  * $FreeBSD$
38  */
39 
40 #include "opt_compat.h"
41 #include "opt_ktrace.h"
42 #include "opt_mac.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/kernel.h>
47 #include <sys/lock.h>
48 #include <sys/mac.h>
49 #include <sys/mutex.h>
50 #include <sys/sysproto.h>
51 #include <sys/malloc.h>
52 #include <sys/filedesc.h>
53 #include <sys/event.h>
54 #include <sys/proc.h>
55 #include <sys/fcntl.h>
56 #include <sys/file.h>
57 #include <sys/lock.h>
58 #include <sys/mount.h>
59 #include <sys/mbuf.h>
60 #include <sys/protosw.h>
61 #include <sys/socket.h>
62 #include <sys/socketvar.h>
63 #include <sys/signalvar.h>
64 #include <sys/uio.h>
65 #include <sys/vnode.h>
66 #ifdef KTRACE
67 #include <sys/ktrace.h>
68 #endif
69 
70 #include <vm/vm.h>
71 #include <vm/vm_object.h>
72 #include <vm/vm_page.h>
73 #include <vm/vm_pageout.h>
74 #include <vm/vm_kern.h>
75 #include <vm/vm_extern.h>
76 
77 static void sf_buf_init(void *arg);
78 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
79 
80 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
81 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
82 
83 static int accept1(struct thread *td, struct accept_args *uap, int compat);
84 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
85 static int getsockname1(struct thread *td, struct getsockname_args *uap,
86 			int compat);
87 static int getpeername1(struct thread *td, struct getpeername_args *uap,
88 			int compat);
89 
90 /*
91  * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the
92  * sf_freelist head with the sf_lock mutex.
93  */
94 static struct {
95 	SLIST_HEAD(, sf_buf) sf_head;
96 	struct mtx sf_lock;
97 } sf_freelist;
98 
99 vm_offset_t sf_base;
100 struct sf_buf *sf_bufs;
101 u_int sf_buf_alloc_want;
102 
103 /*
104  * System call interface to the socket abstraction.
105  */
106 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
107 #define COMPAT_OLDSOCK
108 #endif
109 
110 /*
111  * MPSAFE
112  */
113 int
114 socket(td, uap)
115 	struct thread *td;
116 	register struct socket_args /* {
117 		int	domain;
118 		int	type;
119 		int	protocol;
120 	} */ *uap;
121 {
122 	struct filedesc *fdp;
123 	struct socket *so;
124 	struct file *fp;
125 	int fd, error;
126 
127 	mtx_lock(&Giant);
128 	fdp = td->td_proc->p_fd;
129 	error = falloc(td, &fp, &fd);
130 	if (error)
131 		goto done2;
132 	fhold(fp);
133 	error = socreate(uap->domain, &so, uap->type, uap->protocol,
134 	    td->td_ucred, td);
135 	FILEDESC_LOCK(fdp);
136 	if (error) {
137 		if (fdp->fd_ofiles[fd] == fp) {
138 			fdp->fd_ofiles[fd] = NULL;
139 			FILEDESC_UNLOCK(fdp);
140 			fdrop(fp, td);
141 		} else
142 			FILEDESC_UNLOCK(fdp);
143 	} else {
144 		fp->f_data = so;	/* already has ref count */
145 		fp->f_flag = FREAD|FWRITE;
146 		fp->f_ops = &socketops;
147 		fp->f_type = DTYPE_SOCKET;
148 		FILEDESC_UNLOCK(fdp);
149 		td->td_retval[0] = fd;
150 	}
151 	fdrop(fp, td);
152 done2:
153 	mtx_unlock(&Giant);
154 	return (error);
155 }
156 
157 /*
158  * MPSAFE
159  */
160 /* ARGSUSED */
161 int
162 bind(td, uap)
163 	struct thread *td;
164 	register struct bind_args /* {
165 		int	s;
166 		caddr_t	name;
167 		int	namelen;
168 	} */ *uap;
169 {
170 	struct socket *so;
171 	struct sockaddr *sa;
172 	int error;
173 
174 	mtx_lock(&Giant);
175 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
176 		goto done2;
177 	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
178 		goto done1;
179 #ifdef MAC
180 	error = mac_check_socket_bind(td->td_ucred, so, sa);
181 	if (error) {
182 		FREE(sa, M_SONAME);
183 		goto done1;
184 	}
185 #endif
186 	error = sobind(so, sa, td);
187 	FREE(sa, M_SONAME);
188 done1:
189 	fputsock(so);
190 done2:
191 	mtx_unlock(&Giant);
192 	return (error);
193 }
194 
195 /*
196  * MPSAFE
197  */
198 /* ARGSUSED */
199 int
200 listen(td, uap)
201 	struct thread *td;
202 	register struct listen_args /* {
203 		int	s;
204 		int	backlog;
205 	} */ *uap;
206 {
207 	struct socket *so;
208 	int error;
209 
210 	mtx_lock(&Giant);
211 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
212 #ifdef MAC
213 		error = mac_check_socket_listen(td->td_ucred, so);
214 		if (error)
215 			goto done;
216 #endif
217 		error = solisten(so, uap->backlog, td);
218 #ifdef MAC
219 done:
220 #endif
221 		fputsock(so);
222 	}
223 	mtx_unlock(&Giant);
224 	return(error);
225 }
226 
227 /*
228  * accept1()
229  * MPSAFE
230  */
231 static int
232 accept1(td, uap, compat)
233 	struct thread *td;
234 	register struct accept_args /* {
235 		int	s;
236 		caddr_t	name;
237 		int	*anamelen;
238 	} */ *uap;
239 	int compat;
240 {
241 	struct filedesc *fdp;
242 	struct file *nfp = NULL;
243 	struct sockaddr *sa;
244 	int namelen, error, s;
245 	struct socket *head, *so;
246 	int fd;
247 	u_int fflag;
248 	pid_t pgid;
249 
250 	mtx_lock(&Giant);
251 	fdp = td->td_proc->p_fd;
252 	if (uap->name) {
253 		error = copyin(uap->anamelen, &namelen, sizeof (namelen));
254 		if(error)
255 			goto done2;
256 		if (namelen < 0) {
257 			error = EINVAL;
258 			goto done2;
259 		}
260 	}
261 	error = fgetsock(td, uap->s, &head, &fflag);
262 	if (error)
263 		goto done2;
264 	s = splnet();
265 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
266 		splx(s);
267 		error = EINVAL;
268 		goto done;
269 	}
270 	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
271 		if (head->so_state & SS_CANTRCVMORE) {
272 			head->so_error = ECONNABORTED;
273 			break;
274 		}
275 		if ((head->so_state & SS_NBIO) != 0) {
276 			head->so_error = EWOULDBLOCK;
277 			break;
278 		}
279 		error = tsleep(&head->so_timeo, PSOCK | PCATCH,
280 		    "accept", 0);
281 		if (error) {
282 			splx(s);
283 			goto done;
284 		}
285 	}
286 	if (head->so_error) {
287 		error = head->so_error;
288 		head->so_error = 0;
289 		splx(s);
290 		goto done;
291 	}
292 
293 	/*
294 	 * At this point we know that there is at least one connection
295 	 * ready to be accepted. Remove it from the queue prior to
296 	 * allocating the file descriptor for it since falloc() may
297 	 * block allowing another process to accept the connection
298 	 * instead.
299 	 */
300 	so = TAILQ_FIRST(&head->so_comp);
301 	TAILQ_REMOVE(&head->so_comp, so, so_list);
302 	head->so_qlen--;
303 
304 	error = falloc(td, &nfp, &fd);
305 	if (error) {
306 		/*
307 		 * Probably ran out of file descriptors. Put the
308 		 * unaccepted connection back onto the queue and
309 		 * do another wakeup so some other process might
310 		 * have a chance at it.
311 		 */
312 		TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
313 		head->so_qlen++;
314 		wakeup_one(&head->so_timeo);
315 		splx(s);
316 		goto done;
317 	}
318 	fhold(nfp);
319 	td->td_retval[0] = fd;
320 
321 	/* connection has been removed from the listen queue */
322 	KNOTE(&head->so_rcv.sb_sel.si_note, 0);
323 
324 	so->so_state &= ~SS_COMP;
325 	so->so_head = NULL;
326 	pgid = fgetown(&head->so_sigio);
327 	if (pgid != 0)
328 		fsetown(pgid, &so->so_sigio);
329 
330 	FILE_LOCK(nfp);
331 	soref(so);			/* file descriptor reference */
332 	nfp->f_data = so;	/* nfp has ref count from falloc */
333 	nfp->f_flag = fflag;
334 	nfp->f_ops = &socketops;
335 	nfp->f_type = DTYPE_SOCKET;
336 	FILE_UNLOCK(nfp);
337 	sa = 0;
338 	error = soaccept(so, &sa);
339 	if (error) {
340 		/*
341 		 * return a namelen of zero for older code which might
342 	 	 * ignore the return value from accept.
343 		 */
344 		if (uap->name != NULL) {
345 			namelen = 0;
346 			(void) copyout(&namelen,
347 			    uap->anamelen, sizeof(*uap->anamelen));
348 		}
349 		goto noconnection;
350 	}
351 	if (sa == NULL) {
352 		namelen = 0;
353 		if (uap->name)
354 			goto gotnoname;
355 		splx(s);
356 		error = 0;
357 		goto done;
358 	}
359 	if (uap->name) {
360 		/* check sa_len before it is destroyed */
361 		if (namelen > sa->sa_len)
362 			namelen = sa->sa_len;
363 #ifdef COMPAT_OLDSOCK
364 		if (compat)
365 			((struct osockaddr *)sa)->sa_family =
366 			    sa->sa_family;
367 #endif
368 		error = copyout(sa, uap->name, (u_int)namelen);
369 		if (!error)
370 gotnoname:
371 			error = copyout(&namelen,
372 			    uap->anamelen, sizeof (*uap->anamelen));
373 	}
374 noconnection:
375 	if (sa)
376 		FREE(sa, M_SONAME);
377 
378 	/*
379 	 * close the new descriptor, assuming someone hasn't ripped it
380 	 * out from under us.
381 	 */
382 	if (error) {
383 		FILEDESC_LOCK(fdp);
384 		if (fdp->fd_ofiles[fd] == nfp) {
385 			fdp->fd_ofiles[fd] = NULL;
386 			FILEDESC_UNLOCK(fdp);
387 			fdrop(nfp, td);
388 		} else {
389 			FILEDESC_UNLOCK(fdp);
390 		}
391 	}
392 	splx(s);
393 
394 	/*
395 	 * Release explicitly held references before returning.
396 	 */
397 done:
398 	if (nfp != NULL)
399 		fdrop(nfp, td);
400 	fputsock(head);
401 done2:
402 	mtx_unlock(&Giant);
403 	return (error);
404 }
405 
/*
 * accept(2): thin wrapper that runs accept1() with compat disabled.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
accept(struct thread *td, struct accept_args *uap)
{
	int error;

	error = accept1(td, uap, 0);
	return (error);
}
417 
418 #ifdef COMPAT_OLDSOCK
/*
 * Old-style accept: thin wrapper that runs accept1() with compat enabled.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
oaccept(struct thread *td, struct accept_args *uap)
{
	int error;

	error = accept1(td, uap, 1);
	return (error);
}
430 #endif /* COMPAT_OLDSOCK */
431 
432 /*
433  * MPSAFE
434  */
435 /* ARGSUSED */
436 int
437 connect(td, uap)
438 	struct thread *td;
439 	register struct connect_args /* {
440 		int	s;
441 		caddr_t	name;
442 		int	namelen;
443 	} */ *uap;
444 {
445 	struct socket *so;
446 	struct sockaddr *sa;
447 	int error, s;
448 
449 	mtx_lock(&Giant);
450 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
451 		goto done2;
452 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
453 		error = EALREADY;
454 		goto done1;
455 	}
456 	error = getsockaddr(&sa, uap->name, uap->namelen);
457 	if (error)
458 		goto done1;
459 #ifdef MAC
460 	error = mac_check_socket_connect(td->td_ucred, so, sa);
461 	if (error)
462 		goto bad;
463 #endif
464 	error = soconnect(so, sa, td);
465 	if (error)
466 		goto bad;
467 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
468 		FREE(sa, M_SONAME);
469 		error = EINPROGRESS;
470 		goto done1;
471 	}
472 	s = splnet();
473 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
474 		error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0);
475 		if (error)
476 			break;
477 	}
478 	if (error == 0) {
479 		error = so->so_error;
480 		so->so_error = 0;
481 	}
482 	splx(s);
483 bad:
484 	so->so_state &= ~SS_ISCONNECTING;
485 	FREE(sa, M_SONAME);
486 	if (error == ERESTART)
487 		error = EINTR;
488 done1:
489 	fputsock(so);
490 done2:
491 	mtx_unlock(&Giant);
492 	return (error);
493 }
494 
495 /*
496  * MPSAFE
497  */
498 int
499 socketpair(td, uap)
500 	struct thread *td;
501 	register struct socketpair_args /* {
502 		int	domain;
503 		int	type;
504 		int	protocol;
505 		int	*rsv;
506 	} */ *uap;
507 {
508 	register struct filedesc *fdp = td->td_proc->p_fd;
509 	struct file *fp1, *fp2;
510 	struct socket *so1, *so2;
511 	int fd, error, sv[2];
512 
513 	mtx_lock(&Giant);
514 	error = socreate(uap->domain, &so1, uap->type, uap->protocol,
515 	    td->td_ucred, td);
516 	if (error)
517 		goto done2;
518 	error = socreate(uap->domain, &so2, uap->type, uap->protocol,
519 	    td->td_ucred, td);
520 	if (error)
521 		goto free1;
522 	error = falloc(td, &fp1, &fd);
523 	if (error)
524 		goto free2;
525 	fhold(fp1);
526 	sv[0] = fd;
527 	fp1->f_data = so1;	/* so1 already has ref count */
528 	error = falloc(td, &fp2, &fd);
529 	if (error)
530 		goto free3;
531 	fhold(fp2);
532 	fp2->f_data = so2;	/* so2 already has ref count */
533 	sv[1] = fd;
534 	error = soconnect2(so1, so2);
535 	if (error)
536 		goto free4;
537 	if (uap->type == SOCK_DGRAM) {
538 		/*
539 		 * Datagram socket connection is asymmetric.
540 		 */
541 		 error = soconnect2(so2, so1);
542 		 if (error)
543 			goto free4;
544 	}
545 	FILE_LOCK(fp1);
546 	fp1->f_flag = FREAD|FWRITE;
547 	fp1->f_ops = &socketops;
548 	fp1->f_type = DTYPE_SOCKET;
549 	FILE_UNLOCK(fp1);
550 	FILE_LOCK(fp2);
551 	fp2->f_flag = FREAD|FWRITE;
552 	fp2->f_ops = &socketops;
553 	fp2->f_type = DTYPE_SOCKET;
554 	FILE_UNLOCK(fp2);
555 	error = copyout(sv, uap->rsv, 2 * sizeof (int));
556 	fdrop(fp1, td);
557 	fdrop(fp2, td);
558 	goto done2;
559 free4:
560 	FILEDESC_LOCK(fdp);
561 	if (fdp->fd_ofiles[sv[1]] == fp2) {
562 		fdp->fd_ofiles[sv[1]] = NULL;
563 		FILEDESC_UNLOCK(fdp);
564 		fdrop(fp2, td);
565 	} else
566 		FILEDESC_UNLOCK(fdp);
567 	fdrop(fp2, td);
568 free3:
569 	FILEDESC_LOCK(fdp);
570 	if (fdp->fd_ofiles[sv[0]] == fp1) {
571 		fdp->fd_ofiles[sv[0]] = NULL;
572 		FILEDESC_UNLOCK(fdp);
573 		fdrop(fp1, td);
574 	} else
575 		FILEDESC_UNLOCK(fdp);
576 	fdrop(fp1, td);
577 free2:
578 	(void)soclose(so2);
579 free1:
580 	(void)soclose(so1);
581 done2:
582 	mtx_unlock(&Giant);
583 	return (error);
584 }
585 
586 static int
587 sendit(td, s, mp, flags)
588 	register struct thread *td;
589 	int s;
590 	register struct msghdr *mp;
591 	int flags;
592 {
593 	struct uio auio;
594 	register struct iovec *iov;
595 	register int i;
596 	struct mbuf *control;
597 	struct sockaddr *to = NULL;
598 	int len, error;
599 	struct socket *so;
600 #ifdef KTRACE
601 	struct iovec *ktriov = NULL;
602 	struct uio ktruio;
603 	int iovlen;
604 #endif
605 
606 	if ((error = fgetsock(td, s, &so, NULL)) != 0)
607 		return (error);
608 
609 #ifdef MAC
610 	error = mac_check_socket_send(td->td_ucred, so);
611 	if (error)
612 		goto bad;
613 #endif
614 
615 	auio.uio_iov = mp->msg_iov;
616 	auio.uio_iovcnt = mp->msg_iovlen;
617 	auio.uio_segflg = UIO_USERSPACE;
618 	auio.uio_rw = UIO_WRITE;
619 	auio.uio_td = td;
620 	auio.uio_offset = 0;			/* XXX */
621 	auio.uio_resid = 0;
622 	iov = mp->msg_iov;
623 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
624 		if ((auio.uio_resid += iov->iov_len) < 0) {
625 			error = EINVAL;
626 			goto bad;
627 		}
628 	}
629 	if (mp->msg_name) {
630 		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
631 		if (error)
632 			goto bad;
633 	}
634 	if (mp->msg_control) {
635 		if (mp->msg_controllen < sizeof(struct cmsghdr)
636 #ifdef COMPAT_OLDSOCK
637 		    && mp->msg_flags != MSG_COMPAT
638 #endif
639 		) {
640 			error = EINVAL;
641 			goto bad;
642 		}
643 		error = sockargs(&control, mp->msg_control,
644 		    mp->msg_controllen, MT_CONTROL);
645 		if (error)
646 			goto bad;
647 #ifdef COMPAT_OLDSOCK
648 		if (mp->msg_flags == MSG_COMPAT) {
649 			register struct cmsghdr *cm;
650 
651 			M_PREPEND(control, sizeof(*cm), 0);
652 			if (control == 0) {
653 				error = ENOBUFS;
654 				goto bad;
655 			} else {
656 				cm = mtod(control, struct cmsghdr *);
657 				cm->cmsg_len = control->m_len;
658 				cm->cmsg_level = SOL_SOCKET;
659 				cm->cmsg_type = SCM_RIGHTS;
660 			}
661 		}
662 #endif
663 	} else {
664 		control = 0;
665 	}
666 #ifdef KTRACE
667 	if (KTRPOINT(td, KTR_GENIO)) {
668 		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
669 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, 0);
670 		bcopy(auio.uio_iov, ktriov, iovlen);
671 		ktruio = auio;
672 	}
673 #endif
674 	len = auio.uio_resid;
675 	error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control,
676 						     flags, td);
677 	if (error) {
678 		if (auio.uio_resid != len && (error == ERESTART ||
679 		    error == EINTR || error == EWOULDBLOCK))
680 			error = 0;
681 		/* Generation of SIGPIPE can be controlled per socket */
682 		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) {
683 			PROC_LOCK(td->td_proc);
684 			psignal(td->td_proc, SIGPIPE);
685 			PROC_UNLOCK(td->td_proc);
686 		}
687 	}
688 	if (error == 0)
689 		td->td_retval[0] = len - auio.uio_resid;
690 #ifdef KTRACE
691 	if (ktriov != NULL) {
692 		if (error == 0) {
693 			ktruio.uio_iov = ktriov;
694 			ktruio.uio_resid = td->td_retval[0];
695 			ktrgenio(s, UIO_WRITE, &ktruio, error);
696 		}
697 		FREE(ktriov, M_TEMP);
698 	}
699 #endif
700 bad:
701 	fputsock(so);
702 	if (to)
703 		FREE(to, M_SONAME);
704 	return (error);
705 }
706 
707 /*
708  * MPSAFE
709  */
710 int
711 sendto(td, uap)
712 	struct thread *td;
713 	register struct sendto_args /* {
714 		int	s;
715 		caddr_t	buf;
716 		size_t	len;
717 		int	flags;
718 		caddr_t	to;
719 		int	tolen;
720 	} */ *uap;
721 {
722 	struct msghdr msg;
723 	struct iovec aiov;
724 	int error;
725 
726 	msg.msg_name = uap->to;
727 	msg.msg_namelen = uap->tolen;
728 	msg.msg_iov = &aiov;
729 	msg.msg_iovlen = 1;
730 	msg.msg_control = 0;
731 #ifdef COMPAT_OLDSOCK
732 	msg.msg_flags = 0;
733 #endif
734 	aiov.iov_base = uap->buf;
735 	aiov.iov_len = uap->len;
736 	mtx_lock(&Giant);
737 	error = sendit(td, uap->s, &msg, uap->flags);
738 	mtx_unlock(&Giant);
739 	return (error);
740 }
741 
742 #ifdef COMPAT_OLDSOCK
743 /*
744  * MPSAFE
745  */
746 int
747 osend(td, uap)
748 	struct thread *td;
749 	register struct osend_args /* {
750 		int	s;
751 		caddr_t	buf;
752 		int	len;
753 		int	flags;
754 	} */ *uap;
755 {
756 	struct msghdr msg;
757 	struct iovec aiov;
758 	int error;
759 
760 	msg.msg_name = 0;
761 	msg.msg_namelen = 0;
762 	msg.msg_iov = &aiov;
763 	msg.msg_iovlen = 1;
764 	aiov.iov_base = uap->buf;
765 	aiov.iov_len = uap->len;
766 	msg.msg_control = 0;
767 	msg.msg_flags = 0;
768 	mtx_lock(&Giant);
769 	error = sendit(td, uap->s, &msg, uap->flags);
770 	mtx_unlock(&Giant);
771 	return (error);
772 }
773 
774 /*
775  * MPSAFE
776  */
777 int
778 osendmsg(td, uap)
779 	struct thread *td;
780 	register struct osendmsg_args /* {
781 		int	s;
782 		caddr_t	msg;
783 		int	flags;
784 	} */ *uap;
785 {
786 	struct msghdr msg;
787 	struct iovec aiov[UIO_SMALLIOV], *iov;
788 	int error;
789 
790 	mtx_lock(&Giant);
791 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
792 	if (error)
793 		goto done2;
794 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
795 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
796 			error = EMSGSIZE;
797 			goto done2;
798 		}
799 		MALLOC(iov, struct iovec *,
800 		      sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
801 		      0);
802 	} else {
803 		iov = aiov;
804 	}
805 	error = copyin(msg.msg_iov, iov,
806 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
807 	if (error)
808 		goto done;
809 	msg.msg_flags = MSG_COMPAT;
810 	msg.msg_iov = iov;
811 	error = sendit(td, uap->s, &msg, uap->flags);
812 done:
813 	if (iov != aiov)
814 		FREE(iov, M_IOV);
815 done2:
816 	mtx_unlock(&Giant);
817 	return (error);
818 }
819 #endif
820 
821 /*
822  * MPSAFE
823  */
824 int
825 sendmsg(td, uap)
826 	struct thread *td;
827 	register struct sendmsg_args /* {
828 		int	s;
829 		caddr_t	msg;
830 		int	flags;
831 	} */ *uap;
832 {
833 	struct msghdr msg;
834 	struct iovec aiov[UIO_SMALLIOV], *iov;
835 	int error;
836 
837 	mtx_lock(&Giant);
838 	error = copyin(uap->msg, &msg, sizeof (msg));
839 	if (error)
840 		goto done2;
841 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
842 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
843 			error = EMSGSIZE;
844 			goto done2;
845 		}
846 		MALLOC(iov, struct iovec *,
847 		       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
848 		       0);
849 	} else {
850 		iov = aiov;
851 	}
852 	if (msg.msg_iovlen &&
853 	    (error = copyin(msg.msg_iov, iov,
854 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
855 		goto done;
856 	msg.msg_iov = iov;
857 #ifdef COMPAT_OLDSOCK
858 	msg.msg_flags = 0;
859 #endif
860 	error = sendit(td, uap->s, &msg, uap->flags);
861 done:
862 	if (iov != aiov)
863 		FREE(iov, M_IOV);
864 done2:
865 	mtx_unlock(&Giant);
866 	return (error);
867 }
868 
869 static int
870 recvit(td, s, mp, namelenp)
871 	register struct thread *td;
872 	int s;
873 	register struct msghdr *mp;
874 	void *namelenp;
875 {
876 	struct uio auio;
877 	register struct iovec *iov;
878 	register int i;
879 	int len, error;
880 	struct mbuf *m, *control = 0;
881 	caddr_t ctlbuf;
882 	struct socket *so;
883 	struct sockaddr *fromsa = 0;
884 #ifdef KTRACE
885 	struct iovec *ktriov = NULL;
886 	struct uio ktruio;
887 	int iovlen;
888 #endif
889 
890 	if ((error = fgetsock(td, s, &so, NULL)) != 0)
891 		return (error);
892 
893 #ifdef MAC
894 	error = mac_check_socket_receive(td->td_ucred, so);
895 	if (error) {
896 		fputsock(so);
897 		return (error);
898 	}
899 #endif
900 
901 	auio.uio_iov = mp->msg_iov;
902 	auio.uio_iovcnt = mp->msg_iovlen;
903 	auio.uio_segflg = UIO_USERSPACE;
904 	auio.uio_rw = UIO_READ;
905 	auio.uio_td = td;
906 	auio.uio_offset = 0;			/* XXX */
907 	auio.uio_resid = 0;
908 	iov = mp->msg_iov;
909 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
910 		if ((auio.uio_resid += iov->iov_len) < 0) {
911 			fputsock(so);
912 			return (EINVAL);
913 		}
914 	}
915 #ifdef KTRACE
916 	if (KTRPOINT(td, KTR_GENIO)) {
917 		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
918 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, 0);
919 		bcopy(auio.uio_iov, ktriov, iovlen);
920 		ktruio = auio;
921 	}
922 #endif
923 	len = auio.uio_resid;
924 	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
925 	    (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
926 	    &mp->msg_flags);
927 	if (error) {
928 		if (auio.uio_resid != len && (error == ERESTART ||
929 		    error == EINTR || error == EWOULDBLOCK))
930 			error = 0;
931 	}
932 #ifdef KTRACE
933 	if (ktriov != NULL) {
934 		if (error == 0) {
935 			ktruio.uio_iov = ktriov;
936 			ktruio.uio_resid = len - auio.uio_resid;
937 			ktrgenio(s, UIO_READ, &ktruio, error);
938 		}
939 		FREE(ktriov, M_TEMP);
940 	}
941 #endif
942 	if (error)
943 		goto out;
944 	td->td_retval[0] = len - auio.uio_resid;
945 	if (mp->msg_name) {
946 		len = mp->msg_namelen;
947 		if (len <= 0 || fromsa == 0)
948 			len = 0;
949 		else {
950 #ifndef MIN
951 #define MIN(a,b) ((a)>(b)?(b):(a))
952 #endif
953 			/* save sa_len before it is destroyed by MSG_COMPAT */
954 			len = MIN(len, fromsa->sa_len);
955 #ifdef COMPAT_OLDSOCK
956 			if (mp->msg_flags & MSG_COMPAT)
957 				((struct osockaddr *)fromsa)->sa_family =
958 				    fromsa->sa_family;
959 #endif
960 			error = copyout(fromsa, mp->msg_name, (unsigned)len);
961 			if (error)
962 				goto out;
963 		}
964 		mp->msg_namelen = len;
965 		if (namelenp &&
966 		    (error = copyout(&len, namelenp, sizeof (int)))) {
967 #ifdef COMPAT_OLDSOCK
968 			if (mp->msg_flags & MSG_COMPAT)
969 				error = 0;	/* old recvfrom didn't check */
970 			else
971 #endif
972 			goto out;
973 		}
974 	}
975 	if (mp->msg_control) {
976 #ifdef COMPAT_OLDSOCK
977 		/*
978 		 * We assume that old recvmsg calls won't receive access
979 		 * rights and other control info, esp. as control info
980 		 * is always optional and those options didn't exist in 4.3.
981 		 * If we receive rights, trim the cmsghdr; anything else
982 		 * is tossed.
983 		 */
984 		if (control && mp->msg_flags & MSG_COMPAT) {
985 			if (mtod(control, struct cmsghdr *)->cmsg_level !=
986 			    SOL_SOCKET ||
987 			    mtod(control, struct cmsghdr *)->cmsg_type !=
988 			    SCM_RIGHTS) {
989 				mp->msg_controllen = 0;
990 				goto out;
991 			}
992 			control->m_len -= sizeof (struct cmsghdr);
993 			control->m_data += sizeof (struct cmsghdr);
994 		}
995 #endif
996 		len = mp->msg_controllen;
997 		m = control;
998 		mp->msg_controllen = 0;
999 		ctlbuf = mp->msg_control;
1000 
1001 		while (m && len > 0) {
1002 			unsigned int tocopy;
1003 
1004 			if (len >= m->m_len)
1005 				tocopy = m->m_len;
1006 			else {
1007 				mp->msg_flags |= MSG_CTRUNC;
1008 				tocopy = len;
1009 			}
1010 
1011 			if ((error = copyout(mtod(m, caddr_t),
1012 					ctlbuf, tocopy)) != 0)
1013 				goto out;
1014 
1015 			ctlbuf += tocopy;
1016 			len -= tocopy;
1017 			m = m->m_next;
1018 		}
1019 		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
1020 	}
1021 out:
1022 	fputsock(so);
1023 	if (fromsa)
1024 		FREE(fromsa, M_SONAME);
1025 	if (control)
1026 		m_freem(control);
1027 	return (error);
1028 }
1029 
1030 /*
1031  * MPSAFE
1032  */
1033 int
1034 recvfrom(td, uap)
1035 	struct thread *td;
1036 	register struct recvfrom_args /* {
1037 		int	s;
1038 		caddr_t	buf;
1039 		size_t	len;
1040 		int	flags;
1041 		caddr_t	from;
1042 		int	*fromlenaddr;
1043 	} */ *uap;
1044 {
1045 	struct msghdr msg;
1046 	struct iovec aiov;
1047 	int error;
1048 
1049 	mtx_lock(&Giant);
1050 	if (uap->fromlenaddr) {
1051 		error = copyin(uap->fromlenaddr,
1052 		    &msg.msg_namelen, sizeof (msg.msg_namelen));
1053 		if (error)
1054 			goto done2;
1055 	} else {
1056 		msg.msg_namelen = 0;
1057 	}
1058 	msg.msg_name = uap->from;
1059 	msg.msg_iov = &aiov;
1060 	msg.msg_iovlen = 1;
1061 	aiov.iov_base = uap->buf;
1062 	aiov.iov_len = uap->len;
1063 	msg.msg_control = 0;
1064 	msg.msg_flags = uap->flags;
1065 	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1066 done2:
1067 	mtx_unlock(&Giant);
1068 	return(error);
1069 }
1070 
1071 #ifdef COMPAT_OLDSOCK
1072 /*
1073  * MPSAFE
1074  */
1075 int
1076 orecvfrom(td, uap)
1077 	struct thread *td;
1078 	struct recvfrom_args *uap;
1079 {
1080 
1081 	uap->flags |= MSG_COMPAT;
1082 	return (recvfrom(td, uap));
1083 }
1084 #endif
1085 
1086 
1087 #ifdef COMPAT_OLDSOCK
1088 /*
1089  * MPSAFE
1090  */
1091 int
1092 orecv(td, uap)
1093 	struct thread *td;
1094 	register struct orecv_args /* {
1095 		int	s;
1096 		caddr_t	buf;
1097 		int	len;
1098 		int	flags;
1099 	} */ *uap;
1100 {
1101 	struct msghdr msg;
1102 	struct iovec aiov;
1103 	int error;
1104 
1105 	mtx_lock(&Giant);
1106 	msg.msg_name = 0;
1107 	msg.msg_namelen = 0;
1108 	msg.msg_iov = &aiov;
1109 	msg.msg_iovlen = 1;
1110 	aiov.iov_base = uap->buf;
1111 	aiov.iov_len = uap->len;
1112 	msg.msg_control = 0;
1113 	msg.msg_flags = uap->flags;
1114 	error = recvit(td, uap->s, &msg, NULL);
1115 	mtx_unlock(&Giant);
1116 	return (error);
1117 }
1118 
1119 /*
1120  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1121  * overlays the new one, missing only the flags, and with the (old) access
1122  * rights where the control fields are now.
1123  *
1124  * MPSAFE
1125  */
1126 int
1127 orecvmsg(td, uap)
1128 	struct thread *td;
1129 	register struct orecvmsg_args /* {
1130 		int	s;
1131 		struct	omsghdr *msg;
1132 		int	flags;
1133 	} */ *uap;
1134 {
1135 	struct msghdr msg;
1136 	struct iovec aiov[UIO_SMALLIOV], *iov;
1137 	int error;
1138 
1139 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1140 	if (error)
1141 		return (error);
1142 
1143 	mtx_lock(&Giant);
1144 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1145 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1146 			error = EMSGSIZE;
1147 			goto done2;
1148 		}
1149 		MALLOC(iov, struct iovec *,
1150 		      sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1151 		      0);
1152 	} else {
1153 		iov = aiov;
1154 	}
1155 	msg.msg_flags = uap->flags | MSG_COMPAT;
1156 	error = copyin(msg.msg_iov, iov,
1157 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1158 	if (error)
1159 		goto done;
1160 	msg.msg_iov = iov;
1161 	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1162 
1163 	if (msg.msg_controllen && error == 0)
1164 		error = copyout(&msg.msg_controllen,
1165 		    &uap->msg->msg_accrightslen, sizeof (int));
1166 done:
1167 	if (iov != aiov)
1168 		FREE(iov, M_IOV);
1169 done2:
1170 	mtx_unlock(&Giant);
1171 	return (error);
1172 }
1173 #endif
1174 
1175 /*
1176  * MPSAFE
1177  */
1178 int
1179 recvmsg(td, uap)
1180 	struct thread *td;
1181 	register struct recvmsg_args /* {
1182 		int	s;
1183 		struct	msghdr *msg;
1184 		int	flags;
1185 	} */ *uap;
1186 {
1187 	struct msghdr msg;
1188 	struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
1189 	register int error;
1190 
1191 	mtx_lock(&Giant);
1192 	error = copyin(uap->msg, &msg, sizeof (msg));
1193 	if (error)
1194 		goto done2;
1195 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1196 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1197 			error = EMSGSIZE;
1198 			goto done2;
1199 		}
1200 		MALLOC(iov, struct iovec *,
1201 		       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1202 		       0);
1203 	} else {
1204 		iov = aiov;
1205 	}
1206 #ifdef COMPAT_OLDSOCK
1207 	msg.msg_flags = uap->flags &~ MSG_COMPAT;
1208 #else
1209 	msg.msg_flags = uap->flags;
1210 #endif
1211 	uiov = msg.msg_iov;
1212 	msg.msg_iov = iov;
1213 	error = copyin(uiov, iov,
1214 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1215 	if (error)
1216 		goto done;
1217 	error = recvit(td, uap->s, &msg, NULL);
1218 	if (!error) {
1219 		msg.msg_iov = uiov;
1220 		error = copyout(&msg, uap->msg, sizeof(msg));
1221 	}
1222 done:
1223 	if (iov != aiov)
1224 		FREE(iov, M_IOV);
1225 done2:
1226 	mtx_unlock(&Giant);
1227 	return (error);
1228 }
1229 
1230 /*
1231  * MPSAFE
1232  */
1233 /* ARGSUSED */
1234 int
1235 shutdown(td, uap)
1236 	struct thread *td;
1237 	register struct shutdown_args /* {
1238 		int	s;
1239 		int	how;
1240 	} */ *uap;
1241 {
1242 	struct socket *so;
1243 	int error;
1244 
1245 	mtx_lock(&Giant);
1246 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1247 		error = soshutdown(so, uap->how);
1248 		fputsock(so);
1249 	}
1250 	mtx_unlock(&Giant);
1251 	return(error);
1252 }
1253 
1254 /*
1255  * MPSAFE
1256  */
1257 /* ARGSUSED */
1258 int
1259 setsockopt(td, uap)
1260 	struct thread *td;
1261 	register struct setsockopt_args /* {
1262 		int	s;
1263 		int	level;
1264 		int	name;
1265 		caddr_t	val;
1266 		int	valsize;
1267 	} */ *uap;
1268 {
1269 	struct socket *so;
1270 	struct sockopt sopt;
1271 	int error;
1272 
1273 	if (uap->val == 0 && uap->valsize != 0)
1274 		return (EFAULT);
1275 	if (uap->valsize < 0)
1276 		return (EINVAL);
1277 
1278 	mtx_lock(&Giant);
1279 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1280 		sopt.sopt_dir = SOPT_SET;
1281 		sopt.sopt_level = uap->level;
1282 		sopt.sopt_name = uap->name;
1283 		sopt.sopt_val = uap->val;
1284 		sopt.sopt_valsize = uap->valsize;
1285 		sopt.sopt_td = td;
1286 		error = sosetopt(so, &sopt);
1287 		fputsock(so);
1288 	}
1289 	mtx_unlock(&Giant);
1290 	return(error);
1291 }
1292 
1293 /*
1294  * MPSAFE
1295  */
1296 /* ARGSUSED */
1297 int
1298 getsockopt(td, uap)
1299 	struct thread *td;
1300 	register struct getsockopt_args /* {
1301 		int	s;
1302 		int	level;
1303 		int	name;
1304 		caddr_t	val;
1305 		int	*avalsize;
1306 	} */ *uap;
1307 {
1308 	int	valsize, error;
1309 	struct  socket *so;
1310 	struct	sockopt sopt;
1311 
1312 	mtx_lock(&Giant);
1313 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1314 		goto done2;
1315 	if (uap->val) {
1316 		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1317 		if (error)
1318 			goto done1;
1319 		if (valsize < 0) {
1320 			error = EINVAL;
1321 			goto done1;
1322 		}
1323 	} else {
1324 		valsize = 0;
1325 	}
1326 
1327 	sopt.sopt_dir = SOPT_GET;
1328 	sopt.sopt_level = uap->level;
1329 	sopt.sopt_name = uap->name;
1330 	sopt.sopt_val = uap->val;
1331 	sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1332 	sopt.sopt_td = td;
1333 
1334 	error = sogetopt(so, &sopt);
1335 	if (error == 0) {
1336 		valsize = sopt.sopt_valsize;
1337 		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1338 	}
1339 done1:
1340 	fputsock(so);
1341 done2:
1342 	mtx_unlock(&Giant);
1343 	return (error);
1344 }
1345 
1346 /*
1347  * getsockname1() - Get socket name.
1348  *
1349  * MPSAFE
1350  */
1351 /* ARGSUSED */
1352 static int
1353 getsockname1(td, uap, compat)
1354 	struct thread *td;
1355 	register struct getsockname_args /* {
1356 		int	fdes;
1357 		caddr_t	asa;
1358 		int	*alen;
1359 	} */ *uap;
1360 	int compat;
1361 {
1362 	struct socket *so;
1363 	struct sockaddr *sa;
1364 	int len, error;
1365 
1366 	mtx_lock(&Giant);
1367 	if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1368 		goto done2;
1369 	error = copyin(uap->alen, &len, sizeof (len));
1370 	if (error)
1371 		goto done1;
1372 	if (len < 0) {
1373 		error = EINVAL;
1374 		goto done1;
1375 	}
1376 	sa = 0;
1377 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1378 	if (error)
1379 		goto bad;
1380 	if (sa == 0) {
1381 		len = 0;
1382 		goto gotnothing;
1383 	}
1384 
1385 	len = MIN(len, sa->sa_len);
1386 #ifdef COMPAT_OLDSOCK
1387 	if (compat)
1388 		((struct osockaddr *)sa)->sa_family = sa->sa_family;
1389 #endif
1390 	error = copyout(sa, uap->asa, (u_int)len);
1391 	if (error == 0)
1392 gotnothing:
1393 		error = copyout(&len, uap->alen, sizeof (len));
1394 bad:
1395 	if (sa)
1396 		FREE(sa, M_SONAME);
1397 done1:
1398 	fputsock(so);
1399 done2:
1400 	mtx_unlock(&Giant);
1401 	return (error);
1402 }
1403 
/*
 * getsockname() system call: getsockname1() with 'compat' cleared.
 *
 * MPSAFE
 */
int
getsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(td, uap, 0);
	return (error);
}
1415 
#ifdef COMPAT_OLDSOCK
/*
 * Old-socket-ABI getsockname: getsockname1() with 'compat' set.
 *
 * MPSAFE
 */
int
ogetsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(td, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
1429 
1430 /*
1431  * getpeername1() - Get name of peer for connected socket.
1432  *
1433  * MPSAFE
1434  */
1435 /* ARGSUSED */
1436 static int
1437 getpeername1(td, uap, compat)
1438 	struct thread *td;
1439 	register struct getpeername_args /* {
1440 		int	fdes;
1441 		caddr_t	asa;
1442 		int	*alen;
1443 	} */ *uap;
1444 	int compat;
1445 {
1446 	struct socket *so;
1447 	struct sockaddr *sa;
1448 	int len, error;
1449 
1450 	mtx_lock(&Giant);
1451 	if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1452 		goto done2;
1453 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1454 		error = ENOTCONN;
1455 		goto done1;
1456 	}
1457 	error = copyin(uap->alen, &len, sizeof (len));
1458 	if (error)
1459 		goto done1;
1460 	if (len < 0) {
1461 		error = EINVAL;
1462 		goto done1;
1463 	}
1464 	sa = 0;
1465 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1466 	if (error)
1467 		goto bad;
1468 	if (sa == 0) {
1469 		len = 0;
1470 		goto gotnothing;
1471 	}
1472 	len = MIN(len, sa->sa_len);
1473 #ifdef COMPAT_OLDSOCK
1474 	if (compat)
1475 		((struct osockaddr *)sa)->sa_family =
1476 		    sa->sa_family;
1477 #endif
1478 	error = copyout(sa, uap->asa, (u_int)len);
1479 	if (error)
1480 		goto bad;
1481 gotnothing:
1482 	error = copyout(&len, uap->alen, sizeof (len));
1483 bad:
1484 	if (sa)
1485 		FREE(sa, M_SONAME);
1486 done1:
1487 	fputsock(so);
1488 done2:
1489 	mtx_unlock(&Giant);
1490 	return (error);
1491 }
1492 
/*
 * getpeername() system call: getpeername1() with 'compat' cleared.
 *
 * MPSAFE
 */
int
getpeername(td, uap)
	struct thread *td;
	struct getpeername_args *uap;
{
	int error;

	error = getpeername1(td, uap, 0);
	return (error);
}
1504 
#ifdef COMPAT_OLDSOCK
/*
 * Old-socket-ABI getpeername: getpeername1() with 'compat' set.
 *
 * MPSAFE
 */
int
ogetpeername(td, uap)
	struct thread *td;
	struct ogetpeername_args *uap;
{
	struct getpeername_args *nuap;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	nuap = (struct getpeername_args *)uap;
	return (getpeername1(td, nuap, 1));
}
#endif /* COMPAT_OLDSOCK */
1519 
1520 int
1521 sockargs(mp, buf, buflen, type)
1522 	struct mbuf **mp;
1523 	caddr_t buf;
1524 	int buflen, type;
1525 {
1526 	register struct sockaddr *sa;
1527 	register struct mbuf *m;
1528 	int error;
1529 
1530 	if ((u_int)buflen > MLEN) {
1531 #ifdef COMPAT_OLDSOCK
1532 		if (type == MT_SONAME && (u_int)buflen <= 112)
1533 			buflen = MLEN;		/* unix domain compat. hack */
1534 		else
1535 #endif
1536 		return (EINVAL);
1537 	}
1538 	m = m_get(0, type);
1539 	if (m == NULL)
1540 		return (ENOBUFS);
1541 	m->m_len = buflen;
1542 	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1543 	if (error)
1544 		(void) m_free(m);
1545 	else {
1546 		*mp = m;
1547 		if (type == MT_SONAME) {
1548 			sa = mtod(m, struct sockaddr *);
1549 
1550 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1551 			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1552 				sa->sa_family = sa->sa_len;
1553 #endif
1554 			sa->sa_len = buflen;
1555 		}
1556 	}
1557 	return (error);
1558 }
1559 
1560 int
1561 getsockaddr(namp, uaddr, len)
1562 	struct sockaddr **namp;
1563 	caddr_t uaddr;
1564 	size_t len;
1565 {
1566 	struct sockaddr *sa;
1567 	int error;
1568 
1569 	if (len > SOCK_MAXADDRLEN)
1570 		return ENAMETOOLONG;
1571 	MALLOC(sa, struct sockaddr *, len, M_SONAME, 0);
1572 	error = copyin(uaddr, sa, len);
1573 	if (error) {
1574 		FREE(sa, M_SONAME);
1575 	} else {
1576 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1577 		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1578 			sa->sa_family = sa->sa_len;
1579 #endif
1580 		sa->sa_len = len;
1581 		*namp = sa;
1582 	}
1583 	return error;
1584 }
1585 
1586 /*
1587  * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
1588  */
1589 static void
1590 sf_buf_init(void *arg)
1591 {
1592 	int i;
1593 
1594 	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
1595 	mtx_lock(&sf_freelist.sf_lock);
1596 	SLIST_INIT(&sf_freelist.sf_head);
1597 	sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE);
1598 	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
1599 	    M_NOWAIT | M_ZERO);
1600 	for (i = 0; i < nsfbufs; i++) {
1601 		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
1602 		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
1603 	}
1604 	sf_buf_alloc_want = 0;
1605 	mtx_unlock(&sf_freelist.sf_lock);
1606 }
1607 
1608 /*
1609  * Get an sf_buf from the freelist. Will block if none are available.
1610  */
1611 struct sf_buf *
1612 sf_buf_alloc()
1613 {
1614 	struct sf_buf *sf;
1615 	int error;
1616 
1617 	mtx_lock(&sf_freelist.sf_lock);
1618 	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
1619 		sf_buf_alloc_want++;
1620 		error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH,
1621 		    "sfbufa", 0);
1622 		sf_buf_alloc_want--;
1623 
1624 		/*
1625 		 * If we got a signal, don't risk going back to sleep.
1626 		 */
1627 		if (error)
1628 			break;
1629 	}
1630 	if (sf != NULL)
1631 		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
1632 	mtx_unlock(&sf_freelist.sf_lock);
1633 	return (sf);
1634 }
1635 
/*
 * Translate an address within the sf_buf KVA range (which starts at
 * sf_base, one page per buffer) into a pointer to the sf_buf header that
 * owns that page.
 */
#define	dtosf(x)							\
	(&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT])
1637 
1638 /*
1639  * Detatch mapped page and release resources back to the system.
1640  */
1641 void
1642 sf_buf_free(void *addr, void *args)
1643 {
1644 	struct sf_buf *sf;
1645 	struct vm_page *m;
1646 
1647 	GIANT_REQUIRED;
1648 
1649 	sf = dtosf(addr);
1650 	pmap_qremove((vm_offset_t)addr, 1);
1651 	m = sf->m;
1652 	vm_page_lock_queues();
1653 	vm_page_unwire(m, 0);
1654 	/*
1655 	 * Check for the object going away on us. This can
1656 	 * happen since we don't hold a reference to it.
1657 	 * If so, we're responsible for freeing the page.
1658 	 */
1659 	if (m->wire_count == 0 && m->object == NULL)
1660 		vm_page_free(m);
1661 	vm_page_unlock_queues();
1662 	sf->m = NULL;
1663 	mtx_lock(&sf_freelist.sf_lock);
1664 	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
1665 	if (sf_buf_alloc_want > 0)
1666 		wakeup_one(&sf_freelist);
1667 	mtx_unlock(&sf_freelist.sf_lock);
1668 }
1669 
/*
 * sendfile(2)
 *
 * MPSAFE
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Send the file referred to by 'fd', beginning at 'offset', to the
 * socket 's'.  At most 'nbytes' bytes of the file are sent; nbytes == 0
 * means send until EOF.  A header and/or trailer may optionally be added
 * to the socket output.  If 'sbytes' is given, the total number of bytes
 * sent is written to *sbytes.
 *
 * This entry point simply calls do_sendfile() with 'compat' cleared.
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
	int error;

	error = do_sendfile(td, uap, 0);
	return (error);
}
1690 
#ifdef COMPAT_FREEBSD4
/*
 * FreeBSD 4 compatible sendfile entry point.  Repackages the old argument
 * structure field by field into a struct sendfile_args and calls
 * do_sendfile() with 'compat' set.
 */
int
freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
{
	struct sendfile_args nargs;
	int error;

	nargs.s = uap->s;
	nargs.fd = uap->fd;
	nargs.hdtr = uap->hdtr;
	nargs.offset = uap->offset;
	nargs.nbytes = uap->nbytes;
	nargs.sbytes = uap->sbytes;
	nargs.flags = uap->flags;

	error = do_sendfile(td, &nargs, 1);
	return (error);
}
#endif /* COMPAT_FREEBSD4 */
1708 
1709 static int
1710 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1711 {
1712 	struct vnode *vp;
1713 	struct vm_object *obj;
1714 	struct socket *so = NULL;
1715 	struct mbuf *m;
1716 	struct sf_buf *sf;
1717 	struct vm_page *pg;
1718 	struct writev_args nuap;
1719 	struct sf_hdtr hdtr;
1720 	off_t off, xfsize, hdtr_size, sbytes = 0;
1721 	int error, s;
1722 
1723 	mtx_lock(&Giant);
1724 
1725 	hdtr_size = 0;
1726 
1727 	/*
1728 	 * The descriptor must be a regular file and have a backing VM object.
1729 	 */
1730 	if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1731 		goto done;
1732 	if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
1733 		error = EINVAL;
1734 		goto done;
1735 	}
1736 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1737 		goto done;
1738 	if (so->so_type != SOCK_STREAM) {
1739 		error = EINVAL;
1740 		goto done;
1741 	}
1742 	if ((so->so_state & SS_ISCONNECTED) == 0) {
1743 		error = ENOTCONN;
1744 		goto done;
1745 	}
1746 	if (uap->offset < 0) {
1747 		error = EINVAL;
1748 		goto done;
1749 	}
1750 
1751 #ifdef MAC
1752 	error = mac_check_socket_send(td->td_ucred, so);
1753 	if (error)
1754 		goto done;
1755 #endif
1756 
1757 	/*
1758 	 * If specified, get the pointer to the sf_hdtr struct for
1759 	 * any headers/trailers.
1760 	 */
1761 	if (uap->hdtr != NULL) {
1762 		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1763 		if (error)
1764 			goto done;
1765 		/*
1766 		 * Send any headers. Wimp out and use writev(2).
1767 		 */
1768 		if (hdtr.headers != NULL) {
1769 			nuap.fd = uap->s;
1770 			nuap.iovp = hdtr.headers;
1771 			nuap.iovcnt = hdtr.hdr_cnt;
1772 			error = writev(td, &nuap);
1773 			if (error)
1774 				goto done;
1775 			if (compat)
1776 				sbytes += td->td_retval[0];
1777 			else
1778 				hdtr_size += td->td_retval[0];
1779 		}
1780 	}
1781 
1782 	/*
1783 	 * Protect against multiple writers to the socket.
1784 	 */
1785 	(void) sblock(&so->so_snd, 0);
1786 
1787 	/*
1788 	 * Loop through the pages in the file, starting with the requested
1789 	 * offset. Get a file page (do I/O if necessary), map the file page
1790 	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1791 	 * it on the socket.
1792 	 */
1793 	for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1794 		vm_pindex_t pindex;
1795 		vm_offset_t pgoff;
1796 
1797 		pindex = OFF_TO_IDX(off);
1798 retry_lookup:
1799 		/*
1800 		 * Calculate the amount to transfer. Not to exceed a page,
1801 		 * the EOF, or the passed in nbytes.
1802 		 */
1803 		xfsize = obj->un_pager.vnp.vnp_size - off;
1804 		if (xfsize > PAGE_SIZE)
1805 			xfsize = PAGE_SIZE;
1806 		pgoff = (vm_offset_t)(off & PAGE_MASK);
1807 		if (PAGE_SIZE - pgoff < xfsize)
1808 			xfsize = PAGE_SIZE - pgoff;
1809 		if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1810 			xfsize = uap->nbytes - sbytes;
1811 		if (xfsize <= 0)
1812 			break;
1813 		/*
1814 		 * Optimize the non-blocking case by looking at the socket space
1815 		 * before going to the extra work of constituting the sf_buf.
1816 		 */
1817 		if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1818 			if (so->so_state & SS_CANTSENDMORE)
1819 				error = EPIPE;
1820 			else
1821 				error = EAGAIN;
1822 			sbunlock(&so->so_snd);
1823 			goto done;
1824 		}
1825 		/*
1826 		 * Attempt to look up the page.
1827 		 *
1828 		 *	Allocate if not found
1829 		 *
1830 		 *	Wait and loop if busy.
1831 		 */
1832 		pg = vm_page_lookup(obj, pindex);
1833 
1834 		if (pg == NULL) {
1835 			pg = vm_page_alloc(obj, pindex,
1836 			    VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
1837 			if (pg == NULL) {
1838 				VM_WAIT;
1839 				goto retry_lookup;
1840 			}
1841 			vm_page_lock_queues();
1842 			vm_page_wakeup(pg);
1843 		} else {
1844 			vm_page_lock_queues();
1845 			if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
1846 				goto retry_lookup;
1847 			/*
1848 		 	 * Wire the page so it does not get ripped out from
1849 			 * under us.
1850 			 */
1851 			vm_page_wire(pg);
1852 		}
1853 
1854 		/*
1855 		 * If page is not valid for what we need, initiate I/O
1856 		 */
1857 
1858 		if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) {
1859 			int bsize, resid;
1860 
1861 			/*
1862 			 * Ensure that our page is still around when the I/O
1863 			 * completes.
1864 			 */
1865 			vm_page_io_start(pg);
1866 			vm_page_unlock_queues();
1867 
1868 			/*
1869 			 * Get the page from backing store.
1870 			 */
1871 			bsize = vp->v_mount->mnt_stat.f_iosize;
1872 			vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
1873 			/*
1874 			 * XXXMAC: Because we don't have fp->f_cred here,
1875 			 * we pass in NOCRED.  This is probably wrong, but
1876 			 * is consistent with our original implementation.
1877 			 */
1878 			error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
1879 			    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
1880 			    IO_VMIO | ((MAXBSIZE / bsize) << 16),
1881 			    td->td_ucred, NOCRED, &resid, td);
1882 			VOP_UNLOCK(vp, 0, td);
1883 			vm_page_lock_queues();
1884 			vm_page_flag_clear(pg, PG_ZERO);
1885 			vm_page_io_finish(pg);
1886 			if (error) {
1887 				vm_page_unwire(pg, 0);
1888 				/*
1889 				 * See if anyone else might know about this page.
1890 				 * If not and it is not valid, then free it.
1891 				 */
1892 				if (pg->wire_count == 0 && pg->valid == 0 &&
1893 				    pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1894 				    pg->hold_count == 0) {
1895 					vm_page_busy(pg);
1896 					vm_page_free(pg);
1897 				}
1898 				vm_page_unlock_queues();
1899 				sbunlock(&so->so_snd);
1900 				goto done;
1901 			}
1902 		}
1903 		vm_page_unlock_queues();
1904 
1905 		/*
1906 		 * Get a sendfile buf. We usually wait as long as necessary,
1907 		 * but this wait can be interrupted.
1908 		 */
1909 		if ((sf = sf_buf_alloc()) == NULL) {
1910 			vm_page_lock_queues();
1911 			vm_page_unwire(pg, 0);
1912 			if (pg->wire_count == 0 && pg->object == NULL)
1913 				vm_page_free(pg);
1914 			vm_page_unlock_queues();
1915 			sbunlock(&so->so_snd);
1916 			error = EINTR;
1917 			goto done;
1918 		}
1919 
1920 		/*
1921 		 * Allocate a kernel virtual page and insert the physical page
1922 		 * into it.
1923 		 */
1924 		sf->m = pg;
1925 		pmap_qenter(sf->kva, &pg, 1);
1926 		/*
1927 		 * Get an mbuf header and set it up as having external storage.
1928 		 */
1929 		MGETHDR(m, 0, MT_DATA);
1930 		if (m == NULL) {
1931 			error = ENOBUFS;
1932 			sf_buf_free((void *)sf->kva, NULL);
1933 			sbunlock(&so->so_snd);
1934 			goto done;
1935 		}
1936 		/*
1937 		 * Setup external storage for mbuf.
1938 		 */
1939 		MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, NULL, M_RDONLY,
1940 		    EXT_SFBUF);
1941 		m->m_data = (char *) sf->kva + pgoff;
1942 		m->m_pkthdr.len = m->m_len = xfsize;
1943 		/*
1944 		 * Add the buffer to the socket buffer chain.
1945 		 */
1946 		s = splnet();
1947 retry_space:
1948 		/*
1949 		 * Make sure that the socket is still able to take more data.
1950 		 * CANTSENDMORE being true usually means that the connection
1951 		 * was closed. so_error is true when an error was sensed after
1952 		 * a previous send.
1953 		 * The state is checked after the page mapping and buffer
1954 		 * allocation above since those operations may block and make
1955 		 * any socket checks stale. From this point forward, nothing
1956 		 * blocks before the pru_send (or more accurately, any blocking
1957 		 * results in a loop back to here to re-check).
1958 		 */
1959 		if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1960 			if (so->so_state & SS_CANTSENDMORE) {
1961 				error = EPIPE;
1962 			} else {
1963 				error = so->so_error;
1964 				so->so_error = 0;
1965 			}
1966 			m_freem(m);
1967 			sbunlock(&so->so_snd);
1968 			splx(s);
1969 			goto done;
1970 		}
1971 		/*
1972 		 * Wait for socket space to become available. We do this just
1973 		 * after checking the connection state above in order to avoid
1974 		 * a race condition with sbwait().
1975 		 */
1976 		if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
1977 			if (so->so_state & SS_NBIO) {
1978 				m_freem(m);
1979 				sbunlock(&so->so_snd);
1980 				splx(s);
1981 				error = EAGAIN;
1982 				goto done;
1983 			}
1984 			error = sbwait(&so->so_snd);
1985 			/*
1986 			 * An error from sbwait usually indicates that we've
1987 			 * been interrupted by a signal. If we've sent anything
1988 			 * then return bytes sent, otherwise return the error.
1989 			 */
1990 			if (error) {
1991 				m_freem(m);
1992 				sbunlock(&so->so_snd);
1993 				splx(s);
1994 				goto done;
1995 			}
1996 			goto retry_space;
1997 		}
1998 		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
1999 		splx(s);
2000 		if (error) {
2001 			sbunlock(&so->so_snd);
2002 			goto done;
2003 		}
2004 	}
2005 	sbunlock(&so->so_snd);
2006 
2007 	/*
2008 	 * Send trailers. Wimp out and use writev(2).
2009 	 */
2010 	if (uap->hdtr != NULL && hdtr.trailers != NULL) {
2011 			nuap.fd = uap->s;
2012 			nuap.iovp = hdtr.trailers;
2013 			nuap.iovcnt = hdtr.trl_cnt;
2014 			error = writev(td, &nuap);
2015 			if (error)
2016 				goto done;
2017 			if (compat)
2018 				sbytes += td->td_retval[0];
2019 			else
2020 				hdtr_size += td->td_retval[0];
2021 	}
2022 
2023 done:
2024 	/*
2025 	 * If there was no error we have to clear td->td_retval[0]
2026 	 * because it may have been set by writev.
2027 	 */
2028 	if (error == 0) {
2029 		td->td_retval[0] = 0;
2030 	}
2031 	if (uap->sbytes != NULL) {
2032 		if (!compat)
2033 			sbytes += hdtr_size;
2034 		copyout(&sbytes, uap->sbytes, sizeof(off_t));
2035 	}
2036 	if (vp)
2037 		vrele(vp);
2038 	if (so)
2039 		fputsock(so);
2040 	mtx_unlock(&Giant);
2041 	return (error);
2042 }
2043