xref: /freebsd/sys/kern/uipc_syscalls.c (revision 411d10a600b6d3bf43415ba408b7e221f8688bc2)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * sendfile(2) and related extensions:
6  * Copyright (c) 1998, David Greenman. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
37  */
38 
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD$");
41 
42 #include "opt_compat.h"
43 #include "opt_ktrace.h"
44 #include "opt_mac.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/kernel.h>
49 #include <sys/lock.h>
50 #include <sys/mac.h>
51 #include <sys/mutex.h>
52 #include <sys/sysproto.h>
53 #include <sys/malloc.h>
54 #include <sys/filedesc.h>
55 #include <sys/event.h>
56 #include <sys/proc.h>
57 #include <sys/fcntl.h>
58 #include <sys/file.h>
59 #include <sys/filio.h>
60 #include <sys/mount.h>
61 #include <sys/mbuf.h>
62 #include <sys/protosw.h>
63 #include <sys/socket.h>
64 #include <sys/socketvar.h>
65 #include <sys/signalvar.h>
66 #include <sys/syscallsubr.h>
67 #include <sys/uio.h>
68 #include <sys/vnode.h>
69 #ifdef KTRACE
70 #include <sys/ktrace.h>
71 #endif
72 
73 #include <vm/vm.h>
74 #include <vm/vm_object.h>
75 #include <vm/vm_page.h>
76 #include <vm/vm_pageout.h>
77 #include <vm/vm_kern.h>
78 #include <vm/vm_extern.h>
79 
80 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
81 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
82 
83 static int accept1(struct thread *td, struct accept_args *uap, int compat);
84 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
85 static int getsockname1(struct thread *td, struct getsockname_args *uap,
86 			int compat);
87 static int getpeername1(struct thread *td, struct getpeername_args *uap,
88 			int compat);
89 
90 /*
91  * System call interface to the socket abstraction.
92  */
93 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
94 #define COMPAT_OLDSOCK
95 #endif
96 
97 /*
98  * MPSAFE
99  */
100 int
101 socket(td, uap)
102 	struct thread *td;
103 	register struct socket_args /* {
104 		int	domain;
105 		int	type;
106 		int	protocol;
107 	} */ *uap;
108 {
109 	struct filedesc *fdp;
110 	struct socket *so;
111 	struct file *fp;
112 	int fd, error;
113 
114 	mtx_lock(&Giant);
115 	fdp = td->td_proc->p_fd;
116 	error = falloc(td, &fp, &fd);
117 	if (error)
118 		goto done2;
119 	fhold(fp);
120 	error = socreate(uap->domain, &so, uap->type, uap->protocol,
121 	    td->td_ucred, td);
122 	FILEDESC_LOCK(fdp);
123 	if (error) {
124 		if (fdp->fd_ofiles[fd] == fp) {
125 			fdp->fd_ofiles[fd] = NULL;
126 			FILEDESC_UNLOCK(fdp);
127 			fdrop(fp, td);
128 		} else
129 			FILEDESC_UNLOCK(fdp);
130 	} else {
131 		fp->f_data = so;	/* already has ref count */
132 		fp->f_flag = FREAD|FWRITE;
133 		fp->f_ops = &socketops;
134 		fp->f_type = DTYPE_SOCKET;
135 		FILEDESC_UNLOCK(fdp);
136 		td->td_retval[0] = fd;
137 	}
138 	fdrop(fp, td);
139 done2:
140 	mtx_unlock(&Giant);
141 	return (error);
142 }
143 
144 /*
145  * MPSAFE
146  */
147 /* ARGSUSED */
148 int
149 bind(td, uap)
150 	struct thread *td;
151 	register struct bind_args /* {
152 		int	s;
153 		caddr_t	name;
154 		int	namelen;
155 	} */ *uap;
156 {
157 	struct sockaddr *sa;
158 	int error;
159 
160 	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
161 		return (error);
162 
163 	return (kern_bind(td, uap->s, sa));
164 }
165 
166 int
167 kern_bind(td, fd, sa)
168 	struct thread *td;
169 	int fd;
170 	struct sockaddr *sa;
171 {
172 	struct socket *so;
173 	int error;
174 
175 	mtx_lock(&Giant);
176 	if ((error = fgetsock(td, fd, &so, NULL)) != 0)
177 		goto done2;
178 #ifdef MAC
179 	error = mac_check_socket_bind(td->td_ucred, so, sa);
180 	if (error)
181 		goto done1;
182 #endif
183 	error = sobind(so, sa, td);
184 #ifdef MAC
185 done1:
186 #endif
187 	fputsock(so);
188 done2:
189 	mtx_unlock(&Giant);
190 	FREE(sa, M_SONAME);
191 	return (error);
192 }
193 
194 /*
195  * MPSAFE
196  */
197 /* ARGSUSED */
198 int
199 listen(td, uap)
200 	struct thread *td;
201 	register struct listen_args /* {
202 		int	s;
203 		int	backlog;
204 	} */ *uap;
205 {
206 	struct socket *so;
207 	int error;
208 
209 	mtx_lock(&Giant);
210 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
211 #ifdef MAC
212 		error = mac_check_socket_listen(td->td_ucred, so);
213 		if (error)
214 			goto done;
215 #endif
216 		error = solisten(so, uap->backlog, td);
217 #ifdef MAC
218 done:
219 #endif
220 		fputsock(so);
221 	}
222 	mtx_unlock(&Giant);
223 	return(error);
224 }
225 
226 /*
227  * accept1()
228  * MPSAFE
229  */
230 static int
231 accept1(td, uap, compat)
232 	struct thread *td;
233 	register struct accept_args /* {
234 		int	s;
235 		caddr_t	name;
236 		int	*anamelen;
237 	} */ *uap;
238 	int compat;
239 {
240 	struct filedesc *fdp;
241 	struct file *nfp = NULL;
242 	struct sockaddr *sa;
243 	int namelen, error, s;
244 	struct socket *head, *so;
245 	int fd;
246 	u_int fflag;
247 	pid_t pgid;
248 	int tmp;
249 
250 	fdp = td->td_proc->p_fd;
251 	if (uap->name) {
252 		error = copyin(uap->anamelen, &namelen, sizeof (namelen));
253 		if(error)
254 			goto done3;
255 		if (namelen < 0) {
256 			error = EINVAL;
257 			goto done3;
258 		}
259 	}
260 	mtx_lock(&Giant);
261 	error = fgetsock(td, uap->s, &head, &fflag);
262 	if (error)
263 		goto done2;
264 	s = splnet();
265 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
266 		splx(s);
267 		error = EINVAL;
268 		goto done;
269 	}
270 	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
271 		if (head->so_state & SS_CANTRCVMORE) {
272 			head->so_error = ECONNABORTED;
273 			break;
274 		}
275 		if ((head->so_state & SS_NBIO) != 0) {
276 			head->so_error = EWOULDBLOCK;
277 			break;
278 		}
279 		error = tsleep(&head->so_timeo, PSOCK | PCATCH,
280 		    "accept", 0);
281 		if (error) {
282 			splx(s);
283 			goto done;
284 		}
285 	}
286 	if (head->so_error) {
287 		error = head->so_error;
288 		head->so_error = 0;
289 		splx(s);
290 		goto done;
291 	}
292 
293 	/*
294 	 * At this point we know that there is at least one connection
295 	 * ready to be accepted. Remove it from the queue prior to
296 	 * allocating the file descriptor for it since falloc() may
297 	 * block allowing another process to accept the connection
298 	 * instead.
299 	 */
300 	so = TAILQ_FIRST(&head->so_comp);
301 	TAILQ_REMOVE(&head->so_comp, so, so_list);
302 	head->so_qlen--;
303 
304 	error = falloc(td, &nfp, &fd);
305 	if (error) {
306 		/*
307 		 * Probably ran out of file descriptors. Put the
308 		 * unaccepted connection back onto the queue and
309 		 * do another wakeup so some other process might
310 		 * have a chance at it.
311 		 */
312 		TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
313 		head->so_qlen++;
314 		wakeup_one(&head->so_timeo);
315 		splx(s);
316 		goto done;
317 	}
318 	fhold(nfp);
319 	td->td_retval[0] = fd;
320 
321 	/* connection has been removed from the listen queue */
322 	KNOTE(&head->so_rcv.sb_sel.si_note, 0);
323 
324 	so->so_state &= ~SS_COMP;
325 	so->so_head = NULL;
326 	pgid = fgetown(&head->so_sigio);
327 	if (pgid != 0)
328 		fsetown(pgid, &so->so_sigio);
329 
330 	FILE_LOCK(nfp);
331 	soref(so);			/* file descriptor reference */
332 	nfp->f_data = so;	/* nfp has ref count from falloc */
333 	nfp->f_flag = fflag;
334 	nfp->f_ops = &socketops;
335 	nfp->f_type = DTYPE_SOCKET;
336 	FILE_UNLOCK(nfp);
337 	/* Sync socket nonblocking/async state with file flags */
338 	tmp = fflag & FNONBLOCK;
339 	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
340 	tmp = fflag & FASYNC;
341 	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
342 	sa = 0;
343 	error = soaccept(so, &sa);
344 	if (error) {
345 		/*
346 		 * return a namelen of zero for older code which might
347 	 	 * ignore the return value from accept.
348 		 */
349 		if (uap->name != NULL) {
350 			namelen = 0;
351 			(void) copyout(&namelen,
352 			    uap->anamelen, sizeof(*uap->anamelen));
353 		}
354 		goto noconnection;
355 	}
356 	if (sa == NULL) {
357 		namelen = 0;
358 		if (uap->name)
359 			goto gotnoname;
360 		splx(s);
361 		error = 0;
362 		goto done;
363 	}
364 	if (uap->name) {
365 		/* check sa_len before it is destroyed */
366 		if (namelen > sa->sa_len)
367 			namelen = sa->sa_len;
368 #ifdef COMPAT_OLDSOCK
369 		if (compat)
370 			((struct osockaddr *)sa)->sa_family =
371 			    sa->sa_family;
372 #endif
373 		error = copyout(sa, uap->name, (u_int)namelen);
374 		if (!error)
375 gotnoname:
376 			error = copyout(&namelen,
377 			    uap->anamelen, sizeof (*uap->anamelen));
378 	}
379 noconnection:
380 	if (sa)
381 		FREE(sa, M_SONAME);
382 
383 	/*
384 	 * close the new descriptor, assuming someone hasn't ripped it
385 	 * out from under us.
386 	 */
387 	if (error) {
388 		FILEDESC_LOCK(fdp);
389 		if (fdp->fd_ofiles[fd] == nfp) {
390 			fdp->fd_ofiles[fd] = NULL;
391 			FILEDESC_UNLOCK(fdp);
392 			fdrop(nfp, td);
393 		} else {
394 			FILEDESC_UNLOCK(fdp);
395 		}
396 	}
397 	splx(s);
398 
399 	/*
400 	 * Release explicitly held references before returning.
401 	 */
402 done:
403 	if (nfp != NULL)
404 		fdrop(nfp, td);
405 	fputsock(head);
406 done2:
407 	mtx_unlock(&Giant);
408 done3:
409 	return (error);
410 }
411 
/*
 * accept(2): accept1() with the compat flag cleared.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
accept(struct thread *td, struct accept_args *uap)
{
	int error;

	error = accept1(td, uap, 0);
	return (error);
}
423 
424 #ifdef COMPAT_OLDSOCK
/*
 * Old-style accept: accept1() with the compat flag set.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
oaccept(struct thread *td, struct accept_args *uap)
{
	int error;

	error = accept1(td, uap, 1);
	return (error);
}
436 #endif /* COMPAT_OLDSOCK */
437 
438 /*
439  * MPSAFE
440  */
441 /* ARGSUSED */
442 int
443 connect(td, uap)
444 	struct thread *td;
445 	register struct connect_args /* {
446 		int	s;
447 		caddr_t	name;
448 		int	namelen;
449 	} */ *uap;
450 {
451 	struct sockaddr *sa;
452 	int error;
453 
454 	error = getsockaddr(&sa, uap->name, uap->namelen);
455 	if (error)
456 		return error;
457 
458 	return (kern_connect(td, uap->s, sa));
459 }
460 
461 
462 int
463 kern_connect(td, fd, sa)
464 	struct thread *td;
465 	int fd;
466 	struct sockaddr *sa;
467 {
468 	struct socket *so;
469 	int error, s;
470 	int interrupted = 0;
471 
472 	mtx_lock(&Giant);
473 	if ((error = fgetsock(td, fd, &so, NULL)) != 0)
474 		goto done2;
475 	if (so->so_state & SS_ISCONNECTING) {
476 		error = EALREADY;
477 		goto done1;
478 	}
479 #ifdef MAC
480 	error = mac_check_socket_connect(td->td_ucred, so, sa);
481 	if (error)
482 		goto bad;
483 #endif
484 	error = soconnect(so, sa, td);
485 	if (error)
486 		goto bad;
487 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
488 		error = EINPROGRESS;
489 		goto done1;
490 	}
491 	s = splnet();
492 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
493 		error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0);
494 		if (error) {
495 			if (error == EINTR || error == ERESTART)
496 				interrupted = 1;
497 			break;
498 		}
499 	}
500 	if (error == 0) {
501 		error = so->so_error;
502 		so->so_error = 0;
503 	}
504 	splx(s);
505 bad:
506 	if (!interrupted)
507 		so->so_state &= ~SS_ISCONNECTING;
508 	if (error == ERESTART)
509 		error = EINTR;
510 done1:
511 	fputsock(so);
512 done2:
513 	mtx_unlock(&Giant);
514 	FREE(sa, M_SONAME);
515 	return (error);
516 }
517 
518 /*
519  * MPSAFE
520  */
521 int
522 socketpair(td, uap)
523 	struct thread *td;
524 	register struct socketpair_args /* {
525 		int	domain;
526 		int	type;
527 		int	protocol;
528 		int	*rsv;
529 	} */ *uap;
530 {
531 	register struct filedesc *fdp = td->td_proc->p_fd;
532 	struct file *fp1, *fp2;
533 	struct socket *so1, *so2;
534 	int fd, error, sv[2];
535 
536 	mtx_lock(&Giant);
537 	error = socreate(uap->domain, &so1, uap->type, uap->protocol,
538 	    td->td_ucred, td);
539 	if (error)
540 		goto done2;
541 	error = socreate(uap->domain, &so2, uap->type, uap->protocol,
542 	    td->td_ucred, td);
543 	if (error)
544 		goto free1;
545 	error = falloc(td, &fp1, &fd);
546 	if (error)
547 		goto free2;
548 	fhold(fp1);
549 	sv[0] = fd;
550 	fp1->f_data = so1;	/* so1 already has ref count */
551 	error = falloc(td, &fp2, &fd);
552 	if (error)
553 		goto free3;
554 	fhold(fp2);
555 	fp2->f_data = so2;	/* so2 already has ref count */
556 	sv[1] = fd;
557 	error = soconnect2(so1, so2);
558 	if (error)
559 		goto free4;
560 	if (uap->type == SOCK_DGRAM) {
561 		/*
562 		 * Datagram socket connection is asymmetric.
563 		 */
564 		 error = soconnect2(so2, so1);
565 		 if (error)
566 			goto free4;
567 	}
568 	FILE_LOCK(fp1);
569 	fp1->f_flag = FREAD|FWRITE;
570 	fp1->f_ops = &socketops;
571 	fp1->f_type = DTYPE_SOCKET;
572 	FILE_UNLOCK(fp1);
573 	FILE_LOCK(fp2);
574 	fp2->f_flag = FREAD|FWRITE;
575 	fp2->f_ops = &socketops;
576 	fp2->f_type = DTYPE_SOCKET;
577 	FILE_UNLOCK(fp2);
578 	error = copyout(sv, uap->rsv, 2 * sizeof (int));
579 	fdrop(fp1, td);
580 	fdrop(fp2, td);
581 	goto done2;
582 free4:
583 	FILEDESC_LOCK(fdp);
584 	if (fdp->fd_ofiles[sv[1]] == fp2) {
585 		fdp->fd_ofiles[sv[1]] = NULL;
586 		FILEDESC_UNLOCK(fdp);
587 		fdrop(fp2, td);
588 	} else
589 		FILEDESC_UNLOCK(fdp);
590 	fdrop(fp2, td);
591 free3:
592 	FILEDESC_LOCK(fdp);
593 	if (fdp->fd_ofiles[sv[0]] == fp1) {
594 		fdp->fd_ofiles[sv[0]] = NULL;
595 		FILEDESC_UNLOCK(fdp);
596 		fdrop(fp1, td);
597 	} else
598 		FILEDESC_UNLOCK(fdp);
599 	fdrop(fp1, td);
600 free2:
601 	(void)soclose(so2);
602 free1:
603 	(void)soclose(so1);
604 done2:
605 	mtx_unlock(&Giant);
606 	return (error);
607 }
608 
609 static int
610 sendit(td, s, mp, flags)
611 	register struct thread *td;
612 	int s;
613 	register struct msghdr *mp;
614 	int flags;
615 {
616 	struct mbuf *control;
617 	struct sockaddr *to;
618 	int error;
619 
620 	if (mp->msg_name != NULL) {
621 		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
622 		if (error) {
623 			to = NULL;
624 			goto bad;
625 		}
626 		mp->msg_name = to;
627 	} else
628 		to = NULL;
629 
630 	if (mp->msg_control) {
631 		if (mp->msg_controllen < sizeof(struct cmsghdr)
632 #ifdef COMPAT_OLDSOCK
633 		    && mp->msg_flags != MSG_COMPAT
634 #endif
635 		) {
636 			error = EINVAL;
637 			goto bad;
638 		}
639 		error = sockargs(&control, mp->msg_control,
640 		    mp->msg_controllen, MT_CONTROL);
641 		if (error)
642 			goto bad;
643 #ifdef COMPAT_OLDSOCK
644 		if (mp->msg_flags == MSG_COMPAT) {
645 			register struct cmsghdr *cm;
646 
647 			M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
648 			if (control == 0) {
649 				error = ENOBUFS;
650 				goto bad;
651 			} else {
652 				cm = mtod(control, struct cmsghdr *);
653 				cm->cmsg_len = control->m_len;
654 				cm->cmsg_level = SOL_SOCKET;
655 				cm->cmsg_type = SCM_RIGHTS;
656 			}
657 		}
658 #endif
659 	} else {
660 		control = NULL;
661 	}
662 
663 	error = kern_sendit(td, s, mp, flags, control);
664 
665 bad:
666 	if (to)
667 		FREE(to, M_SONAME);
668 	return (error);
669 }
670 
671 int
672 kern_sendit(td, s, mp, flags, control)
673 	struct thread *td;
674 	int s;
675 	struct msghdr *mp;
676 	int flags;
677 	struct mbuf *control;
678 {
679 	struct uio auio;
680 	struct iovec *iov;
681 	struct socket *so;
682 	int i;
683 	int len, error;
684 #ifdef KTRACE
685 	struct iovec *ktriov = NULL;
686 	struct uio ktruio;
687 	int iovlen;
688 #endif
689 
690 	mtx_lock(&Giant);
691 	if ((error = fgetsock(td, s, &so, NULL)) != 0)
692 		goto bad2;
693 
694 #ifdef MAC
695 	error = mac_check_socket_send(td->td_ucred, so);
696 	if (error)
697 		goto bad;
698 #endif
699 
700 	auio.uio_iov = mp->msg_iov;
701 	auio.uio_iovcnt = mp->msg_iovlen;
702 	auio.uio_segflg = UIO_USERSPACE;
703 	auio.uio_rw = UIO_WRITE;
704 	auio.uio_td = td;
705 	auio.uio_offset = 0;			/* XXX */
706 	auio.uio_resid = 0;
707 	iov = mp->msg_iov;
708 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
709 		if ((auio.uio_resid += iov->iov_len) < 0) {
710 			error = EINVAL;
711 			goto bad;
712 		}
713 	}
714 #ifdef KTRACE
715 	if (KTRPOINT(td, KTR_GENIO)) {
716 		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
717 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
718 		bcopy(auio.uio_iov, ktriov, iovlen);
719 		ktruio = auio;
720 	}
721 #endif
722 	len = auio.uio_resid;
723 	error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio,
724 	    0, control, flags, td);
725 	if (error) {
726 		if (auio.uio_resid != len && (error == ERESTART ||
727 		    error == EINTR || error == EWOULDBLOCK))
728 			error = 0;
729 		/* Generation of SIGPIPE can be controlled per socket */
730 		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) {
731 			PROC_LOCK(td->td_proc);
732 			psignal(td->td_proc, SIGPIPE);
733 			PROC_UNLOCK(td->td_proc);
734 		}
735 	}
736 	if (error == 0)
737 		td->td_retval[0] = len - auio.uio_resid;
738 #ifdef KTRACE
739 	if (ktriov != NULL) {
740 		if (error == 0) {
741 			ktruio.uio_iov = ktriov;
742 			ktruio.uio_resid = td->td_retval[0];
743 			ktrgenio(s, UIO_WRITE, &ktruio, error);
744 		}
745 		FREE(ktriov, M_TEMP);
746 	}
747 #endif
748 bad:
749 	fputsock(so);
750 bad2:
751 	mtx_unlock(&Giant);
752 	return (error);
753 }
754 
755 /*
756  * MPSAFE
757  */
758 int
759 sendto(td, uap)
760 	struct thread *td;
761 	register struct sendto_args /* {
762 		int	s;
763 		caddr_t	buf;
764 		size_t	len;
765 		int	flags;
766 		caddr_t	to;
767 		int	tolen;
768 	} */ *uap;
769 {
770 	struct msghdr msg;
771 	struct iovec aiov;
772 	int error;
773 
774 	msg.msg_name = uap->to;
775 	msg.msg_namelen = uap->tolen;
776 	msg.msg_iov = &aiov;
777 	msg.msg_iovlen = 1;
778 	msg.msg_control = 0;
779 #ifdef COMPAT_OLDSOCK
780 	msg.msg_flags = 0;
781 #endif
782 	aiov.iov_base = uap->buf;
783 	aiov.iov_len = uap->len;
784 	error = sendit(td, uap->s, &msg, uap->flags);
785 	return (error);
786 }
787 
788 #ifdef COMPAT_OLDSOCK
789 /*
790  * MPSAFE
791  */
792 int
793 osend(td, uap)
794 	struct thread *td;
795 	register struct osend_args /* {
796 		int	s;
797 		caddr_t	buf;
798 		int	len;
799 		int	flags;
800 	} */ *uap;
801 {
802 	struct msghdr msg;
803 	struct iovec aiov;
804 	int error;
805 
806 	msg.msg_name = 0;
807 	msg.msg_namelen = 0;
808 	msg.msg_iov = &aiov;
809 	msg.msg_iovlen = 1;
810 	aiov.iov_base = uap->buf;
811 	aiov.iov_len = uap->len;
812 	msg.msg_control = 0;
813 	msg.msg_flags = 0;
814 	error = sendit(td, uap->s, &msg, uap->flags);
815 	return (error);
816 }
817 
818 /*
819  * MPSAFE
820  */
821 int
822 osendmsg(td, uap)
823 	struct thread *td;
824 	register struct osendmsg_args /* {
825 		int	s;
826 		caddr_t	msg;
827 		int	flags;
828 	} */ *uap;
829 {
830 	struct msghdr msg;
831 	struct iovec aiov[UIO_SMALLIOV], *iov;
832 	int error;
833 
834 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
835 	if (error)
836 		goto done2;
837 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
838 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
839 			error = EMSGSIZE;
840 			goto done2;
841 		}
842 		MALLOC(iov, struct iovec *,
843 		      sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
844 		      M_WAITOK);
845 	} else {
846 		iov = aiov;
847 	}
848 	error = copyin(msg.msg_iov, iov,
849 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
850 	if (error)
851 		goto done;
852 	msg.msg_flags = MSG_COMPAT;
853 	msg.msg_iov = iov;
854 	error = sendit(td, uap->s, &msg, uap->flags);
855 done:
856 	if (iov != aiov)
857 		FREE(iov, M_IOV);
858 done2:
859 	return (error);
860 }
861 #endif
862 
863 /*
864  * MPSAFE
865  */
866 int
867 sendmsg(td, uap)
868 	struct thread *td;
869 	register struct sendmsg_args /* {
870 		int	s;
871 		caddr_t	msg;
872 		int	flags;
873 	} */ *uap;
874 {
875 	struct msghdr msg;
876 	struct iovec aiov[UIO_SMALLIOV], *iov;
877 	int error;
878 
879 	error = copyin(uap->msg, &msg, sizeof (msg));
880 	if (error)
881 		goto done2;
882 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
883 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
884 			error = EMSGSIZE;
885 			goto done2;
886 		}
887 		MALLOC(iov, struct iovec *,
888 		       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
889 		       M_WAITOK);
890 	} else {
891 		iov = aiov;
892 	}
893 	if (msg.msg_iovlen &&
894 	    (error = copyin(msg.msg_iov, iov,
895 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
896 		goto done;
897 	msg.msg_iov = iov;
898 #ifdef COMPAT_OLDSOCK
899 	msg.msg_flags = 0;
900 #endif
901 	error = sendit(td, uap->s, &msg, uap->flags);
902 done:
903 	if (iov != aiov)
904 		FREE(iov, M_IOV);
905 done2:
906 	return (error);
907 }
908 
909 static int
910 recvit(td, s, mp, namelenp)
911 	register struct thread *td;
912 	int s;
913 	register struct msghdr *mp;
914 	void *namelenp;
915 {
916 	struct uio auio;
917 	register struct iovec *iov;
918 	register int i;
919 	int len, error;
920 	struct mbuf *m, *control = 0;
921 	caddr_t ctlbuf;
922 	struct socket *so;
923 	struct sockaddr *fromsa = 0;
924 #ifdef KTRACE
925 	struct iovec *ktriov = NULL;
926 	struct uio ktruio;
927 	int iovlen;
928 #endif
929 
930 	mtx_lock(&Giant);
931 	if ((error = fgetsock(td, s, &so, NULL)) != 0) {
932 		mtx_unlock(&Giant);
933 		return (error);
934 	}
935 
936 #ifdef MAC
937 	error = mac_check_socket_receive(td->td_ucred, so);
938 	if (error) {
939 		fputsock(so);
940 		mtx_unlock(&Giant);
941 		return (error);
942 	}
943 #endif
944 
945 	auio.uio_iov = mp->msg_iov;
946 	auio.uio_iovcnt = mp->msg_iovlen;
947 	auio.uio_segflg = UIO_USERSPACE;
948 	auio.uio_rw = UIO_READ;
949 	auio.uio_td = td;
950 	auio.uio_offset = 0;			/* XXX */
951 	auio.uio_resid = 0;
952 	iov = mp->msg_iov;
953 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
954 		if ((auio.uio_resid += iov->iov_len) < 0) {
955 			fputsock(so);
956 			return (EINVAL);
957 		}
958 	}
959 #ifdef KTRACE
960 	if (KTRPOINT(td, KTR_GENIO)) {
961 		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
962 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
963 		bcopy(auio.uio_iov, ktriov, iovlen);
964 		ktruio = auio;
965 	}
966 #endif
967 	len = auio.uio_resid;
968 	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
969 	    (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
970 	    &mp->msg_flags);
971 	if (error) {
972 		if (auio.uio_resid != len && (error == ERESTART ||
973 		    error == EINTR || error == EWOULDBLOCK))
974 			error = 0;
975 	}
976 #ifdef KTRACE
977 	if (ktriov != NULL) {
978 		if (error == 0) {
979 			ktruio.uio_iov = ktriov;
980 			ktruio.uio_resid = len - auio.uio_resid;
981 			ktrgenio(s, UIO_READ, &ktruio, error);
982 		}
983 		FREE(ktriov, M_TEMP);
984 	}
985 #endif
986 	if (error)
987 		goto out;
988 	td->td_retval[0] = len - auio.uio_resid;
989 	if (mp->msg_name) {
990 		len = mp->msg_namelen;
991 		if (len <= 0 || fromsa == 0)
992 			len = 0;
993 		else {
994 			/* save sa_len before it is destroyed by MSG_COMPAT */
995 			len = MIN(len, fromsa->sa_len);
996 #ifdef COMPAT_OLDSOCK
997 			if (mp->msg_flags & MSG_COMPAT)
998 				((struct osockaddr *)fromsa)->sa_family =
999 				    fromsa->sa_family;
1000 #endif
1001 			error = copyout(fromsa, mp->msg_name, (unsigned)len);
1002 			if (error)
1003 				goto out;
1004 		}
1005 		mp->msg_namelen = len;
1006 		if (namelenp &&
1007 		    (error = copyout(&len, namelenp, sizeof (int)))) {
1008 #ifdef COMPAT_OLDSOCK
1009 			if (mp->msg_flags & MSG_COMPAT)
1010 				error = 0;	/* old recvfrom didn't check */
1011 			else
1012 #endif
1013 			goto out;
1014 		}
1015 	}
1016 	if (mp->msg_control) {
1017 #ifdef COMPAT_OLDSOCK
1018 		/*
1019 		 * We assume that old recvmsg calls won't receive access
1020 		 * rights and other control info, esp. as control info
1021 		 * is always optional and those options didn't exist in 4.3.
1022 		 * If we receive rights, trim the cmsghdr; anything else
1023 		 * is tossed.
1024 		 */
1025 		if (control && mp->msg_flags & MSG_COMPAT) {
1026 			if (mtod(control, struct cmsghdr *)->cmsg_level !=
1027 			    SOL_SOCKET ||
1028 			    mtod(control, struct cmsghdr *)->cmsg_type !=
1029 			    SCM_RIGHTS) {
1030 				mp->msg_controllen = 0;
1031 				goto out;
1032 			}
1033 			control->m_len -= sizeof (struct cmsghdr);
1034 			control->m_data += sizeof (struct cmsghdr);
1035 		}
1036 #endif
1037 		len = mp->msg_controllen;
1038 		m = control;
1039 		mp->msg_controllen = 0;
1040 		ctlbuf = mp->msg_control;
1041 
1042 		while (m && len > 0) {
1043 			unsigned int tocopy;
1044 
1045 			if (len >= m->m_len)
1046 				tocopy = m->m_len;
1047 			else {
1048 				mp->msg_flags |= MSG_CTRUNC;
1049 				tocopy = len;
1050 			}
1051 
1052 			if ((error = copyout(mtod(m, caddr_t),
1053 					ctlbuf, tocopy)) != 0)
1054 				goto out;
1055 
1056 			ctlbuf += tocopy;
1057 			len -= tocopy;
1058 			m = m->m_next;
1059 		}
1060 		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
1061 	}
1062 out:
1063 	fputsock(so);
1064 	mtx_unlock(&Giant);
1065 	if (fromsa)
1066 		FREE(fromsa, M_SONAME);
1067 	if (control)
1068 		m_freem(control);
1069 	return (error);
1070 }
1071 
1072 /*
1073  * MPSAFE
1074  */
1075 int
1076 recvfrom(td, uap)
1077 	struct thread *td;
1078 	register struct recvfrom_args /* {
1079 		int	s;
1080 		caddr_t	buf;
1081 		size_t	len;
1082 		int	flags;
1083 		caddr_t	from;
1084 		int	*fromlenaddr;
1085 	} */ *uap;
1086 {
1087 	struct msghdr msg;
1088 	struct iovec aiov;
1089 	int error;
1090 
1091 	if (uap->fromlenaddr) {
1092 		error = copyin(uap->fromlenaddr,
1093 		    &msg.msg_namelen, sizeof (msg.msg_namelen));
1094 		if (error)
1095 			goto done2;
1096 	} else {
1097 		msg.msg_namelen = 0;
1098 	}
1099 	msg.msg_name = uap->from;
1100 	msg.msg_iov = &aiov;
1101 	msg.msg_iovlen = 1;
1102 	aiov.iov_base = uap->buf;
1103 	aiov.iov_len = uap->len;
1104 	msg.msg_control = 0;
1105 	msg.msg_flags = uap->flags;
1106 	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1107 done2:
1108 	return(error);
1109 }
1110 
1111 #ifdef COMPAT_OLDSOCK
1112 /*
1113  * MPSAFE
1114  */
1115 int
1116 orecvfrom(td, uap)
1117 	struct thread *td;
1118 	struct recvfrom_args *uap;
1119 {
1120 
1121 	uap->flags |= MSG_COMPAT;
1122 	return (recvfrom(td, uap));
1123 }
1124 #endif
1125 
1126 
1127 #ifdef COMPAT_OLDSOCK
1128 /*
1129  * MPSAFE
1130  */
1131 int
1132 orecv(td, uap)
1133 	struct thread *td;
1134 	register struct orecv_args /* {
1135 		int	s;
1136 		caddr_t	buf;
1137 		int	len;
1138 		int	flags;
1139 	} */ *uap;
1140 {
1141 	struct msghdr msg;
1142 	struct iovec aiov;
1143 	int error;
1144 
1145 	msg.msg_name = 0;
1146 	msg.msg_namelen = 0;
1147 	msg.msg_iov = &aiov;
1148 	msg.msg_iovlen = 1;
1149 	aiov.iov_base = uap->buf;
1150 	aiov.iov_len = uap->len;
1151 	msg.msg_control = 0;
1152 	msg.msg_flags = uap->flags;
1153 	error = recvit(td, uap->s, &msg, NULL);
1154 	return (error);
1155 }
1156 
1157 /*
1158  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1159  * overlays the new one, missing only the flags, and with the (old) access
1160  * rights where the control fields are now.
1161  *
1162  * MPSAFE
1163  */
1164 int
1165 orecvmsg(td, uap)
1166 	struct thread *td;
1167 	register struct orecvmsg_args /* {
1168 		int	s;
1169 		struct	omsghdr *msg;
1170 		int	flags;
1171 	} */ *uap;
1172 {
1173 	struct msghdr msg;
1174 	struct iovec aiov[UIO_SMALLIOV], *iov;
1175 	int error;
1176 
1177 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1178 	if (error)
1179 		return (error);
1180 
1181 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1182 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1183 			error = EMSGSIZE;
1184 			goto done2;
1185 		}
1186 		MALLOC(iov, struct iovec *,
1187 		      sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1188 		      M_WAITOK);
1189 	} else {
1190 		iov = aiov;
1191 	}
1192 	msg.msg_flags = uap->flags | MSG_COMPAT;
1193 	error = copyin(msg.msg_iov, iov,
1194 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1195 	if (error)
1196 		goto done;
1197 	msg.msg_iov = iov;
1198 	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1199 
1200 	if (msg.msg_controllen && error == 0)
1201 		error = copyout(&msg.msg_controllen,
1202 		    &uap->msg->msg_accrightslen, sizeof (int));
1203 done:
1204 	if (iov != aiov)
1205 		FREE(iov, M_IOV);
1206 done2:
1207 	return (error);
1208 }
1209 #endif
1210 
1211 /*
1212  * MPSAFE
1213  */
1214 int
1215 recvmsg(td, uap)
1216 	struct thread *td;
1217 	register struct recvmsg_args /* {
1218 		int	s;
1219 		struct	msghdr *msg;
1220 		int	flags;
1221 	} */ *uap;
1222 {
1223 	struct msghdr msg;
1224 	struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
1225 	register int error;
1226 
1227 	error = copyin(uap->msg, &msg, sizeof (msg));
1228 	if (error)
1229 		goto done2;
1230 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1231 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1232 			error = EMSGSIZE;
1233 			goto done2;
1234 		}
1235 		MALLOC(iov, struct iovec *,
1236 		       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1237 		       M_WAITOK);
1238 	} else {
1239 		iov = aiov;
1240 	}
1241 #ifdef COMPAT_OLDSOCK
1242 	msg.msg_flags = uap->flags &~ MSG_COMPAT;
1243 #else
1244 	msg.msg_flags = uap->flags;
1245 #endif
1246 	uiov = msg.msg_iov;
1247 	msg.msg_iov = iov;
1248 	error = copyin(uiov, iov,
1249 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1250 	if (error)
1251 		goto done;
1252 	error = recvit(td, uap->s, &msg, NULL);
1253 	if (!error) {
1254 		msg.msg_iov = uiov;
1255 		error = copyout(&msg, uap->msg, sizeof(msg));
1256 	}
1257 done:
1258 	if (iov != aiov)
1259 		FREE(iov, M_IOV);
1260 done2:
1261 	return (error);
1262 }
1263 
1264 /*
1265  * MPSAFE
1266  */
1267 /* ARGSUSED */
1268 int
1269 shutdown(td, uap)
1270 	struct thread *td;
1271 	register struct shutdown_args /* {
1272 		int	s;
1273 		int	how;
1274 	} */ *uap;
1275 {
1276 	struct socket *so;
1277 	int error;
1278 
1279 	mtx_lock(&Giant);
1280 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1281 		error = soshutdown(so, uap->how);
1282 		fputsock(so);
1283 	}
1284 	mtx_unlock(&Giant);
1285 	return(error);
1286 }
1287 
1288 /*
1289  * MPSAFE
1290  */
1291 /* ARGSUSED */
1292 int
1293 setsockopt(td, uap)
1294 	struct thread *td;
1295 	register struct setsockopt_args /* {
1296 		int	s;
1297 		int	level;
1298 		int	name;
1299 		caddr_t	val;
1300 		int	valsize;
1301 	} */ *uap;
1302 {
1303 	struct socket *so;
1304 	struct sockopt sopt;
1305 	int error;
1306 
1307 	if (uap->val == 0 && uap->valsize != 0)
1308 		return (EFAULT);
1309 	if (uap->valsize < 0)
1310 		return (EINVAL);
1311 
1312 	mtx_lock(&Giant);
1313 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1314 		sopt.sopt_dir = SOPT_SET;
1315 		sopt.sopt_level = uap->level;
1316 		sopt.sopt_name = uap->name;
1317 		sopt.sopt_val = uap->val;
1318 		sopt.sopt_valsize = uap->valsize;
1319 		sopt.sopt_td = td;
1320 		error = sosetopt(so, &sopt);
1321 		fputsock(so);
1322 	}
1323 	mtx_unlock(&Giant);
1324 	return(error);
1325 }
1326 
1327 /*
1328  * MPSAFE
1329  */
1330 /* ARGSUSED */
1331 int
1332 getsockopt(td, uap)
1333 	struct thread *td;
1334 	register struct getsockopt_args /* {
1335 		int	s;
1336 		int	level;
1337 		int	name;
1338 		caddr_t	val;
1339 		int	*avalsize;
1340 	} */ *uap;
1341 {
1342 	int	valsize, error;
1343 	struct  socket *so;
1344 	struct	sockopt sopt;
1345 
1346 	mtx_lock(&Giant);
1347 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1348 		goto done2;
1349 	if (uap->val) {
1350 		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1351 		if (error)
1352 			goto done1;
1353 		if (valsize < 0) {
1354 			error = EINVAL;
1355 			goto done1;
1356 		}
1357 	} else {
1358 		valsize = 0;
1359 	}
1360 
1361 	sopt.sopt_dir = SOPT_GET;
1362 	sopt.sopt_level = uap->level;
1363 	sopt.sopt_name = uap->name;
1364 	sopt.sopt_val = uap->val;
1365 	sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1366 	sopt.sopt_td = td;
1367 
1368 	error = sogetopt(so, &sopt);
1369 	if (error == 0) {
1370 		valsize = sopt.sopt_valsize;
1371 		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1372 	}
1373 done1:
1374 	fputsock(so);
1375 done2:
1376 	mtx_unlock(&Giant);
1377 	return (error);
1378 }
1379 
1380 /*
1381  * getsockname1() - Get socket name.
1382  *
1383  * MPSAFE
1384  */
1385 /* ARGSUSED */
1386 static int
1387 getsockname1(td, uap, compat)
1388 	struct thread *td;
1389 	register struct getsockname_args /* {
1390 		int	fdes;
1391 		caddr_t	asa;
1392 		int	*alen;
1393 	} */ *uap;
1394 	int compat;
1395 {
1396 	struct socket *so;
1397 	struct sockaddr *sa;
1398 	int len, error;
1399 
1400 	mtx_lock(&Giant);
1401 	if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1402 		goto done2;
1403 	error = copyin(uap->alen, &len, sizeof (len));
1404 	if (error)
1405 		goto done1;
1406 	if (len < 0) {
1407 		error = EINVAL;
1408 		goto done1;
1409 	}
1410 	sa = 0;
1411 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1412 	if (error)
1413 		goto bad;
1414 	if (sa == 0) {
1415 		len = 0;
1416 		goto gotnothing;
1417 	}
1418 
1419 	len = MIN(len, sa->sa_len);
1420 #ifdef COMPAT_OLDSOCK
1421 	if (compat)
1422 		((struct osockaddr *)sa)->sa_family = sa->sa_family;
1423 #endif
1424 	error = copyout(sa, uap->asa, (u_int)len);
1425 	if (error == 0)
1426 gotnothing:
1427 		error = copyout(&len, uap->alen, sizeof (len));
1428 bad:
1429 	if (sa)
1430 		FREE(sa, M_SONAME);
1431 done1:
1432 	fputsock(so);
1433 done2:
1434 	mtx_unlock(&Giant);
1435 	return (error);
1436 }
1437 
/*
 * getsockname() - System call entry point; forwards to getsockname1()
 * with the compat flag clear.
 *
 * MPSAFE
 */
int
getsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(td, uap, 0);
	return (error);
}
1449 
#ifdef COMPAT_OLDSOCK
/*
 * ogetsockname() - Old-socket-ABI entry point; forwards to
 * getsockname1() with the compat flag set.
 *
 * MPSAFE
 */
int
ogetsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(td, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
1463 
1464 /*
1465  * getpeername1() - Get name of peer for connected socket.
1466  *
1467  * MPSAFE
1468  */
1469 /* ARGSUSED */
1470 static int
1471 getpeername1(td, uap, compat)
1472 	struct thread *td;
1473 	register struct getpeername_args /* {
1474 		int	fdes;
1475 		caddr_t	asa;
1476 		int	*alen;
1477 	} */ *uap;
1478 	int compat;
1479 {
1480 	struct socket *so;
1481 	struct sockaddr *sa;
1482 	int len, error;
1483 
1484 	mtx_lock(&Giant);
1485 	if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1486 		goto done2;
1487 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1488 		error = ENOTCONN;
1489 		goto done1;
1490 	}
1491 	error = copyin(uap->alen, &len, sizeof (len));
1492 	if (error)
1493 		goto done1;
1494 	if (len < 0) {
1495 		error = EINVAL;
1496 		goto done1;
1497 	}
1498 	sa = 0;
1499 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1500 	if (error)
1501 		goto bad;
1502 	if (sa == 0) {
1503 		len = 0;
1504 		goto gotnothing;
1505 	}
1506 	len = MIN(len, sa->sa_len);
1507 #ifdef COMPAT_OLDSOCK
1508 	if (compat)
1509 		((struct osockaddr *)sa)->sa_family =
1510 		    sa->sa_family;
1511 #endif
1512 	error = copyout(sa, uap->asa, (u_int)len);
1513 	if (error)
1514 		goto bad;
1515 gotnothing:
1516 	error = copyout(&len, uap->alen, sizeof (len));
1517 bad:
1518 	if (sa)
1519 		FREE(sa, M_SONAME);
1520 done1:
1521 	fputsock(so);
1522 done2:
1523 	mtx_unlock(&Giant);
1524 	return (error);
1525 }
1526 
/*
 * getpeername() - System call entry point; forwards to getpeername1()
 * with the compat flag clear.
 *
 * MPSAFE
 */
int
getpeername(td, uap)
	struct thread *td;
	struct getpeername_args *uap;
{
	int error;

	error = getpeername1(td, uap, 0);
	return (error);
}
1538 
#ifdef COMPAT_OLDSOCK
/*
 * ogetpeername() - Old-socket-ABI entry point; forwards to
 * getpeername1() with the compat flag set.
 *
 * MPSAFE
 */
int
ogetpeername(td, uap)
	struct thread *td;
	struct ogetpeername_args *uap;
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(td, args, 1));
}
#endif /* COMPAT_OLDSOCK */
1553 
1554 int
1555 sockargs(mp, buf, buflen, type)
1556 	struct mbuf **mp;
1557 	caddr_t buf;
1558 	int buflen, type;
1559 {
1560 	register struct sockaddr *sa;
1561 	register struct mbuf *m;
1562 	int error;
1563 
1564 	if ((u_int)buflen > MLEN) {
1565 #ifdef COMPAT_OLDSOCK
1566 		if (type == MT_SONAME && (u_int)buflen <= 112)
1567 			buflen = MLEN;		/* unix domain compat. hack */
1568 		else
1569 #endif
1570 		return (EINVAL);
1571 	}
1572 	m = m_get(M_TRYWAIT, type);
1573 	if (m == NULL)
1574 		return (ENOBUFS);
1575 	m->m_len = buflen;
1576 	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1577 	if (error)
1578 		(void) m_free(m);
1579 	else {
1580 		*mp = m;
1581 		if (type == MT_SONAME) {
1582 			sa = mtod(m, struct sockaddr *);
1583 
1584 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1585 			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1586 				sa->sa_family = sa->sa_len;
1587 #endif
1588 			sa->sa_len = buflen;
1589 		}
1590 	}
1591 	return (error);
1592 }
1593 
1594 int
1595 getsockaddr(namp, uaddr, len)
1596 	struct sockaddr **namp;
1597 	caddr_t uaddr;
1598 	size_t len;
1599 {
1600 	struct sockaddr *sa;
1601 	int error;
1602 
1603 	if (len > SOCK_MAXADDRLEN)
1604 		return ENAMETOOLONG;
1605 	MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1606 	error = copyin(uaddr, sa, len);
1607 	if (error) {
1608 		FREE(sa, M_SONAME);
1609 	} else {
1610 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1611 		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1612 			sa->sa_family = sa->sa_len;
1613 #endif
1614 		sa->sa_len = len;
1615 		*namp = sa;
1616 	}
1617 	return error;
1618 }
1619 
/*
 * sendfile(2)
 *
 * MPSAFE
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Send the file referenced by 'fd', beginning at 'offset', to the
 * socket referenced by 's'.  Transmit 'nbytes' bytes, or everything up
 * to EOF when nbytes == 0.  An optional header and/or trailer may be
 * added to the socket output.  If 'sbytes' is given, the total number
 * of bytes sent is stored in *sbytes.
 *
 * This entry point simply calls do_sendfile() with compat == 0.
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
	int error;

	error = do_sendfile(td, uap, 0);
	return (error);
}
1640 
#ifdef COMPAT_FREEBSD4
/*
 * freebsd4_sendfile() - FreeBSD 4 compatible sendfile entry point.
 *
 * Repackages the old argument structure field by field into a
 * struct sendfile_args and calls do_sendfile() with compat == 1.
 */
int
freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
{
	struct sendfile_args newargs;
	int error;

	newargs.fd = uap->fd;
	newargs.s = uap->s;
	newargs.offset = uap->offset;
	newargs.nbytes = uap->nbytes;
	newargs.hdtr = uap->hdtr;
	newargs.sbytes = uap->sbytes;
	newargs.flags = uap->flags;

	error = do_sendfile(td, &newargs, 1);
	return (error);
}
#endif /* COMPAT_FREEBSD4 */
1658 
1659 static int
1660 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1661 {
1662 	struct vnode *vp;
1663 	struct vm_object *obj;
1664 	struct socket *so = NULL;
1665 	struct mbuf *m;
1666 	struct sf_buf *sf;
1667 	struct vm_page *pg;
1668 	struct writev_args nuap;
1669 	struct sf_hdtr hdtr;
1670 	off_t off, xfsize, hdtr_size, sbytes = 0;
1671 	int error, s;
1672 
1673 	mtx_lock(&Giant);
1674 
1675 	hdtr_size = 0;
1676 
1677 	/*
1678 	 * The descriptor must be a regular file and have a backing VM object.
1679 	 */
1680 	if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1681 		goto done;
1682 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1683 	if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
1684 		error = EINVAL;
1685 		VOP_UNLOCK(vp, 0, td);
1686 		goto done;
1687 	}
1688 	VOP_UNLOCK(vp, 0, td);
1689 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1690 		goto done;
1691 	if (so->so_type != SOCK_STREAM) {
1692 		error = EINVAL;
1693 		goto done;
1694 	}
1695 	if ((so->so_state & SS_ISCONNECTED) == 0) {
1696 		error = ENOTCONN;
1697 		goto done;
1698 	}
1699 	if (uap->offset < 0) {
1700 		error = EINVAL;
1701 		goto done;
1702 	}
1703 
1704 #ifdef MAC
1705 	error = mac_check_socket_send(td->td_ucred, so);
1706 	if (error)
1707 		goto done;
1708 #endif
1709 
1710 	/*
1711 	 * If specified, get the pointer to the sf_hdtr struct for
1712 	 * any headers/trailers.
1713 	 */
1714 	if (uap->hdtr != NULL) {
1715 		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1716 		if (error)
1717 			goto done;
1718 		/*
1719 		 * Send any headers. Wimp out and use writev(2).
1720 		 */
1721 		if (hdtr.headers != NULL) {
1722 			nuap.fd = uap->s;
1723 			nuap.iovp = hdtr.headers;
1724 			nuap.iovcnt = hdtr.hdr_cnt;
1725 			error = writev(td, &nuap);
1726 			if (error)
1727 				goto done;
1728 			if (compat)
1729 				sbytes += td->td_retval[0];
1730 			else
1731 				hdtr_size += td->td_retval[0];
1732 		}
1733 	}
1734 
1735 	/*
1736 	 * Protect against multiple writers to the socket.
1737 	 */
1738 	(void) sblock(&so->so_snd, M_WAITOK);
1739 
1740 	/*
1741 	 * Loop through the pages in the file, starting with the requested
1742 	 * offset. Get a file page (do I/O if necessary), map the file page
1743 	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1744 	 * it on the socket.
1745 	 */
1746 	for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1747 		vm_pindex_t pindex;
1748 		vm_offset_t pgoff;
1749 
1750 		pindex = OFF_TO_IDX(off);
1751 		VM_OBJECT_LOCK(obj);
1752 retry_lookup:
1753 		/*
1754 		 * Calculate the amount to transfer. Not to exceed a page,
1755 		 * the EOF, or the passed in nbytes.
1756 		 */
1757 		xfsize = obj->un_pager.vnp.vnp_size - off;
1758 		VM_OBJECT_UNLOCK(obj);
1759 		if (xfsize > PAGE_SIZE)
1760 			xfsize = PAGE_SIZE;
1761 		pgoff = (vm_offset_t)(off & PAGE_MASK);
1762 		if (PAGE_SIZE - pgoff < xfsize)
1763 			xfsize = PAGE_SIZE - pgoff;
1764 		if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1765 			xfsize = uap->nbytes - sbytes;
1766 		if (xfsize <= 0)
1767 			break;
1768 		/*
1769 		 * Optimize the non-blocking case by looking at the socket space
1770 		 * before going to the extra work of constituting the sf_buf.
1771 		 */
1772 		if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1773 			if (so->so_state & SS_CANTSENDMORE)
1774 				error = EPIPE;
1775 			else
1776 				error = EAGAIN;
1777 			sbunlock(&so->so_snd);
1778 			goto done;
1779 		}
1780 		VM_OBJECT_LOCK(obj);
1781 		/*
1782 		 * Attempt to look up the page.
1783 		 *
1784 		 *	Allocate if not found
1785 		 *
1786 		 *	Wait and loop if busy.
1787 		 */
1788 		pg = vm_page_lookup(obj, pindex);
1789 
1790 		if (pg == NULL) {
1791 			pg = vm_page_alloc(obj, pindex,
1792 			    VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
1793 			if (pg == NULL) {
1794 				VM_OBJECT_UNLOCK(obj);
1795 				VM_WAIT;
1796 				VM_OBJECT_LOCK(obj);
1797 				goto retry_lookup;
1798 			}
1799 			vm_page_lock_queues();
1800 			vm_page_wakeup(pg);
1801 		} else {
1802 			vm_page_lock_queues();
1803 			if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
1804 				goto retry_lookup;
1805 			/*
1806 		 	 * Wire the page so it does not get ripped out from
1807 			 * under us.
1808 			 */
1809 			vm_page_wire(pg);
1810 		}
1811 
1812 		/*
1813 		 * If page is not valid for what we need, initiate I/O
1814 		 */
1815 
1816 		if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) {
1817 			int bsize, resid;
1818 
1819 			/*
1820 			 * Ensure that our page is still around when the I/O
1821 			 * completes.
1822 			 */
1823 			vm_page_io_start(pg);
1824 			vm_page_unlock_queues();
1825 			VM_OBJECT_UNLOCK(obj);
1826 
1827 			/*
1828 			 * Get the page from backing store.
1829 			 */
1830 			bsize = vp->v_mount->mnt_stat.f_iosize;
1831 			vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
1832 			/*
1833 			 * XXXMAC: Because we don't have fp->f_cred here,
1834 			 * we pass in NOCRED.  This is probably wrong, but
1835 			 * is consistent with our original implementation.
1836 			 */
1837 			error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
1838 			    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
1839 			    IO_VMIO | ((MAXBSIZE / bsize) << 16),
1840 			    td->td_ucred, NOCRED, &resid, td);
1841 			VOP_UNLOCK(vp, 0, td);
1842 			if (error)
1843 				VM_OBJECT_LOCK(obj);
1844 			vm_page_lock_queues();
1845 			vm_page_flag_clear(pg, PG_ZERO);
1846 			vm_page_io_finish(pg);
1847 			if (error) {
1848 				vm_page_unwire(pg, 0);
1849 				/*
1850 				 * See if anyone else might know about this page.
1851 				 * If not and it is not valid, then free it.
1852 				 */
1853 				if (pg->wire_count == 0 && pg->valid == 0 &&
1854 				    pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1855 				    pg->hold_count == 0) {
1856 					vm_page_busy(pg);
1857 					vm_page_free(pg);
1858 				}
1859 				vm_page_unlock_queues();
1860 				VM_OBJECT_UNLOCK(obj);
1861 				sbunlock(&so->so_snd);
1862 				goto done;
1863 			}
1864 		} else
1865 			VM_OBJECT_UNLOCK(obj);
1866 		vm_page_unlock_queues();
1867 
1868 		/*
1869 		 * Get a sendfile buf. We usually wait as long as necessary,
1870 		 * but this wait can be interrupted.
1871 		 */
1872 		if ((sf = sf_buf_alloc(pg)) == NULL) {
1873 			vm_page_lock_queues();
1874 			vm_page_unwire(pg, 0);
1875 			if (pg->wire_count == 0 && pg->object == NULL)
1876 				vm_page_free(pg);
1877 			vm_page_unlock_queues();
1878 			sbunlock(&so->so_snd);
1879 			error = EINTR;
1880 			goto done;
1881 		}
1882 
1883 		/*
1884 		 * Get an mbuf header and set it up as having external storage.
1885 		 */
1886 		MGETHDR(m, M_TRYWAIT, MT_DATA);
1887 		if (m == NULL) {
1888 			error = ENOBUFS;
1889 			sf_buf_free((void *)sf->kva, sf);
1890 			sbunlock(&so->so_snd);
1891 			goto done;
1892 		}
1893 		/*
1894 		 * Setup external storage for mbuf.
1895 		 */
1896 		MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, sf, M_RDONLY,
1897 		    EXT_SFBUF);
1898 		m->m_data = (char *) sf->kva + pgoff;
1899 		m->m_pkthdr.len = m->m_len = xfsize;
1900 		/*
1901 		 * Add the buffer to the socket buffer chain.
1902 		 */
1903 		s = splnet();
1904 retry_space:
1905 		/*
1906 		 * Make sure that the socket is still able to take more data.
1907 		 * CANTSENDMORE being true usually means that the connection
1908 		 * was closed. so_error is true when an error was sensed after
1909 		 * a previous send.
1910 		 * The state is checked after the page mapping and buffer
1911 		 * allocation above since those operations may block and make
1912 		 * any socket checks stale. From this point forward, nothing
1913 		 * blocks before the pru_send (or more accurately, any blocking
1914 		 * results in a loop back to here to re-check).
1915 		 */
1916 		if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1917 			if (so->so_state & SS_CANTSENDMORE) {
1918 				error = EPIPE;
1919 			} else {
1920 				error = so->so_error;
1921 				so->so_error = 0;
1922 			}
1923 			m_freem(m);
1924 			sbunlock(&so->so_snd);
1925 			splx(s);
1926 			goto done;
1927 		}
1928 		/*
1929 		 * Wait for socket space to become available. We do this just
1930 		 * after checking the connection state above in order to avoid
1931 		 * a race condition with sbwait().
1932 		 */
1933 		if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
1934 			if (so->so_state & SS_NBIO) {
1935 				m_freem(m);
1936 				sbunlock(&so->so_snd);
1937 				splx(s);
1938 				error = EAGAIN;
1939 				goto done;
1940 			}
1941 			error = sbwait(&so->so_snd);
1942 			/*
1943 			 * An error from sbwait usually indicates that we've
1944 			 * been interrupted by a signal. If we've sent anything
1945 			 * then return bytes sent, otherwise return the error.
1946 			 */
1947 			if (error) {
1948 				m_freem(m);
1949 				sbunlock(&so->so_snd);
1950 				splx(s);
1951 				goto done;
1952 			}
1953 			goto retry_space;
1954 		}
1955 		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
1956 		splx(s);
1957 		if (error) {
1958 			sbunlock(&so->so_snd);
1959 			goto done;
1960 		}
1961 	}
1962 	sbunlock(&so->so_snd);
1963 
1964 	/*
1965 	 * Send trailers. Wimp out and use writev(2).
1966 	 */
1967 	if (uap->hdtr != NULL && hdtr.trailers != NULL) {
1968 			nuap.fd = uap->s;
1969 			nuap.iovp = hdtr.trailers;
1970 			nuap.iovcnt = hdtr.trl_cnt;
1971 			error = writev(td, &nuap);
1972 			if (error)
1973 				goto done;
1974 			if (compat)
1975 				sbytes += td->td_retval[0];
1976 			else
1977 				hdtr_size += td->td_retval[0];
1978 	}
1979 
1980 done:
1981 	/*
1982 	 * If there was no error we have to clear td->td_retval[0]
1983 	 * because it may have been set by writev.
1984 	 */
1985 	if (error == 0) {
1986 		td->td_retval[0] = 0;
1987 	}
1988 	if (uap->sbytes != NULL) {
1989 		if (!compat)
1990 			sbytes += hdtr_size;
1991 		copyout(&sbytes, uap->sbytes, sizeof(off_t));
1992 	}
1993 	if (vp)
1994 		vrele(vp);
1995 	if (so)
1996 		fputsock(so);
1997 	mtx_unlock(&Giant);
1998 	return (error);
1999 }
2000