xref: /freebsd/sys/kern/uipc_syscalls.c (revision 2357939bc239bd5334a169b62313806178dd8f30)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * sendfile(2) and related extensions:
6  * Copyright (c) 1998, David Greenman. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 #include "opt_compat.h"
39 #include "opt_ktrace.h"
40 #include "opt_mac.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/lock.h>
46 #include <sys/mac.h>
47 #include <sys/mutex.h>
48 #include <sys/sysproto.h>
49 #include <sys/malloc.h>
50 #include <sys/filedesc.h>
51 #include <sys/event.h>
52 #include <sys/proc.h>
53 #include <sys/fcntl.h>
54 #include <sys/file.h>
55 #include <sys/filio.h>
56 #include <sys/mount.h>
57 #include <sys/mbuf.h>
58 #include <sys/protosw.h>
59 #include <sys/sf_buf.h>
60 #include <sys/socket.h>
61 #include <sys/socketvar.h>
62 #include <sys/signalvar.h>
63 #include <sys/syscallsubr.h>
64 #include <sys/uio.h>
65 #include <sys/vnode.h>
66 #ifdef KTRACE
67 #include <sys/ktrace.h>
68 #endif
69 
70 #include <vm/vm.h>
71 #include <vm/vm_object.h>
72 #include <vm/vm_page.h>
73 #include <vm/vm_pageout.h>
74 #include <vm/vm_kern.h>
75 #include <vm/vm_extern.h>
76 
77 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
78 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
79 
80 static int accept1(struct thread *td, struct accept_args *uap, int compat);
81 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
82 static int getsockname1(struct thread *td, struct getsockname_args *uap,
83 			int compat);
84 static int getpeername1(struct thread *td, struct getpeername_args *uap,
85 			int compat);
86 
87 /*
88  * System call interface to the socket abstraction.
89  */
90 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
91 #define COMPAT_OLDSOCK
92 #endif
93 
94 /*
95  * MPSAFE
96  */
97 int
98 socket(td, uap)
99 	struct thread *td;
100 	register struct socket_args /* {
101 		int	domain;
102 		int	type;
103 		int	protocol;
104 	} */ *uap;
105 {
106 	struct filedesc *fdp;
107 	struct socket *so;
108 	struct file *fp;
109 	int fd, error;
110 
111 	fdp = td->td_proc->p_fd;
112 	error = falloc(td, &fp, &fd);
113 	if (error)
114 		return (error);
115 	/* An extra reference on `fp' has been held for us by falloc(). */
116 	NET_LOCK_GIANT();
117 	error = socreate(uap->domain, &so, uap->type, uap->protocol,
118 	    td->td_ucred, td);
119 	NET_UNLOCK_GIANT();
120 	FILEDESC_LOCK(fdp);
121 	if (error) {
122 		if (fdp->fd_ofiles[fd] == fp) {
123 			fdp->fd_ofiles[fd] = NULL;
124 			fdunused(fdp, fd);
125 			FILEDESC_UNLOCK(fdp);
126 			fdrop(fp, td);
127 		} else {
128 			FILEDESC_UNLOCK(fdp);
129 		}
130 	} else {
131 		fp->f_data = so;	/* already has ref count */
132 		fp->f_flag = FREAD|FWRITE;
133 		fp->f_ops = &socketops;
134 		fp->f_type = DTYPE_SOCKET;
135 		FILEDESC_UNLOCK(fdp);
136 		td->td_retval[0] = fd;
137 	}
138 	fdrop(fp, td);
139 	return (error);
140 }
141 
142 /*
143  * MPSAFE
144  */
145 /* ARGSUSED */
146 int
147 bind(td, uap)
148 	struct thread *td;
149 	register struct bind_args /* {
150 		int	s;
151 		caddr_t	name;
152 		int	namelen;
153 	} */ *uap;
154 {
155 	struct sockaddr *sa;
156 	int error;
157 
158 	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
159 		return (error);
160 
161 	return (kern_bind(td, uap->s, sa));
162 }
163 
164 int
165 kern_bind(td, fd, sa)
166 	struct thread *td;
167 	int fd;
168 	struct sockaddr *sa;
169 {
170 	struct socket *so;
171 	int error;
172 
173 	NET_LOCK_GIANT();
174 	if ((error = fgetsock(td, fd, &so, NULL)) != 0)
175 		goto done2;
176 #ifdef MAC
177 	error = mac_check_socket_bind(td->td_ucred, so, sa);
178 	if (error)
179 		goto done1;
180 #endif
181 	error = sobind(so, sa, td);
182 #ifdef MAC
183 done1:
184 #endif
185 	fputsock(so);
186 done2:
187 	NET_UNLOCK_GIANT();
188 	FREE(sa, M_SONAME);
189 	return (error);
190 }
191 
192 /*
193  * MPSAFE
194  */
195 /* ARGSUSED */
196 int
197 listen(td, uap)
198 	struct thread *td;
199 	register struct listen_args /* {
200 		int	s;
201 		int	backlog;
202 	} */ *uap;
203 {
204 	struct socket *so;
205 	int error;
206 
207 	NET_LOCK_GIANT();
208 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
209 #ifdef MAC
210 		error = mac_check_socket_listen(td->td_ucred, so);
211 		if (error)
212 			goto done;
213 #endif
214 		error = solisten(so, uap->backlog, td);
215 #ifdef MAC
216 done:
217 #endif
218 		fputsock(so);
219 	}
220 	NET_UNLOCK_GIANT();
221 	return(error);
222 }
223 
224 /*
225  * accept1()
226  * MPSAFE
227  */
228 static int
229 accept1(td, uap, compat)
230 	struct thread *td;
231 	register struct accept_args /* {
232 		int	s;
233 		struct sockaddr	* __restrict name;
234 		socklen_t	* __restrict anamelen;
235 	} */ *uap;
236 	int compat;
237 {
238 	struct filedesc *fdp;
239 	struct file *nfp = NULL;
240 	struct sockaddr *sa;
241 	socklen_t namelen;
242 	int error, s;
243 	struct socket *head, *so;
244 	int fd;
245 	u_int fflag;
246 	pid_t pgid;
247 	int tmp;
248 
249 	fdp = td->td_proc->p_fd;
250 	if (uap->name) {
251 		error = copyin(uap->anamelen, &namelen, sizeof (namelen));
252 		if(error)
253 			goto done3;
254 		if (namelen < 0) {
255 			error = EINVAL;
256 			goto done3;
257 		}
258 	}
259 	NET_LOCK_GIANT();
260 	error = fgetsock(td, uap->s, &head, &fflag);
261 	if (error)
262 		goto done2;
263 	s = splnet();
264 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
265 		splx(s);
266 		error = EINVAL;
267 		goto done;
268 	}
269 	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
270 		if (head->so_state & SS_CANTRCVMORE) {
271 			head->so_error = ECONNABORTED;
272 			break;
273 		}
274 		if ((head->so_state & SS_NBIO) != 0) {
275 			head->so_error = EWOULDBLOCK;
276 			break;
277 		}
278 		error = tsleep(&head->so_timeo, PSOCK | PCATCH,
279 		    "accept", 0);
280 		if (error) {
281 			splx(s);
282 			goto done;
283 		}
284 	}
285 	if (head->so_error) {
286 		error = head->so_error;
287 		head->so_error = 0;
288 		splx(s);
289 		goto done;
290 	}
291 
292 	/*
293 	 * At this point we know that there is at least one connection
294 	 * ready to be accepted. Remove it from the queue prior to
295 	 * allocating the file descriptor for it since falloc() may
296 	 * block allowing another process to accept the connection
297 	 * instead.
298 	 */
299 	so = TAILQ_FIRST(&head->so_comp);
300 	TAILQ_REMOVE(&head->so_comp, so, so_list);
301 	head->so_qlen--;
302 
303 	error = falloc(td, &nfp, &fd);
304 	if (error) {
305 		/*
306 		 * Probably ran out of file descriptors. Put the
307 		 * unaccepted connection back onto the queue and
308 		 * do another wakeup so some other process might
309 		 * have a chance at it.
310 		 */
311 		TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
312 		head->so_qlen++;
313 		wakeup_one(&head->so_timeo);
314 		splx(s);
315 		goto done;
316 	}
317 	/* An extra reference on `nfp' has been held for us by falloc(). */
318 	td->td_retval[0] = fd;
319 
320 	/* connection has been removed from the listen queue */
321 	KNOTE(&head->so_rcv.sb_sel.si_note, 0);
322 
323 	so->so_state &= ~SS_COMP;
324 	so->so_head = NULL;
325 	pgid = fgetown(&head->so_sigio);
326 	if (pgid != 0)
327 		fsetown(pgid, &so->so_sigio);
328 
329 	FILE_LOCK(nfp);
330 	soref(so);			/* file descriptor reference */
331 	nfp->f_data = so;	/* nfp has ref count from falloc */
332 	nfp->f_flag = fflag;
333 	nfp->f_ops = &socketops;
334 	nfp->f_type = DTYPE_SOCKET;
335 	FILE_UNLOCK(nfp);
336 	/* Sync socket nonblocking/async state with file flags */
337 	tmp = fflag & FNONBLOCK;
338 	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
339 	tmp = fflag & FASYNC;
340 	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
341 	sa = 0;
342 	error = soaccept(so, &sa);
343 	if (error) {
344 		/*
345 		 * return a namelen of zero for older code which might
346 		 * ignore the return value from accept.
347 		 */
348 		if (uap->name != NULL) {
349 			namelen = 0;
350 			(void) copyout(&namelen,
351 			    uap->anamelen, sizeof(*uap->anamelen));
352 		}
353 		goto noconnection;
354 	}
355 	if (sa == NULL) {
356 		namelen = 0;
357 		if (uap->name)
358 			goto gotnoname;
359 		splx(s);
360 		error = 0;
361 		goto done;
362 	}
363 	if (uap->name) {
364 		/* check sa_len before it is destroyed */
365 		if (namelen > sa->sa_len)
366 			namelen = sa->sa_len;
367 #ifdef COMPAT_OLDSOCK
368 		if (compat)
369 			((struct osockaddr *)sa)->sa_family =
370 			    sa->sa_family;
371 #endif
372 		error = copyout(sa, uap->name, (u_int)namelen);
373 		if (!error)
374 gotnoname:
375 			error = copyout(&namelen,
376 			    uap->anamelen, sizeof (*uap->anamelen));
377 	}
378 noconnection:
379 	if (sa)
380 		FREE(sa, M_SONAME);
381 
382 	/*
383 	 * close the new descriptor, assuming someone hasn't ripped it
384 	 * out from under us.
385 	 */
386 	if (error) {
387 		FILEDESC_LOCK(fdp);
388 		if (fdp->fd_ofiles[fd] == nfp) {
389 			fdp->fd_ofiles[fd] = NULL;
390 			fdunused(fdp, fd);
391 			FILEDESC_UNLOCK(fdp);
392 			fdrop(nfp, td);
393 		} else {
394 			FILEDESC_UNLOCK(fdp);
395 		}
396 	}
397 	splx(s);
398 
399 	/*
400 	 * Release explicitly held references before returning.
401 	 */
402 done:
403 	if (nfp != NULL)
404 		fdrop(nfp, td);
405 	fputsock(head);
406 done2:
407 	NET_UNLOCK_GIANT();
408 done3:
409 	return (error);
410 }
411 
/*
 * accept(2): accept1() without the old-sockaddr compatibility rewrite.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
accept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{
	int error;

	error = accept1(td, uap, 0);
	return (error);
}
423 
424 #ifdef COMPAT_OLDSOCK
/*
 * Old-style accept: accept1() with the compat flag set, so the peer
 * address is returned in osockaddr layout.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
oaccept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{
	int error;

	error = accept1(td, uap, 1);
	return (error);
}
436 #endif /* COMPAT_OLDSOCK */
437 
438 /*
439  * MPSAFE
440  */
441 /* ARGSUSED */
442 int
443 connect(td, uap)
444 	struct thread *td;
445 	register struct connect_args /* {
446 		int	s;
447 		caddr_t	name;
448 		int	namelen;
449 	} */ *uap;
450 {
451 	struct sockaddr *sa;
452 	int error;
453 
454 	error = getsockaddr(&sa, uap->name, uap->namelen);
455 	if (error)
456 		return (error);
457 
458 	return (kern_connect(td, uap->s, sa));
459 }
460 
461 
462 int
463 kern_connect(td, fd, sa)
464 	struct thread *td;
465 	int fd;
466 	struct sockaddr *sa;
467 {
468 	struct socket *so;
469 	int error, s;
470 	int interrupted = 0;
471 
472 	NET_LOCK_GIANT();
473 	if ((error = fgetsock(td, fd, &so, NULL)) != 0)
474 		goto done2;
475 	if (so->so_state & SS_ISCONNECTING) {
476 		error = EALREADY;
477 		goto done1;
478 	}
479 #ifdef MAC
480 	error = mac_check_socket_connect(td->td_ucred, so, sa);
481 	if (error)
482 		goto bad;
483 #endif
484 	error = soconnect(so, sa, td);
485 	if (error)
486 		goto bad;
487 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
488 		error = EINPROGRESS;
489 		goto done1;
490 	}
491 	s = splnet();
492 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
493 		error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0);
494 		if (error) {
495 			if (error == EINTR || error == ERESTART)
496 				interrupted = 1;
497 			break;
498 		}
499 	}
500 	if (error == 0) {
501 		error = so->so_error;
502 		so->so_error = 0;
503 	}
504 	splx(s);
505 bad:
506 	if (!interrupted)
507 		so->so_state &= ~SS_ISCONNECTING;
508 	if (error == ERESTART)
509 		error = EINTR;
510 done1:
511 	fputsock(so);
512 done2:
513 	NET_UNLOCK_GIANT();
514 	FREE(sa, M_SONAME);
515 	return (error);
516 }
517 
518 /*
519  * MPSAFE
520  */
521 int
522 socketpair(td, uap)
523 	struct thread *td;
524 	register struct socketpair_args /* {
525 		int	domain;
526 		int	type;
527 		int	protocol;
528 		int	*rsv;
529 	} */ *uap;
530 {
531 	register struct filedesc *fdp = td->td_proc->p_fd;
532 	struct file *fp1, *fp2;
533 	struct socket *so1, *so2;
534 	int fd, error, sv[2];
535 
536 	NET_LOCK_GIANT();
537 	error = socreate(uap->domain, &so1, uap->type, uap->protocol,
538 	    td->td_ucred, td);
539 	if (error)
540 		goto done2;
541 	error = socreate(uap->domain, &so2, uap->type, uap->protocol,
542 	    td->td_ucred, td);
543 	if (error)
544 		goto free1;
545 	/* On success extra reference to `fp1' and 'fp2' is set by falloc. */
546 	error = falloc(td, &fp1, &fd);
547 	if (error)
548 		goto free2;
549 	sv[0] = fd;
550 	fp1->f_data = so1;	/* so1 already has ref count */
551 	error = falloc(td, &fp2, &fd);
552 	if (error)
553 		goto free3;
554 	fp2->f_data = so2;	/* so2 already has ref count */
555 	sv[1] = fd;
556 	error = soconnect2(so1, so2);
557 	if (error)
558 		goto free4;
559 	if (uap->type == SOCK_DGRAM) {
560 		/*
561 		 * Datagram socket connection is asymmetric.
562 		 */
563 		 error = soconnect2(so2, so1);
564 		 if (error)
565 			goto free4;
566 	}
567 	FILE_LOCK(fp1);
568 	fp1->f_flag = FREAD|FWRITE;
569 	fp1->f_ops = &socketops;
570 	fp1->f_type = DTYPE_SOCKET;
571 	FILE_UNLOCK(fp1);
572 	FILE_LOCK(fp2);
573 	fp2->f_flag = FREAD|FWRITE;
574 	fp2->f_ops = &socketops;
575 	fp2->f_type = DTYPE_SOCKET;
576 	FILE_UNLOCK(fp2);
577 	error = copyout(sv, uap->rsv, 2 * sizeof (int));
578 	fdrop(fp1, td);
579 	fdrop(fp2, td);
580 	goto done2;
581 free4:
582 	FILEDESC_LOCK(fdp);
583 	if (fdp->fd_ofiles[sv[1]] == fp2) {
584 		fdp->fd_ofiles[sv[1]] = NULL;
585 		fdunused(fdp, sv[1]);
586 		FILEDESC_UNLOCK(fdp);
587 		fdrop(fp2, td);
588 	} else {
589 		FILEDESC_UNLOCK(fdp);
590 	}
591 	fdrop(fp2, td);
592 free3:
593 	FILEDESC_LOCK(fdp);
594 	if (fdp->fd_ofiles[sv[0]] == fp1) {
595 		fdp->fd_ofiles[sv[0]] = NULL;
596 		fdunused(fdp, sv[0]);
597 		FILEDESC_UNLOCK(fdp);
598 		fdrop(fp1, td);
599 	} else {
600 		FILEDESC_UNLOCK(fdp);
601 	}
602 	fdrop(fp1, td);
603 free2:
604 	(void)soclose(so2);
605 free1:
606 	(void)soclose(so1);
607 done2:
608 	NET_UNLOCK_GIANT();
609 	return (error);
610 }
611 
612 static int
613 sendit(td, s, mp, flags)
614 	register struct thread *td;
615 	int s;
616 	register struct msghdr *mp;
617 	int flags;
618 {
619 	struct mbuf *control;
620 	struct sockaddr *to;
621 	int error;
622 
623 	if (mp->msg_name != NULL) {
624 		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
625 		if (error) {
626 			to = NULL;
627 			goto bad;
628 		}
629 		mp->msg_name = to;
630 	} else {
631 		to = NULL;
632 	}
633 
634 	if (mp->msg_control) {
635 		if (mp->msg_controllen < sizeof(struct cmsghdr)
636 #ifdef COMPAT_OLDSOCK
637 		    && mp->msg_flags != MSG_COMPAT
638 #endif
639 		) {
640 			error = EINVAL;
641 			goto bad;
642 		}
643 		error = sockargs(&control, mp->msg_control,
644 		    mp->msg_controllen, MT_CONTROL);
645 		if (error)
646 			goto bad;
647 #ifdef COMPAT_OLDSOCK
648 		if (mp->msg_flags == MSG_COMPAT) {
649 			register struct cmsghdr *cm;
650 
651 			M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
652 			if (control == 0) {
653 				error = ENOBUFS;
654 				goto bad;
655 			} else {
656 				cm = mtod(control, struct cmsghdr *);
657 				cm->cmsg_len = control->m_len;
658 				cm->cmsg_level = SOL_SOCKET;
659 				cm->cmsg_type = SCM_RIGHTS;
660 			}
661 		}
662 #endif
663 	} else {
664 		control = NULL;
665 	}
666 
667 	error = kern_sendit(td, s, mp, flags, control);
668 
669 bad:
670 	if (to)
671 		FREE(to, M_SONAME);
672 	return (error);
673 }
674 
675 int
676 kern_sendit(td, s, mp, flags, control)
677 	struct thread *td;
678 	int s;
679 	struct msghdr *mp;
680 	int flags;
681 	struct mbuf *control;
682 {
683 	struct uio auio;
684 	struct iovec *iov;
685 	struct socket *so;
686 	int i;
687 	int len, error;
688 #ifdef KTRACE
689 	struct iovec *ktriov = NULL;
690 	struct uio ktruio;
691 	int iovlen;
692 #endif
693 
694 	NET_LOCK_GIANT();
695 	if ((error = fgetsock(td, s, &so, NULL)) != 0)
696 		goto bad2;
697 
698 #ifdef MAC
699 	error = mac_check_socket_send(td->td_ucred, so);
700 	if (error)
701 		goto bad;
702 #endif
703 
704 	auio.uio_iov = mp->msg_iov;
705 	auio.uio_iovcnt = mp->msg_iovlen;
706 	auio.uio_segflg = UIO_USERSPACE;
707 	auio.uio_rw = UIO_WRITE;
708 	auio.uio_td = td;
709 	auio.uio_offset = 0;			/* XXX */
710 	auio.uio_resid = 0;
711 	iov = mp->msg_iov;
712 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
713 		if ((auio.uio_resid += iov->iov_len) < 0) {
714 			error = EINVAL;
715 			goto bad;
716 		}
717 	}
718 #ifdef KTRACE
719 	if (KTRPOINT(td, KTR_GENIO)) {
720 		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
721 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
722 		bcopy(auio.uio_iov, ktriov, iovlen);
723 		ktruio = auio;
724 	}
725 #endif
726 	len = auio.uio_resid;
727 	error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio,
728 	    0, control, flags, td);
729 	if (error) {
730 		if (auio.uio_resid != len && (error == ERESTART ||
731 		    error == EINTR || error == EWOULDBLOCK))
732 			error = 0;
733 		/* Generation of SIGPIPE can be controlled per socket */
734 		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) {
735 			PROC_LOCK(td->td_proc);
736 			psignal(td->td_proc, SIGPIPE);
737 			PROC_UNLOCK(td->td_proc);
738 		}
739 	}
740 	if (error == 0)
741 		td->td_retval[0] = len - auio.uio_resid;
742 #ifdef KTRACE
743 	if (ktriov != NULL) {
744 		if (error == 0) {
745 			ktruio.uio_iov = ktriov;
746 			ktruio.uio_resid = td->td_retval[0];
747 			ktrgenio(s, UIO_WRITE, &ktruio, error);
748 		}
749 		FREE(ktriov, M_TEMP);
750 	}
751 #endif
752 bad:
753 	fputsock(so);
754 bad2:
755 	NET_UNLOCK_GIANT();
756 	return (error);
757 }
758 
759 /*
760  * MPSAFE
761  */
762 int
763 sendto(td, uap)
764 	struct thread *td;
765 	register struct sendto_args /* {
766 		int	s;
767 		caddr_t	buf;
768 		size_t	len;
769 		int	flags;
770 		caddr_t	to;
771 		int	tolen;
772 	} */ *uap;
773 {
774 	struct msghdr msg;
775 	struct iovec aiov;
776 	int error;
777 
778 	msg.msg_name = uap->to;
779 	msg.msg_namelen = uap->tolen;
780 	msg.msg_iov = &aiov;
781 	msg.msg_iovlen = 1;
782 	msg.msg_control = 0;
783 #ifdef COMPAT_OLDSOCK
784 	msg.msg_flags = 0;
785 #endif
786 	aiov.iov_base = uap->buf;
787 	aiov.iov_len = uap->len;
788 	error = sendit(td, uap->s, &msg, uap->flags);
789 	return (error);
790 }
791 
792 #ifdef COMPAT_OLDSOCK
793 /*
794  * MPSAFE
795  */
796 int
797 osend(td, uap)
798 	struct thread *td;
799 	register struct osend_args /* {
800 		int	s;
801 		caddr_t	buf;
802 		int	len;
803 		int	flags;
804 	} */ *uap;
805 {
806 	struct msghdr msg;
807 	struct iovec aiov;
808 	int error;
809 
810 	msg.msg_name = 0;
811 	msg.msg_namelen = 0;
812 	msg.msg_iov = &aiov;
813 	msg.msg_iovlen = 1;
814 	aiov.iov_base = uap->buf;
815 	aiov.iov_len = uap->len;
816 	msg.msg_control = 0;
817 	msg.msg_flags = 0;
818 	error = sendit(td, uap->s, &msg, uap->flags);
819 	return (error);
820 }
821 
822 /*
823  * MPSAFE
824  */
825 int
826 osendmsg(td, uap)
827 	struct thread *td;
828 	register struct osendmsg_args /* {
829 		int	s;
830 		caddr_t	msg;
831 		int	flags;
832 	} */ *uap;
833 {
834 	struct msghdr msg;
835 	struct iovec aiov[UIO_SMALLIOV], *iov;
836 	int error;
837 
838 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
839 	if (error)
840 		goto done2;
841 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
842 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
843 			error = EMSGSIZE;
844 			goto done2;
845 		}
846 		MALLOC(iov, struct iovec *,
847 		      sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
848 		      M_WAITOK);
849 	} else {
850 		iov = aiov;
851 	}
852 	error = copyin(msg.msg_iov, iov,
853 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
854 	if (error)
855 		goto done;
856 	msg.msg_flags = MSG_COMPAT;
857 	msg.msg_iov = iov;
858 	error = sendit(td, uap->s, &msg, uap->flags);
859 done:
860 	if (iov != aiov)
861 		FREE(iov, M_IOV);
862 done2:
863 	return (error);
864 }
865 #endif
866 
867 /*
868  * MPSAFE
869  */
870 int
871 sendmsg(td, uap)
872 	struct thread *td;
873 	register struct sendmsg_args /* {
874 		int	s;
875 		caddr_t	msg;
876 		int	flags;
877 	} */ *uap;
878 {
879 	struct msghdr msg;
880 	struct iovec aiov[UIO_SMALLIOV], *iov;
881 	int error;
882 
883 	error = copyin(uap->msg, &msg, sizeof (msg));
884 	if (error)
885 		goto done2;
886 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
887 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
888 			error = EMSGSIZE;
889 			goto done2;
890 		}
891 		MALLOC(iov, struct iovec *,
892 		       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
893 		       M_WAITOK);
894 	} else {
895 		iov = aiov;
896 	}
897 	if (msg.msg_iovlen &&
898 	    (error = copyin(msg.msg_iov, iov,
899 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
900 		goto done;
901 	msg.msg_iov = iov;
902 #ifdef COMPAT_OLDSOCK
903 	msg.msg_flags = 0;
904 #endif
905 	error = sendit(td, uap->s, &msg, uap->flags);
906 done:
907 	if (iov != aiov)
908 		FREE(iov, M_IOV);
909 done2:
910 	return (error);
911 }
912 
913 static int
914 recvit(td, s, mp, namelenp)
915 	register struct thread *td;
916 	int s;
917 	register struct msghdr *mp;
918 	void *namelenp;
919 {
920 	struct uio auio;
921 	register struct iovec *iov;
922 	register int i;
923 	socklen_t len;
924 	int error;
925 	struct mbuf *m, *control = 0;
926 	caddr_t ctlbuf;
927 	struct socket *so;
928 	struct sockaddr *fromsa = 0;
929 #ifdef KTRACE
930 	struct iovec *ktriov = NULL;
931 	struct uio ktruio;
932 	int iovlen;
933 #endif
934 
935 	NET_LOCK_GIANT();
936 	if ((error = fgetsock(td, s, &so, NULL)) != 0) {
937 		NET_UNLOCK_GIANT();
938 		return (error);
939 	}
940 
941 #ifdef MAC
942 	error = mac_check_socket_receive(td->td_ucred, so);
943 	if (error) {
944 		fputsock(so);
945 		NET_UNLOCK_GIANT();
946 		return (error);
947 	}
948 #endif
949 
950 	auio.uio_iov = mp->msg_iov;
951 	auio.uio_iovcnt = mp->msg_iovlen;
952 	auio.uio_segflg = UIO_USERSPACE;
953 	auio.uio_rw = UIO_READ;
954 	auio.uio_td = td;
955 	auio.uio_offset = 0;			/* XXX */
956 	auio.uio_resid = 0;
957 	iov = mp->msg_iov;
958 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
959 		if ((auio.uio_resid += iov->iov_len) < 0) {
960 			fputsock(so);
961 			NET_UNLOCK_GIANT();
962 			return (EINVAL);
963 		}
964 	}
965 #ifdef KTRACE
966 	if (KTRPOINT(td, KTR_GENIO)) {
967 		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
968 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
969 		bcopy(auio.uio_iov, ktriov, iovlen);
970 		ktruio = auio;
971 	}
972 #endif
973 	len = auio.uio_resid;
974 	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
975 	    (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
976 	    &mp->msg_flags);
977 	if (error) {
978 		if (auio.uio_resid != (int)len && (error == ERESTART ||
979 		    error == EINTR || error == EWOULDBLOCK))
980 			error = 0;
981 	}
982 #ifdef KTRACE
983 	if (ktriov != NULL) {
984 		if (error == 0) {
985 			ktruio.uio_iov = ktriov;
986 			ktruio.uio_resid = (int)len - auio.uio_resid;
987 			ktrgenio(s, UIO_READ, &ktruio, error);
988 		}
989 		FREE(ktriov, M_TEMP);
990 	}
991 #endif
992 	if (error)
993 		goto out;
994 	td->td_retval[0] = (int)len - auio.uio_resid;
995 	if (mp->msg_name) {
996 		len = mp->msg_namelen;
997 		if (len <= 0 || fromsa == 0)
998 			len = 0;
999 		else {
1000 			/* save sa_len before it is destroyed by MSG_COMPAT */
1001 			len = MIN(len, fromsa->sa_len);
1002 #ifdef COMPAT_OLDSOCK
1003 			if (mp->msg_flags & MSG_COMPAT)
1004 				((struct osockaddr *)fromsa)->sa_family =
1005 				    fromsa->sa_family;
1006 #endif
1007 			error = copyout(fromsa, mp->msg_name, (unsigned)len);
1008 			if (error)
1009 				goto out;
1010 		}
1011 		mp->msg_namelen = len;
1012 		if (namelenp &&
1013 		    (error = copyout(&len, namelenp, sizeof (socklen_t)))) {
1014 #ifdef COMPAT_OLDSOCK
1015 			if (mp->msg_flags & MSG_COMPAT)
1016 				error = 0;	/* old recvfrom didn't check */
1017 			else
1018 #endif
1019 			goto out;
1020 		}
1021 	}
1022 	if (mp->msg_control) {
1023 #ifdef COMPAT_OLDSOCK
1024 		/*
1025 		 * We assume that old recvmsg calls won't receive access
1026 		 * rights and other control info, esp. as control info
1027 		 * is always optional and those options didn't exist in 4.3.
1028 		 * If we receive rights, trim the cmsghdr; anything else
1029 		 * is tossed.
1030 		 */
1031 		if (control && mp->msg_flags & MSG_COMPAT) {
1032 			if (mtod(control, struct cmsghdr *)->cmsg_level !=
1033 			    SOL_SOCKET ||
1034 			    mtod(control, struct cmsghdr *)->cmsg_type !=
1035 			    SCM_RIGHTS) {
1036 				mp->msg_controllen = 0;
1037 				goto out;
1038 			}
1039 			control->m_len -= sizeof (struct cmsghdr);
1040 			control->m_data += sizeof (struct cmsghdr);
1041 		}
1042 #endif
1043 		len = mp->msg_controllen;
1044 		m = control;
1045 		mp->msg_controllen = 0;
1046 		ctlbuf = mp->msg_control;
1047 
1048 		while (m && len > 0) {
1049 			unsigned int tocopy;
1050 
1051 			if (len >= m->m_len)
1052 				tocopy = m->m_len;
1053 			else {
1054 				mp->msg_flags |= MSG_CTRUNC;
1055 				tocopy = len;
1056 			}
1057 
1058 			if ((error = copyout(mtod(m, caddr_t),
1059 					ctlbuf, tocopy)) != 0)
1060 				goto out;
1061 
1062 			ctlbuf += tocopy;
1063 			len -= tocopy;
1064 			m = m->m_next;
1065 		}
1066 		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
1067 	}
1068 out:
1069 	fputsock(so);
1070 	NET_UNLOCK_GIANT();
1071 	if (fromsa)
1072 		FREE(fromsa, M_SONAME);
1073 	if (control)
1074 		m_freem(control);
1075 	return (error);
1076 }
1077 
1078 /*
1079  * MPSAFE
1080  */
1081 int
1082 recvfrom(td, uap)
1083 	struct thread *td;
1084 	register struct recvfrom_args /* {
1085 		int	s;
1086 		caddr_t	buf;
1087 		size_t	len;
1088 		int	flags;
1089 		struct sockaddr * __restrict	from;
1090 		socklen_t * __restrict fromlenaddr;
1091 	} */ *uap;
1092 {
1093 	struct msghdr msg;
1094 	struct iovec aiov;
1095 	int error;
1096 
1097 	if (uap->fromlenaddr) {
1098 		error = copyin(uap->fromlenaddr,
1099 		    &msg.msg_namelen, sizeof (msg.msg_namelen));
1100 		if (error)
1101 			goto done2;
1102 	} else {
1103 		msg.msg_namelen = 0;
1104 	}
1105 	msg.msg_name = uap->from;
1106 	msg.msg_iov = &aiov;
1107 	msg.msg_iovlen = 1;
1108 	aiov.iov_base = uap->buf;
1109 	aiov.iov_len = uap->len;
1110 	msg.msg_control = 0;
1111 	msg.msg_flags = uap->flags;
1112 	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1113 done2:
1114 	return(error);
1115 }
1116 
1117 #ifdef COMPAT_OLDSOCK
1118 /*
1119  * MPSAFE
1120  */
1121 int
1122 orecvfrom(td, uap)
1123 	struct thread *td;
1124 	struct recvfrom_args *uap;
1125 {
1126 
1127 	uap->flags |= MSG_COMPAT;
1128 	return (recvfrom(td, uap));
1129 }
1130 #endif
1131 
1132 
1133 #ifdef COMPAT_OLDSOCK
1134 /*
1135  * MPSAFE
1136  */
1137 int
1138 orecv(td, uap)
1139 	struct thread *td;
1140 	register struct orecv_args /* {
1141 		int	s;
1142 		caddr_t	buf;
1143 		int	len;
1144 		int	flags;
1145 	} */ *uap;
1146 {
1147 	struct msghdr msg;
1148 	struct iovec aiov;
1149 	int error;
1150 
1151 	msg.msg_name = 0;
1152 	msg.msg_namelen = 0;
1153 	msg.msg_iov = &aiov;
1154 	msg.msg_iovlen = 1;
1155 	aiov.iov_base = uap->buf;
1156 	aiov.iov_len = uap->len;
1157 	msg.msg_control = 0;
1158 	msg.msg_flags = uap->flags;
1159 	error = recvit(td, uap->s, &msg, NULL);
1160 	return (error);
1161 }
1162 
1163 /*
1164  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1165  * overlays the new one, missing only the flags, and with the (old) access
1166  * rights where the control fields are now.
1167  *
1168  * MPSAFE
1169  */
1170 int
1171 orecvmsg(td, uap)
1172 	struct thread *td;
1173 	register struct orecvmsg_args /* {
1174 		int	s;
1175 		struct	omsghdr *msg;
1176 		int	flags;
1177 	} */ *uap;
1178 {
1179 	struct msghdr msg;
1180 	struct iovec aiov[UIO_SMALLIOV], *iov;
1181 	int error;
1182 
1183 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1184 	if (error)
1185 		return (error);
1186 
1187 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1188 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1189 			error = EMSGSIZE;
1190 			goto done2;
1191 		}
1192 		MALLOC(iov, struct iovec *,
1193 		      sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1194 		      M_WAITOK);
1195 	} else {
1196 		iov = aiov;
1197 	}
1198 	msg.msg_flags = uap->flags | MSG_COMPAT;
1199 	error = copyin(msg.msg_iov, iov,
1200 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1201 	if (error)
1202 		goto done;
1203 	msg.msg_iov = iov;
1204 	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1205 
1206 	if (msg.msg_controllen && error == 0)
1207 		error = copyout(&msg.msg_controllen,
1208 		    &uap->msg->msg_accrightslen, sizeof (int));
1209 done:
1210 	if (iov != aiov)
1211 		FREE(iov, M_IOV);
1212 done2:
1213 	return (error);
1214 }
1215 #endif
1216 
1217 /*
1218  * MPSAFE
1219  */
1220 int
1221 recvmsg(td, uap)
1222 	struct thread *td;
1223 	register struct recvmsg_args /* {
1224 		int	s;
1225 		struct	msghdr *msg;
1226 		int	flags;
1227 	} */ *uap;
1228 {
1229 	struct msghdr msg;
1230 	struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
1231 	register int error;
1232 
1233 	error = copyin(uap->msg, &msg, sizeof (msg));
1234 	if (error)
1235 		goto done2;
1236 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1237 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1238 			error = EMSGSIZE;
1239 			goto done2;
1240 		}
1241 		MALLOC(iov, struct iovec *,
1242 		       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1243 		       M_WAITOK);
1244 	} else {
1245 		iov = aiov;
1246 	}
1247 #ifdef COMPAT_OLDSOCK
1248 	msg.msg_flags = uap->flags &~ MSG_COMPAT;
1249 #else
1250 	msg.msg_flags = uap->flags;
1251 #endif
1252 	uiov = msg.msg_iov;
1253 	msg.msg_iov = iov;
1254 	error = copyin(uiov, iov,
1255 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1256 	if (error)
1257 		goto done;
1258 	error = recvit(td, uap->s, &msg, NULL);
1259 	if (!error) {
1260 		msg.msg_iov = uiov;
1261 		error = copyout(&msg, uap->msg, sizeof(msg));
1262 	}
1263 done:
1264 	if (iov != aiov)
1265 		FREE(iov, M_IOV);
1266 done2:
1267 	return (error);
1268 }
1269 
1270 /*
1271  * MPSAFE
1272  */
1273 /* ARGSUSED */
1274 int
1275 shutdown(td, uap)
1276 	struct thread *td;
1277 	register struct shutdown_args /* {
1278 		int	s;
1279 		int	how;
1280 	} */ *uap;
1281 {
1282 	struct socket *so;
1283 	int error;
1284 
1285 	NET_LOCK_GIANT();
1286 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1287 		error = soshutdown(so, uap->how);
1288 		fputsock(so);
1289 	}
1290 	NET_UNLOCK_GIANT();
1291 	return(error);
1292 }
1293 
1294 /*
1295  * MPSAFE
1296  */
1297 /* ARGSUSED */
1298 int
1299 setsockopt(td, uap)
1300 	struct thread *td;
1301 	register struct setsockopt_args /* {
1302 		int	s;
1303 		int	level;
1304 		int	name;
1305 		caddr_t	val;
1306 		int	valsize;
1307 	} */ *uap;
1308 {
1309 	struct socket *so;
1310 	struct sockopt sopt;
1311 	int error;
1312 
1313 	if (uap->val == 0 && uap->valsize != 0)
1314 		return (EFAULT);
1315 	if (uap->valsize < 0)
1316 		return (EINVAL);
1317 
1318 	NET_LOCK_GIANT();
1319 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1320 		sopt.sopt_dir = SOPT_SET;
1321 		sopt.sopt_level = uap->level;
1322 		sopt.sopt_name = uap->name;
1323 		sopt.sopt_val = uap->val;
1324 		sopt.sopt_valsize = uap->valsize;
1325 		sopt.sopt_td = td;
1326 		error = sosetopt(so, &sopt);
1327 		fputsock(so);
1328 	}
1329 	NET_UNLOCK_GIANT();
1330 	return(error);
1331 }
1332 
1333 /*
1334  * MPSAFE
1335  */
1336 /* ARGSUSED */
1337 int
1338 getsockopt(td, uap)
1339 	struct thread *td;
1340 	register struct getsockopt_args /* {
1341 		int	s;
1342 		int	level;
1343 		int	name;
1344 		void * __restrict	val;
1345 		socklen_t * __restrict avalsize;
1346 	} */ *uap;
1347 {
1348 	socklen_t valsize;
1349 	int	error;
1350 	struct  socket *so;
1351 	struct	sockopt sopt;
1352 
1353 	NET_LOCK_GIANT();
1354 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1355 		goto done2;
1356 	if (uap->val) {
1357 		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1358 		if (error)
1359 			goto done1;
1360 		if (valsize < 0) {
1361 			error = EINVAL;
1362 			goto done1;
1363 		}
1364 	} else {
1365 		valsize = 0;
1366 	}
1367 
1368 	sopt.sopt_dir = SOPT_GET;
1369 	sopt.sopt_level = uap->level;
1370 	sopt.sopt_name = uap->name;
1371 	sopt.sopt_val = uap->val;
1372 	sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1373 	sopt.sopt_td = td;
1374 
1375 	error = sogetopt(so, &sopt);
1376 	if (error == 0) {
1377 		valsize = sopt.sopt_valsize;
1378 		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1379 	}
1380 done1:
1381 	fputsock(so);
1382 done2:
1383 	NET_UNLOCK_GIANT();
1384 	return (error);
1385 }
1386 
1387 /*
1388  * getsockname1() - Get socket name.
1389  *
1390  * MPSAFE
1391  */
1392 /* ARGSUSED */
1393 static int
1394 getsockname1(td, uap, compat)
1395 	struct thread *td;
1396 	register struct getsockname_args /* {
1397 		int	fdes;
1398 		struct sockaddr * __restrict asa;
1399 		socklen_t * __restrict alen;
1400 	} */ *uap;
1401 	int compat;
1402 {
1403 	struct socket *so;
1404 	struct sockaddr *sa;
1405 	socklen_t len;
1406 	int error;
1407 
1408 	NET_LOCK_GIANT();
1409 	if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1410 		goto done2;
1411 	error = copyin(uap->alen, &len, sizeof (len));
1412 	if (error)
1413 		goto done1;
1414 	if (len < 0) {
1415 		error = EINVAL;
1416 		goto done1;
1417 	}
1418 	sa = 0;
1419 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1420 	if (error)
1421 		goto bad;
1422 	if (sa == 0) {
1423 		len = 0;
1424 		goto gotnothing;
1425 	}
1426 
1427 	len = MIN(len, sa->sa_len);
1428 #ifdef COMPAT_OLDSOCK
1429 	if (compat)
1430 		((struct osockaddr *)sa)->sa_family = sa->sa_family;
1431 #endif
1432 	error = copyout(sa, uap->asa, (u_int)len);
1433 	if (error == 0)
1434 gotnothing:
1435 		error = copyout(&len, uap->alen, sizeof (len));
1436 bad:
1437 	if (sa)
1438 		FREE(sa, M_SONAME);
1439 done1:
1440 	fputsock(so);
1441 done2:
1442 	NET_UNLOCK_GIANT();
1443 	return (error);
1444 }
1445 
/*
 * getsockname() - system call entry point; runs getsockname1() with
 * the compat flag clear.
 *
 * MPSAFE
 */
int
getsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(td, uap, 0);
	return (error);
}
1457 
1458 #ifdef COMPAT_OLDSOCK
/*
 * ogetsockname() - old-socket-API entry point; runs getsockname1()
 * with the compat flag set.
 *
 * MPSAFE
 */
int
ogetsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(td, uap, 1);
	return (error);
}
1470 #endif /* COMPAT_OLDSOCK */
1471 
1472 /*
1473  * getpeername1() - Get name of peer for connected socket.
1474  *
1475  * MPSAFE
1476  */
1477 /* ARGSUSED */
1478 static int
1479 getpeername1(td, uap, compat)
1480 	struct thread *td;
1481 	register struct getpeername_args /* {
1482 		int	fdes;
1483 		struct sockaddr * __restrict	asa;
1484 		socklen_t * __restrict	alen;
1485 	} */ *uap;
1486 	int compat;
1487 {
1488 	struct socket *so;
1489 	struct sockaddr *sa;
1490 	socklen_t len;
1491 	int error;
1492 
1493 	NET_LOCK_GIANT();
1494 	if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1495 		goto done2;
1496 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1497 		error = ENOTCONN;
1498 		goto done1;
1499 	}
1500 	error = copyin(uap->alen, &len, sizeof (len));
1501 	if (error)
1502 		goto done1;
1503 	if (len < 0) {
1504 		error = EINVAL;
1505 		goto done1;
1506 	}
1507 	sa = 0;
1508 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1509 	if (error)
1510 		goto bad;
1511 	if (sa == 0) {
1512 		len = 0;
1513 		goto gotnothing;
1514 	}
1515 	len = MIN(len, sa->sa_len);
1516 #ifdef COMPAT_OLDSOCK
1517 	if (compat)
1518 		((struct osockaddr *)sa)->sa_family =
1519 		    sa->sa_family;
1520 #endif
1521 	error = copyout(sa, uap->asa, (u_int)len);
1522 	if (error)
1523 		goto bad;
1524 gotnothing:
1525 	error = copyout(&len, uap->alen, sizeof (len));
1526 bad:
1527 	if (sa)
1528 		FREE(sa, M_SONAME);
1529 done1:
1530 	fputsock(so);
1531 done2:
1532 	NET_UNLOCK_GIANT();
1533 	return (error);
1534 }
1535 
/*
 * getpeername() - system call entry point; runs getpeername1() with
 * the compat flag clear.
 *
 * MPSAFE
 */
int
getpeername(td, uap)
	struct thread *td;
	struct getpeername_args *uap;
{
	int error;

	error = getpeername1(td, uap, 0);
	return (error);
}
1547 
1548 #ifdef COMPAT_OLDSOCK
/*
 * ogetpeername() - old-socket-API entry point; runs getpeername1()
 * with the compat flag set.
 *
 * MPSAFE
 */
int
ogetpeername(td, uap)
	struct thread *td;
	struct ogetpeername_args *uap;
{
	struct getpeername_args *gpa;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	gpa = (struct getpeername_args *)uap;
	return (getpeername1(td, gpa, 1));
}
1561 #endif /* COMPAT_OLDSOCK */
1562 
1563 int
1564 sockargs(mp, buf, buflen, type)
1565 	struct mbuf **mp;
1566 	caddr_t buf;
1567 	int buflen, type;
1568 {
1569 	register struct sockaddr *sa;
1570 	register struct mbuf *m;
1571 	int error;
1572 
1573 	if ((u_int)buflen > MLEN) {
1574 #ifdef COMPAT_OLDSOCK
1575 		if (type == MT_SONAME && (u_int)buflen <= 112)
1576 			buflen = MLEN;		/* unix domain compat. hack */
1577 		else
1578 #endif
1579 		return (EINVAL);
1580 	}
1581 	m = m_get(M_TRYWAIT, type);
1582 	if (m == NULL)
1583 		return (ENOBUFS);
1584 	m->m_len = buflen;
1585 	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1586 	if (error)
1587 		(void) m_free(m);
1588 	else {
1589 		*mp = m;
1590 		if (type == MT_SONAME) {
1591 			sa = mtod(m, struct sockaddr *);
1592 
1593 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1594 			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1595 				sa->sa_family = sa->sa_len;
1596 #endif
1597 			sa->sa_len = buflen;
1598 		}
1599 	}
1600 	return (error);
1601 }
1602 
1603 int
1604 getsockaddr(namp, uaddr, len)
1605 	struct sockaddr **namp;
1606 	caddr_t uaddr;
1607 	size_t len;
1608 {
1609 	struct sockaddr *sa;
1610 	int error;
1611 
1612 	if (len > SOCK_MAXADDRLEN)
1613 		return (ENAMETOOLONG);
1614 	if (len < offsetof(struct sockaddr, sa_data[0]))
1615 		return (EINVAL);
1616 	MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1617 	error = copyin(uaddr, sa, len);
1618 	if (error) {
1619 		FREE(sa, M_SONAME);
1620 	} else {
1621 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1622 		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1623 			sa->sa_family = sa->sa_len;
1624 #endif
1625 		sa->sa_len = len;
1626 		*namp = sa;
1627 	}
1628 	return (error);
1629 }
1630 
1631 /*
1632  * Detach mapped page and release resources back to the system.
1633  */
1634 void
1635 sf_buf_mext(void *addr, void *args)
1636 {
1637 	vm_page_t m;
1638 
1639 	m = sf_buf_page(args);
1640 	sf_buf_free(args);
1641 	vm_page_lock_queues();
1642 	vm_page_unwire(m, 0);
1643 	/*
1644 	 * Check for the object going away on us. This can
1645 	 * happen since we don't hold a reference to it.
1646 	 * If so, we're responsible for freeing the page.
1647 	 */
1648 	if (m->wire_count == 0 && m->object == NULL)
1649 		vm_page_free(m);
1650 	vm_page_unlock_queues();
1651 }
1652 
/*
 * sendfile(2)
 *
 * MPSAFE
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Send the file referred to by 'fd', beginning at 'offset', to the
 * socket 's'.  At most 'nbytes' of the file are sent; nbytes == 0 means
 * send until EOF.  A header and/or trailer may optionally be added to
 * the socket output.  If 'sbytes' is given, the total number of bytes
 * sent is written to *sbytes.
 *
 * This is the native entry point: it runs do_sendfile() with the
 * compat flag clear.
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
	int error;

	error = do_sendfile(td, uap, 0);
	return (error);
}
1673 
1674 #ifdef COMPAT_FREEBSD4
1675 int
1676 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
1677 {
1678 	struct sendfile_args args;
1679 
1680 	args.fd = uap->fd;
1681 	args.s = uap->s;
1682 	args.offset = uap->offset;
1683 	args.nbytes = uap->nbytes;
1684 	args.hdtr = uap->hdtr;
1685 	args.sbytes = uap->sbytes;
1686 	args.flags = uap->flags;
1687 
1688 	return (do_sendfile(td, &args, 1));
1689 }
1690 #endif /* COMPAT_FREEBSD4 */
1691 
1692 static int
1693 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1694 {
1695 	struct vnode *vp;
1696 	struct vm_object *obj;
1697 	struct socket *so = NULL;
1698 	struct mbuf *m, *m_header = NULL;
1699 	struct sf_buf *sf;
1700 	struct vm_page *pg;
1701 	struct writev_args nuap;
1702 	struct sf_hdtr hdtr;
1703 	struct uio hdr_uio;
1704 	off_t off, xfsize, hdtr_size, sbytes = 0;
1705 	int error, s, headersize = 0, headersent = 0;
1706 	struct iovec *hdr_iov = NULL;
1707 
1708 	mtx_lock(&Giant);
1709 
1710 	hdtr_size = 0;
1711 
1712 	/*
1713 	 * The descriptor must be a regular file and have a backing VM object.
1714 	 */
1715 	if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1716 		goto done;
1717 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1718 	if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
1719 		error = EINVAL;
1720 		VOP_UNLOCK(vp, 0, td);
1721 		goto done;
1722 	}
1723 	VOP_UNLOCK(vp, 0, td);
1724 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1725 		goto done;
1726 	if (so->so_type != SOCK_STREAM) {
1727 		error = EINVAL;
1728 		goto done;
1729 	}
1730 	if ((so->so_state & SS_ISCONNECTED) == 0) {
1731 		error = ENOTCONN;
1732 		goto done;
1733 	}
1734 	if (uap->offset < 0) {
1735 		error = EINVAL;
1736 		goto done;
1737 	}
1738 
1739 #ifdef MAC
1740 	error = mac_check_socket_send(td->td_ucred, so);
1741 	if (error)
1742 		goto done;
1743 #endif
1744 
1745 	/*
1746 	 * If specified, get the pointer to the sf_hdtr struct for
1747 	 * any headers/trailers.
1748 	 */
1749 	if (uap->hdtr != NULL) {
1750 		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1751 		if (error)
1752 			goto done;
1753 		/*
1754 		 * Send any headers.
1755 		 */
1756 		if (hdtr.headers != NULL) {
1757 			hdr_uio.uio_td = td;
1758 			hdr_uio.uio_rw = UIO_WRITE;
1759 			error = uiofromiov(hdtr.headers, hdtr.hdr_cnt,
1760 				&hdr_uio);
1761 			if (error)
1762 				goto done;
1763 			/* Cache hdr_iov, m_uiotombuf may change it. */
1764 			hdr_iov = hdr_uio.uio_iov;
1765 			if (hdr_uio.uio_resid > 0) {
1766 				m_header = m_uiotombuf(&hdr_uio, M_DONTWAIT, 0);
1767 				if (m_header == NULL)
1768 					goto done;
1769 				headersize = m_header->m_pkthdr.len;
1770 				if (compat)
1771 					sbytes += headersize;
1772 			}
1773 		}
1774 	}
1775 
1776 	/*
1777 	 * Protect against multiple writers to the socket.
1778 	 */
1779 	(void) sblock(&so->so_snd, M_WAITOK);
1780 
1781 	/*
1782 	 * Loop through the pages in the file, starting with the requested
1783 	 * offset. Get a file page (do I/O if necessary), map the file page
1784 	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1785 	 * it on the socket.
1786 	 */
1787 	for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1788 		vm_pindex_t pindex;
1789 		vm_offset_t pgoff;
1790 
1791 		pindex = OFF_TO_IDX(off);
1792 		VM_OBJECT_LOCK(obj);
1793 retry_lookup:
1794 		/*
1795 		 * Calculate the amount to transfer. Not to exceed a page,
1796 		 * the EOF, or the passed in nbytes.
1797 		 */
1798 		xfsize = obj->un_pager.vnp.vnp_size - off;
1799 		VM_OBJECT_UNLOCK(obj);
1800 		if (xfsize > PAGE_SIZE)
1801 			xfsize = PAGE_SIZE;
1802 		pgoff = (vm_offset_t)(off & PAGE_MASK);
1803 		if (PAGE_SIZE - pgoff < xfsize)
1804 			xfsize = PAGE_SIZE - pgoff;
1805 		if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1806 			xfsize = uap->nbytes - sbytes;
1807 		if (xfsize <= 0) {
1808 			if (m_header != NULL) {
1809 				m = m_header;
1810 				m_header = NULL;
1811 				goto retry_space;
1812 			} else
1813 				break;
1814 		}
1815 		/*
1816 		 * Optimize the non-blocking case by looking at the socket space
1817 		 * before going to the extra work of constituting the sf_buf.
1818 		 */
1819 		if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1820 			if (so->so_state & SS_CANTSENDMORE)
1821 				error = EPIPE;
1822 			else
1823 				error = EAGAIN;
1824 			sbunlock(&so->so_snd);
1825 			goto done;
1826 		}
1827 		VM_OBJECT_LOCK(obj);
1828 		/*
1829 		 * Attempt to look up the page.
1830 		 *
1831 		 *	Allocate if not found
1832 		 *
1833 		 *	Wait and loop if busy.
1834 		 */
1835 		pg = vm_page_lookup(obj, pindex);
1836 
1837 		if (pg == NULL) {
1838 			pg = vm_page_alloc(obj, pindex,
1839 			    VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
1840 			if (pg == NULL) {
1841 				VM_OBJECT_UNLOCK(obj);
1842 				VM_WAIT;
1843 				VM_OBJECT_LOCK(obj);
1844 				goto retry_lookup;
1845 			}
1846 			vm_page_lock_queues();
1847 			vm_page_wakeup(pg);
1848 		} else {
1849 			vm_page_lock_queues();
1850 			if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
1851 				goto retry_lookup;
1852 			/*
1853 			 * Wire the page so it does not get ripped out from
1854 			 * under us.
1855 			 */
1856 			vm_page_wire(pg);
1857 		}
1858 
1859 		/*
1860 		 * If page is not valid for what we need, initiate I/O
1861 		 */
1862 
1863 		if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) {
1864 			VM_OBJECT_UNLOCK(obj);
1865 		} else if (uap->flags & SF_NODISKIO) {
1866 			error = EBUSY;
1867 		} else {
1868 			int bsize, resid;
1869 
1870 			/*
1871 			 * Ensure that our page is still around when the I/O
1872 			 * completes.
1873 			 */
1874 			vm_page_io_start(pg);
1875 			vm_page_unlock_queues();
1876 			VM_OBJECT_UNLOCK(obj);
1877 
1878 			/*
1879 			 * Get the page from backing store.
1880 			 */
1881 			bsize = vp->v_mount->mnt_stat.f_iosize;
1882 			vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
1883 			/*
1884 			 * XXXMAC: Because we don't have fp->f_cred here,
1885 			 * we pass in NOCRED.  This is probably wrong, but
1886 			 * is consistent with our original implementation.
1887 			 */
1888 			error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
1889 			    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
1890 			    IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
1891 			    td->td_ucred, NOCRED, &resid, td);
1892 			VOP_UNLOCK(vp, 0, td);
1893 			if (error)
1894 				VM_OBJECT_LOCK(obj);
1895 			vm_page_lock_queues();
1896 			vm_page_io_finish(pg);
1897 			mbstat.sf_iocnt++;
1898 		}
1899 
1900 		if (error) {
1901 			vm_page_unwire(pg, 0);
1902 			/*
1903 			 * See if anyone else might know about this page.
1904 			 * If not and it is not valid, then free it.
1905 			 */
1906 			if (pg->wire_count == 0 && pg->valid == 0 &&
1907 			    pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1908 			    pg->hold_count == 0) {
1909 				vm_page_busy(pg);
1910 				vm_page_free(pg);
1911 			}
1912 			vm_page_unlock_queues();
1913 			VM_OBJECT_UNLOCK(obj);
1914 			sbunlock(&so->so_snd);
1915 			goto done;
1916 		}
1917 		vm_page_unlock_queues();
1918 
1919 		/*
1920 		 * Get a sendfile buf. We usually wait as long as necessary,
1921 		 * but this wait can be interrupted.
1922 		 */
1923 		if ((sf = sf_buf_alloc(pg, PCATCH)) == NULL) {
1924 			mbstat.sf_allocfail++;
1925 			vm_page_lock_queues();
1926 			vm_page_unwire(pg, 0);
1927 			if (pg->wire_count == 0 && pg->object == NULL)
1928 				vm_page_free(pg);
1929 			vm_page_unlock_queues();
1930 			sbunlock(&so->so_snd);
1931 			error = EINTR;
1932 			goto done;
1933 		}
1934 
1935 		/*
1936 		 * Get an mbuf header and set it up as having external storage.
1937 		 */
1938 		if (m_header)
1939 			MGET(m, M_TRYWAIT, MT_DATA);
1940 		else
1941 			MGETHDR(m, M_TRYWAIT, MT_DATA);
1942 		if (m == NULL) {
1943 			error = ENOBUFS;
1944 			sf_buf_mext((void *)sf_buf_kva(sf), sf);
1945 			sbunlock(&so->so_snd);
1946 			goto done;
1947 		}
1948 		/*
1949 		 * Setup external storage for mbuf.
1950 		 */
1951 		MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY,
1952 		    EXT_SFBUF);
1953 		m->m_data = (char *)sf_buf_kva(sf) + pgoff;
1954 		m->m_pkthdr.len = m->m_len = xfsize;
1955 
1956 		if (m_header) {
1957 			m_cat(m_header, m);
1958 			m = m_header;
1959 			m_header = NULL;
1960 			m_fixhdr(m);
1961 		}
1962 
1963 		/*
1964 		 * Add the buffer to the socket buffer chain.
1965 		 */
1966 		s = splnet();
1967 retry_space:
1968 		/*
1969 		 * Make sure that the socket is still able to take more data.
1970 		 * CANTSENDMORE being true usually means that the connection
1971 		 * was closed. so_error is true when an error was sensed after
1972 		 * a previous send.
1973 		 * The state is checked after the page mapping and buffer
1974 		 * allocation above since those operations may block and make
1975 		 * any socket checks stale. From this point forward, nothing
1976 		 * blocks before the pru_send (or more accurately, any blocking
1977 		 * results in a loop back to here to re-check).
1978 		 */
1979 		if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1980 			if (so->so_state & SS_CANTSENDMORE) {
1981 				error = EPIPE;
1982 			} else {
1983 				error = so->so_error;
1984 				so->so_error = 0;
1985 			}
1986 			m_freem(m);
1987 			sbunlock(&so->so_snd);
1988 			splx(s);
1989 			goto done;
1990 		}
1991 		/*
1992 		 * Wait for socket space to become available. We do this just
1993 		 * after checking the connection state above in order to avoid
1994 		 * a race condition with sbwait().
1995 		 */
1996 		if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
1997 			if (so->so_state & SS_NBIO) {
1998 				m_freem(m);
1999 				sbunlock(&so->so_snd);
2000 				splx(s);
2001 				error = EAGAIN;
2002 				goto done;
2003 			}
2004 			error = sbwait(&so->so_snd);
2005 			/*
2006 			 * An error from sbwait usually indicates that we've
2007 			 * been interrupted by a signal. If we've sent anything
2008 			 * then return bytes sent, otherwise return the error.
2009 			 */
2010 			if (error) {
2011 				m_freem(m);
2012 				sbunlock(&so->so_snd);
2013 				splx(s);
2014 				goto done;
2015 			}
2016 			goto retry_space;
2017 		}
2018 		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
2019 		splx(s);
2020 		if (error) {
2021 			sbunlock(&so->so_snd);
2022 			goto done;
2023 		}
2024 		headersent = 1;
2025 	}
2026 	sbunlock(&so->so_snd);
2027 
2028 	/*
2029 	 * Send trailers. Wimp out and use writev(2).
2030 	 */
2031 	if (uap->hdtr != NULL && hdtr.trailers != NULL) {
2032 			nuap.fd = uap->s;
2033 			nuap.iovp = hdtr.trailers;
2034 			nuap.iovcnt = hdtr.trl_cnt;
2035 			error = writev(td, &nuap);
2036 			if (error)
2037 				goto done;
2038 			if (compat)
2039 				sbytes += td->td_retval[0];
2040 			else
2041 				hdtr_size += td->td_retval[0];
2042 	}
2043 
2044 done:
2045 	if (headersent) {
2046 		if (!compat)
2047 			hdtr_size += headersize;
2048 	} else {
2049 		if (compat)
2050 			sbytes -= headersize;
2051 	}
2052 	/*
2053 	 * If there was no error we have to clear td->td_retval[0]
2054 	 * because it may have been set by writev.
2055 	 */
2056 	if (error == 0) {
2057 		td->td_retval[0] = 0;
2058 	}
2059 	if (uap->sbytes != NULL) {
2060 		if (!compat)
2061 			sbytes += hdtr_size;
2062 		copyout(&sbytes, uap->sbytes, sizeof(off_t));
2063 	}
2064 	if (vp)
2065 		vrele(vp);
2066 	if (so)
2067 		fputsock(so);
2068 	if (hdr_iov)
2069 		FREE(hdr_iov, M_IOV);
2070 	if (m_header)
2071 		m_freem(m_header);
2072 
2073 	mtx_unlock(&Giant);
2074 
2075 	if (error == ERESTART)
2076 		error = EINTR;
2077 
2078 	return (error);
2079 }
2080