xref: /freebsd/sys/kern/uipc_syscalls.c (revision 71fe318b852b8dfb3e799cb12ef184750f7f8eac)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * sendfile(2) and related extensions:
6  * Copyright (c) 1998, David Greenman. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
37  * $FreeBSD$
38  */
39 
40 #include "opt_compat.h"
41 #include "opt_ktrace.h"
42 #include "opt_mac.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/kernel.h>
47 #include <sys/lock.h>
48 #include <sys/mac.h>
49 #include <sys/mutex.h>
50 #include <sys/sysproto.h>
51 #include <sys/malloc.h>
52 #include <sys/filedesc.h>
53 #include <sys/event.h>
54 #include <sys/proc.h>
55 #include <sys/fcntl.h>
56 #include <sys/file.h>
57 #include <sys/lock.h>
58 #include <sys/mount.h>
59 #include <sys/mbuf.h>
60 #include <sys/protosw.h>
61 #include <sys/socket.h>
62 #include <sys/socketvar.h>
63 #include <sys/signalvar.h>
64 #include <sys/uio.h>
65 #include <sys/vnode.h>
66 #ifdef KTRACE
67 #include <sys/ktrace.h>
68 #endif
69 
70 #include <vm/vm.h>
71 #include <vm/vm_object.h>
72 #include <vm/vm_page.h>
73 #include <vm/vm_pageout.h>
74 #include <vm/vm_kern.h>
75 #include <vm/vm_extern.h>
76 
/* sf_buf_init is registered below via SYSINIT (SI_SUB_MBUF, SI_ORDER_ANY). */
77 static void sf_buf_init(void *arg);
78 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
79 
/* Common back ends shared by the send*() and recv*() system calls below. */
80 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
81 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
82 
/*
 * Workers that take a "compat" flag; accept1() is called with 0 by accept()
 * and with 1 by oaccept().
 */
83 static int accept1(struct thread *td, struct accept_args *uap, int compat);
84 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
85 static int getsockname1(struct thread *td, struct getsockname_args *uap,
86 			int compat);
87 static int getpeername1(struct thread *td, struct getpeername_args *uap,
88 			int compat);
89 
90 /*
91  * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the
92  * list head (sf_head) paired with the sf_lock mutex.
93  */
94 static struct {
95 	SLIST_HEAD(, sf_buf) sf_head;
96 	struct mtx sf_lock;
97 } sf_freelist;
98 
/* NOTE(review): presumably sendfile buffer state; users are not in this part of the file -- confirm. */
99 vm_offset_t sf_base;
100 struct sf_buf *sf_bufs;
101 u_int sf_buf_alloc_want;
102 
103 /*
104  * System call interface to the socket abstraction.
105  */
/* COMPAT_OLDSOCK enables the o*() entry points when either compat option is set. */
106 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
107 #define COMPAT_OLDSOCK
108 #endif
109 
/* File operations vector installed as f_ops on socket-backed files. */
110 extern	struct fileops socketops;
111 
112 /*
113  * MPSAFE
114  */
115 int
116 socket(td, uap)
117 	struct thread *td;
118 	register struct socket_args /* {
119 		int	domain;
120 		int	type;
121 		int	protocol;
122 	} */ *uap;
123 {
124 	struct filedesc *fdp;
125 	struct socket *so;
126 	struct file *fp;
127 	int fd, error;
128 
129 	mtx_lock(&Giant);
130 	fdp = td->td_proc->p_fd;
131 	error = falloc(td, &fp, &fd);
132 	if (error)
133 		goto done2;
134 	fhold(fp);
135 	error = socreate(uap->domain, &so, uap->type, uap->protocol,
136 	    td->td_ucred, td);
137 	FILEDESC_LOCK(fdp);
138 	if (error) {
139 		if (fdp->fd_ofiles[fd] == fp) {
140 			fdp->fd_ofiles[fd] = NULL;
141 			FILEDESC_UNLOCK(fdp);
142 			fdrop(fp, td);
143 		} else
144 			FILEDESC_UNLOCK(fdp);
145 	} else {
146 		fp->f_data = so;	/* already has ref count */
147 		fp->f_flag = FREAD|FWRITE;
148 		fp->f_ops = &socketops;
149 		fp->f_type = DTYPE_SOCKET;
150 		FILEDESC_UNLOCK(fdp);
151 		td->td_retval[0] = fd;
152 	}
153 	fdrop(fp, td);
154 done2:
155 	mtx_unlock(&Giant);
156 	return (error);
157 }
158 
159 /*
160  * MPSAFE
161  */
162 /* ARGSUSED */
163 int
164 bind(td, uap)
165 	struct thread *td;
166 	register struct bind_args /* {
167 		int	s;
168 		caddr_t	name;
169 		int	namelen;
170 	} */ *uap;
171 {
172 	struct socket *so;
173 	struct sockaddr *sa;
174 	int error;
175 
176 	mtx_lock(&Giant);
177 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
178 		goto done2;
179 	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
180 		goto done1;
181 #ifdef MAC
182 	error = mac_check_socket_bind(td->td_ucred, so, sa);
183 	if (error) {
184 		FREE(sa, M_SONAME);
185 		goto done1;
186 	}
187 #endif
188 	error = sobind(so, sa, td);
189 	FREE(sa, M_SONAME);
190 done1:
191 	fputsock(so);
192 done2:
193 	mtx_unlock(&Giant);
194 	return (error);
195 }
196 
197 /*
198  * MPSAFE
199  */
200 /* ARGSUSED */
201 int
202 listen(td, uap)
203 	struct thread *td;
204 	register struct listen_args /* {
205 		int	s;
206 		int	backlog;
207 	} */ *uap;
208 {
209 	struct socket *so;
210 	int error;
211 
212 	mtx_lock(&Giant);
213 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
214 #ifdef MAC
215 		error = mac_check_socket_listen(td->td_ucred, so);
216 		if (error)
217 			goto done;
218 #endif
219 		error = solisten(so, uap->backlog, td);
220 #ifdef MAC
221 done:
222 #endif
223 		fputsock(so);
224 	}
225 	mtx_unlock(&Giant);
226 	return(error);
227 }
228 
229 /*
230  * accept1()
231  * MPSAFE
232  */
233 static int
234 accept1(td, uap, compat)
235 	struct thread *td;
236 	register struct accept_args /* {
237 		int	s;
238 		caddr_t	name;
239 		int	*anamelen;
240 	} */ *uap;
241 	int compat;
242 {
243 	struct filedesc *fdp;
244 	struct file *nfp = NULL;
245 	struct sockaddr *sa;
246 	int namelen, error, s;
247 	struct socket *head, *so;
248 	int fd;
249 	u_int fflag;
250 	pid_t pgid;
251 
252 	mtx_lock(&Giant);
253 	fdp = td->td_proc->p_fd;
254 	if (uap->name) {
255 		error = copyin(uap->anamelen, &namelen, sizeof (namelen));
256 		if(error)
257 			goto done2;
258 		if (namelen < 0) {
259 			error = EINVAL;
260 			goto done2;
261 		}
262 	}
263 	error = fgetsock(td, uap->s, &head, &fflag);
264 	if (error)
265 		goto done2;
266 	s = splnet();
267 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
268 		splx(s);
269 		error = EINVAL;
270 		goto done;
271 	}
272 	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
273 		if (head->so_state & SS_CANTRCVMORE) {
274 			head->so_error = ECONNABORTED;
275 			break;
276 		}
277 		if ((head->so_state & SS_NBIO) != 0) {
278 			head->so_error = EWOULDBLOCK;
279 			break;
280 		}
281 		error = tsleep(&head->so_timeo, PSOCK | PCATCH,
282 		    "accept", 0);
283 		if (error) {
284 			splx(s);
285 			goto done;
286 		}
287 	}
288 	if (head->so_error) {
289 		error = head->so_error;
290 		head->so_error = 0;
291 		splx(s);
292 		goto done;
293 	}
294 
295 	/*
296 	 * At this point we know that there is at least one connection
297 	 * ready to be accepted. Remove it from the queue prior to
298 	 * allocating the file descriptor for it since falloc() may
299 	 * block allowing another process to accept the connection
300 	 * instead.
301 	 */
302 	so = TAILQ_FIRST(&head->so_comp);
303 	TAILQ_REMOVE(&head->so_comp, so, so_list);
304 	head->so_qlen--;
305 
306 	error = falloc(td, &nfp, &fd);
307 	if (error) {
308 		/*
309 		 * Probably ran out of file descriptors. Put the
310 		 * unaccepted connection back onto the queue and
311 		 * do another wakeup so some other process might
312 		 * have a chance at it.
313 		 */
314 		TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
315 		head->so_qlen++;
316 		wakeup_one(&head->so_timeo);
317 		splx(s);
318 		goto done;
319 	}
320 	fhold(nfp);
321 	td->td_retval[0] = fd;
322 
323 	/* connection has been removed from the listen queue */
324 	KNOTE(&head->so_rcv.sb_sel.si_note, 0);
325 
326 	so->so_state &= ~SS_COMP;
327 	so->so_head = NULL;
328 	pgid = fgetown(&head->so_sigio);
329 	if (pgid != 0)
330 		fsetown(pgid, &so->so_sigio);
331 
332 	FILE_LOCK(nfp);
333 	soref(so);			/* file descriptor reference */
334 	nfp->f_data = so;		/* nfp has ref count from falloc */
335 	nfp->f_flag = fflag;
336 	nfp->f_ops = &socketops;
337 	nfp->f_type = DTYPE_SOCKET;
338 	FILE_UNLOCK(nfp);
339 	sa = 0;
340 	error = soaccept(so, &sa);
341 	if (error) {
342 		/*
343 		 * return a namelen of zero for older code which might
344 	 	 * ignore the return value from accept.
345 		 */
346 		if (uap->name != NULL) {
347 			namelen = 0;
348 			(void) copyout(&namelen,
349 			    uap->anamelen, sizeof(*uap->anamelen));
350 		}
351 		goto noconnection;
352 	}
353 	if (sa == NULL) {
354 		namelen = 0;
355 		if (uap->name)
356 			goto gotnoname;
357 		splx(s);
358 		error = 0;
359 		goto done;
360 	}
361 	if (uap->name) {
362 		/* check sa_len before it is destroyed */
363 		if (namelen > sa->sa_len)
364 			namelen = sa->sa_len;
365 #ifdef COMPAT_OLDSOCK
366 		if (compat)
367 			((struct osockaddr *)sa)->sa_family =
368 			    sa->sa_family;
369 #endif
370 		error = copyout(sa, uap->name, (u_int)namelen);
371 		if (!error)
372 gotnoname:
373 			error = copyout(&namelen,
374 			    uap->anamelen, sizeof (*uap->anamelen));
375 	}
376 noconnection:
377 	if (sa)
378 		FREE(sa, M_SONAME);
379 
380 	/*
381 	 * close the new descriptor, assuming someone hasn't ripped it
382 	 * out from under us.
383 	 */
384 	if (error) {
385 		FILEDESC_LOCK(fdp);
386 		if (fdp->fd_ofiles[fd] == nfp) {
387 			fdp->fd_ofiles[fd] = NULL;
388 			FILEDESC_UNLOCK(fdp);
389 			fdrop(nfp, td);
390 		} else {
391 			FILEDESC_UNLOCK(fdp);
392 		}
393 	}
394 	splx(s);
395 
396 	/*
397 	 * Release explicitly held references before returning.
398 	 */
399 done:
400 	if (nfp != NULL)
401 		fdrop(nfp, td);
402 	fputsock(head);
403 done2:
404 	mtx_unlock(&Giant);
405 	return (error);
406 }
407 
/*
 * accept() system call: accept1() with the compat flag clear.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
accept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{
	int error;

	error = accept1(td, uap, 0);
	return (error);
}
419 
#ifdef COMPAT_OLDSOCK
/*
 * Old-style accept() system call: accept1() with the compat flag set.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
oaccept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{
	int error;

	error = accept1(td, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
433 
434 /*
435  * MPSAFE
436  */
437 /* ARGSUSED */
438 int
439 connect(td, uap)
440 	struct thread *td;
441 	register struct connect_args /* {
442 		int	s;
443 		caddr_t	name;
444 		int	namelen;
445 	} */ *uap;
446 {
447 	struct socket *so;
448 	struct sockaddr *sa;
449 	int error, s;
450 
451 	mtx_lock(&Giant);
452 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
453 		goto done2;
454 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
455 		error = EALREADY;
456 		goto done1;
457 	}
458 	error = getsockaddr(&sa, uap->name, uap->namelen);
459 	if (error)
460 		goto done1;
461 #ifdef MAC
462 	error = mac_check_socket_connect(td->td_ucred, so, sa);
463 	if (error)
464 		goto bad;
465 #endif
466 	error = soconnect(so, sa, td);
467 	if (error)
468 		goto bad;
469 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
470 		FREE(sa, M_SONAME);
471 		error = EINPROGRESS;
472 		goto done1;
473 	}
474 	s = splnet();
475 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
476 		error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0);
477 		if (error)
478 			break;
479 	}
480 	if (error == 0) {
481 		error = so->so_error;
482 		so->so_error = 0;
483 	}
484 	splx(s);
485 bad:
486 	so->so_state &= ~SS_ISCONNECTING;
487 	FREE(sa, M_SONAME);
488 	if (error == ERESTART)
489 		error = EINTR;
490 done1:
491 	fputsock(so);
492 done2:
493 	mtx_unlock(&Giant);
494 	return (error);
495 }
496 
497 /*
498  * MPSAFE
499  */
500 int
501 socketpair(td, uap)
502 	struct thread *td;
503 	register struct socketpair_args /* {
504 		int	domain;
505 		int	type;
506 		int	protocol;
507 		int	*rsv;
508 	} */ *uap;
509 {
510 	register struct filedesc *fdp = td->td_proc->p_fd;
511 	struct file *fp1, *fp2;
512 	struct socket *so1, *so2;
513 	int fd, error, sv[2];
514 
515 	mtx_lock(&Giant);
516 	error = socreate(uap->domain, &so1, uap->type, uap->protocol,
517 	    td->td_ucred, td);
518 	if (error)
519 		goto done2;
520 	error = socreate(uap->domain, &so2, uap->type, uap->protocol,
521 	    td->td_ucred, td);
522 	if (error)
523 		goto free1;
524 	error = falloc(td, &fp1, &fd);
525 	if (error)
526 		goto free2;
527 	fhold(fp1);
528 	sv[0] = fd;
529 	fp1->f_data = so1;		/* so1 already has ref count */
530 	error = falloc(td, &fp2, &fd);
531 	if (error)
532 		goto free3;
533 	fhold(fp2);
534 	fp2->f_data = so2;		/* so2 already has ref count */
535 	sv[1] = fd;
536 	error = soconnect2(so1, so2);
537 	if (error)
538 		goto free4;
539 	if (uap->type == SOCK_DGRAM) {
540 		/*
541 		 * Datagram socket connection is asymmetric.
542 		 */
543 		 error = soconnect2(so2, so1);
544 		 if (error)
545 			goto free4;
546 	}
547 	FILE_LOCK(fp1);
548 	fp1->f_flag = FREAD|FWRITE;
549 	fp1->f_ops = &socketops;
550 	fp1->f_type = DTYPE_SOCKET;
551 	FILE_UNLOCK(fp1);
552 	FILE_LOCK(fp2);
553 	fp2->f_flag = FREAD|FWRITE;
554 	fp2->f_ops = &socketops;
555 	fp2->f_type = DTYPE_SOCKET;
556 	FILE_UNLOCK(fp2);
557 	error = copyout(sv, uap->rsv, 2 * sizeof (int));
558 	fdrop(fp1, td);
559 	fdrop(fp2, td);
560 	goto done2;
561 free4:
562 	FILEDESC_LOCK(fdp);
563 	if (fdp->fd_ofiles[sv[1]] == fp2) {
564 		fdp->fd_ofiles[sv[1]] = NULL;
565 		FILEDESC_UNLOCK(fdp);
566 		fdrop(fp2, td);
567 	} else
568 		FILEDESC_UNLOCK(fdp);
569 	fdrop(fp2, td);
570 free3:
571 	FILEDESC_LOCK(fdp);
572 	if (fdp->fd_ofiles[sv[0]] == fp1) {
573 		fdp->fd_ofiles[sv[0]] = NULL;
574 		FILEDESC_UNLOCK(fdp);
575 		fdrop(fp1, td);
576 	} else
577 		FILEDESC_UNLOCK(fdp);
578 	fdrop(fp1, td);
579 free2:
580 	(void)soclose(so2);
581 free1:
582 	(void)soclose(so1);
583 done2:
584 	mtx_unlock(&Giant);
585 	return (error);
586 }
587 
588 static int
589 sendit(td, s, mp, flags)
590 	register struct thread *td;
591 	int s;
592 	register struct msghdr *mp;
593 	int flags;
594 {
595 	struct uio auio;
596 	register struct iovec *iov;
597 	register int i;
598 	struct mbuf *control;
599 	struct sockaddr *to = NULL;
600 	int len, error;
601 	struct socket *so;
602 #ifdef KTRACE
603 	struct iovec *ktriov = NULL;
604 	struct uio ktruio;
605 	int iovlen;
606 #endif
607 
608 	if ((error = fgetsock(td, s, &so, NULL)) != 0)
609 		return (error);
610 
611 #ifdef MAC
612 	error = mac_check_socket_send(td->td_ucred, so);
613 	if (error)
614 		goto bad;
615 #endif
616 
617 	auio.uio_iov = mp->msg_iov;
618 	auio.uio_iovcnt = mp->msg_iovlen;
619 	auio.uio_segflg = UIO_USERSPACE;
620 	auio.uio_rw = UIO_WRITE;
621 	auio.uio_td = td;
622 	auio.uio_offset = 0;			/* XXX */
623 	auio.uio_resid = 0;
624 	iov = mp->msg_iov;
625 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
626 		if ((auio.uio_resid += iov->iov_len) < 0) {
627 			error = EINVAL;
628 			goto bad;
629 		}
630 	}
631 	if (mp->msg_name) {
632 		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
633 		if (error)
634 			goto bad;
635 	}
636 	if (mp->msg_control) {
637 		if (mp->msg_controllen < sizeof(struct cmsghdr)
638 #ifdef COMPAT_OLDSOCK
639 		    && mp->msg_flags != MSG_COMPAT
640 #endif
641 		) {
642 			error = EINVAL;
643 			goto bad;
644 		}
645 		error = sockargs(&control, mp->msg_control,
646 		    mp->msg_controllen, MT_CONTROL);
647 		if (error)
648 			goto bad;
649 #ifdef COMPAT_OLDSOCK
650 		if (mp->msg_flags == MSG_COMPAT) {
651 			register struct cmsghdr *cm;
652 
653 			M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
654 			if (control == 0) {
655 				error = ENOBUFS;
656 				goto bad;
657 			} else {
658 				cm = mtod(control, struct cmsghdr *);
659 				cm->cmsg_len = control->m_len;
660 				cm->cmsg_level = SOL_SOCKET;
661 				cm->cmsg_type = SCM_RIGHTS;
662 			}
663 		}
664 #endif
665 	} else {
666 		control = 0;
667 	}
668 #ifdef KTRACE
669 	if (KTRPOINT(td, KTR_GENIO)) {
670 		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
671 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
672 		bcopy(auio.uio_iov, ktriov, iovlen);
673 		ktruio = auio;
674 	}
675 #endif
676 	len = auio.uio_resid;
677 	error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control,
678 						     flags, td);
679 	if (error) {
680 		if (auio.uio_resid != len && (error == ERESTART ||
681 		    error == EINTR || error == EWOULDBLOCK))
682 			error = 0;
683 		/* Generation of SIGPIPE can be controlled per socket */
684 		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) {
685 			PROC_LOCK(td->td_proc);
686 			psignal(td->td_proc, SIGPIPE);
687 			PROC_UNLOCK(td->td_proc);
688 		}
689 	}
690 	if (error == 0)
691 		td->td_retval[0] = len - auio.uio_resid;
692 #ifdef KTRACE
693 	if (ktriov != NULL) {
694 		if (error == 0) {
695 			ktruio.uio_iov = ktriov;
696 			ktruio.uio_resid = td->td_retval[0];
697 			ktrgenio(s, UIO_WRITE, &ktruio, error);
698 		}
699 		FREE(ktriov, M_TEMP);
700 	}
701 #endif
702 bad:
703 	fputsock(so);
704 	if (to)
705 		FREE(to, M_SONAME);
706 	return (error);
707 }
708 
709 /*
710  * MPSAFE
711  */
712 int
713 sendto(td, uap)
714 	struct thread *td;
715 	register struct sendto_args /* {
716 		int	s;
717 		caddr_t	buf;
718 		size_t	len;
719 		int	flags;
720 		caddr_t	to;
721 		int	tolen;
722 	} */ *uap;
723 {
724 	struct msghdr msg;
725 	struct iovec aiov;
726 	int error;
727 
728 	msg.msg_name = uap->to;
729 	msg.msg_namelen = uap->tolen;
730 	msg.msg_iov = &aiov;
731 	msg.msg_iovlen = 1;
732 	msg.msg_control = 0;
733 #ifdef COMPAT_OLDSOCK
734 	msg.msg_flags = 0;
735 #endif
736 	aiov.iov_base = uap->buf;
737 	aiov.iov_len = uap->len;
738 	mtx_lock(&Giant);
739 	error = sendit(td, uap->s, &msg, uap->flags);
740 	mtx_unlock(&Giant);
741 	return (error);
742 }
743 
744 #ifdef COMPAT_OLDSOCK
745 /*
746  * MPSAFE
747  */
748 int
749 osend(td, uap)
750 	struct thread *td;
751 	register struct osend_args /* {
752 		int	s;
753 		caddr_t	buf;
754 		int	len;
755 		int	flags;
756 	} */ *uap;
757 {
758 	struct msghdr msg;
759 	struct iovec aiov;
760 	int error;
761 
762 	msg.msg_name = 0;
763 	msg.msg_namelen = 0;
764 	msg.msg_iov = &aiov;
765 	msg.msg_iovlen = 1;
766 	aiov.iov_base = uap->buf;
767 	aiov.iov_len = uap->len;
768 	msg.msg_control = 0;
769 	msg.msg_flags = 0;
770 	mtx_lock(&Giant);
771 	error = sendit(td, uap->s, &msg, uap->flags);
772 	mtx_unlock(&Giant);
773 	return (error);
774 }
775 
776 /*
777  * MPSAFE
778  */
779 int
780 osendmsg(td, uap)
781 	struct thread *td;
782 	register struct osendmsg_args /* {
783 		int	s;
784 		caddr_t	msg;
785 		int	flags;
786 	} */ *uap;
787 {
788 	struct msghdr msg;
789 	struct iovec aiov[UIO_SMALLIOV], *iov;
790 	int error;
791 
792 	mtx_lock(&Giant);
793 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
794 	if (error)
795 		goto done2;
796 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
797 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
798 			error = EMSGSIZE;
799 			goto done2;
800 		}
801 		MALLOC(iov, struct iovec *,
802 		      sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
803 		      M_WAITOK);
804 	} else {
805 		iov = aiov;
806 	}
807 	error = copyin(msg.msg_iov, iov,
808 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
809 	if (error)
810 		goto done;
811 	msg.msg_flags = MSG_COMPAT;
812 	msg.msg_iov = iov;
813 	error = sendit(td, uap->s, &msg, uap->flags);
814 done:
815 	if (iov != aiov)
816 		FREE(iov, M_IOV);
817 done2:
818 	mtx_unlock(&Giant);
819 	return (error);
820 }
821 #endif
822 
823 /*
824  * MPSAFE
825  */
826 int
827 sendmsg(td, uap)
828 	struct thread *td;
829 	register struct sendmsg_args /* {
830 		int	s;
831 		caddr_t	msg;
832 		int	flags;
833 	} */ *uap;
834 {
835 	struct msghdr msg;
836 	struct iovec aiov[UIO_SMALLIOV], *iov;
837 	int error;
838 
839 	mtx_lock(&Giant);
840 	error = copyin(uap->msg, &msg, sizeof (msg));
841 	if (error)
842 		goto done2;
843 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
844 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
845 			error = EMSGSIZE;
846 			goto done2;
847 		}
848 		MALLOC(iov, struct iovec *,
849 		       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
850 		       M_WAITOK);
851 	} else {
852 		iov = aiov;
853 	}
854 	if (msg.msg_iovlen &&
855 	    (error = copyin(msg.msg_iov, iov,
856 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
857 		goto done;
858 	msg.msg_iov = iov;
859 #ifdef COMPAT_OLDSOCK
860 	msg.msg_flags = 0;
861 #endif
862 	error = sendit(td, uap->s, &msg, uap->flags);
863 done:
864 	if (iov != aiov)
865 		FREE(iov, M_IOV);
866 done2:
867 	mtx_unlock(&Giant);
868 	return (error);
869 }
870 
871 static int
872 recvit(td, s, mp, namelenp)
873 	register struct thread *td;
874 	int s;
875 	register struct msghdr *mp;
876 	void *namelenp;
877 {
878 	struct uio auio;
879 	register struct iovec *iov;
880 	register int i;
881 	int len, error;
882 	struct mbuf *m, *control = 0;
883 	caddr_t ctlbuf;
884 	struct socket *so;
885 	struct sockaddr *fromsa = 0;
886 #ifdef KTRACE
887 	struct iovec *ktriov = NULL;
888 	struct uio ktruio;
889 	int iovlen;
890 #endif
891 
892 	if ((error = fgetsock(td, s, &so, NULL)) != 0)
893 		return (error);
894 
895 #ifdef MAC
896 	error = mac_check_socket_receive(td->td_ucred, so);
897 	if (error) {
898 		fputsock(so);
899 		return (error);
900 	}
901 #endif
902 
903 	auio.uio_iov = mp->msg_iov;
904 	auio.uio_iovcnt = mp->msg_iovlen;
905 	auio.uio_segflg = UIO_USERSPACE;
906 	auio.uio_rw = UIO_READ;
907 	auio.uio_td = td;
908 	auio.uio_offset = 0;			/* XXX */
909 	auio.uio_resid = 0;
910 	iov = mp->msg_iov;
911 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
912 		if ((auio.uio_resid += iov->iov_len) < 0) {
913 			fputsock(so);
914 			return (EINVAL);
915 		}
916 	}
917 #ifdef KTRACE
918 	if (KTRPOINT(td, KTR_GENIO)) {
919 		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
920 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
921 		bcopy(auio.uio_iov, ktriov, iovlen);
922 		ktruio = auio;
923 	}
924 #endif
925 	len = auio.uio_resid;
926 	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
927 	    (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
928 	    &mp->msg_flags);
929 	if (error) {
930 		if (auio.uio_resid != len && (error == ERESTART ||
931 		    error == EINTR || error == EWOULDBLOCK))
932 			error = 0;
933 	}
934 #ifdef KTRACE
935 	if (ktriov != NULL) {
936 		if (error == 0) {
937 			ktruio.uio_iov = ktriov;
938 			ktruio.uio_resid = len - auio.uio_resid;
939 			ktrgenio(s, UIO_READ, &ktruio, error);
940 		}
941 		FREE(ktriov, M_TEMP);
942 	}
943 #endif
944 	if (error)
945 		goto out;
946 	td->td_retval[0] = len - auio.uio_resid;
947 	if (mp->msg_name) {
948 		len = mp->msg_namelen;
949 		if (len <= 0 || fromsa == 0)
950 			len = 0;
951 		else {
952 #ifndef MIN
953 #define MIN(a,b) ((a)>(b)?(b):(a))
954 #endif
955 			/* save sa_len before it is destroyed by MSG_COMPAT */
956 			len = MIN(len, fromsa->sa_len);
957 #ifdef COMPAT_OLDSOCK
958 			if (mp->msg_flags & MSG_COMPAT)
959 				((struct osockaddr *)fromsa)->sa_family =
960 				    fromsa->sa_family;
961 #endif
962 			error = copyout(fromsa, mp->msg_name, (unsigned)len);
963 			if (error)
964 				goto out;
965 		}
966 		mp->msg_namelen = len;
967 		if (namelenp &&
968 		    (error = copyout(&len, namelenp, sizeof (int)))) {
969 #ifdef COMPAT_OLDSOCK
970 			if (mp->msg_flags & MSG_COMPAT)
971 				error = 0;	/* old recvfrom didn't check */
972 			else
973 #endif
974 			goto out;
975 		}
976 	}
977 	if (mp->msg_control) {
978 #ifdef COMPAT_OLDSOCK
979 		/*
980 		 * We assume that old recvmsg calls won't receive access
981 		 * rights and other control info, esp. as control info
982 		 * is always optional and those options didn't exist in 4.3.
983 		 * If we receive rights, trim the cmsghdr; anything else
984 		 * is tossed.
985 		 */
986 		if (control && mp->msg_flags & MSG_COMPAT) {
987 			if (mtod(control, struct cmsghdr *)->cmsg_level !=
988 			    SOL_SOCKET ||
989 			    mtod(control, struct cmsghdr *)->cmsg_type !=
990 			    SCM_RIGHTS) {
991 				mp->msg_controllen = 0;
992 				goto out;
993 			}
994 			control->m_len -= sizeof (struct cmsghdr);
995 			control->m_data += sizeof (struct cmsghdr);
996 		}
997 #endif
998 		len = mp->msg_controllen;
999 		m = control;
1000 		mp->msg_controllen = 0;
1001 		ctlbuf = mp->msg_control;
1002 
1003 		while (m && len > 0) {
1004 			unsigned int tocopy;
1005 
1006 			if (len >= m->m_len)
1007 				tocopy = m->m_len;
1008 			else {
1009 				mp->msg_flags |= MSG_CTRUNC;
1010 				tocopy = len;
1011 			}
1012 
1013 			if ((error = copyout(mtod(m, caddr_t),
1014 					ctlbuf, tocopy)) != 0)
1015 				goto out;
1016 
1017 			ctlbuf += tocopy;
1018 			len -= tocopy;
1019 			m = m->m_next;
1020 		}
1021 		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
1022 	}
1023 out:
1024 	fputsock(so);
1025 	if (fromsa)
1026 		FREE(fromsa, M_SONAME);
1027 	if (control)
1028 		m_freem(control);
1029 	return (error);
1030 }
1031 
1032 /*
1033  * MPSAFE
1034  */
1035 int
1036 recvfrom(td, uap)
1037 	struct thread *td;
1038 	register struct recvfrom_args /* {
1039 		int	s;
1040 		caddr_t	buf;
1041 		size_t	len;
1042 		int	flags;
1043 		caddr_t	from;
1044 		int	*fromlenaddr;
1045 	} */ *uap;
1046 {
1047 	struct msghdr msg;
1048 	struct iovec aiov;
1049 	int error;
1050 
1051 	mtx_lock(&Giant);
1052 	if (uap->fromlenaddr) {
1053 		error = copyin(uap->fromlenaddr,
1054 		    &msg.msg_namelen, sizeof (msg.msg_namelen));
1055 		if (error)
1056 			goto done2;
1057 	} else {
1058 		msg.msg_namelen = 0;
1059 	}
1060 	msg.msg_name = uap->from;
1061 	msg.msg_iov = &aiov;
1062 	msg.msg_iovlen = 1;
1063 	aiov.iov_base = uap->buf;
1064 	aiov.iov_len = uap->len;
1065 	msg.msg_control = 0;
1066 	msg.msg_flags = uap->flags;
1067 	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1068 done2:
1069 	mtx_unlock(&Giant);
1070 	return(error);
1071 }
1072 
#ifdef COMPAT_OLDSOCK
/*
 * Old recvfrom() system call: sets MSG_COMPAT in the caller's flags and
 * defers to recvfrom().
 *
 * MPSAFE
 */
int
orecvfrom(td, uap)
	struct thread *td;
	struct recvfrom_args *uap;
{
	int error;

	uap->flags |= MSG_COMPAT;
	error = recvfrom(td, uap);
	return (error);
}
#endif
1087 
1088 
1089 #ifdef COMPAT_OLDSOCK
1090 /*
1091  * MPSAFE
1092  */
1093 int
1094 orecv(td, uap)
1095 	struct thread *td;
1096 	register struct orecv_args /* {
1097 		int	s;
1098 		caddr_t	buf;
1099 		int	len;
1100 		int	flags;
1101 	} */ *uap;
1102 {
1103 	struct msghdr msg;
1104 	struct iovec aiov;
1105 	int error;
1106 
1107 	mtx_lock(&Giant);
1108 	msg.msg_name = 0;
1109 	msg.msg_namelen = 0;
1110 	msg.msg_iov = &aiov;
1111 	msg.msg_iovlen = 1;
1112 	aiov.iov_base = uap->buf;
1113 	aiov.iov_len = uap->len;
1114 	msg.msg_control = 0;
1115 	msg.msg_flags = uap->flags;
1116 	error = recvit(td, uap->s, &msg, NULL);
1117 	mtx_unlock(&Giant);
1118 	return (error);
1119 }
1120 
1121 /*
1122  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1123  * overlays the new one, missing only the flags, and with the (old) access
1124  * rights where the control fields are now.
1125  *
1126  * MPSAFE
1127  */
1128 int
1129 orecvmsg(td, uap)
1130 	struct thread *td;
1131 	register struct orecvmsg_args /* {
1132 		int	s;
1133 		struct	omsghdr *msg;
1134 		int	flags;
1135 	} */ *uap;
1136 {
1137 	struct msghdr msg;
1138 	struct iovec aiov[UIO_SMALLIOV], *iov;
1139 	int error;
1140 
1141 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1142 	if (error)
1143 		return (error);
1144 
1145 	mtx_lock(&Giant);
1146 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1147 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1148 			error = EMSGSIZE;
1149 			goto done2;
1150 		}
1151 		MALLOC(iov, struct iovec *,
1152 		      sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1153 		      M_WAITOK);
1154 	} else {
1155 		iov = aiov;
1156 	}
1157 	msg.msg_flags = uap->flags | MSG_COMPAT;
1158 	error = copyin(msg.msg_iov, iov,
1159 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1160 	if (error)
1161 		goto done;
1162 	msg.msg_iov = iov;
1163 	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1164 
1165 	if (msg.msg_controllen && error == 0)
1166 		error = copyout(&msg.msg_controllen,
1167 		    &uap->msg->msg_accrightslen, sizeof (int));
1168 done:
1169 	if (iov != aiov)
1170 		FREE(iov, M_IOV);
1171 done2:
1172 	mtx_unlock(&Giant);
1173 	return (error);
1174 }
1175 #endif
1176 
1177 /*
1178  * MPSAFE
1179  */
1180 int
1181 recvmsg(td, uap)
1182 	struct thread *td;
1183 	register struct recvmsg_args /* {
1184 		int	s;
1185 		struct	msghdr *msg;
1186 		int	flags;
1187 	} */ *uap;
1188 {
1189 	struct msghdr msg;
1190 	struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
1191 	register int error;
1192 
1193 	mtx_lock(&Giant);
1194 	error = copyin(uap->msg, &msg, sizeof (msg));
1195 	if (error)
1196 		goto done2;
1197 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1198 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1199 			error = EMSGSIZE;
1200 			goto done2;
1201 		}
1202 		MALLOC(iov, struct iovec *,
1203 		       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1204 		       M_WAITOK);
1205 	} else {
1206 		iov = aiov;
1207 	}
1208 #ifdef COMPAT_OLDSOCK
1209 	msg.msg_flags = uap->flags &~ MSG_COMPAT;
1210 #else
1211 	msg.msg_flags = uap->flags;
1212 #endif
1213 	uiov = msg.msg_iov;
1214 	msg.msg_iov = iov;
1215 	error = copyin(uiov, iov,
1216 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1217 	if (error)
1218 		goto done;
1219 	error = recvit(td, uap->s, &msg, NULL);
1220 	if (!error) {
1221 		msg.msg_iov = uiov;
1222 		error = copyout(&msg, uap->msg, sizeof(msg));
1223 	}
1224 done:
1225 	if (iov != aiov)
1226 		FREE(iov, M_IOV);
1227 done2:
1228 	mtx_unlock(&Giant);
1229 	return (error);
1230 }
1231 
1232 /*
1233  * MPSAFE
1234  */
1235 /* ARGSUSED */
1236 int
1237 shutdown(td, uap)
1238 	struct thread *td;
1239 	register struct shutdown_args /* {
1240 		int	s;
1241 		int	how;
1242 	} */ *uap;
1243 {
1244 	struct socket *so;
1245 	int error;
1246 
1247 	mtx_lock(&Giant);
1248 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1249 		error = soshutdown(so, uap->how);
1250 		fputsock(so);
1251 	}
1252 	mtx_unlock(&Giant);
1253 	return(error);
1254 }
1255 
1256 /*
1257  * MPSAFE
1258  */
1259 /* ARGSUSED */
1260 int
1261 setsockopt(td, uap)
1262 	struct thread *td;
1263 	register struct setsockopt_args /* {
1264 		int	s;
1265 		int	level;
1266 		int	name;
1267 		caddr_t	val;
1268 		int	valsize;
1269 	} */ *uap;
1270 {
1271 	struct socket *so;
1272 	struct sockopt sopt;
1273 	int error;
1274 
1275 	if (uap->val == 0 && uap->valsize != 0)
1276 		return (EFAULT);
1277 	if (uap->valsize < 0)
1278 		return (EINVAL);
1279 
1280 	mtx_lock(&Giant);
1281 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1282 		sopt.sopt_dir = SOPT_SET;
1283 		sopt.sopt_level = uap->level;
1284 		sopt.sopt_name = uap->name;
1285 		sopt.sopt_val = uap->val;
1286 		sopt.sopt_valsize = uap->valsize;
1287 		sopt.sopt_td = td;
1288 		error = sosetopt(so, &sopt);
1289 		fputsock(so);
1290 	}
1291 	mtx_unlock(&Giant);
1292 	return(error);
1293 }
1294 
1295 /*
1296  * MPSAFE
1297  */
1298 /* ARGSUSED */
1299 int
1300 getsockopt(td, uap)
1301 	struct thread *td;
1302 	register struct getsockopt_args /* {
1303 		int	s;
1304 		int	level;
1305 		int	name;
1306 		caddr_t	val;
1307 		int	*avalsize;
1308 	} */ *uap;
1309 {
1310 	int	valsize, error;
1311 	struct  socket *so;
1312 	struct	sockopt sopt;
1313 
1314 	mtx_lock(&Giant);
1315 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1316 		goto done2;
1317 	if (uap->val) {
1318 		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1319 		if (error)
1320 			goto done1;
1321 		if (valsize < 0) {
1322 			error = EINVAL;
1323 			goto done1;
1324 		}
1325 	} else {
1326 		valsize = 0;
1327 	}
1328 
1329 	sopt.sopt_dir = SOPT_GET;
1330 	sopt.sopt_level = uap->level;
1331 	sopt.sopt_name = uap->name;
1332 	sopt.sopt_val = uap->val;
1333 	sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1334 	sopt.sopt_td = td;
1335 
1336 	error = sogetopt(so, &sopt);
1337 	if (error == 0) {
1338 		valsize = sopt.sopt_valsize;
1339 		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1340 	}
1341 done1:
1342 	fputsock(so);
1343 done2:
1344 	mtx_unlock(&Giant);
1345 	return (error);
1346 }
1347 
1348 /*
1349  * getsockname1() - Get socket name.
1350  *
1351  * MPSAFE
1352  */
1353 /* ARGSUSED */
1354 static int
1355 getsockname1(td, uap, compat)
1356 	struct thread *td;
1357 	register struct getsockname_args /* {
1358 		int	fdes;
1359 		caddr_t	asa;
1360 		int	*alen;
1361 	} */ *uap;
1362 	int compat;
1363 {
1364 	struct socket *so;
1365 	struct sockaddr *sa;
1366 	int len, error;
1367 
1368 	mtx_lock(&Giant);
1369 	if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1370 		goto done2;
1371 	error = copyin(uap->alen, &len, sizeof (len));
1372 	if (error)
1373 		goto done1;
1374 	if (len < 0) {
1375 		error = EINVAL;
1376 		goto done1;
1377 	}
1378 	sa = 0;
1379 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1380 	if (error)
1381 		goto bad;
1382 	if (sa == 0) {
1383 		len = 0;
1384 		goto gotnothing;
1385 	}
1386 
1387 	len = MIN(len, sa->sa_len);
1388 #ifdef COMPAT_OLDSOCK
1389 	if (compat)
1390 		((struct osockaddr *)sa)->sa_family = sa->sa_family;
1391 #endif
1392 	error = copyout(sa, uap->asa, (u_int)len);
1393 	if (error == 0)
1394 gotnothing:
1395 		error = copyout(&len, uap->alen, sizeof (len));
1396 bad:
1397 	if (sa)
1398 		FREE(sa, M_SONAME);
1399 done1:
1400 	fputsock(so);
1401 done2:
1402 	mtx_unlock(&Giant);
1403 	return (error);
1404 }
1405 
/*
 * getsockname() system call: getsockname1() with compat disabled.
 *
 * MPSAFE
 */
int
getsockname(struct thread *td, struct getsockname_args *uap)
{
	int rv;

	rv = getsockname1(td, uap, 0);
	return (rv);
}
1417 
1418 #ifdef COMPAT_OLDSOCK
/*
 * Old-socket flavour of getsockname(): getsockname1() with compat enabled.
 *
 * MPSAFE
 */
int
ogetsockname(struct thread *td, struct getsockname_args *uap)
{
	int rv;

	rv = getsockname1(td, uap, 1);
	return (rv);
}
1430 #endif /* COMPAT_OLDSOCK */
1431 
1432 /*
1433  * getpeername1() - Get name of peer for connected socket.
1434  *
1435  * MPSAFE
1436  */
1437 /* ARGSUSED */
1438 static int
1439 getpeername1(td, uap, compat)
1440 	struct thread *td;
1441 	register struct getpeername_args /* {
1442 		int	fdes;
1443 		caddr_t	asa;
1444 		int	*alen;
1445 	} */ *uap;
1446 	int compat;
1447 {
1448 	struct socket *so;
1449 	struct sockaddr *sa;
1450 	int len, error;
1451 
1452 	mtx_lock(&Giant);
1453 	if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1454 		goto done2;
1455 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1456 		error = ENOTCONN;
1457 		goto done1;
1458 	}
1459 	error = copyin(uap->alen, &len, sizeof (len));
1460 	if (error)
1461 		goto done1;
1462 	if (len < 0) {
1463 		error = EINVAL;
1464 		goto done1;
1465 	}
1466 	sa = 0;
1467 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1468 	if (error)
1469 		goto bad;
1470 	if (sa == 0) {
1471 		len = 0;
1472 		goto gotnothing;
1473 	}
1474 	len = MIN(len, sa->sa_len);
1475 #ifdef COMPAT_OLDSOCK
1476 	if (compat)
1477 		((struct osockaddr *)sa)->sa_family =
1478 		    sa->sa_family;
1479 #endif
1480 	error = copyout(sa, uap->asa, (u_int)len);
1481 	if (error)
1482 		goto bad;
1483 gotnothing:
1484 	error = copyout(&len, uap->alen, sizeof (len));
1485 bad:
1486 	if (sa)
1487 		FREE(sa, M_SONAME);
1488 done1:
1489 	fputsock(so);
1490 done2:
1491 	mtx_unlock(&Giant);
1492 	return (error);
1493 }
1494 
/*
 * getpeername() system call: getpeername1() with compat disabled.
 *
 * MPSAFE
 */
int
getpeername(struct thread *td, struct getpeername_args *uap)
{
	int rv;

	rv = getpeername1(td, uap, 0);
	return (rv);
}
1506 
1507 #ifdef COMPAT_OLDSOCK
/*
 * Old-socket flavour of getpeername(): getpeername1() with compat enabled.
 *
 * MPSAFE
 */
int
ogetpeername(struct thread *td, struct ogetpeername_args *uap)
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(td, args, 1));
}
1520 #endif /* COMPAT_OLDSOCK */
1521 
1522 int
1523 sockargs(mp, buf, buflen, type)
1524 	struct mbuf **mp;
1525 	caddr_t buf;
1526 	int buflen, type;
1527 {
1528 	register struct sockaddr *sa;
1529 	register struct mbuf *m;
1530 	int error;
1531 
1532 	if ((u_int)buflen > MLEN) {
1533 #ifdef COMPAT_OLDSOCK
1534 		if (type == MT_SONAME && (u_int)buflen <= 112)
1535 			buflen = MLEN;		/* unix domain compat. hack */
1536 		else
1537 #endif
1538 		return (EINVAL);
1539 	}
1540 	m = m_get(M_TRYWAIT, type);
1541 	if (m == NULL)
1542 		return (ENOBUFS);
1543 	m->m_len = buflen;
1544 	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1545 	if (error)
1546 		(void) m_free(m);
1547 	else {
1548 		*mp = m;
1549 		if (type == MT_SONAME) {
1550 			sa = mtod(m, struct sockaddr *);
1551 
1552 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1553 			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1554 				sa->sa_family = sa->sa_len;
1555 #endif
1556 			sa->sa_len = buflen;
1557 		}
1558 	}
1559 	return (error);
1560 }
1561 
1562 int
1563 getsockaddr(namp, uaddr, len)
1564 	struct sockaddr **namp;
1565 	caddr_t uaddr;
1566 	size_t len;
1567 {
1568 	struct sockaddr *sa;
1569 	int error;
1570 
1571 	if (len > SOCK_MAXADDRLEN)
1572 		return ENAMETOOLONG;
1573 	MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1574 	error = copyin(uaddr, sa, len);
1575 	if (error) {
1576 		FREE(sa, M_SONAME);
1577 	} else {
1578 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1579 		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1580 			sa->sa_family = sa->sa_len;
1581 #endif
1582 		sa->sa_len = len;
1583 		*namp = sa;
1584 	}
1585 	return error;
1586 }
1587 
1588 /*
1589  * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
1590  */
1591 static void
1592 sf_buf_init(void *arg)
1593 {
1594 	int i;
1595 
1596 	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
1597 	mtx_lock(&sf_freelist.sf_lock);
1598 	SLIST_INIT(&sf_freelist.sf_head);
1599 	sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE);
1600 	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
1601 	    M_NOWAIT | M_ZERO);
1602 	for (i = 0; i < nsfbufs; i++) {
1603 		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
1604 		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
1605 	}
1606 	sf_buf_alloc_want = 0;
1607 	mtx_unlock(&sf_freelist.sf_lock);
1608 }
1609 
1610 /*
1611  * Get an sf_buf from the freelist. Will block if none are available.
1612  */
1613 struct sf_buf *
1614 sf_buf_alloc()
1615 {
1616 	struct sf_buf *sf;
1617 	int error;
1618 
1619 	mtx_lock(&sf_freelist.sf_lock);
1620 	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
1621 		sf_buf_alloc_want++;
1622 		error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH,
1623 		    "sfbufa", 0);
1624 		sf_buf_alloc_want--;
1625 
1626 		/*
1627 		 * If we got a signal, don't risk going back to sleep.
1628 		 */
1629 		if (error)
1630 			break;
1631 	}
1632 	if (sf != NULL)
1633 		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
1634 	mtx_unlock(&sf_freelist.sf_lock);
1635 	return (sf);
1636 }
1637 
/*
 * Translate an address inside the sf_buf mapping window back to its
 * sf_buf descriptor.  sf_buf_init() gives descriptor i the page at
 * sf_base + i * PAGE_SIZE, so the page index of the address relative to
 * sf_base is the index into sf_bufs[].
 */
#define dtosf(x)							\
	(sf_bufs + (((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT))
1639 
1640 /*
1641  * Detatch mapped page and release resources back to the system.
1642  */
1643 void
1644 sf_buf_free(void *addr, void *args)
1645 {
1646 	struct sf_buf *sf;
1647 	struct vm_page *m;
1648 
1649 	GIANT_REQUIRED;
1650 
1651 	sf = dtosf(addr);
1652 	pmap_qremove((vm_offset_t)addr, 1);
1653 	m = sf->m;
1654 	vm_page_lock_queues();
1655 	vm_page_unwire(m, 0);
1656 	/*
1657 	 * Check for the object going away on us. This can
1658 	 * happen since we don't hold a reference to it.
1659 	 * If so, we're responsible for freeing the page.
1660 	 */
1661 	if (m->wire_count == 0 && m->object == NULL)
1662 		vm_page_free(m);
1663 	vm_page_unlock_queues();
1664 	sf->m = NULL;
1665 	mtx_lock(&sf_freelist.sf_lock);
1666 	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
1667 	if (sf_buf_alloc_want > 0)
1668 		wakeup_one(&sf_freelist);
1669 	mtx_unlock(&sf_freelist.sf_lock);
1670 }
1671 
/*
 * sendfile(2)
 *
 * MPSAFE
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Transmit the file referred to by 'fd', beginning at 'offset', over the
 * socket 's'.  'nbytes' limits how much of the file is sent; 0 means
 * "until EOF".  An optional header and/or trailer may be supplied via
 * 'hdtr'.  If 'sbytes' is non-null the total number of bytes sent is
 * stored there.
 *
 * All of the work is done by do_sendfile() with compat disabled.
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
	int error;

	error = do_sendfile(td, uap, 0);
	return (error);
}
1692 
1693 #ifdef COMPAT_FREEBSD4
1694 int
1695 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
1696 {
1697 	struct sendfile_args args;
1698 
1699 	args.fd = uap->fd;
1700 	args.s = uap->s;
1701 	args.offset = uap->offset;
1702 	args.nbytes = uap->nbytes;
1703 	args.hdtr = uap->hdtr;
1704 	args.sbytes = uap->sbytes;
1705 	args.flags = uap->flags;
1706 
1707 	return (do_sendfile(td, &args, 1));
1708 }
1709 #endif /* COMPAT_FREEBSD4 */
1710 
1711 static int
1712 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1713 {
1714 	struct vnode *vp;
1715 	struct vm_object *obj;
1716 	struct socket *so = NULL;
1717 	struct mbuf *m;
1718 	struct sf_buf *sf;
1719 	struct vm_page *pg;
1720 	struct writev_args nuap;
1721 	struct sf_hdtr hdtr;
1722 	off_t off, xfsize, hdtr_size, sbytes = 0;
1723 	int error, s;
1724 
1725 	mtx_lock(&Giant);
1726 
1727 	hdtr_size = 0;
1728 
1729 	/*
1730 	 * The descriptor must be a regular file and have a backing VM object.
1731 	 */
1732 	if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1733 		goto done;
1734 	if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
1735 		error = EINVAL;
1736 		goto done;
1737 	}
1738 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1739 		goto done;
1740 	if (so->so_type != SOCK_STREAM) {
1741 		error = EINVAL;
1742 		goto done;
1743 	}
1744 	if ((so->so_state & SS_ISCONNECTED) == 0) {
1745 		error = ENOTCONN;
1746 		goto done;
1747 	}
1748 	if (uap->offset < 0) {
1749 		error = EINVAL;
1750 		goto done;
1751 	}
1752 
1753 #ifdef MAC
1754 	error = mac_check_socket_send(td->td_ucred, so);
1755 	if (error)
1756 		goto done;
1757 #endif
1758 
1759 	/*
1760 	 * If specified, get the pointer to the sf_hdtr struct for
1761 	 * any headers/trailers.
1762 	 */
1763 	if (uap->hdtr != NULL) {
1764 		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1765 		if (error)
1766 			goto done;
1767 		/*
1768 		 * Send any headers. Wimp out and use writev(2).
1769 		 */
1770 		if (hdtr.headers != NULL) {
1771 			nuap.fd = uap->s;
1772 			nuap.iovp = hdtr.headers;
1773 			nuap.iovcnt = hdtr.hdr_cnt;
1774 			error = writev(td, &nuap);
1775 			if (error)
1776 				goto done;
1777 			if (compat)
1778 				sbytes += td->td_retval[0];
1779 			else
1780 				hdtr_size += td->td_retval[0];
1781 		}
1782 	}
1783 
1784 	/*
1785 	 * Protect against multiple writers to the socket.
1786 	 */
1787 	(void) sblock(&so->so_snd, M_WAITOK);
1788 
1789 	/*
1790 	 * Loop through the pages in the file, starting with the requested
1791 	 * offset. Get a file page (do I/O if necessary), map the file page
1792 	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1793 	 * it on the socket.
1794 	 */
1795 	for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1796 		vm_pindex_t pindex;
1797 		vm_offset_t pgoff;
1798 
1799 		pindex = OFF_TO_IDX(off);
1800 retry_lookup:
1801 		/*
1802 		 * Calculate the amount to transfer. Not to exceed a page,
1803 		 * the EOF, or the passed in nbytes.
1804 		 */
1805 		xfsize = obj->un_pager.vnp.vnp_size - off;
1806 		if (xfsize > PAGE_SIZE)
1807 			xfsize = PAGE_SIZE;
1808 		pgoff = (vm_offset_t)(off & PAGE_MASK);
1809 		if (PAGE_SIZE - pgoff < xfsize)
1810 			xfsize = PAGE_SIZE - pgoff;
1811 		if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1812 			xfsize = uap->nbytes - sbytes;
1813 		if (xfsize <= 0)
1814 			break;
1815 		/*
1816 		 * Optimize the non-blocking case by looking at the socket space
1817 		 * before going to the extra work of constituting the sf_buf.
1818 		 */
1819 		if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1820 			if (so->so_state & SS_CANTSENDMORE)
1821 				error = EPIPE;
1822 			else
1823 				error = EAGAIN;
1824 			sbunlock(&so->so_snd);
1825 			goto done;
1826 		}
1827 		/*
1828 		 * Attempt to look up the page.
1829 		 *
1830 		 *	Allocate if not found
1831 		 *
1832 		 *	Wait and loop if busy.
1833 		 */
1834 		pg = vm_page_lookup(obj, pindex);
1835 
1836 		if (pg == NULL) {
1837 			pg = vm_page_alloc(obj, pindex,
1838 			    VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
1839 			if (pg == NULL) {
1840 				VM_WAIT;
1841 				goto retry_lookup;
1842 			}
1843 			vm_page_lock_queues();
1844 			vm_page_wakeup(pg);
1845 		} else {
1846 			vm_page_lock_queues();
1847 			if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
1848 				goto retry_lookup;
1849 			/*
1850 		 	 * Wire the page so it does not get ripped out from
1851 			 * under us.
1852 			 */
1853 			vm_page_wire(pg);
1854 		}
1855 
1856 		/*
1857 		 * If page is not valid for what we need, initiate I/O
1858 		 */
1859 
1860 		if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) {
1861 			int bsize, resid;
1862 
1863 			/*
1864 			 * Ensure that our page is still around when the I/O
1865 			 * completes.
1866 			 */
1867 			vm_page_io_start(pg);
1868 			vm_page_unlock_queues();
1869 
1870 			/*
1871 			 * Get the page from backing store.
1872 			 */
1873 			bsize = vp->v_mount->mnt_stat.f_iosize;
1874 			vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
1875 			/*
1876 			 * XXXMAC: Because we don't have fp->f_cred here,
1877 			 * we pass in NOCRED.  This is probably wrong, but
1878 			 * is consistent with our original implementation.
1879 			 */
1880 			error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
1881 			    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
1882 			    IO_VMIO | ((MAXBSIZE / bsize) << 16),
1883 			    td->td_ucred, NOCRED, &resid, td);
1884 			VOP_UNLOCK(vp, 0, td);
1885 			vm_page_lock_queues();
1886 			vm_page_flag_clear(pg, PG_ZERO);
1887 			vm_page_io_finish(pg);
1888 			if (error) {
1889 				vm_page_unwire(pg, 0);
1890 				/*
1891 				 * See if anyone else might know about this page.
1892 				 * If not and it is not valid, then free it.
1893 				 */
1894 				if (pg->wire_count == 0 && pg->valid == 0 &&
1895 				    pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1896 				    pg->hold_count == 0) {
1897 					vm_page_busy(pg);
1898 					vm_page_free(pg);
1899 				}
1900 				vm_page_unlock_queues();
1901 				sbunlock(&so->so_snd);
1902 				goto done;
1903 			}
1904 		}
1905 		vm_page_unlock_queues();
1906 
1907 		/*
1908 		 * Get a sendfile buf. We usually wait as long as necessary,
1909 		 * but this wait can be interrupted.
1910 		 */
1911 		if ((sf = sf_buf_alloc()) == NULL) {
1912 			vm_page_lock_queues();
1913 			vm_page_unwire(pg, 0);
1914 			if (pg->wire_count == 0 && pg->object == NULL)
1915 				vm_page_free(pg);
1916 			vm_page_unlock_queues();
1917 			sbunlock(&so->so_snd);
1918 			error = EINTR;
1919 			goto done;
1920 		}
1921 
1922 		/*
1923 		 * Allocate a kernel virtual page and insert the physical page
1924 		 * into it.
1925 		 */
1926 		sf->m = pg;
1927 		pmap_qenter(sf->kva, &pg, 1);
1928 		/*
1929 		 * Get an mbuf header and set it up as having external storage.
1930 		 */
1931 		MGETHDR(m, M_TRYWAIT, MT_DATA);
1932 		if (m == NULL) {
1933 			error = ENOBUFS;
1934 			sf_buf_free((void *)sf->kva, NULL);
1935 			sbunlock(&so->so_snd);
1936 			goto done;
1937 		}
1938 		/*
1939 		 * Setup external storage for mbuf.
1940 		 */
1941 		MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, NULL, M_RDONLY,
1942 		    EXT_SFBUF);
1943 		m->m_data = (char *) sf->kva + pgoff;
1944 		m->m_pkthdr.len = m->m_len = xfsize;
1945 		/*
1946 		 * Add the buffer to the socket buffer chain.
1947 		 */
1948 		s = splnet();
1949 retry_space:
1950 		/*
1951 		 * Make sure that the socket is still able to take more data.
1952 		 * CANTSENDMORE being true usually means that the connection
1953 		 * was closed. so_error is true when an error was sensed after
1954 		 * a previous send.
1955 		 * The state is checked after the page mapping and buffer
1956 		 * allocation above since those operations may block and make
1957 		 * any socket checks stale. From this point forward, nothing
1958 		 * blocks before the pru_send (or more accurately, any blocking
1959 		 * results in a loop back to here to re-check).
1960 		 */
1961 		if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1962 			if (so->so_state & SS_CANTSENDMORE) {
1963 				error = EPIPE;
1964 			} else {
1965 				error = so->so_error;
1966 				so->so_error = 0;
1967 			}
1968 			m_freem(m);
1969 			sbunlock(&so->so_snd);
1970 			splx(s);
1971 			goto done;
1972 		}
1973 		/*
1974 		 * Wait for socket space to become available. We do this just
1975 		 * after checking the connection state above in order to avoid
1976 		 * a race condition with sbwait().
1977 		 */
1978 		if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
1979 			if (so->so_state & SS_NBIO) {
1980 				m_freem(m);
1981 				sbunlock(&so->so_snd);
1982 				splx(s);
1983 				error = EAGAIN;
1984 				goto done;
1985 			}
1986 			error = sbwait(&so->so_snd);
1987 			/*
1988 			 * An error from sbwait usually indicates that we've
1989 			 * been interrupted by a signal. If we've sent anything
1990 			 * then return bytes sent, otherwise return the error.
1991 			 */
1992 			if (error) {
1993 				m_freem(m);
1994 				sbunlock(&so->so_snd);
1995 				splx(s);
1996 				goto done;
1997 			}
1998 			goto retry_space;
1999 		}
2000 		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
2001 		splx(s);
2002 		if (error) {
2003 			sbunlock(&so->so_snd);
2004 			goto done;
2005 		}
2006 	}
2007 	sbunlock(&so->so_snd);
2008 
2009 	/*
2010 	 * Send trailers. Wimp out and use writev(2).
2011 	 */
2012 	if (uap->hdtr != NULL && hdtr.trailers != NULL) {
2013 			nuap.fd = uap->s;
2014 			nuap.iovp = hdtr.trailers;
2015 			nuap.iovcnt = hdtr.trl_cnt;
2016 			error = writev(td, &nuap);
2017 			if (error)
2018 				goto done;
2019 			if (compat)
2020 				sbytes += td->td_retval[0];
2021 			else
2022 				hdtr_size += td->td_retval[0];
2023 	}
2024 
2025 done:
2026 	/*
2027 	 * If there was no error we have to clear td->td_retval[0]
2028 	 * because it may have been set by writev.
2029 	 */
2030 	if (error == 0) {
2031 		td->td_retval[0] = 0;
2032 	}
2033 	if (uap->sbytes != NULL) {
2034 		if (!compat)
2035 			sbytes += hdtr_size;
2036 		copyout(&sbytes, uap->sbytes, sizeof(off_t));
2037 	}
2038 	if (vp)
2039 		vrele(vp);
2040 	if (so)
2041 		fputsock(so);
2042 	mtx_unlock(&Giant);
2043 	return (error);
2044 }
2045