xref: /freebsd/sys/kern/uipc_syscalls.c (revision eacee0ff7ec955b32e09515246bd97b6edcd2b0f)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * sendfile(2) and related extensions:
6  * Copyright (c) 1998, David Greenman. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
37  * $FreeBSD$
38  */
39 
40 #include "opt_compat.h"
41 #include "opt_ktrace.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/kernel.h>
46 #include <sys/lock.h>
47 #include <sys/mutex.h>
48 #include <sys/sysproto.h>
49 #include <sys/malloc.h>
50 #include <sys/filedesc.h>
51 #include <sys/event.h>
52 #include <sys/proc.h>
53 #include <sys/fcntl.h>
54 #include <sys/file.h>
55 #include <sys/lock.h>
56 #include <sys/mount.h>
57 #include <sys/mbuf.h>
58 #include <sys/protosw.h>
59 #include <sys/socket.h>
60 #include <sys/socketvar.h>
61 #include <sys/signalvar.h>
62 #include <sys/uio.h>
63 #include <sys/vnode.h>
64 #ifdef KTRACE
65 #include <sys/ktrace.h>
66 #endif
67 
68 #include <vm/vm.h>
69 #include <vm/vm_object.h>
70 #include <vm/vm_page.h>
71 #include <vm/vm_pageout.h>
72 #include <vm/vm_kern.h>
73 #include <vm/vm_extern.h>
74 
/*
 * Forward declarations for the sf_buf helpers.  sf_buf_init() is registered
 * through SYSINIT in the SI_SUB_MBUF subsystem with order SI_ORDER_ANY.
 */
75 static void sf_buf_init(void *arg);
76 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
77 static struct sf_buf *sf_buf_alloc(void);
78 static void sf_buf_free(caddr_t addr, void *args);
79 
/* Shared back ends for the send*() and recv*() system calls in this file. */
80 static int sendit __P((struct thread *td, int s, struct msghdr *mp, int flags));
81 static int recvit __P((struct thread *td, int s, struct msghdr *mp,
82 		       caddr_t namelenp));
83 
/*
 * Common implementations taking a "compat" flag.  For accept1() a non-zero
 * compat selects the old (COMPAT_OLDSOCK) sockaddr layout; presumably the
 * same holds for getsockname1()/getpeername1() -- confirm at their
 * definitions.
 */
84 static int accept1 __P((struct thread *td, struct accept_args *uap, int compat));
85 static int getsockname1 __P((struct thread *td, struct getsockname_args *uap,
86 			     int compat));
87 static int getpeername1 __P((struct thread *td, struct getpeername_args *uap,
88 			     int compat));
89 
90 /*
91  * Expanded sf_freelist head: an SLIST_HEAD() of sf_buf entries bundled
92  * together with the sf_lock mutex in a single structure.
93  */
94 static struct {
95 	SLIST_HEAD(, sf_buf) sf_head;
96 	struct mtx sf_lock;
97 } sf_freelist;
98 
/* Remaining file-scope sf_buf state (used outside the code shown here). */
99 static vm_offset_t sf_base;
100 static struct sf_buf *sf_bufs;
101 static u_int sf_buf_alloc_want;
102 
103 /*
104  * System call interface to the socket abstraction.
105  *
106  * COMPAT_OLDSOCK is defined whenever either COMPAT_43 or COMPAT_SUNOS is
107  * configured; it gates the old-style entry points and sockaddr handling.
108  */
106 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
107 #define COMPAT_OLDSOCK
108 #endif
109 
/* File operations vector installed in f_ops for socket descriptors. */
110 extern	struct fileops socketops;
111 
112 /*
113  * MPSAFE
114  */
115 int
116 socket(td, uap)
117 	struct thread *td;
118 	register struct socket_args /* {
119 		int	domain;
120 		int	type;
121 		int	protocol;
122 	} */ *uap;
123 {
124 	struct filedesc *fdp;
125 	struct socket *so;
126 	struct file *fp;
127 	int fd, error;
128 
129 	mtx_lock(&Giant);
130 	fdp = td->td_proc->p_fd;
131 	error = falloc(td, &fp, &fd);
132 	if (error)
133 		goto done2;
134 	fhold(fp);
135 	error = socreate(uap->domain, &so, uap->type, uap->protocol,
136 	    td->td_proc->p_ucred, td);
137 	FILEDESC_LOCK(fdp);
138 	if (error) {
139 		if (fdp->fd_ofiles[fd] == fp) {
140 			fdp->fd_ofiles[fd] = NULL;
141 			FILEDESC_UNLOCK(fdp);
142 			fdrop(fp, td);
143 		} else
144 			FILEDESC_UNLOCK(fdp);
145 	} else {
146 		fp->f_data = (caddr_t)so;	/* already has ref count */
147 		fp->f_flag = FREAD|FWRITE;
148 		fp->f_ops = &socketops;
149 		fp->f_type = DTYPE_SOCKET;
150 		FILEDESC_UNLOCK(fdp);
151 		td->td_retval[0] = fd;
152 	}
153 	fdrop(fp, td);
154 done2:
155 	mtx_unlock(&Giant);
156 	return (error);
157 }
158 
159 /*
160  * MPSAFE
161  */
162 /* ARGSUSED */
163 int
164 bind(td, uap)
165 	struct thread *td;
166 	register struct bind_args /* {
167 		int	s;
168 		caddr_t	name;
169 		int	namelen;
170 	} */ *uap;
171 {
172 	struct socket *so;
173 	struct sockaddr *sa;
174 	int error;
175 
176 	mtx_lock(&Giant);
177 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
178 		goto done2;
179 	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
180 		goto done1;
181 	error = sobind(so, sa, td);
182 	FREE(sa, M_SONAME);
183 done1:
184 	fputsock(so);
185 done2:
186 	mtx_unlock(&Giant);
187 	return (error);
188 }
189 
190 /*
191  * MPSAFE
192  */
193 /* ARGSUSED */
194 int
195 listen(td, uap)
196 	struct thread *td;
197 	register struct listen_args /* {
198 		int	s;
199 		int	backlog;
200 	} */ *uap;
201 {
202 	struct socket *so;
203 	int error;
204 
205 	mtx_lock(&Giant);
206 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
207 		error = solisten(so, uap->backlog, td);
208 		fputsock(so);
209 	}
210 	mtx_unlock(&Giant);
211 	return(error);
212 }
213 
214 /*
215  * accept1()
216  * MPSAFE
217  */
218 static int
219 accept1(td, uap, compat)
220 	struct thread *td;
221 	register struct accept_args /* {
222 		int	s;
223 		caddr_t	name;
224 		int	*anamelen;
225 	} */ *uap;
226 	int compat;
227 {
228 	struct filedesc *fdp;
229 	struct file *nfp = NULL;
230 	struct sockaddr *sa;
231 	int namelen, error, s;
232 	struct socket *head, *so;
233 	int fd;
234 	u_int fflag;
235 
236 	mtx_lock(&Giant);
237 	fdp = td->td_proc->p_fd;
238 	if (uap->name) {
239 		error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen,
240 			sizeof (namelen));
241 		if(error)
242 			goto done2;
243 	}
244 	error = fgetsock(td, uap->s, &head, &fflag);
245 	if (error)
246 		goto done2;
247 	s = splnet();
248 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
249 		splx(s);
250 		error = EINVAL;
251 		goto done;
252 	}
253 	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
254 		splx(s);
255 		error = EWOULDBLOCK;
256 		goto done;
257 	}
258 	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
259 		if (head->so_state & SS_CANTRCVMORE) {
260 			head->so_error = ECONNABORTED;
261 			break;
262 		}
263 		error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH,
264 		    "accept", 0);
265 		if (error) {
266 			splx(s);
267 			goto done;
268 		}
269 	}
270 	if (head->so_error) {
271 		error = head->so_error;
272 		head->so_error = 0;
273 		splx(s);
274 		goto done;
275 	}
276 
277 	/*
278 	 * At this point we know that there is at least one connection
279 	 * ready to be accepted. Remove it from the queue prior to
280 	 * allocating the file descriptor for it since falloc() may
281 	 * block allowing another process to accept the connection
282 	 * instead.
283 	 */
284 	so = TAILQ_FIRST(&head->so_comp);
285 	TAILQ_REMOVE(&head->so_comp, so, so_list);
286 	head->so_qlen--;
287 
288 	error = falloc(td, &nfp, &fd);
289 	if (error) {
290 		/*
291 		 * Probably ran out of file descriptors. Put the
292 		 * unaccepted connection back onto the queue and
293 		 * do another wakeup so some other process might
294 		 * have a chance at it.
295 		 */
296 		TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
297 		head->so_qlen++;
298 		wakeup_one(&head->so_timeo);
299 		splx(s);
300 		goto done;
301 	}
302 	fhold(nfp);
303 	td->td_retval[0] = fd;
304 
305 	/* connection has been removed from the listen queue */
306 	KNOTE(&head->so_rcv.sb_sel.si_note, 0);
307 
308 	so->so_state &= ~SS_COMP;
309 	so->so_head = NULL;
310 	if (head->so_sigio != NULL)
311 		fsetown(fgetown(head->so_sigio), &so->so_sigio);
312 
313 	FILE_LOCK(nfp);
314 	soref(so);			/* file descriptor reference */
315 	nfp->f_data = (caddr_t)so;	/* nfp has ref count from falloc */
316 	nfp->f_flag = fflag;
317 	nfp->f_ops = &socketops;
318 	nfp->f_type = DTYPE_SOCKET;
319 	FILE_UNLOCK(nfp);
320 	sa = 0;
321 	error = soaccept(so, &sa);
322 	if (error) {
323 		/*
324 		 * return a namelen of zero for older code which might
325 	 	 * ignore the return value from accept.
326 		 */
327 		if (uap->name != NULL) {
328 			namelen = 0;
329 			(void) copyout((caddr_t)&namelen,
330 			    (caddr_t)uap->anamelen, sizeof(*uap->anamelen));
331 		}
332 		goto noconnection;
333 	}
334 	if (sa == NULL) {
335 		namelen = 0;
336 		if (uap->name)
337 			goto gotnoname;
338 		splx(s);
339 		error = 0;
340 		goto done;
341 	}
342 	if (uap->name) {
343 		/* check sa_len before it is destroyed */
344 		if (namelen > sa->sa_len)
345 			namelen = sa->sa_len;
346 #ifdef COMPAT_OLDSOCK
347 		if (compat)
348 			((struct osockaddr *)sa)->sa_family =
349 			    sa->sa_family;
350 #endif
351 		error = copyout(sa, (caddr_t)uap->name, (u_int)namelen);
352 		if (!error)
353 gotnoname:
354 			error = copyout((caddr_t)&namelen,
355 			    (caddr_t)uap->anamelen, sizeof (*uap->anamelen));
356 	}
357 noconnection:
358 	if (sa)
359 		FREE(sa, M_SONAME);
360 
361 	/*
362 	 * close the new descriptor, assuming someone hasn't ripped it
363 	 * out from under us.
364 	 */
365 	if (error) {
366 		FILEDESC_LOCK(fdp);
367 		if (fdp->fd_ofiles[fd] == nfp) {
368 			fdp->fd_ofiles[fd] = NULL;
369 			FILEDESC_UNLOCK(fdp);
370 			fdrop(nfp, td);
371 		} else {
372 			FILEDESC_UNLOCK(fdp);
373 		}
374 	}
375 	splx(s);
376 
377 	/*
378 	 * Release explicitly held references before returning.
379 	 */
380 done:
381 	if (nfp != NULL)
382 		fdrop(nfp, td);
383 	fputsock(head);
384 done2:
385 	mtx_unlock(&Giant);
386 	return (error);
387 }
388 
/*
 * accept() - current-ABI entry point; forwards to accept1() with the
 * compat flag clear.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
accept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{
	int error;

	error = accept1(td, uap, 0);
	return (error);
}
400 
#ifdef COMPAT_OLDSOCK
/*
 * oaccept() - old-ABI entry point; forwards to accept1() with the
 * compat flag set.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
oaccept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{
	int error;

	error = accept1(td, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
414 
415 /*
416  * MPSAFE
417  */
418 /* ARGSUSED */
419 int
420 connect(td, uap)
421 	struct thread *td;
422 	register struct connect_args /* {
423 		int	s;
424 		caddr_t	name;
425 		int	namelen;
426 	} */ *uap;
427 {
428 	struct socket *so;
429 	struct sockaddr *sa;
430 	int error, s;
431 
432 	mtx_lock(&Giant);
433 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
434 		goto done2;
435 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
436 		error = EALREADY;
437 		goto done1;
438 	}
439 	error = getsockaddr(&sa, uap->name, uap->namelen);
440 	if (error)
441 		goto done1;
442 	error = soconnect(so, sa, td);
443 	if (error)
444 		goto bad;
445 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
446 		FREE(sa, M_SONAME);
447 		error = EINPROGRESS;
448 		goto done1;
449 	}
450 	s = splnet();
451 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
452 		error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, "connec", 0);
453 		if (error)
454 			break;
455 	}
456 	if (error == 0) {
457 		error = so->so_error;
458 		so->so_error = 0;
459 	}
460 	splx(s);
461 bad:
462 	so->so_state &= ~SS_ISCONNECTING;
463 	FREE(sa, M_SONAME);
464 	if (error == ERESTART)
465 		error = EINTR;
466 done1:
467 	fputsock(so);
468 done2:
469 	mtx_unlock(&Giant);
470 	return (error);
471 }
472 
473 /*
474  * MPSAFE
475  */
476 int
477 socketpair(td, uap)
478 	struct thread *td;
479 	register struct socketpair_args /* {
480 		int	domain;
481 		int	type;
482 		int	protocol;
483 		int	*rsv;
484 	} */ *uap;
485 {
486 	register struct filedesc *fdp = td->td_proc->p_fd;
487 	struct file *fp1, *fp2;
488 	struct socket *so1, *so2;
489 	int fd, error, sv[2];
490 
491 	mtx_lock(&Giant);
492 	error = socreate(uap->domain, &so1, uap->type, uap->protocol,
493 	    td->td_proc->p_ucred, td);
494 	if (error)
495 		goto done2;
496 	error = socreate(uap->domain, &so2, uap->type, uap->protocol,
497 	    td->td_proc->p_ucred, td);
498 	if (error)
499 		goto free1;
500 	error = falloc(td, &fp1, &fd);
501 	if (error)
502 		goto free2;
503 	fhold(fp1);
504 	sv[0] = fd;
505 	fp1->f_data = (caddr_t)so1;	/* so1 already has ref count */
506 	error = falloc(td, &fp2, &fd);
507 	if (error)
508 		goto free3;
509 	fhold(fp2);
510 	fp2->f_data = (caddr_t)so2;	/* so2 already has ref count */
511 	sv[1] = fd;
512 	error = soconnect2(so1, so2);
513 	if (error)
514 		goto free4;
515 	if (uap->type == SOCK_DGRAM) {
516 		/*
517 		 * Datagram socket connection is asymmetric.
518 		 */
519 		 error = soconnect2(so2, so1);
520 		 if (error)
521 			goto free4;
522 	}
523 	FILE_LOCK(fp1);
524 	fp1->f_flag = FREAD|FWRITE;
525 	fp1->f_ops = &socketops;
526 	fp1->f_type = DTYPE_SOCKET;
527 	FILE_UNLOCK(fp1);
528 	FILE_LOCK(fp2);
529 	fp2->f_flag = FREAD|FWRITE;
530 	fp2->f_ops = &socketops;
531 	fp2->f_type = DTYPE_SOCKET;
532 	FILE_UNLOCK(fp2);
533 	error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int));
534 	fdrop(fp1, td);
535 	fdrop(fp2, td);
536 	goto done2;
537 free4:
538 	FILEDESC_LOCK(fdp);
539 	if (fdp->fd_ofiles[sv[1]] == fp2) {
540 		fdp->fd_ofiles[sv[1]] = NULL;
541 		FILEDESC_UNLOCK(fdp);
542 		fdrop(fp2, td);
543 	} else
544 		FILEDESC_UNLOCK(fdp);
545 	fdrop(fp2, td);
546 free3:
547 	FILEDESC_LOCK(fdp);
548 	if (fdp->fd_ofiles[sv[0]] == fp1) {
549 		fdp->fd_ofiles[sv[0]] = NULL;
550 		FILEDESC_UNLOCK(fdp);
551 		fdrop(fp1, td);
552 	} else
553 		FILEDESC_UNLOCK(fdp);
554 	fdrop(fp1, td);
555 free2:
556 	(void)soclose(so2);
557 free1:
558 	(void)soclose(so1);
559 done2:
560 	mtx_unlock(&Giant);
561 	return (error);
562 }
563 
564 static int
565 sendit(td, s, mp, flags)
566 	register struct thread *td;
567 	int s;
568 	register struct msghdr *mp;
569 	int flags;
570 {
571 	struct uio auio;
572 	register struct iovec *iov;
573 	register int i;
574 	struct mbuf *control;
575 	struct sockaddr *to = NULL;
576 	int len, error;
577 	struct socket *so;
578 #ifdef KTRACE
579 	struct iovec *ktriov = NULL;
580 	struct uio ktruio;
581 #endif
582 
583 	if ((error = fgetsock(td, s, &so, NULL)) != 0)
584 		return (error);
585 	auio.uio_iov = mp->msg_iov;
586 	auio.uio_iovcnt = mp->msg_iovlen;
587 	auio.uio_segflg = UIO_USERSPACE;
588 	auio.uio_rw = UIO_WRITE;
589 	auio.uio_td = td;
590 	auio.uio_offset = 0;			/* XXX */
591 	auio.uio_resid = 0;
592 	iov = mp->msg_iov;
593 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
594 		if ((auio.uio_resid += iov->iov_len) < 0) {
595 			error = EINVAL;
596 			goto bad;
597 		}
598 	}
599 	if (mp->msg_name) {
600 		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
601 		if (error)
602 			goto bad;
603 	}
604 	if (mp->msg_control) {
605 		if (mp->msg_controllen < sizeof(struct cmsghdr)
606 #ifdef COMPAT_OLDSOCK
607 		    && mp->msg_flags != MSG_COMPAT
608 #endif
609 		) {
610 			error = EINVAL;
611 			goto bad;
612 		}
613 		error = sockargs(&control, mp->msg_control,
614 		    mp->msg_controllen, MT_CONTROL);
615 		if (error)
616 			goto bad;
617 #ifdef COMPAT_OLDSOCK
618 		if (mp->msg_flags == MSG_COMPAT) {
619 			register struct cmsghdr *cm;
620 
621 			M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
622 			if (control == 0) {
623 				error = ENOBUFS;
624 				goto bad;
625 			} else {
626 				cm = mtod(control, struct cmsghdr *);
627 				cm->cmsg_len = control->m_len;
628 				cm->cmsg_level = SOL_SOCKET;
629 				cm->cmsg_type = SCM_RIGHTS;
630 			}
631 		}
632 #endif
633 	} else {
634 		control = 0;
635 	}
636 #ifdef KTRACE
637 	if (KTRPOINT(td->td_proc, KTR_GENIO)) {
638 		int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
639 
640 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
641 		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
642 		ktruio = auio;
643 	}
644 #endif
645 	len = auio.uio_resid;
646 	error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control,
647 						     flags, td);
648 	if (error) {
649 		if (auio.uio_resid != len && (error == ERESTART ||
650 		    error == EINTR || error == EWOULDBLOCK))
651 			error = 0;
652 		if (error == EPIPE) {
653 			PROC_LOCK(td->td_proc);
654 			psignal(td->td_proc, SIGPIPE);
655 			PROC_UNLOCK(td->td_proc);
656 		}
657 	}
658 	if (error == 0)
659 		td->td_retval[0] = len - auio.uio_resid;
660 #ifdef KTRACE
661 	if (ktriov != NULL) {
662 		if (error == 0) {
663 			ktruio.uio_iov = ktriov;
664 			ktruio.uio_resid = td->td_retval[0];
665 			ktrgenio(td->td_proc->p_tracep, s, UIO_WRITE, &ktruio, error);
666 		}
667 		FREE(ktriov, M_TEMP);
668 	}
669 #endif
670 bad:
671 	fputsock(so);
672 	if (to)
673 		FREE(to, M_SONAME);
674 	return (error);
675 }
676 
677 /*
678  * MPSAFE
679  */
680 int
681 sendto(td, uap)
682 	struct thread *td;
683 	register struct sendto_args /* {
684 		int	s;
685 		caddr_t	buf;
686 		size_t	len;
687 		int	flags;
688 		caddr_t	to;
689 		int	tolen;
690 	} */ *uap;
691 {
692 	struct msghdr msg;
693 	struct iovec aiov;
694 	int error;
695 
696 	msg.msg_name = uap->to;
697 	msg.msg_namelen = uap->tolen;
698 	msg.msg_iov = &aiov;
699 	msg.msg_iovlen = 1;
700 	msg.msg_control = 0;
701 #ifdef COMPAT_OLDSOCK
702 	msg.msg_flags = 0;
703 #endif
704 	aiov.iov_base = uap->buf;
705 	aiov.iov_len = uap->len;
706 	mtx_lock(&Giant);
707 	error = sendit(td, uap->s, &msg, uap->flags);
708 	mtx_unlock(&Giant);
709 	return (error);
710 }
711 
712 #ifdef COMPAT_OLDSOCK
713 /*
714  * MPSAFE
715  */
716 int
717 osend(td, uap)
718 	struct thread *td;
719 	register struct osend_args /* {
720 		int	s;
721 		caddr_t	buf;
722 		int	len;
723 		int	flags;
724 	} */ *uap;
725 {
726 	struct msghdr msg;
727 	struct iovec aiov;
728 	int error;
729 
730 	msg.msg_name = 0;
731 	msg.msg_namelen = 0;
732 	msg.msg_iov = &aiov;
733 	msg.msg_iovlen = 1;
734 	aiov.iov_base = uap->buf;
735 	aiov.iov_len = uap->len;
736 	msg.msg_control = 0;
737 	msg.msg_flags = 0;
738 	mtx_lock(&Giant);
739 	error = sendit(td, uap->s, &msg, uap->flags);
740 	mtx_unlock(&Giant);
741 	return (error);
742 }
743 
744 /*
745  * MPSAFE
746  */
747 int
748 osendmsg(td, uap)
749 	struct thread *td;
750 	register struct osendmsg_args /* {
751 		int	s;
752 		caddr_t	msg;
753 		int	flags;
754 	} */ *uap;
755 {
756 	struct msghdr msg;
757 	struct iovec aiov[UIO_SMALLIOV], *iov;
758 	int error;
759 
760 	mtx_lock(&Giant);
761 	error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr));
762 	if (error)
763 		goto done2;
764 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
765 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
766 			error = EMSGSIZE;
767 			goto done2;
768 		}
769 		MALLOC(iov, struct iovec *,
770 		      sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
771 		      M_WAITOK);
772 	} else {
773 		iov = aiov;
774 	}
775 	error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
776 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
777 	if (error)
778 		goto done;
779 	msg.msg_flags = MSG_COMPAT;
780 	msg.msg_iov = iov;
781 	error = sendit(td, uap->s, &msg, uap->flags);
782 done:
783 	if (iov != aiov)
784 		FREE(iov, M_IOV);
785 done2:
786 	mtx_unlock(&Giant);
787 	return (error);
788 }
789 #endif
790 
791 /*
792  * MPSAFE
793  */
794 int
795 sendmsg(td, uap)
796 	struct thread *td;
797 	register struct sendmsg_args /* {
798 		int	s;
799 		caddr_t	msg;
800 		int	flags;
801 	} */ *uap;
802 {
803 	struct msghdr msg;
804 	struct iovec aiov[UIO_SMALLIOV], *iov;
805 	int error;
806 
807 	mtx_lock(&Giant);
808 	error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg));
809 	if (error)
810 		goto done2;
811 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
812 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
813 			error = EMSGSIZE;
814 			goto done2;
815 		}
816 		MALLOC(iov, struct iovec *,
817 		       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
818 		       M_WAITOK);
819 	} else {
820 		iov = aiov;
821 	}
822 	if (msg.msg_iovlen &&
823 	    (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
824 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
825 		goto done;
826 	msg.msg_iov = iov;
827 #ifdef COMPAT_OLDSOCK
828 	msg.msg_flags = 0;
829 #endif
830 	error = sendit(td, uap->s, &msg, uap->flags);
831 done:
832 	if (iov != aiov)
833 		FREE(iov, M_IOV);
834 done2:
835 	mtx_unlock(&Giant);
836 	return (error);
837 }
838 
839 static int
840 recvit(td, s, mp, namelenp)
841 	register struct thread *td;
842 	int s;
843 	register struct msghdr *mp;
844 	caddr_t namelenp;
845 {
846 	struct uio auio;
847 	register struct iovec *iov;
848 	register int i;
849 	int len, error;
850 	struct mbuf *m, *control = 0;
851 	caddr_t ctlbuf;
852 	struct socket *so;
853 	struct sockaddr *fromsa = 0;
854 #ifdef KTRACE
855 	struct iovec *ktriov = NULL;
856 	struct uio ktruio;
857 #endif
858 
859 	if ((error = fgetsock(td, s, &so, NULL)) != 0)
860 		return (error);
861 	auio.uio_iov = mp->msg_iov;
862 	auio.uio_iovcnt = mp->msg_iovlen;
863 	auio.uio_segflg = UIO_USERSPACE;
864 	auio.uio_rw = UIO_READ;
865 	auio.uio_td = td;
866 	auio.uio_offset = 0;			/* XXX */
867 	auio.uio_resid = 0;
868 	iov = mp->msg_iov;
869 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
870 		if ((auio.uio_resid += iov->iov_len) < 0) {
871 			fputsock(so);
872 			return (EINVAL);
873 		}
874 	}
875 #ifdef KTRACE
876 	if (KTRPOINT(td->td_proc, KTR_GENIO)) {
877 		int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
878 
879 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
880 		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
881 		ktruio = auio;
882 	}
883 #endif
884 	len = auio.uio_resid;
885 	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
886 	    (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
887 	    &mp->msg_flags);
888 	if (error) {
889 		if (auio.uio_resid != len && (error == ERESTART ||
890 		    error == EINTR || error == EWOULDBLOCK))
891 			error = 0;
892 	}
893 #ifdef KTRACE
894 	if (ktriov != NULL) {
895 		if (error == 0) {
896 			ktruio.uio_iov = ktriov;
897 			ktruio.uio_resid = len - auio.uio_resid;
898 			ktrgenio(td->td_proc->p_tracep, s, UIO_READ, &ktruio, error);
899 		}
900 		FREE(ktriov, M_TEMP);
901 	}
902 #endif
903 	if (error)
904 		goto out;
905 	td->td_retval[0] = len - auio.uio_resid;
906 	if (mp->msg_name) {
907 		len = mp->msg_namelen;
908 		if (len <= 0 || fromsa == 0)
909 			len = 0;
910 		else {
911 #ifndef MIN
912 #define MIN(a,b) ((a)>(b)?(b):(a))
913 #endif
914 			/* save sa_len before it is destroyed by MSG_COMPAT */
915 			len = MIN(len, fromsa->sa_len);
916 #ifdef COMPAT_OLDSOCK
917 			if (mp->msg_flags & MSG_COMPAT)
918 				((struct osockaddr *)fromsa)->sa_family =
919 				    fromsa->sa_family;
920 #endif
921 			error = copyout(fromsa,
922 			    (caddr_t)mp->msg_name, (unsigned)len);
923 			if (error)
924 				goto out;
925 		}
926 		mp->msg_namelen = len;
927 		if (namelenp &&
928 		    (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) {
929 #ifdef COMPAT_OLDSOCK
930 			if (mp->msg_flags & MSG_COMPAT)
931 				error = 0;	/* old recvfrom didn't check */
932 			else
933 #endif
934 			goto out;
935 		}
936 	}
937 	if (mp->msg_control) {
938 #ifdef COMPAT_OLDSOCK
939 		/*
940 		 * We assume that old recvmsg calls won't receive access
941 		 * rights and other control info, esp. as control info
942 		 * is always optional and those options didn't exist in 4.3.
943 		 * If we receive rights, trim the cmsghdr; anything else
944 		 * is tossed.
945 		 */
946 		if (control && mp->msg_flags & MSG_COMPAT) {
947 			if (mtod(control, struct cmsghdr *)->cmsg_level !=
948 			    SOL_SOCKET ||
949 			    mtod(control, struct cmsghdr *)->cmsg_type !=
950 			    SCM_RIGHTS) {
951 				mp->msg_controllen = 0;
952 				goto out;
953 			}
954 			control->m_len -= sizeof (struct cmsghdr);
955 			control->m_data += sizeof (struct cmsghdr);
956 		}
957 #endif
958 		len = mp->msg_controllen;
959 		m = control;
960 		mp->msg_controllen = 0;
961 		ctlbuf = (caddr_t) mp->msg_control;
962 
963 		while (m && len > 0) {
964 			unsigned int tocopy;
965 
966 			if (len >= m->m_len)
967 				tocopy = m->m_len;
968 			else {
969 				mp->msg_flags |= MSG_CTRUNC;
970 				tocopy = len;
971 			}
972 
973 			if ((error = copyout((caddr_t)mtod(m, caddr_t),
974 					ctlbuf, tocopy)) != 0)
975 				goto out;
976 
977 			ctlbuf += tocopy;
978 			len -= tocopy;
979 			m = m->m_next;
980 		}
981 		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
982 	}
983 out:
984 	fputsock(so);
985 	if (fromsa)
986 		FREE(fromsa, M_SONAME);
987 	if (control)
988 		m_freem(control);
989 	return (error);
990 }
991 
992 /*
993  * MPSAFE
994  */
995 int
996 recvfrom(td, uap)
997 	struct thread *td;
998 	register struct recvfrom_args /* {
999 		int	s;
1000 		caddr_t	buf;
1001 		size_t	len;
1002 		int	flags;
1003 		caddr_t	from;
1004 		int	*fromlenaddr;
1005 	} */ *uap;
1006 {
1007 	struct msghdr msg;
1008 	struct iovec aiov;
1009 	int error;
1010 
1011 	mtx_lock(&Giant);
1012 	if (uap->fromlenaddr) {
1013 		error = copyin((caddr_t)uap->fromlenaddr,
1014 		    (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
1015 		if (error)
1016 			goto done2;
1017 	} else {
1018 		msg.msg_namelen = 0;
1019 	}
1020 	msg.msg_name = uap->from;
1021 	msg.msg_iov = &aiov;
1022 	msg.msg_iovlen = 1;
1023 	aiov.iov_base = uap->buf;
1024 	aiov.iov_len = uap->len;
1025 	msg.msg_control = 0;
1026 	msg.msg_flags = uap->flags;
1027 	error = recvit(td, uap->s, &msg, (caddr_t)uap->fromlenaddr);
1028 done2:
1029 	mtx_unlock(&Giant);
1030 	return(error);
1031 }
1032 
#ifdef COMPAT_OLDSOCK
/*
 * orecvfrom() - old recvfrom(2): tag the request with MSG_COMPAT and let
 * recvfrom() do the work.
 *
 * MPSAFE
 */
int
orecvfrom(td, uap)
	struct thread *td;
	struct recvfrom_args *uap;
{
	int error;

	uap->flags = uap->flags | MSG_COMPAT;
	error = recvfrom(td, uap);
	return (error);
}
#endif
1047 
1048 
1049 #ifdef COMPAT_OLDSOCK
1050 /*
1051  * MPSAFE
1052  */
1053 int
1054 orecv(td, uap)
1055 	struct thread *td;
1056 	register struct orecv_args /* {
1057 		int	s;
1058 		caddr_t	buf;
1059 		int	len;
1060 		int	flags;
1061 	} */ *uap;
1062 {
1063 	struct msghdr msg;
1064 	struct iovec aiov;
1065 	int error;
1066 
1067 	mtx_lock(&Giant);
1068 	msg.msg_name = 0;
1069 	msg.msg_namelen = 0;
1070 	msg.msg_iov = &aiov;
1071 	msg.msg_iovlen = 1;
1072 	aiov.iov_base = uap->buf;
1073 	aiov.iov_len = uap->len;
1074 	msg.msg_control = 0;
1075 	msg.msg_flags = uap->flags;
1076 	error = recvit(td, uap->s, &msg, (caddr_t)0);
1077 	mtx_unlock(&Giant);
1078 	return (error);
1079 }
1080 
1081 /*
1082  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1083  * overlays the new one, missing only the flags, and with the (old) access
1084  * rights where the control fields are now.
1085  *
1086  * MPSAFE
1087  */
1088 int
1089 orecvmsg(td, uap)
1090 	struct thread *td;
1091 	register struct orecvmsg_args /* {
1092 		int	s;
1093 		struct	omsghdr *msg;
1094 		int	flags;
1095 	} */ *uap;
1096 {
1097 	struct msghdr msg;
1098 	struct iovec aiov[UIO_SMALLIOV], *iov;
1099 	int error;
1100 
1101 	error = copyin((caddr_t)uap->msg, (caddr_t)&msg,
1102 	    sizeof (struct omsghdr));
1103 	if (error)
1104 		return (error);
1105 
1106 	mtx_lock(&Giant);
1107 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1108 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1109 			error = EMSGSIZE;
1110 			goto done2;
1111 		}
1112 		MALLOC(iov, struct iovec *,
1113 		      sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1114 		      M_WAITOK);
1115 	} else {
1116 		iov = aiov;
1117 	}
1118 	msg.msg_flags = uap->flags | MSG_COMPAT;
1119 	error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
1120 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1121 	if (error)
1122 		goto done;
1123 	msg.msg_iov = iov;
1124 	error = recvit(td, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen);
1125 
1126 	if (msg.msg_controllen && error == 0)
1127 		error = copyout((caddr_t)&msg.msg_controllen,
1128 		    (caddr_t)&uap->msg->msg_accrightslen, sizeof (int));
1129 done:
1130 	if (iov != aiov)
1131 		FREE(iov, M_IOV);
1132 done2:
1133 	mtx_unlock(&Giant);
1134 	return (error);
1135 }
1136 #endif
1137 
1138 /*
1139  * MPSAFE
1140  */
1141 int
1142 recvmsg(td, uap)
1143 	struct thread *td;
1144 	register struct recvmsg_args /* {
1145 		int	s;
1146 		struct	msghdr *msg;
1147 		int	flags;
1148 	} */ *uap;
1149 {
1150 	struct msghdr msg;
1151 	struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
1152 	register int error;
1153 
1154 	mtx_lock(&Giant);
1155 	error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg));
1156 	if (error)
1157 		goto done2;
1158 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1159 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1160 			error = EMSGSIZE;
1161 			goto done2;
1162 		}
1163 		MALLOC(iov, struct iovec *,
1164 		       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1165 		       M_WAITOK);
1166 	} else {
1167 		iov = aiov;
1168 	}
1169 #ifdef COMPAT_OLDSOCK
1170 	msg.msg_flags = uap->flags &~ MSG_COMPAT;
1171 #else
1172 	msg.msg_flags = uap->flags;
1173 #endif
1174 	uiov = msg.msg_iov;
1175 	msg.msg_iov = iov;
1176 	error = copyin((caddr_t)uiov, (caddr_t)iov,
1177 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1178 	if (error)
1179 		goto done;
1180 	error = recvit(td, uap->s, &msg, (caddr_t)0);
1181 	if (!error) {
1182 		msg.msg_iov = uiov;
1183 		error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg));
1184 	}
1185 done:
1186 	if (iov != aiov)
1187 		FREE(iov, M_IOV);
1188 done2:
1189 	mtx_unlock(&Giant);
1190 	return (error);
1191 }
1192 
1193 /*
1194  * MPSAFE
1195  */
1196 /* ARGSUSED */
1197 int
1198 shutdown(td, uap)
1199 	struct thread *td;
1200 	register struct shutdown_args /* {
1201 		int	s;
1202 		int	how;
1203 	} */ *uap;
1204 {
1205 	struct socket *so;
1206 	int error;
1207 
1208 	mtx_lock(&Giant);
1209 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1210 		error = soshutdown(so, uap->how);
1211 		fputsock(so);
1212 	}
1213 	mtx_unlock(&Giant);
1214 	return(error);
1215 }
1216 
1217 /*
1218  * MPSAFE
1219  */
1220 /* ARGSUSED */
1221 int
1222 setsockopt(td, uap)
1223 	struct thread *td;
1224 	register struct setsockopt_args /* {
1225 		int	s;
1226 		int	level;
1227 		int	name;
1228 		caddr_t	val;
1229 		int	valsize;
1230 	} */ *uap;
1231 {
1232 	struct socket *so;
1233 	struct sockopt sopt;
1234 	int error;
1235 
1236 	if (uap->val == 0 && uap->valsize != 0)
1237 		return (EFAULT);
1238 	if (uap->valsize < 0)
1239 		return (EINVAL);
1240 
1241 	mtx_lock(&Giant);
1242 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1243 		sopt.sopt_dir = SOPT_SET;
1244 		sopt.sopt_level = uap->level;
1245 		sopt.sopt_name = uap->name;
1246 		sopt.sopt_val = uap->val;
1247 		sopt.sopt_valsize = uap->valsize;
1248 		sopt.sopt_td = td;
1249 		error = sosetopt(so, &sopt);
1250 		fputsock(so);
1251 	}
1252 	mtx_unlock(&Giant);
1253 	return(error);
1254 }
1255 
1256 /*
1257  * MPSAFE
1258  */
1259 /* ARGSUSED */
1260 int
1261 getsockopt(td, uap)
1262 	struct thread *td;
1263 	register struct getsockopt_args /* {
1264 		int	s;
1265 		int	level;
1266 		int	name;
1267 		caddr_t	val;
1268 		int	*avalsize;
1269 	} */ *uap;
1270 {
1271 	int	valsize, error;
1272 	struct  socket *so;
1273 	struct	sockopt sopt;
1274 
1275 	mtx_lock(&Giant);
1276 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1277 		goto done2;
1278 	if (uap->val) {
1279 		error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize,
1280 		    sizeof (valsize));
1281 		if (error)
1282 			goto done1;
1283 		if (valsize < 0) {
1284 			error = EINVAL;
1285 			goto done1;
1286 		}
1287 	} else {
1288 		valsize = 0;
1289 	}
1290 
1291 	sopt.sopt_dir = SOPT_GET;
1292 	sopt.sopt_level = uap->level;
1293 	sopt.sopt_name = uap->name;
1294 	sopt.sopt_val = uap->val;
1295 	sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1296 	sopt.sopt_td = td;
1297 
1298 	error = sogetopt(so, &sopt);
1299 	if (error == 0) {
1300 		valsize = sopt.sopt_valsize;
1301 		error = copyout((caddr_t)&valsize,
1302 				(caddr_t)uap->avalsize, sizeof (valsize));
1303 	}
1304 done1:
1305 	fputsock(so);
1306 done2:
1307 	mtx_unlock(&Giant);
1308 	return (error);
1309 }
1310 
1311 /*
1312  * getsockname1() - Get socket name.
1313  *
1314  * MPSAFE
1315  */
1316 /* ARGSUSED */
1317 static int
1318 getsockname1(td, uap, compat)
1319 	struct thread *td;
1320 	register struct getsockname_args /* {
1321 		int	fdes;
1322 		caddr_t	asa;
1323 		int	*alen;
1324 	} */ *uap;
1325 	int compat;
1326 {
1327 	struct socket *so;
1328 	struct sockaddr *sa;
1329 	int len, error;
1330 
1331 	mtx_lock(&Giant);
1332 	if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1333 		goto done2;
1334 	error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1335 	if (error)
1336 		goto done1;
1337 	sa = 0;
1338 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1339 	if (error)
1340 		goto bad;
1341 	if (sa == 0) {
1342 		len = 0;
1343 		goto gotnothing;
1344 	}
1345 
1346 	len = MIN(len, sa->sa_len);
1347 #ifdef COMPAT_OLDSOCK
1348 	if (compat)
1349 		((struct osockaddr *)sa)->sa_family = sa->sa_family;
1350 #endif
1351 	error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1352 	if (error == 0)
1353 gotnothing:
1354 		error = copyout((caddr_t)&len, (caddr_t)uap->alen,
1355 		    sizeof (len));
1356 bad:
1357 	if (sa)
1358 		FREE(sa, M_SONAME);
1359 done1:
1360 	fputsock(so);
1361 done2:
1362 	mtx_unlock(&Giant);
1363 	return (error);
1364 }
1365 
/*
 * getsockname system call entry point: forwards to getsockname1() with
 * the compatibility flag cleared.
 *
 * MPSAFE
 */
int
getsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(td, uap, 0);
	return (error);
}
1377 
#ifdef COMPAT_OLDSOCK
/*
 * Old-style getsockname entry point: forwards to getsockname1() with
 * the compatibility flag set.
 *
 * MPSAFE
 */
int
ogetsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(td, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
1391 
1392 /*
1393  * getpeername1() - Get name of peer for connected socket.
1394  *
1395  * MPSAFE
1396  */
1397 /* ARGSUSED */
1398 static int
1399 getpeername1(td, uap, compat)
1400 	struct thread *td;
1401 	register struct getpeername_args /* {
1402 		int	fdes;
1403 		caddr_t	asa;
1404 		int	*alen;
1405 	} */ *uap;
1406 	int compat;
1407 {
1408 	struct socket *so;
1409 	struct sockaddr *sa;
1410 	int len, error;
1411 
1412 	mtx_lock(&Giant);
1413 	if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1414 		goto done2;
1415 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1416 		error = ENOTCONN;
1417 		goto done1;
1418 	}
1419 	error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1420 	if (error)
1421 		goto done1;
1422 	sa = 0;
1423 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1424 	if (error)
1425 		goto bad;
1426 	if (sa == 0) {
1427 		len = 0;
1428 		goto gotnothing;
1429 	}
1430 	len = MIN(len, sa->sa_len);
1431 #ifdef COMPAT_OLDSOCK
1432 	if (compat)
1433 		((struct osockaddr *)sa)->sa_family =
1434 		    sa->sa_family;
1435 #endif
1436 	error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1437 	if (error)
1438 		goto bad;
1439 gotnothing:
1440 	error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len));
1441 bad:
1442 	if (sa)
1443 		FREE(sa, M_SONAME);
1444 done1:
1445 	fputsock(so);
1446 done2:
1447 	mtx_unlock(&Giant);
1448 	return (error);
1449 }
1450 
/*
 * getpeername system call entry point: forwards to getpeername1() with
 * the compatibility flag cleared.
 *
 * MPSAFE
 */
int
getpeername(td, uap)
	struct thread *td;
	struct getpeername_args *uap;
{
	int error;

	error = getpeername1(td, uap, 0);
	return (error);
}
1462 
#ifdef COMPAT_OLDSOCK
/*
 * Old-style getpeername entry point: forwards to getpeername1() with
 * the compatibility flag set.
 *
 * MPSAFE
 */
int
ogetpeername(td, uap)
	struct thread *td;
	struct ogetpeername_args *uap;
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(td, args, 1));
}
#endif /* COMPAT_OLDSOCK */
1477 
1478 int
1479 sockargs(mp, buf, buflen, type)
1480 	struct mbuf **mp;
1481 	caddr_t buf;
1482 	int buflen, type;
1483 {
1484 	register struct sockaddr *sa;
1485 	register struct mbuf *m;
1486 	int error;
1487 
1488 	if ((u_int)buflen > MLEN) {
1489 #ifdef COMPAT_OLDSOCK
1490 		if (type == MT_SONAME && (u_int)buflen <= 112)
1491 			buflen = MLEN;		/* unix domain compat. hack */
1492 		else
1493 #endif
1494 		return (EINVAL);
1495 	}
1496 	m = m_get(M_TRYWAIT, type);
1497 	if (m == NULL)
1498 		return (ENOBUFS);
1499 	m->m_len = buflen;
1500 	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1501 	if (error)
1502 		(void) m_free(m);
1503 	else {
1504 		*mp = m;
1505 		if (type == MT_SONAME) {
1506 			sa = mtod(m, struct sockaddr *);
1507 
1508 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1509 			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1510 				sa->sa_family = sa->sa_len;
1511 #endif
1512 			sa->sa_len = buflen;
1513 		}
1514 	}
1515 	return (error);
1516 }
1517 
1518 int
1519 getsockaddr(namp, uaddr, len)
1520 	struct sockaddr **namp;
1521 	caddr_t uaddr;
1522 	size_t len;
1523 {
1524 	struct sockaddr *sa;
1525 	int error;
1526 
1527 	if (len > SOCK_MAXADDRLEN)
1528 		return ENAMETOOLONG;
1529 	MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1530 	error = copyin(uaddr, sa, len);
1531 	if (error) {
1532 		FREE(sa, M_SONAME);
1533 	} else {
1534 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1535 		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1536 			sa->sa_family = sa->sa_len;
1537 #endif
1538 		sa->sa_len = len;
1539 		*namp = sa;
1540 	}
1541 	return error;
1542 }
1543 
1544 /*
1545  * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
1546  * XXX - The sf_buf functions are currently private to sendfile(2), so have
1547  * been made static, but may be useful in the future for doing zero-copy in
1548  * other parts of the networking code.
1549  */
1550 static void
1551 sf_buf_init(void *arg)
1552 {
1553 	int i;
1554 
1555 	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", MTX_DEF);
1556 	mtx_lock(&sf_freelist.sf_lock);
1557 	SLIST_INIT(&sf_freelist.sf_head);
1558 	sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE);
1559 	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
1560 	    M_NOWAIT | M_ZERO);
1561 	for (i = 0; i < nsfbufs; i++) {
1562 		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
1563 		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
1564 	}
1565 	sf_buf_alloc_want = 0;
1566 	mtx_unlock(&sf_freelist.sf_lock);
1567 }
1568 
1569 /*
1570  * Get an sf_buf from the freelist. Will block if none are available.
1571  */
1572 static struct sf_buf *
1573 sf_buf_alloc()
1574 {
1575 	struct sf_buf *sf;
1576 	int error;
1577 
1578 	mtx_lock(&sf_freelist.sf_lock);
1579 	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
1580 		sf_buf_alloc_want++;
1581 		error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH,
1582 		    "sfbufa", 0);
1583 		sf_buf_alloc_want--;
1584 
1585 		/*
1586 		 * If we got a signal, don't risk going back to sleep.
1587 		 */
1588 		if (error)
1589 			break;
1590 	}
1591 	if (sf != NULL)
1592 		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
1593 	mtx_unlock(&sf_freelist.sf_lock);
1594 	return (sf);
1595 }
1596 
/*
 * Translate an address within the sf_buf KVA range (which starts at
 * sf_base, one page per sf_buf) into a pointer to its sf_buf descriptor.
 */
#define dtosf(x)	(sf_bufs + (((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT))
1598 
1599 /*
1600  * Detatch mapped page and release resources back to the system.
1601  */
1602 static void
1603 sf_buf_free(caddr_t addr, void *args)
1604 {
1605 	struct sf_buf *sf;
1606 	struct vm_page *m;
1607 
1608 	GIANT_REQUIRED;
1609 
1610 	sf = dtosf(addr);
1611 	pmap_qremove((vm_offset_t)addr, 1);
1612 	m = sf->m;
1613 	vm_page_unwire(m, 0);
1614 	/*
1615 	 * Check for the object going away on us. This can
1616 	 * happen since we don't hold a reference to it.
1617 	 * If so, we're responsible for freeing the page.
1618 	 */
1619 	if (m->wire_count == 0 && m->object == NULL)
1620 		vm_page_free(m);
1621 	sf->m = NULL;
1622 	mtx_lock(&sf_freelist.sf_lock);
1623 	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
1624 	if (sf_buf_alloc_want > 0)
1625 		wakeup_one(&sf_freelist);
1626 	mtx_unlock(&sf_freelist.sf_lock);
1627 }
1628 
1629 /*
1630  * sendfile(2)
1631  *
1632  * MPSAFE
1633  *
1634  * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1635  *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1636  *
1637  * Send a file specified by 'fd' and starting at 'offset' to a socket
1638  * specified by 's'. Send only 'nbytes' of the file or until EOF if
1639  * nbytes == 0. Optionally add a header and/or trailer to the socket
1640  * output. If specified, write the total number of bytes sent into *sbytes.
1641  *
1642  */
1643 int
1644 sendfile(struct thread *td, struct sendfile_args *uap)
1645 {
1646 	struct vnode *vp;
1647 	struct vm_object *obj;
1648 	struct socket *so = NULL;
1649 	struct mbuf *m;
1650 	struct sf_buf *sf;
1651 	struct vm_page *pg;
1652 	struct writev_args nuap;
1653 	struct sf_hdtr hdtr;
1654 	off_t off, xfsize, hdtr_size, sbytes = 0;
1655 	int error, s;
1656 
1657 	mtx_lock(&Giant);
1658 
1659 	hdtr_size = 0;
1660 
1661 	/*
1662 	 * The descriptor must be a regular file and have a backing VM object.
1663 	 */
1664 	if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1665 		goto done;
1666 	if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
1667 		error = EINVAL;
1668 		goto done;
1669 	}
1670 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1671 		goto done;
1672 	if (so->so_type != SOCK_STREAM) {
1673 		error = EINVAL;
1674 		goto done;
1675 	}
1676 	if ((so->so_state & SS_ISCONNECTED) == 0) {
1677 		error = ENOTCONN;
1678 		goto done;
1679 	}
1680 	if (uap->offset < 0) {
1681 		error = EINVAL;
1682 		goto done;
1683 	}
1684 
1685 	/*
1686 	 * If specified, get the pointer to the sf_hdtr struct for
1687 	 * any headers/trailers.
1688 	 */
1689 	if (uap->hdtr != NULL) {
1690 		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1691 		if (error)
1692 			goto done;
1693 		/*
1694 		 * Send any headers. Wimp out and use writev(2).
1695 		 */
1696 		if (hdtr.headers != NULL) {
1697 			nuap.fd = uap->s;
1698 			nuap.iovp = hdtr.headers;
1699 			nuap.iovcnt = hdtr.hdr_cnt;
1700 			error = writev(td, &nuap);
1701 			if (error)
1702 				goto done;
1703 			hdtr_size += td->td_retval[0];
1704 		}
1705 	}
1706 
1707 	/*
1708 	 * Protect against multiple writers to the socket.
1709 	 */
1710 	(void) sblock(&so->so_snd, M_WAITOK);
1711 
1712 	/*
1713 	 * Loop through the pages in the file, starting with the requested
1714 	 * offset. Get a file page (do I/O if necessary), map the file page
1715 	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1716 	 * it on the socket.
1717 	 */
1718 	for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1719 		vm_pindex_t pindex;
1720 		vm_offset_t pgoff;
1721 
1722 		pindex = OFF_TO_IDX(off);
1723 retry_lookup:
1724 		/*
1725 		 * Calculate the amount to transfer. Not to exceed a page,
1726 		 * the EOF, or the passed in nbytes.
1727 		 */
1728 		xfsize = obj->un_pager.vnp.vnp_size - off;
1729 		if (xfsize > PAGE_SIZE)
1730 			xfsize = PAGE_SIZE;
1731 		pgoff = (vm_offset_t)(off & PAGE_MASK);
1732 		if (PAGE_SIZE - pgoff < xfsize)
1733 			xfsize = PAGE_SIZE - pgoff;
1734 		if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1735 			xfsize = uap->nbytes - sbytes;
1736 		if (xfsize <= 0)
1737 			break;
1738 		/*
1739 		 * Optimize the non-blocking case by looking at the socket space
1740 		 * before going to the extra work of constituting the sf_buf.
1741 		 */
1742 		if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1743 			if (so->so_state & SS_CANTSENDMORE)
1744 				error = EPIPE;
1745 			else
1746 				error = EAGAIN;
1747 			sbunlock(&so->so_snd);
1748 			goto done;
1749 		}
1750 		/*
1751 		 * Attempt to look up the page.
1752 		 *
1753 		 *	Allocate if not found
1754 		 *
1755 		 *	Wait and loop if busy.
1756 		 */
1757 		pg = vm_page_lookup(obj, pindex);
1758 
1759 		if (pg == NULL) {
1760 			pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL);
1761 			if (pg == NULL) {
1762 				VM_WAIT;
1763 				goto retry_lookup;
1764 			}
1765 			vm_page_wakeup(pg);
1766 		} else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) {
1767 			goto retry_lookup;
1768 		}
1769 
1770 		/*
1771 		 * Wire the page so it does not get ripped out from under
1772 		 * us.
1773 		 */
1774 
1775 		vm_page_wire(pg);
1776 
1777 		/*
1778 		 * If page is not valid for what we need, initiate I/O
1779 		 */
1780 
1781 		if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) {
1782 			struct uio auio;
1783 			struct iovec aiov;
1784 			int bsize;
1785 
1786 			/*
1787 			 * Ensure that our page is still around when the I/O
1788 			 * completes.
1789 			 */
1790 			vm_page_io_start(pg);
1791 
1792 			/*
1793 			 * Get the page from backing store.
1794 			 */
1795 			bsize = vp->v_mount->mnt_stat.f_iosize;
1796 			auio.uio_iov = &aiov;
1797 			auio.uio_iovcnt = 1;
1798 			aiov.iov_base = 0;
1799 			aiov.iov_len = MAXBSIZE;
1800 			auio.uio_resid = MAXBSIZE;
1801 			auio.uio_offset = trunc_page(off);
1802 			auio.uio_segflg = UIO_NOCOPY;
1803 			auio.uio_rw = UIO_READ;
1804 			auio.uio_td = td;
1805 			vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
1806 			error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16),
1807 			        td->td_proc->p_ucred);
1808 			VOP_UNLOCK(vp, 0, td);
1809 			vm_page_flag_clear(pg, PG_ZERO);
1810 			vm_page_io_finish(pg);
1811 			if (error) {
1812 				vm_page_unwire(pg, 0);
1813 				/*
1814 				 * See if anyone else might know about this page.
1815 				 * If not and it is not valid, then free it.
1816 				 */
1817 				if (pg->wire_count == 0 && pg->valid == 0 &&
1818 				    pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1819 				    pg->hold_count == 0) {
1820 					vm_page_busy(pg);
1821 					vm_page_free(pg);
1822 				}
1823 				sbunlock(&so->so_snd);
1824 				goto done;
1825 			}
1826 		}
1827 
1828 
1829 		/*
1830 		 * Get a sendfile buf. We usually wait as long as necessary,
1831 		 * but this wait can be interrupted.
1832 		 */
1833 		if ((sf = sf_buf_alloc()) == NULL) {
1834 			vm_page_unwire(pg, 0);
1835 			if (pg->wire_count == 0 && pg->object == NULL)
1836 				vm_page_free(pg);
1837 			sbunlock(&so->so_snd);
1838 			error = EINTR;
1839 			goto done;
1840 		}
1841 
1842 		/*
1843 		 * Allocate a kernel virtual page and insert the physical page
1844 		 * into it.
1845 		 */
1846 		sf->m = pg;
1847 		pmap_qenter(sf->kva, &pg, 1);
1848 		/*
1849 		 * Get an mbuf header and set it up as having external storage.
1850 		 */
1851 		MGETHDR(m, M_TRYWAIT, MT_DATA);
1852 		if (m == NULL) {
1853 			error = ENOBUFS;
1854 			sf_buf_free((void *)sf->kva, NULL);
1855 			sbunlock(&so->so_snd);
1856 			goto done;
1857 		}
1858 		/*
1859 		 * Setup external storage for mbuf.
1860 		 */
1861 		MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, NULL, M_RDONLY,
1862 		    EXT_SFBUF);
1863 		m->m_data = (char *) sf->kva + pgoff;
1864 		m->m_pkthdr.len = m->m_len = xfsize;
1865 		/*
1866 		 * Add the buffer to the socket buffer chain.
1867 		 */
1868 		s = splnet();
1869 retry_space:
1870 		/*
1871 		 * Make sure that the socket is still able to take more data.
1872 		 * CANTSENDMORE being true usually means that the connection
1873 		 * was closed. so_error is true when an error was sensed after
1874 		 * a previous send.
1875 		 * The state is checked after the page mapping and buffer
1876 		 * allocation above since those operations may block and make
1877 		 * any socket checks stale. From this point forward, nothing
1878 		 * blocks before the pru_send (or more accurately, any blocking
1879 		 * results in a loop back to here to re-check).
1880 		 */
1881 		if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1882 			if (so->so_state & SS_CANTSENDMORE) {
1883 				error = EPIPE;
1884 			} else {
1885 				error = so->so_error;
1886 				so->so_error = 0;
1887 			}
1888 			m_freem(m);
1889 			sbunlock(&so->so_snd);
1890 			splx(s);
1891 			goto done;
1892 		}
1893 		/*
1894 		 * Wait for socket space to become available. We do this just
1895 		 * after checking the connection state above in order to avoid
1896 		 * a race condition with sbwait().
1897 		 */
1898 		if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
1899 			if (so->so_state & SS_NBIO) {
1900 				m_freem(m);
1901 				sbunlock(&so->so_snd);
1902 				splx(s);
1903 				error = EAGAIN;
1904 				goto done;
1905 			}
1906 			error = sbwait(&so->so_snd);
1907 			/*
1908 			 * An error from sbwait usually indicates that we've
1909 			 * been interrupted by a signal. If we've sent anything
1910 			 * then return bytes sent, otherwise return the error.
1911 			 */
1912 			if (error) {
1913 				m_freem(m);
1914 				sbunlock(&so->so_snd);
1915 				splx(s);
1916 				goto done;
1917 			}
1918 			goto retry_space;
1919 		}
1920 		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
1921 		splx(s);
1922 		if (error) {
1923 			sbunlock(&so->so_snd);
1924 			goto done;
1925 		}
1926 	}
1927 	sbunlock(&so->so_snd);
1928 
1929 	/*
1930 	 * Send trailers. Wimp out and use writev(2).
1931 	 */
1932 	if (uap->hdtr != NULL && hdtr.trailers != NULL) {
1933 			nuap.fd = uap->s;
1934 			nuap.iovp = hdtr.trailers;
1935 			nuap.iovcnt = hdtr.trl_cnt;
1936 			error = writev(td, &nuap);
1937 			if (error)
1938 				goto done;
1939 			hdtr_size += td->td_retval[0];
1940 	}
1941 
1942 done:
1943 	/*
1944 	 * If there was no error we have to clear td->td_retval[0]
1945 	 * because it may have been set by writev.
1946 	 */
1947 	if (error == 0) {
1948 		td->td_retval[0] = 0;
1949 	}
1950 	if (uap->sbytes != NULL) {
1951 		sbytes += hdtr_size;
1952 		copyout(&sbytes, uap->sbytes, sizeof(off_t));
1953 	}
1954 	if (vp)
1955 		vrele(vp);
1956 	if (so)
1957 		fputsock(so);
1958 	mtx_unlock(&Giant);
1959 	return (error);
1960 }
1961