xref: /freebsd/sys/kern/uipc_syscalls.c (revision 09e8dea79366f1e5b3a73e8a271b26e4b6bf2e6a)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * sendfile(2) and related extensions:
6  * Copyright (c) 1998, David Greenman. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
37  * $FreeBSD$
38  */
39 
40 #include "opt_compat.h"
41 #include "opt_ktrace.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/kernel.h>
46 #include <sys/lock.h>
47 #include <sys/mutex.h>
48 #include <sys/sysproto.h>
49 #include <sys/malloc.h>
50 #include <sys/filedesc.h>
51 #include <sys/event.h>
52 #include <sys/proc.h>
53 #include <sys/fcntl.h>
54 #include <sys/file.h>
55 #include <sys/lock.h>
56 #include <sys/mount.h>
57 #include <sys/mbuf.h>
58 #include <sys/protosw.h>
59 #include <sys/socket.h>
60 #include <sys/socketvar.h>
61 #include <sys/signalvar.h>
62 #include <sys/uio.h>
63 #include <sys/vnode.h>
64 #ifdef KTRACE
65 #include <sys/ktrace.h>
66 #endif
67 
68 #include <vm/vm.h>
69 #include <vm/vm_object.h>
70 #include <vm/vm_page.h>
71 #include <vm/vm_pageout.h>
72 #include <vm/vm_kern.h>
73 #include <vm/vm_extern.h>
74 
75 static void sf_buf_init(void *arg);
76 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
77 struct sf_buf *sf_buf_alloc(void);
78 void sf_buf_free(caddr_t addr, void *args);
79 
80 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
81 static int recvit(struct thread *td, int s, struct msghdr *mp,
82 		  caddr_t namelenp);
83 
84 static int accept1(struct thread *td, struct accept_args *uap, int compat);
85 static int getsockname1(struct thread *td, struct getsockname_args *uap,
86 			int compat);
87 static int getpeername1(struct thread *td, struct getpeername_args *uap,
88 			int compat);
89 
90 /*
91  * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the
92  * sf_freelist head with the sf_lock mutex.
93  */
94 static struct {
95 	SLIST_HEAD(, sf_buf) sf_head;
96 	struct mtx sf_lock;
97 } sf_freelist;
98 
99 vm_offset_t sf_base;
100 struct sf_buf *sf_bufs;
101 u_int sf_buf_alloc_want;
102 
103 /*
104  * System call interface to the socket abstraction.
105  */
106 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
107 #define COMPAT_OLDSOCK
108 #endif
109 
110 extern	struct fileops socketops;
111 
112 /*
113  * MPSAFE
114  */
115 int
116 socket(td, uap)
117 	struct thread *td;
118 	register struct socket_args /* {
119 		int	domain;
120 		int	type;
121 		int	protocol;
122 	} */ *uap;
123 {
124 	struct filedesc *fdp;
125 	struct socket *so;
126 	struct file *fp;
127 	int fd, error;
128 
129 	mtx_lock(&Giant);
130 	fdp = td->td_proc->p_fd;
131 	error = falloc(td, &fp, &fd);
132 	if (error)
133 		goto done2;
134 	fhold(fp);
135 	error = socreate(uap->domain, &so, uap->type, uap->protocol,
136 	    td->td_ucred, td);
137 	FILEDESC_LOCK(fdp);
138 	if (error) {
139 		if (fdp->fd_ofiles[fd] == fp) {
140 			fdp->fd_ofiles[fd] = NULL;
141 			FILEDESC_UNLOCK(fdp);
142 			fdrop(fp, td);
143 		} else
144 			FILEDESC_UNLOCK(fdp);
145 	} else {
146 		fp->f_data = (caddr_t)so;	/* already has ref count */
147 		fp->f_flag = FREAD|FWRITE;
148 		fp->f_ops = &socketops;
149 		fp->f_type = DTYPE_SOCKET;
150 		FILEDESC_UNLOCK(fdp);
151 		td->td_retval[0] = fd;
152 	}
153 	fdrop(fp, td);
154 done2:
155 	mtx_unlock(&Giant);
156 	return (error);
157 }
158 
159 /*
160  * MPSAFE
161  */
162 /* ARGSUSED */
163 int
164 bind(td, uap)
165 	struct thread *td;
166 	register struct bind_args /* {
167 		int	s;
168 		caddr_t	name;
169 		int	namelen;
170 	} */ *uap;
171 {
172 	struct socket *so;
173 	struct sockaddr *sa;
174 	int error;
175 
176 	mtx_lock(&Giant);
177 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
178 		goto done2;
179 	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
180 		goto done1;
181 	error = sobind(so, sa, td);
182 	FREE(sa, M_SONAME);
183 done1:
184 	fputsock(so);
185 done2:
186 	mtx_unlock(&Giant);
187 	return (error);
188 }
189 
190 /*
191  * MPSAFE
192  */
193 /* ARGSUSED */
194 int
195 listen(td, uap)
196 	struct thread *td;
197 	register struct listen_args /* {
198 		int	s;
199 		int	backlog;
200 	} */ *uap;
201 {
202 	struct socket *so;
203 	int error;
204 
205 	mtx_lock(&Giant);
206 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
207 		error = solisten(so, uap->backlog, td);
208 		fputsock(so);
209 	}
210 	mtx_unlock(&Giant);
211 	return(error);
212 }
213 
214 /*
215  * accept1()
216  * MPSAFE
217  */
218 static int
219 accept1(td, uap, compat)
220 	struct thread *td;
221 	register struct accept_args /* {
222 		int	s;
223 		caddr_t	name;
224 		int	*anamelen;
225 	} */ *uap;
226 	int compat;
227 {
228 	struct filedesc *fdp;
229 	struct file *nfp = NULL;
230 	struct sockaddr *sa;
231 	int namelen, error, s;
232 	struct socket *head, *so;
233 	int fd;
234 	u_int fflag;
235 
236 	mtx_lock(&Giant);
237 	fdp = td->td_proc->p_fd;
238 	if (uap->name) {
239 		error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen,
240 			sizeof (namelen));
241 		if(error)
242 			goto done2;
243 	}
244 	error = fgetsock(td, uap->s, &head, &fflag);
245 	if (error)
246 		goto done2;
247 	s = splnet();
248 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
249 		splx(s);
250 		error = EINVAL;
251 		goto done;
252 	}
253 	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
254 		splx(s);
255 		error = EWOULDBLOCK;
256 		goto done;
257 	}
258 	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
259 		if (head->so_state & SS_CANTRCVMORE) {
260 			head->so_error = ECONNABORTED;
261 			break;
262 		}
263 		error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH,
264 		    "accept", 0);
265 		if (error) {
266 			splx(s);
267 			goto done;
268 		}
269 	}
270 	if (head->so_error) {
271 		error = head->so_error;
272 		head->so_error = 0;
273 		splx(s);
274 		goto done;
275 	}
276 
277 	/*
278 	 * At this point we know that there is at least one connection
279 	 * ready to be accepted. Remove it from the queue prior to
280 	 * allocating the file descriptor for it since falloc() may
281 	 * block allowing another process to accept the connection
282 	 * instead.
283 	 */
284 	so = TAILQ_FIRST(&head->so_comp);
285 	TAILQ_REMOVE(&head->so_comp, so, so_list);
286 	head->so_qlen--;
287 
288 	error = falloc(td, &nfp, &fd);
289 	if (error) {
290 		/*
291 		 * Probably ran out of file descriptors. Put the
292 		 * unaccepted connection back onto the queue and
293 		 * do another wakeup so some other process might
294 		 * have a chance at it.
295 		 */
296 		TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
297 		head->so_qlen++;
298 		wakeup_one(&head->so_timeo);
299 		splx(s);
300 		goto done;
301 	}
302 	fhold(nfp);
303 	td->td_retval[0] = fd;
304 
305 	/* connection has been removed from the listen queue */
306 	KNOTE(&head->so_rcv.sb_sel.si_note, 0);
307 
308 	so->so_state &= ~SS_COMP;
309 	so->so_head = NULL;
310 	if (head->so_sigio != NULL)
311 		fsetown(fgetown(head->so_sigio), &so->so_sigio);
312 
313 	FILE_LOCK(nfp);
314 	soref(so);			/* file descriptor reference */
315 	nfp->f_data = (caddr_t)so;	/* nfp has ref count from falloc */
316 	nfp->f_flag = fflag;
317 	nfp->f_ops = &socketops;
318 	nfp->f_type = DTYPE_SOCKET;
319 	FILE_UNLOCK(nfp);
320 	sa = 0;
321 	error = soaccept(so, &sa);
322 	if (error) {
323 		/*
324 		 * return a namelen of zero for older code which might
325 	 	 * ignore the return value from accept.
326 		 */
327 		if (uap->name != NULL) {
328 			namelen = 0;
329 			(void) copyout((caddr_t)&namelen,
330 			    (caddr_t)uap->anamelen, sizeof(*uap->anamelen));
331 		}
332 		goto noconnection;
333 	}
334 	if (sa == NULL) {
335 		namelen = 0;
336 		if (uap->name)
337 			goto gotnoname;
338 		splx(s);
339 		error = 0;
340 		goto done;
341 	}
342 	if (uap->name) {
343 		/* check sa_len before it is destroyed */
344 		if (namelen > sa->sa_len)
345 			namelen = sa->sa_len;
346 #ifdef COMPAT_OLDSOCK
347 		if (compat)
348 			((struct osockaddr *)sa)->sa_family =
349 			    sa->sa_family;
350 #endif
351 		error = copyout(sa, (caddr_t)uap->name, (u_int)namelen);
352 		if (!error)
353 gotnoname:
354 			error = copyout((caddr_t)&namelen,
355 			    (caddr_t)uap->anamelen, sizeof (*uap->anamelen));
356 	}
357 noconnection:
358 	if (sa)
359 		FREE(sa, M_SONAME);
360 
361 	/*
362 	 * close the new descriptor, assuming someone hasn't ripped it
363 	 * out from under us.
364 	 */
365 	if (error) {
366 		FILEDESC_LOCK(fdp);
367 		if (fdp->fd_ofiles[fd] == nfp) {
368 			fdp->fd_ofiles[fd] = NULL;
369 			FILEDESC_UNLOCK(fdp);
370 			fdrop(nfp, td);
371 		} else {
372 			FILEDESC_UNLOCK(fdp);
373 		}
374 	}
375 	splx(s);
376 
377 	/*
378 	 * Release explicitly held references before returning.
379 	 */
380 done:
381 	if (nfp != NULL)
382 		fdrop(nfp, td);
383 	fputsock(head);
384 done2:
385 	mtx_unlock(&Giant);
386 	return (error);
387 }
388 
/*
 * accept() - accept a connection using the current sockaddr layout.
 *
 * Thin wrapper that calls accept1() with compat == 0.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
accept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{
	int error;

	error = accept1(td, uap, 0);
	return (error);
}
400 
#ifdef COMPAT_OLDSOCK
/*
 * oaccept() - accept a connection using the old sockaddr layout.
 *
 * Thin wrapper that calls accept1() with compat == 1.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
oaccept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{
	int error;

	error = accept1(td, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
414 
415 /*
416  * MPSAFE
417  */
418 /* ARGSUSED */
419 int
420 connect(td, uap)
421 	struct thread *td;
422 	register struct connect_args /* {
423 		int	s;
424 		caddr_t	name;
425 		int	namelen;
426 	} */ *uap;
427 {
428 	struct socket *so;
429 	struct sockaddr *sa;
430 	int error, s;
431 
432 	mtx_lock(&Giant);
433 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
434 		goto done2;
435 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
436 		error = EALREADY;
437 		goto done1;
438 	}
439 	error = getsockaddr(&sa, uap->name, uap->namelen);
440 	if (error)
441 		goto done1;
442 	error = soconnect(so, sa, td);
443 	if (error)
444 		goto bad;
445 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
446 		FREE(sa, M_SONAME);
447 		error = EINPROGRESS;
448 		goto done1;
449 	}
450 	s = splnet();
451 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
452 		error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, "connec", 0);
453 		if (error)
454 			break;
455 	}
456 	if (error == 0) {
457 		error = so->so_error;
458 		so->so_error = 0;
459 	}
460 	splx(s);
461 bad:
462 	so->so_state &= ~SS_ISCONNECTING;
463 	FREE(sa, M_SONAME);
464 	if (error == ERESTART)
465 		error = EINTR;
466 done1:
467 	fputsock(so);
468 done2:
469 	mtx_unlock(&Giant);
470 	return (error);
471 }
472 
473 /*
474  * MPSAFE
475  */
476 int
477 socketpair(td, uap)
478 	struct thread *td;
479 	register struct socketpair_args /* {
480 		int	domain;
481 		int	type;
482 		int	protocol;
483 		int	*rsv;
484 	} */ *uap;
485 {
486 	register struct filedesc *fdp = td->td_proc->p_fd;
487 	struct file *fp1, *fp2;
488 	struct socket *so1, *so2;
489 	int fd, error, sv[2];
490 
491 	mtx_lock(&Giant);
492 	error = socreate(uap->domain, &so1, uap->type, uap->protocol,
493 	    td->td_ucred, td);
494 	if (error)
495 		goto done2;
496 	error = socreate(uap->domain, &so2, uap->type, uap->protocol,
497 	    td->td_ucred, td);
498 	if (error)
499 		goto free1;
500 	error = falloc(td, &fp1, &fd);
501 	if (error)
502 		goto free2;
503 	fhold(fp1);
504 	sv[0] = fd;
505 	fp1->f_data = (caddr_t)so1;	/* so1 already has ref count */
506 	error = falloc(td, &fp2, &fd);
507 	if (error)
508 		goto free3;
509 	fhold(fp2);
510 	fp2->f_data = (caddr_t)so2;	/* so2 already has ref count */
511 	sv[1] = fd;
512 	error = soconnect2(so1, so2);
513 	if (error)
514 		goto free4;
515 	if (uap->type == SOCK_DGRAM) {
516 		/*
517 		 * Datagram socket connection is asymmetric.
518 		 */
519 		 error = soconnect2(so2, so1);
520 		 if (error)
521 			goto free4;
522 	}
523 	FILE_LOCK(fp1);
524 	fp1->f_flag = FREAD|FWRITE;
525 	fp1->f_ops = &socketops;
526 	fp1->f_type = DTYPE_SOCKET;
527 	FILE_UNLOCK(fp1);
528 	FILE_LOCK(fp2);
529 	fp2->f_flag = FREAD|FWRITE;
530 	fp2->f_ops = &socketops;
531 	fp2->f_type = DTYPE_SOCKET;
532 	FILE_UNLOCK(fp2);
533 	error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int));
534 	fdrop(fp1, td);
535 	fdrop(fp2, td);
536 	goto done2;
537 free4:
538 	FILEDESC_LOCK(fdp);
539 	if (fdp->fd_ofiles[sv[1]] == fp2) {
540 		fdp->fd_ofiles[sv[1]] = NULL;
541 		FILEDESC_UNLOCK(fdp);
542 		fdrop(fp2, td);
543 	} else
544 		FILEDESC_UNLOCK(fdp);
545 	fdrop(fp2, td);
546 free3:
547 	FILEDESC_LOCK(fdp);
548 	if (fdp->fd_ofiles[sv[0]] == fp1) {
549 		fdp->fd_ofiles[sv[0]] = NULL;
550 		FILEDESC_UNLOCK(fdp);
551 		fdrop(fp1, td);
552 	} else
553 		FILEDESC_UNLOCK(fdp);
554 	fdrop(fp1, td);
555 free2:
556 	(void)soclose(so2);
557 free1:
558 	(void)soclose(so1);
559 done2:
560 	mtx_unlock(&Giant);
561 	return (error);
562 }
563 
564 static int
565 sendit(td, s, mp, flags)
566 	register struct thread *td;
567 	int s;
568 	register struct msghdr *mp;
569 	int flags;
570 {
571 	struct uio auio;
572 	register struct iovec *iov;
573 	register int i;
574 	struct mbuf *control;
575 	struct sockaddr *to = NULL;
576 	int len, error;
577 	struct socket *so;
578 #ifdef KTRACE
579 	struct iovec *ktriov = NULL;
580 	struct uio ktruio;
581 	int iovlen;
582 #endif
583 
584 	if ((error = fgetsock(td, s, &so, NULL)) != 0)
585 		return (error);
586 	auio.uio_iov = mp->msg_iov;
587 	auio.uio_iovcnt = mp->msg_iovlen;
588 	auio.uio_segflg = UIO_USERSPACE;
589 	auio.uio_rw = UIO_WRITE;
590 	auio.uio_td = td;
591 	auio.uio_offset = 0;			/* XXX */
592 	auio.uio_resid = 0;
593 	iov = mp->msg_iov;
594 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
595 		if ((auio.uio_resid += iov->iov_len) < 0) {
596 			error = EINVAL;
597 			goto bad;
598 		}
599 	}
600 	if (mp->msg_name) {
601 		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
602 		if (error)
603 			goto bad;
604 	}
605 	if (mp->msg_control) {
606 		if (mp->msg_controllen < sizeof(struct cmsghdr)
607 #ifdef COMPAT_OLDSOCK
608 		    && mp->msg_flags != MSG_COMPAT
609 #endif
610 		) {
611 			error = EINVAL;
612 			goto bad;
613 		}
614 		error = sockargs(&control, mp->msg_control,
615 		    mp->msg_controllen, MT_CONTROL);
616 		if (error)
617 			goto bad;
618 #ifdef COMPAT_OLDSOCK
619 		if (mp->msg_flags == MSG_COMPAT) {
620 			register struct cmsghdr *cm;
621 
622 			M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
623 			if (control == 0) {
624 				error = ENOBUFS;
625 				goto bad;
626 			} else {
627 				cm = mtod(control, struct cmsghdr *);
628 				cm->cmsg_len = control->m_len;
629 				cm->cmsg_level = SOL_SOCKET;
630 				cm->cmsg_type = SCM_RIGHTS;
631 			}
632 		}
633 #endif
634 	} else {
635 		control = 0;
636 	}
637 #ifdef KTRACE
638 	if (KTRPOINT(td, KTR_GENIO)) {
639 		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
640 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
641 		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
642 		ktruio = auio;
643 	}
644 #endif
645 	len = auio.uio_resid;
646 	error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control,
647 						     flags, td);
648 	if (error) {
649 		if (auio.uio_resid != len && (error == ERESTART ||
650 		    error == EINTR || error == EWOULDBLOCK))
651 			error = 0;
652 		/* Generation of SIGPIPE can be controlled per socket */
653 		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) {
654 			PROC_LOCK(td->td_proc);
655 			psignal(td->td_proc, SIGPIPE);
656 			PROC_UNLOCK(td->td_proc);
657 		}
658 	}
659 	if (error == 0)
660 		td->td_retval[0] = len - auio.uio_resid;
661 #ifdef KTRACE
662 	if (ktriov != NULL) {
663 		if (error == 0) {
664 			ktruio.uio_iov = ktriov;
665 			ktruio.uio_resid = td->td_retval[0];
666 			ktrgenio(s, UIO_WRITE, &ktruio, error);
667 		}
668 		FREE(ktriov, M_TEMP);
669 	}
670 #endif
671 bad:
672 	fputsock(so);
673 	if (to)
674 		FREE(to, M_SONAME);
675 	return (error);
676 }
677 
678 /*
679  * MPSAFE
680  */
681 int
682 sendto(td, uap)
683 	struct thread *td;
684 	register struct sendto_args /* {
685 		int	s;
686 		caddr_t	buf;
687 		size_t	len;
688 		int	flags;
689 		caddr_t	to;
690 		int	tolen;
691 	} */ *uap;
692 {
693 	struct msghdr msg;
694 	struct iovec aiov;
695 	int error;
696 
697 	msg.msg_name = uap->to;
698 	msg.msg_namelen = uap->tolen;
699 	msg.msg_iov = &aiov;
700 	msg.msg_iovlen = 1;
701 	msg.msg_control = 0;
702 #ifdef COMPAT_OLDSOCK
703 	msg.msg_flags = 0;
704 #endif
705 	aiov.iov_base = uap->buf;
706 	aiov.iov_len = uap->len;
707 	mtx_lock(&Giant);
708 	error = sendit(td, uap->s, &msg, uap->flags);
709 	mtx_unlock(&Giant);
710 	return (error);
711 }
712 
713 #ifdef COMPAT_OLDSOCK
714 /*
715  * MPSAFE
716  */
717 int
718 osend(td, uap)
719 	struct thread *td;
720 	register struct osend_args /* {
721 		int	s;
722 		caddr_t	buf;
723 		int	len;
724 		int	flags;
725 	} */ *uap;
726 {
727 	struct msghdr msg;
728 	struct iovec aiov;
729 	int error;
730 
731 	msg.msg_name = 0;
732 	msg.msg_namelen = 0;
733 	msg.msg_iov = &aiov;
734 	msg.msg_iovlen = 1;
735 	aiov.iov_base = uap->buf;
736 	aiov.iov_len = uap->len;
737 	msg.msg_control = 0;
738 	msg.msg_flags = 0;
739 	mtx_lock(&Giant);
740 	error = sendit(td, uap->s, &msg, uap->flags);
741 	mtx_unlock(&Giant);
742 	return (error);
743 }
744 
745 /*
746  * MPSAFE
747  */
748 int
749 osendmsg(td, uap)
750 	struct thread *td;
751 	register struct osendmsg_args /* {
752 		int	s;
753 		caddr_t	msg;
754 		int	flags;
755 	} */ *uap;
756 {
757 	struct msghdr msg;
758 	struct iovec aiov[UIO_SMALLIOV], *iov;
759 	int error;
760 
761 	mtx_lock(&Giant);
762 	error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr));
763 	if (error)
764 		goto done2;
765 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
766 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
767 			error = EMSGSIZE;
768 			goto done2;
769 		}
770 		MALLOC(iov, struct iovec *,
771 		      sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
772 		      M_WAITOK);
773 	} else {
774 		iov = aiov;
775 	}
776 	error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
777 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
778 	if (error)
779 		goto done;
780 	msg.msg_flags = MSG_COMPAT;
781 	msg.msg_iov = iov;
782 	error = sendit(td, uap->s, &msg, uap->flags);
783 done:
784 	if (iov != aiov)
785 		FREE(iov, M_IOV);
786 done2:
787 	mtx_unlock(&Giant);
788 	return (error);
789 }
790 #endif
791 
792 /*
793  * MPSAFE
794  */
795 int
796 sendmsg(td, uap)
797 	struct thread *td;
798 	register struct sendmsg_args /* {
799 		int	s;
800 		caddr_t	msg;
801 		int	flags;
802 	} */ *uap;
803 {
804 	struct msghdr msg;
805 	struct iovec aiov[UIO_SMALLIOV], *iov;
806 	int error;
807 
808 	mtx_lock(&Giant);
809 	error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg));
810 	if (error)
811 		goto done2;
812 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
813 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
814 			error = EMSGSIZE;
815 			goto done2;
816 		}
817 		MALLOC(iov, struct iovec *,
818 		       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
819 		       M_WAITOK);
820 	} else {
821 		iov = aiov;
822 	}
823 	if (msg.msg_iovlen &&
824 	    (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
825 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
826 		goto done;
827 	msg.msg_iov = iov;
828 #ifdef COMPAT_OLDSOCK
829 	msg.msg_flags = 0;
830 #endif
831 	error = sendit(td, uap->s, &msg, uap->flags);
832 done:
833 	if (iov != aiov)
834 		FREE(iov, M_IOV);
835 done2:
836 	mtx_unlock(&Giant);
837 	return (error);
838 }
839 
840 static int
841 recvit(td, s, mp, namelenp)
842 	register struct thread *td;
843 	int s;
844 	register struct msghdr *mp;
845 	caddr_t namelenp;
846 {
847 	struct uio auio;
848 	register struct iovec *iov;
849 	register int i;
850 	int len, error;
851 	struct mbuf *m, *control = 0;
852 	caddr_t ctlbuf;
853 	struct socket *so;
854 	struct sockaddr *fromsa = 0;
855 #ifdef KTRACE
856 	struct iovec *ktriov = NULL;
857 	struct uio ktruio;
858 	int iovlen;
859 #endif
860 
861 	if ((error = fgetsock(td, s, &so, NULL)) != 0)
862 		return (error);
863 	auio.uio_iov = mp->msg_iov;
864 	auio.uio_iovcnt = mp->msg_iovlen;
865 	auio.uio_segflg = UIO_USERSPACE;
866 	auio.uio_rw = UIO_READ;
867 	auio.uio_td = td;
868 	auio.uio_offset = 0;			/* XXX */
869 	auio.uio_resid = 0;
870 	iov = mp->msg_iov;
871 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
872 		if ((auio.uio_resid += iov->iov_len) < 0) {
873 			fputsock(so);
874 			return (EINVAL);
875 		}
876 	}
877 #ifdef KTRACE
878 	if (KTRPOINT(td, KTR_GENIO)) {
879 		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
880 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
881 		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
882 		ktruio = auio;
883 	}
884 #endif
885 	len = auio.uio_resid;
886 	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
887 	    (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
888 	    &mp->msg_flags);
889 	if (error) {
890 		if (auio.uio_resid != len && (error == ERESTART ||
891 		    error == EINTR || error == EWOULDBLOCK))
892 			error = 0;
893 	}
894 #ifdef KTRACE
895 	if (ktriov != NULL) {
896 		if (error == 0) {
897 			ktruio.uio_iov = ktriov;
898 			ktruio.uio_resid = len - auio.uio_resid;
899 			ktrgenio(s, UIO_READ, &ktruio, error);
900 		}
901 		FREE(ktriov, M_TEMP);
902 	}
903 #endif
904 	if (error)
905 		goto out;
906 	td->td_retval[0] = len - auio.uio_resid;
907 	if (mp->msg_name) {
908 		len = mp->msg_namelen;
909 		if (len <= 0 || fromsa == 0)
910 			len = 0;
911 		else {
912 #ifndef MIN
913 #define MIN(a,b) ((a)>(b)?(b):(a))
914 #endif
915 			/* save sa_len before it is destroyed by MSG_COMPAT */
916 			len = MIN(len, fromsa->sa_len);
917 #ifdef COMPAT_OLDSOCK
918 			if (mp->msg_flags & MSG_COMPAT)
919 				((struct osockaddr *)fromsa)->sa_family =
920 				    fromsa->sa_family;
921 #endif
922 			error = copyout(fromsa,
923 			    (caddr_t)mp->msg_name, (unsigned)len);
924 			if (error)
925 				goto out;
926 		}
927 		mp->msg_namelen = len;
928 		if (namelenp &&
929 		    (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) {
930 #ifdef COMPAT_OLDSOCK
931 			if (mp->msg_flags & MSG_COMPAT)
932 				error = 0;	/* old recvfrom didn't check */
933 			else
934 #endif
935 			goto out;
936 		}
937 	}
938 	if (mp->msg_control) {
939 #ifdef COMPAT_OLDSOCK
940 		/*
941 		 * We assume that old recvmsg calls won't receive access
942 		 * rights and other control info, esp. as control info
943 		 * is always optional and those options didn't exist in 4.3.
944 		 * If we receive rights, trim the cmsghdr; anything else
945 		 * is tossed.
946 		 */
947 		if (control && mp->msg_flags & MSG_COMPAT) {
948 			if (mtod(control, struct cmsghdr *)->cmsg_level !=
949 			    SOL_SOCKET ||
950 			    mtod(control, struct cmsghdr *)->cmsg_type !=
951 			    SCM_RIGHTS) {
952 				mp->msg_controllen = 0;
953 				goto out;
954 			}
955 			control->m_len -= sizeof (struct cmsghdr);
956 			control->m_data += sizeof (struct cmsghdr);
957 		}
958 #endif
959 		len = mp->msg_controllen;
960 		m = control;
961 		mp->msg_controllen = 0;
962 		ctlbuf = (caddr_t) mp->msg_control;
963 
964 		while (m && len > 0) {
965 			unsigned int tocopy;
966 
967 			if (len >= m->m_len)
968 				tocopy = m->m_len;
969 			else {
970 				mp->msg_flags |= MSG_CTRUNC;
971 				tocopy = len;
972 			}
973 
974 			if ((error = copyout((caddr_t)mtod(m, caddr_t),
975 					ctlbuf, tocopy)) != 0)
976 				goto out;
977 
978 			ctlbuf += tocopy;
979 			len -= tocopy;
980 			m = m->m_next;
981 		}
982 		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
983 	}
984 out:
985 	fputsock(so);
986 	if (fromsa)
987 		FREE(fromsa, M_SONAME);
988 	if (control)
989 		m_freem(control);
990 	return (error);
991 }
992 
993 /*
994  * MPSAFE
995  */
996 int
997 recvfrom(td, uap)
998 	struct thread *td;
999 	register struct recvfrom_args /* {
1000 		int	s;
1001 		caddr_t	buf;
1002 		size_t	len;
1003 		int	flags;
1004 		caddr_t	from;
1005 		int	*fromlenaddr;
1006 	} */ *uap;
1007 {
1008 	struct msghdr msg;
1009 	struct iovec aiov;
1010 	int error;
1011 
1012 	mtx_lock(&Giant);
1013 	if (uap->fromlenaddr) {
1014 		error = copyin((caddr_t)uap->fromlenaddr,
1015 		    (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
1016 		if (error)
1017 			goto done2;
1018 	} else {
1019 		msg.msg_namelen = 0;
1020 	}
1021 	msg.msg_name = uap->from;
1022 	msg.msg_iov = &aiov;
1023 	msg.msg_iovlen = 1;
1024 	aiov.iov_base = uap->buf;
1025 	aiov.iov_len = uap->len;
1026 	msg.msg_control = 0;
1027 	msg.msg_flags = uap->flags;
1028 	error = recvit(td, uap->s, &msg, (caddr_t)uap->fromlenaddr);
1029 done2:
1030 	mtx_unlock(&Giant);
1031 	return(error);
1032 }
1033 
#ifdef COMPAT_OLDSOCK
/*
 * orecvfrom() - old recvfrom().
 *
 * Sets MSG_COMPAT in the caller's flags (uap->flags is modified in
 * place) and hands the request to recvfrom().
 *
 * MPSAFE
 */
int
orecvfrom(td, uap)
	struct thread *td;
	struct recvfrom_args *uap;
{
	int error;

	uap->flags |= MSG_COMPAT;
	error = recvfrom(td, uap);
	return (error);
}
#endif
1048 
1049 
1050 #ifdef COMPAT_OLDSOCK
1051 /*
1052  * MPSAFE
1053  */
1054 int
1055 orecv(td, uap)
1056 	struct thread *td;
1057 	register struct orecv_args /* {
1058 		int	s;
1059 		caddr_t	buf;
1060 		int	len;
1061 		int	flags;
1062 	} */ *uap;
1063 {
1064 	struct msghdr msg;
1065 	struct iovec aiov;
1066 	int error;
1067 
1068 	mtx_lock(&Giant);
1069 	msg.msg_name = 0;
1070 	msg.msg_namelen = 0;
1071 	msg.msg_iov = &aiov;
1072 	msg.msg_iovlen = 1;
1073 	aiov.iov_base = uap->buf;
1074 	aiov.iov_len = uap->len;
1075 	msg.msg_control = 0;
1076 	msg.msg_flags = uap->flags;
1077 	error = recvit(td, uap->s, &msg, (caddr_t)0);
1078 	mtx_unlock(&Giant);
1079 	return (error);
1080 }
1081 
1082 /*
1083  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1084  * overlays the new one, missing only the flags, and with the (old) access
1085  * rights where the control fields are now.
1086  *
1087  * MPSAFE
1088  */
1089 int
1090 orecvmsg(td, uap)
1091 	struct thread *td;
1092 	register struct orecvmsg_args /* {
1093 		int	s;
1094 		struct	omsghdr *msg;
1095 		int	flags;
1096 	} */ *uap;
1097 {
1098 	struct msghdr msg;
1099 	struct iovec aiov[UIO_SMALLIOV], *iov;
1100 	int error;
1101 
1102 	error = copyin((caddr_t)uap->msg, (caddr_t)&msg,
1103 	    sizeof (struct omsghdr));
1104 	if (error)
1105 		return (error);
1106 
1107 	mtx_lock(&Giant);
1108 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1109 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1110 			error = EMSGSIZE;
1111 			goto done2;
1112 		}
1113 		MALLOC(iov, struct iovec *,
1114 		      sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1115 		      M_WAITOK);
1116 	} else {
1117 		iov = aiov;
1118 	}
1119 	msg.msg_flags = uap->flags | MSG_COMPAT;
1120 	error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
1121 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1122 	if (error)
1123 		goto done;
1124 	msg.msg_iov = iov;
1125 	error = recvit(td, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen);
1126 
1127 	if (msg.msg_controllen && error == 0)
1128 		error = copyout((caddr_t)&msg.msg_controllen,
1129 		    (caddr_t)&uap->msg->msg_accrightslen, sizeof (int));
1130 done:
1131 	if (iov != aiov)
1132 		FREE(iov, M_IOV);
1133 done2:
1134 	mtx_unlock(&Giant);
1135 	return (error);
1136 }
1137 #endif
1138 
1139 /*
1140  * MPSAFE
1141  */
1142 int
1143 recvmsg(td, uap)
1144 	struct thread *td;
1145 	register struct recvmsg_args /* {
1146 		int	s;
1147 		struct	msghdr *msg;
1148 		int	flags;
1149 	} */ *uap;
1150 {
1151 	struct msghdr msg;
1152 	struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
1153 	register int error;
1154 
1155 	mtx_lock(&Giant);
1156 	error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg));
1157 	if (error)
1158 		goto done2;
1159 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1160 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1161 			error = EMSGSIZE;
1162 			goto done2;
1163 		}
1164 		MALLOC(iov, struct iovec *,
1165 		       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1166 		       M_WAITOK);
1167 	} else {
1168 		iov = aiov;
1169 	}
1170 #ifdef COMPAT_OLDSOCK
1171 	msg.msg_flags = uap->flags &~ MSG_COMPAT;
1172 #else
1173 	msg.msg_flags = uap->flags;
1174 #endif
1175 	uiov = msg.msg_iov;
1176 	msg.msg_iov = iov;
1177 	error = copyin((caddr_t)uiov, (caddr_t)iov,
1178 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1179 	if (error)
1180 		goto done;
1181 	error = recvit(td, uap->s, &msg, (caddr_t)0);
1182 	if (!error) {
1183 		msg.msg_iov = uiov;
1184 		error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg));
1185 	}
1186 done:
1187 	if (iov != aiov)
1188 		FREE(iov, M_IOV);
1189 done2:
1190 	mtx_unlock(&Giant);
1191 	return (error);
1192 }
1193 
1194 /*
1195  * MPSAFE
1196  */
1197 /* ARGSUSED */
1198 int
1199 shutdown(td, uap)
1200 	struct thread *td;
1201 	register struct shutdown_args /* {
1202 		int	s;
1203 		int	how;
1204 	} */ *uap;
1205 {
1206 	struct socket *so;
1207 	int error;
1208 
1209 	mtx_lock(&Giant);
1210 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1211 		error = soshutdown(so, uap->how);
1212 		fputsock(so);
1213 	}
1214 	mtx_unlock(&Giant);
1215 	return(error);
1216 }
1217 
1218 /*
1219  * MPSAFE
1220  */
1221 /* ARGSUSED */
1222 int
1223 setsockopt(td, uap)
1224 	struct thread *td;
1225 	register struct setsockopt_args /* {
1226 		int	s;
1227 		int	level;
1228 		int	name;
1229 		caddr_t	val;
1230 		int	valsize;
1231 	} */ *uap;
1232 {
1233 	struct socket *so;
1234 	struct sockopt sopt;
1235 	int error;
1236 
1237 	if (uap->val == 0 && uap->valsize != 0)
1238 		return (EFAULT);
1239 	if (uap->valsize < 0)
1240 		return (EINVAL);
1241 
1242 	mtx_lock(&Giant);
1243 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1244 		sopt.sopt_dir = SOPT_SET;
1245 		sopt.sopt_level = uap->level;
1246 		sopt.sopt_name = uap->name;
1247 		sopt.sopt_val = uap->val;
1248 		sopt.sopt_valsize = uap->valsize;
1249 		sopt.sopt_td = td;
1250 		error = sosetopt(so, &sopt);
1251 		fputsock(so);
1252 	}
1253 	mtx_unlock(&Giant);
1254 	return(error);
1255 }
1256 
1257 /*
1258  * MPSAFE
1259  */
1260 /* ARGSUSED */
1261 int
1262 getsockopt(td, uap)
1263 	struct thread *td;
1264 	register struct getsockopt_args /* {
1265 		int	s;
1266 		int	level;
1267 		int	name;
1268 		caddr_t	val;
1269 		int	*avalsize;
1270 	} */ *uap;
1271 {
1272 	int	valsize, error;
1273 	struct  socket *so;
1274 	struct	sockopt sopt;
1275 
1276 	mtx_lock(&Giant);
1277 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1278 		goto done2;
1279 	if (uap->val) {
1280 		error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize,
1281 		    sizeof (valsize));
1282 		if (error)
1283 			goto done1;
1284 		if (valsize < 0) {
1285 			error = EINVAL;
1286 			goto done1;
1287 		}
1288 	} else {
1289 		valsize = 0;
1290 	}
1291 
1292 	sopt.sopt_dir = SOPT_GET;
1293 	sopt.sopt_level = uap->level;
1294 	sopt.sopt_name = uap->name;
1295 	sopt.sopt_val = uap->val;
1296 	sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1297 	sopt.sopt_td = td;
1298 
1299 	error = sogetopt(so, &sopt);
1300 	if (error == 0) {
1301 		valsize = sopt.sopt_valsize;
1302 		error = copyout((caddr_t)&valsize,
1303 				(caddr_t)uap->avalsize, sizeof (valsize));
1304 	}
1305 done1:
1306 	fputsock(so);
1307 done2:
1308 	mtx_unlock(&Giant);
1309 	return (error);
1310 }
1311 
1312 /*
1313  * getsockname1() - Get socket name.
1314  *
1315  * MPSAFE
1316  */
1317 /* ARGSUSED */
1318 static int
1319 getsockname1(td, uap, compat)
1320 	struct thread *td;
1321 	register struct getsockname_args /* {
1322 		int	fdes;
1323 		caddr_t	asa;
1324 		int	*alen;
1325 	} */ *uap;
1326 	int compat;
1327 {
1328 	struct socket *so;
1329 	struct sockaddr *sa;
1330 	int len, error;
1331 
1332 	mtx_lock(&Giant);
1333 	if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1334 		goto done2;
1335 	error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1336 	if (error)
1337 		goto done1;
1338 	sa = 0;
1339 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1340 	if (error)
1341 		goto bad;
1342 	if (sa == 0) {
1343 		len = 0;
1344 		goto gotnothing;
1345 	}
1346 
1347 	len = MIN(len, sa->sa_len);
1348 #ifdef COMPAT_OLDSOCK
1349 	if (compat)
1350 		((struct osockaddr *)sa)->sa_family = sa->sa_family;
1351 #endif
1352 	error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1353 	if (error == 0)
1354 gotnothing:
1355 		error = copyout((caddr_t)&len, (caddr_t)uap->alen,
1356 		    sizeof (len));
1357 bad:
1358 	if (sa)
1359 		FREE(sa, M_SONAME);
1360 done1:
1361 	fputsock(so);
1362 done2:
1363 	mtx_unlock(&Giant);
1364 	return (error);
1365 }
1366 
/*
 * getsockname() - system call entry point.
 *
 * Thin wrapper: calls getsockname1() with the compat flag cleared and
 * returns its result unchanged.
 *
 * MPSAFE
 */
int
getsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(td, uap, 0);
	return (error);
}
1378 
1379 #ifdef COMPAT_OLDSOCK
/*
 * ogetsockname() - COMPAT_OLDSOCK system call entry point.
 *
 * Thin wrapper: calls getsockname1() with the compat flag set and
 * returns its result unchanged.
 *
 * MPSAFE
 */
int
ogetsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(td, uap, 1);
	return (error);
}
1391 #endif /* COMPAT_OLDSOCK */
1392 
1393 /*
1394  * getpeername1() - Get name of peer for connected socket.
1395  *
1396  * MPSAFE
1397  */
1398 /* ARGSUSED */
1399 static int
1400 getpeername1(td, uap, compat)
1401 	struct thread *td;
1402 	register struct getpeername_args /* {
1403 		int	fdes;
1404 		caddr_t	asa;
1405 		int	*alen;
1406 	} */ *uap;
1407 	int compat;
1408 {
1409 	struct socket *so;
1410 	struct sockaddr *sa;
1411 	int len, error;
1412 
1413 	mtx_lock(&Giant);
1414 	if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1415 		goto done2;
1416 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1417 		error = ENOTCONN;
1418 		goto done1;
1419 	}
1420 	error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1421 	if (error)
1422 		goto done1;
1423 	sa = 0;
1424 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1425 	if (error)
1426 		goto bad;
1427 	if (sa == 0) {
1428 		len = 0;
1429 		goto gotnothing;
1430 	}
1431 	len = MIN(len, sa->sa_len);
1432 #ifdef COMPAT_OLDSOCK
1433 	if (compat)
1434 		((struct osockaddr *)sa)->sa_family =
1435 		    sa->sa_family;
1436 #endif
1437 	error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1438 	if (error)
1439 		goto bad;
1440 gotnothing:
1441 	error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len));
1442 bad:
1443 	if (sa)
1444 		FREE(sa, M_SONAME);
1445 done1:
1446 	fputsock(so);
1447 done2:
1448 	mtx_unlock(&Giant);
1449 	return (error);
1450 }
1451 
/*
 * getpeername() - system call entry point.
 *
 * Thin wrapper: calls getpeername1() with the compat flag cleared and
 * returns its result unchanged.
 *
 * MPSAFE
 */
int
getpeername(td, uap)
	struct thread *td;
	struct getpeername_args *uap;
{
	int error;

	error = getpeername1(td, uap, 0);
	return (error);
}
1463 
1464 #ifdef COMPAT_OLDSOCK
/*
 * ogetpeername() - COMPAT_OLDSOCK system call entry point.
 *
 * Thin wrapper: calls getpeername1() with the compat flag set and
 * returns its result unchanged.
 *
 * MPSAFE
 */
int
ogetpeername(td, uap)
	struct thread *td;
	struct ogetpeername_args *uap;
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(td, args, 1));
}
1477 #endif /* COMPAT_OLDSOCK */
1478 
1479 int
1480 sockargs(mp, buf, buflen, type)
1481 	struct mbuf **mp;
1482 	caddr_t buf;
1483 	int buflen, type;
1484 {
1485 	register struct sockaddr *sa;
1486 	register struct mbuf *m;
1487 	int error;
1488 
1489 	if ((u_int)buflen > MLEN) {
1490 #ifdef COMPAT_OLDSOCK
1491 		if (type == MT_SONAME && (u_int)buflen <= 112)
1492 			buflen = MLEN;		/* unix domain compat. hack */
1493 		else
1494 #endif
1495 		return (EINVAL);
1496 	}
1497 	m = m_get(M_TRYWAIT, type);
1498 	if (m == NULL)
1499 		return (ENOBUFS);
1500 	m->m_len = buflen;
1501 	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1502 	if (error)
1503 		(void) m_free(m);
1504 	else {
1505 		*mp = m;
1506 		if (type == MT_SONAME) {
1507 			sa = mtod(m, struct sockaddr *);
1508 
1509 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1510 			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1511 				sa->sa_family = sa->sa_len;
1512 #endif
1513 			sa->sa_len = buflen;
1514 		}
1515 	}
1516 	return (error);
1517 }
1518 
1519 int
1520 getsockaddr(namp, uaddr, len)
1521 	struct sockaddr **namp;
1522 	caddr_t uaddr;
1523 	size_t len;
1524 {
1525 	struct sockaddr *sa;
1526 	int error;
1527 
1528 	if (len > SOCK_MAXADDRLEN)
1529 		return ENAMETOOLONG;
1530 	MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1531 	error = copyin(uaddr, sa, len);
1532 	if (error) {
1533 		FREE(sa, M_SONAME);
1534 	} else {
1535 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1536 		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1537 			sa->sa_family = sa->sa_len;
1538 #endif
1539 		sa->sa_len = len;
1540 		*namp = sa;
1541 	}
1542 	return error;
1543 }
1544 
1545 /*
1546  * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
1547  * XXX - The sf_buf functions are currently private to sendfile(2), so have
1548  * been made static, but may be useful in the future for doing zero-copy in
1549  * other parts of the networking code.
1550  */
1551 static void
1552 sf_buf_init(void *arg)
1553 {
1554 	int i;
1555 
1556 	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
1557 	mtx_lock(&sf_freelist.sf_lock);
1558 	SLIST_INIT(&sf_freelist.sf_head);
1559 	sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE);
1560 	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
1561 	    M_NOWAIT | M_ZERO);
1562 	for (i = 0; i < nsfbufs; i++) {
1563 		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
1564 		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
1565 	}
1566 	sf_buf_alloc_want = 0;
1567 	mtx_unlock(&sf_freelist.sf_lock);
1568 }
1569 
1570 /*
1571  * Get an sf_buf from the freelist. Will block if none are available.
1572  */
1573 struct sf_buf *
1574 sf_buf_alloc()
1575 {
1576 	struct sf_buf *sf;
1577 	int error;
1578 
1579 	mtx_lock(&sf_freelist.sf_lock);
1580 	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
1581 		sf_buf_alloc_want++;
1582 		error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH,
1583 		    "sfbufa", 0);
1584 		sf_buf_alloc_want--;
1585 
1586 		/*
1587 		 * If we got a signal, don't risk going back to sleep.
1588 		 */
1589 		if (error)
1590 			break;
1591 	}
1592 	if (sf != NULL)
1593 		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
1594 	mtx_unlock(&sf_freelist.sf_lock);
1595 	return (sf);
1596 }
1597 
/*
 * Map an address inside the sf_buf virtual address range back to its
 * sf_buf descriptor: the page index relative to sf_base selects the
 * entry in sf_bufs[].
 */
#define dtosf(x)							\
	(&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT])
1599 
1600 /*
1601  * Detatch mapped page and release resources back to the system.
1602  */
1603 void
1604 sf_buf_free(caddr_t addr, void *args)
1605 {
1606 	struct sf_buf *sf;
1607 	struct vm_page *m;
1608 
1609 	GIANT_REQUIRED;
1610 
1611 	sf = dtosf(addr);
1612 	pmap_qremove((vm_offset_t)addr, 1);
1613 	m = sf->m;
1614 	vm_page_unwire(m, 0);
1615 	/*
1616 	 * Check for the object going away on us. This can
1617 	 * happen since we don't hold a reference to it.
1618 	 * If so, we're responsible for freeing the page.
1619 	 */
1620 	if (m->wire_count == 0 && m->object == NULL)
1621 		vm_page_free(m);
1622 	sf->m = NULL;
1623 	mtx_lock(&sf_freelist.sf_lock);
1624 	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
1625 	if (sf_buf_alloc_want > 0)
1626 		wakeup_one(&sf_freelist);
1627 	mtx_unlock(&sf_freelist.sf_lock);
1628 }
1629 
1630 /*
1631  * sendfile(2)
1632  *
1633  * MPSAFE
1634  *
1635  * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1636  *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1637  *
1638  * Send a file specified by 'fd' and starting at 'offset' to a socket
1639  * specified by 's'. Send only 'nbytes' of the file or until EOF if
1640  * nbytes == 0. Optionally add a header and/or trailer to the socket
1641  * output. If specified, write the total number of bytes sent into *sbytes.
1642  *
1643  */
1644 int
1645 sendfile(struct thread *td, struct sendfile_args *uap)
1646 {
1647 	struct vnode *vp;
1648 	struct vm_object *obj;
1649 	struct socket *so = NULL;
1650 	struct mbuf *m;
1651 	struct sf_buf *sf;
1652 	struct vm_page *pg;
1653 	struct writev_args nuap;
1654 	struct sf_hdtr hdtr;
1655 	off_t off, xfsize, hdtr_size, sbytes = 0;
1656 	int error, s;
1657 
1658 	mtx_lock(&Giant);
1659 
1660 	hdtr_size = 0;
1661 
1662 	/*
1663 	 * The descriptor must be a regular file and have a backing VM object.
1664 	 */
1665 	if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1666 		goto done;
1667 	if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
1668 		error = EINVAL;
1669 		goto done;
1670 	}
1671 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1672 		goto done;
1673 	if (so->so_type != SOCK_STREAM) {
1674 		error = EINVAL;
1675 		goto done;
1676 	}
1677 	if ((so->so_state & SS_ISCONNECTED) == 0) {
1678 		error = ENOTCONN;
1679 		goto done;
1680 	}
1681 	if (uap->offset < 0) {
1682 		error = EINVAL;
1683 		goto done;
1684 	}
1685 
1686 	/*
1687 	 * If specified, get the pointer to the sf_hdtr struct for
1688 	 * any headers/trailers.
1689 	 */
1690 	if (uap->hdtr != NULL) {
1691 		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1692 		if (error)
1693 			goto done;
1694 		/*
1695 		 * Send any headers. Wimp out and use writev(2).
1696 		 */
1697 		if (hdtr.headers != NULL) {
1698 			nuap.fd = uap->s;
1699 			nuap.iovp = hdtr.headers;
1700 			nuap.iovcnt = hdtr.hdr_cnt;
1701 			error = writev(td, &nuap);
1702 			if (error)
1703 				goto done;
1704 			hdtr_size += td->td_retval[0];
1705 		}
1706 	}
1707 
1708 	/*
1709 	 * Protect against multiple writers to the socket.
1710 	 */
1711 	(void) sblock(&so->so_snd, M_WAITOK);
1712 
1713 	/*
1714 	 * Loop through the pages in the file, starting with the requested
1715 	 * offset. Get a file page (do I/O if necessary), map the file page
1716 	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1717 	 * it on the socket.
1718 	 */
1719 	for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1720 		vm_pindex_t pindex;
1721 		vm_offset_t pgoff;
1722 
1723 		pindex = OFF_TO_IDX(off);
1724 retry_lookup:
1725 		/*
1726 		 * Calculate the amount to transfer. Not to exceed a page,
1727 		 * the EOF, or the passed in nbytes.
1728 		 */
1729 		xfsize = obj->un_pager.vnp.vnp_size - off;
1730 		if (xfsize > PAGE_SIZE)
1731 			xfsize = PAGE_SIZE;
1732 		pgoff = (vm_offset_t)(off & PAGE_MASK);
1733 		if (PAGE_SIZE - pgoff < xfsize)
1734 			xfsize = PAGE_SIZE - pgoff;
1735 		if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1736 			xfsize = uap->nbytes - sbytes;
1737 		if (xfsize <= 0)
1738 			break;
1739 		/*
1740 		 * Optimize the non-blocking case by looking at the socket space
1741 		 * before going to the extra work of constituting the sf_buf.
1742 		 */
1743 		if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1744 			if (so->so_state & SS_CANTSENDMORE)
1745 				error = EPIPE;
1746 			else
1747 				error = EAGAIN;
1748 			sbunlock(&so->so_snd);
1749 			goto done;
1750 		}
1751 		/*
1752 		 * Attempt to look up the page.
1753 		 *
1754 		 *	Allocate if not found
1755 		 *
1756 		 *	Wait and loop if busy.
1757 		 */
1758 		pg = vm_page_lookup(obj, pindex);
1759 
1760 		if (pg == NULL) {
1761 			pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL);
1762 			if (pg == NULL) {
1763 				VM_WAIT;
1764 				goto retry_lookup;
1765 			}
1766 			vm_page_wakeup(pg);
1767 		} else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) {
1768 			goto retry_lookup;
1769 		}
1770 
1771 		/*
1772 		 * Wire the page so it does not get ripped out from under
1773 		 * us.
1774 		 */
1775 
1776 		vm_page_wire(pg);
1777 
1778 		/*
1779 		 * If page is not valid for what we need, initiate I/O
1780 		 */
1781 
1782 		if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) {
1783 			int bsize;
1784 
1785 			/*
1786 			 * Ensure that our page is still around when the I/O
1787 			 * completes.
1788 			 */
1789 			vm_page_io_start(pg);
1790 
1791 			/*
1792 			 * Get the page from backing store.
1793 			 */
1794 			bsize = vp->v_mount->mnt_stat.f_iosize;
1795 			vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
1796 			error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
1797 			    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
1798 			    IO_VMIO | ((MAXBSIZE / bsize) << 16),
1799 			    td->td_ucred, NULL, td);
1800 			VOP_UNLOCK(vp, 0, td);
1801 			vm_page_flag_clear(pg, PG_ZERO);
1802 			vm_page_io_finish(pg);
1803 			if (error) {
1804 				vm_page_unwire(pg, 0);
1805 				/*
1806 				 * See if anyone else might know about this page.
1807 				 * If not and it is not valid, then free it.
1808 				 */
1809 				if (pg->wire_count == 0 && pg->valid == 0 &&
1810 				    pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1811 				    pg->hold_count == 0) {
1812 					vm_page_busy(pg);
1813 					vm_page_free(pg);
1814 				}
1815 				sbunlock(&so->so_snd);
1816 				goto done;
1817 			}
1818 		}
1819 
1820 
1821 		/*
1822 		 * Get a sendfile buf. We usually wait as long as necessary,
1823 		 * but this wait can be interrupted.
1824 		 */
1825 		if ((sf = sf_buf_alloc()) == NULL) {
1826 			vm_page_unwire(pg, 0);
1827 			if (pg->wire_count == 0 && pg->object == NULL)
1828 				vm_page_free(pg);
1829 			sbunlock(&so->so_snd);
1830 			error = EINTR;
1831 			goto done;
1832 		}
1833 
1834 		/*
1835 		 * Allocate a kernel virtual page and insert the physical page
1836 		 * into it.
1837 		 */
1838 		sf->m = pg;
1839 		pmap_qenter(sf->kva, &pg, 1);
1840 		/*
1841 		 * Get an mbuf header and set it up as having external storage.
1842 		 */
1843 		MGETHDR(m, M_TRYWAIT, MT_DATA);
1844 		if (m == NULL) {
1845 			error = ENOBUFS;
1846 			sf_buf_free((void *)sf->kva, NULL);
1847 			sbunlock(&so->so_snd);
1848 			goto done;
1849 		}
1850 		/*
1851 		 * Setup external storage for mbuf.
1852 		 */
1853 		MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, NULL, M_RDONLY,
1854 		    EXT_SFBUF);
1855 		m->m_data = (char *) sf->kva + pgoff;
1856 		m->m_pkthdr.len = m->m_len = xfsize;
1857 		/*
1858 		 * Add the buffer to the socket buffer chain.
1859 		 */
1860 		s = splnet();
1861 retry_space:
1862 		/*
1863 		 * Make sure that the socket is still able to take more data.
1864 		 * CANTSENDMORE being true usually means that the connection
1865 		 * was closed. so_error is true when an error was sensed after
1866 		 * a previous send.
1867 		 * The state is checked after the page mapping and buffer
1868 		 * allocation above since those operations may block and make
1869 		 * any socket checks stale. From this point forward, nothing
1870 		 * blocks before the pru_send (or more accurately, any blocking
1871 		 * results in a loop back to here to re-check).
1872 		 */
1873 		if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1874 			if (so->so_state & SS_CANTSENDMORE) {
1875 				error = EPIPE;
1876 			} else {
1877 				error = so->so_error;
1878 				so->so_error = 0;
1879 			}
1880 			m_freem(m);
1881 			sbunlock(&so->so_snd);
1882 			splx(s);
1883 			goto done;
1884 		}
1885 		/*
1886 		 * Wait for socket space to become available. We do this just
1887 		 * after checking the connection state above in order to avoid
1888 		 * a race condition with sbwait().
1889 		 */
1890 		if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
1891 			if (so->so_state & SS_NBIO) {
1892 				m_freem(m);
1893 				sbunlock(&so->so_snd);
1894 				splx(s);
1895 				error = EAGAIN;
1896 				goto done;
1897 			}
1898 			error = sbwait(&so->so_snd);
1899 			/*
1900 			 * An error from sbwait usually indicates that we've
1901 			 * been interrupted by a signal. If we've sent anything
1902 			 * then return bytes sent, otherwise return the error.
1903 			 */
1904 			if (error) {
1905 				m_freem(m);
1906 				sbunlock(&so->so_snd);
1907 				splx(s);
1908 				goto done;
1909 			}
1910 			goto retry_space;
1911 		}
1912 		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
1913 		splx(s);
1914 		if (error) {
1915 			sbunlock(&so->so_snd);
1916 			goto done;
1917 		}
1918 	}
1919 	sbunlock(&so->so_snd);
1920 
1921 	/*
1922 	 * Send trailers. Wimp out and use writev(2).
1923 	 */
1924 	if (uap->hdtr != NULL && hdtr.trailers != NULL) {
1925 			nuap.fd = uap->s;
1926 			nuap.iovp = hdtr.trailers;
1927 			nuap.iovcnt = hdtr.trl_cnt;
1928 			error = writev(td, &nuap);
1929 			if (error)
1930 				goto done;
1931 			hdtr_size += td->td_retval[0];
1932 	}
1933 
1934 done:
1935 	/*
1936 	 * If there was no error we have to clear td->td_retval[0]
1937 	 * because it may have been set by writev.
1938 	 */
1939 	if (error == 0) {
1940 		td->td_retval[0] = 0;
1941 	}
1942 	if (uap->sbytes != NULL) {
1943 		sbytes += hdtr_size;
1944 		copyout(&sbytes, uap->sbytes, sizeof(off_t));
1945 	}
1946 	if (vp)
1947 		vrele(vp);
1948 	if (so)
1949 		fputsock(so);
1950 	mtx_unlock(&Giant);
1951 	return (error);
1952 }
1953