xref: /freebsd/sys/kern/uipc_syscalls.c (revision 41466b50c1d5bfd1cf6adaae547a579a75d7c04e)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * sendfile(2) and related extensions:
6  * Copyright (c) 1998, David Greenman. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
37  * $FreeBSD$
38  */
39 
40 #include "opt_compat.h"
41 #include "opt_ktrace.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/kernel.h>
46 #include <sys/lock.h>
47 #include <sys/mutex.h>
48 #include <sys/sysproto.h>
49 #include <sys/malloc.h>
50 #include <sys/filedesc.h>
51 #include <sys/event.h>
52 #include <sys/proc.h>
53 #include <sys/fcntl.h>
54 #include <sys/file.h>
55 #include <sys/lock.h>
56 #include <sys/mount.h>
57 #include <sys/mbuf.h>
58 #include <sys/protosw.h>
59 #include <sys/socket.h>
60 #include <sys/socketvar.h>
61 #include <sys/signalvar.h>
62 #include <sys/uio.h>
63 #include <sys/vnode.h>
64 #ifdef KTRACE
65 #include <sys/ktrace.h>
66 #endif
67 
68 #include <vm/vm.h>
69 #include <vm/vm_object.h>
70 #include <vm/vm_page.h>
71 #include <vm/vm_pageout.h>
72 #include <vm/vm_kern.h>
73 #include <vm/vm_extern.h>
74 
75 static void sf_buf_init(void *arg);
76 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
77 static struct sf_buf *sf_buf_alloc(void);
78 static void sf_buf_free(caddr_t addr, void *args);
79 
80 static int sendit __P((struct thread *td, int s, struct msghdr *mp, int flags));
81 static int recvit __P((struct thread *td, int s, struct msghdr *mp,
82 		       caddr_t namelenp));
83 
84 static int accept1 __P((struct thread *td, struct accept_args *uap, int compat));
85 static int getsockname1 __P((struct thread *td, struct getsockname_args *uap,
86 			     int compat));
87 static int getpeername1 __P((struct thread *td, struct getpeername_args *uap,
88 			     int compat));
89 
90 /*
91  * Expanded sf_freelist head.  Really an SLIST_HEAD() in disguise, pairing
92  * the sf_freelist list head with the sf_lock mutex.
93  */
94 static struct {
95 	SLIST_HEAD(, sf_buf) sf_head;
96 	struct mtx sf_lock;
97 } sf_freelist;
98 
99 static vm_offset_t sf_base;
100 static struct sf_buf *sf_bufs;
101 static u_int sf_buf_alloc_want;
102 
103 /*
104  * System call interface to the socket abstraction.
105  */
106 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
107 #define COMPAT_OLDSOCK
108 #endif
109 
110 extern	struct fileops socketops;
111 
112 /*
113  * MPSAFE
114  */
115 int
116 socket(td, uap)
117 	struct thread *td;
118 	register struct socket_args /* {
119 		int	domain;
120 		int	type;
121 		int	protocol;
122 	} */ *uap;
123 {
124 	struct filedesc *fdp;
125 	struct socket *so;
126 	struct file *fp;
127 	int fd, error;
128 
129 	mtx_lock(&Giant);
130 	fdp = td->td_proc->p_fd;
131 	error = falloc(td, &fp, &fd);
132 	if (error)
133 		goto done2;
134 	fhold(fp);
135 	error = socreate(uap->domain, &so, uap->type, uap->protocol, td);
136 	if (error) {
137 		if (fdp->fd_ofiles[fd] == fp) {
138 			fdp->fd_ofiles[fd] = NULL;
139 			fdrop(fp, td);
140 		}
141 	} else {
142 		fp->f_data = (caddr_t)so;
143 		fp->f_flag = FREAD|FWRITE;
144 		fp->f_ops = &socketops;
145 		fp->f_type = DTYPE_SOCKET;
146 		td->td_retval[0] = fd;
147 	}
148 	fdrop(fp, td);
149 done2:
150 	mtx_unlock(&Giant);
151 	return (error);
152 }
153 
154 /*
155  * MPSAFE
156  */
157 /* ARGSUSED */
158 int
159 bind(td, uap)
160 	struct thread *td;
161 	register struct bind_args /* {
162 		int	s;
163 		caddr_t	name;
164 		int	namelen;
165 	} */ *uap;
166 {
167 	struct file *fp;
168 	struct sockaddr *sa;
169 	int error;
170 
171 	mtx_lock(&Giant);
172 	error = holdsock(td->td_proc->p_fd, uap->s, &fp);
173 	if (error)
174 		goto done2;
175 	error = getsockaddr(&sa, uap->name, uap->namelen);
176 	if (error) {
177 		fdrop(fp, td);
178 		goto done2;
179 	}
180 	error = sobind((struct socket *)fp->f_data, sa, td);
181 	FREE(sa, M_SONAME);
182 	fdrop(fp, td);
183 done2:
184 	mtx_unlock(&Giant);
185 	return (error);
186 }
187 
188 /*
189  * MPSAFE
190  */
191 /* ARGSUSED */
192 int
193 listen(td, uap)
194 	struct thread *td;
195 	register struct listen_args /* {
196 		int	s;
197 		int	backlog;
198 	} */ *uap;
199 {
200 	struct file *fp;
201 	int error;
202 
203 	mtx_lock(&Giant);
204 	error = holdsock(td->td_proc->p_fd, uap->s, &fp);
205 	if (error == 0) {
206 		error = solisten((struct socket *)fp->f_data, uap->backlog, td);
207 		fdrop(fp, td);
208 	}
209 	mtx_unlock(&Giant);
210 	return(error);
211 }
212 
213 /*
214  * accept1()
215  * MPSAFE
216  */
217 static int
218 accept1(td, uap, compat)
219 	struct thread *td;
220 	register struct accept_args /* {
221 		int	s;
222 		caddr_t	name;
223 		int	*anamelen;
224 	} */ *uap;
225 	int compat;
226 {
227 	struct filedesc *fdp;
228 	struct file *lfp = NULL;
229 	struct file *nfp = NULL;
230 	struct sockaddr *sa;
231 	int namelen, error, s;
232 	struct socket *head, *so;
233 	int fd;
234 	short fflag;		/* type must match fp->f_flag */
235 
236 	mtx_lock(&Giant);
237 	fdp = td->td_proc->p_fd;
238 	if (uap->name) {
239 		error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen,
240 			sizeof (namelen));
241 		if(error)
242 			goto done2;
243 	}
244 	error = holdsock(fdp, uap->s, &lfp);
245 	if (error)
246 		goto done2;
247 	s = splnet();
248 	head = (struct socket *)lfp->f_data;
249 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
250 		splx(s);
251 		error = EINVAL;
252 		goto done;
253 	}
254 	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
255 		splx(s);
256 		error = EWOULDBLOCK;
257 		goto done;
258 	}
259 	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
260 		if (head->so_state & SS_CANTRCVMORE) {
261 			head->so_error = ECONNABORTED;
262 			break;
263 		}
264 		error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH,
265 		    "accept", 0);
266 		if (error) {
267 			splx(s);
268 			goto done;
269 		}
270 	}
271 	if (head->so_error) {
272 		error = head->so_error;
273 		head->so_error = 0;
274 		splx(s);
275 		goto done;
276 	}
277 
278 	/*
279 	 * At this point we know that there is at least one connection
280 	 * ready to be accepted. Remove it from the queue prior to
281 	 * allocating the file descriptor for it since falloc() may
282 	 * block allowing another process to accept the connection
283 	 * instead.
284 	 */
285 	so = TAILQ_FIRST(&head->so_comp);
286 	TAILQ_REMOVE(&head->so_comp, so, so_list);
287 	head->so_qlen--;
288 
289 	fflag = lfp->f_flag;
290 	error = falloc(td, &nfp, &fd);
291 	if (error) {
292 		/*
293 		 * Probably ran out of file descriptors. Put the
294 		 * unaccepted connection back onto the queue and
295 		 * do another wakeup so some other process might
296 		 * have a chance at it.
297 		 */
298 		TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
299 		head->so_qlen++;
300 		wakeup_one(&head->so_timeo);
301 		splx(s);
302 		goto done;
303 	}
304 	fhold(nfp);
305 	td->td_retval[0] = fd;
306 
307 	/* connection has been removed from the listen queue */
308 	KNOTE(&head->so_rcv.sb_sel.si_note, 0);
309 
310 	so->so_state &= ~SS_COMP;
311 	so->so_head = NULL;
312 	if (head->so_sigio != NULL)
313 		fsetown(fgetown(head->so_sigio), &so->so_sigio);
314 
315 	nfp->f_data = (caddr_t)so;
316 	nfp->f_flag = fflag;
317 	nfp->f_ops = &socketops;
318 	nfp->f_type = DTYPE_SOCKET;
319 	sa = 0;
320 	error = soaccept(so, &sa);
321 	if (error) {
322 		/*
323 		 * return a namelen of zero for older code which might
324 	 	 * ignore the return value from accept.
325 		 */
326 		if (uap->name != NULL) {
327 			namelen = 0;
328 			(void) copyout((caddr_t)&namelen,
329 			    (caddr_t)uap->anamelen, sizeof(*uap->anamelen));
330 		}
331 		goto noconnection;
332 	}
333 	if (sa == NULL) {
334 		namelen = 0;
335 		if (uap->name)
336 			goto gotnoname;
337 		splx(s);
338 		error = 0;
339 		goto done;
340 	}
341 	if (uap->name) {
342 		/* check sa_len before it is destroyed */
343 		if (namelen > sa->sa_len)
344 			namelen = sa->sa_len;
345 #ifdef COMPAT_OLDSOCK
346 		if (compat)
347 			((struct osockaddr *)sa)->sa_family =
348 			    sa->sa_family;
349 #endif
350 		error = copyout(sa, (caddr_t)uap->name, (u_int)namelen);
351 		if (!error)
352 gotnoname:
353 			error = copyout((caddr_t)&namelen,
354 			    (caddr_t)uap->anamelen, sizeof (*uap->anamelen));
355 	}
356 noconnection:
357 	if (sa)
358 		FREE(sa, M_SONAME);
359 
360 	/*
361 	 * close the new descriptor, assuming someone hasn't ripped it
362 	 * out from under us.
363 	 */
364 	if (error) {
365 		if (fdp->fd_ofiles[fd] == nfp) {
366 			fdp->fd_ofiles[fd] = NULL;
367 			fdrop(nfp, td);
368 		}
369 	}
370 	splx(s);
371 
372 	/*
373 	 * Release explicitly held references before returning.
374 	 */
375 done:
376 	if (nfp != NULL)
377 		fdrop(nfp, td);
378 	fdrop(lfp, td);
379 done2:
380 	mtx_unlock(&Giant);
381 	return (error);
382 }
383 
/*
 * accept() system call: accept1() with the compat argument set to 0.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
accept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{
	int error;

	error = accept1(td, uap, 0);
	return (error);
}
395 
396 #ifdef COMPAT_OLDSOCK
/*
 * oaccept() system call: accept1() with the compat argument set to 1.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
oaccept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{
	int error;

	error = accept1(td, uap, 1);
	return (error);
}
408 #endif /* COMPAT_OLDSOCK */
409 
410 /*
411  * MPSAFE
412  */
413 /* ARGSUSED */
414 int
415 connect(td, uap)
416 	struct thread *td;
417 	register struct connect_args /* {
418 		int	s;
419 		caddr_t	name;
420 		int	namelen;
421 	} */ *uap;
422 {
423 	struct file *fp;
424 	register struct socket *so;
425 	struct sockaddr *sa;
426 	int error, s;
427 
428 	mtx_lock(&Giant);
429 	error = holdsock(td->td_proc->p_fd, uap->s, &fp);
430 	if (error)
431 		goto done2;
432 	so = (struct socket *)fp->f_data;
433 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
434 		error = EALREADY;
435 		goto done;
436 	}
437 	error = getsockaddr(&sa, uap->name, uap->namelen);
438 	if (error)
439 		goto done;
440 	error = soconnect(so, sa, td);
441 	if (error)
442 		goto bad;
443 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
444 		FREE(sa, M_SONAME);
445 		error = EINPROGRESS;
446 		goto done;
447 	}
448 	s = splnet();
449 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
450 		error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH,
451 		    "connec", 0);
452 		if (error)
453 			break;
454 	}
455 	if (error == 0) {
456 		error = so->so_error;
457 		so->so_error = 0;
458 	}
459 	splx(s);
460 bad:
461 	so->so_state &= ~SS_ISCONNECTING;
462 	FREE(sa, M_SONAME);
463 	if (error == ERESTART)
464 		error = EINTR;
465 done:
466 	fdrop(fp, td);
467 done2:
468 	mtx_unlock(&Giant);
469 	return (error);
470 }
471 
472 /*
473  * MPSAFE
474  */
475 int
476 socketpair(td, uap)
477 	struct thread *td;
478 	register struct socketpair_args /* {
479 		int	domain;
480 		int	type;
481 		int	protocol;
482 		int	*rsv;
483 	} */ *uap;
484 {
485 	register struct filedesc *fdp = td->td_proc->p_fd;
486 	struct file *fp1, *fp2;
487 	struct socket *so1, *so2;
488 	int fd, error, sv[2];
489 
490 	mtx_lock(&Giant);
491 	error = socreate(uap->domain, &so1, uap->type, uap->protocol, td);
492 	if (error)
493 		goto done2;
494 	error = socreate(uap->domain, &so2, uap->type, uap->protocol, td);
495 	if (error)
496 		goto free1;
497 	error = falloc(td, &fp1, &fd);
498 	if (error)
499 		goto free2;
500 	fhold(fp1);
501 	sv[0] = fd;
502 	fp1->f_data = (caddr_t)so1;
503 	error = falloc(td, &fp2, &fd);
504 	if (error)
505 		goto free3;
506 	fhold(fp2);
507 	fp2->f_data = (caddr_t)so2;
508 	sv[1] = fd;
509 	error = soconnect2(so1, so2);
510 	if (error)
511 		goto free4;
512 	if (uap->type == SOCK_DGRAM) {
513 		/*
514 		 * Datagram socket connection is asymmetric.
515 		 */
516 		 error = soconnect2(so2, so1);
517 		 if (error)
518 			goto free4;
519 	}
520 	fp1->f_flag = fp2->f_flag = FREAD|FWRITE;
521 	fp1->f_ops = fp2->f_ops = &socketops;
522 	fp1->f_type = fp2->f_type = DTYPE_SOCKET;
523 	error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int));
524 	fdrop(fp1, td);
525 	fdrop(fp2, td);
526 	goto done2;
527 free4:
528 	if (fdp->fd_ofiles[sv[1]] == fp2) {
529 		fdp->fd_ofiles[sv[1]] = NULL;
530 		fdrop(fp2, td);
531 	}
532 	fdrop(fp2, td);
533 free3:
534 	if (fdp->fd_ofiles[sv[0]] == fp1) {
535 		fdp->fd_ofiles[sv[0]] = NULL;
536 		fdrop(fp1, td);
537 	}
538 	fdrop(fp1, td);
539 free2:
540 	(void)soclose(so2);
541 free1:
542 	(void)soclose(so1);
543 done2:
544 	mtx_unlock(&Giant);
545 	return (error);
546 }
547 
548 static int
549 sendit(td, s, mp, flags)
550 	register struct thread *td;
551 	int s;
552 	register struct msghdr *mp;
553 	int flags;
554 {
555 	struct file *fp;
556 	struct uio auio;
557 	register struct iovec *iov;
558 	register int i;
559 	struct mbuf *control;
560 	struct sockaddr *to;
561 	int len, error;
562 	struct socket *so;
563 #ifdef KTRACE
564 	struct iovec *ktriov = NULL;
565 	struct uio ktruio;
566 #endif
567 
568 	error = holdsock(td->td_proc->p_fd, s, &fp);
569 	if (error)
570 		return (error);
571 	auio.uio_iov = mp->msg_iov;
572 	auio.uio_iovcnt = mp->msg_iovlen;
573 	auio.uio_segflg = UIO_USERSPACE;
574 	auio.uio_rw = UIO_WRITE;
575 	auio.uio_td = td;
576 	auio.uio_offset = 0;			/* XXX */
577 	auio.uio_resid = 0;
578 	iov = mp->msg_iov;
579 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
580 		if ((auio.uio_resid += iov->iov_len) < 0) {
581 			fdrop(fp, td);
582 			return (EINVAL);
583 		}
584 	}
585 	if (mp->msg_name) {
586 		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
587 		if (error) {
588 			fdrop(fp, td);
589 			return (error);
590 		}
591 	} else {
592 		to = 0;
593 	}
594 	if (mp->msg_control) {
595 		if (mp->msg_controllen < sizeof(struct cmsghdr)
596 #ifdef COMPAT_OLDSOCK
597 		    && mp->msg_flags != MSG_COMPAT
598 #endif
599 		) {
600 			error = EINVAL;
601 			goto bad;
602 		}
603 		error = sockargs(&control, mp->msg_control,
604 		    mp->msg_controllen, MT_CONTROL);
605 		if (error)
606 			goto bad;
607 #ifdef COMPAT_OLDSOCK
608 		if (mp->msg_flags == MSG_COMPAT) {
609 			register struct cmsghdr *cm;
610 
611 			M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
612 			if (control == 0) {
613 				error = ENOBUFS;
614 				goto bad;
615 			} else {
616 				cm = mtod(control, struct cmsghdr *);
617 				cm->cmsg_len = control->m_len;
618 				cm->cmsg_level = SOL_SOCKET;
619 				cm->cmsg_type = SCM_RIGHTS;
620 			}
621 		}
622 #endif
623 	} else {
624 		control = 0;
625 	}
626 #ifdef KTRACE
627 	if (KTRPOINT(td->td_proc, KTR_GENIO)) {
628 		int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
629 
630 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
631 		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
632 		ktruio = auio;
633 	}
634 #endif
635 	len = auio.uio_resid;
636 	so = (struct socket *)fp->f_data;
637 	error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control,
638 						     flags, td);
639 	if (error) {
640 		if (auio.uio_resid != len && (error == ERESTART ||
641 		    error == EINTR || error == EWOULDBLOCK))
642 			error = 0;
643 		if (error == EPIPE) {
644 			PROC_LOCK(td->td_proc);
645 			psignal(td->td_proc, SIGPIPE);
646 			PROC_UNLOCK(td->td_proc);
647 		}
648 	}
649 	if (error == 0)
650 		td->td_retval[0] = len - auio.uio_resid;
651 #ifdef KTRACE
652 	if (ktriov != NULL) {
653 		if (error == 0) {
654 			ktruio.uio_iov = ktriov;
655 			ktruio.uio_resid = td->td_retval[0];
656 			ktrgenio(td->td_proc->p_tracep, s, UIO_WRITE, &ktruio, error);
657 		}
658 		FREE(ktriov, M_TEMP);
659 	}
660 #endif
661 bad:
662 	fdrop(fp, td);
663 	if (to)
664 		FREE(to, M_SONAME);
665 	return (error);
666 }
667 
668 /*
669  * MPSAFE
670  */
671 int
672 sendto(td, uap)
673 	struct thread *td;
674 	register struct sendto_args /* {
675 		int	s;
676 		caddr_t	buf;
677 		size_t	len;
678 		int	flags;
679 		caddr_t	to;
680 		int	tolen;
681 	} */ *uap;
682 {
683 	struct msghdr msg;
684 	struct iovec aiov;
685 	int error;
686 
687 	msg.msg_name = uap->to;
688 	msg.msg_namelen = uap->tolen;
689 	msg.msg_iov = &aiov;
690 	msg.msg_iovlen = 1;
691 	msg.msg_control = 0;
692 #ifdef COMPAT_OLDSOCK
693 	msg.msg_flags = 0;
694 #endif
695 	aiov.iov_base = uap->buf;
696 	aiov.iov_len = uap->len;
697 	mtx_lock(&Giant);
698 	error = sendit(td, uap->s, &msg, uap->flags);
699 	mtx_unlock(&Giant);
700 	return (error);
701 }
702 
703 #ifdef COMPAT_OLDSOCK
704 /*
705  * MPSAFE
706  */
707 int
708 osend(td, uap)
709 	struct thread *td;
710 	register struct osend_args /* {
711 		int	s;
712 		caddr_t	buf;
713 		int	len;
714 		int	flags;
715 	} */ *uap;
716 {
717 	struct msghdr msg;
718 	struct iovec aiov;
719 	int error;
720 
721 	msg.msg_name = 0;
722 	msg.msg_namelen = 0;
723 	msg.msg_iov = &aiov;
724 	msg.msg_iovlen = 1;
725 	aiov.iov_base = uap->buf;
726 	aiov.iov_len = uap->len;
727 	msg.msg_control = 0;
728 	msg.msg_flags = 0;
729 	mtx_lock(&Giant);
730 	error = sendit(td, uap->s, &msg, uap->flags);
731 	mtx_unlock(&Giant);
732 	return (error);
733 }
734 
735 /*
736  * MPSAFE
737  */
738 int
739 osendmsg(td, uap)
740 	struct thread *td;
741 	register struct osendmsg_args /* {
742 		int	s;
743 		caddr_t	msg;
744 		int	flags;
745 	} */ *uap;
746 {
747 	struct msghdr msg;
748 	struct iovec aiov[UIO_SMALLIOV], *iov;
749 	int error;
750 
751 	mtx_lock(&Giant);
752 	error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr));
753 	if (error)
754 		goto done2;
755 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
756 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
757 			error = EMSGSIZE;
758 			goto done2;
759 		}
760 		MALLOC(iov, struct iovec *,
761 		      sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
762 		      M_WAITOK);
763 	} else {
764 		iov = aiov;
765 	}
766 	error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
767 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
768 	if (error)
769 		goto done;
770 	msg.msg_flags = MSG_COMPAT;
771 	msg.msg_iov = iov;
772 	error = sendit(td, uap->s, &msg, uap->flags);
773 done:
774 	if (iov != aiov)
775 		FREE(iov, M_IOV);
776 done2:
777 	mtx_unlock(&Giant);
778 	return (error);
779 }
780 #endif
781 
782 /*
783  * MPSAFE
784  */
785 int
786 sendmsg(td, uap)
787 	struct thread *td;
788 	register struct sendmsg_args /* {
789 		int	s;
790 		caddr_t	msg;
791 		int	flags;
792 	} */ *uap;
793 {
794 	struct msghdr msg;
795 	struct iovec aiov[UIO_SMALLIOV], *iov;
796 	int error;
797 
798 	mtx_lock(&Giant);
799 	error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg));
800 	if (error)
801 		goto done2;
802 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
803 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
804 			error = EMSGSIZE;
805 			goto done2;
806 		}
807 		MALLOC(iov, struct iovec *,
808 		       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
809 		       M_WAITOK);
810 	} else {
811 		iov = aiov;
812 	}
813 	if (msg.msg_iovlen &&
814 	    (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
815 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
816 		goto done;
817 	msg.msg_iov = iov;
818 #ifdef COMPAT_OLDSOCK
819 	msg.msg_flags = 0;
820 #endif
821 	error = sendit(td, uap->s, &msg, uap->flags);
822 done:
823 	if (iov != aiov)
824 		FREE(iov, M_IOV);
825 done2:
826 	mtx_unlock(&Giant);
827 	return (error);
828 }
829 
830 static int
831 recvit(td, s, mp, namelenp)
832 	register struct thread *td;
833 	int s;
834 	register struct msghdr *mp;
835 	caddr_t namelenp;
836 {
837 	struct file *fp;
838 	struct uio auio;
839 	register struct iovec *iov;
840 	register int i;
841 	int len, error;
842 	struct mbuf *m, *control = 0;
843 	caddr_t ctlbuf;
844 	struct socket *so;
845 	struct sockaddr *fromsa = 0;
846 #ifdef KTRACE
847 	struct iovec *ktriov = NULL;
848 	struct uio ktruio;
849 #endif
850 
851 	error = holdsock(td->td_proc->p_fd, s, &fp);
852 	if (error)
853 		return (error);
854 	auio.uio_iov = mp->msg_iov;
855 	auio.uio_iovcnt = mp->msg_iovlen;
856 	auio.uio_segflg = UIO_USERSPACE;
857 	auio.uio_rw = UIO_READ;
858 	auio.uio_td = td;
859 	auio.uio_offset = 0;			/* XXX */
860 	auio.uio_resid = 0;
861 	iov = mp->msg_iov;
862 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
863 		if ((auio.uio_resid += iov->iov_len) < 0) {
864 			fdrop(fp, td);
865 			return (EINVAL);
866 		}
867 	}
868 #ifdef KTRACE
869 	if (KTRPOINT(td->td_proc, KTR_GENIO)) {
870 		int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
871 
872 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
873 		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
874 		ktruio = auio;
875 	}
876 #endif
877 	len = auio.uio_resid;
878 	so = (struct socket *)fp->f_data;
879 	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
880 	    (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
881 	    &mp->msg_flags);
882 	if (error) {
883 		if (auio.uio_resid != len && (error == ERESTART ||
884 		    error == EINTR || error == EWOULDBLOCK))
885 			error = 0;
886 	}
887 #ifdef KTRACE
888 	if (ktriov != NULL) {
889 		if (error == 0) {
890 			ktruio.uio_iov = ktriov;
891 			ktruio.uio_resid = len - auio.uio_resid;
892 			ktrgenio(td->td_proc->p_tracep, s, UIO_READ, &ktruio, error);
893 		}
894 		FREE(ktriov, M_TEMP);
895 	}
896 #endif
897 	if (error)
898 		goto out;
899 	td->td_retval[0] = len - auio.uio_resid;
900 	if (mp->msg_name) {
901 		len = mp->msg_namelen;
902 		if (len <= 0 || fromsa == 0)
903 			len = 0;
904 		else {
905 #ifndef MIN
906 #define MIN(a,b) ((a)>(b)?(b):(a))
907 #endif
908 			/* save sa_len before it is destroyed by MSG_COMPAT */
909 			len = MIN(len, fromsa->sa_len);
910 #ifdef COMPAT_OLDSOCK
911 			if (mp->msg_flags & MSG_COMPAT)
912 				((struct osockaddr *)fromsa)->sa_family =
913 				    fromsa->sa_family;
914 #endif
915 			error = copyout(fromsa,
916 			    (caddr_t)mp->msg_name, (unsigned)len);
917 			if (error)
918 				goto out;
919 		}
920 		mp->msg_namelen = len;
921 		if (namelenp &&
922 		    (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) {
923 #ifdef COMPAT_OLDSOCK
924 			if (mp->msg_flags & MSG_COMPAT)
925 				error = 0;	/* old recvfrom didn't check */
926 			else
927 #endif
928 			goto out;
929 		}
930 	}
931 	if (mp->msg_control) {
932 #ifdef COMPAT_OLDSOCK
933 		/*
934 		 * We assume that old recvmsg calls won't receive access
935 		 * rights and other control info, esp. as control info
936 		 * is always optional and those options didn't exist in 4.3.
937 		 * If we receive rights, trim the cmsghdr; anything else
938 		 * is tossed.
939 		 */
940 		if (control && mp->msg_flags & MSG_COMPAT) {
941 			if (mtod(control, struct cmsghdr *)->cmsg_level !=
942 			    SOL_SOCKET ||
943 			    mtod(control, struct cmsghdr *)->cmsg_type !=
944 			    SCM_RIGHTS) {
945 				mp->msg_controllen = 0;
946 				goto out;
947 			}
948 			control->m_len -= sizeof (struct cmsghdr);
949 			control->m_data += sizeof (struct cmsghdr);
950 		}
951 #endif
952 		len = mp->msg_controllen;
953 		m = control;
954 		mp->msg_controllen = 0;
955 		ctlbuf = (caddr_t) mp->msg_control;
956 
957 		while (m && len > 0) {
958 			unsigned int tocopy;
959 
960 			if (len >= m->m_len)
961 				tocopy = m->m_len;
962 			else {
963 				mp->msg_flags |= MSG_CTRUNC;
964 				tocopy = len;
965 			}
966 
967 			if ((error = copyout((caddr_t)mtod(m, caddr_t),
968 					ctlbuf, tocopy)) != 0)
969 				goto out;
970 
971 			ctlbuf += tocopy;
972 			len -= tocopy;
973 			m = m->m_next;
974 		}
975 		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
976 	}
977 out:
978 	fdrop(fp, td);
979 	if (fromsa)
980 		FREE(fromsa, M_SONAME);
981 	if (control)
982 		m_freem(control);
983 	return (error);
984 }
985 
986 /*
987  * MPSAFE
988  */
989 int
990 recvfrom(td, uap)
991 	struct thread *td;
992 	register struct recvfrom_args /* {
993 		int	s;
994 		caddr_t	buf;
995 		size_t	len;
996 		int	flags;
997 		caddr_t	from;
998 		int	*fromlenaddr;
999 	} */ *uap;
1000 {
1001 	struct msghdr msg;
1002 	struct iovec aiov;
1003 	int error;
1004 
1005 	mtx_lock(&Giant);
1006 	if (uap->fromlenaddr) {
1007 		error = copyin((caddr_t)uap->fromlenaddr,
1008 		    (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
1009 		if (error)
1010 			goto done2;
1011 	} else {
1012 		msg.msg_namelen = 0;
1013 	}
1014 	msg.msg_name = uap->from;
1015 	msg.msg_iov = &aiov;
1016 	msg.msg_iovlen = 1;
1017 	aiov.iov_base = uap->buf;
1018 	aiov.iov_len = uap->len;
1019 	msg.msg_control = 0;
1020 	msg.msg_flags = uap->flags;
1021 	error = recvit(td, uap->s, &msg, (caddr_t)uap->fromlenaddr);
1022 done2:
1023 	mtx_unlock(&Giant);
1024 	return(error);
1025 }
1026 
1027 #ifdef COMPAT_OLDSOCK
1028 /*
1029  * MPSAFE
1030  */
1031 int
1032 orecvfrom(td, uap)
1033 	struct thread *td;
1034 	struct recvfrom_args *uap;
1035 {
1036 
1037 	uap->flags |= MSG_COMPAT;
1038 	return (recvfrom(td, uap));
1039 }
1040 #endif
1041 
1042 
1043 #ifdef COMPAT_OLDSOCK
1044 /*
1045  * MPSAFE
1046  */
1047 int
1048 orecv(td, uap)
1049 	struct thread *td;
1050 	register struct orecv_args /* {
1051 		int	s;
1052 		caddr_t	buf;
1053 		int	len;
1054 		int	flags;
1055 	} */ *uap;
1056 {
1057 	struct msghdr msg;
1058 	struct iovec aiov;
1059 	int error;
1060 
1061 	mtx_lock(&Giant);
1062 	msg.msg_name = 0;
1063 	msg.msg_namelen = 0;
1064 	msg.msg_iov = &aiov;
1065 	msg.msg_iovlen = 1;
1066 	aiov.iov_base = uap->buf;
1067 	aiov.iov_len = uap->len;
1068 	msg.msg_control = 0;
1069 	msg.msg_flags = uap->flags;
1070 	error = recvit(td, uap->s, &msg, (caddr_t)0);
1071 	mtx_unlock(&Giant);
1072 	return (error);
1073 }
1074 
1075 /*
1076  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1077  * overlays the new one, missing only the flags, and with the (old) access
1078  * rights where the control fields are now.
1079  *
1080  * MPSAFE
1081  */
1082 int
1083 orecvmsg(td, uap)
1084 	struct thread *td;
1085 	register struct orecvmsg_args /* {
1086 		int	s;
1087 		struct	omsghdr *msg;
1088 		int	flags;
1089 	} */ *uap;
1090 {
1091 	struct msghdr msg;
1092 	struct iovec aiov[UIO_SMALLIOV], *iov;
1093 	int error;
1094 
1095 	error = copyin((caddr_t)uap->msg, (caddr_t)&msg,
1096 	    sizeof (struct omsghdr));
1097 	if (error)
1098 		return (error);
1099 
1100 	mtx_lock(&Giant);
1101 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1102 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1103 			error = EMSGSIZE;
1104 			goto done2;
1105 		}
1106 		MALLOC(iov, struct iovec *,
1107 		      sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1108 		      M_WAITOK);
1109 	} else {
1110 		iov = aiov;
1111 	}
1112 	msg.msg_flags = uap->flags | MSG_COMPAT;
1113 	error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
1114 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1115 	if (error)
1116 		goto done;
1117 	msg.msg_iov = iov;
1118 	error = recvit(td, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen);
1119 
1120 	if (msg.msg_controllen && error == 0)
1121 		error = copyout((caddr_t)&msg.msg_controllen,
1122 		    (caddr_t)&uap->msg->msg_accrightslen, sizeof (int));
1123 done:
1124 	if (iov != aiov)
1125 		FREE(iov, M_IOV);
1126 done2:
1127 	mtx_unlock(&Giant);
1128 	return (error);
1129 }
1130 #endif
1131 
1132 /*
1133  * MPSAFE
1134  */
1135 int
1136 recvmsg(td, uap)
1137 	struct thread *td;
1138 	register struct recvmsg_args /* {
1139 		int	s;
1140 		struct	msghdr *msg;
1141 		int	flags;
1142 	} */ *uap;
1143 {
1144 	struct msghdr msg;
1145 	struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
1146 	register int error;
1147 
1148 	mtx_lock(&Giant);
1149 	error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg));
1150 	if (error)
1151 		goto done2;
1152 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1153 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1154 			error = EMSGSIZE;
1155 			goto done2;
1156 		}
1157 		MALLOC(iov, struct iovec *,
1158 		       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1159 		       M_WAITOK);
1160 	} else {
1161 		iov = aiov;
1162 	}
1163 #ifdef COMPAT_OLDSOCK
1164 	msg.msg_flags = uap->flags &~ MSG_COMPAT;
1165 #else
1166 	msg.msg_flags = uap->flags;
1167 #endif
1168 	uiov = msg.msg_iov;
1169 	msg.msg_iov = iov;
1170 	error = copyin((caddr_t)uiov, (caddr_t)iov,
1171 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1172 	if (error)
1173 		goto done;
1174 	error = recvit(td, uap->s, &msg, (caddr_t)0);
1175 	if (!error) {
1176 		msg.msg_iov = uiov;
1177 		error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg));
1178 	}
1179 done:
1180 	if (iov != aiov)
1181 		FREE(iov, M_IOV);
1182 done2:
1183 	mtx_unlock(&Giant);
1184 	return (error);
1185 }
1186 
1187 /*
1188  * MPSAFE
1189  */
1190 /* ARGSUSED */
1191 int
1192 shutdown(td, uap)
1193 	struct thread *td;
1194 	register struct shutdown_args /* {
1195 		int	s;
1196 		int	how;
1197 	} */ *uap;
1198 {
1199 	struct file *fp;
1200 	int error;
1201 
1202 	mtx_lock(&Giant);
1203 	error = holdsock(td->td_proc->p_fd, uap->s, &fp);
1204 	if (error == 0) {
1205 		error = soshutdown((struct socket *)fp->f_data, uap->how);
1206 		fdrop(fp, td);
1207 	}
1208 	mtx_unlock(&Giant);
1209 	return(error);
1210 }
1211 
1212 /*
1213  * MPSAFE
1214  */
1215 /* ARGSUSED */
1216 int
1217 setsockopt(td, uap)
1218 	struct thread *td;
1219 	register struct setsockopt_args /* {
1220 		int	s;
1221 		int	level;
1222 		int	name;
1223 		caddr_t	val;
1224 		int	valsize;
1225 	} */ *uap;
1226 {
1227 	struct file *fp;
1228 	struct sockopt sopt;
1229 	int error;
1230 
1231 	if (uap->val == 0 && uap->valsize != 0)
1232 		return (EFAULT);
1233 	if (uap->valsize < 0)
1234 		return (EINVAL);
1235 
1236 	mtx_lock(&Giant);
1237 	error = holdsock(td->td_proc->p_fd, uap->s, &fp);
1238 	if (error == 0) {
1239 		sopt.sopt_dir = SOPT_SET;
1240 		sopt.sopt_level = uap->level;
1241 		sopt.sopt_name = uap->name;
1242 		sopt.sopt_val = uap->val;
1243 		sopt.sopt_valsize = uap->valsize;
1244 		sopt.sopt_td = td;
1245 		error = sosetopt((struct socket *)fp->f_data, &sopt);
1246 		fdrop(fp, td);
1247 	}
1248 	mtx_unlock(&Giant);
1249 	return(error);
1250 }
1251 
1252 /*
1253  * MPSAFE
1254  */
1255 /* ARGSUSED */
1256 int
1257 getsockopt(td, uap)
1258 	struct thread *td;
1259 	register struct getsockopt_args /* {
1260 		int	s;
1261 		int	level;
1262 		int	name;
1263 		caddr_t	val;
1264 		int	*avalsize;
1265 	} */ *uap;
1266 {
1267 	int	valsize, error;
1268 	struct	file *fp;
1269 	struct	sockopt sopt;
1270 
1271 	mtx_lock(&Giant);
1272 	error = holdsock(td->td_proc->p_fd, uap->s, &fp);
1273 	if (error)
1274 		goto done2;
1275 	if (uap->val) {
1276 		error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize,
1277 		    sizeof (valsize));
1278 		if (error) {
1279 			fdrop(fp, td);
1280 			goto done2;
1281 		}
1282 		if (valsize < 0) {
1283 			fdrop(fp, td);
1284 			error = EINVAL;
1285 			goto done2;
1286 		}
1287 	} else {
1288 		valsize = 0;
1289 	}
1290 
1291 	sopt.sopt_dir = SOPT_GET;
1292 	sopt.sopt_level = uap->level;
1293 	sopt.sopt_name = uap->name;
1294 	sopt.sopt_val = uap->val;
1295 	sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1296 	sopt.sopt_td = td;
1297 
1298 	error = sogetopt((struct socket *)fp->f_data, &sopt);
1299 	if (error == 0) {
1300 		valsize = sopt.sopt_valsize;
1301 		error = copyout((caddr_t)&valsize,
1302 				(caddr_t)uap->avalsize, sizeof (valsize));
1303 	}
1304 	fdrop(fp, td);
1305 done2:
1306 	mtx_unlock(&Giant);
1307 	return (error);
1308 }
1309 
1310 /*
1311  * getsockname1() - Get socket name.
1312  *
1313  * MPSAFE
1314  */
1315 /* ARGSUSED */
1316 static int
1317 getsockname1(td, uap, compat)
1318 	struct thread *td;
1319 	register struct getsockname_args /* {
1320 		int	fdes;
1321 		caddr_t	asa;
1322 		int	*alen;
1323 	} */ *uap;
1324 	int compat;
1325 {
1326 	struct file *fp;
1327 	register struct socket *so;
1328 	struct sockaddr *sa;
1329 	int len, error;
1330 
1331 	mtx_lock(&Giant);
1332 	error = holdsock(td->td_proc->p_fd, uap->fdes, &fp);
1333 	if (error)
1334 		goto done2;
1335 	error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1336 	if (error) {
1337 		fdrop(fp, td);
1338 		goto done2;
1339 	}
1340 	so = (struct socket *)fp->f_data;
1341 	sa = 0;
1342 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1343 	if (error)
1344 		goto bad;
1345 	if (sa == 0) {
1346 		len = 0;
1347 		goto gotnothing;
1348 	}
1349 
1350 	len = MIN(len, sa->sa_len);
1351 #ifdef COMPAT_OLDSOCK
1352 	if (compat)
1353 		((struct osockaddr *)sa)->sa_family = sa->sa_family;
1354 #endif
1355 	error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1356 	if (error == 0)
1357 gotnothing:
1358 		error = copyout((caddr_t)&len, (caddr_t)uap->alen,
1359 		    sizeof (len));
1360 bad:
1361 	if (sa)
1362 		FREE(sa, M_SONAME);
1363 	fdrop(fp, td);
1364 done2:
1365 	mtx_unlock(&Giant);
1366 	return (error);
1367 }
1368 
/*
 * getsockname() - getsockname(2) system call.
 *
 * Thin wrapper around getsockname1() with the compat flag cleared, so
 * the address is returned without the osockaddr family rewrite.
 *
 * MPSAFE
 */
int
getsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(td, uap, 0);
	return (error);
}
1380 
#ifdef COMPAT_OLDSOCK
/*
 * ogetsockname() - old-socket-ABI variant of getsockname().
 *
 * Thin wrapper around getsockname1() with the compat flag set, so the
 * address family is stored through the struct osockaddr layout before
 * being copied out.
 *
 * MPSAFE
 */
int
ogetsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(td, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
1394 
1395 /*
1396  * getpeername1() - Get name of peer for connected socket.
1397  *
1398  * MPSAFE
1399  */
1400 /* ARGSUSED */
1401 static int
1402 getpeername1(td, uap, compat)
1403 	struct thread *td;
1404 	register struct getpeername_args /* {
1405 		int	fdes;
1406 		caddr_t	asa;
1407 		int	*alen;
1408 	} */ *uap;
1409 	int compat;
1410 {
1411 	struct file *fp;
1412 	register struct socket *so;
1413 	struct sockaddr *sa;
1414 	int len, error;
1415 
1416 	mtx_lock(&Giant);
1417 	error = holdsock(td->td_proc->p_fd, uap->fdes, &fp);
1418 	if (error)
1419 		goto done2;
1420 	so = (struct socket *)fp->f_data;
1421 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1422 		fdrop(fp, td);
1423 		error = ENOTCONN;
1424 		goto done2;
1425 	}
1426 	error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1427 	if (error) {
1428 		fdrop(fp, td);
1429 		goto done2;
1430 	}
1431 	sa = 0;
1432 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1433 	if (error)
1434 		goto bad;
1435 	if (sa == 0) {
1436 		len = 0;
1437 		goto gotnothing;
1438 	}
1439 	len = MIN(len, sa->sa_len);
1440 #ifdef COMPAT_OLDSOCK
1441 	if (compat)
1442 		((struct osockaddr *)sa)->sa_family =
1443 		    sa->sa_family;
1444 #endif
1445 	error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1446 	if (error)
1447 		goto bad;
1448 gotnothing:
1449 	error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len));
1450 bad:
1451 	if (sa)
1452 		FREE(sa, M_SONAME);
1453 	fdrop(fp, td);
1454 done2:
1455 	mtx_unlock(&Giant);
1456 	return (error);
1457 }
1458 
/*
 * getpeername() - getpeername(2) system call.
 *
 * Thin wrapper around getpeername1() with the compat flag cleared, so
 * the address is returned without the osockaddr family rewrite.
 *
 * MPSAFE
 */
int
getpeername(td, uap)
	struct thread *td;
	struct getpeername_args *uap;
{
	int error;

	error = getpeername1(td, uap, 0);
	return (error);
}
1470 
#ifdef COMPAT_OLDSOCK
/*
 * ogetpeername() - old-socket-ABI variant of getpeername().
 *
 * Thin wrapper around getpeername1() with the compat flag set.  The
 * argument structure is passed through as a struct getpeername_args.
 *
 * MPSAFE
 */
int
ogetpeername(td, uap)
	struct thread *td;
	struct ogetpeername_args *uap;
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(td, args, 1));
}
#endif /* COMPAT_OLDSOCK */
1485 
1486 int
1487 sockargs(mp, buf, buflen, type)
1488 	struct mbuf **mp;
1489 	caddr_t buf;
1490 	int buflen, type;
1491 {
1492 	register struct sockaddr *sa;
1493 	register struct mbuf *m;
1494 	int error;
1495 
1496 	if ((u_int)buflen > MLEN) {
1497 #ifdef COMPAT_OLDSOCK
1498 		if (type == MT_SONAME && (u_int)buflen <= 112)
1499 			buflen = MLEN;		/* unix domain compat. hack */
1500 		else
1501 #endif
1502 		return (EINVAL);
1503 	}
1504 	m = m_get(M_TRYWAIT, type);
1505 	if (m == NULL)
1506 		return (ENOBUFS);
1507 	m->m_len = buflen;
1508 	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1509 	if (error)
1510 		(void) m_free(m);
1511 	else {
1512 		*mp = m;
1513 		if (type == MT_SONAME) {
1514 			sa = mtod(m, struct sockaddr *);
1515 
1516 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1517 			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1518 				sa->sa_family = sa->sa_len;
1519 #endif
1520 			sa->sa_len = buflen;
1521 		}
1522 	}
1523 	return (error);
1524 }
1525 
1526 int
1527 getsockaddr(namp, uaddr, len)
1528 	struct sockaddr **namp;
1529 	caddr_t uaddr;
1530 	size_t len;
1531 {
1532 	struct sockaddr *sa;
1533 	int error;
1534 
1535 	if (len > SOCK_MAXADDRLEN)
1536 		return ENAMETOOLONG;
1537 	MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1538 	error = copyin(uaddr, sa, len);
1539 	if (error) {
1540 		FREE(sa, M_SONAME);
1541 	} else {
1542 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1543 		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1544 			sa->sa_family = sa->sa_len;
1545 #endif
1546 		sa->sa_len = len;
1547 		*namp = sa;
1548 	}
1549 	return error;
1550 }
1551 
1552 /*
1553  * holdsock() - load the struct file pointer associated
1554  * with a socket into *fpp.  If an error occurs, non-zero
1555  * will be returned and *fpp will be set to NULL.
1556  */
1557 int
1558 holdsock(fdp, fdes, fpp)
1559 	struct filedesc *fdp;
1560 	int fdes;
1561 	struct file **fpp;
1562 {
1563 	register struct file *fp = NULL;
1564 	int error = 0;
1565 
1566 	if ((unsigned)fdes >= fdp->fd_nfiles ||
1567 	    (fp = fdp->fd_ofiles[fdes]) == NULL) {
1568 		error = EBADF;
1569 	} else if (fp->f_type != DTYPE_SOCKET) {
1570 		error = ENOTSOCK;
1571 		fp = NULL;
1572 	} else {
1573 		fhold(fp);
1574 	}
1575 	*fpp = fp;
1576 	return(error);
1577 }
1578 
1579 /*
1580  * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
1581  * XXX - The sf_buf functions are currently private to sendfile(2), so have
1582  * been made static, but may be useful in the future for doing zero-copy in
1583  * other parts of the networking code.
1584  */
1585 static void
1586 sf_buf_init(void *arg)
1587 {
1588 	int i;
1589 
1590 	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", MTX_DEF);
1591 	mtx_lock(&sf_freelist.sf_lock);
1592 	SLIST_INIT(&sf_freelist.sf_head);
1593 	sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE);
1594 	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
1595 	    M_NOWAIT | M_ZERO);
1596 	for (i = 0; i < nsfbufs; i++) {
1597 		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
1598 		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
1599 	}
1600 	sf_buf_alloc_want = 0;
1601 	mtx_unlock(&sf_freelist.sf_lock);
1602 }
1603 
1604 /*
1605  * Get an sf_buf from the freelist. Will block if none are available.
1606  */
1607 static struct sf_buf *
1608 sf_buf_alloc()
1609 {
1610 	struct sf_buf *sf;
1611 	int error;
1612 
1613 	mtx_lock(&sf_freelist.sf_lock);
1614 	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
1615 		sf_buf_alloc_want++;
1616 		error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH,
1617 		    "sfbufa", 0);
1618 		sf_buf_alloc_want--;
1619 
1620 		/*
1621 		 * If we got a signal, don't risk going back to sleep.
1622 		 */
1623 		if (error)
1624 			break;
1625 	}
1626 	if (sf != NULL)
1627 		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
1628 	mtx_unlock(&sf_freelist.sf_lock);
1629 	return (sf);
1630 }
1631 
1632 #define dtosf(x)	(&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT])
1633 
1634 /*
1635  * Detatch mapped page and release resources back to the system.
1636  */
1637 static void
1638 sf_buf_free(caddr_t addr, void *args)
1639 {
1640 	struct sf_buf *sf;
1641 	struct vm_page *m;
1642 
1643 	GIANT_REQUIRED;
1644 
1645 	sf = dtosf(addr);
1646 	pmap_qremove((vm_offset_t)addr, 1);
1647 	m = sf->m;
1648 	vm_page_unwire(m, 0);
1649 	/*
1650 	 * Check for the object going away on us. This can
1651 	 * happen since we don't hold a reference to it.
1652 	 * If so, we're responsible for freeing the page.
1653 	 */
1654 	if (m->wire_count == 0 && m->object == NULL)
1655 		vm_page_free(m);
1656 	sf->m = NULL;
1657 	mtx_lock(&sf_freelist.sf_lock);
1658 	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
1659 	if (sf_buf_alloc_want > 0)
1660 		wakeup_one(&sf_freelist);
1661 	mtx_unlock(&sf_freelist.sf_lock);
1662 }
1663 
1664 /*
1665  * sendfile(2)
1666  *
1667  * MPSAFE
1668  *
1669  * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1670  *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1671  *
1672  * Send a file specified by 'fd' and starting at 'offset' to a socket
1673  * specified by 's'. Send only 'nbytes' of the file or until EOF if
1674  * nbytes == 0. Optionally add a header and/or trailer to the socket
1675  * output. If specified, write the total number of bytes sent into *sbytes.
1676  *
 * Giant is held for the whole call.  The return value is 0 or an errno
 * value: EBADF/EINVAL/ENOTSOCK/ENOTCONN from argument checking, EAGAIN
 * or EPIPE from the socket state checks, EINTR when no sf_buf could be
 * obtained, ENOBUFS when no mbuf header could be obtained, or an error
 * propagated from copyin(), writev(), VOP_READ(), sbwait() or the
 * protocol's pru_send.  *sbytes, when requested, is written on both
 * the success and the error paths.  The 'flags' argument is not
 * referenced by this implementation.
 *
1677  */
1678 int
1679 sendfile(struct thread *td, struct sendfile_args *uap)
1680 {
1681 	struct file *fp;
1682 	struct filedesc *fdp = td->td_proc->p_fd;
1683 	struct vnode *vp;
1684 	struct vm_object *obj;
1685 	struct socket *so;
1686 	struct mbuf *m;
1687 	struct sf_buf *sf;
1688 	struct vm_page *pg;
1689 	struct writev_args nuap;
1690 	struct sf_hdtr hdtr;
1691 	off_t off, xfsize, sbytes = 0;
1692 	int error = 0, s;
1693 
1694 	mtx_lock(&Giant);
1695 	vp = NULL;
1696 	/*
1697 	 * Do argument checking. Must be a regular file in, stream
1698 	 * type and connected socket out, positive offset.
1699 	 */
1700 	fp = holdfp(fdp, uap->fd, FREAD);
1701 	if (fp == NULL) {
1702 		error = EBADF;
1703 		goto done;
1704 	}
1705 	if (fp->f_type != DTYPE_VNODE) {
1706 		error = EINVAL;
1707 		goto done;
1708 	}
1709 	vp = (struct vnode *)fp->f_data;
1710 	vref(vp);
1711 	if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
1712 		error = EINVAL;
1713 		goto done;
1714 	}
	/*
	 * The file's struct file is no longer needed: vp carries its own
	 * reference from the vref() above.  fp is reused for the socket;
	 * if holdsock() fails it stores NULL in fp, so the cleanup at
	 * done: does not drop the file a second time.
	 */
1715 	fdrop(fp, td);
1716 	error = holdsock(td->td_proc->p_fd, uap->s, &fp);
1717 	if (error)
1718 		goto done;
1719 	so = (struct socket *)fp->f_data;
1720 	if (so->so_type != SOCK_STREAM) {
1721 		error = EINVAL;
1722 		goto done;
1723 	}
1724 	if ((so->so_state & SS_ISCONNECTED) == 0) {
1725 		error = ENOTCONN;
1726 		goto done;
1727 	}
1728 	if (uap->offset < 0) {
1729 		error = EINVAL;
1730 		goto done;
1731 	}
1732 
1733 	/*
1734 	 * If specified, get the pointer to the sf_hdtr struct for
1735 	 * any headers/trailers.
1736 	 */
1737 	if (uap->hdtr != NULL) {
1738 		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1739 		if (error)
1740 			goto done;
1741 		/*
1742 		 * Send any headers. Wimp out and use writev(2).
1743 		 */
1744 		if (hdtr.headers != NULL) {
1745 			nuap.fd = uap->s;
1746 			nuap.iovp = hdtr.headers;
1747 			nuap.iovcnt = hdtr.hdr_cnt;
1748 			error = writev(td, &nuap);
1749 			if (error)
1750 				goto done;
			/* writev() leaves its byte count in td_retval[0]. */
1751 			sbytes += td->td_retval[0];
1752 		}
1753 	}
1754 
1755 	/*
1756 	 * Protect against multiple writers to the socket.
1757 	 */
1758 	(void) sblock(&so->so_snd, M_WAITOK);
1759 
1760 	/*
1761 	 * Loop through the pages in the file, starting with the requested
1762 	 * offset. Get a file page (do I/O if necessary), map the file page
1763 	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1764 	 * it on the socket.
	 *
	 * Every exit from the loop other than the normal break must call
	 * sbunlock() itself before jumping to done:.
1765 	 */
1766 	for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1767 		vm_pindex_t pindex;
1768 		vm_offset_t pgoff;
1769 
1770 		pindex = OFF_TO_IDX(off);
1771 retry_lookup:
1772 		/*
1773 		 * Calculate the amount to transfer. Not to exceed a page,
1774 		 * the EOF, or the passed in nbytes.
		 *
		 * Note that sbytes already includes any header bytes written
		 * above, so those bytes count against nbytes here.
1775 		 */
1776 		xfsize = obj->un_pager.vnp.vnp_size - off;
1777 		if (xfsize > PAGE_SIZE)
1778 			xfsize = PAGE_SIZE;
1779 		pgoff = (vm_offset_t)(off & PAGE_MASK);
1780 		if (PAGE_SIZE - pgoff < xfsize)
1781 			xfsize = PAGE_SIZE - pgoff;
1782 		if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1783 			xfsize = uap->nbytes - sbytes;
1784 		if (xfsize <= 0)
1785 			break;
1786 		/*
1787 		 * Optimize the non-blocking case by looking at the socket space
1788 		 * before going to the extra work of constituting the sf_buf.
1789 		 */
1790 		if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1791 			if (so->so_state & SS_CANTSENDMORE)
1792 				error = EPIPE;
1793 			else
1794 				error = EAGAIN;
1795 			sbunlock(&so->so_snd);
1796 			goto done;
1797 		}
1798 		/*
1799 		 * Attempt to look up the page.
1800 		 *
1801 		 *	Allocate if not found
1802 		 *
1803 		 *	Wait and loop if busy.
1804 		 */
1805 		pg = vm_page_lookup(obj, pindex);
1806 
1807 		if (pg == NULL) {
1808 			pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL);
1809 			if (pg == NULL) {
1810 				VM_WAIT;
1811 				goto retry_lookup;
1812 			}
1813 			vm_page_wakeup(pg);
1814 		} else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) {
1815 			goto retry_lookup;
1816 		}
1817 
1818 		/*
1819 		 * Wire the page so it does not get ripped out from under
1820 		 * us.
1821 		 */
1822 
1823 		vm_page_wire(pg);
1824 
1825 		/*
1826 		 * If page is not valid for what we need, initiate I/O
1827 		 */
1828 
1829 		if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) {
1830 			struct uio auio;
1831 			struct iovec aiov;
1832 			int bsize;
1833 
1834 			/*
1835 			 * Ensure that our page is still around when the I/O
1836 			 * completes.
1837 			 */
1838 			vm_page_io_start(pg);
1839 
1840 			/*
1841 			 * Get the page from backing store.
			 *
			 * The read uses UIO_NOCOPY with a null iov_base, so
			 * no data is copied to a caller buffer here.
1842 			 */
1843 			bsize = vp->v_mount->mnt_stat.f_iosize;
1844 			auio.uio_iov = &aiov;
1845 			auio.uio_iovcnt = 1;
1846 			aiov.iov_base = 0;
1847 			aiov.iov_len = MAXBSIZE;
1848 			auio.uio_resid = MAXBSIZE;
1849 			auio.uio_offset = trunc_page(off);
1850 			auio.uio_segflg = UIO_NOCOPY;
1851 			auio.uio_rw = UIO_READ;
1852 			auio.uio_td = td;
1853 			vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
1854 			error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16),
1855 			        td->td_proc->p_ucred);
1856 			VOP_UNLOCK(vp, 0, td);
1857 			vm_page_flag_clear(pg, PG_ZERO);
1858 			vm_page_io_finish(pg);
1859 			if (error) {
1860 				vm_page_unwire(pg, 0);
1861 				/*
1862 				 * See if anyone else might know about this page.
1863 				 * If not and it is not valid, then free it.
1864 				 */
1865 				if (pg->wire_count == 0 && pg->valid == 0 &&
1866 				    pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1867 				    pg->hold_count == 0) {
1868 					vm_page_busy(pg);
1869 					vm_page_free(pg);
1870 				}
1871 				sbunlock(&so->so_snd);
1872 				goto done;
1873 			}
1874 		}
1875 
1876 
1877 		/*
1878 		 * Get a sendfile buf. We usually wait as long as necessary,
1879 		 * but this wait can be interrupted.
		 *
		 * sf_buf_alloc() returns NULL only after its msleep()
		 * returned an error; that case is reported as EINTR.
1880 		 */
1881 		if ((sf = sf_buf_alloc()) == NULL) {
1882 			vm_page_unwire(pg, 0);
1883 			if (pg->wire_count == 0 && pg->object == NULL)
1884 				vm_page_free(pg);
1885 			sbunlock(&so->so_snd);
1886 			error = EINTR;
1887 			goto done;
1888 		}
1889 
1890 		/*
1891 		 * Allocate a kernel virtual page and insert the physical page
1892 		 * into it.
1893 		 */
1894 		sf->m = pg;
1895 		pmap_qenter(sf->kva, &pg, 1);
1896 		/*
1897 		 * Get an mbuf header and set it up as having external storage.
1898 		 */
1899 		MGETHDR(m, M_TRYWAIT, MT_DATA);
1900 		if (m == NULL) {
1901 			error = ENOBUFS;
			/* No mbuf owns the sf_buf yet, so release it by hand. */
1902 			sf_buf_free((void *)sf->kva, NULL);
1903 			sbunlock(&so->so_snd);
1904 			goto done;
1905 		}
1906 		/*
1907 		 * Setup external storage for mbuf.
		 *
		 * sf_buf_free is registered as the external-storage free
		 * routine, so the m_freem() calls on the error paths below
		 * are what release the sf_buf and its page.
1908 		 */
1909 		MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, NULL, M_RDONLY,
1910 		    EXT_SFBUF);
1911 		m->m_data = (char *) sf->kva + pgoff;
1912 		m->m_pkthdr.len = m->m_len = xfsize;
1913 		/*
1914 		 * Add the buffer to the socket buffer chain.
1915 		 */
1916 		s = splnet();
1917 retry_space:
1918 		/*
1919 		 * Make sure that the socket is still able to take more data.
1920 		 * CANTSENDMORE being true usually means that the connection
1921 		 * was closed. so_error is true when an error was sensed after
1922 		 * a previous send.
1923 		 * The state is checked after the page mapping and buffer
1924 		 * allocation above since those operations may block and make
1925 		 * any socket checks stale. From this point forward, nothing
1926 		 * blocks before the pru_send (or more accurately, any blocking
1927 		 * results in a loop back to here to re-check).
1928 		 */
1929 		if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1930 			if (so->so_state & SS_CANTSENDMORE) {
1931 				error = EPIPE;
1932 			} else {
1933 				error = so->so_error;
1934 				so->so_error = 0;
1935 			}
1936 			m_freem(m);
1937 			sbunlock(&so->so_snd);
1938 			splx(s);
1939 			goto done;
1940 		}
1941 		/*
1942 		 * Wait for socket space to become available. We do this just
1943 		 * after checking the connection state above in order to avoid
1944 		 * a race condition with sbwait().
1945 		 */
1946 		if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
1947 			if (so->so_state & SS_NBIO) {
1948 				m_freem(m);
1949 				sbunlock(&so->so_snd);
1950 				splx(s);
1951 				error = EAGAIN;
1952 				goto done;
1953 			}
1954 			error = sbwait(&so->so_snd);
1955 			/*
1956 			 * An error from sbwait usually indicates that we've
1957 			 * been interrupted by a signal. If we've sent anything
1958 			 * then return bytes sent, otherwise return the error.
			 *
			 * NOTE(review): the code below returns the sbwait()
			 * error whether or not anything was sent; the byte
			 * count is only reported through *sbytes at done:.
1959 			 */
1960 			if (error) {
1961 				m_freem(m);
1962 				sbunlock(&so->so_snd);
1963 				splx(s);
1964 				goto done;
1965 			}
1966 			goto retry_space;
1967 		}
1968 		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
1969 		splx(s);
1970 		if (error) {
1971 			sbunlock(&so->so_snd);
1972 			goto done;
1973 		}
1974 	}
1975 	sbunlock(&so->so_snd);
1976 
1977 	/*
1978 	 * Send trailers. Wimp out and use writev(2).
1979 	 */
1980 	if (uap->hdtr != NULL && hdtr.trailers != NULL) {
1981 			nuap.fd = uap->s;
1982 			nuap.iovp = hdtr.trailers;
1983 			nuap.iovcnt = hdtr.trl_cnt;
1984 			error = writev(td, &nuap);
1985 			if (error)
1986 				goto done;
1987 			sbytes += td->td_retval[0];
1988 	}
1989 
1990 done:
1991 	/*
1992 	 * If there was no error we have to clear td->td_retval[0]
1993 	 * because it may have been set by writev.
1994 	 */
1995 	if (error == 0) {
1996 		td->td_retval[0] = 0;
1997 	}
	/*
	 * Report the byte count on success and on error alike.
	 * NOTE(review): the copyout() result is not checked, so a bad
	 * sbytes pointer is silently ignored.
	 */
1998 	if (uap->sbytes != NULL) {
1999 		copyout(&sbytes, uap->sbytes, sizeof(off_t));
2000 	}
2001 	if (vp)
2002 		vrele(vp);
2003 	if (fp)
2004 		fdrop(fp, td);
2005 	mtx_unlock(&Giant);
2006 	return (error);
2007 }
2008