xref: /freebsd/sys/kern/uipc_syscalls.c (revision 3642298923e528d795e3a30ec165d2b469e28b40)
1 /*-
2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * sendfile(2) and related extensions:
6  * Copyright (c) 1998, David Greenman. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 #include "opt_compat.h"
39 #include "opt_ktrace.h"
40 #include "opt_mac.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/lock.h>
46 #include <sys/mac.h>
47 #include <sys/mutex.h>
48 #include <sys/sysproto.h>
49 #include <sys/malloc.h>
50 #include <sys/filedesc.h>
51 #include <sys/event.h>
52 #include <sys/proc.h>
53 #include <sys/fcntl.h>
54 #include <sys/file.h>
55 #include <sys/filio.h>
56 #include <sys/mount.h>
57 #include <sys/mbuf.h>
58 #include <sys/protosw.h>
59 #include <sys/sf_buf.h>
60 #include <sys/socket.h>
61 #include <sys/socketvar.h>
62 #include <sys/signalvar.h>
63 #include <sys/syscallsubr.h>
64 #include <sys/sysctl.h>
65 #include <sys/uio.h>
66 #include <sys/vnode.h>
67 #ifdef KTRACE
68 #include <sys/ktrace.h>
69 #endif
70 
71 #include <vm/vm.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_page.h>
74 #include <vm/vm_pageout.h>
75 #include <vm/vm_kern.h>
76 #include <vm/vm_extern.h>
77 
/*
 * Forward declarations of file-local helpers.
 *
 * sendit() and recvit() are the common back ends of the send and receive
 * families of system calls defined below.
 */
static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);

/*
 * The helpers below take a `compat' argument.  In accept1() a non-zero
 * value selects the old sockaddr layout when COMPAT_OLDSOCK is defined.
 * NOTE(review): do_sendfile(), getsockname1() and getpeername1() are
 * defined further down the file; presumably `compat' plays the same role
 * there -- confirm against their definitions.
 */
static int accept1(struct thread *td, struct accept_args *uap, int compat);
static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
static int getsockname1(struct thread *td, struct getsockname_args *uap,
			int compat);
static int getpeername1(struct thread *td, struct getpeername_args *uap,
			int compat);
87 
88 /*
89  * NSFBUFS-related variables and associated sysctls
90  */
int nsfbufs;		/* maximum number of sf_bufs available */
int nsfbufspeak;	/* number of sf_bufs in use at peak usage */
int nsfbufsused;	/* number of sf_bufs currently in use */

/* Export the three counters above under the kern.ipc sysctl node. */
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
    "Maximum number of sendfile(2) sf_bufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
    "Number of sendfile(2) sf_bufs at peak usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
    "Number of sendfile(2) sf_bufs in use");
102 
/*
 * Convert a user file descriptor to a kernel file entry.  A reference on the
 * file entry is held upon returning.  This is lighter weight than
 * fgetsock(), which bumps the socket reference count and drops the file
 * reference count instead, as this approach avoids several additional mutex
 * operations associated with the additional reference count.
 */
110 static int
111 getsock(struct filedesc *fdp, int fd, struct file **fpp)
112 {
113 	struct file *fp;
114 	int error;
115 
116 	fp = NULL;
117 	if (fdp == NULL)
118 		error = EBADF;
119 	else {
120 		FILEDESC_LOCK_FAST(fdp);
121 		fp = fget_locked(fdp, fd);
122 		if (fp == NULL)
123 			error = EBADF;
124 		else if (fp->f_type != DTYPE_SOCKET) {
125 			fp = NULL;
126 			error = ENOTSOCK;
127 		} else {
128 			fhold(fp);
129 			error = 0;
130 		}
131 		FILEDESC_UNLOCK_FAST(fdp);
132 	}
133 	*fpp = fp;
134 	return (error);
135 }
136 
137 /*
138  * System call interface to the socket abstraction.
139  */
/*
 * Building with COMPAT_43 enables COMPAT_OLDSOCK, which guards the old
 * socket entry points in this file (oaccept(), osend(), orecv(), ...)
 * and the old-layout handling inside the shared helpers.
 */
#if defined(COMPAT_43)
#define COMPAT_OLDSOCK
#endif
143 
144 /*
145  * MPSAFE
146  */
147 int
148 socket(td, uap)
149 	struct thread *td;
150 	register struct socket_args /* {
151 		int	domain;
152 		int	type;
153 		int	protocol;
154 	} */ *uap;
155 {
156 	struct filedesc *fdp;
157 	struct socket *so;
158 	struct file *fp;
159 	int fd, error;
160 
161 #ifdef MAC
162 	error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
163 	    uap->protocol);
164 	if (error)
165 		return (error);
166 #endif
167 	fdp = td->td_proc->p_fd;
168 	error = falloc(td, &fp, &fd);
169 	if (error)
170 		return (error);
171 	/* An extra reference on `fp' has been held for us by falloc(). */
172 	NET_LOCK_GIANT();
173 	error = socreate(uap->domain, &so, uap->type, uap->protocol,
174 	    td->td_ucred, td);
175 	NET_UNLOCK_GIANT();
176 	if (error) {
177 		fdclose(fdp, fp, fd, td);
178 	} else {
179 		FILEDESC_LOCK_FAST(fdp);
180 		fp->f_data = so;	/* already has ref count */
181 		fp->f_flag = FREAD|FWRITE;
182 		fp->f_ops = &socketops;
183 		fp->f_type = DTYPE_SOCKET;
184 		FILEDESC_UNLOCK_FAST(fdp);
185 		td->td_retval[0] = fd;
186 	}
187 	fdrop(fp, td);
188 	return (error);
189 }
190 
191 /*
192  * MPSAFE
193  */
194 /* ARGSUSED */
195 int
196 bind(td, uap)
197 	struct thread *td;
198 	register struct bind_args /* {
199 		int	s;
200 		caddr_t	name;
201 		int	namelen;
202 	} */ *uap;
203 {
204 	struct sockaddr *sa;
205 	int error;
206 
207 	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
208 		return (error);
209 
210 	return (kern_bind(td, uap->s, sa));
211 }
212 
213 int
214 kern_bind(td, fd, sa)
215 	struct thread *td;
216 	int fd;
217 	struct sockaddr *sa;
218 {
219 	struct socket *so;
220 	struct file *fp;
221 	int error;
222 
223 	NET_LOCK_GIANT();
224 	error = getsock(td->td_proc->p_fd, fd, &fp);
225 	if (error)
226 		goto done2;
227 	so = fp->f_data;
228 #ifdef MAC
229 	SOCK_LOCK(so);
230 	error = mac_check_socket_bind(td->td_ucred, so, sa);
231 	SOCK_UNLOCK(so);
232 	if (error)
233 		goto done1;
234 #endif
235 	error = sobind(so, sa, td);
236 #ifdef MAC
237 done1:
238 #endif
239 	fdrop(fp, td);
240 done2:
241 	NET_UNLOCK_GIANT();
242 	FREE(sa, M_SONAME);
243 	return (error);
244 }
245 
246 /*
247  * MPSAFE
248  */
249 /* ARGSUSED */
250 int
251 listen(td, uap)
252 	struct thread *td;
253 	register struct listen_args /* {
254 		int	s;
255 		int	backlog;
256 	} */ *uap;
257 {
258 	struct socket *so;
259 	struct file *fp;
260 	int error;
261 
262 	NET_LOCK_GIANT();
263 	error = getsock(td->td_proc->p_fd, uap->s, &fp);
264 	if (error == 0) {
265 		so = fp->f_data;
266 #ifdef MAC
267 		SOCK_LOCK(so);
268 		error = mac_check_socket_listen(td->td_ucred, so);
269 		SOCK_UNLOCK(so);
270 		if (error)
271 			goto done;
272 #endif
273 		error = solisten(so, uap->backlog, td);
274 #ifdef MAC
275 done:
276 #endif
277 		fdrop(fp, td);
278 	}
279 	NET_UNLOCK_GIANT();
280 	return(error);
281 }
282 
283 /*
284  * accept1()
285  * MPSAFE
286  */
/*
 * Shared implementation of accept() and oaccept().
 *
 * Takes the first completed connection off the listening socket behind
 * descriptor uap->s (sleeping for one unless the listener is non-blocking),
 * attaches it to a newly allocated descriptor and, when uap->name is
 * non-NULL, copies the peer address and its length out to user space.
 * The new descriptor number is stored in td->td_retval[0].
 *
 * A non-zero `compat' makes the copied-out address use the old sockaddr
 * layout; it is only looked at when COMPAT_OLDSOCK is defined.
 *
 * Returns 0 or an errno value.  When an error occurs after the new
 * descriptor has been allocated, the descriptor is closed again.
 */
static int
accept1(td, uap, compat)
	struct thread *td;
	register struct accept_args /* {
		int	s;
		struct sockaddr	* __restrict name;
		socklen_t	* __restrict anamelen;
	} */ *uap;
	int compat;
{
	struct filedesc *fdp;
	struct file *nfp = NULL;
	struct sockaddr *sa = NULL;
	socklen_t namelen;
	int error;
	struct socket *head, *so;
	int fd;
	u_int fflag;
	pid_t pgid;
	int tmp;

	fdp = td->td_proc->p_fd;
	/* The caller's buffer size is only needed if an address is wanted. */
	if (uap->name) {
		error = copyin(uap->anamelen, &namelen, sizeof (namelen));
		if(error)
			return (error);
		/*
		 * NOTE(review): namelen is a socklen_t; if that type is
		 * unsigned this test can never be true -- confirm.
		 */
		if (namelen < 0)
			return (EINVAL);
	}
	NET_LOCK_GIANT();
	/* Reference the listening socket; released by fputsock() at `done'. */
	error = fgetsock(td, uap->s, &head, &fflag);
	if (error)
		goto done2;
	if ((head->so_options & SO_ACCEPTCONN) == 0) {
		error = EINVAL;
		goto done;
	}
#ifdef MAC
	SOCK_LOCK(head);
	error = mac_check_socket_accept(td->td_ucred, head);
	SOCK_UNLOCK(head);
	if (error != 0)
		goto done;
#endif
	error = falloc(td, &nfp, &fd);
	if (error)
		goto done;
	ACCEPT_LOCK();
	/* Non-blocking listener with nothing queued: fail immediately. */
	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
		ACCEPT_UNLOCK();
		error = EWOULDBLOCK;
		goto noconnection;
	}
	/* Sleep until a connection completes or an error is posted. */
	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
		if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
			head->so_error = ECONNABORTED;
			break;
		}
		error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
		    "accept", 0);
		if (error) {
			ACCEPT_UNLOCK();
			goto noconnection;
		}
	}
	/* Report and clear a pending error on the listening socket. */
	if (head->so_error) {
		error = head->so_error;
		head->so_error = 0;
		ACCEPT_UNLOCK();
		goto noconnection;
	}
	so = TAILQ_FIRST(&head->so_comp);
	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));

	/*
	 * Before changing the flags on the socket, we have to bump the
	 * reference count.  Otherwise, if the protocol calls sofree(),
	 * the socket will be released due to a zero refcount.
	 */
	SOCK_LOCK(so);			/* soref() and so_state update */
	soref(so);			/* file descriptor reference */

	/* Detach the connection from the listener's completed queue. */
	TAILQ_REMOVE(&head->so_comp, so, so_list);
	head->so_qlen--;
	/* The new socket inherits the listener's non-blocking state. */
	so->so_state |= (head->so_state & SS_NBIO);
	so->so_qstate &= ~SQ_COMP;
	so->so_head = NULL;

	SOCK_UNLOCK(so);
	ACCEPT_UNLOCK();

	/* An extra reference on `nfp' has been held for us by falloc(). */
	td->td_retval[0] = fd;

	/* connection has been removed from the listen queue */
	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);

	/* Propagate the listener's SIGIO owner, if any, to the new socket. */
	pgid = fgetown(&head->so_sigio);
	if (pgid != 0)
		fsetown(pgid, &so->so_sigio);

	/* Turn the freshly allocated file entry into a socket file. */
	FILE_LOCK(nfp);
	nfp->f_data = so;	/* nfp has ref count from falloc */
	nfp->f_flag = fflag;
	nfp->f_ops = &socketops;
	nfp->f_type = DTYPE_SOCKET;
	FILE_UNLOCK(nfp);
	/* Sync socket nonblocking/async state with file flags */
	tmp = fflag & FNONBLOCK;
	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
	tmp = fflag & FASYNC;
	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
	sa = 0;
	error = soaccept(so, &sa);
	if (error) {
		/*
		 * return a namelen of zero for older code which might
		 * ignore the return value from accept.
		 */
		if (uap->name != NULL) {
			namelen = 0;
			(void) copyout(&namelen,
			    uap->anamelen, sizeof(*uap->anamelen));
		}
		goto noconnection;
	}
	/* No peer address available: report a zero length if one was asked. */
	if (sa == NULL) {
		namelen = 0;
		if (uap->name)
			goto gotnoname;
		error = 0;
		goto done;
	}
	if (uap->name) {
		/* check sa_len before it is destroyed */
		if (namelen > sa->sa_len)
			namelen = sa->sa_len;
#ifdef COMPAT_OLDSOCK
		/* Old layout: overwrite the leading bytes with the family. */
		if (compat)
			((struct osockaddr *)sa)->sa_family =
			    sa->sa_family;
#endif
		error = copyout(sa, uap->name, (u_int)namelen);
		if (!error)
			/* `gotnoname' is also entered directly from above. */
gotnoname:
			error = copyout(&namelen,
			    uap->anamelen, sizeof (*uap->anamelen));
	}
noconnection:
	if (sa)
		FREE(sa, M_SONAME);

	/*
	 * close the new descriptor, assuming someone hasn't ripped it
	 * out from under us.
	 */
	if (error)
		fdclose(fdp, nfp, fd, td);

	/*
	 * Release explicitly held references before returning.
	 */
done:
	/* nfp is still NULL when we get here before/without falloc(). */
	if (nfp != NULL)
		fdrop(nfp, td);
	fputsock(head);
done2:
	NET_UNLOCK_GIANT();
	return (error);
}
458 
459 /*
460  * MPSAFE (accept1() is MPSAFE)
461  */
int
accept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{
	int error;

	/* compat == 0: the peer address keeps the current sockaddr layout. */
	error = accept1(td, uap, 0);
	return (error);
}
470 
471 #ifdef COMPAT_OLDSOCK
472 /*
473  * MPSAFE (accept1() is MPSAFE)
474  */
int
oaccept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{
	int error;

	/* compat == 1: the peer address is returned in the old layout. */
	error = accept1(td, uap, 1);
	return (error);
}
483 #endif /* COMPAT_OLDSOCK */
484 
485 /*
486  * MPSAFE
487  */
488 /* ARGSUSED */
489 int
490 connect(td, uap)
491 	struct thread *td;
492 	register struct connect_args /* {
493 		int	s;
494 		caddr_t	name;
495 		int	namelen;
496 	} */ *uap;
497 {
498 	struct sockaddr *sa;
499 	int error;
500 
501 	error = getsockaddr(&sa, uap->name, uap->namelen);
502 	if (error)
503 		return (error);
504 
505 	return (kern_connect(td, uap->s, sa));
506 }
507 
508 
/*
 * Connect the socket behind descriptor `fd' to the kernel-resident address
 * `sa'.
 *
 * Returns EALREADY if a connect is already in progress, EINPROGRESS for a
 * non-blocking socket whose connect has been started but not finished, and
 * otherwise sleeps until the connect completes or the sleep is interrupted.
 * ERESTART is reported to the caller as EINTR.
 *
 * `sa' is released with FREE(..., M_SONAME) before returning on every path.
 */
int
kern_connect(td, fd, sa)
	struct thread *td;
	int fd;
	struct sockaddr *sa;
{
	struct socket *so;
	struct file *fp;
	int error;
	int interrupted = 0;

	NET_LOCK_GIANT();
	error = getsock(td->td_proc->p_fd, fd, &fp);
	if (error)
		goto done2;
	so = fp->f_data;
	if (so->so_state & SS_ISCONNECTING) {
		error = EALREADY;
		goto done1;
	}
#ifdef MAC
	SOCK_LOCK(so);
	error = mac_check_socket_connect(td->td_ucred, so, sa);
	SOCK_UNLOCK(so);
	if (error)
		goto bad;
#endif
	error = soconnect(so, sa, td);
	if (error)
		goto bad;
	/* Non-blocking socket: report progress instead of waiting. */
	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
		error = EINPROGRESS;
		goto done1;
	}
	SOCK_LOCK(so);
	/* Wait for the connect to finish or for an error to be posted. */
	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
		    "connec", 0);
		if (error) {
			/* Remember a signal so SS_ISCONNECTING is left set. */
			if (error == EINTR || error == ERESTART)
				interrupted = 1;
			break;
		}
	}
	/* Sleep finished cleanly: pick up and clear the socket error. */
	if (error == 0) {
		error = so->so_error;
		so->so_error = 0;
	}
	SOCK_UNLOCK(so);
bad:
	/*
	 * NOTE(review): so_state is modified here without the socket lock
	 * held -- confirm this is intended.
	 */
	if (!interrupted)
		so->so_state &= ~SS_ISCONNECTING;
	/* An interrupted connect is not restarted; report EINTR instead. */
	if (error == ERESTART)
		error = EINTR;
done1:
	fdrop(fp, td);
done2:
	NET_UNLOCK_GIANT();
	FREE(sa, M_SONAME);
	return (error);
}
570 
571 /*
572  * MPSAFE
573  */
/*
 * Create two sockets of the requested domain/type/protocol, connect them
 * to each other, install each in a new descriptor and copy the two
 * descriptor numbers out to uap->rsv.
 *
 * Failure before the final copyout() unwinds through the free4..free1
 * labels: each label undoes one earlier step and falls through to the
 * next.  Returns 0 or an errno value.
 */
int
socketpair(td, uap)
	struct thread *td;
	register struct socketpair_args /* {
		int	domain;
		int	type;
		int	protocol;
		int	*rsv;
	} */ *uap;
{
	register struct filedesc *fdp = td->td_proc->p_fd;
	struct file *fp1, *fp2;
	struct socket *so1, *so2;
	int fd, error, sv[2];

#ifdef MAC
	/* We might want to have a separate check for socket pairs. */
	error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
	    uap->protocol);
	if (error)
		return (error);
#endif

	NET_LOCK_GIANT();
	error = socreate(uap->domain, &so1, uap->type, uap->protocol,
	    td->td_ucred, td);
	if (error)
		goto done2;
	error = socreate(uap->domain, &so2, uap->type, uap->protocol,
	    td->td_ucred, td);
	if (error)
		goto free1;
	/*
	 * On success, each falloc() call leaves an extra reference on the
	 * returned file entry (`fp1', `fp2'); it is dropped with fdrop().
	 */
	error = falloc(td, &fp1, &fd);
	if (error)
		goto free2;
	sv[0] = fd;
	fp1->f_data = so1;	/* so1 already has ref count */
	error = falloc(td, &fp2, &fd);
	if (error)
		goto free3;
	fp2->f_data = so2;	/* so2 already has ref count */
	sv[1] = fd;
	error = soconnect2(so1, so2);
	if (error)
		goto free4;
	if (uap->type == SOCK_DGRAM) {
		/*
		 * Datagram socket connection is asymmetric.
		 */
		 error = soconnect2(so2, so1);
		 if (error)
			goto free4;
	}
	/*
	 * Only now do the file entries become socket files; until this
	 * point f_ops/f_type have not been set to the socket values, which
	 * is why the error path closes so1/so2 explicitly.
	 */
	FILE_LOCK(fp1);
	fp1->f_flag = FREAD|FWRITE;
	fp1->f_ops = &socketops;
	fp1->f_type = DTYPE_SOCKET;
	FILE_UNLOCK(fp1);
	FILE_LOCK(fp2);
	fp2->f_flag = FREAD|FWRITE;
	fp2->f_ops = &socketops;
	fp2->f_type = DTYPE_SOCKET;
	FILE_UNLOCK(fp2);
	/*
	 * NOTE(review): if this copyout() fails, both descriptors remain
	 * installed in the descriptor table even though the caller never
	 * learns their numbers -- confirm whether they should be closed.
	 */
	error = copyout(sv, uap->rsv, 2 * sizeof (int));
	fdrop(fp1, td);
	fdrop(fp2, td);
	goto done2;
free4:
	/* Undo the second descriptor. */
	fdclose(fdp, fp2, sv[1], td);
	fdrop(fp2, td);
free3:
	/* Undo the first descriptor. */
	fdclose(fdp, fp1, sv[0], td);
	fdrop(fp1, td);
free2:
	(void)soclose(so2);
free1:
	(void)soclose(so1);
done2:
	NET_UNLOCK_GIANT();
	return (error);
}
656 
657 static int
658 sendit(td, s, mp, flags)
659 	register struct thread *td;
660 	int s;
661 	register struct msghdr *mp;
662 	int flags;
663 {
664 	struct mbuf *control;
665 	struct sockaddr *to;
666 	int error;
667 
668 	if (mp->msg_name != NULL) {
669 		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
670 		if (error) {
671 			to = NULL;
672 			goto bad;
673 		}
674 		mp->msg_name = to;
675 	} else {
676 		to = NULL;
677 	}
678 
679 	if (mp->msg_control) {
680 		if (mp->msg_controllen < sizeof(struct cmsghdr)
681 #ifdef COMPAT_OLDSOCK
682 		    && mp->msg_flags != MSG_COMPAT
683 #endif
684 		) {
685 			error = EINVAL;
686 			goto bad;
687 		}
688 		error = sockargs(&control, mp->msg_control,
689 		    mp->msg_controllen, MT_CONTROL);
690 		if (error)
691 			goto bad;
692 #ifdef COMPAT_OLDSOCK
693 		if (mp->msg_flags == MSG_COMPAT) {
694 			register struct cmsghdr *cm;
695 
696 			M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
697 			if (control == 0) {
698 				error = ENOBUFS;
699 				goto bad;
700 			} else {
701 				cm = mtod(control, struct cmsghdr *);
702 				cm->cmsg_len = control->m_len;
703 				cm->cmsg_level = SOL_SOCKET;
704 				cm->cmsg_type = SCM_RIGHTS;
705 			}
706 		}
707 #endif
708 	} else {
709 		control = NULL;
710 	}
711 
712 	error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
713 
714 bad:
715 	if (to)
716 		FREE(to, M_SONAME);
717 	return (error);
718 }
719 
720 int
721 kern_sendit(td, s, mp, flags, control, segflg)
722 	struct thread *td;
723 	int s;
724 	struct msghdr *mp;
725 	int flags;
726 	struct mbuf *control;
727 	enum uio_seg segflg;
728 {
729 	struct file *fp;
730 	struct uio auio;
731 	struct iovec *iov;
732 	struct socket *so;
733 	int i;
734 	int len, error;
735 #ifdef KTRACE
736 	struct uio *ktruio = NULL;
737 #endif
738 
739 	NET_LOCK_GIANT();
740 	error = getsock(td->td_proc->p_fd, s, &fp);
741 	if (error)
742 		goto bad2;
743 	so = (struct socket *)fp->f_data;
744 
745 #ifdef MAC
746 	SOCK_LOCK(so);
747 	error = mac_check_socket_send(td->td_ucred, so);
748 	SOCK_UNLOCK(so);
749 	if (error)
750 		goto bad;
751 #endif
752 
753 	auio.uio_iov = mp->msg_iov;
754 	auio.uio_iovcnt = mp->msg_iovlen;
755 	auio.uio_segflg = segflg;
756 	auio.uio_rw = UIO_WRITE;
757 	auio.uio_td = td;
758 	auio.uio_offset = 0;			/* XXX */
759 	auio.uio_resid = 0;
760 	iov = mp->msg_iov;
761 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
762 		if ((auio.uio_resid += iov->iov_len) < 0) {
763 			error = EINVAL;
764 			goto bad;
765 		}
766 	}
767 #ifdef KTRACE
768 	if (KTRPOINT(td, KTR_GENIO))
769 		ktruio = cloneuio(&auio);
770 #endif
771 	len = auio.uio_resid;
772 	error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio,
773 	    0, control, flags, td);
774 	if (error) {
775 		if (auio.uio_resid != len && (error == ERESTART ||
776 		    error == EINTR || error == EWOULDBLOCK))
777 			error = 0;
778 		/* Generation of SIGPIPE can be controlled per socket */
779 		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
780 		    !(flags & MSG_NOSIGNAL)) {
781 			PROC_LOCK(td->td_proc);
782 			psignal(td->td_proc, SIGPIPE);
783 			PROC_UNLOCK(td->td_proc);
784 		}
785 	}
786 	if (error == 0)
787 		td->td_retval[0] = len - auio.uio_resid;
788 #ifdef KTRACE
789 	if (ktruio != NULL) {
790 		ktruio->uio_resid = td->td_retval[0];
791 		ktrgenio(s, UIO_WRITE, ktruio, error);
792 	}
793 #endif
794 bad:
795 	fdrop(fp, td);
796 bad2:
797 	NET_UNLOCK_GIANT();
798 	return (error);
799 }
800 
801 /*
802  * MPSAFE
803  */
804 int
805 sendto(td, uap)
806 	struct thread *td;
807 	register struct sendto_args /* {
808 		int	s;
809 		caddr_t	buf;
810 		size_t	len;
811 		int	flags;
812 		caddr_t	to;
813 		int	tolen;
814 	} */ *uap;
815 {
816 	struct msghdr msg;
817 	struct iovec aiov;
818 	int error;
819 
820 	msg.msg_name = uap->to;
821 	msg.msg_namelen = uap->tolen;
822 	msg.msg_iov = &aiov;
823 	msg.msg_iovlen = 1;
824 	msg.msg_control = 0;
825 #ifdef COMPAT_OLDSOCK
826 	msg.msg_flags = 0;
827 #endif
828 	aiov.iov_base = uap->buf;
829 	aiov.iov_len = uap->len;
830 	error = sendit(td, uap->s, &msg, uap->flags);
831 	return (error);
832 }
833 
834 #ifdef COMPAT_OLDSOCK
835 /*
836  * MPSAFE
837  */
838 int
839 osend(td, uap)
840 	struct thread *td;
841 	register struct osend_args /* {
842 		int	s;
843 		caddr_t	buf;
844 		int	len;
845 		int	flags;
846 	} */ *uap;
847 {
848 	struct msghdr msg;
849 	struct iovec aiov;
850 	int error;
851 
852 	msg.msg_name = 0;
853 	msg.msg_namelen = 0;
854 	msg.msg_iov = &aiov;
855 	msg.msg_iovlen = 1;
856 	aiov.iov_base = uap->buf;
857 	aiov.iov_len = uap->len;
858 	msg.msg_control = 0;
859 	msg.msg_flags = 0;
860 	error = sendit(td, uap->s, &msg, uap->flags);
861 	return (error);
862 }
863 
864 /*
865  * MPSAFE
866  */
867 int
868 osendmsg(td, uap)
869 	struct thread *td;
870 	struct osendmsg_args /* {
871 		int	s;
872 		caddr_t	msg;
873 		int	flags;
874 	} */ *uap;
875 {
876 	struct msghdr msg;
877 	struct iovec *iov;
878 	int error;
879 
880 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
881 	if (error)
882 		return (error);
883 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
884 	if (error)
885 		return (error);
886 	msg.msg_iov = iov;
887 	msg.msg_flags = MSG_COMPAT;
888 	error = sendit(td, uap->s, &msg, uap->flags);
889 	free(iov, M_IOV);
890 	return (error);
891 }
892 #endif
893 
894 /*
895  * MPSAFE
896  */
897 int
898 sendmsg(td, uap)
899 	struct thread *td;
900 	struct sendmsg_args /* {
901 		int	s;
902 		caddr_t	msg;
903 		int	flags;
904 	} */ *uap;
905 {
906 	struct msghdr msg;
907 	struct iovec *iov;
908 	int error;
909 
910 	error = copyin(uap->msg, &msg, sizeof (msg));
911 	if (error)
912 		return (error);
913 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
914 	if (error)
915 		return (error);
916 	msg.msg_iov = iov;
917 #ifdef COMPAT_OLDSOCK
918 	msg.msg_flags = 0;
919 #endif
920 	error = sendit(td, uap->s, &msg, uap->flags);
921 	free(iov, M_IOV);
922 	return (error);
923 }
924 
/*
 * Common back end of the receive system calls.
 *
 * Receives into the buffers described by mp->msg_iov (treated as user-space
 * addresses) from the socket behind descriptor `s'.  On success the byte
 * count is stored in td->td_retval[0]; the source address is copied out to
 * mp->msg_name when that is non-NULL, and received control data to
 * mp->msg_control when that is non-NULL.  mp->msg_namelen, msg_controllen
 * and msg_flags are updated in the kernel copy of the header.
 *
 * `namelenp', when non-NULL, is a user address that additionally receives
 * the resulting address length.
 *
 * Returns 0 or an errno value.  A receive that moved some data before
 * ERESTART, EINTR or EWOULDBLOCK counts as a success.
 */
static int
recvit(td, s, mp, namelenp)
	struct thread *td;
	int s;
	struct msghdr *mp;
	void *namelenp;
{
	struct uio auio;
	struct iovec *iov;
	int i;
	socklen_t len;
	int error;
	struct mbuf *m, *control = 0;
	caddr_t ctlbuf;
	struct file *fp;
	struct socket *so;
	struct sockaddr *fromsa = 0;
#ifdef KTRACE
	struct uio *ktruio = NULL;
#endif

	NET_LOCK_GIANT();
	error = getsock(td->td_proc->p_fd, s, &fp);
	if (error) {
		NET_UNLOCK_GIANT();
		return (error);
	}
	so = fp->f_data;

#ifdef MAC
	SOCK_LOCK(so);
	error = mac_check_socket_receive(td->td_ucred, so);
	SOCK_UNLOCK(so);
	if (error) {
		fdrop(fp, td);
		NET_UNLOCK_GIANT();
		return (error);
	}
#endif

	auio.uio_iov = mp->msg_iov;
	auio.uio_iovcnt = mp->msg_iovlen;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_td = td;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	/* Total up the request, rejecting lengths that overflow uio_resid. */
	iov = mp->msg_iov;
	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
		if ((auio.uio_resid += iov->iov_len) < 0) {
			fdrop(fp, td);
			NET_UNLOCK_GIANT();
			return (EINVAL);
		}
	}
#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO))
		ktruio = cloneuio(&auio);
#endif
	/* `len' first holds the requested size, later other lengths. */
	len = auio.uio_resid;
	/* Control data is only collected when the caller supplied a buffer. */
	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
	    (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
	    &mp->msg_flags);
	if (error) {
		/* A partial transfer that was interrupted is still a success. */
		if (auio.uio_resid != (int)len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
#ifdef KTRACE
	if (ktruio != NULL) {
		ktruio->uio_resid = (int)len - auio.uio_resid;
		ktrgenio(s, UIO_READ, ktruio, error);
	}
#endif
	if (error)
		goto out;
	td->td_retval[0] = (int)len - auio.uio_resid;
	/* Copy out the source address, truncated to the caller's buffer. */
	if (mp->msg_name) {
		len = mp->msg_namelen;
		if (len <= 0 || fromsa == 0)
			len = 0;
		else {
			/* save sa_len before it is destroyed by MSG_COMPAT */
			len = MIN(len, fromsa->sa_len);
#ifdef COMPAT_OLDSOCK
			if (mp->msg_flags & MSG_COMPAT)
				((struct osockaddr *)fromsa)->sa_family =
				    fromsa->sa_family;
#endif
			error = copyout(fromsa, mp->msg_name, (unsigned)len);
			if (error)
				goto out;
		}
		mp->msg_namelen = len;
		if (namelenp &&
		    (error = copyout(&len, namelenp, sizeof (socklen_t)))) {
#ifdef COMPAT_OLDSOCK
			if (mp->msg_flags & MSG_COMPAT)
				error = 0;	/* old recvfrom didn't check */
			else
#endif
			goto out;
		}
	}
	if (mp->msg_control) {
#ifdef COMPAT_OLDSOCK
		/*
		 * We assume that old recvmsg calls won't receive access
		 * rights and other control info, esp. as control info
		 * is always optional and those options didn't exist in 4.3.
		 * If we receive rights, trim the cmsghdr; anything else
		 * is tossed.
		 */
		if (control && mp->msg_flags & MSG_COMPAT) {
			if (mtod(control, struct cmsghdr *)->cmsg_level !=
			    SOL_SOCKET ||
			    mtod(control, struct cmsghdr *)->cmsg_type !=
			    SCM_RIGHTS) {
				mp->msg_controllen = 0;
				goto out;
			}
			control->m_len -= sizeof (struct cmsghdr);
			control->m_data += sizeof (struct cmsghdr);
		}
#endif
		/* `len' now counts the room left in the caller's buffer. */
		len = mp->msg_controllen;
		m = control;
		mp->msg_controllen = 0;
		ctlbuf = mp->msg_control;

		/* Copy the control chain out mbuf by mbuf. */
		while (m && len > 0) {
			unsigned int tocopy;

			if (len >= m->m_len)
				tocopy = m->m_len;
			else {
				/* Buffer too small: flag the truncation. */
				mp->msg_flags |= MSG_CTRUNC;
				tocopy = len;
			}

			if ((error = copyout(mtod(m, caddr_t),
					ctlbuf, tocopy)) != 0)
				goto out;

			ctlbuf += tocopy;
			len -= tocopy;
			m = m->m_next;
		}
		/* Report how many control bytes were actually copied out. */
		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
	}
out:
	fdrop(fp, td);
	NET_UNLOCK_GIANT();
	/* The address and control chain from pru_soreceive() are ours to free. */
	if (fromsa)
		FREE(fromsa, M_SONAME);
	if (control)
		m_freem(control);
	return (error);
}
1084 
1085 /*
1086  * MPSAFE
1087  */
1088 int
1089 recvfrom(td, uap)
1090 	struct thread *td;
1091 	register struct recvfrom_args /* {
1092 		int	s;
1093 		caddr_t	buf;
1094 		size_t	len;
1095 		int	flags;
1096 		struct sockaddr * __restrict	from;
1097 		socklen_t * __restrict fromlenaddr;
1098 	} */ *uap;
1099 {
1100 	struct msghdr msg;
1101 	struct iovec aiov;
1102 	int error;
1103 
1104 	if (uap->fromlenaddr) {
1105 		error = copyin(uap->fromlenaddr,
1106 		    &msg.msg_namelen, sizeof (msg.msg_namelen));
1107 		if (error)
1108 			goto done2;
1109 	} else {
1110 		msg.msg_namelen = 0;
1111 	}
1112 	msg.msg_name = uap->from;
1113 	msg.msg_iov = &aiov;
1114 	msg.msg_iovlen = 1;
1115 	aiov.iov_base = uap->buf;
1116 	aiov.iov_len = uap->len;
1117 	msg.msg_control = 0;
1118 	msg.msg_flags = uap->flags;
1119 	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1120 done2:
1121 	return(error);
1122 }
1123 
1124 #ifdef COMPAT_OLDSOCK
1125 /*
1126  * MPSAFE
1127  */
1128 int
1129 orecvfrom(td, uap)
1130 	struct thread *td;
1131 	struct recvfrom_args *uap;
1132 {
1133 
1134 	uap->flags |= MSG_COMPAT;
1135 	return (recvfrom(td, uap));
1136 }
1137 #endif
1138 
1139 
1140 #ifdef COMPAT_OLDSOCK
1141 /*
1142  * MPSAFE
1143  */
1144 int
1145 orecv(td, uap)
1146 	struct thread *td;
1147 	register struct orecv_args /* {
1148 		int	s;
1149 		caddr_t	buf;
1150 		int	len;
1151 		int	flags;
1152 	} */ *uap;
1153 {
1154 	struct msghdr msg;
1155 	struct iovec aiov;
1156 	int error;
1157 
1158 	msg.msg_name = 0;
1159 	msg.msg_namelen = 0;
1160 	msg.msg_iov = &aiov;
1161 	msg.msg_iovlen = 1;
1162 	aiov.iov_base = uap->buf;
1163 	aiov.iov_len = uap->len;
1164 	msg.msg_control = 0;
1165 	msg.msg_flags = uap->flags;
1166 	error = recvit(td, uap->s, &msg, NULL);
1167 	return (error);
1168 }
1169 
1170 /*
1171  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1172  * overlays the new one, missing only the flags, and with the (old) access
1173  * rights where the control fields are now.
1174  *
1175  * MPSAFE
1176  */
1177 int
1178 orecvmsg(td, uap)
1179 	struct thread *td;
1180 	struct orecvmsg_args /* {
1181 		int	s;
1182 		struct	omsghdr *msg;
1183 		int	flags;
1184 	} */ *uap;
1185 {
1186 	struct msghdr msg;
1187 	struct iovec *iov;
1188 	int error;
1189 
1190 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1191 	if (error)
1192 		return (error);
1193 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1194 	if (error)
1195 		return (error);
1196 	msg.msg_flags = uap->flags | MSG_COMPAT;
1197 	msg.msg_iov = iov;
1198 	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1199 	if (msg.msg_controllen && error == 0)
1200 		error = copyout(&msg.msg_controllen,
1201 		    &uap->msg->msg_accrightslen, sizeof (int));
1202 	free(iov, M_IOV);
1203 	return (error);
1204 }
1205 #endif
1206 
1207 /*
1208  * MPSAFE
1209  */
1210 int
1211 recvmsg(td, uap)
1212 	struct thread *td;
1213 	struct recvmsg_args /* {
1214 		int	s;
1215 		struct	msghdr *msg;
1216 		int	flags;
1217 	} */ *uap;
1218 {
1219 	struct msghdr msg;
1220 	struct iovec *uiov, *iov;
1221 	int error;
1222 
1223 	error = copyin(uap->msg, &msg, sizeof (msg));
1224 	if (error)
1225 		return (error);
1226 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1227 	if (error)
1228 		return (error);
1229 	msg.msg_flags = uap->flags;
1230 #ifdef COMPAT_OLDSOCK
1231 	msg.msg_flags &= ~MSG_COMPAT;
1232 #endif
1233 	uiov = msg.msg_iov;
1234 	msg.msg_iov = iov;
1235 	error = recvit(td, uap->s, &msg, NULL);
1236 	if (error == 0) {
1237 		msg.msg_iov = uiov;
1238 		error = copyout(&msg, uap->msg, sizeof(msg));
1239 	}
1240 	free(iov, M_IOV);
1241 	return (error);
1242 }
1243 
1244 /*
1245  * MPSAFE
1246  */
1247 /* ARGSUSED */
1248 int
1249 shutdown(td, uap)
1250 	struct thread *td;
1251 	register struct shutdown_args /* {
1252 		int	s;
1253 		int	how;
1254 	} */ *uap;
1255 {
1256 	struct socket *so;
1257 	struct file *fp;
1258 	int error;
1259 
1260 	NET_LOCK_GIANT();
1261 	error = getsock(td->td_proc->p_fd, uap->s, &fp);
1262 	if (error == 0) {
1263 		so = fp->f_data;
1264 		error = soshutdown(so, uap->how);
1265 		fdrop(fp, td);
1266 	}
1267 	NET_UNLOCK_GIANT();
1268 	return (error);
1269 }
1270 
1271 /*
1272  * MPSAFE
1273  */
1274 /* ARGSUSED */
1275 int
1276 setsockopt(td, uap)
1277 	struct thread *td;
1278 	register struct setsockopt_args /* {
1279 		int	s;
1280 		int	level;
1281 		int	name;
1282 		caddr_t	val;
1283 		int	valsize;
1284 	} */ *uap;
1285 {
1286 
1287 	return (kern_setsockopt(td, uap->s, uap->level, uap->name,
1288 	    uap->val, UIO_USERSPACE, uap->valsize));
1289 }
1290 
1291 int
1292 kern_setsockopt(td, s, level, name, val, valseg, valsize)
1293 	struct thread *td;
1294 	int s;
1295 	int level;
1296 	int name;
1297 	void *val;
1298 	enum uio_seg valseg;
1299 	socklen_t valsize;
1300 {
1301 	int error;
1302 	struct socket *so;
1303 	struct file *fp;
1304 	struct sockopt sopt;
1305 
1306 	if (val == NULL && valsize != 0)
1307 		return (EFAULT);
1308 	if (valsize < 0)
1309 		return (EINVAL);
1310 
1311 	sopt.sopt_dir = SOPT_SET;
1312 	sopt.sopt_level = level;
1313 	sopt.sopt_name = name;
1314 	sopt.sopt_val = val;
1315 	sopt.sopt_valsize = valsize;
1316 	switch (valseg) {
1317 	case UIO_USERSPACE:
1318 		sopt.sopt_td = td;
1319 		break;
1320 	case UIO_SYSSPACE:
1321 		sopt.sopt_td = NULL;
1322 		break;
1323 	default:
1324 		panic("kern_setsockopt called with bad valseg");
1325 	}
1326 
1327 	NET_LOCK_GIANT();
1328 	error = getsock(td->td_proc->p_fd, s, &fp);
1329 	if (error == 0) {
1330 		so = fp->f_data;
1331 		error = sosetopt(so, &sopt);
1332 		fdrop(fp, td);
1333 	}
1334 	NET_UNLOCK_GIANT();
1335 	return(error);
1336 }
1337 
1338 /*
1339  * MPSAFE
1340  */
1341 /* ARGSUSED */
1342 int
1343 getsockopt(td, uap)
1344 	struct thread *td;
1345 	register struct getsockopt_args /* {
1346 		int	s;
1347 		int	level;
1348 		int	name;
1349 		void * __restrict	val;
1350 		socklen_t * __restrict avalsize;
1351 	} */ *uap;
1352 {
1353 	socklen_t valsize;
1354 	int	error;
1355 
1356 	if (uap->val) {
1357 		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1358 		if (error)
1359 			return (error);
1360 	}
1361 
1362 	error = kern_getsockopt(td, uap->s, uap->level, uap->name,
1363 	    uap->val, UIO_USERSPACE, &valsize);
1364 
1365 	if (error == 0)
1366 		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1367 	return (error);
1368 }
1369 
1370 /*
1371  * Kernel version of getsockopt.
1372  * optval can be a userland or userspace. optlen is always a kernel pointer.
1373  */
1374 int
1375 kern_getsockopt(td, s, level, name, val, valseg, valsize)
1376 	struct thread *td;
1377 	int s;
1378 	int level;
1379 	int name;
1380 	void *val;
1381 	enum uio_seg valseg;
1382 	socklen_t *valsize;
1383 {
1384 	int error;
1385 	struct  socket *so;
1386 	struct file *fp;
1387 	struct	sockopt sopt;
1388 
1389 	if (val == NULL)
1390 		*valsize = 0;
1391 	if (*valsize < 0)
1392 		return (EINVAL);
1393 
1394 	sopt.sopt_dir = SOPT_GET;
1395 	sopt.sopt_level = level;
1396 	sopt.sopt_name = name;
1397 	sopt.sopt_val = val;
1398 	sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
1399 	switch (valseg) {
1400 	case UIO_USERSPACE:
1401 		sopt.sopt_td = td;
1402 		break;
1403 	case UIO_SYSSPACE:
1404 		sopt.sopt_td = NULL;
1405 		break;
1406 	default:
1407 		panic("kern_getsockopt called with bad valseg");
1408 	}
1409 
1410 	NET_LOCK_GIANT();
1411 	error = getsock(td->td_proc->p_fd, s, &fp);
1412 	if (error == 0) {
1413 		so = fp->f_data;
1414 		error = sogetopt(so, &sopt);
1415 		*valsize = sopt.sopt_valsize;
1416 		fdrop(fp, td);
1417 	}
1418 	NET_UNLOCK_GIANT();
1419 	return (error);
1420 }
1421 
1422 /*
1423  * getsockname1() - Get socket name.
1424  *
1425  * MPSAFE
1426  */
1427 /* ARGSUSED */
1428 static int
1429 getsockname1(td, uap, compat)
1430 	struct thread *td;
1431 	register struct getsockname_args /* {
1432 		int	fdes;
1433 		struct sockaddr * __restrict asa;
1434 		socklen_t * __restrict alen;
1435 	} */ *uap;
1436 	int compat;
1437 {
1438 	struct socket *so;
1439 	struct sockaddr *sa;
1440 	struct file *fp;
1441 	socklen_t len;
1442 	int error;
1443 
1444 	NET_LOCK_GIANT();
1445 	error = getsock(td->td_proc->p_fd, uap->fdes, &fp);
1446 	if (error)
1447 		goto done2;
1448 	so = fp->f_data;
1449 	error = copyin(uap->alen, &len, sizeof (len));
1450 	if (error)
1451 		goto done1;
1452 	if (len < 0) {
1453 		error = EINVAL;
1454 		goto done1;
1455 	}
1456 	sa = 0;
1457 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1458 	if (error)
1459 		goto bad;
1460 	if (sa == 0) {
1461 		len = 0;
1462 		goto gotnothing;
1463 	}
1464 
1465 	len = MIN(len, sa->sa_len);
1466 #ifdef COMPAT_OLDSOCK
1467 	if (compat)
1468 		((struct osockaddr *)sa)->sa_family = sa->sa_family;
1469 #endif
1470 	error = copyout(sa, uap->asa, (u_int)len);
1471 	if (error == 0)
1472 gotnothing:
1473 		error = copyout(&len, uap->alen, sizeof (len));
1474 bad:
1475 	if (sa)
1476 		FREE(sa, M_SONAME);
1477 done1:
1478 	fdrop(fp, td);
1479 done2:
1480 	NET_UNLOCK_GIANT();
1481 	return (error);
1482 }
1483 
/*
 * getsockname(2) system call entry point: getsockname1() with compat off.
 *
 * MPSAFE
 */
int
getsockname(struct thread *td, struct getsockname_args *uap)
{
	int error;

	error = getsockname1(td, uap, 0);
	return (error);
}
1495 
#ifdef COMPAT_OLDSOCK
/*
 * Old-socket-ABI variant of getsockname(): getsockname1() with compat on.
 *
 * MPSAFE
 */
int
ogetsockname(struct thread *td, struct getsockname_args *uap)
{
	int error;

	error = getsockname1(td, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
1509 
1510 /*
1511  * getpeername1() - Get name of peer for connected socket.
1512  *
1513  * MPSAFE
1514  */
1515 /* ARGSUSED */
1516 static int
1517 getpeername1(td, uap, compat)
1518 	struct thread *td;
1519 	register struct getpeername_args /* {
1520 		int	fdes;
1521 		struct sockaddr * __restrict	asa;
1522 		socklen_t * __restrict	alen;
1523 	} */ *uap;
1524 	int compat;
1525 {
1526 	struct socket *so;
1527 	struct sockaddr *sa;
1528 	struct file *fp;
1529 	socklen_t len;
1530 	int error;
1531 
1532 	NET_LOCK_GIANT();
1533 	error = getsock(td->td_proc->p_fd, uap->fdes, &fp);
1534 	if (error)
1535 		goto done2;
1536 	so = fp->f_data;
1537 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1538 		error = ENOTCONN;
1539 		goto done1;
1540 	}
1541 	error = copyin(uap->alen, &len, sizeof (len));
1542 	if (error)
1543 		goto done1;
1544 	if (len < 0) {
1545 		error = EINVAL;
1546 		goto done1;
1547 	}
1548 	sa = 0;
1549 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1550 	if (error)
1551 		goto bad;
1552 	if (sa == 0) {
1553 		len = 0;
1554 		goto gotnothing;
1555 	}
1556 	len = MIN(len, sa->sa_len);
1557 #ifdef COMPAT_OLDSOCK
1558 	if (compat)
1559 		((struct osockaddr *)sa)->sa_family =
1560 		    sa->sa_family;
1561 #endif
1562 	error = copyout(sa, uap->asa, (u_int)len);
1563 	if (error)
1564 		goto bad;
1565 gotnothing:
1566 	error = copyout(&len, uap->alen, sizeof (len));
1567 bad:
1568 	if (sa)
1569 		FREE(sa, M_SONAME);
1570 done1:
1571 	fdrop(fp, td);
1572 done2:
1573 	NET_UNLOCK_GIANT();
1574 	return (error);
1575 }
1576 
/*
 * getpeername(2) system call entry point: getpeername1() with compat off.
 *
 * MPSAFE
 */
int
getpeername(struct thread *td, struct getpeername_args *uap)
{
	int error;

	error = getpeername1(td, uap, 0);
	return (error);
}
1588 
#ifdef COMPAT_OLDSOCK
/*
 * Old-socket-ABI variant of getpeername(): getpeername1() with compat on.
 *
 * MPSAFE
 */
int
ogetpeername(struct thread *td, struct ogetpeername_args *uap)
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(td, args, 1));
}
#endif /* COMPAT_OLDSOCK */
1603 
1604 int
1605 sockargs(mp, buf, buflen, type)
1606 	struct mbuf **mp;
1607 	caddr_t buf;
1608 	int buflen, type;
1609 {
1610 	register struct sockaddr *sa;
1611 	register struct mbuf *m;
1612 	int error;
1613 
1614 	if ((u_int)buflen > MLEN) {
1615 #ifdef COMPAT_OLDSOCK
1616 		if (type == MT_SONAME && (u_int)buflen <= 112)
1617 			buflen = MLEN;		/* unix domain compat. hack */
1618 		else
1619 #endif
1620 			if ((u_int)buflen > MCLBYTES)
1621 				return (EINVAL);
1622 	}
1623 	m = m_get(M_TRYWAIT, type);
1624 	if (m == NULL)
1625 		return (ENOBUFS);
1626 	if ((u_int)buflen > MLEN) {
1627 		MCLGET(m, M_TRYWAIT);
1628 		if ((m->m_flags & M_EXT) == 0) {
1629 			m_free(m);
1630 			return (ENOBUFS);
1631 		}
1632 	}
1633 	m->m_len = buflen;
1634 	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1635 	if (error)
1636 		(void) m_free(m);
1637 	else {
1638 		*mp = m;
1639 		if (type == MT_SONAME) {
1640 			sa = mtod(m, struct sockaddr *);
1641 
1642 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1643 			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1644 				sa->sa_family = sa->sa_len;
1645 #endif
1646 			sa->sa_len = buflen;
1647 		}
1648 	}
1649 	return (error);
1650 }
1651 
1652 int
1653 getsockaddr(namp, uaddr, len)
1654 	struct sockaddr **namp;
1655 	caddr_t uaddr;
1656 	size_t len;
1657 {
1658 	struct sockaddr *sa;
1659 	int error;
1660 
1661 	if (len > SOCK_MAXADDRLEN)
1662 		return (ENAMETOOLONG);
1663 	if (len < offsetof(struct sockaddr, sa_data[0]))
1664 		return (EINVAL);
1665 	MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1666 	error = copyin(uaddr, sa, len);
1667 	if (error) {
1668 		FREE(sa, M_SONAME);
1669 	} else {
1670 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1671 		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1672 			sa->sa_family = sa->sa_len;
1673 #endif
1674 		sa->sa_len = len;
1675 		*namp = sa;
1676 	}
1677 	return (error);
1678 }
1679 
1680 /*
1681  * Detach mapped page and release resources back to the system.
1682  */
1683 void
1684 sf_buf_mext(void *addr, void *args)
1685 {
1686 	vm_page_t m;
1687 
1688 	m = sf_buf_page(args);
1689 	sf_buf_free(args);
1690 	vm_page_lock_queues();
1691 	vm_page_unwire(m, 0);
1692 	/*
1693 	 * Check for the object going away on us. This can
1694 	 * happen since we don't hold a reference to it.
1695 	 * If so, we're responsible for freeing the page.
1696 	 */
1697 	if (m->wire_count == 0 && m->object == NULL)
1698 		vm_page_free(m);
1699 	vm_page_unlock_queues();
1700 }
1701 
/*
 * sendfile(2)
 *
 * MPSAFE
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Send the file referred to by 'fd', beginning at 'offset', to the socket
 * referred to by 's'.  At most 'nbytes' bytes of the file are sent; when
 * nbytes == 0 the file is sent up to EOF.  A header and/or trailer may
 * optionally be added to the socket output.  When 'sbytes' is supplied,
 * the total number of bytes sent is written to *sbytes.
 *
 * This entry point runs do_sendfile() with compat disabled.
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
	int error;

	error = do_sendfile(td, uap, 0);
	return (error);
}
1722 
#ifdef COMPAT_FREEBSD4
/*
 * FreeBSD 4 compatible sendfile entry point.  The arguments are repacked
 * field by field into a struct sendfile_args and do_sendfile() is run with
 * compat enabled.
 */
int
freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
{
	struct sendfile_args sfargs;
	int error;

	/* Descriptors. */
	sfargs.fd = uap->fd;
	sfargs.s = uap->s;
	/* Range of the file to send. */
	sfargs.offset = uap->offset;
	sfargs.nbytes = uap->nbytes;
	/* Optional header/trailer block, result pointer and flags. */
	sfargs.hdtr = uap->hdtr;
	sfargs.sbytes = uap->sbytes;
	sfargs.flags = uap->flags;

	error = do_sendfile(td, &sfargs, 1);
	return (error);
}
#endif /* COMPAT_FREEBSD4 */
1740 
1741 static int
1742 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1743 {
1744 	struct vnode *vp;
1745 	struct vm_object *obj;
1746 	struct socket *so = NULL;
1747 	struct mbuf *m, *m_header = NULL;
1748 	struct sf_buf *sf;
1749 	struct vm_page *pg;
1750 	struct writev_args nuap;
1751 	struct sf_hdtr hdtr;
1752 	struct uio *hdr_uio = NULL;
1753 	off_t off, xfsize, hdtr_size, sbytes = 0;
1754 	int error, headersize = 0, headersent = 0;
1755 
1756 	mtx_lock(&Giant);
1757 
1758 	hdtr_size = 0;
1759 
1760 	/*
1761 	 * The descriptor must be a regular file and have a backing VM object.
1762 	 */
1763 	if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1764 		goto done;
1765 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1766 	obj = vp->v_object;
1767 	VOP_UNLOCK(vp, 0, td);
1768 	if (obj == NULL) {
1769 		error = EINVAL;
1770 		goto done;
1771 	}
1772 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1773 		goto done;
1774 	if (so->so_type != SOCK_STREAM) {
1775 		error = EINVAL;
1776 		goto done;
1777 	}
1778 	if ((so->so_state & SS_ISCONNECTED) == 0) {
1779 		error = ENOTCONN;
1780 		goto done;
1781 	}
1782 	if (uap->offset < 0) {
1783 		error = EINVAL;
1784 		goto done;
1785 	}
1786 
1787 #ifdef MAC
1788 	SOCK_LOCK(so);
1789 	error = mac_check_socket_send(td->td_ucred, so);
1790 	SOCK_UNLOCK(so);
1791 	if (error)
1792 		goto done;
1793 #endif
1794 
1795 	/*
1796 	 * If specified, get the pointer to the sf_hdtr struct for
1797 	 * any headers/trailers.
1798 	 */
1799 	if (uap->hdtr != NULL) {
1800 		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1801 		if (error)
1802 			goto done;
1803 		/*
1804 		 * Send any headers.
1805 		 */
1806 		if (hdtr.headers != NULL) {
1807 			error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1808 			if (error)
1809 				goto done;
1810 			hdr_uio->uio_td = td;
1811 			hdr_uio->uio_rw = UIO_WRITE;
1812 			if (hdr_uio->uio_resid > 0) {
1813 				m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0, 0);
1814 				if (m_header == NULL)
1815 					goto done;
1816 				headersize = m_header->m_pkthdr.len;
1817 				if (compat)
1818 					sbytes += headersize;
1819 			}
1820 		}
1821 	}
1822 
1823 	/*
1824 	 * Protect against multiple writers to the socket.
1825 	 */
1826 	SOCKBUF_LOCK(&so->so_snd);
1827 	(void) sblock(&so->so_snd, M_WAITOK);
1828 	SOCKBUF_UNLOCK(&so->so_snd);
1829 
1830 	/*
1831 	 * Loop through the pages in the file, starting with the requested
1832 	 * offset. Get a file page (do I/O if necessary), map the file page
1833 	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1834 	 * it on the socket.
1835 	 */
1836 	for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1837 		vm_pindex_t pindex;
1838 		vm_offset_t pgoff;
1839 
1840 		pindex = OFF_TO_IDX(off);
1841 		VM_OBJECT_LOCK(obj);
1842 retry_lookup:
1843 		/*
1844 		 * Calculate the amount to transfer. Not to exceed a page,
1845 		 * the EOF, or the passed in nbytes.
1846 		 */
1847 		xfsize = obj->un_pager.vnp.vnp_size - off;
1848 		VM_OBJECT_UNLOCK(obj);
1849 		if (xfsize > PAGE_SIZE)
1850 			xfsize = PAGE_SIZE;
1851 		pgoff = (vm_offset_t)(off & PAGE_MASK);
1852 		if (PAGE_SIZE - pgoff < xfsize)
1853 			xfsize = PAGE_SIZE - pgoff;
1854 		if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1855 			xfsize = uap->nbytes - sbytes;
1856 		if (xfsize <= 0) {
1857 			if (m_header != NULL) {
1858 				m = m_header;
1859 				m_header = NULL;
1860 				SOCKBUF_LOCK(&so->so_snd);
1861 				goto retry_space;
1862 			} else
1863 				break;
1864 		}
1865 		/*
1866 		 * Optimize the non-blocking case by looking at the socket space
1867 		 * before going to the extra work of constituting the sf_buf.
1868 		 */
1869 		SOCKBUF_LOCK(&so->so_snd);
1870 		if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1871 			if (so->so_snd.sb_state & SBS_CANTSENDMORE)
1872 				error = EPIPE;
1873 			else
1874 				error = EAGAIN;
1875 			sbunlock(&so->so_snd);
1876 			SOCKBUF_UNLOCK(&so->so_snd);
1877 			goto done;
1878 		}
1879 		SOCKBUF_UNLOCK(&so->so_snd);
1880 		VM_OBJECT_LOCK(obj);
1881 		/*
1882 		 * Attempt to look up the page.
1883 		 *
1884 		 *	Allocate if not found
1885 		 *
1886 		 *	Wait and loop if busy.
1887 		 */
1888 		pg = vm_page_lookup(obj, pindex);
1889 
1890 		if (pg == NULL) {
1891 			pg = vm_page_alloc(obj, pindex, VM_ALLOC_NOBUSY |
1892 			    VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
1893 			if (pg == NULL) {
1894 				VM_OBJECT_UNLOCK(obj);
1895 				VM_WAIT;
1896 				VM_OBJECT_LOCK(obj);
1897 				goto retry_lookup;
1898 			}
1899 			vm_page_lock_queues();
1900 		} else {
1901 			vm_page_lock_queues();
1902 			if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
1903 				goto retry_lookup;
1904 			/*
1905 			 * Wire the page so it does not get ripped out from
1906 			 * under us.
1907 			 */
1908 			vm_page_wire(pg);
1909 		}
1910 
1911 		/*
1912 		 * If page is not valid for what we need, initiate I/O
1913 		 */
1914 
1915 		if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) {
1916 			VM_OBJECT_UNLOCK(obj);
1917 		} else if (uap->flags & SF_NODISKIO) {
1918 			error = EBUSY;
1919 		} else {
1920 			int bsize, resid;
1921 
1922 			/*
1923 			 * Ensure that our page is still around when the I/O
1924 			 * completes.
1925 			 */
1926 			vm_page_io_start(pg);
1927 			vm_page_unlock_queues();
1928 			VM_OBJECT_UNLOCK(obj);
1929 
1930 			/*
1931 			 * Get the page from backing store.
1932 			 */
1933 			bsize = vp->v_mount->mnt_stat.f_iosize;
1934 			vn_lock(vp, LK_SHARED | LK_RETRY, td);
1935 			/*
1936 			 * XXXMAC: Because we don't have fp->f_cred here,
1937 			 * we pass in NOCRED.  This is probably wrong, but
1938 			 * is consistent with our original implementation.
1939 			 */
1940 			error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
1941 			    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
1942 			    IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
1943 			    td->td_ucred, NOCRED, &resid, td);
1944 			VOP_UNLOCK(vp, 0, td);
1945 			VM_OBJECT_LOCK(obj);
1946 			vm_page_lock_queues();
1947 			vm_page_io_finish(pg);
1948 			if (!error)
1949 				VM_OBJECT_UNLOCK(obj);
1950 			mbstat.sf_iocnt++;
1951 		}
1952 
1953 		if (error) {
1954 			vm_page_unwire(pg, 0);
1955 			/*
1956 			 * See if anyone else might know about this page.
1957 			 * If not and it is not valid, then free it.
1958 			 */
1959 			if (pg->wire_count == 0 && pg->valid == 0 &&
1960 			    pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1961 			    pg->hold_count == 0) {
1962 				vm_page_free(pg);
1963 			}
1964 			vm_page_unlock_queues();
1965 			VM_OBJECT_UNLOCK(obj);
1966 			SOCKBUF_LOCK(&so->so_snd);
1967 			sbunlock(&so->so_snd);
1968 			SOCKBUF_UNLOCK(&so->so_snd);
1969 			goto done;
1970 		}
1971 		vm_page_unlock_queues();
1972 
1973 		/*
1974 		 * Get a sendfile buf. We usually wait as long as necessary,
1975 		 * but this wait can be interrupted.
1976 		 */
1977 		if ((sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) {
1978 			mbstat.sf_allocfail++;
1979 			vm_page_lock_queues();
1980 			vm_page_unwire(pg, 0);
1981 			if (pg->wire_count == 0 && pg->object == NULL)
1982 				vm_page_free(pg);
1983 			vm_page_unlock_queues();
1984 			SOCKBUF_LOCK(&so->so_snd);
1985 			sbunlock(&so->so_snd);
1986 			SOCKBUF_UNLOCK(&so->so_snd);
1987 			error = EINTR;
1988 			goto done;
1989 		}
1990 
1991 		/*
1992 		 * Get an mbuf header and set it up as having external storage.
1993 		 */
1994 		if (m_header)
1995 			MGET(m, M_TRYWAIT, MT_DATA);
1996 		else
1997 			MGETHDR(m, M_TRYWAIT, MT_DATA);
1998 		if (m == NULL) {
1999 			error = ENOBUFS;
2000 			sf_buf_mext((void *)sf_buf_kva(sf), sf);
2001 			SOCKBUF_LOCK(&so->so_snd);
2002 			sbunlock(&so->so_snd);
2003 			SOCKBUF_UNLOCK(&so->so_snd);
2004 			goto done;
2005 		}
2006 		/*
2007 		 * Setup external storage for mbuf.
2008 		 */
2009 		MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY,
2010 		    EXT_SFBUF);
2011 		m->m_data = (char *)sf_buf_kva(sf) + pgoff;
2012 		m->m_pkthdr.len = m->m_len = xfsize;
2013 
2014 		if (m_header) {
2015 			m_cat(m_header, m);
2016 			m = m_header;
2017 			m_header = NULL;
2018 			m_fixhdr(m);
2019 		}
2020 
2021 		/*
2022 		 * Add the buffer to the socket buffer chain.
2023 		 */
2024 		SOCKBUF_LOCK(&so->so_snd);
2025 retry_space:
2026 		/*
2027 		 * Make sure that the socket is still able to take more data.
2028 		 * CANTSENDMORE being true usually means that the connection
2029 		 * was closed. so_error is true when an error was sensed after
2030 		 * a previous send.
2031 		 * The state is checked after the page mapping and buffer
2032 		 * allocation above since those operations may block and make
2033 		 * any socket checks stale. From this point forward, nothing
2034 		 * blocks before the pru_send (or more accurately, any blocking
2035 		 * results in a loop back to here to re-check).
2036 		 */
2037 		SOCKBUF_LOCK_ASSERT(&so->so_snd);
2038 		if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) {
2039 			if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2040 				error = EPIPE;
2041 			} else {
2042 				error = so->so_error;
2043 				so->so_error = 0;
2044 			}
2045 			m_freem(m);
2046 			sbunlock(&so->so_snd);
2047 			SOCKBUF_UNLOCK(&so->so_snd);
2048 			goto done;
2049 		}
2050 		/*
2051 		 * Wait for socket space to become available. We do this just
2052 		 * after checking the connection state above in order to avoid
2053 		 * a race condition with sbwait().
2054 		 */
2055 		if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
2056 			if (so->so_state & SS_NBIO) {
2057 				m_freem(m);
2058 				sbunlock(&so->so_snd);
2059 				SOCKBUF_UNLOCK(&so->so_snd);
2060 				error = EAGAIN;
2061 				goto done;
2062 			}
2063 			error = sbwait(&so->so_snd);
2064 			/*
2065 			 * An error from sbwait usually indicates that we've
2066 			 * been interrupted by a signal. If we've sent anything
2067 			 * then return bytes sent, otherwise return the error.
2068 			 */
2069 			if (error) {
2070 				m_freem(m);
2071 				sbunlock(&so->so_snd);
2072 				SOCKBUF_UNLOCK(&so->so_snd);
2073 				goto done;
2074 			}
2075 			goto retry_space;
2076 		}
2077 		SOCKBUF_UNLOCK(&so->so_snd);
2078 		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
2079 		if (error) {
2080 			SOCKBUF_LOCK(&so->so_snd);
2081 			sbunlock(&so->so_snd);
2082 			SOCKBUF_UNLOCK(&so->so_snd);
2083 			goto done;
2084 		}
2085 		headersent = 1;
2086 	}
2087 	SOCKBUF_LOCK(&so->so_snd);
2088 	sbunlock(&so->so_snd);
2089 	SOCKBUF_UNLOCK(&so->so_snd);
2090 
2091 	/*
2092 	 * Send trailers. Wimp out and use writev(2).
2093 	 */
2094 	if (uap->hdtr != NULL && hdtr.trailers != NULL) {
2095 			nuap.fd = uap->s;
2096 			nuap.iovp = hdtr.trailers;
2097 			nuap.iovcnt = hdtr.trl_cnt;
2098 			error = writev(td, &nuap);
2099 			if (error)
2100 				goto done;
2101 			if (compat)
2102 				sbytes += td->td_retval[0];
2103 			else
2104 				hdtr_size += td->td_retval[0];
2105 	}
2106 
2107 done:
2108 	if (headersent) {
2109 		if (!compat)
2110 			hdtr_size += headersize;
2111 	} else {
2112 		if (compat)
2113 			sbytes -= headersize;
2114 	}
2115 	/*
2116 	 * If there was no error we have to clear td->td_retval[0]
2117 	 * because it may have been set by writev.
2118 	 */
2119 	if (error == 0) {
2120 		td->td_retval[0] = 0;
2121 	}
2122 	if (uap->sbytes != NULL) {
2123 		if (!compat)
2124 			sbytes += hdtr_size;
2125 		copyout(&sbytes, uap->sbytes, sizeof(off_t));
2126 	}
2127 	if (vp)
2128 		vrele(vp);
2129 	if (so)
2130 		fputsock(so);
2131 	if (hdr_uio != NULL)
2132 		free(hdr_uio, M_IOV);
2133 	if (m_header)
2134 		m_freem(m_header);
2135 
2136 	mtx_unlock(&Giant);
2137 
2138 	if (error == ERESTART)
2139 		error = EINTR;
2140 
2141 	return (error);
2142 }
2143