xref: /freebsd/sys/kern/uipc_syscalls.c (revision acd3428b7d3e94cef0e1881c868cb4b131d4ff41)
1 /*-
2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * sendfile(2) and related extensions:
6  * Copyright (c) 1998, David Greenman. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 #include "opt_sctp.h"
39 #include "opt_compat.h"
40 #include "opt_ktrace.h"
41 #include "opt_mac.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/kernel.h>
46 #include <sys/lock.h>
47 #include <sys/mutex.h>
48 #include <sys/sysproto.h>
49 #include <sys/malloc.h>
50 #include <sys/filedesc.h>
51 #include <sys/event.h>
52 #include <sys/proc.h>
53 #include <sys/fcntl.h>
54 #include <sys/file.h>
55 #include <sys/filio.h>
56 #include <sys/mount.h>
57 #include <sys/mbuf.h>
58 #include <sys/protosw.h>
59 #include <sys/sf_buf.h>
60 #include <sys/socket.h>
61 #include <sys/socketvar.h>
62 #include <sys/signalvar.h>
63 #include <sys/syscallsubr.h>
64 #include <sys/sysctl.h>
65 #include <sys/uio.h>
66 #include <sys/vnode.h>
67 #ifdef KTRACE
68 #include <sys/ktrace.h>
69 #endif
70 
71 #include <security/mac/mac_framework.h>
72 
73 #include <vm/vm.h>
74 #include <vm/vm_object.h>
75 #include <vm/vm_page.h>
76 #include <vm/vm_pageout.h>
77 #include <vm/vm_kern.h>
78 #include <vm/vm_extern.h>
79 
80 #ifdef SCTP
81 #include <netinet/sctp.h>
82 #include <netinet/sctp_peeloff.h>
83 #endif /* SCTP */
84 
85 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
86 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
87 
88 static int accept1(struct thread *td, struct accept_args *uap, int compat);
89 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
90 static int getsockname1(struct thread *td, struct getsockname_args *uap,
91 			int compat);
92 static int getpeername1(struct thread *td, struct getpeername_args *uap,
93 			int compat);
94 
95 /*
96  * NSFBUFS-related variables and associated sysctls
97  */
98 int nsfbufs;
99 int nsfbufspeak;
100 int nsfbufsused;
101 
102 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
103     "Maximum number of sendfile(2) sf_bufs available");
104 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
105     "Number of sendfile(2) sf_bufs at peak usage");
106 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
107     "Number of sendfile(2) sf_bufs in use");
108 
109 /*
110  * Convert a user file descriptor to a kernel file entry.  A reference on the
111  * file entry is held upon returning.  This is lighter weight than
112  * fgetsock(), which bumps the socket reference and drops the file reference
113  * count instead; this approach avoids several additional mutex operations
114  * associated with the additional reference count.  If requested, return the
115  * open file flags.
116  */
117 static int
118 getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp)
119 {
120 	struct file *fp;
121 	int error;
122 
123 	fp = NULL;
124 	if (fdp == NULL)
125 		error = EBADF;
126 	else {
127 		FILEDESC_LOCK_FAST(fdp);
128 		fp = fget_locked(fdp, fd);
129 		if (fp == NULL)
130 			error = EBADF;
131 		else if (fp->f_type != DTYPE_SOCKET) {
132 			fp = NULL;
133 			error = ENOTSOCK;
134 		} else {
135 			fhold(fp);
136 			if (fflagp != NULL)
137 				*fflagp = fp->f_flag;
138 			error = 0;
139 		}
140 		FILEDESC_UNLOCK_FAST(fdp);
141 	}
142 	*fpp = fp;
143 	return (error);
144 }
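/*
 * A minimal sketch of the getsock()/fdrop() pattern used by the callers
 * below: the file reference is held across the socket operation and
 * dropped once the operation completes.
 *
 *	struct file *fp;
 *	struct socket *so;
 *	int error;
 *
 *	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
 *	if (error)
 *		return (error);
 *	so = fp->f_data;
 *	... operate on the socket ...
 *	fdrop(fp, td);
 */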
145 
146 /*
147  * System call interface to the socket abstraction.
148  */
149 #if defined(COMPAT_43)
150 #define COMPAT_OLDSOCK
151 #endif
152 
153 /*
154  * MPSAFE
155  */
156 int
157 socket(td, uap)
158 	struct thread *td;
159 	register struct socket_args /* {
160 		int	domain;
161 		int	type;
162 		int	protocol;
163 	} */ *uap;
164 {
165 	struct filedesc *fdp;
166 	struct socket *so;
167 	struct file *fp;
168 	int fd, error;
169 
170 #ifdef MAC
171 	error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
172 	    uap->protocol);
173 	if (error)
174 		return (error);
175 #endif
176 	fdp = td->td_proc->p_fd;
177 	error = falloc(td, &fp, &fd);
178 	if (error)
179 		return (error);
180 	/* An extra reference on `fp' has been held for us by falloc(). */
181 	NET_LOCK_GIANT();
182 	error = socreate(uap->domain, &so, uap->type, uap->protocol,
183 	    td->td_ucred, td);
184 	NET_UNLOCK_GIANT();
185 	if (error) {
186 		fdclose(fdp, fp, fd, td);
187 	} else {
188 		FILEDESC_LOCK_FAST(fdp);
189 		fp->f_data = so;	/* already has ref count */
190 		fp->f_flag = FREAD|FWRITE;
191 		fp->f_ops = &socketops;
192 		fp->f_type = DTYPE_SOCKET;
193 		FILEDESC_UNLOCK_FAST(fdp);
194 		td->td_retval[0] = fd;
195 	}
196 	fdrop(fp, td);
197 	return (error);
198 }
199 
200 /*
201  * MPSAFE
202  */
203 /* ARGSUSED */
204 int
205 bind(td, uap)
206 	struct thread *td;
207 	register struct bind_args /* {
208 		int	s;
209 		caddr_t	name;
210 		int	namelen;
211 	} */ *uap;
212 {
213 	struct sockaddr *sa;
214 	int error;
215 
216 	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
217 		return (error);
218 
219 	error = kern_bind(td, uap->s, sa);
220 	free(sa, M_SONAME);
221 	return (error);
222 }
223 
224 int
225 kern_bind(td, fd, sa)
226 	struct thread *td;
227 	int fd;
228 	struct sockaddr *sa;
229 {
230 	struct socket *so;
231 	struct file *fp;
232 	int error;
233 
234 	NET_LOCK_GIANT();
235 	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
236 	if (error)
237 		goto done2;
238 	so = fp->f_data;
239 #ifdef MAC
240 	SOCK_LOCK(so);
241 	error = mac_check_socket_bind(td->td_ucred, so, sa);
242 	SOCK_UNLOCK(so);
243 	if (error)
244 		goto done1;
245 #endif
246 	error = sobind(so, sa, td);
247 #ifdef MAC
248 done1:
249 #endif
250 	fdrop(fp, td);
251 done2:
252 	NET_UNLOCK_GIANT();
253 	return (error);
254 }
255 
256 /*
257  * MPSAFE
258  */
259 /* ARGSUSED */
260 int
261 listen(td, uap)
262 	struct thread *td;
263 	register struct listen_args /* {
264 		int	s;
265 		int	backlog;
266 	} */ *uap;
267 {
268 	struct socket *so;
269 	struct file *fp;
270 	int error;
271 
272 	NET_LOCK_GIANT();
273 	error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
274 	if (error == 0) {
275 		so = fp->f_data;
276 #ifdef MAC
277 		SOCK_LOCK(so);
278 		error = mac_check_socket_listen(td->td_ucred, so);
279 		SOCK_UNLOCK(so);
280 		if (error)
281 			goto done;
282 #endif
283 		error = solisten(so, uap->backlog, td);
284 #ifdef MAC
285 done:
286 #endif
287 		fdrop(fp, td);
288 	}
289 	NET_UNLOCK_GIANT();
290 	return(error);
291 }
292 
293 /*
294  * accept1()
295  * MPSAFE
296  */
297 static int
298 accept1(td, uap, compat)
299 	struct thread *td;
300 	register struct accept_args /* {
301 		int	s;
302 		struct sockaddr	* __restrict name;
303 		socklen_t	* __restrict anamelen;
304 	} */ *uap;
305 	int compat;
306 {
307 	struct sockaddr *name;
308 	socklen_t namelen;
309 	struct file *fp;
310 	int error;
311 
312 	if (uap->name == NULL)
313 		return (kern_accept(td, uap->s, NULL, NULL, NULL));
314 
315 	error = copyin(uap->anamelen, &namelen, sizeof (namelen));
316 	if (error)
317 		return (error);
318 
319 	error = kern_accept(td, uap->s, &name, &namelen, &fp);
320 
321 	/*
322 	 * return a namelen of zero for older code which might
323 	 * ignore the return value from accept.
324 	 */
325 	if (error) {
326 		(void) copyout(&namelen,
327 		    uap->anamelen, sizeof(*uap->anamelen));
328 		return (error);
329 	}
330 
331 	if (error == 0 && name != NULL) {
332 #ifdef COMPAT_OLDSOCK
333 		if (compat)
334 			((struct osockaddr *)name)->sa_family =
335 			    name->sa_family;
336 #endif
337 		error = copyout(name, uap->name, namelen);
338 	}
339 	if (error == 0)
340 		error = copyout(&namelen, uap->anamelen,
341 		    sizeof(namelen));
342 	if (error)
343 		fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
344 	fdrop(fp, td);
345 	free(name, M_SONAME);
346 	return (error);
347 }
348 
349 int
350 kern_accept(struct thread *td, int s, struct sockaddr **name,
351     socklen_t *namelen, struct file **fp)
352 {
353 	struct filedesc *fdp;
354 	struct file *headfp, *nfp = NULL;
355 	struct sockaddr *sa = NULL;
356 	int error;
357 	struct socket *head, *so;
358 	int fd;
359 	u_int fflag;
360 	pid_t pgid;
361 	int tmp;
362 
363 	if (name) {
364 		*name = NULL;
365 		if (*namelen < 0)
366 			return (EINVAL);
367 	}
368 
369 	fdp = td->td_proc->p_fd;
370 	NET_LOCK_GIANT();
371 	error = getsock(fdp, s, &headfp, &fflag);
372 	if (error)
373 		goto done2;
374 	head = headfp->f_data;
375 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
376 		error = EINVAL;
377 		goto done;
378 	}
379 #ifdef MAC
380 	SOCK_LOCK(head);
381 	error = mac_check_socket_accept(td->td_ucred, head);
382 	SOCK_UNLOCK(head);
383 	if (error != 0)
384 		goto done;
385 #endif
386 	error = falloc(td, &nfp, &fd);
387 	if (error)
388 		goto done;
389 	ACCEPT_LOCK();
390 	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
391 		ACCEPT_UNLOCK();
392 		error = EWOULDBLOCK;
393 		goto noconnection;
394 	}
395 	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
396 		if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
397 			head->so_error = ECONNABORTED;
398 			break;
399 		}
400 		error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
401 		    "accept", 0);
402 		if (error) {
403 			ACCEPT_UNLOCK();
404 			goto noconnection;
405 		}
406 	}
407 	if (head->so_error) {
408 		error = head->so_error;
409 		head->so_error = 0;
410 		ACCEPT_UNLOCK();
411 		goto noconnection;
412 	}
413 	so = TAILQ_FIRST(&head->so_comp);
414 	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
415 	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
416 
417 	/*
418 	 * Before changing the flags on the socket, we have to bump the
419 	 * reference count.  Otherwise, if the protocol calls sofree(),
420 	 * the socket will be released due to a zero refcount.
421 	 */
422 	SOCK_LOCK(so);			/* soref() and so_state update */
423 	soref(so);			/* file descriptor reference */
424 
425 	TAILQ_REMOVE(&head->so_comp, so, so_list);
426 	head->so_qlen--;
427 	so->so_state |= (head->so_state & SS_NBIO);
428 	so->so_qstate &= ~SQ_COMP;
429 	so->so_head = NULL;
430 
431 	SOCK_UNLOCK(so);
432 	ACCEPT_UNLOCK();
433 
434 	/* An extra reference on `nfp' has been held for us by falloc(). */
435 	td->td_retval[0] = fd;
436 
437 	/* connection has been removed from the listen queue */
438 	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
439 
440 	pgid = fgetown(&head->so_sigio);
441 	if (pgid != 0)
442 		fsetown(pgid, &so->so_sigio);
443 
444 	FILE_LOCK(nfp);
445 	nfp->f_data = so;	/* nfp has ref count from falloc */
446 	nfp->f_flag = fflag;
447 	nfp->f_ops = &socketops;
448 	nfp->f_type = DTYPE_SOCKET;
449 	FILE_UNLOCK(nfp);
450 	/* Sync socket nonblocking/async state with file flags */
451 	tmp = fflag & FNONBLOCK;
452 	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
453 	tmp = fflag & FASYNC;
454 	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
455 	sa = 0;
456 	error = soaccept(so, &sa);
457 	if (error) {
458 		/*
459 		 * return a namelen of zero for older code which might
460 		 * ignore the return value from accept.
461 		 */
462 		if (name)
463 			*namelen = 0;
464 		goto noconnection;
465 	}
466 	if (sa == NULL) {
467 		if (name)
468 			*namelen = 0;
469 		goto done;
470 	}
471 	if (name) {
472 		/* check sa_len before it is destroyed */
473 		if (*namelen > sa->sa_len)
474 			*namelen = sa->sa_len;
475 		*name = sa;
476 		sa = NULL;
477 	}
478 noconnection:
479 	if (sa)
480 		FREE(sa, M_SONAME);
481 
482 	/*
483 	 * close the new descriptor, assuming someone hasn't ripped it
484 	 * out from under us.
485 	 */
486 	if (error)
487 		fdclose(fdp, nfp, fd, td);
488 
489 	/*
490 	 * Release explicitly held references before returning.  We return
491 	 * a reference on nfp to the caller on success if they request it.
492 	 */
493 done:
494 	if (fp != NULL) {
495 		if (error == 0) {
496 			*fp = nfp;
497 			nfp = NULL;
498 		} else
499 			*fp = NULL;
500 	}
501 	if (nfp != NULL)
502 		fdrop(nfp, td);
503 	fdrop(headfp, td);
504 done2:
505 	NET_UNLOCK_GIANT();
506 	return (error);
507 }
508 
509 /*
510  * MPSAFE (accept1() is MPSAFE)
511  */
512 int
513 accept(td, uap)
514 	struct thread *td;
515 	struct accept_args *uap;
516 {
517 
518 	return (accept1(td, uap, 0));
519 }
520 
521 #ifdef COMPAT_OLDSOCK
522 /*
523  * MPSAFE (accept1() is MPSAFE)
524  */
525 int
526 oaccept(td, uap)
527 	struct thread *td;
528 	struct accept_args *uap;
529 {
530 
531 	return (accept1(td, uap, 1));
532 }
533 #endif /* COMPAT_OLDSOCK */
534 
535 /*
536  * MPSAFE
537  */
538 /* ARGSUSED */
539 int
540 connect(td, uap)
541 	struct thread *td;
542 	register struct connect_args /* {
543 		int	s;
544 		caddr_t	name;
545 		int	namelen;
546 	} */ *uap;
547 {
548 	struct sockaddr *sa;
549 	int error;
550 
551 	error = getsockaddr(&sa, uap->name, uap->namelen);
552 	if (error)
553 		return (error);
554 
555 	error = kern_connect(td, uap->s, sa);
556 	free(sa, M_SONAME);
557 	return (error);
558 }
559 
560 
561 int
562 kern_connect(td, fd, sa)
563 	struct thread *td;
564 	int fd;
565 	struct sockaddr *sa;
566 {
567 	struct socket *so;
568 	struct file *fp;
569 	int error;
570 	int interrupted = 0;
571 
572 	NET_LOCK_GIANT();
573 	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
574 	if (error)
575 		goto done2;
576 	so = fp->f_data;
577 	if (so->so_state & SS_ISCONNECTING) {
578 		error = EALREADY;
579 		goto done1;
580 	}
581 #ifdef MAC
582 	SOCK_LOCK(so);
583 	error = mac_check_socket_connect(td->td_ucred, so, sa);
584 	SOCK_UNLOCK(so);
585 	if (error)
586 		goto bad;
587 #endif
588 	error = soconnect(so, sa, td);
589 	if (error)
590 		goto bad;
591 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
592 		error = EINPROGRESS;
593 		goto done1;
594 	}
595 	SOCK_LOCK(so);
596 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
597 		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
598 		    "connec", 0);
599 		if (error) {
600 			if (error == EINTR || error == ERESTART)
601 				interrupted = 1;
602 			break;
603 		}
604 	}
605 	if (error == 0) {
606 		error = so->so_error;
607 		so->so_error = 0;
608 	}
609 	SOCK_UNLOCK(so);
610 bad:
611 	if (!interrupted)
612 		so->so_state &= ~SS_ISCONNECTING;
613 	if (error == ERESTART)
614 		error = EINTR;
615 done1:
616 	fdrop(fp, td);
617 done2:
618 	NET_UNLOCK_GIANT();
619 	return (error);
620 }
621 
622 /*
623  * MPSAFE
624  */
625 int
626 socketpair(td, uap)
627 	struct thread *td;
628 	register struct socketpair_args /* {
629 		int	domain;
630 		int	type;
631 		int	protocol;
632 		int	*rsv;
633 	} */ *uap;
634 {
635 	register struct filedesc *fdp = td->td_proc->p_fd;
636 	struct file *fp1, *fp2;
637 	struct socket *so1, *so2;
638 	int fd, error, sv[2];
639 
640 #ifdef MAC
641 	/* We might want to have a separate check for socket pairs. */
642 	error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
643 	    uap->protocol);
644 	if (error)
645 		return (error);
646 #endif
647 
648 	NET_LOCK_GIANT();
649 	error = socreate(uap->domain, &so1, uap->type, uap->protocol,
650 	    td->td_ucred, td);
651 	if (error)
652 		goto done2;
653 	error = socreate(uap->domain, &so2, uap->type, uap->protocol,
654 	    td->td_ucred, td);
655 	if (error)
656 		goto free1;
657 	/* On success extra references to `fp1' and `fp2' are held by falloc(). */
658 	error = falloc(td, &fp1, &fd);
659 	if (error)
660 		goto free2;
661 	sv[0] = fd;
662 	fp1->f_data = so1;	/* so1 already has ref count */
663 	error = falloc(td, &fp2, &fd);
664 	if (error)
665 		goto free3;
666 	fp2->f_data = so2;	/* so2 already has ref count */
667 	sv[1] = fd;
668 	error = soconnect2(so1, so2);
669 	if (error)
670 		goto free4;
671 	if (uap->type == SOCK_DGRAM) {
672 		/*
673 		 * Datagram socket connection is asymmetric.
674 		 */
675 		 error = soconnect2(so2, so1);
676 		 if (error)
677 			goto free4;
678 	}
679 	FILE_LOCK(fp1);
680 	fp1->f_flag = FREAD|FWRITE;
681 	fp1->f_ops = &socketops;
682 	fp1->f_type = DTYPE_SOCKET;
683 	FILE_UNLOCK(fp1);
684 	FILE_LOCK(fp2);
685 	fp2->f_flag = FREAD|FWRITE;
686 	fp2->f_ops = &socketops;
687 	fp2->f_type = DTYPE_SOCKET;
688 	FILE_UNLOCK(fp2);
689 	error = copyout(sv, uap->rsv, 2 * sizeof (int));
690 	fdrop(fp1, td);
691 	fdrop(fp2, td);
692 	goto done2;
693 free4:
694 	fdclose(fdp, fp2, sv[1], td);
695 	fdrop(fp2, td);
696 free3:
697 	fdclose(fdp, fp1, sv[0], td);
698 	fdrop(fp1, td);
699 free2:
700 	(void)soclose(so2);
701 free1:
702 	(void)soclose(so1);
703 done2:
704 	NET_UNLOCK_GIANT();
705 	return (error);
706 }
707 
708 static int
709 sendit(td, s, mp, flags)
710 	register struct thread *td;
711 	int s;
712 	register struct msghdr *mp;
713 	int flags;
714 {
715 	struct mbuf *control;
716 	struct sockaddr *to;
717 	int error;
718 
719 	if (mp->msg_name != NULL) {
720 		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
721 		if (error) {
722 			to = NULL;
723 			goto bad;
724 		}
725 		mp->msg_name = to;
726 	} else {
727 		to = NULL;
728 	}
729 
730 	if (mp->msg_control) {
731 		if (mp->msg_controllen < sizeof(struct cmsghdr)
732 #ifdef COMPAT_OLDSOCK
733 		    && mp->msg_flags != MSG_COMPAT
734 #endif
735 		) {
736 			error = EINVAL;
737 			goto bad;
738 		}
739 		error = sockargs(&control, mp->msg_control,
740 		    mp->msg_controllen, MT_CONTROL);
741 		if (error)
742 			goto bad;
743 #ifdef COMPAT_OLDSOCK
744 		if (mp->msg_flags == MSG_COMPAT) {
745 			register struct cmsghdr *cm;
746 
747 			M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
748 			if (control == 0) {
749 				error = ENOBUFS;
750 				goto bad;
751 			} else {
752 				cm = mtod(control, struct cmsghdr *);
753 				cm->cmsg_len = control->m_len;
754 				cm->cmsg_level = SOL_SOCKET;
755 				cm->cmsg_type = SCM_RIGHTS;
756 			}
757 		}
758 #endif
759 	} else {
760 		control = NULL;
761 	}
762 
763 	error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
764 
765 bad:
766 	if (to)
767 		FREE(to, M_SONAME);
768 	return (error);
769 }
770 
771 int
772 kern_sendit(td, s, mp, flags, control, segflg)
773 	struct thread *td;
774 	int s;
775 	struct msghdr *mp;
776 	int flags;
777 	struct mbuf *control;
778 	enum uio_seg segflg;
779 {
780 	struct file *fp;
781 	struct uio auio;
782 	struct iovec *iov;
783 	struct socket *so;
784 	int i;
785 	int len, error;
786 #ifdef KTRACE
787 	struct uio *ktruio = NULL;
788 #endif
789 
790 	NET_LOCK_GIANT();
791 	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
792 	if (error)
793 		goto bad2;
794 	so = (struct socket *)fp->f_data;
795 
796 #ifdef MAC
797 	SOCK_LOCK(so);
798 	error = mac_check_socket_send(td->td_ucred, so);
799 	SOCK_UNLOCK(so);
800 	if (error)
801 		goto bad;
802 #endif
803 
804 	auio.uio_iov = mp->msg_iov;
805 	auio.uio_iovcnt = mp->msg_iovlen;
806 	auio.uio_segflg = segflg;
807 	auio.uio_rw = UIO_WRITE;
808 	auio.uio_td = td;
809 	auio.uio_offset = 0;			/* XXX */
810 	auio.uio_resid = 0;
811 	iov = mp->msg_iov;
812 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
813 		if ((auio.uio_resid += iov->iov_len) < 0) {
814 			error = EINVAL;
815 			goto bad;
816 		}
817 	}
818 #ifdef KTRACE
819 	if (KTRPOINT(td, KTR_GENIO))
820 		ktruio = cloneuio(&auio);
821 #endif
822 	len = auio.uio_resid;
823 	error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
824 	if (error) {
825 		if (auio.uio_resid != len && (error == ERESTART ||
826 		    error == EINTR || error == EWOULDBLOCK))
827 			error = 0;
828 		/* Generation of SIGPIPE can be controlled per socket */
829 		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
830 		    !(flags & MSG_NOSIGNAL)) {
831 			PROC_LOCK(td->td_proc);
832 			psignal(td->td_proc, SIGPIPE);
833 			PROC_UNLOCK(td->td_proc);
834 		}
835 	}
836 	if (error == 0)
837 		td->td_retval[0] = len - auio.uio_resid;
838 #ifdef KTRACE
839 	if (ktruio != NULL) {
840 		ktruio->uio_resid = td->td_retval[0];
841 		ktrgenio(s, UIO_WRITE, ktruio, error);
842 	}
843 #endif
844 bad:
845 	fdrop(fp, td);
846 bad2:
847 	NET_UNLOCK_GIANT();
848 	return (error);
849 }
850 
851 /*
852  * MPSAFE
853  */
854 int
855 sendto(td, uap)
856 	struct thread *td;
857 	register struct sendto_args /* {
858 		int	s;
859 		caddr_t	buf;
860 		size_t	len;
861 		int	flags;
862 		caddr_t	to;
863 		int	tolen;
864 	} */ *uap;
865 {
866 	struct msghdr msg;
867 	struct iovec aiov;
868 	int error;
869 
870 	msg.msg_name = uap->to;
871 	msg.msg_namelen = uap->tolen;
872 	msg.msg_iov = &aiov;
873 	msg.msg_iovlen = 1;
874 	msg.msg_control = 0;
875 #ifdef COMPAT_OLDSOCK
876 	msg.msg_flags = 0;
877 #endif
878 	aiov.iov_base = uap->buf;
879 	aiov.iov_len = uap->len;
880 	error = sendit(td, uap->s, &msg, uap->flags);
881 	return (error);
882 }
883 
884 #ifdef COMPAT_OLDSOCK
885 /*
886  * MPSAFE
887  */
888 int
889 osend(td, uap)
890 	struct thread *td;
891 	register struct osend_args /* {
892 		int	s;
893 		caddr_t	buf;
894 		int	len;
895 		int	flags;
896 	} */ *uap;
897 {
898 	struct msghdr msg;
899 	struct iovec aiov;
900 	int error;
901 
902 	msg.msg_name = 0;
903 	msg.msg_namelen = 0;
904 	msg.msg_iov = &aiov;
905 	msg.msg_iovlen = 1;
906 	aiov.iov_base = uap->buf;
907 	aiov.iov_len = uap->len;
908 	msg.msg_control = 0;
909 	msg.msg_flags = 0;
910 	error = sendit(td, uap->s, &msg, uap->flags);
911 	return (error);
912 }
913 
914 /*
915  * MPSAFE
916  */
917 int
918 osendmsg(td, uap)
919 	struct thread *td;
920 	struct osendmsg_args /* {
921 		int	s;
922 		caddr_t	msg;
923 		int	flags;
924 	} */ *uap;
925 {
926 	struct msghdr msg;
927 	struct iovec *iov;
928 	int error;
929 
930 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
931 	if (error)
932 		return (error);
933 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
934 	if (error)
935 		return (error);
936 	msg.msg_iov = iov;
937 	msg.msg_flags = MSG_COMPAT;
938 	error = sendit(td, uap->s, &msg, uap->flags);
939 	free(iov, M_IOV);
940 	return (error);
941 }
942 #endif
943 
944 /*
945  * MPSAFE
946  */
947 int
948 sendmsg(td, uap)
949 	struct thread *td;
950 	struct sendmsg_args /* {
951 		int	s;
952 		caddr_t	msg;
953 		int	flags;
954 	} */ *uap;
955 {
956 	struct msghdr msg;
957 	struct iovec *iov;
958 	int error;
959 
960 	error = copyin(uap->msg, &msg, sizeof (msg));
961 	if (error)
962 		return (error);
963 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
964 	if (error)
965 		return (error);
966 	msg.msg_iov = iov;
967 #ifdef COMPAT_OLDSOCK
968 	msg.msg_flags = 0;
969 #endif
970 	error = sendit(td, uap->s, &msg, uap->flags);
971 	free(iov, M_IOV);
972 	return (error);
973 }
974 
975 int
976 kern_recvit(td, s, mp, fromseg, controlp)
977 	struct thread *td;
978 	int s;
979 	struct msghdr *mp;
980 	enum uio_seg fromseg;
981 	struct mbuf **controlp;
982 {
983 	struct uio auio;
984 	struct iovec *iov;
985 	int i;
986 	socklen_t len;
987 	int error;
988 	struct mbuf *m, *control = 0;
989 	caddr_t ctlbuf;
990 	struct file *fp;
991 	struct socket *so;
992 	struct sockaddr *fromsa = 0;
993 #ifdef KTRACE
994 	struct uio *ktruio = NULL;
995 #endif
996 
997 	if (controlp != NULL)
998 		*controlp = 0;
999 
1000 	NET_LOCK_GIANT();
1001 	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1002 	if (error) {
1003 		NET_UNLOCK_GIANT();
1004 		return (error);
1005 	}
1006 	so = fp->f_data;
1007 
1008 #ifdef MAC
1009 	SOCK_LOCK(so);
1010 	error = mac_check_socket_receive(td->td_ucred, so);
1011 	SOCK_UNLOCK(so);
1012 	if (error) {
1013 		fdrop(fp, td);
1014 		NET_UNLOCK_GIANT();
1015 		return (error);
1016 	}
1017 #endif
1018 
1019 	auio.uio_iov = mp->msg_iov;
1020 	auio.uio_iovcnt = mp->msg_iovlen;
1021 	auio.uio_segflg = UIO_USERSPACE;
1022 	auio.uio_rw = UIO_READ;
1023 	auio.uio_td = td;
1024 	auio.uio_offset = 0;			/* XXX */
1025 	auio.uio_resid = 0;
1026 	iov = mp->msg_iov;
1027 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
1028 		if ((auio.uio_resid += iov->iov_len) < 0) {
1029 			fdrop(fp, td);
1030 			NET_UNLOCK_GIANT();
1031 			return (EINVAL);
1032 		}
1033 	}
1034 #ifdef KTRACE
1035 	if (KTRPOINT(td, KTR_GENIO))
1036 		ktruio = cloneuio(&auio);
1037 #endif
1038 	len = auio.uio_resid;
1039 	error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
1040 	    (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
1041 	    &mp->msg_flags);
1042 	if (error) {
1043 		if (auio.uio_resid != (int)len && (error == ERESTART ||
1044 		    error == EINTR || error == EWOULDBLOCK))
1045 			error = 0;
1046 	}
1047 #ifdef KTRACE
1048 	if (ktruio != NULL) {
1049 		ktruio->uio_resid = (int)len - auio.uio_resid;
1050 		ktrgenio(s, UIO_READ, ktruio, error);
1051 	}
1052 #endif
1053 	if (error)
1054 		goto out;
1055 	td->td_retval[0] = (int)len - auio.uio_resid;
1056 	if (mp->msg_name) {
1057 		len = mp->msg_namelen;
1058 		if (len <= 0 || fromsa == 0)
1059 			len = 0;
1060 		else {
1061 			/* save sa_len before it is destroyed by MSG_COMPAT */
1062 			len = MIN(len, fromsa->sa_len);
1063 #ifdef COMPAT_OLDSOCK
1064 			if (mp->msg_flags & MSG_COMPAT)
1065 				((struct osockaddr *)fromsa)->sa_family =
1066 				    fromsa->sa_family;
1067 #endif
1068 			if (fromseg == UIO_USERSPACE) {
1069 				error = copyout(fromsa, mp->msg_name,
1070 				    (unsigned)len);
1071 				if (error)
1072 					goto out;
1073 			} else
1074 				bcopy(fromsa, mp->msg_name, len);
1075 		}
1076 		mp->msg_namelen = len;
1077 	}
1078 	if (mp->msg_control && controlp == NULL) {
1079 #ifdef COMPAT_OLDSOCK
1080 		/*
1081 		 * We assume that old recvmsg calls won't receive access
1082 		 * rights and other control info, esp. as control info
1083 		 * is always optional and those options didn't exist in 4.3.
1084 		 * If we receive rights, trim the cmsghdr; anything else
1085 		 * is tossed.
1086 		 */
1087 		if (control && mp->msg_flags & MSG_COMPAT) {
1088 			if (mtod(control, struct cmsghdr *)->cmsg_level !=
1089 			    SOL_SOCKET ||
1090 			    mtod(control, struct cmsghdr *)->cmsg_type !=
1091 			    SCM_RIGHTS) {
1092 				mp->msg_controllen = 0;
1093 				goto out;
1094 			}
1095 			control->m_len -= sizeof (struct cmsghdr);
1096 			control->m_data += sizeof (struct cmsghdr);
1097 		}
1098 #endif
1099 		len = mp->msg_controllen;
1100 		m = control;
1101 		mp->msg_controllen = 0;
1102 		ctlbuf = mp->msg_control;
1103 
1104 		while (m && len > 0) {
1105 			unsigned int tocopy;
1106 
1107 			if (len >= m->m_len)
1108 				tocopy = m->m_len;
1109 			else {
1110 				mp->msg_flags |= MSG_CTRUNC;
1111 				tocopy = len;
1112 			}
1113 
1114 			if ((error = copyout(mtod(m, caddr_t),
1115 					ctlbuf, tocopy)) != 0)
1116 				goto out;
1117 
1118 			ctlbuf += tocopy;
1119 			len -= tocopy;
1120 			m = m->m_next;
1121 		}
1122 		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
1123 	}
1124 out:
1125 	fdrop(fp, td);
1126 	NET_UNLOCK_GIANT();
1127 	if (fromsa)
1128 		FREE(fromsa, M_SONAME);
1129 
1130 	if (error == 0 && controlp != NULL)
1131 		*controlp = control;
1132 	else  if (control)
1133 		m_freem(control);
1134 
1135 	return (error);
1136 }
1137 
1138 static int
1139 recvit(td, s, mp, namelenp)
1140 	struct thread *td;
1141 	int s;
1142 	struct msghdr *mp;
1143 	void *namelenp;
1144 {
1145 	int error;
1146 
1147 	error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
1148 	if (error)
1149 		return (error);
1150 	if (namelenp) {
1151 		error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
1152 #ifdef COMPAT_OLDSOCK
1153 		if (mp->msg_flags & MSG_COMPAT)
1154 			error = 0;	/* old recvfrom didn't check */
1155 #endif
1156 	}
1157 	return (error);
1158 }
1159 
1160 /*
1161  * MPSAFE
1162  */
1163 int
1164 recvfrom(td, uap)
1165 	struct thread *td;
1166 	register struct recvfrom_args /* {
1167 		int	s;
1168 		caddr_t	buf;
1169 		size_t	len;
1170 		int	flags;
1171 		struct sockaddr * __restrict	from;
1172 		socklen_t * __restrict fromlenaddr;
1173 	} */ *uap;
1174 {
1175 	struct msghdr msg;
1176 	struct iovec aiov;
1177 	int error;
1178 
1179 	if (uap->fromlenaddr) {
1180 		error = copyin(uap->fromlenaddr,
1181 		    &msg.msg_namelen, sizeof (msg.msg_namelen));
1182 		if (error)
1183 			goto done2;
1184 	} else {
1185 		msg.msg_namelen = 0;
1186 	}
1187 	msg.msg_name = uap->from;
1188 	msg.msg_iov = &aiov;
1189 	msg.msg_iovlen = 1;
1190 	aiov.iov_base = uap->buf;
1191 	aiov.iov_len = uap->len;
1192 	msg.msg_control = 0;
1193 	msg.msg_flags = uap->flags;
1194 	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1195 done2:
1196 	return(error);
1197 }
1198 
1199 #ifdef COMPAT_OLDSOCK
1200 /*
1201  * MPSAFE
1202  */
1203 int
1204 orecvfrom(td, uap)
1205 	struct thread *td;
1206 	struct recvfrom_args *uap;
1207 {
1208 
1209 	uap->flags |= MSG_COMPAT;
1210 	return (recvfrom(td, uap));
1211 }
1212 #endif
1213 
1214 
1215 #ifdef COMPAT_OLDSOCK
1216 /*
1217  * MPSAFE
1218  */
1219 int
1220 orecv(td, uap)
1221 	struct thread *td;
1222 	register struct orecv_args /* {
1223 		int	s;
1224 		caddr_t	buf;
1225 		int	len;
1226 		int	flags;
1227 	} */ *uap;
1228 {
1229 	struct msghdr msg;
1230 	struct iovec aiov;
1231 	int error;
1232 
1233 	msg.msg_name = 0;
1234 	msg.msg_namelen = 0;
1235 	msg.msg_iov = &aiov;
1236 	msg.msg_iovlen = 1;
1237 	aiov.iov_base = uap->buf;
1238 	aiov.iov_len = uap->len;
1239 	msg.msg_control = 0;
1240 	msg.msg_flags = uap->flags;
1241 	error = recvit(td, uap->s, &msg, NULL);
1242 	return (error);
1243 }
1244 
1245 /*
1246  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1247  * overlays the new one, missing only the flags, and with the (old) access
1248  * rights where the control fields are now.
1249  *
1250  * MPSAFE
1251  */
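/*
 * A sketch of the overlay this relies on; the member names below follow
 * <sys/socket.h>.  The old access-rights members sit where the control
 * members are now, and omsghdr has no msg_flags:
 *
 *	omsghdr.msg_name          <->  msghdr.msg_name
 *	omsghdr.msg_namelen       <->  msghdr.msg_namelen
 *	omsghdr.msg_iov           <->  msghdr.msg_iov
 *	omsghdr.msg_iovlen        <->  msghdr.msg_iovlen
 *	omsghdr.msg_accrights     <->  msghdr.msg_control
 *	omsghdr.msg_accrightslen  <->  msghdr.msg_controllen
 *	(no equivalent)           <->  msghdr.msg_flags
 */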
1252 int
1253 orecvmsg(td, uap)
1254 	struct thread *td;
1255 	struct orecvmsg_args /* {
1256 		int	s;
1257 		struct	omsghdr *msg;
1258 		int	flags;
1259 	} */ *uap;
1260 {
1261 	struct msghdr msg;
1262 	struct iovec *iov;
1263 	int error;
1264 
1265 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1266 	if (error)
1267 		return (error);
1268 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1269 	if (error)
1270 		return (error);
1271 	msg.msg_flags = uap->flags | MSG_COMPAT;
1272 	msg.msg_iov = iov;
1273 	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1274 	if (msg.msg_controllen && error == 0)
1275 		error = copyout(&msg.msg_controllen,
1276 		    &uap->msg->msg_accrightslen, sizeof (int));
1277 	free(iov, M_IOV);
1278 	return (error);
1279 }
1280 #endif
1281 
1282 /*
1283  * MPSAFE
1284  */
1285 int
1286 recvmsg(td, uap)
1287 	struct thread *td;
1288 	struct recvmsg_args /* {
1289 		int	s;
1290 		struct	msghdr *msg;
1291 		int	flags;
1292 	} */ *uap;
1293 {
1294 	struct msghdr msg;
1295 	struct iovec *uiov, *iov;
1296 	int error;
1297 
1298 	error = copyin(uap->msg, &msg, sizeof (msg));
1299 	if (error)
1300 		return (error);
1301 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1302 	if (error)
1303 		return (error);
1304 	msg.msg_flags = uap->flags;
1305 #ifdef COMPAT_OLDSOCK
1306 	msg.msg_flags &= ~MSG_COMPAT;
1307 #endif
1308 	uiov = msg.msg_iov;
1309 	msg.msg_iov = iov;
1310 	error = recvit(td, uap->s, &msg, NULL);
1311 	if (error == 0) {
1312 		msg.msg_iov = uiov;
1313 		error = copyout(&msg, uap->msg, sizeof(msg));
1314 	}
1315 	free(iov, M_IOV);
1316 	return (error);
1317 }
1318 
1319 /*
1320  * MPSAFE
1321  */
1322 /* ARGSUSED */
1323 int
1324 shutdown(td, uap)
1325 	struct thread *td;
1326 	register struct shutdown_args /* {
1327 		int	s;
1328 		int	how;
1329 	} */ *uap;
1330 {
1331 	struct socket *so;
1332 	struct file *fp;
1333 	int error;
1334 
1335 	NET_LOCK_GIANT();
1336 	error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
1337 	if (error == 0) {
1338 		so = fp->f_data;
1339 		error = soshutdown(so, uap->how);
1340 		fdrop(fp, td);
1341 	}
1342 	NET_UNLOCK_GIANT();
1343 	return (error);
1344 }
1345 
1346 /*
1347  * MPSAFE
1348  */
1349 /* ARGSUSED */
1350 int
1351 setsockopt(td, uap)
1352 	struct thread *td;
1353 	register struct setsockopt_args /* {
1354 		int	s;
1355 		int	level;
1356 		int	name;
1357 		caddr_t	val;
1358 		int	valsize;
1359 	} */ *uap;
1360 {
1361 
1362 	return (kern_setsockopt(td, uap->s, uap->level, uap->name,
1363 	    uap->val, UIO_USERSPACE, uap->valsize));
1364 }
1365 
1366 int
1367 kern_setsockopt(td, s, level, name, val, valseg, valsize)
1368 	struct thread *td;
1369 	int s;
1370 	int level;
1371 	int name;
1372 	void *val;
1373 	enum uio_seg valseg;
1374 	socklen_t valsize;
1375 {
1376 	int error;
1377 	struct socket *so;
1378 	struct file *fp;
1379 	struct sockopt sopt;
1380 
1381 	if (val == NULL && valsize != 0)
1382 		return (EFAULT);
1383 	if ((int)valsize < 0)
1384 		return (EINVAL);
1385 
1386 	sopt.sopt_dir = SOPT_SET;
1387 	sopt.sopt_level = level;
1388 	sopt.sopt_name = name;
1389 	sopt.sopt_val = val;
1390 	sopt.sopt_valsize = valsize;
1391 	switch (valseg) {
1392 	case UIO_USERSPACE:
1393 		sopt.sopt_td = td;
1394 		break;
1395 	case UIO_SYSSPACE:
1396 		sopt.sopt_td = NULL;
1397 		break;
1398 	default:
1399 		panic("kern_setsockopt called with bad valseg");
1400 	}
1401 
1402 	NET_LOCK_GIANT();
1403 	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1404 	if (error == 0) {
1405 		so = fp->f_data;
1406 		error = sosetopt(so, &sopt);
1407 		fdrop(fp, td);
1408 	}
1409 	NET_UNLOCK_GIANT();
1410 	return(error);
1411 }
1412 
1413 /*
1414  * MPSAFE
1415  */
1416 /* ARGSUSED */
1417 int
1418 getsockopt(td, uap)
1419 	struct thread *td;
1420 	register struct getsockopt_args /* {
1421 		int	s;
1422 		int	level;
1423 		int	name;
1424 		void * __restrict	val;
1425 		socklen_t * __restrict avalsize;
1426 	} */ *uap;
1427 {
1428 	socklen_t valsize;
1429 	int	error;
1430 
1431 	if (uap->val) {
1432 		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1433 		if (error)
1434 			return (error);
1435 	}
1436 
1437 	error = kern_getsockopt(td, uap->s, uap->level, uap->name,
1438 	    uap->val, UIO_USERSPACE, &valsize);
1439 
1440 	if (error == 0)
1441 		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1442 	return (error);
1443 }
1444 
1445 /*
1446  * Kernel version of getsockopt.
1447  * optval can be a userland or kernel address.  optlen is always a kernel pointer.
1448  */
1449 int
1450 kern_getsockopt(td, s, level, name, val, valseg, valsize)
1451 	struct thread *td;
1452 	int s;
1453 	int level;
1454 	int name;
1455 	void *val;
1456 	enum uio_seg valseg;
1457 	socklen_t *valsize;
1458 {
1459 	int error;
1460 	struct  socket *so;
1461 	struct file *fp;
1462 	struct	sockopt sopt;
1463 
1464 	if (val == NULL)
1465 		*valsize = 0;
1466 	if ((int)*valsize < 0)
1467 		return (EINVAL);
1468 
1469 	sopt.sopt_dir = SOPT_GET;
1470 	sopt.sopt_level = level;
1471 	sopt.sopt_name = name;
1472 	sopt.sopt_val = val;
1473 	sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
1474 	switch (valseg) {
1475 	case UIO_USERSPACE:
1476 		sopt.sopt_td = td;
1477 		break;
1478 	case UIO_SYSSPACE:
1479 		sopt.sopt_td = NULL;
1480 		break;
1481 	default:
1482 		panic("kern_getsockopt called with bad valseg");
1483 	}
1484 
1485 	NET_LOCK_GIANT();
1486 	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1487 	if (error == 0) {
1488 		so = fp->f_data;
1489 		error = sogetopt(so, &sopt);
1490 		*valsize = sopt.sopt_valsize;
1491 		fdrop(fp, td);
1492 	}
1493 	NET_UNLOCK_GIANT();
1494 	return (error);
1495 }
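/*
 * A minimal sketch of in-kernel use with a UIO_SYSSPACE value buffer;
 * the option shown is only an example:
 *
 *	int val, error;
 *	socklen_t len = sizeof(val);
 *
 *	error = kern_getsockopt(td, s, SOL_SOCKET, SO_ERROR,
 *	    &val, UIO_SYSSPACE, &len);
 */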
1496 
1497 /*
1498  * getsockname1() - Get socket name.
1499  *
1500  * MPSAFE
1501  */
1502 /* ARGSUSED */
1503 static int
1504 getsockname1(td, uap, compat)
1505 	struct thread *td;
1506 	register struct getsockname_args /* {
1507 		int	fdes;
1508 		struct sockaddr * __restrict asa;
1509 		socklen_t * __restrict alen;
1510 	} */ *uap;
1511 	int compat;
1512 {
1513 	struct sockaddr *sa;
1514 	socklen_t len;
1515 	int error;
1516 
1517 	error = copyin(uap->alen, &len, sizeof(len));
1518 	if (error)
1519 		return (error);
1520 
1521 	error = kern_getsockname(td, uap->fdes, &sa, &len);
1522 	if (error)
1523 		return (error);
1524 
1525 	if (len != 0) {
1526 #ifdef COMPAT_OLDSOCK
1527 		if (compat)
1528 			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1529 #endif
1530 		error = copyout(sa, uap->asa, (u_int)len);
1531 	}
1532 	free(sa, M_SONAME);
1533 	if (error == 0)
1534 		error = copyout(&len, uap->alen, sizeof(len));
1535 	return (error);
1536 }
1537 
1538 int
1539 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
1540     socklen_t *alen)
1541 {
1542 	struct socket *so;
1543 	struct file *fp;
1544 	socklen_t len;
1545 	int error;
1546 
1547 	if (*alen < 0)
1548 		return (EINVAL);
1549 
1550 	NET_LOCK_GIANT();
1551 	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
1552 	if (error)
1553 		goto done;
1554 	so = fp->f_data;
1555 	*sa = NULL;
1556 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
1557 	if (error)
1558 		goto bad;
1559 	if (*sa == NULL)
1560 		len = 0;
1561 	else
1562 		len = MIN(*alen, (*sa)->sa_len);
1563 	*alen = len;
1564 bad:
1565 	fdrop(fp, td);
1566 	if (error && *sa) {
1567 		free(*sa, M_SONAME);
1568 		*sa = NULL;
1569 	}
1570 done:
1571 	NET_UNLOCK_GIANT();
1572 	return (error);
1573 }
1574 
1575 /*
1576  * MPSAFE
1577  */
1578 int
1579 getsockname(td, uap)
1580 	struct thread *td;
1581 	struct getsockname_args *uap;
1582 {
1583 
1584 	return (getsockname1(td, uap, 0));
1585 }
1586 
1587 #ifdef COMPAT_OLDSOCK
1588 /*
1589  * MPSAFE
1590  */
1591 int
1592 ogetsockname(td, uap)
1593 	struct thread *td;
1594 	struct getsockname_args *uap;
1595 {
1596 
1597 	return (getsockname1(td, uap, 1));
1598 }
1599 #endif /* COMPAT_OLDSOCK */
1600 
1601 /*
1602  * getpeername1() - Get name of peer for connected socket.
1603  *
1604  * MPSAFE
1605  */
1606 /* ARGSUSED */
1607 static int
1608 getpeername1(td, uap, compat)
1609 	struct thread *td;
1610 	register struct getpeername_args /* {
1611 		int	fdes;
1612 		struct sockaddr * __restrict	asa;
1613 		socklen_t * __restrict	alen;
1614 	} */ *uap;
1615 	int compat;
1616 {
1617 	struct sockaddr *sa;
1618 	socklen_t len;
1619 	int error;
1620 
1621 	error = copyin(uap->alen, &len, sizeof (len));
1622 	if (error)
1623 		return (error);
1624 
1625 	error = kern_getpeername(td, uap->fdes, &sa, &len);
1626 	if (error)
1627 		return (error);
1628 
1629 	if (len != 0) {
1630 #ifdef COMPAT_OLDSOCK
1631 		if (compat)
1632 			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1633 #endif
1634 		error = copyout(sa, uap->asa, (u_int)len);
1635 	}
1636 	free(sa, M_SONAME);
1637 	if (error == 0)
1638 		error = copyout(&len, uap->alen, sizeof(len));
1639 	return (error);
1640 }
1641 
1642 int
1643 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
1644     socklen_t *alen)
1645 {
1646 	struct socket *so;
1647 	struct file *fp;
1648 	socklen_t len;
1649 	int error;
1650 
1651 	if (*alen < 0)
1652 		return (EINVAL);
1653 
1654 	NET_LOCK_GIANT();
1655 	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
1656 	if (error)
1657 		goto done2;
1658 	so = fp->f_data;
1659 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1660 		error = ENOTCONN;
1661 		goto done1;
1662 	}
1663 	*sa = NULL;
1664 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
1665 	if (error)
1666 		goto bad;
1667 	if (*sa == NULL)
1668 		len = 0;
1669 	else
1670 		len = MIN(*alen, (*sa)->sa_len);
1671 	*alen = len;
1672 bad:
1673 	if (error && *sa) {
1674 		free(*sa, M_SONAME);
1675 		*sa = NULL;
1676 	}
1677 done1:
1678 	fdrop(fp, td);
1679 done2:
1680 	NET_UNLOCK_GIANT();
1681 	return (error);
1682 }
1683 
1684 /*
1685  * MPSAFE
1686  */
1687 int
1688 getpeername(td, uap)
1689 	struct thread *td;
1690 	struct getpeername_args *uap;
1691 {
1692 
1693 	return (getpeername1(td, uap, 0));
1694 }
1695 
1696 #ifdef COMPAT_OLDSOCK
1697 /*
1698  * MPSAFE
1699  */
1700 int
1701 ogetpeername(td, uap)
1702 	struct thread *td;
1703 	struct ogetpeername_args *uap;
1704 {
1705 
1706 	/* XXX uap should have type `getpeername_args *' to begin with. */
1707 	return (getpeername1(td, (struct getpeername_args *)uap, 1));
1708 }
1709 #endif /* COMPAT_OLDSOCK */
1710 
1711 int
1712 sockargs(mp, buf, buflen, type)
1713 	struct mbuf **mp;
1714 	caddr_t buf;
1715 	int buflen, type;
1716 {
1717 	register struct sockaddr *sa;
1718 	register struct mbuf *m;
1719 	int error;
1720 
1721 	if ((u_int)buflen > MLEN) {
1722 #ifdef COMPAT_OLDSOCK
1723 		if (type == MT_SONAME && (u_int)buflen <= 112)
1724 			buflen = MLEN;		/* unix domain compat. hack */
1725 		else
1726 #endif
1727 			if ((u_int)buflen > MCLBYTES)
1728 				return (EINVAL);
1729 	}
1730 	m = m_get(M_TRYWAIT, type);
1731 	if (m == NULL)
1732 		return (ENOBUFS);
1733 	if ((u_int)buflen > MLEN) {
1734 		MCLGET(m, M_TRYWAIT);
1735 		if ((m->m_flags & M_EXT) == 0) {
1736 			m_free(m);
1737 			return (ENOBUFS);
1738 		}
1739 	}
1740 	m->m_len = buflen;
1741 	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1742 	if (error)
1743 		(void) m_free(m);
1744 	else {
1745 		*mp = m;
1746 		if (type == MT_SONAME) {
1747 			sa = mtod(m, struct sockaddr *);
1748 
1749 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1750 			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1751 				sa->sa_family = sa->sa_len;
1752 #endif
1753 			sa->sa_len = buflen;
1754 		}
1755 	}
1756 	return (error);
1757 }
1758 
1759 int
1760 getsockaddr(namp, uaddr, len)
1761 	struct sockaddr **namp;
1762 	caddr_t uaddr;
1763 	size_t len;
1764 {
1765 	struct sockaddr *sa;
1766 	int error;
1767 
1768 	if (len > SOCK_MAXADDRLEN)
1769 		return (ENAMETOOLONG);
1770 	if (len < offsetof(struct sockaddr, sa_data[0]))
1771 		return (EINVAL);
1772 	MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1773 	error = copyin(uaddr, sa, len);
1774 	if (error) {
1775 		FREE(sa, M_SONAME);
1776 	} else {
1777 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1778 		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1779 			sa->sa_family = sa->sa_len;
1780 #endif
1781 		sa->sa_len = len;
1782 		*namp = sa;
1783 	}
1784 	return (error);
1785 }
1786 
1787 /*
1788  * Detach mapped page and release resources back to the system.
1789  */
1790 void
1791 sf_buf_mext(void *addr, void *args)
1792 {
1793 	vm_page_t m;
1794 
1795 	m = sf_buf_page(args);
1796 	sf_buf_free(args);
1797 	vm_page_lock_queues();
1798 	vm_page_unwire(m, 0);
1799 	/*
1800 	 * Check for the object going away on us. This can
1801 	 * happen since we don't hold a reference to it.
1802 	 * If so, we're responsible for freeing the page.
1803 	 */
1804 	if (m->wire_count == 0 && m->object == NULL)
1805 		vm_page_free(m);
1806 	vm_page_unlock_queues();
1807 }
1808 
1809 /*
1810  * sendfile(2)
1811  *
1812  * MPSAFE
1813  *
1814  * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1815  *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1816  *
1817  * Send a file specified by 'fd' and starting at 'offset' to a socket
1818  * specified by 's'. Send only 'nbytes' of the file or until EOF if
1819  * nbytes == 0. Optionally add a header and/or trailer to the socket
1820  * output. If specified, write the total number of bytes sent into *sbytes.
1821  *
1822  */
1823 int
1824 sendfile(struct thread *td, struct sendfile_args *uap)
1825 {
1826 
1827 	return (do_sendfile(td, uap, 0));
1828 }
1829 
1830 static int
1831 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1832 {
1833 	struct sf_hdtr hdtr;
1834 	struct uio *hdr_uio, *trl_uio;
1835 	int error;
1836 
1837 	hdr_uio = trl_uio = NULL;
1838 
1839 	if (uap->hdtr != NULL) {
1840 		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1841 		if (error)
1842 			goto out;
1843 		if (hdtr.headers != NULL) {
1844 			error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1845 			if (error)
1846 				goto out;
1847 		}
1848 		if (hdtr.trailers != NULL) {
1849 			error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
1850 			if (error)
1851 				goto out;
1852 
1853 		}
1854 	}
1855 
1856 	error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
1857 out:
1858 	if (hdr_uio)
1859 		free(hdr_uio, M_IOV);
1860 	if (trl_uio)
1861 		free(trl_uio, M_IOV);
1862 	return (error);
1863 }
1864 
1865 #ifdef COMPAT_FREEBSD4
1866 int
1867 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
1868 {
1869 	struct sendfile_args args;
1870 
1871 	args.fd = uap->fd;
1872 	args.s = uap->s;
1873 	args.offset = uap->offset;
1874 	args.nbytes = uap->nbytes;
1875 	args.hdtr = uap->hdtr;
1876 	args.sbytes = uap->sbytes;
1877 	args.flags = uap->flags;
1878 
1879 	return (do_sendfile(td, &args, 1));
1880 }
1881 #endif /* COMPAT_FREEBSD4 */
1882 
1883 int
1884 kern_sendfile(struct thread *td, struct sendfile_args *uap,
1885     struct uio *hdr_uio, struct uio *trl_uio, int compat)
1886 {
1887 	struct file *sock_fp;
1888 	struct vnode *vp;
1889 	struct vm_object *obj = NULL;
1890 	struct socket *so = NULL;
1891 	struct mbuf *m = NULL;
1892 	struct sf_buf *sf;
1893 	struct vm_page *pg;
1894 	off_t off, xfsize, hdtr_size = 0, sbytes = 0, rem = 0;
1895 	int error, headersize = 0, headersent = 0, mnw = 0;
1896 	int vfslocked;
1897 
1898 	NET_LOCK_GIANT();
1899 
1900 	/*
1901 	 * The file descriptor must be a regular file and have a
1902 	 * backing VM object.
1903 	 * File offset must not be negative.  If it goes beyond EOF
1904 	 * we send only the header/trailer and no payload data.
1905 	 */
1906 	if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1907 		goto done;
1908 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1909 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1910 	obj = vp->v_object;
1911 	if (obj != NULL) {
1912 		/*
1913 		 * Temporarily increase the backing VM object's reference
1914 		 * count so that a forced reclamation of its vnode does not
1915 		 * immediately destroy it.
1916 		 */
1917 		VM_OBJECT_LOCK(obj);
1918 		if ((obj->flags & OBJ_DEAD) == 0) {
1919 			vm_object_reference_locked(obj);
1920 			VM_OBJECT_UNLOCK(obj);
1921 		} else {
1922 			VM_OBJECT_UNLOCK(obj);
1923 			obj = NULL;
1924 		}
1925 	}
1926 	VOP_UNLOCK(vp, 0, td);
1927 	VFS_UNLOCK_GIANT(vfslocked);
1928 	if (obj == NULL) {
1929 		error = EINVAL;
1930 		goto done;
1931 	}
1932 	if (uap->offset < 0) {
1933 		error = EINVAL;
1934 		goto done;
1935 	}
1936 
1937 	/*
1938 	 * The socket must be a stream socket and connected.
1939 	 * Remember whether it is a blocking or non-blocking socket.
1940 	 */
1941 	if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp,
1942 	    NULL)) != 0)
1943 		goto done;
1944 	so = sock_fp->f_data;
1945 	if (so->so_type != SOCK_STREAM) {
1946 		error = EINVAL;
1947 		goto done;
1948 	}
1949 	if ((so->so_state & SS_ISCONNECTED) == 0) {
1950 		error = ENOTCONN;
1951 		goto done;
1952 	}
1953 	/*
1954 	 * Do not wait on memory allocations but return EAGAIN for
1955 	 * the caller to retry later.
1956 	 * XXX: Experimental.
1957 	 */
1958 	if (uap->flags & SF_MNOWAIT)
1959 		mnw = 1;
1960 
1961 #ifdef MAC
1962 	SOCK_LOCK(so);
1963 	error = mac_check_socket_send(td->td_ucred, so);
1964 	SOCK_UNLOCK(so);
1965 	if (error)
1966 		goto done;
1967 #endif
1968 
1969 	/* If headers are specified copy them into mbufs. */
1970 	if (hdr_uio != NULL) {
1971 		hdr_uio->uio_td = td;
1972 		hdr_uio->uio_rw = UIO_WRITE;
1973 		if (hdr_uio->uio_resid > 0) {
1974 			m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
1975 			    0, 0, 0);
1976 			if (m == NULL) {
1977 				error = mnw ? EAGAIN : ENOBUFS;
1978 				goto done;
1979 			}
1980 			headersize = hdr_uio->uio_resid;
1981 			if (compat)
1982 				sbytes += headersize;
1983 		}
1984 	}
1985 
1986 	/* Protect against multiple writers to the socket. */
1987 	SOCKBUF_LOCK(&so->so_snd);
1988 	(void) sblock(&so->so_snd, M_WAITOK);
1989 	SOCKBUF_UNLOCK(&so->so_snd);
1990 
1991 	/*
1992 	 * Loop through the pages of the file, starting with the requested
1993 	 * offset. Get a file page (do I/O if necessary), map the file page
1994 	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1995 	 * it on the socket.
1996 	 * This is done in two loops.  The inner loop turns as many pages
1997 	 * as it can into mbufs, up to the available socket buffer space and
1998 	 * without blocking, to have them bulk delivered into the send buffer.
1999 	 * The outer loop checks the state and available space of the socket
2000 	 * and takes care of the overall progress.
2001 	 */
2002 	for (off = uap->offset; ; ) {
2003 		int loopbytes = 0;
2004 		int space = 0;
2005 		int done = 0;
2006 
2007 		/*
2008 		 * Check the socket state for ongoing connection,
2009 		 * no errors and space in socket buffer.
2010 		 * If space is low allow for the remainder of the
2011 		 * file to be processed if it fits the socket buffer.
2012 		 * Otherwise block waiting for sufficient space
2013 		 * to proceed, or if the socket is nonblocking, return
2014 		 * to userland with EAGAIN while reporting how far
2015 		 * we've come.
2016 		 * We wait until the socket buffer has significant free
2017 		 * space to do bulk sends.  This makes good use of file
2018 		 * system read ahead and allows packet segmentation
2019 		 * offloading hardware to take over lots of work.  If
2020 		 * we were not careful here we would send off only one
2021 		 * sfbuf at a time.
2022 		 */
2023 		SOCKBUF_LOCK(&so->so_snd);
2024 		if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
2025 			so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
2026 retry_space:
2027 		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2028 			error = EPIPE;
2029 			SOCKBUF_UNLOCK(&so->so_snd);
2030 			goto done;
2031 		} else if (so->so_error) {
2032 			error = so->so_error;
2033 			so->so_error = 0;
2034 			SOCKBUF_UNLOCK(&so->so_snd);
2035 			goto done;
2036 		}
2037 		space = sbspace(&so->so_snd);
2038 		if (space < rem &&
2039 		    (space <= 0 ||
2040 		     space < so->so_snd.sb_lowat)) {
2041 			if (so->so_state & SS_NBIO) {
2042 				SOCKBUF_UNLOCK(&so->so_snd);
2043 				error = EAGAIN;
2044 				goto done;
2045 			}
2046 			/*
2047 			 * sbwait drops the lock while sleeping.
2048 			 * When we loop back to retry_space the
2049 			 * state may have changed and we retest
2050 			 * for it.
2051 			 */
2052 			error = sbwait(&so->so_snd);
2053 			/*
2054 			 * An error from sbwait usually indicates that we've
2055 			 * been interrupted by a signal. If we've sent anything
2056 			 * then return bytes sent, otherwise return the error.
2057 			 */
2058 			if (error) {
2059 				SOCKBUF_UNLOCK(&so->so_snd);
2060 				goto done;
2061 			}
2062 			goto retry_space;
2063 		}
2064 		SOCKBUF_UNLOCK(&so->so_snd);
2065 
2066 		/*
2067 		 * Loop and construct maximum sized mbuf chain to be bulk
2068 		 * dumped into socket buffer.
2069 		 */
2070 		while (space > loopbytes) {
2071 			vm_pindex_t pindex;
2072 			vm_offset_t pgoff;
2073 			struct mbuf *m0;
2074 
2075 			VM_OBJECT_LOCK(obj);
2076 			/*
2077 			 * Calculate the amount to transfer.
2078 			 * Not to exceed a page, the EOF,
2079 			 * or the passed in nbytes.
2080 			 */
2081 			pgoff = (vm_offset_t)(off & PAGE_MASK);
2082 			xfsize = omin(PAGE_SIZE - pgoff,
2083 			    obj->un_pager.vnp.vnp_size - off -
2084 			    sbytes - loopbytes);
2085 			if (uap->nbytes)
2086 				rem = (uap->nbytes - sbytes - loopbytes);
2087 			else
2088 				rem = obj->un_pager.vnp.vnp_size - off -
2089 				    sbytes - loopbytes;
2090 			xfsize = omin(rem, xfsize);
2091 			if (xfsize <= 0) {
2092 				VM_OBJECT_UNLOCK(obj);
2093 				done = 1;		/* all data sent */
2094 				break;
2095 			}
2096 			/*
2097 			 * Don't overflow the send buffer.
2098 			 * Stop here and send out what we've
2099 			 * already got.
2100 			 */
2101 			if (space < loopbytes + xfsize) {
2102 				VM_OBJECT_UNLOCK(obj);
2103 				break;
2104 			}
2105 retry_lookup:
2106 			/*
2107 			 * Attempt to look up the page.
2108 			 * Allocate if not found or
2109 			 * wait and loop if busy.
2110 			 */
2111 			pindex = OFF_TO_IDX(off);
2112 			pg = vm_page_lookup(obj, pindex);
2113 			if (pg == NULL) {
2114 				pg = vm_page_alloc(obj, pindex,
2115 				    VM_ALLOC_NOBUSY | VM_ALLOC_NORMAL |
2116 				    VM_ALLOC_WIRED);
2117 				if (pg == NULL) {
2118 					VM_OBJECT_UNLOCK(obj);
2119 					VM_WAIT;
2120 					VM_OBJECT_LOCK(obj);
2121 					goto retry_lookup;
2122 				}
2123 			} else if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
2124 				goto retry_lookup;
2125 			else {
2126 				/*
2127 				 * Wire the page so it does not get
2128 				 * ripped out from under us.
2129 				 */
2130 				vm_page_lock_queues();
2131 				vm_page_wire(pg);
2132 				vm_page_unlock_queues();
2133 			}
2134 
2135 			/*
2136 			 * Check if page is valid for what we need,
2137 			 * otherwise initiate I/O.
2138 			 * If we already turned some pages into mbufs,
2139 			 * send them off before we come here again and
2140 			 * block.
2141 			 */
2142 			if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
2143 				VM_OBJECT_UNLOCK(obj);
2144 			else if (m != NULL)
2145 				error = EAGAIN;	/* send what we already got */
2146 			else if (uap->flags & SF_NODISKIO)
2147 				error = EBUSY;
2148 			else {
2149 				int bsize, resid;
2150 
2151 				/*
2152 				 * Ensure that our page is still around
2153 				 * when the I/O completes.
2154 				 */
2155 				vm_page_io_start(pg);
2156 				VM_OBJECT_UNLOCK(obj);
2157 
2158 				/*
2159 				 * Get the page from backing store.
2160 				 */
2161 				bsize = vp->v_mount->mnt_stat.f_iosize;
2162 				vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2163 				vn_lock(vp, LK_SHARED | LK_RETRY, td);
2164 
2165 				/*
2166 				 * XXXMAC: Because we don't have fp->f_cred
2167 				 * here, we pass in NOCRED.  This is probably
2168 				 * wrong, but is consistent with our original
2169 				 * implementation.
2170 				 */
2171 				error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
2172 				    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
2173 				    IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
2174 				    td->td_ucred, NOCRED, &resid, td);
2175 				VOP_UNLOCK(vp, 0, td);
2176 				VFS_UNLOCK_GIANT(vfslocked);
2177 				VM_OBJECT_LOCK(obj);
2178 				vm_page_io_finish(pg);
2179 				if (!error)
2180 					VM_OBJECT_UNLOCK(obj);
2181 				mbstat.sf_iocnt++;
2182 			}
2183 			if (error) {
2184 				vm_page_lock_queues();
2185 				vm_page_unwire(pg, 0);
2186 				/*
2187 				 * See if anyone else might know about
2188 				 * this page.  If not and it is not valid,
2189 				 * then free it.
2190 				 */
2191 				if (pg->wire_count == 0 && pg->valid == 0 &&
2192 				    pg->busy == 0 && !(pg->oflags & VPO_BUSY) &&
2193 				    pg->hold_count == 0) {
2194 					vm_page_free(pg);
2195 				}
2196 				vm_page_unlock_queues();
2197 				VM_OBJECT_UNLOCK(obj);
2198 				if (error == EAGAIN)
2199 					error = 0;	/* not a real error */
2200 				break;
2201 			}
2202 
2203 			/*
2204 			 * Get a sendfile buf.  We usually wait as long
2205 			 * as necessary, but this wait can be interrupted.
2206 			 */
2207 			if ((sf = sf_buf_alloc(pg,
2208 			    (mnw ? SFB_NOWAIT : SFB_CATCH))) == NULL) {
2209 				mbstat.sf_allocfail++;
2210 				vm_page_lock_queues();
2211 				vm_page_unwire(pg, 0);
2212 				/*
2213 				 * XXX: Not same check as above!?
2214 				 */
2215 				if (pg->wire_count == 0 && pg->object == NULL)
2216 					vm_page_free(pg);
2217 				vm_page_unlock_queues();
2218 				error = (mnw ? EAGAIN : EINTR);
2219 				break;
2220 			}
2221 
2222 			/*
2223 			 * Get an mbuf and set it up as having
2224 			 * external storage.
2225 			 */
2226 			m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
2227 			if (m0 == NULL) {
2228 				error = (mnw ? EAGAIN : ENOBUFS);
2229 				sf_buf_mext((void *)sf_buf_kva(sf), sf);
2230 				break;
2231 			}
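			/*
			 * The sf_buf's kernel mapping of the wired page is
			 * attached below as read-only external storage, so
			 * the file data reaches the socket without being
			 * copied; sf_buf_mext() releases the sf_buf and
			 * unwires the page when the mbuf is freed.
			 */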
2232 			MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
2233 			    sf, M_RDONLY, EXT_SFBUF);
2234 			m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
2235 			m0->m_len = xfsize;
2236 
2237 			/* Append to mbuf chain. */
2238 			if (m != NULL)
2239 				m_cat(m, m0);
2240 			else
2241 				m = m0;
2242 
2243 			/* Keep track of bytes processed. */
2244 			loopbytes += xfsize;
2245 			off += xfsize;
2246 		}
2247 
2248 		/* Add the buffer chain to the socket buffer. */
2249 		if (m != NULL) {
2250 			SOCKBUF_LOCK(&so->so_snd);
2251 			if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2252 				error = EPIPE;
2253 				SOCKBUF_UNLOCK(&so->so_snd);
2254 				goto done;
2255 			}
2256 			SOCKBUF_UNLOCK(&so->so_snd);
2257 			error = (*so->so_proto->pr_usrreqs->pru_send)
2258 				    (so, 0, m, NULL, NULL, td);
2259 			if (!error) {
2260 				sbytes += loopbytes;
2261 				headersent = 1;
2262 			}
2263 			m = NULL;	/* pru_send always consumes */
2264 		}
2265 
2266 		/* Quit outer loop on error or when we're done. */
2267 		if (error || done)
2268 			goto done;
2269 	}
2270 
2271 	/*
2272 	 * Send trailers. Wimp out and use writev(2).
2273 	 */
2274 	if (trl_uio != NULL) {
2275 		error = kern_writev(td, uap->s, trl_uio);
2276 		if (error)
2277 			goto done;
2278 		if (compat)
2279 			sbytes += td->td_retval[0];
2280 		else
2281 			hdtr_size += td->td_retval[0];
2282 	}
2283 
2284 done:
2285 	SOCKBUF_LOCK(&so->so_snd);
2286 	sbunlock(&so->so_snd);
2287 	SOCKBUF_UNLOCK(&so->so_snd);
2288 
2289 	if (headersent) {
2290 		if (!compat)
2291 			hdtr_size += headersize;
2292 	} else {
2293 		if (compat)
2294 			sbytes -= headersize;
2295 	}
2296 
2297 	/*
2298 	 * If there was no error we have to clear td->td_retval[0]
2299 	 * because it may have been set by writev.
2300 	 */
2301 	if (error == 0) {
2302 		td->td_retval[0] = 0;
2303 	}
2304 	if (uap->sbytes != NULL) {
2305 		if (!compat)
2306 			sbytes += hdtr_size;
2307 		copyout(&sbytes, uap->sbytes, sizeof(off_t));
2308 	}
2309 	if (obj != NULL)
2310 		vm_object_deallocate(obj);
2311 	if (vp != NULL) {
2312 		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2313 		vrele(vp);
2314 		VFS_UNLOCK_GIANT(vfslocked);
2315 	}
2316 	if (so)
2317 		fdrop(sock_fp, td);
2318 	if (m)
2319 		m_freem(m);
2320 
2321 	NET_UNLOCK_GIANT();
2322 
2323 	if (error == ERESTART)
2324 		error = EINTR;
2325 
2326 	return (error);
2327 }
2328 
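/*
 * Illustrative userland use of sendfile(2) as implemented above; a sketch
 * only, where filefd and sockfd are assumed to be an open file and a
 * connected stream socket.  An offset and nbytes of 0 send the whole file,
 * and *sbytes reports how much (headers included) was transmitted:
 *
 *	#include <sys/types.h>
 *	#include <sys/socket.h>
 *	#include <sys/uio.h>
 *	#include <err.h>
 *
 *	char resp[] = "HTTP/1.0 200 OK\r\n\r\n";
 *	struct iovec hdr = { resp, sizeof(resp) - 1 };
 *	struct sf_hdtr hdtr = { &hdr, 1, NULL, 0 };
 *	off_t sent = 0;
 *
 *	if (sendfile(filefd, sockfd, 0, 0, &hdtr, &sent, 0) == -1)
 *		err(1, "sendfile");
 *
 * Passing SF_NODISKIO for the final argument makes the loop above fail
 * with EBUSY instead of sleeping when a page has to be read from disk.
 */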
2329 
2330 int
2331 sctp_peeloff(td, uap)
2332 	struct thread *td;
2333 	register struct sctp_peeloff_args /* {
2334 		int	sd;
2335 		caddr_t	name;
2336 	} */ *uap;
2337 {
2338 #ifdef SCTP
2339 	struct filedesc *fdp;
2340 	struct file *nfp = NULL;
2341 	int error;
2342 	struct socket *head, *so;
2343 	int fd;
2344 	u_int fflag;
2345 
2346 	fdp = td->td_proc->p_fd;
2347 	error = fgetsock(td, uap->sd, &head, &fflag);
2348 	if (error)
2349 		goto done2;
2350 	error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
2351 	if (error)
2352 		goto done2;
2353 	/*
2354 	 * At this point we know we have an association to pull
2355 	 * off, so we proceed to set up the file descriptor.  This
2356 	 * may block, but that is OK.
2357 	 */
2358 
2359 	error = falloc(td, &nfp, &fd);
2360 	if (error)
2361 		goto done;
2362 	td->td_retval[0] = fd;
2363 
2364 	so = sonewconn(head, SS_ISCONNECTED);
2365 	if (so == NULL) {
2366 		error = ENOMEM;
		goto noconnection;
	}
2367 	/*
2368 	 * Before changing the flags on the socket, we have to bump the
2369 	 * reference count.  Otherwise, if the protocol calls sofree(),
2370 	 * the socket will be released due to a zero refcount.
2371 	 */
2372 	SOCK_LOCK(so);
2373 	soref(so);			/* file descriptor reference */
2374 	SOCK_UNLOCK(so);
2375 
2376 	ACCEPT_LOCK();
2377 
2378 	TAILQ_REMOVE(&head->so_comp, so, so_list);
2379 	head->so_qlen--;
2380 	so->so_state |= (head->so_state & SS_NBIO);
2381 	so->so_state &= ~SS_NOFDREF;
2382 	so->so_qstate &= ~SQ_COMP;
2383 	so->so_head = NULL;
2384 
2385 	ACCEPT_UNLOCK();
2386 
2387 	error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
2388 	if (error)
2389 		goto noconnection;
2390 	if (head->so_sigio != NULL)
2391 		fsetown(fgetown(&head->so_sigio), &so->so_sigio);
2392 
2393 	FILE_LOCK(nfp);
2394 	nfp->f_data = so;
2395 	nfp->f_flag = fflag;
2396 	nfp->f_ops = &socketops;
2397 	nfp->f_type = DTYPE_SOCKET;
2398 	FILE_UNLOCK(nfp);
2399 
2400  noconnection:
2401 	/*
2402 	 * close the new descriptor, assuming someone hasn't ripped it
2403 	 * out from under us.
2404 	 */
2405 	if (error)
2406 		fdclose(fdp, nfp, fd, td);
2407 
2408 	/*
2409 	 * Release explicitly held references before returning.
2410 	 */
2411  done:
2412 	if (nfp != NULL)
2413 		fdrop(nfp, td);
2414 	fputsock(head);
2415  done2:
2416 	return (error);
2417 #else
2418 	return (EOPNOTSUPP);
2419 #endif
2420 }
2421 
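/*
 * Illustrative userland use; a sketch only, where sd is assumed to be a
 * one-to-many SCTP socket and assoc_id an established association on it.
 * The sctp_peeloff(2) wrapper reaches the syscall above and returns a new
 * one-to-one descriptor for just that association:
 *
 *	#include <sys/types.h>
 *	#include <sys/socket.h>
 *	#include <netinet/sctp.h>
 *	#include <err.h>
 *
 *	int afd;
 *
 *	if ((afd = sctp_peeloff(sd, assoc_id)) == -1)
 *		err(1, "sctp_peeloff");
 *
 * afd can then be read and written like any other connected SCTP socket,
 * while sd keeps serving the remaining associations.
 */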
2422 
2423 int sctp_generic_sendmsg(td, uap)
2424 	struct thread *td;
2425 	register struct sctp_generic_sendmsg_args /* {
2426 					  int sd,
2427 					  caddr_t msg,
2428 					  int mlen,
2429 					  caddr_t to,
2430 					  __socklen_t tolen,
2431 					  struct sctp_sndrcvinfo *sinfo,
2432 					  int flags
2433 					     } */ *uap;
2434 {
2435 #ifdef SCTP
2436 	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2437 	struct socket *so;
2438 	struct file *fp;
2439 	int use_rcvinfo = 1;
2440 	int error = 0, len;
2441 	struct sockaddr *to = NULL;
2442 #ifdef KTRACE
2443 	struct uio *ktruio = NULL;
2444 #endif
2445 	struct uio auio;
2446 	struct iovec iov[1];
2447 
2448 	if (uap->sinfo) {
2449 		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2450 		if (error)
2451 			return (error);
2452 		u_sinfo = &sinfo;
2453 	}
2454 
2455 	if (uap->tolen) {
2456 		error = getsockaddr(&to, uap->to, uap->tolen);
2457 		if (error) {
2458 			to = NULL;
2459 			goto sctp_bad2;
2460 		}
2461 	}
2462 	error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2463 	if (error)
2464 		goto sctp_bad;
2465 
2466 	iov[0].iov_base = uap->msg;
2467 	iov[0].iov_len = uap->mlen;
2468 
2469 	so = (struct socket *)fp->f_data;
2470 #ifdef MAC
2471 	SOCK_LOCK(so);
2472 	error = mac_check_socket_send(td->td_ucred, so);
2473 	SOCK_UNLOCK(so);
2474 	if (error)
2475 		goto sctp_bad;
2476 #endif
2477 
2479 	auio.uio_iov = iov;
2480 	auio.uio_iovcnt = 1;
2481 	auio.uio_segflg = UIO_USERSPACE;
2482 	auio.uio_rw = UIO_WRITE;
2483 	auio.uio_td = td;
2484 	auio.uio_offset = 0;			/* XXX */
2485 	auio.uio_resid = 0;
2486 	len = auio.uio_resid = uap->mlen;
2487 	error = sctp_lower_sosend(so,
2488 				 to,
2489 				 &auio,
2490 				 (struct mbuf *)NULL,
2491 				 (struct mbuf *)NULL,
2492 				 uap->flags,
2493 				 use_rcvinfo,
2494 				 u_sinfo,
2495 				 td);
2496 
2497 	if (error) {
2498 		if (auio.uio_resid != len && (error == ERESTART ||
2499 		    error == EINTR || error == EWOULDBLOCK))
2500 			error = 0;
2501 		/* Generation of SIGPIPE can be controlled per socket */
2502 		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2503 		    !(uap->flags & MSG_NOSIGNAL)) {
2504 			PROC_LOCK(td->td_proc);
2505 			psignal(td->td_proc, SIGPIPE);
2506 			PROC_UNLOCK(td->td_proc);
2507 		}
2508 	}
2509 	if (error == 0)
2510 		td->td_retval[0] = len - auio.uio_resid;
2511 #ifdef KTRACE
2512 	if (ktruio != NULL) {
2513 		ktruio->uio_resid = td->td_retval[0];
2514 		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2515 	}
2516 #endif
2517  sctp_bad:
2518 	fdrop(fp, td);
2519  sctp_bad2:
2520 	if (to)
2521 		FREE(to, M_SONAME);
2522 
2523 	return (error);
2524 #else
2525 	return (EOPNOTSUPP);
2526 #endif
2527 }
2528 
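/*
 * The libc sctp_sendmsg(3) wrapper is the usual caller of the syscall
 * above: it packs the per-message fields into a struct sctp_sndrcvinfo and
 * hands the destination address through.  A sketch, where sd is assumed to
 * be an SCTP socket and dest/destlen a valid peer address:
 *
 *	#include <sys/types.h>
 *	#include <sys/socket.h>
 *	#include <netinet/sctp.h>
 *	#include <err.h>
 *
 *	const char msg[] = "hello";
 *
 *	if (sctp_sendmsg(sd, msg, sizeof(msg) - 1,
 *	    (struct sockaddr *)&dest, destlen,
 *	    0, 0, 0, 0, 0) == -1)
 *		err(1, "sctp_sendmsg");
 *
 * The five trailing arguments are the payload protocol id, flags, stream
 * number, time to live and context, which end up in the sinfo copied in
 * above.
 */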
2529 
2530 int sctp_generic_sendmsg_iov(td, uap)
2531 	struct thread *td;
2532 	register struct sctp_generic_sendmsg_iov_args /* {
2533 					  int sd,
2534 					  struct iovec *iov,
2535 					  int iovlen,
2536 					  caddr_t to,
2537 					  __socklen_t tolen,
2538 					  struct sctp_sndrcvinfo *sinfo,
2539 					  int flags
2540 					     } */ *uap;
2541 {
2542 #ifdef SCTP
2543 	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2544 	struct socket *so;
2545 	struct file *fp;
2546 	int use_rcvinfo = 1;
2547 	int error = 0, len, i;
2548 	struct sockaddr *to = NULL;
2549 #ifdef KTRACE
2550 	struct uio *ktruio = NULL;
2551 #endif
2552 	struct uio auio;
2553 	struct iovec *iov, *tiov;
2554 
2555 	if (uap->sinfo) {
2556 		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2557 		if (error)
2558 			return (error);
2559 		u_sinfo = &sinfo;
2560 	}
2561 
2562 	if (uap->tolen) {
2563 		error = getsockaddr(&to, uap->to, uap->tolen);
2564 		if (error) {
2565 			to = NULL;
2566 			goto sctp_bad2;
2567 		}
2568 	}
2569 	error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2570 	if (error)
2571 		goto sctp_bad1;
2572 
2573 	error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
2574 	if (error)
2575 		goto sctp_bad1;
2576 
2578 	so = (struct socket *)fp->f_data;
2579 #ifdef MAC
2580 	SOCK_LOCK(so);
2581 	error = mac_check_socket_send(td->td_ucred, so);
2582 	SOCK_UNLOCK(so);
2583 	if (error)
2584 		goto sctp_bad;
2585 #endif
2586 
2588 	auio.uio_iov = iov;
2589 	auio.uio_iovcnt = uap->iovlen;
2590 	auio.uio_segflg = UIO_USERSPACE;
2591 	auio.uio_rw = UIO_WRITE;
2592 	auio.uio_td = td;
2593 	auio.uio_offset = 0;			/* XXX */
2594 	auio.uio_resid = 0;
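	/*
	 * Total up the iovec lengths; if the signed running total wraps
	 * negative the request is too large to represent and is rejected
	 * with EINVAL, mirroring the checks done for readv(2)/writev(2).
	 */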
2595 	tiov = iov;
2596 	for (i = 0; i < uap->iovlen; i++, tiov++) {
2597 		if ((auio.uio_resid += tiov->iov_len) < 0) {
2598 			error = EINVAL;
2599 			goto sctp_bad;
2600 		}
2601 	}
2602 	len = auio.uio_resid;
2603 	error = sctp_lower_sosend(so,
2604 				 to,
2605 				 &auio,
2606 				 (struct mbuf *)NULL,
2607 				 (struct mbuf *)NULL,
2608 				 uap->flags,
2609 				 use_rcvinfo,
2610 				 u_sinfo,
2611 				 td);
2612 
2613 	if (error) {
2614 		if (auio.uio_resid != len && (error == ERESTART ||
2615 		    error == EINTR || error == EWOULDBLOCK))
2616 			error = 0;
2617 		/* Generation of SIGPIPE can be controlled per socket */
2618 		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2619 		    !(uap->flags & MSG_NOSIGNAL)) {
2620 			PROC_LOCK(td->td_proc);
2621 			psignal(td->td_proc, SIGPIPE);
2622 			PROC_UNLOCK(td->td_proc);
2623 		}
2624 	}
2625 	if (error == 0)
2626 		td->td_retval[0] = len - auio.uio_resid;
2627 #ifdef KTRACE
2628 	if (ktruio != NULL) {
2629 		ktruio->uio_resid = td->td_retval[0];
2630 		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2631 	}
2632 #endif
2633  sctp_bad:
2634 	free(iov, M_IOV);
2635  sctp_bad1:
2636 	fdrop(fp, td);
2637  sctp_bad2:
2638 	if (to)
2639 		FREE(to, M_SONAME);
2640 
2641 	return (error);
2642 #else
2643 	return (EOPNOTSUPP);
2644 #endif
2645 }
2646 
2647 int sctp_generic_recvmsg(td, uap)
2648 	struct thread *td;
2649 	register struct sctp_generic_recvmsg_args /* {
2650 					     int sd,
2651 					     struct iovec *iov,
2652 					     int iovlen,
2653 					     struct sockaddr *from,
2654 					     __socklen_t *fromlenaddr,
2655 					     struct sctp_sndrcvinfo *sinfo,
2656 					     int *msg_flags
2657 					     } */ *uap;
2658 {
2659 #ifdef SCTP
2660 	u_int8_t sockbufstore[256];
2661 	struct uio auio;
2662 	struct iovec *iov, *tiov;
2663 	struct sctp_sndrcvinfo sinfo;
2664 	struct socket *so;
2665 	struct file *fp;
2666 	struct sockaddr *fromsa;
2667 	int fromlen;
2668 	int len, i, msg_flags = 0;
2669 	int error = 0;
2670 #ifdef KTRACE
2671 	struct uio *ktruio = NULL;
2672 #endif
2673 	error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2674 	if (error) {
2675 		return (error);
2676 	}
2677 	error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
2678 	if (error) {
2679 		goto out1;
2680 	}
2681 	so = fp->f_data;
2682 #ifdef MAC
2683 	SOCK_LOCK(so);
2684 	error = mac_check_socket_receive(td->td_ucred, so);
2685 	SOCK_UNLOCK(so);
2686 	if (error) {
2687 		goto out;
2689 	}
2690 #endif
2691 	if (uap->fromlenaddr) {
2692 		error = copyin(uap->fromlenaddr,
2693 		    &fromlen, sizeof (fromlen));
2694 		if (error) {
2695 			goto out;
2696 		}
2697 	} else {
2698 		fromlen = 0;
2699 	}
2700 
2702 	auio.uio_iov = iov;
2703 	auio.uio_iovcnt = uap->iovlen;
2704 	auio.uio_segflg = UIO_USERSPACE;
2705 	auio.uio_rw = UIO_READ;
2706 	auio.uio_td = td;
2707 	auio.uio_offset = 0;			/* XXX */
2708 	auio.uio_resid = 0;
2709 	tiov = iov;
2710 	for (i = 0; i < uap->iovlen; i++, tiov++) {
2711 		if ((auio.uio_resid += tiov->iov_len) < 0) {
2712 			error = EINVAL;
2713 			goto out;
2714 		}
2715 	}
2716 	len = auio.uio_resid;
2717 	fromsa = (struct sockaddr *)sockbufstore;
2718 #ifdef KTRACE
2719 	if (KTRPOINT(td, KTR_GENIO))
2720 		ktruio = cloneuio(&auio);
2721 #endif
2722 	error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
2723 			       fromsa, fromlen, &msg_flags, &sinfo, 1);
2725 	if (error) {
2726 		if (auio.uio_resid != (int)len && (error == ERESTART ||
2727 		    error == EINTR || error == EWOULDBLOCK))
2728 			error = 0;
2729 	} else {
2730 		if (uap->sinfo)
2731 			error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
2732 	}
2733 #ifdef KTRACE
2734 	if (ktruio != NULL) {
2735 		ktruio->uio_resid = (int)len - auio.uio_resid;
2736 		ktrgenio(uap->sd, UIO_READ, ktruio, error);
2737 	}
2738 #endif
2739 	if (error)
2740 		goto out;
2741 	td->td_retval[0] = (int)len - auio.uio_resid;
2742 	if (fromlen && uap->from) {
2743 		len = fromlen;
2744 		if (len <= 0 || fromsa == NULL)
2745 			len = 0;
2746 		else {
2747 			len = MIN(len, fromsa->sa_len);
2748 			error = copyout(fromsa, uap->from, (unsigned)len);
2749 			if (error)
2750 				goto out;
2751 		}
2752 		error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t));
2753 		if (error) {
2754 			goto out;
2755 		}
2756 	}
2757 	if (uap->msg_flags) {
2758 		error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
2759 		if (error) {
2760 			goto out;
2761 		}
2762 	}
2763 out:
2764 	free(iov, M_IOV);
2765 out1:
2766 	fdrop(fp, td);
2767 	return (error);
2768 #else
2769 	return (EOPNOTSUPP);
2770 #endif
2771 
2772 }
2773
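/*
 * The libc sctp_recvmsg(3) wrapper is the usual caller of the syscall
 * above.  A sketch, where sd is assumed to be an SCTP socket with data
 * pending:
 *
 *	#include <sys/types.h>
 *	#include <sys/socket.h>
 *	#include <netinet/sctp.h>
 *	#include <err.h>
 *
 *	char buf[2048];
 *	struct sockaddr_storage from;
 *	socklen_t fromlen = sizeof(from);
 *	struct sctp_sndrcvinfo sinfo;
 *	int msg_flags = 0;
 *	ssize_t n;
 *
 *	n = sctp_recvmsg(sd, buf, sizeof(buf), (struct sockaddr *)&from,
 *	    &fromlen, &sinfo, &msg_flags);
 *	if (n == -1)
 *		err(1, "sctp_recvmsg");
 *
 * On return sinfo carries the stream and association ids of the message
 * and MSG_EOR in msg_flags indicates that a complete record was read.
 */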