xref: /freebsd/sys/kern/uipc_syscalls.c (revision b28624fde638caadd4a89f50c9b7e7da0f98c4d2)
1 /*-
2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * sendfile(2) and related extensions:
6  * Copyright (c) 1998, David Greenman. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 #include "opt_sctp.h"
39 #include "opt_compat.h"
40 #include "opt_ktrace.h"
41 #include "opt_mac.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/kernel.h>
46 #include <sys/lock.h>
47 #include <sys/mutex.h>
48 #include <sys/sysproto.h>
49 #include <sys/malloc.h>
50 #include <sys/filedesc.h>
51 #include <sys/event.h>
52 #include <sys/proc.h>
53 #include <sys/fcntl.h>
54 #include <sys/file.h>
55 #include <sys/filio.h>
56 #include <sys/mount.h>
57 #include <sys/mbuf.h>
58 #include <sys/protosw.h>
59 #include <sys/sf_buf.h>
60 #include <sys/socket.h>
61 #include <sys/socketvar.h>
62 #include <sys/signalvar.h>
63 #include <sys/syscallsubr.h>
64 #include <sys/sysctl.h>
65 #include <sys/uio.h>
66 #include <sys/vnode.h>
67 #ifdef KTRACE
68 #include <sys/ktrace.h>
69 #endif
70 
71 #include <security/mac/mac_framework.h>
72 
73 #include <vm/vm.h>
74 #include <vm/vm_object.h>
75 #include <vm/vm_page.h>
76 #include <vm/vm_pageout.h>
77 #include <vm/vm_kern.h>
78 #include <vm/vm_extern.h>
79 
80 #ifdef SCTP
81 #include <netinet/sctp.h>
82 #include <netinet/sctp_peeloff.h>
83 #endif /* SCTP */
84 
/*
 * Forward declarations of file-local helpers.  sendit(), recvit() and
 * accept1() are shared back ends for several system call entry points
 * below; the `compat' argument selects the old-sockaddr (COMPAT_OLDSOCK)
 * behaviour where it is used.
 */
static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);

static int accept1(struct thread *td, struct accept_args *uap, int compat);
static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
static int getsockname1(struct thread *td, struct getsockname_args *uap,
			int compat);
static int getpeername1(struct thread *td, struct getpeername_args *uap,
			int compat);
94 
/*
 * NSFBUFS-related variables and associated sysctls
 *
 * Each counter below is exported under kern.ipc through the matching
 * SYSCTL_INT() declaration; the description strings there are the
 * authoritative meaning of each value.
 */
int nsfbufs;		/* maximum number of sf_bufs available */
int nsfbufspeak;	/* number of sf_bufs at peak usage */
int nsfbufsused;	/* number of sf_bufs in use */

SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
    "Maximum number of sendfile(2) sf_bufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
    "Number of sendfile(2) sf_bufs at peak usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
    "Number of sendfile(2) sf_bufs in use");
108 
/*
 * Convert a user file descriptor to a kernel file entry.  A reference on the
 * file entry is held upon returning.  This is lighter weight than
 * fgetsock(), which bumps the socket reference count and drops the file
 * reference count instead, as this approach avoids several additional mutex
 * operations associated with the additional reference count.  If requested,
 * return the open file flags.
 */
117 static int
118 getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp)
119 {
120 	struct file *fp;
121 	int error;
122 
123 	fp = NULL;
124 	if (fdp == NULL)
125 		error = EBADF;
126 	else {
127 		FILEDESC_SLOCK(fdp);
128 		fp = fget_locked(fdp, fd);
129 		if (fp == NULL)
130 			error = EBADF;
131 		else if (fp->f_type != DTYPE_SOCKET) {
132 			fp = NULL;
133 			error = ENOTSOCK;
134 		} else {
135 			fhold(fp);
136 			if (fflagp != NULL)
137 				*fflagp = fp->f_flag;
138 			error = 0;
139 		}
140 		FILEDESC_SUNLOCK(fdp);
141 	}
142 	*fpp = fp;
143 	return (error);
144 }
145 
146 /*
147  * System call interface to the socket abstraction.
148  */
149 #if defined(COMPAT_43)
150 #define COMPAT_OLDSOCK
151 #endif
152 
153 int
154 socket(td, uap)
155 	struct thread *td;
156 	struct socket_args /* {
157 		int	domain;
158 		int	type;
159 		int	protocol;
160 	} */ *uap;
161 {
162 	struct filedesc *fdp;
163 	struct socket *so;
164 	struct file *fp;
165 	int fd, error;
166 
167 #ifdef MAC
168 	error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
169 	    uap->protocol);
170 	if (error)
171 		return (error);
172 #endif
173 	fdp = td->td_proc->p_fd;
174 	error = falloc(td, &fp, &fd);
175 	if (error)
176 		return (error);
177 	/* An extra reference on `fp' has been held for us by falloc(). */
178 	error = socreate(uap->domain, &so, uap->type, uap->protocol,
179 	    td->td_ucred, td);
180 	if (error) {
181 		fdclose(fdp, fp, fd, td);
182 	} else {
183 		FILE_LOCK(fp);
184 		fp->f_data = so;	/* already has ref count */
185 		fp->f_flag = FREAD|FWRITE;
186 		fp->f_type = DTYPE_SOCKET;
187 		fp->f_ops = &socketops;
188 		FILE_UNLOCK(fp);
189 		td->td_retval[0] = fd;
190 	}
191 	fdrop(fp, td);
192 	return (error);
193 }
194 
195 /* ARGSUSED */
196 int
197 bind(td, uap)
198 	struct thread *td;
199 	struct bind_args /* {
200 		int	s;
201 		caddr_t	name;
202 		int	namelen;
203 	} */ *uap;
204 {
205 	struct sockaddr *sa;
206 	int error;
207 
208 	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
209 		return (error);
210 
211 	error = kern_bind(td, uap->s, sa);
212 	free(sa, M_SONAME);
213 	return (error);
214 }
215 
216 int
217 kern_bind(td, fd, sa)
218 	struct thread *td;
219 	int fd;
220 	struct sockaddr *sa;
221 {
222 	struct socket *so;
223 	struct file *fp;
224 	int error;
225 
226 	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
227 	if (error)
228 		return (error);
229 	so = fp->f_data;
230 #ifdef MAC
231 	SOCK_LOCK(so);
232 	error = mac_check_socket_bind(td->td_ucred, so, sa);
233 	SOCK_UNLOCK(so);
234 	if (error)
235 		goto done;
236 #endif
237 	error = sobind(so, sa, td);
238 #ifdef MAC
239 done:
240 #endif
241 	fdrop(fp, td);
242 	return (error);
243 }
244 
245 /* ARGSUSED */
246 int
247 listen(td, uap)
248 	struct thread *td;
249 	struct listen_args /* {
250 		int	s;
251 		int	backlog;
252 	} */ *uap;
253 {
254 	struct socket *so;
255 	struct file *fp;
256 	int error;
257 
258 	error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
259 	if (error == 0) {
260 		so = fp->f_data;
261 #ifdef MAC
262 		SOCK_LOCK(so);
263 		error = mac_check_socket_listen(td->td_ucred, so);
264 		SOCK_UNLOCK(so);
265 		if (error)
266 			goto done;
267 #endif
268 		error = solisten(so, uap->backlog, td);
269 #ifdef MAC
270 done:
271 #endif
272 		fdrop(fp, td);
273 	}
274 	return(error);
275 }
276 
277 /*
278  * accept1()
279  */
280 static int
281 accept1(td, uap, compat)
282 	struct thread *td;
283 	struct accept_args /* {
284 		int	s;
285 		struct sockaddr	* __restrict name;
286 		socklen_t	* __restrict anamelen;
287 	} */ *uap;
288 	int compat;
289 {
290 	struct sockaddr *name;
291 	socklen_t namelen;
292 	struct file *fp;
293 	int error;
294 
295 	if (uap->name == NULL)
296 		return (kern_accept(td, uap->s, NULL, NULL, NULL));
297 
298 	error = copyin(uap->anamelen, &namelen, sizeof (namelen));
299 	if (error)
300 		return (error);
301 
302 	error = kern_accept(td, uap->s, &name, &namelen, &fp);
303 
304 	/*
305 	 * return a namelen of zero for older code which might
306 	 * ignore the return value from accept.
307 	 */
308 	if (error) {
309 		(void) copyout(&namelen,
310 		    uap->anamelen, sizeof(*uap->anamelen));
311 		return (error);
312 	}
313 
314 	if (error == 0 && name != NULL) {
315 #ifdef COMPAT_OLDSOCK
316 		if (compat)
317 			((struct osockaddr *)name)->sa_family =
318 			    name->sa_family;
319 #endif
320 		error = copyout(name, uap->name, namelen);
321 	}
322 	if (error == 0)
323 		error = copyout(&namelen, uap->anamelen,
324 		    sizeof(namelen));
325 	if (error)
326 		fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
327 	fdrop(fp, td);
328 	free(name, M_SONAME);
329 	return (error);
330 }
331 
/*
 * Accept a connection on the listening socket behind descriptor `s'.
 *
 * On success the new descriptor number is stored in td->td_retval[0].
 * If `name' is non-NULL, *name receives the address returned by
 * soaccept() (or stays NULL when none was returned) and *namelen is
 * clamped to that address's sa_len, or set to 0 when there is no
 * address; accept1() frees *name with M_SONAME.  If `fp' is non-NULL,
 * *fp receives the new file with the falloc() reference still held on
 * success, and NULL on every error path that reaches `done'.  The two
 * early returns below (bad *namelen, getsock() failure) leave *fp
 * untouched.
 */
int
kern_accept(struct thread *td, int s, struct sockaddr **name,
    socklen_t *namelen, struct file **fp)
{
	struct filedesc *fdp;
	struct file *headfp, *nfp = NULL;
	struct sockaddr *sa = NULL;
	int error;
	struct socket *head, *so;
	int fd;
	u_int fflag;
	pid_t pgid;
	int tmp;

	if (name) {
		*name = NULL;
		/*
		 * NOTE(review): if socklen_t is an unsigned type this test
		 * can never be true -- confirm against the type definition.
		 */
		if (*namelen < 0)
			return (EINVAL);
	}

	fdp = td->td_proc->p_fd;
	/* Resolve the listening descriptor; fflag receives its f_flag. */
	error = getsock(fdp, s, &headfp, &fflag);
	if (error)
		return (error);
	head = headfp->f_data;
	/* Only sockets marked SO_ACCEPTCONN can be accepted on. */
	if ((head->so_options & SO_ACCEPTCONN) == 0) {
		error = EINVAL;
		goto done;
	}
#ifdef MAC
	SOCK_LOCK(head);
	error = mac_check_socket_accept(td->td_ucred, head);
	SOCK_UNLOCK(head);
	if (error != 0)
		goto done;
#endif
	/* Allocate the descriptor before taking a connection off the queue. */
	error = falloc(td, &nfp, &fd);
	if (error)
		goto done;
	ACCEPT_LOCK();
	/* Non-blocking socket with nothing completed: fail immediately. */
	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
		ACCEPT_UNLOCK();
		error = EWOULDBLOCK;
		goto noconnection;
	}
	/*
	 * Sleep on so_timeo (accept_mtx is the mutex handed to msleep())
	 * until a completed connection shows up or so_error is set.
	 */
	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
		if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
			head->so_error = ECONNABORTED;
			break;
		}
		error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
		    "accept", 0);
		if (error) {
			ACCEPT_UNLOCK();
			goto noconnection;
		}
	}
	/* Report and clear a pending error on the listening socket. */
	if (head->so_error) {
		error = head->so_error;
		head->so_error = 0;
		ACCEPT_UNLOCK();
		goto noconnection;
	}
	so = TAILQ_FIRST(&head->so_comp);
	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));

	/*
	 * Before changing the flags on the socket, we have to bump the
	 * reference count.  Otherwise, if the protocol calls sofree(),
	 * the socket will be released due to a zero refcount.
	 */
	SOCK_LOCK(so);			/* soref() and so_state update */
	soref(so);			/* file descriptor reference */

	/* Detach the connection from the listener's completed queue. */
	TAILQ_REMOVE(&head->so_comp, so, so_list);
	head->so_qlen--;
	/* The new socket inherits the listener's SS_NBIO state bit. */
	so->so_state |= (head->so_state & SS_NBIO);
	so->so_qstate &= ~SQ_COMP;
	so->so_head = NULL;

	SOCK_UNLOCK(so);
	ACCEPT_UNLOCK();

	/* An extra reference on `nfp' has been held for us by falloc(). */
	td->td_retval[0] = fd;

	/* connection has been removed from the listen queue */
	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);

	/* Copy the listener's sigio owner, if it has one, to the new socket. */
	pgid = fgetown(&head->so_sigio);
	if (pgid != 0)
		fsetown(pgid, &so->so_sigio);

	FILE_LOCK(nfp);
	nfp->f_data = so;	/* nfp has ref count from falloc */
	nfp->f_flag = fflag;
	nfp->f_type = DTYPE_SOCKET;
	nfp->f_ops = &socketops;
	FILE_UNLOCK(nfp);
	/* Sync socket nonblocking/async state with file flags */
	tmp = fflag & FNONBLOCK;
	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
	tmp = fflag & FASYNC;
	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
	sa = 0;
	error = soaccept(so, &sa);
	if (error) {
		/*
		 * return a namelen of zero for older code which might
		 * ignore the return value from accept.
		 */
		if (name)
			*namelen = 0;
		goto noconnection;
	}
	/* Success without an address: report a zero-length name. */
	if (sa == NULL) {
		if (name)
			*namelen = 0;
		goto done;
	}
	if (name) {
		/* check sa_len before it is destroyed */
		if (*namelen > sa->sa_len)
			*namelen = sa->sa_len;
		/* Ownership of the address passes to the caller. */
		*name = sa;
		sa = NULL;
	}
noconnection:
	/* Free the address unless it was handed to the caller above. */
	if (sa)
		FREE(sa, M_SONAME);

	/*
	 * close the new descriptor, assuming someone hasn't ripped it
	 * out from under us.
	 */
	if (error)
		fdclose(fdp, nfp, fd, td);

	/*
	 * Release explicitly held references before returning.  We return
	 * a reference on nfp to the caller on success if they request it.
	 */
done:
	if (fp != NULL) {
		if (error == 0) {
			/* Caller takes over the falloc() reference. */
			*fp = nfp;
			nfp = NULL;
		} else
			*fp = NULL;
	}
	if (nfp != NULL)
		fdrop(nfp, td);
	fdrop(headfp, td);
	return (error);
}
488 
/*
 * accept(2): accept1() with the compat flag cleared.
 */
int
accept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{
	int error;

	error = accept1(td, uap, 0);
	return (error);
}
497 
498 #ifdef COMPAT_OLDSOCK
/*
 * Old-style accept: accept1() with the compat flag set, which makes it
 * return the address in the osockaddr layout.
 */
int
oaccept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{
	int error;

	error = accept1(td, uap, 1);
	return (error);
}
507 #endif /* COMPAT_OLDSOCK */
508 
509 /* ARGSUSED */
510 int
511 connect(td, uap)
512 	struct thread *td;
513 	struct connect_args /* {
514 		int	s;
515 		caddr_t	name;
516 		int	namelen;
517 	} */ *uap;
518 {
519 	struct sockaddr *sa;
520 	int error;
521 
522 	error = getsockaddr(&sa, uap->name, uap->namelen);
523 	if (error)
524 		return (error);
525 
526 	error = kern_connect(td, uap->s, sa);
527 	free(sa, M_SONAME);
528 	return (error);
529 }
530 
531 
532 int
533 kern_connect(td, fd, sa)
534 	struct thread *td;
535 	int fd;
536 	struct sockaddr *sa;
537 {
538 	struct socket *so;
539 	struct file *fp;
540 	int error;
541 	int interrupted = 0;
542 
543 	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
544 	if (error)
545 		return (error);
546 	so = fp->f_data;
547 	if (so->so_state & SS_ISCONNECTING) {
548 		error = EALREADY;
549 		goto done1;
550 	}
551 #ifdef MAC
552 	SOCK_LOCK(so);
553 	error = mac_check_socket_connect(td->td_ucred, so, sa);
554 	SOCK_UNLOCK(so);
555 	if (error)
556 		goto bad;
557 #endif
558 	error = soconnect(so, sa, td);
559 	if (error)
560 		goto bad;
561 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
562 		error = EINPROGRESS;
563 		goto done1;
564 	}
565 	SOCK_LOCK(so);
566 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
567 		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
568 		    "connec", 0);
569 		if (error) {
570 			if (error == EINTR || error == ERESTART)
571 				interrupted = 1;
572 			break;
573 		}
574 	}
575 	if (error == 0) {
576 		error = so->so_error;
577 		so->so_error = 0;
578 	}
579 	SOCK_UNLOCK(so);
580 bad:
581 	if (!interrupted)
582 		so->so_state &= ~SS_ISCONNECTING;
583 	if (error == ERESTART)
584 		error = EINTR;
585 done1:
586 	fdrop(fp, td);
587 	return (error);
588 }
589 
/*
 * socketpair(2): create two sockets of the requested domain/type/protocol,
 * give each a file descriptor, connect them to each other with
 * soconnect2(), and copy the two descriptor numbers out to uap->rsv.
 *
 * Error unwinding uses the free4..free1 ladder at the bottom: each label
 * undoes one acquisition and falls through to the ones acquired earlier.
 */
int
socketpair(td, uap)
	struct thread *td;
	struct socketpair_args /* {
		int	domain;
		int	type;
		int	protocol;
		int	*rsv;
	} */ *uap;
{
	struct filedesc *fdp = td->td_proc->p_fd;
	struct file *fp1, *fp2;
	struct socket *so1, *so2;
	int fd, error, sv[2];

#ifdef MAC
	/* We might want to have a separate check for socket pairs. */
	error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
	    uap->protocol);
	if (error)
		return (error);
#endif

	/* Create both sockets first, then the two descriptors. */
	error = socreate(uap->domain, &so1, uap->type, uap->protocol,
	    td->td_ucred, td);
	if (error)
		return (error);
	error = socreate(uap->domain, &so2, uap->type, uap->protocol,
	    td->td_ucred, td);
	if (error)
		goto free1;
	/* On success extra reference to `fp1' and 'fp2' is set by falloc. */
	error = falloc(td, &fp1, &fd);
	if (error)
		goto free2;
	sv[0] = fd;
	fp1->f_data = so1;	/* so1 already has ref count */
	error = falloc(td, &fp2, &fd);
	if (error)
		goto free3;
	fp2->f_data = so2;	/* so2 already has ref count */
	sv[1] = fd;
	error = soconnect2(so1, so2);
	if (error)
		goto free4;
	if (uap->type == SOCK_DGRAM) {
		/*
		 * Datagram socket connection is asymmetric.
		 */
		 error = soconnect2(so2, so1);
		 if (error)
			goto free4;
	}
	/* Both ends are connected: finish turning the files into sockets. */
	FILE_LOCK(fp1);
	fp1->f_flag = FREAD|FWRITE;
	fp1->f_type = DTYPE_SOCKET;
	fp1->f_ops = &socketops;
	FILE_UNLOCK(fp1);
	FILE_LOCK(fp2);
	fp2->f_flag = FREAD|FWRITE;
	fp2->f_type = DTYPE_SOCKET;
	fp2->f_ops = &socketops;
	FILE_UNLOCK(fp2);
	/*
	 * Clear the local socket pointers so the free2/free1 steps below do
	 * not soclose() them if the copyout fails.
	 */
	so1 = so2 = NULL;
	error = copyout(sv, uap->rsv, 2 * sizeof (int));
	if (error)
		goto free4;
	/* Success: drop only the extra falloc() references. */
	fdrop(fp1, td);
	fdrop(fp2, td);
	return (0);
free4:
	fdclose(fdp, fp2, sv[1], td);
	fdrop(fp2, td);
free3:
	fdclose(fdp, fp1, sv[0], td);
	fdrop(fp1, td);
free2:
	if (so2 != NULL)
		(void)soclose(so2);
free1:
	if (so1 != NULL)
		(void)soclose(so1);
	return (error);
}
674 
675 static int
676 sendit(td, s, mp, flags)
677 	struct thread *td;
678 	int s;
679 	struct msghdr *mp;
680 	int flags;
681 {
682 	struct mbuf *control;
683 	struct sockaddr *to;
684 	int error;
685 
686 	if (mp->msg_name != NULL) {
687 		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
688 		if (error) {
689 			to = NULL;
690 			goto bad;
691 		}
692 		mp->msg_name = to;
693 	} else {
694 		to = NULL;
695 	}
696 
697 	if (mp->msg_control) {
698 		if (mp->msg_controllen < sizeof(struct cmsghdr)
699 #ifdef COMPAT_OLDSOCK
700 		    && mp->msg_flags != MSG_COMPAT
701 #endif
702 		) {
703 			error = EINVAL;
704 			goto bad;
705 		}
706 		error = sockargs(&control, mp->msg_control,
707 		    mp->msg_controllen, MT_CONTROL);
708 		if (error)
709 			goto bad;
710 #ifdef COMPAT_OLDSOCK
711 		if (mp->msg_flags == MSG_COMPAT) {
712 			struct cmsghdr *cm;
713 
714 			M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
715 			if (control == 0) {
716 				error = ENOBUFS;
717 				goto bad;
718 			} else {
719 				cm = mtod(control, struct cmsghdr *);
720 				cm->cmsg_len = control->m_len;
721 				cm->cmsg_level = SOL_SOCKET;
722 				cm->cmsg_type = SCM_RIGHTS;
723 			}
724 		}
725 #endif
726 	} else {
727 		control = NULL;
728 	}
729 
730 	error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
731 
732 bad:
733 	if (to)
734 		FREE(to, M_SONAME);
735 	return (error);
736 }
737 
738 int
739 kern_sendit(td, s, mp, flags, control, segflg)
740 	struct thread *td;
741 	int s;
742 	struct msghdr *mp;
743 	int flags;
744 	struct mbuf *control;
745 	enum uio_seg segflg;
746 {
747 	struct file *fp;
748 	struct uio auio;
749 	struct iovec *iov;
750 	struct socket *so;
751 	int i;
752 	int len, error;
753 #ifdef KTRACE
754 	struct uio *ktruio = NULL;
755 #endif
756 
757 	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
758 	if (error)
759 		return (error);
760 	so = (struct socket *)fp->f_data;
761 
762 #ifdef MAC
763 	SOCK_LOCK(so);
764 	error = mac_check_socket_send(td->td_ucred, so);
765 	SOCK_UNLOCK(so);
766 	if (error)
767 		goto bad;
768 #endif
769 
770 	auio.uio_iov = mp->msg_iov;
771 	auio.uio_iovcnt = mp->msg_iovlen;
772 	auio.uio_segflg = segflg;
773 	auio.uio_rw = UIO_WRITE;
774 	auio.uio_td = td;
775 	auio.uio_offset = 0;			/* XXX */
776 	auio.uio_resid = 0;
777 	iov = mp->msg_iov;
778 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
779 		if ((auio.uio_resid += iov->iov_len) < 0) {
780 			error = EINVAL;
781 			goto bad;
782 		}
783 	}
784 #ifdef KTRACE
785 	if (KTRPOINT(td, KTR_GENIO))
786 		ktruio = cloneuio(&auio);
787 #endif
788 	len = auio.uio_resid;
789 	error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
790 	if (error) {
791 		if (auio.uio_resid != len && (error == ERESTART ||
792 		    error == EINTR || error == EWOULDBLOCK))
793 			error = 0;
794 		/* Generation of SIGPIPE can be controlled per socket */
795 		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
796 		    !(flags & MSG_NOSIGNAL)) {
797 			PROC_LOCK(td->td_proc);
798 			psignal(td->td_proc, SIGPIPE);
799 			PROC_UNLOCK(td->td_proc);
800 		}
801 	}
802 	if (error == 0)
803 		td->td_retval[0] = len - auio.uio_resid;
804 #ifdef KTRACE
805 	if (ktruio != NULL) {
806 		ktruio->uio_resid = td->td_retval[0];
807 		ktrgenio(s, UIO_WRITE, ktruio, error);
808 	}
809 #endif
810 bad:
811 	fdrop(fp, td);
812 	return (error);
813 }
814 
815 int
816 sendto(td, uap)
817 	struct thread *td;
818 	struct sendto_args /* {
819 		int	s;
820 		caddr_t	buf;
821 		size_t	len;
822 		int	flags;
823 		caddr_t	to;
824 		int	tolen;
825 	} */ *uap;
826 {
827 	struct msghdr msg;
828 	struct iovec aiov;
829 	int error;
830 
831 	msg.msg_name = uap->to;
832 	msg.msg_namelen = uap->tolen;
833 	msg.msg_iov = &aiov;
834 	msg.msg_iovlen = 1;
835 	msg.msg_control = 0;
836 #ifdef COMPAT_OLDSOCK
837 	msg.msg_flags = 0;
838 #endif
839 	aiov.iov_base = uap->buf;
840 	aiov.iov_len = uap->len;
841 	error = sendit(td, uap->s, &msg, uap->flags);
842 	return (error);
843 }
844 
845 #ifdef COMPAT_OLDSOCK
846 int
847 osend(td, uap)
848 	struct thread *td;
849 	struct osend_args /* {
850 		int	s;
851 		caddr_t	buf;
852 		int	len;
853 		int	flags;
854 	} */ *uap;
855 {
856 	struct msghdr msg;
857 	struct iovec aiov;
858 	int error;
859 
860 	msg.msg_name = 0;
861 	msg.msg_namelen = 0;
862 	msg.msg_iov = &aiov;
863 	msg.msg_iovlen = 1;
864 	aiov.iov_base = uap->buf;
865 	aiov.iov_len = uap->len;
866 	msg.msg_control = 0;
867 	msg.msg_flags = 0;
868 	error = sendit(td, uap->s, &msg, uap->flags);
869 	return (error);
870 }
871 
872 int
873 osendmsg(td, uap)
874 	struct thread *td;
875 	struct osendmsg_args /* {
876 		int	s;
877 		caddr_t	msg;
878 		int	flags;
879 	} */ *uap;
880 {
881 	struct msghdr msg;
882 	struct iovec *iov;
883 	int error;
884 
885 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
886 	if (error)
887 		return (error);
888 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
889 	if (error)
890 		return (error);
891 	msg.msg_iov = iov;
892 	msg.msg_flags = MSG_COMPAT;
893 	error = sendit(td, uap->s, &msg, uap->flags);
894 	free(iov, M_IOV);
895 	return (error);
896 }
897 #endif
898 
899 int
900 sendmsg(td, uap)
901 	struct thread *td;
902 	struct sendmsg_args /* {
903 		int	s;
904 		caddr_t	msg;
905 		int	flags;
906 	} */ *uap;
907 {
908 	struct msghdr msg;
909 	struct iovec *iov;
910 	int error;
911 
912 	error = copyin(uap->msg, &msg, sizeof (msg));
913 	if (error)
914 		return (error);
915 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
916 	if (error)
917 		return (error);
918 	msg.msg_iov = iov;
919 #ifdef COMPAT_OLDSOCK
920 	msg.msg_flags = 0;
921 #endif
922 	error = sendit(td, uap->s, &msg, uap->flags);
923 	free(iov, M_IOV);
924 	return (error);
925 }
926 
/*
 * Receive a message on the socket behind descriptor `s' into the buffers
 * described by mp->msg_iov / mp->msg_iovlen.
 *
 * On success td->td_retval[0] holds the number of bytes received.  If
 * mp->msg_name is set, the source address is copied there (copyout() when
 * `fromseg' is UIO_USERSPACE, bcopy() otherwise) and mp->msg_namelen is
 * updated to the length copied.  Control data is either returned to the
 * caller as an mbuf chain through `controlp' (when non-NULL) or copied
 * out to mp->msg_control, setting MSG_CTRUNC in mp->msg_flags when the
 * user buffer is too small.  The data buffers themselves are always
 * treated as user-space addresses (uio_segflg is UIO_USERSPACE
 * regardless of `fromseg').
 */
int
kern_recvit(td, s, mp, fromseg, controlp)
	struct thread *td;
	int s;
	struct msghdr *mp;
	enum uio_seg fromseg;
	struct mbuf **controlp;
{
	struct uio auio;
	struct iovec *iov;
	int i;
	socklen_t len;
	int error;
	struct mbuf *m, *control = 0;
	caddr_t ctlbuf;
	struct file *fp;
	struct socket *so;
	struct sockaddr *fromsa = 0;
#ifdef KTRACE
	struct uio *ktruio = NULL;
#endif

	/* Make sure the caller never sees a stale chain on failure. */
	if(controlp != NULL)
		*controlp = 0;

	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
	if (error)
		return (error);
	so = fp->f_data;

#ifdef MAC
	SOCK_LOCK(so);
	error = mac_check_socket_receive(td->td_ucred, so);
	SOCK_UNLOCK(so);
	if (error) {
		fdrop(fp, td);
		return (error);
	}
#endif

	auio.uio_iov = mp->msg_iov;
	auio.uio_iovcnt = mp->msg_iovlen;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_td = td;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	iov = mp->msg_iov;
	/* Sum the iovec lengths, rejecting a total that goes negative. */
	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
		if ((auio.uio_resid += iov->iov_len) < 0) {
			fdrop(fp, td);
			return (EINVAL);
		}
	}
#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO))
		ktruio = cloneuio(&auio);
#endif
	/* Remember the requested size to compute the byte count later. */
	len = auio.uio_resid;
	/* Control data is requested only if some consumer exists for it. */
	error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
	    (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
	    &mp->msg_flags);
	if (error) {
		/* A partial transfer counts as success. */
		if (auio.uio_resid != (int)len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
#ifdef KTRACE
	if (ktruio != NULL) {
		ktruio->uio_resid = (int)len - auio.uio_resid;
		ktrgenio(s, UIO_READ, ktruio, error);
	}
#endif
	if (error)
		goto out;
	td->td_retval[0] = (int)len - auio.uio_resid;
	if (mp->msg_name) {
		/* `len' is reused here for the address length. */
		len = mp->msg_namelen;
		if (len <= 0 || fromsa == 0)
			len = 0;
		else {
			/* save sa_len before it is destroyed by MSG_COMPAT */
			len = MIN(len, fromsa->sa_len);
#ifdef COMPAT_OLDSOCK
			if (mp->msg_flags & MSG_COMPAT)
				((struct osockaddr *)fromsa)->sa_family =
				    fromsa->sa_family;
#endif
			if (fromseg == UIO_USERSPACE) {
				error = copyout(fromsa, mp->msg_name,
				    (unsigned)len);
				if (error)
					goto out;
			} else
				bcopy(fromsa, mp->msg_name, len);
		}
		mp->msg_namelen = len;
	}
	/* Copy control data out only when it is not returned via controlp. */
	if (mp->msg_control && controlp == NULL) {
#ifdef COMPAT_OLDSOCK
		/*
		 * We assume that old recvmsg calls won't receive access
		 * rights and other control info, esp. as control info
		 * is always optional and those options didn't exist in 4.3.
		 * If we receive rights, trim the cmsghdr; anything else
		 * is tossed.
		 */
		if (control && mp->msg_flags & MSG_COMPAT) {
			if (mtod(control, struct cmsghdr *)->cmsg_level !=
			    SOL_SOCKET ||
			    mtod(control, struct cmsghdr *)->cmsg_type !=
			    SCM_RIGHTS) {
				mp->msg_controllen = 0;
				goto out;
			}
			control->m_len -= sizeof (struct cmsghdr);
			control->m_data += sizeof (struct cmsghdr);
		}
#endif
		/* `len' now tracks the space left in the user's buffer. */
		len = mp->msg_controllen;
		m = control;
		mp->msg_controllen = 0;
		ctlbuf = mp->msg_control;

		/* Walk the chain, copying as much as the user buffer holds. */
		while (m && len > 0) {
			unsigned int tocopy;

			if (len >= m->m_len)
				tocopy = m->m_len;
			else {
				mp->msg_flags |= MSG_CTRUNC;
				tocopy = len;
			}

			if ((error = copyout(mtod(m, caddr_t),
					ctlbuf, tocopy)) != 0)
				goto out;

			ctlbuf += tocopy;
			len -= tocopy;
			m = m->m_next;
		}
		/* Report how many control bytes were actually copied. */
		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
	}
out:
	fdrop(fp, td);
	if (fromsa)
		FREE(fromsa, M_SONAME);

	/* Hand the chain to the caller on success, otherwise free it. */
	if (error == 0 && controlp != NULL)
		*controlp = control;
	else  if (control)
		m_freem(control);

	return (error);
}
1083 
1084 static int
1085 recvit(td, s, mp, namelenp)
1086 	struct thread *td;
1087 	int s;
1088 	struct msghdr *mp;
1089 	void *namelenp;
1090 {
1091 	int error;
1092 
1093 	error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
1094 	if (error)
1095 		return (error);
1096 	if (namelenp) {
1097 		error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
1098 #ifdef COMPAT_OLDSOCK
1099 		if (mp->msg_flags & MSG_COMPAT)
1100 			error = 0;	/* old recvfrom didn't check */
1101 #endif
1102 	}
1103 	return (error);
1104 }
1105 
1106 int
1107 recvfrom(td, uap)
1108 	struct thread *td;
1109 	struct recvfrom_args /* {
1110 		int	s;
1111 		caddr_t	buf;
1112 		size_t	len;
1113 		int	flags;
1114 		struct sockaddr * __restrict	from;
1115 		socklen_t * __restrict fromlenaddr;
1116 	} */ *uap;
1117 {
1118 	struct msghdr msg;
1119 	struct iovec aiov;
1120 	int error;
1121 
1122 	if (uap->fromlenaddr) {
1123 		error = copyin(uap->fromlenaddr,
1124 		    &msg.msg_namelen, sizeof (msg.msg_namelen));
1125 		if (error)
1126 			goto done2;
1127 	} else {
1128 		msg.msg_namelen = 0;
1129 	}
1130 	msg.msg_name = uap->from;
1131 	msg.msg_iov = &aiov;
1132 	msg.msg_iovlen = 1;
1133 	aiov.iov_base = uap->buf;
1134 	aiov.iov_len = uap->len;
1135 	msg.msg_control = 0;
1136 	msg.msg_flags = uap->flags;
1137 	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1138 done2:
1139 	return(error);
1140 }
1141 
1142 #ifdef COMPAT_OLDSOCK
1143 int
1144 orecvfrom(td, uap)
1145 	struct thread *td;
1146 	struct recvfrom_args *uap;
1147 {
1148 
1149 	uap->flags |= MSG_COMPAT;
1150 	return (recvfrom(td, uap));
1151 }
1152 #endif
1153 
1154 #ifdef COMPAT_OLDSOCK
1155 int
1156 orecv(td, uap)
1157 	struct thread *td;
1158 	struct orecv_args /* {
1159 		int	s;
1160 		caddr_t	buf;
1161 		int	len;
1162 		int	flags;
1163 	} */ *uap;
1164 {
1165 	struct msghdr msg;
1166 	struct iovec aiov;
1167 	int error;
1168 
1169 	msg.msg_name = 0;
1170 	msg.msg_namelen = 0;
1171 	msg.msg_iov = &aiov;
1172 	msg.msg_iovlen = 1;
1173 	aiov.iov_base = uap->buf;
1174 	aiov.iov_len = uap->len;
1175 	msg.msg_control = 0;
1176 	msg.msg_flags = uap->flags;
1177 	error = recvit(td, uap->s, &msg, NULL);
1178 	return (error);
1179 }
1180 
1181 /*
1182  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1183  * overlays the new one, missing only the flags, and with the (old) access
1184  * rights where the control fields are now.
1185  */
1186 int
1187 orecvmsg(td, uap)
1188 	struct thread *td;
1189 	struct orecvmsg_args /* {
1190 		int	s;
1191 		struct	omsghdr *msg;
1192 		int	flags;
1193 	} */ *uap;
1194 {
1195 	struct msghdr msg;
1196 	struct iovec *iov;
1197 	int error;
1198 
1199 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1200 	if (error)
1201 		return (error);
1202 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1203 	if (error)
1204 		return (error);
1205 	msg.msg_flags = uap->flags | MSG_COMPAT;
1206 	msg.msg_iov = iov;
1207 	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1208 	if (msg.msg_controllen && error == 0)
1209 		error = copyout(&msg.msg_controllen,
1210 		    &uap->msg->msg_accrightslen, sizeof (int));
1211 	free(iov, M_IOV);
1212 	return (error);
1213 }
1214 #endif
1215 
1216 int
1217 recvmsg(td, uap)
1218 	struct thread *td;
1219 	struct recvmsg_args /* {
1220 		int	s;
1221 		struct	msghdr *msg;
1222 		int	flags;
1223 	} */ *uap;
1224 {
1225 	struct msghdr msg;
1226 	struct iovec *uiov, *iov;
1227 	int error;
1228 
1229 	error = copyin(uap->msg, &msg, sizeof (msg));
1230 	if (error)
1231 		return (error);
1232 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1233 	if (error)
1234 		return (error);
1235 	msg.msg_flags = uap->flags;
1236 #ifdef COMPAT_OLDSOCK
1237 	msg.msg_flags &= ~MSG_COMPAT;
1238 #endif
1239 	uiov = msg.msg_iov;
1240 	msg.msg_iov = iov;
1241 	error = recvit(td, uap->s, &msg, NULL);
1242 	if (error == 0) {
1243 		msg.msg_iov = uiov;
1244 		error = copyout(&msg, uap->msg, sizeof(msg));
1245 	}
1246 	free(iov, M_IOV);
1247 	return (error);
1248 }
1249 
1250 /* ARGSUSED */
1251 int
1252 shutdown(td, uap)
1253 	struct thread *td;
1254 	struct shutdown_args /* {
1255 		int	s;
1256 		int	how;
1257 	} */ *uap;
1258 {
1259 	struct socket *so;
1260 	struct file *fp;
1261 	int error;
1262 
1263 	error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
1264 	if (error == 0) {
1265 		so = fp->f_data;
1266 		error = soshutdown(so, uap->how);
1267 		fdrop(fp, td);
1268 	}
1269 	return (error);
1270 }
1271 
1272 /* ARGSUSED */
1273 int
1274 setsockopt(td, uap)
1275 	struct thread *td;
1276 	struct setsockopt_args /* {
1277 		int	s;
1278 		int	level;
1279 		int	name;
1280 		caddr_t	val;
1281 		int	valsize;
1282 	} */ *uap;
1283 {
1284 
1285 	return (kern_setsockopt(td, uap->s, uap->level, uap->name,
1286 	    uap->val, UIO_USERSPACE, uap->valsize));
1287 }
1288 
1289 int
1290 kern_setsockopt(td, s, level, name, val, valseg, valsize)
1291 	struct thread *td;
1292 	int s;
1293 	int level;
1294 	int name;
1295 	void *val;
1296 	enum uio_seg valseg;
1297 	socklen_t valsize;
1298 {
1299 	int error;
1300 	struct socket *so;
1301 	struct file *fp;
1302 	struct sockopt sopt;
1303 
1304 	if (val == NULL && valsize != 0)
1305 		return (EFAULT);
1306 	if ((int)valsize < 0)
1307 		return (EINVAL);
1308 
1309 	sopt.sopt_dir = SOPT_SET;
1310 	sopt.sopt_level = level;
1311 	sopt.sopt_name = name;
1312 	sopt.sopt_val = val;
1313 	sopt.sopt_valsize = valsize;
1314 	switch (valseg) {
1315 	case UIO_USERSPACE:
1316 		sopt.sopt_td = td;
1317 		break;
1318 	case UIO_SYSSPACE:
1319 		sopt.sopt_td = NULL;
1320 		break;
1321 	default:
1322 		panic("kern_setsockopt called with bad valseg");
1323 	}
1324 
1325 	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1326 	if (error == 0) {
1327 		so = fp->f_data;
1328 		error = sosetopt(so, &sopt);
1329 		fdrop(fp, td);
1330 	}
1331 	return(error);
1332 }
1333 
1334 /* ARGSUSED */
1335 int
1336 getsockopt(td, uap)
1337 	struct thread *td;
1338 	struct getsockopt_args /* {
1339 		int	s;
1340 		int	level;
1341 		int	name;
1342 		void * __restrict	val;
1343 		socklen_t * __restrict avalsize;
1344 	} */ *uap;
1345 {
1346 	socklen_t valsize;
1347 	int	error;
1348 
1349 	if (uap->val) {
1350 		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1351 		if (error)
1352 			return (error);
1353 	}
1354 
1355 	error = kern_getsockopt(td, uap->s, uap->level, uap->name,
1356 	    uap->val, UIO_USERSPACE, &valsize);
1357 
1358 	if (error == 0)
1359 		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1360 	return (error);
1361 }
1362 
1363 /*
1364  * Kernel version of getsockopt.
1365  * optval can be a userland or userspace. optlen is always a kernel pointer.
1366  */
1367 int
1368 kern_getsockopt(td, s, level, name, val, valseg, valsize)
1369 	struct thread *td;
1370 	int s;
1371 	int level;
1372 	int name;
1373 	void *val;
1374 	enum uio_seg valseg;
1375 	socklen_t *valsize;
1376 {
1377 	int error;
1378 	struct  socket *so;
1379 	struct file *fp;
1380 	struct	sockopt sopt;
1381 
1382 	if (val == NULL)
1383 		*valsize = 0;
1384 	if ((int)*valsize < 0)
1385 		return (EINVAL);
1386 
1387 	sopt.sopt_dir = SOPT_GET;
1388 	sopt.sopt_level = level;
1389 	sopt.sopt_name = name;
1390 	sopt.sopt_val = val;
1391 	sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
1392 	switch (valseg) {
1393 	case UIO_USERSPACE:
1394 		sopt.sopt_td = td;
1395 		break;
1396 	case UIO_SYSSPACE:
1397 		sopt.sopt_td = NULL;
1398 		break;
1399 	default:
1400 		panic("kern_getsockopt called with bad valseg");
1401 	}
1402 
1403 	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1404 	if (error == 0) {
1405 		so = fp->f_data;
1406 		error = sogetopt(so, &sopt);
1407 		*valsize = sopt.sopt_valsize;
1408 		fdrop(fp, td);
1409 	}
1410 	return (error);
1411 }
1412 
1413 /*
1414  * getsockname1() - Get socket name.
1415  */
1416 /* ARGSUSED */
1417 static int
1418 getsockname1(td, uap, compat)
1419 	struct thread *td;
1420 	struct getsockname_args /* {
1421 		int	fdes;
1422 		struct sockaddr * __restrict asa;
1423 		socklen_t * __restrict alen;
1424 	} */ *uap;
1425 	int compat;
1426 {
1427 	struct sockaddr *sa;
1428 	socklen_t len;
1429 	int error;
1430 
1431 	error = copyin(uap->alen, &len, sizeof(len));
1432 	if (error)
1433 		return (error);
1434 
1435 	error = kern_getsockname(td, uap->fdes, &sa, &len);
1436 	if (error)
1437 		return (error);
1438 
1439 	if (len != 0) {
1440 #ifdef COMPAT_OLDSOCK
1441 		if (compat)
1442 			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1443 #endif
1444 		error = copyout(sa, uap->asa, (u_int)len);
1445 	}
1446 	free(sa, M_SONAME);
1447 	if (error == 0)
1448 		error = copyout(&len, uap->alen, sizeof(len));
1449 	return (error);
1450 }
1451 
1452 int
1453 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
1454     socklen_t *alen)
1455 {
1456 	struct socket *so;
1457 	struct file *fp;
1458 	socklen_t len;
1459 	int error;
1460 
1461 	if (*alen < 0)
1462 		return (EINVAL);
1463 
1464 	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
1465 	if (error)
1466 		return (error);
1467 	so = fp->f_data;
1468 	*sa = NULL;
1469 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
1470 	if (error)
1471 		goto bad;
1472 	if (*sa == NULL)
1473 		len = 0;
1474 	else
1475 		len = MIN(*alen, (*sa)->sa_len);
1476 	*alen = len;
1477 bad:
1478 	fdrop(fp, td);
1479 	if (error && *sa) {
1480 		free(*sa, M_SONAME);
1481 		*sa = NULL;
1482 	}
1483 	return (error);
1484 }
1485 
/*
 * getsockname() - native entry point; the address is returned in the
 * current sockaddr layout (compat == 0).
 */
int
getsockname(struct thread *td, struct getsockname_args *uap)
{
	int error;

	error = getsockname1(td, uap, 0);
	return (error);
}
1494 
1495 #ifdef COMPAT_OLDSOCK
/*
 * ogetsockname() - old-socket entry point; asks getsockname1() for the
 * osockaddr layout (compat == 1).
 */
int
ogetsockname(struct thread *td, struct getsockname_args *uap)
{
	int error;

	error = getsockname1(td, uap, 1);
	return (error);
}
1504 #endif /* COMPAT_OLDSOCK */
1505 
1506 /*
1507  * getpeername1() - Get name of peer for connected socket.
1508  */
1509 /* ARGSUSED */
1510 static int
1511 getpeername1(td, uap, compat)
1512 	struct thread *td;
1513 	struct getpeername_args /* {
1514 		int	fdes;
1515 		struct sockaddr * __restrict	asa;
1516 		socklen_t * __restrict	alen;
1517 	} */ *uap;
1518 	int compat;
1519 {
1520 	struct sockaddr *sa;
1521 	socklen_t len;
1522 	int error;
1523 
1524 	error = copyin(uap->alen, &len, sizeof (len));
1525 	if (error)
1526 		return (error);
1527 
1528 	error = kern_getpeername(td, uap->fdes, &sa, &len);
1529 	if (error)
1530 		return (error);
1531 
1532 	if (len != 0) {
1533 #ifdef COMPAT_OLDSOCK
1534 		if (compat)
1535 			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1536 #endif
1537 		error = copyout(sa, uap->asa, (u_int)len);
1538 	}
1539 	free(sa, M_SONAME);
1540 	if (error == 0)
1541 		error = copyout(&len, uap->alen, sizeof(len));
1542 	return (error);
1543 }
1544 
1545 int
1546 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
1547     socklen_t *alen)
1548 {
1549 	struct socket *so;
1550 	struct file *fp;
1551 	socklen_t len;
1552 	int error;
1553 
1554 	if (*alen < 0)
1555 		return (EINVAL);
1556 
1557 	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
1558 	if (error)
1559 		return (error);
1560 	so = fp->f_data;
1561 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1562 		error = ENOTCONN;
1563 		goto done;
1564 	}
1565 	*sa = NULL;
1566 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
1567 	if (error)
1568 		goto bad;
1569 	if (*sa == NULL)
1570 		len = 0;
1571 	else
1572 		len = MIN(*alen, (*sa)->sa_len);
1573 	*alen = len;
1574 bad:
1575 	if (error && *sa) {
1576 		free(*sa, M_SONAME);
1577 		*sa = NULL;
1578 	}
1579 done:
1580 	fdrop(fp, td);
1581 	return (error);
1582 }
1583 
/*
 * getpeername() - native entry point; the address is returned in the
 * current sockaddr layout (compat == 0).
 */
int
getpeername(struct thread *td, struct getpeername_args *uap)
{
	int error;

	error = getpeername1(td, uap, 0);
	return (error);
}
1592 
1593 #ifdef COMPAT_OLDSOCK
/*
 * ogetpeername() - old-socket entry point; asks getpeername1() for the
 * osockaddr layout (compat == 1).
 */
int
ogetpeername(struct thread *td, struct ogetpeername_args *uap)
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(td, args, 1));
}
1603 #endif /* COMPAT_OLDSOCK */
1604 
1605 int
1606 sockargs(mp, buf, buflen, type)
1607 	struct mbuf **mp;
1608 	caddr_t buf;
1609 	int buflen, type;
1610 {
1611 	struct sockaddr *sa;
1612 	struct mbuf *m;
1613 	int error;
1614 
1615 	if ((u_int)buflen > MLEN) {
1616 #ifdef COMPAT_OLDSOCK
1617 		if (type == MT_SONAME && (u_int)buflen <= 112)
1618 			buflen = MLEN;		/* unix domain compat. hack */
1619 		else
1620 #endif
1621 			if ((u_int)buflen > MCLBYTES)
1622 				return (EINVAL);
1623 	}
1624 	m = m_get(M_TRYWAIT, type);
1625 	if (m == NULL)
1626 		return (ENOBUFS);
1627 	if ((u_int)buflen > MLEN) {
1628 		MCLGET(m, M_TRYWAIT);
1629 		if ((m->m_flags & M_EXT) == 0) {
1630 			m_free(m);
1631 			return (ENOBUFS);
1632 		}
1633 	}
1634 	m->m_len = buflen;
1635 	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1636 	if (error)
1637 		(void) m_free(m);
1638 	else {
1639 		*mp = m;
1640 		if (type == MT_SONAME) {
1641 			sa = mtod(m, struct sockaddr *);
1642 
1643 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1644 			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1645 				sa->sa_family = sa->sa_len;
1646 #endif
1647 			sa->sa_len = buflen;
1648 		}
1649 	}
1650 	return (error);
1651 }
1652 
1653 int
1654 getsockaddr(namp, uaddr, len)
1655 	struct sockaddr **namp;
1656 	caddr_t uaddr;
1657 	size_t len;
1658 {
1659 	struct sockaddr *sa;
1660 	int error;
1661 
1662 	if (len > SOCK_MAXADDRLEN)
1663 		return (ENAMETOOLONG);
1664 	if (len < offsetof(struct sockaddr, sa_data[0]))
1665 		return (EINVAL);
1666 	MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1667 	error = copyin(uaddr, sa, len);
1668 	if (error) {
1669 		FREE(sa, M_SONAME);
1670 	} else {
1671 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1672 		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1673 			sa->sa_family = sa->sa_len;
1674 #endif
1675 		sa->sa_len = len;
1676 		*namp = sa;
1677 	}
1678 	return (error);
1679 }
1680 
1681 /*
1682  * Detach mapped page and release resources back to the system.
1683  */
1684 void
1685 sf_buf_mext(void *addr, void *args)
1686 {
1687 	vm_page_t m;
1688 
1689 	m = sf_buf_page(args);
1690 	sf_buf_free(args);
1691 	vm_page_lock_queues();
1692 	vm_page_unwire(m, 0);
1693 	/*
1694 	 * Check for the object going away on us. This can
1695 	 * happen since we don't hold a reference to it.
1696 	 * If so, we're responsible for freeing the page.
1697 	 */
1698 	if (m->wire_count == 0 && m->object == NULL)
1699 		vm_page_free(m);
1700 	vm_page_unlock_queues();
1701 }
1702 
/*
 * sendfile(2)
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Send the file referred to by 'fd', beginning at 'offset', to the socket
 * 's'.  Only 'nbytes' of the file are sent, or everything up to EOF when
 * nbytes == 0.  A header and/or trailer may optionally be added to the
 * socket output.  If 'sbytes' is given, the total number of bytes sent is
 * stored in *sbytes.
 *
 * This is the native entry point: do_sendfile() is called with compat == 0.
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
	int error;

	error = do_sendfile(td, uap, 0);
	return (error);
}
1720 
1721 static int
1722 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1723 {
1724 	struct sf_hdtr hdtr;
1725 	struct uio *hdr_uio, *trl_uio;
1726 	int error;
1727 
1728 	hdr_uio = trl_uio = NULL;
1729 
1730 	if (uap->hdtr != NULL) {
1731 		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1732 		if (error)
1733 			goto out;
1734 		if (hdtr.headers != NULL) {
1735 			error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1736 			if (error)
1737 				goto out;
1738 		}
1739 		if (hdtr.trailers != NULL) {
1740 			error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
1741 			if (error)
1742 				goto out;
1743 
1744 		}
1745 	}
1746 
1747 	error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
1748 out:
1749 	if (hdr_uio)
1750 		free(hdr_uio, M_IOV);
1751 	if (trl_uio)
1752 		free(trl_uio, M_IOV);
1753 	return (error);
1754 }
1755 
1756 #ifdef COMPAT_FREEBSD4
1757 int
1758 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
1759 {
1760 	struct sendfile_args args;
1761 
1762 	args.fd = uap->fd;
1763 	args.s = uap->s;
1764 	args.offset = uap->offset;
1765 	args.nbytes = uap->nbytes;
1766 	args.hdtr = uap->hdtr;
1767 	args.sbytes = uap->sbytes;
1768 	args.flags = uap->flags;
1769 
1770 	return (do_sendfile(td, &args, 1));
1771 }
1772 #endif /* COMPAT_FREEBSD4 */
1773 
1774 int
1775 kern_sendfile(struct thread *td, struct sendfile_args *uap,
1776     struct uio *hdr_uio, struct uio *trl_uio, int compat)
1777 {
1778 	struct file *sock_fp;
1779 	struct vnode *vp;
1780 	struct vm_object *obj = NULL;
1781 	struct socket *so = NULL;
1782 	struct mbuf *m = NULL;
1783 	struct sf_buf *sf;
1784 	struct vm_page *pg;
1785 	off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
1786 	int error, hdrlen = 0, mnw = 0;
1787 	int vfslocked;
1788 
1789 	/*
1790 	 * The file descriptor must be a regular file and have a
1791 	 * backing VM object.
1792 	 * File offset must be positive.  If it goes beyond EOF
1793 	 * we send only the header/trailer and no payload data.
1794 	 */
1795 	if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1796 		goto out;
1797 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1798 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1799 	obj = vp->v_object;
1800 	if (obj != NULL) {
1801 		/*
1802 		 * Temporarily increase the backing VM object's reference
1803 		 * count so that a forced reclamation of its vnode does not
1804 		 * immediately destroy it.
1805 		 */
1806 		VM_OBJECT_LOCK(obj);
1807 		if ((obj->flags & OBJ_DEAD) == 0) {
1808 			vm_object_reference_locked(obj);
1809 			VM_OBJECT_UNLOCK(obj);
1810 		} else {
1811 			VM_OBJECT_UNLOCK(obj);
1812 			obj = NULL;
1813 		}
1814 	}
1815 	VOP_UNLOCK(vp, 0, td);
1816 	VFS_UNLOCK_GIANT(vfslocked);
1817 	if (obj == NULL) {
1818 		error = EINVAL;
1819 		goto out;
1820 	}
1821 	if (uap->offset < 0) {
1822 		error = EINVAL;
1823 		goto out;
1824 	}
1825 
1826 	/*
1827 	 * The socket must be a stream socket and connected.
1828 	 * Remember if it a blocking or non-blocking socket.
1829 	 */
1830 	if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp,
1831 	    NULL)) != 0)
1832 		goto out;
1833 	so = sock_fp->f_data;
1834 	if (so->so_type != SOCK_STREAM) {
1835 		error = EINVAL;
1836 		goto out;
1837 	}
1838 	if ((so->so_state & SS_ISCONNECTED) == 0) {
1839 		error = ENOTCONN;
1840 		goto out;
1841 	}
1842 	/*
1843 	 * Do not wait on memory allocations but return ENOMEM for
1844 	 * caller to retry later.
1845 	 * XXX: Experimental.
1846 	 */
1847 	if (uap->flags & SF_MNOWAIT)
1848 		mnw = 1;
1849 
1850 #ifdef MAC
1851 	SOCK_LOCK(so);
1852 	error = mac_check_socket_send(td->td_ucred, so);
1853 	SOCK_UNLOCK(so);
1854 	if (error)
1855 		goto out;
1856 #endif
1857 
1858 	/* If headers are specified copy them into mbufs. */
1859 	if (hdr_uio != NULL) {
1860 		hdr_uio->uio_td = td;
1861 		hdr_uio->uio_rw = UIO_WRITE;
1862 		if (hdr_uio->uio_resid > 0) {
1863 			/*
1864 			 * In FBSD < 5.0 the nbytes to send also included
1865 			 * the header.  If compat is specified subtract the
1866 			 * header size from nbytes.
1867 			 */
1868 			if (compat) {
1869 				if (uap->nbytes > hdr_uio->uio_resid)
1870 					uap->nbytes -= hdr_uio->uio_resid;
1871 				else
1872 					uap->nbytes = 0;
1873 			}
1874 			m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
1875 			    0, 0, 0);
1876 			if (m == NULL) {
1877 				error = mnw ? EAGAIN : ENOBUFS;
1878 				goto out;
1879 			}
1880 			hdrlen = m_length(m, NULL);
1881 		}
1882 	}
1883 
1884 	/* Protect against multiple writers to the socket. */
1885 	(void) sblock(&so->so_snd, M_WAITOK);
1886 
1887 	/*
1888 	 * Loop through the pages of the file, starting with the requested
1889 	 * offset. Get a file page (do I/O if necessary), map the file page
1890 	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1891 	 * it on the socket.
1892 	 * This is done in two loops.  The inner loop turns as many pages
1893 	 * as it can, up to available socket buffer space, without blocking
1894 	 * into mbufs to have it bulk delivered into the socket send buffer.
1895 	 * The outer loop checks the state and available space of the socket
1896 	 * and takes care of the overall progress.
1897 	 */
1898 	for (off = uap->offset, rem = uap->nbytes; ; ) {
1899 		int loopbytes = 0;
1900 		int space = 0;
1901 		int done = 0;
1902 
1903 		/*
1904 		 * Check the socket state for ongoing connection,
1905 		 * no errors and space in socket buffer.
1906 		 * If space is low allow for the remainder of the
1907 		 * file to be processed if it fits the socket buffer.
1908 		 * Otherwise block in waiting for sufficient space
1909 		 * to proceed, or if the socket is nonblocking, return
1910 		 * to userland with EAGAIN while reporting how far
1911 		 * we've come.
1912 		 * We wait until the socket buffer has significant free
1913 		 * space to do bulk sends.  This makes good use of file
1914 		 * system read ahead and allows packet segmentation
1915 		 * offloading hardware to take over lots of work.  If
1916 		 * we were not careful here we would send off only one
1917 		 * sfbuf at a time.
1918 		 */
1919 		SOCKBUF_LOCK(&so->so_snd);
1920 		if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
1921 			so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
1922 retry_space:
1923 		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
1924 			error = EPIPE;
1925 			SOCKBUF_UNLOCK(&so->so_snd);
1926 			goto done;
1927 		} else if (so->so_error) {
1928 			error = so->so_error;
1929 			so->so_error = 0;
1930 			SOCKBUF_UNLOCK(&so->so_snd);
1931 			goto done;
1932 		}
1933 		space = sbspace(&so->so_snd);
1934 		if (space < rem &&
1935 		    (space <= 0 ||
1936 		     space < so->so_snd.sb_lowat)) {
1937 			if (so->so_state & SS_NBIO) {
1938 				SOCKBUF_UNLOCK(&so->so_snd);
1939 				error = EAGAIN;
1940 				goto done;
1941 			}
1942 			/*
1943 			 * sbwait drops the lock while sleeping.
1944 			 * When we loop back to retry_space the
1945 			 * state may have changed and we retest
1946 			 * for it.
1947 			 */
1948 			error = sbwait(&so->so_snd);
1949 			/*
1950 			 * An error from sbwait usually indicates that we've
1951 			 * been interrupted by a signal. If we've sent anything
1952 			 * then return bytes sent, otherwise return the error.
1953 			 */
1954 			if (error) {
1955 				SOCKBUF_UNLOCK(&so->so_snd);
1956 				goto done;
1957 			}
1958 			goto retry_space;
1959 		}
1960 		SOCKBUF_UNLOCK(&so->so_snd);
1961 
1962 		/*
1963 		 * Reduce space in the socket buffer by the size of
1964 		 * the header mbuf chain.
1965 		 * hdrlen is set to 0 after the first loop.
1966 		 */
1967 		space -= hdrlen;
1968 
1969 		/*
1970 		 * Loop and construct maximum sized mbuf chain to be bulk
1971 		 * dumped into socket buffer.
1972 		 */
1973 		while(space > loopbytes) {
1974 			vm_pindex_t pindex;
1975 			vm_offset_t pgoff;
1976 			struct mbuf *m0;
1977 
1978 			VM_OBJECT_LOCK(obj);
1979 			/*
1980 			 * Calculate the amount to transfer.
1981 			 * Not to exceed a page, the EOF,
1982 			 * or the passed in nbytes.
1983 			 */
1984 			pgoff = (vm_offset_t)(off & PAGE_MASK);
1985 			xfsize = omin(PAGE_SIZE - pgoff,
1986 			    obj->un_pager.vnp.vnp_size - uap->offset -
1987 			    fsbytes - loopbytes);
1988 			if (uap->nbytes)
1989 				rem = (uap->nbytes - fsbytes - loopbytes);
1990 			else
1991 				rem = obj->un_pager.vnp.vnp_size -
1992 				    uap->offset - fsbytes - loopbytes;
1993 			xfsize = omin(rem, xfsize);
1994 			if (xfsize <= 0) {
1995 				VM_OBJECT_UNLOCK(obj);
1996 				done = 1;		/* all data sent */
1997 				break;
1998 			}
1999 			/*
2000 			 * Don't overflow the send buffer.
2001 			 * Stop here and send out what we've
2002 			 * already got.
2003 			 */
2004 			if (space < loopbytes + xfsize) {
2005 				VM_OBJECT_UNLOCK(obj);
2006 				break;
2007 			}
2008 
2009 			/*
2010 			 * Attempt to look up the page.  Allocate
2011 			 * if not found or wait and loop if busy.
2012 			 */
2013 			pindex = OFF_TO_IDX(off);
2014 			pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
2015 			    VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY);
2016 
2017 			/*
2018 			 * Check if page is valid for what we need,
2019 			 * otherwise initiate I/O.
2020 			 * If we already turned some pages into mbufs,
2021 			 * send them off before we come here again and
2022 			 * block.
2023 			 */
2024 			if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
2025 				VM_OBJECT_UNLOCK(obj);
2026 			else if (m != NULL)
2027 				error = EAGAIN;	/* send what we already got */
2028 			else if (uap->flags & SF_NODISKIO)
2029 				error = EBUSY;
2030 			else {
2031 				int bsize, resid;
2032 
2033 				/*
2034 				 * Ensure that our page is still around
2035 				 * when the I/O completes.
2036 				 */
2037 				vm_page_io_start(pg);
2038 				VM_OBJECT_UNLOCK(obj);
2039 
2040 				/*
2041 				 * Get the page from backing store.
2042 				 */
2043 				bsize = vp->v_mount->mnt_stat.f_iosize;
2044 				vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2045 				vn_lock(vp, LK_SHARED | LK_RETRY, td);
2046 
2047 				/*
2048 				 * XXXMAC: Because we don't have fp->f_cred
2049 				 * here, we pass in NOCRED.  This is probably
2050 				 * wrong, but is consistent with our original
2051 				 * implementation.
2052 				 */
2053 				error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
2054 				    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
2055 				    IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
2056 				    td->td_ucred, NOCRED, &resid, td);
2057 				VOP_UNLOCK(vp, 0, td);
2058 				VFS_UNLOCK_GIANT(vfslocked);
2059 				VM_OBJECT_LOCK(obj);
2060 				vm_page_io_finish(pg);
2061 				if (!error)
2062 					VM_OBJECT_UNLOCK(obj);
2063 				mbstat.sf_iocnt++;
2064 			}
2065 			if (error) {
2066 				vm_page_lock_queues();
2067 				vm_page_unwire(pg, 0);
2068 				/*
2069 				 * See if anyone else might know about
2070 				 * this page.  If not and it is not valid,
2071 				 * then free it.
2072 				 */
2073 				if (pg->wire_count == 0 && pg->valid == 0 &&
2074 				    pg->busy == 0 && !(pg->oflags & VPO_BUSY) &&
2075 				    pg->hold_count == 0) {
2076 					vm_page_free(pg);
2077 				}
2078 				vm_page_unlock_queues();
2079 				VM_OBJECT_UNLOCK(obj);
2080 				if (error == EAGAIN)
2081 					error = 0;	/* not a real error */
2082 				break;
2083 			}
2084 
2085 			/*
2086 			 * Get a sendfile buf.  We usually wait as long
2087 			 * as necessary, but this wait can be interrupted.
2088 			 */
2089 			if ((sf = sf_buf_alloc(pg,
2090 			    (mnw ? SFB_NOWAIT : SFB_CATCH))) == NULL) {
2091 				mbstat.sf_allocfail++;
2092 				vm_page_lock_queues();
2093 				vm_page_unwire(pg, 0);
2094 				/*
2095 				 * XXX: Not same check as above!?
2096 				 */
2097 				if (pg->wire_count == 0 && pg->object == NULL)
2098 					vm_page_free(pg);
2099 				vm_page_unlock_queues();
2100 				error = (mnw ? EAGAIN : EINTR);
2101 				break;
2102 			}
2103 
2104 			/*
2105 			 * Get an mbuf and set it up as having
2106 			 * external storage.
2107 			 */
2108 			m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
2109 			if (m0 == NULL) {
2110 				error = (mnw ? EAGAIN : ENOBUFS);
2111 				sf_buf_mext((void *)sf_buf_kva(sf), sf);
2112 				break;
2113 			}
2114 			MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
2115 			    sf, M_RDONLY, EXT_SFBUF);
2116 			m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
2117 			m0->m_len = xfsize;
2118 
2119 			/* Append to mbuf chain. */
2120 			if (m != NULL)
2121 				m_cat(m, m0);
2122 			else
2123 				m = m0;
2124 
2125 			/* Keep track of bits processed. */
2126 			loopbytes += xfsize;
2127 			off += xfsize;
2128 		}
2129 
2130 		/* Add the buffer chain to the socket buffer. */
2131 		if (m != NULL) {
2132 			int mlen, err;
2133 
2134 			mlen = m_length(m, NULL);
2135 			SOCKBUF_LOCK(&so->so_snd);
2136 			if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2137 				error = EPIPE;
2138 				SOCKBUF_UNLOCK(&so->so_snd);
2139 				goto done;
2140 			}
2141 			SOCKBUF_UNLOCK(&so->so_snd);
2142 			/* Avoid error aliasing. */
2143 			err = (*so->so_proto->pr_usrreqs->pru_send)
2144 				    (so, 0, m, NULL, NULL, td);
2145 			if (err == 0) {
2146 				/*
2147 				 * We need two counters to get the
2148 				 * file offset and nbytes to send
2149 				 * right:
2150 				 * - sbytes contains the total amount
2151 				 *   of bytes sent, including headers.
2152 				 * - fsbytes contains the total amount
2153 				 *   of bytes sent from the file.
2154 				 */
2155 				sbytes += mlen;
2156 				fsbytes += mlen;
2157 				if (hdrlen) {
2158 					fsbytes -= hdrlen;
2159 					hdrlen = 0;
2160 				}
2161 			} else if (error == 0)
2162 				error = err;
2163 			m = NULL;	/* pru_send always consumes */
2164 		}
2165 
2166 		/* Quit outer loop on error or when we're done. */
2167 		if (error || done)
2168 			goto done;
2169 	}
2170 
2171 	/*
2172 	 * Send trailers. Wimp out and use writev(2).
2173 	 */
2174 	if (trl_uio != NULL) {
2175 		error = kern_writev(td, uap->s, trl_uio);
2176 		if (error)
2177 			goto done;
2178 		sbytes += td->td_retval[0];
2179 	}
2180 
2181 done:
2182 	sbunlock(&so->so_snd);
2183 out:
2184 	/*
2185 	 * If there was no error we have to clear td->td_retval[0]
2186 	 * because it may have been set by writev.
2187 	 */
2188 	if (error == 0) {
2189 		td->td_retval[0] = 0;
2190 	}
2191 	if (uap->sbytes != NULL) {
2192 		copyout(&sbytes, uap->sbytes, sizeof(off_t));
2193 	}
2194 	if (obj != NULL)
2195 		vm_object_deallocate(obj);
2196 	if (vp != NULL) {
2197 		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2198 		vrele(vp);
2199 		VFS_UNLOCK_GIANT(vfslocked);
2200 	}
2201 	if (so)
2202 		fdrop(sock_fp, td);
2203 	if (m)
2204 		m_freem(m);
2205 
2206 	if (error == ERESTART)
2207 		error = EINTR;
2208 
2209 	return (error);
2210 }
2211 
2212 /*
2213  * SCTP syscalls.
2214  * Functionality only compiled in if SCTP is defined in the kernel Makefile,
2215  * otherwise all return EOPNOTSUPP.
2216  * XXX: We should make this loadable one day.
2217  */
2218 int
2219 sctp_peeloff(td, uap)
2220 	struct thread *td;
2221 	struct sctp_peeloff_args /* {
2222 		int	sd;
2223 		caddr_t	name;
2224 	} */ *uap;
2225 {
2226 #ifdef SCTP
2227 	struct filedesc *fdp;
2228 	struct file *nfp = NULL;
2229 	int error;
2230 	struct socket *head, *so;
2231 	int fd;
2232 	u_int fflag;
2233 
2234 	fdp = td->td_proc->p_fd;
2235 	error = fgetsock(td, uap->sd, &head, &fflag);
2236 	if (error)
2237 		goto done2;
2238 	error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
2239 	if (error)
2240 		goto done2;
2241 	/*
2242 	 * At this point we know we do have a assoc to pull
2243 	 * we proceed to get the fd setup. This may block
2244 	 * but that is ok.
2245 	 */
2246 
2247 	error = falloc(td, &nfp, &fd);
2248 	if (error)
2249 		goto done;
2250 	td->td_retval[0] = fd;
2251 
2252 	so = sonewconn(head, SS_ISCONNECTED);
2253 	if (so == NULL)
2254 		goto noconnection;
2255 	/*
2256 	 * Before changing the flags on the socket, we have to bump the
2257 	 * reference count.  Otherwise, if the protocol calls sofree(),
2258 	 * the socket will be released due to a zero refcount.
2259 	 */
2260         SOCK_LOCK(so);
2261         soref(so);                      /* file descriptor reference */
2262         SOCK_UNLOCK(so);
2263 
2264 	ACCEPT_LOCK();
2265 
2266 	TAILQ_REMOVE(&head->so_comp, so, so_list);
2267 	head->so_qlen--;
2268 	so->so_state |= (head->so_state & SS_NBIO);
2269 	so->so_state &= ~SS_NOFDREF;
2270 	so->so_qstate &= ~SQ_COMP;
2271 	so->so_head = NULL;
2272 	ACCEPT_UNLOCK();
2273 	FILE_LOCK(nfp);
2274 	nfp->f_data = so;
2275 	nfp->f_flag = fflag;
2276 	nfp->f_type = DTYPE_SOCKET;
2277 	nfp->f_ops = &socketops;
2278 	FILE_UNLOCK(nfp);
2279 	error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
2280 	if (error)
2281 		goto noconnection;
2282 	if (head->so_sigio != NULL)
2283 		fsetown(fgetown(&head->so_sigio), &so->so_sigio);
2284 
2285 noconnection:
2286 	/*
2287 	 * close the new descriptor, assuming someone hasn't ripped it
2288 	 * out from under us.
2289 	 */
2290 	if (error)
2291 		fdclose(fdp, nfp, fd, td);
2292 
2293 	/*
2294 	 * Release explicitly held references before returning.
2295 	 */
2296 done:
2297 	if (nfp != NULL)
2298 		fdrop(nfp, td);
2299 	fputsock(head);
2300 done2:
2301 	return (error);
2302 #else  /* SCTP */
2303 	return (EOPNOTSUPP);
2304 #endif /* SCTP */
2305 }
2306 
2307 int
2308 sctp_generic_sendmsg (td, uap)
2309 	struct thread *td;
2310 	struct sctp_generic_sendmsg_args /* {
2311 		int sd,
2312 		caddr_t msg,
2313 		int mlen,
2314 		caddr_t to,
2315 		__socklen_t tolen,
2316 		struct sctp_sndrcvinfo *sinfo,
2317 		int flags
2318 	} */ *uap;
2319 {
2320 #ifdef SCTP
2321 	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2322 	struct socket *so;
2323 	struct file *fp = NULL;
2324 	int use_rcvinfo = 1;
2325 	int error = 0, len;
2326 	struct sockaddr *to = NULL;
2327 #ifdef KTRACE
2328 	struct uio *ktruio = NULL;
2329 #endif
2330 	struct uio auio;
2331 	struct iovec iov[1];
2332 
2333 	if (uap->sinfo) {
2334 		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2335 		if (error)
2336 			return (error);
2337 		u_sinfo = &sinfo;
2338 	}
2339 	if (uap->tolen) {
2340 		error = getsockaddr(&to, uap->to, uap->tolen);
2341 		if (error) {
2342 			to = NULL;
2343 			goto sctp_bad2;
2344 		}
2345 	}
2346 
2347 	error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2348 	if (error)
2349 		goto sctp_bad;
2350 
2351 	iov[0].iov_base = uap->msg;
2352 	iov[0].iov_len = uap->mlen;
2353 
2354 	so = (struct socket *)fp->f_data;
2355 #ifdef MAC
2356 	SOCK_LOCK(so);
2357 	error = mac_check_socket_send(td->td_ucred, so);
2358 	SOCK_UNLOCK(so);
2359 	if (error)
2360 		goto sctp_bad;
2361 #endif /* MAC */
2362 
2363 	auio.uio_iov =  iov;
2364 	auio.uio_iovcnt = 1;
2365 	auio.uio_segflg = UIO_USERSPACE;
2366 	auio.uio_rw = UIO_WRITE;
2367 	auio.uio_td = td;
2368 	auio.uio_offset = 0;			/* XXX */
2369 	auio.uio_resid = 0;
2370 	len = auio.uio_resid = uap->mlen;
2371 	error = sctp_lower_sosend(so, to, &auio,
2372 		    (struct mbuf *)NULL, (struct mbuf *)NULL,
2373 		    uap->flags, use_rcvinfo, u_sinfo, td);
2374 	if (error) {
2375 		if (auio.uio_resid != len && (error == ERESTART ||
2376 		    error == EINTR || error == EWOULDBLOCK))
2377 			error = 0;
2378 		/* Generation of SIGPIPE can be controlled per socket. */
2379 		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2380 		    !(uap->flags & MSG_NOSIGNAL)) {
2381 			PROC_LOCK(td->td_proc);
2382 			psignal(td->td_proc, SIGPIPE);
2383 			PROC_UNLOCK(td->td_proc);
2384 		}
2385 	}
2386 	if (error == 0)
2387 		td->td_retval[0] = len - auio.uio_resid;
2388 #ifdef KTRACE
2389 	if (ktruio != NULL) {
2390 		ktruio->uio_resid = td->td_retval[0];
2391 		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2392 	}
2393 #endif /* KTRACE */
2394 sctp_bad:
2395 	if (fp)
2396 		fdrop(fp, td);
2397 sctp_bad2:
2398 	if (to)
2399 		free(to, M_SONAME);
2400 	return (error);
2401 #else  /* SCTP */
2402 	return (EOPNOTSUPP);
2403 #endif /* SCTP */
2404 }
2405 
2406 int
2407 sctp_generic_sendmsg_iov(td, uap)
2408 	struct thread *td;
2409 	struct sctp_generic_sendmsg_iov_args /* {
2410 		int sd,
2411 		struct iovec *iov,
2412 		int iovlen,
2413 		caddr_t to,
2414 		__socklen_t tolen,
2415 		struct sctp_sndrcvinfo *sinfo,
2416 		int flags
2417 	} */ *uap;
2418 {
2419 #ifdef SCTP
2420 	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2421 	struct socket *so;
2422 	struct file *fp = NULL;
2423 	int use_rcvinfo = 1;
2424 	int error=0, len, i;
2425 	struct sockaddr *to = NULL;
2426 #ifdef KTRACE
2427 	struct uio *ktruio = NULL;
2428 #endif
2429 	struct uio auio;
2430 	struct iovec *iov, *tiov;
2431 
2432 	if (uap->sinfo) {
2433 		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2434 		if (error)
2435 			return (error);
2436 		u_sinfo = &sinfo;
2437 	}
2438 	if (uap->tolen) {
2439 		error = getsockaddr(&to, uap->to, uap->tolen);
2440 		if (error) {
2441 			to = NULL;
2442 			goto sctp_bad2;
2443 		}
2444 	}
2445 
2446 	error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2447 	if (error)
2448 		goto sctp_bad1;
2449 
2450 	error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
2451 	if (error)
2452 		goto sctp_bad1;
2453 
2454 	so = (struct socket *)fp->f_data;
2455 #ifdef MAC
2456 	SOCK_LOCK(so);
2457 	error = mac_check_socket_send(td->td_ucred, so);
2458 	SOCK_UNLOCK(so);
2459 	if (error)
2460 		goto sctp_bad;
2461 #endif /* MAC */
2462 
2463 	auio.uio_iov =  iov;
2464 	auio.uio_iovcnt = uap->iovlen;
2465 	auio.uio_segflg = UIO_USERSPACE;
2466 	auio.uio_rw = UIO_WRITE;
2467 	auio.uio_td = td;
2468 	auio.uio_offset = 0;			/* XXX */
2469 	auio.uio_resid = 0;
2470 	tiov = iov;
2471 	for (i = 0; i <uap->iovlen; i++, tiov++) {
2472 		if ((auio.uio_resid += tiov->iov_len) < 0) {
2473 			error = EINVAL;
2474 			goto sctp_bad;
2475 		}
2476 	}
2477 	len = auio.uio_resid;
2478 	error = sctp_lower_sosend(so, to, &auio,
2479 		    (struct mbuf *)NULL, (struct mbuf *)NULL,
2480 		    uap->flags, use_rcvinfo, u_sinfo, td);
2481 	if (error) {
2482 		if (auio.uio_resid != len && (error == ERESTART ||
2483 		    error == EINTR || error == EWOULDBLOCK))
2484 			error = 0;
2485 		/* Generation of SIGPIPE can be controlled per socket */
2486 		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2487 		    !(uap->flags & MSG_NOSIGNAL)) {
2488 			PROC_LOCK(td->td_proc);
2489 			psignal(td->td_proc, SIGPIPE);
2490 			PROC_UNLOCK(td->td_proc);
2491 		}
2492 	}
2493 	if (error == 0)
2494 		td->td_retval[0] = len - auio.uio_resid;
2495 #ifdef KTRACE
2496 	if (ktruio != NULL) {
2497 		ktruio->uio_resid = td->td_retval[0];
2498 		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2499 	}
2500 #endif /* KTRACE */
2501 sctp_bad:
2502 	free(iov, M_IOV);
2503 sctp_bad1:
2504 	if (fp)
2505 		fdrop(fp, td);
2506 sctp_bad2:
2507 	if (to)
2508 		free(to, M_SONAME);
2509 	return (error);
2510 #else  /* SCTP */
2511 	return (EOPNOTSUPP);
2512 #endif /* SCTP */
2513 }
2514 
2515 int
2516 sctp_generic_recvmsg(td, uap)
2517 	struct thread *td;
2518 	struct sctp_generic_recvmsg_args /* {
2519 		int sd,
2520 		struct iovec *iov,
2521 		int iovlen,
2522 		struct sockaddr *from,
2523 		__socklen_t *fromlenaddr,
2524 		struct sctp_sndrcvinfo *sinfo,
2525 		int *msg_flags
2526 	} */ *uap;
2527 {
2528 #ifdef SCTP
2529 	u_int8_t sockbufstore[256];
2530 	struct uio auio;
2531 	struct iovec *iov, *tiov;
2532 	struct sctp_sndrcvinfo sinfo;
2533 	struct socket *so;
2534 	struct file *fp = NULL;
2535 	struct sockaddr *fromsa;
2536 	int fromlen;
2537 	int len, i, msg_flags;
2538 	int error = 0;
2539 #ifdef KTRACE
2540 	struct uio *ktruio = NULL;
2541 #endif
2542 	error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2543 	if (error) {
2544 		return (error);
2545 	}
2546 	error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
2547 	if (error) {
2548 		goto out1;
2549 	}
2550 
2551 	so = fp->f_data;
2552 #ifdef MAC
2553 	SOCK_LOCK(so);
2554 	error = mac_check_socket_receive(td->td_ucred, so);
2555 	SOCK_UNLOCK(so);
2556 	if (error) {
2557 		goto out;
2558 		return (error);
2559 	}
2560 #endif /* MAC */
2561 
2562 	if (uap->fromlenaddr) {
2563 		error = copyin(uap->fromlenaddr,
2564 		    &fromlen, sizeof (fromlen));
2565 		if (error) {
2566 			goto out;
2567 		}
2568 	} else {
2569 		fromlen = 0;
2570 	}
2571 	if(uap->msg_flags) {
2572 		error = copyin(uap->msg_flags, &msg_flags, sizeof (int));
2573 		if (error) {
2574 			goto out;
2575 		}
2576 	} else {
2577 		msg_flags = 0;
2578 	}
2579 	auio.uio_iov = iov;
2580 	auio.uio_iovcnt = uap->iovlen;
2581   	auio.uio_segflg = UIO_USERSPACE;
2582 	auio.uio_rw = UIO_READ;
2583 	auio.uio_td = td;
2584 	auio.uio_offset = 0;			/* XXX */
2585 	auio.uio_resid = 0;
2586 	tiov = iov;
2587 	for (i = 0; i <uap->iovlen; i++, tiov++) {
2588 		if ((auio.uio_resid += tiov->iov_len) < 0) {
2589 			error = EINVAL;
2590 			goto out;
2591 		}
2592 	}
2593 	len = auio.uio_resid;
2594 	fromsa = (struct sockaddr *)sockbufstore;
2595 
2596 #ifdef KTRACE
2597 	if (KTRPOINT(td, KTR_GENIO))
2598 		ktruio = cloneuio(&auio);
2599 #endif /* KTRACE */
2600 	error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
2601 		    fromsa, fromlen, &msg_flags,
2602 		    (struct sctp_sndrcvinfo *)&sinfo, 1);
2603 	if (error) {
2604 		if (auio.uio_resid != (int)len && (error == ERESTART ||
2605 		    error == EINTR || error == EWOULDBLOCK))
2606 			error = 0;
2607 	} else {
2608 		if (uap->sinfo)
2609 			error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
2610 	}
2611 #ifdef KTRACE
2612 	if (ktruio != NULL) {
2613 		ktruio->uio_resid = (int)len - auio.uio_resid;
2614 		ktrgenio(uap->sd, UIO_READ, ktruio, error);
2615 	}
2616 #endif /* KTRACE */
2617 	if (error)
2618 		goto out;
2619 	td->td_retval[0] = (int)len - auio.uio_resid;
2620 
2621 	if (fromlen && uap->from) {
2622 		len = fromlen;
2623 		if (len <= 0 || fromsa == 0)
2624 			len = 0;
2625 		else {
2626 			len = MIN(len, fromsa->sa_len);
2627 			error = copyout(fromsa, uap->from, (unsigned)len);
2628 			if (error)
2629 				goto out;
2630 		}
2631 		error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t));
2632 		if (error) {
2633 			goto out;
2634 		}
2635 	}
2636 	if (uap->msg_flags) {
2637 		error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
2638 		if (error) {
2639 			goto out;
2640 		}
2641 	}
2642 out:
2643 	free(iov, M_IOV);
2644 out1:
2645 	if (fp)
2646 		fdrop(fp, td);
2647 
2648 	return (error);
2649 #else  /* SCTP */
2650 	return (EOPNOTSUPP);
2651 #endif /* SCTP */
2652 }
2653