xref: /freebsd/sys/kern/uipc_syscalls.c (revision d056fa046c6a91b90cd98165face0e42a33a5173)
1 /*-
2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * sendfile(2) and related extensions:
6  * Copyright (c) 1998, David Greenman. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 #include "opt_compat.h"
39 #include "opt_ktrace.h"
40 #include "opt_mac.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/lock.h>
46 #include <sys/mac.h>
47 #include <sys/mutex.h>
48 #include <sys/sysproto.h>
49 #include <sys/malloc.h>
50 #include <sys/filedesc.h>
51 #include <sys/event.h>
52 #include <sys/proc.h>
53 #include <sys/fcntl.h>
54 #include <sys/file.h>
55 #include <sys/filio.h>
56 #include <sys/mount.h>
57 #include <sys/mbuf.h>
58 #include <sys/protosw.h>
59 #include <sys/sf_buf.h>
60 #include <sys/socket.h>
61 #include <sys/socketvar.h>
62 #include <sys/signalvar.h>
63 #include <sys/syscallsubr.h>
64 #include <sys/sysctl.h>
65 #include <sys/uio.h>
66 #include <sys/vnode.h>
67 #ifdef KTRACE
68 #include <sys/ktrace.h>
69 #endif
70 
71 #include <vm/vm.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_page.h>
74 #include <vm/vm_pageout.h>
75 #include <vm/vm_kern.h>
76 #include <vm/vm_extern.h>
77 
78 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
79 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
80 
81 static int accept1(struct thread *td, struct accept_args *uap, int compat);
82 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
83 static int getsockname1(struct thread *td, struct getsockname_args *uap,
84 			int compat);
85 static int getpeername1(struct thread *td, struct getpeername_args *uap,
86 			int compat);
87 
88 /*
89  * NSFBUFS-related variables and associated sysctls
90  */
91 int nsfbufs;
92 int nsfbufspeak;
93 int nsfbufsused;
94 
95 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
96     "Maximum number of sendfile(2) sf_bufs available");
97 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
98     "Number of sendfile(2) sf_bufs at peak usage");
99 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
100     "Number of sendfile(2) sf_bufs in use");
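/*
 * Usage sketch: the counters above are exported as kern.ipc.nsfbufs,
 * kern.ipc.nsfbufspeak and kern.ipc.nsfbufsused, so they can be inspected
 * from userland with sysctl(8) or sysctlbyname(3).  A minimal sketch,
 * assuming a userland program with error handling omitted:
 *
 *	int used;
 *	size_t len = sizeof(used);
 *	if (sysctlbyname("kern.ipc.nsfbufsused", &used, &len, NULL, 0) == 0)
 *		printf("sendfile sf_bufs in use: %d\n", used);
 *
 * nsfbufs itself is CTLFLAG_RDTUN, i.e. read-only at run time and normally
 * set as a loader tunable at boot.
 */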
101 
102 /*
103  * Convert a user file descriptor to a kernel file entry.  A reference on the
104  * file entry is held upon returning.  This is lighter weight than
105  * fgetsock(), which bumps the socket reference and drops the file reference
106  * count instead, as this approach avoids several additional mutex operations
107  * associated with the additional reference count.  If requested, return the
108  * open file flags.
109  */
110 static int
111 getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp)
112 {
113 	struct file *fp;
114 	int error;
115 
116 	fp = NULL;
117 	if (fdp == NULL)
118 		error = EBADF;
119 	else {
120 		FILEDESC_LOCK_FAST(fdp);
121 		fp = fget_locked(fdp, fd);
122 		if (fp == NULL)
123 			error = EBADF;
124 		else if (fp->f_type != DTYPE_SOCKET) {
125 			fp = NULL;
126 			error = ENOTSOCK;
127 		} else {
128 			fhold(fp);
129 			if (fflagp != NULL)
130 				*fflagp = fp->f_flag;
131 			error = 0;
132 		}
133 		FILEDESC_UNLOCK_FAST(fdp);
134 	}
135 	*fpp = fp;
136 	return (error);
137 }
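/*
 * A minimal sketch of the calling convention used throughout this file:
 * getsock() returns a held file reference which the caller must release
 * with fdrop() once it is done with the socket (compare kern_bind() and
 * listen() below).
 *
 *	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
 *	if (error)
 *		return (error);
 *	so = fp->f_data;
 *	... operate on `so' ...
 *	fdrop(fp, td);
 */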
138 
139 /*
140  * System call interface to the socket abstraction.
141  */
142 #if defined(COMPAT_43)
143 #define COMPAT_OLDSOCK
144 #endif
145 
146 /*
147  * MPSAFE
148  */
149 int
150 socket(td, uap)
151 	struct thread *td;
152 	register struct socket_args /* {
153 		int	domain;
154 		int	type;
155 		int	protocol;
156 	} */ *uap;
157 {
158 	struct filedesc *fdp;
159 	struct socket *so;
160 	struct file *fp;
161 	int fd, error;
162 
163 #ifdef MAC
164 	error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
165 	    uap->protocol);
166 	if (error)
167 		return (error);
168 #endif
169 	fdp = td->td_proc->p_fd;
170 	error = falloc(td, &fp, &fd);
171 	if (error)
172 		return (error);
173 	/* An extra reference on `fp' has been held for us by falloc(). */
174 	NET_LOCK_GIANT();
175 	error = socreate(uap->domain, &so, uap->type, uap->protocol,
176 	    td->td_ucred, td);
177 	NET_UNLOCK_GIANT();
178 	if (error) {
179 		fdclose(fdp, fp, fd, td);
180 	} else {
181 		FILEDESC_LOCK_FAST(fdp);
182 		fp->f_data = so;	/* already has ref count */
183 		fp->f_flag = FREAD|FWRITE;
184 		fp->f_ops = &socketops;
185 		fp->f_type = DTYPE_SOCKET;
186 		FILEDESC_UNLOCK_FAST(fdp);
187 		td->td_retval[0] = fd;
188 	}
189 	fdrop(fp, td);
190 	return (error);
191 }
192 
193 /*
194  * MPSAFE
195  */
196 /* ARGSUSED */
197 int
198 bind(td, uap)
199 	struct thread *td;
200 	register struct bind_args /* {
201 		int	s;
202 		caddr_t	name;
203 		int	namelen;
204 	} */ *uap;
205 {
206 	struct sockaddr *sa;
207 	int error;
208 
209 	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
210 		return (error);
211 
212 	return (kern_bind(td, uap->s, sa));
213 }
214 
215 int
216 kern_bind(td, fd, sa)
217 	struct thread *td;
218 	int fd;
219 	struct sockaddr *sa;
220 {
221 	struct socket *so;
222 	struct file *fp;
223 	int error;
224 
225 	NET_LOCK_GIANT();
226 	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
227 	if (error)
228 		goto done2;
229 	so = fp->f_data;
230 #ifdef MAC
231 	SOCK_LOCK(so);
232 	error = mac_check_socket_bind(td->td_ucred, so, sa);
233 	SOCK_UNLOCK(so);
234 	if (error)
235 		goto done1;
236 #endif
237 	error = sobind(so, sa, td);
238 #ifdef MAC
239 done1:
240 #endif
241 	fdrop(fp, td);
242 done2:
243 	NET_UNLOCK_GIANT();
244 	FREE(sa, M_SONAME);
245 	return (error);
246 }
247 
248 /*
249  * MPSAFE
250  */
251 /* ARGSUSED */
252 int
253 listen(td, uap)
254 	struct thread *td;
255 	register struct listen_args /* {
256 		int	s;
257 		int	backlog;
258 	} */ *uap;
259 {
260 	struct socket *so;
261 	struct file *fp;
262 	int error;
263 
264 	NET_LOCK_GIANT();
265 	error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
266 	if (error == 0) {
267 		so = fp->f_data;
268 #ifdef MAC
269 		SOCK_LOCK(so);
270 		error = mac_check_socket_listen(td->td_ucred, so);
271 		SOCK_UNLOCK(so);
272 		if (error)
273 			goto done;
274 #endif
275 		error = solisten(so, uap->backlog, td);
276 #ifdef MAC
277 done:
278 #endif
279 		fdrop(fp, td);
280 	}
281 	NET_UNLOCK_GIANT();
282 	return(error);
283 }
284 
285 /*
286  * accept1()
287  * MPSAFE
288  */
289 static int
290 accept1(td, uap, compat)
291 	struct thread *td;
292 	register struct accept_args /* {
293 		int	s;
294 		struct sockaddr	* __restrict name;
295 		socklen_t	* __restrict anamelen;
296 	} */ *uap;
297 	int compat;
298 {
299 	struct sockaddr *name;
300 	socklen_t namelen;
301 	int error;
302 
303 	if (uap->name == NULL)
304 		return (kern_accept(td, uap->s, NULL, NULL));
305 
306 	error = copyin(uap->anamelen, &namelen, sizeof (namelen));
307 	if (error)
308 		return (error);
309 
310 	error = kern_accept(td, uap->s, &name, &namelen);
311 
312 	/*
313 	 * Return a namelen of zero for older code that might
314 	 * ignore the return value from accept().
315 	 */
316 	if (error) {
317 		(void) copyout(&namelen,
318 		    uap->anamelen, sizeof(*uap->anamelen));
319 		return (error);
320 	}
321 
322 	if (error == 0 && name != NULL) {
323 #ifdef COMPAT_OLDSOCK
324 		if (compat)
325 			((struct osockaddr *)name)->sa_family =
326 			    name->sa_family;
327 #endif
328 		error = copyout(name, uap->name, namelen);
329 	}
330 	if (error == 0)
331 		error = copyout(&namelen, uap->anamelen,
332 		    sizeof(namelen));
333 	if (error)
334 		kern_close(td, td->td_retval[0]);
335 	free(name, M_SONAME);
336 	return (error);
337 }
338 
339 int
340 kern_accept(struct thread *td, int s, struct sockaddr **name,
341     socklen_t *namelen)
342 {
343 	struct filedesc *fdp;
344 	struct file *headfp, *nfp = NULL;
345 	struct sockaddr *sa = NULL;
346 	int error;
347 	struct socket *head, *so;
348 	int fd;
349 	u_int fflag;
350 	pid_t pgid;
351 	int tmp;
352 
353 	if (name) {
354 		*name = NULL;
355 		if (*namelen < 0)
356 			return (EINVAL);
357 	}
358 
359 	fdp = td->td_proc->p_fd;
360 	NET_LOCK_GIANT();
361 	error = getsock(fdp, s, &headfp, &fflag);
362 	if (error)
363 		goto done2;
364 	head = headfp->f_data;
365 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
366 		error = EINVAL;
367 		goto done;
368 	}
369 #ifdef MAC
370 	SOCK_LOCK(head);
371 	error = mac_check_socket_accept(td->td_ucred, head);
372 	SOCK_UNLOCK(head);
373 	if (error != 0)
374 		goto done;
375 #endif
376 	error = falloc(td, &nfp, &fd);
377 	if (error)
378 		goto done;
379 	ACCEPT_LOCK();
380 	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
381 		ACCEPT_UNLOCK();
382 		error = EWOULDBLOCK;
383 		goto noconnection;
384 	}
385 	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
386 		if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
387 			head->so_error = ECONNABORTED;
388 			break;
389 		}
390 		error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
391 		    "accept", 0);
392 		if (error) {
393 			ACCEPT_UNLOCK();
394 			goto noconnection;
395 		}
396 	}
397 	if (head->so_error) {
398 		error = head->so_error;
399 		head->so_error = 0;
400 		ACCEPT_UNLOCK();
401 		goto noconnection;
402 	}
403 	so = TAILQ_FIRST(&head->so_comp);
404 	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
405 	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
406 
407 	/*
408 	 * Before changing the flags on the socket, we have to bump the
409 	 * reference count.  Otherwise, if the protocol calls sofree(),
410 	 * the socket will be released due to a zero refcount.
411 	 */
412 	SOCK_LOCK(so);			/* soref() and so_state update */
413 	soref(so);			/* file descriptor reference */
414 
415 	TAILQ_REMOVE(&head->so_comp, so, so_list);
416 	head->so_qlen--;
417 	so->so_state |= (head->so_state & SS_NBIO);
418 	so->so_qstate &= ~SQ_COMP;
419 	so->so_head = NULL;
420 
421 	SOCK_UNLOCK(so);
422 	ACCEPT_UNLOCK();
423 
424 	/* An extra reference on `nfp' has been held for us by falloc(). */
425 	td->td_retval[0] = fd;
426 
427 	/* connection has been removed from the listen queue */
428 	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
429 
430 	pgid = fgetown(&head->so_sigio);
431 	if (pgid != 0)
432 		fsetown(pgid, &so->so_sigio);
433 
434 	FILE_LOCK(nfp);
435 	nfp->f_data = so;	/* nfp has ref count from falloc */
436 	nfp->f_flag = fflag;
437 	nfp->f_ops = &socketops;
438 	nfp->f_type = DTYPE_SOCKET;
439 	FILE_UNLOCK(nfp);
440 	/* Sync socket nonblocking/async state with file flags */
441 	tmp = fflag & FNONBLOCK;
442 	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
443 	tmp = fflag & FASYNC;
444 	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
445 	sa = 0;
446 	error = soaccept(so, &sa);
447 	if (error) {
448 		/*
449 		 * Return a namelen of zero for older code that might
450 		 * ignore the return value from accept().
451 		 */
452 		if (name)
453 			*namelen = 0;
454 		goto noconnection;
455 	}
456 	if (sa == NULL) {
457 		if (name)
458 			*namelen = 0;
459 		goto done;
460 	}
461 	if (name) {
462 		/* check sa_len before it is destroyed */
463 		if (*namelen > sa->sa_len)
464 			*namelen = sa->sa_len;
465 		*name = sa;
466 		sa = NULL;
467 	}
468 noconnection:
469 	if (sa)
470 		FREE(sa, M_SONAME);
471 
472 	/*
473 	 * close the new descriptor, assuming someone hasn't ripped it
474 	 * out from under us.
475 	 */
476 	if (error)
477 		fdclose(fdp, nfp, fd, td);
478 
479 	/*
480 	 * Release explicitly held references before returning.
481 	 */
482 done:
483 	if (nfp != NULL)
484 		fdrop(nfp, td);
485 	fdrop(headfp, td);
486 done2:
487 	NET_UNLOCK_GIANT();
488 	return (error);
489 }
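/*
 * Userland sketch of the non-blocking path handled above: with the listening
 * socket in non-blocking mode, accept(2) fails with EWOULDBLOCK when the
 * completed-connection queue is empty.  Assumes `lfd' is a listening socket
 * that already has O_NONBLOCK set.
 *
 *	struct sockaddr_storage ss;
 *	socklen_t slen = sizeof(ss);
 *	int nfd = accept(lfd, (struct sockaddr *)&ss, &slen);
 *	if (nfd == -1 && (errno == EWOULDBLOCK || errno == EAGAIN))
 *		... no connection ready yet, retry later ...
 */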
490 
491 /*
492  * MPSAFE (accept1() is MPSAFE)
493  */
494 int
495 accept(td, uap)
496 	struct thread *td;
497 	struct accept_args *uap;
498 {
499 
500 	return (accept1(td, uap, 0));
501 }
502 
503 #ifdef COMPAT_OLDSOCK
504 /*
505  * MPSAFE (accept1() is MPSAFE)
506  */
507 int
508 oaccept(td, uap)
509 	struct thread *td;
510 	struct accept_args *uap;
511 {
512 
513 	return (accept1(td, uap, 1));
514 }
515 #endif /* COMPAT_OLDSOCK */
516 
517 /*
518  * MPSAFE
519  */
520 /* ARGSUSED */
521 int
522 connect(td, uap)
523 	struct thread *td;
524 	register struct connect_args /* {
525 		int	s;
526 		caddr_t	name;
527 		int	namelen;
528 	} */ *uap;
529 {
530 	struct sockaddr *sa;
531 	int error;
532 
533 	error = getsockaddr(&sa, uap->name, uap->namelen);
534 	if (error)
535 		return (error);
536 
537 	return (kern_connect(td, uap->s, sa));
538 }
539 
540 
541 int
542 kern_connect(td, fd, sa)
543 	struct thread *td;
544 	int fd;
545 	struct sockaddr *sa;
546 {
547 	struct socket *so;
548 	struct file *fp;
549 	int error;
550 	int interrupted = 0;
551 
552 	NET_LOCK_GIANT();
553 	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
554 	if (error)
555 		goto done2;
556 	so = fp->f_data;
557 	if (so->so_state & SS_ISCONNECTING) {
558 		error = EALREADY;
559 		goto done1;
560 	}
561 #ifdef MAC
562 	SOCK_LOCK(so);
563 	error = mac_check_socket_connect(td->td_ucred, so, sa);
564 	SOCK_UNLOCK(so);
565 	if (error)
566 		goto bad;
567 #endif
568 	error = soconnect(so, sa, td);
569 	if (error)
570 		goto bad;
571 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
572 		error = EINPROGRESS;
573 		goto done1;
574 	}
575 	SOCK_LOCK(so);
576 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
577 		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
578 		    "connec", 0);
579 		if (error) {
580 			if (error == EINTR || error == ERESTART)
581 				interrupted = 1;
582 			break;
583 		}
584 	}
585 	if (error == 0) {
586 		error = so->so_error;
587 		so->so_error = 0;
588 	}
589 	SOCK_UNLOCK(so);
590 bad:
591 	if (!interrupted)
592 		so->so_state &= ~SS_ISCONNECTING;
593 	if (error == ERESTART)
594 		error = EINTR;
595 done1:
596 	fdrop(fp, td);
597 done2:
598 	NET_UNLOCK_GIANT();
599 	FREE(sa, M_SONAME);
600 	return (error);
601 }
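/*
 * Userland sketch of the non-blocking case above: connect(2) on a socket in
 * non-blocking mode returns EINPROGRESS, and completion is later detected by
 * waiting for writability and reading SO_ERROR.  Assumes `s' already has
 * O_NONBLOCK set and `sin' is a filled-in address; polling details omitted.
 *
 *	if (connect(s, (struct sockaddr *)&sin, sizeof(sin)) == -1 &&
 *	    errno == EINPROGRESS) {
 *		... wait for the socket to become writable, then ...
 *		int soerr;
 *		socklen_t slen = sizeof(soerr);
 *		getsockopt(s, SOL_SOCKET, SO_ERROR, &soerr, &slen);
 *	}
 */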
602 
603 /*
604  * MPSAFE
605  */
606 int
607 socketpair(td, uap)
608 	struct thread *td;
609 	register struct socketpair_args /* {
610 		int	domain;
611 		int	type;
612 		int	protocol;
613 		int	*rsv;
614 	} */ *uap;
615 {
616 	register struct filedesc *fdp = td->td_proc->p_fd;
617 	struct file *fp1, *fp2;
618 	struct socket *so1, *so2;
619 	int fd, error, sv[2];
620 
621 #ifdef MAC
622 	/* We might want to have a separate check for socket pairs. */
623 	error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
624 	    uap->protocol);
625 	if (error)
626 		return (error);
627 #endif
628 
629 	NET_LOCK_GIANT();
630 	error = socreate(uap->domain, &so1, uap->type, uap->protocol,
631 	    td->td_ucred, td);
632 	if (error)
633 		goto done2;
634 	error = socreate(uap->domain, &so2, uap->type, uap->protocol,
635 	    td->td_ucred, td);
636 	if (error)
637 		goto free1;
638 	/* On success extra references to `fp1' and `fp2' are held by falloc(). */
639 	error = falloc(td, &fp1, &fd);
640 	if (error)
641 		goto free2;
642 	sv[0] = fd;
643 	fp1->f_data = so1;	/* so1 already has ref count */
644 	error = falloc(td, &fp2, &fd);
645 	if (error)
646 		goto free3;
647 	fp2->f_data = so2;	/* so2 already has ref count */
648 	sv[1] = fd;
649 	error = soconnect2(so1, so2);
650 	if (error)
651 		goto free4;
652 	if (uap->type == SOCK_DGRAM) {
653 		/*
654 		 * Datagram socket connection is asymmetric.
655 		 */
656 		 error = soconnect2(so2, so1);
657 		 if (error)
658 			goto free4;
659 	}
660 	FILE_LOCK(fp1);
661 	fp1->f_flag = FREAD|FWRITE;
662 	fp1->f_ops = &socketops;
663 	fp1->f_type = DTYPE_SOCKET;
664 	FILE_UNLOCK(fp1);
665 	FILE_LOCK(fp2);
666 	fp2->f_flag = FREAD|FWRITE;
667 	fp2->f_ops = &socketops;
668 	fp2->f_type = DTYPE_SOCKET;
669 	FILE_UNLOCK(fp2);
670 	error = copyout(sv, uap->rsv, 2 * sizeof (int));
671 	fdrop(fp1, td);
672 	fdrop(fp2, td);
673 	goto done2;
674 free4:
675 	fdclose(fdp, fp2, sv[1], td);
676 	fdrop(fp2, td);
677 free3:
678 	fdclose(fdp, fp1, sv[0], td);
679 	fdrop(fp1, td);
680 free2:
681 	(void)soclose(so2);
682 free1:
683 	(void)soclose(so1);
684 done2:
685 	NET_UNLOCK_GIANT();
686 	return (error);
687 }
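/*
 * Userland sketch: socketpair(2) is typically used to create a pair of
 * connected local-domain stream sockets, e.g. for parent/child IPC around
 * fork(2).  Error handling abbreviated.
 *
 *	int sv[2];
 *	if (socketpair(PF_LOCAL, SOCK_STREAM, 0, sv) == -1)
 *		err(1, "socketpair");
 *	... sv[0] and sv[1] are now connected to each other ...
 */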
688 
689 static int
690 sendit(td, s, mp, flags)
691 	register struct thread *td;
692 	int s;
693 	register struct msghdr *mp;
694 	int flags;
695 {
696 	struct mbuf *control;
697 	struct sockaddr *to;
698 	int error;
699 
700 	if (mp->msg_name != NULL) {
701 		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
702 		if (error) {
703 			to = NULL;
704 			goto bad;
705 		}
706 		mp->msg_name = to;
707 	} else {
708 		to = NULL;
709 	}
710 
711 	if (mp->msg_control) {
712 		if (mp->msg_controllen < sizeof(struct cmsghdr)
713 #ifdef COMPAT_OLDSOCK
714 		    && mp->msg_flags != MSG_COMPAT
715 #endif
716 		) {
717 			error = EINVAL;
718 			goto bad;
719 		}
720 		error = sockargs(&control, mp->msg_control,
721 		    mp->msg_controllen, MT_CONTROL);
722 		if (error)
723 			goto bad;
724 #ifdef COMPAT_OLDSOCK
725 		if (mp->msg_flags == MSG_COMPAT) {
726 			register struct cmsghdr *cm;
727 
728 			M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
729 			if (control == 0) {
730 				error = ENOBUFS;
731 				goto bad;
732 			} else {
733 				cm = mtod(control, struct cmsghdr *);
734 				cm->cmsg_len = control->m_len;
735 				cm->cmsg_level = SOL_SOCKET;
736 				cm->cmsg_type = SCM_RIGHTS;
737 			}
738 		}
739 #endif
740 	} else {
741 		control = NULL;
742 	}
743 
744 	error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
745 
746 bad:
747 	if (to)
748 		FREE(to, M_SONAME);
749 	return (error);
750 }
751 
752 int
753 kern_sendit(td, s, mp, flags, control, segflg)
754 	struct thread *td;
755 	int s;
756 	struct msghdr *mp;
757 	int flags;
758 	struct mbuf *control;
759 	enum uio_seg segflg;
760 {
761 	struct file *fp;
762 	struct uio auio;
763 	struct iovec *iov;
764 	struct socket *so;
765 	int i;
766 	int len, error;
767 #ifdef KTRACE
768 	struct uio *ktruio = NULL;
769 #endif
770 
771 	NET_LOCK_GIANT();
772 	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
773 	if (error)
774 		goto bad2;
775 	so = (struct socket *)fp->f_data;
776 
777 #ifdef MAC
778 	SOCK_LOCK(so);
779 	error = mac_check_socket_send(td->td_ucred, so);
780 	SOCK_UNLOCK(so);
781 	if (error)
782 		goto bad;
783 #endif
784 
785 	auio.uio_iov = mp->msg_iov;
786 	auio.uio_iovcnt = mp->msg_iovlen;
787 	auio.uio_segflg = segflg;
788 	auio.uio_rw = UIO_WRITE;
789 	auio.uio_td = td;
790 	auio.uio_offset = 0;			/* XXX */
791 	auio.uio_resid = 0;
792 	iov = mp->msg_iov;
793 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
794 		if ((auio.uio_resid += iov->iov_len) < 0) {
795 			error = EINVAL;
796 			goto bad;
797 		}
798 	}
799 #ifdef KTRACE
800 	if (KTRPOINT(td, KTR_GENIO))
801 		ktruio = cloneuio(&auio);
802 #endif
803 	len = auio.uio_resid;
804 	error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio,
805 	    0, control, flags, td);
806 	if (error) {
807 		if (auio.uio_resid != len && (error == ERESTART ||
808 		    error == EINTR || error == EWOULDBLOCK))
809 			error = 0;
810 		/* Generation of SIGPIPE can be controlled per socket */
811 		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
812 		    !(flags & MSG_NOSIGNAL)) {
813 			PROC_LOCK(td->td_proc);
814 			psignal(td->td_proc, SIGPIPE);
815 			PROC_UNLOCK(td->td_proc);
816 		}
817 	}
818 	if (error == 0)
819 		td->td_retval[0] = len - auio.uio_resid;
820 #ifdef KTRACE
821 	if (ktruio != NULL) {
822 		ktruio->uio_resid = td->td_retval[0];
823 		ktrgenio(s, UIO_WRITE, ktruio, error);
824 	}
825 #endif
826 bad:
827 	fdrop(fp, td);
828 bad2:
829 	NET_UNLOCK_GIANT();
830 	return (error);
831 }
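/*
 * As the SIGPIPE logic above shows, delivery of the signal on a broken
 * connection can be suppressed from userland either per socket or per call.
 * A minimal sketch, assuming `s' is a connected stream socket:
 *
 *	int one = 1;
 *	setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &one, sizeof(one));
 * or, for a single transmission:
 *	send(s, buf, buflen, MSG_NOSIGNAL);
 */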
832 
833 /*
834  * MPSAFE
835  */
836 int
837 sendto(td, uap)
838 	struct thread *td;
839 	register struct sendto_args /* {
840 		int	s;
841 		caddr_t	buf;
842 		size_t	len;
843 		int	flags;
844 		caddr_t	to;
845 		int	tolen;
846 	} */ *uap;
847 {
848 	struct msghdr msg;
849 	struct iovec aiov;
850 	int error;
851 
852 	msg.msg_name = uap->to;
853 	msg.msg_namelen = uap->tolen;
854 	msg.msg_iov = &aiov;
855 	msg.msg_iovlen = 1;
856 	msg.msg_control = 0;
857 #ifdef COMPAT_OLDSOCK
858 	msg.msg_flags = 0;
859 #endif
860 	aiov.iov_base = uap->buf;
861 	aiov.iov_len = uap->len;
862 	error = sendit(td, uap->s, &msg, uap->flags);
863 	return (error);
864 }
865 
866 #ifdef COMPAT_OLDSOCK
867 /*
868  * MPSAFE
869  */
870 int
871 osend(td, uap)
872 	struct thread *td;
873 	register struct osend_args /* {
874 		int	s;
875 		caddr_t	buf;
876 		int	len;
877 		int	flags;
878 	} */ *uap;
879 {
880 	struct msghdr msg;
881 	struct iovec aiov;
882 	int error;
883 
884 	msg.msg_name = 0;
885 	msg.msg_namelen = 0;
886 	msg.msg_iov = &aiov;
887 	msg.msg_iovlen = 1;
888 	aiov.iov_base = uap->buf;
889 	aiov.iov_len = uap->len;
890 	msg.msg_control = 0;
891 	msg.msg_flags = 0;
892 	error = sendit(td, uap->s, &msg, uap->flags);
893 	return (error);
894 }
895 
896 /*
897  * MPSAFE
898  */
899 int
900 osendmsg(td, uap)
901 	struct thread *td;
902 	struct osendmsg_args /* {
903 		int	s;
904 		caddr_t	msg;
905 		int	flags;
906 	} */ *uap;
907 {
908 	struct msghdr msg;
909 	struct iovec *iov;
910 	int error;
911 
912 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
913 	if (error)
914 		return (error);
915 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
916 	if (error)
917 		return (error);
918 	msg.msg_iov = iov;
919 	msg.msg_flags = MSG_COMPAT;
920 	error = sendit(td, uap->s, &msg, uap->flags);
921 	free(iov, M_IOV);
922 	return (error);
923 }
924 #endif
925 
926 /*
927  * MPSAFE
928  */
929 int
930 sendmsg(td, uap)
931 	struct thread *td;
932 	struct sendmsg_args /* {
933 		int	s;
934 		caddr_t	msg;
935 		int	flags;
936 	} */ *uap;
937 {
938 	struct msghdr msg;
939 	struct iovec *iov;
940 	int error;
941 
942 	error = copyin(uap->msg, &msg, sizeof (msg));
943 	if (error)
944 		return (error);
945 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
946 	if (error)
947 		return (error);
948 	msg.msg_iov = iov;
949 #ifdef COMPAT_OLDSOCK
950 	msg.msg_flags = 0;
951 #endif
952 	error = sendit(td, uap->s, &msg, uap->flags);
953 	free(iov, M_IOV);
954 	return (error);
955 }
956 
957 int
958 kern_recvit(td, s, mp, fromseg, controlp)
959 	struct thread *td;
960 	int s;
961 	struct msghdr *mp;
962 	enum uio_seg fromseg;
963 	struct mbuf **controlp;
964 {
965 	struct uio auio;
966 	struct iovec *iov;
967 	int i;
968 	socklen_t len;
969 	int error;
970 	struct mbuf *m, *control = 0;
971 	caddr_t ctlbuf;
972 	struct file *fp;
973 	struct socket *so;
974 	struct sockaddr *fromsa = 0;
975 #ifdef KTRACE
976 	struct uio *ktruio = NULL;
977 #endif
978 
979 	if (controlp != NULL)
980 		*controlp = 0;
981 
982 	NET_LOCK_GIANT();
983 	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
984 	if (error) {
985 		NET_UNLOCK_GIANT();
986 		return (error);
987 	}
988 	so = fp->f_data;
989 
990 #ifdef MAC
991 	SOCK_LOCK(so);
992 	error = mac_check_socket_receive(td->td_ucred, so);
993 	SOCK_UNLOCK(so);
994 	if (error) {
995 		fdrop(fp, td);
996 		NET_UNLOCK_GIANT();
997 		return (error);
998 	}
999 #endif
1000 
1001 	auio.uio_iov = mp->msg_iov;
1002 	auio.uio_iovcnt = mp->msg_iovlen;
1003 	auio.uio_segflg = UIO_USERSPACE;
1004 	auio.uio_rw = UIO_READ;
1005 	auio.uio_td = td;
1006 	auio.uio_offset = 0;			/* XXX */
1007 	auio.uio_resid = 0;
1008 	iov = mp->msg_iov;
1009 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
1010 		if ((auio.uio_resid += iov->iov_len) < 0) {
1011 			fdrop(fp, td);
1012 			NET_UNLOCK_GIANT();
1013 			return (EINVAL);
1014 		}
1015 	}
1016 #ifdef KTRACE
1017 	if (KTRPOINT(td, KTR_GENIO))
1018 		ktruio = cloneuio(&auio);
1019 #endif
1020 	len = auio.uio_resid;
1021 	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
1022 	    (struct mbuf **)0,
1023 	    (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
1024 	    &mp->msg_flags);
1025 	if (error) {
1026 		if (auio.uio_resid != (int)len && (error == ERESTART ||
1027 		    error == EINTR || error == EWOULDBLOCK))
1028 			error = 0;
1029 	}
1030 #ifdef KTRACE
1031 	if (ktruio != NULL) {
1032 		ktruio->uio_resid = (int)len - auio.uio_resid;
1033 		ktrgenio(s, UIO_READ, ktruio, error);
1034 	}
1035 #endif
1036 	if (error)
1037 		goto out;
1038 	td->td_retval[0] = (int)len - auio.uio_resid;
1039 	if (mp->msg_name) {
1040 		len = mp->msg_namelen;
1041 		if (len <= 0 || fromsa == 0)
1042 			len = 0;
1043 		else {
1044 			/* save sa_len before it is destroyed by MSG_COMPAT */
1045 			len = MIN(len, fromsa->sa_len);
1046 #ifdef COMPAT_OLDSOCK
1047 			if (mp->msg_flags & MSG_COMPAT)
1048 				((struct osockaddr *)fromsa)->sa_family =
1049 				    fromsa->sa_family;
1050 #endif
1051 			if (fromseg == UIO_USERSPACE) {
1052 				error = copyout(fromsa, mp->msg_name,
1053 				    (unsigned)len);
1054 				if (error)
1055 					goto out;
1056 			} else
1057 				bcopy(fromsa, mp->msg_name, len);
1058 		}
1059 		mp->msg_namelen = len;
1060 	}
1061 	if (mp->msg_control && controlp == NULL) {
1062 #ifdef COMPAT_OLDSOCK
1063 		/*
1064 		 * We assume that old recvmsg calls won't receive access
1065 		 * rights and other control info, esp. as control info
1066 		 * is always optional and those options didn't exist in 4.3.
1067 		 * If we receive rights, trim the cmsghdr; anything else
1068 		 * is tossed.
1069 		 */
1070 		if (control && mp->msg_flags & MSG_COMPAT) {
1071 			if (mtod(control, struct cmsghdr *)->cmsg_level !=
1072 			    SOL_SOCKET ||
1073 			    mtod(control, struct cmsghdr *)->cmsg_type !=
1074 			    SCM_RIGHTS) {
1075 				mp->msg_controllen = 0;
1076 				goto out;
1077 			}
1078 			control->m_len -= sizeof (struct cmsghdr);
1079 			control->m_data += sizeof (struct cmsghdr);
1080 		}
1081 #endif
1082 		len = mp->msg_controllen;
1083 		m = control;
1084 		mp->msg_controllen = 0;
1085 		ctlbuf = mp->msg_control;
1086 
1087 		while (m && len > 0) {
1088 			unsigned int tocopy;
1089 
1090 			if (len >= m->m_len)
1091 				tocopy = m->m_len;
1092 			else {
1093 				mp->msg_flags |= MSG_CTRUNC;
1094 				tocopy = len;
1095 			}
1096 
1097 			if ((error = copyout(mtod(m, caddr_t),
1098 					ctlbuf, tocopy)) != 0)
1099 				goto out;
1100 
1101 			ctlbuf += tocopy;
1102 			len -= tocopy;
1103 			m = m->m_next;
1104 		}
1105 		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
1106 	}
1107 out:
1108 	fdrop(fp, td);
1109 	NET_UNLOCK_GIANT();
1110 	if (fromsa)
1111 		FREE(fromsa, M_SONAME);
1112 
1113 	if (error == 0 && controlp != NULL)
1114 		*controlp = control;
1115 	else  if (control)
1116 		m_freem(control);
1117 
1118 	return (error);
1119 }
1120 
1121 static int
1122 recvit(td, s, mp, namelenp)
1123 	struct thread *td;
1124 	int s;
1125 	struct msghdr *mp;
1126 	void *namelenp;
1127 {
1128 	int error;
1129 
1130 	error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
1131 	if (error)
1132 		return (error);
1133 	if (namelenp) {
1134 		error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
1135 #ifdef COMPAT_OLDSOCK
1136 		if (mp->msg_flags & MSG_COMPAT)
1137 			error = 0;	/* old recvfrom didn't check */
1138 #endif
1139 	}
1140 	return (error);
1141 }
1142 
1143 /*
1144  * MPSAFE
1145  */
1146 int
1147 recvfrom(td, uap)
1148 	struct thread *td;
1149 	register struct recvfrom_args /* {
1150 		int	s;
1151 		caddr_t	buf;
1152 		size_t	len;
1153 		int	flags;
1154 		struct sockaddr * __restrict	from;
1155 		socklen_t * __restrict fromlenaddr;
1156 	} */ *uap;
1157 {
1158 	struct msghdr msg;
1159 	struct iovec aiov;
1160 	int error;
1161 
1162 	if (uap->fromlenaddr) {
1163 		error = copyin(uap->fromlenaddr,
1164 		    &msg.msg_namelen, sizeof (msg.msg_namelen));
1165 		if (error)
1166 			goto done2;
1167 	} else {
1168 		msg.msg_namelen = 0;
1169 	}
1170 	msg.msg_name = uap->from;
1171 	msg.msg_iov = &aiov;
1172 	msg.msg_iovlen = 1;
1173 	aiov.iov_base = uap->buf;
1174 	aiov.iov_len = uap->len;
1175 	msg.msg_control = 0;
1176 	msg.msg_flags = uap->flags;
1177 	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1178 done2:
1179 	return(error);
1180 }
1181 
1182 #ifdef COMPAT_OLDSOCK
1183 /*
1184  * MPSAFE
1185  */
1186 int
1187 orecvfrom(td, uap)
1188 	struct thread *td;
1189 	struct recvfrom_args *uap;
1190 {
1191 
1192 	uap->flags |= MSG_COMPAT;
1193 	return (recvfrom(td, uap));
1194 }
1195 #endif
1196 
1197 
1198 #ifdef COMPAT_OLDSOCK
1199 /*
1200  * MPSAFE
1201  */
1202 int
1203 orecv(td, uap)
1204 	struct thread *td;
1205 	register struct orecv_args /* {
1206 		int	s;
1207 		caddr_t	buf;
1208 		int	len;
1209 		int	flags;
1210 	} */ *uap;
1211 {
1212 	struct msghdr msg;
1213 	struct iovec aiov;
1214 	int error;
1215 
1216 	msg.msg_name = 0;
1217 	msg.msg_namelen = 0;
1218 	msg.msg_iov = &aiov;
1219 	msg.msg_iovlen = 1;
1220 	aiov.iov_base = uap->buf;
1221 	aiov.iov_len = uap->len;
1222 	msg.msg_control = 0;
1223 	msg.msg_flags = uap->flags;
1224 	error = recvit(td, uap->s, &msg, NULL);
1225 	return (error);
1226 }
1227 
1228 /*
1229  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1230  * overlays the new one, missing only the flags, and with the (old) access
1231  * rights where the control fields are now.
1232  *
1233  * MPSAFE
1234  */
1235 int
1236 orecvmsg(td, uap)
1237 	struct thread *td;
1238 	struct orecvmsg_args /* {
1239 		int	s;
1240 		struct	omsghdr *msg;
1241 		int	flags;
1242 	} */ *uap;
1243 {
1244 	struct msghdr msg;
1245 	struct iovec *iov;
1246 	int error;
1247 
1248 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1249 	if (error)
1250 		return (error);
1251 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1252 	if (error)
1253 		return (error);
1254 	msg.msg_flags = uap->flags | MSG_COMPAT;
1255 	msg.msg_iov = iov;
1256 	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1257 	if (msg.msg_controllen && error == 0)
1258 		error = copyout(&msg.msg_controllen,
1259 		    &uap->msg->msg_accrightslen, sizeof (int));
1260 	free(iov, M_IOV);
1261 	return (error);
1262 }
1263 #endif
1264 
1265 /*
1266  * MPSAFE
1267  */
1268 int
1269 recvmsg(td, uap)
1270 	struct thread *td;
1271 	struct recvmsg_args /* {
1272 		int	s;
1273 		struct	msghdr *msg;
1274 		int	flags;
1275 	} */ *uap;
1276 {
1277 	struct msghdr msg;
1278 	struct iovec *uiov, *iov;
1279 	int error;
1280 
1281 	error = copyin(uap->msg, &msg, sizeof (msg));
1282 	if (error)
1283 		return (error);
1284 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1285 	if (error)
1286 		return (error);
1287 	msg.msg_flags = uap->flags;
1288 #ifdef COMPAT_OLDSOCK
1289 	msg.msg_flags &= ~MSG_COMPAT;
1290 #endif
1291 	uiov = msg.msg_iov;
1292 	msg.msg_iov = iov;
1293 	error = recvit(td, uap->s, &msg, NULL);
1294 	if (error == 0) {
1295 		msg.msg_iov = uiov;
1296 		error = copyout(&msg, uap->msg, sizeof(msg));
1297 	}
1298 	free(iov, M_IOV);
1299 	return (error);
1300 }
1301 
1302 /*
1303  * MPSAFE
1304  */
1305 /* ARGSUSED */
1306 int
1307 shutdown(td, uap)
1308 	struct thread *td;
1309 	register struct shutdown_args /* {
1310 		int	s;
1311 		int	how;
1312 	} */ *uap;
1313 {
1314 	struct socket *so;
1315 	struct file *fp;
1316 	int error;
1317 
1318 	NET_LOCK_GIANT();
1319 	error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
1320 	if (error == 0) {
1321 		so = fp->f_data;
1322 		error = soshutdown(so, uap->how);
1323 		fdrop(fp, td);
1324 	}
1325 	NET_UNLOCK_GIANT();
1326 	return (error);
1327 }
1328 
1329 /*
1330  * MPSAFE
1331  */
1332 /* ARGSUSED */
1333 int
1334 setsockopt(td, uap)
1335 	struct thread *td;
1336 	register struct setsockopt_args /* {
1337 		int	s;
1338 		int	level;
1339 		int	name;
1340 		caddr_t	val;
1341 		int	valsize;
1342 	} */ *uap;
1343 {
1344 
1345 	return (kern_setsockopt(td, uap->s, uap->level, uap->name,
1346 	    uap->val, UIO_USERSPACE, uap->valsize));
1347 }
1348 
1349 int
1350 kern_setsockopt(td, s, level, name, val, valseg, valsize)
1351 	struct thread *td;
1352 	int s;
1353 	int level;
1354 	int name;
1355 	void *val;
1356 	enum uio_seg valseg;
1357 	socklen_t valsize;
1358 {
1359 	int error;
1360 	struct socket *so;
1361 	struct file *fp;
1362 	struct sockopt sopt;
1363 
1364 	if (val == NULL && valsize != 0)
1365 		return (EFAULT);
1366 	if ((int)valsize < 0)
1367 		return (EINVAL);
1368 
1369 	sopt.sopt_dir = SOPT_SET;
1370 	sopt.sopt_level = level;
1371 	sopt.sopt_name = name;
1372 	sopt.sopt_val = val;
1373 	sopt.sopt_valsize = valsize;
1374 	switch (valseg) {
1375 	case UIO_USERSPACE:
1376 		sopt.sopt_td = td;
1377 		break;
1378 	case UIO_SYSSPACE:
1379 		sopt.sopt_td = NULL;
1380 		break;
1381 	default:
1382 		panic("kern_setsockopt called with bad valseg");
1383 	}
1384 
1385 	NET_LOCK_GIANT();
1386 	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1387 	if (error == 0) {
1388 		so = fp->f_data;
1389 		error = sosetopt(so, &sopt);
1390 		fdrop(fp, td);
1391 	}
1392 	NET_UNLOCK_GIANT();
1393 	return(error);
1394 }
1395 
1396 /*
1397  * MPSAFE
1398  */
1399 /* ARGSUSED */
1400 int
1401 getsockopt(td, uap)
1402 	struct thread *td;
1403 	register struct getsockopt_args /* {
1404 		int	s;
1405 		int	level;
1406 		int	name;
1407 		void * __restrict	val;
1408 		socklen_t * __restrict avalsize;
1409 	} */ *uap;
1410 {
1411 	socklen_t valsize;
1412 	int	error;
1413 
1414 	if (uap->val) {
1415 		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1416 		if (error)
1417 			return (error);
1418 	}
1419 
1420 	error = kern_getsockopt(td, uap->s, uap->level, uap->name,
1421 	    uap->val, UIO_USERSPACE, &valsize);
1422 
1423 	if (error == 0)
1424 		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1425 	return (error);
1426 }
1427 
1428 /*
1429  * Kernel version of getsockopt.
1430  * optval can be a userland or kernel pointer.  optlen is always a kernel pointer.
1431  */
1432 int
1433 kern_getsockopt(td, s, level, name, val, valseg, valsize)
1434 	struct thread *td;
1435 	int s;
1436 	int level;
1437 	int name;
1438 	void *val;
1439 	enum uio_seg valseg;
1440 	socklen_t *valsize;
1441 {
1442 	int error;
1443 	struct  socket *so;
1444 	struct file *fp;
1445 	struct	sockopt sopt;
1446 
1447 	if (val == NULL)
1448 		*valsize = 0;
1449 	if ((int)*valsize < 0)
1450 		return (EINVAL);
1451 
1452 	sopt.sopt_dir = SOPT_GET;
1453 	sopt.sopt_level = level;
1454 	sopt.sopt_name = name;
1455 	sopt.sopt_val = val;
1456 	sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
1457 	switch (valseg) {
1458 	case UIO_USERSPACE:
1459 		sopt.sopt_td = td;
1460 		break;
1461 	case UIO_SYSSPACE:
1462 		sopt.sopt_td = NULL;
1463 		break;
1464 	default:
1465 		panic("kern_getsockopt called with bad valseg");
1466 	}
1467 
1468 	NET_LOCK_GIANT();
1469 	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1470 	if (error == 0) {
1471 		so = fp->f_data;
1472 		error = sogetopt(so, &sopt);
1473 		*valsize = sopt.sopt_valsize;
1474 		fdrop(fp, td);
1475 	}
1476 	NET_UNLOCK_GIANT();
1477 	return (error);
1478 }
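/*
 * Sketch of an in-kernel caller: with UIO_SYSSPACE the option value lives in
 * kernel memory, sopt_td is left NULL (see the switch above) and no copyout
 * to user space is involved; *valsize is updated to the length actually
 * returned.
 *
 *	int type;
 *	socklen_t len = sizeof(type);
 *	error = kern_getsockopt(td, s, SOL_SOCKET, SO_TYPE, &type,
 *	    UIO_SYSSPACE, &len);
 */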
1479 
1480 /*
1481  * getsockname1() - Get socket name.
1482  *
1483  * MPSAFE
1484  */
1485 /* ARGSUSED */
1486 static int
1487 getsockname1(td, uap, compat)
1488 	struct thread *td;
1489 	register struct getsockname_args /* {
1490 		int	fdes;
1491 		struct sockaddr * __restrict asa;
1492 		socklen_t * __restrict alen;
1493 	} */ *uap;
1494 	int compat;
1495 {
1496 	struct sockaddr *sa;
1497 	socklen_t len;
1498 	int error;
1499 
1500 	error = copyin(uap->alen, &len, sizeof(len));
1501 	if (error)
1502 		return (error);
1503 
1504 	error = kern_getsockname(td, uap->fdes, &sa, &len);
1505 	if (error)
1506 		return (error);
1507 
1508 	if (len != 0) {
1509 #ifdef COMPAT_OLDSOCK
1510 		if (compat)
1511 			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1512 #endif
1513 		error = copyout(sa, uap->asa, (u_int)len);
1514 	}
1515 	free(sa, M_SONAME);
1516 	if (error == 0)
1517 		error = copyout(&len, uap->alen, sizeof(len));
1518 	return (error);
1519 }
1520 
1521 int
1522 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
1523     socklen_t *alen)
1524 {
1525 	struct socket *so;
1526 	struct file *fp;
1527 	socklen_t len;
1528 	int error;
1529 
1530 	if (*alen < 0)
1531 		return (EINVAL);
1532 
1533 	NET_LOCK_GIANT();
1534 	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
1535 	if (error)
1536 		goto done;
1537 	so = fp->f_data;
1538 	*sa = NULL;
1539 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
1540 	if (error)
1541 		goto bad;
1542 	if (*sa == NULL)
1543 		len = 0;
1544 	else
1545 		len = MIN(*alen, (*sa)->sa_len);
1546 	*alen = len;
1547 bad:
1548 	fdrop(fp, td);
1549 	if (error && *sa) {
1550 		free(*sa, M_SONAME);
1551 		*sa = NULL;
1552 	}
1553 done:
1554 	NET_UNLOCK_GIANT();
1555 	return (error);
1556 }
1557 
1558 /*
1559  * MPSAFE
1560  */
1561 int
1562 getsockname(td, uap)
1563 	struct thread *td;
1564 	struct getsockname_args *uap;
1565 {
1566 
1567 	return (getsockname1(td, uap, 0));
1568 }
1569 
1570 #ifdef COMPAT_OLDSOCK
1571 /*
1572  * MPSAFE
1573  */
1574 int
1575 ogetsockname(td, uap)
1576 	struct thread *td;
1577 	struct getsockname_args *uap;
1578 {
1579 
1580 	return (getsockname1(td, uap, 1));
1581 }
1582 #endif /* COMPAT_OLDSOCK */
1583 
1584 /*
1585  * getpeername1() - Get name of peer for connected socket.
1586  *
1587  * MPSAFE
1588  */
1589 /* ARGSUSED */
1590 static int
1591 getpeername1(td, uap, compat)
1592 	struct thread *td;
1593 	register struct getpeername_args /* {
1594 		int	fdes;
1595 		struct sockaddr * __restrict	asa;
1596 		socklen_t * __restrict	alen;
1597 	} */ *uap;
1598 	int compat;
1599 {
1600 	struct sockaddr *sa;
1601 	socklen_t len;
1602 	int error;
1603 
1604 	error = copyin(uap->alen, &len, sizeof (len));
1605 	if (error)
1606 		return (error);
1607 
1608 	error = kern_getpeername(td, uap->fdes, &sa, &len);
1609 	if (error)
1610 		return (error);
1611 
1612 	if (len != 0) {
1613 #ifdef COMPAT_OLDSOCK
1614 		if (compat)
1615 			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1616 #endif
1617 		error = copyout(sa, uap->asa, (u_int)len);
1618 	}
1619 	free(sa, M_SONAME);
1620 	if (error == 0)
1621 		error = copyout(&len, uap->alen, sizeof(len));
1622 	return (error);
1623 }
1624 
1625 int
1626 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
1627     socklen_t *alen)
1628 {
1629 	struct socket *so;
1630 	struct file *fp;
1631 	socklen_t len;
1632 	int error;
1633 
1634 	if (*alen < 0)
1635 		return (EINVAL);
1636 
1637 	NET_LOCK_GIANT();
1638 	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
1639 	if (error)
1640 		goto done2;
1641 	so = fp->f_data;
1642 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1643 		error = ENOTCONN;
1644 		goto done1;
1645 	}
1646 	*sa = NULL;
1647 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
1648 	if (error)
1649 		goto bad;
1650 	if (*sa == NULL)
1651 		len = 0;
1652 	else
1653 		len = MIN(*alen, (*sa)->sa_len);
1654 	*alen = len;
1655 bad:
1656 	if (error && *sa) {
1657 		free(*sa, M_SONAME);
1658 		*sa = NULL;
1659 	}
1660 done1:
1661 	fdrop(fp, td);
1662 done2:
1663 	NET_UNLOCK_GIANT();
1664 	return (error);
1665 }
1666 
1667 /*
1668  * MPSAFE
1669  */
1670 int
1671 getpeername(td, uap)
1672 	struct thread *td;
1673 	struct getpeername_args *uap;
1674 {
1675 
1676 	return (getpeername1(td, uap, 0));
1677 }
1678 
1679 #ifdef COMPAT_OLDSOCK
1680 /*
1681  * MPSAFE
1682  */
1683 int
1684 ogetpeername(td, uap)
1685 	struct thread *td;
1686 	struct ogetpeername_args *uap;
1687 {
1688 
1689 	/* XXX uap should have type `getpeername_args *' to begin with. */
1690 	return (getpeername1(td, (struct getpeername_args *)uap, 1));
1691 }
1692 #endif /* COMPAT_OLDSOCK */
1693 
1694 int
1695 sockargs(mp, buf, buflen, type)
1696 	struct mbuf **mp;
1697 	caddr_t buf;
1698 	int buflen, type;
1699 {
1700 	register struct sockaddr *sa;
1701 	register struct mbuf *m;
1702 	int error;
1703 
1704 	if ((u_int)buflen > MLEN) {
1705 #ifdef COMPAT_OLDSOCK
1706 		if (type == MT_SONAME && (u_int)buflen <= 112)
1707 			buflen = MLEN;		/* unix domain compat. hack */
1708 		else
1709 #endif
1710 			if ((u_int)buflen > MCLBYTES)
1711 				return (EINVAL);
1712 	}
1713 	m = m_get(M_TRYWAIT, type);
1714 	if (m == NULL)
1715 		return (ENOBUFS);
1716 	if ((u_int)buflen > MLEN) {
1717 		MCLGET(m, M_TRYWAIT);
1718 		if ((m->m_flags & M_EXT) == 0) {
1719 			m_free(m);
1720 			return (ENOBUFS);
1721 		}
1722 	}
1723 	m->m_len = buflen;
1724 	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1725 	if (error)
1726 		(void) m_free(m);
1727 	else {
1728 		*mp = m;
1729 		if (type == MT_SONAME) {
1730 			sa = mtod(m, struct sockaddr *);
1731 
1732 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1733 			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1734 				sa->sa_family = sa->sa_len;
1735 #endif
1736 			sa->sa_len = buflen;
1737 		}
1738 	}
1739 	return (error);
1740 }
1741 
1742 int
1743 getsockaddr(namp, uaddr, len)
1744 	struct sockaddr **namp;
1745 	caddr_t uaddr;
1746 	size_t len;
1747 {
1748 	struct sockaddr *sa;
1749 	int error;
1750 
1751 	if (len > SOCK_MAXADDRLEN)
1752 		return (ENAMETOOLONG);
1753 	if (len < offsetof(struct sockaddr, sa_data[0]))
1754 		return (EINVAL);
1755 	MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1756 	error = copyin(uaddr, sa, len);
1757 	if (error) {
1758 		FREE(sa, M_SONAME);
1759 	} else {
1760 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1761 		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1762 			sa->sa_family = sa->sa_len;
1763 #endif
1764 		sa->sa_len = len;
1765 		*namp = sa;
1766 	}
1767 	return (error);
1768 }
1769 
1770 /*
1771  * Detach mapped page and release resources back to the system.
1772  */
1773 void
1774 sf_buf_mext(void *addr, void *args)
1775 {
1776 	vm_page_t m;
1777 
1778 	m = sf_buf_page(args);
1779 	sf_buf_free(args);
1780 	vm_page_lock_queues();
1781 	vm_page_unwire(m, 0);
1782 	/*
1783 	 * Check for the object going away on us. This can
1784 	 * happen since we don't hold a reference to it.
1785 	 * If so, we're responsible for freeing the page.
1786 	 */
1787 	if (m->wire_count == 0 && m->object == NULL)
1788 		vm_page_free(m);
1789 	vm_page_unlock_queues();
1790 }
1791 
1792 /*
1793  * sendfile(2)
1794  *
1795  * MPSAFE
1796  *
1797  * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1798  *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1799  *
1800  * Send a file specified by 'fd' and starting at 'offset' to a socket
1801  * specified by 's'. Send only 'nbytes' of the file or until EOF if
1802  * nbytes == 0. Optionally add a header and/or trailer to the socket
1803  * output. If specified, write the total number of bytes sent into *sbytes.
1804  *
1805  */
1806 int
1807 sendfile(struct thread *td, struct sendfile_args *uap)
1808 {
1809 
1810 	return (do_sendfile(td, uap, 0));
1811 }
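/*
 * Userland sketch of the interface documented above.  Assumes `filefd' is an
 * open regular file and `sock' a connected SOCK_STREAM socket; sends the
 * whole file from the beginning, with no headers or trailers:
 *
 *	off_t sbytes = 0;
 *	if (sendfile(filefd, sock, 0, 0, NULL, &sbytes, 0) == -1)
 *		warn("sendfile");
 *	... sbytes holds the number of bytes actually sent ...
 */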
1812 
1813 static int
1814 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1815 {
1816 	struct sf_hdtr hdtr;
1817 	struct uio *hdr_uio, *trl_uio;
1818 	int error;
1819 
1820 	hdr_uio = trl_uio = NULL;
1821 
1822 	if (uap->hdtr != NULL) {
1823 		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1824 		if (error)
1825 			goto out;
1826 		if (hdtr.headers != NULL) {
1827 			error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1828 			if (error)
1829 				goto out;
1830 		}
1831 		if (hdtr.trailers != NULL) {
1832 			error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
1833 			if (error)
1834 				goto out;
1835 
1836 		}
1837 	}
1838 
1839 	error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
1840 out:
1841 	if (hdr_uio)
1842 		free(hdr_uio, M_IOV);
1843 	if (trl_uio)
1844 		free(trl_uio, M_IOV);
1845 	return (error);
1846 }
1847 
1848 #ifdef COMPAT_FREEBSD4
1849 int
1850 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
1851 {
1852 	struct sendfile_args args;
1853 
1854 	args.fd = uap->fd;
1855 	args.s = uap->s;
1856 	args.offset = uap->offset;
1857 	args.nbytes = uap->nbytes;
1858 	args.hdtr = uap->hdtr;
1859 	args.sbytes = uap->sbytes;
1860 	args.flags = uap->flags;
1861 
1862 	return (do_sendfile(td, &args, 1));
1863 }
1864 #endif /* COMPAT_FREEBSD4 */
1865 
1866 int
1867 kern_sendfile(struct thread *td, struct sendfile_args *uap,
1868     struct uio *hdr_uio, struct uio *trl_uio, int compat)
1869 {
1870 	struct file *sock_fp;
1871 	struct vnode *vp;
1872 	struct vm_object *obj = NULL;
1873 	struct socket *so = NULL;
1874 	struct mbuf *m, *m_header = NULL;
1875 	struct sf_buf *sf;
1876 	struct vm_page *pg;
1877 	off_t off, xfsize, hdtr_size, sbytes = 0;
1878 	int error, headersize = 0, headersent = 0;
1879 	int vfslocked;
1880 
1881 	NET_LOCK_GIANT();
1882 
1883 	hdtr_size = 0;
1884 
1885 	/*
1886 	 * The descriptor must be a regular file and have a backing VM object.
1887 	 */
1888 	if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1889 		goto done;
1890 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1891 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1892 	obj = vp->v_object;
1893 	if (obj != NULL) {
1894 		/*
1895 		 * Temporarily increase the backing VM object's reference
1896 		 * count so that a forced reclamation of its vnode does not
1897 		 * immediately destroy it.
1898 		 */
1899 		VM_OBJECT_LOCK(obj);
1900 		if ((obj->flags & OBJ_DEAD) == 0) {
1901 			vm_object_reference_locked(obj);
1902 			VM_OBJECT_UNLOCK(obj);
1903 		} else {
1904 			VM_OBJECT_UNLOCK(obj);
1905 			obj = NULL;
1906 		}
1907 	}
1908 	VOP_UNLOCK(vp, 0, td);
1909 	VFS_UNLOCK_GIANT(vfslocked);
1910 	if (obj == NULL) {
1911 		error = EINVAL;
1912 		goto done;
1913 	}
1914 	if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp, NULL)) != 0)
1915 		goto done;
1916 	so = sock_fp->f_data;
1917 	if (so->so_type != SOCK_STREAM) {
1918 		error = EINVAL;
1919 		goto done;
1920 	}
1921 	if ((so->so_state & SS_ISCONNECTED) == 0) {
1922 		error = ENOTCONN;
1923 		goto done;
1924 	}
1925 	if (uap->offset < 0) {
1926 		error = EINVAL;
1927 		goto done;
1928 	}
1929 
1930 #ifdef MAC
1931 	SOCK_LOCK(so);
1932 	error = mac_check_socket_send(td->td_ucred, so);
1933 	SOCK_UNLOCK(so);
1934 	if (error)
1935 		goto done;
1936 #endif
1937 
1938 	/*
1939 	 * If specified, get the pointer to the sf_hdtr struct for
1940 	 * any headers/trailers.
1941 	 */
1942 	if (hdr_uio != NULL) {
1943 		hdr_uio->uio_td = td;
1944 		hdr_uio->uio_rw = UIO_WRITE;
1945 		if (hdr_uio->uio_resid > 0) {
1946 			m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0, 0);
1947 			if (m_header == NULL)
1948 				goto done;
1949 			headersize = m_header->m_pkthdr.len;
1950 			if (compat)
1951 				sbytes += headersize;
1952 		}
1953 	}
1954 
1955 	/*
1956 	 * Protect against multiple writers to the socket.
1957 	 */
1958 	SOCKBUF_LOCK(&so->so_snd);
1959 	(void) sblock(&so->so_snd, M_WAITOK);
1960 	SOCKBUF_UNLOCK(&so->so_snd);
1961 
1962 	/*
1963 	 * Loop through the pages in the file, starting with the requested
1964 	 * offset. Get a file page (do I/O if necessary), map the file page
1965 	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1966 	 * it on the socket.
1967 	 */
1968 	for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1969 		vm_pindex_t pindex;
1970 		vm_offset_t pgoff;
1971 
1972 		pindex = OFF_TO_IDX(off);
1973 		VM_OBJECT_LOCK(obj);
1974 retry_lookup:
1975 		/*
1976 		 * Calculate the amount to transfer. Not to exceed a page,
1977 		 * the EOF, or the passed in nbytes.
1978 		 */
1979 		xfsize = obj->un_pager.vnp.vnp_size - off;
1980 		VM_OBJECT_UNLOCK(obj);
1981 		if (xfsize > PAGE_SIZE)
1982 			xfsize = PAGE_SIZE;
1983 		pgoff = (vm_offset_t)(off & PAGE_MASK);
1984 		if (PAGE_SIZE - pgoff < xfsize)
1985 			xfsize = PAGE_SIZE - pgoff;
1986 		if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1987 			xfsize = uap->nbytes - sbytes;
1988 		if (xfsize <= 0) {
1989 			if (m_header != NULL) {
1990 				m = m_header;
1991 				m_header = NULL;
1992 				SOCKBUF_LOCK(&so->so_snd);
1993 				goto retry_space;
1994 			} else
1995 				break;
1996 		}
1997 		/*
1998 		 * Optimize the non-blocking case by looking at the socket space
1999 		 * before going to the extra work of constituting the sf_buf.
2000 		 */
2001 		SOCKBUF_LOCK(&so->so_snd);
2002 		if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
2003 			if (so->so_snd.sb_state & SBS_CANTSENDMORE)
2004 				error = EPIPE;
2005 			else
2006 				error = EAGAIN;
2007 			sbunlock(&so->so_snd);
2008 			SOCKBUF_UNLOCK(&so->so_snd);
2009 			goto done;
2010 		}
2011 		SOCKBUF_UNLOCK(&so->so_snd);
2012 		VM_OBJECT_LOCK(obj);
2013 		/*
2014 		 * Attempt to look up the page.
2015 		 *
2016 		 *	Allocate if not found
2017 		 *
2018 		 *	Wait and loop if busy.
2019 		 */
2020 		pg = vm_page_lookup(obj, pindex);
2021 
2022 		if (pg == NULL) {
2023 			pg = vm_page_alloc(obj, pindex, VM_ALLOC_NOBUSY |
2024 			    VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
2025 			if (pg == NULL) {
2026 				VM_OBJECT_UNLOCK(obj);
2027 				VM_WAIT;
2028 				VM_OBJECT_LOCK(obj);
2029 				goto retry_lookup;
2030 			}
2031 			vm_page_lock_queues();
2032 		} else {
2033 			vm_page_lock_queues();
2034 			if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
2035 				goto retry_lookup;
2036 			/*
2037 			 * Wire the page so it does not get ripped out from
2038 			 * under us.
2039 			 */
2040 			vm_page_wire(pg);
2041 		}
2042 
2043 		/*
2044 		 * If page is not valid for what we need, initiate I/O
2045 		 */
2046 
2047 		if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) {
2048 			VM_OBJECT_UNLOCK(obj);
2049 		} else if (uap->flags & SF_NODISKIO) {
2050 			error = EBUSY;
2051 		} else {
2052 			int bsize, resid;
2053 
2054 			/*
2055 			 * Ensure that our page is still around when the I/O
2056 			 * completes.
2057 			 */
2058 			vm_page_io_start(pg);
2059 			vm_page_unlock_queues();
2060 			VM_OBJECT_UNLOCK(obj);
2061 
2062 			/*
2063 			 * Get the page from backing store.
2064 			 */
2065 			bsize = vp->v_mount->mnt_stat.f_iosize;
2066 			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2067 			vn_lock(vp, LK_SHARED | LK_RETRY, td);
2068 			/*
2069 			 * XXXMAC: Because we don't have fp->f_cred here,
2070 			 * we pass in NOCRED.  This is probably wrong, but
2071 			 * is consistent with our original implementation.
2072 			 */
2073 			error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
2074 			    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
2075 			    IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
2076 			    td->td_ucred, NOCRED, &resid, td);
2077 			VOP_UNLOCK(vp, 0, td);
2078 			VFS_UNLOCK_GIANT(vfslocked);
2079 			VM_OBJECT_LOCK(obj);
2080 			vm_page_lock_queues();
2081 			vm_page_io_finish(pg);
2082 			if (!error)
2083 				VM_OBJECT_UNLOCK(obj);
2084 			mbstat.sf_iocnt++;
2085 		}
2086 
2087 		if (error) {
2088 			vm_page_unwire(pg, 0);
2089 			/*
2090 			 * See if anyone else might know about this page.
2091 			 * If not and it is not valid, then free it.
2092 			 */
2093 			if (pg->wire_count == 0 && pg->valid == 0 &&
2094 			    pg->busy == 0 && !(pg->flags & PG_BUSY) &&
2095 			    pg->hold_count == 0) {
2096 				vm_page_free(pg);
2097 			}
2098 			vm_page_unlock_queues();
2099 			VM_OBJECT_UNLOCK(obj);
2100 			SOCKBUF_LOCK(&so->so_snd);
2101 			sbunlock(&so->so_snd);
2102 			SOCKBUF_UNLOCK(&so->so_snd);
2103 			goto done;
2104 		}
2105 		vm_page_unlock_queues();
2106 
2107 		/*
2108 		 * Get a sendfile buf. We usually wait as long as necessary,
2109 		 * but this wait can be interrupted.
2110 		 */
2111 		if ((sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) {
2112 			mbstat.sf_allocfail++;
2113 			vm_page_lock_queues();
2114 			vm_page_unwire(pg, 0);
2115 			if (pg->wire_count == 0 && pg->object == NULL)
2116 				vm_page_free(pg);
2117 			vm_page_unlock_queues();
2118 			SOCKBUF_LOCK(&so->so_snd);
2119 			sbunlock(&so->so_snd);
2120 			SOCKBUF_UNLOCK(&so->so_snd);
2121 			error = EINTR;
2122 			goto done;
2123 		}
2124 
2125 		/*
2126 		 * Get an mbuf header and set it up as having external storage.
2127 		 */
2128 		if (m_header)
2129 			MGET(m, M_TRYWAIT, MT_DATA);
2130 		else
2131 			MGETHDR(m, M_TRYWAIT, MT_DATA);
2132 		if (m == NULL) {
2133 			error = ENOBUFS;
2134 			sf_buf_mext((void *)sf_buf_kva(sf), sf);
2135 			SOCKBUF_LOCK(&so->so_snd);
2136 			sbunlock(&so->so_snd);
2137 			SOCKBUF_UNLOCK(&so->so_snd);
2138 			goto done;
2139 		}
2140 		/*
2141 		 * Setup external storage for mbuf.
2142 		 */
2143 		MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY,
2144 		    EXT_SFBUF);
2145 		m->m_data = (char *)sf_buf_kva(sf) + pgoff;
2146 		m->m_pkthdr.len = m->m_len = xfsize;
2147 
2148 		if (m_header) {
2149 			m_cat(m_header, m);
2150 			m = m_header;
2151 			m_header = NULL;
2152 			m_fixhdr(m);
2153 		}
2154 
2155 		/*
2156 		 * Add the buffer to the socket buffer chain.
2157 		 */
2158 		SOCKBUF_LOCK(&so->so_snd);
2159 retry_space:
2160 		/*
2161 		 * Make sure that the socket is still able to take more data.
2162 		 * CANTSENDMORE being true usually means that the connection
2163 		 * was closed. so_error is true when an error was sensed after
2164 		 * a previous send.
2165 		 * The state is checked after the page mapping and buffer
2166 		 * allocation above since those operations may block and make
2167 		 * any socket checks stale. From this point forward, nothing
2168 		 * blocks before the pru_send (or more accurately, any blocking
2169 		 * results in a loop back to here to re-check).
2170 		 */
2171 		SOCKBUF_LOCK_ASSERT(&so->so_snd);
2172 		if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) {
2173 			if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2174 				error = EPIPE;
2175 			} else {
2176 				error = so->so_error;
2177 				so->so_error = 0;
2178 			}
2179 			m_freem(m);
2180 			sbunlock(&so->so_snd);
2181 			SOCKBUF_UNLOCK(&so->so_snd);
2182 			goto done;
2183 		}
2184 		/*
2185 		 * Wait for socket space to become available. We do this just
2186 		 * after checking the connection state above in order to avoid
2187 		 * a race condition with sbwait().
2188 		 */
2189 		if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
2190 			if (so->so_state & SS_NBIO) {
2191 				m_freem(m);
2192 				sbunlock(&so->so_snd);
2193 				SOCKBUF_UNLOCK(&so->so_snd);
2194 				error = EAGAIN;
2195 				goto done;
2196 			}
2197 			error = sbwait(&so->so_snd);
2198 			/*
2199 			 * An error from sbwait usually indicates that we've
2200 			 * been interrupted by a signal. If we've sent anything
2201 			 * then return bytes sent, otherwise return the error.
2202 			 */
2203 			if (error) {
2204 				m_freem(m);
2205 				sbunlock(&so->so_snd);
2206 				SOCKBUF_UNLOCK(&so->so_snd);
2207 				goto done;
2208 			}
2209 			goto retry_space;
2210 		}
2211 		SOCKBUF_UNLOCK(&so->so_snd);
2212 		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
2213 		if (error) {
2214 			SOCKBUF_LOCK(&so->so_snd);
2215 			sbunlock(&so->so_snd);
2216 			SOCKBUF_UNLOCK(&so->so_snd);
2217 			goto done;
2218 		}
2219 		headersent = 1;
2220 	}
2221 	SOCKBUF_LOCK(&so->so_snd);
2222 	sbunlock(&so->so_snd);
2223 	SOCKBUF_UNLOCK(&so->so_snd);
2224 
2225 	/*
2226 	 * Send trailers. Wimp out and use writev(2).
2227 	 */
2228 	if (trl_uio != NULL) {
2229 		error = kern_writev(td, uap->s, trl_uio);
2230 		if (error)
2231 			goto done;
2232 		if (compat)
2233 			sbytes += td->td_retval[0];
2234 		else
2235 			hdtr_size += td->td_retval[0];
2236 	}
2237 
2238 done:
2239 	if (headersent) {
2240 		if (!compat)
2241 			hdtr_size += headersize;
2242 	} else {
2243 		if (compat)
2244 			sbytes -= headersize;
2245 	}
2246 	/*
2247 	 * If there was no error we have to clear td->td_retval[0]
2248 	 * because it may have been set by writev.
2249 	 */
2250 	if (error == 0) {
2251 		td->td_retval[0] = 0;
2252 	}
2253 	if (uap->sbytes != NULL) {
2254 		if (!compat)
2255 			sbytes += hdtr_size;
2256 		copyout(&sbytes, uap->sbytes, sizeof(off_t));
2257 	}
2258 	if (obj != NULL)
2259 		vm_object_deallocate(obj);
2260 	if (vp != NULL) {
2261 		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2262 		vrele(vp);
2263 		VFS_UNLOCK_GIANT(vfslocked);
2264 	}
2265 	if (so)
2266 		fdrop(sock_fp, td);
2267 	if (m_header)
2268 		m_freem(m_header);
2269 
2270 	NET_UNLOCK_GIANT();
2271 
2272 	if (error == ERESTART)
2273 		error = EINTR;
2274 
2275 	return (error);
2276 }
2277