xref: /freebsd/sys/kern/uipc_syscalls.c (revision e4e9813eb92cd7c4d4b819a8fbed5cbd3d92f5d8)
1 /*-
2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * sendfile(2) and related extensions:
6  * Copyright (c) 1998, David Greenman. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 #include "opt_compat.h"
39 #include "opt_ktrace.h"
40 #include "opt_mac.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/lock.h>
46 #include <sys/mac.h>
47 #include <sys/mutex.h>
48 #include <sys/sysproto.h>
49 #include <sys/malloc.h>
50 #include <sys/filedesc.h>
51 #include <sys/event.h>
52 #include <sys/proc.h>
53 #include <sys/fcntl.h>
54 #include <sys/file.h>
55 #include <sys/filio.h>
56 #include <sys/mount.h>
57 #include <sys/mbuf.h>
58 #include <sys/protosw.h>
59 #include <sys/sf_buf.h>
60 #include <sys/socket.h>
61 #include <sys/socketvar.h>
62 #include <sys/signalvar.h>
63 #include <sys/syscallsubr.h>
64 #include <sys/sysctl.h>
65 #include <sys/uio.h>
66 #include <sys/vnode.h>
67 #ifdef KTRACE
68 #include <sys/ktrace.h>
69 #endif
70 
71 #include <vm/vm.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_page.h>
74 #include <vm/vm_pageout.h>
75 #include <vm/vm_kern.h>
76 #include <vm/vm_extern.h>
77 
78 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
79 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
80 
81 static int accept1(struct thread *td, struct accept_args *uap, int compat);
82 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
83 static int getsockname1(struct thread *td, struct getsockname_args *uap,
84 			int compat);
85 static int getpeername1(struct thread *td, struct getpeername_args *uap,
86 			int compat);
87 
88 /*
89  * NSFBUFS-related variables and associated sysctls
90  */
91 int nsfbufs;
92 int nsfbufspeak;
93 int nsfbufsused;
94 
95 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
96     "Maximum number of sendfile(2) sf_bufs available");
97 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
98     "Number of sendfile(2) sf_bufs at peak usage");
99 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
100     "Number of sendfile(2) sf_bufs in use");
101 
102 /*
103  * Convert a user file descriptor to a kernel file entry.  A reference on the
104  * file entry is held upon returning.  This is lighter weight than
105  * fgetsock(), which bumps the socket reference count and drops the file
106  * reference count instead; this approach avoids several additional mutex
107  * operations associated with the extra reference count.  If requested, return the
108  * open file flags.
109  */
110 static int
111 getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp)
112 {
113 	struct file *fp;
114 	int error;
115 
116 	fp = NULL;
117 	if (fdp == NULL)
118 		error = EBADF;
119 	else {
120 		FILEDESC_LOCK_FAST(fdp);
121 		fp = fget_locked(fdp, fd);
122 		if (fp == NULL)
123 			error = EBADF;
124 		else if (fp->f_type != DTYPE_SOCKET) {
125 			fp = NULL;
126 			error = ENOTSOCK;
127 		} else {
128 			fhold(fp);
129 			if (fflagp != NULL)
130 				*fflagp = fp->f_flag;
131 			error = 0;
132 		}
133 		FILEDESC_UNLOCK_FAST(fdp);
134 	}
135 	*fpp = fp;
136 	return (error);
137 }
138 
139 /*
140  * System call interface to the socket abstraction.
141  */
142 #if defined(COMPAT_43)
143 #define COMPAT_OLDSOCK
144 #endif
145 
146 /*
147  * MPSAFE
148  */
149 int
150 socket(td, uap)
151 	struct thread *td;
152 	register struct socket_args /* {
153 		int	domain;
154 		int	type;
155 		int	protocol;
156 	} */ *uap;
157 {
158 	struct filedesc *fdp;
159 	struct socket *so;
160 	struct file *fp;
161 	int fd, error;
162 
163 #ifdef MAC
164 	error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
165 	    uap->protocol);
166 	if (error)
167 		return (error);
168 #endif
169 	fdp = td->td_proc->p_fd;
170 	error = falloc(td, &fp, &fd);
171 	if (error)
172 		return (error);
173 	/* An extra reference on `fp' has been held for us by falloc(). */
174 	NET_LOCK_GIANT();
175 	error = socreate(uap->domain, &so, uap->type, uap->protocol,
176 	    td->td_ucred, td);
177 	NET_UNLOCK_GIANT();
178 	if (error) {
179 		fdclose(fdp, fp, fd, td);
180 	} else {
181 		FILEDESC_LOCK_FAST(fdp);
182 		fp->f_data = so;	/* already has ref count */
183 		fp->f_flag = FREAD|FWRITE;
184 		fp->f_ops = &socketops;
185 		fp->f_type = DTYPE_SOCKET;
186 		FILEDESC_UNLOCK_FAST(fdp);
187 		td->td_retval[0] = fd;
188 	}
189 	fdrop(fp, td);
190 	return (error);
191 }
192 
193 /*
194  * MPSAFE
195  */
196 /* ARGSUSED */
197 int
198 bind(td, uap)
199 	struct thread *td;
200 	register struct bind_args /* {
201 		int	s;
202 		caddr_t	name;
203 		int	namelen;
204 	} */ *uap;
205 {
206 	struct sockaddr *sa;
207 	int error;
208 
209 	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
210 		return (error);
211 
212 	error = kern_bind(td, uap->s, sa);
213 	free(sa, M_SONAME);
214 	return (error);
215 }
216 
217 int
218 kern_bind(td, fd, sa)
219 	struct thread *td;
220 	int fd;
221 	struct sockaddr *sa;
222 {
223 	struct socket *so;
224 	struct file *fp;
225 	int error;
226 
227 	NET_LOCK_GIANT();
228 	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
229 	if (error)
230 		goto done2;
231 	so = fp->f_data;
232 #ifdef MAC
233 	SOCK_LOCK(so);
234 	error = mac_check_socket_bind(td->td_ucred, so, sa);
235 	SOCK_UNLOCK(so);
236 	if (error)
237 		goto done1;
238 #endif
239 	error = sobind(so, sa, td);
240 #ifdef MAC
241 done1:
242 #endif
243 	fdrop(fp, td);
244 done2:
245 	NET_UNLOCK_GIANT();
246 	return (error);
247 }
248 
249 /*
250  * MPSAFE
251  */
252 /* ARGSUSED */
253 int
254 listen(td, uap)
255 	struct thread *td;
256 	register struct listen_args /* {
257 		int	s;
258 		int	backlog;
259 	} */ *uap;
260 {
261 	struct socket *so;
262 	struct file *fp;
263 	int error;
264 
265 	NET_LOCK_GIANT();
266 	error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
267 	if (error == 0) {
268 		so = fp->f_data;
269 #ifdef MAC
270 		SOCK_LOCK(so);
271 		error = mac_check_socket_listen(td->td_ucred, so);
272 		SOCK_UNLOCK(so);
273 		if (error)
274 			goto done;
275 #endif
276 		error = solisten(so, uap->backlog, td);
277 #ifdef MAC
278 done:
279 #endif
280 		fdrop(fp, td);
281 	}
282 	NET_UNLOCK_GIANT();
283 	return(error);
284 }
285 
286 /*
287  * accept1()
288  * MPSAFE
289  */
290 static int
291 accept1(td, uap, compat)
292 	struct thread *td;
293 	register struct accept_args /* {
294 		int	s;
295 		struct sockaddr	* __restrict name;
296 		socklen_t	* __restrict anamelen;
297 	} */ *uap;
298 	int compat;
299 {
300 	struct sockaddr *name;
301 	socklen_t namelen;
302 	struct file *fp;
303 	int error;
304 
305 	if (uap->name == NULL)
306 		return (kern_accept(td, uap->s, NULL, NULL, NULL));
307 
308 	error = copyin(uap->anamelen, &namelen, sizeof (namelen));
309 	if (error)
310 		return (error);
311 
312 	error = kern_accept(td, uap->s, &name, &namelen, &fp);
313 
314 	/*
315 	 * return a namelen of zero for older code which might
316 	 * ignore the return value from accept.
317 	 */
318 	if (error) {
319 		(void) copyout(&namelen,
320 		    uap->anamelen, sizeof(*uap->anamelen));
321 		return (error);
322 	}
323 
324 	if (error == 0 && name != NULL) {
325 #ifdef COMPAT_OLDSOCK
326 		if (compat)
327 			((struct osockaddr *)name)->sa_family =
328 			    name->sa_family;
329 #endif
330 		error = copyout(name, uap->name, namelen);
331 	}
332 	if (error == 0)
333 		error = copyout(&namelen, uap->anamelen,
334 		    sizeof(namelen));
335 	if (error)
336 		fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
337 	fdrop(fp, td);
338 	free(name, M_SONAME);
339 	return (error);
340 }
341 
342 int
343 kern_accept(struct thread *td, int s, struct sockaddr **name,
344     socklen_t *namelen, struct file **fp)
345 {
346 	struct filedesc *fdp;
347 	struct file *headfp, *nfp = NULL;
348 	struct sockaddr *sa = NULL;
349 	int error;
350 	struct socket *head, *so;
351 	int fd;
352 	u_int fflag;
353 	pid_t pgid;
354 	int tmp;
355 
356 	if (name) {
357 		*name = NULL;
358 		if (*namelen < 0)
359 			return (EINVAL);
360 	}
361 
362 	fdp = td->td_proc->p_fd;
363 	NET_LOCK_GIANT();
364 	error = getsock(fdp, s, &headfp, &fflag);
365 	if (error)
366 		goto done2;
367 	head = headfp->f_data;
368 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
369 		error = EINVAL;
370 		goto done;
371 	}
372 #ifdef MAC
373 	SOCK_LOCK(head);
374 	error = mac_check_socket_accept(td->td_ucred, head);
375 	SOCK_UNLOCK(head);
376 	if (error != 0)
377 		goto done;
378 #endif
379 	error = falloc(td, &nfp, &fd);
380 	if (error)
381 		goto done;
382 	ACCEPT_LOCK();
383 	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
384 		ACCEPT_UNLOCK();
385 		error = EWOULDBLOCK;
386 		goto noconnection;
387 	}
388 	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
389 		if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
390 			head->so_error = ECONNABORTED;
391 			break;
392 		}
393 		error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
394 		    "accept", 0);
395 		if (error) {
396 			ACCEPT_UNLOCK();
397 			goto noconnection;
398 		}
399 	}
400 	if (head->so_error) {
401 		error = head->so_error;
402 		head->so_error = 0;
403 		ACCEPT_UNLOCK();
404 		goto noconnection;
405 	}
406 	so = TAILQ_FIRST(&head->so_comp);
407 	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
408 	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
409 
410 	/*
411 	 * Before changing the flags on the socket, we have to bump the
412 	 * reference count.  Otherwise, if the protocol calls sofree(),
413 	 * the socket will be released due to a zero refcount.
414 	 */
415 	SOCK_LOCK(so);			/* soref() and so_state update */
416 	soref(so);			/* file descriptor reference */
417 
418 	TAILQ_REMOVE(&head->so_comp, so, so_list);
419 	head->so_qlen--;
420 	so->so_state |= (head->so_state & SS_NBIO);
421 	so->so_qstate &= ~SQ_COMP;
422 	so->so_head = NULL;
423 
424 	SOCK_UNLOCK(so);
425 	ACCEPT_UNLOCK();
426 
427 	/* An extra reference on `nfp' has been held for us by falloc(). */
428 	td->td_retval[0] = fd;
429 
430 	/* connection has been removed from the listen queue */
431 	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
432 
433 	pgid = fgetown(&head->so_sigio);
434 	if (pgid != 0)
435 		fsetown(pgid, &so->so_sigio);
436 
437 	FILE_LOCK(nfp);
438 	nfp->f_data = so;	/* nfp has ref count from falloc */
439 	nfp->f_flag = fflag;
440 	nfp->f_ops = &socketops;
441 	nfp->f_type = DTYPE_SOCKET;
442 	FILE_UNLOCK(nfp);
443 	/* Sync socket nonblocking/async state with file flags */
444 	tmp = fflag & FNONBLOCK;
445 	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
446 	tmp = fflag & FASYNC;
447 	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
448 	sa = 0;
449 	error = soaccept(so, &sa);
450 	if (error) {
451 		/*
452 		 * return a namelen of zero for older code which might
453 		 * ignore the return value from accept.
454 		 */
455 		if (name)
456 			*namelen = 0;
457 		goto noconnection;
458 	}
459 	if (sa == NULL) {
460 		if (name)
461 			*namelen = 0;
462 		goto done;
463 	}
464 	if (name) {
465 		/* check sa_len before it is destroyed */
466 		if (*namelen > sa->sa_len)
467 			*namelen = sa->sa_len;
468 		*name = sa;
469 		sa = NULL;
470 	}
471 noconnection:
472 	if (sa)
473 		FREE(sa, M_SONAME);
474 
475 	/*
476 	 * close the new descriptor, assuming someone hasn't ripped it
477 	 * out from under us.
478 	 */
479 	if (error)
480 		fdclose(fdp, nfp, fd, td);
481 
482 	/*
483 	 * Release explicitly held references before returning.  We return
484 	 * a reference on nfp to the caller on success if they request it.
485 	 */
486 done:
487 	if (fp != NULL) {
488 		if (error == 0) {
489 			*fp = nfp;
490 			nfp = NULL;
491 		} else
492 			*fp = NULL;
493 	}
494 	if (nfp != NULL)
495 		fdrop(nfp, td);
496 	fdrop(headfp, td);
497 done2:
498 	NET_UNLOCK_GIANT();
499 	return (error);
500 }
501 
502 /*
503  * MPSAFE (accept1() is MPSAFE)
504  */
505 int
506 accept(td, uap)
507 	struct thread *td;
508 	struct accept_args *uap;
509 {
510 
511 	return (accept1(td, uap, 0));
512 }
513 
514 #ifdef COMPAT_OLDSOCK
515 /*
516  * MPSAFE (accept1() is MPSAFE)
517  */
518 int
519 oaccept(td, uap)
520 	struct thread *td;
521 	struct accept_args *uap;
522 {
523 
524 	return (accept1(td, uap, 1));
525 }
526 #endif /* COMPAT_OLDSOCK */
527 
528 /*
529  * MPSAFE
530  */
531 /* ARGSUSED */
532 int
533 connect(td, uap)
534 	struct thread *td;
535 	register struct connect_args /* {
536 		int	s;
537 		caddr_t	name;
538 		int	namelen;
539 	} */ *uap;
540 {
541 	struct sockaddr *sa;
542 	int error;
543 
544 	error = getsockaddr(&sa, uap->name, uap->namelen);
545 	if (error)
546 		return (error);
547 
548 	error = kern_connect(td, uap->s, sa);
549 	free(sa, M_SONAME);
550 	return (error);
551 }
552 
553 
554 int
555 kern_connect(td, fd, sa)
556 	struct thread *td;
557 	int fd;
558 	struct sockaddr *sa;
559 {
560 	struct socket *so;
561 	struct file *fp;
562 	int error;
563 	int interrupted = 0;
564 
565 	NET_LOCK_GIANT();
566 	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
567 	if (error)
568 		goto done2;
569 	so = fp->f_data;
570 	if (so->so_state & SS_ISCONNECTING) {
571 		error = EALREADY;
572 		goto done1;
573 	}
574 #ifdef MAC
575 	SOCK_LOCK(so);
576 	error = mac_check_socket_connect(td->td_ucred, so, sa);
577 	SOCK_UNLOCK(so);
578 	if (error)
579 		goto bad;
580 #endif
581 	error = soconnect(so, sa, td);
582 	if (error)
583 		goto bad;
584 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
585 		error = EINPROGRESS;
586 		goto done1;
587 	}
588 	SOCK_LOCK(so);
589 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
590 		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
591 		    "connec", 0);
592 		if (error) {
593 			if (error == EINTR || error == ERESTART)
594 				interrupted = 1;
595 			break;
596 		}
597 	}
598 	if (error == 0) {
599 		error = so->so_error;
600 		so->so_error = 0;
601 	}
602 	SOCK_UNLOCK(so);
603 bad:
604 	if (!interrupted)
605 		so->so_state &= ~SS_ISCONNECTING;
606 	if (error == ERESTART)
607 		error = EINTR;
608 done1:
609 	fdrop(fp, td);
610 done2:
611 	NET_UNLOCK_GIANT();
612 	return (error);
613 }
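/*
 * Illustrative userland sketch (the descriptor `s' and address `sin' are
 * hypothetical, and `s' is assumed to already be non-blocking): when the
 * SS_NBIO path in kern_connect() above returns EINPROGRESS, completion is
 * typically detected by waiting for writability and then reading SO_ERROR,
 * where a value of zero indicates that the connection completed:
 *
 *	if (connect(s, (struct sockaddr *)&sin, sizeof(sin)) == -1 &&
 *	    errno == EINPROGRESS) {
 *		struct pollfd pfd = { .fd = s, .events = POLLOUT };
 *		int err;
 *		socklen_t len = sizeof(err);
 *
 *		(void)poll(&pfd, 1, -1);
 *		(void)getsockopt(s, SOL_SOCKET, SO_ERROR, &err, &len);
 *	}
 */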
614 
615 /*
616  * MPSAFE
617  */
618 int
619 socketpair(td, uap)
620 	struct thread *td;
621 	register struct socketpair_args /* {
622 		int	domain;
623 		int	type;
624 		int	protocol;
625 		int	*rsv;
626 	} */ *uap;
627 {
628 	register struct filedesc *fdp = td->td_proc->p_fd;
629 	struct file *fp1, *fp2;
630 	struct socket *so1, *so2;
631 	int fd, error, sv[2];
632 
633 #ifdef MAC
634 	/* We might want to have a separate check for socket pairs. */
635 	error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
636 	    uap->protocol);
637 	if (error)
638 		return (error);
639 #endif
640 
641 	NET_LOCK_GIANT();
642 	error = socreate(uap->domain, &so1, uap->type, uap->protocol,
643 	    td->td_ucred, td);
644 	if (error)
645 		goto done2;
646 	error = socreate(uap->domain, &so2, uap->type, uap->protocol,
647 	    td->td_ucred, td);
648 	if (error)
649 		goto free1;
650 	/* On success, extra references on `fp1' and `fp2' are held for us by falloc(). */
651 	error = falloc(td, &fp1, &fd);
652 	if (error)
653 		goto free2;
654 	sv[0] = fd;
655 	fp1->f_data = so1;	/* so1 already has ref count */
656 	error = falloc(td, &fp2, &fd);
657 	if (error)
658 		goto free3;
659 	fp2->f_data = so2;	/* so2 already has ref count */
660 	sv[1] = fd;
661 	error = soconnect2(so1, so2);
662 	if (error)
663 		goto free4;
664 	if (uap->type == SOCK_DGRAM) {
665 		/*
666 		 * Datagram socket connection is asymmetric.
667 		 */
668 		 error = soconnect2(so2, so1);
669 		 if (error)
670 			goto free4;
671 	}
672 	FILE_LOCK(fp1);
673 	fp1->f_flag = FREAD|FWRITE;
674 	fp1->f_ops = &socketops;
675 	fp1->f_type = DTYPE_SOCKET;
676 	FILE_UNLOCK(fp1);
677 	FILE_LOCK(fp2);
678 	fp2->f_flag = FREAD|FWRITE;
679 	fp2->f_ops = &socketops;
680 	fp2->f_type = DTYPE_SOCKET;
681 	FILE_UNLOCK(fp2);
682 	error = copyout(sv, uap->rsv, 2 * sizeof (int));
683 	fdrop(fp1, td);
684 	fdrop(fp2, td);
685 	goto done2;
686 free4:
687 	fdclose(fdp, fp2, sv[1], td);
688 	fdrop(fp2, td);
689 free3:
690 	fdclose(fdp, fp1, sv[0], td);
691 	fdrop(fp1, td);
692 free2:
693 	(void)soclose(so2);
694 free1:
695 	(void)soclose(so1);
696 done2:
697 	NET_UNLOCK_GIANT();
698 	return (error);
699 }
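/*
 * Illustrative userland sketch (hypothetical names, not kernel code): the
 * typical use of the call above is to create a pair of connected UNIX-domain
 * stream sockets, e.g. for parent/child IPC around fork(2):
 *
 *	int sv[2];
 *
 *	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1)
 *		err(1, "socketpair");
 *
 * after which sv[0] and sv[1] are connected to each other.
 */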
700 
701 static int
702 sendit(td, s, mp, flags)
703 	register struct thread *td;
704 	int s;
705 	register struct msghdr *mp;
706 	int flags;
707 {
708 	struct mbuf *control;
709 	struct sockaddr *to;
710 	int error;
711 
712 	if (mp->msg_name != NULL) {
713 		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
714 		if (error) {
715 			to = NULL;
716 			goto bad;
717 		}
718 		mp->msg_name = to;
719 	} else {
720 		to = NULL;
721 	}
722 
723 	if (mp->msg_control) {
724 		if (mp->msg_controllen < sizeof(struct cmsghdr)
725 #ifdef COMPAT_OLDSOCK
726 		    && mp->msg_flags != MSG_COMPAT
727 #endif
728 		) {
729 			error = EINVAL;
730 			goto bad;
731 		}
732 		error = sockargs(&control, mp->msg_control,
733 		    mp->msg_controllen, MT_CONTROL);
734 		if (error)
735 			goto bad;
736 #ifdef COMPAT_OLDSOCK
737 		if (mp->msg_flags == MSG_COMPAT) {
738 			register struct cmsghdr *cm;
739 
740 			M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
741 			if (control == 0) {
742 				error = ENOBUFS;
743 				goto bad;
744 			} else {
745 				cm = mtod(control, struct cmsghdr *);
746 				cm->cmsg_len = control->m_len;
747 				cm->cmsg_level = SOL_SOCKET;
748 				cm->cmsg_type = SCM_RIGHTS;
749 			}
750 		}
751 #endif
752 	} else {
753 		control = NULL;
754 	}
755 
756 	error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
757 
758 bad:
759 	if (to)
760 		FREE(to, M_SONAME);
761 	return (error);
762 }
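/*
 * Illustrative userland sketch of the MT_CONTROL path handled above (the
 * names `sock' and `fd_to_pass' are hypothetical): passing a descriptor over
 * a connected UNIX-domain socket with SCM_RIGHTS builds exactly the kind of
 * control buffer that sockargs() converts into an mbuf for kern_sendit():
 *
 *	union {
 *		struct cmsghdr hdr;
 *		char buf[CMSG_SPACE(sizeof(int))];
 *	} cmsg;
 *	char byte = 0;
 *	struct iovec iov = { &byte, sizeof(byte) };
 *	struct msghdr msg;
 *	struct cmsghdr *cm;
 *
 *	memset(&msg, 0, sizeof(msg));
 *	msg.msg_iov = &iov;
 *	msg.msg_iovlen = 1;
 *	msg.msg_control = cmsg.buf;
 *	msg.msg_controllen = sizeof(cmsg.buf);
 *	cm = CMSG_FIRSTHDR(&msg);
 *	cm->cmsg_len = CMSG_LEN(sizeof(int));
 *	cm->cmsg_level = SOL_SOCKET;
 *	cm->cmsg_type = SCM_RIGHTS;
 *	memcpy(CMSG_DATA(cm), &fd_to_pass, sizeof(int));
 *	(void)sendmsg(sock, &msg, 0);
 */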
763 
764 int
765 kern_sendit(td, s, mp, flags, control, segflg)
766 	struct thread *td;
767 	int s;
768 	struct msghdr *mp;
769 	int flags;
770 	struct mbuf *control;
771 	enum uio_seg segflg;
772 {
773 	struct file *fp;
774 	struct uio auio;
775 	struct iovec *iov;
776 	struct socket *so;
777 	int i;
778 	int len, error;
779 #ifdef KTRACE
780 	struct uio *ktruio = NULL;
781 #endif
782 
783 	NET_LOCK_GIANT();
784 	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
785 	if (error)
786 		goto bad2;
787 	so = (struct socket *)fp->f_data;
788 
789 #ifdef MAC
790 	SOCK_LOCK(so);
791 	error = mac_check_socket_send(td->td_ucred, so);
792 	SOCK_UNLOCK(so);
793 	if (error)
794 		goto bad;
795 #endif
796 
797 	auio.uio_iov = mp->msg_iov;
798 	auio.uio_iovcnt = mp->msg_iovlen;
799 	auio.uio_segflg = segflg;
800 	auio.uio_rw = UIO_WRITE;
801 	auio.uio_td = td;
802 	auio.uio_offset = 0;			/* XXX */
803 	auio.uio_resid = 0;
804 	iov = mp->msg_iov;
805 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
806 		if ((auio.uio_resid += iov->iov_len) < 0) {
807 			error = EINVAL;
808 			goto bad;
809 		}
810 	}
811 #ifdef KTRACE
812 	if (KTRPOINT(td, KTR_GENIO))
813 		ktruio = cloneuio(&auio);
814 #endif
815 	len = auio.uio_resid;
816 	error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
817 	if (error) {
818 		if (auio.uio_resid != len && (error == ERESTART ||
819 		    error == EINTR || error == EWOULDBLOCK))
820 			error = 0;
821 		/* Generation of SIGPIPE can be controlled per socket */
822 		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
823 		    !(flags & MSG_NOSIGNAL)) {
824 			PROC_LOCK(td->td_proc);
825 			psignal(td->td_proc, SIGPIPE);
826 			PROC_UNLOCK(td->td_proc);
827 		}
828 	}
829 	if (error == 0)
830 		td->td_retval[0] = len - auio.uio_resid;
831 #ifdef KTRACE
832 	if (ktruio != NULL) {
833 		ktruio->uio_resid = td->td_retval[0];
834 		ktrgenio(s, UIO_WRITE, ktruio, error);
835 	}
836 #endif
837 bad:
838 	fdrop(fp, td);
839 bad2:
840 	NET_UNLOCK_GIANT();
841 	return (error);
842 }
843 
844 /*
845  * MPSAFE
846  */
847 int
848 sendto(td, uap)
849 	struct thread *td;
850 	register struct sendto_args /* {
851 		int	s;
852 		caddr_t	buf;
853 		size_t	len;
854 		int	flags;
855 		caddr_t	to;
856 		int	tolen;
857 	} */ *uap;
858 {
859 	struct msghdr msg;
860 	struct iovec aiov;
861 	int error;
862 
863 	msg.msg_name = uap->to;
864 	msg.msg_namelen = uap->tolen;
865 	msg.msg_iov = &aiov;
866 	msg.msg_iovlen = 1;
867 	msg.msg_control = 0;
868 #ifdef COMPAT_OLDSOCK
869 	msg.msg_flags = 0;
870 #endif
871 	aiov.iov_base = uap->buf;
872 	aiov.iov_len = uap->len;
873 	error = sendit(td, uap->s, &msg, uap->flags);
874 	return (error);
875 }
876 
877 #ifdef COMPAT_OLDSOCK
878 /*
879  * MPSAFE
880  */
881 int
882 osend(td, uap)
883 	struct thread *td;
884 	register struct osend_args /* {
885 		int	s;
886 		caddr_t	buf;
887 		int	len;
888 		int	flags;
889 	} */ *uap;
890 {
891 	struct msghdr msg;
892 	struct iovec aiov;
893 	int error;
894 
895 	msg.msg_name = 0;
896 	msg.msg_namelen = 0;
897 	msg.msg_iov = &aiov;
898 	msg.msg_iovlen = 1;
899 	aiov.iov_base = uap->buf;
900 	aiov.iov_len = uap->len;
901 	msg.msg_control = 0;
902 	msg.msg_flags = 0;
903 	error = sendit(td, uap->s, &msg, uap->flags);
904 	return (error);
905 }
906 
907 /*
908  * MPSAFE
909  */
910 int
911 osendmsg(td, uap)
912 	struct thread *td;
913 	struct osendmsg_args /* {
914 		int	s;
915 		caddr_t	msg;
916 		int	flags;
917 	} */ *uap;
918 {
919 	struct msghdr msg;
920 	struct iovec *iov;
921 	int error;
922 
923 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
924 	if (error)
925 		return (error);
926 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
927 	if (error)
928 		return (error);
929 	msg.msg_iov = iov;
930 	msg.msg_flags = MSG_COMPAT;
931 	error = sendit(td, uap->s, &msg, uap->flags);
932 	free(iov, M_IOV);
933 	return (error);
934 }
935 #endif
936 
937 /*
938  * MPSAFE
939  */
940 int
941 sendmsg(td, uap)
942 	struct thread *td;
943 	struct sendmsg_args /* {
944 		int	s;
945 		caddr_t	msg;
946 		int	flags;
947 	} */ *uap;
948 {
949 	struct msghdr msg;
950 	struct iovec *iov;
951 	int error;
952 
953 	error = copyin(uap->msg, &msg, sizeof (msg));
954 	if (error)
955 		return (error);
956 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
957 	if (error)
958 		return (error);
959 	msg.msg_iov = iov;
960 #ifdef COMPAT_OLDSOCK
961 	msg.msg_flags = 0;
962 #endif
963 	error = sendit(td, uap->s, &msg, uap->flags);
964 	free(iov, M_IOV);
965 	return (error);
966 }
967 
968 int
969 kern_recvit(td, s, mp, fromseg, controlp)
970 	struct thread *td;
971 	int s;
972 	struct msghdr *mp;
973 	enum uio_seg fromseg;
974 	struct mbuf **controlp;
975 {
976 	struct uio auio;
977 	struct iovec *iov;
978 	int i;
979 	socklen_t len;
980 	int error;
981 	struct mbuf *m, *control = 0;
982 	caddr_t ctlbuf;
983 	struct file *fp;
984 	struct socket *so;
985 	struct sockaddr *fromsa = 0;
986 #ifdef KTRACE
987 	struct uio *ktruio = NULL;
988 #endif
989 
990 	if (controlp != NULL)
991 		*controlp = 0;
992 
993 	NET_LOCK_GIANT();
994 	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
995 	if (error) {
996 		NET_UNLOCK_GIANT();
997 		return (error);
998 	}
999 	so = fp->f_data;
1000 
1001 #ifdef MAC
1002 	SOCK_LOCK(so);
1003 	error = mac_check_socket_receive(td->td_ucred, so);
1004 	SOCK_UNLOCK(so);
1005 	if (error) {
1006 		fdrop(fp, td);
1007 		NET_UNLOCK_GIANT();
1008 		return (error);
1009 	}
1010 #endif
1011 
1012 	auio.uio_iov = mp->msg_iov;
1013 	auio.uio_iovcnt = mp->msg_iovlen;
1014 	auio.uio_segflg = UIO_USERSPACE;
1015 	auio.uio_rw = UIO_READ;
1016 	auio.uio_td = td;
1017 	auio.uio_offset = 0;			/* XXX */
1018 	auio.uio_resid = 0;
1019 	iov = mp->msg_iov;
1020 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
1021 		if ((auio.uio_resid += iov->iov_len) < 0) {
1022 			fdrop(fp, td);
1023 			NET_UNLOCK_GIANT();
1024 			return (EINVAL);
1025 		}
1026 	}
1027 #ifdef KTRACE
1028 	if (KTRPOINT(td, KTR_GENIO))
1029 		ktruio = cloneuio(&auio);
1030 #endif
1031 	len = auio.uio_resid;
1032 	error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
1033 	    (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
1034 	    &mp->msg_flags);
1035 	if (error) {
1036 		if (auio.uio_resid != (int)len && (error == ERESTART ||
1037 		    error == EINTR || error == EWOULDBLOCK))
1038 			error = 0;
1039 	}
1040 #ifdef KTRACE
1041 	if (ktruio != NULL) {
1042 		ktruio->uio_resid = (int)len - auio.uio_resid;
1043 		ktrgenio(s, UIO_READ, ktruio, error);
1044 	}
1045 #endif
1046 	if (error)
1047 		goto out;
1048 	td->td_retval[0] = (int)len - auio.uio_resid;
1049 	if (mp->msg_name) {
1050 		len = mp->msg_namelen;
1051 		if (len <= 0 || fromsa == 0)
1052 			len = 0;
1053 		else {
1054 			/* save sa_len before it is destroyed by MSG_COMPAT */
1055 			len = MIN(len, fromsa->sa_len);
1056 #ifdef COMPAT_OLDSOCK
1057 			if (mp->msg_flags & MSG_COMPAT)
1058 				((struct osockaddr *)fromsa)->sa_family =
1059 				    fromsa->sa_family;
1060 #endif
1061 			if (fromseg == UIO_USERSPACE) {
1062 				error = copyout(fromsa, mp->msg_name,
1063 				    (unsigned)len);
1064 				if (error)
1065 					goto out;
1066 			} else
1067 				bcopy(fromsa, mp->msg_name, len);
1068 		}
1069 		mp->msg_namelen = len;
1070 	}
1071 	if (mp->msg_control && controlp == NULL) {
1072 #ifdef COMPAT_OLDSOCK
1073 		/*
1074 		 * We assume that old recvmsg calls won't receive access
1075 		 * rights and other control info, esp. as control info
1076 		 * is always optional and those options didn't exist in 4.3.
1077 		 * If we receive rights, trim the cmsghdr; anything else
1078 		 * is tossed.
1079 		 */
1080 		if (control && mp->msg_flags & MSG_COMPAT) {
1081 			if (mtod(control, struct cmsghdr *)->cmsg_level !=
1082 			    SOL_SOCKET ||
1083 			    mtod(control, struct cmsghdr *)->cmsg_type !=
1084 			    SCM_RIGHTS) {
1085 				mp->msg_controllen = 0;
1086 				goto out;
1087 			}
1088 			control->m_len -= sizeof (struct cmsghdr);
1089 			control->m_data += sizeof (struct cmsghdr);
1090 		}
1091 #endif
1092 		len = mp->msg_controllen;
1093 		m = control;
1094 		mp->msg_controllen = 0;
1095 		ctlbuf = mp->msg_control;
1096 
1097 		while (m && len > 0) {
1098 			unsigned int tocopy;
1099 
1100 			if (len >= m->m_len)
1101 				tocopy = m->m_len;
1102 			else {
1103 				mp->msg_flags |= MSG_CTRUNC;
1104 				tocopy = len;
1105 			}
1106 
1107 			if ((error = copyout(mtod(m, caddr_t),
1108 					ctlbuf, tocopy)) != 0)
1109 				goto out;
1110 
1111 			ctlbuf += tocopy;
1112 			len -= tocopy;
1113 			m = m->m_next;
1114 		}
1115 		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
1116 	}
1117 out:
1118 	fdrop(fp, td);
1119 	NET_UNLOCK_GIANT();
1120 	if (fromsa)
1121 		FREE(fromsa, M_SONAME);
1122 
1123 	if (error == 0 && controlp != NULL)
1124 		*controlp = control;
1125 	else  if (control)
1126 		m_freem(control);
1127 
1128 	return (error);
1129 }
1130 
1131 static int
1132 recvit(td, s, mp, namelenp)
1133 	struct thread *td;
1134 	int s;
1135 	struct msghdr *mp;
1136 	void *namelenp;
1137 {
1138 	int error;
1139 
1140 	error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
1141 	if (error)
1142 		return (error);
1143 	if (namelenp) {
1144 		error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
1145 #ifdef COMPAT_OLDSOCK
1146 		if (mp->msg_flags & MSG_COMPAT)
1147 			error = 0;	/* old recvfrom didn't check */
1148 #endif
1149 	}
1150 	return (error);
1151 }
1152 
1153 /*
1154  * MPSAFE
1155  */
1156 int
1157 recvfrom(td, uap)
1158 	struct thread *td;
1159 	register struct recvfrom_args /* {
1160 		int	s;
1161 		caddr_t	buf;
1162 		size_t	len;
1163 		int	flags;
1164 		struct sockaddr * __restrict	from;
1165 		socklen_t * __restrict fromlenaddr;
1166 	} */ *uap;
1167 {
1168 	struct msghdr msg;
1169 	struct iovec aiov;
1170 	int error;
1171 
1172 	if (uap->fromlenaddr) {
1173 		error = copyin(uap->fromlenaddr,
1174 		    &msg.msg_namelen, sizeof (msg.msg_namelen));
1175 		if (error)
1176 			goto done2;
1177 	} else {
1178 		msg.msg_namelen = 0;
1179 	}
1180 	msg.msg_name = uap->from;
1181 	msg.msg_iov = &aiov;
1182 	msg.msg_iovlen = 1;
1183 	aiov.iov_base = uap->buf;
1184 	aiov.iov_len = uap->len;
1185 	msg.msg_control = 0;
1186 	msg.msg_flags = uap->flags;
1187 	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1188 done2:
1189 	return(error);
1190 }
1191 
1192 #ifdef COMPAT_OLDSOCK
1193 /*
1194  * MPSAFE
1195  */
1196 int
1197 orecvfrom(td, uap)
1198 	struct thread *td;
1199 	struct recvfrom_args *uap;
1200 {
1201 
1202 	uap->flags |= MSG_COMPAT;
1203 	return (recvfrom(td, uap));
1204 }
1205 #endif
1206 
1207 
1208 #ifdef COMPAT_OLDSOCK
1209 /*
1210  * MPSAFE
1211  */
1212 int
1213 orecv(td, uap)
1214 	struct thread *td;
1215 	register struct orecv_args /* {
1216 		int	s;
1217 		caddr_t	buf;
1218 		int	len;
1219 		int	flags;
1220 	} */ *uap;
1221 {
1222 	struct msghdr msg;
1223 	struct iovec aiov;
1224 	int error;
1225 
1226 	msg.msg_name = 0;
1227 	msg.msg_namelen = 0;
1228 	msg.msg_iov = &aiov;
1229 	msg.msg_iovlen = 1;
1230 	aiov.iov_base = uap->buf;
1231 	aiov.iov_len = uap->len;
1232 	msg.msg_control = 0;
1233 	msg.msg_flags = uap->flags;
1234 	error = recvit(td, uap->s, &msg, NULL);
1235 	return (error);
1236 }
1237 
1238 /*
1239  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1240  * overlays the new one, missing only the flags, and with the (old) access
1241  * rights where the control fields are now.
1242  *
1243  * MPSAFE
1244  */
1245 int
1246 orecvmsg(td, uap)
1247 	struct thread *td;
1248 	struct orecvmsg_args /* {
1249 		int	s;
1250 		struct	omsghdr *msg;
1251 		int	flags;
1252 	} */ *uap;
1253 {
1254 	struct msghdr msg;
1255 	struct iovec *iov;
1256 	int error;
1257 
1258 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1259 	if (error)
1260 		return (error);
1261 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1262 	if (error)
1263 		return (error);
1264 	msg.msg_flags = uap->flags | MSG_COMPAT;
1265 	msg.msg_iov = iov;
1266 	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1267 	if (msg.msg_controllen && error == 0)
1268 		error = copyout(&msg.msg_controllen,
1269 		    &uap->msg->msg_accrightslen, sizeof (int));
1270 	free(iov, M_IOV);
1271 	return (error);
1272 }
1273 #endif
1274 
1275 /*
1276  * MPSAFE
1277  */
1278 int
1279 recvmsg(td, uap)
1280 	struct thread *td;
1281 	struct recvmsg_args /* {
1282 		int	s;
1283 		struct	msghdr *msg;
1284 		int	flags;
1285 	} */ *uap;
1286 {
1287 	struct msghdr msg;
1288 	struct iovec *uiov, *iov;
1289 	int error;
1290 
1291 	error = copyin(uap->msg, &msg, sizeof (msg));
1292 	if (error)
1293 		return (error);
1294 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1295 	if (error)
1296 		return (error);
1297 	msg.msg_flags = uap->flags;
1298 #ifdef COMPAT_OLDSOCK
1299 	msg.msg_flags &= ~MSG_COMPAT;
1300 #endif
1301 	uiov = msg.msg_iov;
1302 	msg.msg_iov = iov;
1303 	error = recvit(td, uap->s, &msg, NULL);
1304 	if (error == 0) {
1305 		msg.msg_iov = uiov;
1306 		error = copyout(&msg, uap->msg, sizeof(msg));
1307 	}
1308 	free(iov, M_IOV);
1309 	return (error);
1310 }
1311 
1312 /*
1313  * MPSAFE
1314  */
1315 /* ARGSUSED */
1316 int
1317 shutdown(td, uap)
1318 	struct thread *td;
1319 	register struct shutdown_args /* {
1320 		int	s;
1321 		int	how;
1322 	} */ *uap;
1323 {
1324 	struct socket *so;
1325 	struct file *fp;
1326 	int error;
1327 
1328 	NET_LOCK_GIANT();
1329 	error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
1330 	if (error == 0) {
1331 		so = fp->f_data;
1332 		error = soshutdown(so, uap->how);
1333 		fdrop(fp, td);
1334 	}
1335 	NET_UNLOCK_GIANT();
1336 	return (error);
1337 }
1338 
1339 /*
1340  * MPSAFE
1341  */
1342 /* ARGSUSED */
1343 int
1344 setsockopt(td, uap)
1345 	struct thread *td;
1346 	register struct setsockopt_args /* {
1347 		int	s;
1348 		int	level;
1349 		int	name;
1350 		caddr_t	val;
1351 		int	valsize;
1352 	} */ *uap;
1353 {
1354 
1355 	return (kern_setsockopt(td, uap->s, uap->level, uap->name,
1356 	    uap->val, UIO_USERSPACE, uap->valsize));
1357 }
1358 
1359 int
1360 kern_setsockopt(td, s, level, name, val, valseg, valsize)
1361 	struct thread *td;
1362 	int s;
1363 	int level;
1364 	int name;
1365 	void *val;
1366 	enum uio_seg valseg;
1367 	socklen_t valsize;
1368 {
1369 	int error;
1370 	struct socket *so;
1371 	struct file *fp;
1372 	struct sockopt sopt;
1373 
1374 	if (val == NULL && valsize != 0)
1375 		return (EFAULT);
1376 	if ((int)valsize < 0)
1377 		return (EINVAL);
1378 
1379 	sopt.sopt_dir = SOPT_SET;
1380 	sopt.sopt_level = level;
1381 	sopt.sopt_name = name;
1382 	sopt.sopt_val = val;
1383 	sopt.sopt_valsize = valsize;
1384 	switch (valseg) {
1385 	case UIO_USERSPACE:
1386 		sopt.sopt_td = td;
1387 		break;
1388 	case UIO_SYSSPACE:
1389 		sopt.sopt_td = NULL;
1390 		break;
1391 	default:
1392 		panic("kern_setsockopt called with bad valseg");
1393 	}
1394 
1395 	NET_LOCK_GIANT();
1396 	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1397 	if (error == 0) {
1398 		so = fp->f_data;
1399 		error = sosetopt(so, &sopt);
1400 		fdrop(fp, td);
1401 	}
1402 	NET_UNLOCK_GIANT();
1403 	return(error);
1404 }
1405 
1406 /*
1407  * MPSAFE
1408  */
1409 /* ARGSUSED */
1410 int
1411 getsockopt(td, uap)
1412 	struct thread *td;
1413 	register struct getsockopt_args /* {
1414 		int	s;
1415 		int	level;
1416 		int	name;
1417 		void * __restrict	val;
1418 		socklen_t * __restrict avalsize;
1419 	} */ *uap;
1420 {
1421 	socklen_t valsize;
1422 	int	error;
1423 
1424 	if (uap->val) {
1425 		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1426 		if (error)
1427 			return (error);
1428 	}
1429 
1430 	error = kern_getsockopt(td, uap->s, uap->level, uap->name,
1431 	    uap->val, UIO_USERSPACE, &valsize);
1432 
1433 	if (error == 0)
1434 		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1435 	return (error);
1436 }
1437 
1438 /*
1439  * Kernel version of getsockopt.
1440  * optval can be a userland or kernel address; optlen is always a kernel pointer.
1441  */
1442 int
1443 kern_getsockopt(td, s, level, name, val, valseg, valsize)
1444 	struct thread *td;
1445 	int s;
1446 	int level;
1447 	int name;
1448 	void *val;
1449 	enum uio_seg valseg;
1450 	socklen_t *valsize;
1451 {
1452 	int error;
1453 	struct  socket *so;
1454 	struct file *fp;
1455 	struct	sockopt sopt;
1456 
1457 	if (val == NULL)
1458 		*valsize = 0;
1459 	if ((int)*valsize < 0)
1460 		return (EINVAL);
1461 
1462 	sopt.sopt_dir = SOPT_GET;
1463 	sopt.sopt_level = level;
1464 	sopt.sopt_name = name;
1465 	sopt.sopt_val = val;
1466 	sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
1467 	switch (valseg) {
1468 	case UIO_USERSPACE:
1469 		sopt.sopt_td = td;
1470 		break;
1471 	case UIO_SYSSPACE:
1472 		sopt.sopt_td = NULL;
1473 		break;
1474 	default:
1475 		panic("kern_getsockopt called with bad valseg");
1476 	}
1477 
1478 	NET_LOCK_GIANT();
1479 	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1480 	if (error == 0) {
1481 		so = fp->f_data;
1482 		error = sogetopt(so, &sopt);
1483 		*valsize = sopt.sopt_valsize;
1484 		fdrop(fp, td);
1485 	}
1486 	NET_UNLOCK_GIANT();
1487 	return (error);
1488 }
1489 
1490 /*
1491  * getsockname1() - Get socket name.
1492  *
1493  * MPSAFE
1494  */
1495 /* ARGSUSED */
1496 static int
1497 getsockname1(td, uap, compat)
1498 	struct thread *td;
1499 	register struct getsockname_args /* {
1500 		int	fdes;
1501 		struct sockaddr * __restrict asa;
1502 		socklen_t * __restrict alen;
1503 	} */ *uap;
1504 	int compat;
1505 {
1506 	struct sockaddr *sa;
1507 	socklen_t len;
1508 	int error;
1509 
1510 	error = copyin(uap->alen, &len, sizeof(len));
1511 	if (error)
1512 		return (error);
1513 
1514 	error = kern_getsockname(td, uap->fdes, &sa, &len);
1515 	if (error)
1516 		return (error);
1517 
1518 	if (len != 0) {
1519 #ifdef COMPAT_OLDSOCK
1520 		if (compat)
1521 			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1522 #endif
1523 		error = copyout(sa, uap->asa, (u_int)len);
1524 	}
1525 	free(sa, M_SONAME);
1526 	if (error == 0)
1527 		error = copyout(&len, uap->alen, sizeof(len));
1528 	return (error);
1529 }
1530 
1531 int
1532 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
1533     socklen_t *alen)
1534 {
1535 	struct socket *so;
1536 	struct file *fp;
1537 	socklen_t len;
1538 	int error;
1539 
1540 	if (*alen < 0)
1541 		return (EINVAL);
1542 
1543 	NET_LOCK_GIANT();
1544 	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
1545 	if (error)
1546 		goto done;
1547 	so = fp->f_data;
1548 	*sa = NULL;
1549 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
1550 	if (error)
1551 		goto bad;
1552 	if (*sa == NULL)
1553 		len = 0;
1554 	else
1555 		len = MIN(*alen, (*sa)->sa_len);
1556 	*alen = len;
1557 bad:
1558 	fdrop(fp, td);
1559 	if (error && *sa) {
1560 		free(*sa, M_SONAME);
1561 		*sa = NULL;
1562 	}
1563 done:
1564 	NET_UNLOCK_GIANT();
1565 	return (error);
1566 }
1567 
1568 /*
1569  * MPSAFE
1570  */
1571 int
1572 getsockname(td, uap)
1573 	struct thread *td;
1574 	struct getsockname_args *uap;
1575 {
1576 
1577 	return (getsockname1(td, uap, 0));
1578 }
1579 
1580 #ifdef COMPAT_OLDSOCK
1581 /*
1582  * MPSAFE
1583  */
1584 int
1585 ogetsockname(td, uap)
1586 	struct thread *td;
1587 	struct getsockname_args *uap;
1588 {
1589 
1590 	return (getsockname1(td, uap, 1));
1591 }
1592 #endif /* COMPAT_OLDSOCK */
1593 
1594 /*
1595  * getpeername1() - Get name of peer for connected socket.
1596  *
1597  * MPSAFE
1598  */
1599 /* ARGSUSED */
1600 static int
1601 getpeername1(td, uap, compat)
1602 	struct thread *td;
1603 	register struct getpeername_args /* {
1604 		int	fdes;
1605 		struct sockaddr * __restrict	asa;
1606 		socklen_t * __restrict	alen;
1607 	} */ *uap;
1608 	int compat;
1609 {
1610 	struct sockaddr *sa;
1611 	socklen_t len;
1612 	int error;
1613 
1614 	error = copyin(uap->alen, &len, sizeof (len));
1615 	if (error)
1616 		return (error);
1617 
1618 	error = kern_getpeername(td, uap->fdes, &sa, &len);
1619 	if (error)
1620 		return (error);
1621 
1622 	if (len != 0) {
1623 #ifdef COMPAT_OLDSOCK
1624 		if (compat)
1625 			((struct osockaddr *)sa)->sa_family = sa->sa_family;
1626 #endif
1627 		error = copyout(sa, uap->asa, (u_int)len);
1628 	}
1629 	free(sa, M_SONAME);
1630 	if (error == 0)
1631 		error = copyout(&len, uap->alen, sizeof(len));
1632 	return (error);
1633 }
1634 
1635 int
1636 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
1637     socklen_t *alen)
1638 {
1639 	struct socket *so;
1640 	struct file *fp;
1641 	socklen_t len;
1642 	int error;
1643 
1644 	if (*alen < 0)
1645 		return (EINVAL);
1646 
1647 	NET_LOCK_GIANT();
1648 	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
1649 	if (error)
1650 		goto done2;
1651 	so = fp->f_data;
1652 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1653 		error = ENOTCONN;
1654 		goto done1;
1655 	}
1656 	*sa = NULL;
1657 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
1658 	if (error)
1659 		goto bad;
1660 	if (*sa == NULL)
1661 		len = 0;
1662 	else
1663 		len = MIN(*alen, (*sa)->sa_len);
1664 	*alen = len;
1665 bad:
1666 	if (error && *sa) {
1667 		free(*sa, M_SONAME);
1668 		*sa = NULL;
1669 	}
1670 done1:
1671 	fdrop(fp, td);
1672 done2:
1673 	NET_UNLOCK_GIANT();
1674 	return (error);
1675 }
1676 
1677 /*
1678  * MPSAFE
1679  */
1680 int
1681 getpeername(td, uap)
1682 	struct thread *td;
1683 	struct getpeername_args *uap;
1684 {
1685 
1686 	return (getpeername1(td, uap, 0));
1687 }
1688 
1689 #ifdef COMPAT_OLDSOCK
1690 /*
1691  * MPSAFE
1692  */
1693 int
1694 ogetpeername(td, uap)
1695 	struct thread *td;
1696 	struct ogetpeername_args *uap;
1697 {
1698 
1699 	/* XXX uap should have type `getpeername_args *' to begin with. */
1700 	return (getpeername1(td, (struct getpeername_args *)uap, 1));
1701 }
1702 #endif /* COMPAT_OLDSOCK */
1703 
1704 int
1705 sockargs(mp, buf, buflen, type)
1706 	struct mbuf **mp;
1707 	caddr_t buf;
1708 	int buflen, type;
1709 {
1710 	register struct sockaddr *sa;
1711 	register struct mbuf *m;
1712 	int error;
1713 
1714 	if ((u_int)buflen > MLEN) {
1715 #ifdef COMPAT_OLDSOCK
1716 		if (type == MT_SONAME && (u_int)buflen <= 112)
1717 			buflen = MLEN;		/* unix domain compat. hack */
1718 		else
1719 #endif
1720 			if ((u_int)buflen > MCLBYTES)
1721 				return (EINVAL);
1722 	}
1723 	m = m_get(M_TRYWAIT, type);
1724 	if (m == NULL)
1725 		return (ENOBUFS);
1726 	if ((u_int)buflen > MLEN) {
1727 		MCLGET(m, M_TRYWAIT);
1728 		if ((m->m_flags & M_EXT) == 0) {
1729 			m_free(m);
1730 			return (ENOBUFS);
1731 		}
1732 	}
1733 	m->m_len = buflen;
1734 	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1735 	if (error)
1736 		(void) m_free(m);
1737 	else {
1738 		*mp = m;
1739 		if (type == MT_SONAME) {
1740 			sa = mtod(m, struct sockaddr *);
1741 
1742 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1743 			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1744 				sa->sa_family = sa->sa_len;
1745 #endif
1746 			sa->sa_len = buflen;
1747 		}
1748 	}
1749 	return (error);
1750 }
1751 
1752 int
1753 getsockaddr(namp, uaddr, len)
1754 	struct sockaddr **namp;
1755 	caddr_t uaddr;
1756 	size_t len;
1757 {
1758 	struct sockaddr *sa;
1759 	int error;
1760 
1761 	if (len > SOCK_MAXADDRLEN)
1762 		return (ENAMETOOLONG);
1763 	if (len < offsetof(struct sockaddr, sa_data[0]))
1764 		return (EINVAL);
1765 	MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1766 	error = copyin(uaddr, sa, len);
1767 	if (error) {
1768 		FREE(sa, M_SONAME);
1769 	} else {
1770 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1771 		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1772 			sa->sa_family = sa->sa_len;
1773 #endif
1774 		sa->sa_len = len;
1775 		*namp = sa;
1776 	}
1777 	return (error);
1778 }
1779 
1780 /*
1781  * Detach mapped page and release resources back to the system.
1782  */
1783 void
1784 sf_buf_mext(void *addr, void *args)
1785 {
1786 	vm_page_t m;
1787 
1788 	m = sf_buf_page(args);
1789 	sf_buf_free(args);
1790 	vm_page_lock_queues();
1791 	vm_page_unwire(m, 0);
1792 	/*
1793 	 * Check for the object going away on us. This can
1794 	 * happen since we don't hold a reference to it.
1795 	 * If so, we're responsible for freeing the page.
1796 	 */
1797 	if (m->wire_count == 0 && m->object == NULL)
1798 		vm_page_free(m);
1799 	vm_page_unlock_queues();
1800 }
1801 
1802 /*
1803  * sendfile(2)
1804  *
1805  * MPSAFE
1806  *
1807  * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1808  *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1809  *
1810  * Send a file specified by 'fd' and starting at 'offset' to a socket
1811  * specified by 's'. Send only 'nbytes' of the file or until EOF if
1812  * nbytes == 0. Optionally add a header and/or trailer to the socket
1813  * output. If specified, write the total number of bytes sent into *sbytes.
1814  *
1815  */
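/*
 * Illustrative userland sketch (the descriptors `fd' and `s' and the header
 * string are hypothetical): a typical call sends an optional header followed
 * by the whole file (nbytes == 0 means until EOF), collecting the byte count
 * in sbytes:
 *
 *	struct sf_hdtr hdtr;
 *	struct iovec hdr_iov;
 *	off_t sbytes;
 *
 *	hdr_iov.iov_base = "HTTP/1.0 200 OK\r\n\r\n";
 *	hdr_iov.iov_len = 19;
 *	memset(&hdtr, 0, sizeof(hdtr));
 *	hdtr.headers = &hdr_iov;
 *	hdtr.hdr_cnt = 1;
 *
 *	if (sendfile(fd, s, 0, 0, &hdtr, &sbytes, 0) == -1)
 *		err(1, "sendfile");
 */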
1816 int
1817 sendfile(struct thread *td, struct sendfile_args *uap)
1818 {
1819 
1820 	return (do_sendfile(td, uap, 0));
1821 }
1822 
1823 static int
1824 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1825 {
1826 	struct sf_hdtr hdtr;
1827 	struct uio *hdr_uio, *trl_uio;
1828 	int error;
1829 
1830 	hdr_uio = trl_uio = NULL;
1831 
1832 	if (uap->hdtr != NULL) {
1833 		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1834 		if (error)
1835 			goto out;
1836 		if (hdtr.headers != NULL) {
1837 			error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1838 			if (error)
1839 				goto out;
1840 		}
1841 		if (hdtr.trailers != NULL) {
1842 			error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
1843 			if (error)
1844 				goto out;
1845 
1846 		}
1847 	}
1848 
1849 	error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
1850 out:
1851 	if (hdr_uio)
1852 		free(hdr_uio, M_IOV);
1853 	if (trl_uio)
1854 		free(trl_uio, M_IOV);
1855 	return (error);
1856 }
1857 
1858 #ifdef COMPAT_FREEBSD4
1859 int
1860 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
1861 {
1862 	struct sendfile_args args;
1863 
1864 	args.fd = uap->fd;
1865 	args.s = uap->s;
1866 	args.offset = uap->offset;
1867 	args.nbytes = uap->nbytes;
1868 	args.hdtr = uap->hdtr;
1869 	args.sbytes = uap->sbytes;
1870 	args.flags = uap->flags;
1871 
1872 	return (do_sendfile(td, &args, 1));
1873 }
1874 #endif /* COMPAT_FREEBSD4 */
1875 
1876 int
1877 kern_sendfile(struct thread *td, struct sendfile_args *uap,
1878     struct uio *hdr_uio, struct uio *trl_uio, int compat)
1879 {
1880 	struct file *sock_fp;
1881 	struct vnode *vp;
1882 	struct vm_object *obj = NULL;
1883 	struct socket *so = NULL;
1884 	struct mbuf *m, *m_header = NULL;
1885 	struct sf_buf *sf;
1886 	struct vm_page *pg;
1887 	off_t off, xfsize, hdtr_size, sbytes = 0;
1888 	int error, headersize = 0, headersent = 0;
1889 	int vfslocked;
1890 
1891 	NET_LOCK_GIANT();
1892 
1893 	hdtr_size = 0;
1894 
1895 	/*
1896 	 * The descriptor must be a regular file and have a backing VM object.
1897 	 */
1898 	if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1899 		goto done;
1900 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1901 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1902 	obj = vp->v_object;
1903 	if (obj != NULL) {
1904 		/*
1905 		 * Temporarily increase the backing VM object's reference
1906 		 * count so that a forced reclamation of its vnode does not
1907 		 * immediately destroy it.
1908 		 */
1909 		VM_OBJECT_LOCK(obj);
1910 		if ((obj->flags & OBJ_DEAD) == 0) {
1911 			vm_object_reference_locked(obj);
1912 			VM_OBJECT_UNLOCK(obj);
1913 		} else {
1914 			VM_OBJECT_UNLOCK(obj);
1915 			obj = NULL;
1916 		}
1917 	}
1918 	VOP_UNLOCK(vp, 0, td);
1919 	VFS_UNLOCK_GIANT(vfslocked);
1920 	if (obj == NULL) {
1921 		error = EINVAL;
1922 		goto done;
1923 	}
1924 	if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp, NULL)) != 0)
1925 		goto done;
1926 	so = sock_fp->f_data;
1927 	if (so->so_type != SOCK_STREAM) {
1928 		error = EINVAL;
1929 		goto done;
1930 	}
1931 	if ((so->so_state & SS_ISCONNECTED) == 0) {
1932 		error = ENOTCONN;
1933 		goto done;
1934 	}
1935 	if (uap->offset < 0) {
1936 		error = EINVAL;
1937 		goto done;
1938 	}
1939 
1940 #ifdef MAC
1941 	SOCK_LOCK(so);
1942 	error = mac_check_socket_send(td->td_ucred, so);
1943 	SOCK_UNLOCK(so);
1944 	if (error)
1945 		goto done;
1946 #endif
1947 
1948 	/*
1949 	 * If the caller supplied header data, construct the header mbuf
1950 	 * chain from the header uio (copied in earlier by do_sendfile()).
1951 	 */
1952 	if (hdr_uio != NULL) {
1953 		hdr_uio->uio_td = td;
1954 		hdr_uio->uio_rw = UIO_WRITE;
1955 		if (hdr_uio->uio_resid > 0) {
1956 			m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0, 0);
1957 			if (m_header == NULL)
1958 				goto done;
1959 			headersize = m_header->m_pkthdr.len;
1960 			if (compat)
1961 				sbytes += headersize;
1962 		}
1963 	}
1964 
1965 	/*
1966 	 * Protect against multiple writers to the socket.
1967 	 */
1968 	SOCKBUF_LOCK(&so->so_snd);
1969 	(void) sblock(&so->so_snd, M_WAITOK);
1970 	SOCKBUF_UNLOCK(&so->so_snd);
1971 
1972 	/*
1973 	 * Loop through the pages in the file, starting with the requested
1974 	 * offset. Get a file page (do I/O if necessary), map the file page
1975 	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1976 	 * it on the socket.
1977 	 */
1978 	for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1979 		vm_pindex_t pindex;
1980 		vm_offset_t pgoff;
1981 
1982 		pindex = OFF_TO_IDX(off);
1983 		VM_OBJECT_LOCK(obj);
1984 retry_lookup:
1985 		/*
1986 		 * Calculate the amount to transfer. Not to exceed a page,
1987 		 * the EOF, or the passed in nbytes.
1988 		 */
1989 		xfsize = obj->un_pager.vnp.vnp_size - off;
1990 		VM_OBJECT_UNLOCK(obj);
1991 		if (xfsize > PAGE_SIZE)
1992 			xfsize = PAGE_SIZE;
1993 		pgoff = (vm_offset_t)(off & PAGE_MASK);
1994 		if (PAGE_SIZE - pgoff < xfsize)
1995 			xfsize = PAGE_SIZE - pgoff;
1996 		if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1997 			xfsize = uap->nbytes - sbytes;
1998 		if (xfsize <= 0) {
1999 			if (m_header != NULL) {
2000 				m = m_header;
2001 				m_header = NULL;
2002 				SOCKBUF_LOCK(&so->so_snd);
2003 				goto retry_space;
2004 			} else
2005 				break;
2006 		}
2007 		/*
2008 		 * Optimize the non-blocking case by looking at the socket space
2009 		 * before going to the extra work of constituting the sf_buf.
2010 		 */
2011 		SOCKBUF_LOCK(&so->so_snd);
2012 		if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
2013 			if (so->so_snd.sb_state & SBS_CANTSENDMORE)
2014 				error = EPIPE;
2015 			else
2016 				error = EAGAIN;
2017 			sbunlock(&so->so_snd);
2018 			SOCKBUF_UNLOCK(&so->so_snd);
2019 			goto done;
2020 		}
2021 		SOCKBUF_UNLOCK(&so->so_snd);
2022 		VM_OBJECT_LOCK(obj);
2023 		/*
2024 		 * Attempt to look up the page.
2025 		 *
2026 		 *	Allocate if not found
2027 		 *
2028 		 *	Wait and loop if busy.
2029 		 */
2030 		pg = vm_page_lookup(obj, pindex);
2031 
2032 		if (pg == NULL) {
2033 			pg = vm_page_alloc(obj, pindex, VM_ALLOC_NOBUSY |
2034 			    VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
2035 			if (pg == NULL) {
2036 				VM_OBJECT_UNLOCK(obj);
2037 				VM_WAIT;
2038 				VM_OBJECT_LOCK(obj);
2039 				goto retry_lookup;
2040 			}
2041 		} else {
2042 			vm_page_lock_queues();
2043 			if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
2044 				goto retry_lookup;
2045 			/*
2046 			 * Wire the page so it does not get ripped out from
2047 			 * under us.
2048 			 */
2049 			vm_page_wire(pg);
2050 			vm_page_unlock_queues();
2051 		}
2052 
2053 		/*
2054 		 * If page is not valid for what we need, initiate I/O
2055 		 */
2056 
2057 		if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) {
2058 			VM_OBJECT_UNLOCK(obj);
2059 		} else if (uap->flags & SF_NODISKIO) {
2060 			error = EBUSY;
2061 		} else {
2062 			int bsize, resid;
2063 
2064 			/*
2065 			 * Ensure that our page is still around when the I/O
2066 			 * completes.
2067 			 */
2068 			vm_page_io_start(pg);
2069 			VM_OBJECT_UNLOCK(obj);
2070 
2071 			/*
2072 			 * Get the page from backing store.
2073 			 */
2074 			bsize = vp->v_mount->mnt_stat.f_iosize;
2075 			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2076 			vn_lock(vp, LK_SHARED | LK_RETRY, td);
2077 			/*
2078 			 * XXXMAC: Because we don't have fp->f_cred here,
2079 			 * we pass in NOCRED.  This is probably wrong, but
2080 			 * is consistent with our original implementation.
2081 			 */
2082 			error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
2083 			    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
2084 			    IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
2085 			    td->td_ucred, NOCRED, &resid, td);
2086 			VOP_UNLOCK(vp, 0, td);
2087 			VFS_UNLOCK_GIANT(vfslocked);
2088 			VM_OBJECT_LOCK(obj);
2089 			vm_page_lock_queues();
2090 			vm_page_io_finish(pg);
2091 			vm_page_unlock_queues();
2092 			if (!error)
2093 				VM_OBJECT_UNLOCK(obj);
2094 			mbstat.sf_iocnt++;
2095 		}
2096 
2097 		if (error) {
2098 			vm_page_lock_queues();
2099 			vm_page_unwire(pg, 0);
2100 			/*
2101 			 * See if anyone else might know about this page.
2102 			 * If not and it is not valid, then free it.
2103 			 */
2104 			if (pg->wire_count == 0 && pg->valid == 0 &&
2105 			    pg->busy == 0 && !(pg->flags & PG_BUSY) &&
2106 			    pg->hold_count == 0) {
2107 				vm_page_free(pg);
2108 			}
2109 			vm_page_unlock_queues();
2110 			VM_OBJECT_UNLOCK(obj);
2111 			SOCKBUF_LOCK(&so->so_snd);
2112 			sbunlock(&so->so_snd);
2113 			SOCKBUF_UNLOCK(&so->so_snd);
2114 			goto done;
2115 		}
2116 
2117 		/*
2118 		 * Get a sendfile buf. We usually wait as long as necessary,
2119 		 * but this wait can be interrupted.
2120 		 */
2121 		if ((sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) {
2122 			mbstat.sf_allocfail++;
2123 			vm_page_lock_queues();
2124 			vm_page_unwire(pg, 0);
2125 			if (pg->wire_count == 0 && pg->object == NULL)
2126 				vm_page_free(pg);
2127 			vm_page_unlock_queues();
2128 			SOCKBUF_LOCK(&so->so_snd);
2129 			sbunlock(&so->so_snd);
2130 			SOCKBUF_UNLOCK(&so->so_snd);
2131 			error = EINTR;
2132 			goto done;
2133 		}
2134 
2135 		/*
2136 		 * Get an mbuf header and set it up as having external storage.
2137 		 */
2138 		if (m_header)
2139 			MGET(m, M_TRYWAIT, MT_DATA);
2140 		else
2141 			MGETHDR(m, M_TRYWAIT, MT_DATA);
2142 		if (m == NULL) {
2143 			error = ENOBUFS;
2144 			sf_buf_mext((void *)sf_buf_kva(sf), sf);
2145 			SOCKBUF_LOCK(&so->so_snd);
2146 			sbunlock(&so->so_snd);
2147 			SOCKBUF_UNLOCK(&so->so_snd);
2148 			goto done;
2149 		}
2150 		/*
2151 		 * Setup external storage for mbuf.
2152 		 */
2153 		MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY,
2154 		    EXT_SFBUF);
2155 		m->m_data = (char *)sf_buf_kva(sf) + pgoff;
2156 		m->m_pkthdr.len = m->m_len = xfsize;
2157 
2158 		if (m_header) {
2159 			m_cat(m_header, m);
2160 			m = m_header;
2161 			m_header = NULL;
2162 			m_fixhdr(m);
2163 		}
2164 
2165 		/*
2166 		 * Add the buffer to the socket buffer chain.
2167 		 */
2168 		SOCKBUF_LOCK(&so->so_snd);
2169 retry_space:
2170 		/*
2171 		 * Make sure that the socket is still able to take more data.
2172 		 * CANTSENDMORE being true usually means that the connection
2173 		 * was closed. so_error is true when an error was sensed after
2174 		 * a previous send.
2175 		 * The state is checked after the page mapping and buffer
2176 		 * allocation above since those operations may block and make
2177 		 * any socket checks stale. From this point forward, nothing
2178 		 * blocks before the pru_send (or more accurately, any blocking
2179 		 * results in a loop back to here to re-check).
2180 		 */
2181 		SOCKBUF_LOCK_ASSERT(&so->so_snd);
2182 		if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) {
2183 			if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2184 				error = EPIPE;
2185 			} else {
2186 				error = so->so_error;
2187 				so->so_error = 0;
2188 			}
2189 			m_freem(m);
2190 			sbunlock(&so->so_snd);
2191 			SOCKBUF_UNLOCK(&so->so_snd);
2192 			goto done;
2193 		}
2194 		/*
2195 		 * Wait for socket space to become available. We do this just
2196 		 * after checking the connection state above in order to avoid
2197 		 * a race condition with sbwait().
2198 		 */
2199 		if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
2200 			if (so->so_state & SS_NBIO) {
2201 				m_freem(m);
2202 				sbunlock(&so->so_snd);
2203 				SOCKBUF_UNLOCK(&so->so_snd);
2204 				error = EAGAIN;
2205 				goto done;
2206 			}
2207 			error = sbwait(&so->so_snd);
2208 			/*
2209 			 * An error from sbwait usually indicates that we've
2210 			 * been interrupted by a signal. If we've sent anything
2211 			 * then return bytes sent, otherwise return the error.
2212 			 */
2213 			if (error) {
2214 				m_freem(m);
2215 				sbunlock(&so->so_snd);
2216 				SOCKBUF_UNLOCK(&so->so_snd);
2217 				goto done;
2218 			}
2219 			goto retry_space;
2220 		}
2221 		SOCKBUF_UNLOCK(&so->so_snd);
2222 		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
2223 		if (error) {
2224 			SOCKBUF_LOCK(&so->so_snd);
2225 			sbunlock(&so->so_snd);
2226 			SOCKBUF_UNLOCK(&so->so_snd);
2227 			goto done;
2228 		}
2229 		headersent = 1;
2230 	}
2231 	SOCKBUF_LOCK(&so->so_snd);
2232 	sbunlock(&so->so_snd);
2233 	SOCKBUF_UNLOCK(&so->so_snd);
2234 
2235 	/*
2236 	 * Send trailers. Wimp out and use writev(2).
2237 	 */
2238 	if (trl_uio != NULL) {
2239 		error = kern_writev(td, uap->s, trl_uio);
2240 		if (error)
2241 			goto done;
2242 		if (compat)
2243 			sbytes += td->td_retval[0];
2244 		else
2245 			hdtr_size += td->td_retval[0];
2246 	}
2247 
2248 done:
2249 	if (headersent) {
2250 		if (!compat)
2251 			hdtr_size += headersize;
2252 	} else {
2253 		if (compat)
2254 			sbytes -= headersize;
2255 	}
2256 	/*
2257 	 * If there was no error we have to clear td->td_retval[0]
2258 	 * because it may have been set by writev.
2259 	 */
2260 	if (error == 0) {
2261 		td->td_retval[0] = 0;
2262 	}
2263 	if (uap->sbytes != NULL) {
2264 		if (!compat)
2265 			sbytes += hdtr_size;
2266 		copyout(&sbytes, uap->sbytes, sizeof(off_t));
2267 	}
2268 	if (obj != NULL)
2269 		vm_object_deallocate(obj);
2270 	if (vp != NULL) {
2271 		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2272 		vrele(vp);
2273 		VFS_UNLOCK_GIANT(vfslocked);
2274 	}
2275 	if (so)
2276 		fdrop(sock_fp, td);
2277 	if (m_header)
2278 		m_freem(m_header);
2279 
2280 	NET_UNLOCK_GIANT();
2281 
2282 	if (error == ERESTART)
2283 		error = EINTR;
2284 
2285 	return (error);
2286 }
2287