xref: /freebsd/sys/kern/uipc_syscalls.c (revision 3d11b6c8f01e1fca5936a11d6996448467851a94)
1 /*-
2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * sendfile(2) and related extensions:
6  * Copyright (c) 1998, David Greenman. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 #include "opt_compat.h"
39 #include "opt_ktrace.h"
40 #include "opt_mac.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/lock.h>
46 #include <sys/mac.h>
47 #include <sys/mutex.h>
48 #include <sys/sysproto.h>
49 #include <sys/malloc.h>
50 #include <sys/filedesc.h>
51 #include <sys/event.h>
52 #include <sys/proc.h>
53 #include <sys/fcntl.h>
54 #include <sys/file.h>
55 #include <sys/filio.h>
56 #include <sys/mount.h>
57 #include <sys/mbuf.h>
58 #include <sys/protosw.h>
59 #include <sys/sf_buf.h>
60 #include <sys/socket.h>
61 #include <sys/socketvar.h>
62 #include <sys/signalvar.h>
63 #include <sys/syscallsubr.h>
64 #include <sys/sysctl.h>
65 #include <sys/uio.h>
66 #include <sys/vnode.h>
67 #ifdef KTRACE
68 #include <sys/ktrace.h>
69 #endif
70 
71 #include <vm/vm.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_page.h>
74 #include <vm/vm_pageout.h>
75 #include <vm/vm_kern.h>
76 #include <vm/vm_extern.h>
77 
78 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
79 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
80 
81 static int accept1(struct thread *td, struct accept_args *uap, int compat);
82 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
83 static int getsockname1(struct thread *td, struct getsockname_args *uap,
84 			int compat);
85 static int getpeername1(struct thread *td, struct getpeername_args *uap,
86 			int compat);
87 
88 /*
89  * NSFBUFS-related variables and associated sysctls
90  */
91 int nsfbufs;
92 int nsfbufspeak;
93 int nsfbufsused;
94 
95 SYSCTL_DECL(_kern_ipc);
96 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
97     "Maximum number of sendfile(2) sf_bufs available");
98 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
99     "Number of sendfile(2) sf_bufs at peak usage");
100 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
101     "Number of sendfile(2) sf_bufs in use");
102 
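/*
 * Illustrative only (not part of the kernel build): a minimal userland
 * sketch of reading the counters declared above with sysctlbyname(3).
 * The OID names follow from the SYSCTL_INT() declarations (parent
 * _kern_ipc); note that kern.ipc.nsfbufs is a read-only loader tunable
 * (CTLFLAG_RDTUN).  Everything else here is assumed standard libc.
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int used, peak;
 *		size_t len;
 *
 *		len = sizeof(used);
 *		if (sysctlbyname("kern.ipc.nsfbufsused", &used, &len,
 *		    NULL, 0) == -1)
 *			return (1);
 *		len = sizeof(peak);
 *		if (sysctlbyname("kern.ipc.nsfbufspeak", &peak, &len,
 *		    NULL, 0) == -1)
 *			return (1);
 *		printf("sendfile sf_bufs in use: %d (peak %d)\n", used, peak);
 *		return (0);
 *	}
 */
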
103 /*
104  * Convert a user file descriptor to a kernel file entry.  A reference on the
105  * file entry is held upon returning.  This is lighter weight than
106  * fgetsock(), which bumps the socket reference drops the file reference
107  * count instead, as this approach avoids several additional mutex operations
108  * associated with the additional reference count.
109  */
110 static int
111 getsock(struct filedesc *fdp, int fd, struct file **fpp)
112 {
113 	struct file *fp;
114 	int error;
115 
116 	fp = NULL;
117 	if (fdp == NULL)
118 		error = EBADF;
119 	else {
120 		FILEDESC_LOCK_FAST(fdp);
121 		fp = fget_locked(fdp, fd);
122 		if (fp == NULL)
123 			error = EBADF;
124 		else if (fp->f_type != DTYPE_SOCKET) {
125 			fp = NULL;
126 			error = ENOTSOCK;
127 		} else {
128 			fhold(fp);
129 			error = 0;
130 		}
131 		FILEDESC_UNLOCK_FAST(fdp);
132 	}
133 	*fpp = fp;
134 	return (error);
135 }
136 
137 /*
138  * System call interface to the socket abstraction.
139  */
140 #if defined(COMPAT_43)
141 #define COMPAT_OLDSOCK
142 #endif
143 
144 /*
145  * MPSAFE
146  */
147 int
148 socket(td, uap)
149 	struct thread *td;
150 	register struct socket_args /* {
151 		int	domain;
152 		int	type;
153 		int	protocol;
154 	} */ *uap;
155 {
156 	struct filedesc *fdp;
157 	struct socket *so;
158 	struct file *fp;
159 	int fd, error;
160 
161 #ifdef MAC
162 	error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
163 	    uap->protocol);
164 	if (error)
165 		return (error);
166 #endif
167 	fdp = td->td_proc->p_fd;
168 	error = falloc(td, &fp, &fd);
169 	if (error)
170 		return (error);
171 	/* An extra reference on `fp' has been held for us by falloc(). */
172 	NET_LOCK_GIANT();
173 	error = socreate(uap->domain, &so, uap->type, uap->protocol,
174 	    td->td_ucred, td);
175 	NET_UNLOCK_GIANT();
176 	if (error) {
177 		fdclose(fdp, fp, fd, td);
178 	} else {
179 		FILEDESC_LOCK_FAST(fdp);
180 		fp->f_data = so;	/* already has ref count */
181 		fp->f_flag = FREAD|FWRITE;
182 		fp->f_ops = &socketops;
183 		fp->f_type = DTYPE_SOCKET;
184 		FILEDESC_UNLOCK_FAST(fdp);
185 		td->td_retval[0] = fd;
186 	}
187 	fdrop(fp, td);
188 	return (error);
189 }
190 
191 /*
192  * MPSAFE
193  */
194 /* ARGSUSED */
195 int
196 bind(td, uap)
197 	struct thread *td;
198 	register struct bind_args /* {
199 		int	s;
200 		caddr_t	name;
201 		int	namelen;
202 	} */ *uap;
203 {
204 	struct sockaddr *sa;
205 	int error;
206 
207 	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
208 		return (error);
209 
210 	return (kern_bind(td, uap->s, sa));
211 }
212 
213 int
214 kern_bind(td, fd, sa)
215 	struct thread *td;
216 	int fd;
217 	struct sockaddr *sa;
218 {
219 	struct socket *so;
220 	struct file *fp;
221 	int error;
222 
223 	NET_LOCK_GIANT();
224 	error = getsock(td->td_proc->p_fd, fd, &fp);
225 	if (error)
226 		goto done2;
227 	so = fp->f_data;
228 #ifdef MAC
229 	SOCK_LOCK(so);
230 	error = mac_check_socket_bind(td->td_ucred, so, sa);
231 	SOCK_UNLOCK(so);
232 	if (error)
233 		goto done1;
234 #endif
235 	error = sobind(so, sa, td);
236 #ifdef MAC
237 done1:
238 #endif
239 	fdrop(fp, td);
240 done2:
241 	NET_UNLOCK_GIANT();
242 	FREE(sa, M_SONAME);
243 	return (error);
244 }
245 
246 /*
247  * MPSAFE
248  */
249 /* ARGSUSED */
250 int
251 listen(td, uap)
252 	struct thread *td;
253 	register struct listen_args /* {
254 		int	s;
255 		int	backlog;
256 	} */ *uap;
257 {
258 	struct socket *so;
259 	struct file *fp;
260 	int error;
261 
262 	NET_LOCK_GIANT();
263 	error = getsock(td->td_proc->p_fd, uap->s, &fp);
264 	if (error == 0) {
265 		so = fp->f_data;
266 #ifdef MAC
267 		SOCK_LOCK(so);
268 		error = mac_check_socket_listen(td->td_ucred, so);
269 		SOCK_UNLOCK(so);
270 		if (error)
271 			goto done;
272 #endif
273 		error = solisten(so, uap->backlog, td);
274 #ifdef MAC
275 done:
276 #endif
277 		fdrop(fp, td);
278 	}
279 	NET_UNLOCK_GIANT();
280 	return (error);
281 }
282 
283 /*
284  * accept1()
285  * MPSAFE
286  *
287  * XXXRW: Use getsock() instead of fgetsock() here to avoid additional mutex
288  * operations due to soref()/sorele().
289  */
290 static int
291 accept1(td, uap, compat)
292 	struct thread *td;
293 	register struct accept_args /* {
294 		int	s;
295 		struct sockaddr	* __restrict name;
296 		socklen_t	* __restrict anamelen;
297 	} */ *uap;
298 	int compat;
299 {
300 	struct filedesc *fdp;
301 	struct file *nfp = NULL;
302 	struct sockaddr *sa = NULL;
303 	socklen_t namelen;
304 	int error;
305 	struct socket *head, *so;
306 	int fd;
307 	u_int fflag;
308 	pid_t pgid;
309 	int tmp;
310 
311 	fdp = td->td_proc->p_fd;
312 	if (uap->name) {
313 		error = copyin(uap->anamelen, &namelen, sizeof (namelen));
314 		if (error)
315 			return (error);
316 		if (namelen < 0)
317 			return (EINVAL);
318 	}
319 	NET_LOCK_GIANT();
320 	error = fgetsock(td, uap->s, &head, &fflag);
321 	if (error)
322 		goto done2;
323 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
324 		error = EINVAL;
325 		goto done;
326 	}
327 #ifdef MAC
328 	SOCK_LOCK(head);
329 	error = mac_check_socket_accept(td->td_ucred, head);
330 	SOCK_UNLOCK(head);
331 	if (error != 0)
332 		goto done;
333 #endif
334 	error = falloc(td, &nfp, &fd);
335 	if (error)
336 		goto done;
337 	ACCEPT_LOCK();
338 	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
339 		ACCEPT_UNLOCK();
340 		error = EWOULDBLOCK;
341 		goto noconnection;
342 	}
343 	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
344 		if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
345 			head->so_error = ECONNABORTED;
346 			break;
347 		}
348 		error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
349 		    "accept", 0);
350 		if (error) {
351 			ACCEPT_UNLOCK();
352 			goto noconnection;
353 		}
354 	}
355 	if (head->so_error) {
356 		error = head->so_error;
357 		head->so_error = 0;
358 		ACCEPT_UNLOCK();
359 		goto noconnection;
360 	}
361 	so = TAILQ_FIRST(&head->so_comp);
362 	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
363 	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
364 
365 	/*
366 	 * Before changing the flags on the socket, we have to bump the
367 	 * reference count.  Otherwise, if the protocol calls sofree(),
368 	 * the socket will be released due to a zero refcount.
369 	 */
370 	SOCK_LOCK(so);			/* soref() and so_state update */
371 	soref(so);			/* file descriptor reference */
372 
373 	TAILQ_REMOVE(&head->so_comp, so, so_list);
374 	head->so_qlen--;
375 	so->so_state |= (head->so_state & SS_NBIO);
376 	so->so_qstate &= ~SQ_COMP;
377 	so->so_head = NULL;
378 
379 	SOCK_UNLOCK(so);
380 	ACCEPT_UNLOCK();
381 
382 	/* An extra reference on `nfp' has been held for us by falloc(). */
383 	td->td_retval[0] = fd;
384 
385 	/* connection has been removed from the listen queue */
386 	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
387 
388 	pgid = fgetown(&head->so_sigio);
389 	if (pgid != 0)
390 		fsetown(pgid, &so->so_sigio);
391 
392 	FILE_LOCK(nfp);
393 	nfp->f_data = so;	/* nfp has ref count from falloc */
394 	nfp->f_flag = fflag;
395 	nfp->f_ops = &socketops;
396 	nfp->f_type = DTYPE_SOCKET;
397 	FILE_UNLOCK(nfp);
398 	/* Sync socket nonblocking/async state with file flags */
399 	tmp = fflag & FNONBLOCK;
400 	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
401 	tmp = fflag & FASYNC;
402 	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
403 	sa = 0;
404 	error = soaccept(so, &sa);
405 	if (error) {
406 		/*
407 		 * return a namelen of zero for older code which might
408 		 * ignore the return value from accept.
409 		 */
410 		if (uap->name != NULL) {
411 			namelen = 0;
412 			(void) copyout(&namelen,
413 			    uap->anamelen, sizeof(*uap->anamelen));
414 		}
415 		goto noconnection;
416 	}
417 	if (sa == NULL) {
418 		namelen = 0;
419 		if (uap->name)
420 			goto gotnoname;
421 		error = 0;
422 		goto done;
423 	}
424 	if (uap->name) {
425 		/* check sa_len before it is destroyed */
426 		if (namelen > sa->sa_len)
427 			namelen = sa->sa_len;
428 #ifdef COMPAT_OLDSOCK
429 		if (compat)
430 			((struct osockaddr *)sa)->sa_family =
431 			    sa->sa_family;
432 #endif
433 		error = copyout(sa, uap->name, (u_int)namelen);
434 		if (!error)
435 gotnoname:
436 			error = copyout(&namelen,
437 			    uap->anamelen, sizeof (*uap->anamelen));
438 	}
439 noconnection:
440 	if (sa)
441 		FREE(sa, M_SONAME);
442 
443 	/*
444 	 * close the new descriptor, assuming someone hasn't ripped it
445 	 * out from under us.
446 	 */
447 	if (error)
448 		fdclose(fdp, nfp, fd, td);
449 
450 	/*
451 	 * Release explicitly held references before returning.
452 	 */
453 done:
454 	if (nfp != NULL)
455 		fdrop(nfp, td);
456 	fputsock(head);
457 done2:
458 	NET_UNLOCK_GIANT();
459 	return (error);
460 }
461 
462 /*
463  * MPSAFE (accept1() is MPSAFE)
464  */
465 int
466 accept(td, uap)
467 	struct thread *td;
468 	struct accept_args *uap;
469 {
470 
471 	return (accept1(td, uap, 0));
472 }
473 
474 #ifdef COMPAT_OLDSOCK
475 /*
476  * MPSAFE (accept1() is MPSAFE)
477  */
478 int
479 oaccept(td, uap)
480 	struct thread *td;
481 	struct accept_args *uap;
482 {
483 
484 	return (accept1(td, uap, 1));
485 }
486 #endif /* COMPAT_OLDSOCK */
487 
488 /*
489  * MPSAFE
490  */
491 /* ARGSUSED */
492 int
493 connect(td, uap)
494 	struct thread *td;
495 	register struct connect_args /* {
496 		int	s;
497 		caddr_t	name;
498 		int	namelen;
499 	} */ *uap;
500 {
501 	struct sockaddr *sa;
502 	int error;
503 
504 	error = getsockaddr(&sa, uap->name, uap->namelen);
505 	if (error)
506 		return (error);
507 
508 	return (kern_connect(td, uap->s, sa));
509 }
510 
511 
512 int
513 kern_connect(td, fd, sa)
514 	struct thread *td;
515 	int fd;
516 	struct sockaddr *sa;
517 {
518 	struct socket *so;
519 	struct file *fp;
520 	int error;
521 	int interrupted = 0;
522 
523 	NET_LOCK_GIANT();
524 	error = getsock(td->td_proc->p_fd, fd, &fp);
525 	if (error)
526 		goto done2;
527 	so = fp->f_data;
528 	if (so->so_state & SS_ISCONNECTING) {
529 		error = EALREADY;
530 		goto done1;
531 	}
532 #ifdef MAC
533 	SOCK_LOCK(so);
534 	error = mac_check_socket_connect(td->td_ucred, so, sa);
535 	SOCK_UNLOCK(so);
536 	if (error)
537 		goto bad;
538 #endif
539 	error = soconnect(so, sa, td);
540 	if (error)
541 		goto bad;
542 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
543 		error = EINPROGRESS;
544 		goto done1;
545 	}
546 	SOCK_LOCK(so);
547 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
548 		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
549 		    "connec", 0);
550 		if (error) {
551 			if (error == EINTR || error == ERESTART)
552 				interrupted = 1;
553 			break;
554 		}
555 	}
556 	if (error == 0) {
557 		error = so->so_error;
558 		so->so_error = 0;
559 	}
560 	SOCK_UNLOCK(so);
561 bad:
562 	if (!interrupted)
563 		so->so_state &= ~SS_ISCONNECTING;
564 	if (error == ERESTART)
565 		error = EINTR;
566 done1:
567 	fdrop(fp, td);
568 done2:
569 	NET_UNLOCK_GIANT();
570 	FREE(sa, M_SONAME);
571 	return (error);
572 }
573 
574 /*
575  * MPSAFE
576  */
577 int
578 socketpair(td, uap)
579 	struct thread *td;
580 	register struct socketpair_args /* {
581 		int	domain;
582 		int	type;
583 		int	protocol;
584 		int	*rsv;
585 	} */ *uap;
586 {
587 	register struct filedesc *fdp = td->td_proc->p_fd;
588 	struct file *fp1, *fp2;
589 	struct socket *so1, *so2;
590 	int fd, error, sv[2];
591 
592 #ifdef MAC
593 	/* We might want to have a separate check for socket pairs. */
594 	error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
595 	    uap->protocol);
596 	if (error)
597 		return (error);
598 #endif
599 
600 	NET_LOCK_GIANT();
601 	error = socreate(uap->domain, &so1, uap->type, uap->protocol,
602 	    td->td_ucred, td);
603 	if (error)
604 		goto done2;
605 	error = socreate(uap->domain, &so2, uap->type, uap->protocol,
606 	    td->td_ucred, td);
607 	if (error)
608 		goto free1;
609 	/* On success, extra references to `fp1' and `fp2' are held for us by falloc(). */
610 	error = falloc(td, &fp1, &fd);
611 	if (error)
612 		goto free2;
613 	sv[0] = fd;
614 	fp1->f_data = so1;	/* so1 already has ref count */
615 	error = falloc(td, &fp2, &fd);
616 	if (error)
617 		goto free3;
618 	fp2->f_data = so2;	/* so2 already has ref count */
619 	sv[1] = fd;
620 	error = soconnect2(so1, so2);
621 	if (error)
622 		goto free4;
623 	if (uap->type == SOCK_DGRAM) {
624 		/*
625 		 * Datagram socket connection is asymmetric.
626 		 */
627 		 error = soconnect2(so2, so1);
628 		 if (error)
629 			goto free4;
630 	}
631 	FILE_LOCK(fp1);
632 	fp1->f_flag = FREAD|FWRITE;
633 	fp1->f_ops = &socketops;
634 	fp1->f_type = DTYPE_SOCKET;
635 	FILE_UNLOCK(fp1);
636 	FILE_LOCK(fp2);
637 	fp2->f_flag = FREAD|FWRITE;
638 	fp2->f_ops = &socketops;
639 	fp2->f_type = DTYPE_SOCKET;
640 	FILE_UNLOCK(fp2);
641 	error = copyout(sv, uap->rsv, 2 * sizeof (int));
642 	fdrop(fp1, td);
643 	fdrop(fp2, td);
644 	goto done2;
645 free4:
646 	fdclose(fdp, fp2, sv[1], td);
647 	fdrop(fp2, td);
648 free3:
649 	fdclose(fdp, fp1, sv[0], td);
650 	fdrop(fp1, td);
651 free2:
652 	(void)soclose(so2);
653 free1:
654 	(void)soclose(so1);
655 done2:
656 	NET_UNLOCK_GIANT();
657 	return (error);
658 }
659 
660 static int
661 sendit(td, s, mp, flags)
662 	register struct thread *td;
663 	int s;
664 	register struct msghdr *mp;
665 	int flags;
666 {
667 	struct mbuf *control;
668 	struct sockaddr *to;
669 	int error;
670 
671 	if (mp->msg_name != NULL) {
672 		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
673 		if (error) {
674 			to = NULL;
675 			goto bad;
676 		}
677 		mp->msg_name = to;
678 	} else {
679 		to = NULL;
680 	}
681 
682 	if (mp->msg_control) {
683 		if (mp->msg_controllen < sizeof(struct cmsghdr)
684 #ifdef COMPAT_OLDSOCK
685 		    && mp->msg_flags != MSG_COMPAT
686 #endif
687 		) {
688 			error = EINVAL;
689 			goto bad;
690 		}
691 		error = sockargs(&control, mp->msg_control,
692 		    mp->msg_controllen, MT_CONTROL);
693 		if (error)
694 			goto bad;
695 #ifdef COMPAT_OLDSOCK
696 		if (mp->msg_flags == MSG_COMPAT) {
697 			register struct cmsghdr *cm;
698 
699 			M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
700 			if (control == 0) {
701 				error = ENOBUFS;
702 				goto bad;
703 			} else {
704 				cm = mtod(control, struct cmsghdr *);
705 				cm->cmsg_len = control->m_len;
706 				cm->cmsg_level = SOL_SOCKET;
707 				cm->cmsg_type = SCM_RIGHTS;
708 			}
709 		}
710 #endif
711 	} else {
712 		control = NULL;
713 	}
714 
715 	error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
716 
717 bad:
718 	if (to)
719 		FREE(to, M_SONAME);
720 	return (error);
721 }
722 
723 int
724 kern_sendit(td, s, mp, flags, control, segflg)
725 	struct thread *td;
726 	int s;
727 	struct msghdr *mp;
728 	int flags;
729 	struct mbuf *control;
730 	enum uio_seg segflg;
731 {
732 	struct file *fp;
733 	struct uio auio;
734 	struct iovec *iov;
735 	struct socket *so;
736 	int i;
737 	int len, error;
738 #ifdef KTRACE
739 	struct uio *ktruio = NULL;
740 #endif
741 
742 	NET_LOCK_GIANT();
743 	error = getsock(td->td_proc->p_fd, s, &fp);
744 	if (error)
745 		goto bad2;
746 	so = (struct socket *)fp->f_data;
747 
748 #ifdef MAC
749 	SOCK_LOCK(so);
750 	error = mac_check_socket_send(td->td_ucred, so);
751 	SOCK_UNLOCK(so);
752 	if (error)
753 		goto bad;
754 #endif
755 
756 	auio.uio_iov = mp->msg_iov;
757 	auio.uio_iovcnt = mp->msg_iovlen;
758 	auio.uio_segflg = segflg;
759 	auio.uio_rw = UIO_WRITE;
760 	auio.uio_td = td;
761 	auio.uio_offset = 0;			/* XXX */
762 	auio.uio_resid = 0;
763 	iov = mp->msg_iov;
764 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
765 		if ((auio.uio_resid += iov->iov_len) < 0) {
766 			error = EINVAL;
767 			goto bad;
768 		}
769 	}
770 #ifdef KTRACE
771 	if (KTRPOINT(td, KTR_GENIO))
772 		ktruio = cloneuio(&auio);
773 #endif
774 	len = auio.uio_resid;
775 	error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio,
776 	    0, control, flags, td);
777 	if (error) {
778 		if (auio.uio_resid != len && (error == ERESTART ||
779 		    error == EINTR || error == EWOULDBLOCK))
780 			error = 0;
781 		/* Generation of SIGPIPE can be controlled per socket */
782 		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
783 		    !(flags & MSG_NOSIGNAL)) {
784 			PROC_LOCK(td->td_proc);
785 			psignal(td->td_proc, SIGPIPE);
786 			PROC_UNLOCK(td->td_proc);
787 		}
788 	}
789 	if (error == 0)
790 		td->td_retval[0] = len - auio.uio_resid;
791 #ifdef KTRACE
792 	if (ktruio != NULL) {
793 		ktruio->uio_resid = td->td_retval[0];
794 		ktrgenio(s, UIO_WRITE, ktruio, error);
795 	}
796 #endif
797 bad:
798 	fdrop(fp, td);
799 bad2:
800 	NET_UNLOCK_GIANT();
801 	return (error);
802 }
803 
804 /*
805  * MPSAFE
806  */
807 int
808 sendto(td, uap)
809 	struct thread *td;
810 	register struct sendto_args /* {
811 		int	s;
812 		caddr_t	buf;
813 		size_t	len;
814 		int	flags;
815 		caddr_t	to;
816 		int	tolen;
817 	} */ *uap;
818 {
819 	struct msghdr msg;
820 	struct iovec aiov;
821 	int error;
822 
823 	msg.msg_name = uap->to;
824 	msg.msg_namelen = uap->tolen;
825 	msg.msg_iov = &aiov;
826 	msg.msg_iovlen = 1;
827 	msg.msg_control = 0;
828 #ifdef COMPAT_OLDSOCK
829 	msg.msg_flags = 0;
830 #endif
831 	aiov.iov_base = uap->buf;
832 	aiov.iov_len = uap->len;
833 	error = sendit(td, uap->s, &msg, uap->flags);
834 	return (error);
835 }
836 
837 #ifdef COMPAT_OLDSOCK
838 /*
839  * MPSAFE
840  */
841 int
842 osend(td, uap)
843 	struct thread *td;
844 	register struct osend_args /* {
845 		int	s;
846 		caddr_t	buf;
847 		int	len;
848 		int	flags;
849 	} */ *uap;
850 {
851 	struct msghdr msg;
852 	struct iovec aiov;
853 	int error;
854 
855 	msg.msg_name = 0;
856 	msg.msg_namelen = 0;
857 	msg.msg_iov = &aiov;
858 	msg.msg_iovlen = 1;
859 	aiov.iov_base = uap->buf;
860 	aiov.iov_len = uap->len;
861 	msg.msg_control = 0;
862 	msg.msg_flags = 0;
863 	error = sendit(td, uap->s, &msg, uap->flags);
864 	return (error);
865 }
866 
867 /*
868  * MPSAFE
869  */
870 int
871 osendmsg(td, uap)
872 	struct thread *td;
873 	struct osendmsg_args /* {
874 		int	s;
875 		caddr_t	msg;
876 		int	flags;
877 	} */ *uap;
878 {
879 	struct msghdr msg;
880 	struct iovec *iov;
881 	int error;
882 
883 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
884 	if (error)
885 		return (error);
886 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
887 	if (error)
888 		return (error);
889 	msg.msg_iov = iov;
890 	msg.msg_flags = MSG_COMPAT;
891 	error = sendit(td, uap->s, &msg, uap->flags);
892 	free(iov, M_IOV);
893 	return (error);
894 }
895 #endif
896 
897 /*
898  * MPSAFE
899  */
900 int
901 sendmsg(td, uap)
902 	struct thread *td;
903 	struct sendmsg_args /* {
904 		int	s;
905 		caddr_t	msg;
906 		int	flags;
907 	} */ *uap;
908 {
909 	struct msghdr msg;
910 	struct iovec *iov;
911 	int error;
912 
913 	error = copyin(uap->msg, &msg, sizeof (msg));
914 	if (error)
915 		return (error);
916 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
917 	if (error)
918 		return (error);
919 	msg.msg_iov = iov;
920 #ifdef COMPAT_OLDSOCK
921 	msg.msg_flags = 0;
922 #endif
923 	error = sendit(td, uap->s, &msg, uap->flags);
924 	free(iov, M_IOV);
925 	return (error);
926 }
927 
928 int
929 kern_recvit(td, s, mp, namelenp, segflg, controlp)
930 	struct thread *td;
931 	int s;
932 	struct msghdr *mp;
933 	void *namelenp;
934 	enum uio_seg segflg;
935 	struct mbuf **controlp;
936 {
937 	struct uio auio;
938 	struct iovec *iov;
939 	int i;
940 	socklen_t len;
941 	int error;
942 	struct mbuf *m, *control = 0;
943 	caddr_t ctlbuf;
944 	struct file *fp;
945 	struct socket *so;
946 	struct sockaddr *fromsa = 0;
947 #ifdef KTRACE
948 	struct uio *ktruio = NULL;
949 #endif
950 
951 	if (controlp != NULL)
952 		*controlp = 0;
953 
954 	NET_LOCK_GIANT();
955 	error = getsock(td->td_proc->p_fd, s, &fp);
956 	if (error) {
957 		NET_UNLOCK_GIANT();
958 		return (error);
959 	}
960 	so = fp->f_data;
961 
962 #ifdef MAC
963 	SOCK_LOCK(so);
964 	error = mac_check_socket_receive(td->td_ucred, so);
965 	SOCK_UNLOCK(so);
966 	if (error) {
967 		fdrop(fp, td);
968 		NET_UNLOCK_GIANT();
969 		return (error);
970 	}
971 #endif
972 
973 	auio.uio_iov = mp->msg_iov;
974 	auio.uio_iovcnt = mp->msg_iovlen;
975 	auio.uio_segflg = segflg;
976 	auio.uio_rw = UIO_READ;
977 	auio.uio_td = td;
978 	auio.uio_offset = 0;			/* XXX */
979 	auio.uio_resid = 0;
980 	iov = mp->msg_iov;
981 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
982 		if ((auio.uio_resid += iov->iov_len) < 0) {
983 			fdrop(fp, td);
984 			NET_UNLOCK_GIANT();
985 			return (EINVAL);
986 		}
987 	}
988 #ifdef KTRACE
989 	if (KTRPOINT(td, KTR_GENIO))
990 		ktruio = cloneuio(&auio);
991 #endif
992 	len = auio.uio_resid;
993 	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
994 	    (struct mbuf **)0,
995 	    (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
996 	    &mp->msg_flags);
997 	if (error) {
998 		if (auio.uio_resid != (int)len && (error == ERESTART ||
999 		    error == EINTR || error == EWOULDBLOCK))
1000 			error = 0;
1001 	}
1002 #ifdef KTRACE
1003 	if (ktruio != NULL) {
1004 		ktruio->uio_resid = (int)len - auio.uio_resid;
1005 		ktrgenio(s, UIO_READ, ktruio, error);
1006 	}
1007 #endif
1008 	if (error)
1009 		goto out;
1010 	td->td_retval[0] = (int)len - auio.uio_resid;
1011 	if (mp->msg_name) {
1012 		len = mp->msg_namelen;
1013 		if (len <= 0 || fromsa == 0)
1014 			len = 0;
1015 		else {
1016 			/* save sa_len before it is destroyed by MSG_COMPAT */
1017 			len = MIN(len, fromsa->sa_len);
1018 #ifdef COMPAT_OLDSOCK
1019 			if (mp->msg_flags & MSG_COMPAT)
1020 				((struct osockaddr *)fromsa)->sa_family =
1021 				    fromsa->sa_family;
1022 #endif
1023 			error = copyout(fromsa, mp->msg_name, (unsigned)len);
1024 			if (error)
1025 				goto out;
1026 		}
1027 		mp->msg_namelen = len;
1028 		if (namelenp &&
1029 		    (error = copyout(&len, namelenp, sizeof (socklen_t)))) {
1030 #ifdef COMPAT_OLDSOCK
1031 			if (mp->msg_flags & MSG_COMPAT)
1032 				error = 0;	/* old recvfrom didn't check */
1033 			else
1034 #endif
1035 			goto out;
1036 		}
1037 	}
1038 	if (mp->msg_control && controlp == NULL) {
1039 #ifdef COMPAT_OLDSOCK
1040 		/*
1041 		 * We assume that old recvmsg calls won't receive access
1042 		 * rights and other control info, esp. as control info
1043 		 * is always optional and those options didn't exist in 4.3.
1044 		 * If we receive rights, trim the cmsghdr; anything else
1045 		 * is tossed.
1046 		 */
1047 		if (control && mp->msg_flags & MSG_COMPAT) {
1048 			if (mtod(control, struct cmsghdr *)->cmsg_level !=
1049 			    SOL_SOCKET ||
1050 			    mtod(control, struct cmsghdr *)->cmsg_type !=
1051 			    SCM_RIGHTS) {
1052 				mp->msg_controllen = 0;
1053 				goto out;
1054 			}
1055 			control->m_len -= sizeof (struct cmsghdr);
1056 			control->m_data += sizeof (struct cmsghdr);
1057 		}
1058 #endif
1059 		len = mp->msg_controllen;
1060 		m = control;
1061 		mp->msg_controllen = 0;
1062 		ctlbuf = mp->msg_control;
1063 
1064 		while (m && len > 0) {
1065 			unsigned int tocopy;
1066 
1067 			if (len >= m->m_len)
1068 				tocopy = m->m_len;
1069 			else {
1070 				mp->msg_flags |= MSG_CTRUNC;
1071 				tocopy = len;
1072 			}
1073 
1074 			if ((error = copyout(mtod(m, caddr_t),
1075 					ctlbuf, tocopy)) != 0)
1076 				goto out;
1077 
1078 			ctlbuf += tocopy;
1079 			len -= tocopy;
1080 			m = m->m_next;
1081 		}
1082 		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
1083 	}
1084 out:
1085 	fdrop(fp, td);
1086 	NET_UNLOCK_GIANT();
1087 	if (fromsa)
1088 		FREE(fromsa, M_SONAME);
1089 
1090 	if (error == 0 && controlp != NULL)
1091 		*controlp = control;
1092 	else  if (control)
1093 		m_freem(control);
1094 
1095 	return (error);
1096 }
1097 
1098 static int
1099 recvit(td, s, mp, namelenp)
1100 	struct thread *td;
1101 	int s;
1102 	struct msghdr *mp;
1103 	void *namelenp;
1104 {
1105 
1106 	return (kern_recvit(td, s, mp, namelenp, UIO_USERSPACE, NULL));
1107 }
1108 
1109 /*
1110  * MPSAFE
1111  */
1112 int
1113 recvfrom(td, uap)
1114 	struct thread *td;
1115 	register struct recvfrom_args /* {
1116 		int	s;
1117 		caddr_t	buf;
1118 		size_t	len;
1119 		int	flags;
1120 		struct sockaddr * __restrict	from;
1121 		socklen_t * __restrict fromlenaddr;
1122 	} */ *uap;
1123 {
1124 	struct msghdr msg;
1125 	struct iovec aiov;
1126 	int error;
1127 
1128 	if (uap->fromlenaddr) {
1129 		error = copyin(uap->fromlenaddr,
1130 		    &msg.msg_namelen, sizeof (msg.msg_namelen));
1131 		if (error)
1132 			goto done2;
1133 	} else {
1134 		msg.msg_namelen = 0;
1135 	}
1136 	msg.msg_name = uap->from;
1137 	msg.msg_iov = &aiov;
1138 	msg.msg_iovlen = 1;
1139 	aiov.iov_base = uap->buf;
1140 	aiov.iov_len = uap->len;
1141 	msg.msg_control = 0;
1142 	msg.msg_flags = uap->flags;
1143 	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1144 done2:
1145 	return (error);
1146 }
1147 
1148 #ifdef COMPAT_OLDSOCK
1149 /*
1150  * MPSAFE
1151  */
1152 int
1153 orecvfrom(td, uap)
1154 	struct thread *td;
1155 	struct recvfrom_args *uap;
1156 {
1157 
1158 	uap->flags |= MSG_COMPAT;
1159 	return (recvfrom(td, uap));
1160 }
1161 #endif
1162 
1163 
1164 #ifdef COMPAT_OLDSOCK
1165 /*
1166  * MPSAFE
1167  */
1168 int
1169 orecv(td, uap)
1170 	struct thread *td;
1171 	register struct orecv_args /* {
1172 		int	s;
1173 		caddr_t	buf;
1174 		int	len;
1175 		int	flags;
1176 	} */ *uap;
1177 {
1178 	struct msghdr msg;
1179 	struct iovec aiov;
1180 	int error;
1181 
1182 	msg.msg_name = 0;
1183 	msg.msg_namelen = 0;
1184 	msg.msg_iov = &aiov;
1185 	msg.msg_iovlen = 1;
1186 	aiov.iov_base = uap->buf;
1187 	aiov.iov_len = uap->len;
1188 	msg.msg_control = 0;
1189 	msg.msg_flags = uap->flags;
1190 	error = recvit(td, uap->s, &msg, NULL);
1191 	return (error);
1192 }
1193 
1194 /*
1195  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1196  * overlays the new one, missing only the flags, and with the (old) access
1197  * rights where the control fields are now.
1198  *
1199  * MPSAFE
1200  */
1201 int
1202 orecvmsg(td, uap)
1203 	struct thread *td;
1204 	struct orecvmsg_args /* {
1205 		int	s;
1206 		struct	omsghdr *msg;
1207 		int	flags;
1208 	} */ *uap;
1209 {
1210 	struct msghdr msg;
1211 	struct iovec *iov;
1212 	int error;
1213 
1214 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1215 	if (error)
1216 		return (error);
1217 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1218 	if (error)
1219 		return (error);
1220 	msg.msg_flags = uap->flags | MSG_COMPAT;
1221 	msg.msg_iov = iov;
1222 	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1223 	if (msg.msg_controllen && error == 0)
1224 		error = copyout(&msg.msg_controllen,
1225 		    &uap->msg->msg_accrightslen, sizeof (int));
1226 	free(iov, M_IOV);
1227 	return (error);
1228 }
1229 #endif
1230 
1231 /*
1232  * MPSAFE
1233  */
1234 int
1235 recvmsg(td, uap)
1236 	struct thread *td;
1237 	struct recvmsg_args /* {
1238 		int	s;
1239 		struct	msghdr *msg;
1240 		int	flags;
1241 	} */ *uap;
1242 {
1243 	struct msghdr msg;
1244 	struct iovec *uiov, *iov;
1245 	int error;
1246 
1247 	error = copyin(uap->msg, &msg, sizeof (msg));
1248 	if (error)
1249 		return (error);
1250 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1251 	if (error)
1252 		return (error);
1253 	msg.msg_flags = uap->flags;
1254 #ifdef COMPAT_OLDSOCK
1255 	msg.msg_flags &= ~MSG_COMPAT;
1256 #endif
1257 	uiov = msg.msg_iov;
1258 	msg.msg_iov = iov;
1259 	error = recvit(td, uap->s, &msg, NULL);
1260 	if (error == 0) {
1261 		msg.msg_iov = uiov;
1262 		error = copyout(&msg, uap->msg, sizeof(msg));
1263 	}
1264 	free(iov, M_IOV);
1265 	return (error);
1266 }
1267 
1268 /*
1269  * MPSAFE
1270  */
1271 /* ARGSUSED */
1272 int
1273 shutdown(td, uap)
1274 	struct thread *td;
1275 	register struct shutdown_args /* {
1276 		int	s;
1277 		int	how;
1278 	} */ *uap;
1279 {
1280 	struct socket *so;
1281 	struct file *fp;
1282 	int error;
1283 
1284 	NET_LOCK_GIANT();
1285 	error = getsock(td->td_proc->p_fd, uap->s, &fp);
1286 	if (error == 0) {
1287 		so = fp->f_data;
1288 		error = soshutdown(so, uap->how);
1289 		fdrop(fp, td);
1290 	}
1291 	NET_UNLOCK_GIANT();
1292 	return (error);
1293 }
1294 
1295 /*
1296  * MPSAFE
1297  */
1298 /* ARGSUSED */
1299 int
1300 setsockopt(td, uap)
1301 	struct thread *td;
1302 	register struct setsockopt_args /* {
1303 		int	s;
1304 		int	level;
1305 		int	name;
1306 		caddr_t	val;
1307 		int	valsize;
1308 	} */ *uap;
1309 {
1310 
1311 	return (kern_setsockopt(td, uap->s, uap->level, uap->name,
1312 	    uap->val, UIO_USERSPACE, uap->valsize));
1313 }
1314 
1315 int
1316 kern_setsockopt(td, s, level, name, val, valseg, valsize)
1317 	struct thread *td;
1318 	int s;
1319 	int level;
1320 	int name;
1321 	void *val;
1322 	enum uio_seg valseg;
1323 	socklen_t valsize;
1324 {
1325 	int error;
1326 	struct socket *so;
1327 	struct file *fp;
1328 	struct sockopt sopt;
1329 
1330 	if (val == NULL && valsize != 0)
1331 		return (EFAULT);
1332 	if (valsize < 0)
1333 		return (EINVAL);
1334 
1335 	sopt.sopt_dir = SOPT_SET;
1336 	sopt.sopt_level = level;
1337 	sopt.sopt_name = name;
1338 	sopt.sopt_val = val;
1339 	sopt.sopt_valsize = valsize;
1340 	switch (valseg) {
1341 	case UIO_USERSPACE:
1342 		sopt.sopt_td = td;
1343 		break;
1344 	case UIO_SYSSPACE:
1345 		sopt.sopt_td = NULL;
1346 		break;
1347 	default:
1348 		panic("kern_setsockopt called with bad valseg");
1349 	}
1350 
1351 	NET_LOCK_GIANT();
1352 	error = getsock(td->td_proc->p_fd, s, &fp);
1353 	if (error == 0) {
1354 		so = fp->f_data;
1355 		error = sosetopt(so, &sopt);
1356 		fdrop(fp, td);
1357 	}
1358 	NET_UNLOCK_GIANT();
1359 	return(error);
1360 	return (error);
1361 
1362 /*
1363  * MPSAFE
1364  */
1365 /* ARGSUSED */
1366 int
1367 getsockopt(td, uap)
1368 	struct thread *td;
1369 	register struct getsockopt_args /* {
1370 		int	s;
1371 		int	level;
1372 		int	name;
1373 		void * __restrict	val;
1374 		socklen_t * __restrict avalsize;
1375 	} */ *uap;
1376 {
1377 	socklen_t valsize;
1378 	int	error;
1379 
1380 	if (uap->val) {
1381 		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1382 		if (error)
1383 			return (error);
1384 	}
1385 
1386 	error = kern_getsockopt(td, uap->s, uap->level, uap->name,
1387 	    uap->val, UIO_USERSPACE, &valsize);
1388 
1389 	if (error == 0)
1390 		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1391 	return (error);
1392 }
1393 
1394 /*
1395  * Kernel version of getsockopt.
1396  * optval can be a userland or kernel address; optlen is always a kernel pointer.
1397  */
1398 int
1399 kern_getsockopt(td, s, level, name, val, valseg, valsize)
1400 	struct thread *td;
1401 	int s;
1402 	int level;
1403 	int name;
1404 	void *val;
1405 	enum uio_seg valseg;
1406 	socklen_t *valsize;
1407 {
1408 	int error;
1409 	struct  socket *so;
1410 	struct file *fp;
1411 	struct	sockopt sopt;
1412 
1413 	if (val == NULL)
1414 		*valsize = 0;
1415 	if (*valsize < 0)
1416 		return (EINVAL);
1417 
1418 	sopt.sopt_dir = SOPT_GET;
1419 	sopt.sopt_level = level;
1420 	sopt.sopt_name = name;
1421 	sopt.sopt_val = val;
1422 	sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
1423 	switch (valseg) {
1424 	case UIO_USERSPACE:
1425 		sopt.sopt_td = td;
1426 		break;
1427 	case UIO_SYSSPACE:
1428 		sopt.sopt_td = NULL;
1429 		break;
1430 	default:
1431 		panic("kern_getsockopt called with bad valseg");
1432 	}
1433 
1434 	NET_LOCK_GIANT();
1435 	error = getsock(td->td_proc->p_fd, s, &fp);
1436 	if (error == 0) {
1437 		so = fp->f_data;
1438 		error = sogetopt(so, &sopt);
1439 		*valsize = sopt.sopt_valsize;
1440 		fdrop(fp, td);
1441 	}
1442 	NET_UNLOCK_GIANT();
1443 	return (error);
1444 }
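
/*
 * Illustrative only: a hedged sketch of how an in-kernel consumer might
 * drive kern_setsockopt()/kern_getsockopt() with kernel buffers, i.e.
 * UIO_SYSSPACE, as described above.  The wrapper function, descriptor
 * `fd' and the choice of SO_SNDBUF are hypothetical; only the two
 * kern_*sockopt() signatures come from this file.
 *
 *	static int
 *	example_set_sndbuf(struct thread *td, int fd, int bytes)
 *	{
 *		int error, cur;
 *		socklen_t len = sizeof(cur);
 *
 *		error = kern_setsockopt(td, fd, SOL_SOCKET, SO_SNDBUF,
 *		    &bytes, UIO_SYSSPACE, sizeof(bytes));
 *		if (error)
 *			return (error);
 *		error = kern_getsockopt(td, fd, SOL_SOCKET, SO_SNDBUF,
 *		    &cur, UIO_SYSSPACE, &len);
 *		return (error);
 *	}
 */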
1445 
1446 /*
1447  * getsockname1() - Get socket name.
1448  *
1449  * MPSAFE
1450  */
1451 /* ARGSUSED */
1452 static int
1453 getsockname1(td, uap, compat)
1454 	struct thread *td;
1455 	register struct getsockname_args /* {
1456 		int	fdes;
1457 		struct sockaddr * __restrict asa;
1458 		socklen_t * __restrict alen;
1459 	} */ *uap;
1460 	int compat;
1461 {
1462 	struct socket *so;
1463 	struct sockaddr *sa;
1464 	struct file *fp;
1465 	socklen_t len;
1466 	int error;
1467 
1468 	NET_LOCK_GIANT();
1469 	error = getsock(td->td_proc->p_fd, uap->fdes, &fp);
1470 	if (error)
1471 		goto done2;
1472 	so = fp->f_data;
1473 	error = copyin(uap->alen, &len, sizeof (len));
1474 	if (error)
1475 		goto done1;
1476 	if (len < 0) {
1477 		error = EINVAL;
1478 		goto done1;
1479 	}
1480 	sa = 0;
1481 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1482 	if (error)
1483 		goto bad;
1484 	if (sa == 0) {
1485 		len = 0;
1486 		goto gotnothing;
1487 	}
1488 
1489 	len = MIN(len, sa->sa_len);
1490 #ifdef COMPAT_OLDSOCK
1491 	if (compat)
1492 		((struct osockaddr *)sa)->sa_family = sa->sa_family;
1493 #endif
1494 	error = copyout(sa, uap->asa, (u_int)len);
1495 	if (error == 0)
1496 gotnothing:
1497 		error = copyout(&len, uap->alen, sizeof (len));
1498 bad:
1499 	if (sa)
1500 		FREE(sa, M_SONAME);
1501 done1:
1502 	fdrop(fp, td);
1503 done2:
1504 	NET_UNLOCK_GIANT();
1505 	return (error);
1506 }
1507 
1508 /*
1509  * MPSAFE
1510  */
1511 int
1512 getsockname(td, uap)
1513 	struct thread *td;
1514 	struct getsockname_args *uap;
1515 {
1516 
1517 	return (getsockname1(td, uap, 0));
1518 }
1519 
1520 #ifdef COMPAT_OLDSOCK
1521 /*
1522  * MPSAFE
1523  */
1524 int
1525 ogetsockname(td, uap)
1526 	struct thread *td;
1527 	struct getsockname_args *uap;
1528 {
1529 
1530 	return (getsockname1(td, uap, 1));
1531 }
1532 #endif /* COMPAT_OLDSOCK */
1533 
1534 /*
1535  * getpeername1() - Get name of peer for connected socket.
1536  *
1537  * MPSAFE
1538  */
1539 /* ARGSUSED */
1540 static int
1541 getpeername1(td, uap, compat)
1542 	struct thread *td;
1543 	register struct getpeername_args /* {
1544 		int	fdes;
1545 		struct sockaddr * __restrict	asa;
1546 		socklen_t * __restrict	alen;
1547 	} */ *uap;
1548 	int compat;
1549 {
1550 	struct socket *so;
1551 	struct sockaddr *sa;
1552 	struct file *fp;
1553 	socklen_t len;
1554 	int error;
1555 
1556 	NET_LOCK_GIANT();
1557 	error = getsock(td->td_proc->p_fd, uap->fdes, &fp);
1558 	if (error)
1559 		goto done2;
1560 	so = fp->f_data;
1561 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1562 		error = ENOTCONN;
1563 		goto done1;
1564 	}
1565 	error = copyin(uap->alen, &len, sizeof (len));
1566 	if (error)
1567 		goto done1;
1568 	if (len < 0) {
1569 		error = EINVAL;
1570 		goto done1;
1571 	}
1572 	sa = 0;
1573 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1574 	if (error)
1575 		goto bad;
1576 	if (sa == 0) {
1577 		len = 0;
1578 		goto gotnothing;
1579 	}
1580 	len = MIN(len, sa->sa_len);
1581 #ifdef COMPAT_OLDSOCK
1582 	if (compat)
1583 		((struct osockaddr *)sa)->sa_family =
1584 		    sa->sa_family;
1585 #endif
1586 	error = copyout(sa, uap->asa, (u_int)len);
1587 	if (error)
1588 		goto bad;
1589 gotnothing:
1590 	error = copyout(&len, uap->alen, sizeof (len));
1591 bad:
1592 	if (sa)
1593 		FREE(sa, M_SONAME);
1594 done1:
1595 	fdrop(fp, td);
1596 done2:
1597 	NET_UNLOCK_GIANT();
1598 	return (error);
1599 }
1600 
1601 /*
1602  * MPSAFE
1603  */
1604 int
1605 getpeername(td, uap)
1606 	struct thread *td;
1607 	struct getpeername_args *uap;
1608 {
1609 
1610 	return (getpeername1(td, uap, 0));
1611 }
1612 
1613 #ifdef COMPAT_OLDSOCK
1614 /*
1615  * MPSAFE
1616  */
1617 int
1618 ogetpeername(td, uap)
1619 	struct thread *td;
1620 	struct ogetpeername_args *uap;
1621 {
1622 
1623 	/* XXX uap should have type `getpeername_args *' to begin with. */
1624 	return (getpeername1(td, (struct getpeername_args *)uap, 1));
1625 }
1626 #endif /* COMPAT_OLDSOCK */
1627 
1628 int
1629 sockargs(mp, buf, buflen, type)
1630 	struct mbuf **mp;
1631 	caddr_t buf;
1632 	int buflen, type;
1633 {
1634 	register struct sockaddr *sa;
1635 	register struct mbuf *m;
1636 	int error;
1637 
1638 	if ((u_int)buflen > MLEN) {
1639 #ifdef COMPAT_OLDSOCK
1640 		if (type == MT_SONAME && (u_int)buflen <= 112)
1641 			buflen = MLEN;		/* unix domain compat. hack */
1642 		else
1643 #endif
1644 			if ((u_int)buflen > MCLBYTES)
1645 				return (EINVAL);
1646 	}
1647 	m = m_get(M_TRYWAIT, type);
1648 	if (m == NULL)
1649 		return (ENOBUFS);
1650 	if ((u_int)buflen > MLEN) {
1651 		MCLGET(m, M_TRYWAIT);
1652 		if ((m->m_flags & M_EXT) == 0) {
1653 			m_free(m);
1654 			return (ENOBUFS);
1655 		}
1656 	}
1657 	m->m_len = buflen;
1658 	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1659 	if (error)
1660 		(void) m_free(m);
1661 	else {
1662 		*mp = m;
1663 		if (type == MT_SONAME) {
1664 			sa = mtod(m, struct sockaddr *);
1665 
1666 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1667 			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1668 				sa->sa_family = sa->sa_len;
1669 #endif
1670 			sa->sa_len = buflen;
1671 		}
1672 	}
1673 	return (error);
1674 }
1675 
1676 int
1677 getsockaddr(namp, uaddr, len)
1678 	struct sockaddr **namp;
1679 	caddr_t uaddr;
1680 	size_t len;
1681 {
1682 	struct sockaddr *sa;
1683 	int error;
1684 
1685 	if (len > SOCK_MAXADDRLEN)
1686 		return (ENAMETOOLONG);
1687 	if (len < offsetof(struct sockaddr, sa_data[0]))
1688 		return (EINVAL);
1689 	MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1690 	error = copyin(uaddr, sa, len);
1691 	if (error) {
1692 		FREE(sa, M_SONAME);
1693 	} else {
1694 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1695 		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1696 			sa->sa_family = sa->sa_len;
1697 #endif
1698 		sa->sa_len = len;
1699 		*namp = sa;
1700 	}
1701 	return (error);
1702 }
1703 
1704 /*
1705  * Detach mapped page and release resources back to the system.
1706  */
1707 void
1708 sf_buf_mext(void *addr, void *args)
1709 {
1710 	vm_page_t m;
1711 
1712 	m = sf_buf_page(args);
1713 	sf_buf_free(args);
1714 	vm_page_lock_queues();
1715 	vm_page_unwire(m, 0);
1716 	/*
1717 	 * Check for the object going away on us. This can
1718 	 * happen since we don't hold a reference to it.
1719 	 * If so, we're responsible for freeing the page.
1720 	 */
1721 	if (m->wire_count == 0 && m->object == NULL)
1722 		vm_page_free(m);
1723 	vm_page_unlock_queues();
1724 }
1725 
1726 /*
1727  * sendfile(2)
1728  *
1729  * MPSAFE
1730  *
1731  * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1732  *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1733  *
1734  * Send a file specified by 'fd' and starting at 'offset' to a socket
1735  * specified by 's'. Send only 'nbytes' of the file or until EOF if
1736  * nbytes == 0. Optionally add a header and/or trailer to the socket
1737  * output. If specified, write the total number of bytes sent into *sbytes.
1738  *
1739  */
1740 int
1741 sendfile(struct thread *td, struct sendfile_args *uap)
1742 {
1743 
1744 	return (do_sendfile(td, uap, 0));
1745 }
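
/*
 * Illustrative only (userland, not part of the kernel build): a minimal
 * sketch of the sendfile(2) interface documented above.  Opening the file
 * and connecting the socket are assumed to have happened elsewhere, and
 * error handling is abbreviated.
 *
 *	#include <sys/types.h>
 *	#include <sys/socket.h>
 *	#include <sys/uio.h>
 *
 *	static int
 *	send_whole_file(int filefd, int sockfd)
 *	{
 *		off_t sbytes = 0;
 *
 *		// nbytes == 0: send from offset 0 until EOF;
 *		// hdtr == NULL: no header or trailer.
 *		if (sendfile(filefd, sockfd, 0, 0, NULL, &sbytes, 0) == -1)
 *			return (-1);
 *		return (0);
 *	}
 */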
1746 
1747 static int
1748 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1749 {
1750 	struct sf_hdtr hdtr;
1751 	struct uio *hdr_uio, *trl_uio;
1752 	int error;
1753 
1754 	hdr_uio = trl_uio = NULL;
1755 
1756 	if (uap->hdtr != NULL) {
1757 		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1758 		if (error)
1759 			goto out;
1760 		if (hdtr.headers != NULL) {
1761 			error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1762 			if (error)
1763 				goto out;
1764 		}
1765 		if (hdtr.trailers != NULL) {
1766 			error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
1767 			if (error)
1768 				goto out;
1769 
1770 		}
1771 	}
1772 
1773 	error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
1774 out:
1775 	if (hdr_uio)
1776 		free(hdr_uio, M_IOV);
1777 	if (trl_uio)
1778 		free(trl_uio, M_IOV);
1779 	return (error);
1780 }
1781 
1782 #ifdef COMPAT_FREEBSD4
1783 int
1784 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
1785 {
1786 	struct sendfile_args args;
1787 
1788 	args.fd = uap->fd;
1789 	args.s = uap->s;
1790 	args.offset = uap->offset;
1791 	args.nbytes = uap->nbytes;
1792 	args.hdtr = uap->hdtr;
1793 	args.sbytes = uap->sbytes;
1794 	args.flags = uap->flags;
1795 
1796 	return (do_sendfile(td, &args, 1));
1797 }
1798 #endif /* COMPAT_FREEBSD4 */
1799 
1800 int
1801 kern_sendfile(struct thread *td, struct sendfile_args *uap,
1802     struct uio *hdr_uio, struct uio *trl_uio, int compat)
1803 {
1804 	struct vnode *vp;
1805 	struct vm_object *obj = NULL;
1806 	struct socket *so = NULL;
1807 	struct mbuf *m, *m_header = NULL;
1808 	struct sf_buf *sf;
1809 	struct vm_page *pg;
1810 	off_t off, xfsize, hdtr_size, sbytes = 0;
1811 	int error, headersize = 0, headersent = 0;
1812 	int vfslocked;
1813 
1814 	NET_LOCK_GIANT();
1815 
1816 	hdtr_size = 0;
1817 
1818 	/*
1819 	 * The descriptor must be a regular file and have a backing VM object.
1820 	 */
1821 	if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1822 		goto done;
1823 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1824 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1825 	obj = vp->v_object;
1826 	if (obj != NULL) {
1827 		/*
1828 		 * Temporarily increase the backing VM object's reference
1829 		 * count so that a forced reclamation of its vnode does not
1830 		 * immediately destroy it.
1831 		 */
1832 		VM_OBJECT_LOCK(obj);
1833 		if ((obj->flags & OBJ_DEAD) == 0) {
1834 			vm_object_reference_locked(obj);
1835 			VM_OBJECT_UNLOCK(obj);
1836 		} else {
1837 			VM_OBJECT_UNLOCK(obj);
1838 			obj = NULL;
1839 		}
1840 	}
1841 	VOP_UNLOCK(vp, 0, td);
1842 	VFS_UNLOCK_GIANT(vfslocked);
1843 	if (obj == NULL) {
1844 		error = EINVAL;
1845 		goto done;
1846 	}
1847 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1848 		goto done;
1849 	if (so->so_type != SOCK_STREAM) {
1850 		error = EINVAL;
1851 		goto done;
1852 	}
1853 	if ((so->so_state & SS_ISCONNECTED) == 0) {
1854 		error = ENOTCONN;
1855 		goto done;
1856 	}
1857 	if (uap->offset < 0) {
1858 		error = EINVAL;
1859 		goto done;
1860 	}
1861 
1862 #ifdef MAC
1863 	SOCK_LOCK(so);
1864 	error = mac_check_socket_send(td->td_ucred, so);
1865 	SOCK_UNLOCK(so);
1866 	if (error)
1867 		goto done;
1868 #endif
1869 
1870 	/*
1871 	 * If a header was supplied, wrap it in an mbuf chain and record
1872 	 * its size so it can be accounted for in the byte counts.
1873 	 */
1874 	if (hdr_uio != NULL) {
1875 		hdr_uio->uio_td = td;
1876 		hdr_uio->uio_rw = UIO_WRITE;
1877 		if (hdr_uio->uio_resid > 0) {
1878 			m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0, 0);
1879 			if (m_header == NULL)
1880 				goto done;
1881 			headersize = m_header->m_pkthdr.len;
1882 			if (compat)
1883 				sbytes += headersize;
1884 		}
1885 	}
1886 
1887 	/*
1888 	 * Protect against multiple writers to the socket.
1889 	 */
1890 	SOCKBUF_LOCK(&so->so_snd);
1891 	(void) sblock(&so->so_snd, M_WAITOK);
1892 	SOCKBUF_UNLOCK(&so->so_snd);
1893 
1894 	/*
1895 	 * Loop through the pages in the file, starting with the requested
1896 	 * offset. Get a file page (do I/O if necessary), map the file page
1897 	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1898 	 * it on the socket.
1899 	 */
1900 	for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1901 		vm_pindex_t pindex;
1902 		vm_offset_t pgoff;
1903 
1904 		pindex = OFF_TO_IDX(off);
1905 		VM_OBJECT_LOCK(obj);
1906 retry_lookup:
1907 		/*
1908 		 * Calculate the amount to transfer. Not to exceed a page,
1909 		 * the EOF, or the passed in nbytes.
1910 		 */
1911 		xfsize = obj->un_pager.vnp.vnp_size - off;
1912 		VM_OBJECT_UNLOCK(obj);
1913 		if (xfsize > PAGE_SIZE)
1914 			xfsize = PAGE_SIZE;
1915 		pgoff = (vm_offset_t)(off & PAGE_MASK);
1916 		if (PAGE_SIZE - pgoff < xfsize)
1917 			xfsize = PAGE_SIZE - pgoff;
1918 		if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1919 			xfsize = uap->nbytes - sbytes;
1920 		if (xfsize <= 0) {
1921 			if (m_header != NULL) {
1922 				m = m_header;
1923 				m_header = NULL;
1924 				SOCKBUF_LOCK(&so->so_snd);
1925 				goto retry_space;
1926 			} else
1927 				break;
1928 		}
1929 		/*
1930 		 * Optimize the non-blocking case by looking at the socket space
1931 		 * before going to the extra work of constituting the sf_buf.
1932 		 */
1933 		SOCKBUF_LOCK(&so->so_snd);
1934 		if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1935 			if (so->so_snd.sb_state & SBS_CANTSENDMORE)
1936 				error = EPIPE;
1937 			else
1938 				error = EAGAIN;
1939 			sbunlock(&so->so_snd);
1940 			SOCKBUF_UNLOCK(&so->so_snd);
1941 			goto done;
1942 		}
1943 		SOCKBUF_UNLOCK(&so->so_snd);
1944 		VM_OBJECT_LOCK(obj);
1945 		/*
1946 		 * Attempt to look up the page.
1947 		 *
1948 		 *	Allocate if not found
1949 		 *
1950 		 *	Wait and loop if busy.
1951 		 */
1952 		pg = vm_page_lookup(obj, pindex);
1953 
1954 		if (pg == NULL) {
1955 			pg = vm_page_alloc(obj, pindex, VM_ALLOC_NOBUSY |
1956 			    VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
1957 			if (pg == NULL) {
1958 				VM_OBJECT_UNLOCK(obj);
1959 				VM_WAIT;
1960 				VM_OBJECT_LOCK(obj);
1961 				goto retry_lookup;
1962 			}
1963 			vm_page_lock_queues();
1964 		} else {
1965 			vm_page_lock_queues();
1966 			if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
1967 				goto retry_lookup;
1968 			/*
1969 			 * Wire the page so it does not get ripped out from
1970 			 * under us.
1971 			 */
1972 			vm_page_wire(pg);
1973 		}
1974 
1975 		/*
1976 		 * If page is not valid for what we need, initiate I/O
1977 		 */
1978 
1979 		if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) {
1980 			VM_OBJECT_UNLOCK(obj);
1981 		} else if (uap->flags & SF_NODISKIO) {
1982 			error = EBUSY;
1983 		} else {
1984 			int bsize, resid;
1985 
1986 			/*
1987 			 * Ensure that our page is still around when the I/O
1988 			 * completes.
1989 			 */
1990 			vm_page_io_start(pg);
1991 			vm_page_unlock_queues();
1992 			VM_OBJECT_UNLOCK(obj);
1993 
1994 			/*
1995 			 * Get the page from backing store.
1996 			 */
1997 			bsize = vp->v_mount->mnt_stat.f_iosize;
1998 			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1999 			vn_lock(vp, LK_SHARED | LK_RETRY, td);
2000 			/*
2001 			 * XXXMAC: Because we don't have fp->f_cred here,
2002 			 * we pass in NOCRED.  This is probably wrong, but
2003 			 * is consistent with our original implementation.
2004 			 */
2005 			error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
2006 			    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
2007 			    IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
2008 			    td->td_ucred, NOCRED, &resid, td);
2009 			VOP_UNLOCK(vp, 0, td);
2010 			VFS_UNLOCK_GIANT(vfslocked);
2011 			VM_OBJECT_LOCK(obj);
2012 			vm_page_lock_queues();
2013 			vm_page_io_finish(pg);
2014 			if (!error)
2015 				VM_OBJECT_UNLOCK(obj);
2016 			mbstat.sf_iocnt++;
2017 		}
2018 
2019 		if (error) {
2020 			vm_page_unwire(pg, 0);
2021 			/*
2022 			 * See if anyone else might know about this page.
2023 			 * If not and it is not valid, then free it.
2024 			 */
2025 			if (pg->wire_count == 0 && pg->valid == 0 &&
2026 			    pg->busy == 0 && !(pg->flags & PG_BUSY) &&
2027 			    pg->hold_count == 0) {
2028 				vm_page_free(pg);
2029 			}
2030 			vm_page_unlock_queues();
2031 			VM_OBJECT_UNLOCK(obj);
2032 			SOCKBUF_LOCK(&so->so_snd);
2033 			sbunlock(&so->so_snd);
2034 			SOCKBUF_UNLOCK(&so->so_snd);
2035 			goto done;
2036 		}
2037 		vm_page_unlock_queues();
2038 
2039 		/*
2040 		 * Get a sendfile buf. We usually wait as long as necessary,
2041 		 * but this wait can be interrupted.
2042 		 */
2043 		if ((sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) {
2044 			mbstat.sf_allocfail++;
2045 			vm_page_lock_queues();
2046 			vm_page_unwire(pg, 0);
2047 			if (pg->wire_count == 0 && pg->object == NULL)
2048 				vm_page_free(pg);
2049 			vm_page_unlock_queues();
2050 			SOCKBUF_LOCK(&so->so_snd);
2051 			sbunlock(&so->so_snd);
2052 			SOCKBUF_UNLOCK(&so->so_snd);
2053 			error = EINTR;
2054 			goto done;
2055 		}
2056 
2057 		/*
2058 		 * Get an mbuf header and set it up as having external storage.
2059 		 */
2060 		if (m_header)
2061 			MGET(m, M_TRYWAIT, MT_DATA);
2062 		else
2063 			MGETHDR(m, M_TRYWAIT, MT_DATA);
2064 		if (m == NULL) {
2065 			error = ENOBUFS;
2066 			sf_buf_mext((void *)sf_buf_kva(sf), sf);
2067 			SOCKBUF_LOCK(&so->so_snd);
2068 			sbunlock(&so->so_snd);
2069 			SOCKBUF_UNLOCK(&so->so_snd);
2070 			goto done;
2071 		}
2072 		/*
2073 		 * Setup external storage for mbuf.
2074 		 */
2075 		MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY,
2076 		    EXT_SFBUF);
2077 		m->m_data = (char *)sf_buf_kva(sf) + pgoff;
2078 		m->m_pkthdr.len = m->m_len = xfsize;
2079 
2080 		if (m_header) {
2081 			m_cat(m_header, m);
2082 			m = m_header;
2083 			m_header = NULL;
2084 			m_fixhdr(m);
2085 		}
2086 
2087 		/*
2088 		 * Add the buffer to the socket buffer chain.
2089 		 */
2090 		SOCKBUF_LOCK(&so->so_snd);
2091 retry_space:
2092 		/*
2093 		 * Make sure that the socket is still able to take more data.
2094 		 * CANTSENDMORE being true usually means that the connection
2095 		 * was closed. so_error is true when an error was sensed after
2096 		 * a previous send.
2097 		 * The state is checked after the page mapping and buffer
2098 		 * allocation above since those operations may block and make
2099 		 * any socket checks stale. From this point forward, nothing
2100 		 * blocks before the pru_send (or more accurately, any blocking
2101 		 * results in a loop back to here to re-check).
2102 		 */
2103 		SOCKBUF_LOCK_ASSERT(&so->so_snd);
2104 		if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) {
2105 			if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2106 				error = EPIPE;
2107 			} else {
2108 				error = so->so_error;
2109 				so->so_error = 0;
2110 			}
2111 			m_freem(m);
2112 			sbunlock(&so->so_snd);
2113 			SOCKBUF_UNLOCK(&so->so_snd);
2114 			goto done;
2115 		}
2116 		/*
2117 		 * Wait for socket space to become available. We do this just
2118 		 * after checking the connection state above in order to avoid
2119 		 * a race condition with sbwait().
2120 		 */
2121 		if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
2122 			if (so->so_state & SS_NBIO) {
2123 				m_freem(m);
2124 				sbunlock(&so->so_snd);
2125 				SOCKBUF_UNLOCK(&so->so_snd);
2126 				error = EAGAIN;
2127 				goto done;
2128 			}
2129 			error = sbwait(&so->so_snd);
2130 			/*
2131 			 * An error from sbwait usually indicates that we've
2132 			 * been interrupted by a signal. If we've sent anything
2133 			 * then return bytes sent, otherwise return the error.
2134 			 */
2135 			if (error) {
2136 				m_freem(m);
2137 				sbunlock(&so->so_snd);
2138 				SOCKBUF_UNLOCK(&so->so_snd);
2139 				goto done;
2140 			}
2141 			goto retry_space;
2142 		}
2143 		SOCKBUF_UNLOCK(&so->so_snd);
2144 		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
2145 		if (error) {
2146 			SOCKBUF_LOCK(&so->so_snd);
2147 			sbunlock(&so->so_snd);
2148 			SOCKBUF_UNLOCK(&so->so_snd);
2149 			goto done;
2150 		}
2151 		headersent = 1;
2152 	}
2153 	SOCKBUF_LOCK(&so->so_snd);
2154 	sbunlock(&so->so_snd);
2155 	SOCKBUF_UNLOCK(&so->so_snd);
2156 
2157 	/*
2158 	 * Send trailers. Wimp out and use writev(2).
2159 	 */
2160 	if (trl_uio != NULL) {
2161 		error = kern_writev(td, uap->s, trl_uio);
2162 		if (error)
2163 			goto done;
2164 		if (compat)
2165 			sbytes += td->td_retval[0];
2166 		else
2167 			hdtr_size += td->td_retval[0];
2168 	}
2169 
2170 done:
2171 	if (headersent) {
2172 		if (!compat)
2173 			hdtr_size += headersize;
2174 	} else {
2175 		if (compat)
2176 			sbytes -= headersize;
2177 	}
2178 	/*
2179 	 * If there was no error we have to clear td->td_retval[0]
2180 	 * because it may have been set by writev.
2181 	 */
2182 	if (error == 0) {
2183 		td->td_retval[0] = 0;
2184 	}
2185 	if (uap->sbytes != NULL) {
2186 		if (!compat)
2187 			sbytes += hdtr_size;
2188 		copyout(&sbytes, uap->sbytes, sizeof(off_t));
2189 	}
2190 	if (obj != NULL)
2191 		vm_object_deallocate(obj);
2192 	if (vp != NULL) {
2193 		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2194 		vrele(vp);
2195 		VFS_UNLOCK_GIANT(vfslocked);
2196 	}
2197 	if (so)
2198 		fputsock(so);
2199 	if (m_header)
2200 		m_freem(m_header);
2201 
2202 	NET_UNLOCK_GIANT();
2203 
2204 	if (error == ERESTART)
2205 		error = EINTR;
2206 
2207 	return (error);
2208 }
2209