xref: /freebsd/sys/kern/uipc_syscalls.c (revision 390e8cc2974df1888369c06339ef8e0e92b312b6)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * sendfile(2) and related extensions:
6  * Copyright (c) 1998, David Greenman. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
37  * $FreeBSD$
38  */
39 
40 #include "opt_compat.h"
41 #include "opt_ktrace.h"
42 #include "opt_mac.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/kernel.h>
47 #include <sys/lock.h>
48 #include <sys/mac.h>
49 #include <sys/mutex.h>
50 #include <sys/sysproto.h>
51 #include <sys/malloc.h>
52 #include <sys/filedesc.h>
53 #include <sys/event.h>
54 #include <sys/proc.h>
55 #include <sys/fcntl.h>
56 #include <sys/file.h>
57 #include <sys/filio.h>
58 #include <sys/mount.h>
59 #include <sys/mbuf.h>
60 #include <sys/protosw.h>
61 #include <sys/socket.h>
62 #include <sys/socketvar.h>
63 #include <sys/signalvar.h>
64 #include <sys/syscallsubr.h>
65 #include <sys/uio.h>
66 #include <sys/vnode.h>
67 #ifdef KTRACE
68 #include <sys/ktrace.h>
69 #endif
70 
71 #include <vm/vm.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_page.h>
74 #include <vm/vm_pageout.h>
75 #include <vm/vm_kern.h>
76 #include <vm/vm_extern.h>
77 
78 static void sf_buf_init(void *arg);
79 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
80 
81 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
82 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
83 
84 static int accept1(struct thread *td, struct accept_args *uap, int compat);
85 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
86 static int getsockname1(struct thread *td, struct getsockname_args *uap,
87 			int compat);
88 static int getpeername1(struct thread *td, struct getpeername_args *uap,
89 			int compat);
90 
91 /*
92  * Expanded sf_freelist head.  Really an SLIST_HEAD() in disguise: it
93  * bundles the free-list head together with the sf_lock mutex.
94  */
95 static struct {
96 	SLIST_HEAD(, sf_buf) sf_head;
97 	struct mtx sf_lock;
98 } sf_freelist;
99 
100 static u_int sf_buf_alloc_want;
101 
102 /*
103  * System call interface to the socket abstraction.
104  */
105 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
106 #define COMPAT_OLDSOCK
107 #endif
108 
109 /*
110  * MPSAFE
111  */
112 int
113 socket(td, uap)
114 	struct thread *td;
115 	register struct socket_args /* {
116 		int	domain;
117 		int	type;
118 		int	protocol;
119 	} */ *uap;
120 {
121 	struct filedesc *fdp;
122 	struct socket *so;
123 	struct file *fp;
124 	int fd, error;
125 
126 	mtx_lock(&Giant);
127 	fdp = td->td_proc->p_fd;
128 	error = falloc(td, &fp, &fd);
129 	if (error)
130 		goto done2;
131 	fhold(fp);
132 	error = socreate(uap->domain, &so, uap->type, uap->protocol,
133 	    td->td_ucred, td);
134 	FILEDESC_LOCK(fdp);
135 	if (error) {
136 		if (fdp->fd_ofiles[fd] == fp) {
137 			fdp->fd_ofiles[fd] = NULL;
138 			FILEDESC_UNLOCK(fdp);
139 			fdrop(fp, td);
140 		} else
141 			FILEDESC_UNLOCK(fdp);
142 	} else {
143 		fp->f_data = so;	/* already has ref count */
144 		fp->f_flag = FREAD|FWRITE;
145 		fp->f_ops = &socketops;
146 		fp->f_type = DTYPE_SOCKET;
147 		FILEDESC_UNLOCK(fdp);
148 		td->td_retval[0] = fd;
149 	}
150 	fdrop(fp, td);
151 done2:
152 	mtx_unlock(&Giant);
153 	return (error);
154 }
155 
156 /*
157  * MPSAFE
158  */
159 /* ARGSUSED */
160 int
161 bind(td, uap)
162 	struct thread *td;
163 	register struct bind_args /* {
164 		int	s;
165 		caddr_t	name;
166 		int	namelen;
167 	} */ *uap;
168 {
169 	struct sockaddr *sa;
170 	int error;
171 
172 	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
173 		return (error);
174 
175 	return (kern_bind(td, uap->s, sa));
176 }
177 
178 int
179 kern_bind(td, fd, sa)
180 	struct thread *td;
181 	int fd;
182 	struct sockaddr *sa;
183 {
184 	struct socket *so;
185 	int error;
186 
187 	mtx_lock(&Giant);
188 	if ((error = fgetsock(td, fd, &so, NULL)) != 0)
189 		goto done2;
190 #ifdef MAC
191 	error = mac_check_socket_bind(td->td_ucred, so, sa);
192 	if (error)
193 		goto done1;
194 #endif
195 	error = sobind(so, sa, td);
196 #ifdef MAC
197 done1:
198 #endif
199 	fputsock(so);
200 done2:
201 	mtx_unlock(&Giant);
202 	FREE(sa, M_SONAME);
203 	return (error);
204 }
205 
206 /*
207  * MPSAFE
208  */
209 /* ARGSUSED */
210 int
211 listen(td, uap)
212 	struct thread *td;
213 	register struct listen_args /* {
214 		int	s;
215 		int	backlog;
216 	} */ *uap;
217 {
218 	struct socket *so;
219 	int error;
220 
221 	mtx_lock(&Giant);
222 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
223 #ifdef MAC
224 		error = mac_check_socket_listen(td->td_ucred, so);
225 		if (error)
226 			goto done;
227 #endif
228 		error = solisten(so, uap->backlog, td);
229 #ifdef MAC
230 done:
231 #endif
232 		fputsock(so);
233 	}
234 	mtx_unlock(&Giant);
235 	return(error);
236 }
237 
238 /*
239  * accept1()
240  * MPSAFE
241  */
242 static int
243 accept1(td, uap, compat)
244 	struct thread *td;
245 	register struct accept_args /* {
246 		int	s;
247 		caddr_t	name;
248 		int	*anamelen;
249 	} */ *uap;
250 	int compat;
251 {
252 	struct filedesc *fdp;
253 	struct file *nfp = NULL;
254 	struct sockaddr *sa;
255 	int namelen, error, s;
256 	struct socket *head, *so;
257 	int fd;
258 	u_int fflag;
259 	pid_t pgid;
260 	int tmp;
261 
262 	mtx_lock(&Giant);
263 	fdp = td->td_proc->p_fd;
264 	if (uap->name) {
265 		error = copyin(uap->anamelen, &namelen, sizeof (namelen));
266 		if(error)
267 			goto done2;
268 		if (namelen < 0) {
269 			error = EINVAL;
270 			goto done2;
271 		}
272 	}
273 	error = fgetsock(td, uap->s, &head, &fflag);
274 	if (error)
275 		goto done2;
276 	s = splnet();
277 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
278 		splx(s);
279 		error = EINVAL;
280 		goto done;
281 	}
282 	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
283 		if (head->so_state & SS_CANTRCVMORE) {
284 			head->so_error = ECONNABORTED;
285 			break;
286 		}
287 		if ((head->so_state & SS_NBIO) != 0) {
288 			head->so_error = EWOULDBLOCK;
289 			break;
290 		}
291 		error = tsleep(&head->so_timeo, PSOCK | PCATCH,
292 		    "accept", 0);
293 		if (error) {
294 			splx(s);
295 			goto done;
296 		}
297 	}
298 	if (head->so_error) {
299 		error = head->so_error;
300 		head->so_error = 0;
301 		splx(s);
302 		goto done;
303 	}
304 
305 	/*
306 	 * At this point we know that there is at least one connection
307 	 * ready to be accepted. Remove it from the queue prior to
308 	 * allocating the file descriptor for it since falloc() may
309 	 * block allowing another process to accept the connection
310 	 * instead.
311 	 */
312 	so = TAILQ_FIRST(&head->so_comp);
313 	TAILQ_REMOVE(&head->so_comp, so, so_list);
314 	head->so_qlen--;
315 
316 	error = falloc(td, &nfp, &fd);
317 	if (error) {
318 		/*
319 		 * Probably ran out of file descriptors. Put the
320 		 * unaccepted connection back onto the queue and
321 		 * do another wakeup so some other process might
322 		 * have a chance at it.
323 		 */
324 		TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
325 		head->so_qlen++;
326 		wakeup_one(&head->so_timeo);
327 		splx(s);
328 		goto done;
329 	}
330 	fhold(nfp);
331 	td->td_retval[0] = fd;
332 
333 	/* connection has been removed from the listen queue */
334 	KNOTE(&head->so_rcv.sb_sel.si_note, 0);
335 
336 	so->so_state &= ~SS_COMP;
337 	so->so_head = NULL;
338 	pgid = fgetown(&head->so_sigio);
339 	if (pgid != 0)
340 		fsetown(pgid, &so->so_sigio);
341 
342 	FILE_LOCK(nfp);
343 	soref(so);			/* file descriptor reference */
344 	nfp->f_data = so;	/* nfp has ref count from falloc */
345 	nfp->f_flag = fflag;
346 	nfp->f_ops = &socketops;
347 	nfp->f_type = DTYPE_SOCKET;
348 	FILE_UNLOCK(nfp);
349 	/* Sync socket nonblocking/async state with file flags */
350 	tmp = fflag & FNONBLOCK;
351 	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
352 	tmp = fflag & FASYNC;
353 	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
354 	sa = 0;
355 	error = soaccept(so, &sa);
356 	if (error) {
357 		/*
358 		 * return a namelen of zero for older code which might
359 	 	 * ignore the return value from accept.
360 		 */
361 		if (uap->name != NULL) {
362 			namelen = 0;
363 			(void) copyout(&namelen,
364 			    uap->anamelen, sizeof(*uap->anamelen));
365 		}
366 		goto noconnection;
367 	}
368 	if (sa == NULL) {
369 		namelen = 0;
370 		if (uap->name)
371 			goto gotnoname;
372 		splx(s);
373 		error = 0;
374 		goto done;
375 	}
376 	if (uap->name) {
377 		/* check sa_len before it is destroyed */
378 		if (namelen > sa->sa_len)
379 			namelen = sa->sa_len;
380 #ifdef COMPAT_OLDSOCK
381 		if (compat)
382 			((struct osockaddr *)sa)->sa_family =
383 			    sa->sa_family;
384 #endif
385 		error = copyout(sa, uap->name, (u_int)namelen);
386 		if (!error)
387 gotnoname:
388 			error = copyout(&namelen,
389 			    uap->anamelen, sizeof (*uap->anamelen));
390 	}
391 noconnection:
392 	if (sa)
393 		FREE(sa, M_SONAME);
394 
395 	/*
396 	 * close the new descriptor, assuming someone hasn't ripped it
397 	 * out from under us.
398 	 */
399 	if (error) {
400 		FILEDESC_LOCK(fdp);
401 		if (fdp->fd_ofiles[fd] == nfp) {
402 			fdp->fd_ofiles[fd] = NULL;
403 			FILEDESC_UNLOCK(fdp);
404 			fdrop(nfp, td);
405 		} else {
406 			FILEDESC_UNLOCK(fdp);
407 		}
408 	}
409 	splx(s);
410 
411 	/*
412 	 * Release explicitly held references before returning.
413 	 */
414 done:
415 	if (nfp != NULL)
416 		fdrop(nfp, td);
417 	fputsock(head);
418 done2:
419 	mtx_unlock(&Giant);
420 	return (error);
421 }
422 
/*
 * accept(2) system call: accept1() with compat disabled.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
accept(struct thread *td, struct accept_args *uap)
{
	const int compat = 0;

	return (accept1(td, uap, compat));
}
434 
435 #ifdef COMPAT_OLDSOCK
/*
 * Old-socket accept entry point: accept1() with compat enabled.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
oaccept(struct thread *td, struct accept_args *uap)
{
	const int compat = 1;

	return (accept1(td, uap, compat));
}
447 #endif /* COMPAT_OLDSOCK */
448 
449 /*
450  * MPSAFE
451  */
452 /* ARGSUSED */
453 int
454 connect(td, uap)
455 	struct thread *td;
456 	register struct connect_args /* {
457 		int	s;
458 		caddr_t	name;
459 		int	namelen;
460 	} */ *uap;
461 {
462 	struct sockaddr *sa;
463 	int error;
464 
465 	error = getsockaddr(&sa, uap->name, uap->namelen);
466 	if (error)
467 		return error;
468 
469 	return (kern_connect(td, uap->s, sa));
470 }
471 
472 
473 int
474 kern_connect(td, fd, sa)
475 	struct thread *td;
476 	int fd;
477 	struct sockaddr *sa;
478 {
479 	struct socket *so;
480 	int error, s;
481 
482 	mtx_lock(&Giant);
483 	if ((error = fgetsock(td, fd, &so, NULL)) != 0)
484 		goto done2;
485 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
486 		error = EALREADY;
487 		goto done1;
488 	}
489 #ifdef MAC
490 	error = mac_check_socket_connect(td->td_ucred, so, sa);
491 	if (error)
492 		goto bad;
493 #endif
494 	error = soconnect(so, sa, td);
495 	if (error)
496 		goto bad;
497 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
498 		error = EINPROGRESS;
499 		goto done1;
500 	}
501 	s = splnet();
502 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
503 		error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0);
504 		if (error)
505 			break;
506 	}
507 	if (error == 0) {
508 		error = so->so_error;
509 		so->so_error = 0;
510 	}
511 	splx(s);
512 bad:
513 	so->so_state &= ~SS_ISCONNECTING;
514 	if (error == ERESTART)
515 		error = EINTR;
516 done1:
517 	fputsock(so);
518 done2:
519 	mtx_unlock(&Giant);
520 	FREE(sa, M_SONAME);
521 	return (error);
522 }
523 
524 /*
525  * MPSAFE
526  */
527 int
528 socketpair(td, uap)
529 	struct thread *td;
530 	register struct socketpair_args /* {
531 		int	domain;
532 		int	type;
533 		int	protocol;
534 		int	*rsv;
535 	} */ *uap;
536 {
537 	register struct filedesc *fdp = td->td_proc->p_fd;
538 	struct file *fp1, *fp2;
539 	struct socket *so1, *so2;
540 	int fd, error, sv[2];
541 
542 	mtx_lock(&Giant);
543 	error = socreate(uap->domain, &so1, uap->type, uap->protocol,
544 	    td->td_ucred, td);
545 	if (error)
546 		goto done2;
547 	error = socreate(uap->domain, &so2, uap->type, uap->protocol,
548 	    td->td_ucred, td);
549 	if (error)
550 		goto free1;
551 	error = falloc(td, &fp1, &fd);
552 	if (error)
553 		goto free2;
554 	fhold(fp1);
555 	sv[0] = fd;
556 	fp1->f_data = so1;	/* so1 already has ref count */
557 	error = falloc(td, &fp2, &fd);
558 	if (error)
559 		goto free3;
560 	fhold(fp2);
561 	fp2->f_data = so2;	/* so2 already has ref count */
562 	sv[1] = fd;
563 	error = soconnect2(so1, so2);
564 	if (error)
565 		goto free4;
566 	if (uap->type == SOCK_DGRAM) {
567 		/*
568 		 * Datagram socket connection is asymmetric.
569 		 */
570 		 error = soconnect2(so2, so1);
571 		 if (error)
572 			goto free4;
573 	}
574 	FILE_LOCK(fp1);
575 	fp1->f_flag = FREAD|FWRITE;
576 	fp1->f_ops = &socketops;
577 	fp1->f_type = DTYPE_SOCKET;
578 	FILE_UNLOCK(fp1);
579 	FILE_LOCK(fp2);
580 	fp2->f_flag = FREAD|FWRITE;
581 	fp2->f_ops = &socketops;
582 	fp2->f_type = DTYPE_SOCKET;
583 	FILE_UNLOCK(fp2);
584 	error = copyout(sv, uap->rsv, 2 * sizeof (int));
585 	fdrop(fp1, td);
586 	fdrop(fp2, td);
587 	goto done2;
588 free4:
589 	FILEDESC_LOCK(fdp);
590 	if (fdp->fd_ofiles[sv[1]] == fp2) {
591 		fdp->fd_ofiles[sv[1]] = NULL;
592 		FILEDESC_UNLOCK(fdp);
593 		fdrop(fp2, td);
594 	} else
595 		FILEDESC_UNLOCK(fdp);
596 	fdrop(fp2, td);
597 free3:
598 	FILEDESC_LOCK(fdp);
599 	if (fdp->fd_ofiles[sv[0]] == fp1) {
600 		fdp->fd_ofiles[sv[0]] = NULL;
601 		FILEDESC_UNLOCK(fdp);
602 		fdrop(fp1, td);
603 	} else
604 		FILEDESC_UNLOCK(fdp);
605 	fdrop(fp1, td);
606 free2:
607 	(void)soclose(so2);
608 free1:
609 	(void)soclose(so1);
610 done2:
611 	mtx_unlock(&Giant);
612 	return (error);
613 }
614 
615 static int
616 sendit(td, s, mp, flags)
617 	register struct thread *td;
618 	int s;
619 	register struct msghdr *mp;
620 	int flags;
621 {
622 	struct mbuf *control;
623 	struct sockaddr *to;
624 	int error;
625 
626 	mtx_lock(&Giant);
627 	if (mp->msg_name != NULL) {
628 		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
629 		if (error) {
630 			to = NULL;
631 			goto bad;
632 		}
633 		mp->msg_name = to;
634 	} else
635 		to = NULL;
636 
637 	if (mp->msg_control) {
638 		if (mp->msg_controllen < sizeof(struct cmsghdr)
639 #ifdef COMPAT_OLDSOCK
640 		    && mp->msg_flags != MSG_COMPAT
641 #endif
642 		) {
643 			error = EINVAL;
644 			goto bad;
645 		}
646 		error = sockargs(&control, mp->msg_control,
647 		    mp->msg_controllen, MT_CONTROL);
648 		if (error)
649 			goto bad;
650 #ifdef COMPAT_OLDSOCK
651 		if (mp->msg_flags == MSG_COMPAT) {
652 			register struct cmsghdr *cm;
653 
654 			M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
655 			if (control == 0) {
656 				error = ENOBUFS;
657 				goto bad;
658 			} else {
659 				cm = mtod(control, struct cmsghdr *);
660 				cm->cmsg_len = control->m_len;
661 				cm->cmsg_level = SOL_SOCKET;
662 				cm->cmsg_type = SCM_RIGHTS;
663 			}
664 		}
665 #endif
666 	} else {
667 		control = NULL;
668 	}
669 
670 	error = kern_sendit(td, s, mp, flags, control);
671 
672 bad:
673 	if (to)
674 		FREE(to, M_SONAME);
675 	mtx_unlock(&Giant);
676 	return (error);
677 }
678 
679 int
680 kern_sendit(td, s, mp, flags, control)
681 	struct thread *td;
682 	int s;
683 	struct msghdr *mp;
684 	int flags;
685 	struct mbuf *control;
686 {
687 	struct uio auio;
688 	struct iovec *iov;
689 	struct socket *so;
690 	int i;
691 	int len, error;
692 #ifdef KTRACE
693 	struct iovec *ktriov = NULL;
694 	struct uio ktruio;
695 	int iovlen;
696 #endif
697 
698 	if ((error = fgetsock(td, s, &so, NULL)) != 0)
699 		goto bad2;
700 
701 #ifdef MAC
702 	error = mac_check_socket_send(td->td_ucred, so);
703 	if (error)
704 		goto bad;
705 #endif
706 
707 	auio.uio_iov = mp->msg_iov;
708 	auio.uio_iovcnt = mp->msg_iovlen;
709 	auio.uio_segflg = UIO_USERSPACE;
710 	auio.uio_rw = UIO_WRITE;
711 	auio.uio_td = td;
712 	auio.uio_offset = 0;			/* XXX */
713 	auio.uio_resid = 0;
714 	iov = mp->msg_iov;
715 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
716 		if ((auio.uio_resid += iov->iov_len) < 0) {
717 			error = EINVAL;
718 			goto bad;
719 		}
720 	}
721 #ifdef KTRACE
722 	if (KTRPOINT(td, KTR_GENIO)) {
723 		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
724 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
725 		bcopy(auio.uio_iov, ktriov, iovlen);
726 		ktruio = auio;
727 	}
728 #endif
729 	len = auio.uio_resid;
730 	error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio,
731 	    0, control, flags, td);
732 	if (error) {
733 		if (auio.uio_resid != len && (error == ERESTART ||
734 		    error == EINTR || error == EWOULDBLOCK))
735 			error = 0;
736 		/* Generation of SIGPIPE can be controlled per socket */
737 		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) {
738 			PROC_LOCK(td->td_proc);
739 			psignal(td->td_proc, SIGPIPE);
740 			PROC_UNLOCK(td->td_proc);
741 		}
742 	}
743 	if (error == 0)
744 		td->td_retval[0] = len - auio.uio_resid;
745 #ifdef KTRACE
746 	if (ktriov != NULL) {
747 		if (error == 0) {
748 			ktruio.uio_iov = ktriov;
749 			ktruio.uio_resid = td->td_retval[0];
750 			ktrgenio(s, UIO_WRITE, &ktruio, error);
751 		}
752 		FREE(ktriov, M_TEMP);
753 	}
754 #endif
755 bad:
756 	fputsock(so);
757 bad2:
758 	return (error);
759 }
760 
761 /*
762  * MPSAFE
763  */
764 int
765 sendto(td, uap)
766 	struct thread *td;
767 	register struct sendto_args /* {
768 		int	s;
769 		caddr_t	buf;
770 		size_t	len;
771 		int	flags;
772 		caddr_t	to;
773 		int	tolen;
774 	} */ *uap;
775 {
776 	struct msghdr msg;
777 	struct iovec aiov;
778 	int error;
779 
780 	msg.msg_name = uap->to;
781 	msg.msg_namelen = uap->tolen;
782 	msg.msg_iov = &aiov;
783 	msg.msg_iovlen = 1;
784 	msg.msg_control = 0;
785 #ifdef COMPAT_OLDSOCK
786 	msg.msg_flags = 0;
787 #endif
788 	aiov.iov_base = uap->buf;
789 	aiov.iov_len = uap->len;
790 	error = sendit(td, uap->s, &msg, uap->flags);
791 	return (error);
792 }
793 
794 #ifdef COMPAT_OLDSOCK
795 /*
796  * MPSAFE
797  */
798 int
799 osend(td, uap)
800 	struct thread *td;
801 	register struct osend_args /* {
802 		int	s;
803 		caddr_t	buf;
804 		int	len;
805 		int	flags;
806 	} */ *uap;
807 {
808 	struct msghdr msg;
809 	struct iovec aiov;
810 	int error;
811 
812 	msg.msg_name = 0;
813 	msg.msg_namelen = 0;
814 	msg.msg_iov = &aiov;
815 	msg.msg_iovlen = 1;
816 	aiov.iov_base = uap->buf;
817 	aiov.iov_len = uap->len;
818 	msg.msg_control = 0;
819 	msg.msg_flags = 0;
820 	error = sendit(td, uap->s, &msg, uap->flags);
821 	return (error);
822 }
823 
824 /*
825  * MPSAFE
826  */
827 int
828 osendmsg(td, uap)
829 	struct thread *td;
830 	register struct osendmsg_args /* {
831 		int	s;
832 		caddr_t	msg;
833 		int	flags;
834 	} */ *uap;
835 {
836 	struct msghdr msg;
837 	struct iovec aiov[UIO_SMALLIOV], *iov;
838 	int error;
839 
840 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
841 	if (error)
842 		goto done2;
843 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
844 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
845 			error = EMSGSIZE;
846 			goto done2;
847 		}
848 		MALLOC(iov, struct iovec *,
849 		      sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
850 		      M_WAITOK);
851 	} else {
852 		iov = aiov;
853 	}
854 	error = copyin(msg.msg_iov, iov,
855 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
856 	if (error)
857 		goto done;
858 	msg.msg_flags = MSG_COMPAT;
859 	msg.msg_iov = iov;
860 	error = sendit(td, uap->s, &msg, uap->flags);
861 done:
862 	if (iov != aiov)
863 		FREE(iov, M_IOV);
864 done2:
865 	return (error);
866 }
867 #endif
868 
869 /*
870  * MPSAFE
871  */
872 int
873 sendmsg(td, uap)
874 	struct thread *td;
875 	register struct sendmsg_args /* {
876 		int	s;
877 		caddr_t	msg;
878 		int	flags;
879 	} */ *uap;
880 {
881 	struct msghdr msg;
882 	struct iovec aiov[UIO_SMALLIOV], *iov;
883 	int error;
884 
885 	error = copyin(uap->msg, &msg, sizeof (msg));
886 	if (error)
887 		goto done2;
888 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
889 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
890 			error = EMSGSIZE;
891 			goto done2;
892 		}
893 		MALLOC(iov, struct iovec *,
894 		       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
895 		       M_WAITOK);
896 	} else {
897 		iov = aiov;
898 	}
899 	if (msg.msg_iovlen &&
900 	    (error = copyin(msg.msg_iov, iov,
901 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
902 		goto done;
903 	msg.msg_iov = iov;
904 #ifdef COMPAT_OLDSOCK
905 	msg.msg_flags = 0;
906 #endif
907 	error = sendit(td, uap->s, &msg, uap->flags);
908 done:
909 	if (iov != aiov)
910 		FREE(iov, M_IOV);
911 done2:
912 	return (error);
913 }
914 
915 static int
916 recvit(td, s, mp, namelenp)
917 	register struct thread *td;
918 	int s;
919 	register struct msghdr *mp;
920 	void *namelenp;
921 {
922 	struct uio auio;
923 	register struct iovec *iov;
924 	register int i;
925 	int len, error;
926 	struct mbuf *m, *control = 0;
927 	caddr_t ctlbuf;
928 	struct socket *so;
929 	struct sockaddr *fromsa = 0;
930 #ifdef KTRACE
931 	struct iovec *ktriov = NULL;
932 	struct uio ktruio;
933 	int iovlen;
934 #endif
935 
936 	if ((error = fgetsock(td, s, &so, NULL)) != 0)
937 		return (error);
938 
939 #ifdef MAC
940 	error = mac_check_socket_receive(td->td_ucred, so);
941 	if (error) {
942 		fputsock(so);
943 		return (error);
944 	}
945 #endif
946 
947 	auio.uio_iov = mp->msg_iov;
948 	auio.uio_iovcnt = mp->msg_iovlen;
949 	auio.uio_segflg = UIO_USERSPACE;
950 	auio.uio_rw = UIO_READ;
951 	auio.uio_td = td;
952 	auio.uio_offset = 0;			/* XXX */
953 	auio.uio_resid = 0;
954 	iov = mp->msg_iov;
955 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
956 		if ((auio.uio_resid += iov->iov_len) < 0) {
957 			fputsock(so);
958 			return (EINVAL);
959 		}
960 	}
961 #ifdef KTRACE
962 	if (KTRPOINT(td, KTR_GENIO)) {
963 		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
964 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
965 		bcopy(auio.uio_iov, ktriov, iovlen);
966 		ktruio = auio;
967 	}
968 #endif
969 	len = auio.uio_resid;
970 	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
971 	    (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
972 	    &mp->msg_flags);
973 	if (error) {
974 		if (auio.uio_resid != len && (error == ERESTART ||
975 		    error == EINTR || error == EWOULDBLOCK))
976 			error = 0;
977 	}
978 #ifdef KTRACE
979 	if (ktriov != NULL) {
980 		if (error == 0) {
981 			ktruio.uio_iov = ktriov;
982 			ktruio.uio_resid = len - auio.uio_resid;
983 			ktrgenio(s, UIO_READ, &ktruio, error);
984 		}
985 		FREE(ktriov, M_TEMP);
986 	}
987 #endif
988 	if (error)
989 		goto out;
990 	td->td_retval[0] = len - auio.uio_resid;
991 	if (mp->msg_name) {
992 		len = mp->msg_namelen;
993 		if (len <= 0 || fromsa == 0)
994 			len = 0;
995 		else {
996 			/* save sa_len before it is destroyed by MSG_COMPAT */
997 			len = MIN(len, fromsa->sa_len);
998 #ifdef COMPAT_OLDSOCK
999 			if (mp->msg_flags & MSG_COMPAT)
1000 				((struct osockaddr *)fromsa)->sa_family =
1001 				    fromsa->sa_family;
1002 #endif
1003 			error = copyout(fromsa, mp->msg_name, (unsigned)len);
1004 			if (error)
1005 				goto out;
1006 		}
1007 		mp->msg_namelen = len;
1008 		if (namelenp &&
1009 		    (error = copyout(&len, namelenp, sizeof (int)))) {
1010 #ifdef COMPAT_OLDSOCK
1011 			if (mp->msg_flags & MSG_COMPAT)
1012 				error = 0;	/* old recvfrom didn't check */
1013 			else
1014 #endif
1015 			goto out;
1016 		}
1017 	}
1018 	if (mp->msg_control) {
1019 #ifdef COMPAT_OLDSOCK
1020 		/*
1021 		 * We assume that old recvmsg calls won't receive access
1022 		 * rights and other control info, esp. as control info
1023 		 * is always optional and those options didn't exist in 4.3.
1024 		 * If we receive rights, trim the cmsghdr; anything else
1025 		 * is tossed.
1026 		 */
1027 		if (control && mp->msg_flags & MSG_COMPAT) {
1028 			if (mtod(control, struct cmsghdr *)->cmsg_level !=
1029 			    SOL_SOCKET ||
1030 			    mtod(control, struct cmsghdr *)->cmsg_type !=
1031 			    SCM_RIGHTS) {
1032 				mp->msg_controllen = 0;
1033 				goto out;
1034 			}
1035 			control->m_len -= sizeof (struct cmsghdr);
1036 			control->m_data += sizeof (struct cmsghdr);
1037 		}
1038 #endif
1039 		len = mp->msg_controllen;
1040 		m = control;
1041 		mp->msg_controllen = 0;
1042 		ctlbuf = mp->msg_control;
1043 
1044 		while (m && len > 0) {
1045 			unsigned int tocopy;
1046 
1047 			if (len >= m->m_len)
1048 				tocopy = m->m_len;
1049 			else {
1050 				mp->msg_flags |= MSG_CTRUNC;
1051 				tocopy = len;
1052 			}
1053 
1054 			if ((error = copyout(mtod(m, caddr_t),
1055 					ctlbuf, tocopy)) != 0)
1056 				goto out;
1057 
1058 			ctlbuf += tocopy;
1059 			len -= tocopy;
1060 			m = m->m_next;
1061 		}
1062 		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
1063 	}
1064 out:
1065 	fputsock(so);
1066 	if (fromsa)
1067 		FREE(fromsa, M_SONAME);
1068 	if (control)
1069 		m_freem(control);
1070 	return (error);
1071 }
1072 
1073 /*
1074  * MPSAFE
1075  */
1076 int
1077 recvfrom(td, uap)
1078 	struct thread *td;
1079 	register struct recvfrom_args /* {
1080 		int	s;
1081 		caddr_t	buf;
1082 		size_t	len;
1083 		int	flags;
1084 		caddr_t	from;
1085 		int	*fromlenaddr;
1086 	} */ *uap;
1087 {
1088 	struct msghdr msg;
1089 	struct iovec aiov;
1090 	int error;
1091 
1092 	mtx_lock(&Giant);
1093 	if (uap->fromlenaddr) {
1094 		error = copyin(uap->fromlenaddr,
1095 		    &msg.msg_namelen, sizeof (msg.msg_namelen));
1096 		if (error)
1097 			goto done2;
1098 	} else {
1099 		msg.msg_namelen = 0;
1100 	}
1101 	msg.msg_name = uap->from;
1102 	msg.msg_iov = &aiov;
1103 	msg.msg_iovlen = 1;
1104 	aiov.iov_base = uap->buf;
1105 	aiov.iov_len = uap->len;
1106 	msg.msg_control = 0;
1107 	msg.msg_flags = uap->flags;
1108 	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1109 done2:
1110 	mtx_unlock(&Giant);
1111 	return(error);
1112 }
1113 
1114 #ifdef COMPAT_OLDSOCK
1115 /*
1116  * MPSAFE
1117  */
1118 int
1119 orecvfrom(td, uap)
1120 	struct thread *td;
1121 	struct recvfrom_args *uap;
1122 {
1123 
1124 	uap->flags |= MSG_COMPAT;
1125 	return (recvfrom(td, uap));
1126 }
1127 #endif
1128 
1129 
1130 #ifdef COMPAT_OLDSOCK
1131 /*
1132  * MPSAFE
1133  */
1134 int
1135 orecv(td, uap)
1136 	struct thread *td;
1137 	register struct orecv_args /* {
1138 		int	s;
1139 		caddr_t	buf;
1140 		int	len;
1141 		int	flags;
1142 	} */ *uap;
1143 {
1144 	struct msghdr msg;
1145 	struct iovec aiov;
1146 	int error;
1147 
1148 	mtx_lock(&Giant);
1149 	msg.msg_name = 0;
1150 	msg.msg_namelen = 0;
1151 	msg.msg_iov = &aiov;
1152 	msg.msg_iovlen = 1;
1153 	aiov.iov_base = uap->buf;
1154 	aiov.iov_len = uap->len;
1155 	msg.msg_control = 0;
1156 	msg.msg_flags = uap->flags;
1157 	error = recvit(td, uap->s, &msg, NULL);
1158 	mtx_unlock(&Giant);
1159 	return (error);
1160 }
1161 
1162 /*
1163  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1164  * overlays the new one, missing only the flags, and with the (old) access
1165  * rights where the control fields are now.
1166  *
1167  * MPSAFE
1168  */
1169 int
1170 orecvmsg(td, uap)
1171 	struct thread *td;
1172 	register struct orecvmsg_args /* {
1173 		int	s;
1174 		struct	omsghdr *msg;
1175 		int	flags;
1176 	} */ *uap;
1177 {
1178 	struct msghdr msg;
1179 	struct iovec aiov[UIO_SMALLIOV], *iov;
1180 	int error;
1181 
1182 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1183 	if (error)
1184 		return (error);
1185 
1186 	mtx_lock(&Giant);
1187 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1188 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1189 			error = EMSGSIZE;
1190 			goto done2;
1191 		}
1192 		MALLOC(iov, struct iovec *,
1193 		      sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1194 		      M_WAITOK);
1195 	} else {
1196 		iov = aiov;
1197 	}
1198 	msg.msg_flags = uap->flags | MSG_COMPAT;
1199 	error = copyin(msg.msg_iov, iov,
1200 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1201 	if (error)
1202 		goto done;
1203 	msg.msg_iov = iov;
1204 	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1205 
1206 	if (msg.msg_controllen && error == 0)
1207 		error = copyout(&msg.msg_controllen,
1208 		    &uap->msg->msg_accrightslen, sizeof (int));
1209 done:
1210 	if (iov != aiov)
1211 		FREE(iov, M_IOV);
1212 done2:
1213 	mtx_unlock(&Giant);
1214 	return (error);
1215 }
1216 #endif
1217 
1218 /*
1219  * MPSAFE
1220  */
1221 int
1222 recvmsg(td, uap)
1223 	struct thread *td;
1224 	register struct recvmsg_args /* {
1225 		int	s;
1226 		struct	msghdr *msg;
1227 		int	flags;
1228 	} */ *uap;
1229 {
1230 	struct msghdr msg;
1231 	struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
1232 	register int error;
1233 
1234 	mtx_lock(&Giant);
1235 	error = copyin(uap->msg, &msg, sizeof (msg));
1236 	if (error)
1237 		goto done2;
1238 	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1239 		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1240 			error = EMSGSIZE;
1241 			goto done2;
1242 		}
1243 		MALLOC(iov, struct iovec *,
1244 		       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1245 		       M_WAITOK);
1246 	} else {
1247 		iov = aiov;
1248 	}
1249 #ifdef COMPAT_OLDSOCK
1250 	msg.msg_flags = uap->flags &~ MSG_COMPAT;
1251 #else
1252 	msg.msg_flags = uap->flags;
1253 #endif
1254 	uiov = msg.msg_iov;
1255 	msg.msg_iov = iov;
1256 	error = copyin(uiov, iov,
1257 	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1258 	if (error)
1259 		goto done;
1260 	error = recvit(td, uap->s, &msg, NULL);
1261 	if (!error) {
1262 		msg.msg_iov = uiov;
1263 		error = copyout(&msg, uap->msg, sizeof(msg));
1264 	}
1265 done:
1266 	if (iov != aiov)
1267 		FREE(iov, M_IOV);
1268 done2:
1269 	mtx_unlock(&Giant);
1270 	return (error);
1271 }
1272 
1273 /*
1274  * MPSAFE
1275  */
1276 /* ARGSUSED */
1277 int
1278 shutdown(td, uap)
1279 	struct thread *td;
1280 	register struct shutdown_args /* {
1281 		int	s;
1282 		int	how;
1283 	} */ *uap;
1284 {
1285 	struct socket *so;
1286 	int error;
1287 
1288 	mtx_lock(&Giant);
1289 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1290 		error = soshutdown(so, uap->how);
1291 		fputsock(so);
1292 	}
1293 	mtx_unlock(&Giant);
1294 	return(error);
1295 }
1296 
1297 /*
1298  * MPSAFE
1299  */
1300 /* ARGSUSED */
1301 int
1302 setsockopt(td, uap)
1303 	struct thread *td;
1304 	register struct setsockopt_args /* {
1305 		int	s;
1306 		int	level;
1307 		int	name;
1308 		caddr_t	val;
1309 		int	valsize;
1310 	} */ *uap;
1311 {
1312 	struct socket *so;
1313 	struct sockopt sopt;
1314 	int error;
1315 
1316 	if (uap->val == 0 && uap->valsize != 0)
1317 		return (EFAULT);
1318 	if (uap->valsize < 0)
1319 		return (EINVAL);
1320 
1321 	mtx_lock(&Giant);
1322 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1323 		sopt.sopt_dir = SOPT_SET;
1324 		sopt.sopt_level = uap->level;
1325 		sopt.sopt_name = uap->name;
1326 		sopt.sopt_val = uap->val;
1327 		sopt.sopt_valsize = uap->valsize;
1328 		sopt.sopt_td = td;
1329 		error = sosetopt(so, &sopt);
1330 		fputsock(so);
1331 	}
1332 	mtx_unlock(&Giant);
1333 	return(error);
1334 }
1335 
1336 /*
1337  * MPSAFE
1338  */
1339 /* ARGSUSED */
1340 int
1341 getsockopt(td, uap)
1342 	struct thread *td;
1343 	register struct getsockopt_args /* {
1344 		int	s;
1345 		int	level;
1346 		int	name;
1347 		caddr_t	val;
1348 		int	*avalsize;
1349 	} */ *uap;
1350 {
1351 	int	valsize, error;
1352 	struct  socket *so;
1353 	struct	sockopt sopt;
1354 
1355 	mtx_lock(&Giant);
1356 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1357 		goto done2;
1358 	if (uap->val) {
1359 		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1360 		if (error)
1361 			goto done1;
1362 		if (valsize < 0) {
1363 			error = EINVAL;
1364 			goto done1;
1365 		}
1366 	} else {
1367 		valsize = 0;
1368 	}
1369 
1370 	sopt.sopt_dir = SOPT_GET;
1371 	sopt.sopt_level = uap->level;
1372 	sopt.sopt_name = uap->name;
1373 	sopt.sopt_val = uap->val;
1374 	sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1375 	sopt.sopt_td = td;
1376 
1377 	error = sogetopt(so, &sopt);
1378 	if (error == 0) {
1379 		valsize = sopt.sopt_valsize;
1380 		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1381 	}
1382 done1:
1383 	fputsock(so);
1384 done2:
1385 	mtx_unlock(&Giant);
1386 	return (error);
1387 }
1388 
1389 /*
1390  * getsockname1() - Get socket name.
1391  *
1392  * MPSAFE
1393  */
1394 /* ARGSUSED */
1395 static int
1396 getsockname1(td, uap, compat)
1397 	struct thread *td;
1398 	register struct getsockname_args /* {
1399 		int	fdes;
1400 		caddr_t	asa;
1401 		int	*alen;
1402 	} */ *uap;
1403 	int compat;
1404 {
1405 	struct socket *so;
1406 	struct sockaddr *sa;
1407 	int len, error;
1408 
1409 	mtx_lock(&Giant);
1410 	if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1411 		goto done2;
1412 	error = copyin(uap->alen, &len, sizeof (len));
1413 	if (error)
1414 		goto done1;
1415 	if (len < 0) {
1416 		error = EINVAL;
1417 		goto done1;
1418 	}
1419 	sa = 0;
1420 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1421 	if (error)
1422 		goto bad;
1423 	if (sa == 0) {
1424 		len = 0;
1425 		goto gotnothing;
1426 	}
1427 
1428 	len = MIN(len, sa->sa_len);
1429 #ifdef COMPAT_OLDSOCK
1430 	if (compat)
1431 		((struct osockaddr *)sa)->sa_family = sa->sa_family;
1432 #endif
1433 	error = copyout(sa, uap->asa, (u_int)len);
1434 	if (error == 0)
1435 gotnothing:
1436 		error = copyout(&len, uap->alen, sizeof (len));
1437 bad:
1438 	if (sa)
1439 		FREE(sa, M_SONAME);
1440 done1:
1441 	fputsock(so);
1442 done2:
1443 	mtx_unlock(&Giant);
1444 	return (error);
1445 }
1446 
/*
 * getsockname() - system call entry point; forwards to getsockname1()
 * with the compat flag cleared.
 *
 * MPSAFE
 */
int
getsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(td, uap, 0);
	return (error);
}
1458 
#ifdef COMPAT_OLDSOCK
/*
 * ogetsockname() - old-socket-ABI entry point; forwards to
 * getsockname1() with the compat flag set.
 *
 * MPSAFE
 */
int
ogetsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(td, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
1472 
1473 /*
1474  * getpeername1() - Get name of peer for connected socket.
1475  *
1476  * MPSAFE
1477  */
1478 /* ARGSUSED */
1479 static int
1480 getpeername1(td, uap, compat)
1481 	struct thread *td;
1482 	register struct getpeername_args /* {
1483 		int	fdes;
1484 		caddr_t	asa;
1485 		int	*alen;
1486 	} */ *uap;
1487 	int compat;
1488 {
1489 	struct socket *so;
1490 	struct sockaddr *sa;
1491 	int len, error;
1492 
1493 	mtx_lock(&Giant);
1494 	if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1495 		goto done2;
1496 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1497 		error = ENOTCONN;
1498 		goto done1;
1499 	}
1500 	error = copyin(uap->alen, &len, sizeof (len));
1501 	if (error)
1502 		goto done1;
1503 	if (len < 0) {
1504 		error = EINVAL;
1505 		goto done1;
1506 	}
1507 	sa = 0;
1508 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1509 	if (error)
1510 		goto bad;
1511 	if (sa == 0) {
1512 		len = 0;
1513 		goto gotnothing;
1514 	}
1515 	len = MIN(len, sa->sa_len);
1516 #ifdef COMPAT_OLDSOCK
1517 	if (compat)
1518 		((struct osockaddr *)sa)->sa_family =
1519 		    sa->sa_family;
1520 #endif
1521 	error = copyout(sa, uap->asa, (u_int)len);
1522 	if (error)
1523 		goto bad;
1524 gotnothing:
1525 	error = copyout(&len, uap->alen, sizeof (len));
1526 bad:
1527 	if (sa)
1528 		FREE(sa, M_SONAME);
1529 done1:
1530 	fputsock(so);
1531 done2:
1532 	mtx_unlock(&Giant);
1533 	return (error);
1534 }
1535 
/*
 * getpeername() - system call entry point; forwards to getpeername1()
 * with the compat flag cleared.
 *
 * MPSAFE
 */
int
getpeername(td, uap)
	struct thread *td;
	struct getpeername_args *uap;
{
	int error;

	error = getpeername1(td, uap, 0);
	return (error);
}
1547 
#ifdef COMPAT_OLDSOCK
/*
 * ogetpeername() - old-socket-ABI entry point; forwards to
 * getpeername1() with the compat flag set.
 *
 * MPSAFE
 */
int
ogetpeername(td, uap)
	struct thread *td;
	struct ogetpeername_args *uap;
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(td, args, 1));
}
#endif /* COMPAT_OLDSOCK */
1562 
1563 int
1564 sockargs(mp, buf, buflen, type)
1565 	struct mbuf **mp;
1566 	caddr_t buf;
1567 	int buflen, type;
1568 {
1569 	register struct sockaddr *sa;
1570 	register struct mbuf *m;
1571 	int error;
1572 
1573 	if ((u_int)buflen > MLEN) {
1574 #ifdef COMPAT_OLDSOCK
1575 		if (type == MT_SONAME && (u_int)buflen <= 112)
1576 			buflen = MLEN;		/* unix domain compat. hack */
1577 		else
1578 #endif
1579 		return (EINVAL);
1580 	}
1581 	m = m_get(M_TRYWAIT, type);
1582 	if (m == NULL)
1583 		return (ENOBUFS);
1584 	m->m_len = buflen;
1585 	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1586 	if (error)
1587 		(void) m_free(m);
1588 	else {
1589 		*mp = m;
1590 		if (type == MT_SONAME) {
1591 			sa = mtod(m, struct sockaddr *);
1592 
1593 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1594 			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1595 				sa->sa_family = sa->sa_len;
1596 #endif
1597 			sa->sa_len = buflen;
1598 		}
1599 	}
1600 	return (error);
1601 }
1602 
1603 int
1604 getsockaddr(namp, uaddr, len)
1605 	struct sockaddr **namp;
1606 	caddr_t uaddr;
1607 	size_t len;
1608 {
1609 	struct sockaddr *sa;
1610 	int error;
1611 
1612 	if (len > SOCK_MAXADDRLEN)
1613 		return ENAMETOOLONG;
1614 	MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1615 	error = copyin(uaddr, sa, len);
1616 	if (error) {
1617 		FREE(sa, M_SONAME);
1618 	} else {
1619 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1620 		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1621 			sa->sa_family = sa->sa_len;
1622 #endif
1623 		sa->sa_len = len;
1624 		*namp = sa;
1625 	}
1626 	return error;
1627 }
1628 
1629 /*
1630  * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
1631  */
1632 static void
1633 sf_buf_init(void *arg)
1634 {
1635 	struct sf_buf *sf_bufs;
1636 	vm_offset_t sf_base;
1637 	int i;
1638 
1639 	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
1640 	mtx_lock(&sf_freelist.sf_lock);
1641 	SLIST_INIT(&sf_freelist.sf_head);
1642 	sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE);
1643 	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
1644 	    M_NOWAIT | M_ZERO);
1645 	for (i = 0; i < nsfbufs; i++) {
1646 		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
1647 		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
1648 	}
1649 	sf_buf_alloc_want = 0;
1650 	mtx_unlock(&sf_freelist.sf_lock);
1651 }
1652 
1653 /*
1654  * Get an sf_buf from the freelist. Will block if none are available.
1655  */
1656 struct sf_buf *
1657 sf_buf_alloc(struct vm_page *m)
1658 {
1659 	struct sf_buf *sf;
1660 	int error;
1661 
1662 	mtx_lock(&sf_freelist.sf_lock);
1663 	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
1664 		sf_buf_alloc_want++;
1665 		error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH,
1666 		    "sfbufa", 0);
1667 		sf_buf_alloc_want--;
1668 
1669 		/*
1670 		 * If we got a signal, don't risk going back to sleep.
1671 		 */
1672 		if (error)
1673 			break;
1674 	}
1675 	if (sf != NULL) {
1676 		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
1677 		sf->m = m;
1678 		pmap_qenter(sf->kva, &sf->m, 1);
1679 	}
1680 	mtx_unlock(&sf_freelist.sf_lock);
1681 	return (sf);
1682 }
1683 
1684 /*
1685  * Detatch mapped page and release resources back to the system.
1686  */
1687 void
1688 sf_buf_free(void *addr, void *args)
1689 {
1690 	struct sf_buf *sf;
1691 	struct vm_page *m;
1692 
1693 	sf = args;
1694 	pmap_qremove((vm_offset_t)addr, 1);
1695 	m = sf->m;
1696 	vm_page_lock_queues();
1697 	vm_page_unwire(m, 0);
1698 	/*
1699 	 * Check for the object going away on us. This can
1700 	 * happen since we don't hold a reference to it.
1701 	 * If so, we're responsible for freeing the page.
1702 	 */
1703 	if (m->wire_count == 0 && m->object == NULL)
1704 		vm_page_free(m);
1705 	vm_page_unlock_queues();
1706 	sf->m = NULL;
1707 	mtx_lock(&sf_freelist.sf_lock);
1708 	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
1709 	if (sf_buf_alloc_want > 0)
1710 		wakeup_one(&sf_freelist);
1711 	mtx_unlock(&sf_freelist.sf_lock);
1712 }
1713 
/*
 * sendfile(2)
 *
 * MPSAFE
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Send a file specified by 'fd' and starting at 'offset' to a socket
 * specified by 's'. Send only 'nbytes' of the file or until EOF if
 * nbytes == 0. Optionally add a header and/or trailer to the socket
 * output. If specified, write the total number of bytes sent into *sbytes.
 *
 * This entry point hands the request to do_sendfile() with the compat
 * flag cleared.
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
	int error;

	error = do_sendfile(td, uap, 0);
	return (error);
}
1734 
#ifdef COMPAT_FREEBSD4
/*
 * FreeBSD 4 compatible sendfile(2) entry point.  Repackages the
 * arguments as a struct sendfile_args and calls do_sendfile() with the
 * compat flag set.
 */
int
freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
{
	struct sendfile_args nargs;

	/* Descriptors. */
	nargs.fd = uap->fd;
	nargs.s = uap->s;
	/* Region of the file to send. */
	nargs.offset = uap->offset;
	nargs.nbytes = uap->nbytes;
	/* Optional header/trailer description and result pointer. */
	nargs.hdtr = uap->hdtr;
	nargs.sbytes = uap->sbytes;
	nargs.flags = uap->flags;

	return (do_sendfile(td, &nargs, 1));
}
#endif /* COMPAT_FREEBSD4 */
1752 
1753 static int
1754 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1755 {
1756 	struct vnode *vp;
1757 	struct vm_object *obj;
1758 	struct socket *so = NULL;
1759 	struct mbuf *m;
1760 	struct sf_buf *sf;
1761 	struct vm_page *pg;
1762 	struct writev_args nuap;
1763 	struct sf_hdtr hdtr;
1764 	off_t off, xfsize, hdtr_size, sbytes = 0;
1765 	int error, s;
1766 
1767 	mtx_lock(&Giant);
1768 
1769 	hdtr_size = 0;
1770 
1771 	/*
1772 	 * The descriptor must be a regular file and have a backing VM object.
1773 	 */
1774 	if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1775 		goto done;
1776 	if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
1777 		error = EINVAL;
1778 		goto done;
1779 	}
1780 	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1781 		goto done;
1782 	if (so->so_type != SOCK_STREAM) {
1783 		error = EINVAL;
1784 		goto done;
1785 	}
1786 	if ((so->so_state & SS_ISCONNECTED) == 0) {
1787 		error = ENOTCONN;
1788 		goto done;
1789 	}
1790 	if (uap->offset < 0) {
1791 		error = EINVAL;
1792 		goto done;
1793 	}
1794 
1795 #ifdef MAC
1796 	error = mac_check_socket_send(td->td_ucred, so);
1797 	if (error)
1798 		goto done;
1799 #endif
1800 
1801 	/*
1802 	 * If specified, get the pointer to the sf_hdtr struct for
1803 	 * any headers/trailers.
1804 	 */
1805 	if (uap->hdtr != NULL) {
1806 		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1807 		if (error)
1808 			goto done;
1809 		/*
1810 		 * Send any headers. Wimp out and use writev(2).
1811 		 */
1812 		if (hdtr.headers != NULL) {
1813 			nuap.fd = uap->s;
1814 			nuap.iovp = hdtr.headers;
1815 			nuap.iovcnt = hdtr.hdr_cnt;
1816 			error = writev(td, &nuap);
1817 			if (error)
1818 				goto done;
1819 			if (compat)
1820 				sbytes += td->td_retval[0];
1821 			else
1822 				hdtr_size += td->td_retval[0];
1823 		}
1824 	}
1825 
1826 	/*
1827 	 * Protect against multiple writers to the socket.
1828 	 */
1829 	(void) sblock(&so->so_snd, M_WAITOK);
1830 
1831 	/*
1832 	 * Loop through the pages in the file, starting with the requested
1833 	 * offset. Get a file page (do I/O if necessary), map the file page
1834 	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1835 	 * it on the socket.
1836 	 */
1837 	for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1838 		vm_pindex_t pindex;
1839 		vm_offset_t pgoff;
1840 
1841 		pindex = OFF_TO_IDX(off);
1842 retry_lookup:
1843 		/*
1844 		 * Calculate the amount to transfer. Not to exceed a page,
1845 		 * the EOF, or the passed in nbytes.
1846 		 */
1847 		xfsize = obj->un_pager.vnp.vnp_size - off;
1848 		if (xfsize > PAGE_SIZE)
1849 			xfsize = PAGE_SIZE;
1850 		pgoff = (vm_offset_t)(off & PAGE_MASK);
1851 		if (PAGE_SIZE - pgoff < xfsize)
1852 			xfsize = PAGE_SIZE - pgoff;
1853 		if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1854 			xfsize = uap->nbytes - sbytes;
1855 		if (xfsize <= 0)
1856 			break;
1857 		/*
1858 		 * Optimize the non-blocking case by looking at the socket space
1859 		 * before going to the extra work of constituting the sf_buf.
1860 		 */
1861 		if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1862 			if (so->so_state & SS_CANTSENDMORE)
1863 				error = EPIPE;
1864 			else
1865 				error = EAGAIN;
1866 			sbunlock(&so->so_snd);
1867 			goto done;
1868 		}
1869 		/*
1870 		 * Attempt to look up the page.
1871 		 *
1872 		 *	Allocate if not found
1873 		 *
1874 		 *	Wait and loop if busy.
1875 		 */
1876 		pg = vm_page_lookup(obj, pindex);
1877 
1878 		if (pg == NULL) {
1879 			pg = vm_page_alloc(obj, pindex,
1880 			    VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
1881 			if (pg == NULL) {
1882 				VM_WAIT;
1883 				goto retry_lookup;
1884 			}
1885 			vm_page_lock_queues();
1886 			vm_page_wakeup(pg);
1887 		} else {
1888 			vm_page_lock_queues();
1889 			if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
1890 				goto retry_lookup;
1891 			/*
1892 		 	 * Wire the page so it does not get ripped out from
1893 			 * under us.
1894 			 */
1895 			vm_page_wire(pg);
1896 		}
1897 
1898 		/*
1899 		 * If page is not valid for what we need, initiate I/O
1900 		 */
1901 
1902 		if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) {
1903 			int bsize, resid;
1904 
1905 			/*
1906 			 * Ensure that our page is still around when the I/O
1907 			 * completes.
1908 			 */
1909 			vm_page_io_start(pg);
1910 			vm_page_unlock_queues();
1911 
1912 			/*
1913 			 * Get the page from backing store.
1914 			 */
1915 			bsize = vp->v_mount->mnt_stat.f_iosize;
1916 			vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
1917 			/*
1918 			 * XXXMAC: Because we don't have fp->f_cred here,
1919 			 * we pass in NOCRED.  This is probably wrong, but
1920 			 * is consistent with our original implementation.
1921 			 */
1922 			error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
1923 			    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
1924 			    IO_VMIO | ((MAXBSIZE / bsize) << 16),
1925 			    td->td_ucred, NOCRED, &resid, td);
1926 			VOP_UNLOCK(vp, 0, td);
1927 			vm_page_lock_queues();
1928 			vm_page_flag_clear(pg, PG_ZERO);
1929 			vm_page_io_finish(pg);
1930 			if (error) {
1931 				vm_page_unwire(pg, 0);
1932 				/*
1933 				 * See if anyone else might know about this page.
1934 				 * If not and it is not valid, then free it.
1935 				 */
1936 				if (pg->wire_count == 0 && pg->valid == 0 &&
1937 				    pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1938 				    pg->hold_count == 0) {
1939 					vm_page_busy(pg);
1940 					vm_page_free(pg);
1941 				}
1942 				vm_page_unlock_queues();
1943 				sbunlock(&so->so_snd);
1944 				goto done;
1945 			}
1946 		}
1947 		vm_page_unlock_queues();
1948 
1949 		/*
1950 		 * Get a sendfile buf. We usually wait as long as necessary,
1951 		 * but this wait can be interrupted.
1952 		 */
1953 		if ((sf = sf_buf_alloc(pg)) == NULL) {
1954 			vm_page_lock_queues();
1955 			vm_page_unwire(pg, 0);
1956 			if (pg->wire_count == 0 && pg->object == NULL)
1957 				vm_page_free(pg);
1958 			vm_page_unlock_queues();
1959 			sbunlock(&so->so_snd);
1960 			error = EINTR;
1961 			goto done;
1962 		}
1963 
1964 		/*
1965 		 * Get an mbuf header and set it up as having external storage.
1966 		 */
1967 		MGETHDR(m, M_TRYWAIT, MT_DATA);
1968 		if (m == NULL) {
1969 			error = ENOBUFS;
1970 			sf_buf_free((void *)sf->kva, sf);
1971 			sbunlock(&so->so_snd);
1972 			goto done;
1973 		}
1974 		/*
1975 		 * Setup external storage for mbuf.
1976 		 */
1977 		MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, sf, M_RDONLY,
1978 		    EXT_SFBUF);
1979 		m->m_data = (char *) sf->kva + pgoff;
1980 		m->m_pkthdr.len = m->m_len = xfsize;
1981 		/*
1982 		 * Add the buffer to the socket buffer chain.
1983 		 */
1984 		s = splnet();
1985 retry_space:
1986 		/*
1987 		 * Make sure that the socket is still able to take more data.
1988 		 * CANTSENDMORE being true usually means that the connection
1989 		 * was closed. so_error is true when an error was sensed after
1990 		 * a previous send.
1991 		 * The state is checked after the page mapping and buffer
1992 		 * allocation above since those operations may block and make
1993 		 * any socket checks stale. From this point forward, nothing
1994 		 * blocks before the pru_send (or more accurately, any blocking
1995 		 * results in a loop back to here to re-check).
1996 		 */
1997 		if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1998 			if (so->so_state & SS_CANTSENDMORE) {
1999 				error = EPIPE;
2000 			} else {
2001 				error = so->so_error;
2002 				so->so_error = 0;
2003 			}
2004 			m_freem(m);
2005 			sbunlock(&so->so_snd);
2006 			splx(s);
2007 			goto done;
2008 		}
2009 		/*
2010 		 * Wait for socket space to become available. We do this just
2011 		 * after checking the connection state above in order to avoid
2012 		 * a race condition with sbwait().
2013 		 */
2014 		if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
2015 			if (so->so_state & SS_NBIO) {
2016 				m_freem(m);
2017 				sbunlock(&so->so_snd);
2018 				splx(s);
2019 				error = EAGAIN;
2020 				goto done;
2021 			}
2022 			error = sbwait(&so->so_snd);
2023 			/*
2024 			 * An error from sbwait usually indicates that we've
2025 			 * been interrupted by a signal. If we've sent anything
2026 			 * then return bytes sent, otherwise return the error.
2027 			 */
2028 			if (error) {
2029 				m_freem(m);
2030 				sbunlock(&so->so_snd);
2031 				splx(s);
2032 				goto done;
2033 			}
2034 			goto retry_space;
2035 		}
2036 		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
2037 		splx(s);
2038 		if (error) {
2039 			sbunlock(&so->so_snd);
2040 			goto done;
2041 		}
2042 	}
2043 	sbunlock(&so->so_snd);
2044 
2045 	/*
2046 	 * Send trailers. Wimp out and use writev(2).
2047 	 */
2048 	if (uap->hdtr != NULL && hdtr.trailers != NULL) {
2049 			nuap.fd = uap->s;
2050 			nuap.iovp = hdtr.trailers;
2051 			nuap.iovcnt = hdtr.trl_cnt;
2052 			error = writev(td, &nuap);
2053 			if (error)
2054 				goto done;
2055 			if (compat)
2056 				sbytes += td->td_retval[0];
2057 			else
2058 				hdtr_size += td->td_retval[0];
2059 	}
2060 
2061 done:
2062 	/*
2063 	 * If there was no error we have to clear td->td_retval[0]
2064 	 * because it may have been set by writev.
2065 	 */
2066 	if (error == 0) {
2067 		td->td_retval[0] = 0;
2068 	}
2069 	if (uap->sbytes != NULL) {
2070 		if (!compat)
2071 			sbytes += hdtr_size;
2072 		copyout(&sbytes, uap->sbytes, sizeof(off_t));
2073 	}
2074 	if (vp)
2075 		vrele(vp);
2076 	if (so)
2077 		fputsock(so);
2078 	mtx_unlock(&Giant);
2079 	return (error);
2080 }
2081