xref: /freebsd/sys/compat/linux/linux_socket.c (revision 79775f8f1be8dc6ce2b705276b7626cb24347f7d)
1 /*-
 * Copyright (c) 1995 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 /* XXX we use functions that might not exist. */
33 #include "opt_compat.h"
34 #include "opt_inet6.h"
35 
36 #include <sys/param.h>
37 #include <sys/proc.h>
38 #include <sys/systm.h>
39 #include <sys/sysproto.h>
40 #include <sys/fcntl.h>
41 #include <sys/file.h>
42 #include <sys/limits.h>
43 #include <sys/lock.h>
44 #include <sys/malloc.h>
45 #include <sys/mutex.h>
46 #include <sys/mbuf.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/syscallsubr.h>
50 #include <sys/uio.h>
51 #include <sys/syslog.h>
52 #include <sys/un.h>
53 #include <sys/vimage.h>
54 
55 #include <netinet/in.h>
56 #include <netinet/in_systm.h>
57 #include <netinet/ip.h>
58 #ifdef INET6
59 #include <netinet/ip6.h>
60 #include <netinet6/ip6_var.h>
61 #endif
62 
63 #ifdef COMPAT_LINUX32
64 #include <machine/../linux32/linux.h>
65 #include <machine/../linux32/linux32_proto.h>
66 #else
67 #include <machine/../linux/linux.h>
68 #include <machine/../linux/linux_proto.h>
69 #endif
70 #include <compat/linux/linux_socket.h>
71 #include <compat/linux/linux_util.h>
72 
/* Forward declarations for helpers that are used before their definitions. */
static int linux_to_bsd_domain(int);
static int do_sa_get(struct sockaddr **, const struct osockaddr *, int *,
    struct malloc_type *);

76 
77 /*
78  * Reads a linux sockaddr and does any necessary translation.
79  * Linux sockaddrs don't have a length field, only a family.
80  */
81 static int
82 linux_getsockaddr(struct sockaddr **sap, const struct osockaddr *osa, int len)
83 {
84 	int osalen = len;
85 
86 	return (do_sa_get(sap, osa, &osalen, M_SONAME));
87 }
88 
89 /*
90  * Copy the osockaddr structure pointed to by osa to kernel, adjust
91  * family and convert to sockaddr.
92  */
93 static int
94 do_sa_get(struct sockaddr **sap, const struct osockaddr *osa, int *osalen,
95     struct malloc_type *mtype)
96 {
97 	int error=0, bdom;
98 	struct sockaddr *sa;
99 	struct osockaddr *kosa;
100 	int alloclen;
101 #ifdef INET6
102 	int oldv6size;
103 	struct sockaddr_in6 *sin6;
104 #endif
105 
106 	if (*osalen < 2 || *osalen > UCHAR_MAX || !osa)
107 		return (EINVAL);
108 
109 	alloclen = *osalen;
110 #ifdef INET6
111 	oldv6size = 0;
112 	/*
113 	 * Check for old (pre-RFC2553) sockaddr_in6. We may accept it
114 	 * if it's a v4-mapped address, so reserve the proper space
115 	 * for it.
116 	 */
117 	if (alloclen == sizeof (struct sockaddr_in6) - sizeof (u_int32_t)) {
118 		alloclen = sizeof (struct sockaddr_in6);
119 		oldv6size = 1;
120 	}
121 #endif
122 
123 	MALLOC(kosa, struct osockaddr *, alloclen, mtype, M_WAITOK);
124 
125 	if ((error = copyin(osa, kosa, *osalen)))
126 		goto out;
127 
128 	bdom = linux_to_bsd_domain(kosa->sa_family);
129 	if (bdom == -1) {
130 		error = EINVAL;
131 		goto out;
132 	}
133 
134 #ifdef INET6
135 	/*
136 	 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6,
137 	 * which lacks the scope id compared with RFC2553 one. If we detect
138 	 * the situation, reject the address and write a message to system log.
139 	 *
140 	 * Still accept addresses for which the scope id is not used.
141 	 */
142 	if (oldv6size && bdom == AF_INET6) {
143 		sin6 = (struct sockaddr_in6 *)kosa;
144 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ||
145 		    (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
146 		     !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
147 		     !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) &&
148 		     !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
149 		     !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
150 			sin6->sin6_scope_id = 0;
151 		} else {
152 			log(LOG_DEBUG,
153 			    "obsolete pre-RFC2553 sockaddr_in6 rejected\n");
154 			error = EINVAL;
155 			goto out;
156 		}
157 	} else
158 #endif
159 	if (bdom == AF_INET)
160 		alloclen = sizeof(struct sockaddr_in);
161 
162 	sa = (struct sockaddr *) kosa;
163 	sa->sa_family = bdom;
164 	sa->sa_len = alloclen;
165 
166 	*sap = sa;
167 	*osalen = alloclen;
168 	return (0);
169 
170 out:
171 	FREE(kosa, mtype);
172 	return (error);
173 }
174 
175 static int
176 linux_to_bsd_domain(int domain)
177 {
178 
179 	switch (domain) {
180 	case LINUX_AF_UNSPEC:
181 		return (AF_UNSPEC);
182 	case LINUX_AF_UNIX:
183 		return (AF_LOCAL);
184 	case LINUX_AF_INET:
185 		return (AF_INET);
186 	case LINUX_AF_INET6:
187 		return (AF_INET6);
188 	case LINUX_AF_AX25:
189 		return (AF_CCITT);
190 	case LINUX_AF_IPX:
191 		return (AF_IPX);
192 	case LINUX_AF_APPLETALK:
193 		return (AF_APPLETALK);
194 	}
195 	return (-1);
196 }
197 
198 static int
199 bsd_to_linux_domain(int domain)
200 {
201 
202 	switch (domain) {
203 	case AF_UNSPEC:
204 		return (LINUX_AF_UNSPEC);
205 	case AF_LOCAL:
206 		return (LINUX_AF_UNIX);
207 	case AF_INET:
208 		return (LINUX_AF_INET);
209 	case AF_INET6:
210 		return (LINUX_AF_INET6);
211 	case AF_CCITT:
212 		return (LINUX_AF_AX25);
213 	case AF_IPX:
214 		return (LINUX_AF_IPX);
215 	case AF_APPLETALK:
216 		return (LINUX_AF_APPLETALK);
217 	}
218 	return (-1);
219 }
220 
221 static int
222 linux_to_bsd_sockopt_level(int level)
223 {
224 
225 	switch (level) {
226 	case LINUX_SOL_SOCKET:
227 		return (SOL_SOCKET);
228 	}
229 	return (level);
230 }
231 
232 static int
233 bsd_to_linux_sockopt_level(int level)
234 {
235 
236 	switch (level) {
237 	case SOL_SOCKET:
238 		return (LINUX_SOL_SOCKET);
239 	}
240 	return (level);
241 }
242 
243 static int
244 linux_to_bsd_ip_sockopt(int opt)
245 {
246 
247 	switch (opt) {
248 	case LINUX_IP_TOS:
249 		return (IP_TOS);
250 	case LINUX_IP_TTL:
251 		return (IP_TTL);
252 	case LINUX_IP_OPTIONS:
253 		return (IP_OPTIONS);
254 	case LINUX_IP_MULTICAST_IF:
255 		return (IP_MULTICAST_IF);
256 	case LINUX_IP_MULTICAST_TTL:
257 		return (IP_MULTICAST_TTL);
258 	case LINUX_IP_MULTICAST_LOOP:
259 		return (IP_MULTICAST_LOOP);
260 	case LINUX_IP_ADD_MEMBERSHIP:
261 		return (IP_ADD_MEMBERSHIP);
262 	case LINUX_IP_DROP_MEMBERSHIP:
263 		return (IP_DROP_MEMBERSHIP);
264 	case LINUX_IP_HDRINCL:
265 		return (IP_HDRINCL);
266 	}
267 	return (-1);
268 }
269 
270 static int
271 linux_to_bsd_so_sockopt(int opt)
272 {
273 
274 	switch (opt) {
275 	case LINUX_SO_DEBUG:
276 		return (SO_DEBUG);
277 	case LINUX_SO_REUSEADDR:
278 		return (SO_REUSEADDR);
279 	case LINUX_SO_TYPE:
280 		return (SO_TYPE);
281 	case LINUX_SO_ERROR:
282 		return (SO_ERROR);
283 	case LINUX_SO_DONTROUTE:
284 		return (SO_DONTROUTE);
285 	case LINUX_SO_BROADCAST:
286 		return (SO_BROADCAST);
287 	case LINUX_SO_SNDBUF:
288 		return (SO_SNDBUF);
289 	case LINUX_SO_RCVBUF:
290 		return (SO_RCVBUF);
291 	case LINUX_SO_KEEPALIVE:
292 		return (SO_KEEPALIVE);
293 	case LINUX_SO_OOBINLINE:
294 		return (SO_OOBINLINE);
295 	case LINUX_SO_LINGER:
296 		return (SO_LINGER);
297 	case LINUX_SO_PEERCRED:
298 		return (LOCAL_PEERCRED);
299 	case LINUX_SO_RCVLOWAT:
300 		return (SO_RCVLOWAT);
301 	case LINUX_SO_SNDLOWAT:
302 		return (SO_SNDLOWAT);
303 	case LINUX_SO_RCVTIMEO:
304 		return (SO_RCVTIMEO);
305 	case LINUX_SO_SNDTIMEO:
306 		return (SO_SNDTIMEO);
307 	case LINUX_SO_TIMESTAMP:
308 		return (SO_TIMESTAMP);
309 	case LINUX_SO_ACCEPTCONN:
310 		return (SO_ACCEPTCONN);
311 	}
312 	return (-1);
313 }
314 
315 static int
316 linux_to_bsd_msg_flags(int flags)
317 {
318 	int ret_flags = 0;
319 
320 	if (flags & LINUX_MSG_OOB)
321 		ret_flags |= MSG_OOB;
322 	if (flags & LINUX_MSG_PEEK)
323 		ret_flags |= MSG_PEEK;
324 	if (flags & LINUX_MSG_DONTROUTE)
325 		ret_flags |= MSG_DONTROUTE;
326 	if (flags & LINUX_MSG_CTRUNC)
327 		ret_flags |= MSG_CTRUNC;
328 	if (flags & LINUX_MSG_TRUNC)
329 		ret_flags |= MSG_TRUNC;
330 	if (flags & LINUX_MSG_DONTWAIT)
331 		ret_flags |= MSG_DONTWAIT;
332 	if (flags & LINUX_MSG_EOR)
333 		ret_flags |= MSG_EOR;
334 	if (flags & LINUX_MSG_WAITALL)
335 		ret_flags |= MSG_WAITALL;
336 	if (flags & LINUX_MSG_NOSIGNAL)
337 		ret_flags |= MSG_NOSIGNAL;
338 #if 0 /* not handled */
339 	if (flags & LINUX_MSG_PROXY)
340 		;
341 	if (flags & LINUX_MSG_FIN)
342 		;
343 	if (flags & LINUX_MSG_SYN)
344 		;
345 	if (flags & LINUX_MSG_CONFIRM)
346 		;
347 	if (flags & LINUX_MSG_RST)
348 		;
349 	if (flags & LINUX_MSG_ERRQUEUE)
350 		;
351 #endif
352 	return ret_flags;
353 }
354 
/*
 * If bsd_to_linux_sockaddr() or linux_to_bsd_sockaddr() faults, then the
 * native syscall will fault.  Thus, we don't really need to check the
 * return values for these functions.
 */
360 
361 static int
362 bsd_to_linux_sockaddr(struct sockaddr *arg)
363 {
364 	struct sockaddr sa;
365 	size_t sa_len = sizeof(struct sockaddr);
366 	int error;
367 
368 	if ((error = copyin(arg, &sa, sa_len)))
369 		return (error);
370 
371 	*(u_short *)&sa = sa.sa_family;
372 
373 	error = copyout(&sa, arg, sa_len);
374 
375 	return (error);
376 }
377 
378 static int
379 linux_to_bsd_sockaddr(struct sockaddr *arg, int len)
380 {
381 	struct sockaddr sa;
382 	size_t sa_len = sizeof(struct sockaddr);
383 	int error;
384 
385 	if ((error = copyin(arg, &sa, sa_len)))
386 		return (error);
387 
388 	sa.sa_family = *(sa_family_t *)&sa;
389 	sa.sa_len = len;
390 
391 	error = copyout(&sa, arg, sa_len);
392 
393 	return (error);
394 }
395 
396 
397 static int
398 linux_sa_put(struct osockaddr *osa)
399 {
400 	struct osockaddr sa;
401 	int error, bdom;
402 
403 	/*
404 	 * Only read/write the osockaddr family part, the rest is
405 	 * not changed.
406 	 */
407 	error = copyin(osa, &sa, sizeof(sa.sa_family));
408 	if (error)
409 		return (error);
410 
411 	bdom = bsd_to_linux_domain(sa.sa_family);
412 	if (bdom == -1)
413 		return (EINVAL);
414 
415 	sa.sa_family = bdom;
416 	error = copyout(&sa, osa, sizeof(sa.sa_family));
417 	if (error)
418 		return (error);
419 
420 	return (0);
421 }
422 
423 static int
424 linux_sendit(struct thread *td, int s, struct msghdr *mp, int flags,
425     enum uio_seg segflg)
426 {
427 	struct mbuf *control;
428 	struct sockaddr *to;
429 	int error;
430 
431 	if (mp->msg_name != NULL) {
432 		error = linux_getsockaddr(&to, mp->msg_name, mp->msg_namelen);
433 		if (error)
434 			return (error);
435 		mp->msg_name = to;
436 	} else
437 		to = NULL;
438 
439 	if (mp->msg_control != NULL) {
440 		struct cmsghdr *cmsg;
441 
442 		if (mp->msg_controllen < sizeof(struct cmsghdr)) {
443 			error = EINVAL;
444 			goto bad;
445 		}
446 		error = sockargs(&control, mp->msg_control,
447 		    mp->msg_controllen, MT_CONTROL);
448 		if (error)
449 			goto bad;
450 
451 		cmsg = mtod(control, struct cmsghdr *);
452 		cmsg->cmsg_level = linux_to_bsd_sockopt_level(cmsg->cmsg_level);
453 	} else
454 		control = NULL;
455 
456 	error = kern_sendit(td, s, mp, linux_to_bsd_msg_flags(flags), control,
457 	    segflg);
458 
459 bad:
460 	if (to)
461 		FREE(to, M_SONAME);
462 	return (error);
463 }
464 
465 /* Return 0 if IP_HDRINCL is set for the given socket. */
466 static int
467 linux_check_hdrincl(struct thread *td, int s)
468 {
469 	int error, optval, size_val;
470 
471 	size_val = sizeof(optval);
472 	error = kern_getsockopt(td, s, IPPROTO_IP, IP_HDRINCL,
473 	    &optval, UIO_SYSSPACE, &size_val);
474 	if (error)
475 		return (error);
476 
477 	return (optval == 0);
478 }
479 
480 struct linux_sendto_args {
481 	int s;
482 	l_uintptr_t msg;
483 	int len;
484 	int flags;
485 	l_uintptr_t to;
486 	int tolen;
487 };
488 
489 /*
490  * Updated sendto() when IP_HDRINCL is set:
491  * tweak endian-dependent fields in the IP packet.
492  */
493 static int
494 linux_sendto_hdrincl(struct thread *td, struct linux_sendto_args *linux_args)
495 {
496 /*
497  * linux_ip_copysize defines how many bytes we should copy
498  * from the beginning of the IP packet before we customize it for BSD.
499  * It should include all the fields we modify (ip_len and ip_off).
500  */
501 #define linux_ip_copysize	8
502 
503 	struct ip *packet;
504 	struct msghdr msg;
505 	struct iovec aiov[1];
506 	int error;
507 
508 	/* Check that the packet isn't too big or too small. */
509 	if (linux_args->len < linux_ip_copysize ||
510 	    linux_args->len > IP_MAXPACKET)
511 		return (EINVAL);
512 
513 	packet = (struct ip *)malloc(linux_args->len, M_TEMP, M_WAITOK);
514 
515 	/* Make kernel copy of the packet to be sent */
516 	if ((error = copyin(PTRIN(linux_args->msg), packet,
517 	    linux_args->len)))
518 		goto goout;
519 
520 	/* Convert fields from Linux to BSD raw IP socket format */
521 	packet->ip_len = linux_args->len;
522 	packet->ip_off = ntohs(packet->ip_off);
523 
524 	/* Prepare the msghdr and iovec structures describing the new packet */
525 	msg.msg_name = PTRIN(linux_args->to);
526 	msg.msg_namelen = linux_args->tolen;
527 	msg.msg_iov = aiov;
528 	msg.msg_iovlen = 1;
529 	msg.msg_control = NULL;
530 	msg.msg_flags = 0;
531 	aiov[0].iov_base = (char *)packet;
532 	aiov[0].iov_len = linux_args->len;
533 	error = linux_sendit(td, linux_args->s, &msg, linux_args->flags,
534 	    UIO_SYSSPACE);
535 goout:
536 	free(packet, M_TEMP);
537 	return (error);
538 }
539 
540 struct linux_socket_args {
541 	int domain;
542 	int type;
543 	int protocol;
544 };
545 
546 static int
547 linux_socket(struct thread *td, struct linux_socket_args *args)
548 {
549 	struct socket_args /* {
550 		int domain;
551 		int type;
552 		int protocol;
553 	} */ bsd_args;
554 	int retval_socket;
555 
556 	bsd_args.protocol = args->protocol;
557 	bsd_args.type = args->type;
558 	bsd_args.domain = linux_to_bsd_domain(args->domain);
559 	if (bsd_args.domain == -1)
560 		return (EINVAL);
561 
562 	retval_socket = socket(td, &bsd_args);
563 	if (bsd_args.type == SOCK_RAW
564 	    && (bsd_args.protocol == IPPROTO_RAW || bsd_args.protocol == 0)
565 	    && bsd_args.domain == AF_INET
566 	    && retval_socket >= 0) {
567 		/* It's a raw IP socket: set the IP_HDRINCL option. */
568 		int hdrincl;
569 
570 		hdrincl = 1;
571 		/* We ignore any error returned by kern_setsockopt() */
572 		kern_setsockopt(td, td->td_retval[0], IPPROTO_IP, IP_HDRINCL,
573 		    &hdrincl, UIO_SYSSPACE, sizeof(hdrincl));
574 	}
575 #ifdef INET6
576 	/*
577 	 * Linux AF_INET6 socket has IPV6_V6ONLY setsockopt set to 0 by
578 	 * default and some apps depend on this. So, set V6ONLY to 0
579 	 * for Linux apps if the sysctl value is set to 1.
580 	 */
581 	if (bsd_args.domain == PF_INET6 && retval_socket >= 0
582 #ifndef KLD_MODULE
583 	    /*
584 	     * XXX: Avoid undefined symbol error with an IPv4 only
585 	     * kernel.
586 	     */
587 	    && V_ip6_v6only
588 #endif
589 	    ) {
590 		int v6only;
591 
592 		v6only = 0;
593 		/* We ignore any error returned by setsockopt() */
594 		kern_setsockopt(td, td->td_retval[0], IPPROTO_IPV6, IPV6_V6ONLY,
595 		    &v6only, UIO_SYSSPACE, sizeof(v6only));
596 	}
597 #endif
598 
599 	return (retval_socket);
600 }
601 
602 struct linux_bind_args {
603 	int s;
604 	l_uintptr_t name;
605 	int namelen;
606 };
607 
608 static int
609 linux_bind(struct thread *td, struct linux_bind_args *args)
610 {
611 	struct sockaddr *sa;
612 	int error;
613 
614 	error = linux_getsockaddr(&sa, PTRIN(args->name),
615 	    args->namelen);
616 	if (error)
617 		return (error);
618 
619 	error = kern_bind(td, args->s, sa);
620 	free(sa, M_SONAME);
621 	if (error == EADDRNOTAVAIL && args->namelen != sizeof(struct sockaddr_in))
622 	   	return (EINVAL);
623 	return (error);
624 }
625 
626 struct linux_connect_args {
627 	int s;
628 	l_uintptr_t name;
629 	int namelen;
630 };
631 int linux_connect(struct thread *, struct linux_connect_args *);
632 
633 int
634 linux_connect(struct thread *td, struct linux_connect_args *args)
635 {
636 	struct socket *so;
637 	struct sockaddr *sa;
638 	u_int fflag;
639 	int error;
640 
641 	error = linux_getsockaddr(&sa, (struct osockaddr *)PTRIN(args->name),
642 	    args->namelen);
643 	if (error)
644 		return (error);
645 
646 	error = kern_connect(td, args->s, sa);
647 	free(sa, M_SONAME);
648 	if (error != EISCONN)
649 		return (error);
650 
651 	/*
652 	 * Linux doesn't return EISCONN the first time it occurs,
653 	 * when on a non-blocking socket. Instead it returns the
654 	 * error getsockopt(SOL_SOCKET, SO_ERROR) would return on BSD.
655 	 *
656 	 * XXXRW: Instead of using fgetsock(), check that it is a
657 	 * socket and use the file descriptor reference instead of
658 	 * creating a new one.
659 	 */
660 	error = fgetsock(td, args->s, &so, &fflag);
661 	if (error == 0) {
662 		error = EISCONN;
663 		if (fflag & FNONBLOCK) {
664 			SOCK_LOCK(so);
665 			if (so->so_emuldata == 0)
666 				error = so->so_error;
667 			so->so_emuldata = (void *)1;
668 			SOCK_UNLOCK(so);
669 		}
670 		fputsock(so);
671 	}
672 	return (error);
673 }
674 
675 struct linux_listen_args {
676 	int s;
677 	int backlog;
678 };
679 
680 static int
681 linux_listen(struct thread *td, struct linux_listen_args *args)
682 {
683 	struct listen_args /* {
684 		int s;
685 		int backlog;
686 	} */ bsd_args;
687 
688 	bsd_args.s = args->s;
689 	bsd_args.backlog = args->backlog;
690 	return (listen(td, &bsd_args));
691 }
692 
693 struct linux_accept_args {
694 	int s;
695 	l_uintptr_t addr;
696 	l_uintptr_t namelen;
697 };
698 
699 static int
700 linux_accept(struct thread *td, struct linux_accept_args *args)
701 {
702 	struct accept_args /* {
703 		int	s;
704 		struct sockaddr * __restrict name;
705 		socklen_t * __restrict anamelen;
706 	} */ bsd_args;
707 	int error, fd;
708 
709 	bsd_args.s = args->s;
710 	/* XXX: */
711 	bsd_args.name = (struct sockaddr * __restrict)PTRIN(args->addr);
712 	bsd_args.anamelen = PTRIN(args->namelen);/* XXX */
713 	error = accept(td, &bsd_args);
714 	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.name);
715 	if (error) {
716 		if (error == EFAULT && args->namelen != sizeof(struct sockaddr_in))
717 			return (EINVAL);
718 		return (error);
719 	}
720 	if (args->addr) {
721 		error = linux_sa_put(PTRIN(args->addr));
722 		if (error) {
723 			(void)kern_close(td, td->td_retval[0]);
724 			return (error);
725 		}
726 	}
727 
728 	/*
729 	 * linux appears not to copy flags from the parent socket to the
730 	 * accepted one, so we must clear the flags in the new descriptor.
731 	 * Ignore any errors, because we already have an open fd.
732 	 */
733 	fd = td->td_retval[0];
734 	(void)kern_fcntl(td, fd, F_SETFL, 0);
735 	td->td_retval[0] = fd;
736 	return (0);
737 }
738 
739 struct linux_getsockname_args {
740 	int s;
741 	l_uintptr_t addr;
742 	l_uintptr_t namelen;
743 };
744 
745 static int
746 linux_getsockname(struct thread *td, struct linux_getsockname_args *args)
747 {
748 	struct getsockname_args /* {
749 		int	fdes;
750 		struct sockaddr * __restrict asa;
751 		socklen_t * __restrict alen;
752 	} */ bsd_args;
753 	int error;
754 
755 	bsd_args.fdes = args->s;
756 	/* XXX: */
757 	bsd_args.asa = (struct sockaddr * __restrict)PTRIN(args->addr);
758 	bsd_args.alen = PTRIN(args->namelen);	/* XXX */
759 	error = getsockname(td, &bsd_args);
760 	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.asa);
761 	if (error)
762 		return (error);
763 	error = linux_sa_put(PTRIN(args->addr));
764 	if (error)
765 		return (error);
766 	return (0);
767 }
768 
769 struct linux_getpeername_args {
770 	int s;
771 	l_uintptr_t addr;
772 	l_uintptr_t namelen;
773 };
774 
775 static int
776 linux_getpeername(struct thread *td, struct linux_getpeername_args *args)
777 {
778 	struct getpeername_args /* {
779 		int fdes;
780 		caddr_t asa;
781 		int *alen;
782 	} */ bsd_args;
783 	int error;
784 
785 	bsd_args.fdes = args->s;
786 	bsd_args.asa = (struct sockaddr *)PTRIN(args->addr);
787 	bsd_args.alen = (int *)PTRIN(args->namelen);
788 	error = getpeername(td, &bsd_args);
789 	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.asa);
790 	if (error)
791 		return (error);
792 	error = linux_sa_put(PTRIN(args->addr));
793 	if (error)
794 		return (error);
795 	return (0);
796 }
797 
798 struct linux_socketpair_args {
799 	int domain;
800 	int type;
801 	int protocol;
802 	l_uintptr_t rsv;
803 };
804 
805 static int
806 linux_socketpair(struct thread *td, struct linux_socketpair_args *args)
807 {
808 	struct socketpair_args /* {
809 		int domain;
810 		int type;
811 		int protocol;
812 		int *rsv;
813 	} */ bsd_args;
814 
815 	bsd_args.domain = linux_to_bsd_domain(args->domain);
816 	if (bsd_args.domain == -1)
817 		return (EINVAL);
818 
819 	bsd_args.type = args->type;
820 	bsd_args.protocol = args->protocol;
821 	bsd_args.rsv = (int *)PTRIN(args->rsv);
822 	return (socketpair(td, &bsd_args));
823 }
824 
825 struct linux_send_args {
826 	int s;
827 	l_uintptr_t msg;
828 	int len;
829 	int flags;
830 };
831 
832 static int
833 linux_send(struct thread *td, struct linux_send_args *args)
834 {
835 	struct sendto_args /* {
836 		int s;
837 		caddr_t buf;
838 		int len;
839 		int flags;
840 		caddr_t to;
841 		int tolen;
842 	} */ bsd_args;
843 
844 	bsd_args.s = args->s;
845 	bsd_args.buf = (caddr_t)PTRIN(args->msg);
846 	bsd_args.len = args->len;
847 	bsd_args.flags = args->flags;
848 	bsd_args.to = NULL;
849 	bsd_args.tolen = 0;
850 	return sendto(td, &bsd_args);
851 }
852 
853 struct linux_recv_args {
854 	int s;
855 	l_uintptr_t msg;
856 	int len;
857 	int flags;
858 };
859 
860 static int
861 linux_recv(struct thread *td, struct linux_recv_args *args)
862 {
863 	struct recvfrom_args /* {
864 		int s;
865 		caddr_t buf;
866 		int len;
867 		int flags;
868 		struct sockaddr *from;
869 		socklen_t fromlenaddr;
870 	} */ bsd_args;
871 
872 	bsd_args.s = args->s;
873 	bsd_args.buf = (caddr_t)PTRIN(args->msg);
874 	bsd_args.len = args->len;
875 	bsd_args.flags = args->flags;
876 	bsd_args.from = NULL;
877 	bsd_args.fromlenaddr = 0;
878 	return (recvfrom(td, &bsd_args));
879 }
880 
881 static int
882 linux_sendto(struct thread *td, struct linux_sendto_args *args)
883 {
884 	struct msghdr msg;
885 	struct iovec aiov;
886 	int error;
887 
888 	if (linux_check_hdrincl(td, args->s) == 0)
889 		/* IP_HDRINCL set, tweak the packet before sending */
890 		return (linux_sendto_hdrincl(td, args));
891 
892 	msg.msg_name = PTRIN(args->to);
893 	msg.msg_namelen = args->tolen;
894 	msg.msg_iov = &aiov;
895 	msg.msg_iovlen = 1;
896 	msg.msg_control = NULL;
897 	msg.msg_flags = 0;
898 	aiov.iov_base = PTRIN(args->msg);
899 	aiov.iov_len = args->len;
900 	error = linux_sendit(td, args->s, &msg, args->flags, UIO_USERSPACE);
901 	return (error);
902 }
903 
904 struct linux_recvfrom_args {
905 	int s;
906 	l_uintptr_t buf;
907 	int len;
908 	int flags;
909 	l_uintptr_t from;
910 	l_uintptr_t fromlen;
911 };
912 
913 static int
914 linux_recvfrom(struct thread *td, struct linux_recvfrom_args *args)
915 {
916 	struct recvfrom_args /* {
917 		int	s;
918 		caddr_t	buf;
919 		size_t	len;
920 		int	flags;
921 		struct sockaddr * __restrict from;
922 		socklen_t * __restrict fromlenaddr;
923 	} */ bsd_args;
924 	size_t len;
925 	int error;
926 
927 	if ((error = copyin(PTRIN(args->fromlen), &len, sizeof(size_t))))
928 		return (error);
929 
930 	bsd_args.s = args->s;
931 	bsd_args.buf = PTRIN(args->buf);
932 	bsd_args.len = args->len;
933 	bsd_args.flags = linux_to_bsd_msg_flags(args->flags);
934 	/* XXX: */
935 	bsd_args.from = (struct sockaddr * __restrict)PTRIN(args->from);
936 	bsd_args.fromlenaddr = PTRIN(args->fromlen);/* XXX */
937 
938 	linux_to_bsd_sockaddr((struct sockaddr *)bsd_args.from, len);
939 	error = recvfrom(td, &bsd_args);
940 	bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.from);
941 
942 	if (error)
943 		return (error);
944 	if (args->from) {
945 		error = linux_sa_put((struct osockaddr *)
946 		    PTRIN(args->from));
947 		if (error)
948 			return (error);
949 	}
950 	return (0);
951 }
952 
953 struct linux_sendmsg_args {
954 	int s;
955 	l_uintptr_t msg;
956 	int flags;
957 };
958 
959 static int
960 linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args)
961 {
962 	struct msghdr msg;
963 	struct iovec *iov;
964 	int error;
965 
966 	/* XXXTJR sendmsg is broken on amd64 */
967 
968 	error = copyin(PTRIN(args->msg), &msg, sizeof(msg));
969 	if (error)
970 		return (error);
971 
972 	/*
973 	 * Some Linux applications (ping) define a non-NULL control data
974 	 * pointer, but a msg_controllen of 0, which is not allowed in the
975 	 * FreeBSD system call interface.  NULL the msg_control pointer in
976 	 * order to handle this case.  This should be checked, but allows the
977 	 * Linux ping to work.
978 	 */
979 	if (msg.msg_control != NULL && msg.msg_controllen == 0)
980 		msg.msg_control = NULL;
981 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
982 	if (error)
983 		return (error);
984 	msg.msg_iov = iov;
985 	msg.msg_flags = 0;
986 	error = linux_sendit(td, args->s, &msg, args->flags, UIO_USERSPACE);
987 	free(iov, M_IOV);
988 	return (error);
989 }
990 
991 struct linux_recvmsg_args {
992 	int s;
993 	l_uintptr_t msg;
994 	int flags;
995 };
996 
997 static int
998 linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args)
999 {
1000 	struct recvmsg_args /* {
1001 		int	s;
1002 		struct	msghdr *msg;
1003 		int	flags;
1004 	} */ bsd_args;
1005 	struct msghdr msg;
1006 	struct cmsghdr *cmsg;
1007 	int error;
1008 
1009 	/* XXXTJR recvmsg is broken on amd64 */
1010 
1011 	if ((error = copyin(PTRIN(args->msg), &msg, sizeof (msg))))
1012 		return (error);
1013 
1014 	bsd_args.s = args->s;
1015 	bsd_args.msg = PTRIN(args->msg);
1016 	bsd_args.flags = linux_to_bsd_msg_flags(args->flags);
1017 	if (msg.msg_name) {
1018 	   	linux_to_bsd_sockaddr((struct sockaddr *)msg.msg_name,
1019 		      msg.msg_namelen);
1020 		error = recvmsg(td, &bsd_args);
1021 		bsd_to_linux_sockaddr((struct sockaddr *)msg.msg_name);
1022 	} else
1023 	   	error = recvmsg(td, &bsd_args);
1024 	if (error)
1025 		return (error);
1026 
1027 	if (bsd_args.msg->msg_control != NULL &&
1028 	    bsd_args.msg->msg_controllen > 0) {
1029 		cmsg = (struct cmsghdr*)bsd_args.msg->msg_control;
1030 		cmsg->cmsg_level = bsd_to_linux_sockopt_level(cmsg->cmsg_level);
1031 	}
1032 
1033 	error = copyin(PTRIN(args->msg), &msg, sizeof(msg));
1034 	if (error)
1035 		return (error);
1036 	if (msg.msg_name && msg.msg_namelen > 2)
1037 		error = linux_sa_put(msg.msg_name);
1038 	return (error);
1039 }
1040 
1041 struct linux_shutdown_args {
1042 	int s;
1043 	int how;
1044 };
1045 
1046 static int
1047 linux_shutdown(struct thread *td, struct linux_shutdown_args *args)
1048 {
1049 	struct shutdown_args /* {
1050 		int s;
1051 		int how;
1052 	} */ bsd_args;
1053 
1054 	bsd_args.s = args->s;
1055 	bsd_args.how = args->how;
1056 	return (shutdown(td, &bsd_args));
1057 }
1058 
1059 struct linux_setsockopt_args {
1060 	int s;
1061 	int level;
1062 	int optname;
1063 	l_uintptr_t optval;
1064 	int optlen;
1065 };
1066 
1067 static int
1068 linux_setsockopt(struct thread *td, struct linux_setsockopt_args *args)
1069 {
1070 	struct setsockopt_args /* {
1071 		int s;
1072 		int level;
1073 		int name;
1074 		caddr_t val;
1075 		int valsize;
1076 	} */ bsd_args;
1077 	int error, name;
1078 
1079 	bsd_args.s = args->s;
1080 	bsd_args.level = linux_to_bsd_sockopt_level(args->level);
1081 	switch (bsd_args.level) {
1082 	case SOL_SOCKET:
1083 		name = linux_to_bsd_so_sockopt(args->optname);
1084 		break;
1085 	case IPPROTO_IP:
1086 		name = linux_to_bsd_ip_sockopt(args->optname);
1087 		break;
1088 	case IPPROTO_TCP:
1089 		/* Linux TCP option values match BSD's */
1090 		name = args->optname;
1091 		break;
1092 	default:
1093 		name = -1;
1094 		break;
1095 	}
1096 	if (name == -1)
1097 		return (ENOPROTOOPT);
1098 
1099 	bsd_args.name = name;
1100 	bsd_args.val = PTRIN(args->optval);
1101 	bsd_args.valsize = args->optlen;
1102 
1103 	if (name == IPV6_NEXTHOP) {
1104 		linux_to_bsd_sockaddr((struct sockaddr *)bsd_args.val,
1105 			bsd_args.valsize);
1106 		error = setsockopt(td, &bsd_args);
1107 		bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.val);
1108 	} else
1109 		error = setsockopt(td, &bsd_args);
1110 
1111 	return (error);
1112 }
1113 
1114 struct linux_getsockopt_args {
1115 	int s;
1116 	int level;
1117 	int optname;
1118 	l_uintptr_t optval;
1119 	l_uintptr_t optlen;
1120 };
1121 
1122 static int
1123 linux_getsockopt(struct thread *td, struct linux_getsockopt_args *args)
1124 {
1125 	struct getsockopt_args /* {
1126 		int s;
1127 		int level;
1128 		int name;
1129 		caddr_t val;
1130 		int *avalsize;
1131 	} */ bsd_args;
1132 	int error, name;
1133 
1134 	bsd_args.s = args->s;
1135 	bsd_args.level = linux_to_bsd_sockopt_level(args->level);
1136 	switch (bsd_args.level) {
1137 	case SOL_SOCKET:
1138 		name = linux_to_bsd_so_sockopt(args->optname);
1139 		break;
1140 	case IPPROTO_IP:
1141 		name = linux_to_bsd_ip_sockopt(args->optname);
1142 		break;
1143 	case IPPROTO_TCP:
1144 		/* Linux TCP option values match BSD's */
1145 		name = args->optname;
1146 		break;
1147 	default:
1148 		name = -1;
1149 		break;
1150 	}
1151 	if (name == -1)
1152 		return (EINVAL);
1153 
1154 	bsd_args.name = name;
1155 	bsd_args.val = PTRIN(args->optval);
1156 	bsd_args.avalsize = PTRIN(args->optlen);
1157 
1158 	if (name == IPV6_NEXTHOP) {
1159 		error = getsockopt(td, &bsd_args);
1160 		bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.val);
1161 	} else
1162 		error = getsockopt(td, &bsd_args);
1163 
1164 	return (error);
1165 }
1166 
1167 int
1168 linux_socketcall(struct thread *td, struct linux_socketcall_args *args)
1169 {
1170 	void *arg = (void *)(intptr_t)args->args;
1171 
1172 	switch (args->what) {
1173 	case LINUX_SOCKET:
1174 		return (linux_socket(td, arg));
1175 	case LINUX_BIND:
1176 		return (linux_bind(td, arg));
1177 	case LINUX_CONNECT:
1178 		return (linux_connect(td, arg));
1179 	case LINUX_LISTEN:
1180 		return (linux_listen(td, arg));
1181 	case LINUX_ACCEPT:
1182 		return (linux_accept(td, arg));
1183 	case LINUX_GETSOCKNAME:
1184 		return (linux_getsockname(td, arg));
1185 	case LINUX_GETPEERNAME:
1186 		return (linux_getpeername(td, arg));
1187 	case LINUX_SOCKETPAIR:
1188 		return (linux_socketpair(td, arg));
1189 	case LINUX_SEND:
1190 		return (linux_send(td, arg));
1191 	case LINUX_RECV:
1192 		return (linux_recv(td, arg));
1193 	case LINUX_SENDTO:
1194 		return (linux_sendto(td, arg));
1195 	case LINUX_RECVFROM:
1196 		return (linux_recvfrom(td, arg));
1197 	case LINUX_SHUTDOWN:
1198 		return (linux_shutdown(td, arg));
1199 	case LINUX_SETSOCKOPT:
1200 		return (linux_setsockopt(td, arg));
1201 	case LINUX_GETSOCKOPT:
1202 		return (linux_getsockopt(td, arg));
1203 	case LINUX_SENDMSG:
1204 		return (linux_sendmsg(td, arg));
1205 	case LINUX_RECVMSG:
1206 		return (linux_recvmsg(td, arg));
1207 	}
1208 
1209 	uprintf("LINUX: 'socket' typ=%d not implemented\n", args->what);
1210 	return (ENOSYS);
1211 }
1212