xref: /freebsd/sys/compat/linux/linux_socket.c (revision 538015aa3b16b2819401ed2b6cebdd459affc7d4)
1 /*-
2  * Copyright (c) 1995 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 #include "opt_inet6.h"
34 
35 #ifndef COMPAT_43
36 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
37 #endif
38 
39 #include <sys/param.h>
40 #include <sys/proc.h>
41 #include <sys/systm.h>
42 #include <sys/sysproto.h>
43 #include <sys/fcntl.h>
44 #include <sys/file.h>
45 #include <sys/malloc.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/syscallsubr.h>
49 #include <sys/uio.h>
50 #include <sys/syslog.h>
51 
52 #include <netinet/in.h>
53 #include <netinet/in_systm.h>
54 #include <netinet/ip.h>
55 #ifdef INET6
56 #include <netinet/ip6.h>
57 #include <netinet6/ip6_var.h>
58 #endif
59 
60 #include <machine/../linux/linux.h>
61 #include <machine/../linux/linux_proto.h>
62 #include <compat/linux/linux_socket.h>
63 #include <compat/linux/linux_util.h>
64 
65 #include <machine/limits.h>
66 
67 static int do_sa_get(struct sockaddr **, const struct osockaddr *, int *,
68     struct malloc_type *);
69 static int linux_to_bsd_domain(int);
70 
71 /*
72  * Reads a linux sockaddr and does any necessary translation.
73  * Linux sockaddrs don't have a length field, only a family.
74  */
75 static int
76 linux_getsockaddr(struct sockaddr **sap, const struct osockaddr *osa, int len)
77 {
78 	int osalen = len;
79 
80 	return (do_sa_get(sap, osa, &osalen, M_SONAME));
81 }
82 
83 /*
84  * Copy the osockaddr structure pointed to by osa to kernel, adjust
85  * family and convert to sockaddr.
86  */
87 static int
88 do_sa_get(struct sockaddr **sap, const struct osockaddr *osa, int *osalen,
89     struct malloc_type *mtype)
90 {
91 	int error=0, bdom;
92 	struct sockaddr *sa;
93 	struct osockaddr *kosa;
94 	int alloclen;
95 #ifdef INET6
96 	int oldv6size;
97 	struct sockaddr_in6 *sin6;
98 #endif
99 
100 	if (*osalen < 2 || *osalen > UCHAR_MAX || !osa)
101 		return (EINVAL);
102 
103 	alloclen = *osalen;
104 #ifdef INET6
105 	oldv6size = 0;
106 	/*
107 	 * Check for old (pre-RFC2553) sockaddr_in6. We may accept it
108 	 * if it's a v4-mapped address, so reserve the proper space
109 	 * for it.
110 	 */
111 	if (alloclen == sizeof (struct sockaddr_in6) - sizeof (u_int32_t)) {
112 		alloclen = sizeof (struct sockaddr_in6);
113 		oldv6size = 1;
114 	}
115 #endif
116 
117 	MALLOC(kosa, struct osockaddr *, alloclen, mtype, 0);
118 
119 	if ((error = copyin(osa, (caddr_t) kosa, *osalen)))
120 		goto out;
121 
122 	bdom = linux_to_bsd_domain(kosa->sa_family);
123 	if (bdom == -1) {
124 		error = EINVAL;
125 		goto out;
126 	}
127 
128 #ifdef INET6
129 	/*
130 	 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6,
131 	 * which lacks the scope id compared with RFC2553 one. If we detect
132 	 * the situation, reject the address and write a message to system log.
133 	 *
134 	 * Still accept addresses for which the scope id is not used.
135 	 */
136 	if (oldv6size && bdom == AF_INET6) {
137 		sin6 = (struct sockaddr_in6 *)kosa;
138 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ||
139 		    (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
140 		     !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
141 		     !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) &&
142 		     !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
143 		     !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
144 			sin6->sin6_scope_id = 0;
145 		} else {
146 			log(LOG_DEBUG,
147 			    "obsolete pre-RFC2553 sockaddr_in6 rejected");
148 			error = EINVAL;
149 			goto out;
150 		}
151 	} else
152 #endif
153 	if (bdom == AF_INET)
154 		alloclen = sizeof(struct sockaddr_in);
155 
156 	sa = (struct sockaddr *) kosa;
157 	sa->sa_family = bdom;
158 	sa->sa_len = alloclen;
159 
160 	*sap = sa;
161 	*osalen = alloclen;
162 	return (0);
163 
164 out:
165 	FREE(kosa, mtype);
166 	return (error);
167 }
168 
169 static int
170 linux_to_bsd_domain(int domain)
171 {
172 
173 	switch (domain) {
174 	case LINUX_AF_UNSPEC:
175 		return (AF_UNSPEC);
176 	case LINUX_AF_UNIX:
177 		return (AF_LOCAL);
178 	case LINUX_AF_INET:
179 		return (AF_INET);
180 	case LINUX_AF_INET6:
181 		return (AF_INET6);
182 	case LINUX_AF_AX25:
183 		return (AF_CCITT);
184 	case LINUX_AF_IPX:
185 		return (AF_IPX);
186 	case LINUX_AF_APPLETALK:
187 		return (AF_APPLETALK);
188 	}
189 	return (-1);
190 }
191 
192 #ifndef __alpha__
193 static int
194 bsd_to_linux_domain(int domain)
195 {
196 
197 	switch (domain) {
198 	case AF_UNSPEC:
199 		return (LINUX_AF_UNSPEC);
200 	case AF_LOCAL:
201 		return (LINUX_AF_UNIX);
202 	case AF_INET:
203 		return (LINUX_AF_INET);
204 	case AF_INET6:
205 		return (LINUX_AF_INET6);
206 	case AF_CCITT:
207 		return (LINUX_AF_AX25);
208 	case AF_IPX:
209 		return (LINUX_AF_IPX);
210 	case AF_APPLETALK:
211 		return (LINUX_AF_APPLETALK);
212 	}
213 	return (-1);
214 }
215 
216 static int
217 linux_to_bsd_sockopt_level(int level)
218 {
219 
220 	switch (level) {
221 	case LINUX_SOL_SOCKET:
222 		return (SOL_SOCKET);
223 	}
224 	return (level);
225 }
226 
227 static int
228 linux_to_bsd_ip_sockopt(int opt)
229 {
230 
231 	switch (opt) {
232 	case LINUX_IP_TOS:
233 		return (IP_TOS);
234 	case LINUX_IP_TTL:
235 		return (IP_TTL);
236 	case LINUX_IP_OPTIONS:
237 		return (IP_OPTIONS);
238 	case LINUX_IP_MULTICAST_IF:
239 		return (IP_MULTICAST_IF);
240 	case LINUX_IP_MULTICAST_TTL:
241 		return (IP_MULTICAST_TTL);
242 	case LINUX_IP_MULTICAST_LOOP:
243 		return (IP_MULTICAST_LOOP);
244 	case LINUX_IP_ADD_MEMBERSHIP:
245 		return (IP_ADD_MEMBERSHIP);
246 	case LINUX_IP_DROP_MEMBERSHIP:
247 		return (IP_DROP_MEMBERSHIP);
248 	case LINUX_IP_HDRINCL:
249 		return (IP_HDRINCL);
250 	}
251 	return (-1);
252 }
253 
254 static int
255 linux_to_bsd_so_sockopt(int opt)
256 {
257 
258 	switch (opt) {
259 	case LINUX_SO_DEBUG:
260 		return (SO_DEBUG);
261 	case LINUX_SO_REUSEADDR:
262 		return (SO_REUSEADDR);
263 	case LINUX_SO_TYPE:
264 		return (SO_TYPE);
265 	case LINUX_SO_ERROR:
266 		return (SO_ERROR);
267 	case LINUX_SO_DONTROUTE:
268 		return (SO_DONTROUTE);
269 	case LINUX_SO_BROADCAST:
270 		return (SO_BROADCAST);
271 	case LINUX_SO_SNDBUF:
272 		return (SO_SNDBUF);
273 	case LINUX_SO_RCVBUF:
274 		return (SO_RCVBUF);
275 	case LINUX_SO_KEEPALIVE:
276 		return (SO_KEEPALIVE);
277 	case LINUX_SO_OOBINLINE:
278 		return (SO_OOBINLINE);
279 	case LINUX_SO_LINGER:
280 		return (SO_LINGER);
281 	}
282 	return (-1);
283 }
284 
285 static int
286 linux_to_bsd_msg_flags(int flags)
287 {
288 	int ret_flags = 0;
289 
290 	if (flags & LINUX_MSG_OOB)
291 		ret_flags |= MSG_OOB;
292 	if (flags & LINUX_MSG_PEEK)
293 		ret_flags |= MSG_PEEK;
294 	if (flags & LINUX_MSG_DONTROUTE)
295 		ret_flags |= MSG_DONTROUTE;
296 	if (flags & LINUX_MSG_CTRUNC)
297 		ret_flags |= MSG_CTRUNC;
298 	if (flags & LINUX_MSG_TRUNC)
299 		ret_flags |= MSG_TRUNC;
300 	if (flags & LINUX_MSG_DONTWAIT)
301 		ret_flags |= MSG_DONTWAIT;
302 	if (flags & LINUX_MSG_EOR)
303 		ret_flags |= MSG_EOR;
304 	if (flags & LINUX_MSG_WAITALL)
305 		ret_flags |= MSG_WAITALL;
306 #if 0 /* not handled */
307 	if (flags & LINUX_MSG_PROXY)
308 		;
309 	if (flags & LINUX_MSG_FIN)
310 		;
311 	if (flags & LINUX_MSG_SYN)
312 		;
313 	if (flags & LINUX_MSG_CONFIRM)
314 		;
315 	if (flags & LINUX_MSG_RST)
316 		;
317 	if (flags & LINUX_MSG_ERRQUEUE)
318 		;
319 	if (flags & LINUX_MSG_NOSIGNAL)
320 		;
321 #endif
322 	return ret_flags;
323 }
324 
325 /*
326  * Allocate stackgap and put the converted sockaddr structure
327  * there, address on stackgap returned in sap.
328  */
329 static int
330 linux_sa_get(caddr_t *sgp, struct sockaddr **sap,
331 	     const struct osockaddr *osa, int *osalen)
332 {
333 	struct sockaddr *sa, *usa;
334 	int alloclen, error;
335 
336 	alloclen = *osalen;
337 	error = do_sa_get(&sa, osa, &alloclen, M_TEMP);
338 	if (error)
339 		return (error);
340 
341 	usa = (struct sockaddr *) stackgap_alloc(sgp, alloclen);
342 	if (!usa) {
343 		error = ENOMEM;
344 		goto out;
345 	}
346 
347 	if ((error = copyout(sa, usa, alloclen)))
348 		goto out;
349 
350 	*sap = usa;
351 	*osalen = alloclen;
352 
353 out:
354 	FREE(sa, M_TEMP);
355 	return (error);
356 }
357 
358 static int
359 linux_sa_put(struct osockaddr *osa)
360 {
361 	struct osockaddr sa;
362 	int error, bdom;
363 
364 	/*
365 	 * Only read/write the osockaddr family part, the rest is
366 	 * not changed.
367 	 */
368 	error = copyin((caddr_t) osa, (caddr_t) &sa, sizeof(sa.sa_family));
369 	if (error)
370 		return (error);
371 
372 	bdom = bsd_to_linux_domain(sa.sa_family);
373 	if (bdom == -1)
374 		return (EINVAL);
375 
376 	sa.sa_family = bdom;
377 	error = copyout(&sa, osa, sizeof(sa.sa_family));
378 	if (error)
379 		return (error);
380 
381 	return (0);
382 }
383 
384 /* Return 0 if IP_HDRINCL is set for the given socket. */
385 static int
386 linux_check_hdrincl(struct thread *td, int s)
387 {
388 	struct getsockopt_args /* {
389 		int s;
390 		int level;
391 		int name;
392 		caddr_t val;
393 		int *avalsize;
394 	} */ bsd_args;
395 	int error;
396 	caddr_t sg, val, valsize;
397 	int size_val = sizeof val;
398 	int optval;
399 
400 	sg = stackgap_init();
401 	val = stackgap_alloc(&sg, sizeof(int));
402 	valsize = stackgap_alloc(&sg, sizeof(int));
403 
404 	if ((error = copyout(&size_val, valsize, sizeof(size_val))))
405 		return (error);
406 
407 	bsd_args.s = s;
408 	bsd_args.level = IPPROTO_IP;
409 	bsd_args.name = IP_HDRINCL;
410 	bsd_args.val = val;
411 	bsd_args.avalsize = (int *)valsize;
412 	if ((error = getsockopt(td, &bsd_args)))
413 		return (error);
414 
415 	if ((error = copyin(val, &optval, sizeof(optval))))
416 		return (error);
417 
418 	return (optval == 0);
419 }
420 
421 /*
422  * Updated sendto() when IP_HDRINCL is set:
423  * tweak endian-dependent fields in the IP packet.
424  */
425 static int
426 linux_sendto_hdrincl(struct thread *td, struct sendto_args *bsd_args)
427 {
428 /*
429  * linux_ip_copysize defines how many bytes we should copy
430  * from the beginning of the IP packet before we customize it for BSD.
431  * It should include all the fields we modify (ip_len and ip_off)
432  * and be as small as possible to minimize copying overhead.
433  */
434 #define linux_ip_copysize	8
435 
436 	caddr_t sg;
437 	struct ip *packet;
438 	struct msghdr *msg;
439 	struct iovec *iov;
440 
441 	int error;
442 	struct  sendmsg_args /* {
443 		int s;
444 		caddr_t msg;
445 		int flags;
446 	} */ sendmsg_args;
447 
448 	/* Check the packet isn't too small before we mess with it */
449 	if (bsd_args->len < linux_ip_copysize)
450 		return (EINVAL);
451 
452 	/*
453 	 * Tweaking the user buffer in place would be bad manners.
454 	 * We create a corrected IP header with just the needed length,
455 	 * then use an iovec to glue it to the rest of the user packet
456 	 * when calling sendmsg().
457 	 */
458 	sg = stackgap_init();
459 	packet = (struct ip *)stackgap_alloc(&sg, linux_ip_copysize);
460 	msg = (struct msghdr *)stackgap_alloc(&sg, sizeof(*msg));
461 	iov = (struct iovec *)stackgap_alloc(&sg, sizeof(*iov)*2);
462 
463 	/* Make a copy of the beginning of the packet to be sent */
464 	if ((error = copyin(bsd_args->buf, packet, linux_ip_copysize)))
465 		return (error);
466 
467 	/* Convert fields from Linux to BSD raw IP socket format */
468 	packet->ip_len = bsd_args->len;
469 	packet->ip_off = ntohs(packet->ip_off);
470 
471 	/* Prepare the msghdr and iovec structures describing the new packet */
472 	msg->msg_name = bsd_args->to;
473 	msg->msg_namelen = bsd_args->tolen;
474 	msg->msg_iov = iov;
475 	msg->msg_iovlen = 2;
476 	msg->msg_control = NULL;
477 	msg->msg_controllen = 0;
478 	msg->msg_flags = 0;
479 	iov[0].iov_base = (char *)packet;
480 	iov[0].iov_len = linux_ip_copysize;
481 	iov[1].iov_base = (char *)(bsd_args->buf) + linux_ip_copysize;
482 	iov[1].iov_len = bsd_args->len - linux_ip_copysize;
483 
484 	sendmsg_args.s = bsd_args->s;
485 	sendmsg_args.msg = (caddr_t)msg;
486 	sendmsg_args.flags = bsd_args->flags;
487 	return (sendmsg(td, &sendmsg_args));
488 }
489 
490 struct linux_socket_args {
491 	int domain;
492 	int type;
493 	int protocol;
494 };
495 
496 static int
497 linux_socket(struct thread *td, struct linux_socket_args *args)
498 {
499 	struct linux_socket_args linux_args;
500 	struct socket_args /* {
501 		int domain;
502 		int type;
503 		int protocol;
504 	} */ bsd_args;
505 	struct setsockopt_args /* {
506 		int s;
507 		int level;
508 		int name;
509 		caddr_t val;
510 		int valsize;
511 	} */ bsd_setsockopt_args;
512 	int error;
513 	int retval_socket;
514 
515 	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
516 		return (error);
517 
518 	bsd_args.protocol = linux_args.protocol;
519 	bsd_args.type = linux_args.type;
520 	bsd_args.domain = linux_to_bsd_domain(linux_args.domain);
521 	if (bsd_args.domain == -1)
522 		return (EINVAL);
523 
524 	retval_socket = socket(td, &bsd_args);
525 	if (bsd_args.type == SOCK_RAW
526 	    && (bsd_args.protocol == IPPROTO_RAW || bsd_args.protocol == 0)
527 	    && bsd_args.domain == AF_INET
528 	    && retval_socket >= 0) {
529 		/* It's a raw IP socket: set the IP_HDRINCL option. */
530 		caddr_t sg;
531 		int *hdrincl;
532 
533 		sg = stackgap_init();
534 		hdrincl = (int *)stackgap_alloc(&sg, sizeof(*hdrincl));
535 		*hdrincl = 1;
536 		bsd_setsockopt_args.s = td->td_retval[0];
537 		bsd_setsockopt_args.level = IPPROTO_IP;
538 		bsd_setsockopt_args.name = IP_HDRINCL;
539 		bsd_setsockopt_args.val = (caddr_t)hdrincl;
540 		bsd_setsockopt_args.valsize = sizeof(*hdrincl);
541 		/* We ignore any error returned by setsockopt() */
542 		setsockopt(td, &bsd_setsockopt_args);
543 		/* Copy back the return value from socket() */
544 		td->td_retval[0] = bsd_setsockopt_args.s;
545 	}
546 #ifdef INET6
547 	/*
548 	 * Linux AF_INET6 socket has IPV6_V6ONLY setsockopt set to 0 by
549 	 * default and some apps depend on this. So, set V6ONLY to 0
550 	 * for Linux apps if the sysctl value is set to 1.
551 	 */
552 	if (bsd_args.domain == PF_INET6 && retval_socket >= 0
553 #ifndef KLD_MODULE
554 	    /*
555 	     * XXX: Avoid undefined symbol error with an IPv4 only
556 	     * kernel.
557 	     */
558 	    && ip6_v6only
559 #endif
560 	    ) {
561 		caddr_t sg;
562 		int *v6only;
563 
564 		sg = stackgap_init();
565 		v6only = (int *)stackgap_alloc(&sg, sizeof(*v6only));
566 		*v6only = 0;
567 		bsd_setsockopt_args.s = td->td_retval[0];
568 		bsd_setsockopt_args.level = IPPROTO_IPV6;
569 		bsd_setsockopt_args.name = IPV6_V6ONLY;
570 		bsd_setsockopt_args.val = (caddr_t)v6only;
571 		bsd_setsockopt_args.valsize = sizeof(*v6only);
572 		/* We ignore any error returned by setsockopt() */
573 		setsockopt(td, &bsd_setsockopt_args);
574 		/* Copy back the return value from socket() */
575 		td->td_retval[0] = bsd_setsockopt_args.s;
576 	}
577 #endif
578 
579 	return (retval_socket);
580 }
581 
/* User-space argument block of the Linux bind() call. */
struct linux_bind_args {
	int s;
	struct osockaddr *name;
	int namelen;
};

/*
 * Linux bind(): fetch the arguments, translate the Linux sockaddr and
 * pass the result to kern_bind().
 *
 * Returns the copyin() or linux_getsockaddr() error, otherwise the
 * result of kern_bind().
 */
static int
linux_bind(struct thread *td, struct linux_bind_args *args)
{
	struct linux_bind_args largs;
	struct sockaddr *addr;
	int error;

	error = copyin(args, &largs, sizeof(largs));
	if (error)
		return (error);

	error = linux_getsockaddr(&addr, largs.name, largs.namelen);
	if (error)
		return (error);

	return (kern_bind(td, largs.s, addr));
}
604 
605 struct linux_connect_args {
606 	int s;
607 	struct osockaddr * name;
608 	int namelen;
609 };
610 int linux_connect(struct thread *, struct linux_connect_args *);
611 #endif /* !__alpha__*/
612 
613 int
614 linux_connect(struct thread *td, struct linux_connect_args *args)
615 {
616 	struct linux_connect_args linux_args;
617 	struct socket *so;
618 	struct sockaddr *sa;
619 	u_int fflag;
620 	int error;
621 
622 #ifdef __alpha__
623 	bcopy(args, &linux_args, sizeof(linux_args));
624 #else
625 	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
626 		return (error);
627 #endif /* __alpha__ */
628 
629 	error = linux_getsockaddr(&sa, linux_args.name, linux_args.namelen);
630 	if (error)
631 		return (error);
632 
633 	error = kern_connect(td, linux_args.s, sa);
634 	if (error != EISCONN)
635 		return (error);
636 
637 	/*
638 	 * Linux doesn't return EISCONN the first time it occurs,
639 	 * when on a non-blocking socket. Instead it returns the
640 	 * error getsockopt(SOL_SOCKET, SO_ERROR) would return on BSD.
641 	 */
642 	if ((error = fgetsock(td, linux_args.s, &so, &fflag)) != 0)
643 		return(error);
644 	error = EISCONN;
645 	if (fflag & FNONBLOCK) {
646 		if (so->so_emuldata == 0)
647 			error = so->so_error;
648 		so->so_emuldata = (void *)1;
649 	}
650 	fputsock(so);
651 	return (error);
652 }
653 
654 #ifndef __alpha__
655 
656 struct linux_listen_args {
657 	int s;
658 	int backlog;
659 };
660 
661 static int
662 linux_listen(struct thread *td, struct linux_listen_args *args)
663 {
664 	struct linux_listen_args linux_args;
665 	struct listen_args /* {
666 		int s;
667 		int backlog;
668 	} */ bsd_args;
669 	int error;
670 
671 	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
672 		return (error);
673 
674 	bsd_args.s = linux_args.s;
675 	bsd_args.backlog = linux_args.backlog;
676 	return (listen(td, &bsd_args));
677 }
678 
679 struct linux_accept_args {
680 	int s;
681 	struct osockaddr *addr;
682 	int *namelen;
683 };
684 
685 static int
686 linux_accept(struct thread *td, struct linux_accept_args *args)
687 {
688 	struct linux_accept_args linux_args;
689 	struct accept_args /* {
690 		int s;
691 		caddr_t name;
692 		int *anamelen;
693 	} */ bsd_args;
694 	struct close_args /* {
695 		int     fd;
696 	} */ c_args;
697 	struct fcntl_args /* {
698 		int fd;
699 		int cmd;
700 		long arg;
701 	} */ f_args;
702 	int error;
703 
704 	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
705 		return (error);
706 
707 	bsd_args.s = linux_args.s;
708 	bsd_args.name = (caddr_t)linux_args.addr;
709 	bsd_args.anamelen = linux_args.namelen;
710 	error = oaccept(td, &bsd_args);
711 	if (error)
712 		return (error);
713 	if (linux_args.addr) {
714 		error = linux_sa_put(linux_args.addr);
715 		if (error) {
716 			c_args.fd = td->td_retval[0];
717 			(void)close(td, &c_args);
718 			return (error);
719 		}
720 	}
721 
722 	/*
723 	 * linux appears not to copy flags from the parent socket to the
724 	 * accepted one, so we must clear the flags in the new descriptor.
725 	 * Ignore any errors, because we already have an open fd.
726 	 */
727 	f_args.fd = td->td_retval[0];
728 	f_args.cmd = F_SETFL;
729 	f_args.arg = 0;
730 	(void)fcntl(td, &f_args);
731 	td->td_retval[0] = f_args.fd;
732 	return (0);
733 }
734 
735 struct linux_getsockname_args {
736 	int s;
737 	struct osockaddr *addr;
738 	int *namelen;
739 };
740 
741 static int
742 linux_getsockname(struct thread *td, struct linux_getsockname_args *args)
743 {
744 	struct linux_getsockname_args linux_args;
745 	struct getsockname_args /* {
746 		int fdes;
747 		caddr_t asa;
748 		int *alen;
749 	} */ bsd_args;
750 	int error;
751 
752 	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
753 		return (error);
754 
755 	bsd_args.fdes = linux_args.s;
756 	bsd_args.asa = (caddr_t) linux_args.addr;
757 	bsd_args.alen = linux_args.namelen;
758 	error = ogetsockname(td, &bsd_args);
759 	if (error)
760 		return (error);
761 	error = linux_sa_put(linux_args.addr);
762 	if (error)
763 		return (error);
764 	return (0);
765 }
766 
767 struct linux_getpeername_args {
768 	int s;
769 	struct osockaddr *addr;
770 	int *namelen;
771 };
772 
773 static int
774 linux_getpeername(struct thread *td, struct linux_getpeername_args *args)
775 {
776 	struct linux_getpeername_args linux_args;
777 	struct ogetpeername_args /* {
778 		int fdes;
779 		caddr_t asa;
780 		int *alen;
781 	} */ bsd_args;
782 	int error;
783 
784 	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
785 		return (error);
786 
787 	bsd_args.fdes = linux_args.s;
788 	bsd_args.asa = (caddr_t) linux_args.addr;
789 	bsd_args.alen = linux_args.namelen;
790 	error = ogetpeername(td, &bsd_args);
791 	if (error)
792 		return (error);
793 	error = linux_sa_put(linux_args.addr);
794 	if (error)
795 		return (error);
796 	return (0);
797 }
798 
799 struct linux_socketpair_args {
800 	int domain;
801 	int type;
802 	int protocol;
803 	int *rsv;
804 };
805 
806 static int
807 linux_socketpair(struct thread *td, struct linux_socketpair_args *args)
808 {
809 	struct linux_socketpair_args linux_args;
810 	struct socketpair_args /* {
811 		int domain;
812 		int type;
813 		int protocol;
814 		int *rsv;
815 	} */ bsd_args;
816 	int error;
817 
818 	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
819 		return (error);
820 
821 	bsd_args.domain = linux_to_bsd_domain(linux_args.domain);
822 	if (bsd_args.domain == -1)
823 		return (EINVAL);
824 
825 	bsd_args.type = linux_args.type;
826 	bsd_args.protocol = linux_args.protocol;
827 	bsd_args.rsv = linux_args.rsv;
828 	return (socketpair(td, &bsd_args));
829 }
830 
831 struct linux_send_args {
832 	int s;
833 	void *msg;
834 	int len;
835 	int flags;
836 };
837 
838 static int
839 linux_send(struct thread *td, struct linux_send_args *args)
840 {
841 	struct linux_send_args linux_args;
842 	struct osend_args /* {
843 		int s;
844 		caddr_t buf;
845 		int len;
846 		int flags;
847 	} */ bsd_args;
848 	int error;
849 
850 	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
851 		return (error);
852 
853 	bsd_args.s = linux_args.s;
854 	bsd_args.buf = linux_args.msg;
855 	bsd_args.len = linux_args.len;
856 	bsd_args.flags = linux_args.flags;
857 	return (osend(td, &bsd_args));
858 }
859 
860 struct linux_recv_args {
861 	int s;
862 	void *msg;
863 	int len;
864 	int flags;
865 };
866 
867 static int
868 linux_recv(struct thread *td, struct linux_recv_args *args)
869 {
870 	struct linux_recv_args linux_args;
871 	struct orecv_args /* {
872 		int s;
873 		caddr_t buf;
874 		int len;
875 		int flags;
876 	} */ bsd_args;
877 	int error;
878 
879 	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
880 		return (error);
881 
882 	bsd_args.s = linux_args.s;
883 	bsd_args.buf = linux_args.msg;
884 	bsd_args.len = linux_args.len;
885 	bsd_args.flags = linux_args.flags;
886 	return (orecv(td, &bsd_args));
887 }
888 
889 struct linux_sendto_args {
890 	int s;
891 	void *msg;
892 	int len;
893 	int flags;
894 	caddr_t to;
895 	int tolen;
896 };
897 
898 static int
899 linux_sendto(struct thread *td, struct linux_sendto_args *args)
900 {
901 	struct linux_sendto_args linux_args;
902 	struct sendto_args /* {
903 		int s;
904 		caddr_t buf;
905 		size_t len;
906 		int flags;
907 		caddr_t to;
908 		int tolen;
909 	} */ bsd_args;
910 	caddr_t sg = stackgap_init();
911 	struct sockaddr *to;
912 	int tolen, error;
913 
914 	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
915 		return (error);
916 
917 	tolen = linux_args.tolen;
918 	if (linux_args.to) {
919 		error = linux_sa_get(&sg, &to,
920 		    (struct osockaddr *) linux_args.to, &tolen);
921 		if (error)
922 			return (error);
923 	} else
924 		to = NULL;
925 
926 	bsd_args.s = linux_args.s;
927 	bsd_args.buf = linux_args.msg;
928 	bsd_args.len = linux_args.len;
929 	bsd_args.flags = linux_args.flags;
930 	bsd_args.to = (caddr_t) to;
931 	bsd_args.tolen = (unsigned int) tolen;
932 
933 	if (linux_check_hdrincl(td, linux_args.s) == 0)
934 		/* IP_HDRINCL set, tweak the packet before sending */
935 		return (linux_sendto_hdrincl(td, &bsd_args));
936 
937 	return (sendto(td, &bsd_args));
938 }
939 
940 struct linux_recvfrom_args {
941 	int s;
942 	void *buf;
943 	int len;
944 	int flags;
945 	caddr_t from;
946 	int *fromlen;
947 };
948 
949 static int
950 linux_recvfrom(struct thread *td, struct linux_recvfrom_args *args)
951 {
952 	struct linux_recvfrom_args linux_args;
953 	struct recvfrom_args /* {
954 		int s;
955 		caddr_t buf;
956 		size_t len;
957 		int flags;
958 		caddr_t from;
959 		int *fromlenaddr;
960 	} */ bsd_args;
961 	int error;
962 
963 	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
964 		return (error);
965 
966 	bsd_args.s = linux_args.s;
967 	bsd_args.buf = linux_args.buf;
968 	bsd_args.len = linux_args.len;
969 	bsd_args.flags = linux_to_bsd_msg_flags(linux_args.flags);
970 	bsd_args.from = linux_args.from;
971 	bsd_args.fromlenaddr = linux_args.fromlen;
972 	error = orecvfrom(td, &bsd_args);
973 	if (error)
974 		return (error);
975 	if (linux_args.from) {
976 		error = linux_sa_put((struct osockaddr *) linux_args.from);
977 		if (error)
978 			return (error);
979 	}
980 	return (0);
981 }
982 
983 struct linux_sendmsg_args {
984 	int s;
985 	const struct msghdr *msg;
986 	int flags;
987 };
988 
989 static int
990 linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args)
991 {
992 	struct linux_sendmsg_args linux_args;
993 	struct sendmsg_args /* {
994 		int s;
995 		const struct msghdr *msg;
996 		int flags;
997 	} */ bsd_args;
998 	struct msghdr msg;
999 	struct msghdr *nmsg = NULL;
1000 	int error;
1001 	int level;
1002 	caddr_t control;
1003 
1004 	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
1005 		return (error);
1006 
1007 	error = copyin(linux_args.msg, (caddr_t) &msg, sizeof(msg));
1008 	if (error)
1009 		return (error);
1010 
1011 	if (msg.msg_name) {
1012 		struct sockaddr *sa;
1013 		caddr_t sg = stackgap_init();
1014 
1015 		nmsg = (struct msghdr *) stackgap_alloc(&sg,
1016 		    sizeof(struct msghdr));
1017 		if (!nmsg)
1018 			return (ENOMEM);
1019 
1020 		error = linux_sa_get(&sg, &sa,
1021 		    (struct osockaddr *) msg.msg_name, &msg.msg_namelen);
1022 		if (error)
1023 			return (error);
1024 
1025 		msg.msg_name = (struct sockaddr *) sa;
1026 		error = copyout(&msg, nmsg, sizeof(struct msghdr));
1027 		if (error)
1028 			return (error);
1029 	}
1030 
1031 	error = copyin(&linux_args.msg->msg_control, &control,
1032 	    sizeof(caddr_t));
1033 	if (error)
1034 		return (error);
1035 
1036 	if (control == NULL)
1037 		goto done;
1038 
1039 	error = copyin(&((struct cmsghdr*)control)->cmsg_level, &level,
1040 	    sizeof(int));
1041 	if (error)
1042 		return (error);
1043 
1044 	if (level == 1) {
1045 		/*
1046 		 * Linux thinks that SOL_SOCKET is 1; we know
1047 		 * that it's really 0xffff, of course.
1048 		 */
1049 		level = SOL_SOCKET;
1050 		error = copyout(&level,
1051 		    &((struct cmsghdr *)control)->cmsg_level, sizeof(int));
1052 		if (error)
1053 			return (error);
1054 	}
1055 done:
1056 	return (sendmsg(td, &bsd_args));
1057 }
1058 
1059 struct linux_recvmsg_args {
1060 	int s;
1061 	struct msghdr *msg;
1062 	int flags;
1063 };
1064 
1065 static int
1066 linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args)
1067 {
1068 	struct linux_recvmsg_args linux_args;
1069 	struct recvmsg_args /* {
1070 		int	s;
1071 		struct	msghdr *msg;
1072 		int	flags;
1073 	} */ bsd_args;
1074 	struct msghdr msg;
1075 	int error;
1076 
1077 	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
1078 		return (error);
1079 
1080 	bsd_args.s = linux_args.s;
1081 	bsd_args.msg = linux_args.msg;
1082 	bsd_args.flags = linux_to_bsd_msg_flags(linux_args.flags);
1083 	error = recvmsg(td, &bsd_args);
1084 	if (error)
1085 		return (error);
1086 
1087 	error = copyin((caddr_t)linux_args.msg, (caddr_t)&msg, sizeof(msg));
1088 	if (error)
1089 		return (error);
1090 	if (msg.msg_name && msg.msg_namelen > 2)
1091 		error = linux_sa_put(msg.msg_name);
1092 	return (error);
1093 }
1094 
1095 struct linux_shutdown_args {
1096 	int s;
1097 	int how;
1098 };
1099 
1100 static int
1101 linux_shutdown(struct thread *td, struct linux_shutdown_args *args)
1102 {
1103 	struct linux_shutdown_args linux_args;
1104 	struct shutdown_args /* {
1105 		int s;
1106 		int how;
1107 	} */ bsd_args;
1108 	int error;
1109 
1110 	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
1111 		return (error);
1112 
1113 	bsd_args.s = linux_args.s;
1114 	bsd_args.how = linux_args.how;
1115 	return (shutdown(td, &bsd_args));
1116 }
1117 
1118 struct linux_setsockopt_args {
1119 	int s;
1120 	int level;
1121 	int optname;
1122 	void *optval;
1123 	int optlen;
1124 };
1125 
1126 static int
1127 linux_setsockopt(struct thread *td, struct linux_setsockopt_args *args)
1128 {
1129 	struct linux_setsockopt_args linux_args;
1130 	struct setsockopt_args /* {
1131 		int s;
1132 		int level;
1133 		int name;
1134 		caddr_t val;
1135 		int valsize;
1136 	} */ bsd_args;
1137 	int error, name;
1138 
1139 	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
1140 		return (error);
1141 
1142 	bsd_args.s = linux_args.s;
1143 	bsd_args.level = linux_to_bsd_sockopt_level(linux_args.level);
1144 	switch (bsd_args.level) {
1145 	case SOL_SOCKET:
1146 		name = linux_to_bsd_so_sockopt(linux_args.optname);
1147 		break;
1148 	case IPPROTO_IP:
1149 		name = linux_to_bsd_ip_sockopt(linux_args.optname);
1150 		break;
1151 	case IPPROTO_TCP:
1152 		/* Linux TCP option values match BSD's */
1153 		name = linux_args.optname;
1154 		break;
1155 	default:
1156 		name = -1;
1157 		break;
1158 	}
1159 	if (name == -1)
1160 		return (EINVAL);
1161 
1162 	bsd_args.name = name;
1163 	bsd_args.val = linux_args.optval;
1164 	bsd_args.valsize = linux_args.optlen;
1165 	return (setsockopt(td, &bsd_args));
1166 }
1167 
1168 struct linux_getsockopt_args {
1169 	int s;
1170 	int level;
1171 	int optname;
1172 	void *optval;
1173 	int *optlen;
1174 };
1175 
1176 static int
1177 linux_getsockopt(struct thread *td, struct linux_getsockopt_args *args)
1178 {
1179 	struct linux_getsockopt_args linux_args;
1180 	struct getsockopt_args /* {
1181 		int s;
1182 		int level;
1183 		int name;
1184 		caddr_t val;
1185 		int *avalsize;
1186 	} */ bsd_args;
1187 	int error, name;
1188 
1189 	if ((error = copyin(args, &linux_args, sizeof(linux_args))))
1190 		return (error);
1191 
1192 	bsd_args.s = linux_args.s;
1193 	bsd_args.level = linux_to_bsd_sockopt_level(linux_args.level);
1194 	switch (bsd_args.level) {
1195 	case SOL_SOCKET:
1196 		name = linux_to_bsd_so_sockopt(linux_args.optname);
1197 		break;
1198 	case IPPROTO_IP:
1199 		name = linux_to_bsd_ip_sockopt(linux_args.optname);
1200 		break;
1201 	case IPPROTO_TCP:
1202 		/* Linux TCP option values match BSD's */
1203 		name = linux_args.optname;
1204 		break;
1205 	default:
1206 		name = -1;
1207 		break;
1208 	}
1209 	if (name == -1)
1210 		return (EINVAL);
1211 
1212 	bsd_args.name = name;
1213 	bsd_args.val = linux_args.optval;
1214 	bsd_args.avalsize = linux_args.optlen;
1215 	return (getsockopt(td, &bsd_args));
1216 }
1217 
1218 int
1219 linux_socketcall(struct thread *td, struct linux_socketcall_args *args)
1220 {
1221 	void *arg = (void *)args->args;
1222 
1223 	switch (args->what) {
1224 	case LINUX_SOCKET:
1225 		return (linux_socket(td, arg));
1226 	case LINUX_BIND:
1227 		return (linux_bind(td, arg));
1228 	case LINUX_CONNECT:
1229 		return (linux_connect(td, arg));
1230 	case LINUX_LISTEN:
1231 		return (linux_listen(td, arg));
1232 	case LINUX_ACCEPT:
1233 		return (linux_accept(td, arg));
1234 	case LINUX_GETSOCKNAME:
1235 		return (linux_getsockname(td, arg));
1236 	case LINUX_GETPEERNAME:
1237 		return (linux_getpeername(td, arg));
1238 	case LINUX_SOCKETPAIR:
1239 		return (linux_socketpair(td, arg));
1240 	case LINUX_SEND:
1241 		return (linux_send(td, arg));
1242 	case LINUX_RECV:
1243 		return (linux_recv(td, arg));
1244 	case LINUX_SENDTO:
1245 		return (linux_sendto(td, arg));
1246 	case LINUX_RECVFROM:
1247 		return (linux_recvfrom(td, arg));
1248 	case LINUX_SHUTDOWN:
1249 		return (linux_shutdown(td, arg));
1250 	case LINUX_SETSOCKOPT:
1251 		return (linux_setsockopt(td, arg));
1252 	case LINUX_GETSOCKOPT:
1253 		return (linux_getsockopt(td, arg));
1254 	case LINUX_SENDMSG:
1255 		return (linux_sendmsg(td, arg));
1256 	case LINUX_RECVMSG:
1257 		return (linux_recvmsg(td, arg));
1258 	}
1259 
1260 	uprintf("LINUX: 'socket' typ=%d not implemented\n", args->what);
1261 	return (ENOSYS);
1262 }
1263 #endif	/*!__alpha__*/
1264