xref: /freebsd/sys/amd64/linux32/linux32_machdep.c (revision 6af83ee0d2941d18880b6aaa2b4facd1d30c6106)
1 /*-
2  * Copyright (c) 2004 Tim J. Robbins
3  * Copyright (c) 2002 Doug Rabson
4  * Copyright (c) 2000 Marcel Moolenaar
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer
12  *    in this position and unchanged.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. The name of the author may not be used to endorse or promote products
17  *    derived from this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/kernel.h>
36 #include <sys/systm.h>
37 #include <sys/lock.h>
38 #include <sys/malloc.h>
39 #include <sys/mman.h>
40 #include <sys/mutex.h>
41 #include <sys/proc.h>
42 #include <sys/resource.h>
43 #include <sys/resourcevar.h>
44 #include <sys/syscallsubr.h>
45 #include <sys/sysproto.h>
46 #include <sys/unistd.h>
47 
48 #include <machine/frame.h>
49 
50 #include <vm/vm.h>
51 #include <vm/pmap.h>
52 #include <vm/vm_map.h>
53 
54 #include <amd64/linux32/linux.h>
55 #include <amd64/linux32/linux32_proto.h>
56 #include <compat/linux/linux_ipc.h>
57 #include <compat/linux/linux_signal.h>
58 #include <compat/linux/linux_util.h>
59 
/*
 * Argument block for the old-style Linux select() entry point.
 * linux_old_select() copies one of these in from user space and widens
 * each l_uintptr_t member with PTRIN() before calling linux_select().
 */
60 struct l_old_select_argv {
61 	l_int		nfds;		/* passed through as newsel.nfds */
62 	l_uintptr_t	readfds;	/* user pointer held as an integer */
63 	l_uintptr_t	writefds;	/* user pointer held as an integer */
64 	l_uintptr_t	exceptfds;	/* user pointer held as an integer */
65 	l_uintptr_t	timeout;	/* user pointer held as an integer */
66 } __packed;
67 
68 int
69 linux_to_bsd_sigaltstack(int lsa)
70 {
71 	int bsa = 0;
72 
73 	if (lsa & LINUX_SS_DISABLE)
74 		bsa |= SS_DISABLE;
75 	if (lsa & LINUX_SS_ONSTACK)
76 		bsa |= SS_ONSTACK;
77 	return (bsa);
78 }
79 
80 int
81 bsd_to_linux_sigaltstack(int bsa)
82 {
83 	int lsa = 0;
84 
85 	if (bsa & SS_DISABLE)
86 		lsa |= LINUX_SS_DISABLE;
87 	if (bsa & SS_ONSTACK)
88 		lsa |= LINUX_SS_ONSTACK;
89 	return (lsa);
90 }
91 
92 int
93 linux_execve(struct thread *td, struct linux_execve_args *args)
94 {
95 	struct execve_args ap;
96 	caddr_t sg;
97 	int error;
98 	u_int32_t *p32, arg;
99 	char **p, *p64;
100 	int count;
101 
102 	sg = stackgap_init();
103 	CHECKALTEXIST(td, &sg, args->path);
104 
105 #ifdef DEBUG
106 	if (ldebug(execve))
107 		printf(ARGS(execve, "%s"), args->path);
108 #endif
109 
110 	ap.fname = args->path;
111 
112 	if (args->argp != NULL) {
113 		count = 0;
114 		p32 = (u_int32_t *)args->argp;
115 		do {
116 			error = copyin(p32++, &arg, sizeof(arg));
117 			if (error)
118 				return error;
119 			count++;
120 		} while (arg != 0);
121 		p = stackgap_alloc(&sg, count * sizeof(char *));
122 		ap.argv = p;
123 		p32 = (u_int32_t *)args->argp;
124 		do {
125 			error = copyin(p32++, &arg, sizeof(arg));
126 			if (error)
127 				return error;
128 			p64 = PTRIN(arg);
129 			error = copyout(&p64, p++, sizeof(p64));
130 			if (error)
131 				return error;
132 		} while (arg != 0);
133 	}
134 	if (args->envp != NULL) {
135 		count = 0;
136 		p32 = (u_int32_t *)args->envp;
137 		do {
138 			error = copyin(p32++, &arg, sizeof(arg));
139 			if (error)
140 				return error;
141 			count++;
142 		} while (arg != 0);
143 		p = stackgap_alloc(&sg, count * sizeof(char *));
144 		ap.envv = p;
145 		p32 = (u_int32_t *)args->envp;
146 		do {
147 			error = copyin(p32++, &arg, sizeof(arg));
148 			if (error)
149 				return error;
150 			p64 = PTRIN(arg);
151 			error = copyout(&p64, p++, sizeof(p64));
152 			if (error)
153 				return error;
154 		} while (arg != 0);
155 	}
156 
157 	return (execve(td, &ap));
158 }
159 
/*
 * 32-bit layout of an I/O vector element as read from user space by
 * linux_readv() and linux_writev(); both widen it into a struct iovec.
 */
160 struct iovec32 {
161 	u_int32_t iov_base;	/* buffer address; widened with PTRIN() */
162 	int	iov_len;	/* buffer length, copied to iov_len as is */
163 };
/*
 * linux_readv()/linux_writev() reject any iovcnt larger than
 * STACKGAPLEN / sizeof(struct iovec) with EINVAL.
 */
164 #define	STACKGAPLEN	400
165 
/* The 32-bit element must be exactly 8 bytes. */
166 CTASSERT(sizeof(struct iovec32) == 8);
167 
168 int
169 linux_readv(struct thread *td, struct linux_readv_args *uap)
170 {
171 	int error, osize, nsize, i;
172 	caddr_t sg;
173 	struct readv_args /* {
174 		syscallarg(int) fd;
175 		syscallarg(struct iovec *) iovp;
176 		syscallarg(u_int) iovcnt;
177 	} */ a;
178 	struct iovec32 *oio;
179 	struct iovec *nio;
180 
181 	sg = stackgap_init();
182 
183 	if (uap->iovcnt > (STACKGAPLEN / sizeof (struct iovec)))
184 		return (EINVAL);
185 
186 	osize = uap->iovcnt * sizeof (struct iovec32);
187 	nsize = uap->iovcnt * sizeof (struct iovec);
188 
189 	oio = malloc(osize, M_TEMP, M_WAITOK);
190 	nio = malloc(nsize, M_TEMP, M_WAITOK);
191 
192 	error = 0;
193 	if ((error = copyin(uap->iovp, oio, osize)))
194 		goto punt;
195 	for (i = 0; i < uap->iovcnt; i++) {
196 		nio[i].iov_base = PTRIN(oio[i].iov_base);
197 		nio[i].iov_len = oio[i].iov_len;
198 	}
199 
200 	a.fd = uap->fd;
201 	a.iovp = stackgap_alloc(&sg, nsize);
202 	a.iovcnt = uap->iovcnt;
203 
204 	if ((error = copyout(nio, (caddr_t)a.iovp, nsize)))
205 		goto punt;
206 	error = readv(td, &a);
207 
208 punt:
209 	free(oio, M_TEMP);
210 	free(nio, M_TEMP);
211 	return (error);
212 }
213 
214 int
215 linux_writev(struct thread *td, struct linux_writev_args *uap)
216 {
217 	int error, i, nsize, osize;
218 	caddr_t sg;
219 	struct writev_args /* {
220 		syscallarg(int) fd;
221 		syscallarg(struct iovec *) iovp;
222 		syscallarg(u_int) iovcnt;
223 	} */ a;
224 	struct iovec32 *oio;
225 	struct iovec *nio;
226 
227 	sg = stackgap_init();
228 
229 	if (uap->iovcnt > (STACKGAPLEN / sizeof (struct iovec)))
230 		return (EINVAL);
231 
232 	osize = uap->iovcnt * sizeof (struct iovec32);
233 	nsize = uap->iovcnt * sizeof (struct iovec);
234 
235 	oio = malloc(osize, M_TEMP, M_WAITOK);
236 	nio = malloc(nsize, M_TEMP, M_WAITOK);
237 
238 	error = 0;
239 	if ((error = copyin(uap->iovp, oio, osize)))
240 		goto punt;
241 	for (i = 0; i < uap->iovcnt; i++) {
242 		nio[i].iov_base = PTRIN(oio[i].iov_base);
243 		nio[i].iov_len = oio[i].iov_len;
244 	}
245 
246 	a.fd = uap->fd;
247 	a.iovp = stackgap_alloc(&sg, nsize);
248 	a.iovcnt = uap->iovcnt;
249 
250 	if ((error = copyout(nio, (caddr_t)a.iovp, nsize)))
251 		goto punt;
252 	error = writev(td, &a);
253 
254 punt:
255 	free(oio, M_TEMP);
256 	free(nio, M_TEMP);
257 	return (error);
258 }
259 
/*
 * Indirect argument block used by the LINUX_MSGRCV case of linux_ipc()
 * when the upper 16 bits of 'what' are zero: args->ptr then points at one
 * of these instead of at the message buffer itself.
 */
260 struct l_ipc_kludge {
261 	l_uintptr_t msgp;	/* message buffer; widened with PTRIN() */
262 	l_long msgtyp;		/* copied to linux_msgrcv_args.msgtyp */
263 } __packed;
264 
265 int
266 linux_ipc(struct thread *td, struct linux_ipc_args *args)
267 {
268 
269 	switch (args->what & 0xFFFF) {
270 	case LINUX_SEMOP: {
271 		struct linux_semop_args a;
272 
273 		a.semid = args->arg1;
274 		a.tsops = args->ptr;
275 		a.nsops = args->arg2;
276 		return (linux_semop(td, &a));
277 	}
278 	case LINUX_SEMGET: {
279 		struct linux_semget_args a;
280 
281 		a.key = args->arg1;
282 		a.nsems = args->arg2;
283 		a.semflg = args->arg3;
284 		return (linux_semget(td, &a));
285 	}
286 	case LINUX_SEMCTL: {
287 		struct linux_semctl_args a;
288 		int error;
289 
290 		a.semid = args->arg1;
291 		a.semnum = args->arg2;
292 		a.cmd = args->arg3;
293 		error = copyin(args->ptr, &a.arg, sizeof(a.arg));
294 		if (error)
295 			return (error);
296 		return (linux_semctl(td, &a));
297 	}
298 	case LINUX_MSGSND: {
299 		struct linux_msgsnd_args a;
300 
301 		a.msqid = args->arg1;
302 		a.msgp = args->ptr;
303 		a.msgsz = args->arg2;
304 		a.msgflg = args->arg3;
305 		return (linux_msgsnd(td, &a));
306 	}
307 	case LINUX_MSGRCV: {
308 		struct linux_msgrcv_args a;
309 
310 		a.msqid = args->arg1;
311 		a.msgsz = args->arg2;
312 		a.msgflg = args->arg3;
313 		if ((args->what >> 16) == 0) {
314 			struct l_ipc_kludge tmp;
315 			int error;
316 
317 			if (args->ptr == 0)
318 				return (EINVAL);
319 			error = copyin(args->ptr, &tmp, sizeof(tmp));
320 			if (error)
321 				return (error);
322 			a.msgp = PTRIN(tmp.msgp);
323 			a.msgtyp = tmp.msgtyp;
324 		} else {
325 			a.msgp = args->ptr;
326 			a.msgtyp = args->arg5;
327 		}
328 		return (linux_msgrcv(td, &a));
329 	}
330 	case LINUX_MSGGET: {
331 		struct linux_msgget_args a;
332 
333 		a.key = args->arg1;
334 		a.msgflg = args->arg2;
335 		return (linux_msgget(td, &a));
336 	}
337 	case LINUX_MSGCTL: {
338 		struct linux_msgctl_args a;
339 
340 		a.msqid = args->arg1;
341 		a.cmd = args->arg2;
342 		a.buf = args->ptr;
343 		return (linux_msgctl(td, &a));
344 	}
345 	case LINUX_SHMAT: {
346 		struct linux_shmat_args a;
347 
348 		a.shmid = args->arg1;
349 		a.shmaddr = args->ptr;
350 		a.shmflg = args->arg2;
351 		a.raddr = PTRIN(args->arg3);
352 		return (linux_shmat(td, &a));
353 	}
354 	case LINUX_SHMDT: {
355 		struct linux_shmdt_args a;
356 
357 		a.shmaddr = args->ptr;
358 		return (linux_shmdt(td, &a));
359 	}
360 	case LINUX_SHMGET: {
361 		struct linux_shmget_args a;
362 
363 		a.key = args->arg1;
364 		a.size = args->arg2;
365 		a.shmflg = args->arg3;
366 		return (linux_shmget(td, &a));
367 	}
368 	case LINUX_SHMCTL: {
369 		struct linux_shmctl_args a;
370 
371 		a.shmid = args->arg1;
372 		a.cmd = args->arg2;
373 		a.buf = args->ptr;
374 		return (linux_shmctl(td, &a));
375 	}
376 	default:
377 		break;
378 	}
379 
380 	return (EINVAL);
381 }
382 
383 int
384 linux_old_select(struct thread *td, struct linux_old_select_args *args)
385 {
386 	struct l_old_select_argv linux_args;
387 	struct linux_select_args newsel;
388 	int error;
389 
390 #ifdef DEBUG
391 	if (ldebug(old_select))
392 		printf(ARGS(old_select, "%p"), args->ptr);
393 #endif
394 
395 	error = copyin(args->ptr, &linux_args, sizeof(linux_args));
396 	if (error)
397 		return (error);
398 
399 	newsel.nfds = linux_args.nfds;
400 	newsel.readfds = PTRIN(linux_args.readfds);
401 	newsel.writefds = PTRIN(linux_args.writefds);
402 	newsel.exceptfds = PTRIN(linux_args.exceptfds);
403 	newsel.timeout = PTRIN(linux_args.timeout);
404 	return (linux_select(td, &newsel));
405 }
406 
407 int
408 linux_fork(struct thread *td, struct linux_fork_args *args)
409 {
410 	int error;
411 
412 #ifdef DEBUG
413 	if (ldebug(fork))
414 		printf(ARGS(fork, ""));
415 #endif
416 
417 	if ((error = fork(td, (struct fork_args *)args)) != 0)
418 		return (error);
419 
420 	if (td->td_retval[1] == 1)
421 		td->td_retval[0] = 0;
422 	return (0);
423 }
424 
425 int
426 linux_vfork(struct thread *td, struct linux_vfork_args *args)
427 {
428 	int error;
429 
430 #ifdef DEBUG
431 	if (ldebug(vfork))
432 		printf(ARGS(vfork, ""));
433 #endif
434 
435 	if ((error = vfork(td, (struct vfork_args *)args)) != 0)
436 		return (error);
437 	/* Are we the child? */
438 	if (td->td_retval[1] == 1)
439 		td->td_retval[0] = 0;
440 	return (0);
441 }
442 
/*
 * Flag bits tested in linux_clone_args.flags by linux_clone().  The low
 * byte of the same word is treated there as the exit signal.
 */
443 #define CLONE_VM	0x100	/* set: linux_clone() adds RFMEM */
444 #define CLONE_FS	0x200	/* not referenced by linux_clone() */
445 #define CLONE_FILES	0x400	/* clear: linux_clone() adds RFFDG */
446 #define CLONE_SIGHAND	0x800	/* set: linux_clone() adds RFSIGSHARE */
447 #define CLONE_PID	0x1000	/* only reported as unsupported under DEBUG */
448 
449 int
450 linux_clone(struct thread *td, struct linux_clone_args *args)
451 {
452 	int error, ff = RFPROC | RFSTOPPED;
453 	struct proc *p2;
454 	struct thread *td2;
455 	int exit_signal;
456 
457 #ifdef DEBUG
458 	if (ldebug(clone)) {
459 		printf(ARGS(clone, "flags %x, stack %x"),
460 		    (unsigned int)(uintptr_t)args->flags,
461 		    (unsigned int)(uintptr_t)args->stack);
462 		if (args->flags & CLONE_PID)
463 			printf(LMSG("CLONE_PID not yet supported"));
464 	}
465 #endif
466 
467 	if (!args->stack)
468 		return (EINVAL);
469 
470 	exit_signal = args->flags & 0x000000ff;
471 	if (exit_signal >= LINUX_NSIG)
472 		return (EINVAL);
473 
474 	if (exit_signal <= LINUX_SIGTBLSZ)
475 		exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)];
476 
477 	if (args->flags & CLONE_VM)
478 		ff |= RFMEM;
479 	if (args->flags & CLONE_SIGHAND)
480 		ff |= RFSIGSHARE;
481 	if (!(args->flags & CLONE_FILES))
482 		ff |= RFFDG;
483 
484 	error = fork1(td, ff, 0, &p2);
485 	if (error)
486 		return (error);
487 
488 
489 	PROC_LOCK(p2);
490 	p2->p_sigparent = exit_signal;
491 	PROC_UNLOCK(p2);
492 	td2 = FIRST_THREAD_IN_PROC(p2);
493 	td2->td_frame->tf_rsp = PTROUT(args->stack);
494 
495 #ifdef DEBUG
496 	if (ldebug(clone))
497 		printf(LMSG("clone: successful rfork to %ld, stack %p sig = %d"),
498 		    (long)p2->p_pid, args->stack, exit_signal);
499 #endif
500 
501 	/*
502 	 * Make this runnable after we are finished with it.
503 	 */
504 	mtx_lock_spin(&sched_lock);
505 	TD_SET_CAN_RUN(td2);
506 	setrunqueue(td2, SRQ_BORING);
507 	mtx_unlock_spin(&sched_lock);
508 
509 	td->td_retval[0] = p2->p_pid;
510 	td->td_retval[1] = 0;
511 	return (0);
512 }
513 
514 /* XXX move */
/*
 * Argument block shared by linux_mmap() and linux_mmap2().  linux_mmap()
 * copies it in from user space verbatim, so the member layout must not
 * change; linux_mmap2() fills it in from its register arguments.
 */
515 struct l_mmap_argv {
516 	l_ulong		addr;	/* requested address; widened with PTRIN() */
517 	l_int		len;	/* mapping length in bytes */
518 	l_int		prot;	/* protection bits */
519 	l_int		flags;	/* LINUX_MAP_* flags */
520 	l_int		fd;	/* ignored when LINUX_MAP_ANON is set */
521 	l_int		pos;	/* file offset in bytes */
522 };
523 
/*
 * Stack sizing used by linux_mmap_common() for LINUX_MAP_GROWSDOWN
 * mappings: the region is at least (STACK_SIZE - GUARD_SIZE) bytes.
 */
524 #define STACK_SIZE  (2 * 1024 * 1024)
525 #define GUARD_SIZE  (4 * PAGE_SIZE)
526 
/* Common back end for linux_mmap() and linux_mmap2(). */
527 static int linux_mmap_common(struct thread *, struct l_mmap_argv *);
528 
529 int
530 linux_mmap2(struct thread *td, struct linux_mmap2_args *args)
531 {
532 	struct l_mmap_argv linux_args;
533 
534 #ifdef DEBUG
535 	if (ldebug(mmap2))
536 		printf(ARGS(mmap2, "%p, %d, %d, 0x%08x, %d, %d"),
537 		    (void *)(intptr_t)args->addr, args->len, args->prot,
538 		    args->flags, args->fd, args->pgoff);
539 #endif
540 
541 	linux_args.addr = PTROUT(args->addr);
542 	linux_args.len = args->len;
543 	linux_args.prot = args->prot;
544 	linux_args.flags = args->flags;
545 	linux_args.fd = args->fd;
546 	linux_args.pos = args->pgoff * PAGE_SIZE;
547 
548 	return (linux_mmap_common(td, &linux_args));
549 }
550 
551 int
552 linux_mmap(struct thread *td, struct linux_mmap_args *args)
553 {
554 	int error;
555 	struct l_mmap_argv linux_args;
556 
557 	error = copyin(args->ptr, &linux_args, sizeof(linux_args));
558 	if (error)
559 		return (error);
560 
561 #ifdef DEBUG
562 	if (ldebug(mmap))
563 		printf(ARGS(mmap, "%p, %d, %d, 0x%08x, %d, %d"),
564 		    (void *)(intptr_t)linux_args.addr, linux_args.len,
565 		    linux_args.prot, linux_args.flags, linux_args.fd,
566 		    linux_args.pos);
567 #endif
568 
569 	return (linux_mmap_common(td, &linux_args));
570 }
571 
572 static int
573 linux_mmap_common(struct thread *td, struct l_mmap_argv *linux_args)
574 {
575 	struct proc *p = td->td_proc;
576 	struct mmap_args /* {
577 		caddr_t addr;
578 		size_t len;
579 		int prot;
580 		int flags;
581 		int fd;
582 		long pad;
583 		off_t pos;
584 	} */ bsd_args;
585 	int error;
586 
587 	error = 0;
588 	bsd_args.flags = 0;
589 	if (linux_args->flags & LINUX_MAP_SHARED)
590 		bsd_args.flags |= MAP_SHARED;
591 	if (linux_args->flags & LINUX_MAP_PRIVATE)
592 		bsd_args.flags |= MAP_PRIVATE;
593 	if (linux_args->flags & LINUX_MAP_FIXED)
594 		bsd_args.flags |= MAP_FIXED;
595 	if (linux_args->flags & LINUX_MAP_ANON)
596 		bsd_args.flags |= MAP_ANON;
597 	else
598 		bsd_args.flags |= MAP_NOSYNC;
599 	if (linux_args->flags & LINUX_MAP_GROWSDOWN) {
600 		bsd_args.flags |= MAP_STACK;
601 
602 		/* The linux MAP_GROWSDOWN option does not limit auto
603 		 * growth of the region.  Linux mmap with this option
604 		 * takes as addr the inital BOS, and as len, the initial
605 		 * region size.  It can then grow down from addr without
606 		 * limit.  However, linux threads has an implicit internal
607 		 * limit to stack size of STACK_SIZE.  Its just not
608 		 * enforced explicitly in linux.  But, here we impose
609 		 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
610 		 * region, since we can do this with our mmap.
611 		 *
612 		 * Our mmap with MAP_STACK takes addr as the maximum
613 		 * downsize limit on BOS, and as len the max size of
614 		 * the region.  It them maps the top SGROWSIZ bytes,
615 		 * and autgrows the region down, up to the limit
616 		 * in addr.
617 		 *
618 		 * If we don't use the MAP_STACK option, the effect
619 		 * of this code is to allocate a stack region of a
620 		 * fixed size of (STACK_SIZE - GUARD_SIZE).
621 		 */
622 
623 		/* This gives us TOS */
624 		bsd_args.addr = (caddr_t)PTRIN(linux_args->addr) +
625 		    linux_args->len;
626 
627 		if ((caddr_t)PTRIN(bsd_args.addr) >
628 		    p->p_vmspace->vm_maxsaddr) {
629 			/* Some linux apps will attempt to mmap
630 			 * thread stacks near the top of their
631 			 * address space.  If their TOS is greater
632 			 * than vm_maxsaddr, vm_map_growstack()
633 			 * will confuse the thread stack with the
634 			 * process stack and deliver a SEGV if they
635 			 * attempt to grow the thread stack past their
636 			 * current stacksize rlimit.  To avoid this,
637 			 * adjust vm_maxsaddr upwards to reflect
638 			 * the current stacksize rlimit rather
639 			 * than the maximum possible stacksize.
640 			 * It would be better to adjust the
641 			 * mmap'ed region, but some apps do not check
642 			 * mmap's return value.
643 			 */
644 			PROC_LOCK(p);
645 			p->p_vmspace->vm_maxsaddr =
646 			    (char *)LINUX32_USRSTACK -
647 			    lim_cur(p, RLIMIT_STACK);
648 			PROC_UNLOCK(p);
649 		}
650 
651 		/* This gives us our maximum stack size */
652 		if (linux_args->len > STACK_SIZE - GUARD_SIZE)
653 			bsd_args.len = linux_args->len;
654 		else
655 			bsd_args.len  = STACK_SIZE - GUARD_SIZE;
656 
657 		/* This gives us a new BOS.  If we're using VM_STACK, then
658 		 * mmap will just map the top SGROWSIZ bytes, and let
659 		 * the stack grow down to the limit at BOS.  If we're
660 		 * not using VM_STACK we map the full stack, since we
661 		 * don't have a way to autogrow it.
662 		 */
663 		bsd_args.addr -= bsd_args.len;
664 	} else {
665 		bsd_args.addr = (caddr_t)PTRIN(linux_args->addr);
666 		bsd_args.len  = linux_args->len;
667 	}
668 	/*
669 	 * XXX i386 Linux always emulator forces PROT_READ on (why?)
670 	 * so we do the same. We add PROT_EXEC to work around buggy
671 	 * applications (e.g. Java) that take advantage of the fact
672 	 * that execute permissions are not enforced by x86 CPUs.
673 	 */
674 	bsd_args.prot = linux_args->prot | PROT_EXEC | PROT_READ;
675 	if (linux_args->flags & LINUX_MAP_ANON)
676 		bsd_args.fd = -1;
677 	else
678 		bsd_args.fd = linux_args->fd;
679 	bsd_args.pos = linux_args->pos;
680 	bsd_args.pad = 0;
681 
682 #ifdef DEBUG
683 	if (ldebug(mmap))
684 		printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n",
685 		    __func__,
686 		    (void *)bsd_args.addr, (int)bsd_args.len, bsd_args.prot,
687 		    bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
688 #endif
689 	error = mmap(td, &bsd_args);
690 #ifdef DEBUG
691 	if (ldebug(mmap))
692 		printf("-> %s() return: 0x%x (0x%08x)\n",
693 			__func__, error, (u_int)td->td_retval[0]);
694 #endif
695 	return (error);
696 }
697 
698 int
699 linux_pipe(struct thread *td, struct linux_pipe_args *args)
700 {
701 	int pip[2];
702 	int error;
703 	register_t reg_rdx;
704 
705 #ifdef DEBUG
706 	if (ldebug(pipe))
707 		printf(ARGS(pipe, "*"));
708 #endif
709 
710 	reg_rdx = td->td_retval[1];
711 	error = pipe(td, 0);
712 	if (error) {
713 		td->td_retval[1] = reg_rdx;
714 		return (error);
715 	}
716 
717 	pip[0] = td->td_retval[0];
718 	pip[1] = td->td_retval[1];
719 	error = copyout(pip, args->pipefds, 2 * sizeof(int));
720 	if (error) {
721 		td->td_retval[1] = reg_rdx;
722 		return (error);
723 	}
724 
725 	td->td_retval[1] = reg_rdx;
726 	td->td_retval[0] = 0;
727 	return (0);
728 }
729 
730 int
731 linux_sigaction(struct thread *td, struct linux_sigaction_args *args)
732 {
733 	l_osigaction_t osa;
734 	l_sigaction_t act, oact;
735 	int error;
736 
737 #ifdef DEBUG
738 	if (ldebug(sigaction))
739 		printf(ARGS(sigaction, "%d, %p, %p"),
740 		    args->sig, (void *)args->nsa, (void *)args->osa);
741 #endif
742 
743 	if (args->nsa != NULL) {
744 		error = copyin(args->nsa, &osa, sizeof(l_osigaction_t));
745 		if (error)
746 			return (error);
747 		act.lsa_handler = osa.lsa_handler;
748 		act.lsa_flags = osa.lsa_flags;
749 		act.lsa_restorer = osa.lsa_restorer;
750 		LINUX_SIGEMPTYSET(act.lsa_mask);
751 		act.lsa_mask.__bits[0] = osa.lsa_mask;
752 	}
753 
754 	error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL,
755 	    args->osa ? &oact : NULL);
756 
757 	if (args->osa != NULL && !error) {
758 		osa.lsa_handler = oact.lsa_handler;
759 		osa.lsa_flags = oact.lsa_flags;
760 		osa.lsa_restorer = oact.lsa_restorer;
761 		osa.lsa_mask = oact.lsa_mask.__bits[0];
762 		error = copyout(&osa, args->osa, sizeof(l_osigaction_t));
763 	}
764 
765 	return (error);
766 }
767 
768 /*
769  * Linux has two extra args, restart and oldmask.  We dont use these,
770  * but it seems that "restart" is actually a context pointer that
771  * enables the signal to happen with a different register set.
772  */
773 int
774 linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args)
775 {
776 	sigset_t sigmask;
777 	l_sigset_t mask;
778 
779 #ifdef DEBUG
780 	if (ldebug(sigsuspend))
781 		printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask);
782 #endif
783 
784 	LINUX_SIGEMPTYSET(mask);
785 	mask.__bits[0] = args->mask;
786 	linux_to_bsd_sigset(&mask, &sigmask);
787 	return (kern_sigsuspend(td, sigmask));
788 }
789 
790 int
791 linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap)
792 {
793 	l_sigset_t lmask;
794 	sigset_t sigmask;
795 	int error;
796 
797 #ifdef DEBUG
798 	if (ldebug(rt_sigsuspend))
799 		printf(ARGS(rt_sigsuspend, "%p, %d"),
800 		    (void *)uap->newset, uap->sigsetsize);
801 #endif
802 
803 	if (uap->sigsetsize != sizeof(l_sigset_t))
804 		return (EINVAL);
805 
806 	error = copyin(uap->newset, &lmask, sizeof(l_sigset_t));
807 	if (error)
808 		return (error);
809 
810 	linux_to_bsd_sigset(&lmask, &sigmask);
811 	return (kern_sigsuspend(td, sigmask));
812 }
813 
814 int
815 linux_pause(struct thread *td, struct linux_pause_args *args)
816 {
817 	struct proc *p = td->td_proc;
818 	sigset_t sigmask;
819 
820 #ifdef DEBUG
821 	if (ldebug(pause))
822 		printf(ARGS(pause, ""));
823 #endif
824 
825 	PROC_LOCK(p);
826 	sigmask = td->td_sigmask;
827 	PROC_UNLOCK(p);
828 	return (kern_sigsuspend(td, sigmask));
829 }
830 
831 int
832 linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap)
833 {
834 	stack_t ss, oss;
835 	l_stack_t lss;
836 	int error;
837 
838 #ifdef DEBUG
839 	if (ldebug(sigaltstack))
840 		printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss);
841 #endif
842 
843 	if (uap->uss != NULL) {
844 		error = copyin(uap->uss, &lss, sizeof(l_stack_t));
845 		if (error)
846 			return (error);
847 
848 		ss.ss_sp = PTRIN(lss.ss_sp);
849 		ss.ss_size = lss.ss_size;
850 		ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags);
851 	}
852 	error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL,
853 	    (uap->uoss != NULL) ? &oss : NULL);
854 	if (!error && uap->uoss != NULL) {
855 		lss.ss_sp = PTROUT(oss.ss_sp);
856 		lss.ss_size = oss.ss_size;
857 		lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags);
858 		error = copyout(&lss, uap->uoss, sizeof(l_stack_t));
859 	}
860 
861 	return (error);
862 }
863 
864 int
865 linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args)
866 {
867 	struct ftruncate_args sa;
868 
869 #ifdef DEBUG
870 	if (ldebug(ftruncate64))
871 		printf(ARGS(ftruncate64, "%u, %jd"), args->fd,
872 		    (intmax_t)args->length);
873 #endif
874 
875 	sa.fd = args->fd;
876 	sa.pad = 0;
877 	sa.length = args->length;
878 	return ftruncate(td, &sa);
879 }
880 
881 int
882 linux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap)
883 {
884 	struct timeval atv;
885 	l_timeval atv32;
886 	struct timezone rtz;
887 	int error = 0;
888 
889 	if (uap->tp) {
890 		microtime(&atv);
891 		atv32.tv_sec = atv.tv_sec;
892 		atv32.tv_usec = atv.tv_usec;
893 		error = copyout(&atv32, uap->tp, sizeof (atv32));
894 	}
895 	if (error == 0 && uap->tzp != NULL) {
896 		rtz.tz_minuteswest = tz_minuteswest;
897 		rtz.tz_dsttime = tz_dsttime;
898 		error = copyout(&rtz, uap->tzp, sizeof (rtz));
899 	}
900 	return (error);
901 }
902 
903 int
904 linux_nanosleep(struct thread *td, struct linux_nanosleep_args *uap)
905 {
906 	struct timespec ats;
907 	struct l_timespec ats32;
908 	struct nanosleep_args bsd_args;
909 	int error;
910 	caddr_t sg;
911 	caddr_t sarqts, sarmts;
912 
913 	sg = stackgap_init();
914 	error = copyin(uap->rqtp, &ats32, sizeof(ats32));
915 	if (error != 0)
916 		return (error);
917 	ats.tv_sec = ats32.tv_sec;
918 	ats.tv_nsec = ats32.tv_nsec;
919 	sarqts = stackgap_alloc(&sg, sizeof(ats));
920 	error = copyout(&ats, sarqts, sizeof(ats));
921 	if (error != 0)
922 		return (error);
923 	sarmts = stackgap_alloc(&sg, sizeof(ats));
924 	bsd_args.rqtp = (void *)sarqts;
925 	bsd_args.rmtp = (void *)sarmts;
926 	error = nanosleep(td, &bsd_args);
927 	if (uap->rmtp != NULL) {
928 		error = copyin(sarmts, &ats, sizeof(ats));
929 		if (error != 0)
930 			return (error);
931 		ats32.tv_sec = ats.tv_sec;
932 		ats32.tv_nsec = ats.tv_nsec;
933 		error = copyout(&ats32, uap->rmtp, sizeof(ats32));
934 		if (error != 0)
935 			return (error);
936 	}
937 	return (error);
938 }
939 
940 int
941 linux_getrusage(struct thread *td, struct linux_getrusage_args *uap)
942 {
943 	struct l_rusage s32;
944 	struct rusage s;
945 	int error;
946 
947 	error = kern_getrusage(td, uap->who, &s);
948 	if (error != 0)
949 		return (error);
950 	if (uap->rusage != NULL) {
951 		s32.ru_utime.tv_sec = s.ru_utime.tv_sec;
952 		s32.ru_utime.tv_usec = s.ru_utime.tv_usec;
953 		s32.ru_stime.tv_sec = s.ru_stime.tv_sec;
954 		s32.ru_stime.tv_usec = s.ru_stime.tv_usec;
955 		s32.ru_maxrss = s.ru_maxrss;
956 		s32.ru_ixrss = s.ru_ixrss;
957 		s32.ru_idrss = s.ru_idrss;
958 		s32.ru_isrss = s.ru_isrss;
959 		s32.ru_minflt = s.ru_minflt;
960 		s32.ru_majflt = s.ru_majflt;
961 		s32.ru_nswap = s.ru_nswap;
962 		s32.ru_inblock = s.ru_inblock;
963 		s32.ru_oublock = s.ru_oublock;
964 		s32.ru_msgsnd = s.ru_msgsnd;
965 		s32.ru_msgrcv = s.ru_msgrcv;
966 		s32.ru_nsignals = s.ru_nsignals;
967 		s32.ru_nvcsw = s.ru_nvcsw;
968 		s32.ru_nivcsw = s.ru_nivcsw;
969 		error = copyout(&s32, uap->rusage, sizeof(s32));
970 	}
971 	return (error);
972 }
973 
974 int
975 linux_sched_rr_get_interval(struct thread *td,
976     struct linux_sched_rr_get_interval_args *uap)
977 {
978 	struct sched_rr_get_interval_args bsd_args;
979 	caddr_t sg, psgts;
980 	struct timespec ts;
981 	struct l_timespec ts32;
982 	int error;
983 
984 	sg = stackgap_init();
985 	psgts = stackgap_alloc(&sg, sizeof(struct timespec));
986 	bsd_args.pid = uap->pid;
987 	bsd_args.interval = (void *)psgts;
988 	error = sched_rr_get_interval(td, &bsd_args);
989 	if (error != 0)
990 		return (error);
991 	error = copyin(psgts, &ts, sizeof(ts));
992 	if (error != 0)
993 		return (error);
994 	ts32.tv_sec = ts.tv_sec;
995 	ts32.tv_nsec = ts.tv_nsec;
996 	return (copyout(&ts32, uap->interval, sizeof(ts32)));
997 }
998 
999 int
1000 linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
1001 {
1002 	struct mprotect_args bsd_args;
1003 
1004 	bsd_args.addr = uap->addr;
1005 	bsd_args.len = uap->len;
1006 	bsd_args.prot = uap->prot;
1007 	/* XXX PROT_READ implies PROT_EXEC; see linux_mmap_common(). */
1008 	if ((bsd_args.prot & PROT_READ) != 0)
1009 		bsd_args.prot |= PROT_EXEC;
1010 	return (mprotect(td, &bsd_args));
1011 }
1012