xref: /freebsd/sys/amd64/linux32/linux32_machdep.c (revision 262e143bd46171a6415a5b28af260a5efa2a3db8)
1 /*-
2  * Copyright (c) 2004 Tim J. Robbins
3  * Copyright (c) 2002 Doug Rabson
4  * Copyright (c) 2000 Marcel Moolenaar
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer
12  *    in this position and unchanged.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. The name of the author may not be used to endorse or promote products
17  *    derived from this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/kernel.h>
36 #include <sys/systm.h>
37 #include <sys/imgact.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/mman.h>
41 #include <sys/mutex.h>
42 #include <sys/proc.h>
43 #include <sys/resource.h>
44 #include <sys/resourcevar.h>
45 #include <sys/syscallsubr.h>
46 #include <sys/sysproto.h>
47 #include <sys/unistd.h>
48 
49 #include <machine/frame.h>
50 
51 #include <vm/vm.h>
52 #include <vm/pmap.h>
53 #include <vm/vm_extern.h>
54 #include <vm/vm_kern.h>
55 #include <vm/vm_map.h>
56 
57 #include <amd64/linux32/linux.h>
58 #include <amd64/linux32/linux32_proto.h>
59 #include <compat/linux/linux_ipc.h>
60 #include <compat/linux/linux_signal.h>
61 #include <compat/linux/linux_util.h>
62 
63 struct l_old_select_argv {
64 	l_int		nfds;
65 	l_uintptr_t	readfds;
66 	l_uintptr_t	writefds;
67 	l_uintptr_t	exceptfds;
68 	l_uintptr_t	timeout;
69 } __packed;
70 
71 int
72 linux_to_bsd_sigaltstack(int lsa)
73 {
74 	int bsa = 0;
75 
76 	if (lsa & LINUX_SS_DISABLE)
77 		bsa |= SS_DISABLE;
78 	if (lsa & LINUX_SS_ONSTACK)
79 		bsa |= SS_ONSTACK;
80 	return (bsa);
81 }
82 
83 int
84 bsd_to_linux_sigaltstack(int bsa)
85 {
86 	int lsa = 0;
87 
88 	if (bsa & SS_DISABLE)
89 		lsa |= LINUX_SS_DISABLE;
90 	if (bsa & SS_ONSTACK)
91 		lsa |= LINUX_SS_ONSTACK;
92 	return (lsa);
93 }
94 
95 /*
96  * Custom version of exec_copyin_args() so that we can translate
97  * the pointers.
98  */
99 static int
100 linux_exec_copyin_args(struct image_args *args, char *fname,
101     enum uio_seg segflg, char **argv, char **envv)
102 {
103 	char *argp, *envp;
104 	u_int32_t *p32, arg;
105 	size_t length;
106 	int error;
107 
108 	bzero(args, sizeof(*args));
109 	if (argv == NULL)
110 		return (EFAULT);
111 
112 	/*
113 	 * Allocate temporary demand zeroed space for argument and
114 	 *	environment strings
115 	 */
116 	args->buf = (char *) kmem_alloc_wait(exec_map,
117 	    PATH_MAX + ARG_MAX + MAXSHELLCMDLEN);
118 	if (args->buf == NULL)
119 		return (ENOMEM);
120 	args->begin_argv = args->buf;
121 	args->endp = args->begin_argv;
122 	args->stringspace = ARG_MAX;
123 
124 	args->fname = args->buf + ARG_MAX;
125 
126 	/*
127 	 * Copy the file name.
128 	 */
129 	error = (segflg == UIO_SYSSPACE) ?
130 	    copystr(fname, args->fname, PATH_MAX, &length) :
131 	    copyinstr(fname, args->fname, PATH_MAX, &length);
132 	if (error != 0)
133 		return (error);
134 
135 	/*
136 	 * extract arguments first
137 	 */
138 	p32 = (u_int32_t *)argv;
139 	for (;;) {
140 		error = copyin(p32++, &arg, sizeof(arg));
141 		if (error)
142 			return (error);
143 		if (arg == 0)
144 			break;
145 		argp = PTRIN(arg);
146 		error = copyinstr(argp, args->endp, args->stringspace, &length);
147 		if (error) {
148 			if (error == ENAMETOOLONG)
149 				return (E2BIG);
150 			else
151 				return (error);
152 		}
153 		args->stringspace -= length;
154 		args->endp += length;
155 		args->argc++;
156 	}
157 
158 	args->begin_envv = args->endp;
159 
160 	/*
161 	 * extract environment strings
162 	 */
163 	if (envv) {
164 		p32 = (u_int32_t *)envv;
165 		for (;;) {
166 			error = copyin(p32++, &arg, sizeof(arg));
167 			if (error)
168 				return (error);
169 			if (arg == 0)
170 				break;
171 			envp = PTRIN(arg);
172 			error = copyinstr(envp, args->endp, args->stringspace,
173 			    &length);
174 			if (error) {
175 				if (error == ENAMETOOLONG)
176 					return (E2BIG);
177 				else
178 					return (error);
179 			}
180 			args->stringspace -= length;
181 			args->endp += length;
182 			args->envc++;
183 		}
184 	}
185 
186 	return (0);
187 }
188 
189 int
190 linux_execve(struct thread *td, struct linux_execve_args *args)
191 {
192 	struct image_args eargs;
193 	char *path;
194 	int error;
195 
196 	LCONVPATHEXIST(td, args->path, &path);
197 
198 #ifdef DEBUG
199 	if (ldebug(execve))
200 		printf(ARGS(execve, "%s"), path);
201 #endif
202 
203 	error = linux_exec_copyin_args(&eargs, path, UIO_SYSSPACE, args->argp,
204 	    args->envp);
205 	free(path, M_TEMP);
206 	if (error == 0)
207 		error = kern_execve(td, &eargs, NULL);
208 	exec_free_args(&eargs);
209 	return (error);
210 }
211 
212 struct iovec32 {
213 	u_int32_t iov_base;
214 	int	iov_len;
215 };
216 
217 CTASSERT(sizeof(struct iovec32) == 8);
218 
219 static int
220 linux32_copyinuio(struct iovec32 *iovp, u_int iovcnt, struct uio **uiop)
221 {
222 	struct iovec32 iov32;
223 	struct iovec *iov;
224 	struct uio *uio;
225 	u_int iovlen;
226 	int error, i;
227 
228 	*uiop = NULL;
229 	if (iovcnt > UIO_MAXIOV)
230 		return (EINVAL);
231 	iovlen = iovcnt * sizeof(struct iovec);
232 	uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK);
233 	iov = (struct iovec *)(uio + 1);
234 	for (i = 0; i < iovcnt; i++) {
235 		error = copyin(&iovp[i], &iov32, sizeof(struct iovec32));
236 		if (error) {
237 			free(uio, M_IOV);
238 			return (error);
239 		}
240 		iov[i].iov_base = PTRIN(iov32.iov_base);
241 		iov[i].iov_len = iov32.iov_len;
242 	}
243 	uio->uio_iov = iov;
244 	uio->uio_iovcnt = iovcnt;
245 	uio->uio_segflg = UIO_USERSPACE;
246 	uio->uio_offset = -1;
247 	uio->uio_resid = 0;
248 	for (i = 0; i < iovcnt; i++) {
249 		if (iov->iov_len > INT_MAX - uio->uio_resid) {
250 			free(uio, M_IOV);
251 			return (EINVAL);
252 		}
253 		uio->uio_resid += iov->iov_len;
254 		iov++;
255 	}
256 	*uiop = uio;
257 	return (0);
258 }
259 
260 int
261 linux_readv(struct thread *td, struct linux_readv_args *uap)
262 {
263 	struct uio *auio;
264 	int error;
265 
266 	error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio);
267 	if (error)
268 		return (error);
269 	error = kern_readv(td, uap->fd, auio);
270 	free(auio, M_IOV);
271 	return (error);
272 }
273 
274 int
275 linux_writev(struct thread *td, struct linux_writev_args *uap)
276 {
277 	struct uio *auio;
278 	int error;
279 
280 	error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio);
281 	if (error)
282 		return (error);
283 	error = kern_writev(td, uap->fd, auio);
284 	free(auio, M_IOV);
285 	return (error);
286 }
287 
288 struct l_ipc_kludge {
289 	l_uintptr_t msgp;
290 	l_long msgtyp;
291 } __packed;
292 
293 int
294 linux_ipc(struct thread *td, struct linux_ipc_args *args)
295 {
296 
297 	switch (args->what & 0xFFFF) {
298 	case LINUX_SEMOP: {
299 		struct linux_semop_args a;
300 
301 		a.semid = args->arg1;
302 		a.tsops = args->ptr;
303 		a.nsops = args->arg2;
304 		return (linux_semop(td, &a));
305 	}
306 	case LINUX_SEMGET: {
307 		struct linux_semget_args a;
308 
309 		a.key = args->arg1;
310 		a.nsems = args->arg2;
311 		a.semflg = args->arg3;
312 		return (linux_semget(td, &a));
313 	}
314 	case LINUX_SEMCTL: {
315 		struct linux_semctl_args a;
316 		int error;
317 
318 		a.semid = args->arg1;
319 		a.semnum = args->arg2;
320 		a.cmd = args->arg3;
321 		error = copyin(args->ptr, &a.arg, sizeof(a.arg));
322 		if (error)
323 			return (error);
324 		return (linux_semctl(td, &a));
325 	}
326 	case LINUX_MSGSND: {
327 		struct linux_msgsnd_args a;
328 
329 		a.msqid = args->arg1;
330 		a.msgp = args->ptr;
331 		a.msgsz = args->arg2;
332 		a.msgflg = args->arg3;
333 		return (linux_msgsnd(td, &a));
334 	}
335 	case LINUX_MSGRCV: {
336 		struct linux_msgrcv_args a;
337 
338 		a.msqid = args->arg1;
339 		a.msgsz = args->arg2;
340 		a.msgflg = args->arg3;
341 		if ((args->what >> 16) == 0) {
342 			struct l_ipc_kludge tmp;
343 			int error;
344 
345 			if (args->ptr == 0)
346 				return (EINVAL);
347 			error = copyin(args->ptr, &tmp, sizeof(tmp));
348 			if (error)
349 				return (error);
350 			a.msgp = PTRIN(tmp.msgp);
351 			a.msgtyp = tmp.msgtyp;
352 		} else {
353 			a.msgp = args->ptr;
354 			a.msgtyp = args->arg5;
355 		}
356 		return (linux_msgrcv(td, &a));
357 	}
358 	case LINUX_MSGGET: {
359 		struct linux_msgget_args a;
360 
361 		a.key = args->arg1;
362 		a.msgflg = args->arg2;
363 		return (linux_msgget(td, &a));
364 	}
365 	case LINUX_MSGCTL: {
366 		struct linux_msgctl_args a;
367 
368 		a.msqid = args->arg1;
369 		a.cmd = args->arg2;
370 		a.buf = args->ptr;
371 		return (linux_msgctl(td, &a));
372 	}
373 	case LINUX_SHMAT: {
374 		struct linux_shmat_args a;
375 
376 		a.shmid = args->arg1;
377 		a.shmaddr = args->ptr;
378 		a.shmflg = args->arg2;
379 		a.raddr = PTRIN((l_uint)args->arg3);
380 		return (linux_shmat(td, &a));
381 	}
382 	case LINUX_SHMDT: {
383 		struct linux_shmdt_args a;
384 
385 		a.shmaddr = args->ptr;
386 		return (linux_shmdt(td, &a));
387 	}
388 	case LINUX_SHMGET: {
389 		struct linux_shmget_args a;
390 
391 		a.key = args->arg1;
392 		a.size = args->arg2;
393 		a.shmflg = args->arg3;
394 		return (linux_shmget(td, &a));
395 	}
396 	case LINUX_SHMCTL: {
397 		struct linux_shmctl_args a;
398 
399 		a.shmid = args->arg1;
400 		a.cmd = args->arg2;
401 		a.buf = args->ptr;
402 		return (linux_shmctl(td, &a));
403 	}
404 	default:
405 		break;
406 	}
407 
408 	return (EINVAL);
409 }
410 
411 int
412 linux_old_select(struct thread *td, struct linux_old_select_args *args)
413 {
414 	struct l_old_select_argv linux_args;
415 	struct linux_select_args newsel;
416 	int error;
417 
418 #ifdef DEBUG
419 	if (ldebug(old_select))
420 		printf(ARGS(old_select, "%p"), args->ptr);
421 #endif
422 
423 	error = copyin(args->ptr, &linux_args, sizeof(linux_args));
424 	if (error)
425 		return (error);
426 
427 	newsel.nfds = linux_args.nfds;
428 	newsel.readfds = PTRIN(linux_args.readfds);
429 	newsel.writefds = PTRIN(linux_args.writefds);
430 	newsel.exceptfds = PTRIN(linux_args.exceptfds);
431 	newsel.timeout = PTRIN(linux_args.timeout);
432 	return (linux_select(td, &newsel));
433 }
434 
435 int
436 linux_fork(struct thread *td, struct linux_fork_args *args)
437 {
438 	int error;
439 
440 #ifdef DEBUG
441 	if (ldebug(fork))
442 		printf(ARGS(fork, ""));
443 #endif
444 
445 	if ((error = fork(td, (struct fork_args *)args)) != 0)
446 		return (error);
447 
448 	if (td->td_retval[1] == 1)
449 		td->td_retval[0] = 0;
450 	return (0);
451 }
452 
453 int
454 linux_vfork(struct thread *td, struct linux_vfork_args *args)
455 {
456 	int error;
457 
458 #ifdef DEBUG
459 	if (ldebug(vfork))
460 		printf(ARGS(vfork, ""));
461 #endif
462 
463 	if ((error = vfork(td, (struct vfork_args *)args)) != 0)
464 		return (error);
465 	/* Are we the child? */
466 	if (td->td_retval[1] == 1)
467 		td->td_retval[0] = 0;
468 	return (0);
469 }
470 
/*
 * Flag bits examined in the "flags" argument of linux_clone().
 */
#define CLONE_VM	0x100	/* set: child is created with RFMEM */
#define CLONE_FS	0x200	/* not examined by linux_clone() */
#define CLONE_FILES	0x400	/* clear: child is created with RFFDG */
#define CLONE_SIGHAND	0x800	/* set: child is created with RFSIGSHARE */
#define CLONE_PID	0x1000	/* only reported as unsupported (DEBUG) */
476 
477 int
478 linux_clone(struct thread *td, struct linux_clone_args *args)
479 {
480 	int error, ff = RFPROC | RFSTOPPED;
481 	struct proc *p2;
482 	struct thread *td2;
483 	int exit_signal;
484 
485 #ifdef DEBUG
486 	if (ldebug(clone)) {
487 		printf(ARGS(clone, "flags %x, stack %x"),
488 		    (unsigned int)(uintptr_t)args->flags,
489 		    (unsigned int)(uintptr_t)args->stack);
490 		if (args->flags & CLONE_PID)
491 			printf(LMSG("CLONE_PID not yet supported"));
492 	}
493 #endif
494 
495 	if (!args->stack)
496 		return (EINVAL);
497 
498 	exit_signal = args->flags & 0x000000ff;
499 	if (exit_signal >= LINUX_NSIG)
500 		return (EINVAL);
501 
502 	if (exit_signal <= LINUX_SIGTBLSZ)
503 		exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)];
504 
505 	if (args->flags & CLONE_VM)
506 		ff |= RFMEM;
507 	if (args->flags & CLONE_SIGHAND)
508 		ff |= RFSIGSHARE;
509 	if (!(args->flags & CLONE_FILES))
510 		ff |= RFFDG;
511 
512 	error = fork1(td, ff, 0, &p2);
513 	if (error)
514 		return (error);
515 
516 
517 	PROC_LOCK(p2);
518 	p2->p_sigparent = exit_signal;
519 	PROC_UNLOCK(p2);
520 	td2 = FIRST_THREAD_IN_PROC(p2);
521 	td2->td_frame->tf_rsp = PTROUT(args->stack);
522 
523 #ifdef DEBUG
524 	if (ldebug(clone))
525 		printf(LMSG("clone: successful rfork to %ld, stack %p sig = %d"),
526 		    (long)p2->p_pid, args->stack, exit_signal);
527 #endif
528 
529 	/*
530 	 * Make this runnable after we are finished with it.
531 	 */
532 	mtx_lock_spin(&sched_lock);
533 	TD_SET_CAN_RUN(td2);
534 	setrunqueue(td2, SRQ_BORING);
535 	mtx_unlock_spin(&sched_lock);
536 
537 	td->td_retval[0] = p2->p_pid;
538 	td->td_retval[1] = 0;
539 	return (0);
540 }
541 
542 /* XXX move */
543 struct l_mmap_argv {
544 	l_ulong		addr;
545 	l_ulong		len;
546 	l_ulong		prot;
547 	l_ulong		flags;
548 	l_ulong		fd;
549 	l_ulong		pgoff;
550 };
551 
/*
 * Sizing used by linux_mmap_common() for LINUX_MAP_GROWSDOWN requests:
 * such a region is given at least (STACK_SIZE - GUARD_SIZE) bytes.
 */
#define STACK_SIZE  (2 * 1024 * 1024)
#define GUARD_SIZE  (4 * PAGE_SIZE)

/* Shared back end of linux_mmap() and linux_mmap2(). */
static int linux_mmap_common(struct thread *, struct l_mmap_argv *);
556 
557 int
558 linux_mmap2(struct thread *td, struct linux_mmap2_args *args)
559 {
560 	struct l_mmap_argv linux_args;
561 
562 #ifdef DEBUG
563 	if (ldebug(mmap2))
564 		printf(ARGS(mmap2, "%p, %d, %d, 0x%08x, %d, %d"),
565 		    (void *)(intptr_t)args->addr, args->len, args->prot,
566 		    args->flags, args->fd, args->pgoff);
567 #endif
568 
569 	linux_args.addr = PTROUT(args->addr);
570 	linux_args.len = args->len;
571 	linux_args.prot = args->prot;
572 	linux_args.flags = args->flags;
573 	linux_args.fd = args->fd;
574 	linux_args.pgoff = args->pgoff;
575 
576 	return (linux_mmap_common(td, &linux_args));
577 }
578 
579 int
580 linux_mmap(struct thread *td, struct linux_mmap_args *args)
581 {
582 	int error;
583 	struct l_mmap_argv linux_args;
584 
585 	error = copyin(args->ptr, &linux_args, sizeof(linux_args));
586 	if (error)
587 		return (error);
588 
589 #ifdef DEBUG
590 	if (ldebug(mmap))
591 		printf(ARGS(mmap, "%p, %d, %d, 0x%08x, %d, %d"),
592 		    (void *)(intptr_t)linux_args.addr, linux_args.len,
593 		    linux_args.prot, linux_args.flags, linux_args.fd,
594 		    linux_args.pgoff);
595 #endif
596 	if ((linux_args.pgoff % PAGE_SIZE) != 0)
597 		return (EINVAL);
598 	linux_args.pgoff /= PAGE_SIZE;
599 
600 	return (linux_mmap_common(td, &linux_args));
601 }
602 
603 static int
604 linux_mmap_common(struct thread *td, struct l_mmap_argv *linux_args)
605 {
606 	struct proc *p = td->td_proc;
607 	struct mmap_args /* {
608 		caddr_t addr;
609 		size_t len;
610 		int prot;
611 		int flags;
612 		int fd;
613 		long pad;
614 		off_t pos;
615 	} */ bsd_args;
616 	int error;
617 
618 	error = 0;
619 	bsd_args.flags = 0;
620 	if (linux_args->flags & LINUX_MAP_SHARED)
621 		bsd_args.flags |= MAP_SHARED;
622 	if (linux_args->flags & LINUX_MAP_PRIVATE)
623 		bsd_args.flags |= MAP_PRIVATE;
624 	if (linux_args->flags & LINUX_MAP_FIXED)
625 		bsd_args.flags |= MAP_FIXED;
626 	if (linux_args->flags & LINUX_MAP_ANON)
627 		bsd_args.flags |= MAP_ANON;
628 	else
629 		bsd_args.flags |= MAP_NOSYNC;
630 	if (linux_args->flags & LINUX_MAP_GROWSDOWN) {
631 		bsd_args.flags |= MAP_STACK;
632 
633 		/* The linux MAP_GROWSDOWN option does not limit auto
634 		 * growth of the region.  Linux mmap with this option
635 		 * takes as addr the inital BOS, and as len, the initial
636 		 * region size.  It can then grow down from addr without
637 		 * limit.  However, linux threads has an implicit internal
638 		 * limit to stack size of STACK_SIZE.  Its just not
639 		 * enforced explicitly in linux.  But, here we impose
640 		 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
641 		 * region, since we can do this with our mmap.
642 		 *
643 		 * Our mmap with MAP_STACK takes addr as the maximum
644 		 * downsize limit on BOS, and as len the max size of
645 		 * the region.  It them maps the top SGROWSIZ bytes,
646 		 * and autgrows the region down, up to the limit
647 		 * in addr.
648 		 *
649 		 * If we don't use the MAP_STACK option, the effect
650 		 * of this code is to allocate a stack region of a
651 		 * fixed size of (STACK_SIZE - GUARD_SIZE).
652 		 */
653 
654 		/* This gives us TOS */
655 		bsd_args.addr = (caddr_t)PTRIN(linux_args->addr) +
656 		    linux_args->len;
657 
658 		if ((caddr_t)PTRIN(bsd_args.addr) >
659 		    p->p_vmspace->vm_maxsaddr) {
660 			/* Some linux apps will attempt to mmap
661 			 * thread stacks near the top of their
662 			 * address space.  If their TOS is greater
663 			 * than vm_maxsaddr, vm_map_growstack()
664 			 * will confuse the thread stack with the
665 			 * process stack and deliver a SEGV if they
666 			 * attempt to grow the thread stack past their
667 			 * current stacksize rlimit.  To avoid this,
668 			 * adjust vm_maxsaddr upwards to reflect
669 			 * the current stacksize rlimit rather
670 			 * than the maximum possible stacksize.
671 			 * It would be better to adjust the
672 			 * mmap'ed region, but some apps do not check
673 			 * mmap's return value.
674 			 */
675 			PROC_LOCK(p);
676 			p->p_vmspace->vm_maxsaddr =
677 			    (char *)LINUX32_USRSTACK -
678 			    lim_cur(p, RLIMIT_STACK);
679 			PROC_UNLOCK(p);
680 		}
681 
682 		/* This gives us our maximum stack size */
683 		if (linux_args->len > STACK_SIZE - GUARD_SIZE)
684 			bsd_args.len = linux_args->len;
685 		else
686 			bsd_args.len  = STACK_SIZE - GUARD_SIZE;
687 
688 		/* This gives us a new BOS.  If we're using VM_STACK, then
689 		 * mmap will just map the top SGROWSIZ bytes, and let
690 		 * the stack grow down to the limit at BOS.  If we're
691 		 * not using VM_STACK we map the full stack, since we
692 		 * don't have a way to autogrow it.
693 		 */
694 		bsd_args.addr -= bsd_args.len;
695 	} else {
696 		bsd_args.addr = (caddr_t)PTRIN(linux_args->addr);
697 		bsd_args.len  = linux_args->len;
698 	}
699 	/*
700 	 * XXX i386 Linux always emulator forces PROT_READ on (why?)
701 	 * so we do the same. We add PROT_EXEC to work around buggy
702 	 * applications (e.g. Java) that take advantage of the fact
703 	 * that execute permissions are not enforced by x86 CPUs.
704 	 */
705 	bsd_args.prot = linux_args->prot | PROT_EXEC | PROT_READ;
706 	if (linux_args->flags & LINUX_MAP_ANON)
707 		bsd_args.fd = -1;
708 	else
709 		bsd_args.fd = linux_args->fd;
710 	bsd_args.pos = (off_t)linux_args->pgoff * PAGE_SIZE;
711 	bsd_args.pad = 0;
712 
713 #ifdef DEBUG
714 	if (ldebug(mmap))
715 		printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n",
716 		    __func__,
717 		    (void *)bsd_args.addr, (int)bsd_args.len, bsd_args.prot,
718 		    bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
719 #endif
720 	error = mmap(td, &bsd_args);
721 #ifdef DEBUG
722 	if (ldebug(mmap))
723 		printf("-> %s() return: 0x%x (0x%08x)\n",
724 			__func__, error, (u_int)td->td_retval[0]);
725 #endif
726 	return (error);
727 }
728 
729 int
730 linux_pipe(struct thread *td, struct linux_pipe_args *args)
731 {
732 	int pip[2];
733 	int error;
734 	register_t reg_rdx;
735 
736 #ifdef DEBUG
737 	if (ldebug(pipe))
738 		printf(ARGS(pipe, "*"));
739 #endif
740 
741 	reg_rdx = td->td_retval[1];
742 	error = pipe(td, 0);
743 	if (error) {
744 		td->td_retval[1] = reg_rdx;
745 		return (error);
746 	}
747 
748 	pip[0] = td->td_retval[0];
749 	pip[1] = td->td_retval[1];
750 	error = copyout(pip, args->pipefds, 2 * sizeof(int));
751 	if (error) {
752 		td->td_retval[1] = reg_rdx;
753 		return (error);
754 	}
755 
756 	td->td_retval[1] = reg_rdx;
757 	td->td_retval[0] = 0;
758 	return (0);
759 }
760 
761 int
762 linux_sigaction(struct thread *td, struct linux_sigaction_args *args)
763 {
764 	l_osigaction_t osa;
765 	l_sigaction_t act, oact;
766 	int error;
767 
768 #ifdef DEBUG
769 	if (ldebug(sigaction))
770 		printf(ARGS(sigaction, "%d, %p, %p"),
771 		    args->sig, (void *)args->nsa, (void *)args->osa);
772 #endif
773 
774 	if (args->nsa != NULL) {
775 		error = copyin(args->nsa, &osa, sizeof(l_osigaction_t));
776 		if (error)
777 			return (error);
778 		act.lsa_handler = osa.lsa_handler;
779 		act.lsa_flags = osa.lsa_flags;
780 		act.lsa_restorer = osa.lsa_restorer;
781 		LINUX_SIGEMPTYSET(act.lsa_mask);
782 		act.lsa_mask.__bits[0] = osa.lsa_mask;
783 	}
784 
785 	error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL,
786 	    args->osa ? &oact : NULL);
787 
788 	if (args->osa != NULL && !error) {
789 		osa.lsa_handler = oact.lsa_handler;
790 		osa.lsa_flags = oact.lsa_flags;
791 		osa.lsa_restorer = oact.lsa_restorer;
792 		osa.lsa_mask = oact.lsa_mask.__bits[0];
793 		error = copyout(&osa, args->osa, sizeof(l_osigaction_t));
794 	}
795 
796 	return (error);
797 }
798 
799 /*
800  * Linux has two extra args, restart and oldmask.  We dont use these,
801  * but it seems that "restart" is actually a context pointer that
802  * enables the signal to happen with a different register set.
803  */
804 int
805 linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args)
806 {
807 	sigset_t sigmask;
808 	l_sigset_t mask;
809 
810 #ifdef DEBUG
811 	if (ldebug(sigsuspend))
812 		printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask);
813 #endif
814 
815 	LINUX_SIGEMPTYSET(mask);
816 	mask.__bits[0] = args->mask;
817 	linux_to_bsd_sigset(&mask, &sigmask);
818 	return (kern_sigsuspend(td, sigmask));
819 }
820 
821 int
822 linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap)
823 {
824 	l_sigset_t lmask;
825 	sigset_t sigmask;
826 	int error;
827 
828 #ifdef DEBUG
829 	if (ldebug(rt_sigsuspend))
830 		printf(ARGS(rt_sigsuspend, "%p, %d"),
831 		    (void *)uap->newset, uap->sigsetsize);
832 #endif
833 
834 	if (uap->sigsetsize != sizeof(l_sigset_t))
835 		return (EINVAL);
836 
837 	error = copyin(uap->newset, &lmask, sizeof(l_sigset_t));
838 	if (error)
839 		return (error);
840 
841 	linux_to_bsd_sigset(&lmask, &sigmask);
842 	return (kern_sigsuspend(td, sigmask));
843 }
844 
845 int
846 linux_pause(struct thread *td, struct linux_pause_args *args)
847 {
848 	struct proc *p = td->td_proc;
849 	sigset_t sigmask;
850 
851 #ifdef DEBUG
852 	if (ldebug(pause))
853 		printf(ARGS(pause, ""));
854 #endif
855 
856 	PROC_LOCK(p);
857 	sigmask = td->td_sigmask;
858 	PROC_UNLOCK(p);
859 	return (kern_sigsuspend(td, sigmask));
860 }
861 
862 int
863 linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap)
864 {
865 	stack_t ss, oss;
866 	l_stack_t lss;
867 	int error;
868 
869 #ifdef DEBUG
870 	if (ldebug(sigaltstack))
871 		printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss);
872 #endif
873 
874 	if (uap->uss != NULL) {
875 		error = copyin(uap->uss, &lss, sizeof(l_stack_t));
876 		if (error)
877 			return (error);
878 
879 		ss.ss_sp = PTRIN(lss.ss_sp);
880 		ss.ss_size = lss.ss_size;
881 		ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags);
882 	}
883 	error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL,
884 	    (uap->uoss != NULL) ? &oss : NULL);
885 	if (!error && uap->uoss != NULL) {
886 		lss.ss_sp = PTROUT(oss.ss_sp);
887 		lss.ss_size = oss.ss_size;
888 		lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags);
889 		error = copyout(&lss, uap->uoss, sizeof(l_stack_t));
890 	}
891 
892 	return (error);
893 }
894 
895 int
896 linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args)
897 {
898 	struct ftruncate_args sa;
899 
900 #ifdef DEBUG
901 	if (ldebug(ftruncate64))
902 		printf(ARGS(ftruncate64, "%u, %jd"), args->fd,
903 		    (intmax_t)args->length);
904 #endif
905 
906 	sa.fd = args->fd;
907 	sa.pad = 0;
908 	sa.length = args->length;
909 	return ftruncate(td, &sa);
910 }
911 
912 int
913 linux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap)
914 {
915 	struct timeval atv;
916 	l_timeval atv32;
917 	struct timezone rtz;
918 	int error = 0;
919 
920 	if (uap->tp) {
921 		microtime(&atv);
922 		atv32.tv_sec = atv.tv_sec;
923 		atv32.tv_usec = atv.tv_usec;
924 		error = copyout(&atv32, uap->tp, sizeof (atv32));
925 	}
926 	if (error == 0 && uap->tzp != NULL) {
927 		rtz.tz_minuteswest = tz_minuteswest;
928 		rtz.tz_dsttime = tz_dsttime;
929 		error = copyout(&rtz, uap->tzp, sizeof (rtz));
930 	}
931 	return (error);
932 }
933 
934 int
935 linux_nanosleep(struct thread *td, struct linux_nanosleep_args *uap)
936 {
937 	struct timespec rqt, rmt;
938 	struct l_timespec ats32;
939 	int error;
940 
941 	error = copyin(uap->rqtp, &ats32, sizeof(ats32));
942 	if (error != 0)
943 		return (error);
944 	rqt.tv_sec = ats32.tv_sec;
945 	rqt.tv_nsec = ats32.tv_nsec;
946 	error = kern_nanosleep(td, &rqt, &rmt);
947 	if (uap->rmtp != NULL) {
948 		ats32.tv_sec = rmt.tv_sec;
949 		ats32.tv_nsec = rmt.tv_nsec;
950 		error = copyout(&ats32, uap->rmtp, sizeof(ats32));
951 	}
952 	return (error);
953 }
954 
955 int
956 linux_getrusage(struct thread *td, struct linux_getrusage_args *uap)
957 {
958 	struct l_rusage s32;
959 	struct rusage s;
960 	int error;
961 
962 	error = kern_getrusage(td, uap->who, &s);
963 	if (error != 0)
964 		return (error);
965 	if (uap->rusage != NULL) {
966 		s32.ru_utime.tv_sec = s.ru_utime.tv_sec;
967 		s32.ru_utime.tv_usec = s.ru_utime.tv_usec;
968 		s32.ru_stime.tv_sec = s.ru_stime.tv_sec;
969 		s32.ru_stime.tv_usec = s.ru_stime.tv_usec;
970 		s32.ru_maxrss = s.ru_maxrss;
971 		s32.ru_ixrss = s.ru_ixrss;
972 		s32.ru_idrss = s.ru_idrss;
973 		s32.ru_isrss = s.ru_isrss;
974 		s32.ru_minflt = s.ru_minflt;
975 		s32.ru_majflt = s.ru_majflt;
976 		s32.ru_nswap = s.ru_nswap;
977 		s32.ru_inblock = s.ru_inblock;
978 		s32.ru_oublock = s.ru_oublock;
979 		s32.ru_msgsnd = s.ru_msgsnd;
980 		s32.ru_msgrcv = s.ru_msgrcv;
981 		s32.ru_nsignals = s.ru_nsignals;
982 		s32.ru_nvcsw = s.ru_nvcsw;
983 		s32.ru_nivcsw = s.ru_nivcsw;
984 		error = copyout(&s32, uap->rusage, sizeof(s32));
985 	}
986 	return (error);
987 }
988 
989 int
990 linux_sched_rr_get_interval(struct thread *td,
991     struct linux_sched_rr_get_interval_args *uap)
992 {
993 	struct timespec ts;
994 	struct l_timespec ts32;
995 	int error;
996 
997 	error = kern_sched_rr_get_interval(td, uap->pid, &ts);
998 	if (error != 0)
999 		return (error);
1000 	ts32.tv_sec = ts.tv_sec;
1001 	ts32.tv_nsec = ts.tv_nsec;
1002 	return (copyout(&ts32, uap->interval, sizeof(ts32)));
1003 }
1004 
1005 int
1006 linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
1007 {
1008 	struct mprotect_args bsd_args;
1009 
1010 	bsd_args.addr = uap->addr;
1011 	bsd_args.len = uap->len;
1012 	bsd_args.prot = uap->prot;
1013 	/* XXX PROT_READ implies PROT_EXEC; see linux_mmap_common(). */
1014 	if ((bsd_args.prot & PROT_READ) != 0)
1015 		bsd_args.prot |= PROT_EXEC;
1016 	return (mprotect(td, &bsd_args));
1017 }
1018