xref: /freebsd/sys/compat/linux/linux_misc.c (revision 2aebedc3ad9e722b272254e6dd3a12e399595e57)
1 /*-
2  * Copyright (c) 1994-1995 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  *  $Id: linux_misc.c,v 1.48 1998/12/19 02:55:33 julian Exp $
29  */
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/sysproto.h>
34 #include <sys/kernel.h>
35 #include <sys/mman.h>
36 #include <sys/proc.h>
37 #include <sys/fcntl.h>
38 #include <sys/imgact_aout.h>
39 #include <sys/mount.h>
40 #include <sys/namei.h>
41 #include <sys/resourcevar.h>
42 #include <sys/stat.h>
43 #include <sys/sysctl.h>
44 #ifdef COMPAT_LINUX_THREADS
45 #include <sys/unistd.h>
46 #endif /* COMPAT_LINUX_THREADS */
47 #include <sys/vnode.h>
48 #include <sys/wait.h>
49 #include <sys/time.h>
50 
51 #include <vm/vm.h>
52 #include <vm/pmap.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_prot.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_extern.h>
57 
58 #include <machine/frame.h>
59 #include <machine/psl.h>
60 
61 #include <i386/linux/linux.h>
62 #include <i386/linux/linux_proto.h>
63 #include <i386/linux/linux_util.h>
64 
65 int
66 linux_alarm(struct proc *p, struct linux_alarm_args *args)
67 {
68     struct itimerval it, old_it;
69     struct timeval tv;
70     int s;
71 
72 #ifdef DEBUG
73     printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs);
74 #endif
75     if (args->secs > 100000000)
76 	return EINVAL;
77     it.it_value.tv_sec = (long)args->secs;
78     it.it_value.tv_usec = 0;
79     it.it_interval.tv_sec = 0;
80     it.it_interval.tv_usec = 0;
81     s = splsoftclock();
82     old_it = p->p_realtimer;
83     getmicrouptime(&tv);
84     if (timevalisset(&old_it.it_value))
85 	untimeout(realitexpire, (caddr_t)p, p->p_ithandle);
86     if (it.it_value.tv_sec != 0) {
87 	p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value));
88 	timevaladd(&it.it_value, &tv);
89     }
90     p->p_realtimer = it;
91     splx(s);
92     if (timevalcmp(&old_it.it_value, &tv, >)) {
93 	timevalsub(&old_it.it_value, &tv);
94 	if (old_it.it_value.tv_usec != 0)
95 	    old_it.it_value.tv_sec++;
96 	p->p_retval[0] = old_it.it_value.tv_sec;
97     }
98     return 0;
99 }
100 
101 int
102 linux_brk(struct proc *p, struct linux_brk_args *args)
103 {
104 #if 0
105     struct vmspace *vm = p->p_vmspace;
106     vm_offset_t new, old;
107     int error;
108 
109     if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr)
110 	return EINVAL;
111     if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr)
112 	> p->p_rlimit[RLIMIT_DATA].rlim_cur)
113 	return ENOMEM;
114 
115     old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize);
116     new = round_page((vm_offset_t)args->dsend);
117     p->p_retval[0] = old;
118     if ((new-old) > 0) {
119 	if (swap_pager_full)
120 	    return ENOMEM;
121 	error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE,
122 			VM_PROT_ALL, VM_PROT_ALL, 0);
123 	if (error)
124 	    return error;
125 	vm->vm_dsize += btoc((new-old));
126 	p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize));
127     }
128     return 0;
129 #else
130     struct vmspace *vm = p->p_vmspace;
131     vm_offset_t new, old;
132     struct obreak_args /* {
133 	char * nsize;
134     } */ tmp;
135 
136 #ifdef DEBUG
137     printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend);
138 #endif
139     old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
140     new = (vm_offset_t)args->dsend;
141     tmp.nsize = (char *) new;
142     if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp))
143 	p->p_retval[0] = (int)new;
144     else
145 	p->p_retval[0] = (int)old;
146 
147     return 0;
148 #endif
149 }
150 
151 int
152 linux_uselib(struct proc *p, struct linux_uselib_args *args)
153 {
154     struct nameidata ni;
155     struct vnode *vp;
156     struct exec *a_out;
157     struct vattr attr;
158     vm_offset_t vmaddr;
159     unsigned long file_offset;
160     vm_offset_t buffer;
161     unsigned long bss_size;
162     int error;
163     caddr_t sg;
164     int locked;
165 
166     sg = stackgap_init();
167     CHECKALTEXIST(p, &sg, args->library);
168 
169 #ifdef DEBUG
170     printf("Linux-emul(%d): uselib(%s)\n", p->p_pid, args->library);
171 #endif
172 
173     a_out = NULL;
174     locked = 0;
175     vp = NULL;
176 
177     NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, args->library, p);
178     if (error = namei(&ni))
179 	goto cleanup;
180 
181     vp = ni.ni_vp;
182     if (vp == NULL) {
183 	error = ENOEXEC;	/* ?? */
184 	goto cleanup;
185     }
186 
187     /*
188      * From here on down, we have a locked vnode that must be unlocked.
189      */
190     locked++;
191 
192     /*
193      * Writable?
194      */
195     if (vp->v_writecount) {
196 	error = ETXTBSY;
197 	goto cleanup;
198     }
199 
200     /*
201      * Executable?
202      */
203     if (error = VOP_GETATTR(vp, &attr, p->p_ucred, p))
204 	goto cleanup;
205 
206     if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
207 	((attr.va_mode & 0111) == 0) ||
208 	(attr.va_type != VREG)) {
209 	    error = ENOEXEC;
210 	    goto cleanup;
211     }
212 
213     /*
214      * Sensible size?
215      */
216     if (attr.va_size == 0) {
217 	error = ENOEXEC;
218 	goto cleanup;
219     }
220 
221     /*
222      * Can we access it?
223      */
224     if (error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p))
225 	goto cleanup;
226 
227     if (error = VOP_OPEN(vp, FREAD, p->p_ucred, p))
228 	goto cleanup;
229 
230     /*
231      * Lock no longer needed
232      */
233     VOP_UNLOCK(vp, 0, p);
234     locked = 0;
235 
236     /*
237      * Pull in executable header into kernel_map
238      */
239     error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
240 	    	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
241     if (error)
242 	goto cleanup;
243 
244     /*
245      * Is it a Linux binary ?
246      */
247     if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
248 	error = ENOEXEC;
249 	goto cleanup;
250     }
251 
252     /* While we are here, we should REALLY do some more checks */
253 
254     /*
255      * Set file/virtual offset based on a.out variant.
256      */
257     switch ((int)(a_out->a_magic & 0xffff)) {
258     case 0413:	/* ZMAGIC */
259 	file_offset = 1024;
260 	break;
261     case 0314:	/* QMAGIC */
262 	file_offset = 0;
263 	break;
264     default:
265 	error = ENOEXEC;
266 	goto cleanup;
267     }
268 
269     bss_size = round_page(a_out->a_bss);
270 
271     /*
272      * Check various fields in header for validity/bounds.
273      */
274     if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
275 	error = ENOEXEC;
276 	goto cleanup;
277     }
278 
279     /* text + data can't exceed file size */
280     if (a_out->a_data + a_out->a_text > attr.va_size) {
281 	error = EFAULT;
282 	goto cleanup;
283     }
284 
285     /*
286      * text/data/bss must not exceed limits
287      * XXX: this is not complete. it should check current usage PLUS
288      * the resources needed by this library.
289      */
290     if (a_out->a_text > MAXTSIZ ||
291 	a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) {
292 	error = ENOMEM;
293 	goto cleanup;
294     }
295 
296     /*
297      * prevent more writers
298      */
299     vp->v_flag |= VTEXT;
300 
301     /*
302      * Check if file_offset page aligned,.
303      * Currently we cannot handle misalinged file offsets,
304      * and so we read in the entire image (what a waste).
305      */
306     if (file_offset & PAGE_MASK) {
307 #ifdef DEBUG
308 printf("uselib: Non page aligned binary %lu\n", file_offset);
309 #endif
310 	/*
311 	 * Map text+data read/write/execute
312 	 */
313 
314 	/* a_entry is the load address and is page aligned */
315 	vmaddr = trunc_page(a_out->a_entry);
316 
317 	/* get anon user mapping, read+write+execute */
318 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
319 		    	    a_out->a_text + a_out->a_data, FALSE,
320 			    VM_PROT_ALL, VM_PROT_ALL, 0);
321 	if (error)
322 	    goto cleanup;
323 
324 	/* map file into kernel_map */
325 	error = vm_mmap(kernel_map, &buffer,
326 			round_page(a_out->a_text + a_out->a_data + file_offset),
327 		   	VM_PROT_READ, VM_PROT_READ, 0,
328 			(caddr_t)vp, trunc_page(file_offset));
329 	if (error)
330 	    goto cleanup;
331 
332 	/* copy from kernel VM space to user space */
333 	error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset),
334 			(caddr_t)vmaddr, a_out->a_text + a_out->a_data);
335 
336 	/* release temporary kernel space */
337 	vm_map_remove(kernel_map, buffer,
338 		      buffer + round_page(a_out->a_text + a_out->a_data + file_offset));
339 
340 	if (error)
341 	    goto cleanup;
342     }
343     else {
344 #ifdef DEBUG
345 printf("uselib: Page aligned binary %lu\n", file_offset);
346 #endif
347 	/*
348 	 * for QMAGIC, a_entry is 20 bytes beyond the load address
349 	 * to skip the executable header
350 	 */
351 	vmaddr = trunc_page(a_out->a_entry);
352 
353 	/*
354 	 * Map it all into the process's space as a single copy-on-write
355 	 * "data" segment.
356 	 */
357 	error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr,
358 		   	a_out->a_text + a_out->a_data,
359 			VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
360 			(caddr_t)vp, file_offset);
361 	if (error)
362 	    goto cleanup;
363     }
364 #ifdef DEBUG
365 printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]);
366 #endif
367     if (bss_size != 0) {
368         /*
369 	 * Calculate BSS start address
370 	 */
371 	vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data;
372 
373 	/*
374 	 * allocate some 'anon' space
375 	 */
376 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
377 			    bss_size, FALSE,
378 			    VM_PROT_ALL, VM_PROT_ALL, 0);
379 	if (error)
380 	    goto cleanup;
381     }
382 
383 cleanup:
384     /*
385      * Unlock vnode if needed
386      */
387     if (locked)
388 	VOP_UNLOCK(vp, 0, p);
389 
390     /*
391      * Release the kernel mapping.
392      */
393     if (a_out)
394 	vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE);
395 
396     return error;
397 }
398 
399 /* XXX move */
400 struct linux_select_argv {
401 	int nfds;
402 	fd_set *readfds;
403 	fd_set *writefds;
404 	fd_set *exceptfds;
405 	struct timeval *timeout;
406 };
407 
408 int
409 linux_select(struct proc *p, struct linux_select_args *args)
410 {
411     struct linux_select_argv linux_args;
412     struct linux_newselect_args newsel;
413     int error;
414 
415 #ifdef SELECT_DEBUG
416     printf("Linux-emul(%d): select(%x)\n",
417 	   p->p_pid, args->ptr);
418 #endif
419     if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
420 			sizeof(linux_args))))
421 	return error;
422 
423     newsel.nfds = linux_args.nfds;
424     newsel.readfds = linux_args.readfds;
425     newsel.writefds = linux_args.writefds;
426     newsel.exceptfds = linux_args.exceptfds;
427     newsel.timeout = linux_args.timeout;
428 
429     return linux_newselect(p, &newsel);
430 }
431 
432 int
433 linux_newselect(struct proc *p, struct linux_newselect_args *args)
434 {
435     struct select_args bsa;
436     struct timeval tv0, tv1, utv, *tvp;
437     caddr_t sg;
438     int error;
439 
440 #ifdef DEBUG
441     printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n",
442   	(long)p->p_pid, args->nfds, (void *)args->readfds,
443 	(void *)args->writefds, (void *)args->exceptfds,
444 	(void *)args->timeout);
445 #endif
446     error = 0;
447     bsa.nd = args->nfds;
448     bsa.in = args->readfds;
449     bsa.ou = args->writefds;
450     bsa.ex = args->exceptfds;
451     bsa.tv = args->timeout;
452 
453     /*
454      * Store current time for computation of the amount of
455      * time left.
456      */
457     if (args->timeout) {
458 	if ((error = copyin(args->timeout, &utv, sizeof(utv))))
459 	    goto select_out;
460 #ifdef DEBUG
461 	printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n",
462 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
463 #endif
464 	if (itimerfix(&utv)) {
465 	    /*
466 	     * The timeval was invalid.  Convert it to something
467 	     * valid that will act as it does under Linux.
468 	     */
469 	    sg = stackgap_init();
470 	    tvp = stackgap_alloc(&sg, sizeof(utv));
471 	    utv.tv_sec += utv.tv_usec / 1000000;
472 	    utv.tv_usec %= 1000000;
473 	    if (utv.tv_usec < 0) {
474 		utv.tv_sec -= 1;
475 		utv.tv_usec += 1000000;
476 	    }
477 	    if (utv.tv_sec < 0)
478 		timevalclear(&utv);
479 	    if ((error = copyout(&utv, tvp, sizeof(utv))))
480 		goto select_out;
481 	    bsa.tv = tvp;
482 	}
483 	microtime(&tv0);
484     }
485 
486     error = select(p, &bsa);
487 #ifdef DEBUG
488     printf("Linux-emul(%d): real select returns %d\n",
489 	       p->p_pid, error);
490 #endif
491 
492     if (error) {
493 	/*
494 	 * See fs/select.c in the Linux kernel.  Without this,
495 	 * Maelstrom doesn't work.
496 	 */
497 	if (error == ERESTART)
498 	    error = EINTR;
499 	goto select_out;
500     }
501 
502     if (args->timeout) {
503 	if (p->p_retval[0]) {
504 	    /*
505 	     * Compute how much time was left of the timeout,
506 	     * by subtracting the current time and the time
507 	     * before we started the call, and subtracting
508 	     * that result from the user-supplied value.
509 	     */
510 	    microtime(&tv1);
511 	    timevalsub(&tv1, &tv0);
512 	    timevalsub(&utv, &tv1);
513 	    if (utv.tv_sec < 0)
514 		timevalclear(&utv);
515 	} else
516 	    timevalclear(&utv);
517 #ifdef DEBUG
518 	printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n",
519 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
520 #endif
521 	if ((error = copyout(&utv, args->timeout, sizeof(utv))))
522 	    goto select_out;
523     }
524 
525 select_out:
526 #ifdef DEBUG
527     printf("Linux-emul(%d): newselect_out -> %d\n",
528 	       p->p_pid, error);
529 #endif
530     return error;
531 }
532 
533 int
534 linux_getpgid(struct proc *p, struct linux_getpgid_args *args)
535 {
536     struct proc *curproc;
537 
538 #ifdef DEBUG
539     printf("Linux-emul(%d): getpgid(%d)\n", p->p_pid, args->pid);
540 #endif
541     if (args->pid != p->p_pid) {
542 	if (!(curproc = pfind(args->pid)))
543 	    return ESRCH;
544     }
545     else
546 	curproc = p;
547     p->p_retval[0] = curproc->p_pgid;
548     return 0;
549 }
550 
551 int
552 linux_fork(struct proc *p, struct linux_fork_args *args)
553 {
554     int error;
555 
556 #ifdef DEBUG
557     printf("Linux-emul(%d): fork()\n", p->p_pid);
558 #endif
559     if (error = fork(p, (struct fork_args *)args))
560 	return error;
561     if (p->p_retval[1] == 1)
562 	p->p_retval[0] = 0;
563     return 0;
564 }
565 
566 #ifndef COMPAT_LINUX_THREADS
567 int
568 linux_clone(struct proc *p, struct linux_clone_args *args)
569 {
570     printf("linux_clone(%d): Not enabled\n", p->p_pid);
571     return (EOPNOTSUPP);
572 }
573 
574 #else
575 #define CLONE_VM	0x100
576 #define CLONE_FS	0x200
577 #define CLONE_FILES	0x400
578 #define CLONE_SIGHAND	0x800
579 #define CLONE_PID	0x1000
580 
581 int
582 linux_clone(struct proc *p, struct linux_clone_args *args)
583 {
584     int error, ff = RFPROC;
585     struct proc *p2;
586     int            exit_signal;
587     vm_offset_t    start;
588     struct rfork_args rf_args;
589 
590 #ifdef SMP
591     printf("linux_clone(%d): does not work with SMP yet\n", p->p_pid);
592     return (EOPNOTSUPP);
593 #endif
594 #ifdef DEBUG
595     if (args->flags & CLONE_PID)
596 	printf("linux_clone(%d): CLONE_PID not yet supported\n", p->p_pid);
597     printf ("linux_clone(%d): invoked with flags %x and stack %x\n", p->p_pid,
598 	     (unsigned int)args->flags, (unsigned int)args->stack);
599 #endif
600 
601     if (!args->stack)
602         return (EINVAL);
603     exit_signal = args->flags & 0x000000ff;
604     if (exit_signal >= LINUX_NSIG)
605 	return EINVAL;
606     exit_signal = linux_to_bsd_signal[exit_signal];
607 
608     /* RFTHREAD probably not necessary here, but it shouldn't hurt either */
609     ff |= RFTHREAD;
610 
611     if (args->flags & CLONE_VM)
612 	ff |= RFMEM;
613     if (args->flags & CLONE_SIGHAND)
614 	ff |= RFSIGSHARE;
615     if (!(args->flags & CLONE_FILES))
616 	ff |= RFFDG;
617 
618     error = 0;
619     start = 0;
620 
621     rf_args.flags = ff;
622     if (error = rfork(p, &rf_args))
623 	return error;
624 
625     p2 = pfind(p->p_retval[0]);
626     if (p2 == 0)
627  	return ESRCH;
628 
629     p2->p_sigparent = exit_signal;
630     p2->p_md.md_regs->tf_esp = (unsigned int)args->stack;
631 
632 #ifdef DEBUG
633     printf ("linux_clone(%d): successful rfork to %d\n", p->p_pid, p2->p_pid);
634 #endif
635     return 0;
636 }
637 
638 #endif /* COMPAT_LINUX_THREADS */
639 /* XXX move */
640 struct linux_mmap_argv {
641 	linux_caddr_t addr;
642 	int len;
643 	int prot;
644 	int flags;
645 	int fd;
646 	int pos;
647 };
648 
649 #ifdef COMPAT_LINUX_THREADS
650 #define STACK_SIZE  (2 * 1024 * 1024)
651 #define GUARD_SIZE  (4 * PAGE_SIZE)
652 
653 #endif /* COMPAT_LINUX_THREADS */
654 int
655 linux_mmap(struct proc *p, struct linux_mmap_args *args)
656 {
657     struct mmap_args /* {
658 	caddr_t addr;
659 	size_t len;
660 	int prot;
661 	int flags;
662 	int fd;
663 	long pad;
664 	off_t pos;
665     } */ bsd_args;
666     int error;
667     struct linux_mmap_argv linux_args;
668 
669     if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
670 			sizeof(linux_args))))
671 	return error;
672 #ifdef DEBUG
673     printf("Linux-emul(%ld): mmap(%p, %d, %d, %08x, %d, %d)\n",
674 	(long)p->p_pid, (void *)linux_args.addr, linux_args.len,
675 	linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos);
676 #endif
677     bsd_args.flags = 0;
678     if (linux_args.flags & LINUX_MAP_SHARED)
679 	bsd_args.flags |= MAP_SHARED;
680     if (linux_args.flags & LINUX_MAP_PRIVATE)
681 	bsd_args.flags |= MAP_PRIVATE;
682     if (linux_args.flags & LINUX_MAP_FIXED)
683 	bsd_args.flags |= MAP_FIXED;
684     if (linux_args.flags & LINUX_MAP_ANON)
685 	bsd_args.flags |= MAP_ANON;
686 #ifndef COMPAT_LINUX_THREADS
687     bsd_args.addr = linux_args.addr;
688     bsd_args.len = linux_args.len;
689 #else
690 
691     /*#if !defined(USE_VM_STACK) && !defined(USE_VM_STACK_FOR_EXEC)*/
692     /* Linux Threads will map into the proc stack space, unless
693        we prevent it.  This causes problems if we're not using
694        our VM_STACK options.
695     */
696     if ((unsigned int)linux_args.addr + linux_args.len > (USRSTACK - MAXSSIZ))
697         return (EINVAL);
698     /*#endif*/
699 
700     if (linux_args.flags & LINUX_MAP_GROWSDOWN) {
701 
702 #ifdef USE_VM_STACK
703         /* USE_VM_STACK is defined (or not) in vm/vm_map.h */
704         bsd_args.flags |= MAP_STACK;
705 #endif
706 
707 	/* The linux MAP_GROWSDOWN option does not limit auto
708 	   growth of the region.  Linux mmap with this option
709 	   takes as addr the inital BOS, and as len, the initial
710 	   region size.  It can then grow down from addr without
711 	   limit.  However, linux threads has an implicit internal
712 	   limit to stack size of STACK_SIZE.  Its just not
713 	   enforced explicitly in linux.  But, here we impose
714 	   a limit of (STACK_SIZE - GUARD_SIZE) on the stack
715 	   region, since we can do this with our mmap.
716 
717 	   Our mmap with MAP_STACK takes addr as the maximum
718 	   downsize limit on BOS, and as len the max size of
719 	   the region.  It them maps the top SGROWSIZ bytes,
720 	   and autgrows the region down, up to the limit
721 	   in addr.
722 
723 	   If we don't use the MAP_STACK option, the effect
724 	   of this code is to allocate a stack region of a
725 	   fixed size of (STACK_SIZE - GUARD_SIZE).
726 	*/
727 
728 	/* This gives us TOS */
729         bsd_args.addr = linux_args.addr + linux_args.len;
730 
731 	/* This gives us our maximum stack size */
732 	if (linux_args.len > STACK_SIZE - GUARD_SIZE)
733 	    bsd_args.len = linux_args.len;
734 	else
735 	    bsd_args.len  = STACK_SIZE - GUARD_SIZE;
736 
737 	/* This gives us a new BOS.  If we're using VM_STACK, then
738 	   mmap will just map the top SGROWSIZ bytes, and let
739 	   the stack grow down to the limit at BOS.  If we're
740 	   not using VM_STACK we map the full stack, since we
741 	   don't have a way to autogrow it.
742 	*/
743 	bsd_args.addr -= bsd_args.len;
744 
745     } else {
746         bsd_args.addr = linux_args.addr;
747 	bsd_args.len  = linux_args.len;
748     }
749 #endif /* COMPAT_LINUX_THREADS */
750     bsd_args.prot = linux_args.prot | PROT_READ;	/* always required */
751     bsd_args.fd = linux_args.fd;
752     bsd_args.pos = linux_args.pos;
753     bsd_args.pad = 0;
754     return mmap(p, &bsd_args);
755 }
756 
757 int
758 linux_mremap(struct proc *p, struct linux_mremap_args *args)
759 {
760 	struct munmap_args /* {
761 		void *addr;
762 		size_t len;
763 	} */ bsd_args;
764 	int error = 0;
765 
766 #ifdef DEBUG
767 	printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n",
768 	    (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len,
769 	    args->flags);
770 #endif
771 	args->new_len = round_page(args->new_len);
772 	args->old_len = round_page(args->old_len);
773 
774 	if (args->new_len > args->old_len) {
775 		p->p_retval[0] = 0;
776 		return ENOMEM;
777 	}
778 
779 	if (args->new_len < args->old_len) {
780 		bsd_args.addr = args->addr + args->new_len;
781 		bsd_args.len = args->old_len - args->new_len;
782 		error = munmap(p, &bsd_args);
783 	}
784 
785 	p->p_retval[0] = error ? 0 : (int)args->addr;
786 	return error;
787 }
788 
789 int
790 linux_msync(struct proc *p, struct linux_msync_args *args)
791 {
792 	struct msync_args bsd_args;
793 
794 	bsd_args.addr = args->addr;
795 	bsd_args.len = args->len;
796 	bsd_args.flags = 0;	/* XXX ignore */
797 
798 	return msync(p, &bsd_args);
799 }
800 
801 int
802 linux_pipe(struct proc *p, struct linux_pipe_args *args)
803 {
804     int error;
805     int reg_edx;
806 
807 #ifdef DEBUG
808     printf("Linux-emul(%d): pipe(*)\n", p->p_pid);
809 #endif
810     reg_edx = p->p_retval[1];
811     if (error = pipe(p, 0)) {
812 	p->p_retval[1] = reg_edx;
813 	return error;
814     }
815 
816     if (error = copyout(p->p_retval, args->pipefds, 2*sizeof(int))) {
817 	p->p_retval[1] = reg_edx;
818 	return error;
819     }
820 
821     p->p_retval[1] = reg_edx;
822     p->p_retval[0] = 0;
823     return 0;
824 }
825 
826 int
827 linux_time(struct proc *p, struct linux_time_args *args)
828 {
829     struct timeval tv;
830     linux_time_t tm;
831     int error;
832 
833 #ifdef DEBUG
834     printf("Linux-emul(%d): time(*)\n", p->p_pid);
835 #endif
836     microtime(&tv);
837     tm = tv.tv_sec;
838     if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t))))
839 	return error;
840     p->p_retval[0] = tm;
841     return 0;
842 }
843 
844 struct linux_times_argv {
845     long    tms_utime;
846     long    tms_stime;
847     long    tms_cutime;
848     long    tms_cstime;
849 };
850 
851 #define CLK_TCK 100	/* Linux uses 100 */
852 #define CONVTCK(r)	(r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
853 
854 int
855 linux_times(struct proc *p, struct linux_times_args *args)
856 {
857     struct timeval tv;
858     struct linux_times_argv tms;
859     struct rusage ru;
860     int error;
861 
862 #ifdef DEBUG
863     printf("Linux-emul(%d): times(*)\n", p->p_pid);
864 #endif
865     calcru(p, &ru.ru_utime, &ru.ru_stime, NULL);
866 
867     tms.tms_utime = CONVTCK(ru.ru_utime);
868     tms.tms_stime = CONVTCK(ru.ru_stime);
869 
870     tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime);
871     tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime);
872 
873     if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf,
874 	    	    sizeof(struct linux_times_argv))))
875 	return error;
876 
877     microuptime(&tv);
878     p->p_retval[0] = (int)CONVTCK(tv);
879     return 0;
880 }
881 
882 /* XXX move */
883 struct linux_newuname_t {
884     char sysname[65];
885     char nodename[65];
886     char release[65];
887     char version[65];
888     char machine[65];
889     char domainname[65];
890 };
891 
892 int
893 linux_newuname(struct proc *p, struct linux_newuname_args *args)
894 {
895     struct linux_newuname_t linux_newuname;
896 
897 #ifdef DEBUG
898     printf("Linux-emul(%d): newuname(*)\n", p->p_pid);
899 #endif
900     bzero(&linux_newuname, sizeof(struct linux_newuname_t));
901     strncpy(linux_newuname.sysname, ostype,
902 	sizeof(linux_newuname.sysname) - 1);
903     strncpy(linux_newuname.nodename, hostname,
904 	sizeof(linux_newuname.nodename) - 1);
905     strncpy(linux_newuname.release, osrelease,
906 	sizeof(linux_newuname.release) - 1);
907     strncpy(linux_newuname.version, version,
908 	sizeof(linux_newuname.version) - 1);
909     strncpy(linux_newuname.machine, machine,
910 	sizeof(linux_newuname.machine) - 1);
911     strncpy(linux_newuname.domainname, domainname,
912 	sizeof(linux_newuname.domainname) - 1);
913     return (copyout((caddr_t)&linux_newuname, (caddr_t)args->buf,
914 	    	    sizeof(struct linux_newuname_t)));
915 }
916 
917 struct linux_utimbuf {
918 	linux_time_t l_actime;
919 	linux_time_t l_modtime;
920 };
921 
922 int
923 linux_utime(struct proc *p, struct linux_utime_args *args)
924 {
925     struct utimes_args /* {
926 	char	*path;
927 	struct	timeval *tptr;
928     } */ bsdutimes;
929     struct timeval tv[2], *tvp;
930     struct linux_utimbuf lut;
931     int error;
932     caddr_t sg;
933 
934     sg = stackgap_init();
935     CHECKALTEXIST(p, &sg, args->fname);
936 
937 #ifdef DEBUG
938     printf("Linux-emul(%d): utime(%s, *)\n", p->p_pid, args->fname);
939 #endif
940     if (args->times) {
941 	if ((error = copyin(args->times, &lut, sizeof lut)))
942 	    return error;
943 	tv[0].tv_sec = lut.l_actime;
944 	tv[0].tv_usec = 0;
945 	tv[1].tv_sec = lut.l_modtime;
946 	tv[1].tv_usec = 0;
947 	/* so that utimes can copyin */
948 	tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
949 	if ((error = copyout(tv, tvp, sizeof(tv))))
950 	    return error;
951 	bsdutimes.tptr = tvp;
952     } else
953 	bsdutimes.tptr = NULL;
954 
955     bsdutimes.path = args->fname;
956     return utimes(p, &bsdutimes);
957 }
958 
959 int
960 linux_waitpid(struct proc *p, struct linux_waitpid_args *args)
961 {
962     struct wait_args /* {
963 	int pid;
964 	int *status;
965 	int options;
966 	struct	rusage *rusage;
967     } */ tmp;
968     int error, tmpstat;
969 
970 #ifdef DEBUG
971     printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n",
972 	(long)p->p_pid, args->pid, (void *)args->status, args->options);
973 #endif
974     tmp.pid = args->pid;
975     tmp.status = args->status;
976 #ifndef COMPAT_LINUX_THREADS
977     tmp.options = args->options;
978 #else
979     /* This filters out the linux option _WCLONE.  I don't
980        think we need it, but I could be wrong.  If we need
981        it, we need to fix wait4, since it will give us an
982        error return of EINVAL if we pass in _WCLONE, and
983        of course, it won't do anything with it.
984     */
985     tmp.options = (args->options & (WNOHANG | WUNTRACED));
986 #endif /* COMPAT_LINUX_THREADS */
987     tmp.rusage = NULL;
988 
989     if (error = wait4(p, &tmp))
990 #ifndef COMPAT_LINUX_THREADS
991 	return error;
992 #else
993   	return error;
994 #endif /* COMPAT_LINUX_THREADS */
995     if (args->status) {
996 	if (error = copyin(args->status, &tmpstat, sizeof(int)))
997 	    return error;
998 	if (WIFSIGNALED(tmpstat))
999 	    tmpstat = (tmpstat & 0xffffff80) |
1000 		      bsd_to_linux_signal[WTERMSIG(tmpstat)];
1001 	else if (WIFSTOPPED(tmpstat))
1002 	    tmpstat = (tmpstat & 0xffff00ff) |
1003 		      (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8);
1004 	return copyout(&tmpstat, args->status, sizeof(int));
1005     } else
1006 	return 0;
1007 }
1008 
1009 int
1010 linux_wait4(struct proc *p, struct linux_wait4_args *args)
1011 {
1012     struct wait_args /* {
1013 	int pid;
1014 	int *status;
1015 	int options;
1016 	struct	rusage *rusage;
1017     } */ tmp;
1018     int error, tmpstat;
1019 
1020 #ifdef DEBUG
1021     printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n",
1022 	(long)p->p_pid, args->pid, (void *)args->status, args->options,
1023 	(void *)args->rusage);
1024 #endif
1025     tmp.pid = args->pid;
1026     tmp.status = args->status;
1027 #ifndef COMPAT_LINUX_THREADS
1028     tmp.options = args->options;
1029 #else
1030     /* This filters out the linux option _WCLONE.  I don't
1031        think we need it, but I could be wrong.  If we need
1032        it, we need to fix wait4, since it will give us an
1033        error return of EINVAL if we pass in _WCLONE, and
1034        of course, it won't do anything with it.
1035     */
1036     tmp.options = (args->options & (WNOHANG | WUNTRACED));
1037 #endif /* COMPAT_LINUX_THREADS */
1038     tmp.rusage = args->rusage;
1039 
1040     if (error = wait4(p, &tmp))
1041 	return error;
1042 
1043     p->p_siglist &= ~sigmask(SIGCHLD);
1044 
1045     if (args->status) {
1046 	if (error = copyin(args->status, &tmpstat, sizeof(int)))
1047 	    return error;
1048 	if (WIFSIGNALED(tmpstat))
1049 	    tmpstat = (tmpstat & 0xffffff80) |
1050 		  bsd_to_linux_signal[WTERMSIG(tmpstat)];
1051 	else if (WIFSTOPPED(tmpstat))
1052 	    tmpstat = (tmpstat & 0xffff00ff) |
1053 		  (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8);
1054 	return copyout(&tmpstat, args->status, sizeof(int));
1055     } else
1056 	return 0;
1057 }
1058 
1059 int
1060 linux_mknod(struct proc *p, struct linux_mknod_args *args)
1061 {
1062 	caddr_t sg;
1063 	struct mknod_args bsd_mknod;
1064 	struct mkfifo_args bsd_mkfifo;
1065 
1066 	sg = stackgap_init();
1067 
1068 	CHECKALTCREAT(p, &sg, args->path);
1069 
1070 #ifdef DEBUG
1071 	printf("Linux-emul(%d): mknod(%s, %d, %d)\n",
1072 	   p->p_pid, args->path, args->mode, args->dev);
1073 #endif
1074 
1075 	if (args->mode & S_IFIFO) {
1076 		bsd_mkfifo.path = args->path;
1077 		bsd_mkfifo.mode = args->mode;
1078 		return mkfifo(p, &bsd_mkfifo);
1079 	} else {
1080 		bsd_mknod.path = args->path;
1081 		bsd_mknod.mode = args->mode;
1082 		bsd_mknod.dev = args->dev;
1083 		return mknod(p, &bsd_mknod);
1084 	}
1085 }
1086 
1087 /*
1088  * UGH! This is just about the dumbest idea I've ever heard!!
1089  */
1090 int
1091 linux_personality(struct proc *p, struct linux_personality_args *args)
1092 {
1093 #ifdef DEBUG
1094 	printf("Linux-emul(%d): personality(%d)\n",
1095 	   p->p_pid, args->per);
1096 #endif
1097 	if (args->per != 0)
1098 		return EINVAL;
1099 
1100 	/* Yes Jim, it's still a Linux... */
1101 	p->p_retval[0] = 0;
1102 	return 0;
1103 }
1104 
1105 /*
1106  * Wrappers for get/setitimer for debugging..
1107  */
1108 int
1109 linux_setitimer(struct proc *p, struct linux_setitimer_args *args)
1110 {
1111 	struct setitimer_args bsa;
1112 	struct itimerval foo;
1113 	int error;
1114 
1115 #ifdef DEBUG
1116 	printf("Linux-emul(%ld): setitimer(%p, %p)\n",
1117 	    (long)p->p_pid, (void *)args->itv, (void *)args->oitv);
1118 #endif
1119 	bsa.which = args->which;
1120 	bsa.itv = args->itv;
1121 	bsa.oitv = args->oitv;
1122 	if (args->itv) {
1123 	    if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo,
1124 			sizeof(foo))))
1125 		return error;
1126 #ifdef DEBUG
1127 	    printf("setitimer: value: sec: %ld, usec: %ld\n",
1128 		foo.it_value.tv_sec, foo.it_value.tv_usec);
1129 	    printf("setitimer: interval: sec: %ld, usec: %ld\n",
1130 		foo.it_interval.tv_sec, foo.it_interval.tv_usec);
1131 #endif
1132 	}
1133 	return setitimer(p, &bsa);
1134 }
1135 
1136 int
1137 linux_getitimer(struct proc *p, struct linux_getitimer_args *args)
1138 {
1139 	struct getitimer_args bsa;
1140 #ifdef DEBUG
1141 	printf("Linux-emul(%ld): getitimer(%p)\n",
1142 	    (long)p->p_pid, (void *)args->itv);
1143 #endif
1144 	bsa.which = args->which;
1145 	bsa.itv = args->itv;
1146 	return getitimer(p, &bsa);
1147 }
1148 
1149 int
1150 linux_iopl(struct proc *p, struct linux_iopl_args *args)
1151 {
1152 	int error;
1153 
1154 	error = suser(p->p_ucred, &p->p_acflag);
1155 	if (error != 0)
1156 		return error;
1157 	if (securelevel > 0)
1158 		return EPERM;
1159 	p->p_md.md_regs->tf_eflags |= PSL_IOPL;
1160 	return 0;
1161 }
1162 
1163 int
1164 linux_nice(struct proc *p, struct linux_nice_args *args)
1165 {
1166 	struct setpriority_args	bsd_args;
1167 
1168 	bsd_args.which = PRIO_PROCESS;
1169 	bsd_args.who = 0;	/* current process */
1170 	bsd_args.prio = args->inc;
1171 	return setpriority(p, &bsd_args);
1172 }
1173 
1174