xref: /freebsd/sys/compat/linux/linux_misc.c (revision cc611a43c20093aec01b4bf8b8755c46af148735)
1 /*-
2  * Copyright (c) 1994-1995 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  *  $Id: linux_misc.c,v 1.56 1999/04/27 12:21:04 phk Exp $
29  */
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/sysproto.h>
34 #include <sys/kernel.h>
35 #include <sys/mman.h>
36 #include <sys/proc.h>
37 #include <sys/fcntl.h>
38 #include <sys/imgact_aout.h>
39 #include <sys/mount.h>
40 #include <sys/namei.h>
41 #include <sys/resourcevar.h>
42 #include <sys/stat.h>
43 #include <sys/sysctl.h>
44 #include <sys/unistd.h>
45 #include <sys/vnode.h>
46 #include <sys/wait.h>
47 #include <sys/time.h>
48 
49 #include <vm/vm.h>
50 #include <vm/pmap.h>
51 #include <vm/vm_kern.h>
52 #include <vm/vm_prot.h>
53 #include <vm/vm_map.h>
54 #include <vm/vm_extern.h>
55 
56 #include <machine/frame.h>
57 #include <machine/psl.h>
58 
59 #include <i386/linux/linux.h>
60 #include <i386/linux/linux_proto.h>
61 #include <i386/linux/linux_util.h>
62 
63 int
64 linux_alarm(struct proc *p, struct linux_alarm_args *args)
65 {
66     struct itimerval it, old_it;
67     struct timeval tv;
68     int s;
69 
70 #ifdef DEBUG
71     printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs);
72 #endif
73     if (args->secs > 100000000)
74 	return EINVAL;
75     it.it_value.tv_sec = (long)args->secs;
76     it.it_value.tv_usec = 0;
77     it.it_interval.tv_sec = 0;
78     it.it_interval.tv_usec = 0;
79     s = splsoftclock();
80     old_it = p->p_realtimer;
81     getmicrouptime(&tv);
82     if (timevalisset(&old_it.it_value))
83 	untimeout(realitexpire, (caddr_t)p, p->p_ithandle);
84     if (it.it_value.tv_sec != 0) {
85 	p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value));
86 	timevaladd(&it.it_value, &tv);
87     }
88     p->p_realtimer = it;
89     splx(s);
90     if (timevalcmp(&old_it.it_value, &tv, >)) {
91 	timevalsub(&old_it.it_value, &tv);
92 	if (old_it.it_value.tv_usec != 0)
93 	    old_it.it_value.tv_sec++;
94 	p->p_retval[0] = old_it.it_value.tv_sec;
95     }
96     return 0;
97 }
98 
99 int
100 linux_brk(struct proc *p, struct linux_brk_args *args)
101 {
102 #if 0
103     struct vmspace *vm = p->p_vmspace;
104     vm_offset_t new, old;
105     int error;
106 
107     if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr)
108 	return EINVAL;
109     if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr)
110 	> p->p_rlimit[RLIMIT_DATA].rlim_cur)
111 	return ENOMEM;
112 
113     old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize);
114     new = round_page((vm_offset_t)args->dsend);
115     p->p_retval[0] = old;
116     if ((new-old) > 0) {
117 	if (swap_pager_full)
118 	    return ENOMEM;
119 	error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE,
120 			VM_PROT_ALL, VM_PROT_ALL, 0);
121 	if (error)
122 	    return error;
123 	vm->vm_dsize += btoc((new-old));
124 	p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize));
125     }
126     return 0;
127 #else
128     struct vmspace *vm = p->p_vmspace;
129     vm_offset_t new, old;
130     struct obreak_args /* {
131 	char * nsize;
132     } */ tmp;
133 
134 #ifdef DEBUG
135     printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend);
136 #endif
137     old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
138     new = (vm_offset_t)args->dsend;
139     tmp.nsize = (char *) new;
140     if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp))
141 	p->p_retval[0] = (int)new;
142     else
143 	p->p_retval[0] = (int)old;
144 
145     return 0;
146 #endif
147 }
148 
149 int
150 linux_uselib(struct proc *p, struct linux_uselib_args *args)
151 {
152     struct nameidata ni;
153     struct vnode *vp;
154     struct exec *a_out;
155     struct vattr attr;
156     vm_offset_t vmaddr;
157     unsigned long file_offset;
158     vm_offset_t buffer;
159     unsigned long bss_size;
160     int error;
161     caddr_t sg;
162     int locked;
163 
164     sg = stackgap_init();
165     CHECKALTEXIST(p, &sg, args->library);
166 
167 #ifdef DEBUG
168     printf("Linux-emul(%d): uselib(%s)\n", p->p_pid, args->library);
169 #endif
170 
171     a_out = NULL;
172     locked = 0;
173     vp = NULL;
174 
175     NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, args->library, p);
176     if (error = namei(&ni))
177 	goto cleanup;
178 
179     vp = ni.ni_vp;
180     if (vp == NULL) {
181 	error = ENOEXEC;	/* ?? */
182 	goto cleanup;
183     }
184 
185     /*
186      * From here on down, we have a locked vnode that must be unlocked.
187      */
188     locked++;
189 
190     /*
191      * Writable?
192      */
193     if (vp->v_writecount) {
194 	error = ETXTBSY;
195 	goto cleanup;
196     }
197 
198     /*
199      * Executable?
200      */
201     if (error = VOP_GETATTR(vp, &attr, p->p_ucred, p))
202 	goto cleanup;
203 
204     if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
205 	((attr.va_mode & 0111) == 0) ||
206 	(attr.va_type != VREG)) {
207 	    error = ENOEXEC;
208 	    goto cleanup;
209     }
210 
211     /*
212      * Sensible size?
213      */
214     if (attr.va_size == 0) {
215 	error = ENOEXEC;
216 	goto cleanup;
217     }
218 
219     /*
220      * Can we access it?
221      */
222     if (error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p))
223 	goto cleanup;
224 
225     if (error = VOP_OPEN(vp, FREAD, p->p_ucred, p))
226 	goto cleanup;
227 
228     /*
229      * Lock no longer needed
230      */
231     VOP_UNLOCK(vp, 0, p);
232     locked = 0;
233 
234     /*
235      * Pull in executable header into kernel_map
236      */
237     error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
238 	    	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
239     if (error)
240 	goto cleanup;
241 
242     /*
243      * Is it a Linux binary ?
244      */
245     if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
246 	error = ENOEXEC;
247 	goto cleanup;
248     }
249 
250     /* While we are here, we should REALLY do some more checks */
251 
252     /*
253      * Set file/virtual offset based on a.out variant.
254      */
255     switch ((int)(a_out->a_magic & 0xffff)) {
256     case 0413:	/* ZMAGIC */
257 	file_offset = 1024;
258 	break;
259     case 0314:	/* QMAGIC */
260 	file_offset = 0;
261 	break;
262     default:
263 	error = ENOEXEC;
264 	goto cleanup;
265     }
266 
267     bss_size = round_page(a_out->a_bss);
268 
269     /*
270      * Check various fields in header for validity/bounds.
271      */
272     if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
273 	error = ENOEXEC;
274 	goto cleanup;
275     }
276 
277     /* text + data can't exceed file size */
278     if (a_out->a_data + a_out->a_text > attr.va_size) {
279 	error = EFAULT;
280 	goto cleanup;
281     }
282 
283     /*
284      * text/data/bss must not exceed limits
285      * XXX: this is not complete. it should check current usage PLUS
286      * the resources needed by this library.
287      */
288     if (a_out->a_text > MAXTSIZ ||
289 	a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) {
290 	error = ENOMEM;
291 	goto cleanup;
292     }
293 
294     /*
295      * prevent more writers
296      */
297     vp->v_flag |= VTEXT;
298 
299     /*
300      * Check if file_offset page aligned,.
301      * Currently we cannot handle misalinged file offsets,
302      * and so we read in the entire image (what a waste).
303      */
304     if (file_offset & PAGE_MASK) {
305 #ifdef DEBUG
306 printf("uselib: Non page aligned binary %lu\n", file_offset);
307 #endif
308 	/*
309 	 * Map text+data read/write/execute
310 	 */
311 
312 	/* a_entry is the load address and is page aligned */
313 	vmaddr = trunc_page(a_out->a_entry);
314 
315 	/* get anon user mapping, read+write+execute */
316 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
317 		    	    a_out->a_text + a_out->a_data, FALSE,
318 			    VM_PROT_ALL, VM_PROT_ALL, 0);
319 	if (error)
320 	    goto cleanup;
321 
322 	/* map file into kernel_map */
323 	error = vm_mmap(kernel_map, &buffer,
324 			round_page(a_out->a_text + a_out->a_data + file_offset),
325 		   	VM_PROT_READ, VM_PROT_READ, 0,
326 			(caddr_t)vp, trunc_page(file_offset));
327 	if (error)
328 	    goto cleanup;
329 
330 	/* copy from kernel VM space to user space */
331 	error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset),
332 			(caddr_t)vmaddr, a_out->a_text + a_out->a_data);
333 
334 	/* release temporary kernel space */
335 	vm_map_remove(kernel_map, buffer,
336 		      buffer + round_page(a_out->a_text + a_out->a_data + file_offset));
337 
338 	if (error)
339 	    goto cleanup;
340     }
341     else {
342 #ifdef DEBUG
343 printf("uselib: Page aligned binary %lu\n", file_offset);
344 #endif
345 	/*
346 	 * for QMAGIC, a_entry is 20 bytes beyond the load address
347 	 * to skip the executable header
348 	 */
349 	vmaddr = trunc_page(a_out->a_entry);
350 
351 	/*
352 	 * Map it all into the process's space as a single copy-on-write
353 	 * "data" segment.
354 	 */
355 	error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr,
356 		   	a_out->a_text + a_out->a_data,
357 			VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
358 			(caddr_t)vp, file_offset);
359 	if (error)
360 	    goto cleanup;
361     }
362 #ifdef DEBUG
363 printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]);
364 #endif
365     if (bss_size != 0) {
366         /*
367 	 * Calculate BSS start address
368 	 */
369 	vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data;
370 
371 	/*
372 	 * allocate some 'anon' space
373 	 */
374 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
375 			    bss_size, FALSE,
376 			    VM_PROT_ALL, VM_PROT_ALL, 0);
377 	if (error)
378 	    goto cleanup;
379     }
380 
381 cleanup:
382     /*
383      * Unlock vnode if needed
384      */
385     if (locked)
386 	VOP_UNLOCK(vp, 0, p);
387 
388     /*
389      * Release the kernel mapping.
390      */
391     if (a_out)
392 	vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE);
393 
394     return error;
395 }
396 
397 /* XXX move */
398 struct linux_select_argv {
399 	int nfds;
400 	fd_set *readfds;
401 	fd_set *writefds;
402 	fd_set *exceptfds;
403 	struct timeval *timeout;
404 };
405 
406 int
407 linux_select(struct proc *p, struct linux_select_args *args)
408 {
409     struct linux_select_argv linux_args;
410     struct linux_newselect_args newsel;
411     int error;
412 
413 #ifdef SELECT_DEBUG
414     printf("Linux-emul(%d): select(%x)\n",
415 	   p->p_pid, args->ptr);
416 #endif
417     if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
418 			sizeof(linux_args))))
419 	return error;
420 
421     newsel.nfds = linux_args.nfds;
422     newsel.readfds = linux_args.readfds;
423     newsel.writefds = linux_args.writefds;
424     newsel.exceptfds = linux_args.exceptfds;
425     newsel.timeout = linux_args.timeout;
426 
427     return linux_newselect(p, &newsel);
428 }
429 
430 int
431 linux_newselect(struct proc *p, struct linux_newselect_args *args)
432 {
433     struct select_args bsa;
434     struct timeval tv0, tv1, utv, *tvp;
435     caddr_t sg;
436     int error;
437 
438 #ifdef DEBUG
439     printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n",
440   	(long)p->p_pid, args->nfds, (void *)args->readfds,
441 	(void *)args->writefds, (void *)args->exceptfds,
442 	(void *)args->timeout);
443 #endif
444     error = 0;
445     bsa.nd = args->nfds;
446     bsa.in = args->readfds;
447     bsa.ou = args->writefds;
448     bsa.ex = args->exceptfds;
449     bsa.tv = args->timeout;
450 
451     /*
452      * Store current time for computation of the amount of
453      * time left.
454      */
455     if (args->timeout) {
456 	if ((error = copyin(args->timeout, &utv, sizeof(utv))))
457 	    goto select_out;
458 #ifdef DEBUG
459 	printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n",
460 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
461 #endif
462 	if (itimerfix(&utv)) {
463 	    /*
464 	     * The timeval was invalid.  Convert it to something
465 	     * valid that will act as it does under Linux.
466 	     */
467 	    sg = stackgap_init();
468 	    tvp = stackgap_alloc(&sg, sizeof(utv));
469 	    utv.tv_sec += utv.tv_usec / 1000000;
470 	    utv.tv_usec %= 1000000;
471 	    if (utv.tv_usec < 0) {
472 		utv.tv_sec -= 1;
473 		utv.tv_usec += 1000000;
474 	    }
475 	    if (utv.tv_sec < 0)
476 		timevalclear(&utv);
477 	    if ((error = copyout(&utv, tvp, sizeof(utv))))
478 		goto select_out;
479 	    bsa.tv = tvp;
480 	}
481 	microtime(&tv0);
482     }
483 
484     error = select(p, &bsa);
485 #ifdef DEBUG
486     printf("Linux-emul(%d): real select returns %d\n",
487 	       p->p_pid, error);
488 #endif
489 
490     if (error) {
491 	/*
492 	 * See fs/select.c in the Linux kernel.  Without this,
493 	 * Maelstrom doesn't work.
494 	 */
495 	if (error == ERESTART)
496 	    error = EINTR;
497 	goto select_out;
498     }
499 
500     if (args->timeout) {
501 	if (p->p_retval[0]) {
502 	    /*
503 	     * Compute how much time was left of the timeout,
504 	     * by subtracting the current time and the time
505 	     * before we started the call, and subtracting
506 	     * that result from the user-supplied value.
507 	     */
508 	    microtime(&tv1);
509 	    timevalsub(&tv1, &tv0);
510 	    timevalsub(&utv, &tv1);
511 	    if (utv.tv_sec < 0)
512 		timevalclear(&utv);
513 	} else
514 	    timevalclear(&utv);
515 #ifdef DEBUG
516 	printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n",
517 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
518 #endif
519 	if ((error = copyout(&utv, args->timeout, sizeof(utv))))
520 	    goto select_out;
521     }
522 
523 select_out:
524 #ifdef DEBUG
525     printf("Linux-emul(%d): newselect_out -> %d\n",
526 	       p->p_pid, error);
527 #endif
528     return error;
529 }
530 
531 int
532 linux_getpgid(struct proc *p, struct linux_getpgid_args *args)
533 {
534     struct proc *curp;
535 
536 #ifdef DEBUG
537     printf("Linux-emul(%d): getpgid(%d)\n", p->p_pid, args->pid);
538 #endif
539     if (args->pid != p->p_pid) {
540 	if (!(curp = pfind(args->pid)))
541 	    return ESRCH;
542     }
543     else
544 	curp = p;
545     p->p_retval[0] = curp->p_pgid;
546     return 0;
547 }
548 
549 int
550 linux_fork(struct proc *p, struct linux_fork_args *args)
551 {
552     int error;
553 
554 #ifdef DEBUG
555     printf("Linux-emul(%d): fork()\n", p->p_pid);
556 #endif
557     if ((error = fork(p, (struct fork_args *)args)) != 0)
558 	return error;
559     if (p->p_retval[1] == 1)
560 	p->p_retval[0] = 0;
561     return 0;
562 }
563 
564 #define CLONE_VM	0x100
565 #define CLONE_FS	0x200
566 #define CLONE_FILES	0x400
567 #define CLONE_SIGHAND	0x800
568 #define CLONE_PID	0x1000
569 
570 int
571 linux_clone(struct proc *p, struct linux_clone_args *args)
572 {
573     int error, ff = RFPROC;
574     struct proc *p2;
575     int            exit_signal;
576     vm_offset_t    start;
577     struct rfork_args rf_args;
578 
579 #ifdef DEBUG
580     if (args->flags & CLONE_PID)
581 	printf("linux_clone(%d): CLONE_PID not yet supported\n", p->p_pid);
582     printf ("linux_clone(%d): invoked with flags %x and stack %x\n", p->p_pid,
583 	     (unsigned int)args->flags, (unsigned int)args->stack);
584 #endif
585 
586     if (!args->stack)
587         return (EINVAL);
588 
589     exit_signal = args->flags & 0x000000ff;
590     if (exit_signal >= LINUX_NSIG)
591 	return EINVAL;
592     exit_signal = linux_to_bsd_signal[exit_signal];
593 
594     /* RFTHREAD probably not necessary here, but it shouldn't hurt either */
595     ff |= RFTHREAD;
596 
597     if (args->flags & CLONE_VM)
598 	ff |= RFMEM;
599     if (args->flags & CLONE_SIGHAND)
600 	ff |= RFSIGSHARE;
601     if (!(args->flags & CLONE_FILES))
602 	ff |= RFFDG;
603 
604     error = 0;
605     start = 0;
606 
607     rf_args.flags = ff;
608     if ((error = rfork(p, &rf_args)) != 0)
609 	return error;
610 
611     p2 = pfind(p->p_retval[0]);
612     if (p2 == 0)
613  	return ESRCH;
614 
615     p2->p_sigparent = exit_signal;
616     p2->p_md.md_regs->tf_esp = (unsigned int)args->stack;
617 
618 #ifdef DEBUG
619     printf ("linux_clone(%d): successful rfork to %d\n", p->p_pid, p2->p_pid);
620 #endif
621     return 0;
622 }
623 
624 /* XXX move */
625 struct linux_mmap_argv {
626 	linux_caddr_t addr;
627 	int len;
628 	int prot;
629 	int flags;
630 	int fd;
631 	int pos;
632 };
633 
634 #define STACK_SIZE  (2 * 1024 * 1024)
635 #define GUARD_SIZE  (4 * PAGE_SIZE)
636 int
637 linux_mmap(struct proc *p, struct linux_mmap_args *args)
638 {
639     struct mmap_args /* {
640 	caddr_t addr;
641 	size_t len;
642 	int prot;
643 	int flags;
644 	int fd;
645 	long pad;
646 	off_t pos;
647     } */ bsd_args;
648     int error;
649     struct linux_mmap_argv linux_args;
650 
651     if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
652 			sizeof(linux_args))))
653 	return error;
654 #ifdef DEBUG
655     printf("Linux-emul(%ld): mmap(%p, %d, %d, %08x, %d, %d)\n",
656 	(long)p->p_pid, (void *)linux_args.addr, linux_args.len,
657 	linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos);
658 #endif
659     bsd_args.flags = 0;
660     if (linux_args.flags & LINUX_MAP_SHARED)
661 	bsd_args.flags |= MAP_SHARED;
662     if (linux_args.flags & LINUX_MAP_PRIVATE)
663 	bsd_args.flags |= MAP_PRIVATE;
664     if (linux_args.flags & LINUX_MAP_FIXED)
665 	bsd_args.flags |= MAP_FIXED;
666     if (linux_args.flags & LINUX_MAP_ANON)
667 	bsd_args.flags |= MAP_ANON;
668     if (linux_args.flags & LINUX_MAP_GROWSDOWN) {
669 	bsd_args.flags |= MAP_STACK;
670 
671 	/* The linux MAP_GROWSDOWN option does not limit auto
672 	 * growth of the region.  Linux mmap with this option
673 	 * takes as addr the inital BOS, and as len, the initial
674 	 * region size.  It can then grow down from addr without
675 	 * limit.  However, linux threads has an implicit internal
676 	 * limit to stack size of STACK_SIZE.  Its just not
677 	 * enforced explicitly in linux.  But, here we impose
678 	 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
679 	 * region, since we can do this with our mmap.
680 	 *
681 	 * Our mmap with MAP_STACK takes addr as the maximum
682 	 * downsize limit on BOS, and as len the max size of
683 	 * the region.  It them maps the top SGROWSIZ bytes,
684 	 * and autgrows the region down, up to the limit
685 	 * in addr.
686 	 *
687 	 * If we don't use the MAP_STACK option, the effect
688 	 * of this code is to allocate a stack region of a
689 	 * fixed size of (STACK_SIZE - GUARD_SIZE).
690 	 */
691 
692 	/* This gives us TOS */
693 	bsd_args.addr = linux_args.addr + linux_args.len;
694 
695 	/* This gives us our maximum stack size */
696 	if (linux_args.len > STACK_SIZE - GUARD_SIZE)
697 	    bsd_args.len = linux_args.len;
698 	else
699 	    bsd_args.len  = STACK_SIZE - GUARD_SIZE;
700 
701 	/* This gives us a new BOS.  If we're using VM_STACK, then
702 	 * mmap will just map the top SGROWSIZ bytes, and let
703 	 * the stack grow down to the limit at BOS.  If we're
704 	 * not using VM_STACK we map the full stack, since we
705 	 * don't have a way to autogrow it.
706 	 */
707 	bsd_args.addr -= bsd_args.len;
708 
709     } else {
710 	bsd_args.addr = linux_args.addr;
711 	bsd_args.len  = linux_args.len;
712     }
713 
714     bsd_args.prot = linux_args.prot | PROT_READ;	/* always required */
715     bsd_args.fd = linux_args.fd;
716     bsd_args.pos = linux_args.pos;
717     bsd_args.pad = 0;
718     return mmap(p, &bsd_args);
719 }
720 
721 int
722 linux_mremap(struct proc *p, struct linux_mremap_args *args)
723 {
724 	struct munmap_args /* {
725 		void *addr;
726 		size_t len;
727 	} */ bsd_args;
728 	int error = 0;
729 
730 #ifdef DEBUG
731 	printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n",
732 	    (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len,
733 	    args->flags);
734 #endif
735 	args->new_len = round_page(args->new_len);
736 	args->old_len = round_page(args->old_len);
737 
738 	if (args->new_len > args->old_len) {
739 		p->p_retval[0] = 0;
740 		return ENOMEM;
741 	}
742 
743 	if (args->new_len < args->old_len) {
744 		bsd_args.addr = args->addr + args->new_len;
745 		bsd_args.len = args->old_len - args->new_len;
746 		error = munmap(p, &bsd_args);
747 	}
748 
749 	p->p_retval[0] = error ? 0 : (int)args->addr;
750 	return error;
751 }
752 
753 int
754 linux_msync(struct proc *p, struct linux_msync_args *args)
755 {
756 	struct msync_args bsd_args;
757 
758 	bsd_args.addr = args->addr;
759 	bsd_args.len = args->len;
760 	bsd_args.flags = 0;	/* XXX ignore */
761 
762 	return msync(p, &bsd_args);
763 }
764 
765 int
766 linux_pipe(struct proc *p, struct linux_pipe_args *args)
767 {
768     int error;
769     int reg_edx;
770 
771 #ifdef DEBUG
772     printf("Linux-emul(%d): pipe(*)\n", p->p_pid);
773 #endif
774     reg_edx = p->p_retval[1];
775     if (error = pipe(p, 0)) {
776 	p->p_retval[1] = reg_edx;
777 	return error;
778     }
779 
780     if (error = copyout(p->p_retval, args->pipefds, 2*sizeof(int))) {
781 	p->p_retval[1] = reg_edx;
782 	return error;
783     }
784 
785     p->p_retval[1] = reg_edx;
786     p->p_retval[0] = 0;
787     return 0;
788 }
789 
790 int
791 linux_time(struct proc *p, struct linux_time_args *args)
792 {
793     struct timeval tv;
794     linux_time_t tm;
795     int error;
796 
797 #ifdef DEBUG
798     printf("Linux-emul(%d): time(*)\n", p->p_pid);
799 #endif
800     microtime(&tv);
801     tm = tv.tv_sec;
802     if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t))))
803 	return error;
804     p->p_retval[0] = tm;
805     return 0;
806 }
807 
808 struct linux_times_argv {
809     long    tms_utime;
810     long    tms_stime;
811     long    tms_cutime;
812     long    tms_cstime;
813 };
814 
815 #define CLK_TCK 100	/* Linux uses 100 */
816 #define CONVTCK(r)	(r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
817 
818 int
819 linux_times(struct proc *p, struct linux_times_args *args)
820 {
821     struct timeval tv;
822     struct linux_times_argv tms;
823     struct rusage ru;
824     int error;
825 
826 #ifdef DEBUG
827     printf("Linux-emul(%d): times(*)\n", p->p_pid);
828 #endif
829     calcru(p, &ru.ru_utime, &ru.ru_stime, NULL);
830 
831     tms.tms_utime = CONVTCK(ru.ru_utime);
832     tms.tms_stime = CONVTCK(ru.ru_stime);
833 
834     tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime);
835     tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime);
836 
837     if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf,
838 	    	    sizeof(struct linux_times_argv))))
839 	return error;
840 
841     microuptime(&tv);
842     p->p_retval[0] = (int)CONVTCK(tv);
843     return 0;
844 }
845 
846 /* XXX move */
847 struct linux_newuname_t {
848     char sysname[65];
849     char nodename[65];
850     char release[65];
851     char version[65];
852     char machine[65];
853     char domainname[65];
854 };
855 
856 int
857 linux_newuname(struct proc *p, struct linux_newuname_args *args)
858 {
859     struct linux_newuname_t linux_newuname;
860 
861 #ifdef DEBUG
862     printf("Linux-emul(%d): newuname(*)\n", p->p_pid);
863 #endif
864     bzero(&linux_newuname, sizeof(struct linux_newuname_t));
865     strncpy(linux_newuname.sysname, ostype,
866 	sizeof(linux_newuname.sysname) - 1);
867     strncpy(linux_newuname.nodename, hostname,
868 	sizeof(linux_newuname.nodename) - 1);
869     strncpy(linux_newuname.release, osrelease,
870 	sizeof(linux_newuname.release) - 1);
871     strncpy(linux_newuname.version, version,
872 	sizeof(linux_newuname.version) - 1);
873     strncpy(linux_newuname.machine, machine,
874 	sizeof(linux_newuname.machine) - 1);
875     strncpy(linux_newuname.domainname, domainname,
876 	sizeof(linux_newuname.domainname) - 1);
877     return (copyout((caddr_t)&linux_newuname, (caddr_t)args->buf,
878 	    	    sizeof(struct linux_newuname_t)));
879 }
880 
881 struct linux_utimbuf {
882 	linux_time_t l_actime;
883 	linux_time_t l_modtime;
884 };
885 
886 int
887 linux_utime(struct proc *p, struct linux_utime_args *args)
888 {
889     struct utimes_args /* {
890 	char	*path;
891 	struct	timeval *tptr;
892     } */ bsdutimes;
893     struct timeval tv[2], *tvp;
894     struct linux_utimbuf lut;
895     int error;
896     caddr_t sg;
897 
898     sg = stackgap_init();
899     CHECKALTEXIST(p, &sg, args->fname);
900 
901 #ifdef DEBUG
902     printf("Linux-emul(%d): utime(%s, *)\n", p->p_pid, args->fname);
903 #endif
904     if (args->times) {
905 	if ((error = copyin(args->times, &lut, sizeof lut)))
906 	    return error;
907 	tv[0].tv_sec = lut.l_actime;
908 	tv[0].tv_usec = 0;
909 	tv[1].tv_sec = lut.l_modtime;
910 	tv[1].tv_usec = 0;
911 	/* so that utimes can copyin */
912 	tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
913 	if ((error = copyout(tv, tvp, sizeof(tv))))
914 	    return error;
915 	bsdutimes.tptr = tvp;
916     } else
917 	bsdutimes.tptr = NULL;
918 
919     bsdutimes.path = args->fname;
920     return utimes(p, &bsdutimes);
921 }
922 
923 #define __WCLONE 0x80000000
924 
925 int
926 linux_waitpid(struct proc *p, struct linux_waitpid_args *args)
927 {
928     struct wait_args /* {
929 	int pid;
930 	int *status;
931 	int options;
932 	struct	rusage *rusage;
933     } */ tmp;
934     int error, tmpstat;
935 
936 #ifdef DEBUG
937     printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n",
938 	(long)p->p_pid, args->pid, (void *)args->status, args->options);
939 #endif
940     tmp.pid = args->pid;
941     tmp.status = args->status;
942     tmp.options = (args->options & (WNOHANG | WUNTRACED));
943     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
944     if (args->options & __WCLONE)
945 	tmp.options |= WLINUXCLONE;
946     tmp.rusage = NULL;
947 
948     if ((error = wait4(p, &tmp)) != 0)
949 	return error;
950 
951     if (args->status) {
952 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
953 	    return error;
954 	if (WIFSIGNALED(tmpstat))
955 	    tmpstat = (tmpstat & 0xffffff80) |
956 		      bsd_to_linux_signal[WTERMSIG(tmpstat)];
957 	else if (WIFSTOPPED(tmpstat))
958 	    tmpstat = (tmpstat & 0xffff00ff) |
959 		      (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8);
960 	return copyout(&tmpstat, args->status, sizeof(int));
961     } else
962 	return 0;
963 }
964 
965 int
966 linux_wait4(struct proc *p, struct linux_wait4_args *args)
967 {
968     struct wait_args /* {
969 	int pid;
970 	int *status;
971 	int options;
972 	struct	rusage *rusage;
973     } */ tmp;
974     int error, tmpstat;
975 
976 #ifdef DEBUG
977     printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n",
978 	(long)p->p_pid, args->pid, (void *)args->status, args->options,
979 	(void *)args->rusage);
980 #endif
981     tmp.pid = args->pid;
982     tmp.status = args->status;
983     tmp.options = (args->options & (WNOHANG | WUNTRACED));
984     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
985     if (args->options & __WCLONE)
986 	tmp.options |= WLINUXCLONE;
987     tmp.rusage = args->rusage;
988 
989     if ((error = wait4(p, &tmp)) != 0)
990 	return error;
991 
992     p->p_siglist &= ~sigmask(SIGCHLD);
993 
994     if (args->status) {
995 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
996 	    return error;
997 	if (WIFSIGNALED(tmpstat))
998 	    tmpstat = (tmpstat & 0xffffff80) |
999 		  bsd_to_linux_signal[WTERMSIG(tmpstat)];
1000 	else if (WIFSTOPPED(tmpstat))
1001 	    tmpstat = (tmpstat & 0xffff00ff) |
1002 		  (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8);
1003 	return copyout(&tmpstat, args->status, sizeof(int));
1004     } else
1005 	return 0;
1006 }
1007 
1008 int
1009 linux_mknod(struct proc *p, struct linux_mknod_args *args)
1010 {
1011 	caddr_t sg;
1012 	struct mknod_args bsd_mknod;
1013 	struct mkfifo_args bsd_mkfifo;
1014 
1015 	sg = stackgap_init();
1016 
1017 	CHECKALTCREAT(p, &sg, args->path);
1018 
1019 #ifdef DEBUG
1020 	printf("Linux-emul(%d): mknod(%s, %d, %d)\n",
1021 	   p->p_pid, args->path, args->mode, args->dev);
1022 #endif
1023 
1024 	if (args->mode & S_IFIFO) {
1025 		bsd_mkfifo.path = args->path;
1026 		bsd_mkfifo.mode = args->mode;
1027 		return mkfifo(p, &bsd_mkfifo);
1028 	} else {
1029 		bsd_mknod.path = args->path;
1030 		bsd_mknod.mode = args->mode;
1031 		bsd_mknod.dev = args->dev;
1032 		return mknod(p, &bsd_mknod);
1033 	}
1034 }
1035 
1036 /*
1037  * UGH! This is just about the dumbest idea I've ever heard!!
1038  */
1039 int
1040 linux_personality(struct proc *p, struct linux_personality_args *args)
1041 {
1042 #ifdef DEBUG
1043 	printf("Linux-emul(%d): personality(%d)\n",
1044 	   p->p_pid, args->per);
1045 #endif
1046 	if (args->per != 0)
1047 		return EINVAL;
1048 
1049 	/* Yes Jim, it's still a Linux... */
1050 	p->p_retval[0] = 0;
1051 	return 0;
1052 }
1053 
1054 /*
1055  * Wrappers for get/setitimer for debugging..
1056  */
1057 int
1058 linux_setitimer(struct proc *p, struct linux_setitimer_args *args)
1059 {
1060 	struct setitimer_args bsa;
1061 	struct itimerval foo;
1062 	int error;
1063 
1064 #ifdef DEBUG
1065 	printf("Linux-emul(%ld): setitimer(%p, %p)\n",
1066 	    (long)p->p_pid, (void *)args->itv, (void *)args->oitv);
1067 #endif
1068 	bsa.which = args->which;
1069 	bsa.itv = args->itv;
1070 	bsa.oitv = args->oitv;
1071 	if (args->itv) {
1072 	    if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo,
1073 			sizeof(foo))))
1074 		return error;
1075 #ifdef DEBUG
1076 	    printf("setitimer: value: sec: %ld, usec: %ld\n",
1077 		foo.it_value.tv_sec, foo.it_value.tv_usec);
1078 	    printf("setitimer: interval: sec: %ld, usec: %ld\n",
1079 		foo.it_interval.tv_sec, foo.it_interval.tv_usec);
1080 #endif
1081 	}
1082 	return setitimer(p, &bsa);
1083 }
1084 
1085 int
1086 linux_getitimer(struct proc *p, struct linux_getitimer_args *args)
1087 {
1088 	struct getitimer_args bsa;
1089 #ifdef DEBUG
1090 	printf("Linux-emul(%ld): getitimer(%p)\n",
1091 	    (long)p->p_pid, (void *)args->itv);
1092 #endif
1093 	bsa.which = args->which;
1094 	bsa.itv = args->itv;
1095 	return getitimer(p, &bsa);
1096 }
1097 
1098 int
1099 linux_iopl(struct proc *p, struct linux_iopl_args *args)
1100 {
1101 	int error;
1102 
1103 	error = suser(p);
1104 	if (error != 0)
1105 		return error;
1106 	if (securelevel > 0)
1107 		return EPERM;
1108 	p->p_md.md_regs->tf_eflags |= PSL_IOPL;
1109 	return 0;
1110 }
1111 
1112 int
1113 linux_nice(struct proc *p, struct linux_nice_args *args)
1114 {
1115 	struct setpriority_args	bsd_args;
1116 
1117 	bsd_args.which = PRIO_PROCESS;
1118 	bsd_args.who = 0;	/* current process */
1119 	bsd_args.prio = args->inc;
1120 	return setpriority(p, &bsd_args);
1121 }
1122 
1123 int
1124 linux_setgroups(p, uap)
1125      struct proc *p;
1126      struct linux_setgroups_args *uap;
1127 {
1128   struct pcred *pc = p->p_cred;
1129   linux_gid_t linux_gidset[NGROUPS];
1130   gid_t *bsd_gidset;
1131   int ngrp, error;
1132 
1133   if ((error = suser(p)))
1134     return error;
1135 
1136   if (uap->gidsetsize > NGROUPS)
1137     return EINVAL;
1138 
1139   ngrp = uap->gidsetsize;
1140   pc->pc_ucred = crcopy(pc->pc_ucred);
1141   if (ngrp >= 1) {
1142     if ((error = copyin((caddr_t)uap->gidset,
1143                       (caddr_t)linux_gidset,
1144                         ngrp * sizeof(linux_gid_t))))
1145       return error;
1146 
1147     pc->pc_ucred->cr_ngroups = ngrp;
1148 
1149     bsd_gidset = pc->pc_ucred->cr_groups;
1150     ngrp--;
1151     while (ngrp >= 0) {
1152       bsd_gidset[ngrp] = linux_gidset[ngrp];
1153       ngrp--;
1154     }
1155   }
1156   else
1157     pc->pc_ucred->cr_ngroups = 1;
1158 
1159   setsugid(p);
1160   return 0;
1161 }
1162 
1163 int
1164 linux_getgroups(p, uap)
1165      struct proc *p;
1166      struct linux_getgroups_args *uap;
1167 {
1168   struct pcred *pc = p->p_cred;
1169   linux_gid_t linux_gidset[NGROUPS];
1170   gid_t *bsd_gidset;
1171   int ngrp, error;
1172 
1173   if ((ngrp = uap->gidsetsize) == 0) {
1174     p->p_retval[0] = pc->pc_ucred->cr_ngroups;
1175     return 0;
1176   }
1177 
1178   if (ngrp < pc->pc_ucred->cr_ngroups)
1179     return EINVAL;
1180 
1181   ngrp = 0;
1182   bsd_gidset = pc->pc_ucred->cr_groups;
1183   while (ngrp < pc->pc_ucred->cr_ngroups) {
1184     linux_gidset[ngrp] = bsd_gidset[ngrp];
1185     ngrp++;
1186   }
1187 
1188   if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset,
1189                        ngrp * sizeof(linux_gid_t))))
1190     return error;
1191 
1192   p->p_retval[0] = ngrp;
1193   return (0);
1194 }
1195