xref: /freebsd/sys/compat/linux/linux_misc.c (revision 2ad872c5794e4c26fdf6ed219ad3f09ca0d5304a)
1 /*-
2  * Copyright (c) 1994-1995 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  *  $Id: linux_misc.c,v 1.50 1998/12/30 21:01:33 sos Exp $
29  */
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/sysproto.h>
34 #include <sys/kernel.h>
35 #include <sys/mman.h>
36 #include <sys/proc.h>
37 #include <sys/fcntl.h>
38 #include <sys/imgact_aout.h>
39 #include <sys/mount.h>
40 #include <sys/namei.h>
41 #include <sys/resourcevar.h>
42 #include <sys/stat.h>
43 #include <sys/sysctl.h>
44 #ifdef COMPAT_LINUX_THREADS
45 #include <sys/unistd.h>
46 #endif /* COMPAT_LINUX_THREADS */
47 #include <sys/vnode.h>
48 #include <sys/wait.h>
49 #include <sys/time.h>
50 
51 #include <vm/vm.h>
52 #include <vm/pmap.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_prot.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_extern.h>
57 
58 #include <machine/frame.h>
59 #include <machine/psl.h>
60 
61 #include <i386/linux/linux.h>
62 #include <i386/linux/linux_proto.h>
63 #include <i386/linux/linux_util.h>
64 
65 int
66 linux_alarm(struct proc *p, struct linux_alarm_args *args)
67 {
68     struct itimerval it, old_it;
69     struct timeval tv;
70     int s;
71 
72 #ifdef DEBUG
73     printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs);
74 #endif
75     if (args->secs > 100000000)
76 	return EINVAL;
77     it.it_value.tv_sec = (long)args->secs;
78     it.it_value.tv_usec = 0;
79     it.it_interval.tv_sec = 0;
80     it.it_interval.tv_usec = 0;
81     s = splsoftclock();
82     old_it = p->p_realtimer;
83     getmicrouptime(&tv);
84     if (timevalisset(&old_it.it_value))
85 	untimeout(realitexpire, (caddr_t)p, p->p_ithandle);
86     if (it.it_value.tv_sec != 0) {
87 	p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value));
88 	timevaladd(&it.it_value, &tv);
89     }
90     p->p_realtimer = it;
91     splx(s);
92     if (timevalcmp(&old_it.it_value, &tv, >)) {
93 	timevalsub(&old_it.it_value, &tv);
94 	if (old_it.it_value.tv_usec != 0)
95 	    old_it.it_value.tv_sec++;
96 	p->p_retval[0] = old_it.it_value.tv_sec;
97     }
98     return 0;
99 }
100 
101 int
102 linux_brk(struct proc *p, struct linux_brk_args *args)
103 {
104 #if 0
105     struct vmspace *vm = p->p_vmspace;
106     vm_offset_t new, old;
107     int error;
108 
109     if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr)
110 	return EINVAL;
111     if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr)
112 	> p->p_rlimit[RLIMIT_DATA].rlim_cur)
113 	return ENOMEM;
114 
115     old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize);
116     new = round_page((vm_offset_t)args->dsend);
117     p->p_retval[0] = old;
118     if ((new-old) > 0) {
119 	if (swap_pager_full)
120 	    return ENOMEM;
121 	error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE,
122 			VM_PROT_ALL, VM_PROT_ALL, 0);
123 	if (error)
124 	    return error;
125 	vm->vm_dsize += btoc((new-old));
126 	p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize));
127     }
128     return 0;
129 #else
130     struct vmspace *vm = p->p_vmspace;
131     vm_offset_t new, old;
132     struct obreak_args /* {
133 	char * nsize;
134     } */ tmp;
135 
136 #ifdef DEBUG
137     printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend);
138 #endif
139     old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
140     new = (vm_offset_t)args->dsend;
141     tmp.nsize = (char *) new;
142     if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp))
143 	p->p_retval[0] = (int)new;
144     else
145 	p->p_retval[0] = (int)old;
146 
147     return 0;
148 #endif
149 }
150 
151 int
152 linux_uselib(struct proc *p, struct linux_uselib_args *args)
153 {
154     struct nameidata ni;
155     struct vnode *vp;
156     struct exec *a_out;
157     struct vattr attr;
158     vm_offset_t vmaddr;
159     unsigned long file_offset;
160     vm_offset_t buffer;
161     unsigned long bss_size;
162     int error;
163     caddr_t sg;
164     int locked;
165 
166     sg = stackgap_init();
167     CHECKALTEXIST(p, &sg, args->library);
168 
169 #ifdef DEBUG
170     printf("Linux-emul(%d): uselib(%s)\n", p->p_pid, args->library);
171 #endif
172 
173     a_out = NULL;
174     locked = 0;
175     vp = NULL;
176 
177     NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, args->library, p);
178     if (error = namei(&ni))
179 	goto cleanup;
180 
181     vp = ni.ni_vp;
182     if (vp == NULL) {
183 	error = ENOEXEC;	/* ?? */
184 	goto cleanup;
185     }
186 
187     /*
188      * From here on down, we have a locked vnode that must be unlocked.
189      */
190     locked++;
191 
192     /*
193      * Writable?
194      */
195     if (vp->v_writecount) {
196 	error = ETXTBSY;
197 	goto cleanup;
198     }
199 
200     /*
201      * Executable?
202      */
203     if (error = VOP_GETATTR(vp, &attr, p->p_ucred, p))
204 	goto cleanup;
205 
206     if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
207 	((attr.va_mode & 0111) == 0) ||
208 	(attr.va_type != VREG)) {
209 	    error = ENOEXEC;
210 	    goto cleanup;
211     }
212 
213     /*
214      * Sensible size?
215      */
216     if (attr.va_size == 0) {
217 	error = ENOEXEC;
218 	goto cleanup;
219     }
220 
221     /*
222      * Can we access it?
223      */
224     if (error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p))
225 	goto cleanup;
226 
227     if (error = VOP_OPEN(vp, FREAD, p->p_ucred, p))
228 	goto cleanup;
229 
230     /*
231      * Lock no longer needed
232      */
233     VOP_UNLOCK(vp, 0, p);
234     locked = 0;
235 
236     /*
237      * Pull in executable header into kernel_map
238      */
239     error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
240 	    	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
241     if (error)
242 	goto cleanup;
243 
244     /*
245      * Is it a Linux binary ?
246      */
247     if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
248 	error = ENOEXEC;
249 	goto cleanup;
250     }
251 
252     /* While we are here, we should REALLY do some more checks */
253 
254     /*
255      * Set file/virtual offset based on a.out variant.
256      */
257     switch ((int)(a_out->a_magic & 0xffff)) {
258     case 0413:	/* ZMAGIC */
259 	file_offset = 1024;
260 	break;
261     case 0314:	/* QMAGIC */
262 	file_offset = 0;
263 	break;
264     default:
265 	error = ENOEXEC;
266 	goto cleanup;
267     }
268 
269     bss_size = round_page(a_out->a_bss);
270 
271     /*
272      * Check various fields in header for validity/bounds.
273      */
274     if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
275 	error = ENOEXEC;
276 	goto cleanup;
277     }
278 
279     /* text + data can't exceed file size */
280     if (a_out->a_data + a_out->a_text > attr.va_size) {
281 	error = EFAULT;
282 	goto cleanup;
283     }
284 
285     /*
286      * text/data/bss must not exceed limits
287      * XXX: this is not complete. it should check current usage PLUS
288      * the resources needed by this library.
289      */
290     if (a_out->a_text > MAXTSIZ ||
291 	a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) {
292 	error = ENOMEM;
293 	goto cleanup;
294     }
295 
296     /*
297      * prevent more writers
298      */
299     vp->v_flag |= VTEXT;
300 
301     /*
302      * Check if file_offset page aligned,.
303      * Currently we cannot handle misalinged file offsets,
304      * and so we read in the entire image (what a waste).
305      */
306     if (file_offset & PAGE_MASK) {
307 #ifdef DEBUG
308 printf("uselib: Non page aligned binary %lu\n", file_offset);
309 #endif
310 	/*
311 	 * Map text+data read/write/execute
312 	 */
313 
314 	/* a_entry is the load address and is page aligned */
315 	vmaddr = trunc_page(a_out->a_entry);
316 
317 	/* get anon user mapping, read+write+execute */
318 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
319 		    	    a_out->a_text + a_out->a_data, FALSE,
320 			    VM_PROT_ALL, VM_PROT_ALL, 0);
321 	if (error)
322 	    goto cleanup;
323 
324 	/* map file into kernel_map */
325 	error = vm_mmap(kernel_map, &buffer,
326 			round_page(a_out->a_text + a_out->a_data + file_offset),
327 		   	VM_PROT_READ, VM_PROT_READ, 0,
328 			(caddr_t)vp, trunc_page(file_offset));
329 	if (error)
330 	    goto cleanup;
331 
332 	/* copy from kernel VM space to user space */
333 	error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset),
334 			(caddr_t)vmaddr, a_out->a_text + a_out->a_data);
335 
336 	/* release temporary kernel space */
337 	vm_map_remove(kernel_map, buffer,
338 		      buffer + round_page(a_out->a_text + a_out->a_data + file_offset));
339 
340 	if (error)
341 	    goto cleanup;
342     }
343     else {
344 #ifdef DEBUG
345 printf("uselib: Page aligned binary %lu\n", file_offset);
346 #endif
347 	/*
348 	 * for QMAGIC, a_entry is 20 bytes beyond the load address
349 	 * to skip the executable header
350 	 */
351 	vmaddr = trunc_page(a_out->a_entry);
352 
353 	/*
354 	 * Map it all into the process's space as a single copy-on-write
355 	 * "data" segment.
356 	 */
357 	error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr,
358 		   	a_out->a_text + a_out->a_data,
359 			VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
360 			(caddr_t)vp, file_offset);
361 	if (error)
362 	    goto cleanup;
363     }
364 #ifdef DEBUG
365 printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]);
366 #endif
367     if (bss_size != 0) {
368         /*
369 	 * Calculate BSS start address
370 	 */
371 	vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data;
372 
373 	/*
374 	 * allocate some 'anon' space
375 	 */
376 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
377 			    bss_size, FALSE,
378 			    VM_PROT_ALL, VM_PROT_ALL, 0);
379 	if (error)
380 	    goto cleanup;
381     }
382 
383 cleanup:
384     /*
385      * Unlock vnode if needed
386      */
387     if (locked)
388 	VOP_UNLOCK(vp, 0, p);
389 
390     /*
391      * Release the kernel mapping.
392      */
393     if (a_out)
394 	vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE);
395 
396     return error;
397 }
398 
/*
 * Argument block of the old-style Linux select() call.  linux_select()
 * copies one of these in from the user pointer it is handed and passes
 * the members on, one by one, to linux_newselect().
 *
 * XXX move
 */
struct linux_select_argv {
	int nfds;			/* becomes newsel.nfds */
	fd_set *readfds;		/* becomes newsel.readfds */
	fd_set *writefds;		/* becomes newsel.writefds */
	fd_set *exceptfds;		/* becomes newsel.exceptfds */
	struct timeval *timeout;	/* becomes newsel.timeout */
};
407 
408 int
409 linux_select(struct proc *p, struct linux_select_args *args)
410 {
411     struct linux_select_argv linux_args;
412     struct linux_newselect_args newsel;
413     int error;
414 
415 #ifdef SELECT_DEBUG
416     printf("Linux-emul(%d): select(%x)\n",
417 	   p->p_pid, args->ptr);
418 #endif
419     if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
420 			sizeof(linux_args))))
421 	return error;
422 
423     newsel.nfds = linux_args.nfds;
424     newsel.readfds = linux_args.readfds;
425     newsel.writefds = linux_args.writefds;
426     newsel.exceptfds = linux_args.exceptfds;
427     newsel.timeout = linux_args.timeout;
428 
429     return linux_newselect(p, &newsel);
430 }
431 
432 int
433 linux_newselect(struct proc *p, struct linux_newselect_args *args)
434 {
435     struct select_args bsa;
436     struct timeval tv0, tv1, utv, *tvp;
437     caddr_t sg;
438     int error;
439 
440 #ifdef DEBUG
441     printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n",
442   	(long)p->p_pid, args->nfds, (void *)args->readfds,
443 	(void *)args->writefds, (void *)args->exceptfds,
444 	(void *)args->timeout);
445 #endif
446     error = 0;
447     bsa.nd = args->nfds;
448     bsa.in = args->readfds;
449     bsa.ou = args->writefds;
450     bsa.ex = args->exceptfds;
451     bsa.tv = args->timeout;
452 
453     /*
454      * Store current time for computation of the amount of
455      * time left.
456      */
457     if (args->timeout) {
458 	if ((error = copyin(args->timeout, &utv, sizeof(utv))))
459 	    goto select_out;
460 #ifdef DEBUG
461 	printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n",
462 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
463 #endif
464 	if (itimerfix(&utv)) {
465 	    /*
466 	     * The timeval was invalid.  Convert it to something
467 	     * valid that will act as it does under Linux.
468 	     */
469 	    sg = stackgap_init();
470 	    tvp = stackgap_alloc(&sg, sizeof(utv));
471 	    utv.tv_sec += utv.tv_usec / 1000000;
472 	    utv.tv_usec %= 1000000;
473 	    if (utv.tv_usec < 0) {
474 		utv.tv_sec -= 1;
475 		utv.tv_usec += 1000000;
476 	    }
477 	    if (utv.tv_sec < 0)
478 		timevalclear(&utv);
479 	    if ((error = copyout(&utv, tvp, sizeof(utv))))
480 		goto select_out;
481 	    bsa.tv = tvp;
482 	}
483 	microtime(&tv0);
484     }
485 
486     error = select(p, &bsa);
487 #ifdef DEBUG
488     printf("Linux-emul(%d): real select returns %d\n",
489 	       p->p_pid, error);
490 #endif
491 
492     if (error) {
493 	/*
494 	 * See fs/select.c in the Linux kernel.  Without this,
495 	 * Maelstrom doesn't work.
496 	 */
497 	if (error == ERESTART)
498 	    error = EINTR;
499 	goto select_out;
500     }
501 
502     if (args->timeout) {
503 	if (p->p_retval[0]) {
504 	    /*
505 	     * Compute how much time was left of the timeout,
506 	     * by subtracting the current time and the time
507 	     * before we started the call, and subtracting
508 	     * that result from the user-supplied value.
509 	     */
510 	    microtime(&tv1);
511 	    timevalsub(&tv1, &tv0);
512 	    timevalsub(&utv, &tv1);
513 	    if (utv.tv_sec < 0)
514 		timevalclear(&utv);
515 	} else
516 	    timevalclear(&utv);
517 #ifdef DEBUG
518 	printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n",
519 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
520 #endif
521 	if ((error = copyout(&utv, args->timeout, sizeof(utv))))
522 	    goto select_out;
523     }
524 
525 select_out:
526 #ifdef DEBUG
527     printf("Linux-emul(%d): newselect_out -> %d\n",
528 	       p->p_pid, error);
529 #endif
530     return error;
531 }
532 
533 int
534 linux_getpgid(struct proc *p, struct linux_getpgid_args *args)
535 {
536     struct proc *curproc;
537 
538 #ifdef DEBUG
539     printf("Linux-emul(%d): getpgid(%d)\n", p->p_pid, args->pid);
540 #endif
541     if (args->pid != p->p_pid) {
542 	if (!(curproc = pfind(args->pid)))
543 	    return ESRCH;
544     }
545     else
546 	curproc = p;
547     p->p_retval[0] = curproc->p_pgid;
548     return 0;
549 }
550 
551 int
552 linux_fork(struct proc *p, struct linux_fork_args *args)
553 {
554     int error;
555 
556 #ifdef DEBUG
557     printf("Linux-emul(%d): fork()\n", p->p_pid);
558 #endif
559     if (error = fork(p, (struct fork_args *)args))
560 	return error;
561     if (p->p_retval[1] == 1)
562 	p->p_retval[0] = 0;
563     return 0;
564 }
565 
566 #ifndef COMPAT_LINUX_THREADS
567 int
568 linux_clone(struct proc *p, struct linux_clone_args *args)
569 {
570     printf("linux_clone(%d): Not enabled\n", p->p_pid);
571     return (EOPNOTSUPP);
572 }
573 
574 #else
575 #define CLONE_VM	0x100
576 #define CLONE_FS	0x200
577 #define CLONE_FILES	0x400
578 #define CLONE_SIGHAND	0x800
579 #define CLONE_PID	0x1000
580 
581 int
582 linux_clone(struct proc *p, struct linux_clone_args *args)
583 {
584     int error, ff = RFPROC;
585     struct proc *p2;
586     int            exit_signal;
587     vm_offset_t    start;
588     struct rfork_args rf_args;
589 
590 #ifdef SMP
591     printf("linux_clone(%d): does not work with SMP yet\n", p->p_pid);
592     return (EOPNOTSUPP);
593 #endif
594 #ifdef DEBUG
595     if (args->flags & CLONE_PID)
596 	printf("linux_clone(%d): CLONE_PID not yet supported\n", p->p_pid);
597     printf ("linux_clone(%d): invoked with flags %x and stack %x\n", p->p_pid,
598 	     (unsigned int)args->flags, (unsigned int)args->stack);
599 #endif
600 
601     if (!args->stack)
602         return (EINVAL);
603     exit_signal = args->flags & 0x000000ff;
604     if (exit_signal >= LINUX_NSIG)
605 	return EINVAL;
606     exit_signal = linux_to_bsd_signal[exit_signal];
607 
608     /* RFTHREAD probably not necessary here, but it shouldn't hurt either */
609     ff |= RFTHREAD;
610 
611     if (args->flags & CLONE_VM)
612 	ff |= RFMEM;
613     if (args->flags & CLONE_SIGHAND)
614 	ff |= RFSIGSHARE;
615     if (!(args->flags & CLONE_FILES))
616 	ff |= RFFDG;
617 
618     error = 0;
619     start = 0;
620 
621     rf_args.flags = ff;
622     if (error = rfork(p, &rf_args))
623 	return error;
624 
625     p2 = pfind(p->p_retval[0]);
626     if (p2 == 0)
627  	return ESRCH;
628 
629     p2->p_sigparent = exit_signal;
630     p2->p_md.md_regs->tf_esp = (unsigned int)args->stack;
631 
632 #ifdef DEBUG
633     printf ("linux_clone(%d): successful rfork to %d\n", p->p_pid, p2->p_pid);
634 #endif
635     return 0;
636 }
637 
638 #endif /* COMPAT_LINUX_THREADS */
639 /* XXX move */
640 struct linux_mmap_argv {
641 	linux_caddr_t addr;
642 	int len;
643 	int prot;
644 	int flags;
645 	int fd;
646 	int pos;
647 };
648 
649 #ifdef COMPAT_LINUX_THREADS
/*
 * Limits linux_mmap() applies to LINUX_MAP_GROWSDOWN mappings: such a
 * region is made at least (STACK_SIZE - GUARD_SIZE) bytes long.
 */
#define STACK_SIZE  (2 * 1024 * 1024)
#define GUARD_SIZE  (4 * PAGE_SIZE)
652 
653 #endif /* COMPAT_LINUX_THREADS */
654 int
655 linux_mmap(struct proc *p, struct linux_mmap_args *args)
656 {
657     struct mmap_args /* {
658 	caddr_t addr;
659 	size_t len;
660 	int prot;
661 	int flags;
662 	int fd;
663 	long pad;
664 	off_t pos;
665     } */ bsd_args;
666     int error;
667     struct linux_mmap_argv linux_args;
668 
669     if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
670 			sizeof(linux_args))))
671 	return error;
672 #ifdef DEBUG
673     printf("Linux-emul(%ld): mmap(%p, %d, %d, %08x, %d, %d)\n",
674 	(long)p->p_pid, (void *)linux_args.addr, linux_args.len,
675 	linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos);
676 #endif
677     bsd_args.flags = 0;
678     if (linux_args.flags & LINUX_MAP_SHARED)
679 	bsd_args.flags |= MAP_SHARED;
680     if (linux_args.flags & LINUX_MAP_PRIVATE)
681 	bsd_args.flags |= MAP_PRIVATE;
682     if (linux_args.flags & LINUX_MAP_FIXED)
683 	bsd_args.flags |= MAP_FIXED;
684     if (linux_args.flags & LINUX_MAP_ANON)
685 	bsd_args.flags |= MAP_ANON;
686 #ifndef COMPAT_LINUX_THREADS
687     bsd_args.addr = linux_args.addr;
688     bsd_args.len = linux_args.len;
689 #else
690 
691 #ifndef VM_STACK
692     /* Linux Threads will map into the proc stack space, unless
693      * we prevent it.  This causes problems if we're not using
694      * our VM_STACK options.
695      */
696     if ((unsigned int)linux_args.addr + linux_args.len > (USRSTACK - MAXSSIZ))
697 	return (EINVAL);
698 #endif
699 
700     if (linux_args.flags & LINUX_MAP_GROWSDOWN) {
701 
702 #ifdef VM_STACK
703 	bsd_args.flags |= MAP_STACK;
704 #endif
705 
706 	/* The linux MAP_GROWSDOWN option does not limit auto
707 	 * growth of the region.  Linux mmap with this option
708 	 * takes as addr the inital BOS, and as len, the initial
709 	 * region size.  It can then grow down from addr without
710 	 * limit.  However, linux threads has an implicit internal
711 	 * limit to stack size of STACK_SIZE.  Its just not
712 	 * enforced explicitly in linux.  But, here we impose
713 	 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
714 	 * region, since we can do this with our mmap.
715 	 *
716 	 * Our mmap with MAP_STACK takes addr as the maximum
717 	 * downsize limit on BOS, and as len the max size of
718 	 * the region.  It them maps the top SGROWSIZ bytes,
719 	 * and autgrows the region down, up to the limit
720 	 * in addr.
721 	 *
722 	 * If we don't use the MAP_STACK option, the effect
723 	 * of this code is to allocate a stack region of a
724 	 * fixed size of (STACK_SIZE - GUARD_SIZE).
725 	 */
726 
727 	/* This gives us TOS */
728 	bsd_args.addr = linux_args.addr + linux_args.len;
729 
730 	/* This gives us our maximum stack size */
731 	if (linux_args.len > STACK_SIZE - GUARD_SIZE)
732 	    bsd_args.len = linux_args.len;
733 	else
734 	    bsd_args.len  = STACK_SIZE - GUARD_SIZE;
735 
736 	/* This gives us a new BOS.  If we're using VM_STACK, then
737 	 * mmap will just map the top SGROWSIZ bytes, and let
738 	 * the stack grow down to the limit at BOS.  If we're
739 	 * not using VM_STACK we map the full stack, since we
740 	 * don't have a way to autogrow it.
741 	 */
742 	bsd_args.addr -= bsd_args.len;
743 
744     } else {
745 	bsd_args.addr = linux_args.addr;
746 	bsd_args.len  = linux_args.len;
747     }
748 #endif /* COMPAT_LINUX_THREADS */
749     bsd_args.prot = linux_args.prot | PROT_READ;	/* always required */
750     bsd_args.fd = linux_args.fd;
751     bsd_args.pos = linux_args.pos;
752     bsd_args.pad = 0;
753     return mmap(p, &bsd_args);
754 }
755 
756 int
757 linux_mremap(struct proc *p, struct linux_mremap_args *args)
758 {
759 	struct munmap_args /* {
760 		void *addr;
761 		size_t len;
762 	} */ bsd_args;
763 	int error = 0;
764 
765 #ifdef DEBUG
766 	printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n",
767 	    (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len,
768 	    args->flags);
769 #endif
770 	args->new_len = round_page(args->new_len);
771 	args->old_len = round_page(args->old_len);
772 
773 	if (args->new_len > args->old_len) {
774 		p->p_retval[0] = 0;
775 		return ENOMEM;
776 	}
777 
778 	if (args->new_len < args->old_len) {
779 		bsd_args.addr = args->addr + args->new_len;
780 		bsd_args.len = args->old_len - args->new_len;
781 		error = munmap(p, &bsd_args);
782 	}
783 
784 	p->p_retval[0] = error ? 0 : (int)args->addr;
785 	return error;
786 }
787 
788 int
789 linux_msync(struct proc *p, struct linux_msync_args *args)
790 {
791 	struct msync_args bsd_args;
792 
793 	bsd_args.addr = args->addr;
794 	bsd_args.len = args->len;
795 	bsd_args.flags = 0;	/* XXX ignore */
796 
797 	return msync(p, &bsd_args);
798 }
799 
800 int
801 linux_pipe(struct proc *p, struct linux_pipe_args *args)
802 {
803     int error;
804     int reg_edx;
805 
806 #ifdef DEBUG
807     printf("Linux-emul(%d): pipe(*)\n", p->p_pid);
808 #endif
809     reg_edx = p->p_retval[1];
810     if (error = pipe(p, 0)) {
811 	p->p_retval[1] = reg_edx;
812 	return error;
813     }
814 
815     if (error = copyout(p->p_retval, args->pipefds, 2*sizeof(int))) {
816 	p->p_retval[1] = reg_edx;
817 	return error;
818     }
819 
820     p->p_retval[1] = reg_edx;
821     p->p_retval[0] = 0;
822     return 0;
823 }
824 
825 int
826 linux_time(struct proc *p, struct linux_time_args *args)
827 {
828     struct timeval tv;
829     linux_time_t tm;
830     int error;
831 
832 #ifdef DEBUG
833     printf("Linux-emul(%d): time(*)\n", p->p_pid);
834 #endif
835     microtime(&tv);
836     tm = tv.tv_sec;
837     if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t))))
838 	return error;
839     p->p_retval[0] = tm;
840     return 0;
841 }
842 
/*
 * Result buffer of the Linux times() call, as copied out by
 * linux_times().  All values are in clock ticks (see CONVTCK).
 */
struct linux_times_argv {
    long    tms_utime;		/* user time of the process */
    long    tms_stime;		/* system time of the process */
    long    tms_cutime;		/* user time from p_stats->p_cru */
    long    tms_cstime;		/* system time from p_stats->p_cru */
};

#define CLK_TCK 100	/* Linux uses 100 */

/*
 * Convert a struct timeval (an lvalue expression, not a pointer) to
 * CLK_TCK ticks.  The argument is parenthesized so that expressions such
 * as CONVTCK(*ptr) expand correctly.
 */
#define CONVTCK(r)	((r).tv_sec * CLK_TCK + (r).tv_usec / (1000000 / CLK_TCK))
852 
853 int
854 linux_times(struct proc *p, struct linux_times_args *args)
855 {
856     struct timeval tv;
857     struct linux_times_argv tms;
858     struct rusage ru;
859     int error;
860 
861 #ifdef DEBUG
862     printf("Linux-emul(%d): times(*)\n", p->p_pid);
863 #endif
864     calcru(p, &ru.ru_utime, &ru.ru_stime, NULL);
865 
866     tms.tms_utime = CONVTCK(ru.ru_utime);
867     tms.tms_stime = CONVTCK(ru.ru_stime);
868 
869     tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime);
870     tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime);
871 
872     if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf,
873 	    	    sizeof(struct linux_times_argv))))
874 	return error;
875 
876     microuptime(&tv);
877     p->p_retval[0] = (int)CONVTCK(tv);
878     return 0;
879 }
880 
/*
 * Result buffer copied out by linux_newuname(): six fixed-size,
 * NUL-terminated strings of at most 64 characters each.
 *
 * XXX move
 */
struct linux_newuname_t {
    char sysname[65];		/* filled from ostype */
    char nodename[65];		/* filled from hostname */
    char release[65];		/* filled from osrelease */
    char version[65];		/* filled from version */
    char machine[65];		/* filled from machine */
    char domainname[65];	/* filled from domainname */
};
890 
891 int
892 linux_newuname(struct proc *p, struct linux_newuname_args *args)
893 {
894     struct linux_newuname_t linux_newuname;
895 
896 #ifdef DEBUG
897     printf("Linux-emul(%d): newuname(*)\n", p->p_pid);
898 #endif
899     bzero(&linux_newuname, sizeof(struct linux_newuname_t));
900     strncpy(linux_newuname.sysname, ostype,
901 	sizeof(linux_newuname.sysname) - 1);
902     strncpy(linux_newuname.nodename, hostname,
903 	sizeof(linux_newuname.nodename) - 1);
904     strncpy(linux_newuname.release, osrelease,
905 	sizeof(linux_newuname.release) - 1);
906     strncpy(linux_newuname.version, version,
907 	sizeof(linux_newuname.version) - 1);
908     strncpy(linux_newuname.machine, machine,
909 	sizeof(linux_newuname.machine) - 1);
910     strncpy(linux_newuname.domainname, domainname,
911 	sizeof(linux_newuname.domainname) - 1);
912     return (copyout((caddr_t)&linux_newuname, (caddr_t)args->buf,
913 	    	    sizeof(struct linux_newuname_t)));
914 }
915 
916 struct linux_utimbuf {
917 	linux_time_t l_actime;
918 	linux_time_t l_modtime;
919 };
920 
921 int
922 linux_utime(struct proc *p, struct linux_utime_args *args)
923 {
924     struct utimes_args /* {
925 	char	*path;
926 	struct	timeval *tptr;
927     } */ bsdutimes;
928     struct timeval tv[2], *tvp;
929     struct linux_utimbuf lut;
930     int error;
931     caddr_t sg;
932 
933     sg = stackgap_init();
934     CHECKALTEXIST(p, &sg, args->fname);
935 
936 #ifdef DEBUG
937     printf("Linux-emul(%d): utime(%s, *)\n", p->p_pid, args->fname);
938 #endif
939     if (args->times) {
940 	if ((error = copyin(args->times, &lut, sizeof lut)))
941 	    return error;
942 	tv[0].tv_sec = lut.l_actime;
943 	tv[0].tv_usec = 0;
944 	tv[1].tv_sec = lut.l_modtime;
945 	tv[1].tv_usec = 0;
946 	/* so that utimes can copyin */
947 	tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
948 	if ((error = copyout(tv, tvp, sizeof(tv))))
949 	    return error;
950 	bsdutimes.tptr = tvp;
951     } else
952 	bsdutimes.tptr = NULL;
953 
954     bsdutimes.path = args->fname;
955     return utimes(p, &bsdutimes);
956 }
957 
958 int
959 linux_waitpid(struct proc *p, struct linux_waitpid_args *args)
960 {
961     struct wait_args /* {
962 	int pid;
963 	int *status;
964 	int options;
965 	struct	rusage *rusage;
966     } */ tmp;
967     int error, tmpstat;
968 
969 #ifdef DEBUG
970     printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n",
971 	(long)p->p_pid, args->pid, (void *)args->status, args->options);
972 #endif
973     tmp.pid = args->pid;
974     tmp.status = args->status;
975 #ifndef COMPAT_LINUX_THREADS
976     tmp.options = args->options;
977 #else
978     /* This filters out the linux option _WCLONE.  I don't
979      * think we need it, but I could be wrong.  If we need
980      * it, we need to fix wait4, since it will give us an
981      * error return of EINVAL if we pass in _WCLONE, and
982      * of course, it won't do anything with it.
983      */
984     tmp.options = (args->options & (WNOHANG | WUNTRACED));
985 #endif /* COMPAT_LINUX_THREADS */
986     tmp.rusage = NULL;
987 
988     if (error = wait4(p, &tmp))
989 #ifndef COMPAT_LINUX_THREADS
990 	return error;
991 #else
992 	return error;
993 #endif /* COMPAT_LINUX_THREADS */
994     if (args->status) {
995 	if (error = copyin(args->status, &tmpstat, sizeof(int)))
996 	    return error;
997 	if (WIFSIGNALED(tmpstat))
998 	    tmpstat = (tmpstat & 0xffffff80) |
999 		      bsd_to_linux_signal[WTERMSIG(tmpstat)];
1000 	else if (WIFSTOPPED(tmpstat))
1001 	    tmpstat = (tmpstat & 0xffff00ff) |
1002 		      (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8);
1003 	return copyout(&tmpstat, args->status, sizeof(int));
1004     } else
1005 	return 0;
1006 }
1007 
1008 int
1009 linux_wait4(struct proc *p, struct linux_wait4_args *args)
1010 {
1011     struct wait_args /* {
1012 	int pid;
1013 	int *status;
1014 	int options;
1015 	struct	rusage *rusage;
1016     } */ tmp;
1017     int error, tmpstat;
1018 
1019 #ifdef DEBUG
1020     printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n",
1021 	(long)p->p_pid, args->pid, (void *)args->status, args->options,
1022 	(void *)args->rusage);
1023 #endif
1024     tmp.pid = args->pid;
1025     tmp.status = args->status;
1026 #ifndef COMPAT_LINUX_THREADS
1027     tmp.options = args->options;
1028 #else
1029     /* This filters out the linux option _WCLONE.  I don't
1030      * think we need it, but I could be wrong.  If we need
1031      * it, we need to fix wait4, since it will give us an
1032      * error return of EINVAL if we pass in _WCLONE, and
1033      * of course, it won't do anything with it.
1034      */
1035     tmp.options = (args->options & (WNOHANG | WUNTRACED));
1036 #endif /* COMPAT_LINUX_THREADS */
1037     tmp.rusage = args->rusage;
1038 
1039     if (error = wait4(p, &tmp))
1040 	return error;
1041 
1042     p->p_siglist &= ~sigmask(SIGCHLD);
1043 
1044     if (args->status) {
1045 	if (error = copyin(args->status, &tmpstat, sizeof(int)))
1046 	    return error;
1047 	if (WIFSIGNALED(tmpstat))
1048 	    tmpstat = (tmpstat & 0xffffff80) |
1049 		  bsd_to_linux_signal[WTERMSIG(tmpstat)];
1050 	else if (WIFSTOPPED(tmpstat))
1051 	    tmpstat = (tmpstat & 0xffff00ff) |
1052 		  (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8);
1053 	return copyout(&tmpstat, args->status, sizeof(int));
1054     } else
1055 	return 0;
1056 }
1057 
1058 int
1059 linux_mknod(struct proc *p, struct linux_mknod_args *args)
1060 {
1061 	caddr_t sg;
1062 	struct mknod_args bsd_mknod;
1063 	struct mkfifo_args bsd_mkfifo;
1064 
1065 	sg = stackgap_init();
1066 
1067 	CHECKALTCREAT(p, &sg, args->path);
1068 
1069 #ifdef DEBUG
1070 	printf("Linux-emul(%d): mknod(%s, %d, %d)\n",
1071 	   p->p_pid, args->path, args->mode, args->dev);
1072 #endif
1073 
1074 	if (args->mode & S_IFIFO) {
1075 		bsd_mkfifo.path = args->path;
1076 		bsd_mkfifo.mode = args->mode;
1077 		return mkfifo(p, &bsd_mkfifo);
1078 	} else {
1079 		bsd_mknod.path = args->path;
1080 		bsd_mknod.mode = args->mode;
1081 		bsd_mknod.dev = args->dev;
1082 		return mknod(p, &bsd_mknod);
1083 	}
1084 }
1085 
1086 /*
1087  * UGH! This is just about the dumbest idea I've ever heard!!
1088  */
1089 int
1090 linux_personality(struct proc *p, struct linux_personality_args *args)
1091 {
1092 #ifdef DEBUG
1093 	printf("Linux-emul(%d): personality(%d)\n",
1094 	   p->p_pid, args->per);
1095 #endif
1096 	if (args->per != 0)
1097 		return EINVAL;
1098 
1099 	/* Yes Jim, it's still a Linux... */
1100 	p->p_retval[0] = 0;
1101 	return 0;
1102 }
1103 
1104 /*
1105  * Wrappers for get/setitimer for debugging..
1106  */
1107 int
1108 linux_setitimer(struct proc *p, struct linux_setitimer_args *args)
1109 {
1110 	struct setitimer_args bsa;
1111 	struct itimerval foo;
1112 	int error;
1113 
1114 #ifdef DEBUG
1115 	printf("Linux-emul(%ld): setitimer(%p, %p)\n",
1116 	    (long)p->p_pid, (void *)args->itv, (void *)args->oitv);
1117 #endif
1118 	bsa.which = args->which;
1119 	bsa.itv = args->itv;
1120 	bsa.oitv = args->oitv;
1121 	if (args->itv) {
1122 	    if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo,
1123 			sizeof(foo))))
1124 		return error;
1125 #ifdef DEBUG
1126 	    printf("setitimer: value: sec: %ld, usec: %ld\n",
1127 		foo.it_value.tv_sec, foo.it_value.tv_usec);
1128 	    printf("setitimer: interval: sec: %ld, usec: %ld\n",
1129 		foo.it_interval.tv_sec, foo.it_interval.tv_usec);
1130 #endif
1131 	}
1132 	return setitimer(p, &bsa);
1133 }
1134 
1135 int
1136 linux_getitimer(struct proc *p, struct linux_getitimer_args *args)
1137 {
1138 	struct getitimer_args bsa;
1139 #ifdef DEBUG
1140 	printf("Linux-emul(%ld): getitimer(%p)\n",
1141 	    (long)p->p_pid, (void *)args->itv);
1142 #endif
1143 	bsa.which = args->which;
1144 	bsa.itv = args->itv;
1145 	return getitimer(p, &bsa);
1146 }
1147 
1148 int
1149 linux_iopl(struct proc *p, struct linux_iopl_args *args)
1150 {
1151 	int error;
1152 
1153 	error = suser(p->p_ucred, &p->p_acflag);
1154 	if (error != 0)
1155 		return error;
1156 	if (securelevel > 0)
1157 		return EPERM;
1158 	p->p_md.md_regs->tf_eflags |= PSL_IOPL;
1159 	return 0;
1160 }
1161 
1162 int
1163 linux_nice(struct proc *p, struct linux_nice_args *args)
1164 {
1165 	struct setpriority_args	bsd_args;
1166 
1167 	bsd_args.which = PRIO_PROCESS;
1168 	bsd_args.who = 0;	/* current process */
1169 	bsd_args.prio = args->inc;
1170 	return setpriority(p, &bsd_args);
1171 }
1172 
1173 int
1174 linux_setgroups(p, uap)
1175      struct proc *p;
1176      struct linux_setgroups_args *uap;
1177 {
1178   struct pcred *pc = p->p_cred;
1179   linux_gid_t linux_gidset[NGROUPS];
1180   gid_t *bsd_gidset;
1181   int ngrp, error;
1182 
1183   if ((error = suser(pc->pc_ucred, &p->p_acflag)))
1184     return error;
1185 
1186   if (uap->gidsetsize > NGROUPS)
1187     return EINVAL;
1188 
1189   ngrp = uap->gidsetsize;
1190   pc->pc_ucred = crcopy(pc->pc_ucred);
1191   if (ngrp >= 1) {
1192     if ((error = copyin((caddr_t)uap->gidset,
1193                       (caddr_t)linux_gidset,
1194                         ngrp * sizeof(linux_gid_t))))
1195       return error;
1196 
1197     pc->pc_ucred->cr_ngroups = ngrp;
1198 
1199     bsd_gidset = pc->pc_ucred->cr_groups;
1200     ngrp--;
1201     while (ngrp >= 0) {
1202       bsd_gidset[ngrp] = linux_gidset[ngrp];
1203       ngrp--;
1204     }
1205   }
1206   else
1207     pc->pc_ucred->cr_ngroups = 1;
1208 
1209   setsugid(p);
1210   return 0;
1211 }
1212 
1213 int
1214 linux_getgroups(p, uap)
1215      struct proc *p;
1216      struct linux_getgroups_args *uap;
1217 {
1218   struct pcred *pc = p->p_cred;
1219   linux_gid_t linux_gidset[NGROUPS];
1220   gid_t *bsd_gidset;
1221   int ngrp, error;
1222 
1223   if ((ngrp = uap->gidsetsize) == 0) {
1224     p->p_retval[0] = pc->pc_ucred->cr_ngroups;
1225     return 0;
1226   }
1227 
1228   if (ngrp < pc->pc_ucred->cr_ngroups)
1229     return EINVAL;
1230 
1231   ngrp = 0;
1232   bsd_gidset = pc->pc_ucred->cr_groups;
1233   while (ngrp < pc->pc_ucred->cr_ngroups) {
1234     linux_gidset[ngrp] = bsd_gidset[ngrp];
1235     ngrp++;
1236   }
1237 
1238   if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset,
1239                        ngrp * sizeof(linux_gid_t))))
1240     return error;
1241 
1242   p->p_retval[0] = ngrp;
1243   return (0);
1244 }
1245