xref: /freebsd/sys/compat/linux/linux_misc.c (revision 1ef8fbeabfb41f932bd0b8184ba5da8f9f2e3a2d)
1 /*-
2  * Copyright (c) 1994-1995 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include "opt_compat.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/sysproto.h>
36 #include <sys/kernel.h>
37 #include <sys/mman.h>
38 #include <sys/proc.h>
39 #include <sys/fcntl.h>
40 #include <sys/imgact_aout.h>
41 #include <sys/mount.h>
42 #include <sys/namei.h>
43 #include <sys/resourcevar.h>
44 #include <sys/stat.h>
45 #include <sys/sysctl.h>
46 #include <sys/unistd.h>
47 #include <sys/vnode.h>
48 #include <sys/wait.h>
49 #include <sys/time.h>
50 
51 #include <vm/vm.h>
52 #include <vm/pmap.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_prot.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_extern.h>
57 
58 #include <machine/frame.h>
59 #include <machine/psl.h>
60 
61 #include <i386/linux/linux.h>
62 #include <i386/linux/linux_proto.h>
63 #include <i386/linux/linux_util.h>
64 #include <i386/linux/linux_mib.h>
65 
66 #include <posix4/sched.h>
67 
68 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] =
69 { RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
70   RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
71   RLIMIT_MEMLOCK, -1
72 };
73 
74 int
75 linux_alarm(struct proc *p, struct linux_alarm_args *args)
76 {
77     struct itimerval it, old_it;
78     struct timeval tv;
79     int s;
80 
81 #ifdef DEBUG
82     printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs);
83 #endif
84     if (args->secs > 100000000)
85 	return EINVAL;
86     it.it_value.tv_sec = (long)args->secs;
87     it.it_value.tv_usec = 0;
88     it.it_interval.tv_sec = 0;
89     it.it_interval.tv_usec = 0;
90     s = splsoftclock();
91     old_it = p->p_realtimer;
92     getmicrouptime(&tv);
93     if (timevalisset(&old_it.it_value))
94 	untimeout(realitexpire, (caddr_t)p, p->p_ithandle);
95     if (it.it_value.tv_sec != 0) {
96 	p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value));
97 	timevaladd(&it.it_value, &tv);
98     }
99     p->p_realtimer = it;
100     splx(s);
101     if (timevalcmp(&old_it.it_value, &tv, >)) {
102 	timevalsub(&old_it.it_value, &tv);
103 	if (old_it.it_value.tv_usec != 0)
104 	    old_it.it_value.tv_sec++;
105 	p->p_retval[0] = old_it.it_value.tv_sec;
106     }
107     return 0;
108 }
109 
110 int
111 linux_brk(struct proc *p, struct linux_brk_args *args)
112 {
113 #if 0
114     struct vmspace *vm = p->p_vmspace;
115     vm_offset_t new, old;
116     int error;
117 
118     if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr)
119 	return EINVAL;
120     if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr)
121 	> p->p_rlimit[RLIMIT_DATA].rlim_cur)
122 	return ENOMEM;
123 
124     old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize);
125     new = round_page((vm_offset_t)args->dsend);
126     p->p_retval[0] = old;
127     if ((new-old) > 0) {
128 	if (swap_pager_full)
129 	    return ENOMEM;
130 	error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE,
131 			VM_PROT_ALL, VM_PROT_ALL, 0);
132 	if (error)
133 	    return error;
134 	vm->vm_dsize += btoc((new-old));
135 	p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize));
136     }
137     return 0;
138 #else
139     struct vmspace *vm = p->p_vmspace;
140     vm_offset_t new, old;
141     struct obreak_args /* {
142 	char * nsize;
143     } */ tmp;
144 
145 #ifdef DEBUG
146     printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend);
147 #endif
148     old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
149     new = (vm_offset_t)args->dsend;
150     tmp.nsize = (char *) new;
151     if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp))
152 	p->p_retval[0] = (int)new;
153     else
154 	p->p_retval[0] = (int)old;
155 
156     return 0;
157 #endif
158 }
159 
160 int
161 linux_uselib(struct proc *p, struct linux_uselib_args *args)
162 {
163     struct nameidata ni;
164     struct vnode *vp;
165     struct exec *a_out;
166     struct vattr attr;
167     vm_offset_t vmaddr;
168     unsigned long file_offset;
169     vm_offset_t buffer;
170     unsigned long bss_size;
171     int error;
172     caddr_t sg;
173     int locked;
174 
175     sg = stackgap_init();
176     CHECKALTEXIST(p, &sg, args->library);
177 
178 #ifdef DEBUG
179     printf("Linux-emul(%ld): uselib(%s)\n", (long)p->p_pid, args->library);
180 #endif
181 
182     a_out = NULL;
183     locked = 0;
184     vp = NULL;
185 
186     NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, args->library, p);
187     error = namei(&ni);
188     if (error)
189 	goto cleanup;
190 
191     vp = ni.ni_vp;
192     if (vp == NULL) {
193 	error = ENOEXEC;	/* ?? */
194 	goto cleanup;
195     }
196 
197     /*
198      * From here on down, we have a locked vnode that must be unlocked.
199      */
200     locked++;
201 
202     /*
203      * Writable?
204      */
205     if (vp->v_writecount) {
206 	error = ETXTBSY;
207 	goto cleanup;
208     }
209 
210     /*
211      * Executable?
212      */
213     error = VOP_GETATTR(vp, &attr, p->p_ucred, p);
214     if (error)
215 	goto cleanup;
216 
217     if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
218 	((attr.va_mode & 0111) == 0) ||
219 	(attr.va_type != VREG)) {
220 	    error = ENOEXEC;
221 	    goto cleanup;
222     }
223 
224     /*
225      * Sensible size?
226      */
227     if (attr.va_size == 0) {
228 	error = ENOEXEC;
229 	goto cleanup;
230     }
231 
232     /*
233      * Can we access it?
234      */
235     error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
236     if (error)
237 	goto cleanup;
238 
239     error = VOP_OPEN(vp, FREAD, p->p_ucred, p);
240     if (error)
241 	goto cleanup;
242 
243     /*
244      * Lock no longer needed
245      */
246     VOP_UNLOCK(vp, 0, p);
247     locked = 0;
248 
249     /*
250      * Pull in executable header into kernel_map
251      */
252     error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
253 	    	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
254     if (error)
255 	goto cleanup;
256 
257     /*
258      * Is it a Linux binary ?
259      */
260     if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
261 	error = ENOEXEC;
262 	goto cleanup;
263     }
264 
265     /* While we are here, we should REALLY do some more checks */
266 
267     /*
268      * Set file/virtual offset based on a.out variant.
269      */
270     switch ((int)(a_out->a_magic & 0xffff)) {
271     case 0413:	/* ZMAGIC */
272 	file_offset = 1024;
273 	break;
274     case 0314:	/* QMAGIC */
275 	file_offset = 0;
276 	break;
277     default:
278 	error = ENOEXEC;
279 	goto cleanup;
280     }
281 
282     bss_size = round_page(a_out->a_bss);
283 
284     /*
285      * Check various fields in header for validity/bounds.
286      */
287     if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
288 	error = ENOEXEC;
289 	goto cleanup;
290     }
291 
292     /* text + data can't exceed file size */
293     if (a_out->a_data + a_out->a_text > attr.va_size) {
294 	error = EFAULT;
295 	goto cleanup;
296     }
297 
298     /*
299      * text/data/bss must not exceed limits
300      * XXX: this is not complete. it should check current usage PLUS
301      * the resources needed by this library.
302      */
303     if (a_out->a_text > MAXTSIZ ||
304 	a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) {
305 	error = ENOMEM;
306 	goto cleanup;
307     }
308 
309     /*
310      * prevent more writers
311      */
312     vp->v_flag |= VTEXT;
313 
314     /*
315      * Check if file_offset page aligned,.
316      * Currently we cannot handle misalinged file offsets,
317      * and so we read in the entire image (what a waste).
318      */
319     if (file_offset & PAGE_MASK) {
320 #ifdef DEBUG
321 printf("uselib: Non page aligned binary %lu\n", file_offset);
322 #endif
323 	/*
324 	 * Map text+data read/write/execute
325 	 */
326 
327 	/* a_entry is the load address and is page aligned */
328 	vmaddr = trunc_page(a_out->a_entry);
329 
330 	/* get anon user mapping, read+write+execute */
331 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
332 		    	    a_out->a_text + a_out->a_data, FALSE,
333 			    VM_PROT_ALL, VM_PROT_ALL, 0);
334 	if (error)
335 	    goto cleanup;
336 
337 	/* map file into kernel_map */
338 	error = vm_mmap(kernel_map, &buffer,
339 			round_page(a_out->a_text + a_out->a_data + file_offset),
340 		   	VM_PROT_READ, VM_PROT_READ, 0,
341 			(caddr_t)vp, trunc_page(file_offset));
342 	if (error)
343 	    goto cleanup;
344 
345 	/* copy from kernel VM space to user space */
346 	error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset),
347 			(caddr_t)vmaddr, a_out->a_text + a_out->a_data);
348 
349 	/* release temporary kernel space */
350 	vm_map_remove(kernel_map, buffer,
351 		      buffer + round_page(a_out->a_text + a_out->a_data + file_offset));
352 
353 	if (error)
354 	    goto cleanup;
355     }
356     else {
357 #ifdef DEBUG
358 printf("uselib: Page aligned binary %lu\n", file_offset);
359 #endif
360 	/*
361 	 * for QMAGIC, a_entry is 20 bytes beyond the load address
362 	 * to skip the executable header
363 	 */
364 	vmaddr = trunc_page(a_out->a_entry);
365 
366 	/*
367 	 * Map it all into the process's space as a single copy-on-write
368 	 * "data" segment.
369 	 */
370 	error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr,
371 		   	a_out->a_text + a_out->a_data,
372 			VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
373 			(caddr_t)vp, file_offset);
374 	if (error)
375 	    goto cleanup;
376     }
377 #ifdef DEBUG
378 printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]);
379 #endif
380     if (bss_size != 0) {
381         /*
382 	 * Calculate BSS start address
383 	 */
384 	vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data;
385 
386 	/*
387 	 * allocate some 'anon' space
388 	 */
389 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
390 			    bss_size, FALSE,
391 			    VM_PROT_ALL, VM_PROT_ALL, 0);
392 	if (error)
393 	    goto cleanup;
394     }
395 
396 cleanup:
397     /*
398      * Unlock vnode if needed
399      */
400     if (locked)
401 	VOP_UNLOCK(vp, 0, p);
402 
403     /*
404      * Release the kernel mapping.
405      */
406     if (a_out)
407 	vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE);
408 
409     return error;
410 }
411 
412 /* XXX move */
413 struct linux_select_argv {
414 	int nfds;
415 	fd_set *readfds;
416 	fd_set *writefds;
417 	fd_set *exceptfds;
418 	struct timeval *timeout;
419 };
420 
421 int
422 linux_select(struct proc *p, struct linux_select_args *args)
423 {
424     struct linux_select_argv linux_args;
425     struct linux_newselect_args newsel;
426     int error;
427 
428 #ifdef SELECT_DEBUG
429     printf("Linux-emul(%ld): select(%x)\n", (long)p->p_pid, args->ptr);
430 #endif
431     if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
432 			sizeof(linux_args))))
433 	return error;
434 
435     newsel.nfds = linux_args.nfds;
436     newsel.readfds = linux_args.readfds;
437     newsel.writefds = linux_args.writefds;
438     newsel.exceptfds = linux_args.exceptfds;
439     newsel.timeout = linux_args.timeout;
440 
441     return linux_newselect(p, &newsel);
442 }
443 
444 int
445 linux_newselect(struct proc *p, struct linux_newselect_args *args)
446 {
447     struct select_args bsa;
448     struct timeval tv0, tv1, utv, *tvp;
449     caddr_t sg;
450     int error;
451 
452 #ifdef DEBUG
453     printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n",
454   	(long)p->p_pid, args->nfds, (void *)args->readfds,
455 	(void *)args->writefds, (void *)args->exceptfds,
456 	(void *)args->timeout);
457 #endif
458     error = 0;
459     bsa.nd = args->nfds;
460     bsa.in = args->readfds;
461     bsa.ou = args->writefds;
462     bsa.ex = args->exceptfds;
463     bsa.tv = args->timeout;
464 
465     /*
466      * Store current time for computation of the amount of
467      * time left.
468      */
469     if (args->timeout) {
470 	if ((error = copyin(args->timeout, &utv, sizeof(utv))))
471 	    goto select_out;
472 #ifdef DEBUG
473 	printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n",
474 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
475 #endif
476 	if (itimerfix(&utv)) {
477 	    /*
478 	     * The timeval was invalid.  Convert it to something
479 	     * valid that will act as it does under Linux.
480 	     */
481 	    sg = stackgap_init();
482 	    tvp = stackgap_alloc(&sg, sizeof(utv));
483 	    utv.tv_sec += utv.tv_usec / 1000000;
484 	    utv.tv_usec %= 1000000;
485 	    if (utv.tv_usec < 0) {
486 		utv.tv_sec -= 1;
487 		utv.tv_usec += 1000000;
488 	    }
489 	    if (utv.tv_sec < 0)
490 		timevalclear(&utv);
491 	    if ((error = copyout(&utv, tvp, sizeof(utv))))
492 		goto select_out;
493 	    bsa.tv = tvp;
494 	}
495 	microtime(&tv0);
496     }
497 
498     error = select(p, &bsa);
499 #ifdef DEBUG
500     printf("Linux-emul(%ld): real select returns %d\n", (long)p->p_pid, error);
501 #endif
502 
503     if (error) {
504 	/*
505 	 * See fs/select.c in the Linux kernel.  Without this,
506 	 * Maelstrom doesn't work.
507 	 */
508 	if (error == ERESTART)
509 	    error = EINTR;
510 	goto select_out;
511     }
512 
513     if (args->timeout) {
514 	if (p->p_retval[0]) {
515 	    /*
516 	     * Compute how much time was left of the timeout,
517 	     * by subtracting the current time and the time
518 	     * before we started the call, and subtracting
519 	     * that result from the user-supplied value.
520 	     */
521 	    microtime(&tv1);
522 	    timevalsub(&tv1, &tv0);
523 	    timevalsub(&utv, &tv1);
524 	    if (utv.tv_sec < 0)
525 		timevalclear(&utv);
526 	} else
527 	    timevalclear(&utv);
528 #ifdef DEBUG
529 	printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n",
530 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
531 #endif
532 	if ((error = copyout(&utv, args->timeout, sizeof(utv))))
533 	    goto select_out;
534     }
535 
536 select_out:
537 #ifdef DEBUG
538     printf("Linux-emul(%ld): newselect_out -> %d\n", (long)p->p_pid, error);
539 #endif
540     return error;
541 }
542 
543 int
544 linux_getpgid(struct proc *p, struct linux_getpgid_args *args)
545 {
546     struct proc *curp;
547 
548 #ifdef DEBUG
549     printf("Linux-emul(%ld): getpgid(%d)\n", (long)p->p_pid, args->pid);
550 #endif
551     if (args->pid != p->p_pid) {
552 	if (!(curp = pfind(args->pid)))
553 	    return ESRCH;
554     }
555     else
556 	curp = p;
557     p->p_retval[0] = curp->p_pgid;
558     return 0;
559 }
560 
561 int
562 linux_fork(struct proc *p, struct linux_fork_args *args)
563 {
564     int error;
565 
566 #ifdef DEBUG
567     printf("Linux-emul(%ld): fork()\n", (long)p->p_pid);
568 #endif
569     if ((error = fork(p, (struct fork_args *)args)) != 0)
570 	return error;
571     if (p->p_retval[1] == 1)
572 	p->p_retval[0] = 0;
573     return 0;
574 }
575 
576 int
577 linux_vfork(struct proc *p, struct linux_vfork_args *args)
578 {
579 	int error;
580 
581 #ifdef DEBUG
582 	printf("Linux-emul(%ld): vfork()\n", (long)p->p_pid);
583 #endif
584 
585 	if ((error = vfork(p, (struct vfork_args *)args)) != 0)
586 		return error;
587 	/* Are we the child? */
588 	if (p->p_retval[1] == 1)
589 		p->p_retval[0] = 0;
590 	return 0;
591 }
592 
593 #define CLONE_VM	0x100
594 #define CLONE_FS	0x200
595 #define CLONE_FILES	0x400
596 #define CLONE_SIGHAND	0x800
597 #define CLONE_PID	0x1000
598 
599 int
600 linux_clone(struct proc *p, struct linux_clone_args *args)
601 {
602     int error, ff = RFPROC;
603     struct proc *p2;
604     int            exit_signal;
605     vm_offset_t    start;
606     struct rfork_args rf_args;
607 
608 #ifdef DEBUG
609     if (args->flags & CLONE_PID)
610 	printf("linux_clone(%ld): CLONE_PID not yet supported\n",
611 	       (long)p->p_pid);
612     printf("linux_clone(%ld): invoked with flags %x and stack %x\n",
613 	   (long)p->p_pid, (unsigned int)args->flags,
614 	   (unsigned int)args->stack);
615 #endif
616 
617     if (!args->stack)
618         return (EINVAL);
619 
620     exit_signal = args->flags & 0x000000ff;
621     if (exit_signal >= LINUX_NSIG)
622 	return EINVAL;
623     exit_signal = linux_to_bsd_signal[exit_signal];
624 
625     /* RFTHREAD probably not necessary here, but it shouldn't hurt either */
626     ff |= RFTHREAD;
627 
628     if (args->flags & CLONE_VM)
629 	ff |= RFMEM;
630     if (args->flags & CLONE_SIGHAND)
631 	ff |= RFSIGSHARE;
632     if (!(args->flags & CLONE_FILES))
633 	ff |= RFFDG;
634 
635     error = 0;
636     start = 0;
637 
638     rf_args.flags = ff;
639     if ((error = rfork(p, &rf_args)) != 0)
640 	return error;
641 
642     p2 = pfind(p->p_retval[0]);
643     if (p2 == 0)
644  	return ESRCH;
645 
646     p2->p_sigparent = exit_signal;
647     p2->p_md.md_regs->tf_esp = (unsigned int)args->stack;
648 
649 #ifdef DEBUG
650     printf ("linux_clone(%ld): successful rfork to %ld\n",
651 	    (long)p->p_pid, (long)p2->p_pid);
652 #endif
653     return 0;
654 }
655 
656 /* XXX move */
657 struct linux_mmap_argv {
658 	linux_caddr_t addr;
659 	int len;
660 	int prot;
661 	int flags;
662 	int fd;
663 	int pos;
664 };
665 
666 #define STACK_SIZE  (2 * 1024 * 1024)
667 #define GUARD_SIZE  (4 * PAGE_SIZE)
668 int
669 linux_mmap(struct proc *p, struct linux_mmap_args *args)
670 {
671     struct mmap_args /* {
672 	caddr_t addr;
673 	size_t len;
674 	int prot;
675 	int flags;
676 	int fd;
677 	long pad;
678 	off_t pos;
679     } */ bsd_args;
680     int error;
681     struct linux_mmap_argv linux_args;
682 
683     if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
684 			sizeof(linux_args))))
685 	return error;
686 #ifdef DEBUG
687     printf("Linux-emul(%ld): mmap(%p, %d, %d, %08x, %d, %d)\n",
688 	(long)p->p_pid, (void *)linux_args.addr, linux_args.len,
689 	linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos);
690 #endif
691     bsd_args.flags = 0;
692     if (linux_args.flags & LINUX_MAP_SHARED)
693 	bsd_args.flags |= MAP_SHARED;
694     if (linux_args.flags & LINUX_MAP_PRIVATE)
695 	bsd_args.flags |= MAP_PRIVATE;
696     if (linux_args.flags & LINUX_MAP_FIXED)
697 	bsd_args.flags |= MAP_FIXED;
698     if (linux_args.flags & LINUX_MAP_ANON)
699 	bsd_args.flags |= MAP_ANON;
700     if (linux_args.flags & LINUX_MAP_GROWSDOWN) {
701 	bsd_args.flags |= MAP_STACK;
702 
703 	/* The linux MAP_GROWSDOWN option does not limit auto
704 	 * growth of the region.  Linux mmap with this option
705 	 * takes as addr the inital BOS, and as len, the initial
706 	 * region size.  It can then grow down from addr without
707 	 * limit.  However, linux threads has an implicit internal
708 	 * limit to stack size of STACK_SIZE.  Its just not
709 	 * enforced explicitly in linux.  But, here we impose
710 	 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
711 	 * region, since we can do this with our mmap.
712 	 *
713 	 * Our mmap with MAP_STACK takes addr as the maximum
714 	 * downsize limit on BOS, and as len the max size of
715 	 * the region.  It them maps the top SGROWSIZ bytes,
716 	 * and autgrows the region down, up to the limit
717 	 * in addr.
718 	 *
719 	 * If we don't use the MAP_STACK option, the effect
720 	 * of this code is to allocate a stack region of a
721 	 * fixed size of (STACK_SIZE - GUARD_SIZE).
722 	 */
723 
724 	/* This gives us TOS */
725 	bsd_args.addr = linux_args.addr + linux_args.len;
726 
727 	/* This gives us our maximum stack size */
728 	if (linux_args.len > STACK_SIZE - GUARD_SIZE)
729 	    bsd_args.len = linux_args.len;
730 	else
731 	    bsd_args.len  = STACK_SIZE - GUARD_SIZE;
732 
733 	/* This gives us a new BOS.  If we're using VM_STACK, then
734 	 * mmap will just map the top SGROWSIZ bytes, and let
735 	 * the stack grow down to the limit at BOS.  If we're
736 	 * not using VM_STACK we map the full stack, since we
737 	 * don't have a way to autogrow it.
738 	 */
739 	bsd_args.addr -= bsd_args.len;
740 
741     } else {
742 	bsd_args.addr = linux_args.addr;
743 	bsd_args.len  = linux_args.len;
744     }
745 
746     bsd_args.prot = linux_args.prot | PROT_READ;	/* always required */
747     bsd_args.fd = linux_args.fd;
748     bsd_args.pos = linux_args.pos;
749     bsd_args.pad = 0;
750     return mmap(p, &bsd_args);
751 }
752 
753 int
754 linux_mremap(struct proc *p, struct linux_mremap_args *args)
755 {
756 	struct munmap_args /* {
757 		void *addr;
758 		size_t len;
759 	} */ bsd_args;
760 	int error = 0;
761 
762 #ifdef DEBUG
763 	printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n",
764 	    (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len,
765 	    args->flags);
766 #endif
767 	args->new_len = round_page(args->new_len);
768 	args->old_len = round_page(args->old_len);
769 
770 	if (args->new_len > args->old_len) {
771 		p->p_retval[0] = 0;
772 		return ENOMEM;
773 	}
774 
775 	if (args->new_len < args->old_len) {
776 		bsd_args.addr = args->addr + args->new_len;
777 		bsd_args.len = args->old_len - args->new_len;
778 		error = munmap(p, &bsd_args);
779 	}
780 
781 	p->p_retval[0] = error ? 0 : (int)args->addr;
782 	return error;
783 }
784 
785 int
786 linux_msync(struct proc *p, struct linux_msync_args *args)
787 {
788 	struct msync_args bsd_args;
789 
790 	bsd_args.addr = args->addr;
791 	bsd_args.len = args->len;
792 	bsd_args.flags = 0;	/* XXX ignore */
793 
794 	return msync(p, &bsd_args);
795 }
796 
797 int
798 linux_pipe(struct proc *p, struct linux_pipe_args *args)
799 {
800     int error;
801     int reg_edx;
802 
803 #ifdef DEBUG
804     printf("Linux-emul(%ld): pipe(*)\n", (long)p->p_pid);
805 #endif
806     reg_edx = p->p_retval[1];
807     error = pipe(p, 0);
808     if (error) {
809 	p->p_retval[1] = reg_edx;
810 	return error;
811     }
812 
813     error = copyout(p->p_retval, args->pipefds, 2*sizeof(int));
814     if (error) {
815 	p->p_retval[1] = reg_edx;
816 	return error;
817     }
818 
819     p->p_retval[1] = reg_edx;
820     p->p_retval[0] = 0;
821     return 0;
822 }
823 
824 int
825 linux_time(struct proc *p, struct linux_time_args *args)
826 {
827     struct timeval tv;
828     linux_time_t tm;
829     int error;
830 
831 #ifdef DEBUG
832     printf("Linux-emul(%ld): time(*)\n", (long)p->p_pid);
833 #endif
834     microtime(&tv);
835     tm = tv.tv_sec;
836     if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t))))
837 	return error;
838     p->p_retval[0] = tm;
839     return 0;
840 }
841 
842 struct linux_times_argv {
843     long    tms_utime;
844     long    tms_stime;
845     long    tms_cutime;
846     long    tms_cstime;
847 };
848 
849 #define CLK_TCK 100	/* Linux uses 100 */
850 #define CONVTCK(r)	(r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
851 
852 int
853 linux_times(struct proc *p, struct linux_times_args *args)
854 {
855     struct timeval tv;
856     struct linux_times_argv tms;
857     struct rusage ru;
858     int error;
859 
860 #ifdef DEBUG
861     printf("Linux-emul(%ld): times(*)\n", (long)p->p_pid);
862 #endif
863     calcru(p, &ru.ru_utime, &ru.ru_stime, NULL);
864 
865     tms.tms_utime = CONVTCK(ru.ru_utime);
866     tms.tms_stime = CONVTCK(ru.ru_stime);
867 
868     tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime);
869     tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime);
870 
871     if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf,
872 	    	    sizeof(struct linux_times_argv))))
873 	return error;
874 
875     microuptime(&tv);
876     p->p_retval[0] = (int)CONVTCK(tv);
877     return 0;
878 }
879 
880 int
881 linux_newuname(struct proc *p, struct linux_newuname_args *args)
882 {
883 	struct linux_new_utsname utsname;
884 	char *osrelease, *osname;
885 
886 #ifdef DEBUG
887 	printf("Linux-emul(%ld): newuname(*)\n", (long)p->p_pid);
888 #endif
889 
890 	osname = linux_get_osname(p);
891 	osrelease = linux_get_osrelease(p);
892 
893 	bzero(&utsname, sizeof(struct linux_new_utsname));
894 	strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1);
895 	strncpy(utsname.nodename, hostname, LINUX_MAX_UTSNAME-1);
896 	strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1);
897 	strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1);
898 	strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1);
899 	strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1);
900 
901 	return (copyout((caddr_t)&utsname, (caddr_t)args->buf,
902 			sizeof(struct linux_new_utsname)));
903 }
904 
905 struct linux_utimbuf {
906 	linux_time_t l_actime;
907 	linux_time_t l_modtime;
908 };
909 
910 int
911 linux_utime(struct proc *p, struct linux_utime_args *args)
912 {
913     struct utimes_args /* {
914 	char	*path;
915 	struct	timeval *tptr;
916     } */ bsdutimes;
917     struct timeval tv[2], *tvp;
918     struct linux_utimbuf lut;
919     int error;
920     caddr_t sg;
921 
922     sg = stackgap_init();
923     CHECKALTEXIST(p, &sg, args->fname);
924 
925 #ifdef DEBUG
926     printf("Linux-emul(%ld): utime(%s, *)\n", (long)p->p_pid, args->fname);
927 #endif
928     if (args->times) {
929 	if ((error = copyin(args->times, &lut, sizeof lut)))
930 	    return error;
931 	tv[0].tv_sec = lut.l_actime;
932 	tv[0].tv_usec = 0;
933 	tv[1].tv_sec = lut.l_modtime;
934 	tv[1].tv_usec = 0;
935 	/* so that utimes can copyin */
936 	tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
937 	if ((error = copyout(tv, tvp, sizeof(tv))))
938 	    return error;
939 	bsdutimes.tptr = tvp;
940     } else
941 	bsdutimes.tptr = NULL;
942 
943     bsdutimes.path = args->fname;
944     return utimes(p, &bsdutimes);
945 }
946 
947 #define __WCLONE 0x80000000
948 
949 int
950 linux_waitpid(struct proc *p, struct linux_waitpid_args *args)
951 {
952     struct wait_args /* {
953 	int pid;
954 	int *status;
955 	int options;
956 	struct	rusage *rusage;
957     } */ tmp;
958     int error, tmpstat;
959 
960 #ifdef DEBUG
961     printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n",
962 	(long)p->p_pid, args->pid, (void *)args->status, args->options);
963 #endif
964     tmp.pid = args->pid;
965     tmp.status = args->status;
966     tmp.options = (args->options & (WNOHANG | WUNTRACED));
967     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
968     if (args->options & __WCLONE)
969 	tmp.options |= WLINUXCLONE;
970     tmp.rusage = NULL;
971 
972     if ((error = wait4(p, &tmp)) != 0)
973 	return error;
974 
975     if (args->status) {
976 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
977 	    return error;
978 	if (WIFSIGNALED(tmpstat))
979 	    tmpstat = (tmpstat & 0xffffff80) |
980 		      bsd_to_linux_signal[WTERMSIG(tmpstat)];
981 	else if (WIFSTOPPED(tmpstat))
982 	    tmpstat = (tmpstat & 0xffff00ff) |
983 		      (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8);
984 	return copyout(&tmpstat, args->status, sizeof(int));
985     } else
986 	return 0;
987 }
988 
989 int
990 linux_wait4(struct proc *p, struct linux_wait4_args *args)
991 {
992     struct wait_args /* {
993 	int pid;
994 	int *status;
995 	int options;
996 	struct	rusage *rusage;
997     } */ tmp;
998     int error, tmpstat;
999 
1000 #ifdef DEBUG
1001     printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n",
1002 	(long)p->p_pid, args->pid, (void *)args->status, args->options,
1003 	(void *)args->rusage);
1004 #endif
1005     tmp.pid = args->pid;
1006     tmp.status = args->status;
1007     tmp.options = (args->options & (WNOHANG | WUNTRACED));
1008     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
1009     if (args->options & __WCLONE)
1010 	tmp.options |= WLINUXCLONE;
1011     tmp.rusage = args->rusage;
1012 
1013     if ((error = wait4(p, &tmp)) != 0)
1014 	return error;
1015 
1016     p->p_siglist &= ~sigmask(SIGCHLD);
1017 
1018     if (args->status) {
1019 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
1020 	    return error;
1021 	if (WIFSIGNALED(tmpstat))
1022 	    tmpstat = (tmpstat & 0xffffff80) |
1023 		  bsd_to_linux_signal[WTERMSIG(tmpstat)];
1024 	else if (WIFSTOPPED(tmpstat))
1025 	    tmpstat = (tmpstat & 0xffff00ff) |
1026 		  (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8);
1027 	return copyout(&tmpstat, args->status, sizeof(int));
1028     } else
1029 	return 0;
1030 }
1031 
1032 int
1033 linux_mknod(struct proc *p, struct linux_mknod_args *args)
1034 {
1035 	caddr_t sg;
1036 	struct mknod_args bsd_mknod;
1037 	struct mkfifo_args bsd_mkfifo;
1038 
1039 	sg = stackgap_init();
1040 
1041 	CHECKALTCREAT(p, &sg, args->path);
1042 
1043 #ifdef DEBUG
1044 	printf("Linux-emul(%ld): mknod(%s, %d, %d)\n",
1045 	   (long)p->p_pid, args->path, args->mode, args->dev);
1046 #endif
1047 
1048 	if (args->mode & S_IFIFO) {
1049 		bsd_mkfifo.path = args->path;
1050 		bsd_mkfifo.mode = args->mode;
1051 		return mkfifo(p, &bsd_mkfifo);
1052 	} else {
1053 		bsd_mknod.path = args->path;
1054 		bsd_mknod.mode = args->mode;
1055 		bsd_mknod.dev = args->dev;
1056 		return mknod(p, &bsd_mknod);
1057 	}
1058 }
1059 
1060 /*
1061  * UGH! This is just about the dumbest idea I've ever heard!!
1062  */
1063 int
1064 linux_personality(struct proc *p, struct linux_personality_args *args)
1065 {
1066 #ifdef DEBUG
1067 	printf("Linux-emul(%ld): personality(%d)\n",
1068 	   (long)p->p_pid, args->per);
1069 #endif
1070 	if (args->per != 0)
1071 		return EINVAL;
1072 
1073 	/* Yes Jim, it's still a Linux... */
1074 	p->p_retval[0] = 0;
1075 	return 0;
1076 }
1077 
1078 /*
1079  * Wrappers for get/setitimer for debugging..
1080  */
1081 int
1082 linux_setitimer(struct proc *p, struct linux_setitimer_args *args)
1083 {
1084 	struct setitimer_args bsa;
1085 	struct itimerval foo;
1086 	int error;
1087 
1088 #ifdef DEBUG
1089 	printf("Linux-emul(%ld): setitimer(%p, %p)\n",
1090 	    (long)p->p_pid, (void *)args->itv, (void *)args->oitv);
1091 #endif
1092 	bsa.which = args->which;
1093 	bsa.itv = args->itv;
1094 	bsa.oitv = args->oitv;
1095 	if (args->itv) {
1096 	    if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo,
1097 			sizeof(foo))))
1098 		return error;
1099 #ifdef DEBUG
1100 	    printf("setitimer: value: sec: %ld, usec: %ld\n",
1101 		foo.it_value.tv_sec, foo.it_value.tv_usec);
1102 	    printf("setitimer: interval: sec: %ld, usec: %ld\n",
1103 		foo.it_interval.tv_sec, foo.it_interval.tv_usec);
1104 #endif
1105 	}
1106 	return setitimer(p, &bsa);
1107 }
1108 
1109 int
1110 linux_getitimer(struct proc *p, struct linux_getitimer_args *args)
1111 {
1112 	struct getitimer_args bsa;
1113 #ifdef DEBUG
1114 	printf("Linux-emul(%ld): getitimer(%p)\n",
1115 	    (long)p->p_pid, (void *)args->itv);
1116 #endif
1117 	bsa.which = args->which;
1118 	bsa.itv = args->itv;
1119 	return getitimer(p, &bsa);
1120 }
1121 
1122 int
1123 linux_iopl(struct proc *p, struct linux_iopl_args *args)
1124 {
1125 	int error;
1126 
1127 	error = suser(p);
1128 	if (error != 0)
1129 		return error;
1130 	if (securelevel > 0)
1131 		return EPERM;
1132 	p->p_md.md_regs->tf_eflags |= PSL_IOPL;
1133 	return 0;
1134 }
1135 
1136 int
1137 linux_nice(struct proc *p, struct linux_nice_args *args)
1138 {
1139 	struct setpriority_args	bsd_args;
1140 
1141 	bsd_args.which = PRIO_PROCESS;
1142 	bsd_args.who = 0;	/* current process */
1143 	bsd_args.prio = args->inc;
1144 	return setpriority(p, &bsd_args);
1145 }
1146 
1147 int
1148 linux_setgroups(p, uap)
1149 	struct proc *p;
1150 	struct linux_setgroups_args *uap;
1151 {
1152 	struct pcred *pc;
1153 	linux_gid_t linux_gidset[NGROUPS];
1154 	gid_t *bsd_gidset;
1155 	int ngrp, error;
1156 
1157 	pc = p->p_cred;
1158 	ngrp = uap->gidsetsize;
1159 
1160 	/*
1161 	 * cr_groups[0] holds egid. Setting the whole set from
1162 	 * the supplied set will cause egid to be changed too.
1163 	 * Keep cr_groups[0] unchanged to prevent that.
1164 	 */
1165 
1166 	if ((error = suser(p)) != 0)
1167 		return (error);
1168 
1169 	if (ngrp >= NGROUPS)
1170 		return (EINVAL);
1171 
1172 	pc->pc_ucred = crcopy(pc->pc_ucred);
1173 	if (ngrp > 0) {
1174 		error = copyin((caddr_t)uap->gidset, (caddr_t)linux_gidset,
1175 			       ngrp * sizeof(linux_gid_t));
1176 		if (error)
1177 			return (error);
1178 
1179 		pc->pc_ucred->cr_ngroups = ngrp + 1;
1180 
1181 		bsd_gidset = pc->pc_ucred->cr_groups;
1182 		ngrp--;
1183 		while (ngrp >= 0) {
1184 			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
1185 			ngrp--;
1186 		}
1187 	}
1188 	else
1189 		pc->pc_ucred->cr_ngroups = 1;
1190 
1191 	setsugid(p);
1192 	return (0);
1193 }
1194 
1195 int
1196 linux_getgroups(p, uap)
1197 	struct proc *p;
1198 	struct linux_getgroups_args *uap;
1199 {
1200 	struct pcred *pc;
1201 	linux_gid_t linux_gidset[NGROUPS];
1202 	gid_t *bsd_gidset;
1203 	int bsd_gidsetsz, ngrp, error;
1204 
1205 	pc = p->p_cred;
1206 	bsd_gidset = pc->pc_ucred->cr_groups;
1207 	bsd_gidsetsz = pc->pc_ucred->cr_ngroups - 1;
1208 
1209 	/*
1210 	 * cr_groups[0] holds egid. Returning the whole set
1211 	 * here will cause a duplicate. Exclude cr_groups[0]
1212 	 * to prevent that.
1213 	 */
1214 
1215 	if ((ngrp = uap->gidsetsize) == 0) {
1216 		p->p_retval[0] = bsd_gidsetsz;
1217 		return (0);
1218 	}
1219 
1220 	if (ngrp < bsd_gidsetsz)
1221 		return (EINVAL);
1222 
1223 	ngrp = 0;
1224 	while (ngrp < bsd_gidsetsz) {
1225 		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1226 		ngrp++;
1227 	}
1228 
1229 	if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset,
1230 	    ngrp * sizeof(linux_gid_t))))
1231 		return (error);
1232 
1233 	p->p_retval[0] = ngrp;
1234 	return (0);
1235 }
1236 
1237 int
1238 linux_setrlimit(p, uap)
1239      struct proc *p;
1240      struct linux_setrlimit_args *uap;
1241 {
1242     struct osetrlimit_args bsd;
1243 
1244 #ifdef DEBUG
1245     printf("Linux-emul(%ld): setrlimit(%d, %p)\n",
1246 	   (long)p->p_pid, uap->resource, (void *)uap->rlim);
1247 #endif
1248 
1249     if (uap->resource >= LINUX_RLIM_NLIMITS)
1250 	return EINVAL;
1251 
1252     bsd.which = linux_to_bsd_resource[uap->resource];
1253 
1254     if (bsd.which == -1)
1255 	return EINVAL;
1256 
1257     bsd.rlp = uap->rlim;
1258     return osetrlimit(p, &bsd);
1259 }
1260 
1261 int
1262 linux_getrlimit(p, uap)
1263      struct proc *p;
1264      struct linux_getrlimit_args *uap;
1265 {
1266     struct ogetrlimit_args bsd;
1267 
1268 #ifdef DEBUG
1269     printf("Linux-emul(%ld): getrlimit(%d, %p)\n",
1270 	   (long)p->p_pid, uap->resource, (void *)uap->rlim);
1271 #endif
1272 
1273     if (uap->resource >= LINUX_RLIM_NLIMITS)
1274 	return EINVAL;
1275 
1276     bsd.which = linux_to_bsd_resource[uap->resource];
1277 
1278     if (bsd.which == -1)
1279 	return EINVAL;
1280 
1281     bsd.rlp = uap->rlim;
1282     return ogetrlimit(p, &bsd);
1283 }
1284 
1285 int
1286 linux_sched_setscheduler(p, uap)
1287 	struct proc *p;
1288 	struct linux_sched_setscheduler_args *uap;
1289 {
1290 	struct sched_setscheduler_args bsd;
1291 
1292 #ifdef DEBUG
1293 	printf("Linux-emul(%ld): sched_setscheduler(%d, %d, %p)\n",
1294 	       (long)p->p_pid, uap->pid, uap->policy, (void *)uap->param);
1295 #endif
1296 
1297 	switch (uap->policy) {
1298 	case LINUX_SCHED_OTHER:
1299 		bsd.policy = SCHED_OTHER;
1300 		break;
1301 	case LINUX_SCHED_FIFO:
1302 		bsd.policy = SCHED_FIFO;
1303 		break;
1304 	case LINUX_SCHED_RR:
1305 		bsd.policy = SCHED_RR;
1306 		break;
1307 	default:
1308 		return EINVAL;
1309 	}
1310 
1311 	bsd.pid = uap->pid;
1312 	bsd.param = uap->param;
1313 	return sched_setscheduler(p, &bsd);
1314 }
1315 
1316 int
1317 linux_sched_getscheduler(p, uap)
1318 	struct proc *p;
1319 	struct linux_sched_getscheduler_args *uap;
1320 {
1321 	struct sched_getscheduler_args bsd;
1322 	int error;
1323 
1324 #ifdef DEBUG
1325 	printf("Linux-emul(%ld): sched_getscheduler(%d)\n",
1326 	       (long)p->p_pid, uap->pid);
1327 #endif
1328 
1329 	bsd.pid = uap->pid;
1330 	error = sched_getscheduler(p, &bsd);
1331 
1332 	switch (p->p_retval[0]) {
1333 	case SCHED_OTHER:
1334 		p->p_retval[0] = LINUX_SCHED_OTHER;
1335 		break;
1336 	case SCHED_FIFO:
1337 		p->p_retval[0] = LINUX_SCHED_FIFO;
1338 		break;
1339 	case SCHED_RR:
1340 		p->p_retval[0] = LINUX_SCHED_RR;
1341 		break;
1342 	}
1343 
1344 	return error;
1345 }
1346