xref: /freebsd/sys/compat/linux/linux_misc.c (revision c1462236787ec09d00d5e2d222edc3e34bce1e69)
1 /*-
2  * Copyright (c) 1994-1995 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include "opt_compat.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/sysproto.h>
36 #include <sys/kernel.h>
37 #include <sys/mman.h>
38 #include <sys/proc.h>
39 #include <sys/fcntl.h>
40 #include <sys/imgact_aout.h>
41 #include <sys/mount.h>
42 #include <sys/namei.h>
43 #include <sys/resourcevar.h>
44 #include <sys/stat.h>
45 #include <sys/sysctl.h>
46 #include <sys/unistd.h>
47 #include <sys/vnode.h>
48 #include <sys/wait.h>
49 #include <sys/time.h>
50 #include <sys/signalvar.h>
51 
52 #include <vm/vm.h>
53 #include <vm/pmap.h>
54 #include <vm/vm_kern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_extern.h>
57 
58 #include <machine/frame.h>
59 #include <machine/psl.h>
60 #include <machine/sysarch.h>
61 #include <machine/segments.h>
62 
63 #include <i386/linux/linux.h>
64 #include <i386/linux/linux_proto.h>
65 #include <i386/linux/linux_util.h>
66 #include <i386/linux/linux_mib.h>
67 
68 #include <posix4/sched.h>
69 
/*
 * Translate a BSD signal number into its Linux counterpart.
 *
 * Signals in the range 1..LINUX_SIGTBLSZ are looked up in
 * bsd_to_linux_signal[]; every other value (including 0, which must
 * never be used to index the table) is passed through unchanged.
 * The argument is evaluated more than once, so it must be free of
 * side effects.
 */
#define BSD_TO_LINUX_SIGNAL(sig)	\
	(((sig) > 0 && (sig) <= LINUX_SIGTBLSZ) ?	\
	    bsd_to_linux_signal[_SIG_IDX(sig)] : (sig))
72 
73 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] =
74 { RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
75   RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
76   RLIMIT_MEMLOCK, -1
77 };
78 
79 int
80 linux_alarm(struct proc *p, struct linux_alarm_args *args)
81 {
82     struct itimerval it, old_it;
83     struct timeval tv;
84     int s;
85 
86 #ifdef DEBUG
87     printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs);
88 #endif
89     if (args->secs > 100000000)
90 	return EINVAL;
91     it.it_value.tv_sec = (long)args->secs;
92     it.it_value.tv_usec = 0;
93     it.it_interval.tv_sec = 0;
94     it.it_interval.tv_usec = 0;
95     s = splsoftclock();
96     old_it = p->p_realtimer;
97     getmicrouptime(&tv);
98     if (timevalisset(&old_it.it_value))
99 	untimeout(realitexpire, (caddr_t)p, p->p_ithandle);
100     if (it.it_value.tv_sec != 0) {
101 	p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value));
102 	timevaladd(&it.it_value, &tv);
103     }
104     p->p_realtimer = it;
105     splx(s);
106     if (timevalcmp(&old_it.it_value, &tv, >)) {
107 	timevalsub(&old_it.it_value, &tv);
108 	if (old_it.it_value.tv_usec != 0)
109 	    old_it.it_value.tv_sec++;
110 	p->p_retval[0] = old_it.it_value.tv_sec;
111     }
112     return 0;
113 }
114 
115 int
116 linux_brk(struct proc *p, struct linux_brk_args *args)
117 {
118 #if 0
119     struct vmspace *vm = p->p_vmspace;
120     vm_offset_t new, old;
121     int error;
122 
123     if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr)
124 	return EINVAL;
125     if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr)
126 	> p->p_rlimit[RLIMIT_DATA].rlim_cur)
127 	return ENOMEM;
128 
129     old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize);
130     new = round_page((vm_offset_t)args->dsend);
131     p->p_retval[0] = old;
132     if ((new-old) > 0) {
133 	if (swap_pager_full)
134 	    return ENOMEM;
135 	error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE,
136 			VM_PROT_ALL, VM_PROT_ALL, 0);
137 	if (error)
138 	    return error;
139 	vm->vm_dsize += btoc((new-old));
140 	p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize));
141     }
142     return 0;
143 #else
144     struct vmspace *vm = p->p_vmspace;
145     vm_offset_t new, old;
146     struct obreak_args /* {
147 	char * nsize;
148     } */ tmp;
149 
150 #ifdef DEBUG
151     printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend);
152 #endif
153     old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
154     new = (vm_offset_t)args->dsend;
155     tmp.nsize = (char *) new;
156     if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp))
157 	p->p_retval[0] = (int)new;
158     else
159 	p->p_retval[0] = (int)old;
160 
161     return 0;
162 #endif
163 }
164 
165 int
166 linux_uselib(struct proc *p, struct linux_uselib_args *args)
167 {
168     struct nameidata ni;
169     struct vnode *vp;
170     struct exec *a_out;
171     struct vattr attr;
172     vm_offset_t vmaddr;
173     unsigned long file_offset;
174     vm_offset_t buffer;
175     unsigned long bss_size;
176     int error;
177     caddr_t sg;
178     int locked;
179 
180     sg = stackgap_init();
181     CHECKALTEXIST(p, &sg, args->library);
182 
183 #ifdef DEBUG
184     printf("Linux-emul(%ld): uselib(%s)\n", (long)p->p_pid, args->library);
185 #endif
186 
187     a_out = NULL;
188     locked = 0;
189     vp = NULL;
190 
191     NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, args->library, p);
192     error = namei(&ni);
193     if (error)
194 	goto cleanup;
195 
196     vp = ni.ni_vp;
197     /*
198      * XXX This looks like a bogus check - a LOCKLEAF namei should not succeed
199      * without returning a vnode.
200      */
201     if (vp == NULL) {
202 	error = ENOEXEC;	/* ?? */
203 	goto cleanup;
204     }
205     NDFREE(&ni, NDF_ONLY_PNBUF);
206 
207     /*
208      * From here on down, we have a locked vnode that must be unlocked.
209      */
210     locked++;
211 
212     /*
213      * Writable?
214      */
215     if (vp->v_writecount) {
216 	error = ETXTBSY;
217 	goto cleanup;
218     }
219 
220     /*
221      * Executable?
222      */
223     error = VOP_GETATTR(vp, &attr, p->p_ucred, p);
224     if (error)
225 	goto cleanup;
226 
227     if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
228 	((attr.va_mode & 0111) == 0) ||
229 	(attr.va_type != VREG)) {
230 	    error = ENOEXEC;
231 	    goto cleanup;
232     }
233 
234     /*
235      * Sensible size?
236      */
237     if (attr.va_size == 0) {
238 	error = ENOEXEC;
239 	goto cleanup;
240     }
241 
242     /*
243      * Can we access it?
244      */
245     error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
246     if (error)
247 	goto cleanup;
248 
249     error = VOP_OPEN(vp, FREAD, p->p_ucred, p);
250     if (error)
251 	goto cleanup;
252 
253     /*
254      * Lock no longer needed
255      */
256     VOP_UNLOCK(vp, 0, p);
257     locked = 0;
258 
259     /*
260      * Pull in executable header into kernel_map
261      */
262     error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
263 	    	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
264     if (error)
265 	goto cleanup;
266 
267     /*
268      * Is it a Linux binary ?
269      */
270     if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
271 	error = ENOEXEC;
272 	goto cleanup;
273     }
274 
275     /* While we are here, we should REALLY do some more checks */
276 
277     /*
278      * Set file/virtual offset based on a.out variant.
279      */
280     switch ((int)(a_out->a_magic & 0xffff)) {
281     case 0413:	/* ZMAGIC */
282 	file_offset = 1024;
283 	break;
284     case 0314:	/* QMAGIC */
285 	file_offset = 0;
286 	break;
287     default:
288 	error = ENOEXEC;
289 	goto cleanup;
290     }
291 
292     bss_size = round_page(a_out->a_bss);
293 
294     /*
295      * Check various fields in header for validity/bounds.
296      */
297     if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
298 	error = ENOEXEC;
299 	goto cleanup;
300     }
301 
302     /* text + data can't exceed file size */
303     if (a_out->a_data + a_out->a_text > attr.va_size) {
304 	error = EFAULT;
305 	goto cleanup;
306     }
307 
308     /*
309      * text/data/bss must not exceed limits
310      * XXX: this is not complete. it should check current usage PLUS
311      * the resources needed by this library.
312      */
313     if (a_out->a_text > MAXTSIZ ||
314 	a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) {
315 	error = ENOMEM;
316 	goto cleanup;
317     }
318 
319     /*
320      * prevent more writers
321      */
322     vp->v_flag |= VTEXT;
323 
324     /*
325      * Check if file_offset page aligned,.
326      * Currently we cannot handle misalinged file offsets,
327      * and so we read in the entire image (what a waste).
328      */
329     if (file_offset & PAGE_MASK) {
330 #ifdef DEBUG
331 printf("uselib: Non page aligned binary %lu\n", file_offset);
332 #endif
333 	/*
334 	 * Map text+data read/write/execute
335 	 */
336 
337 	/* a_entry is the load address and is page aligned */
338 	vmaddr = trunc_page(a_out->a_entry);
339 
340 	/* get anon user mapping, read+write+execute */
341 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
342 		    	    a_out->a_text + a_out->a_data, FALSE,
343 			    VM_PROT_ALL, VM_PROT_ALL, 0);
344 	if (error)
345 	    goto cleanup;
346 
347 	/* map file into kernel_map */
348 	error = vm_mmap(kernel_map, &buffer,
349 			round_page(a_out->a_text + a_out->a_data + file_offset),
350 		   	VM_PROT_READ, VM_PROT_READ, 0,
351 			(caddr_t)vp, trunc_page(file_offset));
352 	if (error)
353 	    goto cleanup;
354 
355 	/* copy from kernel VM space to user space */
356 	error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset),
357 			(caddr_t)vmaddr, a_out->a_text + a_out->a_data);
358 
359 	/* release temporary kernel space */
360 	vm_map_remove(kernel_map, buffer,
361 		      buffer + round_page(a_out->a_text + a_out->a_data + file_offset));
362 
363 	if (error)
364 	    goto cleanup;
365     }
366     else {
367 #ifdef DEBUG
368 printf("uselib: Page aligned binary %lu\n", file_offset);
369 #endif
370 	/*
371 	 * for QMAGIC, a_entry is 20 bytes beyond the load address
372 	 * to skip the executable header
373 	 */
374 	vmaddr = trunc_page(a_out->a_entry);
375 
376 	/*
377 	 * Map it all into the process's space as a single copy-on-write
378 	 * "data" segment.
379 	 */
380 	error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr,
381 		   	a_out->a_text + a_out->a_data,
382 			VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
383 			(caddr_t)vp, file_offset);
384 	if (error)
385 	    goto cleanup;
386     }
387 #ifdef DEBUG
388 printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]);
389 #endif
390     if (bss_size != 0) {
391         /*
392 	 * Calculate BSS start address
393 	 */
394 	vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data;
395 
396 	/*
397 	 * allocate some 'anon' space
398 	 */
399 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
400 			    bss_size, FALSE,
401 			    VM_PROT_ALL, VM_PROT_ALL, 0);
402 	if (error)
403 	    goto cleanup;
404     }
405 
406 cleanup:
407     /*
408      * Unlock vnode if needed
409      */
410     if (locked)
411 	VOP_UNLOCK(vp, 0, p);
412 
413     /*
414      * Release the kernel mapping.
415      */
416     if (a_out)
417 	vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE);
418 
419     return error;
420 }
421 
422 /* XXX move */
423 struct linux_select_argv {
424 	int nfds;
425 	fd_set *readfds;
426 	fd_set *writefds;
427 	fd_set *exceptfds;
428 	struct timeval *timeout;
429 };
430 
431 int
432 linux_select(struct proc *p, struct linux_select_args *args)
433 {
434     struct linux_select_argv linux_args;
435     struct linux_newselect_args newsel;
436     int error;
437 
438 #ifdef SELECT_DEBUG
439     printf("Linux-emul(%ld): select(%x)\n", (long)p->p_pid, args->ptr);
440 #endif
441     if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
442 			sizeof(linux_args))))
443 	return error;
444 
445     newsel.nfds = linux_args.nfds;
446     newsel.readfds = linux_args.readfds;
447     newsel.writefds = linux_args.writefds;
448     newsel.exceptfds = linux_args.exceptfds;
449     newsel.timeout = linux_args.timeout;
450 
451     return linux_newselect(p, &newsel);
452 }
453 
454 int
455 linux_newselect(struct proc *p, struct linux_newselect_args *args)
456 {
457     struct select_args bsa;
458     struct timeval tv0, tv1, utv, *tvp;
459     caddr_t sg;
460     int error;
461 
462 #ifdef DEBUG
463     printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n",
464   	(long)p->p_pid, args->nfds, (void *)args->readfds,
465 	(void *)args->writefds, (void *)args->exceptfds,
466 	(void *)args->timeout);
467 #endif
468     error = 0;
469     bsa.nd = args->nfds;
470     bsa.in = args->readfds;
471     bsa.ou = args->writefds;
472     bsa.ex = args->exceptfds;
473     bsa.tv = args->timeout;
474 
475     /*
476      * Store current time for computation of the amount of
477      * time left.
478      */
479     if (args->timeout) {
480 	if ((error = copyin(args->timeout, &utv, sizeof(utv))))
481 	    goto select_out;
482 #ifdef DEBUG
483 	printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n",
484 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
485 #endif
486 	if (itimerfix(&utv)) {
487 	    /*
488 	     * The timeval was invalid.  Convert it to something
489 	     * valid that will act as it does under Linux.
490 	     */
491 	    sg = stackgap_init();
492 	    tvp = stackgap_alloc(&sg, sizeof(utv));
493 	    utv.tv_sec += utv.tv_usec / 1000000;
494 	    utv.tv_usec %= 1000000;
495 	    if (utv.tv_usec < 0) {
496 		utv.tv_sec -= 1;
497 		utv.tv_usec += 1000000;
498 	    }
499 	    if (utv.tv_sec < 0)
500 		timevalclear(&utv);
501 	    if ((error = copyout(&utv, tvp, sizeof(utv))))
502 		goto select_out;
503 	    bsa.tv = tvp;
504 	}
505 	microtime(&tv0);
506     }
507 
508     error = select(p, &bsa);
509 #ifdef DEBUG
510     printf("Linux-emul(%ld): real select returns %d\n", (long)p->p_pid, error);
511 #endif
512 
513     if (error) {
514 	/*
515 	 * See fs/select.c in the Linux kernel.  Without this,
516 	 * Maelstrom doesn't work.
517 	 */
518 	if (error == ERESTART)
519 	    error = EINTR;
520 	goto select_out;
521     }
522 
523     if (args->timeout) {
524 	if (p->p_retval[0]) {
525 	    /*
526 	     * Compute how much time was left of the timeout,
527 	     * by subtracting the current time and the time
528 	     * before we started the call, and subtracting
529 	     * that result from the user-supplied value.
530 	     */
531 	    microtime(&tv1);
532 	    timevalsub(&tv1, &tv0);
533 	    timevalsub(&utv, &tv1);
534 	    if (utv.tv_sec < 0)
535 		timevalclear(&utv);
536 	} else
537 	    timevalclear(&utv);
538 #ifdef DEBUG
539 	printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n",
540 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
541 #endif
542 	if ((error = copyout(&utv, args->timeout, sizeof(utv))))
543 	    goto select_out;
544     }
545 
546 select_out:
547 #ifdef DEBUG
548     printf("Linux-emul(%ld): newselect_out -> %d\n", (long)p->p_pid, error);
549 #endif
550     return error;
551 }
552 
553 int
554 linux_getpgid(struct proc *p, struct linux_getpgid_args *args)
555 {
556     struct proc *curp;
557 
558 #ifdef DEBUG
559     printf("Linux-emul(%ld): getpgid(%d)\n", (long)p->p_pid, args->pid);
560 #endif
561     if (args->pid != p->p_pid) {
562 	if (!(curp = pfind(args->pid)))
563 	    return ESRCH;
564     }
565     else
566 	curp = p;
567     p->p_retval[0] = curp->p_pgid;
568     return 0;
569 }
570 
571 int
572 linux_fork(struct proc *p, struct linux_fork_args *args)
573 {
574     int error;
575 
576 #ifdef DEBUG
577     printf("Linux-emul(%ld): fork()\n", (long)p->p_pid);
578 #endif
579     if ((error = fork(p, (struct fork_args *)args)) != 0)
580 	return error;
581     if (p->p_retval[1] == 1)
582 	p->p_retval[0] = 0;
583     return 0;
584 }
585 
586 int
587 linux_vfork(struct proc *p, struct linux_vfork_args *args)
588 {
589 	int error;
590 
591 #ifdef DEBUG
592 	printf("Linux-emul(%ld): vfork()\n", (long)p->p_pid);
593 #endif
594 
595 	if ((error = vfork(p, (struct vfork_args *)args)) != 0)
596 		return error;
597 	/* Are we the child? */
598 	if (p->p_retval[1] == 1)
599 		p->p_retval[0] = 0;
600 	return 0;
601 }
602 
603 #define CLONE_VM	0x100
604 #define CLONE_FS	0x200
605 #define CLONE_FILES	0x400
606 #define CLONE_SIGHAND	0x800
607 #define CLONE_PID	0x1000
608 
609 int
610 linux_clone(struct proc *p, struct linux_clone_args *args)
611 {
612     int error, ff = RFPROC;
613     struct proc *p2;
614     int            exit_signal;
615     vm_offset_t    start;
616     struct rfork_args rf_args;
617 
618 #ifdef DEBUG
619     if (args->flags & CLONE_PID)
620 	printf("linux_clone(%ld): CLONE_PID not yet supported\n",
621 	       (long)p->p_pid);
622     printf("linux_clone(%ld): invoked with flags %x and stack %x\n",
623 	   (long)p->p_pid, (unsigned int)args->flags,
624 	   (unsigned int)args->stack);
625 #endif
626 
627     if (!args->stack)
628         return (EINVAL);
629 
630     exit_signal = args->flags & 0x000000ff;
631     if (exit_signal >= LINUX_NSIG)
632 	return EINVAL;
633 
634     if (exit_signal <= LINUX_SIGTBLSZ)
635 	exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)];
636 
637     /* RFTHREAD probably not necessary here, but it shouldn't hurt either */
638     ff |= RFTHREAD;
639 
640     if (args->flags & CLONE_VM)
641 	ff |= RFMEM;
642     if (args->flags & CLONE_SIGHAND)
643 	ff |= RFSIGSHARE;
644     if (!(args->flags & CLONE_FILES))
645 	ff |= RFFDG;
646 
647     error = 0;
648     start = 0;
649 
650     rf_args.flags = ff;
651     if ((error = rfork(p, &rf_args)) != 0)
652 	return error;
653 
654     p2 = pfind(p->p_retval[0]);
655     if (p2 == 0)
656  	return ESRCH;
657 
658     p2->p_sigparent = exit_signal;
659     p2->p_md.md_regs->tf_esp = (unsigned int)args->stack;
660 
661 #ifdef DEBUG
662     printf ("linux_clone(%ld): successful rfork to %ld\n",
663 	    (long)p->p_pid, (long)p2->p_pid);
664 #endif
665     return 0;
666 }
667 
668 /* XXX move */
669 struct linux_mmap_argv {
670 	linux_caddr_t addr;
671 	int len;
672 	int prot;
673 	int flags;
674 	int fd;
675 	int pos;
676 };
677 
678 #define STACK_SIZE  (2 * 1024 * 1024)
679 #define GUARD_SIZE  (4 * PAGE_SIZE)
680 int
681 linux_mmap(struct proc *p, struct linux_mmap_args *args)
682 {
683     struct mmap_args /* {
684 	caddr_t addr;
685 	size_t len;
686 	int prot;
687 	int flags;
688 	int fd;
689 	long pad;
690 	off_t pos;
691     } */ bsd_args;
692     int error;
693     struct linux_mmap_argv linux_args;
694 
695     if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
696 			sizeof(linux_args))))
697 	return error;
698 #ifdef DEBUG
699     printf("Linux-emul(%ld): mmap(%p, %d, %d, 0x%08x, %d, %d)",
700 	(long)p->p_pid, (void *)linux_args.addr, linux_args.len,
701 	linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos);
702 #endif
703     bsd_args.flags = 0;
704     if (linux_args.flags & LINUX_MAP_SHARED)
705 	bsd_args.flags |= MAP_SHARED;
706     if (linux_args.flags & LINUX_MAP_PRIVATE)
707 	bsd_args.flags |= MAP_PRIVATE;
708     if (linux_args.flags & LINUX_MAP_FIXED)
709 	bsd_args.flags |= MAP_FIXED;
710     if (linux_args.flags & LINUX_MAP_ANON)
711 	bsd_args.flags |= MAP_ANON;
712     if (linux_args.flags & LINUX_MAP_GROWSDOWN) {
713 	bsd_args.flags |= MAP_STACK;
714 
715 	/* The linux MAP_GROWSDOWN option does not limit auto
716 	 * growth of the region.  Linux mmap with this option
717 	 * takes as addr the inital BOS, and as len, the initial
718 	 * region size.  It can then grow down from addr without
719 	 * limit.  However, linux threads has an implicit internal
720 	 * limit to stack size of STACK_SIZE.  Its just not
721 	 * enforced explicitly in linux.  But, here we impose
722 	 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
723 	 * region, since we can do this with our mmap.
724 	 *
725 	 * Our mmap with MAP_STACK takes addr as the maximum
726 	 * downsize limit on BOS, and as len the max size of
727 	 * the region.  It them maps the top SGROWSIZ bytes,
728 	 * and autgrows the region down, up to the limit
729 	 * in addr.
730 	 *
731 	 * If we don't use the MAP_STACK option, the effect
732 	 * of this code is to allocate a stack region of a
733 	 * fixed size of (STACK_SIZE - GUARD_SIZE).
734 	 */
735 
736 	/* This gives us TOS */
737 	bsd_args.addr = linux_args.addr + linux_args.len;
738 
739 	/* This gives us our maximum stack size */
740 	if (linux_args.len > STACK_SIZE - GUARD_SIZE)
741 	    bsd_args.len = linux_args.len;
742 	else
743 	    bsd_args.len  = STACK_SIZE - GUARD_SIZE;
744 
745 	/* This gives us a new BOS.  If we're using VM_STACK, then
746 	 * mmap will just map the top SGROWSIZ bytes, and let
747 	 * the stack grow down to the limit at BOS.  If we're
748 	 * not using VM_STACK we map the full stack, since we
749 	 * don't have a way to autogrow it.
750 	 */
751 	bsd_args.addr -= bsd_args.len;
752 
753     } else {
754 	bsd_args.addr = linux_args.addr;
755 	bsd_args.len  = linux_args.len;
756     }
757 
758     bsd_args.prot = linux_args.prot | PROT_READ;	/* always required */
759     if (linux_args.flags & LINUX_MAP_ANON)
760 	bsd_args.fd = -1;
761     else
762 	bsd_args.fd = linux_args.fd;
763     bsd_args.pos = linux_args.pos;
764     bsd_args.pad = 0;
765 #ifdef DEBUG
766     printf("-> (%p, %d, %d, 0x%08x, %d, %d)\n",
767 	(void *)bsd_args.addr, bsd_args.len,
768 	bsd_args.prot, bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
769 #endif
770     return mmap(p, &bsd_args);
771 }
772 
773 int
774 linux_mremap(struct proc *p, struct linux_mremap_args *args)
775 {
776 	struct munmap_args /* {
777 		void *addr;
778 		size_t len;
779 	} */ bsd_args;
780 	int error = 0;
781 
782 #ifdef DEBUG
783 	printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n",
784 	    (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len,
785 	    args->flags);
786 #endif
787 	args->new_len = round_page(args->new_len);
788 	args->old_len = round_page(args->old_len);
789 
790 	if (args->new_len > args->old_len) {
791 		p->p_retval[0] = 0;
792 		return ENOMEM;
793 	}
794 
795 	if (args->new_len < args->old_len) {
796 		bsd_args.addr = args->addr + args->new_len;
797 		bsd_args.len = args->old_len - args->new_len;
798 		error = munmap(p, &bsd_args);
799 	}
800 
801 	p->p_retval[0] = error ? 0 : (int)args->addr;
802 	return error;
803 }
804 
805 int
806 linux_msync(struct proc *p, struct linux_msync_args *args)
807 {
808 	struct msync_args bsd_args;
809 
810 	bsd_args.addr = args->addr;
811 	bsd_args.len = args->len;
812 	bsd_args.flags = 0;	/* XXX ignore */
813 
814 	return msync(p, &bsd_args);
815 }
816 
817 int
818 linux_pipe(struct proc *p, struct linux_pipe_args *args)
819 {
820     int error;
821     int reg_edx;
822 
823 #ifdef DEBUG
824     printf("Linux-emul(%ld): pipe(*)\n", (long)p->p_pid);
825 #endif
826     reg_edx = p->p_retval[1];
827     error = pipe(p, 0);
828     if (error) {
829 	p->p_retval[1] = reg_edx;
830 	return error;
831     }
832 
833     error = copyout(p->p_retval, args->pipefds, 2*sizeof(int));
834     if (error) {
835 	p->p_retval[1] = reg_edx;
836 	return error;
837     }
838 
839     p->p_retval[1] = reg_edx;
840     p->p_retval[0] = 0;
841     return 0;
842 }
843 
844 int
845 linux_time(struct proc *p, struct linux_time_args *args)
846 {
847     struct timeval tv;
848     linux_time_t tm;
849     int error;
850 
851 #ifdef DEBUG
852     printf("Linux-emul(%ld): time(*)\n", (long)p->p_pid);
853 #endif
854     microtime(&tv);
855     tm = tv.tv_sec;
856     if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t))))
857 	return error;
858     p->p_retval[0] = tm;
859     return 0;
860 }
861 
/* Result buffer copied out to user space by linux_times(). */
struct linux_times_argv {
    long    tms_utime;
    long    tms_stime;
    long    tms_cutime;
    long    tms_cstime;
};

#define CLK_TCK 100	/* Linux uses 100 */
/*
 * Convert a timeval-like object (members tv_sec and tv_usec) to clock
 * ticks at CLK_TCK ticks per second.  The argument is parenthesized so
 * that expressions such as *ptr can be passed; it is evaluated twice.
 */
#define CONVTCK(r)	\
	((r).tv_sec * CLK_TCK + (r).tv_usec / (1000000 / CLK_TCK))
871 
872 int
873 linux_times(struct proc *p, struct linux_times_args *args)
874 {
875     struct timeval tv;
876     struct linux_times_argv tms;
877     struct rusage ru;
878     int error;
879 
880 #ifdef DEBUG
881     printf("Linux-emul(%ld): times(*)\n", (long)p->p_pid);
882 #endif
883     calcru(p, &ru.ru_utime, &ru.ru_stime, NULL);
884 
885     tms.tms_utime = CONVTCK(ru.ru_utime);
886     tms.tms_stime = CONVTCK(ru.ru_stime);
887 
888     tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime);
889     tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime);
890 
891     if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf,
892 	    	    sizeof(struct linux_times_argv))))
893 	return error;
894 
895     microuptime(&tv);
896     p->p_retval[0] = (int)CONVTCK(tv);
897     return 0;
898 }
899 
900 int
901 linux_newuname(struct proc *p, struct linux_newuname_args *args)
902 {
903 	struct linux_new_utsname utsname;
904 	char *osrelease, *osname;
905 
906 #ifdef DEBUG
907 	printf("Linux-emul(%ld): newuname(*)\n", (long)p->p_pid);
908 #endif
909 
910 	osname = linux_get_osname(p);
911 	osrelease = linux_get_osrelease(p);
912 
913 	bzero(&utsname, sizeof(struct linux_new_utsname));
914 	strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1);
915 	strncpy(utsname.nodename, hostname, LINUX_MAX_UTSNAME-1);
916 	strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1);
917 	strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1);
918 	strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1);
919 	strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1);
920 
921 	return (copyout((caddr_t)&utsname, (caddr_t)args->buf,
922 			sizeof(struct linux_new_utsname)));
923 }
924 
925 struct linux_utimbuf {
926 	linux_time_t l_actime;
927 	linux_time_t l_modtime;
928 };
929 
930 int
931 linux_utime(struct proc *p, struct linux_utime_args *args)
932 {
933     struct utimes_args /* {
934 	char	*path;
935 	struct	timeval *tptr;
936     } */ bsdutimes;
937     struct timeval tv[2], *tvp;
938     struct linux_utimbuf lut;
939     int error;
940     caddr_t sg;
941 
942     sg = stackgap_init();
943     CHECKALTEXIST(p, &sg, args->fname);
944 
945 #ifdef DEBUG
946     printf("Linux-emul(%ld): utime(%s, *)\n", (long)p->p_pid, args->fname);
947 #endif
948     if (args->times) {
949 	if ((error = copyin(args->times, &lut, sizeof lut)))
950 	    return error;
951 	tv[0].tv_sec = lut.l_actime;
952 	tv[0].tv_usec = 0;
953 	tv[1].tv_sec = lut.l_modtime;
954 	tv[1].tv_usec = 0;
955 	/* so that utimes can copyin */
956 	tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
957 	if (tvp == NULL)
958 		return (ENAMETOOLONG);
959 	if ((error = copyout(tv, tvp, sizeof(tv))))
960 	    return error;
961 	bsdutimes.tptr = tvp;
962     } else
963 	bsdutimes.tptr = NULL;
964 
965     bsdutimes.path = args->fname;
966     return utimes(p, &bsdutimes);
967 }
968 
969 #define __WCLONE 0x80000000
970 
971 int
972 linux_waitpid(struct proc *p, struct linux_waitpid_args *args)
973 {
974     struct wait_args /* {
975 	int pid;
976 	int *status;
977 	int options;
978 	struct	rusage *rusage;
979     } */ tmp;
980     int error, tmpstat;
981 
982 #ifdef DEBUG
983     printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n",
984 	(long)p->p_pid, args->pid, (void *)args->status, args->options);
985 #endif
986     tmp.pid = args->pid;
987     tmp.status = args->status;
988     tmp.options = (args->options & (WNOHANG | WUNTRACED));
989     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
990     if (args->options & __WCLONE)
991 	tmp.options |= WLINUXCLONE;
992     tmp.rusage = NULL;
993 
994     if ((error = wait4(p, &tmp)) != 0)
995 	return error;
996 
997     if (args->status) {
998 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
999 	    return error;
1000 	tmpstat &= 0xffff;
1001 	if (WIFSIGNALED(tmpstat))
1002 	    tmpstat = (tmpstat & 0xffffff80) |
1003 		      BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
1004 	else if (WIFSTOPPED(tmpstat))
1005 	    tmpstat = (tmpstat & 0xffff00ff) |
1006 		      (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
1007 	return copyout(&tmpstat, args->status, sizeof(int));
1008     } else
1009 	return 0;
1010 }
1011 
1012 int
1013 linux_wait4(struct proc *p, struct linux_wait4_args *args)
1014 {
1015     struct wait_args /* {
1016 	int pid;
1017 	int *status;
1018 	int options;
1019 	struct	rusage *rusage;
1020     } */ tmp;
1021     int error, tmpstat;
1022 
1023 #ifdef DEBUG
1024     printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n",
1025 	(long)p->p_pid, args->pid, (void *)args->status, args->options,
1026 	(void *)args->rusage);
1027 #endif
1028     tmp.pid = args->pid;
1029     tmp.status = args->status;
1030     tmp.options = (args->options & (WNOHANG | WUNTRACED));
1031     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
1032     if (args->options & __WCLONE)
1033 	tmp.options |= WLINUXCLONE;
1034     tmp.rusage = args->rusage;
1035 
1036     if ((error = wait4(p, &tmp)) != 0)
1037 	return error;
1038 
1039     SIGDELSET(p->p_siglist, SIGCHLD);
1040 
1041     if (args->status) {
1042 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
1043 	    return error;
1044 	tmpstat &= 0xffff;
1045 	if (WIFSIGNALED(tmpstat))
1046 	    tmpstat = (tmpstat & 0xffffff80) |
1047 		  BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
1048 	else if (WIFSTOPPED(tmpstat))
1049 	    tmpstat = (tmpstat & 0xffff00ff) |
1050 		  (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
1051 	return copyout(&tmpstat, args->status, sizeof(int));
1052     } else
1053 	return 0;
1054 }
1055 
1056 int
1057 linux_mknod(struct proc *p, struct linux_mknod_args *args)
1058 {
1059 	caddr_t sg;
1060 	struct mknod_args bsd_mknod;
1061 	struct mkfifo_args bsd_mkfifo;
1062 
1063 	sg = stackgap_init();
1064 
1065 	CHECKALTCREAT(p, &sg, args->path);
1066 
1067 #ifdef DEBUG
1068 	printf("Linux-emul(%ld): mknod(%s, %d, %d)\n",
1069 	   (long)p->p_pid, args->path, args->mode, args->dev);
1070 #endif
1071 
1072 	if (args->mode & S_IFIFO) {
1073 		bsd_mkfifo.path = args->path;
1074 		bsd_mkfifo.mode = args->mode;
1075 		return mkfifo(p, &bsd_mkfifo);
1076 	} else {
1077 		bsd_mknod.path = args->path;
1078 		bsd_mknod.mode = args->mode;
1079 		bsd_mknod.dev = args->dev;
1080 		return mknod(p, &bsd_mknod);
1081 	}
1082 }
1083 
1084 /*
1085  * UGH! This is just about the dumbest idea I've ever heard!!
1086  */
1087 int
1088 linux_personality(struct proc *p, struct linux_personality_args *args)
1089 {
1090 #ifdef DEBUG
1091 	printf("Linux-emul(%ld): personality(%d)\n",
1092 	   (long)p->p_pid, args->per);
1093 #endif
1094 	if (args->per != 0)
1095 		return EINVAL;
1096 
1097 	/* Yes Jim, it's still a Linux... */
1098 	p->p_retval[0] = 0;
1099 	return 0;
1100 }
1101 
1102 /*
1103  * Wrappers for get/setitimer for debugging..
1104  */
1105 int
1106 linux_setitimer(struct proc *p, struct linux_setitimer_args *args)
1107 {
1108 	struct setitimer_args bsa;
1109 	struct itimerval foo;
1110 	int error;
1111 
1112 #ifdef DEBUG
1113 	printf("Linux-emul(%ld): setitimer(%p, %p)\n",
1114 	    (long)p->p_pid, (void *)args->itv, (void *)args->oitv);
1115 #endif
1116 	bsa.which = args->which;
1117 	bsa.itv = args->itv;
1118 	bsa.oitv = args->oitv;
1119 	if (args->itv) {
1120 	    if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo,
1121 			sizeof(foo))))
1122 		return error;
1123 #ifdef DEBUG
1124 	    printf("setitimer: value: sec: %ld, usec: %ld\n",
1125 		foo.it_value.tv_sec, foo.it_value.tv_usec);
1126 	    printf("setitimer: interval: sec: %ld, usec: %ld\n",
1127 		foo.it_interval.tv_sec, foo.it_interval.tv_usec);
1128 #endif
1129 	}
1130 	return setitimer(p, &bsa);
1131 }
1132 
1133 int
1134 linux_getitimer(struct proc *p, struct linux_getitimer_args *args)
1135 {
1136 	struct getitimer_args bsa;
1137 #ifdef DEBUG
1138 	printf("Linux-emul(%ld): getitimer(%p)\n",
1139 	    (long)p->p_pid, (void *)args->itv);
1140 #endif
1141 	bsa.which = args->which;
1142 	bsa.itv = args->itv;
1143 	return getitimer(p, &bsa);
1144 }
1145 
1146 int
1147 linux_ioperm(struct proc *p, struct linux_ioperm_args *args)
1148 {
1149 	struct sysarch_args sa;
1150 	struct i386_ioperm_args *iia;
1151 	caddr_t sg;
1152 
1153 	sg = stackgap_init();
1154 	iia = stackgap_alloc(&sg, sizeof(struct i386_ioperm_args));
1155 	iia->start = args->start;
1156 	iia->length = args->length;
1157 	iia->enable = args->enable;
1158 	sa.op = I386_SET_IOPERM;
1159 	sa.parms = (char *)iia;
1160 	return sysarch(p, &sa);
1161 }
1162 
1163 int
1164 linux_iopl(struct proc *p, struct linux_iopl_args *args)
1165 {
1166 	int error;
1167 
1168 	if (args->level < 0 || args->level > 3)
1169 		return (EINVAL);
1170 	if ((error = suser(p)) != 0)
1171 		return (error);
1172 	if (securelevel > 0)
1173 		return (EPERM);
1174 	p->p_md.md_regs->tf_eflags = (p->p_md.md_regs->tf_eflags & ~PSL_IOPL) |
1175 		(args->level * (PSL_IOPL / 3));
1176 	return (0);
1177 }
1178 
1179 int
1180 linux_nice(struct proc *p, struct linux_nice_args *args)
1181 {
1182 	struct setpriority_args	bsd_args;
1183 
1184 	bsd_args.which = PRIO_PROCESS;
1185 	bsd_args.who = 0;	/* current process */
1186 	bsd_args.prio = args->inc;
1187 	return setpriority(p, &bsd_args);
1188 }
1189 
1190 int
1191 linux_setgroups(p, uap)
1192 	struct proc *p;
1193 	struct linux_setgroups_args *uap;
1194 {
1195 	struct pcred *pc;
1196 	linux_gid_t linux_gidset[NGROUPS];
1197 	gid_t *bsd_gidset;
1198 	int ngrp, error;
1199 
1200 	pc = p->p_cred;
1201 	ngrp = uap->gidsetsize;
1202 
1203 	/*
1204 	 * cr_groups[0] holds egid. Setting the whole set from
1205 	 * the supplied set will cause egid to be changed too.
1206 	 * Keep cr_groups[0] unchanged to prevent that.
1207 	 */
1208 
1209 	if ((error = suser(p)) != 0)
1210 		return (error);
1211 
1212 	if (ngrp >= NGROUPS)
1213 		return (EINVAL);
1214 
1215 	pc->pc_ucred = crcopy(pc->pc_ucred);
1216 	if (ngrp > 0) {
1217 		error = copyin((caddr_t)uap->gidset, (caddr_t)linux_gidset,
1218 			       ngrp * sizeof(linux_gid_t));
1219 		if (error)
1220 			return (error);
1221 
1222 		pc->pc_ucred->cr_ngroups = ngrp + 1;
1223 
1224 		bsd_gidset = pc->pc_ucred->cr_groups;
1225 		ngrp--;
1226 		while (ngrp >= 0) {
1227 			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
1228 			ngrp--;
1229 		}
1230 	}
1231 	else
1232 		pc->pc_ucred->cr_ngroups = 1;
1233 
1234 	setsugid(p);
1235 	return (0);
1236 }
1237 
1238 int
1239 linux_getgroups(p, uap)
1240 	struct proc *p;
1241 	struct linux_getgroups_args *uap;
1242 {
1243 	struct pcred *pc;
1244 	linux_gid_t linux_gidset[NGROUPS];
1245 	gid_t *bsd_gidset;
1246 	int bsd_gidsetsz, ngrp, error;
1247 
1248 	pc = p->p_cred;
1249 	bsd_gidset = pc->pc_ucred->cr_groups;
1250 	bsd_gidsetsz = pc->pc_ucred->cr_ngroups - 1;
1251 
1252 	/*
1253 	 * cr_groups[0] holds egid. Returning the whole set
1254 	 * here will cause a duplicate. Exclude cr_groups[0]
1255 	 * to prevent that.
1256 	 */
1257 
1258 	if ((ngrp = uap->gidsetsize) == 0) {
1259 		p->p_retval[0] = bsd_gidsetsz;
1260 		return (0);
1261 	}
1262 
1263 	if (ngrp < bsd_gidsetsz)
1264 		return (EINVAL);
1265 
1266 	ngrp = 0;
1267 	while (ngrp < bsd_gidsetsz) {
1268 		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1269 		ngrp++;
1270 	}
1271 
1272 	if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset,
1273 	    ngrp * sizeof(linux_gid_t))))
1274 		return (error);
1275 
1276 	p->p_retval[0] = ngrp;
1277 	return (0);
1278 }
1279 
1280 int
1281 linux_setrlimit(p, uap)
1282      struct proc *p;
1283      struct linux_setrlimit_args *uap;
1284 {
1285     struct osetrlimit_args bsd;
1286 
1287 #ifdef DEBUG
1288     printf("Linux-emul(%ld): setrlimit(%d, %p)\n",
1289 	   (long)p->p_pid, uap->resource, (void *)uap->rlim);
1290 #endif
1291 
1292     if (uap->resource >= LINUX_RLIM_NLIMITS)
1293 	return EINVAL;
1294 
1295     bsd.which = linux_to_bsd_resource[uap->resource];
1296 
1297     if (bsd.which == -1)
1298 	return EINVAL;
1299 
1300     bsd.rlp = uap->rlim;
1301     return osetrlimit(p, &bsd);
1302 }
1303 
1304 int
1305 linux_getrlimit(p, uap)
1306      struct proc *p;
1307      struct linux_getrlimit_args *uap;
1308 {
1309     struct ogetrlimit_args bsd;
1310 
1311 #ifdef DEBUG
1312     printf("Linux-emul(%ld): getrlimit(%d, %p)\n",
1313 	   (long)p->p_pid, uap->resource, (void *)uap->rlim);
1314 #endif
1315 
1316     if (uap->resource >= LINUX_RLIM_NLIMITS)
1317 	return EINVAL;
1318 
1319     bsd.which = linux_to_bsd_resource[uap->resource];
1320 
1321     if (bsd.which == -1)
1322 	return EINVAL;
1323 
1324     bsd.rlp = uap->rlim;
1325     return ogetrlimit(p, &bsd);
1326 }
1327 
1328 int
1329 linux_sched_setscheduler(p, uap)
1330 	struct proc *p;
1331 	struct linux_sched_setscheduler_args *uap;
1332 {
1333 	struct sched_setscheduler_args bsd;
1334 
1335 #ifdef DEBUG
1336 	printf("Linux-emul(%ld): sched_setscheduler(%d, %d, %p)\n",
1337 	    (long)p->p_pid, uap->pid, uap->policy, (const void *)uap->param);
1338 #endif
1339 
1340 	switch (uap->policy) {
1341 	case LINUX_SCHED_OTHER:
1342 		bsd.policy = SCHED_OTHER;
1343 		break;
1344 	case LINUX_SCHED_FIFO:
1345 		bsd.policy = SCHED_FIFO;
1346 		break;
1347 	case LINUX_SCHED_RR:
1348 		bsd.policy = SCHED_RR;
1349 		break;
1350 	default:
1351 		return EINVAL;
1352 	}
1353 
1354 	bsd.pid = uap->pid;
1355 	bsd.param = uap->param;
1356 	return sched_setscheduler(p, &bsd);
1357 }
1358 
1359 int
1360 linux_sched_getscheduler(p, uap)
1361 	struct proc *p;
1362 	struct linux_sched_getscheduler_args *uap;
1363 {
1364 	struct sched_getscheduler_args bsd;
1365 	int error;
1366 
1367 #ifdef DEBUG
1368 	printf("Linux-emul(%ld): sched_getscheduler(%d)\n",
1369 	       (long)p->p_pid, uap->pid);
1370 #endif
1371 
1372 	bsd.pid = uap->pid;
1373 	error = sched_getscheduler(p, &bsd);
1374 
1375 	switch (p->p_retval[0]) {
1376 	case SCHED_OTHER:
1377 		p->p_retval[0] = LINUX_SCHED_OTHER;
1378 		break;
1379 	case SCHED_FIFO:
1380 		p->p_retval[0] = LINUX_SCHED_FIFO;
1381 		break;
1382 	case SCHED_RR:
1383 		p->p_retval[0] = LINUX_SCHED_RR;
1384 		break;
1385 	}
1386 
1387 	return error;
1388 }
1389 
/*
 * Segment description as passed in by the Linux modify_ldt() caller.
 * linux_modify_ldt() copies one of these in from user space and converts
 * it into a native segment descriptor.
 */
struct linux_descriptor {
	unsigned	entry_number;		/* LDT slot to write */
	unsigned long	base_addr;		/* segment base address */
	unsigned	limit;			/* segment limit */
	unsigned	seg_32bit : 1;		/* becomes the def32 flag */
	unsigned	contents : 2;		/* folded into the type field */
	unsigned	read_exec_only : 1;	/* set: segment not writable */
	unsigned	limit_in_pages : 1;	/* becomes the granularity flag */
	unsigned	seg_not_present : 1;	/* inverted into the present bit */
	unsigned	useable : 1;		/* not consulted by the converter */
};
1401 
1402 int
1403 linux_modify_ldt(p, uap)
1404 	struct proc *p;
1405 	struct linux_modify_ldt_args *uap;
1406 {
1407 	int error;
1408 	caddr_t sg;
1409 	struct sysarch_args args;
1410 	struct i386_ldt_args *ldt;
1411 	struct linux_descriptor ld;
1412 	union descriptor *desc;
1413 
1414 	sg = stackgap_init();
1415 
1416 	if (uap->ptr == NULL)
1417 		return (EINVAL);
1418 
1419 	switch (uap->func) {
1420 	case 0x00: /* read_ldt */
1421 		ldt = stackgap_alloc(&sg, sizeof(*ldt));
1422 		ldt->start = 0;
1423 		ldt->descs = uap->ptr;
1424 		ldt->num = uap->bytecount / sizeof(union descriptor);
1425 		args.op = I386_GET_LDT;
1426 		args.parms = (char*)ldt;
1427 		error = sysarch(p, &args);
1428 		p->p_retval[0] *= sizeof(union descriptor);
1429 		break;
1430 	case 0x01: /* write_ldt */
1431 	case 0x11: /* write_ldt */
1432 		if (uap->bytecount != sizeof(ld))
1433 			return (EINVAL);
1434 
1435 		error = copyin(uap->ptr, &ld, sizeof(ld));
1436 		if (error)
1437 			return (error);
1438 
1439 		ldt = stackgap_alloc(&sg, sizeof(*ldt));
1440 		desc = stackgap_alloc(&sg, sizeof(*desc));
1441 		ldt->start = ld.entry_number;
1442 		ldt->descs = desc;
1443 		ldt->num = 1;
1444 		desc->sd.sd_lolimit = (ld.limit & 0x0000ffff);
1445 		desc->sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16;
1446 		desc->sd.sd_lobase = (ld.base_addr & 0x00ffffff);
1447 		desc->sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24;
1448 		desc->sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) |
1449 			(ld.contents << 2);
1450 		desc->sd.sd_dpl = 3;
1451 		desc->sd.sd_p = (ld.seg_not_present ^ 1);
1452 		desc->sd.sd_xx = 0;
1453 		desc->sd.sd_def32 = ld.seg_32bit;
1454 		desc->sd.sd_gran = ld.limit_in_pages;
1455 		args.op = I386_SET_LDT;
1456 		args.parms = (char*)ldt;
1457 		error = sysarch(p, &args);
1458 		break;
1459 	default:
1460 		error = EINVAL;
1461 		break;
1462 	}
1463 
1464 	if (error == EOPNOTSUPP) {
1465 		printf("linux: modify_ldt needs kernel option USER_LDT\n");
1466 		error = ENOSYS;
1467 	}
1468 
1469 	return (error);
1470 }
1471