xref: /freebsd/sys/compat/linux/linux_misc.c (revision 4cf49a43559ed9fdad601bdcccd2c55963008675)
1 /*-
2  * Copyright (c) 1994-1995 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include "opt_compat.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/sysproto.h>
36 #include <sys/kernel.h>
37 #include <sys/mman.h>
38 #include <sys/proc.h>
39 #include <sys/fcntl.h>
40 #include <sys/imgact_aout.h>
41 #include <sys/mount.h>
42 #include <sys/namei.h>
43 #include <sys/resourcevar.h>
44 #include <sys/stat.h>
45 #include <sys/sysctl.h>
46 #include <sys/unistd.h>
47 #include <sys/vnode.h>
48 #include <sys/wait.h>
49 #include <sys/time.h>
50 #include <sys/signalvar.h>
51 
52 #include <vm/vm.h>
53 #include <vm/pmap.h>
54 #include <vm/vm_kern.h>
55 #include <vm/vm_prot.h>
56 #include <vm/vm_map.h>
57 #include <vm/vm_extern.h>
58 
59 #include <machine/frame.h>
60 #include <machine/psl.h>
61 #include <machine/sysarch.h>
62 #include <machine/segments.h>
63 
64 #include <i386/linux/linux.h>
65 #include <i386/linux/linux_proto.h>
66 #include <i386/linux/linux_util.h>
67 #include <i386/linux/linux_mib.h>
68 
69 #include <posix4/sched.h>
70 
/*
 * Translate a native signal number into its Linux counterpart.  Numbers up
 * to LINUX_SIGTBLSZ are looked up in bsd_to_linux_signal; larger numbers
 * are passed through unchanged.  Every use of the argument is
 * parenthesized so that any expression may be passed; note that the
 * argument is still expanded more than once.
 */
#define BSD_TO_LINUX_SIGNAL(sig)	\
	(((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : (sig))
73 
74 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] =
75 { RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
76   RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
77   RLIMIT_MEMLOCK, -1
78 };
79 
80 int
81 linux_alarm(struct proc *p, struct linux_alarm_args *args)
82 {
83     struct itimerval it, old_it;
84     struct timeval tv;
85     int s;
86 
87 #ifdef DEBUG
88     printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs);
89 #endif
90     if (args->secs > 100000000)
91 	return EINVAL;
92     it.it_value.tv_sec = (long)args->secs;
93     it.it_value.tv_usec = 0;
94     it.it_interval.tv_sec = 0;
95     it.it_interval.tv_usec = 0;
96     s = splsoftclock();
97     old_it = p->p_realtimer;
98     getmicrouptime(&tv);
99     if (timevalisset(&old_it.it_value))
100 	untimeout(realitexpire, (caddr_t)p, p->p_ithandle);
101     if (it.it_value.tv_sec != 0) {
102 	p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value));
103 	timevaladd(&it.it_value, &tv);
104     }
105     p->p_realtimer = it;
106     splx(s);
107     if (timevalcmp(&old_it.it_value, &tv, >)) {
108 	timevalsub(&old_it.it_value, &tv);
109 	if (old_it.it_value.tv_usec != 0)
110 	    old_it.it_value.tv_sec++;
111 	p->p_retval[0] = old_it.it_value.tv_sec;
112     }
113     return 0;
114 }
115 
116 int
117 linux_brk(struct proc *p, struct linux_brk_args *args)
118 {
119 #if 0
120     struct vmspace *vm = p->p_vmspace;
121     vm_offset_t new, old;
122     int error;
123 
124     if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr)
125 	return EINVAL;
126     if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr)
127 	> p->p_rlimit[RLIMIT_DATA].rlim_cur)
128 	return ENOMEM;
129 
130     old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize);
131     new = round_page((vm_offset_t)args->dsend);
132     p->p_retval[0] = old;
133     if ((new-old) > 0) {
134 	if (swap_pager_full)
135 	    return ENOMEM;
136 	error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE,
137 			VM_PROT_ALL, VM_PROT_ALL, 0);
138 	if (error)
139 	    return error;
140 	vm->vm_dsize += btoc((new-old));
141 	p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize));
142     }
143     return 0;
144 #else
145     struct vmspace *vm = p->p_vmspace;
146     vm_offset_t new, old;
147     struct obreak_args /* {
148 	char * nsize;
149     } */ tmp;
150 
151 #ifdef DEBUG
152     printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend);
153 #endif
154     old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
155     new = (vm_offset_t)args->dsend;
156     tmp.nsize = (char *) new;
157     if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp))
158 	p->p_retval[0] = (int)new;
159     else
160 	p->p_retval[0] = (int)old;
161 
162     return 0;
163 #endif
164 }
165 
166 int
167 linux_uselib(struct proc *p, struct linux_uselib_args *args)
168 {
169     struct nameidata ni;
170     struct vnode *vp;
171     struct exec *a_out;
172     struct vattr attr;
173     vm_offset_t vmaddr;
174     unsigned long file_offset;
175     vm_offset_t buffer;
176     unsigned long bss_size;
177     int error;
178     caddr_t sg;
179     int locked;
180 
181     sg = stackgap_init();
182     CHECKALTEXIST(p, &sg, args->library);
183 
184 #ifdef DEBUG
185     printf("Linux-emul(%ld): uselib(%s)\n", (long)p->p_pid, args->library);
186 #endif
187 
188     a_out = NULL;
189     locked = 0;
190     vp = NULL;
191 
192     NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, args->library, p);
193     error = namei(&ni);
194     if (error)
195 	goto cleanup;
196 
197     vp = ni.ni_vp;
198     if (vp == NULL) {
199 	error = ENOEXEC;	/* ?? */
200 	goto cleanup;
201     }
202 
203     /*
204      * From here on down, we have a locked vnode that must be unlocked.
205      */
206     locked++;
207 
208     /*
209      * Writable?
210      */
211     if (vp->v_writecount) {
212 	error = ETXTBSY;
213 	goto cleanup;
214     }
215 
216     /*
217      * Executable?
218      */
219     error = VOP_GETATTR(vp, &attr, p->p_ucred, p);
220     if (error)
221 	goto cleanup;
222 
223     if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
224 	((attr.va_mode & 0111) == 0) ||
225 	(attr.va_type != VREG)) {
226 	    error = ENOEXEC;
227 	    goto cleanup;
228     }
229 
230     /*
231      * Sensible size?
232      */
233     if (attr.va_size == 0) {
234 	error = ENOEXEC;
235 	goto cleanup;
236     }
237 
238     /*
239      * Can we access it?
240      */
241     error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
242     if (error)
243 	goto cleanup;
244 
245     error = VOP_OPEN(vp, FREAD, p->p_ucred, p);
246     if (error)
247 	goto cleanup;
248 
249     /*
250      * Lock no longer needed
251      */
252     VOP_UNLOCK(vp, 0, p);
253     locked = 0;
254 
255     /*
256      * Pull in executable header into kernel_map
257      */
258     error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
259 	    	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
260     if (error)
261 	goto cleanup;
262 
263     /*
264      * Is it a Linux binary ?
265      */
266     if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
267 	error = ENOEXEC;
268 	goto cleanup;
269     }
270 
271     /* While we are here, we should REALLY do some more checks */
272 
273     /*
274      * Set file/virtual offset based on a.out variant.
275      */
276     switch ((int)(a_out->a_magic & 0xffff)) {
277     case 0413:	/* ZMAGIC */
278 	file_offset = 1024;
279 	break;
280     case 0314:	/* QMAGIC */
281 	file_offset = 0;
282 	break;
283     default:
284 	error = ENOEXEC;
285 	goto cleanup;
286     }
287 
288     bss_size = round_page(a_out->a_bss);
289 
290     /*
291      * Check various fields in header for validity/bounds.
292      */
293     if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
294 	error = ENOEXEC;
295 	goto cleanup;
296     }
297 
298     /* text + data can't exceed file size */
299     if (a_out->a_data + a_out->a_text > attr.va_size) {
300 	error = EFAULT;
301 	goto cleanup;
302     }
303 
304     /*
305      * text/data/bss must not exceed limits
306      * XXX: this is not complete. it should check current usage PLUS
307      * the resources needed by this library.
308      */
309     if (a_out->a_text > MAXTSIZ ||
310 	a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) {
311 	error = ENOMEM;
312 	goto cleanup;
313     }
314 
315     /*
316      * prevent more writers
317      */
318     vp->v_flag |= VTEXT;
319 
320     /*
321      * Check if file_offset page aligned,.
322      * Currently we cannot handle misalinged file offsets,
323      * and so we read in the entire image (what a waste).
324      */
325     if (file_offset & PAGE_MASK) {
326 #ifdef DEBUG
327 printf("uselib: Non page aligned binary %lu\n", file_offset);
328 #endif
329 	/*
330 	 * Map text+data read/write/execute
331 	 */
332 
333 	/* a_entry is the load address and is page aligned */
334 	vmaddr = trunc_page(a_out->a_entry);
335 
336 	/* get anon user mapping, read+write+execute */
337 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
338 		    	    a_out->a_text + a_out->a_data, FALSE,
339 			    VM_PROT_ALL, VM_PROT_ALL, 0);
340 	if (error)
341 	    goto cleanup;
342 
343 	/* map file into kernel_map */
344 	error = vm_mmap(kernel_map, &buffer,
345 			round_page(a_out->a_text + a_out->a_data + file_offset),
346 		   	VM_PROT_READ, VM_PROT_READ, 0,
347 			(caddr_t)vp, trunc_page(file_offset));
348 	if (error)
349 	    goto cleanup;
350 
351 	/* copy from kernel VM space to user space */
352 	error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset),
353 			(caddr_t)vmaddr, a_out->a_text + a_out->a_data);
354 
355 	/* release temporary kernel space */
356 	vm_map_remove(kernel_map, buffer,
357 		      buffer + round_page(a_out->a_text + a_out->a_data + file_offset));
358 
359 	if (error)
360 	    goto cleanup;
361     }
362     else {
363 #ifdef DEBUG
364 printf("uselib: Page aligned binary %lu\n", file_offset);
365 #endif
366 	/*
367 	 * for QMAGIC, a_entry is 20 bytes beyond the load address
368 	 * to skip the executable header
369 	 */
370 	vmaddr = trunc_page(a_out->a_entry);
371 
372 	/*
373 	 * Map it all into the process's space as a single copy-on-write
374 	 * "data" segment.
375 	 */
376 	error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr,
377 		   	a_out->a_text + a_out->a_data,
378 			VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
379 			(caddr_t)vp, file_offset);
380 	if (error)
381 	    goto cleanup;
382     }
383 #ifdef DEBUG
384 printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]);
385 #endif
386     if (bss_size != 0) {
387         /*
388 	 * Calculate BSS start address
389 	 */
390 	vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data;
391 
392 	/*
393 	 * allocate some 'anon' space
394 	 */
395 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
396 			    bss_size, FALSE,
397 			    VM_PROT_ALL, VM_PROT_ALL, 0);
398 	if (error)
399 	    goto cleanup;
400     }
401 
402 cleanup:
403     /*
404      * Unlock vnode if needed
405      */
406     if (locked)
407 	VOP_UNLOCK(vp, 0, p);
408 
409     /*
410      * Release the kernel mapping.
411      */
412     if (a_out)
413 	vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE);
414 
415     return error;
416 }
417 
418 /* XXX move */
419 struct linux_select_argv {
420 	int nfds;
421 	fd_set *readfds;
422 	fd_set *writefds;
423 	fd_set *exceptfds;
424 	struct timeval *timeout;
425 };
426 
427 int
428 linux_select(struct proc *p, struct linux_select_args *args)
429 {
430     struct linux_select_argv linux_args;
431     struct linux_newselect_args newsel;
432     int error;
433 
434 #ifdef SELECT_DEBUG
435     printf("Linux-emul(%ld): select(%x)\n", (long)p->p_pid, args->ptr);
436 #endif
437     if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
438 			sizeof(linux_args))))
439 	return error;
440 
441     newsel.nfds = linux_args.nfds;
442     newsel.readfds = linux_args.readfds;
443     newsel.writefds = linux_args.writefds;
444     newsel.exceptfds = linux_args.exceptfds;
445     newsel.timeout = linux_args.timeout;
446 
447     return linux_newselect(p, &newsel);
448 }
449 
450 int
451 linux_newselect(struct proc *p, struct linux_newselect_args *args)
452 {
453     struct select_args bsa;
454     struct timeval tv0, tv1, utv, *tvp;
455     caddr_t sg;
456     int error;
457 
458 #ifdef DEBUG
459     printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n",
460   	(long)p->p_pid, args->nfds, (void *)args->readfds,
461 	(void *)args->writefds, (void *)args->exceptfds,
462 	(void *)args->timeout);
463 #endif
464     error = 0;
465     bsa.nd = args->nfds;
466     bsa.in = args->readfds;
467     bsa.ou = args->writefds;
468     bsa.ex = args->exceptfds;
469     bsa.tv = args->timeout;
470 
471     /*
472      * Store current time for computation of the amount of
473      * time left.
474      */
475     if (args->timeout) {
476 	if ((error = copyin(args->timeout, &utv, sizeof(utv))))
477 	    goto select_out;
478 #ifdef DEBUG
479 	printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n",
480 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
481 #endif
482 	if (itimerfix(&utv)) {
483 	    /*
484 	     * The timeval was invalid.  Convert it to something
485 	     * valid that will act as it does under Linux.
486 	     */
487 	    sg = stackgap_init();
488 	    tvp = stackgap_alloc(&sg, sizeof(utv));
489 	    utv.tv_sec += utv.tv_usec / 1000000;
490 	    utv.tv_usec %= 1000000;
491 	    if (utv.tv_usec < 0) {
492 		utv.tv_sec -= 1;
493 		utv.tv_usec += 1000000;
494 	    }
495 	    if (utv.tv_sec < 0)
496 		timevalclear(&utv);
497 	    if ((error = copyout(&utv, tvp, sizeof(utv))))
498 		goto select_out;
499 	    bsa.tv = tvp;
500 	}
501 	microtime(&tv0);
502     }
503 
504     error = select(p, &bsa);
505 #ifdef DEBUG
506     printf("Linux-emul(%ld): real select returns %d\n", (long)p->p_pid, error);
507 #endif
508 
509     if (error) {
510 	/*
511 	 * See fs/select.c in the Linux kernel.  Without this,
512 	 * Maelstrom doesn't work.
513 	 */
514 	if (error == ERESTART)
515 	    error = EINTR;
516 	goto select_out;
517     }
518 
519     if (args->timeout) {
520 	if (p->p_retval[0]) {
521 	    /*
522 	     * Compute how much time was left of the timeout,
523 	     * by subtracting the current time and the time
524 	     * before we started the call, and subtracting
525 	     * that result from the user-supplied value.
526 	     */
527 	    microtime(&tv1);
528 	    timevalsub(&tv1, &tv0);
529 	    timevalsub(&utv, &tv1);
530 	    if (utv.tv_sec < 0)
531 		timevalclear(&utv);
532 	} else
533 	    timevalclear(&utv);
534 #ifdef DEBUG
535 	printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n",
536 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
537 #endif
538 	if ((error = copyout(&utv, args->timeout, sizeof(utv))))
539 	    goto select_out;
540     }
541 
542 select_out:
543 #ifdef DEBUG
544     printf("Linux-emul(%ld): newselect_out -> %d\n", (long)p->p_pid, error);
545 #endif
546     return error;
547 }
548 
549 int
550 linux_getpgid(struct proc *p, struct linux_getpgid_args *args)
551 {
552     struct proc *curp;
553 
554 #ifdef DEBUG
555     printf("Linux-emul(%ld): getpgid(%d)\n", (long)p->p_pid, args->pid);
556 #endif
557     if (args->pid != p->p_pid) {
558 	if (!(curp = pfind(args->pid)))
559 	    return ESRCH;
560     }
561     else
562 	curp = p;
563     p->p_retval[0] = curp->p_pgid;
564     return 0;
565 }
566 
567 int
568 linux_fork(struct proc *p, struct linux_fork_args *args)
569 {
570     int error;
571 
572 #ifdef DEBUG
573     printf("Linux-emul(%ld): fork()\n", (long)p->p_pid);
574 #endif
575     if ((error = fork(p, (struct fork_args *)args)) != 0)
576 	return error;
577     if (p->p_retval[1] == 1)
578 	p->p_retval[0] = 0;
579     return 0;
580 }
581 
582 int
583 linux_vfork(struct proc *p, struct linux_vfork_args *args)
584 {
585 	int error;
586 
587 #ifdef DEBUG
588 	printf("Linux-emul(%ld): vfork()\n", (long)p->p_pid);
589 #endif
590 
591 	if ((error = vfork(p, (struct vfork_args *)args)) != 0)
592 		return error;
593 	/* Are we the child? */
594 	if (p->p_retval[1] == 1)
595 		p->p_retval[0] = 0;
596 	return 0;
597 }
598 
599 #define CLONE_VM	0x100
600 #define CLONE_FS	0x200
601 #define CLONE_FILES	0x400
602 #define CLONE_SIGHAND	0x800
603 #define CLONE_PID	0x1000
604 
605 int
606 linux_clone(struct proc *p, struct linux_clone_args *args)
607 {
608     int error, ff = RFPROC;
609     struct proc *p2;
610     int            exit_signal;
611     vm_offset_t    start;
612     struct rfork_args rf_args;
613 
614 #ifdef DEBUG
615     if (args->flags & CLONE_PID)
616 	printf("linux_clone(%ld): CLONE_PID not yet supported\n",
617 	       (long)p->p_pid);
618     printf("linux_clone(%ld): invoked with flags %x and stack %x\n",
619 	   (long)p->p_pid, (unsigned int)args->flags,
620 	   (unsigned int)args->stack);
621 #endif
622 
623     if (!args->stack)
624         return (EINVAL);
625 
626     exit_signal = args->flags & 0x000000ff;
627     if (exit_signal >= LINUX_NSIG)
628 	return EINVAL;
629 
630     if (exit_signal <= LINUX_SIGTBLSZ)
631 	exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)];
632 
633     /* RFTHREAD probably not necessary here, but it shouldn't hurt either */
634     ff |= RFTHREAD;
635 
636     if (args->flags & CLONE_VM)
637 	ff |= RFMEM;
638     if (args->flags & CLONE_SIGHAND)
639 	ff |= RFSIGSHARE;
640     if (!(args->flags & CLONE_FILES))
641 	ff |= RFFDG;
642 
643     error = 0;
644     start = 0;
645 
646     rf_args.flags = ff;
647     if ((error = rfork(p, &rf_args)) != 0)
648 	return error;
649 
650     p2 = pfind(p->p_retval[0]);
651     if (p2 == 0)
652  	return ESRCH;
653 
654     p2->p_sigparent = exit_signal;
655     p2->p_md.md_regs->tf_esp = (unsigned int)args->stack;
656 
657 #ifdef DEBUG
658     printf ("linux_clone(%ld): successful rfork to %ld\n",
659 	    (long)p->p_pid, (long)p2->p_pid);
660 #endif
661     return 0;
662 }
663 
664 /* XXX move */
665 struct linux_mmap_argv {
666 	linux_caddr_t addr;
667 	int len;
668 	int prot;
669 	int flags;
670 	int fd;
671 	int pos;
672 };
673 
674 #define STACK_SIZE  (2 * 1024 * 1024)
675 #define GUARD_SIZE  (4 * PAGE_SIZE)
676 int
677 linux_mmap(struct proc *p, struct linux_mmap_args *args)
678 {
679     struct mmap_args /* {
680 	caddr_t addr;
681 	size_t len;
682 	int prot;
683 	int flags;
684 	int fd;
685 	long pad;
686 	off_t pos;
687     } */ bsd_args;
688     int error;
689     struct linux_mmap_argv linux_args;
690 
691     if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
692 			sizeof(linux_args))))
693 	return error;
694 #ifdef DEBUG
695     printf("Linux-emul(%ld): mmap(%p, %d, %d, %08x, %d, %d)\n",
696 	(long)p->p_pid, (void *)linux_args.addr, linux_args.len,
697 	linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos);
698 #endif
699     bsd_args.flags = 0;
700     if (linux_args.flags & LINUX_MAP_SHARED)
701 	bsd_args.flags |= MAP_SHARED;
702     if (linux_args.flags & LINUX_MAP_PRIVATE)
703 	bsd_args.flags |= MAP_PRIVATE;
704     if (linux_args.flags & LINUX_MAP_FIXED)
705 	bsd_args.flags |= MAP_FIXED;
706     if (linux_args.flags & LINUX_MAP_ANON)
707 	bsd_args.flags |= MAP_ANON;
708     if (linux_args.flags & LINUX_MAP_GROWSDOWN) {
709 	bsd_args.flags |= MAP_STACK;
710 
711 	/* The linux MAP_GROWSDOWN option does not limit auto
712 	 * growth of the region.  Linux mmap with this option
713 	 * takes as addr the inital BOS, and as len, the initial
714 	 * region size.  It can then grow down from addr without
715 	 * limit.  However, linux threads has an implicit internal
716 	 * limit to stack size of STACK_SIZE.  Its just not
717 	 * enforced explicitly in linux.  But, here we impose
718 	 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
719 	 * region, since we can do this with our mmap.
720 	 *
721 	 * Our mmap with MAP_STACK takes addr as the maximum
722 	 * downsize limit on BOS, and as len the max size of
723 	 * the region.  It them maps the top SGROWSIZ bytes,
724 	 * and autgrows the region down, up to the limit
725 	 * in addr.
726 	 *
727 	 * If we don't use the MAP_STACK option, the effect
728 	 * of this code is to allocate a stack region of a
729 	 * fixed size of (STACK_SIZE - GUARD_SIZE).
730 	 */
731 
732 	/* This gives us TOS */
733 	bsd_args.addr = linux_args.addr + linux_args.len;
734 
735 	/* This gives us our maximum stack size */
736 	if (linux_args.len > STACK_SIZE - GUARD_SIZE)
737 	    bsd_args.len = linux_args.len;
738 	else
739 	    bsd_args.len  = STACK_SIZE - GUARD_SIZE;
740 
741 	/* This gives us a new BOS.  If we're using VM_STACK, then
742 	 * mmap will just map the top SGROWSIZ bytes, and let
743 	 * the stack grow down to the limit at BOS.  If we're
744 	 * not using VM_STACK we map the full stack, since we
745 	 * don't have a way to autogrow it.
746 	 */
747 	bsd_args.addr -= bsd_args.len;
748 
749     } else {
750 	bsd_args.addr = linux_args.addr;
751 	bsd_args.len  = linux_args.len;
752     }
753 
754     bsd_args.prot = linux_args.prot | PROT_READ;	/* always required */
755     bsd_args.fd = linux_args.fd;
756     bsd_args.pos = linux_args.pos;
757     bsd_args.pad = 0;
758     return mmap(p, &bsd_args);
759 }
760 
761 int
762 linux_mremap(struct proc *p, struct linux_mremap_args *args)
763 {
764 	struct munmap_args /* {
765 		void *addr;
766 		size_t len;
767 	} */ bsd_args;
768 	int error = 0;
769 
770 #ifdef DEBUG
771 	printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n",
772 	    (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len,
773 	    args->flags);
774 #endif
775 	args->new_len = round_page(args->new_len);
776 	args->old_len = round_page(args->old_len);
777 
778 	if (args->new_len > args->old_len) {
779 		p->p_retval[0] = 0;
780 		return ENOMEM;
781 	}
782 
783 	if (args->new_len < args->old_len) {
784 		bsd_args.addr = args->addr + args->new_len;
785 		bsd_args.len = args->old_len - args->new_len;
786 		error = munmap(p, &bsd_args);
787 	}
788 
789 	p->p_retval[0] = error ? 0 : (int)args->addr;
790 	return error;
791 }
792 
793 int
794 linux_msync(struct proc *p, struct linux_msync_args *args)
795 {
796 	struct msync_args bsd_args;
797 
798 	bsd_args.addr = args->addr;
799 	bsd_args.len = args->len;
800 	bsd_args.flags = 0;	/* XXX ignore */
801 
802 	return msync(p, &bsd_args);
803 }
804 
805 int
806 linux_pipe(struct proc *p, struct linux_pipe_args *args)
807 {
808     int error;
809     int reg_edx;
810 
811 #ifdef DEBUG
812     printf("Linux-emul(%ld): pipe(*)\n", (long)p->p_pid);
813 #endif
814     reg_edx = p->p_retval[1];
815     error = pipe(p, 0);
816     if (error) {
817 	p->p_retval[1] = reg_edx;
818 	return error;
819     }
820 
821     error = copyout(p->p_retval, args->pipefds, 2*sizeof(int));
822     if (error) {
823 	p->p_retval[1] = reg_edx;
824 	return error;
825     }
826 
827     p->p_retval[1] = reg_edx;
828     p->p_retval[0] = 0;
829     return 0;
830 }
831 
832 int
833 linux_time(struct proc *p, struct linux_time_args *args)
834 {
835     struct timeval tv;
836     linux_time_t tm;
837     int error;
838 
839 #ifdef DEBUG
840     printf("Linux-emul(%ld): time(*)\n", (long)p->p_pid);
841 #endif
842     microtime(&tv);
843     tm = tv.tv_sec;
844     if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t))))
845 	return error;
846     p->p_retval[0] = tm;
847     return 0;
848 }
849 
/* Result buffer that linux_times() copies out to the caller. */
struct linux_times_argv {
    long    tms_utime;
    long    tms_stime;
    long    tms_cutime;
    long    tms_cstime;
};

#define CLK_TCK 100	/* Linux uses 100 */

/*
 * Convert a struct timeval (passed by value, not by pointer) to clock
 * ticks at CLK_TCK ticks per second.  The argument is parenthesized so
 * that expressions such as *ptr may be passed.
 */
#define CONVTCK(r)	((r).tv_sec * CLK_TCK + (r).tv_usec / (1000000 / CLK_TCK))
859 
860 int
861 linux_times(struct proc *p, struct linux_times_args *args)
862 {
863     struct timeval tv;
864     struct linux_times_argv tms;
865     struct rusage ru;
866     int error;
867 
868 #ifdef DEBUG
869     printf("Linux-emul(%ld): times(*)\n", (long)p->p_pid);
870 #endif
871     calcru(p, &ru.ru_utime, &ru.ru_stime, NULL);
872 
873     tms.tms_utime = CONVTCK(ru.ru_utime);
874     tms.tms_stime = CONVTCK(ru.ru_stime);
875 
876     tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime);
877     tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime);
878 
879     if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf,
880 	    	    sizeof(struct linux_times_argv))))
881 	return error;
882 
883     microuptime(&tv);
884     p->p_retval[0] = (int)CONVTCK(tv);
885     return 0;
886 }
887 
888 int
889 linux_newuname(struct proc *p, struct linux_newuname_args *args)
890 {
891 	struct linux_new_utsname utsname;
892 	char *osrelease, *osname;
893 
894 #ifdef DEBUG
895 	printf("Linux-emul(%ld): newuname(*)\n", (long)p->p_pid);
896 #endif
897 
898 	osname = linux_get_osname(p);
899 	osrelease = linux_get_osrelease(p);
900 
901 	bzero(&utsname, sizeof(struct linux_new_utsname));
902 	strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1);
903 	strncpy(utsname.nodename, hostname, LINUX_MAX_UTSNAME-1);
904 	strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1);
905 	strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1);
906 	strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1);
907 	strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1);
908 
909 	return (copyout((caddr_t)&utsname, (caddr_t)args->buf,
910 			sizeof(struct linux_new_utsname)));
911 }
912 
913 struct linux_utimbuf {
914 	linux_time_t l_actime;
915 	linux_time_t l_modtime;
916 };
917 
918 int
919 linux_utime(struct proc *p, struct linux_utime_args *args)
920 {
921     struct utimes_args /* {
922 	char	*path;
923 	struct	timeval *tptr;
924     } */ bsdutimes;
925     struct timeval tv[2], *tvp;
926     struct linux_utimbuf lut;
927     int error;
928     caddr_t sg;
929 
930     sg = stackgap_init();
931     CHECKALTEXIST(p, &sg, args->fname);
932 
933 #ifdef DEBUG
934     printf("Linux-emul(%ld): utime(%s, *)\n", (long)p->p_pid, args->fname);
935 #endif
936     if (args->times) {
937 	if ((error = copyin(args->times, &lut, sizeof lut)))
938 	    return error;
939 	tv[0].tv_sec = lut.l_actime;
940 	tv[0].tv_usec = 0;
941 	tv[1].tv_sec = lut.l_modtime;
942 	tv[1].tv_usec = 0;
943 	/* so that utimes can copyin */
944 	tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
945 	if ((error = copyout(tv, tvp, sizeof(tv))))
946 	    return error;
947 	bsdutimes.tptr = tvp;
948     } else
949 	bsdutimes.tptr = NULL;
950 
951     bsdutimes.path = args->fname;
952     return utimes(p, &bsdutimes);
953 }
954 
955 #define __WCLONE 0x80000000
956 
957 int
958 linux_waitpid(struct proc *p, struct linux_waitpid_args *args)
959 {
960     struct wait_args /* {
961 	int pid;
962 	int *status;
963 	int options;
964 	struct	rusage *rusage;
965     } */ tmp;
966     int error, tmpstat;
967 
968 #ifdef DEBUG
969     printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n",
970 	(long)p->p_pid, args->pid, (void *)args->status, args->options);
971 #endif
972     tmp.pid = args->pid;
973     tmp.status = args->status;
974     tmp.options = (args->options & (WNOHANG | WUNTRACED));
975     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
976     if (args->options & __WCLONE)
977 	tmp.options |= WLINUXCLONE;
978     tmp.rusage = NULL;
979 
980     if ((error = wait4(p, &tmp)) != 0)
981 	return error;
982 
983     if (args->status) {
984 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
985 	    return error;
986 	if (WIFSIGNALED(tmpstat))
987 	    tmpstat = (tmpstat & 0xffffff80) |
988 		      BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
989 	else if (WIFSTOPPED(tmpstat))
990 	    tmpstat = (tmpstat & 0xffff00ff) |
991 		      (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
992 	return copyout(&tmpstat, args->status, sizeof(int));
993     } else
994 	return 0;
995 }
996 
997 int
998 linux_wait4(struct proc *p, struct linux_wait4_args *args)
999 {
1000     struct wait_args /* {
1001 	int pid;
1002 	int *status;
1003 	int options;
1004 	struct	rusage *rusage;
1005     } */ tmp;
1006     int error, tmpstat;
1007 
1008 #ifdef DEBUG
1009     printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n",
1010 	(long)p->p_pid, args->pid, (void *)args->status, args->options,
1011 	(void *)args->rusage);
1012 #endif
1013     tmp.pid = args->pid;
1014     tmp.status = args->status;
1015     tmp.options = (args->options & (WNOHANG | WUNTRACED));
1016     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
1017     if (args->options & __WCLONE)
1018 	tmp.options |= WLINUXCLONE;
1019     tmp.rusage = args->rusage;
1020 
1021     if ((error = wait4(p, &tmp)) != 0)
1022 	return error;
1023 
1024     SIGDELSET(p->p_siglist, SIGCHLD);
1025 
1026     if (args->status) {
1027 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
1028 	    return error;
1029 	if (WIFSIGNALED(tmpstat))
1030 	    tmpstat = (tmpstat & 0xffffff80) |
1031 		  BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
1032 	else if (WIFSTOPPED(tmpstat))
1033 	    tmpstat = (tmpstat & 0xffff00ff) |
1034 		  (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
1035 	return copyout(&tmpstat, args->status, sizeof(int));
1036     } else
1037 	return 0;
1038 }
1039 
1040 int
1041 linux_mknod(struct proc *p, struct linux_mknod_args *args)
1042 {
1043 	caddr_t sg;
1044 	struct mknod_args bsd_mknod;
1045 	struct mkfifo_args bsd_mkfifo;
1046 
1047 	sg = stackgap_init();
1048 
1049 	CHECKALTCREAT(p, &sg, args->path);
1050 
1051 #ifdef DEBUG
1052 	printf("Linux-emul(%ld): mknod(%s, %d, %d)\n",
1053 	   (long)p->p_pid, args->path, args->mode, args->dev);
1054 #endif
1055 
1056 	if (args->mode & S_IFIFO) {
1057 		bsd_mkfifo.path = args->path;
1058 		bsd_mkfifo.mode = args->mode;
1059 		return mkfifo(p, &bsd_mkfifo);
1060 	} else {
1061 		bsd_mknod.path = args->path;
1062 		bsd_mknod.mode = args->mode;
1063 		bsd_mknod.dev = args->dev;
1064 		return mknod(p, &bsd_mknod);
1065 	}
1066 }
1067 
1068 /*
1069  * UGH! This is just about the dumbest idea I've ever heard!!
1070  */
1071 int
1072 linux_personality(struct proc *p, struct linux_personality_args *args)
1073 {
1074 #ifdef DEBUG
1075 	printf("Linux-emul(%ld): personality(%d)\n",
1076 	   (long)p->p_pid, args->per);
1077 #endif
1078 	if (args->per != 0)
1079 		return EINVAL;
1080 
1081 	/* Yes Jim, it's still a Linux... */
1082 	p->p_retval[0] = 0;
1083 	return 0;
1084 }
1085 
1086 /*
1087  * Wrappers for get/setitimer for debugging..
1088  */
1089 int
1090 linux_setitimer(struct proc *p, struct linux_setitimer_args *args)
1091 {
1092 	struct setitimer_args bsa;
1093 	struct itimerval foo;
1094 	int error;
1095 
1096 #ifdef DEBUG
1097 	printf("Linux-emul(%ld): setitimer(%p, %p)\n",
1098 	    (long)p->p_pid, (void *)args->itv, (void *)args->oitv);
1099 #endif
1100 	bsa.which = args->which;
1101 	bsa.itv = args->itv;
1102 	bsa.oitv = args->oitv;
1103 	if (args->itv) {
1104 	    if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo,
1105 			sizeof(foo))))
1106 		return error;
1107 #ifdef DEBUG
1108 	    printf("setitimer: value: sec: %ld, usec: %ld\n",
1109 		foo.it_value.tv_sec, foo.it_value.tv_usec);
1110 	    printf("setitimer: interval: sec: %ld, usec: %ld\n",
1111 		foo.it_interval.tv_sec, foo.it_interval.tv_usec);
1112 #endif
1113 	}
1114 	return setitimer(p, &bsa);
1115 }
1116 
1117 int
1118 linux_getitimer(struct proc *p, struct linux_getitimer_args *args)
1119 {
1120 	struct getitimer_args bsa;
1121 #ifdef DEBUG
1122 	printf("Linux-emul(%ld): getitimer(%p)\n",
1123 	    (long)p->p_pid, (void *)args->itv);
1124 #endif
1125 	bsa.which = args->which;
1126 	bsa.itv = args->itv;
1127 	return getitimer(p, &bsa);
1128 }
1129 
1130 int
1131 linux_ioperm(struct proc *p, struct linux_ioperm_args *args)
1132 {
1133 	struct sysarch_args sa;
1134 	struct i386_ioperm_args *iia;
1135 	caddr_t sg;
1136 
1137 	sg = stackgap_init();
1138 	iia = stackgap_alloc(&sg, sizeof(struct i386_ioperm_args));
1139 	iia->start = args->start;
1140 	iia->length = args->length;
1141 	iia->enable = args->enable;
1142 	sa.op = I386_SET_IOPERM;
1143 	sa.parms = (char *)iia;
1144 	return sysarch(p, &sa);
1145 }
1146 
1147 int
1148 linux_iopl(struct proc *p, struct linux_iopl_args *args)
1149 {
1150 	int error;
1151 
1152 	if (args->level < 0 || args->level > 3)
1153 		return (EINVAL);
1154 	if ((error = suser(p)) != 0)
1155 		return (error);
1156 	if (securelevel > 0)
1157 		return (EPERM);
1158 	p->p_md.md_regs->tf_eflags = (p->p_md.md_regs->tf_eflags & ~PSL_IOPL) |
1159 		(args->level * (PSL_IOPL / 3));
1160 	return (0);
1161 }
1162 
1163 int
1164 linux_nice(struct proc *p, struct linux_nice_args *args)
1165 {
1166 	struct setpriority_args	bsd_args;
1167 
1168 	bsd_args.which = PRIO_PROCESS;
1169 	bsd_args.who = 0;	/* current process */
1170 	bsd_args.prio = args->inc;
1171 	return setpriority(p, &bsd_args);
1172 }
1173 
1174 int
1175 linux_setgroups(p, uap)
1176 	struct proc *p;
1177 	struct linux_setgroups_args *uap;
1178 {
1179 	struct pcred *pc;
1180 	linux_gid_t linux_gidset[NGROUPS];
1181 	gid_t *bsd_gidset;
1182 	int ngrp, error;
1183 
1184 	pc = p->p_cred;
1185 	ngrp = uap->gidsetsize;
1186 
1187 	/*
1188 	 * cr_groups[0] holds egid. Setting the whole set from
1189 	 * the supplied set will cause egid to be changed too.
1190 	 * Keep cr_groups[0] unchanged to prevent that.
1191 	 */
1192 
1193 	if ((error = suser(p)) != 0)
1194 		return (error);
1195 
1196 	if (ngrp >= NGROUPS)
1197 		return (EINVAL);
1198 
1199 	pc->pc_ucred = crcopy(pc->pc_ucred);
1200 	if (ngrp > 0) {
1201 		error = copyin((caddr_t)uap->gidset, (caddr_t)linux_gidset,
1202 			       ngrp * sizeof(linux_gid_t));
1203 		if (error)
1204 			return (error);
1205 
1206 		pc->pc_ucred->cr_ngroups = ngrp + 1;
1207 
1208 		bsd_gidset = pc->pc_ucred->cr_groups;
1209 		ngrp--;
1210 		while (ngrp >= 0) {
1211 			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
1212 			ngrp--;
1213 		}
1214 	}
1215 	else
1216 		pc->pc_ucred->cr_ngroups = 1;
1217 
1218 	setsugid(p);
1219 	return (0);
1220 }
1221 
1222 int
1223 linux_getgroups(p, uap)
1224 	struct proc *p;
1225 	struct linux_getgroups_args *uap;
1226 {
1227 	struct pcred *pc;
1228 	linux_gid_t linux_gidset[NGROUPS];
1229 	gid_t *bsd_gidset;
1230 	int bsd_gidsetsz, ngrp, error;
1231 
1232 	pc = p->p_cred;
1233 	bsd_gidset = pc->pc_ucred->cr_groups;
1234 	bsd_gidsetsz = pc->pc_ucred->cr_ngroups - 1;
1235 
1236 	/*
1237 	 * cr_groups[0] holds egid. Returning the whole set
1238 	 * here will cause a duplicate. Exclude cr_groups[0]
1239 	 * to prevent that.
1240 	 */
1241 
1242 	if ((ngrp = uap->gidsetsize) == 0) {
1243 		p->p_retval[0] = bsd_gidsetsz;
1244 		return (0);
1245 	}
1246 
1247 	if (ngrp < bsd_gidsetsz)
1248 		return (EINVAL);
1249 
1250 	ngrp = 0;
1251 	while (ngrp < bsd_gidsetsz) {
1252 		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1253 		ngrp++;
1254 	}
1255 
1256 	if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset,
1257 	    ngrp * sizeof(linux_gid_t))))
1258 		return (error);
1259 
1260 	p->p_retval[0] = ngrp;
1261 	return (0);
1262 }
1263 
1264 int
1265 linux_setrlimit(p, uap)
1266      struct proc *p;
1267      struct linux_setrlimit_args *uap;
1268 {
1269     struct osetrlimit_args bsd;
1270 
1271 #ifdef DEBUG
1272     printf("Linux-emul(%ld): setrlimit(%d, %p)\n",
1273 	   (long)p->p_pid, uap->resource, (void *)uap->rlim);
1274 #endif
1275 
1276     if (uap->resource >= LINUX_RLIM_NLIMITS)
1277 	return EINVAL;
1278 
1279     bsd.which = linux_to_bsd_resource[uap->resource];
1280 
1281     if (bsd.which == -1)
1282 	return EINVAL;
1283 
1284     bsd.rlp = uap->rlim;
1285     return osetrlimit(p, &bsd);
1286 }
1287 
1288 int
1289 linux_getrlimit(p, uap)
1290      struct proc *p;
1291      struct linux_getrlimit_args *uap;
1292 {
1293     struct ogetrlimit_args bsd;
1294 
1295 #ifdef DEBUG
1296     printf("Linux-emul(%ld): getrlimit(%d, %p)\n",
1297 	   (long)p->p_pid, uap->resource, (void *)uap->rlim);
1298 #endif
1299 
1300     if (uap->resource >= LINUX_RLIM_NLIMITS)
1301 	return EINVAL;
1302 
1303     bsd.which = linux_to_bsd_resource[uap->resource];
1304 
1305     if (bsd.which == -1)
1306 	return EINVAL;
1307 
1308     bsd.rlp = uap->rlim;
1309     return ogetrlimit(p, &bsd);
1310 }
1311 
1312 int
1313 linux_sched_setscheduler(p, uap)
1314 	struct proc *p;
1315 	struct linux_sched_setscheduler_args *uap;
1316 {
1317 	struct sched_setscheduler_args bsd;
1318 
1319 #ifdef DEBUG
1320 	printf("Linux-emul(%ld): sched_setscheduler(%d, %d, %p)\n",
1321 	    (long)p->p_pid, uap->pid, uap->policy, (const void *)uap->param);
1322 #endif
1323 
1324 	switch (uap->policy) {
1325 	case LINUX_SCHED_OTHER:
1326 		bsd.policy = SCHED_OTHER;
1327 		break;
1328 	case LINUX_SCHED_FIFO:
1329 		bsd.policy = SCHED_FIFO;
1330 		break;
1331 	case LINUX_SCHED_RR:
1332 		bsd.policy = SCHED_RR;
1333 		break;
1334 	default:
1335 		return EINVAL;
1336 	}
1337 
1338 	bsd.pid = uap->pid;
1339 	bsd.param = uap->param;
1340 	return sched_setscheduler(p, &bsd);
1341 }
1342 
1343 int
1344 linux_sched_getscheduler(p, uap)
1345 	struct proc *p;
1346 	struct linux_sched_getscheduler_args *uap;
1347 {
1348 	struct sched_getscheduler_args bsd;
1349 	int error;
1350 
1351 #ifdef DEBUG
1352 	printf("Linux-emul(%ld): sched_getscheduler(%d)\n",
1353 	       (long)p->p_pid, uap->pid);
1354 #endif
1355 
1356 	bsd.pid = uap->pid;
1357 	error = sched_getscheduler(p, &bsd);
1358 
1359 	switch (p->p_retval[0]) {
1360 	case SCHED_OTHER:
1361 		p->p_retval[0] = LINUX_SCHED_OTHER;
1362 		break;
1363 	case SCHED_FIFO:
1364 		p->p_retval[0] = LINUX_SCHED_FIFO;
1365 		break;
1366 	case SCHED_RR:
1367 		p->p_retval[0] = LINUX_SCHED_RR;
1368 		break;
1369 	}
1370 
1371 	return error;
1372 }
1373 
/*
 * Segment description as supplied by a Linux program to the write_ldt
 * functions of modify_ldt().  linux_modify_ldt() copies one of these in
 * from user space and converts it into a native descriptor; the notes
 * below say where each field ends up in that conversion.
 */
struct linux_descriptor {
	/* Which entry to write, and the segment's location and size. */
	unsigned int	entry_number;		/* first LDT slot to set */
	unsigned long	base_addr;		/* split into lobase/hibase */
	unsigned int	limit;			/* low 20 bits are used */

	/* One-word attribute bit-field. */
	unsigned int	seg_32bit : 1;		/* copied to sd_def32 */
	unsigned int	contents : 2;		/* bits 2-3 of sd_type */
	unsigned int	read_exec_only : 1;	/* inverted, bit 1 of sd_type */
	unsigned int	limit_in_pages : 1;	/* copied to sd_gran */
	unsigned int	seg_not_present : 1;	/* inverted into sd_p */
	unsigned int	useable : 1;		/* not read by the conversion */
};
1385 
1386 int
1387 linux_modify_ldt(p, uap)
1388 	struct proc *p;
1389 	struct linux_modify_ldt_args *uap;
1390 {
1391 	int error;
1392 	caddr_t sg;
1393 	struct sysarch_args args;
1394 	struct i386_ldt_args *ldt;
1395 	struct linux_descriptor ld;
1396 	union descriptor *desc;
1397 
1398 	sg = stackgap_init();
1399 
1400 	if (uap->ptr == NULL)
1401 		return (EINVAL);
1402 
1403 	switch (uap->func) {
1404 	case 0x00: /* read_ldt */
1405 		ldt = stackgap_alloc(&sg, sizeof(*ldt));
1406 		ldt->start = 0;
1407 		ldt->descs = uap->ptr;
1408 		ldt->num = uap->bytecount / sizeof(union descriptor);
1409 		args.op = I386_GET_LDT;
1410 		args.parms = (char*)ldt;
1411 		error = sysarch(p, &args);
1412 		p->p_retval[0] *= sizeof(union descriptor);
1413 		break;
1414 	case 0x01: /* write_ldt */
1415 	case 0x11: /* write_ldt */
1416 		if (uap->bytecount != sizeof(ld))
1417 			return (EINVAL);
1418 
1419 		error = copyin(uap->ptr, &ld, sizeof(ld));
1420 		if (error)
1421 			return (error);
1422 
1423 		ldt = stackgap_alloc(&sg, sizeof(*ldt));
1424 		desc = stackgap_alloc(&sg, sizeof(*desc));
1425 		ldt->start = ld.entry_number;
1426 		ldt->descs = desc;
1427 		ldt->num = 1;
1428 		desc->sd.sd_lolimit = (ld.limit & 0x0000ffff);
1429 		desc->sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16;
1430 		desc->sd.sd_lobase = (ld.base_addr & 0x00ffffff);
1431 		desc->sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24;
1432 		desc->sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) |
1433 			(ld.contents << 2);
1434 		desc->sd.sd_dpl = 3;
1435 		desc->sd.sd_p = (ld.seg_not_present ^ 1);
1436 		desc->sd.sd_xx = 0;
1437 		desc->sd.sd_def32 = ld.seg_32bit;
1438 		desc->sd.sd_gran = ld.limit_in_pages;
1439 		args.op = I386_SET_LDT;
1440 		args.parms = (char*)ldt;
1441 		error = sysarch(p, &args);
1442 		break;
1443 	default:
1444 		error = EINVAL;
1445 		break;
1446 	}
1447 
1448 	if (error == EOPNOTSUPP) {
1449 		printf("linux: modify_ldt needs kernel option USER_LDT\n");
1450 		error = ENOSYS;
1451 	}
1452 
1453 	return (error);
1454 }
1455