xref: /freebsd/sys/compat/linux/linux_misc.c (revision daf1cffce2e07931f27c6c6998652e90df6ba87e)
1 /*-
2  * Copyright (c) 1994-1995 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include "opt_compat.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/sysproto.h>
36 #include <sys/kernel.h>
37 #include <sys/mman.h>
38 #include <sys/proc.h>
39 #include <sys/fcntl.h>
40 #include <sys/imgact_aout.h>
41 #include <sys/mount.h>
42 #include <sys/namei.h>
43 #include <sys/resourcevar.h>
44 #include <sys/stat.h>
45 #include <sys/sysctl.h>
46 #include <sys/unistd.h>
47 #include <sys/vnode.h>
48 #include <sys/wait.h>
49 #include <sys/time.h>
50 #include <sys/signalvar.h>
51 
52 #include <vm/vm.h>
53 #include <vm/pmap.h>
54 #include <vm/vm_kern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_extern.h>
57 #include <vm/vm_zone.h>
58 
59 #include <machine/frame.h>
60 #include <machine/psl.h>
61 #include <machine/sysarch.h>
62 #include <machine/segments.h>
63 
64 #include <i386/linux/linux.h>
65 #include <i386/linux/linux_proto.h>
66 #include <i386/linux/linux_util.h>
67 #include <i386/linux/linux_mib.h>
68 
69 #include <posix4/sched.h>
70 
/*
 * Translate a BSD signal number into its Linux counterpart.  Numbers up to
 * LINUX_SIGTBLSZ are looked up in bsd_to_linux_signal[]; anything larger is
 * passed through unchanged.  The argument is expanded more than once, so it
 * must not have side effects that matter when repeated.
 */
#define BSD_TO_LINUX_SIGNAL(sig)	\
	(((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : (sig))
73 
74 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] =
75 { RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
76   RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
77   RLIMIT_MEMLOCK, -1
78 };
79 
80 int
81 linux_alarm(struct proc *p, struct linux_alarm_args *args)
82 {
83     struct itimerval it, old_it;
84     struct timeval tv;
85     int s;
86 
87 #ifdef DEBUG
88     printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs);
89 #endif
90     if (args->secs > 100000000)
91 	return EINVAL;
92     it.it_value.tv_sec = (long)args->secs;
93     it.it_value.tv_usec = 0;
94     it.it_interval.tv_sec = 0;
95     it.it_interval.tv_usec = 0;
96     s = splsoftclock();
97     old_it = p->p_realtimer;
98     getmicrouptime(&tv);
99     if (timevalisset(&old_it.it_value))
100 	untimeout(realitexpire, (caddr_t)p, p->p_ithandle);
101     if (it.it_value.tv_sec != 0) {
102 	p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value));
103 	timevaladd(&it.it_value, &tv);
104     }
105     p->p_realtimer = it;
106     splx(s);
107     if (timevalcmp(&old_it.it_value, &tv, >)) {
108 	timevalsub(&old_it.it_value, &tv);
109 	if (old_it.it_value.tv_usec != 0)
110 	    old_it.it_value.tv_sec++;
111 	p->p_retval[0] = old_it.it_value.tv_sec;
112     }
113     return 0;
114 }
115 
116 int
117 linux_brk(struct proc *p, struct linux_brk_args *args)
118 {
119 #if 0
120     struct vmspace *vm = p->p_vmspace;
121     vm_offset_t new, old;
122     int error;
123 
124     if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr)
125 	return EINVAL;
126     if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr)
127 	> p->p_rlimit[RLIMIT_DATA].rlim_cur)
128 	return ENOMEM;
129 
130     old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize);
131     new = round_page((vm_offset_t)args->dsend);
132     p->p_retval[0] = old;
133     if ((new-old) > 0) {
134 	if (swap_pager_full)
135 	    return ENOMEM;
136 	error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE,
137 			VM_PROT_ALL, VM_PROT_ALL, 0);
138 	if (error)
139 	    return error;
140 	vm->vm_dsize += btoc((new-old));
141 	p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize));
142     }
143     return 0;
144 #else
145     struct vmspace *vm = p->p_vmspace;
146     vm_offset_t new, old;
147     struct obreak_args /* {
148 	char * nsize;
149     } */ tmp;
150 
151 #ifdef DEBUG
152     printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend);
153 #endif
154     old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
155     new = (vm_offset_t)args->dsend;
156     tmp.nsize = (char *) new;
157     if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp))
158 	p->p_retval[0] = (int)new;
159     else
160 	p->p_retval[0] = (int)old;
161 
162     return 0;
163 #endif
164 }
165 
166 int
167 linux_uselib(struct proc *p, struct linux_uselib_args *args)
168 {
169     struct nameidata ni;
170     struct vnode *vp;
171     struct exec *a_out;
172     struct vattr attr;
173     vm_offset_t vmaddr;
174     unsigned long file_offset;
175     vm_offset_t buffer;
176     unsigned long bss_size;
177     int error;
178     caddr_t sg;
179     int locked;
180 
181     sg = stackgap_init();
182     CHECKALTEXIST(p, &sg, args->library);
183 
184 #ifdef DEBUG
185     printf("Linux-emul(%ld): uselib(%s)\n", (long)p->p_pid, args->library);
186 #endif
187 
188     a_out = NULL;
189     locked = 0;
190     vp = NULL;
191 
192     NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, args->library, p);
193     error = namei(&ni);
194     if (error)
195 	goto cleanup;
196 
197     vp = ni.ni_vp;
198     /*
199      * XXX This looks like a bogus check - a LOCKLEAF namei should not succeed
200      * without returning a vnode.
201      */
202     if (vp == NULL) {
203 	error = ENOEXEC;	/* ?? */
204 	goto cleanup;
205     }
206     NDFREE(&ni, NDF_ONLY_PNBUF);
207 
208     /*
209      * From here on down, we have a locked vnode that must be unlocked.
210      */
211     locked++;
212 
213     /*
214      * Writable?
215      */
216     if (vp->v_writecount) {
217 	error = ETXTBSY;
218 	goto cleanup;
219     }
220 
221     /*
222      * Executable?
223      */
224     error = VOP_GETATTR(vp, &attr, p->p_ucred, p);
225     if (error)
226 	goto cleanup;
227 
228     if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
229 	((attr.va_mode & 0111) == 0) ||
230 	(attr.va_type != VREG)) {
231 	    error = ENOEXEC;
232 	    goto cleanup;
233     }
234 
235     /*
236      * Sensible size?
237      */
238     if (attr.va_size == 0) {
239 	error = ENOEXEC;
240 	goto cleanup;
241     }
242 
243     /*
244      * Can we access it?
245      */
246     error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
247     if (error)
248 	goto cleanup;
249 
250     error = VOP_OPEN(vp, FREAD, p->p_ucred, p);
251     if (error)
252 	goto cleanup;
253 
254     /*
255      * Lock no longer needed
256      */
257     VOP_UNLOCK(vp, 0, p);
258     locked = 0;
259 
260     /*
261      * Pull in executable header into kernel_map
262      */
263     error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
264 	    	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
265     if (error)
266 	goto cleanup;
267 
268     /*
269      * Is it a Linux binary ?
270      */
271     if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
272 	error = ENOEXEC;
273 	goto cleanup;
274     }
275 
276     /* While we are here, we should REALLY do some more checks */
277 
278     /*
279      * Set file/virtual offset based on a.out variant.
280      */
281     switch ((int)(a_out->a_magic & 0xffff)) {
282     case 0413:	/* ZMAGIC */
283 	file_offset = 1024;
284 	break;
285     case 0314:	/* QMAGIC */
286 	file_offset = 0;
287 	break;
288     default:
289 	error = ENOEXEC;
290 	goto cleanup;
291     }
292 
293     bss_size = round_page(a_out->a_bss);
294 
295     /*
296      * Check various fields in header for validity/bounds.
297      */
298     if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
299 	error = ENOEXEC;
300 	goto cleanup;
301     }
302 
303     /* text + data can't exceed file size */
304     if (a_out->a_data + a_out->a_text > attr.va_size) {
305 	error = EFAULT;
306 	goto cleanup;
307     }
308 
309     /*
310      * text/data/bss must not exceed limits
311      * XXX: this is not complete. it should check current usage PLUS
312      * the resources needed by this library.
313      */
314     if (a_out->a_text > MAXTSIZ ||
315 	a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) {
316 	error = ENOMEM;
317 	goto cleanup;
318     }
319 
320     /*
321      * prevent more writers
322      */
323     vp->v_flag |= VTEXT;
324 
325     /*
326      * Check if file_offset page aligned,.
327      * Currently we cannot handle misalinged file offsets,
328      * and so we read in the entire image (what a waste).
329      */
330     if (file_offset & PAGE_MASK) {
331 #ifdef DEBUG
332 printf("uselib: Non page aligned binary %lu\n", file_offset);
333 #endif
334 	/*
335 	 * Map text+data read/write/execute
336 	 */
337 
338 	/* a_entry is the load address and is page aligned */
339 	vmaddr = trunc_page(a_out->a_entry);
340 
341 	/* get anon user mapping, read+write+execute */
342 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
343 		    	    a_out->a_text + a_out->a_data, FALSE,
344 			    VM_PROT_ALL, VM_PROT_ALL, 0);
345 	if (error)
346 	    goto cleanup;
347 
348 	/* map file into kernel_map */
349 	error = vm_mmap(kernel_map, &buffer,
350 			round_page(a_out->a_text + a_out->a_data + file_offset),
351 		   	VM_PROT_READ, VM_PROT_READ, 0,
352 			(caddr_t)vp, trunc_page(file_offset));
353 	if (error)
354 	    goto cleanup;
355 
356 	/* copy from kernel VM space to user space */
357 	error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset),
358 			(caddr_t)vmaddr, a_out->a_text + a_out->a_data);
359 
360 	/* release temporary kernel space */
361 	vm_map_remove(kernel_map, buffer,
362 		      buffer + round_page(a_out->a_text + a_out->a_data + file_offset));
363 
364 	if (error)
365 	    goto cleanup;
366     }
367     else {
368 #ifdef DEBUG
369 printf("uselib: Page aligned binary %lu\n", file_offset);
370 #endif
371 	/*
372 	 * for QMAGIC, a_entry is 20 bytes beyond the load address
373 	 * to skip the executable header
374 	 */
375 	vmaddr = trunc_page(a_out->a_entry);
376 
377 	/*
378 	 * Map it all into the process's space as a single copy-on-write
379 	 * "data" segment.
380 	 */
381 	error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr,
382 		   	a_out->a_text + a_out->a_data,
383 			VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
384 			(caddr_t)vp, file_offset);
385 	if (error)
386 	    goto cleanup;
387     }
388 #ifdef DEBUG
389 printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]);
390 #endif
391     if (bss_size != 0) {
392         /*
393 	 * Calculate BSS start address
394 	 */
395 	vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data;
396 
397 	/*
398 	 * allocate some 'anon' space
399 	 */
400 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
401 			    bss_size, FALSE,
402 			    VM_PROT_ALL, VM_PROT_ALL, 0);
403 	if (error)
404 	    goto cleanup;
405     }
406 
407 cleanup:
408     /*
409      * Unlock vnode if needed
410      */
411     if (locked)
412 	VOP_UNLOCK(vp, 0, p);
413 
414     /*
415      * Release the kernel mapping.
416      */
417     if (a_out)
418 	vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE);
419 
420     return error;
421 }
422 
423 /* XXX move */
/*
 * Argument block of the old-style Linux select(): linux_select() copies
 * one of these in from user space and forwards the fields, one for one,
 * to linux_newselect().
 */
struct linux_select_argv {
	int		 nfds;		/* forwarded as nfds */
	fd_set		*readfds;	/* forwarded as readfds */
	fd_set		*writefds;	/* forwarded as writefds */
	fd_set		*exceptfds;	/* forwarded as exceptfds */
	struct timeval	*timeout;	/* forwarded as timeout */
};
431 
432 int
433 linux_select(struct proc *p, struct linux_select_args *args)
434 {
435     struct linux_select_argv linux_args;
436     struct linux_newselect_args newsel;
437     int error;
438 
439 #ifdef SELECT_DEBUG
440     printf("Linux-emul(%ld): select(%x)\n", (long)p->p_pid, args->ptr);
441 #endif
442     if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
443 			sizeof(linux_args))))
444 	return error;
445 
446     newsel.nfds = linux_args.nfds;
447     newsel.readfds = linux_args.readfds;
448     newsel.writefds = linux_args.writefds;
449     newsel.exceptfds = linux_args.exceptfds;
450     newsel.timeout = linux_args.timeout;
451 
452     return linux_newselect(p, &newsel);
453 }
454 
455 int
456 linux_newselect(struct proc *p, struct linux_newselect_args *args)
457 {
458     struct select_args bsa;
459     struct timeval tv0, tv1, utv, *tvp;
460     caddr_t sg;
461     int error;
462 
463 #ifdef DEBUG
464     printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n",
465   	(long)p->p_pid, args->nfds, (void *)args->readfds,
466 	(void *)args->writefds, (void *)args->exceptfds,
467 	(void *)args->timeout);
468 #endif
469     error = 0;
470     bsa.nd = args->nfds;
471     bsa.in = args->readfds;
472     bsa.ou = args->writefds;
473     bsa.ex = args->exceptfds;
474     bsa.tv = args->timeout;
475 
476     /*
477      * Store current time for computation of the amount of
478      * time left.
479      */
480     if (args->timeout) {
481 	if ((error = copyin(args->timeout, &utv, sizeof(utv))))
482 	    goto select_out;
483 #ifdef DEBUG
484 	printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n",
485 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
486 #endif
487 	if (itimerfix(&utv)) {
488 	    /*
489 	     * The timeval was invalid.  Convert it to something
490 	     * valid that will act as it does under Linux.
491 	     */
492 	    sg = stackgap_init();
493 	    tvp = stackgap_alloc(&sg, sizeof(utv));
494 	    utv.tv_sec += utv.tv_usec / 1000000;
495 	    utv.tv_usec %= 1000000;
496 	    if (utv.tv_usec < 0) {
497 		utv.tv_sec -= 1;
498 		utv.tv_usec += 1000000;
499 	    }
500 	    if (utv.tv_sec < 0)
501 		timevalclear(&utv);
502 	    if ((error = copyout(&utv, tvp, sizeof(utv))))
503 		goto select_out;
504 	    bsa.tv = tvp;
505 	}
506 	microtime(&tv0);
507     }
508 
509     error = select(p, &bsa);
510 #ifdef DEBUG
511     printf("Linux-emul(%ld): real select returns %d\n", (long)p->p_pid, error);
512 #endif
513 
514     if (error) {
515 	/*
516 	 * See fs/select.c in the Linux kernel.  Without this,
517 	 * Maelstrom doesn't work.
518 	 */
519 	if (error == ERESTART)
520 	    error = EINTR;
521 	goto select_out;
522     }
523 
524     if (args->timeout) {
525 	if (p->p_retval[0]) {
526 	    /*
527 	     * Compute how much time was left of the timeout,
528 	     * by subtracting the current time and the time
529 	     * before we started the call, and subtracting
530 	     * that result from the user-supplied value.
531 	     */
532 	    microtime(&tv1);
533 	    timevalsub(&tv1, &tv0);
534 	    timevalsub(&utv, &tv1);
535 	    if (utv.tv_sec < 0)
536 		timevalclear(&utv);
537 	} else
538 	    timevalclear(&utv);
539 #ifdef DEBUG
540 	printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n",
541 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
542 #endif
543 	if ((error = copyout(&utv, args->timeout, sizeof(utv))))
544 	    goto select_out;
545     }
546 
547 select_out:
548 #ifdef DEBUG
549     printf("Linux-emul(%ld): newselect_out -> %d\n", (long)p->p_pid, error);
550 #endif
551     return error;
552 }
553 
554 int
555 linux_getpgid(struct proc *p, struct linux_getpgid_args *args)
556 {
557     struct proc *curp;
558 
559 #ifdef DEBUG
560     printf("Linux-emul(%ld): getpgid(%d)\n", (long)p->p_pid, args->pid);
561 #endif
562     if (args->pid != p->p_pid) {
563 	if (!(curp = pfind(args->pid)))
564 	    return ESRCH;
565     }
566     else
567 	curp = p;
568     p->p_retval[0] = curp->p_pgid;
569     return 0;
570 }
571 
572 int
573 linux_fork(struct proc *p, struct linux_fork_args *args)
574 {
575     int error;
576 
577 #ifdef DEBUG
578     printf("Linux-emul(%ld): fork()\n", (long)p->p_pid);
579 #endif
580     if ((error = fork(p, (struct fork_args *)args)) != 0)
581 	return error;
582     if (p->p_retval[1] == 1)
583 	p->p_retval[0] = 0;
584     return 0;
585 }
586 
587 int
588 linux_vfork(struct proc *p, struct linux_vfork_args *args)
589 {
590 	int error;
591 
592 #ifdef DEBUG
593 	printf("Linux-emul(%ld): vfork()\n", (long)p->p_pid);
594 #endif
595 
596 	if ((error = vfork(p, (struct vfork_args *)args)) != 0)
597 		return error;
598 	/* Are we the child? */
599 	if (p->p_retval[1] == 1)
600 		p->p_retval[0] = 0;
601 	return 0;
602 }
603 
/* Linux clone() flag bits, as interpreted by linux_clone(). */
#define CLONE_VM	0x00000100	/* translated to RFMEM */
#define CLONE_FS	0x00000200	/* not acted upon by linux_clone() */
#define CLONE_FILES	0x00000400	/* when clear, RFFDG is requested */
#define CLONE_SIGHAND	0x00000800	/* translated to RFSIGSHARE */
#define CLONE_PID	0x00001000	/* not yet supported */
609 
610 int
611 linux_clone(struct proc *p, struct linux_clone_args *args)
612 {
613     int error, ff = RFPROC;
614     struct proc *p2;
615     int            exit_signal;
616     vm_offset_t    start;
617     struct rfork_args rf_args;
618 
619 #ifdef DEBUG
620     if (args->flags & CLONE_PID)
621 	printf("linux_clone(%ld): CLONE_PID not yet supported\n",
622 	       (long)p->p_pid);
623     printf("linux_clone(%ld): invoked with flags %x and stack %x\n",
624 	   (long)p->p_pid, (unsigned int)args->flags,
625 	   (unsigned int)args->stack);
626 #endif
627 
628     if (!args->stack)
629         return (EINVAL);
630 
631     exit_signal = args->flags & 0x000000ff;
632     if (exit_signal >= LINUX_NSIG)
633 	return EINVAL;
634 
635     if (exit_signal <= LINUX_SIGTBLSZ)
636 	exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)];
637 
638     /* RFTHREAD probably not necessary here, but it shouldn't hurt either */
639     ff |= RFTHREAD;
640 
641     if (args->flags & CLONE_VM)
642 	ff |= RFMEM;
643     if (args->flags & CLONE_SIGHAND)
644 	ff |= RFSIGSHARE;
645     if (!(args->flags & CLONE_FILES))
646 	ff |= RFFDG;
647 
648     error = 0;
649     start = 0;
650 
651     rf_args.flags = ff;
652     if ((error = rfork(p, &rf_args)) != 0)
653 	return error;
654 
655     p2 = pfind(p->p_retval[0]);
656     if (p2 == 0)
657  	return ESRCH;
658 
659     p2->p_sigparent = exit_signal;
660     p2->p_md.md_regs->tf_esp = (unsigned int)args->stack;
661 
662 #ifdef DEBUG
663     printf ("linux_clone(%ld): successful rfork to %ld\n",
664 	    (long)p->p_pid, (long)p2->p_pid);
665 #endif
666     return 0;
667 }
668 
669 /* XXX move */
670 struct linux_mmap_argv {
671 	linux_caddr_t addr;
672 	int len;
673 	int prot;
674 	int flags;
675 	int fd;
676 	int pos;
677 };
678 
679 #define STACK_SIZE  (2 * 1024 * 1024)
680 #define GUARD_SIZE  (4 * PAGE_SIZE)
681 int
682 linux_mmap(struct proc *p, struct linux_mmap_args *args)
683 {
684     struct mmap_args /* {
685 	caddr_t addr;
686 	size_t len;
687 	int prot;
688 	int flags;
689 	int fd;
690 	long pad;
691 	off_t pos;
692     } */ bsd_args;
693     int error;
694     struct linux_mmap_argv linux_args;
695 
696     if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
697 			sizeof(linux_args))))
698 	return error;
699 #ifdef DEBUG
700     printf("Linux-emul(%ld): mmap(%p, %d, %d, %08x, %d, %d)\n",
701 	(long)p->p_pid, (void *)linux_args.addr, linux_args.len,
702 	linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos);
703 #endif
704     bsd_args.flags = 0;
705     if (linux_args.flags & LINUX_MAP_SHARED)
706 	bsd_args.flags |= MAP_SHARED;
707     if (linux_args.flags & LINUX_MAP_PRIVATE)
708 	bsd_args.flags |= MAP_PRIVATE;
709     if (linux_args.flags & LINUX_MAP_FIXED)
710 	bsd_args.flags |= MAP_FIXED;
711     if (linux_args.flags & LINUX_MAP_ANON)
712 	bsd_args.flags |= MAP_ANON;
713     if (linux_args.flags & LINUX_MAP_GROWSDOWN) {
714 	bsd_args.flags |= MAP_STACK;
715 
716 	/* The linux MAP_GROWSDOWN option does not limit auto
717 	 * growth of the region.  Linux mmap with this option
718 	 * takes as addr the inital BOS, and as len, the initial
719 	 * region size.  It can then grow down from addr without
720 	 * limit.  However, linux threads has an implicit internal
721 	 * limit to stack size of STACK_SIZE.  Its just not
722 	 * enforced explicitly in linux.  But, here we impose
723 	 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
724 	 * region, since we can do this with our mmap.
725 	 *
726 	 * Our mmap with MAP_STACK takes addr as the maximum
727 	 * downsize limit on BOS, and as len the max size of
728 	 * the region.  It them maps the top SGROWSIZ bytes,
729 	 * and autgrows the region down, up to the limit
730 	 * in addr.
731 	 *
732 	 * If we don't use the MAP_STACK option, the effect
733 	 * of this code is to allocate a stack region of a
734 	 * fixed size of (STACK_SIZE - GUARD_SIZE).
735 	 */
736 
737 	/* This gives us TOS */
738 	bsd_args.addr = linux_args.addr + linux_args.len;
739 
740 	/* This gives us our maximum stack size */
741 	if (linux_args.len > STACK_SIZE - GUARD_SIZE)
742 	    bsd_args.len = linux_args.len;
743 	else
744 	    bsd_args.len  = STACK_SIZE - GUARD_SIZE;
745 
746 	/* This gives us a new BOS.  If we're using VM_STACK, then
747 	 * mmap will just map the top SGROWSIZ bytes, and let
748 	 * the stack grow down to the limit at BOS.  If we're
749 	 * not using VM_STACK we map the full stack, since we
750 	 * don't have a way to autogrow it.
751 	 */
752 	bsd_args.addr -= bsd_args.len;
753 
754     } else {
755 	bsd_args.addr = linux_args.addr;
756 	bsd_args.len  = linux_args.len;
757     }
758 
759     bsd_args.prot = linux_args.prot | PROT_READ;	/* always required */
760     bsd_args.fd = linux_args.fd;
761     bsd_args.pos = linux_args.pos;
762     bsd_args.pad = 0;
763     return mmap(p, &bsd_args);
764 }
765 
766 int
767 linux_mremap(struct proc *p, struct linux_mremap_args *args)
768 {
769 	struct munmap_args /* {
770 		void *addr;
771 		size_t len;
772 	} */ bsd_args;
773 	int error = 0;
774 
775 #ifdef DEBUG
776 	printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n",
777 	    (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len,
778 	    args->flags);
779 #endif
780 	args->new_len = round_page(args->new_len);
781 	args->old_len = round_page(args->old_len);
782 
783 	if (args->new_len > args->old_len) {
784 		p->p_retval[0] = 0;
785 		return ENOMEM;
786 	}
787 
788 	if (args->new_len < args->old_len) {
789 		bsd_args.addr = args->addr + args->new_len;
790 		bsd_args.len = args->old_len - args->new_len;
791 		error = munmap(p, &bsd_args);
792 	}
793 
794 	p->p_retval[0] = error ? 0 : (int)args->addr;
795 	return error;
796 }
797 
798 int
799 linux_msync(struct proc *p, struct linux_msync_args *args)
800 {
801 	struct msync_args bsd_args;
802 
803 	bsd_args.addr = args->addr;
804 	bsd_args.len = args->len;
805 	bsd_args.flags = 0;	/* XXX ignore */
806 
807 	return msync(p, &bsd_args);
808 }
809 
810 int
811 linux_pipe(struct proc *p, struct linux_pipe_args *args)
812 {
813     int error;
814     int reg_edx;
815 
816 #ifdef DEBUG
817     printf("Linux-emul(%ld): pipe(*)\n", (long)p->p_pid);
818 #endif
819     reg_edx = p->p_retval[1];
820     error = pipe(p, 0);
821     if (error) {
822 	p->p_retval[1] = reg_edx;
823 	return error;
824     }
825 
826     error = copyout(p->p_retval, args->pipefds, 2*sizeof(int));
827     if (error) {
828 	p->p_retval[1] = reg_edx;
829 	return error;
830     }
831 
832     p->p_retval[1] = reg_edx;
833     p->p_retval[0] = 0;
834     return 0;
835 }
836 
837 int
838 linux_time(struct proc *p, struct linux_time_args *args)
839 {
840     struct timeval tv;
841     linux_time_t tm;
842     int error;
843 
844 #ifdef DEBUG
845     printf("Linux-emul(%ld): time(*)\n", (long)p->p_pid);
846 #endif
847     microtime(&tv);
848     tm = tv.tv_sec;
849     if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t))))
850 	return error;
851     p->p_retval[0] = tm;
852     return 0;
853 }
854 
/*
 * Result buffer of the Linux times() call as filled in by linux_times();
 * every field is a tick count produced by CONVTCK().
 */
struct linux_times_argv {
	long	tms_utime;	/* from the ru_utime computed by calcru() */
	long	tms_stime;	/* from the ru_stime computed by calcru() */
	long	tms_cutime;	/* from p_stats->p_cru.ru_utime */
	long	tms_cstime;	/* from p_stats->p_cru.ru_stime */
};
861 
#define CLK_TCK 100	/* Linux uses 100 */
/*
 * Convert a struct timeval (passed by value, not by pointer) into clock
 * ticks at CLK_TCK ticks per second; fractions of a tick are dropped.
 * The argument is parenthesized so that any expression yielding a
 * struct timeval, e.g. a dereferenced pointer, may be passed.
 */
#define CONVTCK(r)	((r).tv_sec * CLK_TCK + (r).tv_usec / (1000000 / CLK_TCK))
864 
865 int
866 linux_times(struct proc *p, struct linux_times_args *args)
867 {
868     struct timeval tv;
869     struct linux_times_argv tms;
870     struct rusage ru;
871     int error;
872 
873 #ifdef DEBUG
874     printf("Linux-emul(%ld): times(*)\n", (long)p->p_pid);
875 #endif
876     calcru(p, &ru.ru_utime, &ru.ru_stime, NULL);
877 
878     tms.tms_utime = CONVTCK(ru.ru_utime);
879     tms.tms_stime = CONVTCK(ru.ru_stime);
880 
881     tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime);
882     tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime);
883 
884     if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf,
885 	    	    sizeof(struct linux_times_argv))))
886 	return error;
887 
888     microuptime(&tv);
889     p->p_retval[0] = (int)CONVTCK(tv);
890     return 0;
891 }
892 
893 int
894 linux_newuname(struct proc *p, struct linux_newuname_args *args)
895 {
896 	struct linux_new_utsname utsname;
897 	char *osrelease, *osname;
898 
899 #ifdef DEBUG
900 	printf("Linux-emul(%ld): newuname(*)\n", (long)p->p_pid);
901 #endif
902 
903 	osname = linux_get_osname(p);
904 	osrelease = linux_get_osrelease(p);
905 
906 	bzero(&utsname, sizeof(struct linux_new_utsname));
907 	strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1);
908 	strncpy(utsname.nodename, hostname, LINUX_MAX_UTSNAME-1);
909 	strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1);
910 	strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1);
911 	strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1);
912 	strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1);
913 
914 	return (copyout((caddr_t)&utsname, (caddr_t)args->buf,
915 			sizeof(struct linux_new_utsname)));
916 }
917 
918 struct linux_utimbuf {
919 	linux_time_t l_actime;
920 	linux_time_t l_modtime;
921 };
922 
923 int
924 linux_utime(struct proc *p, struct linux_utime_args *args)
925 {
926     struct utimes_args /* {
927 	char	*path;
928 	struct	timeval *tptr;
929     } */ bsdutimes;
930     struct timeval tv[2], *tvp;
931     struct linux_utimbuf lut;
932     int error;
933     caddr_t sg;
934 
935     sg = stackgap_init();
936     CHECKALTEXIST(p, &sg, args->fname);
937 
938 #ifdef DEBUG
939     printf("Linux-emul(%ld): utime(%s, *)\n", (long)p->p_pid, args->fname);
940 #endif
941     if (args->times) {
942 	if ((error = copyin(args->times, &lut, sizeof lut)))
943 	    return error;
944 	tv[0].tv_sec = lut.l_actime;
945 	tv[0].tv_usec = 0;
946 	tv[1].tv_sec = lut.l_modtime;
947 	tv[1].tv_usec = 0;
948 	/* so that utimes can copyin */
949 	tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
950 	if ((error = copyout(tv, tvp, sizeof(tv))))
951 	    return error;
952 	bsdutimes.tptr = tvp;
953     } else
954 	bsdutimes.tptr = NULL;
955 
956     bsdutimes.path = args->fname;
957     return utimes(p, &bsdutimes);
958 }
959 
960 #define __WCLONE 0x80000000
961 
962 int
963 linux_waitpid(struct proc *p, struct linux_waitpid_args *args)
964 {
965     struct wait_args /* {
966 	int pid;
967 	int *status;
968 	int options;
969 	struct	rusage *rusage;
970     } */ tmp;
971     int error, tmpstat;
972 
973 #ifdef DEBUG
974     printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n",
975 	(long)p->p_pid, args->pid, (void *)args->status, args->options);
976 #endif
977     tmp.pid = args->pid;
978     tmp.status = args->status;
979     tmp.options = (args->options & (WNOHANG | WUNTRACED));
980     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
981     if (args->options & __WCLONE)
982 	tmp.options |= WLINUXCLONE;
983     tmp.rusage = NULL;
984 
985     if ((error = wait4(p, &tmp)) != 0)
986 	return error;
987 
988     if (args->status) {
989 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
990 	    return error;
991 	if (WIFSIGNALED(tmpstat))
992 	    tmpstat = (tmpstat & 0xffffff80) |
993 		      BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
994 	else if (WIFSTOPPED(tmpstat))
995 	    tmpstat = (tmpstat & 0xffff00ff) |
996 		      (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
997 	return copyout(&tmpstat, args->status, sizeof(int));
998     } else
999 	return 0;
1000 }
1001 
1002 int
1003 linux_wait4(struct proc *p, struct linux_wait4_args *args)
1004 {
1005     struct wait_args /* {
1006 	int pid;
1007 	int *status;
1008 	int options;
1009 	struct	rusage *rusage;
1010     } */ tmp;
1011     int error, tmpstat;
1012 
1013 #ifdef DEBUG
1014     printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n",
1015 	(long)p->p_pid, args->pid, (void *)args->status, args->options,
1016 	(void *)args->rusage);
1017 #endif
1018     tmp.pid = args->pid;
1019     tmp.status = args->status;
1020     tmp.options = (args->options & (WNOHANG | WUNTRACED));
1021     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
1022     if (args->options & __WCLONE)
1023 	tmp.options |= WLINUXCLONE;
1024     tmp.rusage = args->rusage;
1025 
1026     if ((error = wait4(p, &tmp)) != 0)
1027 	return error;
1028 
1029     SIGDELSET(p->p_siglist, SIGCHLD);
1030 
1031     if (args->status) {
1032 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
1033 	    return error;
1034 	if (WIFSIGNALED(tmpstat))
1035 	    tmpstat = (tmpstat & 0xffffff80) |
1036 		  BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
1037 	else if (WIFSTOPPED(tmpstat))
1038 	    tmpstat = (tmpstat & 0xffff00ff) |
1039 		  (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
1040 	return copyout(&tmpstat, args->status, sizeof(int));
1041     } else
1042 	return 0;
1043 }
1044 
1045 int
1046 linux_mknod(struct proc *p, struct linux_mknod_args *args)
1047 {
1048 	caddr_t sg;
1049 	struct mknod_args bsd_mknod;
1050 	struct mkfifo_args bsd_mkfifo;
1051 
1052 	sg = stackgap_init();
1053 
1054 	CHECKALTCREAT(p, &sg, args->path);
1055 
1056 #ifdef DEBUG
1057 	printf("Linux-emul(%ld): mknod(%s, %d, %d)\n",
1058 	   (long)p->p_pid, args->path, args->mode, args->dev);
1059 #endif
1060 
1061 	if (args->mode & S_IFIFO) {
1062 		bsd_mkfifo.path = args->path;
1063 		bsd_mkfifo.mode = args->mode;
1064 		return mkfifo(p, &bsd_mkfifo);
1065 	} else {
1066 		bsd_mknod.path = args->path;
1067 		bsd_mknod.mode = args->mode;
1068 		bsd_mknod.dev = args->dev;
1069 		return mknod(p, &bsd_mknod);
1070 	}
1071 }
1072 
1073 /*
1074  * UGH! This is just about the dumbest idea I've ever heard!!
1075  */
1076 int
1077 linux_personality(struct proc *p, struct linux_personality_args *args)
1078 {
1079 #ifdef DEBUG
1080 	printf("Linux-emul(%ld): personality(%d)\n",
1081 	   (long)p->p_pid, args->per);
1082 #endif
1083 	if (args->per != 0)
1084 		return EINVAL;
1085 
1086 	/* Yes Jim, it's still a Linux... */
1087 	p->p_retval[0] = 0;
1088 	return 0;
1089 }
1090 
1091 /*
1092  * Wrappers for get/setitimer for debugging..
1093  */
1094 int
1095 linux_setitimer(struct proc *p, struct linux_setitimer_args *args)
1096 {
1097 	struct setitimer_args bsa;
1098 	struct itimerval foo;
1099 	int error;
1100 
1101 #ifdef DEBUG
1102 	printf("Linux-emul(%ld): setitimer(%p, %p)\n",
1103 	    (long)p->p_pid, (void *)args->itv, (void *)args->oitv);
1104 #endif
1105 	bsa.which = args->which;
1106 	bsa.itv = args->itv;
1107 	bsa.oitv = args->oitv;
1108 	if (args->itv) {
1109 	    if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo,
1110 			sizeof(foo))))
1111 		return error;
1112 #ifdef DEBUG
1113 	    printf("setitimer: value: sec: %ld, usec: %ld\n",
1114 		foo.it_value.tv_sec, foo.it_value.tv_usec);
1115 	    printf("setitimer: interval: sec: %ld, usec: %ld\n",
1116 		foo.it_interval.tv_sec, foo.it_interval.tv_usec);
1117 #endif
1118 	}
1119 	return setitimer(p, &bsa);
1120 }
1121 
1122 int
1123 linux_getitimer(struct proc *p, struct linux_getitimer_args *args)
1124 {
1125 	struct getitimer_args bsa;
1126 #ifdef DEBUG
1127 	printf("Linux-emul(%ld): getitimer(%p)\n",
1128 	    (long)p->p_pid, (void *)args->itv);
1129 #endif
1130 	bsa.which = args->which;
1131 	bsa.itv = args->itv;
1132 	return getitimer(p, &bsa);
1133 }
1134 
1135 int
1136 linux_ioperm(struct proc *p, struct linux_ioperm_args *args)
1137 {
1138 	struct sysarch_args sa;
1139 	struct i386_ioperm_args *iia;
1140 	caddr_t sg;
1141 
1142 	sg = stackgap_init();
1143 	iia = stackgap_alloc(&sg, sizeof(struct i386_ioperm_args));
1144 	iia->start = args->start;
1145 	iia->length = args->length;
1146 	iia->enable = args->enable;
1147 	sa.op = I386_SET_IOPERM;
1148 	sa.parms = (char *)iia;
1149 	return sysarch(p, &sa);
1150 }
1151 
1152 int
1153 linux_iopl(struct proc *p, struct linux_iopl_args *args)
1154 {
1155 	int error;
1156 
1157 	if (args->level < 0 || args->level > 3)
1158 		return (EINVAL);
1159 	if ((error = suser(p)) != 0)
1160 		return (error);
1161 	if (securelevel > 0)
1162 		return (EPERM);
1163 	p->p_md.md_regs->tf_eflags = (p->p_md.md_regs->tf_eflags & ~PSL_IOPL) |
1164 		(args->level * (PSL_IOPL / 3));
1165 	return (0);
1166 }
1167 
1168 int
1169 linux_nice(struct proc *p, struct linux_nice_args *args)
1170 {
1171 	struct setpriority_args	bsd_args;
1172 
1173 	bsd_args.which = PRIO_PROCESS;
1174 	bsd_args.who = 0;	/* current process */
1175 	bsd_args.prio = args->inc;
1176 	return setpriority(p, &bsd_args);
1177 }
1178 
1179 int
1180 linux_setgroups(p, uap)
1181 	struct proc *p;
1182 	struct linux_setgroups_args *uap;
1183 {
1184 	struct pcred *pc;
1185 	linux_gid_t linux_gidset[NGROUPS];
1186 	gid_t *bsd_gidset;
1187 	int ngrp, error;
1188 
1189 	pc = p->p_cred;
1190 	ngrp = uap->gidsetsize;
1191 
1192 	/*
1193 	 * cr_groups[0] holds egid. Setting the whole set from
1194 	 * the supplied set will cause egid to be changed too.
1195 	 * Keep cr_groups[0] unchanged to prevent that.
1196 	 */
1197 
1198 	if ((error = suser(p)) != 0)
1199 		return (error);
1200 
1201 	if (ngrp >= NGROUPS)
1202 		return (EINVAL);
1203 
1204 	pc->pc_ucred = crcopy(pc->pc_ucred);
1205 	if (ngrp > 0) {
1206 		error = copyin((caddr_t)uap->gidset, (caddr_t)linux_gidset,
1207 			       ngrp * sizeof(linux_gid_t));
1208 		if (error)
1209 			return (error);
1210 
1211 		pc->pc_ucred->cr_ngroups = ngrp + 1;
1212 
1213 		bsd_gidset = pc->pc_ucred->cr_groups;
1214 		ngrp--;
1215 		while (ngrp >= 0) {
1216 			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
1217 			ngrp--;
1218 		}
1219 	}
1220 	else
1221 		pc->pc_ucred->cr_ngroups = 1;
1222 
1223 	setsugid(p);
1224 	return (0);
1225 }
1226 
1227 int
1228 linux_getgroups(p, uap)
1229 	struct proc *p;
1230 	struct linux_getgroups_args *uap;
1231 {
1232 	struct pcred *pc;
1233 	linux_gid_t linux_gidset[NGROUPS];
1234 	gid_t *bsd_gidset;
1235 	int bsd_gidsetsz, ngrp, error;
1236 
1237 	pc = p->p_cred;
1238 	bsd_gidset = pc->pc_ucred->cr_groups;
1239 	bsd_gidsetsz = pc->pc_ucred->cr_ngroups - 1;
1240 
1241 	/*
1242 	 * cr_groups[0] holds egid. Returning the whole set
1243 	 * here will cause a duplicate. Exclude cr_groups[0]
1244 	 * to prevent that.
1245 	 */
1246 
1247 	if ((ngrp = uap->gidsetsize) == 0) {
1248 		p->p_retval[0] = bsd_gidsetsz;
1249 		return (0);
1250 	}
1251 
1252 	if (ngrp < bsd_gidsetsz)
1253 		return (EINVAL);
1254 
1255 	ngrp = 0;
1256 	while (ngrp < bsd_gidsetsz) {
1257 		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1258 		ngrp++;
1259 	}
1260 
1261 	if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset,
1262 	    ngrp * sizeof(linux_gid_t))))
1263 		return (error);
1264 
1265 	p->p_retval[0] = ngrp;
1266 	return (0);
1267 }
1268 
1269 int
1270 linux_setrlimit(p, uap)
1271      struct proc *p;
1272      struct linux_setrlimit_args *uap;
1273 {
1274     struct osetrlimit_args bsd;
1275 
1276 #ifdef DEBUG
1277     printf("Linux-emul(%ld): setrlimit(%d, %p)\n",
1278 	   (long)p->p_pid, uap->resource, (void *)uap->rlim);
1279 #endif
1280 
1281     if (uap->resource >= LINUX_RLIM_NLIMITS)
1282 	return EINVAL;
1283 
1284     bsd.which = linux_to_bsd_resource[uap->resource];
1285 
1286     if (bsd.which == -1)
1287 	return EINVAL;
1288 
1289     bsd.rlp = uap->rlim;
1290     return osetrlimit(p, &bsd);
1291 }
1292 
1293 int
1294 linux_getrlimit(p, uap)
1295      struct proc *p;
1296      struct linux_getrlimit_args *uap;
1297 {
1298     struct ogetrlimit_args bsd;
1299 
1300 #ifdef DEBUG
1301     printf("Linux-emul(%ld): getrlimit(%d, %p)\n",
1302 	   (long)p->p_pid, uap->resource, (void *)uap->rlim);
1303 #endif
1304 
1305     if (uap->resource >= LINUX_RLIM_NLIMITS)
1306 	return EINVAL;
1307 
1308     bsd.which = linux_to_bsd_resource[uap->resource];
1309 
1310     if (bsd.which == -1)
1311 	return EINVAL;
1312 
1313     bsd.rlp = uap->rlim;
1314     return ogetrlimit(p, &bsd);
1315 }
1316 
1317 int
1318 linux_sched_setscheduler(p, uap)
1319 	struct proc *p;
1320 	struct linux_sched_setscheduler_args *uap;
1321 {
1322 	struct sched_setscheduler_args bsd;
1323 
1324 #ifdef DEBUG
1325 	printf("Linux-emul(%ld): sched_setscheduler(%d, %d, %p)\n",
1326 	    (long)p->p_pid, uap->pid, uap->policy, (const void *)uap->param);
1327 #endif
1328 
1329 	switch (uap->policy) {
1330 	case LINUX_SCHED_OTHER:
1331 		bsd.policy = SCHED_OTHER;
1332 		break;
1333 	case LINUX_SCHED_FIFO:
1334 		bsd.policy = SCHED_FIFO;
1335 		break;
1336 	case LINUX_SCHED_RR:
1337 		bsd.policy = SCHED_RR;
1338 		break;
1339 	default:
1340 		return EINVAL;
1341 	}
1342 
1343 	bsd.pid = uap->pid;
1344 	bsd.param = uap->param;
1345 	return sched_setscheduler(p, &bsd);
1346 }
1347 
1348 int
1349 linux_sched_getscheduler(p, uap)
1350 	struct proc *p;
1351 	struct linux_sched_getscheduler_args *uap;
1352 {
1353 	struct sched_getscheduler_args bsd;
1354 	int error;
1355 
1356 #ifdef DEBUG
1357 	printf("Linux-emul(%ld): sched_getscheduler(%d)\n",
1358 	       (long)p->p_pid, uap->pid);
1359 #endif
1360 
1361 	bsd.pid = uap->pid;
1362 	error = sched_getscheduler(p, &bsd);
1363 
1364 	switch (p->p_retval[0]) {
1365 	case SCHED_OTHER:
1366 		p->p_retval[0] = LINUX_SCHED_OTHER;
1367 		break;
1368 	case SCHED_FIFO:
1369 		p->p_retval[0] = LINUX_SCHED_FIFO;
1370 		break;
1371 	case SCHED_RR:
1372 		p->p_retval[0] = LINUX_SCHED_RR;
1373 		break;
1374 	}
1375 
1376 	return error;
1377 }
1378 
/*
 * LDT entry description as copied in from the Linux caller by
 * linux_modify_ldt() for the write_ldt functions.  The field notes
 * below describe how that function consumes each member.
 */
struct linux_descriptor {
	unsigned int	entry_number;		/* LDT slot to write */
	unsigned long	base_addr;		/* segment base (low 32 bits used) */
	unsigned int	limit;			/* segment limit (low 20 bits used) */
	unsigned int	seg_32bit:1;		/* copied to sd_def32 */
	unsigned int	contents:2;		/* bits 2-3 of sd_type */
	unsigned int	read_exec_only:1;	/* inverted into bit 1 of sd_type */
	unsigned int	limit_in_pages:1;	/* copied to sd_gran */
	unsigned int	seg_not_present:1;	/* inverted into sd_p */
	unsigned int	useable:1;		/* not consulted by linux_modify_ldt() */
};
1391 int
1392 linux_modify_ldt(p, uap)
1393 	struct proc *p;
1394 	struct linux_modify_ldt_args *uap;
1395 {
1396 	int error;
1397 	caddr_t sg;
1398 	struct sysarch_args args;
1399 	struct i386_ldt_args *ldt;
1400 	struct linux_descriptor ld;
1401 	union descriptor *desc;
1402 
1403 	sg = stackgap_init();
1404 
1405 	if (uap->ptr == NULL)
1406 		return (EINVAL);
1407 
1408 	switch (uap->func) {
1409 	case 0x00: /* read_ldt */
1410 		ldt = stackgap_alloc(&sg, sizeof(*ldt));
1411 		ldt->start = 0;
1412 		ldt->descs = uap->ptr;
1413 		ldt->num = uap->bytecount / sizeof(union descriptor);
1414 		args.op = I386_GET_LDT;
1415 		args.parms = (char*)ldt;
1416 		error = sysarch(p, &args);
1417 		p->p_retval[0] *= sizeof(union descriptor);
1418 		break;
1419 	case 0x01: /* write_ldt */
1420 	case 0x11: /* write_ldt */
1421 		if (uap->bytecount != sizeof(ld))
1422 			return (EINVAL);
1423 
1424 		error = copyin(uap->ptr, &ld, sizeof(ld));
1425 		if (error)
1426 			return (error);
1427 
1428 		ldt = stackgap_alloc(&sg, sizeof(*ldt));
1429 		desc = stackgap_alloc(&sg, sizeof(*desc));
1430 		ldt->start = ld.entry_number;
1431 		ldt->descs = desc;
1432 		ldt->num = 1;
1433 		desc->sd.sd_lolimit = (ld.limit & 0x0000ffff);
1434 		desc->sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16;
1435 		desc->sd.sd_lobase = (ld.base_addr & 0x00ffffff);
1436 		desc->sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24;
1437 		desc->sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) |
1438 			(ld.contents << 2);
1439 		desc->sd.sd_dpl = 3;
1440 		desc->sd.sd_p = (ld.seg_not_present ^ 1);
1441 		desc->sd.sd_xx = 0;
1442 		desc->sd.sd_def32 = ld.seg_32bit;
1443 		desc->sd.sd_gran = ld.limit_in_pages;
1444 		args.op = I386_SET_LDT;
1445 		args.parms = (char*)ldt;
1446 		error = sysarch(p, &args);
1447 		break;
1448 	default:
1449 		error = EINVAL;
1450 		break;
1451 	}
1452 
1453 	if (error == EOPNOTSUPP) {
1454 		printf("linux: modify_ldt needs kernel option USER_LDT\n");
1455 		error = ENOSYS;
1456 	}
1457 
1458 	return (error);
1459 }
1460