xref: /freebsd/sys/compat/linux/linux_misc.c (revision 807a5caa14df5ff04b331e24b45893f6a2f6bc1b)
1 /*-
2  * Copyright (c) 1994-1995 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include "opt_compat.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/sysproto.h>
36 #include <sys/kernel.h>
37 #include <sys/mman.h>
38 #include <sys/proc.h>
39 #include <sys/fcntl.h>
40 #include <sys/imgact_aout.h>
41 #include <sys/mount.h>
42 #include <sys/namei.h>
43 #include <sys/resourcevar.h>
44 #include <sys/stat.h>
45 #include <sys/sysctl.h>
46 #include <sys/unistd.h>
47 #include <sys/vnode.h>
48 #include <sys/wait.h>
49 #include <sys/time.h>
50 #include <sys/signalvar.h>
51 
52 #include <vm/vm.h>
53 #include <vm/pmap.h>
54 #include <vm/vm_kern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_extern.h>
57 #include <vm/vm_zone.h>
58 
59 #include <machine/frame.h>
60 #include <machine/psl.h>
61 #include <machine/sysarch.h>
62 #include <machine/segments.h>
63 
64 #include <i386/linux/linux.h>
65 #include <i386/linux/linux_proto.h>
66 #include <i386/linux/linux_util.h>
67 #include <i386/linux/linux_mib.h>
68 
69 #include <posix4/sched.h>
70 
/*
 * Translate a BSD signal number into its Linux counterpart.  Numbers up to
 * LINUX_SIGTBLSZ are looked up in bsd_to_linux_signal[]; larger numbers are
 * passed through unchanged.  The argument is evaluated more than once, so it
 * must not have side effects.
 */
#define BSD_TO_LINUX_SIGNAL(sig)	\
	(((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : (sig))
73 
74 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] =
75 { RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
76   RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
77   RLIMIT_MEMLOCK, -1
78 };
79 
80 int
81 linux_alarm(struct proc *p, struct linux_alarm_args *args)
82 {
83     struct itimerval it, old_it;
84     struct timeval tv;
85     int s;
86 
87 #ifdef DEBUG
88     printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs);
89 #endif
90     if (args->secs > 100000000)
91 	return EINVAL;
92     it.it_value.tv_sec = (long)args->secs;
93     it.it_value.tv_usec = 0;
94     it.it_interval.tv_sec = 0;
95     it.it_interval.tv_usec = 0;
96     s = splsoftclock();
97     old_it = p->p_realtimer;
98     getmicrouptime(&tv);
99     if (timevalisset(&old_it.it_value))
100 	untimeout(realitexpire, (caddr_t)p, p->p_ithandle);
101     if (it.it_value.tv_sec != 0) {
102 	p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value));
103 	timevaladd(&it.it_value, &tv);
104     }
105     p->p_realtimer = it;
106     splx(s);
107     if (timevalcmp(&old_it.it_value, &tv, >)) {
108 	timevalsub(&old_it.it_value, &tv);
109 	if (old_it.it_value.tv_usec != 0)
110 	    old_it.it_value.tv_sec++;
111 	p->p_retval[0] = old_it.it_value.tv_sec;
112     }
113     return 0;
114 }
115 
116 int
117 linux_brk(struct proc *p, struct linux_brk_args *args)
118 {
119 #if 0
120     struct vmspace *vm = p->p_vmspace;
121     vm_offset_t new, old;
122     int error;
123 
124     if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr)
125 	return EINVAL;
126     if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr)
127 	> p->p_rlimit[RLIMIT_DATA].rlim_cur)
128 	return ENOMEM;
129 
130     old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize);
131     new = round_page((vm_offset_t)args->dsend);
132     p->p_retval[0] = old;
133     if ((new-old) > 0) {
134 	if (swap_pager_full)
135 	    return ENOMEM;
136 	error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE,
137 			VM_PROT_ALL, VM_PROT_ALL, 0);
138 	if (error)
139 	    return error;
140 	vm->vm_dsize += btoc((new-old));
141 	p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize));
142     }
143     return 0;
144 #else
145     struct vmspace *vm = p->p_vmspace;
146     vm_offset_t new, old;
147     struct obreak_args /* {
148 	char * nsize;
149     } */ tmp;
150 
151 #ifdef DEBUG
152     printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend);
153 #endif
154     old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
155     new = (vm_offset_t)args->dsend;
156     tmp.nsize = (char *) new;
157     if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp))
158 	p->p_retval[0] = (int)new;
159     else
160 	p->p_retval[0] = (int)old;
161 
162     return 0;
163 #endif
164 }
165 
166 int
167 linux_uselib(struct proc *p, struct linux_uselib_args *args)
168 {
169     struct nameidata ni;
170     struct vnode *vp;
171     struct exec *a_out;
172     struct vattr attr;
173     vm_offset_t vmaddr;
174     unsigned long file_offset;
175     vm_offset_t buffer;
176     unsigned long bss_size;
177     int error;
178     caddr_t sg;
179     int locked;
180 
181     sg = stackgap_init();
182     CHECKALTEXIST(p, &sg, args->library);
183 
184 #ifdef DEBUG
185     printf("Linux-emul(%ld): uselib(%s)\n", (long)p->p_pid, args->library);
186 #endif
187 
188     a_out = NULL;
189     locked = 0;
190     vp = NULL;
191 
192     NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, args->library, p);
193     error = namei(&ni);
194     if (error)
195 	goto cleanup;
196 
197     vp = ni.ni_vp;
198     /*
199      * XXX This looks like a bogus check - a LOCKLEAF namei should not succeed
200      * without returning a vnode.
201      */
202     if (vp == NULL) {
203 	error = ENOEXEC;	/* ?? */
204 	goto cleanup;
205     }
206     NDFREE(&ni, NDF_ONLY_PNBUF);
207 
208     /*
209      * From here on down, we have a locked vnode that must be unlocked.
210      */
211     locked++;
212 
213     /*
214      * Writable?
215      */
216     if (vp->v_writecount) {
217 	error = ETXTBSY;
218 	goto cleanup;
219     }
220 
221     /*
222      * Executable?
223      */
224     error = VOP_GETATTR(vp, &attr, p->p_ucred, p);
225     if (error)
226 	goto cleanup;
227 
228     if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
229 	((attr.va_mode & 0111) == 0) ||
230 	(attr.va_type != VREG)) {
231 	    error = ENOEXEC;
232 	    goto cleanup;
233     }
234 
235     /*
236      * Sensible size?
237      */
238     if (attr.va_size == 0) {
239 	error = ENOEXEC;
240 	goto cleanup;
241     }
242 
243     /*
244      * Can we access it?
245      */
246     error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
247     if (error)
248 	goto cleanup;
249 
250     error = VOP_OPEN(vp, FREAD, p->p_ucred, p);
251     if (error)
252 	goto cleanup;
253 
254     /*
255      * Lock no longer needed
256      */
257     VOP_UNLOCK(vp, 0, p);
258     locked = 0;
259 
260     /*
261      * Pull in executable header into kernel_map
262      */
263     error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
264 	    	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
265     if (error)
266 	goto cleanup;
267 
268     /*
269      * Is it a Linux binary ?
270      */
271     if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
272 	error = ENOEXEC;
273 	goto cleanup;
274     }
275 
276     /* While we are here, we should REALLY do some more checks */
277 
278     /*
279      * Set file/virtual offset based on a.out variant.
280      */
281     switch ((int)(a_out->a_magic & 0xffff)) {
282     case 0413:	/* ZMAGIC */
283 	file_offset = 1024;
284 	break;
285     case 0314:	/* QMAGIC */
286 	file_offset = 0;
287 	break;
288     default:
289 	error = ENOEXEC;
290 	goto cleanup;
291     }
292 
293     bss_size = round_page(a_out->a_bss);
294 
295     /*
296      * Check various fields in header for validity/bounds.
297      */
298     if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
299 	error = ENOEXEC;
300 	goto cleanup;
301     }
302 
303     /* text + data can't exceed file size */
304     if (a_out->a_data + a_out->a_text > attr.va_size) {
305 	error = EFAULT;
306 	goto cleanup;
307     }
308 
309     /*
310      * text/data/bss must not exceed limits
311      * XXX: this is not complete. it should check current usage PLUS
312      * the resources needed by this library.
313      */
314     if (a_out->a_text > MAXTSIZ ||
315 	a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) {
316 	error = ENOMEM;
317 	goto cleanup;
318     }
319 
320     /*
321      * prevent more writers
322      */
323     vp->v_flag |= VTEXT;
324 
325     /*
326      * Check if file_offset page aligned,.
327      * Currently we cannot handle misalinged file offsets,
328      * and so we read in the entire image (what a waste).
329      */
330     if (file_offset & PAGE_MASK) {
331 #ifdef DEBUG
332 printf("uselib: Non page aligned binary %lu\n", file_offset);
333 #endif
334 	/*
335 	 * Map text+data read/write/execute
336 	 */
337 
338 	/* a_entry is the load address and is page aligned */
339 	vmaddr = trunc_page(a_out->a_entry);
340 
341 	/* get anon user mapping, read+write+execute */
342 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
343 		    	    a_out->a_text + a_out->a_data, FALSE,
344 			    VM_PROT_ALL, VM_PROT_ALL, 0);
345 	if (error)
346 	    goto cleanup;
347 
348 	/* map file into kernel_map */
349 	error = vm_mmap(kernel_map, &buffer,
350 			round_page(a_out->a_text + a_out->a_data + file_offset),
351 		   	VM_PROT_READ, VM_PROT_READ, 0,
352 			(caddr_t)vp, trunc_page(file_offset));
353 	if (error)
354 	    goto cleanup;
355 
356 	/* copy from kernel VM space to user space */
357 	error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset),
358 			(caddr_t)vmaddr, a_out->a_text + a_out->a_data);
359 
360 	/* release temporary kernel space */
361 	vm_map_remove(kernel_map, buffer,
362 		      buffer + round_page(a_out->a_text + a_out->a_data + file_offset));
363 
364 	if (error)
365 	    goto cleanup;
366     }
367     else {
368 #ifdef DEBUG
369 printf("uselib: Page aligned binary %lu\n", file_offset);
370 #endif
371 	/*
372 	 * for QMAGIC, a_entry is 20 bytes beyond the load address
373 	 * to skip the executable header
374 	 */
375 	vmaddr = trunc_page(a_out->a_entry);
376 
377 	/*
378 	 * Map it all into the process's space as a single copy-on-write
379 	 * "data" segment.
380 	 */
381 	error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr,
382 		   	a_out->a_text + a_out->a_data,
383 			VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
384 			(caddr_t)vp, file_offset);
385 	if (error)
386 	    goto cleanup;
387     }
388 #ifdef DEBUG
389 printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]);
390 #endif
391     if (bss_size != 0) {
392         /*
393 	 * Calculate BSS start address
394 	 */
395 	vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data;
396 
397 	/*
398 	 * allocate some 'anon' space
399 	 */
400 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
401 			    bss_size, FALSE,
402 			    VM_PROT_ALL, VM_PROT_ALL, 0);
403 	if (error)
404 	    goto cleanup;
405     }
406 
407 cleanup:
408     /*
409      * Unlock vnode if needed
410      */
411     if (locked)
412 	VOP_UNLOCK(vp, 0, p);
413 
414     /*
415      * Release the kernel mapping.
416      */
417     if (a_out)
418 	vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE);
419 
420     return error;
421 }
422 
423 /* XXX move */
424 struct linux_select_argv {
425 	int nfds;
426 	fd_set *readfds;
427 	fd_set *writefds;
428 	fd_set *exceptfds;
429 	struct timeval *timeout;
430 };
431 
432 int
433 linux_select(struct proc *p, struct linux_select_args *args)
434 {
435     struct linux_select_argv linux_args;
436     struct linux_newselect_args newsel;
437     int error;
438 
439 #ifdef SELECT_DEBUG
440     printf("Linux-emul(%ld): select(%x)\n", (long)p->p_pid, args->ptr);
441 #endif
442     if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
443 			sizeof(linux_args))))
444 	return error;
445 
446     newsel.nfds = linux_args.nfds;
447     newsel.readfds = linux_args.readfds;
448     newsel.writefds = linux_args.writefds;
449     newsel.exceptfds = linux_args.exceptfds;
450     newsel.timeout = linux_args.timeout;
451 
452     return linux_newselect(p, &newsel);
453 }
454 
455 int
456 linux_newselect(struct proc *p, struct linux_newselect_args *args)
457 {
458     struct select_args bsa;
459     struct timeval tv0, tv1, utv, *tvp;
460     caddr_t sg;
461     int error;
462 
463 #ifdef DEBUG
464     printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n",
465   	(long)p->p_pid, args->nfds, (void *)args->readfds,
466 	(void *)args->writefds, (void *)args->exceptfds,
467 	(void *)args->timeout);
468 #endif
469     error = 0;
470     bsa.nd = args->nfds;
471     bsa.in = args->readfds;
472     bsa.ou = args->writefds;
473     bsa.ex = args->exceptfds;
474     bsa.tv = args->timeout;
475 
476     /*
477      * Store current time for computation of the amount of
478      * time left.
479      */
480     if (args->timeout) {
481 	if ((error = copyin(args->timeout, &utv, sizeof(utv))))
482 	    goto select_out;
483 #ifdef DEBUG
484 	printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n",
485 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
486 #endif
487 	if (itimerfix(&utv)) {
488 	    /*
489 	     * The timeval was invalid.  Convert it to something
490 	     * valid that will act as it does under Linux.
491 	     */
492 	    sg = stackgap_init();
493 	    tvp = stackgap_alloc(&sg, sizeof(utv));
494 	    utv.tv_sec += utv.tv_usec / 1000000;
495 	    utv.tv_usec %= 1000000;
496 	    if (utv.tv_usec < 0) {
497 		utv.tv_sec -= 1;
498 		utv.tv_usec += 1000000;
499 	    }
500 	    if (utv.tv_sec < 0)
501 		timevalclear(&utv);
502 	    if ((error = copyout(&utv, tvp, sizeof(utv))))
503 		goto select_out;
504 	    bsa.tv = tvp;
505 	}
506 	microtime(&tv0);
507     }
508 
509     error = select(p, &bsa);
510 #ifdef DEBUG
511     printf("Linux-emul(%ld): real select returns %d\n", (long)p->p_pid, error);
512 #endif
513 
514     if (error) {
515 	/*
516 	 * See fs/select.c in the Linux kernel.  Without this,
517 	 * Maelstrom doesn't work.
518 	 */
519 	if (error == ERESTART)
520 	    error = EINTR;
521 	goto select_out;
522     }
523 
524     if (args->timeout) {
525 	if (p->p_retval[0]) {
526 	    /*
527 	     * Compute how much time was left of the timeout,
528 	     * by subtracting the current time and the time
529 	     * before we started the call, and subtracting
530 	     * that result from the user-supplied value.
531 	     */
532 	    microtime(&tv1);
533 	    timevalsub(&tv1, &tv0);
534 	    timevalsub(&utv, &tv1);
535 	    if (utv.tv_sec < 0)
536 		timevalclear(&utv);
537 	} else
538 	    timevalclear(&utv);
539 #ifdef DEBUG
540 	printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n",
541 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
542 #endif
543 	if ((error = copyout(&utv, args->timeout, sizeof(utv))))
544 	    goto select_out;
545     }
546 
547 select_out:
548 #ifdef DEBUG
549     printf("Linux-emul(%ld): newselect_out -> %d\n", (long)p->p_pid, error);
550 #endif
551     return error;
552 }
553 
554 int
555 linux_getpgid(struct proc *p, struct linux_getpgid_args *args)
556 {
557     struct proc *curp;
558 
559 #ifdef DEBUG
560     printf("Linux-emul(%ld): getpgid(%d)\n", (long)p->p_pid, args->pid);
561 #endif
562     if (args->pid != p->p_pid) {
563 	if (!(curp = pfind(args->pid)))
564 	    return ESRCH;
565     }
566     else
567 	curp = p;
568     p->p_retval[0] = curp->p_pgid;
569     return 0;
570 }
571 
572 int
573 linux_fork(struct proc *p, struct linux_fork_args *args)
574 {
575     int error;
576 
577 #ifdef DEBUG
578     printf("Linux-emul(%ld): fork()\n", (long)p->p_pid);
579 #endif
580     if ((error = fork(p, (struct fork_args *)args)) != 0)
581 	return error;
582     if (p->p_retval[1] == 1)
583 	p->p_retval[0] = 0;
584     return 0;
585 }
586 
587 int
588 linux_vfork(struct proc *p, struct linux_vfork_args *args)
589 {
590 	int error;
591 
592 #ifdef DEBUG
593 	printf("Linux-emul(%ld): vfork()\n", (long)p->p_pid);
594 #endif
595 
596 	if ((error = vfork(p, (struct vfork_args *)args)) != 0)
597 		return error;
598 	/* Are we the child? */
599 	if (p->p_retval[1] == 1)
600 		p->p_retval[0] = 0;
601 	return 0;
602 }
603 
604 #define CLONE_VM	0x100
605 #define CLONE_FS	0x200
606 #define CLONE_FILES	0x400
607 #define CLONE_SIGHAND	0x800
608 #define CLONE_PID	0x1000
609 
610 int
611 linux_clone(struct proc *p, struct linux_clone_args *args)
612 {
613     int error, ff = RFPROC;
614     struct proc *p2;
615     int            exit_signal;
616     vm_offset_t    start;
617     struct rfork_args rf_args;
618 
619 #ifdef DEBUG
620     if (args->flags & CLONE_PID)
621 	printf("linux_clone(%ld): CLONE_PID not yet supported\n",
622 	       (long)p->p_pid);
623     printf("linux_clone(%ld): invoked with flags %x and stack %x\n",
624 	   (long)p->p_pid, (unsigned int)args->flags,
625 	   (unsigned int)args->stack);
626 #endif
627 
628     if (!args->stack)
629         return (EINVAL);
630 
631     exit_signal = args->flags & 0x000000ff;
632     if (exit_signal >= LINUX_NSIG)
633 	return EINVAL;
634 
635     if (exit_signal <= LINUX_SIGTBLSZ)
636 	exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)];
637 
638     /* RFTHREAD probably not necessary here, but it shouldn't hurt either */
639     ff |= RFTHREAD;
640 
641     if (args->flags & CLONE_VM)
642 	ff |= RFMEM;
643     if (args->flags & CLONE_SIGHAND)
644 	ff |= RFSIGSHARE;
645     if (!(args->flags & CLONE_FILES))
646 	ff |= RFFDG;
647 
648     error = 0;
649     start = 0;
650 
651     rf_args.flags = ff;
652     if ((error = rfork(p, &rf_args)) != 0)
653 	return error;
654 
655     p2 = pfind(p->p_retval[0]);
656     if (p2 == 0)
657  	return ESRCH;
658 
659     p2->p_sigparent = exit_signal;
660     p2->p_md.md_regs->tf_esp = (unsigned int)args->stack;
661 
662 #ifdef DEBUG
663     printf ("linux_clone(%ld): successful rfork to %ld\n",
664 	    (long)p->p_pid, (long)p2->p_pid);
665 #endif
666     return 0;
667 }
668 
669 /* XXX move */
670 struct linux_mmap_argv {
671 	linux_caddr_t addr;
672 	int len;
673 	int prot;
674 	int flags;
675 	int fd;
676 	int pos;
677 };
678 
679 #define STACK_SIZE  (2 * 1024 * 1024)
680 #define GUARD_SIZE  (4 * PAGE_SIZE)
681 int
682 linux_mmap(struct proc *p, struct linux_mmap_args *args)
683 {
684     struct mmap_args /* {
685 	caddr_t addr;
686 	size_t len;
687 	int prot;
688 	int flags;
689 	int fd;
690 	long pad;
691 	off_t pos;
692     } */ bsd_args;
693     int error;
694     struct linux_mmap_argv linux_args;
695 
696     if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
697 			sizeof(linux_args))))
698 	return error;
699 #ifdef DEBUG
700     printf("Linux-emul(%ld): mmap(%p, %d, %d, %08x, %d, %d)\n",
701 	(long)p->p_pid, (void *)linux_args.addr, linux_args.len,
702 	linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos);
703 #endif
704     bsd_args.flags = 0;
705     if (linux_args.flags & LINUX_MAP_SHARED)
706 	bsd_args.flags |= MAP_SHARED;
707     if (linux_args.flags & LINUX_MAP_PRIVATE)
708 	bsd_args.flags |= MAP_PRIVATE;
709     if (linux_args.flags & LINUX_MAP_FIXED)
710 	bsd_args.flags |= MAP_FIXED;
711     if (linux_args.flags & LINUX_MAP_ANON)
712 	bsd_args.flags |= MAP_ANON;
713     if (linux_args.flags & LINUX_MAP_GROWSDOWN) {
714 	bsd_args.flags |= MAP_STACK;
715 
716 	/* The linux MAP_GROWSDOWN option does not limit auto
717 	 * growth of the region.  Linux mmap with this option
718 	 * takes as addr the inital BOS, and as len, the initial
719 	 * region size.  It can then grow down from addr without
720 	 * limit.  However, linux threads has an implicit internal
721 	 * limit to stack size of STACK_SIZE.  Its just not
722 	 * enforced explicitly in linux.  But, here we impose
723 	 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
724 	 * region, since we can do this with our mmap.
725 	 *
726 	 * Our mmap with MAP_STACK takes addr as the maximum
727 	 * downsize limit on BOS, and as len the max size of
728 	 * the region.  It them maps the top SGROWSIZ bytes,
729 	 * and autgrows the region down, up to the limit
730 	 * in addr.
731 	 *
732 	 * If we don't use the MAP_STACK option, the effect
733 	 * of this code is to allocate a stack region of a
734 	 * fixed size of (STACK_SIZE - GUARD_SIZE).
735 	 */
736 
737 	/* This gives us TOS */
738 	bsd_args.addr = linux_args.addr + linux_args.len;
739 
740 	/* This gives us our maximum stack size */
741 	if (linux_args.len > STACK_SIZE - GUARD_SIZE)
742 	    bsd_args.len = linux_args.len;
743 	else
744 	    bsd_args.len  = STACK_SIZE - GUARD_SIZE;
745 
746 	/* This gives us a new BOS.  If we're using VM_STACK, then
747 	 * mmap will just map the top SGROWSIZ bytes, and let
748 	 * the stack grow down to the limit at BOS.  If we're
749 	 * not using VM_STACK we map the full stack, since we
750 	 * don't have a way to autogrow it.
751 	 */
752 	bsd_args.addr -= bsd_args.len;
753 
754     } else {
755 	bsd_args.addr = linux_args.addr;
756 	bsd_args.len  = linux_args.len;
757     }
758 
759     bsd_args.prot = linux_args.prot | PROT_READ;	/* always required */
760     bsd_args.fd = linux_args.fd;
761     bsd_args.pos = linux_args.pos;
762     bsd_args.pad = 0;
763     return mmap(p, &bsd_args);
764 }
765 
766 int
767 linux_mremap(struct proc *p, struct linux_mremap_args *args)
768 {
769 	struct munmap_args /* {
770 		void *addr;
771 		size_t len;
772 	} */ bsd_args;
773 	int error = 0;
774 
775 #ifdef DEBUG
776 	printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n",
777 	    (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len,
778 	    args->flags);
779 #endif
780 	args->new_len = round_page(args->new_len);
781 	args->old_len = round_page(args->old_len);
782 
783 	if (args->new_len > args->old_len) {
784 		p->p_retval[0] = 0;
785 		return ENOMEM;
786 	}
787 
788 	if (args->new_len < args->old_len) {
789 		bsd_args.addr = args->addr + args->new_len;
790 		bsd_args.len = args->old_len - args->new_len;
791 		error = munmap(p, &bsd_args);
792 	}
793 
794 	p->p_retval[0] = error ? 0 : (int)args->addr;
795 	return error;
796 }
797 
798 int
799 linux_msync(struct proc *p, struct linux_msync_args *args)
800 {
801 	struct msync_args bsd_args;
802 
803 	bsd_args.addr = args->addr;
804 	bsd_args.len = args->len;
805 	bsd_args.flags = 0;	/* XXX ignore */
806 
807 	return msync(p, &bsd_args);
808 }
809 
810 int
811 linux_pipe(struct proc *p, struct linux_pipe_args *args)
812 {
813     int error;
814     int reg_edx;
815 
816 #ifdef DEBUG
817     printf("Linux-emul(%ld): pipe(*)\n", (long)p->p_pid);
818 #endif
819     reg_edx = p->p_retval[1];
820     error = pipe(p, 0);
821     if (error) {
822 	p->p_retval[1] = reg_edx;
823 	return error;
824     }
825 
826     error = copyout(p->p_retval, args->pipefds, 2*sizeof(int));
827     if (error) {
828 	p->p_retval[1] = reg_edx;
829 	return error;
830     }
831 
832     p->p_retval[1] = reg_edx;
833     p->p_retval[0] = 0;
834     return 0;
835 }
836 
837 int
838 linux_time(struct proc *p, struct linux_time_args *args)
839 {
840     struct timeval tv;
841     linux_time_t tm;
842     int error;
843 
844 #ifdef DEBUG
845     printf("Linux-emul(%ld): time(*)\n", (long)p->p_pid);
846 #endif
847     microtime(&tv);
848     tm = tv.tv_sec;
849     if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t))))
850 	return error;
851     p->p_retval[0] = tm;
852     return 0;
853 }
854 
855 struct linux_times_argv {
856     long    tms_utime;
857     long    tms_stime;
858     long    tms_cutime;
859     long    tms_cstime;
860 };
861 
862 #define CLK_TCK 100	/* Linux uses 100 */
863 #define CONVTCK(r)	(r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
864 
865 int
866 linux_times(struct proc *p, struct linux_times_args *args)
867 {
868     struct timeval tv;
869     struct linux_times_argv tms;
870     struct rusage ru;
871     int error;
872 
873 #ifdef DEBUG
874     printf("Linux-emul(%ld): times(*)\n", (long)p->p_pid);
875 #endif
876     calcru(p, &ru.ru_utime, &ru.ru_stime, NULL);
877 
878     tms.tms_utime = CONVTCK(ru.ru_utime);
879     tms.tms_stime = CONVTCK(ru.ru_stime);
880 
881     tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime);
882     tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime);
883 
884     if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf,
885 	    	    sizeof(struct linux_times_argv))))
886 	return error;
887 
888     microuptime(&tv);
889     p->p_retval[0] = (int)CONVTCK(tv);
890     return 0;
891 }
892 
893 int
894 linux_newuname(struct proc *p, struct linux_newuname_args *args)
895 {
896 	struct linux_new_utsname utsname;
897 	char *osrelease, *osname;
898 
899 #ifdef DEBUG
900 	printf("Linux-emul(%ld): newuname(*)\n", (long)p->p_pid);
901 #endif
902 
903 	osname = linux_get_osname(p);
904 	osrelease = linux_get_osrelease(p);
905 
906 	bzero(&utsname, sizeof(struct linux_new_utsname));
907 	strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1);
908 	strncpy(utsname.nodename, hostname, LINUX_MAX_UTSNAME-1);
909 	strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1);
910 	strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1);
911 	strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1);
912 	strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1);
913 
914 	return (copyout((caddr_t)&utsname, (caddr_t)args->buf,
915 			sizeof(struct linux_new_utsname)));
916 }
917 
918 struct linux_utimbuf {
919 	linux_time_t l_actime;
920 	linux_time_t l_modtime;
921 };
922 
923 int
924 linux_utime(struct proc *p, struct linux_utime_args *args)
925 {
926     struct utimes_args /* {
927 	char	*path;
928 	struct	timeval *tptr;
929     } */ bsdutimes;
930     struct timeval tv[2], *tvp;
931     struct linux_utimbuf lut;
932     int error;
933     caddr_t sg;
934 
935     sg = stackgap_init();
936     CHECKALTEXIST(p, &sg, args->fname);
937 
938 #ifdef DEBUG
939     printf("Linux-emul(%ld): utime(%s, *)\n", (long)p->p_pid, args->fname);
940 #endif
941     if (args->times) {
942 	if ((error = copyin(args->times, &lut, sizeof lut)))
943 	    return error;
944 	tv[0].tv_sec = lut.l_actime;
945 	tv[0].tv_usec = 0;
946 	tv[1].tv_sec = lut.l_modtime;
947 	tv[1].tv_usec = 0;
948 	/* so that utimes can copyin */
949 	tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
950 	if ((error = copyout(tv, tvp, sizeof(tv))))
951 	    return error;
952 	bsdutimes.tptr = tvp;
953     } else
954 	bsdutimes.tptr = NULL;
955 
956     bsdutimes.path = args->fname;
957     return utimes(p, &bsdutimes);
958 }
959 
960 #define __WCLONE 0x80000000
961 
962 int
963 linux_waitpid(struct proc *p, struct linux_waitpid_args *args)
964 {
965     struct wait_args /* {
966 	int pid;
967 	int *status;
968 	int options;
969 	struct	rusage *rusage;
970     } */ tmp;
971     int error, tmpstat;
972 
973 #ifdef DEBUG
974     printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n",
975 	(long)p->p_pid, args->pid, (void *)args->status, args->options);
976 #endif
977     tmp.pid = args->pid;
978     tmp.status = args->status;
979     tmp.options = (args->options & (WNOHANG | WUNTRACED));
980     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
981     if (args->options & __WCLONE)
982 	tmp.options |= WLINUXCLONE;
983     tmp.rusage = NULL;
984 
985     if ((error = wait4(p, &tmp)) != 0)
986 	return error;
987 
988     if (args->status) {
989 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
990 	    return error;
991 	tmpstat &= 0xffff;
992 	if (WIFSIGNALED(tmpstat))
993 	    tmpstat = (tmpstat & 0xffffff80) |
994 		      BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
995 	else if (WIFSTOPPED(tmpstat))
996 	    tmpstat = (tmpstat & 0xffff00ff) |
997 		      (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
998 	return copyout(&tmpstat, args->status, sizeof(int));
999     } else
1000 	return 0;
1001 }
1002 
1003 int
1004 linux_wait4(struct proc *p, struct linux_wait4_args *args)
1005 {
1006     struct wait_args /* {
1007 	int pid;
1008 	int *status;
1009 	int options;
1010 	struct	rusage *rusage;
1011     } */ tmp;
1012     int error, tmpstat;
1013 
1014 #ifdef DEBUG
1015     printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n",
1016 	(long)p->p_pid, args->pid, (void *)args->status, args->options,
1017 	(void *)args->rusage);
1018 #endif
1019     tmp.pid = args->pid;
1020     tmp.status = args->status;
1021     tmp.options = (args->options & (WNOHANG | WUNTRACED));
1022     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
1023     if (args->options & __WCLONE)
1024 	tmp.options |= WLINUXCLONE;
1025     tmp.rusage = args->rusage;
1026 
1027     if ((error = wait4(p, &tmp)) != 0)
1028 	return error;
1029 
1030     SIGDELSET(p->p_siglist, SIGCHLD);
1031 
1032     if (args->status) {
1033 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
1034 	    return error;
1035 	tmpstat &= 0xffff;
1036 	if (WIFSIGNALED(tmpstat))
1037 	    tmpstat = (tmpstat & 0xffffff80) |
1038 		  BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
1039 	else if (WIFSTOPPED(tmpstat))
1040 	    tmpstat = (tmpstat & 0xffff00ff) |
1041 		  (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
1042 	return copyout(&tmpstat, args->status, sizeof(int));
1043     } else
1044 	return 0;
1045 }
1046 
1047 int
1048 linux_mknod(struct proc *p, struct linux_mknod_args *args)
1049 {
1050 	caddr_t sg;
1051 	struct mknod_args bsd_mknod;
1052 	struct mkfifo_args bsd_mkfifo;
1053 
1054 	sg = stackgap_init();
1055 
1056 	CHECKALTCREAT(p, &sg, args->path);
1057 
1058 #ifdef DEBUG
1059 	printf("Linux-emul(%ld): mknod(%s, %d, %d)\n",
1060 	   (long)p->p_pid, args->path, args->mode, args->dev);
1061 #endif
1062 
1063 	if (args->mode & S_IFIFO) {
1064 		bsd_mkfifo.path = args->path;
1065 		bsd_mkfifo.mode = args->mode;
1066 		return mkfifo(p, &bsd_mkfifo);
1067 	} else {
1068 		bsd_mknod.path = args->path;
1069 		bsd_mknod.mode = args->mode;
1070 		bsd_mknod.dev = args->dev;
1071 		return mknod(p, &bsd_mknod);
1072 	}
1073 }
1074 
1075 /*
1076  * UGH! This is just about the dumbest idea I've ever heard!!
1077  */
1078 int
1079 linux_personality(struct proc *p, struct linux_personality_args *args)
1080 {
1081 #ifdef DEBUG
1082 	printf("Linux-emul(%ld): personality(%d)\n",
1083 	   (long)p->p_pid, args->per);
1084 #endif
1085 	if (args->per != 0)
1086 		return EINVAL;
1087 
1088 	/* Yes Jim, it's still a Linux... */
1089 	p->p_retval[0] = 0;
1090 	return 0;
1091 }
1092 
1093 /*
1094  * Wrappers for get/setitimer for debugging..
1095  */
1096 int
1097 linux_setitimer(struct proc *p, struct linux_setitimer_args *args)
1098 {
1099 	struct setitimer_args bsa;
1100 	struct itimerval foo;
1101 	int error;
1102 
1103 #ifdef DEBUG
1104 	printf("Linux-emul(%ld): setitimer(%p, %p)\n",
1105 	    (long)p->p_pid, (void *)args->itv, (void *)args->oitv);
1106 #endif
1107 	bsa.which = args->which;
1108 	bsa.itv = args->itv;
1109 	bsa.oitv = args->oitv;
1110 	if (args->itv) {
1111 	    if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo,
1112 			sizeof(foo))))
1113 		return error;
1114 #ifdef DEBUG
1115 	    printf("setitimer: value: sec: %ld, usec: %ld\n",
1116 		foo.it_value.tv_sec, foo.it_value.tv_usec);
1117 	    printf("setitimer: interval: sec: %ld, usec: %ld\n",
1118 		foo.it_interval.tv_sec, foo.it_interval.tv_usec);
1119 #endif
1120 	}
1121 	return setitimer(p, &bsa);
1122 }
1123 
1124 int
1125 linux_getitimer(struct proc *p, struct linux_getitimer_args *args)
1126 {
1127 	struct getitimer_args bsa;
1128 #ifdef DEBUG
1129 	printf("Linux-emul(%ld): getitimer(%p)\n",
1130 	    (long)p->p_pid, (void *)args->itv);
1131 #endif
1132 	bsa.which = args->which;
1133 	bsa.itv = args->itv;
1134 	return getitimer(p, &bsa);
1135 }
1136 
1137 int
1138 linux_ioperm(struct proc *p, struct linux_ioperm_args *args)
1139 {
1140 	struct sysarch_args sa;
1141 	struct i386_ioperm_args *iia;
1142 	caddr_t sg;
1143 
1144 	sg = stackgap_init();
1145 	iia = stackgap_alloc(&sg, sizeof(struct i386_ioperm_args));
1146 	iia->start = args->start;
1147 	iia->length = args->length;
1148 	iia->enable = args->enable;
1149 	sa.op = I386_SET_IOPERM;
1150 	sa.parms = (char *)iia;
1151 	return sysarch(p, &sa);
1152 }
1153 
1154 int
1155 linux_iopl(struct proc *p, struct linux_iopl_args *args)
1156 {
1157 	int error;
1158 
1159 	if (args->level < 0 || args->level > 3)
1160 		return (EINVAL);
1161 	if ((error = suser(p)) != 0)
1162 		return (error);
1163 	if (securelevel > 0)
1164 		return (EPERM);
1165 	p->p_md.md_regs->tf_eflags = (p->p_md.md_regs->tf_eflags & ~PSL_IOPL) |
1166 		(args->level * (PSL_IOPL / 3));
1167 	return (0);
1168 }
1169 
1170 int
1171 linux_nice(struct proc *p, struct linux_nice_args *args)
1172 {
1173 	struct setpriority_args	bsd_args;
1174 
1175 	bsd_args.which = PRIO_PROCESS;
1176 	bsd_args.who = 0;	/* current process */
1177 	bsd_args.prio = args->inc;
1178 	return setpriority(p, &bsd_args);
1179 }
1180 
1181 int
1182 linux_setgroups(p, uap)
1183 	struct proc *p;
1184 	struct linux_setgroups_args *uap;
1185 {
1186 	struct pcred *pc;
1187 	linux_gid_t linux_gidset[NGROUPS];
1188 	gid_t *bsd_gidset;
1189 	int ngrp, error;
1190 
1191 	pc = p->p_cred;
1192 	ngrp = uap->gidsetsize;
1193 
1194 	/*
1195 	 * cr_groups[0] holds egid. Setting the whole set from
1196 	 * the supplied set will cause egid to be changed too.
1197 	 * Keep cr_groups[0] unchanged to prevent that.
1198 	 */
1199 
1200 	if ((error = suser(p)) != 0)
1201 		return (error);
1202 
1203 	if (ngrp >= NGROUPS)
1204 		return (EINVAL);
1205 
1206 	pc->pc_ucred = crcopy(pc->pc_ucred);
1207 	if (ngrp > 0) {
1208 		error = copyin((caddr_t)uap->gidset, (caddr_t)linux_gidset,
1209 			       ngrp * sizeof(linux_gid_t));
1210 		if (error)
1211 			return (error);
1212 
1213 		pc->pc_ucred->cr_ngroups = ngrp + 1;
1214 
1215 		bsd_gidset = pc->pc_ucred->cr_groups;
1216 		ngrp--;
1217 		while (ngrp >= 0) {
1218 			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
1219 			ngrp--;
1220 		}
1221 	}
1222 	else
1223 		pc->pc_ucred->cr_ngroups = 1;
1224 
1225 	setsugid(p);
1226 	return (0);
1227 }
1228 
1229 int
1230 linux_getgroups(p, uap)
1231 	struct proc *p;
1232 	struct linux_getgroups_args *uap;
1233 {
1234 	struct pcred *pc;
1235 	linux_gid_t linux_gidset[NGROUPS];
1236 	gid_t *bsd_gidset;
1237 	int bsd_gidsetsz, ngrp, error;
1238 
1239 	pc = p->p_cred;
1240 	bsd_gidset = pc->pc_ucred->cr_groups;
1241 	bsd_gidsetsz = pc->pc_ucred->cr_ngroups - 1;
1242 
1243 	/*
1244 	 * cr_groups[0] holds egid. Returning the whole set
1245 	 * here will cause a duplicate. Exclude cr_groups[0]
1246 	 * to prevent that.
1247 	 */
1248 
1249 	if ((ngrp = uap->gidsetsize) == 0) {
1250 		p->p_retval[0] = bsd_gidsetsz;
1251 		return (0);
1252 	}
1253 
1254 	if (ngrp < bsd_gidsetsz)
1255 		return (EINVAL);
1256 
1257 	ngrp = 0;
1258 	while (ngrp < bsd_gidsetsz) {
1259 		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1260 		ngrp++;
1261 	}
1262 
1263 	if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset,
1264 	    ngrp * sizeof(linux_gid_t))))
1265 		return (error);
1266 
1267 	p->p_retval[0] = ngrp;
1268 	return (0);
1269 }
1270 
1271 int
1272 linux_setrlimit(p, uap)
1273      struct proc *p;
1274      struct linux_setrlimit_args *uap;
1275 {
1276     struct osetrlimit_args bsd;
1277 
1278 #ifdef DEBUG
1279     printf("Linux-emul(%ld): setrlimit(%d, %p)\n",
1280 	   (long)p->p_pid, uap->resource, (void *)uap->rlim);
1281 #endif
1282 
1283     if (uap->resource >= LINUX_RLIM_NLIMITS)
1284 	return EINVAL;
1285 
1286     bsd.which = linux_to_bsd_resource[uap->resource];
1287 
1288     if (bsd.which == -1)
1289 	return EINVAL;
1290 
1291     bsd.rlp = uap->rlim;
1292     return osetrlimit(p, &bsd);
1293 }
1294 
1295 int
1296 linux_getrlimit(p, uap)
1297      struct proc *p;
1298      struct linux_getrlimit_args *uap;
1299 {
1300     struct ogetrlimit_args bsd;
1301 
1302 #ifdef DEBUG
1303     printf("Linux-emul(%ld): getrlimit(%d, %p)\n",
1304 	   (long)p->p_pid, uap->resource, (void *)uap->rlim);
1305 #endif
1306 
1307     if (uap->resource >= LINUX_RLIM_NLIMITS)
1308 	return EINVAL;
1309 
1310     bsd.which = linux_to_bsd_resource[uap->resource];
1311 
1312     if (bsd.which == -1)
1313 	return EINVAL;
1314 
1315     bsd.rlp = uap->rlim;
1316     return ogetrlimit(p, &bsd);
1317 }
1318 
1319 int
1320 linux_sched_setscheduler(p, uap)
1321 	struct proc *p;
1322 	struct linux_sched_setscheduler_args *uap;
1323 {
1324 	struct sched_setscheduler_args bsd;
1325 
1326 #ifdef DEBUG
1327 	printf("Linux-emul(%ld): sched_setscheduler(%d, %d, %p)\n",
1328 	    (long)p->p_pid, uap->pid, uap->policy, (const void *)uap->param);
1329 #endif
1330 
1331 	switch (uap->policy) {
1332 	case LINUX_SCHED_OTHER:
1333 		bsd.policy = SCHED_OTHER;
1334 		break;
1335 	case LINUX_SCHED_FIFO:
1336 		bsd.policy = SCHED_FIFO;
1337 		break;
1338 	case LINUX_SCHED_RR:
1339 		bsd.policy = SCHED_RR;
1340 		break;
1341 	default:
1342 		return EINVAL;
1343 	}
1344 
1345 	bsd.pid = uap->pid;
1346 	bsd.param = uap->param;
1347 	return sched_setscheduler(p, &bsd);
1348 }
1349 
1350 int
1351 linux_sched_getscheduler(p, uap)
1352 	struct proc *p;
1353 	struct linux_sched_getscheduler_args *uap;
1354 {
1355 	struct sched_getscheduler_args bsd;
1356 	int error;
1357 
1358 #ifdef DEBUG
1359 	printf("Linux-emul(%ld): sched_getscheduler(%d)\n",
1360 	       (long)p->p_pid, uap->pid);
1361 #endif
1362 
1363 	bsd.pid = uap->pid;
1364 	error = sched_getscheduler(p, &bsd);
1365 
1366 	switch (p->p_retval[0]) {
1367 	case SCHED_OTHER:
1368 		p->p_retval[0] = LINUX_SCHED_OTHER;
1369 		break;
1370 	case SCHED_FIFO:
1371 		p->p_retval[0] = LINUX_SCHED_FIFO;
1372 		break;
1373 	case SCHED_RR:
1374 		p->p_retval[0] = LINUX_SCHED_RR;
1375 		break;
1376 	}
1377 
1378 	return error;
1379 }
1380 
/*
 * LDT entry description as copied in from the caller by the write_ldt
 * path of linux_modify_ldt().  The notes below describe how that
 * function consumes each field.
 */
struct linux_descriptor {
	unsigned int	entry_number;		/* becomes the LDT start index */
	unsigned long	base_addr;		/* split into sd_lobase/sd_hibase */
	unsigned int	limit;			/* split into sd_lolimit/sd_hilimit */
	unsigned int	seg_32bit:1;		/* copied to sd_def32 */
	unsigned int	contents:2;		/* shifted into bits 2-3 of sd_type */
	unsigned int	read_exec_only:1;	/* inverted into bit 1 of sd_type */
	unsigned int	limit_in_pages:1;	/* copied to sd_gran */
	unsigned int	seg_not_present:1;	/* inverted into sd_p */
	unsigned int	useable:1;		/* not read by linux_modify_ldt() */
};
1392 
1393 int
1394 linux_modify_ldt(p, uap)
1395 	struct proc *p;
1396 	struct linux_modify_ldt_args *uap;
1397 {
1398 	int error;
1399 	caddr_t sg;
1400 	struct sysarch_args args;
1401 	struct i386_ldt_args *ldt;
1402 	struct linux_descriptor ld;
1403 	union descriptor *desc;
1404 
1405 	sg = stackgap_init();
1406 
1407 	if (uap->ptr == NULL)
1408 		return (EINVAL);
1409 
1410 	switch (uap->func) {
1411 	case 0x00: /* read_ldt */
1412 		ldt = stackgap_alloc(&sg, sizeof(*ldt));
1413 		ldt->start = 0;
1414 		ldt->descs = uap->ptr;
1415 		ldt->num = uap->bytecount / sizeof(union descriptor);
1416 		args.op = I386_GET_LDT;
1417 		args.parms = (char*)ldt;
1418 		error = sysarch(p, &args);
1419 		p->p_retval[0] *= sizeof(union descriptor);
1420 		break;
1421 	case 0x01: /* write_ldt */
1422 	case 0x11: /* write_ldt */
1423 		if (uap->bytecount != sizeof(ld))
1424 			return (EINVAL);
1425 
1426 		error = copyin(uap->ptr, &ld, sizeof(ld));
1427 		if (error)
1428 			return (error);
1429 
1430 		ldt = stackgap_alloc(&sg, sizeof(*ldt));
1431 		desc = stackgap_alloc(&sg, sizeof(*desc));
1432 		ldt->start = ld.entry_number;
1433 		ldt->descs = desc;
1434 		ldt->num = 1;
1435 		desc->sd.sd_lolimit = (ld.limit & 0x0000ffff);
1436 		desc->sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16;
1437 		desc->sd.sd_lobase = (ld.base_addr & 0x00ffffff);
1438 		desc->sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24;
1439 		desc->sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) |
1440 			(ld.contents << 2);
1441 		desc->sd.sd_dpl = 3;
1442 		desc->sd.sd_p = (ld.seg_not_present ^ 1);
1443 		desc->sd.sd_xx = 0;
1444 		desc->sd.sd_def32 = ld.seg_32bit;
1445 		desc->sd.sd_gran = ld.limit_in_pages;
1446 		args.op = I386_SET_LDT;
1447 		args.parms = (char*)ldt;
1448 		error = sysarch(p, &args);
1449 		break;
1450 	default:
1451 		error = EINVAL;
1452 		break;
1453 	}
1454 
1455 	if (error == EOPNOTSUPP) {
1456 		printf("linux: modify_ldt needs kernel option USER_LDT\n");
1457 		error = ENOSYS;
1458 	}
1459 
1460 	return (error);
1461 }
1462