xref: /freebsd/sys/compat/linux/linux_misc.c (revision a14a0223ae1b172e96dd2a1d849e22026a98b692)
1 /*-
2  * Copyright (c) 1994-1995 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include "opt_compat.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/sysproto.h>
36 #include <sys/kernel.h>
37 #include <sys/mman.h>
38 #include <sys/proc.h>
39 #include <sys/fcntl.h>
40 #include <sys/imgact_aout.h>
41 #include <sys/mount.h>
42 #include <sys/namei.h>
43 #include <sys/resourcevar.h>
44 #include <sys/stat.h>
45 #include <sys/sysctl.h>
46 #include <sys/unistd.h>
47 #include <sys/vnode.h>
48 #include <sys/wait.h>
49 #include <sys/time.h>
50 #include <sys/signalvar.h>
51 
52 #include <vm/vm.h>
53 #include <vm/pmap.h>
54 #include <vm/vm_kern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_extern.h>
57 
58 #include <machine/frame.h>
59 #include <machine/psl.h>
60 #include <machine/sysarch.h>
61 #include <machine/segments.h>
62 
63 #include <i386/linux/linux.h>
64 #include <i386/linux/linux_proto.h>
65 #include <i386/linux/linux_util.h>
66 #include <i386/linux/linux_mib.h>
67 
68 #include <posix4/sched.h>
69 
/*
 * Translate a BSD signal number to its Linux counterpart.  Numbers that
 * fall inside the translation table (sig <= LINUX_SIGTBLSZ) are looked up
 * in bsd_to_linux_signal[]; larger numbers are passed through unchanged.
 * The argument is evaluated more than once, so it must be free of side
 * effects that should happen only once.
 */
#define BSD_TO_LINUX_SIGNAL(sig)	\
	(((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : (sig))
72 
73 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] =
74 { RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
75   RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
76   RLIMIT_MEMLOCK, -1
77 };
78 
79 int
80 linux_alarm(struct proc *p, struct linux_alarm_args *args)
81 {
82     struct itimerval it, old_it;
83     struct timeval tv;
84     int s;
85 
86 #ifdef DEBUG
87     printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs);
88 #endif
89     if (args->secs > 100000000)
90 	return EINVAL;
91     it.it_value.tv_sec = (long)args->secs;
92     it.it_value.tv_usec = 0;
93     it.it_interval.tv_sec = 0;
94     it.it_interval.tv_usec = 0;
95     s = splsoftclock();
96     old_it = p->p_realtimer;
97     getmicrouptime(&tv);
98     if (timevalisset(&old_it.it_value))
99 	untimeout(realitexpire, (caddr_t)p, p->p_ithandle);
100     if (it.it_value.tv_sec != 0) {
101 	p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value));
102 	timevaladd(&it.it_value, &tv);
103     }
104     p->p_realtimer = it;
105     splx(s);
106     if (timevalcmp(&old_it.it_value, &tv, >)) {
107 	timevalsub(&old_it.it_value, &tv);
108 	if (old_it.it_value.tv_usec != 0)
109 	    old_it.it_value.tv_sec++;
110 	p->p_retval[0] = old_it.it_value.tv_sec;
111     }
112     return 0;
113 }
114 
115 int
116 linux_brk(struct proc *p, struct linux_brk_args *args)
117 {
118 #if 0
119     struct vmspace *vm = p->p_vmspace;
120     vm_offset_t new, old;
121     int error;
122 
123     if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr)
124 	return EINVAL;
125     if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr)
126 	> p->p_rlimit[RLIMIT_DATA].rlim_cur)
127 	return ENOMEM;
128 
129     old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize);
130     new = round_page((vm_offset_t)args->dsend);
131     p->p_retval[0] = old;
132     if ((new-old) > 0) {
133 	if (swap_pager_full)
134 	    return ENOMEM;
135 	error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE,
136 			VM_PROT_ALL, VM_PROT_ALL, 0);
137 	if (error)
138 	    return error;
139 	vm->vm_dsize += btoc((new-old));
140 	p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize));
141     }
142     return 0;
143 #else
144     struct vmspace *vm = p->p_vmspace;
145     vm_offset_t new, old;
146     struct obreak_args /* {
147 	char * nsize;
148     } */ tmp;
149 
150 #ifdef DEBUG
151     printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend);
152 #endif
153     old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
154     new = (vm_offset_t)args->dsend;
155     tmp.nsize = (char *) new;
156     if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp))
157 	p->p_retval[0] = (int)new;
158     else
159 	p->p_retval[0] = (int)old;
160 
161     return 0;
162 #endif
163 }
164 
165 int
166 linux_uselib(struct proc *p, struct linux_uselib_args *args)
167 {
168     struct nameidata ni;
169     struct vnode *vp;
170     struct exec *a_out;
171     struct vattr attr;
172     vm_offset_t vmaddr;
173     unsigned long file_offset;
174     vm_offset_t buffer;
175     unsigned long bss_size;
176     int error;
177     caddr_t sg;
178     int locked;
179 
180     sg = stackgap_init();
181     CHECKALTEXIST(p, &sg, args->library);
182 
183 #ifdef DEBUG
184     printf("Linux-emul(%ld): uselib(%s)\n", (long)p->p_pid, args->library);
185 #endif
186 
187     a_out = NULL;
188     locked = 0;
189     vp = NULL;
190 
191     NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, args->library, p);
192     error = namei(&ni);
193     if (error)
194 	goto cleanup;
195 
196     vp = ni.ni_vp;
197     if (vp == NULL) {
198 	error = ENOEXEC;	/* ?? */
199 	goto cleanup;
200     }
201 
202     /*
203      * From here on down, we have a locked vnode that must be unlocked.
204      */
205     locked++;
206 
207     /*
208      * Writable?
209      */
210     if (vp->v_writecount) {
211 	error = ETXTBSY;
212 	goto cleanup;
213     }
214 
215     /*
216      * Executable?
217      */
218     error = VOP_GETATTR(vp, &attr, p->p_ucred, p);
219     if (error)
220 	goto cleanup;
221 
222     if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
223 	((attr.va_mode & 0111) == 0) ||
224 	(attr.va_type != VREG)) {
225 	    error = ENOEXEC;
226 	    goto cleanup;
227     }
228 
229     /*
230      * Sensible size?
231      */
232     if (attr.va_size == 0) {
233 	error = ENOEXEC;
234 	goto cleanup;
235     }
236 
237     /*
238      * Can we access it?
239      */
240     error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
241     if (error)
242 	goto cleanup;
243 
244     error = VOP_OPEN(vp, FREAD, p->p_ucred, p);
245     if (error)
246 	goto cleanup;
247 
248     /*
249      * Lock no longer needed
250      */
251     VOP_UNLOCK(vp, 0, p);
252     locked = 0;
253 
254     /*
255      * Pull in executable header into kernel_map
256      */
257     error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
258 	    	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
259     if (error)
260 	goto cleanup;
261 
262     /*
263      * Is it a Linux binary ?
264      */
265     if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
266 	error = ENOEXEC;
267 	goto cleanup;
268     }
269 
270     /* While we are here, we should REALLY do some more checks */
271 
272     /*
273      * Set file/virtual offset based on a.out variant.
274      */
275     switch ((int)(a_out->a_magic & 0xffff)) {
276     case 0413:	/* ZMAGIC */
277 	file_offset = 1024;
278 	break;
279     case 0314:	/* QMAGIC */
280 	file_offset = 0;
281 	break;
282     default:
283 	error = ENOEXEC;
284 	goto cleanup;
285     }
286 
287     bss_size = round_page(a_out->a_bss);
288 
289     /*
290      * Check various fields in header for validity/bounds.
291      */
292     if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
293 	error = ENOEXEC;
294 	goto cleanup;
295     }
296 
297     /* text + data can't exceed file size */
298     if (a_out->a_data + a_out->a_text > attr.va_size) {
299 	error = EFAULT;
300 	goto cleanup;
301     }
302 
303     /*
304      * text/data/bss must not exceed limits
305      * XXX: this is not complete. it should check current usage PLUS
306      * the resources needed by this library.
307      */
308     if (a_out->a_text > MAXTSIZ ||
309 	a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) {
310 	error = ENOMEM;
311 	goto cleanup;
312     }
313 
314     /*
315      * prevent more writers
316      */
317     vp->v_flag |= VTEXT;
318 
319     /*
320      * Check if file_offset page aligned,.
321      * Currently we cannot handle misalinged file offsets,
322      * and so we read in the entire image (what a waste).
323      */
324     if (file_offset & PAGE_MASK) {
325 #ifdef DEBUG
326 printf("uselib: Non page aligned binary %lu\n", file_offset);
327 #endif
328 	/*
329 	 * Map text+data read/write/execute
330 	 */
331 
332 	/* a_entry is the load address and is page aligned */
333 	vmaddr = trunc_page(a_out->a_entry);
334 
335 	/* get anon user mapping, read+write+execute */
336 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
337 		    	    a_out->a_text + a_out->a_data, FALSE,
338 			    VM_PROT_ALL, VM_PROT_ALL, 0);
339 	if (error)
340 	    goto cleanup;
341 
342 	/* map file into kernel_map */
343 	error = vm_mmap(kernel_map, &buffer,
344 			round_page(a_out->a_text + a_out->a_data + file_offset),
345 		   	VM_PROT_READ, VM_PROT_READ, 0,
346 			(caddr_t)vp, trunc_page(file_offset));
347 	if (error)
348 	    goto cleanup;
349 
350 	/* copy from kernel VM space to user space */
351 	error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset),
352 			(caddr_t)vmaddr, a_out->a_text + a_out->a_data);
353 
354 	/* release temporary kernel space */
355 	vm_map_remove(kernel_map, buffer,
356 		      buffer + round_page(a_out->a_text + a_out->a_data + file_offset));
357 
358 	if (error)
359 	    goto cleanup;
360     }
361     else {
362 #ifdef DEBUG
363 printf("uselib: Page aligned binary %lu\n", file_offset);
364 #endif
365 	/*
366 	 * for QMAGIC, a_entry is 20 bytes beyond the load address
367 	 * to skip the executable header
368 	 */
369 	vmaddr = trunc_page(a_out->a_entry);
370 
371 	/*
372 	 * Map it all into the process's space as a single copy-on-write
373 	 * "data" segment.
374 	 */
375 	error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr,
376 		   	a_out->a_text + a_out->a_data,
377 			VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
378 			(caddr_t)vp, file_offset);
379 	if (error)
380 	    goto cleanup;
381     }
382 #ifdef DEBUG
383 printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]);
384 #endif
385     if (bss_size != 0) {
386         /*
387 	 * Calculate BSS start address
388 	 */
389 	vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data;
390 
391 	/*
392 	 * allocate some 'anon' space
393 	 */
394 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
395 			    bss_size, FALSE,
396 			    VM_PROT_ALL, VM_PROT_ALL, 0);
397 	if (error)
398 	    goto cleanup;
399     }
400 
401 cleanup:
402     /*
403      * Unlock vnode if needed
404      */
405     if (locked)
406 	VOP_UNLOCK(vp, 0, p);
407 
408     /*
409      * Release the kernel mapping.
410      */
411     if (a_out)
412 	vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE);
413 
414     return error;
415 }
416 
/*
 * Argument block for the old-style Linux select(): linux_select() copies
 * one of these in from the user pointer it is given and unpacks the
 * fields into a linux_newselect_args.
 */
/* XXX move */
struct linux_select_argv {
	int nfds;			/* passed on as the descriptor count */
	fd_set *readfds;
	fd_set *writefds;
	fd_set *exceptfds;
	struct timeval *timeout;	/* may be NULL (see linux_newselect) */
};
425 
426 int
427 linux_select(struct proc *p, struct linux_select_args *args)
428 {
429     struct linux_select_argv linux_args;
430     struct linux_newselect_args newsel;
431     int error;
432 
433 #ifdef SELECT_DEBUG
434     printf("Linux-emul(%ld): select(%x)\n", (long)p->p_pid, args->ptr);
435 #endif
436     if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
437 			sizeof(linux_args))))
438 	return error;
439 
440     newsel.nfds = linux_args.nfds;
441     newsel.readfds = linux_args.readfds;
442     newsel.writefds = linux_args.writefds;
443     newsel.exceptfds = linux_args.exceptfds;
444     newsel.timeout = linux_args.timeout;
445 
446     return linux_newselect(p, &newsel);
447 }
448 
449 int
450 linux_newselect(struct proc *p, struct linux_newselect_args *args)
451 {
452     struct select_args bsa;
453     struct timeval tv0, tv1, utv, *tvp;
454     caddr_t sg;
455     int error;
456 
457 #ifdef DEBUG
458     printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n",
459   	(long)p->p_pid, args->nfds, (void *)args->readfds,
460 	(void *)args->writefds, (void *)args->exceptfds,
461 	(void *)args->timeout);
462 #endif
463     error = 0;
464     bsa.nd = args->nfds;
465     bsa.in = args->readfds;
466     bsa.ou = args->writefds;
467     bsa.ex = args->exceptfds;
468     bsa.tv = args->timeout;
469 
470     /*
471      * Store current time for computation of the amount of
472      * time left.
473      */
474     if (args->timeout) {
475 	if ((error = copyin(args->timeout, &utv, sizeof(utv))))
476 	    goto select_out;
477 #ifdef DEBUG
478 	printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n",
479 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
480 #endif
481 	if (itimerfix(&utv)) {
482 	    /*
483 	     * The timeval was invalid.  Convert it to something
484 	     * valid that will act as it does under Linux.
485 	     */
486 	    sg = stackgap_init();
487 	    tvp = stackgap_alloc(&sg, sizeof(utv));
488 	    utv.tv_sec += utv.tv_usec / 1000000;
489 	    utv.tv_usec %= 1000000;
490 	    if (utv.tv_usec < 0) {
491 		utv.tv_sec -= 1;
492 		utv.tv_usec += 1000000;
493 	    }
494 	    if (utv.tv_sec < 0)
495 		timevalclear(&utv);
496 	    if ((error = copyout(&utv, tvp, sizeof(utv))))
497 		goto select_out;
498 	    bsa.tv = tvp;
499 	}
500 	microtime(&tv0);
501     }
502 
503     error = select(p, &bsa);
504 #ifdef DEBUG
505     printf("Linux-emul(%ld): real select returns %d\n", (long)p->p_pid, error);
506 #endif
507 
508     if (error) {
509 	/*
510 	 * See fs/select.c in the Linux kernel.  Without this,
511 	 * Maelstrom doesn't work.
512 	 */
513 	if (error == ERESTART)
514 	    error = EINTR;
515 	goto select_out;
516     }
517 
518     if (args->timeout) {
519 	if (p->p_retval[0]) {
520 	    /*
521 	     * Compute how much time was left of the timeout,
522 	     * by subtracting the current time and the time
523 	     * before we started the call, and subtracting
524 	     * that result from the user-supplied value.
525 	     */
526 	    microtime(&tv1);
527 	    timevalsub(&tv1, &tv0);
528 	    timevalsub(&utv, &tv1);
529 	    if (utv.tv_sec < 0)
530 		timevalclear(&utv);
531 	} else
532 	    timevalclear(&utv);
533 #ifdef DEBUG
534 	printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n",
535 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
536 #endif
537 	if ((error = copyout(&utv, args->timeout, sizeof(utv))))
538 	    goto select_out;
539     }
540 
541 select_out:
542 #ifdef DEBUG
543     printf("Linux-emul(%ld): newselect_out -> %d\n", (long)p->p_pid, error);
544 #endif
545     return error;
546 }
547 
548 int
549 linux_getpgid(struct proc *p, struct linux_getpgid_args *args)
550 {
551     struct proc *curp;
552 
553 #ifdef DEBUG
554     printf("Linux-emul(%ld): getpgid(%d)\n", (long)p->p_pid, args->pid);
555 #endif
556     if (args->pid != p->p_pid) {
557 	if (!(curp = pfind(args->pid)))
558 	    return ESRCH;
559     }
560     else
561 	curp = p;
562     p->p_retval[0] = curp->p_pgid;
563     return 0;
564 }
565 
566 int
567 linux_fork(struct proc *p, struct linux_fork_args *args)
568 {
569     int error;
570 
571 #ifdef DEBUG
572     printf("Linux-emul(%ld): fork()\n", (long)p->p_pid);
573 #endif
574     if ((error = fork(p, (struct fork_args *)args)) != 0)
575 	return error;
576     if (p->p_retval[1] == 1)
577 	p->p_retval[0] = 0;
578     return 0;
579 }
580 
581 int
582 linux_vfork(struct proc *p, struct linux_vfork_args *args)
583 {
584 	int error;
585 
586 #ifdef DEBUG
587 	printf("Linux-emul(%ld): vfork()\n", (long)p->p_pid);
588 #endif
589 
590 	if ((error = vfork(p, (struct vfork_args *)args)) != 0)
591 		return error;
592 	/* Are we the child? */
593 	if (p->p_retval[1] == 1)
594 		p->p_retval[0] = 0;
595 	return 0;
596 }
597 
/*
 * Linux clone(2) flag bits as interpreted by linux_clone().  The low
 * byte of the flags word (mask 0xff) is not a flag; linux_clone() reads
 * it as the exit signal number.
 */
#define CLONE_VM	0x100	/* translated to RFMEM */
#define CLONE_FS	0x200	/* not examined by linux_clone() */
#define CLONE_FILES	0x400	/* when clear, RFFDG is requested */
#define CLONE_SIGHAND	0x800	/* translated to RFSIGSHARE */
#define CLONE_PID	0x1000	/* unsupported; only reported under DEBUG */
603 
604 int
605 linux_clone(struct proc *p, struct linux_clone_args *args)
606 {
607     int error, ff = RFPROC;
608     struct proc *p2;
609     int            exit_signal;
610     vm_offset_t    start;
611     struct rfork_args rf_args;
612 
613 #ifdef DEBUG
614     if (args->flags & CLONE_PID)
615 	printf("linux_clone(%ld): CLONE_PID not yet supported\n",
616 	       (long)p->p_pid);
617     printf("linux_clone(%ld): invoked with flags %x and stack %x\n",
618 	   (long)p->p_pid, (unsigned int)args->flags,
619 	   (unsigned int)args->stack);
620 #endif
621 
622     if (!args->stack)
623         return (EINVAL);
624 
625     exit_signal = args->flags & 0x000000ff;
626     if (exit_signal >= LINUX_NSIG)
627 	return EINVAL;
628 
629     if (exit_signal <= LINUX_SIGTBLSZ)
630 	exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)];
631 
632     /* RFTHREAD probably not necessary here, but it shouldn't hurt either */
633     ff |= RFTHREAD;
634 
635     if (args->flags & CLONE_VM)
636 	ff |= RFMEM;
637     if (args->flags & CLONE_SIGHAND)
638 	ff |= RFSIGSHARE;
639     if (!(args->flags & CLONE_FILES))
640 	ff |= RFFDG;
641 
642     error = 0;
643     start = 0;
644 
645     rf_args.flags = ff;
646     if ((error = rfork(p, &rf_args)) != 0)
647 	return error;
648 
649     p2 = pfind(p->p_retval[0]);
650     if (p2 == 0)
651  	return ESRCH;
652 
653     p2->p_sigparent = exit_signal;
654     p2->p_md.md_regs->tf_esp = (unsigned int)args->stack;
655 
656 #ifdef DEBUG
657     printf ("linux_clone(%ld): successful rfork to %ld\n",
658 	    (long)p->p_pid, (long)p2->p_pid);
659 #endif
660     return 0;
661 }
662 
/*
 * Argument block for Linux mmap(): linux_mmap() copies one of these in
 * from the user pointer it is given and translates the fields into a
 * native mmap_args.
 */
/* XXX move */
struct linux_mmap_argv {
	linux_caddr_t addr;
	int len;
	int prot;
	int flags;	/* LINUX_MAP_* bits */
	int fd;
	int pos;	/* copied to the native `pos' (file offset) field */
};
672 
673 #define STACK_SIZE  (2 * 1024 * 1024)
674 #define GUARD_SIZE  (4 * PAGE_SIZE)
675 int
676 linux_mmap(struct proc *p, struct linux_mmap_args *args)
677 {
678     struct mmap_args /* {
679 	caddr_t addr;
680 	size_t len;
681 	int prot;
682 	int flags;
683 	int fd;
684 	long pad;
685 	off_t pos;
686     } */ bsd_args;
687     int error;
688     struct linux_mmap_argv linux_args;
689 
690     if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
691 			sizeof(linux_args))))
692 	return error;
693 #ifdef DEBUG
694     printf("Linux-emul(%ld): mmap(%p, %d, %d, %08x, %d, %d)\n",
695 	(long)p->p_pid, (void *)linux_args.addr, linux_args.len,
696 	linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos);
697 #endif
698     bsd_args.flags = 0;
699     if (linux_args.flags & LINUX_MAP_SHARED)
700 	bsd_args.flags |= MAP_SHARED;
701     if (linux_args.flags & LINUX_MAP_PRIVATE)
702 	bsd_args.flags |= MAP_PRIVATE;
703     if (linux_args.flags & LINUX_MAP_FIXED)
704 	bsd_args.flags |= MAP_FIXED;
705     if (linux_args.flags & LINUX_MAP_ANON)
706 	bsd_args.flags |= MAP_ANON;
707     if (linux_args.flags & LINUX_MAP_GROWSDOWN) {
708 	bsd_args.flags |= MAP_STACK;
709 
710 	/* The linux MAP_GROWSDOWN option does not limit auto
711 	 * growth of the region.  Linux mmap with this option
712 	 * takes as addr the inital BOS, and as len, the initial
713 	 * region size.  It can then grow down from addr without
714 	 * limit.  However, linux threads has an implicit internal
715 	 * limit to stack size of STACK_SIZE.  Its just not
716 	 * enforced explicitly in linux.  But, here we impose
717 	 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
718 	 * region, since we can do this with our mmap.
719 	 *
720 	 * Our mmap with MAP_STACK takes addr as the maximum
721 	 * downsize limit on BOS, and as len the max size of
722 	 * the region.  It them maps the top SGROWSIZ bytes,
723 	 * and autgrows the region down, up to the limit
724 	 * in addr.
725 	 *
726 	 * If we don't use the MAP_STACK option, the effect
727 	 * of this code is to allocate a stack region of a
728 	 * fixed size of (STACK_SIZE - GUARD_SIZE).
729 	 */
730 
731 	/* This gives us TOS */
732 	bsd_args.addr = linux_args.addr + linux_args.len;
733 
734 	/* This gives us our maximum stack size */
735 	if (linux_args.len > STACK_SIZE - GUARD_SIZE)
736 	    bsd_args.len = linux_args.len;
737 	else
738 	    bsd_args.len  = STACK_SIZE - GUARD_SIZE;
739 
740 	/* This gives us a new BOS.  If we're using VM_STACK, then
741 	 * mmap will just map the top SGROWSIZ bytes, and let
742 	 * the stack grow down to the limit at BOS.  If we're
743 	 * not using VM_STACK we map the full stack, since we
744 	 * don't have a way to autogrow it.
745 	 */
746 	bsd_args.addr -= bsd_args.len;
747 
748     } else {
749 	bsd_args.addr = linux_args.addr;
750 	bsd_args.len  = linux_args.len;
751     }
752 
753     bsd_args.prot = linux_args.prot | PROT_READ;	/* always required */
754     bsd_args.fd = linux_args.fd;
755     bsd_args.pos = linux_args.pos;
756     bsd_args.pad = 0;
757     return mmap(p, &bsd_args);
758 }
759 
760 int
761 linux_mremap(struct proc *p, struct linux_mremap_args *args)
762 {
763 	struct munmap_args /* {
764 		void *addr;
765 		size_t len;
766 	} */ bsd_args;
767 	int error = 0;
768 
769 #ifdef DEBUG
770 	printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n",
771 	    (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len,
772 	    args->flags);
773 #endif
774 	args->new_len = round_page(args->new_len);
775 	args->old_len = round_page(args->old_len);
776 
777 	if (args->new_len > args->old_len) {
778 		p->p_retval[0] = 0;
779 		return ENOMEM;
780 	}
781 
782 	if (args->new_len < args->old_len) {
783 		bsd_args.addr = args->addr + args->new_len;
784 		bsd_args.len = args->old_len - args->new_len;
785 		error = munmap(p, &bsd_args);
786 	}
787 
788 	p->p_retval[0] = error ? 0 : (int)args->addr;
789 	return error;
790 }
791 
792 int
793 linux_msync(struct proc *p, struct linux_msync_args *args)
794 {
795 	struct msync_args bsd_args;
796 
797 	bsd_args.addr = args->addr;
798 	bsd_args.len = args->len;
799 	bsd_args.flags = 0;	/* XXX ignore */
800 
801 	return msync(p, &bsd_args);
802 }
803 
804 int
805 linux_pipe(struct proc *p, struct linux_pipe_args *args)
806 {
807     int error;
808     int reg_edx;
809 
810 #ifdef DEBUG
811     printf("Linux-emul(%ld): pipe(*)\n", (long)p->p_pid);
812 #endif
813     reg_edx = p->p_retval[1];
814     error = pipe(p, 0);
815     if (error) {
816 	p->p_retval[1] = reg_edx;
817 	return error;
818     }
819 
820     error = copyout(p->p_retval, args->pipefds, 2*sizeof(int));
821     if (error) {
822 	p->p_retval[1] = reg_edx;
823 	return error;
824     }
825 
826     p->p_retval[1] = reg_edx;
827     p->p_retval[0] = 0;
828     return 0;
829 }
830 
831 int
832 linux_time(struct proc *p, struct linux_time_args *args)
833 {
834     struct timeval tv;
835     linux_time_t tm;
836     int error;
837 
838 #ifdef DEBUG
839     printf("Linux-emul(%ld): time(*)\n", (long)p->p_pid);
840 #endif
841     microtime(&tv);
842     tm = tv.tv_sec;
843     if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t))))
844 	return error;
845     p->p_retval[0] = tm;
846     return 0;
847 }
848 
/*
 * Result buffer that linux_times() copies out to user space.  Each field
 * is a time converted to clock ticks by CONVTCK().
 */
struct linux_times_argv {
    long    tms_utime;		/* from the process's ru_utime */
    long    tms_stime;		/* from the process's ru_stime */
    long    tms_cutime;		/* from p_stats->p_cru.ru_utime */
    long    tms_cstime;		/* from p_stats->p_cru.ru_stime */
};
855 
#define CLK_TCK 100	/* Linux uses 100 */
/*
 * Convert a struct timeval (any expression that has tv_sec and tv_usec
 * members) to a count of 1/CLK_TCK second ticks, truncating any partial
 * tick.  The argument is evaluated twice.
 */
#define CONVTCK(r)	((r).tv_sec * CLK_TCK + (r).tv_usec / (1000000 / CLK_TCK))
858 
859 int
860 linux_times(struct proc *p, struct linux_times_args *args)
861 {
862     struct timeval tv;
863     struct linux_times_argv tms;
864     struct rusage ru;
865     int error;
866 
867 #ifdef DEBUG
868     printf("Linux-emul(%ld): times(*)\n", (long)p->p_pid);
869 #endif
870     calcru(p, &ru.ru_utime, &ru.ru_stime, NULL);
871 
872     tms.tms_utime = CONVTCK(ru.ru_utime);
873     tms.tms_stime = CONVTCK(ru.ru_stime);
874 
875     tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime);
876     tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime);
877 
878     if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf,
879 	    	    sizeof(struct linux_times_argv))))
880 	return error;
881 
882     microuptime(&tv);
883     p->p_retval[0] = (int)CONVTCK(tv);
884     return 0;
885 }
886 
887 int
888 linux_newuname(struct proc *p, struct linux_newuname_args *args)
889 {
890 	struct linux_new_utsname utsname;
891 	char *osrelease, *osname;
892 
893 #ifdef DEBUG
894 	printf("Linux-emul(%ld): newuname(*)\n", (long)p->p_pid);
895 #endif
896 
897 	osname = linux_get_osname(p);
898 	osrelease = linux_get_osrelease(p);
899 
900 	bzero(&utsname, sizeof(struct linux_new_utsname));
901 	strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1);
902 	strncpy(utsname.nodename, hostname, LINUX_MAX_UTSNAME-1);
903 	strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1);
904 	strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1);
905 	strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1);
906 	strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1);
907 
908 	return (copyout((caddr_t)&utsname, (caddr_t)args->buf,
909 			sizeof(struct linux_new_utsname)));
910 }
911 
/*
 * Time pair that linux_utime() copies in from user space; each value
 * becomes the tv_sec of one struct timeval handed to utimes().
 */
struct linux_utimbuf {
	linux_time_t l_actime;		/* becomes tv[0].tv_sec */
	linux_time_t l_modtime;		/* becomes tv[1].tv_sec */
};
916 
917 int
918 linux_utime(struct proc *p, struct linux_utime_args *args)
919 {
920     struct utimes_args /* {
921 	char	*path;
922 	struct	timeval *tptr;
923     } */ bsdutimes;
924     struct timeval tv[2], *tvp;
925     struct linux_utimbuf lut;
926     int error;
927     caddr_t sg;
928 
929     sg = stackgap_init();
930     CHECKALTEXIST(p, &sg, args->fname);
931 
932 #ifdef DEBUG
933     printf("Linux-emul(%ld): utime(%s, *)\n", (long)p->p_pid, args->fname);
934 #endif
935     if (args->times) {
936 	if ((error = copyin(args->times, &lut, sizeof lut)))
937 	    return error;
938 	tv[0].tv_sec = lut.l_actime;
939 	tv[0].tv_usec = 0;
940 	tv[1].tv_sec = lut.l_modtime;
941 	tv[1].tv_usec = 0;
942 	/* so that utimes can copyin */
943 	tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
944 	if ((error = copyout(tv, tvp, sizeof(tv))))
945 	    return error;
946 	bsdutimes.tptr = tvp;
947     } else
948 	bsdutimes.tptr = NULL;
949 
950     bsdutimes.path = args->fname;
951     return utimes(p, &bsdutimes);
952 }
953 
/*
 * Linux wait option bit.  linux_waitpid() and linux_wait4() translate it
 * to the native WLINUXCLONE option.
 */
#define __WCLONE 0x80000000
955 
956 int
957 linux_waitpid(struct proc *p, struct linux_waitpid_args *args)
958 {
959     struct wait_args /* {
960 	int pid;
961 	int *status;
962 	int options;
963 	struct	rusage *rusage;
964     } */ tmp;
965     int error, tmpstat;
966 
967 #ifdef DEBUG
968     printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n",
969 	(long)p->p_pid, args->pid, (void *)args->status, args->options);
970 #endif
971     tmp.pid = args->pid;
972     tmp.status = args->status;
973     tmp.options = (args->options & (WNOHANG | WUNTRACED));
974     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
975     if (args->options & __WCLONE)
976 	tmp.options |= WLINUXCLONE;
977     tmp.rusage = NULL;
978 
979     if ((error = wait4(p, &tmp)) != 0)
980 	return error;
981 
982     if (args->status) {
983 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
984 	    return error;
985 	if (WIFSIGNALED(tmpstat))
986 	    tmpstat = (tmpstat & 0xffffff80) |
987 		      BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
988 	else if (WIFSTOPPED(tmpstat))
989 	    tmpstat = (tmpstat & 0xffff00ff) |
990 		      (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
991 	return copyout(&tmpstat, args->status, sizeof(int));
992     } else
993 	return 0;
994 }
995 
996 int
997 linux_wait4(struct proc *p, struct linux_wait4_args *args)
998 {
999     struct wait_args /* {
1000 	int pid;
1001 	int *status;
1002 	int options;
1003 	struct	rusage *rusage;
1004     } */ tmp;
1005     int error, tmpstat;
1006 
1007 #ifdef DEBUG
1008     printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n",
1009 	(long)p->p_pid, args->pid, (void *)args->status, args->options,
1010 	(void *)args->rusage);
1011 #endif
1012     tmp.pid = args->pid;
1013     tmp.status = args->status;
1014     tmp.options = (args->options & (WNOHANG | WUNTRACED));
1015     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
1016     if (args->options & __WCLONE)
1017 	tmp.options |= WLINUXCLONE;
1018     tmp.rusage = args->rusage;
1019 
1020     if ((error = wait4(p, &tmp)) != 0)
1021 	return error;
1022 
1023     SIGDELSET(p->p_siglist, SIGCHLD);
1024 
1025     if (args->status) {
1026 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
1027 	    return error;
1028 	if (WIFSIGNALED(tmpstat))
1029 	    tmpstat = (tmpstat & 0xffffff80) |
1030 		  BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
1031 	else if (WIFSTOPPED(tmpstat))
1032 	    tmpstat = (tmpstat & 0xffff00ff) |
1033 		  (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
1034 	return copyout(&tmpstat, args->status, sizeof(int));
1035     } else
1036 	return 0;
1037 }
1038 
1039 int
1040 linux_mknod(struct proc *p, struct linux_mknod_args *args)
1041 {
1042 	caddr_t sg;
1043 	struct mknod_args bsd_mknod;
1044 	struct mkfifo_args bsd_mkfifo;
1045 
1046 	sg = stackgap_init();
1047 
1048 	CHECKALTCREAT(p, &sg, args->path);
1049 
1050 #ifdef DEBUG
1051 	printf("Linux-emul(%ld): mknod(%s, %d, %d)\n",
1052 	   (long)p->p_pid, args->path, args->mode, args->dev);
1053 #endif
1054 
1055 	if (args->mode & S_IFIFO) {
1056 		bsd_mkfifo.path = args->path;
1057 		bsd_mkfifo.mode = args->mode;
1058 		return mkfifo(p, &bsd_mkfifo);
1059 	} else {
1060 		bsd_mknod.path = args->path;
1061 		bsd_mknod.mode = args->mode;
1062 		bsd_mknod.dev = args->dev;
1063 		return mknod(p, &bsd_mknod);
1064 	}
1065 }
1066 
1067 /*
1068  * UGH! This is just about the dumbest idea I've ever heard!!
1069  */
1070 int
1071 linux_personality(struct proc *p, struct linux_personality_args *args)
1072 {
1073 #ifdef DEBUG
1074 	printf("Linux-emul(%ld): personality(%d)\n",
1075 	   (long)p->p_pid, args->per);
1076 #endif
1077 	if (args->per != 0)
1078 		return EINVAL;
1079 
1080 	/* Yes Jim, it's still a Linux... */
1081 	p->p_retval[0] = 0;
1082 	return 0;
1083 }
1084 
1085 /*
1086  * Wrappers for get/setitimer for debugging..
1087  */
1088 int
1089 linux_setitimer(struct proc *p, struct linux_setitimer_args *args)
1090 {
1091 	struct setitimer_args bsa;
1092 	struct itimerval foo;
1093 	int error;
1094 
1095 #ifdef DEBUG
1096 	printf("Linux-emul(%ld): setitimer(%p, %p)\n",
1097 	    (long)p->p_pid, (void *)args->itv, (void *)args->oitv);
1098 #endif
1099 	bsa.which = args->which;
1100 	bsa.itv = args->itv;
1101 	bsa.oitv = args->oitv;
1102 	if (args->itv) {
1103 	    if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo,
1104 			sizeof(foo))))
1105 		return error;
1106 #ifdef DEBUG
1107 	    printf("setitimer: value: sec: %ld, usec: %ld\n",
1108 		foo.it_value.tv_sec, foo.it_value.tv_usec);
1109 	    printf("setitimer: interval: sec: %ld, usec: %ld\n",
1110 		foo.it_interval.tv_sec, foo.it_interval.tv_usec);
1111 #endif
1112 	}
1113 	return setitimer(p, &bsa);
1114 }
1115 
1116 int
1117 linux_getitimer(struct proc *p, struct linux_getitimer_args *args)
1118 {
1119 	struct getitimer_args bsa;
1120 #ifdef DEBUG
1121 	printf("Linux-emul(%ld): getitimer(%p)\n",
1122 	    (long)p->p_pid, (void *)args->itv);
1123 #endif
1124 	bsa.which = args->which;
1125 	bsa.itv = args->itv;
1126 	return getitimer(p, &bsa);
1127 }
1128 
1129 int
1130 linux_ioperm(struct proc *p, struct linux_ioperm_args *args)
1131 {
1132 	struct sysarch_args sa;
1133 	struct i386_ioperm_args *iia;
1134 	caddr_t sg;
1135 
1136 	sg = stackgap_init();
1137 	iia = stackgap_alloc(&sg, sizeof(struct i386_ioperm_args));
1138 	iia->start = args->start;
1139 	iia->length = args->length;
1140 	iia->enable = args->enable;
1141 	sa.op = I386_SET_IOPERM;
1142 	sa.parms = (char *)iia;
1143 	return sysarch(p, &sa);
1144 }
1145 
1146 int
1147 linux_iopl(struct proc *p, struct linux_iopl_args *args)
1148 {
1149 	int error;
1150 
1151 	if (args->level < 0 || args->level > 3)
1152 		return (EINVAL);
1153 	if ((error = suser(p)) != 0)
1154 		return (error);
1155 	if (securelevel > 0)
1156 		return (EPERM);
1157 	p->p_md.md_regs->tf_eflags = (p->p_md.md_regs->tf_eflags & ~PSL_IOPL) |
1158 		(args->level * (PSL_IOPL / 3));
1159 	return (0);
1160 }
1161 
1162 int
1163 linux_nice(struct proc *p, struct linux_nice_args *args)
1164 {
1165 	struct setpriority_args	bsd_args;
1166 
1167 	bsd_args.which = PRIO_PROCESS;
1168 	bsd_args.who = 0;	/* current process */
1169 	bsd_args.prio = args->inc;
1170 	return setpriority(p, &bsd_args);
1171 }
1172 
1173 int
1174 linux_setgroups(p, uap)
1175 	struct proc *p;
1176 	struct linux_setgroups_args *uap;
1177 {
1178 	struct pcred *pc;
1179 	linux_gid_t linux_gidset[NGROUPS];
1180 	gid_t *bsd_gidset;
1181 	int ngrp, error;
1182 
1183 	pc = p->p_cred;
1184 	ngrp = uap->gidsetsize;
1185 
1186 	/*
1187 	 * cr_groups[0] holds egid. Setting the whole set from
1188 	 * the supplied set will cause egid to be changed too.
1189 	 * Keep cr_groups[0] unchanged to prevent that.
1190 	 */
1191 
1192 	if ((error = suser(p)) != 0)
1193 		return (error);
1194 
1195 	if (ngrp >= NGROUPS)
1196 		return (EINVAL);
1197 
1198 	pc->pc_ucred = crcopy(pc->pc_ucred);
1199 	if (ngrp > 0) {
1200 		error = copyin((caddr_t)uap->gidset, (caddr_t)linux_gidset,
1201 			       ngrp * sizeof(linux_gid_t));
1202 		if (error)
1203 			return (error);
1204 
1205 		pc->pc_ucred->cr_ngroups = ngrp + 1;
1206 
1207 		bsd_gidset = pc->pc_ucred->cr_groups;
1208 		ngrp--;
1209 		while (ngrp >= 0) {
1210 			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
1211 			ngrp--;
1212 		}
1213 	}
1214 	else
1215 		pc->pc_ucred->cr_ngroups = 1;
1216 
1217 	setsugid(p);
1218 	return (0);
1219 }
1220 
1221 int
1222 linux_getgroups(p, uap)
1223 	struct proc *p;
1224 	struct linux_getgroups_args *uap;
1225 {
1226 	struct pcred *pc;
1227 	linux_gid_t linux_gidset[NGROUPS];
1228 	gid_t *bsd_gidset;
1229 	int bsd_gidsetsz, ngrp, error;
1230 
1231 	pc = p->p_cred;
1232 	bsd_gidset = pc->pc_ucred->cr_groups;
1233 	bsd_gidsetsz = pc->pc_ucred->cr_ngroups - 1;
1234 
1235 	/*
1236 	 * cr_groups[0] holds egid. Returning the whole set
1237 	 * here will cause a duplicate. Exclude cr_groups[0]
1238 	 * to prevent that.
1239 	 */
1240 
1241 	if ((ngrp = uap->gidsetsize) == 0) {
1242 		p->p_retval[0] = bsd_gidsetsz;
1243 		return (0);
1244 	}
1245 
1246 	if (ngrp < bsd_gidsetsz)
1247 		return (EINVAL);
1248 
1249 	ngrp = 0;
1250 	while (ngrp < bsd_gidsetsz) {
1251 		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1252 		ngrp++;
1253 	}
1254 
1255 	if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset,
1256 	    ngrp * sizeof(linux_gid_t))))
1257 		return (error);
1258 
1259 	p->p_retval[0] = ngrp;
1260 	return (0);
1261 }
1262 
1263 int
1264 linux_setrlimit(p, uap)
1265      struct proc *p;
1266      struct linux_setrlimit_args *uap;
1267 {
1268     struct osetrlimit_args bsd;
1269 
1270 #ifdef DEBUG
1271     printf("Linux-emul(%ld): setrlimit(%d, %p)\n",
1272 	   (long)p->p_pid, uap->resource, (void *)uap->rlim);
1273 #endif
1274 
1275     if (uap->resource >= LINUX_RLIM_NLIMITS)
1276 	return EINVAL;
1277 
1278     bsd.which = linux_to_bsd_resource[uap->resource];
1279 
1280     if (bsd.which == -1)
1281 	return EINVAL;
1282 
1283     bsd.rlp = uap->rlim;
1284     return osetrlimit(p, &bsd);
1285 }
1286 
1287 int
1288 linux_getrlimit(p, uap)
1289      struct proc *p;
1290      struct linux_getrlimit_args *uap;
1291 {
1292     struct ogetrlimit_args bsd;
1293 
1294 #ifdef DEBUG
1295     printf("Linux-emul(%ld): getrlimit(%d, %p)\n",
1296 	   (long)p->p_pid, uap->resource, (void *)uap->rlim);
1297 #endif
1298 
1299     if (uap->resource >= LINUX_RLIM_NLIMITS)
1300 	return EINVAL;
1301 
1302     bsd.which = linux_to_bsd_resource[uap->resource];
1303 
1304     if (bsd.which == -1)
1305 	return EINVAL;
1306 
1307     bsd.rlp = uap->rlim;
1308     return ogetrlimit(p, &bsd);
1309 }
1310 
1311 int
1312 linux_sched_setscheduler(p, uap)
1313 	struct proc *p;
1314 	struct linux_sched_setscheduler_args *uap;
1315 {
1316 	struct sched_setscheduler_args bsd;
1317 
1318 #ifdef DEBUG
1319 	printf("Linux-emul(%ld): sched_setscheduler(%d, %d, %p)\n",
1320 	    (long)p->p_pid, uap->pid, uap->policy, (const void *)uap->param);
1321 #endif
1322 
1323 	switch (uap->policy) {
1324 	case LINUX_SCHED_OTHER:
1325 		bsd.policy = SCHED_OTHER;
1326 		break;
1327 	case LINUX_SCHED_FIFO:
1328 		bsd.policy = SCHED_FIFO;
1329 		break;
1330 	case LINUX_SCHED_RR:
1331 		bsd.policy = SCHED_RR;
1332 		break;
1333 	default:
1334 		return EINVAL;
1335 	}
1336 
1337 	bsd.pid = uap->pid;
1338 	bsd.param = uap->param;
1339 	return sched_setscheduler(p, &bsd);
1340 }
1341 
1342 int
1343 linux_sched_getscheduler(p, uap)
1344 	struct proc *p;
1345 	struct linux_sched_getscheduler_args *uap;
1346 {
1347 	struct sched_getscheduler_args bsd;
1348 	int error;
1349 
1350 #ifdef DEBUG
1351 	printf("Linux-emul(%ld): sched_getscheduler(%d)\n",
1352 	       (long)p->p_pid, uap->pid);
1353 #endif
1354 
1355 	bsd.pid = uap->pid;
1356 	error = sched_getscheduler(p, &bsd);
1357 
1358 	switch (p->p_retval[0]) {
1359 	case SCHED_OTHER:
1360 		p->p_retval[0] = LINUX_SCHED_OTHER;
1361 		break;
1362 	case SCHED_FIFO:
1363 		p->p_retval[0] = LINUX_SCHED_FIFO;
1364 		break;
1365 	case SCHED_RR:
1366 		p->p_retval[0] = LINUX_SCHED_RR;
1367 		break;
1368 	}
1369 
1370 	return error;
1371 }
1372 
/*
 * Segment description that linux_modify_ldt() copies in from the caller
 * for the write_ldt functions.  The layout has to match what the Linux
 * program passes, so fields must not be reordered or resized.
 */
struct linux_descriptor {
	unsigned int  entry_number;	/* LDT slot to write (ldt->start) */
	unsigned long base_addr;	/* segment base, split into lo/hi base */
	unsigned int  limit;		/* segment limit, low 20 bits are used */
	unsigned int  seg_32bit:1;	/* copied to sd_def32 */
	unsigned int  contents:2;	/* placed in sd_type, shifted left by 2 */
	unsigned int  read_exec_only:1;	/* inverted into bit 1 of sd_type */
	unsigned int  limit_in_pages:1;	/* copied to sd_gran */
	unsigned int  seg_not_present:1; /* inverted into sd_p */
	unsigned int  useable:1;	/* not consulted by linux_modify_ldt() */
};
1384 
1385 int
1386 linux_modify_ldt(p, uap)
1387 	struct proc *p;
1388 	struct linux_modify_ldt_args *uap;
1389 {
1390 	int error;
1391 	caddr_t sg;
1392 	struct sysarch_args args;
1393 	struct i386_ldt_args *ldt;
1394 	struct linux_descriptor ld;
1395 	union descriptor *desc;
1396 
1397 	sg = stackgap_init();
1398 
1399 	if (uap->ptr == NULL)
1400 		return (EINVAL);
1401 
1402 	switch (uap->func) {
1403 	case 0x00: /* read_ldt */
1404 		ldt = stackgap_alloc(&sg, sizeof(*ldt));
1405 		ldt->start = 0;
1406 		ldt->descs = uap->ptr;
1407 		ldt->num = uap->bytecount / sizeof(union descriptor);
1408 		args.op = I386_GET_LDT;
1409 		args.parms = (char*)ldt;
1410 		error = sysarch(p, &args);
1411 		p->p_retval[0] *= sizeof(union descriptor);
1412 		break;
1413 	case 0x01: /* write_ldt */
1414 	case 0x11: /* write_ldt */
1415 		if (uap->bytecount != sizeof(ld))
1416 			return (EINVAL);
1417 
1418 		error = copyin(uap->ptr, &ld, sizeof(ld));
1419 		if (error)
1420 			return (error);
1421 
1422 		ldt = stackgap_alloc(&sg, sizeof(*ldt));
1423 		desc = stackgap_alloc(&sg, sizeof(*desc));
1424 		ldt->start = ld.entry_number;
1425 		ldt->descs = desc;
1426 		ldt->num = 1;
1427 		desc->sd.sd_lolimit = (ld.limit & 0x0000ffff);
1428 		desc->sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16;
1429 		desc->sd.sd_lobase = (ld.base_addr & 0x00ffffff);
1430 		desc->sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24;
1431 		desc->sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) |
1432 			(ld.contents << 2);
1433 		desc->sd.sd_dpl = 3;
1434 		desc->sd.sd_p = (ld.seg_not_present ^ 1);
1435 		desc->sd.sd_xx = 0;
1436 		desc->sd.sd_def32 = ld.seg_32bit;
1437 		desc->sd.sd_gran = ld.limit_in_pages;
1438 		args.op = I386_SET_LDT;
1439 		args.parms = (char*)ldt;
1440 		error = sysarch(p, &args);
1441 		break;
1442 	default:
1443 		error = EINVAL;
1444 		break;
1445 	}
1446 
1447 	if (error == EOPNOTSUPP) {
1448 		printf("linux: modify_ldt needs kernel option USER_LDT\n");
1449 		error = ENOSYS;
1450 	}
1451 
1452 	return (error);
1453 }
1454