xref: /freebsd/sys/compat/linux/linux_misc.c (revision 23f282aa31e9b6fceacd449020e936e98d6f2298)
1 /*-
2  * Copyright (c) 1994-1995 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include "opt_compat.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/sysproto.h>
36 #include <sys/kernel.h>
37 #include <sys/mman.h>
38 #include <sys/proc.h>
39 #include <sys/fcntl.h>
40 #include <sys/imgact_aout.h>
41 #include <sys/mount.h>
42 #include <sys/namei.h>
43 #include <sys/resourcevar.h>
44 #include <sys/stat.h>
45 #include <sys/sysctl.h>
46 #include <sys/unistd.h>
47 #include <sys/vnode.h>
48 #include <sys/wait.h>
49 #include <sys/time.h>
50 #include <sys/signalvar.h>
51 
52 #include <vm/vm.h>
53 #include <vm/pmap.h>
54 #include <vm/vm_kern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_extern.h>
57 
58 #include <machine/frame.h>
59 #include <machine/psl.h>
60 #include <machine/sysarch.h>
61 #include <machine/segments.h>
62 
63 #include <i386/linux/linux.h>
64 #include <i386/linux/linux_proto.h>
65 #include <i386/linux/linux_util.h>
66 #include <i386/linux/linux_mib.h>
67 
68 #include <posix4/sched.h>
69 
/*
 * Translate a BSD signal number into its Linux counterpart.
 *
 * Signal numbers at or below LINUX_SIGTBLSZ are looked up in
 * bsd_to_linux_signal[]; larger numbers are passed through unchanged.
 * The argument is expanded more than once, so it must not have side
 * effects that matter when repeated.  Every use of the argument is
 * parenthesized so that any expression can be passed safely.
 */
#define BSD_TO_LINUX_SIGNAL(sig)	\
	(((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : (sig))
72 
73 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] =
74 { RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
75   RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
76   RLIMIT_MEMLOCK, -1
77 };
78 
79 int
80 linux_alarm(struct proc *p, struct linux_alarm_args *args)
81 {
82     struct itimerval it, old_it;
83     struct timeval tv;
84     int s;
85 
86 #ifdef DEBUG
87     printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs);
88 #endif
89     if (args->secs > 100000000)
90 	return EINVAL;
91     it.it_value.tv_sec = (long)args->secs;
92     it.it_value.tv_usec = 0;
93     it.it_interval.tv_sec = 0;
94     it.it_interval.tv_usec = 0;
95     s = splsoftclock();
96     old_it = p->p_realtimer;
97     getmicrouptime(&tv);
98     if (timevalisset(&old_it.it_value))
99 	untimeout(realitexpire, (caddr_t)p, p->p_ithandle);
100     if (it.it_value.tv_sec != 0) {
101 	p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value));
102 	timevaladd(&it.it_value, &tv);
103     }
104     p->p_realtimer = it;
105     splx(s);
106     if (timevalcmp(&old_it.it_value, &tv, >)) {
107 	timevalsub(&old_it.it_value, &tv);
108 	if (old_it.it_value.tv_usec != 0)
109 	    old_it.it_value.tv_sec++;
110 	p->p_retval[0] = old_it.it_value.tv_sec;
111     }
112     return 0;
113 }
114 
115 int
116 linux_brk(struct proc *p, struct linux_brk_args *args)
117 {
118 #if 0
119     struct vmspace *vm = p->p_vmspace;
120     vm_offset_t new, old;
121     int error;
122 
123     if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr)
124 	return EINVAL;
125     if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr)
126 	> p->p_rlimit[RLIMIT_DATA].rlim_cur)
127 	return ENOMEM;
128 
129     old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize);
130     new = round_page((vm_offset_t)args->dsend);
131     p->p_retval[0] = old;
132     if ((new-old) > 0) {
133 	if (swap_pager_full)
134 	    return ENOMEM;
135 	error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE,
136 			VM_PROT_ALL, VM_PROT_ALL, 0);
137 	if (error)
138 	    return error;
139 	vm->vm_dsize += btoc((new-old));
140 	p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize));
141     }
142     return 0;
143 #else
144     struct vmspace *vm = p->p_vmspace;
145     vm_offset_t new, old;
146     struct obreak_args /* {
147 	char * nsize;
148     } */ tmp;
149 
150 #ifdef DEBUG
151     printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend);
152 #endif
153     old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
154     new = (vm_offset_t)args->dsend;
155     tmp.nsize = (char *) new;
156     if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp))
157 	p->p_retval[0] = (int)new;
158     else
159 	p->p_retval[0] = (int)old;
160 
161     return 0;
162 #endif
163 }
164 
/*
 * uselib(2) emulation: load the a.out shared library named by
 * args->library into the address space of the calling process.
 *
 * The file is looked up and must be a regular, executable file on a
 * mount without MNT_NOEXEC that nobody has open for writing.  Its first
 * page is mapped into kernel_map so that the a.out header can be
 * inspected.  Text and data are then placed at the page-truncated
 * a_entry address (mapped straight from the file when the file offset
 * is page aligned, copied through a temporary kernel mapping otherwise)
 * and an anonymous region is added right after them for the bss.
 *
 * Returns 0 on success or an errno value.  Every exit path runs through
 * the cleanup label, which unlocks the vnode if it is still locked and
 * removes the kernel mapping of the header page.
 */
165 int
166 linux_uselib(struct proc *p, struct linux_uselib_args *args)
167 {
168     struct nameidata ni;
169     struct vnode *vp;
170     struct exec *a_out;
171     struct vattr attr;
172     vm_offset_t vmaddr;
173     unsigned long file_offset;
174     vm_offset_t buffer;
175     unsigned long bss_size;
176     int error;
177     caddr_t sg;
178     int locked;
179 
180     sg = stackgap_init();
181     CHECKALTEXIST(p, &sg, args->library);
182 
183 #ifdef DEBUG
184     printf("Linux-emul(%ld): uselib(%s)\n", (long)p->p_pid, args->library);
185 #endif
186 
    /* State inspected by the cleanup code; must be set before any goto. */
187     a_out = NULL;
188     locked = 0;
189     vp = NULL;
190 
191     NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, args->library, p);
192     error = namei(&ni);
193     if (error)
194 	goto cleanup;
195 
196     vp = ni.ni_vp;
197     /*
198      * XXX This looks like a bogus check - a LOCKLEAF namei should not succeed
199      * without returning a vnode.
      * NOTE(review): this path jumps to cleanup before the NDFREE() below
      * has run -- confirm whether that matters if it can ever be taken.
200      */
201     if (vp == NULL) {
202 	error = ENOEXEC;	/* ?? */
203 	goto cleanup;
204     }
205     NDFREE(&ni, NDF_ONLY_PNBUF);
206 
207     /*
208      * From here on down, we have a locked vnode that must be unlocked.
209      */
210     locked++;
211 
212     /*
213      * Writable?
214      */
215     if (vp->v_writecount) {
216 	error = ETXTBSY;
217 	goto cleanup;
218     }
219 
220     /*
221      * Executable?
222      */
223     error = VOP_GETATTR(vp, &attr, p->p_ucred, p);
224     if (error)
225 	goto cleanup;
226 
227     if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
228 	((attr.va_mode & 0111) == 0) ||
229 	(attr.va_type != VREG)) {
230 	    error = ENOEXEC;
231 	    goto cleanup;
232     }
233 
234     /*
235      * Sensible size?
236      */
237     if (attr.va_size == 0) {
238 	error = ENOEXEC;
239 	goto cleanup;
240     }
241 
242     /*
243      * Can we access it?
244      */
245     error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
246     if (error)
247 	goto cleanup;
248 
249     error = VOP_OPEN(vp, FREAD, p->p_ucred, p);
250     if (error)
251 	goto cleanup;
252 
253     /*
254      * Lock no longer needed
255      */
256     VOP_UNLOCK(vp, 0, p);
257     locked = 0;
258 
259     /*
260      * Pull in executable header into kernel_map
261      */
262     error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
263 	    	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
264     if (error)
265 	goto cleanup;
266 
267     /*
268      * Is it a Linux binary ?
      * (Bits 16-23 of a_magic must be 0x64; presumably the a.out machine
      * id used by Linux/i386 binaries -- TODO confirm.)
269      */
270     if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
271 	error = ENOEXEC;
272 	goto cleanup;
273     }
274 
275     /* While we are here, we should REALLY do some more checks */
276 
277     /*
278      * Set file/virtual offset based on a.out variant.
279      */
280     switch ((int)(a_out->a_magic & 0xffff)) {
281     case 0413:	/* ZMAGIC */
282 	file_offset = 1024;
283 	break;
284     case 0314:	/* QMAGIC */
285 	file_offset = 0;
286 	break;
287     default:
288 	error = ENOEXEC;
289 	goto cleanup;
290     }
291 
292     bss_size = round_page(a_out->a_bss);
293 
294     /*
295      * Check various fields in header for validity/bounds.
296      */
297     if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
298 	error = ENOEXEC;
299 	goto cleanup;
300     }
301 
302     /* text + data can't exceed file size */
     /* NOTE(review): the sum is not checked for wrap-around -- confirm. */
303     if (a_out->a_data + a_out->a_text > attr.va_size) {
304 	error = EFAULT;
305 	goto cleanup;
306     }
307 
308     /*
309      * text/data/bss must not exceed limits
310      * XXX: this is not complete. it should check current usage PLUS
311      * the resources needed by this library.
312      */
313     if (a_out->a_text > MAXTSIZ ||
314 	a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) {
315 	error = ENOMEM;
316 	goto cleanup;
317     }
318 
319     /*
320      * prevent more writers
321      */
322     vp->v_flag |= VTEXT;
323 
324     /*
325      * Check if file_offset is page aligned.
326      * Currently we cannot handle misaligned file offsets,
327      * and so we read in the entire image (what a waste).
328      */
329     if (file_offset & PAGE_MASK) {
330 #ifdef DEBUG
331 printf("uselib: Non page aligned binary %lu\n", file_offset);
332 #endif
333 	/*
334 	 * Map text+data read/write/execute
335 	 */
336 
337 	/* a_entry is the load address and is page aligned */
338 	vmaddr = trunc_page(a_out->a_entry);
339 
340 	/* get anon user mapping, read+write+execute */
341 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
342 		    	    a_out->a_text + a_out->a_data, FALSE,
343 			    VM_PROT_ALL, VM_PROT_ALL, 0);
344 	if (error)
345 	    goto cleanup;
346 
347 	/* map file into kernel_map */
348 	error = vm_mmap(kernel_map, &buffer,
349 			round_page(a_out->a_text + a_out->a_data + file_offset),
350 		   	VM_PROT_READ, VM_PROT_READ, 0,
351 			(caddr_t)vp, trunc_page(file_offset));
352 	if (error)
353 	    goto cleanup;
354 
355 	/* copy from kernel VM space to user space */
356 	error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset),
357 			(caddr_t)vmaddr, a_out->a_text + a_out->a_data);
358 
359 	/* release temporary kernel space */
	/* (done before testing error so the mapping is dropped either way) */
360 	vm_map_remove(kernel_map, buffer,
361 		      buffer + round_page(a_out->a_text + a_out->a_data + file_offset));
362 
363 	if (error)
364 	    goto cleanup;
365     }
366     else {
367 #ifdef DEBUG
368 printf("uselib: Page aligned binary %lu\n", file_offset);
369 #endif
370 	/*
371 	 * for QMAGIC, a_entry is 20 bytes beyond the load address
372 	 * to skip the executable header
373 	 */
374 	vmaddr = trunc_page(a_out->a_entry);
375 
376 	/*
377 	 * Map it all into the process's space as a single copy-on-write
378 	 * "data" segment.
379 	 */
380 	error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr,
381 		   	a_out->a_text + a_out->a_data,
382 			VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
383 			(caddr_t)vp, file_offset);
384 	if (error)
385 	    goto cleanup;
386     }
387 #ifdef DEBUG
388 printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]);
389 #endif
390     if (bss_size != 0) {
391         /*
392 	 * Calculate BSS start address
393 	 */
394 	vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data;
395 
396 	/*
397 	 * allocate some 'anon' space
398 	 */
399 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
400 			    bss_size, FALSE,
401 			    VM_PROT_ALL, VM_PROT_ALL, 0);
402 	if (error)
403 	    goto cleanup;
404     }
405 
     /*
      * Common exit for success and failure alike.
      * NOTE(review): nothing here releases the vnode reference obtained
      * from namei() or undoes VOP_OPEN()/VTEXT on failure -- confirm
      * whether that is intentional.
      */
406 cleanup:
407     /*
408      * Unlock vnode if needed
409      */
410     if (locked)
411 	VOP_UNLOCK(vp, 0, p);
412 
413     /*
414      * Release the kernel mapping.
415      */
416     if (a_out)
417 	vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE);
418 
419     return error;
420 }
421 
422 /* XXX move */
423 struct linux_select_argv {
424 	int nfds;
425 	fd_set *readfds;
426 	fd_set *writefds;
427 	fd_set *exceptfds;
428 	struct timeval *timeout;
429 };
430 
431 int
432 linux_select(struct proc *p, struct linux_select_args *args)
433 {
434     struct linux_select_argv linux_args;
435     struct linux_newselect_args newsel;
436     int error;
437 
438 #ifdef SELECT_DEBUG
439     printf("Linux-emul(%ld): select(%x)\n", (long)p->p_pid, args->ptr);
440 #endif
441     if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
442 			sizeof(linux_args))))
443 	return error;
444 
445     newsel.nfds = linux_args.nfds;
446     newsel.readfds = linux_args.readfds;
447     newsel.writefds = linux_args.writefds;
448     newsel.exceptfds = linux_args.exceptfds;
449     newsel.timeout = linux_args.timeout;
450 
451     return linux_newselect(p, &newsel);
452 }
453 
454 int
455 linux_newselect(struct proc *p, struct linux_newselect_args *args)
456 {
457     struct select_args bsa;
458     struct timeval tv0, tv1, utv, *tvp;
459     caddr_t sg;
460     int error;
461 
462 #ifdef DEBUG
463     printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n",
464   	(long)p->p_pid, args->nfds, (void *)args->readfds,
465 	(void *)args->writefds, (void *)args->exceptfds,
466 	(void *)args->timeout);
467 #endif
468     error = 0;
469     bsa.nd = args->nfds;
470     bsa.in = args->readfds;
471     bsa.ou = args->writefds;
472     bsa.ex = args->exceptfds;
473     bsa.tv = args->timeout;
474 
475     /*
476      * Store current time for computation of the amount of
477      * time left.
478      */
479     if (args->timeout) {
480 	if ((error = copyin(args->timeout, &utv, sizeof(utv))))
481 	    goto select_out;
482 #ifdef DEBUG
483 	printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n",
484 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
485 #endif
486 	if (itimerfix(&utv)) {
487 	    /*
488 	     * The timeval was invalid.  Convert it to something
489 	     * valid that will act as it does under Linux.
490 	     */
491 	    sg = stackgap_init();
492 	    tvp = stackgap_alloc(&sg, sizeof(utv));
493 	    utv.tv_sec += utv.tv_usec / 1000000;
494 	    utv.tv_usec %= 1000000;
495 	    if (utv.tv_usec < 0) {
496 		utv.tv_sec -= 1;
497 		utv.tv_usec += 1000000;
498 	    }
499 	    if (utv.tv_sec < 0)
500 		timevalclear(&utv);
501 	    if ((error = copyout(&utv, tvp, sizeof(utv))))
502 		goto select_out;
503 	    bsa.tv = tvp;
504 	}
505 	microtime(&tv0);
506     }
507 
508     error = select(p, &bsa);
509 #ifdef DEBUG
510     printf("Linux-emul(%ld): real select returns %d\n", (long)p->p_pid, error);
511 #endif
512 
513     if (error) {
514 	/*
515 	 * See fs/select.c in the Linux kernel.  Without this,
516 	 * Maelstrom doesn't work.
517 	 */
518 	if (error == ERESTART)
519 	    error = EINTR;
520 	goto select_out;
521     }
522 
523     if (args->timeout) {
524 	if (p->p_retval[0]) {
525 	    /*
526 	     * Compute how much time was left of the timeout,
527 	     * by subtracting the current time and the time
528 	     * before we started the call, and subtracting
529 	     * that result from the user-supplied value.
530 	     */
531 	    microtime(&tv1);
532 	    timevalsub(&tv1, &tv0);
533 	    timevalsub(&utv, &tv1);
534 	    if (utv.tv_sec < 0)
535 		timevalclear(&utv);
536 	} else
537 	    timevalclear(&utv);
538 #ifdef DEBUG
539 	printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n",
540 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
541 #endif
542 	if ((error = copyout(&utv, args->timeout, sizeof(utv))))
543 	    goto select_out;
544     }
545 
546 select_out:
547 #ifdef DEBUG
548     printf("Linux-emul(%ld): newselect_out -> %d\n", (long)p->p_pid, error);
549 #endif
550     return error;
551 }
552 
553 int
554 linux_getpgid(struct proc *p, struct linux_getpgid_args *args)
555 {
556     struct proc *curp;
557 
558 #ifdef DEBUG
559     printf("Linux-emul(%ld): getpgid(%d)\n", (long)p->p_pid, args->pid);
560 #endif
561     if (args->pid != p->p_pid) {
562 	if (!(curp = pfind(args->pid)))
563 	    return ESRCH;
564     }
565     else
566 	curp = p;
567     p->p_retval[0] = curp->p_pgid;
568     return 0;
569 }
570 
571 int
572 linux_fork(struct proc *p, struct linux_fork_args *args)
573 {
574     int error;
575 
576 #ifdef DEBUG
577     printf("Linux-emul(%ld): fork()\n", (long)p->p_pid);
578 #endif
579     if ((error = fork(p, (struct fork_args *)args)) != 0)
580 	return error;
581     if (p->p_retval[1] == 1)
582 	p->p_retval[0] = 0;
583     return 0;
584 }
585 
586 int
587 linux_vfork(struct proc *p, struct linux_vfork_args *args)
588 {
589 	int error;
590 
591 #ifdef DEBUG
592 	printf("Linux-emul(%ld): vfork()\n", (long)p->p_pid);
593 #endif
594 
595 	if ((error = vfork(p, (struct vfork_args *)args)) != 0)
596 		return error;
597 	/* Are we the child? */
598 	if (p->p_retval[1] == 1)
599 		p->p_retval[0] = 0;
600 	return 0;
601 }
602 
603 #define CLONE_VM	0x100
604 #define CLONE_FS	0x200
605 #define CLONE_FILES	0x400
606 #define CLONE_SIGHAND	0x800
607 #define CLONE_PID	0x1000
608 
609 int
610 linux_clone(struct proc *p, struct linux_clone_args *args)
611 {
612     int error, ff = RFPROC;
613     struct proc *p2;
614     int            exit_signal;
615     vm_offset_t    start;
616     struct rfork_args rf_args;
617 
618 #ifdef DEBUG
619     if (args->flags & CLONE_PID)
620 	printf("linux_clone(%ld): CLONE_PID not yet supported\n",
621 	       (long)p->p_pid);
622     printf("linux_clone(%ld): invoked with flags %x and stack %x\n",
623 	   (long)p->p_pid, (unsigned int)args->flags,
624 	   (unsigned int)args->stack);
625 #endif
626 
627     if (!args->stack)
628         return (EINVAL);
629 
630     exit_signal = args->flags & 0x000000ff;
631     if (exit_signal >= LINUX_NSIG)
632 	return EINVAL;
633 
634     if (exit_signal <= LINUX_SIGTBLSZ)
635 	exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)];
636 
637     /* RFTHREAD probably not necessary here, but it shouldn't hurt either */
638     ff |= RFTHREAD;
639 
640     if (args->flags & CLONE_VM)
641 	ff |= RFMEM;
642     if (args->flags & CLONE_SIGHAND)
643 	ff |= RFSIGSHARE;
644     if (!(args->flags & CLONE_FILES))
645 	ff |= RFFDG;
646 
647     error = 0;
648     start = 0;
649 
650     rf_args.flags = ff;
651     if ((error = rfork(p, &rf_args)) != 0)
652 	return error;
653 
654     p2 = pfind(p->p_retval[0]);
655     if (p2 == 0)
656  	return ESRCH;
657 
658     p2->p_sigparent = exit_signal;
659     p2->p_md.md_regs->tf_esp = (unsigned int)args->stack;
660 
661 #ifdef DEBUG
662     printf ("linux_clone(%ld): successful rfork to %ld\n",
663 	    (long)p->p_pid, (long)p2->p_pid);
664 #endif
665     return 0;
666 }
667 
668 /* XXX move */
669 struct linux_mmap_argv {
670 	linux_caddr_t addr;
671 	int len;
672 	int prot;
673 	int flags;
674 	int fd;
675 	int pos;
676 };
677 
678 #define STACK_SIZE  (2 * 1024 * 1024)
679 #define GUARD_SIZE  (4 * PAGE_SIZE)
680 int
681 linux_mmap(struct proc *p, struct linux_mmap_args *args)
682 {
683     struct mmap_args /* {
684 	caddr_t addr;
685 	size_t len;
686 	int prot;
687 	int flags;
688 	int fd;
689 	long pad;
690 	off_t pos;
691     } */ bsd_args;
692     int error;
693     struct linux_mmap_argv linux_args;
694 
695     if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
696 			sizeof(linux_args))))
697 	return error;
698 #ifdef DEBUG
699     printf("Linux-emul(%ld): mmap(%p, %d, %d, %08x, %d, %d)\n",
700 	(long)p->p_pid, (void *)linux_args.addr, linux_args.len,
701 	linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos);
702 #endif
703     bsd_args.flags = 0;
704     if (linux_args.flags & LINUX_MAP_SHARED)
705 	bsd_args.flags |= MAP_SHARED;
706     if (linux_args.flags & LINUX_MAP_PRIVATE)
707 	bsd_args.flags |= MAP_PRIVATE;
708     if (linux_args.flags & LINUX_MAP_FIXED)
709 	bsd_args.flags |= MAP_FIXED;
710     if (linux_args.flags & LINUX_MAP_ANON)
711 	bsd_args.flags |= MAP_ANON;
712     if (linux_args.flags & LINUX_MAP_GROWSDOWN) {
713 	bsd_args.flags |= MAP_STACK;
714 
715 	/* The linux MAP_GROWSDOWN option does not limit auto
716 	 * growth of the region.  Linux mmap with this option
717 	 * takes as addr the inital BOS, and as len, the initial
718 	 * region size.  It can then grow down from addr without
719 	 * limit.  However, linux threads has an implicit internal
720 	 * limit to stack size of STACK_SIZE.  Its just not
721 	 * enforced explicitly in linux.  But, here we impose
722 	 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
723 	 * region, since we can do this with our mmap.
724 	 *
725 	 * Our mmap with MAP_STACK takes addr as the maximum
726 	 * downsize limit on BOS, and as len the max size of
727 	 * the region.  It them maps the top SGROWSIZ bytes,
728 	 * and autgrows the region down, up to the limit
729 	 * in addr.
730 	 *
731 	 * If we don't use the MAP_STACK option, the effect
732 	 * of this code is to allocate a stack region of a
733 	 * fixed size of (STACK_SIZE - GUARD_SIZE).
734 	 */
735 
736 	/* This gives us TOS */
737 	bsd_args.addr = linux_args.addr + linux_args.len;
738 
739 	/* This gives us our maximum stack size */
740 	if (linux_args.len > STACK_SIZE - GUARD_SIZE)
741 	    bsd_args.len = linux_args.len;
742 	else
743 	    bsd_args.len  = STACK_SIZE - GUARD_SIZE;
744 
745 	/* This gives us a new BOS.  If we're using VM_STACK, then
746 	 * mmap will just map the top SGROWSIZ bytes, and let
747 	 * the stack grow down to the limit at BOS.  If we're
748 	 * not using VM_STACK we map the full stack, since we
749 	 * don't have a way to autogrow it.
750 	 */
751 	bsd_args.addr -= bsd_args.len;
752 
753     } else {
754 	bsd_args.addr = linux_args.addr;
755 	bsd_args.len  = linux_args.len;
756     }
757 
758     bsd_args.prot = linux_args.prot | PROT_READ;	/* always required */
759     bsd_args.fd = linux_args.fd;
760     bsd_args.pos = linux_args.pos;
761     bsd_args.pad = 0;
762     return mmap(p, &bsd_args);
763 }
764 
765 int
766 linux_mremap(struct proc *p, struct linux_mremap_args *args)
767 {
768 	struct munmap_args /* {
769 		void *addr;
770 		size_t len;
771 	} */ bsd_args;
772 	int error = 0;
773 
774 #ifdef DEBUG
775 	printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n",
776 	    (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len,
777 	    args->flags);
778 #endif
779 	args->new_len = round_page(args->new_len);
780 	args->old_len = round_page(args->old_len);
781 
782 	if (args->new_len > args->old_len) {
783 		p->p_retval[0] = 0;
784 		return ENOMEM;
785 	}
786 
787 	if (args->new_len < args->old_len) {
788 		bsd_args.addr = args->addr + args->new_len;
789 		bsd_args.len = args->old_len - args->new_len;
790 		error = munmap(p, &bsd_args);
791 	}
792 
793 	p->p_retval[0] = error ? 0 : (int)args->addr;
794 	return error;
795 }
796 
797 int
798 linux_msync(struct proc *p, struct linux_msync_args *args)
799 {
800 	struct msync_args bsd_args;
801 
802 	bsd_args.addr = args->addr;
803 	bsd_args.len = args->len;
804 	bsd_args.flags = 0;	/* XXX ignore */
805 
806 	return msync(p, &bsd_args);
807 }
808 
809 int
810 linux_pipe(struct proc *p, struct linux_pipe_args *args)
811 {
812     int error;
813     int reg_edx;
814 
815 #ifdef DEBUG
816     printf("Linux-emul(%ld): pipe(*)\n", (long)p->p_pid);
817 #endif
818     reg_edx = p->p_retval[1];
819     error = pipe(p, 0);
820     if (error) {
821 	p->p_retval[1] = reg_edx;
822 	return error;
823     }
824 
825     error = copyout(p->p_retval, args->pipefds, 2*sizeof(int));
826     if (error) {
827 	p->p_retval[1] = reg_edx;
828 	return error;
829     }
830 
831     p->p_retval[1] = reg_edx;
832     p->p_retval[0] = 0;
833     return 0;
834 }
835 
836 int
837 linux_time(struct proc *p, struct linux_time_args *args)
838 {
839     struct timeval tv;
840     linux_time_t tm;
841     int error;
842 
843 #ifdef DEBUG
844     printf("Linux-emul(%ld): time(*)\n", (long)p->p_pid);
845 #endif
846     microtime(&tv);
847     tm = tv.tv_sec;
848     if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t))))
849 	return error;
850     p->p_retval[0] = tm;
851     return 0;
852 }
853 
/*
 * Result buffer of the Linux times() call, as copied out to user space
 * by linux_times().  All values are in clock ticks (see CONVTCK).
 */
struct linux_times_argv {
    long    tms_utime;
    long    tms_stime;
    long    tms_cutime;
    long    tms_cstime;
};

#define CLK_TCK 100	/* Linux uses 100 */

/*
 * Convert a struct timeval (passed by value, not by pointer) into
 * CLK_TCK-per-second ticks, discarding any fraction of a tick.  The
 * argument is parenthesized so that expressions such as *ptr work.
 */
#define CONVTCK(r)	((r).tv_sec * CLK_TCK + (r).tv_usec / (1000000 / CLK_TCK))
863 
864 int
865 linux_times(struct proc *p, struct linux_times_args *args)
866 {
867     struct timeval tv;
868     struct linux_times_argv tms;
869     struct rusage ru;
870     int error;
871 
872 #ifdef DEBUG
873     printf("Linux-emul(%ld): times(*)\n", (long)p->p_pid);
874 #endif
875     calcru(p, &ru.ru_utime, &ru.ru_stime, NULL);
876 
877     tms.tms_utime = CONVTCK(ru.ru_utime);
878     tms.tms_stime = CONVTCK(ru.ru_stime);
879 
880     tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime);
881     tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime);
882 
883     if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf,
884 	    	    sizeof(struct linux_times_argv))))
885 	return error;
886 
887     microuptime(&tv);
888     p->p_retval[0] = (int)CONVTCK(tv);
889     return 0;
890 }
891 
892 int
893 linux_newuname(struct proc *p, struct linux_newuname_args *args)
894 {
895 	struct linux_new_utsname utsname;
896 	char *osrelease, *osname;
897 
898 #ifdef DEBUG
899 	printf("Linux-emul(%ld): newuname(*)\n", (long)p->p_pid);
900 #endif
901 
902 	osname = linux_get_osname(p);
903 	osrelease = linux_get_osrelease(p);
904 
905 	bzero(&utsname, sizeof(struct linux_new_utsname));
906 	strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1);
907 	strncpy(utsname.nodename, hostname, LINUX_MAX_UTSNAME-1);
908 	strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1);
909 	strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1);
910 	strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1);
911 	strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1);
912 
913 	return (copyout((caddr_t)&utsname, (caddr_t)args->buf,
914 			sizeof(struct linux_new_utsname)));
915 }
916 
917 struct linux_utimbuf {
918 	linux_time_t l_actime;
919 	linux_time_t l_modtime;
920 };
921 
922 int
923 linux_utime(struct proc *p, struct linux_utime_args *args)
924 {
925     struct utimes_args /* {
926 	char	*path;
927 	struct	timeval *tptr;
928     } */ bsdutimes;
929     struct timeval tv[2], *tvp;
930     struct linux_utimbuf lut;
931     int error;
932     caddr_t sg;
933 
934     sg = stackgap_init();
935     CHECKALTEXIST(p, &sg, args->fname);
936 
937 #ifdef DEBUG
938     printf("Linux-emul(%ld): utime(%s, *)\n", (long)p->p_pid, args->fname);
939 #endif
940     if (args->times) {
941 	if ((error = copyin(args->times, &lut, sizeof lut)))
942 	    return error;
943 	tv[0].tv_sec = lut.l_actime;
944 	tv[0].tv_usec = 0;
945 	tv[1].tv_sec = lut.l_modtime;
946 	tv[1].tv_usec = 0;
947 	/* so that utimes can copyin */
948 	tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
949 	if ((error = copyout(tv, tvp, sizeof(tv))))
950 	    return error;
951 	bsdutimes.tptr = tvp;
952     } else
953 	bsdutimes.tptr = NULL;
954 
955     bsdutimes.path = args->fname;
956     return utimes(p, &bsdutimes);
957 }
958 
959 #define __WCLONE 0x80000000
960 
961 int
962 linux_waitpid(struct proc *p, struct linux_waitpid_args *args)
963 {
964     struct wait_args /* {
965 	int pid;
966 	int *status;
967 	int options;
968 	struct	rusage *rusage;
969     } */ tmp;
970     int error, tmpstat;
971 
972 #ifdef DEBUG
973     printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n",
974 	(long)p->p_pid, args->pid, (void *)args->status, args->options);
975 #endif
976     tmp.pid = args->pid;
977     tmp.status = args->status;
978     tmp.options = (args->options & (WNOHANG | WUNTRACED));
979     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
980     if (args->options & __WCLONE)
981 	tmp.options |= WLINUXCLONE;
982     tmp.rusage = NULL;
983 
984     if ((error = wait4(p, &tmp)) != 0)
985 	return error;
986 
987     if (args->status) {
988 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
989 	    return error;
990 	tmpstat &= 0xffff;
991 	if (WIFSIGNALED(tmpstat))
992 	    tmpstat = (tmpstat & 0xffffff80) |
993 		      BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
994 	else if (WIFSTOPPED(tmpstat))
995 	    tmpstat = (tmpstat & 0xffff00ff) |
996 		      (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
997 	return copyout(&tmpstat, args->status, sizeof(int));
998     } else
999 	return 0;
1000 }
1001 
1002 int
1003 linux_wait4(struct proc *p, struct linux_wait4_args *args)
1004 {
1005     struct wait_args /* {
1006 	int pid;
1007 	int *status;
1008 	int options;
1009 	struct	rusage *rusage;
1010     } */ tmp;
1011     int error, tmpstat;
1012 
1013 #ifdef DEBUG
1014     printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n",
1015 	(long)p->p_pid, args->pid, (void *)args->status, args->options,
1016 	(void *)args->rusage);
1017 #endif
1018     tmp.pid = args->pid;
1019     tmp.status = args->status;
1020     tmp.options = (args->options & (WNOHANG | WUNTRACED));
1021     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
1022     if (args->options & __WCLONE)
1023 	tmp.options |= WLINUXCLONE;
1024     tmp.rusage = args->rusage;
1025 
1026     if ((error = wait4(p, &tmp)) != 0)
1027 	return error;
1028 
1029     SIGDELSET(p->p_siglist, SIGCHLD);
1030 
1031     if (args->status) {
1032 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
1033 	    return error;
1034 	tmpstat &= 0xffff;
1035 	if (WIFSIGNALED(tmpstat))
1036 	    tmpstat = (tmpstat & 0xffffff80) |
1037 		  BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
1038 	else if (WIFSTOPPED(tmpstat))
1039 	    tmpstat = (tmpstat & 0xffff00ff) |
1040 		  (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
1041 	return copyout(&tmpstat, args->status, sizeof(int));
1042     } else
1043 	return 0;
1044 }
1045 
1046 int
1047 linux_mknod(struct proc *p, struct linux_mknod_args *args)
1048 {
1049 	caddr_t sg;
1050 	struct mknod_args bsd_mknod;
1051 	struct mkfifo_args bsd_mkfifo;
1052 
1053 	sg = stackgap_init();
1054 
1055 	CHECKALTCREAT(p, &sg, args->path);
1056 
1057 #ifdef DEBUG
1058 	printf("Linux-emul(%ld): mknod(%s, %d, %d)\n",
1059 	   (long)p->p_pid, args->path, args->mode, args->dev);
1060 #endif
1061 
1062 	if (args->mode & S_IFIFO) {
1063 		bsd_mkfifo.path = args->path;
1064 		bsd_mkfifo.mode = args->mode;
1065 		return mkfifo(p, &bsd_mkfifo);
1066 	} else {
1067 		bsd_mknod.path = args->path;
1068 		bsd_mknod.mode = args->mode;
1069 		bsd_mknod.dev = args->dev;
1070 		return mknod(p, &bsd_mknod);
1071 	}
1072 }
1073 
1074 /*
1075  * UGH! This is just about the dumbest idea I've ever heard!!
1076  */
1077 int
1078 linux_personality(struct proc *p, struct linux_personality_args *args)
1079 {
1080 #ifdef DEBUG
1081 	printf("Linux-emul(%ld): personality(%d)\n",
1082 	   (long)p->p_pid, args->per);
1083 #endif
1084 	if (args->per != 0)
1085 		return EINVAL;
1086 
1087 	/* Yes Jim, it's still a Linux... */
1088 	p->p_retval[0] = 0;
1089 	return 0;
1090 }
1091 
1092 /*
1093  * Wrappers for get/setitimer for debugging..
1094  */
1095 int
1096 linux_setitimer(struct proc *p, struct linux_setitimer_args *args)
1097 {
1098 	struct setitimer_args bsa;
1099 	struct itimerval foo;
1100 	int error;
1101 
1102 #ifdef DEBUG
1103 	printf("Linux-emul(%ld): setitimer(%p, %p)\n",
1104 	    (long)p->p_pid, (void *)args->itv, (void *)args->oitv);
1105 #endif
1106 	bsa.which = args->which;
1107 	bsa.itv = args->itv;
1108 	bsa.oitv = args->oitv;
1109 	if (args->itv) {
1110 	    if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo,
1111 			sizeof(foo))))
1112 		return error;
1113 #ifdef DEBUG
1114 	    printf("setitimer: value: sec: %ld, usec: %ld\n",
1115 		foo.it_value.tv_sec, foo.it_value.tv_usec);
1116 	    printf("setitimer: interval: sec: %ld, usec: %ld\n",
1117 		foo.it_interval.tv_sec, foo.it_interval.tv_usec);
1118 #endif
1119 	}
1120 	return setitimer(p, &bsa);
1121 }
1122 
1123 int
1124 linux_getitimer(struct proc *p, struct linux_getitimer_args *args)
1125 {
1126 	struct getitimer_args bsa;
1127 #ifdef DEBUG
1128 	printf("Linux-emul(%ld): getitimer(%p)\n",
1129 	    (long)p->p_pid, (void *)args->itv);
1130 #endif
1131 	bsa.which = args->which;
1132 	bsa.itv = args->itv;
1133 	return getitimer(p, &bsa);
1134 }
1135 
1136 int
1137 linux_ioperm(struct proc *p, struct linux_ioperm_args *args)
1138 {
1139 	struct sysarch_args sa;
1140 	struct i386_ioperm_args *iia;
1141 	caddr_t sg;
1142 
1143 	sg = stackgap_init();
1144 	iia = stackgap_alloc(&sg, sizeof(struct i386_ioperm_args));
1145 	iia->start = args->start;
1146 	iia->length = args->length;
1147 	iia->enable = args->enable;
1148 	sa.op = I386_SET_IOPERM;
1149 	sa.parms = (char *)iia;
1150 	return sysarch(p, &sa);
1151 }
1152 
1153 int
1154 linux_iopl(struct proc *p, struct linux_iopl_args *args)
1155 {
1156 	int error;
1157 
1158 	if (args->level < 0 || args->level > 3)
1159 		return (EINVAL);
1160 	if ((error = suser(p)) != 0)
1161 		return (error);
1162 	if (securelevel > 0)
1163 		return (EPERM);
1164 	p->p_md.md_regs->tf_eflags = (p->p_md.md_regs->tf_eflags & ~PSL_IOPL) |
1165 		(args->level * (PSL_IOPL / 3));
1166 	return (0);
1167 }
1168 
1169 int
1170 linux_nice(struct proc *p, struct linux_nice_args *args)
1171 {
1172 	struct setpriority_args	bsd_args;
1173 
1174 	bsd_args.which = PRIO_PROCESS;
1175 	bsd_args.who = 0;	/* current process */
1176 	bsd_args.prio = args->inc;
1177 	return setpriority(p, &bsd_args);
1178 }
1179 
1180 int
1181 linux_setgroups(p, uap)
1182 	struct proc *p;
1183 	struct linux_setgroups_args *uap;
1184 {
1185 	struct pcred *pc;
1186 	linux_gid_t linux_gidset[NGROUPS];
1187 	gid_t *bsd_gidset;
1188 	int ngrp, error;
1189 
1190 	pc = p->p_cred;
1191 	ngrp = uap->gidsetsize;
1192 
1193 	/*
1194 	 * cr_groups[0] holds egid. Setting the whole set from
1195 	 * the supplied set will cause egid to be changed too.
1196 	 * Keep cr_groups[0] unchanged to prevent that.
1197 	 */
1198 
1199 	if ((error = suser(p)) != 0)
1200 		return (error);
1201 
1202 	if (ngrp >= NGROUPS)
1203 		return (EINVAL);
1204 
1205 	pc->pc_ucred = crcopy(pc->pc_ucred);
1206 	if (ngrp > 0) {
1207 		error = copyin((caddr_t)uap->gidset, (caddr_t)linux_gidset,
1208 			       ngrp * sizeof(linux_gid_t));
1209 		if (error)
1210 			return (error);
1211 
1212 		pc->pc_ucred->cr_ngroups = ngrp + 1;
1213 
1214 		bsd_gidset = pc->pc_ucred->cr_groups;
1215 		ngrp--;
1216 		while (ngrp >= 0) {
1217 			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
1218 			ngrp--;
1219 		}
1220 	}
1221 	else
1222 		pc->pc_ucred->cr_ngroups = 1;
1223 
1224 	setsugid(p);
1225 	return (0);
1226 }
1227 
1228 int
1229 linux_getgroups(p, uap)
1230 	struct proc *p;
1231 	struct linux_getgroups_args *uap;
1232 {
1233 	struct pcred *pc;
1234 	linux_gid_t linux_gidset[NGROUPS];
1235 	gid_t *bsd_gidset;
1236 	int bsd_gidsetsz, ngrp, error;
1237 
1238 	pc = p->p_cred;
1239 	bsd_gidset = pc->pc_ucred->cr_groups;
1240 	bsd_gidsetsz = pc->pc_ucred->cr_ngroups - 1;
1241 
1242 	/*
1243 	 * cr_groups[0] holds egid. Returning the whole set
1244 	 * here will cause a duplicate. Exclude cr_groups[0]
1245 	 * to prevent that.
1246 	 */
1247 
1248 	if ((ngrp = uap->gidsetsize) == 0) {
1249 		p->p_retval[0] = bsd_gidsetsz;
1250 		return (0);
1251 	}
1252 
1253 	if (ngrp < bsd_gidsetsz)
1254 		return (EINVAL);
1255 
1256 	ngrp = 0;
1257 	while (ngrp < bsd_gidsetsz) {
1258 		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1259 		ngrp++;
1260 	}
1261 
1262 	if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset,
1263 	    ngrp * sizeof(linux_gid_t))))
1264 		return (error);
1265 
1266 	p->p_retval[0] = ngrp;
1267 	return (0);
1268 }
1269 
1270 int
1271 linux_setrlimit(p, uap)
1272      struct proc *p;
1273      struct linux_setrlimit_args *uap;
1274 {
1275     struct osetrlimit_args bsd;
1276 
1277 #ifdef DEBUG
1278     printf("Linux-emul(%ld): setrlimit(%d, %p)\n",
1279 	   (long)p->p_pid, uap->resource, (void *)uap->rlim);
1280 #endif
1281 
1282     if (uap->resource >= LINUX_RLIM_NLIMITS)
1283 	return EINVAL;
1284 
1285     bsd.which = linux_to_bsd_resource[uap->resource];
1286 
1287     if (bsd.which == -1)
1288 	return EINVAL;
1289 
1290     bsd.rlp = uap->rlim;
1291     return osetrlimit(p, &bsd);
1292 }
1293 
1294 int
1295 linux_getrlimit(p, uap)
1296      struct proc *p;
1297      struct linux_getrlimit_args *uap;
1298 {
1299     struct ogetrlimit_args bsd;
1300 
1301 #ifdef DEBUG
1302     printf("Linux-emul(%ld): getrlimit(%d, %p)\n",
1303 	   (long)p->p_pid, uap->resource, (void *)uap->rlim);
1304 #endif
1305 
1306     if (uap->resource >= LINUX_RLIM_NLIMITS)
1307 	return EINVAL;
1308 
1309     bsd.which = linux_to_bsd_resource[uap->resource];
1310 
1311     if (bsd.which == -1)
1312 	return EINVAL;
1313 
1314     bsd.rlp = uap->rlim;
1315     return ogetrlimit(p, &bsd);
1316 }
1317 
1318 int
1319 linux_sched_setscheduler(p, uap)
1320 	struct proc *p;
1321 	struct linux_sched_setscheduler_args *uap;
1322 {
1323 	struct sched_setscheduler_args bsd;
1324 
1325 #ifdef DEBUG
1326 	printf("Linux-emul(%ld): sched_setscheduler(%d, %d, %p)\n",
1327 	    (long)p->p_pid, uap->pid, uap->policy, (const void *)uap->param);
1328 #endif
1329 
1330 	switch (uap->policy) {
1331 	case LINUX_SCHED_OTHER:
1332 		bsd.policy = SCHED_OTHER;
1333 		break;
1334 	case LINUX_SCHED_FIFO:
1335 		bsd.policy = SCHED_FIFO;
1336 		break;
1337 	case LINUX_SCHED_RR:
1338 		bsd.policy = SCHED_RR;
1339 		break;
1340 	default:
1341 		return EINVAL;
1342 	}
1343 
1344 	bsd.pid = uap->pid;
1345 	bsd.param = uap->param;
1346 	return sched_setscheduler(p, &bsd);
1347 }
1348 
1349 int
1350 linux_sched_getscheduler(p, uap)
1351 	struct proc *p;
1352 	struct linux_sched_getscheduler_args *uap;
1353 {
1354 	struct sched_getscheduler_args bsd;
1355 	int error;
1356 
1357 #ifdef DEBUG
1358 	printf("Linux-emul(%ld): sched_getscheduler(%d)\n",
1359 	       (long)p->p_pid, uap->pid);
1360 #endif
1361 
1362 	bsd.pid = uap->pid;
1363 	error = sched_getscheduler(p, &bsd);
1364 
1365 	switch (p->p_retval[0]) {
1366 	case SCHED_OTHER:
1367 		p->p_retval[0] = LINUX_SCHED_OTHER;
1368 		break;
1369 	case SCHED_FIFO:
1370 		p->p_retval[0] = LINUX_SCHED_FIFO;
1371 		break;
1372 	case SCHED_RR:
1373 		p->p_retval[0] = LINUX_SCHED_RR;
1374 		break;
1375 	}
1376 
1377 	return error;
1378 }
1379 
/*
 * Segment description in the form the Linux modify_ldt() caller supplies it.
 * linux_modify_ldt() copies one of these in from user space and converts it
 * into a native segment descriptor.
 */
struct linux_descriptor {
	unsigned int  entry_number;		/* LDT slot to write */
	unsigned long base_addr;		/* segment base address */
	unsigned int  limit;			/* segment limit */
	unsigned int  seg_32bit:1;		/* copied to sd_def32 */
	unsigned int  contents:2;		/* shifted into sd_type */
	unsigned int  read_exec_only:1;		/* inverted into sd_type */
	unsigned int  limit_in_pages:1;		/* copied to sd_gran */
	unsigned int  seg_not_present:1;	/* inverted into sd_p */
	unsigned int  useable:1;		/* not consulted on conversion */
};
1391 
1392 int
1393 linux_modify_ldt(p, uap)
1394 	struct proc *p;
1395 	struct linux_modify_ldt_args *uap;
1396 {
1397 	int error;
1398 	caddr_t sg;
1399 	struct sysarch_args args;
1400 	struct i386_ldt_args *ldt;
1401 	struct linux_descriptor ld;
1402 	union descriptor *desc;
1403 
1404 	sg = stackgap_init();
1405 
1406 	if (uap->ptr == NULL)
1407 		return (EINVAL);
1408 
1409 	switch (uap->func) {
1410 	case 0x00: /* read_ldt */
1411 		ldt = stackgap_alloc(&sg, sizeof(*ldt));
1412 		ldt->start = 0;
1413 		ldt->descs = uap->ptr;
1414 		ldt->num = uap->bytecount / sizeof(union descriptor);
1415 		args.op = I386_GET_LDT;
1416 		args.parms = (char*)ldt;
1417 		error = sysarch(p, &args);
1418 		p->p_retval[0] *= sizeof(union descriptor);
1419 		break;
1420 	case 0x01: /* write_ldt */
1421 	case 0x11: /* write_ldt */
1422 		if (uap->bytecount != sizeof(ld))
1423 			return (EINVAL);
1424 
1425 		error = copyin(uap->ptr, &ld, sizeof(ld));
1426 		if (error)
1427 			return (error);
1428 
1429 		ldt = stackgap_alloc(&sg, sizeof(*ldt));
1430 		desc = stackgap_alloc(&sg, sizeof(*desc));
1431 		ldt->start = ld.entry_number;
1432 		ldt->descs = desc;
1433 		ldt->num = 1;
1434 		desc->sd.sd_lolimit = (ld.limit & 0x0000ffff);
1435 		desc->sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16;
1436 		desc->sd.sd_lobase = (ld.base_addr & 0x00ffffff);
1437 		desc->sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24;
1438 		desc->sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) |
1439 			(ld.contents << 2);
1440 		desc->sd.sd_dpl = 3;
1441 		desc->sd.sd_p = (ld.seg_not_present ^ 1);
1442 		desc->sd.sd_xx = 0;
1443 		desc->sd.sd_def32 = ld.seg_32bit;
1444 		desc->sd.sd_gran = ld.limit_in_pages;
1445 		args.op = I386_SET_LDT;
1446 		args.parms = (char*)ldt;
1447 		error = sysarch(p, &args);
1448 		break;
1449 	default:
1450 		error = EINVAL;
1451 		break;
1452 	}
1453 
1454 	if (error == EOPNOTSUPP) {
1455 		printf("linux: modify_ldt needs kernel option USER_LDT\n");
1456 		error = ENOSYS;
1457 	}
1458 
1459 	return (error);
1460 }
1461