xref: /freebsd/sys/compat/linux/linux_misc.c (revision f9ce010afdd3136fc73e2b500f2ed916bf9cfa59)
1 /*-
2  * Copyright (c) 1994-1995 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include "opt_compat.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/sysproto.h>
36 #include <sys/kernel.h>
37 #include <sys/mman.h>
38 #include <sys/proc.h>
39 #include <sys/fcntl.h>
40 #include <sys/imgact_aout.h>
41 #include <sys/mount.h>
42 #include <sys/namei.h>
43 #include <sys/resourcevar.h>
44 #include <sys/stat.h>
45 #include <sys/sysctl.h>
46 #include <sys/unistd.h>
47 #include <sys/vnode.h>
48 #include <sys/wait.h>
49 #include <sys/time.h>
50 
51 #include <vm/vm.h>
52 #include <vm/pmap.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_prot.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_extern.h>
57 
58 #include <machine/frame.h>
59 #include <machine/psl.h>
60 #include <machine/sysarch.h>
61 #include <machine/segments.h>
62 
63 #include <i386/linux/linux.h>
64 #include <i386/linux/linux_proto.h>
65 #include <i386/linux/linux_util.h>
66 #include <i386/linux/linux_mib.h>
67 
68 #include <posix4/sched.h>
69 
70 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] =
71 { RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
72   RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
73   RLIMIT_MEMLOCK, -1
74 };
75 
76 int
77 linux_alarm(struct proc *p, struct linux_alarm_args *args)
78 {
79     struct itimerval it, old_it;
80     struct timeval tv;
81     int s;
82 
83 #ifdef DEBUG
84     printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs);
85 #endif
86     if (args->secs > 100000000)
87 	return EINVAL;
88     it.it_value.tv_sec = (long)args->secs;
89     it.it_value.tv_usec = 0;
90     it.it_interval.tv_sec = 0;
91     it.it_interval.tv_usec = 0;
92     s = splsoftclock();
93     old_it = p->p_realtimer;
94     getmicrouptime(&tv);
95     if (timevalisset(&old_it.it_value))
96 	untimeout(realitexpire, (caddr_t)p, p->p_ithandle);
97     if (it.it_value.tv_sec != 0) {
98 	p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value));
99 	timevaladd(&it.it_value, &tv);
100     }
101     p->p_realtimer = it;
102     splx(s);
103     if (timevalcmp(&old_it.it_value, &tv, >)) {
104 	timevalsub(&old_it.it_value, &tv);
105 	if (old_it.it_value.tv_usec != 0)
106 	    old_it.it_value.tv_sec++;
107 	p->p_retval[0] = old_it.it_value.tv_sec;
108     }
109     return 0;
110 }
111 
112 int
113 linux_brk(struct proc *p, struct linux_brk_args *args)
114 {
115 #if 0
116     struct vmspace *vm = p->p_vmspace;
117     vm_offset_t new, old;
118     int error;
119 
120     if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr)
121 	return EINVAL;
122     if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr)
123 	> p->p_rlimit[RLIMIT_DATA].rlim_cur)
124 	return ENOMEM;
125 
126     old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize);
127     new = round_page((vm_offset_t)args->dsend);
128     p->p_retval[0] = old;
129     if ((new-old) > 0) {
130 	if (swap_pager_full)
131 	    return ENOMEM;
132 	error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE,
133 			VM_PROT_ALL, VM_PROT_ALL, 0);
134 	if (error)
135 	    return error;
136 	vm->vm_dsize += btoc((new-old));
137 	p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize));
138     }
139     return 0;
140 #else
141     struct vmspace *vm = p->p_vmspace;
142     vm_offset_t new, old;
143     struct obreak_args /* {
144 	char * nsize;
145     } */ tmp;
146 
147 #ifdef DEBUG
148     printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend);
149 #endif
150     old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
151     new = (vm_offset_t)args->dsend;
152     tmp.nsize = (char *) new;
153     if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp))
154 	p->p_retval[0] = (int)new;
155     else
156 	p->p_retval[0] = (int)old;
157 
158     return 0;
159 #endif
160 }
161 
162 int
163 linux_uselib(struct proc *p, struct linux_uselib_args *args)
164 {
165     struct nameidata ni;
166     struct vnode *vp;
167     struct exec *a_out;
168     struct vattr attr;
169     vm_offset_t vmaddr;
170     unsigned long file_offset;
171     vm_offset_t buffer;
172     unsigned long bss_size;
173     int error;
174     caddr_t sg;
175     int locked;
176 
177     sg = stackgap_init();
178     CHECKALTEXIST(p, &sg, args->library);
179 
180 #ifdef DEBUG
181     printf("Linux-emul(%ld): uselib(%s)\n", (long)p->p_pid, args->library);
182 #endif
183 
184     a_out = NULL;
185     locked = 0;
186     vp = NULL;
187 
188     NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, args->library, p);
189     error = namei(&ni);
190     if (error)
191 	goto cleanup;
192 
193     vp = ni.ni_vp;
194     if (vp == NULL) {
195 	error = ENOEXEC;	/* ?? */
196 	goto cleanup;
197     }
198 
199     /*
200      * From here on down, we have a locked vnode that must be unlocked.
201      */
202     locked++;
203 
204     /*
205      * Writable?
206      */
207     if (vp->v_writecount) {
208 	error = ETXTBSY;
209 	goto cleanup;
210     }
211 
212     /*
213      * Executable?
214      */
215     error = VOP_GETATTR(vp, &attr, p->p_ucred, p);
216     if (error)
217 	goto cleanup;
218 
219     if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
220 	((attr.va_mode & 0111) == 0) ||
221 	(attr.va_type != VREG)) {
222 	    error = ENOEXEC;
223 	    goto cleanup;
224     }
225 
226     /*
227      * Sensible size?
228      */
229     if (attr.va_size == 0) {
230 	error = ENOEXEC;
231 	goto cleanup;
232     }
233 
234     /*
235      * Can we access it?
236      */
237     error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
238     if (error)
239 	goto cleanup;
240 
241     error = VOP_OPEN(vp, FREAD, p->p_ucred, p);
242     if (error)
243 	goto cleanup;
244 
245     /*
246      * Lock no longer needed
247      */
248     VOP_UNLOCK(vp, 0, p);
249     locked = 0;
250 
251     /*
252      * Pull in executable header into kernel_map
253      */
254     error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
255 	    	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
256     if (error)
257 	goto cleanup;
258 
259     /*
260      * Is it a Linux binary ?
261      */
262     if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
263 	error = ENOEXEC;
264 	goto cleanup;
265     }
266 
267     /* While we are here, we should REALLY do some more checks */
268 
269     /*
270      * Set file/virtual offset based on a.out variant.
271      */
272     switch ((int)(a_out->a_magic & 0xffff)) {
273     case 0413:	/* ZMAGIC */
274 	file_offset = 1024;
275 	break;
276     case 0314:	/* QMAGIC */
277 	file_offset = 0;
278 	break;
279     default:
280 	error = ENOEXEC;
281 	goto cleanup;
282     }
283 
284     bss_size = round_page(a_out->a_bss);
285 
286     /*
287      * Check various fields in header for validity/bounds.
288      */
289     if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
290 	error = ENOEXEC;
291 	goto cleanup;
292     }
293 
294     /* text + data can't exceed file size */
295     if (a_out->a_data + a_out->a_text > attr.va_size) {
296 	error = EFAULT;
297 	goto cleanup;
298     }
299 
300     /*
301      * text/data/bss must not exceed limits
302      * XXX: this is not complete. it should check current usage PLUS
303      * the resources needed by this library.
304      */
305     if (a_out->a_text > MAXTSIZ ||
306 	a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) {
307 	error = ENOMEM;
308 	goto cleanup;
309     }
310 
311     /*
312      * prevent more writers
313      */
314     vp->v_flag |= VTEXT;
315 
316     /*
317      * Check if file_offset page aligned,.
318      * Currently we cannot handle misalinged file offsets,
319      * and so we read in the entire image (what a waste).
320      */
321     if (file_offset & PAGE_MASK) {
322 #ifdef DEBUG
323 printf("uselib: Non page aligned binary %lu\n", file_offset);
324 #endif
325 	/*
326 	 * Map text+data read/write/execute
327 	 */
328 
329 	/* a_entry is the load address and is page aligned */
330 	vmaddr = trunc_page(a_out->a_entry);
331 
332 	/* get anon user mapping, read+write+execute */
333 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
334 		    	    a_out->a_text + a_out->a_data, FALSE,
335 			    VM_PROT_ALL, VM_PROT_ALL, 0);
336 	if (error)
337 	    goto cleanup;
338 
339 	/* map file into kernel_map */
340 	error = vm_mmap(kernel_map, &buffer,
341 			round_page(a_out->a_text + a_out->a_data + file_offset),
342 		   	VM_PROT_READ, VM_PROT_READ, 0,
343 			(caddr_t)vp, trunc_page(file_offset));
344 	if (error)
345 	    goto cleanup;
346 
347 	/* copy from kernel VM space to user space */
348 	error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset),
349 			(caddr_t)vmaddr, a_out->a_text + a_out->a_data);
350 
351 	/* release temporary kernel space */
352 	vm_map_remove(kernel_map, buffer,
353 		      buffer + round_page(a_out->a_text + a_out->a_data + file_offset));
354 
355 	if (error)
356 	    goto cleanup;
357     }
358     else {
359 #ifdef DEBUG
360 printf("uselib: Page aligned binary %lu\n", file_offset);
361 #endif
362 	/*
363 	 * for QMAGIC, a_entry is 20 bytes beyond the load address
364 	 * to skip the executable header
365 	 */
366 	vmaddr = trunc_page(a_out->a_entry);
367 
368 	/*
369 	 * Map it all into the process's space as a single copy-on-write
370 	 * "data" segment.
371 	 */
372 	error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr,
373 		   	a_out->a_text + a_out->a_data,
374 			VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
375 			(caddr_t)vp, file_offset);
376 	if (error)
377 	    goto cleanup;
378     }
379 #ifdef DEBUG
380 printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]);
381 #endif
382     if (bss_size != 0) {
383         /*
384 	 * Calculate BSS start address
385 	 */
386 	vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data;
387 
388 	/*
389 	 * allocate some 'anon' space
390 	 */
391 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
392 			    bss_size, FALSE,
393 			    VM_PROT_ALL, VM_PROT_ALL, 0);
394 	if (error)
395 	    goto cleanup;
396     }
397 
398 cleanup:
399     /*
400      * Unlock vnode if needed
401      */
402     if (locked)
403 	VOP_UNLOCK(vp, 0, p);
404 
405     /*
406      * Release the kernel mapping.
407      */
408     if (a_out)
409 	vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE);
410 
411     return error;
412 }
413 
/*
 * Argument block that linux_select() copies in from user space; its
 * fields are forwarded one-for-one to linux_newselect().
 * XXX move
 */
struct linux_select_argv {
	int		nfds;		/* forwarded as newselect's nfds */
	fd_set		*readfds;	/* user pointer, forwarded as-is */
	fd_set		*writefds;	/* user pointer, forwarded as-is */
	fd_set		*exceptfds;	/* user pointer, forwarded as-is */
	struct timeval	*timeout;	/* user pointer, forwarded as-is */
};
422 
423 int
424 linux_select(struct proc *p, struct linux_select_args *args)
425 {
426     struct linux_select_argv linux_args;
427     struct linux_newselect_args newsel;
428     int error;
429 
430 #ifdef SELECT_DEBUG
431     printf("Linux-emul(%ld): select(%x)\n", (long)p->p_pid, args->ptr);
432 #endif
433     if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
434 			sizeof(linux_args))))
435 	return error;
436 
437     newsel.nfds = linux_args.nfds;
438     newsel.readfds = linux_args.readfds;
439     newsel.writefds = linux_args.writefds;
440     newsel.exceptfds = linux_args.exceptfds;
441     newsel.timeout = linux_args.timeout;
442 
443     return linux_newselect(p, &newsel);
444 }
445 
446 int
447 linux_newselect(struct proc *p, struct linux_newselect_args *args)
448 {
449     struct select_args bsa;
450     struct timeval tv0, tv1, utv, *tvp;
451     caddr_t sg;
452     int error;
453 
454 #ifdef DEBUG
455     printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n",
456   	(long)p->p_pid, args->nfds, (void *)args->readfds,
457 	(void *)args->writefds, (void *)args->exceptfds,
458 	(void *)args->timeout);
459 #endif
460     error = 0;
461     bsa.nd = args->nfds;
462     bsa.in = args->readfds;
463     bsa.ou = args->writefds;
464     bsa.ex = args->exceptfds;
465     bsa.tv = args->timeout;
466 
467     /*
468      * Store current time for computation of the amount of
469      * time left.
470      */
471     if (args->timeout) {
472 	if ((error = copyin(args->timeout, &utv, sizeof(utv))))
473 	    goto select_out;
474 #ifdef DEBUG
475 	printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n",
476 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
477 #endif
478 	if (itimerfix(&utv)) {
479 	    /*
480 	     * The timeval was invalid.  Convert it to something
481 	     * valid that will act as it does under Linux.
482 	     */
483 	    sg = stackgap_init();
484 	    tvp = stackgap_alloc(&sg, sizeof(utv));
485 	    utv.tv_sec += utv.tv_usec / 1000000;
486 	    utv.tv_usec %= 1000000;
487 	    if (utv.tv_usec < 0) {
488 		utv.tv_sec -= 1;
489 		utv.tv_usec += 1000000;
490 	    }
491 	    if (utv.tv_sec < 0)
492 		timevalclear(&utv);
493 	    if ((error = copyout(&utv, tvp, sizeof(utv))))
494 		goto select_out;
495 	    bsa.tv = tvp;
496 	}
497 	microtime(&tv0);
498     }
499 
500     error = select(p, &bsa);
501 #ifdef DEBUG
502     printf("Linux-emul(%ld): real select returns %d\n", (long)p->p_pid, error);
503 #endif
504 
505     if (error) {
506 	/*
507 	 * See fs/select.c in the Linux kernel.  Without this,
508 	 * Maelstrom doesn't work.
509 	 */
510 	if (error == ERESTART)
511 	    error = EINTR;
512 	goto select_out;
513     }
514 
515     if (args->timeout) {
516 	if (p->p_retval[0]) {
517 	    /*
518 	     * Compute how much time was left of the timeout,
519 	     * by subtracting the current time and the time
520 	     * before we started the call, and subtracting
521 	     * that result from the user-supplied value.
522 	     */
523 	    microtime(&tv1);
524 	    timevalsub(&tv1, &tv0);
525 	    timevalsub(&utv, &tv1);
526 	    if (utv.tv_sec < 0)
527 		timevalclear(&utv);
528 	} else
529 	    timevalclear(&utv);
530 #ifdef DEBUG
531 	printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n",
532 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
533 #endif
534 	if ((error = copyout(&utv, args->timeout, sizeof(utv))))
535 	    goto select_out;
536     }
537 
538 select_out:
539 #ifdef DEBUG
540     printf("Linux-emul(%ld): newselect_out -> %d\n", (long)p->p_pid, error);
541 #endif
542     return error;
543 }
544 
545 int
546 linux_getpgid(struct proc *p, struct linux_getpgid_args *args)
547 {
548     struct proc *curp;
549 
550 #ifdef DEBUG
551     printf("Linux-emul(%ld): getpgid(%d)\n", (long)p->p_pid, args->pid);
552 #endif
553     if (args->pid != p->p_pid) {
554 	if (!(curp = pfind(args->pid)))
555 	    return ESRCH;
556     }
557     else
558 	curp = p;
559     p->p_retval[0] = curp->p_pgid;
560     return 0;
561 }
562 
563 int
564 linux_fork(struct proc *p, struct linux_fork_args *args)
565 {
566     int error;
567 
568 #ifdef DEBUG
569     printf("Linux-emul(%ld): fork()\n", (long)p->p_pid);
570 #endif
571     if ((error = fork(p, (struct fork_args *)args)) != 0)
572 	return error;
573     if (p->p_retval[1] == 1)
574 	p->p_retval[0] = 0;
575     return 0;
576 }
577 
578 int
579 linux_vfork(struct proc *p, struct linux_vfork_args *args)
580 {
581 	int error;
582 
583 #ifdef DEBUG
584 	printf("Linux-emul(%ld): vfork()\n", (long)p->p_pid);
585 #endif
586 
587 	if ((error = vfork(p, (struct vfork_args *)args)) != 0)
588 		return error;
589 	/* Are we the child? */
590 	if (p->p_retval[1] == 1)
591 		p->p_retval[0] = 0;
592 	return 0;
593 }
594 
595 #define CLONE_VM	0x100
596 #define CLONE_FS	0x200
597 #define CLONE_FILES	0x400
598 #define CLONE_SIGHAND	0x800
599 #define CLONE_PID	0x1000
600 
601 int
602 linux_clone(struct proc *p, struct linux_clone_args *args)
603 {
604     int error, ff = RFPROC;
605     struct proc *p2;
606     int            exit_signal;
607     vm_offset_t    start;
608     struct rfork_args rf_args;
609 
610 #ifdef DEBUG
611     if (args->flags & CLONE_PID)
612 	printf("linux_clone(%ld): CLONE_PID not yet supported\n",
613 	       (long)p->p_pid);
614     printf("linux_clone(%ld): invoked with flags %x and stack %x\n",
615 	   (long)p->p_pid, (unsigned int)args->flags,
616 	   (unsigned int)args->stack);
617 #endif
618 
619     if (!args->stack)
620         return (EINVAL);
621 
622     exit_signal = args->flags & 0x000000ff;
623     if (exit_signal >= LINUX_NSIG)
624 	return EINVAL;
625     exit_signal = linux_to_bsd_signal[exit_signal];
626 
627     /* RFTHREAD probably not necessary here, but it shouldn't hurt either */
628     ff |= RFTHREAD;
629 
630     if (args->flags & CLONE_VM)
631 	ff |= RFMEM;
632     if (args->flags & CLONE_SIGHAND)
633 	ff |= RFSIGSHARE;
634     if (!(args->flags & CLONE_FILES))
635 	ff |= RFFDG;
636 
637     error = 0;
638     start = 0;
639 
640     rf_args.flags = ff;
641     if ((error = rfork(p, &rf_args)) != 0)
642 	return error;
643 
644     p2 = pfind(p->p_retval[0]);
645     if (p2 == 0)
646  	return ESRCH;
647 
648     p2->p_sigparent = exit_signal;
649     p2->p_md.md_regs->tf_esp = (unsigned int)args->stack;
650 
651 #ifdef DEBUG
652     printf ("linux_clone(%ld): successful rfork to %ld\n",
653 	    (long)p->p_pid, (long)p2->p_pid);
654 #endif
655     return 0;
656 }
657 
658 /* XXX move */
659 struct linux_mmap_argv {
660 	linux_caddr_t addr;
661 	int len;
662 	int prot;
663 	int flags;
664 	int fd;
665 	int pos;
666 };
667 
668 #define STACK_SIZE  (2 * 1024 * 1024)
669 #define GUARD_SIZE  (4 * PAGE_SIZE)
670 int
671 linux_mmap(struct proc *p, struct linux_mmap_args *args)
672 {
673     struct mmap_args /* {
674 	caddr_t addr;
675 	size_t len;
676 	int prot;
677 	int flags;
678 	int fd;
679 	long pad;
680 	off_t pos;
681     } */ bsd_args;
682     int error;
683     struct linux_mmap_argv linux_args;
684 
685     if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
686 			sizeof(linux_args))))
687 	return error;
688 #ifdef DEBUG
689     printf("Linux-emul(%ld): mmap(%p, %d, %d, %08x, %d, %d)\n",
690 	(long)p->p_pid, (void *)linux_args.addr, linux_args.len,
691 	linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos);
692 #endif
693     bsd_args.flags = 0;
694     if (linux_args.flags & LINUX_MAP_SHARED)
695 	bsd_args.flags |= MAP_SHARED;
696     if (linux_args.flags & LINUX_MAP_PRIVATE)
697 	bsd_args.flags |= MAP_PRIVATE;
698     if (linux_args.flags & LINUX_MAP_FIXED)
699 	bsd_args.flags |= MAP_FIXED;
700     if (linux_args.flags & LINUX_MAP_ANON)
701 	bsd_args.flags |= MAP_ANON;
702     if (linux_args.flags & LINUX_MAP_GROWSDOWN) {
703 	bsd_args.flags |= MAP_STACK;
704 
705 	/* The linux MAP_GROWSDOWN option does not limit auto
706 	 * growth of the region.  Linux mmap with this option
707 	 * takes as addr the inital BOS, and as len, the initial
708 	 * region size.  It can then grow down from addr without
709 	 * limit.  However, linux threads has an implicit internal
710 	 * limit to stack size of STACK_SIZE.  Its just not
711 	 * enforced explicitly in linux.  But, here we impose
712 	 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
713 	 * region, since we can do this with our mmap.
714 	 *
715 	 * Our mmap with MAP_STACK takes addr as the maximum
716 	 * downsize limit on BOS, and as len the max size of
717 	 * the region.  It them maps the top SGROWSIZ bytes,
718 	 * and autgrows the region down, up to the limit
719 	 * in addr.
720 	 *
721 	 * If we don't use the MAP_STACK option, the effect
722 	 * of this code is to allocate a stack region of a
723 	 * fixed size of (STACK_SIZE - GUARD_SIZE).
724 	 */
725 
726 	/* This gives us TOS */
727 	bsd_args.addr = linux_args.addr + linux_args.len;
728 
729 	/* This gives us our maximum stack size */
730 	if (linux_args.len > STACK_SIZE - GUARD_SIZE)
731 	    bsd_args.len = linux_args.len;
732 	else
733 	    bsd_args.len  = STACK_SIZE - GUARD_SIZE;
734 
735 	/* This gives us a new BOS.  If we're using VM_STACK, then
736 	 * mmap will just map the top SGROWSIZ bytes, and let
737 	 * the stack grow down to the limit at BOS.  If we're
738 	 * not using VM_STACK we map the full stack, since we
739 	 * don't have a way to autogrow it.
740 	 */
741 	bsd_args.addr -= bsd_args.len;
742 
743     } else {
744 	bsd_args.addr = linux_args.addr;
745 	bsd_args.len  = linux_args.len;
746     }
747 
748     bsd_args.prot = linux_args.prot | PROT_READ;	/* always required */
749     bsd_args.fd = linux_args.fd;
750     bsd_args.pos = linux_args.pos;
751     bsd_args.pad = 0;
752     return mmap(p, &bsd_args);
753 }
754 
755 int
756 linux_mremap(struct proc *p, struct linux_mremap_args *args)
757 {
758 	struct munmap_args /* {
759 		void *addr;
760 		size_t len;
761 	} */ bsd_args;
762 	int error = 0;
763 
764 #ifdef DEBUG
765 	printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n",
766 	    (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len,
767 	    args->flags);
768 #endif
769 	args->new_len = round_page(args->new_len);
770 	args->old_len = round_page(args->old_len);
771 
772 	if (args->new_len > args->old_len) {
773 		p->p_retval[0] = 0;
774 		return ENOMEM;
775 	}
776 
777 	if (args->new_len < args->old_len) {
778 		bsd_args.addr = args->addr + args->new_len;
779 		bsd_args.len = args->old_len - args->new_len;
780 		error = munmap(p, &bsd_args);
781 	}
782 
783 	p->p_retval[0] = error ? 0 : (int)args->addr;
784 	return error;
785 }
786 
787 int
788 linux_msync(struct proc *p, struct linux_msync_args *args)
789 {
790 	struct msync_args bsd_args;
791 
792 	bsd_args.addr = args->addr;
793 	bsd_args.len = args->len;
794 	bsd_args.flags = 0;	/* XXX ignore */
795 
796 	return msync(p, &bsd_args);
797 }
798 
799 int
800 linux_pipe(struct proc *p, struct linux_pipe_args *args)
801 {
802     int error;
803     int reg_edx;
804 
805 #ifdef DEBUG
806     printf("Linux-emul(%ld): pipe(*)\n", (long)p->p_pid);
807 #endif
808     reg_edx = p->p_retval[1];
809     error = pipe(p, 0);
810     if (error) {
811 	p->p_retval[1] = reg_edx;
812 	return error;
813     }
814 
815     error = copyout(p->p_retval, args->pipefds, 2*sizeof(int));
816     if (error) {
817 	p->p_retval[1] = reg_edx;
818 	return error;
819     }
820 
821     p->p_retval[1] = reg_edx;
822     p->p_retval[0] = 0;
823     return 0;
824 }
825 
826 int
827 linux_time(struct proc *p, struct linux_time_args *args)
828 {
829     struct timeval tv;
830     linux_time_t tm;
831     int error;
832 
833 #ifdef DEBUG
834     printf("Linux-emul(%ld): time(*)\n", (long)p->p_pid);
835 #endif
836     microtime(&tv);
837     tm = tv.tv_sec;
838     if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t))))
839 	return error;
840     p->p_retval[0] = tm;
841     return 0;
842 }
843 
/*
 * Result block that linux_times() copies out to user space.  Every
 * field is filled in by CONVTCK(), i.e. counted in CLK_TCK ticks.
 */
struct linux_times_argv {
	long	tms_utime;	/* from calcru(): user time */
	long	tms_stime;	/* from calcru(): system time */
	long	tms_cutime;	/* from p_stats->p_cru.ru_utime */
	long	tms_cstime;	/* from p_stats->p_cru.ru_stime */
};
850 
851 #define CLK_TCK 100	/* Linux uses 100 */
852 #define CONVTCK(r)	(r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
853 
854 int
855 linux_times(struct proc *p, struct linux_times_args *args)
856 {
857     struct timeval tv;
858     struct linux_times_argv tms;
859     struct rusage ru;
860     int error;
861 
862 #ifdef DEBUG
863     printf("Linux-emul(%ld): times(*)\n", (long)p->p_pid);
864 #endif
865     calcru(p, &ru.ru_utime, &ru.ru_stime, NULL);
866 
867     tms.tms_utime = CONVTCK(ru.ru_utime);
868     tms.tms_stime = CONVTCK(ru.ru_stime);
869 
870     tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime);
871     tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime);
872 
873     if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf,
874 	    	    sizeof(struct linux_times_argv))))
875 	return error;
876 
877     microuptime(&tv);
878     p->p_retval[0] = (int)CONVTCK(tv);
879     return 0;
880 }
881 
882 int
883 linux_newuname(struct proc *p, struct linux_newuname_args *args)
884 {
885 	struct linux_new_utsname utsname;
886 	char *osrelease, *osname;
887 
888 #ifdef DEBUG
889 	printf("Linux-emul(%ld): newuname(*)\n", (long)p->p_pid);
890 #endif
891 
892 	osname = linux_get_osname(p);
893 	osrelease = linux_get_osrelease(p);
894 
895 	bzero(&utsname, sizeof(struct linux_new_utsname));
896 	strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1);
897 	strncpy(utsname.nodename, hostname, LINUX_MAX_UTSNAME-1);
898 	strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1);
899 	strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1);
900 	strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1);
901 	strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1);
902 
903 	return (copyout((caddr_t)&utsname, (caddr_t)args->buf,
904 			sizeof(struct linux_new_utsname)));
905 }
906 
907 struct linux_utimbuf {
908 	linux_time_t l_actime;
909 	linux_time_t l_modtime;
910 };
911 
912 int
913 linux_utime(struct proc *p, struct linux_utime_args *args)
914 {
915     struct utimes_args /* {
916 	char	*path;
917 	struct	timeval *tptr;
918     } */ bsdutimes;
919     struct timeval tv[2], *tvp;
920     struct linux_utimbuf lut;
921     int error;
922     caddr_t sg;
923 
924     sg = stackgap_init();
925     CHECKALTEXIST(p, &sg, args->fname);
926 
927 #ifdef DEBUG
928     printf("Linux-emul(%ld): utime(%s, *)\n", (long)p->p_pid, args->fname);
929 #endif
930     if (args->times) {
931 	if ((error = copyin(args->times, &lut, sizeof lut)))
932 	    return error;
933 	tv[0].tv_sec = lut.l_actime;
934 	tv[0].tv_usec = 0;
935 	tv[1].tv_sec = lut.l_modtime;
936 	tv[1].tv_usec = 0;
937 	/* so that utimes can copyin */
938 	tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
939 	if ((error = copyout(tv, tvp, sizeof(tv))))
940 	    return error;
941 	bsdutimes.tptr = tvp;
942     } else
943 	bsdutimes.tptr = NULL;
944 
945     bsdutimes.path = args->fname;
946     return utimes(p, &bsdutimes);
947 }
948 
949 #define __WCLONE 0x80000000
950 
951 int
952 linux_waitpid(struct proc *p, struct linux_waitpid_args *args)
953 {
954     struct wait_args /* {
955 	int pid;
956 	int *status;
957 	int options;
958 	struct	rusage *rusage;
959     } */ tmp;
960     int error, tmpstat;
961 
962 #ifdef DEBUG
963     printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n",
964 	(long)p->p_pid, args->pid, (void *)args->status, args->options);
965 #endif
966     tmp.pid = args->pid;
967     tmp.status = args->status;
968     tmp.options = (args->options & (WNOHANG | WUNTRACED));
969     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
970     if (args->options & __WCLONE)
971 	tmp.options |= WLINUXCLONE;
972     tmp.rusage = NULL;
973 
974     if ((error = wait4(p, &tmp)) != 0)
975 	return error;
976 
977     if (args->status) {
978 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
979 	    return error;
980 	if (WIFSIGNALED(tmpstat))
981 	    tmpstat = (tmpstat & 0xffffff80) |
982 		      bsd_to_linux_signal[WTERMSIG(tmpstat)];
983 	else if (WIFSTOPPED(tmpstat))
984 	    tmpstat = (tmpstat & 0xffff00ff) |
985 		      (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8);
986 	return copyout(&tmpstat, args->status, sizeof(int));
987     } else
988 	return 0;
989 }
990 
991 int
992 linux_wait4(struct proc *p, struct linux_wait4_args *args)
993 {
994     struct wait_args /* {
995 	int pid;
996 	int *status;
997 	int options;
998 	struct	rusage *rusage;
999     } */ tmp;
1000     int error, tmpstat;
1001 
1002 #ifdef DEBUG
1003     printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n",
1004 	(long)p->p_pid, args->pid, (void *)args->status, args->options,
1005 	(void *)args->rusage);
1006 #endif
1007     tmp.pid = args->pid;
1008     tmp.status = args->status;
1009     tmp.options = (args->options & (WNOHANG | WUNTRACED));
1010     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
1011     if (args->options & __WCLONE)
1012 	tmp.options |= WLINUXCLONE;
1013     tmp.rusage = args->rusage;
1014 
1015     if ((error = wait4(p, &tmp)) != 0)
1016 	return error;
1017 
1018     p->p_siglist &= ~sigmask(SIGCHLD);
1019 
1020     if (args->status) {
1021 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
1022 	    return error;
1023 	if (WIFSIGNALED(tmpstat))
1024 	    tmpstat = (tmpstat & 0xffffff80) |
1025 		  bsd_to_linux_signal[WTERMSIG(tmpstat)];
1026 	else if (WIFSTOPPED(tmpstat))
1027 	    tmpstat = (tmpstat & 0xffff00ff) |
1028 		  (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8);
1029 	return copyout(&tmpstat, args->status, sizeof(int));
1030     } else
1031 	return 0;
1032 }
1033 
1034 int
1035 linux_mknod(struct proc *p, struct linux_mknod_args *args)
1036 {
1037 	caddr_t sg;
1038 	struct mknod_args bsd_mknod;
1039 	struct mkfifo_args bsd_mkfifo;
1040 
1041 	sg = stackgap_init();
1042 
1043 	CHECKALTCREAT(p, &sg, args->path);
1044 
1045 #ifdef DEBUG
1046 	printf("Linux-emul(%ld): mknod(%s, %d, %d)\n",
1047 	   (long)p->p_pid, args->path, args->mode, args->dev);
1048 #endif
1049 
1050 	if (args->mode & S_IFIFO) {
1051 		bsd_mkfifo.path = args->path;
1052 		bsd_mkfifo.mode = args->mode;
1053 		return mkfifo(p, &bsd_mkfifo);
1054 	} else {
1055 		bsd_mknod.path = args->path;
1056 		bsd_mknod.mode = args->mode;
1057 		bsd_mknod.dev = args->dev;
1058 		return mknod(p, &bsd_mknod);
1059 	}
1060 }
1061 
1062 /*
1063  * UGH! This is just about the dumbest idea I've ever heard!!
1064  */
1065 int
1066 linux_personality(struct proc *p, struct linux_personality_args *args)
1067 {
1068 #ifdef DEBUG
1069 	printf("Linux-emul(%ld): personality(%d)\n",
1070 	   (long)p->p_pid, args->per);
1071 #endif
1072 	if (args->per != 0)
1073 		return EINVAL;
1074 
1075 	/* Yes Jim, it's still a Linux... */
1076 	p->p_retval[0] = 0;
1077 	return 0;
1078 }
1079 
1080 /*
1081  * Wrappers for get/setitimer for debugging..
1082  */
1083 int
1084 linux_setitimer(struct proc *p, struct linux_setitimer_args *args)
1085 {
1086 	struct setitimer_args bsa;
1087 	struct itimerval foo;
1088 	int error;
1089 
1090 #ifdef DEBUG
1091 	printf("Linux-emul(%ld): setitimer(%p, %p)\n",
1092 	    (long)p->p_pid, (void *)args->itv, (void *)args->oitv);
1093 #endif
1094 	bsa.which = args->which;
1095 	bsa.itv = args->itv;
1096 	bsa.oitv = args->oitv;
1097 	if (args->itv) {
1098 	    if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo,
1099 			sizeof(foo))))
1100 		return error;
1101 #ifdef DEBUG
1102 	    printf("setitimer: value: sec: %ld, usec: %ld\n",
1103 		foo.it_value.tv_sec, foo.it_value.tv_usec);
1104 	    printf("setitimer: interval: sec: %ld, usec: %ld\n",
1105 		foo.it_interval.tv_sec, foo.it_interval.tv_usec);
1106 #endif
1107 	}
1108 	return setitimer(p, &bsa);
1109 }
1110 
1111 int
1112 linux_getitimer(struct proc *p, struct linux_getitimer_args *args)
1113 {
1114 	struct getitimer_args bsa;
1115 #ifdef DEBUG
1116 	printf("Linux-emul(%ld): getitimer(%p)\n",
1117 	    (long)p->p_pid, (void *)args->itv);
1118 #endif
1119 	bsa.which = args->which;
1120 	bsa.itv = args->itv;
1121 	return getitimer(p, &bsa);
1122 }
1123 
1124 int
1125 linux_iopl(struct proc *p, struct linux_iopl_args *args)
1126 {
1127 	int error;
1128 
1129 	error = suser(p);
1130 	if (error != 0)
1131 		return error;
1132 	if (securelevel > 0)
1133 		return EPERM;
1134 	p->p_md.md_regs->tf_eflags |= PSL_IOPL;
1135 	return 0;
1136 }
1137 
1138 int
1139 linux_nice(struct proc *p, struct linux_nice_args *args)
1140 {
1141 	struct setpriority_args	bsd_args;
1142 
1143 	bsd_args.which = PRIO_PROCESS;
1144 	bsd_args.who = 0;	/* current process */
1145 	bsd_args.prio = args->inc;
1146 	return setpriority(p, &bsd_args);
1147 }
1148 
1149 int
1150 linux_setgroups(p, uap)
1151 	struct proc *p;
1152 	struct linux_setgroups_args *uap;
1153 {
1154 	struct pcred *pc;
1155 	linux_gid_t linux_gidset[NGROUPS];
1156 	gid_t *bsd_gidset;
1157 	int ngrp, error;
1158 
1159 	pc = p->p_cred;
1160 	ngrp = uap->gidsetsize;
1161 
1162 	/*
1163 	 * cr_groups[0] holds egid. Setting the whole set from
1164 	 * the supplied set will cause egid to be changed too.
1165 	 * Keep cr_groups[0] unchanged to prevent that.
1166 	 */
1167 
1168 	if ((error = suser(p)) != 0)
1169 		return (error);
1170 
1171 	if (ngrp >= NGROUPS)
1172 		return (EINVAL);
1173 
1174 	pc->pc_ucred = crcopy(pc->pc_ucred);
1175 	if (ngrp > 0) {
1176 		error = copyin((caddr_t)uap->gidset, (caddr_t)linux_gidset,
1177 			       ngrp * sizeof(linux_gid_t));
1178 		if (error)
1179 			return (error);
1180 
1181 		pc->pc_ucred->cr_ngroups = ngrp + 1;
1182 
1183 		bsd_gidset = pc->pc_ucred->cr_groups;
1184 		ngrp--;
1185 		while (ngrp >= 0) {
1186 			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
1187 			ngrp--;
1188 		}
1189 	}
1190 	else
1191 		pc->pc_ucred->cr_ngroups = 1;
1192 
1193 	setsugid(p);
1194 	return (0);
1195 }
1196 
1197 int
1198 linux_getgroups(p, uap)
1199 	struct proc *p;
1200 	struct linux_getgroups_args *uap;
1201 {
1202 	struct pcred *pc;
1203 	linux_gid_t linux_gidset[NGROUPS];
1204 	gid_t *bsd_gidset;
1205 	int bsd_gidsetsz, ngrp, error;
1206 
1207 	pc = p->p_cred;
1208 	bsd_gidset = pc->pc_ucred->cr_groups;
1209 	bsd_gidsetsz = pc->pc_ucred->cr_ngroups - 1;
1210 
1211 	/*
1212 	 * cr_groups[0] holds egid. Returning the whole set
1213 	 * here will cause a duplicate. Exclude cr_groups[0]
1214 	 * to prevent that.
1215 	 */
1216 
1217 	if ((ngrp = uap->gidsetsize) == 0) {
1218 		p->p_retval[0] = bsd_gidsetsz;
1219 		return (0);
1220 	}
1221 
1222 	if (ngrp < bsd_gidsetsz)
1223 		return (EINVAL);
1224 
1225 	ngrp = 0;
1226 	while (ngrp < bsd_gidsetsz) {
1227 		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1228 		ngrp++;
1229 	}
1230 
1231 	if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset,
1232 	    ngrp * sizeof(linux_gid_t))))
1233 		return (error);
1234 
1235 	p->p_retval[0] = ngrp;
1236 	return (0);
1237 }
1238 
1239 int
1240 linux_setrlimit(p, uap)
1241      struct proc *p;
1242      struct linux_setrlimit_args *uap;
1243 {
1244     struct osetrlimit_args bsd;
1245 
1246 #ifdef DEBUG
1247     printf("Linux-emul(%ld): setrlimit(%d, %p)\n",
1248 	   (long)p->p_pid, uap->resource, (void *)uap->rlim);
1249 #endif
1250 
1251     if (uap->resource >= LINUX_RLIM_NLIMITS)
1252 	return EINVAL;
1253 
1254     bsd.which = linux_to_bsd_resource[uap->resource];
1255 
1256     if (bsd.which == -1)
1257 	return EINVAL;
1258 
1259     bsd.rlp = uap->rlim;
1260     return osetrlimit(p, &bsd);
1261 }
1262 
1263 int
1264 linux_getrlimit(p, uap)
1265      struct proc *p;
1266      struct linux_getrlimit_args *uap;
1267 {
1268     struct ogetrlimit_args bsd;
1269 
1270 #ifdef DEBUG
1271     printf("Linux-emul(%ld): getrlimit(%d, %p)\n",
1272 	   (long)p->p_pid, uap->resource, (void *)uap->rlim);
1273 #endif
1274 
1275     if (uap->resource >= LINUX_RLIM_NLIMITS)
1276 	return EINVAL;
1277 
1278     bsd.which = linux_to_bsd_resource[uap->resource];
1279 
1280     if (bsd.which == -1)
1281 	return EINVAL;
1282 
1283     bsd.rlp = uap->rlim;
1284     return ogetrlimit(p, &bsd);
1285 }
1286 
1287 int
1288 linux_sched_setscheduler(p, uap)
1289 	struct proc *p;
1290 	struct linux_sched_setscheduler_args *uap;
1291 {
1292 	struct sched_setscheduler_args bsd;
1293 
1294 #ifdef DEBUG
1295 	printf("Linux-emul(%ld): sched_setscheduler(%d, %d, %p)\n",
1296 	       (long)p->p_pid, uap->pid, uap->policy, (void *)uap->param);
1297 #endif
1298 
1299 	switch (uap->policy) {
1300 	case LINUX_SCHED_OTHER:
1301 		bsd.policy = SCHED_OTHER;
1302 		break;
1303 	case LINUX_SCHED_FIFO:
1304 		bsd.policy = SCHED_FIFO;
1305 		break;
1306 	case LINUX_SCHED_RR:
1307 		bsd.policy = SCHED_RR;
1308 		break;
1309 	default:
1310 		return EINVAL;
1311 	}
1312 
1313 	bsd.pid = uap->pid;
1314 	bsd.param = uap->param;
1315 	return sched_setscheduler(p, &bsd);
1316 }
1317 
1318 int
1319 linux_sched_getscheduler(p, uap)
1320 	struct proc *p;
1321 	struct linux_sched_getscheduler_args *uap;
1322 {
1323 	struct sched_getscheduler_args bsd;
1324 	int error;
1325 
1326 #ifdef DEBUG
1327 	printf("Linux-emul(%ld): sched_getscheduler(%d)\n",
1328 	       (long)p->p_pid, uap->pid);
1329 #endif
1330 
1331 	bsd.pid = uap->pid;
1332 	error = sched_getscheduler(p, &bsd);
1333 
1334 	switch (p->p_retval[0]) {
1335 	case SCHED_OTHER:
1336 		p->p_retval[0] = LINUX_SCHED_OTHER;
1337 		break;
1338 	case SCHED_FIFO:
1339 		p->p_retval[0] = LINUX_SCHED_FIFO;
1340 		break;
1341 	case SCHED_RR:
1342 		p->p_retval[0] = LINUX_SCHED_RR;
1343 		break;
1344 	}
1345 
1346 	return error;
1347 }
1348 
/*
 * LDT entry description as copied in from the Linux caller by
 * linux_modify_ldt() for the write_ldt functions.
 */
struct linux_descriptor {
	unsigned int entry_number;	/* LDT slot to set */
	unsigned long base_addr;	/* segment base address */
	unsigned int limit;		/* segment limit (low 20 bits used) */
	unsigned int seg_32bit:1;	/* copied to sd_def32 */
	unsigned int contents:2;	/* placed in bits 2-3 of sd_type */
	unsigned int read_exec_only:1;	/* inverted into bit 1 of sd_type */
	unsigned int limit_in_pages:1;	/* copied to sd_gran */
	unsigned int seg_not_present:1;	/* inverted into sd_p */
	unsigned int useable:1;		/* not consumed by linux_modify_ldt() */
};
1360 
1361 int
1362 linux_modify_ldt(p, uap)
1363 	struct proc *p;
1364 	struct linux_modify_ldt_args *uap;
1365 {
1366 	int error;
1367 	caddr_t sg;
1368 	struct sysarch_args args;
1369 	struct i386_ldt_args *ldt;
1370 	struct linux_descriptor ld;
1371 	union descriptor *desc;
1372 
1373 	sg = stackgap_init();
1374 
1375 	if (uap->ptr == NULL)
1376 		return (EINVAL);
1377 
1378 	switch (uap->func) {
1379 	case 0x00: /* read_ldt */
1380 		ldt = stackgap_alloc(&sg, sizeof(*ldt));
1381 		ldt->start = 0;
1382 		ldt->descs = uap->ptr;
1383 		ldt->num = uap->bytecount / sizeof(union descriptor);
1384 		args.op = I386_GET_LDT;
1385 		args.parms = (char*)ldt;
1386 		error = sysarch(p, &args);
1387 		p->p_retval[0] *= sizeof(union descriptor);
1388 		break;
1389 	case 0x01: /* write_ldt */
1390 	case 0x11: /* write_ldt */
1391 		if (uap->bytecount != sizeof(ld))
1392 			return (EINVAL);
1393 
1394 		error = copyin(uap->ptr, &ld, sizeof(ld));
1395 		if (error)
1396 			return (error);
1397 
1398 		ldt = stackgap_alloc(&sg, sizeof(*ldt));
1399 		desc = stackgap_alloc(&sg, sizeof(*desc));
1400 		ldt->start = ld.entry_number;
1401 		ldt->descs = desc;
1402 		ldt->num = 1;
1403 		desc->sd.sd_lolimit = (ld.limit & 0x0000ffff);
1404 		desc->sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16;
1405 		desc->sd.sd_lobase = (ld.base_addr & 0x00ffffff);
1406 		desc->sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24;
1407 		desc->sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) |
1408 			(ld.contents << 2);
1409 		desc->sd.sd_dpl = 3;
1410 		desc->sd.sd_p = (ld.seg_not_present ^ 1);
1411 		desc->sd.sd_xx = 0;
1412 		desc->sd.sd_def32 = ld.seg_32bit;
1413 		desc->sd.sd_gran = ld.limit_in_pages;
1414 		args.op = I386_SET_LDT;
1415 		args.parms = (char*)ldt;
1416 		error = sysarch(p, &args);
1417 		break;
1418 	default:
1419 		error = EINVAL;
1420 		break;
1421 	}
1422 
1423 	if (error == EOPNOTSUPP) {
1424 		printf("linux: modify_ldt needs kernel option USER_LDT\n");
1425 		error = ENOSYS;
1426 	}
1427 
1428 	return (error);
1429 }
1430