xref: /freebsd/sys/compat/linux/linux_misc.c (revision 3be5f1f5ce6c92fb28926e7d02f855c12d534c34)
1 /*-
2  * Copyright (c) 1994-1995 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  *  $Id: linux_misc.c,v 1.58 1999/05/06 18:44:25 peter Exp $
29  */
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/sysproto.h>
34 #include <sys/kernel.h>
35 #include <sys/mman.h>
36 #include <sys/proc.h>
37 #include <sys/fcntl.h>
38 #include <sys/imgact_aout.h>
39 #include <sys/mount.h>
40 #include <sys/namei.h>
41 #include <sys/resourcevar.h>
42 #include <sys/stat.h>
43 #include <sys/sysctl.h>
44 #include <sys/unistd.h>
45 #include <sys/vnode.h>
46 #include <sys/wait.h>
47 #include <sys/time.h>
48 
49 #include <vm/vm.h>
50 #include <vm/pmap.h>
51 #include <vm/vm_kern.h>
52 #include <vm/vm_prot.h>
53 #include <vm/vm_map.h>
54 #include <vm/vm_extern.h>
55 
56 #include <machine/frame.h>
57 #include <machine/psl.h>
58 
59 #include <i386/linux/linux.h>
60 #include <i386/linux/linux_proto.h>
61 #include <i386/linux/linux_util.h>
62 
63 int
64 linux_alarm(struct proc *p, struct linux_alarm_args *args)
65 {
66     struct itimerval it, old_it;
67     struct timeval tv;
68     int s;
69 
70 #ifdef DEBUG
71     printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs);
72 #endif
73     if (args->secs > 100000000)
74 	return EINVAL;
75     it.it_value.tv_sec = (long)args->secs;
76     it.it_value.tv_usec = 0;
77     it.it_interval.tv_sec = 0;
78     it.it_interval.tv_usec = 0;
79     s = splsoftclock();
80     old_it = p->p_realtimer;
81     getmicrouptime(&tv);
82     if (timevalisset(&old_it.it_value))
83 	untimeout(realitexpire, (caddr_t)p, p->p_ithandle);
84     if (it.it_value.tv_sec != 0) {
85 	p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value));
86 	timevaladd(&it.it_value, &tv);
87     }
88     p->p_realtimer = it;
89     splx(s);
90     if (timevalcmp(&old_it.it_value, &tv, >)) {
91 	timevalsub(&old_it.it_value, &tv);
92 	if (old_it.it_value.tv_usec != 0)
93 	    old_it.it_value.tv_sec++;
94 	p->p_retval[0] = old_it.it_value.tv_sec;
95     }
96     return 0;
97 }
98 
99 int
100 linux_brk(struct proc *p, struct linux_brk_args *args)
101 {
102 #if 0
103     struct vmspace *vm = p->p_vmspace;
104     vm_offset_t new, old;
105     int error;
106 
107     if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr)
108 	return EINVAL;
109     if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr)
110 	> p->p_rlimit[RLIMIT_DATA].rlim_cur)
111 	return ENOMEM;
112 
113     old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize);
114     new = round_page((vm_offset_t)args->dsend);
115     p->p_retval[0] = old;
116     if ((new-old) > 0) {
117 	if (swap_pager_full)
118 	    return ENOMEM;
119 	error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE,
120 			VM_PROT_ALL, VM_PROT_ALL, 0);
121 	if (error)
122 	    return error;
123 	vm->vm_dsize += btoc((new-old));
124 	p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize));
125     }
126     return 0;
127 #else
128     struct vmspace *vm = p->p_vmspace;
129     vm_offset_t new, old;
130     struct obreak_args /* {
131 	char * nsize;
132     } */ tmp;
133 
134 #ifdef DEBUG
135     printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend);
136 #endif
137     old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
138     new = (vm_offset_t)args->dsend;
139     tmp.nsize = (char *) new;
140     if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp))
141 	p->p_retval[0] = (int)new;
142     else
143 	p->p_retval[0] = (int)old;
144 
145     return 0;
146 #endif
147 }
148 
149 int
150 linux_uselib(struct proc *p, struct linux_uselib_args *args)
151 {
152     struct nameidata ni;
153     struct vnode *vp;
154     struct exec *a_out;
155     struct vattr attr;
156     vm_offset_t vmaddr;
157     unsigned long file_offset;
158     vm_offset_t buffer;
159     unsigned long bss_size;
160     int error;
161     caddr_t sg;
162     int locked;
163 
164     sg = stackgap_init();
165     CHECKALTEXIST(p, &sg, args->library);
166 
167 #ifdef DEBUG
168     printf("Linux-emul(%d): uselib(%s)\n", p->p_pid, args->library);
169 #endif
170 
171     a_out = NULL;
172     locked = 0;
173     vp = NULL;
174 
175     NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, args->library, p);
176     error = namei(&ni);
177     if (error)
178 	goto cleanup;
179 
180     vp = ni.ni_vp;
181     if (vp == NULL) {
182 	error = ENOEXEC;	/* ?? */
183 	goto cleanup;
184     }
185 
186     /*
187      * From here on down, we have a locked vnode that must be unlocked.
188      */
189     locked++;
190 
191     /*
192      * Writable?
193      */
194     if (vp->v_writecount) {
195 	error = ETXTBSY;
196 	goto cleanup;
197     }
198 
199     /*
200      * Executable?
201      */
202     error = VOP_GETATTR(vp, &attr, p->p_ucred, p);
203     if (error)
204 	goto cleanup;
205 
206     if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
207 	((attr.va_mode & 0111) == 0) ||
208 	(attr.va_type != VREG)) {
209 	    error = ENOEXEC;
210 	    goto cleanup;
211     }
212 
213     /*
214      * Sensible size?
215      */
216     if (attr.va_size == 0) {
217 	error = ENOEXEC;
218 	goto cleanup;
219     }
220 
221     /*
222      * Can we access it?
223      */
224     error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
225     if (error)
226 	goto cleanup;
227 
228     error = VOP_OPEN(vp, FREAD, p->p_ucred, p);
229     if (error)
230 	goto cleanup;
231 
232     /*
233      * Lock no longer needed
234      */
235     VOP_UNLOCK(vp, 0, p);
236     locked = 0;
237 
238     /*
239      * Pull in executable header into kernel_map
240      */
241     error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
242 	    	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
243     if (error)
244 	goto cleanup;
245 
246     /*
247      * Is it a Linux binary ?
248      */
249     if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
250 	error = ENOEXEC;
251 	goto cleanup;
252     }
253 
254     /* While we are here, we should REALLY do some more checks */
255 
256     /*
257      * Set file/virtual offset based on a.out variant.
258      */
259     switch ((int)(a_out->a_magic & 0xffff)) {
260     case 0413:	/* ZMAGIC */
261 	file_offset = 1024;
262 	break;
263     case 0314:	/* QMAGIC */
264 	file_offset = 0;
265 	break;
266     default:
267 	error = ENOEXEC;
268 	goto cleanup;
269     }
270 
271     bss_size = round_page(a_out->a_bss);
272 
273     /*
274      * Check various fields in header for validity/bounds.
275      */
276     if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
277 	error = ENOEXEC;
278 	goto cleanup;
279     }
280 
281     /* text + data can't exceed file size */
282     if (a_out->a_data + a_out->a_text > attr.va_size) {
283 	error = EFAULT;
284 	goto cleanup;
285     }
286 
287     /*
288      * text/data/bss must not exceed limits
289      * XXX: this is not complete. it should check current usage PLUS
290      * the resources needed by this library.
291      */
292     if (a_out->a_text > MAXTSIZ ||
293 	a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) {
294 	error = ENOMEM;
295 	goto cleanup;
296     }
297 
298     /*
299      * prevent more writers
300      */
301     vp->v_flag |= VTEXT;
302 
303     /*
304      * Check if file_offset page aligned,.
305      * Currently we cannot handle misalinged file offsets,
306      * and so we read in the entire image (what a waste).
307      */
308     if (file_offset & PAGE_MASK) {
309 #ifdef DEBUG
310 printf("uselib: Non page aligned binary %lu\n", file_offset);
311 #endif
312 	/*
313 	 * Map text+data read/write/execute
314 	 */
315 
316 	/* a_entry is the load address and is page aligned */
317 	vmaddr = trunc_page(a_out->a_entry);
318 
319 	/* get anon user mapping, read+write+execute */
320 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
321 		    	    a_out->a_text + a_out->a_data, FALSE,
322 			    VM_PROT_ALL, VM_PROT_ALL, 0);
323 	if (error)
324 	    goto cleanup;
325 
326 	/* map file into kernel_map */
327 	error = vm_mmap(kernel_map, &buffer,
328 			round_page(a_out->a_text + a_out->a_data + file_offset),
329 		   	VM_PROT_READ, VM_PROT_READ, 0,
330 			(caddr_t)vp, trunc_page(file_offset));
331 	if (error)
332 	    goto cleanup;
333 
334 	/* copy from kernel VM space to user space */
335 	error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset),
336 			(caddr_t)vmaddr, a_out->a_text + a_out->a_data);
337 
338 	/* release temporary kernel space */
339 	vm_map_remove(kernel_map, buffer,
340 		      buffer + round_page(a_out->a_text + a_out->a_data + file_offset));
341 
342 	if (error)
343 	    goto cleanup;
344     }
345     else {
346 #ifdef DEBUG
347 printf("uselib: Page aligned binary %lu\n", file_offset);
348 #endif
349 	/*
350 	 * for QMAGIC, a_entry is 20 bytes beyond the load address
351 	 * to skip the executable header
352 	 */
353 	vmaddr = trunc_page(a_out->a_entry);
354 
355 	/*
356 	 * Map it all into the process's space as a single copy-on-write
357 	 * "data" segment.
358 	 */
359 	error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr,
360 		   	a_out->a_text + a_out->a_data,
361 			VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
362 			(caddr_t)vp, file_offset);
363 	if (error)
364 	    goto cleanup;
365     }
366 #ifdef DEBUG
367 printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]);
368 #endif
369     if (bss_size != 0) {
370         /*
371 	 * Calculate BSS start address
372 	 */
373 	vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data;
374 
375 	/*
376 	 * allocate some 'anon' space
377 	 */
378 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
379 			    bss_size, FALSE,
380 			    VM_PROT_ALL, VM_PROT_ALL, 0);
381 	if (error)
382 	    goto cleanup;
383     }
384 
385 cleanup:
386     /*
387      * Unlock vnode if needed
388      */
389     if (locked)
390 	VOP_UNLOCK(vp, 0, p);
391 
392     /*
393      * Release the kernel mapping.
394      */
395     if (a_out)
396 	vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE);
397 
398     return error;
399 }
400 
401 /* XXX move */
402 struct linux_select_argv {
403 	int nfds;
404 	fd_set *readfds;
405 	fd_set *writefds;
406 	fd_set *exceptfds;
407 	struct timeval *timeout;
408 };
409 
410 int
411 linux_select(struct proc *p, struct linux_select_args *args)
412 {
413     struct linux_select_argv linux_args;
414     struct linux_newselect_args newsel;
415     int error;
416 
417 #ifdef SELECT_DEBUG
418     printf("Linux-emul(%d): select(%x)\n",
419 	   p->p_pid, args->ptr);
420 #endif
421     if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
422 			sizeof(linux_args))))
423 	return error;
424 
425     newsel.nfds = linux_args.nfds;
426     newsel.readfds = linux_args.readfds;
427     newsel.writefds = linux_args.writefds;
428     newsel.exceptfds = linux_args.exceptfds;
429     newsel.timeout = linux_args.timeout;
430 
431     return linux_newselect(p, &newsel);
432 }
433 
434 int
435 linux_newselect(struct proc *p, struct linux_newselect_args *args)
436 {
437     struct select_args bsa;
438     struct timeval tv0, tv1, utv, *tvp;
439     caddr_t sg;
440     int error;
441 
442 #ifdef DEBUG
443     printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n",
444   	(long)p->p_pid, args->nfds, (void *)args->readfds,
445 	(void *)args->writefds, (void *)args->exceptfds,
446 	(void *)args->timeout);
447 #endif
448     error = 0;
449     bsa.nd = args->nfds;
450     bsa.in = args->readfds;
451     bsa.ou = args->writefds;
452     bsa.ex = args->exceptfds;
453     bsa.tv = args->timeout;
454 
455     /*
456      * Store current time for computation of the amount of
457      * time left.
458      */
459     if (args->timeout) {
460 	if ((error = copyin(args->timeout, &utv, sizeof(utv))))
461 	    goto select_out;
462 #ifdef DEBUG
463 	printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n",
464 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
465 #endif
466 	if (itimerfix(&utv)) {
467 	    /*
468 	     * The timeval was invalid.  Convert it to something
469 	     * valid that will act as it does under Linux.
470 	     */
471 	    sg = stackgap_init();
472 	    tvp = stackgap_alloc(&sg, sizeof(utv));
473 	    utv.tv_sec += utv.tv_usec / 1000000;
474 	    utv.tv_usec %= 1000000;
475 	    if (utv.tv_usec < 0) {
476 		utv.tv_sec -= 1;
477 		utv.tv_usec += 1000000;
478 	    }
479 	    if (utv.tv_sec < 0)
480 		timevalclear(&utv);
481 	    if ((error = copyout(&utv, tvp, sizeof(utv))))
482 		goto select_out;
483 	    bsa.tv = tvp;
484 	}
485 	microtime(&tv0);
486     }
487 
488     error = select(p, &bsa);
489 #ifdef DEBUG
490     printf("Linux-emul(%d): real select returns %d\n",
491 	       p->p_pid, error);
492 #endif
493 
494     if (error) {
495 	/*
496 	 * See fs/select.c in the Linux kernel.  Without this,
497 	 * Maelstrom doesn't work.
498 	 */
499 	if (error == ERESTART)
500 	    error = EINTR;
501 	goto select_out;
502     }
503 
504     if (args->timeout) {
505 	if (p->p_retval[0]) {
506 	    /*
507 	     * Compute how much time was left of the timeout,
508 	     * by subtracting the current time and the time
509 	     * before we started the call, and subtracting
510 	     * that result from the user-supplied value.
511 	     */
512 	    microtime(&tv1);
513 	    timevalsub(&tv1, &tv0);
514 	    timevalsub(&utv, &tv1);
515 	    if (utv.tv_sec < 0)
516 		timevalclear(&utv);
517 	} else
518 	    timevalclear(&utv);
519 #ifdef DEBUG
520 	printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n",
521 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
522 #endif
523 	if ((error = copyout(&utv, args->timeout, sizeof(utv))))
524 	    goto select_out;
525     }
526 
527 select_out:
528 #ifdef DEBUG
529     printf("Linux-emul(%d): newselect_out -> %d\n",
530 	       p->p_pid, error);
531 #endif
532     return error;
533 }
534 
535 int
536 linux_getpgid(struct proc *p, struct linux_getpgid_args *args)
537 {
538     struct proc *curp;
539 
540 #ifdef DEBUG
541     printf("Linux-emul(%d): getpgid(%d)\n", p->p_pid, args->pid);
542 #endif
543     if (args->pid != p->p_pid) {
544 	if (!(curp = pfind(args->pid)))
545 	    return ESRCH;
546     }
547     else
548 	curp = p;
549     p->p_retval[0] = curp->p_pgid;
550     return 0;
551 }
552 
553 int
554 linux_fork(struct proc *p, struct linux_fork_args *args)
555 {
556     int error;
557 
558 #ifdef DEBUG
559     printf("Linux-emul(%d): fork()\n", p->p_pid);
560 #endif
561     if ((error = fork(p, (struct fork_args *)args)) != 0)
562 	return error;
563     if (p->p_retval[1] == 1)
564 	p->p_retval[0] = 0;
565     return 0;
566 }
567 
568 #define CLONE_VM	0x100
569 #define CLONE_FS	0x200
570 #define CLONE_FILES	0x400
571 #define CLONE_SIGHAND	0x800
572 #define CLONE_PID	0x1000
573 
574 int
575 linux_clone(struct proc *p, struct linux_clone_args *args)
576 {
577     int error, ff = RFPROC;
578     struct proc *p2;
579     int            exit_signal;
580     vm_offset_t    start;
581     struct rfork_args rf_args;
582 
583 #ifdef DEBUG
584     if (args->flags & CLONE_PID)
585 	printf("linux_clone(%d): CLONE_PID not yet supported\n", p->p_pid);
586     printf ("linux_clone(%d): invoked with flags %x and stack %x\n", p->p_pid,
587 	     (unsigned int)args->flags, (unsigned int)args->stack);
588 #endif
589 
590     if (!args->stack)
591         return (EINVAL);
592 
593     exit_signal = args->flags & 0x000000ff;
594     if (exit_signal >= LINUX_NSIG)
595 	return EINVAL;
596     exit_signal = linux_to_bsd_signal[exit_signal];
597 
598     /* RFTHREAD probably not necessary here, but it shouldn't hurt either */
599     ff |= RFTHREAD;
600 
601     if (args->flags & CLONE_VM)
602 	ff |= RFMEM;
603     if (args->flags & CLONE_SIGHAND)
604 	ff |= RFSIGSHARE;
605     if (!(args->flags & CLONE_FILES))
606 	ff |= RFFDG;
607 
608     error = 0;
609     start = 0;
610 
611     rf_args.flags = ff;
612     if ((error = rfork(p, &rf_args)) != 0)
613 	return error;
614 
615     p2 = pfind(p->p_retval[0]);
616     if (p2 == 0)
617  	return ESRCH;
618 
619     p2->p_sigparent = exit_signal;
620     p2->p_md.md_regs->tf_esp = (unsigned int)args->stack;
621 
622 #ifdef DEBUG
623     printf ("linux_clone(%d): successful rfork to %d\n", p->p_pid, p2->p_pid);
624 #endif
625     return 0;
626 }
627 
628 /* XXX move */
629 struct linux_mmap_argv {
630 	linux_caddr_t addr;
631 	int len;
632 	int prot;
633 	int flags;
634 	int fd;
635 	int pos;
636 };
637 
638 #define STACK_SIZE  (2 * 1024 * 1024)
639 #define GUARD_SIZE  (4 * PAGE_SIZE)
640 int
641 linux_mmap(struct proc *p, struct linux_mmap_args *args)
642 {
643     struct mmap_args /* {
644 	caddr_t addr;
645 	size_t len;
646 	int prot;
647 	int flags;
648 	int fd;
649 	long pad;
650 	off_t pos;
651     } */ bsd_args;
652     int error;
653     struct linux_mmap_argv linux_args;
654 
655     if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
656 			sizeof(linux_args))))
657 	return error;
658 #ifdef DEBUG
659     printf("Linux-emul(%ld): mmap(%p, %d, %d, %08x, %d, %d)\n",
660 	(long)p->p_pid, (void *)linux_args.addr, linux_args.len,
661 	linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos);
662 #endif
663     bsd_args.flags = 0;
664     if (linux_args.flags & LINUX_MAP_SHARED)
665 	bsd_args.flags |= MAP_SHARED;
666     if (linux_args.flags & LINUX_MAP_PRIVATE)
667 	bsd_args.flags |= MAP_PRIVATE;
668     if (linux_args.flags & LINUX_MAP_FIXED)
669 	bsd_args.flags |= MAP_FIXED;
670     if (linux_args.flags & LINUX_MAP_ANON)
671 	bsd_args.flags |= MAP_ANON;
672     if (linux_args.flags & LINUX_MAP_GROWSDOWN) {
673 	bsd_args.flags |= MAP_STACK;
674 
675 	/* The linux MAP_GROWSDOWN option does not limit auto
676 	 * growth of the region.  Linux mmap with this option
677 	 * takes as addr the inital BOS, and as len, the initial
678 	 * region size.  It can then grow down from addr without
679 	 * limit.  However, linux threads has an implicit internal
680 	 * limit to stack size of STACK_SIZE.  Its just not
681 	 * enforced explicitly in linux.  But, here we impose
682 	 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
683 	 * region, since we can do this with our mmap.
684 	 *
685 	 * Our mmap with MAP_STACK takes addr as the maximum
686 	 * downsize limit on BOS, and as len the max size of
687 	 * the region.  It them maps the top SGROWSIZ bytes,
688 	 * and autgrows the region down, up to the limit
689 	 * in addr.
690 	 *
691 	 * If we don't use the MAP_STACK option, the effect
692 	 * of this code is to allocate a stack region of a
693 	 * fixed size of (STACK_SIZE - GUARD_SIZE).
694 	 */
695 
696 	/* This gives us TOS */
697 	bsd_args.addr = linux_args.addr + linux_args.len;
698 
699 	/* This gives us our maximum stack size */
700 	if (linux_args.len > STACK_SIZE - GUARD_SIZE)
701 	    bsd_args.len = linux_args.len;
702 	else
703 	    bsd_args.len  = STACK_SIZE - GUARD_SIZE;
704 
705 	/* This gives us a new BOS.  If we're using VM_STACK, then
706 	 * mmap will just map the top SGROWSIZ bytes, and let
707 	 * the stack grow down to the limit at BOS.  If we're
708 	 * not using VM_STACK we map the full stack, since we
709 	 * don't have a way to autogrow it.
710 	 */
711 	bsd_args.addr -= bsd_args.len;
712 
713     } else {
714 	bsd_args.addr = linux_args.addr;
715 	bsd_args.len  = linux_args.len;
716     }
717 
718     bsd_args.prot = linux_args.prot | PROT_READ;	/* always required */
719     bsd_args.fd = linux_args.fd;
720     bsd_args.pos = linux_args.pos;
721     bsd_args.pad = 0;
722     return mmap(p, &bsd_args);
723 }
724 
725 int
726 linux_mremap(struct proc *p, struct linux_mremap_args *args)
727 {
728 	struct munmap_args /* {
729 		void *addr;
730 		size_t len;
731 	} */ bsd_args;
732 	int error = 0;
733 
734 #ifdef DEBUG
735 	printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n",
736 	    (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len,
737 	    args->flags);
738 #endif
739 	args->new_len = round_page(args->new_len);
740 	args->old_len = round_page(args->old_len);
741 
742 	if (args->new_len > args->old_len) {
743 		p->p_retval[0] = 0;
744 		return ENOMEM;
745 	}
746 
747 	if (args->new_len < args->old_len) {
748 		bsd_args.addr = args->addr + args->new_len;
749 		bsd_args.len = args->old_len - args->new_len;
750 		error = munmap(p, &bsd_args);
751 	}
752 
753 	p->p_retval[0] = error ? 0 : (int)args->addr;
754 	return error;
755 }
756 
757 int
758 linux_msync(struct proc *p, struct linux_msync_args *args)
759 {
760 	struct msync_args bsd_args;
761 
762 	bsd_args.addr = args->addr;
763 	bsd_args.len = args->len;
764 	bsd_args.flags = 0;	/* XXX ignore */
765 
766 	return msync(p, &bsd_args);
767 }
768 
769 int
770 linux_pipe(struct proc *p, struct linux_pipe_args *args)
771 {
772     int error;
773     int reg_edx;
774 
775 #ifdef DEBUG
776     printf("Linux-emul(%d): pipe(*)\n", p->p_pid);
777 #endif
778     reg_edx = p->p_retval[1];
779     error = pipe(p, 0);
780     if (error) {
781 	p->p_retval[1] = reg_edx;
782 	return error;
783     }
784 
785     error = copyout(p->p_retval, args->pipefds, 2*sizeof(int));
786     if (error) {
787 	p->p_retval[1] = reg_edx;
788 	return error;
789     }
790 
791     p->p_retval[1] = reg_edx;
792     p->p_retval[0] = 0;
793     return 0;
794 }
795 
796 int
797 linux_time(struct proc *p, struct linux_time_args *args)
798 {
799     struct timeval tv;
800     linux_time_t tm;
801     int error;
802 
803 #ifdef DEBUG
804     printf("Linux-emul(%d): time(*)\n", p->p_pid);
805 #endif
806     microtime(&tv);
807     tm = tv.tv_sec;
808     if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t))))
809 	return error;
810     p->p_retval[0] = tm;
811     return 0;
812 }
813 
814 struct linux_times_argv {
815     long    tms_utime;
816     long    tms_stime;
817     long    tms_cutime;
818     long    tms_cstime;
819 };
820 
821 #define CLK_TCK 100	/* Linux uses 100 */
822 #define CONVTCK(r)	(r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
823 
824 int
825 linux_times(struct proc *p, struct linux_times_args *args)
826 {
827     struct timeval tv;
828     struct linux_times_argv tms;
829     struct rusage ru;
830     int error;
831 
832 #ifdef DEBUG
833     printf("Linux-emul(%d): times(*)\n", p->p_pid);
834 #endif
835     calcru(p, &ru.ru_utime, &ru.ru_stime, NULL);
836 
837     tms.tms_utime = CONVTCK(ru.ru_utime);
838     tms.tms_stime = CONVTCK(ru.ru_stime);
839 
840     tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime);
841     tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime);
842 
843     if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf,
844 	    	    sizeof(struct linux_times_argv))))
845 	return error;
846 
847     microuptime(&tv);
848     p->p_retval[0] = (int)CONVTCK(tv);
849     return 0;
850 }
851 
852 /* XXX move */
853 struct linux_newuname_t {
854     char sysname[65];
855     char nodename[65];
856     char release[65];
857     char version[65];
858     char machine[65];
859     char domainname[65];
860 };
861 
862 int
863 linux_newuname(struct proc *p, struct linux_newuname_args *args)
864 {
865     struct linux_newuname_t linux_newuname;
866 
867 #ifdef DEBUG
868     printf("Linux-emul(%d): newuname(*)\n", p->p_pid);
869 #endif
870     bzero(&linux_newuname, sizeof(struct linux_newuname_t));
871     strncpy(linux_newuname.sysname, "Linux",
872 	sizeof(linux_newuname.sysname) - 1);
873     strncpy(linux_newuname.nodename, hostname,
874 	sizeof(linux_newuname.nodename) - 1);
875     strncpy(linux_newuname.release, "2.0.36",
876 	sizeof(linux_newuname.release) - 1);
877     strncpy(linux_newuname.version, version,
878 	sizeof(linux_newuname.version) - 1);
879     strncpy(linux_newuname.machine, machine,
880 	sizeof(linux_newuname.machine) - 1);
881     strncpy(linux_newuname.domainname, domainname,
882 	sizeof(linux_newuname.domainname) - 1);
883     return (copyout((caddr_t)&linux_newuname, (caddr_t)args->buf,
884 	    	    sizeof(struct linux_newuname_t)));
885 }
886 
887 struct linux_utimbuf {
888 	linux_time_t l_actime;
889 	linux_time_t l_modtime;
890 };
891 
892 int
893 linux_utime(struct proc *p, struct linux_utime_args *args)
894 {
895     struct utimes_args /* {
896 	char	*path;
897 	struct	timeval *tptr;
898     } */ bsdutimes;
899     struct timeval tv[2], *tvp;
900     struct linux_utimbuf lut;
901     int error;
902     caddr_t sg;
903 
904     sg = stackgap_init();
905     CHECKALTEXIST(p, &sg, args->fname);
906 
907 #ifdef DEBUG
908     printf("Linux-emul(%d): utime(%s, *)\n", p->p_pid, args->fname);
909 #endif
910     if (args->times) {
911 	if ((error = copyin(args->times, &lut, sizeof lut)))
912 	    return error;
913 	tv[0].tv_sec = lut.l_actime;
914 	tv[0].tv_usec = 0;
915 	tv[1].tv_sec = lut.l_modtime;
916 	tv[1].tv_usec = 0;
917 	/* so that utimes can copyin */
918 	tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
919 	if ((error = copyout(tv, tvp, sizeof(tv))))
920 	    return error;
921 	bsdutimes.tptr = tvp;
922     } else
923 	bsdutimes.tptr = NULL;
924 
925     bsdutimes.path = args->fname;
926     return utimes(p, &bsdutimes);
927 }
928 
929 #define __WCLONE 0x80000000
930 
931 int
932 linux_waitpid(struct proc *p, struct linux_waitpid_args *args)
933 {
934     struct wait_args /* {
935 	int pid;
936 	int *status;
937 	int options;
938 	struct	rusage *rusage;
939     } */ tmp;
940     int error, tmpstat;
941 
942 #ifdef DEBUG
943     printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n",
944 	(long)p->p_pid, args->pid, (void *)args->status, args->options);
945 #endif
946     tmp.pid = args->pid;
947     tmp.status = args->status;
948     tmp.options = (args->options & (WNOHANG | WUNTRACED));
949     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
950     if (args->options & __WCLONE)
951 	tmp.options |= WLINUXCLONE;
952     tmp.rusage = NULL;
953 
954     if ((error = wait4(p, &tmp)) != 0)
955 	return error;
956 
957     if (args->status) {
958 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
959 	    return error;
960 	if (WIFSIGNALED(tmpstat))
961 	    tmpstat = (tmpstat & 0xffffff80) |
962 		      bsd_to_linux_signal[WTERMSIG(tmpstat)];
963 	else if (WIFSTOPPED(tmpstat))
964 	    tmpstat = (tmpstat & 0xffff00ff) |
965 		      (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8);
966 	return copyout(&tmpstat, args->status, sizeof(int));
967     } else
968 	return 0;
969 }
970 
971 int
972 linux_wait4(struct proc *p, struct linux_wait4_args *args)
973 {
974     struct wait_args /* {
975 	int pid;
976 	int *status;
977 	int options;
978 	struct	rusage *rusage;
979     } */ tmp;
980     int error, tmpstat;
981 
982 #ifdef DEBUG
983     printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n",
984 	(long)p->p_pid, args->pid, (void *)args->status, args->options,
985 	(void *)args->rusage);
986 #endif
987     tmp.pid = args->pid;
988     tmp.status = args->status;
989     tmp.options = (args->options & (WNOHANG | WUNTRACED));
990     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
991     if (args->options & __WCLONE)
992 	tmp.options |= WLINUXCLONE;
993     tmp.rusage = args->rusage;
994 
995     if ((error = wait4(p, &tmp)) != 0)
996 	return error;
997 
998     p->p_siglist &= ~sigmask(SIGCHLD);
999 
1000     if (args->status) {
1001 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
1002 	    return error;
1003 	if (WIFSIGNALED(tmpstat))
1004 	    tmpstat = (tmpstat & 0xffffff80) |
1005 		  bsd_to_linux_signal[WTERMSIG(tmpstat)];
1006 	else if (WIFSTOPPED(tmpstat))
1007 	    tmpstat = (tmpstat & 0xffff00ff) |
1008 		  (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8);
1009 	return copyout(&tmpstat, args->status, sizeof(int));
1010     } else
1011 	return 0;
1012 }
1013 
1014 int
1015 linux_mknod(struct proc *p, struct linux_mknod_args *args)
1016 {
1017 	caddr_t sg;
1018 	struct mknod_args bsd_mknod;
1019 	struct mkfifo_args bsd_mkfifo;
1020 
1021 	sg = stackgap_init();
1022 
1023 	CHECKALTCREAT(p, &sg, args->path);
1024 
1025 #ifdef DEBUG
1026 	printf("Linux-emul(%d): mknod(%s, %d, %d)\n",
1027 	   p->p_pid, args->path, args->mode, args->dev);
1028 #endif
1029 
1030 	if (args->mode & S_IFIFO) {
1031 		bsd_mkfifo.path = args->path;
1032 		bsd_mkfifo.mode = args->mode;
1033 		return mkfifo(p, &bsd_mkfifo);
1034 	} else {
1035 		bsd_mknod.path = args->path;
1036 		bsd_mknod.mode = args->mode;
1037 		bsd_mknod.dev = args->dev;
1038 		return mknod(p, &bsd_mknod);
1039 	}
1040 }
1041 
1042 /*
1043  * UGH! This is just about the dumbest idea I've ever heard!!
1044  */
1045 int
1046 linux_personality(struct proc *p, struct linux_personality_args *args)
1047 {
1048 #ifdef DEBUG
1049 	printf("Linux-emul(%d): personality(%d)\n",
1050 	   p->p_pid, args->per);
1051 #endif
1052 	if (args->per != 0)
1053 		return EINVAL;
1054 
1055 	/* Yes Jim, it's still a Linux... */
1056 	p->p_retval[0] = 0;
1057 	return 0;
1058 }
1059 
1060 /*
1061  * Wrappers for get/setitimer for debugging..
1062  */
1063 int
1064 linux_setitimer(struct proc *p, struct linux_setitimer_args *args)
1065 {
1066 	struct setitimer_args bsa;
1067 	struct itimerval foo;
1068 	int error;
1069 
1070 #ifdef DEBUG
1071 	printf("Linux-emul(%ld): setitimer(%p, %p)\n",
1072 	    (long)p->p_pid, (void *)args->itv, (void *)args->oitv);
1073 #endif
1074 	bsa.which = args->which;
1075 	bsa.itv = args->itv;
1076 	bsa.oitv = args->oitv;
1077 	if (args->itv) {
1078 	    if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo,
1079 			sizeof(foo))))
1080 		return error;
1081 #ifdef DEBUG
1082 	    printf("setitimer: value: sec: %ld, usec: %ld\n",
1083 		foo.it_value.tv_sec, foo.it_value.tv_usec);
1084 	    printf("setitimer: interval: sec: %ld, usec: %ld\n",
1085 		foo.it_interval.tv_sec, foo.it_interval.tv_usec);
1086 #endif
1087 	}
1088 	return setitimer(p, &bsa);
1089 }
1090 
1091 int
1092 linux_getitimer(struct proc *p, struct linux_getitimer_args *args)
1093 {
1094 	struct getitimer_args bsa;
1095 #ifdef DEBUG
1096 	printf("Linux-emul(%ld): getitimer(%p)\n",
1097 	    (long)p->p_pid, (void *)args->itv);
1098 #endif
1099 	bsa.which = args->which;
1100 	bsa.itv = args->itv;
1101 	return getitimer(p, &bsa);
1102 }
1103 
1104 int
1105 linux_iopl(struct proc *p, struct linux_iopl_args *args)
1106 {
1107 	int error;
1108 
1109 	error = suser(p);
1110 	if (error != 0)
1111 		return error;
1112 	if (securelevel > 0)
1113 		return EPERM;
1114 	p->p_md.md_regs->tf_eflags |= PSL_IOPL;
1115 	return 0;
1116 }
1117 
1118 int
1119 linux_nice(struct proc *p, struct linux_nice_args *args)
1120 {
1121 	struct setpriority_args	bsd_args;
1122 
1123 	bsd_args.which = PRIO_PROCESS;
1124 	bsd_args.who = 0;	/* current process */
1125 	bsd_args.prio = args->inc;
1126 	return setpriority(p, &bsd_args);
1127 }
1128 
1129 int
1130 linux_setgroups(p, uap)
1131      struct proc *p;
1132      struct linux_setgroups_args *uap;
1133 {
1134   struct pcred *pc = p->p_cred;
1135   linux_gid_t linux_gidset[NGROUPS];
1136   gid_t *bsd_gidset;
1137   int ngrp, error;
1138 
1139   if ((error = suser(p)))
1140     return error;
1141 
1142   if (uap->gidsetsize > NGROUPS)
1143     return EINVAL;
1144 
1145   ngrp = uap->gidsetsize;
1146   pc->pc_ucred = crcopy(pc->pc_ucred);
1147   if (ngrp >= 1) {
1148     if ((error = copyin((caddr_t)uap->gidset,
1149                       (caddr_t)linux_gidset,
1150                         ngrp * sizeof(linux_gid_t))))
1151       return error;
1152 
1153     pc->pc_ucred->cr_ngroups = ngrp;
1154 
1155     bsd_gidset = pc->pc_ucred->cr_groups;
1156     ngrp--;
1157     while (ngrp >= 0) {
1158       bsd_gidset[ngrp] = linux_gidset[ngrp];
1159       ngrp--;
1160     }
1161   }
1162   else
1163     pc->pc_ucred->cr_ngroups = 1;
1164 
1165   setsugid(p);
1166   return 0;
1167 }
1168 
1169 int
1170 linux_getgroups(p, uap)
1171      struct proc *p;
1172      struct linux_getgroups_args *uap;
1173 {
1174   struct pcred *pc = p->p_cred;
1175   linux_gid_t linux_gidset[NGROUPS];
1176   gid_t *bsd_gidset;
1177   int ngrp, error;
1178 
1179   if ((ngrp = uap->gidsetsize) == 0) {
1180     p->p_retval[0] = pc->pc_ucred->cr_ngroups;
1181     return 0;
1182   }
1183 
1184   if (ngrp < pc->pc_ucred->cr_ngroups)
1185     return EINVAL;
1186 
1187   ngrp = 0;
1188   bsd_gidset = pc->pc_ucred->cr_groups;
1189   while (ngrp < pc->pc_ucred->cr_ngroups) {
1190     linux_gidset[ngrp] = bsd_gidset[ngrp];
1191     ngrp++;
1192   }
1193 
1194   if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset,
1195                        ngrp * sizeof(linux_gid_t))))
1196     return error;
1197 
1198   p->p_retval[0] = ngrp;
1199   return (0);
1200 }
1201