xref: /freebsd/sys/compat/linux/linux_misc.c (revision 77a0943ded95b9e6438f7db70c4a28e4d93946d4)
1 /*-
2  * Copyright (c) 1994-1995 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include "opt_compat.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/sysproto.h>
36 #include <sys/kernel.h>
37 #include <sys/mman.h>
38 #include <sys/proc.h>
39 #include <sys/fcntl.h>
40 #include <sys/imgact_aout.h>
41 #include <sys/mount.h>
42 #include <sys/namei.h>
43 #include <sys/resourcevar.h>
44 #include <sys/stat.h>
45 #include <sys/sysctl.h>
46 #include <sys/unistd.h>
47 #include <sys/vnode.h>
48 #include <sys/wait.h>
49 #include <sys/time.h>
50 #include <sys/signalvar.h>
51 
52 #include <vm/vm.h>
53 #include <vm/pmap.h>
54 #include <vm/vm_kern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_extern.h>
57 
58 #include <machine/frame.h>
59 #include <machine/limits.h>
60 #include <machine/psl.h>
61 #include <machine/sysarch.h>
62 #ifdef __i386__
63 #include <machine/segments.h>
64 #endif
65 
66 #include <posix4/sched.h>
67 
68 #include <machine/../linux/linux.h>
69 #ifdef __alpha__
70 #include <linux_proto.h>
71 #else
72 #include <machine/../linux/linux_proto.h>
73 #endif
74 #include <compat/linux/linux_mib.h>
75 #include <compat/linux/linux_util.h>
76 
/*
 * Translate a BSD signal number into the number the Linux binary expects.
 * On alpha the number is passed through unchanged.  Elsewhere, numbers up to
 * LINUX_SIGTBLSZ are looked up in bsd_to_linux_signal[] and larger numbers
 * are passed through unchanged.
 *
 * The argument may be evaluated more than once; do not pass expressions
 * with side effects that must happen exactly once.
 */
#ifdef __alpha__
#define BSD_TO_LINUX_SIGNAL(sig)       (sig)
#else
#define BSD_TO_LINUX_SIGNAL(sig)	\
	(((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : (sig))
#endif
83 
/*
 * Resource limit pair in the form exchanged with the Linux process:
 * linux_setrlimit() copies one in from user space and linux_getrlimit()
 * copies one out.  Both members are unsigned long.
 */
struct linux_rlimit {
	unsigned long rlim_cur;	/* current limit */
	unsigned long rlim_max;	/* maximum limit */
};
88 
89 #ifndef __alpha__
/*
 * Maps a Linux resource number (the array index) to the BSD RLIMIT_*
 * constant.  An entry of -1 marks a Linux resource without a BSD
 * counterpart; linux_setrlimit() and linux_getrlimit() reject it with
 * EINVAL.
 */
static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] =
{ RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
  RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
  RLIMIT_MEMLOCK, -1
};
95 #endif /*!__alpha__*/
96 
97 #ifndef __alpha__
98 int
99 linux_alarm(struct proc *p, struct linux_alarm_args *args)
100 {
101     struct itimerval it, old_it;
102     struct timeval tv;
103     int s;
104 
105 #ifdef DEBUG
106     printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs);
107 #endif
108     if (args->secs > 100000000)
109 	return EINVAL;
110     it.it_value.tv_sec = (long)args->secs;
111     it.it_value.tv_usec = 0;
112     it.it_interval.tv_sec = 0;
113     it.it_interval.tv_usec = 0;
114     s = splsoftclock();
115     old_it = p->p_realtimer;
116     getmicrouptime(&tv);
117     if (timevalisset(&old_it.it_value))
118 	untimeout(realitexpire, (caddr_t)p, p->p_ithandle);
119     if (it.it_value.tv_sec != 0) {
120 	p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value));
121 	timevaladd(&it.it_value, &tv);
122     }
123     p->p_realtimer = it;
124     splx(s);
125     if (timevalcmp(&old_it.it_value, &tv, >)) {
126 	timevalsub(&old_it.it_value, &tv);
127 	if (old_it.it_value.tv_usec != 0)
128 	    old_it.it_value.tv_sec++;
129 	p->p_retval[0] = old_it.it_value.tv_sec;
130     }
131     return 0;
132 }
133 #endif /*!__alpha__*/
134 
135 int
136 linux_brk(struct proc *p, struct linux_brk_args *args)
137 {
138 #if 0
139     struct vmspace *vm = p->p_vmspace;
140     vm_offset_t new, old;
141     int error;
142 
143     if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr)
144 	return EINVAL;
145     if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr)
146 	> p->p_rlimit[RLIMIT_DATA].rlim_cur)
147 	return ENOMEM;
148 
149     old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize);
150     new = round_page((vm_offset_t)args->dsend);
151     p->p_retval[0] = old;
152     if ((new-old) > 0) {
153 	if (swap_pager_full)
154 	    return ENOMEM;
155 	error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE,
156 			VM_PROT_ALL, VM_PROT_ALL, 0);
157 	if (error)
158 	    return error;
159 	vm->vm_dsize += btoc((new-old));
160 	p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize));
161     }
162     return 0;
163 #else
164     struct vmspace *vm = p->p_vmspace;
165     vm_offset_t new, old;
166     struct obreak_args /* {
167 	char * nsize;
168     } */ tmp;
169 
170 #ifdef DEBUG
171     printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend);
172 #endif
173     old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
174     new = (vm_offset_t)args->dsend;
175     tmp.nsize = (char *) new;
176     if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp))
177 	p->p_retval[0] = (long)new;
178     else
179 	p->p_retval[0] = (long)old;
180 
181     return 0;
182 #endif
183 }
184 
185 int
186 linux_uselib(struct proc *p, struct linux_uselib_args *args)
187 {
188     struct nameidata ni;
189     struct vnode *vp;
190     struct exec *a_out;
191     struct vattr attr;
192     vm_offset_t vmaddr;
193     unsigned long file_offset;
194     vm_offset_t buffer;
195     unsigned long bss_size;
196     int error;
197     caddr_t sg;
198     int locked;
199 
200     sg = stackgap_init();
201     CHECKALTEXIST(p, &sg, args->library);
202 
203 #ifdef DEBUG
204     printf("Linux-emul(%ld): uselib(%s)\n", (long)p->p_pid, args->library);
205 #endif
206 
207     a_out = NULL;
208     locked = 0;
209     vp = NULL;
210 
211     NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, args->library, p);
212     error = namei(&ni);
213     if (error)
214 	goto cleanup;
215 
216     vp = ni.ni_vp;
217     /*
218      * XXX This looks like a bogus check - a LOCKLEAF namei should not succeed
219      * without returning a vnode.
220      */
221     if (vp == NULL) {
222 	error = ENOEXEC;	/* ?? */
223 	goto cleanup;
224     }
225     NDFREE(&ni, NDF_ONLY_PNBUF);
226 
227     /*
228      * From here on down, we have a locked vnode that must be unlocked.
229      */
230     locked++;
231 
232     /*
233      * Writable?
234      */
235     if (vp->v_writecount) {
236 	error = ETXTBSY;
237 	goto cleanup;
238     }
239 
240     /*
241      * Executable?
242      */
243     error = VOP_GETATTR(vp, &attr, p->p_ucred, p);
244     if (error)
245 	goto cleanup;
246 
247     if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
248 	((attr.va_mode & 0111) == 0) ||
249 	(attr.va_type != VREG)) {
250 	    error = ENOEXEC;
251 	    goto cleanup;
252     }
253 
254     /*
255      * Sensible size?
256      */
257     if (attr.va_size == 0) {
258 	error = ENOEXEC;
259 	goto cleanup;
260     }
261 
262     /*
263      * Can we access it?
264      */
265     error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
266     if (error)
267 	goto cleanup;
268 
269     error = VOP_OPEN(vp, FREAD, p->p_ucred, p);
270     if (error)
271 	goto cleanup;
272 
273     /*
274      * Lock no longer needed
275      */
276     VOP_UNLOCK(vp, 0, p);
277     locked = 0;
278 
279     /*
280      * Pull in executable header into kernel_map
281      */
282     error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
283 	    	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
284     if (error)
285 	goto cleanup;
286 
287     /*
288      * Is it a Linux binary ?
289      */
290     if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
291 	error = ENOEXEC;
292 	goto cleanup;
293     }
294 
295     /* While we are here, we should REALLY do some more checks */
296 
297     /*
298      * Set file/virtual offset based on a.out variant.
299      */
300     switch ((int)(a_out->a_magic & 0xffff)) {
301     case 0413:	/* ZMAGIC */
302 	file_offset = 1024;
303 	break;
304     case 0314:	/* QMAGIC */
305 	file_offset = 0;
306 	break;
307     default:
308 	error = ENOEXEC;
309 	goto cleanup;
310     }
311 
312     bss_size = round_page(a_out->a_bss);
313 
314     /*
315      * Check various fields in header for validity/bounds.
316      */
317     if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
318 	error = ENOEXEC;
319 	goto cleanup;
320     }
321 
322     /* text + data can't exceed file size */
323     if (a_out->a_data + a_out->a_text > attr.va_size) {
324 	error = EFAULT;
325 	goto cleanup;
326     }
327 
328     /*
329      * text/data/bss must not exceed limits
330      * XXX: this is not complete. it should check current usage PLUS
331      * the resources needed by this library.
332      */
333     if (a_out->a_text > MAXTSIZ ||
334 	a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) {
335 	error = ENOMEM;
336 	goto cleanup;
337     }
338 
339     /*
340      * prevent more writers
341      */
342     vp->v_flag |= VTEXT;
343 
344     /*
345      * Check if file_offset page aligned,.
346      * Currently we cannot handle misalinged file offsets,
347      * and so we read in the entire image (what a waste).
348      */
349     if (file_offset & PAGE_MASK) {
350 #ifdef DEBUG
351 printf("uselib: Non page aligned binary %lu\n", file_offset);
352 #endif
353 	/*
354 	 * Map text+data read/write/execute
355 	 */
356 
357 	/* a_entry is the load address and is page aligned */
358 	vmaddr = trunc_page(a_out->a_entry);
359 
360 	/* get anon user mapping, read+write+execute */
361 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
362 		    	    a_out->a_text + a_out->a_data, FALSE,
363 			    VM_PROT_ALL, VM_PROT_ALL, 0);
364 	if (error)
365 	    goto cleanup;
366 
367 	/* map file into kernel_map */
368 	error = vm_mmap(kernel_map, &buffer,
369 			round_page(a_out->a_text + a_out->a_data + file_offset),
370 		   	VM_PROT_READ, VM_PROT_READ, 0,
371 			(caddr_t)vp, trunc_page(file_offset));
372 	if (error)
373 	    goto cleanup;
374 
375 	/* copy from kernel VM space to user space */
376 	error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset),
377 			(caddr_t)vmaddr, a_out->a_text + a_out->a_data);
378 
379 	/* release temporary kernel space */
380 	vm_map_remove(kernel_map, buffer,
381 		      buffer + round_page(a_out->a_text + a_out->a_data + file_offset));
382 
383 	if (error)
384 	    goto cleanup;
385     }
386     else {
387 #ifdef DEBUG
388 printf("uselib: Page aligned binary %lu\n", file_offset);
389 #endif
390 	/*
391 	 * for QMAGIC, a_entry is 20 bytes beyond the load address
392 	 * to skip the executable header
393 	 */
394 	vmaddr = trunc_page(a_out->a_entry);
395 
396 	/*
397 	 * Map it all into the process's space as a single copy-on-write
398 	 * "data" segment.
399 	 */
400 	error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr,
401 		   	a_out->a_text + a_out->a_data,
402 			VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
403 			(caddr_t)vp, file_offset);
404 	if (error)
405 	    goto cleanup;
406     }
407 #ifdef DEBUG
408 printf("mem=%08lx = %08lx %08lx\n", vmaddr, ((long*)vmaddr)[0], ((long*)vmaddr)[1]);
409 #endif
410     if (bss_size != 0) {
411         /*
412 	 * Calculate BSS start address
413 	 */
414 	vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data;
415 
416 	/*
417 	 * allocate some 'anon' space
418 	 */
419 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
420 			    bss_size, FALSE,
421 			    VM_PROT_ALL, VM_PROT_ALL, 0);
422 	if (error)
423 	    goto cleanup;
424     }
425 
426 cleanup:
427     /*
428      * Unlock vnode if needed
429      */
430     if (locked)
431 	VOP_UNLOCK(vp, 0, p);
432 
433     /*
434      * Release the kernel mapping.
435      */
436     if (a_out)
437 	vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE);
438 
439     return error;
440 }
441 
442 int
443 linux_newselect(struct proc *p, struct linux_newselect_args *args)
444 {
445     struct select_args bsa;
446     struct timeval tv0, tv1, utv, *tvp;
447     caddr_t sg;
448     int error;
449 
450 #ifdef DEBUG
451     printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n",
452   	(long)p->p_pid, args->nfds, (void *)args->readfds,
453 	(void *)args->writefds, (void *)args->exceptfds,
454 	(void *)args->timeout);
455 #endif
456     error = 0;
457     bsa.nd = args->nfds;
458     bsa.in = args->readfds;
459     bsa.ou = args->writefds;
460     bsa.ex = args->exceptfds;
461     bsa.tv = args->timeout;
462 
463     /*
464      * Store current time for computation of the amount of
465      * time left.
466      */
467     if (args->timeout) {
468 	if ((error = copyin(args->timeout, &utv, sizeof(utv))))
469 	    goto select_out;
470 #ifdef DEBUG
471 	printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n",
472 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
473 #endif
474 	if (itimerfix(&utv)) {
475 	    /*
476 	     * The timeval was invalid.  Convert it to something
477 	     * valid that will act as it does under Linux.
478 	     */
479 	    sg = stackgap_init();
480 	    tvp = stackgap_alloc(&sg, sizeof(utv));
481 	    utv.tv_sec += utv.tv_usec / 1000000;
482 	    utv.tv_usec %= 1000000;
483 	    if (utv.tv_usec < 0) {
484 		utv.tv_sec -= 1;
485 		utv.tv_usec += 1000000;
486 	    }
487 	    if (utv.tv_sec < 0)
488 		timevalclear(&utv);
489 	    if ((error = copyout(&utv, tvp, sizeof(utv))))
490 		goto select_out;
491 	    bsa.tv = tvp;
492 	}
493 	microtime(&tv0);
494     }
495 
496     error = select(p, &bsa);
497 #ifdef DEBUG
498     printf("Linux-emul(%ld): real select returns %d\n", (long)p->p_pid, error);
499 #endif
500 
501     if (error) {
502 	/*
503 	 * See fs/select.c in the Linux kernel.  Without this,
504 	 * Maelstrom doesn't work.
505 	 */
506 	if (error == ERESTART)
507 	    error = EINTR;
508 	goto select_out;
509     }
510 
511     if (args->timeout) {
512 	if (p->p_retval[0]) {
513 	    /*
514 	     * Compute how much time was left of the timeout,
515 	     * by subtracting the current time and the time
516 	     * before we started the call, and subtracting
517 	     * that result from the user-supplied value.
518 	     */
519 	    microtime(&tv1);
520 	    timevalsub(&tv1, &tv0);
521 	    timevalsub(&utv, &tv1);
522 	    if (utv.tv_sec < 0)
523 		timevalclear(&utv);
524 	} else
525 	    timevalclear(&utv);
526 #ifdef DEBUG
527 	printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n",
528 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
529 #endif
530 	if ((error = copyout(&utv, args->timeout, sizeof(utv))))
531 	    goto select_out;
532     }
533 
534 select_out:
535 #ifdef DEBUG
536     printf("Linux-emul(%ld): newselect_out -> %d\n", (long)p->p_pid, error);
537 #endif
538     return error;
539 }
540 
541 int
542 linux_getpgid(struct proc *p, struct linux_getpgid_args *args)
543 {
544     struct proc *curp;
545 
546 #ifdef DEBUG
547     printf("Linux-emul(%ld): getpgid(%d)\n", (long)p->p_pid, args->pid);
548 #endif
549     if (args->pid != p->p_pid) {
550 	if (!(curp = pfind(args->pid)))
551 	    return ESRCH;
552     }
553     else
554 	curp = p;
555     p->p_retval[0] = curp->p_pgid;
556     return 0;
557 }
558 
559 int
560 linux_mremap(struct proc *p, struct linux_mremap_args *args)
561 {
562 	struct munmap_args /* {
563 		void *addr;
564 		size_t len;
565 	} */ bsd_args;
566 	int error = 0;
567 
568 #ifdef DEBUG
569 	printf("Linux-emul(%ld): mremap(%p, %08lx, %08lx, %08lx)\n",
570 	    (long)p->p_pid, (void *)args->addr,
571 	    (unsigned long)args->old_len,
572 	    (unsigned long)args->new_len,
573 	    (unsigned long)args->flags);
574 #endif
575 	args->new_len = round_page(args->new_len);
576 	args->old_len = round_page(args->old_len);
577 
578 	if (args->new_len > args->old_len) {
579 		p->p_retval[0] = 0;
580 		return ENOMEM;
581 	}
582 
583 	if (args->new_len < args->old_len) {
584 		bsd_args.addr = args->addr + args->new_len;
585 		bsd_args.len = args->old_len - args->new_len;
586 		error = munmap(p, &bsd_args);
587 	}
588 
589 	p->p_retval[0] = error ? 0 : (u_long)args->addr;
590 	return error;
591 }
592 
593 int
594 linux_msync(struct proc *p, struct linux_msync_args *args)
595 {
596 	struct msync_args bsd_args;
597 
598 	bsd_args.addr = args->addr;
599 	bsd_args.len = args->len;
600 	bsd_args.flags = 0;	/* XXX ignore */
601 
602 	return msync(p, &bsd_args);
603 }
604 
605 #ifndef __alpha__
606 int
607 linux_time(struct proc *p, struct linux_time_args *args)
608 {
609     struct timeval tv;
610     linux_time_t tm;
611     int error;
612 
613 #ifdef DEBUG
614     printf("Linux-emul(%ld): time(*)\n", (long)p->p_pid);
615 #endif
616     microtime(&tv);
617     tm = tv.tv_sec;
618     if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t))))
619 	return error;
620     p->p_retval[0] = tm;
621     return 0;
622 }
623 #endif	/*!__alpha__*/
624 
/*
 * Buffer filled in by linux_times() and copied out to the Linux process.
 * All values are in clock ticks as produced by CONVTCK().
 */
struct linux_times_argv {
    long    tms_utime;	/* from the process' ru_utime */
    long    tms_stime;	/* from the process' ru_stime */
    long    tms_cutime;	/* from p_stats->p_cru.ru_utime */
    long    tms_cstime;	/* from p_stats->p_cru.ru_stime */
};
631 
#define CLK_TCK 100	/* Linux uses 100 */
/*
 * Convert a struct timeval into clock ticks at CLK_TCK ticks per second.
 * Sub-tick microseconds are truncated.  The argument is parenthesized so
 * that any expression yielding a struct timeval (e.g. *ptr) can be passed;
 * it is evaluated twice.
 */
#define CONVTCK(r)	((r).tv_sec * CLK_TCK + (r).tv_usec / (1000000 / CLK_TCK))
634 
635 int
636 linux_times(struct proc *p, struct linux_times_args *args)
637 {
638     struct timeval tv;
639     struct linux_times_argv tms;
640     struct rusage ru;
641     int error;
642 
643 #ifdef DEBUG
644     printf("Linux-emul(%ld): times(*)\n", (long)p->p_pid);
645 #endif
646     calcru(p, &ru.ru_utime, &ru.ru_stime, NULL);
647 
648     tms.tms_utime = CONVTCK(ru.ru_utime);
649     tms.tms_stime = CONVTCK(ru.ru_stime);
650 
651     tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime);
652     tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime);
653 
654     if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf,
655 	    	    sizeof(struct linux_times_argv))))
656 	return error;
657 
658     microuptime(&tv);
659     p->p_retval[0] = (int)CONVTCK(tv);
660     return 0;
661 }
662 
663 int
664 linux_newuname(struct proc *p, struct linux_newuname_args *args)
665 {
666 	struct linux_new_utsname utsname;
667 	char *osrelease, *osname;
668 
669 #ifdef DEBUG
670 	printf("Linux-emul(%ld): newuname(*)\n", (long)p->p_pid);
671 #endif
672 
673 	osname = linux_get_osname(p);
674 	osrelease = linux_get_osrelease(p);
675 
676 	bzero(&utsname, sizeof(struct linux_new_utsname));
677 	strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1);
678 	strncpy(utsname.nodename, hostname, LINUX_MAX_UTSNAME-1);
679 	strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1);
680 	strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1);
681 	strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1);
682 	strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1);
683 
684 	return (copyout((caddr_t)&utsname, (caddr_t)args->buf,
685 			sizeof(struct linux_new_utsname)));
686 }
687 
/*
 * Time pair copied in from the Linux process by linux_utime(); both
 * values are whole seconds and become the tv_sec parts of the timevals
 * handed to utimes().
 */
struct linux_utimbuf {
	linux_time_t l_actime;	/* becomes tv[0] (first utimes() entry) */
	linux_time_t l_modtime;	/* becomes tv[1] (second utimes() entry) */
};
692 
693 int
694 linux_utime(struct proc *p, struct linux_utime_args *args)
695 {
696     struct utimes_args /* {
697 	char	*path;
698 	struct	timeval *tptr;
699     } */ bsdutimes;
700     struct timeval tv[2], *tvp;
701     struct linux_utimbuf lut;
702     int error;
703     caddr_t sg;
704 
705     sg = stackgap_init();
706     CHECKALTEXIST(p, &sg, args->fname);
707 
708 #ifdef DEBUG
709     printf("Linux-emul(%ld): utime(%s, *)\n", (long)p->p_pid, args->fname);
710 #endif
711     if (args->times) {
712 	if ((error = copyin(args->times, &lut, sizeof lut)))
713 	    return error;
714 	tv[0].tv_sec = lut.l_actime;
715 	tv[0].tv_usec = 0;
716 	tv[1].tv_sec = lut.l_modtime;
717 	tv[1].tv_usec = 0;
718 	/* so that utimes can copyin */
719 	tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
720 	if (tvp == NULL)
721 		return (ENAMETOOLONG);
722 	if ((error = copyout(tv, tvp, sizeof(tv))))
723 	    return error;
724 	bsdutimes.tptr = tvp;
725     } else
726 	bsdutimes.tptr = NULL;
727 
728     bsdutimes.path = args->fname;
729     return utimes(p, &bsdutimes);
730 }
731 
/*
 * Linux wait option bit; linux_waitpid() and linux_wait4() translate it
 * into WLINUXCLONE for the native wait4().
 */
#define __WCLONE 0x80000000
733 
734 #ifndef __alpha__
735 int
736 linux_waitpid(struct proc *p, struct linux_waitpid_args *args)
737 {
738     struct wait_args /* {
739 	int pid;
740 	int *status;
741 	int options;
742 	struct	rusage *rusage;
743     } */ tmp;
744     int error, tmpstat;
745 
746 #ifdef DEBUG
747     printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n",
748 	(long)p->p_pid, args->pid, (void *)args->status, args->options);
749 #endif
750     tmp.pid = args->pid;
751     tmp.status = args->status;
752     tmp.options = (args->options & (WNOHANG | WUNTRACED));
753     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
754     if (args->options & __WCLONE)
755 	tmp.options |= WLINUXCLONE;
756     tmp.rusage = NULL;
757 
758     if ((error = wait4(p, &tmp)) != 0)
759 	return error;
760 
761     if (args->status) {
762 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
763 	    return error;
764 	tmpstat &= 0xffff;
765 	if (WIFSIGNALED(tmpstat))
766 	    tmpstat = (tmpstat & 0xffffff80) |
767 		      BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
768 	else if (WIFSTOPPED(tmpstat))
769 	    tmpstat = (tmpstat & 0xffff00ff) |
770 		      (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
771 	return copyout(&tmpstat, args->status, sizeof(int));
772     } else
773 	return 0;
774 }
775 #endif	/*!__alpha__*/
776 
777 int
778 linux_wait4(struct proc *p, struct linux_wait4_args *args)
779 {
780     struct wait_args /* {
781 	int pid;
782 	int *status;
783 	int options;
784 	struct	rusage *rusage;
785     } */ tmp;
786     int error, tmpstat;
787 
788 #ifdef DEBUG
789     printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n",
790 	(long)p->p_pid, args->pid, (void *)args->status, args->options,
791 	(void *)args->rusage);
792 #endif
793     tmp.pid = args->pid;
794     tmp.status = args->status;
795     tmp.options = (args->options & (WNOHANG | WUNTRACED));
796     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
797     if (args->options & __WCLONE)
798 	tmp.options |= WLINUXCLONE;
799     tmp.rusage = args->rusage;
800 
801     if ((error = wait4(p, &tmp)) != 0)
802 	return error;
803 
804     SIGDELSET(p->p_siglist, SIGCHLD);
805 
806     if (args->status) {
807 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
808 	    return error;
809 	tmpstat &= 0xffff;
810 	if (WIFSIGNALED(tmpstat))
811 	    tmpstat = (tmpstat & 0xffffff80) |
812 		  BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
813 	else if (WIFSTOPPED(tmpstat))
814 	    tmpstat = (tmpstat & 0xffff00ff) |
815 		  (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
816 	return copyout(&tmpstat, args->status, sizeof(int));
817     } else
818 	return 0;
819 }
820 
821 int
822 linux_mknod(struct proc *p, struct linux_mknod_args *args)
823 {
824 	caddr_t sg;
825 	struct mknod_args bsd_mknod;
826 	struct mkfifo_args bsd_mkfifo;
827 
828 	sg = stackgap_init();
829 
830 	CHECKALTCREAT(p, &sg, args->path);
831 
832 #ifdef DEBUG
833 	printf("Linux-emul(%ld): mknod(%s, %d, %d)\n",
834 	   (long)p->p_pid, args->path, args->mode, args->dev);
835 #endif
836 
837 	if (args->mode & S_IFIFO) {
838 		bsd_mkfifo.path = args->path;
839 		bsd_mkfifo.mode = args->mode;
840 		return mkfifo(p, &bsd_mkfifo);
841 	} else {
842 		bsd_mknod.path = args->path;
843 		bsd_mknod.mode = args->mode;
844 		bsd_mknod.dev = args->dev;
845 		return mknod(p, &bsd_mknod);
846 	}
847 }
848 
849 /*
850  * UGH! This is just about the dumbest idea I've ever heard!!
851  */
852 int
853 linux_personality(struct proc *p, struct linux_personality_args *args)
854 {
855 #ifdef DEBUG
856 	printf("Linux-emul(%ld): personality(%d)\n",
857 	   (long)p->p_pid, args->per);
858 #endif
859 #ifndef __alpha__
860 	if (args->per != 0)
861 		return EINVAL;
862 #endif
863 
864 	/* Yes Jim, it's still a Linux... */
865 	p->p_retval[0] = 0;
866 	return 0;
867 }
868 
869 /*
870  * Wrappers for get/setitimer for debugging..
871  */
872 int
873 linux_setitimer(struct proc *p, struct linux_setitimer_args *args)
874 {
875 	struct setitimer_args bsa;
876 	struct itimerval foo;
877 	int error;
878 
879 #ifdef DEBUG
880 	printf("Linux-emul(%ld): setitimer(%p, %p)\n",
881 	    (long)p->p_pid, (void *)args->itv, (void *)args->oitv);
882 #endif
883 	bsa.which = args->which;
884 	bsa.itv = args->itv;
885 	bsa.oitv = args->oitv;
886 	if (args->itv) {
887 	    if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo,
888 			sizeof(foo))))
889 		return error;
890 #ifdef DEBUG
891 	    printf("setitimer: value: sec: %ld, usec: %ld\n",
892 		foo.it_value.tv_sec, foo.it_value.tv_usec);
893 	    printf("setitimer: interval: sec: %ld, usec: %ld\n",
894 		foo.it_interval.tv_sec, foo.it_interval.tv_usec);
895 #endif
896 	}
897 	return setitimer(p, &bsa);
898 }
899 
900 int
901 linux_getitimer(struct proc *p, struct linux_getitimer_args *args)
902 {
903 	struct getitimer_args bsa;
904 #ifdef DEBUG
905 	printf("Linux-emul(%ld): getitimer(%p)\n",
906 	    (long)p->p_pid, (void *)args->itv);
907 #endif
908 	bsa.which = args->which;
909 	bsa.itv = args->itv;
910 	return getitimer(p, &bsa);
911 }
912 
913 #ifndef __alpha__
914 int
915 linux_nice(struct proc *p, struct linux_nice_args *args)
916 {
917 	struct setpriority_args	bsd_args;
918 
919 	bsd_args.which = PRIO_PROCESS;
920 	bsd_args.who = 0;	/* current process */
921 	bsd_args.prio = args->inc;
922 	return setpriority(p, &bsd_args);
923 }
924 #endif	/*!__alpha__*/
925 
926 int
927 linux_setgroups(p, uap)
928 	struct proc *p;
929 	struct linux_setgroups_args *uap;
930 {
931 	struct pcred *pc;
932 	linux_gid_t linux_gidset[NGROUPS];
933 	gid_t *bsd_gidset;
934 	int ngrp, error;
935 
936 	pc = p->p_cred;
937 	ngrp = uap->gidsetsize;
938 
939 	/*
940 	 * cr_groups[0] holds egid. Setting the whole set from
941 	 * the supplied set will cause egid to be changed too.
942 	 * Keep cr_groups[0] unchanged to prevent that.
943 	 */
944 
945 	if ((error = suser(p)) != 0)
946 		return (error);
947 
948 	if (ngrp >= NGROUPS)
949 		return (EINVAL);
950 
951 	pc->pc_ucred = crcopy(pc->pc_ucred);
952 	if (ngrp > 0) {
953 		error = copyin((caddr_t)uap->gidset, (caddr_t)linux_gidset,
954 			       ngrp * sizeof(linux_gid_t));
955 		if (error)
956 			return (error);
957 
958 		pc->pc_ucred->cr_ngroups = ngrp + 1;
959 
960 		bsd_gidset = pc->pc_ucred->cr_groups;
961 		ngrp--;
962 		while (ngrp >= 0) {
963 			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
964 			ngrp--;
965 		}
966 	}
967 	else
968 		pc->pc_ucred->cr_ngroups = 1;
969 
970 	setsugid(p);
971 	return (0);
972 }
973 
974 int
975 linux_getgroups(p, uap)
976 	struct proc *p;
977 	struct linux_getgroups_args *uap;
978 {
979 	struct pcred *pc;
980 	linux_gid_t linux_gidset[NGROUPS];
981 	gid_t *bsd_gidset;
982 	int bsd_gidsetsz, ngrp, error;
983 
984 	pc = p->p_cred;
985 	bsd_gidset = pc->pc_ucred->cr_groups;
986 	bsd_gidsetsz = pc->pc_ucred->cr_ngroups - 1;
987 
988 	/*
989 	 * cr_groups[0] holds egid. Returning the whole set
990 	 * here will cause a duplicate. Exclude cr_groups[0]
991 	 * to prevent that.
992 	 */
993 
994 	if ((ngrp = uap->gidsetsize) == 0) {
995 		p->p_retval[0] = bsd_gidsetsz;
996 		return (0);
997 	}
998 
999 	if (ngrp < bsd_gidsetsz)
1000 		return (EINVAL);
1001 
1002 	ngrp = 0;
1003 	while (ngrp < bsd_gidsetsz) {
1004 		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1005 		ngrp++;
1006 	}
1007 
1008 	if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset,
1009 	    ngrp * sizeof(linux_gid_t))))
1010 		return (error);
1011 
1012 	p->p_retval[0] = ngrp;
1013 	return (0);
1014 }
1015 
1016 #ifndef __alpha__
1017 int
1018 linux_setrlimit(p, uap)
1019 	struct proc *p;
1020 	struct linux_setrlimit_args *uap;
1021 {
1022 	struct __setrlimit_args bsd;
1023 	struct linux_rlimit rlim;
1024 	int error;
1025 	caddr_t sg = stackgap_init();
1026 
1027 #ifdef DEBUG
1028 	printf("Linux-emul(%ld): setrlimit(%d, %p)\n", (long)p->p_pid,
1029 	    uap->resource, (void *)uap->rlim);
1030 #endif
1031 
1032 	if (uap->resource >= LINUX_RLIM_NLIMITS)
1033 		return (EINVAL);
1034 
1035 	bsd.which = linux_to_bsd_resource[uap->resource];
1036 	if (bsd.which == -1)
1037 		return (EINVAL);
1038 
1039 	error = copyin(uap->rlim, &rlim, sizeof(rlim));
1040 	if (error)
1041 		return (error);
1042 
1043 	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
1044 	bsd.rlp->rlim_cur = (rlim_t)rlim.rlim_cur;
1045 	bsd.rlp->rlim_max = (rlim_t)rlim.rlim_max;
1046 	return (setrlimit(p, &bsd));
1047 }
1048 
1049 int
1050 linux_getrlimit(p, uap)
1051 	struct proc *p;
1052 	struct linux_getrlimit_args *uap;
1053 {
1054 	struct __getrlimit_args bsd;
1055 	struct linux_rlimit rlim;
1056 	int error;
1057 	caddr_t sg = stackgap_init();
1058 
1059 #ifdef DEBUG
1060 	printf("Linux-emul(%ld): getrlimit(%d, %p)\n", (long)p->p_pid,
1061 	    uap->resource, (void *)uap->rlim);
1062 #endif
1063 
1064 	if (uap->resource >= LINUX_RLIM_NLIMITS)
1065 		return (EINVAL);
1066 
1067 	bsd.which = linux_to_bsd_resource[uap->resource];
1068 	if (bsd.which == -1)
1069 		return (EINVAL);
1070 
1071 	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
1072 	error = getrlimit(p, &bsd);
1073 	if (error)
1074 		return (error);
1075 
1076 	rlim.rlim_cur = (unsigned long)bsd.rlp->rlim_cur;
1077 	if (rlim.rlim_cur == ULONG_MAX)
1078 		rlim.rlim_cur = LONG_MAX;
1079 	rlim.rlim_max = (unsigned long)bsd.rlp->rlim_max;
1080 	if (rlim.rlim_max == ULONG_MAX)
1081 		rlim.rlim_max = LONG_MAX;
1082 	return (copyout(&rlim, uap->rlim, sizeof(rlim)));
1083 }
1084 #endif /*!__alpha__*/
1085 
1086 int
1087 linux_sched_setscheduler(p, uap)
1088 	struct proc *p;
1089 	struct linux_sched_setscheduler_args *uap;
1090 {
1091 	struct sched_setscheduler_args bsd;
1092 
1093 #ifdef DEBUG
1094 	printf("Linux-emul(%ld): sched_setscheduler(%d, %d, %p)\n",
1095 	    (long)p->p_pid, uap->pid, uap->policy, (const void *)uap->param);
1096 #endif
1097 
1098 	switch (uap->policy) {
1099 	case LINUX_SCHED_OTHER:
1100 		bsd.policy = SCHED_OTHER;
1101 		break;
1102 	case LINUX_SCHED_FIFO:
1103 		bsd.policy = SCHED_FIFO;
1104 		break;
1105 	case LINUX_SCHED_RR:
1106 		bsd.policy = SCHED_RR;
1107 		break;
1108 	default:
1109 		return EINVAL;
1110 	}
1111 
1112 	bsd.pid = uap->pid;
1113 	bsd.param = uap->param;
1114 	return sched_setscheduler(p, &bsd);
1115 }
1116 
1117 int
1118 linux_sched_getscheduler(p, uap)
1119 	struct proc *p;
1120 	struct linux_sched_getscheduler_args *uap;
1121 {
1122 	struct sched_getscheduler_args bsd;
1123 	int error;
1124 
1125 #ifdef DEBUG
1126 	printf("Linux-emul(%ld): sched_getscheduler(%d)\n",
1127 	       (long)p->p_pid, uap->pid);
1128 #endif
1129 
1130 	bsd.pid = uap->pid;
1131 	error = sched_getscheduler(p, &bsd);
1132 
1133 	switch (p->p_retval[0]) {
1134 	case SCHED_OTHER:
1135 		p->p_retval[0] = LINUX_SCHED_OTHER;
1136 		break;
1137 	case SCHED_FIFO:
1138 		p->p_retval[0] = LINUX_SCHED_FIFO;
1139 		break;
1140 	case SCHED_RR:
1141 		p->p_retval[0] = LINUX_SCHED_RR;
1142 		break;
1143 	}
1144 
1145 	return error;
1146 }
1147