xref: /freebsd/sys/compat/linux/linux_misc.c (revision ba4c5e6950b14ce5eb69bc5d953574f203e01dc4)
1 /*-
2  * Copyright (c) 1994-1995 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include "opt_compat.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/sysproto.h>
36 #include <sys/kernel.h>
37 #include <sys/mman.h>
38 #include <sys/proc.h>
39 #include <sys/fcntl.h>
40 #include <sys/imgact_aout.h>
41 #include <sys/mount.h>
42 #include <sys/namei.h>
43 #include <sys/resourcevar.h>
44 #include <sys/stat.h>
45 #include <sys/sysctl.h>
46 #include <sys/unistd.h>
47 #include <sys/vnode.h>
48 #include <sys/wait.h>
49 #include <sys/time.h>
50 #include <sys/signalvar.h>
51 
52 #include <vm/vm.h>
53 #include <vm/pmap.h>
54 #include <vm/vm_kern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_extern.h>
57 
58 #include <machine/frame.h>
59 #include <machine/limits.h>
60 #include <machine/psl.h>
61 #include <machine/sysarch.h>
62 #ifdef __i386__
63 #include <machine/segments.h>
64 #endif
65 
66 #include <posix4/sched.h>
67 
68 #include <machine/../linux/linux.h>
69 #include <machine/../linux/linux_proto.h>
70 #include <compat/linux/linux_mib.h>
71 #include <compat/linux/linux_util.h>
72 
/*
 * Translate a FreeBSD signal number into the number reported to a Linux
 * binary.
 *
 * On alpha the macro is the identity.  Elsewhere, values up to
 * LINUX_SIGTBLSZ are looked up in bsd_to_linux_signal[] (indexed through
 * _SIG_IDX()) and anything larger is passed through unchanged.
 *
 * The argument is expanded more than once, so it must be free of side
 * effects.  Every use of the argument is parenthesized so an arbitrary
 * expression can be passed safely.
 */
#ifdef __alpha__
#define BSD_TO_LINUX_SIGNAL(sig)       (sig)
#else
#define BSD_TO_LINUX_SIGNAL(sig)	\
	(((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : (sig))
#endif
79 
/*
 * Resource limit pair as exchanged with Linux binaries by the
 * setrlimit/getrlimit wrappers in this file: two unsigned longs,
 * current limit first, maximum second.
 */
struct linux_rlimit {
	unsigned long	rlim_cur;	/* current limit */
	unsigned long	rlim_max;	/* maximum limit */
};
84 
85 #ifndef __alpha__
86 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] =
87 { RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
88   RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
89   RLIMIT_MEMLOCK, -1
90 };
91 #endif /*!__alpha__*/
92 
93 #ifndef __alpha__
94 int
95 linux_alarm(struct proc *p, struct linux_alarm_args *args)
96 {
97     struct itimerval it, old_it;
98     struct timeval tv;
99     int s;
100 
101 #ifdef DEBUG
102     printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs);
103 #endif
104     if (args->secs > 100000000)
105 	return EINVAL;
106     it.it_value.tv_sec = (long)args->secs;
107     it.it_value.tv_usec = 0;
108     it.it_interval.tv_sec = 0;
109     it.it_interval.tv_usec = 0;
110     s = splsoftclock();
111     old_it = p->p_realtimer;
112     getmicrouptime(&tv);
113     if (timevalisset(&old_it.it_value))
114 	callout_stop(&p->p_itcallout);
115     if (it.it_value.tv_sec != 0) {
116 	callout_reset(&p->p_itcallout, tvtohz(&it.it_value), realitexpire, p);
117 	timevaladd(&it.it_value, &tv);
118     }
119     p->p_realtimer = it;
120     splx(s);
121     if (timevalcmp(&old_it.it_value, &tv, >)) {
122 	timevalsub(&old_it.it_value, &tv);
123 	if (old_it.it_value.tv_usec != 0)
124 	    old_it.it_value.tv_sec++;
125 	p->p_retval[0] = old_it.it_value.tv_sec;
126     }
127     return 0;
128 }
129 #endif /*!__alpha__*/
130 
131 int
132 linux_brk(struct proc *p, struct linux_brk_args *args)
133 {
134 #if 0
135     struct vmspace *vm = p->p_vmspace;
136     vm_offset_t new, old;
137     int error;
138 
139     if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr)
140 	return EINVAL;
141     if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr)
142 	> p->p_rlimit[RLIMIT_DATA].rlim_cur)
143 	return ENOMEM;
144 
145     old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize);
146     new = round_page((vm_offset_t)args->dsend);
147     p->p_retval[0] = old;
148     if ((new-old) > 0) {
149 	if (swap_pager_full)
150 	    return ENOMEM;
151 	error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE,
152 			VM_PROT_ALL, VM_PROT_ALL, 0);
153 	if (error)
154 	    return error;
155 	vm->vm_dsize += btoc((new-old));
156 	p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize));
157     }
158     return 0;
159 #else
160     struct vmspace *vm = p->p_vmspace;
161     vm_offset_t new, old;
162     struct obreak_args /* {
163 	char * nsize;
164     } */ tmp;
165 
166 #ifdef DEBUG
167     printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend);
168 #endif
169     old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
170     new = (vm_offset_t)args->dsend;
171     tmp.nsize = (char *) new;
172     if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp))
173 	p->p_retval[0] = (long)new;
174     else
175 	p->p_retval[0] = (long)old;
176 
177     return 0;
178 #endif
179 }
180 
181 int
182 linux_uselib(struct proc *p, struct linux_uselib_args *args)
183 {
184     struct nameidata ni;
185     struct vnode *vp;
186     struct exec *a_out;
187     struct vattr attr;
188     struct ucred *uc;
189     vm_offset_t vmaddr;
190     unsigned long file_offset;
191     vm_offset_t buffer;
192     unsigned long bss_size;
193     int error;
194     caddr_t sg;
195     int locked;
196 
197     sg = stackgap_init();
198     CHECKALTEXIST(p, &sg, args->library);
199 
200 #ifdef DEBUG
201     printf("Linux-emul(%ld): uselib(%s)\n", (long)p->p_pid, args->library);
202 #endif
203 
204     a_out = NULL;
205     locked = 0;
206     vp = NULL;
207 
208     NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, args->library, p);
209     error = namei(&ni);
210     if (error)
211 	goto cleanup;
212 
213     vp = ni.ni_vp;
214     /*
215      * XXX This looks like a bogus check - a LOCKLEAF namei should not succeed
216      * without returning a vnode.
217      */
218     if (vp == NULL) {
219 	error = ENOEXEC;	/* ?? */
220 	goto cleanup;
221     }
222     NDFREE(&ni, NDF_ONLY_PNBUF);
223 
224     /*
225      * From here on down, we have a locked vnode that must be unlocked.
226      */
227     locked++;
228 
229     /*
230      * Writable?
231      */
232     if (vp->v_writecount) {
233 	error = ETXTBSY;
234 	goto cleanup;
235     }
236 
237     /*
238      * Executable?
239      */
240     PROC_LOCK(p);
241     uc = p->p_ucred;
242     crhold(uc);
243     PROC_UNLOCK(p);
244     error = VOP_GETATTR(vp, &attr, uc, p);
245     if (error) {
246 	crfree(uc);
247 	goto cleanup;
248     }
249 
250     if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
251 	((attr.va_mode & 0111) == 0) ||
252 	(attr.va_type != VREG)) {
253 	    error = ENOEXEC;
254 	    crfree(uc);
255 	    goto cleanup;
256     }
257 
258     /*
259      * Sensible size?
260      */
261     if (attr.va_size == 0) {
262 	error = ENOEXEC;
263 	crfree(uc);
264 	goto cleanup;
265     }
266 
267     /*
268      * Can we access it?
269      */
270     error = VOP_ACCESS(vp, VEXEC, uc, p);
271     if (error) {
272 	crfree(uc);
273 	goto cleanup;
274     }
275 
276     error = VOP_OPEN(vp, FREAD, uc, p);
277     crfree(uc);
278     if (error)
279 	goto cleanup;
280 
281     /*
282      * Lock no longer needed
283      */
284     VOP_UNLOCK(vp, 0, p);
285     locked = 0;
286 
287     /*
288      * Pull in executable header into kernel_map
289      */
290     error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
291 	    	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
292     if (error)
293 	goto cleanup;
294 
295     /*
296      * Is it a Linux binary ?
297      */
298     if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
299 	error = ENOEXEC;
300 	goto cleanup;
301     }
302 
303     /* While we are here, we should REALLY do some more checks */
304 
305     /*
306      * Set file/virtual offset based on a.out variant.
307      */
308     switch ((int)(a_out->a_magic & 0xffff)) {
309     case 0413:	/* ZMAGIC */
310 	file_offset = 1024;
311 	break;
312     case 0314:	/* QMAGIC */
313 	file_offset = 0;
314 	break;
315     default:
316 	error = ENOEXEC;
317 	goto cleanup;
318     }
319 
320     bss_size = round_page(a_out->a_bss);
321 
322     /*
323      * Check various fields in header for validity/bounds.
324      */
325     if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
326 	error = ENOEXEC;
327 	goto cleanup;
328     }
329 
330     /* text + data can't exceed file size */
331     if (a_out->a_data + a_out->a_text > attr.va_size) {
332 	error = EFAULT;
333 	goto cleanup;
334     }
335 
336     /* To protect p->p_rlimit in the if condition. */
337     mtx_assert(&Giant, MA_OWNED);
338 
339     /*
340      * text/data/bss must not exceed limits
341      * XXX: this is not complete. it should check current usage PLUS
342      * the resources needed by this library.
343      */
344     if (a_out->a_text > MAXTSIZ ||
345 	a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) {
346 	error = ENOMEM;
347 	goto cleanup;
348     }
349 
350     /*
351      * prevent more writers
352      */
353     vp->v_flag |= VTEXT;
354 
355     /*
356      * Check if file_offset page aligned,.
357      * Currently we cannot handle misalinged file offsets,
358      * and so we read in the entire image (what a waste).
359      */
360     if (file_offset & PAGE_MASK) {
361 #ifdef DEBUG
362 printf("uselib: Non page aligned binary %lu\n", file_offset);
363 #endif
364 	/*
365 	 * Map text+data read/write/execute
366 	 */
367 
368 	/* a_entry is the load address and is page aligned */
369 	vmaddr = trunc_page(a_out->a_entry);
370 
371 	/* get anon user mapping, read+write+execute */
372 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
373 		    	    a_out->a_text + a_out->a_data, FALSE,
374 			    VM_PROT_ALL, VM_PROT_ALL, 0);
375 	if (error)
376 	    goto cleanup;
377 
378 	/* map file into kernel_map */
379 	error = vm_mmap(kernel_map, &buffer,
380 			round_page(a_out->a_text + a_out->a_data + file_offset),
381 		   	VM_PROT_READ, VM_PROT_READ, 0,
382 			(caddr_t)vp, trunc_page(file_offset));
383 	if (error)
384 	    goto cleanup;
385 
386 	/* copy from kernel VM space to user space */
387 	error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset),
388 			(caddr_t)vmaddr, a_out->a_text + a_out->a_data);
389 
390 	/* release temporary kernel space */
391 	vm_map_remove(kernel_map, buffer,
392 		      buffer + round_page(a_out->a_text + a_out->a_data + file_offset));
393 
394 	if (error)
395 	    goto cleanup;
396     }
397     else {
398 #ifdef DEBUG
399 printf("uselib: Page aligned binary %lu\n", file_offset);
400 #endif
401 	/*
402 	 * for QMAGIC, a_entry is 20 bytes beyond the load address
403 	 * to skip the executable header
404 	 */
405 	vmaddr = trunc_page(a_out->a_entry);
406 
407 	/*
408 	 * Map it all into the process's space as a single copy-on-write
409 	 * "data" segment.
410 	 */
411 	error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr,
412 		   	a_out->a_text + a_out->a_data,
413 			VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
414 			(caddr_t)vp, file_offset);
415 	if (error)
416 	    goto cleanup;
417     }
418 #ifdef DEBUG
419 printf("mem=%08lx = %08lx %08lx\n", vmaddr, ((long*)vmaddr)[0], ((long*)vmaddr)[1]);
420 #endif
421     if (bss_size != 0) {
422         /*
423 	 * Calculate BSS start address
424 	 */
425 	vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data;
426 
427 	/*
428 	 * allocate some 'anon' space
429 	 */
430 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
431 			    bss_size, FALSE,
432 			    VM_PROT_ALL, VM_PROT_ALL, 0);
433 	if (error)
434 	    goto cleanup;
435     }
436 
437 cleanup:
438     /*
439      * Unlock vnode if needed
440      */
441     if (locked)
442 	VOP_UNLOCK(vp, 0, p);
443 
444     /*
445      * Release the kernel mapping.
446      */
447     if (a_out)
448 	vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE);
449 
450     return error;
451 }
452 
453 int
454 linux_newselect(struct proc *p, struct linux_newselect_args *args)
455 {
456     struct select_args bsa;
457     struct timeval tv0, tv1, utv, *tvp;
458     caddr_t sg;
459     int error;
460 
461 #ifdef DEBUG
462     printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n",
463   	(long)p->p_pid, args->nfds, (void *)args->readfds,
464 	(void *)args->writefds, (void *)args->exceptfds,
465 	(void *)args->timeout);
466 #endif
467     error = 0;
468     bsa.nd = args->nfds;
469     bsa.in = args->readfds;
470     bsa.ou = args->writefds;
471     bsa.ex = args->exceptfds;
472     bsa.tv = args->timeout;
473 
474     /*
475      * Store current time for computation of the amount of
476      * time left.
477      */
478     if (args->timeout) {
479 	if ((error = copyin(args->timeout, &utv, sizeof(utv))))
480 	    goto select_out;
481 #ifdef DEBUG
482 	printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n",
483 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
484 #endif
485 	if (itimerfix(&utv)) {
486 	    /*
487 	     * The timeval was invalid.  Convert it to something
488 	     * valid that will act as it does under Linux.
489 	     */
490 	    sg = stackgap_init();
491 	    tvp = stackgap_alloc(&sg, sizeof(utv));
492 	    utv.tv_sec += utv.tv_usec / 1000000;
493 	    utv.tv_usec %= 1000000;
494 	    if (utv.tv_usec < 0) {
495 		utv.tv_sec -= 1;
496 		utv.tv_usec += 1000000;
497 	    }
498 	    if (utv.tv_sec < 0)
499 		timevalclear(&utv);
500 	    if ((error = copyout(&utv, tvp, sizeof(utv))))
501 		goto select_out;
502 	    bsa.tv = tvp;
503 	}
504 	microtime(&tv0);
505     }
506 
507     error = select(p, &bsa);
508 #ifdef DEBUG
509     printf("Linux-emul(%ld): real select returns %d\n", (long)p->p_pid, error);
510 #endif
511 
512     if (error) {
513 	/*
514 	 * See fs/select.c in the Linux kernel.  Without this,
515 	 * Maelstrom doesn't work.
516 	 */
517 	if (error == ERESTART)
518 	    error = EINTR;
519 	goto select_out;
520     }
521 
522     if (args->timeout) {
523 	if (p->p_retval[0]) {
524 	    /*
525 	     * Compute how much time was left of the timeout,
526 	     * by subtracting the current time and the time
527 	     * before we started the call, and subtracting
528 	     * that result from the user-supplied value.
529 	     */
530 	    microtime(&tv1);
531 	    timevalsub(&tv1, &tv0);
532 	    timevalsub(&utv, &tv1);
533 	    if (utv.tv_sec < 0)
534 		timevalclear(&utv);
535 	} else
536 	    timevalclear(&utv);
537 #ifdef DEBUG
538 	printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n",
539 	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
540 #endif
541 	if ((error = copyout(&utv, args->timeout, sizeof(utv))))
542 	    goto select_out;
543     }
544 
545 select_out:
546 #ifdef DEBUG
547     printf("Linux-emul(%ld): newselect_out -> %d\n", (long)p->p_pid, error);
548 #endif
549     return error;
550 }
551 
552 int
553 linux_getpgid(struct proc *p, struct linux_getpgid_args *args)
554 {
555     struct proc *curp;
556 
557 #ifdef DEBUG
558     printf("Linux-emul(%ld): getpgid(%d)\n", (long)p->p_pid, args->pid);
559 #endif
560     if (args->pid != p->p_pid) {
561 	if (!(curp = pfind(args->pid)))
562 	    return ESRCH;
563     }
564     else
565 	curp = p;
566     p->p_retval[0] = curp->p_pgid;
567     return 0;
568 }
569 
570 int
571 linux_mremap(struct proc *p, struct linux_mremap_args *args)
572 {
573 	struct munmap_args /* {
574 		void *addr;
575 		size_t len;
576 	} */ bsd_args;
577 	int error = 0;
578 
579 #ifdef DEBUG
580 	printf("Linux-emul(%ld): mremap(%p, %08lx, %08lx, %08lx)\n",
581 	    (long)p->p_pid, (void *)args->addr,
582 	    (unsigned long)args->old_len,
583 	    (unsigned long)args->new_len,
584 	    (unsigned long)args->flags);
585 #endif
586 	args->new_len = round_page(args->new_len);
587 	args->old_len = round_page(args->old_len);
588 
589 	if (args->new_len > args->old_len) {
590 		p->p_retval[0] = 0;
591 		return ENOMEM;
592 	}
593 
594 	if (args->new_len < args->old_len) {
595 		bsd_args.addr = args->addr + args->new_len;
596 		bsd_args.len = args->old_len - args->new_len;
597 		error = munmap(p, &bsd_args);
598 	}
599 
600 	p->p_retval[0] = error ? 0 : (u_long)args->addr;
601 	return error;
602 }
603 
604 int
605 linux_msync(struct proc *p, struct linux_msync_args *args)
606 {
607 	struct msync_args bsd_args;
608 
609 	bsd_args.addr = args->addr;
610 	bsd_args.len = args->len;
611 	bsd_args.flags = 0;	/* XXX ignore */
612 
613 	return msync(p, &bsd_args);
614 }
615 
616 #ifndef __alpha__
617 int
618 linux_time(struct proc *p, struct linux_time_args *args)
619 {
620     struct timeval tv;
621     linux_time_t tm;
622     int error;
623 
624 #ifdef DEBUG
625     printf("Linux-emul(%ld): time(*)\n", (long)p->p_pid);
626 #endif
627     microtime(&tv);
628     tm = tv.tv_sec;
629     if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t))))
630 	return error;
631     p->p_retval[0] = tm;
632     return 0;
633 }
634 #endif	/*!__alpha__*/
635 
/*
 * Buffer copied out by linux_times(): four tick counts, each a long,
 * in this order.
 */
struct linux_times_argv {
	long	tms_utime;	/* from the process's ru_utime */
	long	tms_stime;	/* from the process's ru_stime */
	long	tms_cutime;	/* from p_stats->p_cru.ru_utime */
	long	tms_cstime;	/* from p_stats->p_cru.ru_stime */
};
642 
/*
 * Tick rate reported to Linux binaries, and conversion of a struct
 * timeval (seconds + microseconds) into that many ticks.  Sub-tick
 * microseconds are truncated.  The argument is parenthesized so that any
 * lvalue expression (e.g. a dereferenced pointer) can be passed; it is
 * evaluated twice, so it must have no side effects.
 */
#define CLK_TCK 100	/* Linux uses 100 */
#define CONVTCK(r)	((r).tv_sec * CLK_TCK + (r).tv_usec / (1000000 / CLK_TCK))
645 
646 int
647 linux_times(struct proc *p, struct linux_times_args *args)
648 {
649     struct timeval tv;
650     struct linux_times_argv tms;
651     struct rusage ru;
652     int error;
653 
654 #ifdef DEBUG
655     printf("Linux-emul(%ld): times(*)\n", (long)p->p_pid);
656 #endif
657     mtx_enter(&sched_lock, MTX_SPIN);
658     calcru(p, &ru.ru_utime, &ru.ru_stime, NULL);
659     mtx_exit(&sched_lock, MTX_SPIN);
660 
661     tms.tms_utime = CONVTCK(ru.ru_utime);
662     tms.tms_stime = CONVTCK(ru.ru_stime);
663 
664     tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime);
665     tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime);
666 
667     if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf,
668 	    	    sizeof(struct linux_times_argv))))
669 	return error;
670 
671     microuptime(&tv);
672     p->p_retval[0] = (int)CONVTCK(tv);
673     return 0;
674 }
675 
676 int
677 linux_newuname(struct proc *p, struct linux_newuname_args *args)
678 {
679 	struct linux_new_utsname utsname;
680 	char *osrelease, *osname;
681 
682 #ifdef DEBUG
683 	printf("Linux-emul(%ld): newuname(*)\n", (long)p->p_pid);
684 #endif
685 
686 	osname = linux_get_osname(p);
687 	osrelease = linux_get_osrelease(p);
688 
689 	bzero(&utsname, sizeof(struct linux_new_utsname));
690 	strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1);
691 	strncpy(utsname.nodename, hostname, LINUX_MAX_UTSNAME-1);
692 	strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1);
693 	strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1);
694 	strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1);
695 	strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1);
696 
697 	return (copyout((caddr_t)&utsname, (caddr_t)args->buf,
698 			sizeof(struct linux_new_utsname)));
699 }
700 
701 struct linux_utimbuf {
702 	linux_time_t l_actime;
703 	linux_time_t l_modtime;
704 };
705 
706 int
707 linux_utime(struct proc *p, struct linux_utime_args *args)
708 {
709     struct utimes_args /* {
710 	char	*path;
711 	struct	timeval *tptr;
712     } */ bsdutimes;
713     struct timeval tv[2], *tvp;
714     struct linux_utimbuf lut;
715     int error;
716     caddr_t sg;
717 
718     sg = stackgap_init();
719     CHECKALTEXIST(p, &sg, args->fname);
720 
721 #ifdef DEBUG
722     printf("Linux-emul(%ld): utime(%s, *)\n", (long)p->p_pid, args->fname);
723 #endif
724     if (args->times) {
725 	if ((error = copyin(args->times, &lut, sizeof lut)))
726 	    return error;
727 	tv[0].tv_sec = lut.l_actime;
728 	tv[0].tv_usec = 0;
729 	tv[1].tv_sec = lut.l_modtime;
730 	tv[1].tv_usec = 0;
731 	/* so that utimes can copyin */
732 	tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
733 	if (tvp == NULL)
734 		return (ENAMETOOLONG);
735 	if ((error = copyout(tv, tvp, sizeof(tv))))
736 	    return error;
737 	bsdutimes.tptr = tvp;
738     } else
739 	bsdutimes.tptr = NULL;
740 
741     bsdutimes.path = args->fname;
742     return utimes(p, &bsdutimes);
743 }
744 
/*
 * Linux wait option bit.  The wait wrappers in this file translate it to
 * WLINUXCLONE instead of passing it through unchanged.
 */
#define __WCLONE 0x80000000
746 
747 #ifndef __alpha__
748 int
749 linux_waitpid(struct proc *p, struct linux_waitpid_args *args)
750 {
751     struct wait_args /* {
752 	int pid;
753 	int *status;
754 	int options;
755 	struct	rusage *rusage;
756     } */ tmp;
757     int error, tmpstat;
758 
759 #ifdef DEBUG
760     printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n",
761 	(long)p->p_pid, args->pid, (void *)args->status, args->options);
762 #endif
763     tmp.pid = args->pid;
764     tmp.status = args->status;
765     tmp.options = (args->options & (WNOHANG | WUNTRACED));
766     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
767     if (args->options & __WCLONE)
768 	tmp.options |= WLINUXCLONE;
769     tmp.rusage = NULL;
770 
771     if ((error = wait4(p, &tmp)) != 0)
772 	return error;
773 
774     if (args->status) {
775 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
776 	    return error;
777 	tmpstat &= 0xffff;
778 	if (WIFSIGNALED(tmpstat))
779 	    tmpstat = (tmpstat & 0xffffff80) |
780 		      BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
781 	else if (WIFSTOPPED(tmpstat))
782 	    tmpstat = (tmpstat & 0xffff00ff) |
783 		      (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
784 	return copyout(&tmpstat, args->status, sizeof(int));
785     } else
786 	return 0;
787 }
788 #endif	/*!__alpha__*/
789 
790 int
791 linux_wait4(struct proc *p, struct linux_wait4_args *args)
792 {
793     struct wait_args /* {
794 	int pid;
795 	int *status;
796 	int options;
797 	struct	rusage *rusage;
798     } */ tmp;
799     int error, tmpstat;
800 
801 #ifdef DEBUG
802     printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n",
803 	(long)p->p_pid, args->pid, (void *)args->status, args->options,
804 	(void *)args->rusage);
805 #endif
806     tmp.pid = args->pid;
807     tmp.status = args->status;
808     tmp.options = (args->options & (WNOHANG | WUNTRACED));
809     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
810     if (args->options & __WCLONE)
811 	tmp.options |= WLINUXCLONE;
812     tmp.rusage = args->rusage;
813 
814     if ((error = wait4(p, &tmp)) != 0)
815 	return error;
816 
817     SIGDELSET(p->p_siglist, SIGCHLD);
818 
819     if (args->status) {
820 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
821 	    return error;
822 	tmpstat &= 0xffff;
823 	if (WIFSIGNALED(tmpstat))
824 	    tmpstat = (tmpstat & 0xffffff80) |
825 		  BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
826 	else if (WIFSTOPPED(tmpstat))
827 	    tmpstat = (tmpstat & 0xffff00ff) |
828 		  (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
829 	return copyout(&tmpstat, args->status, sizeof(int));
830     } else
831 	return 0;
832 }
833 
834 int
835 linux_mknod(struct proc *p, struct linux_mknod_args *args)
836 {
837 	caddr_t sg;
838 	struct mknod_args bsd_mknod;
839 	struct mkfifo_args bsd_mkfifo;
840 
841 	sg = stackgap_init();
842 
843 	CHECKALTCREAT(p, &sg, args->path);
844 
845 #ifdef DEBUG
846 	printf("Linux-emul(%ld): mknod(%s, %d, %d)\n",
847 	   (long)p->p_pid, args->path, args->mode, args->dev);
848 #endif
849 
850 	if (args->mode & S_IFIFO) {
851 		bsd_mkfifo.path = args->path;
852 		bsd_mkfifo.mode = args->mode;
853 		return mkfifo(p, &bsd_mkfifo);
854 	} else {
855 		bsd_mknod.path = args->path;
856 		bsd_mknod.mode = args->mode;
857 		bsd_mknod.dev = args->dev;
858 		return mknod(p, &bsd_mknod);
859 	}
860 }
861 
862 /*
863  * UGH! This is just about the dumbest idea I've ever heard!!
864  */
865 int
866 linux_personality(struct proc *p, struct linux_personality_args *args)
867 {
868 #ifdef DEBUG
869 	printf("Linux-emul(%ld): personality(%d)\n",
870 	   (long)p->p_pid, args->per);
871 #endif
872 #ifndef __alpha__
873 	if (args->per != 0)
874 		return EINVAL;
875 #endif
876 
877 	/* Yes Jim, it's still a Linux... */
878 	p->p_retval[0] = 0;
879 	return 0;
880 }
881 
882 /*
883  * Wrappers for get/setitimer for debugging..
884  */
885 int
886 linux_setitimer(struct proc *p, struct linux_setitimer_args *args)
887 {
888 	struct setitimer_args bsa;
889 	struct itimerval foo;
890 	int error;
891 
892 #ifdef DEBUG
893 	printf("Linux-emul(%ld): setitimer(%p, %p)\n",
894 	    (long)p->p_pid, (void *)args->itv, (void *)args->oitv);
895 #endif
896 	bsa.which = args->which;
897 	bsa.itv = args->itv;
898 	bsa.oitv = args->oitv;
899 	if (args->itv) {
900 	    if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo,
901 			sizeof(foo))))
902 		return error;
903 #ifdef DEBUG
904 	    printf("setitimer: value: sec: %ld, usec: %ld\n",
905 		foo.it_value.tv_sec, foo.it_value.tv_usec);
906 	    printf("setitimer: interval: sec: %ld, usec: %ld\n",
907 		foo.it_interval.tv_sec, foo.it_interval.tv_usec);
908 #endif
909 	}
910 	return setitimer(p, &bsa);
911 }
912 
913 int
914 linux_getitimer(struct proc *p, struct linux_getitimer_args *args)
915 {
916 	struct getitimer_args bsa;
917 #ifdef DEBUG
918 	printf("Linux-emul(%ld): getitimer(%p)\n",
919 	    (long)p->p_pid, (void *)args->itv);
920 #endif
921 	bsa.which = args->which;
922 	bsa.itv = args->itv;
923 	return getitimer(p, &bsa);
924 }
925 
926 #ifndef __alpha__
927 int
928 linux_nice(struct proc *p, struct linux_nice_args *args)
929 {
930 	struct setpriority_args	bsd_args;
931 
932 	bsd_args.which = PRIO_PROCESS;
933 	bsd_args.who = 0;	/* current process */
934 	bsd_args.prio = args->inc;
935 	return setpriority(p, &bsd_args);
936 }
937 #endif	/*!__alpha__*/
938 
939 int
940 linux_setgroups(p, uap)
941 	struct proc *p;
942 	struct linux_setgroups_args *uap;
943 {
944 	struct pcred *pc;
945 	linux_gid_t linux_gidset[NGROUPS];
946 	gid_t *bsd_gidset;
947 	int ngrp, error;
948 
949 	pc = p->p_cred;
950 	ngrp = uap->gidsetsize;
951 
952 	/*
953 	 * cr_groups[0] holds egid. Setting the whole set from
954 	 * the supplied set will cause egid to be changed too.
955 	 * Keep cr_groups[0] unchanged to prevent that.
956 	 */
957 
958 	if ((error = suser(p)) != 0)
959 		return (error);
960 
961 	if (ngrp >= NGROUPS)
962 		return (EINVAL);
963 
964 	pc->pc_ucred = crcopy(pc->pc_ucred);
965 	if (ngrp > 0) {
966 		error = copyin((caddr_t)uap->gidset, (caddr_t)linux_gidset,
967 			       ngrp * sizeof(linux_gid_t));
968 		if (error)
969 			return (error);
970 
971 		pc->pc_ucred->cr_ngroups = ngrp + 1;
972 
973 		bsd_gidset = pc->pc_ucred->cr_groups;
974 		ngrp--;
975 		while (ngrp >= 0) {
976 			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
977 			ngrp--;
978 		}
979 	}
980 	else
981 		pc->pc_ucred->cr_ngroups = 1;
982 
983 	setsugid(p);
984 	return (0);
985 }
986 
987 int
988 linux_getgroups(p, uap)
989 	struct proc *p;
990 	struct linux_getgroups_args *uap;
991 {
992 	struct pcred *pc;
993 	linux_gid_t linux_gidset[NGROUPS];
994 	gid_t *bsd_gidset;
995 	int bsd_gidsetsz, ngrp, error;
996 
997 	pc = p->p_cred;
998 	bsd_gidset = pc->pc_ucred->cr_groups;
999 	bsd_gidsetsz = pc->pc_ucred->cr_ngroups - 1;
1000 
1001 	/*
1002 	 * cr_groups[0] holds egid. Returning the whole set
1003 	 * here will cause a duplicate. Exclude cr_groups[0]
1004 	 * to prevent that.
1005 	 */
1006 
1007 	if ((ngrp = uap->gidsetsize) == 0) {
1008 		p->p_retval[0] = bsd_gidsetsz;
1009 		return (0);
1010 	}
1011 
1012 	if (ngrp < bsd_gidsetsz)
1013 		return (EINVAL);
1014 
1015 	ngrp = 0;
1016 	while (ngrp < bsd_gidsetsz) {
1017 		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1018 		ngrp++;
1019 	}
1020 
1021 	if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset,
1022 	    ngrp * sizeof(linux_gid_t))))
1023 		return (error);
1024 
1025 	p->p_retval[0] = ngrp;
1026 	return (0);
1027 }
1028 
1029 #ifndef __alpha__
1030 int
1031 linux_setrlimit(p, uap)
1032 	struct proc *p;
1033 	struct linux_setrlimit_args *uap;
1034 {
1035 	struct __setrlimit_args bsd;
1036 	struct linux_rlimit rlim;
1037 	int error;
1038 	caddr_t sg = stackgap_init();
1039 
1040 #ifdef DEBUG
1041 	printf("Linux-emul(%ld): setrlimit(%d, %p)\n", (long)p->p_pid,
1042 	    uap->resource, (void *)uap->rlim);
1043 #endif
1044 
1045 	if (uap->resource >= LINUX_RLIM_NLIMITS)
1046 		return (EINVAL);
1047 
1048 	bsd.which = linux_to_bsd_resource[uap->resource];
1049 	if (bsd.which == -1)
1050 		return (EINVAL);
1051 
1052 	error = copyin(uap->rlim, &rlim, sizeof(rlim));
1053 	if (error)
1054 		return (error);
1055 
1056 	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
1057 	bsd.rlp->rlim_cur = (rlim_t)rlim.rlim_cur;
1058 	bsd.rlp->rlim_max = (rlim_t)rlim.rlim_max;
1059 	return (setrlimit(p, &bsd));
1060 }
1061 
1062 int
1063 linux_getrlimit(p, uap)
1064 	struct proc *p;
1065 	struct linux_getrlimit_args *uap;
1066 {
1067 	struct __getrlimit_args bsd;
1068 	struct linux_rlimit rlim;
1069 	int error;
1070 	caddr_t sg = stackgap_init();
1071 
1072 #ifdef DEBUG
1073 	printf("Linux-emul(%ld): getrlimit(%d, %p)\n", (long)p->p_pid,
1074 	    uap->resource, (void *)uap->rlim);
1075 #endif
1076 
1077 	if (uap->resource >= LINUX_RLIM_NLIMITS)
1078 		return (EINVAL);
1079 
1080 	bsd.which = linux_to_bsd_resource[uap->resource];
1081 	if (bsd.which == -1)
1082 		return (EINVAL);
1083 
1084 	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
1085 	error = getrlimit(p, &bsd);
1086 	if (error)
1087 		return (error);
1088 
1089 	rlim.rlim_cur = (unsigned long)bsd.rlp->rlim_cur;
1090 	if (rlim.rlim_cur == ULONG_MAX)
1091 		rlim.rlim_cur = LONG_MAX;
1092 	rlim.rlim_max = (unsigned long)bsd.rlp->rlim_max;
1093 	if (rlim.rlim_max == ULONG_MAX)
1094 		rlim.rlim_max = LONG_MAX;
1095 	return (copyout(&rlim, uap->rlim, sizeof(rlim)));
1096 }
1097 #endif /*!__alpha__*/
1098 
1099 int
1100 linux_sched_setscheduler(p, uap)
1101 	struct proc *p;
1102 	struct linux_sched_setscheduler_args *uap;
1103 {
1104 	struct sched_setscheduler_args bsd;
1105 
1106 #ifdef DEBUG
1107 	printf("Linux-emul(%ld): sched_setscheduler(%d, %d, %p)\n",
1108 	    (long)p->p_pid, uap->pid, uap->policy, (const void *)uap->param);
1109 #endif
1110 
1111 	switch (uap->policy) {
1112 	case LINUX_SCHED_OTHER:
1113 		bsd.policy = SCHED_OTHER;
1114 		break;
1115 	case LINUX_SCHED_FIFO:
1116 		bsd.policy = SCHED_FIFO;
1117 		break;
1118 	case LINUX_SCHED_RR:
1119 		bsd.policy = SCHED_RR;
1120 		break;
1121 	default:
1122 		return EINVAL;
1123 	}
1124 
1125 	bsd.pid = uap->pid;
1126 	bsd.param = uap->param;
1127 	return sched_setscheduler(p, &bsd);
1128 }
1129 
1130 int
1131 linux_sched_getscheduler(p, uap)
1132 	struct proc *p;
1133 	struct linux_sched_getscheduler_args *uap;
1134 {
1135 	struct sched_getscheduler_args bsd;
1136 	int error;
1137 
1138 #ifdef DEBUG
1139 	printf("Linux-emul(%ld): sched_getscheduler(%d)\n",
1140 	       (long)p->p_pid, uap->pid);
1141 #endif
1142 
1143 	bsd.pid = uap->pid;
1144 	error = sched_getscheduler(p, &bsd);
1145 
1146 	switch (p->p_retval[0]) {
1147 	case SCHED_OTHER:
1148 		p->p_retval[0] = LINUX_SCHED_OTHER;
1149 		break;
1150 	case SCHED_FIFO:
1151 		p->p_retval[0] = LINUX_SCHED_FIFO;
1152 		break;
1153 	case SCHED_RR:
1154 		p->p_retval[0] = LINUX_SCHED_RR;
1155 		break;
1156 	}
1157 
1158 	return error;
1159 }
1160