xref: /freebsd/sys/compat/linux/linux_misc.c (revision 25d0cc3b4dcc3d9e3929615a9e9b5be4801fbaac)
1 /*-
2  * Copyright (c) 1994-1995 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include "opt_compat.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/sysproto.h>
36 #include <sys/kernel.h>
37 #include <sys/mman.h>
38 #include <sys/proc.h>
39 #include <sys/fcntl.h>
40 #include <sys/imgact_aout.h>
41 #include <sys/mount.h>
42 #include <sys/namei.h>
43 #include <sys/reboot.h>
44 #include <sys/resourcevar.h>
45 #include <sys/stat.h>
46 #include <sys/sysctl.h>
47 #include <sys/unistd.h>
48 #include <sys/vnode.h>
49 #include <sys/wait.h>
50 #include <sys/time.h>
51 #include <sys/signalvar.h>
52 
53 #include <vm/vm.h>
54 #include <vm/pmap.h>
55 #include <vm/vm_kern.h>
56 #include <vm/vm_map.h>
57 #include <vm/vm_extern.h>
58 
59 #include <machine/frame.h>
60 #include <machine/limits.h>
61 #include <machine/psl.h>
62 #include <machine/sysarch.h>
63 #ifdef __i386__
64 #include <machine/segments.h>
65 #endif
66 
67 #include <posix4/sched.h>
68 
69 #include <machine/../linux/linux.h>
70 #include <machine/../linux/linux_proto.h>
71 #include <compat/linux/linux_mib.h>
72 #include <compat/linux/linux_util.h>
73 
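/*
 * Translate a BSD signal number to its Linux equivalent.  On the Alpha
 * the numberings already coincide; elsewhere, signals covered by the
 * translation table are mapped and anything above LINUX_SIGTBLSZ is
 * passed through unchanged.
 */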
74 #ifdef __alpha__
75 #define BSD_TO_LINUX_SIGNAL(sig)       (sig)
76 #else
77 #define BSD_TO_LINUX_SIGNAL(sig)	\
78 	(((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : sig)
79 #endif
80 
81 struct linux_rlimit {
82 	unsigned long rlim_cur;
83 	unsigned long rlim_max;
84 };
85 
86 #ifndef __alpha__
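/*
 * Map Linux resource-limit indices, as used by setrlimit/getrlimit,
 * onto the corresponding FreeBSD RLIMIT_* constants; -1 marks a Linux
 * resource with no FreeBSD counterpart.
 */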
87 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] =
88 { RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
89   RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
90   RLIMIT_MEMLOCK, -1
91 };
92 #endif /*!__alpha__*/
93 
94 #ifndef __alpha__
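/*
 * Emulate Linux alarm(2) by rearming the process real-time interval
 * timer directly and returning the number of seconds (rounded up)
 * left on any previously pending alarm.
 */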
95 int
96 linux_alarm(struct proc *p, struct linux_alarm_args *args)
97 {
98     struct itimerval it, old_it;
99     struct timeval tv;
100     int s;
101 
102 #ifdef DEBUG
103 	if (ldebug(alarm))
104 		printf(ARGS(alarm, "%u"), args->secs);
105 #endif
106     if (args->secs > 100000000)
107 	return EINVAL;
108     it.it_value.tv_sec = (long)args->secs;
109     it.it_value.tv_usec = 0;
110     it.it_interval.tv_sec = 0;
111     it.it_interval.tv_usec = 0;
112     s = splsoftclock();
113     old_it = p->p_realtimer;
114     getmicrouptime(&tv);
115     if (timevalisset(&old_it.it_value))
116 	callout_stop(&p->p_itcallout);
117     if (it.it_value.tv_sec != 0) {
118 	callout_reset(&p->p_itcallout, tvtohz(&it.it_value), realitexpire, p);
119 	timevaladd(&it.it_value, &tv);
120     }
121     p->p_realtimer = it;
122     splx(s);
123     if (timevalcmp(&old_it.it_value, &tv, >)) {
124 	timevalsub(&old_it.it_value, &tv);
125 	if (old_it.it_value.tv_usec != 0)
126 	    old_it.it_value.tv_sec++;
127 	p->p_retval[0] = old_it.it_value.tv_sec;
128     }
129     return 0;
130 }
131 #endif /*!__alpha__*/
132 
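/*
 * Emulate Linux brk(2): try to move the break via obreak() and return
 * the resulting break address, or the old one if the request could not
 * be honoured, since Linux reports the current break rather than an
 * error code.
 */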
133 int
134 linux_brk(struct proc *p, struct linux_brk_args *args)
135 {
136 #if 0
137     struct vmspace *vm = p->p_vmspace;
138     vm_offset_t new, old;
139     int error;
140 
141     if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr)
142 	return EINVAL;
143     if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr)
144 	> p->p_rlimit[RLIMIT_DATA].rlim_cur)
145 	return ENOMEM;
146 
147     old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize);
148     new = round_page((vm_offset_t)args->dsend);
149     p->p_retval[0] = old;
150     if ((new-old) > 0) {
151 	if (swap_pager_full)
152 	    return ENOMEM;
153 	error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE,
154 			VM_PROT_ALL, VM_PROT_ALL, 0);
155 	if (error)
156 	    return error;
157 	vm->vm_dsize += btoc((new-old));
158 	p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize));
159     }
160     return 0;
161 #else
162     struct vmspace *vm = p->p_vmspace;
163     vm_offset_t new, old;
164     struct obreak_args /* {
165 	char * nsize;
166     } */ tmp;
167 
168 #ifdef DEBUG
169 	if (ldebug(brk))
170 		printf(ARGS(brk, "%p"), (void *)args->dsend);
171 #endif
172     old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
173     new = (vm_offset_t)args->dsend;
174     tmp.nsize = (char *) new;
175     if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp))
176 	p->p_retval[0] = (long)new;
177     else
178 	p->p_retval[0] = (long)old;
179 
180     return 0;
181 #endif
182 }
183 
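/*
 * Emulate Linux uselib(2): validate the named file as a Linux a.out
 * shared library (ZMAGIC or QMAGIC), map its text and data into the
 * process address space and allocate zero-filled memory for the BSS.
 */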
184 int
185 linux_uselib(struct proc *p, struct linux_uselib_args *args)
186 {
187     struct nameidata ni;
188     struct vnode *vp;
189     struct exec *a_out;
190     struct vattr attr;
191     vm_offset_t vmaddr;
192     unsigned long file_offset;
193     vm_offset_t buffer;
194     unsigned long bss_size;
195     int error;
196     caddr_t sg;
197     int locked;
198 
199     sg = stackgap_init();
200     CHECKALTEXIST(p, &sg, args->library);
201 
202 #ifdef DEBUG
203 	if (ldebug(uselib))
204 		printf(ARGS(uselib, "%s"), args->library);
205 #endif
206 
207     a_out = NULL;
208     locked = 0;
209     vp = NULL;
210 
211     NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, args->library, p);
212     error = namei(&ni);
213     if (error)
214 	goto cleanup;
215 
216     vp = ni.ni_vp;
217     /*
218      * XXX This looks like a bogus check - a LOCKLEAF namei should not succeed
219      * without returning a vnode.
220      */
221     if (vp == NULL) {
222 	error = ENOEXEC;	/* ?? */
223 	goto cleanup;
224     }
225     NDFREE(&ni, NDF_ONLY_PNBUF);
226 
227     /*
228      * From here on down, we have a locked vnode that must be unlocked.
229      */
230     locked++;
231 
232     /*
233      * Writable?
234      */
235     if (vp->v_writecount) {
236 	error = ETXTBSY;
237 	goto cleanup;
238     }
239 
240     /*
241      * Executable?
242      */
243     error = VOP_GETATTR(vp, &attr, p->p_ucred, p);
244     if (error)
245 	goto cleanup;
246 
247     if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
248 	((attr.va_mode & 0111) == 0) ||
249 	(attr.va_type != VREG)) {
250 	    error = ENOEXEC;
251 	    goto cleanup;
252     }
253 
254     /*
255      * Sensible size?
256      */
257     if (attr.va_size == 0) {
258 	error = ENOEXEC;
259 	goto cleanup;
260     }
261 
262     /*
263      * Can we access it?
264      */
265     error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
266     if (error)
267 	goto cleanup;
268 
269     error = VOP_OPEN(vp, FREAD, p->p_ucred, p);
270     if (error)
271 	goto cleanup;
272 
273     /*
274      * Lock no longer needed
275      */
276     VOP_UNLOCK(vp, 0, p);
277     locked = 0;
278 
279     /*
280      * Pull in executable header into kernel_map
281      */
282     error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
283 	    	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
284     if (error)
285 	goto cleanup;
286 
287     /*
288      * Is it a Linux binary ?
289      */
290     if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
291 	error = ENOEXEC;
292 	goto cleanup;
293     }
294 
295     /* While we are here, we should REALLY do some more checks */
296 
297     /*
298      * Set file/virtual offset based on a.out variant.
299      */
300     switch ((int)(a_out->a_magic & 0xffff)) {
301     case 0413:	/* ZMAGIC */
302 	file_offset = 1024;
303 	break;
304     case 0314:	/* QMAGIC */
305 	file_offset = 0;
306 	break;
307     default:
308 	error = ENOEXEC;
309 	goto cleanup;
310     }
311 
312     bss_size = round_page(a_out->a_bss);
313 
314     /*
315      * Check various fields in header for validity/bounds.
316      */
317     if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
318 	error = ENOEXEC;
319 	goto cleanup;
320     }
321 
322     /* text + data can't exceed file size */
323     if (a_out->a_data + a_out->a_text > attr.va_size) {
324 	error = EFAULT;
325 	goto cleanup;
326     }
327 
328     /* To protect p->p_rlimit in the if condition. */
329     mtx_assert(&Giant, MA_OWNED);
330 
331     /*
332      * text/data/bss must not exceed limits
333      * XXX: this is not complete. it should check current usage PLUS
334      * the resources needed by this library.
335      */
336     if (a_out->a_text > MAXTSIZ ||
337 	a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) {
338 	error = ENOMEM;
339 	goto cleanup;
340     }
341 
342     /*
343      * prevent more writers
344      */
345     vp->v_flag |= VTEXT;
346 
347     /*
348      * Check if file_offset is page aligned.
349      * Currently we cannot handle misaligned file offsets,
350      * and so we read in the entire image (what a waste).
351      */
352     if (file_offset & PAGE_MASK) {
353 #ifdef DEBUG
354 	printf("uselib: Non page aligned binary %lu\n", file_offset);
355 #endif
356 	/*
357 	 * Map text+data read/write/execute
358 	 */
359 
360 	/* a_entry is the load address and is page aligned */
361 	vmaddr = trunc_page(a_out->a_entry);
362 
363 	/* get anon user mapping, read+write+execute */
364 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
365 		    	    a_out->a_text + a_out->a_data, FALSE,
366 			    VM_PROT_ALL, VM_PROT_ALL, 0);
367 	if (error)
368 	    goto cleanup;
369 
370 	/* map file into kernel_map */
371 	error = vm_mmap(kernel_map, &buffer,
372 			round_page(a_out->a_text + a_out->a_data + file_offset),
373 		   	VM_PROT_READ, VM_PROT_READ, 0,
374 			(caddr_t)vp, trunc_page(file_offset));
375 	if (error)
376 	    goto cleanup;
377 
378 	/* copy from kernel VM space to user space */
379 	error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset),
380 			(caddr_t)vmaddr, a_out->a_text + a_out->a_data);
381 
382 	/* release temporary kernel space */
383 	vm_map_remove(kernel_map, buffer,
384 		      buffer + round_page(a_out->a_text + a_out->a_data + file_offset));
385 
386 	if (error)
387 	    goto cleanup;
388     }
389     else {
390 #ifdef DEBUG
391 	printf("uselib: Page aligned binary %lu\n", file_offset);
392 #endif
393 	/*
394 	 * for QMAGIC, a_entry is 20 bytes beyond the load address
395 	 * to skip the executable header
396 	 */
397 	vmaddr = trunc_page(a_out->a_entry);
398 
399 	/*
400 	 * Map it all into the process's space as a single copy-on-write
401 	 * "data" segment.
402 	 */
403 	error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr,
404 		   	a_out->a_text + a_out->a_data,
405 			VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
406 			(caddr_t)vp, file_offset);
407 	if (error)
408 	    goto cleanup;
409     }
410 #ifdef DEBUG
411     printf("mem=%08lx = %08lx %08lx\n", vmaddr, ((long*)vmaddr)[0], ((long*)vmaddr)[1]);
412 #endif
413     if (bss_size != 0) {
414         /*
415 	 * Calculate BSS start address
416 	 */
417 	vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data;
418 
419 	/*
420 	 * allocate some 'anon' space
421 	 */
422 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
423 			    bss_size, FALSE,
424 			    VM_PROT_ALL, VM_PROT_ALL, 0);
425 	if (error)
426 	    goto cleanup;
427     }
428 
429 cleanup:
430     /*
431      * Unlock vnode if needed
432      */
433     if (locked)
434 	VOP_UNLOCK(vp, 0, p);
435 
436     /*
437      * Release the kernel mapping.
438      */
439     if (a_out)
440 	vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE);
441 
442     return error;
443 }
444 
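/*
 * Emulate Linux select(2) on top of the native select(): accept the
 * kind of invalid timeout Linux tolerates, copy the remaining time
 * back to the caller's timeval and map ERESTART to EINTR.
 */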
445 int
446 linux_newselect(struct proc *p, struct linux_newselect_args *args)
447 {
448     struct select_args bsa;
449     struct timeval tv0, tv1, utv, *tvp;
450     caddr_t sg;
451     int error;
452 
453 #ifdef DEBUG
454 	if (ldebug(newselect))
455 		printf(ARGS(newselect, "%d, %p, %p, %p, %p"),
456 		    args->nfds, (void *)args->readfds,
457 		    (void *)args->writefds, (void *)args->exceptfds,
458 		    (void *)args->timeout);
459 #endif
460     error = 0;
461     bsa.nd = args->nfds;
462     bsa.in = args->readfds;
463     bsa.ou = args->writefds;
464     bsa.ex = args->exceptfds;
465     bsa.tv = args->timeout;
466 
467     /*
468      * Store current time for computation of the amount of
469      * time left.
470      */
471     if (args->timeout) {
472 	if ((error = copyin(args->timeout, &utv, sizeof(utv))))
473 	    goto select_out;
474 #ifdef DEBUG
475 	if (ldebug(newselect))
476 		printf(LMSG("incoming timeout (%ld/%ld)"),
477 		    utv.tv_sec, utv.tv_usec);
478 #endif
479 	if (itimerfix(&utv)) {
480 	    /*
481 	     * The timeval was invalid.  Convert it to something
482 	     * valid that will act as it does under Linux.
483 	     */
484 	    sg = stackgap_init();
485 	    tvp = stackgap_alloc(&sg, sizeof(utv));
486 	    utv.tv_sec += utv.tv_usec / 1000000;
487 	    utv.tv_usec %= 1000000;
488 	    if (utv.tv_usec < 0) {
489 		utv.tv_sec -= 1;
490 		utv.tv_usec += 1000000;
491 	    }
492 	    if (utv.tv_sec < 0)
493 		timevalclear(&utv);
494 	    if ((error = copyout(&utv, tvp, sizeof(utv))))
495 		goto select_out;
496 	    bsa.tv = tvp;
497 	}
498 	microtime(&tv0);
499     }
500 
501     error = select(p, &bsa);
502 #ifdef DEBUG
503 	if (ldebug(newselect))
504 		printf(LMSG("real select returns %d"), error);
505 #endif
506 
507     if (error) {
508 	/*
509 	 * See fs/select.c in the Linux kernel.  Without this,
510 	 * Maelstrom doesn't work.
511 	 */
512 	if (error == ERESTART)
513 	    error = EINTR;
514 	goto select_out;
515     }
516 
517     if (args->timeout) {
518 	if (p->p_retval[0]) {
519 	    /*
520 	     * Compute how much time was left of the timeout,
521 	     * by subtracting the current time and the time
522 	     * before we started the call, and subtracting
523 	     * that result from the user-supplied value.
524 	     */
525 	    microtime(&tv1);
526 	    timevalsub(&tv1, &tv0);
527 	    timevalsub(&utv, &tv1);
528 	    if (utv.tv_sec < 0)
529 		timevalclear(&utv);
530 	} else
531 	    timevalclear(&utv);
532 #ifdef DEBUG
533 	if (ldebug(newselect))
534 		printf(LMSG("outgoing timeout (%ld/%ld)"),
535 		    utv.tv_sec, utv.tv_usec);
536 #endif
537 	if ((error = copyout(&utv, args->timeout, sizeof(utv))))
538 	    goto select_out;
539     }
540 
541 select_out:
542 #ifdef DEBUG
543 	if (ldebug(newselect))
544 		printf(LMSG("newselect_out -> %d"), error);
545 #endif
546     return error;
547 }
548 
549 int
550 linux_getpgid(struct proc *p, struct linux_getpgid_args *args)
551 {
552     struct proc *curp;
553 
554 #ifdef DEBUG
555 	if (ldebug(getpgid))
556 		printf(ARGS(getpgid, "%d"), args->pid);
557 #endif
558     if (args->pid != p->p_pid) {
559 	if (!(curp = pfind(args->pid)))
560 	    return ESRCH;
561     }
562     else
563 	curp = p;
564     p->p_retval[0] = curp->p_pgid;
565     return 0;
566 }
567 
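/*
 * Partial mremap(2) emulation: shrinking a region is implemented by
 * unmapping its tail, while growing a region is not supported and
 * fails with ENOMEM.
 */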
568 int
569 linux_mremap(struct proc *p, struct linux_mremap_args *args)
570 {
571 	struct munmap_args /* {
572 		void *addr;
573 		size_t len;
574 	} */ bsd_args;
575 	int error = 0;
576 
577 #ifdef DEBUG
578 	if (ldebug(mremap))
579 		printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"),
580 		    (void *)args->addr,
581 		    (unsigned long)args->old_len,
582 		    (unsigned long)args->new_len,
583 		    (unsigned long)args->flags);
584 #endif
585 	args->new_len = round_page(args->new_len);
586 	args->old_len = round_page(args->old_len);
587 
588 	if (args->new_len > args->old_len) {
589 		p->p_retval[0] = 0;
590 		return ENOMEM;
591 	}
592 
593 	if (args->new_len < args->old_len) {
594 		bsd_args.addr = args->addr + args->new_len;
595 		bsd_args.len = args->old_len - args->new_len;
596 		error = munmap(p, &bsd_args);
597 	}
598 
599 	p->p_retval[0] = error ? 0 : (u_long)args->addr;
600 	return error;
601 }
602 
603 int
604 linux_msync(struct proc *p, struct linux_msync_args *args)
605 {
606 	struct msync_args bsd_args;
607 
608 	bsd_args.addr = args->addr;
609 	bsd_args.len = args->len;
610 	bsd_args.flags = 0;	/* XXX ignore */
611 
612 	return msync(p, &bsd_args);
613 }
614 
615 #ifndef __alpha__
616 int
617 linux_time(struct proc *p, struct linux_time_args *args)
618 {
619     struct timeval tv;
620     linux_time_t tm;
621     int error;
622 
623 #ifdef DEBUG
624 	if (ldebug(time))
625 		printf(ARGS(time, "*"));
626 #endif
627     microtime(&tv);
628     tm = tv.tv_sec;
629     if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t))))
630 	return error;
631     p->p_retval[0] = tm;
632     return 0;
633 }
634 #endif	/*!__alpha__*/
635 
636 struct linux_times_argv {
637     long    tms_utime;
638     long    tms_stime;
639     long    tms_cutime;
640     long    tms_cstime;
641 };
642 
643 #ifdef __alpha__
644 #define CLK_TCK 1024	/* Linux uses 1024 on alpha */
645 #else
646 #define CLK_TCK 100	/* Linux uses 100 */
647 #endif
648 
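/*
 * Convert a struct timeval into Linux clock ticks at CLK_TCK Hz, the
 * unit used in struct tms and in the times() return value.
 */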
649 #define CONVTCK(r)	(r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
650 
651 int
652 linux_times(struct proc *p, struct linux_times_args *args)
653 {
654     struct timeval tv;
655     struct linux_times_argv tms;
656     struct rusage ru;
657     int error;
658 
659 #ifdef DEBUG
660 	if (ldebug(times))
661 		printf(ARGS(times, "*"));
662 #endif
663     mtx_lock_spin(&sched_lock);
664     calcru(p, &ru.ru_utime, &ru.ru_stime, NULL);
665     mtx_unlock_spin(&sched_lock);
666 
667     tms.tms_utime = CONVTCK(ru.ru_utime);
668     tms.tms_stime = CONVTCK(ru.ru_stime);
669 
670     tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime);
671     tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime);
672 
673     if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf,
674 	    	    sizeof(struct linux_times_argv))))
675 	return error;
676 
677     microuptime(&tv);
678     p->p_retval[0] = (int)CONVTCK(tv);
679     return 0;
680 }
681 
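/*
 * Fill in a Linux struct new_utsname from the emulated OS name and
 * release together with the native host name, version, machine and
 * domain name.
 */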
682 int
683 linux_newuname(struct proc *p, struct linux_newuname_args *args)
684 {
685 	struct linux_new_utsname utsname;
686 	char *osrelease, *osname;
687 
688 #ifdef DEBUG
689 	if (ldebug(newuname))
690 		printf(ARGS(newuname, "*"));
691 #endif
692 
693 	osname = linux_get_osname(p);
694 	osrelease = linux_get_osrelease(p);
695 
696 	bzero(&utsname, sizeof(struct linux_new_utsname));
697 	strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1);
698 	strncpy(utsname.nodename, hostname, LINUX_MAX_UTSNAME-1);
699 	strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1);
700 	strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1);
701 	strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1);
702 	strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1);
703 
704 	return (copyout((caddr_t)&utsname, (caddr_t)args->buf,
705 			sizeof(struct linux_new_utsname)));
706 }
707 
708 struct linux_utimbuf {
709 	linux_time_t l_actime;
710 	linux_time_t l_modtime;
711 };
712 
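/*
 * Emulate Linux utime(2): convert the Linux utimbuf (whole seconds
 * only) into a pair of timevals placed in the stack gap and pass the
 * request on to the native utimes().
 */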
713 int
714 linux_utime(struct proc *p, struct linux_utime_args *args)
715 {
716     struct utimes_args /* {
717 	char	*path;
718 	struct	timeval *tptr;
719     } */ bsdutimes;
720     struct timeval tv[2], *tvp;
721     struct linux_utimbuf lut;
722     int error;
723     caddr_t sg;
724 
725     sg = stackgap_init();
726     CHECKALTEXIST(p, &sg, args->fname);
727 
728 #ifdef DEBUG
729 	if (ldebug(utime))
730 		printf(ARGS(utime, "%s, *"), args->fname);
731 #endif
732     if (args->times) {
733 	if ((error = copyin(args->times, &lut, sizeof lut)))
734 	    return error;
735 	tv[0].tv_sec = lut.l_actime;
736 	tv[0].tv_usec = 0;
737 	tv[1].tv_sec = lut.l_modtime;
738 	tv[1].tv_usec = 0;
739 	/* so that utimes can copyin */
740 	tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
741 	if (tvp == NULL)
742 		return (ENAMETOOLONG);
743 	if ((error = copyout(tv, tvp, sizeof(tv))))
744 	    return error;
745 	bsdutimes.tptr = tvp;
746     } else
747 	bsdutimes.tptr = NULL;
748 
749     bsdutimes.path = args->fname;
750     return utimes(p, &bsdutimes);
751 }
752 
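/*
 * Linux waitpid/wait4 option asking for clone children to be waited
 * for; it is translated to the kernel's WLINUXCLONE option below.
 */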
753 #define __WCLONE 0x80000000
754 
755 #ifndef __alpha__
756 int
757 linux_waitpid(struct proc *p, struct linux_waitpid_args *args)
758 {
759     struct wait_args /* {
760 	int pid;
761 	int *status;
762 	int options;
763 	struct	rusage *rusage;
764     } */ tmp;
765     int error, tmpstat;
766 
767 #ifdef DEBUG
768 	if (ldebug(waitpid))
769 		printf(ARGS(waitpid, "%d, %p, %d"),
770 		    args->pid, (void *)args->status, args->options);
771 #endif
772     tmp.pid = args->pid;
773     tmp.status = args->status;
774     tmp.options = (args->options & (WNOHANG | WUNTRACED));
775     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
776     if (args->options & __WCLONE)
777 	tmp.options |= WLINUXCLONE;
778     tmp.rusage = NULL;
779 
780     if ((error = wait4(p, &tmp)) != 0)
781 	return error;
782 
783     if (args->status) {
784 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
785 	    return error;
786 	tmpstat &= 0xffff;
787 	if (WIFSIGNALED(tmpstat))
788 	    tmpstat = (tmpstat & 0xffffff80) |
789 		      BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
790 	else if (WIFSTOPPED(tmpstat))
791 	    tmpstat = (tmpstat & 0xffff00ff) |
792 		      (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
793 	return copyout(&tmpstat, args->status, sizeof(int));
794     } else
795 	return 0;
796 }
797 #endif	/*!__alpha__*/
798 
799 int
800 linux_wait4(struct proc *p, struct linux_wait4_args *args)
801 {
802     struct wait_args /* {
803 	int pid;
804 	int *status;
805 	int options;
806 	struct	rusage *rusage;
807     } */ tmp;
808     int error, tmpstat;
809 
810 #ifdef DEBUG
811 	if (ldebug(wait4))
812 		printf(ARGS(wait4, "%d, %p, %d, %p"),
813 		    args->pid, (void *)args->status, args->options,
814 		    (void *)args->rusage);
815 #endif
816     tmp.pid = args->pid;
817     tmp.status = args->status;
818     tmp.options = (args->options & (WNOHANG | WUNTRACED));
819     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
820     if (args->options & __WCLONE)
821 	tmp.options |= WLINUXCLONE;
822     tmp.rusage = args->rusage;
823 
824     if ((error = wait4(p, &tmp)) != 0)
825 	return error;
826 
827     SIGDELSET(p->p_siglist, SIGCHLD);
828 
829     if (args->status) {
830 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
831 	    return error;
832 	tmpstat &= 0xffff;
833 	if (WIFSIGNALED(tmpstat))
834 	    tmpstat = (tmpstat & 0xffffff80) |
835 		  BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
836 	else if (WIFSTOPPED(tmpstat))
837 	    tmpstat = (tmpstat & 0xffff00ff) |
838 		  (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
839 	return copyout(&tmpstat, args->status, sizeof(int));
840     } else
841 	return 0;
842 }
843 
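/*
 * Emulate Linux mknod(2): FIFO requests are handed to mkfifo(), all
 * other node types go to the native mknod().
 */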
844 int
845 linux_mknod(struct proc *p, struct linux_mknod_args *args)
846 {
847 	caddr_t sg;
848 	struct mknod_args bsd_mknod;
849 	struct mkfifo_args bsd_mkfifo;
850 
851 	sg = stackgap_init();
852 
853 	CHECKALTCREAT(p, &sg, args->path);
854 
855 #ifdef DEBUG
856 	if (ldebug(mknod))
857 		printf(ARGS(mknod, "%s, %d, %d"),
858 		    args->path, args->mode, args->dev);
859 #endif
860 
861 	if (args->mode & S_IFIFO) {
862 		bsd_mkfifo.path = args->path;
863 		bsd_mkfifo.mode = args->mode;
864 		return mkfifo(p, &bsd_mkfifo);
865 	} else {
866 		bsd_mknod.path = args->path;
867 		bsd_mknod.mode = args->mode;
868 		bsd_mknod.dev = args->dev;
869 		return mknod(p, &bsd_mknod);
870 	}
871 }
872 
873 /*
874  * UGH! This is just about the dumbest idea I've ever heard!!
875  */
876 int
877 linux_personality(struct proc *p, struct linux_personality_args *args)
878 {
879 #ifdef DEBUG
880 	if (ldebug(personality))
881 		printf(ARGS(personality, "%d"), args->per);
882 #endif
883 #ifndef __alpha__
884 	if (args->per != 0)
885 		return EINVAL;
886 #endif
887 
888 	/* Yes Jim, it's still a Linux... */
889 	p->p_retval[0] = 0;
890 	return 0;
891 }
892 
893 /*
894  * Wrappers for get/setitimer for debugging.
895  */
896 int
897 linux_setitimer(struct proc *p, struct linux_setitimer_args *args)
898 {
899 	struct setitimer_args bsa;
900 	struct itimerval foo;
901 	int error;
902 
903 #ifdef DEBUG
904 	if (ldebug(setitimer))
905 		printf(ARGS(setitimer, "%p, %p"),
906 		    (void *)args->itv, (void *)args->oitv);
907 #endif
908 	bsa.which = args->which;
909 	bsa.itv = args->itv;
910 	bsa.oitv = args->oitv;
911 	if (args->itv) {
912 	    if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo,
913 			sizeof(foo))))
914 		return error;
915 #ifdef DEBUG
916 	    if (ldebug(setitimer)) {
917 	        printf("setitimer: value: sec: %ld, usec: %ld\n",
918 		    foo.it_value.tv_sec, foo.it_value.tv_usec);
919 	        printf("setitimer: interval: sec: %ld, usec: %ld\n",
920 		    foo.it_interval.tv_sec, foo.it_interval.tv_usec);
921 	    }
922 #endif
923 	}
924 	return setitimer(p, &bsa);
925 }
926 
927 int
928 linux_getitimer(struct proc *p, struct linux_getitimer_args *args)
929 {
930 	struct getitimer_args bsa;
931 #ifdef DEBUG
932 	if (ldebug(getitimer))
933 		printf(ARGS(getitimer, "%p"), (void *)args->itv);
934 #endif
935 	bsa.which = args->which;
936 	bsa.itv = args->itv;
937 	return getitimer(p, &bsa);
938 }
939 
940 #ifndef __alpha__
941 int
942 linux_nice(struct proc *p, struct linux_nice_args *args)
943 {
944 	struct setpriority_args	bsd_args;
945 
946 	bsd_args.which = PRIO_PROCESS;
947 	bsd_args.who = 0;	/* current process */
948 	bsd_args.prio = args->inc;
949 	return setpriority(p, &bsd_args);
950 }
951 #endif	/*!__alpha__*/
952 
953 int
954 linux_setgroups(p, uap)
955 	struct proc *p;
956 	struct linux_setgroups_args *uap;
957 {
958 	struct pcred *pc;
959 	linux_gid_t linux_gidset[NGROUPS];
960 	gid_t *bsd_gidset;
961 	int ngrp, error;
962 
963 	pc = p->p_cred;
964 	ngrp = uap->gidsetsize;
965 
966 	/*
967 	 * cr_groups[0] holds egid. Setting the whole set from
968 	 * the supplied set will cause egid to be changed too.
969 	 * Keep cr_groups[0] unchanged to prevent that.
970 	 */
971 
972 	if ((error = suser(p)) != 0)
973 		return (error);
974 
975 	if (ngrp >= NGROUPS)
976 		return (EINVAL);
977 
978 	pc->pc_ucred = crcopy(pc->pc_ucred);
979 	if (ngrp > 0) {
980 		error = copyin((caddr_t)uap->gidset, (caddr_t)linux_gidset,
981 			       ngrp * sizeof(linux_gid_t));
982 		if (error)
983 			return (error);
984 
985 		pc->pc_ucred->cr_ngroups = ngrp + 1;
986 
987 		bsd_gidset = pc->pc_ucred->cr_groups;
988 		ngrp--;
989 		while (ngrp >= 0) {
990 			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
991 			ngrp--;
992 		}
993 	}
994 	else
995 		pc->pc_ucred->cr_ngroups = 1;
996 
997 	setsugid(p);
998 	return (0);
999 }
1000 
1001 int
1002 linux_getgroups(p, uap)
1003 	struct proc *p;
1004 	struct linux_getgroups_args *uap;
1005 {
1006 	struct pcred *pc;
1007 	linux_gid_t linux_gidset[NGROUPS];
1008 	gid_t *bsd_gidset;
1009 	int bsd_gidsetsz, ngrp, error;
1010 
1011 	pc = p->p_cred;
1012 	bsd_gidset = pc->pc_ucred->cr_groups;
1013 	bsd_gidsetsz = pc->pc_ucred->cr_ngroups - 1;
1014 
1015 	/*
1016 	 * cr_groups[0] holds egid. Returning the whole set
1017 	 * here will cause a duplicate. Exclude cr_groups[0]
1018 	 * to prevent that.
1019 	 */
1020 
1021 	if ((ngrp = uap->gidsetsize) == 0) {
1022 		p->p_retval[0] = bsd_gidsetsz;
1023 		return (0);
1024 	}
1025 
1026 	if (ngrp < bsd_gidsetsz)
1027 		return (EINVAL);
1028 
1029 	ngrp = 0;
1030 	while (ngrp < bsd_gidsetsz) {
1031 		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1032 		ngrp++;
1033 	}
1034 
1035 	if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset,
1036 	    ngrp * sizeof(linux_gid_t))))
1037 		return (error);
1038 
1039 	p->p_retval[0] = ngrp;
1040 	return (0);
1041 }
1042 
1043 #ifndef __alpha__
1044 int
1045 linux_setrlimit(p, uap)
1046 	struct proc *p;
1047 	struct linux_setrlimit_args *uap;
1048 {
1049 	struct __setrlimit_args bsd;
1050 	struct linux_rlimit rlim;
1051 	int error;
1052 	caddr_t sg = stackgap_init();
1053 
1054 #ifdef DEBUG
1055 	if (ldebug(setrlimit))
1056 		printf(ARGS(setrlimit, "%d, %p"),
1057 		    uap->resource, (void *)uap->rlim);
1058 #endif
1059 
1060 	if (uap->resource >= LINUX_RLIM_NLIMITS)
1061 		return (EINVAL);
1062 
1063 	bsd.which = linux_to_bsd_resource[uap->resource];
1064 	if (bsd.which == -1)
1065 		return (EINVAL);
1066 
1067 	error = copyin(uap->rlim, &rlim, sizeof(rlim));
1068 	if (error)
1069 		return (error);
1070 
1071 	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
1072 	bsd.rlp->rlim_cur = (rlim_t)rlim.rlim_cur;
1073 	bsd.rlp->rlim_max = (rlim_t)rlim.rlim_max;
1074 	return (setrlimit(p, &bsd));
1075 }
1076 
1077 int
1078 linux_getrlimit(p, uap)
1079 	struct proc *p;
1080 	struct linux_getrlimit_args *uap;
1081 {
1082 	struct __getrlimit_args bsd;
1083 	struct linux_rlimit rlim;
1084 	int error;
1085 	caddr_t sg = stackgap_init();
1086 
1087 #ifdef DEBUG
1088 	if (ldebug(getrlimit))
1089 		printf(ARGS(getrlimit, "%d, %p"),
1090 		    uap->resource, (void *)uap->rlim);
1091 #endif
1092 
1093 	if (uap->resource >= LINUX_RLIM_NLIMITS)
1094 		return (EINVAL);
1095 
1096 	bsd.which = linux_to_bsd_resource[uap->resource];
1097 	if (bsd.which == -1)
1098 		return (EINVAL);
1099 
1100 	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
1101 	error = getrlimit(p, &bsd);
1102 	if (error)
1103 		return (error);
1104 
1105 	rlim.rlim_cur = (unsigned long)bsd.rlp->rlim_cur;
1106 	if (rlim.rlim_cur == ULONG_MAX)
1107 		rlim.rlim_cur = LONG_MAX;
1108 	rlim.rlim_max = (unsigned long)bsd.rlp->rlim_max;
1109 	if (rlim.rlim_max == ULONG_MAX)
1110 		rlim.rlim_max = LONG_MAX;
1111 	return (copyout(&rlim, uap->rlim, sizeof(rlim)));
1112 }
1113 #endif /*!__alpha__*/
1114 
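/*
 * Scheduling-policy syscalls: translate the Linux policy constants to
 * the POSIX SCHED_* values before calling the native implementations,
 * and translate the result back for sched_getscheduler().
 */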
1115 int
1116 linux_sched_setscheduler(p, uap)
1117 	struct proc *p;
1118 	struct linux_sched_setscheduler_args *uap;
1119 {
1120 	struct sched_setscheduler_args bsd;
1121 
1122 #ifdef DEBUG
1123 	if (ldebug(sched_setscheduler))
1124 		printf(ARGS(sched_setscheduler, "%d, %d, %p"),
1125 		    uap->pid, uap->policy, (const void *)uap->param);
1126 #endif
1127 
1128 	switch (uap->policy) {
1129 	case LINUX_SCHED_OTHER:
1130 		bsd.policy = SCHED_OTHER;
1131 		break;
1132 	case LINUX_SCHED_FIFO:
1133 		bsd.policy = SCHED_FIFO;
1134 		break;
1135 	case LINUX_SCHED_RR:
1136 		bsd.policy = SCHED_RR;
1137 		break;
1138 	default:
1139 		return EINVAL;
1140 	}
1141 
1142 	bsd.pid = uap->pid;
1143 	bsd.param = uap->param;
1144 	return sched_setscheduler(p, &bsd);
1145 }
1146 
1147 int
1148 linux_sched_getscheduler(p, uap)
1149 	struct proc *p;
1150 	struct linux_sched_getscheduler_args *uap;
1151 {
1152 	struct sched_getscheduler_args bsd;
1153 	int error;
1154 
1155 #ifdef DEBUG
1156 	if (ldebug(sched_getscheduler))
1157 		printf(ARGS(sched_getscheduler, "%d"), uap->pid);
1158 #endif
1159 
1160 	bsd.pid = uap->pid;
1161 	error = sched_getscheduler(p, &bsd);
1162 
1163 	switch (p->p_retval[0]) {
1164 	case SCHED_OTHER:
1165 		p->p_retval[0] = LINUX_SCHED_OTHER;
1166 		break;
1167 	case SCHED_FIFO:
1168 		p->p_retval[0] = LINUX_SCHED_FIFO;
1169 		break;
1170 	case SCHED_RR:
1171 		p->p_retval[0] = LINUX_SCHED_RR;
1172 		break;
1173 	}
1174 
1175 	return error;
1176 }
1177 
1178 int
1179 linux_sched_get_priority_max(p, uap)
1180 	struct proc *p;
1181 	struct linux_sched_get_priority_max_args *uap;
1182 {
1183 	struct sched_get_priority_max_args bsd;
1184 
1185 #ifdef DEBUG
1186 	if (ldebug(sched_get_priority_max))
1187 		printf(ARGS(sched_get_priority_max, "%d"), uap->policy);
1188 #endif
1189 
1190 	switch (uap->policy) {
1191 	case LINUX_SCHED_OTHER:
1192 		bsd.policy = SCHED_OTHER;
1193 		break;
1194 	case LINUX_SCHED_FIFO:
1195 		bsd.policy = SCHED_FIFO;
1196 		break;
1197 	case LINUX_SCHED_RR:
1198 		bsd.policy = SCHED_RR;
1199 		break;
1200 	default:
1201 		return EINVAL;
1202 	}
1203 	return sched_get_priority_max(p, &bsd);
1204 }
1205 
1206 int
1207 linux_sched_get_priority_min(p, uap)
1208 	struct proc *p;
1209 	struct linux_sched_get_priority_min_args *uap;
1210 {
1211 	struct sched_get_priority_min_args bsd;
1212 
1213 #ifdef DEBUG
1214 	if (ldebug(sched_get_priority_min))
1215 		printf(ARGS(sched_get_priority_min, "%d"), uap->policy);
1216 #endif
1217 
1218 	switch (uap->policy) {
1219 	case LINUX_SCHED_OTHER:
1220 		bsd.policy = SCHED_OTHER;
1221 		break;
1222 	case LINUX_SCHED_FIFO:
1223 		bsd.policy = SCHED_FIFO;
1224 		break;
1225 	case LINUX_SCHED_RR:
1226 		bsd.policy = SCHED_RR;
1227 		break;
1228 	default:
1229 		return EINVAL;
1230 	}
1231 	return sched_get_priority_min(p, &bsd);
1232 }
1233 
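/*
 * Command values used by Linux reboot(2): Ctrl-Alt-Del enable/disable
 * requests are accepted silently, REBOOT_HALT maps to RB_HALT and
 * anything else causes a normal reboot.
 */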
1234 #define REBOOT_CAD_ON	0x89abcdef
1235 #define REBOOT_CAD_OFF	0
1236 #define REBOOT_HALT	0xcdef0123
1237 
1238 int
1239 linux_reboot(struct proc *p, struct linux_reboot_args *args)
1240 {
1241 	struct reboot_args bsd_args;
1242 
1243 #ifdef DEBUG
1244 	if (ldebug(reboot))
1245 		printf(ARGS(reboot, "0x%x"), args->opt);
1246 #endif
1247 	if (args->opt == REBOOT_CAD_ON || args->opt == REBOOT_CAD_OFF)
1248 		return (0);
1249 	bsd_args.opt = args->opt == REBOOT_HALT ? RB_HALT : 0;
1250 	return (reboot(p, &bsd_args));
1251 }
1252