/*-
 * Copyright (c) 1994-1995 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/kernel.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/imgact_aout.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/reboot.h>
#include <sys/resourcevar.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <sys/signalvar.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>

#include <machine/frame.h>
#include <machine/limits.h>
#include <machine/psl.h>
#include <machine/sysarch.h>
#ifdef __i386__
#include <machine/segments.h>
#endif

#include <posix4/sched.h>

#include <machine/../linux/linux.h>
#include <machine/../linux/linux_proto.h>
#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_util.h>

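/*
 * Convert a BSD signal number to its Linux equivalent.  On the alpha the
 * mapping is the identity; elsewhere signals that fall within the
 * translation table are remapped and the rest pass through unchanged.
 */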
#ifdef __alpha__
#define BSD_TO_LINUX_SIGNAL(sig)       (sig)
#else
#define BSD_TO_LINUX_SIGNAL(sig)	\
	(((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : sig)
#endif

struct linux_rlimit {
	unsigned long rlim_cur;
	unsigned long rlim_max;
};

#ifndef __alpha__
static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] =
{ RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
  RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
  RLIMIT_MEMLOCK, -1
};
#endif /*!__alpha__*/

#ifndef __alpha__
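/*
 * alarm(2) emulation: arm the process's real-time interval timer for the
 * requested number of seconds and report how many seconds were left on a
 * previously armed timer.
 */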
int
linux_alarm(struct proc *p, struct linux_alarm_args *args)
{
    struct itimerval it, old_it;
    struct timeval tv;
    int s;

#ifdef DEBUG
	if (ldebug(alarm))
		printf(ARGS(alarm, "%u"), args->secs);
#endif
    if (args->secs > 100000000)
	return EINVAL;
    it.it_value.tv_sec = (long)args->secs;
    it.it_value.tv_usec = 0;
    it.it_interval.tv_sec = 0;
    it.it_interval.tv_usec = 0;
    s = splsoftclock();
    old_it = p->p_realtimer;
    getmicrouptime(&tv);
    if (timevalisset(&old_it.it_value))
	callout_stop(&p->p_itcallout);
    if (it.it_value.tv_sec != 0) {
	callout_reset(&p->p_itcallout, tvtohz(&it.it_value), realitexpire, p);
	timevaladd(&it.it_value, &tv);
    }
    p->p_realtimer = it;
    splx(s);
    if (timevalcmp(&old_it.it_value, &tv, >)) {
	timevalsub(&old_it.it_value, &tv);
	if (old_it.it_value.tv_usec != 0)
	    old_it.it_value.tv_sec++;
	p->p_retval[0] = old_it.it_value.tv_sec;
    }
    return 0;
}
#endif /*!__alpha__*/

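/*
 * brk(2) emulation.  Linux expects the new (or, on failure, the current)
 * break address as the return value rather than an error code, so wrap
 * the native obreak() and always report the resulting break.
 */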
int
linux_brk(struct proc *p, struct linux_brk_args *args)
{
#if 0
    struct vmspace *vm = p->p_vmspace;
    vm_offset_t new, old;
    int error;

    if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr)
	return EINVAL;
    if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr)
	> p->p_rlimit[RLIMIT_DATA].rlim_cur)
	return ENOMEM;

    old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize);
    new = round_page((vm_offset_t)args->dsend);
    p->p_retval[0] = old;
    if ((new-old) > 0) {
	if (swap_pager_full)
	    return ENOMEM;
	error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE,
			VM_PROT_ALL, VM_PROT_ALL, 0);
	if (error)
	    return error;
	vm->vm_dsize += btoc((new-old));
	p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize));
    }
    return 0;
#else
    struct vmspace *vm = p->p_vmspace;
    vm_offset_t new, old;
    struct obreak_args /* {
	char * nsize;
    } */ tmp;

#ifdef DEBUG
	if (ldebug(brk))
		printf(ARGS(brk, "%p"), (void *)args->dsend);
#endif
    old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
    new = (vm_offset_t)args->dsend;
    tmp.nsize = (char *) new;
    if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp))
	p->p_retval[0] = (long)new;
    else
	p->p_retval[0] = (long)old;

    return 0;
#endif
}

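/*
 * uselib(2) emulation: map a Linux a.out (ZMAGIC or QMAGIC) shared
 * library into the calling process after the usual permission and
 * sanity checks on the vnode and the a.out header.
 */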
int
linux_uselib(struct proc *p, struct linux_uselib_args *args)
{
    struct nameidata ni;
    struct vnode *vp;
    struct exec *a_out;
    struct vattr attr;
    vm_offset_t vmaddr;
    unsigned long file_offset;
    vm_offset_t buffer;
    unsigned long bss_size;
    int error;
    caddr_t sg;
    int locked;

    sg = stackgap_init();
    CHECKALTEXIST(p, &sg, args->library);

#ifdef DEBUG
	if (ldebug(uselib))
		printf(ARGS(uselib, "%s"), args->library);
#endif

    a_out = NULL;
    locked = 0;
    vp = NULL;

    NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, args->library, p);
    error = namei(&ni);
    if (error)
	goto cleanup;

    vp = ni.ni_vp;
    /*
     * XXX This looks like a bogus check - a LOCKLEAF namei should not succeed
     * without returning a vnode.
     */
    if (vp == NULL) {
	error = ENOEXEC;	/* ?? */
	goto cleanup;
    }
    NDFREE(&ni, NDF_ONLY_PNBUF);

    /*
     * From here on down, we have a locked vnode that must be unlocked.
     */
    locked++;

    /*
     * Writable?
     */
    if (vp->v_writecount) {
	error = ETXTBSY;
	goto cleanup;
    }

    /*
     * Executable?
     */
    error = VOP_GETATTR(vp, &attr, p->p_ucred, p);
    if (error)
	goto cleanup;

    if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
	((attr.va_mode & 0111) == 0) ||
	(attr.va_type != VREG)) {
	    error = ENOEXEC;
	    goto cleanup;
    }

    /*
     * Sensible size?
     */
    if (attr.va_size == 0) {
	error = ENOEXEC;
	goto cleanup;
    }

    /*
     * Can we access it?
     */
    error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
    if (error)
	goto cleanup;

    error = VOP_OPEN(vp, FREAD, p->p_ucred, p);
    if (error)
	goto cleanup;

    /*
     * Lock no longer needed
     */
    VOP_UNLOCK(vp, 0, p);
    locked = 0;

    /*
     * Pull in executable header into kernel_map
     */
    error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
	    	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
    if (error)
	goto cleanup;

    /*
     * Is it a Linux binary ?
     */
    if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
	error = ENOEXEC;
	goto cleanup;
    }

    /* While we are here, we should REALLY do some more checks */

    /*
     * Set file/virtual offset based on a.out variant.
     */
    switch ((int)(a_out->a_magic & 0xffff)) {
    case 0413:	/* ZMAGIC */
	file_offset = 1024;
	break;
    case 0314:	/* QMAGIC */
	file_offset = 0;
	break;
    default:
	error = ENOEXEC;
	goto cleanup;
    }

    bss_size = round_page(a_out->a_bss);

    /*
     * Check various fields in header for validity/bounds.
     */
    if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
	error = ENOEXEC;
	goto cleanup;
    }

    /* text + data can't exceed file size */
    if (a_out->a_data + a_out->a_text > attr.va_size) {
	error = EFAULT;
	goto cleanup;
    }

    /* To protect p->p_rlimit in the if condition. */
    mtx_assert(&Giant, MA_OWNED);

    /*
     * text/data/bss must not exceed limits
     * XXX: this is not complete. it should check current usage PLUS
     * the resources needed by this library.
     */
    if (a_out->a_text > MAXTSIZ ||
	a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) {
	error = ENOMEM;
	goto cleanup;
    }

    /*
     * prevent more writers
     */
    vp->v_flag |= VTEXT;

    /*
     * Check if file_offset is page aligned.  Currently we cannot handle
     * misaligned file offsets, so we read in the entire image
     * (what a waste).
     */
    if (file_offset & PAGE_MASK) {
#ifdef DEBUG
printf("uselib: Non page aligned binary %lu\n", file_offset);
#endif
	/*
	 * Map text+data read/write/execute
	 */

	/* a_entry is the load address and is page aligned */
	vmaddr = trunc_page(a_out->a_entry);

	/* get anon user mapping, read+write+execute */
	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
		    	    a_out->a_text + a_out->a_data, FALSE,
			    VM_PROT_ALL, VM_PROT_ALL, 0);
	if (error)
	    goto cleanup;

	/* map file into kernel_map */
	error = vm_mmap(kernel_map, &buffer,
			round_page(a_out->a_text + a_out->a_data + file_offset),
		   	VM_PROT_READ, VM_PROT_READ, 0,
			(caddr_t)vp, trunc_page(file_offset));
	if (error)
	    goto cleanup;

	/* copy from kernel VM space to user space */
	error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset),
			(caddr_t)vmaddr, a_out->a_text + a_out->a_data);

	/* release temporary kernel space */
	vm_map_remove(kernel_map, buffer,
		      buffer + round_page(a_out->a_text + a_out->a_data + file_offset));

	if (error)
	    goto cleanup;
    }
    else {
#ifdef DEBUG
printf("uselib: Page aligned binary %lu\n", file_offset);
#endif
	/*
	 * for QMAGIC, a_entry is 20 bytes beyond the load address
	 * to skip the executable header
	 */
	vmaddr = trunc_page(a_out->a_entry);

	/*
	 * Map it all into the process's space as a single copy-on-write
	 * "data" segment.
	 */
	error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr,
		   	a_out->a_text + a_out->a_data,
			VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
			(caddr_t)vp, file_offset);
	if (error)
	    goto cleanup;
    }
#ifdef DEBUG
printf("mem=%08lx = %08lx %08lx\n", vmaddr, ((long*)vmaddr)[0], ((long*)vmaddr)[1]);
#endif
    if (bss_size != 0) {
        /*
	 * Calculate BSS start address
	 */
	vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data;

	/*
	 * allocate some 'anon' space
	 */
	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
			    bss_size, FALSE,
			    VM_PROT_ALL, VM_PROT_ALL, 0);
	if (error)
	    goto cleanup;
    }

cleanup:
    /*
     * Unlock vnode if needed
     */
    if (locked)
	VOP_UNLOCK(vp, 0, p);

    /*
     * Release the kernel mapping.
     */
    if (a_out)
	vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE);

    return error;
}

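/*
 * select(2) emulation.  Unlike the native call, Linux rewrites the
 * timeout argument with the time that was left, so remember when the
 * call started and copy the residual timeout back out afterwards.
 */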
int
linux_newselect(struct proc *p, struct linux_newselect_args *args)
{
    struct select_args bsa;
    struct timeval tv0, tv1, utv, *tvp;
    caddr_t sg;
    int error;

#ifdef DEBUG
	if (ldebug(newselect))
		printf(ARGS(newselect, "%d, %p, %p, %p, %p"),
		    args->nfds, (void *)args->readfds,
		    (void *)args->writefds, (void *)args->exceptfds,
		    (void *)args->timeout);
#endif
    error = 0;
    bsa.nd = args->nfds;
    bsa.in = args->readfds;
    bsa.ou = args->writefds;
    bsa.ex = args->exceptfds;
    bsa.tv = args->timeout;

    /*
     * Store current time for computation of the amount of
     * time left.
     */
    if (args->timeout) {
	if ((error = copyin(args->timeout, &utv, sizeof(utv))))
	    goto select_out;
#ifdef DEBUG
	if (ldebug(newselect))
		printf(LMSG("incoming timeout (%ld/%ld)"),
		    utv.tv_sec, utv.tv_usec);
#endif
	if (itimerfix(&utv)) {
	    /*
	     * The timeval was invalid.  Convert it to something
	     * valid that will act as it does under Linux.
	     */
	    sg = stackgap_init();
	    tvp = stackgap_alloc(&sg, sizeof(utv));
	    utv.tv_sec += utv.tv_usec / 1000000;
	    utv.tv_usec %= 1000000;
	    if (utv.tv_usec < 0) {
		utv.tv_sec -= 1;
		utv.tv_usec += 1000000;
	    }
	    if (utv.tv_sec < 0)
		timevalclear(&utv);
	    if ((error = copyout(&utv, tvp, sizeof(utv))))
		goto select_out;
	    bsa.tv = tvp;
	}
	microtime(&tv0);
    }

    error = select(p, &bsa);
#ifdef DEBUG
	if (ldebug(newselect))
		printf(LMSG("real select returns %d"), error);
#endif

    if (error) {
	/*
	 * See fs/select.c in the Linux kernel.  Without this,
	 * Maelstrom doesn't work.
	 */
	if (error == ERESTART)
	    error = EINTR;
	goto select_out;
    }

    if (args->timeout) {
	if (p->p_retval[0]) {
	    /*
	     * Compute how much time was left of the timeout,
	     * by subtracting the current time and the time
	     * before we started the call, and subtracting
	     * that result from the user-supplied value.
	     */
	    microtime(&tv1);
	    timevalsub(&tv1, &tv0);
	    timevalsub(&utv, &tv1);
	    if (utv.tv_sec < 0)
		timevalclear(&utv);
	} else
	    timevalclear(&utv);
#ifdef DEBUG
	if (ldebug(newselect))
		printf(LMSG("outgoing timeout (%ld/%ld)"),
		    utv.tv_sec, utv.tv_usec);
#endif
	if ((error = copyout(&utv, args->timeout, sizeof(utv))))
	    goto select_out;
    }

select_out:
#ifdef DEBUG
	if (ldebug(newselect))
		printf(LMSG("newselect_out -> %d"), error);
#endif
    return error;
}

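/*
 * Return the process group ID of the process identified by pid, or of
 * the calling process itself when pid matches the caller.
 */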
int
linux_getpgid(struct proc *p, struct linux_getpgid_args *args)
{
    struct proc *curp;

#ifdef DEBUG
	if (ldebug(getpgid))
		printf(ARGS(getpgid, "%d"), args->pid);
#endif
    if (args->pid != p->p_pid) {
	if (!(curp = pfind(args->pid)))
	    return ESRCH;
    }
    else
	curp = p;
    p->p_retval[0] = curp->p_pgid;
    return 0;
}

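/*
 * mremap(2) emulation.  Only shrinking an existing mapping is supported:
 * the tail of the region is unmapped and the original address returned.
 * Growing a mapping fails with ENOMEM.
 */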
int
linux_mremap(struct proc *p, struct linux_mremap_args *args)
{
	struct munmap_args /* {
		void *addr;
		size_t len;
	} */ bsd_args;
	int error = 0;

#ifdef DEBUG
	if (ldebug(mremap))
		printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"),
		    (void *)args->addr,
		    (unsigned long)args->old_len,
		    (unsigned long)args->new_len,
		    (unsigned long)args->flags);
#endif
	args->new_len = round_page(args->new_len);
	args->old_len = round_page(args->old_len);

	if (args->new_len > args->old_len) {
		p->p_retval[0] = 0;
		return ENOMEM;
	}

	if (args->new_len < args->old_len) {
		bsd_args.addr = args->addr + args->new_len;
		bsd_args.len = args->old_len - args->new_len;
		error = munmap(p, &bsd_args);
	}

	p->p_retval[0] = error ? 0 : (u_long)args->addr;
	return error;
}

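/*
 * msync(2) emulation; the Linux flags argument is currently ignored and
 * the native msync() defaults are used instead.
 */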
int
linux_msync(struct proc *p, struct linux_msync_args *args)
{
	struct msync_args bsd_args;

	bsd_args.addr = args->addr;
	bsd_args.len = args->len;
	bsd_args.flags = 0;	/* XXX ignore */

	return msync(p, &bsd_args);
}

#ifndef __alpha__
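/*
 * time(2) emulation: return the current time in seconds and, if a user
 * pointer was supplied, store it there as well.
 */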
int
linux_time(struct proc *p, struct linux_time_args *args)
{
    struct timeval tv;
    linux_time_t tm;
    int error;

#ifdef DEBUG
	if (ldebug(time))
		printf(ARGS(time, "*"));
#endif
    microtime(&tv);
    tm = tv.tv_sec;
    if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t))))
	return error;
    p->p_retval[0] = tm;
    return 0;
}
#endif	/*!__alpha__*/

struct linux_times_argv {
    long    tms_utime;
    long    tms_stime;
    long    tms_cutime;
    long    tms_cstime;
};

#define CLK_TCK 100	/* Linux uses 100 */
#define CONVTCK(r)	(r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))

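/*
 * times(2) emulation: report the user/system CPU time of the process and
 * of its reaped children, converted to Linux clock ticks (100 per second),
 * and return the elapsed uptime in ticks.
 */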
int
linux_times(struct proc *p, struct linux_times_args *args)
{
    struct timeval tv;
    struct linux_times_argv tms;
    struct rusage ru;
    int error;

#ifdef DEBUG
	if (ldebug(times))
		printf(ARGS(times, "*"));
#endif
    mtx_lock_spin(&sched_lock);
    calcru(p, &ru.ru_utime, &ru.ru_stime, NULL);
    mtx_unlock_spin(&sched_lock);

    tms.tms_utime = CONVTCK(ru.ru_utime);
    tms.tms_stime = CONVTCK(ru.ru_stime);

    tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime);
    tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime);

    if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf,
	    	    sizeof(struct linux_times_argv))))
	return error;

    microuptime(&tv);
    p->p_retval[0] = (int)CONVTCK(tv);
    return 0;
}

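/*
 * uname(2) emulation: fill in a Linux utsname structure from the emulated
 * OS name and release (see linux_mib.c) together with the real hostname,
 * version, machine and domain name.
 */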
int
linux_newuname(struct proc *p, struct linux_newuname_args *args)
{
	struct linux_new_utsname utsname;
	char *osrelease, *osname;

#ifdef DEBUG
	if (ldebug(newuname))
		printf(ARGS(newuname, "*"));
#endif

	osname = linux_get_osname(p);
	osrelease = linux_get_osrelease(p);

	bzero(&utsname, sizeof(struct linux_new_utsname));
	strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1);
	strncpy(utsname.nodename, hostname, LINUX_MAX_UTSNAME-1);
	strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1);
	strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1);
	strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1);
	strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1);

	return (copyout((caddr_t)&utsname, (caddr_t)args->buf,
			sizeof(struct linux_new_utsname)));
}

struct linux_utimbuf {
	linux_time_t l_actime;
	linux_time_t l_modtime;
};

int
linux_utime(struct proc *p, struct linux_utime_args *args)
{
    struct utimes_args /* {
	char	*path;
	struct	timeval *tptr;
    } */ bsdutimes;
    struct timeval tv[2], *tvp;
    struct linux_utimbuf lut;
    int error;
    caddr_t sg;

    sg = stackgap_init();
    CHECKALTEXIST(p, &sg, args->fname);

#ifdef DEBUG
	if (ldebug(utime))
		printf(ARGS(utime, "%s, *"), args->fname);
#endif
    if (args->times) {
	if ((error = copyin(args->times, &lut, sizeof lut)))
	    return error;
	tv[0].tv_sec = lut.l_actime;
	tv[0].tv_usec = 0;
	tv[1].tv_sec = lut.l_modtime;
	tv[1].tv_usec = 0;
	/* so that utimes can copyin */
	tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
	if (tvp == NULL)
		return (ENAMETOOLONG);
	if ((error = copyout(tv, tvp, sizeof(tv))))
	    return error;
	bsdutimes.tptr = tvp;
    } else
	bsdutimes.tptr = NULL;

    bsdutimes.path = args->fname;
    return utimes(p, &bsdutimes);
}

#define __WCLONE 0x80000000

#ifndef __alpha__
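/*
 * waitpid(2) emulation: hand the request to the native wait4() and
 * rewrite the returned status word so that any signal number it carries
 * is translated to its Linux value.  linux_wait4() below does the same
 * for the four-argument form.
 */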
int
linux_waitpid(struct proc *p, struct linux_waitpid_args *args)
{
    struct wait_args /* {
	int pid;
	int *status;
	int options;
	struct	rusage *rusage;
    } */ tmp;
    int error, tmpstat;

#ifdef DEBUG
	if (ldebug(waitpid))
		printf(ARGS(waitpid, "%d, %p, %d"),
		    args->pid, (void *)args->status, args->options);
#endif
    tmp.pid = args->pid;
    tmp.status = args->status;
    tmp.options = (args->options & (WNOHANG | WUNTRACED));
    /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
    if (args->options & __WCLONE)
	tmp.options |= WLINUXCLONE;
    tmp.rusage = NULL;

    if ((error = wait4(p, &tmp)) != 0)
	return error;

    if (args->status) {
	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
	    return error;
	tmpstat &= 0xffff;
	if (WIFSIGNALED(tmpstat))
	    tmpstat = (tmpstat & 0xffffff80) |
		      BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
	else if (WIFSTOPPED(tmpstat))
	    tmpstat = (tmpstat & 0xffff00ff) |
		      (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
	return copyout(&tmpstat, args->status, sizeof(int));
    } else
	return 0;
}
#endif	/*!__alpha__*/

int
linux_wait4(struct proc *p, struct linux_wait4_args *args)
{
    struct wait_args /* {
	int pid;
	int *status;
	int options;
	struct	rusage *rusage;
    } */ tmp;
    int error, tmpstat;

#ifdef DEBUG
	if (ldebug(wait4))
		printf(ARGS(wait4, "%d, %p, %d, %p"),
		    args->pid, (void *)args->status, args->options,
		    (void *)args->rusage);
#endif
    tmp.pid = args->pid;
    tmp.status = args->status;
    tmp.options = (args->options & (WNOHANG | WUNTRACED));
    /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
    if (args->options & __WCLONE)
	tmp.options |= WLINUXCLONE;
    tmp.rusage = args->rusage;

    if ((error = wait4(p, &tmp)) != 0)
	return error;

    SIGDELSET(p->p_siglist, SIGCHLD);

    if (args->status) {
	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
	    return error;
	tmpstat &= 0xffff;
	if (WIFSIGNALED(tmpstat))
	    tmpstat = (tmpstat & 0xffffff80) |
		  BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
	else if (WIFSTOPPED(tmpstat))
	    tmpstat = (tmpstat & 0xffff00ff) |
		  (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
	return copyout(&tmpstat, args->status, sizeof(int));
    } else
	return 0;
}

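/*
 * mknod(2) emulation: requests for FIFOs are routed to the native
 * mkfifo(); everything else is passed to mknod() unchanged.
 */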
int
linux_mknod(struct proc *p, struct linux_mknod_args *args)
{
	caddr_t sg;
	struct mknod_args bsd_mknod;
	struct mkfifo_args bsd_mkfifo;

	sg = stackgap_init();

	CHECKALTCREAT(p, &sg, args->path);

#ifdef DEBUG
	if (ldebug(mknod))
		printf(ARGS(mknod, "%s, %d, %d"),
		    args->path, args->mode, args->dev);
#endif

	if (args->mode & S_IFIFO) {
		bsd_mkfifo.path = args->path;
		bsd_mkfifo.mode = args->mode;
		return mkfifo(p, &bsd_mkfifo);
	} else {
		bsd_mknod.path = args->path;
		bsd_mknod.mode = args->mode;
		bsd_mknod.dev = args->dev;
		return mknod(p, &bsd_mknod);
	}
}

/*
 * UGH! This is just about the dumbest idea I've ever heard!!
 */
int
linux_personality(struct proc *p, struct linux_personality_args *args)
{
#ifdef DEBUG
	if (ldebug(personality))
		printf(ARGS(personality, "%d"), args->per);
#endif
#ifndef __alpha__
	if (args->per != 0)
		return EINVAL;
#endif

	/* Yes Jim, it's still a Linux... */
	p->p_retval[0] = 0;
	return 0;
}

/*
 * Wrappers for get/setitimer for debugging.
 */
int
linux_setitimer(struct proc *p, struct linux_setitimer_args *args)
{
	struct setitimer_args bsa;
	struct itimerval foo;
	int error;

#ifdef DEBUG
	if (ldebug(setitimer))
		printf(ARGS(setitimer, "%p, %p"),
		    (void *)args->itv, (void *)args->oitv);
#endif
	bsa.which = args->which;
	bsa.itv = args->itv;
	bsa.oitv = args->oitv;
	if (args->itv) {
	    if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo,
			sizeof(foo))))
		return error;
#ifdef DEBUG
	    if (ldebug(setitimer)) {
	        printf("setitimer: value: sec: %ld, usec: %ld\n",
		    foo.it_value.tv_sec, foo.it_value.tv_usec);
	        printf("setitimer: interval: sec: %ld, usec: %ld\n",
		    foo.it_interval.tv_sec, foo.it_interval.tv_usec);
	    }
#endif
	}
	return setitimer(p, &bsa);
}

int
linux_getitimer(struct proc *p, struct linux_getitimer_args *args)
{
	struct getitimer_args bsa;
#ifdef DEBUG
	if (ldebug(getitimer))
		printf(ARGS(getitimer, "%p"), (void *)args->itv);
#endif
	bsa.which = args->which;
	bsa.itv = args->itv;
	return getitimer(p, &bsa);
}

#ifndef __alpha__
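/*
 * nice(2) emulation: adjust the priority of the calling process by
 * handing the increment to the native setpriority().
 */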
int
linux_nice(struct proc *p, struct linux_nice_args *args)
{
	struct setpriority_args	bsd_args;

	bsd_args.which = PRIO_PROCESS;
	bsd_args.who = 0;	/* current process */
	bsd_args.prio = args->inc;
	return setpriority(p, &bsd_args);
}
#endif	/*!__alpha__*/

int
linux_setgroups(p, uap)
	struct proc *p;
	struct linux_setgroups_args *uap;
{
	struct pcred *pc;
	linux_gid_t linux_gidset[NGROUPS];
	gid_t *bsd_gidset;
	int ngrp, error;

	pc = p->p_cred;
	ngrp = uap->gidsetsize;

	/*
	 * cr_groups[0] holds egid. Setting the whole set from
	 * the supplied set will cause egid to be changed too.
	 * Keep cr_groups[0] unchanged to prevent that.
	 */

	if ((error = suser(p)) != 0)
		return (error);

	if (ngrp >= NGROUPS)
		return (EINVAL);

	pc->pc_ucred = crcopy(pc->pc_ucred);
	if (ngrp > 0) {
		error = copyin((caddr_t)uap->gidset, (caddr_t)linux_gidset,
			       ngrp * sizeof(linux_gid_t));
		if (error)
			return (error);

		pc->pc_ucred->cr_ngroups = ngrp + 1;

		bsd_gidset = pc->pc_ucred->cr_groups;
		ngrp--;
		while (ngrp >= 0) {
			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
			ngrp--;
		}
	}
	else
		pc->pc_ucred->cr_ngroups = 1;

	setsugid(p);
	return (0);
}

int
linux_getgroups(p, uap)
	struct proc *p;
	struct linux_getgroups_args *uap;
{
	struct pcred *pc;
	linux_gid_t linux_gidset[NGROUPS];
	gid_t *bsd_gidset;
	int bsd_gidsetsz, ngrp, error;

	pc = p->p_cred;
	bsd_gidset = pc->pc_ucred->cr_groups;
	bsd_gidsetsz = pc->pc_ucred->cr_ngroups - 1;

	/*
	 * cr_groups[0] holds egid. Returning the whole set
	 * here will cause a duplicate. Exclude cr_groups[0]
	 * to prevent that.
	 */

	if ((ngrp = uap->gidsetsize) == 0) {
		p->p_retval[0] = bsd_gidsetsz;
		return (0);
	}

	if (ngrp < bsd_gidsetsz)
		return (EINVAL);

	ngrp = 0;
	while (ngrp < bsd_gidsetsz) {
		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
		ngrp++;
	}

	if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset,
	    ngrp * sizeof(linux_gid_t))))
		return (error);

	p->p_retval[0] = ngrp;
	return (0);
}

#ifndef __alpha__
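/*
 * Resource limit emulation.  Linux resource numbers are translated via
 * linux_to_bsd_resource[]; struct linux_rlimit holds plain unsigned
 * longs, and linux_getrlimit() clamps values that come back as
 * ULONG_MAX down to LONG_MAX.
 */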
int
linux_setrlimit(p, uap)
	struct proc *p;
	struct linux_setrlimit_args *uap;
{
	struct __setrlimit_args bsd;
	struct linux_rlimit rlim;
	int error;
	caddr_t sg = stackgap_init();

#ifdef DEBUG
	if (ldebug(setrlimit))
		printf(ARGS(setrlimit, "%d, %p"),
		    uap->resource, (void *)uap->rlim);
#endif

	if (uap->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	bsd.which = linux_to_bsd_resource[uap->resource];
	if (bsd.which == -1)
		return (EINVAL);

	error = copyin(uap->rlim, &rlim, sizeof(rlim));
	if (error)
		return (error);

	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
	bsd.rlp->rlim_cur = (rlim_t)rlim.rlim_cur;
	bsd.rlp->rlim_max = (rlim_t)rlim.rlim_max;
	return (setrlimit(p, &bsd));
}

int
linux_getrlimit(p, uap)
	struct proc *p;
	struct linux_getrlimit_args *uap;
{
	struct __getrlimit_args bsd;
	struct linux_rlimit rlim;
	int error;
	caddr_t sg = stackgap_init();

#ifdef DEBUG
	if (ldebug(getrlimit))
		printf(ARGS(getrlimit, "%d, %p"),
		    uap->resource, (void *)uap->rlim);
#endif

	if (uap->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	bsd.which = linux_to_bsd_resource[uap->resource];
	if (bsd.which == -1)
		return (EINVAL);

	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
	error = getrlimit(p, &bsd);
	if (error)
		return (error);

	rlim.rlim_cur = (unsigned long)bsd.rlp->rlim_cur;
	if (rlim.rlim_cur == ULONG_MAX)
		rlim.rlim_cur = LONG_MAX;
	rlim.rlim_max = (unsigned long)bsd.rlp->rlim_max;
	if (rlim.rlim_max == ULONG_MAX)
		rlim.rlim_max = LONG_MAX;
	return (copyout(&rlim, uap->rlim, sizeof(rlim)));
}
#endif /*!__alpha__*/

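/*
 * POSIX scheduling emulation: translate the Linux scheduling policy
 * constants to and from their native SCHED_* counterparts around the
 * corresponding native system calls.
 */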
int
linux_sched_setscheduler(p, uap)
	struct proc *p;
	struct linux_sched_setscheduler_args *uap;
{
	struct sched_setscheduler_args bsd;

#ifdef DEBUG
	if (ldebug(sched_setscheduler))
		printf(ARGS(sched_setscheduler, "%d, %d, %p"),
		    uap->pid, uap->policy, (const void *)uap->param);
#endif

	switch (uap->policy) {
	case LINUX_SCHED_OTHER:
		bsd.policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		bsd.policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		bsd.policy = SCHED_RR;
		break;
	default:
		return EINVAL;
	}

	bsd.pid = uap->pid;
	bsd.param = uap->param;
	return sched_setscheduler(p, &bsd);
}

int
linux_sched_getscheduler(p, uap)
	struct proc *p;
	struct linux_sched_getscheduler_args *uap;
{
	struct sched_getscheduler_args bsd;
	int error;

#ifdef DEBUG
	if (ldebug(sched_getscheduler))
		printf(ARGS(sched_getscheduler, "%d"), uap->pid);
#endif

	bsd.pid = uap->pid;
	error = sched_getscheduler(p, &bsd);

	switch (p->p_retval[0]) {
	case SCHED_OTHER:
		p->p_retval[0] = LINUX_SCHED_OTHER;
		break;
	case SCHED_FIFO:
		p->p_retval[0] = LINUX_SCHED_FIFO;
		break;
	case SCHED_RR:
		p->p_retval[0] = LINUX_SCHED_RR;
		break;
	}

	return error;
}

#define REBOOT_CAD_ON	0x89abcdef
#define REBOOT_CAD_OFF	0
#define REBOOT_HALT	0xcdef0123

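/*
 * reboot(2) emulation: the Linux magic values that merely toggle
 * Ctrl-Alt-Del handling are accepted and ignored, REBOOT_HALT maps to
 * RB_HALT, and anything else results in a normal reboot.
 */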
int
linux_reboot(struct proc *p, struct linux_reboot_args *args)
{
	struct reboot_args bsd_args;

#ifdef DEBUG
	if (ldebug(reboot))
		printf(ARGS(reboot, "0x%x"), args->opt);
#endif
	if (args->opt == REBOOT_CAD_ON || args->opt == REBOOT_CAD_OFF)
		return (0);
	bsd_args.opt = args->opt == REBOOT_HALT ? RB_HALT : 0;
	return (reboot(p, &bsd_args));
}
1191