xref: /freebsd/sys/compat/linux/linux_misc.c (revision 1b6c76a2fe091c74f08427e6c870851025a9cf67)
1 /*-
2  * Copyright (c) 1994-1995 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include "opt_compat.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact_aout.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/mman.h>
40 #include <sys/mount.h>
41 #include <sys/mutex.h>
42 #include <sys/namei.h>
43 #include <sys/proc.h>
44 #include <sys/reboot.h>
45 #include <sys/resourcevar.h>
46 #include <sys/signalvar.h>
47 #include <sys/stat.h>
48 #include <sys/sysctl.h>
49 #include <sys/sysproto.h>
50 #include <sys/time.h>
51 #include <sys/unistd.h>
52 #include <sys/vnode.h>
53 #include <sys/wait.h>
54 
55 #include <vm/vm.h>
56 #include <vm/pmap.h>
57 #include <vm/vm_kern.h>
58 #include <vm/vm_map.h>
59 #include <vm/vm_extern.h>
60 
61 #include <machine/frame.h>
62 #include <machine/limits.h>
63 #include <machine/psl.h>
64 #include <machine/sysarch.h>
65 #ifdef __i386__
66 #include <machine/segments.h>
67 #endif
68 
69 #include <posix4/sched.h>
70 
71 #include <machine/../linux/linux.h>
72 #include <machine/../linux/linux_proto.h>
73 #include <compat/linux/linux_mib.h>
74 #include <compat/linux/linux_util.h>
75 
/*
 * Translate a BSD signal number to its Linux counterpart.
 *
 * On alpha the number is passed through unchanged.  Elsewhere, numbers
 * up to LINUX_SIGTBLSZ are looked up in bsd_to_linux_signal[]; larger
 * ones are returned as-is.  The argument may be evaluated more than
 * once, so it must not have side effects.
 */
#ifdef __alpha__
#define BSD_TO_LINUX_SIGNAL(sig)       (sig)
#else
#define BSD_TO_LINUX_SIGNAL(sig)	\
	(((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : (sig))
#endif
82 
/*
 * Resource limit pair as exchanged with Linux user space by
 * linux_setrlimit() and linux_getrlimit().
 */
struct linux_rlimit {
	unsigned long rlim_cur;		/* current limit */
	unsigned long rlim_max;		/* maximum limit */
};
87 
88 #ifndef __alpha__
89 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] =
90 { RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
91   RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
92   RLIMIT_MEMLOCK, -1
93 };
94 #endif /*!__alpha__*/
95 
96 #ifndef __alpha__
97 int
98 linux_alarm(struct proc *p, struct linux_alarm_args *args)
99 {
100     struct itimerval it, old_it;
101     struct timeval tv;
102     int s;
103 
104 #ifdef DEBUG
105 	if (ldebug(alarm))
106 		printf(ARGS(alarm, "%u"), args->secs);
107 #endif
108     if (args->secs > 100000000)
109 	return EINVAL;
110     it.it_value.tv_sec = (long)args->secs;
111     it.it_value.tv_usec = 0;
112     it.it_interval.tv_sec = 0;
113     it.it_interval.tv_usec = 0;
114     s = splsoftclock();
115     old_it = p->p_realtimer;
116     getmicrouptime(&tv);
117     if (timevalisset(&old_it.it_value))
118 	callout_stop(&p->p_itcallout);
119     if (it.it_value.tv_sec != 0) {
120 	callout_reset(&p->p_itcallout, tvtohz(&it.it_value), realitexpire, p);
121 	timevaladd(&it.it_value, &tv);
122     }
123     p->p_realtimer = it;
124     splx(s);
125     if (timevalcmp(&old_it.it_value, &tv, >)) {
126 	timevalsub(&old_it.it_value, &tv);
127 	if (old_it.it_value.tv_usec != 0)
128 	    old_it.it_value.tv_sec++;
129 	p->p_retval[0] = old_it.it_value.tv_sec;
130     }
131     return 0;
132 }
133 #endif /*!__alpha__*/
134 
135 int
136 linux_brk(struct proc *p, struct linux_brk_args *args)
137 {
138 #if 0
139     struct vmspace *vm = p->p_vmspace;
140     vm_offset_t new, old;
141     int error;
142 
143     if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr)
144 	return EINVAL;
145     if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr)
146 	> p->p_rlimit[RLIMIT_DATA].rlim_cur)
147 	return ENOMEM;
148 
149     old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize);
150     new = round_page((vm_offset_t)args->dsend);
151     p->p_retval[0] = old;
152     if ((new-old) > 0) {
153 	if (swap_pager_full)
154 	    return ENOMEM;
155 	error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE,
156 			VM_PROT_ALL, VM_PROT_ALL, 0);
157 	if (error)
158 	    return error;
159 	vm->vm_dsize += btoc((new-old));
160 	p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize));
161     }
162     return 0;
163 #else
164     struct vmspace *vm = p->p_vmspace;
165     vm_offset_t new, old;
166     struct obreak_args /* {
167 	char * nsize;
168     } */ tmp;
169 
170 #ifdef DEBUG
171 	if (ldebug(brk))
172 		printf(ARGS(brk, "%p"), (void *)args->dsend);
173 #endif
174     old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
175     new = (vm_offset_t)args->dsend;
176     tmp.nsize = (char *) new;
177     if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp))
178 	p->p_retval[0] = (long)new;
179     else
180 	p->p_retval[0] = (long)old;
181 
182     return 0;
183 #endif
184 }
185 
186 int
187 linux_uselib(struct proc *p, struct linux_uselib_args *args)
188 {
189     struct nameidata ni;
190     struct vnode *vp;
191     struct exec *a_out;
192     struct vattr attr;
193     vm_offset_t vmaddr;
194     unsigned long file_offset;
195     vm_offset_t buffer;
196     unsigned long bss_size;
197     int error;
198     caddr_t sg;
199     int locked;
200 
201     sg = stackgap_init();
202     CHECKALTEXIST(p, &sg, args->library);
203 
204 #ifdef DEBUG
205 	if (ldebug(uselib))
206 		printf(ARGS(uselib, "%s"), args->library);
207 #endif
208 
209     a_out = NULL;
210     locked = 0;
211     vp = NULL;
212 
213     NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, args->library, p);
214     error = namei(&ni);
215     if (error)
216 	goto cleanup;
217 
218     vp = ni.ni_vp;
219     /*
220      * XXX This looks like a bogus check - a LOCKLEAF namei should not succeed
221      * without returning a vnode.
222      */
223     if (vp == NULL) {
224 	error = ENOEXEC;	/* ?? */
225 	goto cleanup;
226     }
227     NDFREE(&ni, NDF_ONLY_PNBUF);
228 
229     /*
230      * From here on down, we have a locked vnode that must be unlocked.
231      */
232     locked++;
233 
234     /*
235      * Writable?
236      */
237     if (vp->v_writecount) {
238 	error = ETXTBSY;
239 	goto cleanup;
240     }
241 
242     /*
243      * Executable?
244      */
245     error = VOP_GETATTR(vp, &attr, p->p_ucred, p);
246     if (error)
247 	goto cleanup;
248 
249     if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
250 	((attr.va_mode & 0111) == 0) ||
251 	(attr.va_type != VREG)) {
252 	    error = ENOEXEC;
253 	    goto cleanup;
254     }
255 
256     /*
257      * Sensible size?
258      */
259     if (attr.va_size == 0) {
260 	error = ENOEXEC;
261 	goto cleanup;
262     }
263 
264     /*
265      * Can we access it?
266      */
267     error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
268     if (error)
269 	goto cleanup;
270 
271     error = VOP_OPEN(vp, FREAD, p->p_ucred, p);
272     if (error)
273 	goto cleanup;
274 
275     /*
276      * Lock no longer needed
277      */
278     VOP_UNLOCK(vp, 0, p);
279     locked = 0;
280 
281     /*
282      * Pull in executable header into kernel_map
283      */
284     error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
285 	    	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
286     if (error)
287 	goto cleanup;
288 
289     /*
290      * Is it a Linux binary ?
291      */
292     if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
293 	error = ENOEXEC;
294 	goto cleanup;
295     }
296 
297     /* While we are here, we should REALLY do some more checks */
298 
299     /*
300      * Set file/virtual offset based on a.out variant.
301      */
302     switch ((int)(a_out->a_magic & 0xffff)) {
303     case 0413:	/* ZMAGIC */
304 	file_offset = 1024;
305 	break;
306     case 0314:	/* QMAGIC */
307 	file_offset = 0;
308 	break;
309     default:
310 	error = ENOEXEC;
311 	goto cleanup;
312     }
313 
314     bss_size = round_page(a_out->a_bss);
315 
316     /*
317      * Check various fields in header for validity/bounds.
318      */
319     if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
320 	error = ENOEXEC;
321 	goto cleanup;
322     }
323 
324     /* text + data can't exceed file size */
325     if (a_out->a_data + a_out->a_text > attr.va_size) {
326 	error = EFAULT;
327 	goto cleanup;
328     }
329 
330     /* To protect p->p_rlimit in the if condition. */
331     mtx_assert(&Giant, MA_OWNED);
332 
333     /*
334      * text/data/bss must not exceed limits
335      * XXX: this is not complete. it should check current usage PLUS
336      * the resources needed by this library.
337      */
338     if (a_out->a_text > MAXTSIZ ||
339 	a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) {
340 	error = ENOMEM;
341 	goto cleanup;
342     }
343 
344     /*
345      * prevent more writers
346      */
347     vp->v_flag |= VTEXT;
348 
349     /*
350      * Check if file_offset page aligned,.
351      * Currently we cannot handle misalinged file offsets,
352      * and so we read in the entire image (what a waste).
353      */
354     if (file_offset & PAGE_MASK) {
355 #ifdef DEBUG
356 printf("uselib: Non page aligned binary %lu\n", file_offset);
357 #endif
358 	/*
359 	 * Map text+data read/write/execute
360 	 */
361 
362 	/* a_entry is the load address and is page aligned */
363 	vmaddr = trunc_page(a_out->a_entry);
364 
365 	/* get anon user mapping, read+write+execute */
366 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
367 		    	    a_out->a_text + a_out->a_data, FALSE,
368 			    VM_PROT_ALL, VM_PROT_ALL, 0);
369 	if (error)
370 	    goto cleanup;
371 
372 	/* map file into kernel_map */
373 	error = vm_mmap(kernel_map, &buffer,
374 			round_page(a_out->a_text + a_out->a_data + file_offset),
375 		   	VM_PROT_READ, VM_PROT_READ, 0,
376 			(caddr_t)vp, trunc_page(file_offset));
377 	if (error)
378 	    goto cleanup;
379 
380 	/* copy from kernel VM space to user space */
381 	error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset),
382 			(caddr_t)vmaddr, a_out->a_text + a_out->a_data);
383 
384 	/* release temporary kernel space */
385 	vm_map_remove(kernel_map, buffer,
386 		      buffer + round_page(a_out->a_text + a_out->a_data + file_offset));
387 
388 	if (error)
389 	    goto cleanup;
390     }
391     else {
392 #ifdef DEBUG
393 printf("uselib: Page aligned binary %lu\n", file_offset);
394 #endif
395 	/*
396 	 * for QMAGIC, a_entry is 20 bytes beyond the load address
397 	 * to skip the executable header
398 	 */
399 	vmaddr = trunc_page(a_out->a_entry);
400 
401 	/*
402 	 * Map it all into the process's space as a single copy-on-write
403 	 * "data" segment.
404 	 */
405 	error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr,
406 		   	a_out->a_text + a_out->a_data,
407 			VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
408 			(caddr_t)vp, file_offset);
409 	if (error)
410 	    goto cleanup;
411     }
412 #ifdef DEBUG
413 printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long*)vmaddr)[0], ((long*)vmaddr)[1]);
414 #endif
415     if (bss_size != 0) {
416         /*
417 	 * Calculate BSS start address
418 	 */
419 	vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data;
420 
421 	/*
422 	 * allocate some 'anon' space
423 	 */
424 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
425 			    bss_size, FALSE,
426 			    VM_PROT_ALL, VM_PROT_ALL, 0);
427 	if (error)
428 	    goto cleanup;
429     }
430 
431 cleanup:
432     /*
433      * Unlock vnode if needed
434      */
435     if (locked)
436 	VOP_UNLOCK(vp, 0, p);
437 
438     /*
439      * Release the kernel mapping.
440      */
441     if (a_out)
442 	vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE);
443 
444     return error;
445 }
446 
447 int
448 linux_newselect(struct proc *p, struct linux_newselect_args *args)
449 {
450     struct select_args bsa;
451     struct timeval tv0, tv1, utv, *tvp;
452     caddr_t sg;
453     int error;
454 
455 #ifdef DEBUG
456 	if (ldebug(newselect))
457 		printf(ARGS(newselect, "%d, %p, %p, %p, %p"),
458 		    args->nfds, (void *)args->readfds,
459 		    (void *)args->writefds, (void *)args->exceptfds,
460 		    (void *)args->timeout);
461 #endif
462     error = 0;
463     bsa.nd = args->nfds;
464     bsa.in = args->readfds;
465     bsa.ou = args->writefds;
466     bsa.ex = args->exceptfds;
467     bsa.tv = args->timeout;
468 
469     /*
470      * Store current time for computation of the amount of
471      * time left.
472      */
473     if (args->timeout) {
474 	if ((error = copyin(args->timeout, &utv, sizeof(utv))))
475 	    goto select_out;
476 #ifdef DEBUG
477 	if (ldebug(newselect))
478 		printf(LMSG("incoming timeout (%ld/%ld)"),
479 		    utv.tv_sec, utv.tv_usec);
480 #endif
481 	if (itimerfix(&utv)) {
482 	    /*
483 	     * The timeval was invalid.  Convert it to something
484 	     * valid that will act as it does under Linux.
485 	     */
486 	    sg = stackgap_init();
487 	    tvp = stackgap_alloc(&sg, sizeof(utv));
488 	    utv.tv_sec += utv.tv_usec / 1000000;
489 	    utv.tv_usec %= 1000000;
490 	    if (utv.tv_usec < 0) {
491 		utv.tv_sec -= 1;
492 		utv.tv_usec += 1000000;
493 	    }
494 	    if (utv.tv_sec < 0)
495 		timevalclear(&utv);
496 	    if ((error = copyout(&utv, tvp, sizeof(utv))))
497 		goto select_out;
498 	    bsa.tv = tvp;
499 	}
500 	microtime(&tv0);
501     }
502 
503     error = select(p, &bsa);
504 #ifdef DEBUG
505 	if (ldebug(newselect))
506 		printf(LMSG("real select returns %d"), error);
507 #endif
508 
509     if (error) {
510 	/*
511 	 * See fs/select.c in the Linux kernel.  Without this,
512 	 * Maelstrom doesn't work.
513 	 */
514 	if (error == ERESTART)
515 	    error = EINTR;
516 	goto select_out;
517     }
518 
519     if (args->timeout) {
520 	if (p->p_retval[0]) {
521 	    /*
522 	     * Compute how much time was left of the timeout,
523 	     * by subtracting the current time and the time
524 	     * before we started the call, and subtracting
525 	     * that result from the user-supplied value.
526 	     */
527 	    microtime(&tv1);
528 	    timevalsub(&tv1, &tv0);
529 	    timevalsub(&utv, &tv1);
530 	    if (utv.tv_sec < 0)
531 		timevalclear(&utv);
532 	} else
533 	    timevalclear(&utv);
534 #ifdef DEBUG
535 	if (ldebug(newselect))
536 		printf(LMSG("outgoing timeout (%ld/%ld)"),
537 		    utv.tv_sec, utv.tv_usec);
538 #endif
539 	if ((error = copyout(&utv, args->timeout, sizeof(utv))))
540 	    goto select_out;
541     }
542 
543 select_out:
544 #ifdef DEBUG
545 	if (ldebug(newselect))
546 		printf(LMSG("newselect_out -> %d"), error);
547 #endif
548     return error;
549 }
550 
551 int
552 linux_getpgid(struct proc *p, struct linux_getpgid_args *args)
553 {
554     struct proc *curp;
555 
556 #ifdef DEBUG
557 	if (ldebug(getpgid))
558 		printf(ARGS(getpgid, "%d"), args->pid);
559 #endif
560     if (args->pid != p->p_pid) {
561 	if (!(curp = pfind(args->pid)))
562 	    return ESRCH;
563 	p->p_retval[0] = curp->p_pgid;
564 	PROC_UNLOCK(curp);
565     }
566     else
567 	p->p_retval[0] = p->p_pgid;
568     return 0;
569 }
570 
571 int
572 linux_mremap(struct proc *p, struct linux_mremap_args *args)
573 {
574 	struct munmap_args /* {
575 		void *addr;
576 		size_t len;
577 	} */ bsd_args;
578 	int error = 0;
579 
580 #ifdef DEBUG
581 	if (ldebug(mremap))
582 		printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"),
583 		    (void *)args->addr,
584 		    (unsigned long)args->old_len,
585 		    (unsigned long)args->new_len,
586 		    (unsigned long)args->flags);
587 #endif
588 	args->new_len = round_page(args->new_len);
589 	args->old_len = round_page(args->old_len);
590 
591 	if (args->new_len > args->old_len) {
592 		p->p_retval[0] = 0;
593 		return ENOMEM;
594 	}
595 
596 	if (args->new_len < args->old_len) {
597 		bsd_args.addr = args->addr + args->new_len;
598 		bsd_args.len = args->old_len - args->new_len;
599 		error = munmap(p, &bsd_args);
600 	}
601 
602 	p->p_retval[0] = error ? 0 : (u_long)args->addr;
603 	return error;
604 }
605 
606 int
607 linux_msync(struct proc *p, struct linux_msync_args *args)
608 {
609 	struct msync_args bsd_args;
610 
611 	bsd_args.addr = args->addr;
612 	bsd_args.len = args->len;
613 	bsd_args.flags = 0;	/* XXX ignore */
614 
615 	return msync(p, &bsd_args);
616 }
617 
618 #ifndef __alpha__
619 int
620 linux_time(struct proc *p, struct linux_time_args *args)
621 {
622     struct timeval tv;
623     linux_time_t tm;
624     int error;
625 
626 #ifdef DEBUG
627 	if (ldebug(time))
628 		printf(ARGS(time, "*"));
629 #endif
630     microtime(&tv);
631     tm = tv.tv_sec;
632     if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t))))
633 	return error;
634     p->p_retval[0] = tm;
635     return 0;
636 }
637 #endif	/*!__alpha__*/
638 
/*
 * Buffer filled in by linux_times(); every field is a tick count
 * produced by CONVTCK().
 */
struct linux_times_argv {
    long    tms_utime;		/* from the process's ru_utime */
    long    tms_stime;		/* from the process's ru_stime */
    long    tms_cutime;		/* from p_stats->p_cru.ru_utime */
    long    tms_cstime;		/* from p_stats->p_cru.ru_stime */
};
645 
/* Clock ticks per second as seen by Linux user space. */
#ifdef __alpha__
#define CLK_TCK 1024	/* Linux uses 1024 on alpha */
#else
#define CLK_TCK 100	/* Linux uses 100 */
#endif

/*
 * Convert a struct timeval to a count of CLK_TCK ticks.
 *
 * The microseconds are multiplied before dividing: 1000000 / CLK_TCK
 * is not an integer when CLK_TCK is 1024, so dividing first would skew
 * the sub-second part.  For CLK_TCK == 100 both forms give the same
 * result.  The argument is evaluated twice.
 */
#define CONVTCK(r)	\
	((r).tv_sec * CLK_TCK + (r).tv_usec * CLK_TCK / 1000000)
653 
654 int
655 linux_times(struct proc *p, struct linux_times_args *args)
656 {
657     struct timeval tv;
658     struct linux_times_argv tms;
659     struct rusage ru;
660     int error;
661 
662 #ifdef DEBUG
663 	if (ldebug(times))
664 		printf(ARGS(times, "*"));
665 #endif
666     mtx_lock_spin(&sched_lock);
667     calcru(p, &ru.ru_utime, &ru.ru_stime, NULL);
668     mtx_unlock_spin(&sched_lock);
669 
670     tms.tms_utime = CONVTCK(ru.ru_utime);
671     tms.tms_stime = CONVTCK(ru.ru_stime);
672 
673     tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime);
674     tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime);
675 
676     if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf,
677 	    	    sizeof(struct linux_times_argv))))
678 	return error;
679 
680     microuptime(&tv);
681     p->p_retval[0] = (int)CONVTCK(tv);
682     return 0;
683 }
684 
685 int
686 linux_newuname(struct proc *p, struct linux_newuname_args *args)
687 {
688 	struct linux_new_utsname utsname;
689 	char *osrelease, *osname;
690 
691 #ifdef DEBUG
692 	if (ldebug(newuname))
693 		printf(ARGS(newuname, "*"));
694 #endif
695 
696 	osname = linux_get_osname(p);
697 	osrelease = linux_get_osrelease(p);
698 
699 	bzero(&utsname, sizeof(struct linux_new_utsname));
700 	strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1);
701 	strncpy(utsname.nodename, hostname, LINUX_MAX_UTSNAME-1);
702 	strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1);
703 	strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1);
704 	strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1);
705 	strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1);
706 
707 	return (copyout((caddr_t)&utsname, (caddr_t)args->buf,
708 			sizeof(struct linux_new_utsname)));
709 }
710 
711 struct linux_utimbuf {
712 	linux_time_t l_actime;
713 	linux_time_t l_modtime;
714 };
715 
716 int
717 linux_utime(struct proc *p, struct linux_utime_args *args)
718 {
719     struct utimes_args /* {
720 	char	*path;
721 	struct	timeval *tptr;
722     } */ bsdutimes;
723     struct timeval tv[2], *tvp;
724     struct linux_utimbuf lut;
725     int error;
726     caddr_t sg;
727 
728     sg = stackgap_init();
729     CHECKALTEXIST(p, &sg, args->fname);
730 
731 #ifdef DEBUG
732 	if (ldebug(utime))
733 		printf(ARGS(utime, "%s, *"), args->fname);
734 #endif
735     if (args->times) {
736 	if ((error = copyin(args->times, &lut, sizeof lut)))
737 	    return error;
738 	tv[0].tv_sec = lut.l_actime;
739 	tv[0].tv_usec = 0;
740 	tv[1].tv_sec = lut.l_modtime;
741 	tv[1].tv_usec = 0;
742 	/* so that utimes can copyin */
743 	tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
744 	if (tvp == NULL)
745 		return (ENAMETOOLONG);
746 	if ((error = copyout(tv, tvp, sizeof(tv))))
747 	    return error;
748 	bsdutimes.tptr = tvp;
749     } else
750 	bsdutimes.tptr = NULL;
751 
752     bsdutimes.path = args->fname;
753     return utimes(p, &bsdutimes);
754 }
755 
756 #define __WCLONE 0x80000000
757 
758 #ifndef __alpha__
759 int
760 linux_waitpid(struct proc *p, struct linux_waitpid_args *args)
761 {
762     struct wait_args /* {
763 	int pid;
764 	int *status;
765 	int options;
766 	struct	rusage *rusage;
767     } */ tmp;
768     int error, tmpstat;
769 
770 #ifdef DEBUG
771 	if (ldebug(waitpid))
772 		printf(ARGS(waitpid, "%d, %p, %d"),
773 		    args->pid, (void *)args->status, args->options);
774 #endif
775     tmp.pid = args->pid;
776     tmp.status = args->status;
777     tmp.options = (args->options & (WNOHANG | WUNTRACED));
778     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
779     if (args->options & __WCLONE)
780 	tmp.options |= WLINUXCLONE;
781     tmp.rusage = NULL;
782 
783     if ((error = wait4(p, &tmp)) != 0)
784 	return error;
785 
786     if (args->status) {
787 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
788 	    return error;
789 	tmpstat &= 0xffff;
790 	if (WIFSIGNALED(tmpstat))
791 	    tmpstat = (tmpstat & 0xffffff80) |
792 		      BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
793 	else if (WIFSTOPPED(tmpstat))
794 	    tmpstat = (tmpstat & 0xffff00ff) |
795 		      (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
796 	return copyout(&tmpstat, args->status, sizeof(int));
797     } else
798 	return 0;
799 }
800 #endif	/*!__alpha__*/
801 
802 int
803 linux_wait4(struct proc *p, struct linux_wait4_args *args)
804 {
805     struct wait_args /* {
806 	int pid;
807 	int *status;
808 	int options;
809 	struct	rusage *rusage;
810     } */ tmp;
811     int error, tmpstat;
812 
813 #ifdef DEBUG
814 	if (ldebug(wait4))
815 		printf(ARGS(wait4, "%d, %p, %d, %p"),
816 		    args->pid, (void *)args->status, args->options,
817 		    (void *)args->rusage);
818 #endif
819     tmp.pid = args->pid;
820     tmp.status = args->status;
821     tmp.options = (args->options & (WNOHANG | WUNTRACED));
822     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
823     if (args->options & __WCLONE)
824 	tmp.options |= WLINUXCLONE;
825     tmp.rusage = args->rusage;
826 
827     if ((error = wait4(p, &tmp)) != 0)
828 	return error;
829 
830     SIGDELSET(p->p_siglist, SIGCHLD);
831 
832     if (args->status) {
833 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
834 	    return error;
835 	tmpstat &= 0xffff;
836 	if (WIFSIGNALED(tmpstat))
837 	    tmpstat = (tmpstat & 0xffffff80) |
838 		  BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
839 	else if (WIFSTOPPED(tmpstat))
840 	    tmpstat = (tmpstat & 0xffff00ff) |
841 		  (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
842 	return copyout(&tmpstat, args->status, sizeof(int));
843     } else
844 	return 0;
845 }
846 
847 int
848 linux_mknod(struct proc *p, struct linux_mknod_args *args)
849 {
850 	caddr_t sg;
851 	struct mknod_args bsd_mknod;
852 	struct mkfifo_args bsd_mkfifo;
853 
854 	sg = stackgap_init();
855 
856 	CHECKALTCREAT(p, &sg, args->path);
857 
858 #ifdef DEBUG
859 	if (ldebug(mknod))
860 		printf(ARGS(mknod, "%s, %d, %d"),
861 		    args->path, args->mode, args->dev);
862 #endif
863 
864 	if (args->mode & S_IFIFO) {
865 		bsd_mkfifo.path = args->path;
866 		bsd_mkfifo.mode = args->mode;
867 		return mkfifo(p, &bsd_mkfifo);
868 	} else {
869 		bsd_mknod.path = args->path;
870 		bsd_mknod.mode = args->mode;
871 		bsd_mknod.dev = args->dev;
872 		return mknod(p, &bsd_mknod);
873 	}
874 }
875 
876 /*
877  * UGH! This is just about the dumbest idea I've ever heard!!
878  */
879 int
880 linux_personality(struct proc *p, struct linux_personality_args *args)
881 {
882 #ifdef DEBUG
883 	if (ldebug(personality))
884 		printf(ARGS(personality, "%d"), args->per);
885 #endif
886 #ifndef __alpha__
887 	if (args->per != 0)
888 		return EINVAL;
889 #endif
890 
891 	/* Yes Jim, it's still a Linux... */
892 	p->p_retval[0] = 0;
893 	return 0;
894 }
895 
896 /*
897  * Wrappers for get/setitimer for debugging..
898  */
899 int
900 linux_setitimer(struct proc *p, struct linux_setitimer_args *args)
901 {
902 	struct setitimer_args bsa;
903 	struct itimerval foo;
904 	int error;
905 
906 #ifdef DEBUG
907 	if (ldebug(setitimer))
908 		printf(ARGS(setitimer, "%p, %p"),
909 		    (void *)args->itv, (void *)args->oitv);
910 #endif
911 	bsa.which = args->which;
912 	bsa.itv = args->itv;
913 	bsa.oitv = args->oitv;
914 	if (args->itv) {
915 	    if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo,
916 			sizeof(foo))))
917 		return error;
918 #ifdef DEBUG
919 	    if (ldebug(setitimer)) {
920 	        printf("setitimer: value: sec: %ld, usec: %ld\n",
921 		    foo.it_value.tv_sec, foo.it_value.tv_usec);
922 	        printf("setitimer: interval: sec: %ld, usec: %ld\n",
923 		    foo.it_interval.tv_sec, foo.it_interval.tv_usec);
924 	    }
925 #endif
926 	}
927 	return setitimer(p, &bsa);
928 }
929 
930 int
931 linux_getitimer(struct proc *p, struct linux_getitimer_args *args)
932 {
933 	struct getitimer_args bsa;
934 #ifdef DEBUG
935 	if (ldebug(getitimer))
936 		printf(ARGS(getitimer, "%p"), (void *)args->itv);
937 #endif
938 	bsa.which = args->which;
939 	bsa.itv = args->itv;
940 	return getitimer(p, &bsa);
941 }
942 
943 #ifndef __alpha__
944 int
945 linux_nice(struct proc *p, struct linux_nice_args *args)
946 {
947 	struct setpriority_args	bsd_args;
948 
949 	bsd_args.which = PRIO_PROCESS;
950 	bsd_args.who = 0;	/* current process */
951 	bsd_args.prio = args->inc;
952 	return setpriority(p, &bsd_args);
953 }
954 #endif	/*!__alpha__*/
955 
956 int
957 linux_setgroups(p, uap)
958 	struct proc *p;
959 	struct linux_setgroups_args *uap;
960 {
961 	struct ucred *newcred, *oldcred;
962 	linux_gid_t linux_gidset[NGROUPS];
963 	gid_t *bsd_gidset;
964 	int ngrp, error;
965 
966 	ngrp = uap->gidsetsize;
967 	oldcred = p->p_ucred;
968 
969 	/*
970 	 * cr_groups[0] holds egid. Setting the whole set from
971 	 * the supplied set will cause egid to be changed too.
972 	 * Keep cr_groups[0] unchanged to prevent that.
973 	 */
974 
975 	if ((error = suser_xxx(oldcred, NULL, PRISON_ROOT)) != 0)
976 		return (error);
977 
978 	if (ngrp >= NGROUPS)
979 		return (EINVAL);
980 
981 	newcred = crdup(oldcred);
982 	if (ngrp > 0) {
983 		error = copyin((caddr_t)uap->gidset, (caddr_t)linux_gidset,
984 			       ngrp * sizeof(linux_gid_t));
985 		if (error)
986 			return (error);
987 
988 		newcred->cr_ngroups = ngrp + 1;
989 
990 		bsd_gidset = newcred->cr_groups;
991 		ngrp--;
992 		while (ngrp >= 0) {
993 			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
994 			ngrp--;
995 		}
996 	}
997 	else
998 		newcred->cr_ngroups = 1;
999 
1000 	setsugid(p);
1001 	p->p_ucred = newcred;
1002 	crfree(oldcred);
1003 	return (0);
1004 }
1005 
1006 int
1007 linux_getgroups(p, uap)
1008 	struct proc *p;
1009 	struct linux_getgroups_args *uap;
1010 {
1011 	struct ucred *cred;
1012 	linux_gid_t linux_gidset[NGROUPS];
1013 	gid_t *bsd_gidset;
1014 	int bsd_gidsetsz, ngrp, error;
1015 
1016 	cred = p->p_ucred;
1017 	bsd_gidset = cred->cr_groups;
1018 	bsd_gidsetsz = cred->cr_ngroups - 1;
1019 
1020 	/*
1021 	 * cr_groups[0] holds egid. Returning the whole set
1022 	 * here will cause a duplicate. Exclude cr_groups[0]
1023 	 * to prevent that.
1024 	 */
1025 
1026 	if ((ngrp = uap->gidsetsize) == 0) {
1027 		p->p_retval[0] = bsd_gidsetsz;
1028 		return (0);
1029 	}
1030 
1031 	if (ngrp < bsd_gidsetsz)
1032 		return (EINVAL);
1033 
1034 	ngrp = 0;
1035 	while (ngrp < bsd_gidsetsz) {
1036 		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1037 		ngrp++;
1038 	}
1039 
1040 	if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset,
1041 	    ngrp * sizeof(linux_gid_t))))
1042 		return (error);
1043 
1044 	p->p_retval[0] = ngrp;
1045 	return (0);
1046 }
1047 
1048 #ifndef __alpha__
1049 int
1050 linux_setrlimit(p, uap)
1051 	struct proc *p;
1052 	struct linux_setrlimit_args *uap;
1053 {
1054 	struct __setrlimit_args bsd;
1055 	struct linux_rlimit rlim;
1056 	int error;
1057 	caddr_t sg = stackgap_init();
1058 
1059 #ifdef DEBUG
1060 	if (ldebug(setrlimit))
1061 		printf(ARGS(setrlimit, "%d, %p"),
1062 		    uap->resource, (void *)uap->rlim);
1063 #endif
1064 
1065 	if (uap->resource >= LINUX_RLIM_NLIMITS)
1066 		return (EINVAL);
1067 
1068 	bsd.which = linux_to_bsd_resource[uap->resource];
1069 	if (bsd.which == -1)
1070 		return (EINVAL);
1071 
1072 	error = copyin(uap->rlim, &rlim, sizeof(rlim));
1073 	if (error)
1074 		return (error);
1075 
1076 	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
1077 	bsd.rlp->rlim_cur = (rlim_t)rlim.rlim_cur;
1078 	bsd.rlp->rlim_max = (rlim_t)rlim.rlim_max;
1079 	return (setrlimit(p, &bsd));
1080 }
1081 
1082 int
1083 linux_getrlimit(p, uap)
1084 	struct proc *p;
1085 	struct linux_getrlimit_args *uap;
1086 {
1087 	struct __getrlimit_args bsd;
1088 	struct linux_rlimit rlim;
1089 	int error;
1090 	caddr_t sg = stackgap_init();
1091 
1092 #ifdef DEBUG
1093 	if (ldebug(getrlimit))
1094 		printf(ARGS(getrlimit, "%d, %p"),
1095 		    uap->resource, (void *)uap->rlim);
1096 #endif
1097 
1098 	if (uap->resource >= LINUX_RLIM_NLIMITS)
1099 		return (EINVAL);
1100 
1101 	bsd.which = linux_to_bsd_resource[uap->resource];
1102 	if (bsd.which == -1)
1103 		return (EINVAL);
1104 
1105 	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
1106 	error = getrlimit(p, &bsd);
1107 	if (error)
1108 		return (error);
1109 
1110 	rlim.rlim_cur = (unsigned long)bsd.rlp->rlim_cur;
1111 	if (rlim.rlim_cur == ULONG_MAX)
1112 		rlim.rlim_cur = LONG_MAX;
1113 	rlim.rlim_max = (unsigned long)bsd.rlp->rlim_max;
1114 	if (rlim.rlim_max == ULONG_MAX)
1115 		rlim.rlim_max = LONG_MAX;
1116 	return (copyout(&rlim, uap->rlim, sizeof(rlim)));
1117 }
1118 #endif /*!__alpha__*/
1119 
1120 int
1121 linux_sched_setscheduler(p, uap)
1122 	struct proc *p;
1123 	struct linux_sched_setscheduler_args *uap;
1124 {
1125 	struct sched_setscheduler_args bsd;
1126 
1127 #ifdef DEBUG
1128 	if (ldebug(sched_setscheduler))
1129 		printf(ARGS(sched_setscheduler, "%d, %d, %p"),
1130 		    uap->pid, uap->policy, (const void *)uap->param);
1131 #endif
1132 
1133 	switch (uap->policy) {
1134 	case LINUX_SCHED_OTHER:
1135 		bsd.policy = SCHED_OTHER;
1136 		break;
1137 	case LINUX_SCHED_FIFO:
1138 		bsd.policy = SCHED_FIFO;
1139 		break;
1140 	case LINUX_SCHED_RR:
1141 		bsd.policy = SCHED_RR;
1142 		break;
1143 	default:
1144 		return EINVAL;
1145 	}
1146 
1147 	bsd.pid = uap->pid;
1148 	bsd.param = uap->param;
1149 	return sched_setscheduler(p, &bsd);
1150 }
1151 
1152 int
1153 linux_sched_getscheduler(p, uap)
1154 	struct proc *p;
1155 	struct linux_sched_getscheduler_args *uap;
1156 {
1157 	struct sched_getscheduler_args bsd;
1158 	int error;
1159 
1160 #ifdef DEBUG
1161 	if (ldebug(sched_getscheduler))
1162 		printf(ARGS(sched_getscheduler, "%d"), uap->pid);
1163 #endif
1164 
1165 	bsd.pid = uap->pid;
1166 	error = sched_getscheduler(p, &bsd);
1167 
1168 	switch (p->p_retval[0]) {
1169 	case SCHED_OTHER:
1170 		p->p_retval[0] = LINUX_SCHED_OTHER;
1171 		break;
1172 	case SCHED_FIFO:
1173 		p->p_retval[0] = LINUX_SCHED_FIFO;
1174 		break;
1175 	case SCHED_RR:
1176 		p->p_retval[0] = LINUX_SCHED_RR;
1177 		break;
1178 	}
1179 
1180 	return error;
1181 }
1182 
1183 int
1184 linux_sched_get_priority_max(p, uap)
1185 	struct proc *p;
1186 	struct linux_sched_get_priority_max_args *uap;
1187 {
1188 	struct sched_get_priority_max_args bsd;
1189 
1190 #ifdef DEBUG
1191 	if (ldebug(sched_get_priority_max))
1192 		printf(ARGS(sched_get_priority_max, "%d"), uap->policy);
1193 #endif
1194 
1195 	switch (uap->policy) {
1196 	case LINUX_SCHED_OTHER:
1197 		bsd.policy = SCHED_OTHER;
1198 		break;
1199 	case LINUX_SCHED_FIFO:
1200 		bsd.policy = SCHED_FIFO;
1201 		break;
1202 	case LINUX_SCHED_RR:
1203 		bsd.policy = SCHED_RR;
1204 		break;
1205 	default:
1206 		return EINVAL;
1207 	}
1208 	return sched_get_priority_max(p, &bsd);
1209 }
1210 
1211 int
1212 linux_sched_get_priority_min(p, uap)
1213 	struct proc *p;
1214 	struct linux_sched_get_priority_min_args *uap;
1215 {
1216 	struct sched_get_priority_min_args bsd;
1217 
1218 #ifdef DEBUG
1219 	if (ldebug(sched_get_priority_min))
1220 		printf(ARGS(sched_get_priority_min, "%d"), uap->policy);
1221 #endif
1222 
1223 	switch (uap->policy) {
1224 	case LINUX_SCHED_OTHER:
1225 		bsd.policy = SCHED_OTHER;
1226 		break;
1227 	case LINUX_SCHED_FIFO:
1228 		bsd.policy = SCHED_FIFO;
1229 		break;
1230 	case LINUX_SCHED_RR:
1231 		bsd.policy = SCHED_RR;
1232 		break;
1233 	default:
1234 		return EINVAL;
1235 	}
1236 	return sched_get_priority_min(p, &bsd);
1237 }
1238 
1239 #define REBOOT_CAD_ON	0x89abcdef
1240 #define REBOOT_CAD_OFF	0
1241 #define REBOOT_HALT	0xcdef0123
1242 
1243 int
1244 linux_reboot(struct proc *p, struct linux_reboot_args *args)
1245 {
1246 	struct reboot_args bsd_args;
1247 
1248 #ifdef DEBUG
1249 	if (ldebug(reboot))
1250 		printf(ARGS(reboot, "0x%x"), args->opt);
1251 #endif
1252 	if (args->opt == REBOOT_CAD_ON || args->opt == REBOOT_CAD_OFF)
1253 		return (0);
1254 	bsd_args.opt = args->opt == REBOOT_HALT ? RB_HALT : 0;
1255 	return (reboot(p, &bsd_args));
1256 }
1257