xref: /freebsd/sys/compat/linux/linux_misc.c (revision 68fe945a06a38b0bfd4482548b2ca57d4dfc6f99)
1 /*-
2  * Copyright (c) 1994-1995 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include "opt_compat.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact_aout.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/mman.h>
40 #include <sys/mount.h>
41 #include <sys/mutex.h>
42 #include <sys/namei.h>
43 #include <sys/proc.h>
44 #include <sys/blist.h>
45 #include <sys/reboot.h>
46 #include <sys/resourcevar.h>
47 #include <sys/signalvar.h>
48 #include <sys/stat.h>
49 #include <sys/sysctl.h>
50 #include <sys/sysproto.h>
51 #include <sys/time.h>
52 #include <sys/unistd.h>
53 #include <sys/vmmeter.h>
54 #include <sys/vnode.h>
55 #include <sys/wait.h>
56 
57 #include <vm/vm.h>
58 #include <vm/pmap.h>
59 #include <vm/vm_kern.h>
60 #include <vm/vm_map.h>
61 #include <vm/vm_extern.h>
62 #include <vm/vm_object.h>
63 #include <vm/vm_zone.h>
64 #include <vm/swap_pager.h>
65 
66 #include <machine/frame.h>
67 #include <machine/limits.h>
68 #include <machine/psl.h>
69 #include <machine/sysarch.h>
70 #ifdef __i386__
71 #include <machine/segments.h>
72 #endif
73 
74 #include <posix4/sched.h>
75 
76 #include <machine/../linux/linux.h>
77 #include <machine/../linux/linux_proto.h>
78 #include <compat/linux/linux_mib.h>
79 #include <compat/linux/linux_util.h>
80 
/*
 * Translate a BSD signal number into the value reported to Linux
 * binaries.
 *
 * On alpha the number is passed through unchanged.  Elsewhere, values
 * up to LINUX_SIGTBLSZ are looked up in bsd_to_linux_signal[]; larger
 * values are passed through unchanged.  Every use of the argument is
 * parenthesized so that any expression can be passed safely.
 */
#ifdef __alpha__
#define BSD_TO_LINUX_SIGNAL(sig)	(sig)
#else
#define BSD_TO_LINUX_SIGNAL(sig)	\
	(((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : (sig))
#endif
87 
/*
 * Resource limit pair as exchanged with Linux binaries by
 * linux_setrlimit() and linux_getrlimit().
 */
struct linux_rlimit {
	unsigned long	rlim_cur;	/* current limit */
	unsigned long	rlim_max;	/* maximum limit */
};
92 
93 #ifndef __alpha__
94 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] =
95 { RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
96   RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
97   RLIMIT_MEMLOCK, -1
98 };
99 #endif /*!__alpha__*/
100 
/*
 * Result buffer filled in by linux_sysinfo() and copied out to the
 * Linux binary.
 */
struct linux_sysinfo {
	long		uptime;		/* Seconds since boot */
	unsigned long	loads[3];	/* 1, 5, and 15 minute load averages */
	unsigned long	totalram;	/* Total usable main memory size */
	unsigned long	freeram;	/* Available memory size */
	unsigned long	sharedram;	/* Amount of shared memory */
	unsigned long	bufferram;	/* Memory used by buffers */
	unsigned long	totalswap;	/* Total swap space size */
	unsigned long	freeswap;	/* swap space still available */
	unsigned short	procs;		/* Number of current processes */
	char		_f[22];		/* Pads structure to 64 bytes */
};
113 
114 #ifndef __alpha__
115 int
116 linux_sysinfo(struct proc *p, struct linux_sysinfo_args *args)
117 {
118      struct linux_sysinfo sysinfo;
119      vm_object_t object;
120      int i;
121      struct timespec ts;
122 
123      /* Uptime is copied out of print_uptime() procedure in kern_shutdown.c */
124      getnanouptime(&ts);
125      i = 0;
126      if (ts.tv_sec >= 86400) {
127           ts.tv_sec %= 86400;
128           i = 1;
129      }
130      if (i || ts.tv_sec >= 3600) {
131           ts.tv_sec %= 3600;
132           i = 1;
133      }
134      if (i || ts.tv_sec >= 60) {
135           ts.tv_sec %= 60;
136           i = 1;
137      }
138      sysinfo.uptime=ts.tv_sec;
139 
140      /* Use the information from the mib to get our load averages */
141      for (i = 0; i < 3; i++)
142           sysinfo.loads[i] = averunnable.ldavg[i];
143 
144      sysinfo.totalram = physmem * PAGE_SIZE;
145      sysinfo.freeram = sysinfo.totalram - cnt.v_wire_count * PAGE_SIZE;
146 
147      sysinfo.sharedram = 0;
148      for (object = TAILQ_FIRST(&vm_object_list); object != NULL;
149           object = TAILQ_NEXT(object, object_list))
150                if (object->shadow_count > 1)
151                     sysinfo.sharedram += object->resident_page_count;
152 
153      sysinfo.sharedram *= PAGE_SIZE;
154 
155      sysinfo.bufferram = 0;
156 
157      if (swapblist == NULL) {
158           sysinfo.totalswap= 0;
159           sysinfo.freeswap = 0;
160      } else {
161           sysinfo.totalswap = swapblist->bl_blocks * 1024;
162           sysinfo.freeswap = swapblist->bl_root->u.bmu_avail * PAGE_SIZE;
163      }
164 
165      sysinfo.procs = 20; /* Hack */
166 
167      return copyout((caddr_t)&sysinfo, (caddr_t)args->info,
168                sizeof(struct linux_sysinfo));
169 }
170 #endif /*!__alpha__*/
171 
172 #ifndef __alpha__
173 int
174 linux_alarm(struct proc *p, struct linux_alarm_args *args)
175 {
176     struct itimerval it, old_it;
177     struct timeval tv;
178     int s;
179 
180 #ifdef DEBUG
181 	if (ldebug(alarm))
182 		printf(ARGS(alarm, "%u"), args->secs);
183 #endif
184     if (args->secs > 100000000)
185 	return EINVAL;
186     it.it_value.tv_sec = (long)args->secs;
187     it.it_value.tv_usec = 0;
188     it.it_interval.tv_sec = 0;
189     it.it_interval.tv_usec = 0;
190     s = splsoftclock();
191     old_it = p->p_realtimer;
192     getmicrouptime(&tv);
193     if (timevalisset(&old_it.it_value))
194 	callout_stop(&p->p_itcallout);
195     if (it.it_value.tv_sec != 0) {
196 	callout_reset(&p->p_itcallout, tvtohz(&it.it_value), realitexpire, p);
197 	timevaladd(&it.it_value, &tv);
198     }
199     p->p_realtimer = it;
200     splx(s);
201     if (timevalcmp(&old_it.it_value, &tv, >)) {
202 	timevalsub(&old_it.it_value, &tv);
203 	if (old_it.it_value.tv_usec != 0)
204 	    old_it.it_value.tv_sec++;
205 	p->p_retval[0] = old_it.it_value.tv_sec;
206     }
207     return 0;
208 }
209 #endif /*!__alpha__*/
210 
211 int
212 linux_brk(struct proc *p, struct linux_brk_args *args)
213 {
214 #if 0
215     struct vmspace *vm = p->p_vmspace;
216     vm_offset_t new, old;
217     int error;
218 
219     if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr)
220 	return EINVAL;
221     if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr)
222 	> p->p_rlimit[RLIMIT_DATA].rlim_cur)
223 	return ENOMEM;
224 
225     old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize);
226     new = round_page((vm_offset_t)args->dsend);
227     p->p_retval[0] = old;
228     if ((new-old) > 0) {
229 	if (swap_pager_full)
230 	    return ENOMEM;
231 	error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE,
232 			VM_PROT_ALL, VM_PROT_ALL, 0);
233 	if (error)
234 	    return error;
235 	vm->vm_dsize += btoc((new-old));
236 	p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize));
237     }
238     return 0;
239 #else
240     struct vmspace *vm = p->p_vmspace;
241     vm_offset_t new, old;
242     struct obreak_args /* {
243 	char * nsize;
244     } */ tmp;
245 
246 #ifdef DEBUG
247 	if (ldebug(brk))
248 		printf(ARGS(brk, "%p"), (void *)args->dsend);
249 #endif
250     old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
251     new = (vm_offset_t)args->dsend;
252     tmp.nsize = (char *) new;
253     if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp))
254 	p->p_retval[0] = (long)new;
255     else
256 	p->p_retval[0] = (long)old;
257 
258     return 0;
259 #endif
260 }
261 
262 int
263 linux_uselib(struct proc *p, struct linux_uselib_args *args)
264 {
265     struct nameidata ni;
266     struct vnode *vp;
267     struct exec *a_out;
268     struct vattr attr;
269     vm_offset_t vmaddr;
270     unsigned long file_offset;
271     vm_offset_t buffer;
272     unsigned long bss_size;
273     int error;
274     caddr_t sg;
275     int locked;
276 
277     sg = stackgap_init();
278     CHECKALTEXIST(p, &sg, args->library);
279 
280 #ifdef DEBUG
281 	if (ldebug(uselib))
282 		printf(ARGS(uselib, "%s"), args->library);
283 #endif
284 
285     a_out = NULL;
286     locked = 0;
287     vp = NULL;
288 
289     NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, args->library, p);
290     error = namei(&ni);
291     if (error)
292 	goto cleanup;
293 
294     vp = ni.ni_vp;
295     /*
296      * XXX This looks like a bogus check - a LOCKLEAF namei should not succeed
297      * without returning a vnode.
298      */
299     if (vp == NULL) {
300 	error = ENOEXEC;	/* ?? */
301 	goto cleanup;
302     }
303     NDFREE(&ni, NDF_ONLY_PNBUF);
304 
305     /*
306      * From here on down, we have a locked vnode that must be unlocked.
307      */
308     locked++;
309 
310     /*
311      * Writable?
312      */
313     if (vp->v_writecount) {
314 	error = ETXTBSY;
315 	goto cleanup;
316     }
317 
318     /*
319      * Executable?
320      */
321     error = VOP_GETATTR(vp, &attr, p->p_ucred, p);
322     if (error)
323 	goto cleanup;
324 
325     if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
326 	((attr.va_mode & 0111) == 0) ||
327 	(attr.va_type != VREG)) {
328 	    error = ENOEXEC;
329 	    goto cleanup;
330     }
331 
332     /*
333      * Sensible size?
334      */
335     if (attr.va_size == 0) {
336 	error = ENOEXEC;
337 	goto cleanup;
338     }
339 
340     /*
341      * Can we access it?
342      */
343     error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
344     if (error)
345 	goto cleanup;
346 
347     error = VOP_OPEN(vp, FREAD, p->p_ucred, p);
348     if (error)
349 	goto cleanup;
350 
351     /*
352      * Lock no longer needed
353      */
354     VOP_UNLOCK(vp, 0, p);
355     locked = 0;
356 
357     /*
358      * Pull in executable header into kernel_map
359      */
360     error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
361 	    	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
362     if (error)
363 	goto cleanup;
364 
365     /*
366      * Is it a Linux binary ?
367      */
368     if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
369 	error = ENOEXEC;
370 	goto cleanup;
371     }
372 
373     /* While we are here, we should REALLY do some more checks */
374 
375     /*
376      * Set file/virtual offset based on a.out variant.
377      */
378     switch ((int)(a_out->a_magic & 0xffff)) {
379     case 0413:	/* ZMAGIC */
380 	file_offset = 1024;
381 	break;
382     case 0314:	/* QMAGIC */
383 	file_offset = 0;
384 	break;
385     default:
386 	error = ENOEXEC;
387 	goto cleanup;
388     }
389 
390     bss_size = round_page(a_out->a_bss);
391 
392     /*
393      * Check various fields in header for validity/bounds.
394      */
395     if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
396 	error = ENOEXEC;
397 	goto cleanup;
398     }
399 
400     /* text + data can't exceed file size */
401     if (a_out->a_data + a_out->a_text > attr.va_size) {
402 	error = EFAULT;
403 	goto cleanup;
404     }
405 
406     /* To protect p->p_rlimit in the if condition. */
407     mtx_assert(&Giant, MA_OWNED);
408 
409     /*
410      * text/data/bss must not exceed limits
411      * XXX: this is not complete. it should check current usage PLUS
412      * the resources needed by this library.
413      */
414     if (a_out->a_text > MAXTSIZ ||
415 	a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) {
416 	error = ENOMEM;
417 	goto cleanup;
418     }
419 
420     /*
421      * prevent more writers
422      */
423     vp->v_flag |= VTEXT;
424 
425     /*
426      * Check if file_offset page aligned,.
427      * Currently we cannot handle misalinged file offsets,
428      * and so we read in the entire image (what a waste).
429      */
430     if (file_offset & PAGE_MASK) {
431 #ifdef DEBUG
432 printf("uselib: Non page aligned binary %lu\n", file_offset);
433 #endif
434 	/*
435 	 * Map text+data read/write/execute
436 	 */
437 
438 	/* a_entry is the load address and is page aligned */
439 	vmaddr = trunc_page(a_out->a_entry);
440 
441 	/* get anon user mapping, read+write+execute */
442 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
443 		    	    a_out->a_text + a_out->a_data, FALSE,
444 			    VM_PROT_ALL, VM_PROT_ALL, 0);
445 	if (error)
446 	    goto cleanup;
447 
448 	/* map file into kernel_map */
449 	error = vm_mmap(kernel_map, &buffer,
450 			round_page(a_out->a_text + a_out->a_data + file_offset),
451 		   	VM_PROT_READ, VM_PROT_READ, 0,
452 			(caddr_t)vp, trunc_page(file_offset));
453 	if (error)
454 	    goto cleanup;
455 
456 	/* copy from kernel VM space to user space */
457 	error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset),
458 			(caddr_t)vmaddr, a_out->a_text + a_out->a_data);
459 
460 	/* release temporary kernel space */
461 	vm_map_remove(kernel_map, buffer,
462 		      buffer + round_page(a_out->a_text + a_out->a_data + file_offset));
463 
464 	if (error)
465 	    goto cleanup;
466     }
467     else {
468 #ifdef DEBUG
469 printf("uselib: Page aligned binary %lu\n", file_offset);
470 #endif
471 	/*
472 	 * for QMAGIC, a_entry is 20 bytes beyond the load address
473 	 * to skip the executable header
474 	 */
475 	vmaddr = trunc_page(a_out->a_entry);
476 
477 	/*
478 	 * Map it all into the process's space as a single copy-on-write
479 	 * "data" segment.
480 	 */
481 	error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr,
482 		   	a_out->a_text + a_out->a_data,
483 			VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
484 			(caddr_t)vp, file_offset);
485 	if (error)
486 	    goto cleanup;
487     }
488 #ifdef DEBUG
489 printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long*)vmaddr)[0], ((long*)vmaddr)[1]);
490 #endif
491     if (bss_size != 0) {
492         /*
493 	 * Calculate BSS start address
494 	 */
495 	vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data;
496 
497 	/*
498 	 * allocate some 'anon' space
499 	 */
500 	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
501 			    bss_size, FALSE,
502 			    VM_PROT_ALL, VM_PROT_ALL, 0);
503 	if (error)
504 	    goto cleanup;
505     }
506 
507 cleanup:
508     /*
509      * Unlock vnode if needed
510      */
511     if (locked)
512 	VOP_UNLOCK(vp, 0, p);
513 
514     /*
515      * Release the kernel mapping.
516      */
517     if (a_out)
518 	vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE);
519 
520     return error;
521 }
522 
523 int
524 linux_newselect(struct proc *p, struct linux_newselect_args *args)
525 {
526     struct select_args bsa;
527     struct timeval tv0, tv1, utv, *tvp;
528     caddr_t sg;
529     int error;
530 
531 #ifdef DEBUG
532 	if (ldebug(newselect))
533 		printf(ARGS(newselect, "%d, %p, %p, %p, %p"),
534 		    args->nfds, (void *)args->readfds,
535 		    (void *)args->writefds, (void *)args->exceptfds,
536 		    (void *)args->timeout);
537 #endif
538     error = 0;
539     bsa.nd = args->nfds;
540     bsa.in = args->readfds;
541     bsa.ou = args->writefds;
542     bsa.ex = args->exceptfds;
543     bsa.tv = args->timeout;
544 
545     /*
546      * Store current time for computation of the amount of
547      * time left.
548      */
549     if (args->timeout) {
550 	if ((error = copyin(args->timeout, &utv, sizeof(utv))))
551 	    goto select_out;
552 #ifdef DEBUG
553 	if (ldebug(newselect))
554 		printf(LMSG("incoming timeout (%ld/%ld)"),
555 		    utv.tv_sec, utv.tv_usec);
556 #endif
557 	if (itimerfix(&utv)) {
558 	    /*
559 	     * The timeval was invalid.  Convert it to something
560 	     * valid that will act as it does under Linux.
561 	     */
562 	    sg = stackgap_init();
563 	    tvp = stackgap_alloc(&sg, sizeof(utv));
564 	    utv.tv_sec += utv.tv_usec / 1000000;
565 	    utv.tv_usec %= 1000000;
566 	    if (utv.tv_usec < 0) {
567 		utv.tv_sec -= 1;
568 		utv.tv_usec += 1000000;
569 	    }
570 	    if (utv.tv_sec < 0)
571 		timevalclear(&utv);
572 	    if ((error = copyout(&utv, tvp, sizeof(utv))))
573 		goto select_out;
574 	    bsa.tv = tvp;
575 	}
576 	microtime(&tv0);
577     }
578 
579     error = select(p, &bsa);
580 #ifdef DEBUG
581 	if (ldebug(newselect))
582 		printf(LMSG("real select returns %d"), error);
583 #endif
584 
585     if (error) {
586 	/*
587 	 * See fs/select.c in the Linux kernel.  Without this,
588 	 * Maelstrom doesn't work.
589 	 */
590 	if (error == ERESTART)
591 	    error = EINTR;
592 	goto select_out;
593     }
594 
595     if (args->timeout) {
596 	if (p->p_retval[0]) {
597 	    /*
598 	     * Compute how much time was left of the timeout,
599 	     * by subtracting the current time and the time
600 	     * before we started the call, and subtracting
601 	     * that result from the user-supplied value.
602 	     */
603 	    microtime(&tv1);
604 	    timevalsub(&tv1, &tv0);
605 	    timevalsub(&utv, &tv1);
606 	    if (utv.tv_sec < 0)
607 		timevalclear(&utv);
608 	} else
609 	    timevalclear(&utv);
610 #ifdef DEBUG
611 	if (ldebug(newselect))
612 		printf(LMSG("outgoing timeout (%ld/%ld)"),
613 		    utv.tv_sec, utv.tv_usec);
614 #endif
615 	if ((error = copyout(&utv, args->timeout, sizeof(utv))))
616 	    goto select_out;
617     }
618 
619 select_out:
620 #ifdef DEBUG
621 	if (ldebug(newselect))
622 		printf(LMSG("newselect_out -> %d"), error);
623 #endif
624     return error;
625 }
626 
627 int
628 linux_getpgid(struct proc *p, struct linux_getpgid_args *args)
629 {
630     struct proc *curp;
631 
632 #ifdef DEBUG
633 	if (ldebug(getpgid))
634 		printf(ARGS(getpgid, "%d"), args->pid);
635 #endif
636     if (args->pid != p->p_pid) {
637 	if (!(curp = pfind(args->pid)))
638 	    return ESRCH;
639 	p->p_retval[0] = curp->p_pgid;
640 	PROC_UNLOCK(curp);
641     }
642     else
643 	p->p_retval[0] = p->p_pgid;
644     return 0;
645 }
646 
647 int
648 linux_mremap(struct proc *p, struct linux_mremap_args *args)
649 {
650 	struct munmap_args /* {
651 		void *addr;
652 		size_t len;
653 	} */ bsd_args;
654 	int error = 0;
655 
656 #ifdef DEBUG
657 	if (ldebug(mremap))
658 		printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"),
659 		    (void *)args->addr,
660 		    (unsigned long)args->old_len,
661 		    (unsigned long)args->new_len,
662 		    (unsigned long)args->flags);
663 #endif
664 	args->new_len = round_page(args->new_len);
665 	args->old_len = round_page(args->old_len);
666 
667 	if (args->new_len > args->old_len) {
668 		p->p_retval[0] = 0;
669 		return ENOMEM;
670 	}
671 
672 	if (args->new_len < args->old_len) {
673 		bsd_args.addr = args->addr + args->new_len;
674 		bsd_args.len = args->old_len - args->new_len;
675 		error = munmap(p, &bsd_args);
676 	}
677 
678 	p->p_retval[0] = error ? 0 : (u_long)args->addr;
679 	return error;
680 }
681 
682 int
683 linux_msync(struct proc *p, struct linux_msync_args *args)
684 {
685 	struct msync_args bsd_args;
686 
687 	bsd_args.addr = args->addr;
688 	bsd_args.len = args->len;
689 	bsd_args.flags = 0;	/* XXX ignore */
690 
691 	return msync(p, &bsd_args);
692 }
693 
694 #ifndef __alpha__
695 int
696 linux_time(struct proc *p, struct linux_time_args *args)
697 {
698     struct timeval tv;
699     linux_time_t tm;
700     int error;
701 
702 #ifdef DEBUG
703 	if (ldebug(time))
704 		printf(ARGS(time, "*"));
705 #endif
706     microtime(&tv);
707     tm = tv.tv_sec;
708     if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t))))
709 	return error;
710     p->p_retval[0] = tm;
711     return 0;
712 }
713 #endif	/*!__alpha__*/
714 
/*
 * Buffer copied out by linux_times(); all values are in clock ticks
 * (see CONVTCK below).
 */
struct linux_times_argv {
	long	tms_utime;
	long	tms_stime;
	long	tms_cutime;
	long	tms_cstime;
};

#ifdef __alpha__
#define CLK_TCK 1024	/* Linux uses 1024 on alpha */
#else
#define CLK_TCK 100	/* Linux uses 100 */
#endif

/*
 * Convert a struct timeval (passed by value, not by pointer) into
 * CLK_TCK ticks.  The argument is parenthesized so that expressions
 * such as CONVTCK(*tvp) expand correctly.
 */
#define CONVTCK(r)	\
	((r).tv_sec * CLK_TCK + (r).tv_usec / (1000000 / CLK_TCK))
729 
730 int
731 linux_times(struct proc *p, struct linux_times_args *args)
732 {
733     struct timeval tv;
734     struct linux_times_argv tms;
735     struct rusage ru;
736     int error;
737 
738 #ifdef DEBUG
739 	if (ldebug(times))
740 		printf(ARGS(times, "*"));
741 #endif
742     mtx_lock_spin(&sched_lock);
743     calcru(p, &ru.ru_utime, &ru.ru_stime, NULL);
744     mtx_unlock_spin(&sched_lock);
745 
746     tms.tms_utime = CONVTCK(ru.ru_utime);
747     tms.tms_stime = CONVTCK(ru.ru_stime);
748 
749     tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime);
750     tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime);
751 
752     if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf,
753 	    	    sizeof(struct linux_times_argv))))
754 	return error;
755 
756     microuptime(&tv);
757     p->p_retval[0] = (int)CONVTCK(tv);
758     return 0;
759 }
760 
761 int
762 linux_newuname(struct proc *p, struct linux_newuname_args *args)
763 {
764 	struct linux_new_utsname utsname;
765 	char *osrelease, *osname;
766 
767 #ifdef DEBUG
768 	if (ldebug(newuname))
769 		printf(ARGS(newuname, "*"));
770 #endif
771 
772 	osname = linux_get_osname(p);
773 	osrelease = linux_get_osrelease(p);
774 
775 	bzero(&utsname, sizeof(struct linux_new_utsname));
776 	strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1);
777 	strncpy(utsname.nodename, hostname, LINUX_MAX_UTSNAME-1);
778 	strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1);
779 	strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1);
780 	strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1);
781 	strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1);
782 
783 	return (copyout((caddr_t)&utsname, (caddr_t)args->buf,
784 			sizeof(struct linux_new_utsname)));
785 }
786 
787 struct linux_utimbuf {
788 	linux_time_t l_actime;
789 	linux_time_t l_modtime;
790 };
791 
792 int
793 linux_utime(struct proc *p, struct linux_utime_args *args)
794 {
795     struct utimes_args /* {
796 	char	*path;
797 	struct	timeval *tptr;
798     } */ bsdutimes;
799     struct timeval tv[2], *tvp;
800     struct linux_utimbuf lut;
801     int error;
802     caddr_t sg;
803 
804     sg = stackgap_init();
805     CHECKALTEXIST(p, &sg, args->fname);
806 
807 #ifdef DEBUG
808 	if (ldebug(utime))
809 		printf(ARGS(utime, "%s, *"), args->fname);
810 #endif
811     if (args->times) {
812 	if ((error = copyin(args->times, &lut, sizeof lut)))
813 	    return error;
814 	tv[0].tv_sec = lut.l_actime;
815 	tv[0].tv_usec = 0;
816 	tv[1].tv_sec = lut.l_modtime;
817 	tv[1].tv_usec = 0;
818 	/* so that utimes can copyin */
819 	tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
820 	if (tvp == NULL)
821 		return (ENAMETOOLONG);
822 	if ((error = copyout(tv, tvp, sizeof(tv))))
823 	    return error;
824 	bsdutimes.tptr = tvp;
825     } else
826 	bsdutimes.tptr = NULL;
827 
828     bsdutimes.path = args->fname;
829     return utimes(p, &bsdutimes);
830 }
831 
/* Linux wait option bit; the wait wrappers below map it to WLINUXCLONE. */
#define	__WCLONE	0x80000000
833 
834 #ifndef __alpha__
835 int
836 linux_waitpid(struct proc *p, struct linux_waitpid_args *args)
837 {
838     struct wait_args /* {
839 	int pid;
840 	int *status;
841 	int options;
842 	struct	rusage *rusage;
843     } */ tmp;
844     int error, tmpstat;
845 
846 #ifdef DEBUG
847 	if (ldebug(waitpid))
848 		printf(ARGS(waitpid, "%d, %p, %d"),
849 		    args->pid, (void *)args->status, args->options);
850 #endif
851     tmp.pid = args->pid;
852     tmp.status = args->status;
853     tmp.options = (args->options & (WNOHANG | WUNTRACED));
854     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
855     if (args->options & __WCLONE)
856 	tmp.options |= WLINUXCLONE;
857     tmp.rusage = NULL;
858 
859     if ((error = wait4(p, &tmp)) != 0)
860 	return error;
861 
862     if (args->status) {
863 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
864 	    return error;
865 	tmpstat &= 0xffff;
866 	if (WIFSIGNALED(tmpstat))
867 	    tmpstat = (tmpstat & 0xffffff80) |
868 		      BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
869 	else if (WIFSTOPPED(tmpstat))
870 	    tmpstat = (tmpstat & 0xffff00ff) |
871 		      (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
872 	return copyout(&tmpstat, args->status, sizeof(int));
873     } else
874 	return 0;
875 }
876 #endif	/*!__alpha__*/
877 
878 int
879 linux_wait4(struct proc *p, struct linux_wait4_args *args)
880 {
881     struct wait_args /* {
882 	int pid;
883 	int *status;
884 	int options;
885 	struct	rusage *rusage;
886     } */ tmp;
887     int error, tmpstat;
888 
889 #ifdef DEBUG
890 	if (ldebug(wait4))
891 		printf(ARGS(wait4, "%d, %p, %d, %p"),
892 		    args->pid, (void *)args->status, args->options,
893 		    (void *)args->rusage);
894 #endif
895     tmp.pid = args->pid;
896     tmp.status = args->status;
897     tmp.options = (args->options & (WNOHANG | WUNTRACED));
898     /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
899     if (args->options & __WCLONE)
900 	tmp.options |= WLINUXCLONE;
901     tmp.rusage = args->rusage;
902 
903     if ((error = wait4(p, &tmp)) != 0)
904 	return error;
905 
906     SIGDELSET(p->p_siglist, SIGCHLD);
907 
908     if (args->status) {
909 	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
910 	    return error;
911 	tmpstat &= 0xffff;
912 	if (WIFSIGNALED(tmpstat))
913 	    tmpstat = (tmpstat & 0xffffff80) |
914 		  BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
915 	else if (WIFSTOPPED(tmpstat))
916 	    tmpstat = (tmpstat & 0xffff00ff) |
917 		  (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
918 	return copyout(&tmpstat, args->status, sizeof(int));
919     } else
920 	return 0;
921 }
922 
923 int
924 linux_mknod(struct proc *p, struct linux_mknod_args *args)
925 {
926 	caddr_t sg;
927 	struct mknod_args bsd_mknod;
928 	struct mkfifo_args bsd_mkfifo;
929 
930 	sg = stackgap_init();
931 
932 	CHECKALTCREAT(p, &sg, args->path);
933 
934 #ifdef DEBUG
935 	if (ldebug(mknod))
936 		printf(ARGS(mknod, "%s, %d, %d"),
937 		    args->path, args->mode, args->dev);
938 #endif
939 
940 	if (args->mode & S_IFIFO) {
941 		bsd_mkfifo.path = args->path;
942 		bsd_mkfifo.mode = args->mode;
943 		return mkfifo(p, &bsd_mkfifo);
944 	} else {
945 		bsd_mknod.path = args->path;
946 		bsd_mknod.mode = args->mode;
947 		bsd_mknod.dev = args->dev;
948 		return mknod(p, &bsd_mknod);
949 	}
950 }
951 
952 /*
953  * UGH! This is just about the dumbest idea I've ever heard!!
954  */
955 int
956 linux_personality(struct proc *p, struct linux_personality_args *args)
957 {
958 #ifdef DEBUG
959 	if (ldebug(personality))
960 		printf(ARGS(personality, "%d"), args->per);
961 #endif
962 #ifndef __alpha__
963 	if (args->per != 0)
964 		return EINVAL;
965 #endif
966 
967 	/* Yes Jim, it's still a Linux... */
968 	p->p_retval[0] = 0;
969 	return 0;
970 }
971 
972 /*
973  * Wrappers for get/setitimer for debugging..
974  */
975 int
976 linux_setitimer(struct proc *p, struct linux_setitimer_args *args)
977 {
978 	struct setitimer_args bsa;
979 	struct itimerval foo;
980 	int error;
981 
982 #ifdef DEBUG
983 	if (ldebug(setitimer))
984 		printf(ARGS(setitimer, "%p, %p"),
985 		    (void *)args->itv, (void *)args->oitv);
986 #endif
987 	bsa.which = args->which;
988 	bsa.itv = args->itv;
989 	bsa.oitv = args->oitv;
990 	if (args->itv) {
991 	    if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo,
992 			sizeof(foo))))
993 		return error;
994 #ifdef DEBUG
995 	    if (ldebug(setitimer)) {
996 	        printf("setitimer: value: sec: %ld, usec: %ld\n",
997 		    foo.it_value.tv_sec, foo.it_value.tv_usec);
998 	        printf("setitimer: interval: sec: %ld, usec: %ld\n",
999 		    foo.it_interval.tv_sec, foo.it_interval.tv_usec);
1000 	    }
1001 #endif
1002 	}
1003 	return setitimer(p, &bsa);
1004 }
1005 
1006 int
1007 linux_getitimer(struct proc *p, struct linux_getitimer_args *args)
1008 {
1009 	struct getitimer_args bsa;
1010 #ifdef DEBUG
1011 	if (ldebug(getitimer))
1012 		printf(ARGS(getitimer, "%p"), (void *)args->itv);
1013 #endif
1014 	bsa.which = args->which;
1015 	bsa.itv = args->itv;
1016 	return getitimer(p, &bsa);
1017 }
1018 
1019 #ifndef __alpha__
1020 int
1021 linux_nice(struct proc *p, struct linux_nice_args *args)
1022 {
1023 	struct setpriority_args	bsd_args;
1024 
1025 	bsd_args.which = PRIO_PROCESS;
1026 	bsd_args.who = 0;	/* current process */
1027 	bsd_args.prio = args->inc;
1028 	return setpriority(p, &bsd_args);
1029 }
1030 #endif	/*!__alpha__*/
1031 
1032 int
1033 linux_setgroups(p, uap)
1034 	struct proc *p;
1035 	struct linux_setgroups_args *uap;
1036 {
1037 	struct ucred *newcred, *oldcred;
1038 	linux_gid_t linux_gidset[NGROUPS];
1039 	gid_t *bsd_gidset;
1040 	int ngrp, error;
1041 
1042 	ngrp = uap->gidsetsize;
1043 	oldcred = p->p_ucred;
1044 
1045 	/*
1046 	 * cr_groups[0] holds egid. Setting the whole set from
1047 	 * the supplied set will cause egid to be changed too.
1048 	 * Keep cr_groups[0] unchanged to prevent that.
1049 	 */
1050 
1051 	if ((error = suser_xxx(oldcred, NULL, PRISON_ROOT)) != 0)
1052 		return (error);
1053 
1054 	if (ngrp >= NGROUPS)
1055 		return (EINVAL);
1056 
1057 	newcred = crdup(oldcred);
1058 	if (ngrp > 0) {
1059 		error = copyin((caddr_t)uap->gidset, (caddr_t)linux_gidset,
1060 			       ngrp * sizeof(linux_gid_t));
1061 		if (error)
1062 			return (error);
1063 
1064 		newcred->cr_ngroups = ngrp + 1;
1065 
1066 		bsd_gidset = newcred->cr_groups;
1067 		ngrp--;
1068 		while (ngrp >= 0) {
1069 			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
1070 			ngrp--;
1071 		}
1072 	}
1073 	else
1074 		newcred->cr_ngroups = 1;
1075 
1076 	setsugid(p);
1077 	p->p_ucred = newcred;
1078 	crfree(oldcred);
1079 	return (0);
1080 }
1081 
1082 int
1083 linux_getgroups(p, uap)
1084 	struct proc *p;
1085 	struct linux_getgroups_args *uap;
1086 {
1087 	struct ucred *cred;
1088 	linux_gid_t linux_gidset[NGROUPS];
1089 	gid_t *bsd_gidset;
1090 	int bsd_gidsetsz, ngrp, error;
1091 
1092 	cred = p->p_ucred;
1093 	bsd_gidset = cred->cr_groups;
1094 	bsd_gidsetsz = cred->cr_ngroups - 1;
1095 
1096 	/*
1097 	 * cr_groups[0] holds egid. Returning the whole set
1098 	 * here will cause a duplicate. Exclude cr_groups[0]
1099 	 * to prevent that.
1100 	 */
1101 
1102 	if ((ngrp = uap->gidsetsize) == 0) {
1103 		p->p_retval[0] = bsd_gidsetsz;
1104 		return (0);
1105 	}
1106 
1107 	if (ngrp < bsd_gidsetsz)
1108 		return (EINVAL);
1109 
1110 	ngrp = 0;
1111 	while (ngrp < bsd_gidsetsz) {
1112 		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1113 		ngrp++;
1114 	}
1115 
1116 	if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset,
1117 	    ngrp * sizeof(linux_gid_t))))
1118 		return (error);
1119 
1120 	p->p_retval[0] = ngrp;
1121 	return (0);
1122 }
1123 
1124 #ifndef __alpha__
1125 int
1126 linux_setrlimit(p, uap)
1127 	struct proc *p;
1128 	struct linux_setrlimit_args *uap;
1129 {
1130 	struct __setrlimit_args bsd;
1131 	struct linux_rlimit rlim;
1132 	int error;
1133 	caddr_t sg = stackgap_init();
1134 
1135 #ifdef DEBUG
1136 	if (ldebug(setrlimit))
1137 		printf(ARGS(setrlimit, "%d, %p"),
1138 		    uap->resource, (void *)uap->rlim);
1139 #endif
1140 
1141 	if (uap->resource >= LINUX_RLIM_NLIMITS)
1142 		return (EINVAL);
1143 
1144 	bsd.which = linux_to_bsd_resource[uap->resource];
1145 	if (bsd.which == -1)
1146 		return (EINVAL);
1147 
1148 	error = copyin(uap->rlim, &rlim, sizeof(rlim));
1149 	if (error)
1150 		return (error);
1151 
1152 	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
1153 	bsd.rlp->rlim_cur = (rlim_t)rlim.rlim_cur;
1154 	bsd.rlp->rlim_max = (rlim_t)rlim.rlim_max;
1155 	return (setrlimit(p, &bsd));
1156 }
1157 
1158 int
1159 linux_getrlimit(p, uap)
1160 	struct proc *p;
1161 	struct linux_getrlimit_args *uap;
1162 {
1163 	struct __getrlimit_args bsd;
1164 	struct linux_rlimit rlim;
1165 	int error;
1166 	caddr_t sg = stackgap_init();
1167 
1168 #ifdef DEBUG
1169 	if (ldebug(getrlimit))
1170 		printf(ARGS(getrlimit, "%d, %p"),
1171 		    uap->resource, (void *)uap->rlim);
1172 #endif
1173 
1174 	if (uap->resource >= LINUX_RLIM_NLIMITS)
1175 		return (EINVAL);
1176 
1177 	bsd.which = linux_to_bsd_resource[uap->resource];
1178 	if (bsd.which == -1)
1179 		return (EINVAL);
1180 
1181 	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
1182 	error = getrlimit(p, &bsd);
1183 	if (error)
1184 		return (error);
1185 
1186 	rlim.rlim_cur = (unsigned long)bsd.rlp->rlim_cur;
1187 	if (rlim.rlim_cur == ULONG_MAX)
1188 		rlim.rlim_cur = LONG_MAX;
1189 	rlim.rlim_max = (unsigned long)bsd.rlp->rlim_max;
1190 	if (rlim.rlim_max == ULONG_MAX)
1191 		rlim.rlim_max = LONG_MAX;
1192 	return (copyout(&rlim, uap->rlim, sizeof(rlim)));
1193 }
1194 #endif /*!__alpha__*/
1195 
1196 int
1197 linux_sched_setscheduler(p, uap)
1198 	struct proc *p;
1199 	struct linux_sched_setscheduler_args *uap;
1200 {
1201 	struct sched_setscheduler_args bsd;
1202 
1203 #ifdef DEBUG
1204 	if (ldebug(sched_setscheduler))
1205 		printf(ARGS(sched_setscheduler, "%d, %d, %p"),
1206 		    uap->pid, uap->policy, (const void *)uap->param);
1207 #endif
1208 
1209 	switch (uap->policy) {
1210 	case LINUX_SCHED_OTHER:
1211 		bsd.policy = SCHED_OTHER;
1212 		break;
1213 	case LINUX_SCHED_FIFO:
1214 		bsd.policy = SCHED_FIFO;
1215 		break;
1216 	case LINUX_SCHED_RR:
1217 		bsd.policy = SCHED_RR;
1218 		break;
1219 	default:
1220 		return EINVAL;
1221 	}
1222 
1223 	bsd.pid = uap->pid;
1224 	bsd.param = uap->param;
1225 	return sched_setscheduler(p, &bsd);
1226 }
1227 
1228 int
1229 linux_sched_getscheduler(p, uap)
1230 	struct proc *p;
1231 	struct linux_sched_getscheduler_args *uap;
1232 {
1233 	struct sched_getscheduler_args bsd;
1234 	int error;
1235 
1236 #ifdef DEBUG
1237 	if (ldebug(sched_getscheduler))
1238 		printf(ARGS(sched_getscheduler, "%d"), uap->pid);
1239 #endif
1240 
1241 	bsd.pid = uap->pid;
1242 	error = sched_getscheduler(p, &bsd);
1243 
1244 	switch (p->p_retval[0]) {
1245 	case SCHED_OTHER:
1246 		p->p_retval[0] = LINUX_SCHED_OTHER;
1247 		break;
1248 	case SCHED_FIFO:
1249 		p->p_retval[0] = LINUX_SCHED_FIFO;
1250 		break;
1251 	case SCHED_RR:
1252 		p->p_retval[0] = LINUX_SCHED_RR;
1253 		break;
1254 	}
1255 
1256 	return error;
1257 }
1258 
1259 int
1260 linux_sched_get_priority_max(p, uap)
1261 	struct proc *p;
1262 	struct linux_sched_get_priority_max_args *uap;
1263 {
1264 	struct sched_get_priority_max_args bsd;
1265 
1266 #ifdef DEBUG
1267 	if (ldebug(sched_get_priority_max))
1268 		printf(ARGS(sched_get_priority_max, "%d"), uap->policy);
1269 #endif
1270 
1271 	switch (uap->policy) {
1272 	case LINUX_SCHED_OTHER:
1273 		bsd.policy = SCHED_OTHER;
1274 		break;
1275 	case LINUX_SCHED_FIFO:
1276 		bsd.policy = SCHED_FIFO;
1277 		break;
1278 	case LINUX_SCHED_RR:
1279 		bsd.policy = SCHED_RR;
1280 		break;
1281 	default:
1282 		return EINVAL;
1283 	}
1284 	return sched_get_priority_max(p, &bsd);
1285 }
1286 
1287 int
1288 linux_sched_get_priority_min(p, uap)
1289 	struct proc *p;
1290 	struct linux_sched_get_priority_min_args *uap;
1291 {
1292 	struct sched_get_priority_min_args bsd;
1293 
1294 #ifdef DEBUG
1295 	if (ldebug(sched_get_priority_min))
1296 		printf(ARGS(sched_get_priority_min, "%d"), uap->policy);
1297 #endif
1298 
1299 	switch (uap->policy) {
1300 	case LINUX_SCHED_OTHER:
1301 		bsd.policy = SCHED_OTHER;
1302 		break;
1303 	case LINUX_SCHED_FIFO:
1304 		bsd.policy = SCHED_FIFO;
1305 		break;
1306 	case LINUX_SCHED_RR:
1307 		bsd.policy = SCHED_RR;
1308 		break;
1309 	default:
1310 		return EINVAL;
1311 	}
1312 	return sched_get_priority_min(p, &bsd);
1313 }
1314 
1315 #define REBOOT_CAD_ON	0x89abcdef
1316 #define REBOOT_CAD_OFF	0
1317 #define REBOOT_HALT	0xcdef0123
1318 
1319 int
1320 linux_reboot(struct proc *p, struct linux_reboot_args *args)
1321 {
1322 	struct reboot_args bsd_args;
1323 
1324 #ifdef DEBUG
1325 	if (ldebug(reboot))
1326 		printf(ARGS(reboot, "0x%x"), args->opt);
1327 #endif
1328 	if (args->opt == REBOOT_CAD_ON || args->opt == REBOOT_CAD_OFF)
1329 		return (0);
1330 	bsd_args.opt = args->opt == REBOOT_HALT ? RB_HALT : 0;
1331 	return (reboot(p, &bsd_args));
1332 }
1333