xref: /freebsd/sys/compat/linux/linux_misc.c (revision ee2ea5ceafed78a5bd9810beb9e3ca927180c226)
1 /*-
 * Copyright (c) 1994-1995 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include "opt_compat.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact_aout.h>
37 #include <sys/jail.h>
38 #include <sys/kernel.h>
39 #include <sys/lock.h>
40 #include <sys/mman.h>
41 #include <sys/mount.h>
42 #include <sys/mutex.h>
43 #include <sys/namei.h>
44 #include <sys/poll.h>
45 #include <sys/proc.h>
46 #include <sys/blist.h>
47 #include <sys/reboot.h>
48 #include <sys/resourcevar.h>
49 #include <sys/signalvar.h>
50 #include <sys/stat.h>
51 #include <sys/sysctl.h>
52 #include <sys/sysproto.h>
53 #include <sys/time.h>
54 #include <sys/unistd.h>
55 #include <sys/vmmeter.h>
56 #include <sys/vnode.h>
57 #include <sys/wait.h>
58 
59 #include <vm/vm.h>
60 #include <vm/pmap.h>
61 #include <vm/vm_kern.h>
62 #include <vm/vm_map.h>
63 #include <vm/vm_extern.h>
64 #include <vm/vm_object.h>
65 #include <vm/swap_pager.h>
66 
67 #include <machine/limits.h>
68 
69 #include <posix4/sched.h>
70 
71 #include <machine/../linux/linux.h>
72 #include <machine/../linux/linux_proto.h>
73 #include <compat/linux/linux_mib.h>
74 #include <compat/linux/linux_util.h>
75 
/*
 * Translate a BSD signal number into its Linux counterpart.
 *
 * On alpha the number is passed through unchanged.  Elsewhere, numbers
 * up to LINUX_SIGTBLSZ are looked up in bsd_to_linux_signal[]; anything
 * larger is returned as-is.  The argument is evaluated more than once,
 * so it must not have side effects that matter.
 */
#ifdef __alpha__
#define BSD_TO_LINUX_SIGNAL(sig)       (sig)
#else
#define BSD_TO_LINUX_SIGNAL(sig)	\
	(((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : (sig))
#endif
82 
#ifndef __alpha__
/*
 * Translation table from a Linux resource-limit index to the BSD
 * RLIMIT_* constant, used by the set/getrlimit wrappers in this file.
 * An entry of -1 marks a Linux resource with no BSD counterpart; the
 * wrappers reject it with EINVAL.
 */
static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
	RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
	RLIMIT_MEMLOCK, -1
};
#endif /*!__alpha__*/
90 
/*
 * Result structure copied out to the caller by linux_sysinfo().
 * The trailing _f[] array is padding only; nothing in this file
 * assigns a meaning to it.
 */
struct l_sysinfo {
	l_long		uptime;		/* Seconds since boot */
	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
	l_ulong		totalram;	/* Total usable main memory size */
	l_ulong		freeram;	/* Available memory size */
	l_ulong		sharedram;	/* Amount of shared memory */
	l_ulong		bufferram;	/* Memory used by buffers */
	l_ulong		totalswap;	/* Total swap space size */
	l_ulong		freeswap;	/* swap space still available */
	l_ushort	procs;		/* Number of current processes */
	char		_f[22];		/* Pads structure to 64 bytes */
};
103 #ifndef __alpha__
104 int
105 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
106 {
107 	struct l_sysinfo sysinfo;
108 	vm_object_t object;
109 	int i;
110 	struct timespec ts;
111 
112 	/* Uptime is copied out of print_uptime() in kern_shutdown.c */
113 	getnanouptime(&ts);
114 	i = 0;
115 	if (ts.tv_sec >= 86400) {
116 		ts.tv_sec %= 86400;
117 		i = 1;
118 	}
119 	if (i || ts.tv_sec >= 3600) {
120 		ts.tv_sec %= 3600;
121 		i = 1;
122 	}
123 	if (i || ts.tv_sec >= 60) {
124 		ts.tv_sec %= 60;
125 		i = 1;
126 	}
127 	sysinfo.uptime=ts.tv_sec;
128 
129 	/* Use the information from the mib to get our load averages */
130 	for (i = 0; i < 3; i++)
131 		sysinfo.loads[i] = averunnable.ldavg[i];
132 
133 	sysinfo.totalram = physmem * PAGE_SIZE;
134 	sysinfo.freeram = sysinfo.totalram - cnt.v_wire_count * PAGE_SIZE;
135 
136 	sysinfo.sharedram = 0;
137 	for (object = TAILQ_FIRST(&vm_object_list); object != NULL;
138 	     object = TAILQ_NEXT(object, object_list))
139 		if (object->shadow_count > 1)
140 			sysinfo.sharedram += object->resident_page_count;
141 
142 	sysinfo.sharedram *= PAGE_SIZE;
143 	sysinfo.bufferram = 0;
144 
145 	if (swapblist == NULL) {
146 		sysinfo.totalswap= 0;
147 		sysinfo.freeswap = 0;
148 	} else {
149 		sysinfo.totalswap = swapblist->bl_blocks * 1024;
150 		sysinfo.freeswap = swapblist->bl_root->u.bmu_avail * PAGE_SIZE;
151 	}
152 
153 	sysinfo.procs = 20; /* Hack */
154 
155 	return copyout(&sysinfo, (caddr_t)args->info, sizeof(sysinfo));
156 }
157 #endif /*!__alpha__*/
158 
159 #ifndef __alpha__
160 int
161 linux_alarm(struct thread *td, struct linux_alarm_args *args)
162 {
163 	struct itimerval it, old_it;
164 	struct timeval tv;
165 	int s;
166 
167 #ifdef DEBUG
168 	if (ldebug(alarm))
169 		printf(ARGS(alarm, "%u"), args->secs);
170 #endif
171 
172 	if (args->secs > 100000000)
173 		return EINVAL;
174 
175 	it.it_value.tv_sec = (long)args->secs;
176 	it.it_value.tv_usec = 0;
177 	it.it_interval.tv_sec = 0;
178 	it.it_interval.tv_usec = 0;
179 	s = splsoftclock();
180 	old_it = td->td_proc->p_realtimer;
181 	getmicrouptime(&tv);
182 	if (timevalisset(&old_it.it_value))
183 		callout_stop(&td->td_proc->p_itcallout);
184 	if (it.it_value.tv_sec != 0) {
185 		callout_reset(&td->td_proc->p_itcallout, tvtohz(&it.it_value),
186 		    realitexpire, td->td_proc);
187 		timevaladd(&it.it_value, &tv);
188 	}
189 	td->td_proc->p_realtimer = it;
190 	splx(s);
191 	if (timevalcmp(&old_it.it_value, &tv, >)) {
192 		timevalsub(&old_it.it_value, &tv);
193 		if (old_it.it_value.tv_usec != 0)
194 			old_it.it_value.tv_sec++;
195 		td->td_retval[0] = old_it.it_value.tv_sec;
196 	}
197 	return 0;
198 }
199 #endif /*!__alpha__*/
200 
201 int
202 linux_brk(struct thread *td, struct linux_brk_args *args)
203 {
204 	struct vmspace *vm = td->td_proc->p_vmspace;
205 	vm_offset_t new, old;
206 	struct obreak_args /* {
207 		char * nsize;
208 	} */ tmp;
209 
210 #ifdef DEBUG
211 	if (ldebug(brk))
212 		printf(ARGS(brk, "%p"), (void *)args->dsend);
213 #endif
214 	old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
215 	new = (vm_offset_t)args->dsend;
216 	tmp.nsize = (char *) new;
217 	if (((caddr_t)new > vm->vm_daddr) && !obreak(td, &tmp))
218 		td->td_retval[0] = (long)new;
219 	else
220 		td->td_retval[0] = (long)old;
221 
222 	return 0;
223 }
224 
/*
 * uselib(2) emulation: load a Linux a.out shared library named by
 * args->library into the calling process' address space.
 *
 * The file is looked up and checked (not open for writing, executable
 * regular file on a mount without MNT_NOEXEC, non-empty, accessible),
 * its first page is mapped into kernel_map to read the a.out header,
 * and text+data are then either mapped directly (page-aligned file
 * offset) or copied through a temporary kernel mapping (unaligned file
 * offset).  BSS, if any, is added as anonymous memory after text+data.
 *
 * Returns 0 on success or an error value; every exit goes through the
 * cleanup label, which unlocks the vnode if it is still locked and
 * removes the header mapping from kernel_map.
 *
 * NOTE(review): no vrele()/vput() of vp is visible on any path --
 * confirm whether the reference obtained by namei() is kept on purpose.
 */
int
linux_uselib(struct thread *td, struct linux_uselib_args *args)
{
	struct nameidata ni;
	struct vnode *vp;
	struct exec *a_out;
	struct vattr attr;
	vm_offset_t vmaddr;
	unsigned long file_offset;
	vm_offset_t buffer;
	unsigned long bss_size;
	int error;
	caddr_t sg;
	int locked;

	sg = stackgap_init();
	CHECKALTEXIST(td, &sg, args->library);

#ifdef DEBUG
	if (ldebug(uselib))
		printf(ARGS(uselib, "%s"), args->library);
#endif

	/* State inspected by the cleanup code; set before any goto. */
	a_out = NULL;
	locked = 0;
	vp = NULL;

	/*
	 * XXX This code should make use of vn_open(), rather than doing
	 * all this stuff itself.
	 */
	NDINIT(&ni, LOOKUP, FOLLOW|LOCKLEAF, UIO_USERSPACE, args->library, td);
	error = namei(&ni);
	if (error)
		goto cleanup;

	vp = ni.ni_vp;
	/*
	 * XXX - This looks like a bogus check. A LOCKLEAF namei should not
	 * succeed without returning a vnode.
	 */
	if (vp == NULL) {
		error = ENOEXEC;	/* ?? */
		goto cleanup;
	}
	NDFREE(&ni, NDF_ONLY_PNBUF);

	/*
	 * From here on down, we have a locked vnode that must be unlocked.
	 */
	locked++;

	/* Writable? */
	if (vp->v_writecount) {
		error = ETXTBSY;
		goto cleanup;
	}

	/* Executable? */
	error = VOP_GETATTR(vp, &attr, td->td_ucred, td);
	if (error)
		goto cleanup;

	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
	    ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* Sensible size? */
	if (attr.va_size == 0) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* Can we access it? */
	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
	if (error)
		goto cleanup;

	error = VOP_OPEN(vp, FREAD, td->td_ucred, td);
	if (error)
		goto cleanup;

	/*
	 * Lock no longer needed
	 */
	VOP_UNLOCK(vp, 0, td);
	locked = 0;

	/* Pull in executable header into kernel_map */
	error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
	if (error)
		goto cleanup;

	/* Is it a Linux binary ? */
	if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
		error = ENOEXEC;
		goto cleanup;
	}

	/*
	 * While we are here, we should REALLY do some more checks
	 */

	/* Set file/virtual offset based on a.out variant. */
	switch ((int)(a_out->a_magic & 0xffff)) {
	case 0413:	/* ZMAGIC */
		file_offset = 1024;
		break;
	case 0314:	/* QMAGIC */
		file_offset = 0;
		break;
	default:
		error = ENOEXEC;
		goto cleanup;
	}

	bss_size = round_page(a_out->a_bss);

	/* Check various fields in header for validity/bounds. */
	if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
		error = ENOEXEC;
		goto cleanup;
	}

	/*
	 * text + data can't exceed file size
	 * NOTE(review): the sum is not checked for wrap-around -- confirm
	 * the field widths make overflow impossible here.
	 */
	if (a_out->a_data + a_out->a_text > attr.va_size) {
		error = EFAULT;
		goto cleanup;
	}

	/* To protect td->td_proc->p_rlimit in the if condition. */
	mtx_assert(&Giant, MA_OWNED);

	/*
	 * text/data/bss must not exceed limits
	 * XXX - this is not complete. it should check current usage PLUS
	 * the resources needed by this library.
	 */
	if (a_out->a_text > maxtsiz ||
	    a_out->a_data + bss_size >
	    td->td_proc->p_rlimit[RLIMIT_DATA].rlim_cur) {
		error = ENOMEM;
		goto cleanup;
	}

	/* prevent more writers */
	vp->v_flag |= VTEXT;

	/*
	 * Check if file_offset page aligned. Currently we cannot handle
	 * misaligned file offsets, and so we read in the entire image
	 * (what a waste).
	 */
	if (file_offset & PAGE_MASK) {
#ifdef DEBUG
		printf("uselib: Non page aligned binary %lu\n", file_offset);
#endif
		/* Map text+data read/write/execute */

		/* a_entry is the load address and is page aligned */
		vmaddr = trunc_page(a_out->a_entry);

		/* get anon user mapping, read+write+execute */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, a_out->a_text + a_out->a_data, FALSE, VM_PROT_ALL,
		    VM_PROT_ALL, 0);
		if (error)
			goto cleanup;

		/*
		 * map file into kernel_map
		 * NOTE(review): 'buffer' is passed by address without being
		 * initialized first -- confirm vm_mmap() does not read it as
		 * a placement hint.
		 */
		error = vm_mmap(kernel_map, &buffer,
		    round_page(a_out->a_text + a_out->a_data + file_offset),
		    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp,
		    trunc_page(file_offset));
		if (error)
			goto cleanup;

		/* copy from kernel VM space to user space */
		error = copyout((caddr_t)(uintptr_t)(buffer + file_offset),
		    (caddr_t)vmaddr, a_out->a_text + a_out->a_data);

		/* release temporary kernel space */
		vm_map_remove(kernel_map, buffer, buffer +
		    round_page(a_out->a_text + a_out->a_data + file_offset));

		/* The copyout() result is only examined after the unmap. */
		if (error)
			goto cleanup;
	} else {
#ifdef DEBUG
		printf("uselib: Page aligned binary %lu\n", file_offset);
#endif
		/*
		 * for QMAGIC, a_entry is 20 bytes beyond the load address
		 * to skip the executable header
		 */
		vmaddr = trunc_page(a_out->a_entry);

		/*
		 * Map it all into the process's space as a single
		 * copy-on-write "data" segment.
		 */
		error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr,
		    a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
		    MAP_PRIVATE | MAP_FIXED, (caddr_t)vp, file_offset);
		if (error)
			goto cleanup;
	}
#ifdef DEBUG
	/*
	 * NOTE(review): this dereferences vmaddr, an address in the
	 * process' map, directly from kernel code -- confirm this is
	 * acceptable even for debug builds.
	 */
	printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long*)vmaddr)[0],
	    ((long*)vmaddr)[1]);
#endif
	if (bss_size != 0) {
		/* Calculate BSS start address */
		vmaddr = trunc_page(a_out->a_entry) + a_out->a_text +
		    a_out->a_data;

		/* allocate some 'anon' space */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, bss_size, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0);
		if (error)
			goto cleanup;
	}

cleanup:
	/* Unlock vnode if needed */
	if (locked)
		VOP_UNLOCK(vp, 0, td);

	/* Release the kernel mapping. */
	if (a_out)
		vm_map_remove(kernel_map, (vm_offset_t)a_out,
		    (vm_offset_t)a_out + PAGE_SIZE);

	return error;
}
463 
464 int
465 linux_select(struct thread *td, struct linux_select_args *args)
466 {
467 	struct select_args bsa;
468 	struct timeval tv0, tv1, utv, *tvp;
469 	caddr_t sg;
470 	int error;
471 
472 #ifdef DEBUG
473 	if (ldebug(select))
474 		printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds,
475 		    (void *)args->readfds, (void *)args->writefds,
476 		    (void *)args->exceptfds, (void *)args->timeout);
477 #endif
478 
479 	error = 0;
480 	bsa.nd = args->nfds;
481 	bsa.in = args->readfds;
482 	bsa.ou = args->writefds;
483 	bsa.ex = args->exceptfds;
484 	bsa.tv = (struct timeval *)args->timeout;
485 
486 	/*
487 	 * Store current time for computation of the amount of
488 	 * time left.
489 	 */
490 	if (args->timeout) {
491 		if ((error = copyin((caddr_t)args->timeout, &utv,
492 		    sizeof(utv))))
493 			goto select_out;
494 #ifdef DEBUG
495 		if (ldebug(select))
496 			printf(LMSG("incoming timeout (%ld/%ld)"),
497 			    utv.tv_sec, utv.tv_usec);
498 #endif
499 
500 		if (itimerfix(&utv)) {
501 			/*
502 			 * The timeval was invalid.  Convert it to something
503 			 * valid that will act as it does under Linux.
504 			 */
505 			sg = stackgap_init();
506 			tvp = stackgap_alloc(&sg, sizeof(utv));
507 			utv.tv_sec += utv.tv_usec / 1000000;
508 			utv.tv_usec %= 1000000;
509 			if (utv.tv_usec < 0) {
510 				utv.tv_sec -= 1;
511 				utv.tv_usec += 1000000;
512 			}
513 			if (utv.tv_sec < 0)
514 				timevalclear(&utv);
515 			if ((error = copyout(&utv, tvp, sizeof(utv))))
516 				goto select_out;
517 			bsa.tv = tvp;
518 		}
519 		microtime(&tv0);
520 	}
521 
522 	error = select(td, &bsa);
523 #ifdef DEBUG
524 	if (ldebug(select))
525 		printf(LMSG("real select returns %d"), error);
526 #endif
527 	if (error) {
528 		/*
529 		 * See fs/select.c in the Linux kernel.  Without this,
530 		 * Maelstrom doesn't work.
531 		 */
532 		if (error == ERESTART)
533 			error = EINTR;
534 		goto select_out;
535 	}
536 
537 	if (args->timeout) {
538 		if (td->td_retval[0]) {
539 			/*
540 			 * Compute how much time was left of the timeout,
541 			 * by subtracting the current time and the time
542 			 * before we started the call, and subtracting
543 			 * that result from the user-supplied value.
544 			 */
545 			microtime(&tv1);
546 			timevalsub(&tv1, &tv0);
547 			timevalsub(&utv, &tv1);
548 			if (utv.tv_sec < 0)
549 				timevalclear(&utv);
550 		} else
551 			timevalclear(&utv);
552 #ifdef DEBUG
553 		if (ldebug(select))
554 			printf(LMSG("outgoing timeout (%ld/%ld)"),
555 			    utv.tv_sec, utv.tv_usec);
556 #endif
557 		if ((error = copyout(&utv, (caddr_t)args->timeout,
558 		    sizeof(utv))))
559 			goto select_out;
560 	}
561 
562 select_out:
563 #ifdef DEBUG
564 	if (ldebug(select))
565 		printf(LMSG("select_out -> %d"), error);
566 #endif
567 	return error;
568 }
569 
570 int
571 linux_mremap(struct thread *td, struct linux_mremap_args *args)
572 {
573 	struct munmap_args /* {
574 		void *addr;
575 		size_t len;
576 	} */ bsd_args;
577 	int error = 0;
578 
579 #ifdef DEBUG
580 	if (ldebug(mremap))
581 		printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"),
582 		    (void *)args->addr,
583 		    (unsigned long)args->old_len,
584 		    (unsigned long)args->new_len,
585 		    (unsigned long)args->flags);
586 #endif
587 	args->new_len = round_page(args->new_len);
588 	args->old_len = round_page(args->old_len);
589 
590 	if (args->new_len > args->old_len) {
591 		td->td_retval[0] = 0;
592 		return ENOMEM;
593 	}
594 
595 	if (args->new_len < args->old_len) {
596 		bsd_args.addr = (caddr_t)(args->addr + args->new_len);
597 		bsd_args.len = args->old_len - args->new_len;
598 		error = munmap(td, &bsd_args);
599 	}
600 
601 	td->td_retval[0] = error ? 0 : (u_long)args->addr;
602 	return error;
603 }
604 
605 int
606 linux_msync(struct thread *td, struct linux_msync_args *args)
607 {
608 	struct msync_args bsd_args;
609 
610 	bsd_args.addr = (caddr_t)args->addr;
611 	bsd_args.len = args->len;
612 	bsd_args.flags = 0;	/* XXX ignore */
613 
614 	return msync(td, &bsd_args);
615 }
616 
617 #ifndef __alpha__
618 int
619 linux_time(struct thread *td, struct linux_time_args *args)
620 {
621 	struct timeval tv;
622 	l_time_t tm;
623 	int error;
624 
625 #ifdef DEBUG
626 	if (ldebug(time))
627 		printf(ARGS(time, "*"));
628 #endif
629 
630 	microtime(&tv);
631 	tm = tv.tv_sec;
632 	if (args->tm && (error = copyout(&tm, (caddr_t)args->tm, sizeof(tm))))
633 		return error;
634 	td->td_retval[0] = tm;
635 	return 0;
636 }
637 #endif	/*!__alpha__*/
638 
/*
 * Buffer layout copied out by linux_times().  Each field is filled
 * from a timeval converted to clock ticks with CONVTCK().
 */
struct l_times_argv {
	l_long		tms_utime;	/* from calcru() ru_utime */
	l_long		tms_stime;	/* from calcru() ru_stime */
	l_long		tms_cutime;	/* from p_stats->p_cru.ru_utime */
	l_long		tms_cstime;	/* from p_stats->p_cru.ru_stime */
};
645 
/* Clock tick rate reported to Linux binaries. */
#ifdef __alpha__
#define CLK_TCK 1024	/* Linux uses 1024 on alpha */
#else
#define CLK_TCK 100	/* Linux uses 100 */
#endif

/*
 * Convert a timeval-like object (anything with tv_sec and tv_usec
 * members) into CLK_TCK ticks.  The argument is parenthesized so that
 * expressions such as *ptr can be passed; it is evaluated twice.
 */
#define CONVTCK(r)	\
	((r).tv_sec * CLK_TCK + (r).tv_usec / (1000000 / CLK_TCK))
653 
654 int
655 linux_times(struct thread *td, struct linux_times_args *args)
656 {
657 	struct timeval tv;
658 	struct l_times_argv tms;
659 	struct rusage ru;
660 	int error;
661 
662 #ifdef DEBUG
663 	if (ldebug(times))
664 		printf(ARGS(times, "*"));
665 #endif
666 
667 	mtx_lock_spin(&sched_lock);
668 	calcru(td->td_proc, &ru.ru_utime, &ru.ru_stime, NULL);
669 	mtx_unlock_spin(&sched_lock);
670 
671 	tms.tms_utime = CONVTCK(ru.ru_utime);
672 	tms.tms_stime = CONVTCK(ru.ru_stime);
673 
674 	tms.tms_cutime = CONVTCK(td->td_proc->p_stats->p_cru.ru_utime);
675 	tms.tms_cstime = CONVTCK(td->td_proc->p_stats->p_cru.ru_stime);
676 
677 	if ((error = copyout(&tms, (caddr_t)args->buf, sizeof(tms))))
678 		return error;
679 
680 	microuptime(&tv);
681 	td->td_retval[0] = (int)CONVTCK(tv);
682 	return 0;
683 }
684 
685 int
686 linux_newuname(struct thread *td, struct linux_newuname_args *args)
687 {
688 	struct l_new_utsname utsname;
689 	char osname[LINUX_MAX_UTSNAME];
690 	char osrelease[LINUX_MAX_UTSNAME];
691 
692 #ifdef DEBUG
693 	if (ldebug(newuname))
694 		printf(ARGS(newuname, "*"));
695 #endif
696 
697 	linux_get_osname(td->td_proc, osname);
698 	linux_get_osrelease(td->td_proc, osrelease);
699 
700 	bzero(&utsname, sizeof(utsname));
701 	strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1);
702 	getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME-1);
703 	strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1);
704 	strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1);
705 	strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1);
706 	strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1);
707 
708 	return (copyout(&utsname, (caddr_t)args->buf, sizeof(utsname)));
709 }
710 
#if defined(__i386__)
/* Linux utimbuf: access and modification time in whole seconds. */
struct l_utimbuf {
	l_time_t l_actime;
	l_time_t l_modtime;
};

/*
 * utime(2) emulation on top of the native utimes().
 *
 * When args->times is set, the two second-resolution timestamps are
 * widened to timevals (microseconds 0) and staged in the stackgap so
 * that utimes() can copy them in; otherwise utimes() is called with a
 * NULL time pointer.
 */
int
linux_utime(struct thread *td, struct linux_utime_args *args)
{
	struct utimes_args /* {
		char	*path;
		struct	timeval *tptr;
	} */ bsd;
	struct timeval stamps[2], *gap_tv;
	struct l_utimbuf lut;
	caddr_t sg;
	int error;

	sg = stackgap_init();
	CHECKALTEXIST(td, &sg, args->fname);

#ifdef DEBUG
	if (ldebug(utime))
		printf(ARGS(utime, "%s, *"), args->fname);
#endif

	bsd.path = args->fname;

	if (!args->times) {
		bsd.tptr = NULL;
		return utimes(td, &bsd);
	}

	error = copyin((caddr_t)args->times, &lut, sizeof lut);
	if (error)
		return error;

	stamps[0].tv_sec = lut.l_actime;
	stamps[0].tv_usec = 0;
	stamps[1].tv_sec = lut.l_modtime;
	stamps[1].tv_usec = 0;

	/* so that utimes can copyin */
	gap_tv = (struct timeval *)stackgap_alloc(&sg, sizeof(stamps));
	if (gap_tv == NULL)
		return (ENAMETOOLONG);
	error = copyout(stamps, gap_tv, sizeof(stamps));
	if (error)
		return error;

	bsd.tptr = gap_tv;
	return utimes(td, &bsd);
}
#endif /* __i386__ */
758 
/*
 * Linux wait option bit; the wait wrappers in this file translate it
 * to WLINUXCLONE before calling the native wait4().
 */
#define __WCLONE 0x80000000
760 
761 #ifndef __alpha__
762 int
763 linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
764 {
765 	struct wait_args /* {
766 		int pid;
767 		int *status;
768 		int options;
769 		struct	rusage *rusage;
770 	} */ tmp;
771 	int error, tmpstat;
772 
773 #ifdef DEBUG
774 	if (ldebug(waitpid))
775 		printf(ARGS(waitpid, "%d, %p, %d"),
776 		    args->pid, (void *)args->status, args->options);
777 #endif
778 
779 	tmp.pid = args->pid;
780 	tmp.status = args->status;
781 	tmp.options = (args->options & (WNOHANG | WUNTRACED));
782 	/* WLINUXCLONE should be equal to __WCLONE, but we make sure */
783 	if (args->options & __WCLONE)
784 		tmp.options |= WLINUXCLONE;
785 	tmp.rusage = NULL;
786 
787 	if ((error = wait4(td, &tmp)) != 0)
788 		return error;
789 
790 	if (args->status) {
791 		if ((error = copyin((caddr_t)args->status, &tmpstat,
792 		    sizeof(int))) != 0)
793 			return error;
794 		tmpstat &= 0xffff;
795 		if (WIFSIGNALED(tmpstat))
796 			tmpstat = (tmpstat & 0xffffff80) |
797 			    BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
798 		else if (WIFSTOPPED(tmpstat))
799 			tmpstat = (tmpstat & 0xffff00ff) |
800 			    (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
801 		return copyout(&tmpstat, (caddr_t)args->status, sizeof(int));
802 	}
803 
804 	return 0;
805 }
806 #endif	/*!__alpha__*/
807 
808 int
809 linux_wait4(struct thread *td, struct linux_wait4_args *args)
810 {
811 	struct wait_args /* {
812 		int pid;
813 		int *status;
814 		int options;
815 		struct	rusage *rusage;
816 	} */ tmp;
817 	int error, tmpstat;
818 
819 #ifdef DEBUG
820 	if (ldebug(wait4))
821 		printf(ARGS(wait4, "%d, %p, %d, %p"),
822 		    args->pid, (void *)args->status, args->options,
823 		    (void *)args->rusage);
824 #endif
825 
826 	tmp.pid = args->pid;
827 	tmp.status = args->status;
828 	tmp.options = (args->options & (WNOHANG | WUNTRACED));
829 	/* WLINUXCLONE should be equal to __WCLONE, but we make sure */
830 	if (args->options & __WCLONE)
831 		tmp.options |= WLINUXCLONE;
832 	tmp.rusage = (struct rusage *)args->rusage;
833 
834 	if ((error = wait4(td, &tmp)) != 0)
835 		return error;
836 
837 	SIGDELSET(td->td_proc->p_siglist, SIGCHLD);
838 
839 	if (args->status) {
840 		if ((error = copyin((caddr_t)args->status, &tmpstat,
841 		    sizeof(int))) != 0)
842 			return error;
843 		tmpstat &= 0xffff;
844 		if (WIFSIGNALED(tmpstat))
845 			tmpstat = (tmpstat & 0xffffff80) |
846 			    BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
847 		else if (WIFSTOPPED(tmpstat))
848 			tmpstat = (tmpstat & 0xffff00ff) |
849 			    (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
850 		return copyout(&tmpstat, (caddr_t)args->status, sizeof(int));
851 	}
852 
853 	return 0;
854 }
855 
856 int
857 linux_mknod(struct thread *td, struct linux_mknod_args *args)
858 {
859 	caddr_t sg;
860 	struct mknod_args bsd_mknod;
861 	struct mkfifo_args bsd_mkfifo;
862 
863 	sg = stackgap_init();
864 
865 	CHECKALTCREAT(td, &sg, args->path);
866 
867 #ifdef DEBUG
868 	if (ldebug(mknod))
869 		printf(ARGS(mknod, "%s, %d, %d"),
870 		    args->path, args->mode, args->dev);
871 #endif
872 
873 	if (args->mode & S_IFIFO) {
874 		bsd_mkfifo.path = args->path;
875 		bsd_mkfifo.mode = args->mode;
876 		return mkfifo(td, &bsd_mkfifo);
877 	} else {
878 		bsd_mknod.path = args->path;
879 		bsd_mknod.mode = args->mode;
880 		bsd_mknod.dev = args->dev;
881 		return mknod(td, &bsd_mknod);
882 	}
883 }
884 
885 /*
886  * UGH! This is just about the dumbest idea I've ever heard!!
887  */
888 int
889 linux_personality(struct thread *td, struct linux_personality_args *args)
890 {
891 #ifdef DEBUG
892 	if (ldebug(personality))
893 		printf(ARGS(personality, "%d"), args->per);
894 #endif
895 #ifndef __alpha__
896 	if (args->per != 0)
897 		return EINVAL;
898 #endif
899 
900 	/* Yes Jim, it's still a Linux... */
901 	td->td_retval[0] = 0;
902 	return 0;
903 }
904 
905 /*
906  * Wrappers for get/setitimer for debugging..
907  */
908 int
909 linux_setitimer(struct thread *td, struct linux_setitimer_args *args)
910 {
911 	struct setitimer_args bsa;
912 	struct itimerval foo;
913 	int error;
914 
915 #ifdef DEBUG
916 	if (ldebug(setitimer))
917 		printf(ARGS(setitimer, "%p, %p"),
918 		    (void *)args->itv, (void *)args->oitv);
919 #endif
920 	bsa.which = args->which;
921 	bsa.itv = (struct itimerval *)args->itv;
922 	bsa.oitv = (struct itimerval *)args->oitv;
923 	if (args->itv) {
924 	    if ((error = copyin((caddr_t)args->itv, &foo, sizeof(foo))))
925 		return error;
926 #ifdef DEBUG
927 	    if (ldebug(setitimer)) {
928 	        printf("setitimer: value: sec: %ld, usec: %ld\n",
929 		    foo.it_value.tv_sec, foo.it_value.tv_usec);
930 	        printf("setitimer: interval: sec: %ld, usec: %ld\n",
931 		    foo.it_interval.tv_sec, foo.it_interval.tv_usec);
932 	    }
933 #endif
934 	}
935 	return setitimer(td, &bsa);
936 }
937 
938 int
939 linux_getitimer(struct thread *td, struct linux_getitimer_args *args)
940 {
941 	struct getitimer_args bsa;
942 #ifdef DEBUG
943 	if (ldebug(getitimer))
944 		printf(ARGS(getitimer, "%p"), (void *)args->itv);
945 #endif
946 	bsa.which = args->which;
947 	bsa.itv = (struct itimerval *)args->itv;
948 	return getitimer(td, &bsa);
949 }
950 
951 #ifndef __alpha__
952 int
953 linux_nice(struct thread *td, struct linux_nice_args *args)
954 {
955 	struct setpriority_args	bsd_args;
956 
957 	bsd_args.which = PRIO_PROCESS;
958 	bsd_args.who = 0;	/* current process */
959 	bsd_args.prio = args->inc;
960 	return setpriority(td, &bsd_args);
961 }
962 #endif	/*!__alpha__*/
963 
964 int
965 linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
966 {
967 	struct ucred *newcred, *oldcred;
968 	l_gid_t linux_gidset[NGROUPS];
969 	gid_t *bsd_gidset;
970 	int ngrp, error;
971 	struct proc *p;
972 
973 	ngrp = args->gidsetsize;
974 	if (ngrp >= NGROUPS)
975 		return (EINVAL);
976 	error = copyin((caddr_t)args->grouplist, linux_gidset,
977 	    ngrp * sizeof(l_gid_t));
978 	if (error)
979 		return (error);
980 	newcred = crget();
981 	p = td->td_proc;
982 	PROC_LOCK(p);
983 	oldcred = p->p_ucred;
984 
985 	/*
986 	 * cr_groups[0] holds egid. Setting the whole set from
987 	 * the supplied set will cause egid to be changed too.
988 	 * Keep cr_groups[0] unchanged to prevent that.
989 	 */
990 
991 	if ((error = suser_cred(oldcred, PRISON_ROOT)) != 0) {
992 		PROC_UNLOCK(p);
993 		crfree(newcred);
994 		return (error);
995 	}
996 
997 	crcopy(newcred, oldcred);
998 	if (ngrp > 0) {
999 		newcred->cr_ngroups = ngrp + 1;
1000 
1001 		bsd_gidset = newcred->cr_groups;
1002 		ngrp--;
1003 		while (ngrp >= 0) {
1004 			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
1005 			ngrp--;
1006 		}
1007 	}
1008 	else
1009 		newcred->cr_ngroups = 1;
1010 
1011 	setsugid(p);
1012 	p->p_ucred = newcred;
1013 	PROC_UNLOCK(p);
1014 	crfree(oldcred);
1015 	return (0);
1016 }
1017 
1018 int
1019 linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
1020 {
1021 	struct ucred *cred;
1022 	l_gid_t linux_gidset[NGROUPS];
1023 	gid_t *bsd_gidset;
1024 	int bsd_gidsetsz, ngrp, error;
1025 
1026 	cred = td->td_ucred;
1027 	bsd_gidset = cred->cr_groups;
1028 	bsd_gidsetsz = cred->cr_ngroups - 1;
1029 
1030 	/*
1031 	 * cr_groups[0] holds egid. Returning the whole set
1032 	 * here will cause a duplicate. Exclude cr_groups[0]
1033 	 * to prevent that.
1034 	 */
1035 
1036 	if ((ngrp = args->gidsetsize) == 0) {
1037 		td->td_retval[0] = bsd_gidsetsz;
1038 		return (0);
1039 	}
1040 
1041 	if (ngrp < bsd_gidsetsz)
1042 		return (EINVAL);
1043 
1044 	ngrp = 0;
1045 	while (ngrp < bsd_gidsetsz) {
1046 		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1047 		ngrp++;
1048 	}
1049 
1050 	if ((error = copyout(linux_gidset, (caddr_t)args->grouplist,
1051 	    ngrp * sizeof(l_gid_t))))
1052 		return (error);
1053 
1054 	td->td_retval[0] = ngrp;
1055 	return (0);
1056 }
1057 
1058 #ifndef __alpha__
1059 int
1060 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
1061 {
1062 	struct __setrlimit_args bsd;
1063 	struct l_rlimit rlim;
1064 	int error;
1065 	caddr_t sg = stackgap_init();
1066 
1067 #ifdef DEBUG
1068 	if (ldebug(setrlimit))
1069 		printf(ARGS(setrlimit, "%d, %p"),
1070 		    args->resource, (void *)args->rlim);
1071 #endif
1072 
1073 	if (args->resource >= LINUX_RLIM_NLIMITS)
1074 		return (EINVAL);
1075 
1076 	bsd.which = linux_to_bsd_resource[args->resource];
1077 	if (bsd.which == -1)
1078 		return (EINVAL);
1079 
1080 	error = copyin((caddr_t)args->rlim, &rlim, sizeof(rlim));
1081 	if (error)
1082 		return (error);
1083 
1084 	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
1085 	bsd.rlp->rlim_cur = (rlim_t)rlim.rlim_cur;
1086 	bsd.rlp->rlim_max = (rlim_t)rlim.rlim_max;
1087 	return (setrlimit(td, &bsd));
1088 }
1089 
1090 int
1091 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
1092 {
1093 	struct __getrlimit_args bsd;
1094 	struct l_rlimit rlim;
1095 	int error;
1096 	caddr_t sg = stackgap_init();
1097 
1098 #ifdef DEBUG
1099 	if (ldebug(old_getrlimit))
1100 		printf(ARGS(old_getrlimit, "%d, %p"),
1101 		    args->resource, (void *)args->rlim);
1102 #endif
1103 
1104 	if (args->resource >= LINUX_RLIM_NLIMITS)
1105 		return (EINVAL);
1106 
1107 	bsd.which = linux_to_bsd_resource[args->resource];
1108 	if (bsd.which == -1)
1109 		return (EINVAL);
1110 
1111 	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
1112 	error = getrlimit(td, &bsd);
1113 	if (error)
1114 		return (error);
1115 
1116 	rlim.rlim_cur = (unsigned long)bsd.rlp->rlim_cur;
1117 	if (rlim.rlim_cur == ULONG_MAX)
1118 		rlim.rlim_cur = LONG_MAX;
1119 	rlim.rlim_max = (unsigned long)bsd.rlp->rlim_max;
1120 	if (rlim.rlim_max == ULONG_MAX)
1121 		rlim.rlim_max = LONG_MAX;
1122 	return (copyout(&rlim, (caddr_t)args->rlim, sizeof(rlim)));
1123 }
1124 
1125 int
1126 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
1127 {
1128 	struct __getrlimit_args bsd;
1129 	struct l_rlimit rlim;
1130 	int error;
1131 	caddr_t sg = stackgap_init();
1132 
1133 #ifdef DEBUG
1134 	if (ldebug(getrlimit))
1135 		printf(ARGS(getrlimit, "%d, %p"),
1136 		    args->resource, (void *)args->rlim);
1137 #endif
1138 
1139 	if (args->resource >= LINUX_RLIM_NLIMITS)
1140 		return (EINVAL);
1141 
1142 	bsd.which = linux_to_bsd_resource[args->resource];
1143 	if (bsd.which == -1)
1144 		return (EINVAL);
1145 
1146 	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
1147 	error = getrlimit(td, &bsd);
1148 	if (error)
1149 		return (error);
1150 
1151 	rlim.rlim_cur = (l_ulong)bsd.rlp->rlim_cur;
1152 	rlim.rlim_max = (l_ulong)bsd.rlp->rlim_max;
1153 	return (copyout(&rlim, (caddr_t)args->rlim, sizeof(rlim)));
1154 }
1155 #endif /*!__alpha__*/
1156 
1157 int
1158 linux_sched_setscheduler(struct thread *td,
1159     struct linux_sched_setscheduler_args *args)
1160 {
1161 	struct sched_setscheduler_args bsd;
1162 
1163 #ifdef DEBUG
1164 	if (ldebug(sched_setscheduler))
1165 		printf(ARGS(sched_setscheduler, "%d, %d, %p"),
1166 		    args->pid, args->policy, (const void *)args->param);
1167 #endif
1168 
1169 	switch (args->policy) {
1170 	case LINUX_SCHED_OTHER:
1171 		bsd.policy = SCHED_OTHER;
1172 		break;
1173 	case LINUX_SCHED_FIFO:
1174 		bsd.policy = SCHED_FIFO;
1175 		break;
1176 	case LINUX_SCHED_RR:
1177 		bsd.policy = SCHED_RR;
1178 		break;
1179 	default:
1180 		return EINVAL;
1181 	}
1182 
1183 	bsd.pid = args->pid;
1184 	bsd.param = (struct sched_param *)args->param;
1185 	return sched_setscheduler(td, &bsd);
1186 }
1187 
1188 int
1189 linux_sched_getscheduler(struct thread *td,
1190     struct linux_sched_getscheduler_args *args)
1191 {
1192 	struct sched_getscheduler_args bsd;
1193 	int error;
1194 
1195 #ifdef DEBUG
1196 	if (ldebug(sched_getscheduler))
1197 		printf(ARGS(sched_getscheduler, "%d"), args->pid);
1198 #endif
1199 
1200 	bsd.pid = args->pid;
1201 	error = sched_getscheduler(td, &bsd);
1202 
1203 	switch (td->td_retval[0]) {
1204 	case SCHED_OTHER:
1205 		td->td_retval[0] = LINUX_SCHED_OTHER;
1206 		break;
1207 	case SCHED_FIFO:
1208 		td->td_retval[0] = LINUX_SCHED_FIFO;
1209 		break;
1210 	case SCHED_RR:
1211 		td->td_retval[0] = LINUX_SCHED_RR;
1212 		break;
1213 	}
1214 
1215 	return error;
1216 }
1217 
1218 int
1219 linux_sched_get_priority_max(struct thread *td,
1220     struct linux_sched_get_priority_max_args *args)
1221 {
1222 	struct sched_get_priority_max_args bsd;
1223 
1224 #ifdef DEBUG
1225 	if (ldebug(sched_get_priority_max))
1226 		printf(ARGS(sched_get_priority_max, "%d"), args->policy);
1227 #endif
1228 
1229 	switch (args->policy) {
1230 	case LINUX_SCHED_OTHER:
1231 		bsd.policy = SCHED_OTHER;
1232 		break;
1233 	case LINUX_SCHED_FIFO:
1234 		bsd.policy = SCHED_FIFO;
1235 		break;
1236 	case LINUX_SCHED_RR:
1237 		bsd.policy = SCHED_RR;
1238 		break;
1239 	default:
1240 		return EINVAL;
1241 	}
1242 	return sched_get_priority_max(td, &bsd);
1243 }
1244 
1245 int
1246 linux_sched_get_priority_min(struct thread *td,
1247     struct linux_sched_get_priority_min_args *args)
1248 {
1249 	struct sched_get_priority_min_args bsd;
1250 
1251 #ifdef DEBUG
1252 	if (ldebug(sched_get_priority_min))
1253 		printf(ARGS(sched_get_priority_min, "%d"), args->policy);
1254 #endif
1255 
1256 	switch (args->policy) {
1257 	case LINUX_SCHED_OTHER:
1258 		bsd.policy = SCHED_OTHER;
1259 		break;
1260 	case LINUX_SCHED_FIFO:
1261 		bsd.policy = SCHED_FIFO;
1262 		break;
1263 	case LINUX_SCHED_RR:
1264 		bsd.policy = SCHED_RR;
1265 		break;
1266 	default:
1267 		return EINVAL;
1268 	}
1269 	return sched_get_priority_min(td, &bsd);
1270 }
1271 
1272 #define REBOOT_CAD_ON	0x89abcdef
1273 #define REBOOT_CAD_OFF	0
1274 #define REBOOT_HALT	0xcdef0123
1275 
1276 int
1277 linux_reboot(struct thread *td, struct linux_reboot_args *args)
1278 {
1279 	struct reboot_args bsd_args;
1280 
1281 #ifdef DEBUG
1282 	if (ldebug(reboot))
1283 		printf(ARGS(reboot, "0x%x"), args->cmd);
1284 #endif
1285 	if (args->cmd == REBOOT_CAD_ON || args->cmd == REBOOT_CAD_OFF)
1286 		return (0);
1287 	bsd_args.opt = (args->cmd == REBOOT_HALT) ? RB_HALT : 0;
1288 	return (reboot(td, &bsd_args));
1289 }
1290 
1291 #ifndef __alpha__
1292 
/*
 * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify
 * td->td_retval[1] when COMPAT_43 or COMPAT_SUNOS is defined. This
 * clobbers registers that are assumed to be preserved. The following
 * lightweight syscalls fix this. See also linux_getgid16() and
 * linux_getuid16() in linux_uid16.c.
 *
 * linux_getpid() - MP SAFE
 * linux_getgid() - MP SAFE
 * linux_getuid() - MP SAFE
 */
1304 
1305 int
1306 linux_getpid(struct thread *td, struct linux_getpid_args *args)
1307 {
1308 
1309 	td->td_retval[0] = td->td_proc->p_pid;
1310 	return (0);
1311 }
1312 
1313 int
1314 linux_getgid(struct thread *td, struct linux_getgid_args *args)
1315 {
1316 
1317 	td->td_retval[0] = td->td_ucred->cr_rgid;
1318 	return (0);
1319 }
1320 
1321 int
1322 linux_getuid(struct thread *td, struct linux_getuid_args *args)
1323 {
1324 
1325 	td->td_retval[0] = td->td_ucred->cr_ruid;
1326 	return (0);
1327 }
1328 
1329 #endif /*!__alpha__*/
1330 
1331 int
1332 linux_getsid(struct thread *td, struct linux_getsid_args *args)
1333 {
1334 	struct getsid_args bsd;
1335 	bsd.pid = args->pid;
1336 	return getsid(td, &bsd);
1337 }
1338