xref: /freebsd/sys/compat/linux/linux_misc.c (revision f2ac424af7b980ba4d858ecfd1644ce197d6869d)
1 /*-
2  * Copyright (c) 1994-1995 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include "opt_compat.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact_aout.h>
37 #include <sys/jail.h>
38 #include <sys/kernel.h>
39 #include <sys/lock.h>
40 #include <sys/mman.h>
41 #include <sys/mount.h>
42 #include <sys/mutex.h>
43 #include <sys/namei.h>
44 #include <sys/poll.h>
45 #include <sys/proc.h>
46 #include <sys/blist.h>
47 #include <sys/reboot.h>
48 #include <sys/resourcevar.h>
49 #include <sys/signalvar.h>
50 #include <sys/stat.h>
51 #include <sys/sysctl.h>
52 #include <sys/sysproto.h>
53 #include <sys/time.h>
54 #include <sys/unistd.h>
55 #include <sys/vmmeter.h>
56 #include <sys/vnode.h>
57 #include <sys/wait.h>
58 
59 #include <vm/vm.h>
60 #include <vm/pmap.h>
61 #include <vm/vm_kern.h>
62 #include <vm/vm_map.h>
63 #include <vm/vm_extern.h>
64 #include <vm/vm_object.h>
65 #include <vm/vm_zone.h>
66 #include <vm/swap_pager.h>
67 
68 #include <machine/frame.h>
69 #include <machine/limits.h>
70 #include <machine/psl.h>
71 #include <machine/sysarch.h>
72 #ifdef __i386__
73 #include <machine/segments.h>
74 #endif
75 
76 #include <posix4/sched.h>
77 
78 #include <machine/../linux/linux.h>
79 #include <machine/../linux/linux_proto.h>
80 #include <compat/linux/linux_mib.h>
81 #include <compat/linux/linux_util.h>
82 
/*
 * Translate a BSD signal number into the number reported to Linux
 * binaries.  On alpha the value is passed through untouched; elsewhere
 * values up to LINUX_SIGTBLSZ are looked up in bsd_to_linux_signal[]
 * and anything larger is passed through.
 *
 * The argument is expanded more than once, so it must not have side
 * effects that are unsafe to repeat.
 */
#ifdef __alpha__
#define BSD_TO_LINUX_SIGNAL(sig)	(sig)
#else
#define BSD_TO_LINUX_SIGNAL(sig)	\
	(((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : (sig))
#endif
89 
90 #ifndef __alpha__
91 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
92 	RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
93 	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
94 	RLIMIT_MEMLOCK, -1
95 };
96 #endif /*!__alpha__*/
97 
98 struct l_sysinfo {
99 	l_long		uptime;		/* Seconds since boot */
100 	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
101 	l_ulong		totalram;	/* Total usable main memory size */
102 	l_ulong		freeram;	/* Available memory size */
103 	l_ulong		sharedram;	/* Amount of shared memory */
104 	l_ulong		bufferram;	/* Memory used by buffers */
105 	l_ulong		totalswap;	/* Total swap space size */
106 	l_ulong		freeswap;	/* swap space still available */
107 	l_ushort	procs;		/* Number of current processes */
108 	char		_f[22];		/* Pads structure to 64 bytes */
109 };
110 #ifndef __alpha__
111 int
112 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
113 {
114 	struct l_sysinfo sysinfo;
115 	vm_object_t object;
116 	int i;
117 	struct timespec ts;
118 
119 	/* Uptime is copied out of print_uptime() in kern_shutdown.c */
120 	getnanouptime(&ts);
121 	i = 0;
122 	if (ts.tv_sec >= 86400) {
123 		ts.tv_sec %= 86400;
124 		i = 1;
125 	}
126 	if (i || ts.tv_sec >= 3600) {
127 		ts.tv_sec %= 3600;
128 		i = 1;
129 	}
130 	if (i || ts.tv_sec >= 60) {
131 		ts.tv_sec %= 60;
132 		i = 1;
133 	}
134 	sysinfo.uptime=ts.tv_sec;
135 
136 	/* Use the information from the mib to get our load averages */
137 	for (i = 0; i < 3; i++)
138 		sysinfo.loads[i] = averunnable.ldavg[i];
139 
140 	sysinfo.totalram = physmem * PAGE_SIZE;
141 	sysinfo.freeram = sysinfo.totalram - cnt.v_wire_count * PAGE_SIZE;
142 
143 	sysinfo.sharedram = 0;
144 	for (object = TAILQ_FIRST(&vm_object_list); object != NULL;
145 	     object = TAILQ_NEXT(object, object_list))
146 		if (object->shadow_count > 1)
147 			sysinfo.sharedram += object->resident_page_count;
148 
149 	sysinfo.sharedram *= PAGE_SIZE;
150 	sysinfo.bufferram = 0;
151 
152 	if (swapblist == NULL) {
153 		sysinfo.totalswap= 0;
154 		sysinfo.freeswap = 0;
155 	} else {
156 		sysinfo.totalswap = swapblist->bl_blocks * 1024;
157 		sysinfo.freeswap = swapblist->bl_root->u.bmu_avail * PAGE_SIZE;
158 	}
159 
160 	sysinfo.procs = 20; /* Hack */
161 
162 	return copyout(&sysinfo, (caddr_t)args->info, sizeof(sysinfo));
163 }
164 #endif /*!__alpha__*/
165 
166 #ifndef __alpha__
167 int
168 linux_alarm(struct thread *td, struct linux_alarm_args *args)
169 {
170 	struct itimerval it, old_it;
171 	struct timeval tv;
172 	int s;
173 
174 #ifdef DEBUG
175 	if (ldebug(alarm))
176 		printf(ARGS(alarm, "%u"), args->secs);
177 #endif
178 
179 	if (args->secs > 100000000)
180 		return EINVAL;
181 
182 	it.it_value.tv_sec = (long)args->secs;
183 	it.it_value.tv_usec = 0;
184 	it.it_interval.tv_sec = 0;
185 	it.it_interval.tv_usec = 0;
186 	s = splsoftclock();
187 	old_it = td->td_proc->p_realtimer;
188 	getmicrouptime(&tv);
189 	if (timevalisset(&old_it.it_value))
190 		callout_stop(&td->td_proc->p_itcallout);
191 	if (it.it_value.tv_sec != 0) {
192 		callout_reset(&td->td_proc->p_itcallout, tvtohz(&it.it_value),
193 		    realitexpire, td->td_proc);
194 		timevaladd(&it.it_value, &tv);
195 	}
196 	td->td_proc->p_realtimer = it;
197 	splx(s);
198 	if (timevalcmp(&old_it.it_value, &tv, >)) {
199 		timevalsub(&old_it.it_value, &tv);
200 		if (old_it.it_value.tv_usec != 0)
201 			old_it.it_value.tv_sec++;
202 		td->td_retval[0] = old_it.it_value.tv_sec;
203 	}
204 	return 0;
205 }
206 #endif /*!__alpha__*/
207 
208 int
209 linux_brk(struct thread *td, struct linux_brk_args *args)
210 {
211 	struct vmspace *vm = td->td_proc->p_vmspace;
212 	vm_offset_t new, old;
213 	struct obreak_args /* {
214 		char * nsize;
215 	} */ tmp;
216 
217 #ifdef DEBUG
218 	if (ldebug(brk))
219 		printf(ARGS(brk, "%p"), (void *)args->dsend);
220 #endif
221 	old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
222 	new = (vm_offset_t)args->dsend;
223 	tmp.nsize = (char *) new;
224 	if (((caddr_t)new > vm->vm_daddr) && !obreak(td, &tmp))
225 		td->td_retval[0] = (long)new;
226 	else
227 		td->td_retval[0] = (long)old;
228 
229 	return 0;
230 }
231 
232 int
233 linux_uselib(struct thread *td, struct linux_uselib_args *args)
234 {
235 	struct nameidata ni;
236 	struct vnode *vp;
237 	struct exec *a_out;
238 	struct vattr attr;
239 	vm_offset_t vmaddr;
240 	unsigned long file_offset;
241 	vm_offset_t buffer;
242 	unsigned long bss_size;
243 	int error;
244 	caddr_t sg;
245 	int locked;
246 
247 	sg = stackgap_init();
248 	CHECKALTEXIST(td, &sg, args->library);
249 
250 #ifdef DEBUG
251 	if (ldebug(uselib))
252 		printf(ARGS(uselib, "%s"), args->library);
253 #endif
254 
255 	a_out = NULL;
256 	locked = 0;
257 	vp = NULL;
258 
259 	NDINIT(&ni, LOOKUP, FOLLOW|LOCKLEAF, UIO_USERSPACE, args->library, td);
260 	error = namei(&ni);
261 	if (error)
262 		goto cleanup;
263 
264 	vp = ni.ni_vp;
265 	/*
266 	 * XXX - This looks like a bogus check. A LOCKLEAF namei should not
267 	 * succeed without returning a vnode.
268 	 */
269 	if (vp == NULL) {
270 		error = ENOEXEC;	/* ?? */
271 		goto cleanup;
272 	}
273 	NDFREE(&ni, NDF_ONLY_PNBUF);
274 
275 	/*
276 	 * From here on down, we have a locked vnode that must be unlocked.
277 	 */
278 	locked++;
279 
280 	/* Writable? */
281 	if (vp->v_writecount) {
282 		error = ETXTBSY;
283 		goto cleanup;
284 	}
285 
286 	/* Executable? */
287 	error = VOP_GETATTR(vp, &attr, td->td_ucred, td);
288 	if (error)
289 		goto cleanup;
290 
291 	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
292 	    ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) {
293 		error = ENOEXEC;
294 		goto cleanup;
295 	}
296 
297 	/* Sensible size? */
298 	if (attr.va_size == 0) {
299 		error = ENOEXEC;
300 		goto cleanup;
301 	}
302 
303 	/* Can we access it? */
304 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
305 	if (error)
306 		goto cleanup;
307 
308 	error = VOP_OPEN(vp, FREAD, td->td_ucred, td);
309 	if (error)
310 		goto cleanup;
311 
312 	/*
313 	 * Lock no longer needed
314 	 */
315 	VOP_UNLOCK(vp, 0, td);
316 	locked = 0;
317 
318 	/* Pull in executable header into kernel_map */
319 	error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
320 	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
321 	if (error)
322 		goto cleanup;
323 
324 	/* Is it a Linux binary ? */
325 	if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
326 		error = ENOEXEC;
327 		goto cleanup;
328 	}
329 
330 	/*
331 	 * While we are here, we should REALLY do some more checks
332 	 */
333 
334 	/* Set file/virtual offset based on a.out variant. */
335 	switch ((int)(a_out->a_magic & 0xffff)) {
336 	case 0413:	/* ZMAGIC */
337 		file_offset = 1024;
338 		break;
339 	case 0314:	/* QMAGIC */
340 		file_offset = 0;
341 		break;
342 	default:
343 		error = ENOEXEC;
344 		goto cleanup;
345 	}
346 
347 	bss_size = round_page(a_out->a_bss);
348 
349 	/* Check various fields in header for validity/bounds. */
350 	if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
351 		error = ENOEXEC;
352 		goto cleanup;
353 	}
354 
355 	/* text + data can't exceed file size */
356 	if (a_out->a_data + a_out->a_text > attr.va_size) {
357 		error = EFAULT;
358 		goto cleanup;
359 	}
360 
361 	/* To protect td->td_proc->p_rlimit in the if condition. */
362 	mtx_assert(&Giant, MA_OWNED);
363 
364 	/*
365 	 * text/data/bss must not exceed limits
366 	 * XXX - this is not complete. it should check current usage PLUS
367 	 * the resources needed by this library.
368 	 */
369 	if (a_out->a_text > maxtsiz ||
370 	    a_out->a_data + bss_size >
371 	    td->td_proc->p_rlimit[RLIMIT_DATA].rlim_cur) {
372 		error = ENOMEM;
373 		goto cleanup;
374 	}
375 
376 	/* prevent more writers */
377 	vp->v_flag |= VTEXT;
378 
379 	/*
380 	 * Check if file_offset page aligned. Currently we cannot handle
381 	 * misalinged file offsets, and so we read in the entire image
382 	 * (what a waste).
383 	 */
384 	if (file_offset & PAGE_MASK) {
385 #ifdef DEBUG
386 		printf("uselib: Non page aligned binary %lu\n", file_offset);
387 #endif
388 		/* Map text+data read/write/execute */
389 
390 		/* a_entry is the load address and is page aligned */
391 		vmaddr = trunc_page(a_out->a_entry);
392 
393 		/* get anon user mapping, read+write+execute */
394 		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
395 		    &vmaddr, a_out->a_text + a_out->a_data, FALSE, VM_PROT_ALL,
396 		    VM_PROT_ALL, 0);
397 		if (error)
398 			goto cleanup;
399 
400 		/* map file into kernel_map */
401 		error = vm_mmap(kernel_map, &buffer,
402 		    round_page(a_out->a_text + a_out->a_data + file_offset),
403 		    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp,
404 		    trunc_page(file_offset));
405 		if (error)
406 			goto cleanup;
407 
408 		/* copy from kernel VM space to user space */
409 		error = copyout((caddr_t)(uintptr_t)(buffer + file_offset),
410 		    (caddr_t)vmaddr, a_out->a_text + a_out->a_data);
411 
412 		/* release temporary kernel space */
413 		vm_map_remove(kernel_map, buffer, buffer +
414 		    round_page(a_out->a_text + a_out->a_data + file_offset));
415 
416 		if (error)
417 			goto cleanup;
418 	} else {
419 #ifdef DEBUG
420 		printf("uselib: Page aligned binary %lu\n", file_offset);
421 #endif
422 		/*
423 		 * for QMAGIC, a_entry is 20 bytes beyond the load address
424 		 * to skip the executable header
425 		 */
426 		vmaddr = trunc_page(a_out->a_entry);
427 
428 		/*
429 		 * Map it all into the process's space as a single
430 		 * copy-on-write "data" segment.
431 		 */
432 		error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr,
433 		    a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
434 		    MAP_PRIVATE | MAP_FIXED, (caddr_t)vp, file_offset);
435 		if (error)
436 			goto cleanup;
437 	}
438 #ifdef DEBUG
439 	printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long*)vmaddr)[0],
440 	    ((long*)vmaddr)[1]);
441 #endif
442 	if (bss_size != 0) {
443 		/* Calculate BSS start address */
444 		vmaddr = trunc_page(a_out->a_entry) + a_out->a_text +
445 		    a_out->a_data;
446 
447 		/* allocate some 'anon' space */
448 		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
449 		    &vmaddr, bss_size, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0);
450 		if (error)
451 			goto cleanup;
452 	}
453 
454 cleanup:
455 	/* Unlock vnode if needed */
456 	if (locked)
457 		VOP_UNLOCK(vp, 0, td);
458 
459 	/* Release the kernel mapping. */
460 	if (a_out)
461 		vm_map_remove(kernel_map, (vm_offset_t)a_out,
462 		    (vm_offset_t)a_out + PAGE_SIZE);
463 
464 	return error;
465 }
466 
467 int
468 linux_select(struct thread *td, struct linux_select_args *args)
469 {
470 	struct select_args bsa;
471 	struct timeval tv0, tv1, utv, *tvp;
472 	caddr_t sg;
473 	int error;
474 
475 #ifdef DEBUG
476 	if (ldebug(select))
477 		printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds,
478 		    (void *)args->readfds, (void *)args->writefds,
479 		    (void *)args->exceptfds, (void *)args->timeout);
480 #endif
481 
482 	error = 0;
483 	bsa.nd = args->nfds;
484 	bsa.in = args->readfds;
485 	bsa.ou = args->writefds;
486 	bsa.ex = args->exceptfds;
487 	bsa.tv = (struct timeval *)args->timeout;
488 
489 	/*
490 	 * Store current time for computation of the amount of
491 	 * time left.
492 	 */
493 	if (args->timeout) {
494 		if ((error = copyin((caddr_t)args->timeout, &utv,
495 		    sizeof(utv))))
496 			goto select_out;
497 #ifdef DEBUG
498 		if (ldebug(select))
499 			printf(LMSG("incoming timeout (%ld/%ld)"),
500 			    utv.tv_sec, utv.tv_usec);
501 #endif
502 
503 		if (itimerfix(&utv)) {
504 			/*
505 			 * The timeval was invalid.  Convert it to something
506 			 * valid that will act as it does under Linux.
507 			 */
508 			sg = stackgap_init();
509 			tvp = stackgap_alloc(&sg, sizeof(utv));
510 			utv.tv_sec += utv.tv_usec / 1000000;
511 			utv.tv_usec %= 1000000;
512 			if (utv.tv_usec < 0) {
513 				utv.tv_sec -= 1;
514 				utv.tv_usec += 1000000;
515 			}
516 			if (utv.tv_sec < 0)
517 				timevalclear(&utv);
518 			if ((error = copyout(&utv, tvp, sizeof(utv))))
519 				goto select_out;
520 			bsa.tv = tvp;
521 		}
522 		microtime(&tv0);
523 	}
524 
525 	error = select(td, &bsa);
526 #ifdef DEBUG
527 	if (ldebug(select))
528 		printf(LMSG("real select returns %d"), error);
529 #endif
530 	if (error) {
531 		/*
532 		 * See fs/select.c in the Linux kernel.  Without this,
533 		 * Maelstrom doesn't work.
534 		 */
535 		if (error == ERESTART)
536 			error = EINTR;
537 		goto select_out;
538 	}
539 
540 	if (args->timeout) {
541 		if (td->td_retval[0]) {
542 			/*
543 			 * Compute how much time was left of the timeout,
544 			 * by subtracting the current time and the time
545 			 * before we started the call, and subtracting
546 			 * that result from the user-supplied value.
547 			 */
548 			microtime(&tv1);
549 			timevalsub(&tv1, &tv0);
550 			timevalsub(&utv, &tv1);
551 			if (utv.tv_sec < 0)
552 				timevalclear(&utv);
553 		} else
554 			timevalclear(&utv);
555 #ifdef DEBUG
556 		if (ldebug(select))
557 			printf(LMSG("outgoing timeout (%ld/%ld)"),
558 			    utv.tv_sec, utv.tv_usec);
559 #endif
560 		if ((error = copyout(&utv, (caddr_t)args->timeout,
561 		    sizeof(utv))))
562 			goto select_out;
563 	}
564 
565 select_out:
566 #ifdef DEBUG
567 	if (ldebug(select))
568 		printf(LMSG("select_out -> %d"), error);
569 #endif
570 	return error;
571 }
572 
573 int
574 linux_mremap(struct thread *td, struct linux_mremap_args *args)
575 {
576 	struct munmap_args /* {
577 		void *addr;
578 		size_t len;
579 	} */ bsd_args;
580 	int error = 0;
581 
582 #ifdef DEBUG
583 	if (ldebug(mremap))
584 		printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"),
585 		    (void *)args->addr,
586 		    (unsigned long)args->old_len,
587 		    (unsigned long)args->new_len,
588 		    (unsigned long)args->flags);
589 #endif
590 	args->new_len = round_page(args->new_len);
591 	args->old_len = round_page(args->old_len);
592 
593 	if (args->new_len > args->old_len) {
594 		td->td_retval[0] = 0;
595 		return ENOMEM;
596 	}
597 
598 	if (args->new_len < args->old_len) {
599 		bsd_args.addr = (caddr_t)(args->addr + args->new_len);
600 		bsd_args.len = args->old_len - args->new_len;
601 		error = munmap(td, &bsd_args);
602 	}
603 
604 	td->td_retval[0] = error ? 0 : (u_long)args->addr;
605 	return error;
606 }
607 
608 int
609 linux_msync(struct thread *td, struct linux_msync_args *args)
610 {
611 	struct msync_args bsd_args;
612 
613 	bsd_args.addr = (caddr_t)args->addr;
614 	bsd_args.len = args->len;
615 	bsd_args.flags = 0;	/* XXX ignore */
616 
617 	return msync(td, &bsd_args);
618 }
619 
620 #ifndef __alpha__
621 int
622 linux_time(struct thread *td, struct linux_time_args *args)
623 {
624 	struct timeval tv;
625 	l_time_t tm;
626 	int error;
627 
628 #ifdef DEBUG
629 	if (ldebug(time))
630 		printf(ARGS(time, "*"));
631 #endif
632 
633 	microtime(&tv);
634 	tm = tv.tv_sec;
635 	if (args->tm && (error = copyout(&tm, (caddr_t)args->tm, sizeof(tm))))
636 		return error;
637 	td->td_retval[0] = tm;
638 	return 0;
639 }
640 #endif	/*!__alpha__*/
641 
642 struct l_times_argv {
643 	l_long		tms_utime;
644 	l_long		tms_stime;
645 	l_long		tms_cutime;
646 	l_long		tms_cstime;
647 };
648 
/* Clock ticks per second used when reporting times to Linux binaries. */
#ifdef __alpha__
#define CLK_TCK 1024	/* Linux uses 1024 on alpha */
#else
#define CLK_TCK 100	/* Linux uses 100 */
#endif

/*
 * Convert a timeval-like object (one with tv_sec and tv_usec members;
 * pass the object itself, not a pointer) to clock ticks.  The argument
 * is parenthesised so expressions such as *ptr work, and is expanded
 * twice, so it must be free of side effects.
 */
#define CONVTCK(r)	\
	((r).tv_sec * CLK_TCK + (r).tv_usec / (1000000 / CLK_TCK))
656 
657 int
658 linux_times(struct thread *td, struct linux_times_args *args)
659 {
660 	struct timeval tv;
661 	struct l_times_argv tms;
662 	struct rusage ru;
663 	int error;
664 
665 #ifdef DEBUG
666 	if (ldebug(times))
667 		printf(ARGS(times, "*"));
668 #endif
669 
670 	mtx_lock_spin(&sched_lock);
671 	calcru(td->td_proc, &ru.ru_utime, &ru.ru_stime, NULL);
672 	mtx_unlock_spin(&sched_lock);
673 
674 	tms.tms_utime = CONVTCK(ru.ru_utime);
675 	tms.tms_stime = CONVTCK(ru.ru_stime);
676 
677 	tms.tms_cutime = CONVTCK(td->td_proc->p_stats->p_cru.ru_utime);
678 	tms.tms_cstime = CONVTCK(td->td_proc->p_stats->p_cru.ru_stime);
679 
680 	if ((error = copyout(&tms, (caddr_t)args->buf, sizeof(tms))))
681 		return error;
682 
683 	microuptime(&tv);
684 	td->td_retval[0] = (int)CONVTCK(tv);
685 	return 0;
686 }
687 
688 int
689 linux_newuname(struct thread *td, struct linux_newuname_args *args)
690 {
691 	struct l_new_utsname utsname;
692 	char osname[LINUX_MAX_UTSNAME];
693 	char osrelease[LINUX_MAX_UTSNAME];
694 
695 #ifdef DEBUG
696 	if (ldebug(newuname))
697 		printf(ARGS(newuname, "*"));
698 #endif
699 
700 	linux_get_osname(td->td_proc, osname);
701 	linux_get_osrelease(td->td_proc, osrelease);
702 
703 	bzero(&utsname, sizeof(utsname));
704 	strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1);
705 	getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME-1);
706 	strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1);
707 	strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1);
708 	strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1);
709 	strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1);
710 
711 	return (copyout(&utsname, (caddr_t)args->buf, sizeof(utsname)));
712 }
713 
714 #if defined(__i386__)
715 struct l_utimbuf {
716 	l_time_t l_actime;
717 	l_time_t l_modtime;
718 };
719 
720 int
721 linux_utime(struct thread *td, struct linux_utime_args *args)
722 {
723 	struct utimes_args /* {
724 		char	*path;
725 		struct	timeval *tptr;
726 	} */ bsdutimes;
727 	struct timeval tv[2], *tvp;
728 	struct l_utimbuf lut;
729 	int error;
730 	caddr_t sg;
731 
732 	sg = stackgap_init();
733 	CHECKALTEXIST(td, &sg, args->fname);
734 
735 #ifdef DEBUG
736 	if (ldebug(utime))
737 		printf(ARGS(utime, "%s, *"), args->fname);
738 #endif
739 
740 	if (args->times) {
741 		if ((error = copyin((caddr_t)args->times, &lut, sizeof lut)))
742 			return error;
743 		tv[0].tv_sec = lut.l_actime;
744 		tv[0].tv_usec = 0;
745 		tv[1].tv_sec = lut.l_modtime;
746 		tv[1].tv_usec = 0;
747 		/* so that utimes can copyin */
748 		tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
749 		if (tvp == NULL)
750 			return (ENAMETOOLONG);
751 		if ((error = copyout(tv, tvp, sizeof(tv))))
752 			return error;
753 		bsdutimes.tptr = tvp;
754 	} else
755 		bsdutimes.tptr = NULL;
756 
757 	bsdutimes.path = args->fname;
758 	return utimes(td, &bsdutimes);
759 }
760 #endif /* __i386__ */
761 
762 #define __WCLONE 0x80000000
763 
764 #ifndef __alpha__
765 int
766 linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
767 {
768 	struct wait_args /* {
769 		int pid;
770 		int *status;
771 		int options;
772 		struct	rusage *rusage;
773 	} */ tmp;
774 	int error, tmpstat;
775 
776 #ifdef DEBUG
777 	if (ldebug(waitpid))
778 		printf(ARGS(waitpid, "%d, %p, %d"),
779 		    args->pid, (void *)args->status, args->options);
780 #endif
781 
782 	tmp.pid = args->pid;
783 	tmp.status = args->status;
784 	tmp.options = (args->options & (WNOHANG | WUNTRACED));
785 	/* WLINUXCLONE should be equal to __WCLONE, but we make sure */
786 	if (args->options & __WCLONE)
787 		tmp.options |= WLINUXCLONE;
788 	tmp.rusage = NULL;
789 
790 	if ((error = wait4(td, &tmp)) != 0)
791 		return error;
792 
793 	if (args->status) {
794 		if ((error = copyin((caddr_t)args->status, &tmpstat,
795 		    sizeof(int))) != 0)
796 			return error;
797 		tmpstat &= 0xffff;
798 		if (WIFSIGNALED(tmpstat))
799 			tmpstat = (tmpstat & 0xffffff80) |
800 			    BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
801 		else if (WIFSTOPPED(tmpstat))
802 			tmpstat = (tmpstat & 0xffff00ff) |
803 			    (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
804 		return copyout(&tmpstat, (caddr_t)args->status, sizeof(int));
805 	}
806 
807 	return 0;
808 }
809 #endif	/*!__alpha__*/
810 
811 int
812 linux_wait4(struct thread *td, struct linux_wait4_args *args)
813 {
814 	struct wait_args /* {
815 		int pid;
816 		int *status;
817 		int options;
818 		struct	rusage *rusage;
819 	} */ tmp;
820 	int error, tmpstat;
821 
822 #ifdef DEBUG
823 	if (ldebug(wait4))
824 		printf(ARGS(wait4, "%d, %p, %d, %p"),
825 		    args->pid, (void *)args->status, args->options,
826 		    (void *)args->rusage);
827 #endif
828 
829 	tmp.pid = args->pid;
830 	tmp.status = args->status;
831 	tmp.options = (args->options & (WNOHANG | WUNTRACED));
832 	/* WLINUXCLONE should be equal to __WCLONE, but we make sure */
833 	if (args->options & __WCLONE)
834 		tmp.options |= WLINUXCLONE;
835 	tmp.rusage = (struct rusage *)args->rusage;
836 
837 	if ((error = wait4(td, &tmp)) != 0)
838 		return error;
839 
840 	SIGDELSET(td->td_proc->p_siglist, SIGCHLD);
841 
842 	if (args->status) {
843 		if ((error = copyin((caddr_t)args->status, &tmpstat,
844 		    sizeof(int))) != 0)
845 			return error;
846 		tmpstat &= 0xffff;
847 		if (WIFSIGNALED(tmpstat))
848 			tmpstat = (tmpstat & 0xffffff80) |
849 			    BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
850 		else if (WIFSTOPPED(tmpstat))
851 			tmpstat = (tmpstat & 0xffff00ff) |
852 			    (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
853 		return copyout(&tmpstat, (caddr_t)args->status, sizeof(int));
854 	}
855 
856 	return 0;
857 }
858 
859 int
860 linux_mknod(struct thread *td, struct linux_mknod_args *args)
861 {
862 	caddr_t sg;
863 	struct mknod_args bsd_mknod;
864 	struct mkfifo_args bsd_mkfifo;
865 
866 	sg = stackgap_init();
867 
868 	CHECKALTCREAT(td, &sg, args->path);
869 
870 #ifdef DEBUG
871 	if (ldebug(mknod))
872 		printf(ARGS(mknod, "%s, %d, %d"),
873 		    args->path, args->mode, args->dev);
874 #endif
875 
876 	if (args->mode & S_IFIFO) {
877 		bsd_mkfifo.path = args->path;
878 		bsd_mkfifo.mode = args->mode;
879 		return mkfifo(td, &bsd_mkfifo);
880 	} else {
881 		bsd_mknod.path = args->path;
882 		bsd_mknod.mode = args->mode;
883 		bsd_mknod.dev = args->dev;
884 		return mknod(td, &bsd_mknod);
885 	}
886 }
887 
888 /*
889  * UGH! This is just about the dumbest idea I've ever heard!!
890  */
891 int
892 linux_personality(struct thread *td, struct linux_personality_args *args)
893 {
894 #ifdef DEBUG
895 	if (ldebug(personality))
896 		printf(ARGS(personality, "%d"), args->per);
897 #endif
898 #ifndef __alpha__
899 	if (args->per != 0)
900 		return EINVAL;
901 #endif
902 
903 	/* Yes Jim, it's still a Linux... */
904 	td->td_retval[0] = 0;
905 	return 0;
906 }
907 
908 /*
909  * Wrappers for get/setitimer for debugging..
910  */
911 int
912 linux_setitimer(struct thread *td, struct linux_setitimer_args *args)
913 {
914 	struct setitimer_args bsa;
915 	struct itimerval foo;
916 	int error;
917 
918 #ifdef DEBUG
919 	if (ldebug(setitimer))
920 		printf(ARGS(setitimer, "%p, %p"),
921 		    (void *)args->itv, (void *)args->oitv);
922 #endif
923 	bsa.which = args->which;
924 	bsa.itv = (struct itimerval *)args->itv;
925 	bsa.oitv = (struct itimerval *)args->oitv;
926 	if (args->itv) {
927 	    if ((error = copyin((caddr_t)args->itv, &foo, sizeof(foo))))
928 		return error;
929 #ifdef DEBUG
930 	    if (ldebug(setitimer)) {
931 	        printf("setitimer: value: sec: %ld, usec: %ld\n",
932 		    foo.it_value.tv_sec, foo.it_value.tv_usec);
933 	        printf("setitimer: interval: sec: %ld, usec: %ld\n",
934 		    foo.it_interval.tv_sec, foo.it_interval.tv_usec);
935 	    }
936 #endif
937 	}
938 	return setitimer(td, &bsa);
939 }
940 
941 int
942 linux_getitimer(struct thread *td, struct linux_getitimer_args *args)
943 {
944 	struct getitimer_args bsa;
945 #ifdef DEBUG
946 	if (ldebug(getitimer))
947 		printf(ARGS(getitimer, "%p"), (void *)args->itv);
948 #endif
949 	bsa.which = args->which;
950 	bsa.itv = (struct itimerval *)args->itv;
951 	return getitimer(td, &bsa);
952 }
953 
954 #ifndef __alpha__
955 int
956 linux_nice(struct thread *td, struct linux_nice_args *args)
957 {
958 	struct setpriority_args	bsd_args;
959 
960 	bsd_args.which = PRIO_PROCESS;
961 	bsd_args.who = 0;	/* current process */
962 	bsd_args.prio = args->inc;
963 	return setpriority(td, &bsd_args);
964 }
965 #endif	/*!__alpha__*/
966 
967 int
968 linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
969 {
970 	struct ucred *newcred, *oldcred;
971 	l_gid_t linux_gidset[NGROUPS];
972 	gid_t *bsd_gidset;
973 	int ngrp, error;
974 
975 	ngrp = args->gidsetsize;
976 	oldcred = td->td_proc->p_ucred;
977 
978 	/*
979 	 * cr_groups[0] holds egid. Setting the whole set from
980 	 * the supplied set will cause egid to be changed too.
981 	 * Keep cr_groups[0] unchanged to prevent that.
982 	 */
983 
984 	if ((error = suser_xxx(oldcred, NULL, PRISON_ROOT)) != 0)
985 		return (error);
986 
987 	if (ngrp >= NGROUPS)
988 		return (EINVAL);
989 
990 	newcred = crdup(oldcred);
991 	if (ngrp > 0) {
992 		error = copyin((caddr_t)args->grouplist, linux_gidset,
993 			       ngrp * sizeof(l_gid_t));
994 		if (error)
995 			return (error);
996 
997 		newcred->cr_ngroups = ngrp + 1;
998 
999 		bsd_gidset = newcred->cr_groups;
1000 		ngrp--;
1001 		while (ngrp >= 0) {
1002 			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
1003 			ngrp--;
1004 		}
1005 	}
1006 	else
1007 		newcred->cr_ngroups = 1;
1008 
1009 	setsugid(td->td_proc);
1010 	td->td_proc->p_ucred = newcred;
1011 	crfree(oldcred);
1012 	return (0);
1013 }
1014 
1015 int
1016 linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
1017 {
1018 	struct ucred *cred;
1019 	l_gid_t linux_gidset[NGROUPS];
1020 	gid_t *bsd_gidset;
1021 	int bsd_gidsetsz, ngrp, error;
1022 
1023 	cred = td->td_proc->p_ucred;
1024 	bsd_gidset = cred->cr_groups;
1025 	bsd_gidsetsz = cred->cr_ngroups - 1;
1026 
1027 	/*
1028 	 * cr_groups[0] holds egid. Returning the whole set
1029 	 * here will cause a duplicate. Exclude cr_groups[0]
1030 	 * to prevent that.
1031 	 */
1032 
1033 	if ((ngrp = args->gidsetsize) == 0) {
1034 		td->td_retval[0] = bsd_gidsetsz;
1035 		return (0);
1036 	}
1037 
1038 	if (ngrp < bsd_gidsetsz)
1039 		return (EINVAL);
1040 
1041 	ngrp = 0;
1042 	while (ngrp < bsd_gidsetsz) {
1043 		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1044 		ngrp++;
1045 	}
1046 
1047 	if ((error = copyout(linux_gidset, (caddr_t)args->grouplist,
1048 	    ngrp * sizeof(l_gid_t))))
1049 		return (error);
1050 
1051 	td->td_retval[0] = ngrp;
1052 	return (0);
1053 }
1054 
1055 #ifndef __alpha__
1056 int
1057 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
1058 {
1059 	struct __setrlimit_args bsd;
1060 	struct l_rlimit rlim;
1061 	int error;
1062 	caddr_t sg = stackgap_init();
1063 
1064 #ifdef DEBUG
1065 	if (ldebug(setrlimit))
1066 		printf(ARGS(setrlimit, "%d, %p"),
1067 		    args->resource, (void *)args->rlim);
1068 #endif
1069 
1070 	if (args->resource >= LINUX_RLIM_NLIMITS)
1071 		return (EINVAL);
1072 
1073 	bsd.which = linux_to_bsd_resource[args->resource];
1074 	if (bsd.which == -1)
1075 		return (EINVAL);
1076 
1077 	error = copyin((caddr_t)args->rlim, &rlim, sizeof(rlim));
1078 	if (error)
1079 		return (error);
1080 
1081 	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
1082 	bsd.rlp->rlim_cur = (rlim_t)rlim.rlim_cur;
1083 	bsd.rlp->rlim_max = (rlim_t)rlim.rlim_max;
1084 	return (setrlimit(td, &bsd));
1085 }
1086 
1087 int
1088 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
1089 {
1090 	struct __getrlimit_args bsd;
1091 	struct l_rlimit rlim;
1092 	int error;
1093 	caddr_t sg = stackgap_init();
1094 
1095 #ifdef DEBUG
1096 	if (ldebug(old_getrlimit))
1097 		printf(ARGS(old_getrlimit, "%d, %p"),
1098 		    args->resource, (void *)args->rlim);
1099 #endif
1100 
1101 	if (args->resource >= LINUX_RLIM_NLIMITS)
1102 		return (EINVAL);
1103 
1104 	bsd.which = linux_to_bsd_resource[args->resource];
1105 	if (bsd.which == -1)
1106 		return (EINVAL);
1107 
1108 	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
1109 	error = getrlimit(td, &bsd);
1110 	if (error)
1111 		return (error);
1112 
1113 	rlim.rlim_cur = (unsigned long)bsd.rlp->rlim_cur;
1114 	if (rlim.rlim_cur == ULONG_MAX)
1115 		rlim.rlim_cur = LONG_MAX;
1116 	rlim.rlim_max = (unsigned long)bsd.rlp->rlim_max;
1117 	if (rlim.rlim_max == ULONG_MAX)
1118 		rlim.rlim_max = LONG_MAX;
1119 	return (copyout(&rlim, (caddr_t)args->rlim, sizeof(rlim)));
1120 }
1121 
1122 int
1123 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
1124 {
1125 	struct __getrlimit_args bsd;
1126 	struct l_rlimit rlim;
1127 	int error;
1128 	caddr_t sg = stackgap_init();
1129 
1130 #ifdef DEBUG
1131 	if (ldebug(getrlimit))
1132 		printf(ARGS(getrlimit, "%d, %p"),
1133 		    args->resource, (void *)args->rlim);
1134 #endif
1135 
1136 	if (args->resource >= LINUX_RLIM_NLIMITS)
1137 		return (EINVAL);
1138 
1139 	bsd.which = linux_to_bsd_resource[args->resource];
1140 	if (bsd.which == -1)
1141 		return (EINVAL);
1142 
1143 	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
1144 	error = getrlimit(td, &bsd);
1145 	if (error)
1146 		return (error);
1147 
1148 	rlim.rlim_cur = (l_ulong)bsd.rlp->rlim_cur;
1149 	rlim.rlim_max = (l_ulong)bsd.rlp->rlim_max;
1150 	return (copyout(&rlim, (caddr_t)args->rlim, sizeof(rlim)));
1151 }
1152 #endif /*!__alpha__*/
1153 
1154 int
1155 linux_sched_setscheduler(struct thread *td,
1156     struct linux_sched_setscheduler_args *args)
1157 {
1158 	struct sched_setscheduler_args bsd;
1159 
1160 #ifdef DEBUG
1161 	if (ldebug(sched_setscheduler))
1162 		printf(ARGS(sched_setscheduler, "%d, %d, %p"),
1163 		    args->pid, args->policy, (const void *)args->param);
1164 #endif
1165 
1166 	switch (args->policy) {
1167 	case LINUX_SCHED_OTHER:
1168 		bsd.policy = SCHED_OTHER;
1169 		break;
1170 	case LINUX_SCHED_FIFO:
1171 		bsd.policy = SCHED_FIFO;
1172 		break;
1173 	case LINUX_SCHED_RR:
1174 		bsd.policy = SCHED_RR;
1175 		break;
1176 	default:
1177 		return EINVAL;
1178 	}
1179 
1180 	bsd.pid = args->pid;
1181 	bsd.param = (struct sched_param *)args->param;
1182 	return sched_setscheduler(td, &bsd);
1183 }
1184 
1185 int
1186 linux_sched_getscheduler(struct thread *td,
1187     struct linux_sched_getscheduler_args *args)
1188 {
1189 	struct sched_getscheduler_args bsd;
1190 	int error;
1191 
1192 #ifdef DEBUG
1193 	if (ldebug(sched_getscheduler))
1194 		printf(ARGS(sched_getscheduler, "%d"), args->pid);
1195 #endif
1196 
1197 	bsd.pid = args->pid;
1198 	error = sched_getscheduler(td, &bsd);
1199 
1200 	switch (td->td_retval[0]) {
1201 	case SCHED_OTHER:
1202 		td->td_retval[0] = LINUX_SCHED_OTHER;
1203 		break;
1204 	case SCHED_FIFO:
1205 		td->td_retval[0] = LINUX_SCHED_FIFO;
1206 		break;
1207 	case SCHED_RR:
1208 		td->td_retval[0] = LINUX_SCHED_RR;
1209 		break;
1210 	}
1211 
1212 	return error;
1213 }
1214 
1215 int
1216 linux_sched_get_priority_max(struct thread *td,
1217     struct linux_sched_get_priority_max_args *args)
1218 {
1219 	struct sched_get_priority_max_args bsd;
1220 
1221 #ifdef DEBUG
1222 	if (ldebug(sched_get_priority_max))
1223 		printf(ARGS(sched_get_priority_max, "%d"), args->policy);
1224 #endif
1225 
1226 	switch (args->policy) {
1227 	case LINUX_SCHED_OTHER:
1228 		bsd.policy = SCHED_OTHER;
1229 		break;
1230 	case LINUX_SCHED_FIFO:
1231 		bsd.policy = SCHED_FIFO;
1232 		break;
1233 	case LINUX_SCHED_RR:
1234 		bsd.policy = SCHED_RR;
1235 		break;
1236 	default:
1237 		return EINVAL;
1238 	}
1239 	return sched_get_priority_max(td, &bsd);
1240 }
1241 
1242 int
1243 linux_sched_get_priority_min(struct thread *td,
1244     struct linux_sched_get_priority_min_args *args)
1245 {
1246 	struct sched_get_priority_min_args bsd;
1247 
1248 #ifdef DEBUG
1249 	if (ldebug(sched_get_priority_min))
1250 		printf(ARGS(sched_get_priority_min, "%d"), args->policy);
1251 #endif
1252 
1253 	switch (args->policy) {
1254 	case LINUX_SCHED_OTHER:
1255 		bsd.policy = SCHED_OTHER;
1256 		break;
1257 	case LINUX_SCHED_FIFO:
1258 		bsd.policy = SCHED_FIFO;
1259 		break;
1260 	case LINUX_SCHED_RR:
1261 		bsd.policy = SCHED_RR;
1262 		break;
1263 	default:
1264 		return EINVAL;
1265 	}
1266 	return sched_get_priority_min(td, &bsd);
1267 }
1268 
1269 #define REBOOT_CAD_ON	0x89abcdef
1270 #define REBOOT_CAD_OFF	0
1271 #define REBOOT_HALT	0xcdef0123
1272 
1273 int
1274 linux_reboot(struct thread *td, struct linux_reboot_args *args)
1275 {
1276 	struct reboot_args bsd_args;
1277 
1278 #ifdef DEBUG
1279 	if (ldebug(reboot))
1280 		printf(ARGS(reboot, "0x%x"), args->cmd);
1281 #endif
1282 	if (args->cmd == REBOOT_CAD_ON || args->cmd == REBOOT_CAD_OFF)
1283 		return (0);
1284 	bsd_args.opt = (args->cmd == REBOOT_HALT) ? RB_HALT : 0;
1285 	return (reboot(td, &bsd_args));
1286 }
1287 
1288 #ifndef __alpha__
1289 
/*
 * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify
 * td->td_retval[1] when COMPAT_43 or COMPAT_SUNOS is defined. This
 * clobbers registers that are assumed to be preserved. The following
 * lightweight syscalls fix this. See also linux_getgid16() and
 * linux_getuid16() in linux_uid16.c.
 *
 * linux_getpid() - MP SAFE
 * linux_getgid() - MP SAFE
 * linux_getuid() - MP SAFE
 */
1301 
1302 int
1303 linux_getpid(struct thread *td, struct linux_getpid_args *args)
1304 {
1305 
1306 	td->td_retval[0] = td->td_proc->p_pid;
1307 	return (0);
1308 }
1309 
1310 int
1311 linux_getgid(struct thread *td, struct linux_getgid_args *args)
1312 {
1313 
1314 	td->td_retval[0] = td->td_proc->p_ucred->cr_rgid;
1315 	return (0);
1316 }
1317 
1318 int
1319 linux_getuid(struct thread *td, struct linux_getuid_args *args)
1320 {
1321 
1322 	td->td_retval[0] = td->td_proc->p_ucred->cr_ruid;
1323 	return (0);
1324 }
1325 
1326 #endif /*!__alpha__*/
1327 
1328 int
1329 linux_getsid(struct thread *td, struct linux_getsid_args *args)
1330 {
1331 	struct getsid_args bsd;
1332 	bsd.pid = args->pid;
1333 	return getsid(td, &bsd);
1334 }
1335