xref: /freebsd/sys/compat/linux/linux_misc.c (revision 8fa113e5fc65fe6abc757f0089f477a87ee4d185)
1 /*-
 * Copyright (c) 1994-1995 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include "opt_compat.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact_aout.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/mman.h>
40 #include <sys/mount.h>
41 #include <sys/mutex.h>
42 #include <sys/namei.h>
43 #include <sys/poll.h>
44 #include <sys/proc.h>
45 #include <sys/blist.h>
46 #include <sys/reboot.h>
47 #include <sys/resourcevar.h>
48 #include <sys/signalvar.h>
49 #include <sys/stat.h>
50 #include <sys/sysctl.h>
51 #include <sys/sysproto.h>
52 #include <sys/time.h>
53 #include <sys/unistd.h>
54 #include <sys/vmmeter.h>
55 #include <sys/vnode.h>
56 #include <sys/wait.h>
57 
58 #include <vm/vm.h>
59 #include <vm/pmap.h>
60 #include <vm/vm_kern.h>
61 #include <vm/vm_map.h>
62 #include <vm/vm_extern.h>
63 #include <vm/vm_object.h>
64 #include <vm/vm_zone.h>
65 #include <vm/swap_pager.h>
66 
67 #include <machine/frame.h>
68 #include <machine/limits.h>
69 #include <machine/psl.h>
70 #include <machine/sysarch.h>
71 #ifdef __i386__
72 #include <machine/segments.h>
73 #endif
74 
75 #include <posix4/sched.h>
76 
77 #include <machine/../linux/linux.h>
78 #include <machine/../linux/linux_proto.h>
79 #include <compat/linux/linux_mib.h>
80 #include <compat/linux/linux_util.h>
81 
/*
 * Translate a native signal number into the number reported to Linux
 * binaries.  On alpha the value is passed through untouched.  Elsewhere,
 * values up to LINUX_SIGTBLSZ are looked up in bsd_to_linux_signal[] and
 * anything larger is passed through as-is.
 *
 * Every use of the argument is parenthesized so that an arbitrary
 * expression can be passed safely.
 */
#ifdef __alpha__
#define BSD_TO_LINUX_SIGNAL(sig)       (sig)
#else
#define BSD_TO_LINUX_SIGNAL(sig)	\
	(((sig) <= LINUX_SIGTBLSZ) ?	\
	    bsd_to_linux_signal[_SIG_IDX(sig)] : (sig))
#endif
88 
89 #ifndef __alpha__
90 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
91 	RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
92 	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
93 	RLIMIT_MEMLOCK, -1
94 };
95 #endif /*!__alpha__*/
96 
97 struct l_sysinfo {
98 	l_long		uptime;		/* Seconds since boot */
99 	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
100 	l_ulong		totalram;	/* Total usable main memory size */
101 	l_ulong		freeram;	/* Available memory size */
102 	l_ulong		sharedram;	/* Amount of shared memory */
103 	l_ulong		bufferram;	/* Memory used by buffers */
104 	l_ulong		totalswap;	/* Total swap space size */
105 	l_ulong		freeswap;	/* swap space still available */
106 	l_ushort	procs;		/* Number of current processes */
107 	char		_f[22];		/* Pads structure to 64 bytes */
108 };
109 #ifndef __alpha__
110 int
111 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
112 {
113 	struct l_sysinfo sysinfo;
114 	vm_object_t object;
115 	int i;
116 	struct timespec ts;
117 
118 	/* Uptime is copied out of print_uptime() in kern_shutdown.c */
119 	getnanouptime(&ts);
120 	i = 0;
121 	if (ts.tv_sec >= 86400) {
122 		ts.tv_sec %= 86400;
123 		i = 1;
124 	}
125 	if (i || ts.tv_sec >= 3600) {
126 		ts.tv_sec %= 3600;
127 		i = 1;
128 	}
129 	if (i || ts.tv_sec >= 60) {
130 		ts.tv_sec %= 60;
131 		i = 1;
132 	}
133 	sysinfo.uptime=ts.tv_sec;
134 
135 	/* Use the information from the mib to get our load averages */
136 	for (i = 0; i < 3; i++)
137 		sysinfo.loads[i] = averunnable.ldavg[i];
138 
139 	sysinfo.totalram = physmem * PAGE_SIZE;
140 	sysinfo.freeram = sysinfo.totalram - cnt.v_wire_count * PAGE_SIZE;
141 
142 	sysinfo.sharedram = 0;
143 	for (object = TAILQ_FIRST(&vm_object_list); object != NULL;
144 	     object = TAILQ_NEXT(object, object_list))
145 		if (object->shadow_count > 1)
146 			sysinfo.sharedram += object->resident_page_count;
147 
148 	sysinfo.sharedram *= PAGE_SIZE;
149 	sysinfo.bufferram = 0;
150 
151 	if (swapblist == NULL) {
152 		sysinfo.totalswap= 0;
153 		sysinfo.freeswap = 0;
154 	} else {
155 		sysinfo.totalswap = swapblist->bl_blocks * 1024;
156 		sysinfo.freeswap = swapblist->bl_root->u.bmu_avail * PAGE_SIZE;
157 	}
158 
159 	sysinfo.procs = 20; /* Hack */
160 
161 	return copyout(&sysinfo, (caddr_t)args->info, sizeof(sysinfo));
162 }
163 #endif /*!__alpha__*/
164 
165 #ifndef __alpha__
166 int
167 linux_alarm(struct thread *td, struct linux_alarm_args *args)
168 {
169 	struct itimerval it, old_it;
170 	struct timeval tv;
171 	int s;
172 
173 #ifdef DEBUG
174 	if (ldebug(alarm))
175 		printf(ARGS(alarm, "%u"), args->secs);
176 #endif
177 
178 	if (args->secs > 100000000)
179 		return EINVAL;
180 
181 	it.it_value.tv_sec = (long)args->secs;
182 	it.it_value.tv_usec = 0;
183 	it.it_interval.tv_sec = 0;
184 	it.it_interval.tv_usec = 0;
185 	s = splsoftclock();
186 	old_it = td->td_proc->p_realtimer;
187 	getmicrouptime(&tv);
188 	if (timevalisset(&old_it.it_value))
189 		callout_stop(&td->td_proc->p_itcallout);
190 	if (it.it_value.tv_sec != 0) {
191 		callout_reset(&td->td_proc->p_itcallout, tvtohz(&it.it_value),
192 		    realitexpire, td->td_proc);
193 		timevaladd(&it.it_value, &tv);
194 	}
195 	td->td_proc->p_realtimer = it;
196 	splx(s);
197 	if (timevalcmp(&old_it.it_value, &tv, >)) {
198 		timevalsub(&old_it.it_value, &tv);
199 		if (old_it.it_value.tv_usec != 0)
200 			old_it.it_value.tv_sec++;
201 		td->td_retval[0] = old_it.it_value.tv_sec;
202 	}
203 	return 0;
204 }
205 #endif /*!__alpha__*/
206 
207 int
208 linux_brk(struct thread *td, struct linux_brk_args *args)
209 {
210 	struct vmspace *vm = td->td_proc->p_vmspace;
211 	vm_offset_t new, old;
212 	struct obreak_args /* {
213 		char * nsize;
214 	} */ tmp;
215 
216 #ifdef DEBUG
217 	if (ldebug(brk))
218 		printf(ARGS(brk, "%p"), (void *)args->dsend);
219 #endif
220 	old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
221 	new = (vm_offset_t)args->dsend;
222 	tmp.nsize = (char *) new;
223 	if (((caddr_t)new > vm->vm_daddr) && !obreak(td, &tmp))
224 		td->td_retval[0] = (long)new;
225 	else
226 		td->td_retval[0] = (long)old;
227 
228 	return 0;
229 }
230 
231 int
232 linux_uselib(struct thread *td, struct linux_uselib_args *args)
233 {
234 	struct nameidata ni;
235 	struct vnode *vp;
236 	struct exec *a_out;
237 	struct vattr attr;
238 	vm_offset_t vmaddr;
239 	unsigned long file_offset;
240 	vm_offset_t buffer;
241 	unsigned long bss_size;
242 	int error;
243 	caddr_t sg;
244 	int locked;
245 
246 	sg = stackgap_init();
247 	CHECKALTEXIST(td, &sg, args->library);
248 
249 #ifdef DEBUG
250 	if (ldebug(uselib))
251 		printf(ARGS(uselib, "%s"), args->library);
252 #endif
253 
254 	a_out = NULL;
255 	locked = 0;
256 	vp = NULL;
257 
258 	NDINIT(&ni, LOOKUP, FOLLOW|LOCKLEAF, UIO_USERSPACE, args->library, td);
259 	error = namei(&ni);
260 	if (error)
261 		goto cleanup;
262 
263 	vp = ni.ni_vp;
264 	/*
265 	 * XXX - This looks like a bogus check. A LOCKLEAF namei should not
266 	 * succeed without returning a vnode.
267 	 */
268 	if (vp == NULL) {
269 		error = ENOEXEC;	/* ?? */
270 		goto cleanup;
271 	}
272 	NDFREE(&ni, NDF_ONLY_PNBUF);
273 
274 	/*
275 	 * From here on down, we have a locked vnode that must be unlocked.
276 	 */
277 	locked++;
278 
279 	/* Writable? */
280 	if (vp->v_writecount) {
281 		error = ETXTBSY;
282 		goto cleanup;
283 	}
284 
285 	/* Executable? */
286 	error = VOP_GETATTR(vp, &attr, td->td_proc->p_ucred, td);
287 	if (error)
288 		goto cleanup;
289 
290 	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
291 	    ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) {
292 		error = ENOEXEC;
293 		goto cleanup;
294 	}
295 
296 	/* Sensible size? */
297 	if (attr.va_size == 0) {
298 		error = ENOEXEC;
299 		goto cleanup;
300 	}
301 
302 	/* Can we access it? */
303 	error = VOP_ACCESS(vp, VEXEC, td->td_proc->p_ucred, td);
304 	if (error)
305 		goto cleanup;
306 
307 	error = VOP_OPEN(vp, FREAD, td->td_proc->p_ucred, td);
308 	if (error)
309 		goto cleanup;
310 
311 	/*
312 	 * Lock no longer needed
313 	 */
314 	VOP_UNLOCK(vp, 0, td);
315 	locked = 0;
316 
317 	/* Pull in executable header into kernel_map */
318 	error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
319 	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
320 	if (error)
321 		goto cleanup;
322 
323 	/* Is it a Linux binary ? */
324 	if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
325 		error = ENOEXEC;
326 		goto cleanup;
327 	}
328 
329 	/*
330 	 * While we are here, we should REALLY do some more checks
331 	 */
332 
333 	/* Set file/virtual offset based on a.out variant. */
334 	switch ((int)(a_out->a_magic & 0xffff)) {
335 	case 0413:	/* ZMAGIC */
336 		file_offset = 1024;
337 		break;
338 	case 0314:	/* QMAGIC */
339 		file_offset = 0;
340 		break;
341 	default:
342 		error = ENOEXEC;
343 		goto cleanup;
344 	}
345 
346 	bss_size = round_page(a_out->a_bss);
347 
348 	/* Check various fields in header for validity/bounds. */
349 	if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
350 		error = ENOEXEC;
351 		goto cleanup;
352 	}
353 
354 	/* text + data can't exceed file size */
355 	if (a_out->a_data + a_out->a_text > attr.va_size) {
356 		error = EFAULT;
357 		goto cleanup;
358 	}
359 
360 	/* To protect td->td_proc->p_rlimit in the if condition. */
361 	mtx_assert(&Giant, MA_OWNED);
362 
363 	/*
364 	 * text/data/bss must not exceed limits
365 	 * XXX - this is not complete. it should check current usage PLUS
366 	 * the resources needed by this library.
367 	 */
368 	if (a_out->a_text > maxtsiz ||
369 	    a_out->a_data + bss_size >
370 	    td->td_proc->p_rlimit[RLIMIT_DATA].rlim_cur) {
371 		error = ENOMEM;
372 		goto cleanup;
373 	}
374 
375 	/* prevent more writers */
376 	vp->v_flag |= VTEXT;
377 
378 	/*
379 	 * Check if file_offset page aligned. Currently we cannot handle
380 	 * misalinged file offsets, and so we read in the entire image
381 	 * (what a waste).
382 	 */
383 	if (file_offset & PAGE_MASK) {
384 #ifdef DEBUG
385 		printf("uselib: Non page aligned binary %lu\n", file_offset);
386 #endif
387 		/* Map text+data read/write/execute */
388 
389 		/* a_entry is the load address and is page aligned */
390 		vmaddr = trunc_page(a_out->a_entry);
391 
392 		/* get anon user mapping, read+write+execute */
393 		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
394 		    &vmaddr, a_out->a_text + a_out->a_data, FALSE, VM_PROT_ALL,
395 		    VM_PROT_ALL, 0);
396 		if (error)
397 			goto cleanup;
398 
399 		/* map file into kernel_map */
400 		error = vm_mmap(kernel_map, &buffer,
401 		    round_page(a_out->a_text + a_out->a_data + file_offset),
402 		    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp,
403 		    trunc_page(file_offset));
404 		if (error)
405 			goto cleanup;
406 
407 		/* copy from kernel VM space to user space */
408 		error = copyout((caddr_t)(uintptr_t)(buffer + file_offset),
409 		    (caddr_t)vmaddr, a_out->a_text + a_out->a_data);
410 
411 		/* release temporary kernel space */
412 		vm_map_remove(kernel_map, buffer, buffer +
413 		    round_page(a_out->a_text + a_out->a_data + file_offset));
414 
415 		if (error)
416 			goto cleanup;
417 	} else {
418 #ifdef DEBUG
419 		printf("uselib: Page aligned binary %lu\n", file_offset);
420 #endif
421 		/*
422 		 * for QMAGIC, a_entry is 20 bytes beyond the load address
423 		 * to skip the executable header
424 		 */
425 		vmaddr = trunc_page(a_out->a_entry);
426 
427 		/*
428 		 * Map it all into the process's space as a single
429 		 * copy-on-write "data" segment.
430 		 */
431 		error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr,
432 		    a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
433 		    MAP_PRIVATE | MAP_FIXED, (caddr_t)vp, file_offset);
434 		if (error)
435 			goto cleanup;
436 	}
437 #ifdef DEBUG
438 	printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long*)vmaddr)[0],
439 	    ((long*)vmaddr)[1]);
440 #endif
441 	if (bss_size != 0) {
442 		/* Calculate BSS start address */
443 		vmaddr = trunc_page(a_out->a_entry) + a_out->a_text +
444 		    a_out->a_data;
445 
446 		/* allocate some 'anon' space */
447 		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
448 		    &vmaddr, bss_size, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0);
449 		if (error)
450 			goto cleanup;
451 	}
452 
453 cleanup:
454 	/* Unlock vnode if needed */
455 	if (locked)
456 		VOP_UNLOCK(vp, 0, td);
457 
458 	/* Release the kernel mapping. */
459 	if (a_out)
460 		vm_map_remove(kernel_map, (vm_offset_t)a_out,
461 		    (vm_offset_t)a_out + PAGE_SIZE);
462 
463 	return error;
464 }
465 
466 int
467 linux_select(struct thread *td, struct linux_select_args *args)
468 {
469 	struct select_args bsa;
470 	struct timeval tv0, tv1, utv, *tvp;
471 	caddr_t sg;
472 	int error;
473 
474 #ifdef DEBUG
475 	if (ldebug(select))
476 		printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds,
477 		    (void *)args->readfds, (void *)args->writefds,
478 		    (void *)args->exceptfds, (void *)args->timeout);
479 #endif
480 
481 	error = 0;
482 	bsa.nd = args->nfds;
483 	bsa.in = args->readfds;
484 	bsa.ou = args->writefds;
485 	bsa.ex = args->exceptfds;
486 	bsa.tv = (struct timeval *)args->timeout;
487 
488 	/*
489 	 * Store current time for computation of the amount of
490 	 * time left.
491 	 */
492 	if (args->timeout) {
493 		if ((error = copyin((caddr_t)args->timeout, &utv,
494 		    sizeof(utv))))
495 			goto select_out;
496 #ifdef DEBUG
497 		if (ldebug(select))
498 			printf(LMSG("incoming timeout (%ld/%ld)"),
499 			    utv.tv_sec, utv.tv_usec);
500 #endif
501 
502 		if (itimerfix(&utv)) {
503 			/*
504 			 * The timeval was invalid.  Convert it to something
505 			 * valid that will act as it does under Linux.
506 			 */
507 			sg = stackgap_init();
508 			tvp = stackgap_alloc(&sg, sizeof(utv));
509 			utv.tv_sec += utv.tv_usec / 1000000;
510 			utv.tv_usec %= 1000000;
511 			if (utv.tv_usec < 0) {
512 				utv.tv_sec -= 1;
513 				utv.tv_usec += 1000000;
514 			}
515 			if (utv.tv_sec < 0)
516 				timevalclear(&utv);
517 			if ((error = copyout(&utv, tvp, sizeof(utv))))
518 				goto select_out;
519 			bsa.tv = tvp;
520 		}
521 		microtime(&tv0);
522 	}
523 
524 	error = select(td, &bsa);
525 #ifdef DEBUG
526 	if (ldebug(select))
527 		printf(LMSG("real select returns %d"), error);
528 #endif
529 	if (error) {
530 		/*
531 		 * See fs/select.c in the Linux kernel.  Without this,
532 		 * Maelstrom doesn't work.
533 		 */
534 		if (error == ERESTART)
535 			error = EINTR;
536 		goto select_out;
537 	}
538 
539 	if (args->timeout) {
540 		if (td->td_retval[0]) {
541 			/*
542 			 * Compute how much time was left of the timeout,
543 			 * by subtracting the current time and the time
544 			 * before we started the call, and subtracting
545 			 * that result from the user-supplied value.
546 			 */
547 			microtime(&tv1);
548 			timevalsub(&tv1, &tv0);
549 			timevalsub(&utv, &tv1);
550 			if (utv.tv_sec < 0)
551 				timevalclear(&utv);
552 		} else
553 			timevalclear(&utv);
554 #ifdef DEBUG
555 		if (ldebug(select))
556 			printf(LMSG("outgoing timeout (%ld/%ld)"),
557 			    utv.tv_sec, utv.tv_usec);
558 #endif
559 		if ((error = copyout(&utv, (caddr_t)args->timeout,
560 		    sizeof(utv))))
561 			goto select_out;
562 	}
563 
564 select_out:
565 #ifdef DEBUG
566 	if (ldebug(select))
567 		printf(LMSG("select_out -> %d"), error);
568 #endif
569 	return error;
570 }
571 
572 int
573 linux_mremap(struct thread *td, struct linux_mremap_args *args)
574 {
575 	struct munmap_args /* {
576 		void *addr;
577 		size_t len;
578 	} */ bsd_args;
579 	int error = 0;
580 
581 #ifdef DEBUG
582 	if (ldebug(mremap))
583 		printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"),
584 		    (void *)args->addr,
585 		    (unsigned long)args->old_len,
586 		    (unsigned long)args->new_len,
587 		    (unsigned long)args->flags);
588 #endif
589 	args->new_len = round_page(args->new_len);
590 	args->old_len = round_page(args->old_len);
591 
592 	if (args->new_len > args->old_len) {
593 		td->td_retval[0] = 0;
594 		return ENOMEM;
595 	}
596 
597 	if (args->new_len < args->old_len) {
598 		bsd_args.addr = (caddr_t)(args->addr + args->new_len);
599 		bsd_args.len = args->old_len - args->new_len;
600 		error = munmap(td, &bsd_args);
601 	}
602 
603 	td->td_retval[0] = error ? 0 : (u_long)args->addr;
604 	return error;
605 }
606 
607 int
608 linux_msync(struct thread *td, struct linux_msync_args *args)
609 {
610 	struct msync_args bsd_args;
611 
612 	bsd_args.addr = (caddr_t)args->addr;
613 	bsd_args.len = args->len;
614 	bsd_args.flags = 0;	/* XXX ignore */
615 
616 	return msync(td, &bsd_args);
617 }
618 
619 #ifndef __alpha__
620 int
621 linux_time(struct thread *td, struct linux_time_args *args)
622 {
623 	struct timeval tv;
624 	l_time_t tm;
625 	int error;
626 
627 #ifdef DEBUG
628 	if (ldebug(time))
629 		printf(ARGS(time, "*"));
630 #endif
631 
632 	microtime(&tv);
633 	tm = tv.tv_sec;
634 	if (args->tm && (error = copyout(&tm, (caddr_t)args->tm, sizeof(tm))))
635 		return error;
636 	td->td_retval[0] = tm;
637 	return 0;
638 }
639 #endif	/*!__alpha__*/
640 
641 struct l_times_argv {
642 	l_long		tms_utime;
643 	l_long		tms_stime;
644 	l_long		tms_cutime;
645 	l_long		tms_cstime;
646 };
647 
#ifdef __alpha__
#define CLK_TCK 1024	/* Linux uses 1024 on alpha */
#else
#define CLK_TCK 100	/* Linux uses 100 */
#endif

/*
 * Convert a struct timeval (seconds plus microseconds) into a number of
 * CLK_TCK-per-second clock ticks.  The argument is parenthesized so that
 * any expression yielding a timeval (e.g. "*ptr") can be passed; it is
 * evaluated twice, so it must be free of side effects.
 */
#define CONVTCK(r)	\
	((r).tv_sec * CLK_TCK + (r).tv_usec / (1000000 / CLK_TCK))
655 
656 int
657 linux_times(struct thread *td, struct linux_times_args *args)
658 {
659 	struct timeval tv;
660 	struct l_times_argv tms;
661 	struct rusage ru;
662 	int error;
663 
664 #ifdef DEBUG
665 	if (ldebug(times))
666 		printf(ARGS(times, "*"));
667 #endif
668 
669 	mtx_lock_spin(&sched_lock);
670 	calcru(td->td_proc, &ru.ru_utime, &ru.ru_stime, NULL);
671 	mtx_unlock_spin(&sched_lock);
672 
673 	tms.tms_utime = CONVTCK(ru.ru_utime);
674 	tms.tms_stime = CONVTCK(ru.ru_stime);
675 
676 	tms.tms_cutime = CONVTCK(td->td_proc->p_stats->p_cru.ru_utime);
677 	tms.tms_cstime = CONVTCK(td->td_proc->p_stats->p_cru.ru_stime);
678 
679 	if ((error = copyout(&tms, (caddr_t)args->buf, sizeof(tms))))
680 		return error;
681 
682 	microuptime(&tv);
683 	td->td_retval[0] = (int)CONVTCK(tv);
684 	return 0;
685 }
686 
687 int
688 linux_newuname(struct thread *td, struct linux_newuname_args *args)
689 {
690 	struct l_new_utsname utsname;
691 	char *osrelease, *osname;
692 
693 #ifdef DEBUG
694 	if (ldebug(newuname))
695 		printf(ARGS(newuname, "*"));
696 #endif
697 
698 	osname = linux_get_osname(td->td_proc);
699 	osrelease = linux_get_osrelease(td->td_proc);
700 
701 	bzero(&utsname, sizeof(utsname));
702 	strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1);
703 	strncpy(utsname.nodename, hostname, LINUX_MAX_UTSNAME-1);
704 	strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1);
705 	strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1);
706 	strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1);
707 	strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1);
708 
709 	return (copyout(&utsname, (caddr_t)args->buf, sizeof(utsname)));
710 }
711 
#if defined(__i386__)
/* Access/modification time pair read from the caller of linux_utime(). */
struct l_utimbuf {
	l_time_t l_actime;
	l_time_t l_modtime;
};

/*
 * Linux utime() emulation on top of the native utimes().
 *
 * When args->times is given, the two second-resolution timestamps are
 * converted into a timeval pair (microseconds zero) placed in the stack
 * gap, because utimes() copies its argument in from user space.  With no
 * times given, a NULL pointer is passed through.
 */
int
linux_utime(struct thread *td, struct linux_utime_args *args)
{
	struct utimes_args /* {
		char	*path;
		struct	timeval *tptr;
	} */ bsdutimes;
	struct timeval stamps[2], *gap_tv;
	struct l_utimbuf lut;
	caddr_t sg;
	int error;

	sg = stackgap_init();
	CHECKALTEXIST(td, &sg, args->fname);

#ifdef DEBUG
	if (ldebug(utime))
		printf(ARGS(utime, "%s, *"), args->fname);
#endif

	bsdutimes.path = args->fname;
	bsdutimes.tptr = NULL;

	if (args->times) {
		error = copyin((caddr_t)args->times, &lut, sizeof lut);
		if (error)
			return error;

		stamps[0].tv_sec = lut.l_actime;
		stamps[0].tv_usec = 0;
		stamps[1].tv_sec = lut.l_modtime;
		stamps[1].tv_usec = 0;

		/* so that utimes can copyin */
		gap_tv = (struct timeval *)stackgap_alloc(&sg, sizeof(stamps));
		if (gap_tv == NULL)
			return (ENAMETOOLONG);
		error = copyout(stamps, gap_tv, sizeof(stamps));
		if (error)
			return error;
		bsdutimes.tptr = gap_tv;
	}

	return utimes(td, &bsdutimes);
}
#endif /* __i386__ */
759 
760 #define __WCLONE 0x80000000
761 
762 #ifndef __alpha__
763 int
764 linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
765 {
766 	struct wait_args /* {
767 		int pid;
768 		int *status;
769 		int options;
770 		struct	rusage *rusage;
771 	} */ tmp;
772 	int error, tmpstat;
773 
774 #ifdef DEBUG
775 	if (ldebug(waitpid))
776 		printf(ARGS(waitpid, "%d, %p, %d"),
777 		    args->pid, (void *)args->status, args->options);
778 #endif
779 
780 	tmp.pid = args->pid;
781 	tmp.status = args->status;
782 	tmp.options = (args->options & (WNOHANG | WUNTRACED));
783 	/* WLINUXCLONE should be equal to __WCLONE, but we make sure */
784 	if (args->options & __WCLONE)
785 		tmp.options |= WLINUXCLONE;
786 	tmp.rusage = NULL;
787 
788 	if ((error = wait4(td, &tmp)) != 0)
789 		return error;
790 
791 	if (args->status) {
792 		if ((error = copyin((caddr_t)args->status, &tmpstat,
793 		    sizeof(int))) != 0)
794 			return error;
795 		tmpstat &= 0xffff;
796 		if (WIFSIGNALED(tmpstat))
797 			tmpstat = (tmpstat & 0xffffff80) |
798 			    BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
799 		else if (WIFSTOPPED(tmpstat))
800 			tmpstat = (tmpstat & 0xffff00ff) |
801 			    (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
802 		return copyout(&tmpstat, (caddr_t)args->status, sizeof(int));
803 	}
804 
805 	return 0;
806 }
807 #endif	/*!__alpha__*/
808 
809 int
810 linux_wait4(struct thread *td, struct linux_wait4_args *args)
811 {
812 	struct wait_args /* {
813 		int pid;
814 		int *status;
815 		int options;
816 		struct	rusage *rusage;
817 	} */ tmp;
818 	int error, tmpstat;
819 
820 #ifdef DEBUG
821 	if (ldebug(wait4))
822 		printf(ARGS(wait4, "%d, %p, %d, %p"),
823 		    args->pid, (void *)args->status, args->options,
824 		    (void *)args->rusage);
825 #endif
826 
827 	tmp.pid = args->pid;
828 	tmp.status = args->status;
829 	tmp.options = (args->options & (WNOHANG | WUNTRACED));
830 	/* WLINUXCLONE should be equal to __WCLONE, but we make sure */
831 	if (args->options & __WCLONE)
832 		tmp.options |= WLINUXCLONE;
833 	tmp.rusage = (struct rusage *)args->rusage;
834 
835 	if ((error = wait4(td, &tmp)) != 0)
836 		return error;
837 
838 	SIGDELSET(td->td_proc->p_siglist, SIGCHLD);
839 
840 	if (args->status) {
841 		if ((error = copyin((caddr_t)args->status, &tmpstat,
842 		    sizeof(int))) != 0)
843 			return error;
844 		tmpstat &= 0xffff;
845 		if (WIFSIGNALED(tmpstat))
846 			tmpstat = (tmpstat & 0xffffff80) |
847 			    BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
848 		else if (WIFSTOPPED(tmpstat))
849 			tmpstat = (tmpstat & 0xffff00ff) |
850 			    (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
851 		return copyout(&tmpstat, (caddr_t)args->status, sizeof(int));
852 	}
853 
854 	return 0;
855 }
856 
857 int
858 linux_mknod(struct thread *td, struct linux_mknod_args *args)
859 {
860 	caddr_t sg;
861 	struct mknod_args bsd_mknod;
862 	struct mkfifo_args bsd_mkfifo;
863 
864 	sg = stackgap_init();
865 
866 	CHECKALTCREAT(td, &sg, args->path);
867 
868 #ifdef DEBUG
869 	if (ldebug(mknod))
870 		printf(ARGS(mknod, "%s, %d, %d"),
871 		    args->path, args->mode, args->dev);
872 #endif
873 
874 	if (args->mode & S_IFIFO) {
875 		bsd_mkfifo.path = args->path;
876 		bsd_mkfifo.mode = args->mode;
877 		return mkfifo(td, &bsd_mkfifo);
878 	} else {
879 		bsd_mknod.path = args->path;
880 		bsd_mknod.mode = args->mode;
881 		bsd_mknod.dev = args->dev;
882 		return mknod(td, &bsd_mknod);
883 	}
884 }
885 
886 /*
887  * UGH! This is just about the dumbest idea I've ever heard!!
888  */
889 int
890 linux_personality(struct thread *td, struct linux_personality_args *args)
891 {
892 #ifdef DEBUG
893 	if (ldebug(personality))
894 		printf(ARGS(personality, "%d"), args->per);
895 #endif
896 #ifndef __alpha__
897 	if (args->per != 0)
898 		return EINVAL;
899 #endif
900 
901 	/* Yes Jim, it's still a Linux... */
902 	td->td_retval[0] = 0;
903 	return 0;
904 }
905 
906 /*
907  * Wrappers for get/setitimer for debugging..
908  */
909 int
910 linux_setitimer(struct thread *td, struct linux_setitimer_args *args)
911 {
912 	struct setitimer_args bsa;
913 	struct itimerval foo;
914 	int error;
915 
916 #ifdef DEBUG
917 	if (ldebug(setitimer))
918 		printf(ARGS(setitimer, "%p, %p"),
919 		    (void *)args->itv, (void *)args->oitv);
920 #endif
921 	bsa.which = args->which;
922 	bsa.itv = (struct itimerval *)args->itv;
923 	bsa.oitv = (struct itimerval *)args->oitv;
924 	if (args->itv) {
925 	    if ((error = copyin((caddr_t)args->itv, &foo, sizeof(foo))))
926 		return error;
927 #ifdef DEBUG
928 	    if (ldebug(setitimer)) {
929 	        printf("setitimer: value: sec: %ld, usec: %ld\n",
930 		    foo.it_value.tv_sec, foo.it_value.tv_usec);
931 	        printf("setitimer: interval: sec: %ld, usec: %ld\n",
932 		    foo.it_interval.tv_sec, foo.it_interval.tv_usec);
933 	    }
934 #endif
935 	}
936 	return setitimer(td, &bsa);
937 }
938 
939 int
940 linux_getitimer(struct thread *td, struct linux_getitimer_args *args)
941 {
942 	struct getitimer_args bsa;
943 #ifdef DEBUG
944 	if (ldebug(getitimer))
945 		printf(ARGS(getitimer, "%p"), (void *)args->itv);
946 #endif
947 	bsa.which = args->which;
948 	bsa.itv = (struct itimerval *)args->itv;
949 	return getitimer(td, &bsa);
950 }
951 
952 #ifndef __alpha__
953 int
954 linux_nice(struct thread *td, struct linux_nice_args *args)
955 {
956 	struct setpriority_args	bsd_args;
957 
958 	bsd_args.which = PRIO_PROCESS;
959 	bsd_args.who = 0;	/* current process */
960 	bsd_args.prio = args->inc;
961 	return setpriority(td, &bsd_args);
962 }
963 #endif	/*!__alpha__*/
964 
965 int
966 linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
967 {
968 	struct ucred *newcred, *oldcred;
969 	l_gid_t linux_gidset[NGROUPS];
970 	gid_t *bsd_gidset;
971 	int ngrp, error;
972 
973 	ngrp = args->gidsetsize;
974 	oldcred = td->td_proc->p_ucred;
975 
976 	/*
977 	 * cr_groups[0] holds egid. Setting the whole set from
978 	 * the supplied set will cause egid to be changed too.
979 	 * Keep cr_groups[0] unchanged to prevent that.
980 	 */
981 
982 	if ((error = suser_xxx(oldcred, NULL, PRISON_ROOT)) != 0)
983 		return (error);
984 
985 	if (ngrp >= NGROUPS)
986 		return (EINVAL);
987 
988 	newcred = crdup(oldcred);
989 	if (ngrp > 0) {
990 		error = copyin((caddr_t)args->grouplist, linux_gidset,
991 			       ngrp * sizeof(l_gid_t));
992 		if (error)
993 			return (error);
994 
995 		newcred->cr_ngroups = ngrp + 1;
996 
997 		bsd_gidset = newcred->cr_groups;
998 		ngrp--;
999 		while (ngrp >= 0) {
1000 			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
1001 			ngrp--;
1002 		}
1003 	}
1004 	else
1005 		newcred->cr_ngroups = 1;
1006 
1007 	setsugid(td->td_proc);
1008 	td->td_proc->p_ucred = newcred;
1009 	crfree(oldcred);
1010 	return (0);
1011 }
1012 
1013 int
1014 linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
1015 {
1016 	struct ucred *cred;
1017 	l_gid_t linux_gidset[NGROUPS];
1018 	gid_t *bsd_gidset;
1019 	int bsd_gidsetsz, ngrp, error;
1020 
1021 	cred = td->td_proc->p_ucred;
1022 	bsd_gidset = cred->cr_groups;
1023 	bsd_gidsetsz = cred->cr_ngroups - 1;
1024 
1025 	/*
1026 	 * cr_groups[0] holds egid. Returning the whole set
1027 	 * here will cause a duplicate. Exclude cr_groups[0]
1028 	 * to prevent that.
1029 	 */
1030 
1031 	if ((ngrp = args->gidsetsize) == 0) {
1032 		td->td_retval[0] = bsd_gidsetsz;
1033 		return (0);
1034 	}
1035 
1036 	if (ngrp < bsd_gidsetsz)
1037 		return (EINVAL);
1038 
1039 	ngrp = 0;
1040 	while (ngrp < bsd_gidsetsz) {
1041 		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1042 		ngrp++;
1043 	}
1044 
1045 	if ((error = copyout(linux_gidset, (caddr_t)args->grouplist,
1046 	    ngrp * sizeof(l_gid_t))))
1047 		return (error);
1048 
1049 	td->td_retval[0] = ngrp;
1050 	return (0);
1051 }
1052 
1053 #ifndef __alpha__
1054 int
1055 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
1056 {
1057 	struct __setrlimit_args bsd;
1058 	struct l_rlimit rlim;
1059 	int error;
1060 	caddr_t sg = stackgap_init();
1061 
1062 #ifdef DEBUG
1063 	if (ldebug(setrlimit))
1064 		printf(ARGS(setrlimit, "%d, %p"),
1065 		    args->resource, (void *)args->rlim);
1066 #endif
1067 
1068 	if (args->resource >= LINUX_RLIM_NLIMITS)
1069 		return (EINVAL);
1070 
1071 	bsd.which = linux_to_bsd_resource[args->resource];
1072 	if (bsd.which == -1)
1073 		return (EINVAL);
1074 
1075 	error = copyin((caddr_t)args->rlim, &rlim, sizeof(rlim));
1076 	if (error)
1077 		return (error);
1078 
1079 	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
1080 	bsd.rlp->rlim_cur = (rlim_t)rlim.rlim_cur;
1081 	bsd.rlp->rlim_max = (rlim_t)rlim.rlim_max;
1082 	return (setrlimit(td, &bsd));
1083 }
1084 
1085 int
1086 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
1087 {
1088 	struct __getrlimit_args bsd;
1089 	struct l_rlimit rlim;
1090 	int error;
1091 	caddr_t sg = stackgap_init();
1092 
1093 #ifdef DEBUG
1094 	if (ldebug(old_getrlimit))
1095 		printf(ARGS(old_getrlimit, "%d, %p"),
1096 		    args->resource, (void *)args->rlim);
1097 #endif
1098 
1099 	if (args->resource >= LINUX_RLIM_NLIMITS)
1100 		return (EINVAL);
1101 
1102 	bsd.which = linux_to_bsd_resource[args->resource];
1103 	if (bsd.which == -1)
1104 		return (EINVAL);
1105 
1106 	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
1107 	error = getrlimit(td, &bsd);
1108 	if (error)
1109 		return (error);
1110 
1111 	rlim.rlim_cur = (unsigned long)bsd.rlp->rlim_cur;
1112 	if (rlim.rlim_cur == ULONG_MAX)
1113 		rlim.rlim_cur = LONG_MAX;
1114 	rlim.rlim_max = (unsigned long)bsd.rlp->rlim_max;
1115 	if (rlim.rlim_max == ULONG_MAX)
1116 		rlim.rlim_max = LONG_MAX;
1117 	return (copyout(&rlim, (caddr_t)args->rlim, sizeof(rlim)));
1118 }
1119 
1120 int
1121 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
1122 {
1123 	struct __getrlimit_args bsd;
1124 	struct l_rlimit rlim;
1125 	int error;
1126 	caddr_t sg = stackgap_init();
1127 
1128 #ifdef DEBUG
1129 	if (ldebug(getrlimit))
1130 		printf(ARGS(getrlimit, "%d, %p"),
1131 		    args->resource, (void *)args->rlim);
1132 #endif
1133 
1134 	if (args->resource >= LINUX_RLIM_NLIMITS)
1135 		return (EINVAL);
1136 
1137 	bsd.which = linux_to_bsd_resource[args->resource];
1138 	if (bsd.which == -1)
1139 		return (EINVAL);
1140 
1141 	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
1142 	error = getrlimit(td, &bsd);
1143 	if (error)
1144 		return (error);
1145 
1146 	rlim.rlim_cur = (l_ulong)bsd.rlp->rlim_cur;
1147 	rlim.rlim_max = (l_ulong)bsd.rlp->rlim_max;
1148 	return (copyout(&rlim, (caddr_t)args->rlim, sizeof(rlim)));
1149 }
1150 #endif /*!__alpha__*/
1151 
1152 int
1153 linux_sched_setscheduler(struct thread *td,
1154     struct linux_sched_setscheduler_args *args)
1155 {
1156 	struct sched_setscheduler_args bsd;
1157 
1158 #ifdef DEBUG
1159 	if (ldebug(sched_setscheduler))
1160 		printf(ARGS(sched_setscheduler, "%d, %d, %p"),
1161 		    args->pid, args->policy, (const void *)args->param);
1162 #endif
1163 
1164 	switch (args->policy) {
1165 	case LINUX_SCHED_OTHER:
1166 		bsd.policy = SCHED_OTHER;
1167 		break;
1168 	case LINUX_SCHED_FIFO:
1169 		bsd.policy = SCHED_FIFO;
1170 		break;
1171 	case LINUX_SCHED_RR:
1172 		bsd.policy = SCHED_RR;
1173 		break;
1174 	default:
1175 		return EINVAL;
1176 	}
1177 
1178 	bsd.pid = args->pid;
1179 	bsd.param = (struct sched_param *)args->param;
1180 	return sched_setscheduler(td, &bsd);
1181 }
1182 
1183 int
1184 linux_sched_getscheduler(struct thread *td,
1185     struct linux_sched_getscheduler_args *args)
1186 {
1187 	struct sched_getscheduler_args bsd;
1188 	int error;
1189 
1190 #ifdef DEBUG
1191 	if (ldebug(sched_getscheduler))
1192 		printf(ARGS(sched_getscheduler, "%d"), args->pid);
1193 #endif
1194 
1195 	bsd.pid = args->pid;
1196 	error = sched_getscheduler(td, &bsd);
1197 
1198 	switch (td->td_retval[0]) {
1199 	case SCHED_OTHER:
1200 		td->td_retval[0] = LINUX_SCHED_OTHER;
1201 		break;
1202 	case SCHED_FIFO:
1203 		td->td_retval[0] = LINUX_SCHED_FIFO;
1204 		break;
1205 	case SCHED_RR:
1206 		td->td_retval[0] = LINUX_SCHED_RR;
1207 		break;
1208 	}
1209 
1210 	return error;
1211 }
1212 
1213 int
1214 linux_sched_get_priority_max(struct thread *td,
1215     struct linux_sched_get_priority_max_args *args)
1216 {
1217 	struct sched_get_priority_max_args bsd;
1218 
1219 #ifdef DEBUG
1220 	if (ldebug(sched_get_priority_max))
1221 		printf(ARGS(sched_get_priority_max, "%d"), args->policy);
1222 #endif
1223 
1224 	switch (args->policy) {
1225 	case LINUX_SCHED_OTHER:
1226 		bsd.policy = SCHED_OTHER;
1227 		break;
1228 	case LINUX_SCHED_FIFO:
1229 		bsd.policy = SCHED_FIFO;
1230 		break;
1231 	case LINUX_SCHED_RR:
1232 		bsd.policy = SCHED_RR;
1233 		break;
1234 	default:
1235 		return EINVAL;
1236 	}
1237 	return sched_get_priority_max(td, &bsd);
1238 }
1239 
1240 int
1241 linux_sched_get_priority_min(struct thread *td,
1242     struct linux_sched_get_priority_min_args *args)
1243 {
1244 	struct sched_get_priority_min_args bsd;
1245 
1246 #ifdef DEBUG
1247 	if (ldebug(sched_get_priority_min))
1248 		printf(ARGS(sched_get_priority_min, "%d"), args->policy);
1249 #endif
1250 
1251 	switch (args->policy) {
1252 	case LINUX_SCHED_OTHER:
1253 		bsd.policy = SCHED_OTHER;
1254 		break;
1255 	case LINUX_SCHED_FIFO:
1256 		bsd.policy = SCHED_FIFO;
1257 		break;
1258 	case LINUX_SCHED_RR:
1259 		bsd.policy = SCHED_RR;
1260 		break;
1261 	default:
1262 		return EINVAL;
1263 	}
1264 	return sched_get_priority_min(td, &bsd);
1265 }
1266 
1267 #define REBOOT_CAD_ON	0x89abcdef
1268 #define REBOOT_CAD_OFF	0
1269 #define REBOOT_HALT	0xcdef0123
1270 
1271 int
1272 linux_reboot(struct thread *td, struct linux_reboot_args *args)
1273 {
1274 	struct reboot_args bsd_args;
1275 
1276 #ifdef DEBUG
1277 	if (ldebug(reboot))
1278 		printf(ARGS(reboot, "0x%x"), args->cmd);
1279 #endif
1280 	if (args->cmd == REBOOT_CAD_ON || args->cmd == REBOOT_CAD_OFF)
1281 		return (0);
1282 	bsd_args.opt = (args->cmd == REBOOT_HALT) ? RB_HALT : 0;
1283 	return (reboot(td, &bsd_args));
1284 }
1285 
/*
 * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify
 * td->td_retval[1] when COMPAT_43 or COMPAT_SUNOS is defined. This
 * clobbers registers that are assumed to be preserved. The following
 * lightweight syscalls fix this. See also linux_getgid16() and
 * linux_getuid16() in linux_uid16.c.
 *
 * linux_getpid() - MP SAFE
 * linux_getgid() - MP SAFE
 * linux_getuid() - MP SAFE
 */
1297 
1298 int
1299 linux_getpid(struct thread *td, struct linux_getpid_args *args)
1300 {
1301 
1302 	td->td_retval[0] = td->td_proc->p_pid;
1303 	return (0);
1304 }
1305 
1306 int
1307 linux_getgid(struct thread *td, struct linux_getgid_args *args)
1308 {
1309 
1310 	td->td_retval[0] = td->td_proc->p_ucred->cr_rgid;
1311 	return (0);
1312 }
1313 
1314 int
1315 linux_getuid(struct thread *td, struct linux_getuid_args *args)
1316 {
1317 
1318 	td->td_retval[0] = td->td_proc->p_ucred->cr_ruid;
1319 	return (0);
1320 }
1321 
1322 int
1323 linux_getsid(struct thread *td, struct linux_getsid_args *args)
1324 {
1325 	struct getsid_args bsd;
1326 	bsd.pid = args->pid;
1327 	return getsid(td, &bsd);
1328 }
1329