xref: /freebsd/sys/compat/linux/linux_misc.c (revision 77b7cdf1999ee965ad494fddd184b18f532ac91a)
1 /*-
2  * Copyright (c) 1994-1995 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include "opt_mac.h"
32 
33 #include <sys/param.h>
34 #include <sys/blist.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact_aout.h>
37 #include <sys/jail.h>
38 #include <sys/kernel.h>
39 #include <sys/limits.h>
40 #include <sys/lock.h>
41 #include <sys/mac.h>
42 #include <sys/malloc.h>
43 #include <sys/mman.h>
44 #include <sys/mount.h>
45 #include <sys/mutex.h>
46 #include <sys/namei.h>
47 #include <sys/proc.h>
48 #include <sys/reboot.h>
49 #include <sys/resourcevar.h>
50 #include <sys/signalvar.h>
51 #include <sys/stat.h>
52 #include <sys/syscallsubr.h>
53 #include <sys/sysctl.h>
54 #include <sys/sysproto.h>
55 #include <sys/systm.h>
56 #include <sys/time.h>
57 #include <sys/vmmeter.h>
58 #include <sys/vnode.h>
59 #include <sys/wait.h>
60 
61 #include <vm/vm.h>
62 #include <vm/pmap.h>
63 #include <vm/vm_kern.h>
64 #include <vm/vm_map.h>
65 #include <vm/vm_extern.h>
66 #include <vm/vm_object.h>
67 #include <vm/swap_pager.h>
68 
69 #include <posix4/sched.h>
70 
71 #include <machine/../linux/linux.h>
72 #include <machine/../linux/linux_proto.h>
73 
74 #include <compat/linux/linux_mib.h>
75 #include <compat/linux/linux_util.h>
76 
/*
 * Translate a FreeBSD signal number into the number reported to Linux
 * binaries.  On alpha the number is used unchanged.  Elsewhere, numbers up
 * to LINUX_SIGTBLSZ are looked up in bsd_to_linux_signal[] and larger ones
 * pass through untouched.
 *
 * The argument is evaluated more than once, so it must be free of side
 * effects.  Every use of the argument is parenthesized so that an
 * expression argument cannot re-associate with the surrounding operators.
 */
#ifdef __alpha__
#define BSD_TO_LINUX_SIGNAL(sig)	(sig)
#else
#define BSD_TO_LINUX_SIGNAL(sig)	\
	(((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : (sig))
#endif
83 
84 #ifndef __alpha__
85 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
86 	RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
87 	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
88 	RLIMIT_MEMLOCK, -1
89 };
90 #endif /*!__alpha__*/
91 
92 struct l_sysinfo {
93 	l_long		uptime;		/* Seconds since boot */
94 	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
95 	l_ulong		totalram;	/* Total usable main memory size */
96 	l_ulong		freeram;	/* Available memory size */
97 	l_ulong		sharedram;	/* Amount of shared memory */
98 	l_ulong		bufferram;	/* Memory used by buffers */
99 	l_ulong		totalswap;	/* Total swap space size */
100 	l_ulong		freeswap;	/* swap space still available */
101 	l_ushort	procs;		/* Number of current processes */
102 	char		_f[22];		/* Pads structure to 64 bytes */
103 };
104 #ifndef __alpha__
105 int
106 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
107 {
108 	struct l_sysinfo sysinfo;
109 	vm_object_t object;
110 	int i;
111 	struct timespec ts;
112 
113 	/* Uptime is copied out of print_uptime() in kern_shutdown.c */
114 	getnanouptime(&ts);
115 	i = 0;
116 	if (ts.tv_sec >= 86400) {
117 		ts.tv_sec %= 86400;
118 		i = 1;
119 	}
120 	if (i || ts.tv_sec >= 3600) {
121 		ts.tv_sec %= 3600;
122 		i = 1;
123 	}
124 	if (i || ts.tv_sec >= 60) {
125 		ts.tv_sec %= 60;
126 		i = 1;
127 	}
128 	sysinfo.uptime=ts.tv_sec;
129 
130 	/* Use the information from the mib to get our load averages */
131 	for (i = 0; i < 3; i++)
132 		sysinfo.loads[i] = averunnable.ldavg[i];
133 
134 	sysinfo.totalram = physmem * PAGE_SIZE;
135 	sysinfo.freeram = sysinfo.totalram - cnt.v_wire_count * PAGE_SIZE;
136 
137 	sysinfo.sharedram = 0;
138 	for (object = TAILQ_FIRST(&vm_object_list); object != NULL;
139 	     object = TAILQ_NEXT(object, object_list))
140 		if (object->shadow_count > 1)
141 			sysinfo.sharedram += object->resident_page_count;
142 
143 	sysinfo.sharedram *= PAGE_SIZE;
144 	sysinfo.bufferram = 0;
145 
146 	if (swapblist == NULL) {
147 		sysinfo.totalswap= 0;
148 		sysinfo.freeswap = 0;
149 	} else {
150 		sysinfo.totalswap = swapblist->bl_blocks * 1024;
151 		sysinfo.freeswap = swapblist->bl_root->u.bmu_avail * PAGE_SIZE;
152 	}
153 
154 	sysinfo.procs = 20; /* Hack */
155 
156 	return copyout(&sysinfo, args->info, sizeof(sysinfo));
157 }
158 #endif /*!__alpha__*/
159 
160 #ifndef __alpha__
161 int
162 linux_alarm(struct thread *td, struct linux_alarm_args *args)
163 {
164 	struct itimerval it, old_it;
165 	struct timeval tv;
166 	struct proc *p;
167 
168 #ifdef DEBUG
169 	if (ldebug(alarm))
170 		printf(ARGS(alarm, "%u"), args->secs);
171 #endif
172 
173 	if (args->secs > 100000000)
174 		return EINVAL;
175 
176 	it.it_value.tv_sec = (long)args->secs;
177 	it.it_value.tv_usec = 0;
178 	it.it_interval.tv_sec = 0;
179 	it.it_interval.tv_usec = 0;
180 	p = td->td_proc;
181 	PROC_LOCK(p);
182 	old_it = p->p_realtimer;
183 	getmicrouptime(&tv);
184 	if (timevalisset(&old_it.it_value))
185 		callout_stop(&p->p_itcallout);
186 	if (it.it_value.tv_sec != 0) {
187 		callout_reset(&p->p_itcallout, tvtohz(&it.it_value),
188 		    realitexpire, p);
189 		timevaladd(&it.it_value, &tv);
190 	}
191 	p->p_realtimer = it;
192 	PROC_UNLOCK(p);
193 	if (timevalcmp(&old_it.it_value, &tv, >)) {
194 		timevalsub(&old_it.it_value, &tv);
195 		if (old_it.it_value.tv_usec != 0)
196 			old_it.it_value.tv_sec++;
197 		td->td_retval[0] = old_it.it_value.tv_sec;
198 	}
199 	return 0;
200 }
201 #endif /*!__alpha__*/
202 
203 int
204 linux_brk(struct thread *td, struct linux_brk_args *args)
205 {
206 	struct vmspace *vm = td->td_proc->p_vmspace;
207 	vm_offset_t new, old;
208 	struct obreak_args /* {
209 		char * nsize;
210 	} */ tmp;
211 
212 #ifdef DEBUG
213 	if (ldebug(brk))
214 		printf(ARGS(brk, "%p"), (void *)args->dsend);
215 #endif
216 	old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
217 	new = (vm_offset_t)args->dsend;
218 	tmp.nsize = (char *) new;
219 	if (((caddr_t)new > vm->vm_daddr) && !obreak(td, &tmp))
220 		td->td_retval[0] = (long)new;
221 	else
222 		td->td_retval[0] = (long)old;
223 
224 	return 0;
225 }
226 
227 int
228 linux_uselib(struct thread *td, struct linux_uselib_args *args)
229 {
230 	struct nameidata ni;
231 	struct vnode *vp;
232 	struct exec *a_out;
233 	struct vattr attr;
234 	vm_offset_t vmaddr;
235 	unsigned long file_offset;
236 	vm_offset_t buffer;
237 	unsigned long bss_size;
238 	char *library;
239 	int error;
240 	int locked;
241 
242 	LCONVPATHEXIST(td, args->library, &library);
243 
244 #ifdef DEBUG
245 	if (ldebug(uselib))
246 		printf(ARGS(uselib, "%s"), library);
247 #endif
248 
249 	a_out = NULL;
250 	locked = 0;
251 	vp = NULL;
252 
253 	/*
254 	 * XXX: This code should make use of vn_open(), rather than doing
255 	 * all this stuff itself.
256 	 */
257 	NDINIT(&ni, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, library, td);
258 	error = namei(&ni);
259 	LFREEPATH(library);
260 	if (error)
261 		goto cleanup;
262 
263 	vp = ni.ni_vp;
264 	/*
265 	 * XXX - This looks like a bogus check. A LOCKLEAF namei should not
266 	 * succeed without returning a vnode.
267 	 */
268 	if (vp == NULL) {
269 		error = ENOEXEC;	/* ?? */
270 		goto cleanup;
271 	}
272 	NDFREE(&ni, NDF_ONLY_PNBUF);
273 
274 	/*
275 	 * From here on down, we have a locked vnode that must be unlocked.
276 	 */
277 	locked++;
278 
279 	/* Writable? */
280 	if (vp->v_writecount) {
281 		error = ETXTBSY;
282 		goto cleanup;
283 	}
284 
285 	/* Executable? */
286 	error = VOP_GETATTR(vp, &attr, td->td_ucred, td);
287 	if (error)
288 		goto cleanup;
289 
290 	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
291 	    ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) {
292 		error = ENOEXEC;
293 		goto cleanup;
294 	}
295 
296 	/* Sensible size? */
297 	if (attr.va_size == 0) {
298 		error = ENOEXEC;
299 		goto cleanup;
300 	}
301 
302 	/* Can we access it? */
303 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
304 	if (error)
305 		goto cleanup;
306 
307 	/*
308 	 * XXX: This should use vn_open() so that it is properly authorized,
309 	 * and to reduce code redundancy all over the place here.
310 	 */
311 #ifdef MAC
312 	error = mac_check_vnode_open(td->td_ucred, vp, FREAD);
313 	if (error)
314 		goto cleanup;
315 #endif
316 	error = VOP_OPEN(vp, FREAD, td->td_ucred, td);
317 	if (error)
318 		goto cleanup;
319 
320 	/* Pull in executable header into kernel_map */
321 	error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
322 	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
323 	/*
324 	 * Lock no longer needed
325 	 */
326 	locked = 0;
327 	VOP_UNLOCK(vp, 0, td);
328 
329 	if (error)
330 		goto cleanup;
331 
332 	/* Is it a Linux binary ? */
333 	if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
334 		error = ENOEXEC;
335 		goto cleanup;
336 	}
337 
338 	/*
339 	 * While we are here, we should REALLY do some more checks
340 	 */
341 
342 	/* Set file/virtual offset based on a.out variant. */
343 	switch ((int)(a_out->a_magic & 0xffff)) {
344 	case 0413:	/* ZMAGIC */
345 		file_offset = 1024;
346 		break;
347 	case 0314:	/* QMAGIC */
348 		file_offset = 0;
349 		break;
350 	default:
351 		error = ENOEXEC;
352 		goto cleanup;
353 	}
354 
355 	bss_size = round_page(a_out->a_bss);
356 
357 	/* Check various fields in header for validity/bounds. */
358 	if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
359 		error = ENOEXEC;
360 		goto cleanup;
361 	}
362 
363 	/* text + data can't exceed file size */
364 	if (a_out->a_data + a_out->a_text > attr.va_size) {
365 		error = EFAULT;
366 		goto cleanup;
367 	}
368 
369 	/* To protect td->td_proc->p_rlimit in the if condition. */
370 	mtx_assert(&Giant, MA_OWNED);
371 
372 	/*
373 	 * text/data/bss must not exceed limits
374 	 * XXX - this is not complete. it should check current usage PLUS
375 	 * the resources needed by this library.
376 	 */
377 	if (a_out->a_text > maxtsiz ||
378 	    a_out->a_data + bss_size >
379 	    td->td_proc->p_rlimit[RLIMIT_DATA].rlim_cur) {
380 		error = ENOMEM;
381 		goto cleanup;
382 	}
383 
384 	mp_fixme("Unlocked vflags access.");
385 	/* prevent more writers */
386 	vp->v_vflag |= VV_TEXT;
387 
388 	/*
389 	 * Check if file_offset page aligned. Currently we cannot handle
390 	 * misalinged file offsets, and so we read in the entire image
391 	 * (what a waste).
392 	 */
393 	if (file_offset & PAGE_MASK) {
394 #ifdef DEBUG
395 		printf("uselib: Non page aligned binary %lu\n", file_offset);
396 #endif
397 		/* Map text+data read/write/execute */
398 
399 		/* a_entry is the load address and is page aligned */
400 		vmaddr = trunc_page(a_out->a_entry);
401 
402 		/* get anon user mapping, read+write+execute */
403 		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
404 		    &vmaddr, a_out->a_text + a_out->a_data, FALSE, VM_PROT_ALL,
405 		    VM_PROT_ALL, 0);
406 		if (error)
407 			goto cleanup;
408 
409 		/* map file into kernel_map */
410 		error = vm_mmap(kernel_map, &buffer,
411 		    round_page(a_out->a_text + a_out->a_data + file_offset),
412 		    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp,
413 		    trunc_page(file_offset));
414 		if (error)
415 			goto cleanup;
416 
417 		/* copy from kernel VM space to user space */
418 		error = copyout((void *)(buffer + file_offset),
419 		    (void *)vmaddr, a_out->a_text + a_out->a_data);
420 
421 		/* release temporary kernel space */
422 		vm_map_remove(kernel_map, buffer, buffer +
423 		    round_page(a_out->a_text + a_out->a_data + file_offset));
424 
425 		if (error)
426 			goto cleanup;
427 	} else {
428 #ifdef DEBUG
429 		printf("uselib: Page aligned binary %lu\n", file_offset);
430 #endif
431 		/*
432 		 * for QMAGIC, a_entry is 20 bytes beyond the load address
433 		 * to skip the executable header
434 		 */
435 		vmaddr = trunc_page(a_out->a_entry);
436 
437 		/*
438 		 * Map it all into the process's space as a single
439 		 * copy-on-write "data" segment.
440 		 */
441 		error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr,
442 		    a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
443 		    MAP_PRIVATE | MAP_FIXED, (caddr_t)vp, file_offset);
444 		if (error)
445 			goto cleanup;
446 	}
447 #ifdef DEBUG
448 	printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long*)vmaddr)[0],
449 	    ((long*)vmaddr)[1]);
450 #endif
451 	if (bss_size != 0) {
452 		/* Calculate BSS start address */
453 		vmaddr = trunc_page(a_out->a_entry) + a_out->a_text +
454 		    a_out->a_data;
455 
456 		/* allocate some 'anon' space */
457 		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
458 		    &vmaddr, bss_size, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0);
459 		if (error)
460 			goto cleanup;
461 	}
462 
463 cleanup:
464 	/* Unlock vnode if needed */
465 	if (locked)
466 		VOP_UNLOCK(vp, 0, td);
467 
468 	/* Release the kernel mapping. */
469 	if (a_out)
470 		vm_map_remove(kernel_map, (vm_offset_t)a_out,
471 		    (vm_offset_t)a_out + PAGE_SIZE);
472 
473 	return error;
474 }
475 
476 int
477 linux_select(struct thread *td, struct linux_select_args *args)
478 {
479 	struct timeval tv0, tv1, utv, *tvp;
480 	int error;
481 
482 #ifdef DEBUG
483 	if (ldebug(select))
484 		printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds,
485 		    (void *)args->readfds, (void *)args->writefds,
486 		    (void *)args->exceptfds, (void *)args->timeout);
487 #endif
488 
489 	/*
490 	 * Store current time for computation of the amount of
491 	 * time left.
492 	 */
493 	if (args->timeout) {
494 		if ((error = copyin(args->timeout, &utv, sizeof(utv))))
495 			goto select_out;
496 #ifdef DEBUG
497 		if (ldebug(select))
498 			printf(LMSG("incoming timeout (%ld/%ld)"),
499 			    utv.tv_sec, utv.tv_usec);
500 #endif
501 
502 		if (itimerfix(&utv)) {
503 			/*
504 			 * The timeval was invalid.  Convert it to something
505 			 * valid that will act as it does under Linux.
506 			 */
507 			utv.tv_sec += utv.tv_usec / 1000000;
508 			utv.tv_usec %= 1000000;
509 			if (utv.tv_usec < 0) {
510 				utv.tv_sec -= 1;
511 				utv.tv_usec += 1000000;
512 			}
513 			if (utv.tv_sec < 0)
514 				timevalclear(&utv);
515 		}
516 		microtime(&tv0);
517 		tvp = &utv;
518 	} else
519 		tvp = NULL;
520 
521 	error = kern_select(td, args->nfds, args->readfds, args->writefds,
522 	    args->exceptfds, tvp);
523 
524 #ifdef DEBUG
525 	if (ldebug(select))
526 		printf(LMSG("real select returns %d"), error);
527 #endif
528 	if (error) {
529 		/*
530 		 * See fs/select.c in the Linux kernel.  Without this,
531 		 * Maelstrom doesn't work.
532 		 */
533 		if (error == ERESTART)
534 			error = EINTR;
535 		goto select_out;
536 	}
537 
538 	if (args->timeout) {
539 		if (td->td_retval[0]) {
540 			/*
541 			 * Compute how much time was left of the timeout,
542 			 * by subtracting the current time and the time
543 			 * before we started the call, and subtracting
544 			 * that result from the user-supplied value.
545 			 */
546 			microtime(&tv1);
547 			timevalsub(&tv1, &tv0);
548 			timevalsub(&utv, &tv1);
549 			if (utv.tv_sec < 0)
550 				timevalclear(&utv);
551 		} else
552 			timevalclear(&utv);
553 #ifdef DEBUG
554 		if (ldebug(select))
555 			printf(LMSG("outgoing timeout (%ld/%ld)"),
556 			    utv.tv_sec, utv.tv_usec);
557 #endif
558 		if ((error = copyout(&utv, args->timeout, sizeof(utv))))
559 			goto select_out;
560 	}
561 
562 select_out:
563 #ifdef DEBUG
564 	if (ldebug(select))
565 		printf(LMSG("select_out -> %d"), error);
566 #endif
567 	return error;
568 }
569 
570 int
571 linux_mremap(struct thread *td, struct linux_mremap_args *args)
572 {
573 	struct munmap_args /* {
574 		void *addr;
575 		size_t len;
576 	} */ bsd_args;
577 	int error = 0;
578 
579 #ifdef DEBUG
580 	if (ldebug(mremap))
581 		printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"),
582 		    (void *)args->addr,
583 		    (unsigned long)args->old_len,
584 		    (unsigned long)args->new_len,
585 		    (unsigned long)args->flags);
586 #endif
587 	args->new_len = round_page(args->new_len);
588 	args->old_len = round_page(args->old_len);
589 
590 	if (args->new_len > args->old_len) {
591 		td->td_retval[0] = 0;
592 		return ENOMEM;
593 	}
594 
595 	if (args->new_len < args->old_len) {
596 		bsd_args.addr = (caddr_t)(args->addr + args->new_len);
597 		bsd_args.len = args->old_len - args->new_len;
598 		error = munmap(td, &bsd_args);
599 	}
600 
601 	td->td_retval[0] = error ? 0 : (uintptr_t)args->addr;
602 	return error;
603 }
604 
605 #define LINUX_MS_ASYNC       0x0001
606 #define LINUX_MS_INVALIDATE  0x0002
607 #define LINUX_MS_SYNC        0x0004
608 
609 int
610 linux_msync(struct thread *td, struct linux_msync_args *args)
611 {
612 	struct msync_args bsd_args;
613 
614 	bsd_args.addr = (caddr_t)args->addr;
615 	bsd_args.len = args->len;
616 	bsd_args.flags = args->fl & ~LINUX_MS_SYNC;
617 
618 	return msync(td, &bsd_args);
619 }
620 
621 #ifndef __alpha__
622 int
623 linux_time(struct thread *td, struct linux_time_args *args)
624 {
625 	struct timeval tv;
626 	l_time_t tm;
627 	int error;
628 
629 #ifdef DEBUG
630 	if (ldebug(time))
631 		printf(ARGS(time, "*"));
632 #endif
633 
634 	microtime(&tv);
635 	tm = tv.tv_sec;
636 	if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm))))
637 		return error;
638 	td->td_retval[0] = tm;
639 	return 0;
640 }
641 #endif	/*!__alpha__*/
642 
643 struct l_times_argv {
644 	l_long		tms_utime;
645 	l_long		tms_stime;
646 	l_long		tms_cutime;
647 	l_long		tms_cstime;
648 };
649 
650 #ifdef __alpha__
651 #define CLK_TCK 1024	/* Linux uses 1024 on alpha */
652 #else
653 #define CLK_TCK 100	/* Linux uses 100 */
654 #endif
655 
656 #define CONVTCK(r)	(r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
657 
658 int
659 linux_times(struct thread *td, struct linux_times_args *args)
660 {
661 	struct timeval tv;
662 	struct l_times_argv tms;
663 	struct rusage ru;
664 	int error;
665 
666 #ifdef DEBUG
667 	if (ldebug(times))
668 		printf(ARGS(times, "*"));
669 #endif
670 
671 	mtx_lock_spin(&sched_lock);
672 	calcru(td->td_proc, &ru.ru_utime, &ru.ru_stime, NULL);
673 	mtx_unlock_spin(&sched_lock);
674 
675 	tms.tms_utime = CONVTCK(ru.ru_utime);
676 	tms.tms_stime = CONVTCK(ru.ru_stime);
677 
678 	tms.tms_cutime = CONVTCK(td->td_proc->p_stats->p_cru.ru_utime);
679 	tms.tms_cstime = CONVTCK(td->td_proc->p_stats->p_cru.ru_stime);
680 
681 	if ((error = copyout(&tms, args->buf, sizeof(tms))))
682 		return error;
683 
684 	microuptime(&tv);
685 	td->td_retval[0] = (int)CONVTCK(tv);
686 	return 0;
687 }
688 
689 int
690 linux_newuname(struct thread *td, struct linux_newuname_args *args)
691 {
692 	struct l_new_utsname utsname;
693 	char osname[LINUX_MAX_UTSNAME];
694 	char osrelease[LINUX_MAX_UTSNAME];
695 
696 #ifdef DEBUG
697 	if (ldebug(newuname))
698 		printf(ARGS(newuname, "*"));
699 #endif
700 
701 	linux_get_osname(td, osname);
702 	linux_get_osrelease(td, osrelease);
703 
704 	bzero(&utsname, sizeof(utsname));
705 	strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME);
706 	getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME);
707 	strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME);
708 	strlcpy(utsname.version, version, LINUX_MAX_UTSNAME);
709 	strlcpy(utsname.machine, machine, LINUX_MAX_UTSNAME);
710 	strlcpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME);
711 
712 	return (copyout(&utsname, args->buf, sizeof(utsname)));
713 }
714 
#if defined(__i386__)
/* Linux utime() argument: access and modification times in seconds. */
struct l_utimbuf {
	l_time_t l_actime;
	l_time_t l_modtime;
};

/*
 * Linux utime(): set a file's access and modification times through
 * kern_utimes().  A NULL args->times passes NULL on to kern_utimes();
 * otherwise the two second-resolution values are copied in and widened to
 * timevals with zero microseconds.  The converted path is freed on every
 * exit taken after the conversion.
 */
int
linux_utime(struct thread *td, struct linux_utime_args *args)
{
	struct timeval tv[2], *tvp;
	struct l_utimbuf lut;
	char *fname;
	int error;

	LCONVPATHEXIST(td, args->fname, &fname);

#ifdef DEBUG
	if (ldebug(utime))
		printf(ARGS(utime, "%s, *"), fname);
#endif

	tvp = NULL;
	error = 0;
	if (args->times) {
		error = copyin(args->times, &lut, sizeof lut);
		if (error == 0) {
			tv[0].tv_sec = lut.l_actime;
			tv[0].tv_usec = 0;
			tv[1].tv_sec = lut.l_modtime;
			tv[1].tv_usec = 0;
			tvp = tv;
		}
	}

	if (error == 0)
		error = kern_utimes(td, fname, UIO_SYSSPACE, tvp,
		    UIO_SYSSPACE);
	LFREEPATH(fname);
	return (error);
}
#endif /* __i386__ */
754 
755 #define __WCLONE 0x80000000
756 
757 #ifndef __alpha__
758 int
759 linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
760 {
761 	struct wait_args /* {
762 		int pid;
763 		int *status;
764 		int options;
765 		struct	rusage *rusage;
766 	} */ tmp;
767 	int error, tmpstat;
768 
769 #ifdef DEBUG
770 	if (ldebug(waitpid))
771 		printf(ARGS(waitpid, "%d, %p, %d"),
772 		    args->pid, (void *)args->status, args->options);
773 #endif
774 
775 	tmp.pid = args->pid;
776 	tmp.status = args->status;
777 	tmp.options = (args->options & (WNOHANG | WUNTRACED));
778 	/* WLINUXCLONE should be equal to __WCLONE, but we make sure */
779 	if (args->options & __WCLONE)
780 		tmp.options |= WLINUXCLONE;
781 	tmp.rusage = NULL;
782 
783 	if ((error = wait4(td, &tmp)) != 0)
784 		return error;
785 
786 	if (args->status) {
787 		if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
788 			return error;
789 		tmpstat &= 0xffff;
790 		if (WIFSIGNALED(tmpstat))
791 			tmpstat = (tmpstat & 0xffffff80) |
792 			    BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
793 		else if (WIFSTOPPED(tmpstat))
794 			tmpstat = (tmpstat & 0xffff00ff) |
795 			    (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
796 		return copyout(&tmpstat, args->status, sizeof(int));
797 	}
798 
799 	return 0;
800 }
801 #endif	/*!__alpha__*/
802 
803 int
804 linux_wait4(struct thread *td, struct linux_wait4_args *args)
805 {
806 	struct wait_args /* {
807 		int pid;
808 		int *status;
809 		int options;
810 		struct	rusage *rusage;
811 	} */ tmp;
812 	int error, tmpstat;
813 	struct proc *p;
814 
815 #ifdef DEBUG
816 	if (ldebug(wait4))
817 		printf(ARGS(wait4, "%d, %p, %d, %p"),
818 		    args->pid, (void *)args->status, args->options,
819 		    (void *)args->rusage);
820 #endif
821 
822 	tmp.pid = args->pid;
823 	tmp.status = args->status;
824 	tmp.options = (args->options & (WNOHANG | WUNTRACED));
825 	/* WLINUXCLONE should be equal to __WCLONE, but we make sure */
826 	if (args->options & __WCLONE)
827 		tmp.options |= WLINUXCLONE;
828 	tmp.rusage = (struct rusage *)args->rusage;
829 
830 	if ((error = wait4(td, &tmp)) != 0)
831 		return error;
832 
833 	p = td->td_proc;
834 	PROC_LOCK(p);
835 	SIGDELSET(p->p_siglist, SIGCHLD);
836 	PROC_UNLOCK(p);
837 
838 	if (args->status) {
839 		if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
840 			return error;
841 		tmpstat &= 0xffff;
842 		if (WIFSIGNALED(tmpstat))
843 			tmpstat = (tmpstat & 0xffffff80) |
844 			    BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
845 		else if (WIFSTOPPED(tmpstat))
846 			tmpstat = (tmpstat & 0xffff00ff) |
847 			    (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
848 		return copyout(&tmpstat, args->status, sizeof(int));
849 	}
850 
851 	return 0;
852 }
853 
854 int
855 linux_mknod(struct thread *td, struct linux_mknod_args *args)
856 {
857 	char *path;
858 	int error;
859 
860 	LCONVPATHCREAT(td, args->path, &path);
861 
862 #ifdef DEBUG
863 	if (ldebug(mknod))
864 		printf(ARGS(mknod, "%s, %d, %d"), path, args->mode, args->dev);
865 #endif
866 
867 	if (args->mode & S_IFIFO)
868 		error = kern_mkfifo(td, path, UIO_SYSSPACE, args->mode);
869 	else
870 		error = kern_mknod(td, path, UIO_SYSSPACE, args->mode,
871 		    args->dev);
872 	LFREEPATH(path);
873 	return (error);
874 }
875 
876 /*
877  * UGH! This is just about the dumbest idea I've ever heard!!
878  */
879 int
880 linux_personality(struct thread *td, struct linux_personality_args *args)
881 {
882 #ifdef DEBUG
883 	if (ldebug(personality))
884 		printf(ARGS(personality, "%lu"), (unsigned long)args->per);
885 #endif
886 #ifndef __alpha__
887 	if (args->per != 0)
888 		return EINVAL;
889 #endif
890 
891 	/* Yes Jim, it's still a Linux... */
892 	td->td_retval[0] = 0;
893 	return 0;
894 }
895 
896 /*
897  * Wrappers for get/setitimer for debugging..
898  */
899 int
900 linux_setitimer(struct thread *td, struct linux_setitimer_args *args)
901 {
902 	struct setitimer_args bsa;
903 	struct itimerval foo;
904 	int error;
905 
906 #ifdef DEBUG
907 	if (ldebug(setitimer))
908 		printf(ARGS(setitimer, "%p, %p"),
909 		    (void *)args->itv, (void *)args->oitv);
910 #endif
911 	bsa.which = args->which;
912 	bsa.itv = (struct itimerval *)args->itv;
913 	bsa.oitv = (struct itimerval *)args->oitv;
914 	if (args->itv) {
915 	    if ((error = copyin(args->itv, &foo, sizeof(foo))))
916 		return error;
917 #ifdef DEBUG
918 	    if (ldebug(setitimer)) {
919 		printf("setitimer: value: sec: %ld, usec: %ld\n",
920 		    foo.it_value.tv_sec, foo.it_value.tv_usec);
921 		printf("setitimer: interval: sec: %ld, usec: %ld\n",
922 		    foo.it_interval.tv_sec, foo.it_interval.tv_usec);
923 	    }
924 #endif
925 	}
926 	return setitimer(td, &bsa);
927 }
928 
929 int
930 linux_getitimer(struct thread *td, struct linux_getitimer_args *args)
931 {
932 	struct getitimer_args bsa;
933 #ifdef DEBUG
934 	if (ldebug(getitimer))
935 		printf(ARGS(getitimer, "%p"), (void *)args->itv);
936 #endif
937 	bsa.which = args->which;
938 	bsa.itv = (struct itimerval *)args->itv;
939 	return getitimer(td, &bsa);
940 }
941 
942 #ifndef __alpha__
943 int
944 linux_nice(struct thread *td, struct linux_nice_args *args)
945 {
946 	struct setpriority_args	bsd_args;
947 
948 	bsd_args.which = PRIO_PROCESS;
949 	bsd_args.who = 0;	/* current process */
950 	bsd_args.prio = args->inc;
951 	return setpriority(td, &bsd_args);
952 }
953 #endif	/*!__alpha__*/
954 
955 int
956 linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
957 {
958 	struct ucred *newcred, *oldcred;
959 	l_gid_t linux_gidset[NGROUPS];
960 	gid_t *bsd_gidset;
961 	int ngrp, error;
962 	struct proc *p;
963 
964 	ngrp = args->gidsetsize;
965 	if (ngrp >= NGROUPS)
966 		return (EINVAL);
967 	error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t));
968 	if (error)
969 		return (error);
970 	newcred = crget();
971 	p = td->td_proc;
972 	PROC_LOCK(p);
973 	oldcred = p->p_ucred;
974 
975 	/*
976 	 * cr_groups[0] holds egid. Setting the whole set from
977 	 * the supplied set will cause egid to be changed too.
978 	 * Keep cr_groups[0] unchanged to prevent that.
979 	 */
980 
981 	if ((error = suser_cred(oldcred, PRISON_ROOT)) != 0) {
982 		PROC_UNLOCK(p);
983 		crfree(newcred);
984 		return (error);
985 	}
986 
987 	crcopy(newcred, oldcred);
988 	if (ngrp > 0) {
989 		newcred->cr_ngroups = ngrp + 1;
990 
991 		bsd_gidset = newcred->cr_groups;
992 		ngrp--;
993 		while (ngrp >= 0) {
994 			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
995 			ngrp--;
996 		}
997 	}
998 	else
999 		newcred->cr_ngroups = 1;
1000 
1001 	setsugid(p);
1002 	p->p_ucred = newcred;
1003 	PROC_UNLOCK(p);
1004 	crfree(oldcred);
1005 	return (0);
1006 }
1007 
1008 int
1009 linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
1010 {
1011 	struct ucred *cred;
1012 	l_gid_t linux_gidset[NGROUPS];
1013 	gid_t *bsd_gidset;
1014 	int bsd_gidsetsz, ngrp, error;
1015 
1016 	cred = td->td_ucred;
1017 	bsd_gidset = cred->cr_groups;
1018 	bsd_gidsetsz = cred->cr_ngroups - 1;
1019 
1020 	/*
1021 	 * cr_groups[0] holds egid. Returning the whole set
1022 	 * here will cause a duplicate. Exclude cr_groups[0]
1023 	 * to prevent that.
1024 	 */
1025 
1026 	if ((ngrp = args->gidsetsize) == 0) {
1027 		td->td_retval[0] = bsd_gidsetsz;
1028 		return (0);
1029 	}
1030 
1031 	if (ngrp < bsd_gidsetsz)
1032 		return (EINVAL);
1033 
1034 	ngrp = 0;
1035 	while (ngrp < bsd_gidsetsz) {
1036 		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1037 		ngrp++;
1038 	}
1039 
1040 	if ((error = copyout(linux_gidset, args->grouplist,
1041 	    ngrp * sizeof(l_gid_t))))
1042 		return (error);
1043 
1044 	td->td_retval[0] = ngrp;
1045 	return (0);
1046 }
1047 
1048 #ifndef __alpha__
1049 int
1050 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
1051 {
1052 	struct rlimit bsd_rlim;
1053 	struct l_rlimit rlim;
1054 	u_int which;
1055 	int error;
1056 
1057 #ifdef DEBUG
1058 	if (ldebug(setrlimit))
1059 		printf(ARGS(setrlimit, "%d, %p"),
1060 		    args->resource, (void *)args->rlim);
1061 #endif
1062 
1063 	if (args->resource >= LINUX_RLIM_NLIMITS)
1064 		return (EINVAL);
1065 
1066 	which = linux_to_bsd_resource[args->resource];
1067 	if (which == -1)
1068 		return (EINVAL);
1069 
1070 	error = copyin(args->rlim, &rlim, sizeof(rlim));
1071 	if (error)
1072 		return (error);
1073 
1074 	bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur;
1075 	bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max;
1076 	return (dosetrlimit(td, which, &bsd_rlim));
1077 }
1078 
1079 int
1080 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
1081 {
1082 	struct l_rlimit rlim;
1083 	struct proc *p = td->td_proc;
1084 	struct rlimit *bsd_rlp;
1085 	u_int which;
1086 
1087 #ifdef DEBUG
1088 	if (ldebug(old_getrlimit))
1089 		printf(ARGS(old_getrlimit, "%d, %p"),
1090 		    args->resource, (void *)args->rlim);
1091 #endif
1092 
1093 	if (args->resource >= LINUX_RLIM_NLIMITS)
1094 		return (EINVAL);
1095 
1096 	which = linux_to_bsd_resource[args->resource];
1097 	if (which == -1)
1098 		return (EINVAL);
1099 	bsd_rlp = &p->p_rlimit[which];
1100 
1101 	rlim.rlim_cur = (unsigned long)bsd_rlp->rlim_cur;
1102 	if (rlim.rlim_cur == ULONG_MAX)
1103 		rlim.rlim_cur = LONG_MAX;
1104 	rlim.rlim_max = (unsigned long)bsd_rlp->rlim_max;
1105 	if (rlim.rlim_max == ULONG_MAX)
1106 		rlim.rlim_max = LONG_MAX;
1107 	return (copyout(&rlim, args->rlim, sizeof(rlim)));
1108 }
1109 
1110 int
1111 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
1112 {
1113 	struct l_rlimit rlim;
1114 	struct proc *p = td->td_proc;
1115 	struct rlimit *bsd_rlp;
1116 	u_int which;
1117 
1118 #ifdef DEBUG
1119 	if (ldebug(getrlimit))
1120 		printf(ARGS(getrlimit, "%d, %p"),
1121 		    args->resource, (void *)args->rlim);
1122 #endif
1123 
1124 	if (args->resource >= LINUX_RLIM_NLIMITS)
1125 		return (EINVAL);
1126 
1127 	which = linux_to_bsd_resource[args->resource];
1128 	if (which == -1)
1129 		return (EINVAL);
1130 	bsd_rlp = &p->p_rlimit[which];
1131 
1132 	rlim.rlim_cur = (l_ulong)bsd_rlp->rlim_cur;
1133 	rlim.rlim_max = (l_ulong)bsd_rlp->rlim_max;
1134 	return (copyout(&rlim, args->rlim, sizeof(rlim)));
1135 }
1136 #endif /*!__alpha__*/
1137 
1138 int
1139 linux_sched_setscheduler(struct thread *td,
1140     struct linux_sched_setscheduler_args *args)
1141 {
1142 	struct sched_setscheduler_args bsd;
1143 
1144 #ifdef DEBUG
1145 	if (ldebug(sched_setscheduler))
1146 		printf(ARGS(sched_setscheduler, "%d, %d, %p"),
1147 		    args->pid, args->policy, (const void *)args->param);
1148 #endif
1149 
1150 	switch (args->policy) {
1151 	case LINUX_SCHED_OTHER:
1152 		bsd.policy = SCHED_OTHER;
1153 		break;
1154 	case LINUX_SCHED_FIFO:
1155 		bsd.policy = SCHED_FIFO;
1156 		break;
1157 	case LINUX_SCHED_RR:
1158 		bsd.policy = SCHED_RR;
1159 		break;
1160 	default:
1161 		return EINVAL;
1162 	}
1163 
1164 	bsd.pid = args->pid;
1165 	bsd.param = (struct sched_param *)args->param;
1166 	return sched_setscheduler(td, &bsd);
1167 }
1168 
1169 int
1170 linux_sched_getscheduler(struct thread *td,
1171     struct linux_sched_getscheduler_args *args)
1172 {
1173 	struct sched_getscheduler_args bsd;
1174 	int error;
1175 
1176 #ifdef DEBUG
1177 	if (ldebug(sched_getscheduler))
1178 		printf(ARGS(sched_getscheduler, "%d"), args->pid);
1179 #endif
1180 
1181 	bsd.pid = args->pid;
1182 	error = sched_getscheduler(td, &bsd);
1183 
1184 	switch (td->td_retval[0]) {
1185 	case SCHED_OTHER:
1186 		td->td_retval[0] = LINUX_SCHED_OTHER;
1187 		break;
1188 	case SCHED_FIFO:
1189 		td->td_retval[0] = LINUX_SCHED_FIFO;
1190 		break;
1191 	case SCHED_RR:
1192 		td->td_retval[0] = LINUX_SCHED_RR;
1193 		break;
1194 	}
1195 
1196 	return error;
1197 }
1198 
1199 int
1200 linux_sched_get_priority_max(struct thread *td,
1201     struct linux_sched_get_priority_max_args *args)
1202 {
1203 	struct sched_get_priority_max_args bsd;
1204 
1205 #ifdef DEBUG
1206 	if (ldebug(sched_get_priority_max))
1207 		printf(ARGS(sched_get_priority_max, "%d"), args->policy);
1208 #endif
1209 
1210 	switch (args->policy) {
1211 	case LINUX_SCHED_OTHER:
1212 		bsd.policy = SCHED_OTHER;
1213 		break;
1214 	case LINUX_SCHED_FIFO:
1215 		bsd.policy = SCHED_FIFO;
1216 		break;
1217 	case LINUX_SCHED_RR:
1218 		bsd.policy = SCHED_RR;
1219 		break;
1220 	default:
1221 		return EINVAL;
1222 	}
1223 	return sched_get_priority_max(td, &bsd);
1224 }
1225 
1226 int
1227 linux_sched_get_priority_min(struct thread *td,
1228     struct linux_sched_get_priority_min_args *args)
1229 {
1230 	struct sched_get_priority_min_args bsd;
1231 
1232 #ifdef DEBUG
1233 	if (ldebug(sched_get_priority_min))
1234 		printf(ARGS(sched_get_priority_min, "%d"), args->policy);
1235 #endif
1236 
1237 	switch (args->policy) {
1238 	case LINUX_SCHED_OTHER:
1239 		bsd.policy = SCHED_OTHER;
1240 		break;
1241 	case LINUX_SCHED_FIFO:
1242 		bsd.policy = SCHED_FIFO;
1243 		break;
1244 	case LINUX_SCHED_RR:
1245 		bsd.policy = SCHED_RR;
1246 		break;
1247 	default:
1248 		return EINVAL;
1249 	}
1250 	return sched_get_priority_min(td, &bsd);
1251 }
1252 
1253 #define REBOOT_CAD_ON	0x89abcdef
1254 #define REBOOT_CAD_OFF	0
1255 #define REBOOT_HALT	0xcdef0123
1256 
1257 int
1258 linux_reboot(struct thread *td, struct linux_reboot_args *args)
1259 {
1260 	struct reboot_args bsd_args;
1261 
1262 #ifdef DEBUG
1263 	if (ldebug(reboot))
1264 		printf(ARGS(reboot, "0x%x"), args->cmd);
1265 #endif
1266 	if (args->cmd == REBOOT_CAD_ON || args->cmd == REBOOT_CAD_OFF)
1267 		return (0);
1268 	bsd_args.opt = (args->cmd == REBOOT_HALT) ? RB_HALT : 0;
1269 	return (reboot(td, &bsd_args));
1270 }
1271 
1272 #ifndef __alpha__
1273 
1274 /*
 * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify
 * td->td_retval[1] when COMPAT_43 or COMPAT_SUNOS is defined. This
 * clobbers registers that are assumed to be preserved. The following
 * lightweight syscalls fix this. See also linux_getgid16() and
 * linux_getuid16() in linux_uid16.c.
1280  *
1281  * linux_getpid() - MP SAFE
1282  * linux_getgid() - MP SAFE
1283  * linux_getuid() - MP SAFE
1284  */
1285 
1286 int
1287 linux_getpid(struct thread *td, struct linux_getpid_args *args)
1288 {
1289 
1290 	td->td_retval[0] = td->td_proc->p_pid;
1291 	return (0);
1292 }
1293 
1294 int
1295 linux_getgid(struct thread *td, struct linux_getgid_args *args)
1296 {
1297 
1298 	td->td_retval[0] = td->td_ucred->cr_rgid;
1299 	return (0);
1300 }
1301 
1302 int
1303 linux_getuid(struct thread *td, struct linux_getuid_args *args)
1304 {
1305 
1306 	td->td_retval[0] = td->td_ucred->cr_ruid;
1307 	return (0);
1308 }
1309 
1310 #endif /*!__alpha__*/
1311 
1312 int
1313 linux_getsid(struct thread *td, struct linux_getsid_args *args)
1314 {
1315 	struct getsid_args bsd;
1316 	bsd.pid = args->pid;
1317 	return getsid(td, &bsd);
1318 }
1319