/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2002 Doug Rabson
 * Copyright (c) 1994-1995 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/blist.h>
#include <sys/fcntl.h>
#if defined(__i386__)
#include <sys/imgact_aout.h>
#endif
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/reboot.h>
#include <sys/racct.h>
#include <sys/random.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
#include <sys/wait.h>
#include <sys/cpuset.h>
#include <sys/uio.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/swap_pager.h>

#ifdef COMPAT_LINUX32
#include <machine/../linux32/linux.h>
#include <machine/../linux32/linux32_proto.h>
#else
#include <machine/../linux/linux.h>
#include <machine/../linux/linux_proto.h>
#endif

#include <compat/linux/linux_dtrace.h>
#include <compat/linux/linux_file.h>
#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_timer.h>
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_sysproto.h>
#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_misc.h>

int stclohz;				/* Statistics clock frequency */

static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
	RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
	RLIMIT_MEMLOCK, RLIMIT_AS
};

struct l_sysinfo {
	l_long		uptime;		/* Seconds since boot */
	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
#define LINUX_SYSINFO_LOADS_SCALE 65536
	l_ulong		totalram;	/* Total usable main memory size */
	l_ulong		freeram;	/* Available memory size */
	l_ulong		sharedram;	/* Amount of shared memory */
	l_ulong		bufferram;	/* Memory used by buffers */
	l_ulong		totalswap;	/* Total swap space size */
	l_ulong		freeswap;	/* swap space still available */
	l_ushort	procs;		/* Number of current processes */
	l_ushort	pads;
	l_ulong		totalhigh;
	l_ulong		freehigh;
	l_uint		mem_unit;
	char		_f[20-2*sizeof(l_long)-sizeof(l_int)];	/* padding */
};

struct l_pselect6arg {
	l_uintptr_t	ss;
	l_size_t	ss_len;
};

static int	linux_utimensat_lts_to_ts(struct l_timespec *,
			struct timespec *);
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
static int	linux_utimensat_lts64_to_ts(struct l_timespec64 *,
			struct timespec *);
#endif
static int	linux_common_utimensat(struct thread *, int,
			const char *, struct timespec *, int);

int
linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
{
	struct l_sysinfo sysinfo;
	int i, j;
	struct timespec ts;

	bzero(&sysinfo, sizeof(sysinfo));
	getnanouptime(&ts);
	if (ts.tv_nsec != 0)
		ts.tv_sec++;
	sysinfo.uptime = ts.tv_sec;

	/* Use the information from the mib to get our load averages */
	for (i = 0; i < 3; i++)
		sysinfo.loads[i] = averunnable.ldavg[i] *
		    LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale;
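	/*
	 * Example (assuming the usual fscale of 2048): a load average of
	 * 1.00 is stored as ldavg == 2048, and the conversion above
	 * yields 2048 * 65536 / 2048 == 65536, i.e. 1.0 in the 16.16
	 * fixed-point format Linux uses for sysinfo loads.
	 */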

	sysinfo.totalram = physmem * PAGE_SIZE;
	sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE;

	/*
	 * sharedram counts pages allocated to named, swap-backed objects such
	 * as shared memory segments and tmpfs files.  There is no cheap way to
	 * compute this, so just leave the field unpopulated.  Linux itself only
	 * started setting this field in the 3.x timeframe.
	 */
	sysinfo.sharedram = 0;
	sysinfo.bufferram = 0;

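	/* swap_pager_status() reports the total and in-use swap pages. */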
	swap_pager_status(&i, &j);
	sysinfo.totalswap = i * PAGE_SIZE;
	sysinfo.freeswap = (i - j) * PAGE_SIZE;

	sysinfo.procs = nprocs;

	/*
	 * Platforms supported by the emulation layer do not have a notion of
	 * high memory.
	 */
	sysinfo.totalhigh = 0;
	sysinfo.freehigh = 0;

	sysinfo.mem_unit = 1;

	return (copyout(&sysinfo, args->info, sizeof(sysinfo)));
}

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_alarm(struct thread *td, struct linux_alarm_args *args)
{
	struct itimerval it, old_it;
	u_int secs;
	int error;

	secs = args->secs;
	/*
	 * Linux alarm() is always successful.  Limit secs to INT32_MAX / 2
	 * to match kern_setitimer()'s limit and avoid an error from it.
	 *
	 * XXX. Linux limits secs to INT_MAX on 32-bit platforms and does
	 * not limit it at all on 64-bit ones.
	 */
	if (secs > INT32_MAX / 2)
		secs = INT32_MAX / 2;

	it.it_value.tv_sec = secs;
	it.it_value.tv_usec = 0;
	timevalclear(&it.it_interval);
	error = kern_setitimer(td, ITIMER_REAL, &it, &old_it);
	KASSERT(error == 0, ("kern_setitimer returns %d", error));

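	/*
	 * Round the residual time to whole seconds for the return value:
	 * half a second or more rounds up, and a pending alarm with less
	 * than a second left still reports 1 rather than 0.
	 */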
	if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) ||
	    old_it.it_value.tv_usec >= 500000)
		old_it.it_value.tv_sec++;
	td->td_retval[0] = old_it.it_value.tv_sec;
	return (0);
}
#endif

int
linux_brk(struct thread *td, struct linux_brk_args *args)
{
	struct vmspace *vm = td->td_proc->p_vmspace;
	uintptr_t new, old;

	old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize);
	new = (uintptr_t)args->dsend;
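	/*
	 * Linux brk() returns the new break on success and the old break
	 * on failure, rather than -1 with errno, so report the unchanged
	 * break when kern_break() fails or the requested end is bogus.
	 */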
	if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new))
		td->td_retval[0] = (register_t)new;
	else
		td->td_retval[0] = (register_t)old;

	return (0);
}

#if defined(__i386__)
/* XXX: what about amd64/linux32? */

int
linux_uselib(struct thread *td, struct linux_uselib_args *args)
{
	struct nameidata ni;
	struct vnode *vp;
	struct exec *a_out;
	vm_map_t map;
	vm_map_entry_t entry;
	struct vattr attr;
	vm_offset_t vmaddr;
	unsigned long file_offset;
	unsigned long bss_size;
	char *library;
	ssize_t aresid;
	int error;
	bool locked, opened, textset;

	a_out = NULL;
	vp = NULL;
	locked = false;
	textset = false;
	opened = false;

	if (!LUSECONVPATH(td)) {
		NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
		    UIO_USERSPACE, args->library, td);
		error = namei(&ni);
	} else {
		LCONVPATHEXIST(td, args->library, &library);
		NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
		    UIO_SYSSPACE, library, td);
		error = namei(&ni);
		LFREEPATH(library);
	}
	if (error)
		goto cleanup;

	vp = ni.ni_vp;
	NDFREE(&ni, NDF_ONLY_PNBUF);

	/*
	 * From here on down, we have a locked vnode that must be unlocked.
	 * XXX: The code below largely duplicates exec_check_permissions().
	 */
	locked = true;

	/* Executable? */
	error = VOP_GETATTR(vp, &attr, td->td_ucred);
	if (error)
		goto cleanup;

	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
	    ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) {
		/* EACCES is what exec(2) returns. */
		error = ENOEXEC;
		goto cleanup;
	}

	/* Sensible size? */
	if (attr.va_size == 0) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* Can we access it? */
	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
	if (error)
		goto cleanup;

	/*
	 * XXX: This should use vn_open() so that it is properly authorized,
	 * and to reduce code redundancy all over the place here.
	 * XXX: Not really, it duplicates far more of exec_check_permissions()
	 * than vn_open().
	 */
#ifdef MAC
	error = mac_vnode_check_open(td->td_ucred, vp, VREAD);
	if (error)
		goto cleanup;
#endif
	error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL);
	if (error)
		goto cleanup;
	opened = true;

	/* Pull the executable header into exec_map */
	error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE,
	    VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0);
	if (error)
		goto cleanup;

	/* Is it a Linux binary? */
	if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
		error = ENOEXEC;
		goto cleanup;
	}

	/*
	 * While we are here, we should REALLY do some more checks
	 */

	/* Set file/virtual offset based on a.out variant. */
	switch ((int)(a_out->a_magic & 0xffff)) {
	case 0413:			/* ZMAGIC */
		file_offset = 1024;
		break;
	case 0314:			/* QMAGIC */
		file_offset = 0;
		break;
	default:
		error = ENOEXEC;
		goto cleanup;
	}

	bss_size = round_page(a_out->a_bss);

	/* Check various fields in header for validity/bounds. */
	if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* text + data can't exceed file size */
	if (a_out->a_data + a_out->a_text > attr.va_size) {
		error = EFAULT;
		goto cleanup;
	}

	/*
	 * text/data/bss must not exceed limits
	 * XXX - this is not complete. it should check current usage PLUS
	 * the resources needed by this library.
	 */
	PROC_LOCK(td->td_proc);
	if (a_out->a_text > maxtsiz ||
	    a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) ||
	    racct_set(td->td_proc, RACCT_DATA, a_out->a_data +
	    bss_size) != 0) {
		PROC_UNLOCK(td->td_proc);
		error = ENOMEM;
		goto cleanup;
	}
	PROC_UNLOCK(td->td_proc);

	/*
	 * Prevent more writers.
	 */
	error = VOP_SET_TEXT(vp);
	if (error != 0)
		goto cleanup;
	textset = true;

	/*
	 * Lock no longer needed
	 */
	locked = false;
	VOP_UNLOCK(vp);

	/*
	 * Check if file_offset is page aligned.  Currently we cannot handle
	 * misaligned file offsets, and so we read in the entire image
	 * (what a waste).
	 */
	if (file_offset & PAGE_MASK) {
		/* Map text+data read/write/execute */

		/* a_entry is the load address and is page aligned */
		vmaddr = trunc_page(a_out->a_entry);

		/* get anon user mapping, read+write+execute */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE,
		    VM_PROT_ALL, VM_PROT_ALL, 0);
		if (error)
			goto cleanup;

		error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset,
		    a_out->a_text + a_out->a_data, UIO_USERSPACE, 0,
		    td->td_ucred, NOCRED, &aresid, td);
		if (error != 0)
			goto cleanup;
		if (aresid != 0) {
			error = ENOEXEC;
			goto cleanup;
		}
	} else {
		/*
		 * for QMAGIC, a_entry is 20 bytes beyond the load address
		 * to skip the executable header
		 */
		vmaddr = trunc_page(a_out->a_entry);

		/*
		 * Map it all into the process's space as a single
		 * copy-on-write "data" segment.
		 */
		map = &td->td_proc->p_vmspace->vm_map;
		error = vm_mmap(map, &vmaddr,
		    a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
		    MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset);
		if (error)
			goto cleanup;
		vm_map_lock(map);
		if (!vm_map_lookup_entry(map, vmaddr, &entry)) {
			vm_map_unlock(map);
			error = EDOOFUS;
			goto cleanup;
		}
		entry->eflags |= MAP_ENTRY_VN_EXEC;
		vm_map_unlock(map);
		textset = false;
	}

	if (bss_size != 0) {
		/* Calculate BSS start address */
		vmaddr = trunc_page(a_out->a_entry) + a_out->a_text +
		    a_out->a_data;

		/* allocate some 'anon' space */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL,
		    VM_PROT_ALL, 0);
		if (error)
			goto cleanup;
	}

cleanup:
	if (opened) {
		if (locked)
			VOP_UNLOCK(vp);
		locked = false;
		VOP_CLOSE(vp, FREAD, td->td_ucred, td);
	}
	if (textset) {
		if (!locked) {
			locked = true;
			VOP_LOCK(vp, LK_SHARED | LK_RETRY);
		}
		VOP_UNSET_TEXT_CHECKED(vp);
	}
	if (locked)
		VOP_UNLOCK(vp);

	/* Release the temporary mapping. */
	if (a_out)
		kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE);

	return (error);
}

#endif	/* __i386__ */

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_select(struct thread *td, struct linux_select_args *args)
{
	l_timeval ltv;
	struct timeval tv0, tv1, utv, *tvp;
	int error;

	/*
	 * Store current time for computation of the amount of
	 * time left.
	 */
	if (args->timeout) {
		if ((error = copyin(args->timeout, &ltv, sizeof(ltv))))
			goto select_out;
		utv.tv_sec = ltv.tv_sec;
		utv.tv_usec = ltv.tv_usec;

		if (itimerfix(&utv)) {
			/*
			 * The timeval was invalid.  Convert it to something
			 * valid that will act as it does under Linux.
			 */
			utv.tv_sec += utv.tv_usec / 1000000;
			utv.tv_usec %= 1000000;
			if (utv.tv_usec < 0) {
				utv.tv_sec -= 1;
				utv.tv_usec += 1000000;
			}
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		}
		microtime(&tv0);
		tvp = &utv;
	} else
		tvp = NULL;

	error = kern_select(td, args->nfds, args->readfds, args->writefds,
	    args->exceptfds, tvp, LINUX_NFDBITS);
	if (error)
		goto select_out;

	if (args->timeout) {
		if (td->td_retval[0]) {
			/*
			 * Compute how much time was left of the timeout,
			 * by subtracting the current time and the time
			 * before we started the call, and subtracting
			 * that result from the user-supplied value.
			 */
			microtime(&tv1);
			timevalsub(&tv1, &tv0);
			timevalsub(&utv, &tv1);
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		} else
			timevalclear(&utv);
		ltv.tv_sec = utv.tv_sec;
		ltv.tv_usec = utv.tv_usec;
		if ((error = copyout(&ltv, args->timeout, sizeof(ltv))))
			goto select_out;
	}

select_out:
	return (error);
}
#endif

int
linux_mremap(struct thread *td, struct linux_mremap_args *args)
{
	uintptr_t addr;
	size_t len;
	int error = 0;

	if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) {
		td->td_retval[0] = 0;
		return (EINVAL);
	}

	/*
	 * Check for the page alignment.
	 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK.
	 */
	if (args->addr & PAGE_MASK) {
		td->td_retval[0] = 0;
		return (EINVAL);
	}

	args->new_len = round_page(args->new_len);
	args->old_len = round_page(args->old_len);

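	/*
	 * This emulation only shrinks mappings in place: growing one,
	 * with or without MREMAP_MAYMOVE, is reported as ENOMEM below.
	 */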
	if (args->new_len > args->old_len) {
		td->td_retval[0] = 0;
		return (ENOMEM);
	}

	if (args->new_len < args->old_len) {
		addr = args->addr + args->new_len;
		len = args->old_len - args->new_len;
		error = kern_munmap(td, addr, len);
	}

	td->td_retval[0] = error ? 0 : (uintptr_t)args->addr;
	return (error);
}

#define LINUX_MS_ASYNC       0x0001
#define LINUX_MS_INVALIDATE  0x0002
#define LINUX_MS_SYNC        0x0004

int
linux_msync(struct thread *td, struct linux_msync_args *args)
{

	return (kern_msync(td, args->addr, args->len,
	    args->fl & ~LINUX_MS_SYNC));
}

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_time(struct thread *td, struct linux_time_args *args)
{
	struct timeval tv;
	l_time_t tm;
	int error;

	microtime(&tv);
	tm = tv.tv_sec;
	if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm))))
		return (error);
	td->td_retval[0] = tm;
	return (0);
}
#endif

struct l_times_argv {
	l_clock_t	tms_utime;
	l_clock_t	tms_stime;
	l_clock_t	tms_cutime;
	l_clock_t	tms_cstime;
};

/*
 * Glibc versions prior to 2.2.1 always use a hard-coded CLK_TCK value.
 * Since 2.2.1, glibc uses the value exported from the kernel via the
 * AT_CLKTCK auxiliary vector entry.
 */
#define	CLK_TCK		100

#define	CONVOTCK(r)	(r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
#define	CONVNTCK(r)	(r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz))

#define	CONVTCK(r)	(linux_kernver(td) >= LINUX_KERNVER_2004000 ?		\
			    CONVNTCK(r) : CONVOTCK(r))
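
/*
 * Example: with the old hard-coded CLK_TCK of 100, 1.5 seconds of CPU
 * time converts to 1 * 100 + 500000 / (1000000 / 100) == 150 ticks.
 */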

int
linux_times(struct thread *td, struct linux_times_args *args)
{
	struct timeval tv, utime, stime, cutime, cstime;
	struct l_times_argv tms;
	struct proc *p;
	int error;

	if (args->buf != NULL) {
		p = td->td_proc;
		PROC_LOCK(p);
		PROC_STATLOCK(p);
		calcru(p, &utime, &stime);
		PROC_STATUNLOCK(p);
		calccru(p, &cutime, &cstime);
		PROC_UNLOCK(p);

		tms.tms_utime = CONVTCK(utime);
		tms.tms_stime = CONVTCK(stime);

		tms.tms_cutime = CONVTCK(cutime);
		tms.tms_cstime = CONVTCK(cstime);

		if ((error = copyout(&tms, args->buf, sizeof(tms))))
			return (error);
	}

	microuptime(&tv);
	td->td_retval[0] = (int)CONVTCK(tv);
	return (0);
}

int
linux_newuname(struct thread *td, struct linux_newuname_args *args)
{
	struct l_new_utsname utsname;
	char osname[LINUX_MAX_UTSNAME];
	char osrelease[LINUX_MAX_UTSNAME];
	char *p;

	linux_get_osname(td, osname);
	linux_get_osrelease(td, osrelease);

	bzero(&utsname, sizeof(utsname));
	strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME);
	getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME);
	getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME);
	strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME);
	strlcpy(utsname.version, version, LINUX_MAX_UTSNAME);
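	/* Truncate the multi-line FreeBSD version string at its first newline. */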
	for (p = utsname.version; *p != '\0'; ++p)
		if (*p == '\n') {
			*p = '\0';
			break;
		}
#if defined(__amd64__)
	/*
	 * On amd64, Linux uname(2) needs to return "x86_64"
	 * for both 64-bit and 32-bit applications.  On 32-bit,
	 * the string returned by getauxval(AT_PLATFORM) needs
	 * to remain "i686", though.
	 */
	strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME);
#else
	strlcpy(utsname.machine, linux_kplatform, LINUX_MAX_UTSNAME);
#endif

	return (copyout(&utsname, args->buf, sizeof(utsname)));
}

struct l_utimbuf {
	l_time_t l_actime;
	l_time_t l_modtime;
};

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_utime(struct thread *td, struct linux_utime_args *args)
{
	struct timeval tv[2], *tvp;
	struct l_utimbuf lut;
	char *fname;
	int error;

	if (args->times) {
		if ((error = copyin(args->times, &lut, sizeof lut)) != 0)
			return (error);
		tv[0].tv_sec = lut.l_actime;
		tv[0].tv_usec = 0;
		tv[1].tv_sec = lut.l_modtime;
		tv[1].tv_usec = 0;
		tvp = tv;
	} else
		tvp = NULL;

	if (!LUSECONVPATH(td)) {
		error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE,
		    tvp, UIO_SYSSPACE);
	} else {
		LCONVPATHEXIST(td, args->fname, &fname);
		error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp,
		    UIO_SYSSPACE);
		LFREEPATH(fname);
	}
	return (error);
}
#endif

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_utimes(struct thread *td, struct linux_utimes_args *args)
{
	l_timeval ltv[2];
	struct timeval tv[2], *tvp = NULL;
	char *fname;
	int error;

	if (args->tptr != NULL) {
		if ((error = copyin(args->tptr, ltv, sizeof ltv)) != 0)
			return (error);
		tv[0].tv_sec = ltv[0].tv_sec;
		tv[0].tv_usec = ltv[0].tv_usec;
		tv[1].tv_sec = ltv[1].tv_sec;
		tv[1].tv_usec = ltv[1].tv_usec;
		tvp = tv;
	}

	if (!LUSECONVPATH(td)) {
		error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE,
		    tvp, UIO_SYSSPACE);
	} else {
		LCONVPATHEXIST(td, args->fname, &fname);
		error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE,
		    tvp, UIO_SYSSPACE);
		LFREEPATH(fname);
	}
	return (error);
}
#endif

static int
linux_utimensat_lts_to_ts(struct l_timespec *l_times, struct timespec *times)
{

	if (l_times->tv_nsec != LINUX_UTIME_OMIT &&
	    l_times->tv_nsec != LINUX_UTIME_NOW &&
	    (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999))
		return (EINVAL);

	times->tv_sec = l_times->tv_sec;
	switch (l_times->tv_nsec) {
	case LINUX_UTIME_OMIT:
		times->tv_nsec = UTIME_OMIT;
		break;
	case LINUX_UTIME_NOW:
		times->tv_nsec = UTIME_NOW;
		break;
	default:
		times->tv_nsec = l_times->tv_nsec;
	}

	return (0);
}

static int
linux_common_utimensat(struct thread *td, int ldfd, const char *pathname,
    struct timespec *timesp, int lflags)
{
	char *path = NULL;
	int error, dfd, flags = 0;

	dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd;

	if (lflags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH))
		return (EINVAL);

	if (timesp != NULL) {
		/*
		 * This breaks POSIX, but is what the Linux kernel does
		 * _on purpose_ (documented in the man page for utimensat(2)),
		 * so we must follow that behaviour.
		 */
		if (timesp[0].tv_nsec == UTIME_OMIT &&
		    timesp[1].tv_nsec == UTIME_OMIT)
			return (0);
	}

	if (lflags & LINUX_AT_SYMLINK_NOFOLLOW)
		flags |= AT_SYMLINK_NOFOLLOW;
	if (lflags & LINUX_AT_EMPTY_PATH)
		flags |= AT_EMPTY_PATH;

	if (!LUSECONVPATH(td)) {
		if (pathname != NULL) {
			return (kern_utimensat(td, dfd, pathname,
			    UIO_USERSPACE, timesp, UIO_SYSSPACE, flags));
		}
	}

	if (pathname != NULL)
		LCONVPATHEXIST_AT(td, pathname, &path, dfd);
	else if (lflags != 0)
		return (EINVAL);

	if (path == NULL)
		error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE);
	else {
		error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp,
		    UIO_SYSSPACE, flags);
		LFREEPATH(path);
	}

	return (error);
}

int
linux_utimensat(struct thread *td, struct linux_utimensat_args *args)
{
	struct l_timespec l_times[2];
	struct timespec times[2], *timesp;
	int error;

	if (args->times != NULL) {
		error = copyin(args->times, l_times, sizeof(l_times));
		if (error != 0)
			return (error);

		error = linux_utimensat_lts_to_ts(&l_times[0], &times[0]);
		if (error != 0)
			return (error);
		error = linux_utimensat_lts_to_ts(&l_times[1], &times[1]);
		if (error != 0)
			return (error);
		timesp = times;
	} else
		timesp = NULL;

	return (linux_common_utimensat(td, args->dfd, args->pathname,
	    timesp, args->flags));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
static int
linux_utimensat_lts64_to_ts(struct l_timespec64 *l_times, struct timespec *times)
{

	if (l_times->tv_nsec != LINUX_UTIME_OMIT &&
	    l_times->tv_nsec != LINUX_UTIME_NOW &&
	    (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999))
		return (EINVAL);

	times->tv_sec = l_times->tv_sec;
	switch (l_times->tv_nsec) {
	case LINUX_UTIME_OMIT:
		times->tv_nsec = UTIME_OMIT;
		break;
	case LINUX_UTIME_NOW:
		times->tv_nsec = UTIME_NOW;
		break;
	default:
		times->tv_nsec = l_times->tv_nsec;
	}

	return (0);
}

int
linux_utimensat_time64(struct thread *td, struct linux_utimensat_time64_args *args)
{
	struct l_timespec64 l_times[2];
	struct timespec times[2], *timesp;
	int error;

	if (args->times64 != NULL) {
		error = copyin(args->times64, l_times, sizeof(l_times));
		if (error != 0)
			return (error);

		error = linux_utimensat_lts64_to_ts(&l_times[0], &times[0]);
		if (error != 0)
			return (error);
		error = linux_utimensat_lts64_to_ts(&l_times[1], &times[1]);
		if (error != 0)
			return (error);
		timesp = times;
	} else
		timesp = NULL;

	return (linux_common_utimensat(td, args->dfd, args->pathname,
	    timesp, args->flags));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_futimesat(struct thread *td, struct linux_futimesat_args *args)
{
	l_timeval ltv[2];
	struct timeval tv[2], *tvp = NULL;
	char *fname;
	int error, dfd;

	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;

	if (args->utimes != NULL) {
		if ((error = copyin(args->utimes, ltv, sizeof ltv)) != 0)
			return (error);
		tv[0].tv_sec = ltv[0].tv_sec;
		tv[0].tv_usec = ltv[0].tv_usec;
		tv[1].tv_sec = ltv[1].tv_sec;
		tv[1].tv_usec = ltv[1].tv_usec;
		tvp = tv;
	}

	if (!LUSECONVPATH(td)) {
		error = kern_utimesat(td, dfd, args->filename, UIO_USERSPACE,
		    tvp, UIO_SYSSPACE);
	} else {
		LCONVPATHEXIST_AT(td, args->filename, &fname, dfd);
		error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE,
		    tvp, UIO_SYSSPACE);
		LFREEPATH(fname);
	}
	return (error);
}
#endif

static int
linux_common_wait(struct thread *td, int pid, int *statusp,
    int options, struct __wrusage *wrup)
{
	siginfo_t siginfo;
	idtype_t idtype;
	id_t id;
	int error, status, tmpstat;

	if (pid == WAIT_ANY) {
		idtype = P_ALL;
		id = 0;
	} else if (pid < 0) {
		idtype = P_PGID;
		id = (id_t)-pid;
	} else {
		idtype = P_PID;
		id = (id_t)pid;
	}

	/*
	 * For backward compatibility we implicitly add flags WEXITED
	 * and WTRAPPED here.
	 */
	options |= WEXITED | WTRAPPED;
	error = kern_wait6(td, idtype, id, &status, options, wrup, &siginfo);
	if (error)
		return (error);

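	/*
	 * The 16-bit wait status layout is shared between Linux and
	 * FreeBSD: the low 7 bits hold the terminating signal (0x7f
	 * means stopped) and bits 8-15 hold the exit code or stop
	 * signal.  Only the signal numbers themselves differ, so the
	 * translation below rewrites just those fields.
	 */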
	if (statusp) {
		tmpstat = status & 0xffff;
		if (WIFSIGNALED(tmpstat)) {
			tmpstat = (tmpstat & 0xffffff80) |
			    bsd_to_linux_signal(WTERMSIG(tmpstat));
		} else if (WIFSTOPPED(tmpstat)) {
			tmpstat = (tmpstat & 0xffff00ff) |
			    (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8);
#if defined(__amd64__) && !defined(COMPAT_LINUX32)
			if (WSTOPSIG(status) == SIGTRAP) {
				tmpstat = linux_ptrace_status(td,
				    siginfo.si_pid, tmpstat);
			}
#endif
		} else if (WIFCONTINUED(tmpstat)) {
			tmpstat = 0xffff;
		}
		error = copyout(&tmpstat, statusp, sizeof(int));
	}

	return (error);
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
{
	struct linux_wait4_args wait4_args;

	wait4_args.pid = args->pid;
	wait4_args.status = args->status;
	wait4_args.options = args->options;
	wait4_args.rusage = NULL;

	return (linux_wait4(td, &wait4_args));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

int
linux_wait4(struct thread *td, struct linux_wait4_args *args)
{
	int error, options;
	struct __wrusage wru, *wrup;

	if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG |
	    LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
		return (EINVAL);

	options = WEXITED;
	linux_to_bsd_waitopts(args->options, &options);

	if (args->rusage != NULL)
		wrup = &wru;
	else
		wrup = NULL;
	error = linux_common_wait(td, args->pid, args->status, options, wrup);
	if (error != 0)
		return (error);
	if (args->rusage != NULL)
		error = linux_copyout_rusage(&wru.wru_self, args->rusage);
	return (error);
}

int
linux_waitid(struct thread *td, struct linux_waitid_args *args)
{
	int status, options, sig;
	struct __wrusage wru;
	siginfo_t siginfo;
	l_siginfo_t lsi;
	idtype_t idtype;
	struct proc *p;
	int error;

	options = 0;
	linux_to_bsd_waitopts(args->options, &options);

	if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED))
		return (EINVAL);
	if (!(options & (WEXITED | WUNTRACED | WCONTINUED)))
		return (EINVAL);

	switch (args->idtype) {
	case LINUX_P_ALL:
		idtype = P_ALL;
		break;
	case LINUX_P_PID:
		if (args->id <= 0)
			return (EINVAL);
		idtype = P_PID;
		break;
	case LINUX_P_PGID:
		if (args->id <= 0)
			return (EINVAL);
		idtype = P_PGID;
		break;
	default:
		return (EINVAL);
	}

	error = kern_wait6(td, idtype, args->id, &status, options,
	    &wru, &siginfo);
	if (error != 0)
		return (error);
	if (args->rusage != NULL) {
		error = linux_copyout_rusage(&wru.wru_children,
		    args->rusage);
		if (error != 0)
			return (error);
	}
	if (args->info != NULL) {
		p = td->td_proc;
		bzero(&lsi, sizeof(lsi));
		if (td->td_retval[0] != 0) {
			sig = bsd_to_linux_signal(siginfo.si_signo);
			siginfo_to_lsiginfo(&siginfo, &lsi, sig);
		}
		error = copyout(&lsi, args->info, sizeof(lsi));
	}
	td->td_retval[0] = 0;

	return (error);
}

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_mknod(struct thread *td, struct linux_mknod_args *args)
{
	char *path;
	int error;
	enum uio_seg seg;
	bool convpath;

	convpath = LUSECONVPATH(td);
	if (!convpath) {
		path = args->path;
		seg = UIO_USERSPACE;
	} else {
		LCONVPATHCREAT(td, args->path, &path);
		seg = UIO_SYSSPACE;
	}

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, AT_FDCWD, path, seg,
		    args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, AT_FDCWD, path, seg,
		    args->mode, args->dev);
		break;

	case S_IFDIR:
		error = EPERM;
		break;

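	/*
	 * mknod(2) with S_IFREG (or a file type of 0, which Linux treats
	 * as S_IFREG) creates a regular file; emulate this with an
	 * O_CREAT open immediately followed by a close.
	 */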
	case 0:
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		error = kern_openat(td, AT_FDCWD, path, seg,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (convpath)
		LFREEPATH(path);
	return (error);
}
#endif

int
linux_mknodat(struct thread *td, struct linux_mknodat_args *args)
{
	char *path;
	int error, dfd;
	enum uio_seg seg;
	bool convpath;

	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;

	convpath = LUSECONVPATH(td);
	if (!convpath) {
		path = __DECONST(char *, args->filename);
		seg = UIO_USERSPACE;
	} else {
		LCONVPATHCREAT_AT(td, args->filename, &path, dfd);
		seg = UIO_SYSSPACE;
	}

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, dfd, path, seg, args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, dfd, path, seg, args->mode,
		    args->dev);
		break;

	case S_IFDIR:
		error = EPERM;
		break;

	case 0:
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		error = kern_openat(td, dfd, path, seg,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (convpath)
		LFREEPATH(path);
	return (error);
}

/*
 * UGH! This is just about the dumbest idea I've ever heard!!
 */
int
linux_personality(struct thread *td, struct linux_personality_args *args)
{
	struct linux_pemuldata *pem;
	struct proc *p = td->td_proc;
	uint32_t old;

	PROC_LOCK(p);
	pem = pem_find(p);
	old = pem->persona;
	if (args->per != 0xffffffff)
		pem->persona = args->per;
	PROC_UNLOCK(p);

	td->td_retval[0] = old;
	return (0);
}

struct l_itimerval {
	l_timeval it_interval;
	l_timeval it_value;
};

#define	B2L_ITIMERVAL(bip, lip)						\
	(bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec;		\
	(bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec;	\
	(bip)->it_value.tv_sec = (lip)->it_value.tv_sec;		\
	(bip)->it_value.tv_usec = (lip)->it_value.tv_usec;
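
/*
 * The field names match on both sides, so this copying macro works in
 * either direction between struct itimerval and struct l_itimerval;
 * only the field widths differ.
 */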

int
linux_setitimer(struct thread *td, struct linux_setitimer_args *uap)
{
	int error;
	struct l_itimerval ls;
	struct itimerval aitv, oitv;

	if (uap->itv == NULL) {
		uap->itv = uap->oitv;
		return (linux_getitimer(td, (struct linux_getitimer_args *)uap));
	}

	error = copyin(uap->itv, &ls, sizeof(ls));
	if (error != 0)
		return (error);
	B2L_ITIMERVAL(&aitv, &ls);
	error = kern_setitimer(td, uap->which, &aitv, &oitv);
	if (error != 0 || uap->oitv == NULL)
		return (error);
	B2L_ITIMERVAL(&ls, &oitv);

	return (copyout(&ls, uap->oitv, sizeof(ls)));
}

int
linux_getitimer(struct thread *td, struct linux_getitimer_args *uap)
{
	int error;
	struct l_itimerval ls;
	struct itimerval aitv;

	error = kern_getitimer(td, uap->which, &aitv);
	if (error != 0)
		return (error);
	B2L_ITIMERVAL(&ls, &aitv);
	return (copyout(&ls, uap->itv, sizeof(ls)));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_nice(struct thread *td, struct linux_nice_args *args)
{

	return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

int
linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
{
	struct ucred *newcred, *oldcred;
	l_gid_t *linux_gidset;
	gid_t *bsd_gidset;
	int ngrp, error;
	struct proc *p;

	ngrp = args->gidsetsize;
	if (ngrp < 0 || ngrp >= ngroups_max + 1)
		return (EINVAL);
	linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK);
	error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t));
	if (error)
		goto out;
	newcred = crget();
	crextend(newcred, ngrp + 1);
	p = td->td_proc;
	PROC_LOCK(p);
	oldcred = p->p_ucred;
	crcopy(newcred, oldcred);

	/*
	 * cr_groups[0] holds egid. Setting the whole set from
	 * the supplied set will cause egid to be changed too.
	 * Keep cr_groups[0] unchanged to prevent that.
	 */

	if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) {
		PROC_UNLOCK(p);
		crfree(newcred);
		goto out;
	}

	if (ngrp > 0) {
		newcred->cr_ngroups = ngrp + 1;

		bsd_gidset = newcred->cr_groups;
		ngrp--;
		while (ngrp >= 0) {
			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
			ngrp--;
		}
	} else
		newcred->cr_ngroups = 1;

	setsugid(p);
	proc_set_cred(p, newcred);
	PROC_UNLOCK(p);
	crfree(oldcred);
	error = 0;
out:
	free(linux_gidset, M_LINUX);
	return (error);
}

int
linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
{
	struct ucred *cred;
	l_gid_t *linux_gidset;
	gid_t *bsd_gidset;
	int bsd_gidsetsz, ngrp, error;

	cred = td->td_ucred;
	bsd_gidset = cred->cr_groups;
	bsd_gidsetsz = cred->cr_ngroups - 1;

	/*
	 * cr_groups[0] holds egid. Returning the whole set
	 * here will cause a duplicate. Exclude cr_groups[0]
	 * to prevent that.
	 */

	if ((ngrp = args->gidsetsize) == 0) {
		td->td_retval[0] = bsd_gidsetsz;
		return (0);
	}

	if (ngrp < bsd_gidsetsz)
		return (EINVAL);

	ngrp = 0;
	linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset),
	    M_LINUX, M_WAITOK);
	while (ngrp < bsd_gidsetsz) {
		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
		ngrp++;
	}

	error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t));
	free(linux_gidset, M_LINUX);
	if (error)
		return (error);

	td->td_retval[0] = ngrp;
	return (0);
}

static bool
linux_get_dummy_limit(l_uint resource, struct rlimit *rlim)
{

	if (linux_dummy_rlimits == 0)
		return (false);

	switch (resource) {
	case LINUX_RLIMIT_LOCKS:
	case LINUX_RLIMIT_SIGPENDING:
	case LINUX_RLIMIT_MSGQUEUE:
	case LINUX_RLIMIT_RTTIME:
		rlim->rlim_cur = LINUX_RLIM_INFINITY;
		rlim->rlim_max = LINUX_RLIM_INFINITY;
		return (true);
	case LINUX_RLIMIT_NICE:
	case LINUX_RLIMIT_RTPRIO:
		rlim->rlim_cur = 0;
		rlim->rlim_max = 0;
		return (true);
	default:
		return (false);
	}
}

int
linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
{
	struct rlimit bsd_rlim;
	struct l_rlimit rlim;
	u_int which;
	int error;

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	error = copyin(args->rlim, &rlim, sizeof(rlim));
	if (error)
		return (error);

	bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur;
	bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max;
	return (kern_setrlimit(td, which, &bsd_rlim));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
{
	struct l_rlimit rlim;
	struct rlimit bsd_rlim;
	u_int which;

	if (linux_get_dummy_limit(args->resource, &bsd_rlim)) {
		rlim.rlim_cur = bsd_rlim.rlim_cur;
		rlim.rlim_max = bsd_rlim.rlim_max;
		return (copyout(&rlim, args->rlim, sizeof(rlim)));
	}

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	lim_rlimit(td, which, &bsd_rlim);

#ifdef COMPAT_LINUX32
	rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur;
	if (rlim.rlim_cur == UINT_MAX)
		rlim.rlim_cur = INT_MAX;
	rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max;
	if (rlim.rlim_max == UINT_MAX)
		rlim.rlim_max = INT_MAX;
#else
	rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur;
	if (rlim.rlim_cur == ULONG_MAX)
		rlim.rlim_cur = LONG_MAX;
	rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max;
	if (rlim.rlim_max == ULONG_MAX)
		rlim.rlim_max = LONG_MAX;
#endif
	return (copyout(&rlim, args->rlim, sizeof(rlim)));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

int
linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
{
	struct l_rlimit rlim;
	struct rlimit bsd_rlim;
	u_int which;

	if (linux_get_dummy_limit(args->resource, &bsd_rlim)) {
		rlim.rlim_cur = bsd_rlim.rlim_cur;
		rlim.rlim_max = bsd_rlim.rlim_max;
		return (copyout(&rlim, args->rlim, sizeof(rlim)));
	}

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	lim_rlimit(td, which, &bsd_rlim);

	rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur;
	rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max;
	return (copyout(&rlim, args->rlim, sizeof(rlim)));
}

int
linux_sched_setscheduler(struct thread *td,
    struct linux_sched_setscheduler_args *args)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}

	error = copyin(args->param, &sched_param, sizeof(sched_param));
	if (error)
		return (error);

	if (linux_map_sched_prio) {
		switch (policy) {
		case SCHED_OTHER:
			if (sched_param.sched_priority != 0)
				return (EINVAL);

			sched_param.sched_priority =
			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			if (sched_param.sched_priority < 1 ||
			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO)
				return (EINVAL);

			/*
			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
			 */
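			/*
			 * E.g., assuming LINUX_MAX_RT_PRIO == 100 and an
			 * rtprio range of 0..31: Linux priority 1 maps to
			 * 0, and priority 99 maps to 98 * 32 / 99 == 31.
			 */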
			sched_param.sched_priority =
			    (sched_param.sched_priority - 1) *
			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
			    (LINUX_MAX_RT_PRIO - 1);
			break;
		}
	}

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_setscheduler(td, tdt, policy, &sched_param);
	PROC_UNLOCK(tdt->td_proc);
	return (error);
}

int
linux_sched_getscheduler(struct thread *td,
    struct linux_sched_getscheduler_args *args)
{
	struct thread *tdt;
	int error, policy;

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_getscheduler(td, tdt, &policy);
	PROC_UNLOCK(tdt->td_proc);

	switch (policy) {
	case SCHED_OTHER:
		td->td_retval[0] = LINUX_SCHED_OTHER;
		break;
	case SCHED_FIFO:
		td->td_retval[0] = LINUX_SCHED_FIFO;
		break;
	case SCHED_RR:
		td->td_retval[0] = LINUX_SCHED_RR;
		break;
	}
	return (error);
}

int
linux_sched_get_priority_max(struct thread *td,
    struct linux_sched_get_priority_max_args *args)
{
	struct sched_get_priority_max_args bsd;

	if (linux_map_sched_prio) {
		switch (args->policy) {
		case LINUX_SCHED_OTHER:
			td->td_retval[0] = 0;
			return (0);
		case LINUX_SCHED_FIFO:
		case LINUX_SCHED_RR:
			td->td_retval[0] = LINUX_MAX_RT_PRIO - 1;
			return (0);
		default:
			return (EINVAL);
		}
	}

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		bsd.policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		bsd.policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		bsd.policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}
	return (sys_sched_get_priority_max(td, &bsd));
}

int
linux_sched_get_priority_min(struct thread *td,
    struct linux_sched_get_priority_min_args *args)
{
	struct sched_get_priority_min_args bsd;

	if (linux_map_sched_prio) {
		switch (args->policy) {
		case LINUX_SCHED_OTHER:
			td->td_retval[0] = 0;
			return (0);
		case LINUX_SCHED_FIFO:
		case LINUX_SCHED_RR:
			td->td_retval[0] = 1;
			return (0);
		default:
			return (EINVAL);
		}
	}

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		bsd.policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		bsd.policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		bsd.policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}
	return (sys_sched_get_priority_min(td, &bsd));
}

#define REBOOT_CAD_ON	0x89abcdef
#define REBOOT_CAD_OFF	0
#define REBOOT_HALT	0xcdef0123
#define REBOOT_RESTART	0x01234567
#define REBOOT_RESTART2	0xA1B2C3D4
#define REBOOT_POWEROFF	0x4321FEDC
#define REBOOT_MAGIC1	0xfee1dead
#define REBOOT_MAGIC2	0x28121969
#define REBOOT_MAGIC2A	0x05121996
#define REBOOT_MAGIC2B	0x16041998
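
/*
 * The REBOOT_MAGIC2* values are dates encoded as 0xDDMMYYYY (28 Dec
 * 1969, 5 Dec 1996 and 16 Apr 1998), matching the magic numbers Linux
 * accepts in its <linux/reboot.h>.
 */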

int
linux_reboot(struct thread *td, struct linux_reboot_args *args)
{
	struct reboot_args bsd_args;

	if (args->magic1 != REBOOT_MAGIC1)
		return (EINVAL);

	switch (args->magic2) {
	case REBOOT_MAGIC2:
	case REBOOT_MAGIC2A:
	case REBOOT_MAGIC2B:
		break;
	default:
		return (EINVAL);
	}

	switch (args->cmd) {
	case REBOOT_CAD_ON:
	case REBOOT_CAD_OFF:
		return (priv_check(td, PRIV_REBOOT));
	case REBOOT_HALT:
		bsd_args.opt = RB_HALT;
		break;
	case REBOOT_RESTART:
	case REBOOT_RESTART2:
		bsd_args.opt = 0;
		break;
	case REBOOT_POWEROFF:
		bsd_args.opt = RB_POWEROFF;
		break;
	default:
		return (EINVAL);
	}
	return (sys_reboot(td, &bsd_args));
}

int
linux_getpid(struct thread *td, struct linux_getpid_args *args)
{

	td->td_retval[0] = td->td_proc->p_pid;

	return (0);
}

int
linux_gettid(struct thread *td, struct linux_gettid_args *args)
{
	struct linux_emuldata *em;

	em = em_find(td);
	KASSERT(em != NULL, ("gettid: emuldata not found.\n"));

	td->td_retval[0] = em->em_tid;

	return (0);
}

int
linux_getppid(struct thread *td, struct linux_getppid_args *args)
{

	td->td_retval[0] = kern_getppid(td);
	return (0);
}

int
linux_getgid(struct thread *td, struct linux_getgid_args *args)
{

	td->td_retval[0] = td->td_ucred->cr_rgid;
	return (0);
}

int
linux_getuid(struct thread *td, struct linux_getuid_args *args)
{

	td->td_retval[0] = td->td_ucred->cr_ruid;
	return (0);
}

int
linux_getsid(struct thread *td, struct linux_getsid_args *args)
{

	return (kern_getsid(td, args->pid));
}

int
linux_nosys(struct thread *td, struct nosys_args *ignore)
{

	return (ENOSYS);
}

int
linux_getpriority(struct thread *td, struct linux_getpriority_args *args)
{
	int error;

	error = kern_getpriority(td, args->which, args->who);
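	/*
	 * The raw Linux syscall returns 20 - nice (i.e. 1..40, with
	 * larger values meaning "nicer"); glibc converts it back to the
	 * usual -20..19 range.  kern_getpriority() yields the BSD nice
	 * value, so translate it here.
	 */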
	td->td_retval[0] = 20 - td->td_retval[0];
	return (error);
}

int
linux_sethostname(struct thread *td, struct linux_sethostname_args *args)
{
	int name[2];

	name[0] = CTL_KERN;
	name[1] = KERN_HOSTNAME;
	return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname,
	    args->len, 0, 0));
}

int
linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args)
{
	int name[2];

	name[0] = CTL_KERN;
	name[1] = KERN_NISDOMAINNAME;
	return (userland_sysctl(td, name, 2, 0, 0, 0, args->name,
	    args->len, 0, 0));
}

int
linux_exit_group(struct thread *td, struct linux_exit_group_args *args)
{

	LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid,
	    args->error_code);

	/*
	 * XXX: we should send a signal to the parent if
	 * SIGNAL_EXIT_GROUP is set.  We ignore that (temporarily?)
	 * as it doesn't occur often.
	 */
	exit1(td, args->error_code, 0);
		/* NOTREACHED */
}

#define _LINUX_CAPABILITY_VERSION_1  0x19980330
#define _LINUX_CAPABILITY_VERSION_2  0x20071026
#define _LINUX_CAPABILITY_VERSION_3  0x20080522

struct l_user_cap_header {
	l_int	version;
	l_int	pid;
};

struct l_user_cap_data {
	l_int	effective;
	l_int	permitted;
	l_int	inheritable;
};

int
linux_capget(struct thread *td, struct linux_capget_args *uap)
{
	struct l_user_cap_header luch;
	struct l_user_cap_data lucd[2];
	int error, u32s;

	if (uap->hdrp == NULL)
		return (EFAULT);

	error = copyin(uap->hdrp, &luch, sizeof(luch));
	if (error != 0)
		return (error);

	switch (luch.version) {
	case _LINUX_CAPABILITY_VERSION_1:
		u32s = 1;
		break;
	case _LINUX_CAPABILITY_VERSION_2:
	case _LINUX_CAPABILITY_VERSION_3:
		u32s = 2;
		break;
	default:
		luch.version = _LINUX_CAPABILITY_VERSION_1;
		error = copyout(&luch, uap->hdrp, sizeof(luch));
		if (error)
			return (error);
		return (EINVAL);
	}

	if (luch.pid)
		return (EPERM);

	if (uap->datap) {
		/*
		 * The current implementation doesn't support setting
		 * a capability (it's essentially a stub) so indicate
		 * that no capabilities are currently set or available
		 * to request.
		 */
		memset(&lucd, 0, u32s * sizeof(lucd[0]));
		error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0]));
	}

	return (error);
}

int
linux_capset(struct thread *td, struct linux_capset_args *uap)
{
	struct l_user_cap_header luch;
	struct l_user_cap_data lucd[2];
	int error, i, u32s;

	if (uap->hdrp == NULL || uap->datap == NULL)
		return (EFAULT);

	error = copyin(uap->hdrp, &luch, sizeof(luch));
	if (error != 0)
		return (error);

	switch (luch.version) {
	case _LINUX_CAPABILITY_VERSION_1:
		u32s = 1;
		break;
	case _LINUX_CAPABILITY_VERSION_2:
	case _LINUX_CAPABILITY_VERSION_3:
		u32s = 2;
		break;
	default:
		luch.version = _LINUX_CAPABILITY_VERSION_1;
		error = copyout(&luch, uap->hdrp, sizeof(luch));
		if (error)
			return (error);
		return (EINVAL);
	}

	if (luch.pid)
		return (EPERM);

	error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0]));
	if (error != 0)
		return (error);

	/* We currently don't support setting any capabilities. */
	for (i = 0; i < u32s; i++) {
		if (lucd[i].effective || lucd[i].permitted ||
		    lucd[i].inheritable) {
			linux_msg(td,
			    "capset[%d] effective=0x%x, permitted=0x%x, "
			    "inheritable=0x%x is not implemented", i,
			    (int)lucd[i].effective, (int)lucd[i].permitted,
			    (int)lucd[i].inheritable);
			return (EPERM);
		}
	}

	return (0);
}

int
linux_prctl(struct thread *td, struct linux_prctl_args *args)
{
	int error = 0, max_size;
	struct proc *p = td->td_proc;
	char comm[LINUX_MAX_COMM_LEN];
	int pdeath_signal, trace_state;

	switch (args->option) {
	case LINUX_PR_SET_PDEATHSIG:
		if (!LINUX_SIG_VALID(args->arg2))
			return (EINVAL);
		pdeath_signal = linux_to_bsd_signal(args->arg2);
		return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL,
		    &pdeath_signal));
	case LINUX_PR_GET_PDEATHSIG:
		error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS,
		    &pdeath_signal);
		if (error != 0)
			return (error);
		pdeath_signal = bsd_to_linux_signal(pdeath_signal);
		return (copyout(&pdeath_signal,
		    (void *)(register_t)args->arg2,
		    sizeof(pdeath_signal)));
	/*
	 * In Linux, this flag controls if set[gu]id processes can coredump.
	 * There are additional semantics imposed on processes that cannot
	 * coredump:
	 * - Such processes can not be ptraced.
	 * - There are some semantics around ownership of process-related files
	 *   in the /proc namespace.
	 *
	 * In FreeBSD, we can (and by default, do) disable setuid coredumps
	 * system-wide with 'sugid_coredump'.  We control traceability on a
	 * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag).
	 * By happy coincidence, P2_NOTRACE also prevents coredumping.  So the
	 * procctl is roughly analogous to Linux's DUMPABLE.
	 *
	 * So, proxy these knobs to the corresponding PROC_TRACE setting.
	 */
2008 	case LINUX_PR_GET_DUMPABLE:
2009 		error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS,
2010 		    &trace_state);
2011 		if (error != 0)
2012 			return (error);
2013 		td->td_retval[0] = (trace_state != -1);
2014 		return (0);
2015 	case LINUX_PR_SET_DUMPABLE:
2016 		/*
2017 		 * It is only valid for userspace to set one of these two
2018 		 * flags, and only one at a time.
2019 		 */
2020 		switch (args->arg2) {
2021 		case LINUX_SUID_DUMP_DISABLE:
2022 			trace_state = PROC_TRACE_CTL_DISABLE_EXEC;
2023 			break;
2024 		case LINUX_SUID_DUMP_USER:
2025 			trace_state = PROC_TRACE_CTL_ENABLE;
2026 			break;
2027 		default:
2028 			return (EINVAL);
2029 		}
2030 		return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL,
2031 		    &trace_state));
2032 	case LINUX_PR_GET_KEEPCAPS:
2033 		/*
2034 		 * Indicate that we always clear the effective and
2035 		 * permitted capability sets when the user id becomes
2036 		 * non-zero (actually the capability sets are simply
2037 		 * always zero in the current implementation).
2038 		 */
2039 		td->td_retval[0] = 0;
2040 		break;
2041 	case LINUX_PR_SET_KEEPCAPS:
2042 		/*
2043 		 * Ignore requests to keep the effective and permitted
2044 		 * capability sets when the user id becomes non-zero.
2045 		 */
2046 		break;
2047 	case LINUX_PR_SET_NAME:
		/*
		 * To be on the safe side we must not overflow the
		 * buffer size a Linux program expects.  Enforcing the
		 * limit here in the copyin means no check is needed
		 * on copyout.
		 */
2054 		max_size = MIN(sizeof(comm), sizeof(p->p_comm));
2055 		error = copyinstr((void *)(register_t)args->arg2, comm,
2056 		    max_size, NULL);
2057 
2058 		/* Linux silently truncates the name if it is too long. */
2059 		if (error == ENAMETOOLONG) {
			/*
			 * XXX: copyinstr() isn't documented to populate the
			 * array completely, so do a copyin() to be on the
			 * safe side.  Revisit this if copyinstr() is ever
			 * changed to guarantee that.
			 */
2066 			error = copyin((void *)(register_t)args->arg2, comm,
2067 			    max_size - 1);
2068 			comm[max_size - 1] = '\0';
2069 		}
2070 		if (error)
2071 			return (error);
2072 
2073 		PROC_LOCK(p);
2074 		strlcpy(p->p_comm, comm, sizeof(p->p_comm));
2075 		PROC_UNLOCK(p);
2076 		break;
2077 	case LINUX_PR_GET_NAME:
2078 		PROC_LOCK(p);
2079 		strlcpy(comm, p->p_comm, sizeof(comm));
2080 		PROC_UNLOCK(p);
2081 		error = copyout(comm, (void *)(register_t)args->arg2,
2082 		    strlen(comm) + 1);
2083 		break;
2084 	case LINUX_PR_GET_SECCOMP:
2085 	case LINUX_PR_SET_SECCOMP:
		/*
		 * Return the same error as Linux does when CONFIG_SECCOMP
		 * is not enabled.
		 */
2089 		error = EINVAL;
2090 		break;
2091 	case LINUX_PR_CAPBSET_READ:
2092 #if 0
2093 		/*
2094 		 * This makes too much noise with Ubuntu Focal.
2095 		 */
2096 		linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d",
2097 		    (int)args->arg2);
2098 #endif
2099 		error = EINVAL;
2100 		break;
2101 	case LINUX_PR_SET_NO_NEW_PRIVS:
2102 		linux_msg(td, "unsupported prctl PR_SET_NO_NEW_PRIVS");
2103 		error = EINVAL;
2104 		break;
2105 	case LINUX_PR_SET_PTRACER:
2106 		linux_msg(td, "unsupported prctl PR_SET_PTRACER");
2107 		error = EINVAL;
2108 		break;
2109 	default:
2110 		linux_msg(td, "unsupported prctl option %d", args->option);
2111 		error = EINVAL;
2112 		break;
2113 	}
2114 
2115 	return (error);
2116 }
2117 
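/*
 * Set the scheduling parameters of a thread.  With the
 * linux_map_sched_prio knob enabled, the Linux priority range is
 * translated into the corresponding FreeBSD one first.
 */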
2118 int
2119 linux_sched_setparam(struct thread *td,
2120     struct linux_sched_setparam_args *uap)
2121 {
2122 	struct sched_param sched_param;
2123 	struct thread *tdt;
2124 	int error, policy;
2125 
2126 	error = copyin(uap->param, &sched_param, sizeof(sched_param));
2127 	if (error)
2128 		return (error);
2129 
2130 	tdt = linux_tdfind(td, uap->pid, -1);
2131 	if (tdt == NULL)
2132 		return (ESRCH);
2133 
2134 	if (linux_map_sched_prio) {
2135 		error = kern_sched_getscheduler(td, tdt, &policy);
2136 		if (error)
2137 			goto out;
2138 
2139 		switch (policy) {
2140 		case SCHED_OTHER:
2141 			if (sched_param.sched_priority != 0) {
2142 				error = EINVAL;
2143 				goto out;
2144 			}
2145 			sched_param.sched_priority =
2146 			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
2147 			break;
2148 		case SCHED_FIFO:
2149 		case SCHED_RR:
2150 			if (sched_param.sched_priority < 1 ||
2151 			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO) {
2152 				error = EINVAL;
2153 				goto out;
2154 			}
2155 			/*
2156 			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
2157 			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
2158 			 */
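			/*
			 * A worked example, assuming the usual values
			 * RTP_PRIO_MIN == 0, RTP_PRIO_MAX == 31 and
			 * LINUX_MAX_RT_PRIO == 100: the highest Linux
			 * priority, 99, maps to (99 - 1) * 32 / 99 == 31.
			 */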
2159 			sched_param.sched_priority =
2160 			    (sched_param.sched_priority - 1) *
2161 			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
2162 			    (LINUX_MAX_RT_PRIO - 1);
2163 			break;
2164 		}
2165 	}
2166 
2167 	error = kern_sched_setparam(td, tdt, &sched_param);
2168 out:	PROC_UNLOCK(tdt->td_proc);
2169 	return (error);
2170 }
2171 
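/*
 * Get the scheduling parameters of a thread, mapping the priority
 * back into the Linux range when linux_map_sched_prio is enabled.
 */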
2172 int
2173 linux_sched_getparam(struct thread *td,
2174     struct linux_sched_getparam_args *uap)
2175 {
2176 	struct sched_param sched_param;
2177 	struct thread *tdt;
2178 	int error, policy;
2179 
2180 	tdt = linux_tdfind(td, uap->pid, -1);
2181 	if (tdt == NULL)
2182 		return (ESRCH);
2183 
2184 	error = kern_sched_getparam(td, tdt, &sched_param);
2185 	if (error) {
2186 		PROC_UNLOCK(tdt->td_proc);
2187 		return (error);
2188 	}
2189 
2190 	if (linux_map_sched_prio) {
2191 		error = kern_sched_getscheduler(td, tdt, &policy);
2192 		PROC_UNLOCK(tdt->td_proc);
2193 		if (error)
2194 			return (error);
2195 
2196 		switch (policy) {
2197 		case SCHED_OTHER:
2198 			sched_param.sched_priority = 0;
2199 			break;
2200 		case SCHED_FIFO:
2201 		case SCHED_RR:
2202 			/*
2203 			 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to
2204 			 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up).
2205 			 */
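			/*
			 * A worked example, assuming the usual values
			 * RTP_PRIO_MIN == 0, RTP_PRIO_MAX == 31 and
			 * LINUX_MAX_RT_PRIO == 100: FreeBSD priority 16
			 * maps back to (16 * 99 + 30) / 31 + 1 == 53.
			 */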
2206 			sched_param.sched_priority =
2207 			    (sched_param.sched_priority *
2208 			    (LINUX_MAX_RT_PRIO - 1) +
2209 			    (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) /
2210 			    (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1;
2211 			break;
2212 		}
2213 	} else
2214 		PROC_UNLOCK(tdt->td_proc);
2215 
2216 	error = copyout(&sched_param, uap->param, sizeof(sched_param));
2217 	return (error);
2218 }
2219 
2220 /*
2221  * Get affinity of a process.
2222  */
2223 int
2224 linux_sched_getaffinity(struct thread *td,
2225     struct linux_sched_getaffinity_args *args)
2226 {
2227 	int error;
2228 	struct thread *tdt;
2229 
2230 	if (args->len < sizeof(cpuset_t))
2231 		return (EINVAL);
2232 
2233 	tdt = linux_tdfind(td, args->pid, -1);
2234 	if (tdt == NULL)
2235 		return (ESRCH);
2236 
2237 	PROC_UNLOCK(tdt->td_proc);
2238 
2239 	error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
2240 	    tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr);
2241 	if (error == 0)
2242 		td->td_retval[0] = sizeof(cpuset_t);
2243 
2244 	return (error);
2245 }
2246 
/*
 * Set affinity of a process.
 */
2250 int
2251 linux_sched_setaffinity(struct thread *td,
2252     struct linux_sched_setaffinity_args *args)
2253 {
2254 	struct thread *tdt;
2255 
2256 	if (args->len < sizeof(cpuset_t))
2257 		return (EINVAL);
2258 
2259 	tdt = linux_tdfind(td, args->pid, -1);
2260 	if (tdt == NULL)
2261 		return (ESRCH);
2262 
2263 	PROC_UNLOCK(tdt->td_proc);
2264 
2265 	return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
2266 	    tdt->td_tid, sizeof(cpuset_t), (cpuset_t *) args->user_mask_ptr));
2267 }
2268 
2269 struct linux_rlimit64 {
2270 	uint64_t	rlim_cur;
2271 	uint64_t	rlim_max;
2272 };
2273 
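/*
 * Implement Linux prlimit64(2): get and/or set the resource limits
 * of the process identified by args->pid (0 means the calling
 * process).
 */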
2274 int
2275 linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args)
2276 {
2277 	struct rlimit rlim, nrlim;
2278 	struct linux_rlimit64 lrlim;
2279 	struct proc *p;
2280 	u_int which;
2281 	int flags;
2282 	int error;
2283 
2284 	if (args->new == NULL && args->old != NULL) {
2285 		if (linux_get_dummy_limit(args->resource, &rlim)) {
2286 			lrlim.rlim_cur = rlim.rlim_cur;
2287 			lrlim.rlim_max = rlim.rlim_max;
2288 			return (copyout(&lrlim, args->old, sizeof(lrlim)));
2289 		}
2290 	}
2291 
2292 	if (args->resource >= LINUX_RLIM_NLIMITS)
2293 		return (EINVAL);
2294 
2295 	which = linux_to_bsd_resource[args->resource];
2296 	if (which == -1)
2297 		return (EINVAL);
2298 
2299 	if (args->new != NULL) {
		/*
		 * Note: unlike FreeBSD, where rlim is a signed 64-bit
		 * value, the Linux rlim is unsigned 64-bit.  FreeBSD
		 * treats negative limits as INFINITY, so no conversion
		 * is needed.
		 */
2305 		error = copyin(args->new, &nrlim, sizeof(nrlim));
2306 		if (error != 0)
2307 			return (error);
2308 	}
2309 
2310 	flags = PGET_HOLD | PGET_NOTWEXIT;
2311 	if (args->new != NULL)
2312 		flags |= PGET_CANDEBUG;
2313 	else
2314 		flags |= PGET_CANSEE;
2315 	if (args->pid == 0) {
2316 		p = td->td_proc;
2317 		PHOLD(p);
2318 	} else {
2319 		error = pget(args->pid, flags, &p);
2320 		if (error != 0)
2321 			return (error);
2322 	}
2323 	if (args->old != NULL) {
2324 		PROC_LOCK(p);
2325 		lim_rlimit_proc(p, which, &rlim);
2326 		PROC_UNLOCK(p);
2327 		if (rlim.rlim_cur == RLIM_INFINITY)
2328 			lrlim.rlim_cur = LINUX_RLIM_INFINITY;
2329 		else
2330 			lrlim.rlim_cur = rlim.rlim_cur;
2331 		if (rlim.rlim_max == RLIM_INFINITY)
2332 			lrlim.rlim_max = LINUX_RLIM_INFINITY;
2333 		else
2334 			lrlim.rlim_max = rlim.rlim_max;
2335 		error = copyout(&lrlim, args->old, sizeof(lrlim));
2336 		if (error != 0)
2337 			goto out;
2338 	}
2339 
2340 	if (args->new != NULL)
2341 		error = kern_proc_setrlimit(td, p, which, &nrlim);
2342 
2343  out:
2344 	PRELE(p);
2345 	return (error);
2346 }
2347 
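/*
 * Implement Linux pselect6(2) on top of kern_pselect().  The signal
 * mask arrives as a (pointer, size) pair in a separate struct, and
 * the remaining timeout is written back to userspace on return, as
 * the Linux syscall does.
 */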
2348 int
2349 linux_pselect6(struct thread *td, struct linux_pselect6_args *args)
2350 {
2351 	struct timeval utv, tv0, tv1, *tvp;
2352 	struct l_pselect6arg lpse6;
2353 	struct l_timespec lts;
2354 	struct timespec uts;
2355 	l_sigset_t l_ss;
2356 	sigset_t *ssp;
2357 	sigset_t ss;
2358 	int error;
2359 
2360 	ssp = NULL;
2361 	if (args->sig != NULL) {
2362 		error = copyin(args->sig, &lpse6, sizeof(lpse6));
2363 		if (error != 0)
2364 			return (error);
2365 		if (lpse6.ss_len != sizeof(l_ss))
2366 			return (EINVAL);
2367 		if (lpse6.ss != 0) {
2368 			error = copyin(PTRIN(lpse6.ss), &l_ss,
2369 			    sizeof(l_ss));
2370 			if (error != 0)
2371 				return (error);
2372 			linux_to_bsd_sigset(&l_ss, &ss);
2373 			ssp = &ss;
2374 		}
2375 	}
2376 
	/*
	 * Currently glibc converts the nanosecond value to microseconds.
	 * This means a loss of precision, but for now it is hardly
	 * noticeable.
	 */
2381 	if (args->tsp != NULL) {
2382 		error = copyin(args->tsp, &lts, sizeof(lts));
2383 		if (error != 0)
2384 			return (error);
2385 		error = linux_to_native_timespec(&uts, &lts);
2386 		if (error != 0)
2387 			return (error);
2388 
2389 		TIMESPEC_TO_TIMEVAL(&utv, &uts);
2390 		if (itimerfix(&utv))
2391 			return (EINVAL);
2392 
2393 		microtime(&tv0);
2394 		tvp = &utv;
2395 	} else
2396 		tvp = NULL;
2397 
2398 	error = kern_pselect(td, args->nfds, args->readfds, args->writefds,
2399 	    args->exceptfds, tvp, ssp, LINUX_NFDBITS);
2400 
2401 	if (error == 0 && args->tsp != NULL) {
2402 		if (td->td_retval[0] != 0) {
			/*
			 * Compute how much of the timeout was left by
			 * subtracting the time before the call from the
			 * current time, and subtracting that elapsed
			 * time from the user-supplied value.
			 */
2409 
2410 			microtime(&tv1);
2411 			timevalsub(&tv1, &tv0);
2412 			timevalsub(&utv, &tv1);
2413 			if (utv.tv_sec < 0)
2414 				timevalclear(&utv);
2415 		} else
2416 			timevalclear(&utv);
2417 
2418 		TIMEVAL_TO_TIMESPEC(&utv, &uts);
2419 
2420 		error = native_to_linux_timespec(&lts, &uts);
2421 		if (error == 0)
2422 			error = copyout(&lts, args->tsp, sizeof(lts));
2423 	}
2424 
2425 	return (error);
2426 }
2427 
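/*
 * Implement Linux ppoll(2) on top of kern_poll(), again writing the
 * remaining timeout back to userspace on return.
 */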
2428 int
2429 linux_ppoll(struct thread *td, struct linux_ppoll_args *args)
2430 {
2431 	struct timespec ts0, ts1;
2432 	struct l_timespec lts;
2433 	struct timespec uts, *tsp;
2434 	l_sigset_t l_ss;
2435 	sigset_t *ssp;
2436 	sigset_t ss;
2437 	int error;
2438 
2439 	if (args->sset != NULL) {
2440 		if (args->ssize != sizeof(l_ss))
2441 			return (EINVAL);
2442 		error = copyin(args->sset, &l_ss, sizeof(l_ss));
2443 		if (error)
2444 			return (error);
2445 		linux_to_bsd_sigset(&l_ss, &ss);
2446 		ssp = &ss;
2447 	} else
2448 		ssp = NULL;
2449 	if (args->tsp != NULL) {
2450 		error = copyin(args->tsp, &lts, sizeof(lts));
2451 		if (error)
2452 			return (error);
2453 		error = linux_to_native_timespec(&uts, &lts);
2454 		if (error != 0)
2455 			return (error);
2456 
2457 		nanotime(&ts0);
2458 		tsp = &uts;
2459 	} else
2460 		tsp = NULL;
2461 
2462 	error = kern_poll(td, args->fds, args->nfds, tsp, ssp);
2463 
2464 	if (error == 0 && args->tsp != NULL) {
2465 		if (td->td_retval[0]) {
2466 			nanotime(&ts1);
2467 			timespecsub(&ts1, &ts0, &ts1);
2468 			timespecsub(&uts, &ts1, &uts);
2469 			if (uts.tv_sec < 0)
2470 				timespecclear(&uts);
2471 		} else
2472 			timespecclear(&uts);
2473 
2474 		error = native_to_linux_timespec(&lts, &uts);
2475 		if (error == 0)
2476 			error = copyout(&lts, args->tsp, sizeof(lts));
2477 	}
2478 
2479 	return (error);
2480 }
2481 
2482 int
2483 linux_sched_rr_get_interval(struct thread *td,
2484     struct linux_sched_rr_get_interval_args *uap)
2485 {
2486 	struct timespec ts;
2487 	struct l_timespec lts;
2488 	struct thread *tdt;
2489 	int error;
2490 
	/*
	 * According to the man page, EINVAL should be returned when
	 * an invalid pid is specified.
	 */
2495 	if (uap->pid < 0)
2496 		return (EINVAL);
2497 
2498 	tdt = linux_tdfind(td, uap->pid, -1);
2499 	if (tdt == NULL)
2500 		return (ESRCH);
2501 
2502 	error = kern_sched_rr_get_interval_td(td, tdt, &ts);
2503 	PROC_UNLOCK(tdt->td_proc);
2504 	if (error != 0)
2505 		return (error);
2506 	error = native_to_linux_timespec(&lts, &ts);
2507 	if (error != 0)
2508 		return (error);
2509 	return (copyout(&lts, uap->interval, sizeof(lts)));
2510 }
2511 
/*
 * When the Linux thread is the initial thread in the thread group,
 * its thread id is equal to the process id.  Glibc depends on this
 * magic (there is an assert in pthread_getattr_np.c).
 */
2517 struct thread *
2518 linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid)
2519 {
2520 	struct linux_emuldata *em;
2521 	struct thread *tdt;
2522 	struct proc *p;
2523 
2524 	tdt = NULL;
2525 	if (tid == 0 || tid == td->td_tid) {
2526 		tdt = td;
2527 		PROC_LOCK(tdt->td_proc);
2528 	} else if (tid > PID_MAX)
2529 		tdt = tdfind(tid, pid);
2530 	else {
		/*
		 * The initial thread, whose tid is equal to the pid.
		 */
2534 		p = pfind(tid);
2535 		if (p != NULL) {
2536 			if (SV_PROC_ABI(p) != SV_ABI_LINUX) {
2537 				/*
2538 				 * p is not a Linuxulator process.
2539 				 */
2540 				PROC_UNLOCK(p);
2541 				return (NULL);
2542 			}
2543 			FOREACH_THREAD_IN_PROC(p, tdt) {
2544 				em = em_find(tdt);
2545 				if (tid == em->em_tid)
2546 					return (tdt);
2547 			}
2548 			PROC_UNLOCK(p);
2549 		}
2550 		return (NULL);
2551 	}
2552 
2553 	return (tdt);
2554 }
2555 
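/*
 * Translate Linux wait(2)-family option flags to their FreeBSD
 * counterparts.
 */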
2556 void
2557 linux_to_bsd_waitopts(int options, int *bsdopts)
2558 {
2559 
2560 	if (options & LINUX_WNOHANG)
2561 		*bsdopts |= WNOHANG;
2562 	if (options & LINUX_WUNTRACED)
2563 		*bsdopts |= WUNTRACED;
2564 	if (options & LINUX_WEXITED)
2565 		*bsdopts |= WEXITED;
2566 	if (options & LINUX_WCONTINUED)
2567 		*bsdopts |= WCONTINUED;
2568 	if (options & LINUX_WNOWAIT)
2569 		*bsdopts |= WNOWAIT;
2570 
2571 	if (options & __WCLONE)
2572 		*bsdopts |= WLINUXCLONE;
2573 }
2574 
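/*
 * Implement Linux getrandom(2) on top of read_random_uio(9).
 * LINUX_GRND_RANDOM is accepted but otherwise ignored, as FreeBSD
 * does not distinguish between its random devices.
 */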
2575 int
2576 linux_getrandom(struct thread *td, struct linux_getrandom_args *args)
2577 {
2578 	struct uio uio;
2579 	struct iovec iov;
2580 	int error;
2581 
2582 	if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM))
2583 		return (EINVAL);
2584 	if (args->count > INT_MAX)
2585 		args->count = INT_MAX;
2586 
2587 	iov.iov_base = args->buf;
2588 	iov.iov_len = args->count;
2589 
2590 	uio.uio_iov = &iov;
2591 	uio.uio_iovcnt = 1;
2592 	uio.uio_resid = iov.iov_len;
2593 	uio.uio_segflg = UIO_USERSPACE;
2594 	uio.uio_rw = UIO_READ;
2595 	uio.uio_td = td;
2596 
2597 	error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK);
2598 	if (error == 0)
2599 		td->td_retval[0] = args->count - uio.uio_resid;
2600 	return (error);
2601 }
2602 
2603 int
2604 linux_mincore(struct thread *td, struct linux_mincore_args *args)
2605 {
2606 
	/* The start address needs to be page-aligned. */
2608 	if (args->start & PAGE_MASK)
2609 		return (EINVAL);
2610 	return (kern_mincore(td, args->start, args->len, args->vec));
2611 }
2612 
2613 #define	SYSLOG_TAG	"<6>"
2614 
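/*
 * Implement the LINUX_SYSLOG_ACTION_READ_ALL subcommand of Linux
 * syslog(2) by copying out the kernel message buffer, prefixing
 * each line with the "<6>" (KERN_INFO) level tag that Linux
 * readers expect.
 */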
2615 int
2616 linux_syslog(struct thread *td, struct linux_syslog_args *args)
2617 {
2618 	char buf[128], *src, *dst;
2619 	u_int seq;
2620 	int buflen, error;
2621 
2622 	if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) {
2623 		linux_msg(td, "syslog unsupported type 0x%x", args->type);
2624 		return (EINVAL);
2625 	}
2626 
2627 	if (args->len < 6) {
2628 		td->td_retval[0] = 0;
2629 		return (0);
2630 	}
2631 
2632 	error = priv_check(td, PRIV_MSGBUF);
2633 	if (error)
2634 		return (error);
2635 
2636 	mtx_lock(&msgbuf_lock);
2637 	msgbuf_peekbytes(msgbufp, NULL, 0, &seq);
2638 	mtx_unlock(&msgbuf_lock);
2639 
2640 	dst = args->buf;
2641 	error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG));
2642 	/* The -1 is to skip the trailing '\0'. */
2643 	dst += sizeof(SYSLOG_TAG) - 1;
2644 
2645 	while (error == 0) {
2646 		mtx_lock(&msgbuf_lock);
2647 		buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq);
2648 		mtx_unlock(&msgbuf_lock);
2649 
2650 		if (buflen == 0)
2651 			break;
2652 
2653 		for (src = buf; src < buf + buflen && error == 0; src++) {
2654 			if (*src == '\0')
2655 				continue;
2656 
2657 			if (dst >= args->buf + args->len)
2658 				goto out;
2659 
2660 			error = copyout(src, dst, 1);
2661 			dst++;
2662 
2663 			if (*src == '\n' && *(src + 1) != '<' &&
2664 			    dst + sizeof(SYSLOG_TAG) < args->buf + args->len) {
2665 				error = copyout(&SYSLOG_TAG,
2666 				    dst, sizeof(SYSLOG_TAG));
2667 				dst += sizeof(SYSLOG_TAG) - 1;
2668 			}
2669 		}
2670 	}
2671 out:
2672 	td->td_retval[0] = dst - args->buf;
2673 	return (error);
2674 }
2675 
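/*
 * Implement Linux getcpu(2): report the CPU the calling thread is
 * currently running on and the NUMA domain that CPU belongs to.
 */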
2676 int
2677 linux_getcpu(struct thread *td, struct linux_getcpu_args *args)
2678 {
2679 	int cpu, error, node;
2680 
2681 	cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */
2682 	error = 0;
2683 	node = cpuid_to_pcpu[cpu]->pc_domain;
2684 
2685 	if (args->cpu != NULL)
2686 		error = copyout(&cpu, args->cpu, sizeof(l_int));
	/* Don't let the second copyout() clobber an earlier error. */
	if (error == 0 && args->node != NULL)
		error = copyout(&node, args->node, sizeof(l_int));
2689 	return (error);
2690 }
2691